imdb 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -0
- data/MIT-LICENSE +21 -0
- data/README.md +97 -0
- data/imdb.gemspec +1 -1
- data/lib/imdb.rb +1 -1
- data/lib/imdb/base.rb +36 -36
- data/lib/imdb/episode.rb +2 -2
- data/lib/imdb/movie_list.rb +6 -10
- data/lib/imdb/search.rb +4 -4
- data/lib/imdb/season.rb +3 -3
- data/lib/imdb/serie.rb +2 -3
- data/lib/imdb/top_250.rb +1 -1
- data/lib/imdb/version.rb +1 -1
- data/spec/fixtures/locations +1167 -0
- data/spec/fixtures/plotsummary +1063 -897
- data/spec/fixtures/search_kannethirey_thondrinal +344 -346
- data/spec/fixtures/search_killed_wife +344 -346
- data/spec/fixtures/search_star_trek +344 -346
- data/spec/fixtures/synopsis +457 -446
- data/spec/fixtures/thewalkingdead-s1 +740 -555
- data/spec/fixtures/thewalkingdead-s1e2 +505 -480
- data/spec/fixtures/top_250 +10749 -1082
- data/spec/fixtures/tt0036855 +553 -519
- data/spec/fixtures/tt0083987 +563 -512
- data/spec/fixtures/tt0095016 +518 -536
- data/spec/fixtures/tt0110912 +572 -512
- data/spec/fixtures/tt0111161 +559 -508
- data/spec/fixtures/tt0117731 +542 -510
- data/spec/fixtures/tt0166222 +969 -872
- data/spec/fixtures/tt0242653 +530 -524
- data/spec/fixtures/tt0330508 +845 -717
- data/spec/fixtures/tt0468569 +533 -543
- data/spec/fixtures/tt1401252 +472 -449
- data/spec/fixtures/tt1520211 +562 -542
- data/spec/imdb/movie_spec.rb +13 -5
- data/spec/imdb/search_spec.rb +4 -4
- data/spec/imdb/series_spec.rb +1 -1
- data/spec/imdb/top_250_spec.rb +5 -5
- data/spec/spec_helper.rb +1 -0
- metadata +12 -9
- data/README.rdoc +0 -114
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c84d21278933772df9156ec48a141b0d7b7375e
|
4
|
+
data.tar.gz: 1f54dd630b773de99a1703ad8fbeb379a7d5cbf3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca8f765c422677819391031671bbd0467c7a280511435dda5eee4426f8291b6809c014d63c9662ff89813cb04a065c6a64e62ef4eae60b878a15e9a60ecd2bcd
|
7
|
+
data.tar.gz: c6e895315a23977569618ee9448128ebca33290568e94909241e4b2be81ca8e74ad95145a6f1f6d0d1ef3fd4c8cf3a525e905458ea2dec0778660b5139f53903
|
data/.travis.yml
CHANGED
@@ -6,12 +6,18 @@ notifications:
|
|
6
6
|
rvm:
|
7
7
|
- 1.9.2
|
8
8
|
- 1.9.3
|
9
|
+
- 2.0.0
|
9
10
|
- ruby-head
|
10
11
|
- rbx-19mode
|
12
|
+
- 1.8.7
|
13
|
+
- ree
|
11
14
|
|
12
15
|
matrix:
|
13
16
|
allow_failures:
|
14
17
|
- rvm: rbx-19mode
|
18
|
+
- rvm: ruby-head
|
19
|
+
- rvm: 1.8.7
|
20
|
+
- rvm: ree
|
15
21
|
|
16
22
|
script:
|
17
23
|
- "bundle exec rake spec"
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2009-2013 Ariejan de Vroom
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
data/README.md
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
# imdb [Build Status](https://travis-ci.org/ariejan/imdb)
|
2
|
+
|
3
|
+
* [Sources](https://github.com/ariejan/imdb)
|
4
|
+
* [Issues](https://github.com/ariejan/imdb/issues)
|
5
|
+
|
6
|
+
## Description
|
7
|
+
|
8
|
+
This gem allows you to easily access publicly available data from IMDB.
|
9
|
+
|
10
|
+
## Features
|
11
|
+
|
12
|
+
IMDB currently features the following:
|
13
|
+
|
14
|
+
* Querying detailed movie info
|
15
|
+
* Searching for movies
|
16
|
+
* Command-line utility included.
|
17
|
+
|
18
|
+
## Synopsis
|
19
|
+
|
20
|
+
### Movies:
|
21
|
+
|
22
|
+
i = Imdb::Movie.new("0095016")
|
23
|
+
|
24
|
+
i.title
|
25
|
+
#=> "Die Hard"
|
26
|
+
|
27
|
+
i.cast_members.first
|
28
|
+
#=> "Bruce Willis"
|
29
|
+
|
30
|
+
### Series:
|
31
|
+
|
32
|
+
serie = Imdb::Serie.new("1520211")
|
33
|
+
|
34
|
+
serie.title
|
35
|
+
#=> "\"The Walking Dead\""
|
36
|
+
|
37
|
+
serie.rating
|
38
|
+
#=> 8.8
|
39
|
+
|
40
|
+
serie.seasons.size
|
41
|
+
#=> 3
|
42
|
+
|
43
|
+
serie.season(1).episodes.size
|
44
|
+
#=> 6
|
45
|
+
|
46
|
+
serie.season(1).episode(2).title
|
47
|
+
#=> "Guts"
|
48
|
+
|
49
|
+
### Searching:
|
50
|
+
|
51
|
+
i = Imdb::Search.new("Star Trek")
|
52
|
+
|
53
|
+
i.movies.size
|
54
|
+
#=> 97
|
55
|
+
|
56
|
+
### Using the command line utility is quite easy:
|
57
|
+
|
58
|
+
$ imdb Star Trek
|
59
|
+
|
60
|
+
or get movie info
|
61
|
+
|
62
|
+
$ imdb 0095016
|
63
|
+
|
64
|
+
## Installation
|
65
|
+
|
66
|
+
gem install imdb
|
67
|
+
|
68
|
+
## Running Tests
|
69
|
+
|
70
|
+
You'll need rspec and fakeweb installed to run the specs.
|
71
|
+
|
72
|
+
$ bundle install
|
73
|
+
$ bundle exec rake spec
|
74
|
+
|
75
|
+
Although not recommended, you may run the specs against the live imdb.com
|
76
|
+
website. This will make a lot of calls to imdb.com, use it wisely.
|
77
|
+
|
78
|
+
$ LIVE_TEST=true bundle exec rake spec
|
79
|
+
|
80
|
+
To update the packaged fixtures files with actual imdb.com samples, use the
|
81
|
+
`fixtures:refresh` rake task
|
82
|
+
|
83
|
+
$ bundle exec rake fixtures:refresh
|
84
|
+
|
85
|
+
## Disclaimer
|
86
|
+
|
87
|
+
Neither I, nor any developer who contributed to this project, accepts any kind of
|
88
|
+
liability for your use of this library.
|
89
|
+
|
90
|
+
IMDB does not permit use of its data by third parties without their consent.
|
91
|
+
|
92
|
+
Using this library for anything other than limited personal use may result
|
93
|
+
in an IP ban to the IMDB website.
|
94
|
+
|
95
|
+
## License
|
96
|
+
|
97
|
+
See [MIT-LICENSE](https://github.com/ariejan/imdb/blob/master/MIT-LICENSE)
|
data/imdb.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
|
-
s.add_dependency '
|
22
|
+
s.add_dependency 'nokogiri', '>= 1.6.0'
|
23
23
|
|
24
24
|
s.add_development_dependency 'rake', '~> 10.0.3'
|
25
25
|
s.add_development_dependency 'rspec', '~> 2.13.0'
|
data/lib/imdb.rb
CHANGED
data/lib/imdb/base.rb
CHANGED
@@ -20,7 +20,7 @@ module Imdb
|
|
20
20
|
|
21
21
|
# Returns an array with cast members
|
22
22
|
def cast_members
|
23
|
-
document.search("table.cast td.nm a").map { |link| link.
|
23
|
+
document.search("table.cast td.nm a").map { |link| link.content.strip } rescue []
|
24
24
|
end
|
25
25
|
|
26
26
|
def cast_member_ids
|
@@ -29,7 +29,7 @@ module Imdb
|
|
29
29
|
|
30
30
|
# Returns an array with cast characters
|
31
31
|
def cast_characters
|
32
|
-
document.search("table.cast td.char").map { |link| link.
|
32
|
+
document.search("table.cast td.char").map { |link| link.content.strip } rescue []
|
33
33
|
end
|
34
34
|
|
35
35
|
# Returns an array with cast members and characters
|
@@ -40,58 +40,58 @@ module Imdb
|
|
40
40
|
memb_char[i] = "#{self.cast_members[i]} #{sep} #{self.cast_characters[i]}"
|
41
41
|
i=i+1
|
42
42
|
}
|
43
|
-
|
43
|
+
memb_char
|
44
44
|
end
|
45
45
|
|
46
46
|
# Returns the name of the director
|
47
47
|
def director
|
48
|
-
document.search("h5[text()^='Director'] ~ a").map { |link| link.
|
48
|
+
document.search("h5[text()^='Director'] ~ div a").map { |link| link.content.strip } rescue []
|
49
49
|
end
|
50
50
|
|
51
51
|
# Returns the url to the "Watch a trailer" page
|
52
52
|
def trailer_url
|
53
|
-
'http://imdb.com' + document.at("a[@href
|
53
|
+
'http://imdb.com' + document.at("a[@href*='/video/screenplay/']")["href"] rescue nil
|
54
54
|
end
|
55
55
|
|
56
56
|
# Returns an array of genres (as strings)
|
57
57
|
def genres
|
58
|
-
document.search("h5[text()='Genre:'] ~ a[@href
|
58
|
+
document.search("h5[text()='Genre:'] ~ div a[@href*='/Sections/Genres/']").map { |link| link.content.strip } rescue []
|
59
59
|
end
|
60
60
|
|
61
61
|
# Returns an array of languages as strings.
|
62
62
|
def languages
|
63
|
-
document.search("h5[text()='Language:'] ~ a[@href
|
63
|
+
document.search("h5[text()='Language:'] ~ div a[@href*='/language/']").map { |link| link.content.strip } rescue []
|
64
64
|
end
|
65
65
|
|
66
66
|
# Returns an array of countries as strings.
|
67
67
|
def countries
|
68
|
-
document.search("h5[text()='Country:'] ~ a[@href
|
68
|
+
document.search("h5[text()='Country:'] ~ div a[@href*='/country/']").map { |link| link.content.strip } rescue []
|
69
69
|
end
|
70
70
|
|
71
71
|
# Returns the duration of the movie in minutes as an integer.
|
72
72
|
def length
|
73
|
-
document.
|
73
|
+
document.at("h5[text()='Runtime:'] ~ div").content[/\d+ min/].to_i rescue nil
|
74
74
|
end
|
75
75
|
|
76
76
|
# Returns the company
|
77
77
|
def company
|
78
|
-
document.search("h5[text()='Company:'] ~ a[@href
|
78
|
+
document.search("h5[text()='Company:'] ~ div a[@href*='/company/']").map { |link| link.content.strip }.first rescue nil
|
79
79
|
end
|
80
80
|
|
81
81
|
# Returns a string containing the plot.
|
82
82
|
def plot
|
83
|
-
sanitize_plot(document.
|
83
|
+
sanitize_plot(document.at("h5[text()='Plot:'] ~ div").content) rescue nil
|
84
84
|
end
|
85
85
|
|
86
86
|
# Returns a string containing the plot summary
|
87
87
|
def plot_synopsis
|
88
|
-
doc =
|
89
|
-
doc.
|
88
|
+
doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :synopsis))
|
89
|
+
doc.at("div[@id='swiki.2.1']").content.strip rescue nil
|
90
90
|
end
|
91
91
|
|
92
92
|
def plot_summary
|
93
|
-
doc =
|
94
|
-
doc.
|
93
|
+
doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :plotsummary))
|
94
|
+
doc.at("p.plotSummary").inner_html.gsub(/<i.*/im, '').strip.imdb_unescape_html rescue nil
|
95
95
|
end
|
96
96
|
|
97
97
|
# Returns a string containing the URL to the movie poster.
|
@@ -107,22 +107,22 @@ module Imdb
|
|
107
107
|
|
108
108
|
# Returns a float containing the average user rating
|
109
109
|
def rating
|
110
|
-
document.at(".starbar-meta b").
|
110
|
+
document.at(".starbar-meta b").content.split('/').first.strip.to_f rescue nil
|
111
111
|
end
|
112
112
|
|
113
113
|
# Returns an int containing the number of user ratings
|
114
114
|
def votes
|
115
|
-
document.at("#tn15rating .tn15more").
|
115
|
+
document.at("#tn15rating .tn15more").content.strip.gsub(/[^\d+]/, "").to_i rescue nil
|
116
116
|
end
|
117
117
|
|
118
118
|
# Returns a string containing the tagline
|
119
119
|
def tagline
|
120
|
-
document.search("h5[text()='Tagline:'] ~ div").first.
|
120
|
+
document.search("h5[text()='Tagline:'] ~ div").first.inner_html.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
|
121
121
|
end
|
122
122
|
|
123
123
|
# Returns a string containing the mpaa rating and reason for rating
|
124
124
|
def mpaa_rating
|
125
|
-
document.
|
125
|
+
document.at("//a[starts-with(.,'MPAA')]/../following-sibling::*").content.strip rescue nil
|
126
126
|
end
|
127
127
|
|
128
128
|
# Returns a string containing the title
|
@@ -130,25 +130,34 @@ module Imdb
|
|
130
130
|
if @title && !force_refresh
|
131
131
|
@title
|
132
132
|
else
|
133
|
-
@title = document.at("h1").
|
133
|
+
@title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html rescue nil
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
137
|
# Returns an integer containing the year (CCYY) the movie was released in.
|
138
138
|
def year
|
139
|
-
document.
|
139
|
+
document.at("a[@href^='/year/']").content.to_i rescue nil
|
140
140
|
end
|
141
141
|
|
142
142
|
# Returns release date for the movie.
|
143
143
|
def release_date
|
144
|
-
sanitize_release_date(document.
|
144
|
+
sanitize_release_date(document.at("h5[text()*='Release Date'] ~ div").content) rescue nil
|
145
|
+
end
|
146
|
+
|
147
|
+
# Returns filming locations from imdb_url/locations
|
148
|
+
def filming_locations
|
149
|
+
locations_document.search("#filming_locations_content .soda dt a").map { |link| link.content.strip } rescue []
|
145
150
|
end
|
146
151
|
|
147
152
|
private
|
148
153
|
|
149
|
-
# Returns a new
|
154
|
+
# Returns a new Nokogiri document for parsing.
|
150
155
|
def document
|
151
|
-
@document ||=
|
156
|
+
@document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id))
|
157
|
+
end
|
158
|
+
|
159
|
+
def locations_document
|
160
|
+
@locations_document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id, "locations"))
|
152
161
|
end
|
153
162
|
|
154
163
|
# Use HTTParty to fetch the raw HTML for this movie.
|
@@ -166,26 +175,17 @@ module Imdb
|
|
166
175
|
end
|
167
176
|
|
168
177
|
def sanitize_plot(the_plot)
|
169
|
-
the_plot = the_plot.imdb_strip_tags
|
170
|
-
|
171
178
|
the_plot = the_plot.gsub(/add\ssummary|full\ssummary/i, "")
|
172
179
|
the_plot = the_plot.gsub(/add\ssynopsis|full\ssynopsis/i, "")
|
173
|
-
the_plot = the_plot.gsub(
|
174
|
-
the_plot = the_plot.gsub(/see|more/i, "")
|
180
|
+
the_plot = the_plot.gsub(/see|more|\u00BB|\u00A0/i, "")
|
175
181
|
the_plot = the_plot.gsub(/\|/i, "")
|
176
|
-
|
177
|
-
the_plot = the_plot.strip.imdb_unescape_html
|
182
|
+
the_plot.strip
|
178
183
|
end
|
179
184
|
|
180
185
|
def sanitize_release_date(the_release_date)
|
181
|
-
the_release_date
|
182
|
-
the_release_date = the_release_date.gsub(/ |»/i, "")
|
183
|
-
the_release_date = the_release_date.gsub(/see|more/i, "")
|
184
|
-
|
185
|
-
the_release_date = the_release_date.strip.imdb_unescape_html
|
186
|
+
the_release_date.gsub(/see|more|\u00BB|\u00A0/i, "").strip
|
186
187
|
end
|
187
188
|
|
188
189
|
end # Movie
|
189
190
|
|
190
191
|
end # Imdb
|
191
|
-
|
data/lib/imdb/episode.rb
CHANGED
@@ -11,13 +11,13 @@ module Imdb
|
|
11
11
|
|
12
12
|
# Return the original air date for this episode
|
13
13
|
def air_date
|
14
|
-
document.
|
14
|
+
document.at("h5[text()*='Original Air Date'] ~ div").content.strip.split("\n").first.strip rescue nil
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def document
|
20
|
-
@document ||=
|
20
|
+
@document ||= Nokogiri::HTML(open(@url))
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
data/lib/imdb/movie_list.rb
CHANGED
@@ -7,19 +7,15 @@ module Imdb
|
|
7
7
|
|
8
8
|
private
|
9
9
|
def parse_movies
|
10
|
-
document.search(
|
11
|
-
element.
|
12
|
-
element.
|
10
|
+
document.search("a[@href^='/title/tt']").reject do |element|
|
11
|
+
element.inner_html.imdb_strip_tags.empty? ||
|
12
|
+
element.inner_html.imdb_strip_tags == "X" ||
|
13
|
+
element.parent.inner_html =~ /media from/i
|
13
14
|
end.map do |element|
|
14
15
|
id = element['href'][/\d+/]
|
15
16
|
|
16
|
-
data = element.parent.
|
17
|
-
|
18
|
-
title = data[1]
|
19
|
-
else
|
20
|
-
title = data[0]
|
21
|
-
end
|
22
|
-
|
17
|
+
data = element.parent.inner_html.split("<br />")
|
18
|
+
title = (!data[0].nil? && !data[1].nil? && data[0] =~ /img/) ? data[1] : data[0]
|
23
19
|
title = title.imdb_strip_tags.imdb_unescape_html
|
24
20
|
title.gsub!(/\s+\(\d\d\d\d\)$/, '')
|
25
21
|
|
data/lib/imdb/search.rb
CHANGED
@@ -23,7 +23,7 @@ module Imdb
|
|
23
23
|
|
24
24
|
private
|
25
25
|
def document
|
26
|
-
@document ||=
|
26
|
+
@document ||= Nokogiri::HTML(Imdb::Search.query(@query))
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.query(query)
|
@@ -31,15 +31,15 @@ module Imdb
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def parse_movie
|
34
|
-
id
|
35
|
-
title
|
34
|
+
id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
|
35
|
+
title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html
|
36
36
|
|
37
37
|
[Imdb::Movie.new(id, title)]
|
38
38
|
end
|
39
39
|
|
40
40
|
# Returns true if the search yielded only one result, an exact match
|
41
41
|
def exact_match?
|
42
|
-
!document.at("
|
42
|
+
!document.at("table[@id='title-overview-widget-layout']").nil?
|
43
43
|
end
|
44
44
|
|
45
45
|
end # Search
|
data/lib/imdb/season.rb
CHANGED
@@ -15,12 +15,12 @@ module Imdb
|
|
15
15
|
def episodes
|
16
16
|
@episodes = []
|
17
17
|
|
18
|
-
document.search("div.eplist a[@itemprop*=name]").each_with_index do |link, index|
|
18
|
+
document.search("div.eplist a[@itemprop*='name']").each_with_index do |link, index|
|
19
19
|
@episodes << Imdb::Episode.new(
|
20
20
|
link[:href].scan(/\d+/).first,
|
21
21
|
@season_number,
|
22
22
|
index + 1,
|
23
|
-
link.
|
23
|
+
link.content.strip
|
24
24
|
)
|
25
25
|
end
|
26
26
|
|
@@ -30,7 +30,7 @@ module Imdb
|
|
30
30
|
private
|
31
31
|
|
32
32
|
def document
|
33
|
-
@document ||=
|
33
|
+
@document ||= Nokogiri::HTML(open(@url))
|
34
34
|
end
|
35
35
|
end
|
36
36
|
end
|