imdb 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +6 -0
- data/MIT-LICENSE +21 -0
- data/README.md +97 -0
- data/imdb.gemspec +1 -1
- data/lib/imdb.rb +1 -1
- data/lib/imdb/base.rb +36 -36
- data/lib/imdb/episode.rb +2 -2
- data/lib/imdb/movie_list.rb +6 -10
- data/lib/imdb/search.rb +4 -4
- data/lib/imdb/season.rb +3 -3
- data/lib/imdb/serie.rb +2 -3
- data/lib/imdb/top_250.rb +1 -1
- data/lib/imdb/version.rb +1 -1
- data/spec/fixtures/locations +1167 -0
- data/spec/fixtures/plotsummary +1063 -897
- data/spec/fixtures/search_kannethirey_thondrinal +344 -346
- data/spec/fixtures/search_killed_wife +344 -346
- data/spec/fixtures/search_star_trek +344 -346
- data/spec/fixtures/synopsis +457 -446
- data/spec/fixtures/thewalkingdead-s1 +740 -555
- data/spec/fixtures/thewalkingdead-s1e2 +505 -480
- data/spec/fixtures/top_250 +10749 -1082
- data/spec/fixtures/tt0036855 +553 -519
- data/spec/fixtures/tt0083987 +563 -512
- data/spec/fixtures/tt0095016 +518 -536
- data/spec/fixtures/tt0110912 +572 -512
- data/spec/fixtures/tt0111161 +559 -508
- data/spec/fixtures/tt0117731 +542 -510
- data/spec/fixtures/tt0166222 +969 -872
- data/spec/fixtures/tt0242653 +530 -524
- data/spec/fixtures/tt0330508 +845 -717
- data/spec/fixtures/tt0468569 +533 -543
- data/spec/fixtures/tt1401252 +472 -449
- data/spec/fixtures/tt1520211 +562 -542
- data/spec/imdb/movie_spec.rb +13 -5
- data/spec/imdb/search_spec.rb +4 -4
- data/spec/imdb/series_spec.rb +1 -1
- data/spec/imdb/top_250_spec.rb +5 -5
- data/spec/spec_helper.rb +1 -0
- metadata +12 -9
- data/README.rdoc +0 -114
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c84d21278933772df9156ec48a141b0d7b7375e
|
4
|
+
data.tar.gz: 1f54dd630b773de99a1703ad8fbeb379a7d5cbf3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca8f765c422677819391031671bbd0467c7a280511435dda5eee4426f8291b6809c014d63c9662ff89813cb04a065c6a64e62ef4eae60b878a15e9a60ecd2bcd
|
7
|
+
data.tar.gz: c6e895315a23977569618ee9448128ebca33290568e94909241e4b2be81ca8e74ad95145a6f1f6d0d1ef3fd4c8cf3a525e905458ea2dec0778660b5139f53903
|
data/.travis.yml
CHANGED
@@ -6,12 +6,18 @@ notifications:
|
|
6
6
|
rvm:
|
7
7
|
- 1.9.2
|
8
8
|
- 1.9.3
|
9
|
+
- 2.0.0
|
9
10
|
- ruby-head
|
10
11
|
- rbx-19mode
|
12
|
+
- 1.8.7
|
13
|
+
- ree
|
11
14
|
|
12
15
|
matrix:
|
13
16
|
allow_failures:
|
14
17
|
- rvm: rbx-19mode
|
18
|
+
- rvm: ruby-head
|
19
|
+
- rvm: 1.8.7
|
20
|
+
- rvm: ree
|
15
21
|
|
16
22
|
script:
|
17
23
|
- "bundle exec rake spec"
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2009-2013 Ariejan de Vroom
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
data/README.md
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
# imdb [![Build Status](https://travis-ci.org/ariejan/imdb.png?branch=master)](https://travis-ci.org/ariejan/imdb)
|
2
|
+
|
3
|
+
* [Sources](https://github.com/ariejan/imdb)
|
4
|
+
* [Issues](https://github.com/ariejan/imdb/issues)
|
5
|
+
|
6
|
+
## Description
|
7
|
+
|
8
|
+
This gem allows you to easily access publicly available data from IMDB.
|
9
|
+
|
10
|
+
## Features
|
11
|
+
|
12
|
+
IMDB currently features the following:
|
13
|
+
|
14
|
+
* Querying detailed movie info
|
15
|
+
* Searching for movies
|
16
|
+
* Command-line utility included.
|
17
|
+
|
18
|
+
## Synopsis
|
19
|
+
|
20
|
+
### Movies:
|
21
|
+
|
22
|
+
i = Imdb::Movie.new("0095016")
|
23
|
+
|
24
|
+
i.title
|
25
|
+
#=> "Die Hard"
|
26
|
+
|
27
|
+
i.cast_members.first
|
28
|
+
#=> "Bruce Willis"
|
29
|
+
|
30
|
+
### Series:
|
31
|
+
|
32
|
+
serie = Imdb::Serie.new("1520211")
|
33
|
+
|
34
|
+
serie.title
|
35
|
+
#=> "\"The Walking Dead\""
|
36
|
+
|
37
|
+
serie.rating
|
38
|
+
#=> 8.8
|
39
|
+
|
40
|
+
serie.seasons.size
|
41
|
+
#=> 3
|
42
|
+
|
43
|
+
serie.season(1).episodes.size
|
44
|
+
#=> 6
|
45
|
+
|
46
|
+
serie.season(1).episode(2).title
|
47
|
+
#=> "Guts"
|
48
|
+
|
49
|
+
### Searching:
|
50
|
+
|
51
|
+
i = Imdb::Search.new("Star Trek")
|
52
|
+
|
53
|
+
i.movies.size
|
54
|
+
#=> 97
|
55
|
+
|
56
|
+
### Using the command line utility is quite easy:
|
57
|
+
|
58
|
+
$ imdb Star Trek
|
59
|
+
|
60
|
+
or get movie info
|
61
|
+
|
62
|
+
$ imdb 0095016
|
63
|
+
|
64
|
+
## Installation
|
65
|
+
|
66
|
+
gem install imdb
|
67
|
+
|
68
|
+
## Running Tests
|
69
|
+
|
70
|
+
You'll need rspec and fakeweb installed to run the specs.
|
71
|
+
|
72
|
+
$ bundle install
|
73
|
+
$ bundle exec rake spec
|
74
|
+
|
75
|
+
Although not recommended, you may run the specs against the live imdb.com
|
76
|
+
website. This will make a lot of calls to imdb.com, use it wisely.
|
77
|
+
|
78
|
+
$ LIVE_TEST=true bundle exec rake spec
|
79
|
+
|
80
|
+
To update the packaged fixture files with actual imdb.com samples, use the
|
81
|
+
`fixtures:refresh` rake task
|
82
|
+
|
83
|
+
$ bundle exec rake fixtures:refresh
|
84
|
+
|
85
|
+
## Disclaimer
|
86
|
+
|
87
|
+
Neither I, nor any developer who contributed to this project, accepts any kind of
|
88
|
+
liability for your use of this library.
|
89
|
+
|
90
|
+
IMDB does not permit use of its data by third parties without their consent.
|
91
|
+
|
92
|
+
Using this library for anything other than limited personal use may result
|
93
|
+
in an IP ban to the IMDB website.
|
94
|
+
|
95
|
+
## License
|
96
|
+
|
97
|
+
See [MIT-LICENSE](https://github.com/ariejan/imdb/blob/master/MIT-LICENSE)
|
data/imdb.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
21
|
|
22
|
-
s.add_dependency '
|
22
|
+
s.add_dependency 'nokogiri', '>= 1.6.0'
|
23
23
|
|
24
24
|
s.add_development_dependency 'rake', '~> 10.0.3'
|
25
25
|
s.add_development_dependency 'rspec', '~> 2.13.0'
|
data/lib/imdb.rb
CHANGED
data/lib/imdb/base.rb
CHANGED
@@ -20,7 +20,7 @@ module Imdb
|
|
20
20
|
|
21
21
|
# Returns an array with cast members
|
22
22
|
def cast_members
|
23
|
-
document.search("table.cast td.nm a").map { |link| link.
|
23
|
+
document.search("table.cast td.nm a").map { |link| link.content.strip } rescue []
|
24
24
|
end
|
25
25
|
|
26
26
|
def cast_member_ids
|
@@ -29,7 +29,7 @@ module Imdb
|
|
29
29
|
|
30
30
|
# Returns an array with cast characters
|
31
31
|
def cast_characters
|
32
|
-
document.search("table.cast td.char").map { |link| link.
|
32
|
+
document.search("table.cast td.char").map { |link| link.content.strip } rescue []
|
33
33
|
end
|
34
34
|
|
35
35
|
# Returns an array with cast members and characters
|
@@ -40,58 +40,58 @@ module Imdb
|
|
40
40
|
memb_char[i] = "#{self.cast_members[i]} #{sep} #{self.cast_characters[i]}"
|
41
41
|
i=i+1
|
42
42
|
}
|
43
|
-
|
43
|
+
memb_char
|
44
44
|
end
|
45
45
|
|
46
46
|
# Returns the name of the director
|
47
47
|
def director
|
48
|
-
document.search("h5[text()^='Director'] ~ a").map { |link| link.
|
48
|
+
document.search("h5[text()^='Director'] ~ div a").map { |link| link.content.strip } rescue []
|
49
49
|
end
|
50
50
|
|
51
51
|
# Returns the url to the "Watch a trailer" page
|
52
52
|
def trailer_url
|
53
|
-
'http://imdb.com' + document.at("a[@href
|
53
|
+
'http://imdb.com' + document.at("a[@href*='/video/screenplay/']")["href"] rescue nil
|
54
54
|
end
|
55
55
|
|
56
56
|
# Returns an array of genres (as strings)
|
57
57
|
def genres
|
58
|
-
document.search("h5[text()='Genre:'] ~ a[@href
|
58
|
+
document.search("h5[text()='Genre:'] ~ div a[@href*='/Sections/Genres/']").map { |link| link.content.strip } rescue []
|
59
59
|
end
|
60
60
|
|
61
61
|
# Returns an array of languages as strings.
|
62
62
|
def languages
|
63
|
-
document.search("h5[text()='Language:'] ~ a[@href
|
63
|
+
document.search("h5[text()='Language:'] ~ div a[@href*='/language/']").map { |link| link.content.strip } rescue []
|
64
64
|
end
|
65
65
|
|
66
66
|
# Returns an array of countries as strings.
|
67
67
|
def countries
|
68
|
-
document.search("h5[text()='Country:'] ~ a[@href
|
68
|
+
document.search("h5[text()='Country:'] ~ div a[@href*='/country/']").map { |link| link.content.strip } rescue []
|
69
69
|
end
|
70
70
|
|
71
71
|
# Returns the duration of the movie in minutes as an integer.
|
72
72
|
def length
|
73
|
-
document.
|
73
|
+
document.at("h5[text()='Runtime:'] ~ div").content[/\d+ min/].to_i rescue nil
|
74
74
|
end
|
75
75
|
|
76
76
|
# Returns the company
|
77
77
|
def company
|
78
|
-
document.search("h5[text()='Company:'] ~ a[@href
|
78
|
+
document.search("h5[text()='Company:'] ~ div a[@href*='/company/']").map { |link| link.content.strip }.first rescue nil
|
79
79
|
end
|
80
80
|
|
81
81
|
# Returns a string containing the plot.
|
82
82
|
def plot
|
83
|
-
sanitize_plot(document.
|
83
|
+
sanitize_plot(document.at("h5[text()='Plot:'] ~ div").content) rescue nil
|
84
84
|
end
|
85
85
|
|
86
86
|
# Returns a string containing the plot summary
|
87
87
|
def plot_synopsis
|
88
|
-
doc =
|
89
|
-
doc.
|
88
|
+
doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :synopsis))
|
89
|
+
doc.at("div[@id='swiki.2.1']").content.strip rescue nil
|
90
90
|
end
|
91
91
|
|
92
92
|
def plot_summary
|
93
|
-
doc =
|
94
|
-
doc.
|
93
|
+
doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :plotsummary))
|
94
|
+
doc.at("p.plotSummary").inner_html.gsub(/<i.*/im, '').strip.imdb_unescape_html rescue nil
|
95
95
|
end
|
96
96
|
|
97
97
|
# Returns a string containing the URL to the movie poster.
|
@@ -107,22 +107,22 @@ module Imdb
|
|
107
107
|
|
108
108
|
# Returns a float containing the average user rating
|
109
109
|
def rating
|
110
|
-
document.at(".starbar-meta b").
|
110
|
+
document.at(".starbar-meta b").content.split('/').first.strip.to_f rescue nil
|
111
111
|
end
|
112
112
|
|
113
113
|
# Returns an int containing the number of user ratings
|
114
114
|
def votes
|
115
|
-
document.at("#tn15rating .tn15more").
|
115
|
+
document.at("#tn15rating .tn15more").content.strip.gsub(/[^\d+]/, "").to_i rescue nil
|
116
116
|
end
|
117
117
|
|
118
118
|
# Returns a string containing the tagline
|
119
119
|
def tagline
|
120
|
-
document.search("h5[text()='Tagline:'] ~ div").first.
|
120
|
+
document.search("h5[text()='Tagline:'] ~ div").first.inner_html.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
|
121
121
|
end
|
122
122
|
|
123
123
|
# Returns a string containing the mpaa rating and reason for rating
|
124
124
|
def mpaa_rating
|
125
|
-
document.
|
125
|
+
document.at("//a[starts-with(.,'MPAA')]/../following-sibling::*").content.strip rescue nil
|
126
126
|
end
|
127
127
|
|
128
128
|
# Returns a string containing the title
|
@@ -130,25 +130,34 @@ module Imdb
|
|
130
130
|
if @title && !force_refresh
|
131
131
|
@title
|
132
132
|
else
|
133
|
-
@title = document.at("h1").
|
133
|
+
@title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html rescue nil
|
134
134
|
end
|
135
135
|
end
|
136
136
|
|
137
137
|
# Returns an integer containing the year (CCYY) the movie was released in.
|
138
138
|
def year
|
139
|
-
document.
|
139
|
+
document.at("a[@href^='/year/']").content.to_i rescue nil
|
140
140
|
end
|
141
141
|
|
142
142
|
# Returns release date for the movie.
|
143
143
|
def release_date
|
144
|
-
sanitize_release_date(document.
|
144
|
+
sanitize_release_date(document.at("h5[text()*='Release Date'] ~ div").content) rescue nil
|
145
|
+
end
|
146
|
+
|
147
|
+
# Returns filming locations from imdb_url/locations
|
148
|
+
def filming_locations
|
149
|
+
locations_document.search("#filming_locations_content .soda dt a").map { |link| link.content.strip } rescue []
|
145
150
|
end
|
146
151
|
|
147
152
|
private
|
148
153
|
|
149
|
-
# Returns a new
|
154
|
+
# Returns a new Nokogiri document for parsing.
|
150
155
|
def document
|
151
|
-
@document ||=
|
156
|
+
@document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id))
|
157
|
+
end
|
158
|
+
|
159
|
+
def locations_document
|
160
|
+
@locations_document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id, "locations"))
|
152
161
|
end
|
153
162
|
|
154
163
|
# Use HTTParty to fetch the raw HTML for this movie.
|
@@ -166,26 +175,17 @@ module Imdb
|
|
166
175
|
end
|
167
176
|
|
168
177
|
def sanitize_plot(the_plot)
|
169
|
-
the_plot = the_plot.imdb_strip_tags
|
170
|
-
|
171
178
|
the_plot = the_plot.gsub(/add\ssummary|full\ssummary/i, "")
|
172
179
|
the_plot = the_plot.gsub(/add\ssynopsis|full\ssynopsis/i, "")
|
173
|
-
the_plot = the_plot.gsub(
|
174
|
-
the_plot = the_plot.gsub(/see|more/i, "")
|
180
|
+
the_plot = the_plot.gsub(/see|more|\u00BB|\u00A0/i, "")
|
175
181
|
the_plot = the_plot.gsub(/\|/i, "")
|
176
|
-
|
177
|
-
the_plot = the_plot.strip.imdb_unescape_html
|
182
|
+
the_plot.strip
|
178
183
|
end
|
179
184
|
|
180
185
|
def sanitize_release_date(the_release_date)
|
181
|
-
the_release_date
|
182
|
-
the_release_date = the_release_date.gsub(/ |»/i, "")
|
183
|
-
the_release_date = the_release_date.gsub(/see|more/i, "")
|
184
|
-
|
185
|
-
the_release_date = the_release_date.strip.imdb_unescape_html
|
186
|
+
the_release_date.gsub(/see|more|\u00BB|\u00A0/i, "").strip
|
186
187
|
end
|
187
188
|
|
188
189
|
end # Movie
|
189
190
|
|
190
191
|
end # Imdb
|
191
|
-
|
data/lib/imdb/episode.rb
CHANGED
@@ -11,13 +11,13 @@ module Imdb
|
|
11
11
|
|
12
12
|
# Return the original air date for this episode
|
13
13
|
def air_date
|
14
|
-
document.
|
14
|
+
document.at("h5[text()*='Original Air Date'] ~ div").content.strip.split("\n").first.strip rescue nil
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def document
|
20
|
-
@document ||=
|
20
|
+
@document ||= Nokogiri::HTML(open(@url))
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
data/lib/imdb/movie_list.rb
CHANGED
@@ -7,19 +7,15 @@ module Imdb
|
|
7
7
|
|
8
8
|
private
|
9
9
|
def parse_movies
|
10
|
-
document.search(
|
11
|
-
element.
|
12
|
-
element.
|
10
|
+
document.search("a[@href^='/title/tt']").reject do |element|
|
11
|
+
element.inner_html.imdb_strip_tags.empty? ||
|
12
|
+
element.inner_html.imdb_strip_tags == "X" ||
|
13
|
+
element.parent.inner_html =~ /media from/i
|
13
14
|
end.map do |element|
|
14
15
|
id = element['href'][/\d+/]
|
15
16
|
|
16
|
-
data = element.parent.
|
17
|
-
|
18
|
-
title = data[1]
|
19
|
-
else
|
20
|
-
title = data[0]
|
21
|
-
end
|
22
|
-
|
17
|
+
data = element.parent.inner_html.split("<br />")
|
18
|
+
title = (!data[0].nil? && !data[1].nil? && data[0] =~ /img/) ? data[1] : data[0]
|
23
19
|
title = title.imdb_strip_tags.imdb_unescape_html
|
24
20
|
title.gsub!(/\s+\(\d\d\d\d\)$/, '')
|
25
21
|
|
data/lib/imdb/search.rb
CHANGED
@@ -23,7 +23,7 @@ module Imdb
|
|
23
23
|
|
24
24
|
private
|
25
25
|
def document
|
26
|
-
@document ||=
|
26
|
+
@document ||= Nokogiri::HTML(Imdb::Search.query(@query))
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.query(query)
|
@@ -31,15 +31,15 @@ module Imdb
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def parse_movie
|
34
|
-
id
|
35
|
-
title
|
34
|
+
id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
|
35
|
+
title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html
|
36
36
|
|
37
37
|
[Imdb::Movie.new(id, title)]
|
38
38
|
end
|
39
39
|
|
40
40
|
# Returns true if the search yielded only one result, an exact match
|
41
41
|
def exact_match?
|
42
|
-
!document.at("
|
42
|
+
!document.at("table[@id='title-overview-widget-layout']").nil?
|
43
43
|
end
|
44
44
|
|
45
45
|
end # Search
|
data/lib/imdb/season.rb
CHANGED
@@ -15,12 +15,12 @@ module Imdb
|
|
15
15
|
def episodes
|
16
16
|
@episodes = []
|
17
17
|
|
18
|
-
document.search("div.eplist a[@itemprop*=name]").each_with_index do |link, index|
|
18
|
+
document.search("div.eplist a[@itemprop*='name']").each_with_index do |link, index|
|
19
19
|
@episodes << Imdb::Episode.new(
|
20
20
|
link[:href].scan(/\d+/).first,
|
21
21
|
@season_number,
|
22
22
|
index + 1,
|
23
|
-
link.
|
23
|
+
link.content.strip
|
24
24
|
)
|
25
25
|
end
|
26
26
|
|
@@ -30,7 +30,7 @@ module Imdb
|
|
30
30
|
private
|
31
31
|
|
32
32
|
def document
|
33
|
-
@document ||=
|
33
|
+
@document ||= Nokogiri::HTML(open(@url))
|
34
34
|
end
|
35
35
|
end
|
36
36
|
end
|