imdb 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -0
  3. data/MIT-LICENSE +21 -0
  4. data/README.md +97 -0
  5. data/imdb.gemspec +1 -1
  6. data/lib/imdb.rb +1 -1
  7. data/lib/imdb/base.rb +36 -36
  8. data/lib/imdb/episode.rb +2 -2
  9. data/lib/imdb/movie_list.rb +6 -10
  10. data/lib/imdb/search.rb +4 -4
  11. data/lib/imdb/season.rb +3 -3
  12. data/lib/imdb/serie.rb +2 -3
  13. data/lib/imdb/top_250.rb +1 -1
  14. data/lib/imdb/version.rb +1 -1
  15. data/spec/fixtures/locations +1167 -0
  16. data/spec/fixtures/plotsummary +1063 -897
  17. data/spec/fixtures/search_kannethirey_thondrinal +344 -346
  18. data/spec/fixtures/search_killed_wife +344 -346
  19. data/spec/fixtures/search_star_trek +344 -346
  20. data/spec/fixtures/synopsis +457 -446
  21. data/spec/fixtures/thewalkingdead-s1 +740 -555
  22. data/spec/fixtures/thewalkingdead-s1e2 +505 -480
  23. data/spec/fixtures/top_250 +10749 -1082
  24. data/spec/fixtures/tt0036855 +553 -519
  25. data/spec/fixtures/tt0083987 +563 -512
  26. data/spec/fixtures/tt0095016 +518 -536
  27. data/spec/fixtures/tt0110912 +572 -512
  28. data/spec/fixtures/tt0111161 +559 -508
  29. data/spec/fixtures/tt0117731 +542 -510
  30. data/spec/fixtures/tt0166222 +969 -872
  31. data/spec/fixtures/tt0242653 +530 -524
  32. data/spec/fixtures/tt0330508 +845 -717
  33. data/spec/fixtures/tt0468569 +533 -543
  34. data/spec/fixtures/tt1401252 +472 -449
  35. data/spec/fixtures/tt1520211 +562 -542
  36. data/spec/imdb/movie_spec.rb +13 -5
  37. data/spec/imdb/search_spec.rb +4 -4
  38. data/spec/imdb/series_spec.rb +1 -1
  39. data/spec/imdb/top_250_spec.rb +5 -5
  40. data/spec/spec_helper.rb +1 -0
  41. metadata +12 -9
  42. data/README.rdoc +0 -114
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ca782282c0207181d57c8732b2028c46e91854fa
4
- data.tar.gz: 93857f4686f71950e2bbb514805e87ab0c075de4
3
+ metadata.gz: 2c84d21278933772df9156ec48a141b0d7b7375e
4
+ data.tar.gz: 1f54dd630b773de99a1703ad8fbeb379a7d5cbf3
5
5
  SHA512:
6
- metadata.gz: b64b0288a5ee43eced42d0fe5ebbd6d4f9d4057d3941924f08399e8235a3467d4f950c5186e58b1f1deee95f90413b74b2dc9cfa295f9763c0f7e33ccf093618
7
- data.tar.gz: 423bb452e2c11f1c74925c5ff591cc1fb11ed197242f65284166e6f2b9864106efcfcdb49648ec0064e313d3b4f697cfbd24b8fe399ed74206eee08f9054a1e9
6
+ metadata.gz: ca8f765c422677819391031671bbd0467c7a280511435dda5eee4426f8291b6809c014d63c9662ff89813cb04a065c6a64e62ef4eae60b878a15e9a60ecd2bcd
7
+ data.tar.gz: c6e895315a23977569618ee9448128ebca33290568e94909241e4b2be81ca8e74ad95145a6f1f6d0d1ef3fd4c8cf3a525e905458ea2dec0778660b5139f53903
@@ -6,12 +6,18 @@ notifications:
6
6
  rvm:
7
7
  - 1.9.2
8
8
  - 1.9.3
9
+ - 2.0.0
9
10
  - ruby-head
10
11
  - rbx-19mode
12
+ - 1.8.7
13
+ - ree
11
14
 
12
15
  matrix:
13
16
  allow_failures:
14
17
  - rvm: rbx-19mode
18
+ - rvm: ruby-head
19
+ - rvm: 1.8.7
20
+ - rvm: ree
15
21
 
16
22
  script:
17
23
  - "bundle exec rake spec"
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2009-2013 Ariejan de Vroom
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
@@ -0,0 +1,97 @@
1
+ # imdb [![Build Status](https://travis-ci.org/ariejan/imdb.png?branch=master)](https://travis-ci.org/ariejan/imdb)
2
+
3
+ * [Sources](https://github.com/ariejan/imdb)
4
+ * [Issues](https://github.com/ariejan/imdb/issues)
5
+
6
+ ## Description
7
+
8
+ This gem allows you to easily access publicly available data from IMDB.
9
+
10
+ ## Features
11
+
12
+ IMDB currently features the following:
13
+
14
+ * Querying detailed movie info
15
+ * Searching for movies
16
+ * Command-line utility included.
17
+
18
+ ## Synopsis
19
+
20
+ ### Movies:
21
+
22
+ i = Imdb::Movie.new("0095016")
23
+
24
+ i.title
25
+ #=> "Die Hard"
26
+
27
+ i.cast_members.first
28
+ #=> "Bruce Willis"
29
+
30
+ ### Series:
31
+
32
+ serie = Imdb::Serie.new("1520211")
33
+
34
+ serie.title
35
+ #=> "\"The Walking Dead\""
36
+
37
+ serie.rating
38
+ #=> 8.8
39
+
40
+ serie.seasons.size
41
+ #=> 3
42
+
43
+ serie.season(1).episodes.size
44
+ #=> 6
45
+
46
+ serie.season(1).episode(2).title
47
+ #=> "Guts"
48
+
49
+ ### Searching:
50
+
51
+ i = Imdb::Search.new("Star Trek")
52
+
53
+ i.movies.size
54
+ #=> 97
55
+
56
+ ### Using the command line utility is quite easy:
57
+
58
+ $ imdb Star Trek
59
+
60
+ or get movie info
61
+
62
+ $ imdb 0095016
63
+
64
+ ## Installation
65
+
66
+ gem install imdb
67
+
68
+ ## Running Tests
69
+
70
+ You'll need rspec and fakeweb installed to run the specs.
71
+
72
+ $ bundle install
73
+ $ bundle exec rake spec
74
+
75
+ Although not recommended, you may run the specs against the live imdb.com
76
+ website. This will make a lot of calls to imdb.com, so use it wisely.
77
+
78
+ $ LIVE_TEST=true bundle exec rake spec
79
+
80
+ To update the packaged fixtures files with actual imdb.com samples, use the
81
+ `fixtures:refresh` rake task
82
+
83
+ $ bundle exec rake fixtures:refresh
84
+
85
+ ## Disclaimer
86
+
87
+ Neither I, nor any developer who contributed to this project, accepts any kind of
88
+ liability for your use of this library.
89
+
90
+ IMDB does not permit use of its data by third parties without their consent.
91
+
92
+ Using this library for anything other than limited personal use may result
93
+ in an IP ban to the IMDB website.
94
+
95
+ ## License
96
+
97
+ See [MIT-LICENSE](https://github.com/ariejan/imdb/blob/master/MIT-LICENSE)
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
20
  s.require_paths = ["lib"]
21
21
 
22
- s.add_dependency 'hpricot', '~> 0.8.6'
22
+ s.add_dependency 'nokogiri', '>= 1.6.0'
23
23
 
24
24
  s.add_development_dependency 'rake', '~> 10.0.3'
25
25
  s.add_development_dependency 'rspec', '~> 2.13.0'
@@ -3,7 +3,7 @@ $:.unshift(File.dirname(__FILE__)) unless
3
3
 
4
4
  require 'open-uri'
5
5
  require 'rubygems'
6
- require 'hpricot'
6
+ require 'nokogiri'
7
7
 
8
8
  require 'imdb/base'
9
9
  require 'imdb/movie'
@@ -20,7 +20,7 @@ module Imdb
20
20
 
21
21
  # Returns an array with cast members
22
22
  def cast_members
23
- document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
23
+ document.search("table.cast td.nm a").map { |link| link.content.strip } rescue []
24
24
  end
25
25
 
26
26
  def cast_member_ids
@@ -29,7 +29,7 @@ module Imdb
29
29
 
30
30
  # Returns an array with cast characters
31
31
  def cast_characters
32
- document.search("table.cast td.char").map { |link| link.innerText } rescue []
32
+ document.search("table.cast td.char").map { |link| link.content.strip } rescue []
33
33
  end
34
34
 
35
35
  # Returns an array with cast members and characters
@@ -40,58 +40,58 @@ module Imdb
40
40
  memb_char[i] = "#{self.cast_members[i]} #{sep} #{self.cast_characters[i]}"
41
41
  i=i+1
42
42
  }
43
- return memb_char
43
+ memb_char
44
44
  end
45
45
 
46
46
  # Returns an array with the names of the directors
47
47
  def director
48
- document.search("h5[text()^='Director'] ~ a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
48
+ document.search("h5[text()^='Director'] ~ div a").map { |link| link.content.strip } rescue []
49
49
  end
50
50
 
51
51
  # Returns the url to the "Watch a trailer" page
52
52
  def trailer_url
53
- 'http://imdb.com' + document.at("a[@href*=/video/screenplay/]")["href"] rescue nil
53
+ 'http://imdb.com' + document.at("a[@href*='/video/screenplay/']")["href"] rescue nil
54
54
  end
55
55
 
56
56
  # Returns an array of genres (as strings)
57
57
  def genres
58
- document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
58
+ document.search("h5[text()='Genre:'] ~ div a[@href*='/Sections/Genres/']").map { |link| link.content.strip } rescue []
59
59
  end
60
60
 
61
61
  # Returns an array of languages as strings.
62
62
  def languages
63
- document.search("h5[text()='Language:'] ~ a[@href*=/language/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
63
+ document.search("h5[text()='Language:'] ~ div a[@href*='/language/']").map { |link| link.content.strip } rescue []
64
64
  end
65
65
 
66
66
  # Returns an array of countries as strings.
67
67
  def countries
68
- document.search("h5[text()='Country:'] ~ a[@href*=/country/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
68
+ document.search("h5[text()='Country:'] ~ div a[@href*='/country/']").map { |link| link.content.strip } rescue []
69
69
  end
70
70
 
71
71
  # Returns the duration of the movie in minutes as an integer.
72
72
  def length
73
- document.search("//h5[text()='Runtime:']/..").innerHTML[/\d+ min/].to_i rescue nil
73
+ document.at("h5[text()='Runtime:'] ~ div").content[/\d+ min/].to_i rescue nil
74
74
  end
75
75
 
76
76
  # Returns the company
77
77
  def company
78
- document.search("h5[text()='Company:'] ~ a[@href*=/company/']").map { |link| link.innerHTML.strip.imdb_unescape_html }.first rescue nil
78
+ document.search("h5[text()='Company:'] ~ div a[@href*='/company/']").map { |link| link.content.strip }.first rescue nil
79
79
  end
80
80
 
81
81
  # Returns a string containing the plot.
82
82
  def plot
83
- sanitize_plot(document.search("h5[text()='Plot:'] ~ div").first.innerHTML) rescue nil
83
+ sanitize_plot(document.at("h5[text()='Plot:'] ~ div").content) rescue nil
84
84
  end
85
85
 
86
86
  # Returns a string containing the plot synopsis
87
87
  def plot_synopsis
88
- doc = Hpricot(Imdb::Movie.find_by_id(@id, :synopsis))
89
- doc.search("div[@id='swiki.2.1']").innerHTML.strip.imdb_unescape_html.imdb_strip_tags rescue nil
88
+ doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :synopsis))
89
+ doc.at("div[@id='swiki.2.1']").content.strip rescue nil
90
90
  end
91
91
 
92
92
  def plot_summary
93
- doc = Hpricot(Imdb::Movie.find_by_id(@id, :plotsummary))
94
- doc.search("p[@class='plotpar']").first.innerHTML.gsub(/<i.*/im, '').strip.imdb_unescape_html rescue nil
93
+ doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :plotsummary))
94
+ doc.at("p.plotSummary").inner_html.gsub(/<i.*/im, '').strip.imdb_unescape_html rescue nil
95
95
  end
96
96
 
97
97
  # Returns a string containing the URL to the movie poster.
@@ -107,22 +107,22 @@ module Imdb
107
107
 
108
108
  # Returns a float containing the average user rating
109
109
  def rating
110
- document.at(".starbar-meta b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
110
+ document.at(".starbar-meta b").content.split('/').first.strip.to_f rescue nil
111
111
  end
112
112
 
113
113
  # Returns an int containing the number of user ratings
114
114
  def votes
115
- document.at("#tn15rating .tn15more").innerHTML.strip.imdb_unescape_html.gsub(/[^\d+]/, "").to_i rescue nil
115
+ document.at("#tn15rating .tn15more").content.strip.gsub(/[^\d+]/, "").to_i rescue nil
116
116
  end
117
117
 
118
118
  # Returns a string containing the tagline
119
119
  def tagline
120
- document.search("h5[text()='Tagline:'] ~ div").first.innerHTML.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
120
+ document.search("h5[text()='Tagline:'] ~ div").first.inner_html.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
121
121
  end
122
122
 
123
123
  # Returns a string containing the mpaa rating and reason for rating
124
124
  def mpaa_rating
125
- document.search("h5[text()='MPAA:'] ~ div").first.innerHTML.strip.imdb_unescape_html rescue nil
125
+ document.at("//a[starts-with(.,'MPAA')]/../following-sibling::*").content.strip rescue nil
126
126
  end
127
127
 
128
128
  # Returns a string containing the title
@@ -130,25 +130,34 @@ module Imdb
130
130
  if @title && !force_refresh
131
131
  @title
132
132
  else
133
- @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
133
+ @title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html rescue nil
134
134
  end
135
135
  end
136
136
 
137
137
  # Returns an integer containing the year (CCYY) the movie was released in.
138
138
  def year
139
- document.search('a[@href^="/year/"]').innerHTML.to_i
139
+ document.at("a[@href^='/year/']").content.to_i rescue nil
140
140
  end
141
141
 
142
142
  # Returns release date for the movie.
143
143
  def release_date
144
- sanitize_release_date(document.search('h5[text()*=Release Date]').first.next_sibling.innerHTML.to_s) rescue nil
144
+ sanitize_release_date(document.at("h5[text()*='Release Date'] ~ div").content) rescue nil
145
+ end
146
+
147
+ # Returns filming locations from imdb_url/locations
148
+ def filming_locations
149
+ locations_document.search("#filming_locations_content .soda dt a").map { |link| link.content.strip } rescue []
145
150
  end
146
151
 
147
152
  private
148
153
 
149
- # Returns a new Hpricot document for parsing.
154
+ # Returns a new Nokogiri document for parsing.
150
155
  def document
151
- @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
156
+ @document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id))
157
+ end
158
+
159
+ def locations_document
160
+ @locations_document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id, "locations"))
152
161
  end
153
162
 
154
163
  # Use HTTParty to fetch the raw HTML for this movie.
@@ -166,26 +175,17 @@ module Imdb
166
175
  end
167
176
 
168
177
  def sanitize_plot(the_plot)
169
- the_plot = the_plot.imdb_strip_tags
170
-
171
178
  the_plot = the_plot.gsub(/add\ssummary|full\ssummary/i, "")
172
179
  the_plot = the_plot.gsub(/add\ssynopsis|full\ssynopsis/i, "")
173
- the_plot = the_plot.gsub(/&nbsp;|&raquo;/i, "")
174
- the_plot = the_plot.gsub(/see|more/i, "")
180
+ the_plot = the_plot.gsub(/see|more|\u00BB|\u00A0/i, "")
175
181
  the_plot = the_plot.gsub(/\|/i, "")
176
-
177
- the_plot = the_plot.strip.imdb_unescape_html
182
+ the_plot.strip
178
183
  end
179
184
 
180
185
  def sanitize_release_date(the_release_date)
181
- the_release_date = the_release_date.gsub(/<a.*a>/,"")
182
- the_release_date = the_release_date.gsub(/&nbsp;|&raquo;/i, "")
183
- the_release_date = the_release_date.gsub(/see|more/i, "")
184
-
185
- the_release_date = the_release_date.strip.imdb_unescape_html
186
+ the_release_date.gsub(/see|more|\u00BB|\u00A0/i, "").strip
186
187
  end
187
188
 
188
189
  end # Movie
189
190
 
190
191
  end # Imdb
191
-
@@ -11,13 +11,13 @@ module Imdb
11
11
 
12
12
  # Return the original air date for this episode
13
13
  def air_date
14
- document.search('h5[text()*=Original Air Date]').first.next_sibling.innerHTML.to_s.strip.split("\n").first.strip rescue nil
14
+ document.at("h5[text()*='Original Air Date'] ~ div").content.strip.split("\n").first.strip rescue nil
15
15
  end
16
16
 
17
17
  private
18
18
 
19
19
  def document
20
- @document ||= Hpricot(open(@url))
20
+ @document ||= Nokogiri::HTML(open(@url))
21
21
  end
22
22
  end
23
23
  end
@@ -7,19 +7,15 @@ module Imdb
7
7
 
8
8
  private
9
9
  def parse_movies
10
- document.search('a[@href^="/title/tt"]').reject do |element|
11
- element.innerHTML.imdb_strip_tags.empty? ||
12
- element.parent.innerHTML =~ /media from/i
10
+ document.search("a[@href^='/title/tt']").reject do |element|
11
+ element.inner_html.imdb_strip_tags.empty? ||
12
+ element.inner_html.imdb_strip_tags == "X" ||
13
+ element.parent.inner_html =~ /media from/i
13
14
  end.map do |element|
14
15
  id = element['href'][/\d+/]
15
16
 
16
- data = element.parent.innerHTML.split("<br />")
17
- if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
18
- title = data[1]
19
- else
20
- title = data[0]
21
- end
22
-
17
+ data = element.parent.inner_html.split("<br />")
18
+ title = (!data[0].nil? && !data[1].nil? && data[0] =~ /img/) ? data[1] : data[0]
23
19
  title = title.imdb_strip_tags.imdb_unescape_html
24
20
  title.gsub!(/\s+\(\d\d\d\d\)$/, '')
25
21
 
@@ -23,7 +23,7 @@ module Imdb
23
23
 
24
24
  private
25
25
  def document
26
- @document ||= Hpricot(Imdb::Search.query(@query))
26
+ @document ||= Nokogiri::HTML(Imdb::Search.query(@query))
27
27
  end
28
28
 
29
29
  def self.query(query)
@@ -31,15 +31,15 @@ module Imdb
31
31
  end
32
32
 
33
33
  def parse_movie
34
- id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
35
- title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
34
+ id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
35
+ title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html
36
36
 
37
37
  [Imdb::Movie.new(id, title)]
38
38
  end
39
39
 
40
40
  # Returns true if the search yielded only one result, an exact match
41
41
  def exact_match?
42
- !document.at("//table[@id='title-overview-widget-layout']").nil?
42
+ !document.at("table[@id='title-overview-widget-layout']").nil?
43
43
  end
44
44
 
45
45
  end # Search
@@ -15,12 +15,12 @@ module Imdb
15
15
  def episodes
16
16
  @episodes = []
17
17
 
18
- document.search("div.eplist a[@itemprop*=name]").each_with_index do |link, index|
18
+ document.search("div.eplist a[@itemprop*='name']").each_with_index do |link, index|
19
19
  @episodes << Imdb::Episode.new(
20
20
  link[:href].scan(/\d+/).first,
21
21
  @season_number,
22
22
  index + 1,
23
- link.innerHTML.strip.imdb_unescape_html
23
+ link.content.strip
24
24
  )
25
25
  end
26
26
 
@@ -30,7 +30,7 @@ module Imdb
30
30
  private
31
31
 
32
32
  def document
33
- @document ||= Hpricot(open(@url))
33
+ @document ||= Nokogiri::HTML(open(@url))
34
34
  end
35
35
  end
36
36
  end