imdb 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -0
  3. data/MIT-LICENSE +21 -0
  4. data/README.md +97 -0
  5. data/imdb.gemspec +1 -1
  6. data/lib/imdb.rb +1 -1
  7. data/lib/imdb/base.rb +36 -36
  8. data/lib/imdb/episode.rb +2 -2
  9. data/lib/imdb/movie_list.rb +6 -10
  10. data/lib/imdb/search.rb +4 -4
  11. data/lib/imdb/season.rb +3 -3
  12. data/lib/imdb/serie.rb +2 -3
  13. data/lib/imdb/top_250.rb +1 -1
  14. data/lib/imdb/version.rb +1 -1
  15. data/spec/fixtures/locations +1167 -0
  16. data/spec/fixtures/plotsummary +1063 -897
  17. data/spec/fixtures/search_kannethirey_thondrinal +344 -346
  18. data/spec/fixtures/search_killed_wife +344 -346
  19. data/spec/fixtures/search_star_trek +344 -346
  20. data/spec/fixtures/synopsis +457 -446
  21. data/spec/fixtures/thewalkingdead-s1 +740 -555
  22. data/spec/fixtures/thewalkingdead-s1e2 +505 -480
  23. data/spec/fixtures/top_250 +10749 -1082
  24. data/spec/fixtures/tt0036855 +553 -519
  25. data/spec/fixtures/tt0083987 +563 -512
  26. data/spec/fixtures/tt0095016 +518 -536
  27. data/spec/fixtures/tt0110912 +572 -512
  28. data/spec/fixtures/tt0111161 +559 -508
  29. data/spec/fixtures/tt0117731 +542 -510
  30. data/spec/fixtures/tt0166222 +969 -872
  31. data/spec/fixtures/tt0242653 +530 -524
  32. data/spec/fixtures/tt0330508 +845 -717
  33. data/spec/fixtures/tt0468569 +533 -543
  34. data/spec/fixtures/tt1401252 +472 -449
  35. data/spec/fixtures/tt1520211 +562 -542
  36. data/spec/imdb/movie_spec.rb +13 -5
  37. data/spec/imdb/search_spec.rb +4 -4
  38. data/spec/imdb/series_spec.rb +1 -1
  39. data/spec/imdb/top_250_spec.rb +5 -5
  40. data/spec/spec_helper.rb +1 -0
  41. metadata +12 -9
  42. data/README.rdoc +0 -114
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ca782282c0207181d57c8732b2028c46e91854fa
4
- data.tar.gz: 93857f4686f71950e2bbb514805e87ab0c075de4
3
+ metadata.gz: 2c84d21278933772df9156ec48a141b0d7b7375e
4
+ data.tar.gz: 1f54dd630b773de99a1703ad8fbeb379a7d5cbf3
5
5
  SHA512:
6
- metadata.gz: b64b0288a5ee43eced42d0fe5ebbd6d4f9d4057d3941924f08399e8235a3467d4f950c5186e58b1f1deee95f90413b74b2dc9cfa295f9763c0f7e33ccf093618
7
- data.tar.gz: 423bb452e2c11f1c74925c5ff591cc1fb11ed197242f65284166e6f2b9864106efcfcdb49648ec0064e313d3b4f697cfbd24b8fe399ed74206eee08f9054a1e9
6
+ metadata.gz: ca8f765c422677819391031671bbd0467c7a280511435dda5eee4426f8291b6809c014d63c9662ff89813cb04a065c6a64e62ef4eae60b878a15e9a60ecd2bcd
7
+ data.tar.gz: c6e895315a23977569618ee9448128ebca33290568e94909241e4b2be81ca8e74ad95145a6f1f6d0d1ef3fd4c8cf3a525e905458ea2dec0778660b5139f53903
@@ -6,12 +6,18 @@ notifications:
6
6
  rvm:
7
7
  - 1.9.2
8
8
  - 1.9.3
9
+ - 2.0.0
9
10
  - ruby-head
10
11
  - rbx-19mode
12
+ - 1.8.7
13
+ - ree
11
14
 
12
15
  matrix:
13
16
  allow_failures:
14
17
  - rvm: rbx-19mode
18
+ - rvm: ruby-head
19
+ - rvm: 1.8.7
20
+ - rvm: ree
15
21
 
16
22
  script:
17
23
  - "bundle exec rake spec"
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2009-2013 Ariejan de Vroom
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
@@ -0,0 +1,97 @@
1
+ # imdb [![Build Status](https://travis-ci.org/ariejan/imdb.png?branch=master)](https://travis-ci.org/ariejan/imdb)
2
+
3
+ * [Sources](https://github.com/ariejan/imdb)
4
+ * [Issues](https://github.com/ariejan/imdb/issues)
5
+
6
+ ## Description
7
+
8
+ This gem allows you to easily access publicly available data from IMDB.
9
+
10
+ ## Features
11
+
12
+ IMDB currently features the following:
13
+
14
+ * Querying detailed movie info
15
+ * Searching for movies
16
+ * Command-line utility included.
17
+
18
+ ## Synopsis
19
+
20
+ ### Movies:
21
+
22
+ i = Imdb::Movie.new("0095016")
23
+
24
+ i.title
25
+ #=> "Die Hard"
26
+
27
+ i.cast_members.first
28
+ #=> "Bruce Willis"
29
+
30
+ ### Series:
31
+
32
+ serie = Imdb::Serie.new("1520211")
33
+
34
+ serie.title
35
+ #=> "\"The Walking Dead\""
36
+
37
+ serie.rating
38
+ #=> 8.8
39
+
40
+ serie.seasons.size
41
+ #=> 3
42
+
43
+ serie.season(1).episodes.size
44
+ #=> 6
45
+
46
+ serie.season(1).episode(2).title
47
+ #=> "Guts"
48
+
49
+ ### Searching:
50
+
51
+ i = Imdb::Search.new("Star Trek")
52
+
53
+ i.movies.size
54
+ #=> 97
55
+
56
+ ### Using the command line utility is quite easy:
57
+
58
+ $ imdb Star Trek
59
+
60
+ or get movie info
61
+
62
+ $ imdb 0095016
63
+
64
+ ## Installation
65
+
66
+ gem install imdb
67
+
68
+ ## Running Tests
69
+
70
+ You'll need rspec and fakeweb installed to run the specs.
71
+
72
+ $ bundle install
73
+ $ bundle exec rake spec
74
+
75
+ Although not recommended, you may run the specs against the live imdb.com
76
+ website. This will make a lot of calls to imdb.com, so use it wisely.
77
+
78
+ $ LIVE_TEST=true bundle exec rake spec
79
+
80
+ To update the packaged fixture files with actual imdb.com samples, use the
81
+ `fixtures:refresh` rake task
82
+
83
+ $ bundle exec rake fixtures:refresh
84
+
85
+ ## Disclaimer
86
+
87
+ Neither I, nor any developer who contributed to this project, accept any kind of
88
+ liability for your use of this library.
89
+
90
+ IMDB does not permit use of its data by third parties without their consent.
91
+
92
+ Using this library for anything other than limited personal use may result
93
+ in an IP ban to the IMDB website.
94
+
95
+ ## License
96
+
97
+ See [MIT-LICENSE](https://github.com/ariejan/imdb/blob/master/MIT-LICENSE)
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
20
  s.require_paths = ["lib"]
21
21
 
22
- s.add_dependency 'hpricot', '~> 0.8.6'
22
+ s.add_dependency 'nokogiri', '>= 1.6.0'
23
23
 
24
24
  s.add_development_dependency 'rake', '~> 10.0.3'
25
25
  s.add_development_dependency 'rspec', '~> 2.13.0'
@@ -3,7 +3,7 @@ $:.unshift(File.dirname(__FILE__)) unless
3
3
 
4
4
  require 'open-uri'
5
5
  require 'rubygems'
6
- require 'hpricot'
6
+ require 'nokogiri'
7
7
 
8
8
  require 'imdb/base'
9
9
  require 'imdb/movie'
@@ -20,7 +20,7 @@ module Imdb
20
20
 
21
21
  # Returns an array with cast members
22
22
  def cast_members
23
- document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
23
+ document.search("table.cast td.nm a").map { |link| link.content.strip } rescue []
24
24
  end
25
25
 
26
26
  def cast_member_ids
@@ -29,7 +29,7 @@ module Imdb
29
29
 
30
30
  # Returns an array with cast characters
31
31
  def cast_characters
32
- document.search("table.cast td.char").map { |link| link.innerText } rescue []
32
+ document.search("table.cast td.char").map { |link| link.content.strip } rescue []
33
33
  end
34
34
 
35
35
  # Returns an array with cast members and characters
@@ -40,58 +40,58 @@ module Imdb
40
40
  memb_char[i] = "#{self.cast_members[i]} #{sep} #{self.cast_characters[i]}"
41
41
  i=i+1
42
42
  }
43
- return memb_char
43
+ memb_char
44
44
  end
45
45
 
46
46
  # Returns the name of the director
47
47
  def director
48
- document.search("h5[text()^='Director'] ~ a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
48
+ document.search("h5[text()^='Director'] ~ div a").map { |link| link.content.strip } rescue []
49
49
  end
50
50
 
51
51
  # Returns the url to the "Watch a trailer" page
52
52
  def trailer_url
53
- 'http://imdb.com' + document.at("a[@href*=/video/screenplay/]")["href"] rescue nil
53
+ 'http://imdb.com' + document.at("a[@href*='/video/screenplay/']")["href"] rescue nil
54
54
  end
55
55
 
56
56
  # Returns an array of genres (as strings)
57
57
  def genres
58
- document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
58
+ document.search("h5[text()='Genre:'] ~ div a[@href*='/Sections/Genres/']").map { |link| link.content.strip } rescue []
59
59
  end
60
60
 
61
61
  # Returns an array of languages as strings.
62
62
  def languages
63
- document.search("h5[text()='Language:'] ~ a[@href*=/language/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
63
+ document.search("h5[text()='Language:'] ~ div a[@href*='/language/']").map { |link| link.content.strip } rescue []
64
64
  end
65
65
 
66
66
  # Returns an array of countries as strings.
67
67
  def countries
68
- document.search("h5[text()='Country:'] ~ a[@href*=/country/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
68
+ document.search("h5[text()='Country:'] ~ div a[@href*='/country/']").map { |link| link.content.strip } rescue []
69
69
  end
70
70
 
71
71
  # Returns the duration of the movie in minutes as an integer.
72
72
  def length
73
- document.search("//h5[text()='Runtime:']/..").innerHTML[/\d+ min/].to_i rescue nil
73
+ document.at("h5[text()='Runtime:'] ~ div").content[/\d+ min/].to_i rescue nil
74
74
  end
75
75
 
76
76
  # Returns the company
77
77
  def company
78
- document.search("h5[text()='Company:'] ~ a[@href*=/company/']").map { |link| link.innerHTML.strip.imdb_unescape_html }.first rescue nil
78
+ document.search("h5[text()='Company:'] ~ div a[@href*='/company/']").map { |link| link.content.strip }.first rescue nil
79
79
  end
80
80
 
81
81
  # Returns a string containing the plot.
82
82
  def plot
83
- sanitize_plot(document.search("h5[text()='Plot:'] ~ div").first.innerHTML) rescue nil
83
+ sanitize_plot(document.at("h5[text()='Plot:'] ~ div").content) rescue nil
84
84
  end
85
85
 
86
86
  # Returns a string containing the plot summary
87
87
  def plot_synopsis
88
- doc = Hpricot(Imdb::Movie.find_by_id(@id, :synopsis))
89
- doc.search("div[@id='swiki.2.1']").innerHTML.strip.imdb_unescape_html.imdb_strip_tags rescue nil
88
+ doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :synopsis))
89
+ doc.at("div[@id='swiki.2.1']").content.strip rescue nil
90
90
  end
91
91
 
92
92
  def plot_summary
93
- doc = Hpricot(Imdb::Movie.find_by_id(@id, :plotsummary))
94
- doc.search("p[@class='plotpar']").first.innerHTML.gsub(/<i.*/im, '').strip.imdb_unescape_html rescue nil
93
+ doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :plotsummary))
94
+ doc.at("p.plotSummary").inner_html.gsub(/<i.*/im, '').strip.imdb_unescape_html rescue nil
95
95
  end
96
96
 
97
97
  # Returns a string containing the URL to the movie poster.
@@ -107,22 +107,22 @@ module Imdb
107
107
 
108
108
  # Returns a float containing the average user rating
109
109
  def rating
110
- document.at(".starbar-meta b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
110
+ document.at(".starbar-meta b").content.split('/').first.strip.to_f rescue nil
111
111
  end
112
112
 
113
113
  # Returns an int containing the number of user ratings
114
114
  def votes
115
- document.at("#tn15rating .tn15more").innerHTML.strip.imdb_unescape_html.gsub(/[^\d+]/, "").to_i rescue nil
115
+ document.at("#tn15rating .tn15more").content.strip.gsub(/[^\d+]/, "").to_i rescue nil
116
116
  end
117
117
 
118
118
  # Returns a string containing the tagline
119
119
  def tagline
120
- document.search("h5[text()='Tagline:'] ~ div").first.innerHTML.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
120
+ document.search("h5[text()='Tagline:'] ~ div").first.inner_html.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
121
121
  end
122
122
 
123
123
  # Returns a string containing the mpaa rating and reason for rating
124
124
  def mpaa_rating
125
- document.search("h5[text()='MPAA:'] ~ div").first.innerHTML.strip.imdb_unescape_html rescue nil
125
+ document.at("//a[starts-with(.,'MPAA')]/../following-sibling::*").content.strip rescue nil
126
126
  end
127
127
 
128
128
  # Returns a string containing the title
@@ -130,25 +130,34 @@ module Imdb
130
130
  if @title && !force_refresh
131
131
  @title
132
132
  else
133
- @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
133
+ @title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html rescue nil
134
134
  end
135
135
  end
136
136
 
137
137
  # Returns an integer containing the year (CCYY) the movie was released in.
138
138
  def year
139
- document.search('a[@href^="/year/"]').innerHTML.to_i
139
+ document.at("a[@href^='/year/']").content.to_i rescue nil
140
140
  end
141
141
 
142
142
  # Returns release date for the movie.
143
143
  def release_date
144
- sanitize_release_date(document.search('h5[text()*=Release Date]').first.next_sibling.innerHTML.to_s) rescue nil
144
+ sanitize_release_date(document.at("h5[text()*='Release Date'] ~ div").content) rescue nil
145
+ end
146
+
147
+ # Returns filming locations from imdb_url/locations
148
+ def filming_locations
149
+ locations_document.search("#filming_locations_content .soda dt a").map { |link| link.content.strip } rescue []
145
150
  end
146
151
 
147
152
  private
148
153
 
149
- # Returns a new Hpricot document for parsing.
154
+ # Returns a new Nokogiri document for parsing.
150
155
  def document
151
- @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
156
+ @document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id))
157
+ end
158
+
159
+ def locations_document
160
+ @locations_document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id, "locations"))
152
161
  end
153
162
 
154
163
  # Use HTTParty to fetch the raw HTML for this movie.
@@ -166,26 +175,17 @@ module Imdb
166
175
  end
167
176
 
168
177
  def sanitize_plot(the_plot)
169
- the_plot = the_plot.imdb_strip_tags
170
-
171
178
  the_plot = the_plot.gsub(/add\ssummary|full\ssummary/i, "")
172
179
  the_plot = the_plot.gsub(/add\ssynopsis|full\ssynopsis/i, "")
173
- the_plot = the_plot.gsub(/&nbsp;|&raquo;/i, "")
174
- the_plot = the_plot.gsub(/see|more/i, "")
180
+ the_plot = the_plot.gsub(/see|more|\u00BB|\u00A0/i, "")
175
181
  the_plot = the_plot.gsub(/\|/i, "")
176
-
177
- the_plot = the_plot.strip.imdb_unescape_html
182
+ the_plot.strip
178
183
  end
179
184
 
180
185
  def sanitize_release_date(the_release_date)
181
- the_release_date = the_release_date.gsub(/<a.*a>/,"")
182
- the_release_date = the_release_date.gsub(/&nbsp;|&raquo;/i, "")
183
- the_release_date = the_release_date.gsub(/see|more/i, "")
184
-
185
- the_release_date = the_release_date.strip.imdb_unescape_html
186
+ the_release_date.gsub(/see|more|\u00BB|\u00A0/i, "").strip
186
187
  end
187
188
 
188
189
  end # Movie
189
190
 
190
191
  end # Imdb
191
-
@@ -11,13 +11,13 @@ module Imdb
11
11
 
12
12
  # Return the original air date for this episode
13
13
  def air_date
14
- document.search('h5[text()*=Original Air Date]').first.next_sibling.innerHTML.to_s.strip.split("\n").first.strip rescue nil
14
+ document.at("h5[text()*='Original Air Date'] ~ div").content.strip.split("\n").first.strip rescue nil
15
15
  end
16
16
 
17
17
  private
18
18
 
19
19
  def document
20
- @document ||= Hpricot(open(@url))
20
+ @document ||= Nokogiri::HTML(open(@url))
21
21
  end
22
22
  end
23
23
  end
@@ -7,19 +7,15 @@ module Imdb
7
7
 
8
8
  private
9
9
  def parse_movies
10
- document.search('a[@href^="/title/tt"]').reject do |element|
11
- element.innerHTML.imdb_strip_tags.empty? ||
12
- element.parent.innerHTML =~ /media from/i
10
+ document.search("a[@href^='/title/tt']").reject do |element|
11
+ element.inner_html.imdb_strip_tags.empty? ||
12
+ element.inner_html.imdb_strip_tags == "X" ||
13
+ element.parent.inner_html =~ /media from/i
13
14
  end.map do |element|
14
15
  id = element['href'][/\d+/]
15
16
 
16
- data = element.parent.innerHTML.split("<br />")
17
- if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
18
- title = data[1]
19
- else
20
- title = data[0]
21
- end
22
-
17
+ data = element.parent.inner_html.split("<br />")
18
+ title = (!data[0].nil? && !data[1].nil? && data[0] =~ /img/) ? data[1] : data[0]
23
19
  title = title.imdb_strip_tags.imdb_unescape_html
24
20
  title.gsub!(/\s+\(\d\d\d\d\)$/, '')
25
21
 
@@ -23,7 +23,7 @@ module Imdb
23
23
 
24
24
  private
25
25
  def document
26
- @document ||= Hpricot(Imdb::Search.query(@query))
26
+ @document ||= Nokogiri::HTML(Imdb::Search.query(@query))
27
27
  end
28
28
 
29
29
  def self.query(query)
@@ -31,15 +31,15 @@ module Imdb
31
31
  end
32
32
 
33
33
  def parse_movie
34
- id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
35
- title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
34
+ id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
35
+ title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html
36
36
 
37
37
  [Imdb::Movie.new(id, title)]
38
38
  end
39
39
 
40
40
  # Returns true if the search yielded only one result, an exact match
41
41
  def exact_match?
42
- !document.at("//table[@id='title-overview-widget-layout']").nil?
42
+ !document.at("table[@id='title-overview-widget-layout']").nil?
43
43
  end
44
44
 
45
45
  end # Search
@@ -15,12 +15,12 @@ module Imdb
15
15
  def episodes
16
16
  @episodes = []
17
17
 
18
- document.search("div.eplist a[@itemprop*=name]").each_with_index do |link, index|
18
+ document.search("div.eplist a[@itemprop*='name']").each_with_index do |link, index|
19
19
  @episodes << Imdb::Episode.new(
20
20
  link[:href].scan(/\d+/).first,
21
21
  @season_number,
22
22
  index + 1,
23
- link.innerHTML.strip.imdb_unescape_html
23
+ link.content.strip
24
24
  )
25
25
  end
26
26
 
@@ -30,7 +30,7 @@ module Imdb
30
30
  private
31
31
 
32
32
  def document
33
- @document ||= Hpricot(open(@url))
33
+ @document ||= Nokogiri::HTML(open(@url))
34
34
  end
35
35
  end
36
36
  end