RubyGems - imdb - Versions diffs - 0.7.0 → 0.8.0 - Mend

imdb 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +4 -4
data/.travis.yml +6 -0
data/MIT-LICENSE +21 -0
data/README.md +97 -0
data/imdb.gemspec +1 -1
data/lib/imdb.rb +1 -1
data/lib/imdb/base.rb +36 -36
data/lib/imdb/episode.rb +2 -2
data/lib/imdb/movie_list.rb +6 -10
data/lib/imdb/search.rb +4 -4
data/lib/imdb/season.rb +3 -3
data/lib/imdb/serie.rb +2 -3
data/lib/imdb/top_250.rb +1 -1
data/lib/imdb/version.rb +1 -1
data/spec/fixtures/locations +1167 -0
data/spec/fixtures/plotsummary +1063 -897
data/spec/fixtures/search_kannethirey_thondrinal +344 -346
data/spec/fixtures/search_killed_wife +344 -346
data/spec/fixtures/search_star_trek +344 -346
data/spec/fixtures/synopsis +457 -446
data/spec/fixtures/thewalkingdead-s1 +740 -555
data/spec/fixtures/thewalkingdead-s1e2 +505 -480
data/spec/fixtures/top_250 +10749 -1082
data/spec/fixtures/tt0036855 +553 -519
data/spec/fixtures/tt0083987 +563 -512
data/spec/fixtures/tt0095016 +518 -536
data/spec/fixtures/tt0110912 +572 -512
data/spec/fixtures/tt0111161 +559 -508
data/spec/fixtures/tt0117731 +542 -510
data/spec/fixtures/tt0166222 +969 -872
data/spec/fixtures/tt0242653 +530 -524
data/spec/fixtures/tt0330508 +845 -717
data/spec/fixtures/tt0468569 +533 -543
data/spec/fixtures/tt1401252 +472 -449
data/spec/fixtures/tt1520211 +562 -542
data/spec/imdb/movie_spec.rb +13 -5
data/spec/imdb/search_spec.rb +4 -4
data/spec/imdb/series_spec.rb +1 -1
data/spec/imdb/top_250_spec.rb +5 -5
data/spec/spec_helper.rb +1 -0
metadata +12 -9
data/README.rdoc +0 -114

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ca782282c0207181d57c8732b2028c46e91854fa
-  data.tar.gz: 93857f4686f71950e2bbb514805e87ab0c075de4
+  metadata.gz: 2c84d21278933772df9156ec48a141b0d7b7375e
+  data.tar.gz: 1f54dd630b773de99a1703ad8fbeb379a7d5cbf3
 SHA512:
-  metadata.gz: b64b0288a5ee43eced42d0fe5ebbd6d4f9d4057d3941924f08399e8235a3467d4f950c5186e58b1f1deee95f90413b74b2dc9cfa295f9763c0f7e33ccf093618
-  data.tar.gz: 423bb452e2c11f1c74925c5ff591cc1fb11ed197242f65284166e6f2b9864106efcfcdb49648ec0064e313d3b4f697cfbd24b8fe399ed74206eee08f9054a1e9
+  metadata.gz: ca8f765c422677819391031671bbd0467c7a280511435dda5eee4426f8291b6809c014d63c9662ff89813cb04a065c6a64e62ef4eae60b878a15e9a60ecd2bcd
+  data.tar.gz: c6e895315a23977569618ee9448128ebca33290568e94909241e4b2be81ca8e74ad95145a6f1f6d0d1ef3fd4c8cf3a525e905458ea2dec0778660b5139f53903

data/.travis.yml CHANGED

@@ -6,12 +6,18 @@ notifications:
 rvm:
   - 1.9.2
   - 1.9.3
+  - 2.0.0
   - ruby-head
   - rbx-19mode
+  - 1.8.7
+  - ree
 matrix:
   allow_failures:
     - rvm: rbx-19mode
+    - rvm: ruby-head
+    - rvm: 1.8.7
+    - rvm: ree
 script:
   - "bundle exec rake spec"

data/MIT-LICENSE ADDED

@@ -0,0 +1,21 @@
+Copyright (c) 2009-2013 Ariejan de Vroom
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md ADDED

@@ -0,0 +1,97 @@
+# imdb [![Build Status](https://travis-ci.org/ariejan/imdb.png?branch=master)](https://travis-ci.org/ariejan/imdb)
+* [Sources](https://github.com/ariejan/imdb)
+* [Issues](https://github.com/ariejan/imdb/issues)
+## Description
+This gem allows you to easily access publicly available data from IMDB.
+## Features
+IMDB currently features the following:
+* Querying detailed movie info
+* Searching for movies
+* Command-line utility included.
+## Synopsis
+### Movies:
+    i = Imdb::Movie.new("0095016")
+    i.title
+    #=> "Die Hard"
+    i.cast_members.first
+    #=> "Bruce Willis"
+### Series:
+    serie = Imdb::Serie.new("1520211")
+    serie.title
+    #=> "\"The Walking Dead\""
+    serie.rating
+    #=> 8.8
+    serie.seasons.size
+    #=> 3
+    serie.season(1).episodes.size
+    #=> 6
+    serie.season(1).episode(2).title
+    #=> "Guts"
+### Searching:
+    i = Imdb::Search.new("Star Trek")
+    i.movies.size
+    #=> 97
+### Using the command line utility is quite easy:
+    $ imdb Star Trek
+or get movie info
+    $ imdb 0095016
+## Installation
+    gem install imdb
+## Running Tests
+You'll need rspec and fakeweb installed to run the specs.
+    $ bundle install
+    $ bundle exec rake spec
+Although not recommended, you may run the specs against the live imdb.com
+website. This will make a lot of calls to imdb.com, so use it wisely.
+    $ LIVE_TEST=true bundle exec rake spec
+To update the packaged fixtures files with actual imdb.com samples, use the
+`fixtures:refresh` rake task
+    $ bundle exec rake fixtures:refresh
+## Disclaimer
+Neither I, nor any developer who contributed to this project, accepts any kind of
+liability for your use of this library.
+IMDB does not permit use of its data by third parties without their consent.
+Using this library for anything other than limited personal use may result
+in an IP ban to the IMDB website.
+## License
+See [MIT-LICENSE](https://github.com/ariejan/imdb/blob/master/MIT-LICENSE)

data/imdb.gemspec CHANGED

@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
   s.require_paths = ["lib"]
-  s.add_dependency 'hpricot', '~> 0.8.6'
+  s.add_dependency 'nokogiri', '>= 1.6.0'
   s.add_development_dependency 'rake', '~> 10.0.3'
   s.add_development_dependency 'rspec', '~> 2.13.0'

data/lib/imdb.rb CHANGED

@@ -3,7 +3,7 @@ $:.unshift(File.dirname(__FILE__)) unless
 require 'open-uri'
 require 'rubygems'
-require 'hpricot'
+require 'nokogiri'
 require 'imdb/base'
 require 'imdb/movie'

data/lib/imdb/base.rb CHANGED

@@ -20,7 +20,7 @@ module Imdb
     # Returns an array with cast members
     def cast_members
-      document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+      document.search("table.cast td.nm a").map { |link| link.content.strip } rescue []
     end
     def cast_member_ids
@@ -29,7 +29,7 @@ module Imdb
     # Returns an array with cast characters
     def cast_characters
-      document.search("table.cast td.char").map { |link| link.innerText } rescue []
+      document.search("table.cast td.char").map { |link| link.content.strip } rescue []
     end
     # Returns an array with cast members and characters
@@ -40,58 +40,58 @@ module Imdb
         memb_char[i] = "#{self.cast_members[i]} #{sep} #{self.cast_characters[i]}"
         i=i+1
       }
-      return memb_char
+      memb_char
     end
     # Returns the name of the director
     def director
-      document.search("h5[text()^='Director'] ~ a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+      document.search("h5[text()^='Director'] ~ div a").map { |link| link.content.strip } rescue []
     end
     # Returns the url to the "Watch a trailer" page
     def trailer_url
-      'http://imdb.com' + document.at("a[@href*=/video/screenplay/]")["href"] rescue nil
+      'http://imdb.com' + document.at("a[@href*='/video/screenplay/']")["href"] rescue nil
     end
     # Returns an array of genres (as strings)
     def genres
-      document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+      document.search("h5[text()='Genre:'] ~ div a[@href*='/Sections/Genres/']").map { |link| link.content.strip } rescue []
     end
     # Returns an array of languages as strings.
     def languages
-      document.search("h5[text()='Language:'] ~ a[@href*=/language/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+      document.search("h5[text()='Language:'] ~ div a[@href*='/language/']").map { |link| link.content.strip } rescue []
     end
     # Returns an array of countries as strings.
     def countries
-      document.search("h5[text()='Country:'] ~ a[@href*=/country/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+      document.search("h5[text()='Country:'] ~ div a[@href*='/country/']").map { |link| link.content.strip } rescue []
     end
     # Returns the duration of the movie in minutes as an integer.
     def length
-      document.search("//h5[text()='Runtime:']/..").innerHTML[/\d+ min/].to_i rescue nil
+      document.at("h5[text()='Runtime:'] ~ div").content[/\d+ min/].to_i rescue nil
     end
     # Returns the company
     def company
-      document.search("h5[text()='Company:'] ~ a[@href*=/company/']").map { |link| link.innerHTML.strip.imdb_unescape_html }.first rescue nil
+      document.search("h5[text()='Company:'] ~ div a[@href*='/company/']").map { |link| link.content.strip }.first rescue nil
     end
     # Returns a string containing the plot.
     def plot
-      sanitize_plot(document.search("h5[text()='Plot:'] ~ div").first.innerHTML) rescue nil
+      sanitize_plot(document.at("h5[text()='Plot:'] ~ div").content) rescue nil
     end
     # Returns a string containing the plot summary
     def plot_synopsis
-      doc = Hpricot(Imdb::Movie.find_by_id(@id, :synopsis))
-      doc.search("div[@id='swiki.2.1']").innerHTML.strip.imdb_unescape_html.imdb_strip_tags rescue nil
+      doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :synopsis))
+      doc.at("div[@id='swiki.2.1']").content.strip rescue nil
     end
     def plot_summary
-      doc = Hpricot(Imdb::Movie.find_by_id(@id, :plotsummary))
-      doc.search("p[@class='plotpar']").first.innerHTML.gsub(/<i.*/im, '').strip.imdb_unescape_html rescue nil
+      doc = Nokogiri::HTML(Imdb::Movie.find_by_id(@id, :plotsummary))
+      doc.at("p.plotSummary").inner_html.gsub(/<i.*/im, '').strip.imdb_unescape_html rescue nil
     end
     # Returns a string containing the URL to the movie poster.
@@ -107,22 +107,22 @@ module Imdb
     # Returns a float containing the average user rating
     def rating
-      document.at(".starbar-meta b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
+      document.at(".starbar-meta b").content.split('/').first.strip.to_f rescue nil
     end
     # Returns an int containing the number of user ratings
     def votes
-      document.at("#tn15rating .tn15more").innerHTML.strip.imdb_unescape_html.gsub(/[^\d+]/, "").to_i rescue nil
+      document.at("#tn15rating .tn15more").content.strip.gsub(/[^\d+]/, "").to_i rescue nil
     end
     # Returns a string containing the tagline
     def tagline
-      document.search("h5[text()='Tagline:'] ~ div").first.innerHTML.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
+      document.search("h5[text()='Tagline:'] ~ div").first.inner_html.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
     end
     # Returns a string containing the mpaa rating and reason for rating
     def mpaa_rating
-      document.search("h5[text()='MPAA:'] ~ div").first.innerHTML.strip.imdb_unescape_html rescue nil
+      document.at("//a[starts-with(.,'MPAA')]/../following-sibling::*").content.strip rescue nil
     end
     # Returns a string containing the title
@@ -130,25 +130,34 @@ module Imdb
       if @title && !force_refresh
         @title
       else
-        @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
+        @title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html rescue nil
       end
     end
     # Returns an integer containing the year (CCYY) the movie was released in.
     def year
-      document.search('a[@href^="/year/"]').innerHTML.to_i
+      document.at("a[@href^='/year/']").content.to_i rescue nil
     end
     # Returns release date for the movie.
     def release_date
-      sanitize_release_date(document.search('h5[text()*=Release Date]').first.next_sibling.innerHTML.to_s) rescue nil
+      sanitize_release_date(document.at("h5[text()*='Release Date'] ~ div").content) rescue nil
+    end
+    # Returns filming locations from imdb_url/locations
+    def filming_locations
+      locations_document.search("#filming_locations_content .soda dt a").map { |link| link.content.strip } rescue []
     end
     private
-    # Returns a new Hpricot document for parsing.
+    # Returns a new Nokogiri document for parsing.
     def document
-      @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
+      @document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id))
+    end
+    def locations_document
+      @locations_document ||= Nokogiri::HTML(Imdb::Movie.find_by_id(@id, "locations"))
     end
     # Use HTTParty to fetch the raw HTML for this movie.
@@ -166,26 +175,17 @@ module Imdb
     end
     def sanitize_plot(the_plot)
-      the_plot = the_plot.imdb_strip_tags
       the_plot = the_plot.gsub(/add\ssummary|full\ssummary/i, "")
       the_plot = the_plot.gsub(/add\ssynopsis|full\ssynopsis/i, "")
-      the_plot = the_plot.gsub(/&nbsp;|&raquo;/i, "")
-      the_plot = the_plot.gsub(/see|more/i, "")
+      the_plot = the_plot.gsub(/see|more|\u00BB|\u00A0/i, "")
       the_plot = the_plot.gsub(/\|/i, "")
-      the_plot = the_plot.strip.imdb_unescape_html
+      the_plot.strip
     end
     def sanitize_release_date(the_release_date)
-      the_release_date = the_release_date.gsub(/<a.*a>/,"")
-      the_release_date = the_release_date.gsub(/&nbsp;|&raquo;/i, "")
-      the_release_date = the_release_date.gsub(/see|more/i, "")
-      the_release_date = the_release_date.strip.imdb_unescape_html
+      the_release_date.gsub(/see|more|\u00BB|\u00A0/i, "").strip
     end
   end # Movie
 end # Imdb

data/lib/imdb/episode.rb CHANGED

@@ -11,13 +11,13 @@ module Imdb
     # Return the original air date for this episode
     def air_date
-      document.search('h5[text()*=Original Air Date]').first.next_sibling.innerHTML.to_s.strip.split("\n").first.strip rescue nil
+      document.at("h5[text()*='Original Air Date'] ~ div").content.strip.split("\n").first.strip rescue nil
     end
     private
     def document
-      @document ||= Hpricot(open(@url))
+      @document ||= Nokogiri::HTML(open(@url))
     end
   end
 end

data/lib/imdb/movie_list.rb CHANGED

@@ -7,19 +7,15 @@ module Imdb
     private
     def parse_movies
-      document.search('a[@href^="/title/tt"]').reject do |element|
-        element.innerHTML.imdb_strip_tags.empty? ||
-        element.parent.innerHTML =~ /media from/i
+      document.search("a[@href^='/title/tt']").reject do |element|
+        element.inner_html.imdb_strip_tags.empty? ||
+        element.inner_html.imdb_strip_tags == "X" ||
+        element.parent.inner_html =~ /media from/i
       end.map do |element|
         id = element['href'][/\d+/]
-        data = element.parent.innerHTML.split("<br />")
-        if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
-          title = data[1]
-        else
-          title = data[0]
-        end
+        data = element.parent.inner_html.split("<br />")
+        title = (!data[0].nil? && !data[1].nil? && data[0] =~ /img/) ? data[1] : data[0]
         title = title.imdb_strip_tags.imdb_unescape_html
         title.gsub!(/\s+\(\d\d\d\d\)$/, '')

data/lib/imdb/search.rb CHANGED

@@ -23,7 +23,7 @@ module Imdb
     private
     def document
-      @document ||= Hpricot(Imdb::Search.query(@query))
+      @document ||= Nokogiri::HTML(Imdb::Search.query(@query))
     end
     def self.query(query)
@@ -31,15 +31,15 @@ module Imdb
     end
     def parse_movie
-      id                 = document.at("head/link[@rel='canonical']")['href'][/\d+/]
-      title              = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
+      id    = document.at("head/link[@rel='canonical']")['href'][/\d+/]
+      title = document.at("h1").inner_html.split('<span').first.strip.imdb_unescape_html
       [Imdb::Movie.new(id, title)]
     end
     # Returns true if the search yielded only one result, an exact match
     def exact_match?
-      !document.at("//table[@id='title-overview-widget-layout']").nil?
+      !document.at("table[@id='title-overview-widget-layout']").nil?
     end
   end # Search

data/lib/imdb/season.rb CHANGED

@@ -15,12 +15,12 @@ module Imdb
     def episodes
       @episodes = []
-      document.search("div.eplist a[@itemprop*=name]").each_with_index do |link, index|
+      document.search("div.eplist a[@itemprop*='name']").each_with_index do |link, index|
         @episodes << Imdb::Episode.new(
           link[:href].scan(/\d+/).first,
           @season_number,
           index + 1,
-          link.innerHTML.strip.imdb_unescape_html
+          link.content.strip
         )
       end
@@ -30,7 +30,7 @@ module Imdb
     private
     def document
-      @document ||= Hpricot(open(@url))
+      @document ||= Nokogiri::HTML(open(@url))
     end
   end
 end