RubyGems - imdb - Versions diffs - 0.0.1 → 0.1.0 - Mend

imdb 0.0.1 → 0.1.0

Files changed (9) hide show

data/History.txt +4 -0
data/Manifest.txt +2 -0
data/Rakefile +12 -1
data/lib/imdb.rb +2 -1
data/lib/imdb/movie.rb +30 -5
data/lib/imdb/search.rb +68 -0
data/lib/imdb/string_extensions.rb +4 -1
data/spec/imdb_search_spec.rb +47 -0
metadata +3 -1

data/History.txt CHANGED

@@ -1,3 +1,7 @@
+== 0.1.0 2009-06-03
+* Added Imdb::Search, which allows searching IMDB for a specific movie.
 == 0.0.1 2009-06-03
 * First release of the IMDB gem.

data/Manifest.txt CHANGED

@@ -4,11 +4,13 @@ README.rdoc
 Rakefile
 lib/imdb.rb
 lib/imdb/movie.rb
+lib/imdb/search.rb
 lib/imdb/string_extensions.rb
 script/console
 script/destroy
 script/generate
 spec/imdb_movie_spec.rb
+spec/imdb_search_spec.rb
 spec/spec.opts
 spec/spec_helper.rb
 tasks/rspec.rake

data/Rakefile CHANGED

@@ -17,7 +17,7 @@ $hoe = Hoe.new('imdb', Imdb::VERSION) do |p|
   p.clean_globs |= %w[**/.DS_Store tmp *.log]
   path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "\#{p.rubyforge_name}/\#{p.name}"
-  p.remote_rdoc_dir = File.join(path.gsub(/^#{p.rubyforge_name}\/?/,''), 'rdoc')
+  p.remote_rdoc_dir = 'clown'
   p.rsync_args = '-av --delete --ignore-errors'
 end
@@ -26,3 +26,14 @@ Dir['tasks/**/*.rake'].each { |t| load t }
 # TODO - want other tests/tasks run by default? Add them to the list
 # task :default => [:spec, :features]
+remove_task :publish_docs
+desc 'Publish RDoc to RubyForge.'
+task :publish_docs => [:clean, :docs] do
+  local_dir = 'doc'
+  host = website_config["host"]
+  host = host ? "#{host}:" : ""
+  remote_dir = File.join(website_config["remote_dir"], "")
+  sh %{rsync -aCv #{local_dir}/ #{host}#{remote_dir}}
+end

data/lib/imdb.rb CHANGED

@@ -6,8 +6,9 @@ require 'httparty'
 require 'hpricot'
 require 'imdb/movie'
+require 'imdb/search'
 require 'imdb/string_extensions'
 module Imdb
-  VERSION = '0.0.1'
+  VERSION = '0.1.0'
 end

data/lib/imdb/movie.rb CHANGED

@@ -1,62 +1,87 @@
 module Imdb
+  # Represents a Movie on IMDB.com
   class Movie
     include HTTParty
-    attr_accessor :id, :url
+    attr_accessor :id, :url, :title
-    # Initialize a new IMDB movie object.
-    def initialize(imdb_id)
+    # Initialize a new IMDB movie object with its IMDB id (as a String)
+    #
+    #   movie = Imdb::Movie.new("0095016")
+    #
+    # Imdb::Movie objects are lazy loading, meaning that no HTTP request
+    # will be performed when a new object is created. Only when you use an
+    # accessor that needs the remote data, an HTTP request is made (once).
+    #
+    def initialize(imdb_id, title = nil)
       @id = imdb_id
       @url = "http://www.imdb.com/title/tt#{imdb_id}/"
+      @title = title
     end
+    # Returns an array with cast members
     def cast_members
       document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
     end
+    # Returns the name of the director
     def director
       document.at("h5[text()='Director:'] ~ a").innerHTML.strip.imdb_unescape_html rescue nil
     end
+    # Returns an array of genres (as strings)
     def genres
       document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
     end
+    # Returns the duration of the movie in minutes as an integer.
     def length
       document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/].to_i rescue nil
     end
+    # Returns a string containing the plot.
     def plot
       document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
     end
+    # Returns a string containing the URL to the movie poster.
     def poster
       document.at("a[@name='poster'] img")['src'][/http:.+@@/] + '.jpg' rescue nil
     end
+    # Returns a float containing the average user rating
     def rating
       document.at(".general.rating b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
     end
+    # Returns a string containing the tagline
     def tagline
       document.search("//h5[text()^='Tagline']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
     end
-    def title
-      document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
+    # Returns a string containing the title
+    def title(force_refresh = false)
+      if @title && !force_refresh
+        @title
+      else
+        @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
+      end
     end
+    # Returns an integer containing the year (CCYY) the movie was released in.
     def year
       document.search('a[@href^="/Sections/Years/"]').innerHTML.to_i
     end
     private
+    # Returns a new Hpricot document for parsing.
     def document
       @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
     end
+    # Use HTTParty to fetch the raw HTML for this movie.
     def self.find_by_id(imdb_id)
       get("http://www.imdb.com/title/tt#{imdb_id}/")
     end

data/lib/imdb/search.rb ADDED

@@ -0,0 +1,68 @@
+module Imdb
+  # Search IMDB for a title
+  class Search
+    include HTTParty
+    # Initialize a new IMDB search with the specified query
+    #
+    #   search = Imdb::Search.new("Star Trek")
+    #
+    # Imdb::Search is lazy loading, meaning that unless you access the +movies+
+    # attribute, no query is made to IMDB.com.
+    #
+    def initialize(query)
+      @query = query
+    end
+    # Returns an array of Imdb::Movie objects, one for each search result yielded.
+    # If the +query+ was an exact match, a single element array will be returned.
+    def movies
+      @movies ||= (exact_match? ? parse_movie : parse_movies)
+    end
+    #private
+    def document
+      @document ||= Hpricot(Imdb::Search.query(@query))
+    end
+    def self.query(query)
+      get("http://www.imdb.com/find?q=#{CGI::escape(query)};s=tt")
+    end
+    def parse_movies
+      document.search('a[@href^="/title/tt"]').reject do |element|
+        element.innerHTML.imdb_strip_tags.empty? ||
+        element.parent.innerHTML =~ /media from/i
+      end.map do |element|
+        id = element['href'][/\d+/]
+        data = element.parent.innerHTML.split("<br />")
+        if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
+          title = data[1]
+        else
+          title = data[0]
+        end
+        title = title.imdb_strip_tags.imdb_unescape_html
+        [id, title]
+      end.uniq.map do |values|
+        Imdb::Movie.new(*values)
+      end
+    end
+    def parse_movie
+      id = document.at("a[@name='poster']")['href'][/\d+$/]
+      title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
+      [Imdb::Movie.new(id, title)]
+    end
+    # Returns true if the search yielded only one result, an exact match
+    def exact_match?
+      !document.at("//h3[text()^='Overview']/..").nil?
+    end
+  end # Search
+end # Imdb

data/lib/imdb/string_extensions.rb CHANGED

@@ -1,13 +1,16 @@
 require 'cgi'
 require 'iconv'
-module Imdb
+module Imdb #:nodoc:
   module StringExtensions
+    # Unescape HTML
     def imdb_unescape_html
       Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
     end
+    # Strip tags
     def imdb_strip_tags
       gsub(/<\/?[^>]*>/, "")
     end

data/spec/imdb_search_spec.rb ADDED

@@ -0,0 +1,47 @@
+require File.dirname(__FILE__) + '/spec_helper.rb'
+### WARNING: This spec uses live data!
+#
+# Many may object to testing against a live website, and for good reason.
+# However, the IMDB interface changes over time, and to guarantee the parser
+# works with the currently available IMDB website, tests are run against
+# IMDB.com instead.
+#
+# This test searches for "Star Trek" (multiple results) and "Matrix Revolutions" (exact match)
+#
+describe "Imdb::Search with multiple search results" do
+  before(:each) do
+    # Search for "Star Trek"
+    @search = Imdb::Search.new("Star Trek")
+  end
+  it "should find > 10 results" do
+    @search.movies.size.should > 10
+  end
+  it "should return Imdb::Movie objects only" do
+    @search.movies.each { |movie| movie.should be_an(Imdb::Movie) }
+  end
+  it "should not return movies with no title" do
+    @search.movies.each { |movie| movie.title.should_not be_blank }
+  end
+end
+describe "Imdb::Search with an exact match" do
+  before(:each) do
+    # Search for "Matrix Revolutions"
+    @search = Imdb::Search.new("Matrix Revolutions")
+  end
+  it "should find one result" do
+    @search.movies.size.should eql(1)
+  end
+  it "should have the corrected title" do
+    @search.movies.first.title.should =~ /The Matrix Revolutions/i
+  end
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: imdb
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.1.0
 platform: ruby
 authors:
 - Ariejan de Vroom
@@ -70,11 +70,13 @@ files:
 - Rakefile
 - lib/imdb.rb
 - lib/imdb/movie.rb
+- lib/imdb/search.rb
 - lib/imdb/string_extensions.rb
 - script/console
 - script/destroy
 - script/generate
 - spec/imdb_movie_spec.rb
+- spec/imdb_search_spec.rb
 - spec/spec.opts
 - spec/spec_helper.rb
 - tasks/rspec.rake