RubyGems - imdb_parser - Versions diffs - 0.6.6 - Mend

imdb_parser 0.6.6

Files changed (48) hide show

data/.gitignore +8 -0
data/Gemfile +3 -0
data/History.txt +74 -0
data/Manifest.txt +29 -0
data/README.rdoc +108 -0
data/Rakefile +35 -0
data/bin/imdb +10 -0
data/config/website.yml +2 -0
data/imdb.gemspec +29 -0
data/lib/imdb_parser/cli.rb +109 -0
data/lib/imdb_parser/episode.rb +26 -0
data/lib/imdb_parser/imdb_base.rb +146 -0
data/lib/imdb_parser/movie.rb +13 -0
data/lib/imdb_parser/movie_list.rb +41 -0
data/lib/imdb_parser/search.rb +46 -0
data/lib/imdb_parser/season.rb +45 -0
data/lib/imdb_parser/serie.rb +24 -0
data/lib/imdb_parser/string_extensions.rb +28 -0
data/lib/imdb_parser/top_250.rb +10 -0
data/lib/imdb_parser/version.rb +3 -0
data/lib/imdb_parser.rb +17 -0
data/script/console +11 -0
data/script/destroy +14 -0
data/script/generate +14 -0
data/spec/fixtures/search_kannethirey_thondrinal +14 -0
data/spec/fixtures/search_killed_wife +14 -0
data/spec/fixtures/search_star_trek +834 -0
data/spec/fixtures/top_250 +1433 -0
data/spec/fixtures/tt0036855 +1255 -0
data/spec/fixtures/tt0083987 +1261 -0
data/spec/fixtures/tt0095016 +1286 -0
data/spec/fixtures/tt0110912 +1262 -0
data/spec/fixtures/tt0111161 +1272 -0
data/spec/fixtures/tt0117731 +1246 -0
data/spec/fixtures/tt0166222 +1806 -0
data/spec/fixtures/tt0242653 +1254 -0
data/spec/fixtures/tt0330508 +1581 -0
data/spec/fixtures/tt0468569 +1305 -0
data/spec/fixtures/tt1401252 +1109 -0
data/spec/imdb/cli_spec.rb +49 -0
data/spec/imdb/movie_spec.rb +204 -0
data/spec/imdb/search_spec.rb +78 -0
data/spec/imdb/top_250_spec.rb +21 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +57 -0
data/tasks/fixtures.rake +15 -0
data/tasks/rspec.rake +21 -0
metadata +183 -0

data/.gitignore ADDED Viewed

@@ -0,0 +1,8 @@
+pkg/*
+doc/*
+rdoc/*
+*.gem
+.bundle
+Gemfile.lock
+.rvmrc
+.gh_pages

data/Gemfile ADDED Viewed

@@ -0,0 +1,3 @@
+# Use the explicit HTTPS index URL; the :rubygems symbol shortcut is deprecated
+# by Bundler.
+source "https://rubygems.org"
+# Take runtime and development dependencies from the gemspec in this directory.
+gemspec

data/History.txt ADDED Viewed

@@ -0,0 +1,74 @@
+== 0.6.6 2011-09-14
+-> No history was kept, so here's a short changelog since 2010-02-14 based on git:
+ariejan - 2011-09-14 21:37:06 +0200 - Tested against latest IMDB site
+ariejan - 2011-09-14 21:30:51 +0200 - Ignore .rvmrc
+ariejan - 2011-09-14 12:27:16 -0700 - Merge pull request #21 from defeed/master
+Arjom - 2011-09-14 18:48:39 +0300 - Added method to get countries
+ariejan - 2011-09-05 22:49:27 -0700 - Merge pull request #20 from mguterl/use_bundler
+mguterl - 2011-09-05 09:31:23 -0400 - replace jeweler with bundler
+ariejan - 2011-06-16 03:32:45 -0700 - Merge pull request #18 from rbu/master
+rbu - 2011-06-15 22:25:11 +0200 - increase version to 0.6.5.1 and update gemspec
+rbu - 2011-06-15 22:16:01 +0200 - Add method to get the number of votes
+rbu - 2011-06-15 22:06:07 +0200 - automatic fixtures update, and add a note about a flaky test
+rbu - 2011-06-15 22:02:18 +0200 - Fix test, 'Die Hard' had some changes in imdb
+rbu - 2011-06-15 22:01:24 +0200 - Fix test, pick another movie for 'without poster' case
+rbu - 2011-06-15 21:49:25 +0200 - Fix test, Matrix Revolutions is not an exact match anymore
+rbu - 2011-06-15 21:16:43 +0200 - Use akas. subdomain to avoid localized titles
+tolosa - 2010-12-04 04:26:19 -0300 - Updated fixtures and sources
+tolosa - 2010-12-04 04:25:50 -0300 - Fixed search result for exact match
+tolosa - 2010-10-24 21:16:05 -0300 - Modified movie class to load data from new URL, in order to bypass the recent design changes in the IMDB website
+tolosa - 2010-10-24 20:53:43 -0300 - Changed movie URLs to load in fixture data
+tolosa - 2010-10-24 20:49:52 -0300 - Updated fixture data from new movie URLs
+ghedamat - 2010-10-08 06:16:58 -0700 - changed h5 to h4 due to Imdb site layout change
+ariejan - 2010-04-29 23:40:20 +0200 - Regenerated gemspec for version 0.6.5
+ariejan - 2010-04-29 23:40:11 +0200 - Version bump to 0.6.5
+rick - 2010-04-30 05:35:08 +0800 - Adding a means of returning cast member IMDB id's for further lookups.
+kenpratt - 2010-04-30 05:34:55 +0800 - Improved poster image parsing (increased success rate on top 250 from ~81% to 100%).
+hornairs - 2010-04-30 05:34:42 +0800 - Fixed parsing of plot and release date after IMDB added little arrows, all tests passing.
+sandeep kumar - 2010-02-15 15:47:35 +0800 - adding method for release_date for imdb movie and testcase for the same
+== 0.5.0 patch-1 2010-02-14
+* Added methods for fetching release date [if available] from IMDB
+* Added Testcase for the same as well.
+== 0.5.0 2009-06-17
+* Added Top 250 listing [mguterl]
+* Made general improvements to data retrieval [mguterl]
+== 0.4.2 2009-06-14
+* Updated manifest to include all spec fixtures. [ariejan]
+== 0.4.1 2009-06-14
+* Added support for FakeWeb so specs run faster. [mguterl]
+* Cache the search query in Imdb::Search.query. [mguterl]
+* Added a convenience method Imdb::Search.search. [mguterl]
+== 0.4.0 2009-06-14
+* Updates to the console 'imdb' utility [ariejan]
+  * Show the IMDB ID
+  * Show the full IMDB URL
+== 0.3.0 2009-06-07
+* Fixed typo in CLI field name 'Cast by' [ariejan]
+* Fixed retrieval of multiple directors. (#1) [ariejan]
+== 0.2.0 2009-06-04
+* Added console tool 'imdb' for searching and getting movie info. [ariejan]
+* Fixed issue #2 [ariejan]
+== 0.1.0 2009-06-03
+* Added Imdb::Search that allows search IMDB for a specific movie. [ariejan]
+== 0.0.1 2009-06-03
+* First release of the IMDB gem. [ariejan]

data/Manifest.txt ADDED Viewed

@@ -0,0 +1,29 @@
+History.txt
+Manifest.txt
+README.rdoc
+Rakefile
+bin/imdb
+lib/imdb.rb
+lib/imdb/cli.rb
+lib/imdb/movie.rb
+lib/imdb/movie_list.rb
+lib/imdb/search.rb
+lib/imdb/string_extensions.rb
+lib/imdb/top_250.rb
+script/console
+script/destroy
+script/generate
+spec/fixtures/search_matrix_revolutions
+spec/fixtures/search_star_trek
+spec/fixtures/top_250
+spec/fixtures/tt0095016
+spec/fixtures/tt0111161
+spec/fixtures/tt0117731
+spec/fixtures/tt0242653
+spec/imdb/cli_spec.rb
+spec/imdb/movie_spec.rb
+spec/imdb/search_spec.rb
+spec/imdb/top_250_spec.rb
+spec/spec.opts
+spec/spec_helper.rb
+tasks/rspec.rake

data/README.rdoc ADDED Viewed

@@ -0,0 +1,108 @@
+= imdb
+Allows you to search and inspect movies and series from IMDB.com.
+== DESCRIPTION:
+This package allows you to easily access publicly available data from IMDB.
+== FEATURES/PROBLEMS:
+IMDB currently features the following:
+* Querying detailed movie info
+* Searching for movies
+* Command-line utility included.
+* Querying detailed series info (season, episode)
+== SYNOPSIS:
+Movies:
+  i = Imdb::Movie.new("0095016")
+  i.title
+  #=> "Die Hard"
+  i.cast_members.first
+  #=> "Bruce Willis"
+Serie:
+  s = Imdb::Serie.new("0773262")
+  season = s.seasons.first
+  e = season.episodes.last
+  e.title
+  #=> "Born Free"
+  e.synopsis
+  #=> "Dexter races against the clock..."
+Searching:
+    i = Imdb::Search.new("Star Trek")
+    i.movies.size
+    #=> 97
+Using the command line utility is quite easy:
+    $ imdb Star Trek
+or to get movie info
+    $ imdb 0095016
+== REQUIREMENTS:
+All required gems are installed automagically through RubyGems.
+* Hpricot 0.8.4 or a later 0.8.x release (the gemspec requires '~> 0.8.4')
+== INSTALL:
+    $ sudo gem install imdb -s http://gemcutter.org
+== DOCUMENTATION:
+== TESTING:
+You'll need rspec and fakeweb installed to run the specs.
+    $ bundle install
+    $ rake spec
+Although not recommended, you may run the specs against the live imdb.com
+website. This will make a lot of calls to imdb.com, use it wisely.
+    $ LIVE_TEST=true rake spec
+To update the packaged fixtures files with actual imdb.com samples, use the
+fixtures:refresh rake task
+    $ rake fixtures:refresh
+== LICENSE:
+(The MIT License)
+Copyright (c) 2009 Ariejan de Vroom
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/Rakefile ADDED Viewed

@@ -0,0 +1,35 @@
+require 'bundler'
+Bundler::GemHelper.install_tasks
+load File.expand_path(File.dirname(__FILE__) + "/tasks/fixtures.rake")
+require 'spec/rake/spectask'
+Spec::Rake::SpecTask.new(:spec) do |spec|
+  spec.libs << 'lib' << 'spec'
+  spec.spec_files = FileList['spec/**/*_spec.rb']
+end
+Spec::Rake::SpecTask.new(:rcov) do |spec|
+  spec.libs << 'lib' << 'spec'
+  spec.pattern = 'spec/**/*_spec.rb'
+  spec.rcov = true
+end
+task :default => :spec
+require 'imdb/version'
+require 'hanna/rdoctask'
+Rake::RDocTask.new(:rdoc) do |rdoc|
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "imdb #{Imdb::VERSION} documentation"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+  rdoc.options << '--webcvs=http://github.com/ariejan/imdb/tree/master/'
+end
+require 'gokdok'
+Gokdok::Dokker.new do |gd|
+  gd.repo_url = "git@github.com:ariejan/imdb.git"
+  gd.doc_home = "rdoc"
+  gd.remote_path = "."
+end

data/bin/imdb ADDED Viewed

@@ -0,0 +1,10 @@
+#!/usr/bin/env ruby
+#
+#  Created on 2009-6-4.
+#  Copyright (c) 2009. All rights reserved.
+require File.expand_path(File.dirname(__FILE__) + "/../lib/imdb")
+require "imdb/cli"
+Imdb::CLI.execute(STDOUT, ARGV)

data/config/website.yml ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ host: ariejan@rubyforge.org
2	+ remote_dir: /var/www/gforge-projects/imdb/

data/imdb.gemspec ADDED Viewed

@@ -0,0 +1,29 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "imdb/version"
+Gem::Specification.new do |s|
+  s.name        = "imdb_parser"
+  s.version     = Imdb::VERSION
+  s.platform    = Gem::Platform::RUBY
+  s.authors     = ["Matthieu Lamarque"]
+  s.email       = ["lamarque.matthieu@gmail.com"]
+  s.homepage    = "http://github.com/mlamarque/imdb"
+  s.summary     = %q{Access to Movie, Serie on Imdb.com}
+  s.description = %q{Easily use Ruby or the command line to find Movie, Serie information on IMDB.com.}
+  s.rubyforge_project = "imdb"
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+  s.add_dependency 'hpricot', '~> 0.8.4'
+  s.add_development_dependency 'rdoc'
+  s.add_development_dependency 'hanna'
+  s.add_development_dependency 'gokdok'
+  s.add_development_dependency 'rspec', '~> 1.3.2'
+  s.add_development_dependency 'fakeweb'
+end

data/lib/imdb_parser/cli.rb ADDED Viewed

@@ -0,0 +1,109 @@
+require 'optparse'
+module Imdb
+  class CLI
+    # Run the imdb command
+    #
+    # Searching
+    #
+    #   imdb Star Trek
+    #
+    # Get a movie, supply a 7 digit IMDB id or the IMDB URL
+    #
+    #   imdb 0095016
+    #   imdb http://akas.imdb.com/title/tt0796366/
+    #
+    def self.execute(stdout, arguments=[])
+      @stdout = stdout
+      @stdout.puts "IMDB Scraper #{Imdb::VERSION}"
+      options = {
+      }
+      mandatory_options = %w(  )
+      parser = OptionParser.new do |opts|
+        opts.banner = <<-BANNER.gsub(/^          /,'')
+Usage: #{File.basename($0)} Search Query
+       #{File.basename($0)} 0095016
+        BANNER
+        opts.separator ""
+        opts.on("-v", "--version",
+                "Show the current version.") { stdout.puts "IMDB #{Imdb::VERSION}"; exit }
+        opts.on("-h", "--help",
+                "Show this help message.") { stdout.puts opts; exit }
+        opts.parse!(arguments)
+        if mandatory_options && mandatory_options.find { |option| options[option.to_sym].nil? }
+          stdout.puts opts; exit
+        end
+      end
+      query = arguments.join(" ").strip
+      exit if query.blank?
+      movie, search = nil, nil
+      # If ID, fetch movie
+      if query.match(/(\d\d\d\d\d\d\d)/) || query.downcase.match(/^http:\/\/[www.]*imdb.com\/title\/tt(.+)\/$/)
+        fetch_movie($1)
+      else
+        search_movie(query)
+      end
+    end
+    def self.fetch_movie(imdb_id)
+      @stdout.puts
+      @stdout.puts " - fetching movie #{imdb_id}"
+      movie = Imdb::Movie.new(imdb_id)
+      display_movie_details(movie)
+    end
+    def self.search_movie(query)
+      @stdout.puts
+      @stdout.puts " - searching for \"#{query}\""
+      search = Imdb::Search.new(query)
+      if search.movies.size == 1
+        display_movie_details(search.movies.first)
+      else
+        display_search_results(search.movies)
+      end
+    end
+    def self.display_movie_details(movie)
+      title = "#{movie.title} (#{movie.year})"
+      id    = "ID #{movie.id}"
+      @stdout.puts
+      @stdout.puts "#{title}#{" " * (75 - 1 - title.length - id.length)}#{id} "
+      @stdout.puts "=" * 75
+      @stdout.puts "Rating: #{movie.rating}"
+      @stdout.puts "Duration: #{movie.length} minutes"
+      @stdout.puts "Directed by: #{movie.director.join(", ")}"
+      @stdout.puts "Cast: #{movie.cast_members[0..4].join(", ")}"
+      @stdout.puts "Genre: #{movie.genres.join(", ")}"
+      @stdout.puts "Plot: #{movie.plot}"
+      @stdout.puts "Poster URL: #{movie.poster}"
+      @stdout.puts "IMDB URL: #{movie.url}"
+      @stdout.puts "=" * 75
+      @stdout.puts
+    end
+    def self.display_search_results(movies = [])
+      movies = movies[0..9] # limit to ten top hits
+      movies.each do |movie|
+        @stdout.puts " > #{movie.id} | #{movie.title}"
+      end
+    end
+  end
+end

data/lib/imdb_parser/episode.rb ADDED Viewed

@@ -0,0 +1,26 @@
+module Imdb
+  class Episode
+    attr_accessor :title, :synopsis, :number, :release_date
+    def initialize(number, node, season)
+      @number = number
+      @node = node
+      @season = season
+    end
+    def title
+      @node.search('table tr td h3 a').innerHTML.imdb_unescape_html rescue nil
+    end
+    def synopsis
+      @node.search('table tr td/text()').text rescue nil
+    end
+    def release_date
+      @node.search('table tr td span strong').innerHTML.imdb_unescape_html rescue nil
+    end
+  end  #Episode
+end # Imdb

data/lib/imdb_parser/imdb_base.rb ADDED Viewed

@@ -0,0 +1,146 @@
+module Imdb
+  # Represents a ImdbBase
+  class ImdbBase
+    attr_accessor :id, :url, :title, :also_known_as
+    def initialize(imdb_id, title = nil, also_known_as = [])
+      @id = imdb_id
+      @url = "http://akas.imdb.com/title/tt#{imdb_id}/combined"
+      @title = title.gsub(/"/, "") if title
+      @also_known_as = also_known_as
+    end
+    # Returns an array with cast members
+    def cast_members
+      document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+    end
+    def cast_member_ids
+      document.search("table.cast td.nm a").map {|l| l['href'].sub(%r{^/name/(.*)/}, '\1') }
+    end
+    # Returns the name of the director
+    def director
+      document.search("h5[text()^='Director'] ~ a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+    end
+    # Returns an array of genres (as strings)
+    def genres
+      document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+    end
+    # Returns an array of languages as strings.
+    def languages
+      document.search("h5[text()='Language:'] ~ a[@href*=/language/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+    end
+    # Returns an array of countries as strings.
+    def countries
+      document.search("h5[text()='Country:'] ~ a[@href*=/country/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
+    end
+    # Returns the duration of the movie in minutes as an integer.
+    def length
+      document.search("//h5[text()='Runtime:']/..").innerHTML[/\d+ min/].to_i rescue nil
+    end
+    # Returns a string containing the plot.
+    def plot
+      sanitize_plot(document.search("h5[text()='Plot:'] ~ div").first.innerHTML) rescue nil
+    end
+    # Returns a string containing the URL to the movie poster.
+    def poster
+      src = document.at("a[@name='poster'] img")['src'] rescue nil
+      case src
+      when /^(http:.+@@)/
+        $1 + '.jpg'
+      when /^(http:.+?)\.[^\/]+$/
+        $1 + '.jpg'
+      end
+    end
+    # Returns a float containing the average user rating
+    def rating
+      document.at(".starbar-meta b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
+    end
+    # Returns an int containing the number of user ratings
+    def votes
+      document.at("#tn15rating .tn15more").innerHTML.strip.imdb_unescape_html.gsub(/[^\d+]/, "").to_i rescue nil
+    end
+    # Returns a string containing the tagline
+    def tagline
+      document.search("h5[text()='Tagline:'] ~ div").first.innerHTML.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
+    end
+    # Returns a string containing the mpaa rating and reason for rating
+    def mpaa_rating
+      document.search("h5[text()='MPAA:'] ~ div").first.innerHTML.strip.imdb_unescape_html rescue nil
+    end
+    # Returns a string containing the title
+    def title(force_refresh = false)
+      if @title && !force_refresh
+        @title
+      else
+        @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
+      end
+    end
+    # Returns an integer containing the year (CCYY) the movie was released in.
+    def year
+      document.search('a[@href^="/year/"]').innerHTML.to_i
+    end
+    # Returns release date for the movie.
+    def release_date
+      sanitize_release_date(document.search('h5[text()*=Release Date]').first.next_sibling.innerHTML.to_s) rescue nil
+    end
+    private
+    # Returns a new Hpricot document for parsing.
+    def document
+      @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
+    end
+    # Use HTTParty to fetch the raw HTML for this movie.
+    def self.find_by_id(imdb_id)
+      open("http://akas.imdb.com/title/tt#{imdb_id}/combined")
+    end
+    # Convenience method for search
+    def self.search(query)
+      Imdb::Search.new(query).movies
+    end
+    def self.top_250
+      Imdb::Top250.new.movies
+    end
+    def sanitize_plot(the_plot)
+      the_plot = the_plot.imdb_strip_tags
+      the_plot = the_plot.gsub(/add\ssummary|full\ssummary/i, "")
+      the_plot = the_plot.gsub(/add\ssynopsis|full\ssynopsis/i, "")
+      the_plot = the_plot.gsub(/&nbsp;|&raquo;/i, "")
+      the_plot = the_plot.gsub(/see|more/i, "")
+      the_plot = the_plot.gsub(/\|/i, "")
+      the_plot = the_plot.strip.imdb_unescape_html
+    end
+    def sanitize_release_date(the_release_date)
+      the_release_date = the_release_date.gsub(/<a.*a>/,"")
+      the_release_date = the_release_date.gsub(/&nbsp;|&raquo;/i, "")
+      the_release_date = the_release_date.gsub(/see|more/i, "")
+      the_release_date = the_release_date.strip.imdb_unescape_html
+    end
+  end # Movie
+end # Imdb

data/lib/imdb_parser/movie.rb ADDED Viewed

@@ -0,0 +1,13 @@
+module Imdb
+  # Represents a Movie on IMDB.com
+  class Movie < ImdbBase
+    def trailers
+      document.search("a[@href*='/video/screenplay/']").map { |link| "http://akas.imdb.com" + link.get_attribute("href") } rescue []
+    end
+  end # Movie
+end # Imdb

data/lib/imdb_parser/movie_list.rb ADDED Viewed

@@ -0,0 +1,41 @@
+module Imdb
+  class MovieList
+    def movies
+      @movies ||= parse_movies
+    end
+    private
+    def parse_movies
+      document.search('a[@href^="/title/tt"]').reject do |element|
+        element.innerHTML.imdb_strip_tags.empty? ||
+        element.parent.innerHTML =~ /media from/i
+      end.map do |element|
+        id = element['href'][/\d+/]
+        data = element.parent.innerHTML.split("<br />")
+        if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
+          title = data[1]
+        else
+          title = data[0]
+        end
+        title = title.imdb_strip_tags.imdb_unescape_html
+        title.gsub!(/\s+\(\d\d\d\d\)$/, '')
+        alternative_titles = []
+        if title =~ /\saka\s/
+          titles = title.split(/\saka\s/)
+          title = titles.shift.strip.imdb_unescape_html
+          alternative_titles = titles.map { |t| t.strip.imdb_strip_tags.imdb_unescape_html }
+        end
+        [id, title, alternative_titles]
+      end.uniq.map do |values|
+        Imdb::Movie.new(*values)
+      end
+    end
+  end # MovieList
+end # Imdb

data/lib/imdb_parser/search.rb ADDED Viewed

@@ -0,0 +1,46 @@
+module Imdb
+  # Search IMDB for a title
+  class Search < MovieList
+    attr_reader :query
+    # Initialize a new IMDB search with the specified query
+    #
+    #   search = Imdb::Search.new("Star Trek")
+    #
+    # Imdb::Search is lazy loading, meaning that unless you access the +movies+
+    # attribute, no query is made to IMDB.com.
+    #
+    def initialize(query)
+      @query = query
+    end
+    # Returns an array of Imdb::Movie objects for easy search result yielded.
+    # If the +query+ was an exact match, a single element array will be returned.
+    def movies
+      @movies ||= (exact_match? ? parse_movie : parse_movies)
+    end
+    private
+    def document
+      @document ||= Hpricot(Imdb::Search.query(@query))
+    end
+    def self.query(query)
+      open("http://akas.imdb.com/find?q=#{CGI::escape(query)};s=tt")
+    end
+    def parse_movie
+      id                 = document.at("head/link[@rel='canonical']")['href'][/\d+/]
+      title              = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
+      [Imdb::Movie.new(id, title)]
+    end
+    # Returns true if the search yielded only one result, an exact match
+    def exact_match?
+      !document.at("//table[@id='title-overview-widget-layout']").nil?
+    end
+  end # Search
+end # Imdb