imdb_parser 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/.gitignore +8 -0
  2. data/Gemfile +3 -0
  3. data/History.txt +74 -0
  4. data/Manifest.txt +29 -0
  5. data/README.rdoc +108 -0
  6. data/Rakefile +35 -0
  7. data/bin/imdb +10 -0
  8. data/config/website.yml +2 -0
  9. data/imdb.gemspec +29 -0
  10. data/lib/imdb_parser/cli.rb +109 -0
  11. data/lib/imdb_parser/episode.rb +26 -0
  12. data/lib/imdb_parser/imdb_base.rb +146 -0
  13. data/lib/imdb_parser/movie.rb +13 -0
  14. data/lib/imdb_parser/movie_list.rb +41 -0
  15. data/lib/imdb_parser/search.rb +46 -0
  16. data/lib/imdb_parser/season.rb +45 -0
  17. data/lib/imdb_parser/serie.rb +24 -0
  18. data/lib/imdb_parser/string_extensions.rb +28 -0
  19. data/lib/imdb_parser/top_250.rb +10 -0
  20. data/lib/imdb_parser/version.rb +3 -0
  21. data/lib/imdb_parser.rb +17 -0
  22. data/script/console +11 -0
  23. data/script/destroy +14 -0
  24. data/script/generate +14 -0
  25. data/spec/fixtures/search_kannethirey_thondrinal +14 -0
  26. data/spec/fixtures/search_killed_wife +14 -0
  27. data/spec/fixtures/search_star_trek +834 -0
  28. data/spec/fixtures/top_250 +1433 -0
  29. data/spec/fixtures/tt0036855 +1255 -0
  30. data/spec/fixtures/tt0083987 +1261 -0
  31. data/spec/fixtures/tt0095016 +1286 -0
  32. data/spec/fixtures/tt0110912 +1262 -0
  33. data/spec/fixtures/tt0111161 +1272 -0
  34. data/spec/fixtures/tt0117731 +1246 -0
  35. data/spec/fixtures/tt0166222 +1806 -0
  36. data/spec/fixtures/tt0242653 +1254 -0
  37. data/spec/fixtures/tt0330508 +1581 -0
  38. data/spec/fixtures/tt0468569 +1305 -0
  39. data/spec/fixtures/tt1401252 +1109 -0
  40. data/spec/imdb/cli_spec.rb +49 -0
  41. data/spec/imdb/movie_spec.rb +204 -0
  42. data/spec/imdb/search_spec.rb +78 -0
  43. data/spec/imdb/top_250_spec.rb +21 -0
  44. data/spec/spec.opts +1 -0
  45. data/spec/spec_helper.rb +57 -0
  46. data/tasks/fixtures.rake +15 -0
  47. data/tasks/rspec.rake +21 -0
  48. metadata +183 -0
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ pkg/*
2
+ doc/*
3
+ rdoc/*
4
+ *.gem
5
+ .bundle
6
+ Gemfile.lock
7
+ .rvmrc
8
+ .gh_pages
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :rubygems
2
+
3
+ gemspec
data/History.txt ADDED
@@ -0,0 +1,74 @@
1
+ == 0.6.6 2011-09-14
2
+
3
+ -> No history was kept, so here's a short changelog since 2010-02-14 based on git:
4
+
5
+ ariejan - 2011-09-14 21:37:06 +0200 - Tested against latest IMDB site
6
+ ariejan - 2011-09-14 21:30:51 +0200 - Ignore .rvmrc
7
+ ariejan - 2011-09-14 12:27:16 -0700 - Merge pull request #21 from defeed/master
8
+ Arjom - 2011-09-14 18:48:39 +0300 - Added method to get countries
9
+ ariejan - 2011-09-05 22:49:27 -0700 - Merge pull request #20 from mguterl/use_bundler
10
+ mguterl - 2011-09-05 09:31:23 -0400 - replace jeweler with bundler
11
+ ariejan - 2011-06-16 03:32:45 -0700 - Merge pull request #18 from rbu/master
12
+ rbu - 2011-06-15 22:25:11 +0200 - increase version to 0.6.5.1 and update gemspec
13
+ rbu - 2011-06-15 22:16:01 +0200 - Add method to get the number of votes
14
+ rbu - 2011-06-15 22:06:07 +0200 - automatic fixtures update, and add a note about a flaky test
15
+ rbu - 2011-06-15 22:02:18 +0200 - Fix test, 'Die Hard' had some changes in imdb
16
+ rbu - 2011-06-15 22:01:24 +0200 - Fix test, pick another movie for 'without poster' case
17
+ rbu - 2011-06-15 21:49:25 +0200 - Fix test, Matrix Revolutions is not an exact match anymore
18
+ rbu - 2011-06-15 21:16:43 +0200 - Use akas. subdomain to avoid localized titles
19
+ tolosa - 2010-12-04 04:26:19 -0300 - Updated fixtures and sources
20
+ tolosa - 2010-12-04 04:25:50 -0300 - Fixed search result for exact match
21
+ tolosa - 2010-10-24 21:16:05 -0300 - Modified movie class to load data from new URL, in order to bypass the recent design changes in the IMDB website
22
+ tolosa - 2010-10-24 20:53:43 -0300 - Changed movie URLs to load in fixture data
23
+ tolosa - 2010-10-24 20:49:52 -0300 - Updated fixture data from new movie URLs
24
+ ghedamat - 2010-10-08 06:16:58 -0700 - changed h5 to h4 due to Imdb site layout change
25
+ ariejan - 2010-04-29 23:40:20 +0200 - Regenerated gemspec for version 0.6.5
26
+ ariejan - 2010-04-29 23:40:11 +0200 - Version bump to 0.6.5
27
+ rick - 2010-04-30 05:35:08 +0800 - Adding a means of returning cast member IMDB id's for further lookups.
28
+ kenpratt - 2010-04-30 05:34:55 +0800 - Improved poster image parsing (increased success rate on top 250 from ~81% to 100%).
29
+ hornairs - 2010-04-30 05:34:42 +0800 - Fixed parsing of plot and release date after IMDB added little arrows, all tests passing.
30
+ sandeep kumar - 2010-02-15 15:47:35 +0800 - adding method for release_date for imdb movie and testcase for the same
31
+
32
+ == 0.5.0 patch-1 2010-02-14
33
+
34
+ * Added methods for fetching release date [if available] from IMDB
35
+ * Added Testcase for the same as well.
36
+
37
+ == 0.5.0 2009-06-17
38
+
39
+ * Added Top 250 listing [mguterl]
40
+ * Made general improvements to data retrieval [mguterl]
41
+
42
+ == 0.4.2 2009-06-14
43
+
44
+ * Updated manifest to include all spec fixtures. [ariejan]
45
+
46
+ == 0.4.1 2009-06-14
47
+
48
+ * Added support for FakeWeb so specs run faster. [mguterl]
49
+ * Cache the search query in Imdb::Search.query. [mguterl]
50
+ * Added a convenience method Imdb::Search.search. [mguterl]
51
+
52
+ == 0.4.0 2009-06-14
53
+
54
+ * Updates to the console 'imdb' utility [ariejan]
55
+ * Show the IMDB ID
56
+ * Show the full IMDB URL
57
+
58
+ == 0.3.0 2009-06-07
59
+
60
+ * Fixed typo in CLI field name 'Cast by' [ariejan]
61
+ * Fixed retrieval of multiple directors. (#1) [ariejan]
62
+
63
+ == 0.2.0 2009-06-04
64
+
65
+ * Added console tool 'imdb' for searching and getting movie info. [ariejan]
66
+ * Fixed issue #2 [ariejan]
67
+
68
+ == 0.1.0 2009-06-03
69
+
70
+ * Added Imdb::Search, which allows searching IMDB for a specific movie. [ariejan]
71
+
72
+ == 0.0.1 2009-06-03
73
+
74
+ * First release of the IMDB gem. [ariejan]
data/Manifest.txt ADDED
@@ -0,0 +1,29 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ bin/imdb
6
+ lib/imdb.rb
7
+ lib/imdb/cli.rb
8
+ lib/imdb/movie.rb
9
+ lib/imdb/movie_list.rb
10
+ lib/imdb/search.rb
11
+ lib/imdb/string_extensions.rb
12
+ lib/imdb/top_250.rb
13
+ script/console
14
+ script/destroy
15
+ script/generate
16
+ spec/fixtures/search_matrix_revolutions
17
+ spec/fixtures/search_star_trek
18
+ spec/fixtures/top_250
19
+ spec/fixtures/tt0095016
20
+ spec/fixtures/tt0111161
21
+ spec/fixtures/tt0117731
22
+ spec/fixtures/tt0242653
23
+ spec/imdb/cli_spec.rb
24
+ spec/imdb/movie_spec.rb
25
+ spec/imdb/search_spec.rb
26
+ spec/imdb/top_250_spec.rb
27
+ spec/spec.opts
28
+ spec/spec_helper.rb
29
+ tasks/rspec.rake
data/README.rdoc ADDED
@@ -0,0 +1,108 @@
1
+ = imdb
2
+
3
+ Allows you to search and inspect movies and series from IMDB.com.
4
+
5
+ == DESCRIPTION:
6
+
7
+ This package allows you to easily access publicly available data from IMDB.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ IMDB currently features the following:
12
+
13
+ * Querying detailed movie info
14
+ * Searching for movies
15
+ * Command-line utility included.
16
+ * Querying detailed series info (seasons, episodes)
17
+
18
+ == SYNOPSIS:
19
+
20
+ Movies:
21
+
22
+ i = Imdb::Movie.new("0095016")
23
+
24
+ i.title
25
+ #=> "Die Hard"
26
+ i.cast_members.first
27
+ #=> "Bruce Willis"
28
+
29
+ Serie:
30
+
31
+ s = Imdb::Serie.new("0773262")
32
+ season = s.seasons.first
33
+ e = season.episodes.last
34
+
35
+ e.title
36
+ #=> "Born Free"
37
+ e.synopsis
38
+ #=> "Dexter races against the clock..."
39
+
40
+ Searching:
41
+
42
+ i = Imdb::Search.new("Star Trek")
43
+
44
+ i.movies.size
45
+ #=> 97
46
+
47
+ Using the command line utility is quite easy:
48
+
49
+ $ imdb Star Trek
50
+
51
+ or to get movie info
52
+
53
+ $ imdb 0095016
54
+
55
+ == REQUIREMENTS:
56
+
57
+ All required gems are installed automagically through RubyGems.
58
+
59
+ * Hpricot ~> 0.8.4
60
+
61
+ == INSTALL:
62
+
63
+ $ sudo gem install imdb -s http://gemcutter.org
64
+
65
+ == DOCUMENTATION:
66
+
67
+
68
+ == TESTING:
69
+
70
+ You'll need rspec and fakeweb installed to run the specs.
71
+
72
+ $ bundle install
73
+ $ rake spec
74
+
75
+ Although not recommended, you may run the specs against the live imdb.com
76
+ website. This will make a lot of calls to imdb.com, use it wisely.
77
+
78
+ $ LIVE_TEST=true rake spec
79
+
80
+ To update the packaged fixtures files with actual imdb.com samples, use the
81
+ fixtures:refresh rake task
82
+
83
+ $ rake fixtures:refresh
84
+
85
+ == LICENSE:
86
+
87
+ (The MIT License)
88
+
89
+ Copyright (c) 2009 Ariejan de Vroom
90
+
91
+ Permission is hereby granted, free of charge, to any person obtaining
92
+ a copy of this software and associated documentation files (the
93
+ 'Software'), to deal in the Software without restriction, including
94
+ without limitation the rights to use, copy, modify, merge, publish,
95
+ distribute, sublicense, and/or sell copies of the Software, and to
96
+ permit persons to whom the Software is furnished to do so, subject to
97
+ the following conditions:
98
+
99
+ The above copyright notice and this permission notice shall be
100
+ included in all copies or substantial portions of the Software.
101
+
102
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
103
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
104
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
105
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
106
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
107
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
108
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,35 @@
# Build, spec, coverage and documentation tasks for the gem.

require 'bundler'
Bundler::GemHelper.install_tasks

# Make sure lib/ is on the load path so the version file below can be found
# regardless of how rake was started.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

load File.expand_path(File.dirname(__FILE__) + "/tasks/fixtures.rake")

require 'spec/rake/spectask'

# rake spec -- run every *_spec.rb file under spec/.
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# rake rcov -- same spec files, with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# The package ships its sources under lib/imdb_parser/, so the version file
# is required from there (there is no lib/imdb/ directory in the package).
require 'imdb_parser/version'
require 'hanna/rdoctask'

# rake rdoc -- generate the API documentation into rdoc/.
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "imdb #{Imdb::VERSION} documentation"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
  rdoc.options << '--webcvs=http://github.com/ariejan/imdb/tree/master/'
end

require 'gokdok'

# Gokdok configuration: repository URL, local doc directory and remote path.
Gokdok::Dokker.new do |gd|
  gd.repo_url = "git@github.com:ariejan/imdb.git"
  gd.doc_home = "rdoc"
  gd.remote_path = "."
end
data/bin/imdb ADDED
@@ -0,0 +1,10 @@
#!/usr/bin/env ruby
#
# Created on 2009-6-4.
# Copyright (c) 2009. All rights reserved.
#
# Command-line entry point: loads the library and hands the arguments to
# Imdb::CLI.

# The package ships lib/imdb_parser.rb and lib/imdb_parser/cli.rb (there is
# no lib/imdb.rb), so load the library from those paths.
lib_dir = File.expand_path("../../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require "imdb_parser"
require "imdb_parser/cli"

Imdb::CLI.execute(STDOUT, ARGV)
@@ -0,0 +1,2 @@
1
+ host: ariejan@rubyforge.org
2
+ remote_dir: /var/www/gforge-projects/imdb/
data/imdb.gemspec ADDED
@@ -0,0 +1,29 @@
# -*- encoding: utf-8 -*-
# Gem specification for imdb_parser.
$:.push File.expand_path("../lib", __FILE__)
# The version file lives under lib/imdb_parser/ in this package; requiring
# "imdb/version" fails because no lib/imdb/ directory is shipped.
require "imdb_parser/version"

Gem::Specification.new do |s|
  s.name        = "imdb_parser"
  s.version     = Imdb::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Matthieu Lamarque"]
  s.email       = ["lamarque.matthieu@gmail.com"]
  s.homepage    = "http://github.com/mlamarque/imdb"
  s.summary     = %q{Access to Movie, Serie on Imdb.com}
  s.description = %q{Easily use Ruby or the command line to find Movie, Serie information on IMDB.com.}

  s.rubyforge_project = "imdb"

  # File lists come from git, so the gem must be built inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency 'hpricot', '~> 0.8.4'

  s.add_development_dependency 'rdoc'
  s.add_development_dependency 'hanna'
  s.add_development_dependency 'gokdok'
  s.add_development_dependency 'rspec', '~> 1.3.2'
  s.add_development_dependency 'fakeweb'
end
@@ -0,0 +1,109 @@
1
+ require 'optparse'
2
+
module Imdb
  # Command-line front end for the gem.
  #
  # Depending on the query it either prints the details of one movie or a
  # short list of search hits to the stream passed to +execute+.
  class CLI

    # Width, in characters, of the detail header line and the "=" rulers.
    DISPLAY_WIDTH = 75

    # Maximum number of hits printed by +display_search_results+.
    MAX_SEARCH_RESULTS = 10

    # A run of seven or more digits anywhere in the query is taken as an id.
    ID_PATTERN = /(\d{7,})/

    # A title URL on imdb.com or any of its sub-domains (www., akas., ...).
    URL_PATTERN = %r{\Ahttps?://(?:[a-z0-9\-]+\.)*imdb\.com/title/tt(\d+)}i

    # Run the imdb command
    #
    # Searching
    #
    #   imdb Star Trek
    #
    # Get a movie, supply a 7 digit IMDB id or the IMDB URL
    #
    #   imdb 0095016
    #   imdb http://akas.imdb.com/title/tt0796366/
    #
    # stdout    - IO-like object that receives all output (must respond to puts).
    # arguments - Array of command-line arguments; option switches are removed
    #             from it in place by OptionParser.
    #
    # Exits the process after -v / -h, and when no query is left.
    def self.execute(stdout, arguments=[])
      @stdout = stdout

      @stdout.puts "IMDB Scraper #{Imdb::VERSION}"

      parser = OptionParser.new do |opts|
        opts.banner = <<-BANNER.gsub(/^ {10}/, '')

          Usage: #{File.basename($0)} Search Query
                 #{File.basename($0)} 0095016

        BANNER
        opts.separator ""
        opts.on("-v", "--version",
                "Show the current version.") { stdout.puts "IMDB #{Imdb::VERSION}"; exit }
        opts.on("-h", "--help",
                "Show this help message.") { stdout.puts opts; exit }
      end
      parser.parse!(arguments)

      query = arguments.join(" ").strip
      exit if query.empty?

      imdb_id = extract_imdb_id(query)
      if imdb_id
        fetch_movie(imdb_id)
      else
        search_movie(query)
      end
    end

    # Returns the IMDB id (digits only, as a String) contained in +query+, or
    # nil when the query should be treated as a free-text search.
    #
    # The whole digit run is captured, so ids longer than seven digits are not
    # truncated.
    def self.extract_imdb_id(query)
      match = ID_PATTERN.match(query) || URL_PATTERN.match(query)
      match && match[1]
    end

    # Fetches the movie with the given id and prints its details.
    def self.fetch_movie(imdb_id)
      @stdout.puts
      @stdout.puts " - fetching movie #{imdb_id}"

      movie = Imdb::Movie.new(imdb_id)

      display_movie_details(movie)
    end

    # Searches for +query+. A single hit is shown in detail, anything else as
    # a result list.
    def self.search_movie(query)
      @stdout.puts
      @stdout.puts " - searching for \"#{query}\""

      search = Imdb::Search.new(query)

      if search.movies.size == 1
        display_movie_details(search.movies.first)
      else
        display_search_results(search.movies)
      end
    end

    # Prints the detail box for +movie+ to the stream given to +execute+.
    def self.display_movie_details(movie)
      title = "#{movie.title} (#{movie.year})"
      id    = "ID #{movie.id}"

      # Right-align the id within DISPLAY_WIDTH. Clamp at zero so that a
      # title too long for the box does not make String#* raise on a
      # negative count.
      padding = [DISPLAY_WIDTH - 1 - title.length - id.length, 0].max

      @stdout.puts
      @stdout.puts "#{title}#{" " * padding}#{id} "
      @stdout.puts "=" * DISPLAY_WIDTH
      @stdout.puts "Rating: #{movie.rating}"
      @stdout.puts "Duration: #{movie.length} minutes"
      @stdout.puts "Directed by: #{movie.director.join(", ")}"
      @stdout.puts "Cast: #{movie.cast_members[0..4].join(", ")}"
      @stdout.puts "Genre: #{movie.genres.join(", ")}"
      @stdout.puts "Plot: #{movie.plot}"
      @stdout.puts "Poster URL: #{movie.poster}"
      @stdout.puts "IMDB URL: #{movie.url}"
      @stdout.puts "=" * DISPLAY_WIDTH
      @stdout.puts
    end

    # Prints one line per movie, limited to the first MAX_SEARCH_RESULTS hits.
    def self.display_search_results(movies = [])
      movies.first(MAX_SEARCH_RESULTS).each do |movie|
        @stdout.puts " > #{movie.id} | #{movie.title}"
      end
    end

  end
end
@@ -0,0 +1,26 @@
module Imdb

  # A single episode, read from the HTML node it was constructed with.
  #
  # +title+, +synopsis+ and +release_date+ are extracted from the node on
  # first access and cached. A value assigned through the corresponding writer
  # takes precedence over the parsed one.
  class Episode

    attr_accessor :number
    # Only writers are generated here: the readers are defined explicitly
    # below, so attr_accessor would just create methods that get redefined.
    attr_writer :title, :synopsis, :release_date

    # number - the episode number as supplied by the caller.
    # node   - node object responding to +search+ (NOTE(review): presumably an
    #          Hpricot element; confirm against the caller in season.rb).
    # season - the owning season; stored but not used by this class.
    def initialize(number, node, season)
      @number = number
      @node = node
      @season = season
    end

    # Returns the episode title, or nil when it cannot be extracted.
    def title
      @title ||= (@node.search('table tr td h3 a').innerHTML.imdb_unescape_html rescue nil)
    end

    # Returns the text directly inside the table cell, or nil on failure.
    def synopsis
      @synopsis ||= (@node.search('table tr td/text()').text rescue nil)
    end

    # Returns the release date as the string found in the node, or nil on
    # failure.
    def release_date
      @release_date ||= (@node.search('table tr td span strong').innerHTML.imdb_unescape_html rescue nil)
    end

  end # Episode
end # Imdb
@@ -0,0 +1,146 @@
module Imdb

  # Shared scraping logic for a title's "combined" page on akas.imdb.com.
  #
  # The page is fetched lazily on first use and cached per instance. Most
  # readers swallow any StandardError raised while fetching or parsing and
  # return nil (or an empty array) instead.
  class ImdbBase
    attr_accessor :id, :url, :also_known_as
    # The title reader is defined explicitly below; only the writer is generated.
    attr_writer :title

    # imdb_id       - the numeric part of the IMDB id, e.g. "0095016".
    # title         - optional pre-known title; double quotes are removed.
    # also_known_as - optional array of alternative titles.
    def initialize(imdb_id, title = nil, also_known_as = [])
      @id = imdb_id
      @url = "http://akas.imdb.com/title/tt#{imdb_id}/combined"
      @title = title.gsub(/"/, "") if title
      @also_known_as = also_known_as
    end

    # Opens the combined page for +imdb_id+ and returns whatever +open+ yields.
    #
    # NOTE(review): passing a URL to Kernel#open presumably relies on open-uri
    # being required elsewhere in the gem -- confirm.
    #
    # Class-level methods are kept outside the private section: +private+ does
    # not apply to +def self.+ methods, and +document+ calls this one with an
    # explicit receiver.
    def self.find_by_id(imdb_id)
      open("http://akas.imdb.com/title/tt#{imdb_id}/combined")
    end

    # Convenience method for search
    def self.search(query)
      Imdb::Search.new(query).movies
    end

    # Convenience method returning the movies of the top 250 list.
    def self.top_250
      Imdb::Top250.new.movies
    end

    # Returns an array with cast members
    def cast_members
      link_texts("table.cast td.nm a")
    end

    # Returns an array with the value extracted from each cast link's href
    # (the part between "/name/" and the trailing slash). Returns [] on any
    # failure, like +cast_members+.
    def cast_member_ids
      document.search("table.cast td.nm a").map {|l| l['href'].sub(%r{^/name/(.*)/}, '\1') } rescue []
    end

    # Returns an array with the names of the director(s)
    def director
      link_texts("h5[text()^='Director'] ~ a")
    end

    # Returns an array of genres (as strings)
    def genres
      link_texts("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']")
    end

    # Returns an array of languages as strings.
    def languages
      link_texts("h5[text()='Language:'] ~ a[@href*=/language/']")
    end

    # Returns an array of countries as strings.
    def countries
      link_texts("h5[text()='Country:'] ~ a[@href*=/country/']")
    end

    # Returns the duration of the movie in minutes as an integer.
    # Yields 0 when the runtime block contains no "<digits> min" text and nil
    # when the lookup itself fails.
    def length
      document.search("//h5[text()='Runtime:']/..").innerHTML[/\d+ min/].to_i rescue nil
    end

    # Returns a string containing the plot.
    def plot
      sanitize_plot(document.search("h5[text()='Plot:'] ~ div").first.innerHTML) rescue nil
    end

    # Returns a string containing the URL to the movie poster, or nil when no
    # poster image is found. The size/format suffix of the thumbnail URL is
    # replaced by ".jpg". Both http and https sources are accepted.
    def poster
      src = document.at("a[@name='poster'] img")['src'] rescue nil
      case src
      when /^(https?:.+@@)/
        $1 + '.jpg'
      when /^(https?:.+?)\.[^\/]+$/
        $1 + '.jpg'
      end
    end

    # Returns a float containing the average user rating
    def rating
      document.at(".starbar-meta b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
    end

    # Returns an int containing the number of user ratings
    def votes
      # Drop every non-digit (thousands separators, the word "votes", ...).
      document.at("#tn15rating .tn15more").innerHTML.strip.imdb_unescape_html.gsub(/\D/, "").to_i rescue nil
    end

    # Returns a string containing the tagline
    def tagline
      document.search("h5[text()='Tagline:'] ~ div").first.innerHTML.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
    end

    # Returns a string containing the mpaa rating and reason for rating
    def mpaa_rating
      document.search("h5[text()='MPAA:'] ~ div").first.innerHTML.strip.imdb_unescape_html rescue nil
    end

    # Returns a string containing the title.
    # The cached value is reused unless +force_refresh+ is true.
    def title(force_refresh = false)
      if @title && !force_refresh
        @title
      else
        @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
      end
    end

    # Returns an integer containing the year (CCYY) the movie was released in.
    def year
      document.search('a[@href^="/year/"]').innerHTML.to_i
    end

    # Returns release date for the movie.
    def release_date
      sanitize_release_date(document.search('h5[text()*=Release Date]').first.next_sibling.innerHTML.to_s) rescue nil
    end

    private

    # Returns a new Hpricot document for parsing.
    def document
      @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
    end

    # Returns the stripped, unescaped inner HTML of every element matching
    # +selector+, or [] if fetching or parsing fails.
    def link_texts(selector)
      document.search(selector).map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
    end

    # Strips markup and the navigation link texts ("full summary",
    # "add synopsis", "see more", ...) from the raw plot HTML.
    def sanitize_plot(the_plot)
      the_plot = the_plot.imdb_strip_tags

      the_plot = the_plot.gsub(/(?:add|full)\s(?:summary|synopsis)/i, "")
      # Only whole words are removed, so "seemingly" or "Baltimore" survive.
      # This runs before the entities are dropped because "&nbsp;" still acts
      # as a word boundary at this point.
      # NOTE(review): a standalone "see" or "more" inside the plot text itself
      # is still removed, as before.
      the_plot = the_plot.gsub(/\b(?:see|more)\b/i, "")
      the_plot = the_plot.gsub(/&nbsp;|&raquo;|\|/i, "")

      the_plot.strip.imdb_unescape_html
    end

    # Strips links, entities and "see more" link text from the raw release
    # date HTML.
    def sanitize_release_date(the_release_date)
      the_release_date = the_release_date.gsub(/<a.*a>/,"")
      # Whole words only, and before the entities are removed (see sanitize_plot).
      the_release_date = the_release_date.gsub(/\b(?:see|more)\b/i, "")
      the_release_date = the_release_date.gsub(/&nbsp;|&raquo;/i, "")

      the_release_date.strip.imdb_unescape_html
    end

  end # ImdbBase

end # Imdb
@@ -0,0 +1,13 @@
module Imdb

  # A movie title on IMDB.com. Everything except +trailers+ is inherited from
  # ImdbBase.
  class Movie < ImdbBase

    # Returns an array of absolute akas.imdb.com URLs, one for every link on
    # the page whose href contains "/video/screenplay/". Returns [] when the
    # lookup raises.
    def trailers
      anchors = document.search("a[@href*='/video/screenplay/']")
      anchors.map do |anchor|
        "http://akas.imdb.com" + anchor.get_attribute("href")
      end
    rescue
      []
    end

  end # Movie

end # Imdb
@@ -0,0 +1,41 @@
module Imdb

  # Base class for pages that list several titles. Subclasses are expected to
  # supply a +document+ method returning the parsed page.
  class MovieList
    # Returns the movies found on the page; parsed once, then cached.
    def movies
      @movies ||= parse_movies
    end

    private

    # Builds an Imdb::Movie for every distinct title link on the page.
    # Links without visible text and links whose parent mentions "media from"
    # are skipped.
    def parse_movies
      title_links = document.search('a[@href^="/title/tt"]').reject do |link|
        link.innerHTML.imdb_strip_tags.empty? || link.parent.innerHTML =~ /media from/i
      end

      rows = title_links.map { |link| movie_attributes(link) }
      rows.uniq.map { |attributes| Imdb::Movie.new(*attributes) }
    end

    # Returns [id, title, alternative_titles] for one title link.
    def movie_attributes(link)
      imdb_id = link['href'][/\d+/]

      # The parent cell is split on line breaks; when the first segment holds
      # an image, the title is taken from the second segment instead.
      segments = link.parent.innerHTML.split("<br />")
      image_first = !segments[0].nil? && !segments[1].nil? && segments[0] =~ /img/
      raw_title = image_first ? segments[1] : segments[0]

      name = raw_title.imdb_strip_tags.imdb_unescape_html
      name.gsub!(/\s+\(\d\d\d\d\)$/, '') # drop a trailing "(YYYY)"

      aliases = []
      if name =~ /\saka\s/
        parts = name.split(/\saka\s/)
        name = parts.shift.strip.imdb_unescape_html
        aliases = parts.map { |part| part.strip.imdb_strip_tags.imdb_unescape_html }
      end

      [imdb_id, name, aliases]
    end
  end # MovieList

end # Imdb
@@ -0,0 +1,46 @@
module Imdb

  # Search IMDB for a title
  class Search < MovieList
    attr_reader :query

    # Creates a search for +query+, e.g.
    #
    #   search = Imdb::Search.new("Star Trek")
    #
    # Nothing is requested from IMDB.com until +movies+ is first called.
    def initialize(query)
      @query = query
    end

    # Requests the title search page for +query+ and returns what +open+
    # yields. Defined with +def self.+, so it is callable from outside the
    # class.
    def self.query(query)
      escaped = CGI::escape(query)
      open("http://akas.imdb.com/find?q=#{escaped};s=tt")
    end

    # Returns an array of Imdb::Movie objects for the search. When the result
    # page is a title page (an exact match), the array holds that one movie.
    def movies
      return @movies if @movies

      @movies = exact_match? ? parse_movie : parse_movies
    end

    private

    # The parsed result page; fetched on first use.
    def document
      @document ||= Hpricot(Imdb::Search.query(@query))
    end

    # Builds the single-movie result from a title page: the id comes from the
    # canonical link, the title from the first h1 up to its first span.
    def parse_movie
      canonical = document.at("head/link[@rel='canonical']")
      id = canonical['href'][/\d+/]
      title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html

      [Imdb::Movie.new(id, title)]
    end

    # Returns true if the search yielded only one result, an exact match
    def exact_match?
      overview = document.at("//table[@id='title-overview-widget-layout']")
      !overview.nil?
    end

  end # Search
end # Imdb