imdb_parser 0.6.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/.gitignore +8 -0
  2. data/Gemfile +3 -0
  3. data/History.txt +74 -0
  4. data/Manifest.txt +29 -0
  5. data/README.rdoc +108 -0
  6. data/Rakefile +35 -0
  7. data/bin/imdb +10 -0
  8. data/config/website.yml +2 -0
  9. data/imdb.gemspec +29 -0
  10. data/lib/imdb_parser/cli.rb +109 -0
  11. data/lib/imdb_parser/episode.rb +26 -0
  12. data/lib/imdb_parser/imdb_base.rb +146 -0
  13. data/lib/imdb_parser/movie.rb +13 -0
  14. data/lib/imdb_parser/movie_list.rb +41 -0
  15. data/lib/imdb_parser/search.rb +46 -0
  16. data/lib/imdb_parser/season.rb +45 -0
  17. data/lib/imdb_parser/serie.rb +24 -0
  18. data/lib/imdb_parser/string_extensions.rb +28 -0
  19. data/lib/imdb_parser/top_250.rb +10 -0
  20. data/lib/imdb_parser/version.rb +3 -0
  21. data/lib/imdb_parser.rb +17 -0
  22. data/script/console +11 -0
  23. data/script/destroy +14 -0
  24. data/script/generate +14 -0
  25. data/spec/fixtures/search_kannethirey_thondrinal +14 -0
  26. data/spec/fixtures/search_killed_wife +14 -0
  27. data/spec/fixtures/search_star_trek +834 -0
  28. data/spec/fixtures/top_250 +1433 -0
  29. data/spec/fixtures/tt0036855 +1255 -0
  30. data/spec/fixtures/tt0083987 +1261 -0
  31. data/spec/fixtures/tt0095016 +1286 -0
  32. data/spec/fixtures/tt0110912 +1262 -0
  33. data/spec/fixtures/tt0111161 +1272 -0
  34. data/spec/fixtures/tt0117731 +1246 -0
  35. data/spec/fixtures/tt0166222 +1806 -0
  36. data/spec/fixtures/tt0242653 +1254 -0
  37. data/spec/fixtures/tt0330508 +1581 -0
  38. data/spec/fixtures/tt0468569 +1305 -0
  39. data/spec/fixtures/tt1401252 +1109 -0
  40. data/spec/imdb/cli_spec.rb +49 -0
  41. data/spec/imdb/movie_spec.rb +204 -0
  42. data/spec/imdb/search_spec.rb +78 -0
  43. data/spec/imdb/top_250_spec.rb +21 -0
  44. data/spec/spec.opts +1 -0
  45. data/spec/spec_helper.rb +57 -0
  46. data/tasks/fixtures.rake +15 -0
  47. data/tasks/rspec.rake +21 -0
  48. metadata +183 -0
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ pkg/*
2
+ doc/*
3
+ rdoc/*
4
+ *.gem
5
+ .bundle
6
+ Gemfile.lock
7
+ .rvmrc
8
+ .gh_pages
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :rubygems
2
+
3
+ gemspec
data/History.txt ADDED
@@ -0,0 +1,74 @@
1
+ == 0.6.6 2011-09-14
2
+
3
+ -> No history was kept, so here's a short changelog since 2010-02-14 based on git:
4
+
5
+ ariejan - 2011-09-14 21:37:06 +0200 - Tested against latest IMDB site
6
+ ariejan - 2011-09-14 21:30:51 +0200 - Ignore .rvmrc
7
+ ariejan - 2011-09-14 12:27:16 -0700 - Merge pull request #21 from defeed/master
8
+ Arjom - 2011-09-14 18:48:39 +0300 - Added method to get countries
9
+ ariejan - 2011-09-05 22:49:27 -0700 - Merge pull request #20 from mguterl/use_bundler
10
+ mguterl - 2011-09-05 09:31:23 -0400 - replace jeweler with bundler
11
+ ariejan - 2011-06-16 03:32:45 -0700 - Merge pull request #18 from rbu/master
12
+ rbu - 2011-06-15 22:25:11 +0200 - increase version to 0.6.5.1 and update gemspec
13
+ rbu - 2011-06-15 22:16:01 +0200 - Add method to get the number of votes
14
+ rbu - 2011-06-15 22:06:07 +0200 - automatic fixtures update, and add a note about a flaky test
15
+ rbu - 2011-06-15 22:02:18 +0200 - Fix test, 'Die Hard' had some changes in imdb
16
+ rbu - 2011-06-15 22:01:24 +0200 - Fix test, pick another movie for 'without poster' case
17
+ rbu - 2011-06-15 21:49:25 +0200 - Fix test, Matrix Revolutions is not an exact match anymore
18
+ rbu - 2011-06-15 21:16:43 +0200 - Use akas. subdomain to avoid localized titles
19
+ tolosa - 2010-12-04 04:26:19 -0300 - Updated fixtures and sources
20
+ tolosa - 2010-12-04 04:25:50 -0300 - Fixed search result for exact match
21
+ tolosa - 2010-10-24 21:16:05 -0300 - Modified movie class to load data from new URL, in order to bypass the recent design changes in the IMDB website
22
+ tolosa - 2010-10-24 20:53:43 -0300 - Changed movie URLs to load in fixture data
23
+ tolosa - 2010-10-24 20:49:52 -0300 - Updated fixture data from new movie URLs
24
+ ghedamat - 2010-10-08 06:16:58 -0700 - changed h5 to h4 due to Imdb site layout change
25
+ ariejan - 2010-04-29 23:40:20 +0200 - Regenerated gemspec for version 0.6.5
26
+ ariejan - 2010-04-29 23:40:11 +0200 - Version bump to 0.6.5
27
+ rick - 2010-04-30 05:35:08 +0800 - Adding a means of returning cast member IMDB id's for further lookups.
28
+ kenpratt - 2010-04-30 05:34:55 +0800 - Improved poster image parsing (increased success rate on top 250 from ~81% to 100%).
29
+ hornairs - 2010-04-30 05:34:42 +0800 - Fixed parsing of plot and release date after IMDB added little arrows, all tests passing.
30
+ sandeep kumar - 2010-02-15 15:47:35 +0800 - adding method for release_date for imdb movie and testcase for the same
31
+
32
+ == 0.5.0 patch-1 2010-02-14
33
+
34
+ * Added methods for fetching release date [if available] from IMDB
35
+ * Added Testcase for the same as well.
36
+
37
+ == 0.5.0 2009-06-17
38
+
39
+ * Added Top 250 listing [mguterl]
40
+ * Made general improvements to data retrieval [mguterl]
41
+
42
+ == 0.4.2 2009-06-14
43
+
44
+ * Updated manifest to include all spec fixtures. [ariejan]
45
+
46
+ == 0.4.1 2009-06-14
47
+
48
+ * Added support for FakeWeb so specs run faster. [mguterl]
49
+ * Cache the search query in Imdb::Search.query. [mguterl]
50
+ * Added a convenience method Imdb::Search.search. [mguterl]
51
+
52
+ == 0.4.0 2009-06-14
53
+
54
+ * Updates to the console 'imdb' utility [ariejan]
55
+ * Show the IMDB ID
56
+ * Show the full IMDB URL
57
+
58
+ == 0.3.0 2009-06-07
59
+
60
+ * Fixed typo in CLI field name 'Cast by' [ariejan]
61
+ * Fixed retrieval of multiple directors. (#1) [ariejan]
62
+
63
+ == 0.2.0 2009-06-04
64
+
65
+ * Added console tool 'imdb' for searching and getting movie info. [ariejan]
66
+ * Fixed issue #2 [ariejan]
67
+
68
+ == 0.1.0 2009-06-03
69
+
70
+ * Added Imdb::Search that allows search IMDB for a specific movie. [ariejan]
71
+
72
+ == 0.0.1 2009-06-03
73
+
74
+ * First release of the IMDB gem. [ariejan]
data/Manifest.txt ADDED
@@ -0,0 +1,29 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.rdoc
4
+ Rakefile
5
+ bin/imdb
6
+ lib/imdb.rb
7
+ lib/imdb/cli.rb
8
+ lib/imdb/movie.rb
9
+ lib/imdb/movie_list.rb
10
+ lib/imdb/search.rb
11
+ lib/imdb/string_extensions.rb
12
+ lib/imdb/top_250.rb
13
+ script/console
14
+ script/destroy
15
+ script/generate
16
+ spec/fixtures/search_matrix_revolutions
17
+ spec/fixtures/search_star_trek
18
+ spec/fixtures/top_250
19
+ spec/fixtures/tt0095016
20
+ spec/fixtures/tt0111161
21
+ spec/fixtures/tt0117731
22
+ spec/fixtures/tt0242653
23
+ spec/imdb/cli_spec.rb
24
+ spec/imdb/movie_spec.rb
25
+ spec/imdb/search_spec.rb
26
+ spec/imdb/top_250_spec.rb
27
+ spec/spec.opts
28
+ spec/spec_helper.rb
29
+ tasks/rspec.rake
data/README.rdoc ADDED
@@ -0,0 +1,108 @@
1
+ = imdb
2
+
3
+ Allows you to search and inspect movies and series from IMDB.com.
4
+
5
+ == DESCRIPTION:
6
+
7
+ This package allows you to easily access publicly available data from IMDB.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ IMDB currently features the following:
12
+
13
+ * Querying detailed movie info
14
+ * Searching for movies
15
+ * Command-line utility included.
16
+ * Querying detailed series info (season, episode)
17
+
18
+ == SYNOPSIS:
19
+
20
+ Movies:
21
+
22
+ i = Imdb::Movie.new("0095016")
23
+
24
+ i.title
25
+ #=> "Die Hard"
26
+ i.cast_members.first
27
+ #=> "Bruce Willis"
28
+
29
+ Serie:
30
+
31
+ s = Imdb::Serie.new("0773262")
32
+ season = s.seasons.first
33
+ e = season.episodes.last
34
+
35
+ e.title
36
+ #=> "Born Free"
37
+ e.synopsis
38
+ #=> "Dexter races against the clock..."
39
+
40
+ Searching:
41
+
42
+ i = Imdb::Search.new("Star Trek")
43
+
44
+ i.movies.size
45
+ #=> 97
46
+
47
+ Using the command line utility is quite easy:
48
+
49
+ $ imdb Star Trek
50
+
51
+ or to get movie info
52
+
53
+ $ imdb 0095016
54
+
55
+ == REQUIREMENTS:
56
+
57
+ All required gems are installed automagically through RubyGems.
58
+
59
+ * Hpricot (~> 0.8.4)
60
+
61
+ == INSTALL:
62
+
63
+ $ sudo gem install imdb -s http://gemcutter.org
64
+
65
+ == DOCUMENTATION:
66
+
67
+
68
+ == TESTING:
69
+
70
+ You'll need rspec and fakeweb installed to run the specs.
71
+
72
+ $ bundle install
73
+ $ rake spec
74
+
75
+ Although not recommended, you may run the specs against the live imdb.com
76
+ website. This will make a lot of calls to imdb.com, use it wisely.
77
+
78
+ $ LIVE_TEST=true rake spec
79
+
80
+ To update the packaged fixtures files with actual imdb.com samples, use the
81
+ fixtures:refresh rake task
82
+
83
+ $ rake fixtures:refresh
84
+
85
+ == LICENSE:
86
+
87
+ (The MIT License)
88
+
89
+ Copyright (c) 2009 Ariejan de Vroom
90
+
91
+ Permission is hereby granted, free of charge, to any person obtaining
92
+ a copy of this software and associated documentation files (the
93
+ 'Software'), to deal in the Software without restriction, including
94
+ without limitation the rights to use, copy, modify, merge, publish,
95
+ distribute, sublicense, and/or sell copies of the Software, and to
96
+ permit persons to whom the Software is furnished to do so, subject to
97
+ the following conditions:
98
+
99
+ The above copyright notice and this permission notice shall be
100
+ included in all copies or substantial portions of the Software.
101
+
102
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
103
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
104
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
105
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
106
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
107
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
108
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ load File.expand_path(File.dirname(__FILE__) + "/tasks/fixtures.rake")
5
+
6
+ require 'spec/rake/spectask'
7
+ Spec::Rake::SpecTask.new(:spec) do |spec|
8
+ spec.libs << 'lib' << 'spec'
9
+ spec.spec_files = FileList['spec/**/*_spec.rb']
10
+ end
11
+
12
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
13
+ spec.libs << 'lib' << 'spec'
14
+ spec.pattern = 'spec/**/*_spec.rb'
15
+ spec.rcov = true
16
+ end
17
+
18
+ task :default => :spec
19
+
20
+ require 'imdb/version'
21
+ require 'hanna/rdoctask'
22
+ Rake::RDocTask.new(:rdoc) do |rdoc|
23
+ rdoc.rdoc_dir = 'rdoc'
24
+ rdoc.title = "imdb #{Imdb::VERSION} documentation"
25
+ rdoc.rdoc_files.include('README*')
26
+ rdoc.rdoc_files.include('lib/**/*.rb')
27
+ rdoc.options << '--webcvs=http://github.com/ariejan/imdb/tree/master/'
28
+ end
29
+
30
+ require 'gokdok'
31
+ Gokdok::Dokker.new do |gd|
32
+ gd.repo_url = "git@github.com:ariejan/imdb.git"
33
+ gd.doc_home = "rdoc"
34
+ gd.remote_path = "."
35
+ end
data/bin/imdb ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Created on 2009-6-4.
4
+ # Copyright (c) 2009. All rights reserved.
5
+
6
+ require File.expand_path(File.dirname(__FILE__) + "/../lib/imdb")
7
+
8
+ require "imdb/cli"
9
+
10
+ Imdb::CLI.execute(STDOUT, ARGV)
@@ -0,0 +1,2 @@
1
+ host: ariejan@rubyforge.org
2
+ remote_dir: /var/www/gforge-projects/imdb/
data/imdb.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "imdb/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "imdb_parser"
7
+ s.version = Imdb::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Matthieu Lamarque"]
10
+ s.email = ["lamarque.matthieu@gmail.com"]
11
+ s.homepage = "http://github.com/mlamarque/imdb"
12
+ s.summary = %q{Access to Movie, Serie on Imdb.com}
13
+ s.description = %q{Easily use Ruby or the command line to find Movie, Serie information on IMDB.com.}
14
+
15
+ s.rubyforge_project = "imdb"
16
+
17
+ s.files = `git ls-files`.split("\n")
18
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
+ s.require_paths = ["lib"]
21
+
22
+ s.add_dependency 'hpricot', '~> 0.8.4'
23
+
24
+ s.add_development_dependency 'rdoc'
25
+ s.add_development_dependency 'hanna'
26
+ s.add_development_dependency 'gokdok'
27
+ s.add_development_dependency 'rspec', '~> 1.3.2'
28
+ s.add_development_dependency 'fakeweb'
29
+ end
@@ -0,0 +1,109 @@
1
+ require 'optparse'
2
+
3
+ module Imdb
4
+ class CLI
5
+
6
+ # Run the imdb command
7
+ #
8
+ # Searching
9
+ #
10
+ # imdb Star Trek
11
+ #
12
+ # Get a movie, supply a 7 digit IMDB id or the IMDB URL
13
+ #
14
+ # imdb 0095016
15
+ # imdb http://akas.imdb.com/title/tt0796366/
16
+ #
17
+ def self.execute(stdout, arguments=[])
18
+
19
+ @stdout = stdout
20
+
21
+ @stdout.puts "IMDB Scraper #{Imdb::VERSION}"
22
+
23
+ options = {
24
+ }
25
+ mandatory_options = %w( )
26
+
27
+ parser = OptionParser.new do |opts|
28
+ opts.banner = <<-BANNER.gsub(/^ /,'')
29
+
30
+ Usage: #{File.basename($0)} Search Query
31
+ #{File.basename($0)} 0095016
32
+
33
+ BANNER
34
+ opts.separator ""
35
+ opts.on("-v", "--version",
36
+ "Show the current version.") { stdout.puts "IMDB #{Imdb::VERSION}"; exit }
37
+ opts.on("-h", "--help",
38
+ "Show this help message.") { stdout.puts opts; exit }
39
+ opts.parse!(arguments)
40
+
41
+ if mandatory_options && mandatory_options.find { |option| options[option.to_sym].nil? }
42
+ stdout.puts opts; exit
43
+ end
44
+ end
45
+
46
+ query = arguments.join(" ").strip
47
+ exit if query.blank?
48
+
49
+ movie, search = nil, nil
50
+
51
+ # If ID, fetch movie
52
+ if query.match(/(\d\d\d\d\d\d\d)/) || query.downcase.match(/^http:\/\/[www.]*imdb.com\/title\/tt(.+)\/$/)
53
+ fetch_movie($1)
54
+ else
55
+ search_movie(query)
56
+ end
57
+ end
58
+
59
+ def self.fetch_movie(imdb_id)
60
+ @stdout.puts
61
+ @stdout.puts " - fetching movie #{imdb_id}"
62
+
63
+ movie = Imdb::Movie.new(imdb_id)
64
+
65
+ display_movie_details(movie)
66
+ end
67
+
68
+ def self.search_movie(query)
69
+ @stdout.puts
70
+ @stdout.puts " - searching for \"#{query}\""
71
+
72
+ search = Imdb::Search.new(query)
73
+
74
+ if search.movies.size == 1
75
+ display_movie_details(search.movies.first)
76
+ else
77
+ display_search_results(search.movies)
78
+ end
79
+ end
80
+
81
+ def self.display_movie_details(movie)
82
+ title = "#{movie.title} (#{movie.year})"
83
+ id = "ID #{movie.id}"
84
+
85
+ @stdout.puts
86
+ @stdout.puts "#{title}#{" " * (75 - 1 - title.length - id.length)}#{id} "
87
+ @stdout.puts "=" * 75
88
+ @stdout.puts "Rating: #{movie.rating}"
89
+ @stdout.puts "Duration: #{movie.length} minutes"
90
+ @stdout.puts "Directed by: #{movie.director.join(", ")}"
91
+ @stdout.puts "Cast: #{movie.cast_members[0..4].join(", ")}"
92
+ @stdout.puts "Genre: #{movie.genres.join(", ")}"
93
+ @stdout.puts "Plot: #{movie.plot}"
94
+ @stdout.puts "Poster URL: #{movie.poster}"
95
+ @stdout.puts "IMDB URL: #{movie.url}"
96
+ @stdout.puts "=" * 75
97
+ @stdout.puts
98
+ end
99
+
100
+ def self.display_search_results(movies = [])
101
+ movies = movies[0..9] # limit to ten top hits
102
+
103
+ movies.each do |movie|
104
+ @stdout.puts " > #{movie.id} | #{movie.title}"
105
+ end
106
+ end
107
+
108
+ end
109
+ end
@@ -0,0 +1,26 @@
1
+ module Imdb
2
+
3
+ class Episode
4
+
5
+ attr_accessor :title, :synopsis, :number, :release_date
6
+
7
+ def initialize(number, node, season)
8
+ @number = number
9
+ @node = node
10
+ @season = season
11
+ end
12
+
13
+ def title
14
+ @node.search('table tr td h3 a').innerHTML.imdb_unescape_html rescue nil
15
+ end
16
+
17
+ def synopsis
18
+ @node.search('table tr td/text()').text rescue nil
19
+ end
20
+
21
+ def release_date
22
+ @node.search('table tr td span strong').innerHTML.imdb_unescape_html rescue nil
23
+ end
24
+
25
+ end #Episode
26
+ end # Imdb
@@ -0,0 +1,146 @@
1
+ module Imdb
2
+
3
+ # Base class for IMDB titles; holds the IMDB id, URL, title and alternative titles.
4
+ class ImdbBase
5
+ attr_accessor :id, :url, :title, :also_known_as
6
+
7
+ def initialize(imdb_id, title = nil, also_known_as = [])
8
+ @id = imdb_id
9
+ @url = "http://akas.imdb.com/title/tt#{imdb_id}/combined"
10
+ @title = title.gsub(/"/, "") if title
11
+ @also_known_as = also_known_as
12
+ end
13
+
14
+ # Returns an array with cast members
15
+ def cast_members
16
+ document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
17
+ end
18
+
19
+ def cast_member_ids
20
+ document.search("table.cast td.nm a").map {|l| l['href'].sub(%r{^/name/(.*)/}, '\1') }
21
+ end
22
+
23
+ # Returns an array with the names of the directors
24
+ def director
25
+ document.search("h5[text()^='Director'] ~ a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
26
+ end
27
+
28
+ # Returns an array of genres (as strings)
29
+ def genres
30
+ document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
31
+ end
32
+
33
+ # Returns an array of languages as strings.
34
+ def languages
35
+ document.search("h5[text()='Language:'] ~ a[@href*=/language/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
36
+ end
37
+
38
+ # Returns an array of countries as strings.
39
+ def countries
40
+ document.search("h5[text()='Country:'] ~ a[@href*=/country/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
41
+ end
42
+
43
+ # Returns the duration of the movie in minutes as an integer.
44
+ def length
45
+ document.search("//h5[text()='Runtime:']/..").innerHTML[/\d+ min/].to_i rescue nil
46
+ end
47
+
48
+ # Returns a string containing the plot.
49
+ def plot
50
+ sanitize_plot(document.search("h5[text()='Plot:'] ~ div").first.innerHTML) rescue nil
51
+ end
52
+
53
+ # Returns a string containing the URL to the movie poster.
54
+ def poster
55
+ src = document.at("a[@name='poster'] img")['src'] rescue nil
56
+ case src
57
+ when /^(http:.+@@)/
58
+ $1 + '.jpg'
59
+ when /^(http:.+?)\.[^\/]+$/
60
+ $1 + '.jpg'
61
+ end
62
+ end
63
+
64
+ # Returns a float containing the average user rating
65
+ def rating
66
+ document.at(".starbar-meta b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
67
+ end
68
+
69
+ # Returns an int containing the number of user ratings
70
+ def votes
71
+ document.at("#tn15rating .tn15more").innerHTML.strip.imdb_unescape_html.gsub(/[^\d+]/, "").to_i rescue nil
72
+ end
73
+
74
+ # Returns a string containing the tagline
75
+ def tagline
76
+ document.search("h5[text()='Tagline:'] ~ div").first.innerHTML.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
77
+ end
78
+
79
+ # Returns a string containing the mpaa rating and reason for rating
80
+ def mpaa_rating
81
+ document.search("h5[text()='MPAA:'] ~ div").first.innerHTML.strip.imdb_unescape_html rescue nil
82
+ end
83
+
84
+ # Returns a string containing the title
85
+ def title(force_refresh = false)
86
+ if @title && !force_refresh
87
+ @title
88
+ else
89
+ @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
90
+ end
91
+ end
92
+
93
+ # Returns an integer containing the year (CCYY) the movie was released in.
94
+ def year
95
+ document.search('a[@href^="/year/"]').innerHTML.to_i
96
+ end
97
+
98
+ # Returns release date for the movie.
99
+ def release_date
100
+ sanitize_release_date(document.search('h5[text()*=Release Date]').first.next_sibling.innerHTML.to_s) rescue nil
101
+ end
102
+
103
+ private
104
+
105
+ # Returns a new Hpricot document for parsing.
106
+ def document
107
+ @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
108
+ end
109
+
110
+ # Use HTTParty to fetch the raw HTML for this movie.
111
+ def self.find_by_id(imdb_id)
112
+ open("http://akas.imdb.com/title/tt#{imdb_id}/combined")
113
+ end
114
+
115
+ # Convenience method for search
116
+ def self.search(query)
117
+ Imdb::Search.new(query).movies
118
+ end
119
+
120
+ def self.top_250
121
+ Imdb::Top250.new.movies
122
+ end
123
+
124
+ def sanitize_plot(the_plot)
125
+ the_plot = the_plot.imdb_strip_tags
126
+
127
+ the_plot = the_plot.gsub(/add\ssummary|full\ssummary/i, "")
128
+ the_plot = the_plot.gsub(/add\ssynopsis|full\ssynopsis/i, "")
129
+ the_plot = the_plot.gsub(/&nbsp;|&raquo;/i, "")
130
+ the_plot = the_plot.gsub(/see|more/i, "")
131
+ the_plot = the_plot.gsub(/\|/i, "")
132
+
133
+ the_plot = the_plot.strip.imdb_unescape_html
134
+ end
135
+
136
+ def sanitize_release_date(the_release_date)
137
+ the_release_date = the_release_date.gsub(/<a.*a>/,"")
138
+ the_release_date = the_release_date.gsub(/&nbsp;|&raquo;/i, "")
139
+ the_release_date = the_release_date.gsub(/see|more/i, "")
140
+
141
+ the_release_date = the_release_date.strip.imdb_unescape_html
142
+ end
143
+
144
+ end # Movie
145
+
146
+ end # Imdb
@@ -0,0 +1,13 @@
1
+ module Imdb
2
+
3
+ # Represents a Movie on IMDB.com
4
+ class Movie < ImdbBase
5
+
6
+ def trailers
7
+ document.search("a[@href*='/video/screenplay/']").map { |link| "http://akas.imdb.com" + link.get_attribute("href") } rescue []
8
+ end
9
+
10
+
11
+ end # Movie
12
+
13
+ end # Imdb
@@ -0,0 +1,41 @@
1
+ module Imdb
2
+
3
+ class MovieList
4
+ def movies
5
+ @movies ||= parse_movies
6
+ end
7
+
8
+ private
9
+ def parse_movies
10
+ document.search('a[@href^="/title/tt"]').reject do |element|
11
+ element.innerHTML.imdb_strip_tags.empty? ||
12
+ element.parent.innerHTML =~ /media from/i
13
+ end.map do |element|
14
+ id = element['href'][/\d+/]
15
+
16
+ data = element.parent.innerHTML.split("<br />")
17
+ if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
18
+ title = data[1]
19
+ else
20
+ title = data[0]
21
+ end
22
+
23
+ title = title.imdb_strip_tags.imdb_unescape_html
24
+ title.gsub!(/\s+\(\d\d\d\d\)$/, '')
25
+
26
+ alternative_titles = []
27
+
28
+ if title =~ /\saka\s/
29
+ titles = title.split(/\saka\s/)
30
+ title = titles.shift.strip.imdb_unescape_html
31
+ alternative_titles = titles.map { |t| t.strip.imdb_strip_tags.imdb_unescape_html }
32
+ end
33
+
34
+ [id, title, alternative_titles]
35
+ end.uniq.map do |values|
36
+ Imdb::Movie.new(*values)
37
+ end
38
+ end
39
+ end # MovieList
40
+
41
+ end # Imdb
@@ -0,0 +1,46 @@
1
+ module Imdb
2
+
3
+ # Search IMDB for a title
4
+ class Search < MovieList
5
+ attr_reader :query
6
+
7
+ # Initialize a new IMDB search with the specified query
8
+ #
9
+ # search = Imdb::Search.new("Star Trek")
10
+ #
11
+ # Imdb::Search is lazy loading, meaning that unless you access the +movies+
12
+ # attribute, no query is made to IMDB.com.
13
+ #
14
+ def initialize(query)
15
+ @query = query
16
+ end
17
+
18
+ # Returns an array of Imdb::Movie objects, one for each search result.
19
+ # If the +query+ was an exact match, a single element array will be returned.
20
+ def movies
21
+ @movies ||= (exact_match? ? parse_movie : parse_movies)
22
+ end
23
+
24
+ private
25
+ def document
26
+ @document ||= Hpricot(Imdb::Search.query(@query))
27
+ end
28
+
29
+ def self.query(query)
30
+ open("http://akas.imdb.com/find?q=#{CGI::escape(query)};s=tt")
31
+ end
32
+
33
+ def parse_movie
34
+ id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
35
+ title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
36
+
37
+ [Imdb::Movie.new(id, title)]
38
+ end
39
+
40
+ # Returns true if the search yielded only one result, an exact match
41
+ def exact_match?
42
+ !document.at("//table[@id='title-overview-widget-layout']").nil?
43
+ end
44
+
45
+ end # Search
46
+ end # Imdb