imdb_parser 0.6.6
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +8 -0
- data/Gemfile +3 -0
- data/History.txt +74 -0
- data/Manifest.txt +29 -0
- data/README.rdoc +108 -0
- data/Rakefile +35 -0
- data/bin/imdb +10 -0
- data/config/website.yml +2 -0
- data/imdb.gemspec +29 -0
- data/lib/imdb_parser/cli.rb +109 -0
- data/lib/imdb_parser/episode.rb +26 -0
- data/lib/imdb_parser/imdb_base.rb +146 -0
- data/lib/imdb_parser/movie.rb +13 -0
- data/lib/imdb_parser/movie_list.rb +41 -0
- data/lib/imdb_parser/search.rb +46 -0
- data/lib/imdb_parser/season.rb +45 -0
- data/lib/imdb_parser/serie.rb +24 -0
- data/lib/imdb_parser/string_extensions.rb +28 -0
- data/lib/imdb_parser/top_250.rb +10 -0
- data/lib/imdb_parser/version.rb +3 -0
- data/lib/imdb_parser.rb +17 -0
- data/script/console +11 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/spec/fixtures/search_kannethirey_thondrinal +14 -0
- data/spec/fixtures/search_killed_wife +14 -0
- data/spec/fixtures/search_star_trek +834 -0
- data/spec/fixtures/top_250 +1433 -0
- data/spec/fixtures/tt0036855 +1255 -0
- data/spec/fixtures/tt0083987 +1261 -0
- data/spec/fixtures/tt0095016 +1286 -0
- data/spec/fixtures/tt0110912 +1262 -0
- data/spec/fixtures/tt0111161 +1272 -0
- data/spec/fixtures/tt0117731 +1246 -0
- data/spec/fixtures/tt0166222 +1806 -0
- data/spec/fixtures/tt0242653 +1254 -0
- data/spec/fixtures/tt0330508 +1581 -0
- data/spec/fixtures/tt0468569 +1305 -0
- data/spec/fixtures/tt1401252 +1109 -0
- data/spec/imdb/cli_spec.rb +49 -0
- data/spec/imdb/movie_spec.rb +204 -0
- data/spec/imdb/search_spec.rb +78 -0
- data/spec/imdb/top_250_spec.rb +21 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +57 -0
- data/tasks/fixtures.rake +15 -0
- data/tasks/rspec.rake +21 -0
- metadata +183 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/History.txt
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
== 0.6.6 2011-09-14
|
2
|
+
|
3
|
+
-> No history was kept, so here's a short changelog since 2010-02-14 based on git:
|
4
|
+
|
5
|
+
ariejan - 2011-09-14 21:37:06 +0200 - Tested against latest IMDB site
|
6
|
+
ariejan - 2011-09-14 21:30:51 +0200 - Ignore .rvmrc
|
7
|
+
ariejan - 2011-09-14 12:27:16 -0700 - Merge pull request #21 from defeed/master
|
8
|
+
Arjom - 2011-09-14 18:48:39 +0300 - Added method to get countries
|
9
|
+
ariejan - 2011-09-05 22:49:27 -0700 - Merge pull request #20 from mguterl/use_bundler
|
10
|
+
mguterl - 2011-09-05 09:31:23 -0400 - replace jeweler with bundler
|
11
|
+
ariejan - 2011-06-16 03:32:45 -0700 - Merge pull request #18 from rbu/master
|
12
|
+
rbu - 2011-06-15 22:25:11 +0200 - increase version to 0.6.5.1 and update gemspec
|
13
|
+
rbu - 2011-06-15 22:16:01 +0200 - Add method to get the number of votes
|
14
|
+
rbu - 2011-06-15 22:06:07 +0200 - automatic fixtures update, and add a note about a flaky test
|
15
|
+
rbu - 2011-06-15 22:02:18 +0200 - Fix test, 'Die Hard' had some changes in imdb
|
16
|
+
rbu - 2011-06-15 22:01:24 +0200 - Fix test, pick another movie for 'without poster' case
|
17
|
+
rbu - 2011-06-15 21:49:25 +0200 - Fix test, Matrix Revolutions is not an exact match anymore
|
18
|
+
rbu - 2011-06-15 21:16:43 +0200 - Use akas. subdomain to avoid localized titles
|
19
|
+
tolosa - 2010-12-04 04:26:19 -0300 - Updated fixtures and sources
|
20
|
+
tolosa - 2010-12-04 04:25:50 -0300 - Fixed search result for exact match
|
21
|
+
tolosa - 2010-10-24 21:16:05 -0300 - Modified movie class to load data from new URL, in order to bypass the recent design changes in the IMDB website
|
22
|
+
tolosa - 2010-10-24 20:53:43 -0300 - Changed movie URLs to load in fixture data
|
23
|
+
tolosa - 2010-10-24 20:49:52 -0300 - Updated fixture data from new movie URLs
|
24
|
+
ghedamat - 2010-10-08 06:16:58 -0700 - changed h5 to h4 due to Imdb site layout change
|
25
|
+
ariejan - 2010-04-29 23:40:20 +0200 - Regenerated gemspec for version 0.6.5
|
26
|
+
ariejan - 2010-04-29 23:40:11 +0200 - Version bump to 0.6.5
|
27
|
+
rick - 2010-04-30 05:35:08 +0800 - Adding a means of returning cast member IMDB id's for further lookups.
|
28
|
+
kenpratt - 2010-04-30 05:34:55 +0800 - Improved poster image parsing (increased success rate on top 250 from ~81% to 100%).
|
29
|
+
hornairs - 2010-04-30 05:34:42 +0800 - Fixed parsing of plot and release date after IMDB added little arrows, all tests passing.
|
30
|
+
sandeep kumar - 2010-02-15 15:47:35 +0800 - adding method for release_date for imdb movie and testcase for the same
|
31
|
+
|
32
|
+
== 0.5.0 patch-1 2010-02-14
|
33
|
+
|
34
|
+
* Added methods for fetching release date [if available] from IMDB
|
35
|
+
* Added Testcase for the same as well.
|
36
|
+
|
37
|
+
== 0.5.0 2009-06-17
|
38
|
+
|
39
|
+
* Added Top 250 listing [mguterl]
|
40
|
+
* Made general improvements to data retrieval [mguterl]
|
41
|
+
|
42
|
+
== 0.4.2 2009-06-14
|
43
|
+
|
44
|
+
* Updated manifest to include all spec fixtures. [ariejan]
|
45
|
+
|
46
|
+
== 0.4.1 2009-06-14
|
47
|
+
|
48
|
+
* Added support for FakeWeb so specs run faster. [mguterl]
|
49
|
+
* Cache the search query in Imdb::Search.query. [mguterl]
|
50
|
+
* Added a convenience method Imdb::Search.search. [mguterl]
|
51
|
+
|
52
|
+
== 0.4.0 2009-06-14
|
53
|
+
|
54
|
+
* Updates to the console 'imdb' utility [ariejan]
|
55
|
+
* Show the IMDB ID
|
56
|
+
* Show the full IMDB URL
|
57
|
+
|
58
|
+
== 0.3.0 2009-06-07
|
59
|
+
|
60
|
+
* Fixed typo in CLI field name 'Cast by' [ariejan]
|
61
|
+
* Fixed retrieval of multiple directors. (#1) [ariejan]
|
62
|
+
|
63
|
+
== 0.2.0 2009-06-04
|
64
|
+
|
65
|
+
* Added console tool 'imdb' for searching and getting movie info. [ariejan]
|
66
|
+
* Fixed issue #2 [ariejan]
|
67
|
+
|
68
|
+
== 0.1.0 2009-06-03
|
69
|
+
|
70
|
+
* Added Imdb::Search that allows search IMDB for a specific movie. [ariejan]
|
71
|
+
|
72
|
+
== 0.0.1 2009-06-03
|
73
|
+
|
74
|
+
* First release of the IMDB gem. [ariejan]
|
data/Manifest.txt
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.rdoc
|
4
|
+
Rakefile
|
5
|
+
bin/imdb
|
6
|
+
lib/imdb.rb
|
7
|
+
lib/imdb/cli.rb
|
8
|
+
lib/imdb/movie.rb
|
9
|
+
lib/imdb/movie_list.rb
|
10
|
+
lib/imdb/search.rb
|
11
|
+
lib/imdb/string_extensions.rb
|
12
|
+
lib/imdb/top_250.rb
|
13
|
+
script/console
|
14
|
+
script/destroy
|
15
|
+
script/generate
|
16
|
+
spec/fixtures/search_matrix_revolutions
|
17
|
+
spec/fixtures/search_star_trek
|
18
|
+
spec/fixtures/top_250
|
19
|
+
spec/fixtures/tt0095016
|
20
|
+
spec/fixtures/tt0111161
|
21
|
+
spec/fixtures/tt0117731
|
22
|
+
spec/fixtures/tt0242653
|
23
|
+
spec/imdb/cli_spec.rb
|
24
|
+
spec/imdb/movie_spec.rb
|
25
|
+
spec/imdb/search_spec.rb
|
26
|
+
spec/imdb/top_250_spec.rb
|
27
|
+
spec/spec.opts
|
28
|
+
spec/spec_helper.rb
|
29
|
+
tasks/rspec.rake
|
data/README.rdoc
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
= imdb
|
2
|
+
|
3
|
+
Allows you to search and inspect movies and series from IMDB.com.
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
This package allows you to easily access publicly available data from IMDB.
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
IMDB currently features the following:
|
12
|
+
|
13
|
+
* Querying detailed movie info
|
14
|
+
* Searching for movies
|
15
|
+
* Command-line utility included.
|
16
|
+
* Querying detailed series info (season, episode)
|
17
|
+
|
18
|
+
== SYNOPSIS:
|
19
|
+
|
20
|
+
Movies:
|
21
|
+
|
22
|
+
i = Imdb::Movie.new("0095016")
|
23
|
+
|
24
|
+
i.title
|
25
|
+
#=> "Die Hard"
|
26
|
+
i.cast_members.first
|
27
|
+
#=> "Bruce Willis"
|
28
|
+
|
29
|
+
Serie:
|
30
|
+
|
31
|
+
s = Imdb::Serie.new("0773262")
|
32
|
+
season = s.seasons.first
|
33
|
+
e = season.episodes.last
|
34
|
+
|
35
|
+
e.title
|
36
|
+
#=> "Born Free"
|
37
|
+
e.synopsis
|
38
|
+
#=> "Dexter races against the clock..."
|
39
|
+
|
40
|
+
Searching:
|
41
|
+
|
42
|
+
i = Imdb::Search.new("Star Trek")
|
43
|
+
|
44
|
+
i.movies.size
|
45
|
+
#=> 97
|
46
|
+
|
47
|
+
Using the command line utility is quite easy:
|
48
|
+
|
49
|
+
$ imdb Star Trek
|
50
|
+
|
51
|
+
or to get movie info
|
52
|
+
|
53
|
+
$ imdb 0095016
|
54
|
+
|
55
|
+
== REQUIREMENTS:
|
56
|
+
|
57
|
+
All required gems are installed automagically through RubyGems.
|
58
|
+
|
59
|
+
* Hpricot 0.8.1
|
60
|
+
|
61
|
+
== INSTALL:
|
62
|
+
|
63
|
+
$ sudo gem install imdb -s http://gemcutter.org
|
64
|
+
|
65
|
+
== DOCUMENTATION:
|
66
|
+
|
67
|
+
|
68
|
+
== TESTING:
|
69
|
+
|
70
|
+
You'll need rspec and fakeweb installed to run the specs.
|
71
|
+
|
72
|
+
$ bundle install
|
73
|
+
$ rake spec
|
74
|
+
|
75
|
+
Although not recommended, you may run the specs against the live imdb.com
|
76
|
+
website. This will make a lot of calls to imdb.com, use it wisely.
|
77
|
+
|
78
|
+
$ LIVE_TEST=true rake spec
|
79
|
+
|
80
|
+
To update the packaged fixtures files with actual imdb.com samples, use the
|
81
|
+
fixtures:refresh rake task
|
82
|
+
|
83
|
+
$ rake fixtures:refresh
|
84
|
+
|
85
|
+
== LICENSE:
|
86
|
+
|
87
|
+
(The MIT License)
|
88
|
+
|
89
|
+
Copyright (c) 2009 Ariejan de Vroom
|
90
|
+
|
91
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
92
|
+
a copy of this software and associated documentation files (the
|
93
|
+
'Software'), to deal in the Software without restriction, including
|
94
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
95
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
96
|
+
permit persons to whom the Software is furnished to do so, subject to
|
97
|
+
the following conditions:
|
98
|
+
|
99
|
+
The above copyright notice and this permission notice shall be
|
100
|
+
included in all copies or substantial portions of the Software.
|
101
|
+
|
102
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
103
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
104
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
105
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
106
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
107
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
108
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Rake tasks for the gem: packaging helpers from Bundler, the spec and rcov
# runners, RDoc generation and documentation publishing.
require 'bundler'
Bundler::GemHelper.install_tasks

# The fixture tasks are kept in tasks/fixtures.rake next to this file.
load File.expand_path('tasks/fixtures.rake', File.dirname(__FILE__))

require 'spec/rake/spectask'

# `rake spec` runs every *_spec.rb file below spec/.
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs << 'lib'
  t.libs << 'spec'
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.libs << 'lib'
  t.libs << 'spec'
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

task(:default => :spec)

# NOTE(review): the package file list shows lib/imdb_parser/version.rb rather
# than lib/imdb/version.rb -- confirm that this require resolves.
require 'imdb/version'
require 'hanna/rdoctask'

# `rake rdoc` builds the API documentation into ./rdoc.
Rake::RDocTask.new(:rdoc) do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title = "imdb #{Imdb::VERSION} documentation"
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
  doc.options << '--webcvs=http://github.com/ariejan/imdb/tree/master/'
end

require 'gokdok'

# Gokdok configuration: repository URL, documentation directory, remote path.
Gokdok::Dokker.new do |dokker|
  dokker.repo_url = "git@github.com:ariejan/imdb.git"
  dokker.doc_home = "rdoc"
  dokker.remote_path = "."
end
|
data/bin/imdb
ADDED
data/config/website.yml
ADDED
data/imdb.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for imdb_parser.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_dir)

# NOTE(review): the package file list shows lib/imdb_parser/version.rb, not
# lib/imdb/version.rb -- confirm that this require picks up the intended file.
require "imdb/version"

Gem::Specification.new do |spec|
  spec.name        = "imdb_parser"
  spec.version     = Imdb::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Matthieu Lamarque"]
  spec.email       = ["lamarque.matthieu@gmail.com"]
  spec.homepage    = "http://github.com/mlamarque/imdb"
  spec.summary     = "Access to Movie, Serie on Imdb.com"
  spec.description = "Easily use Ruby or the command line to find Movie, Serie information on IMDB.com."

  spec.rubyforge_project = "imdb"

  # The packaged files are whatever git currently tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependency.
  spec.add_dependency 'hpricot', '~> 0.8.4'

  # Development-only dependencies (documentation and specs).
  spec.add_development_dependency 'rdoc'
  spec.add_development_dependency 'hanna'
  spec.add_development_dependency 'gokdok'
  spec.add_development_dependency 'rspec', '~> 1.3.2'
  spec.add_development_dependency 'fakeweb'
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'optparse'

module Imdb
  # Command-line front end for the gem.
  #
  # All methods are class-level; the output stream handed to +execute+ is kept
  # in a class-level instance variable and used by the display helpers.
  class CLI
    # Width, in characters, of the movie detail header and separator lines.
    LINE_WIDTH = 75

    # An IMDB title URL on any imdb.com host (www, akas, ...). Captures the
    # numeric part of the "tt" identifier.
    IMDB_URL_PATTERN = %r{\Ahttps?://(?:[a-z0-9\-]+\.)*imdb\.com/title/tt(\d+)/?}i

    # A bare run of seven digits anywhere in the query is treated as an ID.
    IMDB_ID_PATTERN = /(\d{7})/

    # Run the imdb command
    #
    # Searching
    #
    #   imdb Star Trek
    #
    # Get a movie, supply a 7 digit IMDB id or the IMDB URL
    #
    #   imdb 0095016
    #   imdb http://akas.imdb.com/title/tt0796366/
    #
    # stdout    - IO-like object that receives all output (must respond to puts).
    # arguments - Array of command-line arguments; recognised options are
    #             removed from it by the option parser.
    #
    # Exits the process for -v, -h and for an empty query.
    def self.execute(stdout, arguments = [])
      @stdout = stdout

      @stdout.puts "IMDB Scraper #{Imdb::VERSION}"

      program = File.basename($0)
      parser = OptionParser.new do |opts|
        opts.banner = "\n" \
                      "Usage: #{program} Search Query\n" \
                      "       #{program} 0095016\n" \
                      "\n"
        opts.separator ""
        opts.on("-v", "--version",
                "Show the current version.") { stdout.puts "IMDB #{Imdb::VERSION}"; exit }
        opts.on("-h", "--help",
                "Show this help message.") { stdout.puts opts; exit }
      end
      parser.parse!(arguments)

      query = arguments.join(" ").strip
      # The query is already stripped, so empty? is sufficient here and does
      # not depend on a String#blank? extension being loaded.
      exit if query.empty?

      # Try the URL form first so that the complete ID is captured, then fall
      # back to a bare seven digit ID.
      match = IMDB_URL_PATTERN.match(query) || IMDB_ID_PATTERN.match(query)
      if match
        fetch_movie(match[1])
      else
        search_movie(query)
      end
    end

    # Builds an Imdb::Movie for +imdb_id+ and prints its details.
    def self.fetch_movie(imdb_id)
      @stdout.puts
      @stdout.puts " - fetching movie #{imdb_id}"

      movie = Imdb::Movie.new(imdb_id)

      display_movie_details(movie)
    end

    # Searches for +query+. A single hit is printed in full; anything else is
    # printed as a result list.
    def self.search_movie(query)
      @stdout.puts
      @stdout.puts " - searching for \"#{query}\""

      search = Imdb::Search.new(query)

      if search.movies.size == 1
        display_movie_details(search.movies.first)
      else
        display_search_results(search.movies)
      end
    end

    # Prints the detail sheet for +movie+ to the stream stored by +execute+.
    def self.display_movie_details(movie)
      title = "#{movie.title} (#{movie.year})"
      id    = "ID #{movie.id}"

      # Clamp at zero: a title too long for the line would otherwise make the
      # repeat count negative and raise ArgumentError.
      padding = [LINE_WIDTH - 1 - title.length - id.length, 0].max

      @stdout.puts
      @stdout.puts "#{title}#{" " * padding}#{id} "
      @stdout.puts "=" * LINE_WIDTH
      @stdout.puts "Rating: #{movie.rating}"
      @stdout.puts "Duration: #{movie.length} minutes"
      @stdout.puts "Directed by: #{movie.director.join(", ")}"
      @stdout.puts "Cast: #{movie.cast_members[0..4].join(", ")}"
      @stdout.puts "Genre: #{movie.genres.join(", ")}"
      @stdout.puts "Plot: #{movie.plot}"
      @stdout.puts "Poster URL: #{movie.poster}"
      @stdout.puts "IMDB URL: #{movie.url}"
      @stdout.puts "=" * LINE_WIDTH
      @stdout.puts
    end

    # Prints one "id | title" line for each of the first ten +movies+.
    def self.display_search_results(movies = [])
      movies = movies[0..9] # limit to ten top hits

      movies.each do |movie|
        @stdout.puts " > #{movie.id} | #{movie.title}"
      end
    end

  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Imdb

  # A single episode, backed by the parsed HTML node it was found in.
  #
  # +title+, +synopsis+ and +release_date+ are always read from the node; each
  # reader returns nil when the lookup fails for any reason.
  class Episode

    attr_accessor :number
    # Only writers are generated for these attributes: the readers are defined
    # explicitly below, so generating them as well would merely be redefined.
    attr_writer :title, :synopsis, :release_date

    # number - the episode number as supplied by the caller.
    # node   - parsed HTML node for the episode (must respond to +search+).
    # season - the owning season object; stored but not used by this class.
    def initialize(number, node, season)
      @number = number
      @node = node
      @season = season
    end

    # Returns the episode title, or nil if it cannot be extracted.
    def title
      @node.search('table tr td h3 a').innerHTML.imdb_unescape_html rescue nil
    end

    # Returns the text found directly inside the episode's table cells, or nil
    # if it cannot be extracted.
    def synopsis
      @node.search('table tr td/text()').text rescue nil
    end

    # Returns the release date string exactly as shown on the page, or nil if
    # it cannot be extracted.
    def release_date
      @node.search('table tr td span strong').innerHTML.imdb_unescape_html rescue nil
    end

  end # Episode
end # Imdb
|
@@ -0,0 +1,146 @@
|
|
1
|
+
module Imdb

  # Common scraping logic for one IMDB title.
  #
  # The title's "combined" page is fetched and parsed on first use and then
  # cached for the lifetime of the object. Most readers are best-effort: they
  # return nil (or an empty array) when the value cannot be extracted.
  class ImdbBase
    attr_accessor :id, :url, :also_known_as
    # The reader for +title+ is defined explicitly below.
    attr_writer :title

    # imdb_id       - the numeric part of the IMDB identifier, as a String.
    # title         - optional, already-known title; double quotes are removed.
    # also_known_as - optional array of alternative titles.
    def initialize(imdb_id, title = nil, also_known_as = [])
      @id = imdb_id
      @url = "http://akas.imdb.com/title/tt#{imdb_id}/combined"
      @title = title.gsub(/"/, "") if title
      @also_known_as = also_known_as
    end

    # Fetches the raw combined page for +imdb_id+ with +open+.
    # NOTE(review): opening a URL this way presumably relies on open-uri being
    # loaded elsewhere in the gem -- confirm.
    def self.find_by_id(imdb_id)
      open("http://akas.imdb.com/title/tt#{imdb_id}/combined")
    end

    # Convenience method for search
    def self.search(query)
      Imdb::Search.new(query).movies
    end

    # Convenience method returning the movies of Imdb::Top250.
    def self.top_250
      Imdb::Top250.new.movies
    end

    # Returns an array with cast members
    def cast_members
      document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
    end

    # Returns an array with the href of each cast link, with the surrounding
    # "/name/.../" stripped. Like +cast_members+, returns [] on failure.
    def cast_member_ids
      document.search("table.cast td.nm a").map { |l| l['href'].sub(%r{^/name/(.*)/}, '\1') } rescue []
    end

    # Returns an array with the names of the directors
    def director
      document.search("h5[text()^='Director'] ~ a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
    end

    # Returns an array of genres (as strings)
    def genres
      document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
    end

    # Returns an array of languages as strings.
    def languages
      document.search("h5[text()='Language:'] ~ a[@href*=/language/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
    end

    # Returns an array of countries as strings.
    def countries
      document.search("h5[text()='Country:'] ~ a[@href*=/country/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
    end

    # Returns the duration of the movie in minutes as an integer.
    def length
      document.search("//h5[text()='Runtime:']/..").innerHTML[/\d+ min/].to_i rescue nil
    end

    # Returns a string containing the plot.
    def plot
      sanitize_plot(document.search("h5[text()='Plot:'] ~ div").first.innerHTML) rescue nil
    end

    # Returns a string containing the URL to the movie poster, or nil when no
    # poster image is found or its URL has an unexpected shape.
    def poster
      src = document.at("a[@name='poster'] img")['src'] rescue nil
      case src
      when /^(http:.+@@)/
        # Keep everything up to and including "@@" and drop what follows it.
        $1 + '.jpg'
      when /^(http:.+?)\.[^\/]+$/
        # Otherwise cut the file name at its first dot.
        $1 + '.jpg'
      end
    end

    # Returns a float containing the average user rating
    def rating
      document.at(".starbar-meta b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
    end

    # Returns an int containing the number of user ratings
    def votes
      # \D removes every non-digit (thousands separators, the word "votes").
      document.at("#tn15rating .tn15more").innerHTML.strip.imdb_unescape_html.gsub(/\D/, "").to_i rescue nil
    end

    # Returns a string containing the tagline
    def tagline
      document.search("h5[text()='Tagline:'] ~ div").first.innerHTML.gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
    end

    # Returns a string containing the mpaa rating and reason for rating
    def mpaa_rating
      document.search("h5[text()='MPAA:'] ~ div").first.innerHTML.strip.imdb_unescape_html rescue nil
    end

    # Returns a string containing the title. A title that is already known is
    # returned as is unless +force_refresh+ is true, in which case it is read
    # from the page again.
    def title(force_refresh = false)
      if @title && !force_refresh
        @title
      else
        @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
      end
    end

    # Returns an integer containing the year (CCYY) the movie was released in.
    # Unlike the other readers this one does not rescue errors.
    def year
      document.search('a[@href^="/year/"]').innerHTML.to_i
    end

    # Returns release date for the movie.
    def release_date
      sanitize_release_date(document.search('h5[text()*=Release Date]').first.next_sibling.innerHTML.to_s) rescue nil
    end

    private

    # Returns the parsed Hpricot document for this title, fetching it on the
    # first call.
    def document
      @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
    end

    # Removes markup and the link texts that follow the plot ("full summary",
    # "add synopsis", "see more", the "|" separator) from +the_plot+.
    def sanitize_plot(the_plot)
      the_plot = the_plot.imdb_strip_tags

      the_plot = the_plot.gsub(/add\ssummary|full\ssummary/i, "")
      the_plot = the_plot.gsub(/add\ssynopsis|full\ssynopsis/i, "")
      # The input is still HTML-escaped here, so the non-breaking space and
      # the "»" arrow appear as entities.
      the_plot = the_plot.gsub(/&nbsp;|&raquo;/i, "")
      # Whole words only, so that e.g. "seen" or "Moreover" stay intact.
      # A standalone "see" or "more" inside the plot text is still removed.
      the_plot = the_plot.gsub(/\b(?:see|more)\b/i, "")
      the_plot = the_plot.gsub(/\|/i, "")

      the_plot = the_plot.strip.imdb_unescape_html
    end

    # Removes links and the trailing "see more" decoration from
    # +the_release_date+.
    def sanitize_release_date(the_release_date)
      the_release_date = the_release_date.gsub(/<a.*a>/, "")
      the_release_date = the_release_date.gsub(/&nbsp;|&raquo;/i, "")
      the_release_date = the_release_date.gsub(/\b(?:see|more)\b/i, "")

      the_release_date = the_release_date.strip.imdb_unescape_html
    end

  end # ImdbBase

end # Imdb
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Imdb
|
2
|
+
|
3
|
+
# Represents a Movie on IMDB.com
|
4
|
+
class Movie < ImdbBase
|
5
|
+
|
6
|
+
# Returns the absolute URL of every link on the page whose href contains
# "/video/screenplay/". Returns an empty array if anything goes wrong.
def trailers
  anchors = document.search("a[@href*='/video/screenplay/']")
  anchors.map do |anchor|
    "http://akas.imdb.com" + anchor.get_attribute("href")
  end
rescue StandardError
  []
end
|
9
|
+
|
10
|
+
|
11
|
+
end # Movie
|
12
|
+
|
13
|
+
end # Imdb
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Imdb

  # Base class for pages that list several titles. The class itself does not
  # define +document+; it calls whatever +document+ method is available on the
  # object and expects the result to respond to +search+.
  class MovieList
    # Returns the list of Imdb::Movie objects, parsing the page on first use.
    def movies
      @movies ||= parse_movies
    end

    private

    # Turns every usable title link of the document into an Imdb::Movie.
    def parse_movies
      title_links = document.search('a[@href^="/title/tt"]')

      # Skip links without any text (e.g. image-only links) and links that
      # sit in a "media from" section.
      usable_links = title_links.reject do |link|
        link.innerHTML.imdb_strip_tags.empty? || link.parent.innerHTML =~ /media from/i
      end

      entries = usable_links.map do |link|
        imdb_id = link['href'][/\d+/]

        # When the surrounding markup starts with an image, the title is in
        # the part after the first line break.
        parts = link.parent.innerHTML.split("<br />")
        raw_title = (parts[0] && parts[1] && parts[0] =~ /img/) ? parts[1] : parts[0]

        # Drop the markup and a trailing "(YYYY)" year.
        name = raw_title.imdb_strip_tags.imdb_unescape_html
        name.gsub!(/\s+\(\d\d\d\d\)$/, '')

        other_names = []
        if name =~ /\saka\s/
          pieces = name.split(/\saka\s/)
          name = pieces.shift.strip.imdb_unescape_html
          other_names = pieces.map { |piece| piece.strip.imdb_strip_tags.imdb_unescape_html }
        end

        [imdb_id, name, other_names]
      end

      # The same title may be linked more than once; keep one entry of each.
      entries.uniq.map { |entry| Imdb::Movie.new(*entry) }
    end
  end # MovieList

end # Imdb
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Imdb

  # Search IMDB for a title
  class Search < MovieList
    attr_reader :query

    # Fetches the raw search result page for +query+.
    #
    # This is a public class method. It is defined above the +private+
    # section on purpose, because +private+ does not apply to methods
    # defined with "def self.".
    def self.query(query)
      open("http://akas.imdb.com/find?q=#{CGI::escape(query)};s=tt")
    end

    # Initialize a new IMDB search with the specified query
    #
    #   search = Imdb::Search.new("Star Trek")
    #
    # Imdb::Search is lazy loading, meaning that unless you access the +movies+
    # attribute, no query is made to IMDB.com.
    #
    def initialize(query)
      @query = query
    end

    # Returns an array of Imdb::Movie objects for the search results.
    # If the +query+ was an exact match, a single element array will be returned.
    def movies
      @movies ||= (exact_match? ? parse_movie : parse_movies)
    end

    private

    # Returns the parsed result page, fetching it on the first call.
    def document
      @document ||= Hpricot(Imdb::Search.query(@query))
    end

    # Builds the single-element result for a page that is itself a title
    # page: the ID comes from the canonical link, the title from the <h1>.
    def parse_movie
      id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
      title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html

      [Imdb::Movie.new(id, title)]
    end

    # Returns true if the result page contains the title overview table,
    # which is taken to mean the search yielded one exact match.
    def exact_match?
      !document.at("//table[@id='title-overview-widget-layout']").nil?
    end

  end # Search
end # Imdb
|