imdb-scan 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/LICENSE +674 -0
- data/README.md +55 -0
- data/Rakefile +22 -0
- data/VERSION +1 -0
- data/features/movie.feature +38 -0
- data/features/person.feature +11 -0
- data/features/search.feature +12 -0
- data/features/step_definitions/movie_steps.rb +94 -0
- data/features/step_definitions/person_steps.rb +48 -0
- data/features/step_definitions/search_steps.rb +33 -0
- data/imdb-scan.gemspec +48 -0
- data/lib/configuration.rb +22 -0
- data/lib/imdb.rb +22 -0
- data/lib/imdb/cast.rb +38 -0
- data/lib/imdb/movie.rb +165 -0
- data/lib/imdb/person.rb +128 -0
- data/lib/imdb/search.rb +82 -0
- data/lib/imdb/skeleton.rb +83 -0
- metadata +147 -0
data/lib/imdb/movie.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
module IMDB
|
2
|
+
# Get movie information with IMDB movie id.
|
3
|
+
# @example Get Yahsi Bati movie title and cast listing [http://www.imdb.com/title/tt1567448/]
|
4
|
+
# m = IMDB::Movie.new('1567448')
|
5
|
+
# puts m.title
|
6
|
+
class Movie < IMDB::Skeleton
|
7
|
+
attr_accessor :link, :imdb_id
|
8
|
+
|
9
|
+
# Build a movie wrapper for the given IMDB title id.
# @param id_of [String] numeric part of the IMDB title id, e.g. '1567448'
def initialize(id_of)
  # Reminder: every new reader method of this class must also be declared
  # in the map handed to the superclass below.
  attributes = {
    :imdb_id => String,
    :poster => String,
    :title => String,
    :release_date => String,
    :cast => Array,
    :photos => Array,
    :director => String,
    :director_person => Person,
    :genres => Array,
    :rating => Float,
    :movielength => Integer,
    :short_description => String,
    :writers => Array
  }
  super("Movie", attributes, [:imdb_id])

  @imdb_id = id_of
  @link = "http://www.imdb.com/title/tt#{@imdb_id}"
end
|
29
|
+
|
30
|
+
# Get movie poster address
# When the image URL carries an IMDB size suffix ("._V1...jpg"), the suffix is
# stripped so the address of the unscaled image is returned.
# @return [String, nil] poster URL, or nil when the page has no primary image
def poster
  src = begin
    doc.at("#img_primary img")["src"]
  rescue StandardError
    nil
  end
  return nil if src.nil?

  # The extension dot is escaped, and a URL that has "._V1" but is not a
  # ".jpg" is returned untouched instead of raising on a failed match.
  sized = src.match(/(.*)\._V1.*(\.jpg)/)
  sized ? sized[1, 2].join : src
end
|
43
|
+
|
44
|
+
# Get movie title
# Reads the title from the <meta name="title"> tag (dropping a trailing
# "(year)" part) and falls back to the first child of h1.header when the
# meta tag is missing or empty.
# @return [String]
def title
  meta = doc.at("//head/meta[@name='title']")
  content = meta && meta["content"]
  from_meta = content && content.split(/\(\d+\)/)[0]
  # Non-destructive strip: strip! would return nil for an already clean title.
  from_meta = from_meta.strip if from_meta
  return from_meta unless from_meta.nil? || from_meta.empty?

  doc.at("h1.header").children.first.text.strip
end
|
51
|
+
|
52
|
+
# Get movie cast listing
# Rows of the cast table that do not link to a person page are skipped.
# @return [Cast[]]
def cast
  # Keep this call directly in the method body: the private `doc` helper
  # chooses the page to load from the name of its immediate caller.
  rows = doc.search("table.cast tr")

  # Picture and name extraction from the row are currently disabled.
  members = rows.map do |row|
    person_id = begin
      row.children[1].search('a[@href^="/name/nm"]').first["href"].match(/\/name\/nm([0-9]+)/)[1]
    rescue
      nil
    end
    role = begin
      row.children[3].content.strip
    rescue
      nil
    end
    next if person_id.nil?

    actor = IMDB::Person.new(person_id)
    IMDB::Cast.new(self, actor, role)
  end
  members.compact
end
|
66
|
+
|
67
|
+
# Get movie photos
# @return [Array] "src" attribute of every thumbnail image, or nil on any error
def photos
  # `doc` is called straight from the method body so that it can recognise
  # this method as its caller.
  thumbnails = doc.search('#main .thumb_list img')
  thumbnails.map { |image| image["src"] }
rescue
  nil
end
|
76
|
+
|
77
|
+
# Get release date
# Uses the <time datetime="..."> value of the "Release Date" block when the
# page has one; otherwise falls back to the year shown in the page header.
# A year-only value is reported as January 1st of that year.
# @return [String, nil] date formatted as "YYYY-MM-DD", or nil when no date
#   can be extracted
def release_date
  release_block = doc.xpath("//h4[contains(., 'Release Date')]/..")
  if release_block.length > 0
    date = release_block.search("time").first["datetime"]
    # \A/\z anchor the whole string; ^/$ would also accept multi-line input.
    date =~ /\A\d{4}\z/ ? "#{date}-01-01" : Date.parse(date).to_s
  else
    year = doc.at("h1.header .nobr").text[/\d{4}/]
    # Without a year there is no date; do not return the bogus "-01-01".
    year && "#{year}-01-01"
  end
rescue StandardError
  nil
end
|
94
|
+
|
95
|
+
# Get Director
# @return [String] name of the director person, or nil when it cannot be read
def director
  person = self.director_person
  person.name
rescue
  nil
end
|
100
|
+
|
101
|
+
# Get Director Person class
# @return [Person] person behind the first /name/nm link of the "Director"
#   block, or nil when there is none
def director_person
  anchor = doc.xpath("//h4[contains(., 'Director')]/..").at('a[@href^="/name/nm"]')
  person_id = begin
    anchor['href'].match(/\/name\/nm([0-9]+)/)[1]
  rescue
    nil
  end
  person_id.nil? ? nil : IMDB::Person.new(person_id)
rescue
  nil
end
|
112
|
+
|
113
|
+
# Genre List
# @return [Array] text of every link in the "Genre" block except the
#   "See more" link; nil on any error
def genres
  anchors = doc.xpath("//h4[contains(., 'Genre')]/..").search("a")
  names = []
  anchors.each do |anchor|
    label = anchor.content
    next if label.nil? || label =~ /See more/
    names << label
  end
  names
rescue
  nil
end
|
122
|
+
|
123
|
+
# Get movie rating
# The value is memoized in @rating after the first successful read.
# @return [Float]
def rating
  return @rating if @rating

  @rating = doc.search(".star-box-giga-star").text.strip.to_f
rescue
  nil
end
|
130
|
+
|
131
|
+
# Get the movielength of the movie in minutes
# @return [Integer] leading number of the first "<n> min" text in the
#   "Runtime:" block; nil when the block cannot be read
def movielength
  runtime_html = doc.at("//h4[text()='Runtime:']/..").inner_html
  runtime_html[/\d+ min/].to_i
rescue
  nil
end
|
136
|
+
|
137
|
+
# Writer List
# @return [Array] one IMDB::Person per writer link that carries a numeric
#   person id
def writers
  # Keep this call directly in the method body: the private `doc` helper
  # chooses the page to load from the name of its immediate caller.
  credits = doc.xpath("//a[@name='writers']/../../../..").search('a[@href^="/name/nm"]')

  people = credits.map do |anchor|
    profile = anchor['href'].to_s[/\/name\/nm([0-9]+)/, 1]
    IMDB::Person.new(profile) unless profile.nil?
  end
  # Links without a numeric id yield nil; drop them like `cast` does.
  people.compact
end
|
145
|
+
|
146
|
+
# Get the short description shown in the overview box
# @return [String, nil] stripped description text, or nil when the page has
#   no description node (consistent with the other readers, which return nil
#   instead of raising)
def short_description
  node = doc.at("#overview-top p[itemprop=description]")
  node && node.text.strip
end
|
150
|
+
|
151
|
+
private
|
152
|
+
|
153
|
+
# Pick (and memoize) the IMDB page needed by the calling reader.
# The page is chosen from the name of the calling method: `cast` and
# `writers` read "<link>/fullcredits", `photos` reads "<link>/mediaindex",
# every other caller reads the main title page.
# NOTE(review): `open` on a URL relies on open-uri's Kernel#open hook, which
# newer Rubies no longer provide (URI.open is the replacement) - confirm the
# supported Ruby versions before changing it.
# @return [Object] the parsed page as returned by Nokogiri::HTML
def doc
  # caller[0] ends with "in `cast'" on older Rubies and with
  # "in 'IMDB::Movie#cast'" on Ruby 3.4+, optionally prefixed by "block in ".
  # Keep only the bare method name so both formats (and calls made from
  # inside a block) are recognised.
  frame_label = caller[0].to_s[/:in [`'](.*)'\z/, 1].to_s
  requester = frame_label.split(/[\s#.]/).last

  case requester
  when "cast", "writers"
    @doc_full ||= Nokogiri::HTML(open("#{@link}/fullcredits"))
  when "photos"
    @doc_photo ||= Nokogiri::HTML(open("#{@link}/mediaindex"))
  else
    @doc ||= Nokogiri::HTML(open("#{@link}"))
  end
end
|
162
|
+
|
163
|
+
end # Movie
|
164
|
+
end # IMDB
|
165
|
+
|
data/lib/imdb/person.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
module IMDB
|
2
|
+
# Get Person information with IMDB person id.
|
3
|
+
# @example Get Christian Bale information [http://www.imdb.com/name/nm0000288/]
|
4
|
+
# m = IMDB::Person.new('0000288')
|
5
|
+
# puts m.name
|
6
|
+
# puts m.real_name
|
7
|
+
class Person < IMDB::Skeleton
|
8
|
+
attr_accessor :id
|
9
|
+
|
10
|
+
# Build a person wrapper for the given IMDB person id.
# @param imdb_id [String] numeric part of the IMDB person id, e.g. '0000288'
def initialize(imdb_id)
  attributes = {
    :id => String,
    :name => String,
    :real_name => String,
    :birthdate => Date,
    :deathdate => Date,
    :nationality => String,
    :height => String,
    :biography => String,
    :photo => String,
    :profile_path => String,
    :filmography => Hash,
    :main_document => Nokogiri,
    :bio_document => Nokogiri,
    :photo_document => Nokogiri,
    :photo_document_url => String
  }
  super("Person", attributes, [:id])
  @id = imdb_id
end
|
28
|
+
|
29
|
+
# Get the profile path
# @return [String] site-relative path of the person page ("/name/nm<id>")
def profile_path
  "/name/nm" + @id.to_s
end
|
34
|
+
|
35
|
+
# Get the name of the person
# @return [String] text of the a.main link on the bio page, nil on any error
def name
  heading_link = bio_document.at("a[@class='main']")
  heading_link.inner_text
rescue
  nil
end
|
40
|
+
|
41
|
+
# Get the birth name of the person
# @return [String] stripped text of the node following the "Birth Name"
#   heading, nil on any error
def real_name
  heading = bio_document.at("h5[text()*='Birth Name']")
  heading.next.inner_text.strip
rescue
  nil
end
|
46
|
+
|
47
|
+
# Get the birth date
# The day/month part and the year are read separately from the bio page;
# a part that cannot be read is replaced by an empty string before parsing.
# @return [Date] nil when the combined text cannot be parsed
def birthdate
  day_and_month = begin
    bio_document.at("h5[text()*='Date of Birth']").next_element.inner_text.strip
  rescue
    ""
  end
  year = begin
    bio_document.at("a[@href*='birth_year']").inner_text.strip
  rescue
    ""
  end
  Date.parse("#{day_and_month} #{year}")
rescue
  nil
end
|
54
|
+
|
55
|
+
# Get the death date, else nil
# The day/month part and the year are read separately from the bio page;
# a part that cannot be read is replaced by an empty string before parsing.
# @return [Date] nil when the combined text cannot be parsed
def deathdate
  day_and_month = begin
    bio_document.at("h5[text()*='Date of Death']").next_element.inner_text.strip
  rescue
    ""
  end
  year = begin
    bio_document.at("a[@href*='death_date']").inner_text.strip
  rescue
    ""
  end
  Date.parse("#{day_and_month} #{year}")
rescue
  nil
end
|
62
|
+
|
63
|
+
# Get the nationality
# @return [String] stripped text of the birth-place link, nil on any error
def nationality
  birth_place = bio_document.at("a[@href*='birth_place']")
  birth_place.inner_text.strip
rescue
  nil
end
|
68
|
+
|
69
|
+
# Get the height
# @return [String] the parenthesised part of the text following the "Height"
#   heading, nil on any error
def height
  heading = bio_document.at("h5[text()*='Height']")
  heading.next.inner_text.match(/\((.+)\)/)[1]
rescue
  nil
end
|
74
|
+
|
75
|
+
# Get the biography
# @return [String] text of the element following the "Biography" heading,
#   nil on any error
def biography
  heading = bio_document.at("h5[text()*='Biography']")
  heading.next_element.inner_text
rescue
  nil
end
|
80
|
+
|
81
|
+
# Return the principal photo
# @return [String] "src" of img#primary-img on the photo page; nil when there
#   is no photo page or on any error
def photo
  page = photo_document
  return nil unless page

  page.at("img#primary-img").get_attribute('src')
rescue
  nil
end
|
86
|
+
|
87
|
+
# Return the filmography
# Splitting the list by role (writer / actor / director / composer) is not
# implemented yet, so every title linked from a .filmo-row is returned in a
# single flat array.
# @return [Array<Movie>] empty when the page cannot be read
def filmography
  title_ids = begin
    main_document.css(".filmo-row b a").map { |e| e.get_attribute('href')[/tt(\d+)/, 1] }
  rescue StandardError
    []
  end
  # Ids stay strings: converting with to_i would drop the leading zeros of
  # the id and turn a missing id into Movie.new(0).
  title_ids.compact.map { |title_id| Movie.new(title_id) }
end
|
104
|
+
|
105
|
+
|
106
|
+
# Main profile page of the person, fetched once and memoized.
# @return [Object] the page as parsed by Nokogiri::HTML
def main_document
  return @main_document if @main_document

  url = "http://www.imdb.com#{profile_path}"
  @main_document = Nokogiri::HTML(open(url))
end
|
110
|
+
|
111
|
+
# Biography page of the person ("<profile>/bio"), fetched once and memoized.
# @return [Object] the page as parsed by the Nokogiri() helper
def bio_document
  return @bio_document if @bio_document

  url = "http://www.imdb.com#{profile_path}/bio"
  @bio_document = Nokogiri(open(url))
end
|
114
|
+
|
115
|
+
# Page behind the headshot link of the bio page, memoized once fetched.
# @return [Object, nil] the page as parsed by the Nokogiri() helper, or nil
#   when the bio page has no headshot link
def photo_document
  return @photo_document if @photo_document
  return @photo_document = nil unless photo_document_url

  @photo_document = Nokogiri(open("http://www.imdb.com" + photo_document_url))
end
|
122
|
+
|
123
|
+
# Site-relative address of the headshot page.
# @return [String, nil] "href" of the headshot link on the bio page, nil when
#   it cannot be read
def photo_document_url
  headshot = bio_document.at(".photo a[@name=headshot]")
  headshot.get_attribute('href')
rescue
  nil
end
|
126
|
+
|
127
|
+
end
|
128
|
+
end
|
data/lib/imdb/search.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
module IMDB
|
2
|
+
class Search
|
3
|
+
# Search IMDB titles for a keyword.
# The result list is also kept in @ret_val for #to_hash / #to_json.
# @param keyword [String] search text (CGI-escaped before it is sent)
# @return [Array] IMDB::Result entries
def movie(keyword)
  url = "http://www.imdb.com/find?s=tt&q=#{CGI.escape(keyword)}"
  doc = Nokogiri::HTML(open(url))
  @ret_val = []
  # An h1.header means we were already redirected to the movie's own page.
  doc.at("h1.header") ? single_result(doc) : result_list(doc)
  @ret_val
end
|
13
|
+
|
14
|
+
# Convert the last search result list to a hash.
# @return [Hash] position in the result list (0-based) => result.to_hash;
#   empty when no search has been run yet
def to_hash
  indexed = {}
  # @ret_val is nil until #movie has been called; treat that as "no results".
  (@ret_val || []).each_with_index { |result, position| indexed[position] = result.to_hash }
  indexed
end
|
23
|
+
|
24
|
+
# Serialize the last search result list to JSON.
# Extra arguments (such as a generator state) are passed on to
# Hash#to_json, matching the signature of Skeleton#to_json.
# @return [String]
def to_json(*a)
  to_hash.to_json(*a)
end
|
27
|
+
|
28
|
+
private
|
29
|
+
# Record the single movie page we were redirected to as the only result.
# @param doc [Object] parsed movie page
# @return [Array] @ret_val with the new IMDB::Result appended
def single_result(doc)
  canonical = doc.at("link[rel=canonical]")["href"]
  heading = doc.at("h1.header").text.strip.gsub(/\s+/, " ")
  @ret_val << IMDB::Result.new(canonical[/\d+/], heading, canonical)
end
|
35
|
+
|
36
|
+
# Collect one IMDB::Result per title link of a search result page.
# Links with blank text are skipped. When several links share an id, the
# last one is kept, at the position where that id first appeared.
# @param doc [Object] parsed search result page
# @return [Array] the de-duplicated result list (also stored in @ret_val)
def result_list(doc)
  found = []
  doc.search('a[@href^="/title/tt"]').each do |node|
    next if node.content.blank?

    href = node["href"]
    found << IMDB::Result.new(href[/\d+/], node.content, "http://www.imdb.com#{href}")
  end
  @ret_val = found

  by_id = {}
  @ret_val.each { |result| by_id[result.imdb_id] = result }

  @ret_val = by_id.values
end
|
51
|
+
end # Search
|
52
|
+
|
53
|
+
class Result < IMDB::Skeleton
|
54
|
+
# Build one search result entry.
# @param imdb_id [String] numeric IMDB title id
# @param title [String] text of the result
# @param link [String] address of the title page
def initialize(imdb_id, title, link)
  attributes = {
    :title => String,
    :link => String,
    :imdb_id => String
  }
  super("Result", attributes, [:imdb_id])
  @title = title
  @link = link
  @imdb_id = imdb_id
end
|
63
|
+
|
64
|
+
# @return [String] title given at construction
def title
  instance_variable_get(:@title)
end
|
67
|
+
|
68
|
+
# @return [String] link given at construction
def link
  instance_variable_get(:@link)
end
|
71
|
+
|
72
|
+
# @return [String] IMDB id given at construction
def imdb_id
  instance_variable_get(:@imdb_id)
end
|
75
|
+
|
76
|
+
# Build the Movie object for this search result.
# @return [Movie] a new Movie constructed from this result's IMDB id
def movie
  title_id = @imdb_id
  Movie.new(title_id)
end
|
79
|
+
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module IMDB
|
2
|
+
# IMDB generic interface.
|
3
|
+
class Skeleton
|
4
|
+
attr_accessor :model, :method_names
|
5
|
+
|
6
|
+
# Set up the generic IMDB interface.
# When IMDB::Configuration.caching is on, an anonymous MongoMapper document
# class is created for the model and every declared reader that is not a key
# is wrapped so that its value is looked up in (and written to) that store.
# @param model_name [String] collection name used for the cache model
# @param method_names [Hash] reader name => value type
# @param keys [Array] reader names that identify a record; these readers are
#   never wrapped
def initialize(model_name = "", method_names = {}, keys = [])
  if IMDB::Configuration.caching
    @model = Class.new do
      include MongoMapper::Document
      set_collection_name model_name
      method_names.each { |attribute, type| key attribute, type }
    end

    class_eval do
      method_names.each_key do |meth|
        next if keys.include?(meth)

        # Keep the original reader reachable as old_<name>.
        old_meth = "old_#{meth}".to_sym
        alias_method old_meth, meth.to_sym

        define_method meth do
          lookup = keys.to_imdb_hash { |key_name| self.send(key_name) }
          @db_query = self.model.first(lookup)

          # No stored record yet: create one that holds only the key values.
          if @db_query.nil?
            @db_query = self.model.new(keys.to_imdb_hash { |key_name| self.send(key_name) })
            @db_query.save
          end

          stored = @db_query[meth]
          missing = stored.nil? || (stored.kind_of?(Array) && stored.length.zero?)
          if missing
            fresh = send(old_meth)
            if fresh.kind_of?(Array)
              # Arrays are stored without nils; non-string items as hashes.
              fresh.compact!
              fresh.map! { |item| item.kind_of?(String) ? item : item.to_hash }
            end
            @db_query[meth] = fresh
            @db_query.save
          end
          @db_query[meth]
        end
      end
    end
  end
  @method_names = method_names
end
|
55
|
+
|
56
|
+
# Serialize the output of #to_hash to JSON.
# @param a [Array] arguments passed straight on to Hash#to_json
# @return [String]
def to_json(*a)
  to_hash.to_json(*a)
end
|
62
|
+
|
63
|
+
# Collect the values of the declared readers into a hash.
# Only Array and String values are included; readers that return anything
# else (nil, numbers, other objects) are left out.
# @param a [Array] ignored; accepted so callers may pass extra arguments
# @return [Hash] reader name => value
def to_hash(*a)
  collected = {}
  @method_names.each_key do |reader|
    value = self.send(reader)
    # The value is stored as is: the former `collect! { |e| e }` copied
    # nothing, mutated the array in place and raised on frozen arrays.
    collected[reader] = value if value.kind_of?(Array) || value.kind_of?(String)
  end
  collected
end
|
77
|
+
|
78
|
+
# Rebuild an instance from a deserialized JSON structure.
# @param o [Hash] structure whose 'data' entry holds the constructor arguments
# @return [Object] the result of calling new with those arguments
def self.json_create(o)
  constructor_args = o['data']
  new(*constructor_args)
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|