imdb-scan 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/LICENSE +674 -0
- data/README.md +55 -0
- data/Rakefile +22 -0
- data/VERSION +1 -0
- data/features/movie.feature +38 -0
- data/features/person.feature +11 -0
- data/features/search.feature +12 -0
- data/features/step_definitions/movie_steps.rb +94 -0
- data/features/step_definitions/person_steps.rb +48 -0
- data/features/step_definitions/search_steps.rb +33 -0
- data/imdb-scan.gemspec +48 -0
- data/lib/configuration.rb +22 -0
- data/lib/imdb.rb +22 -0
- data/lib/imdb/cast.rb +38 -0
- data/lib/imdb/movie.rb +165 -0
- data/lib/imdb/person.rb +128 -0
- data/lib/imdb/search.rb +82 -0
- data/lib/imdb/skeleton.rb +83 -0
- metadata +147 -0
data/lib/imdb/movie.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
module IMDB
|
2
|
+
# Get movie information with IMDB movie id.
|
3
|
+
# @example Get Yahsi Bati movie title and cast listing [http://www.imdb.com/title/tt1567448/]
|
4
|
+
# m = IMDB::Movie.new('1567448')
|
5
|
+
# puts m.title
|
6
|
+
class Movie < IMDB::Skeleton
  attr_accessor :link, :imdb_id

  # @param id_of [String] numeric part of the IMDB title id, e.g. '1567448'
  def initialize(id_of)
    # !!!DON'T FORGET DEFINE NEW METHODS IN SUPER!!!
    super("Movie", { :imdb_id => String,
                     :poster => String,
                     :title => String,
                     :release_date => String,
                     :cast => Array,
                     :photos => Array,
                     :director => String,
                     :director_person => Person,
                     :genres => Array,
                     :rating => Float,
                     :movielength => Integer,
                     :short_description => String,
                     :writers => Array }, [:imdb_id])

    @imdb_id = id_of

    @link = "http://www.imdb.com/title/tt#{@imdb_id}"
  end

  # Get movie poster address.
  # When the image URL carries a "._V1..." resize suffix, the suffix is
  # removed and only the part before it plus ".jpg" is returned.
  # @return [String, nil] nil when the poster cannot be read
  def poster
    src = doc.at("#img_primary img")["src"]
    return nil if src.nil?

    resized = src.match(/(.*)\._V1.*(\.jpg)/)
    # URLs with a "._V1" marker but no ".jpg" tail are returned untouched.
    resized ? resized[1, 2].join : src
  rescue StandardError
    nil
  end

  # Get movie title.
  # Reads the <meta name="title"> content up to the "(year)" part and falls
  # back to the first child of h1.header when the meta tag is missing or empty.
  # @return [String, nil]
  def title
    meta = doc.at("//head/meta[@name='title']")
    from_meta = meta && meta["content"].to_s.split(/\(\d+\)/)[0].to_s.strip
    return from_meta unless from_meta.nil? || from_meta.empty?

    header = doc.at("h1.header")
    first_child = header && header.children.first
    first_child && first_child.text.strip
  end

  # Get movie cast listing (scraped from the /fullcredits page).
  # Rows without a "/name/nm<digits>" link are skipped.
  # @return [Cast[]]
  def cast
    credits_doc.search("table.cast tr").map do |row|
      cells = row.children
      id = person_id(cells[1] && cells[1].search('a[@href^="/name/nm"]').first)
      next if id.nil?

      char = cells[3] && cells[3].content.strip
      IMDB::Cast.new(self, IMDB::Person.new(id), char)
    end.compact
  end

  # Get movie photos (scraped from the /mediaindex page).
  # @return [Array, nil] image src values, nil on failure
  def photos
    media_doc.search('#main .thumb_list img').map { |img| img["src"] }
  rescue StandardError
    nil
  end

  # Get release date.
  # A year-only value is expanded to January 1st of that year.
  # @return [String, nil] date as "YYYY-MM-DD", nil on failure
  def release_date
    if (node = doc.xpath("//h4[contains(., 'Release Date')]/..")).length > 0
      date = node.search("time").first["datetime"]
      if date =~ /\A\d{4}\z/
        "#{date}-01-01"
      else
        Date.parse(date).to_s
      end
    else
      year = doc.at("h1.header .nobr").text[/\d{4}/]
      "#{year}-01-01"
    end
  rescue StandardError
    nil
  end

  # Get Director
  # @return [String, nil]
  def director
    director_person.name
  rescue StandardError
    nil
  end

  # Get Director Person class
  # @return [Person, nil]
  def director_person
    link = doc.xpath("//h4[contains(., 'Director')]/..").at('a[@href^="/name/nm"]')
    id = person_id(link)
    IMDB::Person.new(id) unless id.nil?
  rescue StandardError
    nil
  end

  # Genre List (the "See more" link is excluded)
  # @return [Array, nil]
  def genres
    doc.xpath("//h4[contains(., 'Genre')]/..").search("a").map { |g| g.content }.reject { |name| name =~ /See more/ }
  rescue StandardError
    nil
  end

  # Get the rating shown in the ".star-box-giga-star" element.
  # The value is memoized in @rating; empty text converts to 0.0.
  # @return [Float, nil]
  def rating
    @rating ||= doc.search(".star-box-giga-star").text.strip.to_f
  rescue StandardError
    nil
  end

  # Get the movielength of the movie in minutes
  # @return [Integer, nil]
  def movielength
    doc.at("//h4[text()='Runtime:']/..").inner_html[/\d+ min/].to_i
  rescue StandardError
    nil
  end

  # Writer List (scraped from the /fullcredits page).
  # Links without a "/name/nm<digits>" href are dropped.
  # @return [Array] Person objects
  def writers
    credits_doc.xpath("//a[@name='writers']/../../../..").search('a[@href^="/name/nm"]').map { |anchor|
      id = person_id(anchor)
      IMDB::Person.new(id) unless id.nil?
    }.compact
  end

  # Get the short description from the overview box.
  # @return [String, nil] nil when the description node is absent
  def short_description
    node = doc.at("#overview-top p[itemprop=description]")
    node && node.text.strip
  end

  private

  # Main title page, fetched once and memoized.
  def doc
    @doc ||= fetch_page(@link.to_s)
  end

  # Full credits page (used by cast and writers), fetched once and memoized.
  # Selected explicitly rather than by inspecting the caller's backtrace label,
  # whose text format differs between Ruby versions.
  def credits_doc
    @doc_full ||= fetch_page("#{@link}/fullcredits")
  end

  # Media index page (used by photos), fetched once and memoized.
  def media_doc
    @doc_photo ||= fetch_page("#{@link}/mediaindex")
  end

  # Downloads +url+ and parses it as HTML.
  # Uses URI.open where open-uri provides it (Kernel#open stopped opening
  # URLs in Ruby 3.0) and falls back to Kernel#open on older Rubies.
  def fetch_page(url)
    io = defined?(URI) && URI.respond_to?(:open) ? URI.open(url) : open(url)
    Nokogiri::HTML(io)
  end

  # Extracts the digits of a "/name/nm<digits>" href from a link node.
  # Returns nil for a nil node or a non-matching href.
  def person_id(link)
    href = link && link["href"]
    href && href[/\/name\/nm([0-9]+)/, 1]
  end

end # Movie
|
164
|
+
end # IMDB
|
165
|
+
|
data/lib/imdb/person.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
module IMDB
|
2
|
+
# Get Person information with IMDB person id.
|
3
|
+
# @example Get Christian Bale information [http://www.imdb.com/name/nm0000288/]
|
4
|
+
# m = IMDB::Person.new('0000288')
|
5
|
+
# puts m.name
|
6
|
+
# puts m.real_name
|
7
|
+
class Person < IMDB::Skeleton
  attr_accessor :id

  # @param imdb_id [String] numeric part of the IMDB name id, e.g. '0000288'
  def initialize(imdb_id)
    # NOTE(review): the *_document entries are typed with the Nokogiri module
    # itself and are also visited by Skeleton#to_hash -- confirm this is intended.
    super("Person", { :id => String,
                      :name => String,
                      :real_name => String,
                      :birthdate => Date,
                      :deathdate => Date,
                      :nationality => String,
                      :height => String,
                      :biography => String,
                      :photo => String,
                      :profile_path => String,
                      :filmography => Hash,
                      :main_document => Nokogiri,
                      :bio_document => Nokogiri,
                      :photo_document => Nokogiri,
                      :photo_document_url => String }, [:id])
    @id = imdb_id
  end

  #Get the profile path
  #@return [String]
  def profile_path
    "/name/nm#{@id}"
  end

  #Get the name of the person
  #@return [String, nil]
  def name
    scrape { bio_document.at("a[@class='main']").inner_text }
  end

  #Get The Real Born name of the Person
  #@return [String, nil]
  def real_name
    scrape { bio_document.at("h5[text()*='Birth Name']").next.inner_text.strip }
  end

  #Get The Birth Date
  #@return [Date, nil]
  def birthdate
    bio_date("Date of Birth", "birth_year")
  end

  #Get The death date else nil
  #@return [Date, nil]
  def deathdate
    bio_date("Date of Death", "death_date")
  end

  #Get the Nationality (text of the birth place link)
  #@return [String, nil]
  def nationality
    scrape { bio_document.at("a[@href*='birth_place']").inner_text.strip }
  end

  #Get the height (the part written in parentheses after the "Height" heading)
  #@return [String, nil]
  def height
    scrape { bio_document.at("h5[text()*='Height']").next.inner_text.match(/\((.+)\)/)[1] }
  end

  #Get The Biography
  #@return [String, nil]
  def biography
    scrape { bio_document.at("h5[text()*='Biography']").next_element.inner_text }
  end

  #Return the principal Photo
  #@return [String, nil]
  def photo
    scrape do
      page = photo_document
      page.at("img#primary-img").get_attribute('src') if page
    end
  end

  #Return the Filmography
  #The credits are not subdivided by role (writer/actor/director/composer);
  #every title found in a ".filmo-row" is returned in one flat list.
  #@return [Array<Movie>]
  def filmography
    ids = scrape([]) do
      main_document.css(".filmo-row b a").map { |anchor| anchor.get_attribute('href').to_s[/tt(\d+)/, 1] }
    end
    # Ids stay strings: converting to Integer would drop the leading zeros
    # that are part of the title URL (tt0372784 would become tt372784).
    ids.compact.map { |movie_id| Movie.new(movie_id) }
  end

  # Main profile page, fetched once and memoized.
  def main_document
    @main_document ||= fetch_page("http://www.imdb.com#{profile_path}")
  end

  # Biography page, fetched once and memoized.
  def bio_document
    @bio_document ||= fetch_page("http://www.imdb.com#{profile_path}/bio")
  end

  # Headshot page, or nil when the bio page has no headshot link.
  def photo_document
    @photo_document ||= begin
      path = photo_document_url
      path ? fetch_page("http://www.imdb.com" + path) : nil
    end
  end

  # Relative URL of the headshot page taken from the bio page.
  #@return [String, nil]
  def photo_document_url
    scrape { bio_document.at(".photo a[@name=headshot]").get_attribute('href') }
  end

  private

  # Runs the block and returns +fallback+ if it raises a StandardError
  # (for example because an expected node is missing from the page).
  def scrape(fallback = nil)
    yield
  rescue StandardError
    fallback
  end

  # Builds a Date from the element following the given h5 heading plus the
  # text of the link whose href contains +href_fragment+.
  # Returns nil when the combined text cannot be parsed.
  def bio_date(heading, href_fragment)
    day_month = scrape("") { bio_document.at("h5[text()*='#{heading}']").next_element.inner_text.strip }
    year = scrape("") { bio_document.at("a[@href*='#{href_fragment}']").inner_text.strip }
    scrape { Date.parse("#{day_month} #{year}") }
  end

  # Downloads +url+ and parses it as HTML.
  # Uses URI.open where open-uri provides it (Kernel#open stopped opening
  # URLs in Ruby 3.0) and falls back to Kernel#open on older Rubies.
  def fetch_page(url)
    io = defined?(URI) && URI.respond_to?(:open) ? URI.open(url) : open(url)
    Nokogiri::HTML(io)
  end

end
|
128
|
+
end
|
data/lib/imdb/search.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
module IMDB
|
2
|
+
# Searches IMDB titles by keyword and collects the hits as IMDB::Result objects.
class Search
  # Run a title search.
  # @param keyword [String] search text (URL-escaped before use)
  # @return [Array] one IMDB::Result per distinct title id found
  def movie(keyword)
    doc = fetch_page("http://www.imdb.com/find?s=tt&q=#{CGI.escape(keyword)}")
    @ret_val = []
    if doc.at("h1.header") # we're already being redirected to movie's page
      single_result(doc)
    else
      result_list(doc)
    end
    @ret_val
  end

  # Results of the last search keyed by their position (0, 1, 2, ...).
  # @return [Hash] empty when no search has been run yet
  def to_hash
    indexed = {}
    (@ret_val || []).each_with_index { |result, position| indexed[position] = result.to_hash }
    indexed
  end

  # Serialize the last search to JSON.
  # Accepts (and forwards) the generator arguments so a Search can also be
  # nested inside another structure that is being converted to JSON.
  # @return [String]
  def to_json(*args)
    to_hash.to_json(*args)
  end

  private

  # Handles the case where the response is a movie page: builds the single
  # result from the page header and its canonical link.
  def single_result(doc)
    link = doc.at("link[rel=canonical]")["href"]
    title = doc.at("h1.header").text.strip.gsub(/\s+/, " ")
    @ret_val << IMDB::Result.new(link[/\d+/], title, link)
  end

  # Handles a result listing: every "/title/tt..." link with visible text
  # becomes a result. Links sharing an id collapse into one entry that keeps
  # the position of the first occurrence and the data of the last one.
  def result_list(doc)
    by_id = {}
    doc.search('a[@href^="/title/tt"]').each do |node|
      text = node.content
      # Skip links whose text is empty or whitespace only (e.g. image links).
      next if text.nil? || text !~ /[^[:space:]]/

      href = node["href"]
      id = href[/\d+/]
      by_id[id] = IMDB::Result.new(id, text, "http://www.imdb.com#{href}")
    end
    @ret_val = by_id.values
  end

  # Downloads +url+ and parses it as HTML.
  # Uses URI.open where open-uri provides it (Kernel#open stopped opening
  # URLs in Ruby 3.0) and falls back to Kernel#open on older Rubies.
  def fetch_page(url)
    io = defined?(URI) && URI.respond_to?(:open) ? URI.open(url) : open(url)
    Nokogiri::HTML(io)
  end
end # Search
|
52
|
+
|
53
|
+
# One hit of an IMDB title search: the title id, the displayed title text
# and the link to the title page.
class Result < IMDB::Skeleton
  # Plain readers for the values captured at construction time.
  attr_reader :title, :link, :imdb_id

  # @param imdb_id [String] digits of the title id
  # @param title [String] text shown for the hit
  # @param link [String] URL of the title page
  def initialize(imdb_id, title, link)
    fields = { :title => String,
               :link => String,
               :imdb_id => String }
    super("Result", fields, [:imdb_id])
    @title, @link, @imdb_id = title, link, imdb_id
  end

  # Build the Movie object for this hit.
  # @return [Movie]
  def movie
    Movie.new(@imdb_id)
  end

end
|
81
|
+
end
|
82
|
+
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module IMDB
|
2
|
+
# IMDB generic interface.
|
3
|
+
# Base class of the scraper objects. It records which attribute methods a
# subclass exposes (+method_names+, name => declared type) and, when
# IMDB::Configuration.caching is on, wraps each of them with a lookup in a
# MongoMapper document so scraped values are stored and reused.
class Skeleton
  attr_accessor :model, :method_names

  # @param model_name [String] collection name used for the cache model
  # @param method_names [Hash] attribute method name => declared type
  # @param keys [Array] attribute names that identify a record; these are
  #   never wrapped with the caching layer
  def initialize(model_name = "", method_names = {}, keys = [])
    if IMDB::Configuration.caching
      @model = Class.new do
        include MongoMapper::Document
        set_collection_name model_name
        method_names.each_pair { |attribute, type| key attribute, type }
      end

      # NOTE(review): class_eval is invoked with the instance as implicit
      # receiver; presumably an extension loaded elsewhere provides it -- confirm.
      class_eval do
        method_names.each_key do |meth|
          next if keys.include?(meth)

          # Keep the scraping implementation reachable under "old_<name>".
          old_meth = "old_#{meth}".to_sym
          alias_method old_meth, meth.to_sym

          define_method meth do
            # to_imdb_hash is defined outside this file; it receives a block
            # that resolves each key name to its current value.
            @db_query = self.model.first(keys.to_imdb_hash { |key_name| self.send(key_name) })

            if @db_query.nil?
              @db_query = self.model.new(keys.to_imdb_hash { |key_name| self.send(key_name) })
              @db_query.save
            end

            stored = @db_query[meth]
            if stored.nil? || (stored.kind_of?(Array) && stored.length.zero?)
              fresh = send(old_meth)
              if fresh.kind_of?(Array)
                # Arrays are cleaned in place; non-string items are stored as hashes.
                fresh.compact!
                fresh.map! { |item| item.kind_of?(String) ? item : item.to_hash }
              end
              @db_query[meth] = fresh
              @db_query.save
            end

            @db_query[meth]
          end
        end
      end
    end
    @method_names = method_names
  end

  # Serialize method's output to json
  # @return [String]
  def to_json(*a)
    to_hash.to_json(*a)
  end

  # Collect the value of every registered attribute method.
  # Only Array and String values are included; anything else is left out.
  # NOTE(review): this drops Float/Integer/Date attributes -- confirm intended.
  # @return [Hash]
  def to_hash(*a)
    collected = {}
    @method_names.each_key do |attribute|
      value = self.send(attribute)
      collected[attribute] = value if value.kind_of?(Array) || value.kind_of?(String)
    end
    collected
  end

  # Rebuild an instance from a hash whose 'data' entry holds the
  # constructor arguments.
  def self.json_create(o)
    ctor_args = o['data']
    new(*ctor_args)
  end
end
|
82
|
+
end
|
83
|
+
|