imdb-scan 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,165 @@
1
module IMDB
  # Get movie information with IMDB movie id.
  #
  # Readers scrape one of three IMDB pages (title page, full credits, media
  # index). Each page is downloaded lazily and memoized per instance.
  #
  # @example Get Yahsi Bati movie title and cast listing [http://www.imdb.com/title/tt1567448/]
  #   m = IMDB::Movie.new('1567448')
  #   puts m.title
  class Movie < IMDB::Skeleton
    attr_accessor :link, :imdb_id

    # @param id_of [String] IMDB title id without the "tt" prefix
    def initialize(id_of)
      # !!!DON'T FORGET DEFINE NEW METHODS IN SUPER!!!
      super("Movie", { :imdb_id => String,
                       :poster => String,
                       :title => String,
                       :release_date => String,
                       :cast => Array,
                       :photos => Array,
                       :director => String,
                       :director_person => Person,
                       :genres => Array,
                       :rating => Float,
                       :movielength => Integer,
                       :short_description => String,
                       :writers => Array }, [:imdb_id])

      @imdb_id = id_of
      @link = "http://www.imdb.com/title/tt#{@imdb_id}"
    end

    # Get movie poster address
    # @return [String, nil] poster URL, or nil when the page has no poster
    def poster
      img = doc.at("#img_primary img")
      src = img && img["src"]
      return nil if src.nil?

      # Thumbnail URLs carry a "._V1..." sizing suffix before the extension;
      # dropping it gives the full-size image. Anything else is returned as is.
      sized = src.match(/(.*)\._V1.*(\.jpg)/)
      sized ? sized[1, 2].join : src
    rescue StandardError
      nil
    end

    # Get movie title
    #
    # Prefers the text preceding "(year)" in the <meta name="title"> tag and
    # falls back to the page header when that tag is missing or has no year.
    # @return [String, nil]
    def title
      meta = doc.at("//head/meta[@name='title']")
      content = meta && meta["content"].to_s
      if content && content =~ /\(\d+\)/
        from_meta = content.split(/\(\d+\)/).first.to_s.strip
        return from_meta unless from_meta.empty?
      end

      header = doc.at("h1.header")
      first_child = header && header.children.first
      first_child && first_child.text.strip
    end

    # Get movie cast listing (read from the full credits page)
    # @return [Array<Cast>]
    def cast
      credits_doc.search("table.cast tr").map do |row|
        cells = row.children
        id = begin
          person_id(cells[1].search('a[@href^="/name/nm"]').first)
        rescue StandardError
          nil
        end
        next if id.nil?

        char = begin
          cells[3].content.strip
        rescue StandardError
          nil
        end
        IMDB::Cast.new(self, IMDB::Person.new(id), char)
      end.compact
    end

    # Get movie photos (read from the media index page)
    # @return [Array<String>, nil] image URLs, or nil when the page cannot be read
    def photos
      media_doc.search('#main .thumb_list img').map { |img| img["src"] }
    rescue StandardError
      nil
    end

    # Get release date
    # @return [String, nil] ISO date; a bare year is reported as January 1st
    def release_date
      section = doc.xpath("//h4[contains(., 'Release Date')]/..")
      if section.length > 0
        date = section.search("time").first["datetime"].to_s.strip
        date =~ /\A\d{4}\z/ ? "#{date}-01-01" : Date.parse(date).to_s
      else
        year = doc.at("h1.header .nobr").text[/\d{4}/]
        # Without a year there is nothing meaningful to report.
        year && "#{year}-01-01"
      end
    rescue StandardError
      nil
    end

    # Get Director
    # @return [String, nil] the director's name
    def director
      director_person.name
    rescue StandardError
      nil
    end

    # Get Director Person class
    # @return [Person, nil]
    def director_person
      anchor = doc.xpath("//h4[contains(., 'Director')]/..").at('a[@href^="/name/nm"]')
      profile = person_id(anchor)
      IMDB::Person.new(profile) unless profile.nil?
    rescue StandardError
      nil
    end

    # Genre List
    # @return [Array<String>, nil]
    def genres
      doc.xpath("//h4[contains(., 'Genre')]/..").search("a")
         .map { |g| g.content }
         .reject { |name| name =~ /See more/ }
    rescue StandardError
      nil
    end

    # Get the movie rating
    # @return [Float, nil] 0.0 when the rating box is absent from the page
    def rating
      @rating ||= doc.search(".star-box-giga-star").text.strip.to_f
    rescue StandardError
      nil
    end

    # Get the movielength of the movie in minutes
    # @return [Integer, nil]
    def movielength
      doc.at("//h4[text()='Runtime:']/..").inner_html[/\d+ min/].to_i
    rescue StandardError
      nil
    end

    # Writer List (read from the full credits page)
    # @return [Array<Person>]
    def writers
      anchors = credits_doc.xpath("//a[@name='writers']/../../../..").search('a[@href^="/name/nm"]')
      anchors.map { |a|
        profile = person_id(a)
        IMDB::Person.new(profile) unless profile.nil?
      }.compact
    end

    # @return [String, nil] the short plot summary from the title page
    def short_description
      node = doc.at("#overview-top p[itemprop=description]")
      node && node.text.strip
    end

    private

    # Main title page.
    def doc
      @doc ||= fetch(@link)
    end

    # Full credits page, used by #cast and #writers.
    def credits_doc
      @doc_full ||= fetch("#{@link}/fullcredits")
    end

    # Media index page, used by #photos.
    def media_doc
      @doc_photo ||= fetch("#{@link}/mediaindex")
    end

    # Download and parse a page. URI.open is used rather than Kernel#open,
    # which no longer accepts URLs on Ruby 3.0+.
    def fetch(url)
      Nokogiri::HTML(URI.open(url))
    end

    # Extract the numeric id from an anchor whose href contains "/name/nm<ID>".
    # @return [String, nil]
    def person_id(anchor)
      anchor && anchor['href'].to_s[%r{/name/nm([0-9]+)}, 1]
    end

  end # Movie
end # IMDB
165
+
@@ -0,0 +1,128 @@
1
module IMDB
  # Get Person information with IMDB person id.
  #
  # Readers are best-effort: when the expected markup is missing (or the page
  # cannot be fetched) they return nil instead of raising.
  #
  # @example Get Christian Bale information [http://www.imdb.com/name/nm0000288/]
  #   m = IMDB::Person.new('0000288')
  #   puts m.name
  #   puts m.real_name
  class Person < IMDB::Skeleton
    attr_accessor :id

    # @param imdb_id [String] IMDB person id without the "nm" prefix
    def initialize(imdb_id)
      super("Person", { :id => String,
                        :name => String,
                        :real_name => String,
                        :birthdate => Date,
                        :deathdate => Date,
                        :nationality => String,
                        :height => String,
                        :biography => String,
                        :photo => String,
                        :profile_path => String,
                        :filmography => Hash,
                        :main_document => Nokogiri,
                        :bio_document => Nokogiri,
                        :photo_document => Nokogiri,
                        :photo_document_url => String }, [:id])
      @id = imdb_id
    end

    # Get the profile path
    # @return [String]
    def profile_path
      "/name/nm#{@id}"
    end

    # Get the name of the person
    # @return [String, nil]
    def name
      safely { bio_document.at("a[@class='main']").inner_text }
    end

    # Get The Real Born name of the Person
    # @return [String, nil]
    def real_name
      safely { bio_document.at("h5[text()*='Birth Name']").next.inner_text.strip }
    end

    # Get The Birth Date
    # @return [Date, nil]
    def birthdate
      life_date('Date of Birth', 'birth_year')
    end

    # Get The death date else nil
    # @return [Date, nil]
    def deathdate
      life_date('Date of Death', 'death_date')
    end

    # Get the Nationality
    # @return [String, nil]
    def nationality
      safely { bio_document.at("a[@href*='birth_place']").inner_text.strip }
    end

    # Get the height
    # @return [String, nil] the parenthesised part of the height entry
    def height
      safely { bio_document.at("h5[text()*='Height']").next.inner_text.match(/\((.+)\)/)[1] }
    end

    # Get The Biography
    # @return [String, nil]
    def biography
      safely { bio_document.at("h5[text()*='Biography']").next_element.inner_text }
    end

    # Return the principal Photo
    # @return [String, nil]
    def photo
      safely do
        page = photo_document
        page && page.at("img#primary-img").get_attribute('src')
      end
    end

    # Return the Filmography
    #
    # All credits are returned in one flat list; they are not yet split by
    # role (writer / actor / director / composer).
    # @return [Array<Movie>]
    def filmography
      ids = safely([]) do
        main_document.css(".filmo-row b a").map { |e| e.get_attribute('href')[/tt(\d+)/, 1] }
      end
      # Ids stay Strings: converting to Integer would drop the leading zeros
      # that are part of the IMDB id and of the title URL.
      ids.compact.map { |id| Movie.new(id) }
    end

    # @return [Nokogiri::HTML::Document] the person's main page
    def main_document
      @main_document ||= fetch(profile_path)
    end

    # @return [Nokogiri::HTML::Document] the person's biography page
    def bio_document
      @bio_document ||= fetch("#{profile_path}/bio")
    end

    # @return [Nokogiri::HTML::Document, nil] the headshot page, if one is linked
    def photo_document
      return @photo_document if @photo_document

      url = photo_document_url
      @photo_document = url ? fetch(url) : nil
    end

    # @return [String, nil] site-relative path of the headshot page
    def photo_document_url
      safely { bio_document.at(".photo a[@name=headshot]").get_attribute('href') }
    end

    private

    # Download and parse a site-relative path. URI.open is used rather than
    # Kernel#open, which no longer accepts URLs on Ruby 3.0+.
    def fetch(path)
      Nokogiri::HTML(URI.open("http://www.imdb.com#{path}"))
    end

    # Run the block, returning +default+ if it raises a StandardError.
    def safely(default = nil)
      yield
    rescue StandardError
      default
    end

    # Build a Date from the "day month" text that follows +heading+ and the
    # year taken from the link whose href contains +href_fragment+.
    def life_date(heading, href_fragment)
      day_month = safely("") { bio_document.at("h5[text()*='#{heading}']").next_element.inner_text.strip }
      year = safely("") { bio_document.at("a[@href*='#{href_fragment}']").inner_text.strip }
      safely { Date.parse("#{day_month} #{year}") }
    end

  end
end
@@ -0,0 +1,82 @@
1
+ module IMDB
2
# Title search against the IMDB "find" page.
class Search
  # Search movies by keyword.
  # @param keyword [String] free-text query
  # @return [Array<IMDB::Result>] matching titles, one entry per IMDB id
  def movie(keyword)
    # URI.open rather than Kernel#open, which no longer accepts URLs on Ruby 3.0+.
    doc = Nokogiri::HTML(URI.open("http://www.imdb.com/find?s=tt&q=#{CGI.escape(keyword)}"))
    @ret_val = []
    if doc.at("h1.header") # we're already being redirected to movie's page
      single_result(doc)
    else
      result_list(doc)
    end
    @ret_val
  end

  # Results of the last search keyed by their position.
  # @return [Hash{Integer => Hash}] empty when no search has been run yet
  def to_hash
    tmp_hash = {}
    (@ret_val || []).each_with_index { |result, i| tmp_hash[i] = result.to_hash }
    tmp_hash
  end

  # Serialize the last search to JSON.
  # Accepts the generator state so the object can be nested in other
  # structures (e.g. +[search].to_json+).
  # @return [String]
  def to_json(*a)
    to_hash.to_json(*a)
  end

  private

  # The query redirected straight to a title page: build the one result from it.
  def single_result(doc)
    link = doc.at("link[rel=canonical]")["href"]
    title = doc.at("h1.header").text.strip.gsub(/\s+/, " ")
    @ret_val << IMDB::Result.new(link[/\d+/], title, link)
  end

  # Collect every title link of a result page, one result per IMDB id.
  # When an id appears several times the last link wins, but the result keeps
  # the position of the first occurrence.
  def result_list(doc)
    by_id = {}
    doc.search('a[@href^="/title/tt"]').each do |node|
      # Skip links without visible text (typically thumbnail links).
      next if node.content.to_s !~ /[^[:space:]]/

      href = node['href']
      id = href[/\d+/]
      by_id[id] = IMDB::Result.new(id, node.content, "http://www.imdb.com#{href}")
    end
    @ret_val = by_id.values
  end
end # Search
52
+
53
# A single hit of a title search: id, display title and page link.
class Result < IMDB::Skeleton
  # @return [String] display title / absolute page URL / numeric IMDB id
  attr_reader :title, :link, :imdb_id

  # @param imdb_id [String] numeric part of the title id
  # @param title [String] title text as shown in the result list
  # @param link [String] absolute URL of the title page
  def initialize(imdb_id, title, link)
    schema = { :title => String,
               :link => String,
               :imdb_id => String }
    super("Result", schema, [:imdb_id])
    @imdb_id = imdb_id
    @link = link
    @title = title
  end

  # Build the full movie object for this hit.
  # @return [Movie]
  def movie
    Movie.new(@imdb_id)
  end

end
81
+ end
82
+
@@ -0,0 +1,83 @@
1
module IMDB
  # IMDB generic interface.
  #
  # Base class of the scraped models. Subclasses pass their readable
  # attributes (+method_names+, name => type) and the attributes identifying a
  # record (+keys+). When IMDB::Configuration.caching is enabled, every
  # non-key reader of the instance is wrapped so that its value is looked up
  # in, and stored to, a MongoMapper document.
  class Skeleton
    attr_accessor :model, :method_names

    # @param model_name [String] collection name used for the cache model
    # @param method_names [Hash{Symbol => Class}] reader name => stored type
    # @param keys [Array<Symbol>] readers that identify a cached record
    def initialize(model_name = "", method_names = {}, keys = [])
      enable_caching(model_name, method_names, keys) if IMDB::Configuration.caching
      @method_names = method_names
    end

    # Serialize method's output to json
    # @return [String]
    def to_json(*a)
      to_hash.to_json(*a)
    end

    # Collect the value of every declared reader into a Hash.
    # Only Array and String values are included; other values (numbers,
    # dates, nested models, nil) are left out.
    # @return [Hash{Symbol => Array, String}]
    def to_hash(*a)
      tmp_hash = {}
      @method_names.each_key do |name|
        value = send(name)
        tmp_hash[name] = value if value.is_a?(Array) || value.is_a?(String)
      end
      tmp_hash
    end

    def self.json_create(o)
      new(*o['data'])
    end

    private

    # Build the MongoMapper model for this instance and wrap its readers.
    def enable_caching(model_name, method_names, keys)
      @model = Class.new do
        include MongoMapper::Document
        set_collection_name model_name
        method_names.each { |name, type| key name, type }
      end

      method_names.each_key do |meth|
        wrap_with_cache(meth, keys) unless keys.include?(meth)
      end
    end

    # Replace reader +meth+ on this instance with a version that consults the
    # cache first; the original stays reachable as +old_<meth>+.
    #
    # The wrapper lives on the singleton class, so every instance aliases the
    # untouched class-level reader. Aliasing on the class itself would alias
    # the wrapper to itself on the second instantiation and recurse forever.
    def wrap_with_cache(meth, keys)
      old_meth = "old_#{meth}".to_sym

      singleton_class.class_eval do
        alias_method old_meth, meth.to_sym

        define_method(meth) do
          @db_query = model.first(keys.to_imdb_hash { |key| send(key) })

          if @db_query.nil?
            @db_query = model.new(keys.to_imdb_hash { |key| send(key) })
            @db_query.save
          end

          cached = @db_query[meth]
          if cached.nil? || (cached.is_a?(Array) && cached.empty?)
            fresh = send(old_meth)
            if fresh.is_a?(Array)
              # Nested models are stored as plain hashes; strings as they are.
              fresh = fresh.compact.map { |item| item.is_a?(String) ? item : item.to_hash }
            end
            @db_query[meth] = fresh
            @db_query.save
          end
          @db_query[meth]
        end
      end
    end
  end
end
83
+