imdb-scan 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
module IMDB
  # Get movie information with IMDB movie id.
  #
  # Every scraper method fetches (and memoizes) the HTML page it needs through
  # the private #doc helper and returns nil when the expected markup is absent.
  #
  # @example Get Yahsi Bati movie title and cast listing [http://www.imdb.com/title/tt1567448/]
  #   m = IMDB::Movie.new('1567448')
  #   puts m.title
  class Movie < IMDB::Skeleton
    attr_accessor :link, :imdb_id

    # @param id_of [String] the digits of the IMDB title id (the part after "tt")
    def initialize(id_of)
      # !!!DON'T FORGET DEFINE NEW METHODS IN SUPER!!!
      super("Movie", { :imdb_id => String,
                       :poster => String,
                       :title => String,
                       :release_date => String,
                       :cast => Array,
                       :photos => Array,
                       :director => String,
                       :director_person => Person,
                       :genres => Array,
                       :rating => Float,
                       :movielength => Integer,
                       :short_description => String,
                       :writers => Array }, [:imdb_id])

      @imdb_id = id_of
      @link = "http://www.imdb.com/title/tt#{@imdb_id}"
    end

    # Get movie poster address
    #
    # When the image URL carries a "._V1..." sizing suffix, the suffix is
    # removed so the address points at the unscaled JPEG; any other URL is
    # returned untouched.
    # @return [String, nil] nil when the poster image cannot be found
    def poster
      src = doc.at("#img_primary img")["src"] rescue nil
      return nil if src.nil?

      # A URL that contains "._V1" but does not end in ".jpg" is returned as is
      # instead of raising on a failed match.
      sized = src.match(/(.*)\._V1.*(\.jpg)/)
      sized ? sized[1, 2].join : src
    end

    # Get movie title
    #
    # Prefers the <meta name="title"> content with the "(year)" part and
    # everything after it removed; falls back to the first child of h1.header.
    # @return [String, nil] nil when neither element is present
    def title
      meta = doc.at("//head/meta[@name='title']")
      from_meta = meta && meta["content"].to_s.split(/\(\d+\)/)[0]
      # Non-destructive strip: strip! returns nil when there is nothing to
      # remove, which used to throw away an already clean title.
      from_meta = from_meta.strip if from_meta
      return from_meta unless from_meta.nil? || from_meta.empty?

      header = doc.at("h1.header")
      first_child = header && header.children.first
      first_child && first_child.text.strip
    end

    # Get movie cast listing
    #
    # Rows of the cast table that carry no "/name/nm..." link are skipped.
    # @return [Cast[]]
    def cast
      doc.search("table.cast tr").map do |row|
        id = row.children[1].search('a[@href^="/name/nm"]').first["href"].match(/\/name\/nm([0-9]+)/)[1] rescue nil
        char = row.children[3].content.strip rescue nil
        next if id.nil?

        IMDB::Cast.new(self, IMDB::Person.new(id), char)
      end.compact
    end

    # Get movie photos
    # @return [Array, nil] thumbnail "src" values, nil on any failure
    def photos
      doc.search('#main .thumb_list img').map { |img| img["src"] }
    rescue
      nil
    end

    # Get release date
    #
    # A year-only value is normalised to January 1st of that year.
    # @return [String, nil] date formatted as YYYY-MM-DD, nil on any failure
    def release_date
      node = doc.xpath("//h4[contains(., 'Release Date')]/..")
      if node.length > 0
        date = node.search("time").first["datetime"]
        date =~ /\A\d{4}\z/ ? "#{date}-01-01" : Date.parse(date).to_s
      else
        year = doc.at("h1.header .nobr").text[/\d{4}/]
        "#{year}-01-01"
      end
    rescue
      nil
    end

    # Get Director
    # @return [String, nil] the director's name
    def director
      self.director_person.name rescue nil
    end

    # Get Director Person class
    # @return [Person, nil] nil when no director link is found
    def director_person
      link = doc.xpath("//h4[contains(., 'Director')]/..").at('a[@href^="/name/nm"]')
      profile = link['href'].match(/\/name\/nm([0-9]+)/)[1] rescue nil
      IMDB::Person.new(profile) unless profile.nil?
    rescue
      nil
    end

    # Genre List
    # @return [Array, nil] genre names without the "See more" link, nil on any failure
    def genres
      doc.xpath("//h4[contains(., 'Genre')]/..").search("a").map { |g|
        g.content unless g.content =~ /See more/
      }.compact
    rescue
      nil
    end

    # Get the movie rating (memoized in @rating)
    # @return [Float, nil] nil on any failure
    def rating
      @rating ||= doc.search(".star-box-giga-star").text.strip.to_f
    rescue
      nil
    end

    # Get the movielength of the movie in minutes
    # @return [Integer, nil] nil on any failure
    def movielength
      doc.at("//h4[text()='Runtime:']/..").inner_html[/\d+ min/].to_i rescue nil
    end

    # Writer List
    #
    # Links whose href does not contain a person id are dropped rather than
    # left in the result as nil entries.
    # @return [Array] one Person per writer link on the full credits page
    def writers
      doc.xpath("//a[@name='writers']/../../../..").search('a[@href^="/name/nm"]').map { |w|
        profile = w['href'].match(/\/name\/nm([0-9]+)/)[1] rescue nil
        IMDB::Person.new(profile) unless profile.nil?
      }.compact
    end

    # Get the short plot description
    # @return [String, nil] nil when the description paragraph is absent
    def short_description
      node = doc.at("#overview-top p[itemprop=description]")
      node && node.text.strip
    end

    private

    # Returns the memoized document for the scraper that called it: the full
    # credits page for #cast and #writers, the media index for #photos, and
    # the main title page for everything else.
    #
    # NOTE(review): fetching goes through Kernel#open with a URL, which depends
    # on open-uri's Kernel#open support (removed in Ruby 3.0) - confirm the
    # targeted Ruby versions.
    def doc
      case calling_method(caller[0])
      when "cast", "writers"
        @doc_full ||= Nokogiri::HTML(open("#{@link}/fullcredits"))
      when "photos"
        @doc_photo ||= Nokogiri::HTML(open("#{@link}/mediaindex"))
      else
        @doc ||= Nokogiri::HTML(open("#{@link}"))
      end
    end

    # Extracts the method label from a backtrace line. Accepts both the
    # "in `cast'" form and the "in 'IMDB::Movie#cast'" form used since
    # Ruby 3.4; a leading class qualifier is removed so both yield "cast".
    def calling_method(frame)
      label = frame.to_s[/in [`'](.*)'\s*\z/, 1].to_s
      label.sub(/\A[A-Z][\w:]*[#.]/, "")
    end

  end # Movie
end # IMDB
165
+
@@ -0,0 +1,128 @@
1
module IMDB
  # Get Person information with IMDB person id.
  #
  # Scraper methods read from memoized Nokogiri documents (main page, bio page,
  # photo page) and return nil when the expected markup is absent.
  #
  # @example Get Christian Bale information [http://www.imdb.com/name/nm0000288/]
  #   m = IMDB::Person.new('0000288')
  #   puts m.name
  #   puts m.real_name
  class Person < IMDB::Skeleton
    attr_accessor :id

    # @param imdb_id [String] the digits of the IMDB person id (the part after "nm")
    def initialize(imdb_id)
      super("Person", { :id => String,
                        :name => String,
                        :real_name => String,
                        :birthdate => Date,
                        :deathdate => Date,
                        :nationality => String,
                        :height => String,
                        :biography => String,
                        :photo => String,
                        :profile_path => String,
                        :filmography => Hash,
                        :main_document => Nokogiri,
                        :bio_document => Nokogiri,
                        :photo_document => Nokogiri,
                        :photo_document_url => String }, [:id])
      @id = imdb_id
    end

    # Get the profile path
    # @return [String]
    def profile_path
      "/name/nm#{@id}"
    end

    # Get the name of the person
    # @return [String, nil]
    def name
      bio_document.at("a[@class='main']").inner_text rescue nil
    end

    # Get The Real Born name of the Person
    # @return [String, nil]
    def real_name
      bio_document.at("h5[text()*='Birth Name']").next.inner_text.strip rescue nil
    end

    # Get The Birth Date
    #
    # Day/month and year are read from separate elements and parsed together.
    # @return [Date, nil] nil when the combined text cannot be parsed
    def birthdate
      date_month = bio_document.at("h5[text()*='Date of Birth']").next_element.inner_text.strip rescue ""
      year = bio_document.at("a[@href*='birth_year']").inner_text.strip rescue ""
      Date.parse("#{date_month} #{year}") rescue nil
    end

    # Get The death date else nil
    # @return [Date, nil]
    def deathdate
      date_month = bio_document.at("h5[text()*='Date of Death']").next_element.inner_text.strip rescue ""
      year = bio_document.at("a[@href*='death_date']").inner_text.strip rescue ""
      Date.parse("#{date_month} #{year}") rescue nil
    end

    # Get the Nationality (text of the birth place link)
    # @return [String, nil]
    def nationality
      bio_document.at("a[@href*='birth_place']").inner_text.strip rescue nil
    end

    # Get the height (the parenthesised part of the Height entry)
    # @return [String, nil]
    def height
      bio_document.at("h5[text()*='Height']").next.inner_text.match(/\((.+)\)/)[1] rescue nil
    end

    # Get The Biography
    # @return [String, nil]
    def biography
      bio_document.at("h5[text()*='Biography']").next_element.inner_text rescue nil
    end

    # Return the principal Photo
    # @return [String, nil] "src" of the primary image, nil when there is no photo page
    def photo
      photo_document.at("img#primary-img").get_attribute('src') if photo_document rescue nil
    end

    # Return the Filmography
    #
    # All ".filmo-row" entries are returned in one flat array. A per-role
    # breakdown (a Hash with writer/actor/director/composer keys built from the
    # "#filmo-head-*" sections) was sketched before but is not implemented.
    #
    # Ids are passed to Movie as the strings found in the links: converting
    # them to integers would drop leading zeros and yield a wrong title URL.
    # Links without a "tt<digits>" id are skipped.
    # @return [Array<Movie>]
    def filmography
      film_ids = main_document.css(".filmo-row b a").map { |e| e.get_attribute('href')[/tt(\d+)/, 1] } rescue []
      film_ids.compact.map { |film_id| Movie.new(film_id) }
    end

    # Main profile page, fetched once and memoized.
    #
    # NOTE(review): the document loaders fetch through Kernel#open with a URL,
    # which depends on open-uri's Kernel#open support (removed in Ruby 3.0) -
    # confirm the targeted Ruby versions.
    def main_document
      @main_document ||= Nokogiri::HTML(open("http://www.imdb.com#{profile_path}"))
    end

    # Biography page, fetched once and memoized.
    def bio_document
      @bio_document ||= Nokogiri(open("http://www.imdb.com#{profile_path}/bio"))
    end

    # Headshot page, fetched once and memoized.
    # @return [Object, nil] nil when the bio page has no headshot link
    def photo_document
      return @photo_document if @photo_document

      # Look the link up once; each lookup queries the bio document.
      url = photo_document_url
      @photo_document = url ? Nokogiri(open("http://www.imdb.com" + url)) : nil
    end

    # Path of the headshot page as linked from the bio page
    # @return [String, nil]
    def photo_document_url
      bio_document.at(".photo a[@name=headshot]").get_attribute('href') rescue nil
    end

  end
end
@@ -0,0 +1,82 @@
1
+ module IMDB
2
# Title search against the IMDB find page.
class Search
  # Search movies by keyword.
  #
  # The results are also remembered on the instance so that #to_hash and
  # #to_json can serialize the most recent search.
  #
  # NOTE(review): fetching goes through Kernel#open with a URL, which depends
  # on open-uri's Kernel#open support (removed in Ruby 3.0) - confirm the
  # targeted Ruby versions.
  #
  # @param keyword [String] free-text query; it is CGI-escaped before use
  # @return [Array<IMDB::Result>]
  def movie(keyword)
    doc = Nokogiri::HTML(open("http://www.imdb.com/find?s=tt&q=#{CGI.escape(keyword)}"))
    @ret_val = []
    if doc.at("h1.header") # we're already being redirected to movie's page
      single_result(doc)
    else
      result_list(doc)
    end
    @ret_val
  end

  # Results of the last search keyed by their position.
  # @return [Hash] empty when no search has been run yet
  def to_hash
    tmp_hash = {}
    (@ret_val || []).each_with_index { |result, index| tmp_hash[index] = result.to_hash }
    tmp_hash
  end

  # JSON form of #to_hash. Accepts (and forwards) the generator arguments that
  # the JSON library passes when this object is nested in another structure.
  # @return [String]
  def to_json(*a)
    to_hash.to_json(*a)
  end

  private

  # The find page redirected straight to a title page: build the single
  # result from its header and canonical link.
  def single_result(doc)
    link = doc.at("link[rel=canonical]")["href"]
    title = doc.at("h1.header").text.strip.gsub(/\s+/, " ")
    @ret_val << IMDB::Result.new(link[/\d+/], title, link)
  end

  # Collect every title link that has visible text, one result per id.
  # When an id occurs more than once it keeps the position of its first
  # occurrence and the data of its last one.
  def result_list(doc)
    by_id = {}
    doc.search('a[@href^="/title/tt"]').each do |node|
      label = node.content
      # Skip empty/whitespace-only anchors (e.g. image links) without relying
      # on ActiveSupport's String#blank?.
      next if label.nil? || label !~ /[^[:space:]]/

      id = node["href"][/\d+/]
      by_id[id] = IMDB::Result.new(id, label, "http://www.imdb.com#{node['href']}")
    end

    @ret_val = by_id.values
  end
end # Search
52
+
53
# One entry of a title search: the IMDB id, the displayed title and the link
# of the title page.
class Result < IMDB::Skeleton
  # Plain readers for the values handed to the constructor.
  attr_reader :title, :link, :imdb_id

  # @param imdb_id [String] the digits of the IMDB title id
  # @param title [String] title text as shown in the search result
  # @param link [String] address of the title page
  def initialize(imdb_id, title, link)
    super("Result", {
      :title => String,
      :link => String,
      :imdb_id => String }, [:imdb_id])
    @imdb_id = imdb_id
    @link = link
    @title = title
  end

  # Build the Movie object for this result.
  # @return [Movie]
  def movie
    Movie.new(@imdb_id)
  end

end
81
+ end
82
+
@@ -0,0 +1,83 @@
1
module IMDB
  # IMDB generic interface.
  #
  # Base class of the scraped models. Subclasses pass the list of their
  # attribute methods (name => type) so that instances can be serialized and,
  # when IMDB::Configuration.caching is on, so that each attribute method is
  # wrapped with a lookup in a MongoMapper collection.
  class Skeleton
    attr_accessor :model, :method_names

    # @param model_name [String] collection name used for the cache model
    # @param method_names [Hash] attribute method name => type
    # @param keys [Array] attribute names identifying a record; these are
    #   never wrapped by the cache
    def initialize(model_name = "", method_names = {}, keys = [])
      if IMDB::Configuration.caching
        # Anonymous document class with one key per attribute.
        @model = Class.new do
          include MongoMapper::Document
          set_collection_name model_name
          method_names.each { |attr_name, attr_type| key attr_name, attr_type }
        end

        # NOTE(review): class_eval is invoked on the instance here; a plain
        # Ruby object has no such method, so this presumably relies on an
        # extension loaded elsewhere - confirm before touching.
        class_eval do
          method_names.each_key do |meth|
            next if keys.include?(meth)

            # Keep the scraping implementation reachable as old_<name>.
            old_meth = "old_#{meth}".to_sym
            alias_method old_meth, meth.to_sym

            define_method meth do
              lookup = keys.to_imdb_hash { |key_name| key_name; self.send(key_name) }
              @db_query = self.model.first(lookup)

              # No record yet for these keys: create one.
              if @db_query.nil?
                @db_query = self.model.new(keys.to_imdb_hash { |key_name| key_name; self.send(key_name) })
                @db_query.save
              end

              # A missing value or an empty array counts as "not cached".
              missing = @db_query[meth].nil? ||
                        (@db_query[meth].kind_of?(Array) && @db_query[meth].length.zero?)
              if missing
                fresh = send(old_meth)
                if fresh.kind_of?(Array)
                  # Arrays are stored without nils; non-string items are
                  # stored through their to_hash form.
                  fresh.compact!
                  fresh.map! { |item| item.kind_of?(String) ? item : item.to_hash }
                end
                @db_query[meth] = fresh
                @db_query.save
              end

              @db_query[meth]
            end
          end
        end
      end

      @method_names = method_names
    end

    # Serialize method's output to json
    # @return [String]
    def to_json(*a)
      to_hash.to_json(*a)
    end

    # Collect the results of all attribute methods. Only String and Array
    # results are included; every other result is left out.
    # @return [Hash] attribute name => value
    def to_hash(*a)
      @method_names.each_key.each_with_object({}) do |attr_name, collected|
        value = self.send attr_name
        case value
        when Array
          # In-place identity pass: the very same array object is stored.
          collected[attr_name] = value.map! { |item| item }
        when String
          collected[attr_name] = value
        end
      end
    end

    # Rebuild an instance from the 'data' entry of a parsed JSON object.
    def self.json_create(o)
      new(*o['data'])
    end
  end
end
83
+