yayimdbs 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/yay_imdbs.rb +30 -10
- metadata +3 -3
data/lib/yay_imdbs.rb
CHANGED
@@ -18,6 +18,13 @@ class YayImdbs
|
|
18
18
|
|
19
19
|
STRIP_WHITESPACE = /(\s{2,}|\n|\||\302\240\302\273)/u
|
20
20
|
|
21
|
+
MORE_INFO_LINKS = ['See more',
|
22
|
+
'Add/edit official sites',
|
23
|
+
'See all certifications',
|
24
|
+
'See full summary',
|
25
|
+
'see all',
|
26
|
+
]
|
27
|
+
|
21
28
|
DATE_PROPERTIES = [:release_date]
|
22
29
|
LIST_PROPERTIES = [:genres, :plot_keywords, :country, :sound_mix, :language]
|
23
30
|
INT_LIST_PROPERTIES = [:year, :season]
|
@@ -26,7 +33,8 @@ class YayImdbs
|
|
26
33
|
:year => :years,
|
27
34
|
:season => :seasons,
|
28
35
|
:language => :languages,
|
29
|
-
:motion_picture_rating_mpaa => :mpaa
|
36
|
+
:motion_picture_rating_mpaa => :mpaa,
|
37
|
+
:official_sites => :official_site}
|
30
38
|
|
31
39
|
class << self
|
32
40
|
|
@@ -95,7 +103,7 @@ class YayImdbs
|
|
95
103
|
movie_properties(doc) do |key, value|
|
96
104
|
found_info_divs = true
|
97
105
|
info_hash["raw_#{key}"] = value
|
98
|
-
info_hash[key] = clean_movie_property(key, value)
|
106
|
+
info_hash[key] = clean_movie_property(key, value, imdb_id)
|
99
107
|
info_hash[PROPERTY_ALIAS[key]] = info_hash[key] if PROPERTY_ALIAS[key]
|
100
108
|
end
|
101
109
|
|
@@ -115,7 +123,7 @@ class YayImdbs
|
|
115
123
|
return info_hash
|
116
124
|
end
|
117
125
|
|
118
|
-
def clean_movie_property(key, value)
|
126
|
+
def clean_movie_property(key, value, imdb_id)
|
119
127
|
if DATE_PROPERTIES.include?(key)
|
120
128
|
value = Date.strptime(value, '%d %B %Y') rescue nil
|
121
129
|
elsif key == :runtime
|
@@ -124,6 +132,8 @@ class YayImdbs
|
|
124
132
|
else
|
125
133
|
value = nil
|
126
134
|
end
|
135
|
+
elsif key == :official_sites
|
136
|
+
value = get_official_site_url(value, imdb_id)
|
127
137
|
elsif LIST_PROPERTIES.include?(key)
|
128
138
|
value = value.split('|').collect { |l| l.gsub(/[^a-zA-Z0-9\-]/, '') }
|
129
139
|
elsif INT_LIST_PROPERTIES.include?(key)
|
@@ -138,15 +148,21 @@ class YayImdbs
|
|
138
148
|
raw_key = h4.inner_text
|
139
149
|
key = raw_key.sub(':', '').strip.downcase
|
140
150
|
value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
|
141
|
-
value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(
|
151
|
+
value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(#{MORE_INFO_LINKS.join(')|(')})$/i, '').strip
|
142
152
|
symbol_key = key.downcase.gsub(/[^a-zA-Z0-9 ]/, '').gsub(/\s/, '_').to_sym
|
143
|
-
if symbol_key == :official_sites
|
144
|
-
value = div.inner_html.match(/href=\"(.*?)\"/)[1]
|
145
|
-
end
|
146
153
|
yield symbol_key, value
|
147
154
|
end
|
148
155
|
end
|
149
156
|
|
157
|
+
# TODO capture all official sites, not all sites have an "Official site" link (e.g. Lost)
|
158
|
+
def get_official_site_url(value, imdb_id)
|
159
|
+
value = value.match(/<a href="(.*?)">Official site<\/a>/)
|
160
|
+
if value.nil?
|
161
|
+
value = get_official_sites_page(imdb_id).inner_html.match(/<a href="(.*?)">Official site<\/a>/)
|
162
|
+
end
|
163
|
+
return $1
|
164
|
+
end
|
165
|
+
|
150
166
|
def scrap_images(doc, info_hash)
|
151
167
|
#scrap poster image urls
|
152
168
|
thumbnail_url = doc.at_css("td[id=img_primary] a img").try(:[], 'src')
|
@@ -174,9 +190,9 @@ class YayImdbs
|
|
174
190
|
|
175
191
|
raw_date = e_div.at_css('strong').inner_text.strip
|
176
192
|
episode['date'] = Date.parse(raw_date) rescue nil
|
177
|
-
|
178
|
-
|
179
|
-
|
193
|
+
|
194
|
+
# Seems that the day can sometimes be ????, which doesn't play well with regex
|
195
|
+
episode['plot'] = $'.strip if e_div.inner_text =~ /#{raw_date}/ rescue nil
|
180
196
|
|
181
197
|
episodes << episode
|
182
198
|
end
|
@@ -190,6 +206,10 @@ class YayImdbs
|
|
190
206
|
def get_movie_page(imdb_id)
|
191
207
|
Nokogiri::HTML(open(IMDB_MOVIE_URL + imdb_id))
|
192
208
|
end
|
209
|
+
|
210
|
+
def get_official_sites_page(imdb_id)
|
211
|
+
Nokogiri::HTML(open(IMDB_MOVIE_URL + imdb_id + '/officialsites' ))
|
212
|
+
end
|
193
213
|
|
194
214
|
def get_episodes_page(imdb_id)
|
195
215
|
Nokogiri::HTML(open(IMDB_MOVIE_URL + imdb_id + '/episodes'))
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 3
|
9
|
-
version: 0.2.3
|
8
|
+
- 4
|
9
|
+
version: 0.2.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Sam Cavenagh
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-08-02 00:00:00 +10:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|