yayimdbs 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/yay_imdbs.rb +30 -10
  2. metadata +3 -3
data/lib/yay_imdbs.rb CHANGED
@@ -18,6 +18,13 @@ class YayImdbs
18
18
 
19
19
  STRIP_WHITESPACE = /(\s{2,}|\n|\||\302\240\302\273)/u
20
20
 
21
+ MORE_INFO_LINKS = ['See more',
22
+ 'Add/edit official sites',
23
+ 'See all certifications',
24
+ 'See full summary',
25
+ 'see all',
26
+ ]
27
+
21
28
  DATE_PROPERTIES = [:release_date]
22
29
  LIST_PROPERTIES = [:genres, :plot_keywords, :country, :sound_mix, :language]
23
30
  INT_LIST_PROPERTIES = [:year, :season]
@@ -26,7 +33,8 @@ class YayImdbs
26
33
  :year => :years,
27
34
  :season => :seasons,
28
35
  :language => :languages,
29
- :motion_picture_rating_mpaa => :mpaa}
36
+ :motion_picture_rating_mpaa => :mpaa,
37
+ :official_sites => :official_site}
30
38
 
31
39
  class << self
32
40
 
@@ -95,7 +103,7 @@ class YayImdbs
95
103
  movie_properties(doc) do |key, value|
96
104
  found_info_divs = true
97
105
  info_hash["raw_#{key}"] = value
98
- info_hash[key] = clean_movie_property(key, value)
106
+ info_hash[key] = clean_movie_property(key, value, imdb_id)
99
107
  info_hash[PROPERTY_ALIAS[key]] = info_hash[key] if PROPERTY_ALIAS[key]
100
108
  end
101
109
 
@@ -115,7 +123,7 @@ class YayImdbs
115
123
  return info_hash
116
124
  end
117
125
 
118
- def clean_movie_property(key, value)
126
+ def clean_movie_property(key, value, imdb_id)
119
127
  if DATE_PROPERTIES.include?(key)
120
128
  value = Date.strptime(value, '%d %B %Y') rescue nil
121
129
  elsif key == :runtime
@@ -124,6 +132,8 @@ class YayImdbs
124
132
  else
125
133
  value = nil
126
134
  end
135
+ elsif key == :official_sites
136
+ value = get_official_site_url(value, imdb_id)
127
137
  elsif LIST_PROPERTIES.include?(key)
128
138
  value = value.split('|').collect { |l| l.gsub(/[^a-zA-Z0-9\-]/, '') }
129
139
  elsif INT_LIST_PROPERTIES.include?(key)
@@ -138,15 +148,21 @@ class YayImdbs
138
148
  raw_key = h4.inner_text
139
149
  key = raw_key.sub(':', '').strip.downcase
140
150
  value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
141
- value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(See more)|(see all)|(See all certifications)|(Add\/edit official sites)|(See full summary)$/, '').strip
151
+ value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(#{MORE_INFO_LINKS.join(')|(')})$/i, '').strip
142
152
  symbol_key = key.downcase.gsub(/[^a-zA-Z0-9 ]/, '').gsub(/\s/, '_').to_sym
143
- if symbol_key == :official_sites
144
- value = div.inner_html.match(/href=\"(.*?)\"/)[1]
145
- end
146
153
  yield symbol_key, value
147
154
  end
148
155
  end
149
156
 
157
+ # TODO capture all official sites, not all sites have an "Official site" link (e.g. Lost)
158
+ def get_official_site_url(value, imdb_id)
159
+ value = value.match(/<a href="(.*?)">Official site<\/a>/)
160
+ if value.nil?
161
+ value = get_official_sites_page(imdb_id).inner_html.match(/<a href="(.*?)">Official site<\/a>/)
162
+ end
163
+ return $1
164
+ end
165
+
150
166
  def scrap_images(doc, info_hash)
151
167
  #scrap poster image urls
152
168
  thumbnail_url = doc.at_css("td[id=img_primary] a img").try(:[], 'src')
@@ -174,9 +190,9 @@ class YayImdbs
174
190
 
175
191
  raw_date = e_div.at_css('strong').inner_text.strip
176
192
  episode['date'] = Date.parse(raw_date) rescue nil
177
- if e_div.inner_text =~ /#{raw_date}/
178
- episode['plot'] = $'.strip
179
- end
193
+
194
+ # Seems that the day can sometimes be ???? which doesn't play well with regex
195
+ episode['plot'] = $'.strip if e_div.inner_text =~ /#{raw_date}/ rescue nil
180
196
 
181
197
  episodes << episode
182
198
  end
@@ -190,6 +206,10 @@ class YayImdbs
190
206
  def get_movie_page(imdb_id)
191
207
  Nokogiri::HTML(open(IMDB_MOVIE_URL + imdb_id))
192
208
  end
209
+
210
+ def get_official_sites_page(imdb_id)
211
+ Nokogiri::HTML(open(IMDB_MOVIE_URL + imdb_id + '/officialsites' ))
212
+ end
193
213
 
194
214
  def get_episodes_page(imdb_id)
195
215
  Nokogiri::HTML(open(IMDB_MOVIE_URL + imdb_id + '/episodes'))
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 3
9
- version: 0.2.3
8
+ - 4
9
+ version: 0.2.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Sam Cavenagh
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-07-28 00:00:00 +10:00
17
+ date: 2011-08-02 00:00:00 +10:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency