yayimdbs 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/yay_imdbs.rb +22 -23
- metadata +8 -9
data/lib/yay_imdbs.rb
CHANGED
@@ -43,7 +43,7 @@ class YayImdbs
|
|
43
43
|
td.xpath(".//a").each do |link|
|
44
44
|
href = link['href']
|
45
45
|
current_name = link.content
|
46
|
-
|
46
|
+
|
47
47
|
# Ignore links with no text (e.g. image links)
|
48
48
|
next unless current_name.present?
|
49
49
|
current_name = self.clean_title(current_name)
|
@@ -70,17 +70,17 @@ class YayImdbs
|
|
70
70
|
end
|
71
71
|
info_hash['video_type'] = self.video_type_from_meta(doc)
|
72
72
|
|
73
|
+
info_hash[:plot] = doc.xpath("//td[@id='overview-top']/p[2]").inner_text.strip
|
74
|
+
|
73
75
|
found_info_divs = false
|
74
|
-
doc.xpath("//div[@class='
|
75
|
-
next if div.xpath(".//
|
76
|
+
doc.xpath("//div[@class='txt-block']").each do |div|
|
77
|
+
next if div.xpath(".//h4").empty?
|
76
78
|
found_info_divs = true
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
value =
|
81
|
-
|
82
|
-
value = strip_whitespace div.xpath(value_search).first.content
|
83
|
-
end
|
79
|
+
raw_key = div.xpath(".//h4").first.inner_text
|
80
|
+
key = raw_key.sub(':', '').strip.downcase
|
81
|
+
value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
|
82
|
+
value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(See more)|(see all)$/, '').strip
|
83
|
+
|
84
84
|
if key == 'release date'
|
85
85
|
begin
|
86
86
|
value = Date.strptime(value, '%d %B %Y')
|
@@ -95,15 +95,14 @@ class YayImdbs
|
|
95
95
|
p "Unexpected runtime format #{value} for movie #{imdb_id}"
|
96
96
|
end
|
97
97
|
elsif key == 'genre'
|
98
|
-
value = value.
|
98
|
+
value = value.strip.split
|
99
|
+
elsif key == 'year'
|
100
|
+
value = value.split('|').collect { |l| l.strip.to_i }.reject { |y| y <= 0 }
|
99
101
|
elsif key == 'language'
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
value = language if value.nil?
|
105
|
-
value = language if language.downcase == 'english'
|
106
|
-
end
|
102
|
+
value = value.split('|').collect { |l| l.strip }
|
103
|
+
elsif key == 'taglines'
|
104
|
+
# Backwards compatibility
|
105
|
+
info_hash['tagline'] = value
|
107
106
|
end
|
108
107
|
info_hash[key.downcase.gsub(/\s/, '_')] = value
|
109
108
|
end
|
@@ -116,7 +115,7 @@ class YayImdbs
|
|
116
115
|
|
117
116
|
#scrap poster image urls
|
118
117
|
thumb = doc.xpath("//div[@class = 'photo']/a/img")
|
119
|
-
if thumb
|
118
|
+
if thumb.first
|
120
119
|
thumbnail_url = thumb.first['src']
|
121
120
|
if not thumbnail_url =~ /addposter.jpg$/
|
122
121
|
info_hash['small_image'] = thumbnail_url
|
@@ -130,7 +129,7 @@ class YayImdbs
|
|
130
129
|
end
|
131
130
|
|
132
131
|
#scrap episodes if tv series
|
133
|
-
if info_hash.has_key?('
|
132
|
+
if info_hash.has_key?('season')
|
134
133
|
episodes = []
|
135
134
|
doc = self.get_episodes_page(imdb_id)
|
136
135
|
episode_divs = doc.css(".filter-all")
|
@@ -167,8 +166,8 @@ class YayImdbs
|
|
167
166
|
return nil, nil unless doc.xpath("//meta[@name='title']").first
|
168
167
|
|
169
168
|
title_text = doc.xpath("//meta[@name='title']").first['content']
|
170
|
-
# Matches 'Movie Name (2010)' or 'Movie Name (2010/I)'
|
171
|
-
if title_text =~ /(.*) \((\d{4})
|
169
|
+
# Matches 'Movie Name (2010)' or 'Movie Name (2010/I)' or 'Lost (TV Series 2004–2010)'
|
170
|
+
if title_text =~ /(.*) \((?:TV\sSeries\s)?(\d{4})((\/\w*)|(.\d{4}))?\)/
|
172
171
|
movie_title = $1
|
173
172
|
movie_year = $2.to_i
|
174
173
|
|
@@ -202,4 +201,4 @@ class YayImdbs
|
|
202
201
|
else return :movie
|
203
202
|
end
|
204
203
|
end
|
205
|
-
end
|
204
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
version: 0.1.
|
8
|
+
- 5
|
9
|
+
version: 0.1.5
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Sam Cavenagh
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-10-12 00:00:00 +11:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -44,8 +44,7 @@ dependencies:
|
|
44
44
|
- 3
|
45
45
|
- 0
|
46
46
|
- 0
|
47
|
-
|
48
|
-
version: 3.0.0.beta4
|
47
|
+
version: 3.0.0
|
49
48
|
type: :runtime
|
50
49
|
version_requirements: *id002
|
51
50
|
- !ruby/object:Gem::Dependency
|
@@ -87,10 +86,10 @@ dependencies:
|
|
87
86
|
- - ">="
|
88
87
|
- !ruby/object:Gem::Version
|
89
88
|
segments:
|
90
|
-
-
|
91
|
-
-
|
92
|
-
-
|
93
|
-
version:
|
89
|
+
- 1
|
90
|
+
- 3
|
91
|
+
- 1
|
92
|
+
version: 1.3.1
|
94
93
|
type: :development
|
95
94
|
version_requirements: *id005
|
96
95
|
description: A simple imdb scraper built on Nokogiri for ruby 1.9+
|