yayimdbs 0.1.4 → 0.1.5
This diff shows the differences between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- data/lib/yay_imdbs.rb +22 -23
- metadata +8 -9
data/lib/yay_imdbs.rb
CHANGED
@@ -43,7 +43,7 @@ class YayImdbs
|
|
43
43
|
td.xpath(".//a").each do |link|
|
44
44
|
href = link['href']
|
45
45
|
current_name = link.content
|
46
|
-
|
46
|
+
|
47
47
|
# Ignore links with no text (e.g. image links)
|
48
48
|
next unless current_name.present?
|
49
49
|
current_name = self.clean_title(current_name)
|
@@ -70,17 +70,17 @@ class YayImdbs
|
|
70
70
|
end
|
71
71
|
info_hash['video_type'] = self.video_type_from_meta(doc)
|
72
72
|
|
73
|
+
info_hash[:plot] = doc.xpath("//td[@id='overview-top']/p[2]").inner_text.strip
|
74
|
+
|
73
75
|
found_info_divs = false
|
74
|
-
doc.xpath("//div[@class='
|
75
|
-
next if div.xpath(".//
|
76
|
+
doc.xpath("//div[@class='txt-block']").each do |div|
|
77
|
+
next if div.xpath(".//h4").empty?
|
76
78
|
found_info_divs = true
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
value =
|
81
|
-
|
82
|
-
value = strip_whitespace div.xpath(value_search).first.content
|
83
|
-
end
|
79
|
+
raw_key = div.xpath(".//h4").first.inner_text
|
80
|
+
key = raw_key.sub(':', '').strip.downcase
|
81
|
+
value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
|
82
|
+
value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(See more)|(see all)$/, '').strip
|
83
|
+
|
84
84
|
if key == 'release date'
|
85
85
|
begin
|
86
86
|
value = Date.strptime(value, '%d %B %Y')
|
@@ -95,15 +95,14 @@ class YayImdbs
|
|
95
95
|
p "Unexpected runtime format #{value} for movie #{imdb_id}"
|
96
96
|
end
|
97
97
|
elsif key == 'genre'
|
98
|
-
value = value.
|
98
|
+
value = value.strip.split
|
99
|
+
elsif key == 'year'
|
100
|
+
value = value.split('|').collect { |l| l.strip.to_i }.reject { |y| y <= 0 }
|
99
101
|
elsif key == 'language'
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
value = language if value.nil?
|
105
|
-
value = language if language.downcase == 'english'
|
106
|
-
end
|
102
|
+
value = value.split('|').collect { |l| l.strip }
|
103
|
+
elsif key == 'taglines'
|
104
|
+
# Backwards compatibility
|
105
|
+
info_hash['tagline'] = value
|
107
106
|
end
|
108
107
|
info_hash[key.downcase.gsub(/\s/, '_')] = value
|
109
108
|
end
|
@@ -116,7 +115,7 @@ class YayImdbs
|
|
116
115
|
|
117
116
|
#scrap poster image urls
|
118
117
|
thumb = doc.xpath("//div[@class = 'photo']/a/img")
|
119
|
-
if thumb
|
118
|
+
if thumb.first
|
120
119
|
thumbnail_url = thumb.first['src']
|
121
120
|
if not thumbnail_url =~ /addposter.jpg$/
|
122
121
|
info_hash['small_image'] = thumbnail_url
|
@@ -130,7 +129,7 @@ class YayImdbs
|
|
130
129
|
end
|
131
130
|
|
132
131
|
#scrap episodes if tv series
|
133
|
-
if info_hash.has_key?('
|
132
|
+
if info_hash.has_key?('season')
|
134
133
|
episodes = []
|
135
134
|
doc = self.get_episodes_page(imdb_id)
|
136
135
|
episode_divs = doc.css(".filter-all")
|
@@ -167,8 +166,8 @@ class YayImdbs
|
|
167
166
|
return nil, nil unless doc.xpath("//meta[@name='title']").first
|
168
167
|
|
169
168
|
title_text = doc.xpath("//meta[@name='title']").first['content']
|
170
|
-
# Matches 'Movie Name (2010)' or 'Movie Name (2010/I)'
|
171
|
-
if title_text =~ /(.*) \((\d{4})
|
169
|
+
# Matches 'Movie Name (2010)' or 'Movie Name (2010/I)' or 'Lost (TV Series 2004–2010)'
|
170
|
+
if title_text =~ /(.*) \((?:TV\sSeries\s)?(\d{4})((\/\w*)|(.\d{4}))?\)/
|
172
171
|
movie_title = $1
|
173
172
|
movie_year = $2.to_i
|
174
173
|
|
@@ -202,4 +201,4 @@ class YayImdbs
|
|
202
201
|
else return :movie
|
203
202
|
end
|
204
203
|
end
|
205
|
-
end
|
204
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
version: 0.1.
|
8
|
+
- 5
|
9
|
+
version: 0.1.5
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Sam Cavenagh
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-10-12 00:00:00 +11:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -44,8 +44,7 @@ dependencies:
|
|
44
44
|
- 3
|
45
45
|
- 0
|
46
46
|
- 0
|
47
|
-
|
48
|
-
version: 3.0.0.beta4
|
47
|
+
version: 3.0.0
|
49
48
|
type: :runtime
|
50
49
|
version_requirements: *id002
|
51
50
|
- !ruby/object:Gem::Dependency
|
@@ -87,10 +86,10 @@ dependencies:
|
|
87
86
|
- - ">="
|
88
87
|
- !ruby/object:Gem::Version
|
89
88
|
segments:
|
90
|
-
-
|
91
|
-
-
|
92
|
-
-
|
93
|
-
version:
|
89
|
+
- 1
|
90
|
+
- 3
|
91
|
+
- 1
|
92
|
+
version: 1.3.1
|
94
93
|
type: :development
|
95
94
|
version_requirements: *id005
|
96
95
|
description: A simple imdb scraper built on Nokogiri for ruby 1.9+
|