yayimdbs 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/yay_imdbs.rb +29 -11
- metadata +5 -35
data/lib/yay_imdbs.rb
CHANGED
@@ -1,7 +1,15 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'open-uri'
|
3
3
|
require 'nokogiri'
|
4
|
-
|
4
|
+
|
5
|
+
begin
|
6
|
+
# Rails 3
|
7
|
+
require 'active_support/core_ext/object'
|
8
|
+
require 'active_support/core_ext/hash/indifferent_access.rb'
|
9
|
+
rescue
|
10
|
+
# Rails 2.3
|
11
|
+
require 'active_support/all'
|
12
|
+
end
|
5
13
|
|
6
14
|
class YayImdbs
|
7
15
|
IMDB_BASE_URL = 'http://www.imdb.com/'
|
@@ -74,10 +82,10 @@ class YayImdbs
|
|
74
82
|
info_hash[:plot] = doc.xpath("//td[@id='overview-top']/p[2]").inner_text.strip
|
75
83
|
|
76
84
|
found_info_divs = false
|
77
|
-
doc.xpath("//div
|
78
|
-
|
85
|
+
doc.xpath("//div/h4").each do |h4|
|
86
|
+
div = h4.parent
|
79
87
|
found_info_divs = true
|
80
|
-
raw_key =
|
88
|
+
raw_key = h4.inner_text
|
81
89
|
key = raw_key.sub(':', '').strip.downcase
|
82
90
|
value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
|
83
91
|
value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(See more)|(see all)$/, '').strip
|
@@ -95,15 +103,24 @@ class YayImdbs
|
|
95
103
|
else
|
96
104
|
p "Unexpected runtime format #{value} for movie #{imdb_id}"
|
97
105
|
end
|
98
|
-
elsif key == '
|
99
|
-
value = value.
|
106
|
+
elsif key == 'genres'
|
107
|
+
value = value.split('|').collect { |l| l.gsub(/[^a-zA-Z0-9\-]/, '') }
|
108
|
+
# Backwards compatibility hack
|
109
|
+
info_hash[:genre] = value
|
100
110
|
elsif key == 'year'
|
101
111
|
value = value.split('|').collect { |l| l.strip.to_i }.reject { |y| y <= 0 }
|
112
|
+
# TV shows can have multiple years
|
113
|
+
info_hash[:years] = value
|
114
|
+
value = value.sort.first
|
102
115
|
elsif key == 'language'
|
103
|
-
value = value.split('|').collect { |l| l.
|
116
|
+
value = value.split('|').collect { |l| l.gsub(/[^a-zA-Z0-9]/, '') }
|
104
117
|
elsif key == 'taglines'
|
105
118
|
# Backwards compatibility
|
106
119
|
info_hash['tagline'] = value
|
120
|
+
elsif key == 'motion picture rating (mpaa)'
|
121
|
+
value = value.gsub(/See all certifications/, '').strip
|
122
|
+
# Backwards compatibility FIXME do with a map
|
123
|
+
info_hash['mpaa'] = value
|
107
124
|
end
|
108
125
|
info_hash[key.downcase.gsub(/\s/, '_')] = value
|
109
126
|
end
|
@@ -117,7 +134,7 @@ class YayImdbs
|
|
117
134
|
|
118
135
|
#scrap episodes if tv series
|
119
136
|
if info_hash.has_key?('season')
|
120
|
-
self.scrap_episodes(
|
137
|
+
self.scrap_episodes(info_hash)
|
121
138
|
end
|
122
139
|
|
123
140
|
return info_hash
|
@@ -144,15 +161,16 @@ class YayImdbs
|
|
144
161
|
end
|
145
162
|
end
|
146
163
|
|
147
|
-
def self.scrap_episodes(
|
164
|
+
def self.scrap_episodes(info_hash)
|
148
165
|
episodes = []
|
149
166
|
doc = self.get_episodes_page(info_hash[:imdb_id])
|
150
167
|
episode_divs = doc.css(".filter-all")
|
151
168
|
episode_divs.each do |e_div|
|
152
169
|
if e_div.xpath('.//h3').inner_text =~ /Season (\d+), Episode (\d+):/
|
153
170
|
episode = {"series" => $1.to_i, "episode" => $2.to_i, "title" => $'.strip}
|
154
|
-
|
155
|
-
|
171
|
+
raw_date = e_div.xpath('.//span/strong').inner_text.strip
|
172
|
+
episode['date'] = Date.parse(raw_date)
|
173
|
+
if e_div.inner_text =~ /#{raw_date}/
|
156
174
|
episode['plot'] = $'.strip
|
157
175
|
end
|
158
176
|
episodes << episode
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 8
|
9
|
-
version: 0.1.8
|
8
|
+
- 9
|
9
|
+
version: 0.1.9
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Sam Cavenagh
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-10-
|
17
|
+
date: 2010-10-25 00:00:00 +11:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -47,40 +47,10 @@ dependencies:
|
|
47
47
|
version: 3.0.0
|
48
48
|
type: :runtime
|
49
49
|
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: tzinfo
|
52
|
-
prerelease: false
|
53
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
-
none: false
|
55
|
-
requirements:
|
56
|
-
- - ">="
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
segments:
|
59
|
-
- 0
|
60
|
-
- 3
|
61
|
-
- 22
|
62
|
-
version: 0.3.22
|
63
|
-
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: i18n
|
67
|
-
prerelease: false
|
68
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
-
none: false
|
70
|
-
requirements:
|
71
|
-
- - ">="
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
segments:
|
74
|
-
- 0
|
75
|
-
- 4
|
76
|
-
- 1
|
77
|
-
version: 0.4.1
|
78
|
-
type: :runtime
|
79
|
-
version_requirements: *id004
|
80
50
|
- !ruby/object:Gem::Dependency
|
81
51
|
name: rspec
|
82
52
|
prerelease: false
|
83
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
84
54
|
none: false
|
85
55
|
requirements:
|
86
56
|
- - ">="
|
@@ -91,7 +61,7 @@ dependencies:
|
|
91
61
|
- 0
|
92
62
|
version: 2.0.0
|
93
63
|
type: :development
|
94
|
-
version_requirements: *id005
|
64
|
+
version_requirements: *id003
|
95
65
|
description: A simple imdb scraper built on Nokogiri for ruby 1.9+
|
96
66
|
email: cavenaghweb@hotmail.com
|
97
67
|
executables: []
|