yayimdbs 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/yay_imdbs.rb +29 -11
  2. metadata +5 -35
data/lib/yay_imdbs.rb CHANGED
@@ -1,7 +1,15 @@
1
1
  # encoding: UTF-8
2
2
  require 'open-uri'
3
3
  require 'nokogiri'
4
- require 'active_support/all'
4
+
5
+ begin
6
+ # Rails 3
7
+ require 'active_support/core_ext/object'
8
+ require 'active_support/core_ext/hash/indifferent_access.rb'
9
+ rescue
10
+ # Rails 2.3
11
+ require 'active_support/all'
12
+ end
5
13
 
6
14
  class YayImdbs
7
15
  IMDB_BASE_URL = 'http://www.imdb.com/'
@@ -74,10 +82,10 @@ class YayImdbs
74
82
  info_hash[:plot] = doc.xpath("//td[@id='overview-top']/p[2]").inner_text.strip
75
83
 
76
84
  found_info_divs = false
77
- doc.xpath("//div[@class='txt-block']").each do |div|
78
- next if div.xpath(".//h4").empty?
85
+ doc.xpath("//div/h4").each do |h4|
86
+ div = h4.parent
79
87
  found_info_divs = true
80
- raw_key = div.xpath(".//h4").first.inner_text
88
+ raw_key = h4.inner_text
81
89
  key = raw_key.sub(':', '').strip.downcase
82
90
  value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
83
91
  value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(See more)|(see all)$/, '').strip
@@ -95,15 +103,24 @@ class YayImdbs
95
103
  else
96
104
  p "Unexpected runtime format #{value} for movie #{imdb_id}"
97
105
  end
98
- elsif key == 'genre'
99
- value = value.strip.split
106
+ elsif key == 'genres'
107
+ value = value.split('|').collect { |l| l.gsub(/[^a-zA-Z0-9\-]/, '') }
108
+ # Backwards compatibility hack
109
+ info_hash[:genre] = value
100
110
  elsif key == 'year'
101
111
  value = value.split('|').collect { |l| l.strip.to_i }.reject { |y| y <= 0 }
112
+ # TV shows can have multiple years
113
+ info_hash[:years] = value
114
+ value = value.sort.first
102
115
  elsif key == 'language'
103
- value = value.split('|').collect { |l| l.strip }
116
+ value = value.split('|').collect { |l| l.gsub(/[^a-zA-Z0-9]/, '') }
104
117
  elsif key == 'taglines'
105
118
  # Backwards compatibility
106
119
  info_hash['tagline'] = value
120
+ elsif key == 'motion picture rating (mpaa)'
121
+ value = value.gsub(/See all certifications/, '').strip
122
+ # Backwards compatibility FIXME do with a map
123
+ info_hash['mpaa'] = value
107
124
  end
108
125
  info_hash[key.downcase.gsub(/\s/, '_')] = value
109
126
  end
@@ -117,7 +134,7 @@ class YayImdbs
117
134
 
118
135
  #scrap episodes if tv series
119
136
  if info_hash.has_key?('season')
120
- self.scrap_episodes(doc, info_hash)
137
+ self.scrap_episodes(info_hash)
121
138
  end
122
139
 
123
140
  return info_hash
@@ -144,15 +161,16 @@ class YayImdbs
144
161
  end
145
162
  end
146
163
 
147
- def self.scrap_episodes(doc, info_hash)
164
+ def self.scrap_episodes(info_hash)
148
165
  episodes = []
149
166
  doc = self.get_episodes_page(info_hash[:imdb_id])
150
167
  episode_divs = doc.css(".filter-all")
151
168
  episode_divs.each do |e_div|
152
169
  if e_div.xpath('.//h3').inner_text =~ /Season (\d+), Episode (\d+):/
153
170
  episode = {"series" => $1.to_i, "episode" => $2.to_i, "title" => $'.strip}
154
- if e_div.xpath(".//td").inner_text =~ /(\d+ (January|February|March|April|May|June|July|August|September|October|November|December) \d{4})/
155
- episode['date'] = Date.parse($1)
171
+ raw_date = e_div.xpath('.//span/strong').inner_text.strip
172
+ episode['date'] = Date.parse(raw_date)
173
+ if e_div.inner_text =~ /#{raw_date}/
156
174
  episode['plot'] = $'.strip
157
175
  end
158
176
  episodes << episode
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 8
9
- version: 0.1.8
8
+ - 9
9
+ version: 0.1.9
10
10
  platform: ruby
11
11
  authors:
12
12
  - Sam Cavenagh
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-10-14 00:00:00 +11:00
17
+ date: 2010-10-25 00:00:00 +11:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -47,40 +47,10 @@ dependencies:
47
47
  version: 3.0.0
48
48
  type: :runtime
49
49
  version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: tzinfo
52
- prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
54
- none: false
55
- requirements:
56
- - - ">="
57
- - !ruby/object:Gem::Version
58
- segments:
59
- - 0
60
- - 3
61
- - 22
62
- version: 0.3.22
63
- type: :runtime
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- name: i18n
67
- prerelease: false
68
- requirement: &id004 !ruby/object:Gem::Requirement
69
- none: false
70
- requirements:
71
- - - ">="
72
- - !ruby/object:Gem::Version
73
- segments:
74
- - 0
75
- - 4
76
- - 1
77
- version: 0.4.1
78
- type: :runtime
79
- version_requirements: *id004
80
50
  - !ruby/object:Gem::Dependency
81
51
  name: rspec
82
52
  prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
53
+ requirement: &id003 !ruby/object:Gem::Requirement
84
54
  none: false
85
55
  requirements:
86
56
  - - ">="
@@ -91,7 +61,7 @@ dependencies:
91
61
  - 0
92
62
  version: 2.0.0
93
63
  type: :development
94
- version_requirements: *id005
64
+ version_requirements: *id003
95
65
  description: A simple imdb scraper built on Nokogiri for ruby 1.9+
96
66
  email: cavenaghweb@hotmail.com
97
67
  executables: []