yayimdbs 0.1.8 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/yay_imdbs.rb +29 -11
  2. metadata +5 -35
data/lib/yay_imdbs.rb CHANGED
@@ -1,7 +1,15 @@
1
1
  # encoding: UTF-8
2
2
  require 'open-uri'
3
3
  require 'nokogiri'
4
- require 'active_support/all'
4
+
5
+ begin
6
+ # Rails 3
7
+ require 'active_support/core_ext/object'
8
+ require 'active_support/core_ext/hash/indifferent_access.rb'
9
+ rescue
10
+ # Rails 2.3
11
+ require 'active_support/all'
12
+ end
5
13
 
6
14
  class YayImdbs
7
15
  IMDB_BASE_URL = 'http://www.imdb.com/'
@@ -74,10 +82,10 @@ class YayImdbs
74
82
  info_hash[:plot] = doc.xpath("//td[@id='overview-top']/p[2]").inner_text.strip
75
83
 
76
84
  found_info_divs = false
77
- doc.xpath("//div[@class='txt-block']").each do |div|
78
- next if div.xpath(".//h4").empty?
85
+ doc.xpath("//div/h4").each do |h4|
86
+ div = h4.parent
79
87
  found_info_divs = true
80
- raw_key = div.xpath(".//h4").first.inner_text
88
+ raw_key = h4.inner_text
81
89
  key = raw_key.sub(':', '').strip.downcase
82
90
  value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
83
91
  value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(See more)|(see all)$/, '').strip
@@ -95,15 +103,24 @@ class YayImdbs
95
103
  else
96
104
  p "Unexpected runtime format #{value} for movie #{imdb_id}"
97
105
  end
98
- elsif key == 'genre'
99
- value = value.strip.split
106
+ elsif key == 'genres'
107
+ value = value.split('|').collect { |l| l.gsub(/[^a-zA-Z0-9\-]/, '') }
108
+ # Backwards compatibility hack
109
+ info_hash[:genre] = value
100
110
  elsif key == 'year'
101
111
  value = value.split('|').collect { |l| l.strip.to_i }.reject { |y| y <= 0 }
112
+ # TV shows can have multiple years
113
+ info_hash[:years] = value
114
+ value = value.sort.first
102
115
  elsif key == 'language'
103
- value = value.split('|').collect { |l| l.strip }
116
+ value = value.split('|').collect { |l| l.gsub(/[^a-zA-Z0-9]/, '') }
104
117
  elsif key == 'taglines'
105
118
  # Backwards compatibility
106
119
  info_hash['tagline'] = value
120
+ elsif key == 'motion picture rating (mpaa)'
121
+ value = value.gsub(/See all certifications/, '').strip
122
+ # Backwards compatibility FIXME do with a map
123
+ info_hash['mpaa'] = value
107
124
  end
108
125
  info_hash[key.downcase.gsub(/\s/, '_')] = value
109
126
  end
@@ -117,7 +134,7 @@ class YayImdbs
117
134
 
118
135
  #scrap episodes if tv series
119
136
  if info_hash.has_key?('season')
120
- self.scrap_episodes(doc, info_hash)
137
+ self.scrap_episodes(info_hash)
121
138
  end
122
139
 
123
140
  return info_hash
@@ -144,15 +161,16 @@ class YayImdbs
144
161
  end
145
162
  end
146
163
 
147
- def self.scrap_episodes(doc, info_hash)
164
+ def self.scrap_episodes(info_hash)
148
165
  episodes = []
149
166
  doc = self.get_episodes_page(info_hash[:imdb_id])
150
167
  episode_divs = doc.css(".filter-all")
151
168
  episode_divs.each do |e_div|
152
169
  if e_div.xpath('.//h3').inner_text =~ /Season (\d+), Episode (\d+):/
153
170
  episode = {"series" => $1.to_i, "episode" => $2.to_i, "title" => $'.strip}
154
- if e_div.xpath(".//td").inner_text =~ /(\d+ (January|February|March|April|May|June|July|August|September|October|November|December) \d{4})/
155
- episode['date'] = Date.parse($1)
171
+ raw_date = e_div.xpath('.//span/strong').inner_text.strip
172
+ episode['date'] = Date.parse(raw_date)
173
+ if e_div.inner_text =~ /#{raw_date}/
156
174
  episode['plot'] = $'.strip
157
175
  end
158
176
  episodes << episode
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 8
9
- version: 0.1.8
8
+ - 9
9
+ version: 0.1.9
10
10
  platform: ruby
11
11
  authors:
12
12
  - Sam Cavenagh
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-10-14 00:00:00 +11:00
17
+ date: 2010-10-25 00:00:00 +11:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -47,40 +47,10 @@ dependencies:
47
47
  version: 3.0.0
48
48
  type: :runtime
49
49
  version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: tzinfo
52
- prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
54
- none: false
55
- requirements:
56
- - - ">="
57
- - !ruby/object:Gem::Version
58
- segments:
59
- - 0
60
- - 3
61
- - 22
62
- version: 0.3.22
63
- type: :runtime
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- name: i18n
67
- prerelease: false
68
- requirement: &id004 !ruby/object:Gem::Requirement
69
- none: false
70
- requirements:
71
- - - ">="
72
- - !ruby/object:Gem::Version
73
- segments:
74
- - 0
75
- - 4
76
- - 1
77
- version: 0.4.1
78
- type: :runtime
79
- version_requirements: *id004
80
50
  - !ruby/object:Gem::Dependency
81
51
  name: rspec
82
52
  prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
53
+ requirement: &id003 !ruby/object:Gem::Requirement
84
54
  none: false
85
55
  requirements:
86
56
  - - ">="
@@ -91,7 +61,7 @@ dependencies:
91
61
  - 0
92
62
  version: 2.0.0
93
63
  type: :development
94
- version_requirements: *id005
64
+ version_requirements: *id003
95
65
  description: A simple imdb scraper built on Nokogiri for ruby 1.9+
96
66
  email: cavenaghweb@hotmail.com
97
67
  executables: []