yayimdbs 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/yay_imdbs.rb +22 -23
  2. metadata +8 -9
@@ -43,7 +43,7 @@ class YayImdbs
43
43
  td.xpath(".//a").each do |link|
44
44
  href = link['href']
45
45
  current_name = link.content
46
-
46
+
47
47
  # Ignore links with no text (e.g. image links)
48
48
  next unless current_name.present?
49
49
  current_name = self.clean_title(current_name)
@@ -70,17 +70,17 @@ class YayImdbs
70
70
  end
71
71
  info_hash['video_type'] = self.video_type_from_meta(doc)
72
72
 
73
+ info_hash[:plot] = doc.xpath("//td[@id='overview-top']/p[2]").inner_text.strip
74
+
73
75
  found_info_divs = false
74
- doc.xpath("//div[@class='info']").each do |div|
75
- next if div.xpath(".//h5").empty?
76
+ doc.xpath("//div[@class='txt-block']").each do |div|
77
+ next if div.xpath(".//h4").empty?
76
78
  found_info_divs = true
77
- key = div.xpath(".//h5").first.inner_text.sub(':', '').downcase
78
- value_search = ".//div[@class = 'info-content']"
79
- # Try to only get text values and ignore links as some info blocks have a "click for more info" type link at the end
80
- value = strip_whitespace div.xpath(value_search).first.children.map{|e| e.text? ? e.to_s : ''}.join
81
- if value.empty?
82
- value = strip_whitespace div.xpath(value_search).first.content
83
- end
79
+ raw_key = div.xpath(".//h4").first.inner_text
80
+ key = raw_key.sub(':', '').strip.downcase
81
+ value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
82
+ value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(See more)|(see all)$/, '').strip
83
+
84
84
  if key == 'release date'
85
85
  begin
86
86
  value = Date.strptime(value, '%d %B %Y')
@@ -95,15 +95,14 @@ class YayImdbs
95
95
  p "Unexpected runtime format #{value} for movie #{imdb_id}"
96
96
  end
97
97
  elsif key == 'genre'
98
- value = value.sub(/(See more$)|(more$)/, '').strip.split
98
+ value = value.strip.split
99
+ elsif key == 'year'
100
+ value = value.split('|').collect { |l| l.strip.to_i }.reject { |y| y <= 0 }
99
101
  elsif key == 'language'
100
- # This is a bit of a hack, I dont really want to deal with multiple langauges, so if there is more than one
101
- # just use english or the first one found
102
- value = nil
103
- div.xpath(value_search).first.inner_text.split(/\|/).collect {|l| l.strip}.each do |language|
104
- value = language if value.nil?
105
- value = language if language.downcase == 'english'
106
- end
102
+ value = value.split('|').collect { |l| l.strip }
103
+ elsif key == 'taglines'
104
+ # Backwards compatibility
105
+ info_hash['tagline'] = value
107
106
  end
108
107
  info_hash[key.downcase.gsub(/\s/, '_')] = value
109
108
  end
@@ -116,7 +115,7 @@ class YayImdbs
116
115
 
117
116
  #scrap poster image urls
118
117
  thumb = doc.xpath("//div[@class = 'photo']/a/img")
119
- if thumb
118
+ if thumb.first
120
119
  thumbnail_url = thumb.first['src']
121
120
  if not thumbnail_url =~ /addposter.jpg$/
122
121
  info_hash['small_image'] = thumbnail_url
@@ -130,7 +129,7 @@ class YayImdbs
130
129
  end
131
130
 
132
131
  #scrap episodes if tv series
133
- if info_hash.has_key?('seasons')
132
+ if info_hash.has_key?('season')
134
133
  episodes = []
135
134
  doc = self.get_episodes_page(imdb_id)
136
135
  episode_divs = doc.css(".filter-all")
@@ -167,8 +166,8 @@ class YayImdbs
167
166
  return nil, nil unless doc.xpath("//meta[@name='title']").first
168
167
 
169
168
  title_text = doc.xpath("//meta[@name='title']").first['content']
170
- # Matches 'Movie Name (2010)' or 'Movie Name (2010/I)'
171
- if title_text =~ /(.*) \((\d{4})\/?\w*\)/
169
+ # Matches 'Movie Name (2010)' or 'Movie Name (2010/I)' or 'Lost (TV Series 2004–2010)'
170
+ if title_text =~ /(.*) \((?:TV\sSeries\s)?(\d{4})((\/\w*)|(.\d{4}))?\)/
172
171
  movie_title = $1
173
172
  movie_year = $2.to_i
174
173
 
@@ -202,4 +201,4 @@ class YayImdbs
202
201
  else return :movie
203
202
  end
204
203
  end
205
- end
204
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 4
9
- version: 0.1.4
8
+ - 5
9
+ version: 0.1.5
10
10
  platform: ruby
11
11
  authors:
12
12
  - Sam Cavenagh
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-07-08 00:00:00 +10:00
17
+ date: 2010-10-12 00:00:00 +11:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -44,8 +44,7 @@ dependencies:
44
44
  - 3
45
45
  - 0
46
46
  - 0
47
- - beta4
48
- version: 3.0.0.beta4
47
+ version: 3.0.0
49
48
  type: :runtime
50
49
  version_requirements: *id002
51
50
  - !ruby/object:Gem::Dependency
@@ -87,10 +86,10 @@ dependencies:
87
86
  - - ">="
88
87
  - !ruby/object:Gem::Version
89
88
  segments:
90
- - 2
91
- - 5
92
- - 8
93
- version: 2.5.8
89
+ - 1
90
+ - 3
91
+ - 1
92
+ version: 1.3.1
94
93
  type: :development
95
94
  version_requirements: *id005
96
95
  description: A simple imdb scraper built on Nokogiri for ruby 1.9+