yayimdbs 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/yay_imdbs.rb +22 -23
  2. metadata +8 -9
@@ -43,7 +43,7 @@ class YayImdbs
43
43
  td.xpath(".//a").each do |link|
44
44
  href = link['href']
45
45
  current_name = link.content
46
-
46
+
47
47
  # Ignore links with no text (e.g. image links)
48
48
  next unless current_name.present?
49
49
  current_name = self.clean_title(current_name)
@@ -70,17 +70,17 @@ class YayImdbs
70
70
  end
71
71
  info_hash['video_type'] = self.video_type_from_meta(doc)
72
72
 
73
+ info_hash[:plot] = doc.xpath("//td[@id='overview-top']/p[2]").inner_text.strip
74
+
73
75
  found_info_divs = false
74
- doc.xpath("//div[@class='info']").each do |div|
75
- next if div.xpath(".//h5").empty?
76
+ doc.xpath("//div[@class='txt-block']").each do |div|
77
+ next if div.xpath(".//h4").empty?
76
78
  found_info_divs = true
77
- key = div.xpath(".//h5").first.inner_text.sub(':', '').downcase
78
- value_search = ".//div[@class = 'info-content']"
79
- # Try to only get text values and ignore links as some info blocks have a "click for more info" type link at the end
80
- value = strip_whitespace div.xpath(value_search).first.children.map{|e| e.text? ? e.to_s : ''}.join
81
- if value.empty?
82
- value = strip_whitespace div.xpath(value_search).first.content
83
- end
79
+ raw_key = div.xpath(".//h4").first.inner_text
80
+ key = raw_key.sub(':', '').strip.downcase
81
+ value = div.inner_text[((div.inner_text =~ /#{Regexp.escape(raw_key)}/) + raw_key.length).. -1]
82
+ value = value.gsub(/\302\240\302\273/u, '').strip.gsub(/(See more)|(see all)$/, '').strip
83
+
84
84
  if key == 'release date'
85
85
  begin
86
86
  value = Date.strptime(value, '%d %B %Y')
@@ -95,15 +95,14 @@ class YayImdbs
95
95
  p "Unexpected runtime format #{value} for movie #{imdb_id}"
96
96
  end
97
97
  elsif key == 'genre'
98
- value = value.sub(/(See more$)|(more$)/, '').strip.split
98
+ value = value.strip.split
99
+ elsif key == 'year'
100
+ value = value.split('|').collect { |l| l.strip.to_i }.reject { |y| y <= 0 }
99
101
  elsif key == 'language'
100
- # This is a bit of a hack, I dont really want to deal with multiple langauges, so if there is more than one
101
- # just use english or the first one found
102
- value = nil
103
- div.xpath(value_search).first.inner_text.split(/\|/).collect {|l| l.strip}.each do |language|
104
- value = language if value.nil?
105
- value = language if language.downcase == 'english'
106
- end
102
+ value = value.split('|').collect { |l| l.strip }
103
+ elsif key == 'taglines'
104
+ # Backwards compatibility
105
+ info_hash['tagline'] = value
107
106
  end
108
107
  info_hash[key.downcase.gsub(/\s/, '_')] = value
109
108
  end
@@ -116,7 +115,7 @@ class YayImdbs
116
115
 
117
116
  #scrap poster image urls
118
117
  thumb = doc.xpath("//div[@class = 'photo']/a/img")
119
- if thumb
118
+ if thumb.first
120
119
  thumbnail_url = thumb.first['src']
121
120
  if not thumbnail_url =~ /addposter.jpg$/
122
121
  info_hash['small_image'] = thumbnail_url
@@ -130,7 +129,7 @@ class YayImdbs
130
129
  end
131
130
 
132
131
  #scrap episodes if tv series
133
- if info_hash.has_key?('seasons')
132
+ if info_hash.has_key?('season')
134
133
  episodes = []
135
134
  doc = self.get_episodes_page(imdb_id)
136
135
  episode_divs = doc.css(".filter-all")
@@ -167,8 +166,8 @@ class YayImdbs
167
166
  return nil, nil unless doc.xpath("//meta[@name='title']").first
168
167
 
169
168
  title_text = doc.xpath("//meta[@name='title']").first['content']
170
- # Matches 'Movie Name (2010)' or 'Movie Name (2010/I)'
171
- if title_text =~ /(.*) \((\d{4})\/?\w*\)/
169
+ # Matches 'Movie Name (2010)' or 'Movie Name (2010/I)' or 'Lost (TV Series 2004–2010)'
170
+ if title_text =~ /(.*) \((?:TV\sSeries\s)?(\d{4})((\/\w*)|(.\d{4}))?\)/
172
171
  movie_title = $1
173
172
  movie_year = $2.to_i
174
173
 
@@ -202,4 +201,4 @@ class YayImdbs
202
201
  else return :movie
203
202
  end
204
203
  end
205
- end
204
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 4
9
- version: 0.1.4
8
+ - 5
9
+ version: 0.1.5
10
10
  platform: ruby
11
11
  authors:
12
12
  - Sam Cavenagh
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-07-08 00:00:00 +10:00
17
+ date: 2010-10-12 00:00:00 +11:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -44,8 +44,7 @@ dependencies:
44
44
  - 3
45
45
  - 0
46
46
  - 0
47
- - beta4
48
- version: 3.0.0.beta4
47
+ version: 3.0.0
49
48
  type: :runtime
50
49
  version_requirements: *id002
51
50
  - !ruby/object:Gem::Dependency
@@ -87,10 +86,10 @@ dependencies:
87
86
  - - ">="
88
87
  - !ruby/object:Gem::Version
89
88
  segments:
90
- - 2
91
- - 5
92
- - 8
93
- version: 2.5.8
89
+ - 1
90
+ - 3
91
+ - 1
92
+ version: 1.3.1
94
93
  type: :development
95
94
  version_requirements: *id005
96
95
  description: A simple imdb scraper built on Nokogiri for ruby 1.9+