royw-imdb 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -5,7 +5,6 @@ ImdbMovie Jet Pilot
5
5
  - should convert to yaml
6
6
 
7
7
  ImdbMovie Indiana Jones and the Last Crusade
8
- - should query IMDB url
9
8
  - should get the title
10
9
  - should get director(s)
11
10
  - should get the poster url
@@ -30,7 +29,6 @@ ImdbMovie Indiana Jones and the Last Crusade
30
29
  - should be able to convert to and then from xml
31
30
 
32
31
  ImdbMovie Han robado una estrella
33
- - should query IMDB url
34
32
  - should get the title
35
33
  - should get director(s)
36
34
  - should not get the poster
@@ -46,9 +44,6 @@ ImdbMovie Han robado una estrella
46
44
  - should get the company
47
45
  - should not get any photos
48
46
 
49
- ImdbSearch search that returns multiple movies
50
- - should query IMDB url
51
-
52
47
  ImdbSearch search that returns multiple movies movies
53
48
  - should be a collection of ImdbMovie instances
54
49
  - should include 'Indiana Jones and the Last Crusade'
@@ -80,7 +75,6 @@ ImdbSearch searches that match on AKA title "Meltdown" movies
80
75
  - should have only one movie from 1995
81
76
 
82
77
  ImdbMovie Indiana Jones and the Last Crusade
83
- - should query IMDB url
84
78
  - should get the image
85
79
 
86
80
  String unescape_html
@@ -90,6 +84,6 @@ String unescape_html
90
84
  String strip_tags
91
85
  - should strip HTML tags
92
86
 
93
- Finished in 4.131553 seconds
87
+ Finished in 3.696984 seconds
94
88
 
95
- 65 examples, 0 failures
89
+ 61 examples, 0 failures
@@ -1,19 +1,46 @@
1
1
  # @imdb_movie.poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@._V1._SX216_SY316_.jpg'
2
2
 
3
3
  class ImdbImage
4
-
4
+
5
5
  attr_accessor :url
6
-
6
+
7
7
  def initialize(url)
8
8
  @url = File.join("http://www.imdb.com/", url)
9
9
  end
10
-
10
+
11
11
  def image
12
12
  document.at("table#principal tr td img")['src'] rescue nil
13
13
  end
14
-
14
+
15
15
  def document
16
- @document ||= Hpricot(open(self.url).read)
16
+ @document ||= Hpricot(fetch(self.url))
17
17
  end
18
-
18
+
19
+ private
20
+
21
+ MAX_ATTEMPTS = 3
22
+ SECONDS_BETWEEN_RETRIES = 1.0
23
+
24
+ def fetch(page)
25
+ doc = nil
26
+ attempts = 0
27
+ begin
28
+ doc = read_page(page)
29
+ rescue Exception => e
30
+ attempts += 1
31
+ if attempts > MAX_ATTEMPTS
32
+ raise
33
+ else
34
+ sleep SECONDS_BETWEEN_RETRIES
35
+ retry
36
+ end
37
+ end
38
+ doc
39
+ end
40
+
41
+ def read_page(page)
42
+ puts "ImdbImage::read_page"
43
+ open(page).read
44
+ end
45
+
19
46
  end
@@ -210,27 +210,23 @@ class ImdbMovie
210
210
  # #document.at("div#tn15title h1").innerHTML.split('<span>').first.unescape_html rescue nil
211
211
  # end
212
212
 
213
+ # Fetch the document with retry to handle the occasional glitches
214
+ def document
215
+ if @document.nil?
216
+ html = fetch(self.url)
217
+ @document = Hpricot(html)
218
+ end
219
+ @document
220
+ end
221
+
213
222
  MAX_ATTEMPTS = 3
214
223
  SECONDS_BETWEEN_RETRIES = 1.0
215
224
 
216
- # Fetch the document with retry to handle the occasional glitches
217
- def document
225
+ def fetch(page)
226
+ doc = nil
218
227
  attempts = 0
219
228
  begin
220
- if @document.nil?
221
- if ImdbMovie::use_html_cache
222
- begin
223
- filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
224
- html = open(filespec).read
225
- rescue Exception
226
- html = open(self.url).read
227
- cache_html_files(html)
228
- end
229
- else
230
- html = open(self.url).read
231
- end
232
- @document = Hpricot(html)
233
- end
229
+ doc = read_page(page)
234
230
  rescue Exception => e
235
231
  attempts += 1
236
232
  if attempts > MAX_ATTEMPTS
@@ -240,21 +236,12 @@ class ImdbMovie
240
236
  retry
241
237
  end
242
238
  end
243
- @document
239
+ doc
244
240
  end
245
241
 
246
- # this is used to save imdb pages so they may be used by rspec
247
- def cache_html_files(html)
248
- begin
249
- filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
250
- unless File.exist?(filespec)
251
- puts "caching #{filespec}"
252
- File.mkdirs(File.dirname(filespec))
253
- File.open(filespec, 'w') { |f| f.puts html }
254
- end
255
- rescue Exception => eMsg
256
- puts eMsg.to_s
257
- end
242
+ def read_page(page)
243
+ puts "ImdbMovie::read_page"
244
+ open(page).read
258
245
  end
259
246
 
260
247
  end
@@ -86,7 +86,32 @@ class ImdbSearch
86
86
 
87
87
  def document
88
88
  filespec = "http://www.imdb.com/find?q=#{CGI::escape(@query)};s=tt"
89
- @document ||= Hpricot(open(filespec).read)
89
+ @document ||= Hpricot(fetch(filespec))
90
+ end
91
+
92
+ MAX_ATTEMPTS = 3
93
+ SECONDS_BETWEEN_RETRIES = 1.0
94
+
95
+ def fetch(page)
96
+ doc = nil
97
+ attempts = 0
98
+ begin
99
+ doc = read_page(page)
100
+ rescue Exception => e
101
+ attempts += 1
102
+ if attempts > MAX_ATTEMPTS
103
+ raise
104
+ else
105
+ sleep SECONDS_BETWEEN_RETRIES
106
+ retry
107
+ end
108
+ end
109
+ doc
110
+ end
111
+
112
+ def read_page(page)
113
+ puts "ImdbSearch::read_page"
114
+ open(page).read
90
115
  end
91
116
 
92
117
  def parse_movies_from_document
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: royw-imdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sergio Gil