royw-imdb 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -5,7 +5,6 @@ ImdbMovie Jet Pilot
5
5
  - should convert to yaml
6
6
 
7
7
  ImdbMovie Indiana Jones and the Last Crusade
8
- - should query IMDB url
9
8
  - should get the title
10
9
  - should get director(s)
11
10
  - should get the poster url
@@ -30,7 +29,6 @@ ImdbMovie Indiana Jones and the Last Crusade
30
29
  - should be able to convert to and then from xml
31
30
 
32
31
  ImdbMovie Han robado una estrella
33
- - should query IMDB url
34
32
  - should get the title
35
33
  - should get director(s)
36
34
  - should not get the poster
@@ -46,9 +44,6 @@ ImdbMovie Han robado una estrella
46
44
  - should get the company
47
45
  - should not get any photos
48
46
 
49
- ImdbSearch search that returns multiple movies
50
- - should query IMDB url
51
-
52
47
  ImdbSearch search that returns multiple movies movies
53
48
  - should be a collection of ImdbMovie instances
54
49
  - should include 'Indiana Jones and the Last Crusade'
@@ -80,7 +75,6 @@ ImdbSearch searches that match on AKA title "Meltdown" movies
80
75
  - should have only one movie from 1995
81
76
 
82
77
  ImdbMovie Indiana Jones and the Last Crusade
83
- - should query IMDB url
84
78
  - should get the image
85
79
 
86
80
  String unescape_html
@@ -90,6 +84,6 @@ String unescape_html
90
84
  String strip_tags
91
85
  - should strip HTML tags
92
86
 
93
- Finished in 4.131553 seconds
87
+ Finished in 3.696984 seconds
94
88
 
95
- 65 examples, 0 failures
89
+ 61 examples, 0 failures
@@ -1,19 +1,46 @@
1
1
  # @imdb_movie.poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@._V1._SX216_SY316_.jpg'
2
2
 
3
3
  class ImdbImage
4
-
4
+
5
5
  attr_accessor :url
6
-
6
+
7
7
  def initialize(url)
8
8
  @url = File.join("http://www.imdb.com/", url)
9
9
  end
10
-
10
+
11
11
  def image
12
12
  document.at("table#principal tr td img")['src'] rescue nil
13
13
  end
14
-
14
+
15
15
  def document
16
- @document ||= Hpricot(open(self.url).read)
16
+ @document ||= Hpricot(fetch(self.url))
17
17
  end
18
-
18
+
19
+ private
20
+
21
+ MAX_ATTEMPTS = 3
22
+ SECONDS_BETWEEN_RETRIES = 1.0
23
+
24
+ def fetch(page)
25
+ doc = nil
26
+ attempts = 0
27
+ begin
28
+ doc = read_page(page)
29
+ rescue Exception => e
30
+ attempts += 1
31
+ if attempts > MAX_ATTEMPTS
32
+ raise
33
+ else
34
+ sleep SECONDS_BETWEEN_RETRIES
35
+ retry
36
+ end
37
+ end
38
+ doc
39
+ end
40
+
41
+ def read_page(page)
42
+ puts "ImdbImage::read_page"
43
+ open(page).read
44
+ end
45
+
19
46
  end
@@ -210,27 +210,23 @@ class ImdbMovie
210
210
  # #document.at("div#tn15title h1").innerHTML.split('<span>').first.unescape_html rescue nil
211
211
  # end
212
212
 
213
+ # Fetch the document with retry to handle the occasional glitches
214
+ def document
215
+ if @document.nil?
216
+ html = fetch(self.url)
217
+ @document = Hpricot(html)
218
+ end
219
+ @document
220
+ end
221
+
213
222
  MAX_ATTEMPTS = 3
214
223
  SECONDS_BETWEEN_RETRIES = 1.0
215
224
 
216
- # Fetch the document with retry to handle the occasional glitches
217
- def document
225
+ def fetch(page)
226
+ doc = nil
218
227
  attempts = 0
219
228
  begin
220
- if @document.nil?
221
- if ImdbMovie::use_html_cache
222
- begin
223
- filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
224
- html = open(filespec).read
225
- rescue Exception
226
- html = open(self.url).read
227
- cache_html_files(html)
228
- end
229
- else
230
- html = open(self.url).read
231
- end
232
- @document = Hpricot(html)
233
- end
229
+ doc = read_page(page)
234
230
  rescue Exception => e
235
231
  attempts += 1
236
232
  if attempts > MAX_ATTEMPTS
@@ -240,21 +236,12 @@ class ImdbMovie
240
236
  retry
241
237
  end
242
238
  end
243
- @document
239
+ doc
244
240
  end
245
241
 
246
- # this is used to save imdb pages so they may be used by rspec
247
- def cache_html_files(html)
248
- begin
249
- filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
250
- unless File.exist?(filespec)
251
- puts "caching #{filespec}"
252
- File.mkdirs(File.dirname(filespec))
253
- File.open(filespec, 'w') { |f| f.puts html }
254
- end
255
- rescue Exception => eMsg
256
- puts eMsg.to_s
257
- end
242
+ def read_page(page)
243
+ puts "ImdbMovie::read_page"
244
+ open(page).read
258
245
  end
259
246
 
260
247
  end
@@ -86,7 +86,32 @@ class ImdbSearch
86
86
 
87
87
  def document
88
88
  filespec = "http://www.imdb.com/find?q=#{CGI::escape(@query)};s=tt"
89
- @document ||= Hpricot(open(filespec).read)
89
+ @document ||= Hpricot(fetch(filespec))
90
+ end
91
+
92
+ MAX_ATTEMPTS = 3
93
+ SECONDS_BETWEEN_RETRIES = 1.0
94
+
95
+ def fetch(page)
96
+ doc = nil
97
+ attempts = 0
98
+ begin
99
+ doc = read_page(page)
100
+ rescue Exception => e
101
+ attempts += 1
102
+ if attempts > MAX_ATTEMPTS
103
+ raise
104
+ else
105
+ sleep SECONDS_BETWEEN_RETRIES
106
+ retry
107
+ end
108
+ end
109
+ doc
110
+ end
111
+
112
+ def read_page(page)
113
+ puts "ImdbSearch::read_page"
114
+ open(page).read
90
115
  end
91
116
 
92
117
  def parse_movies_from_document
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: royw-imdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sergio Gil