royw-imdb 0.0.14 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -8
- data/lib/imdb/imdb_image.rb +33 -6
- data/lib/imdb/imdb_movie.rb +16 -29
- data/lib/imdb/imdb_search.rb +26 -1
- metadata +1 -1
data/README
CHANGED
@@ -5,7 +5,6 @@ ImdbMovie Jet Pilot
|
|
5
5
|
- should convert to yaml
|
6
6
|
|
7
7
|
ImdbMovie Indiana Jones and the Last Crusade
|
8
|
-
- should query IMDB url
|
9
8
|
- should get the title
|
10
9
|
- should get director(s)
|
11
10
|
- should get the poster url
|
@@ -30,7 +29,6 @@ ImdbMovie Indiana Jones and the Last Crusade
|
|
30
29
|
- should be able to convert to and then from xml
|
31
30
|
|
32
31
|
ImdbMovie Han robado una estrella
|
33
|
-
- should query IMDB url
|
34
32
|
- should get the title
|
35
33
|
- should get director(s)
|
36
34
|
- should not get the poster
|
@@ -46,9 +44,6 @@ ImdbMovie Han robado una estrella
|
|
46
44
|
- should get the company
|
47
45
|
- should not get any photos
|
48
46
|
|
49
|
-
ImdbSearch search that returns multiple movies
|
50
|
-
- should query IMDB url
|
51
|
-
|
52
47
|
ImdbSearch search that returns multiple movies movies
|
53
48
|
- should be a collection of ImdbMovie instances
|
54
49
|
- should include 'Indiana Jones and the Last Crusade'
|
@@ -80,7 +75,6 @@ ImdbSearch searches that match on AKA title "Meltdown" movies
|
|
80
75
|
- should have only one movie from 1995
|
81
76
|
|
82
77
|
ImdbMovie Indiana Jones and the Last Crusade
|
83
|
-
- should query IMDB url
|
84
78
|
- should get the image
|
85
79
|
|
86
80
|
String unescape_html
|
@@ -90,6 +84,6 @@ String unescape_html
|
|
90
84
|
String strip_tags
|
91
85
|
- should strip HTML tags
|
92
86
|
|
93
|
-
Finished in
|
87
|
+
Finished in 3.696984 seconds
|
94
88
|
|
95
|
-
|
89
|
+
61 examples, 0 failures
|
data/lib/imdb/imdb_image.rb
CHANGED
@@ -1,19 +1,46 @@
|
|
1
1
|
# @imdb_movie.poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@._V1._SX216_SY316_.jpg'
|
2
2
|
|
3
3
|
class ImdbImage
|
4
|
-
|
4
|
+
|
5
5
|
attr_accessor :url
|
6
|
-
|
6
|
+
|
7
7
|
def initialize(url)
|
8
8
|
@url = File.join("http://www.imdb.com/", url)
|
9
9
|
end
|
10
|
-
|
10
|
+
|
11
11
|
def image
|
12
12
|
document.at("table#principal tr td img")['src'] rescue nil
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
def document
|
16
|
-
@document ||= Hpricot(
|
16
|
+
@document ||= Hpricot(fetch(self.url))
|
17
17
|
end
|
18
|
-
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
MAX_ATTEMPTS = 3
|
22
|
+
SECONDS_BETWEEN_RETRIES = 1.0
|
23
|
+
|
24
|
+
def fetch(page)
|
25
|
+
doc = nil
|
26
|
+
attempts = 0
|
27
|
+
begin
|
28
|
+
doc = read_page(page)
|
29
|
+
rescue Exception => e
|
30
|
+
attempts += 1
|
31
|
+
if attempts > MAX_ATTEMPTS
|
32
|
+
raise
|
33
|
+
else
|
34
|
+
sleep SECONDS_BETWEEN_RETRIES
|
35
|
+
retry
|
36
|
+
end
|
37
|
+
end
|
38
|
+
doc
|
39
|
+
end
|
40
|
+
|
41
|
+
def read_page(page)
|
42
|
+
puts "ImdbImage::read_page"
|
43
|
+
open(page).read
|
44
|
+
end
|
45
|
+
|
19
46
|
end
|
data/lib/imdb/imdb_movie.rb
CHANGED
@@ -210,27 +210,23 @@ class ImdbMovie
|
|
210
210
|
# #document.at("div#tn15title h1").innerHTML.split('<span>').first.unescape_html rescue nil
|
211
211
|
# end
|
212
212
|
|
213
|
+
# Fetch the document with retry to handle the occasional glitches
|
214
|
+
def document
|
215
|
+
if @document.nil?
|
216
|
+
html = fetch(self.url)
|
217
|
+
@document = Hpricot(html)
|
218
|
+
end
|
219
|
+
@document
|
220
|
+
end
|
221
|
+
|
213
222
|
MAX_ATTEMPTS = 3
|
214
223
|
SECONDS_BETWEEN_RETRIES = 1.0
|
215
224
|
|
216
|
-
|
217
|
-
|
225
|
+
def fetch(page)
|
226
|
+
doc = nil
|
218
227
|
attempts = 0
|
219
228
|
begin
|
220
|
-
|
221
|
-
if ImdbMovie::use_html_cache
|
222
|
-
begin
|
223
|
-
filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
|
224
|
-
html = open(filespec).read
|
225
|
-
rescue Exception
|
226
|
-
html = open(self.url).read
|
227
|
-
cache_html_files(html)
|
228
|
-
end
|
229
|
-
else
|
230
|
-
html = open(self.url).read
|
231
|
-
end
|
232
|
-
@document = Hpricot(html)
|
233
|
-
end
|
229
|
+
doc = read_page(page)
|
234
230
|
rescue Exception => e
|
235
231
|
attempts += 1
|
236
232
|
if attempts > MAX_ATTEMPTS
|
@@ -240,21 +236,12 @@ class ImdbMovie
|
|
240
236
|
retry
|
241
237
|
end
|
242
238
|
end
|
243
|
-
|
239
|
+
doc
|
244
240
|
end
|
245
241
|
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
|
250
|
-
unless File.exist?(filespec)
|
251
|
-
puts "caching #{filespec}"
|
252
|
-
File.mkdirs(File.dirname(filespec))
|
253
|
-
File.open(filespec, 'w') { |f| f.puts html }
|
254
|
-
end
|
255
|
-
rescue Exception => eMsg
|
256
|
-
puts eMsg.to_s
|
257
|
-
end
|
242
|
+
def read_page(page)
|
243
|
+
puts "ImdbMovie::read_page"
|
244
|
+
open(page).read
|
258
245
|
end
|
259
246
|
|
260
247
|
end
|
data/lib/imdb/imdb_search.rb
CHANGED
@@ -86,7 +86,32 @@ class ImdbSearch
|
|
86
86
|
|
87
87
|
def document
|
88
88
|
filespec = "http://www.imdb.com/find?q=#{CGI::escape(@query)};s=tt"
|
89
|
-
@document ||= Hpricot(
|
89
|
+
@document ||= Hpricot(fetch(filespec))
|
90
|
+
end
|
91
|
+
|
92
|
+
MAX_ATTEMPTS = 3
|
93
|
+
SECONDS_BETWEEN_RETRIES = 1.0
|
94
|
+
|
95
|
+
def fetch(page)
|
96
|
+
doc = nil
|
97
|
+
attempts = 0
|
98
|
+
begin
|
99
|
+
doc = read_page(page)
|
100
|
+
rescue Exception => e
|
101
|
+
attempts += 1
|
102
|
+
if attempts > MAX_ATTEMPTS
|
103
|
+
raise
|
104
|
+
else
|
105
|
+
sleep SECONDS_BETWEEN_RETRIES
|
106
|
+
retry
|
107
|
+
end
|
108
|
+
end
|
109
|
+
doc
|
110
|
+
end
|
111
|
+
|
112
|
+
def read_page(page)
|
113
|
+
puts "ImdbSearch::read_page"
|
114
|
+
open(page).read
|
90
115
|
end
|
91
116
|
|
92
117
|
def parse_movies_from_document
|