royw-imdb 0.0.14 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -8
- data/lib/imdb/imdb_image.rb +33 -6
- data/lib/imdb/imdb_movie.rb +16 -29
- data/lib/imdb/imdb_search.rb +26 -1
- metadata +1 -1
data/README
CHANGED
@@ -5,7 +5,6 @@ ImdbMovie Jet Pilot
|
|
5
5
|
- should convert to yaml
|
6
6
|
|
7
7
|
ImdbMovie Indiana Jones and the Last Crusade
|
8
|
-
- should query IMDB url
|
9
8
|
- should get the title
|
10
9
|
- should get director(s)
|
11
10
|
- should get the poster url
|
@@ -30,7 +29,6 @@ ImdbMovie Indiana Jones and the Last Crusade
|
|
30
29
|
- should be able to convert to and then from xml
|
31
30
|
|
32
31
|
ImdbMovie Han robado una estrella
|
33
|
-
- should query IMDB url
|
34
32
|
- should get the title
|
35
33
|
- should get director(s)
|
36
34
|
- should not get the poster
|
@@ -46,9 +44,6 @@ ImdbMovie Han robado una estrella
|
|
46
44
|
- should get the company
|
47
45
|
- should not get any photos
|
48
46
|
|
49
|
-
ImdbSearch search that returns multiple movies
|
50
|
-
- should query IMDB url
|
51
|
-
|
52
47
|
ImdbSearch search that returns multiple movies movies
|
53
48
|
- should be a collection of ImdbMovie instances
|
54
49
|
- should include 'Indiana Jones and the Last Crusade'
|
@@ -80,7 +75,6 @@ ImdbSearch searches that match on AKA title "Meltdown" movies
|
|
80
75
|
- should have only one movie from 1995
|
81
76
|
|
82
77
|
ImdbMovie Indiana Jones and the Last Crusade
|
83
|
-
- should query IMDB url
|
84
78
|
- should get the image
|
85
79
|
|
86
80
|
String unescape_html
|
@@ -90,6 +84,6 @@ String unescape_html
|
|
90
84
|
String strip_tags
|
91
85
|
- should strip HTML tags
|
92
86
|
|
93
|
-
Finished in
|
87
|
+
Finished in 3.696984 seconds
|
94
88
|
|
95
|
-
|
89
|
+
61 examples, 0 failures
|
data/lib/imdb/imdb_image.rb
CHANGED
@@ -1,19 +1,46 @@
|
|
1
1
|
# @imdb_movie.poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@._V1._SX216_SY316_.jpg'
|
2
2
|
|
3
3
|
# Wraps an IMDB media page and exposes the address of the image shown on it.
#
# The page is downloaded lazily, the first time #document is called, and the
# parsed result is memoized for the lifetime of the instance.
class ImdbImage
  # Total number of download attempts made before the last error is re-raised.
  MAX_ATTEMPTS = 3
  # Pause, in seconds, between a failed download and the next attempt.
  SECONDS_BETWEEN_RETRIES = 1.0

  attr_accessor :url

  # url:: path of the media page; it is joined onto "http://www.imdb.com/".
  def initialize(url)
    @url = File.join("http://www.imdb.com/", url)
  end

  # Returns the +src+ attribute of the first image inside table#principal,
  # or nil when the page cannot be fetched or holds no such image.
  # Deliberately best-effort: every StandardError is turned into nil.
  def image
    document.at("table#principal tr td img")['src']
  rescue StandardError
    nil
  end

  # Returns the Hpricot-parsed page, downloading it on first use.
  def document
    @document ||= Hpricot(fetch(url))
  end

  private

  # Downloads +page+, retrying after a short pause when a download fails.
  #
  # Makes at most MAX_ATTEMPTS downloads in total and re-raises the error from
  # the final attempt. Only StandardError is retried so that Interrupt,
  # SystemExit and similar are never swallowed by the retry loop (network and
  # timeout errors descend from StandardError on Ruby 1.9 and later).
  #
  # Returns the page body as a String.
  def fetch(page)
    attempts = 0
    begin
      read_page(page)
    rescue StandardError
      attempts += 1
      raise if attempts >= MAX_ATTEMPTS

      sleep SECONDS_BETWEEN_RETRIES
      retry
    end
  end

  # Reads +page+ and returns its whole content as a String.
  # The block form of open closes the handle as soon as the read finishes.
  def read_page(page)
    # Trace output goes to stderr and only under `ruby -d`, keeping stdout clean.
    warn "ImdbImage::read_page" if $DEBUG
    open(page) { |io| io.read }
  end
end
|
data/lib/imdb/imdb_movie.rb
CHANGED
@@ -210,27 +210,23 @@ class ImdbMovie
|
|
210
210
|
# #document.at("div#tn15title h1").innerHTML.split('<span>').first.unescape_html rescue nil
|
211
211
|
# end
|
212
212
|
|
213
|
+
# Returns the Hpricot-parsed page for this movie.
# The page is downloaded through #fetch (which retries on occasional
# glitches) the first time it is needed; later calls reuse the parsed result.
def document
  return @document unless @document.nil?

  @document = Hpricot(fetch(self.url))
end
|
221
|
+
|
213
222
|
MAX_ATTEMPTS = 3
|
214
223
|
SECONDS_BETWEEN_RETRIES = 1.0
|
215
224
|
|
216
|
-
|
217
|
-
|
225
|
+
def fetch(page)
|
226
|
+
doc = nil
|
218
227
|
attempts = 0
|
219
228
|
begin
|
220
|
-
|
221
|
-
if ImdbMovie::use_html_cache
|
222
|
-
begin
|
223
|
-
filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
|
224
|
-
html = open(filespec).read
|
225
|
-
rescue Exception
|
226
|
-
html = open(self.url).read
|
227
|
-
cache_html_files(html)
|
228
|
-
end
|
229
|
-
else
|
230
|
-
html = open(self.url).read
|
231
|
-
end
|
232
|
-
@document = Hpricot(html)
|
233
|
-
end
|
229
|
+
doc = read_page(page)
|
234
230
|
rescue Exception => e
|
235
231
|
attempts += 1
|
236
232
|
if attempts > MAX_ATTEMPTS
|
@@ -240,21 +236,12 @@ class ImdbMovie
|
|
240
236
|
retry
|
241
237
|
end
|
242
238
|
end
|
243
|
-
|
239
|
+
doc
|
244
240
|
end
|
245
241
|
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
|
250
|
-
unless File.exist?(filespec)
|
251
|
-
puts "caching #{filespec}"
|
252
|
-
File.mkdirs(File.dirname(filespec))
|
253
|
-
File.open(filespec, 'w') { |f| f.puts html }
|
254
|
-
end
|
255
|
-
rescue Exception => eMsg
|
256
|
-
puts eMsg.to_s
|
257
|
-
end
|
242
|
+
# Reads +page+ and returns its whole content as a String.
#
# page:: the address to read; it is handed to Kernel#open unchanged.
#
# The block form of open closes the handle as soon as the read finishes,
# instead of leaving it open until garbage collection.
def read_page(page)
  # Trace output goes to stderr and only under `ruby -d`, keeping stdout clean.
  warn "ImdbMovie::read_page" if $DEBUG
  open(page) { |io| io.read }
end
|
259
246
|
|
260
247
|
end
|
data/lib/imdb/imdb_search.rb
CHANGED
@@ -86,7 +86,32 @@ class ImdbSearch
|
|
86
86
|
|
87
87
|
# Returns the Hpricot-parsed IMDB title-search results for @query.
# The results page is downloaded through #fetch on first use and the parsed
# document is reused by later calls.
def document
  search_url = "http://www.imdb.com/find?q=#{CGI::escape(@query)};s=tt"
  return @document if @document

  @document = Hpricot(fetch(search_url))
end
|
91
|
+
|
92
|
+
MAX_ATTEMPTS = 3
|
93
|
+
SECONDS_BETWEEN_RETRIES = 1.0
|
94
|
+
|
95
|
+
# Downloads +page+ via #read_page, retrying after a short pause when a
# download fails.
#
# page:: the address to download.
#
# Makes at most MAX_ATTEMPTS downloads in total, sleeping
# SECONDS_BETWEEN_RETRIES between them, and re-raises the error from the
# final attempt. Only StandardError is retried so that Interrupt, SystemExit
# and similar are never swallowed by the retry loop (network and timeout
# errors descend from StandardError on Ruby 1.9 and later).
#
# Returns whatever #read_page returns for the page.
def fetch(page)
  attempts = 0
  begin
    read_page(page)
  rescue StandardError
    attempts += 1
    raise if attempts >= MAX_ATTEMPTS

    sleep SECONDS_BETWEEN_RETRIES
    retry
  end
end
|
111
|
+
|
112
|
+
# Reads +page+ and returns its whole content as a String.
#
# page:: the address to read; it is handed to Kernel#open unchanged.
#
# The block form of open closes the handle as soon as the read finishes,
# instead of leaving it open until garbage collection.
def read_page(page)
  # Trace output goes to stderr and only under `ruby -d`, keeping stdout clean.
  warn "ImdbSearch::read_page" if $DEBUG
  open(page) { |io| io.read }
end
|
91
116
|
|
92
117
|
def parse_movies_from_document
|