RubyGems - royw-imdb - Versions diffs - 0.0.14 → 0.0.15 - Mend

royw-imdb 0.0.14 → 0.0.15

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

data/README CHANGED Viewed

@@ -5,7 +5,6 @@ ImdbMovie Jet Pilot
 - should convert to yaml
 ImdbMovie Indiana Jones and the Last Crusade
-- should query IMDB url
 - should get the title
 - should get director(s)
 - should get the poster url
@@ -30,7 +29,6 @@ ImdbMovie Indiana Jones and the Last Crusade
 - should be able to convert to and then from xml
 ImdbMovie Han robado una estrella
-- should query IMDB url
 - should get the title
 - should get director(s)
 - should not get the poster
@@ -46,9 +44,6 @@ ImdbMovie Han robado una estrella
 - should get the company
 - should not get any photos
-ImdbSearch search that returns multiple movies
-- should query IMDB url
 ImdbSearch search that returns multiple movies movies
 - should be a collection of ImdbMovie instances
 - should include 'Indiana Jones and the Last Crusade'
@@ -80,7 +75,6 @@ ImdbSearch searches that match on AKA title "Meltdown" movies
 - should have only one movie from 1995
 ImdbMovie Indiana Jones and the Last Crusade
-- should query IMDB url
 - should get the image
 String unescape_html
@@ -90,6 +84,6 @@ String unescape_html
 String strip_tags
 - should strip HTML tags
-Finished in 4.131553 seconds
+Finished in 3.696984 seconds
-65 examples, 0 failures
+61 examples, 0 failures

data/lib/imdb/imdb_image.rb CHANGED Viewed

@@ -1,19 +1,46 @@
 # @imdb_movie.poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@._V1._SX216_SY316_.jpg'
 class ImdbImage
   attr_accessor :url
   def initialize(url)
     @url = File.join("http://www.imdb.com/", url)
   end
   def image
     document.at("table#principal tr td img")['src'] rescue nil
   end
   def document
-    @document ||= Hpricot(open(self.url).read)
+    @document ||= Hpricot(fetch(self.url))
   end
+  private
+  MAX_ATTEMPTS = 3
+  SECONDS_BETWEEN_RETRIES = 1.0
+  def fetch(page)
+    doc = nil
+    attempts = 0
+    begin
+      doc = read_page(page)
+    rescue Exception => e
+      attempts += 1
+      if attempts > MAX_ATTEMPTS
+        raise
+      else
+        sleep SECONDS_BETWEEN_RETRIES
+        retry
+      end
+    end
+    doc
+  end
+  def read_page(page)
+    puts "ImdbImage::read_page"
+    open(page).read
+  end
 end

data/lib/imdb/imdb_movie.rb CHANGED Viewed

@@ -210,27 +210,23 @@ class ImdbMovie
 #     #document.at("div#tn15title h1").innerHTML.split('<span>').first.unescape_html rescue nil
 #   end
+  # Fetch the document with retry to handle the occasional glitches
+  def document
+    if @document.nil?
+      html = fetch(self.url)
+      @document = Hpricot(html)
+    end
+    @document
+  end
   MAX_ATTEMPTS = 3
   SECONDS_BETWEEN_RETRIES = 1.0
-  # Fetch the document with retry to handle the occasional glitches
-  def document
+  def fetch(page)
+    doc = nil
     attempts = 0
     begin
-      if @document.nil?
-        if ImdbMovie::use_html_cache
-          begin
-            filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
-            html = open(filespec).read
-          rescue Exception
-            html = open(self.url).read
-            cache_html_files(html)
-          end
-        else
-          html = open(self.url).read
-        end
-        @document = Hpricot(html)
-      end
+      doc = read_page(page)
     rescue Exception => e
       attempts += 1
       if attempts > MAX_ATTEMPTS
@@ -240,21 +236,12 @@ class ImdbMovie
         retry
       end
     end
-    @document
+    doc
   end
-  # this is used to save imdb pages so they may be used by rspec
-  def cache_html_files(html)
-    begin
-      filespec = self.url.gsub(/^http:\//, 'spec/samples').gsub(/\/$/, '.html')
-      unless File.exist?(filespec)
-        puts "caching #{filespec}"
-        File.mkdirs(File.dirname(filespec))
-        File.open(filespec, 'w') { |f| f.puts html }
-      end
-    rescue Exception => eMsg
-      puts eMsg.to_s
-    end
+  def read_page(page)
+    puts "ImdbMovie::read_page"
+    open(page).read
   end
 end

data/lib/imdb/imdb_search.rb CHANGED Viewed

@@ -86,7 +86,32 @@ class ImdbSearch
   def document
     filespec = "http://www.imdb.com/find?q=#{CGI::escape(@query)};s=tt"
-    @document ||= Hpricot(open(filespec).read)
+    @document ||= Hpricot(fetch(filespec))
+  end
+  MAX_ATTEMPTS = 3
+  SECONDS_BETWEEN_RETRIES = 1.0
+  def fetch(page)
+    doc = nil
+    attempts = 0
+    begin
+      doc = read_page(page)
+    rescue Exception => e
+      attempts += 1
+      if attempts > MAX_ATTEMPTS
+        raise
+      else
+        sleep SECONDS_BETWEEN_RETRIES
+        retry
+      end
+    end
+    doc
+  end
+  def read_page(page)
+    puts "ImdbSearch::read_page"
+    open(page).read
   end
   def parse_movies_from_document

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: royw-imdb
 version: !ruby/object:Gem::Version
-  version: 0.0.14
+  version: 0.0.15
 platform: ruby
 authors:
 - Sergio Gil