content_scrapper 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/content_scrapper.gemspec +2 -2
- data/lib/content_scrapper.rb +3 -1
- data/lib/content_scrapper/content_mapping.rb +1 -1
- data/test/test_content_scrapper.rb +3 -1
- data/test/test_pages.rb +0 -1
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.9
|
1
|
+
0.0.10
|
data/content_scrapper.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{content_scrapper}
|
8
|
-
s.version = "0.0.9"
|
8
|
+
s.version = "0.0.10"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Gyorgy Frivolt"]
|
12
|
-
s.date = %q{2010-03-
|
12
|
+
s.date = %q{2010-03-12}
|
13
13
|
s.description = %q{If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization.}
|
14
14
|
s.email = %q{gyorgy.frivolt@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/content_scrapper.rb
CHANGED
@@ -59,7 +59,9 @@ class ContentScrapper
|
|
59
59
|
doc = Nokogiri::HTML(options[:use_page] || Kernel.open(url))
|
60
60
|
return content_mapping.scrap_content(doc, content_scrapper = self)
|
61
61
|
rescue Exception
|
62
|
-
|
62
|
+
unless @scrapping_exception_handler_block.nil?
|
63
|
+
@scrapping_exception_handler_block.call($!, url)
|
64
|
+
end
|
63
65
|
return nil
|
64
66
|
end
|
65
67
|
end
|
@@ -30,7 +30,7 @@ class ContentMapping
|
|
30
30
|
content_section = doc.xpath(content_xpath)
|
31
31
|
content = content_section.to_a.join("\n")
|
32
32
|
content = content_scrapper.clean_content(content) unless content_scrapper.nil?
|
33
|
-
content = Iconv.conv(
|
33
|
+
content = Iconv.conv(iconv_to, iconv_from, content) unless iconv_to.nil?
|
34
34
|
return content if content_section.count > 0
|
35
35
|
end
|
36
36
|
nil
|
@@ -169,13 +169,15 @@ class TestContentScrapper < Test::Unit::TestCase
|
|
169
169
|
setup do
|
170
170
|
Kernel.expects(:open).raises(Exception, 'something failed')
|
171
171
|
@exception_handle_flag = nil
|
172
|
-
@scrapper.rescue_scrapping do |exception|
|
172
|
+
@scrapper.rescue_scrapping do |exception, url|
|
173
173
|
@exception_handle_flag = exception.message
|
174
|
+
@exception_url = url
|
174
175
|
end
|
175
176
|
end
|
176
177
|
should "catch the exception and handle it" do
|
177
178
|
assert_nil @scrapper.scrap_content('http://www.pretty.url')
|
178
179
|
assert_equal 'something failed', @exception_handle_flag
|
180
|
+
assert_equal 'http://www.pretty.url', @exception_url
|
179
181
|
end
|
180
182
|
end
|
181
183
|
|
data/test/test_pages.rb
CHANGED
@@ -21,7 +21,6 @@ class TestContentScrapper < Test::Unit::TestCase
|
|
21
21
|
Kernel.expects(:open).returns(StringIO.new(cdata_content))
|
22
22
|
end
|
23
23
|
should "not escape the cdata entries, should leave cdata unvisible" do
|
24
|
-
#<!--<![CDATA[
|
25
24
|
assert_match /<!--</, @scrapper.scrap_content('http://www.cdata.url/hsdae')
|
26
25
|
end
|
27
26
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: content_scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gyorgy Frivolt
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-03-
|
12
|
+
date: 2010-03-12 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|