content_scrapper 0.0.9 → 0.0.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{content_scrapper}
8
- s.version = "0.0.9"
8
+ s.version = "0.0.10"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Gyorgy Frivolt"]
12
- s.date = %q{2010-03-09}
12
+ s.date = %q{2010-03-12}
13
13
  s.description = %q{If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization.}
14
14
  s.email = %q{gyorgy.frivolt@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -59,7 +59,9 @@ class ContentScrapper
59
59
  doc = Nokogiri::HTML(options[:use_page] || Kernel.open(url))
60
60
  return content_mapping.scrap_content(doc, content_scrapper = self)
61
61
  rescue Exception
62
- @scrapping_exception_handler_block.call($!) unless @scrapping_exception_handler_block.nil?
62
+ unless @scrapping_exception_handler_block.nil?
63
+ @scrapping_exception_handler_block.call($!, url)
64
+ end
63
65
  return nil
64
66
  end
65
67
  end
@@ -30,7 +30,7 @@ class ContentMapping
30
30
  content_section = doc.xpath(content_xpath)
31
31
  content = content_section.to_a.join("\n")
32
32
  content = content_scrapper.clean_content(content) unless content_scrapper.nil?
33
- content = Iconv.conv(to=iconv_to, from=iconv_from, content) unless iconv_to.nil?
33
+ content = Iconv.conv(iconv_to, iconv_from, content) unless iconv_to.nil?
34
34
  return content if content_section.count > 0
35
35
  end
36
36
  nil
@@ -169,13 +169,15 @@ class TestContentScrapper < Test::Unit::TestCase
169
169
  setup do
170
170
  Kernel.expects(:open).raises(Exception, 'something failed')
171
171
  @exception_handle_flag = nil
172
- @scrapper.rescue_scrapping do |exception|
172
+ @scrapper.rescue_scrapping do |exception, url|
173
173
  @exception_handle_flag = exception.message
174
+ @exception_url = url
174
175
  end
175
176
  end
176
177
  should "catch the exception and handle it" do
177
178
  assert_nil @scrapper.scrap_content('http://www.pretty.url')
178
179
  assert_equal 'something failed', @exception_handle_flag
180
+ assert_equal 'http://www.pretty.url', @exception_url
179
181
  end
180
182
  end
181
183
 
@@ -21,7 +21,6 @@ class TestContentScrapper < Test::Unit::TestCase
21
21
  Kernel.expects(:open).returns(StringIO.new(cdata_content))
22
22
  end
23
23
  should "not escape the cdata entries, should leave cdata unvisible" do
24
- #<!--<![CDATA[
25
24
  assert_match /<!--</, @scrapper.scrap_content('http://www.cdata.url/hsdae')
26
25
  end
27
26
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: content_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gyorgy Frivolt
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-03-09 00:00:00 +01:00
12
+ date: 2010-03-12 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency