content_scrapper 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{content_scrapper}
8
- s.version = "0.0.9"
8
+ s.version = "0.0.10"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Gyorgy Frivolt"]
12
- s.date = %q{2010-03-09}
12
+ s.date = %q{2010-03-12}
13
13
  s.description = %q{If you want to cut only the content of pages, without any other part (like the menu, header, footer, commercials, etc.), you might find this gem very handy. A DSL is also defined for nifty definitions for your screen scrapping and sanitization.}
14
14
  s.email = %q{gyorgy.frivolt@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -59,7 +59,9 @@ class ContentScrapper
59
59
  doc = Nokogiri::HTML(options[:use_page] || Kernel.open(url))
60
60
  return content_mapping.scrap_content(doc, content_scrapper = self)
61
61
  rescue Exception
62
- @scrapping_exception_handler_block.call($!) unless @scrapping_exception_handler_block.nil?
62
+ unless @scrapping_exception_handler_block.nil?
63
+ @scrapping_exception_handler_block.call($!, url)
64
+ end
63
65
  return nil
64
66
  end
65
67
  end
@@ -30,7 +30,7 @@ class ContentMapping
30
30
  content_section = doc.xpath(content_xpath)
31
31
  content = content_section.to_a.join("\n")
32
32
  content = content_scrapper.clean_content(content) unless content_scrapper.nil?
33
- content = Iconv.conv(to=iconv_to, from=iconv_from, content) unless iconv_to.nil?
33
+ content = Iconv.conv(iconv_to, iconv_from, content) unless iconv_to.nil?
34
34
  return content if content_section.count > 0
35
35
  end
36
36
  nil
@@ -169,13 +169,15 @@ class TestContentScrapper < Test::Unit::TestCase
169
169
  setup do
170
170
  Kernel.expects(:open).raises(Exception, 'something failed')
171
171
  @exception_handle_flag = nil
172
- @scrapper.rescue_scrapping do |exception|
172
+ @scrapper.rescue_scrapping do |exception, url|
173
173
  @exception_handle_flag = exception.message
174
+ @exception_url = url
174
175
  end
175
176
  end
176
177
  should "catch the exception and handle it" do
177
178
  assert_nil @scrapper.scrap_content('http://www.pretty.url')
178
179
  assert_equal 'something failed', @exception_handle_flag
180
+ assert_equal 'http://www.pretty.url', @exception_url
179
181
  end
180
182
  end
181
183
 
@@ -21,7 +21,6 @@ class TestContentScrapper < Test::Unit::TestCase
21
21
  Kernel.expects(:open).returns(StringIO.new(cdata_content))
22
22
  end
23
23
  should "not escape the cdata entries, should leave cdata unvisible" do
24
- #<!--<![CDATA[
25
24
  assert_match /<!--</, @scrapper.scrap_content('http://www.cdata.url/hsdae')
26
25
  end
27
26
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: content_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gyorgy Frivolt
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-03-09 00:00:00 +01:00
12
+ date: 2010-03-12 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency