content_scrapper 0.0.8 → 0.0.9

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.8
1
+ 0.0.9
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{content_scrapper}
8
- s.version = "0.0.8"
8
+ s.version = "0.0.9"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Gyorgy Frivolt"]
@@ -51,12 +51,12 @@ class ContentScrapper
51
51
  end
52
52
  end
53
53
 
54
- def scrap_content(url, full_page = nil)
54
+ def scrap_content(url, options = {})
55
55
  content_mappings.each do | content_mapping |
56
56
  if content_mapping.matches_url?(url)
57
57
  return nil if content_mapping.content_xpaths_list.empty?
58
58
  begin
59
- doc = Nokogiri::HTML(full_page || Kernel.open(url))
59
+ doc = Nokogiri::HTML(options[:use_page] || Kernel.open(url))
60
60
  return content_mapping.scrap_content(doc, content_scrapper = self)
61
61
  rescue Exception
62
62
  @scrapping_exception_handler_block.call($!) unless @scrapping_exception_handler_block.nil?
@@ -6,13 +6,13 @@ module Feedzirra
6
6
  module FeedEntryUtilities
7
7
 
8
8
  # Scrap the content based on the URL and the existing content and return it
9
- def scrap_content(scrapper = ContentScrapper.default, full_page = nil)
10
- scrapper.scrap_content(self.url, full_page = full_page) || self.content.to_s
9
+ def scrap_content(scrapper = ContentScrapper.default, options = {})
10
+ scrapper.scrap_content(self.url, options) || self.content.to_s
11
11
  end
12
12
 
13
13
  # Scrap the content or use the existing one and change the feed entry
14
- def scrap_content!(scrapper = ContentScrapper.default, full_page = nil)
15
- self.content = scrap_content(scrapper, full_page = full_page)
14
+ def scrap_content!(scrapper = ContentScrapper.default, options = {})
15
+ self.content = scrap_content(scrapper, options)
16
16
  end
17
17
  end
18
18
  end
@@ -112,7 +112,7 @@ class TestContentScrapper < Test::Unit::TestCase
112
112
  pretty_content = File.open("#{File.dirname(__FILE__)}/test_pages/pretty.html").read
113
113
  Kernel.expects(:open).never
114
114
  @scrapped_content = @scrapper.scrap_content('http://www.pretty.url/hsdae',
115
- full_page = pretty_content)
115
+ :use_page => pretty_content)
116
116
  end
117
117
  should "scrap from the provided full page" do
118
118
  assert_match(%r{<p><strong>This is a strong text</strong></p>}, @scrapped_content)
@@ -157,8 +157,8 @@ class TestContentScrapper < Test::Unit::TestCase
157
157
  should("return the original feed content") do
158
158
  @feed_entries.each do |feed_entry|
159
159
  assert_match(%r{<p><strong>This is a strong text</strong></p>},
160
- feed_entry.scrap_content(@scrapper, full_page = @pretty_content))
161
- feed_entry.scrap_content!(@scrapper, full_page = @pretty_content)
160
+ feed_entry.scrap_content(@scrapper, :use_page => @pretty_content))
161
+ feed_entry.scrap_content!(@scrapper, :use_page => @pretty_content)
162
162
  assert_match(%r{<p><strong>This is a strong text</strong></p>}, feed_entry.content)
163
163
  end
164
164
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: content_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gyorgy Frivolt