feed_ninja 0.0.4 → 0.0.7

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 33efbad5fc0d64ded0e3319aca0f866180264cf6
4
- data.tar.gz: aa93bcb5af9786308807831e3494d2148aa783e4
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NWQyZDQ2ZGJmYThiNTMzOWVhMzY1MzhmM2IyOGU3MzllYzZhNGIwMw==
5
+ data.tar.gz: !binary |-
6
+ MjZhZDhhN2M3Yjg2N2VlYjllZGFjZTFiMjZlZTAzMmE2YTI2ZTZmNw==
5
7
  SHA512:
6
- metadata.gz: e4f48c89a57c342f0c7c4689ba519c73cf4f23a1bfc4d389157506a10453278328688e7cbf2b9e4964b9d1f5a6fc872e2e52bbbfc2887b0ae4e7dce2324acc01
7
- data.tar.gz: 8717dfcd4f7e361a0df1641420480003849ccbafe523971d078ab250d7156aaaddb5baa5c437e3ce4bb98e07528810592e4065b057ff0b112d147464099f42d9
8
+ metadata.gz: !binary |-
9
+ MmNkNDUzODJhZmIzMDI4MjAxYzY3MGIyY2I4MTIxNjYzOTA4ZGNkZjhlNzkx
10
+ M2U4MzRjMGE3ODU3Y2VkZjY2MDJiNTUyMTc1YjQ4OTYyYTQzYjQ4NDNmNGE3
11
+ OWEwY2FkOWU5OTEyMmQwYTcxNmIyMTE0ZmE4NDRhNGFhZGU2YmY=
12
+ data.tar.gz: !binary |-
13
+ N2U3NGMzMGFhOWU1YTY4ZDZmZGNlYzVkMmZiODZlNTlmZmYyYTVlNzA1NGQy
14
+ MTVhOTIyM2JmYTgxZjc2NjU3OTZmNzFhZjcxODEyNjIwZTliYzZkNjQ1OTAy
15
+ MDg5NzdlZDQ5YTFkODVlMmI2ZmEyNWRhMGY2YzU2MjliNDFlMjc=
@@ -31,6 +31,10 @@ end
31
31
  class Entry
32
32
  attr_accessor :title, :link, :images, :updated, :summary, :id
33
33
 
34
+ def initialize
35
+ @summary = []
36
+ end
37
+
34
38
  def to_s
35
39
  %{ <entry>
36
40
  <title>#{@title}</title>
@@ -49,7 +53,6 @@ class Entry
49
53
  <img src="#{src}"/>
50
54
  </a>
51
55
  }
52
- #end + summary || ""
53
- end
56
+ end + @summary.join("\n")
54
57
  end
55
58
  end
@@ -1,3 +1,6 @@
1
+ require 'uri'
2
+ require 'open-uri'
3
+
1
4
  class Extractor
2
5
  attr_accessor :doc
3
6
 
@@ -8,9 +11,11 @@ class Extractor
8
11
  end
9
12
  end
10
13
 
11
- def extract_images(base_url, *xpaths)
12
- Array(xpaths).collect_concat do |xpath|
13
- extract_image(base_url, xpath)
14
+ def extract_images(base_url, xpaths)
15
+ LOGGER.debug{ "collecting images for #{xpaths}" }
16
+ [*xpaths].collect_concat do |xpath|
17
+ LOGGER.debug{ "collecting image:xpath #{xpath}" }
18
+ extract_image(URI(base_url), xpath)
14
19
  end
15
20
  end
16
21
 
@@ -19,14 +24,18 @@ class Extractor
19
24
  if(picture_src.to_s.start_with? 'http') then
20
25
  picture_src.to_s
21
26
  else
27
+ LOGGER.debug { "BASE URL IS #{base_url.class}" }
22
28
  "#{base_url.scheme}://#{base_url.host}/#{base_url.path}#{picture_src}"
23
29
  end
24
30
  end
25
31
  end
26
32
 
27
- def extract_xml *xpaths
28
- Array(xpaths).collect_concat do |xpath|
33
+ def extract_xml(xpaths)
34
+ LOGGER.debug{ "collecting text" }
35
+ [*xpaths].collect_concat do |xpath|
36
+ LOGGER.debug{ "collecting text:xpath #{xpath}" }
29
37
  @doc.xpath(xpath).collect do |result|
38
+ LOGGER.debug{ "collecting text:result #{result}" }
30
39
  result.to_s
31
40
  end
32
41
  end
@@ -5,6 +5,8 @@ require 'time'
5
5
  require 'thread'
6
6
  require 'thwait'
7
7
 
8
+ Thread.abort_on_exception = true
9
+
8
10
  class FeedNinja
9
11
  attr_accessor :uri, :picture_xpath, :text_xpath, :title_regex, :limit
10
12
 
@@ -32,6 +34,9 @@ class FeedNinja
32
34
  # get the feed and iterate over the entries
33
35
  def fetch url
34
36
  open(url) do |feed|
37
+ if feed.content_encoding == ['gzip'] then
38
+ feed = Zlib::GzipReader.new(StringIO.new(feed.read)).read
39
+ end
35
40
  doc = RSS::Parser.parse(feed)
36
41
  initialize_writer(doc)
37
42
  process_items(doc)
@@ -52,6 +57,7 @@ class FeedNinja
52
57
 
53
58
  def process_item(original, feed_type, index)
54
59
  @writer.new_entry(index) do |entry|
60
+ LOGGER.debug{ "making new entry #{index}" }
55
61
  extractor = Extractor.new
56
62
  case feed_type
57
63
  when "atom"
@@ -68,9 +74,12 @@ class FeedNinja
68
74
  extractor.fetch original.link
69
75
  end
70
76
 
71
- entry.images = extractor.extract_images @picture_xpath
77
+ LOGGER.debug{ "extracting for entry #{index} #{entry}" }
78
+ entry.images = extractor.extract_images(entry.link, @picture_xpath)
79
+ LOGGER.debug{ "RATATAT" }
72
80
  entry.summary = extractor.extract_xml @text_xpath
73
81
 
82
+ LOGGER.debug{ "adding entry #{index} #{entry}" }
74
83
  entry #it's kind of fishy to explicitly have to return the entry here...
75
84
  end
76
85
  end
data/lib/feed_ninja.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'feed_ninja/feed_ninja'
2
2
  require 'feed_ninja/atomish'
3
3
  require 'feed_ninja/extractor'
4
+ require 'logger'
4
5
 
5
6
  def get (url, &block)
6
7
  ninja = FeedNinja.new
@@ -9,3 +10,6 @@ def get (url, &block)
9
10
  puts "Content-type: application/atom+xml\n\n"
10
11
  puts ninja.to_s
11
12
  end
13
+
14
+ LOGGER = Logger.new(STDERR)
15
+ LOGGER.level = Logger::INFO
@@ -4,6 +4,8 @@ require 'feed_ninja'
4
4
  describe FeedNinja do
5
5
  before :each do
6
6
  @ninja = FeedNinja.new
7
+ @extractor = double()
8
+ Extractor.should_receive(:new).and_return(@extractor)
7
9
  #Extractor.stub(:extract_images => [])
8
10
  #Extractor.stub(:extract_xml => "")
9
11
  end
@@ -18,9 +20,9 @@ describe FeedNinja do
18
20
  @ninja.fetch 'spec/feeds/rss.xml'
19
21
  end
20
22
 
21
- it 'should not read more than the given limit' do
23
+ it 'should not read more than the given limit', :focus do
22
24
  @ninja.limit = 1
23
- Extractor.should_receive(:new).once
24
- @ninja.fetch 'spec/feeds/rss.xml'
25
+ @extractor.should_receive(:fetch)
26
+ @ninja.fetch './spec/feeds/rss.xml'
25
27
  end
26
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed_ninja
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Latzer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-16 00:00:00.000000000 Z
11
+ date: 2014-02-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -38,9 +38,12 @@ dependencies:
38
38
  - - '='
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.6.1
41
- description: |-
42
- This gem can be used to take an RSS or Atom feed, follow the links they provide and extract images and/or text with xpath. The data is then reformatted into a new Atom feed.
43
- It is inteded to be used with feeds that only provide a sneak peek of the content, to rip all the interesting bits out for displaying in your feed reader immediately.
41
+ description: ! 'This gem can be used to take an RSS or Atom feed, follow the links
42
+ they provide and extract images and/or text with xpath. The data is then reformatted
43
+ into a new Atom feed.
44
+
45
+ It is inteded to be used with feeds that only provide a sneak peek of the content,
46
+ to rip all the interesting bits out for displaying in your feed reader immediately.'
44
47
  email: latzer.daniel@gmail.com
45
48
  executables: []
46
49
  extensions: []
@@ -68,12 +71,12 @@ require_paths:
68
71
  - lib
69
72
  required_ruby_version: !ruby/object:Gem::Requirement
70
73
  requirements:
71
- - - ">="
74
+ - - ! '>='
72
75
  - !ruby/object:Gem::Version
73
76
  version: '0'
74
77
  required_rubygems_version: !ruby/object:Gem::Requirement
75
78
  requirements:
76
- - - ">="
79
+ - - ! '>='
77
80
  - !ruby/object:Gem::Version
78
81
  version: '0'
79
82
  requirements: []