feed_ninja 0.0.4 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +13 -5
- data/lib/feed_ninja/atomish.rb +5 -2
- data/lib/feed_ninja/extractor.rb +14 -5
- data/lib/feed_ninja/feed_ninja.rb +10 -1
- data/lib/feed_ninja.rb +4 -0
- data/spec/feed_ninja_spec.rb +5 -3
- metadata +10 -7
checksums.yaml
CHANGED
@@ -1,7 +1,15 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NWQyZDQ2ZGJmYThiNTMzOWVhMzY1MzhmM2IyOGU3MzllYzZhNGIwMw==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MjZhZDhhN2M3Yjg2N2VlYjllZGFjZTFiMjZlZTAzMmE2YTI2ZTZmNw==
|
5
7
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MmNkNDUzODJhZmIzMDI4MjAxYzY3MGIyY2I4MTIxNjYzOTA4ZGNkZjhlNzkx
|
10
|
+
M2U4MzRjMGE3ODU3Y2VkZjY2MDJiNTUyMTc1YjQ4OTYyYTQzYjQ4NDNmNGE3
|
11
|
+
OWEwY2FkOWU5OTEyMmQwYTcxNmIyMTE0ZmE4NDRhNGFhZGU2YmY=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
N2U3NGMzMGFhOWU1YTY4ZDZmZGNlYzVkMmZiODZlNTlmZmYyYTVlNzA1NGQy
|
14
|
+
MTVhOTIyM2JmYTgxZjc2NjU3OTZmNzFhZjcxODEyNjIwZTliYzZkNjQ1OTAy
|
15
|
+
MDg5NzdlZDQ5YTFkODVlMmI2ZmEyNWRhMGY2YzU2MjliNDFlMjc=
|
data/lib/feed_ninja/atomish.rb
CHANGED
@@ -31,6 +31,10 @@ end
|
|
31
31
|
class Entry
|
32
32
|
attr_accessor :title, :link, :images, :updated, :summary, :id
|
33
33
|
|
34
|
+
def initialize
|
35
|
+
@summary = []
|
36
|
+
end
|
37
|
+
|
34
38
|
def to_s
|
35
39
|
%{ <entry>
|
36
40
|
<title>#{@title}</title>
|
@@ -49,7 +53,6 @@ class Entry
|
|
49
53
|
<img src="#{src}"/>
|
50
54
|
</a>
|
51
55
|
}
|
52
|
-
|
53
|
-
end
|
56
|
+
end + @summary.join("\n")
|
54
57
|
end
|
55
58
|
end
|
data/lib/feed_ninja/extractor.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
1
4
|
class Extractor
|
2
5
|
attr_accessor :doc
|
3
6
|
|
@@ -8,9 +11,11 @@ class Extractor
|
|
8
11
|
end
|
9
12
|
end
|
10
13
|
|
11
|
-
def extract_images(base_url,
|
12
|
-
|
13
|
-
|
14
|
+
def extract_images(base_url, xpaths)
|
15
|
+
LOGGER.debug{ "collecting images for #{xpaths}" }
|
16
|
+
[*xpaths].collect_concat do |xpath|
|
17
|
+
LOGGER.debug{ "collecting image:xpath #{xpath}" }
|
18
|
+
extract_image(URI(base_url), xpath)
|
14
19
|
end
|
15
20
|
end
|
16
21
|
|
@@ -19,14 +24,18 @@ class Extractor
|
|
19
24
|
if(picture_src.to_s.start_with? 'http') then
|
20
25
|
picture_src.to_s
|
21
26
|
else
|
27
|
+
LOGGER.debug { "BASE URL IS #{base_url.class}" }
|
22
28
|
"#{base_url.scheme}://#{base_url.host}/#{base_url.path}#{picture_src}"
|
23
29
|
end
|
24
30
|
end
|
25
31
|
end
|
26
32
|
|
27
|
-
def extract_xml
|
28
|
-
|
33
|
+
def extract_xml(xpaths)
|
34
|
+
LOGGER.debug{ "collecting text" }
|
35
|
+
[*xpaths].collect_concat do |xpath|
|
36
|
+
LOGGER.debug{ "collecting text:xpath #{xpath}" }
|
29
37
|
@doc.xpath(xpath).collect do |result|
|
38
|
+
LOGGER.debug{ "collecting text:result #{result}" }
|
30
39
|
result.to_s
|
31
40
|
end
|
32
41
|
end
|
@@ -5,6 +5,8 @@ require 'time'
|
|
5
5
|
require 'thread'
|
6
6
|
require 'thwait'
|
7
7
|
|
8
|
+
Thread.abort_on_exception = true
|
9
|
+
|
8
10
|
class FeedNinja
|
9
11
|
attr_accessor :uri, :picture_xpath, :text_xpath, :title_regex, :limit
|
10
12
|
|
@@ -32,6 +34,9 @@ class FeedNinja
|
|
32
34
|
# get the feed and iterate over the entries
|
33
35
|
def fetch url
|
34
36
|
open(url) do |feed|
|
37
|
+
if feed.content_encoding == ['gzip'] then
|
38
|
+
feed = Zlib::GzipReader.new(StringIO.new(feed.read)).read
|
39
|
+
end
|
35
40
|
doc = RSS::Parser.parse(feed)
|
36
41
|
initialize_writer(doc)
|
37
42
|
process_items(doc)
|
@@ -52,6 +57,7 @@ class FeedNinja
|
|
52
57
|
|
53
58
|
def process_item(original, feed_type, index)
|
54
59
|
@writer.new_entry(index) do |entry|
|
60
|
+
LOGGER.debug{ "making new entry #{index}" }
|
55
61
|
extractor = Extractor.new
|
56
62
|
case feed_type
|
57
63
|
when "atom"
|
@@ -68,9 +74,12 @@ class FeedNinja
|
|
68
74
|
extractor.fetch original.link
|
69
75
|
end
|
70
76
|
|
71
|
-
|
77
|
+
LOGGER.debug{ "extracting for entry #{index} #{entry}" }
|
78
|
+
entry.images = extractor.extract_images(entry.link, @picture_xpath)
|
79
|
+
LOGGER.debug{ "RATATAT" }
|
72
80
|
entry.summary = extractor.extract_xml @text_xpath
|
73
81
|
|
82
|
+
LOGGER.debug{ "adding entry #{index} #{entry}" }
|
74
83
|
entry #it's kind of fishy to explicitly have to return the entry here...
|
75
84
|
end
|
76
85
|
end
|
data/lib/feed_ninja.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'feed_ninja/feed_ninja'
|
2
2
|
require 'feed_ninja/atomish'
|
3
3
|
require 'feed_ninja/extractor'
|
4
|
+
require 'logger'
|
4
5
|
|
5
6
|
def get (url, &block)
|
6
7
|
ninja = FeedNinja.new
|
@@ -9,3 +10,6 @@ def get (url, &block)
|
|
9
10
|
puts "Content-type: application/atom+xml\n\n"
|
10
11
|
puts ninja.to_s
|
11
12
|
end
|
13
|
+
|
14
|
+
LOGGER = Logger.new(STDERR)
|
15
|
+
LOGGER.level = Logger::INFO
|
data/spec/feed_ninja_spec.rb
CHANGED
@@ -4,6 +4,8 @@ require 'feed_ninja'
|
|
4
4
|
describe FeedNinja do
|
5
5
|
before :each do
|
6
6
|
@ninja = FeedNinja.new
|
7
|
+
@extractor = double()
|
8
|
+
Extractor.should_receive(:new).and_return(@extractor)
|
7
9
|
#Extractor.stub(:extract_images => [])
|
8
10
|
#Extractor.stub(:extract_xml => "")
|
9
11
|
end
|
@@ -18,9 +20,9 @@ describe FeedNinja do
|
|
18
20
|
@ninja.fetch 'spec/feeds/rss.xml'
|
19
21
|
end
|
20
22
|
|
21
|
-
it 'should not read more than the given limit' do
|
23
|
+
it 'should not read more than the given limit', :focus do
|
22
24
|
@ninja.limit = 1
|
23
|
-
|
24
|
-
@ninja.fetch 'spec/feeds/rss.xml'
|
25
|
+
@extractor.should_receive(:fetch)
|
26
|
+
@ninja.fetch './spec/feeds/rss.xml'
|
25
27
|
end
|
26
28
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feed_ninja
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Latzer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -38,9 +38,12 @@ dependencies:
|
|
38
38
|
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.6.1
|
41
|
-
description:
|
42
|
-
|
43
|
-
|
41
|
+
description: ! 'This gem can be used to take an RSS or Atom feed, follow the links
|
42
|
+
they provide and extract images and/or text with xpath. The data is then reformatted
|
43
|
+
into a new Atom feed.
|
44
|
+
|
45
|
+
It is inteded to be used with feeds that only provide a sneak peek of the content,
|
46
|
+
to rip all the interesting bits out for displaying in your feed reader immediately.'
|
44
47
|
email: latzer.daniel@gmail.com
|
45
48
|
executables: []
|
46
49
|
extensions: []
|
@@ -68,12 +71,12 @@ require_paths:
|
|
68
71
|
- lib
|
69
72
|
required_ruby_version: !ruby/object:Gem::Requirement
|
70
73
|
requirements:
|
71
|
-
- -
|
74
|
+
- - ! '>='
|
72
75
|
- !ruby/object:Gem::Version
|
73
76
|
version: '0'
|
74
77
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
78
|
requirements:
|
76
|
-
- -
|
79
|
+
- - ! '>='
|
77
80
|
- !ruby/object:Gem::Version
|
78
81
|
version: '0'
|
79
82
|
requirements: []
|