feedzirra 0.0.18.1 → 0.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/core_ext/string.rb +1 -1
- data/lib/feedzirra.rb +2 -2
- data/lib/feedzirra/parser/atom.rb +0 -12
- data/lib/feedzirra/parser/atom_entry.rb +2 -14
- data/lib/feedzirra/parser/atom_feed_burner_entry.rb +1 -1
- data/lib/feedzirra/parser/rss.rb +0 -12
- data/lib/feedzirra/parser/rss_entry.rb +0 -13
- data/spec/feedzirra/feed_entry_utilities_spec.rb +7 -7
- data/spec/feedzirra/parser/atom_spec.rb +0 -20
- data/spec/feedzirra/parser/rss_entry_spec.rb +34 -84
- data/spec/feedzirra/parser/rss_spec.rb +1 -48
- data/spec/spec_helper.rb +1 -9
- metadata +6 -6
data/lib/core_ext/string.rb
CHANGED
data/lib/feedzirra.rb
CHANGED
@@ -5,7 +5,7 @@ gem 'activesupport'
|
|
5
5
|
require 'zlib'
|
6
6
|
require 'curb'
|
7
7
|
require 'sax-machine'
|
8
|
-
require '
|
8
|
+
require 'loofah'
|
9
9
|
require 'uri'
|
10
10
|
require 'active_support/basic_object'
|
11
11
|
require 'active_support/core_ext/object'
|
@@ -30,5 +30,5 @@ require 'feedzirra/parser/atom'
|
|
30
30
|
require 'feedzirra/parser/atom_feed_burner'
|
31
31
|
|
32
32
|
module Feedzirra
|
33
|
-
VERSION = "0.0.18.1"
|
33
|
+
VERSION = "0.0.19"
|
34
34
|
end
|
@@ -5,28 +5,16 @@ module Feedzirra
|
|
5
5
|
# Parser for dealing with Atom feeds.
|
6
6
|
#
|
7
7
|
# == Attributes
|
8
|
-
# * prev_page
|
9
|
-
# * next_page
|
10
|
-
# * lat_page
|
11
8
|
# * title
|
12
|
-
# * subtitle
|
13
|
-
# * updated
|
14
9
|
# * feed_url
|
15
10
|
# * url
|
16
|
-
# * related
|
17
11
|
# * entries
|
18
12
|
class Atom
|
19
13
|
include SAXMachine
|
20
14
|
include FeedUtilities
|
21
|
-
element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
|
22
|
-
element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
|
23
|
-
element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}
|
24
15
|
element :title
|
25
|
-
element :subtitle
|
26
|
-
element :updated
|
27
16
|
element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
|
28
17
|
element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
|
29
|
-
elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
|
30
18
|
elements :link, :as => :links, :value => :href
|
31
19
|
elements :entry, :as => :entries, :class => AtomEntry
|
32
20
|
|
@@ -7,22 +7,16 @@ module Feedzirra
|
|
7
7
|
# == Attributes
|
8
8
|
# * title
|
9
9
|
# * url
|
10
|
-
# * related
|
11
10
|
# * author
|
12
11
|
# * content
|
13
12
|
# * summary
|
14
13
|
# * published
|
15
14
|
# * categories
|
16
|
-
# * media_content
|
17
|
-
# * media_description
|
18
|
-
# * media_thumbnail
|
19
|
-
# * enclosure
|
20
15
|
class AtomEntry
|
21
16
|
include SAXMachine
|
22
17
|
include FeedEntryUtilities
|
23
18
|
element :title
|
24
|
-
element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
|
25
|
-
elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
|
19
|
+
element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
|
26
20
|
element :name, :as => :author
|
27
21
|
element :content
|
28
22
|
element :summary
|
@@ -33,14 +27,8 @@ module Feedzirra
|
|
33
27
|
element :updated
|
34
28
|
element :modified, :as => :updated
|
35
29
|
elements :category, :as => :categories, :value => :term
|
36
|
-
|
37
|
-
element :"media:content", :as => :media_content, :value => :url
|
38
|
-
element :"media:description", :as => :media_description
|
39
|
-
element :"media:thumbnail", :as => :media_thumbnail, :value => :url
|
40
|
-
element :enclosure, :value => :url
|
41
|
-
|
42
30
|
elements :link, :as => :links, :value => :href
|
43
|
-
|
31
|
+
|
44
32
|
def url
|
45
33
|
@url || links.first
|
46
34
|
end
|
@@ -17,7 +17,7 @@ module Feedzirra
|
|
17
17
|
include FeedEntryUtilities
|
18
18
|
element :title
|
19
19
|
element :name, :as => :author
|
20
|
-
element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
|
20
|
+
element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
|
21
21
|
element :"feedburner:origLink", :as => :url
|
22
22
|
element :summary
|
23
23
|
element :content
|
data/lib/feedzirra/parser/rss.rb
CHANGED
@@ -5,27 +5,15 @@ module Feedzirra
|
|
5
5
|
# Parser for dealing with RSS feeds.
|
6
6
|
#
|
7
7
|
# == Attributes
|
8
|
-
# * prev_page
|
9
|
-
# * next_page
|
10
|
-
# * lat_page
|
11
8
|
# * title
|
12
9
|
# * feed_url
|
13
10
|
# * url
|
14
|
-
# * related
|
15
|
-
# * description
|
16
|
-
# * language
|
17
11
|
# * entries
|
18
12
|
class RSS
|
19
13
|
include SAXMachine
|
20
14
|
include FeedUtilities
|
21
|
-
element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
|
22
|
-
element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
|
23
|
-
element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}
|
24
15
|
element :title
|
25
16
|
element :link, :as => :url
|
26
|
-
elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
|
27
|
-
element :description
|
28
|
-
element :language
|
29
17
|
elements :item, :as => :entries, :class => RSSEntry
|
30
18
|
|
31
19
|
attr_accessor :feed_url
|
@@ -7,25 +7,17 @@ module Feedzirra
|
|
7
7
|
# == Attributes
|
8
8
|
# * title
|
9
9
|
# * url
|
10
|
-
# * related
|
11
10
|
# * author
|
12
11
|
# * content
|
13
12
|
# * summary
|
14
13
|
# * published
|
15
|
-
# * updated
|
16
14
|
# * categories
|
17
|
-
# * media_content
|
18
|
-
# * media_description
|
19
|
-
# * media_thumbnail
|
20
|
-
# * enclosure
|
21
15
|
class RSSEntry
|
22
16
|
include SAXMachine
|
23
17
|
include FeedEntryUtilities
|
24
18
|
element :title
|
25
19
|
element :link, :as => :url
|
26
|
-
elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
|
27
20
|
|
28
|
-
element :author
|
29
21
|
element :"dc:creator", :as => :author
|
30
22
|
element :author, :as => :author
|
31
23
|
element :"content:encoded", :as => :content
|
@@ -42,11 +34,6 @@ module Feedzirra
|
|
42
34
|
element :issued, :as => :published
|
43
35
|
elements :category, :as => :categories
|
44
36
|
|
45
|
-
element :"media:content", :as => :media_content, :value => :url
|
46
|
-
element :"media:description", :as => :media_description
|
47
|
-
element :"media:thumbnail", :as => :media_thumbnail, :value => :url
|
48
|
-
element :enclosure, :value => :url
|
49
|
-
|
50
37
|
element :guid, :as => :id
|
51
38
|
end
|
52
39
|
|
@@ -22,16 +22,16 @@ describe Feedzirra::FeedUtilities do
|
|
22
22
|
end
|
23
23
|
|
24
24
|
it "should provide a sanitized title" do
|
25
|
-
new_title = "<script>" + @entry.title
|
25
|
+
new_title = "<script>this is not safe</script>" + @entry.title
|
26
26
|
@entry.title = new_title
|
27
|
-
@entry.title.sanitize.should ==
|
27
|
+
@entry.title.sanitize.should == Loofah.scrub_fragment(new_title, :prune).to_s
|
28
28
|
end
|
29
29
|
|
30
30
|
it "should sanitize content in place" do
|
31
31
|
new_content = "<script>" + @entry.content
|
32
32
|
@entry.content = new_content.dup
|
33
|
-
@entry.content.sanitize!.should ==
|
34
|
-
@entry.content.should ==
|
33
|
+
@entry.content.sanitize!.should == Loofah.scrub_fragment(new_content, :prune).to_s
|
34
|
+
@entry.content.should == Loofah.scrub_fragment(new_content, :prune).to_s
|
35
35
|
end
|
36
36
|
|
37
37
|
it "should sanitize things in place" do
|
@@ -39,9 +39,9 @@ describe Feedzirra::FeedUtilities do
|
|
39
39
|
@entry.author += "<script>"
|
40
40
|
@entry.content += "<script>"
|
41
41
|
|
42
|
-
cleaned_title =
|
43
|
-
cleaned_author =
|
44
|
-
cleaned_content =
|
42
|
+
cleaned_title = Loofah.scrub_fragment(@entry.title, :prune).to_s
|
43
|
+
cleaned_author = Loofah.scrub_fragment(@entry.author, :prune).to_s
|
44
|
+
cleaned_content = Loofah.scrub_fragment(@entry.content, :prune).to_s
|
45
45
|
|
46
46
|
@entry.sanitize!
|
47
47
|
@entry.title.should == cleaned_title
|
@@ -24,14 +24,6 @@ describe Feedzirra::Parser::Atom do
|
|
24
24
|
@feed.url.should == "http://aws.typepad.com/aws/"
|
25
25
|
end
|
26
26
|
|
27
|
-
it "should parse updated" do
|
28
|
-
@feed.updated.should == "2009-01-16T18:21:00Z"
|
29
|
-
end
|
30
|
-
|
31
|
-
it "should parse the subtitle" do
|
32
|
-
@feed.subtitle.should == "Amazon Web Services, Products, Tools, and Developer Information..."
|
33
|
-
end
|
34
|
-
|
35
27
|
it "should parse the url even when it doesn't have the type='text/html' attribute" do
|
36
28
|
Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
|
37
29
|
end
|
@@ -44,18 +36,6 @@ describe Feedzirra::Parser::Atom do
|
|
44
36
|
@feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
|
45
37
|
end
|
46
38
|
|
47
|
-
it "should parse the prev atom:link" do
|
48
|
-
@feed.prev_page.should == "http://aws.typepad.com/aws/atom.xml?page=1"
|
49
|
-
end
|
50
|
-
|
51
|
-
it "should parse the next atom:link" do
|
52
|
-
@feed.next_page.should == "http://aws.typepad.com/aws/atom.xml?page=3"
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should parse the last atom:link" do
|
56
|
-
@feed.last_page.should == "http://aws.typepad.com/aws/atom.xml?page=5"
|
57
|
-
end
|
58
|
-
|
59
39
|
it "should parse entries" do
|
60
40
|
@feed.entries.size.should == 10
|
61
41
|
end
|
@@ -1,91 +1,41 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
2
|
|
3
3
|
describe Feedzirra::Parser::RSSEntry do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
it "
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
@entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
|
33
|
-
end
|
34
|
-
|
35
|
-
it "should parse the categories" do
|
36
|
-
@entry.categories.should == ['computadora', 'nokogiri', 'rails']
|
37
|
-
end
|
38
|
-
|
39
|
-
it "should parse the guid as id" do
|
40
|
-
@entry.id.should == "http://tenderlovemaking.com/?p=198"
|
41
|
-
end
|
4
|
+
before(:each) do
|
5
|
+
# I don't really like doing it this way because these unit tests should only rely on RSSEntry,
|
6
|
+
# but this is actually how it should work. You would never just pass entry xml straight to the RSSEntry
|
7
|
+
@entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should parse the title" do
|
11
|
+
@entry.title.should == "Nokogiri’s Slop Feature"
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should parse the url" do
|
15
|
+
@entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should parse the author" do
|
19
|
+
@entry.author.should == "Aaron Patterson"
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should parse the content" do
|
23
|
+
@entry.content.should == sample_rss_entry_content
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should provide a summary" do
|
27
|
+
@entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop(<<-eohtml)\n<html>\n  <body>\n  [...]"
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should parse the published date" do
|
31
|
+
@entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
|
42
32
|
end
|
43
33
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
it "should parse the title" do
|
52
|
-
@entry.title.should == "the new boy on the block"
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should parse the url" do
|
56
|
-
@entry.url.should == "http://horowhenua.kete.net.nz/site/images/show/15535-the-new-boy-on-the-block"
|
57
|
-
end
|
58
|
-
|
59
|
-
it "should parse link rel='related' as related" do
|
60
|
-
@entry.related.should == ["http://horowhenua.kete.net.nz/", "http://horowhenua.kete.net.nz/site/all/images"]
|
61
|
-
end
|
62
|
-
|
63
|
-
it "should provide a summary" do
|
64
|
-
@entry.summary.should == sample_media_rss_entry_content
|
65
|
-
end
|
66
|
-
|
67
|
-
it "should parse the published date" do
|
68
|
-
@entry.published.to_s.should == "Mon Mar 23 07:55:43 UTC 2009"
|
69
|
-
end
|
70
|
-
|
71
|
-
it "should parse the guid as id" do
|
72
|
-
@entry.id.should == "http://horowhenua.kete.net.nz/site/images/show/15535-the-new-boy-on-the-block"
|
73
|
-
end
|
74
|
-
|
75
|
-
it "should parse media:content url as media_content" do
|
76
|
-
@entry.media_content.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_large.JPG"
|
77
|
-
end
|
78
|
-
|
79
|
-
it "should parse media:description as media_description" do
|
80
|
-
@entry.media_description.should == "big crane from Wellington visits the site for the week"
|
81
|
-
end
|
82
|
-
|
83
|
-
it "should parse media:thumbnail url as media_thumbnail" do
|
84
|
-
@entry.media_thumbnail.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_medium.JPG"
|
85
|
-
end
|
86
|
-
|
87
|
-
it "should parse enclosure url as enclosure" do
|
88
|
-
@entry.enclosure.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_large.JPG"
|
89
|
-
end
|
34
|
+
it "should parse the categories" do
|
35
|
+
@entry.categories.should == ['computadora', 'nokogiri', 'rails']
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should parse the guid as id" do
|
39
|
+
@entry.id.should == "http://tenderlovemaking.com/?p=198"
|
90
40
|
end
|
91
41
|
end
|
@@ -16,7 +16,7 @@ describe Feedzirra::Parser::RSS do
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
describe "parsing
|
19
|
+
describe "parsing" do
|
20
20
|
before(:each) do
|
21
21
|
@feed = Feedzirra::Parser::RSS.parse(sample_rss_feed)
|
22
22
|
end
|
@@ -38,51 +38,4 @@ describe Feedzirra::Parser::RSS do
|
|
38
38
|
@feed.entries.size.should == 10
|
39
39
|
end
|
40
40
|
end
|
41
|
-
|
42
|
-
describe "parsing of media rss feed" do
|
43
|
-
before(:each) do
|
44
|
-
@feed = Feedzirra::Parser::RSS.parse(sample_media_rss_feed)
|
45
|
-
end
|
46
|
-
|
47
|
-
it "should parse the title" do
|
48
|
-
@feed.title.should == "horowhenua.kete.net.nz - Latest 50 Results in images"
|
49
|
-
end
|
50
|
-
|
51
|
-
it "should parse the url" do
|
52
|
-
@feed.url.should == "http://horowhenua.kete.net.nz/site/all/images/rss.xml?search_terms=wellington"
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should parse link rel='related' as related" do
|
56
|
-
@feed.related.should == ["http://horowhenua.kete.net.nz/", "http://horowhenua.kete.net.nz/site/all/images"]
|
57
|
-
end
|
58
|
-
|
59
|
-
it "should parse the description" do
|
60
|
-
@feed.description.should == "Showing 1 - 50 results of 368"
|
61
|
-
end
|
62
|
-
|
63
|
-
it "should parse the language" do
|
64
|
-
@feed.language.should == "en-nz"
|
65
|
-
end
|
66
|
-
|
67
|
-
it "should provide an accessor for the feed_url" do
|
68
|
-
@feed.respond_to?(:feed_url).should == true
|
69
|
-
@feed.respond_to?(:feed_url=).should == true
|
70
|
-
end
|
71
|
-
|
72
|
-
it "should parse the prev atom:link" do
|
73
|
-
@feed.prev_page.should == "http://aws.typepad.com/aws/atom.xml?page=1"
|
74
|
-
end
|
75
|
-
|
76
|
-
it "should parse the next atom:link" do
|
77
|
-
@feed.next_page.should == "http://aws.typepad.com/aws/atom.xml?page=3"
|
78
|
-
end
|
79
|
-
|
80
|
-
it "should parse the last atom:link" do
|
81
|
-
@feed.last_page.should == "http://aws.typepad.com/aws/atom.xml?page=5"
|
82
|
-
end
|
83
|
-
|
84
|
-
it "should parse entries" do
|
85
|
-
@feed.entries.size.should == 50
|
86
|
-
end
|
87
|
-
end
|
88
41
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -55,12 +55,4 @@ end
|
|
55
55
|
|
56
56
|
def sample_wfw_feed
|
57
57
|
load_sample("PaulDixExplainsNothingWFW.xml")
|
58
|
-
end
|
59
|
-
|
60
|
-
def sample_media_rss_feed
|
61
|
-
load_sample("KeteHorowhenua.xml")
|
62
|
-
end
|
63
|
-
|
64
|
-
def sample_media_rss_entry_content
|
65
|
-
load_sample("KeteHorowhenuaFirstEntry.xml")
|
66
|
-
end
|
58
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedzirra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.18.1
|
4
|
+
version: 0.0.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Dix
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-02-19 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -23,7 +23,7 @@ dependencies:
|
|
23
23
|
version: 0.0.0
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
26
|
+
name: sax-machine
|
27
27
|
type: :runtime
|
28
28
|
version_requirement:
|
29
29
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -33,7 +33,7 @@ dependencies:
|
|
33
33
|
version: 0.0.12
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
|
-
name:
|
36
|
+
name: curb
|
37
37
|
type: :runtime
|
38
38
|
version_requirement:
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -63,14 +63,14 @@ dependencies:
|
|
63
63
|
version: 2.0.0
|
64
64
|
version:
|
65
65
|
- !ruby/object:Gem::Dependency
|
66
|
-
name:
|
66
|
+
name: loofah
|
67
67
|
type: :runtime
|
68
68
|
version_requirement:
|
69
69
|
version_requirements: !ruby/object:Gem::Requirement
|
70
70
|
requirements:
|
71
71
|
- - ">="
|
72
72
|
- !ruby/object:Gem::Version
|
73
|
-
version: 0.
|
73
|
+
version: 0.3.1
|
74
74
|
version:
|
75
75
|
description:
|
76
76
|
email: paul@pauldix.net
|