feedzirra 0.0.18.1 → 0.0.19
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/core_ext/string.rb +1 -1
- data/lib/feedzirra.rb +2 -2
- data/lib/feedzirra/parser/atom.rb +0 -12
- data/lib/feedzirra/parser/atom_entry.rb +2 -14
- data/lib/feedzirra/parser/atom_feed_burner_entry.rb +1 -1
- data/lib/feedzirra/parser/rss.rb +0 -12
- data/lib/feedzirra/parser/rss_entry.rb +0 -13
- data/spec/feedzirra/feed_entry_utilities_spec.rb +7 -7
- data/spec/feedzirra/parser/atom_spec.rb +0 -20
- data/spec/feedzirra/parser/rss_entry_spec.rb +34 -84
- data/spec/feedzirra/parser/rss_spec.rb +1 -48
- data/spec/spec_helper.rb +1 -9
- metadata +6 -6
data/lib/core_ext/string.rb
CHANGED
data/lib/feedzirra.rb
CHANGED
@@ -5,7 +5,7 @@ gem 'activesupport'
|
|
5
5
|
require 'zlib'
|
6
6
|
require 'curb'
|
7
7
|
require 'sax-machine'
|
8
|
-
require '
|
8
|
+
require 'loofah'
|
9
9
|
require 'uri'
|
10
10
|
require 'active_support/basic_object'
|
11
11
|
require 'active_support/core_ext/object'
|
@@ -30,5 +30,5 @@ require 'feedzirra/parser/atom'
|
|
30
30
|
require 'feedzirra/parser/atom_feed_burner'
|
31
31
|
|
32
32
|
module Feedzirra
|
33
|
-
VERSION = "0.0.18.1"
|
33
|
+
VERSION = "0.0.19"
|
34
34
|
end
|
@@ -5,28 +5,16 @@ module Feedzirra
|
|
5
5
|
# Parser for dealing with Atom feeds.
|
6
6
|
#
|
7
7
|
# == Attributes
|
8
|
-
# * prev_page
|
9
|
-
# * next_page
|
10
|
-
# * lat_page
|
11
8
|
# * title
|
12
|
-
# * subtitle
|
13
|
-
# * updated
|
14
9
|
# * feed_url
|
15
10
|
# * url
|
16
|
-
# * related
|
17
11
|
# * entries
|
18
12
|
class Atom
|
19
13
|
include SAXMachine
|
20
14
|
include FeedUtilities
|
21
|
-
element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
|
22
|
-
element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
|
23
|
-
element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}
|
24
15
|
element :title
|
25
|
-
element :subtitle
|
26
|
-
element :updated
|
27
16
|
element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
|
28
17
|
element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
|
29
|
-
elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
|
30
18
|
elements :link, :as => :links, :value => :href
|
31
19
|
elements :entry, :as => :entries, :class => AtomEntry
|
32
20
|
|
@@ -7,22 +7,16 @@ module Feedzirra
|
|
7
7
|
# == Attributes
|
8
8
|
# * title
|
9
9
|
# * url
|
10
|
-
# * related
|
11
10
|
# * author
|
12
11
|
# * content
|
13
12
|
# * summary
|
14
13
|
# * published
|
15
14
|
# * categories
|
16
|
-
# * media_content
|
17
|
-
# * media_description
|
18
|
-
# * media_thumbnail
|
19
|
-
# * enclosure
|
20
15
|
class AtomEntry
|
21
16
|
include SAXMachine
|
22
17
|
include FeedEntryUtilities
|
23
18
|
element :title
|
24
|
-
element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
|
25
|
-
elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
|
19
|
+
element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
|
26
20
|
element :name, :as => :author
|
27
21
|
element :content
|
28
22
|
element :summary
|
@@ -33,14 +27,8 @@ module Feedzirra
|
|
33
27
|
element :updated
|
34
28
|
element :modified, :as => :updated
|
35
29
|
elements :category, :as => :categories, :value => :term
|
36
|
-
|
37
|
-
element :"media:content", :as => :media_content, :value => :url
|
38
|
-
element :"media:description", :as => :media_description
|
39
|
-
element :"media:thumbnail", :as => :media_thumbnail, :value => :url
|
40
|
-
element :enclosure, :value => :url
|
41
|
-
|
42
30
|
elements :link, :as => :links, :value => :href
|
43
|
-
|
31
|
+
|
44
32
|
def url
|
45
33
|
@url || links.first
|
46
34
|
end
|
@@ -17,7 +17,7 @@ module Feedzirra
|
|
17
17
|
include FeedEntryUtilities
|
18
18
|
element :title
|
19
19
|
element :name, :as => :author
|
20
|
-
element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
|
20
|
+
element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
|
21
21
|
element :"feedburner:origLink", :as => :url
|
22
22
|
element :summary
|
23
23
|
element :content
|
data/lib/feedzirra/parser/rss.rb
CHANGED
@@ -5,27 +5,15 @@ module Feedzirra
|
|
5
5
|
# Parser for dealing with RSS feeds.
|
6
6
|
#
|
7
7
|
# == Attributes
|
8
|
-
# * prev_page
|
9
|
-
# * next_page
|
10
|
-
# * lat_page
|
11
8
|
# * title
|
12
9
|
# * feed_url
|
13
10
|
# * url
|
14
|
-
# * related
|
15
|
-
# * description
|
16
|
-
# * language
|
17
11
|
# * entries
|
18
12
|
class RSS
|
19
13
|
include SAXMachine
|
20
14
|
include FeedUtilities
|
21
|
-
element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
|
22
|
-
element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
|
23
|
-
element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}
|
24
15
|
element :title
|
25
16
|
element :link, :as => :url
|
26
|
-
elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
|
27
|
-
element :description
|
28
|
-
element :language
|
29
17
|
elements :item, :as => :entries, :class => RSSEntry
|
30
18
|
|
31
19
|
attr_accessor :feed_url
|
@@ -7,25 +7,17 @@ module Feedzirra
|
|
7
7
|
# == Attributes
|
8
8
|
# * title
|
9
9
|
# * url
|
10
|
-
# * related
|
11
10
|
# * author
|
12
11
|
# * content
|
13
12
|
# * summary
|
14
13
|
# * published
|
15
|
-
# * updated
|
16
14
|
# * categories
|
17
|
-
# * media_content
|
18
|
-
# * media_description
|
19
|
-
# * media_thumbnail
|
20
|
-
# * enclosure
|
21
15
|
class RSSEntry
|
22
16
|
include SAXMachine
|
23
17
|
include FeedEntryUtilities
|
24
18
|
element :title
|
25
19
|
element :link, :as => :url
|
26
|
-
elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
|
27
20
|
|
28
|
-
element :author
|
29
21
|
element :"dc:creator", :as => :author
|
30
22
|
element :author, :as => :author
|
31
23
|
element :"content:encoded", :as => :content
|
@@ -42,11 +34,6 @@ module Feedzirra
|
|
42
34
|
element :issued, :as => :published
|
43
35
|
elements :category, :as => :categories
|
44
36
|
|
45
|
-
element :"media:content", :as => :media_content, :value => :url
|
46
|
-
element :"media:description", :as => :media_description
|
47
|
-
element :"media:thumbnail", :as => :media_thumbnail, :value => :url
|
48
|
-
element :enclosure, :value => :url
|
49
|
-
|
50
37
|
element :guid, :as => :id
|
51
38
|
end
|
52
39
|
|
@@ -22,16 +22,16 @@ describe Feedzirra::FeedUtilities do
|
|
22
22
|
end
|
23
23
|
|
24
24
|
it "should provide a sanitized title" do
|
25
|
-
new_title = "<script>" + @entry.title
|
25
|
+
new_title = "<script>this is not safe</script>" + @entry.title
|
26
26
|
@entry.title = new_title
|
27
|
-
@entry.title.sanitize.should ==
|
27
|
+
@entry.title.sanitize.should == Loofah.scrub_fragment(new_title, :prune).to_s
|
28
28
|
end
|
29
29
|
|
30
30
|
it "should sanitize content in place" do
|
31
31
|
new_content = "<script>" + @entry.content
|
32
32
|
@entry.content = new_content.dup
|
33
|
-
@entry.content.sanitize!.should ==
|
34
|
-
@entry.content.should ==
|
33
|
+
@entry.content.sanitize!.should == Loofah.scrub_fragment(new_content, :prune).to_s
|
34
|
+
@entry.content.should == Loofah.scrub_fragment(new_content, :prune).to_s
|
35
35
|
end
|
36
36
|
|
37
37
|
it "should sanitize things in place" do
|
@@ -39,9 +39,9 @@ describe Feedzirra::FeedUtilities do
|
|
39
39
|
@entry.author += "<script>"
|
40
40
|
@entry.content += "<script>"
|
41
41
|
|
42
|
-
cleaned_title =
|
43
|
-
cleaned_author =
|
44
|
-
cleaned_content =
|
42
|
+
cleaned_title = Loofah.scrub_fragment(@entry.title, :prune).to_s
|
43
|
+
cleaned_author = Loofah.scrub_fragment(@entry.author, :prune).to_s
|
44
|
+
cleaned_content = Loofah.scrub_fragment(@entry.content, :prune).to_s
|
45
45
|
|
46
46
|
@entry.sanitize!
|
47
47
|
@entry.title.should == cleaned_title
|
@@ -24,14 +24,6 @@ describe Feedzirra::Parser::Atom do
|
|
24
24
|
@feed.url.should == "http://aws.typepad.com/aws/"
|
25
25
|
end
|
26
26
|
|
27
|
-
it "should parse updated" do
|
28
|
-
@feed.updated.should == "2009-01-16T18:21:00Z"
|
29
|
-
end
|
30
|
-
|
31
|
-
it "should parse the subtitle" do
|
32
|
-
@feed.subtitle.should == "Amazon Web Services, Products, Tools, and Developer Information..."
|
33
|
-
end
|
34
|
-
|
35
27
|
it "should parse the url even when it doesn't have the type='text/html' attribute" do
|
36
28
|
Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
|
37
29
|
end
|
@@ -44,18 +36,6 @@ describe Feedzirra::Parser::Atom do
|
|
44
36
|
@feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
|
45
37
|
end
|
46
38
|
|
47
|
-
it "should parse the prev atom:link" do
|
48
|
-
@feed.prev_page.should == "http://aws.typepad.com/aws/atom.xml?page=1"
|
49
|
-
end
|
50
|
-
|
51
|
-
it "should parse the next atom:link" do
|
52
|
-
@feed.next_page.should == "http://aws.typepad.com/aws/atom.xml?page=3"
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should parse the last atom:link" do
|
56
|
-
@feed.last_page.should == "http://aws.typepad.com/aws/atom.xml?page=5"
|
57
|
-
end
|
58
|
-
|
59
39
|
it "should parse entries" do
|
60
40
|
@feed.entries.size.should == 10
|
61
41
|
end
|
@@ -1,91 +1,41 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
2
|
|
3
3
|
describe Feedzirra::Parser::RSSEntry do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
it "
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
@entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
|
33
|
-
end
|
34
|
-
|
35
|
-
it "should parse the categories" do
|
36
|
-
@entry.categories.should == ['computadora', 'nokogiri', 'rails']
|
37
|
-
end
|
38
|
-
|
39
|
-
it "should parse the guid as id" do
|
40
|
-
@entry.id.should == "http://tenderlovemaking.com/?p=198"
|
41
|
-
end
|
4
|
+
before(:each) do
|
5
|
+
# I don't really like doing it this way because these unit test should only rely on RSSEntry,
|
6
|
+
# but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
|
7
|
+
@entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should parse the title" do
|
11
|
+
@entry.title.should == "Nokogiri’s Slop Feature"
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should parse the url" do
|
15
|
+
@entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should parse the author" do
|
19
|
+
@entry.author.should == "Aaron Patterson"
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should parse the content" do
|
23
|
+
@entry.content.should == sample_rss_entry_content
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should provide a summary" do
|
27
|
+
@entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop(<<-eohtml)\n<html>\n  <body>\n  [...]"
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should parse the published date" do
|
31
|
+
@entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
|
42
32
|
end
|
43
33
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
it "should parse the title" do
|
52
|
-
@entry.title.should == "the new boy on the block"
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should parse the url" do
|
56
|
-
@entry.url.should == "http://horowhenua.kete.net.nz/site/images/show/15535-the-new-boy-on-the-block"
|
57
|
-
end
|
58
|
-
|
59
|
-
it "should parse link rel='related' as related" do
|
60
|
-
@entry.related.should == ["http://horowhenua.kete.net.nz/", "http://horowhenua.kete.net.nz/site/all/images"]
|
61
|
-
end
|
62
|
-
|
63
|
-
it "should provide a summary" do
|
64
|
-
@entry.summary.should == sample_media_rss_entry_content
|
65
|
-
end
|
66
|
-
|
67
|
-
it "should parse the published date" do
|
68
|
-
@entry.published.to_s.should == "Mon Mar 23 07:55:43 UTC 2009"
|
69
|
-
end
|
70
|
-
|
71
|
-
it "should parse the guid as id" do
|
72
|
-
@entry.id.should == "http://horowhenua.kete.net.nz/site/images/show/15535-the-new-boy-on-the-block"
|
73
|
-
end
|
74
|
-
|
75
|
-
it "should parse media:content url as media_content" do
|
76
|
-
@entry.media_content.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_large.JPG"
|
77
|
-
end
|
78
|
-
|
79
|
-
it "should parse media:description as media_description" do
|
80
|
-
@entry.media_description.should == "big crane from Wellington visits the site for the week"
|
81
|
-
end
|
82
|
-
|
83
|
-
it "should parse media:thumbnail url as media_thumbnail" do
|
84
|
-
@entry.media_thumbnail.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_medium.JPG"
|
85
|
-
end
|
86
|
-
|
87
|
-
it "should parse enclosure url as enclosure" do
|
88
|
-
@entry.enclosure.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_large.JPG"
|
89
|
-
end
|
34
|
+
it "should parse the categories" do
|
35
|
+
@entry.categories.should == ['computadora', 'nokogiri', 'rails']
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should parse the guid as id" do
|
39
|
+
@entry.id.should == "http://tenderlovemaking.com/?p=198"
|
90
40
|
end
|
91
41
|
end
|
@@ -16,7 +16,7 @@ describe Feedzirra::Parser::RSS do
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
describe "parsing
|
19
|
+
describe "parsing" do
|
20
20
|
before(:each) do
|
21
21
|
@feed = Feedzirra::Parser::RSS.parse(sample_rss_feed)
|
22
22
|
end
|
@@ -38,51 +38,4 @@ describe Feedzirra::Parser::RSS do
|
|
38
38
|
@feed.entries.size.should == 10
|
39
39
|
end
|
40
40
|
end
|
41
|
-
|
42
|
-
describe "parsing of media rss feed" do
|
43
|
-
before(:each) do
|
44
|
-
@feed = Feedzirra::Parser::RSS.parse(sample_media_rss_feed)
|
45
|
-
end
|
46
|
-
|
47
|
-
it "should parse the title" do
|
48
|
-
@feed.title.should == "horowhenua.kete.net.nz - Latest 50 Results in images"
|
49
|
-
end
|
50
|
-
|
51
|
-
it "should parse the url" do
|
52
|
-
@feed.url.should == "http://horowhenua.kete.net.nz/site/all/images/rss.xml?search_terms=wellington"
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should parse link rel='related' as related" do
|
56
|
-
@feed.related.should == ["http://horowhenua.kete.net.nz/", "http://horowhenua.kete.net.nz/site/all/images"]
|
57
|
-
end
|
58
|
-
|
59
|
-
it "should parse the description" do
|
60
|
-
@feed.description.should == "Showing 1 - 50 results of 368"
|
61
|
-
end
|
62
|
-
|
63
|
-
it "should parse the language" do
|
64
|
-
@feed.language.should == "en-nz"
|
65
|
-
end
|
66
|
-
|
67
|
-
it "should provide an accessor for the feed_url" do
|
68
|
-
@feed.respond_to?(:feed_url).should == true
|
69
|
-
@feed.respond_to?(:feed_url=).should == true
|
70
|
-
end
|
71
|
-
|
72
|
-
it "should parse the prev atom:link" do
|
73
|
-
@feed.prev_page.should == "http://aws.typepad.com/aws/atom.xml?page=1"
|
74
|
-
end
|
75
|
-
|
76
|
-
it "should parse the next atom:link" do
|
77
|
-
@feed.next_page.should == "http://aws.typepad.com/aws/atom.xml?page=3"
|
78
|
-
end
|
79
|
-
|
80
|
-
it "should parse the last atom:link" do
|
81
|
-
@feed.last_page.should == "http://aws.typepad.com/aws/atom.xml?page=5"
|
82
|
-
end
|
83
|
-
|
84
|
-
it "should parse entries" do
|
85
|
-
@feed.entries.size.should == 50
|
86
|
-
end
|
87
|
-
end
|
88
41
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -55,12 +55,4 @@ end
|
|
55
55
|
|
56
56
|
def sample_wfw_feed
|
57
57
|
load_sample("PaulDixExplainsNothingWFW.xml")
|
58
|
-
end
|
59
|
-
|
60
|
-
def sample_media_rss_feed
|
61
|
-
load_sample("KeteHorowhenua.xml")
|
62
|
-
end
|
63
|
-
|
64
|
-
def sample_media_rss_entry_content
|
65
|
-
load_sample("KeteHorowhenuaFirstEntry.xml")
|
66
|
-
end
|
58
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedzirra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.18.1
|
4
|
+
version: 0.0.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Dix
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-02-19 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -23,7 +23,7 @@ dependencies:
|
|
23
23
|
version: 0.0.0
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
26
|
+
name: sax-machine
|
27
27
|
type: :runtime
|
28
28
|
version_requirement:
|
29
29
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -33,7 +33,7 @@ dependencies:
|
|
33
33
|
version: 0.0.12
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
|
-
name:
|
36
|
+
name: curb
|
37
37
|
type: :runtime
|
38
38
|
version_requirement:
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -63,14 +63,14 @@ dependencies:
|
|
63
63
|
version: 2.0.0
|
64
64
|
version:
|
65
65
|
- !ruby/object:Gem::Dependency
|
66
|
-
name:
|
66
|
+
name: loofah
|
67
67
|
type: :runtime
|
68
68
|
version_requirement:
|
69
69
|
version_requirements: !ruby/object:Gem::Requirement
|
70
70
|
requirements:
|
71
71
|
- - ">="
|
72
72
|
- !ruby/object:Gem::Version
|
73
|
-
version: 0.
|
73
|
+
version: 0.3.1
|
74
74
|
version:
|
75
75
|
description:
|
76
76
|
email: paul@pauldix.net
|