feedzirra 0.0.18.1 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,6 @@ class String
4
4
  end
5
5
 
6
6
  def sanitize
7
- Dryopteris.sanitize(self)
7
+ Loofah.scrub_fragment(self, :prune).to_s
8
8
  end
9
9
  end
data/lib/feedzirra.rb CHANGED
@@ -5,7 +5,7 @@ gem 'activesupport'
5
5
  require 'zlib'
6
6
  require 'curb'
7
7
  require 'sax-machine'
8
- require 'dryopteris'
8
+ require 'loofah'
9
9
  require 'uri'
10
10
  require 'active_support/basic_object'
11
11
  require 'active_support/core_ext/object'
@@ -30,5 +30,5 @@ require 'feedzirra/parser/atom'
30
30
  require 'feedzirra/parser/atom_feed_burner'
31
31
 
32
32
  module Feedzirra
33
- VERSION = "0.0.18.1"
33
+ VERSION = "0.0.19"
34
34
  end
@@ -5,28 +5,16 @@ module Feedzirra
5
5
  # Parser for dealing with Atom feeds.
6
6
  #
7
7
  # == Attributes
8
- # * prev_page
9
- # * next_page
10
- # * lat_page
11
8
  # * title
12
- # * subtitle
13
- # * updated
14
9
  # * feed_url
15
10
  # * url
16
- # * related
17
11
  # * entries
18
12
  class Atom
19
13
  include SAXMachine
20
14
  include FeedUtilities
21
- element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
22
- element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
23
- element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}
24
15
  element :title
25
- element :subtitle
26
- element :updated
27
16
  element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
28
17
  element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
29
- elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
30
18
  elements :link, :as => :links, :value => :href
31
19
  elements :entry, :as => :entries, :class => AtomEntry
32
20
 
@@ -7,22 +7,16 @@ module Feedzirra
7
7
  # == Attributes
8
8
  # * title
9
9
  # * url
10
- # * related
11
10
  # * author
12
11
  # * content
13
12
  # * summary
14
13
  # * published
15
14
  # * categories
16
- # * media_content
17
- # * media_description
18
- # * media_thumbnail
19
- # * enclosure
20
15
  class AtomEntry
21
16
  include SAXMachine
22
17
  include FeedEntryUtilities
23
18
  element :title
24
- element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
25
- elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
19
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
26
20
  element :name, :as => :author
27
21
  element :content
28
22
  element :summary
@@ -33,14 +27,8 @@ module Feedzirra
33
27
  element :updated
34
28
  element :modified, :as => :updated
35
29
  elements :category, :as => :categories, :value => :term
36
-
37
- element :"media:content", :as => :media_content, :value => :url
38
- element :"media:description", :as => :media_description
39
- element :"media:thumbnail", :as => :media_thumbnail, :value => :url
40
- element :enclosure, :value => :url
41
-
42
30
  elements :link, :as => :links, :value => :href
43
-
31
+
44
32
  def url
45
33
  @url || links.first
46
34
  end
@@ -17,7 +17,7 @@ module Feedzirra
17
17
  include FeedEntryUtilities
18
18
  element :title
19
19
  element :name, :as => :author
20
- element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
20
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
21
21
  element :"feedburner:origLink", :as => :url
22
22
  element :summary
23
23
  element :content
@@ -5,27 +5,15 @@ module Feedzirra
5
5
  # Parser for dealing with RSS feeds.
6
6
  #
7
7
  # == Attributes
8
- # * prev_page
9
- # * next_page
10
- # * lat_page
11
8
  # * title
12
9
  # * feed_url
13
10
  # * url
14
- # * related
15
- # * description
16
- # * language
17
11
  # * entries
18
12
  class RSS
19
13
  include SAXMachine
20
14
  include FeedUtilities
21
- element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
22
- element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
23
- element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}
24
15
  element :title
25
16
  element :link, :as => :url
26
- elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
27
- element :description
28
- element :language
29
17
  elements :item, :as => :entries, :class => RSSEntry
30
18
 
31
19
  attr_accessor :feed_url
@@ -7,25 +7,17 @@ module Feedzirra
7
7
  # == Attributes
8
8
  # * title
9
9
  # * url
10
- # * related
11
10
  # * author
12
11
  # * content
13
12
  # * summary
14
13
  # * published
15
- # * updated
16
14
  # * categories
17
- # * media_content
18
- # * media_description
19
- # * media_thumbnail
20
- # * enclosure
21
15
  class RSSEntry
22
16
  include SAXMachine
23
17
  include FeedEntryUtilities
24
18
  element :title
25
19
  element :link, :as => :url
26
- elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
27
20
 
28
- element :author
29
21
  element :"dc:creator", :as => :author
30
22
  element :author, :as => :author
31
23
  element :"content:encoded", :as => :content
@@ -42,11 +34,6 @@ module Feedzirra
42
34
  element :issued, :as => :published
43
35
  elements :category, :as => :categories
44
36
 
45
- element :"media:content", :as => :media_content, :value => :url
46
- element :"media:description", :as => :media_description
47
- element :"media:thumbnail", :as => :media_thumbnail, :value => :url
48
- element :enclosure, :value => :url
49
-
50
37
  element :guid, :as => :id
51
38
  end
52
39
 
@@ -22,16 +22,16 @@ describe Feedzirra::FeedUtilities do
22
22
  end
23
23
 
24
24
  it "should provide a sanitized title" do
25
- new_title = "<script>" + @entry.title
25
+ new_title = "<script>this is not safe</script>" + @entry.title
26
26
  @entry.title = new_title
27
- @entry.title.sanitize.should == Dryopteris.sanitize(new_title)
27
+ @entry.title.sanitize.should == Loofah.scrub_fragment(new_title, :prune).to_s
28
28
  end
29
29
 
30
30
  it "should sanitize content in place" do
31
31
  new_content = "<script>" + @entry.content
32
32
  @entry.content = new_content.dup
33
- @entry.content.sanitize!.should == Dryopteris.sanitize(new_content)
34
- @entry.content.should == Dryopteris.sanitize(new_content)
33
+ @entry.content.sanitize!.should == Loofah.scrub_fragment(new_content, :prune).to_s
34
+ @entry.content.should == Loofah.scrub_fragment(new_content, :prune).to_s
35
35
  end
36
36
 
37
37
  it "should sanitize things in place" do
@@ -39,9 +39,9 @@ describe Feedzirra::FeedUtilities do
39
39
  @entry.author += "<script>"
40
40
  @entry.content += "<script>"
41
41
 
42
- cleaned_title = Dryopteris.sanitize(@entry.title)
43
- cleaned_author = Dryopteris.sanitize(@entry.author)
44
- cleaned_content = Dryopteris.sanitize(@entry.content)
42
+ cleaned_title = Loofah.scrub_fragment(@entry.title, :prune).to_s
43
+ cleaned_author = Loofah.scrub_fragment(@entry.author, :prune).to_s
44
+ cleaned_content = Loofah.scrub_fragment(@entry.content, :prune).to_s
45
45
 
46
46
  @entry.sanitize!
47
47
  @entry.title.should == cleaned_title
@@ -24,14 +24,6 @@ describe Feedzirra::Parser::Atom do
24
24
  @feed.url.should == "http://aws.typepad.com/aws/"
25
25
  end
26
26
 
27
- it "should parse updated" do
28
- @feed.updated.should == "2009-01-16T18:21:00Z"
29
- end
30
-
31
- it "should parse the subtitle" do
32
- @feed.subtitle.should == "Amazon Web Services, Products, Tools, and Developer Information..."
33
- end
34
-
35
27
  it "should parse the url even when it doesn't have the type='text/html' attribute" do
36
28
  Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
37
29
  end
@@ -44,18 +36,6 @@ describe Feedzirra::Parser::Atom do
44
36
  @feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
45
37
  end
46
38
 
47
- it "should parse the prev atom:link" do
48
- @feed.prev_page.should == "http://aws.typepad.com/aws/atom.xml?page=1"
49
- end
50
-
51
- it "should parse the next atom:link" do
52
- @feed.next_page.should == "http://aws.typepad.com/aws/atom.xml?page=3"
53
- end
54
-
55
- it "should parse the last atom:link" do
56
- @feed.last_page.should == "http://aws.typepad.com/aws/atom.xml?page=5"
57
- end
58
-
59
39
  it "should parse entries" do
60
40
  @feed.entries.size.should == 10
61
41
  end
@@ -1,91 +1,41 @@
1
1
  require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
2
 
3
3
  describe Feedzirra::Parser::RSSEntry do
4
- describe "parsing of simple rss feed item" do
5
- before(:each) do
6
- # I don't really like doing it this way because these unit test should only rely on RSSEntry,
7
- # but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
8
- @entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
9
- end
10
-
11
- it "should parse the title" do
12
- @entry.title.should == "Nokogiri’s Slop Feature"
13
- end
14
-
15
- it "should parse the url" do
16
- @entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
17
- end
18
-
19
- it "should parse the author" do
20
- @entry.author.should == "Aaron Patterson"
21
- end
22
-
23
- it "should parse the content" do
24
- @entry.content.should == sample_rss_entry_content
25
- end
26
-
27
- it "should provide a summary" do
28
- @entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop&#40;&#60;&#60;-eohtml&#41;\n&#60;html&#62;\n&#160; &#60;body&#62;\n&#160; [...]"
29
- end
30
-
31
- it "should parse the published date" do
32
- @entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
33
- end
34
-
35
- it "should parse the categories" do
36
- @entry.categories.should == ['computadora', 'nokogiri', 'rails']
37
- end
38
-
39
- it "should parse the guid as id" do
40
- @entry.id.should == "http://tenderlovemaking.com/?p=198"
41
- end
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit test should only rely on RSSEntry,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
7
+ @entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "Nokogiri’s Slop Feature"
12
+ end
13
+
14
+ it "should parse the url" do
15
+ @entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
16
+ end
17
+
18
+ it "should parse the author" do
19
+ @entry.author.should == "Aaron Patterson"
20
+ end
21
+
22
+ it "should parse the content" do
23
+ @entry.content.should == sample_rss_entry_content
24
+ end
25
+
26
+ it "should provide a summary" do
27
+ @entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop&#40;&#60;&#60;-eohtml&#41;\n&#60;html&#62;\n&#160; &#60;body&#62;\n&#160; [...]"
28
+ end
29
+
30
+ it "should parse the published date" do
31
+ @entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
42
32
  end
43
33
 
44
- describe "parsing of media rss feed item" do
45
- before(:each) do
46
- # I don't really like doing it this way because these unit test should only rely on RSSEntry,
47
- # but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
48
- @entry = Feedzirra::Parser::RSS.parse(sample_media_rss_feed).entries.first
49
- end
50
-
51
- it "should parse the title" do
52
- @entry.title.should == "the new boy on the block"
53
- end
54
-
55
- it "should parse the url" do
56
- @entry.url.should == "http://horowhenua.kete.net.nz/site/images/show/15535-the-new-boy-on-the-block"
57
- end
58
-
59
- it "should parse link rel='related' as related" do
60
- @entry.related.should == ["http://horowhenua.kete.net.nz/", "http://horowhenua.kete.net.nz/site/all/images"]
61
- end
62
-
63
- it "should provide a summary" do
64
- @entry.summary.should == sample_media_rss_entry_content
65
- end
66
-
67
- it "should parse the published date" do
68
- @entry.published.to_s.should == "Mon Mar 23 07:55:43 UTC 2009"
69
- end
70
-
71
- it "should parse the guid as id" do
72
- @entry.id.should == "http://horowhenua.kete.net.nz/site/images/show/15535-the-new-boy-on-the-block"
73
- end
74
-
75
- it "should parse media:content url as media_content" do
76
- @entry.media_content.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_large.JPG"
77
- end
78
-
79
- it "should parse media:description as media_description" do
80
- @entry.media_description.should == "big crane from Wellington visits the site for the week"
81
- end
82
-
83
- it "should parse media:thumbnail url as media_thumbnail" do
84
- @entry.media_thumbnail.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_medium.JPG"
85
- end
86
-
87
- it "should parse enclosure url as enclosure" do
88
- @entry.enclosure.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_large.JPG"
89
- end
34
+ it "should parse the categories" do
35
+ @entry.categories.should == ['computadora', 'nokogiri', 'rails']
36
+ end
37
+
38
+ it "should parse the guid as id" do
39
+ @entry.id.should == "http://tenderlovemaking.com/?p=198"
90
40
  end
91
41
  end
@@ -16,7 +16,7 @@ describe Feedzirra::Parser::RSS do
16
16
  end
17
17
  end
18
18
 
19
- describe "parsing of simple rss feed" do
19
+ describe "parsing" do
20
20
  before(:each) do
21
21
  @feed = Feedzirra::Parser::RSS.parse(sample_rss_feed)
22
22
  end
@@ -38,51 +38,4 @@ describe Feedzirra::Parser::RSS do
38
38
  @feed.entries.size.should == 10
39
39
  end
40
40
  end
41
-
42
- describe "parsing of media rss feed" do
43
- before(:each) do
44
- @feed = Feedzirra::Parser::RSS.parse(sample_media_rss_feed)
45
- end
46
-
47
- it "should parse the title" do
48
- @feed.title.should == "horowhenua.kete.net.nz - Latest 50 Results in images"
49
- end
50
-
51
- it "should parse the url" do
52
- @feed.url.should == "http://horowhenua.kete.net.nz/site/all/images/rss.xml?search_terms=wellington"
53
- end
54
-
55
- it "should parse link rel='related' as related" do
56
- @feed.related.should == ["http://horowhenua.kete.net.nz/", "http://horowhenua.kete.net.nz/site/all/images"]
57
- end
58
-
59
- it "should parse the description" do
60
- @feed.description.should == "Showing 1 - 50 results of 368"
61
- end
62
-
63
- it "should parse the language" do
64
- @feed.language.should == "en-nz"
65
- end
66
-
67
- it "should provide an accessor for the feed_url" do
68
- @feed.respond_to?(:feed_url).should == true
69
- @feed.respond_to?(:feed_url=).should == true
70
- end
71
-
72
- it "should parse the prev atom:link" do
73
- @feed.prev_page.should == "http://aws.typepad.com/aws/atom.xml?page=1"
74
- end
75
-
76
- it "should parse the next atom:link" do
77
- @feed.next_page.should == "http://aws.typepad.com/aws/atom.xml?page=3"
78
- end
79
-
80
- it "should parse the last atom:link" do
81
- @feed.last_page.should == "http://aws.typepad.com/aws/atom.xml?page=5"
82
- end
83
-
84
- it "should parse entries" do
85
- @feed.entries.size.should == 50
86
- end
87
- end
88
41
  end
data/spec/spec_helper.rb CHANGED
@@ -55,12 +55,4 @@ end
55
55
 
56
56
  def sample_wfw_feed
57
57
  load_sample("PaulDixExplainsNothingWFW.xml")
58
- end
59
-
60
- def sample_media_rss_feed
61
- load_sample("KeteHorowhenua.xml")
62
- end
63
-
64
- def sample_media_rss_entry_content
65
- load_sample("KeteHorowhenuaFirstEntry.xml")
66
- end
58
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedzirra
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18.1
4
+ version: 0.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Dix
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-03 00:00:00 +12:00
12
+ date: 2009-02-19 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -23,7 +23,7 @@ dependencies:
23
23
  version: 0.0.0
24
24
  version:
25
25
  - !ruby/object:Gem::Dependency
26
- name: pauldix-sax-machine
26
+ name: sax-machine
27
27
  type: :runtime
28
28
  version_requirement:
29
29
  version_requirements: !ruby/object:Gem::Requirement
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: 0.0.12
34
34
  version:
35
35
  - !ruby/object:Gem::Dependency
36
- name: taf2-curb
36
+ name: curb
37
37
  type: :runtime
38
38
  version_requirement:
39
39
  version_requirements: !ruby/object:Gem::Requirement
@@ -63,14 +63,14 @@ dependencies:
63
63
  version: 2.0.0
64
64
  version:
65
65
  - !ruby/object:Gem::Dependency
66
- name: mdalessio-dryopteris
66
+ name: loofah
67
67
  type: :runtime
68
68
  version_requirement:
69
69
  version_requirements: !ruby/object:Gem::Requirement
70
70
  requirements:
71
71
  - - ">="
72
72
  - !ruby/object:Gem::Version
73
- version: 0.0.0
73
+ version: 0.3.1
74
74
  version:
75
75
  description:
76
76
  email: paul@pauldix.net