feedzirra 0.0.18.1 → 0.0.19

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,6 @@ class String
4
4
  end
5
5
 
6
6
  def sanitize
7
- Dryopteris.sanitize(self)
7
+ Loofah.scrub_fragment(self, :prune).to_s
8
8
  end
9
9
  end
data/lib/feedzirra.rb CHANGED
@@ -5,7 +5,7 @@ gem 'activesupport'
5
5
  require 'zlib'
6
6
  require 'curb'
7
7
  require 'sax-machine'
8
- require 'dryopteris'
8
+ require 'loofah'
9
9
  require 'uri'
10
10
  require 'active_support/basic_object'
11
11
  require 'active_support/core_ext/object'
@@ -30,5 +30,5 @@ require 'feedzirra/parser/atom'
30
30
  require 'feedzirra/parser/atom_feed_burner'
31
31
 
32
32
  module Feedzirra
33
- VERSION = "0.0.18.1"
33
+ VERSION = "0.0.19"
34
34
  end
@@ -5,28 +5,16 @@ module Feedzirra
5
5
  # Parser for dealing with Atom feeds.
6
6
  #
7
7
  # == Attributes
8
- # * prev_page
9
- # * next_page
10
- # * lat_page
11
8
  # * title
12
- # * subtitle
13
- # * updated
14
9
  # * feed_url
15
10
  # * url
16
- # * related
17
11
  # * entries
18
12
  class Atom
19
13
  include SAXMachine
20
14
  include FeedUtilities
21
- element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
22
- element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
23
- element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}
24
15
  element :title
25
- element :subtitle
26
- element :updated
27
16
  element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
28
17
  element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
29
- elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
30
18
  elements :link, :as => :links, :value => :href
31
19
  elements :entry, :as => :entries, :class => AtomEntry
32
20
 
@@ -7,22 +7,16 @@ module Feedzirra
7
7
  # == Attributes
8
8
  # * title
9
9
  # * url
10
- # * related
11
10
  # * author
12
11
  # * content
13
12
  # * summary
14
13
  # * published
15
14
  # * categories
16
- # * media_content
17
- # * media_description
18
- # * media_thumbnail
19
- # * enclosure
20
15
  class AtomEntry
21
16
  include SAXMachine
22
17
  include FeedEntryUtilities
23
18
  element :title
24
- element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
25
- elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
19
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
26
20
  element :name, :as => :author
27
21
  element :content
28
22
  element :summary
@@ -33,14 +27,8 @@ module Feedzirra
33
27
  element :updated
34
28
  element :modified, :as => :updated
35
29
  elements :category, :as => :categories, :value => :term
36
-
37
- element :"media:content", :as => :media_content, :value => :url
38
- element :"media:description", :as => :media_description
39
- element :"media:thumbnail", :as => :media_thumbnail, :value => :url
40
- element :enclosure, :value => :url
41
-
42
30
  elements :link, :as => :links, :value => :href
43
-
31
+
44
32
  def url
45
33
  @url || links.first
46
34
  end
@@ -17,7 +17,7 @@ module Feedzirra
17
17
  include FeedEntryUtilities
18
18
  element :title
19
19
  element :name, :as => :author
20
- element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
20
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
21
21
  element :"feedburner:origLink", :as => :url
22
22
  element :summary
23
23
  element :content
@@ -5,27 +5,15 @@ module Feedzirra
5
5
  # Parser for dealing with RSS feeds.
6
6
  #
7
7
  # == Attributes
8
- # * prev_page
9
- # * next_page
10
- # * lat_page
11
8
  # * title
12
9
  # * feed_url
13
10
  # * url
14
- # * related
15
- # * description
16
- # * language
17
11
  # * entries
18
12
  class RSS
19
13
  include SAXMachine
20
14
  include FeedUtilities
21
- element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
22
- element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
23
- element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}
24
15
  element :title
25
16
  element :link, :as => :url
26
- elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
27
- element :description
28
- element :language
29
17
  elements :item, :as => :entries, :class => RSSEntry
30
18
 
31
19
  attr_accessor :feed_url
@@ -7,25 +7,17 @@ module Feedzirra
7
7
  # == Attributes
8
8
  # * title
9
9
  # * url
10
- # * related
11
10
  # * author
12
11
  # * content
13
12
  # * summary
14
13
  # * published
15
- # * updated
16
14
  # * categories
17
- # * media_content
18
- # * media_description
19
- # * media_thumbnail
20
- # * enclosure
21
15
  class RSSEntry
22
16
  include SAXMachine
23
17
  include FeedEntryUtilities
24
18
  element :title
25
19
  element :link, :as => :url
26
- elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
27
20
 
28
- element :author
29
21
  element :"dc:creator", :as => :author
30
22
  element :author, :as => :author
31
23
  element :"content:encoded", :as => :content
@@ -42,11 +34,6 @@ module Feedzirra
42
34
  element :issued, :as => :published
43
35
  elements :category, :as => :categories
44
36
 
45
- element :"media:content", :as => :media_content, :value => :url
46
- element :"media:description", :as => :media_description
47
- element :"media:thumbnail", :as => :media_thumbnail, :value => :url
48
- element :enclosure, :value => :url
49
-
50
37
  element :guid, :as => :id
51
38
  end
52
39
 
@@ -22,16 +22,16 @@ describe Feedzirra::FeedUtilities do
22
22
  end
23
23
 
24
24
  it "should provide a sanitized title" do
25
- new_title = "<script>" + @entry.title
25
+ new_title = "<script>this is not safe</script>" + @entry.title
26
26
  @entry.title = new_title
27
- @entry.title.sanitize.should == Dryopteris.sanitize(new_title)
27
+ @entry.title.sanitize.should == Loofah.scrub_fragment(new_title, :prune).to_s
28
28
  end
29
29
 
30
30
  it "should sanitize content in place" do
31
31
  new_content = "<script>" + @entry.content
32
32
  @entry.content = new_content.dup
33
- @entry.content.sanitize!.should == Dryopteris.sanitize(new_content)
34
- @entry.content.should == Dryopteris.sanitize(new_content)
33
+ @entry.content.sanitize!.should == Loofah.scrub_fragment(new_content, :prune).to_s
34
+ @entry.content.should == Loofah.scrub_fragment(new_content, :prune).to_s
35
35
  end
36
36
 
37
37
  it "should sanitize things in place" do
@@ -39,9 +39,9 @@ describe Feedzirra::FeedUtilities do
39
39
  @entry.author += "<script>"
40
40
  @entry.content += "<script>"
41
41
 
42
- cleaned_title = Dryopteris.sanitize(@entry.title)
43
- cleaned_author = Dryopteris.sanitize(@entry.author)
44
- cleaned_content = Dryopteris.sanitize(@entry.content)
42
+ cleaned_title = Loofah.scrub_fragment(@entry.title, :prune).to_s
43
+ cleaned_author = Loofah.scrub_fragment(@entry.author, :prune).to_s
44
+ cleaned_content = Loofah.scrub_fragment(@entry.content, :prune).to_s
45
45
 
46
46
  @entry.sanitize!
47
47
  @entry.title.should == cleaned_title
@@ -24,14 +24,6 @@ describe Feedzirra::Parser::Atom do
24
24
  @feed.url.should == "http://aws.typepad.com/aws/"
25
25
  end
26
26
 
27
- it "should parse updated" do
28
- @feed.updated.should == "2009-01-16T18:21:00Z"
29
- end
30
-
31
- it "should parse the subtitle" do
32
- @feed.subtitle.should == "Amazon Web Services, Products, Tools, and Developer Information..."
33
- end
34
-
35
27
  it "should parse the url even when it doesn't have the type='text/html' attribute" do
36
28
  Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
37
29
  end
@@ -44,18 +36,6 @@ describe Feedzirra::Parser::Atom do
44
36
  @feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
45
37
  end
46
38
 
47
- it "should parse the prev atom:link" do
48
- @feed.prev_page.should == "http://aws.typepad.com/aws/atom.xml?page=1"
49
- end
50
-
51
- it "should parse the next atom:link" do
52
- @feed.next_page.should == "http://aws.typepad.com/aws/atom.xml?page=3"
53
- end
54
-
55
- it "should parse the last atom:link" do
56
- @feed.last_page.should == "http://aws.typepad.com/aws/atom.xml?page=5"
57
- end
58
-
59
39
  it "should parse entries" do
60
40
  @feed.entries.size.should == 10
61
41
  end
@@ -1,91 +1,41 @@
1
1
  require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
2
2
 
3
3
  describe Feedzirra::Parser::RSSEntry do
4
- describe "parsing of simple rss feed item" do
5
- before(:each) do
6
- # I don't really like doing it this way because these unit test should only rely on RSSEntry,
7
- # but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
8
- @entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
9
- end
10
-
11
- it "should parse the title" do
12
- @entry.title.should == "Nokogiri’s Slop Feature"
13
- end
14
-
15
- it "should parse the url" do
16
- @entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
17
- end
18
-
19
- it "should parse the author" do
20
- @entry.author.should == "Aaron Patterson"
21
- end
22
-
23
- it "should parse the content" do
24
- @entry.content.should == sample_rss_entry_content
25
- end
26
-
27
- it "should provide a summary" do
28
- @entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop&#40;&#60;&#60;-eohtml&#41;\n&#60;html&#62;\n&#160; &#60;body&#62;\n&#160; [...]"
29
- end
30
-
31
- it "should parse the published date" do
32
- @entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
33
- end
34
-
35
- it "should parse the categories" do
36
- @entry.categories.should == ['computadora', 'nokogiri', 'rails']
37
- end
38
-
39
- it "should parse the guid as id" do
40
- @entry.id.should == "http://tenderlovemaking.com/?p=198"
41
- end
4
+ before(:each) do
5
+ # I don't really like doing it this way because these unit test should only rely on RSSEntry,
6
+ # but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
7
+ @entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
8
+ end
9
+
10
+ it "should parse the title" do
11
+ @entry.title.should == "Nokogiri’s Slop Feature"
12
+ end
13
+
14
+ it "should parse the url" do
15
+ @entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
16
+ end
17
+
18
+ it "should parse the author" do
19
+ @entry.author.should == "Aaron Patterson"
20
+ end
21
+
22
+ it "should parse the content" do
23
+ @entry.content.should == sample_rss_entry_content
24
+ end
25
+
26
+ it "should provide a summary" do
27
+ @entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop&#40;&#60;&#60;-eohtml&#41;\n&#60;html&#62;\n&#160; &#60;body&#62;\n&#160; [...]"
28
+ end
29
+
30
+ it "should parse the published date" do
31
+ @entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
42
32
  end
43
33
 
44
- describe "parsing of media rss feed item" do
45
- before(:each) do
46
- # I don't really like doing it this way because these unit test should only rely on RSSEntry,
47
- # but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
48
- @entry = Feedzirra::Parser::RSS.parse(sample_media_rss_feed).entries.first
49
- end
50
-
51
- it "should parse the title" do
52
- @entry.title.should == "the new boy on the block"
53
- end
54
-
55
- it "should parse the url" do
56
- @entry.url.should == "http://horowhenua.kete.net.nz/site/images/show/15535-the-new-boy-on-the-block"
57
- end
58
-
59
- it "should parse link rel='related' as related" do
60
- @entry.related.should == ["http://horowhenua.kete.net.nz/", "http://horowhenua.kete.net.nz/site/all/images"]
61
- end
62
-
63
- it "should provide a summary" do
64
- @entry.summary.should == sample_media_rss_entry_content
65
- end
66
-
67
- it "should parse the published date" do
68
- @entry.published.to_s.should == "Mon Mar 23 07:55:43 UTC 2009"
69
- end
70
-
71
- it "should parse the guid as id" do
72
- @entry.id.should == "http://horowhenua.kete.net.nz/site/images/show/15535-the-new-boy-on-the-block"
73
- end
74
-
75
- it "should parse media:content url as media_content" do
76
- @entry.media_content.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_large.JPG"
77
- end
78
-
79
- it "should parse media:description as media_description" do
80
- @entry.media_description.should == "big crane from Wellington visits the site for the week"
81
- end
82
-
83
- it "should parse media:thumbnail url as media_thumbnail" do
84
- @entry.media_thumbnail.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_medium.JPG"
85
- end
86
-
87
- it "should parse enclosure url as enclosure" do
88
- @entry.enclosure.should == "http://horowhenua.kete.net.nz/image_files/0000/0008/1232/DSCF1122_large.JPG"
89
- end
34
+ it "should parse the categories" do
35
+ @entry.categories.should == ['computadora', 'nokogiri', 'rails']
36
+ end
37
+
38
+ it "should parse the guid as id" do
39
+ @entry.id.should == "http://tenderlovemaking.com/?p=198"
90
40
  end
91
41
  end
@@ -16,7 +16,7 @@ describe Feedzirra::Parser::RSS do
16
16
  end
17
17
  end
18
18
 
19
- describe "parsing of simple rss feed" do
19
+ describe "parsing" do
20
20
  before(:each) do
21
21
  @feed = Feedzirra::Parser::RSS.parse(sample_rss_feed)
22
22
  end
@@ -38,51 +38,4 @@ describe Feedzirra::Parser::RSS do
38
38
  @feed.entries.size.should == 10
39
39
  end
40
40
  end
41
-
42
- describe "parsing of media rss feed" do
43
- before(:each) do
44
- @feed = Feedzirra::Parser::RSS.parse(sample_media_rss_feed)
45
- end
46
-
47
- it "should parse the title" do
48
- @feed.title.should == "horowhenua.kete.net.nz - Latest 50 Results in images"
49
- end
50
-
51
- it "should parse the url" do
52
- @feed.url.should == "http://horowhenua.kete.net.nz/site/all/images/rss.xml?search_terms=wellington"
53
- end
54
-
55
- it "should parse link rel='related' as related" do
56
- @feed.related.should == ["http://horowhenua.kete.net.nz/", "http://horowhenua.kete.net.nz/site/all/images"]
57
- end
58
-
59
- it "should parse the description" do
60
- @feed.description.should == "Showing 1 - 50 results of 368"
61
- end
62
-
63
- it "should parse the language" do
64
- @feed.language.should == "en-nz"
65
- end
66
-
67
- it "should provide an accessor for the feed_url" do
68
- @feed.respond_to?(:feed_url).should == true
69
- @feed.respond_to?(:feed_url=).should == true
70
- end
71
-
72
- it "should parse the prev atom:link" do
73
- @feed.prev_page.should == "http://aws.typepad.com/aws/atom.xml?page=1"
74
- end
75
-
76
- it "should parse the next atom:link" do
77
- @feed.next_page.should == "http://aws.typepad.com/aws/atom.xml?page=3"
78
- end
79
-
80
- it "should parse the last atom:link" do
81
- @feed.last_page.should == "http://aws.typepad.com/aws/atom.xml?page=5"
82
- end
83
-
84
- it "should parse entries" do
85
- @feed.entries.size.should == 50
86
- end
87
- end
88
41
  end
data/spec/spec_helper.rb CHANGED
@@ -55,12 +55,4 @@ end
55
55
 
56
56
  def sample_wfw_feed
57
57
  load_sample("PaulDixExplainsNothingWFW.xml")
58
- end
59
-
60
- def sample_media_rss_feed
61
- load_sample("KeteHorowhenua.xml")
62
- end
63
-
64
- def sample_media_rss_entry_content
65
- load_sample("KeteHorowhenuaFirstEntry.xml")
66
- end
58
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedzirra
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18.1
4
+ version: 0.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Dix
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-03 00:00:00 +12:00
12
+ date: 2009-02-19 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -23,7 +23,7 @@ dependencies:
23
23
  version: 0.0.0
24
24
  version:
25
25
  - !ruby/object:Gem::Dependency
26
- name: pauldix-sax-machine
26
+ name: sax-machine
27
27
  type: :runtime
28
28
  version_requirement:
29
29
  version_requirements: !ruby/object:Gem::Requirement
@@ -33,7 +33,7 @@ dependencies:
33
33
  version: 0.0.12
34
34
  version:
35
35
  - !ruby/object:Gem::Dependency
36
- name: taf2-curb
36
+ name: curb
37
37
  type: :runtime
38
38
  version_requirement:
39
39
  version_requirements: !ruby/object:Gem::Requirement
@@ -63,14 +63,14 @@ dependencies:
63
63
  version: 2.0.0
64
64
  version:
65
65
  - !ruby/object:Gem::Dependency
66
- name: mdalessio-dryopteris
66
+ name: loofah
67
67
  type: :runtime
68
68
  version_requirement:
69
69
  version_requirements: !ruby/object:Gem::Requirement
70
70
  requirements:
71
71
  - - ">="
72
72
  - !ruby/object:Gem::Version
73
- version: 0.0.0
73
+ version: 0.3.1
74
74
  version:
75
75
  description:
76
76
  email: paul@pauldix.net