julien51-feedzirra 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. data/README.rdoc +169 -0
  2. data/README.textile +194 -0
  3. data/Rakefile +56 -0
  4. data/lib/core_ext/date.rb +21 -0
  5. data/lib/core_ext/string.rb +9 -0
  6. data/lib/feedzirra/feed.rb +317 -0
  7. data/lib/feedzirra/feed_entry_utilities.rb +51 -0
  8. data/lib/feedzirra/feed_utilities.rb +71 -0
  9. data/lib/feedzirra/parser/atom.rb +26 -0
  10. data/lib/feedzirra/parser/atom_entry.rb +34 -0
  11. data/lib/feedzirra/parser/atom_feed_burner.rb +27 -0
  12. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +35 -0
  13. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  14. data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
  15. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  16. data/lib/feedzirra/parser/rss.rb +28 -0
  17. data/lib/feedzirra/parser/rss_entry.rb +40 -0
  18. data/lib/feedzirra/push_parser.rb +56 -0
  19. data/lib/feedzirra.rb +37 -0
  20. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  21. data/spec/feedzirra/feed_spec.rb +551 -0
  22. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  23. data/spec/feedzirra/parser/atom_entry_spec.rb +45 -0
  24. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  25. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  26. data/spec/feedzirra/parser/atom_spec.rb +35 -0
  27. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +52 -0
  28. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  29. data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
  30. data/spec/feedzirra/parser/rss_entry_spec.rb +41 -0
  31. data/spec/feedzirra/parser/rss_spec.rb +41 -0
  32. data/spec/feedzirra/push_parser_spec.rb +16 -0
  33. data/spec/spec.opts +2 -0
  34. data/spec/spec_helper.rb +58 -0
  35. metadata +145 -0
@@ -0,0 +1,40 @@
module Feedzirra

  module Parser
    # == Summary
    # SAXMachine mapping for a single entry (item) of an RSS/RDF feed.
    # Each declaration below binds one XML element name to an attribute
    # of the entry; several element names may feed the same attribute.
    #
    # == Attributes
    # * title
    # * url        (from <link>)
    # * author     (from <dc:creator>)
    # * content    (from <content:encoded>)
    # * summary    (from <description>)
    # * published  (from <pubDate>, <dc:date>, <dc:Date>,
    #   <dcterms:created> or <issued>)
    # * updated    (from <dcterms:modified>)
    # * categories (every <category> element)
    # * id         (from <guid>)
    class RSSEntry
      include SAXMachine
      include FeedEntryUtilities

      element :title
      element :link, :as => :url

      element :"dc:creator", :as => :author
      element :"content:encoded", :as => :content
      element :description, :as => :summary

      # Alternative spellings of the publication date seen in feeds; they
      # are declared in the same order as before.
      %w[pubDate dc:date dc:Date dcterms:created].each do |date_tag|
        element date_tag.to_sym, :as => :published
      end

      element :"dcterms:modified", :as => :updated
      element :issued, :as => :published
      elements :category, :as => :categories

      element :guid, :as => :id
    end

  end

end
@@ -0,0 +1,56 @@
module Feedzirra
  ##
  # Contrary to Feedzirra::Feed, Feedzirra::PushParser doesn't expect
  # the whole document to be given in one String, but allows
  # subsequent parsing of chunks.
  #
  # Usage: call #push once per received chunk, then #finish to obtain the
  # parser object that was fed the document.
  class PushParser
    ##
    # How many bytes to buffer before starting to parse, helps
    # Feedzirra's content detection.
    BUF_MIN_THRESHOLD = 1000

    ##
    # Starts with an empty buffer and no parser selected yet.
    def initialize
      # Unfrozen copy on purpose: #push appends to this buffer in place.
      @buf = ''.dup
      @parser = nil
    end

    ##
    # Feeds one chunk of the document.
    #
    # While no parser has been chosen the chunk is only appended to the
    # internal buffer; once the buffer holds more than BUF_MIN_THRESHOLD
    # bytes the parser is selected and fed the buffer. After that, chunks
    # go straight to the parser.
    #
    # Raises NoParserAvailable if the threshold is crossed and no parser
    # matches the buffered data.
    def push(chunk)
      return @parser.parse(chunk) if @parser

      # Append in place instead of `+=`, which copied the whole buffer on
      # every call.
      @buf << chunk
      # The threshold is documented in bytes, so measure bytes rather than
      # characters.
      start_parsing if @buf.bytesize > BUF_MIN_THRESHOLD
    end

    ##
    # Really start parsing, if BUF_MIN_THRESHOLD wasn't reached yet,
    # finalize, and return the actual parser/document.
    #
    # Raises NoParserAvailable if parsing had not started and no parser
    # matches the buffered data.
    def finish
      # TODO: if we haven't started yet we won't even need a
      # PushParser
      start_parsing unless @parser

      @parser.parse_finish
      @parser
    end

    private

    ##
    # Picks the parser class for the buffered data, instantiates it, feeds
    # it the buffer and then drops the buffer.
    def start_parsing
      klass = Feed.determine_feed_parser_for_xml(@buf)
      raise NoParserAvailable, "No valid parser for XML." unless klass

      @parser = klass.new
      @parser.parse(@buf)
      @buf = nil
    end
  end
end
data/lib/feedzirra.rb ADDED
@@ -0,0 +1,37 @@
# Library entry point: puts this directory on the load path, loads the
# external dependencies and then every Feedzirra component.
lib_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

gem 'activesupport'

require 'zlib'
require 'curb'
require 'sax-machine'
require 'dryopteris'
require 'uri'
require 'active_support/basic_object'
require 'active_support/core_ext/object'
require 'active_support/core_ext/time'

require 'core_ext/date'
require 'core_ext/string'

require 'feedzirra/feed_utilities'
require 'feedzirra/feed_entry_utilities'
require 'feedzirra/feed'
# Previously required twice in a row; the second require was a no-op.
require 'feedzirra/push_parser'

# Entry-level parsers are loaded before the feed-level parsers below
# (presumably because the feed parsers reference the entry classes).
require 'feedzirra/parser/rss_entry'
require 'feedzirra/parser/itunes_rss_owner'
require 'feedzirra/parser/itunes_rss_item'
require 'feedzirra/parser/atom_entry'
require 'feedzirra/parser/atom_feed_burner_entry'

require 'feedzirra/parser/rss'
require 'feedzirra/parser/itunes_rss'
require 'feedzirra/parser/atom'
require 'feedzirra/parser/atom_feed_burner'

module Feedzirra
  # NOTE(review): this file ships in the 0.0.13 package but the constant
  # still reads 0.0.12 -- confirm whether it should be bumped.
  VERSION = "0.0.12".freeze
end
@@ -0,0 +1,52 @@
require File.dirname(__FILE__) + '/../spec_helper'

# Specs for the helpers mixed into feed entries by
# Feedzirra::FeedEntryUtilities (date parsing and sanitizing).
# The described module used to be Feedzirra::FeedUtilities, which is not
# what these examples exercise.
describe Feedzirra::FeedEntryUtilities do
  before(:each) do
    # Anonymous host class so the mixin can be exercised on its own.
    @klass = Class.new do
      include Feedzirra::FeedEntryUtilities
    end
  end

  describe "handling dates" do
    it "should parse an ISO 8601 formatted datetime into Time" do
      # NOTE(review): the hour ("8") and offset ("-010:00") are not
      # zero-padded the way strict ISO 8601 requires -- confirm the input
      # is intentionally lenient.
      time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
      time.class.should == Time
      time.to_s.should == "Wed Feb 20 18:05:00 UTC 2008"
    end
  end

  describe "sanitizing" do
    before(:each) do
      # sample_atom_feed is expected to come from spec_helper.
      @feed = Feedzirra::Feed.parse(sample_atom_feed)
      @entry = @feed.entries.first
    end

    it "should provide a sanitized title" do
      new_title = "<script>" + @entry.title
      @entry.title = new_title
      @entry.title.sanitize.should == Dryopteris.sanitize(new_title)
    end

    it "should sanitize content in place" do
      new_content = "<script>" + @entry.content
      # Assign a copy so new_content stays untouched for the expectations.
      @entry.content = new_content.dup
      @entry.content.sanitize!.should == Dryopteris.sanitize(new_content)
      @entry.content.should == Dryopteris.sanitize(new_content)
    end

    it "should sanitize things in place" do
      @entry.title += "<script>"
      @entry.author += "<script>"
      @entry.content += "<script>"

      # Expected values are computed before the entry is sanitized.
      cleaned_title = Dryopteris.sanitize(@entry.title)
      cleaned_author = Dryopteris.sanitize(@entry.author)
      cleaned_content = Dryopteris.sanitize(@entry.content)

      @entry.sanitize!
      @entry.title.should == cleaned_title
      @entry.author.should == cleaned_author
      @entry.content.should == cleaned_content
    end
  end
end