julien51-feedzirra 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/README.rdoc +169 -0
  2. data/README.textile +194 -0
  3. data/Rakefile +56 -0
  4. data/lib/core_ext/date.rb +21 -0
  5. data/lib/core_ext/string.rb +9 -0
  6. data/lib/feedzirra/feed.rb +317 -0
  7. data/lib/feedzirra/feed_entry_utilities.rb +51 -0
  8. data/lib/feedzirra/feed_utilities.rb +71 -0
  9. data/lib/feedzirra/parser/atom.rb +26 -0
  10. data/lib/feedzirra/parser/atom_entry.rb +34 -0
  11. data/lib/feedzirra/parser/atom_feed_burner.rb +27 -0
  12. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +35 -0
  13. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  14. data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
  15. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  16. data/lib/feedzirra/parser/rss.rb +28 -0
  17. data/lib/feedzirra/parser/rss_entry.rb +40 -0
  18. data/lib/feedzirra/push_parser.rb +56 -0
  19. data/lib/feedzirra.rb +37 -0
  20. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  21. data/spec/feedzirra/feed_spec.rb +551 -0
  22. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  23. data/spec/feedzirra/parser/atom_entry_spec.rb +45 -0
  24. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  25. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  26. data/spec/feedzirra/parser/atom_spec.rb +35 -0
  27. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +52 -0
  28. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  29. data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
  30. data/spec/feedzirra/parser/rss_entry_spec.rb +41 -0
  31. data/spec/feedzirra/parser/rss_spec.rb +41 -0
  32. data/spec/feedzirra/push_parser_spec.rb +16 -0
  33. data/spec/spec.opts +2 -0
  34. data/spec/spec_helper.rb +58 -0
  35. metadata +145 -0
@@ -0,0 +1,40 @@
module Feedzirra
  module Parser
    # == Summary
    # Parser for a single entry of an RSS/RDF feed, declared with the
    # SAXMachine element DSL.
    #
    # == Attributes
    # * title
    # * url        (read from <link>)
    # * author     (read from <dc:creator>)
    # * content    (read from <content:encoded>)
    # * summary    (read from <description>)
    # * published  (read from <pubDate>, <dc:date>, <dc:Date>,
    #               <dcterms:created> or <issued>)
    # * updated    (read from <dcterms:modified>)
    # * categories (one value per <category>)
    # * id         (read from <guid>)
    class RSSEntry
      include SAXMachine
      include FeedEntryUtilities

      element :title
      element :link, :as => :url

      element :"dc:creator", :as => :author
      element :"content:encoded", :as => :content
      element :description, :as => :summary

      # Several tags feed the same +published+ attribute. They are declared
      # in the same order as before, in case declaration order matters to
      # SAXMachine when more than one of them is present.
      [:pubDate, :"dc:date", :"dc:Date", :"dcterms:created"].each do |date_tag|
        element date_tag, :as => :published
      end

      element :"dcterms:modified", :as => :updated
      element :issued, :as => :published
      elements :category, :as => :categories

      element :guid, :as => :id
    end
  end
end
@@ -0,0 +1,56 @@
module Feedzirra
  ##
  # Contrary to Feedzirra::Feed, Feedzirra::PushParser doesn't expect
  # the whole document to be given in one String, but allows
  # subsequent parsing of chunks.
  #
  # Usage: call #push for every chunk received, then #finish once to
  # obtain the parsed document.
  class PushParser
    ##
    # How many bytes to buffer before starting to parse, helps
    # Feedzirra's content detection.
    BUF_MIN_THRESHOLD = 1000

    ##
    # Starts with an empty buffer and no parser selected yet.
    def initialize
      # A dedicated, unfrozen String: #push appends to it in place.
      @buf = ''.dup
      @parser = nil
    end

    ##
    # Feeds one chunk of the document.
    #
    # Until more than BUF_MIN_THRESHOLD bytes have been collected the
    # chunk is only buffered; afterwards chunks go straight to the
    # parser that was selected from the buffered prefix.
    #
    # Raises NoParserAvailable if the threshold is crossed and no parser
    # matches the buffered data.
    def push(chunk)
      return @parser.parse(chunk) if @parser

      # Append in place instead of building a new String on every push.
      @buf << chunk
      # The threshold is documented in bytes, so compare the byte length;
      # String#size counts characters for multibyte encodings.
      start_parsing if @buf.bytesize > BUF_MIN_THRESHOLD
    end

    ##
    # Really start parsing, if BUF_MIN_THRESHOLD wasn't reached yet,
    # finalize, and return the actual parser/document.
    #
    # Raises NoParserAvailable if no parser matches the buffered data.
    def finish
      # TODO: if we haven't started yet we won't even need a
      # PushParser
      start_parsing unless @parser

      @parser.parse_finish
      @parser
    end

    private

    ##
    # Picks a parser class from the buffered data, instantiates it, feeds
    # it the buffer and then drops the buffer.
    def start_parsing
      klass = Feed.determine_feed_parser_for_xml(@buf)
      raise NoParserAvailable, "No valid parser for XML." unless klass

      @parser = klass.new
      @parser.parse(@buf)
      @buf = nil
    end
  end
end
data/lib/feedzirra.rb ADDED
@@ -0,0 +1,37 @@
# Entry point of the library: puts lib/ on the load path, loads the
# third-party dependencies and then every Feedzirra component.

# Make the files next to this one requirable by their bare names.
lib_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

# Activate the activesupport gem before requiring individual pieces of it.
gem 'activesupport'

require 'zlib'
require 'curb'
require 'sax-machine'
require 'dryopteris'
require 'uri'
require 'active_support/basic_object'
require 'active_support/core_ext/object'
require 'active_support/core_ext/time'

require 'core_ext/date'
require 'core_ext/string'

require 'feedzirra/feed_utilities'
require 'feedzirra/feed_entry_utilities'
require 'feedzirra/feed'
# Listed once only; this file previously required it twice.
require 'feedzirra/push_parser'

# Entry-level parsers are loaded before the feed-level parsers below
# (presumably because the feed parsers reference the entry classes --
# confirm before reordering).
require 'feedzirra/parser/rss_entry'
require 'feedzirra/parser/itunes_rss_owner'
require 'feedzirra/parser/itunes_rss_item'
require 'feedzirra/parser/atom_entry'
require 'feedzirra/parser/atom_feed_burner_entry'

require 'feedzirra/parser/rss'
require 'feedzirra/parser/itunes_rss'
require 'feedzirra/parser/atom'
require 'feedzirra/parser/atom_feed_burner'

module Feedzirra
  # NOTE(review): this package is listed as release 0.0.13 while the
  # constant says 0.0.12 -- confirm which one is intended.
  VERSION = "0.0.12"
end
@@ -0,0 +1,52 @@
require File.dirname(__FILE__) + '/../spec_helper'

# Specs for the helpers that Feedzirra::FeedEntryUtilities mixes into
# feed entry classes (date parsing and sanitizing).
describe Feedzirra::FeedEntryUtilities do
  before(:each) do
    # Anonymous host class so the mixin can be exercised on its own.
    @klass = Class.new do
      include Feedzirra::FeedEntryUtilities
    end
  end

  describe "handling dates" do
    it "should parse an ISO 8601 formatted datetime into Time" do
      # NOTE(review): the input is not strictly ISO 8601 (single-digit
      # hour, three-digit offset) -- confirm this is intentional.
      time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
      time.class.should == Time
      time.to_s.should == "Wed Feb 20 18:05:00 UTC 2008"
    end
  end

  describe "sanitizing" do
    before(:each) do
      # sample_atom_feed is a fixture helper expected from spec_helper.
      @feed = Feedzirra::Feed.parse(sample_atom_feed)
      @entry = @feed.entries.first
    end

    it "should provide a sanitized title" do
      new_title = "<script>" + @entry.title
      @entry.title = new_title
      @entry.title.sanitize.should == Dryopteris.sanitize(new_title)
    end

    it "should sanitize content in place" do
      new_content = "<script>" + @entry.content
      # Hand the entry its own copy so new_content keeps the unsanitized
      # text for the comparisons below.
      @entry.content = new_content.dup
      @entry.content.sanitize!.should == Dryopteris.sanitize(new_content)
      @entry.content.should == Dryopteris.sanitize(new_content)
    end

    it "should sanitize things in place" do
      @entry.title += "<script>"
      @entry.author += "<script>"
      @entry.content += "<script>"

      # Compute the expected values before the entry is sanitized.
      cleaned_title = Dryopteris.sanitize(@entry.title)
      cleaned_author = Dryopteris.sanitize(@entry.author)
      cleaned_content = Dryopteris.sanitize(@entry.content)

      @entry.sanitize!
      @entry.title.should == cleaned_title
      @entry.author.should == cleaned_author
      @entry.content.should == cleaned_content
    end
  end
end