julien51-feedzirra 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +169 -0
- data/README.textile +194 -0
- data/Rakefile +56 -0
- data/lib/core_ext/date.rb +21 -0
- data/lib/core_ext/string.rb +9 -0
- data/lib/feedzirra/feed.rb +317 -0
- data/lib/feedzirra/feed_entry_utilities.rb +51 -0
- data/lib/feedzirra/feed_utilities.rb +71 -0
- data/lib/feedzirra/parser/atom.rb +26 -0
- data/lib/feedzirra/parser/atom_entry.rb +34 -0
- data/lib/feedzirra/parser/atom_feed_burner.rb +27 -0
- data/lib/feedzirra/parser/atom_feed_burner_entry.rb +35 -0
- data/lib/feedzirra/parser/itunes_rss.rb +50 -0
- data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
- data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
- data/lib/feedzirra/parser/rss.rb +28 -0
- data/lib/feedzirra/parser/rss_entry.rb +40 -0
- data/lib/feedzirra/push_parser.rb +56 -0
- data/lib/feedzirra.rb +37 -0
- data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
- data/spec/feedzirra/feed_spec.rb +551 -0
- data/spec/feedzirra/feed_utilities_spec.rb +149 -0
- data/spec/feedzirra/parser/atom_entry_spec.rb +45 -0
- data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
- data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
- data/spec/feedzirra/parser/atom_spec.rb +35 -0
- data/spec/feedzirra/parser/itunes_rss_item_spec.rb +52 -0
- data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
- data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
- data/spec/feedzirra/parser/rss_entry_spec.rb +41 -0
- data/spec/feedzirra/parser/rss_spec.rb +41 -0
- data/spec/feedzirra/push_parser_spec.rb +16 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +58 -0
- metadata +145 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
|
3
|
+
module Parser
|
4
|
+
# == Summary
|
5
|
+
# Parser for dealing with RSS feed entries.
|
6
|
+
#
|
7
|
+
# == Attributes
|
8
|
+
# * title
|
9
|
+
# * url
|
10
|
+
# * author
|
11
|
+
# * content
|
12
|
+
# * summary
|
13
|
+
# * published
|
14
|
+
# * categories
# * updated
# * id
|
15
|
+
class RSSEntry
|
16
|
+
include SAXMachine
|
17
|
+
include FeedEntryUtilities
|
18
|
+
element :title
|
19
|
+
element :link, :as => :url
|
20
|
+
|
21
|
+
element :"dc:creator", :as => :author
|
22
|
+
element :"content:encoded", :as => :content
|
23
|
+
element :description, :as => :summary
|
24
|
+
|
25
|
+
# pubDate, dc:date, dc:Date, dcterms:created and issued are all
# exposed through the single +published+ attribute.
element :pubDate, :as => :published
|
26
|
+
element :"dc:date", :as => :published
|
27
|
+
element :"dc:Date", :as => :published
|
28
|
+
element :"dcterms:created", :as => :published
|
29
|
+
|
30
|
+
|
31
|
+
element :"dcterms:modified", :as => :updated
|
32
|
+
element :issued, :as => :published
|
33
|
+
# Every <category> element is collected into +categories+.
elements :category, :as => :categories
|
34
|
+
|
35
|
+
# The entry's <guid> is exposed as +id+.
element :guid, :as => :id
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Feedzirra
  ##
  # Contrary to Feedzirra::Feed, Feedzirra::PushParser doesn't expect
  # the whole document to be given in one String, but allows
  # subsequent parsing of chunks.
  #
  # Hand chunks in with #push, then call #finish to finalize parsing and
  # obtain the parser object that was selected for the document.
  class PushParser
    ##
    # How much input to buffer before starting to parse, helps
    # Feedzirra's content detection. Compared against String#size of
    # the buffered data.
    BUF_MIN_THRESHOLD = 1000

    ##
    # Starts with an empty buffer and no parser selected yet.
    def initialize
      # dup gives us a private, mutable String that #push can append to
      # without touching a (possibly frozen) literal.
      @buf = ''.dup
      @parser = nil
    end

    ##
    # Feeds +chunk+ into the parser.
    #
    # Until a parser has been selected the chunk is only buffered; as soon
    # as the buffer grows larger than BUF_MIN_THRESHOLD the parser is
    # picked and fed the whole buffer. Afterwards chunks go straight to
    # the parser.
    def push(chunk)
      return @parser.parse(chunk) if @parser

      # Append in place instead of building a new String for every chunk.
      @buf << chunk
      start_parsing if @buf.size > BUF_MIN_THRESHOLD
    end

    ##
    # Really start parsing, if BUF_MIN_THRESHOLD wasn't reached yet,
    # finalize, and return the actual parser/document.
    #
    # Raises NoParserAvailable when no parser matches the buffered data.
    def finish
      # TODO: if we haven't started yet we won't even need a
      # PushParser
      start_parsing unless @parser

      @parser.parse_finish
      @parser
    end

    private

    ##
    # Picks the parser class for the buffered data, instantiates it,
    # feeds it the buffer and then drops the buffer.
    def start_parsing
      klass = Feed.determine_feed_parser_for_xml(@buf)
      raise NoParserAvailable, "No valid parser for XML." unless klass

      @parser = klass.new
      @parser.parse(@buf)
      # The buffer is no longer needed once the parser has taken over.
      @buf = nil
    end
  end
end
|
data/lib/feedzirra.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
gem 'activesupport'
|
4
|
+
|
5
|
+
require 'zlib'
|
6
|
+
require 'curb'
|
7
|
+
require 'sax-machine'
|
8
|
+
require 'dryopteris'
|
9
|
+
require 'uri'
|
10
|
+
require 'active_support/basic_object'
|
11
|
+
require 'active_support/core_ext/object'
|
12
|
+
require 'active_support/core_ext/time'
|
13
|
+
|
14
|
+
require 'core_ext/date'
|
15
|
+
require 'core_ext/string'
|
16
|
+
|
17
|
+
require 'feedzirra/feed_utilities'
|
18
|
+
require 'feedzirra/feed_entry_utilities'
|
19
|
+
require 'feedzirra/feed'
|
20
|
+
require 'feedzirra/push_parser'
|
21
|
+
|
22
|
+
require 'feedzirra/push_parser'
|
23
|
+
|
24
|
+
require 'feedzirra/parser/rss_entry'
|
25
|
+
require 'feedzirra/parser/itunes_rss_owner'
|
26
|
+
require 'feedzirra/parser/itunes_rss_item'
|
27
|
+
require 'feedzirra/parser/atom_entry'
|
28
|
+
require 'feedzirra/parser/atom_feed_burner_entry'
|
29
|
+
|
30
|
+
require 'feedzirra/parser/rss'
|
31
|
+
require 'feedzirra/parser/itunes_rss'
|
32
|
+
require 'feedzirra/parser/atom'
|
33
|
+
require 'feedzirra/parser/atom_feed_burner'
|
34
|
+
|
35
|
+
module Feedzirra
  # Version of the library; kept in step with the released package
  # version (0.0.13).
  VERSION = "0.0.13".freeze
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Specs for the Feedzirra::FeedEntryUtilities mixin: datetime parsing and
# sanitizing of entry fields.
describe Feedzirra::FeedEntryUtilities do
  before(:each) do
    # Anonymous host class so the mixin can be exercised on its own.
    @klass = Class.new do
      include Feedzirra::FeedEntryUtilities
    end
  end

  describe "handling dates" do
    it "should parse an ISO 8601 formatted datetime into Time" do
      time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
      time.class.should == Time
      time.to_s.should == "Wed Feb 20 18:05:00 UTC 2008"
    end
  end

  describe "sanitizing" do
    before(:each) do
      @feed = Feedzirra::Feed.parse(sample_atom_feed)
      @entry = @feed.entries.first
    end

    it "should provide a sanitized title" do
      new_title = "<script>" + @entry.title
      @entry.title = new_title
      @entry.title.sanitize.should == Dryopteris.sanitize(new_title)
    end

    it "should sanitize content in place" do
      new_content = "<script>" + @entry.content
      # dup so the in-place sanitize! cannot alter the expected input below.
      @entry.content = new_content.dup
      @entry.content.sanitize!.should == Dryopteris.sanitize(new_content)
      @entry.content.should == Dryopteris.sanitize(new_content)
    end

    it "should sanitize things in place" do
      @entry.title += "<script>"
      @entry.author += "<script>"
      @entry.content += "<script>"

      # Compute the expected values before the entry is sanitized in place.
      cleaned_title = Dryopteris.sanitize(@entry.title)
      cleaned_author = Dryopteris.sanitize(@entry.author)
      cleaned_content = Dryopteris.sanitize(@entry.content)

      @entry.sanitize!
      @entry.title.should == cleaned_title
      @entry.author.should == cleaned_author
      @entry.content.should == cleaned_content
    end
  end
end
|