julien51-feedzirra 0.0.13
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +169 -0
- data/README.textile +194 -0
- data/Rakefile +56 -0
- data/lib/core_ext/date.rb +21 -0
- data/lib/core_ext/string.rb +9 -0
- data/lib/feedzirra/feed.rb +317 -0
- data/lib/feedzirra/feed_entry_utilities.rb +51 -0
- data/lib/feedzirra/feed_utilities.rb +71 -0
- data/lib/feedzirra/parser/atom.rb +26 -0
- data/lib/feedzirra/parser/atom_entry.rb +34 -0
- data/lib/feedzirra/parser/atom_feed_burner.rb +27 -0
- data/lib/feedzirra/parser/atom_feed_burner_entry.rb +35 -0
- data/lib/feedzirra/parser/itunes_rss.rb +50 -0
- data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
- data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
- data/lib/feedzirra/parser/rss.rb +28 -0
- data/lib/feedzirra/parser/rss_entry.rb +40 -0
- data/lib/feedzirra/push_parser.rb +56 -0
- data/lib/feedzirra.rb +37 -0
- data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
- data/spec/feedzirra/feed_spec.rb +551 -0
- data/spec/feedzirra/feed_utilities_spec.rb +149 -0
- data/spec/feedzirra/parser/atom_entry_spec.rb +45 -0
- data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
- data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
- data/spec/feedzirra/parser/atom_spec.rb +35 -0
- data/spec/feedzirra/parser/itunes_rss_item_spec.rb +52 -0
- data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
- data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
- data/spec/feedzirra/parser/rss_entry_spec.rb +41 -0
- data/spec/feedzirra/parser/rss_spec.rb +41 -0
- data/spec/feedzirra/push_parser_spec.rb +16 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +58 -0
- metadata +145 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
module Feedzirra

  module Parser
    # == Summary
    # Entry parser for RSS feed items, declared with the SAXMachine
    # +element+ / +elements+ macros. Each declaration below maps a tag
    # name found in the feed onto one of the attributes listed here.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * updated
    # * categories
    # * id
    class RSSEntry
      include SAXMachine
      include FeedEntryUtilities

      element :title
      element :link, :as => :url

      element :"dc:creator", :as => :author
      element :"content:encoded", :as => :content
      element :description, :as => :summary

      # Several tags all feed the same +published+ attribute; they are
      # declared in the same order as before.
      [:pubDate, :"dc:date", :"dc:Date", :"dcterms:created"].each do |date_tag|
        element date_tag, :as => :published
      end

      element :"dcterms:modified", :as => :updated
      element :issued, :as => :published
      elements :category, :as => :categories

      element :guid, :as => :id
    end

  end

end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Feedzirra
  ##
  # Contrary to Feedzirra::Feed, Feedzirra::PushParser doesn't expect
  # the whole document to be given in one String, but allows
  # subsequent parsing of chunks.
  #
  # Chunks are buffered until more than BUF_MIN_THRESHOLD bytes have
  # been collected (or #finish is called); only then is a parser class
  # picked via Feed.determine_feed_parser_for_xml and fed the data.
  class PushParser
    ##
    # How many bytes to buffer before starting to parse, helps
    # Feedzirra's content detection.
    BUF_MIN_THRESHOLD = 1000

    ##
    # Starts with an empty buffer and no parser selected yet.
    def initialize
      # dup: the buffer is appended to in place, so it must be a
      # private, unfrozen String.
      @buf = ''.dup
      @parser = nil
    end

    ##
    # Either buffer up til BUF_MIN_THRESHOLD or, if reached, actually
    # parse a chunk.
    #
    # chunk:: the next String piece of the document. It is copied into
    #         the internal buffer and never modified.
    def push(chunk)
      return @parser.parse(chunk) if @parser

      @buf << chunk
      # The threshold is expressed in bytes, so measure bytes; String#size
      # counts characters and under-reports multi-byte content.
      start_parsing if @buf.bytesize > BUF_MIN_THRESHOLD
    end

    ##
    # Really start parsing, if BUF_MIN_THRESHOLD wasn't reached yet,
    # finalize, and return the actual parser/document.
    #
    # Raises NoParserAvailable when parsing had not started yet and no
    # parser class could be determined for the buffered data.
    def finish
      # TODO: if we haven't started yet we won't even need a
      # PushParser
      start_parsing unless @parser

      @parser.parse_finish
      @parser
    end

    private

    ##
    # Picks the parser class for the buffered data, instantiates it,
    # feeds it the whole buffer and then drops the buffer.
    #
    # Raises NoParserAvailable when Feed.determine_feed_parser_for_xml
    # returns nil/false for the buffered data.
    def start_parsing
      klass = Feed.determine_feed_parser_for_xml(@buf)
      raise NoParserAvailable, "No valid parser for XML." unless klass

      @parser = klass.new
      @parser.parse(@buf)
      @buf = nil
    end
  end
end
|
data/lib/feedzirra.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
gem 'activesupport'
|
4
|
+
|
5
|
+
require 'zlib'
|
6
|
+
require 'curb'
|
7
|
+
require 'sax-machine'
|
8
|
+
require 'dryopteris'
|
9
|
+
require 'uri'
|
10
|
+
require 'active_support/basic_object'
|
11
|
+
require 'active_support/core_ext/object'
|
12
|
+
require 'active_support/core_ext/time'
|
13
|
+
|
14
|
+
require 'core_ext/date'
|
15
|
+
require 'core_ext/string'
|
16
|
+
|
17
|
+
require 'feedzirra/feed_utilities'
|
18
|
+
require 'feedzirra/feed_entry_utilities'
|
19
|
+
require 'feedzirra/feed'
|
20
|
+
require 'feedzirra/push_parser'
|
21
|
+
|
22
|
+
require 'feedzirra/push_parser'
|
23
|
+
|
24
|
+
require 'feedzirra/parser/rss_entry'
|
25
|
+
require 'feedzirra/parser/itunes_rss_owner'
|
26
|
+
require 'feedzirra/parser/itunes_rss_item'
|
27
|
+
require 'feedzirra/parser/atom_entry'
|
28
|
+
require 'feedzirra/parser/atom_feed_burner_entry'
|
29
|
+
|
30
|
+
require 'feedzirra/parser/rss'
|
31
|
+
require 'feedzirra/parser/itunes_rss'
|
32
|
+
require 'feedzirra/parser/atom'
|
33
|
+
require 'feedzirra/parser/atom_feed_burner'
|
34
|
+
|
35
|
+
module Feedzirra
  # Library version string. Bumped to match the 0.0.13 gem release this
  # file ships in; it previously still read "0.0.12".
  VERSION = "0.0.13".freeze
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
# Specs for the Feedzirra::FeedEntryUtilities mixin: date parsing via
# parse_datetime and the sanitize / sanitize! helpers on parsed entries.
describe Feedzirra::FeedEntryUtilities do
  before(:each) do
    # Anonymous host class so the mixin can be exercised on its own.
    @klass = Class.new do
      include Feedzirra::FeedEntryUtilities
    end
  end

  describe "handling dates" do
    it "should parse an ISO 8601 formatted datetime into Time" do
      # NOTE(review): the fixture is not strictly ISO 8601 (one-digit hour,
      # "-010:00" offset) - confirm this lenient input is intentional.
      time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
      time.class.should == Time
      time.to_s.should == "Wed Feb 20 18:05:00 UTC 2008"
    end
  end

  describe "sanitizing" do
    before(:each) do
      # sample_atom_feed is not defined here; presumably a fixture helper
      # from spec_helper - verify.
      @feed = Feedzirra::Feed.parse(sample_atom_feed)
      @entry = @feed.entries.first
    end

    it "should provide a sanitized title" do
      new_title = "<script>" + @entry.title
      @entry.title = new_title
      @entry.title.sanitize.should == Dryopteris.sanitize(new_title)
    end

    it "should sanitize content in place" do
      new_content = "<script>" + @entry.content
      # dup keeps new_content untouched so it can serve as the reference
      # input after sanitize! has run on the entry's copy.
      @entry.content = new_content.dup
      @entry.content.sanitize!.should == Dryopteris.sanitize(new_content)
      @entry.content.should == Dryopteris.sanitize(new_content)
    end

    it "should sanitize things in place" do
      @entry.title += "<script>"
      @entry.author += "<script>"
      @entry.content += "<script>"

      # Expected values are computed before sanitize! changes the entry.
      cleaned_title = Dryopteris.sanitize(@entry.title)
      cleaned_author = Dryopteris.sanitize(@entry.author)
      cleaned_content = Dryopteris.sanitize(@entry.content)

      @entry.sanitize!
      @entry.title.should == cleaned_title
      @entry.author.should == cleaned_author
      @entry.content.should == cleaned_content
    end
  end
end
|