Spectives-feedzirra 0.0.28
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +169 -0
- data/README.textile +205 -0
- data/Rakefile +56 -0
- data/lib/core_ext/date.rb +21 -0
- data/lib/core_ext/string.rb +9 -0
- data/lib/feedzirra/feed.rb +334 -0
- data/lib/feedzirra/feed_entry_utilities.rb +45 -0
- data/lib/feedzirra/feed_utilities.rb +71 -0
- data/lib/feedzirra/parser/atom.rb +35 -0
- data/lib/feedzirra/parser/atom_entry.rb +41 -0
- data/lib/feedzirra/parser/itunes_category.rb +12 -0
- data/lib/feedzirra/parser/mrss_category.rb +11 -0
- data/lib/feedzirra/parser/mrss_content.rb +48 -0
- data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
- data/lib/feedzirra/parser/mrss_credit.rb +11 -0
- data/lib/feedzirra/parser/mrss_group.rb +37 -0
- data/lib/feedzirra/parser/mrss_hash.rb +10 -0
- data/lib/feedzirra/parser/mrss_player.rb +11 -0
- data/lib/feedzirra/parser/mrss_rating.rb +10 -0
- data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
- data/lib/feedzirra/parser/mrss_text.rb +13 -0
- data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
- data/lib/feedzirra/parser/rss.rb +83 -0
- data/lib/feedzirra/parser/rss_entry.rb +83 -0
- data/lib/feedzirra/parser/rss_image.rb +15 -0
- data/lib/feedzirra.rb +44 -0
- data/spec/benchmarks/feed_benchmarks.rb +98 -0
- data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
- data/spec/benchmarks/fetching_benchmarks.rb +28 -0
- data/spec/benchmarks/parsing_benchmark.rb +30 -0
- data/spec/benchmarks/updating_benchmarks.rb +33 -0
- data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
- data/spec/feedzirra/feed_spec.rb +546 -0
- data/spec/feedzirra/feed_utilities_spec.rb +149 -0
- data/spec/feedzirra/parser/atom_entry_spec.rb +49 -0
- data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
- data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
- data/spec/feedzirra/parser/atom_spec.rb +43 -0
- data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
- data/spec/feedzirra/parser/rss_entry_spec.rb +154 -0
- data/spec/feedzirra/parser/rss_spec.rb +93 -0
- data/spec/sample_feeds/run_against_sample.rb +20 -0
- data/spec/spec_helper.rb +62 -0
- metadata +154 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
module Feedzirra
  module Parser
    # SAXMachine mapping for a <media:player> element.
    #
    # Exposes three accessors -- url, width and height -- each filled from the
    # same-named value of the <media:player> element.
    class MRSSPlayer
      include SAXMachine

      element :"media:player", :as => :url,    :value => :url
      element :"media:player", :as => :width,  :value => :width
      element :"media:player", :as => :height, :value => :height
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Feedzirra
  module Parser
    # SAXMachine mapping for a <media:restriction> element.
    #
    # Accessors:
    # * value        - the element itself (no :value option given)
    # * scope        - taken from the element's "type" value
    # * relationship - taken from the element's "relationship" value
    class MRSSRestriction
      include SAXMachine

      element :"media:restriction", :as => :value
      element :"media:restriction", :value => :type,         :as => :scope
      element :"media:restriction", :value => :relationship, :as => :relationship
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Feedzirra
  module Parser
    # SAXMachine mapping for a <media:text> element.
    #
    # The type, lang, start and end accessors are filled from the same-named
    # values of the element; text is mapped from the element itself.
    class MRSSText
      include SAXMachine

      element :"media:text", :value => :type,  :as => :type
      element :"media:text", :value => :lang,  :as => :lang
      element :"media:text", :value => :start, :as => :start
      element :"media:text", :value => :end,   :as => :end
      element :"media:text", :as => :text
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Feedzirra
  module Parser
    # SAXMachine mapping for a <media:thumbnail> element.
    #
    # Exposes url, width and height accessors, each filled from the
    # same-named value of the <media:thumbnail> element.
    class MRSSThumbnail
      include SAXMachine

      element :'media:thumbnail', :as => :url, :value => :url
      # Previously declared as `:as => :with` (typo), which hid the width
      # behind a misspelled accessor.
      element :'media:thumbnail', :as => :width, :value => :width
      element :'media:thumbnail', :as => :height, :value => :height

      # Backward-compatible reader for the old, misspelled accessor name.
      def with
        width
      end

      # Backward-compatible writer for the old, misspelled accessor name.
      def with=(value)
        self.width = value
      end
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/mrss_credit'
|
2
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
3
|
+
require File.dirname(__FILE__) + '/mrss_category'
|
4
|
+
require File.dirname(__FILE__) + '/mrss_copyright'
|
5
|
+
require File.dirname(__FILE__) + '/mrss_hash'
|
6
|
+
require File.dirname(__FILE__) + '/mrss_player'
|
7
|
+
require File.dirname(__FILE__) + '/mrss_rating'
|
8
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
9
|
+
require File.dirname(__FILE__) + '/mrss_text'
|
10
|
+
require File.dirname(__FILE__) + '/mrss_thumbnail'
|
11
|
+
|
12
|
+
module Feedzirra
  module Parser
    # == Summary
    # Parser for dealing with RSS feeds.
    #
    # Declares SAXMachine mappings for the RSS 2.0 channel elements plus the
    # iTunes and MediaRSS extensions; feed items are parsed as RSSEntry objects
    # and exposed through +entries+.
    class RSS
      include SAXMachine
      include FeedUtilities

      attr_accessor :feed_url

      # RSS 2.0 required elements
      element :title
      element :link, :as => :url
      element :description
      elements :item, :as => :entries, :class => RSSEntry

      # RSS 2.0 optional elements
      element :language
      element :copyright
      element :managingEditor
      element :webMaster
      element :pubDate
      element :lastBuildDate
      element :category
      element :generator
      element :docs
      element :cloud
      element :ttl
      element :image, :class => RSSImage
      element :rating
      element :textInput
      element :skipHours
      element :skipDays

      # iTunes
      element :'itunes:author', :as => :author
      element :'itunes:block', :as => :itunes_block
      element :'itunes:image', :as => :image, :value => :href
      element :'itunes:explicit', :as => :explicit
      element :'itunes:keywords', :as => :keywords
      element :'itunes:new-feed-url', :as => :feed_url
      element :'itunes:name', :as => :owner_name
      element :'itunes:email', :as => :owner_email
      element :'itunes:subtitle', :as => :subtitle
      element :'itunes:summary', :as => :summary

      elements :'itunes:category', :as => :categories, :value => :text
      # elements :'itunes:category', :as => :itunes_categories,
      # :class => ITunesCategory

      # MediaRSS support
      element :'media:title', :as => :media_title
      element :'media:keywords', :as => :media_keywords
      element :'media:description', :as => :media_description

      element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
      element :'media:rating', :as => :rating, :class => MRSSRating
      element :'media:category', :as => :media_category, :class => MRSSCategory
      element :'media:hash', :as => :media_hash, :class => MRSSHash
      element :'media:player', :as => :media_player, :class => MRSSPlayer
      elements :'media:credit', :as => :credits, :class => MRSSCredit
      element :'media:copyright', :as => :copyright, :class => MRSSCopyright
      element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
      element :'media:text', :as => :text, :class => MRSSText

      # Returns a truthy value (the match index) when +xml+ contains an
      # opening "<rss" or "<rdf" tag, nil otherwise.
      #
      # Both alternatives are anchored on "<": the previous pattern
      # (/\<rss|rdf/) also matched a bare "rdf" anywhere in the document,
      # e.g. inside a namespace URL or body text of a non-RSS feed.
      def self.able_to_parse?(xml) #:nodoc:
        xml =~ /<rss|<rdf/
      end
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/mrss_content'
|
2
|
+
require File.dirname(__FILE__) + '/mrss_credit'
|
3
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
4
|
+
require File.dirname(__FILE__) + '/mrss_group'
|
5
|
+
require File.dirname(__FILE__) + '/mrss_category'
|
6
|
+
require File.dirname(__FILE__) + '/mrss_copyright'
|
7
|
+
require File.dirname(__FILE__) + '/mrss_hash'
|
8
|
+
require File.dirname(__FILE__) + '/mrss_player'
|
9
|
+
require File.dirname(__FILE__) + '/mrss_rating'
|
10
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
11
|
+
require File.dirname(__FILE__) + '/mrss_text'
|
12
|
+
require File.dirname(__FILE__) + '/mrss_thumbnail'
|
13
|
+
|
14
|
+
module Feedzirra
  module Parser
    # == Summary
    # Parser for dealing with RSS (and RDF) feed entries.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * categories
    #
    # Several source elements are mapped onto the same accessor (for example
    # url, published, author and summary); the declaration order below is
    # kept exactly as it was.
    class RSSEntry
      include SAXMachine
      include FeedEntryUtilities

      # RSS 2.0 elements
      element :title
      element :link, :as => :url
      element :"feedburner:origLink", :as => :url
      element :description, :as => :summary
      element :author
      elements :category, :as => :categories
      element :comments
      element :guid, :as => :id
      element :pubDate, :as => :published
      element :source
      element :enclosure, :as => :enclosure_length, :value => :length
      element :enclosure, :as => :enclosure_type,   :value => :type
      element :enclosure, :as => :enclosure_url,    :value => :url

      # RDF elements
      element :"dc:date", :as => :published
      element :"dc:Date", :as => :published
      element :"dcterms:created", :as => :published
      element :issued, :as => :published
      element :"content:encoded", :as => :content
      element :"dc:creator", :as => :author
      element :"dcterms:modified", :as => :updated

      # MediaRSS support, optional elements
      element :"media:title", :as => :media_title
      element :"media:keywords", :as => :media_keywords
      element :"media:description", :as => :media_description

      element :"media:thumbnail", :as => :media_thumbnail, :class => MRSSThumbnail
      element :"media:rating", :as => :rating, :class => MRSSRating
      element :"media:category", :as => :media_category, :class => MRSSCategory
      element :"media:hash", :as => :media_hash, :class => MRSSHash
      element :"media:player", :as => :media_player, :class => MRSSPlayer
      elements :"media:credit", :as => :credits, :class => MRSSCredit
      element :"media:copyright", :as => :copyright, :class => MRSSCopyright
      element :"media:restriction", :as => :media_restriction, :class => MRSSRestriction
      element :"media:text", :as => :text, :class => MRSSText
      elements :"media:content", :as => :media_content, :class => MRSSContent
      elements :"media:group", :as => :media_groups, :class => MRSSGroup

      # iTunes
      element :"itunes:author", :as => :author
      element :"itunes:block", :as => :itunes_block
      element :"itunes:duration", :as => :duration
      element :"itunes:explicit", :as => :explicit
      element :"itunes:keywords", :as => :keywords
      element :"itunes:subtitle", :as => :subtitle
      element :"itunes:summary", :as => :summary
    end
  end
end
|
data/lib/feedzirra.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
gem 'activesupport'
|
4
|
+
|
5
|
+
require 'zlib'
|
6
|
+
require 'curb'
|
7
|
+
require 'sax-machine'
|
8
|
+
require 'dryopteris'
|
9
|
+
require 'uri'
|
10
|
+
require 'active_support/basic_object'
|
11
|
+
require 'active_support/core_ext/object'
|
12
|
+
require 'active_support/core_ext/time'
|
13
|
+
|
14
|
+
|
15
|
+
require 'core_ext/date'
|
16
|
+
require 'core_ext/string'
|
17
|
+
|
18
|
+
require 'feedzirra/feed_utilities'
|
19
|
+
require 'feedzirra/feed_entry_utilities'
|
20
|
+
require 'feedzirra/feed'
|
21
|
+
|
22
|
+
require 'feedzirra/parser/mrss_content'
|
23
|
+
require 'feedzirra/parser/mrss_credit'
|
24
|
+
require 'feedzirra/parser/mrss_restriction'
|
25
|
+
require 'feedzirra/parser/mrss_group'
|
26
|
+
require 'feedzirra/parser/mrss_category'
|
27
|
+
require 'feedzirra/parser/mrss_copyright'
|
28
|
+
require 'feedzirra/parser/mrss_hash'
|
29
|
+
require 'feedzirra/parser/mrss_player'
|
30
|
+
require 'feedzirra/parser/mrss_rating'
|
31
|
+
require 'feedzirra/parser/mrss_restriction'
|
32
|
+
require 'feedzirra/parser/mrss_text'
|
33
|
+
require 'feedzirra/parser/mrss_thumbnail'
|
34
|
+
require 'feedzirra/parser/rss_entry'
|
35
|
+
require 'feedzirra/parser/rss_image'
|
36
|
+
require 'feedzirra/parser/itunes_category'
|
37
|
+
require 'feedzirra/parser/atom_entry'
|
38
|
+
|
39
|
+
require 'feedzirra/parser/rss'
|
40
|
+
require 'feedzirra/parser/atom'
|
41
|
+
|
42
|
+
module Feedzirra
  # Gem version string. Frozen so the shared constant cannot be mutated
  # in place by callers.
  VERSION = "0.0.28".freeze
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# this is some spike code to compare the speed of different methods for performing
# multiple feed fetches
#
# Each report fetches the same URL list (GET_COUNT copies of one URL) with a
# different HTTP client: curb, Net::HTTP, rfuzz, eventmachine and curl-multi.
require 'rubygems'
require 'curb'
# 'active_support' is the entry point the library itself builds on
# (lib/feedzirra.rb requires 'active_support/...'); 'activesupport' is the
# legacy file name.
require 'active_support'

require 'net/http'
require 'uri'

require 'benchmark'
include Benchmark

GET_COUNT = 1
urls = ["http://www.pauldix.net"] * GET_COUNT


benchmark do |t|
  t.report("taf2-curb") do
    multi = Curl::Multi.new
    urls.each do |url|
      easy = Curl::Easy.new(url) do |curl|
        curl.headers["User-Agent"] = "feedzirra"
        # curl.headers["If-Modified-Since"] = Time.now.httpdate
        # curl.headers["If-None-Match"] = "ziEyTl4q9GH04BR4jgkImd0GvSE"
        curl.follow_location = true
        curl.on_success do |c|
          # puts c.header_str.inspect
          # puts c.response_code
          # puts c.body_str.slice(0, 500)
        end
        curl.on_failure do |c|
          puts "**** #{c.response_code}"
        end
      end
      multi.add(easy)
    end

    multi.perform
  end

  t.report("nethttp") do
    urls.each do |url|
      res = Net::HTTP.get(URI.parse(url))
      # puts res.slice(0, 500)
    end
  end

  require 'rfuzz/session'
  include RFuzz
  t.report("rfuzz") do
    GET_COUNT.times do
      http = HttpClient.new("www.pauldix.net", 80)
      response = http.get("/")
      if response.http_status != "200"
        puts "***** #{response.http_status}"
      else
        # puts response.http_status
        # puts response.http_body.slice(0, 500)
      end
    end
  end

  require 'eventmachine'
  t.report("eventmachine") do
    # Stop the reactor once every outstanding request has called back.
    counter = GET_COUNT
    EM.run do
      GET_COUNT.times do
        http = EM::Protocols::HttpClient2.connect("www.pauldix.net", 80)
        request = http.get("/")
        request.callback do
          # puts request.status
          # puts request.content.slice(0, 500)
          counter -= 1
          EM.stop if counter == 0
        end
      end
    end
  end


  require 'curl-multi'
  t.report("curl multi") do
    multi = Curl::Multi.new
    urls.each do |url|
      on_failure = lambda do |ex|
        puts "****** Failed to retrieve #{url}"
      end

      on_success = lambda do |body|
        # puts "got #{url}"
        # puts body.slice(0, 500)
      end
      multi.get(url, on_success, on_failure)
    end

    # Keep servicing the handles until none are left pending.
    multi.select([], []) while multi.size > 0
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Benchmarks fetching + parsing of the first 20 sample feed URLs with
# feedzirra, rfeedparser and feed-normalizer.
require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
require 'rfeedparser'
require 'feed-normalizer'
require 'open-uri'

require 'benchmark'
include Benchmark

iterations = 10
# File.readlines keeps the trailing newline on every line; strip it so each
# client is handed a clean URL.
urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").slice(0, 20).map { |line| line.chomp }
puts "benchmarks on #{urls.size} feeds"
puts "************************************"
benchmark do |t|
  t.report("feedzirra") do
    iterations.times do
      Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush })
    end
  end

  t.report("rfeedparser") do
    iterations.times do
      urls.each do |url|
        feed = FeedParser.parse(url)
        $stdout.print '.'
        $stdout.flush
      end
    end
  end

  t.report("feed-normalizer") do
    iterations.times do
      urls.each do |url|
        # have to use the :force option to make feed-normalizer parse an atom feed
        # The block form of open closes the handle once parsing is done.
        feed = open(url) do |io|
          FeedNormalizer::FeedNormalizer.parse(io, :force_parser => FeedNormalizer::SimpleRssParser)
        end
        $stdout.print '.'
        $stdout.flush
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Benchmarks two ways of getting parsed feeds for the first 20 sample URLs:
# open-uri followed by Feedzirra::Feed.parse, versus
# Feedzirra::Feed.fetch_and_parse.
require 'rubygems'
require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'

require 'open-uri'

require 'benchmark'
include Benchmark

iterations = 10
# File.readlines keeps the trailing newline on every line; strip it so each
# fetch is handed a clean URL.
urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").slice(0, 20).map { |line| line.chomp }
puts "benchmarks on #{urls.size} feeds"
puts "************************************"
benchmark do |t|
  t.report("feedzirra open uri") do
    iterations.times do
      urls.each do |url|
        # The block form of open closes the handle after the body is read.
        xml = open(url, "User-Agent" => "feedzirra http://github.com/pauldix/feedzirra/tree/master") { |io| io.read }
        Feedzirra::Feed.parse(xml)
        $stdout.print '.'; $stdout.flush
      end
    end
  end

  t.report("feedzirra fetch and parse") do
    iterations.times do
      Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush })
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Benchmarks pure parsing speed (no network): the same sample feed XML is
# parsed repeatedly by feedzirra, rfeedparser and feed-normalizer.
require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
require 'rfeedparser'
require 'feed-normalizer'

require 'benchmark'
include Benchmark

iterations = 50
sample_path = File.dirname(__FILE__) + '/../sample_feeds/PaulDixExplainsNothing.xml'
xml = File.read(sample_path)

benchmark do |t|
  t.report("feedzirra") do
    iterations.times { Feedzirra::Feed.parse(xml) }
  end

  t.report("rfeedparser") do
    iterations.times { FeedParser.parse(xml) }
  end

  t.report("feed-normalizer") do
    iterations.times do
      # have to use the :force option to make feed-normalizer parse an atom feed
      FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Benchmarks an initial fetch_and_parse of every sample feed URL, then (after
# waiting out curb's DNS cache) an update pass over the feeds that were
# fetched successfully.
require 'rubygems'
require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'

require 'benchmark'
include Benchmark

# File.readlines keeps the trailing newline on every line; strip it so each
# fetch is handed a clean URL.
urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").map { |line| line.chomp }
puts "benchmarks on #{urls.size} feeds"
puts "************************************"
benchmark do |t|
  feeds = {}
  t.report("feedzirra fetch and parse") do
    feeds = Feedzirra::Feed.fetch_and_parse(urls,
      :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush },
      :on_failure => lambda {|url, response_code, header, body| puts "#{response_code} ERROR on #{url}"})
  end

  # curb caches the dns lookups for 60 seconds. to make things fair we have to wait for the cache to expire
  puts "sleeping to wait for dns cache to clear"
  65.times {$stdout.print('.'); sleep(1)}
  puts "done"

  updated_feeds = []
  t.report("feedzirra update") do
    # Skip integer entries (not feed objects) before updating. Integer is used
    # instead of Fixnum, which is deprecated and absent from recent Rubies;
    # Integer also covers Fixnum on older ones.
    updated_feeds = Feedzirra::Feed.update(feeds.values.reject {|f| f.is_a?(Integer)},
      :on_success => lambda {|feed| $stdout.print '.'; $stdout.flush},
      :on_failure => lambda {|feed, response_code, header, body| puts "#{response_code} ERROR on #{feed.feed_url}"})
  end

  updated_feeds.each do |feed|
    puts feed.feed_url if feed.updated?
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'

# Specs for the entry-level mixin. The anonymous class below includes
# Feedzirra::FeedEntryUtilities, so that is the module described here
# (it was previously labelled Feedzirra::FeedUtilities).
describe Feedzirra::FeedEntryUtilities do
  before(:each) do
    @klass = Class.new do
      include Feedzirra::FeedEntryUtilities
    end
  end

  describe "handling dates" do
    it "should parse an ISO 8601 formatted datetime into Time" do
      time = @klass.new.parse_datetime("2008-02-20T8:05:00-010:00")
      time.class.should == Time
      time.to_s.should == "Wed Feb 20 18:05:00 UTC 2008"
    end
  end

  describe "sanitizing" do
    before(:each) do
      @feed = Feedzirra::Feed.parse(sample_atom_feed)
      @entry = @feed.entries.first
    end

    it "should provide a sanitized title" do
      new_title = "<script>" + @entry.title
      @entry.title = new_title
      @entry.title.sanitize.should == Dryopteris.sanitize(new_title)
    end

    it "should sanitize content in place" do
      new_content = "<script>" + @entry.content
      # dup so the in-place sanitize! cannot alter the reference string
      @entry.content = new_content.dup
      @entry.content.sanitize!.should == Dryopteris.sanitize(new_content)
      @entry.content.should == Dryopteris.sanitize(new_content)
    end

    it "should sanitize things in place" do
      @entry.title += "<script>"
      @entry.author += "<script>"
      @entry.content += "<script>"

      # Expected values are computed before the entry is sanitized in place.
      cleaned_title = Dryopteris.sanitize(@entry.title)
      cleaned_author = Dryopteris.sanitize(@entry.author)
      cleaned_content = Dryopteris.sanitize(@entry.content)

      @entry.sanitize!
      @entry.title.should == cleaned_title
      @entry.author.should == cleaned_author
      @entry.content.should == cleaned_content
    end
  end
end
|