logophobia-logophobia-feedzirra 0.0.31
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +169 -0
- data/README.textile +205 -0
- data/Rakefile +56 -0
- data/lib/core_ext/date.rb +21 -0
- data/lib/core_ext/string.rb +9 -0
- data/lib/feedzirra.rb +44 -0
- data/lib/feedzirra/feed.rb +333 -0
- data/lib/feedzirra/feed_entry_utilities.rb +45 -0
- data/lib/feedzirra/feed_utilities.rb +71 -0
- data/lib/feedzirra/parser/atom.rb +35 -0
- data/lib/feedzirra/parser/atom_entry.rb +41 -0
- data/lib/feedzirra/parser/itunes_category.rb +12 -0
- data/lib/feedzirra/parser/mrss_category.rb +11 -0
- data/lib/feedzirra/parser/mrss_content.rb +48 -0
- data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
- data/lib/feedzirra/parser/mrss_credit.rb +11 -0
- data/lib/feedzirra/parser/mrss_group.rb +37 -0
- data/lib/feedzirra/parser/mrss_hash.rb +10 -0
- data/lib/feedzirra/parser/mrss_player.rb +11 -0
- data/lib/feedzirra/parser/mrss_rating.rb +10 -0
- data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
- data/lib/feedzirra/parser/mrss_text.rb +13 -0
- data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
- data/lib/feedzirra/parser/rss.rb +83 -0
- data/lib/feedzirra/parser/rss_entry.rb +83 -0
- data/lib/feedzirra/parser/rss_image.rb +15 -0
- data/spec/benchmarks/feed_benchmarks.rb +98 -0
- data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
- data/spec/benchmarks/fetching_benchmarks.rb +28 -0
- data/spec/benchmarks/parsing_benchmark.rb +30 -0
- data/spec/benchmarks/updating_benchmarks.rb +33 -0
- data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
- data/spec/feedzirra/feed_spec.rb +546 -0
- data/spec/feedzirra/feed_utilities_spec.rb +149 -0
- data/spec/feedzirra/parser/atom_entry_spec.rb +49 -0
- data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
- data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
- data/spec/feedzirra/parser/atom_spec.rb +43 -0
- data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
- data/spec/feedzirra/parser/rss_entry_spec.rb +154 -0
- data/spec/feedzirra/parser/rss_spec.rb +93 -0
- data/spec/sample_feeds/run_against_sample.rb +20 -0
- data/spec/spec_helper.rb +62 -0
- metadata +155 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
|
3
|
+
module Parser
|
4
|
+
# == Summary
# Parser for Atom feeds, declared with the SAXMachine DSL.
#
# == Attributes
# * title
# * feed_url
# * url
# * links
# * entries
class Atom
  include SAXMachine
  include FeedUtilities

  element  :title
  element  :link,  :as => :url,      :value => :href, :with => {:type => "text/html"}
  element  :link,  :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
  elements :link,  :as => :links,    :value => :href
  elements :entry, :as => :entries,  :class => AtomEntry

  # Tells whether +xml+ looks like an Atom document. The result is the
  # match offset (truthy) when the text contains "Atom" or the
  # "http://purl.org/atom" namespace prefix, and +nil+ otherwise.
  def self.able_to_parse?(xml) #:nodoc:
    xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
  end

  # The value stored by the text/html +link+ declaration; when that is
  # unset, falls back to the last entry of +links+.
  def url
    return @url if @url
    links.last
  end

  # The value stored by the application/atom+xml +link+ declaration; when
  # that is unset, falls back to the first entry of +links+.
  def feed_url
    return @feed_url if @feed_url
    links.first
  end
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
|
3
|
+
module Parser
|
4
|
+
# == Summary
# Parser for a single entry of an Atom feed, declared with the SAXMachine DSL.
#
# == Attributes
# * title
# * url
# * author
# * content
# * summary
# * published
# * categories
class AtomEntry
  include SAXMachine
  include FeedEntryUtilities

  element :title
  # FeedBurner rewrites links in some feeds, so the original link is kept
  # separately to let callers distinguish the two manually.
  element :"feedburner:origLink", :as => :orig_url
  element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
  element :name, :as => :author
  element :content
  element :summary
  element :published
  element :id
  element :created,  :as => :published
  element :issued,   :as => :published
  element :updated
  element :modified, :as => :updated

  elements :category, :as => :categories,      :value => :term
  elements :link,     :as => :links,           :value => :href
  elements :link,     :as => :enclosure_links, :value => :href, :with => {:rel => "enclosure"}

  # The value stored by the text/html alternate +link+ declaration; when
  # that is unset, falls back to the first entry of +links+.
  def url
    return @url if @url
    links.first
  end
end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
module Parser
|
3
|
+
# SAXMachine mapping for a <media:category> element: its text plus the
# +scheme+ and +label+ attributes.
class MRSSCategory
  include SAXMachine

  element :'media:category', :as => :category
  element :'media:category', :as => :scheme, :value => :scheme
  element :'media:category', :as => :label,  :value => :label
end
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/mrss_credit'
|
2
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
3
|
+
require File.dirname(__FILE__) + '/mrss_category'
|
4
|
+
require File.dirname(__FILE__) + '/mrss_copyright'
|
5
|
+
require File.dirname(__FILE__) + '/mrss_hash'
|
6
|
+
require File.dirname(__FILE__) + '/mrss_player'
|
7
|
+
require File.dirname(__FILE__) + '/mrss_rating'
|
8
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
9
|
+
require File.dirname(__FILE__) + '/mrss_text'
|
10
|
+
require File.dirname(__FILE__) + '/mrss_thumbnail'
|
11
|
+
|
12
|
+
module Feedzirra
|
13
|
+
module Parser
|
14
|
+
# SAXMachine mapping for a Media RSS <media:content> element.
#
# The first group of declarations exposes the element's own attributes
# (each accessor reads the attribute named by +:value+); the remaining
# declarations expose the optional child elements.
class MRSSContent
  include SAXMachine

  # <media:content> attributes
  element :'media:content', :as => :url,          :value => :url
  element :'media:content', :as => :content_type, :value => :type
  element :'media:content', :as => :medium,       :value => :medium
  element :'media:content', :as => :duration,     :value => :duration
  element :'media:content', :as => :isDefault,    :value => :isDefault
  element :'media:content', :as => :expression,   :value => :expression
  element :'media:content', :as => :bitrate,      :value => :bitrate
  element :'media:content', :as => :framerate,    :value => :framerate
  # The Media RSS attribute is called "samplingrate" (not "sampling").
  element :'media:content', :as => :samplingrate, :value => :samplingrate
  # Read the "channels" attribute itself rather than "duration".
  element :'media:content', :as => :channels,     :value => :channels
  element :'media:content', :as => :height,       :value => :height
  element :'media:content', :as => :width,        :value => :width
  element :'media:content', :as => :lang,         :value => :lang
  element :'media:content', :as => :fileSize,     :value => :fileSize

  # optional elements
  element :'media:title',       :as => :media_title
  element :'media:keywords',    :as => :media_keywords
  element :'media:description', :as => :media_description

  element  :'media:thumbnail',   :as => :media_thumbnail,   :class => MRSSThumbnail
  element  :'media:rating',      :as => :rating,            :class => MRSSRating
  element  :'media:category',    :as => :media_category,    :class => MRSSCategory
  element  :'media:hash',        :as => :media_hash,        :class => MRSSHash
  element  :'media:player',      :as => :media_player,      :class => MRSSPlayer
  elements :'media:credit',      :as => :credits,           :class => MRSSCredit
  element  :'media:copyright',   :as => :copyright,         :class => MRSSCopyright
  element  :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
  element  :'media:text',        :as => :text,              :class => MRSSText
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/mrss_content'
|
2
|
+
require File.dirname(__FILE__) + '/mrss_credit'
|
3
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
4
|
+
require File.dirname(__FILE__) + '/mrss_group'
|
5
|
+
require File.dirname(__FILE__) + '/mrss_category'
|
6
|
+
require File.dirname(__FILE__) + '/mrss_copyright'
|
7
|
+
require File.dirname(__FILE__) + '/mrss_hash'
|
8
|
+
require File.dirname(__FILE__) + '/mrss_player'
|
9
|
+
require File.dirname(__FILE__) + '/mrss_rating'
|
10
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
11
|
+
require File.dirname(__FILE__) + '/mrss_text'
|
12
|
+
require File.dirname(__FILE__) + '/mrss_thumbnail'
|
13
|
+
|
14
|
+
module Feedzirra
|
15
|
+
module Parser
|
16
|
+
# SAXMachine mapping for a Media RSS <media:group>: a collection of
# <media:content> children plus the optional media elements declared below.
class MRSSGroup
  include SAXMachine

  elements :'media:content', :as => :media_content, :class => MRSSContent

  # optional elements
  element :'media:title',       :as => :media_title
  element :'media:keywords',    :as => :media_keywords
  element :'media:description', :as => :media_description

  element  :'media:thumbnail',   :as => :media_thumbnail,   :class => MRSSThumbnail
  element  :'media:rating',      :as => :rating,            :class => MRSSRating
  element  :'media:category',    :as => :media_category,    :class => MRSSCategory
  element  :'media:hash',        :as => :media_hash,        :class => MRSSHash
  element  :'media:player',      :as => :media_player,      :class => MRSSPlayer
  elements :'media:credit',      :as => :credits,           :class => MRSSCredit
  element  :'media:copyright',   :as => :copyright,         :class => MRSSCopyright
  element  :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
  element  :'media:text',        :as => :text,              :class => MRSSText
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
module Parser
|
3
|
+
# SAXMachine mapping for a <media:player> element, exposing its +url+,
# +width+ and +height+ attributes.
class MRSSPlayer
  include SAXMachine

  element :'media:player', :as => :url,    :value => :url
  element :'media:player', :as => :width,  :value => :width
  element :'media:player', :as => :height, :value => :height
end
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
module Parser
|
3
|
+
# SAXMachine mapping for a <media:restriction> element: its text is exposed
# as +value+, its +type+ attribute as +scope+, and its +relationship+
# attribute under the same name.
class MRSSRestriction
  include SAXMachine

  element :'media:restriction', :as => :value
  element :'media:restriction', :as => :scope,        :value => :type
  element :'media:restriction', :as => :relationship, :value => :relationship
end
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
module Parser
|
3
|
+
# SAXMachine mapping for a <media:text> element: the +type+, +lang+,
# +start+ and +end+ attributes plus the element text (as +text+).
class MRSSText
  include SAXMachine

  element :'media:text', :as => :type,  :value => :type
  element :'media:text', :as => :lang,  :value => :lang
  element :'media:text', :as => :start, :value => :start
  element :'media:text', :as => :end,   :value => :end
  element :'media:text', :as => :text
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module Feedzirra
|
2
|
+
module Parser
|
3
|
+
# SAXMachine mapping for a <media:thumbnail> element, exposing its +url+,
# +width+ and +height+ attributes.
class MRSSThumbnail
  include SAXMachine

  element :'media:thumbnail', :as => :url,    :value => :url
  # Previously exposed under the misspelled name +with+; the correctly
  # named accessor is declared here and the old name is kept below.
  element :'media:thumbnail', :as => :width,  :value => :width
  element :'media:thumbnail', :as => :height, :value => :height

  # Deprecated: misspelled reader retained so existing callers keep
  # working. Returns the same value as +width+.
  def with
    width
  end

  # Deprecated: misspelled writer retained for backward compatibility.
  # Stores +value+ through +width=+.
  def with=(value)
    self.width = value
  end
end
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/mrss_credit'
|
2
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
3
|
+
require File.dirname(__FILE__) + '/mrss_category'
|
4
|
+
require File.dirname(__FILE__) + '/mrss_copyright'
|
5
|
+
require File.dirname(__FILE__) + '/mrss_hash'
|
6
|
+
require File.dirname(__FILE__) + '/mrss_player'
|
7
|
+
require File.dirname(__FILE__) + '/mrss_rating'
|
8
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
9
|
+
require File.dirname(__FILE__) + '/mrss_text'
|
10
|
+
require File.dirname(__FILE__) + '/mrss_thumbnail'
|
11
|
+
|
12
|
+
module Feedzirra
|
13
|
+
module Parser
|
14
|
+
# == Summary
# Parser for dealing with RSS feeds, declared with the SAXMachine DSL.
# Besides the RSS 2.0 channel elements it maps iTunes and Media RSS
# extension elements.
#
# NOTE(review): several declarations share one accessor name (+image+,
# +rating+, +copyright+); which value wins when a feed carries more than
# one of them depends on SAXMachine and should be confirmed.
class RSS
  include SAXMachine
  include FeedUtilities

  attr_accessor :feed_url

  # RSS 2.0 required elements
  element  :title
  element  :link, :as => :url
  element  :description
  elements :item, :as => :entries, :class => RSSEntry

  # RSS 2.0 optional elements
  element :language
  element :copyright
  element :managingEditor
  element :webMaster
  element :pubDate
  element :lastBuildDate
  element :category
  element :generator
  element :docs
  element :cloud
  element :ttl
  element :image, :class => RSSImage
  element :rating
  element :textInput
  element :skipHours
  element :skipDays

  # iTunes
  element :'itunes:author',       :as => :author
  element :'itunes:block',        :as => :itunes_block
  element :'itunes:image',        :as => :image, :value => :href
  element :'itunes:explicit',     :as => :explicit
  element :'itunes:keywords',     :as => :keywords
  element :'itunes:new-feed-url', :as => :feed_url
  element :'itunes:name',         :as => :owner_name
  element :'itunes:email',        :as => :owner_email
  element :'itunes:subtitle',     :as => :subtitle
  element :'itunes:summary',      :as => :summary

  elements :'itunes:category', :as => :categories, :value => :text
  # elements :'itunes:category', :as => :itunes_categories,
  #          :class => ITunesCategory

  # MediaRSS support
  element :'media:title',       :as => :media_title
  element :'media:keywords',    :as => :media_keywords
  element :'media:description', :as => :media_description

  element  :'media:thumbnail',   :as => :media_thumbnail,   :class => MRSSThumbnail
  element  :'media:rating',      :as => :rating,            :class => MRSSRating
  element  :'media:category',    :as => :media_category,    :class => MRSSCategory
  element  :'media:hash',        :as => :media_hash,        :class => MRSSHash
  element  :'media:player',      :as => :media_player,      :class => MRSSPlayer
  elements :'media:credit',      :as => :credits,           :class => MRSSCredit
  element  :'media:copyright',   :as => :copyright,         :class => MRSSCopyright
  element  :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
  element  :'media:text',        :as => :text,              :class => MRSSText

  # Tells whether +xml+ looks like an RSS or RDF document. The result is
  # the match offset (truthy) when the text contains a tag opening with
  # "<rss" or "<rdf", and +nil+ otherwise.
  #
  # Both alternatives are anchored to "<": the earlier pattern
  # (/\<rss|rdf/) only anchored "rss", so any document that merely
  # mentioned "rdf" somewhere in its text was accepted.
  def self.able_to_parse?(xml) #:nodoc:
    xml =~ /<rss|<rdf/
  end
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/mrss_content'
|
2
|
+
require File.dirname(__FILE__) + '/mrss_credit'
|
3
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
4
|
+
require File.dirname(__FILE__) + '/mrss_group'
|
5
|
+
require File.dirname(__FILE__) + '/mrss_category'
|
6
|
+
require File.dirname(__FILE__) + '/mrss_copyright'
|
7
|
+
require File.dirname(__FILE__) + '/mrss_hash'
|
8
|
+
require File.dirname(__FILE__) + '/mrss_player'
|
9
|
+
require File.dirname(__FILE__) + '/mrss_rating'
|
10
|
+
require File.dirname(__FILE__) + '/mrss_restriction'
|
11
|
+
require File.dirname(__FILE__) + '/mrss_text'
|
12
|
+
require File.dirname(__FILE__) + '/mrss_thumbnail'
|
13
|
+
|
14
|
+
module Feedzirra
|
15
|
+
module Parser
|
16
|
+
# == Summary
# Parser for a single RSS 2.0 / RDF feed entry, declared with the
# SAXMachine DSL. It also maps Media RSS and iTunes extension elements.
#
# == Attributes
# * title
# * url
# * author
# * content
# * summary
# * published
# * categories
class RSSEntry
  include SAXMachine
  include FeedEntryUtilities

  # RSS 2.0 elements
  element :title
  # FeedBurner rewrites links in some feeds, so the original link is kept
  # separately to let callers distinguish the two manually.
  element :"feedburner:origLink", :as => :orig_url
  element :link,        :as => :url
  element :description, :as => :summary
  element :author
  elements :category,   :as => :categories
  element :comments
  element :guid,        :as => :id
  element :pubDate,     :as => :published
  element :source
  element :enclosure, :value => :length, :as => :enclosure_length
  element :enclosure, :value => :type,   :as => :enclosure_type
  element :enclosure, :value => :url,    :as => :enclosure_url

  # RDF elements
  element :"dc:date",          :as => :published
  element :"dc:Date",          :as => :published
  element :"dcterms:created",  :as => :published
  element :issued,             :as => :published
  element :"content:encoded",  :as => :content
  element :"dc:creator",       :as => :author
  element :"dcterms:modified", :as => :updated

  # MediaRSS support, optional elements
  element :'media:title',       :as => :media_title
  element :'media:keywords',    :as => :media_keywords
  element :'media:description', :as => :media_description

  element  :'media:thumbnail',   :as => :media_thumbnail,   :class => MRSSThumbnail
  element  :'media:rating',      :as => :rating,            :class => MRSSRating
  element  :'media:category',    :as => :media_category,    :class => MRSSCategory
  element  :'media:hash',        :as => :media_hash,        :class => MRSSHash
  element  :'media:player',      :as => :media_player,      :class => MRSSPlayer
  elements :'media:credit',      :as => :credits,           :class => MRSSCredit
  element  :'media:copyright',   :as => :copyright,         :class => MRSSCopyright
  element  :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
  element  :'media:text',        :as => :text,              :class => MRSSText
  elements :'media:content',     :as => :media_content,     :class => MRSSContent
  elements :'media:group',       :as => :media_groups,      :class => MRSSGroup

  # iTunes
  element :'itunes:author',   :as => :author
  element :'itunes:block',    :as => :itunes_block
  element :'itunes:duration', :as => :duration
  element :'itunes:explicit', :as => :explicit
  element :'itunes:keywords', :as => :keywords
  element :'itunes:subtitle', :as => :subtitle
  element :'itunes:summary',  :as => :summary
end
|
82
|
+
end
|
83
|
+
end
|