feedzirra 0.7.1 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/Gemfile +0 -14
  4. data/README.md +2 -241
  5. data/feedzirra.gemspec +2 -8
  6. data/lib/feedzirra.rb +2 -15
  7. data/lib/feedzirra/version.rb +1 -1
  8. metadata +7 -182
  9. data/.rspec +0 -1
  10. data/.travis.yml +0 -8
  11. data/Guardfile +0 -5
  12. data/Rakefile +0 -6
  13. data/benchmarks/README.md +0 -90
  14. data/benchmarks/basic.rb +0 -31
  15. data/benchmarks/feed_list.txt +0 -10
  16. data/benchmarks/feed_xml/apple.xml +0 -149
  17. data/benchmarks/feed_xml/cnn.xml +0 -278
  18. data/benchmarks/feed_xml/daring_fireball.xml +0 -1697
  19. data/benchmarks/feed_xml/engadget.xml +0 -604
  20. data/benchmarks/feed_xml/feedzirra_commits.xml +0 -370
  21. data/benchmarks/feed_xml/gizmodo.xml +0 -2
  22. data/benchmarks/feed_xml/loop.xml +0 -441
  23. data/benchmarks/feed_xml/rails.xml +0 -1938
  24. data/benchmarks/feed_xml/white_house.xml +0 -951
  25. data/benchmarks/feed_xml/xkcd.xml +0 -2
  26. data/benchmarks/fetching_systems.rb +0 -23
  27. data/benchmarks/other_libraries.rb +0 -73
  28. data/lib/feedzirra/core_ext.rb +0 -3
  29. data/lib/feedzirra/core_ext/date.rb +0 -19
  30. data/lib/feedzirra/core_ext/string.rb +0 -9
  31. data/lib/feedzirra/core_ext/time.rb +0 -31
  32. data/lib/feedzirra/feed.rb +0 -459
  33. data/lib/feedzirra/feed_entry_utilities.rb +0 -66
  34. data/lib/feedzirra/feed_utilities.rb +0 -103
  35. data/lib/feedzirra/parser.rb +0 -20
  36. data/lib/feedzirra/parser/atom.rb +0 -61
  37. data/lib/feedzirra/parser/atom_entry.rb +0 -34
  38. data/lib/feedzirra/parser/atom_feed_burner.rb +0 -22
  39. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +0 -35
  40. data/lib/feedzirra/parser/google_docs_atom.rb +0 -28
  41. data/lib/feedzirra/parser/google_docs_atom_entry.rb +0 -29
  42. data/lib/feedzirra/parser/itunes_rss.rb +0 -50
  43. data/lib/feedzirra/parser/itunes_rss_item.rb +0 -41
  44. data/lib/feedzirra/parser/itunes_rss_owner.rb +0 -12
  45. data/lib/feedzirra/parser/rss.rb +0 -24
  46. data/lib/feedzirra/parser/rss_entry.rb +0 -37
  47. data/lib/feedzirra/parser/rss_feed_burner.rb +0 -23
  48. data/lib/feedzirra/parser/rss_feed_burner_entry.rb +0 -43
  49. data/spec/feedzirra/feed_entry_utilities_spec.rb +0 -62
  50. data/spec/feedzirra/feed_spec.rb +0 -762
  51. data/spec/feedzirra/feed_utilities_spec.rb +0 -273
  52. data/spec/feedzirra/parser/atom_entry_spec.rb +0 -86
  53. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +0 -47
  54. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +0 -56
  55. data/spec/feedzirra/parser/atom_spec.rb +0 -76
  56. data/spec/feedzirra/parser/google_docs_atom_entry_spec.rb +0 -22
  57. data/spec/feedzirra/parser/google_docs_atom_spec.rb +0 -31
  58. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +0 -63
  59. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +0 -18
  60. data/spec/feedzirra/parser/itunes_rss_spec.rb +0 -58
  61. data/spec/feedzirra/parser/rss_entry_spec.rb +0 -85
  62. data/spec/feedzirra/parser/rss_feed_burner_entry_spec.rb +0 -85
  63. data/spec/feedzirra/parser/rss_feed_burner_spec.rb +0 -57
  64. data/spec/feedzirra/parser/rss_spec.rb +0 -57
  65. data/spec/sample_feeds/AmazonWebServicesBlog.xml +0 -797
  66. data/spec/sample_feeds/AmazonWebServicesBlogFirstEntryContent.xml +0 -63
  67. data/spec/sample_feeds/AtomFeedWithSpacesAroundEquals.xml +0 -61
  68. data/spec/sample_feeds/FeedBurnerUrlNoAlternate.xml +0 -28
  69. data/spec/sample_feeds/GoogleDocsList.xml +0 -188
  70. data/spec/sample_feeds/HREFConsideredHarmful.xml +0 -314
  71. data/spec/sample_feeds/HREFConsideredHarmfulFirstEntry.xml +0 -22
  72. data/spec/sample_feeds/ITunesWithSpacesInAttributes.xml +0 -63
  73. data/spec/sample_feeds/PaulDixExplainsNothing.xml +0 -175
  74. data/spec/sample_feeds/PaulDixExplainsNothingAlternate.xml +0 -175
  75. data/spec/sample_feeds/PaulDixExplainsNothingFirstEntryContent.xml +0 -19
  76. data/spec/sample_feeds/PaulDixExplainsNothingWFW.xml +0 -174
  77. data/spec/sample_feeds/SamRuby.xml +0 -583
  78. data/spec/sample_feeds/TechCrunch.xml +0 -1515
  79. data/spec/sample_feeds/TechCrunchFirstEntry.xml +0 -9
  80. data/spec/sample_feeds/TechCrunchFirstEntryDescription.xml +0 -3
  81. data/spec/sample_feeds/TenderLovemaking.xml +0 -516
  82. data/spec/sample_feeds/TenderLovemakingFirstEntry.xml +0 -66
  83. data/spec/sample_feeds/TrotterCashionHome.xml +0 -611
  84. data/spec/sample_feeds/TypePadNews.xml +0 -368
  85. data/spec/sample_feeds/atom_with_link_tag_for_url_unmarked.xml +0 -31
  86. data/spec/sample_feeds/itunes.xml +0 -67
  87. data/spec/sample_feeds/pet_atom.xml +0 -497
  88. data/spec/spec_helper.rb +0 -88
@@ -1,66 +0,0 @@
1
- module Feedzirra
2
- module FeedEntryUtilities
3
-
4
- include Enumerable
5
-
6
- def published
7
- @published ||= @updated
8
- end
9
-
10
- def parse_datetime(string)
11
- begin
12
- DateTime.parse(string).feed_utils_to_gm_time
13
- rescue
14
- warn "Failed to parse date #{string.inspect}"
15
- nil
16
- end
17
- end
18
-
19
- ##
20
- # Returns the id of the entry or its url if not id is present, as some formats don't support it
21
- def id
22
- @entry_id ||= @url
23
- end
24
-
25
- ##
26
- # Writer for published. By default, we keep the "oldest" publish time found.
27
- def published=(val)
28
- parsed = parse_datetime(val)
29
- @published = parsed if !@published || parsed < @published
30
- end
31
-
32
- ##
33
- # Writer for updated. By default, we keep the most recent update time found.
34
- def updated=(val)
35
- parsed = parse_datetime(val)
36
- @updated = parsed if !@updated || parsed > @updated
37
- end
38
-
39
- def sanitize!
40
- %w[title author summary content image].each do |name|
41
- if self.respond_to?(name) && self.send(name).respond_to?(:sanitize!)
42
- self.send(name).send :sanitize!
43
- end
44
- end
45
- end
46
-
47
- alias_method :last_modified, :published
48
-
49
- def each
50
- @rss_fields ||= self.instance_variables
51
-
52
- @rss_fields.each do |field|
53
- yield(field.to_s.sub('@', ''), self.instance_variable_get(field))
54
- end
55
- end
56
-
57
- def [](field)
58
- self.instance_variable_get("@#{field.to_s}")
59
- end
60
-
61
- def []=(field, value)
62
- self.instance_variable_set("@#{field.to_s}", value)
63
- end
64
-
65
- end
66
- end
@@ -1,103 +0,0 @@
1
- module Feedzirra
2
- module FeedUtilities
3
- UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified etag)
4
-
5
- attr_writer :new_entries, :updated, :last_modified
6
- attr_accessor :etag
7
-
8
- def self.included(base)
9
- base.extend ClassMethods
10
- end
11
-
12
- module ClassMethods
13
- def parse(xml, &block)
14
- xml = preprocess(xml) if preprocess_xml
15
- super xml.lstrip, &block
16
- end
17
-
18
- def preprocess(xml)
19
- # noop
20
- xml
21
- end
22
-
23
- def preprocess_xml=(value)
24
- @preprocess_xml = value
25
- end
26
-
27
- def preprocess_xml
28
- @preprocess_xml
29
- end
30
- end
31
-
32
- def last_modified
33
- @last_modified ||= begin
34
- entry = entries.reject {|e| e.published.nil? }.sort_by { |entry| entry.published if entry.published }.last
35
- entry ? entry.published : nil
36
- end
37
- end
38
-
39
- def updated?
40
- @updated || false
41
- end
42
-
43
- def new_entries
44
- @new_entries ||= []
45
- end
46
-
47
- def has_new_entries?
48
- new_entries.size > 0
49
- end
50
-
51
- def update_from_feed(feed)
52
- self.new_entries += find_new_entries_for(feed)
53
- self.entries.unshift(*self.new_entries)
54
-
55
- @updated = false
56
-
57
- UPDATABLE_ATTRIBUTES.each do |name|
58
- @updated ||= update_attribute(feed, name)
59
- end
60
- end
61
-
62
- def update_attribute(feed, name)
63
- old_value, new_value = send(name), feed.send(name)
64
-
65
- if old_value != new_value
66
- send("#{name}=", new_value)
67
- true
68
- else
69
- false
70
- end
71
- end
72
-
73
- def sanitize_entries!
74
- entries.each {|entry| entry.sanitize!}
75
- end
76
-
77
- private
78
-
79
- def find_new_entries_for(feed)
80
- # this implementation is a hack, which is why it's so ugly.
81
- # it's to get around the fact that not all feeds have a published date.
82
- # however, they're always ordered with the newest one first.
83
- # So we go through the entries just parsed and insert each one as a new entry
84
- # until we get to one that has the same id as the the newest for the feed
85
- return feed.entries if self.entries.length == 0
86
- latest_entry = self.entries.first
87
- found_new_entries = []
88
- feed.entries.each do |entry|
89
- if entry.entry_id.nil? && latest_entry.entry_id.nil?
90
- break if entry.url == latest_entry.url
91
- else
92
- break if entry.entry_id == latest_entry.entry_id || entry.url == latest_entry.url
93
- end
94
- found_new_entries << entry
95
- end
96
- found_new_entries
97
- end
98
-
99
- def existing_entry?(test_entry)
100
- entries.any? { |entry| entry.id == test_entry.id }
101
- end
102
- end
103
- end
@@ -1,20 +0,0 @@
1
- module Feedzirra
2
- module Parser
3
- autoload :RSS, 'feedzirra/parser/rss'
4
- autoload :RSSEntry, 'feedzirra/parser/rss_entry'
5
- autoload :RSSFeedBurner, 'feedzirra/parser/rss_feed_burner'
6
- autoload :RSSFeedBurnerEntry, 'feedzirra/parser/rss_feed_burner_entry'
7
-
8
- autoload :ITunesRSS, 'feedzirra/parser/itunes_rss'
9
- autoload :ITunesRSSItem, 'feedzirra/parser/itunes_rss_item'
10
- autoload :ITunesRSSOwner, 'feedzirra/parser/itunes_rss_owner'
11
-
12
- autoload :GoogleDocsAtom, 'feedzirra/parser/google_docs_atom'
13
- autoload :GoogleDocsAtomEntry, 'feedzirra/parser/google_docs_atom_entry'
14
-
15
- autoload :Atom, 'feedzirra/parser/atom'
16
- autoload :AtomEntry, 'feedzirra/parser/atom_entry'
17
- autoload :AtomFeedBurner, 'feedzirra/parser/atom_feed_burner'
18
- autoload :AtomFeedBurnerEntry, 'feedzirra/parser/atom_feed_burner_entry'
19
- end
20
- end
@@ -1,61 +0,0 @@
1
- module Feedzirra
2
- module Parser
3
- # Parser for dealing with Atom feeds.
4
- class Atom
5
- include SAXMachine
6
- include FeedUtilities
7
- element :title
8
- element :subtitle, :as => :description
9
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
10
- element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
11
- elements :link, :as => :links, :value => :href
12
- elements :link, :as => :hubs, :value => :href, :with => {:rel => "hub"}
13
- elements :entry, :as => :entries, :class => AtomEntry
14
-
15
- def self.able_to_parse?(xml) #:nodoc:
16
- /\<feed[^\>]+xmlns\s?=\s?[\"|\'](http:\/\/www\.w3\.org\/2005\/Atom|http:\/\/purl\.org\/atom\/ns\#)[\"|\'][^\>]*\>/ =~ xml
17
- end
18
-
19
- def url
20
- @url || links.last
21
- end
22
-
23
- def feed_url
24
- @feed_url ||= links.first
25
- end
26
-
27
- def self.preprocess(xml)
28
- Preprocessor.new(xml).to_xml
29
- end
30
-
31
- class Preprocessor
32
- def initialize(xml)
33
- @xml = xml
34
- end
35
-
36
- def to_xml
37
- process_nodes
38
- doc.to_xml
39
- end
40
-
41
- private
42
-
43
- def process_nodes
44
- nodes.each { |node| node.content = raw_html(node) unless node.cdata? }
45
- end
46
-
47
- def nodes
48
- doc.search 'entry > content[type="xhtml"]'
49
- end
50
-
51
- def raw_html(node)
52
- CGI.unescape_html node.inner_html
53
- end
54
-
55
- def doc
56
- @doc ||= Nokogiri::XML @xml
57
- end
58
- end
59
- end
60
- end
61
- end
@@ -1,34 +0,0 @@
1
- module Feedzirra
2
-
3
- module Parser
4
- # Parser for dealing with Atom feed entries.
5
- class AtomEntry
6
- include SAXMachine
7
- include FeedEntryUtilities
8
-
9
- element :title
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
11
- element :name, :as => :author
12
- element :content
13
- element :summary
14
-
15
- element :"media:content", :as => :image, :value => :url
16
- element :enclosure, :as => :image, :value => :href
17
-
18
- element :published
19
- element :id, :as => :entry_id
20
- element :created, :as => :published
21
- element :issued, :as => :published
22
- element :updated
23
- element :modified, :as => :updated
24
- elements :category, :as => :categories, :value => :term
25
- elements :link, :as => :links, :value => :href
26
-
27
- def url
28
- @url ||= links.first
29
- end
30
- end
31
-
32
- end
33
-
34
- end
@@ -1,22 +0,0 @@
1
- module Feedzirra
2
-
3
- module Parser
4
- # Parser for dealing with Feedburner Atom feeds.
5
- class AtomFeedBurner
6
- include SAXMachine
7
- include FeedUtilities
8
- element :title
9
- element :subtitle, :as => :description
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
11
- element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
12
- elements :"atom10:link", :as => :hubs, :value => :href, :with => {:rel => "hub"}
13
- elements :entry, :as => :entries, :class => AtomFeedBurnerEntry
14
-
15
- def self.able_to_parse?(xml) #:nodoc:
16
- ((/Atom/ =~ xml) && (/feedburner/ =~ xml) && !(/\<rss|\<rdf/ =~ xml)) || false
17
- end
18
- end
19
-
20
- end
21
-
22
- end
@@ -1,35 +0,0 @@
1
- module Feedzirra
2
-
3
- module Parser
4
- # Parser for dealing with Feedburner Atom feed entries.
5
- class AtomFeedBurnerEntry
6
- include SAXMachine
7
- include FeedEntryUtilities
8
-
9
- element :title
10
- element :name, :as => :author
11
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
12
- element :"feedburner:origLink", :as => :url
13
- element :summary
14
- element :content
15
-
16
- element :"media:content", :as => :image, :value => :url
17
- element :enclosure, :as => :image, :value => :href
18
-
19
- element :published
20
- element :id, :as => :entry_id
21
- element :issued, :as => :published
22
- element :created, :as => :published
23
- element :updated
24
- element :modified, :as => :updated
25
- elements :category, :as => :categories, :value => :term
26
- elements :link, :as => :links, :value => :href
27
-
28
- def url
29
- @url ||= links.first
30
- end
31
-
32
- end
33
- end
34
-
35
- end
@@ -1,28 +0,0 @@
1
- require File.expand_path('./atom', File.dirname(__FILE__))
2
-
3
- module Feedzirra
4
- module Parser
5
- class GoogleDocsAtom
6
- include SAXMachine
7
- include FeedUtilities
8
- element :title
9
- element :subtitle, :as => :description
10
- element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
11
- element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
12
- elements :link, :as => :links, :value => :href
13
- elements :entry, :as => :entries, :class => GoogleDocsAtomEntry
14
-
15
- def url
16
- @url ||= links.first
17
- end
18
-
19
- def self.able_to_parse?(xml) #:nodoc:
20
- %r{<id>https?://docs.google.com/.*\</id\>} =~ xml
21
- end
22
-
23
- def feed_url
24
- @feed_url ||= links.first
25
- end
26
- end
27
- end
28
- end
@@ -1,29 +0,0 @@
1
- module Feedzirra
2
- module Parser
3
- class GoogleDocsAtomEntry
4
- include SAXMachine
5
- include FeedEntryUtilities
6
-
7
- element :title
8
- element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
9
- element :name, :as => :author
10
- element :content
11
- element :summary
12
- element :published
13
- element :id, :as => :entry_id
14
- element :created, :as => :published
15
- element :issued, :as => :published
16
- element :updated
17
- element :modified, :as => :updated
18
- elements :category, :as => :categories, :value => :term
19
- elements :link, :as => :links, :value => :href
20
- element :"docs:md5Checksum", :as => :checksum
21
- element :"docs:filename", :as => :original_filename
22
- element :"docs:suggestedFilename", :as => :suggested_filename
23
-
24
- def url
25
- @url ||= links.first
26
- end
27
- end
28
- end
29
- end
@@ -1,50 +0,0 @@
1
- module Feedzirra
2
-
3
- module Parser
4
- # iTunes is RSS 2.0 + some apple extensions
5
- # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
6
- class ITunesRSS
7
- include SAXMachine
8
- include FeedUtilities
9
-
10
- attr_accessor :feed_url
11
-
12
- # RSS 2.0 elements that need including
13
- element :copyright
14
- element :description
15
- element :language
16
- element :managingEditor
17
- element :title
18
- element :link, :as => :url
19
-
20
- # If author is not present use managingEditor on the channel
21
- element :"itunes:author", :as => :itunes_author
22
- element :"itunes:block", :as => :itunes_block
23
- element :"itunes:image", :value => :href, :as => :itunes_image
24
- element :"itunes:explicit", :as => :itunes_explicit
25
- element :"itunes:keywords", :as => :itunes_keywords
26
- # New URL for the podcast feed
27
- element :"itunes:new-feed-url", :as => :itunes_new_feed_url
28
- element :"itunes:subtitle", :as => :itunes_subtitle
29
- # If summary is not present, use the description tag
30
- element :"itunes:summary", :as => :itunes_summary
31
-
32
- # iTunes RSS feeds can have multiple main categories...
33
- # ...and multiple sub-categories per category
34
- # TODO subcategories not supported correctly - they are at the same level
35
- # as the main categories
36
- elements :"itunes:category", :as => :itunes_categories, :value => :text
37
-
38
- elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner
39
-
40
- elements :item, :as => :entries, :class => ITunesRSSItem
41
-
42
- def self.able_to_parse?(xml)
43
- /xmlns:itunes\s?=\s?\"http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd\"/i =~ xml
44
- end
45
-
46
- end
47
-
48
- end
49
-
50
- end