feedtools 0.2.15 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +7 -0
- data/lib/feed_tools.rb +6 -1
- data/lib/feed_tools/feed.rb +81 -40
- data/lib/feed_tools/feed_item.rb +38 -5
- data/rakefile +1 -1
- data/test/rss_test.rb +50 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
== FeedTools 0.2.16
|
2
|
+
* atom feed generation now defaults to atom 1.0
|
3
|
+
* removed erroneous published element at atom feed-level
|
4
|
+
* fixed some incompatibilities with ruby 1.8.3
|
5
|
+
* better support for universal subscription mechanism within feeds
|
6
|
+
* now handles both atom 0.3 and 1.0 namespaces
|
7
|
+
* fixed timestamping of feed items
|
1
8
|
== FeedTools 0.2.15
|
2
9
|
* changed so that tidied elements no longer handle entities differently
|
3
10
|
* tidy now assumes utf-8 encoding instead of ascii
|
data/lib/feed_tools.rb
CHANGED
@@ -32,13 +32,14 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
32
32
|
ENV['RAILS_ENV'] ||
|
33
33
|
'production' # :nodoc:
|
34
34
|
|
35
|
-
FEED_TOOLS_VERSION = "0.2.15"
|
35
|
+
FEED_TOOLS_VERSION = "0.2.16"
|
36
36
|
|
37
37
|
FEED_TOOLS_NAMESPACES = {
|
38
38
|
"admin" => "http://webns.net/mvcb/",
|
39
39
|
"ag" => "http://purl.org/rss/1.0/modules/aggregation/",
|
40
40
|
"annotate" => "http://purl.org/rss/1.0/modules/annotate/",
|
41
41
|
"atom" => "http://www.w3.org/2005/Atom",
|
42
|
+
"atom03" => "http://purl.org/atom/ns#",
|
42
43
|
"audio" => "http://media.tangent.org/rss/1.0/",
|
43
44
|
"blogChannel" => "http://backend.userland.com/blogChannelModule",
|
44
45
|
"cc" => "http://web.resource.org/cc/",
|
@@ -81,6 +82,9 @@ FEED_TOOLS_NAMESPACES = {
|
|
81
82
|
"xml" => "http://www.w3.org/XML/1998/namespace"
|
82
83
|
}
|
83
84
|
|
85
|
+
FEED_TOOLS_NAMESPACES_WITH_OLD_ATOM = FEED_TOOLS_NAMESPACES.clone
|
86
|
+
FEED_TOOLS_NAMESPACES_WITH_OLD_ATOM['atom'] = "http://purl.org/atom/ns#"
|
87
|
+
|
84
88
|
$:.unshift(File.dirname(__FILE__))
|
85
89
|
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
86
90
|
|
@@ -508,6 +512,7 @@ module FeedTools
|
|
508
512
|
end
|
509
513
|
tidy_html.gsub!(/&/, "&")
|
510
514
|
tidy_html.gsub!(/&/, "&")
|
515
|
+
tidy_html.gsub!(/\320\262\320\202\342\204\242/, "\342\200\231")
|
511
516
|
else
|
512
517
|
tidy_html = html
|
513
518
|
end
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -185,7 +185,12 @@ module FeedTools
|
|
185
185
|
end
|
186
186
|
|
187
187
|
# Find out what method we're going to be using to obtain this feed.
|
188
|
-
|
188
|
+
begin
|
189
|
+
uri = URI.parse(self.url)
|
190
|
+
rescue URI::InvalidURIError
|
191
|
+
raise FeedAccessError,
|
192
|
+
"Cannot retrieve feed using invalid URL: " + self.url.to_s
|
193
|
+
end
|
189
194
|
retrieval_method = "http"
|
190
195
|
case uri.scheme
|
191
196
|
when "http"
|
@@ -352,8 +357,8 @@ module FeedTools
|
|
352
357
|
end
|
353
358
|
unless @http_response.kind_of? Net::HTTPRedirection
|
354
359
|
@http_headers = {}
|
355
|
-
self.http_response.each_header do |
|
356
|
-
self.http_headers[
|
360
|
+
self.http_response.each_header do |key, value|
|
361
|
+
self.http_headers[key.downcase] = value
|
357
362
|
end
|
358
363
|
self.last_retrieved = Time.now
|
359
364
|
self.feed_data = self.http_response.body
|
@@ -392,8 +397,8 @@ module FeedTools
|
|
392
397
|
end
|
393
398
|
if @http_response != nil
|
394
399
|
@http_headers = {}
|
395
|
-
self.http_response.each_header do |
|
396
|
-
self.http_headers[
|
400
|
+
self.http_response.each_header do |key, value|
|
401
|
+
self.http_headers[key.downcase] = value
|
397
402
|
end
|
398
403
|
if self.http_response.code.to_i == 304
|
399
404
|
self.last_retrieved = Time.now
|
@@ -680,6 +685,10 @@ module FeedTools
|
|
680
685
|
if @id.nil?
|
681
686
|
unless channel_node.nil?
|
682
687
|
@id = XPath.first(channel_node, "id/text()").to_s
|
688
|
+
if @id == ""
|
689
|
+
@id = XPath.first(channel_node, "atom:id/text()",
|
690
|
+
FEED_TOOLS_NAMESPACES).to_s
|
691
|
+
end
|
683
692
|
if @id == ""
|
684
693
|
@id = XPath.first(channel_node, "guid/text()").to_s
|
685
694
|
end
|
@@ -688,6 +697,10 @@ module FeedTools
|
|
688
697
|
if @id == "" || @id.nil?
|
689
698
|
@id = XPath.first(root_node, "id/text()").to_s
|
690
699
|
end
|
700
|
+
if @id == ""
|
701
|
+
@id = XPath.first(root_node, "atom:id/text()",
|
702
|
+
FEED_TOOLS_NAMESPACES).to_s
|
703
|
+
end
|
691
704
|
if @id == ""
|
692
705
|
@id = XPath.first(root_node, "guid/text()").to_s
|
693
706
|
end
|
@@ -704,9 +717,66 @@ module FeedTools
|
|
704
717
|
|
705
718
|
# Returns the feed url.
|
706
719
|
def url
|
707
|
-
|
720
|
+
original_url = @url
|
721
|
+
override_url = lambda do
|
722
|
+
begin
|
723
|
+
if @url == nil && self.feed_data != nil
|
724
|
+
true
|
725
|
+
elsif @url != nil &&
|
726
|
+
!(["http", "https"].include?(URI.parse(@url).scheme))
|
727
|
+
if self.feed_data != nil
|
728
|
+
true
|
729
|
+
else
|
730
|
+
false
|
731
|
+
end
|
732
|
+
else
|
733
|
+
false
|
734
|
+
end
|
735
|
+
rescue
|
736
|
+
true
|
737
|
+
end
|
738
|
+
end
|
739
|
+
if override_url.call
|
708
740
|
@url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
|
709
741
|
@url = nil if @url == ""
|
742
|
+
if override_url.call
|
743
|
+
@url = XPath.first(channel_node, "atom:link[@rel='self']/@href").to_s
|
744
|
+
@url = nil if @url == ""
|
745
|
+
end
|
746
|
+
if override_url.call
|
747
|
+
@url = XPath.first(channel_node, "atom:link[@rel='self']/@href",
|
748
|
+
FEED_TOOLS_NAMESPACES).to_s
|
749
|
+
@url = nil if @url == ""
|
750
|
+
end
|
751
|
+
if override_url.call
|
752
|
+
@url = XPath.first(channel_node, "atom:link[@rel='self']/@href",
|
753
|
+
FEED_TOOLS_NAMESPACES_WITH_OLD_ATOM).to_s
|
754
|
+
@url = nil if @url == ""
|
755
|
+
end
|
756
|
+
if override_url.call
|
757
|
+
@url = XPath.first(channel_node, "admin:feed/@rdf:resource").to_s
|
758
|
+
@url = nil if @url == ""
|
759
|
+
end
|
760
|
+
if override_url.call
|
761
|
+
@url = XPath.first(channel_node, "admin:feed/@rdf:resource",
|
762
|
+
FEED_TOOLS_NAMESPACES).to_s
|
763
|
+
@url = nil if @url == ""
|
764
|
+
end
|
765
|
+
if override_url.call
|
766
|
+
@url = XPath.first(channel_node, "admin:feed/@resource").to_s
|
767
|
+
@url = nil if @url == ""
|
768
|
+
end
|
769
|
+
if override_url.call
|
770
|
+
@url = XPath.first(channel_node, "feed/@rdf:resource").to_s
|
771
|
+
@url = nil if @url == ""
|
772
|
+
end
|
773
|
+
if override_url.call
|
774
|
+
@url = XPath.first(channel_node, "feed/@resource").to_s
|
775
|
+
@url = nil if @url == ""
|
776
|
+
end
|
777
|
+
if @url == nil
|
778
|
+
@url = original_url
|
779
|
+
end
|
710
780
|
end
|
711
781
|
return @url
|
712
782
|
end
|
@@ -1213,7 +1283,7 @@ module FeedTools
|
|
1213
1283
|
return @itunes_author
|
1214
1284
|
end
|
1215
1285
|
|
1216
|
-
# Returns the feed
|
1286
|
+
# Returns the feed time
|
1217
1287
|
def time
|
1218
1288
|
if @time.nil?
|
1219
1289
|
unless channel_node.nil?
|
@@ -1233,11 +1303,8 @@ module FeedTools
|
|
1233
1303
|
end
|
1234
1304
|
begin
|
1235
1305
|
if time_string != nil && time_string != ""
|
1236
|
-
@time = Time.parse(time_string)
|
1237
|
-
|
1238
|
-
@time = self.succ_time
|
1239
|
-
end
|
1240
|
-
if @time == nil
|
1306
|
+
@time = Time.parse(time_string)
|
1307
|
+
else
|
1241
1308
|
@time = Time.now
|
1242
1309
|
end
|
1243
1310
|
rescue
|
@@ -1252,29 +1319,6 @@ module FeedTools
|
|
1252
1319
|
@time = new_time
|
1253
1320
|
end
|
1254
1321
|
|
1255
|
-
# Returns 1 second after the previous item's time.
|
1256
|
-
def succ_time #:nodoc:
|
1257
|
-
begin
|
1258
|
-
if feed.nil?
|
1259
|
-
return nil
|
1260
|
-
end
|
1261
|
-
feed.items
|
1262
|
-
unsorted_items = feed.instance_variable_get("@items")
|
1263
|
-
item_index = unsorted_items.index(self)
|
1264
|
-
if item_index.nil?
|
1265
|
-
return nil
|
1266
|
-
end
|
1267
|
-
if item_index <= 0
|
1268
|
-
return Time.now
|
1269
|
-
end
|
1270
|
-
previous_item = unsorted_items[item_index - 1]
|
1271
|
-
return previous_item.time.succ
|
1272
|
-
rescue
|
1273
|
-
return nil
|
1274
|
-
end
|
1275
|
-
end
|
1276
|
-
private :succ_time
|
1277
|
-
|
1278
1322
|
# Returns the feed item updated time
|
1279
1323
|
def updated
|
1280
1324
|
if @updated.nil?
|
@@ -1729,7 +1773,7 @@ module FeedTools
|
|
1729
1773
|
# create the individual feed items
|
1730
1774
|
@items = []
|
1731
1775
|
if raw_items != nil
|
1732
|
-
for item_node in raw_items
|
1776
|
+
for item_node in raw_items.reverse
|
1733
1777
|
new_item = FeedItem.new
|
1734
1778
|
new_item.feed_data = item_node.to_s
|
1735
1779
|
new_item.feed_data_type = self.feed_data_type
|
@@ -1816,7 +1860,7 @@ module FeedTools
|
|
1816
1860
|
if feed_type == "rss" && (version == nil || version == 0.0)
|
1817
1861
|
version = 1.0
|
1818
1862
|
elsif feed_type == "atom" && (version == nil || version == 0.0)
|
1819
|
-
version = 0
|
1863
|
+
version = 1.0
|
1820
1864
|
end
|
1821
1865
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 ||
|
1822
1866
|
version == 1.1)
|
@@ -2030,9 +2074,6 @@ module FeedTools
|
|
2030
2074
|
else
|
2031
2075
|
xml_builder.updated(Time.now.iso8601)
|
2032
2076
|
end
|
2033
|
-
unless self.published.nil?
|
2034
|
-
xml_builder.published(self.published.iso8601)
|
2035
|
-
end
|
2036
2077
|
xml_builder.generator("FeedTools - " +
|
2037
2078
|
"http://www.sporkmonger.com/projects/feedtools")
|
2038
2079
|
if self.id != nil
|
data/lib/feed_tools/feed_item.rb
CHANGED
@@ -1266,9 +1266,17 @@ module FeedTools
|
|
1266
1266
|
time_string = XPath.first(root_node, "time/text()").to_s
|
1267
1267
|
end
|
1268
1268
|
end
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1269
|
+
begin
|
1270
|
+
time_string = "" if time_string.nil?
|
1271
|
+
if time_string != ""
|
1272
|
+
@time = Time.parse(time_string)
|
1273
|
+
else
|
1274
|
+
@time = succ_time
|
1275
|
+
end
|
1276
|
+
rescue
|
1277
|
+
@time = succ_time
|
1278
|
+
end
|
1279
|
+
if @time.nil?
|
1272
1280
|
@time = Time.now
|
1273
1281
|
end
|
1274
1282
|
end
|
@@ -1279,6 +1287,31 @@ module FeedTools
|
|
1279
1287
|
def time=(new_time)
|
1280
1288
|
@time = new_time
|
1281
1289
|
end
|
1290
|
+
|
1291
|
+
# Returns 1 second after the previous item's time.
|
1292
|
+
def succ_time #:nodoc:
|
1293
|
+
begin
|
1294
|
+
if feed.nil?
|
1295
|
+
return nil
|
1296
|
+
end
|
1297
|
+
if feed.instance_variable_get("@items").nil?
|
1298
|
+
feed.items
|
1299
|
+
end
|
1300
|
+
unsorted_items = feed.instance_variable_get("@items")
|
1301
|
+
item_index = unsorted_items.index(self)
|
1302
|
+
if item_index.nil?
|
1303
|
+
return nil
|
1304
|
+
end
|
1305
|
+
if item_index <= 0
|
1306
|
+
return Time.now
|
1307
|
+
end
|
1308
|
+
previous_item = unsorted_items[item_index - 1]
|
1309
|
+
return previous_item.time.succ
|
1310
|
+
rescue
|
1311
|
+
return nil
|
1312
|
+
end
|
1313
|
+
end
|
1314
|
+
private :succ_time
|
1282
1315
|
|
1283
1316
|
# Returns the feed item updated time
|
1284
1317
|
def updated
|
@@ -1462,7 +1495,7 @@ module FeedTools
|
|
1462
1495
|
if feed_type == "rss" && (version == nil || version == 0.0)
|
1463
1496
|
version = 1.0
|
1464
1497
|
elsif feed_type == "atom" && (version == nil || version == 0.0)
|
1465
|
-
version = 0
|
1498
|
+
version = 1.0
|
1466
1499
|
end
|
1467
1500
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
1468
1501
|
# RDF-based rss format
|
@@ -1663,4 +1696,4 @@ module FeedTools
|
|
1663
1696
|
"LINK:#{self.link}>"
|
1664
1697
|
end
|
1665
1698
|
end
|
1666
|
-
end
|
1699
|
+
end
|
data/rakefile
CHANGED
data/test/rss_test.rb
CHANGED
@@ -581,9 +581,58 @@ class RssTest < Test::Unit::TestCase
|
|
581
581
|
</content:encoded>
|
582
582
|
</item>
|
583
583
|
</channel>
|
584
|
-
</
|
584
|
+
</rss>
|
585
585
|
FEED
|
586
586
|
assert_equal("Test Feed Title", feed.items.first.title)
|
587
587
|
assert_equal("Test Feed Content", feed.items.first.content)
|
588
588
|
end
|
589
|
+
|
590
|
+
def test_item_order
|
591
|
+
feed = FeedTools::Feed.new
|
592
|
+
feed.feed_data = <<-FEED
|
593
|
+
<rss>
|
594
|
+
<channel>
|
595
|
+
<item>
|
596
|
+
<title>Item 1</title>
|
597
|
+
</item>
|
598
|
+
<item>
|
599
|
+
<title>Item 2</title>
|
600
|
+
</item>
|
601
|
+
<item>
|
602
|
+
<title>Item 3</title>
|
603
|
+
</item>
|
604
|
+
</channel>
|
605
|
+
</rss>
|
606
|
+
FEED
|
607
|
+
assert_equal("Item 1", feed.items[0].title)
|
608
|
+
assert_equal("Item 2", feed.items[1].title)
|
609
|
+
assert_equal("Item 3", feed.items[2].title)
|
610
|
+
end
|
611
|
+
|
612
|
+
def test_usm
|
613
|
+
feed = FeedTools::Feed.new
|
614
|
+
feed.feed_data = <<-FEED
|
615
|
+
<rss xmlns:atom="http://www.w3.org/2005/Atom">
|
616
|
+
<channel>
|
617
|
+
<atom:link href="http://nowhere.com/feed.xml" rel="self" />
|
618
|
+
<title>Feed Title</title>
|
619
|
+
</channel>
|
620
|
+
</rss>
|
621
|
+
FEED
|
622
|
+
assert_equal("http://nowhere.com/feed.xml", feed.url)
|
623
|
+
assert_equal("Feed Title", feed.title)
|
624
|
+
|
625
|
+
feed = FeedTools::Feed.new
|
626
|
+
feed.feed_data = <<-FEED
|
627
|
+
<rss xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
628
|
+
xmlns:admin="http://webns.net/mvcb/">
|
629
|
+
<channel>
|
630
|
+
<admin:feed rdf:resource="http://nowhere.com/feed.xml" />
|
631
|
+
<title>Feed Title</title>
|
632
|
+
</channel>
|
633
|
+
</rss>
|
634
|
+
FEED
|
635
|
+
assert_equal("http://nowhere.com/feed.xml", feed.url)
|
636
|
+
assert_equal("Feed Title", feed.title)
|
637
|
+
end
|
589
638
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: feedtools
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.15
|
7
|
-
date: 2005-10-
|
6
|
+
version: 0.2.16
|
7
|
+
date: 2005-10-18 00:00:00 -04:00
|
8
8
|
summary: "Parsing, generation, and caching system for xml news feeds."
|
9
9
|
require_paths:
|
10
10
|
- lib
|