feedtools 0.2.15 → 0.2.16
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +7 -0
- data/lib/feed_tools.rb +6 -1
- data/lib/feed_tools/feed.rb +81 -40
- data/lib/feed_tools/feed_item.rb +38 -5
- data/rakefile +1 -1
- data/test/rss_test.rb +50 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
== FeedTools 0.2.16
|
2
|
+
* atom feed generation now defaults to atom 1.0
|
3
|
+
* removed erroneous published element at atom feed-level
|
4
|
+
* fixed some incompatibilities with ruby 1.8.3
|
5
|
+
* better support for universal subscription mechanism within feeds
|
6
|
+
* now handles both atom 0.3 and 1.0 namespaces
|
7
|
+
* fixed timestamping of feed items
|
1
8
|
== FeedTools 0.2.15
|
2
9
|
* changed so that tidied elements no longer handle entities differently
|
3
10
|
* tidy now assumes utf-8 encoding instead of ascii
|
data/lib/feed_tools.rb
CHANGED
@@ -32,13 +32,14 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
32
32
|
ENV['RAILS_ENV'] ||
|
33
33
|
'production' # :nodoc:
|
34
34
|
|
35
|
-
FEED_TOOLS_VERSION = "0.2.15"
|
35
|
+
FEED_TOOLS_VERSION = "0.2.16"
|
36
36
|
|
37
37
|
FEED_TOOLS_NAMESPACES = {
|
38
38
|
"admin" => "http://webns.net/mvcb/",
|
39
39
|
"ag" => "http://purl.org/rss/1.0/modules/aggregation/",
|
40
40
|
"annotate" => "http://purl.org/rss/1.0/modules/annotate/",
|
41
41
|
"atom" => "http://www.w3.org/2005/Atom",
|
42
|
+
"atom03" => "http://purl.org/atom/ns#",
|
42
43
|
"audio" => "http://media.tangent.org/rss/1.0/",
|
43
44
|
"blogChannel" => "http://backend.userland.com/blogChannelModule",
|
44
45
|
"cc" => "http://web.resource.org/cc/",
|
@@ -81,6 +82,9 @@ FEED_TOOLS_NAMESPACES = {
|
|
81
82
|
"xml" => "http://www.w3.org/XML/1998/namespace"
|
82
83
|
}
|
83
84
|
|
85
|
+
FEED_TOOLS_NAMESPACES_WITH_OLD_ATOM = FEED_TOOLS_NAMESPACES.clone
|
86
|
+
FEED_TOOLS_NAMESPACES_WITH_OLD_ATOM['atom'] = "http://purl.org/atom/ns#"
|
87
|
+
|
84
88
|
$:.unshift(File.dirname(__FILE__))
|
85
89
|
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
86
90
|
|
@@ -508,6 +512,7 @@ module FeedTools
|
|
508
512
|
end
|
509
513
|
tidy_html.gsub!(/&#x26;/, "&amp;")
|
510
514
|
tidy_html.gsub!(/&#38;/, "&amp;")
|
515
|
+
tidy_html.gsub!(/\320\262\320\202\342\204\242/, "\342\200\231")
|
511
516
|
else
|
512
517
|
tidy_html = html
|
513
518
|
end
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -185,7 +185,12 @@ module FeedTools
|
|
185
185
|
end
|
186
186
|
|
187
187
|
# Find out what method we're going to be using to obtain this feed.
|
188
|
-
|
188
|
+
begin
|
189
|
+
uri = URI.parse(self.url)
|
190
|
+
rescue URI::InvalidURIError
|
191
|
+
raise FeedAccessError,
|
192
|
+
"Cannot retrieve feed using invalid URL: " + self.url.to_s
|
193
|
+
end
|
189
194
|
retrieval_method = "http"
|
190
195
|
case uri.scheme
|
191
196
|
when "http"
|
@@ -352,8 +357,8 @@ module FeedTools
|
|
352
357
|
end
|
353
358
|
unless @http_response.kind_of? Net::HTTPRedirection
|
354
359
|
@http_headers = {}
|
355
|
-
self.http_response.each_header do |
|
356
|
-
self.http_headers[
|
360
|
+
self.http_response.each_header do |key, value|
|
361
|
+
self.http_headers[key.downcase] = value
|
357
362
|
end
|
358
363
|
self.last_retrieved = Time.now
|
359
364
|
self.feed_data = self.http_response.body
|
@@ -392,8 +397,8 @@ module FeedTools
|
|
392
397
|
end
|
393
398
|
if @http_response != nil
|
394
399
|
@http_headers = {}
|
395
|
-
self.http_response.each_header do |
|
396
|
-
self.http_headers[
|
400
|
+
self.http_response.each_header do |key, value|
|
401
|
+
self.http_headers[key.downcase] = value
|
397
402
|
end
|
398
403
|
if self.http_response.code.to_i == 304
|
399
404
|
self.last_retrieved = Time.now
|
@@ -680,6 +685,10 @@ module FeedTools
|
|
680
685
|
if @id.nil?
|
681
686
|
unless channel_node.nil?
|
682
687
|
@id = XPath.first(channel_node, "id/text()").to_s
|
688
|
+
if @id == ""
|
689
|
+
@id = XPath.first(channel_node, "atom:id/text()",
|
690
|
+
FEED_TOOLS_NAMESPACES).to_s
|
691
|
+
end
|
683
692
|
if @id == ""
|
684
693
|
@id = XPath.first(channel_node, "guid/text()").to_s
|
685
694
|
end
|
@@ -688,6 +697,10 @@ module FeedTools
|
|
688
697
|
if @id == "" || @id.nil?
|
689
698
|
@id = XPath.first(root_node, "id/text()").to_s
|
690
699
|
end
|
700
|
+
if @id == ""
|
701
|
+
@id = XPath.first(root_node, "atom:id/text()",
|
702
|
+
FEED_TOOLS_NAMESPACES).to_s
|
703
|
+
end
|
691
704
|
if @id == ""
|
692
705
|
@id = XPath.first(root_node, "guid/text()").to_s
|
693
706
|
end
|
@@ -704,9 +717,66 @@ module FeedTools
|
|
704
717
|
|
705
718
|
# Returns the feed url.
|
706
719
|
def url
|
707
|
-
|
720
|
+
original_url = @url
|
721
|
+
override_url = lambda do
|
722
|
+
begin
|
723
|
+
if @url == nil && self.feed_data != nil
|
724
|
+
true
|
725
|
+
elsif @url != nil &&
|
726
|
+
!(["http", "https"].include?(URI.parse(@url).scheme))
|
727
|
+
if self.feed_data != nil
|
728
|
+
true
|
729
|
+
else
|
730
|
+
false
|
731
|
+
end
|
732
|
+
else
|
733
|
+
false
|
734
|
+
end
|
735
|
+
rescue
|
736
|
+
true
|
737
|
+
end
|
738
|
+
end
|
739
|
+
if override_url.call
|
708
740
|
@url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
|
709
741
|
@url = nil if @url == ""
|
742
|
+
if override_url.call
|
743
|
+
@url = XPath.first(channel_node, "atom:link[@rel='self']/@href").to_s
|
744
|
+
@url = nil if @url == ""
|
745
|
+
end
|
746
|
+
if override_url.call
|
747
|
+
@url = XPath.first(channel_node, "atom:link[@rel='self']/@href",
|
748
|
+
FEED_TOOLS_NAMESPACES).to_s
|
749
|
+
@url = nil if @url == ""
|
750
|
+
end
|
751
|
+
if override_url.call
|
752
|
+
@url = XPath.first(channel_node, "atom:link[@rel='self']/@href",
|
753
|
+
FEED_TOOLS_NAMESPACES_WITH_OLD_ATOM).to_s
|
754
|
+
@url = nil if @url == ""
|
755
|
+
end
|
756
|
+
if override_url.call
|
757
|
+
@url = XPath.first(channel_node, "admin:feed/@rdf:resource").to_s
|
758
|
+
@url = nil if @url == ""
|
759
|
+
end
|
760
|
+
if override_url.call
|
761
|
+
@url = XPath.first(channel_node, "admin:feed/@rdf:resource",
|
762
|
+
FEED_TOOLS_NAMESPACES).to_s
|
763
|
+
@url = nil if @url == ""
|
764
|
+
end
|
765
|
+
if override_url.call
|
766
|
+
@url = XPath.first(channel_node, "admin:feed/@resource").to_s
|
767
|
+
@url = nil if @url == ""
|
768
|
+
end
|
769
|
+
if override_url.call
|
770
|
+
@url = XPath.first(channel_node, "feed/@rdf:resource").to_s
|
771
|
+
@url = nil if @url == ""
|
772
|
+
end
|
773
|
+
if override_url.call
|
774
|
+
@url = XPath.first(channel_node, "feed/@resource").to_s
|
775
|
+
@url = nil if @url == ""
|
776
|
+
end
|
777
|
+
if @url == nil
|
778
|
+
@url = original_url
|
779
|
+
end
|
710
780
|
end
|
711
781
|
return @url
|
712
782
|
end
|
@@ -1213,7 +1283,7 @@ module FeedTools
|
|
1213
1283
|
return @itunes_author
|
1214
1284
|
end
|
1215
1285
|
|
1216
|
-
# Returns the feed item time
|
1286
|
+
# Returns the feed time
|
1217
1287
|
def time
|
1218
1288
|
if @time.nil?
|
1219
1289
|
unless channel_node.nil?
|
@@ -1233,11 +1303,8 @@ module FeedTools
|
|
1233
1303
|
end
|
1234
1304
|
begin
|
1235
1305
|
if time_string != nil && time_string != ""
|
1236
|
-
@time = Time.parse(time_string)
|
1237
|
-
|
1238
|
-
@time = self.succ_time
|
1239
|
-
end
|
1240
|
-
if @time == nil
|
1306
|
+
@time = Time.parse(time_string)
|
1307
|
+
else
|
1241
1308
|
@time = Time.now
|
1242
1309
|
end
|
1243
1310
|
rescue
|
@@ -1252,29 +1319,6 @@ module FeedTools
|
|
1252
1319
|
@time = new_time
|
1253
1320
|
end
|
1254
1321
|
|
1255
|
-
# Returns 1 second after the previous item's time.
|
1256
|
-
def succ_time #:nodoc:
|
1257
|
-
begin
|
1258
|
-
if feed.nil?
|
1259
|
-
return nil
|
1260
|
-
end
|
1261
|
-
feed.items
|
1262
|
-
unsorted_items = feed.instance_variable_get("@items")
|
1263
|
-
item_index = unsorted_items.index(self)
|
1264
|
-
if item_index.nil?
|
1265
|
-
return nil
|
1266
|
-
end
|
1267
|
-
if item_index <= 0
|
1268
|
-
return Time.now
|
1269
|
-
end
|
1270
|
-
previous_item = unsorted_items[item_index - 1]
|
1271
|
-
return previous_item.time.succ
|
1272
|
-
rescue
|
1273
|
-
return nil
|
1274
|
-
end
|
1275
|
-
end
|
1276
|
-
private :succ_time
|
1277
|
-
|
1278
1322
|
# Returns the feed item updated time
|
1279
1323
|
def updated
|
1280
1324
|
if @updated.nil?
|
@@ -1729,7 +1773,7 @@ module FeedTools
|
|
1729
1773
|
# create the individual feed items
|
1730
1774
|
@items = []
|
1731
1775
|
if raw_items != nil
|
1732
|
-
for item_node in raw_items
|
1776
|
+
for item_node in raw_items.reverse
|
1733
1777
|
new_item = FeedItem.new
|
1734
1778
|
new_item.feed_data = item_node.to_s
|
1735
1779
|
new_item.feed_data_type = self.feed_data_type
|
@@ -1816,7 +1860,7 @@ module FeedTools
|
|
1816
1860
|
if feed_type == "rss" && (version == nil || version == 0.0)
|
1817
1861
|
version = 1.0
|
1818
1862
|
elsif feed_type == "atom" && (version == nil || version == 0.0)
|
1819
|
-
version = 0.3
|
1863
|
+
version = 1.0
|
1820
1864
|
end
|
1821
1865
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 ||
|
1822
1866
|
version == 1.1)
|
@@ -2030,9 +2074,6 @@ module FeedTools
|
|
2030
2074
|
else
|
2031
2075
|
xml_builder.updated(Time.now.iso8601)
|
2032
2076
|
end
|
2033
|
-
unless self.published.nil?
|
2034
|
-
xml_builder.published(self.published.iso8601)
|
2035
|
-
end
|
2036
2077
|
xml_builder.generator("FeedTools - " +
|
2037
2078
|
"http://www.sporkmonger.com/projects/feedtools")
|
2038
2079
|
if self.id != nil
|
data/lib/feed_tools/feed_item.rb
CHANGED
@@ -1266,9 +1266,17 @@ module FeedTools
|
|
1266
1266
|
time_string = XPath.first(root_node, "time/text()").to_s
|
1267
1267
|
end
|
1268
1268
|
end
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1269
|
+
begin
|
1270
|
+
time_string = "" if time_string.nil?
|
1271
|
+
if time_string != ""
|
1272
|
+
@time = Time.parse(time_string)
|
1273
|
+
else
|
1274
|
+
@time = succ_time
|
1275
|
+
end
|
1276
|
+
rescue
|
1277
|
+
@time = succ_time
|
1278
|
+
end
|
1279
|
+
if @time.nil?
|
1272
1280
|
@time = Time.now
|
1273
1281
|
end
|
1274
1282
|
end
|
@@ -1279,6 +1287,31 @@ module FeedTools
|
|
1279
1287
|
def time=(new_time)
|
1280
1288
|
@time = new_time
|
1281
1289
|
end
|
1290
|
+
|
1291
|
+
# Returns 1 second after the previous item's time.
|
1292
|
+
def succ_time #:nodoc:
|
1293
|
+
begin
|
1294
|
+
if feed.nil?
|
1295
|
+
return nil
|
1296
|
+
end
|
1297
|
+
if feed.instance_variable_get("@items").nil?
|
1298
|
+
feed.items
|
1299
|
+
end
|
1300
|
+
unsorted_items = feed.instance_variable_get("@items")
|
1301
|
+
item_index = unsorted_items.index(self)
|
1302
|
+
if item_index.nil?
|
1303
|
+
return nil
|
1304
|
+
end
|
1305
|
+
if item_index <= 0
|
1306
|
+
return Time.now
|
1307
|
+
end
|
1308
|
+
previous_item = unsorted_items[item_index - 1]
|
1309
|
+
return previous_item.time.succ
|
1310
|
+
rescue
|
1311
|
+
return nil
|
1312
|
+
end
|
1313
|
+
end
|
1314
|
+
private :succ_time
|
1282
1315
|
|
1283
1316
|
# Returns the feed item updated time
|
1284
1317
|
def updated
|
@@ -1462,7 +1495,7 @@ module FeedTools
|
|
1462
1495
|
if feed_type == "rss" && (version == nil || version == 0.0)
|
1463
1496
|
version = 1.0
|
1464
1497
|
elsif feed_type == "atom" && (version == nil || version == 0.0)
|
1465
|
-
version = 0.3
|
1498
|
+
version = 1.0
|
1466
1499
|
end
|
1467
1500
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
1468
1501
|
# RDF-based rss format
|
@@ -1663,4 +1696,4 @@ module FeedTools
|
|
1663
1696
|
"LINK:#{self.link}>"
|
1664
1697
|
end
|
1665
1698
|
end
|
1666
|
-
end
|
1699
|
+
end
|
data/rakefile
CHANGED
data/test/rss_test.rb
CHANGED
@@ -581,9 +581,58 @@ class RssTest < Test::Unit::TestCase
|
|
581
581
|
</content:encoded>
|
582
582
|
</item>
|
583
583
|
</channel>
|
584
|
-
</
|
584
|
+
</rss>
|
585
585
|
FEED
|
586
586
|
assert_equal("Test Feed Title", feed.items.first.title)
|
587
587
|
assert_equal("Test Feed Content", feed.items.first.content)
|
588
588
|
end
|
589
|
+
|
590
|
+
def test_item_order
|
591
|
+
feed = FeedTools::Feed.new
|
592
|
+
feed.feed_data = <<-FEED
|
593
|
+
<rss>
|
594
|
+
<channel>
|
595
|
+
<item>
|
596
|
+
<title>Item 1</title>
|
597
|
+
</item>
|
598
|
+
<item>
|
599
|
+
<title>Item 2</title>
|
600
|
+
</item>
|
601
|
+
<item>
|
602
|
+
<title>Item 3</title>
|
603
|
+
</item>
|
604
|
+
</channel>
|
605
|
+
</rss>
|
606
|
+
FEED
|
607
|
+
assert_equal("Item 1", feed.items[0].title)
|
608
|
+
assert_equal("Item 2", feed.items[1].title)
|
609
|
+
assert_equal("Item 3", feed.items[2].title)
|
610
|
+
end
|
611
|
+
|
612
|
+
def test_usm
|
613
|
+
feed = FeedTools::Feed.new
|
614
|
+
feed.feed_data = <<-FEED
|
615
|
+
<rss xmlns:atom="http://www.w3.org/2005/Atom">
|
616
|
+
<channel>
|
617
|
+
<atom:link href="http://nowhere.com/feed.xml" rel="self" />
|
618
|
+
<title>Feed Title</title>
|
619
|
+
</channel>
|
620
|
+
</rss>
|
621
|
+
FEED
|
622
|
+
assert_equal("http://nowhere.com/feed.xml", feed.url)
|
623
|
+
assert_equal("Feed Title", feed.title)
|
624
|
+
|
625
|
+
feed = FeedTools::Feed.new
|
626
|
+
feed.feed_data = <<-FEED
|
627
|
+
<rss xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
628
|
+
xmlns:admin="http://webns.net/mvcb/">
|
629
|
+
<channel>
|
630
|
+
<admin:feed rdf:resource="http://nowhere.com/feed.xml" />
|
631
|
+
<title>Feed Title</title>
|
632
|
+
</channel>
|
633
|
+
</rss>
|
634
|
+
FEED
|
635
|
+
assert_equal("http://nowhere.com/feed.xml", feed.url)
|
636
|
+
assert_equal("Feed Title", feed.title)
|
637
|
+
end
|
589
638
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: feedtools
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.15
|
7
|
-
date: 2005-10-
|
6
|
+
version: 0.2.16
|
7
|
+
date: 2005-10-18 00:00:00 -04:00
|
8
8
|
summary: "Parsing, generation, and caching system for xml news feeds."
|
9
9
|
require_paths:
|
10
10
|
- lib
|