feedtools 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/db/schema.mysql.sql +2 -1
- data/db/schema.postgresql.sql +2 -1
- data/db/schema.sqlite.sql +2 -1
- data/lib/feed_tools.rb +1 -1
- data/lib/feed_tools/database_feed_cache.rb +1 -1
- data/lib/feed_tools/feed.rb +95 -46
- data/lib/feed_tools/feed_item.rb +28 -12
- data/rakefile +1 -1
- data/test/amp_test.rb +1 -1
- data/test/cache_test.rb +36 -3
- data/test/cdf_test.rb +4 -4
- data/test/nonstandard_test.rb +2 -2
- metadata +1 -1
data/CHANGELOG
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
== FeedTools 0.2.12
|
2
|
+
* schema change!
|
3
|
+
* fixed missing cache connection breaking redirects
|
4
|
+
* fixed redirects creating duplicate cache entry
|
5
|
+
* exception now raised if cache_only option is set but the cache is disabled
|
6
|
+
* additional unit tests to verify that the cache works with redirects
|
7
|
+
* fixed feed expiration bugs with expire!
|
8
|
+
* xml_data renamed to feed_data
|
9
|
+
* feed_data_type method added
|
1
10
|
== FeedTools 0.2.11
|
2
11
|
* ruby -w shouldn't produce nearly as many warnings for feed_tools.rb anymore
|
3
12
|
* you can now force the open method to only pull from the cache
|
data/db/schema.mysql.sql
CHANGED
@@ -4,7 +4,8 @@
|
|
4
4
|
`url` varchar(255) default NULL,
|
5
5
|
`title` varchar(255) default NULL,
|
6
6
|
`link` varchar(255) default NULL,
|
7
|
-
`
|
7
|
+
`feed_data` longtext default NULL,
|
8
|
+
`feed_data_type` varchar(20) default NULL,
|
8
9
|
`http_headers` text default NULL,
|
9
10
|
`last_retrieved` datetime default NULL,
|
10
11
|
PRIMARY KEY (`id`)
|
data/db/schema.postgresql.sql
CHANGED
@@ -4,7 +4,8 @@
|
|
4
4
|
url varchar(255) default NULL,
|
5
5
|
title varchar(255) default NULL,
|
6
6
|
link varchar(255) default NULL,
|
7
|
-
|
7
|
+
feed_data text default NULL,
|
8
|
+
feed_data_type varchar(20) default NULL,
|
8
9
|
http_headers text default NULL,
|
9
10
|
last_retrieved timestamp default NULL
|
10
11
|
);
|
data/db/schema.sqlite.sql
CHANGED
@@ -4,7 +4,8 @@
|
|
4
4
|
url VARCHAR(255) DEFAULT NULL,
|
5
5
|
title VARCHAR(255) DEFAULT NULL,
|
6
6
|
link VARCHAR(255) DEFAULT NULL,
|
7
|
-
|
7
|
+
feed_data TEXT DEFAULT NULL,
|
8
|
+
feed_data_type VARCHAR(20) DEFAULT NULL,
|
8
9
|
http_headers TEXT DEFAULT NULL,
|
9
10
|
last_retrieved DATETIME DEFAULT NULL
|
10
11
|
);
|
data/lib/feed_tools.rb
CHANGED
@@ -32,7 +32,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
32
32
|
ENV['RAILS_ENV'] ||
|
33
33
|
'production' # :nodoc:
|
34
34
|
|
35
|
-
FEED_TOOLS_VERSION = "0.2.11"
|
35
|
+
FEED_TOOLS_VERSION = "0.2.12"
|
36
36
|
|
37
37
|
$:.unshift(File.dirname(__FILE__))
|
38
38
|
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
@@ -71,7 +71,7 @@ module FeedTools
|
|
71
71
|
def DatabaseFeedCache.table_exists?
|
72
72
|
begin
|
73
73
|
ActiveRecord::Base.connection.execute "select id, url, title, " +
|
74
|
-
"link,
|
74
|
+
"link, feed_data, feed_data_type, http_headers, last_retrieved " +
|
75
75
|
"from #{self.table_name()} limit 1"
|
76
76
|
rescue ActiveRecord::StatementInvalid
|
77
77
|
return false
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -101,7 +101,8 @@ module FeedTools
|
|
101
101
|
@cache_object = nil
|
102
102
|
@http_headers = nil
|
103
103
|
@xml_doc = nil
|
104
|
-
@xml_data = nil
|
104
|
+
@feed_data = nil
|
105
|
+
@feed_data_type = nil
|
105
106
|
@root_node = nil
|
106
107
|
@channel_node = nil
|
107
108
|
@url = nil
|
@@ -134,6 +135,11 @@ module FeedTools
|
|
134
135
|
options.keys)
|
135
136
|
options = { :cache_only => false }.merge(options)
|
136
137
|
|
138
|
+
if options[:cache_only] && FeedTools.feed_cache.nil?
|
139
|
+
raise(ArgumentError, "There is currently no caching mechanism set. " +
|
140
|
+
"Cannot retrieve cached feeds.")
|
141
|
+
end
|
142
|
+
|
137
143
|
# clean up the url
|
138
144
|
url = FeedTools.normalize_url(url)
|
139
145
|
|
@@ -269,15 +275,28 @@ module FeedTools
|
|
269
275
|
|
270
276
|
# Find out if we've already seen the url we've been
|
271
277
|
# redirected to.
|
272
|
-
cached_feed = FeedTools::Feed.open(new_location,
|
273
|
-
:cache_only => true)
|
274
278
|
found_redirect = false
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
279
|
+
begin
|
280
|
+
cached_feed = FeedTools::Feed.open(new_location,
|
281
|
+
:cache_only => true)
|
282
|
+
if cached_feed.cache_object != nil &&
|
283
|
+
cached_feed.cache_object.new_record? != true
|
284
|
+
unless cached_feed.expired?
|
285
|
+
# Copy the cached state, starting with the url
|
286
|
+
self.url = cached_feed.url
|
287
|
+
self.title = cached_feed.title
|
288
|
+
self.link = cached_feed.link
|
289
|
+
self.feed_data = cached_feed.feed_data
|
290
|
+
self.feed_data_type = cached_feed.feed_data_type
|
291
|
+
self.last_retrieved = cached_feed.last_retrieved
|
292
|
+
self.http_headers = cached_feed.http_headers
|
293
|
+
self.cache_object = cached_feed.cache_object
|
294
|
+
@live = false
|
295
|
+
found_redirect = true
|
296
|
+
end
|
280
297
|
end
|
298
|
+
rescue
|
299
|
+
# If anything goes wrong, ignore it.
|
281
300
|
end
|
282
301
|
unless found_redirect
|
283
302
|
# TODO: deal with stupid people using relative urls
|
@@ -337,29 +356,29 @@ module FeedTools
|
|
337
356
|
self.http_headers[header.first.downcase] = header.last
|
338
357
|
end
|
339
358
|
self.last_retrieved = Time.now
|
340
|
-
self.xml_data = self.http_response.body
|
359
|
+
self.feed_data = self.http_response.body
|
341
360
|
end
|
342
361
|
rescue FeedAccessError
|
343
362
|
@live = false
|
344
|
-
if self.xml_data.nil?
|
363
|
+
if self.feed_data.nil?
|
345
364
|
raise
|
346
365
|
end
|
347
366
|
rescue Timeout::Error
|
348
|
-
# if we time out, do nothing, it should fall back to the
|
367
|
+
# if we time out, do nothing, it should fall back to the feed_data
|
349
368
|
# stored in the cache.
|
350
369
|
@live = false
|
351
|
-
if self.xml_data.nil?
|
370
|
+
if self.feed_data.nil?
|
352
371
|
raise
|
353
372
|
end
|
354
373
|
rescue Errno::ECONNRESET
|
355
374
|
# if the connection gets reset by peer, oh well, fall back to the
|
356
|
-
#
|
375
|
+
# feed_data stored in the cache
|
357
376
|
@live = false
|
358
|
-
if self.xml_data.nil?
|
377
|
+
if self.feed_data.nil?
|
359
378
|
raise
|
360
379
|
end
|
361
380
|
rescue => error
|
362
|
-
# heck, if anything at all bad happens, fall back to the
|
381
|
+
# heck, if anything at all bad happens, fall back to the feed_data
|
363
382
|
# stored in the cache.
|
364
383
|
|
365
384
|
# If we can, get the HTTPResponse...
|
@@ -381,7 +400,7 @@ module FeedTools
|
|
381
400
|
end
|
382
401
|
end
|
383
402
|
@live = false
|
384
|
-
if self.xml_data.nil?
|
403
|
+
if self.feed_data.nil?
|
385
404
|
if error.respond_to?(:response) &&
|
386
405
|
error.response.respond_to?(:response_chain)
|
387
406
|
redirects = error.response.response_chain.map do |pair|
|
@@ -409,7 +428,8 @@ module FeedTools
|
|
409
428
|
@http_response = nil
|
410
429
|
@http_headers = {}
|
411
430
|
self.last_retrieved = Time.now
|
412
|
-
self.
|
431
|
+
self.feed_data = file.read
|
432
|
+
self.feed_data_type = :xml
|
413
433
|
end
|
414
434
|
rescue
|
415
435
|
@live = false
|
@@ -437,35 +457,60 @@ module FeedTools
|
|
437
457
|
return @http_headers
|
438
458
|
end
|
439
459
|
|
440
|
-
# Returns the feed's raw
|
441
|
-
def
|
442
|
-
if @
|
460
|
+
# Returns the feed's raw data.
|
461
|
+
def feed_data
|
462
|
+
if @feed_data.nil?
|
443
463
|
unless self.cache_object.nil?
|
444
|
-
@
|
464
|
+
@feed_data = self.cache_object.feed_data
|
445
465
|
end
|
446
466
|
end
|
447
|
-
return @
|
467
|
+
return @feed_data
|
448
468
|
end
|
449
469
|
|
450
|
-
# Sets the feed's
|
451
|
-
def
|
452
|
-
@
|
470
|
+
# Sets the feed's data.
|
471
|
+
def feed_data=(new_feed_data)
|
472
|
+
@feed_data = new_feed_data
|
473
|
+
unless self.cache_object.nil?
|
474
|
+
self.cache_object.feed_data = new_feed_data
|
475
|
+
end
|
476
|
+
end
|
477
|
+
|
478
|
+
# Returns the data type of the feed
|
479
|
+
# Possible values:
|
480
|
+
# * :xml
|
481
|
+
# * :yaml
|
482
|
+
# * :text
|
483
|
+
def feed_data_type
|
484
|
+
if @feed_data_type.nil?
|
485
|
+
# Right now, nothing else is supported
|
486
|
+
@feed_data_type = :xml
|
487
|
+
end
|
488
|
+
return @feed_data_type
|
489
|
+
end
|
490
|
+
|
491
|
+
# Sets the feed's data type.
|
492
|
+
def feed_data_type=(new_feed_data_type)
|
493
|
+
@feed_data_type = new_feed_data_type
|
453
494
|
unless self.cache_object.nil?
|
454
|
-
self.cache_object.
|
495
|
+
self.cache_object.feed_data_type = new_feed_data_type
|
455
496
|
end
|
456
497
|
end
|
457
498
|
|
458
|
-
# Returns a REXML Document of the
|
499
|
+
# Returns a REXML Document of the feed_data
|
459
500
|
def xml
|
460
|
-
if
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
501
|
+
if self.feed_data_type != :xml
|
502
|
+
@xml_doc = nil
|
503
|
+
else
|
504
|
+
if @xml_doc.nil?
|
505
|
+
begin
|
506
|
+
# TODO: :ignore_whitespace_nodes => :all
|
507
|
+
# Add that?
|
508
|
+
# ======================================
|
509
|
+
@xml_doc = Document.new(feed_data)
|
510
|
+
rescue
|
511
|
+
# Something failed, attempt to repair the xml with htree.
|
512
|
+
@xml_doc = HTree.parse(feed_data).to_rexml
|
513
|
+
end
|
469
514
|
end
|
470
515
|
end
|
471
516
|
return @xml_doc
|
@@ -539,8 +584,10 @@ module FeedTools
|
|
539
584
|
# title=
|
540
585
|
# link
|
541
586
|
# link=
|
542
|
-
#
|
543
|
-
#
|
587
|
+
# feed_data
|
588
|
+
# feed_data=
|
589
|
+
# feed_data_type
|
590
|
+
# feed_data_type=
|
544
591
|
# etag
|
545
592
|
# etag=
|
546
593
|
# last_modified
|
@@ -657,7 +704,7 @@ module FeedTools
|
|
657
704
|
|
658
705
|
# Returns the feed url.
|
659
706
|
def url
|
660
|
-
if @url.nil? && self.xml_data != nil
|
707
|
+
if @url.nil? && self.feed_data != nil
|
661
708
|
@url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
|
662
709
|
@url = nil if @url == ""
|
663
710
|
end
|
@@ -1675,7 +1722,8 @@ module FeedTools
|
|
1675
1722
|
if raw_items != nil
|
1676
1723
|
for item_node in raw_items
|
1677
1724
|
new_item = FeedItem.new
|
1678
|
-
new_item.xml_data = item_node.to_s
|
1725
|
+
new_item.feed_data = item_node.to_s
|
1726
|
+
new_item.feed_data_type = self.feed_data_type
|
1679
1727
|
new_item.feed = self
|
1680
1728
|
@items << new_item
|
1681
1729
|
end
|
@@ -1748,7 +1796,7 @@ module FeedTools
|
|
1748
1796
|
|
1749
1797
|
# Forces this feed to expire.
|
1750
1798
|
def expire!
|
1751
|
-
self.last_retrieved = Time.mktime(1970)
|
1799
|
+
self.last_retrieved = Time.mktime(1970).gmtime
|
1752
1800
|
self.save
|
1753
1801
|
end
|
1754
1802
|
|
@@ -2014,15 +2062,16 @@ module FeedTools
|
|
2014
2062
|
raise "Caching is currently disabled. Cannot save to cache."
|
2015
2063
|
elsif self.url.nil?
|
2016
2064
|
raise "The url field must be set to save to the cache."
|
2017
|
-
elsif self.xml_data.nil?
|
2018
|
-
raise "The xml_data field must be set to save to the cache."
|
2019
2065
|
elsif self.cache_object.nil?
|
2020
2066
|
raise "The cache_object is currently nil. Cannot save to cache."
|
2021
2067
|
else
|
2022
2068
|
self.cache_object.url = self.url
|
2023
|
-
self.
|
2024
|
-
|
2025
|
-
|
2069
|
+
unless self.feed_data.nil?
|
2070
|
+
self.cache_object.title = self.title
|
2071
|
+
self.cache_object.link = self.link
|
2072
|
+
self.cache_object.feed_data = self.feed_data
|
2073
|
+
self.cache_object.feed_data_type = self.feed_data_type.to_s
|
2074
|
+
end
|
2026
2075
|
unless self.http_response.nil?
|
2027
2076
|
self.cache_object.http_headers = self.http_headers.to_yaml
|
2028
2077
|
end
|
data/lib/feed_tools/feed_item.rb
CHANGED
@@ -126,6 +126,8 @@ module FeedTools
|
|
126
126
|
def initialize
|
127
127
|
super
|
128
128
|
@feed = nil
|
129
|
+
@feed_data = nil
|
130
|
+
@feed_data_type = nil
|
129
131
|
@xml_doc = nil
|
130
132
|
@root_node = nil
|
131
133
|
@title = nil
|
@@ -143,23 +145,37 @@ module FeedTools
|
|
143
145
|
@feed = new_feed
|
144
146
|
end
|
145
147
|
|
146
|
-
# Returns the feed item's raw
|
147
|
-
def
|
148
|
-
return @
|
148
|
+
# Returns the feed item's raw data.
|
149
|
+
def feed_data
|
150
|
+
return @feed_data
|
149
151
|
end
|
150
152
|
|
151
|
-
# Sets the feed item's
|
152
|
-
def
|
153
|
-
@
|
153
|
+
# Sets the feed item's data.
|
154
|
+
def feed_data=(new_feed_data)
|
155
|
+
@feed_data = new_feed_data
|
154
156
|
end
|
155
157
|
|
156
|
-
# Returns
|
158
|
+
# Returns the feed item's data type.
|
159
|
+
def feed_data_type
|
160
|
+
return @feed_data_type
|
161
|
+
end
|
162
|
+
|
163
|
+
# Sets the feed item's data type.
|
164
|
+
def feed_data_type=(new_feed_data_type)
|
165
|
+
@feed_data_type = new_feed_data_type
|
166
|
+
end
|
167
|
+
|
168
|
+
# Returns a REXML Document of the feed_data
|
157
169
|
def xml
|
158
|
-
if
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
170
|
+
if self.feed_data_type != :xml
|
171
|
+
@xml_doc = nil
|
172
|
+
else
|
173
|
+
if @xml_doc.nil?
|
174
|
+
# TODO: :ignore_whitespace_nodes => :all
|
175
|
+
# Add that?
|
176
|
+
# ======================================
|
177
|
+
@xml_doc = Document.new(self.feed_data)
|
178
|
+
end
|
163
179
|
end
|
164
180
|
return @xml_doc
|
165
181
|
end
|
data/rakefile
CHANGED
data/test/amp_test.rb
CHANGED
@@ -456,7 +456,7 @@ class AmpTest < Test::Unit::TestCase
|
|
456
456
|
|
457
457
|
def test_amp_65
|
458
458
|
feed = FeedTools::Feed.new
|
459
|
-
feed.xml_data = <<-FEED
|
459
|
+
feed.feed_data = <<-FEED
|
460
460
|
<feed version="0.3">
|
461
461
|
<title><strong>1 &amp; 2 & 3</strong></title>
|
462
462
|
<tagline><strong>1 &amp; 2 & 3</strong></tagline>
|
data/test/cache_test.rb
CHANGED
@@ -7,7 +7,7 @@ class CacheTest < Test::Unit::TestCase
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def test_database_connection
|
10
|
-
#
|
10
|
+
# Ensure the cache is on for this test
|
11
11
|
FeedTools.feed_cache = FeedTools::DatabaseFeedCache
|
12
12
|
|
13
13
|
unless FeedTools.feed_cache.nil?
|
@@ -16,8 +16,41 @@ class CacheTest < Test::Unit::TestCase
|
|
16
16
|
else
|
17
17
|
puts "\nSkipping cache test since the cache is still disabled.\n"
|
18
18
|
end
|
19
|
-
|
20
|
-
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_redirects_when_cache_disabled
|
22
|
+
# Turn the cache off for this test
|
21
23
|
FeedTools.feed_cache = nil
|
24
|
+
|
25
|
+
# We just want to make sure there's no exception
|
26
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
27
|
+
|
28
|
+
# Turn the cache back on
|
29
|
+
FeedTools.feed_cache = FeedTools::DatabaseFeedCache
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_redirects_when_cache_enabled
|
33
|
+
# Ensure the cache is on for this test
|
34
|
+
FeedTools.feed_cache = FeedTools::DatabaseFeedCache
|
35
|
+
|
36
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
37
|
+
assert(slashdot_feed.feed_data != nil, "No content retrieved.")
|
38
|
+
slashdot_feed.expire!
|
39
|
+
assert_equal(true, slashdot_feed.expired?)
|
40
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
41
|
+
assert(slashdot_feed.feed_data != nil, "No content retrieved.")
|
42
|
+
assert_equal(true, slashdot_feed.live?)
|
43
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
44
|
+
assert(slashdot_feed.feed_data != nil, "No content retrieved.")
|
45
|
+
assert_equal(false, slashdot_feed.live?)
|
46
|
+
slashdot_feed.expire!
|
47
|
+
slashdot_feed.expire!
|
48
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
49
|
+
assert(slashdot_feed.feed_data != nil, "No content retrieved.")
|
50
|
+
assert_equal(true, slashdot_feed.live?)
|
51
|
+
FeedTools::Feed.open(slashdot_feed.url)
|
52
|
+
|
53
|
+
entries = FeedTools::DatabaseFeedCache.find_all_by_url(slashdot_feed.url)
|
54
|
+
assert_equal(1, entries.size)
|
22
55
|
end
|
23
56
|
end
|
data/test/cdf_test.rb
CHANGED
@@ -8,7 +8,7 @@ class CdfTest < Test::Unit::TestCase
|
|
8
8
|
|
9
9
|
def test_feed_title
|
10
10
|
feed = FeedTools::Feed.new
|
11
|
-
feed.xml_data = <<-FEED
|
11
|
+
feed.feed_data = <<-FEED
|
12
12
|
<CHANNEL>
|
13
13
|
<TITLE>Example Title</TITLE>
|
14
14
|
</CHANNEL>
|
@@ -36,7 +36,7 @@ class CdfTest < Test::Unit::TestCase
|
|
36
36
|
|
37
37
|
def test_feed_href
|
38
38
|
feed = FeedTools::Feed.new
|
39
|
-
feed.xml_data = <<-FEED
|
39
|
+
feed.feed_data = <<-FEED
|
40
40
|
<CHANNEL HREF="http://www.example.com/">
|
41
41
|
</CHANNEL>
|
42
42
|
FEED
|
@@ -54,7 +54,7 @@ class CdfTest < Test::Unit::TestCase
|
|
54
54
|
|
55
55
|
def test_feed_images
|
56
56
|
feed = FeedTools::Feed.new
|
57
|
-
feed.xml_data = <<-FEED
|
57
|
+
feed.feed_data = <<-FEED
|
58
58
|
<CHANNEL>
|
59
59
|
<LOGO HREF="http://www.example.com/exampleicon.gif" STYLE="ICON" />
|
60
60
|
<LOGO HREF="http://www.example.com/exampleimage.gif" STYLE="IMAGE" />
|
@@ -100,7 +100,7 @@ class CdfTest < Test::Unit::TestCase
|
|
100
100
|
|
101
101
|
def test_feed_item_images
|
102
102
|
feed = FeedTools::Feed.new
|
103
|
-
feed.xml_data = <<-FEED
|
103
|
+
feed.feed_data = <<-FEED
|
104
104
|
<CHANNEL>
|
105
105
|
<ITEM HREF="http://www.example.com/item">
|
106
106
|
<LOGO HREF="http://www.example.com/exampleicon.gif" STYLE="ICON" />
|
data/test/nonstandard_test.rb
CHANGED
@@ -8,7 +8,7 @@ class NonStandardTest < Test::Unit::TestCase
|
|
8
8
|
|
9
9
|
def test_xss_strict
|
10
10
|
feed = FeedTools::Feed.new
|
11
|
-
feed.xml_data = <<-FEED
|
11
|
+
feed.feed_data = <<-FEED
|
12
12
|
<?xml version="1.0" encoding="iso-8859-1"?>
|
13
13
|
<rss version="2.0/XSS-strict">
|
14
14
|
<channel>
|
@@ -66,7 +66,7 @@ class NonStandardTest < Test::Unit::TestCase
|
|
66
66
|
def test_rss_30_lite
|
67
67
|
# Delusions of grandeur...
|
68
68
|
feed = FeedTools::Feed.new
|
69
|
-
feed.xml_data = <<-FEED
|
69
|
+
feed.feed_data = <<-FEED
|
70
70
|
<?xml version="1.0" encoding="UTF-8"?>
|
71
71
|
<rss version="3.0" type="lite"
|
72
72
|
source="http://www.rss3.org/files/liteSample.rss">
|
metadata
CHANGED