feedtools 0.2.11 → 0.2.12
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -0
- data/db/schema.mysql.sql +2 -1
- data/db/schema.postgresql.sql +2 -1
- data/db/schema.sqlite.sql +2 -1
- data/lib/feed_tools.rb +1 -1
- data/lib/feed_tools/database_feed_cache.rb +1 -1
- data/lib/feed_tools/feed.rb +95 -46
- data/lib/feed_tools/feed_item.rb +28 -12
- data/rakefile +1 -1
- data/test/amp_test.rb +1 -1
- data/test/cache_test.rb +36 -3
- data/test/cdf_test.rb +4 -4
- data/test/nonstandard_test.rb +2 -2
- metadata +1 -1
data/CHANGELOG
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
== FeedTools 0.2.12
|
2
|
+
* schema change!
|
3
|
+
* fixed missing cache connection breaking redirects
|
4
|
+
* fixed redirects creating duplicate cache entry
|
5
|
+
* exception now raised if cache_only option is set but the cache is disabled
|
6
|
+
* additional unit tests to verify that the cache works with redirects
|
7
|
+
* fixed feed expiration bugs with expire!
|
8
|
+
* xml_data renamed to feed_data
|
9
|
+
* feed_data_type method added
|
1
10
|
== FeedTools 0.2.11
|
2
11
|
* ruby -w shouldn't produce nearly as many warnings for feed_tools.rb anymore
|
3
12
|
* you can now force the open method to only pull from the cache
|
data/db/schema.mysql.sql
CHANGED
@@ -4,7 +4,8 @@
|
|
4
4
|
`url` varchar(255) default NULL,
|
5
5
|
`title` varchar(255) default NULL,
|
6
6
|
`link` varchar(255) default NULL,
|
7
|
-
`
|
7
|
+
`feed_data` longtext default NULL,
|
8
|
+
`feed_data_type` varchar(20) default NULL,
|
8
9
|
`http_headers` text default NULL,
|
9
10
|
`last_retrieved` datetime default NULL,
|
10
11
|
PRIMARY KEY (`id`)
|
data/db/schema.postgresql.sql
CHANGED
@@ -4,7 +4,8 @@
|
|
4
4
|
url varchar(255) default NULL,
|
5
5
|
title varchar(255) default NULL,
|
6
6
|
link varchar(255) default NULL,
|
7
|
-
|
7
|
+
feed_data text default NULL,
|
8
|
+
feed_data_type varchar(20) default NULL,
|
8
9
|
http_headers text default NULL,
|
9
10
|
last_retrieved timestamp default NULL
|
10
11
|
);
|
data/db/schema.sqlite.sql
CHANGED
@@ -4,7 +4,8 @@
|
|
4
4
|
url VARCHAR(255) DEFAULT NULL,
|
5
5
|
title VARCHAR(255) DEFAULT NULL,
|
6
6
|
link VARCHAR(255) DEFAULT NULL,
|
7
|
-
|
7
|
+
feed_data TEXT DEFAULT NULL,
|
8
|
+
feed_data_type VARCHAR(20) DEFAULT NULL,
|
8
9
|
http_headers TEXT DEFAULT NULL,
|
9
10
|
last_retrieved DATETIME DEFAULT NULL
|
10
11
|
);
|
data/lib/feed_tools.rb
CHANGED
@@ -32,7 +32,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
32
32
|
ENV['RAILS_ENV'] ||
|
33
33
|
'production' # :nodoc:
|
34
34
|
|
35
|
-
FEED_TOOLS_VERSION = "0.2.11"
|
35
|
+
FEED_TOOLS_VERSION = "0.2.12"
|
36
36
|
|
37
37
|
$:.unshift(File.dirname(__FILE__))
|
38
38
|
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
@@ -71,7 +71,7 @@ module FeedTools
|
|
71
71
|
def DatabaseFeedCache.table_exists?
|
72
72
|
begin
|
73
73
|
ActiveRecord::Base.connection.execute "select id, url, title, " +
|
74
|
-
"link, xml_data, http_headers, last_retrieved " +
|
74
|
+
"link, feed_data, feed_data_type, http_headers, last_retrieved " +
|
75
75
|
"from #{self.table_name()} limit 1"
|
76
76
|
rescue ActiveRecord::StatementInvalid
|
77
77
|
return false
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -101,7 +101,8 @@ module FeedTools
|
|
101
101
|
@cache_object = nil
|
102
102
|
@http_headers = nil
|
103
103
|
@xml_doc = nil
|
104
|
-
@xml_data = nil
|
104
|
+
@feed_data = nil
|
105
|
+
@feed_data_type = nil
|
105
106
|
@root_node = nil
|
106
107
|
@channel_node = nil
|
107
108
|
@url = nil
|
@@ -134,6 +135,11 @@ module FeedTools
|
|
134
135
|
options.keys)
|
135
136
|
options = { :cache_only => false }.merge(options)
|
136
137
|
|
138
|
+
if options[:cache_only] && FeedTools.feed_cache.nil?
|
139
|
+
raise(ArgumentError, "There is currently no caching mechanism set. " +
|
140
|
+
"Cannot retrieve cached feeds.")
|
141
|
+
end
|
142
|
+
|
137
143
|
# clean up the url
|
138
144
|
url = FeedTools.normalize_url(url)
|
139
145
|
|
@@ -269,15 +275,28 @@ module FeedTools
|
|
269
275
|
|
270
276
|
# Find out if we've already seen the url we've been
|
271
277
|
# redirected to.
|
272
|
-
cached_feed = FeedTools::Feed.open(new_location,
|
273
|
-
:cache_only => true)
|
274
278
|
found_redirect = false
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
279
|
+
begin
|
280
|
+
cached_feed = FeedTools::Feed.open(new_location,
|
281
|
+
:cache_only => true)
|
282
|
+
if cached_feed.cache_object != nil &&
|
283
|
+
cached_feed.cache_object.new_record? != true
|
284
|
+
unless cached_feed.expired?
|
285
|
+
# Copy the cached state, starting with the url
|
286
|
+
self.url = cached_feed.url
|
287
|
+
self.title = cached_feed.title
|
288
|
+
self.link = cached_feed.link
|
289
|
+
self.feed_data = cached_feed.feed_data
|
290
|
+
self.feed_data_type = cached_feed.feed_data_type
|
291
|
+
self.last_retrieved = cached_feed.last_retrieved
|
292
|
+
self.http_headers = cached_feed.http_headers
|
293
|
+
self.cache_object = cached_feed.cache_object
|
294
|
+
@live = false
|
295
|
+
found_redirect = true
|
296
|
+
end
|
280
297
|
end
|
298
|
+
rescue
|
299
|
+
# If anything goes wrong, ignore it.
|
281
300
|
end
|
282
301
|
unless found_redirect
|
283
302
|
# TODO: deal with stupid people using relative urls
|
@@ -337,29 +356,29 @@ module FeedTools
|
|
337
356
|
self.http_headers[header.first.downcase] = header.last
|
338
357
|
end
|
339
358
|
self.last_retrieved = Time.now
|
340
|
-
self.xml_data = self.http_response.body
|
359
|
+
self.feed_data = self.http_response.body
|
341
360
|
end
|
342
361
|
rescue FeedAccessError
|
343
362
|
@live = false
|
344
|
-
if self.xml_data.nil?
|
363
|
+
if self.feed_data.nil?
|
345
364
|
raise
|
346
365
|
end
|
347
366
|
rescue Timeout::Error
|
348
|
-
# if we time out, do nothing, it should fall back to the
|
367
|
+
# if we time out, do nothing, it should fall back to the feed_data
|
349
368
|
# stored in the cache.
|
350
369
|
@live = false
|
351
|
-
if self.xml_data.nil?
|
370
|
+
if self.feed_data.nil?
|
352
371
|
raise
|
353
372
|
end
|
354
373
|
rescue Errno::ECONNRESET
|
355
374
|
# if the connection gets reset by peer, oh well, fall back to the
|
356
|
-
#
|
375
|
+
# feed_data stored in the cache
|
357
376
|
@live = false
|
358
|
-
if self.xml_data.nil?
|
377
|
+
if self.feed_data.nil?
|
359
378
|
raise
|
360
379
|
end
|
361
380
|
rescue => error
|
362
|
-
# heck, if anything at all bad happens, fall back to the
|
381
|
+
# heck, if anything at all bad happens, fall back to the feed_data
|
363
382
|
# stored in the cache.
|
364
383
|
|
365
384
|
# If we can, get the HTTPResponse...
|
@@ -381,7 +400,7 @@ module FeedTools
|
|
381
400
|
end
|
382
401
|
end
|
383
402
|
@live = false
|
384
|
-
if self.xml_data.nil?
|
403
|
+
if self.feed_data.nil?
|
385
404
|
if error.respond_to?(:response) &&
|
386
405
|
error.response.respond_to?(:response_chain)
|
387
406
|
redirects = error.response.response_chain.map do |pair|
|
@@ -409,7 +428,8 @@ module FeedTools
|
|
409
428
|
@http_response = nil
|
410
429
|
@http_headers = {}
|
411
430
|
self.last_retrieved = Time.now
|
412
|
-
self.xml_data = file.read
|
431
|
+
self.feed_data = file.read
|
432
|
+
self.feed_data_type = :xml
|
413
433
|
end
|
414
434
|
rescue
|
415
435
|
@live = false
|
@@ -437,35 +457,60 @@ module FeedTools
|
|
437
457
|
return @http_headers
|
438
458
|
end
|
439
459
|
|
440
|
-
# Returns the feed's raw
|
441
|
-
def xml_data
|
442
|
-
if @xml_data.nil?
|
460
|
+
# Returns the feed's raw data.
|
461
|
+
def feed_data
|
462
|
+
if @feed_data.nil?
|
443
463
|
unless self.cache_object.nil?
|
444
|
-
@xml_data = self.cache_object.xml_data
|
464
|
+
@feed_data = self.cache_object.feed_data
|
445
465
|
end
|
446
466
|
end
|
447
|
-
return @xml_data
|
467
|
+
return @feed_data
|
448
468
|
end
|
449
469
|
|
450
|
-
# Sets the feed's
|
451
|
-
def
|
452
|
-
@
|
470
|
+
# Sets the feed's data.
|
471
|
+
def feed_data=(new_feed_data)
|
472
|
+
@feed_data = new_feed_data
|
473
|
+
unless self.cache_object.nil?
|
474
|
+
self.cache_object.feed_data = new_feed_data
|
475
|
+
end
|
476
|
+
end
|
477
|
+
|
478
|
+
# Returns the data type of the feed
|
479
|
+
# Possible values:
|
480
|
+
# * :xml
|
481
|
+
# * :yaml
|
482
|
+
# * :text
|
483
|
+
def feed_data_type
|
484
|
+
if @feed_data_type.nil?
|
485
|
+
# Right now, nothing else is supported
|
486
|
+
@feed_data_type = :xml
|
487
|
+
end
|
488
|
+
return @feed_data_type
|
489
|
+
end
|
490
|
+
|
491
|
+
# Sets the feed's data type.
|
492
|
+
def feed_data_type=(new_feed_data_type)
|
493
|
+
@feed_data_type = new_feed_data_type
|
453
494
|
unless self.cache_object.nil?
|
454
|
-
self.cache_object.
|
495
|
+
self.cache_object.feed_data_type = new_feed_data_type
|
455
496
|
end
|
456
497
|
end
|
457
498
|
|
458
|
-
# Returns a REXML Document of the
|
499
|
+
# Returns a REXML Document of the feed_data
|
459
500
|
def xml
|
460
|
-
if
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
501
|
+
if self.feed_data_type != :xml
|
502
|
+
@xml_doc = nil
|
503
|
+
else
|
504
|
+
if @xml_doc.nil?
|
505
|
+
begin
|
506
|
+
# TODO: :ignore_whitespace_nodes => :all
|
507
|
+
# Add that?
|
508
|
+
# ======================================
|
509
|
+
@xml_doc = Document.new(feed_data)
|
510
|
+
rescue
|
511
|
+
# Something failed, attempt to repair the xml with htree.
|
512
|
+
@xml_doc = HTree.parse(feed_data).to_rexml
|
513
|
+
end
|
469
514
|
end
|
470
515
|
end
|
471
516
|
return @xml_doc
|
@@ -539,8 +584,10 @@ module FeedTools
|
|
539
584
|
# title=
|
540
585
|
# link
|
541
586
|
# link=
|
542
|
-
#
|
543
|
-
#
|
587
|
+
# feed_data
|
588
|
+
# feed_data=
|
589
|
+
# feed_data_type
|
590
|
+
# feed_data_type=
|
544
591
|
# etag
|
545
592
|
# etag=
|
546
593
|
# last_modified
|
@@ -657,7 +704,7 @@ module FeedTools
|
|
657
704
|
|
658
705
|
# Returns the feed url.
|
659
706
|
def url
|
660
|
-
if @url.nil? && self.xml_data != nil
|
707
|
+
if @url.nil? && self.feed_data != nil
|
661
708
|
@url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
|
662
709
|
@url = nil if @url == ""
|
663
710
|
end
|
@@ -1675,7 +1722,8 @@ module FeedTools
|
|
1675
1722
|
if raw_items != nil
|
1676
1723
|
for item_node in raw_items
|
1677
1724
|
new_item = FeedItem.new
|
1678
|
-
new_item.xml_data = item_node.to_s
|
1725
|
+
new_item.feed_data = item_node.to_s
|
1726
|
+
new_item.feed_data_type = self.feed_data_type
|
1679
1727
|
new_item.feed = self
|
1680
1728
|
@items << new_item
|
1681
1729
|
end
|
@@ -1748,7 +1796,7 @@ module FeedTools
|
|
1748
1796
|
|
1749
1797
|
# Forces this feed to expire.
|
1750
1798
|
def expire!
|
1751
|
-
self.last_retrieved = Time.mktime(1970)
|
1799
|
+
self.last_retrieved = Time.mktime(1970).gmtime
|
1752
1800
|
self.save
|
1753
1801
|
end
|
1754
1802
|
|
@@ -2014,15 +2062,16 @@ module FeedTools
|
|
2014
2062
|
raise "Caching is currently disabled. Cannot save to cache."
|
2015
2063
|
elsif self.url.nil?
|
2016
2064
|
raise "The url field must be set to save to the cache."
|
2017
|
-
elsif self.xml_data.nil?
|
2018
|
-
raise "The xml_data field must be set to save to the cache."
|
2019
2065
|
elsif self.cache_object.nil?
|
2020
2066
|
raise "The cache_object is currently nil. Cannot save to cache."
|
2021
2067
|
else
|
2022
2068
|
self.cache_object.url = self.url
|
2023
|
-
self.
|
2024
|
-
|
2025
|
-
|
2069
|
+
unless self.feed_data.nil?
|
2070
|
+
self.cache_object.title = self.title
|
2071
|
+
self.cache_object.link = self.link
|
2072
|
+
self.cache_object.feed_data = self.feed_data
|
2073
|
+
self.cache_object.feed_data_type = self.feed_data_type.to_s
|
2074
|
+
end
|
2026
2075
|
unless self.http_response.nil?
|
2027
2076
|
self.cache_object.http_headers = self.http_headers.to_yaml
|
2028
2077
|
end
|
data/lib/feed_tools/feed_item.rb
CHANGED
@@ -126,6 +126,8 @@ module FeedTools
|
|
126
126
|
def initialize
|
127
127
|
super
|
128
128
|
@feed = nil
|
129
|
+
@feed_data = nil
|
130
|
+
@feed_data_type = nil
|
129
131
|
@xml_doc = nil
|
130
132
|
@root_node = nil
|
131
133
|
@title = nil
|
@@ -143,23 +145,37 @@ module FeedTools
|
|
143
145
|
@feed = new_feed
|
144
146
|
end
|
145
147
|
|
146
|
-
# Returns the feed item's raw
|
147
|
-
def xml_data
|
148
|
-
return @xml_data
|
148
|
+
# Returns the feed item's raw data.
|
149
|
+
def feed_data
|
150
|
+
return @feed_data
|
149
151
|
end
|
150
152
|
|
151
|
-
# Sets the feed item's
|
152
|
-
def
|
153
|
-
@
|
153
|
+
# Sets the feed item's data.
|
154
|
+
def feed_data=(new_feed_data)
|
155
|
+
@feed_data = new_feed_data
|
154
156
|
end
|
155
157
|
|
156
|
-
# Returns
|
158
|
+
# Returns the feed item's data type.
|
159
|
+
def feed_data_type
|
160
|
+
return @feed_data_type
|
161
|
+
end
|
162
|
+
|
163
|
+
# Sets the feed item's data type.
|
164
|
+
def feed_data_type=(new_feed_data_type)
|
165
|
+
@feed_data_type = new_feed_data_type
|
166
|
+
end
|
167
|
+
|
168
|
+
# Returns a REXML Document of the feed_data
|
157
169
|
def xml
|
158
|
-
if
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
170
|
+
if self.feed_data_type != :xml
|
171
|
+
@xml_doc = nil
|
172
|
+
else
|
173
|
+
if @xml_doc.nil?
|
174
|
+
# TODO: :ignore_whitespace_nodes => :all
|
175
|
+
# Add that?
|
176
|
+
# ======================================
|
177
|
+
@xml_doc = Document.new(self.feed_data)
|
178
|
+
end
|
163
179
|
end
|
164
180
|
return @xml_doc
|
165
181
|
end
|
data/rakefile
CHANGED
data/test/amp_test.rb
CHANGED
@@ -456,7 +456,7 @@ class AmpTest < Test::Unit::TestCase
|
|
456
456
|
|
457
457
|
def test_amp_65
|
458
458
|
feed = FeedTools::Feed.new
|
459
|
-
feed.xml_data = <<-FEED
|
459
|
+
feed.feed_data = <<-FEED
|
460
460
|
<feed version="0.3">
|
461
461
|
<title><strong>1 &amp; 2 & 3</strong></title>
|
462
462
|
<tagline><strong>1 &amp; 2 & 3</strong></tagline>
|
data/test/cache_test.rb
CHANGED
@@ -7,7 +7,7 @@ class CacheTest < Test::Unit::TestCase
|
|
7
7
|
end
|
8
8
|
|
9
9
|
def test_database_connection
|
10
|
-
#
|
10
|
+
# Ensure the cache is on for this test
|
11
11
|
FeedTools.feed_cache = FeedTools::DatabaseFeedCache
|
12
12
|
|
13
13
|
unless FeedTools.feed_cache.nil?
|
@@ -16,8 +16,41 @@ class CacheTest < Test::Unit::TestCase
|
|
16
16
|
else
|
17
17
|
puts "\nSkipping cache test since the cache is still disabled.\n"
|
18
18
|
end
|
19
|
-
|
20
|
-
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_redirects_when_cache_disabled
|
22
|
+
# Turn the cache off for this test
|
21
23
|
FeedTools.feed_cache = nil
|
24
|
+
|
25
|
+
# We just want to make sure there's no exception
|
26
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
27
|
+
|
28
|
+
# Turn the cache back on
|
29
|
+
FeedTools.feed_cache = FeedTools::DatabaseFeedCache
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_redirects_when_cache_enabled
|
33
|
+
# Ensure the cache is on for this test
|
34
|
+
FeedTools.feed_cache = FeedTools::DatabaseFeedCache
|
35
|
+
|
36
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
37
|
+
assert(slashdot_feed.feed_data != nil, "No content retrieved.")
|
38
|
+
slashdot_feed.expire!
|
39
|
+
assert_equal(true, slashdot_feed.expired?)
|
40
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
41
|
+
assert(slashdot_feed.feed_data != nil, "No content retrieved.")
|
42
|
+
assert_equal(true, slashdot_feed.live?)
|
43
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
44
|
+
assert(slashdot_feed.feed_data != nil, "No content retrieved.")
|
45
|
+
assert_equal(false, slashdot_feed.live?)
|
46
|
+
slashdot_feed.expire!
|
47
|
+
slashdot_feed.expire!
|
48
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
49
|
+
assert(slashdot_feed.feed_data != nil, "No content retrieved.")
|
50
|
+
assert_equal(true, slashdot_feed.live?)
|
51
|
+
FeedTools::Feed.open(slashdot_feed.url)
|
52
|
+
|
53
|
+
entries = FeedTools::DatabaseFeedCache.find_all_by_url(slashdot_feed.url)
|
54
|
+
assert_equal(1, entries.size)
|
22
55
|
end
|
23
56
|
end
|
data/test/cdf_test.rb
CHANGED
@@ -8,7 +8,7 @@ class CdfTest < Test::Unit::TestCase
|
|
8
8
|
|
9
9
|
def test_feed_title
|
10
10
|
feed = FeedTools::Feed.new
|
11
|
-
feed.xml_data = <<-FEED
|
11
|
+
feed.feed_data = <<-FEED
|
12
12
|
<CHANNEL>
|
13
13
|
<TITLE>Example Title</TITLE>
|
14
14
|
</CHANNEL>
|
@@ -36,7 +36,7 @@ class CdfTest < Test::Unit::TestCase
|
|
36
36
|
|
37
37
|
def test_feed_href
|
38
38
|
feed = FeedTools::Feed.new
|
39
|
-
feed.xml_data = <<-FEED
|
39
|
+
feed.feed_data = <<-FEED
|
40
40
|
<CHANNEL HREF="http://www.example.com/">
|
41
41
|
</CHANNEL>
|
42
42
|
FEED
|
@@ -54,7 +54,7 @@ class CdfTest < Test::Unit::TestCase
|
|
54
54
|
|
55
55
|
def test_feed_images
|
56
56
|
feed = FeedTools::Feed.new
|
57
|
-
feed.xml_data = <<-FEED
|
57
|
+
feed.feed_data = <<-FEED
|
58
58
|
<CHANNEL>
|
59
59
|
<LOGO HREF="http://www.example.com/exampleicon.gif" STYLE="ICON" />
|
60
60
|
<LOGO HREF="http://www.example.com/exampleimage.gif" STYLE="IMAGE" />
|
@@ -100,7 +100,7 @@ class CdfTest < Test::Unit::TestCase
|
|
100
100
|
|
101
101
|
def test_feed_item_images
|
102
102
|
feed = FeedTools::Feed.new
|
103
|
-
feed.xml_data = <<-FEED
|
103
|
+
feed.feed_data = <<-FEED
|
104
104
|
<CHANNEL>
|
105
105
|
<ITEM HREF="http://www.example.com/item">
|
106
106
|
<LOGO HREF="http://www.example.com/exampleicon.gif" STYLE="ICON" />
|
data/test/nonstandard_test.rb
CHANGED
@@ -8,7 +8,7 @@ class NonStandardTest < Test::Unit::TestCase
|
|
8
8
|
|
9
9
|
def test_xss_strict
|
10
10
|
feed = FeedTools::Feed.new
|
11
|
-
feed.xml_data = <<-FEED
|
11
|
+
feed.feed_data = <<-FEED
|
12
12
|
<?xml version="1.0" encoding="iso-8859-1"?>
|
13
13
|
<rss version="2.0/XSS-strict">
|
14
14
|
<channel>
|
@@ -66,7 +66,7 @@ class NonStandardTest < Test::Unit::TestCase
|
|
66
66
|
def test_rss_30_lite
|
67
67
|
# Delusions of grandeur...
|
68
68
|
feed = FeedTools::Feed.new
|
69
|
-
feed.xml_data = <<-FEED
|
69
|
+
feed.feed_data = <<-FEED
|
70
70
|
<?xml version="1.0" encoding="UTF-8"?>
|
71
71
|
<rss version="3.0" type="lite"
|
72
72
|
source="http://www.rss3.org/files/liteSample.rss">
|
metadata
CHANGED