feedtools 0.2.11 → 0.2.12

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,12 @@
1
+ == FeedTools 0.2.12
2
+ * schema change! (the xml_data column is renamed to feed_data and a feed_data_type column is added)
3
+ * fixed missing cache connection breaking redirects
4
+ * fixed redirects creating duplicate cache entry
5
+ * exception now raised if cache_only option is set but the cache is disabled
6
+ * additional unit tests to verify that the cache works with redirects
7
+ * fixed feed expiration bugs with expire!
8
+ * xml_data renamed to feed_data
9
+ * feed_data_type method added
1
10
  == FeedTools 0.2.11
2
11
  * ruby -w shouldn't produce nearly as many warnings for feed_tools.rb anymore
3
12
  * you can now force the open method to only pull from the cache
@@ -4,7 +4,8 @@
4
4
  `url` varchar(255) default NULL,
5
5
  `title` varchar(255) default NULL,
6
6
  `link` varchar(255) default NULL,
7
- `xml_data` longtext default NULL,
7
+ `feed_data` longtext default NULL,
8
+ `feed_data_type` varchar(20) default NULL,
8
9
  `http_headers` text default NULL,
9
10
  `last_retrieved` datetime default NULL,
10
11
  PRIMARY KEY (`id`)
@@ -4,7 +4,8 @@
4
4
  url varchar(255) default NULL,
5
5
  title varchar(255) default NULL,
6
6
  link varchar(255) default NULL,
7
- xml_data text default NULL,
7
+ feed_data text default NULL,
8
+ feed_data_type varchar(20) default NULL,
8
9
  http_headers text default NULL,
9
10
  last_retrieved timestamp default NULL
10
11
  );
@@ -4,7 +4,8 @@
4
4
  url VARCHAR(255) DEFAULT NULL,
5
5
  title VARCHAR(255) DEFAULT NULL,
6
6
  link VARCHAR(255) DEFAULT NULL,
7
- xml_data TEXT DEFAULT NULL,
7
+ feed_data TEXT DEFAULT NULL,
8
+ feed_data_type VARCHAR(20) DEFAULT NULL,
8
9
  http_headers TEXT DEFAULT NULL,
9
10
  last_retrieved DATETIME DEFAULT NULL
10
11
  );
@@ -32,7 +32,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
32
32
  ENV['RAILS_ENV'] ||
33
33
  'production' # :nodoc:
34
34
 
35
- FEED_TOOLS_VERSION = "0.2.11"
35
+ FEED_TOOLS_VERSION = "0.2.12"
36
36
 
37
37
  $:.unshift(File.dirname(__FILE__))
38
38
  $:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
@@ -71,7 +71,7 @@ module FeedTools
71
71
  def DatabaseFeedCache.table_exists?
72
72
  begin
73
73
  ActiveRecord::Base.connection.execute "select id, url, title, " +
74
- "link, xml_data, http_headers, last_retrieved " +
74
+ "link, feed_data, feed_data_type, http_headers, last_retrieved " +
75
75
  "from #{self.table_name()} limit 1"
76
76
  rescue ActiveRecord::StatementInvalid
77
77
  return false
@@ -101,7 +101,8 @@ module FeedTools
101
101
  @cache_object = nil
102
102
  @http_headers = nil
103
103
  @xml_doc = nil
104
- @xml_data = nil
104
+ @feed_data = nil
105
+ @feed_data_type = nil
105
106
  @root_node = nil
106
107
  @channel_node = nil
107
108
  @url = nil
@@ -134,6 +135,11 @@ module FeedTools
134
135
  options.keys)
135
136
  options = { :cache_only => false }.merge(options)
136
137
 
138
+ if options[:cache_only] && FeedTools.feed_cache.nil?
139
+ raise(ArgumentError, "There is currently no caching mechanism set. " +
140
+ "Cannot retrieve cached feeds.")
141
+ end
142
+
137
143
  # clean up the url
138
144
  url = FeedTools.normalize_url(url)
139
145
 
@@ -269,15 +275,28 @@ module FeedTools
269
275
 
270
276
  # Find out if we've already seen the url we've been
271
277
  # redirected to.
272
- cached_feed = FeedTools::Feed.open(new_location,
273
- :cache_only => true)
274
278
  found_redirect = false
275
- if cached_feed.cache_object.new_record? != true
276
- unless cached_feed.expired?
277
- self.url = cached_feed.url
278
- @live = false
279
- found_redirect = true
279
+ begin
280
+ cached_feed = FeedTools::Feed.open(new_location,
281
+ :cache_only => true)
282
+ if cached_feed.cache_object != nil &&
283
+ cached_feed.cache_object.new_record? != true
284
+ unless cached_feed.expired?
285
+ # Copy the cached state, starting with the url
286
+ self.url = cached_feed.url
287
+ self.title = cached_feed.title
288
+ self.link = cached_feed.link
289
+ self.feed_data = cached_feed.feed_data
290
+ self.feed_data_type = cached_feed.feed_data_type
291
+ self.last_retrieved = cached_feed.last_retrieved
292
+ self.http_headers = cached_feed.http_headers
293
+ self.cache_object = cached_feed.cache_object
294
+ @live = false
295
+ found_redirect = true
296
+ end
280
297
  end
298
+ rescue
299
+ # If anything goes wrong, ignore it.
281
300
  end
282
301
  unless found_redirect
283
302
  # TODO: deal with stupid people using relative urls
@@ -337,29 +356,29 @@ module FeedTools
337
356
  self.http_headers[header.first.downcase] = header.last
338
357
  end
339
358
  self.last_retrieved = Time.now
340
- self.xml_data = self.http_response.body
359
+ self.feed_data = self.http_response.body
341
360
  end
342
361
  rescue FeedAccessError
343
362
  @live = false
344
- if self.xml_data.nil?
363
+ if self.feed_data.nil?
345
364
  raise
346
365
  end
347
366
  rescue Timeout::Error
348
- # if we time out, do nothing, it should fall back to the xml_data
367
+ # if we time out, do nothing, it should fall back to the feed_data
349
368
  # stored in the cache.
350
369
  @live = false
351
- if self.xml_data.nil?
370
+ if self.feed_data.nil?
352
371
  raise
353
372
  end
354
373
  rescue Errno::ECONNRESET
355
374
  # if the connection gets reset by peer, oh well, fall back to the
356
- # xml_data stored in the cache
375
+ # feed_data stored in the cache
357
376
  @live = false
358
- if self.xml_data.nil?
377
+ if self.feed_data.nil?
359
378
  raise
360
379
  end
361
380
  rescue => error
362
- # heck, if anything at all bad happens, fall back to the xml_data
381
+ # heck, if anything at all bad happens, fall back to the feed_data
363
382
  # stored in the cache.
364
383
 
365
384
  # If we can, get the HTTPResponse...
@@ -381,7 +400,7 @@ module FeedTools
381
400
  end
382
401
  end
383
402
  @live = false
384
- if self.xml_data.nil?
403
+ if self.feed_data.nil?
385
404
  if error.respond_to?(:response) &&
386
405
  error.response.respond_to?(:response_chain)
387
406
  redirects = error.response.response_chain.map do |pair|
@@ -409,7 +428,8 @@ module FeedTools
409
428
  @http_response = nil
410
429
  @http_headers = {}
411
430
  self.last_retrieved = Time.now
412
- self.xml_data = file.read
431
+ self.feed_data = file.read
432
+ self.feed_data_type = :xml
413
433
  end
414
434
  rescue
415
435
  @live = false
@@ -437,35 +457,60 @@ module FeedTools
437
457
  return @http_headers
438
458
  end
439
459
 
440
- # Returns the feed's raw xml data.
441
- def xml_data
442
- if @xml_data.nil?
460
+ # Returns the feed's raw data.
461
+ def feed_data
462
+ if @feed_data.nil?
443
463
  unless self.cache_object.nil?
444
- @xml_data = self.cache_object.xml_data
464
+ @feed_data = self.cache_object.feed_data
445
465
  end
446
466
  end
447
- return @xml_data
467
+ return @feed_data
448
468
  end
449
469
 
450
- # Sets the feed's xml data.
451
- def xml_data=(new_xml_data)
452
- @xml_data = new_xml_data
470
+ # Sets the feed's data.
471
+ def feed_data=(new_feed_data)
472
+ @feed_data = new_feed_data
473
+ unless self.cache_object.nil?
474
+ self.cache_object.feed_data = new_feed_data
475
+ end
476
+ end
477
+
478
+ # Returns the data type of the feed
479
+ # Possible values:
480
+ # * :xml
481
+ # * :yaml
482
+ # * :text
483
+ def feed_data_type
484
+ if @feed_data_type.nil?
485
+ # Right now, nothing else is supported
486
+ @feed_data_type = :xml
487
+ end
488
+ return @feed_data_type
489
+ end
490
+
491
+ # Sets the feed's data type.
492
+ def feed_data_type=(new_feed_data_type)
493
+ @feed_data_type = new_feed_data_type
453
494
  unless self.cache_object.nil?
454
- self.cache_object.xml_data = new_xml_data
495
+ self.cache_object.feed_data_type = new_feed_data_type
455
496
  end
456
497
  end
457
498
 
458
- # Returns a REXML Document of the xml_data
499
+ # Returns a REXML Document of the feed_data
459
500
  def xml
460
- if @xml_doc.nil?
461
- begin
462
- # TODO: :ignore_whitespace_nodes => :all
463
- # Add that?
464
- # ======================================
465
- @xml_doc = Document.new(xml_data)
466
- rescue
467
- # Something failed, attempt to repair the xml with htree.
468
- @xml_doc = HTree.parse(xml_data).to_rexml
501
+ if self.feed_data_type != :xml
502
+ @xml_doc = nil
503
+ else
504
+ if @xml_doc.nil?
505
+ begin
506
+ # TODO: :ignore_whitespace_nodes => :all
507
+ # Add that?
508
+ # ======================================
509
+ @xml_doc = Document.new(feed_data)
510
+ rescue
511
+ # Something failed, attempt to repair the xml with htree.
512
+ @xml_doc = HTree.parse(feed_data).to_rexml
513
+ end
469
514
  end
470
515
  end
471
516
  return @xml_doc
@@ -539,8 +584,10 @@ module FeedTools
539
584
  # title=
540
585
  # link
541
586
  # link=
542
- # xml_data
543
- # xml_data=
587
+ # feed_data
588
+ # feed_data=
589
+ # feed_data_type
590
+ # feed_data_type=
544
591
  # etag
545
592
  # etag=
546
593
  # last_modified
@@ -657,7 +704,7 @@ module FeedTools
657
704
 
658
705
  # Returns the feed url.
659
706
  def url
660
- if @url.nil? && self.xml_data != nil
707
+ if @url.nil? && self.feed_data != nil
661
708
  @url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
662
709
  @url = nil if @url == ""
663
710
  end
@@ -1675,7 +1722,8 @@ module FeedTools
1675
1722
  if raw_items != nil
1676
1723
  for item_node in raw_items
1677
1724
  new_item = FeedItem.new
1678
- new_item.xml_data = item_node.to_s
1725
+ new_item.feed_data = item_node.to_s
1726
+ new_item.feed_data_type = self.feed_data_type
1679
1727
  new_item.feed = self
1680
1728
  @items << new_item
1681
1729
  end
@@ -1748,7 +1796,7 @@ module FeedTools
1748
1796
 
1749
1797
  # Forces this feed to expire.
1750
1798
  def expire!
1751
- self.last_retrieved = Time.mktime(1970)
1799
+ self.last_retrieved = Time.mktime(1970).gmtime
1752
1800
  self.save
1753
1801
  end
1754
1802
 
@@ -2014,15 +2062,16 @@ module FeedTools
2014
2062
  raise "Caching is currently disabled. Cannot save to cache."
2015
2063
  elsif self.url.nil?
2016
2064
  raise "The url field must be set to save to the cache."
2017
- elsif self.xml_data.nil?
2018
- raise "The xml_data field must be set to save to the cache."
2019
2065
  elsif self.cache_object.nil?
2020
2066
  raise "The cache_object is currently nil. Cannot save to cache."
2021
2067
  else
2022
2068
  self.cache_object.url = self.url
2023
- self.cache_object.title = self.title
2024
- self.cache_object.link = self.link
2025
- self.cache_object.xml_data = self.xml_data
2069
+ unless self.feed_data.nil?
2070
+ self.cache_object.title = self.title
2071
+ self.cache_object.link = self.link
2072
+ self.cache_object.feed_data = self.feed_data
2073
+ self.cache_object.feed_data_type = self.feed_data_type.to_s
2074
+ end
2026
2075
  unless self.http_response.nil?
2027
2076
  self.cache_object.http_headers = self.http_headers.to_yaml
2028
2077
  end
@@ -126,6 +126,8 @@ module FeedTools
126
126
  def initialize
127
127
  super
128
128
  @feed = nil
129
+ @feed_data = nil
130
+ @feed_data_type = nil
129
131
  @xml_doc = nil
130
132
  @root_node = nil
131
133
  @title = nil
@@ -143,23 +145,37 @@ module FeedTools
143
145
  @feed = new_feed
144
146
  end
145
147
 
146
- # Returns the feed item's raw xml data.
147
- def xml_data
148
- return @xml_data
148
+ # Returns the feed item's raw data.
149
+ def feed_data
150
+ return @feed_data
149
151
  end
150
152
 
151
- # Sets the feed item's xml data.
152
- def xml_data=(new_xml_data)
153
- @xml_data = new_xml_data
153
+ # Sets the feed item's data.
154
+ def feed_data=(new_feed_data)
155
+ @feed_data = new_feed_data
154
156
  end
155
157
 
156
- # Returns a REXML Document of the xml_data
158
+ # Returns the feed item's data type.
159
+ def feed_data_type
160
+ return @feed_data_type
161
+ end
162
+
163
+ # Sets the feed item's data type.
164
+ def feed_data_type=(new_feed_data_type)
165
+ @feed_data_type = new_feed_data_type
166
+ end
167
+
168
+ # Returns a REXML Document of the feed_data
157
169
  def xml
158
- if @xml_doc.nil?
159
- # TODO: :ignore_whitespace_nodes => :all
160
- # Add that?
161
- # ======================================
162
- @xml_doc = Document.new(xml_data)
170
+ if self.feed_data_type != :xml
171
+ @xml_doc = nil
172
+ else
173
+ if @xml_doc.nil?
174
+ # TODO: :ignore_whitespace_nodes => :all
175
+ # Add that?
176
+ # ======================================
177
+ @xml_doc = Document.new(self.feed_data)
178
+ end
163
179
  end
164
180
  return @xml_doc
165
181
  end
data/rakefile CHANGED
@@ -7,7 +7,7 @@ require 'rake/gempackagetask'
7
7
  require 'rake/contrib/rubyforgepublisher'
8
8
 
9
9
  PKG_NAME = 'feedtools'
10
- PKG_VERSION = '0.2.11'
10
+ PKG_VERSION = '0.2.12'
11
11
  PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
12
12
 
13
13
  RELEASE_NAME = "REL #{PKG_VERSION}"
@@ -456,7 +456,7 @@ class AmpTest < Test::Unit::TestCase
456
456
 
457
457
  def test_amp_65
458
458
  feed = FeedTools::Feed.new
459
- feed.xml_data = <<-FEED
459
+ feed.feed_data = <<-FEED
460
460
  <feed version="0.3">
461
461
  <title>&lt;strong>1 &amp;amp; 2 &amp; 3&lt;/strong></title>
462
462
  <tagline>&lt;strong>1 &amp;amp; 2 &amp; 3&lt;/strong></tagline>
@@ -7,7 +7,7 @@ class CacheTest < Test::Unit::TestCase
7
7
  end
8
8
 
9
9
  def test_database_connection
10
- # turn the cache on for this test
10
+ # Ensure the cache is on for this test
11
11
  FeedTools.feed_cache = FeedTools::DatabaseFeedCache
12
12
 
13
13
  unless FeedTools.feed_cache.nil?
@@ -16,8 +16,41 @@ class CacheTest < Test::Unit::TestCase
16
16
  else
17
17
  puts "\nSkipping cache test since the cache is still disabled.\n"
18
18
  end
19
-
20
- # turn the cache back off
19
+ end
20
+
21
+ def test_redirects_when_cache_disabled
22
+ # Turn the cache off for this test
21
23
  FeedTools.feed_cache = nil
24
+
25
+ # We just want to make sure there's no exception
26
+ slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
27
+
28
+ # Turn the cache back on
29
+ FeedTools.feed_cache = FeedTools::DatabaseFeedCache
30
+ end
31
+
32
+ def test_redirects_when_cache_enabled
33
+ # Ensure the cache is on for this test
34
+ FeedTools.feed_cache = FeedTools::DatabaseFeedCache
35
+
36
+ slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
37
+ assert(slashdot_feed.feed_data != nil, "No content retrieved.")
38
+ slashdot_feed.expire!
39
+ assert_equal(true, slashdot_feed.expired?)
40
+ slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
41
+ assert(slashdot_feed.feed_data != nil, "No content retrieved.")
42
+ assert_equal(true, slashdot_feed.live?)
43
+ slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
44
+ assert(slashdot_feed.feed_data != nil, "No content retrieved.")
45
+ assert_equal(false, slashdot_feed.live?)
46
+ slashdot_feed.expire!
47
+ slashdot_feed.expire!
48
+ slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
49
+ assert(slashdot_feed.feed_data != nil, "No content retrieved.")
50
+ assert_equal(true, slashdot_feed.live?)
51
+ FeedTools::Feed.open(slashdot_feed.url)
52
+
53
+ entries = FeedTools::DatabaseFeedCache.find_all_by_url(slashdot_feed.url)
54
+ assert_equal(1, entries.size)
22
55
  end
23
56
  end
@@ -8,7 +8,7 @@ class CdfTest < Test::Unit::TestCase
8
8
 
9
9
  def test_feed_title
10
10
  feed = FeedTools::Feed.new
11
- feed.xml_data = <<-FEED
11
+ feed.feed_data = <<-FEED
12
12
  <CHANNEL>
13
13
  <TITLE>Example Title</TITLE>
14
14
  </CHANNEL>
@@ -36,7 +36,7 @@ class CdfTest < Test::Unit::TestCase
36
36
 
37
37
  def test_feed_href
38
38
  feed = FeedTools::Feed.new
39
- feed.xml_data = <<-FEED
39
+ feed.feed_data = <<-FEED
40
40
  <CHANNEL HREF="http://www.example.com/">
41
41
  </CHANNEL>
42
42
  FEED
@@ -54,7 +54,7 @@ class CdfTest < Test::Unit::TestCase
54
54
 
55
55
  def test_feed_images
56
56
  feed = FeedTools::Feed.new
57
- feed.xml_data = <<-FEED
57
+ feed.feed_data = <<-FEED
58
58
  <CHANNEL>
59
59
  <LOGO HREF="http://www.example.com/exampleicon.gif" STYLE="ICON" />
60
60
  <LOGO HREF="http://www.example.com/exampleimage.gif" STYLE="IMAGE" />
@@ -100,7 +100,7 @@ class CdfTest < Test::Unit::TestCase
100
100
 
101
101
  def test_feed_item_images
102
102
  feed = FeedTools::Feed.new
103
- feed.xml_data = <<-FEED
103
+ feed.feed_data = <<-FEED
104
104
  <CHANNEL>
105
105
  <ITEM HREF="http://www.example.com/item">
106
106
  <LOGO HREF="http://www.example.com/exampleicon.gif" STYLE="ICON" />
@@ -8,7 +8,7 @@ class NonStandardTest < Test::Unit::TestCase
8
8
 
9
9
  def test_xss_strict
10
10
  feed = FeedTools::Feed.new
11
- feed.xml_data = <<-FEED
11
+ feed.feed_data = <<-FEED
12
12
  <?xml version="1.0" encoding="iso-8859-1"?>
13
13
  <rss version="2.0/XSS-strict">
14
14
  <channel>
@@ -66,7 +66,7 @@ class NonStandardTest < Test::Unit::TestCase
66
66
  def test_rss_30_lite
67
67
  # Delusions of grandeur...
68
68
  feed = FeedTools::Feed.new
69
- feed.xml_data = <<-FEED
69
+ feed.feed_data = <<-FEED
70
70
  <?xml version="1.0" encoding="UTF-8"?>
71
71
  <rss version="3.0" type="lite"
72
72
  source="http://www.rss3.org/files/liteSample.rss">
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: feedtools
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.11
6
+ version: 0.2.12
7
7
  date: 2005-09-27 00:00:00 -04:00
8
8
  summary: "Parsing, generation, and caching system for xml news feeds."
9
9
  require_paths: