feedtools 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. data/CHANGELOG +3 -0
  2. data/lib/feed_tools.rb +369 -296
  3. data/rakefile +1 -1
  4. metadata +2 -2
data/CHANGELOG CHANGED
@@ -1,3 +1,6 @@
1
+ == FeedTools 0.2.6
2
+ * Added support for merging feeds
3
+ * Vastly improved handling of feed creation
1
4
  == FeedTools 0.2.5
2
5
  * fixed multiple rows being created in the cache after a 301 redirection
3
6
  * fixed broken table creation for postgresql and sqlite
data/lib/feed_tools.rb CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
25
25
  ENV['RAILS_ENV'] ||
26
26
  'production' # :nodoc:
27
27
 
28
- FEED_TOOLS_VERSION = "0.2.5"
28
+ FEED_TOOLS_VERSION = "0.2.6"
29
29
 
30
30
  $:.unshift(File.dirname(__FILE__))
31
31
  $:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
@@ -101,8 +101,9 @@ module FeedTools
101
101
  begin
102
102
  possible_config_files = [
103
103
  "./config/database.yml",
104
- "../database.yml",
105
- "./database.yml"
104
+ "../config/database.yml",
105
+ "./database.yml",
106
+ "../database.yml"
106
107
  ]
107
108
  database_config_file = nil
108
109
  for file in possible_config_files
@@ -560,6 +561,7 @@ module FeedTools
560
561
 
561
562
  # Escapes all html entities
562
563
  def FeedTools.escape_entities(html)
564
+ return nil if html.nil?
563
565
  escaped_html = CGI.escapeHTML(html)
564
566
  unescaped_html.gsub!(/'/, "'")
565
567
  unescaped_html.gsub!(/"/, "&quot;")
@@ -568,6 +570,7 @@ module FeedTools
568
570
 
569
571
  # Unescapes all html entities
570
572
  def FeedTools.unescape_entities(html)
573
+ return nil if html.nil?
571
574
  unescaped_html = html
572
575
  unescaped_html.gsub!(/&/, "&")
573
576
  unescaped_html.gsub!(/&/, "&")
@@ -579,6 +582,7 @@ module FeedTools
579
582
 
580
583
  # Removes all html tags from the html formatted text.
581
584
  def FeedTools.strip_html(html)
585
+ return nil if html.nil?
582
586
  # TODO: do this properly
583
587
  # ======================
584
588
  stripped_html = html.gsub(/<\/?[^>]+>/, "")
@@ -587,6 +591,7 @@ module FeedTools
587
591
 
588
592
  # Tidys up the html
589
593
  def FeedTools.tidy_html(html)
594
+ return nil if html.nil?
590
595
  if FeedTools.tidy_enabled?
591
596
  is_fragment = true
592
597
  html.gsub!(/&lt;!'/, "&amp;lt;!'")
@@ -628,6 +633,7 @@ module FeedTools
628
633
  # elements and all children will be removed entirely.
629
634
  # Dangerous or unknown attributes are always removed.
630
635
  def FeedTools.sanitize_html(html, mode=:strip)
636
+ return nil if html.nil?
631
637
 
632
638
  # Lists borrowed from Mark Pilgrim's feedparser
633
639
  acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
@@ -688,6 +694,21 @@ module FeedTools
688
694
  html = html_doc.root.inner_xml
689
695
  return html
690
696
  end
697
+
698
+ # Creates a merged "planet" feed from a set of urls.
699
+ def FeedTools.build_merged_feed(url_array)
700
+ return nil if url_array.nil?
701
+ merged_feed = Feed.new
702
+ url_array.each do |feed_url|
703
+ feed = Feed.open(feed_url)
704
+ merged_feed.entries.concat(
705
+ feed.entries.collect do |entry|
706
+ entry.title = "#{feed.title}: #{entry.title}"
707
+ entry
708
+ end )
709
+ end
710
+ return merged_feed
711
+ end
691
712
 
692
713
  class Feed
693
714
  include REXML # :nodoc:
@@ -1089,7 +1110,7 @@ module FeedTools
1089
1110
 
1090
1111
  # Returns the channel node of the feed.
1091
1112
  def channel_node
1092
- if @channel_node.nil?
1113
+ if @channel_node.nil? && root_node != nil
1093
1114
  @channel_node = XPath.first(root_node, "channel")
1094
1115
  if @channel_node == nil
1095
1116
  @channel_node = XPath.first(root_node, "CHANNEL")
@@ -1225,9 +1246,19 @@ module FeedTools
1225
1246
  # Returns the feed's unique id
1226
1247
  def id
1227
1248
  if @id.nil?
1228
- @id = XPath.first(root_node, "id/text()").to_s
1229
- if @id == ""
1230
- @id = XPath.first(root_node, "guid/text()").to_s
1249
+ unless channel_node.nil?
1250
+ @id = XPath.first(channel_node, "id/text()").to_s
1251
+ if @id == ""
1252
+ @id = XPath.first(channel_node, "guid/text()").to_s
1253
+ end
1254
+ end
1255
+ unless root_node.nil?
1256
+ if @id == "" || @id.nil?
1257
+ @id = XPath.first(root_node, "id/text()").to_s
1258
+ end
1259
+ if @id == ""
1260
+ @id = XPath.first(root_node, "guid/text()").to_s
1261
+ end
1231
1262
  end
1232
1263
  @id = nil if @id == ""
1233
1264
  end
@@ -1257,13 +1288,15 @@ module FeedTools
1257
1288
  # Returns the feed title
1258
1289
  def title
1259
1290
  if @title.nil?
1260
- repair_entities = false
1261
- title_node = XPath.first(channel_node, "title")
1262
- if title_node.nil?
1263
- title_node = XPath.first(channel_node, "dc:title")
1264
- end
1265
- if title_node.nil?
1266
- title_node = XPath.first(channel_node, "TITLE")
1291
+ unless channel_node.nil?
1292
+ repair_entities = false
1293
+ title_node = XPath.first(channel_node, "title")
1294
+ if title_node.nil?
1295
+ title_node = XPath.first(channel_node, "dc:title")
1296
+ end
1297
+ if title_node.nil?
1298
+ title_node = XPath.first(channel_node, "TITLE")
1299
+ end
1267
1300
  end
1268
1301
  if title_node.nil?
1269
1302
  return nil
@@ -1304,41 +1337,43 @@ module FeedTools
1304
1337
  # Returns the feed description
1305
1338
  def description
1306
1339
  if @description.nil?
1307
- repair_entities = false
1308
- description_node = XPath.first(channel_node, "description")
1309
- if description_node.nil?
1310
- description_node = XPath.first(channel_node, "tagline")
1311
- end
1312
- if description_node.nil?
1313
- description_node = XPath.first(channel_node, "subtitle")
1314
- end
1315
- if description_node.nil?
1316
- description_node = XPath.first(channel_node, "summary")
1317
- end
1318
- if description_node.nil?
1319
- description_node = XPath.first(channel_node, "abstract")
1320
- end
1321
- if description_node.nil?
1322
- description_node = XPath.first(channel_node, "ABSTRACT")
1323
- end
1324
- if description_node.nil?
1325
- description_node = XPath.first(channel_node, "info")
1326
- end
1327
- if description_node.nil?
1328
- description_node = XPath.first(channel_node, "content:encoded")
1329
- @bozo = true unless description_node.nil?
1330
- end
1331
- if description_node.nil?
1332
- description_node = XPath.first(channel_node, "content")
1333
- @bozo = true unless description_node.nil?
1334
- end
1335
- if description_node.nil?
1336
- description_node = XPath.first(channel_node, "xhtml:body")
1337
- @bozo = true unless description_node.nil?
1338
- end
1339
- if description_node.nil?
1340
- description_node = XPath.first(channel_node, "body")
1341
- @bozo = true unless description_node.nil?
1340
+ unless channel_node.nil?
1341
+ repair_entities = false
1342
+ description_node = XPath.first(channel_node, "description")
1343
+ if description_node.nil?
1344
+ description_node = XPath.first(channel_node, "tagline")
1345
+ end
1346
+ if description_node.nil?
1347
+ description_node = XPath.first(channel_node, "subtitle")
1348
+ end
1349
+ if description_node.nil?
1350
+ description_node = XPath.first(channel_node, "summary")
1351
+ end
1352
+ if description_node.nil?
1353
+ description_node = XPath.first(channel_node, "abstract")
1354
+ end
1355
+ if description_node.nil?
1356
+ description_node = XPath.first(channel_node, "ABSTRACT")
1357
+ end
1358
+ if description_node.nil?
1359
+ description_node = XPath.first(channel_node, "info")
1360
+ end
1361
+ if description_node.nil?
1362
+ description_node = XPath.first(channel_node, "content:encoded")
1363
+ @bozo = true unless description_node.nil?
1364
+ end
1365
+ if description_node.nil?
1366
+ description_node = XPath.first(channel_node, "content")
1367
+ @bozo = true unless description_node.nil?
1368
+ end
1369
+ if description_node.nil?
1370
+ description_node = XPath.first(channel_node, "xhtml:body")
1371
+ @bozo = true unless description_node.nil?
1372
+ end
1373
+ if description_node.nil?
1374
+ description_node = XPath.first(channel_node, "body")
1375
+ @bozo = true unless description_node.nil?
1376
+ end
1342
1377
  end
1343
1378
  if description_node.nil?
1344
1379
  return nil
@@ -1392,8 +1427,16 @@ module FeedTools
1392
1427
  # Returns the contents of the itunes:summary element
1393
1428
  def itunes_summary
1394
1429
  if @itunes_summary.nil?
1395
- @itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
1396
- "itunes:summary/text()").to_s)
1430
+ unless channel_node.nil?
1431
+ @itunes_summary = FeedTools.unescape_entities(XPath.first(channel_node,
1432
+ "itunes:summary/text()").to_s)
1433
+ end
1434
+ unless root_node.nil?
1435
+ if @itunes_summary == "" || @itunes_summary.nil?
1436
+ @itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
1437
+ "itunes:summary/text()").to_s)
1438
+ end
1439
+ end
1397
1440
  if @itunes_summary == ""
1398
1441
  @itunes_summary = nil
1399
1442
  end
@@ -1411,8 +1454,16 @@ module FeedTools
1411
1454
  # Returns the contents of the itunes:subtitle element
1412
1455
  def itunes_subtitle
1413
1456
  if @itunes_subtitle.nil?
1414
- @itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
1415
- "itunes:subtitle/text()").to_s)
1457
+ unless channel_node.nil?
1458
+ @itunes_subtitle = FeedTools.unescape_entities(XPath.first(channel_node,
1459
+ "itunes:subtitle/text()").to_s)
1460
+ end
1461
+ unless root_node.nil?
1462
+ if @itunes_subtitle == "" || @itunes_subtitle.nil?
1463
+ @itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
1464
+ "itunes:subtitle/text()").to_s)
1465
+ end
1466
+ end
1416
1467
  if @itunes_subtitle == ""
1417
1468
  @itunes_subtitle = nil
1418
1469
  end
@@ -1431,35 +1482,37 @@ module FeedTools
1431
1482
  # Returns the feed link
1432
1483
  def link
1433
1484
  if @link.nil?
1434
- # get the feed link from the xml document
1435
- @link = XPath.first(channel_node, "link[@rel='alternate' @type='text/html']/@href").to_s
1436
- if @link == ""
1437
- @link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
1438
- end
1439
- if @link == ""
1440
- @link = XPath.first(channel_node, "link/@href").to_s
1441
- end
1442
- if @link == ""
1443
- @link = XPath.first(channel_node, "link/text()").to_s
1444
- end
1445
- if @link == ""
1446
- @link = XPath.first(channel_node, "@href").to_s
1447
- end
1448
- if @link == ""
1449
- @link = XPath.first(channel_node, "@HREF").to_s
1450
- end
1451
- if @link == ""
1452
- @link = XPath.first(channel_node, "a/@href").to_s
1453
- end
1454
- if @link == ""
1455
- @link = XPath.first(channel_node, "A/@HREF").to_s
1485
+ unless channel_node.nil?
1486
+ # get the feed link from the xml document
1487
+ @link = XPath.first(channel_node, "link[@rel='alternate' @type='text/html']/@href").to_s
1488
+ if @link == ""
1489
+ @link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
1490
+ end
1491
+ if @link == ""
1492
+ @link = XPath.first(channel_node, "link/@href").to_s
1493
+ end
1494
+ if @link == ""
1495
+ @link = XPath.first(channel_node, "link/text()").to_s
1496
+ end
1497
+ if @link == ""
1498
+ @link = XPath.first(channel_node, "@href").to_s
1499
+ end
1500
+ if @link == ""
1501
+ @link = XPath.first(channel_node, "@HREF").to_s
1502
+ end
1503
+ if @link == ""
1504
+ @link = XPath.first(channel_node, "a/@href").to_s
1505
+ end
1506
+ if @link == ""
1507
+ @link = XPath.first(channel_node, "A/@HREF").to_s
1508
+ end
1456
1509
  end
1457
- if @link == ""
1510
+ if @link == "" || @link.nil?
1458
1511
  if FeedTools.is_url? self.guid
1459
1512
  @link = self.guid
1460
1513
  end
1461
1514
  end
1462
- if @link == ""
1515
+ if @link == "" && channel_node != nil
1463
1516
  # Technically, we shouldn't use the base attribute for this, but if the href attribute
1464
1517
  # is missing, it's already a given that we're looking at a messed up CDF file. We can
1465
1518
  # always pray it's correct.
@@ -1755,53 +1808,55 @@ module FeedTools
1755
1808
  def images
1756
1809
  if @images.nil?
1757
1810
  @images = []
1758
- image_nodes = XPath.match(channel_node, "image")
1759
- if image_nodes.nil? || image_nodes.empty?
1760
- image_nodes = XPath.match(channel_node, "link")
1761
- end
1762
- if image_nodes.nil? || image_nodes.empty?
1763
- image_nodes = XPath.match(channel_node, "logo")
1764
- end
1765
- if image_nodes.nil? || image_nodes.empty?
1766
- image_nodes = XPath.match(channel_node, "LOGO")
1767
- end
1768
- unless image_nodes.nil?
1769
- for image_node in image_nodes
1770
- image = FeedTools::Feed::Image.new
1771
- image.url = XPath.first(image_node, "url/text()").to_s
1772
- if image.url == ""
1773
- image.url = XPath.first(image_node, "@rdf:resource").to_s
1774
- end
1775
- if image.url == "" && (image_node.name == "logo" ||
1776
- (image_node.attributes['type'] =~ /^image/) == 0)
1777
- image.url = XPath.first(image_node, "@href").to_s
1778
- end
1779
- if image.url == "" && image_node.name == "LOGO"
1780
- image.url = XPath.first(image_node, "@HREF").to_s
1781
- end
1782
- image.url.strip! unless image.url.nil?
1783
- image.url = nil if image.url == ""
1784
- image.title = XPath.first(image_node, "title/text()").to_s
1785
- image.title.strip! unless image.title.nil?
1786
- image.title = nil if image.title == ""
1787
- image.description =
1788
- XPath.first(image_node, "description/text()").to_s
1789
- image.description.strip! unless image.description.nil?
1790
- image.description = nil if image.description == ""
1791
- image.link = XPath.first(image_node, "link/text()").to_s
1792
- image.link.strip! unless image.link.nil?
1793
- image.link = nil if image.link == ""
1794
- image.height = XPath.first(image_node, "height/text()").to_s.to_i
1795
- image.height = nil if image.height <= 0
1796
- image.width = XPath.first(image_node, "width/text()").to_s.to_i
1797
- image.width = nil if image.width <= 0
1798
- image.style = XPath.first(image_node, "@style").to_s.downcase
1799
- if image.style == ""
1800
- image.style = XPath.first(image_node, "@STYLE").to_s.downcase
1811
+ unless channel_node.nil?
1812
+ image_nodes = XPath.match(channel_node, "image")
1813
+ if image_nodes.nil? || image_nodes.empty?
1814
+ image_nodes = XPath.match(channel_node, "link")
1815
+ end
1816
+ if image_nodes.nil? || image_nodes.empty?
1817
+ image_nodes = XPath.match(channel_node, "logo")
1818
+ end
1819
+ if image_nodes.nil? || image_nodes.empty?
1820
+ image_nodes = XPath.match(channel_node, "LOGO")
1821
+ end
1822
+ unless image_nodes.nil?
1823
+ for image_node in image_nodes
1824
+ image = FeedTools::Feed::Image.new
1825
+ image.url = XPath.first(image_node, "url/text()").to_s
1826
+ if image.url == ""
1827
+ image.url = XPath.first(image_node, "@rdf:resource").to_s
1828
+ end
1829
+ if image.url == "" && (image_node.name == "logo" ||
1830
+ (image_node.attributes['type'] =~ /^image/) == 0)
1831
+ image.url = XPath.first(image_node, "@href").to_s
1832
+ end
1833
+ if image.url == "" && image_node.name == "LOGO"
1834
+ image.url = XPath.first(image_node, "@HREF").to_s
1835
+ end
1836
+ image.url.strip! unless image.url.nil?
1837
+ image.url = nil if image.url == ""
1838
+ image.title = XPath.first(image_node, "title/text()").to_s
1839
+ image.title.strip! unless image.title.nil?
1840
+ image.title = nil if image.title == ""
1841
+ image.description =
1842
+ XPath.first(image_node, "description/text()").to_s
1843
+ image.description.strip! unless image.description.nil?
1844
+ image.description = nil if image.description == ""
1845
+ image.link = XPath.first(image_node, "link/text()").to_s
1846
+ image.link.strip! unless image.link.nil?
1847
+ image.link = nil if image.link == ""
1848
+ image.height = XPath.first(image_node, "height/text()").to_s.to_i
1849
+ image.height = nil if image.height <= 0
1850
+ image.width = XPath.first(image_node, "width/text()").to_s.to_i
1851
+ image.width = nil if image.width <= 0
1852
+ image.style = XPath.first(image_node, "@style").to_s.downcase
1853
+ if image.style == ""
1854
+ image.style = XPath.first(image_node, "@STYLE").to_s.downcase
1855
+ end
1856
+ image.style.strip! unless image.style.nil?
1857
+ image.style = nil if image.style == ""
1858
+ @images << image
1801
1859
  end
1802
- image.style.strip! unless image.style.nil?
1803
- image.style = nil if image.style == ""
1804
- @images << image
1805
1860
  end
1806
1861
  end
1807
1862
  end
@@ -1855,86 +1910,88 @@ module FeedTools
1855
1910
  # Returns the number of seconds before the feed should expire
1856
1911
  def time_to_live
1857
1912
  if @time_to_live.nil?
1858
- # get the feed time to live from the xml document
1859
- update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
1860
- if update_frequency != ""
1861
- update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
1862
- if update_period == "daily"
1863
- @time_to_live = update_frequency.to_i.day
1864
- elsif update_period == "weekly"
1865
- @time_to_live = update_frequency.to_i.week
1866
- elsif update_period == "monthly"
1867
- @time_to_live = update_frequency.to_i.month
1868
- elsif update_period == "yearly"
1869
- @time_to_live = update_frequency.to_i.year
1870
- else
1871
- # hourly
1872
- @time_to_live = update_frequency.to_i.hour
1913
+ unless channel_node.nil?
1914
+ # get the feed time to live from the xml document
1915
+ update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
1916
+ if update_frequency != ""
1917
+ update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
1918
+ if update_period == "daily"
1919
+ @time_to_live = update_frequency.to_i.day
1920
+ elsif update_period == "weekly"
1921
+ @time_to_live = update_frequency.to_i.week
1922
+ elsif update_period == "monthly"
1923
+ @time_to_live = update_frequency.to_i.month
1924
+ elsif update_period == "yearly"
1925
+ @time_to_live = update_frequency.to_i.year
1926
+ else
1927
+ # hourly
1928
+ @time_to_live = update_frequency.to_i.hour
1929
+ end
1930
+ end
1931
+ if @time_to_live.nil?
1932
+ # usually expressed in minutes
1933
+ update_frequency = XPath.first(channel_node, "ttl/text()").to_s
1934
+ if update_frequency != ""
1935
+ update_span = XPath.first(channel_node, "ttl/@span").to_s
1936
+ if update_span == "seconds"
1937
+ @time_to_live = update_frequency.to_i
1938
+ elsif update_span == "minutes"
1939
+ @time_to_live = update_frequency.to_i.minute
1940
+ elsif update_span == "hours"
1941
+ @time_to_live = update_frequency.to_i.hour
1942
+ elsif update_span == "days"
1943
+ @time_to_live = update_frequency.to_i.day
1944
+ elsif update_span == "weeks"
1945
+ @time_to_live = update_frequency.to_i.week
1946
+ elsif update_span == "months"
1947
+ @time_to_live = update_frequency.to_i.month
1948
+ elsif update_span == "years"
1949
+ @time_to_live = update_frequency.to_i.year
1950
+ elsif update_frequency.to_i >= 3000
1951
+ # Normally, this should default to minutes, but realistically,
1952
+ # if they meant minutes, you're rarely going to see a value higher
1953
+ # than 120. If we see >= 3000, we're either dealing with a stupid
1954
+ # pseudo-spec that decided to use seconds, or we're looking at
1955
+ # someone who only has weekly updated content. Worst case, we
1956
+ # misreport the time, and we update too often. Best case, we
1957
+ # avoid accidentally updating the feed only once a year. In the
1958
+ # interests of being pragmatic, and since the problem we avoid
1959
+ # is a far greater one than the one we cause, just run the check
1960
+ # and hope no one actually gets hurt.
1961
+ @time_to_live = update_frequency.to_i
1962
+ else
1963
+ @time_to_live = update_frequency.to_i.minute
1964
+ end
1965
+ end
1873
1966
  end
1874
- end
1875
- end
1876
- if @time_to_live.nil?
1877
- # usually expressed in minutes
1878
- update_frequency = XPath.first(channel_node, "ttl/text()").to_s
1879
- if update_frequency != ""
1880
- update_span = XPath.first(channel_node, "ttl/@span").to_s
1881
- if update_span == "seconds"
1882
- @time_to_live = update_frequency.to_i
1883
- elsif update_span == "minutes"
1884
- @time_to_live = update_frequency.to_i.minute
1885
- elsif update_span == "hours"
1886
- @time_to_live = update_frequency.to_i.hour
1887
- elsif update_span == "days"
1888
- @time_to_live = update_frequency.to_i.day
1889
- elsif update_span == "weeks"
1890
- @time_to_live = update_frequency.to_i.week
1891
- elsif update_span == "months"
1892
- @time_to_live = update_frequency.to_i.month
1893
- elsif update_span == "years"
1894
- @time_to_live = update_frequency.to_i.year
1895
- elsif update_frequency.to_i >= 3000
1896
- # Normally, this should default to minutes, but realistically,
1897
- # if they meant minutes, you're rarely going to see a value higher
1898
- # than 120. If we see >= 3000, we're either dealing with a stupid
1899
- # pseudo-spec that decided to use seconds, or we're looking at
1900
- # someone who only has weekly updated content. Worst case, we
1901
- # misreport the time, and we update too often. Best case, we
1902
- # avoid accidentally updating the feed only once a year. In the
1903
- # interests of being pragmatic, and since the problem we avoid
1904
- # is a far greater one than the one we cause, just run the check
1905
- # and hope no one actually gets hurt.
1906
- @time_to_live = update_frequency.to_i
1907
- else
1908
- @time_to_live = update_frequency.to_i.minute
1967
+ if @time_to_live.nil?
1968
+ @time_to_live = 0
1969
+ update_frequency_days =
1970
+ XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1971
+ update_frequency_hours =
1972
+ XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1973
+ update_frequency_minutes =
1974
+ XPath.first(channel_node, "schedule/intervaltime/@min").to_s
1975
+ update_frequency_seconds =
1976
+ XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
1977
+ if update_frequency_days != ""
1978
+ @time_to_live = @time_to_live + update_frequency_days.to_i.day
1979
+ end
1980
+ if update_frequency_hours != ""
1981
+ @time_to_live = @time_to_live + update_frequency_hours.to_i.hour
1982
+ end
1983
+ if update_frequency_minutes != ""
1984
+ @time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
1985
+ end
1986
+ if update_frequency_seconds != ""
1987
+ @time_to_live = @time_to_live + update_frequency_seconds.to_i
1988
+ end
1989
+ if @time_to_live == 0
1990
+ @time_to_live = 1.hour
1991
+ end
1909
1992
  end
1910
1993
  end
1911
1994
  end
1912
- if @time_to_live.nil?
1913
- @time_to_live = 0
1914
- update_frequency_days =
1915
- XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1916
- update_frequency_hours =
1917
- XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1918
- update_frequency_minutes =
1919
- XPath.first(channel_node, "schedule/intervaltime/@min").to_s
1920
- update_frequency_seconds =
1921
- XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
1922
- if update_frequency_days != ""
1923
- @time_to_live = @time_to_live + update_frequency_days.to_i.day
1924
- end
1925
- if update_frequency_hours != ""
1926
- @time_to_live = @time_to_live + update_frequency_hours.to_i.hour
1927
- end
1928
- if update_frequency_minutes != ""
1929
- @time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
1930
- end
1931
- if update_frequency_seconds != ""
1932
- @time_to_live = @time_to_live + update_frequency_seconds.to_i
1933
- end
1934
- if @time_to_live == 0
1935
- @time_to_live = 1.hour
1936
- end
1937
- end
1938
1995
  if @time_to_live.nil? || @time_to_live == 0
1939
1996
  # Default to one hour
1940
1997
  @time_to_live = 1.hour
@@ -2009,17 +2066,19 @@ module FeedTools
2009
2066
  # Returns the feed language
2010
2067
  def language
2011
2068
  if @language.nil?
2012
- @language = XPath.first(channel_node, "language/text()").to_s
2013
- if @language == ""
2014
- @language = XPath.first(channel_node, "dc:language/text()").to_s
2015
- end
2016
- if @language == ""
2017
- @language = XPath.first(channel_node, "xml:lang/text()").to_s
2018
- end
2019
- if @language == ""
2020
- @language = XPath.first(root_node, "xml:lang/text()").to_s
2069
+ unless channel_node.nil?
2070
+ @language = XPath.first(channel_node, "language/text()").to_s
2071
+ if @language == ""
2072
+ @language = XPath.first(channel_node, "dc:language/text()").to_s
2073
+ end
2074
+ if @language == ""
2075
+ @language = XPath.first(channel_node, "xml:lang/text()").to_s
2076
+ end
2077
+ if @language == ""
2078
+ @language = XPath.first(root_node, "xml:lang/text()").to_s
2079
+ end
2021
2080
  end
2022
- if @language == ""
2081
+ if @language == "" || @language.nil?
2023
2082
  @language = "en-us"
2024
2083
  end
2025
2084
  @language = @language.downcase
@@ -2058,23 +2117,25 @@ module FeedTools
2058
2117
  # Returns the feed items
2059
2118
  def items
2060
2119
  if @items.nil?
2061
- raw_items = XPath.match(root_node, "item")
2062
- if raw_items == nil || raw_items == []
2063
- raw_items = XPath.match(channel_node, "item")
2064
- end
2065
- if raw_items == nil || raw_items == []
2066
- raw_items = XPath.match(channel_node, "ITEM")
2067
- end
2068
- if raw_items == nil || raw_items == []
2069
- raw_items = XPath.match(root_node, "ITEM")
2070
- end
2071
- if raw_items == nil || raw_items == []
2072
- raw_items = XPath.match(channel_node, "entry")
2073
- end
2074
- if raw_items == nil || raw_items == []
2075
- raw_items = XPath.match(root_node, "entry")
2120
+ unless root_node.nil?
2121
+ raw_items = XPath.match(root_node, "item")
2122
+ if raw_items == nil || raw_items == []
2123
+ raw_items = XPath.match(channel_node, "item")
2124
+ end
2125
+ if raw_items == nil || raw_items == []
2126
+ raw_items = XPath.match(channel_node, "ITEM")
2127
+ end
2128
+ if raw_items == nil || raw_items == []
2129
+ raw_items = XPath.match(root_node, "ITEM")
2130
+ end
2131
+ if raw_items == nil || raw_items == []
2132
+ raw_items = XPath.match(channel_node, "entry")
2133
+ end
2134
+ if raw_items == nil || raw_items == []
2135
+ raw_items = XPath.match(root_node, "entry")
2136
+ end
2076
2137
  end
2077
-
2138
+
2078
2139
  # create the individual feed items
2079
2140
  @items = []
2080
2141
  if raw_items != nil
@@ -2174,7 +2235,11 @@ module FeedTools
2174
2235
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
2175
2236
  # RDF-based rss format
2176
2237
  return xml_builder.tag!("rdf:RDF") do
2177
- xml_builder.channel("rdf:about" => CGI.escapeHTML(link)) do
2238
+ channel_attributes = {}
2239
+ unless self.link.nil?
2240
+ channel_attributes["rdf:about"] = CGI.escapeHTML(self.link)
2241
+ end
2242
+ xml_builder.channel(channel_attributes) do
2178
2243
  unless title.nil? || title == ""
2179
2244
  xml_builder.title(title)
2180
2245
  else
@@ -2534,9 +2599,11 @@ module FeedTools
2534
2599
  # Returns the feed items's unique id
2535
2600
  def id
2536
2601
  if @id.nil?
2537
- @id = XPath.first(root_node, "id/text()").to_s
2538
- if @id == ""
2539
- @id = XPath.first(root_node, "guid/text()").to_s
2602
+ unless root_node.nil?
2603
+ @id = XPath.first(root_node, "id/text()").to_s
2604
+ if @id == ""
2605
+ @id = XPath.first(root_node, "guid/text()").to_s
2606
+ end
2540
2607
  end
2541
2608
  @id = nil if @id == ""
2542
2609
  end
@@ -2551,13 +2618,15 @@ module FeedTools
2551
2618
  # Returns the feed item title
2552
2619
  def title
2553
2620
  if @title.nil?
2554
- repair_entities = false
2555
- title_node = XPath.first(root_node, "title")
2556
- if title_node.nil?
2557
- title_node = XPath.first(root_node, "dc:title")
2558
- end
2559
- if title_node.nil?
2560
- title_node = XPath.first(root_node, "TITLE")
2621
+ unless root_node.nil?
2622
+ repair_entities = false
2623
+ title_node = XPath.first(root_node, "title")
2624
+ if title_node.nil?
2625
+ title_node = XPath.first(root_node, "dc:title")
2626
+ end
2627
+ if title_node.nil?
2628
+ title_node = XPath.first(root_node, "TITLE")
2629
+ end
2561
2630
  end
2562
2631
  if title_node.nil?
2563
2632
  return nil
@@ -2606,41 +2675,43 @@ module FeedTools
2606
2675
  # Returns the feed item description
2607
2676
  def description
2608
2677
  if @description.nil?
2609
- repair_entities = false
2610
- description_node = XPath.first(root_node, "description")
2611
- if description_node.nil?
2612
- description_node = XPath.first(root_node, "xhtml:body")
2613
- end
2614
- if description_node.nil?
2615
- description_node = XPath.first(root_node, "body")
2616
- end
2617
- if description_node.nil?
2618
- description_node = XPath.first(root_node, "tagline")
2619
- end
2620
- if description_node.nil?
2621
- description_node = XPath.first(root_node, "subtitle")
2622
- end
2623
- if description_node.nil?
2624
- description_node = XPath.first(root_node, "summary")
2625
- end
2626
- if description_node.nil?
2627
- description_node = XPath.first(root_node, "abstract")
2628
- end
2629
- if description_node.nil?
2630
- description_node = XPath.first(root_node, "ABSTRACT")
2631
- end
2632
- if description_node.nil?
2633
- description_node = XPath.first(root_node, "content:encoded")
2634
- end
2635
- if description_node.nil?
2636
- description_node = XPath.first(root_node, "content")
2637
- end
2638
- if description_node.nil?
2639
- description_node = XPath.first(root_node, "fullitem")
2640
- end
2641
- if description_node.nil?
2642
- description_node = XPath.first(root_node, "info")
2643
- @bozo = true unless description_node.nil?
2678
+ unless root_node.nil?
2679
+ repair_entities = false
2680
+ description_node = XPath.first(root_node, "description")
2681
+ if description_node.nil?
2682
+ description_node = XPath.first(root_node, "xhtml:body")
2683
+ end
2684
+ if description_node.nil?
2685
+ description_node = XPath.first(root_node, "body")
2686
+ end
2687
+ if description_node.nil?
2688
+ description_node = XPath.first(root_node, "tagline")
2689
+ end
2690
+ if description_node.nil?
2691
+ description_node = XPath.first(root_node, "subtitle")
2692
+ end
2693
+ if description_node.nil?
2694
+ description_node = XPath.first(root_node, "summary")
2695
+ end
2696
+ if description_node.nil?
2697
+ description_node = XPath.first(root_node, "abstract")
2698
+ end
2699
+ if description_node.nil?
2700
+ description_node = XPath.first(root_node, "ABSTRACT")
2701
+ end
2702
+ if description_node.nil?
2703
+ description_node = XPath.first(root_node, "content:encoded")
2704
+ end
2705
+ if description_node.nil?
2706
+ description_node = XPath.first(root_node, "content")
2707
+ end
2708
+ if description_node.nil?
2709
+ description_node = XPath.first(root_node, "fullitem")
2710
+ end
2711
+ if description_node.nil?
2712
+ description_node = XPath.first(root_node, "info")
2713
+ @bozo = true unless description_node.nil?
2714
+ end
2644
2715
  end
2645
2716
  if description_node.nil?
2646
2717
  return nil
@@ -2754,32 +2825,34 @@ module FeedTools
2754
2825
  # Returns the feed item link
2755
2826
  def link
2756
2827
  if @link.nil?
2757
- @link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s
2758
- if @link == ""
2759
- @link = XPath.first(root_node, "link/@href").to_s
2760
- end
2761
- if @link == ""
2762
- @link = XPath.first(root_node, "link/text()").to_s
2763
- end
2764
- if @link == ""
2765
- @link = XPath.first(root_node, "@rdf:about").to_s
2766
- end
2767
- if @link == ""
2768
- @link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
2769
- end
2770
- if @link == ""
2771
- @link = XPath.first(root_node, "@href").to_s
2772
- end
2773
- if @link == ""
2774
- @link = XPath.first(root_node, "a/@href").to_s
2775
- end
2776
- if @link == ""
2777
- @link = XPath.first(root_node, "@HREF").to_s
2778
- end
2779
- if @link == ""
2780
- @link = XPath.first(root_node, "A/@HREF").to_s
2828
+ unless root_node.nil?
2829
+ @link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s
2830
+ if @link == ""
2831
+ @link = XPath.first(root_node, "link/@href").to_s
2832
+ end
2833
+ if @link == ""
2834
+ @link = XPath.first(root_node, "link/text()").to_s
2835
+ end
2836
+ if @link == ""
2837
+ @link = XPath.first(root_node, "@rdf:about").to_s
2838
+ end
2839
+ if @link == ""
2840
+ @link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
2841
+ end
2842
+ if @link == ""
2843
+ @link = XPath.first(root_node, "@href").to_s
2844
+ end
2845
+ if @link == ""
2846
+ @link = XPath.first(root_node, "a/@href").to_s
2847
+ end
2848
+ if @link == ""
2849
+ @link = XPath.first(root_node, "@HREF").to_s
2850
+ end
2851
+ if @link == ""
2852
+ @link = XPath.first(root_node, "A/@HREF").to_s
2853
+ end
2781
2854
  end
2782
- if @link == ""
2855
+ if @link == "" || @link.nil?
2783
2856
  if FeedTools.is_url? self.guid
2784
2857
  @link = self.guid
2785
2858
  end
data/rakefile CHANGED
@@ -7,7 +7,7 @@ require 'rake/gempackagetask'
7
7
  require 'rake/contrib/rubyforgepublisher'
8
8
 
9
9
  PKG_NAME = 'feedtools'
10
- PKG_VERSION = '0.2.5'
10
+ PKG_VERSION = '0.2.6'
11
11
  PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
12
12
 
13
13
  RELEASE_NAME = "REL #{PKG_VERSION}"
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: feedtools
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.5
7
- date: 2005-08-19 00:00:00 -04:00
6
+ version: 0.2.6
7
+ date: 2005-09-01 00:00:00 -04:00
8
8
  summary: "Parsing, generation, and caching system for xml news feeds."
9
9
  require_paths:
10
10
  - lib