feedtools 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/lib/feed_tools.rb +369 -296
- data/rakefile +1 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
data/lib/feed_tools.rb
CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
25
25
|
ENV['RAILS_ENV'] ||
|
26
26
|
'production' # :nodoc:
|
27
27
|
|
28
|
-
FEED_TOOLS_VERSION = "0.2.5"
|
28
|
+
FEED_TOOLS_VERSION = "0.2.6"
|
29
29
|
|
30
30
|
$:.unshift(File.dirname(__FILE__))
|
31
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
@@ -101,8 +101,9 @@ module FeedTools
|
|
101
101
|
begin
|
102
102
|
possible_config_files = [
|
103
103
|
"./config/database.yml",
|
104
|
-
"../database.yml",
|
105
|
-
"./database.yml"
|
104
|
+
"../config/database.yml",
|
105
|
+
"./database.yml",
|
106
|
+
"../database.yml"
|
106
107
|
]
|
107
108
|
database_config_file = nil
|
108
109
|
for file in possible_config_files
|
@@ -560,6 +561,7 @@ module FeedTools
|
|
560
561
|
|
561
562
|
# Escapes all html entities
|
562
563
|
def FeedTools.escape_entities(html)
|
564
|
+
return nil if html.nil?
|
563
565
|
escaped_html = CGI.escapeHTML(html)
|
564
566
|
unescaped_html.gsub!(/'/, "&apos;")
|
565
567
|
unescaped_html.gsub!(/"/, "&quot;")
|
@@ -568,6 +570,7 @@ module FeedTools
|
|
568
570
|
|
569
571
|
# Unescapes all html entities
|
570
572
|
def FeedTools.unescape_entities(html)
|
573
|
+
return nil if html.nil?
|
571
574
|
unescaped_html = html
|
572
575
|
unescaped_html.gsub!(/&/, "&")
|
573
576
|
unescaped_html.gsub!(/&/, "&")
|
@@ -579,6 +582,7 @@ module FeedTools
|
|
579
582
|
|
580
583
|
# Removes all html tags from the html formatted text.
|
581
584
|
def FeedTools.strip_html(html)
|
585
|
+
return nil if html.nil?
|
582
586
|
# TODO: do this properly
|
583
587
|
# ======================
|
584
588
|
stripped_html = html.gsub(/<\/?[^>]+>/, "")
|
@@ -587,6 +591,7 @@ module FeedTools
|
|
587
591
|
|
588
592
|
# Tidys up the html
|
589
593
|
def FeedTools.tidy_html(html)
|
594
|
+
return nil if html.nil?
|
590
595
|
if FeedTools.tidy_enabled?
|
591
596
|
is_fragment = true
|
592
597
|
html.gsub!(/<!'/, "&lt;!'")
|
@@ -628,6 +633,7 @@ module FeedTools
|
|
628
633
|
# elements and all children will be removed entirely.
|
629
634
|
# Dangerous or unknown attributes are always removed.
|
630
635
|
def FeedTools.sanitize_html(html, mode=:strip)
|
636
|
+
return nil if html.nil?
|
631
637
|
|
632
638
|
# Lists borrowed from Mark Pilgrim's feedparser
|
633
639
|
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
|
@@ -688,6 +694,21 @@ module FeedTools
|
|
688
694
|
html = html_doc.root.inner_xml
|
689
695
|
return html
|
690
696
|
end
|
697
|
+
|
698
|
+
# Creates a merged "planet" feed from a set of urls.
|
699
|
+
def FeedTools.build_merged_feed(url_array)
|
700
|
+
return nil if url_array.nil?
|
701
|
+
merged_feed = Feed.new
|
702
|
+
url_array.each do |feed_url|
|
703
|
+
feed = Feed.open(feed_url)
|
704
|
+
merged_feed.entries.concat(
|
705
|
+
feed.entries.collect do |entry|
|
706
|
+
entry.title = "#{feed.title}: #{entry.title}"
|
707
|
+
entry
|
708
|
+
end )
|
709
|
+
end
|
710
|
+
return merged_feed
|
711
|
+
end
|
691
712
|
|
692
713
|
class Feed
|
693
714
|
include REXML # :nodoc:
|
@@ -1089,7 +1110,7 @@ module FeedTools
|
|
1089
1110
|
|
1090
1111
|
# Returns the channel node of the feed.
|
1091
1112
|
def channel_node
|
1092
|
-
if @channel_node.nil?
|
1113
|
+
if @channel_node.nil? && root_node != nil
|
1093
1114
|
@channel_node = XPath.first(root_node, "channel")
|
1094
1115
|
if @channel_node == nil
|
1095
1116
|
@channel_node = XPath.first(root_node, "CHANNEL")
|
@@ -1225,9 +1246,19 @@ module FeedTools
|
|
1225
1246
|
# Returns the feed's unique id
|
1226
1247
|
def id
|
1227
1248
|
if @id.nil?
|
1228
|
-
|
1229
|
-
|
1230
|
-
@id
|
1249
|
+
unless channel_node.nil?
|
1250
|
+
@id = XPath.first(channel_node, "id/text()").to_s
|
1251
|
+
if @id == ""
|
1252
|
+
@id = XPath.first(channel_node, "guid/text()").to_s
|
1253
|
+
end
|
1254
|
+
end
|
1255
|
+
unless root_node.nil?
|
1256
|
+
if @id == "" || @id.nil?
|
1257
|
+
@id = XPath.first(root_node, "id/text()").to_s
|
1258
|
+
end
|
1259
|
+
if @id == ""
|
1260
|
+
@id = XPath.first(root_node, "guid/text()").to_s
|
1261
|
+
end
|
1231
1262
|
end
|
1232
1263
|
@id = nil if @id == ""
|
1233
1264
|
end
|
@@ -1257,13 +1288,15 @@ module FeedTools
|
|
1257
1288
|
# Returns the feed title
|
1258
1289
|
def title
|
1259
1290
|
if @title.nil?
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
title_node
|
1264
|
-
|
1265
|
-
|
1266
|
-
title_node
|
1291
|
+
unless channel_node.nil?
|
1292
|
+
repair_entities = false
|
1293
|
+
title_node = XPath.first(channel_node, "title")
|
1294
|
+
if title_node.nil?
|
1295
|
+
title_node = XPath.first(channel_node, "dc:title")
|
1296
|
+
end
|
1297
|
+
if title_node.nil?
|
1298
|
+
title_node = XPath.first(channel_node, "TITLE")
|
1299
|
+
end
|
1267
1300
|
end
|
1268
1301
|
if title_node.nil?
|
1269
1302
|
return nil
|
@@ -1304,41 +1337,43 @@ module FeedTools
|
|
1304
1337
|
# Returns the feed description
|
1305
1338
|
def description
|
1306
1339
|
if @description.nil?
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
description_node
|
1311
|
-
|
1312
|
-
|
1313
|
-
description_node
|
1314
|
-
|
1315
|
-
|
1316
|
-
description_node
|
1317
|
-
|
1318
|
-
|
1319
|
-
description_node
|
1320
|
-
|
1321
|
-
|
1322
|
-
description_node
|
1323
|
-
|
1324
|
-
|
1325
|
-
description_node
|
1326
|
-
|
1327
|
-
|
1328
|
-
description_node
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
description_node
|
1333
|
-
|
1334
|
-
|
1335
|
-
|
1336
|
-
description_node
|
1337
|
-
|
1338
|
-
|
1339
|
-
|
1340
|
-
description_node
|
1341
|
-
|
1340
|
+
unless channel_node.nil?
|
1341
|
+
repair_entities = false
|
1342
|
+
description_node = XPath.first(channel_node, "description")
|
1343
|
+
if description_node.nil?
|
1344
|
+
description_node = XPath.first(channel_node, "tagline")
|
1345
|
+
end
|
1346
|
+
if description_node.nil?
|
1347
|
+
description_node = XPath.first(channel_node, "subtitle")
|
1348
|
+
end
|
1349
|
+
if description_node.nil?
|
1350
|
+
description_node = XPath.first(channel_node, "summary")
|
1351
|
+
end
|
1352
|
+
if description_node.nil?
|
1353
|
+
description_node = XPath.first(channel_node, "abstract")
|
1354
|
+
end
|
1355
|
+
if description_node.nil?
|
1356
|
+
description_node = XPath.first(channel_node, "ABSTRACT")
|
1357
|
+
end
|
1358
|
+
if description_node.nil?
|
1359
|
+
description_node = XPath.first(channel_node, "info")
|
1360
|
+
end
|
1361
|
+
if description_node.nil?
|
1362
|
+
description_node = XPath.first(channel_node, "content:encoded")
|
1363
|
+
@bozo = true unless description_node.nil?
|
1364
|
+
end
|
1365
|
+
if description_node.nil?
|
1366
|
+
description_node = XPath.first(channel_node, "content")
|
1367
|
+
@bozo = true unless description_node.nil?
|
1368
|
+
end
|
1369
|
+
if description_node.nil?
|
1370
|
+
description_node = XPath.first(channel_node, "xhtml:body")
|
1371
|
+
@bozo = true unless description_node.nil?
|
1372
|
+
end
|
1373
|
+
if description_node.nil?
|
1374
|
+
description_node = XPath.first(channel_node, "body")
|
1375
|
+
@bozo = true unless description_node.nil?
|
1376
|
+
end
|
1342
1377
|
end
|
1343
1378
|
if description_node.nil?
|
1344
1379
|
return nil
|
@@ -1392,8 +1427,16 @@ module FeedTools
|
|
1392
1427
|
# Returns the contents of the itunes:summary element
|
1393
1428
|
def itunes_summary
|
1394
1429
|
if @itunes_summary.nil?
|
1395
|
-
|
1396
|
-
|
1430
|
+
unless channel_node.nil?
|
1431
|
+
@itunes_summary = FeedTools.unescape_entities(XPath.first(channel_node,
|
1432
|
+
"itunes:summary/text()").to_s)
|
1433
|
+
end
|
1434
|
+
unless root_node.nil?
|
1435
|
+
if @itunes_summary == "" || @itunes_summary.nil?
|
1436
|
+
@itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
|
1437
|
+
"itunes:summary/text()").to_s)
|
1438
|
+
end
|
1439
|
+
end
|
1397
1440
|
if @itunes_summary == ""
|
1398
1441
|
@itunes_summary = nil
|
1399
1442
|
end
|
@@ -1411,8 +1454,16 @@ module FeedTools
|
|
1411
1454
|
# Returns the contents of the itunes:subtitle element
|
1412
1455
|
def itunes_subtitle
|
1413
1456
|
if @itunes_subtitle.nil?
|
1414
|
-
|
1415
|
-
|
1457
|
+
unless channel_node.nil?
|
1458
|
+
@itunes_subtitle = FeedTools.unescape_entities(XPath.first(channel_node,
|
1459
|
+
"itunes:subtitle/text()").to_s)
|
1460
|
+
end
|
1461
|
+
unless root_node.nil?
|
1462
|
+
if @itunes_subtitle == "" || @itunes_subtitle.nil?
|
1463
|
+
@itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
|
1464
|
+
"itunes:subtitle/text()").to_s)
|
1465
|
+
end
|
1466
|
+
end
|
1416
1467
|
if @itunes_subtitle == ""
|
1417
1468
|
@itunes_subtitle = nil
|
1418
1469
|
end
|
@@ -1431,35 +1482,37 @@ module FeedTools
|
|
1431
1482
|
# Returns the feed link
|
1432
1483
|
def link
|
1433
1484
|
if @link.nil?
|
1434
|
-
|
1435
|
-
|
1436
|
-
|
1437
|
-
@link
|
1438
|
-
|
1439
|
-
|
1440
|
-
@link
|
1441
|
-
|
1442
|
-
|
1443
|
-
@link
|
1444
|
-
|
1445
|
-
|
1446
|
-
@link
|
1447
|
-
|
1448
|
-
|
1449
|
-
@link
|
1450
|
-
|
1451
|
-
|
1452
|
-
@link
|
1453
|
-
|
1454
|
-
|
1455
|
-
@link
|
1485
|
+
unless channel_node.nil?
|
1486
|
+
# get the feed link from the xml document
|
1487
|
+
@link = XPath.first(channel_node, "link[@rel='alternate' @type='text/html']/@href").to_s
|
1488
|
+
if @link == ""
|
1489
|
+
@link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
|
1490
|
+
end
|
1491
|
+
if @link == ""
|
1492
|
+
@link = XPath.first(channel_node, "link/@href").to_s
|
1493
|
+
end
|
1494
|
+
if @link == ""
|
1495
|
+
@link = XPath.first(channel_node, "link/text()").to_s
|
1496
|
+
end
|
1497
|
+
if @link == ""
|
1498
|
+
@link = XPath.first(channel_node, "@href").to_s
|
1499
|
+
end
|
1500
|
+
if @link == ""
|
1501
|
+
@link = XPath.first(channel_node, "@HREF").to_s
|
1502
|
+
end
|
1503
|
+
if @link == ""
|
1504
|
+
@link = XPath.first(channel_node, "a/@href").to_s
|
1505
|
+
end
|
1506
|
+
if @link == ""
|
1507
|
+
@link = XPath.first(channel_node, "A/@HREF").to_s
|
1508
|
+
end
|
1456
1509
|
end
|
1457
|
-
if @link == ""
|
1510
|
+
if @link == "" || @link.nil?
|
1458
1511
|
if FeedTools.is_url? self.guid
|
1459
1512
|
@link = self.guid
|
1460
1513
|
end
|
1461
1514
|
end
|
1462
|
-
if @link == ""
|
1515
|
+
if @link == "" && channel_node != nil
|
1463
1516
|
# Technically, we shouldn't use the base attribute for this, but if the href attribute
|
1464
1517
|
# is missing, it's already a given that we're looking at a messed up CDF file. We can
|
1465
1518
|
# always pray it's correct.
|
@@ -1755,53 +1808,55 @@ module FeedTools
|
|
1755
1808
|
def images
|
1756
1809
|
if @images.nil?
|
1757
1810
|
@images = []
|
1758
|
-
|
1759
|
-
|
1760
|
-
image_nodes
|
1761
|
-
|
1762
|
-
|
1763
|
-
image_nodes
|
1764
|
-
|
1765
|
-
|
1766
|
-
image_nodes
|
1767
|
-
|
1768
|
-
|
1769
|
-
|
1770
|
-
|
1771
|
-
|
1772
|
-
|
1773
|
-
image.url
|
1774
|
-
|
1775
|
-
|
1776
|
-
|
1777
|
-
|
1778
|
-
|
1779
|
-
|
1780
|
-
image.url
|
1781
|
-
|
1782
|
-
|
1783
|
-
|
1784
|
-
|
1785
|
-
|
1786
|
-
|
1787
|
-
|
1788
|
-
|
1789
|
-
|
1790
|
-
|
1791
|
-
|
1792
|
-
|
1793
|
-
|
1794
|
-
|
1795
|
-
|
1796
|
-
|
1797
|
-
|
1798
|
-
|
1799
|
-
|
1800
|
-
image.style
|
1811
|
+
unless channel_node.nil?
|
1812
|
+
image_nodes = XPath.match(channel_node, "image")
|
1813
|
+
if image_nodes.nil? || image_nodes.empty?
|
1814
|
+
image_nodes = XPath.match(channel_node, "link")
|
1815
|
+
end
|
1816
|
+
if image_nodes.nil? || image_nodes.empty?
|
1817
|
+
image_nodes = XPath.match(channel_node, "logo")
|
1818
|
+
end
|
1819
|
+
if image_nodes.nil? || image_nodes.empty?
|
1820
|
+
image_nodes = XPath.match(channel_node, "LOGO")
|
1821
|
+
end
|
1822
|
+
unless image_nodes.nil?
|
1823
|
+
for image_node in image_nodes
|
1824
|
+
image = FeedTools::Feed::Image.new
|
1825
|
+
image.url = XPath.first(image_node, "url/text()").to_s
|
1826
|
+
if image.url == ""
|
1827
|
+
image.url = XPath.first(image_node, "@rdf:resource").to_s
|
1828
|
+
end
|
1829
|
+
if image.url == "" && (image_node.name == "logo" ||
|
1830
|
+
(image_node.attributes['type'] =~ /^image/) == 0)
|
1831
|
+
image.url = XPath.first(image_node, "@href").to_s
|
1832
|
+
end
|
1833
|
+
if image.url == "" && image_node.name == "LOGO"
|
1834
|
+
image.url = XPath.first(image_node, "@HREF").to_s
|
1835
|
+
end
|
1836
|
+
image.url.strip! unless image.url.nil?
|
1837
|
+
image.url = nil if image.url == ""
|
1838
|
+
image.title = XPath.first(image_node, "title/text()").to_s
|
1839
|
+
image.title.strip! unless image.title.nil?
|
1840
|
+
image.title = nil if image.title == ""
|
1841
|
+
image.description =
|
1842
|
+
XPath.first(image_node, "description/text()").to_s
|
1843
|
+
image.description.strip! unless image.description.nil?
|
1844
|
+
image.description = nil if image.description == ""
|
1845
|
+
image.link = XPath.first(image_node, "link/text()").to_s
|
1846
|
+
image.link.strip! unless image.link.nil?
|
1847
|
+
image.link = nil if image.link == ""
|
1848
|
+
image.height = XPath.first(image_node, "height/text()").to_s.to_i
|
1849
|
+
image.height = nil if image.height <= 0
|
1850
|
+
image.width = XPath.first(image_node, "width/text()").to_s.to_i
|
1851
|
+
image.width = nil if image.width <= 0
|
1852
|
+
image.style = XPath.first(image_node, "@style").to_s.downcase
|
1853
|
+
if image.style == ""
|
1854
|
+
image.style = XPath.first(image_node, "@STYLE").to_s.downcase
|
1855
|
+
end
|
1856
|
+
image.style.strip! unless image.style.nil?
|
1857
|
+
image.style = nil if image.style == ""
|
1858
|
+
@images << image
|
1801
1859
|
end
|
1802
|
-
image.style.strip! unless image.style.nil?
|
1803
|
-
image.style = nil if image.style == ""
|
1804
|
-
@images << image
|
1805
1860
|
end
|
1806
1861
|
end
|
1807
1862
|
end
|
@@ -1855,86 +1910,88 @@ module FeedTools
|
|
1855
1910
|
# Returns the number of seconds before the feed should expire
|
1856
1911
|
def time_to_live
|
1857
1912
|
if @time_to_live.nil?
|
1858
|
-
|
1859
|
-
|
1860
|
-
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
1866
|
-
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
1870
|
-
|
1871
|
-
|
1872
|
-
|
1913
|
+
unless channel_node.nil?
|
1914
|
+
# get the feed time to live from the xml document
|
1915
|
+
update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
|
1916
|
+
if update_frequency != ""
|
1917
|
+
update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
|
1918
|
+
if update_period == "daily"
|
1919
|
+
@time_to_live = update_frequency.to_i.day
|
1920
|
+
elsif update_period == "weekly"
|
1921
|
+
@time_to_live = update_frequency.to_i.week
|
1922
|
+
elsif update_period == "monthly"
|
1923
|
+
@time_to_live = update_frequency.to_i.month
|
1924
|
+
elsif update_period == "yearly"
|
1925
|
+
@time_to_live = update_frequency.to_i.year
|
1926
|
+
else
|
1927
|
+
# hourly
|
1928
|
+
@time_to_live = update_frequency.to_i.hour
|
1929
|
+
end
|
1930
|
+
end
|
1931
|
+
if @time_to_live.nil?
|
1932
|
+
# usually expressed in minutes
|
1933
|
+
update_frequency = XPath.first(channel_node, "ttl/text()").to_s
|
1934
|
+
if update_frequency != ""
|
1935
|
+
update_span = XPath.first(channel_node, "ttl/@span").to_s
|
1936
|
+
if update_span == "seconds"
|
1937
|
+
@time_to_live = update_frequency.to_i
|
1938
|
+
elsif update_span == "minutes"
|
1939
|
+
@time_to_live = update_frequency.to_i.minute
|
1940
|
+
elsif update_span == "hours"
|
1941
|
+
@time_to_live = update_frequency.to_i.hour
|
1942
|
+
elsif update_span == "days"
|
1943
|
+
@time_to_live = update_frequency.to_i.day
|
1944
|
+
elsif update_span == "weeks"
|
1945
|
+
@time_to_live = update_frequency.to_i.week
|
1946
|
+
elsif update_span == "months"
|
1947
|
+
@time_to_live = update_frequency.to_i.month
|
1948
|
+
elsif update_span == "years"
|
1949
|
+
@time_to_live = update_frequency.to_i.year
|
1950
|
+
elsif update_frequency.to_i >= 3000
|
1951
|
+
# Normally, this should default to minutes, but realistically,
|
1952
|
+
# if they meant minutes, you're rarely going to see a value higher
|
1953
|
+
# than 120. If we see >= 3000, we're either dealing with a stupid
|
1954
|
+
# pseudo-spec that decided to use seconds, or we're looking at
|
1955
|
+
# someone who only has weekly updated content. Worst case, we
|
1956
|
+
# misreport the time, and we update too often. Best case, we
|
1957
|
+
# avoid accidentally updating the feed only once a year. In the
|
1958
|
+
# interests of being pragmatic, and since the problem we avoid
|
1959
|
+
# is a far greater one than the one we cause, just run the check
|
1960
|
+
# and hope no one actually gets hurt.
|
1961
|
+
@time_to_live = update_frequency.to_i
|
1962
|
+
else
|
1963
|
+
@time_to_live = update_frequency.to_i.minute
|
1964
|
+
end
|
1965
|
+
end
|
1873
1966
|
end
|
1874
|
-
|
1875
|
-
|
1876
|
-
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
|
1882
|
-
|
1883
|
-
|
1884
|
-
|
1885
|
-
|
1886
|
-
|
1887
|
-
|
1888
|
-
|
1889
|
-
|
1890
|
-
|
1891
|
-
|
1892
|
-
|
1893
|
-
|
1894
|
-
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1898
|
-
|
1899
|
-
# pseudo-spec that decided to use seconds, or we're looking at
|
1900
|
-
# someone who only has weekly updated content. Worst case, we
|
1901
|
-
# misreport the time, and we update too often. Best case, we
|
1902
|
-
# avoid accidentally updating the feed only once a year. In the
|
1903
|
-
# interests of being pragmatic, and since the problem we avoid
|
1904
|
-
# is a far greater one than the one we cause, just run the check
|
1905
|
-
# and hope no one actually gets hurt.
|
1906
|
-
@time_to_live = update_frequency.to_i
|
1907
|
-
else
|
1908
|
-
@time_to_live = update_frequency.to_i.minute
|
1967
|
+
if @time_to_live.nil?
|
1968
|
+
@time_to_live = 0
|
1969
|
+
update_frequency_days =
|
1970
|
+
XPath.first(channel_node, "schedule/intervaltime/@days").to_s
|
1971
|
+
update_frequency_hours =
|
1972
|
+
XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
|
1973
|
+
update_frequency_minutes =
|
1974
|
+
XPath.first(channel_node, "schedule/intervaltime/@min").to_s
|
1975
|
+
update_frequency_seconds =
|
1976
|
+
XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
|
1977
|
+
if update_frequency_days != ""
|
1978
|
+
@time_to_live = @time_to_live + update_frequency_days.to_i.day
|
1979
|
+
end
|
1980
|
+
if update_frequency_hours != ""
|
1981
|
+
@time_to_live = @time_to_live + update_frequency_hours.to_i.hour
|
1982
|
+
end
|
1983
|
+
if update_frequency_minutes != ""
|
1984
|
+
@time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
|
1985
|
+
end
|
1986
|
+
if update_frequency_seconds != ""
|
1987
|
+
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1988
|
+
end
|
1989
|
+
if @time_to_live == 0
|
1990
|
+
@time_to_live = 1.hour
|
1991
|
+
end
|
1909
1992
|
end
|
1910
1993
|
end
|
1911
1994
|
end
|
1912
|
-
if @time_to_live.nil?
|
1913
|
-
@time_to_live = 0
|
1914
|
-
update_frequency_days =
|
1915
|
-
XPath.first(channel_node, "schedule/intervaltime/@days").to_s
|
1916
|
-
update_frequency_hours =
|
1917
|
-
XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
|
1918
|
-
update_frequency_minutes =
|
1919
|
-
XPath.first(channel_node, "schedule/intervaltime/@min").to_s
|
1920
|
-
update_frequency_seconds =
|
1921
|
-
XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
|
1922
|
-
if update_frequency_days != ""
|
1923
|
-
@time_to_live = @time_to_live + update_frequency_days.to_i.day
|
1924
|
-
end
|
1925
|
-
if update_frequency_hours != ""
|
1926
|
-
@time_to_live = @time_to_live + update_frequency_hours.to_i.hour
|
1927
|
-
end
|
1928
|
-
if update_frequency_minutes != ""
|
1929
|
-
@time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
|
1930
|
-
end
|
1931
|
-
if update_frequency_seconds != ""
|
1932
|
-
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1933
|
-
end
|
1934
|
-
if @time_to_live == 0
|
1935
|
-
@time_to_live = 1.hour
|
1936
|
-
end
|
1937
|
-
end
|
1938
1995
|
if @time_to_live.nil? || @time_to_live == 0
|
1939
1996
|
# Default to one hour
|
1940
1997
|
@time_to_live = 1.hour
|
@@ -2009,17 +2066,19 @@ module FeedTools
|
|
2009
2066
|
# Returns the feed language
|
2010
2067
|
def language
|
2011
2068
|
if @language.nil?
|
2012
|
-
|
2013
|
-
|
2014
|
-
@language
|
2015
|
-
|
2016
|
-
|
2017
|
-
@language
|
2018
|
-
|
2019
|
-
|
2020
|
-
@language
|
2069
|
+
unless channel_node.nil?
|
2070
|
+
@language = XPath.first(channel_node, "language/text()").to_s
|
2071
|
+
if @language == ""
|
2072
|
+
@language = XPath.first(channel_node, "dc:language/text()").to_s
|
2073
|
+
end
|
2074
|
+
if @language == ""
|
2075
|
+
@language = XPath.first(channel_node, "xml:lang/text()").to_s
|
2076
|
+
end
|
2077
|
+
if @language == ""
|
2078
|
+
@language = XPath.first(root_node, "xml:lang/text()").to_s
|
2079
|
+
end
|
2021
2080
|
end
|
2022
|
-
if @language == ""
|
2081
|
+
if @language == "" || @language.nil?
|
2023
2082
|
@language = "en-us"
|
2024
2083
|
end
|
2025
2084
|
@language = @language.downcase
|
@@ -2058,23 +2117,25 @@ module FeedTools
|
|
2058
2117
|
# Returns the feed items
|
2059
2118
|
def items
|
2060
2119
|
if @items.nil?
|
2061
|
-
|
2062
|
-
|
2063
|
-
raw_items
|
2064
|
-
|
2065
|
-
|
2066
|
-
raw_items
|
2067
|
-
|
2068
|
-
|
2069
|
-
raw_items
|
2070
|
-
|
2071
|
-
|
2072
|
-
raw_items
|
2073
|
-
|
2074
|
-
|
2075
|
-
raw_items
|
2120
|
+
unless root_node.nil?
|
2121
|
+
raw_items = XPath.match(root_node, "item")
|
2122
|
+
if raw_items == nil || raw_items == []
|
2123
|
+
raw_items = XPath.match(channel_node, "item")
|
2124
|
+
end
|
2125
|
+
if raw_items == nil || raw_items == []
|
2126
|
+
raw_items = XPath.match(channel_node, "ITEM")
|
2127
|
+
end
|
2128
|
+
if raw_items == nil || raw_items == []
|
2129
|
+
raw_items = XPath.match(root_node, "ITEM")
|
2130
|
+
end
|
2131
|
+
if raw_items == nil || raw_items == []
|
2132
|
+
raw_items = XPath.match(channel_node, "entry")
|
2133
|
+
end
|
2134
|
+
if raw_items == nil || raw_items == []
|
2135
|
+
raw_items = XPath.match(root_node, "entry")
|
2136
|
+
end
|
2076
2137
|
end
|
2077
|
-
|
2138
|
+
|
2078
2139
|
# create the individual feed items
|
2079
2140
|
@items = []
|
2080
2141
|
if raw_items != nil
|
@@ -2174,7 +2235,11 @@ module FeedTools
|
|
2174
2235
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
2175
2236
|
# RDF-based rss format
|
2176
2237
|
return xml_builder.tag!("rdf:RDF") do
|
2177
|
-
|
2238
|
+
channel_attributes = {}
|
2239
|
+
unless self.link.nil?
|
2240
|
+
channel_attributes["rdf:about"] = CGI.escapeHTML(self.link)
|
2241
|
+
end
|
2242
|
+
xml_builder.channel(channel_attributes) do
|
2178
2243
|
unless title.nil? || title == ""
|
2179
2244
|
xml_builder.title(title)
|
2180
2245
|
else
|
@@ -2534,9 +2599,11 @@ module FeedTools
|
|
2534
2599
|
# Returns the feed items's unique id
|
2535
2600
|
def id
|
2536
2601
|
if @id.nil?
|
2537
|
-
|
2538
|
-
|
2539
|
-
@id
|
2602
|
+
unless root_node.nil?
|
2603
|
+
@id = XPath.first(root_node, "id/text()").to_s
|
2604
|
+
if @id == ""
|
2605
|
+
@id = XPath.first(root_node, "guid/text()").to_s
|
2606
|
+
end
|
2540
2607
|
end
|
2541
2608
|
@id = nil if @id == ""
|
2542
2609
|
end
|
@@ -2551,13 +2618,15 @@ module FeedTools
|
|
2551
2618
|
# Returns the feed item title
|
2552
2619
|
def title
|
2553
2620
|
if @title.nil?
|
2554
|
-
|
2555
|
-
|
2556
|
-
|
2557
|
-
title_node
|
2558
|
-
|
2559
|
-
|
2560
|
-
title_node
|
2621
|
+
unless root_node.nil?
|
2622
|
+
repair_entities = false
|
2623
|
+
title_node = XPath.first(root_node, "title")
|
2624
|
+
if title_node.nil?
|
2625
|
+
title_node = XPath.first(root_node, "dc:title")
|
2626
|
+
end
|
2627
|
+
if title_node.nil?
|
2628
|
+
title_node = XPath.first(root_node, "TITLE")
|
2629
|
+
end
|
2561
2630
|
end
|
2562
2631
|
if title_node.nil?
|
2563
2632
|
return nil
|
@@ -2606,41 +2675,43 @@ module FeedTools
|
|
2606
2675
|
# Returns the feed item description
|
2607
2676
|
def description
|
2608
2677
|
if @description.nil?
|
2609
|
-
|
2610
|
-
|
2611
|
-
|
2612
|
-
description_node
|
2613
|
-
|
2614
|
-
|
2615
|
-
description_node
|
2616
|
-
|
2617
|
-
|
2618
|
-
description_node
|
2619
|
-
|
2620
|
-
|
2621
|
-
description_node
|
2622
|
-
|
2623
|
-
|
2624
|
-
description_node
|
2625
|
-
|
2626
|
-
|
2627
|
-
description_node
|
2628
|
-
|
2629
|
-
|
2630
|
-
description_node
|
2631
|
-
|
2632
|
-
|
2633
|
-
description_node
|
2634
|
-
|
2635
|
-
|
2636
|
-
description_node
|
2637
|
-
|
2638
|
-
|
2639
|
-
description_node
|
2640
|
-
|
2641
|
-
|
2642
|
-
description_node
|
2643
|
-
|
2678
|
+
unless root_node.nil?
|
2679
|
+
repair_entities = false
|
2680
|
+
description_node = XPath.first(root_node, "description")
|
2681
|
+
if description_node.nil?
|
2682
|
+
description_node = XPath.first(root_node, "xhtml:body")
|
2683
|
+
end
|
2684
|
+
if description_node.nil?
|
2685
|
+
description_node = XPath.first(root_node, "body")
|
2686
|
+
end
|
2687
|
+
if description_node.nil?
|
2688
|
+
description_node = XPath.first(root_node, "tagline")
|
2689
|
+
end
|
2690
|
+
if description_node.nil?
|
2691
|
+
description_node = XPath.first(root_node, "subtitle")
|
2692
|
+
end
|
2693
|
+
if description_node.nil?
|
2694
|
+
description_node = XPath.first(root_node, "summary")
|
2695
|
+
end
|
2696
|
+
if description_node.nil?
|
2697
|
+
description_node = XPath.first(root_node, "abstract")
|
2698
|
+
end
|
2699
|
+
if description_node.nil?
|
2700
|
+
description_node = XPath.first(root_node, "ABSTRACT")
|
2701
|
+
end
|
2702
|
+
if description_node.nil?
|
2703
|
+
description_node = XPath.first(root_node, "content:encoded")
|
2704
|
+
end
|
2705
|
+
if description_node.nil?
|
2706
|
+
description_node = XPath.first(root_node, "content")
|
2707
|
+
end
|
2708
|
+
if description_node.nil?
|
2709
|
+
description_node = XPath.first(root_node, "fullitem")
|
2710
|
+
end
|
2711
|
+
if description_node.nil?
|
2712
|
+
description_node = XPath.first(root_node, "info")
|
2713
|
+
@bozo = true unless description_node.nil?
|
2714
|
+
end
|
2644
2715
|
end
|
2645
2716
|
if description_node.nil?
|
2646
2717
|
return nil
|
@@ -2754,32 +2825,34 @@ module FeedTools
|
|
2754
2825
|
# Returns the feed item link
|
2755
2826
|
def link
|
2756
2827
|
if @link.nil?
|
2757
|
-
|
2758
|
-
|
2759
|
-
@link
|
2760
|
-
|
2761
|
-
|
2762
|
-
@link
|
2763
|
-
|
2764
|
-
|
2765
|
-
@link
|
2766
|
-
|
2767
|
-
|
2768
|
-
@link
|
2769
|
-
|
2770
|
-
|
2771
|
-
@link
|
2772
|
-
|
2773
|
-
|
2774
|
-
@link
|
2775
|
-
|
2776
|
-
|
2777
|
-
@link
|
2778
|
-
|
2779
|
-
|
2780
|
-
@link
|
2828
|
+
unless root_node.nil?
|
2829
|
+
@link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s
|
2830
|
+
if @link == ""
|
2831
|
+
@link = XPath.first(root_node, "link/@href").to_s
|
2832
|
+
end
|
2833
|
+
if @link == ""
|
2834
|
+
@link = XPath.first(root_node, "link/text()").to_s
|
2835
|
+
end
|
2836
|
+
if @link == ""
|
2837
|
+
@link = XPath.first(root_node, "@rdf:about").to_s
|
2838
|
+
end
|
2839
|
+
if @link == ""
|
2840
|
+
@link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
|
2841
|
+
end
|
2842
|
+
if @link == ""
|
2843
|
+
@link = XPath.first(root_node, "@href").to_s
|
2844
|
+
end
|
2845
|
+
if @link == ""
|
2846
|
+
@link = XPath.first(root_node, "a/@href").to_s
|
2847
|
+
end
|
2848
|
+
if @link == ""
|
2849
|
+
@link = XPath.first(root_node, "@HREF").to_s
|
2850
|
+
end
|
2851
|
+
if @link == ""
|
2852
|
+
@link = XPath.first(root_node, "A/@HREF").to_s
|
2853
|
+
end
|
2781
2854
|
end
|
2782
|
-
if @link == ""
|
2855
|
+
if @link == "" || @link.nil?
|
2783
2856
|
if FeedTools.is_url? self.guid
|
2784
2857
|
@link = self.guid
|
2785
2858
|
end
|
data/rakefile
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: feedtools
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.5
|
7
|
-
date: 2005-
|
6
|
+
version: 0.2.6
|
7
|
+
date: 2005-09-01 00:00:00 -04:00
|
8
8
|
summary: "Parsing, generation, and caching system for xml news feeds."
|
9
9
|
require_paths:
|
10
10
|
- lib
|