feedtools 0.2.5 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/lib/feed_tools.rb +369 -296
- data/rakefile +1 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
data/lib/feed_tools.rb
CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
25
25
|
ENV['RAILS_ENV'] ||
|
26
26
|
'production' # :nodoc:
|
27
27
|
|
28
|
-
FEED_TOOLS_VERSION = "0.2.5"
|
28
|
+
FEED_TOOLS_VERSION = "0.2.6"
|
29
29
|
|
30
30
|
$:.unshift(File.dirname(__FILE__))
|
31
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
@@ -101,8 +101,9 @@ module FeedTools
|
|
101
101
|
begin
|
102
102
|
possible_config_files = [
|
103
103
|
"./config/database.yml",
|
104
|
-
"../database.yml",
|
105
|
-
"./database.yml"
|
104
|
+
"../config/database.yml",
|
105
|
+
"./database.yml",
|
106
|
+
"../database.yml"
|
106
107
|
]
|
107
108
|
database_config_file = nil
|
108
109
|
for file in possible_config_files
|
@@ -560,6 +561,7 @@ module FeedTools
|
|
560
561
|
|
561
562
|
# Escapes all html entities
|
562
563
|
def FeedTools.escape_entities(html)
|
564
|
+
return nil if html.nil?
|
563
565
|
escaped_html = CGI.escapeHTML(html)
|
564
566
|
unescaped_html.gsub!(/'/, "&apos;")
|
565
567
|
unescaped_html.gsub!(/"/, "&quot;")
|
@@ -568,6 +570,7 @@ module FeedTools
|
|
568
570
|
|
569
571
|
# Unescapes all html entities
|
570
572
|
def FeedTools.unescape_entities(html)
|
573
|
+
return nil if html.nil?
|
571
574
|
unescaped_html = html
|
572
575
|
unescaped_html.gsub!(/&#x26;/, "&amp;")
|
573
576
|
unescaped_html.gsub!(/&#38;/, "&amp;")
|
@@ -579,6 +582,7 @@ module FeedTools
|
|
579
582
|
|
580
583
|
# Removes all html tags from the html formatted text.
|
581
584
|
def FeedTools.strip_html(html)
|
585
|
+
return nil if html.nil?
|
582
586
|
# TODO: do this properly
|
583
587
|
# ======================
|
584
588
|
stripped_html = html.gsub(/<\/?[^>]+>/, "")
|
@@ -587,6 +591,7 @@ module FeedTools
|
|
587
591
|
|
588
592
|
# Tidys up the html
|
589
593
|
def FeedTools.tidy_html(html)
|
594
|
+
return nil if html.nil?
|
590
595
|
if FeedTools.tidy_enabled?
|
591
596
|
is_fragment = true
|
592
597
|
html.gsub!(/<!'/, "&lt;!'")
|
@@ -628,6 +633,7 @@ module FeedTools
|
|
628
633
|
# elements and all children will be removed entirely.
|
629
634
|
# Dangerous or unknown attributes are always removed.
|
630
635
|
def FeedTools.sanitize_html(html, mode=:strip)
|
636
|
+
return nil if html.nil?
|
631
637
|
|
632
638
|
# Lists borrowed from Mark Pilgrim's feedparser
|
633
639
|
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
|
@@ -688,6 +694,21 @@ module FeedTools
|
|
688
694
|
html = html_doc.root.inner_xml
|
689
695
|
return html
|
690
696
|
end
|
697
|
+
|
698
|
+
# Creates a merged "planet" feed from a set of urls.
|
699
|
+
def FeedTools.build_merged_feed(url_array)
|
700
|
+
return nil if url_array.nil?
|
701
|
+
merged_feed = Feed.new
|
702
|
+
url_array.each do |feed_url|
|
703
|
+
feed = Feed.open(feed_url)
|
704
|
+
merged_feed.entries.concat(
|
705
|
+
feed.entries.collect do |entry|
|
706
|
+
entry.title = "#{feed.title}: #{entry.title}"
|
707
|
+
entry
|
708
|
+
end )
|
709
|
+
end
|
710
|
+
return merged_feed
|
711
|
+
end
|
691
712
|
|
692
713
|
class Feed
|
693
714
|
include REXML # :nodoc:
|
@@ -1089,7 +1110,7 @@ module FeedTools
|
|
1089
1110
|
|
1090
1111
|
# Returns the channel node of the feed.
|
1091
1112
|
def channel_node
|
1092
|
-
if @channel_node.nil?
|
1113
|
+
if @channel_node.nil? && root_node != nil
|
1093
1114
|
@channel_node = XPath.first(root_node, "channel")
|
1094
1115
|
if @channel_node == nil
|
1095
1116
|
@channel_node = XPath.first(root_node, "CHANNEL")
|
@@ -1225,9 +1246,19 @@ module FeedTools
|
|
1225
1246
|
# Returns the feed's unique id
|
1226
1247
|
def id
|
1227
1248
|
if @id.nil?
|
1228
|
-
|
1229
|
-
|
1230
|
-
@id
|
1249
|
+
unless channel_node.nil?
|
1250
|
+
@id = XPath.first(channel_node, "id/text()").to_s
|
1251
|
+
if @id == ""
|
1252
|
+
@id = XPath.first(channel_node, "guid/text()").to_s
|
1253
|
+
end
|
1254
|
+
end
|
1255
|
+
unless root_node.nil?
|
1256
|
+
if @id == "" || @id.nil?
|
1257
|
+
@id = XPath.first(root_node, "id/text()").to_s
|
1258
|
+
end
|
1259
|
+
if @id == ""
|
1260
|
+
@id = XPath.first(root_node, "guid/text()").to_s
|
1261
|
+
end
|
1231
1262
|
end
|
1232
1263
|
@id = nil if @id == ""
|
1233
1264
|
end
|
@@ -1257,13 +1288,15 @@ module FeedTools
|
|
1257
1288
|
# Returns the feed title
|
1258
1289
|
def title
|
1259
1290
|
if @title.nil?
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
title_node
|
1264
|
-
|
1265
|
-
|
1266
|
-
title_node
|
1291
|
+
unless channel_node.nil?
|
1292
|
+
repair_entities = false
|
1293
|
+
title_node = XPath.first(channel_node, "title")
|
1294
|
+
if title_node.nil?
|
1295
|
+
title_node = XPath.first(channel_node, "dc:title")
|
1296
|
+
end
|
1297
|
+
if title_node.nil?
|
1298
|
+
title_node = XPath.first(channel_node, "TITLE")
|
1299
|
+
end
|
1267
1300
|
end
|
1268
1301
|
if title_node.nil?
|
1269
1302
|
return nil
|
@@ -1304,41 +1337,43 @@ module FeedTools
|
|
1304
1337
|
# Returns the feed description
|
1305
1338
|
def description
|
1306
1339
|
if @description.nil?
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
description_node
|
1311
|
-
|
1312
|
-
|
1313
|
-
description_node
|
1314
|
-
|
1315
|
-
|
1316
|
-
description_node
|
1317
|
-
|
1318
|
-
|
1319
|
-
description_node
|
1320
|
-
|
1321
|
-
|
1322
|
-
description_node
|
1323
|
-
|
1324
|
-
|
1325
|
-
description_node
|
1326
|
-
|
1327
|
-
|
1328
|
-
description_node
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
description_node
|
1333
|
-
|
1334
|
-
|
1335
|
-
|
1336
|
-
description_node
|
1337
|
-
|
1338
|
-
|
1339
|
-
|
1340
|
-
description_node
|
1341
|
-
|
1340
|
+
unless channel_node.nil?
|
1341
|
+
repair_entities = false
|
1342
|
+
description_node = XPath.first(channel_node, "description")
|
1343
|
+
if description_node.nil?
|
1344
|
+
description_node = XPath.first(channel_node, "tagline")
|
1345
|
+
end
|
1346
|
+
if description_node.nil?
|
1347
|
+
description_node = XPath.first(channel_node, "subtitle")
|
1348
|
+
end
|
1349
|
+
if description_node.nil?
|
1350
|
+
description_node = XPath.first(channel_node, "summary")
|
1351
|
+
end
|
1352
|
+
if description_node.nil?
|
1353
|
+
description_node = XPath.first(channel_node, "abstract")
|
1354
|
+
end
|
1355
|
+
if description_node.nil?
|
1356
|
+
description_node = XPath.first(channel_node, "ABSTRACT")
|
1357
|
+
end
|
1358
|
+
if description_node.nil?
|
1359
|
+
description_node = XPath.first(channel_node, "info")
|
1360
|
+
end
|
1361
|
+
if description_node.nil?
|
1362
|
+
description_node = XPath.first(channel_node, "content:encoded")
|
1363
|
+
@bozo = true unless description_node.nil?
|
1364
|
+
end
|
1365
|
+
if description_node.nil?
|
1366
|
+
description_node = XPath.first(channel_node, "content")
|
1367
|
+
@bozo = true unless description_node.nil?
|
1368
|
+
end
|
1369
|
+
if description_node.nil?
|
1370
|
+
description_node = XPath.first(channel_node, "xhtml:body")
|
1371
|
+
@bozo = true unless description_node.nil?
|
1372
|
+
end
|
1373
|
+
if description_node.nil?
|
1374
|
+
description_node = XPath.first(channel_node, "body")
|
1375
|
+
@bozo = true unless description_node.nil?
|
1376
|
+
end
|
1342
1377
|
end
|
1343
1378
|
if description_node.nil?
|
1344
1379
|
return nil
|
@@ -1392,8 +1427,16 @@ module FeedTools
|
|
1392
1427
|
# Returns the contents of the itunes:summary element
|
1393
1428
|
def itunes_summary
|
1394
1429
|
if @itunes_summary.nil?
|
1395
|
-
|
1396
|
-
|
1430
|
+
unless channel_node.nil?
|
1431
|
+
@itunes_summary = FeedTools.unescape_entities(XPath.first(channel_node,
|
1432
|
+
"itunes:summary/text()").to_s)
|
1433
|
+
end
|
1434
|
+
unless root_node.nil?
|
1435
|
+
if @itunes_summary == "" || @itunes_summary.nil?
|
1436
|
+
@itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
|
1437
|
+
"itunes:summary/text()").to_s)
|
1438
|
+
end
|
1439
|
+
end
|
1397
1440
|
if @itunes_summary == ""
|
1398
1441
|
@itunes_summary = nil
|
1399
1442
|
end
|
@@ -1411,8 +1454,16 @@ module FeedTools
|
|
1411
1454
|
# Returns the contents of the itunes:subtitle element
|
1412
1455
|
def itunes_subtitle
|
1413
1456
|
if @itunes_subtitle.nil?
|
1414
|
-
|
1415
|
-
|
1457
|
+
unless channel_node.nil?
|
1458
|
+
@itunes_subtitle = FeedTools.unescape_entities(XPath.first(channel_node,
|
1459
|
+
"itunes:subtitle/text()").to_s)
|
1460
|
+
end
|
1461
|
+
unless root_node.nil?
|
1462
|
+
if @itunes_subtitle == "" || @itunes_subtitle.nil?
|
1463
|
+
@itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
|
1464
|
+
"itunes:subtitle/text()").to_s)
|
1465
|
+
end
|
1466
|
+
end
|
1416
1467
|
if @itunes_subtitle == ""
|
1417
1468
|
@itunes_subtitle = nil
|
1418
1469
|
end
|
@@ -1431,35 +1482,37 @@ module FeedTools
|
|
1431
1482
|
# Returns the feed link
|
1432
1483
|
def link
|
1433
1484
|
if @link.nil?
|
1434
|
-
|
1435
|
-
|
1436
|
-
|
1437
|
-
@link
|
1438
|
-
|
1439
|
-
|
1440
|
-
@link
|
1441
|
-
|
1442
|
-
|
1443
|
-
@link
|
1444
|
-
|
1445
|
-
|
1446
|
-
@link
|
1447
|
-
|
1448
|
-
|
1449
|
-
@link
|
1450
|
-
|
1451
|
-
|
1452
|
-
@link
|
1453
|
-
|
1454
|
-
|
1455
|
-
@link
|
1485
|
+
unless channel_node.nil?
|
1486
|
+
# get the feed link from the xml document
|
1487
|
+
@link = XPath.first(channel_node, "link[@rel='alternate' @type='text/html']/@href").to_s
|
1488
|
+
if @link == ""
|
1489
|
+
@link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
|
1490
|
+
end
|
1491
|
+
if @link == ""
|
1492
|
+
@link = XPath.first(channel_node, "link/@href").to_s
|
1493
|
+
end
|
1494
|
+
if @link == ""
|
1495
|
+
@link = XPath.first(channel_node, "link/text()").to_s
|
1496
|
+
end
|
1497
|
+
if @link == ""
|
1498
|
+
@link = XPath.first(channel_node, "@href").to_s
|
1499
|
+
end
|
1500
|
+
if @link == ""
|
1501
|
+
@link = XPath.first(channel_node, "@HREF").to_s
|
1502
|
+
end
|
1503
|
+
if @link == ""
|
1504
|
+
@link = XPath.first(channel_node, "a/@href").to_s
|
1505
|
+
end
|
1506
|
+
if @link == ""
|
1507
|
+
@link = XPath.first(channel_node, "A/@HREF").to_s
|
1508
|
+
end
|
1456
1509
|
end
|
1457
|
-
if @link == ""
|
1510
|
+
if @link == "" || @link.nil?
|
1458
1511
|
if FeedTools.is_url? self.guid
|
1459
1512
|
@link = self.guid
|
1460
1513
|
end
|
1461
1514
|
end
|
1462
|
-
if @link == ""
|
1515
|
+
if @link == "" && channel_node != nil
|
1463
1516
|
# Technically, we shouldn't use the base attribute for this, but if the href attribute
|
1464
1517
|
# is missing, it's already a given that we're looking at a messed up CDF file. We can
|
1465
1518
|
# always pray it's correct.
|
@@ -1755,53 +1808,55 @@ module FeedTools
|
|
1755
1808
|
def images
|
1756
1809
|
if @images.nil?
|
1757
1810
|
@images = []
|
1758
|
-
|
1759
|
-
|
1760
|
-
image_nodes
|
1761
|
-
|
1762
|
-
|
1763
|
-
image_nodes
|
1764
|
-
|
1765
|
-
|
1766
|
-
image_nodes
|
1767
|
-
|
1768
|
-
|
1769
|
-
|
1770
|
-
|
1771
|
-
|
1772
|
-
|
1773
|
-
image.url
|
1774
|
-
|
1775
|
-
|
1776
|
-
|
1777
|
-
|
1778
|
-
|
1779
|
-
|
1780
|
-
image.url
|
1781
|
-
|
1782
|
-
|
1783
|
-
|
1784
|
-
|
1785
|
-
|
1786
|
-
|
1787
|
-
|
1788
|
-
|
1789
|
-
|
1790
|
-
|
1791
|
-
|
1792
|
-
|
1793
|
-
|
1794
|
-
|
1795
|
-
|
1796
|
-
|
1797
|
-
|
1798
|
-
|
1799
|
-
|
1800
|
-
image.style
|
1811
|
+
unless channel_node.nil?
|
1812
|
+
image_nodes = XPath.match(channel_node, "image")
|
1813
|
+
if image_nodes.nil? || image_nodes.empty?
|
1814
|
+
image_nodes = XPath.match(channel_node, "link")
|
1815
|
+
end
|
1816
|
+
if image_nodes.nil? || image_nodes.empty?
|
1817
|
+
image_nodes = XPath.match(channel_node, "logo")
|
1818
|
+
end
|
1819
|
+
if image_nodes.nil? || image_nodes.empty?
|
1820
|
+
image_nodes = XPath.match(channel_node, "LOGO")
|
1821
|
+
end
|
1822
|
+
unless image_nodes.nil?
|
1823
|
+
for image_node in image_nodes
|
1824
|
+
image = FeedTools::Feed::Image.new
|
1825
|
+
image.url = XPath.first(image_node, "url/text()").to_s
|
1826
|
+
if image.url == ""
|
1827
|
+
image.url = XPath.first(image_node, "@rdf:resource").to_s
|
1828
|
+
end
|
1829
|
+
if image.url == "" && (image_node.name == "logo" ||
|
1830
|
+
(image_node.attributes['type'] =~ /^image/) == 0)
|
1831
|
+
image.url = XPath.first(image_node, "@href").to_s
|
1832
|
+
end
|
1833
|
+
if image.url == "" && image_node.name == "LOGO"
|
1834
|
+
image.url = XPath.first(image_node, "@HREF").to_s
|
1835
|
+
end
|
1836
|
+
image.url.strip! unless image.url.nil?
|
1837
|
+
image.url = nil if image.url == ""
|
1838
|
+
image.title = XPath.first(image_node, "title/text()").to_s
|
1839
|
+
image.title.strip! unless image.title.nil?
|
1840
|
+
image.title = nil if image.title == ""
|
1841
|
+
image.description =
|
1842
|
+
XPath.first(image_node, "description/text()").to_s
|
1843
|
+
image.description.strip! unless image.description.nil?
|
1844
|
+
image.description = nil if image.description == ""
|
1845
|
+
image.link = XPath.first(image_node, "link/text()").to_s
|
1846
|
+
image.link.strip! unless image.link.nil?
|
1847
|
+
image.link = nil if image.link == ""
|
1848
|
+
image.height = XPath.first(image_node, "height/text()").to_s.to_i
|
1849
|
+
image.height = nil if image.height <= 0
|
1850
|
+
image.width = XPath.first(image_node, "width/text()").to_s.to_i
|
1851
|
+
image.width = nil if image.width <= 0
|
1852
|
+
image.style = XPath.first(image_node, "@style").to_s.downcase
|
1853
|
+
if image.style == ""
|
1854
|
+
image.style = XPath.first(image_node, "@STYLE").to_s.downcase
|
1855
|
+
end
|
1856
|
+
image.style.strip! unless image.style.nil?
|
1857
|
+
image.style = nil if image.style == ""
|
1858
|
+
@images << image
|
1801
1859
|
end
|
1802
|
-
image.style.strip! unless image.style.nil?
|
1803
|
-
image.style = nil if image.style == ""
|
1804
|
-
@images << image
|
1805
1860
|
end
|
1806
1861
|
end
|
1807
1862
|
end
|
@@ -1855,86 +1910,88 @@ module FeedTools
|
|
1855
1910
|
# Returns the number of seconds before the feed should expire
|
1856
1911
|
def time_to_live
|
1857
1912
|
if @time_to_live.nil?
|
1858
|
-
|
1859
|
-
|
1860
|
-
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
1866
|
-
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
1870
|
-
|
1871
|
-
|
1872
|
-
|
1913
|
+
unless channel_node.nil?
|
1914
|
+
# get the feed time to live from the xml document
|
1915
|
+
update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
|
1916
|
+
if update_frequency != ""
|
1917
|
+
update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
|
1918
|
+
if update_period == "daily"
|
1919
|
+
@time_to_live = update_frequency.to_i.day
|
1920
|
+
elsif update_period == "weekly"
|
1921
|
+
@time_to_live = update_frequency.to_i.week
|
1922
|
+
elsif update_period == "monthly"
|
1923
|
+
@time_to_live = update_frequency.to_i.month
|
1924
|
+
elsif update_period == "yearly"
|
1925
|
+
@time_to_live = update_frequency.to_i.year
|
1926
|
+
else
|
1927
|
+
# hourly
|
1928
|
+
@time_to_live = update_frequency.to_i.hour
|
1929
|
+
end
|
1930
|
+
end
|
1931
|
+
if @time_to_live.nil?
|
1932
|
+
# usually expressed in minutes
|
1933
|
+
update_frequency = XPath.first(channel_node, "ttl/text()").to_s
|
1934
|
+
if update_frequency != ""
|
1935
|
+
update_span = XPath.first(channel_node, "ttl/@span").to_s
|
1936
|
+
if update_span == "seconds"
|
1937
|
+
@time_to_live = update_frequency.to_i
|
1938
|
+
elsif update_span == "minutes"
|
1939
|
+
@time_to_live = update_frequency.to_i.minute
|
1940
|
+
elsif update_span == "hours"
|
1941
|
+
@time_to_live = update_frequency.to_i.hour
|
1942
|
+
elsif update_span == "days"
|
1943
|
+
@time_to_live = update_frequency.to_i.day
|
1944
|
+
elsif update_span == "weeks"
|
1945
|
+
@time_to_live = update_frequency.to_i.week
|
1946
|
+
elsif update_span == "months"
|
1947
|
+
@time_to_live = update_frequency.to_i.month
|
1948
|
+
elsif update_span == "years"
|
1949
|
+
@time_to_live = update_frequency.to_i.year
|
1950
|
+
elsif update_frequency.to_i >= 3000
|
1951
|
+
# Normally, this should default to minutes, but realistically,
|
1952
|
+
# if they meant minutes, you're rarely going to see a value higher
|
1953
|
+
# than 120. If we see >= 3000, we're either dealing with a stupid
|
1954
|
+
# pseudo-spec that decided to use seconds, or we're looking at
|
1955
|
+
# someone who only has weekly updated content. Worst case, we
|
1956
|
+
# misreport the time, and we update too often. Best case, we
|
1957
|
+
# avoid accidentally updating the feed only once a year. In the
|
1958
|
+
# interests of being pragmatic, and since the problem we avoid
|
1959
|
+
# is a far greater one than the one we cause, just run the check
|
1960
|
+
# and hope no one actually gets hurt.
|
1961
|
+
@time_to_live = update_frequency.to_i
|
1962
|
+
else
|
1963
|
+
@time_to_live = update_frequency.to_i.minute
|
1964
|
+
end
|
1965
|
+
end
|
1873
1966
|
end
|
1874
|
-
|
1875
|
-
|
1876
|
-
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
|
1882
|
-
|
1883
|
-
|
1884
|
-
|
1885
|
-
|
1886
|
-
|
1887
|
-
|
1888
|
-
|
1889
|
-
|
1890
|
-
|
1891
|
-
|
1892
|
-
|
1893
|
-
|
1894
|
-
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1898
|
-
|
1899
|
-
# pseudo-spec that decided to use seconds, or we're looking at
|
1900
|
-
# someone who only has weekly updated content. Worst case, we
|
1901
|
-
# misreport the time, and we update too often. Best case, we
|
1902
|
-
# avoid accidentally updating the feed only once a year. In the
|
1903
|
-
# interests of being pragmatic, and since the problem we avoid
|
1904
|
-
# is a far greater one than the one we cause, just run the check
|
1905
|
-
# and hope no one actually gets hurt.
|
1906
|
-
@time_to_live = update_frequency.to_i
|
1907
|
-
else
|
1908
|
-
@time_to_live = update_frequency.to_i.minute
|
1967
|
+
if @time_to_live.nil?
|
1968
|
+
@time_to_live = 0
|
1969
|
+
update_frequency_days =
|
1970
|
+
XPath.first(channel_node, "schedule/intervaltime/@days").to_s
|
1971
|
+
update_frequency_hours =
|
1972
|
+
XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
|
1973
|
+
update_frequency_minutes =
|
1974
|
+
XPath.first(channel_node, "schedule/intervaltime/@min").to_s
|
1975
|
+
update_frequency_seconds =
|
1976
|
+
XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
|
1977
|
+
if update_frequency_days != ""
|
1978
|
+
@time_to_live = @time_to_live + update_frequency_days.to_i.day
|
1979
|
+
end
|
1980
|
+
if update_frequency_hours != ""
|
1981
|
+
@time_to_live = @time_to_live + update_frequency_hours.to_i.hour
|
1982
|
+
end
|
1983
|
+
if update_frequency_minutes != ""
|
1984
|
+
@time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
|
1985
|
+
end
|
1986
|
+
if update_frequency_seconds != ""
|
1987
|
+
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1988
|
+
end
|
1989
|
+
if @time_to_live == 0
|
1990
|
+
@time_to_live = 1.hour
|
1991
|
+
end
|
1909
1992
|
end
|
1910
1993
|
end
|
1911
1994
|
end
|
1912
|
-
if @time_to_live.nil?
|
1913
|
-
@time_to_live = 0
|
1914
|
-
update_frequency_days =
|
1915
|
-
XPath.first(channel_node, "schedule/intervaltime/@days").to_s
|
1916
|
-
update_frequency_hours =
|
1917
|
-
XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
|
1918
|
-
update_frequency_minutes =
|
1919
|
-
XPath.first(channel_node, "schedule/intervaltime/@min").to_s
|
1920
|
-
update_frequency_seconds =
|
1921
|
-
XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
|
1922
|
-
if update_frequency_days != ""
|
1923
|
-
@time_to_live = @time_to_live + update_frequency_days.to_i.day
|
1924
|
-
end
|
1925
|
-
if update_frequency_hours != ""
|
1926
|
-
@time_to_live = @time_to_live + update_frequency_hours.to_i.hour
|
1927
|
-
end
|
1928
|
-
if update_frequency_minutes != ""
|
1929
|
-
@time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
|
1930
|
-
end
|
1931
|
-
if update_frequency_seconds != ""
|
1932
|
-
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1933
|
-
end
|
1934
|
-
if @time_to_live == 0
|
1935
|
-
@time_to_live = 1.hour
|
1936
|
-
end
|
1937
|
-
end
|
1938
1995
|
if @time_to_live.nil? || @time_to_live == 0
|
1939
1996
|
# Default to one hour
|
1940
1997
|
@time_to_live = 1.hour
|
@@ -2009,17 +2066,19 @@ module FeedTools
|
|
2009
2066
|
# Returns the feed language
|
2010
2067
|
def language
|
2011
2068
|
if @language.nil?
|
2012
|
-
|
2013
|
-
|
2014
|
-
@language
|
2015
|
-
|
2016
|
-
|
2017
|
-
@language
|
2018
|
-
|
2019
|
-
|
2020
|
-
@language
|
2069
|
+
unless channel_node.nil?
|
2070
|
+
@language = XPath.first(channel_node, "language/text()").to_s
|
2071
|
+
if @language == ""
|
2072
|
+
@language = XPath.first(channel_node, "dc:language/text()").to_s
|
2073
|
+
end
|
2074
|
+
if @language == ""
|
2075
|
+
@language = XPath.first(channel_node, "xml:lang/text()").to_s
|
2076
|
+
end
|
2077
|
+
if @language == ""
|
2078
|
+
@language = XPath.first(root_node, "xml:lang/text()").to_s
|
2079
|
+
end
|
2021
2080
|
end
|
2022
|
-
if @language == ""
|
2081
|
+
if @language == "" || @language.nil?
|
2023
2082
|
@language = "en-us"
|
2024
2083
|
end
|
2025
2084
|
@language = @language.downcase
|
@@ -2058,23 +2117,25 @@ module FeedTools
|
|
2058
2117
|
# Returns the feed items
|
2059
2118
|
def items
|
2060
2119
|
if @items.nil?
|
2061
|
-
|
2062
|
-
|
2063
|
-
raw_items
|
2064
|
-
|
2065
|
-
|
2066
|
-
raw_items
|
2067
|
-
|
2068
|
-
|
2069
|
-
raw_items
|
2070
|
-
|
2071
|
-
|
2072
|
-
raw_items
|
2073
|
-
|
2074
|
-
|
2075
|
-
raw_items
|
2120
|
+
unless root_node.nil?
|
2121
|
+
raw_items = XPath.match(root_node, "item")
|
2122
|
+
if raw_items == nil || raw_items == []
|
2123
|
+
raw_items = XPath.match(channel_node, "item")
|
2124
|
+
end
|
2125
|
+
if raw_items == nil || raw_items == []
|
2126
|
+
raw_items = XPath.match(channel_node, "ITEM")
|
2127
|
+
end
|
2128
|
+
if raw_items == nil || raw_items == []
|
2129
|
+
raw_items = XPath.match(root_node, "ITEM")
|
2130
|
+
end
|
2131
|
+
if raw_items == nil || raw_items == []
|
2132
|
+
raw_items = XPath.match(channel_node, "entry")
|
2133
|
+
end
|
2134
|
+
if raw_items == nil || raw_items == []
|
2135
|
+
raw_items = XPath.match(root_node, "entry")
|
2136
|
+
end
|
2076
2137
|
end
|
2077
|
-
|
2138
|
+
|
2078
2139
|
# create the individual feed items
|
2079
2140
|
@items = []
|
2080
2141
|
if raw_items != nil
|
@@ -2174,7 +2235,11 @@ module FeedTools
|
|
2174
2235
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
2175
2236
|
# RDF-based rss format
|
2176
2237
|
return xml_builder.tag!("rdf:RDF") do
|
2177
|
-
|
2238
|
+
channel_attributes = {}
|
2239
|
+
unless self.link.nil?
|
2240
|
+
channel_attributes["rdf:about"] = CGI.escapeHTML(self.link)
|
2241
|
+
end
|
2242
|
+
xml_builder.channel(channel_attributes) do
|
2178
2243
|
unless title.nil? || title == ""
|
2179
2244
|
xml_builder.title(title)
|
2180
2245
|
else
|
@@ -2534,9 +2599,11 @@ module FeedTools
|
|
2534
2599
|
# Returns the feed items's unique id
|
2535
2600
|
def id
|
2536
2601
|
if @id.nil?
|
2537
|
-
|
2538
|
-
|
2539
|
-
@id
|
2602
|
+
unless root_node.nil?
|
2603
|
+
@id = XPath.first(root_node, "id/text()").to_s
|
2604
|
+
if @id == ""
|
2605
|
+
@id = XPath.first(root_node, "guid/text()").to_s
|
2606
|
+
end
|
2540
2607
|
end
|
2541
2608
|
@id = nil if @id == ""
|
2542
2609
|
end
|
@@ -2551,13 +2618,15 @@ module FeedTools
|
|
2551
2618
|
# Returns the feed item title
|
2552
2619
|
def title
|
2553
2620
|
if @title.nil?
|
2554
|
-
|
2555
|
-
|
2556
|
-
|
2557
|
-
title_node
|
2558
|
-
|
2559
|
-
|
2560
|
-
title_node
|
2621
|
+
unless root_node.nil?
|
2622
|
+
repair_entities = false
|
2623
|
+
title_node = XPath.first(root_node, "title")
|
2624
|
+
if title_node.nil?
|
2625
|
+
title_node = XPath.first(root_node, "dc:title")
|
2626
|
+
end
|
2627
|
+
if title_node.nil?
|
2628
|
+
title_node = XPath.first(root_node, "TITLE")
|
2629
|
+
end
|
2561
2630
|
end
|
2562
2631
|
if title_node.nil?
|
2563
2632
|
return nil
|
@@ -2606,41 +2675,43 @@ module FeedTools
|
|
2606
2675
|
# Returns the feed item description
|
2607
2676
|
def description
|
2608
2677
|
if @description.nil?
|
2609
|
-
|
2610
|
-
|
2611
|
-
|
2612
|
-
description_node
|
2613
|
-
|
2614
|
-
|
2615
|
-
description_node
|
2616
|
-
|
2617
|
-
|
2618
|
-
description_node
|
2619
|
-
|
2620
|
-
|
2621
|
-
description_node
|
2622
|
-
|
2623
|
-
|
2624
|
-
description_node
|
2625
|
-
|
2626
|
-
|
2627
|
-
description_node
|
2628
|
-
|
2629
|
-
|
2630
|
-
description_node
|
2631
|
-
|
2632
|
-
|
2633
|
-
description_node
|
2634
|
-
|
2635
|
-
|
2636
|
-
description_node
|
2637
|
-
|
2638
|
-
|
2639
|
-
description_node
|
2640
|
-
|
2641
|
-
|
2642
|
-
description_node
|
2643
|
-
|
2678
|
+
unless root_node.nil?
|
2679
|
+
repair_entities = false
|
2680
|
+
description_node = XPath.first(root_node, "description")
|
2681
|
+
if description_node.nil?
|
2682
|
+
description_node = XPath.first(root_node, "xhtml:body")
|
2683
|
+
end
|
2684
|
+
if description_node.nil?
|
2685
|
+
description_node = XPath.first(root_node, "body")
|
2686
|
+
end
|
2687
|
+
if description_node.nil?
|
2688
|
+
description_node = XPath.first(root_node, "tagline")
|
2689
|
+
end
|
2690
|
+
if description_node.nil?
|
2691
|
+
description_node = XPath.first(root_node, "subtitle")
|
2692
|
+
end
|
2693
|
+
if description_node.nil?
|
2694
|
+
description_node = XPath.first(root_node, "summary")
|
2695
|
+
end
|
2696
|
+
if description_node.nil?
|
2697
|
+
description_node = XPath.first(root_node, "abstract")
|
2698
|
+
end
|
2699
|
+
if description_node.nil?
|
2700
|
+
description_node = XPath.first(root_node, "ABSTRACT")
|
2701
|
+
end
|
2702
|
+
if description_node.nil?
|
2703
|
+
description_node = XPath.first(root_node, "content:encoded")
|
2704
|
+
end
|
2705
|
+
if description_node.nil?
|
2706
|
+
description_node = XPath.first(root_node, "content")
|
2707
|
+
end
|
2708
|
+
if description_node.nil?
|
2709
|
+
description_node = XPath.first(root_node, "fullitem")
|
2710
|
+
end
|
2711
|
+
if description_node.nil?
|
2712
|
+
description_node = XPath.first(root_node, "info")
|
2713
|
+
@bozo = true unless description_node.nil?
|
2714
|
+
end
|
2644
2715
|
end
|
2645
2716
|
if description_node.nil?
|
2646
2717
|
return nil
|
@@ -2754,32 +2825,34 @@ module FeedTools
|
|
2754
2825
|
# Returns the feed item link
|
2755
2826
|
def link
|
2756
2827
|
if @link.nil?
|
2757
|
-
|
2758
|
-
|
2759
|
-
@link
|
2760
|
-
|
2761
|
-
|
2762
|
-
@link
|
2763
|
-
|
2764
|
-
|
2765
|
-
@link
|
2766
|
-
|
2767
|
-
|
2768
|
-
@link
|
2769
|
-
|
2770
|
-
|
2771
|
-
@link
|
2772
|
-
|
2773
|
-
|
2774
|
-
@link
|
2775
|
-
|
2776
|
-
|
2777
|
-
@link
|
2778
|
-
|
2779
|
-
|
2780
|
-
@link
|
2828
|
+
unless root_node.nil?
|
2829
|
+
@link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s
|
2830
|
+
if @link == ""
|
2831
|
+
@link = XPath.first(root_node, "link/@href").to_s
|
2832
|
+
end
|
2833
|
+
if @link == ""
|
2834
|
+
@link = XPath.first(root_node, "link/text()").to_s
|
2835
|
+
end
|
2836
|
+
if @link == ""
|
2837
|
+
@link = XPath.first(root_node, "@rdf:about").to_s
|
2838
|
+
end
|
2839
|
+
if @link == ""
|
2840
|
+
@link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
|
2841
|
+
end
|
2842
|
+
if @link == ""
|
2843
|
+
@link = XPath.first(root_node, "@href").to_s
|
2844
|
+
end
|
2845
|
+
if @link == ""
|
2846
|
+
@link = XPath.first(root_node, "a/@href").to_s
|
2847
|
+
end
|
2848
|
+
if @link == ""
|
2849
|
+
@link = XPath.first(root_node, "@HREF").to_s
|
2850
|
+
end
|
2851
|
+
if @link == ""
|
2852
|
+
@link = XPath.first(root_node, "A/@HREF").to_s
|
2853
|
+
end
|
2781
2854
|
end
|
2782
|
-
if @link == ""
|
2855
|
+
if @link == "" || @link.nil?
|
2783
2856
|
if FeedTools.is_url? self.guid
|
2784
2857
|
@link = self.guid
|
2785
2858
|
end
|
data/rakefile
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: feedtools
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.5
|
7
|
-
date: 2005-
|
6
|
+
version: 0.2.6
|
7
|
+
date: 2005-09-01 00:00:00 -04:00
|
8
8
|
summary: "Parsing, generation, and caching system for xml news feeds."
|
9
9
|
require_paths:
|
10
10
|
- lib
|