feedtools 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/lib/feed_tools.rb +244 -193
- data/rakefile +1 -1
- data/test/nonstandard_test.rb +147 -0
- metadata +6 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
== FeedTools 0.2.5
|
2
|
+
* fixed multiple rows being created in the cache after a 301 redirection
|
3
|
+
* fixed broken table creation for postgresql and sqlite
|
4
|
+
* testing against non-standard feeds
|
5
|
+
* removed the 'comment_link' method in favor of the 'comments' method
|
1
6
|
== FeedTools 0.2.4
|
2
7
|
* fixed bug in the sqlite table creation query
|
3
8
|
* greatly improved image support
|
data/lib/feed_tools.rb
CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
25
25
|
ENV['RAILS_ENV'] ||
|
26
26
|
'production' # :nodoc:
|
27
27
|
|
28
|
-
FEED_TOOLS_VERSION = "0.2.4"
|
28
|
+
FEED_TOOLS_VERSION = "0.2.5"
|
29
29
|
|
30
30
|
$:.unshift(File.dirname(__FILE__))
|
31
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
@@ -182,7 +182,7 @@ module FeedTools
|
|
182
182
|
'link' VARCHAR(255) DEFAULT NULL,
|
183
183
|
'xml_data' TEXT DEFAULT NULL,
|
184
184
|
'http_headers' TEXT DEFAULT NULL,
|
185
|
-
'last_retrieved' DATETIME DEFAULT NULL
|
185
|
+
'last_retrieved' DATETIME DEFAULT NULL
|
186
186
|
);
|
187
187
|
SQL_END
|
188
188
|
feeds_psql = <<-SQL_END
|
@@ -193,7 +193,7 @@ module FeedTools
|
|
193
193
|
link varchar(255) default NULL,
|
194
194
|
xml_data text default NULL,
|
195
195
|
http_headers text default NULL,
|
196
|
-
last_retrieved
|
196
|
+
last_retrieved timestamp default NULL
|
197
197
|
);
|
198
198
|
SQL_END
|
199
199
|
table_creation_sql = nil
|
@@ -489,7 +489,7 @@ module FeedTools
|
|
489
489
|
if url.nil? || url == ""
|
490
490
|
return nil
|
491
491
|
end
|
492
|
-
normalized_url = url
|
492
|
+
normalized_url = url.strip
|
493
493
|
|
494
494
|
# if a url begins with the '/' character, it only makes sense that they
|
495
495
|
# meant to be using a file:// url. Fix it for them.
|
@@ -900,6 +900,8 @@ module FeedTools
|
|
900
900
|
# redirections, and see if we need to update the url.
|
901
901
|
for redirected_response in response_chain
|
902
902
|
if redirected_response.last.code.to_i == 301
|
903
|
+
# Reset the cache object or we may get duplicate entries
|
904
|
+
self.cache_object = nil
|
903
905
|
self.url = redirected_response.last['location']
|
904
906
|
else
|
905
907
|
# Jump out as soon as we hit anything that isn't a
|
@@ -1483,42 +1485,43 @@ module FeedTools
|
|
1483
1485
|
#
|
1484
1486
|
# This method uses the url from the link field in order to avoid grabbing
|
1485
1487
|
# the favicon for services like feedburner.
|
1486
|
-
def
|
1487
|
-
if @
|
1488
|
-
|
1489
|
-
|
1490
|
-
|
1491
|
-
@icon_link = XPath.first(channel_node,
|
1492
|
-
"link[@rel='shortcut icon']/@href").to_s
|
1488
|
+
def icon
|
1489
|
+
if @icon.nil?
|
1490
|
+
icon_node = XPath.first(channel_node, "link[@rel='icon']")
|
1491
|
+
if icon_node.nil?
|
1492
|
+
icon_node = XPath.first(channel_node, "link[@rel='shortcut icon']")
|
1493
1493
|
end
|
1494
|
-
if
|
1495
|
-
|
1496
|
-
"link[@type='image/x-icon']/@href").to_s
|
1494
|
+
if icon_node.nil?
|
1495
|
+
icon_node = XPath.first(channel_node, "link[@type='image/x-icon']")
|
1497
1496
|
end
|
1498
|
-
if
|
1499
|
-
|
1500
|
-
"icon/@href").to_s
|
1497
|
+
if icon_node.nil?
|
1498
|
+
icon_node = XPath.first(channel_node, "icon")
|
1501
1499
|
end
|
1502
|
-
if
|
1503
|
-
|
1504
|
-
"icon/text()").to_s
|
1500
|
+
if icon_node.nil?
|
1501
|
+
icon_node = XPath.first(channel_node, "logo[@style='icon']")
|
1505
1502
|
end
|
1506
|
-
if
|
1507
|
-
|
1508
|
-
"logo[@style='icon']/@href").to_s
|
1503
|
+
if icon_node.nil?
|
1504
|
+
icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
|
1509
1505
|
end
|
1510
|
-
|
1511
|
-
@
|
1512
|
-
"
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
|
1517
|
-
|
1506
|
+
unless icon_node.nil?
|
1507
|
+
@icon = FeedTools.unescape_entities(
|
1508
|
+
XPath.first(icon_node, "@href").to_s)
|
1509
|
+
if @icon == ""
|
1510
|
+
@icon = FeedTools.unescape_entities(
|
1511
|
+
XPath.first(icon_node, "text()").to_s)
|
1512
|
+
unless FeedTools.is_url? @icon
|
1513
|
+
@icon = ""
|
1514
|
+
end
|
1515
|
+
end
|
1516
|
+
if @icon == "" && self.link != nil && self.link != ""
|
1517
|
+
link_uri = URI.parse(FeedTools.normalize_url(self.link))
|
1518
|
+
@icon =
|
1519
|
+
link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
|
1520
|
+
end
|
1521
|
+
@icon = nil if @icon == ""
|
1518
1522
|
end
|
1519
|
-
icon_link = nil if icon_link == ""
|
1520
1523
|
end
|
1521
|
-
return @
|
1524
|
+
return @icon
|
1522
1525
|
end
|
1523
1526
|
|
1524
1527
|
# Returns the feed author
|
@@ -1526,70 +1529,83 @@ module FeedTools
|
|
1526
1529
|
if @author.nil?
|
1527
1530
|
@author = FeedTools::Feed::Author.new
|
1528
1531
|
|
1529
|
-
|
1530
|
-
|
1531
|
-
XPath.first(channel_node, "
|
1532
|
-
|
1533
|
-
@author.raw = FeedTools.unescape_entities(
|
1534
|
-
XPath.first(channel_node, "author/text()").to_s)
|
1535
|
-
if @author.raw == ""
|
1536
|
-
@author.raw = FeedTools.unescape_entities(
|
1537
|
-
XPath.first(channel_node, "dc:creator/text()").to_s)
|
1532
|
+
author_node = XPath.first(channel_node, "author")
|
1533
|
+
if author_node.nil?
|
1534
|
+
author_node = XPath.first(channel_node, "managingEditor")
|
1538
1535
|
end
|
1539
|
-
if
|
1540
|
-
|
1541
|
-
XPath.first(channel_node, "dc:author/text()").to_s)
|
1536
|
+
if author_node.nil?
|
1537
|
+
author_node = XPath.first(channel_node, "dc:author")
|
1542
1538
|
end
|
1543
|
-
if
|
1544
|
-
|
1545
|
-
XPath.first(channel_node, "managingEditor/text()").to_s)
|
1539
|
+
if author_node.nil?
|
1540
|
+
author_node = XPath.first(channel_node, "dc:creator")
|
1546
1541
|
end
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1542
|
+
if author_node.nil?
|
1543
|
+
author_node = XPath.first(channel_node, "atom:author")
|
1544
|
+
end
|
1545
|
+
unless author_node.nil?
|
1546
|
+
@author.raw = FeedTools.unescape_entities(
|
1547
|
+
XPath.first(author_node, "text()").to_s)
|
1548
|
+
@author.raw = nil if @author.raw == ""
|
1549
|
+
unless @author.raw.nil?
|
1551
1550
|
raw_scan = @author.raw.scan(
|
1552
|
-
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1559
|
-
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
1560
|
-
if email_scan != nil && email_scan.size > 0
|
1561
|
-
@author.email = email_scan.first.strip
|
1551
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
1552
|
+
if raw_scan.nil? || raw_scan.size == 0
|
1553
|
+
raw_scan = @author.raw.scan(
|
1554
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
1555
|
+
author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
|
1556
|
+
else
|
1557
|
+
author_raw_pair = raw_scan.first
|
1562
1558
|
end
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
|
1569
|
-
|
1570
|
-
|
1571
|
-
|
1572
|
-
|
1573
|
-
|
1559
|
+
if raw_scan.nil? || raw_scan.size == 0
|
1560
|
+
email_scan = @author.raw.scan(
|
1561
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
1562
|
+
if email_scan != nil && email_scan.size > 0
|
1563
|
+
@author.email = email_scan.first.strip
|
1564
|
+
end
|
1565
|
+
end
|
1566
|
+
unless author_raw_pair.nil? || author_raw_pair.size == 0
|
1567
|
+
@author.name = author_raw_pair.first.strip
|
1568
|
+
@author.email = author_raw_pair.last.strip
|
1569
|
+
else
|
1570
|
+
unless @author.raw.include?("@")
|
1571
|
+
# We can be reasonably sure we are looking at something
|
1572
|
+
# that the creator didn't intend to contain an email address if
|
1573
|
+
# it got through the preceeding regexes and it doesn't
|
1574
|
+
# contain the tell-tale '@' symbol.
|
1575
|
+
@author.name = @author.raw
|
1576
|
+
end
|
1574
1577
|
end
|
1575
1578
|
end
|
1579
|
+
@author.name = "" if @author.name.nil?
|
1580
|
+
if @author.name == ""
|
1581
|
+
@author.name = FeedTools.unescape_entities(
|
1582
|
+
XPath.first(author_node, "name/text()").to_s)
|
1583
|
+
end
|
1584
|
+
if @author.name == ""
|
1585
|
+
@author.name = FeedTools.unescape_entities(
|
1586
|
+
XPath.first(author_node, "@name").to_s)
|
1587
|
+
end
|
1588
|
+
if @author.email == ""
|
1589
|
+
@author.email = FeedTools.unescape_entities(
|
1590
|
+
XPath.first(author_node, "email/text()").to_s)
|
1591
|
+
end
|
1592
|
+
if @author.email == ""
|
1593
|
+
@author.email = FeedTools.unescape_entities(
|
1594
|
+
XPath.first(author_node, "@email").to_s)
|
1595
|
+
end
|
1596
|
+
if @author.url == ""
|
1597
|
+
@author.url = FeedTools.unescape_entities(
|
1598
|
+
XPath.first(author_node, "url/text()").to_s)
|
1599
|
+
end
|
1600
|
+
if @author.url == ""
|
1601
|
+
@author.url = FeedTools.unescape_entities(
|
1602
|
+
XPath.first(author_node, "@url").to_s)
|
1603
|
+
end
|
1604
|
+
@author.name = nil if @author.name == ""
|
1605
|
+
@author.raw = nil if @author.raw == ""
|
1606
|
+
@author.email = nil if @author.email == ""
|
1607
|
+
@author.url = nil if @author.url == ""
|
1576
1608
|
end
|
1577
|
-
|
1578
|
-
@author.name = nil if @author.name == ""
|
1579
|
-
@author.raw = nil if @author.raw == ""
|
1580
|
-
|
1581
|
-
# Set the author email
|
1582
|
-
if @author.email == ""
|
1583
|
-
@author.email = FeedTools.unescape_entities(
|
1584
|
-
XPath.first(channel_node, "author/email/text()").to_s)
|
1585
|
-
end
|
1586
|
-
@author.email = nil if @author.email == ""
|
1587
|
-
|
1588
|
-
# Set the author url
|
1589
|
-
@author.url = FeedTools.unescape_entities(
|
1590
|
-
XPath.first(channel_node, "author/url/text()").to_s)
|
1591
|
-
@author.url = nil if @author.url == ""
|
1592
|
-
|
1593
1609
|
# Fallback on the itunes module if we didn't find an author name
|
1594
1610
|
begin
|
1595
1611
|
@author.name = self.itunes_author if @author.name.nil?
|
@@ -1822,6 +1838,9 @@ module FeedTools
|
|
1822
1838
|
if @copyright == ""
|
1823
1839
|
@copyright = XPath.first(channel_node, "dc:rights/text()").to_s
|
1824
1840
|
end
|
1841
|
+
if @copyright == ""
|
1842
|
+
@copyright = XPath.first(channel_node, "copyrights/text()").to_s
|
1843
|
+
end
|
1825
1844
|
@copyright = FeedTools.sanitize_html(@copyright, :strip)
|
1826
1845
|
@copyright = nil if @copyright == ""
|
1827
1846
|
end
|
@@ -1841,60 +1860,93 @@ module FeedTools
|
|
1841
1860
|
if update_frequency != ""
|
1842
1861
|
update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
|
1843
1862
|
if update_period == "daily"
|
1844
|
-
@time_to_live = update_frequency.to_i
|
1863
|
+
@time_to_live = update_frequency.to_i.day
|
1845
1864
|
elsif update_period == "weekly"
|
1846
|
-
@time_to_live = update_frequency.to_i
|
1865
|
+
@time_to_live = update_frequency.to_i.week
|
1847
1866
|
elsif update_period == "monthly"
|
1848
|
-
@time_to_live = update_frequency.to_i
|
1867
|
+
@time_to_live = update_frequency.to_i.month
|
1849
1868
|
elsif update_period == "yearly"
|
1850
|
-
@time_to_live = update_frequency.to_i
|
1869
|
+
@time_to_live = update_frequency.to_i.year
|
1851
1870
|
else
|
1852
1871
|
# hourly
|
1853
|
-
@time_to_live = update_frequency.to_i
|
1872
|
+
@time_to_live = update_frequency.to_i.hour
|
1854
1873
|
end
|
1855
1874
|
end
|
1856
1875
|
end
|
1857
1876
|
if @time_to_live.nil?
|
1858
|
-
# expressed in minutes
|
1877
|
+
# usually expressed in minutes
|
1859
1878
|
update_frequency = XPath.first(channel_node, "ttl/text()").to_s
|
1860
1879
|
if update_frequency != ""
|
1861
|
-
|
1880
|
+
update_span = XPath.first(channel_node, "ttl/@span").to_s
|
1881
|
+
if update_span == "seconds"
|
1882
|
+
@time_to_live = update_frequency.to_i
|
1883
|
+
elsif update_span == "minutes"
|
1884
|
+
@time_to_live = update_frequency.to_i.minute
|
1885
|
+
elsif update_span == "hours"
|
1886
|
+
@time_to_live = update_frequency.to_i.hour
|
1887
|
+
elsif update_span == "days"
|
1888
|
+
@time_to_live = update_frequency.to_i.day
|
1889
|
+
elsif update_span == "weeks"
|
1890
|
+
@time_to_live = update_frequency.to_i.week
|
1891
|
+
elsif update_span == "months"
|
1892
|
+
@time_to_live = update_frequency.to_i.month
|
1893
|
+
elsif update_span == "years"
|
1894
|
+
@time_to_live = update_frequency.to_i.year
|
1895
|
+
elsif update_frequency.to_i >= 3000
|
1896
|
+
# Normally, this should default to minutes, but realistically,
|
1897
|
+
# if they meant minutes, you're rarely going to see a value higher
|
1898
|
+
# than 120. If we see >= 3000, we're either dealing with a stupid
|
1899
|
+
# pseudo-spec that decided to use seconds, or we're looking at
|
1900
|
+
# someone who only has weekly updated content. Worst case, we
|
1901
|
+
# misreport the time, and we update too often. Best case, we
|
1902
|
+
# avoid accidentally updating the feed only once a year. In the
|
1903
|
+
# interests of being pragmatic, and since the problem we avoid
|
1904
|
+
# is a far greater one than the one we cause, just run the check
|
1905
|
+
# and hope no one actually gets hurt.
|
1906
|
+
@time_to_live = update_frequency.to_i
|
1907
|
+
else
|
1908
|
+
@time_to_live = update_frequency.to_i.minute
|
1909
|
+
end
|
1862
1910
|
end
|
1863
1911
|
end
|
1864
1912
|
if @time_to_live.nil?
|
1865
1913
|
@time_to_live = 0
|
1866
|
-
update_frequency_days =
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
1914
|
+
update_frequency_days =
|
1915
|
+
XPath.first(channel_node, "schedule/intervaltime/@days").to_s
|
1916
|
+
update_frequency_hours =
|
1917
|
+
XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
|
1918
|
+
update_frequency_minutes =
|
1919
|
+
XPath.first(channel_node, "schedule/intervaltime/@min").to_s
|
1920
|
+
update_frequency_seconds =
|
1921
|
+
XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
|
1870
1922
|
if update_frequency_days != ""
|
1871
|
-
@time_to_live = @time_to_live + update_frequency_days.to_i
|
1923
|
+
@time_to_live = @time_to_live + update_frequency_days.to_i.day
|
1872
1924
|
end
|
1873
1925
|
if update_frequency_hours != ""
|
1874
|
-
@time_to_live = @time_to_live + update_frequency_hours.to_i
|
1926
|
+
@time_to_live = @time_to_live + update_frequency_hours.to_i.hour
|
1875
1927
|
end
|
1876
1928
|
if update_frequency_minutes != ""
|
1877
|
-
@time_to_live = @time_to_live + update_frequency_minutes.to_i
|
1929
|
+
@time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
|
1878
1930
|
end
|
1879
1931
|
if update_frequency_seconds != ""
|
1880
|
-
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1932
|
+
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1881
1933
|
end
|
1882
1934
|
if @time_to_live == 0
|
1883
|
-
@time_to_live =
|
1935
|
+
@time_to_live = 1.hour
|
1884
1936
|
end
|
1885
1937
|
end
|
1886
1938
|
if @time_to_live.nil? || @time_to_live == 0
|
1887
1939
|
# Default to one hour
|
1888
|
-
@time_to_live = 1
|
1940
|
+
@time_to_live = 1.hour
|
1889
1941
|
end
|
1890
1942
|
@time_to_live = @time_to_live.round
|
1891
|
-
return @time_to_live
|
1943
|
+
return @time_to_live
|
1892
1944
|
end
|
1893
1945
|
|
1894
1946
|
# Sets the feed time to live
|
1895
1947
|
def time_to_live=(new_time_to_live)
|
1896
|
-
@time_to_live =
|
1897
|
-
@time_to_live = 1 if @time_to_live < 1
|
1948
|
+
@time_to_live = new_time_to_live.round
|
1949
|
+
@time_to_live = 1.hour if @time_to_live < 1.hour
|
1898
1950
|
end
|
1899
1951
|
|
1900
1952
|
# Returns the feed's cloud
|
@@ -2735,13 +2787,16 @@ module FeedTools
|
|
2735
2787
|
if @link != ""
|
2736
2788
|
@link = FeedTools.unescape_entities(@link)
|
2737
2789
|
end
|
2738
|
-
|
2739
|
-
|
2740
|
-
|
2741
|
-
|
2742
|
-
|
2743
|
-
|
2744
|
-
|
2790
|
+
# TODO: Actually implement proper relative url resolving instead of this crap
|
2791
|
+
# ===========================================================================
|
2792
|
+
#
|
2793
|
+
# if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
|
2794
|
+
# if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
|
2795
|
+
# @link = @link[1..-1]
|
2796
|
+
# end
|
2797
|
+
# # prepend the base to the link since they seem to have used a relative path
|
2798
|
+
# @link = feed.base + @link
|
2799
|
+
# end
|
2745
2800
|
@link = FeedTools.normalize_url(@link)
|
2746
2801
|
end
|
2747
2802
|
return @link
|
@@ -2751,25 +2806,7 @@ module FeedTools
|
|
2751
2806
|
def link=(new_link)
|
2752
2807
|
@link = new_link
|
2753
2808
|
end
|
2754
|
-
|
2755
|
-
# Returns the feed item comment link
|
2756
|
-
def comment_link
|
2757
|
-
if @comment_link.nil?
|
2758
|
-
# get the feed comment link from the xml document
|
2759
|
-
@comment_link = XPath.first(root_node, "comments/text()").to_s
|
2760
|
-
if @comment_link == ""
|
2761
|
-
@comment_link = self.link
|
2762
|
-
end
|
2763
|
-
@comment_link = FeedTools.normalize_url(@comment_link)
|
2764
|
-
end
|
2765
|
-
return @comment_link
|
2766
|
-
end
|
2767
|
-
|
2768
|
-
# Sets the feed item comment link
|
2769
|
-
def comment_link=(new_comment_link)
|
2770
|
-
@comment_link = new_comment_link
|
2771
|
-
end
|
2772
|
-
|
2809
|
+
|
2773
2810
|
# Returns a list of the feed item's categories
|
2774
2811
|
def categories
|
2775
2812
|
if @categories.nil?
|
@@ -3291,70 +3328,83 @@ module FeedTools
|
|
3291
3328
|
if @author.nil?
|
3292
3329
|
@author = FeedTools::Feed::Author.new
|
3293
3330
|
|
3294
|
-
|
3295
|
-
|
3296
|
-
XPath.first(root_node, "
|
3297
|
-
|
3298
|
-
@author.raw = FeedTools.unescape_entities(
|
3299
|
-
XPath.first(root_node, "author/text()").to_s)
|
3300
|
-
if @author.raw == ""
|
3301
|
-
@author.raw = FeedTools.unescape_entities(
|
3302
|
-
XPath.first(root_node, "dc:creator/text()").to_s)
|
3331
|
+
author_node = XPath.first(root_node, "author")
|
3332
|
+
if author_node.nil?
|
3333
|
+
author_node = XPath.first(root_node, "managingEditor")
|
3303
3334
|
end
|
3304
|
-
if
|
3305
|
-
|
3306
|
-
XPath.first(root_node, "dc:author/text()").to_s)
|
3335
|
+
if author_node.nil?
|
3336
|
+
author_node = XPath.first(root_node, "dc:author")
|
3307
3337
|
end
|
3308
|
-
if
|
3309
|
-
|
3310
|
-
XPath.first(root_node, "managingEditor/text()").to_s)
|
3338
|
+
if author_node.nil?
|
3339
|
+
author_node = XPath.first(root_node, "dc:creator")
|
3311
3340
|
end
|
3312
|
-
|
3313
|
-
|
3314
|
-
|
3315
|
-
|
3341
|
+
if author_node.nil?
|
3342
|
+
author_node = XPath.first(root_node, "atom:author")
|
3343
|
+
end
|
3344
|
+
unless author_node.nil?
|
3345
|
+
@author.raw = FeedTools.unescape_entities(
|
3346
|
+
XPath.first(author_node, "text()").to_s)
|
3347
|
+
@author.raw = nil if @author.raw == ""
|
3348
|
+
unless @author.raw.nil?
|
3316
3349
|
raw_scan = @author.raw.scan(
|
3317
|
-
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\
|
3318
|
-
|
3319
|
-
|
3320
|
-
|
3321
|
-
|
3322
|
-
|
3323
|
-
|
3324
|
-
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
3325
|
-
if email_scan != nil && email_scan.size > 0
|
3326
|
-
@author.email = email_scan.first.strip
|
3350
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
3351
|
+
if raw_scan.nil? || raw_scan.size == 0
|
3352
|
+
raw_scan = @author.raw.scan(
|
3353
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
3354
|
+
author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
|
3355
|
+
else
|
3356
|
+
author_raw_pair = raw_scan.first
|
3327
3357
|
end
|
3328
|
-
|
3329
|
-
|
3330
|
-
|
3331
|
-
|
3332
|
-
|
3333
|
-
|
3334
|
-
|
3335
|
-
|
3336
|
-
|
3337
|
-
|
3338
|
-
|
3358
|
+
if raw_scan.nil? || raw_scan.size == 0
|
3359
|
+
email_scan = @author.raw.scan(
|
3360
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
3361
|
+
if email_scan != nil && email_scan.size > 0
|
3362
|
+
@author.email = email_scan.first.strip
|
3363
|
+
end
|
3364
|
+
end
|
3365
|
+
unless author_raw_pair.nil? || author_raw_pair.size == 0
|
3366
|
+
@author.name = author_raw_pair.first.strip
|
3367
|
+
@author.email = author_raw_pair.last.strip
|
3368
|
+
else
|
3369
|
+
unless @author.raw.include?("@")
|
3370
|
+
# We can be reasonably sure we are looking at something
|
3371
|
+
# that the creator didn't intend to contain an email address if
|
3372
|
+
# it got through the preceeding regexes and it doesn't
|
3373
|
+
# contain the tell-tale '@' symbol.
|
3374
|
+
@author.name = @author.raw
|
3375
|
+
end
|
3339
3376
|
end
|
3340
3377
|
end
|
3378
|
+
@author.name = "" if @author.name.nil?
|
3379
|
+
if @author.name == ""
|
3380
|
+
@author.name = FeedTools.unescape_entities(
|
3381
|
+
XPath.first(author_node, "name/text()").to_s)
|
3382
|
+
end
|
3383
|
+
if @author.name == ""
|
3384
|
+
@author.name = FeedTools.unescape_entities(
|
3385
|
+
XPath.first(author_node, "@name").to_s)
|
3386
|
+
end
|
3387
|
+
if @author.email == ""
|
3388
|
+
@author.email = FeedTools.unescape_entities(
|
3389
|
+
XPath.first(author_node, "email/text()").to_s)
|
3390
|
+
end
|
3391
|
+
if @author.email == ""
|
3392
|
+
@author.email = FeedTools.unescape_entities(
|
3393
|
+
XPath.first(author_node, "@email").to_s)
|
3394
|
+
end
|
3395
|
+
if @author.url == ""
|
3396
|
+
@author.url = FeedTools.unescape_entities(
|
3397
|
+
XPath.first(author_node, "url/text()").to_s)
|
3398
|
+
end
|
3399
|
+
if @author.url == ""
|
3400
|
+
@author.url = FeedTools.unescape_entities(
|
3401
|
+
XPath.first(author_node, "@url").to_s)
|
3402
|
+
end
|
3403
|
+
@author.name = nil if @author.name == ""
|
3404
|
+
@author.raw = nil if @author.raw == ""
|
3405
|
+
@author.email = nil if @author.email == ""
|
3406
|
+
@author.url = nil if @author.url == ""
|
3341
3407
|
end
|
3342
|
-
|
3343
|
-
@author.name = nil if @author.name == ""
|
3344
|
-
@author.raw = nil if @author.raw == ""
|
3345
|
-
|
3346
|
-
# Set the author email
|
3347
|
-
if @author.email == ""
|
3348
|
-
@author.email = FeedTools.unescape_entities(
|
3349
|
-
XPath.first(root_node, "author/email/text()").to_s)
|
3350
|
-
end
|
3351
|
-
@author.email = nil if @author.email == ""
|
3352
|
-
|
3353
|
-
# Set the author url
|
3354
|
-
@author.url = FeedTools.unescape_entities(
|
3355
|
-
XPath.first(root_node, "author/url/text()").to_s)
|
3356
|
-
@author.url = nil if @author.url == ""
|
3357
|
-
|
3358
3408
|
# Fallback on the itunes module if we didn't find an author name
|
3359
3409
|
begin
|
3360
3410
|
@author.name = self.itunes_author if @author.name.nil?
|
@@ -3522,7 +3572,8 @@ module FeedTools
|
|
3522
3572
|
# Returns the url for posting comments
|
3523
3573
|
def comments
|
3524
3574
|
if @comments.nil?
|
3525
|
-
@comments =
|
3575
|
+
@comments = FeedTools.normalize_url(
|
3576
|
+
XPath.first(root_node, "comments/text()").to_s)
|
3526
3577
|
@comments = nil if @comments == ""
|
3527
3578
|
end
|
3528
3579
|
return @comments
|
@@ -3791,4 +3842,4 @@ begin
|
|
3791
3842
|
FeedTools.feed_cache.initialize_cache
|
3792
3843
|
end
|
3793
3844
|
rescue
|
3794
|
-
end
|
3845
|
+
end
|
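data/rakefile
CHANGED
(diff body for data/rakefile, +1 -1, was not captured in this extract)
data/test/nonstandard_test.rb
ADDED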
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'feed_tools'
|
3
|
+
|
4
|
+
class NonStandardTest < Test::Unit::TestCase
|
5
|
+
def setup
|
6
|
+
FeedTools.tidy_enabled = false
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_xss_strict
|
10
|
+
feed = FeedTools::Feed.new
|
11
|
+
feed.xml_data = <<-FEED
|
12
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
13
|
+
<rss version="2.0/XSS-strict">
|
14
|
+
<channel>
|
15
|
+
<title>tima thinking outloud.</title>
|
16
|
+
<link>http://www.timaoutloud.org/</link>
|
17
|
+
<description>The personal weblog of Timothy Appnel</description>
|
18
|
+
<item>
|
19
|
+
<link>http://www.timaoutloud.org/archives/000415.html</link>
|
20
|
+
<title>OSCON Wrap-Up.</title>
|
21
|
+
<description>
|
22
|
+
It's been a week since OSCON ended and I'm just
|
23
|
+
beginning to recover. This uber post records my notes and
|
24
|
+
personal views as a speaker and attendee.
|
25
|
+
</description>
|
26
|
+
</item>
|
27
|
+
<item>
|
28
|
+
<link>http://www.timaoutloud.org/archives/000414.html</link>
|
29
|
+
<title>Write For The People Who Support You.</title>
|
30
|
+
<description>
|
31
|
+
Hooray! Mena is back. Ben too. Anil is celebrating
|
32
|
+
6 years of blogging.
|
33
|
+
</description>
|
34
|
+
</item>
|
35
|
+
<item>
|
36
|
+
<link>http://www.timaoutloud.org/archives/000413.html</link>
|
37
|
+
<title>tima@OSCON</title>
|
38
|
+
<description>
|
39
|
+
Ben Hammersley and I will be presenting 45 syndication hacks
|
40
|
+
in 45 minutes. Will I be able to keep pace with the madness?
|
41
|
+
</description>
|
42
|
+
</item>
|
43
|
+
</channel>
|
44
|
+
</rss>
|
45
|
+
FEED
|
46
|
+
assert_equal("tima thinking outloud.", feed.title)
|
47
|
+
assert_equal("http://www.timaoutloud.org/", feed.link)
|
48
|
+
assert_equal("The personal weblog of Timothy Appnel", feed.description)
|
49
|
+
|
50
|
+
assert_equal("OSCON Wrap-Up.", feed.items[0].title)
|
51
|
+
assert_equal("http://www.timaoutloud.org/archives/000415.html",
|
52
|
+
feed.items[0].link)
|
53
|
+
assert_equal(false, feed.items[0].description == nil)
|
54
|
+
|
55
|
+
assert_equal("Write For The People Who Support You.", feed.items[1].title)
|
56
|
+
assert_equal("http://www.timaoutloud.org/archives/000414.html",
|
57
|
+
feed.items[1].link)
|
58
|
+
assert_equal(false, feed.items[1].description == nil)
|
59
|
+
|
60
|
+
assert_equal("tima@OSCON", feed.items[2].title)
|
61
|
+
assert_equal("http://www.timaoutloud.org/archives/000413.html",
|
62
|
+
feed.items[2].link)
|
63
|
+
assert_equal(false, feed.items[2].description == nil)
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_rss_30_lite
|
67
|
+
# Delusions of grandeur...
|
68
|
+
feed = FeedTools::Feed.new
|
69
|
+
feed.xml_data = <<-FEED
|
70
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
71
|
+
<rss version="3.0" type="lite"
|
72
|
+
source="http://www.rss3.org/files/liteSample.rss">
|
73
|
+
<channel>
|
74
|
+
<title>RSS Version 3</title>
|
75
|
+
<link>http://www.rss3.org/</link>
|
76
|
+
<description>This is a sample RSS 3 Lite-type feed</description>
|
77
|
+
|
78
|
+
<lastBuildDate>Sun, 14 Aug 2005 09:53:59 +0000</lastBuildDate>
|
79
|
+
<generator name="RSS3Maker">http://no.address/</generator>
|
80
|
+
<language rel="both">en</language>
|
81
|
+
<icon>http://www.rss3.org/files/r1.ico</icon>
|
82
|
+
<copyright>Jonathan Avidan 2005 (c)</copyright>
|
83
|
+
<managingEditor name="Jonathan Avidan">
|
84
|
+
editor@rss3.org
|
85
|
+
</managingEditor>
|
86
|
+
<webMaster name="Jonathan Avidan">webmaster@rss3.org</webMaster>
|
87
|
+
<ttl span="days">7</ttl>
|
88
|
+
<docs>http://www.rss3.org/rss3lite.html</docs>
|
89
|
+
<item>
|
90
|
+
<title>RSS 3 Lite First Draft Now Available</title>
|
91
|
+
<link>
|
92
|
+
http://www.rss3.org/archive/rss3lite/first_draft.html
|
93
|
+
</link>
|
94
|
+
<description>
|
95
|
+
The RSS 3 Lite-type specification first publicly
|
96
|
+
available version
|
97
|
+
</description>
|
98
|
+
<pubDate>Sun, 18 Aug 2005 09:53:59 +0000</pubDate>
|
99
|
+
<author name="Jonathan Avidan">jonathan@rss3.org</author>
|
100
|
+
<guid type="code">6457894357689</guid>
|
101
|
+
</item>
|
102
|
+
<item isUpdated="true" updateNum="1">
|
103
|
+
<title>Welcome to the RSS 3 Official Blog!</title>
|
104
|
+
<link>http://www.rss3.org/official_blog/?p=2</link>
|
105
|
+
<description>The RSS 3 Official Blog welcome message</description>
|
106
|
+
<comments type="both">
|
107
|
+
http://www.rss3.org/official_blog/?p=2#comments
|
108
|
+
</comments>
|
109
|
+
<pubDate>Wed, 27 Jul 2005 14:34:51 +0000</pubDate>
|
110
|
+
<author name="Jonathan Avidan" type="writer">
|
111
|
+
jonathan@rss3.org
|
112
|
+
</author>
|
113
|
+
<guid type="link">http://www.rss3.org/official_blog/?p=2</guid>
|
114
|
+
</item>
|
115
|
+
</channel>
|
116
|
+
</rss>
|
117
|
+
FEED
|
118
|
+
assert_equal("RSS Version 3", feed.title)
|
119
|
+
assert_equal("http://www.rss3.org/", feed.link)
|
120
|
+
assert_equal("This is a sample RSS 3 Lite-type feed", feed.description)
|
121
|
+
assert_equal("http://no.address/", feed.generator)
|
122
|
+
assert_equal("en", feed.language)
|
123
|
+
assert_equal("http://www.rss3.org/files/r1.ico", feed.icon)
|
124
|
+
assert_equal("Jonathan Avidan 2005 (c)", feed.copyright)
|
125
|
+
assert_equal(7.day, feed.ttl)
|
126
|
+
assert_equal("http://www.rss3.org/rss3lite.html", feed.docs)
|
127
|
+
|
128
|
+
assert_equal("RSS 3 Lite First Draft Now Available", feed.items[0].title)
|
129
|
+
assert_equal("http://www.rss3.org/archive/rss3lite/first_draft.html",
|
130
|
+
feed.items[0].link)
|
131
|
+
assert_equal(false, feed.items[0].description == nil)
|
132
|
+
assert_equal(Time.utc(2005, "Aug", 18, 9, 53, 59), feed.items[0].time)
|
133
|
+
assert_equal("Jonathan Avidan", feed.items[0].author.name)
|
134
|
+
assert_equal("jonathan@rss3.org", feed.items[0].author.email)
|
135
|
+
assert_equal("6457894357689", feed.items[0].guid)
|
136
|
+
|
137
|
+
assert_equal("Welcome to the RSS 3 Official Blog!", feed.items[1].title)
|
138
|
+
assert_equal("http://www.rss3.org/official_blog/?p=2", feed.items[1].link)
|
139
|
+
assert_equal(false, feed.items[1].description == nil)
|
140
|
+
assert_equal("http://www.rss3.org/official_blog/?p=2#comments",
|
141
|
+
feed.items[1].comments)
|
142
|
+
assert_equal(Time.utc(2005, "Jul", 27, 14, 34, 51), feed.items[1].time)
|
143
|
+
assert_equal("Jonathan Avidan", feed.items[1].author.name)
|
144
|
+
assert_equal("jonathan@rss3.org", feed.items[1].author.email)
|
145
|
+
assert_equal("http://www.rss3.org/official_blog/?p=2", feed.items[1].guid)
|
146
|
+
end
|
147
|
+
end
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.8.
|
2
|
+
rubygems_version: 0.8.11
|
3
3
|
specification_version: 1
|
4
4
|
name: feedtools
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.4
|
7
|
-
date: 2005-08-
|
6
|
+
version: 0.2.5
|
7
|
+
date: 2005-08-19 00:00:00 -04:00
|
8
8
|
summary: "Parsing, generation, and caching system for xml news feeds."
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -24,6 +24,8 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
|
|
24
24
|
version: 0.0.0
|
25
25
|
version:
|
26
26
|
platform: ruby
|
27
|
+
signing_key:
|
28
|
+
cert_chain:
|
27
29
|
authors:
|
28
30
|
- Bob Aman
|
29
31
|
files:
|
@@ -73,6 +75,7 @@ files:
|
|
73
75
|
- test/cache_test.rb
|
74
76
|
- test/cdf_test.rb
|
75
77
|
- test/helper_test.rb
|
78
|
+
- test/nonstandard_test.rb
|
76
79
|
- test/rss_test.rb
|
77
80
|
test_files: []
|
78
81
|
rdoc_options:
|