feedtools 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/lib/feed_tools.rb +244 -193
- data/rakefile +1 -1
- data/test/nonstandard_test.rb +147 -0
- metadata +6 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
== FeedTools 0.2.5
|
2
|
+
* fixed multiple rows being created in the cache after a 301 redirection
|
3
|
+
* fixed broken table creation for postgresql and sqlite
|
4
|
+
* testing against non-standard feeds
|
5
|
+
* removed the 'comment_link' method in favor of the 'comments' method
|
1
6
|
== FeedTools 0.2.4
|
2
7
|
* fixed bug in the sqlite table creation query
|
3
8
|
* greatly improved image support
|
data/lib/feed_tools.rb
CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
25
25
|
ENV['RAILS_ENV'] ||
|
26
26
|
'production' # :nodoc:
|
27
27
|
|
28
|
-
FEED_TOOLS_VERSION = "0.2.4"
|
28
|
+
FEED_TOOLS_VERSION = "0.2.5"
|
29
29
|
|
30
30
|
$:.unshift(File.dirname(__FILE__))
|
31
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
@@ -182,7 +182,7 @@ module FeedTools
|
|
182
182
|
'link' VARCHAR(255) DEFAULT NULL,
|
183
183
|
'xml_data' TEXT DEFAULT NULL,
|
184
184
|
'http_headers' TEXT DEFAULT NULL,
|
185
|
-
'last_retrieved' DATETIME DEFAULT NULL
|
185
|
+
'last_retrieved' DATETIME DEFAULT NULL
|
186
186
|
);
|
187
187
|
SQL_END
|
188
188
|
feeds_psql = <<-SQL_END
|
@@ -193,7 +193,7 @@ module FeedTools
|
|
193
193
|
link varchar(255) default NULL,
|
194
194
|
xml_data text default NULL,
|
195
195
|
http_headers text default NULL,
|
196
|
-
last_retrieved
|
196
|
+
last_retrieved timestamp default NULL
|
197
197
|
);
|
198
198
|
SQL_END
|
199
199
|
table_creation_sql = nil
|
@@ -489,7 +489,7 @@ module FeedTools
|
|
489
489
|
if url.nil? || url == ""
|
490
490
|
return nil
|
491
491
|
end
|
492
|
-
normalized_url = url
|
492
|
+
normalized_url = url.strip
|
493
493
|
|
494
494
|
# if a url begins with the '/' character, it only makes sense that they
|
495
495
|
# meant to be using a file:// url. Fix it for them.
|
@@ -900,6 +900,8 @@ module FeedTools
|
|
900
900
|
# redirections, and see if we need to update the url.
|
901
901
|
for redirected_response in response_chain
|
902
902
|
if redirected_response.last.code.to_i == 301
|
903
|
+
# Reset the cache object or we may get duplicate entries
|
904
|
+
self.cache_object = nil
|
903
905
|
self.url = redirected_response.last['location']
|
904
906
|
else
|
905
907
|
# Jump out as soon as we hit anything that isn't a
|
@@ -1483,42 +1485,43 @@ module FeedTools
|
|
1483
1485
|
#
|
1484
1486
|
# This method uses the url from the link field in order to avoid grabbing
|
1485
1487
|
# the favicon for services like feedburner.
|
1486
|
-
def
|
1487
|
-
if @
|
1488
|
-
|
1489
|
-
|
1490
|
-
|
1491
|
-
@icon_link = XPath.first(channel_node,
|
1492
|
-
"link[@rel='shortcut icon']/@href").to_s
|
1488
|
+
def icon
|
1489
|
+
if @icon.nil?
|
1490
|
+
icon_node = XPath.first(channel_node, "link[@rel='icon']")
|
1491
|
+
if icon_node.nil?
|
1492
|
+
icon_node = XPath.first(channel_node, "link[@rel='shortcut icon']")
|
1493
1493
|
end
|
1494
|
-
if
|
1495
|
-
|
1496
|
-
"link[@type='image/x-icon']/@href").to_s
|
1494
|
+
if icon_node.nil?
|
1495
|
+
icon_node = XPath.first(channel_node, "link[@type='image/x-icon']")
|
1497
1496
|
end
|
1498
|
-
if
|
1499
|
-
|
1500
|
-
"icon/@href").to_s
|
1497
|
+
if icon_node.nil?
|
1498
|
+
icon_node = XPath.first(channel_node, "icon")
|
1501
1499
|
end
|
1502
|
-
if
|
1503
|
-
|
1504
|
-
"icon/text()").to_s
|
1500
|
+
if icon_node.nil?
|
1501
|
+
icon_node = XPath.first(channel_node, "logo[@style='icon']")
|
1505
1502
|
end
|
1506
|
-
if
|
1507
|
-
|
1508
|
-
"logo[@style='icon']/@href").to_s
|
1503
|
+
if icon_node.nil?
|
1504
|
+
icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
|
1509
1505
|
end
|
1510
|
-
|
1511
|
-
@
|
1512
|
-
"
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
|
1517
|
-
|
1506
|
+
unless icon_node.nil?
|
1507
|
+
@icon = FeedTools.unescape_entities(
|
1508
|
+
XPath.first(icon_node, "@href").to_s)
|
1509
|
+
if @icon == ""
|
1510
|
+
@icon = FeedTools.unescape_entities(
|
1511
|
+
XPath.first(icon_node, "text()").to_s)
|
1512
|
+
unless FeedTools.is_url? @icon
|
1513
|
+
@icon = ""
|
1514
|
+
end
|
1515
|
+
end
|
1516
|
+
if @icon == "" && self.link != nil && self.link != ""
|
1517
|
+
link_uri = URI.parse(FeedTools.normalize_url(self.link))
|
1518
|
+
@icon =
|
1519
|
+
link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
|
1520
|
+
end
|
1521
|
+
@icon = nil if @icon == ""
|
1518
1522
|
end
|
1519
|
-
icon_link = nil if icon_link == ""
|
1520
1523
|
end
|
1521
|
-
return @
|
1524
|
+
return @icon
|
1522
1525
|
end
|
1523
1526
|
|
1524
1527
|
# Returns the feed author
|
@@ -1526,70 +1529,83 @@ module FeedTools
|
|
1526
1529
|
if @author.nil?
|
1527
1530
|
@author = FeedTools::Feed::Author.new
|
1528
1531
|
|
1529
|
-
|
1530
|
-
|
1531
|
-
XPath.first(channel_node, "
|
1532
|
-
|
1533
|
-
@author.raw = FeedTools.unescape_entities(
|
1534
|
-
XPath.first(channel_node, "author/text()").to_s)
|
1535
|
-
if @author.raw == ""
|
1536
|
-
@author.raw = FeedTools.unescape_entities(
|
1537
|
-
XPath.first(channel_node, "dc:creator/text()").to_s)
|
1532
|
+
author_node = XPath.first(channel_node, "author")
|
1533
|
+
if author_node.nil?
|
1534
|
+
author_node = XPath.first(channel_node, "managingEditor")
|
1538
1535
|
end
|
1539
|
-
if
|
1540
|
-
|
1541
|
-
XPath.first(channel_node, "dc:author/text()").to_s)
|
1536
|
+
if author_node.nil?
|
1537
|
+
author_node = XPath.first(channel_node, "dc:author")
|
1542
1538
|
end
|
1543
|
-
if
|
1544
|
-
|
1545
|
-
XPath.first(channel_node, "managingEditor/text()").to_s)
|
1539
|
+
if author_node.nil?
|
1540
|
+
author_node = XPath.first(channel_node, "dc:creator")
|
1546
1541
|
end
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1542
|
+
if author_node.nil?
|
1543
|
+
author_node = XPath.first(channel_node, "atom:author")
|
1544
|
+
end
|
1545
|
+
unless author_node.nil?
|
1546
|
+
@author.raw = FeedTools.unescape_entities(
|
1547
|
+
XPath.first(author_node, "text()").to_s)
|
1548
|
+
@author.raw = nil if @author.raw == ""
|
1549
|
+
unless @author.raw.nil?
|
1551
1550
|
raw_scan = @author.raw.scan(
|
1552
|
-
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1559
|
-
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
1560
|
-
if email_scan != nil && email_scan.size > 0
|
1561
|
-
@author.email = email_scan.first.strip
|
1551
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
1552
|
+
if raw_scan.nil? || raw_scan.size == 0
|
1553
|
+
raw_scan = @author.raw.scan(
|
1554
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
1555
|
+
author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
|
1556
|
+
else
|
1557
|
+
author_raw_pair = raw_scan.first
|
1562
1558
|
end
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
|
1569
|
-
|
1570
|
-
|
1571
|
-
|
1572
|
-
|
1573
|
-
|
1559
|
+
if raw_scan.nil? || raw_scan.size == 0
|
1560
|
+
email_scan = @author.raw.scan(
|
1561
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
1562
|
+
if email_scan != nil && email_scan.size > 0
|
1563
|
+
@author.email = email_scan.first.strip
|
1564
|
+
end
|
1565
|
+
end
|
1566
|
+
unless author_raw_pair.nil? || author_raw_pair.size == 0
|
1567
|
+
@author.name = author_raw_pair.first.strip
|
1568
|
+
@author.email = author_raw_pair.last.strip
|
1569
|
+
else
|
1570
|
+
unless @author.raw.include?("@")
|
1571
|
+
# We can be reasonably sure we are looking at something
|
1572
|
+
# that the creator didn't intend to contain an email address if
|
1573
|
+
# it got through the preceding regexes and it doesn't
|
1574
|
+
# contain the tell-tale '@' symbol.
|
1575
|
+
@author.name = @author.raw
|
1576
|
+
end
|
1574
1577
|
end
|
1575
1578
|
end
|
1579
|
+
@author.name = "" if @author.name.nil?
|
1580
|
+
if @author.name == ""
|
1581
|
+
@author.name = FeedTools.unescape_entities(
|
1582
|
+
XPath.first(author_node, "name/text()").to_s)
|
1583
|
+
end
|
1584
|
+
if @author.name == ""
|
1585
|
+
@author.name = FeedTools.unescape_entities(
|
1586
|
+
XPath.first(author_node, "@name").to_s)
|
1587
|
+
end
|
1588
|
+
if @author.email == ""
|
1589
|
+
@author.email = FeedTools.unescape_entities(
|
1590
|
+
XPath.first(author_node, "email/text()").to_s)
|
1591
|
+
end
|
1592
|
+
if @author.email == ""
|
1593
|
+
@author.email = FeedTools.unescape_entities(
|
1594
|
+
XPath.first(author_node, "@email").to_s)
|
1595
|
+
end
|
1596
|
+
if @author.url == ""
|
1597
|
+
@author.url = FeedTools.unescape_entities(
|
1598
|
+
XPath.first(author_node, "url/text()").to_s)
|
1599
|
+
end
|
1600
|
+
if @author.url == ""
|
1601
|
+
@author.url = FeedTools.unescape_entities(
|
1602
|
+
XPath.first(author_node, "@url").to_s)
|
1603
|
+
end
|
1604
|
+
@author.name = nil if @author.name == ""
|
1605
|
+
@author.raw = nil if @author.raw == ""
|
1606
|
+
@author.email = nil if @author.email == ""
|
1607
|
+
@author.url = nil if @author.url == ""
|
1576
1608
|
end
|
1577
|
-
|
1578
|
-
@author.name = nil if @author.name == ""
|
1579
|
-
@author.raw = nil if @author.raw == ""
|
1580
|
-
|
1581
|
-
# Set the author email
|
1582
|
-
if @author.email == ""
|
1583
|
-
@author.email = FeedTools.unescape_entities(
|
1584
|
-
XPath.first(channel_node, "author/email/text()").to_s)
|
1585
|
-
end
|
1586
|
-
@author.email = nil if @author.email == ""
|
1587
|
-
|
1588
|
-
# Set the author url
|
1589
|
-
@author.url = FeedTools.unescape_entities(
|
1590
|
-
XPath.first(channel_node, "author/url/text()").to_s)
|
1591
|
-
@author.url = nil if @author.url == ""
|
1592
|
-
|
1593
1609
|
# Fallback on the itunes module if we didn't find an author name
|
1594
1610
|
begin
|
1595
1611
|
@author.name = self.itunes_author if @author.name.nil?
|
@@ -1822,6 +1838,9 @@ module FeedTools
|
|
1822
1838
|
if @copyright == ""
|
1823
1839
|
@copyright = XPath.first(channel_node, "dc:rights/text()").to_s
|
1824
1840
|
end
|
1841
|
+
if @copyright == ""
|
1842
|
+
@copyright = XPath.first(channel_node, "copyrights/text()").to_s
|
1843
|
+
end
|
1825
1844
|
@copyright = FeedTools.sanitize_html(@copyright, :strip)
|
1826
1845
|
@copyright = nil if @copyright == ""
|
1827
1846
|
end
|
@@ -1841,60 +1860,93 @@ module FeedTools
|
|
1841
1860
|
if update_frequency != ""
|
1842
1861
|
update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
|
1843
1862
|
if update_period == "daily"
|
1844
|
-
@time_to_live = update_frequency.to_i
|
1863
|
+
@time_to_live = update_frequency.to_i.day
|
1845
1864
|
elsif update_period == "weekly"
|
1846
|
-
@time_to_live = update_frequency.to_i
|
1865
|
+
@time_to_live = update_frequency.to_i.week
|
1847
1866
|
elsif update_period == "monthly"
|
1848
|
-
@time_to_live = update_frequency.to_i
|
1867
|
+
@time_to_live = update_frequency.to_i.month
|
1849
1868
|
elsif update_period == "yearly"
|
1850
|
-
@time_to_live = update_frequency.to_i
|
1869
|
+
@time_to_live = update_frequency.to_i.year
|
1851
1870
|
else
|
1852
1871
|
# hourly
|
1853
|
-
@time_to_live = update_frequency.to_i
|
1872
|
+
@time_to_live = update_frequency.to_i.hour
|
1854
1873
|
end
|
1855
1874
|
end
|
1856
1875
|
end
|
1857
1876
|
if @time_to_live.nil?
|
1858
|
-
# expressed in minutes
|
1877
|
+
# usually expressed in minutes
|
1859
1878
|
update_frequency = XPath.first(channel_node, "ttl/text()").to_s
|
1860
1879
|
if update_frequency != ""
|
1861
|
-
|
1880
|
+
update_span = XPath.first(channel_node, "ttl/@span").to_s
|
1881
|
+
if update_span == "seconds"
|
1882
|
+
@time_to_live = update_frequency.to_i
|
1883
|
+
elsif update_span == "minutes"
|
1884
|
+
@time_to_live = update_frequency.to_i.minute
|
1885
|
+
elsif update_span == "hours"
|
1886
|
+
@time_to_live = update_frequency.to_i.hour
|
1887
|
+
elsif update_span == "days"
|
1888
|
+
@time_to_live = update_frequency.to_i.day
|
1889
|
+
elsif update_span == "weeks"
|
1890
|
+
@time_to_live = update_frequency.to_i.week
|
1891
|
+
elsif update_span == "months"
|
1892
|
+
@time_to_live = update_frequency.to_i.month
|
1893
|
+
elsif update_span == "years"
|
1894
|
+
@time_to_live = update_frequency.to_i.year
|
1895
|
+
elsif update_frequency.to_i >= 3000
|
1896
|
+
# Normally, this should default to minutes, but realistically,
|
1897
|
+
# if they meant minutes, you're rarely going to see a value higher
|
1898
|
+
# than 120. If we see >= 3000, we're either dealing with a stupid
|
1899
|
+
# pseudo-spec that decided to use seconds, or we're looking at
|
1900
|
+
# someone who only has weekly updated content. Worst case, we
|
1901
|
+
# misreport the time, and we update too often. Best case, we
|
1902
|
+
# avoid accidentally updating the feed only once a year. In the
|
1903
|
+
# interests of being pragmatic, and since the problem we avoid
|
1904
|
+
# is a far greater one than the one we cause, just run the check
|
1905
|
+
# and hope no one actually gets hurt.
|
1906
|
+
@time_to_live = update_frequency.to_i
|
1907
|
+
else
|
1908
|
+
@time_to_live = update_frequency.to_i.minute
|
1909
|
+
end
|
1862
1910
|
end
|
1863
1911
|
end
|
1864
1912
|
if @time_to_live.nil?
|
1865
1913
|
@time_to_live = 0
|
1866
|
-
update_frequency_days =
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
1914
|
+
update_frequency_days =
|
1915
|
+
XPath.first(channel_node, "schedule/intervaltime/@days").to_s
|
1916
|
+
update_frequency_hours =
|
1917
|
+
XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
|
1918
|
+
update_frequency_minutes =
|
1919
|
+
XPath.first(channel_node, "schedule/intervaltime/@min").to_s
|
1920
|
+
update_frequency_seconds =
|
1921
|
+
XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
|
1870
1922
|
if update_frequency_days != ""
|
1871
|
-
@time_to_live = @time_to_live + update_frequency_days.to_i
|
1923
|
+
@time_to_live = @time_to_live + update_frequency_days.to_i.day
|
1872
1924
|
end
|
1873
1925
|
if update_frequency_hours != ""
|
1874
|
-
@time_to_live = @time_to_live + update_frequency_hours.to_i
|
1926
|
+
@time_to_live = @time_to_live + update_frequency_hours.to_i.hour
|
1875
1927
|
end
|
1876
1928
|
if update_frequency_minutes != ""
|
1877
|
-
@time_to_live = @time_to_live + update_frequency_minutes.to_i
|
1929
|
+
@time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
|
1878
1930
|
end
|
1879
1931
|
if update_frequency_seconds != ""
|
1880
|
-
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1932
|
+
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1881
1933
|
end
|
1882
1934
|
if @time_to_live == 0
|
1883
|
-
@time_to_live =
|
1935
|
+
@time_to_live = 1.hour
|
1884
1936
|
end
|
1885
1937
|
end
|
1886
1938
|
if @time_to_live.nil? || @time_to_live == 0
|
1887
1939
|
# Default to one hour
|
1888
|
-
@time_to_live = 1
|
1940
|
+
@time_to_live = 1.hour
|
1889
1941
|
end
|
1890
1942
|
@time_to_live = @time_to_live.round
|
1891
|
-
return @time_to_live
|
1943
|
+
return @time_to_live
|
1892
1944
|
end
|
1893
1945
|
|
1894
1946
|
# Sets the feed time to live
|
1895
1947
|
def time_to_live=(new_time_to_live)
|
1896
|
-
@time_to_live =
|
1897
|
-
@time_to_live = 1 if @time_to_live < 1
|
1948
|
+
@time_to_live = new_time_to_live.round
|
1949
|
+
@time_to_live = 1.hour if @time_to_live < 1.hour
|
1898
1950
|
end
|
1899
1951
|
|
1900
1952
|
# Returns the feed's cloud
|
@@ -2735,13 +2787,16 @@ module FeedTools
|
|
2735
2787
|
if @link != ""
|
2736
2788
|
@link = FeedTools.unescape_entities(@link)
|
2737
2789
|
end
|
2738
|
-
|
2739
|
-
|
2740
|
-
|
2741
|
-
|
2742
|
-
|
2743
|
-
|
2744
|
-
|
2790
|
+
# TODO: Actually implement proper relative url resolving instead of this crap
|
2791
|
+
# ===========================================================================
|
2792
|
+
#
|
2793
|
+
# if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
|
2794
|
+
# if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
|
2795
|
+
# @link = @link[1..-1]
|
2796
|
+
# end
|
2797
|
+
# # prepend the base to the link since they seem to have used a relative path
|
2798
|
+
# @link = feed.base + @link
|
2799
|
+
# end
|
2745
2800
|
@link = FeedTools.normalize_url(@link)
|
2746
2801
|
end
|
2747
2802
|
return @link
|
@@ -2751,25 +2806,7 @@ module FeedTools
|
|
2751
2806
|
def link=(new_link)
|
2752
2807
|
@link = new_link
|
2753
2808
|
end
|
2754
|
-
|
2755
|
-
# Returns the feed item comment link
|
2756
|
-
def comment_link
|
2757
|
-
if @comment_link.nil?
|
2758
|
-
# get the feed comment link from the xml document
|
2759
|
-
@comment_link = XPath.first(root_node, "comments/text()").to_s
|
2760
|
-
if @comment_link == ""
|
2761
|
-
@comment_link = self.link
|
2762
|
-
end
|
2763
|
-
@comment_link = FeedTools.normalize_url(@comment_link)
|
2764
|
-
end
|
2765
|
-
return @comment_link
|
2766
|
-
end
|
2767
|
-
|
2768
|
-
# Sets the feed item comment link
|
2769
|
-
def comment_link=(new_comment_link)
|
2770
|
-
@comment_link = new_comment_link
|
2771
|
-
end
|
2772
|
-
|
2809
|
+
|
2773
2810
|
# Returns a list of the feed item's categories
|
2774
2811
|
def categories
|
2775
2812
|
if @categories.nil?
|
@@ -3291,70 +3328,83 @@ module FeedTools
|
|
3291
3328
|
if @author.nil?
|
3292
3329
|
@author = FeedTools::Feed::Author.new
|
3293
3330
|
|
3294
|
-
|
3295
|
-
|
3296
|
-
XPath.first(root_node, "
|
3297
|
-
|
3298
|
-
@author.raw = FeedTools.unescape_entities(
|
3299
|
-
XPath.first(root_node, "author/text()").to_s)
|
3300
|
-
if @author.raw == ""
|
3301
|
-
@author.raw = FeedTools.unescape_entities(
|
3302
|
-
XPath.first(root_node, "dc:creator/text()").to_s)
|
3331
|
+
author_node = XPath.first(root_node, "author")
|
3332
|
+
if author_node.nil?
|
3333
|
+
author_node = XPath.first(root_node, "managingEditor")
|
3303
3334
|
end
|
3304
|
-
if
|
3305
|
-
|
3306
|
-
XPath.first(root_node, "dc:author/text()").to_s)
|
3335
|
+
if author_node.nil?
|
3336
|
+
author_node = XPath.first(root_node, "dc:author")
|
3307
3337
|
end
|
3308
|
-
if
|
3309
|
-
|
3310
|
-
XPath.first(root_node, "managingEditor/text()").to_s)
|
3338
|
+
if author_node.nil?
|
3339
|
+
author_node = XPath.first(root_node, "dc:creator")
|
3311
3340
|
end
|
3312
|
-
|
3313
|
-
|
3314
|
-
|
3315
|
-
|
3341
|
+
if author_node.nil?
|
3342
|
+
author_node = XPath.first(root_node, "atom:author")
|
3343
|
+
end
|
3344
|
+
unless author_node.nil?
|
3345
|
+
@author.raw = FeedTools.unescape_entities(
|
3346
|
+
XPath.first(author_node, "text()").to_s)
|
3347
|
+
@author.raw = nil if @author.raw == ""
|
3348
|
+
unless @author.raw.nil?
|
3316
3349
|
raw_scan = @author.raw.scan(
|
3317
|
-
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\
|
3318
|
-
|
3319
|
-
|
3320
|
-
|
3321
|
-
|
3322
|
-
|
3323
|
-
|
3324
|
-
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
3325
|
-
if email_scan != nil && email_scan.size > 0
|
3326
|
-
@author.email = email_scan.first.strip
|
3350
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
3351
|
+
if raw_scan.nil? || raw_scan.size == 0
|
3352
|
+
raw_scan = @author.raw.scan(
|
3353
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
3354
|
+
author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
|
3355
|
+
else
|
3356
|
+
author_raw_pair = raw_scan.first
|
3327
3357
|
end
|
3328
|
-
|
3329
|
-
|
3330
|
-
|
3331
|
-
|
3332
|
-
|
3333
|
-
|
3334
|
-
|
3335
|
-
|
3336
|
-
|
3337
|
-
|
3338
|
-
|
3358
|
+
if raw_scan.nil? || raw_scan.size == 0
|
3359
|
+
email_scan = @author.raw.scan(
|
3360
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
3361
|
+
if email_scan != nil && email_scan.size > 0
|
3362
|
+
@author.email = email_scan.first.strip
|
3363
|
+
end
|
3364
|
+
end
|
3365
|
+
unless author_raw_pair.nil? || author_raw_pair.size == 0
|
3366
|
+
@author.name = author_raw_pair.first.strip
|
3367
|
+
@author.email = author_raw_pair.last.strip
|
3368
|
+
else
|
3369
|
+
unless @author.raw.include?("@")
|
3370
|
+
# We can be reasonably sure we are looking at something
|
3371
|
+
# that the creator didn't intend to contain an email address if
|
3372
|
+
# it got through the preceding regexes and it doesn't
|
3373
|
+
# contain the tell-tale '@' symbol.
|
3374
|
+
@author.name = @author.raw
|
3375
|
+
end
|
3339
3376
|
end
|
3340
3377
|
end
|
3378
|
+
@author.name = "" if @author.name.nil?
|
3379
|
+
if @author.name == ""
|
3380
|
+
@author.name = FeedTools.unescape_entities(
|
3381
|
+
XPath.first(author_node, "name/text()").to_s)
|
3382
|
+
end
|
3383
|
+
if @author.name == ""
|
3384
|
+
@author.name = FeedTools.unescape_entities(
|
3385
|
+
XPath.first(author_node, "@name").to_s)
|
3386
|
+
end
|
3387
|
+
if @author.email == ""
|
3388
|
+
@author.email = FeedTools.unescape_entities(
|
3389
|
+
XPath.first(author_node, "email/text()").to_s)
|
3390
|
+
end
|
3391
|
+
if @author.email == ""
|
3392
|
+
@author.email = FeedTools.unescape_entities(
|
3393
|
+
XPath.first(author_node, "@email").to_s)
|
3394
|
+
end
|
3395
|
+
if @author.url == ""
|
3396
|
+
@author.url = FeedTools.unescape_entities(
|
3397
|
+
XPath.first(author_node, "url/text()").to_s)
|
3398
|
+
end
|
3399
|
+
if @author.url == ""
|
3400
|
+
@author.url = FeedTools.unescape_entities(
|
3401
|
+
XPath.first(author_node, "@url").to_s)
|
3402
|
+
end
|
3403
|
+
@author.name = nil if @author.name == ""
|
3404
|
+
@author.raw = nil if @author.raw == ""
|
3405
|
+
@author.email = nil if @author.email == ""
|
3406
|
+
@author.url = nil if @author.url == ""
|
3341
3407
|
end
|
3342
|
-
|
3343
|
-
@author.name = nil if @author.name == ""
|
3344
|
-
@author.raw = nil if @author.raw == ""
|
3345
|
-
|
3346
|
-
# Set the author email
|
3347
|
-
if @author.email == ""
|
3348
|
-
@author.email = FeedTools.unescape_entities(
|
3349
|
-
XPath.first(root_node, "author/email/text()").to_s)
|
3350
|
-
end
|
3351
|
-
@author.email = nil if @author.email == ""
|
3352
|
-
|
3353
|
-
# Set the author url
|
3354
|
-
@author.url = FeedTools.unescape_entities(
|
3355
|
-
XPath.first(root_node, "author/url/text()").to_s)
|
3356
|
-
@author.url = nil if @author.url == ""
|
3357
|
-
|
3358
3408
|
# Fallback on the itunes module if we didn't find an author name
|
3359
3409
|
begin
|
3360
3410
|
@author.name = self.itunes_author if @author.name.nil?
|
@@ -3522,7 +3572,8 @@ module FeedTools
|
|
3522
3572
|
# Returns the url for posting comments
|
3523
3573
|
def comments
|
3524
3574
|
if @comments.nil?
|
3525
|
-
@comments =
|
3575
|
+
@comments = FeedTools.normalize_url(
|
3576
|
+
XPath.first(root_node, "comments/text()").to_s)
|
3526
3577
|
@comments = nil if @comments == ""
|
3527
3578
|
end
|
3528
3579
|
return @comments
|
@@ -3791,4 +3842,4 @@ begin
|
|
3791
3842
|
FeedTools.feed_cache.initialize_cache
|
3792
3843
|
end
|
3793
3844
|
rescue
|
3794
|
-
end
|
3845
|
+
end
|
data/rakefile
CHANGED
data/test/nonstandard_test.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'feed_tools'
|
3
|
+
|
4
|
+
class NonStandardTest < Test::Unit::TestCase
|
5
|
+
def setup
|
6
|
+
FeedTools.tidy_enabled = false
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_xss_strict
|
10
|
+
feed = FeedTools::Feed.new
|
11
|
+
feed.xml_data = <<-FEED
|
12
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
13
|
+
<rss version="2.0/XSS-strict">
|
14
|
+
<channel>
|
15
|
+
<title>tima thinking outloud.</title>
|
16
|
+
<link>http://www.timaoutloud.org/</link>
|
17
|
+
<description>The personal weblog of Timothy Appnel</description>
|
18
|
+
<item>
|
19
|
+
<link>http://www.timaoutloud.org/archives/000415.html</link>
|
20
|
+
<title>OSCON Wrap-Up.</title>
|
21
|
+
<description>
|
22
|
+
It's been a week since OSCON ended and I'm just
|
23
|
+
beginning to recover. This uber post records my notes and
|
24
|
+
personal views as a speaker and attendee.
|
25
|
+
</description>
|
26
|
+
</item>
|
27
|
+
<item>
|
28
|
+
<link>http://www.timaoutloud.org/archives/000414.html</link>
|
29
|
+
<title>Write For The People Who Support You.</title>
|
30
|
+
<description>
|
31
|
+
Hooray! Mena is back. Ben too. Anil is celebrating
|
32
|
+
6 years of blogging.
|
33
|
+
</description>
|
34
|
+
</item>
|
35
|
+
<item>
|
36
|
+
<link>http://www.timaoutloud.org/archives/000413.html</link>
|
37
|
+
<title>tima@OSCON</title>
|
38
|
+
<description>
|
39
|
+
Ben Hammersley and I will be presenting 45 syndication hacks
|
40
|
+
in 45 minutes. Will I be able to keep pace with the madness?
|
41
|
+
</description>
|
42
|
+
</item>
|
43
|
+
</channel>
|
44
|
+
</rss>
|
45
|
+
FEED
|
46
|
+
assert_equal("tima thinking outloud.", feed.title)
|
47
|
+
assert_equal("http://www.timaoutloud.org/", feed.link)
|
48
|
+
assert_equal("The personal weblog of Timothy Appnel", feed.description)
|
49
|
+
|
50
|
+
assert_equal("OSCON Wrap-Up.", feed.items[0].title)
|
51
|
+
assert_equal("http://www.timaoutloud.org/archives/000415.html",
|
52
|
+
feed.items[0].link)
|
53
|
+
assert_equal(false, feed.items[0].description == nil)
|
54
|
+
|
55
|
+
assert_equal("Write For The People Who Support You.", feed.items[1].title)
|
56
|
+
assert_equal("http://www.timaoutloud.org/archives/000414.html",
|
57
|
+
feed.items[1].link)
|
58
|
+
assert_equal(false, feed.items[1].description == nil)
|
59
|
+
|
60
|
+
assert_equal("tima@OSCON", feed.items[2].title)
|
61
|
+
assert_equal("http://www.timaoutloud.org/archives/000413.html",
|
62
|
+
feed.items[2].link)
|
63
|
+
assert_equal(false, feed.items[2].description == nil)
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_rss_30_lite
|
67
|
+
# Delusions of grandeur...
|
68
|
+
feed = FeedTools::Feed.new
|
69
|
+
feed.xml_data = <<-FEED
|
70
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
71
|
+
<rss version="3.0" type="lite"
|
72
|
+
source="http://www.rss3.org/files/liteSample.rss">
|
73
|
+
<channel>
|
74
|
+
<title>RSS Version 3</title>
|
75
|
+
<link>http://www.rss3.org/</link>
|
76
|
+
<description>This is a sample RSS 3 Lite-type feed</description>
|
77
|
+
|
78
|
+
<lastBuildDate>Sun, 14 Aug 2005 09:53:59 +0000</lastBuildDate>
|
79
|
+
<generator name="RSS3Maker">http://no.address/</generator>
|
80
|
+
<language rel="both">en</language>
|
81
|
+
<icon>http://www.rss3.org/files/r1.ico</icon>
|
82
|
+
<copyright>Jonathan Avidan 2005 (c)</copyright>
|
83
|
+
<managingEditor name="Jonathan Avidan">
|
84
|
+
editor@rss3.org
|
85
|
+
</managingEditor>
|
86
|
+
<webMaster name="Jonathan Avidan">webmaster@rss3.org</webMaster>
|
87
|
+
<ttl span="days">7</ttl>
|
88
|
+
<docs>http://www.rss3.org/rss3lite.html</docs>
|
89
|
+
<item>
|
90
|
+
<title>RSS 3 Lite First Draft Now Available</title>
|
91
|
+
<link>
|
92
|
+
http://www.rss3.org/archive/rss3lite/first_draft.html
|
93
|
+
</link>
|
94
|
+
<description>
|
95
|
+
The RSS 3 Lite-type specification first publicly
|
96
|
+
available version
|
97
|
+
</description>
|
98
|
+
<pubDate>Sun, 18 Aug 2005 09:53:59 +0000</pubDate>
|
99
|
+
<author name="Jonathan Avidan">jonathan@rss3.org</author>
|
100
|
+
<guid type="code">6457894357689</guid>
|
101
|
+
</item>
|
102
|
+
<item isUpdated="true" updateNum="1">
|
103
|
+
<title>Welcome to the RSS 3 Official Blog!</title>
|
104
|
+
<link>http://www.rss3.org/official_blog/?p=2</link>
|
105
|
+
<description>The RSS 3 Official Blog welcome message</description>
|
106
|
+
<comments type="both">
|
107
|
+
http://www.rss3.org/official_blog/?p=2#comments
|
108
|
+
</comments>
|
109
|
+
<pubDate>Wed, 27 Jul 2005 14:34:51 +0000</pubDate>
|
110
|
+
<author name="Jonathan Avidan" type="writer">
|
111
|
+
jonathan@rss3.org
|
112
|
+
</author>
|
113
|
+
<guid type="link">http://www.rss3.org/official_blog/?p=2</guid>
|
114
|
+
</item>
|
115
|
+
</channel>
|
116
|
+
</rss>
|
117
|
+
FEED
|
118
|
+
assert_equal("RSS Version 3", feed.title)
|
119
|
+
assert_equal("http://www.rss3.org/", feed.link)
|
120
|
+
assert_equal("This is a sample RSS 3 Lite-type feed", feed.description)
|
121
|
+
assert_equal("http://no.address/", feed.generator)
|
122
|
+
assert_equal("en", feed.language)
|
123
|
+
assert_equal("http://www.rss3.org/files/r1.ico", feed.icon)
|
124
|
+
assert_equal("Jonathan Avidan 2005 (c)", feed.copyright)
|
125
|
+
assert_equal(7.day, feed.ttl)
|
126
|
+
assert_equal("http://www.rss3.org/rss3lite.html", feed.docs)
|
127
|
+
|
128
|
+
assert_equal("RSS 3 Lite First Draft Now Available", feed.items[0].title)
|
129
|
+
assert_equal("http://www.rss3.org/archive/rss3lite/first_draft.html",
|
130
|
+
feed.items[0].link)
|
131
|
+
assert_equal(false, feed.items[0].description == nil)
|
132
|
+
assert_equal(Time.utc(2005, "Aug", 18, 9, 53, 59), feed.items[0].time)
|
133
|
+
assert_equal("Jonathan Avidan", feed.items[0].author.name)
|
134
|
+
assert_equal("jonathan@rss3.org", feed.items[0].author.email)
|
135
|
+
assert_equal("6457894357689", feed.items[0].guid)
|
136
|
+
|
137
|
+
assert_equal("Welcome to the RSS 3 Official Blog!", feed.items[1].title)
|
138
|
+
assert_equal("http://www.rss3.org/official_blog/?p=2", feed.items[1].link)
|
139
|
+
assert_equal(false, feed.items[1].description == nil)
|
140
|
+
assert_equal("http://www.rss3.org/official_blog/?p=2#comments",
|
141
|
+
feed.items[1].comments)
|
142
|
+
assert_equal(Time.utc(2005, "Jul", 27, 14, 34, 51), feed.items[1].time)
|
143
|
+
assert_equal("Jonathan Avidan", feed.items[1].author.name)
|
144
|
+
assert_equal("jonathan@rss3.org", feed.items[1].author.email)
|
145
|
+
assert_equal("http://www.rss3.org/official_blog/?p=2", feed.items[1].guid)
|
146
|
+
end
|
147
|
+
end
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.8.
|
2
|
+
rubygems_version: 0.8.11
|
3
3
|
specification_version: 1
|
4
4
|
name: feedtools
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.4
|
7
|
-
date: 2005-08-
|
6
|
+
version: 0.2.5
|
7
|
+
date: 2005-08-19 00:00:00 -04:00
|
8
8
|
summary: "Parsing, generation, and caching system for xml news feeds."
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -24,6 +24,8 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
|
|
24
24
|
version: 0.0.0
|
25
25
|
version:
|
26
26
|
platform: ruby
|
27
|
+
signing_key:
|
28
|
+
cert_chain:
|
27
29
|
authors:
|
28
30
|
- Bob Aman
|
29
31
|
files:
|
@@ -73,6 +75,7 @@ files:
|
|
73
75
|
- test/cache_test.rb
|
74
76
|
- test/cdf_test.rb
|
75
77
|
- test/helper_test.rb
|
78
|
+
- test/nonstandard_test.rb
|
76
79
|
- test/rss_test.rb
|
77
80
|
test_files: []
|
78
81
|
rdoc_options:
|