feedtools 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ == FeedTools 0.2.5
2
+ * fixed multiple rows being created in the cache after a 301 redirection
3
+ * fixed broken table creation for postgresql and sqlite
4
+ * testing against non-standard feeds
5
+ * removed the 'comment_link' method in favor of the 'comments' method
1
6
  == FeedTools 0.2.4
2
7
  * fixed bug in the sqlite table creation query
3
8
  * greatly improved image support
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
25
25
  ENV['RAILS_ENV'] ||
26
26
  'production' # :nodoc:
27
27
 
28
- FEED_TOOLS_VERSION = "0.2.4"
28
+ FEED_TOOLS_VERSION = "0.2.5"
29
29
 
30
30
  $:.unshift(File.dirname(__FILE__))
31
31
  $:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
@@ -182,7 +182,7 @@ module FeedTools
182
182
  'link' VARCHAR(255) DEFAULT NULL,
183
183
  'xml_data' TEXT DEFAULT NULL,
184
184
  'http_headers' TEXT DEFAULT NULL,
185
- 'last_retrieved' DATETIME DEFAULT NULL,
185
+ 'last_retrieved' DATETIME DEFAULT NULL
186
186
  );
187
187
  SQL_END
188
188
  feeds_psql = <<-SQL_END
@@ -193,7 +193,7 @@ module FeedTools
193
193
  link varchar(255) default NULL,
194
194
  xml_data text default NULL,
195
195
  http_headers text default NULL,
196
- last_retrieved datetime default NULL,
196
+ last_retrieved timestamp default NULL
197
197
  );
198
198
  SQL_END
199
199
  table_creation_sql = nil
@@ -489,7 +489,7 @@ module FeedTools
489
489
  if url.nil? || url == ""
490
490
  return nil
491
491
  end
492
- normalized_url = url
492
+ normalized_url = url.strip
493
493
 
494
494
  # if a url begins with the '/' character, it only makes sense that they
495
495
  # meant to be using a file:// url. Fix it for them.
@@ -900,6 +900,8 @@ module FeedTools
900
900
  # redirections, and see if we need to update the url.
901
901
  for redirected_response in response_chain
902
902
  if redirected_response.last.code.to_i == 301
903
+ # Reset the cache object or we may get duplicate entries
904
+ self.cache_object = nil
903
905
  self.url = redirected_response.last['location']
904
906
  else
905
907
  # Jump out as soon as we hit anything that isn't a
@@ -1483,42 +1485,43 @@ module FeedTools
1483
1485
  #
1484
1486
  # This method uses the url from the link field in order to avoid grabbing
1485
1487
  # the favicon for services like feedburner.
1486
- def icon_link
1487
- if @icon_link.nil?
1488
- @icon_link = XPath.first(channel_node,
1489
- "link[@rel='icon']/@href").to_s
1490
- if @icon_link == ""
1491
- @icon_link = XPath.first(channel_node,
1492
- "link[@rel='shortcut icon']/@href").to_s
1488
+ def icon
1489
+ if @icon.nil?
1490
+ icon_node = XPath.first(channel_node, "link[@rel='icon']")
1491
+ if icon_node.nil?
1492
+ icon_node = XPath.first(channel_node, "link[@rel='shortcut icon']")
1493
1493
  end
1494
- if @icon_link == ""
1495
- @icon_link = XPath.first(channel_node,
1496
- "link[@type='image/x-icon']/@href").to_s
1494
+ if icon_node.nil?
1495
+ icon_node = XPath.first(channel_node, "link[@type='image/x-icon']")
1497
1496
  end
1498
- if @icon_link == ""
1499
- @icon_link = XPath.first(channel_node,
1500
- "icon/@href").to_s
1497
+ if icon_node.nil?
1498
+ icon_node = XPath.first(channel_node, "icon")
1501
1499
  end
1502
- if @icon_link == ""
1503
- @icon_link = XPath.first(channel_node,
1504
- "icon/text()").to_s
1500
+ if icon_node.nil?
1501
+ icon_node = XPath.first(channel_node, "logo[@style='icon']")
1505
1502
  end
1506
- if @icon_link == ""
1507
- @icon_link = XPath.first(channel_node,
1508
- "logo[@style='icon']/@href").to_s
1503
+ if icon_node.nil?
1504
+ icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
1509
1505
  end
1510
- if @icon_link == ""
1511
- @icon_link = XPath.first(channel_node,
1512
- "LOGO[@STYLE='ICON']/@HREF").to_s
1513
- end
1514
- if @icon_link == "" && self.link != nil && self.link != ""
1515
- link_uri = URI.parse(FeedTools.normalize_url(self.link))
1516
- @icon_link =
1517
- link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1506
+ unless icon_node.nil?
1507
+ @icon = FeedTools.unescape_entities(
1508
+ XPath.first(icon_node, "@href").to_s)
1509
+ if @icon == ""
1510
+ @icon = FeedTools.unescape_entities(
1511
+ XPath.first(icon_node, "text()").to_s)
1512
+ unless FeedTools.is_url? @icon
1513
+ @icon = ""
1514
+ end
1515
+ end
1516
+ if @icon == "" && self.link != nil && self.link != ""
1517
+ link_uri = URI.parse(FeedTools.normalize_url(self.link))
1518
+ @icon =
1519
+ link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1520
+ end
1521
+ @icon = nil if @icon == ""
1518
1522
  end
1519
- icon_link = nil if icon_link == ""
1520
1523
  end
1521
- return @icon_link
1524
+ return @icon
1522
1525
  end
1523
1526
 
1524
1527
  # Returns the feed author
@@ -1526,70 +1529,83 @@ module FeedTools
1526
1529
  if @author.nil?
1527
1530
  @author = FeedTools::Feed::Author.new
1528
1531
 
1529
- # Set the author name
1530
- @author.name = FeedTools.unescape_entities(
1531
- XPath.first(channel_node, "author/name/text()").to_s)
1532
-
1533
- @author.raw = FeedTools.unescape_entities(
1534
- XPath.first(channel_node, "author/text()").to_s)
1535
- if @author.raw == ""
1536
- @author.raw = FeedTools.unescape_entities(
1537
- XPath.first(channel_node, "dc:creator/text()").to_s)
1532
+ author_node = XPath.first(channel_node, "author")
1533
+ if author_node.nil?
1534
+ author_node = XPath.first(channel_node, "managingEditor")
1538
1535
  end
1539
- if @author.raw == ""
1540
- @author.raw = FeedTools.unescape_entities(
1541
- XPath.first(channel_node, "dc:author/text()").to_s)
1536
+ if author_node.nil?
1537
+ author_node = XPath.first(channel_node, "dc:author")
1542
1538
  end
1543
- if @author.raw == ""
1544
- @author.raw = FeedTools.unescape_entities(
1545
- XPath.first(channel_node, "managingEditor/text()").to_s)
1539
+ if author_node.nil?
1540
+ author_node = XPath.first(channel_node, "dc:creator")
1546
1541
  end
1547
- unless @author.raw == ""
1548
- raw_scan = @author.raw.scan(
1549
- /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1550
- if raw_scan.nil? || raw_scan.size == 0
1542
+ if author_node.nil?
1543
+ author_node = XPath.first(channel_node, "atom:author")
1544
+ end
1545
+ unless author_node.nil?
1546
+ @author.raw = FeedTools.unescape_entities(
1547
+ XPath.first(author_node, "text()").to_s)
1548
+ @author.raw = nil if @author.raw == ""
1549
+ unless @author.raw.nil?
1551
1550
  raw_scan = @author.raw.scan(
1552
- /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
1553
- author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
1554
- else
1555
- author_raw_pair = raw_scan.first
1556
- end
1557
- if raw_scan.nil? || raw_scan.size == 0
1558
- email_scan = @author.raw.scan(
1559
- /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
1560
- if email_scan != nil && email_scan.size > 0
1561
- @author.email = email_scan.first.strip
1551
+ /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1552
+ if raw_scan.nil? || raw_scan.size == 0
1553
+ raw_scan = @author.raw.scan(
1554
+ /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
1555
+ author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
1556
+ else
1557
+ author_raw_pair = raw_scan.first
1562
1558
  end
1563
- end
1564
- unless author_raw_pair.nil? || author_raw_pair.size == 0
1565
- @author.name = author_raw_pair.first.strip
1566
- @author.email = author_raw_pair.last.strip
1567
- else
1568
- unless @author.raw.include?("@")
1569
- # We can be reasonably sure we are looking at something
1570
- # that the creator didn't intend to contain an email address if
1571
- # it got through the preceding regexes and it doesn't
1572
- # contain the tell-tale '@' symbol.
1573
- @author.name = @author.raw
1559
+ if raw_scan.nil? || raw_scan.size == 0
1560
+ email_scan = @author.raw.scan(
1561
+ /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
1562
+ if email_scan != nil && email_scan.size > 0
1563
+ @author.email = email_scan.first.strip
1564
+ end
1565
+ end
1566
+ unless author_raw_pair.nil? || author_raw_pair.size == 0
1567
+ @author.name = author_raw_pair.first.strip
1568
+ @author.email = author_raw_pair.last.strip
1569
+ else
1570
+ unless @author.raw.include?("@")
1571
+ # We can be reasonably sure we are looking at something
1572
+ # that the creator didn't intend to contain an email address if
1573
+ # it got through the preceding regexes and it doesn't
1574
+ # contain the tell-tale '@' symbol.
1575
+ @author.name = @author.raw
1576
+ end
1574
1577
  end
1575
1578
  end
1579
+ @author.name = "" if @author.name.nil?
1580
+ if @author.name == ""
1581
+ @author.name = FeedTools.unescape_entities(
1582
+ XPath.first(author_node, "name/text()").to_s)
1583
+ end
1584
+ if @author.name == ""
1585
+ @author.name = FeedTools.unescape_entities(
1586
+ XPath.first(author_node, "@name").to_s)
1587
+ end
1588
+ if @author.email == ""
1589
+ @author.email = FeedTools.unescape_entities(
1590
+ XPath.first(author_node, "email/text()").to_s)
1591
+ end
1592
+ if @author.email == ""
1593
+ @author.email = FeedTools.unescape_entities(
1594
+ XPath.first(author_node, "@email").to_s)
1595
+ end
1596
+ if @author.url == ""
1597
+ @author.url = FeedTools.unescape_entities(
1598
+ XPath.first(author_node, "url/text()").to_s)
1599
+ end
1600
+ if @author.url == ""
1601
+ @author.url = FeedTools.unescape_entities(
1602
+ XPath.first(author_node, "@url").to_s)
1603
+ end
1604
+ @author.name = nil if @author.name == ""
1605
+ @author.raw = nil if @author.raw == ""
1606
+ @author.email = nil if @author.email == ""
1607
+ @author.url = nil if @author.url == ""
1576
1608
  end
1577
-
1578
- @author.name = nil if @author.name == ""
1579
- @author.raw = nil if @author.raw == ""
1580
-
1581
- # Set the author email
1582
- if @author.email == ""
1583
- @author.email = FeedTools.unescape_entities(
1584
- XPath.first(channel_node, "author/email/text()").to_s)
1585
- end
1586
- @author.email = nil if @author.email == ""
1587
-
1588
- # Set the author url
1589
- @author.url = FeedTools.unescape_entities(
1590
- XPath.first(channel_node, "author/url/text()").to_s)
1591
- @author.url = nil if @author.url == ""
1592
-
1593
1609
  # Fallback on the itunes module if we didn't find an author name
1594
1610
  begin
1595
1611
  @author.name = self.itunes_author if @author.name.nil?
@@ -1822,6 +1838,9 @@ module FeedTools
1822
1838
  if @copyright == ""
1823
1839
  @copyright = XPath.first(channel_node, "dc:rights/text()").to_s
1824
1840
  end
1841
+ if @copyright == ""
1842
+ @copyright = XPath.first(channel_node, "copyrights/text()").to_s
1843
+ end
1825
1844
  @copyright = FeedTools.sanitize_html(@copyright, :strip)
1826
1845
  @copyright = nil if @copyright == ""
1827
1846
  end
@@ -1841,60 +1860,93 @@ module FeedTools
1841
1860
  if update_frequency != ""
1842
1861
  update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
1843
1862
  if update_period == "daily"
1844
- @time_to_live = update_frequency.to_i * 24
1863
+ @time_to_live = update_frequency.to_i.day
1845
1864
  elsif update_period == "weekly"
1846
- @time_to_live = update_frequency.to_i * 24 * 7
1865
+ @time_to_live = update_frequency.to_i.week
1847
1866
  elsif update_period == "monthly"
1848
- @time_to_live = update_frequency.to_i * 24 * 30
1867
+ @time_to_live = update_frequency.to_i.month
1849
1868
  elsif update_period == "yearly"
1850
- @time_to_live = update_frequency.to_i * 24 * 365
1869
+ @time_to_live = update_frequency.to_i.year
1851
1870
  else
1852
1871
  # hourly
1853
- @time_to_live = update_frequency.to_i
1872
+ @time_to_live = update_frequency.to_i.hour
1854
1873
  end
1855
1874
  end
1856
1875
  end
1857
1876
  if @time_to_live.nil?
1858
- # expressed in minutes
1877
+ # usually expressed in minutes
1859
1878
  update_frequency = XPath.first(channel_node, "ttl/text()").to_s
1860
1879
  if update_frequency != ""
1861
- @time_to_live = (update_frequency.to_i / 60)
1880
+ update_span = XPath.first(channel_node, "ttl/@span").to_s
1881
+ if update_span == "seconds"
1882
+ @time_to_live = update_frequency.to_i
1883
+ elsif update_span == "minutes"
1884
+ @time_to_live = update_frequency.to_i.minute
1885
+ elsif update_span == "hours"
1886
+ @time_to_live = update_frequency.to_i.hour
1887
+ elsif update_span == "days"
1888
+ @time_to_live = update_frequency.to_i.day
1889
+ elsif update_span == "weeks"
1890
+ @time_to_live = update_frequency.to_i.week
1891
+ elsif update_span == "months"
1892
+ @time_to_live = update_frequency.to_i.month
1893
+ elsif update_span == "years"
1894
+ @time_to_live = update_frequency.to_i.year
1895
+ elsif update_frequency.to_i >= 3000
1896
+ # Normally, this should default to minutes, but realistically,
1897
+ # if they meant minutes, you're rarely going to see a value higher
1898
+ # than 120. If we see >= 3000, we're either dealing with a stupid
1899
+ # pseudo-spec that decided to use seconds, or we're looking at
1900
+ # someone who only has weekly updated content. Worst case, we
1901
+ # misreport the time, and we update too often. Best case, we
1902
+ # avoid accidentally updating the feed only once a year. In the
1903
+ # interests of being pragmatic, and since the problem we avoid
1904
+ # is a far greater one than the one we cause, just run the check
1905
+ # and hope no one actually gets hurt.
1906
+ @time_to_live = update_frequency.to_i
1907
+ else
1908
+ @time_to_live = update_frequency.to_i.minute
1909
+ end
1862
1910
  end
1863
1911
  end
1864
1912
  if @time_to_live.nil?
1865
1913
  @time_to_live = 0
1866
- update_frequency_days = XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1867
- update_frequency_hours = XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1868
- update_frequency_minutes = XPath.first(channel_node, "schedule/intervaltime/@min").to_s
1869
- update_frequency_seconds = XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
1914
+ update_frequency_days =
1915
+ XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1916
+ update_frequency_hours =
1917
+ XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1918
+ update_frequency_minutes =
1919
+ XPath.first(channel_node, "schedule/intervaltime/@min").to_s
1920
+ update_frequency_seconds =
1921
+ XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
1870
1922
  if update_frequency_days != ""
1871
- @time_to_live = @time_to_live + update_frequency_days.to_i * 24
1923
+ @time_to_live = @time_to_live + update_frequency_days.to_i.day
1872
1924
  end
1873
1925
  if update_frequency_hours != ""
1874
- @time_to_live = @time_to_live + update_frequency_hours.to_i * 1
1926
+ @time_to_live = @time_to_live + update_frequency_hours.to_i.hour
1875
1927
  end
1876
1928
  if update_frequency_minutes != ""
1877
- @time_to_live = @time_to_live + update_frequency_minutes.to_i / 60
1929
+ @time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
1878
1930
  end
1879
1931
  if update_frequency_seconds != ""
1880
- @time_to_live = @time_to_live + update_frequency_seconds.to_i / 3600
1932
+ @time_to_live = @time_to_live + update_frequency_seconds.to_i
1881
1933
  end
1882
1934
  if @time_to_live == 0
1883
- @time_to_live = nil
1935
+ @time_to_live = 1.hour
1884
1936
  end
1885
1937
  end
1886
1938
  if @time_to_live.nil? || @time_to_live == 0
1887
1939
  # Default to one hour
1888
- @time_to_live = 1
1940
+ @time_to_live = 1.hour
1889
1941
  end
1890
1942
  @time_to_live = @time_to_live.round
1891
- return @time_to_live.hour
1943
+ return @time_to_live
1892
1944
  end
1893
1945
 
1894
1946
  # Sets the feed time to live
1895
1947
  def time_to_live=(new_time_to_live)
1896
- @time_to_live = (new_time_to_live / 3600).round
1897
- @time_to_live = 1 if @time_to_live < 1
1948
+ @time_to_live = new_time_to_live.round
1949
+ @time_to_live = 1.hour if @time_to_live < 1.hour
1898
1950
  end
1899
1951
 
1900
1952
  # Returns the feed's cloud
@@ -2735,13 +2787,16 @@ module FeedTools
2735
2787
  if @link != ""
2736
2788
  @link = FeedTools.unescape_entities(@link)
2737
2789
  end
2738
- if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
2739
- if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
2740
- @link = @link[1..-1]
2741
- end
2742
- # prepend the base to the link since they seem to have used a relative path
2743
- @link = feed.base + @link
2744
- end
2790
+ # TODO: Actually implement proper relative url resolving instead of this crap
2791
+ # ===========================================================================
2792
+ #
2793
+ # if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
2794
+ # if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
2795
+ # @link = @link[1..-1]
2796
+ # end
2797
+ # # prepend the base to the link since they seem to have used a relative path
2798
+ # @link = feed.base + @link
2799
+ # end
2745
2800
  @link = FeedTools.normalize_url(@link)
2746
2801
  end
2747
2802
  return @link
@@ -2751,25 +2806,7 @@ module FeedTools
2751
2806
  def link=(new_link)
2752
2807
  @link = new_link
2753
2808
  end
2754
-
2755
- # Returns the feed item comment link
2756
- def comment_link
2757
- if @comment_link.nil?
2758
- # get the feed comment link from the xml document
2759
- @comment_link = XPath.first(root_node, "comments/text()").to_s
2760
- if @comment_link == ""
2761
- @comment_link = self.link
2762
- end
2763
- @comment_link = FeedTools.normalize_url(@comment_link)
2764
- end
2765
- return @comment_link
2766
- end
2767
-
2768
- # Sets the feed item comment link
2769
- def comment_link=(new_comment_link)
2770
- @comment_link = new_comment_link
2771
- end
2772
-
2809
+
2773
2810
  # Returns a list of the feed item's categories
2774
2811
  def categories
2775
2812
  if @categories.nil?
@@ -3291,70 +3328,83 @@ module FeedTools
3291
3328
  if @author.nil?
3292
3329
  @author = FeedTools::Feed::Author.new
3293
3330
 
3294
- # Set the author name
3295
- @author.name = FeedTools.unescape_entities(
3296
- XPath.first(root_node, "author/name/text()").to_s)
3297
-
3298
- @author.raw = FeedTools.unescape_entities(
3299
- XPath.first(root_node, "author/text()").to_s)
3300
- if @author.raw == ""
3301
- @author.raw = FeedTools.unescape_entities(
3302
- XPath.first(root_node, "dc:creator/text()").to_s)
3331
+ author_node = XPath.first(root_node, "author")
3332
+ if author_node.nil?
3333
+ author_node = XPath.first(root_node, "managingEditor")
3303
3334
  end
3304
- if @author.raw == ""
3305
- @author.raw = FeedTools.unescape_entities(
3306
- XPath.first(root_node, "dc:author/text()").to_s)
3335
+ if author_node.nil?
3336
+ author_node = XPath.first(root_node, "dc:author")
3307
3337
  end
3308
- if @author.raw == ""
3309
- @author.raw = FeedTools.unescape_entities(
3310
- XPath.first(root_node, "managingEditor/text()").to_s)
3338
+ if author_node.nil?
3339
+ author_node = XPath.first(root_node, "dc:creator")
3311
3340
  end
3312
- unless @author.raw == ""
3313
- raw_scan = @author.raw.scan(
3314
- /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
3315
- if raw_scan.nil? || raw_scan.size == 0
3341
+ if author_node.nil?
3342
+ author_node = XPath.first(root_node, "atom:author")
3343
+ end
3344
+ unless author_node.nil?
3345
+ @author.raw = FeedTools.unescape_entities(
3346
+ XPath.first(author_node, "text()").to_s)
3347
+ @author.raw = nil if @author.raw == ""
3348
+ unless @author.raw.nil?
3316
3349
  raw_scan = @author.raw.scan(
3317
- /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
3318
- author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
3319
- else
3320
- author_raw_pair = raw_scan.first
3321
- end
3322
- if raw_scan.nil? || raw_scan.size == 0
3323
- email_scan = @author.raw.scan(
3324
- /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
3325
- if email_scan != nil && email_scan.size > 0
3326
- @author.email = email_scan.first.strip
3350
+ /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
3351
+ if raw_scan.nil? || raw_scan.size == 0
3352
+ raw_scan = @author.raw.scan(
3353
+ /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
3354
+ author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
3355
+ else
3356
+ author_raw_pair = raw_scan.first
3327
3357
  end
3328
- end
3329
- unless author_raw_pair.nil? || author_raw_pair.size == 0
3330
- @author.name = author_raw_pair.first.strip
3331
- @author.email = author_raw_pair.last.strip
3332
- else
3333
- unless @author.raw.include?("@")
3334
- # We can be reasonably sure we are looking at something
3335
- # that the creator didn't intend to contain an email address if
3336
- # it got through the preceeding regexes and it doesn't
3337
- # contain the tell-tale '@' symbol.
3338
- @author.name = @author.raw
3358
+ if raw_scan.nil? || raw_scan.size == 0
3359
+ email_scan = @author.raw.scan(
3360
+ /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
3361
+ if email_scan != nil && email_scan.size > 0
3362
+ @author.email = email_scan.first.strip
3363
+ end
3364
+ end
3365
+ unless author_raw_pair.nil? || author_raw_pair.size == 0
3366
+ @author.name = author_raw_pair.first.strip
3367
+ @author.email = author_raw_pair.last.strip
3368
+ else
3369
+ unless @author.raw.include?("@")
3370
+ # We can be reasonably sure we are looking at something
3371
+ # that the creator didn't intend to contain an email address if
3372
+ # it got through the preceeding regexes and it doesn't
3373
+ # contain the tell-tale '@' symbol.
3374
+ @author.name = @author.raw
3375
+ end
3339
3376
  end
3340
3377
  end
3378
+ @author.name = "" if @author.name.nil?
3379
+ if @author.name == ""
3380
+ @author.name = FeedTools.unescape_entities(
3381
+ XPath.first(author_node, "name/text()").to_s)
3382
+ end
3383
+ if @author.name == ""
3384
+ @author.name = FeedTools.unescape_entities(
3385
+ XPath.first(author_node, "@name").to_s)
3386
+ end
3387
+ if @author.email == ""
3388
+ @author.email = FeedTools.unescape_entities(
3389
+ XPath.first(author_node, "email/text()").to_s)
3390
+ end
3391
+ if @author.email == ""
3392
+ @author.email = FeedTools.unescape_entities(
3393
+ XPath.first(author_node, "@email").to_s)
3394
+ end
3395
+ if @author.url == ""
3396
+ @author.url = FeedTools.unescape_entities(
3397
+ XPath.first(author_node, "url/text()").to_s)
3398
+ end
3399
+ if @author.url == ""
3400
+ @author.url = FeedTools.unescape_entities(
3401
+ XPath.first(author_node, "@url").to_s)
3402
+ end
3403
+ @author.name = nil if @author.name == ""
3404
+ @author.raw = nil if @author.raw == ""
3405
+ @author.email = nil if @author.email == ""
3406
+ @author.url = nil if @author.url == ""
3341
3407
  end
3342
-
3343
- @author.name = nil if @author.name == ""
3344
- @author.raw = nil if @author.raw == ""
3345
-
3346
- # Set the author email
3347
- if @author.email == ""
3348
- @author.email = FeedTools.unescape_entities(
3349
- XPath.first(root_node, "author/email/text()").to_s)
3350
- end
3351
- @author.email = nil if @author.email == ""
3352
-
3353
- # Set the author url
3354
- @author.url = FeedTools.unescape_entities(
3355
- XPath.first(root_node, "author/url/text()").to_s)
3356
- @author.url = nil if @author.url == ""
3357
-
3358
3408
  # Fallback on the itunes module if we didn't find an author name
3359
3409
  begin
3360
3410
  @author.name = self.itunes_author if @author.name.nil?
@@ -3522,7 +3572,8 @@ module FeedTools
3522
3572
  # Returns the url for posting comments
3523
3573
  def comments
3524
3574
  if @comments.nil?
3525
- @comments = XPath.first(root_node, "comments/text()").to_s
3575
+ @comments = FeedTools.normalize_url(
3576
+ XPath.first(root_node, "comments/text()").to_s)
3526
3577
  @comments = nil if @comments == ""
3527
3578
  end
3528
3579
  return @comments
@@ -3791,4 +3842,4 @@ begin
3791
3842
  FeedTools.feed_cache.initialize_cache
3792
3843
  end
3793
3844
  rescue
3794
- end
3845
+ end
data/rakefile CHANGED
@@ -7,7 +7,7 @@ require 'rake/gempackagetask'
7
7
  require 'rake/contrib/rubyforgepublisher'
8
8
 
9
9
  PKG_NAME = 'feedtools'
10
- PKG_VERSION = '0.2.4'
10
+ PKG_VERSION = '0.2.5'
11
11
  PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
12
12
 
13
13
  RELEASE_NAME = "REL #{PKG_VERSION}"
@@ -0,0 +1,147 @@
1
+ require 'test/unit'
2
+ require 'feed_tools'
3
+
4
+ class NonStandardTest < Test::Unit::TestCase
5
+ def setup
6
+ FeedTools.tidy_enabled = false
7
+ end
8
+
9
+ def test_xss_strict
10
+ feed = FeedTools::Feed.new
11
+ feed.xml_data = <<-FEED
12
+ <?xml version="1.0" encoding="iso-8859-1"?>
13
+ <rss version="2.0/XSS-strict">
14
+ <channel>
15
+ <title>tima thinking outloud.</title>
16
+ <link>http://www.timaoutloud.org/</link>
17
+ <description>The personal weblog of Timothy Appnel</description>
18
+ <item>
19
+ <link>http://www.timaoutloud.org/archives/000415.html</link>
20
+ <title>OSCON Wrap-Up.</title>
21
+ <description>
22
+ It&apos;s been a week since OSCON ended and I&apos;m just
23
+ beginning to recover. This uber post records my notes and
24
+ personal views as a speaker and attendee.
25
+ </description>
26
+ </item>
27
+ <item>
28
+ <link>http://www.timaoutloud.org/archives/000414.html</link>
29
+ <title>Write For The People Who Support You.</title>
30
+ <description>
31
+ Hooray! Mena is back. Ben too. Anil is celebrating
32
+ 6 years of blogging.
33
+ </description>
34
+ </item>
35
+ <item>
36
+ <link>http://www.timaoutloud.org/archives/000413.html</link>
37
+ <title>tima@OSCON</title>
38
+ <description>
39
+ Ben Hammersley and I will be presenting 45 syndication hacks
40
+ in 45 minutes. Will I be able to keep pace with the madness?
41
+ </description>
42
+ </item>
43
+ </channel>
44
+ </rss>
45
+ FEED
46
+ assert_equal("tima thinking outloud.", feed.title)
47
+ assert_equal("http://www.timaoutloud.org/", feed.link)
48
+ assert_equal("The personal weblog of Timothy Appnel", feed.description)
49
+
50
+ assert_equal("OSCON Wrap-Up.", feed.items[0].title)
51
+ assert_equal("http://www.timaoutloud.org/archives/000415.html",
52
+ feed.items[0].link)
53
+ assert_equal(false, feed.items[0].description == nil)
54
+
55
+ assert_equal("Write For The People Who Support You.", feed.items[1].title)
56
+ assert_equal("http://www.timaoutloud.org/archives/000414.html",
57
+ feed.items[1].link)
58
+ assert_equal(false, feed.items[1].description == nil)
59
+
60
+ assert_equal("tima@OSCON", feed.items[2].title)
61
+ assert_equal("http://www.timaoutloud.org/archives/000413.html",
62
+ feed.items[2].link)
63
+ assert_equal(false, feed.items[2].description == nil)
64
+ end
65
+
66
+ def test_rss_30_lite
67
+ # Delusions of grandeur...
68
+ feed = FeedTools::Feed.new
69
+ feed.xml_data = <<-FEED
70
+ <?xml version="1.0" encoding="UTF-8"?>
71
+ <rss version="3.0" type="lite"
72
+ source="http://www.rss3.org/files/liteSample.rss">
73
+ <channel>
74
+ <title>RSS Version 3</title>
75
+ <link>http://www.rss3.org/</link>
76
+ <description>This is a sample RSS 3 Lite-type feed</description>
77
+
78
+ <lastBuildDate>Sun, 14 Aug 2005 09:53:59 +0000</lastBuildDate>
79
+ <generator name="RSS3Maker">http://no.address/</generator>
80
+ <language rel="both">en</language>
81
+ <icon>http://www.rss3.org/files/r1.ico</icon>
82
+ <copyright>Jonathan Avidan 2005 (c)</copyright>
83
+ <managingEditor name="Jonathan Avidan">
84
+ editor@rss3.org
85
+ </managingEditor>
86
+ <webMaster name="Jonathan Avidan">webmaster@rss3.org</webMaster>
87
+ <ttl span="days">7</ttl>
88
+ <docs>http://www.rss3.org/rss3lite.html</docs>
89
+ <item>
90
+ <title>RSS 3 Lite First Draft Now Available</title>
91
+ <link>
92
+ http://www.rss3.org/archive/rss3lite/first_draft.html
93
+ </link>
94
+ <description>
95
+ The RSS 3 Lite-type specification first publicly
96
+ available version
97
+ </description>
98
+ <pubDate>Sun, 18 Aug 2005 09:53:59 +0000</pubDate>
99
+ <author name="Jonathan Avidan">jonathan@rss3.org</author>
100
+ <guid type="code">6457894357689</guid>
101
+ </item>
102
+ <item isUpdated="true" updateNum="1">
103
+ <title>Welcome to the RSS 3 Official Blog!</title>
104
+ <link>http://www.rss3.org/official_blog/?p=2</link>
105
+ <description>The RSS 3 Official Blog welcome message</description>
106
+ <comments type="both">
107
+ http://www.rss3.org/official_blog/?p=2#comments
108
+ </comments>
109
+ <pubDate>Wed, 27 Jul 2005 14:34:51 +0000</pubDate>
110
+ <author name="Jonathan Avidan" type="writer">
111
+ jonathan@rss3.org
112
+ </author>
113
+ <guid type="link">http://www.rss3.org/official_blog/?p=2</guid>
114
+ </item>
115
+ </channel>
116
+ </rss>
117
+ FEED
118
+ assert_equal("RSS Version 3", feed.title)
119
+ assert_equal("http://www.rss3.org/", feed.link)
120
+ assert_equal("This is a sample RSS 3 Lite-type feed", feed.description)
121
+ assert_equal("http://no.address/", feed.generator)
122
+ assert_equal("en", feed.language)
123
+ assert_equal("http://www.rss3.org/files/r1.ico", feed.icon)
124
+ assert_equal("Jonathan Avidan 2005 (c)", feed.copyright)
125
+ assert_equal(7.day, feed.ttl)
126
+ assert_equal("http://www.rss3.org/rss3lite.html", feed.docs)
127
+
128
+ assert_equal("RSS 3 Lite First Draft Now Available", feed.items[0].title)
129
+ assert_equal("http://www.rss3.org/archive/rss3lite/first_draft.html",
130
+ feed.items[0].link)
131
+ assert_equal(false, feed.items[0].description == nil)
132
+ assert_equal(Time.utc(2005, "Aug", 18, 9, 53, 59), feed.items[0].time)
133
+ assert_equal("Jonathan Avidan", feed.items[0].author.name)
134
+ assert_equal("jonathan@rss3.org", feed.items[0].author.email)
135
+ assert_equal("6457894357689", feed.items[0].guid)
136
+
137
+ assert_equal("Welcome to the RSS 3 Official Blog!", feed.items[1].title)
138
+ assert_equal("http://www.rss3.org/official_blog/?p=2", feed.items[1].link)
139
+ assert_equal(false, feed.items[1].description == nil)
140
+ assert_equal("http://www.rss3.org/official_blog/?p=2#comments",
141
+ feed.items[1].comments)
142
+ assert_equal(Time.utc(2005, "Jul", 27, 14, 34, 51), feed.items[1].time)
143
+ assert_equal("Jonathan Avidan", feed.items[1].author.name)
144
+ assert_equal("jonathan@rss3.org", feed.items[1].author.email)
145
+ assert_equal("http://www.rss3.org/official_blog/?p=2", feed.items[1].guid)
146
+ end
147
+ end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.10
2
+ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: feedtools
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.4
7
- date: 2005-08-16
6
+ version: 0.2.5
7
+ date: 2005-08-19 00:00:00 -04:00
8
8
  summary: "Parsing, generation, and caching system for xml news feeds."
9
9
  require_paths:
10
10
  - lib
@@ -24,6 +24,8 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
24
24
  version: 0.0.0
25
25
  version:
26
26
  platform: ruby
27
+ signing_key:
28
+ cert_chain:
27
29
  authors:
28
30
  - Bob Aman
29
31
  files:
@@ -73,6 +75,7 @@ files:
73
75
  - test/cache_test.rb
74
76
  - test/cdf_test.rb
75
77
  - test/helper_test.rb
78
+ - test/nonstandard_test.rb
76
79
  - test/rss_test.rb
77
80
  test_files: []
78
81
  rdoc_options: