feedtools 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ == FeedTools 0.2.5
2
+ * fixed multiple rows being created in the cache after a 301 redirection
3
+ * fixed broken table creation for postgresql and sqlite
4
+ * testing against non-standard feeds
5
+ * removed the 'comment_link' method in favor of the 'comments' method
1
6
  == FeedTools 0.2.4
2
7
  * fixed bug in the sqlite table creation query
3
8
  * greatly improved image support
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
25
25
  ENV['RAILS_ENV'] ||
26
26
  'production' # :nodoc:
27
27
 
28
- FEED_TOOLS_VERSION = "0.2.4"
28
+ FEED_TOOLS_VERSION = "0.2.5"
29
29
 
30
30
  $:.unshift(File.dirname(__FILE__))
31
31
  $:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
@@ -182,7 +182,7 @@ module FeedTools
182
182
  'link' VARCHAR(255) DEFAULT NULL,
183
183
  'xml_data' TEXT DEFAULT NULL,
184
184
  'http_headers' TEXT DEFAULT NULL,
185
- 'last_retrieved' DATETIME DEFAULT NULL,
185
+ 'last_retrieved' DATETIME DEFAULT NULL
186
186
  );
187
187
  SQL_END
188
188
  feeds_psql = <<-SQL_END
@@ -193,7 +193,7 @@ module FeedTools
193
193
  link varchar(255) default NULL,
194
194
  xml_data text default NULL,
195
195
  http_headers text default NULL,
196
- last_retrieved datetime default NULL,
196
+ last_retrieved timestamp default NULL
197
197
  );
198
198
  SQL_END
199
199
  table_creation_sql = nil
@@ -489,7 +489,7 @@ module FeedTools
489
489
  if url.nil? || url == ""
490
490
  return nil
491
491
  end
492
- normalized_url = url
492
+ normalized_url = url.strip
493
493
 
494
494
  # if a url begins with the '/' character, it only makes sense that they
495
495
  # meant to be using a file:// url. Fix it for them.
@@ -900,6 +900,8 @@ module FeedTools
900
900
  # redirections, and see if we need to update the url.
901
901
  for redirected_response in response_chain
902
902
  if redirected_response.last.code.to_i == 301
903
+ # Reset the cache object or we may get duplicate entries
904
+ self.cache_object = nil
903
905
  self.url = redirected_response.last['location']
904
906
  else
905
907
  # Jump out as soon as we hit anything that isn't a
@@ -1483,42 +1485,43 @@ module FeedTools
1483
1485
  #
1484
1486
  # This method uses the url from the link field in order to avoid grabbing
1485
1487
  # the favicon for services like feedburner.
1486
- def icon_link
1487
- if @icon_link.nil?
1488
- @icon_link = XPath.first(channel_node,
1489
- "link[@rel='icon']/@href").to_s
1490
- if @icon_link == ""
1491
- @icon_link = XPath.first(channel_node,
1492
- "link[@rel='shortcut icon']/@href").to_s
1488
+ def icon
1489
+ if @icon.nil?
1490
+ icon_node = XPath.first(channel_node, "link[@rel='icon']")
1491
+ if icon_node.nil?
1492
+ icon_node = XPath.first(channel_node, "link[@rel='shortcut icon']")
1493
1493
  end
1494
- if @icon_link == ""
1495
- @icon_link = XPath.first(channel_node,
1496
- "link[@type='image/x-icon']/@href").to_s
1494
+ if icon_node.nil?
1495
+ icon_node = XPath.first(channel_node, "link[@type='image/x-icon']")
1497
1496
  end
1498
- if @icon_link == ""
1499
- @icon_link = XPath.first(channel_node,
1500
- "icon/@href").to_s
1497
+ if icon_node.nil?
1498
+ icon_node = XPath.first(channel_node, "icon")
1501
1499
  end
1502
- if @icon_link == ""
1503
- @icon_link = XPath.first(channel_node,
1504
- "icon/text()").to_s
1500
+ if icon_node.nil?
1501
+ icon_node = XPath.first(channel_node, "logo[@style='icon']")
1505
1502
  end
1506
- if @icon_link == ""
1507
- @icon_link = XPath.first(channel_node,
1508
- "logo[@style='icon']/@href").to_s
1503
+ if icon_node.nil?
1504
+ icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
1509
1505
  end
1510
- if @icon_link == ""
1511
- @icon_link = XPath.first(channel_node,
1512
- "LOGO[@STYLE='ICON']/@HREF").to_s
1513
- end
1514
- if @icon_link == "" && self.link != nil && self.link != ""
1515
- link_uri = URI.parse(FeedTools.normalize_url(self.link))
1516
- @icon_link =
1517
- link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1506
+ unless icon_node.nil?
1507
+ @icon = FeedTools.unescape_entities(
1508
+ XPath.first(icon_node, "@href").to_s)
1509
+ if @icon == ""
1510
+ @icon = FeedTools.unescape_entities(
1511
+ XPath.first(icon_node, "text()").to_s)
1512
+ unless FeedTools.is_url? @icon
1513
+ @icon = ""
1514
+ end
1515
+ end
1516
+ if @icon == "" && self.link != nil && self.link != ""
1517
+ link_uri = URI.parse(FeedTools.normalize_url(self.link))
1518
+ @icon =
1519
+ link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1520
+ end
1521
+ @icon = nil if @icon == ""
1518
1522
  end
1519
- icon_link = nil if icon_link == ""
1520
1523
  end
1521
- return @icon_link
1524
+ return @icon
1522
1525
  end
1523
1526
 
1524
1527
  # Returns the feed author
@@ -1526,70 +1529,83 @@ module FeedTools
1526
1529
  if @author.nil?
1527
1530
  @author = FeedTools::Feed::Author.new
1528
1531
 
1529
- # Set the author name
1530
- @author.name = FeedTools.unescape_entities(
1531
- XPath.first(channel_node, "author/name/text()").to_s)
1532
-
1533
- @author.raw = FeedTools.unescape_entities(
1534
- XPath.first(channel_node, "author/text()").to_s)
1535
- if @author.raw == ""
1536
- @author.raw = FeedTools.unescape_entities(
1537
- XPath.first(channel_node, "dc:creator/text()").to_s)
1532
+ author_node = XPath.first(channel_node, "author")
1533
+ if author_node.nil?
1534
+ author_node = XPath.first(channel_node, "managingEditor")
1538
1535
  end
1539
- if @author.raw == ""
1540
- @author.raw = FeedTools.unescape_entities(
1541
- XPath.first(channel_node, "dc:author/text()").to_s)
1536
+ if author_node.nil?
1537
+ author_node = XPath.first(channel_node, "dc:author")
1542
1538
  end
1543
- if @author.raw == ""
1544
- @author.raw = FeedTools.unescape_entities(
1545
- XPath.first(channel_node, "managingEditor/text()").to_s)
1539
+ if author_node.nil?
1540
+ author_node = XPath.first(channel_node, "dc:creator")
1546
1541
  end
1547
- unless @author.raw == ""
1548
- raw_scan = @author.raw.scan(
1549
- /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1550
- if raw_scan.nil? || raw_scan.size == 0
1542
+ if author_node.nil?
1543
+ author_node = XPath.first(channel_node, "atom:author")
1544
+ end
1545
+ unless author_node.nil?
1546
+ @author.raw = FeedTools.unescape_entities(
1547
+ XPath.first(author_node, "text()").to_s)
1548
+ @author.raw = nil if @author.raw == ""
1549
+ unless @author.raw.nil?
1551
1550
  raw_scan = @author.raw.scan(
1552
- /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
1553
- author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
1554
- else
1555
- author_raw_pair = raw_scan.first
1556
- end
1557
- if raw_scan.nil? || raw_scan.size == 0
1558
- email_scan = @author.raw.scan(
1559
- /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
1560
- if email_scan != nil && email_scan.size > 0
1561
- @author.email = email_scan.first.strip
1551
+ /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1552
+ if raw_scan.nil? || raw_scan.size == 0
1553
+ raw_scan = @author.raw.scan(
1554
+ /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
1555
+ author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
1556
+ else
1557
+ author_raw_pair = raw_scan.first
1562
1558
  end
1563
- end
1564
- unless author_raw_pair.nil? || author_raw_pair.size == 0
1565
- @author.name = author_raw_pair.first.strip
1566
- @author.email = author_raw_pair.last.strip
1567
- else
1568
- unless @author.raw.include?("@")
1569
- # We can be reasonably sure we are looking at something
1570
- # that the creator didn't intend to contain an email address if
1571
- # it got through the preceding regexes and it doesn't
1572
- # contain the tell-tale '@' symbol.
1573
- @author.name = @author.raw
1559
+ if raw_scan.nil? || raw_scan.size == 0
1560
+ email_scan = @author.raw.scan(
1561
+ /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
1562
+ if email_scan != nil && email_scan.size > 0
1563
+ @author.email = email_scan.first.strip
1564
+ end
1565
+ end
1566
+ unless author_raw_pair.nil? || author_raw_pair.size == 0
1567
+ @author.name = author_raw_pair.first.strip
1568
+ @author.email = author_raw_pair.last.strip
1569
+ else
1570
+ unless @author.raw.include?("@")
1571
+ # We can be reasonably sure we are looking at something
1572
+ # that the creator didn't intend to contain an email address if
1573
+ # it got through the preceding regexes and it doesn't
1574
+ # contain the tell-tale '@' symbol.
1575
+ @author.name = @author.raw
1576
+ end
1574
1577
  end
1575
1578
  end
1579
+ @author.name = "" if @author.name.nil?
1580
+ if @author.name == ""
1581
+ @author.name = FeedTools.unescape_entities(
1582
+ XPath.first(author_node, "name/text()").to_s)
1583
+ end
1584
+ if @author.name == ""
1585
+ @author.name = FeedTools.unescape_entities(
1586
+ XPath.first(author_node, "@name").to_s)
1587
+ end
1588
+ if @author.email == ""
1589
+ @author.email = FeedTools.unescape_entities(
1590
+ XPath.first(author_node, "email/text()").to_s)
1591
+ end
1592
+ if @author.email == ""
1593
+ @author.email = FeedTools.unescape_entities(
1594
+ XPath.first(author_node, "@email").to_s)
1595
+ end
1596
+ if @author.url == ""
1597
+ @author.url = FeedTools.unescape_entities(
1598
+ XPath.first(author_node, "url/text()").to_s)
1599
+ end
1600
+ if @author.url == ""
1601
+ @author.url = FeedTools.unescape_entities(
1602
+ XPath.first(author_node, "@url").to_s)
1603
+ end
1604
+ @author.name = nil if @author.name == ""
1605
+ @author.raw = nil if @author.raw == ""
1606
+ @author.email = nil if @author.email == ""
1607
+ @author.url = nil if @author.url == ""
1576
1608
  end
1577
-
1578
- @author.name = nil if @author.name == ""
1579
- @author.raw = nil if @author.raw == ""
1580
-
1581
- # Set the author email
1582
- if @author.email == ""
1583
- @author.email = FeedTools.unescape_entities(
1584
- XPath.first(channel_node, "author/email/text()").to_s)
1585
- end
1586
- @author.email = nil if @author.email == ""
1587
-
1588
- # Set the author url
1589
- @author.url = FeedTools.unescape_entities(
1590
- XPath.first(channel_node, "author/url/text()").to_s)
1591
- @author.url = nil if @author.url == ""
1592
-
1593
1609
  # Fallback on the itunes module if we didn't find an author name
1594
1610
  begin
1595
1611
  @author.name = self.itunes_author if @author.name.nil?
@@ -1822,6 +1838,9 @@ module FeedTools
1822
1838
  if @copyright == ""
1823
1839
  @copyright = XPath.first(channel_node, "dc:rights/text()").to_s
1824
1840
  end
1841
+ if @copyright == ""
1842
+ @copyright = XPath.first(channel_node, "copyrights/text()").to_s
1843
+ end
1825
1844
  @copyright = FeedTools.sanitize_html(@copyright, :strip)
1826
1845
  @copyright = nil if @copyright == ""
1827
1846
  end
@@ -1841,60 +1860,93 @@ module FeedTools
1841
1860
  if update_frequency != ""
1842
1861
  update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
1843
1862
  if update_period == "daily"
1844
- @time_to_live = update_frequency.to_i * 24
1863
+ @time_to_live = update_frequency.to_i.day
1845
1864
  elsif update_period == "weekly"
1846
- @time_to_live = update_frequency.to_i * 24 * 7
1865
+ @time_to_live = update_frequency.to_i.week
1847
1866
  elsif update_period == "monthly"
1848
- @time_to_live = update_frequency.to_i * 24 * 30
1867
+ @time_to_live = update_frequency.to_i.month
1849
1868
  elsif update_period == "yearly"
1850
- @time_to_live = update_frequency.to_i * 24 * 365
1869
+ @time_to_live = update_frequency.to_i.year
1851
1870
  else
1852
1871
  # hourly
1853
- @time_to_live = update_frequency.to_i
1872
+ @time_to_live = update_frequency.to_i.hour
1854
1873
  end
1855
1874
  end
1856
1875
  end
1857
1876
  if @time_to_live.nil?
1858
- # expressed in minutes
1877
+ # usually expressed in minutes
1859
1878
  update_frequency = XPath.first(channel_node, "ttl/text()").to_s
1860
1879
  if update_frequency != ""
1861
- @time_to_live = (update_frequency.to_i / 60)
1880
+ update_span = XPath.first(channel_node, "ttl/@span").to_s
1881
+ if update_span == "seconds"
1882
+ @time_to_live = update_frequency.to_i
1883
+ elsif update_span == "minutes"
1884
+ @time_to_live = update_frequency.to_i.minute
1885
+ elsif update_span == "hours"
1886
+ @time_to_live = update_frequency.to_i.hour
1887
+ elsif update_span == "days"
1888
+ @time_to_live = update_frequency.to_i.day
1889
+ elsif update_span == "weeks"
1890
+ @time_to_live = update_frequency.to_i.week
1891
+ elsif update_span == "months"
1892
+ @time_to_live = update_frequency.to_i.month
1893
+ elsif update_span == "years"
1894
+ @time_to_live = update_frequency.to_i.year
1895
+ elsif update_frequency.to_i >= 3000
1896
+ # Normally, this should default to minutes, but realistically,
1897
+ # if they meant minutes, you're rarely going to see a value higher
1898
+ # than 120. If we see >= 3000, we're either dealing with a stupid
1899
+ # pseudo-spec that decided to use seconds, or we're looking at
1900
+ # someone who only has weekly updated content. Worst case, we
1901
+ # misreport the time, and we update too often. Best case, we
1902
+ # avoid accidentally updating the feed only once a year. In the
1903
+ # interests of being pragmatic, and since the problem we avoid
1904
+ # is a far greater one than the one we cause, just run the check
1905
+ # and hope no one actually gets hurt.
1906
+ @time_to_live = update_frequency.to_i
1907
+ else
1908
+ @time_to_live = update_frequency.to_i.minute
1909
+ end
1862
1910
  end
1863
1911
  end
1864
1912
  if @time_to_live.nil?
1865
1913
  @time_to_live = 0
1866
- update_frequency_days = XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1867
- update_frequency_hours = XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1868
- update_frequency_minutes = XPath.first(channel_node, "schedule/intervaltime/@min").to_s
1869
- update_frequency_seconds = XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
1914
+ update_frequency_days =
1915
+ XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1916
+ update_frequency_hours =
1917
+ XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1918
+ update_frequency_minutes =
1919
+ XPath.first(channel_node, "schedule/intervaltime/@min").to_s
1920
+ update_frequency_seconds =
1921
+ XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
1870
1922
  if update_frequency_days != ""
1871
- @time_to_live = @time_to_live + update_frequency_days.to_i * 24
1923
+ @time_to_live = @time_to_live + update_frequency_days.to_i.day
1872
1924
  end
1873
1925
  if update_frequency_hours != ""
1874
- @time_to_live = @time_to_live + update_frequency_hours.to_i * 1
1926
+ @time_to_live = @time_to_live + update_frequency_hours.to_i.hour
1875
1927
  end
1876
1928
  if update_frequency_minutes != ""
1877
- @time_to_live = @time_to_live + update_frequency_minutes.to_i / 60
1929
+ @time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
1878
1930
  end
1879
1931
  if update_frequency_seconds != ""
1880
- @time_to_live = @time_to_live + update_frequency_seconds.to_i / 3600
1932
+ @time_to_live = @time_to_live + update_frequency_seconds.to_i
1881
1933
  end
1882
1934
  if @time_to_live == 0
1883
- @time_to_live = nil
1935
+ @time_to_live = 1.hour
1884
1936
  end
1885
1937
  end
1886
1938
  if @time_to_live.nil? || @time_to_live == 0
1887
1939
  # Default to one hour
1888
- @time_to_live = 1
1940
+ @time_to_live = 1.hour
1889
1941
  end
1890
1942
  @time_to_live = @time_to_live.round
1891
- return @time_to_live.hour
1943
+ return @time_to_live
1892
1944
  end
1893
1945
 
1894
1946
  # Sets the feed time to live
1895
1947
  def time_to_live=(new_time_to_live)
1896
- @time_to_live = (new_time_to_live / 3600).round
1897
- @time_to_live = 1 if @time_to_live < 1
1948
+ @time_to_live = new_time_to_live.round
1949
+ @time_to_live = 1.hour if @time_to_live < 1.hour
1898
1950
  end
1899
1951
 
1900
1952
  # Returns the feed's cloud
@@ -2735,13 +2787,16 @@ module FeedTools
2735
2787
  if @link != ""
2736
2788
  @link = FeedTools.unescape_entities(@link)
2737
2789
  end
2738
- if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
2739
- if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
2740
- @link = @link[1..-1]
2741
- end
2742
- # prepend the base to the link since they seem to have used a relative path
2743
- @link = feed.base + @link
2744
- end
2790
+ # TODO: Actually implement proper relative url resolving instead of this crap
2791
+ # ===========================================================================
2792
+ #
2793
+ # if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
2794
+ # if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
2795
+ # @link = @link[1..-1]
2796
+ # end
2797
+ # # prepend the base to the link since they seem to have used a relative path
2798
+ # @link = feed.base + @link
2799
+ # end
2745
2800
  @link = FeedTools.normalize_url(@link)
2746
2801
  end
2747
2802
  return @link
@@ -2751,25 +2806,7 @@ module FeedTools
2751
2806
  def link=(new_link)
2752
2807
  @link = new_link
2753
2808
  end
2754
-
2755
- # Returns the feed item comment link
2756
- def comment_link
2757
- if @comment_link.nil?
2758
- # get the feed comment link from the xml document
2759
- @comment_link = XPath.first(root_node, "comments/text()").to_s
2760
- if @comment_link == ""
2761
- @comment_link = self.link
2762
- end
2763
- @comment_link = FeedTools.normalize_url(@comment_link)
2764
- end
2765
- return @comment_link
2766
- end
2767
-
2768
- # Sets the feed item comment link
2769
- def comment_link=(new_comment_link)
2770
- @comment_link = new_comment_link
2771
- end
2772
-
2809
+
2773
2810
  # Returns a list of the feed item's categories
2774
2811
  def categories
2775
2812
  if @categories.nil?
@@ -3291,70 +3328,83 @@ module FeedTools
3291
3328
  if @author.nil?
3292
3329
  @author = FeedTools::Feed::Author.new
3293
3330
 
3294
- # Set the author name
3295
- @author.name = FeedTools.unescape_entities(
3296
- XPath.first(root_node, "author/name/text()").to_s)
3297
-
3298
- @author.raw = FeedTools.unescape_entities(
3299
- XPath.first(root_node, "author/text()").to_s)
3300
- if @author.raw == ""
3301
- @author.raw = FeedTools.unescape_entities(
3302
- XPath.first(root_node, "dc:creator/text()").to_s)
3331
+ author_node = XPath.first(root_node, "author")
3332
+ if author_node.nil?
3333
+ author_node = XPath.first(root_node, "managingEditor")
3303
3334
  end
3304
- if @author.raw == ""
3305
- @author.raw = FeedTools.unescape_entities(
3306
- XPath.first(root_node, "dc:author/text()").to_s)
3335
+ if author_node.nil?
3336
+ author_node = XPath.first(root_node, "dc:author")
3307
3337
  end
3308
- if @author.raw == ""
3309
- @author.raw = FeedTools.unescape_entities(
3310
- XPath.first(root_node, "managingEditor/text()").to_s)
3338
+ if author_node.nil?
3339
+ author_node = XPath.first(root_node, "dc:creator")
3311
3340
  end
3312
- unless @author.raw == ""
3313
- raw_scan = @author.raw.scan(
3314
- /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
3315
- if raw_scan.nil? || raw_scan.size == 0
3341
+ if author_node.nil?
3342
+ author_node = XPath.first(root_node, "atom:author")
3343
+ end
3344
+ unless author_node.nil?
3345
+ @author.raw = FeedTools.unescape_entities(
3346
+ XPath.first(author_node, "text()").to_s)
3347
+ @author.raw = nil if @author.raw == ""
3348
+ unless @author.raw.nil?
3316
3349
  raw_scan = @author.raw.scan(
3317
- /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
3318
- author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
3319
- else
3320
- author_raw_pair = raw_scan.first
3321
- end
3322
- if raw_scan.nil? || raw_scan.size == 0
3323
- email_scan = @author.raw.scan(
3324
- /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
3325
- if email_scan != nil && email_scan.size > 0
3326
- @author.email = email_scan.first.strip
3350
+ /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
3351
+ if raw_scan.nil? || raw_scan.size == 0
3352
+ raw_scan = @author.raw.scan(
3353
+ /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
3354
+ author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
3355
+ else
3356
+ author_raw_pair = raw_scan.first
3327
3357
  end
3328
- end
3329
- unless author_raw_pair.nil? || author_raw_pair.size == 0
3330
- @author.name = author_raw_pair.first.strip
3331
- @author.email = author_raw_pair.last.strip
3332
- else
3333
- unless @author.raw.include?("@")
3334
- # We can be reasonably sure we are looking at something
3335
- # that the creator didn't intend to contain an email address if
3336
- # it got through the preceeding regexes and it doesn't
3337
- # contain the tell-tale '@' symbol.
3338
- @author.name = @author.raw
3358
+ if raw_scan.nil? || raw_scan.size == 0
3359
+ email_scan = @author.raw.scan(
3360
+ /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
3361
+ if email_scan != nil && email_scan.size > 0
3362
+ @author.email = email_scan.first.strip
3363
+ end
3364
+ end
3365
+ unless author_raw_pair.nil? || author_raw_pair.size == 0
3366
+ @author.name = author_raw_pair.first.strip
3367
+ @author.email = author_raw_pair.last.strip
3368
+ else
3369
+ unless @author.raw.include?("@")
3370
+ # We can be reasonably sure we are looking at something
3371
+ # that the creator didn't intend to contain an email address if
3372
+ # it got through the preceeding regexes and it doesn't
3373
+ # contain the tell-tale '@' symbol.
3374
+ @author.name = @author.raw
3375
+ end
3339
3376
  end
3340
3377
  end
3378
+ @author.name = "" if @author.name.nil?
3379
+ if @author.name == ""
3380
+ @author.name = FeedTools.unescape_entities(
3381
+ XPath.first(author_node, "name/text()").to_s)
3382
+ end
3383
+ if @author.name == ""
3384
+ @author.name = FeedTools.unescape_entities(
3385
+ XPath.first(author_node, "@name").to_s)
3386
+ end
3387
+ if @author.email == ""
3388
+ @author.email = FeedTools.unescape_entities(
3389
+ XPath.first(author_node, "email/text()").to_s)
3390
+ end
3391
+ if @author.email == ""
3392
+ @author.email = FeedTools.unescape_entities(
3393
+ XPath.first(author_node, "@email").to_s)
3394
+ end
3395
+ if @author.url == ""
3396
+ @author.url = FeedTools.unescape_entities(
3397
+ XPath.first(author_node, "url/text()").to_s)
3398
+ end
3399
+ if @author.url == ""
3400
+ @author.url = FeedTools.unescape_entities(
3401
+ XPath.first(author_node, "@url").to_s)
3402
+ end
3403
+ @author.name = nil if @author.name == ""
3404
+ @author.raw = nil if @author.raw == ""
3405
+ @author.email = nil if @author.email == ""
3406
+ @author.url = nil if @author.url == ""
3341
3407
  end
3342
-
3343
- @author.name = nil if @author.name == ""
3344
- @author.raw = nil if @author.raw == ""
3345
-
3346
- # Set the author email
3347
- if @author.email == ""
3348
- @author.email = FeedTools.unescape_entities(
3349
- XPath.first(root_node, "author/email/text()").to_s)
3350
- end
3351
- @author.email = nil if @author.email == ""
3352
-
3353
- # Set the author url
3354
- @author.url = FeedTools.unescape_entities(
3355
- XPath.first(root_node, "author/url/text()").to_s)
3356
- @author.url = nil if @author.url == ""
3357
-
3358
3408
  # Fallback on the itunes module if we didn't find an author name
3359
3409
  begin
3360
3410
  @author.name = self.itunes_author if @author.name.nil?
@@ -3522,7 +3572,8 @@ module FeedTools
3522
3572
  # Returns the url for posting comments
3523
3573
  def comments
3524
3574
  if @comments.nil?
3525
- @comments = XPath.first(root_node, "comments/text()").to_s
3575
+ @comments = FeedTools.normalize_url(
3576
+ XPath.first(root_node, "comments/text()").to_s)
3526
3577
  @comments = nil if @comments == ""
3527
3578
  end
3528
3579
  return @comments
@@ -3791,4 +3842,4 @@ begin
3791
3842
  FeedTools.feed_cache.initialize_cache
3792
3843
  end
3793
3844
  rescue
3794
- end
3845
+ end
data/rakefile CHANGED
@@ -7,7 +7,7 @@ require 'rake/gempackagetask'
7
7
  require 'rake/contrib/rubyforgepublisher'
8
8
 
9
9
  PKG_NAME = 'feedtools'
10
- PKG_VERSION = '0.2.4'
10
+ PKG_VERSION = '0.2.5'
11
11
  PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
12
12
 
13
13
  RELEASE_NAME = "REL #{PKG_VERSION}"
@@ -0,0 +1,147 @@
1
+ require 'test/unit'
2
+ require 'feed_tools'
3
+
4
+ class NonStandardTest < Test::Unit::TestCase
5
+ def setup
6
+ FeedTools.tidy_enabled = false
7
+ end
8
+
9
+ def test_xss_strict
10
+ feed = FeedTools::Feed.new
11
+ feed.xml_data = <<-FEED
12
+ <?xml version="1.0" encoding="iso-8859-1"?>
13
+ <rss version="2.0/XSS-strict">
14
+ <channel>
15
+ <title>tima thinking outloud.</title>
16
+ <link>http://www.timaoutloud.org/</link>
17
+ <description>The personal weblog of Timothy Appnel</description>
18
+ <item>
19
+ <link>http://www.timaoutloud.org/archives/000415.html</link>
20
+ <title>OSCON Wrap-Up.</title>
21
+ <description>
22
+ It&apos;s been a week since OSCON ended and I&apos;m just
23
+ beginning to recover. This uber post records my notes and
24
+ personal views as a speaker and attendee.
25
+ </description>
26
+ </item>
27
+ <item>
28
+ <link>http://www.timaoutloud.org/archives/000414.html</link>
29
+ <title>Write For The People Who Support You.</title>
30
+ <description>
31
+ Hooray! Mena is back. Ben too. Anil is celebrating
32
+ 6 years of blogging.
33
+ </description>
34
+ </item>
35
+ <item>
36
+ <link>http://www.timaoutloud.org/archives/000413.html</link>
37
+ <title>tima@OSCON</title>
38
+ <description>
39
+ Ben Hammersley and I will be presenting 45 syndication hacks
40
+ in 45 minutes. Will I be able to keep pace with the madness?
41
+ </description>
42
+ </item>
43
+ </channel>
44
+ </rss>
45
+ FEED
46
+ assert_equal("tima thinking outloud.", feed.title)
47
+ assert_equal("http://www.timaoutloud.org/", feed.link)
48
+ assert_equal("The personal weblog of Timothy Appnel", feed.description)
49
+
50
+ assert_equal("OSCON Wrap-Up.", feed.items[0].title)
51
+ assert_equal("http://www.timaoutloud.org/archives/000415.html",
52
+ feed.items[0].link)
53
+ assert_equal(false, feed.items[0].description == nil)
54
+
55
+ assert_equal("Write For The People Who Support You.", feed.items[1].title)
56
+ assert_equal("http://www.timaoutloud.org/archives/000414.html",
57
+ feed.items[1].link)
58
+ assert_equal(false, feed.items[1].description == nil)
59
+
60
+ assert_equal("tima@OSCON", feed.items[2].title)
61
+ assert_equal("http://www.timaoutloud.org/archives/000413.html",
62
+ feed.items[2].link)
63
+ assert_equal(false, feed.items[2].description == nil)
64
+ end
65
+
66
+ def test_rss_30_lite
67
+ # Delusions of grandeur...
68
+ feed = FeedTools::Feed.new
69
+ feed.xml_data = <<-FEED
70
+ <?xml version="1.0" encoding="UTF-8"?>
71
+ <rss version="3.0" type="lite"
72
+ source="http://www.rss3.org/files/liteSample.rss">
73
+ <channel>
74
+ <title>RSS Version 3</title>
75
+ <link>http://www.rss3.org/</link>
76
+ <description>This is a sample RSS 3 Lite-type feed</description>
77
+
78
+ <lastBuildDate>Sun, 14 Aug 2005 09:53:59 +0000</lastBuildDate>
79
+ <generator name="RSS3Maker">http://no.address/</generator>
80
+ <language rel="both">en</language>
81
+ <icon>http://www.rss3.org/files/r1.ico</icon>
82
+ <copyright>Jonathan Avidan 2005 (c)</copyright>
83
+ <managingEditor name="Jonathan Avidan">
84
+ editor@rss3.org
85
+ </managingEditor>
86
+ <webMaster name="Jonathan Avidan">webmaster@rss3.org</webMaster>
87
+ <ttl span="days">7</ttl>
88
+ <docs>http://www.rss3.org/rss3lite.html</docs>
89
+ <item>
90
+ <title>RSS 3 Lite First Draft Now Available</title>
91
+ <link>
92
+ http://www.rss3.org/archive/rss3lite/first_draft.html
93
+ </link>
94
+ <description>
95
+ The RSS 3 Lite-type specification first publicly
96
+ available version
97
+ </description>
98
+ <pubDate>Sun, 18 Aug 2005 09:53:59 +0000</pubDate>
99
+ <author name="Jonathan Avidan">jonathan@rss3.org</author>
100
+ <guid type="code">6457894357689</guid>
101
+ </item>
102
+ <item isUpdated="true" updateNum="1">
103
+ <title>Welcome to the RSS 3 Official Blog!</title>
104
+ <link>http://www.rss3.org/official_blog/?p=2</link>
105
+ <description>The RSS 3 Official Blog welcome message</description>
106
+ <comments type="both">
107
+ http://www.rss3.org/official_blog/?p=2#comments
108
+ </comments>
109
+ <pubDate>Wed, 27 Jul 2005 14:34:51 +0000</pubDate>
110
+ <author name="Jonathan Avidan" type="writer">
111
+ jonathan@rss3.org
112
+ </author>
113
+ <guid type="link">http://www.rss3.org/official_blog/?p=2</guid>
114
+ </item>
115
+ </channel>
116
+ </rss>
117
+ FEED
118
+ assert_equal("RSS Version 3", feed.title)
119
+ assert_equal("http://www.rss3.org/", feed.link)
120
+ assert_equal("This is a sample RSS 3 Lite-type feed", feed.description)
121
+ assert_equal("http://no.address/", feed.generator)
122
+ assert_equal("en", feed.language)
123
+ assert_equal("http://www.rss3.org/files/r1.ico", feed.icon)
124
+ assert_equal("Jonathan Avidan 2005 (c)", feed.copyright)
125
+ assert_equal(7.day, feed.ttl)
126
+ assert_equal("http://www.rss3.org/rss3lite.html", feed.docs)
127
+
128
+ assert_equal("RSS 3 Lite First Draft Now Available", feed.items[0].title)
129
+ assert_equal("http://www.rss3.org/archive/rss3lite/first_draft.html",
130
+ feed.items[0].link)
131
+ assert_equal(false, feed.items[0].description == nil)
132
+ assert_equal(Time.utc(2005, "Aug", 18, 9, 53, 59), feed.items[0].time)
133
+ assert_equal("Jonathan Avidan", feed.items[0].author.name)
134
+ assert_equal("jonathan@rss3.org", feed.items[0].author.email)
135
+ assert_equal("6457894357689", feed.items[0].guid)
136
+
137
+ assert_equal("Welcome to the RSS 3 Official Blog!", feed.items[1].title)
138
+ assert_equal("http://www.rss3.org/official_blog/?p=2", feed.items[1].link)
139
+ assert_equal(false, feed.items[1].description == nil)
140
+ assert_equal("http://www.rss3.org/official_blog/?p=2#comments",
141
+ feed.items[1].comments)
142
+ assert_equal(Time.utc(2005, "Jul", 27, 14, 34, 51), feed.items[1].time)
143
+ assert_equal("Jonathan Avidan", feed.items[1].author.name)
144
+ assert_equal("jonathan@rss3.org", feed.items[1].author.email)
145
+ assert_equal("http://www.rss3.org/official_blog/?p=2", feed.items[1].guid)
146
+ end
147
+ end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.10
2
+ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: feedtools
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.4
7
- date: 2005-08-16
6
+ version: 0.2.5
7
+ date: 2005-08-19 00:00:00 -04:00
8
8
  summary: "Parsing, generation, and caching system for xml news feeds."
9
9
  require_paths:
10
10
  - lib
@@ -24,6 +24,8 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
24
24
  version: 0.0.0
25
25
  version:
26
26
  platform: ruby
27
+ signing_key:
28
+ cert_chain:
27
29
  authors:
28
30
  - Bob Aman
29
31
  files:
@@ -73,6 +75,7 @@ files:
73
75
  - test/cache_test.rb
74
76
  - test/cdf_test.rb
75
77
  - test/helper_test.rb
78
+ - test/nonstandard_test.rb
76
79
  - test/rss_test.rb
77
80
  test_files: []
78
81
  rdoc_options: