feedtools 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +8 -0
- data/lib/feed_tools.rb +730 -301
- data/rakefile +1 -1
- data/test/amp_test.rb +475 -0
- data/test/atom_test.rb +38 -0
- data/test/cache_test.rb +23 -0
- data/test/helper_test.rb +29 -0
- data/test/rss_test.rb +99 -5
- metadata +6 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
== FeedTools 0.2.2
|
2
|
+
* fixed http redirection bug
|
3
|
+
* fixed several documentation typos
|
4
|
+
* still more unit tests
|
5
|
+
* improved support for atom
|
6
|
+
* minor improvements to the database caching mechanism
|
7
|
+
* more complete support for rss elements
|
8
|
+
* major improvements to the handling of tags containing html content
|
1
9
|
== FeedTools 0.2.1
|
2
10
|
* fixed incorrect dependancy on ActiveRecord 1.10.1
|
3
11
|
* more unit tests
|
data/lib/feed_tools.rb
CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
25
25
|
ENV['RAILS_ENV'] ||
|
26
26
|
'production' # :nodoc:
|
27
27
|
|
28
|
-
FEED_TOOLS_VERSION = "0.2.1"
|
28
|
+
FEED_TOOLS_VERSION = "0.2.2"
|
29
29
|
|
30
30
|
$:.unshift(File.dirname(__FILE__))
|
31
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
@@ -131,6 +131,19 @@ module FeedTools
|
|
131
131
|
end
|
132
132
|
return nil
|
133
133
|
end
|
134
|
+
|
135
|
+
# Returns true if a connection to the database has been established and the
|
136
|
+
# required table structure is in place.
|
137
|
+
def DatabaseFeedCache.connected?
|
138
|
+
begin
|
139
|
+
ActiveRecord::Base.connection
|
140
|
+
return false if ActiveRecord::Base.configurations.nil?
|
141
|
+
return false unless DatabaseFeedCache.table_exists?
|
142
|
+
rescue => error
|
143
|
+
return false
|
144
|
+
end
|
145
|
+
return true
|
146
|
+
end
|
134
147
|
|
135
148
|
# True if the appropriate database table already exists
|
136
149
|
def DatabaseFeedCache.table_exists?
|
@@ -258,12 +271,25 @@ module FeedTools
|
|
258
271
|
# find_by_id
|
259
272
|
# find_by_url
|
260
273
|
# initialize_cache
|
274
|
+
# connected?
|
261
275
|
def FeedTools.feed_cache=(new_feed_cache)
|
262
276
|
# TODO: ensure that the feed cache class actually does those things.
|
263
277
|
# ==================================================================
|
264
278
|
@feed_cache = new_feed_cache
|
265
279
|
end
|
266
280
|
|
281
|
+
# Returns true if FeedTools.feed_cache is not nil and a connection with
|
282
|
+
# the cache has been successfully established. Also returns false if an
|
283
|
+
# error is raised while trying to determine the status of the cache.
|
284
|
+
def FeedTools.feed_cache_connected?
|
285
|
+
begin
|
286
|
+
return false if FeedTools.feed_cache.nil?
|
287
|
+
return FeedTools.feed_cache.connected?
|
288
|
+
rescue
|
289
|
+
return false
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
267
293
|
# Returns the currently used user agent string.
|
268
294
|
def FeedTools.user_agent
|
269
295
|
return @user_agent
|
@@ -455,6 +481,25 @@ module FeedTools
|
|
455
481
|
return true
|
456
482
|
end
|
457
483
|
|
484
|
+
# Escapes all html entities
|
485
|
+
def FeedTools.escape_entities(html)
|
486
|
+
escaped_html = CGI.escapeHTML(html)
|
487
|
+
unescaped_html.gsub!(/'/, "&apos;")
|
488
|
+
unescaped_html.gsub!(/"/, "&quot;")
|
489
|
+
return escaped_html
|
490
|
+
end
|
491
|
+
|
492
|
+
# Unescapes all html entities
|
493
|
+
def FeedTools.unescape_entities(html)
|
494
|
+
unescaped_html = html
|
495
|
+
unescaped_html.gsub!(/&#x26;/, "&amp;")
|
496
|
+
unescaped_html.gsub!(/&#38;/, "&amp;")
|
497
|
+
unescaped_html = CGI.unescapeHTML(unescaped_html)
|
498
|
+
unescaped_html.gsub!(/&apos;/, "'")
|
499
|
+
unescaped_html.gsub!(/&quot;/, "\"")
|
500
|
+
return unescaped_html
|
501
|
+
end
|
502
|
+
|
458
503
|
# Removes all html tags from the html formatted text.
|
459
504
|
def FeedTools.strip_html(html)
|
460
505
|
# TODO: do this properly
|
@@ -467,6 +512,7 @@ module FeedTools
|
|
467
512
|
def FeedTools.tidy_html(html)
|
468
513
|
if FeedTools.tidy_enabled?
|
469
514
|
is_fragment = true
|
515
|
+
html.gsub!(/<!'/, "&lt;!'")
|
470
516
|
if (html.strip =~ /<html>(.|\n)*<body>/) != nil ||
|
471
517
|
(html.strip =~ /<\/body>(.|\n)*<\/html>$/) != nil
|
472
518
|
is_fragment = false
|
@@ -484,13 +530,15 @@ module FeedTools
|
|
484
530
|
xml
|
485
531
|
end
|
486
532
|
if is_fragment
|
487
|
-
# Tidy
|
533
|
+
# Tidy sticks <html>...<body>[our html]</body>...</html> in.
|
488
534
|
# We don't want this.
|
489
535
|
tidy_html.strip!
|
490
536
|
tidy_html.gsub!(/^<html>(.|\n)*<body>/, "")
|
491
537
|
tidy_html.gsub!(/<\/body>(.|\n)*<\/html>$/, "")
|
492
538
|
tidy_html.strip!
|
493
539
|
end
|
540
|
+
tidy_html.gsub!(/&#x26;/, "&amp;")
|
541
|
+
tidy_html.gsub!(/&#38;/, "&amp;")
|
494
542
|
else
|
495
543
|
tidy_html = html
|
496
544
|
end
|
@@ -502,7 +550,7 @@ module FeedTools
|
|
502
550
|
# be escaped. If mode is set to :strip, dangerous and unknown
|
503
551
|
# elements and all children will be removed entirely.
|
504
552
|
# Dangerous or unknown attributes are always removed.
|
505
|
-
def FeedTools.sanitize_html(html, mode=:
|
553
|
+
def FeedTools.sanitize_html(html, mode=:strip)
|
506
554
|
|
507
555
|
# Lists borrowed from Mark Pilgrim's feedparser
|
508
556
|
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
|
@@ -527,18 +575,14 @@ module FeedTools
|
|
527
575
|
'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
|
528
576
|
'type', 'usemap', 'valign', 'value', 'vspace', 'width']
|
529
577
|
|
530
|
-
#
|
531
|
-
|
532
|
-
|
533
|
-
html.gsub!(
|
534
|
-
|
535
|
-
# The closer we are to proper xhtml, the more accurate the
|
536
|
-
# sanitization will be.
|
537
|
-
html = FeedTools.tidy_html(html)
|
538
|
-
|
578
|
+
# Replace with appropriate named entities
|
579
|
+
html.gsub!(/&#x26;/, "&amp;")
|
580
|
+
html.gsub!(/&#38;/, "&amp;")
|
581
|
+
html.gsub!(/<!'/, "&lt;!'")
|
582
|
+
|
539
583
|
# Hackity hack. But it works, and it seems plenty fast enough.
|
540
584
|
html_doc = HTree.parse_xml("<root>" + html + "</root>").to_rexml
|
541
|
-
|
585
|
+
|
542
586
|
sanitize_node = lambda do |html_node|
|
543
587
|
if html_node.respond_to? :children
|
544
588
|
for child in html_node.children
|
@@ -564,13 +608,83 @@ module FeedTools
|
|
564
608
|
html_node
|
565
609
|
end
|
566
610
|
sanitize_node.call(html_doc.root)
|
567
|
-
|
611
|
+
html = html_doc.root.inner_xml
|
612
|
+
return html
|
568
613
|
end
|
569
614
|
|
570
615
|
class Feed
|
571
616
|
include REXML
|
572
617
|
include AttributeDictionary
|
573
618
|
|
619
|
+
# Represents a feed/feed item's category
|
620
|
+
class Category
|
621
|
+
# The category term value
|
622
|
+
attr_accessor :term
|
623
|
+
# The categorization scheme
|
624
|
+
attr_accessor :scheme
|
625
|
+
# A human-readable description of the category
|
626
|
+
attr_accessor :label
|
627
|
+
|
628
|
+
# Relays any unknown methods to the term so that you can treat the
|
629
|
+
# category object as a string
|
630
|
+
def method_missing(msg, *params)
|
631
|
+
self.term.send(msg, params)
|
632
|
+
end
|
633
|
+
|
634
|
+
# Relays the to_s method to the term field
|
635
|
+
def to_s
|
636
|
+
self.term.to_s
|
637
|
+
end
|
638
|
+
|
639
|
+
# Relays the inspect method to the term field
|
640
|
+
def inspect
|
641
|
+
self.term.inspect
|
642
|
+
end
|
643
|
+
end
|
644
|
+
|
645
|
+
# Represents a feed/feed item's author
|
646
|
+
class Author
|
647
|
+
# The author's real name
|
648
|
+
attr_accessor :name
|
649
|
+
# The author's email address
|
650
|
+
attr_accessor :email
|
651
|
+
# The url of the author's homepage
|
652
|
+
attr_accessor :url
|
653
|
+
# The raw value of the author tag if present
|
654
|
+
attr_accessor :raw
|
655
|
+
|
656
|
+
# Relays any unknown methods to the name so that you can treat the
|
657
|
+
# author object as a string
|
658
|
+
def method_missing(msg, *params)
|
659
|
+
self.name.send(msg, params)
|
660
|
+
end
|
661
|
+
|
662
|
+
# Relays the to_s method to the name field
|
663
|
+
def to_s
|
664
|
+
self.name.to_s
|
665
|
+
end
|
666
|
+
|
667
|
+
# Relays the inspect method to the name field
|
668
|
+
def inspect
|
669
|
+
self.name.inspect
|
670
|
+
end
|
671
|
+
end
|
672
|
+
|
673
|
+
# Represents a feed's cloud.
|
674
|
+
class Cloud
|
675
|
+
# The domain of the cloud.
|
676
|
+
attr_accessor :domain
|
677
|
+
# The path for the cloud.
|
678
|
+
attr_accessor :path
|
679
|
+
# The port the cloud is listening on.
|
680
|
+
attr_accessor :port
|
681
|
+
# The web services protocol the cloud uses.
|
682
|
+
# Possible values are either "xml-rpc" or "soap".
|
683
|
+
attr_accessor :protocol
|
684
|
+
# The procedure to use to request notification.
|
685
|
+
attr_accessor :register_procedure
|
686
|
+
end
|
687
|
+
|
574
688
|
# Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
|
575
689
|
def Feed.open(url)
|
576
690
|
# clean up the url
|
@@ -671,14 +785,14 @@ module FeedTools
|
|
671
785
|
|
672
786
|
Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
|
673
787
|
response = http.request_get(feed_uri.path, http_headers)
|
674
|
-
|
788
|
+
|
675
789
|
case response
|
676
790
|
when Net::HTTPSuccess
|
677
791
|
# We've reached the final destination, process all previous
|
678
792
|
# redirections, and see if we need to update the url.
|
679
793
|
for redirected_response in response_chain
|
680
794
|
if redirected_response.last.code.to_i == 301
|
681
|
-
self.url = redirected_response.
|
795
|
+
self.url = redirected_response.last['location']
|
682
796
|
else
|
683
797
|
# Jump out as soon as we hit anything that isn't a
|
684
798
|
# permanently moved redirection.
|
@@ -690,7 +804,7 @@ module FeedTools
|
|
690
804
|
if response.code.to_i == 304
|
691
805
|
response.error!
|
692
806
|
else
|
693
|
-
if response['
|
807
|
+
if response['location'].nil?
|
694
808
|
raise FeedAccessError,
|
695
809
|
"No location to redirect to supplied: " + response.code
|
696
810
|
end
|
@@ -913,6 +1027,30 @@ module FeedTools
|
|
913
1027
|
@cache_object = new_cache_object
|
914
1028
|
end
|
915
1029
|
|
1030
|
+
# Returns the type of feed
|
1031
|
+
# Possible values:
|
1032
|
+
# "rss", "atom", "cdf", "!okay/news"
|
1033
|
+
def feed_type
|
1034
|
+
if @feed_type.nil?
|
1035
|
+
case self.root_node.name.downcase
|
1036
|
+
when "feed"
|
1037
|
+
@feed_type = "atom"
|
1038
|
+
when "rdf:rdf"
|
1039
|
+
@feed_type = "rss"
|
1040
|
+
when "rss"
|
1041
|
+
@feed_type = "rss"
|
1042
|
+
when "channel"
|
1043
|
+
@feed_type = "cdf"
|
1044
|
+
end
|
1045
|
+
end
|
1046
|
+
return @feed_type
|
1047
|
+
end
|
1048
|
+
|
1049
|
+
# Sets the default feed type
|
1050
|
+
def feed_type=(new_feed_type)
|
1051
|
+
@feed_type = new_feed_type
|
1052
|
+
end
|
1053
|
+
|
916
1054
|
# Returns the feed's unique id
|
917
1055
|
def id
|
918
1056
|
if @id.nil?
|
@@ -948,24 +1086,30 @@ module FeedTools
|
|
948
1086
|
# Returns the feed title
|
949
1087
|
def title
|
950
1088
|
if @title.nil?
|
951
|
-
|
952
|
-
|
1089
|
+
repair_entities = false
|
1090
|
+
if XPath.first(channel_node, "title/@type").to_s == "xhtml" ||
|
1091
|
+
XPath.first(channel_node, "title/@mode").to_s == "xhtml" ||
|
1092
|
+
XPath.first(channel_node, "title/@type").to_s == "xml" ||
|
1093
|
+
XPath.first(channel_node, "title/@mode").to_s == "xml" ||
|
1094
|
+
XPath.first(channel_node, "title/@type").to_s ==
|
1095
|
+
"application/xhtml+xml"
|
953
1096
|
@title = XPath.first(channel_node, "title").inner_xml
|
954
1097
|
elsif XPath.first(channel_node, "title/@type").to_s == "escaped" ||
|
955
1098
|
XPath.first(channel_node, "title/@mode").to_s == "escaped"
|
956
|
-
@title =
|
1099
|
+
@title = FeedTools.unescape_entities(
|
957
1100
|
XPath.first(channel_node, "title/text()").to_s)
|
958
1101
|
else
|
959
|
-
|
960
|
-
|
1102
|
+
title_node = XPath.first(channel_node, "title")
|
1103
|
+
@title = title_node.inner_xml
|
1104
|
+
repair_entities = true
|
961
1105
|
end
|
962
1106
|
unless @title.nil?
|
963
|
-
@title =
|
964
|
-
|
965
|
-
|
966
|
-
@title = FeedTools.strip_html(@title).strip
|
1107
|
+
@title = FeedTools.sanitize_html(@title, :strip)
|
1108
|
+
@title = FeedTools.unescape_entities(@title) if repair_entities
|
1109
|
+
@title = FeedTools.tidy_html(@title)
|
967
1110
|
end
|
968
1111
|
@title.gsub!(/\n/, " ")
|
1112
|
+
@title.strip!
|
969
1113
|
@title = nil if @title == ""
|
970
1114
|
self.cache_object.title = @title unless self.cache_object.nil?
|
971
1115
|
end
|
@@ -981,57 +1125,57 @@ module FeedTools
|
|
981
1125
|
# Returns the feed description
|
982
1126
|
def description
|
983
1127
|
if @description.nil?
|
984
|
-
|
985
|
-
|
986
|
-
if
|
987
|
-
|
988
|
-
|
1128
|
+
repair_entities = false
|
1129
|
+
description_node = XPath.first(channel_node, "description")
|
1130
|
+
if description_node.nil?
|
1131
|
+
description_node = XPath.first(channel_node, "tagline")
|
1132
|
+
end
|
1133
|
+
if description_node.nil?
|
1134
|
+
description_node = XPath.first(channel_node, "subtitle")
|
1135
|
+
end
|
1136
|
+
if description_node.nil?
|
1137
|
+
description_node = XPath.first(channel_node, "summary")
|
1138
|
+
end
|
1139
|
+
if description_node.nil?
|
1140
|
+
description_node = XPath.first(channel_node, "abstract")
|
1141
|
+
end
|
1142
|
+
if description_node.nil?
|
1143
|
+
description_node = XPath.first(channel_node, "info")
|
1144
|
+
end
|
1145
|
+
if description_node.nil?
|
1146
|
+
description_node = XPath.first(channel_node, "content:encoded")
|
1147
|
+
@bozo = true unless description_node.nil?
|
1148
|
+
end
|
1149
|
+
if description_node.nil?
|
1150
|
+
description_node = XPath.first(channel_node, "content")
|
1151
|
+
@bozo = true unless description_node.nil?
|
1152
|
+
end
|
1153
|
+
if description_node.nil?
|
1154
|
+
description_node = XPath.first(channel_node, "xhtml:body")
|
1155
|
+
@bozo = true unless description_node.nil?
|
1156
|
+
end
|
1157
|
+
if description_node.nil?
|
1158
|
+
description_node = XPath.first(channel_node, "body")
|
1159
|
+
@bozo = true unless description_node.nil?
|
1160
|
+
end
|
1161
|
+
unless description_node.nil?
|
1162
|
+
if XPath.first(description_node, "@encoding").to_s != ""
|
1163
|
+
@description =
|
1164
|
+
"[Embedded data objects are not currently supported.]"
|
1165
|
+
elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
|
1166
|
+
XPath.first(description_node, "@mode").to_s == "xhtml" ||
|
1167
|
+
XPath.first(description_node, "@type").to_s == "xml" ||
|
1168
|
+
XPath.first(description_node, "@mode").to_s == "xml" ||
|
1169
|
+
XPath.first(description_node, "@type").to_s ==
|
1170
|
+
"application/xhtml+xml"
|
1171
|
+
@description = description_node.inner_xml
|
1172
|
+
elsif XPath.first(description_node, "@type").to_s == "escaped" ||
|
1173
|
+
XPath.first(description_node, "@mode").to_s == "escaped"
|
1174
|
+
@description = FeedTools.unescape_entities(
|
1175
|
+
description_node.inner_xml)
|
989
1176
|
else
|
990
|
-
@description =
|
991
|
-
|
992
|
-
end
|
993
|
-
if @description == ""
|
994
|
-
@description = XPath.first(channel_node, "subtitle/text()").to_s
|
995
|
-
if @description != "" &&
|
996
|
-
XPath.first(channel_node, "subtitle/@mode").to_s == "escaped"
|
997
|
-
@description = CGI.unescapeHTML(description)
|
998
|
-
end
|
999
|
-
end
|
1000
|
-
if @description == ""
|
1001
|
-
@description = XPath.first(channel_node, "tagline/text()").to_s
|
1002
|
-
if @description != "" &&
|
1003
|
-
XPath.first(channel_node, "tagline/@mode").to_s == "escaped"
|
1004
|
-
@description = CGI.unescapeHTML(description)
|
1005
|
-
end
|
1006
|
-
end
|
1007
|
-
if @description == "" && XPath.first(channel_node, "tagline") == nil
|
1008
|
-
@description = XPath.first(channel_node, "info/text()").to_s
|
1009
|
-
if @description != "" &&
|
1010
|
-
XPath.first(channel_node, "info/@mode").to_s == "escaped"
|
1011
|
-
@description = CGI.unescapeHTML(description)
|
1012
|
-
end
|
1013
|
-
end
|
1014
|
-
if @description == ""
|
1015
|
-
@description = CGI.unescapeHTML(
|
1016
|
-
XPath.first(channel_node, "abstract/text()").to_s)
|
1017
|
-
end
|
1018
|
-
if @description == ""
|
1019
|
-
@description = CGI.unescapeHTML(
|
1020
|
-
XPath.first(channel_node, "summary/text()").to_s)
|
1021
|
-
end
|
1022
|
-
if @description == ""
|
1023
|
-
# I don't think this is valid for anyone to do, but this is probably
|
1024
|
-
# what they meant if they do it.
|
1025
|
-
@description = CGI.unescapeHTML(
|
1026
|
-
XPath.first(channel_node, "content:encoded/text()").to_s)
|
1027
|
-
if @description != ""
|
1028
|
-
@bozo = true
|
1029
|
-
end
|
1030
|
-
end
|
1031
|
-
if @description == ""
|
1032
|
-
begin
|
1033
|
-
@description = XPath.first(channel_node, "description").inner_xml
|
1034
|
-
rescue
|
1177
|
+
@description = description_node.inner_xml
|
1178
|
+
repair_entities = true
|
1035
1179
|
end
|
1036
1180
|
end
|
1037
1181
|
if @description == ""
|
@@ -1043,13 +1187,12 @@ module FeedTools
|
|
1043
1187
|
@description = "" if @description.nil?
|
1044
1188
|
end
|
1045
1189
|
|
1046
|
-
@description
|
1047
|
-
FeedTools.sanitize_html(@description)
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1190
|
+
unless @description.nil?
|
1191
|
+
@description = FeedTools.sanitize_html(@description, :strip)
|
1192
|
+
@description = FeedTools.unescape_entities(@description) if repair_entities
|
1193
|
+
@description = FeedTools.tidy_html(@description)
|
1194
|
+
end
|
1195
|
+
|
1053
1196
|
@description.gsub!(/\n/, " ") if @description.size < 80
|
1054
1197
|
@description = @description.strip unless @description.nil?
|
1055
1198
|
@description = nil if @description == ""
|
@@ -1065,7 +1208,7 @@ module FeedTools
|
|
1065
1208
|
# Returns the contents of the itunes:summary element
|
1066
1209
|
def itunes_summary
|
1067
1210
|
if @itunes_summary.nil?
|
1068
|
-
@itunes_summary =
|
1211
|
+
@itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
|
1069
1212
|
"itunes:summary/text()").to_s)
|
1070
1213
|
if @itunes_summary == ""
|
1071
1214
|
@itunes_summary = nil
|
@@ -1084,7 +1227,7 @@ module FeedTools
|
|
1084
1227
|
# Returns the contents of the itunes:subtitle element
|
1085
1228
|
def itunes_subtitle
|
1086
1229
|
if @itunes_subtitle.nil?
|
1087
|
-
@itunes_subtitle =
|
1230
|
+
@itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
|
1088
1231
|
"itunes:subtitle/text()").to_s)
|
1089
1232
|
if @itunes_subtitle == ""
|
1090
1233
|
@itunes_subtitle = nil
|
@@ -1211,6 +1354,197 @@ module FeedTools
|
|
1211
1354
|
return @icon_link
|
1212
1355
|
end
|
1213
1356
|
|
1357
|
+
# Returns the feed author
|
1358
|
+
def author
|
1359
|
+
if @author.nil?
|
1360
|
+
@author = FeedTools::Feed::Author.new
|
1361
|
+
|
1362
|
+
# Set the author name
|
1363
|
+
@author.name = FeedTools.unescape_entities(
|
1364
|
+
XPath.first(channel_node, "author/name/text()").to_s)
|
1365
|
+
|
1366
|
+
@author.raw = FeedTools.unescape_entities(
|
1367
|
+
XPath.first(channel_node, "author/text()").to_s)
|
1368
|
+
if @author.raw == ""
|
1369
|
+
@author.raw = FeedTools.unescape_entities(
|
1370
|
+
XPath.first(channel_node, "dc:creator/text()").to_s)
|
1371
|
+
end
|
1372
|
+
if @author.raw == ""
|
1373
|
+
@author.raw = FeedTools.unescape_entities(
|
1374
|
+
XPath.first(channel_node, "dc:author/text()").to_s)
|
1375
|
+
end
|
1376
|
+
unless @author.raw == ""
|
1377
|
+
raw_scan = @author.raw.scan(
|
1378
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
1379
|
+
if raw_scan.nil? || raw_scan.size == 0
|
1380
|
+
raw_scan = @author.raw.scan(
|
1381
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
1382
|
+
author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
|
1383
|
+
else
|
1384
|
+
author_raw_pair = raw_scan.first
|
1385
|
+
end
|
1386
|
+
if raw_scan.nil? || raw_scan.size == 0
|
1387
|
+
email_scan = @author.raw.scan(
|
1388
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
1389
|
+
if email_scan != nil && email_scan.size > 0
|
1390
|
+
@author.email = email_scan.first.strip
|
1391
|
+
end
|
1392
|
+
end
|
1393
|
+
unless author_raw_pair.nil? || author_raw_pair.size == 0
|
1394
|
+
@author.name = author_raw_pair.first.strip
|
1395
|
+
@author.email = author_raw_pair.last.strip
|
1396
|
+
else
|
1397
|
+
unless @author.raw.include?("@")
|
1398
|
+
# We can be reasonably sure we are looking at something
|
1399
|
+
# that the creator didn't intend to contain an email address if
|
1400
|
+
# it got through the preceeding regexes and it doesn't
|
1401
|
+
# contain the tell-tale '@' symbol.
|
1402
|
+
@author.name = @author.raw
|
1403
|
+
end
|
1404
|
+
end
|
1405
|
+
end
|
1406
|
+
|
1407
|
+
@author.name = nil if @author.name == ""
|
1408
|
+
@author.raw = nil if @author.raw == ""
|
1409
|
+
|
1410
|
+
# Set the author email
|
1411
|
+
if @author.email == ""
|
1412
|
+
@author.email = FeedTools.unescape_entities(
|
1413
|
+
XPath.first(channel_node, "author/email/text()").to_s)
|
1414
|
+
end
|
1415
|
+
@author.email = nil if @author.email == ""
|
1416
|
+
|
1417
|
+
# Set the author url
|
1418
|
+
@author.url = FeedTools.unescape_entities(
|
1419
|
+
XPath.first(channel_node, "author/url/text()").to_s)
|
1420
|
+
@author.url = nil if @author.url == ""
|
1421
|
+
|
1422
|
+
# Fallback on the itunes module if we didn't find an author name
|
1423
|
+
begin
|
1424
|
+
@author.name = self.itunes_author if @author.name.nil?
|
1425
|
+
rescue
|
1426
|
+
@author.name = nil
|
1427
|
+
end
|
1428
|
+
end
|
1429
|
+
return @author
|
1430
|
+
end
|
1431
|
+
|
1432
|
+
# Sets the feed author
|
1433
|
+
def author=(new_author)
|
1434
|
+
if new_author.respond_to?(:name) &&
|
1435
|
+
new_author.respond_to?(:email) &&
|
1436
|
+
new_author.respond_to?(:url)
|
1437
|
+
# It's a complete author object, just set it.
|
1438
|
+
@author = new_author
|
1439
|
+
else
|
1440
|
+
# We're not looking at an author object, this is probably a string,
|
1441
|
+
# default to setting the author's name.
|
1442
|
+
if @author.nil?
|
1443
|
+
@author = FeedTools::Feed::Author.new
|
1444
|
+
end
|
1445
|
+
@author.name = new_author
|
1446
|
+
end
|
1447
|
+
end
|
1448
|
+
|
1449
|
+
# Returns the feed publisher
|
1450
|
+
def publisher
|
1451
|
+
if @publisher.nil?
|
1452
|
+
@publisher = FeedTools::Feed::Author.new
|
1453
|
+
|
1454
|
+
# Set the author name
|
1455
|
+
@publisher.raw = FeedTools.unescape_entities(
|
1456
|
+
XPath.first(channel_node, "dc:publisher/text()").to_s)
|
1457
|
+
unless @publisher.raw == ""
|
1458
|
+
raw_scan = @publisher.raw.scan(
|
1459
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
1460
|
+
if raw_scan.nil? || raw_scan.size == 0
|
1461
|
+
raw_scan = @publisher.raw.scan(
|
1462
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
1463
|
+
unless raw_scan.size == 0
|
1464
|
+
publisher_raw_pair = raw_scan.first.reverse
|
1465
|
+
end
|
1466
|
+
else
|
1467
|
+
publisher_raw_pair = raw_scan.first
|
1468
|
+
end
|
1469
|
+
if raw_scan.nil? || raw_scan.size == 0
|
1470
|
+
email_scan = @publisher.raw.scan(
|
1471
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
1472
|
+
if email_scan != nil && email_scan.size > 0
|
1473
|
+
@publisher.email = email_scan.first.strip
|
1474
|
+
end
|
1475
|
+
end
|
1476
|
+
unless publisher_raw_pair.nil? || publisher_raw_pair.size == 0
|
1477
|
+
@publisher.name = publisher_raw_pair.first.strip
|
1478
|
+
@publisher.email = publisher_raw_pair.last.strip
|
1479
|
+
else
|
1480
|
+
unless @publisher.raw.include?("@")
|
1481
|
+
# We can be reasonably sure we are looking at something
|
1482
|
+
# that the creator didn't intend to contain an email address if
|
1483
|
+
# it got through the preceeding regexes and it doesn't
|
1484
|
+
# contain the tell-tale '@' symbol.
|
1485
|
+
@publisher.name = @publisher.raw
|
1486
|
+
end
|
1487
|
+
end
|
1488
|
+
end
|
1489
|
+
|
1490
|
+
@publisher.name = nil if @publisher.name == ""
|
1491
|
+
@publisher.raw = nil if @publisher.raw == ""
|
1492
|
+
@publisher.email = nil if @publisher.email == ""
|
1493
|
+
@publisher.url = nil if @publisher.url == ""
|
1494
|
+
end
|
1495
|
+
return @publisher
|
1496
|
+
end
|
1497
|
+
|
1498
|
+
# Sets the feed publisher
|
1499
|
+
def publisher=(new_publisher)
|
1500
|
+
if new_publisher.respond_to?(:name) &&
|
1501
|
+
new_publisher.respond_to?(:email) &&
|
1502
|
+
new_publisher.respond_to?(:url)
|
1503
|
+
# It's a complete Author object, just set it.
|
1504
|
+
@publisher = new_publisher
|
1505
|
+
else
|
1506
|
+
# We're not looking at an Author object, this is probably a string,
|
1507
|
+
# default to setting the publisher's name.
|
1508
|
+
if @publisher.nil?
|
1509
|
+
@publisher = FeedTools::Feed::Author.new
|
1510
|
+
end
|
1511
|
+
@publisher.name = new_publisher
|
1512
|
+
end
|
1513
|
+
end
|
1514
|
+
|
1515
|
+
# Returns the contents of the itunes:author element
|
1516
|
+
#
|
1517
|
+
# Returns any incorrectly placed channel-level itunes:author
|
1518
|
+
# elements. They're actually amazingly common. People don't read specs.
|
1519
|
+
# There is no setter for this, since this is a "bozo" attribute.
|
1520
|
+
def itunes_author
|
1521
|
+
if @itunes_author.nil?
|
1522
|
+
@itunes_author = FeedTools.unescape_entities(XPath.first(channel_node,
|
1523
|
+
"itunes:author/text()").to_s)
|
1524
|
+
@itunes_author = nil if @itunes_author == ""
|
1525
|
+
@bozo = true unless @itunes_author.nil?
|
1526
|
+
end
|
1527
|
+
return @itunes_author
|
1528
|
+
end
|
1529
|
+
|
1530
|
+
# Returns the feed's copyright information
|
1531
|
+
def copyright
|
1532
|
+
if @copyright.nil?
|
1533
|
+
@copyright = XPath.first(channel_node, "copyright/text()").to_s
|
1534
|
+
if @copyright == ""
|
1535
|
+
@copyright = XPath.first(channel_node, "dc:rights/text()").to_s
|
1536
|
+
end
|
1537
|
+
@copyright = FeedTools.sanitize_html(@copyright, :strip)
|
1538
|
+
@copyright = nil if @copyright == ""
|
1539
|
+
end
|
1540
|
+
return @copyright
|
1541
|
+
end
|
1542
|
+
|
1543
|
+
# Sets the feed's copyright information
|
1544
|
+
def copyright=(new_copyright)
|
1545
|
+
@copyright = new_copyright
|
1546
|
+
end
|
1547
|
+
|
1214
1548
|
# Returns the number of seconds before the feed should expire
|
1215
1549
|
def time_to_live
|
1216
1550
|
if @time_to_live.nil?
|
@@ -1275,6 +1609,33 @@ module FeedTools
|
|
1275
1609
|
@time_to_live = 1 if @time_to_live < 1
|
1276
1610
|
end
|
1277
1611
|
|
1612
|
+
# Returns the feed's cloud
|
1613
|
+
def cloud
|
1614
|
+
if @cloud.nil?
|
1615
|
+
@cloud = FeedTools::Feed::Cloud.new
|
1616
|
+
@cloud.domain = XPath.first(channel_node, "cloud/@domain").to_s
|
1617
|
+
@cloud.port = XPath.first(channel_node, "cloud/@port").to_s
|
1618
|
+
@cloud.path = XPath.first(channel_node, "cloud/@path").to_s
|
1619
|
+
@cloud.register_procedure =
|
1620
|
+
XPath.first(channel_node, "cloud/@registerProcedure").to_s
|
1621
|
+
@cloud.protocol =
|
1622
|
+
XPath.first(channel_node, "cloud/@protocol").to_s.downcase
|
1623
|
+
@cloud.domain = nil if @cloud.domain == ""
|
1624
|
+
@cloud.port = nil if @cloud.port == ""
|
1625
|
+
@cloud.port = @cloud.port.to_i unless @cloud.port.nil?
|
1626
|
+
@cloud.port = nil if @cloud.port == 0
|
1627
|
+
@cloud.path = nil if @cloud.path == ""
|
1628
|
+
@cloud.register_procedure = nil if @cloud.register_procedure == ""
|
1629
|
+
@cloud.protocol = nil if @cloud.protocol == ""
|
1630
|
+
end
|
1631
|
+
return @cloud
|
1632
|
+
end
|
1633
|
+
|
1634
|
+
# Sets the feed's cloud
|
1635
|
+
def cloud=(new_cloud)
|
1636
|
+
@cloud = new_cloud
|
1637
|
+
end
|
1638
|
+
|
1278
1639
|
# Returns the feed generator
|
1279
1640
|
def generator
|
1280
1641
|
if @generator.nil?
|
@@ -1454,7 +1815,8 @@ module FeedTools
|
|
1454
1815
|
end
|
1455
1816
|
|
1456
1817
|
# Generates xml based on the content of the feed
|
1457
|
-
def build_xml(feed_type="rss", version=0.0,
|
1818
|
+
def build_xml(feed_type=(self.feed_type or "rss"), version=0.0,
|
1819
|
+
xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
1458
1820
|
if feed_type == "rss" && version == 0.0
|
1459
1821
|
version = 1.0
|
1460
1822
|
elsif feed_type == "atom" && version == 0.0
|
@@ -1801,22 +2163,47 @@ module FeedTools
|
|
1801
2163
|
return @root_node
|
1802
2164
|
end
|
1803
2165
|
|
2166
|
+
# Returns the feed items's unique id
|
2167
|
+
def id
|
2168
|
+
if @id.nil?
|
2169
|
+
@id = XPath.first(root_node, "id/text()").to_s
|
2170
|
+
if @id == ""
|
2171
|
+
@id = XPath.first(root_node, "guid/text()").to_s
|
2172
|
+
end
|
2173
|
+
@id = nil if @id == ""
|
2174
|
+
end
|
2175
|
+
return @id
|
2176
|
+
end
|
2177
|
+
|
2178
|
+
# Sets the feed item's unique id
|
2179
|
+
def id=(new_id)
|
2180
|
+
@id = new_id
|
2181
|
+
end
|
2182
|
+
|
1804
2183
|
# Returns the feed item title
|
1805
2184
|
def title
|
1806
2185
|
if @title.nil?
|
2186
|
+
repair_entities = false
|
1807
2187
|
if XPath.first(root_node, "title/@type").to_s == "xhtml" ||
|
1808
|
-
XPath.first(root_node, "title/@mode").to_s == "xhtml"
|
2188
|
+
XPath.first(root_node, "title/@mode").to_s == "xhtml" ||
|
2189
|
+
XPath.first(root_node, "title/@type").to_s == "xml" ||
|
2190
|
+
XPath.first(root_node, "title/@mode").to_s == "xml" ||
|
2191
|
+
XPath.first(root_node, "title/@type").to_s ==
|
2192
|
+
"application/xhtml+xml"
|
1809
2193
|
@title = XPath.first(root_node, "title").inner_xml
|
1810
2194
|
elsif XPath.first(root_node, "title/@type").to_s == "escaped" ||
|
1811
2195
|
XPath.first(root_node, "title/@mode").to_s == "escaped"
|
1812
|
-
@title =
|
2196
|
+
@title = FeedTools.unescape_entities(
|
1813
2197
|
XPath.first(root_node, "title/text()").to_s)
|
1814
2198
|
else
|
1815
|
-
|
1816
|
-
|
2199
|
+
title_node = XPath.first(root_node, "title")
|
2200
|
+
@title = title_node.inner_xml
|
2201
|
+
repair_entities = true
|
1817
2202
|
end
|
1818
2203
|
unless @title.nil?
|
1819
|
-
@title =
|
2204
|
+
@title = FeedTools.sanitize_html(@title, :strip)
|
2205
|
+
@title = FeedTools.unescape_entities(@title) if repair_entities
|
2206
|
+
@title = FeedTools.tidy_html(@title)
|
1820
2207
|
end
|
1821
2208
|
if @title != ""
|
1822
2209
|
# Some blogging tools include the number of comments in a post
|
@@ -1826,10 +2213,10 @@ module FeedTools
|
|
1826
2213
|
#
|
1827
2214
|
# If for some incredibly wierd reason you need the actual
|
1828
2215
|
# unstripped title, just use find_node("title/text()").to_s
|
1829
|
-
@title =
|
1830
|
-
@title.strip.gsub(/\[\d*\]$/, "")).strip
|
1831
|
-
@title.gsub!(/\n/, " ")
|
2216
|
+
@title = @title.strip.gsub(/\[\d*\]$/, "").strip
|
1832
2217
|
end
|
2218
|
+
@title.gsub!(/\n/, " ")
|
2219
|
+
@title.strip!
|
1833
2220
|
@title = nil if @title == ""
|
1834
2221
|
end
|
1835
2222
|
return @title
|
@@ -1843,53 +2230,54 @@ module FeedTools
|
|
1843
2230
|
# Returns the feed item description
|
1844
2231
|
def description
|
1845
2232
|
if @description.nil?
|
1846
|
-
|
1847
|
-
|
1848
|
-
|
1849
|
-
|
1850
|
-
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1856
|
-
|
1857
|
-
|
1858
|
-
|
1859
|
-
|
1860
|
-
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
1866
|
-
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
1870
|
-
|
1871
|
-
|
1872
|
-
|
1873
|
-
|
1874
|
-
|
1875
|
-
|
1876
|
-
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
XPath.first(
|
1882
|
-
|
1883
|
-
|
1884
|
-
|
1885
|
-
|
1886
|
-
@description =
|
1887
|
-
|
1888
|
-
|
1889
|
-
|
1890
|
-
|
1891
|
-
|
1892
|
-
|
2233
|
+
repair_entities = false
|
2234
|
+
description_node = XPath.first(root_node, "description")
|
2235
|
+
if description_node.nil?
|
2236
|
+
description_node = XPath.first(root_node, "xhtml:body")
|
2237
|
+
end
|
2238
|
+
if description_node.nil?
|
2239
|
+
description_node = XPath.first(root_node, "body")
|
2240
|
+
end
|
2241
|
+
if description_node.nil?
|
2242
|
+
description_node = XPath.first(root_node, "tagline")
|
2243
|
+
end
|
2244
|
+
if description_node.nil?
|
2245
|
+
description_node = XPath.first(root_node, "subtitle")
|
2246
|
+
end
|
2247
|
+
if description_node.nil?
|
2248
|
+
description_node = XPath.first(root_node, "summary")
|
2249
|
+
end
|
2250
|
+
if description_node.nil?
|
2251
|
+
description_node = XPath.first(root_node, "abstract")
|
2252
|
+
end
|
2253
|
+
if description_node.nil?
|
2254
|
+
description_node = XPath.first(root_node, "content:encoded")
|
2255
|
+
end
|
2256
|
+
if description_node.nil?
|
2257
|
+
description_node = XPath.first(root_node, "content")
|
2258
|
+
end
|
2259
|
+
if description_node.nil?
|
2260
|
+
description_node = XPath.first(root_node, "info")
|
2261
|
+
@bozo = true unless description_node.nil?
|
2262
|
+
end
|
2263
|
+
unless description_node.nil?
|
2264
|
+
if XPath.first(description_node, "@encoding").to_s != ""
|
2265
|
+
@description =
|
2266
|
+
"[Embedded data objects are not currently supported.]"
|
2267
|
+
elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
|
2268
|
+
XPath.first(description_node, "@mode").to_s == "xhtml" ||
|
2269
|
+
XPath.first(description_node, "@type").to_s == "xml" ||
|
2270
|
+
XPath.first(description_node, "@mode").to_s == "xml" ||
|
2271
|
+
XPath.first(description_node, "@type").to_s ==
|
2272
|
+
"application/xhtml+xml"
|
2273
|
+
@description = description_node.inner_xml
|
2274
|
+
elsif XPath.first(description_node, "@type").to_s == "escaped" ||
|
2275
|
+
XPath.first(description_node, "@mode").to_s == "escaped"
|
2276
|
+
@description = FeedTools.unescape_entities(
|
2277
|
+
description_node.inner_xml)
|
2278
|
+
else
|
2279
|
+
@description = description_node.inner_xml
|
2280
|
+
repair_entities = true
|
1893
2281
|
end
|
1894
2282
|
end
|
1895
2283
|
if @description == ""
|
@@ -1900,20 +2288,13 @@ module FeedTools
|
|
1900
2288
|
@description = self.itunes_subtitle
|
1901
2289
|
@description = "" if @description.nil?
|
1902
2290
|
end
|
1903
|
-
if @description == ""
|
1904
|
-
@description = self.media_text
|
1905
|
-
@description = "" if @description.nil?
|
1906
|
-
end
|
1907
2291
|
|
1908
2292
|
unless @description.nil?
|
1909
|
-
@description = FeedTools.sanitize_html(@description)
|
2293
|
+
@description = FeedTools.sanitize_html(@description, :strip)
|
2294
|
+
@description = FeedTools.unescape_entities(@description) if repair_entities
|
2295
|
+
@description = FeedTools.tidy_html(@description)
|
1910
2296
|
end
|
1911
2297
|
|
1912
|
-
# If it started with a bunch of divs, hack them right off. We can put
|
1913
|
-
# them back later if they're needed.
|
1914
|
-
@description.gsub!(/^(<div[^>]*>)*/, "")
|
1915
|
-
@description.gsub!(/(<\/div>)*$/, "")
|
1916
|
-
|
1917
2298
|
@description.gsub!(/\n/, " ") if @description.size < 80
|
1918
2299
|
@description = @description.strip unless @description.nil?
|
1919
2300
|
@description = nil if @description == ""
|
@@ -1925,6 +2306,66 @@ module FeedTools
|
|
1925
2306
|
def description=(new_description)
|
1926
2307
|
@description = new_description
|
1927
2308
|
end
|
2309
|
+
|
2310
|
+
# Returns the contents of the itunes:summary element
|
2311
|
+
def itunes_summary
|
2312
|
+
if @itunes_summary.nil?
|
2313
|
+
@itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
|
2314
|
+
"itunes:summary/text()").to_s)
|
2315
|
+
if @itunes_summary == ""
|
2316
|
+
@itunes_summary = nil
|
2317
|
+
end
|
2318
|
+
unless @itunes_summary.nil?
|
2319
|
+
@itunes_summary = FeedTools.sanitize_html(@itunes_summary)
|
2320
|
+
end
|
2321
|
+
end
|
2322
|
+
return @itunes_summary
|
2323
|
+
end
|
2324
|
+
|
2325
|
+
# Sets the contents of the itunes:summary element
|
2326
|
+
def itunes_summary=(new_itunes_summary)
|
2327
|
+
@itunes_summary = new_itunes_summary
|
2328
|
+
end
|
2329
|
+
|
2330
|
+
# Returns the contents of the itunes:subtitle element
|
2331
|
+
def itunes_subtitle
|
2332
|
+
if @itunes_subtitle.nil?
|
2333
|
+
@itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
|
2334
|
+
"itunes:subtitle/text()").to_s)
|
2335
|
+
if @itunes_subtitle == ""
|
2336
|
+
@itunes_subtitle = nil
|
2337
|
+
end
|
2338
|
+
unless @itunes_subtitle.nil?
|
2339
|
+
@itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
|
2340
|
+
end
|
2341
|
+
end
|
2342
|
+
return @itunes_subtitle
|
2343
|
+
end
|
2344
|
+
|
2345
|
+
# Sets the contents of the itunes:subtitle element
|
2346
|
+
def itunes_subtitle=(new_itunes_subtitle)
|
2347
|
+
@itunes_subtitle = new_itunes_subtitle
|
2348
|
+
end
|
2349
|
+
|
2350
|
+
# Returns the contents of the media:text element
|
2351
|
+
def media_text
|
2352
|
+
if @media_text.nil?
|
2353
|
+
@media_text = FeedTools.unescape_entities(XPath.first(root_node,
|
2354
|
+
"itunes:subtitle/text()").to_s)
|
2355
|
+
if @media_text == ""
|
2356
|
+
@media_text = nil
|
2357
|
+
end
|
2358
|
+
unless @media_text.nil?
|
2359
|
+
@media_text = FeedTools.sanitize_html(@media_text)
|
2360
|
+
end
|
2361
|
+
end
|
2362
|
+
return @media_text
|
2363
|
+
end
|
2364
|
+
|
2365
|
+
# Sets the contents of the media:text element
|
2366
|
+
def media_text=(new_media_text)
|
2367
|
+
@media_text = new_media_text
|
2368
|
+
end
|
1928
2369
|
|
1929
2370
|
# Returns the feed item link
|
1930
2371
|
def link
|
@@ -1948,7 +2389,7 @@ module FeedTools
|
|
1948
2389
|
end
|
1949
2390
|
end
|
1950
2391
|
if @link != ""
|
1951
|
-
@link =
|
2392
|
+
@link = FeedTools.unescape_entities(@link)
|
1952
2393
|
end
|
1953
2394
|
if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
|
1954
2395
|
if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
|
@@ -2084,23 +2525,6 @@ module FeedTools
|
|
2084
2525
|
@media_thumbnail_link = new_media_thumbnail_link
|
2085
2526
|
end
|
2086
2527
|
|
2087
|
-
# Returns the feed items's unique id
|
2088
|
-
def id
|
2089
|
-
if @id.nil?
|
2090
|
-
@id = XPath.first(root_node, "id/text()").to_s
|
2091
|
-
if @id == ""
|
2092
|
-
@id = XPath.first(root_node, "guid/text()").to_s
|
2093
|
-
end
|
2094
|
-
@id = nil if @id == ""
|
2095
|
-
end
|
2096
|
-
return @id
|
2097
|
-
end
|
2098
|
-
|
2099
|
-
# Sets the feed item's unique id
|
2100
|
-
def id=(new_id)
|
2101
|
-
@id = new_id
|
2102
|
-
end
|
2103
|
-
|
2104
2528
|
# Returns all feed item enclosures
|
2105
2529
|
def enclosures
|
2106
2530
|
if @enclosures.nil?
|
@@ -2116,7 +2540,7 @@ module FeedTools
|
|
2116
2540
|
# sometimes these also manage to show up in atom files.
|
2117
2541
|
for enclosure_node in rss_enclosures
|
2118
2542
|
enclosure = Enclosure.new
|
2119
|
-
enclosure.url =
|
2543
|
+
enclosure.url = FeedTools.unescape_entities(enclosure_node.attributes["url"].to_s)
|
2120
2544
|
enclosure.type = enclosure_node.attributes["type"].to_s
|
2121
2545
|
enclosure.file_size = enclosure_node.attributes["length"].to_i
|
2122
2546
|
enclosure.credits = []
|
@@ -2127,7 +2551,7 @@ module FeedTools
|
|
2127
2551
|
# Parse atom-type enclosures. If there are repeats of the same enclosure object,
|
2128
2552
|
# we merge the two together.
|
2129
2553
|
for enclosure_node in atom_enclosures
|
2130
|
-
enclosure_url =
|
2554
|
+
enclosure_url = FeedTools.unescape_entities(enclosure_node.attributes["href"].to_s)
|
2131
2555
|
enclosure = nil
|
2132
2556
|
new_enclosure = false
|
2133
2557
|
for existing_enclosure in @enclosures
|
@@ -2156,7 +2580,7 @@ module FeedTools
|
|
2156
2580
|
parse_media_content = lambda do |media_content_nodes|
|
2157
2581
|
affected_enclosures = []
|
2158
2582
|
for enclosure_node in media_content_nodes
|
2159
|
-
enclosure_url =
|
2583
|
+
enclosure_url = FeedTools.unescape_entities(enclosure_node.attributes["url"].to_s)
|
2160
2584
|
enclosure = nil
|
2161
2585
|
new_enclosure = false
|
2162
2586
|
for existing_enclosure in @enclosures
|
@@ -2182,9 +2606,9 @@ module FeedTools
|
|
2182
2606
|
(enclosure_node.attributes["isDefault"].to_s.downcase == "true")
|
2183
2607
|
if XPath.first(enclosure_node, "media:thumbnail/@url").to_s != ""
|
2184
2608
|
enclosure.thumbnail = EnclosureThumbnail.new(
|
2185
|
-
|
2186
|
-
|
2187
|
-
|
2609
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@url").to_s),
|
2610
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@height").to_s),
|
2611
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@width").to_s)
|
2188
2612
|
)
|
2189
2613
|
if enclosure.thumbnail.height == ""
|
2190
2614
|
enclosure.thumbnail.height = nil
|
@@ -2196,9 +2620,9 @@ module FeedTools
|
|
2196
2620
|
enclosure.categories = []
|
2197
2621
|
for category in XPath.match(enclosure_node, "media:category")
|
2198
2622
|
enclosure.categories << EnclosureCategory.new(
|
2199
|
-
|
2200
|
-
|
2201
|
-
|
2623
|
+
FeedTools.unescape_entities(category.text),
|
2624
|
+
FeedTools.unescape_entities(category.attributes["scheme"].to_s),
|
2625
|
+
FeedTools.unescape_entities(category.attributes["label"].to_s)
|
2202
2626
|
)
|
2203
2627
|
if enclosure.categories.last.scheme == ""
|
2204
2628
|
enclosure.categories.last.scheme = nil
|
@@ -2209,16 +2633,16 @@ module FeedTools
|
|
2209
2633
|
end
|
2210
2634
|
if XPath.first(enclosure_node, "media:hash/text()").to_s != ""
|
2211
2635
|
enclosure.hash = EnclosureHash.new(
|
2212
|
-
FeedTools.sanitize_html(
|
2636
|
+
FeedTools.sanitize_html(FeedTools.unescape_entities(XPath.first(
|
2213
2637
|
enclosure_node, "media:hash/text()").to_s), :strip),
|
2214
2638
|
"md5"
|
2215
2639
|
)
|
2216
2640
|
end
|
2217
2641
|
if XPath.first(enclosure_node, "media:player/@url").to_s != ""
|
2218
2642
|
enclosure.player = EnclosurePlayer.new(
|
2219
|
-
|
2220
|
-
|
2221
|
-
|
2643
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@url").to_s),
|
2644
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@height").to_s),
|
2645
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@width").to_s)
|
2222
2646
|
)
|
2223
2647
|
if enclosure.player.height == ""
|
2224
2648
|
enclosure.player.height = nil
|
@@ -2230,8 +2654,8 @@ module FeedTools
|
|
2230
2654
|
enclosure.credits = []
|
2231
2655
|
for credit in XPath.match(enclosure_node, "media:credit")
|
2232
2656
|
enclosure.credits << EnclosureCredit.new(
|
2233
|
-
|
2234
|
-
|
2657
|
+
FeedTools.unescape_entities(credit.text),
|
2658
|
+
FeedTools.unescape_entities(credit.attributes["role"].to_s.downcase)
|
2235
2659
|
)
|
2236
2660
|
if enclosure.credits.last.role == ""
|
2237
2661
|
enclosure.credits.last.role = nil
|
@@ -2240,7 +2664,7 @@ module FeedTools
|
|
2240
2664
|
enclosure.explicit = (XPath.first(enclosure_node,
|
2241
2665
|
"media:adult/text()").to_s.downcase == "true")
|
2242
2666
|
if XPath.first(enclosure_node, "media:text/text()").to_s != ""
|
2243
|
-
enclosure.text =
|
2667
|
+
enclosure.text = FeedTools.unescape_entities(XPath.first(enclosure_node,
|
2244
2668
|
"media:text/text()").to_s)
|
2245
2669
|
end
|
2246
2670
|
affected_enclosures << enclosure
|
@@ -2271,11 +2695,11 @@ module FeedTools
|
|
2271
2695
|
if enclosure.thumbnail.nil? &&
|
2272
2696
|
XPath.first(media_group, "media:thumbnail/@url").to_s != ""
|
2273
2697
|
enclosure.thumbnail = EnclosureThumbnail.new(
|
2274
|
-
|
2698
|
+
FeedTools.unescape_entities(
|
2275
2699
|
XPath.first(media_group, "media:thumbnail/@url").to_s),
|
2276
|
-
|
2700
|
+
FeedTools.unescape_entities(
|
2277
2701
|
XPath.first(media_group, "media:thumbnail/@height").to_s),
|
2278
|
-
|
2702
|
+
FeedTools.unescape_entities(
|
2279
2703
|
XPath.first(media_group, "media:thumbnail/@width").to_s)
|
2280
2704
|
)
|
2281
2705
|
if enclosure.thumbnail.height == ""
|
@@ -2289,9 +2713,9 @@ module FeedTools
|
|
2289
2713
|
enclosure.categories = []
|
2290
2714
|
for category in XPath.match(media_group, "media:category")
|
2291
2715
|
enclosure.categories << EnclosureCategory.new(
|
2292
|
-
|
2293
|
-
|
2294
|
-
|
2716
|
+
FeedTools.unescape_entities(category.text),
|
2717
|
+
FeedTools.unescape_entities(category.attributes["scheme"].to_s),
|
2718
|
+
FeedTools.unescape_entities(category.attributes["label"].to_s)
|
2295
2719
|
)
|
2296
2720
|
if enclosure.categories.last.scheme == ""
|
2297
2721
|
enclosure.categories.last.scheme = nil
|
@@ -2304,16 +2728,16 @@ module FeedTools
|
|
2304
2728
|
if enclosure.hash.nil? &&
|
2305
2729
|
XPath.first(media_group, "media:hash/text()").to_s != ""
|
2306
2730
|
enclosure.hash = EnclosureHash.new(
|
2307
|
-
|
2731
|
+
FeedTools.unescape_entities(XPath.first(media_group, "media:hash/text()").to_s),
|
2308
2732
|
"md5"
|
2309
2733
|
)
|
2310
2734
|
end
|
2311
2735
|
if enclosure.player.nil? &&
|
2312
2736
|
XPath.first(media_group, "media:player/@url").to_s != ""
|
2313
2737
|
enclosure.player = EnclosurePlayer.new(
|
2314
|
-
|
2315
|
-
|
2316
|
-
|
2738
|
+
FeedTools.unescape_entities(XPath.first(media_group, "media:player/@url").to_s),
|
2739
|
+
FeedTools.unescape_entities(XPath.first(media_group, "media:player/@height").to_s),
|
2740
|
+
FeedTools.unescape_entities(XPath.first(media_group, "media:player/@width").to_s)
|
2317
2741
|
)
|
2318
2742
|
if enclosure.player.height == ""
|
2319
2743
|
enclosure.player.height = nil
|
@@ -2326,8 +2750,8 @@ module FeedTools
|
|
2326
2750
|
enclosure.credits = []
|
2327
2751
|
for credit in XPath.match(media_group, "media:credit")
|
2328
2752
|
enclosure.credits << EnclosureCredit.new(
|
2329
|
-
|
2330
|
-
|
2753
|
+
FeedTools.unescape_entities(credit.text),
|
2754
|
+
FeedTools.unescape_entities(credit.attributes["role"].to_s.downcase)
|
2331
2755
|
)
|
2332
2756
|
if enclosure.credits.last.role == ""
|
2333
2757
|
enclosure.credits.last.role = nil
|
@@ -2340,7 +2764,7 @@ module FeedTools
|
|
2340
2764
|
end
|
2341
2765
|
if enclosure.text.nil? &&
|
2342
2766
|
XPath.first(media_group, "media:text/text()").to_s != ""
|
2343
|
-
enclosure.text = FeedTools.sanitize_html(
|
2767
|
+
enclosure.text = FeedTools.sanitize_html(FeedTools.unescape_entities(
|
2344
2768
|
XPath.first(media_group, "media:text/text()").to_s), :strip)
|
2345
2769
|
end
|
2346
2770
|
end
|
@@ -2373,9 +2797,9 @@ module FeedTools
|
|
2373
2797
|
enclosure.categories = []
|
2374
2798
|
end
|
2375
2799
|
enclosure.categories << EnclosureCategory.new(
|
2376
|
-
|
2377
|
-
|
2378
|
-
|
2800
|
+
FeedTools.unescape_entities(category_path),
|
2801
|
+
FeedTools.unescape_entities("http://www.apple.com/itunes/store/"),
|
2802
|
+
FeedTools.unescape_entities("iTunes Music Store Categories")
|
2379
2803
|
)
|
2380
2804
|
end
|
2381
2805
|
end
|
@@ -2464,136 +2888,140 @@ module FeedTools
|
|
2464
2888
|
def enclosures=(new_enclosures)
|
2465
2889
|
@enclosures = new_enclosures
|
2466
2890
|
end
|
2467
|
-
|
2468
|
-
# Returns the feed item author
|
2469
|
-
def author_name
|
2470
|
-
# TODO: make this not suck, actually ensure we're looking at a name
|
2471
|
-
# and not an email address.
|
2472
|
-
# Also, factor in itunes module.
|
2473
|
-
# =================================================================
|
2474
|
-
if @author_name.nil?
|
2475
|
-
@author_name = CGI.unescapeHTML(XPath.first(root_node, "author/name/text()").to_s)
|
2476
|
-
if @author_name == ""
|
2477
|
-
@author_name = CGI.unescapeHTML(XPath.first(root_node, "dc:creator/text()").to_s)
|
2478
|
-
end
|
2479
|
-
if @author_name == ""
|
2480
|
-
@author_name = CGI.unescapeHTML(XPath.first(root_node, "author/text()").to_s)
|
2481
|
-
end
|
2482
|
-
end
|
2483
|
-
return @author_name
|
2484
|
-
end
|
2485
2891
|
|
2486
|
-
#
|
2487
|
-
def
|
2488
|
-
@
|
2489
|
-
|
2490
|
-
|
2491
|
-
|
2492
|
-
|
2493
|
-
|
2494
|
-
|
2495
|
-
|
2496
|
-
|
2497
|
-
|
2498
|
-
|
2499
|
-
|
2500
|
-
|
2892
|
+
# Returns the feed item author
|
2893
|
+
def author
|
2894
|
+
if @author.nil?
|
2895
|
+
@author = FeedTools::Feed::Author.new
|
2896
|
+
|
2897
|
+
# Set the author name
|
2898
|
+
@author.name = FeedTools.unescape_entities(
|
2899
|
+
XPath.first(root_node, "author/name/text()").to_s)
|
2900
|
+
|
2901
|
+
@author.raw = FeedTools.unescape_entities(
|
2902
|
+
XPath.first(root_node, "author/text()").to_s)
|
2903
|
+
if @author.raw == ""
|
2904
|
+
@author.raw = FeedTools.unescape_entities(
|
2905
|
+
XPath.first(root_node, "dc:creator/text()").to_s)
|
2906
|
+
end
|
2907
|
+
if @author.raw == ""
|
2908
|
+
@author.raw = FeedTools.unescape_entities(
|
2909
|
+
XPath.first(root_node, "dc:author/text()").to_s)
|
2910
|
+
end
|
2911
|
+
unless @author.raw == ""
|
2912
|
+
raw_scan = @author.raw.scan(
|
2913
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
2914
|
+
if raw_scan.nil? || raw_scan.size == 0
|
2915
|
+
raw_scan = @author.raw.scan(
|
2916
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
2917
|
+
author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
|
2918
|
+
else
|
2919
|
+
author_raw_pair = raw_scan.first
|
2920
|
+
end
|
2921
|
+
if raw_scan.nil? || raw_scan.size == 0
|
2922
|
+
email_scan = @author.raw.scan(
|
2923
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
2924
|
+
if email_scan != nil && email_scan.size > 0
|
2925
|
+
@author.email = email_scan.first.strip
|
2926
|
+
end
|
2927
|
+
end
|
2928
|
+
unless author_raw_pair.nil? || author_raw_pair.size == 0
|
2929
|
+
@author.name = author_raw_pair.first.strip
|
2930
|
+
@author.email = author_raw_pair.last.strip
|
2931
|
+
else
|
2932
|
+
unless @author.raw.include?("@")
|
2933
|
+
# We can be reasonably sure we are looking at something
|
2934
|
+
# that the creator didn't intend to contain an email address if
|
2935
|
+
# it got through the preceding regexes and it doesn't
|
2936
|
+
# contain the tell-tale '@' symbol.
|
2937
|
+
@author.name = @author.raw
|
2938
|
+
end
|
2939
|
+
end
|
2501
2940
|
end
|
2502
|
-
end
|
2503
|
-
return @itunes_summary
|
2504
|
-
end
|
2505
2941
|
|
2506
|
-
|
2507
|
-
|
2508
|
-
@itunes_summary = new_itunes_summary
|
2509
|
-
end
|
2942
|
+
@author.name = nil if @author.name == ""
|
2943
|
+
@author.raw = nil if @author.raw == ""
|
2510
2944
|
|
2511
|
-
|
2512
|
-
|
2513
|
-
|
2514
|
-
|
2515
|
-
"itunes:subtitle/text()").to_s)
|
2516
|
-
if @itunes_subtitle == ""
|
2517
|
-
@itunes_subtitle = nil
|
2945
|
+
# Set the author email
|
2946
|
+
if @author.email == ""
|
2947
|
+
@author.email = FeedTools.unescape_entities(
|
2948
|
+
XPath.first(root_node, "author/email/text()").to_s)
|
2518
2949
|
end
|
2519
|
-
|
2520
|
-
@itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
|
2521
|
-
end
|
2522
|
-
end
|
2523
|
-
return @itunes_subtitle
|
2524
|
-
end
|
2950
|
+
@author.email = nil if @author.email == ""
|
2525
2951
|
|
2526
|
-
|
2527
|
-
|
2528
|
-
|
2529
|
-
|
2952
|
+
# Set the author url
|
2953
|
+
@author.url = FeedTools.unescape_entities(
|
2954
|
+
XPath.first(root_node, "author/url/text()").to_s)
|
2955
|
+
@author.url = nil if @author.url == ""
|
2530
2956
|
|
2531
|
-
|
2532
|
-
|
2533
|
-
|
2534
|
-
|
2535
|
-
|
2536
|
-
if @media_text == ""
|
2537
|
-
@media_text = nil
|
2538
|
-
end
|
2539
|
-
unless @media_text.nil?
|
2540
|
-
@media_text = FeedTools.sanitize_html(@media_text)
|
2957
|
+
# Fallback on the itunes module if we didn't find an author name
|
2958
|
+
begin
|
2959
|
+
@author.name = self.itunes_author if @author.name.nil?
|
2960
|
+
rescue
|
2961
|
+
@author.name = nil
|
2541
2962
|
end
|
2542
2963
|
end
|
2543
|
-
return @
|
2964
|
+
return @author
|
2544
2965
|
end
|
2545
|
-
|
2546
|
-
# Sets the
|
2547
|
-
def
|
2548
|
-
|
2966
|
+
|
2967
|
+
# Sets the feed item author
|
2968
|
+
def author=(new_author)
|
2969
|
+
if new_author.respond_to?(:name) &&
|
2970
|
+
new_author.respond_to?(:email) &&
|
2971
|
+
new_author.respond_to?(:url)
|
2972
|
+
# It's a complete author object, just set it.
|
2973
|
+
@author = new_author
|
2974
|
+
else
|
2975
|
+
# We're not looking at an author object, this is probably a string,
|
2976
|
+
# default to setting the author's name.
|
2977
|
+
if @author.nil?
|
2978
|
+
@author = FeedTools::Feed::Author.new
|
2979
|
+
end
|
2980
|
+
@author.name = new_author
|
2981
|
+
end
|
2549
2982
|
end
|
2550
2983
|
|
2551
2984
|
# Returns the contents of the itunes:author element
|
2552
2985
|
#
|
2553
2986
|
# This inherits from any incorrectly placed channel-level itunes:author
|
2554
|
-
# elements. They're actually amazingly
|
2987
|
+
# elements. They're actually amazingly common. People don't read specs.
|
2555
2988
|
def itunes_author
|
2556
2989
|
if @itunes_author.nil?
|
2557
|
-
@itunes_author =
|
2990
|
+
@itunes_author = FeedTools.unescape_entities(XPath.first(root_node,
|
2558
2991
|
"itunes:author/text()").to_s)
|
2559
|
-
if @itunes_author == ""
|
2560
|
-
|
2561
|
-
"itunes:author/text()").to_s)
|
2562
|
-
end
|
2563
|
-
if @itunes_author == ""
|
2564
|
-
@itunes_author = nil
|
2565
|
-
end
|
2992
|
+
@itunes_author = feed.itunes_author if @itunes_author == ""
|
2993
|
+
@itunes_author = nil if @itunes_author == ""
|
2566
2994
|
end
|
2567
2995
|
return @itunes_author
|
2568
2996
|
end
|
2569
|
-
|
2997
|
+
|
2570
2998
|
# Sets the contents of the itunes:author element
|
2571
2999
|
def itunes_author=(new_itunes_author)
|
2572
3000
|
@itunes_author = new_itunes_author
|
2573
|
-
end
|
2574
|
-
|
3001
|
+
end
|
3002
|
+
|
2575
3003
|
# Returns the number of seconds that the associated media runs for
|
2576
|
-
def
|
2577
|
-
if @
|
2578
|
-
|
3004
|
+
def itunes_duration
|
3005
|
+
if @itunes_duration.nil?
|
3006
|
+
raw_duration = FeedTools.unescape_entities(XPath.first(root_node,
|
2579
3007
|
"itunes:duration/text()").to_s)
|
2580
|
-
if
|
2581
|
-
hms =
|
3008
|
+
if raw_duration != ""
|
3009
|
+
hms = raw_duration.split(":").map { |x| x.to_i }
|
2582
3010
|
if hms.size == 3
|
2583
|
-
@
|
3011
|
+
@itunes_duration = hms[0].hour + hms[1].minute + hms[2]
|
2584
3012
|
elsif hms.size == 2
|
2585
|
-
@
|
3013
|
+
@itunes_duration = hms[0].minute + hms[1]
|
2586
3014
|
elsif hms.size == 1
|
2587
|
-
@
|
3015
|
+
@itunes_duration = hms[0]
|
2588
3016
|
end
|
2589
3017
|
end
|
2590
3018
|
end
|
2591
|
-
return @
|
3019
|
+
return @itunes_duration
|
2592
3020
|
end
|
2593
3021
|
|
2594
3022
|
# Sets the number of seconds that the associated media runs for
|
2595
|
-
def
|
2596
|
-
@
|
3023
|
+
def itunes_duration=(new_itunes_duration)
|
3024
|
+
@itunes_duration = new_itunes_duration
|
2597
3025
|
end
|
2598
3026
|
|
2599
3027
|
# Sets the itunes:summary
|
@@ -2722,7 +3150,8 @@ module FeedTools
|
|
2722
3150
|
end
|
2723
3151
|
|
2724
3152
|
# Generates xml based on the content of the feed item
|
2725
|
-
def build_xml(feed_type="rss", version=0.0,
|
3153
|
+
def build_xml(feed_type=(self.feed.feed_type or "rss"), version=0.0,
|
3154
|
+
xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
2726
3155
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
2727
3156
|
# RDF-based rss format
|
2728
3157
|
if link.nil?
|
@@ -2831,9 +3260,9 @@ module FeedTools
|
|
2831
3260
|
end
|
2832
3261
|
end
|
2833
3262
|
|
2834
|
-
module REXML
|
2835
|
-
class Element
|
2836
|
-
def inner_xml
|
3263
|
+
module REXML # :nodoc:
|
3264
|
+
class Element # :nodoc:
|
3265
|
+
def inner_xml # :nodoc:
|
2837
3266
|
result = ""
|
2838
3267
|
self.each_child do |child|
|
2839
3268
|
result << child.to_s
|
@@ -2848,4 +3277,4 @@ begin
|
|
2848
3277
|
FeedTools.feed_cache.initialize_cache
|
2849
3278
|
end
|
2850
3279
|
rescue
|
2851
|
-
end
|
3280
|
+
end
|