feedtools 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +8 -0
- data/lib/feed_tools.rb +730 -301
- data/rakefile +1 -1
- data/test/amp_test.rb +475 -0
- data/test/atom_test.rb +38 -0
- data/test/cache_test.rb +23 -0
- data/test/helper_test.rb +29 -0
- data/test/rss_test.rb +99 -5
- metadata +6 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
== FeedTools 0.2.2
|
2
|
+
* fixed http redirection bug
|
3
|
+
* fixed several documentation typos
|
4
|
+
* still more unit tests
|
5
|
+
* improved support for atom
|
6
|
+
* minor improvements to the database caching mechanism
|
7
|
+
* more complete support for rss elements
|
8
|
+
* major improvements to the handling of tags containing html content
|
1
9
|
== FeedTools 0.2.1
|
2
10
|
* fixed incorrect dependency on ActiveRecord 1.10.1
|
3
11
|
* more unit tests
|
data/lib/feed_tools.rb
CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
25
25
|
ENV['RAILS_ENV'] ||
|
26
26
|
'production' # :nodoc:
|
27
27
|
|
28
|
-
FEED_TOOLS_VERSION = "0.2.
|
28
|
+
FEED_TOOLS_VERSION = "0.2.2"
|
29
29
|
|
30
30
|
$:.unshift(File.dirname(__FILE__))
|
31
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
@@ -131,6 +131,19 @@ module FeedTools
|
|
131
131
|
end
|
132
132
|
return nil
|
133
133
|
end
|
134
|
+
|
135
|
+
# Returns true if a connection to the database has been established and the
|
136
|
+
# required table structure is in place.
|
137
|
+
def DatabaseFeedCache.connected?
  # Asking for the connection raises when none can be established; any
  # error raised in this method is reported as "not connected".
  ActiveRecord::Base.connection
  return false if ActiveRecord::Base.configurations.nil?
  # The cache is only usable once its table is present as well.
  DatabaseFeedCache.table_exists? ? true : false
rescue
  false
end
|
134
147
|
|
135
148
|
# True if the appropriate database table already exists
|
136
149
|
def DatabaseFeedCache.table_exists?
|
@@ -258,12 +271,25 @@ module FeedTools
|
|
258
271
|
# find_by_id
|
259
272
|
# find_by_url
|
260
273
|
# initialize_cache
|
274
|
+
# connected?
|
261
275
|
def FeedTools.feed_cache=(new_feed_cache)
  # TODO: ensure that the feed cache class actually implements the
  # required methods. For now the object is stored without any checks.
  # ==================================================================
  @feed_cache = new_feed_cache
end
|
266
280
|
|
281
|
+
# Returns true if FeedTools.feed_cache is not nil and a connection with
|
282
|
+
# the cache has been successfully established. Also returns false if an
|
283
|
+
# error is raised while trying to determine the status of the cache.
|
284
|
+
def FeedTools.feed_cache_connected?
  # No cache configured means not connected; otherwise the cache itself is
  # asked. Any error raised along the way also counts as not connected.
  FeedTools.feed_cache.nil? ? false : FeedTools.feed_cache.connected?
rescue
  false
end
|
292
|
+
|
267
293
|
# Returns the currently used user agent string.
|
268
294
|
def FeedTools.user_agent
|
269
295
|
return @user_agent
|
@@ -455,6 +481,25 @@ module FeedTools
|
|
455
481
|
return true
|
456
482
|
end
|
457
483
|
|
484
|
+
# Escapes all html entities
|
485
|
+
def FeedTools.escape_entities(html)
  # html - the text to escape; it is not modified.
  # Returns a new string with the html special characters replaced by
  # entities.
  escaped_html = CGI.escapeHTML(html)
  # Replace any quote characters that CGI.escapeHTML left in place. The
  # replacements are applied to the escaped copy (escaped_html), which is
  # the only string defined in this method.
  escaped_html = escaped_html.gsub(/'/, "&apos;")
  escaped_html = escaped_html.gsub(/"/, "&quot;")
  return escaped_html
end
|
491
|
+
|
492
|
+
# Unescapes all html entities
|
493
|
+
def FeedTools.unescape_entities(html)
  # html - the text to unescape.
  # Returns a new string with the html entities decoded.
  #
  # Work on a copy so that the caller's string is never modified (and so
  # that frozen strings are accepted).
  unescaped_html = html.dup
  # Rewrite numeric ampersand references to the named entity first, so the
  # CGI pass below decodes them together with every other &amp;.
  unescaped_html.gsub!(/&#x26;/, "&amp;")
  unescaped_html.gsub!(/&#38;/, "&amp;")
  unescaped_html = CGI.unescapeHTML(unescaped_html)
  # Decode the quote entities as well, in case CGI.unescapeHTML left them.
  unescaped_html = unescaped_html.gsub(/&apos;/, "'")
  unescaped_html = unescaped_html.gsub(/&quot;/, "\"")
  return unescaped_html
end
|
502
|
+
|
458
503
|
# Removes all html tags from the html formatted text.
|
459
504
|
def FeedTools.strip_html(html)
|
460
505
|
# TODO: do this properly
|
@@ -467,6 +512,7 @@ module FeedTools
|
|
467
512
|
def FeedTools.tidy_html(html)
|
468
513
|
if FeedTools.tidy_enabled?
|
469
514
|
is_fragment = true
|
515
|
+
html.gsub!(/<!'/, "&lt;!'")
|
470
516
|
if (html.strip =~ /<html>(.|\n)*<body>/) != nil ||
|
471
517
|
(html.strip =~ /<\/body>(.|\n)*<\/html>$/) != nil
|
472
518
|
is_fragment = false
|
@@ -484,13 +530,15 @@ module FeedTools
|
|
484
530
|
xml
|
485
531
|
end
|
486
532
|
if is_fragment
|
487
|
-
# Tidy
|
533
|
+
# Tidy sticks <html>...<body>[our html]</body>...</html> in.
|
488
534
|
# We don't want this.
|
489
535
|
tidy_html.strip!
|
490
536
|
tidy_html.gsub!(/^<html>(.|\n)*<body>/, "")
|
491
537
|
tidy_html.gsub!(/<\/body>(.|\n)*<\/html>$/, "")
|
492
538
|
tidy_html.strip!
|
493
539
|
end
|
540
|
+
tidy_html.gsub!(/&/, "&")
|
541
|
+
tidy_html.gsub!(/&/, "&")
|
494
542
|
else
|
495
543
|
tidy_html = html
|
496
544
|
end
|
@@ -502,7 +550,7 @@ module FeedTools
|
|
502
550
|
# be escaped. If mode is set to :strip, dangerous and unknown
|
503
551
|
# elements and all children will be removed entirely.
|
504
552
|
# Dangerous or unknown attributes are always removed.
|
505
|
-
def FeedTools.sanitize_html(html, mode=:
|
553
|
+
def FeedTools.sanitize_html(html, mode=:strip)
|
506
554
|
|
507
555
|
# Lists borrowed from Mark Pilgrim's feedparser
|
508
556
|
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
|
@@ -527,18 +575,14 @@ module FeedTools
|
|
527
575
|
'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
|
528
576
|
'type', 'usemap', 'valign', 'value', 'vspace', 'width']
|
529
577
|
|
530
|
-
#
|
531
|
-
|
532
|
-
|
533
|
-
html.gsub!(
|
534
|
-
|
535
|
-
# The closer we are to proper xhtml, the more accurate the
|
536
|
-
# sanitization will be.
|
537
|
-
html = FeedTools.tidy_html(html)
|
538
|
-
|
578
|
+
# Replace with appropriate named entities
|
579
|
+
html.gsub!(/&/, "&")
|
580
|
+
html.gsub!(/&/, "&")
|
581
|
+
html.gsub!(/<!'/, "&lt;!'")
|
582
|
+
|
539
583
|
# Hackity hack. But it works, and it seems plenty fast enough.
|
540
584
|
html_doc = HTree.parse_xml("<root>" + html + "</root>").to_rexml
|
541
|
-
|
585
|
+
|
542
586
|
sanitize_node = lambda do |html_node|
|
543
587
|
if html_node.respond_to? :children
|
544
588
|
for child in html_node.children
|
@@ -564,13 +608,83 @@ module FeedTools
|
|
564
608
|
html_node
|
565
609
|
end
|
566
610
|
sanitize_node.call(html_doc.root)
|
567
|
-
|
611
|
+
html = html_doc.root.inner_xml
|
612
|
+
return html
|
568
613
|
end
|
569
614
|
|
570
615
|
class Feed
|
571
616
|
include REXML
|
572
617
|
include AttributeDictionary
|
573
618
|
|
619
|
+
# Represents a feed/feed item's category
|
620
|
+
class Category
  # The category term value
  attr_accessor :term
  # The categorization scheme
  attr_accessor :scheme
  # A human-readable description of the category
  attr_accessor :label

  # Relays any unknown methods to the term so that you can treat the
  # category object as a string. The arguments are splatted and the block
  # is passed along, so the term receives the call exactly as it was made.
  def method_missing(msg, *params, &block)
    self.term.send(msg, *params, &block)
  end

  # Makes respond_to? agree with method_missing: the category responds to
  # whatever its term responds to.
  def respond_to_missing?(msg, include_private = false)
    self.term.respond_to?(msg, include_private) || super
  end

  # Relays the to_s method to the term field
  def to_s
    self.term.to_s
  end

  # Relays the inspect method to the term field
  def inspect
    self.term.inspect
  end
end
|
644
|
+
|
645
|
+
# Represents a feed/feed item's author
|
646
|
+
class Author
  # The author's real name
  attr_accessor :name
  # The author's email address
  attr_accessor :email
  # The url of the author's homepage
  attr_accessor :url
  # The raw value of the author tag if present
  attr_accessor :raw

  # Relays any unknown methods to the name so that you can treat the
  # author object as a string. The arguments are splatted and the block
  # is passed along, so the name receives the call exactly as it was made.
  def method_missing(msg, *params, &block)
    self.name.send(msg, *params, &block)
  end

  # Makes respond_to? agree with method_missing: the author responds to
  # whatever its name responds to.
  def respond_to_missing?(msg, include_private = false)
    self.name.respond_to?(msg, include_private) || super
  end

  # Relays the to_s method to the name field
  def to_s
    self.name.to_s
  end

  # Relays the inspect method to the name field
  def inspect
    self.name.inspect
  end
end
|
672
|
+
|
673
|
+
# Represents a feed's cloud.
|
674
|
+
class Cloud
  # domain             - The domain of the cloud.
  # path               - The path for the cloud.
  # port               - The port the cloud is listening on.
  # protocol           - The web services protocol the cloud uses.
  #                      Possible values are either "xml-rpc" or "soap".
  # register_procedure - The procedure to use to request notification.
  attr_accessor :domain, :path, :port, :protocol, :register_procedure
end
|
687
|
+
|
574
688
|
# Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
|
575
689
|
def Feed.open(url)
|
576
690
|
# clean up the url
|
@@ -671,14 +785,14 @@ module FeedTools
|
|
671
785
|
|
672
786
|
Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
|
673
787
|
response = http.request_get(feed_uri.path, http_headers)
|
674
|
-
|
788
|
+
|
675
789
|
case response
|
676
790
|
when Net::HTTPSuccess
|
677
791
|
# We've reached the final destination, process all previous
|
678
792
|
# redirections, and see if we need to update the url.
|
679
793
|
for redirected_response in response_chain
|
680
794
|
if redirected_response.last.code.to_i == 301
|
681
|
-
self.url = redirected_response.
|
795
|
+
self.url = redirected_response.last['location']
|
682
796
|
else
|
683
797
|
# Jump out as soon as we hit anything that isn't a
|
684
798
|
# permanently moved redirection.
|
@@ -690,7 +804,7 @@ module FeedTools
|
|
690
804
|
if response.code.to_i == 304
|
691
805
|
response.error!
|
692
806
|
else
|
693
|
-
if response['
|
807
|
+
if response['location'].nil?
|
694
808
|
raise FeedAccessError,
|
695
809
|
"No location to redirect to supplied: " + response.code
|
696
810
|
end
|
@@ -913,6 +1027,30 @@ module FeedTools
|
|
913
1027
|
@cache_object = new_cache_object
|
914
1028
|
end
|
915
1029
|
|
1030
|
+
# Returns the type of feed
|
1031
|
+
# Possible values:
|
1032
|
+
# "rss", "atom", "cdf", "!okay/news"
|
1033
|
+
def feed_type
  # The type is derived from the lower-cased name of the root node the
  # first time it is needed. A root name that is not in the table leaves
  # the type nil, so detection is attempted again on the next call.
  if @feed_type.nil?
    types_by_root_name = {
      "feed" => "atom",
      "rdf:rdf" => "rss",
      "rss" => "rss",
      "channel" => "cdf"
    }
    @feed_type = types_by_root_name[self.root_node.name.downcase]
  end
  return @feed_type
end
|
1048
|
+
|
1049
|
+
# Sets the default feed type
|
1050
|
+
def feed_type=(new_feed_type)
  # A non-nil value stored here is what feed_type returns from then on,
  # since detection only runs while no type has been set.
  @feed_type = new_feed_type
end
|
1053
|
+
|
916
1054
|
# Returns the feed's unique id
|
917
1055
|
def id
|
918
1056
|
if @id.nil?
|
@@ -948,24 +1086,30 @@ module FeedTools
|
|
948
1086
|
# Returns the feed title
|
949
1087
|
def title
|
950
1088
|
if @title.nil?
|
951
|
-
|
952
|
-
|
1089
|
+
repair_entities = false
|
1090
|
+
if XPath.first(channel_node, "title/@type").to_s == "xhtml" ||
|
1091
|
+
XPath.first(channel_node, "title/@mode").to_s == "xhtml" ||
|
1092
|
+
XPath.first(channel_node, "title/@type").to_s == "xml" ||
|
1093
|
+
XPath.first(channel_node, "title/@mode").to_s == "xml" ||
|
1094
|
+
XPath.first(channel_node, "title/@type").to_s ==
|
1095
|
+
"application/xhtml+xml"
|
953
1096
|
@title = XPath.first(channel_node, "title").inner_xml
|
954
1097
|
elsif XPath.first(channel_node, "title/@type").to_s == "escaped" ||
|
955
1098
|
XPath.first(channel_node, "title/@mode").to_s == "escaped"
|
956
|
-
@title =
|
1099
|
+
@title = FeedTools.unescape_entities(
|
957
1100
|
XPath.first(channel_node, "title/text()").to_s)
|
958
1101
|
else
|
959
|
-
|
960
|
-
|
1102
|
+
title_node = XPath.first(channel_node, "title")
|
1103
|
+
@title = title_node.inner_xml
|
1104
|
+
repair_entities = true
|
961
1105
|
end
|
962
1106
|
unless @title.nil?
|
963
|
-
@title =
|
964
|
-
|
965
|
-
|
966
|
-
@title = FeedTools.strip_html(@title).strip
|
1107
|
+
@title = FeedTools.sanitize_html(@title, :strip)
|
1108
|
+
@title = FeedTools.unescape_entities(@title) if repair_entities
|
1109
|
+
@title = FeedTools.tidy_html(@title)
|
967
1110
|
end
|
968
1111
|
@title.gsub!(/\n/, " ")
|
1112
|
+
@title.strip!
|
969
1113
|
@title = nil if @title == ""
|
970
1114
|
self.cache_object.title = @title unless self.cache_object.nil?
|
971
1115
|
end
|
@@ -981,57 +1125,57 @@ module FeedTools
|
|
981
1125
|
# Returns the feed description
|
982
1126
|
def description
|
983
1127
|
if @description.nil?
|
984
|
-
|
985
|
-
|
986
|
-
if
|
987
|
-
|
988
|
-
|
1128
|
+
repair_entities = false
|
1129
|
+
description_node = XPath.first(channel_node, "description")
|
1130
|
+
if description_node.nil?
|
1131
|
+
description_node = XPath.first(channel_node, "tagline")
|
1132
|
+
end
|
1133
|
+
if description_node.nil?
|
1134
|
+
description_node = XPath.first(channel_node, "subtitle")
|
1135
|
+
end
|
1136
|
+
if description_node.nil?
|
1137
|
+
description_node = XPath.first(channel_node, "summary")
|
1138
|
+
end
|
1139
|
+
if description_node.nil?
|
1140
|
+
description_node = XPath.first(channel_node, "abstract")
|
1141
|
+
end
|
1142
|
+
if description_node.nil?
|
1143
|
+
description_node = XPath.first(channel_node, "info")
|
1144
|
+
end
|
1145
|
+
if description_node.nil?
|
1146
|
+
description_node = XPath.first(channel_node, "content:encoded")
|
1147
|
+
@bozo = true unless description_node.nil?
|
1148
|
+
end
|
1149
|
+
if description_node.nil?
|
1150
|
+
description_node = XPath.first(channel_node, "content")
|
1151
|
+
@bozo = true unless description_node.nil?
|
1152
|
+
end
|
1153
|
+
if description_node.nil?
|
1154
|
+
description_node = XPath.first(channel_node, "xhtml:body")
|
1155
|
+
@bozo = true unless description_node.nil?
|
1156
|
+
end
|
1157
|
+
if description_node.nil?
|
1158
|
+
description_node = XPath.first(channel_node, "body")
|
1159
|
+
@bozo = true unless description_node.nil?
|
1160
|
+
end
|
1161
|
+
unless description_node.nil?
|
1162
|
+
if XPath.first(description_node, "@encoding").to_s != ""
|
1163
|
+
@description =
|
1164
|
+
"[Embedded data objects are not currently supported.]"
|
1165
|
+
elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
|
1166
|
+
XPath.first(description_node, "@mode").to_s == "xhtml" ||
|
1167
|
+
XPath.first(description_node, "@type").to_s == "xml" ||
|
1168
|
+
XPath.first(description_node, "@mode").to_s == "xml" ||
|
1169
|
+
XPath.first(description_node, "@type").to_s ==
|
1170
|
+
"application/xhtml+xml"
|
1171
|
+
@description = description_node.inner_xml
|
1172
|
+
elsif XPath.first(description_node, "@type").to_s == "escaped" ||
|
1173
|
+
XPath.first(description_node, "@mode").to_s == "escaped"
|
1174
|
+
@description = FeedTools.unescape_entities(
|
1175
|
+
description_node.inner_xml)
|
989
1176
|
else
|
990
|
-
@description =
|
991
|
-
|
992
|
-
end
|
993
|
-
if @description == ""
|
994
|
-
@description = XPath.first(channel_node, "subtitle/text()").to_s
|
995
|
-
if @description != "" &&
|
996
|
-
XPath.first(channel_node, "subtitle/@mode").to_s == "escaped"
|
997
|
-
@description = CGI.unescapeHTML(description)
|
998
|
-
end
|
999
|
-
end
|
1000
|
-
if @description == ""
|
1001
|
-
@description = XPath.first(channel_node, "tagline/text()").to_s
|
1002
|
-
if @description != "" &&
|
1003
|
-
XPath.first(channel_node, "tagline/@mode").to_s == "escaped"
|
1004
|
-
@description = CGI.unescapeHTML(description)
|
1005
|
-
end
|
1006
|
-
end
|
1007
|
-
if @description == "" && XPath.first(channel_node, "tagline") == nil
|
1008
|
-
@description = XPath.first(channel_node, "info/text()").to_s
|
1009
|
-
if @description != "" &&
|
1010
|
-
XPath.first(channel_node, "info/@mode").to_s == "escaped"
|
1011
|
-
@description = CGI.unescapeHTML(description)
|
1012
|
-
end
|
1013
|
-
end
|
1014
|
-
if @description == ""
|
1015
|
-
@description = CGI.unescapeHTML(
|
1016
|
-
XPath.first(channel_node, "abstract/text()").to_s)
|
1017
|
-
end
|
1018
|
-
if @description == ""
|
1019
|
-
@description = CGI.unescapeHTML(
|
1020
|
-
XPath.first(channel_node, "summary/text()").to_s)
|
1021
|
-
end
|
1022
|
-
if @description == ""
|
1023
|
-
# I don't think this is valid for anyone to do, but this is probably
|
1024
|
-
# what they meant if they do it.
|
1025
|
-
@description = CGI.unescapeHTML(
|
1026
|
-
XPath.first(channel_node, "content:encoded/text()").to_s)
|
1027
|
-
if @description != ""
|
1028
|
-
@bozo = true
|
1029
|
-
end
|
1030
|
-
end
|
1031
|
-
if @description == ""
|
1032
|
-
begin
|
1033
|
-
@description = XPath.first(channel_node, "description").inner_xml
|
1034
|
-
rescue
|
1177
|
+
@description = description_node.inner_xml
|
1178
|
+
repair_entities = true
|
1035
1179
|
end
|
1036
1180
|
end
|
1037
1181
|
if @description == ""
|
@@ -1043,13 +1187,12 @@ module FeedTools
|
|
1043
1187
|
@description = "" if @description.nil?
|
1044
1188
|
end
|
1045
1189
|
|
1046
|
-
@description
|
1047
|
-
FeedTools.sanitize_html(@description)
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1190
|
+
unless @description.nil?
|
1191
|
+
@description = FeedTools.sanitize_html(@description, :strip)
|
1192
|
+
@description = FeedTools.unescape_entities(@description) if repair_entities
|
1193
|
+
@description = FeedTools.tidy_html(@description)
|
1194
|
+
end
|
1195
|
+
|
1053
1196
|
@description.gsub!(/\n/, " ") if @description.size < 80
|
1054
1197
|
@description = @description.strip unless @description.nil?
|
1055
1198
|
@description = nil if @description == ""
|
@@ -1065,7 +1208,7 @@ module FeedTools
|
|
1065
1208
|
# Returns the contents of the itunes:summary element
|
1066
1209
|
def itunes_summary
|
1067
1210
|
if @itunes_summary.nil?
|
1068
|
-
@itunes_summary =
|
1211
|
+
@itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
|
1069
1212
|
"itunes:summary/text()").to_s)
|
1070
1213
|
if @itunes_summary == ""
|
1071
1214
|
@itunes_summary = nil
|
@@ -1084,7 +1227,7 @@ module FeedTools
|
|
1084
1227
|
# Returns the contents of the itunes:subtitle element
|
1085
1228
|
def itunes_subtitle
|
1086
1229
|
if @itunes_subtitle.nil?
|
1087
|
-
@itunes_subtitle =
|
1230
|
+
@itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
|
1088
1231
|
"itunes:subtitle/text()").to_s)
|
1089
1232
|
if @itunes_subtitle == ""
|
1090
1233
|
@itunes_subtitle = nil
|
@@ -1211,6 +1354,197 @@ module FeedTools
|
|
1211
1354
|
return @icon_link
|
1212
1355
|
end
|
1213
1356
|
|
1357
|
+
# Returns the feed author
|
1358
|
+
def author
  # Builds the Author object on first use from the channel's author,
  # dc:creator and dc:author elements and caches it in @author.
  if @author.nil?
    @author = FeedTools::Feed::Author.new

    # Set the author name
    @author.name = FeedTools.unescape_entities(
      XPath.first(channel_node, "author/name/text()").to_s)

    # The raw author text comes from the author element itself, then from
    # dc:creator, then from dc:author.
    @author.raw = FeedTools.unescape_entities(
      XPath.first(channel_node, "author/text()").to_s)
    if @author.raw == ""
      @author.raw = FeedTools.unescape_entities(
        XPath.first(channel_node, "dc:creator/text()").to_s)
    end
    if @author.raw == ""
      @author.raw = FeedTools.unescape_entities(
        XPath.first(channel_node, "dc:author/text()").to_s)
    end
    unless @author.raw == ""
      # The hyphen in the local part is escaped so that it is a literal
      # character. Written as "%-\+" it formed a range that left "-" out,
      # so hyphenated addresses were not recognised.
      email_pattern = /\b[A-Z0-9._%&'*+\-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i

      # First try "Name (email)".
      raw_scan = @author.raw.scan(/(.*)\((#{email_pattern})\)/i)
      if raw_scan.empty?
        # Then "email (Name)", reversed so the pair is always
        # [name, email].
        raw_scan = @author.raw.scan(/(#{email_pattern})\s*\((.*)\)/i)
        author_raw_pair = raw_scan.first.reverse unless raw_scan.empty?
      else
        author_raw_pair = raw_scan.first
      end
      if raw_scan.empty?
        # Neither form matched; pick up a bare email address if present.
        email_scan = @author.raw.scan(email_pattern)
        @author.email = email_scan.first.strip unless email_scan.empty?
      end
      if author_raw_pair.nil? || author_raw_pair.empty?
        # We can be reasonably sure we are looking at something
        # that the creator didn't intend to contain an email address if
        # it got through the preceding regexes and it doesn't
        # contain the tell-tale '@' symbol.
        @author.name = @author.raw unless @author.raw.include?("@")
      else
        @author.name = author_raw_pair.first.strip
        @author.email = author_raw_pair.last.strip
      end
    end

    @author.name = nil if @author.name == ""
    @author.raw = nil if @author.raw == ""

    # Set the author email. An email that was never assigned is nil, not
    # "", so both cases have to fall back to the author/email element.
    if @author.email.nil? || @author.email == ""
      @author.email = FeedTools.unescape_entities(
        XPath.first(channel_node, "author/email/text()").to_s)
    end
    @author.email = nil if @author.email == ""

    # Set the author url
    @author.url = FeedTools.unescape_entities(
      XPath.first(channel_node, "author/url/text()").to_s)
    @author.url = nil if @author.url == ""

    # Fallback on the itunes module if we didn't find an author name
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      @author.name = nil
    end
  end
  return @author
end
|
1431
|
+
|
1432
|
+
# Sets the feed author
|
1433
|
+
def author=(new_author)
  # Anything exposing name, email and url is taken to be a complete
  # author object.
  complete_author = [:name, :email, :url].all? do |accessor|
    new_author.respond_to?(accessor)
  end
  unless complete_author
    # We're not looking at an author object, this is probably a string,
    # so it becomes the name of the current (or a new) author.
    @author = FeedTools::Feed::Author.new if @author.nil?
    return @author.name = new_author
  end
  @author = new_author
end
|
1448
|
+
|
1449
|
+
# Returns the feed publisher
|
1450
|
+
def publisher
  # Builds the publisher (an Author object) on first use from the
  # channel's dc:publisher element and caches it in @publisher.
  if @publisher.nil?
    @publisher = FeedTools::Feed::Author.new

    # Set the raw publisher text
    @publisher.raw = FeedTools.unescape_entities(
      XPath.first(channel_node, "dc:publisher/text()").to_s)
    unless @publisher.raw == ""
      # The hyphen in the local part is escaped so that it is a literal
      # character. Written as "%-\+" it formed a range that left "-" out,
      # so hyphenated addresses were not recognised.
      email_pattern = /\b[A-Z0-9._%&'*+\-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i

      # First try "Name (email)".
      raw_scan = @publisher.raw.scan(/(.*)\((#{email_pattern})\)/i)
      if raw_scan.empty?
        # Then "email (Name)", reversed so the pair is always
        # [name, email].
        raw_scan = @publisher.raw.scan(/(#{email_pattern})\s*\((.*)\)/i)
        publisher_raw_pair = raw_scan.first.reverse unless raw_scan.empty?
      else
        publisher_raw_pair = raw_scan.first
      end
      if raw_scan.empty?
        # Neither form matched; pick up a bare email address if present.
        email_scan = @publisher.raw.scan(email_pattern)
        @publisher.email = email_scan.first.strip unless email_scan.empty?
      end
      if publisher_raw_pair.nil? || publisher_raw_pair.empty?
        # We can be reasonably sure we are looking at something
        # that the creator didn't intend to contain an email address if
        # it got through the preceding regexes and it doesn't
        # contain the tell-tale '@' symbol.
        @publisher.name = @publisher.raw unless @publisher.raw.include?("@")
      else
        @publisher.name = publisher_raw_pair.first.strip
        @publisher.email = publisher_raw_pair.last.strip
      end
    end

    @publisher.name = nil if @publisher.name == ""
    @publisher.raw = nil if @publisher.raw == ""
    @publisher.email = nil if @publisher.email == ""
    @publisher.url = nil if @publisher.url == ""
  end
  return @publisher
end
|
1497
|
+
|
1498
|
+
# Sets the feed publisher
|
1499
|
+
def publisher=(new_publisher)
  # Anything exposing name, email and url is taken to be a complete
  # Author object.
  complete_publisher = [:name, :email, :url].all? do |accessor|
    new_publisher.respond_to?(accessor)
  end
  unless complete_publisher
    # We're not looking at an Author object, this is probably a string,
    # so it becomes the name of the current (or a new) publisher.
    @publisher = FeedTools::Feed::Author.new if @publisher.nil?
    return @publisher.name = new_publisher
  end
  @publisher = new_publisher
end
|
1514
|
+
|
1515
|
+
# Returns the contents of the itunes:author element
|
1516
|
+
#
|
1517
|
+
# Returns any incorrectly placed channel-level itunes:author
|
1518
|
+
# elements. They're actually amazingly common. People don't read specs.
|
1519
|
+
# There is no setter for this, since this is a "bozo" attribute.
|
1520
|
+
def itunes_author
  return @itunes_author unless @itunes_author.nil?
  author_text = FeedTools.unescape_entities(XPath.first(channel_node,
    "itunes:author/text()").to_s)
  # Finding the element at channel level marks the feed as "bozo"; an
  # empty result leaves both values untouched.
  unless author_text.nil? || author_text == ""
    @itunes_author = author_text
    @bozo = true
  end
  return @itunes_author
end
|
1529
|
+
|
1530
|
+
# Returns the feed's copyright information
|
1531
|
+
def copyright
  return @copyright unless @copyright.nil?
  # The copyright element is preferred, dc:rights is the fallback.
  @copyright = XPath.first(channel_node, "copyright/text()").to_s
  @copyright = XPath.first(channel_node, "dc:rights/text()").to_s if @copyright == ""
  @copyright = FeedTools.sanitize_html(@copyright, :strip)
  # An empty result is stored as nil, so the lookup runs again next time.
  @copyright = nil if @copyright == ""
  return @copyright
end
|
1542
|
+
|
1543
|
+
# Sets the feed's copyright information
|
1544
|
+
def copyright=(new_copyright)
  # Stored as given; a non-nil value is returned by copyright without
  # consulting the feed.
  @copyright = new_copyright
end
|
1547
|
+
|
1214
1548
|
# Returns the number of seconds before the feed should expire
|
1215
1549
|
def time_to_live
|
1216
1550
|
if @time_to_live.nil?
|
@@ -1275,6 +1609,33 @@ module FeedTools
|
|
1275
1609
|
@time_to_live = 1 if @time_to_live < 1
|
1276
1610
|
end
|
1277
1611
|
|
1612
|
+
# Returns the feed's cloud
|
1613
|
+
def cloud
  return @cloud unless @cloud.nil?
  @cloud = FeedTools::Feed::Cloud.new
  # Read every attribute of the cloud element as a string first.
  read_attribute = lambda do |attribute_name|
    XPath.first(channel_node, "cloud/@" + attribute_name).to_s
  end
  @cloud.domain = read_attribute.call("domain")
  @cloud.port = read_attribute.call("port")
  @cloud.path = read_attribute.call("path")
  @cloud.register_procedure = read_attribute.call("registerProcedure")
  @cloud.protocol = read_attribute.call("protocol").downcase
  # Empty values become nil. The port is converted to an integer, and a
  # port of 0 (which is also what an empty string converts to) becomes nil.
  @cloud.domain = nil if @cloud.domain == ""
  port_number = @cloud.port.to_i
  @cloud.port = (port_number == 0 ? nil : port_number)
  @cloud.path = nil if @cloud.path == ""
  @cloud.register_procedure = nil if @cloud.register_procedure == ""
  @cloud.protocol = nil if @cloud.protocol == ""
  return @cloud
end
|
1633
|
+
|
1634
|
+
# Sets the feed's cloud
|
1635
|
+
def cloud=(new_cloud)
  # Stored as given; a non-nil value is returned by cloud without
  # consulting the feed.
  @cloud = new_cloud
end
|
1638
|
+
|
1278
1639
|
# Returns the feed generator
|
1279
1640
|
def generator
|
1280
1641
|
if @generator.nil?
|
@@ -1454,7 +1815,8 @@ module FeedTools
|
|
1454
1815
|
end
|
1455
1816
|
|
1456
1817
|
# Generates xml based on the content of the feed
|
1457
|
-
def build_xml(feed_type="rss", version=0.0,
|
1818
|
+
def build_xml(feed_type=(self.feed_type or "rss"), version=0.0,
|
1819
|
+
xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
1458
1820
|
if feed_type == "rss" && version == 0.0
|
1459
1821
|
version = 1.0
|
1460
1822
|
elsif feed_type == "atom" && version == 0.0
|
@@ -1801,22 +2163,47 @@ module FeedTools
|
|
1801
2163
|
return @root_node
|
1802
2164
|
end
|
1803
2165
|
|
2166
|
+
# Returns the feed item's unique id
|
2167
|
+
def id
  return @id unless @id.nil?
  # The id element is preferred, guid is the fallback.
  @id = XPath.first(root_node, "id/text()").to_s
  @id = XPath.first(root_node, "guid/text()").to_s if @id == ""
  # An empty result is stored as nil, so the lookup runs again next time.
  @id = nil if @id == ""
  return @id
end
|
2177
|
+
|
2178
|
+
# Sets the feed item's unique id
|
2179
|
+
def id=(new_id)
  # Stored as given; a non-nil value is returned by id without consulting
  # the feed item.
  @id = new_id
end
|
2182
|
+
|
1804
2183
|
# Returns the feed item title
|
1805
2184
|
def title
|
1806
2185
|
if @title.nil?
|
2186
|
+
repair_entities = false
|
1807
2187
|
if XPath.first(root_node, "title/@type").to_s == "xhtml" ||
|
1808
|
-
XPath.first(root_node, "title/@mode").to_s == "xhtml"
|
2188
|
+
XPath.first(root_node, "title/@mode").to_s == "xhtml" ||
|
2189
|
+
XPath.first(root_node, "title/@type").to_s == "xml" ||
|
2190
|
+
XPath.first(root_node, "title/@mode").to_s == "xml" ||
|
2191
|
+
XPath.first(root_node, "title/@type").to_s ==
|
2192
|
+
"application/xhtml+xml"
|
1809
2193
|
@title = XPath.first(root_node, "title").inner_xml
|
1810
2194
|
elsif XPath.first(root_node, "title/@type").to_s == "escaped" ||
|
1811
2195
|
XPath.first(root_node, "title/@mode").to_s == "escaped"
|
1812
|
-
@title =
|
2196
|
+
@title = FeedTools.unescape_entities(
|
1813
2197
|
XPath.first(root_node, "title/text()").to_s)
|
1814
2198
|
else
|
1815
|
-
|
1816
|
-
|
2199
|
+
title_node = XPath.first(root_node, "title")
|
2200
|
+
@title = title_node.inner_xml
|
2201
|
+
repair_entities = true
|
1817
2202
|
end
|
1818
2203
|
unless @title.nil?
|
1819
|
-
@title =
|
2204
|
+
@title = FeedTools.sanitize_html(@title, :strip)
|
2205
|
+
@title = FeedTools.unescape_entities(@title) if repair_entities
|
2206
|
+
@title = FeedTools.tidy_html(@title)
|
1820
2207
|
end
|
1821
2208
|
if @title != ""
|
1822
2209
|
# Some blogging tools include the number of comments in a post
|
@@ -1826,10 +2213,10 @@ module FeedTools
|
|
1826
2213
|
#
|
1827
2214
|
# If for some incredibly weird reason you need the actual
|
1828
2215
|
# unstripped title, just use find_node("title/text()").to_s
|
1829
|
-
@title =
|
1830
|
-
@title.strip.gsub(/\[\d*\]$/, "")).strip
|
1831
|
-
@title.gsub!(/\n/, " ")
|
2216
|
+
@title = @title.strip.gsub(/\[\d*\]$/, "").strip
|
1832
2217
|
end
|
2218
|
+
@title.gsub!(/\n/, " ")
|
2219
|
+
@title.strip!
|
1833
2220
|
@title = nil if @title == ""
|
1834
2221
|
end
|
1835
2222
|
return @title
|
@@ -1843,53 +2230,54 @@ module FeedTools
|
|
1843
2230
|
# Returns the feed item description
|
1844
2231
|
def description
|
1845
2232
|
if @description.nil?
|
1846
|
-
|
1847
|
-
|
1848
|
-
|
1849
|
-
|
1850
|
-
|
1851
|
-
|
1852
|
-
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1856
|
-
|
1857
|
-
|
1858
|
-
|
1859
|
-
|
1860
|
-
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
1866
|
-
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
1870
|
-
|
1871
|
-
|
1872
|
-
|
1873
|
-
|
1874
|
-
|
1875
|
-
|
1876
|
-
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
XPath.first(
|
1882
|
-
|
1883
|
-
|
1884
|
-
|
1885
|
-
|
1886
|
-
@description =
|
1887
|
-
|
1888
|
-
|
1889
|
-
|
1890
|
-
|
1891
|
-
|
1892
|
-
|
2233
|
+
repair_entities = false
|
2234
|
+
description_node = XPath.first(root_node, "description")
|
2235
|
+
if description_node.nil?
|
2236
|
+
description_node = XPath.first(root_node, "xhtml:body")
|
2237
|
+
end
|
2238
|
+
if description_node.nil?
|
2239
|
+
description_node = XPath.first(root_node, "body")
|
2240
|
+
end
|
2241
|
+
if description_node.nil?
|
2242
|
+
description_node = XPath.first(root_node, "tagline")
|
2243
|
+
end
|
2244
|
+
if description_node.nil?
|
2245
|
+
description_node = XPath.first(root_node, "subtitle")
|
2246
|
+
end
|
2247
|
+
if description_node.nil?
|
2248
|
+
description_node = XPath.first(root_node, "summary")
|
2249
|
+
end
|
2250
|
+
if description_node.nil?
|
2251
|
+
description_node = XPath.first(root_node, "abstract")
|
2252
|
+
end
|
2253
|
+
if description_node.nil?
|
2254
|
+
description_node = XPath.first(root_node, "content:encoded")
|
2255
|
+
end
|
2256
|
+
if description_node.nil?
|
2257
|
+
description_node = XPath.first(root_node, "content")
|
2258
|
+
end
|
2259
|
+
if description_node.nil?
|
2260
|
+
description_node = XPath.first(root_node, "info")
|
2261
|
+
@bozo = true unless description_node.nil?
|
2262
|
+
end
|
2263
|
+
unless description_node.nil?
|
2264
|
+
if XPath.first(description_node, "@encoding").to_s != ""
|
2265
|
+
@description =
|
2266
|
+
"[Embedded data objects are not currently supported.]"
|
2267
|
+
elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
|
2268
|
+
XPath.first(description_node, "@mode").to_s == "xhtml" ||
|
2269
|
+
XPath.first(description_node, "@type").to_s == "xml" ||
|
2270
|
+
XPath.first(description_node, "@mode").to_s == "xml" ||
|
2271
|
+
XPath.first(description_node, "@type").to_s ==
|
2272
|
+
"application/xhtml+xml"
|
2273
|
+
@description = description_node.inner_xml
|
2274
|
+
elsif XPath.first(description_node, "@type").to_s == "escaped" ||
|
2275
|
+
XPath.first(description_node, "@mode").to_s == "escaped"
|
2276
|
+
@description = FeedTools.unescape_entities(
|
2277
|
+
description_node.inner_xml)
|
2278
|
+
else
|
2279
|
+
@description = description_node.inner_xml
|
2280
|
+
repair_entities = true
|
1893
2281
|
end
|
1894
2282
|
end
|
1895
2283
|
if @description == ""
|
@@ -1900,20 +2288,13 @@ module FeedTools
|
|
1900
2288
|
@description = self.itunes_subtitle
|
1901
2289
|
@description = "" if @description.nil?
|
1902
2290
|
end
|
1903
|
-
if @description == ""
|
1904
|
-
@description = self.media_text
|
1905
|
-
@description = "" if @description.nil?
|
1906
|
-
end
|
1907
2291
|
|
1908
2292
|
unless @description.nil?
|
1909
|
-
@description = FeedTools.sanitize_html(@description)
|
2293
|
+
@description = FeedTools.sanitize_html(@description, :strip)
|
2294
|
+
@description = FeedTools.unescape_entities(@description) if repair_entities
|
2295
|
+
@description = FeedTools.tidy_html(@description)
|
1910
2296
|
end
|
1911
2297
|
|
1912
|
-
# If it started with a bunch of divs, hack them right off. We can put
|
1913
|
-
# them back later if they're needed.
|
1914
|
-
@description.gsub!(/^(<div[^>]*>)*/, "")
|
1915
|
-
@description.gsub!(/(<\/div>)*$/, "")
|
1916
|
-
|
1917
2298
|
@description.gsub!(/\n/, " ") if @description.size < 80
|
1918
2299
|
@description = @description.strip unless @description.nil?
|
1919
2300
|
@description = nil if @description == ""
|
@@ -1925,6 +2306,66 @@ module FeedTools
|
|
1925
2306
|
def description=(new_description)
|
1926
2307
|
@description = new_description
|
1927
2308
|
end
|
2309
|
+
|
2310
|
+
# Returns the contents of the itunes:summary element
|
2311
|
+
def itunes_summary
  # Memoized: once a non-nil value is cached (or assigned through the
  # setter) the XML is not consulted again.
  return @itunes_summary unless @itunes_summary.nil?

  summary_node = XPath.first(root_node, "itunes:summary/text()")
  @itunes_summary = FeedTools.unescape_entities(summary_node.to_s)
  # An absent or empty element is reported as nil; anything else is
  # passed through the html sanitizer before being cached.
  @itunes_summary =
    @itunes_summary == "" ? nil : FeedTools.sanitize_html(@itunes_summary)
  return @itunes_summary
end
|
2324
|
+
|
2325
|
+
# Sets the contents of the itunes:summary element
|
2326
|
+
def itunes_summary=(new_itunes_summary)
  # Stored as-is, without unescaping or sanitizing.  A non-nil value is
  # what the itunes_summary reader returns instead of parsing the XML.
  @itunes_summary = new_itunes_summary
end
|
2329
|
+
|
2330
|
+
# Returns the contents of the itunes:subtitle element
|
2331
|
+
def itunes_subtitle
  # Memoized: once a non-nil value is cached (or assigned through the
  # setter) the XML is not consulted again.
  return @itunes_subtitle unless @itunes_subtitle.nil?

  subtitle_node = XPath.first(root_node, "itunes:subtitle/text()")
  @itunes_subtitle = FeedTools.unescape_entities(subtitle_node.to_s)
  # An absent or empty element is reported as nil; anything else is
  # passed through the html sanitizer before being cached.
  @itunes_subtitle =
    @itunes_subtitle == "" ? nil : FeedTools.sanitize_html(@itunes_subtitle)
  return @itunes_subtitle
end
|
2344
|
+
|
2345
|
+
# Sets the contents of the itunes:subtitle element
|
2346
|
+
def itunes_subtitle=(new_itunes_subtitle)
  # Stored as-is, without unescaping or sanitizing.  A non-nil value is
  # what the itunes_subtitle reader returns instead of parsing the XML.
  @itunes_subtitle = new_itunes_subtitle
end
|
2349
|
+
|
2350
|
+
# Returns the contents of the media:text element
|
2351
|
+
def media_text
  # Memoized: once a non-nil value is cached (or assigned through the
  # setter) the XML is not consulted again.
  if @media_text.nil?
    # Read the media:text element itself.  This lookup previously queried
    # "itunes:subtitle/text()", so media_text returned the iTunes subtitle
    # and never the media:text content.
    @media_text = FeedTools.unescape_entities(XPath.first(root_node,
      "media:text/text()").to_s)
    # An absent or empty element is reported as nil rather than "".
    if @media_text == ""
      @media_text = nil
    end
    unless @media_text.nil?
      @media_text = FeedTools.sanitize_html(@media_text)
    end
  end
  return @media_text
end
|
2364
|
+
|
2365
|
+
# Sets the contents of the media:text element
|
2366
|
+
def media_text=(new_media_text)
  # Stored as-is, without unescaping or sanitizing.  A non-nil value is
  # what the media_text reader returns instead of parsing the XML.
  @media_text = new_media_text
end
|
1928
2369
|
|
1929
2370
|
# Returns the feed item link
|
1930
2371
|
def link
|
@@ -1948,7 +2389,7 @@ module FeedTools
|
|
1948
2389
|
end
|
1949
2390
|
end
|
1950
2391
|
if @link != ""
|
1951
|
-
@link =
|
2392
|
+
@link = FeedTools.unescape_entities(@link)
|
1952
2393
|
end
|
1953
2394
|
if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
|
1954
2395
|
if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
|
@@ -2084,23 +2525,6 @@ module FeedTools
|
|
2084
2525
|
@media_thumbnail_link = new_media_thumbnail_link
|
2085
2526
|
end
|
2086
2527
|
|
2087
|
-
# Returns the feed items's unique id
|
2088
|
-
def id
|
2089
|
-
if @id.nil?
|
2090
|
-
@id = XPath.first(root_node, "id/text()").to_s
|
2091
|
-
if @id == ""
|
2092
|
-
@id = XPath.first(root_node, "guid/text()").to_s
|
2093
|
-
end
|
2094
|
-
@id = nil if @id == ""
|
2095
|
-
end
|
2096
|
-
return @id
|
2097
|
-
end
|
2098
|
-
|
2099
|
-
# Sets the feed item's unique id
|
2100
|
-
def id=(new_id)
|
2101
|
-
@id = new_id
|
2102
|
-
end
|
2103
|
-
|
2104
2528
|
# Returns all feed item enclosures
|
2105
2529
|
def enclosures
|
2106
2530
|
if @enclosures.nil?
|
@@ -2116,7 +2540,7 @@ module FeedTools
|
|
2116
2540
|
# sometimes these also manage to show up in atom files.
|
2117
2541
|
for enclosure_node in rss_enclosures
|
2118
2542
|
enclosure = Enclosure.new
|
2119
|
-
enclosure.url =
|
2543
|
+
enclosure.url = FeedTools.unescape_entities(enclosure_node.attributes["url"].to_s)
|
2120
2544
|
enclosure.type = enclosure_node.attributes["type"].to_s
|
2121
2545
|
enclosure.file_size = enclosure_node.attributes["length"].to_i
|
2122
2546
|
enclosure.credits = []
|
@@ -2127,7 +2551,7 @@ module FeedTools
|
|
2127
2551
|
# Parse atom-type enclosures. If there are repeats of the same enclosure object,
|
2128
2552
|
# we merge the two together.
|
2129
2553
|
for enclosure_node in atom_enclosures
|
2130
|
-
enclosure_url =
|
2554
|
+
enclosure_url = FeedTools.unescape_entities(enclosure_node.attributes["href"].to_s)
|
2131
2555
|
enclosure = nil
|
2132
2556
|
new_enclosure = false
|
2133
2557
|
for existing_enclosure in @enclosures
|
@@ -2156,7 +2580,7 @@ module FeedTools
|
|
2156
2580
|
parse_media_content = lambda do |media_content_nodes|
|
2157
2581
|
affected_enclosures = []
|
2158
2582
|
for enclosure_node in media_content_nodes
|
2159
|
-
enclosure_url =
|
2583
|
+
enclosure_url = FeedTools.unescape_entities(enclosure_node.attributes["url"].to_s)
|
2160
2584
|
enclosure = nil
|
2161
2585
|
new_enclosure = false
|
2162
2586
|
for existing_enclosure in @enclosures
|
@@ -2182,9 +2606,9 @@ module FeedTools
|
|
2182
2606
|
(enclosure_node.attributes["isDefault"].to_s.downcase == "true")
|
2183
2607
|
if XPath.first(enclosure_node, "media:thumbnail/@url").to_s != ""
|
2184
2608
|
enclosure.thumbnail = EnclosureThumbnail.new(
|
2185
|
-
|
2186
|
-
|
2187
|
-
|
2609
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@url").to_s),
|
2610
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@height").to_s),
|
2611
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:thumbnail/@width").to_s)
|
2188
2612
|
)
|
2189
2613
|
if enclosure.thumbnail.height == ""
|
2190
2614
|
enclosure.thumbnail.height = nil
|
@@ -2196,9 +2620,9 @@ module FeedTools
|
|
2196
2620
|
enclosure.categories = []
|
2197
2621
|
for category in XPath.match(enclosure_node, "media:category")
|
2198
2622
|
enclosure.categories << EnclosureCategory.new(
|
2199
|
-
|
2200
|
-
|
2201
|
-
|
2623
|
+
FeedTools.unescape_entities(category.text),
|
2624
|
+
FeedTools.unescape_entities(category.attributes["scheme"].to_s),
|
2625
|
+
FeedTools.unescape_entities(category.attributes["label"].to_s)
|
2202
2626
|
)
|
2203
2627
|
if enclosure.categories.last.scheme == ""
|
2204
2628
|
enclosure.categories.last.scheme = nil
|
@@ -2209,16 +2633,16 @@ module FeedTools
|
|
2209
2633
|
end
|
2210
2634
|
if XPath.first(enclosure_node, "media:hash/text()").to_s != ""
|
2211
2635
|
enclosure.hash = EnclosureHash.new(
|
2212
|
-
FeedTools.sanitize_html(
|
2636
|
+
FeedTools.sanitize_html(FeedTools.unescape_entities(XPath.first(
|
2213
2637
|
enclosure_node, "media:hash/text()").to_s), :strip),
|
2214
2638
|
"md5"
|
2215
2639
|
)
|
2216
2640
|
end
|
2217
2641
|
if XPath.first(enclosure_node, "media:player/@url").to_s != ""
|
2218
2642
|
enclosure.player = EnclosurePlayer.new(
|
2219
|
-
|
2220
|
-
|
2221
|
-
|
2643
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@url").to_s),
|
2644
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@height").to_s),
|
2645
|
+
FeedTools.unescape_entities(XPath.first(enclosure_node, "media:player/@width").to_s)
|
2222
2646
|
)
|
2223
2647
|
if enclosure.player.height == ""
|
2224
2648
|
enclosure.player.height = nil
|
@@ -2230,8 +2654,8 @@ module FeedTools
|
|
2230
2654
|
enclosure.credits = []
|
2231
2655
|
for credit in XPath.match(enclosure_node, "media:credit")
|
2232
2656
|
enclosure.credits << EnclosureCredit.new(
|
2233
|
-
|
2234
|
-
|
2657
|
+
FeedTools.unescape_entities(credit.text),
|
2658
|
+
FeedTools.unescape_entities(credit.attributes["role"].to_s.downcase)
|
2235
2659
|
)
|
2236
2660
|
if enclosure.credits.last.role == ""
|
2237
2661
|
enclosure.credits.last.role = nil
|
@@ -2240,7 +2664,7 @@ module FeedTools
|
|
2240
2664
|
enclosure.explicit = (XPath.first(enclosure_node,
|
2241
2665
|
"media:adult/text()").to_s.downcase == "true")
|
2242
2666
|
if XPath.first(enclosure_node, "media:text/text()").to_s != ""
|
2243
|
-
enclosure.text =
|
2667
|
+
enclosure.text = FeedTools.unescape_entities(XPath.first(enclosure_node,
|
2244
2668
|
"media:text/text()").to_s)
|
2245
2669
|
end
|
2246
2670
|
affected_enclosures << enclosure
|
@@ -2271,11 +2695,11 @@ module FeedTools
|
|
2271
2695
|
if enclosure.thumbnail.nil? &&
|
2272
2696
|
XPath.first(media_group, "media:thumbnail/@url").to_s != ""
|
2273
2697
|
enclosure.thumbnail = EnclosureThumbnail.new(
|
2274
|
-
|
2698
|
+
FeedTools.unescape_entities(
|
2275
2699
|
XPath.first(media_group, "media:thumbnail/@url").to_s),
|
2276
|
-
|
2700
|
+
FeedTools.unescape_entities(
|
2277
2701
|
XPath.first(media_group, "media:thumbnail/@height").to_s),
|
2278
|
-
|
2702
|
+
FeedTools.unescape_entities(
|
2279
2703
|
XPath.first(media_group, "media:thumbnail/@width").to_s)
|
2280
2704
|
)
|
2281
2705
|
if enclosure.thumbnail.height == ""
|
@@ -2289,9 +2713,9 @@ module FeedTools
|
|
2289
2713
|
enclosure.categories = []
|
2290
2714
|
for category in XPath.match(media_group, "media:category")
|
2291
2715
|
enclosure.categories << EnclosureCategory.new(
|
2292
|
-
|
2293
|
-
|
2294
|
-
|
2716
|
+
FeedTools.unescape_entities(category.text),
|
2717
|
+
FeedTools.unescape_entities(category.attributes["scheme"].to_s),
|
2718
|
+
FeedTools.unescape_entities(category.attributes["label"].to_s)
|
2295
2719
|
)
|
2296
2720
|
if enclosure.categories.last.scheme == ""
|
2297
2721
|
enclosure.categories.last.scheme = nil
|
@@ -2304,16 +2728,16 @@ module FeedTools
|
|
2304
2728
|
if enclosure.hash.nil? &&
|
2305
2729
|
XPath.first(media_group, "media:hash/text()").to_s != ""
|
2306
2730
|
enclosure.hash = EnclosureHash.new(
|
2307
|
-
|
2731
|
+
FeedTools.unescape_entities(XPath.first(media_group, "media:hash/text()").to_s),
|
2308
2732
|
"md5"
|
2309
2733
|
)
|
2310
2734
|
end
|
2311
2735
|
if enclosure.player.nil? &&
|
2312
2736
|
XPath.first(media_group, "media:player/@url").to_s != ""
|
2313
2737
|
enclosure.player = EnclosurePlayer.new(
|
2314
|
-
|
2315
|
-
|
2316
|
-
|
2738
|
+
FeedTools.unescape_entities(XPath.first(media_group, "media:player/@url").to_s),
|
2739
|
+
FeedTools.unescape_entities(XPath.first(media_group, "media:player/@height").to_s),
|
2740
|
+
FeedTools.unescape_entities(XPath.first(media_group, "media:player/@width").to_s)
|
2317
2741
|
)
|
2318
2742
|
if enclosure.player.height == ""
|
2319
2743
|
enclosure.player.height = nil
|
@@ -2326,8 +2750,8 @@ module FeedTools
|
|
2326
2750
|
enclosure.credits = []
|
2327
2751
|
for credit in XPath.match(media_group, "media:credit")
|
2328
2752
|
enclosure.credits << EnclosureCredit.new(
|
2329
|
-
|
2330
|
-
|
2753
|
+
FeedTools.unescape_entities(credit.text),
|
2754
|
+
FeedTools.unescape_entities(credit.attributes["role"].to_s.downcase)
|
2331
2755
|
)
|
2332
2756
|
if enclosure.credits.last.role == ""
|
2333
2757
|
enclosure.credits.last.role = nil
|
@@ -2340,7 +2764,7 @@ module FeedTools
|
|
2340
2764
|
end
|
2341
2765
|
if enclosure.text.nil? &&
|
2342
2766
|
XPath.first(media_group, "media:text/text()").to_s != ""
|
2343
|
-
enclosure.text = FeedTools.sanitize_html(
|
2767
|
+
enclosure.text = FeedTools.sanitize_html(FeedTools.unescape_entities(
|
2344
2768
|
XPath.first(media_group, "media:text/text()").to_s), :strip)
|
2345
2769
|
end
|
2346
2770
|
end
|
@@ -2373,9 +2797,9 @@ module FeedTools
|
|
2373
2797
|
enclosure.categories = []
|
2374
2798
|
end
|
2375
2799
|
enclosure.categories << EnclosureCategory.new(
|
2376
|
-
|
2377
|
-
|
2378
|
-
|
2800
|
+
FeedTools.unescape_entities(category_path),
|
2801
|
+
FeedTools.unescape_entities("http://www.apple.com/itunes/store/"),
|
2802
|
+
FeedTools.unescape_entities("iTunes Music Store Categories")
|
2379
2803
|
)
|
2380
2804
|
end
|
2381
2805
|
end
|
@@ -2464,136 +2888,140 @@ module FeedTools
|
|
2464
2888
|
def enclosures=(new_enclosures)
|
2465
2889
|
@enclosures = new_enclosures
|
2466
2890
|
end
|
2467
|
-
|
2468
|
-
# Returns the feed item author
|
2469
|
-
def author_name
|
2470
|
-
# TODO: make this not suck, actually ensure we're looking at a name
|
2471
|
-
# and not an email address.
|
2472
|
-
# Also, factor in itunes module.
|
2473
|
-
# =================================================================
|
2474
|
-
if @author_name.nil?
|
2475
|
-
@author_name = CGI.unescapeHTML(XPath.first(root_node, "author/name/text()").to_s)
|
2476
|
-
if @author_name == ""
|
2477
|
-
@author_name = CGI.unescapeHTML(XPath.first(root_node, "dc:creator/text()").to_s)
|
2478
|
-
end
|
2479
|
-
if @author_name == ""
|
2480
|
-
@author_name = CGI.unescapeHTML(XPath.first(root_node, "author/text()").to_s)
|
2481
|
-
end
|
2482
|
-
end
|
2483
|
-
return @author_name
|
2484
|
-
end
|
2485
2891
|
|
2486
|
-
#
|
2487
|
-
def
|
2488
|
-
@
|
2489
|
-
|
2490
|
-
|
2491
|
-
|
2492
|
-
|
2493
|
-
|
2494
|
-
|
2495
|
-
|
2496
|
-
|
2497
|
-
|
2498
|
-
|
2499
|
-
|
2500
|
-
|
2892
|
+
# Returns the feed item author
|
2893
|
+
def author
  # The author is parsed from the item's XML once and cached in @author.
  if @author.nil?
    @author = FeedTools::Feed::Author.new

    # Matches a single email address.  '+' and '-' are placed at the end of
    # the local-part character class so that both are literals; written as
    # "%-\+" they formed a range from '%' to '+', which rejected hyphens
    # and accepted "&'()*".
    email_pattern = /\b[A-Z0-9._%+-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i

    # Set the author name
    @author.name = FeedTools.unescape_entities(
      XPath.first(root_node, "author/name/text()").to_s)

    # The raw author string is the first non-empty value among the author,
    # dc:creator and dc:author elements.
    @author.raw = FeedTools.unescape_entities(
      XPath.first(root_node, "author/text()").to_s)
    if @author.raw == ""
      @author.raw = FeedTools.unescape_entities(
        XPath.first(root_node, "dc:creator/text()").to_s)
    end
    if @author.raw == ""
      @author.raw = FeedTools.unescape_entities(
        XPath.first(root_node, "dc:author/text()").to_s)
    end
    unless @author.raw == ""
      # First try the form "Name (email@example.com)".
      raw_scan = @author.raw.scan(/(.*)\((#{email_pattern})\)/i)
      if raw_scan.empty?
        # Then the form "email@example.com (Name)"; the captures are
        # reversed so the pair is always [name, email].
        raw_scan = @author.raw.scan(/(#{email_pattern})\s*\((.*)\)/i)
        author_raw_pair = raw_scan.first.reverse unless raw_scan.empty?
      else
        author_raw_pair = raw_scan.first
      end
      if raw_scan.empty?
        # Neither combined form matched; look for a bare email address.
        email_scan = @author.raw.scan(email_pattern)
        @author.email = email_scan.first.strip unless email_scan.empty?
      end
      if author_raw_pair.nil? || author_raw_pair.empty?
        unless @author.raw.include?("@")
          # We can be reasonably sure we are looking at something
          # that the creator didn't intend to contain an email address if
          # it got through the preceding regexes and it doesn't
          # contain the tell-tale '@' symbol.
          # NOTE(review): a whitespace-only text node under <author> would
          # also reach this line and replace a name read from author/name;
          # confirm whether raw should be stripped first.
          @author.name = @author.raw
        end
      else
        @author.name = author_raw_pair.first.strip
        @author.email = author_raw_pair.last.strip
      end
    end

    @author.name = nil if @author.name == ""
    @author.raw = nil if @author.raw == ""

    # Set the author email
    # The nil check matters: when no address was found in the raw string
    # the email may never have been assigned, and comparing only against
    # "" would skip the author/email element entirely.
    if @author.email.nil? || @author.email == ""
      @author.email = FeedTools.unescape_entities(
        XPath.first(root_node, "author/email/text()").to_s)
    end
    @author.email = nil if @author.email == ""

    # Set the author url
    @author.url = FeedTools.unescape_entities(
      XPath.first(root_node, "author/url/text()").to_s)
    @author.url = nil if @author.url == ""

    # Fallback on the itunes module if we didn't find an author name
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      # Best effort only: any failure in the fallback leaves the name unset.
      @author.name = nil
    end
  end
  return @author
end
|
2545
|
-
|
2546
|
-
# Sets the
|
2547
|
-
def
|
2548
|
-
|
2966
|
+
|
2967
|
+
# Sets the feed item author
|
2968
|
+
def author=(new_author)
  # Anything that answers to name, email and url is treated as a complete
  # author object and stored directly.
  is_author_object = [:name, :email, :url].all? do |accessor|
    new_author.respond_to?(accessor)
  end
  if is_author_object
    @author = new_author
    return new_author
  end

  # Otherwise this is probably a string: keep (or create) the current
  # author object and use the value as the author's name.
  @author = FeedTools::Feed::Author.new if @author.nil?
  @author.name = new_author
end
|
2550
2983
|
|
2551
2984
|
# Returns the contents of the itunes:author element
|
2552
2985
|
#
|
2553
2986
|
# This inherits from any incorrectly placed channel-level itunes:author
|
2554
|
-
# elements. They're actually amazingly
|
2987
|
+
# elements. They're actually amazingly common. People don't read specs.
|
2555
2988
|
def itunes_author
  # Memoized: a cached or explicitly assigned non-nil value wins.
  return @itunes_author unless @itunes_author.nil?

  author_node = XPath.first(root_node, "itunes:author/text()")
  @itunes_author = FeedTools.unescape_entities(author_node.to_s)
  # No item-level element: inherit the value the parent feed reports.
  @itunes_author = feed.itunes_author if @itunes_author == ""
  # Still empty after the fallback: report nil rather than "".
  @itunes_author = nil if @itunes_author == ""
  return @itunes_author
end
|
2569
|
-
|
2997
|
+
|
2570
2998
|
# Sets the contents of the itunes:author element
|
2571
2999
|
def itunes_author=(new_itunes_author)
  # Stored as-is.  A non-nil value is what the itunes_author reader
  # returns instead of parsing the XML.
  @itunes_author = new_itunes_author
end
|
3002
|
+
|
2575
3003
|
# Returns the number of seconds that the associated media runs for
|
2576
|
-
def
|
2577
|
-
if @
|
2578
|
-
|
3004
|
+
def itunes_duration
  # Memoized: a cached or explicitly assigned non-nil value wins.
  return @itunes_duration unless @itunes_duration.nil?

  duration_node = XPath.first(root_node, "itunes:duration/text()")
  raw_duration = FeedTools.unescape_entities(duration_node.to_s)
  unless raw_duration == ""
    # The value is split on ":" and read as H:M:S, M:S or plain seconds
    # depending on how many fields it has; any other field count leaves
    # the duration nil.
    parts = raw_duration.split(":").map { |part| part.to_i }
    case parts.size
    when 3
      @itunes_duration = parts[0].hour + parts[1].minute + parts[2]
    when 2
      @itunes_duration = parts[0].minute + parts[1]
    when 1
      @itunes_duration = parts[0]
    end
  end
  return @itunes_duration
end
|
2593
3021
|
|
2594
3022
|
# Sets the number of seconds that the associated media runs for
|
2595
|
-
def
|
2596
|
-
@
|
3023
|
+
def itunes_duration=(new_itunes_duration)
  # Stored as-is.  A non-nil value is what the itunes_duration reader
  # returns instead of parsing the XML.
  @itunes_duration = new_itunes_duration
end
|
2598
3026
|
|
2599
3027
|
# Sets the itunes:summary
|
@@ -2722,7 +3150,8 @@ module FeedTools
|
|
2722
3150
|
end
|
2723
3151
|
|
2724
3152
|
# Generates xml based on the content of the feed item
|
2725
|
-
def build_xml(feed_type="rss", version=0.0,
|
3153
|
+
def build_xml(feed_type=(self.feed.feed_type or "rss"), version=0.0,
|
3154
|
+
xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
2726
3155
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
2727
3156
|
# RDF-based rss format
|
2728
3157
|
if link.nil?
|
@@ -2831,9 +3260,9 @@ module FeedTools
|
|
2831
3260
|
end
|
2832
3261
|
end
|
2833
3262
|
|
2834
|
-
module REXML
|
2835
|
-
class Element
|
2836
|
-
def inner_xml
|
3263
|
+
module REXML # :nodoc:
|
3264
|
+
class Element # :nodoc:
|
3265
|
+
def inner_xml # :nodoc:
|
2837
3266
|
result = ""
|
2838
3267
|
self.each_child do |child|
|
2839
3268
|
result << child.to_s
|
@@ -2848,4 +3277,4 @@ begin
|
|
2848
3277
|
FeedTools.feed_cache.initialize_cache
|
2849
3278
|
end
|
2850
3279
|
rescue
|
2851
|
-
end
|
3280
|
+
end
|