feedtools 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -0
- data/lib/feed_tools.rb +721 -209
- data/rakefile +1 -1
- data/test/cdf_test.rb +121 -0
- data/test/helper_test.rb +3 -1
- data/test/rss_test.rb +480 -52
- metadata +3 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,15 @@
+== FeedTools 0.2.4
+* fixed bug in the sqlite table creation query
+* greatly improved image support
+* improved cdf support (still needs way more work)
+* support for text input elements
+* now possible to force retrieval from the cache only
+* increased the flexibility of the database caching implementation
+* feed attributes accessible through the keys and values collections
+* minor coding style changes
+* fixed really dumb typo in the podcast? and vidlog? methods
+* fixed exception from missing titles and descriptions
+* now passes all of mark pilgrim's well-formed rss tests
 == FeedTools 0.2.3
 * fixed omission of get parameters from http requests
 == FeedTools 0.2.2
data/lib/feed_tools.rb
CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
 ENV['RAILS_ENV'] ||
 'production' # :nodoc:
 
-FEED_TOOLS_VERSION = "0.2.3"
+FEED_TOOLS_VERSION = "0.2.4"
 
 $:.unshift(File.dirname(__FILE__))
 $:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
@@ -75,7 +75,7 @@ require 'yaml'
 # => "News for nerds, stuff that matters"
 # slashdot_feed.link
 # => "http://slashdot.org/"
-# slashdot_feed.items.first.find_node("slash:hitparade/text()").
+# slashdot_feed.items.first.find_node("slash:hitparade/text()").value
 # => "43,37,28,23,11,3,1"
 module FeedTools
 
@@ -150,7 +150,7 @@ module FeedTools
 begin
 ActiveRecord::Base.connection.execute "select id, url, title, " +
 "link, xml_data, http_headers, last_retrieved " +
-"from
+"from #{self.table_name()} limit 1"
 rescue ActiveRecord::StatementInvalid
 return false
 rescue
@@ -163,7 +163,7 @@ module FeedTools
 def DatabaseFeedCache.create_table
 unless DatabaseFeedCache.table_exists?
 feeds_mysql = <<-SQL_END
-CREATE TABLE `
+CREATE TABLE `#{self.table_name()}` (
 `id` int(10) unsigned NOT NULL auto_increment,
 `url` varchar(255) default NULL,
 `title` varchar(255) default NULL,
@@ -175,19 +175,18 @@ module FeedTools
 ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
 SQL_END
 feeds_sqlite = <<-SQL_END
-CREATE TABLE '
+CREATE TABLE '#{self.table_name()}' (
 'id' INTEGER PRIMARY KEY NOT NULL,
 'url' VARCHAR(255) DEFAULT NULL,
 'title' VARCHAR(255) DEFAULT NULL,
 'link' VARCHAR(255) DEFAULT NULL,
-'image_link' VARCHAR(255) DEFAULT NULL,
 'xml_data' TEXT DEFAULT NULL,
 'http_headers' TEXT DEFAULT NULL,
 'last_retrieved' DATETIME DEFAULT NULL,
 );
 SQL_END
 feeds_psql = <<-SQL_END
-CREATE TABLE
+CREATE TABLE #{self.table_name()} (
 id SERIAL PRIMARY KEY NOT NULL,
 url varchar(255) default NULL,
 title varchar(255) default NULL,
@@ -206,7 +205,7 @@ module FeedTools
 table_creation_sql = feeds_psql
 end
 if table_creation_sql.nil?
-raise "Could not build
+raise "Could not build #{self.table_name()} table."
 else
 connection.execute table_creation_sql
 end
@@ -219,24 +218,86 @@ module FeedTools
 end
 
 # Quick method of enabling small classes to have their attributes
-# accessible as a dictionary.
-
-
+# accessible as a dictionary. These methods should not be used whenever
+# performance is going to be an issue. They exist almost entirely for the
+# purposes of aesthetics and/or debugging.
+module AttributeDictionary
+# Access the attributes as a dictionary.
 def [](key)
-
-# accessed like this.
-return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
-return nil unless self.method(key).arity == 0
+return nil unless self.keys.include? key
 return self.send(key)
 end
 
-# Access the attributes as a dictionary
+# Access the attributes as a dictionary.
 def []=(key, value)
-
-
-return nil
-
-
+pseudo_key = key
+pseudo_key = key[0..-2] if key[-1..-1] == "?"
+return nil unless self.method(pseudo_key + "=").arity == 1
+local_keys = self.keys
+unless local_keys.include?(key) || local_keys.include?(pseudo_key)
+return nil
+end
+return self.send(pseudo_key + "=", value)
+end
+
+# Access the attributes as a dictionary.
+def keys
+key_methods = []
+for key in self.methods
+# Quick-n-dirty hack to speed things up and keep the list clean
+if self.method(key).arity == 0 && key[-1..-1] != "=" &&
+key[-1..-1] != "!" && key[0..1] != "__" &&
+key[0..2] != "to_" && key[-5..-1] != "_node" &&
+key != "cache_object" && key != "save" && key != "xml" &&
+key != "xml_data" && key != "expired?" && key != "live?" &&
+key != "feed"
+superfluous_ancestors = self.class.ancestors
+superfluous_ancestors = superfluous_ancestors[1..-1]
+superfluous = false
+for ancestor in superfluous_ancestors
+if ancestor.instance_methods.include? key
+superfluous = true
+break
+end
+end
+next if superfluous
+key_methods << key
+end
+end
+return key_methods.sort
+end
+
+# Access the attributes as a dictionary.
+# Please note that this method may cause a nearly complete parse of a
+# feed. This will be very slow.
+def values
+return self.keys.map { |key| self[key] }
+end
+
+# Access the attributes as a dictionary.
+# Please note that this method may cause a complete parse of a feed.
+# This will be very slow.
+def to_hash
+attribute_hash = {}
+for key in keys
+value = self[key]
+if value.respond_to? :to_hash
+value = value.to_hash
+end
+if value.respond_to? :to_ary
+new_value = []
+for item in value.to_ary
+if item.respond_to? :to_hash
+new_value << item.to_hash
+else
+new_value << item
+end
+end
+value = new_value
+end
+attribute_hash[key] = value
+end
+return attribute_hash
 end
 end
 
@@ -278,6 +339,22 @@ module FeedTools
 @feed_cache = new_feed_cache
 end
 
+# Returns true if FeedTools should only retrieve from the cache and avoid
+# pulling feeds from their remote location.
+def FeedTools.cache_only?
+@cache_only = false if @cache_only.nil?
+return @cache_only
+end
+
+# Sets whether or not FeedTools should retrieve feeds from remote locations
+# or if it should rely on the cache only.
+def FeedTools.cache_only=(new_cache_only)
+if new_cache_only != true && new_cache_only != false
+raise ArgumentError, "Must be either true or false."
+end
+@cache_only = new_cache_only
+end
+
 # Returns true if FeedTools.feed_cache is not nil and a connection with
 # the cache has been successfully established. Also returns false if an
 # error is raised while trying to determine the status of the cache.
@@ -613,37 +690,29 @@ module FeedTools
 end
 
 class Feed
-include REXML
+include REXML # :nodoc:
 include AttributeDictionary
 
 # Represents a feed/feed item's category
 class Category
+include AttributeDictionary
+
 # The category term value
 attr_accessor :term
 # The categorization scheme
 attr_accessor :scheme
 # A human-readable description of the category
 attr_accessor :label
-
-
-
-
-self.term.send(msg, params)
-end
-
-# Relays the to_s method to the term field
-def to_s
-self.term.to_s
-end
-
-# Relays the inspect method to the term field
-def inspect
-self.term.inspect
-end
+
+alias_method :value, :term
+alias_method :category, :term
+alias_method :domain, :scheme
 end
 
 # Represents a feed/feed item's author
 class Author
+include AttributeDictionary
+
 # The author's real name
 attr_accessor :name
 # The author's email address
@@ -652,26 +721,51 @@ module FeedTools
 attr_accessor :url
 # The raw value of the author tag if present
 attr_accessor :raw
-
-
-
-
-
-end
-
-# Relays the to_s method to the name field
-def to_s
-self.name.to_s
-end
+end
+
+# Represents a feed's image
+class Image
+include AttributeDictionary
 
-#
-
-
-
+# The image's title
+attr_accessor :title
+# The image's description
+attr_accessor :description
+# The image's url
+attr_accessor :url
+# The url to link the image to
+attr_accessor :link
+# The width of the image
+attr_accessor :width
+# The height of the image
+attr_accessor :height
+# The style of the image
+# Possible values are "icon", "image", or "image-wide"
+attr_accessor :style
+end
+
+# Represents a feed's text input element.
+# Be aware that this will be ignored for feed generation. It's a
+# pointless element that aggregators usually ignore and it doesn't have an
+# equivalent in all feeds types.
+class TextInput
+include AttributeDictionary
+
+# The label of the Submit button in the text input area.
+attr_accessor :title
+# The description explains the text input area.
+attr_accessor :description
+# The URL of the CGI script that processes text input requests.
+attr_accessor :link
+# The name of the text object in the text input area.
+attr_accessor :name
 end
 
 # Represents a feed's cloud.
+# Be aware that this will be ignored for feed generation.
 class Cloud
+include AttributeDictionary
+
 # The domain of the cloud.
 attr_accessor :domain
 # The path for the cloud.
@@ -684,7 +778,19 @@ module FeedTools
 # The procedure to use to request notification.
 attr_accessor :register_procedure
 end
-
+
+# Represents a simple hyperlink
+class Link
+include AttributeDictionary
+
+# The url that is being linked to
+attr_accessor :url
+# The content of the hyperlink
+attr_accessor :value
+
+alias_method :href, :url
+end
+
 # Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
 def Feed.open(url)
 # clean up the url
@@ -693,19 +799,19 @@ module FeedTools
 # create and load the new feed
 feed = Feed.new
 feed.url = url
-feed.update
+feed.update!
 return feed
 end
 
 # Loads the feed from the remote url if the feed has expired from the cache or cannot be
 # retrieved from the cache for some reason.
-def update
+def update!
 if self.http_headers.nil? && !(self.cache_object.nil?) &&
 !(self.cache_object.http_headers.nil?)
 @http_headers = YAML.load(self.cache_object.http_headers)
 end
-if expired?
-load_remote_feed
+if expired? && !FeedTools.cache_only?
+load_remote_feed!
 else
 @live = false
 end
@@ -715,7 +821,7 @@ module FeedTools
 # field to be set. If an etag or the last_modified date has been set,
 # attempts to use them to prevent unnecessary reloading of identical
 # content.
-def load_remote_feed
+def load_remote_feed!
 @live = true
 if self.http_headers.nil? && !(self.cache_object.nil?) &&
 !(self.cache_object.http_headers.nil?)
@@ -970,6 +1076,10 @@ module FeedTools
 # Returns the root node of the feed.
 def root_node
 if @root_node.nil?
+# TODO: Fix this so that added content at the end of the file doesn't
+# break this stuff.
+# E.g.: http://smogzer.tripod.com/smog.rdf
+# ===================================================================
 @root_node = xml.root
 end
 return @root_node
@@ -979,6 +1089,9 @@ module FeedTools
 def channel_node
 if @channel_node.nil?
 @channel_node = XPath.first(root_node, "channel")
+if @channel_node == nil
+@channel_node = XPath.first(root_node, "CHANNEL")
+end
 if @channel_node == nil
 @channel_node = XPath.first(root_node, "feedinfo")
 end
@@ -1039,6 +1152,8 @@ module FeedTools
 @feed_type = "atom"
 when "rdf:rdf"
 @feed_type = "rss"
+when "rdf"
+@feed_type = "rss"
 when "rss"
 @feed_type = "rss"
 when "channel"
@@ -1053,6 +1168,58 @@ module FeedTools
 @feed_type = new_feed_type
 end
 
+# Returns the version number of the feed type.
+# Intentionally does not differentiate between the Netscape and Userland
+# versions of RSS 0.91.
+def feed_version
+if @feed_version.nil?
+version = nil
+begin
+version = XPath.first(root_node, "@version").to_s.strip.to_f
+rescue
+end
+version = nil if version == 0.0
+default_namespace = XPath.first(root_node, "@xmlns").to_s.strip
+case self.feed_type
+when "atom"
+if default_namespace == "http://www.w3.org/2005/Atom"
+@feed_version = 1.0
+elsif version != nil
+@feed_version = version
+elsif default_namespace == "http://purl.org/atom/ns#"
+@feed_version = 0.3
+end
+when "rss"
+if default_namespace == "http://my.netscape.com/rdf/simple/0.9/"
+@feed_version = 0.9
+elsif default_namespace == "http://purl.org/rss/1.0/"
+@feed_version = 1.0
+elsif default_namespace == "http://purl.org/net/rss1.1#"
+@feed_version = 1.1
+elsif version != nil
+case version
+when 2.1
+@feed_version = 2.0
+when 2.01
+@feed_version = 2.0
+else
+@feed_version = version
+end
+end
+when "cdf"
+@feed_version = 0.4
+when "!okay/news"
+@feed_version = nil
+end
+end
+return @feed_version
+end
+
+# Sets the default feed version
+def feed_version=(new_feed_version)
+@feed_version = new_feed_version
+end
+
 # Returns the feed's unique id
 def id
 if @id.nil?
@@ -1089,19 +1256,27 @@ module FeedTools
 def title
 if @title.nil?
 repair_entities = false
-
-
-
-
-
-
-
-
-
+title_node = XPath.first(channel_node, "title")
+if title_node.nil?
+title_node = XPath.first(channel_node, "dc:title")
+end
+if title_node.nil?
+title_node = XPath.first(channel_node, "TITLE")
+end
+if title_node.nil?
+return nil
+end
+if XPath.first(title_node, "@type").to_s == "xhtml" ||
+XPath.first(title_node, "@mode").to_s == "xhtml" ||
+XPath.first(title_node, "@type").to_s == "xml" ||
+XPath.first(title_node, "@mode").to_s == "xml" ||
+XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
+@title = title_node.inner_xml
+elsif XPath.first(title_node, "@type").to_s == "escaped" ||
+XPath.first(title_node, "@mode").to_s == "escaped"
 @title = FeedTools.unescape_entities(
-XPath.first(
+XPath.first(title_node, "text()").to_s)
 else
-title_node = XPath.first(channel_node, "title")
 @title = title_node.inner_xml
 repair_entities = true
 end
@@ -1141,6 +1316,9 @@ module FeedTools
 if description_node.nil?
 description_node = XPath.first(channel_node, "abstract")
 end
+if description_node.nil?
+description_node = XPath.first(channel_node, "ABSTRACT")
+end
 if description_node.nil?
 description_node = XPath.first(channel_node, "info")
 end
@@ -1160,6 +1338,9 @@ module FeedTools
 description_node = XPath.first(channel_node, "body")
 @bozo = true unless description_node.nil?
 end
+if description_node.nil?
+return nil
+end
 unless description_node.nil?
 if XPath.first(description_node, "@encoding").to_s != ""
 @description =
@@ -1195,7 +1376,6 @@ module FeedTools
 @description = FeedTools.tidy_html(@description)
 end
 
-@description.gsub!(/\n/, " ") if @description.size < 80
 @description = @description.strip unless @description.nil?
 @description = nil if @description == ""
 end
@@ -1263,6 +1443,15 @@ module FeedTools
 if @link == ""
 @link = XPath.first(channel_node, "@href").to_s
 end
+if @link == ""
+@link = XPath.first(channel_node, "@HREF").to_s
+end
+if @link == ""
+@link = XPath.first(channel_node, "a/@href").to_s
+end
+if @link == ""
+@link = XPath.first(channel_node, "A/@HREF").to_s
+end
 if @link == ""
 if FeedTools.is_url? self.guid
 @link = self.guid
@@ -1290,39 +1479,6 @@ module FeedTools
 end
 end
 
-# Returns the feed image link
-def image_link
-if @image_link.nil?
-# get the feed image link from the xml document
-@image_link = XPath.first(channel_node, "image/url/text()").to_s
-if @image_link == ""
-@image_link = XPath.first(channel_node, "image/@rdf:resource").to_s
-end
-if @image_link == ""
-@image_link = XPath.first(channel_node, "link[@type='image/jpeg']/@href").to_s
-end
-if @image_link == ""
-@image_link = XPath.first(channel_node, "link[@type='image/gif']/@href").to_s
-end
-if @image_link == ""
-@image_link = XPath.first(channel_node, "link[@type='image/png']/@href").to_s
-end
-if @image_link == ""
-@image_link = XPath.first(channel_node, "logo[@style='image']/@href").to_s
-end
-if @image_link == ""
-@image_link = XPath.first(channel_node, "logo/@href").to_s
-end
-@image_link = FeedTools.normalize_url(@image_link)
-end
-return @image_link
-end
-
-# Sets the feed image link
-def image_link=(new_image_link)
-@image_link = new_image_link
-end
-
 # Returns the url to the icon file for this feed.
 #
 # This method uses the url from the link field in order to avoid grabbing
@@ -1348,10 +1504,19 @@ module FeedTools
 "icon/text()").to_s
 end
 if @icon_link == ""
+@icon_link = XPath.first(channel_node,
+"logo[@style='icon']/@href").to_s
+end
+if @icon_link == ""
+@icon_link = XPath.first(channel_node,
+"LOGO[@STYLE='ICON']/@HREF").to_s
+end
+if @icon_link == "" && self.link != nil && self.link != ""
 link_uri = URI.parse(FeedTools.normalize_url(self.link))
 @icon_link =
 link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
 end
+icon_link = nil if icon_link == ""
 end
 return @icon_link
 end
@@ -1375,6 +1540,10 @@ module FeedTools
 @author.raw = FeedTools.unescape_entities(
 XPath.first(channel_node, "dc:author/text()").to_s)
 end
+if @author.raw == ""
+@author.raw = FeedTools.unescape_entities(
+XPath.first(channel_node, "managingEditor/text()").to_s)
+end
 unless @author.raw == ""
 raw_scan = @author.raw.scan(
 /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -1456,6 +1625,10 @@ module FeedTools
 # Set the author name
 @publisher.raw = FeedTools.unescape_entities(
 XPath.first(channel_node, "dc:publisher/text()").to_s)
+if @publisher.raw == ""
+@publisher.raw = FeedTools.unescape_entities(
+XPath.first(channel_node, "webMaster/text()").to_s)
+end
 unless @publisher.raw == ""
 raw_scan = @publisher.raw.scan(
 /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -1529,6 +1702,119 @@ module FeedTools
 return @itunes_author
 end
 
+# Returns a list of the feed's categories
+def categories
+if @categories.nil?
+@categories = []
+category_nodes = XPath.match(channel_node, "category")
+if category_nodes.nil? || category_nodes.empty?
+category_nodes = XPath.match(channel_node, "dc:subject")
+end
+unless category_nodes.nil?
+for category_node in category_nodes
+category = FeedTools::Feed::Category.new
+category.term = XPath.first(category_node, "@term").to_s
+if category.term == ""
+category.term = XPath.first(category_node, "text()").to_s
+end
+category.term.strip! unless category.term.nil?
+category.term = nil if category.term == ""
+category.label = XPath.first(category_node, "@label").to_s
+category.label.strip! unless category.label.nil?
+category.label = nil if category.label == ""
+category.scheme = XPath.first(category_node, "@scheme").to_s
+if category.scheme == ""
+category.scheme = XPath.first(category_node, "@domain").to_s
+end
+category.scheme.strip! unless category.scheme.nil?
+category.scheme = nil if category.scheme == ""
+@categories << category
+end
+end
+end
+return @categories
+end
+
+# Returns a list of the feed's images
+def images
+if @images.nil?
+@images = []
+image_nodes = XPath.match(channel_node, "image")
+if image_nodes.nil? || image_nodes.empty?
+image_nodes = XPath.match(channel_node, "link")
+end
+if image_nodes.nil? || image_nodes.empty?
+image_nodes = XPath.match(channel_node, "logo")
+end
+if image_nodes.nil? || image_nodes.empty?
+image_nodes = XPath.match(channel_node, "LOGO")
+end
+unless image_nodes.nil?
+for image_node in image_nodes
+image = FeedTools::Feed::Image.new
+image.url = XPath.first(image_node, "url/text()").to_s
+if image.url == ""
+image.url = XPath.first(image_node, "@rdf:resource").to_s
+end
+if image.url == "" && (image_node.name == "logo" ||
+(image_node.attributes['type'] =~ /^image/) == 0)
+image.url = XPath.first(image_node, "@href").to_s
+end
+if image.url == "" && image_node.name == "LOGO"
+image.url = XPath.first(image_node, "@HREF").to_s
+end
+image.url.strip! unless image.url.nil?
+image.url = nil if image.url == ""
+image.title = XPath.first(image_node, "title/text()").to_s
+image.title.strip! unless image.title.nil?
+image.title = nil if image.title == ""
+image.description =
+XPath.first(image_node, "description/text()").to_s
+image.description.strip! unless image.description.nil?
+image.description = nil if image.description == ""
+image.link = XPath.first(image_node, "link/text()").to_s
+image.link.strip! unless image.link.nil?
+image.link = nil if image.link == ""
+image.height = XPath.first(image_node, "height/text()").to_s.to_i
+image.height = nil if image.height <= 0
+image.width = XPath.first(image_node, "width/text()").to_s.to_i
+image.width = nil if image.width <= 0
+image.style = XPath.first(image_node, "@style").to_s.downcase
+if image.style == ""
+image.style = XPath.first(image_node, "@STYLE").to_s.downcase
+end
+image.style.strip! unless image.style.nil?
+image.style = nil if image.style == ""
+@images << image
+end
+end
+end
+return @images
+end
+
+# Returns the feed's text input field
+def text_input
+if @text_input.nil?
+@text_input = FeedTools::Feed::TextInput.new
+text_input_node = XPath.first(channel_node, "textInput")
+unless text_input_node.nil?
+@text_input.title =
+XPath.first(text_input_node, "title/text()").to_s
+@text_input.title = nil if @text_input.title == ""
+@text_input.description =
+XPath.first(text_input_node, "description/text()").to_s
+@text_input.description = nil if @text_input.description == ""
+@text_input.link =
+XPath.first(text_input_node, "link/text()").to_s
+@text_input.link = nil if @text_input.link == ""
+@text_input.name =
+XPath.first(text_input_node, "name/text()").to_s
+@text_input.name = nil if @text_input.name == ""
+end
+end
+return @text_input
+end
+
 # Returns the feed's copyright information
 def copyright
 if @copyright.nil?
@@ -1696,7 +1982,7 @@ module FeedTools
 end
 
 # Returns true if this feed contains explicit material.
-def explicit
+def explicit?
 if @explicit.nil?
 if XPath.first(channel_node,
 "media:adult/text()").to_s.downcase == "true" ||
@@ -1724,9 +2010,18 @@ module FeedTools
 if raw_items == nil || raw_items == []
 raw_items = XPath.match(channel_node, "item")
 end
+if raw_items == nil || raw_items == []
+raw_items = XPath.match(channel_node, "ITEM")
+end
+if raw_items == nil || raw_items == []
+raw_items = XPath.match(root_node, "ITEM")
+end
 if raw_items == nil || raw_items == []
 raw_items = XPath.match(channel_node, "entry")
 end
+if raw_items == nil || raw_items == []
+raw_items = XPath.match(root_node, "entry")
+end
 
 # create the individual feed items
 @items = []
@@ -1767,7 +2062,7 @@ module FeedTools
 # True if this feed contains audio content enclosures
 def podcast?
 podcast = false
-
+self.items.each do |item|
 item.enclosures.each do |enclosure|
 podcast = true if enclosure.audio?
 end
@@ -1778,7 +2073,7 @@ module FeedTools
 # True if this feed contains video content enclosures
 def vidlog?
 vidlog = false
-
+self.items.each do |item|
 item.enclosures.each do |enclosure|
 vidlog = true if enclosure.video?
 end
@@ -1805,7 +2100,7 @@ module FeedTools
 end
 
 # Forces this feed to expire.
-def expire
+def expire!
 self.last_retrieved = Time.mktime(1970)
 self.save
 end
@@ -1817,11 +2112,11 @@ module FeedTools
 end
 
 # Generates xml based on the content of the feed
-def build_xml(feed_type=(self.feed_type or "rss"), version=
+def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
 xml_builder=Builder::XmlMarkup.new(:indent => 2))
-if feed_type == "rss" && version == 0.0
+if feed_type == "rss" && (version == nil || version == 0.0)
 version = 1.0
-elsif feed_type == "atom" && version == 0.0
+elsif feed_type == "atom" && (version == nil || version == 0.0)
 version = 0.3
 end
 if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
@@ -1838,8 +2133,9 @@ module FeedTools
 else
 xml_builder.link
 end
-unless
-xml_builder.image("rdf:resource" => CGI.escapeHTML(
+unless images.nil? || images.empty?
+xml_builder.image("rdf:resource" => CGI.escapeHTML(
+images.first.url))
 end
 unless description.nil? || description == ""
 xml_builder.description(description)
@@ -1866,18 +2162,30 @@ module FeedTools
|
|
1866
2162
|
end
|
1867
2163
|
build_xml_hook(feed_type, version, xml_builder)
|
1868
2164
|
end
|
1869
|
-
unless
|
1870
|
-
|
1871
|
-
|
1872
|
-
|
2165
|
+
unless images.nil? || images.empty?
|
2166
|
+
best_image = nil
|
2167
|
+
for image in self.images
|
2168
|
+
if image.link != nil
|
2169
|
+
best_image = image
|
2170
|
+
break
|
2171
|
+
end
|
2172
|
+
end
|
2173
|
+
best_image = images.first if best_image.nil?
|
2174
|
+
xml_builder.image("rdf:about" => CGI.escapeHTML(best_image.url)) do
|
2175
|
+
if best_image.title != nil && best_image.title != ""
|
2176
|
+
xml_builder.title(best_image.title)
|
2177
|
+
elsif self.title != nil && self.title != ""
|
2178
|
+
xml_builder.title(self.title)
|
1873
2179
|
else
|
1874
2180
|
xml_builder.title
|
1875
2181
|
end
|
1876
|
-
unless
|
1877
|
-
xml_builder.url(
|
2182
|
+
unless best_image.url.nil? || best_image.url == ""
|
2183
|
+
xml_builder.url(best_image.url)
|
1878
2184
|
end
|
1879
|
-
|
1880
|
-
xml_builder.link(link)
|
2185
|
+
if best_image.link != nil && best_image.link != ""
|
2186
|
+
xml_builder.link(best_image.link)
|
2187
|
+
elsif self.link != nil && self.link != ""
|
2188
|
+
xml_builder.link(self.link)
|
1881
2189
|
else
|
1882
2190
|
xml_builder.link
|
1883
2191
|
end
|
@@ -1891,7 +2199,7 @@ module FeedTools
|
|
1891
2199
|
end
|
1892
2200
|
elsif feed_type == "rss"
|
1893
2201
|
# normal rss format
|
1894
|
-
return xml_builder.rss("version" => version
|
2202
|
+
return xml_builder.rss("version" => version) do
|
1895
2203
|
unless title.nil? || title == ""
|
1896
2204
|
xml_builder.title(title)
|
1897
2205
|
end
|
@@ -1913,7 +2221,7 @@ module FeedTools
|
|
1913
2221
|
elsif feed_type == "atom"
|
1914
2222
|
# normal atom format
|
1915
2223
|
return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
|
1916
|
-
"version" => version
|
2224
|
+
"version" => version,
|
1917
2225
|
"xml:lang" => language) do
|
1918
2226
|
unless title.nil? || title == ""
|
1919
2227
|
xml_builder.title(title,
|
@@ -2112,12 +2420,18 @@ module FeedTools
|
|
2112
2420
|
end
|
2113
2421
|
return false
|
2114
2422
|
end
|
2423
|
+
|
2424
|
+
alias_method :link, :url
|
2425
|
+
alias_method :link=, :url=
|
2115
2426
|
end
|
2116
|
-
|
2427
|
+
|
2428
|
+
# TODO: Make these actual classes instead of structs
|
2429
|
+
# ==================================================
|
2117
2430
|
EnclosureHash = Struct.new( "EnclosureHash", :hash, :type )
|
2118
2431
|
EnclosurePlayer = Struct.new( "EnclosurePlayer", :url, :height, :width )
|
2119
2432
|
EnclosureCredit = Struct.new( "EnclosureCredit", :name, :role )
|
2120
|
-
EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height,
|
2433
|
+
EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height,
|
2434
|
+
:width )
|
2121
2435
|
|
2122
2436
|
# Returns the parent feed of this feed item
|
2123
2437
|
def feed
|
@@ -2186,19 +2500,27 @@ module FeedTools
|
|
2186
2500
|
def title
|
2187
2501
|
if @title.nil?
|
2188
2502
|
repair_entities = false
|
2189
|
-
|
2190
|
-
|
2191
|
-
|
2192
|
-
|
2193
|
-
|
2194
|
-
|
2195
|
-
|
2196
|
-
|
2197
|
-
|
2503
|
+
title_node = XPath.first(root_node, "title")
|
2504
|
+
if title_node.nil?
|
2505
|
+
title_node = XPath.first(root_node, "dc:title")
|
2506
|
+
end
|
2507
|
+
if title_node.nil?
|
2508
|
+
title_node = XPath.first(root_node, "TITLE")
|
2509
|
+
end
|
2510
|
+
if title_node.nil?
|
2511
|
+
return nil
|
2512
|
+
end
|
2513
|
+
if XPath.first(title_node, "@type").to_s == "xhtml" ||
|
2514
|
+
XPath.first(title_node, "@mode").to_s == "xhtml" ||
|
2515
|
+
XPath.first(title_node, "@type").to_s == "xml" ||
|
2516
|
+
XPath.first(title_node, "@mode").to_s == "xml" ||
|
2517
|
+
XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
|
2518
|
+
@title = title_node.inner_xml
|
2519
|
+
elsif XPath.first(title_node, "@type").to_s == "escaped" ||
|
2520
|
+
XPath.first(title_node, "@mode").to_s == "escaped"
|
2198
2521
|
@title = FeedTools.unescape_entities(
|
2199
|
-
XPath.first(
|
2522
|
+
XPath.first(title_node, "text()").to_s)
|
2200
2523
|
else
|
2201
|
-
title_node = XPath.first(root_node, "title")
|
2202
2524
|
@title = title_node.inner_xml
|
2203
2525
|
repair_entities = true
|
2204
2526
|
end
|
@@ -2252,16 +2574,25 @@ module FeedTools
|
|
2252
2574
|
if description_node.nil?
|
2253
2575
|
description_node = XPath.first(root_node, "abstract")
|
2254
2576
|
end
|
2577
|
+
if description_node.nil?
|
2578
|
+
description_node = XPath.first(root_node, "ABSTRACT")
|
2579
|
+
end
|
2255
2580
|
if description_node.nil?
|
2256
2581
|
description_node = XPath.first(root_node, "content:encoded")
|
2257
2582
|
end
|
2258
2583
|
if description_node.nil?
|
2259
2584
|
description_node = XPath.first(root_node, "content")
|
2260
2585
|
end
|
2586
|
+
if description_node.nil?
|
2587
|
+
description_node = XPath.first(root_node, "fullitem")
|
2588
|
+
end
|
2261
2589
|
if description_node.nil?
|
2262
2590
|
description_node = XPath.first(root_node, "info")
|
2263
2591
|
@bozo = true unless description_node.nil?
|
2264
2592
|
end
|
2593
|
+
if description_node.nil?
|
2594
|
+
return nil
|
2595
|
+
end
|
2265
2596
|
unless description_node.nil?
|
2266
2597
|
if XPath.first(description_node, "@encoding").to_s != ""
|
2267
2598
|
@description =
|
@@ -2297,7 +2628,6 @@ module FeedTools
|
|
2297
2628
|
@description = FeedTools.tidy_html(@description)
|
2298
2629
|
end
|
2299
2630
|
|
2300
|
-
@description.gsub!(/\n/, " ") if @description.size < 80
|
2301
2631
|
@description = @description.strip unless @description.nil?
|
2302
2632
|
@description = nil if @description == ""
|
2303
2633
|
end
|
@@ -2385,6 +2715,18 @@ module FeedTools
|
|
2385
2715
|
if @link == ""
|
2386
2716
|
@link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
|
2387
2717
|
end
|
2718
|
+
if @link == ""
|
2719
|
+
@link = XPath.first(root_node, "@href").to_s
|
2720
|
+
end
|
2721
|
+
if @link == ""
|
2722
|
+
@link = XPath.first(root_node, "a/@href").to_s
|
2723
|
+
end
|
2724
|
+
if @link == ""
|
2725
|
+
@link = XPath.first(root_node, "@HREF").to_s
|
2726
|
+
end
|
2727
|
+
if @link == ""
|
2728
|
+
@link = XPath.first(root_node, "A/@HREF").to_s
|
2729
|
+
end
|
2388
2730
|
if @link == ""
|
2389
2731
|
if FeedTools.is_url? self.guid
|
2390
2732
|
@link = self.guid
|
@@ -2410,7 +2752,7 @@ module FeedTools
|
|
2410
2752
|
@link = new_link
|
2411
2753
|
end
|
2412
2754
|
|
2413
|
-
# Returns the feed comment link
|
2755
|
+
# Returns the feed item comment link
|
2414
2756
|
def comment_link
|
2415
2757
|
if @comment_link.nil?
|
2416
2758
|
# get the feed item comment link from the xml document
|
@@ -2423,64 +2765,104 @@ module FeedTools
|
|
2423
2765
|
return @comment_link
|
2424
2766
|
end
|
2425
2767
|
|
2426
|
-
# Sets the feed comment link
|
2768
|
+
# Sets the feed item comment link
|
2427
2769
|
def comment_link=(new_comment_link)
|
2428
2770
|
@comment_link = new_comment_link
|
2429
2771
|
end
|
2430
2772
|
|
2431
|
-
# Returns the feed
|
2432
|
-
def
|
2433
|
-
if @
|
2434
|
-
|
2435
|
-
|
2436
|
-
|
2437
|
-
|
2438
|
-
|
2439
|
-
|
2440
|
-
|
2441
|
-
|
2442
|
-
|
2773
|
+
# Returns a list of the feed item's categories
|
2774
|
+
def categories
|
2775
|
+
if @categories.nil?
|
2776
|
+
@categories = []
|
2777
|
+
category_nodes = XPath.match(root_node, "category")
|
2778
|
+
if category_nodes.nil? || category_nodes.empty?
|
2779
|
+
category_nodes = XPath.match(root_node, "dc:subject")
|
2780
|
+
end
|
2781
|
+
unless category_nodes.nil?
|
2782
|
+
for category_node in category_nodes
|
2783
|
+
category = FeedTools::Feed::Category.new
|
2784
|
+
category.term = XPath.first(category_node, "@term").to_s
|
2785
|
+
if category.term == ""
|
2786
|
+
category.term = XPath.first(category_node, "text()").to_s
|
2787
|
+
end
|
2788
|
+
category.term.strip! unless category.term.nil?
|
2789
|
+
category.term = nil if category.term == ""
|
2790
|
+
category.label = XPath.first(category_node, "@label").to_s
|
2791
|
+
category.label.strip! unless category.label.nil?
|
2792
|
+
category.label = nil if category.label == ""
|
2793
|
+
category.scheme = XPath.first(category_node, "@scheme").to_s
|
2794
|
+
if category.scheme == ""
|
2795
|
+
category.scheme = XPath.first(category_node, "@domain").to_s
|
2796
|
+
end
|
2797
|
+
category.scheme.strip! unless category.scheme.nil?
|
2798
|
+
category.scheme = nil if category.scheme == ""
|
2799
|
+
@categories << category
|
2800
|
+
end
|
2443
2801
|
end
|
2444
|
-
|
2445
|
-
|
2446
|
-
|
2447
|
-
|
2802
|
+
end
|
2803
|
+
return @categories
|
2804
|
+
end
|
2805
|
+
|
2806
|
+
# Returns a list of the feed item's images
|
2807
|
+
def images
|
2808
|
+
if @images.nil?
|
2809
|
+
@images = []
|
2810
|
+
image_nodes = XPath.match(root_node, "link")
|
2811
|
+
if image_nodes.nil? || image_nodes.empty?
|
2812
|
+
image_nodes = XPath.match(root_node, "logo")
|
2448
2813
|
end
|
2449
|
-
if
|
2450
|
-
|
2814
|
+
if image_nodes.nil? || image_nodes.empty?
|
2815
|
+
image_nodes = XPath.match(root_node, "LOGO")
|
2451
2816
|
end
|
2452
|
-
if
|
2453
|
-
|
2454
|
-
# ideal, but chances are very good that anything that makes use of this image is
|
2455
|
-
# simply not going to care anyhow.
|
2456
|
-
@image_link = XPath.first(root_node, "media:thumbnail/@url").to_s
|
2457
|
-
if @image_link == ""
|
2458
|
-
@media_image_link = @image_link
|
2459
|
-
end
|
2817
|
+
if image_nodes.nil? || image_nodes.empty?
|
2818
|
+
image_nodes = XPath.match(root_node, "image")
|
2460
2819
|
end
|
2461
|
-
|
2462
|
-
|
2463
|
-
|
2464
|
-
|
2465
|
-
|
2466
|
-
|
2467
|
-
@image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
|
2820
|
+
unless image_nodes.nil?
|
2821
|
+
for image_node in image_nodes
|
2822
|
+
image = FeedTools::Feed::Image.new
|
2823
|
+
image.url = XPath.first(image_node, "url/text()").to_s
|
2824
|
+
if image.url != ""
|
2825
|
+
self.feed.bozo = true
|
2468
2826
|
end
|
2469
|
-
|
2470
|
-
|
2471
|
-
|
2827
|
+
if image.url == ""
|
2828
|
+
image.url = XPath.first(image_node, "@rdf:resource").to_s
|
2829
|
+
end
|
2830
|
+
if image.url == "" && (image_node.name == "logo" ||
|
2831
|
+
(image_node.attributes['type'] =~ /^image/) == 0)
|
2832
|
+
image.url = XPath.first(image_node, "@href").to_s
|
2833
|
+
end
|
2834
|
+
if image.url == "" && image_node.name == "LOGO"
|
2835
|
+
image.url = XPath.first(image_node, "@HREF").to_s
|
2836
|
+
end
|
2837
|
+
image.url.strip! unless image.url.nil?
|
2838
|
+
image.url = nil if image.url == ""
|
2839
|
+
image.title = XPath.first(image_node, "title/text()").to_s
|
2840
|
+
image.title.strip! unless image.title.nil?
|
2841
|
+
image.title = nil if image.title == ""
|
2842
|
+
image.description =
|
2843
|
+
XPath.first(image_node, "description/text()").to_s
|
2844
|
+
image.description.strip! unless image.description.nil?
|
2845
|
+
image.description = nil if image.description == ""
|
2846
|
+
image.link = XPath.first(image_node, "link/text()").to_s
|
2847
|
+
image.link.strip! unless image.link.nil?
|
2848
|
+
image.link = nil if image.link == ""
|
2849
|
+
image.height = XPath.first(image_node, "height/text()").to_s.to_i
|
2850
|
+
image.height = nil if image.height <= 0
|
2851
|
+
image.width = XPath.first(image_node, "width/text()").to_s.to_i
|
2852
|
+
image.width = nil if image.width <= 0
|
2853
|
+
image.style = XPath.first(image_node, "@style").to_s.downcase
|
2854
|
+
if image.style == ""
|
2855
|
+
image.style = XPath.first(image_node, "@STYLE").to_s.downcase
|
2856
|
+
end
|
2857
|
+
image.style.strip! unless image.style.nil?
|
2858
|
+
image.style = nil if image.style == ""
|
2859
|
+
@images << image
|
2472
2860
|
end
|
2473
2861
|
end
|
2474
|
-
@image_link = FeedTools.normalize_url(@image_link)
|
2475
2862
|
end
|
2476
|
-
return @
|
2477
|
-
end
|
2478
|
-
|
2479
|
-
# Sets the feed image link
|
2480
|
-
def image_link=(new_image_link)
|
2481
|
-
@image_link = new_image_link
|
2863
|
+
return @images
|
2482
2864
|
end
|
2483
|
-
|
2865
|
+
|
2484
2866
|
# Returns the feed item itunes image link
|
2485
2867
|
#
|
2486
2868
|
# If it's not present, falls back to the normal image link.
|
@@ -2494,9 +2876,6 @@ module FeedTools
|
|
2494
2876
|
if @itunes_image_link == ""
|
2495
2877
|
@itunes_image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
|
2496
2878
|
end
|
2497
|
-
if @itunes_image_link == ""
|
2498
|
-
@itunes_image_link = self.image_link
|
2499
|
-
end
|
2500
2879
|
@itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
|
2501
2880
|
end
|
2502
2881
|
return @itunes_image_link
|
@@ -2514,9 +2893,6 @@ module FeedTools
|
|
2514
2893
|
if @media_thumbnail_link.nil?
|
2515
2894
|
# get the feed item media thumbnail link from the xml document
|
2516
2895
|
@media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
|
2517
|
-
if @media_thumbnail_link == ""
|
2518
|
-
@media_thumbnail_link = image_link
|
2519
|
-
end
|
2520
2896
|
@media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
|
2521
2897
|
end
|
2522
2898
|
return @media_thumbnail_link
|
@@ -2527,6 +2903,21 @@ module FeedTools
|
|
2527
2903
|
@media_thumbnail_link = new_media_thumbnail_link
|
2528
2904
|
end
|
2529
2905
|
|
2906
|
+
# Returns the feed item's copyright information
|
2907
|
+
def copyright
|
2908
|
+
if @copyright.nil?
|
2909
|
+
@copyright = XPath.first(root_node, "dc:rights/text()").to_s
|
2910
|
+
@copyright = FeedTools.sanitize_html(@copyright, :strip)
|
2911
|
+
@copyright = nil if @copyright == ""
|
2912
|
+
end
|
2913
|
+
return @copyright
|
2914
|
+
end
|
2915
|
+
|
2916
|
+
# Sets the feed item's copyright information
|
2917
|
+
def copyright=(new_copyright)
|
2918
|
+
@copyright = new_copyright
|
2919
|
+
end
|
2920
|
+
|
2530
2921
|
# Returns all feed item enclosures
|
2531
2922
|
def enclosures
|
2532
2923
|
if @enclosures.nil?
|
@@ -2621,11 +3012,13 @@ module FeedTools
|
|
2621
3012
|
end
|
2622
3013
|
enclosure.categories = []
|
2623
3014
|
for category in XPath.match(enclosure_node, "media:category")
|
2624
|
-
enclosure.categories <<
|
2625
|
-
|
2626
|
-
FeedTools.unescape_entities(category.
|
3015
|
+
enclosure.categories << FeedTools::Feed::Category.new
|
3016
|
+
enclosure.categories.last.term =
|
3017
|
+
FeedTools.unescape_entities(category.text)
|
3018
|
+
enclosure.categories.last.scheme =
|
3019
|
+
FeedTools.unescape_entities(category.attributes["scheme"].to_s)
|
3020
|
+
enclosure.categories.last.label =
|
2627
3021
|
FeedTools.unescape_entities(category.attributes["label"].to_s)
|
2628
|
-
)
|
2629
3022
|
if enclosure.categories.last.scheme == ""
|
2630
3023
|
enclosure.categories.last.scheme = nil
|
2631
3024
|
end
|
@@ -2714,11 +3107,13 @@ module FeedTools
|
|
2714
3107
|
if (enclosure.categories.nil? || enclosure.categories.size == 0)
|
2715
3108
|
enclosure.categories = []
|
2716
3109
|
for category in XPath.match(media_group, "media:category")
|
2717
|
-
enclosure.categories <<
|
2718
|
-
|
2719
|
-
FeedTools.unescape_entities(category.
|
3110
|
+
enclosure.categories << FeedTools::Feed::Category.new
|
3111
|
+
enclosure.categories.last.term =
|
3112
|
+
FeedTools.unescape_entities(category.text)
|
3113
|
+
enclosure.categories.last.scheme =
|
3114
|
+
FeedTools.unescape_entities(category.attributes["scheme"].to_s)
|
3115
|
+
enclosure.categories.last.label =
|
2720
3116
|
FeedTools.unescape_entities(category.attributes["label"].to_s)
|
2721
|
-
)
|
2722
3117
|
if enclosure.categories.last.scheme == ""
|
2723
3118
|
enclosure.categories.last.scheme = nil
|
2724
3119
|
end
|
@@ -2880,7 +3275,7 @@ module FeedTools
|
|
2880
3275
|
# if it's missing.
|
2881
3276
|
if @enclosures.size == 1
|
2882
3277
|
if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
|
2883
|
-
@enclosures.first.duration = self.
|
3278
|
+
@enclosures.first.duration = self.itunes_duration
|
2884
3279
|
end
|
2885
3280
|
end
|
2886
3281
|
|
@@ -2910,6 +3305,10 @@ module FeedTools
|
|
2910
3305
|
@author.raw = FeedTools.unescape_entities(
|
2911
3306
|
XPath.first(root_node, "dc:author/text()").to_s)
|
2912
3307
|
end
|
3308
|
+
if @author.raw == ""
|
3309
|
+
@author.raw = FeedTools.unescape_entities(
|
3310
|
+
XPath.first(root_node, "managingEditor/text()").to_s)
|
3311
|
+
end
|
2913
3312
|
unless @author.raw == ""
|
2914
3313
|
raw_scan = @author.raw.scan(
|
2915
3314
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
@@ -2983,6 +3382,76 @@ module FeedTools
|
|
2983
3382
|
end
|
2984
3383
|
end
|
2985
3384
|
|
3385
|
+
# Returns the feed item publisher
|
3386
|
+
def publisher
|
3387
|
+
if @publisher.nil?
|
3388
|
+
@publisher = FeedTools::Feed::Author.new
|
3389
|
+
|
3390
|
+
# Set the publisher name
|
3391
|
+
@publisher.raw = FeedTools.unescape_entities(
|
3392
|
+
XPath.first(root_node, "dc:publisher/text()").to_s)
|
3393
|
+
if @publisher.raw == ""
|
3394
|
+
@publisher.raw = FeedTools.unescape_entities(
|
3395
|
+
XPath.first(root_node, "webMaster/text()").to_s)
|
3396
|
+
end
|
3397
|
+
unless @publisher.raw == ""
|
3398
|
+
raw_scan = @publisher.raw.scan(
|
3399
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
3400
|
+
if raw_scan.nil? || raw_scan.size == 0
|
3401
|
+
raw_scan = @publisher.raw.scan(
|
3402
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
3403
|
+
unless raw_scan.size == 0
|
3404
|
+
publisher_raw_pair = raw_scan.first.reverse
|
3405
|
+
end
|
3406
|
+
else
|
3407
|
+
publisher_raw_pair = raw_scan.first
|
3408
|
+
end
|
3409
|
+
if raw_scan.nil? || raw_scan.size == 0
|
3410
|
+
email_scan = @publisher.raw.scan(
|
3411
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
3412
|
+
if email_scan != nil && email_scan.size > 0
|
3413
|
+
@publisher.email = email_scan.first.strip
|
3414
|
+
end
|
3415
|
+
end
|
3416
|
+
unless publisher_raw_pair.nil? || publisher_raw_pair.size == 0
|
3417
|
+
@publisher.name = publisher_raw_pair.first.strip
|
3418
|
+
@publisher.email = publisher_raw_pair.last.strip
|
3419
|
+
else
|
3420
|
+
unless @publisher.raw.include?("@")
|
3421
|
+
# We can be reasonably sure we are looking at something
|
3422
|
+
# that the creator didn't intend to contain an email address if
|
3423
|
+
# it got through the preceding regexes and it doesn't
|
3424
|
+
# contain the tell-tale '@' symbol.
|
3425
|
+
@publisher.name = @publisher.raw
|
3426
|
+
end
|
3427
|
+
end
|
3428
|
+
end
|
3429
|
+
|
3430
|
+
@publisher.name = nil if @publisher.name == ""
|
3431
|
+
@publisher.raw = nil if @publisher.raw == ""
|
3432
|
+
@publisher.email = nil if @publisher.email == ""
|
3433
|
+
@publisher.url = nil if @publisher.url == ""
|
3434
|
+
end
|
3435
|
+
return @publisher
|
3436
|
+
end
|
3437
|
+
|
3438
|
+
# Sets the feed item publisher
|
3439
|
+
def publisher=(new_publisher)
|
3440
|
+
if new_publisher.respond_to?(:name) &&
|
3441
|
+
new_publisher.respond_to?(:email) &&
|
3442
|
+
new_publisher.respond_to?(:url)
|
3443
|
+
# It's a complete Author object, just set it.
|
3444
|
+
@publisher = new_publisher
|
3445
|
+
else
|
3446
|
+
# We're not looking at an Author object, this is probably a string,
|
3447
|
+
# default to setting the publisher's name.
|
3448
|
+
if @publisher.nil?
|
3449
|
+
@publisher = FeedTools::Feed::Author.new
|
3450
|
+
end
|
3451
|
+
@publisher.name = new_publisher
|
3452
|
+
end
|
3453
|
+
end
|
3454
|
+
|
2986
3455
|
# Returns the contents of the itunes:author element
|
2987
3456
|
#
|
2988
3457
|
# This inherits from any incorrectly placed channel-level itunes:author
|
@@ -3026,10 +3495,6 @@ module FeedTools
|
|
3026
3495
|
@itunes_duration = new_itunes_duration
|
3027
3496
|
end
|
3028
3497
|
|
3029
|
-
# Sets the itunes:summary
|
3030
|
-
def itunes_summary=(new_itunes_summary)
|
3031
|
-
end
|
3032
|
-
|
3033
3498
|
# Returns the feed item time
|
3034
3499
|
def time
|
3035
3500
|
if @time.nil?
|
@@ -3054,6 +3519,32 @@ module FeedTools
|
|
3054
3519
|
@time = new_time
|
3055
3520
|
end
|
3056
3521
|
|
3522
|
+
# Returns the url for posting comments
|
3523
|
+
def comments
|
3524
|
+
if @comments.nil?
|
3525
|
+
@comments = XPath.first(root_node, "comments/text()").to_s
|
3526
|
+
@comments = nil if @comments == ""
|
3527
|
+
end
|
3528
|
+
return @comments
|
3529
|
+
end
|
3530
|
+
|
3531
|
+
# Sets the url for posting comments
|
3532
|
+
def comments=(new_comments)
|
3533
|
+
@comments = new_comments
|
3534
|
+
end
|
3535
|
+
|
3536
|
+
# The source that this post was based on
|
3537
|
+
def source
|
3538
|
+
if @source.nil?
|
3539
|
+
@source = FeedTools::Feed::Link.new
|
3540
|
+
@source.url = XPath.first(root_node, "source/@url").to_s
|
3541
|
+
@source.url = nil if @source.url == ""
|
3542
|
+
@source.value = XPath.first(root_node, "source/text()").to_s
|
3543
|
+
@source.value = nil if @source.value == ""
|
3544
|
+
end
|
3545
|
+
return @source
|
3546
|
+
end
|
3547
|
+
|
3057
3548
|
# Returns the feed item tags
|
3058
3549
|
def tags
|
3059
3550
|
# TODO: support the rel="tag" microformat
|
@@ -3131,7 +3622,7 @@ module FeedTools
|
|
3131
3622
|
"itunes:explicit/text()").to_s.downcase == "yes" ||
|
3132
3623
|
XPath.first(root_node,
|
3133
3624
|
"itunes:explicit/text()").to_s.downcase == "true" ||
|
3134
|
-
feed.explicit
|
3625
|
+
feed.explicit?
|
3135
3626
|
@explicit = true
|
3136
3627
|
else
|
3137
3628
|
@explicit = false
|
@@ -3152,8 +3643,13 @@ module FeedTools
|
|
3152
3643
|
end
|
3153
3644
|
|
3154
3645
|
# Generates xml based on the content of the feed item
|
3155
|
-
def build_xml(feed_type=(self.feed.feed_type or "rss"), version=
|
3646
|
+
def build_xml(feed_type=(self.feed.feed_type or "rss"), version=nil,
|
3156
3647
|
xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
3648
|
+
if feed_type == "rss" && (version == nil || version == 0.0)
|
3649
|
+
version = 1.0
|
3650
|
+
elsif feed_type == "atom" && (version == nil || version == 0.0)
|
3651
|
+
version = 0.3
|
3652
|
+
end
|
3157
3653
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
3158
3654
|
# RDF-based rss format
|
3159
3655
|
if link.nil?
|
@@ -3253,6 +3749,8 @@ module FeedTools
|
|
3253
3749
|
alias_method :tagline=, :description=
|
3254
3750
|
alias_method :subtitle, :description
|
3255
3751
|
alias_method :subtitle=, :description=
|
3752
|
+
alias_method :summary, :description
|
3753
|
+
alias_method :summary=, :description=
|
3256
3754
|
alias_method :abstract, :description
|
3257
3755
|
alias_method :abstract=, :description=
|
3258
3756
|
alias_method :content, :description
|
@@ -3264,12 +3762,26 @@ end
|
|
3264
3762
|
|
3265
3763
|
module REXML # :nodoc:
|
3266
3764
|
class Element # :nodoc:
|
3267
|
-
|
3268
|
-
|
3269
|
-
|
3270
|
-
|
3765
|
+
unless REXML::Element.public_instance_methods.include? :inner_xml
|
3766
|
+
def inner_xml # :nodoc:
|
3767
|
+
result = ""
|
3768
|
+
self.each_child do |child|
|
3769
|
+
result << child.to_s
|
3770
|
+
end
|
3771
|
+
return result
|
3772
|
+
end
|
3773
|
+
end
|
3774
|
+
|
3775
|
+
unless REXML::Element.public_instance_methods.include? :base_uri
|
3776
|
+
def base_uri # :nodoc:
|
3777
|
+
if not attribute('xml:base')
|
3778
|
+
return parent.base_uri
|
3779
|
+
elsif parent
|
3780
|
+
return URI.join(parent.base_uri, attribute('xml:base').value).to_s
|
3781
|
+
else
|
3782
|
+
return (attribute('xml:base').value or '')
|
3783
|
+
end
|
3271
3784
|
end
|
3272
|
-
return result
|
3273
3785
|
end
|
3274
3786
|
end
|
3275
3787
|
end
|