feedtools 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -0
- data/lib/feed_tools.rb +721 -209
- data/rakefile +1 -1
- data/test/cdf_test.rb +121 -0
- data/test/helper_test.rb +3 -1
- data/test/rss_test.rb +480 -52
- metadata +3 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
== FeedTools 0.2.4
|
2
|
+
* fixed bug in the sqlite table creation query
|
3
|
+
* greatly improved image support
|
4
|
+
* improved cdf support (still needs way more work)
|
5
|
+
* support for text input elements
|
6
|
+
* now possible to force retrieval from the cache only
|
7
|
+
* increased the flexibility of the database caching implementation
|
8
|
+
* feed attributes accessible through the keys and values collections
|
9
|
+
* minor coding style changes
|
10
|
+
* fixed really dumb typo in the podcast? and vidlog? methods
|
11
|
+
* fixed exception from missing titles and descriptions
|
12
|
+
* now passes all of mark pilgrim's well-formed rss tests
|
1
13
|
== FeedTools 0.2.3
|
2
14
|
* fixed omission of get parameters from http requests
|
3
15
|
== FeedTools 0.2.2
|
data/lib/feed_tools.rb
CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
25
25
|
ENV['RAILS_ENV'] ||
|
26
26
|
'production' # :nodoc:
|
27
27
|
|
28
|
-
FEED_TOOLS_VERSION = "0.2.3"
|
28
|
+
FEED_TOOLS_VERSION = "0.2.4"
|
29
29
|
|
30
30
|
$:.unshift(File.dirname(__FILE__))
|
31
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
@@ -75,7 +75,7 @@ require 'yaml'
|
|
75
75
|
# => "News for nerds, stuff that matters"
|
76
76
|
# slashdot_feed.link
|
77
77
|
# => "http://slashdot.org/"
|
78
|
-
# slashdot_feed.items.first.find_node("slash:hitparade/text()").
|
78
|
+
# slashdot_feed.items.first.find_node("slash:hitparade/text()").value
|
79
79
|
# => "43,37,28,23,11,3,1"
|
80
80
|
module FeedTools
|
81
81
|
|
@@ -150,7 +150,7 @@ module FeedTools
|
|
150
150
|
begin
|
151
151
|
ActiveRecord::Base.connection.execute "select id, url, title, " +
|
152
152
|
"link, xml_data, http_headers, last_retrieved " +
|
153
|
-
"from
|
153
|
+
"from #{self.table_name()} limit 1"
|
154
154
|
rescue ActiveRecord::StatementInvalid
|
155
155
|
return false
|
156
156
|
rescue
|
@@ -163,7 +163,7 @@ module FeedTools
|
|
163
163
|
def DatabaseFeedCache.create_table
|
164
164
|
unless DatabaseFeedCache.table_exists?
|
165
165
|
feeds_mysql = <<-SQL_END
|
166
|
-
CREATE TABLE `
|
166
|
+
CREATE TABLE `#{self.table_name()}` (
|
167
167
|
`id` int(10) unsigned NOT NULL auto_increment,
|
168
168
|
`url` varchar(255) default NULL,
|
169
169
|
`title` varchar(255) default NULL,
|
@@ -175,19 +175,18 @@ module FeedTools
|
|
175
175
|
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
|
176
176
|
SQL_END
|
177
177
|
feeds_sqlite = <<-SQL_END
|
178
|
-
CREATE TABLE '
|
178
|
+
CREATE TABLE '#{self.table_name()}' (
|
179
179
|
'id' INTEGER PRIMARY KEY NOT NULL,
|
180
180
|
'url' VARCHAR(255) DEFAULT NULL,
|
181
181
|
'title' VARCHAR(255) DEFAULT NULL,
|
182
182
|
'link' VARCHAR(255) DEFAULT NULL,
|
183
|
-
'image_link' VARCHAR(255) DEFAULT NULL,
|
184
183
|
'xml_data' TEXT DEFAULT NULL,
|
185
184
|
'http_headers' TEXT DEFAULT NULL,
|
186
185
|
'last_retrieved' DATETIME DEFAULT NULL,
|
187
186
|
);
|
188
187
|
SQL_END
|
189
188
|
feeds_psql = <<-SQL_END
|
190
|
-
CREATE TABLE
|
189
|
+
CREATE TABLE #{self.table_name()} (
|
191
190
|
id SERIAL PRIMARY KEY NOT NULL,
|
192
191
|
url varchar(255) default NULL,
|
193
192
|
title varchar(255) default NULL,
|
@@ -206,7 +205,7 @@ module FeedTools
|
|
206
205
|
table_creation_sql = feeds_psql
|
207
206
|
end
|
208
207
|
if table_creation_sql.nil?
|
209
|
-
raise "Could not build
|
208
|
+
raise "Could not build #{self.table_name()} table."
|
210
209
|
else
|
211
210
|
connection.execute table_creation_sql
|
212
211
|
end
|
@@ -219,24 +218,86 @@ module FeedTools
|
|
219
218
|
end
|
220
219
|
|
221
220
|
# Quick method of enabling small classes to have their attributes
|
222
|
-
# accessible as a dictionary.
|
223
|
-
|
224
|
-
|
221
|
+
# accessible as a dictionary. These methods should not be used whenever
|
222
|
+
# performance is going to be an issue. They exist almost entirely for the
|
223
|
+
# purposes of aesthetics and/or debugging.
|
224
|
+
module AttributeDictionary
|
225
|
+
# Access the attributes as a dictionary.
|
225
226
|
def [](key)
|
226
|
-
|
227
|
-
# accessed like this.
|
228
|
-
return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
|
229
|
-
return nil unless self.method(key).arity == 0
|
227
|
+
return nil unless self.keys.include? key
|
230
228
|
return self.send(key)
|
231
229
|
end
|
232
230
|
|
233
|
-
# Access the attributes as a dictionary
|
231
|
+
# Access the attributes as a dictionary.
|
234
232
|
def []=(key, value)
|
235
|
-
|
236
|
-
|
237
|
-
return nil
|
238
|
-
|
239
|
-
|
233
|
+
pseudo_key = key
|
234
|
+
pseudo_key = key[0..-2] if key[-1..-1] == "?"
|
235
|
+
return nil unless self.method(pseudo_key + "=").arity == 1
|
236
|
+
local_keys = self.keys
|
237
|
+
unless local_keys.include?(key) || local_keys.include?(pseudo_key)
|
238
|
+
return nil
|
239
|
+
end
|
240
|
+
return self.send(pseudo_key + "=", value)
|
241
|
+
end
|
242
|
+
|
243
|
+
# Access the attributes as a dictionary.
|
244
|
+
def keys
|
245
|
+
key_methods = []
|
246
|
+
for key in self.methods
|
247
|
+
# Quick-n-dirty hack to speed things up and keep the list clean
|
248
|
+
if self.method(key).arity == 0 && key[-1..-1] != "=" &&
|
249
|
+
key[-1..-1] != "!" && key[0..1] != "__" &&
|
250
|
+
key[0..2] != "to_" && key[-5..-1] != "_node" &&
|
251
|
+
key != "cache_object" && key != "save" && key != "xml" &&
|
252
|
+
key != "xml_data" && key != "expired?" && key != "live?" &&
|
253
|
+
key != "feed"
|
254
|
+
superfluous_ancestors = self.class.ancestors
|
255
|
+
superfluous_ancestors = superfluous_ancestors[1..-1]
|
256
|
+
superfluous = false
|
257
|
+
for ancestor in superfluous_ancestors
|
258
|
+
if ancestor.instance_methods.include? key
|
259
|
+
superfluous = true
|
260
|
+
break
|
261
|
+
end
|
262
|
+
end
|
263
|
+
next if superfluous
|
264
|
+
key_methods << key
|
265
|
+
end
|
266
|
+
end
|
267
|
+
return key_methods.sort
|
268
|
+
end
|
269
|
+
|
270
|
+
# Access the attributes as a dictionary.
|
271
|
+
# Please note that this method may cause a nearly complete parse of a
|
272
|
+
# feed. This will be very slow.
|
273
|
+
def values
|
274
|
+
return self.keys.map { |key| self[key] }
|
275
|
+
end
|
276
|
+
|
277
|
+
# Access the attributes as a dictionary.
|
278
|
+
# Please note that this method may cause a complete parse of a feed.
|
279
|
+
# This will be very slow.
|
280
|
+
def to_hash
|
281
|
+
attribute_hash = {}
|
282
|
+
for key in keys
|
283
|
+
value = self[key]
|
284
|
+
if value.respond_to? :to_hash
|
285
|
+
value = value.to_hash
|
286
|
+
end
|
287
|
+
if value.respond_to? :to_ary
|
288
|
+
new_value = []
|
289
|
+
for item in value.to_ary
|
290
|
+
if item.respond_to? :to_hash
|
291
|
+
new_value << item.to_hash
|
292
|
+
else
|
293
|
+
new_value << item
|
294
|
+
end
|
295
|
+
end
|
296
|
+
value = new_value
|
297
|
+
end
|
298
|
+
attribute_hash[key] = value
|
299
|
+
end
|
300
|
+
return attribute_hash
|
240
301
|
end
|
241
302
|
end
|
242
303
|
|
@@ -278,6 +339,22 @@ module FeedTools
|
|
278
339
|
@feed_cache = new_feed_cache
|
279
340
|
end
|
280
341
|
|
342
|
+
# Returns true if FeedTools should only retrieve from the cache and avoid
|
343
|
+
# pulling feeds from their remote location.
|
344
|
+
def FeedTools.cache_only?
|
345
|
+
@cache_only = false if @cache_only.nil?
|
346
|
+
return @cache_only
|
347
|
+
end
|
348
|
+
|
349
|
+
# Sets whether or not FeedTools should retrieve feeds from remote locations
|
350
|
+
# or if it should rely on the cache only.
|
351
|
+
def FeedTools.cache_only=(new_cache_only)
|
352
|
+
if new_cache_only != true && new_cache_only != false
|
353
|
+
raise ArgumentError, "Must be either true or false."
|
354
|
+
end
|
355
|
+
@cache_only = new_cache_only
|
356
|
+
end
|
357
|
+
|
281
358
|
# Returns true if FeedTools.feed_cache is not nil and a connection with
|
282
359
|
# the cache has been successfully established. Also returns false if an
|
283
360
|
# error is raised while trying to determine the status of the cache.
|
@@ -613,37 +690,29 @@ module FeedTools
|
|
613
690
|
end
|
614
691
|
|
615
692
|
class Feed
|
616
|
-
include REXML
|
693
|
+
include REXML # :nodoc:
|
617
694
|
include AttributeDictionary
|
618
695
|
|
619
696
|
# Represents a feed/feed item's category
|
620
697
|
class Category
|
698
|
+
include AttributeDictionary
|
699
|
+
|
621
700
|
# The category term value
|
622
701
|
attr_accessor :term
|
623
702
|
# The categorization scheme
|
624
703
|
attr_accessor :scheme
|
625
704
|
# A human-readable description of the category
|
626
705
|
attr_accessor :label
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
self.term.send(msg, params)
|
632
|
-
end
|
633
|
-
|
634
|
-
# Relays the to_s method to the term field
|
635
|
-
def to_s
|
636
|
-
self.term.to_s
|
637
|
-
end
|
638
|
-
|
639
|
-
# Relays the inspect method to the term field
|
640
|
-
def inspect
|
641
|
-
self.term.inspect
|
642
|
-
end
|
706
|
+
|
707
|
+
alias_method :value, :term
|
708
|
+
alias_method :category, :term
|
709
|
+
alias_method :domain, :scheme
|
643
710
|
end
|
644
711
|
|
645
712
|
# Represents a feed/feed item's author
|
646
713
|
class Author
|
714
|
+
include AttributeDictionary
|
715
|
+
|
647
716
|
# The author's real name
|
648
717
|
attr_accessor :name
|
649
718
|
# The author's email address
|
@@ -652,26 +721,51 @@ module FeedTools
|
|
652
721
|
attr_accessor :url
|
653
722
|
# The raw value of the author tag if present
|
654
723
|
attr_accessor :raw
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
end
|
661
|
-
|
662
|
-
# Relays the to_s method to the name field
|
663
|
-
def to_s
|
664
|
-
self.name.to_s
|
665
|
-
end
|
724
|
+
end
|
725
|
+
|
726
|
+
# Represents a feed's image
|
727
|
+
class Image
|
728
|
+
include AttributeDictionary
|
666
729
|
|
667
|
-
#
|
668
|
-
|
669
|
-
|
670
|
-
|
730
|
+
# The image's title
|
731
|
+
attr_accessor :title
|
732
|
+
# The image's description
|
733
|
+
attr_accessor :description
|
734
|
+
# The image's url
|
735
|
+
attr_accessor :url
|
736
|
+
# The url to link the image to
|
737
|
+
attr_accessor :link
|
738
|
+
# The width of the image
|
739
|
+
attr_accessor :width
|
740
|
+
# The height of the image
|
741
|
+
attr_accessor :height
|
742
|
+
# The style of the image
|
743
|
+
# Possible values are "icon", "image", or "image-wide"
|
744
|
+
attr_accessor :style
|
745
|
+
end
|
746
|
+
|
747
|
+
# Represents a feed's text input element.
|
748
|
+
# Be aware that this will be ignored for feed generation. It's a
|
749
|
+
# pointless element that aggregators usually ignore and it doesn't have an
|
750
|
+
# equivalent in all feed types.
|
751
|
+
class TextInput
|
752
|
+
include AttributeDictionary
|
753
|
+
|
754
|
+
# The label of the Submit button in the text input area.
|
755
|
+
attr_accessor :title
|
756
|
+
# The description explains the text input area.
|
757
|
+
attr_accessor :description
|
758
|
+
# The URL of the CGI script that processes text input requests.
|
759
|
+
attr_accessor :link
|
760
|
+
# The name of the text object in the text input area.
|
761
|
+
attr_accessor :name
|
671
762
|
end
|
672
763
|
|
673
764
|
# Represents a feed's cloud.
|
765
|
+
# Be aware that this will be ignored for feed generation.
|
674
766
|
class Cloud
|
767
|
+
include AttributeDictionary
|
768
|
+
|
675
769
|
# The domain of the cloud.
|
676
770
|
attr_accessor :domain
|
677
771
|
# The path for the cloud.
|
@@ -684,7 +778,19 @@ module FeedTools
|
|
684
778
|
# The procedure to use to request notification.
|
685
779
|
attr_accessor :register_procedure
|
686
780
|
end
|
687
|
-
|
781
|
+
|
782
|
+
# Represents a simple hyperlink
|
783
|
+
class Link
|
784
|
+
include AttributeDictionary
|
785
|
+
|
786
|
+
# The url that is being linked to
|
787
|
+
attr_accessor :url
|
788
|
+
# The content of the hyperlink
|
789
|
+
attr_accessor :value
|
790
|
+
|
791
|
+
alias_method :href, :url
|
792
|
+
end
|
793
|
+
|
688
794
|
# Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
|
689
795
|
def Feed.open(url)
|
690
796
|
# clean up the url
|
@@ -693,19 +799,19 @@ module FeedTools
|
|
693
799
|
# create and load the new feed
|
694
800
|
feed = Feed.new
|
695
801
|
feed.url = url
|
696
|
-
feed.update
|
802
|
+
feed.update!
|
697
803
|
return feed
|
698
804
|
end
|
699
805
|
|
700
806
|
# Loads the feed from the remote url if the feed has expired from the cache or cannot be
|
701
807
|
# retrieved from the cache for some reason.
|
702
|
-
def update
|
808
|
+
def update!
|
703
809
|
if self.http_headers.nil? && !(self.cache_object.nil?) &&
|
704
810
|
!(self.cache_object.http_headers.nil?)
|
705
811
|
@http_headers = YAML.load(self.cache_object.http_headers)
|
706
812
|
end
|
707
|
-
if expired?
|
708
|
-
load_remote_feed
|
813
|
+
if expired? && !FeedTools.cache_only?
|
814
|
+
load_remote_feed!
|
709
815
|
else
|
710
816
|
@live = false
|
711
817
|
end
|
@@ -715,7 +821,7 @@ module FeedTools
|
|
715
821
|
# field to be set. If an etag or the last_modified date has been set,
|
716
822
|
# attempts to use them to prevent unnecessary reloading of identical
|
717
823
|
# content.
|
718
|
-
def load_remote_feed
|
824
|
+
def load_remote_feed!
|
719
825
|
@live = true
|
720
826
|
if self.http_headers.nil? && !(self.cache_object.nil?) &&
|
721
827
|
!(self.cache_object.http_headers.nil?)
|
@@ -970,6 +1076,10 @@ module FeedTools
|
|
970
1076
|
# Returns the root node of the feed.
|
971
1077
|
def root_node
|
972
1078
|
if @root_node.nil?
|
1079
|
+
# TODO: Fix this so that added content at the end of the file doesn't
|
1080
|
+
# break this stuff.
|
1081
|
+
# E.g.: http://smogzer.tripod.com/smog.rdf
|
1082
|
+
# ===================================================================
|
973
1083
|
@root_node = xml.root
|
974
1084
|
end
|
975
1085
|
return @root_node
|
@@ -979,6 +1089,9 @@ module FeedTools
|
|
979
1089
|
def channel_node
|
980
1090
|
if @channel_node.nil?
|
981
1091
|
@channel_node = XPath.first(root_node, "channel")
|
1092
|
+
if @channel_node == nil
|
1093
|
+
@channel_node = XPath.first(root_node, "CHANNEL")
|
1094
|
+
end
|
982
1095
|
if @channel_node == nil
|
983
1096
|
@channel_node = XPath.first(root_node, "feedinfo")
|
984
1097
|
end
|
@@ -1039,6 +1152,8 @@ module FeedTools
|
|
1039
1152
|
@feed_type = "atom"
|
1040
1153
|
when "rdf:rdf"
|
1041
1154
|
@feed_type = "rss"
|
1155
|
+
when "rdf"
|
1156
|
+
@feed_type = "rss"
|
1042
1157
|
when "rss"
|
1043
1158
|
@feed_type = "rss"
|
1044
1159
|
when "channel"
|
@@ -1053,6 +1168,58 @@ module FeedTools
|
|
1053
1168
|
@feed_type = new_feed_type
|
1054
1169
|
end
|
1055
1170
|
|
1171
|
+
# Returns the version number of the feed type.
|
1172
|
+
# Intentionally does not differentiate between the Netscape and Userland
|
1173
|
+
# versions of RSS 0.91.
|
1174
|
+
def feed_version
|
1175
|
+
if @feed_version.nil?
|
1176
|
+
version = nil
|
1177
|
+
begin
|
1178
|
+
version = XPath.first(root_node, "@version").to_s.strip.to_f
|
1179
|
+
rescue
|
1180
|
+
end
|
1181
|
+
version = nil if version == 0.0
|
1182
|
+
default_namespace = XPath.first(root_node, "@xmlns").to_s.strip
|
1183
|
+
case self.feed_type
|
1184
|
+
when "atom"
|
1185
|
+
if default_namespace == "http://www.w3.org/2005/Atom"
|
1186
|
+
@feed_version = 1.0
|
1187
|
+
elsif version != nil
|
1188
|
+
@feed_version = version
|
1189
|
+
elsif default_namespace == "http://purl.org/atom/ns#"
|
1190
|
+
@feed_version = 0.3
|
1191
|
+
end
|
1192
|
+
when "rss"
|
1193
|
+
if default_namespace == "http://my.netscape.com/rdf/simple/0.9/"
|
1194
|
+
@feed_version = 0.9
|
1195
|
+
elsif default_namespace == "http://purl.org/rss/1.0/"
|
1196
|
+
@feed_version = 1.0
|
1197
|
+
elsif default_namespace == "http://purl.org/net/rss1.1#"
|
1198
|
+
@feed_version = 1.1
|
1199
|
+
elsif version != nil
|
1200
|
+
case version
|
1201
|
+
when 2.1
|
1202
|
+
@feed_version = 2.0
|
1203
|
+
when 2.01
|
1204
|
+
@feed_version = 2.0
|
1205
|
+
else
|
1206
|
+
@feed_version = version
|
1207
|
+
end
|
1208
|
+
end
|
1209
|
+
when "cdf"
|
1210
|
+
@feed_version = 0.4
|
1211
|
+
when "!okay/news"
|
1212
|
+
@feed_version = nil
|
1213
|
+
end
|
1214
|
+
end
|
1215
|
+
return @feed_version
|
1216
|
+
end
|
1217
|
+
|
1218
|
+
# Sets the default feed version
|
1219
|
+
def feed_version=(new_feed_version)
|
1220
|
+
@feed_version = new_feed_version
|
1221
|
+
end
|
1222
|
+
|
1056
1223
|
# Returns the feed's unique id
|
1057
1224
|
def id
|
1058
1225
|
if @id.nil?
|
@@ -1089,19 +1256,27 @@ module FeedTools
|
|
1089
1256
|
def title
|
1090
1257
|
if @title.nil?
|
1091
1258
|
repair_entities = false
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1259
|
+
title_node = XPath.first(channel_node, "title")
|
1260
|
+
if title_node.nil?
|
1261
|
+
title_node = XPath.first(channel_node, "dc:title")
|
1262
|
+
end
|
1263
|
+
if title_node.nil?
|
1264
|
+
title_node = XPath.first(channel_node, "TITLE")
|
1265
|
+
end
|
1266
|
+
if title_node.nil?
|
1267
|
+
return nil
|
1268
|
+
end
|
1269
|
+
if XPath.first(title_node, "@type").to_s == "xhtml" ||
|
1270
|
+
XPath.first(title_node, "@mode").to_s == "xhtml" ||
|
1271
|
+
XPath.first(title_node, "@type").to_s == "xml" ||
|
1272
|
+
XPath.first(title_node, "@mode").to_s == "xml" ||
|
1273
|
+
XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
|
1274
|
+
@title = title_node.inner_xml
|
1275
|
+
elsif XPath.first(title_node, "@type").to_s == "escaped" ||
|
1276
|
+
XPath.first(title_node, "@mode").to_s == "escaped"
|
1101
1277
|
@title = FeedTools.unescape_entities(
|
1102
|
-
XPath.first(
|
1278
|
+
XPath.first(title_node, "text()").to_s)
|
1103
1279
|
else
|
1104
|
-
title_node = XPath.first(channel_node, "title")
|
1105
1280
|
@title = title_node.inner_xml
|
1106
1281
|
repair_entities = true
|
1107
1282
|
end
|
@@ -1141,6 +1316,9 @@ module FeedTools
|
|
1141
1316
|
if description_node.nil?
|
1142
1317
|
description_node = XPath.first(channel_node, "abstract")
|
1143
1318
|
end
|
1319
|
+
if description_node.nil?
|
1320
|
+
description_node = XPath.first(channel_node, "ABSTRACT")
|
1321
|
+
end
|
1144
1322
|
if description_node.nil?
|
1145
1323
|
description_node = XPath.first(channel_node, "info")
|
1146
1324
|
end
|
@@ -1160,6 +1338,9 @@ module FeedTools
|
|
1160
1338
|
description_node = XPath.first(channel_node, "body")
|
1161
1339
|
@bozo = true unless description_node.nil?
|
1162
1340
|
end
|
1341
|
+
if description_node.nil?
|
1342
|
+
return nil
|
1343
|
+
end
|
1163
1344
|
unless description_node.nil?
|
1164
1345
|
if XPath.first(description_node, "@encoding").to_s != ""
|
1165
1346
|
@description =
|
@@ -1195,7 +1376,6 @@ module FeedTools
|
|
1195
1376
|
@description = FeedTools.tidy_html(@description)
|
1196
1377
|
end
|
1197
1378
|
|
1198
|
-
@description.gsub!(/\n/, " ") if @description.size < 80
|
1199
1379
|
@description = @description.strip unless @description.nil?
|
1200
1380
|
@description = nil if @description == ""
|
1201
1381
|
end
|
@@ -1263,6 +1443,15 @@ module FeedTools
|
|
1263
1443
|
if @link == ""
|
1264
1444
|
@link = XPath.first(channel_node, "@href").to_s
|
1265
1445
|
end
|
1446
|
+
if @link == ""
|
1447
|
+
@link = XPath.first(channel_node, "@HREF").to_s
|
1448
|
+
end
|
1449
|
+
if @link == ""
|
1450
|
+
@link = XPath.first(channel_node, "a/@href").to_s
|
1451
|
+
end
|
1452
|
+
if @link == ""
|
1453
|
+
@link = XPath.first(channel_node, "A/@HREF").to_s
|
1454
|
+
end
|
1266
1455
|
if @link == ""
|
1267
1456
|
if FeedTools.is_url? self.guid
|
1268
1457
|
@link = self.guid
|
@@ -1290,39 +1479,6 @@ module FeedTools
|
|
1290
1479
|
end
|
1291
1480
|
end
|
1292
1481
|
|
1293
|
-
# Returns the feed image link
|
1294
|
-
def image_link
|
1295
|
-
if @image_link.nil?
|
1296
|
-
# get the feed image link from the xml document
|
1297
|
-
@image_link = XPath.first(channel_node, "image/url/text()").to_s
|
1298
|
-
if @image_link == ""
|
1299
|
-
@image_link = XPath.first(channel_node, "image/@rdf:resource").to_s
|
1300
|
-
end
|
1301
|
-
if @image_link == ""
|
1302
|
-
@image_link = XPath.first(channel_node, "link[@type='image/jpeg']/@href").to_s
|
1303
|
-
end
|
1304
|
-
if @image_link == ""
|
1305
|
-
@image_link = XPath.first(channel_node, "link[@type='image/gif']/@href").to_s
|
1306
|
-
end
|
1307
|
-
if @image_link == ""
|
1308
|
-
@image_link = XPath.first(channel_node, "link[@type='image/png']/@href").to_s
|
1309
|
-
end
|
1310
|
-
if @image_link == ""
|
1311
|
-
@image_link = XPath.first(channel_node, "logo[@style='image']/@href").to_s
|
1312
|
-
end
|
1313
|
-
if @image_link == ""
|
1314
|
-
@image_link = XPath.first(channel_node, "logo/@href").to_s
|
1315
|
-
end
|
1316
|
-
@image_link = FeedTools.normalize_url(@image_link)
|
1317
|
-
end
|
1318
|
-
return @image_link
|
1319
|
-
end
|
1320
|
-
|
1321
|
-
# Sets the feed image link
|
1322
|
-
def image_link=(new_image_link)
|
1323
|
-
@image_link = new_image_link
|
1324
|
-
end
|
1325
|
-
|
1326
1482
|
# Returns the url to the icon file for this feed.
|
1327
1483
|
#
|
1328
1484
|
# This method uses the url from the link field in order to avoid grabbing
|
@@ -1348,10 +1504,19 @@ module FeedTools
|
|
1348
1504
|
"icon/text()").to_s
|
1349
1505
|
end
|
1350
1506
|
if @icon_link == ""
|
1507
|
+
@icon_link = XPath.first(channel_node,
|
1508
|
+
"logo[@style='icon']/@href").to_s
|
1509
|
+
end
|
1510
|
+
if @icon_link == ""
|
1511
|
+
@icon_link = XPath.first(channel_node,
|
1512
|
+
"LOGO[@STYLE='ICON']/@HREF").to_s
|
1513
|
+
end
|
1514
|
+
if @icon_link == "" && self.link != nil && self.link != ""
|
1351
1515
|
link_uri = URI.parse(FeedTools.normalize_url(self.link))
|
1352
1516
|
@icon_link =
|
1353
1517
|
link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
|
1354
1518
|
end
|
1519
|
+
icon_link = nil if icon_link == ""
|
1355
1520
|
end
|
1356
1521
|
return @icon_link
|
1357
1522
|
end
|
@@ -1375,6 +1540,10 @@ module FeedTools
|
|
1375
1540
|
@author.raw = FeedTools.unescape_entities(
|
1376
1541
|
XPath.first(channel_node, "dc:author/text()").to_s)
|
1377
1542
|
end
|
1543
|
+
if @author.raw == ""
|
1544
|
+
@author.raw = FeedTools.unescape_entities(
|
1545
|
+
XPath.first(channel_node, "managingEditor/text()").to_s)
|
1546
|
+
end
|
1378
1547
|
unless @author.raw == ""
|
1379
1548
|
raw_scan = @author.raw.scan(
|
1380
1549
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
@@ -1456,6 +1625,10 @@ module FeedTools
|
|
1456
1625
|
# Set the author name
|
1457
1626
|
@publisher.raw = FeedTools.unescape_entities(
|
1458
1627
|
XPath.first(channel_node, "dc:publisher/text()").to_s)
|
1628
|
+
if @publisher.raw == ""
|
1629
|
+
@publisher.raw = FeedTools.unescape_entities(
|
1630
|
+
XPath.first(channel_node, "webMaster/text()").to_s)
|
1631
|
+
end
|
1459
1632
|
unless @publisher.raw == ""
|
1460
1633
|
raw_scan = @publisher.raw.scan(
|
1461
1634
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
@@ -1529,6 +1702,119 @@ module FeedTools
|
|
1529
1702
|
return @itunes_author
|
1530
1703
|
end
|
1531
1704
|
|
1705
|
+
# Returns a list of the feed's categories
|
1706
|
+
def categories
|
1707
|
+
if @categories.nil?
|
1708
|
+
@categories = []
|
1709
|
+
category_nodes = XPath.match(channel_node, "category")
|
1710
|
+
if category_nodes.nil? || category_nodes.empty?
|
1711
|
+
category_nodes = XPath.match(channel_node, "dc:subject")
|
1712
|
+
end
|
1713
|
+
unless category_nodes.nil?
|
1714
|
+
for category_node in category_nodes
|
1715
|
+
category = FeedTools::Feed::Category.new
|
1716
|
+
category.term = XPath.first(category_node, "@term").to_s
|
1717
|
+
if category.term == ""
|
1718
|
+
category.term = XPath.first(category_node, "text()").to_s
|
1719
|
+
end
|
1720
|
+
category.term.strip! unless category.term.nil?
|
1721
|
+
category.term = nil if category.term == ""
|
1722
|
+
category.label = XPath.first(category_node, "@label").to_s
|
1723
|
+
category.label.strip! unless category.label.nil?
|
1724
|
+
category.label = nil if category.label == ""
|
1725
|
+
category.scheme = XPath.first(category_node, "@scheme").to_s
|
1726
|
+
if category.scheme == ""
|
1727
|
+
category.scheme = XPath.first(category_node, "@domain").to_s
|
1728
|
+
end
|
1729
|
+
category.scheme.strip! unless category.scheme.nil?
|
1730
|
+
category.scheme = nil if category.scheme == ""
|
1731
|
+
@categories << category
|
1732
|
+
end
|
1733
|
+
end
|
1734
|
+
end
|
1735
|
+
return @categories
|
1736
|
+
end
|
1737
|
+
|
1738
|
+
# Returns a list of the feed's images
|
1739
|
+
def images
|
1740
|
+
if @images.nil?
|
1741
|
+
@images = []
|
1742
|
+
image_nodes = XPath.match(channel_node, "image")
|
1743
|
+
if image_nodes.nil? || image_nodes.empty?
|
1744
|
+
image_nodes = XPath.match(channel_node, "link")
|
1745
|
+
end
|
1746
|
+
if image_nodes.nil? || image_nodes.empty?
|
1747
|
+
image_nodes = XPath.match(channel_node, "logo")
|
1748
|
+
end
|
1749
|
+
if image_nodes.nil? || image_nodes.empty?
|
1750
|
+
image_nodes = XPath.match(channel_node, "LOGO")
|
1751
|
+
end
|
1752
|
+
unless image_nodes.nil?
|
1753
|
+
for image_node in image_nodes
|
1754
|
+
image = FeedTools::Feed::Image.new
|
1755
|
+
image.url = XPath.first(image_node, "url/text()").to_s
|
1756
|
+
if image.url == ""
|
1757
|
+
image.url = XPath.first(image_node, "@rdf:resource").to_s
|
1758
|
+
end
|
1759
|
+
if image.url == "" && (image_node.name == "logo" ||
|
1760
|
+
(image_node.attributes['type'] =~ /^image/) == 0)
|
1761
|
+
image.url = XPath.first(image_node, "@href").to_s
|
1762
|
+
end
|
1763
|
+
if image.url == "" && image_node.name == "LOGO"
|
1764
|
+
image.url = XPath.first(image_node, "@HREF").to_s
|
1765
|
+
end
|
1766
|
+
image.url.strip! unless image.url.nil?
|
1767
|
+
image.url = nil if image.url == ""
|
1768
|
+
image.title = XPath.first(image_node, "title/text()").to_s
|
1769
|
+
image.title.strip! unless image.title.nil?
|
1770
|
+
image.title = nil if image.title == ""
|
1771
|
+
image.description =
|
1772
|
+
XPath.first(image_node, "description/text()").to_s
|
1773
|
+
image.description.strip! unless image.description.nil?
|
1774
|
+
image.description = nil if image.description == ""
|
1775
|
+
image.link = XPath.first(image_node, "link/text()").to_s
|
1776
|
+
image.link.strip! unless image.link.nil?
|
1777
|
+
image.link = nil if image.link == ""
|
1778
|
+
image.height = XPath.first(image_node, "height/text()").to_s.to_i
|
1779
|
+
image.height = nil if image.height <= 0
|
1780
|
+
image.width = XPath.first(image_node, "width/text()").to_s.to_i
|
1781
|
+
image.width = nil if image.width <= 0
|
1782
|
+
image.style = XPath.first(image_node, "@style").to_s.downcase
|
1783
|
+
if image.style == ""
|
1784
|
+
image.style = XPath.first(image_node, "@STYLE").to_s.downcase
|
1785
|
+
end
|
1786
|
+
image.style.strip! unless image.style.nil?
|
1787
|
+
image.style = nil if image.style == ""
|
1788
|
+
@images << image
|
1789
|
+
end
|
1790
|
+
end
|
1791
|
+
end
|
1792
|
+
return @images
|
1793
|
+
end
|
1794
|
+
|
1795
|
+
# Returns the feed's text input field
|
1796
|
+
def text_input
|
1797
|
+
if @text_input.nil?
|
1798
|
+
@text_input = FeedTools::Feed::TextInput.new
|
1799
|
+
text_input_node = XPath.first(channel_node, "textInput")
|
1800
|
+
unless text_input_node.nil?
|
1801
|
+
@text_input.title =
|
1802
|
+
XPath.first(text_input_node, "title/text()").to_s
|
1803
|
+
@text_input.title = nil if @text_input.title == ""
|
1804
|
+
@text_input.description =
|
1805
|
+
XPath.first(text_input_node, "description/text()").to_s
|
1806
|
+
@text_input.description = nil if @text_input.description == ""
|
1807
|
+
@text_input.link =
|
1808
|
+
XPath.first(text_input_node, "link/text()").to_s
|
1809
|
+
@text_input.link = nil if @text_input.link == ""
|
1810
|
+
@text_input.name =
|
1811
|
+
XPath.first(text_input_node, "name/text()").to_s
|
1812
|
+
@text_input.name = nil if @text_input.name == ""
|
1813
|
+
end
|
1814
|
+
end
|
1815
|
+
return @text_input
|
1816
|
+
end
|
1817
|
+
|
1532
1818
|
# Returns the feed's copyright information
|
1533
1819
|
def copyright
|
1534
1820
|
if @copyright.nil?
|
@@ -1696,7 +1982,7 @@ module FeedTools
|
|
1696
1982
|
end
|
1697
1983
|
|
1698
1984
|
# Returns true if this feed contains explicit material.
|
1699
|
-
def explicit
|
1985
|
+
def explicit?
|
1700
1986
|
if @explicit.nil?
|
1701
1987
|
if XPath.first(channel_node,
|
1702
1988
|
"media:adult/text()").to_s.downcase == "true" ||
|
@@ -1724,9 +2010,18 @@ module FeedTools
|
|
1724
2010
|
if raw_items == nil || raw_items == []
|
1725
2011
|
raw_items = XPath.match(channel_node, "item")
|
1726
2012
|
end
|
2013
|
+
if raw_items == nil || raw_items == []
|
2014
|
+
raw_items = XPath.match(channel_node, "ITEM")
|
2015
|
+
end
|
2016
|
+
if raw_items == nil || raw_items == []
|
2017
|
+
raw_items = XPath.match(root_node, "ITEM")
|
2018
|
+
end
|
1727
2019
|
if raw_items == nil || raw_items == []
|
1728
2020
|
raw_items = XPath.match(channel_node, "entry")
|
1729
2021
|
end
|
2022
|
+
if raw_items == nil || raw_items == []
|
2023
|
+
raw_items = XPath.match(root_node, "entry")
|
2024
|
+
end
|
1730
2025
|
|
1731
2026
|
# create the individual feed items
|
1732
2027
|
@items = []
|
@@ -1767,7 +2062,7 @@ module FeedTools
|
|
1767
2062
|
# True if this feed contains audio content enclosures
|
1768
2063
|
def podcast?
|
1769
2064
|
podcast = false
|
1770
|
-
|
2065
|
+
self.items.each do |item|
|
1771
2066
|
item.enclosures.each do |enclosure|
|
1772
2067
|
podcast = true if enclosure.audio?
|
1773
2068
|
end
|
@@ -1778,7 +2073,7 @@ module FeedTools
|
|
1778
2073
|
# True if this feed contains video content enclosures
|
1779
2074
|
def vidlog?
|
1780
2075
|
vidlog = false
|
1781
|
-
|
2076
|
+
self.items.each do |item|
|
1782
2077
|
item.enclosures.each do |enclosure|
|
1783
2078
|
vidlog = true if enclosure.video?
|
1784
2079
|
end
|
@@ -1805,7 +2100,7 @@ module FeedTools
|
|
1805
2100
|
end
|
1806
2101
|
|
1807
2102
|
# Forces this feed to expire.
|
1808
|
-
def expire
|
2103
|
+
def expire!
|
1809
2104
|
self.last_retrieved = Time.mktime(1970)
|
1810
2105
|
self.save
|
1811
2106
|
end
|
@@ -1817,11 +2112,11 @@ module FeedTools
|
|
1817
2112
|
end
|
1818
2113
|
|
1819
2114
|
# Generates xml based on the content of the feed
|
1820
|
-
def build_xml(feed_type=(self.feed_type or "rss"), version=
|
2115
|
+
def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
|
1821
2116
|
xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
1822
|
-
if feed_type == "rss" && version == 0.0
|
2117
|
+
if feed_type == "rss" && (version == nil || version == 0.0)
|
1823
2118
|
version = 1.0
|
1824
|
-
elsif feed_type == "atom" && version == 0.0
|
2119
|
+
elsif feed_type == "atom" && (version == nil || version == 0.0)
|
1825
2120
|
version = 0.3
|
1826
2121
|
end
|
1827
2122
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
@@ -1838,8 +2133,9 @@ module FeedTools
|
|
1838
2133
|
else
|
1839
2134
|
xml_builder.link
|
1840
2135
|
end
|
1841
|
-
unless
|
1842
|
-
xml_builder.image("rdf:resource" => CGI.escapeHTML(
|
2136
|
+
unless images.nil? || images.empty?
|
2137
|
+
xml_builder.image("rdf:resource" => CGI.escapeHTML(
|
2138
|
+
images.first.url))
|
1843
2139
|
end
|
1844
2140
|
unless description.nil? || description == ""
|
1845
2141
|
xml_builder.description(description)
|
@@ -1866,18 +2162,30 @@ module FeedTools
|
|
1866
2162
|
end
|
1867
2163
|
build_xml_hook(feed_type, version, xml_builder)
|
1868
2164
|
end
|
1869
|
-
unless
|
1870
|
-
|
1871
|
-
|
1872
|
-
|
2165
|
+
unless images.nil? || images.empty?
|
2166
|
+
best_image = nil
|
2167
|
+
for image in self.images
|
2168
|
+
if image.link != nil
|
2169
|
+
best_image = image
|
2170
|
+
break
|
2171
|
+
end
|
2172
|
+
end
|
2173
|
+
best_image = images.first if best_image.nil?
|
2174
|
+
xml_builder.image("rdf:about" => CGI.escapeHTML(best_image.url)) do
|
2175
|
+
if best_image.title != nil && best_image.title != ""
|
2176
|
+
xml_builder.title(best_image.title)
|
2177
|
+
elsif self.title != nil && self.title != ""
|
2178
|
+
xml_builder.title(self.title)
|
1873
2179
|
else
|
1874
2180
|
xml_builder.title
|
1875
2181
|
end
|
1876
|
-
unless
|
1877
|
-
xml_builder.url(
|
2182
|
+
unless best_image.url.nil? || best_image.url == ""
|
2183
|
+
xml_builder.url(best_image.url)
|
1878
2184
|
end
|
1879
|
-
|
1880
|
-
xml_builder.link(link)
|
2185
|
+
if best_image.link != nil && best_image.link != ""
|
2186
|
+
xml_builder.link(best_image.link)
|
2187
|
+
elsif self.link != nil && self.link != ""
|
2188
|
+
xml_builder.link(self.link)
|
1881
2189
|
else
|
1882
2190
|
xml_builder.link
|
1883
2191
|
end
|
@@ -1891,7 +2199,7 @@ module FeedTools
|
|
1891
2199
|
end
|
1892
2200
|
elsif feed_type == "rss"
|
1893
2201
|
# normal rss format
|
1894
|
-
return xml_builder.rss("version" => version
|
2202
|
+
return xml_builder.rss("version" => version) do
|
1895
2203
|
unless title.nil? || title == ""
|
1896
2204
|
xml_builder.title(title)
|
1897
2205
|
end
|
@@ -1913,7 +2221,7 @@ module FeedTools
|
|
1913
2221
|
elsif feed_type == "atom"
|
1914
2222
|
# normal atom format
|
1915
2223
|
return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
|
1916
|
-
"version" => version
|
2224
|
+
"version" => version,
|
1917
2225
|
"xml:lang" => language) do
|
1918
2226
|
unless title.nil? || title == ""
|
1919
2227
|
xml_builder.title(title,
|
@@ -2112,12 +2420,18 @@ module FeedTools
|
|
2112
2420
|
end
|
2113
2421
|
return false
|
2114
2422
|
end
|
2423
|
+
|
2424
|
+
alias_method :link, :url
|
2425
|
+
alias_method :link=, :url=
|
2115
2426
|
end
|
2116
|
-
|
2427
|
+
|
2428
|
+
# TODO: Make these actual classes instead of structs
|
2429
|
+
# ==================================================
|
2117
2430
|
EnclosureHash = Struct.new( "EnclosureHash", :hash, :type )
|
2118
2431
|
EnclosurePlayer = Struct.new( "EnclosurePlayer", :url, :height, :width )
|
2119
2432
|
EnclosureCredit = Struct.new( "EnclosureCredit", :name, :role )
|
2120
|
-
EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height,
|
2433
|
+
EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height,
|
2434
|
+
:width )
|
2121
2435
|
|
2122
2436
|
# Returns the parent feed of this feed item
|
2123
2437
|
def feed
|
@@ -2186,19 +2500,27 @@ module FeedTools
|
|
2186
2500
|
def title
|
2187
2501
|
if @title.nil?
|
2188
2502
|
repair_entities = false
|
2189
|
-
|
2190
|
-
|
2191
|
-
|
2192
|
-
|
2193
|
-
|
2194
|
-
|
2195
|
-
|
2196
|
-
|
2197
|
-
|
2503
|
+
title_node = XPath.first(root_node, "title")
|
2504
|
+
if title_node.nil?
|
2505
|
+
title_node = XPath.first(root_node, "dc:title")
|
2506
|
+
end
|
2507
|
+
if title_node.nil?
|
2508
|
+
title_node = XPath.first(root_node, "TITLE")
|
2509
|
+
end
|
2510
|
+
if title_node.nil?
|
2511
|
+
return nil
|
2512
|
+
end
|
2513
|
+
if XPath.first(title_node, "@type").to_s == "xhtml" ||
|
2514
|
+
XPath.first(title_node, "@mode").to_s == "xhtml" ||
|
2515
|
+
XPath.first(title_node, "@type").to_s == "xml" ||
|
2516
|
+
XPath.first(title_node, "@mode").to_s == "xml" ||
|
2517
|
+
XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
|
2518
|
+
@title = title_node.inner_xml
|
2519
|
+
elsif XPath.first(title_node, "@type").to_s == "escaped" ||
|
2520
|
+
XPath.first(title_node, "@mode").to_s == "escaped"
|
2198
2521
|
@title = FeedTools.unescape_entities(
|
2199
|
-
XPath.first(
|
2522
|
+
XPath.first(title_node, "text()").to_s)
|
2200
2523
|
else
|
2201
|
-
title_node = XPath.first(root_node, "title")
|
2202
2524
|
@title = title_node.inner_xml
|
2203
2525
|
repair_entities = true
|
2204
2526
|
end
|
@@ -2252,16 +2574,25 @@ module FeedTools
|
|
2252
2574
|
if description_node.nil?
|
2253
2575
|
description_node = XPath.first(root_node, "abstract")
|
2254
2576
|
end
|
2577
|
+
if description_node.nil?
|
2578
|
+
description_node = XPath.first(root_node, "ABSTRACT")
|
2579
|
+
end
|
2255
2580
|
if description_node.nil?
|
2256
2581
|
description_node = XPath.first(root_node, "content:encoded")
|
2257
2582
|
end
|
2258
2583
|
if description_node.nil?
|
2259
2584
|
description_node = XPath.first(root_node, "content")
|
2260
2585
|
end
|
2586
|
+
if description_node.nil?
|
2587
|
+
description_node = XPath.first(root_node, "fullitem")
|
2588
|
+
end
|
2261
2589
|
if description_node.nil?
|
2262
2590
|
description_node = XPath.first(root_node, "info")
|
2263
2591
|
@bozo = true unless description_node.nil?
|
2264
2592
|
end
|
2593
|
+
if description_node.nil?
|
2594
|
+
return nil
|
2595
|
+
end
|
2265
2596
|
unless description_node.nil?
|
2266
2597
|
if XPath.first(description_node, "@encoding").to_s != ""
|
2267
2598
|
@description =
|
@@ -2297,7 +2628,6 @@ module FeedTools
|
|
2297
2628
|
@description = FeedTools.tidy_html(@description)
|
2298
2629
|
end
|
2299
2630
|
|
2300
|
-
@description.gsub!(/\n/, " ") if @description.size < 80
|
2301
2631
|
@description = @description.strip unless @description.nil?
|
2302
2632
|
@description = nil if @description == ""
|
2303
2633
|
end
|
@@ -2385,6 +2715,18 @@ module FeedTools
|
|
2385
2715
|
if @link == ""
|
2386
2716
|
@link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
|
2387
2717
|
end
|
2718
|
+
if @link == ""
|
2719
|
+
@link = XPath.first(root_node, "@href").to_s
|
2720
|
+
end
|
2721
|
+
if @link == ""
|
2722
|
+
@link = XPath.first(root_node, "a/@href").to_s
|
2723
|
+
end
|
2724
|
+
if @link == ""
|
2725
|
+
@link = XPath.first(root_node, "@HREF").to_s
|
2726
|
+
end
|
2727
|
+
if @link == ""
|
2728
|
+
@link = XPath.first(root_node, "A/@HREF").to_s
|
2729
|
+
end
|
2388
2730
|
if @link == ""
|
2389
2731
|
if FeedTools.is_url? self.guid
|
2390
2732
|
@link = self.guid
|
@@ -2410,7 +2752,7 @@ module FeedTools
|
|
2410
2752
|
@link = new_link
|
2411
2753
|
end
|
2412
2754
|
|
2413
|
-
# Returns the feed comment link
|
2755
|
+
# Returns the feed item comment link
|
2414
2756
|
def comment_link
|
2415
2757
|
if @comment_link.nil?
|
2416
2758
|
# get the feed comment link from the xml document
|
@@ -2423,64 +2765,104 @@ module FeedTools
|
|
2423
2765
|
return @comment_link
|
2424
2766
|
end
|
2425
2767
|
|
2426
|
-
# Sets the feed comment link
|
2768
|
+
# Sets the feed item comment link
|
2427
2769
|
def comment_link=(new_comment_link)
|
2428
2770
|
@comment_link = new_comment_link
|
2429
2771
|
end
|
2430
2772
|
|
2431
|
-
# Returns the feed
|
2432
|
-
def
|
2433
|
-
if @
|
2434
|
-
|
2435
|
-
|
2436
|
-
|
2437
|
-
|
2438
|
-
|
2439
|
-
|
2440
|
-
|
2441
|
-
|
2442
|
-
|
2773
|
+
# Returns a list of the feed item's categories
|
2774
|
+
def categories
|
2775
|
+
if @categories.nil?
|
2776
|
+
@categories = []
|
2777
|
+
category_nodes = XPath.match(root_node, "category")
|
2778
|
+
if category_nodes.nil? || category_nodes.empty?
|
2779
|
+
category_nodes = XPath.match(root_node, "dc:subject")
|
2780
|
+
end
|
2781
|
+
unless category_nodes.nil?
|
2782
|
+
for category_node in category_nodes
|
2783
|
+
category = FeedTools::Feed::Category.new
|
2784
|
+
category.term = XPath.first(category_node, "@term").to_s
|
2785
|
+
if category.term == ""
|
2786
|
+
category.term = XPath.first(category_node, "text()").to_s
|
2787
|
+
end
|
2788
|
+
category.term.strip! unless category.term.nil?
|
2789
|
+
category.term = nil if category.term == ""
|
2790
|
+
category.label = XPath.first(category_node, "@label").to_s
|
2791
|
+
category.label.strip! unless category.label.nil?
|
2792
|
+
category.label = nil if category.label == ""
|
2793
|
+
category.scheme = XPath.first(category_node, "@scheme").to_s
|
2794
|
+
if category.scheme == ""
|
2795
|
+
category.scheme = XPath.first(category_node, "@domain").to_s
|
2796
|
+
end
|
2797
|
+
category.scheme.strip! unless category.scheme.nil?
|
2798
|
+
category.scheme = nil if category.scheme == ""
|
2799
|
+
@categories << category
|
2800
|
+
end
|
2443
2801
|
end
|
2444
|
-
|
2445
|
-
|
2446
|
-
|
2447
|
-
|
2802
|
+
end
|
2803
|
+
return @categories
|
2804
|
+
end
|
2805
|
+
|
2806
|
+
# Returns a list of the feed item's images
|
2807
|
+
def images
|
2808
|
+
if @images.nil?
|
2809
|
+
@images = []
|
2810
|
+
image_nodes = XPath.match(root_node, "link")
|
2811
|
+
if image_nodes.nil? || image_nodes.empty?
|
2812
|
+
image_nodes = XPath.match(root_node, "logo")
|
2448
2813
|
end
|
2449
|
-
if
|
2450
|
-
|
2814
|
+
if image_nodes.nil? || image_nodes.empty?
|
2815
|
+
image_nodes = XPath.match(root_node, "LOGO")
|
2451
2816
|
end
|
2452
|
-
if
|
2453
|
-
|
2454
|
-
# ideal, but chances are very good that anything that makes use of this image is
|
2455
|
-
# simply not going to care anyhow.
|
2456
|
-
@image_link = XPath.first(root_node, "media:thumbnail/@url").to_s
|
2457
|
-
if @image_link == ""
|
2458
|
-
@media_image_link = @image_link
|
2459
|
-
end
|
2817
|
+
if image_nodes.nil? || image_nodes.empty?
|
2818
|
+
image_nodes = XPath.match(root_node, "image")
|
2460
2819
|
end
|
2461
|
-
|
2462
|
-
|
2463
|
-
|
2464
|
-
|
2465
|
-
|
2466
|
-
|
2467
|
-
@image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
|
2820
|
+
unless image_nodes.nil?
|
2821
|
+
for image_node in image_nodes
|
2822
|
+
image = FeedTools::Feed::Image.new
|
2823
|
+
image.url = XPath.first(image_node, "url/text()").to_s
|
2824
|
+
if image.url != ""
|
2825
|
+
self.feed.bozo = true
|
2468
2826
|
end
|
2469
|
-
|
2470
|
-
|
2471
|
-
|
2827
|
+
if image.url == ""
|
2828
|
+
image.url = XPath.first(image_node, "@rdf:resource").to_s
|
2829
|
+
end
|
2830
|
+
if image.url == "" && (image_node.name == "logo" ||
|
2831
|
+
(image_node.attributes['type'] =~ /^image/) == 0)
|
2832
|
+
image.url = XPath.first(image_node, "@href").to_s
|
2833
|
+
end
|
2834
|
+
if image.url == "" && image_node.name == "LOGO"
|
2835
|
+
image.url = XPath.first(image_node, "@HREF").to_s
|
2836
|
+
end
|
2837
|
+
image.url.strip! unless image.url.nil?
|
2838
|
+
image.url = nil if image.url == ""
|
2839
|
+
image.title = XPath.first(image_node, "title/text()").to_s
|
2840
|
+
image.title.strip! unless image.title.nil?
|
2841
|
+
image.title = nil if image.title == ""
|
2842
|
+
image.description =
|
2843
|
+
XPath.first(image_node, "description/text()").to_s
|
2844
|
+
image.description.strip! unless image.description.nil?
|
2845
|
+
image.description = nil if image.description == ""
|
2846
|
+
image.link = XPath.first(image_node, "link/text()").to_s
|
2847
|
+
image.link.strip! unless image.link.nil?
|
2848
|
+
image.link = nil if image.link == ""
|
2849
|
+
image.height = XPath.first(image_node, "height/text()").to_s.to_i
|
2850
|
+
image.height = nil if image.height <= 0
|
2851
|
+
image.width = XPath.first(image_node, "width/text()").to_s.to_i
|
2852
|
+
image.width = nil if image.width <= 0
|
2853
|
+
image.style = XPath.first(image_node, "@style").to_s.downcase
|
2854
|
+
if image.style == ""
|
2855
|
+
image.style = XPath.first(image_node, "@STYLE").to_s.downcase
|
2856
|
+
end
|
2857
|
+
image.style.strip! unless image.style.nil?
|
2858
|
+
image.style = nil if image.style == ""
|
2859
|
+
@images << image
|
2472
2860
|
end
|
2473
2861
|
end
|
2474
|
-
@image_link = FeedTools.normalize_url(@image_link)
|
2475
2862
|
end
|
2476
|
-
return @
|
2477
|
-
end
|
2478
|
-
|
2479
|
-
# Sets the feed image link
|
2480
|
-
def image_link=(new_image_link)
|
2481
|
-
@image_link = new_image_link
|
2863
|
+
return @images
|
2482
2864
|
end
|
2483
|
-
|
2865
|
+
|
2484
2866
|
# Returns the feed item itunes image link
|
2485
2867
|
#
|
2486
2868
|
# If it's not present, falls back to the normal image link.
|
@@ -2494,9 +2876,6 @@ module FeedTools
|
|
2494
2876
|
if @itunes_image_link == ""
|
2495
2877
|
@itunes_image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
|
2496
2878
|
end
|
2497
|
-
if @itunes_image_link == ""
|
2498
|
-
@itunes_image_link = self.image_link
|
2499
|
-
end
|
2500
2879
|
@itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
|
2501
2880
|
end
|
2502
2881
|
return @itunes_image_link
|
@@ -2514,9 +2893,6 @@ module FeedTools
|
|
2514
2893
|
if @media_thumbnail_link.nil?
|
2515
2894
|
# get the feed item media thumbnail link from the xml document
|
2516
2895
|
@media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
|
2517
|
-
if @media_thumbnail_link == ""
|
2518
|
-
@media_thumbnail_link = image_link
|
2519
|
-
end
|
2520
2896
|
@media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
|
2521
2897
|
end
|
2522
2898
|
return @media_thumbnail_link
|
@@ -2527,6 +2903,21 @@ module FeedTools
|
|
2527
2903
|
@media_thumbnail_link = new_media_thumbnail_link
|
2528
2904
|
end
|
2529
2905
|
|
2906
|
+
# Returns the feed item's copyright information
|
2907
|
+
def copyright
|
2908
|
+
if @copyright.nil?
|
2909
|
+
@copyright = XPath.first(root_node, "dc:rights/text()").to_s
|
2910
|
+
@copyright = FeedTools.sanitize_html(@copyright, :strip)
|
2911
|
+
@copyright = nil if @copyright == ""
|
2912
|
+
end
|
2913
|
+
return @copyright
|
2914
|
+
end
|
2915
|
+
|
2916
|
+
# Sets the feed item's copyright information
|
2917
|
+
def copyright=(new_copyright)
|
2918
|
+
@copyright = new_copyright
|
2919
|
+
end
|
2920
|
+
|
2530
2921
|
# Returns all feed item enclosures
|
2531
2922
|
def enclosures
|
2532
2923
|
if @enclosures.nil?
|
@@ -2621,11 +3012,13 @@ module FeedTools
|
|
2621
3012
|
end
|
2622
3013
|
enclosure.categories = []
|
2623
3014
|
for category in XPath.match(enclosure_node, "media:category")
|
2624
|
-
enclosure.categories <<
|
2625
|
-
|
2626
|
-
FeedTools.unescape_entities(category.
|
3015
|
+
enclosure.categories << FeedTools::Feed::Category.new
|
3016
|
+
enclosure.categories.last.term =
|
3017
|
+
FeedTools.unescape_entities(category.text)
|
3018
|
+
enclosure.categories.last.scheme =
|
3019
|
+
FeedTools.unescape_entities(category.attributes["scheme"].to_s)
|
3020
|
+
enclosure.categories.last.label =
|
2627
3021
|
FeedTools.unescape_entities(category.attributes["label"].to_s)
|
2628
|
-
)
|
2629
3022
|
if enclosure.categories.last.scheme == ""
|
2630
3023
|
enclosure.categories.last.scheme = nil
|
2631
3024
|
end
|
@@ -2714,11 +3107,13 @@ module FeedTools
|
|
2714
3107
|
if (enclosure.categories.nil? || enclosure.categories.size == 0)
|
2715
3108
|
enclosure.categories = []
|
2716
3109
|
for category in XPath.match(media_group, "media:category")
|
2717
|
-
enclosure.categories <<
|
2718
|
-
|
2719
|
-
FeedTools.unescape_entities(category.
|
3110
|
+
enclosure.categories << FeedTools::Feed::Category.new
|
3111
|
+
enclosure.categories.last.term =
|
3112
|
+
FeedTools.unescape_entities(category.text)
|
3113
|
+
enclosure.categories.last.scheme =
|
3114
|
+
FeedTools.unescape_entities(category.attributes["scheme"].to_s)
|
3115
|
+
enclosure.categories.last.label =
|
2720
3116
|
FeedTools.unescape_entities(category.attributes["label"].to_s)
|
2721
|
-
)
|
2722
3117
|
if enclosure.categories.last.scheme == ""
|
2723
3118
|
enclosure.categories.last.scheme = nil
|
2724
3119
|
end
|
@@ -2880,7 +3275,7 @@ module FeedTools
|
|
2880
3275
|
# if it's missing.
|
2881
3276
|
if @enclosures.size == 1
|
2882
3277
|
if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
|
2883
|
-
@enclosures.first.duration = self.
|
3278
|
+
@enclosures.first.duration = self.itunes_duration
|
2884
3279
|
end
|
2885
3280
|
end
|
2886
3281
|
|
@@ -2910,6 +3305,10 @@ module FeedTools
|
|
2910
3305
|
@author.raw = FeedTools.unescape_entities(
|
2911
3306
|
XPath.first(root_node, "dc:author/text()").to_s)
|
2912
3307
|
end
|
3308
|
+
if @author.raw == ""
|
3309
|
+
@author.raw = FeedTools.unescape_entities(
|
3310
|
+
XPath.first(root_node, "managingEditor/text()").to_s)
|
3311
|
+
end
|
2913
3312
|
unless @author.raw == ""
|
2914
3313
|
raw_scan = @author.raw.scan(
|
2915
3314
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
@@ -2983,6 +3382,76 @@ module FeedTools
|
|
2983
3382
|
end
|
2984
3383
|
end
|
2985
3384
|
|
3385
|
+
# Returns the feed publisher
|
3386
|
+
def publisher
|
3387
|
+
if @publisher.nil?
|
3388
|
+
@publisher = FeedTools::Feed::Author.new
|
3389
|
+
|
3390
|
+
# Set the publisher name
|
3391
|
+
@publisher.raw = FeedTools.unescape_entities(
|
3392
|
+
XPath.first(root_node, "dc:publisher/text()").to_s)
|
3393
|
+
if @publisher.raw == ""
|
3394
|
+
@publisher.raw = FeedTools.unescape_entities(
|
3395
|
+
XPath.first(root_node, "webMaster/text()").to_s)
|
3396
|
+
end
|
3397
|
+
unless @publisher.raw == ""
|
3398
|
+
raw_scan = @publisher.raw.scan(
|
3399
|
+
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
3400
|
+
if raw_scan.nil? || raw_scan.size == 0
|
3401
|
+
raw_scan = @publisher.raw.scan(
|
3402
|
+
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
3403
|
+
unless raw_scan.size == 0
|
3404
|
+
publisher_raw_pair = raw_scan.first.reverse
|
3405
|
+
end
|
3406
|
+
else
|
3407
|
+
publisher_raw_pair = raw_scan.first
|
3408
|
+
end
|
3409
|
+
if raw_scan.nil? || raw_scan.size == 0
|
3410
|
+
email_scan = @publisher.raw.scan(
|
3411
|
+
/\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
|
3412
|
+
if email_scan != nil && email_scan.size > 0
|
3413
|
+
@publisher.email = email_scan.first.strip
|
3414
|
+
end
|
3415
|
+
end
|
3416
|
+
unless publisher_raw_pair.nil? || publisher_raw_pair.size == 0
|
3417
|
+
@publisher.name = publisher_raw_pair.first.strip
|
3418
|
+
@publisher.email = publisher_raw_pair.last.strip
|
3419
|
+
else
|
3420
|
+
unless @publisher.raw.include?("@")
|
3421
|
+
# We can be reasonably sure we are looking at something
|
3422
|
+
# that the creator didn't intend to contain an email address if
|
3423
|
+
# it got through the preceding regexes and it doesn't
|
3424
|
+
# contain the tell-tale '@' symbol.
|
3425
|
+
@publisher.name = @publisher.raw
|
3426
|
+
end
|
3427
|
+
end
|
3428
|
+
end
|
3429
|
+
|
3430
|
+
@publisher.name = nil if @publisher.name == ""
|
3431
|
+
@publisher.raw = nil if @publisher.raw == ""
|
3432
|
+
@publisher.email = nil if @publisher.email == ""
|
3433
|
+
@publisher.url = nil if @publisher.url == ""
|
3434
|
+
end
|
3435
|
+
return @publisher
|
3436
|
+
end
|
3437
|
+
|
3438
|
+
# Sets the feed publisher
|
3439
|
+
def publisher=(new_publisher)
|
3440
|
+
if new_publisher.respond_to?(:name) &&
|
3441
|
+
new_publisher.respond_to?(:email) &&
|
3442
|
+
new_publisher.respond_to?(:url)
|
3443
|
+
# It's a complete Author object, just set it.
|
3444
|
+
@publisher = new_publisher
|
3445
|
+
else
|
3446
|
+
# We're not looking at an Author object, this is probably a string,
|
3447
|
+
# default to setting the publisher's name.
|
3448
|
+
if @publisher.nil?
|
3449
|
+
@publisher = FeedTools::Feed::Author.new
|
3450
|
+
end
|
3451
|
+
@publisher.name = new_publisher
|
3452
|
+
end
|
3453
|
+
end
|
3454
|
+
|
2986
3455
|
# Returns the contents of the itunes:author element
|
2987
3456
|
#
|
2988
3457
|
# This inherits from any incorrectly placed channel-level itunes:author
|
@@ -3026,10 +3495,6 @@ module FeedTools
|
|
3026
3495
|
@itunes_duration = new_itunes_duration
|
3027
3496
|
end
|
3028
3497
|
|
3029
|
-
# Sets the itunes:summary
|
3030
|
-
def itunes_summary=(new_itunes_summary)
|
3031
|
-
end
|
3032
|
-
|
3033
3498
|
# Returns the feed item time
|
3034
3499
|
def time
|
3035
3500
|
if @time.nil?
|
@@ -3054,6 +3519,32 @@ module FeedTools
|
|
3054
3519
|
@time = new_time
|
3055
3520
|
end
|
3056
3521
|
|
3522
|
+
# Returns the url for posting comments
|
3523
|
+
def comments
|
3524
|
+
if @comments.nil?
|
3525
|
+
@comments = XPath.first(root_node, "comments/text()").to_s
|
3526
|
+
@comments = nil if @comments == ""
|
3527
|
+
end
|
3528
|
+
return @comments
|
3529
|
+
end
|
3530
|
+
|
3531
|
+
# Sets the url for posting comments
|
3532
|
+
def comments=(new_comments)
|
3533
|
+
@comments = new_comments
|
3534
|
+
end
|
3535
|
+
|
3536
|
+
# The source that this post was based on
|
3537
|
+
def source
|
3538
|
+
if @source.nil?
|
3539
|
+
@source = FeedTools::Feed::Link.new
|
3540
|
+
@source.url = XPath.first(root_node, "source/@url").to_s
|
3541
|
+
@source.url = nil if @source.url == ""
|
3542
|
+
@source.value = XPath.first(root_node, "source/text()").to_s
|
3543
|
+
@source.value = nil if @source.value == ""
|
3544
|
+
end
|
3545
|
+
return @source
|
3546
|
+
end
|
3547
|
+
|
3057
3548
|
# Returns the feed item tags
|
3058
3549
|
def tags
|
3059
3550
|
# TODO: support the rel="tag" microformat
|
@@ -3131,7 +3622,7 @@ module FeedTools
|
|
3131
3622
|
"itunes:explicit/text()").to_s.downcase == "yes" ||
|
3132
3623
|
XPath.first(root_node,
|
3133
3624
|
"itunes:explicit/text()").to_s.downcase == "true" ||
|
3134
|
-
feed.explicit
|
3625
|
+
feed.explicit?
|
3135
3626
|
@explicit = true
|
3136
3627
|
else
|
3137
3628
|
@explicit = false
|
@@ -3152,8 +3643,13 @@ module FeedTools
|
|
3152
3643
|
end
|
3153
3644
|
|
3154
3645
|
# Generates xml based on the content of the feed item
|
3155
|
-
def build_xml(feed_type=(self.feed.feed_type or "rss"), version=
|
3646
|
+
def build_xml(feed_type=(self.feed.feed_type or "rss"), version=nil,
|
3156
3647
|
xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
3648
|
+
if feed_type == "rss" && (version == nil || version == 0.0)
|
3649
|
+
version = 1.0
|
3650
|
+
elsif feed_type == "atom" && (version == nil || version == 0.0)
|
3651
|
+
version = 0.3
|
3652
|
+
end
|
3157
3653
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
3158
3654
|
# RDF-based rss format
|
3159
3655
|
if link.nil?
|
@@ -3253,6 +3749,8 @@ module FeedTools
|
|
3253
3749
|
alias_method :tagline=, :description=
|
3254
3750
|
alias_method :subtitle, :description
|
3255
3751
|
alias_method :subtitle=, :description=
|
3752
|
+
alias_method :summary, :description
|
3753
|
+
alias_method :summary=, :description=
|
3256
3754
|
alias_method :abstract, :description
|
3257
3755
|
alias_method :abstract=, :description=
|
3258
3756
|
alias_method :content, :description
|
@@ -3264,12 +3762,26 @@ end
|
|
3264
3762
|
|
3265
3763
|
module REXML # :nodoc:
|
3266
3764
|
class Element # :nodoc:
|
3267
|
-
|
3268
|
-
|
3269
|
-
|
3270
|
-
|
3765
|
+
unless REXML::Element.public_instance_methods.include? :inner_xml
|
3766
|
+
def inner_xml # :nodoc:
|
3767
|
+
result = ""
|
3768
|
+
self.each_child do |child|
|
3769
|
+
result << child.to_s
|
3770
|
+
end
|
3771
|
+
return result
|
3772
|
+
end
|
3773
|
+
end
|
3774
|
+
|
3775
|
+
unless REXML::Element.public_instance_methods.include? :base_uri
|
3776
|
+
def base_uri # :nodoc:
|
3777
|
+
if not attribute('xml:base')
|
3778
|
+
return parent.base_uri
|
3779
|
+
elsif parent
|
3780
|
+
return URI.join(parent.base_uri, attribute('xml:base').value).to_s
|
3781
|
+
else
|
3782
|
+
return (attribute('xml:base').value or '')
|
3783
|
+
end
|
3271
3784
|
end
|
3272
|
-
return result
|
3273
3785
|
end
|
3274
3786
|
end
|
3275
3787
|
end
|