feedtools 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,15 @@
1
+ == FeedTools 0.2.4
2
+ * fixed bug in the sqlite table creation query
3
+ * greatly improved image support
4
+ * improved cdf support (still needs way more work)
5
+ * support for text input elements
6
+ * now possible to force retrieval from the cache only
7
+ * increased the flexibility of the database caching implementation
8
+ * feed attributes accessible through the keys and values collections
9
+ * minor coding style changes
10
+ * fixed really dumb typo in the podcast? and vidlog? methods
11
+ * fixed exception from missing titles and descriptions
12
+ * now passes all of mark pilgrim's well-formed rss tests
1
13
  == FeedTools 0.2.3
2
14
  * fixed omission of get parameters from http requests
3
15
  == FeedTools 0.2.2
data/lib/feed_tools.rb CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
25
25
  ENV['RAILS_ENV'] ||
26
26
  'production' # :nodoc:
27
27
 
28
- FEED_TOOLS_VERSION = "0.2.3"
28
+ FEED_TOOLS_VERSION = "0.2.4"
29
29
 
30
30
  $:.unshift(File.dirname(__FILE__))
31
31
  $:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
@@ -75,7 +75,7 @@ require 'yaml'
75
75
  # => "News for nerds, stuff that matters"
76
76
  # slashdot_feed.link
77
77
  # => "http://slashdot.org/"
78
- # slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
78
+ # slashdot_feed.items.first.find_node("slash:hitparade/text()").value
79
79
  # => "43,37,28,23,11,3,1"
80
80
  module FeedTools
81
81
 
@@ -150,7 +150,7 @@ module FeedTools
150
150
  begin
151
151
  ActiveRecord::Base.connection.execute "select id, url, title, " +
152
152
  "link, xml_data, http_headers, last_retrieved " +
153
- "from feeds limit 1"
153
+ "from #{self.table_name()} limit 1"
154
154
  rescue ActiveRecord::StatementInvalid
155
155
  return false
156
156
  rescue
@@ -163,7 +163,7 @@ module FeedTools
163
163
  def DatabaseFeedCache.create_table
164
164
  unless DatabaseFeedCache.table_exists?
165
165
  feeds_mysql = <<-SQL_END
166
- CREATE TABLE `feeds` (
166
+ CREATE TABLE `#{self.table_name()}` (
167
167
  `id` int(10) unsigned NOT NULL auto_increment,
168
168
  `url` varchar(255) default NULL,
169
169
  `title` varchar(255) default NULL,
@@ -175,19 +175,18 @@ module FeedTools
175
175
  ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
176
176
  SQL_END
177
177
  feeds_sqlite = <<-SQL_END
178
- CREATE TABLE 'feeds' (
178
+ CREATE TABLE '#{self.table_name()}' (
179
179
  'id' INTEGER PRIMARY KEY NOT NULL,
180
180
  'url' VARCHAR(255) DEFAULT NULL,
181
181
  'title' VARCHAR(255) DEFAULT NULL,
182
182
  'link' VARCHAR(255) DEFAULT NULL,
183
- 'image_link' VARCHAR(255) DEFAULT NULL,
184
183
  'xml_data' TEXT DEFAULT NULL,
185
184
  'http_headers' TEXT DEFAULT NULL,
186
185
  'last_retrieved' DATETIME DEFAULT NULL,
187
186
  );
188
187
  SQL_END
189
188
  feeds_psql = <<-SQL_END
190
- CREATE TABLE feeds (
189
+ CREATE TABLE #{self.table_name()} (
191
190
  id SERIAL PRIMARY KEY NOT NULL,
192
191
  url varchar(255) default NULL,
193
192
  title varchar(255) default NULL,
@@ -206,7 +205,7 @@ module FeedTools
206
205
  table_creation_sql = feeds_psql
207
206
  end
208
207
  if table_creation_sql.nil?
209
- raise "Could not build feed_items table."
208
+ raise "Could not build #{self.table_name()} table."
210
209
  else
211
210
  connection.execute table_creation_sql
212
211
  end
@@ -219,24 +218,86 @@ module FeedTools
219
218
  end
220
219
 
221
220
  # Quick method of enabling small classes to have their attributes
222
- # accessible as a dictionary.
223
- module AttributeDictionary # :nodoc:
224
- # Access the attributes as a dictionary
221
+ # accessible as a dictionary. These methods should not be used whenever
222
+ # performance is going to be an issue. They exist almost entirely for the
223
+ # purposes of aesthetics and/or debugging.
224
+ module AttributeDictionary
225
+ # Access the attributes as a dictionary.
225
226
  def [](key)
226
- # Assignment, and destructive methods should not be
227
- # accessed like this.
228
- return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
229
- return nil unless self.method(key).arity == 0
227
+ return nil unless self.keys.include? key
230
228
  return self.send(key)
231
229
  end
232
230
 
233
- # Access the attributes as a dictionary
231
+ # Access the attributes as a dictionary.
234
232
  def []=(key, value)
235
- # Assignment, and destructive methods should not be
236
- # accessed like this.
237
- return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
238
- return nil unless self.method(key + "=").arity == 1
239
- return self.send(key + "=", value)
233
+ pseudo_key = key
234
+ pseudo_key = key[0..-2] if key[-1..-1] == "?"
235
+ return nil unless self.method(pseudo_key + "=").arity == 1
236
+ local_keys = self.keys
237
+ unless local_keys.include?(key) || local_keys.include?(pseudo_key)
238
+ return nil
239
+ end
240
+ return self.send(pseudo_key + "=", value)
241
+ end
242
+
243
+ # Access the attributes as a dictionary.
244
+ def keys
245
+ key_methods = []
246
+ for key in self.methods
247
+ # Quick-n-dirty hack to speed things up and keep the list clean
248
+ if self.method(key).arity == 0 && key[-1..-1] != "=" &&
249
+ key[-1..-1] != "!" && key[0..1] != "__" &&
250
+ key[0..2] != "to_" && key[-5..-1] != "_node" &&
251
+ key != "cache_object" && key != "save" && key != "xml" &&
252
+ key != "xml_data" && key != "expired?" && key != "live?" &&
253
+ key != "feed"
254
+ superfluous_ancestors = self.class.ancestors
255
+ superfluous_ancestors = superfluous_ancestors[1..-1]
256
+ superfluous = false
257
+ for ancestor in superfluous_ancestors
258
+ if ancestor.instance_methods.include? key
259
+ superfluous = true
260
+ break
261
+ end
262
+ end
263
+ next if superfluous
264
+ key_methods << key
265
+ end
266
+ end
267
+ return key_methods.sort
268
+ end
269
+
270
+ # Access the attributes as a dictionary.
271
+ # Please note that this method may cause a nearly complete parse of a
272
+ # feed. This will be very slow.
273
+ def values
274
+ return self.keys.map { |key| self[key] }
275
+ end
276
+
277
+ # Access the attributes as a dictionary.
278
+ # Please note that this method may cause a complete parse of a feed.
279
+ # This will be very slow.
280
+ def to_hash
281
+ attribute_hash = {}
282
+ for key in keys
283
+ value = self[key]
284
+ if value.respond_to? :to_hash
285
+ value = value.to_hash
286
+ end
287
+ if value.respond_to? :to_ary
288
+ new_value = []
289
+ for item in value.to_ary
290
+ if item.respond_to? :to_hash
291
+ new_value << item.to_hash
292
+ else
293
+ new_value << item
294
+ end
295
+ end
296
+ value = new_value
297
+ end
298
+ attribute_hash[key] = value
299
+ end
300
+ return attribute_hash
240
301
  end
241
302
  end
242
303
 
@@ -278,6 +339,22 @@ module FeedTools
278
339
  @feed_cache = new_feed_cache
279
340
  end
280
341
 
342
+ # Returns true if FeedTools should only retrieve from the cache and avoid
343
+ # pulling feeds from their remote location.
344
+ def FeedTools.cache_only?
345
+ @cache_only = false if @cache_only.nil?
346
+ return @cache_only
347
+ end
348
+
349
+ # Sets whether or not FeedTools should retrieve feeds from remote locations
350
+ # or if it should rely on the cache only.
351
+ def FeedTools.cache_only=(new_cache_only)
352
+ if new_cache_only != true && new_cache_only != false
353
+ raise ArgumentError, "Must be either true or false."
354
+ end
355
+ @cache_only = new_cache_only
356
+ end
357
+
281
358
  # Returns true if FeedTools.feed_cache is not nil and a connection with
282
359
  # the cache has been successfully established. Also returns false if an
283
360
  # error is raised while trying to determine the status of the cache.
@@ -613,37 +690,29 @@ module FeedTools
613
690
  end
614
691
 
615
692
  class Feed
616
- include REXML
693
+ include REXML # :nodoc:
617
694
  include AttributeDictionary
618
695
 
619
696
  # Represents a feed/feed item's category
620
697
  class Category
698
+ include AttributeDictionary
699
+
621
700
  # The category term value
622
701
  attr_accessor :term
623
702
  # The categorization scheme
624
703
  attr_accessor :scheme
625
704
  # A human-readable description of the category
626
705
  attr_accessor :label
627
-
628
- # Relays any unknown methods to the term so that you can treat the
629
- # category object as a string
630
- def method_missing(msg, *params)
631
- self.term.send(msg, params)
632
- end
633
-
634
- # Relays the to_s method to the term field
635
- def to_s
636
- self.term.to_s
637
- end
638
-
639
- # Relays the inspect method to the term field
640
- def inspect
641
- self.term.inspect
642
- end
706
+
707
+ alias_method :value, :term
708
+ alias_method :category, :term
709
+ alias_method :domain, :scheme
643
710
  end
644
711
 
645
712
  # Represents a feed/feed item's author
646
713
  class Author
714
+ include AttributeDictionary
715
+
647
716
  # The author's real name
648
717
  attr_accessor :name
649
718
  # The author's email address
@@ -652,26 +721,51 @@ module FeedTools
652
721
  attr_accessor :url
653
722
  # The raw value of the author tag if present
654
723
  attr_accessor :raw
655
-
656
- # Relays any unknown methods to the name so that you can treat the
657
- # author object as a string
658
- def method_missing(msg, *params)
659
- self.name.send(msg, params)
660
- end
661
-
662
- # Relays the to_s method to the name field
663
- def to_s
664
- self.name.to_s
665
- end
724
+ end
725
+
726
+ # Represents a feed's image
727
+ class Image
728
+ include AttributeDictionary
666
729
 
667
- # Relays the inspect method to the name field
668
- def inspect
669
- self.name.inspect
670
- end
730
+ # The image's title
731
+ attr_accessor :title
732
+ # The image's description
733
+ attr_accessor :description
734
+ # The image's url
735
+ attr_accessor :url
736
+ # The url to link the image to
737
+ attr_accessor :link
738
+ # The width of the image
739
+ attr_accessor :width
740
+ # The height of the image
741
+ attr_accessor :height
742
+ # The style of the image
743
+ # Possible values are "icon", "image", or "image-wide"
744
+ attr_accessor :style
745
+ end
746
+
747
+ # Represents a feed's text input element.
748
+ # Be aware that this will be ignored for feed generation. It's a
749
+ # pointless element that aggregators usually ignore and it doesn't have an
750
+ # equivalent in all feeds types.
751
+ class TextInput
752
+ include AttributeDictionary
753
+
754
+ # The label of the Submit button in the text input area.
755
+ attr_accessor :title
756
+ # The description explains the text input area.
757
+ attr_accessor :description
758
+ # The URL of the CGI script that processes text input requests.
759
+ attr_accessor :link
760
+ # The name of the text object in the text input area.
761
+ attr_accessor :name
671
762
  end
672
763
 
673
764
  # Represents a feed's cloud.
765
+ # Be aware that this will be ignored for feed generation.
674
766
  class Cloud
767
+ include AttributeDictionary
768
+
675
769
  # The domain of the cloud.
676
770
  attr_accessor :domain
677
771
  # The path for the cloud.
@@ -684,7 +778,19 @@ module FeedTools
684
778
  # The procedure to use to request notification.
685
779
  attr_accessor :register_procedure
686
780
  end
687
-
781
+
782
+ # Represents a simple hyperlink
783
+ class Link
784
+ include AttributeDictionary
785
+
786
+ # The url that is being linked to
787
+ attr_accessor :url
788
+ # The content of the hyperlink
789
+ attr_accessor :value
790
+
791
+ alias_method :href, :url
792
+ end
793
+
688
794
  # Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
689
795
  def Feed.open(url)
690
796
  # clean up the url
@@ -693,19 +799,19 @@ module FeedTools
693
799
  # create and load the new feed
694
800
  feed = Feed.new
695
801
  feed.url = url
696
- feed.update
802
+ feed.update!
697
803
  return feed
698
804
  end
699
805
 
700
806
  # Loads the feed from the remote url if the feed has expired from the cache or cannot be
701
807
  # retrieved from the cache for some reason.
702
- def update
808
+ def update!
703
809
  if self.http_headers.nil? && !(self.cache_object.nil?) &&
704
810
  !(self.cache_object.http_headers.nil?)
705
811
  @http_headers = YAML.load(self.cache_object.http_headers)
706
812
  end
707
- if expired?
708
- load_remote_feed
813
+ if expired? && !FeedTools.cache_only?
814
+ load_remote_feed!
709
815
  else
710
816
  @live = false
711
817
  end
@@ -715,7 +821,7 @@ module FeedTools
715
821
  # field to be set. If an etag or the last_modified date has been set,
716
822
  # attempts to use them to prevent unnecessary reloading of identical
717
823
  # content.
718
- def load_remote_feed
824
+ def load_remote_feed!
719
825
  @live = true
720
826
  if self.http_headers.nil? && !(self.cache_object.nil?) &&
721
827
  !(self.cache_object.http_headers.nil?)
@@ -970,6 +1076,10 @@ module FeedTools
970
1076
  # Returns the root node of the feed.
971
1077
  def root_node
972
1078
  if @root_node.nil?
1079
+ # TODO: Fix this so that added content at the end of the file doesn't
1080
+ # break this stuff.
1081
+ # E.g.: http://smogzer.tripod.com/smog.rdf
1082
+ # ===================================================================
973
1083
  @root_node = xml.root
974
1084
  end
975
1085
  return @root_node
@@ -979,6 +1089,9 @@ module FeedTools
979
1089
  def channel_node
980
1090
  if @channel_node.nil?
981
1091
  @channel_node = XPath.first(root_node, "channel")
1092
+ if @channel_node == nil
1093
+ @channel_node = XPath.first(root_node, "CHANNEL")
1094
+ end
982
1095
  if @channel_node == nil
983
1096
  @channel_node = XPath.first(root_node, "feedinfo")
984
1097
  end
@@ -1039,6 +1152,8 @@ module FeedTools
1039
1152
  @feed_type = "atom"
1040
1153
  when "rdf:rdf"
1041
1154
  @feed_type = "rss"
1155
+ when "rdf"
1156
+ @feed_type = "rss"
1042
1157
  when "rss"
1043
1158
  @feed_type = "rss"
1044
1159
  when "channel"
@@ -1053,6 +1168,58 @@ module FeedTools
1053
1168
  @feed_type = new_feed_type
1054
1169
  end
1055
1170
 
1171
+ # Returns the version number of the feed type.
1172
+ # Intentionally does not differentiate between the Netscape and Userland
1173
+ # versions of RSS 0.91.
1174
+ def feed_version
1175
+ if @feed_version.nil?
1176
+ version = nil
1177
+ begin
1178
+ version = XPath.first(root_node, "@version").to_s.strip.to_f
1179
+ rescue
1180
+ end
1181
+ version = nil if version == 0.0
1182
+ default_namespace = XPath.first(root_node, "@xmlns").to_s.strip
1183
+ case self.feed_type
1184
+ when "atom"
1185
+ if default_namespace == "http://www.w3.org/2005/Atom"
1186
+ @feed_version = 1.0
1187
+ elsif version != nil
1188
+ @feed_version = version
1189
+ elsif default_namespace == "http://purl.org/atom/ns#"
1190
+ @feed_version = 0.3
1191
+ end
1192
+ when "rss"
1193
+ if default_namespace == "http://my.netscape.com/rdf/simple/0.9/"
1194
+ @feed_version = 0.9
1195
+ elsif default_namespace == "http://purl.org/rss/1.0/"
1196
+ @feed_version = 1.0
1197
+ elsif default_namespace == "http://purl.org/net/rss1.1#"
1198
+ @feed_version = 1.1
1199
+ elsif version != nil
1200
+ case version
1201
+ when 2.1
1202
+ @feed_version = 2.0
1203
+ when 2.01
1204
+ @feed_version = 2.0
1205
+ else
1206
+ @feed_version = version
1207
+ end
1208
+ end
1209
+ when "cdf"
1210
+ @feed_version = 0.4
1211
+ when "!okay/news"
1212
+ @feed_version = nil
1213
+ end
1214
+ end
1215
+ return @feed_version
1216
+ end
1217
+
1218
+ # Sets the default feed version
1219
+ def feed_version=(new_feed_version)
1220
+ @feed_version = new_feed_version
1221
+ end
1222
+
1056
1223
  # Returns the feed's unique id
1057
1224
  def id
1058
1225
  if @id.nil?
@@ -1089,19 +1256,27 @@ module FeedTools
1089
1256
  def title
1090
1257
  if @title.nil?
1091
1258
  repair_entities = false
1092
- if XPath.first(channel_node, "title/@type").to_s == "xhtml" ||
1093
- XPath.first(channel_node, "title/@mode").to_s == "xhtml" ||
1094
- XPath.first(channel_node, "title/@type").to_s == "xml" ||
1095
- XPath.first(channel_node, "title/@mode").to_s == "xml" ||
1096
- XPath.first(channel_node, "title/@type").to_s ==
1097
- "application/xhtml+xml"
1098
- @title = XPath.first(channel_node, "title").inner_xml
1099
- elsif XPath.first(channel_node, "title/@type").to_s == "escaped" ||
1100
- XPath.first(channel_node, "title/@mode").to_s == "escaped"
1259
+ title_node = XPath.first(channel_node, "title")
1260
+ if title_node.nil?
1261
+ title_node = XPath.first(channel_node, "dc:title")
1262
+ end
1263
+ if title_node.nil?
1264
+ title_node = XPath.first(channel_node, "TITLE")
1265
+ end
1266
+ if title_node.nil?
1267
+ return nil
1268
+ end
1269
+ if XPath.first(title_node, "@type").to_s == "xhtml" ||
1270
+ XPath.first(title_node, "@mode").to_s == "xhtml" ||
1271
+ XPath.first(title_node, "@type").to_s == "xml" ||
1272
+ XPath.first(title_node, "@mode").to_s == "xml" ||
1273
+ XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
1274
+ @title = title_node.inner_xml
1275
+ elsif XPath.first(title_node, "@type").to_s == "escaped" ||
1276
+ XPath.first(title_node, "@mode").to_s == "escaped"
1101
1277
  @title = FeedTools.unescape_entities(
1102
- XPath.first(channel_node, "title/text()").to_s)
1278
+ XPath.first(title_node, "text()").to_s)
1103
1279
  else
1104
- title_node = XPath.first(channel_node, "title")
1105
1280
  @title = title_node.inner_xml
1106
1281
  repair_entities = true
1107
1282
  end
@@ -1141,6 +1316,9 @@ module FeedTools
1141
1316
  if description_node.nil?
1142
1317
  description_node = XPath.first(channel_node, "abstract")
1143
1318
  end
1319
+ if description_node.nil?
1320
+ description_node = XPath.first(channel_node, "ABSTRACT")
1321
+ end
1144
1322
  if description_node.nil?
1145
1323
  description_node = XPath.first(channel_node, "info")
1146
1324
  end
@@ -1160,6 +1338,9 @@ module FeedTools
1160
1338
  description_node = XPath.first(channel_node, "body")
1161
1339
  @bozo = true unless description_node.nil?
1162
1340
  end
1341
+ if description_node.nil?
1342
+ return nil
1343
+ end
1163
1344
  unless description_node.nil?
1164
1345
  if XPath.first(description_node, "@encoding").to_s != ""
1165
1346
  @description =
@@ -1195,7 +1376,6 @@ module FeedTools
1195
1376
  @description = FeedTools.tidy_html(@description)
1196
1377
  end
1197
1378
 
1198
- @description.gsub!(/\n/, " ") if @description.size < 80
1199
1379
  @description = @description.strip unless @description.nil?
1200
1380
  @description = nil if @description == ""
1201
1381
  end
@@ -1263,6 +1443,15 @@ module FeedTools
1263
1443
  if @link == ""
1264
1444
  @link = XPath.first(channel_node, "@href").to_s
1265
1445
  end
1446
+ if @link == ""
1447
+ @link = XPath.first(channel_node, "@HREF").to_s
1448
+ end
1449
+ if @link == ""
1450
+ @link = XPath.first(channel_node, "a/@href").to_s
1451
+ end
1452
+ if @link == ""
1453
+ @link = XPath.first(channel_node, "A/@HREF").to_s
1454
+ end
1266
1455
  if @link == ""
1267
1456
  if FeedTools.is_url? self.guid
1268
1457
  @link = self.guid
@@ -1290,39 +1479,6 @@ module FeedTools
1290
1479
  end
1291
1480
  end
1292
1481
 
1293
- # Returns the feed image link
1294
- def image_link
1295
- if @image_link.nil?
1296
- # get the feed image link from the xml document
1297
- @image_link = XPath.first(channel_node, "image/url/text()").to_s
1298
- if @image_link == ""
1299
- @image_link = XPath.first(channel_node, "image/@rdf:resource").to_s
1300
- end
1301
- if @image_link == ""
1302
- @image_link = XPath.first(channel_node, "link[@type='image/jpeg']/@href").to_s
1303
- end
1304
- if @image_link == ""
1305
- @image_link = XPath.first(channel_node, "link[@type='image/gif']/@href").to_s
1306
- end
1307
- if @image_link == ""
1308
- @image_link = XPath.first(channel_node, "link[@type='image/png']/@href").to_s
1309
- end
1310
- if @image_link == ""
1311
- @image_link = XPath.first(channel_node, "logo[@style='image']/@href").to_s
1312
- end
1313
- if @image_link == ""
1314
- @image_link = XPath.first(channel_node, "logo/@href").to_s
1315
- end
1316
- @image_link = FeedTools.normalize_url(@image_link)
1317
- end
1318
- return @image_link
1319
- end
1320
-
1321
- # Sets the feed image link
1322
- def image_link=(new_image_link)
1323
- @image_link = new_image_link
1324
- end
1325
-
1326
1482
  # Returns the url to the icon file for this feed.
1327
1483
  #
1328
1484
  # This method uses the url from the link field in order to avoid grabbing
@@ -1348,10 +1504,19 @@ module FeedTools
1348
1504
  "icon/text()").to_s
1349
1505
  end
1350
1506
  if @icon_link == ""
1507
+ @icon_link = XPath.first(channel_node,
1508
+ "logo[@style='icon']/@href").to_s
1509
+ end
1510
+ if @icon_link == ""
1511
+ @icon_link = XPath.first(channel_node,
1512
+ "LOGO[@STYLE='ICON']/@HREF").to_s
1513
+ end
1514
+ if @icon_link == "" && self.link != nil && self.link != ""
1351
1515
  link_uri = URI.parse(FeedTools.normalize_url(self.link))
1352
1516
  @icon_link =
1353
1517
  link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1354
1518
  end
1519
+ icon_link = nil if icon_link == ""
1355
1520
  end
1356
1521
  return @icon_link
1357
1522
  end
@@ -1375,6 +1540,10 @@ module FeedTools
1375
1540
  @author.raw = FeedTools.unescape_entities(
1376
1541
  XPath.first(channel_node, "dc:author/text()").to_s)
1377
1542
  end
1543
+ if @author.raw == ""
1544
+ @author.raw = FeedTools.unescape_entities(
1545
+ XPath.first(channel_node, "managingEditor/text()").to_s)
1546
+ end
1378
1547
  unless @author.raw == ""
1379
1548
  raw_scan = @author.raw.scan(
1380
1549
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -1456,6 +1625,10 @@ module FeedTools
1456
1625
  # Set the author name
1457
1626
  @publisher.raw = FeedTools.unescape_entities(
1458
1627
  XPath.first(channel_node, "dc:publisher/text()").to_s)
1628
+ if @publisher.raw == ""
1629
+ @publisher.raw = FeedTools.unescape_entities(
1630
+ XPath.first(channel_node, "webMaster/text()").to_s)
1631
+ end
1459
1632
  unless @publisher.raw == ""
1460
1633
  raw_scan = @publisher.raw.scan(
1461
1634
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -1529,6 +1702,119 @@ module FeedTools
1529
1702
  return @itunes_author
1530
1703
  end
1531
1704
 
1705
+ # Returns a list of the feed's categories
1706
+ def categories
1707
+ if @categories.nil?
1708
+ @categories = []
1709
+ category_nodes = XPath.match(channel_node, "category")
1710
+ if category_nodes.nil? || category_nodes.empty?
1711
+ category_nodes = XPath.match(channel_node, "dc:subject")
1712
+ end
1713
+ unless category_nodes.nil?
1714
+ for category_node in category_nodes
1715
+ category = FeedTools::Feed::Category.new
1716
+ category.term = XPath.first(category_node, "@term").to_s
1717
+ if category.term == ""
1718
+ category.term = XPath.first(category_node, "text()").to_s
1719
+ end
1720
+ category.term.strip! unless category.term.nil?
1721
+ category.term = nil if category.term == ""
1722
+ category.label = XPath.first(category_node, "@label").to_s
1723
+ category.label.strip! unless category.label.nil?
1724
+ category.label = nil if category.label == ""
1725
+ category.scheme = XPath.first(category_node, "@scheme").to_s
1726
+ if category.scheme == ""
1727
+ category.scheme = XPath.first(category_node, "@domain").to_s
1728
+ end
1729
+ category.scheme.strip! unless category.scheme.nil?
1730
+ category.scheme = nil if category.scheme == ""
1731
+ @categories << category
1732
+ end
1733
+ end
1734
+ end
1735
+ return @categories
1736
+ end
1737
+
1738
+ # Returns a list of the feed's images
1739
+ def images
1740
+ if @images.nil?
1741
+ @images = []
1742
+ image_nodes = XPath.match(channel_node, "image")
1743
+ if image_nodes.nil? || image_nodes.empty?
1744
+ image_nodes = XPath.match(channel_node, "link")
1745
+ end
1746
+ if image_nodes.nil? || image_nodes.empty?
1747
+ image_nodes = XPath.match(channel_node, "logo")
1748
+ end
1749
+ if image_nodes.nil? || image_nodes.empty?
1750
+ image_nodes = XPath.match(channel_node, "LOGO")
1751
+ end
1752
+ unless image_nodes.nil?
1753
+ for image_node in image_nodes
1754
+ image = FeedTools::Feed::Image.new
1755
+ image.url = XPath.first(image_node, "url/text()").to_s
1756
+ if image.url == ""
1757
+ image.url = XPath.first(image_node, "@rdf:resource").to_s
1758
+ end
1759
+ if image.url == "" && (image_node.name == "logo" ||
1760
+ (image_node.attributes['type'] =~ /^image/) == 0)
1761
+ image.url = XPath.first(image_node, "@href").to_s
1762
+ end
1763
+ if image.url == "" && image_node.name == "LOGO"
1764
+ image.url = XPath.first(image_node, "@HREF").to_s
1765
+ end
1766
+ image.url.strip! unless image.url.nil?
1767
+ image.url = nil if image.url == ""
1768
+ image.title = XPath.first(image_node, "title/text()").to_s
1769
+ image.title.strip! unless image.title.nil?
1770
+ image.title = nil if image.title == ""
1771
+ image.description =
1772
+ XPath.first(image_node, "description/text()").to_s
1773
+ image.description.strip! unless image.description.nil?
1774
+ image.description = nil if image.description == ""
1775
+ image.link = XPath.first(image_node, "link/text()").to_s
1776
+ image.link.strip! unless image.link.nil?
1777
+ image.link = nil if image.link == ""
1778
+ image.height = XPath.first(image_node, "height/text()").to_s.to_i
1779
+ image.height = nil if image.height <= 0
1780
+ image.width = XPath.first(image_node, "width/text()").to_s.to_i
1781
+ image.width = nil if image.width <= 0
1782
+ image.style = XPath.first(image_node, "@style").to_s.downcase
1783
+ if image.style == ""
1784
+ image.style = XPath.first(image_node, "@STYLE").to_s.downcase
1785
+ end
1786
+ image.style.strip! unless image.style.nil?
1787
+ image.style = nil if image.style == ""
1788
+ @images << image
1789
+ end
1790
+ end
1791
+ end
1792
+ return @images
1793
+ end
1794
+
1795
+ # Returns the feed's text input field
1796
+ def text_input
1797
+ if @text_input.nil?
1798
+ @text_input = FeedTools::Feed::TextInput.new
1799
+ text_input_node = XPath.first(channel_node, "textInput")
1800
+ unless text_input_node.nil?
1801
+ @text_input.title =
1802
+ XPath.first(text_input_node, "title/text()").to_s
1803
+ @text_input.title = nil if @text_input.title == ""
1804
+ @text_input.description =
1805
+ XPath.first(text_input_node, "description/text()").to_s
1806
+ @text_input.description = nil if @text_input.description == ""
1807
+ @text_input.link =
1808
+ XPath.first(text_input_node, "link/text()").to_s
1809
+ @text_input.link = nil if @text_input.link == ""
1810
+ @text_input.name =
1811
+ XPath.first(text_input_node, "name/text()").to_s
1812
+ @text_input.name = nil if @text_input.name == ""
1813
+ end
1814
+ end
1815
+ return @text_input
1816
+ end
1817
+
1532
1818
  # Returns the feed's copyright information
1533
1819
  def copyright
1534
1820
  if @copyright.nil?
@@ -1696,7 +1982,7 @@ module FeedTools
1696
1982
  end
1697
1983
 
1698
1984
  # Returns true if this feed contains explicit material.
1699
- def explicit
1985
+ def explicit?
1700
1986
  if @explicit.nil?
1701
1987
  if XPath.first(channel_node,
1702
1988
  "media:adult/text()").to_s.downcase == "true" ||
@@ -1724,9 +2010,18 @@ module FeedTools
1724
2010
  if raw_items == nil || raw_items == []
1725
2011
  raw_items = XPath.match(channel_node, "item")
1726
2012
  end
2013
+ if raw_items == nil || raw_items == []
2014
+ raw_items = XPath.match(channel_node, "ITEM")
2015
+ end
2016
+ if raw_items == nil || raw_items == []
2017
+ raw_items = XPath.match(root_node, "ITEM")
2018
+ end
1727
2019
  if raw_items == nil || raw_items == []
1728
2020
  raw_items = XPath.match(channel_node, "entry")
1729
2021
  end
2022
+ if raw_items == nil || raw_items == []
2023
+ raw_items = XPath.match(root_node, "entry")
2024
+ end
1730
2025
 
1731
2026
  # create the individual feed items
1732
2027
  @items = []
@@ -1767,7 +2062,7 @@ module FeedTools
1767
2062
  # True if this feed contains audio content enclosures
1768
2063
  def podcast?
1769
2064
  podcast = false
1770
- $test_feed.items.each do |item|
2065
+ self.items.each do |item|
1771
2066
  item.enclosures.each do |enclosure|
1772
2067
  podcast = true if enclosure.audio?
1773
2068
  end
@@ -1778,7 +2073,7 @@ module FeedTools
1778
2073
  # True if this feed contains video content enclosures
1779
2074
  def vidlog?
1780
2075
  vidlog = false
1781
- $test_feed.items.each do |item|
2076
+ self.items.each do |item|
1782
2077
  item.enclosures.each do |enclosure|
1783
2078
  vidlog = true if enclosure.video?
1784
2079
  end
@@ -1805,7 +2100,7 @@ module FeedTools
1805
2100
  end
1806
2101
 
1807
2102
  # Forces this feed to expire.
1808
- def expire
2103
+ def expire!
1809
2104
  self.last_retrieved = Time.mktime(1970)
1810
2105
  self.save
1811
2106
  end
@@ -1817,11 +2112,11 @@ module FeedTools
1817
2112
  end
1818
2113
 
1819
2114
  # Generates xml based on the content of the feed
1820
- def build_xml(feed_type=(self.feed_type or "rss"), version=0.0,
2115
+ def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
1821
2116
  xml_builder=Builder::XmlMarkup.new(:indent => 2))
1822
- if feed_type == "rss" && version == 0.0
2117
+ if feed_type == "rss" && (version == nil || version == 0.0)
1823
2118
  version = 1.0
1824
- elsif feed_type == "atom" && version == 0.0
2119
+ elsif feed_type == "atom" && (version == nil || version == 0.0)
1825
2120
  version = 0.3
1826
2121
  end
1827
2122
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
@@ -1838,8 +2133,9 @@ module FeedTools
1838
2133
  else
1839
2134
  xml_builder.link
1840
2135
  end
1841
- unless image_link.nil? || image_link == ""
1842
- xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
2136
+ unless images.nil? || images.empty?
2137
+ xml_builder.image("rdf:resource" => CGI.escapeHTML(
2138
+ images.first.url))
1843
2139
  end
1844
2140
  unless description.nil? || description == ""
1845
2141
  xml_builder.description(description)
@@ -1866,18 +2162,30 @@ module FeedTools
1866
2162
  end
1867
2163
  build_xml_hook(feed_type, version, xml_builder)
1868
2164
  end
1869
- unless image_link.nil? || image_link == ""
1870
- xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
1871
- unless title.nil? || title == ""
1872
- xml_builder.title(title)
2165
+ unless images.nil? || images.empty?
2166
+ best_image = nil
2167
+ for image in self.images
2168
+ if image.link != nil
2169
+ best_image = image
2170
+ break
2171
+ end
2172
+ end
2173
+ best_image = images.first if best_image.nil?
2174
+ xml_builder.image("rdf:about" => CGI.escapeHTML(best_image.url)) do
2175
+ if best_image.title != nil && best_image.title != ""
2176
+ xml_builder.title(best_image.title)
2177
+ elsif self.title != nil && self.title != ""
2178
+ xml_builder.title(self.title)
1873
2179
  else
1874
2180
  xml_builder.title
1875
2181
  end
1876
- unless image_link.nil? || image_link == ""
1877
- xml_builder.url(image_link)
2182
+ unless best_image.url.nil? || best_image.url == ""
2183
+ xml_builder.url(best_image.url)
1878
2184
  end
1879
- unless link.nil? || link == ""
1880
- xml_builder.link(link)
2185
+ if best_image.link != nil && best_image.link != ""
2186
+ xml_builder.link(best_image.link)
2187
+ elsif self.link != nil && self.link != ""
2188
+ xml_builder.link(self.link)
1881
2189
  else
1882
2190
  xml_builder.link
1883
2191
  end
@@ -1891,7 +2199,7 @@ module FeedTools
1891
2199
  end
1892
2200
  elsif feed_type == "rss"
1893
2201
  # normal rss format
1894
- return xml_builder.rss("version" => version.to_s) do
2202
+ return xml_builder.rss("version" => version) do
1895
2203
  unless title.nil? || title == ""
1896
2204
  xml_builder.title(title)
1897
2205
  end
@@ -1913,7 +2221,7 @@ module FeedTools
1913
2221
  elsif feed_type == "atom"
1914
2222
  # normal atom format
1915
2223
  return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
1916
- "version" => version.to_s,
2224
+ "version" => version,
1917
2225
  "xml:lang" => language) do
1918
2226
  unless title.nil? || title == ""
1919
2227
  xml_builder.title(title,
@@ -2112,12 +2420,18 @@ module FeedTools
2112
2420
  end
2113
2421
  return false
2114
2422
  end
2423
+
2424
+ alias_method :link, :url
2425
+ alias_method :link=, :url=
2115
2426
  end
2116
- EnclosureCategory = Struct.new( "EnclosureCategory", :category, :scheme, :label )
2427
+
2428
+ # TODO: Make these actual classes instead of structs
2429
+ # ==================================================
2117
2430
  EnclosureHash = Struct.new( "EnclosureHash", :hash, :type )
2118
2431
  EnclosurePlayer = Struct.new( "EnclosurePlayer", :url, :height, :width )
2119
2432
  EnclosureCredit = Struct.new( "EnclosureCredit", :name, :role )
2120
- EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height, :width )
2433
+ EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height,
2434
+ :width )
2121
2435
 
2122
2436
  # Returns the parent feed of this feed item
2123
2437
  def feed
@@ -2186,19 +2500,27 @@ module FeedTools
2186
2500
  def title
2187
2501
  if @title.nil?
2188
2502
  repair_entities = false
2189
- if XPath.first(root_node, "title/@type").to_s == "xhtml" ||
2190
- XPath.first(root_node, "title/@mode").to_s == "xhtml" ||
2191
- XPath.first(root_node, "title/@type").to_s == "xml" ||
2192
- XPath.first(root_node, "title/@mode").to_s == "xml" ||
2193
- XPath.first(root_node, "title/@type").to_s ==
2194
- "application/xhtml+xml"
2195
- @title = XPath.first(root_node, "title").inner_xml
2196
- elsif XPath.first(root_node, "title/@type").to_s == "escaped" ||
2197
- XPath.first(root_node, "title/@mode").to_s == "escaped"
2503
+ title_node = XPath.first(root_node, "title")
2504
+ if title_node.nil?
2505
+ title_node = XPath.first(root_node, "dc:title")
2506
+ end
2507
+ if title_node.nil?
2508
+ title_node = XPath.first(root_node, "TITLE")
2509
+ end
2510
+ if title_node.nil?
2511
+ return nil
2512
+ end
2513
+ if XPath.first(title_node, "@type").to_s == "xhtml" ||
2514
+ XPath.first(title_node, "@mode").to_s == "xhtml" ||
2515
+ XPath.first(title_node, "@type").to_s == "xml" ||
2516
+ XPath.first(title_node, "@mode").to_s == "xml" ||
2517
+ XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
2518
+ @title = title_node.inner_xml
2519
+ elsif XPath.first(title_node, "@type").to_s == "escaped" ||
2520
+ XPath.first(title_node, "@mode").to_s == "escaped"
2198
2521
  @title = FeedTools.unescape_entities(
2199
- XPath.first(root_node, "title/text()").to_s)
2522
+ XPath.first(title_node, "text()").to_s)
2200
2523
  else
2201
- title_node = XPath.first(root_node, "title")
2202
2524
  @title = title_node.inner_xml
2203
2525
  repair_entities = true
2204
2526
  end
@@ -2252,16 +2574,25 @@ module FeedTools
2252
2574
  if description_node.nil?
2253
2575
  description_node = XPath.first(root_node, "abstract")
2254
2576
  end
2577
+ if description_node.nil?
2578
+ description_node = XPath.first(root_node, "ABSTRACT")
2579
+ end
2255
2580
  if description_node.nil?
2256
2581
  description_node = XPath.first(root_node, "content:encoded")
2257
2582
  end
2258
2583
  if description_node.nil?
2259
2584
  description_node = XPath.first(root_node, "content")
2260
2585
  end
2586
+ if description_node.nil?
2587
+ description_node = XPath.first(root_node, "fullitem")
2588
+ end
2261
2589
  if description_node.nil?
2262
2590
  description_node = XPath.first(root_node, "info")
2263
2591
  @bozo = true unless description_node.nil?
2264
2592
  end
2593
+ if description_node.nil?
2594
+ return nil
2595
+ end
2265
2596
  unless description_node.nil?
2266
2597
  if XPath.first(description_node, "@encoding").to_s != ""
2267
2598
  @description =
@@ -2297,7 +2628,6 @@ module FeedTools
2297
2628
  @description = FeedTools.tidy_html(@description)
2298
2629
  end
2299
2630
 
2300
- @description.gsub!(/\n/, " ") if @description.size < 80
2301
2631
  @description = @description.strip unless @description.nil?
2302
2632
  @description = nil if @description == ""
2303
2633
  end
@@ -2385,6 +2715,18 @@ module FeedTools
2385
2715
  if @link == ""
2386
2716
  @link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
2387
2717
  end
2718
+ if @link == ""
2719
+ @link = XPath.first(root_node, "@href").to_s
2720
+ end
2721
+ if @link == ""
2722
+ @link = XPath.first(root_node, "a/@href").to_s
2723
+ end
2724
+ if @link == ""
2725
+ @link = XPath.first(root_node, "@HREF").to_s
2726
+ end
2727
+ if @link == ""
2728
+ @link = XPath.first(root_node, "A/@HREF").to_s
2729
+ end
2388
2730
  if @link == ""
2389
2731
  if FeedTools.is_url? self.guid
2390
2732
  @link = self.guid
@@ -2410,7 +2752,7 @@ module FeedTools
2410
2752
  @link = new_link
2411
2753
  end
2412
2754
 
2413
- # Returns the feed comment link
2755
+ # Returns the feed item comment link
2414
2756
  def comment_link
2415
2757
  if @comment_link.nil?
2416
2758
  # get the feed comment link from the xml document
@@ -2423,64 +2765,104 @@ module FeedTools
2423
2765
  return @comment_link
2424
2766
  end
2425
2767
 
2426
- # Sets the feed comment link
2768
+ # Sets the feed item comment link
2427
2769
  def comment_link=(new_comment_link)
2428
2770
  @comment_link = new_comment_link
2429
2771
  end
2430
2772
 
2431
- # Returns the feed image link
2432
- def image_link
2433
- if @image_link.nil?
2434
- # get the feed image link from the xml document
2435
- if @image_link == ""
2436
- @image_link = XPath.first(root_node, "link[@type='image/jpeg']/@href").to_s
2437
- end
2438
- if @image_link == ""
2439
- @image_link = XPath.first(root_node, "link[@type='image/gif']/@href").to_s
2440
- end
2441
- if @image_link == ""
2442
- @image_link = XPath.first(root_node, "link[@type='image/png']/@href").to_s
2773
+ # Returns a list of the feed item's categories
2774
+ def categories
2775
+ if @categories.nil?
2776
+ @categories = []
2777
+ category_nodes = XPath.match(root_node, "category")
2778
+ if category_nodes.nil? || category_nodes.empty?
2779
+ category_nodes = XPath.match(root_node, "dc:subject")
2780
+ end
2781
+ unless category_nodes.nil?
2782
+ for category_node in category_nodes
2783
+ category = FeedTools::Feed::Category.new
2784
+ category.term = XPath.first(category_node, "@term").to_s
2785
+ if category.term == ""
2786
+ category.term = XPath.first(category_node, "text()").to_s
2787
+ end
2788
+ category.term.strip! unless category.term.nil?
2789
+ category.term = nil if category.term == ""
2790
+ category.label = XPath.first(category_node, "@label").to_s
2791
+ category.label.strip! unless category.label.nil?
2792
+ category.label = nil if category.label == ""
2793
+ category.scheme = XPath.first(category_node, "@scheme").to_s
2794
+ if category.scheme == ""
2795
+ category.scheme = XPath.first(category_node, "@domain").to_s
2796
+ end
2797
+ category.scheme.strip! unless category.scheme.nil?
2798
+ category.scheme = nil if category.scheme == ""
2799
+ @categories << category
2800
+ end
2443
2801
  end
2444
- # The following two should technically never occur, but have been included
2445
- # simply because I've seen both occuring in the wild at least once.
2446
- if @image_link == ""
2447
- @image_link = XPath.first(root_node, "image/url/text()").to_s
2802
+ end
2803
+ return @categories
2804
+ end
2805
+
2806
+ # Returns a list of the feed items's images
2807
+ def images
2808
+ if @images.nil?
2809
+ @images = []
2810
+ image_nodes = XPath.match(root_node, "link")
2811
+ if image_nodes.nil? || image_nodes.empty?
2812
+ image_nodes = XPath.match(root_node, "logo")
2448
2813
  end
2449
- if @image_link == ""
2450
- @image_link = XPath.first(root_node, "image/@rdf:resource").to_s
2814
+ if image_nodes.nil? || image_nodes.empty?
2815
+ image_nodes = XPath.match(root_node, "LOGO")
2451
2816
  end
2452
- if @image_link == ""
2453
- # If there's only a media thumbnail, we can just borrow it. Technically, this isn't
2454
- # ideal, but chances are very good that anything that makes use of this image is
2455
- # simply not going to care anyhow.
2456
- @image_link = XPath.first(root_node, "media:thumbnail/@url").to_s
2457
- if @image_link == ""
2458
- @media_image_link = @image_link
2459
- end
2817
+ if image_nodes.nil? || image_nodes.empty?
2818
+ image_nodes = XPath.match(root_node, "image")
2460
2819
  end
2461
- if @image_link == ""
2462
- # If there's only an itunes image, we can just borrow it. See comment above regarding
2463
- # less-than-ideal-ness.
2464
- if @itunes_image_link == ""
2465
- @image_link = XPath.first(root_node, "itunes:image/@href").to_s
2466
- if @image_link == ""
2467
- @image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
2820
+ unless image_nodes.nil?
2821
+ for image_node in image_nodes
2822
+ image = FeedTools::Feed::Image.new
2823
+ image.url = XPath.first(image_node, "url/text()").to_s
2824
+ if image.url != ""
2825
+ self.feed.bozo = true
2468
2826
  end
2469
- @itunes_image_link = @image_link
2470
- else
2471
- @image_link = @itunes_image_link
2827
+ if image.url == ""
2828
+ image.url = XPath.first(image_node, "@rdf:resource").to_s
2829
+ end
2830
+ if image.url == "" && (image_node.name == "logo" ||
2831
+ (image_node.attributes['type'] =~ /^image/) == 0)
2832
+ image.url = XPath.first(image_node, "@href").to_s
2833
+ end
2834
+ if image.url == "" && image_node.name == "LOGO"
2835
+ image.url = XPath.first(image_node, "@HREF").to_s
2836
+ end
2837
+ image.url.strip! unless image.url.nil?
2838
+ image.url = nil if image.url == ""
2839
+ image.title = XPath.first(image_node, "title/text()").to_s
2840
+ image.title.strip! unless image.title.nil?
2841
+ image.title = nil if image.title == ""
2842
+ image.description =
2843
+ XPath.first(image_node, "description/text()").to_s
2844
+ image.description.strip! unless image.description.nil?
2845
+ image.description = nil if image.description == ""
2846
+ image.link = XPath.first(image_node, "link/text()").to_s
2847
+ image.link.strip! unless image.link.nil?
2848
+ image.link = nil if image.link == ""
2849
+ image.height = XPath.first(image_node, "height/text()").to_s.to_i
2850
+ image.height = nil if image.height <= 0
2851
+ image.width = XPath.first(image_node, "width/text()").to_s.to_i
2852
+ image.width = nil if image.width <= 0
2853
+ image.style = XPath.first(image_node, "@style").to_s.downcase
2854
+ if image.style == ""
2855
+ image.style = XPath.first(image_node, "@STYLE").to_s.downcase
2856
+ end
2857
+ image.style.strip! unless image.style.nil?
2858
+ image.style = nil if image.style == ""
2859
+ @images << image
2472
2860
  end
2473
2861
  end
2474
- @image_link = FeedTools.normalize_url(@image_link)
2475
2862
  end
2476
- return @image_link
2477
- end
2478
-
2479
- # Sets the feed image link
2480
- def image_link=(new_image_link)
2481
- @image_link = new_image_link
2863
+ return @images
2482
2864
  end
2483
-
2865
+
2484
2866
  # Returns the feed item itunes image link
2485
2867
  #
2486
2868
  # If it's not present, falls back to the normal image link.
@@ -2494,9 +2876,6 @@ module FeedTools
2494
2876
  if @itunes_image_link == ""
2495
2877
  @itunes_image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
2496
2878
  end
2497
- if @itunes_image_link == ""
2498
- @itunes_image_link = self.image_link
2499
- end
2500
2879
  @itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
2501
2880
  end
2502
2881
  return @itunes_image_link
@@ -2514,9 +2893,6 @@ module FeedTools
2514
2893
  if @media_thumbnail_link.nil?
2515
2894
  # get the feed item itunes image link from the xml document
2516
2895
  @media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
2517
- if @media_thumbnail_link == ""
2518
- @media_thumbnail_link = image_link
2519
- end
2520
2896
  @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
2521
2897
  end
2522
2898
  return @media_thumbnail_link
@@ -2527,6 +2903,21 @@ module FeedTools
2527
2903
  @media_thumbnail_link = new_media_thumbnail_link
2528
2904
  end
2529
2905
 
2906
+ # Returns the feed item's copyright information
2907
+ def copyright
2908
+ if @copyright.nil?
2909
+ @copyright = XPath.first(root_node, "dc:rights/text()").to_s
2910
+ @copyright = FeedTools.sanitize_html(@copyright, :strip)
2911
+ @copyright = nil if @copyright == ""
2912
+ end
2913
+ return @copyright
2914
+ end
2915
+
2916
+ # Sets the feed item's copyright information
2917
+ def copyright=(new_copyright)
2918
+ @copyright = new_copyright
2919
+ end
2920
+
2530
2921
  # Returns all feed item enclosures
2531
2922
  def enclosures
2532
2923
  if @enclosures.nil?
@@ -2621,11 +3012,13 @@ module FeedTools
2621
3012
  end
2622
3013
  enclosure.categories = []
2623
3014
  for category in XPath.match(enclosure_node, "media:category")
2624
- enclosure.categories << EnclosureCategory.new(
2625
- FeedTools.unescape_entities(category.text),
2626
- FeedTools.unescape_entities(category.attributes["scheme"].to_s),
3015
+ enclosure.categories << FeedTools::Feed::Category.new
3016
+ enclosure.categories.last.term =
3017
+ FeedTools.unescape_entities(category.text)
3018
+ enclosure.categories.last.scheme =
3019
+ FeedTools.unescape_entities(category.attributes["scheme"].to_s)
3020
+ enclosure.categories.last.label =
2627
3021
  FeedTools.unescape_entities(category.attributes["label"].to_s)
2628
- )
2629
3022
  if enclosure.categories.last.scheme == ""
2630
3023
  enclosure.categories.last.scheme = nil
2631
3024
  end
@@ -2714,11 +3107,13 @@ module FeedTools
2714
3107
  if (enclosure.categories.nil? || enclosure.categories.size == 0)
2715
3108
  enclosure.categories = []
2716
3109
  for category in XPath.match(media_group, "media:category")
2717
- enclosure.categories << EnclosureCategory.new(
2718
- FeedTools.unescape_entities(category.text),
2719
- FeedTools.unescape_entities(category.attributes["scheme"].to_s),
3110
+ enclosure.categories << FeedTools::Feed::Category.new
3111
+ enclosure.categories.last.term =
3112
+ FeedTools.unescape_entities(category.text)
3113
+ enclosure.categories.last.scheme =
3114
+ FeedTools.unescape_entities(category.attributes["scheme"].to_s)
3115
+ enclosure.categories.last.label =
2720
3116
  FeedTools.unescape_entities(category.attributes["label"].to_s)
2721
- )
2722
3117
  if enclosure.categories.last.scheme == ""
2723
3118
  enclosure.categories.last.scheme = nil
2724
3119
  end
@@ -2880,7 +3275,7 @@ module FeedTools
2880
3275
  # if it's missing.
2881
3276
  if @enclosures.size == 1
2882
3277
  if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
2883
- @enclosures.first.duration = self.duration
3278
+ @enclosures.first.duration = self.itunes_duration
2884
3279
  end
2885
3280
  end
2886
3281
 
@@ -2910,6 +3305,10 @@ module FeedTools
2910
3305
  @author.raw = FeedTools.unescape_entities(
2911
3306
  XPath.first(root_node, "dc:author/text()").to_s)
2912
3307
  end
3308
+ if @author.raw == ""
3309
+ @author.raw = FeedTools.unescape_entities(
3310
+ XPath.first(root_node, "managingEditor/text()").to_s)
3311
+ end
2913
3312
  unless @author.raw == ""
2914
3313
  raw_scan = @author.raw.scan(
2915
3314
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -2983,6 +3382,76 @@ module FeedTools
2983
3382
  end
2984
3383
  end
2985
3384
 
3385
+ # Returns the feed publisher
3386
+ def publisher
3387
+ if @publisher.nil?
3388
+ @publisher = FeedTools::Feed::Author.new
3389
+
3390
+ # Set the author name
3391
+ @publisher.raw = FeedTools.unescape_entities(
3392
+ XPath.first(root_node, "dc:publisher/text()").to_s)
3393
+ if @publisher.raw == ""
3394
+ @publisher.raw = FeedTools.unescape_entities(
3395
+ XPath.first(root_node, "webMaster/text()").to_s)
3396
+ end
3397
+ unless @publisher.raw == ""
3398
+ raw_scan = @publisher.raw.scan(
3399
+ /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
3400
+ if raw_scan.nil? || raw_scan.size == 0
3401
+ raw_scan = @publisher.raw.scan(
3402
+ /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
3403
+ unless raw_scan.size == 0
3404
+ publisher_raw_pair = raw_scan.first.reverse
3405
+ end
3406
+ else
3407
+ publisher_raw_pair = raw_scan.first
3408
+ end
3409
+ if raw_scan.nil? || raw_scan.size == 0
3410
+ email_scan = @publisher.raw.scan(
3411
+ /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
3412
+ if email_scan != nil && email_scan.size > 0
3413
+ @publisher.email = email_scan.first.strip
3414
+ end
3415
+ end
3416
+ unless publisher_raw_pair.nil? || publisher_raw_pair.size == 0
3417
+ @publisher.name = publisher_raw_pair.first.strip
3418
+ @publisher.email = publisher_raw_pair.last.strip
3419
+ else
3420
+ unless @publisher.raw.include?("@")
3421
+ # We can be reasonably sure we are looking at something
3422
+ # that the creator didn't intend to contain an email address if
3423
+ # it got through the preceeding regexes and it doesn't
3424
+ # contain the tell-tale '@' symbol.
3425
+ @publisher.name = @publisher.raw
3426
+ end
3427
+ end
3428
+ end
3429
+
3430
+ @publisher.name = nil if @publisher.name == ""
3431
+ @publisher.raw = nil if @publisher.raw == ""
3432
+ @publisher.email = nil if @publisher.email == ""
3433
+ @publisher.url = nil if @publisher.url == ""
3434
+ end
3435
+ return @publisher
3436
+ end
3437
+
3438
+ # Sets the feed publisher
3439
+ def publisher=(new_publisher)
3440
+ if new_publisher.respond_to?(:name) &&
3441
+ new_publisher.respond_to?(:email) &&
3442
+ new_publisher.respond_to?(:url)
3443
+ # It's a complete Author object, just set it.
3444
+ @publisher = new_publisher
3445
+ else
3446
+ # We're not looking at an Author object, this is probably a string,
3447
+ # default to setting the publisher's name.
3448
+ if @publisher.nil?
3449
+ @publisher = FeedTools::Feed::Author.new
3450
+ end
3451
+ @publisher.name = new_publisher
3452
+ end
3453
+ end
3454
+
2986
3455
  # Returns the contents of the itunes:author element
2987
3456
  #
2988
3457
  # This inherits from any incorrectly placed channel-level itunes:author
@@ -3026,10 +3495,6 @@ module FeedTools
3026
3495
  @itunes_duration = new_itunes_duration
3027
3496
  end
3028
3497
 
3029
- # Sets the itunes:summary
3030
- def itunes_summary=(new_itunes_summary)
3031
- end
3032
-
3033
3498
  # Returns the feed item time
3034
3499
  def time
3035
3500
  if @time.nil?
@@ -3054,6 +3519,32 @@ module FeedTools
3054
3519
  @time = new_time
3055
3520
  end
3056
3521
 
3522
+ # Returns the url for posting comments
3523
+ def comments
3524
+ if @comments.nil?
3525
+ @comments = XPath.first(root_node, "comments/text()").to_s
3526
+ @comments = nil if @comments == ""
3527
+ end
3528
+ return @comments
3529
+ end
3530
+
3531
+ # Sets the url for posting comments
3532
+ def comments=(new_comments)
3533
+ @comments = new_comments
3534
+ end
3535
+
3536
+ # The source that this post was based on
3537
+ def source
3538
+ if @source.nil?
3539
+ @source = FeedTools::Feed::Link.new
3540
+ @source.url = XPath.first(root_node, "source/@url").to_s
3541
+ @source.url = nil if @source.url == ""
3542
+ @source.value = XPath.first(root_node, "source/text()").to_s
3543
+ @source.value = nil if @source.value == ""
3544
+ end
3545
+ return @source
3546
+ end
3547
+
3057
3548
  # Returns the feed item tags
3058
3549
  def tags
3059
3550
  # TODO: support the rel="tag" microformat
@@ -3131,7 +3622,7 @@ module FeedTools
3131
3622
  "itunes:explicit/text()").to_s.downcase == "yes" ||
3132
3623
  XPath.first(root_node,
3133
3624
  "itunes:explicit/text()").to_s.downcase == "true" ||
3134
- feed.explicit
3625
+ feed.explicit?
3135
3626
  @explicit = true
3136
3627
  else
3137
3628
  @explicit = false
@@ -3152,8 +3643,13 @@ module FeedTools
3152
3643
  end
3153
3644
 
3154
3645
  # Generates xml based on the content of the feed item
3155
- def build_xml(feed_type=(self.feed.feed_type or "rss"), version=0.0,
3646
+ def build_xml(feed_type=(self.feed.feed_type or "rss"), version=nil,
3156
3647
  xml_builder=Builder::XmlMarkup.new(:indent => 2))
3648
+ if feed_type == "rss" && (version == nil || version == 0.0)
3649
+ version = 1.0
3650
+ elsif feed_type == "atom" && (version == nil || version == 0.0)
3651
+ version = 0.3
3652
+ end
3157
3653
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
3158
3654
  # RDF-based rss format
3159
3655
  if link.nil?
@@ -3253,6 +3749,8 @@ module FeedTools
3253
3749
  alias_method :tagline=, :description=
3254
3750
  alias_method :subtitle, :description
3255
3751
  alias_method :subtitle=, :description=
3752
+ alias_method :summary, :description
3753
+ alias_method :summary=, :description=
3256
3754
  alias_method :abstract, :description
3257
3755
  alias_method :abstract=, :description=
3258
3756
  alias_method :content, :description
@@ -3264,12 +3762,26 @@ end
3264
3762
 
3265
3763
  module REXML # :nodoc:
3266
3764
  class Element # :nodoc:
3267
- def inner_xml # :nodoc:
3268
- result = ""
3269
- self.each_child do |child|
3270
- result << child.to_s
3765
+ unless REXML::Element.public_instance_methods.include? :inner_xml
3766
+ def inner_xml # :nodoc:
3767
+ result = ""
3768
+ self.each_child do |child|
3769
+ result << child.to_s
3770
+ end
3771
+ return result
3772
+ end
3773
+ end
3774
+
3775
+ unless REXML::Element.public_instance_methods.include? :base_uri
3776
+ def base_uri # :nodoc:
3777
+ if not attribute('xml:base')
3778
+ return parent.base_uri
3779
+ elsif parent
3780
+ return URI.join(parent.base_uri, attribute('xml:base').value).to_s
3781
+ else
3782
+ return (attribute('xml:base').value or '')
3783
+ end
3271
3784
  end
3272
- return result
3273
3785
  end
3274
3786
  end
3275
3787
  end