feedtools 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,15 @@
1
+ == FeedTools 0.2.4
2
+ * fixed bug in the sqlite table creation query
3
+ * greatly improved image support
4
+ * improved cdf support (still needs way more work)
5
+ * support for text input elements
6
+ * now possible to force retrieval from the cache only
7
+ * increased the flexibility of the database caching implementation
8
+ * feed attributes accessible through the keys and values collections
9
+ * minor coding style changes
10
+ * fixed really dumb typo in the podcast? and vidlog? methods
11
+ * fixed exception from missing titles and descriptions
12
+ * now passes all of mark pilgrim's well-formed rss tests
1
13
  == FeedTools 0.2.3
2
14
  * fixed omission of get parameters from http requests
3
15
  == FeedTools 0.2.2
data/lib/feed_tools.rb CHANGED
@@ -25,7 +25,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
25
25
  ENV['RAILS_ENV'] ||
26
26
  'production' # :nodoc:
27
27
 
28
- FEED_TOOLS_VERSION = "0.2.3"
28
+ FEED_TOOLS_VERSION = "0.2.4"
29
29
 
30
30
  $:.unshift(File.dirname(__FILE__))
31
31
  $:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
@@ -75,7 +75,7 @@ require 'yaml'
75
75
  # => "News for nerds, stuff that matters"
76
76
  # slashdot_feed.link
77
77
  # => "http://slashdot.org/"
78
- # slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
78
+ # slashdot_feed.items.first.find_node("slash:hitparade/text()").value
79
79
  # => "43,37,28,23,11,3,1"
80
80
  module FeedTools
81
81
 
@@ -150,7 +150,7 @@ module FeedTools
150
150
  begin
151
151
  ActiveRecord::Base.connection.execute "select id, url, title, " +
152
152
  "link, xml_data, http_headers, last_retrieved " +
153
- "from feeds limit 1"
153
+ "from #{self.table_name()} limit 1"
154
154
  rescue ActiveRecord::StatementInvalid
155
155
  return false
156
156
  rescue
@@ -163,7 +163,7 @@ module FeedTools
163
163
  def DatabaseFeedCache.create_table
164
164
  unless DatabaseFeedCache.table_exists?
165
165
  feeds_mysql = <<-SQL_END
166
- CREATE TABLE `feeds` (
166
+ CREATE TABLE `#{self.table_name()}` (
167
167
  `id` int(10) unsigned NOT NULL auto_increment,
168
168
  `url` varchar(255) default NULL,
169
169
  `title` varchar(255) default NULL,
@@ -175,19 +175,18 @@ module FeedTools
175
175
  ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
176
176
  SQL_END
177
177
  feeds_sqlite = <<-SQL_END
178
- CREATE TABLE 'feeds' (
178
+ CREATE TABLE '#{self.table_name()}' (
179
179
  'id' INTEGER PRIMARY KEY NOT NULL,
180
180
  'url' VARCHAR(255) DEFAULT NULL,
181
181
  'title' VARCHAR(255) DEFAULT NULL,
182
182
  'link' VARCHAR(255) DEFAULT NULL,
183
- 'image_link' VARCHAR(255) DEFAULT NULL,
184
183
  'xml_data' TEXT DEFAULT NULL,
185
184
  'http_headers' TEXT DEFAULT NULL,
186
185
  'last_retrieved' DATETIME DEFAULT NULL,
187
186
  );
188
187
  SQL_END
189
188
  feeds_psql = <<-SQL_END
190
- CREATE TABLE feeds (
189
+ CREATE TABLE #{self.table_name()} (
191
190
  id SERIAL PRIMARY KEY NOT NULL,
192
191
  url varchar(255) default NULL,
193
192
  title varchar(255) default NULL,
@@ -206,7 +205,7 @@ module FeedTools
206
205
  table_creation_sql = feeds_psql
207
206
  end
208
207
  if table_creation_sql.nil?
209
- raise "Could not build feed_items table."
208
+ raise "Could not build #{self.table_name()} table."
210
209
  else
211
210
  connection.execute table_creation_sql
212
211
  end
@@ -219,24 +218,86 @@ module FeedTools
219
218
  end
220
219
 
221
220
  # Quick method of enabling small classes to have their attributes
222
- # accessible as a dictionary.
223
- module AttributeDictionary # :nodoc:
224
- # Access the attributes as a dictionary
221
+ # accessible as a dictionary. These methods should not be used whenever
222
+ # performance is going to be an issue. They exist almost entirely for the
223
+ # purposes of aesthetics and/or debugging.
224
+ module AttributeDictionary
225
+ # Access the attributes as a dictionary.
225
226
  def [](key)
226
- # Assignment, and destructive methods should not be
227
- # accessed like this.
228
- return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
229
- return nil unless self.method(key).arity == 0
227
+ return nil unless self.keys.include? key
230
228
  return self.send(key)
231
229
  end
232
230
 
233
- # Access the attributes as a dictionary
231
+ # Access the attributes as a dictionary.
234
232
  def []=(key, value)
235
- # Assignment, and destructive methods should not be
236
- # accessed like this.
237
- return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
238
- return nil unless self.method(key + "=").arity == 1
239
- return self.send(key + "=", value)
233
+ pseudo_key = key
234
+ pseudo_key = key[0..-2] if key[-1..-1] == "?"
235
+ return nil unless self.method(pseudo_key + "=").arity == 1
236
+ local_keys = self.keys
237
+ unless local_keys.include?(key) || local_keys.include?(pseudo_key)
238
+ return nil
239
+ end
240
+ return self.send(pseudo_key + "=", value)
241
+ end
242
+
243
+ # Access the attributes as a dictionary.
244
+ def keys
245
+ key_methods = []
246
+ for key in self.methods
247
+ # Quick-n-dirty hack to speed things up and keep the list clean
248
+ if self.method(key).arity == 0 && key[-1..-1] != "=" &&
249
+ key[-1..-1] != "!" && key[0..1] != "__" &&
250
+ key[0..2] != "to_" && key[-5..-1] != "_node" &&
251
+ key != "cache_object" && key != "save" && key != "xml" &&
252
+ key != "xml_data" && key != "expired?" && key != "live?" &&
253
+ key != "feed"
254
+ superfluous_ancestors = self.class.ancestors
255
+ superfluous_ancestors = superfluous_ancestors[1..-1]
256
+ superfluous = false
257
+ for ancestor in superfluous_ancestors
258
+ if ancestor.instance_methods.include? key
259
+ superfluous = true
260
+ break
261
+ end
262
+ end
263
+ next if superfluous
264
+ key_methods << key
265
+ end
266
+ end
267
+ return key_methods.sort
268
+ end
269
+
270
+ # Access the attributes as a dictionary.
271
+ # Please note that this method may cause a nearly complete parse of a
272
+ # feed. This will be very slow.
273
+ def values
274
+ return self.keys.map { |key| self[key] }
275
+ end
276
+
277
+ # Access the attributes as a dictionary.
278
+ # Please note that this method may cause a complete parse of a feed.
279
+ # This will be very slow.
280
+ def to_hash
281
+ attribute_hash = {}
282
+ for key in keys
283
+ value = self[key]
284
+ if value.respond_to? :to_hash
285
+ value = value.to_hash
286
+ end
287
+ if value.respond_to? :to_ary
288
+ new_value = []
289
+ for item in value.to_ary
290
+ if item.respond_to? :to_hash
291
+ new_value << item.to_hash
292
+ else
293
+ new_value << item
294
+ end
295
+ end
296
+ value = new_value
297
+ end
298
+ attribute_hash[key] = value
299
+ end
300
+ return attribute_hash
240
301
  end
241
302
  end
242
303
 
@@ -278,6 +339,22 @@ module FeedTools
278
339
  @feed_cache = new_feed_cache
279
340
  end
280
341
 
342
+ # Returns true if FeedTools should only retrieve from the cache and avoid
343
+ # pulling feeds from their remote location.
344
+ def FeedTools.cache_only?
345
+ @cache_only = false if @cache_only.nil?
346
+ return @cache_only
347
+ end
348
+
349
+ # Sets whether or not FeedTools should retrieve feeds from remote locations
350
+ # or if it should rely on the cache only.
351
+ def FeedTools.cache_only=(new_cache_only)
352
+ if new_cache_only != true && new_cache_only != false
353
+ raise ArgumentError, "Must be either true or false."
354
+ end
355
+ @cache_only = new_cache_only
356
+ end
357
+
281
358
  # Returns true if FeedTools.feed_cache is not nil and a connection with
282
359
  # the cache has been successfully established. Also returns false if an
283
360
  # error is raised while trying to determine the status of the cache.
@@ -613,37 +690,29 @@ module FeedTools
613
690
  end
614
691
 
615
692
  class Feed
616
- include REXML
693
+ include REXML # :nodoc:
617
694
  include AttributeDictionary
618
695
 
619
696
  # Represents a feed/feed item's category
620
697
  class Category
698
+ include AttributeDictionary
699
+
621
700
  # The category term value
622
701
  attr_accessor :term
623
702
  # The categorization scheme
624
703
  attr_accessor :scheme
625
704
  # A human-readable description of the category
626
705
  attr_accessor :label
627
-
628
- # Relays any unknown methods to the term so that you can treat the
629
- # category object as a string
630
- def method_missing(msg, *params)
631
- self.term.send(msg, params)
632
- end
633
-
634
- # Relays the to_s method to the term field
635
- def to_s
636
- self.term.to_s
637
- end
638
-
639
- # Relays the inspect method to the term field
640
- def inspect
641
- self.term.inspect
642
- end
706
+
707
+ alias_method :value, :term
708
+ alias_method :category, :term
709
+ alias_method :domain, :scheme
643
710
  end
644
711
 
645
712
  # Represents a feed/feed item's author
646
713
  class Author
714
+ include AttributeDictionary
715
+
647
716
  # The author's real name
648
717
  attr_accessor :name
649
718
  # The author's email address
@@ -652,26 +721,51 @@ module FeedTools
652
721
  attr_accessor :url
653
722
  # The raw value of the author tag if present
654
723
  attr_accessor :raw
655
-
656
- # Relays any unknown methods to the name so that you can treat the
657
- # author object as a string
658
- def method_missing(msg, *params)
659
- self.name.send(msg, params)
660
- end
661
-
662
- # Relays the to_s method to the name field
663
- def to_s
664
- self.name.to_s
665
- end
724
+ end
725
+
726
+ # Represents a feed's image
727
+ class Image
728
+ include AttributeDictionary
666
729
 
667
- # Relays the inspect method to the name field
668
- def inspect
669
- self.name.inspect
670
- end
730
+ # The image's title
731
+ attr_accessor :title
732
+ # The image's description
733
+ attr_accessor :description
734
+ # The image's url
735
+ attr_accessor :url
736
+ # The url to link the image to
737
+ attr_accessor :link
738
+ # The width of the image
739
+ attr_accessor :width
740
+ # The height of the image
741
+ attr_accessor :height
742
+ # The style of the image
743
+ # Possible values are "icon", "image", or "image-wide"
744
+ attr_accessor :style
745
+ end
746
+
747
+ # Represents a feed's text input element.
748
+ # Be aware that this will be ignored for feed generation. It's a
749
+ # pointless element that aggregators usually ignore and it doesn't have an
750
+ # equivalent in all feeds types.
751
+ class TextInput
752
+ include AttributeDictionary
753
+
754
+ # The label of the Submit button in the text input area.
755
+ attr_accessor :title
756
+ # The description explains the text input area.
757
+ attr_accessor :description
758
+ # The URL of the CGI script that processes text input requests.
759
+ attr_accessor :link
760
+ # The name of the text object in the text input area.
761
+ attr_accessor :name
671
762
  end
672
763
 
673
764
  # Represents a feed's cloud.
765
+ # Be aware that this will be ignored for feed generation.
674
766
  class Cloud
767
+ include AttributeDictionary
768
+
675
769
  # The domain of the cloud.
676
770
  attr_accessor :domain
677
771
  # The path for the cloud.
@@ -684,7 +778,19 @@ module FeedTools
684
778
  # The procedure to use to request notification.
685
779
  attr_accessor :register_procedure
686
780
  end
687
-
781
+
782
+ # Represents a simple hyperlink
783
+ class Link
784
+ include AttributeDictionary
785
+
786
+ # The url that is being linked to
787
+ attr_accessor :url
788
+ # The content of the hyperlink
789
+ attr_accessor :value
790
+
791
+ alias_method :href, :url
792
+ end
793
+
688
794
  # Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
689
795
  def Feed.open(url)
690
796
  # clean up the url
@@ -693,19 +799,19 @@ module FeedTools
693
799
  # create and load the new feed
694
800
  feed = Feed.new
695
801
  feed.url = url
696
- feed.update
802
+ feed.update!
697
803
  return feed
698
804
  end
699
805
 
700
806
  # Loads the feed from the remote url if the feed has expired from the cache or cannot be
701
807
  # retrieved from the cache for some reason.
702
- def update
808
+ def update!
703
809
  if self.http_headers.nil? && !(self.cache_object.nil?) &&
704
810
  !(self.cache_object.http_headers.nil?)
705
811
  @http_headers = YAML.load(self.cache_object.http_headers)
706
812
  end
707
- if expired?
708
- load_remote_feed
813
+ if expired? && !FeedTools.cache_only?
814
+ load_remote_feed!
709
815
  else
710
816
  @live = false
711
817
  end
@@ -715,7 +821,7 @@ module FeedTools
715
821
  # field to be set. If an etag or the last_modified date has been set,
716
822
  # attempts to use them to prevent unnecessary reloading of identical
717
823
  # content.
718
- def load_remote_feed
824
+ def load_remote_feed!
719
825
  @live = true
720
826
  if self.http_headers.nil? && !(self.cache_object.nil?) &&
721
827
  !(self.cache_object.http_headers.nil?)
@@ -970,6 +1076,10 @@ module FeedTools
970
1076
  # Returns the root node of the feed.
971
1077
  def root_node
972
1078
  if @root_node.nil?
1079
+ # TODO: Fix this so that added content at the end of the file doesn't
1080
+ # break this stuff.
1081
+ # E.g.: http://smogzer.tripod.com/smog.rdf
1082
+ # ===================================================================
973
1083
  @root_node = xml.root
974
1084
  end
975
1085
  return @root_node
@@ -979,6 +1089,9 @@ module FeedTools
979
1089
  def channel_node
980
1090
  if @channel_node.nil?
981
1091
  @channel_node = XPath.first(root_node, "channel")
1092
+ if @channel_node == nil
1093
+ @channel_node = XPath.first(root_node, "CHANNEL")
1094
+ end
982
1095
  if @channel_node == nil
983
1096
  @channel_node = XPath.first(root_node, "feedinfo")
984
1097
  end
@@ -1039,6 +1152,8 @@ module FeedTools
1039
1152
  @feed_type = "atom"
1040
1153
  when "rdf:rdf"
1041
1154
  @feed_type = "rss"
1155
+ when "rdf"
1156
+ @feed_type = "rss"
1042
1157
  when "rss"
1043
1158
  @feed_type = "rss"
1044
1159
  when "channel"
@@ -1053,6 +1168,58 @@ module FeedTools
1053
1168
  @feed_type = new_feed_type
1054
1169
  end
1055
1170
 
1171
+ # Returns the version number of the feed type.
1172
+ # Intentionally does not differentiate between the Netscape and Userland
1173
+ # versions of RSS 0.91.
1174
+ def feed_version
1175
+ if @feed_version.nil?
1176
+ version = nil
1177
+ begin
1178
+ version = XPath.first(root_node, "@version").to_s.strip.to_f
1179
+ rescue
1180
+ end
1181
+ version = nil if version == 0.0
1182
+ default_namespace = XPath.first(root_node, "@xmlns").to_s.strip
1183
+ case self.feed_type
1184
+ when "atom"
1185
+ if default_namespace == "http://www.w3.org/2005/Atom"
1186
+ @feed_version = 1.0
1187
+ elsif version != nil
1188
+ @feed_version = version
1189
+ elsif default_namespace == "http://purl.org/atom/ns#"
1190
+ @feed_version = 0.3
1191
+ end
1192
+ when "rss"
1193
+ if default_namespace == "http://my.netscape.com/rdf/simple/0.9/"
1194
+ @feed_version = 0.9
1195
+ elsif default_namespace == "http://purl.org/rss/1.0/"
1196
+ @feed_version = 1.0
1197
+ elsif default_namespace == "http://purl.org/net/rss1.1#"
1198
+ @feed_version = 1.1
1199
+ elsif version != nil
1200
+ case version
1201
+ when 2.1
1202
+ @feed_version = 2.0
1203
+ when 2.01
1204
+ @feed_version = 2.0
1205
+ else
1206
+ @feed_version = version
1207
+ end
1208
+ end
1209
+ when "cdf"
1210
+ @feed_version = 0.4
1211
+ when "!okay/news"
1212
+ @feed_version = nil
1213
+ end
1214
+ end
1215
+ return @feed_version
1216
+ end
1217
+
1218
+ # Sets the default feed version
1219
+ def feed_version=(new_feed_version)
1220
+ @feed_version = new_feed_version
1221
+ end
1222
+
1056
1223
  # Returns the feed's unique id
1057
1224
  def id
1058
1225
  if @id.nil?
@@ -1089,19 +1256,27 @@ module FeedTools
1089
1256
  def title
1090
1257
  if @title.nil?
1091
1258
  repair_entities = false
1092
- if XPath.first(channel_node, "title/@type").to_s == "xhtml" ||
1093
- XPath.first(channel_node, "title/@mode").to_s == "xhtml" ||
1094
- XPath.first(channel_node, "title/@type").to_s == "xml" ||
1095
- XPath.first(channel_node, "title/@mode").to_s == "xml" ||
1096
- XPath.first(channel_node, "title/@type").to_s ==
1097
- "application/xhtml+xml"
1098
- @title = XPath.first(channel_node, "title").inner_xml
1099
- elsif XPath.first(channel_node, "title/@type").to_s == "escaped" ||
1100
- XPath.first(channel_node, "title/@mode").to_s == "escaped"
1259
+ title_node = XPath.first(channel_node, "title")
1260
+ if title_node.nil?
1261
+ title_node = XPath.first(channel_node, "dc:title")
1262
+ end
1263
+ if title_node.nil?
1264
+ title_node = XPath.first(channel_node, "TITLE")
1265
+ end
1266
+ if title_node.nil?
1267
+ return nil
1268
+ end
1269
+ if XPath.first(title_node, "@type").to_s == "xhtml" ||
1270
+ XPath.first(title_node, "@mode").to_s == "xhtml" ||
1271
+ XPath.first(title_node, "@type").to_s == "xml" ||
1272
+ XPath.first(title_node, "@mode").to_s == "xml" ||
1273
+ XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
1274
+ @title = title_node.inner_xml
1275
+ elsif XPath.first(title_node, "@type").to_s == "escaped" ||
1276
+ XPath.first(title_node, "@mode").to_s == "escaped"
1101
1277
  @title = FeedTools.unescape_entities(
1102
- XPath.first(channel_node, "title/text()").to_s)
1278
+ XPath.first(title_node, "text()").to_s)
1103
1279
  else
1104
- title_node = XPath.first(channel_node, "title")
1105
1280
  @title = title_node.inner_xml
1106
1281
  repair_entities = true
1107
1282
  end
@@ -1141,6 +1316,9 @@ module FeedTools
1141
1316
  if description_node.nil?
1142
1317
  description_node = XPath.first(channel_node, "abstract")
1143
1318
  end
1319
+ if description_node.nil?
1320
+ description_node = XPath.first(channel_node, "ABSTRACT")
1321
+ end
1144
1322
  if description_node.nil?
1145
1323
  description_node = XPath.first(channel_node, "info")
1146
1324
  end
@@ -1160,6 +1338,9 @@ module FeedTools
1160
1338
  description_node = XPath.first(channel_node, "body")
1161
1339
  @bozo = true unless description_node.nil?
1162
1340
  end
1341
+ if description_node.nil?
1342
+ return nil
1343
+ end
1163
1344
  unless description_node.nil?
1164
1345
  if XPath.first(description_node, "@encoding").to_s != ""
1165
1346
  @description =
@@ -1195,7 +1376,6 @@ module FeedTools
1195
1376
  @description = FeedTools.tidy_html(@description)
1196
1377
  end
1197
1378
 
1198
- @description.gsub!(/\n/, " ") if @description.size < 80
1199
1379
  @description = @description.strip unless @description.nil?
1200
1380
  @description = nil if @description == ""
1201
1381
  end
@@ -1263,6 +1443,15 @@ module FeedTools
1263
1443
  if @link == ""
1264
1444
  @link = XPath.first(channel_node, "@href").to_s
1265
1445
  end
1446
+ if @link == ""
1447
+ @link = XPath.first(channel_node, "@HREF").to_s
1448
+ end
1449
+ if @link == ""
1450
+ @link = XPath.first(channel_node, "a/@href").to_s
1451
+ end
1452
+ if @link == ""
1453
+ @link = XPath.first(channel_node, "A/@HREF").to_s
1454
+ end
1266
1455
  if @link == ""
1267
1456
  if FeedTools.is_url? self.guid
1268
1457
  @link = self.guid
@@ -1290,39 +1479,6 @@ module FeedTools
1290
1479
  end
1291
1480
  end
1292
1481
 
1293
- # Returns the feed image link
1294
- def image_link
1295
- if @image_link.nil?
1296
- # get the feed image link from the xml document
1297
- @image_link = XPath.first(channel_node, "image/url/text()").to_s
1298
- if @image_link == ""
1299
- @image_link = XPath.first(channel_node, "image/@rdf:resource").to_s
1300
- end
1301
- if @image_link == ""
1302
- @image_link = XPath.first(channel_node, "link[@type='image/jpeg']/@href").to_s
1303
- end
1304
- if @image_link == ""
1305
- @image_link = XPath.first(channel_node, "link[@type='image/gif']/@href").to_s
1306
- end
1307
- if @image_link == ""
1308
- @image_link = XPath.first(channel_node, "link[@type='image/png']/@href").to_s
1309
- end
1310
- if @image_link == ""
1311
- @image_link = XPath.first(channel_node, "logo[@style='image']/@href").to_s
1312
- end
1313
- if @image_link == ""
1314
- @image_link = XPath.first(channel_node, "logo/@href").to_s
1315
- end
1316
- @image_link = FeedTools.normalize_url(@image_link)
1317
- end
1318
- return @image_link
1319
- end
1320
-
1321
- # Sets the feed image link
1322
- def image_link=(new_image_link)
1323
- @image_link = new_image_link
1324
- end
1325
-
1326
1482
  # Returns the url to the icon file for this feed.
1327
1483
  #
1328
1484
  # This method uses the url from the link field in order to avoid grabbing
@@ -1348,10 +1504,19 @@ module FeedTools
1348
1504
  "icon/text()").to_s
1349
1505
  end
1350
1506
  if @icon_link == ""
1507
+ @icon_link = XPath.first(channel_node,
1508
+ "logo[@style='icon']/@href").to_s
1509
+ end
1510
+ if @icon_link == ""
1511
+ @icon_link = XPath.first(channel_node,
1512
+ "LOGO[@STYLE='ICON']/@HREF").to_s
1513
+ end
1514
+ if @icon_link == "" && self.link != nil && self.link != ""
1351
1515
  link_uri = URI.parse(FeedTools.normalize_url(self.link))
1352
1516
  @icon_link =
1353
1517
  link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1354
1518
  end
1519
+ icon_link = nil if icon_link == ""
1355
1520
  end
1356
1521
  return @icon_link
1357
1522
  end
@@ -1375,6 +1540,10 @@ module FeedTools
1375
1540
  @author.raw = FeedTools.unescape_entities(
1376
1541
  XPath.first(channel_node, "dc:author/text()").to_s)
1377
1542
  end
1543
+ if @author.raw == ""
1544
+ @author.raw = FeedTools.unescape_entities(
1545
+ XPath.first(channel_node, "managingEditor/text()").to_s)
1546
+ end
1378
1547
  unless @author.raw == ""
1379
1548
  raw_scan = @author.raw.scan(
1380
1549
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -1456,6 +1625,10 @@ module FeedTools
1456
1625
  # Set the author name
1457
1626
  @publisher.raw = FeedTools.unescape_entities(
1458
1627
  XPath.first(channel_node, "dc:publisher/text()").to_s)
1628
+ if @publisher.raw == ""
1629
+ @publisher.raw = FeedTools.unescape_entities(
1630
+ XPath.first(channel_node, "webMaster/text()").to_s)
1631
+ end
1459
1632
  unless @publisher.raw == ""
1460
1633
  raw_scan = @publisher.raw.scan(
1461
1634
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -1529,6 +1702,119 @@ module FeedTools
1529
1702
  return @itunes_author
1530
1703
  end
1531
1704
 
1705
+ # Returns a list of the feed's categories
1706
+ def categories
1707
+ if @categories.nil?
1708
+ @categories = []
1709
+ category_nodes = XPath.match(channel_node, "category")
1710
+ if category_nodes.nil? || category_nodes.empty?
1711
+ category_nodes = XPath.match(channel_node, "dc:subject")
1712
+ end
1713
+ unless category_nodes.nil?
1714
+ for category_node in category_nodes
1715
+ category = FeedTools::Feed::Category.new
1716
+ category.term = XPath.first(category_node, "@term").to_s
1717
+ if category.term == ""
1718
+ category.term = XPath.first(category_node, "text()").to_s
1719
+ end
1720
+ category.term.strip! unless category.term.nil?
1721
+ category.term = nil if category.term == ""
1722
+ category.label = XPath.first(category_node, "@label").to_s
1723
+ category.label.strip! unless category.label.nil?
1724
+ category.label = nil if category.label == ""
1725
+ category.scheme = XPath.first(category_node, "@scheme").to_s
1726
+ if category.scheme == ""
1727
+ category.scheme = XPath.first(category_node, "@domain").to_s
1728
+ end
1729
+ category.scheme.strip! unless category.scheme.nil?
1730
+ category.scheme = nil if category.scheme == ""
1731
+ @categories << category
1732
+ end
1733
+ end
1734
+ end
1735
+ return @categories
1736
+ end
1737
+
1738
+ # Returns a list of the feed's images
1739
+ def images
1740
+ if @images.nil?
1741
+ @images = []
1742
+ image_nodes = XPath.match(channel_node, "image")
1743
+ if image_nodes.nil? || image_nodes.empty?
1744
+ image_nodes = XPath.match(channel_node, "link")
1745
+ end
1746
+ if image_nodes.nil? || image_nodes.empty?
1747
+ image_nodes = XPath.match(channel_node, "logo")
1748
+ end
1749
+ if image_nodes.nil? || image_nodes.empty?
1750
+ image_nodes = XPath.match(channel_node, "LOGO")
1751
+ end
1752
+ unless image_nodes.nil?
1753
+ for image_node in image_nodes
1754
+ image = FeedTools::Feed::Image.new
1755
+ image.url = XPath.first(image_node, "url/text()").to_s
1756
+ if image.url == ""
1757
+ image.url = XPath.first(image_node, "@rdf:resource").to_s
1758
+ end
1759
+ if image.url == "" && (image_node.name == "logo" ||
1760
+ (image_node.attributes['type'] =~ /^image/) == 0)
1761
+ image.url = XPath.first(image_node, "@href").to_s
1762
+ end
1763
+ if image.url == "" && image_node.name == "LOGO"
1764
+ image.url = XPath.first(image_node, "@HREF").to_s
1765
+ end
1766
+ image.url.strip! unless image.url.nil?
1767
+ image.url = nil if image.url == ""
1768
+ image.title = XPath.first(image_node, "title/text()").to_s
1769
+ image.title.strip! unless image.title.nil?
1770
+ image.title = nil if image.title == ""
1771
+ image.description =
1772
+ XPath.first(image_node, "description/text()").to_s
1773
+ image.description.strip! unless image.description.nil?
1774
+ image.description = nil if image.description == ""
1775
+ image.link = XPath.first(image_node, "link/text()").to_s
1776
+ image.link.strip! unless image.link.nil?
1777
+ image.link = nil if image.link == ""
1778
+ image.height = XPath.first(image_node, "height/text()").to_s.to_i
1779
+ image.height = nil if image.height <= 0
1780
+ image.width = XPath.first(image_node, "width/text()").to_s.to_i
1781
+ image.width = nil if image.width <= 0
1782
+ image.style = XPath.first(image_node, "@style").to_s.downcase
1783
+ if image.style == ""
1784
+ image.style = XPath.first(image_node, "@STYLE").to_s.downcase
1785
+ end
1786
+ image.style.strip! unless image.style.nil?
1787
+ image.style = nil if image.style == ""
1788
+ @images << image
1789
+ end
1790
+ end
1791
+ end
1792
+ return @images
1793
+ end
1794
+
1795
+ # Returns the feed's text input field
1796
+ def text_input
1797
+ if @text_input.nil?
1798
+ @text_input = FeedTools::Feed::TextInput.new
1799
+ text_input_node = XPath.first(channel_node, "textInput")
1800
+ unless text_input_node.nil?
1801
+ @text_input.title =
1802
+ XPath.first(text_input_node, "title/text()").to_s
1803
+ @text_input.title = nil if @text_input.title == ""
1804
+ @text_input.description =
1805
+ XPath.first(text_input_node, "description/text()").to_s
1806
+ @text_input.description = nil if @text_input.description == ""
1807
+ @text_input.link =
1808
+ XPath.first(text_input_node, "link/text()").to_s
1809
+ @text_input.link = nil if @text_input.link == ""
1810
+ @text_input.name =
1811
+ XPath.first(text_input_node, "name/text()").to_s
1812
+ @text_input.name = nil if @text_input.name == ""
1813
+ end
1814
+ end
1815
+ return @text_input
1816
+ end
1817
+
1532
1818
  # Returns the feed's copyright information
1533
1819
  def copyright
1534
1820
  if @copyright.nil?
@@ -1696,7 +1982,7 @@ module FeedTools
1696
1982
  end
1697
1983
 
1698
1984
  # Returns true if this feed contains explicit material.
1699
- def explicit
1985
+ def explicit?
1700
1986
  if @explicit.nil?
1701
1987
  if XPath.first(channel_node,
1702
1988
  "media:adult/text()").to_s.downcase == "true" ||
@@ -1724,9 +2010,18 @@ module FeedTools
1724
2010
  if raw_items == nil || raw_items == []
1725
2011
  raw_items = XPath.match(channel_node, "item")
1726
2012
  end
2013
+ if raw_items == nil || raw_items == []
2014
+ raw_items = XPath.match(channel_node, "ITEM")
2015
+ end
2016
+ if raw_items == nil || raw_items == []
2017
+ raw_items = XPath.match(root_node, "ITEM")
2018
+ end
1727
2019
  if raw_items == nil || raw_items == []
1728
2020
  raw_items = XPath.match(channel_node, "entry")
1729
2021
  end
2022
+ if raw_items == nil || raw_items == []
2023
+ raw_items = XPath.match(root_node, "entry")
2024
+ end
1730
2025
 
1731
2026
  # create the individual feed items
1732
2027
  @items = []
@@ -1767,7 +2062,7 @@ module FeedTools
1767
2062
  # True if this feed contains audio content enclosures
1768
2063
  def podcast?
1769
2064
  podcast = false
1770
- $test_feed.items.each do |item|
2065
+ self.items.each do |item|
1771
2066
  item.enclosures.each do |enclosure|
1772
2067
  podcast = true if enclosure.audio?
1773
2068
  end
@@ -1778,7 +2073,7 @@ module FeedTools
1778
2073
  # True if this feed contains video content enclosures
1779
2074
  def vidlog?
1780
2075
  vidlog = false
1781
- $test_feed.items.each do |item|
2076
+ self.items.each do |item|
1782
2077
  item.enclosures.each do |enclosure|
1783
2078
  vidlog = true if enclosure.video?
1784
2079
  end
@@ -1805,7 +2100,7 @@ module FeedTools
1805
2100
  end
1806
2101
 
1807
2102
  # Forces this feed to expire.
1808
- def expire
2103
+ def expire!
1809
2104
  self.last_retrieved = Time.mktime(1970)
1810
2105
  self.save
1811
2106
  end
@@ -1817,11 +2112,11 @@ module FeedTools
1817
2112
  end
1818
2113
 
1819
2114
  # Generates xml based on the content of the feed
1820
- def build_xml(feed_type=(self.feed_type or "rss"), version=0.0,
2115
+ def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
1821
2116
  xml_builder=Builder::XmlMarkup.new(:indent => 2))
1822
- if feed_type == "rss" && version == 0.0
2117
+ if feed_type == "rss" && (version == nil || version == 0.0)
1823
2118
  version = 1.0
1824
- elsif feed_type == "atom" && version == 0.0
2119
+ elsif feed_type == "atom" && (version == nil || version == 0.0)
1825
2120
  version = 0.3
1826
2121
  end
1827
2122
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
@@ -1838,8 +2133,9 @@ module FeedTools
1838
2133
  else
1839
2134
  xml_builder.link
1840
2135
  end
1841
- unless image_link.nil? || image_link == ""
1842
- xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
2136
+ unless images.nil? || images.empty?
2137
+ xml_builder.image("rdf:resource" => CGI.escapeHTML(
2138
+ images.first.url))
1843
2139
  end
1844
2140
  unless description.nil? || description == ""
1845
2141
  xml_builder.description(description)
@@ -1866,18 +2162,30 @@ module FeedTools
1866
2162
  end
1867
2163
  build_xml_hook(feed_type, version, xml_builder)
1868
2164
  end
1869
- unless image_link.nil? || image_link == ""
1870
- xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
1871
- unless title.nil? || title == ""
1872
- xml_builder.title(title)
2165
+ unless images.nil? || images.empty?
2166
+ best_image = nil
2167
+ for image in self.images
2168
+ if image.link != nil
2169
+ best_image = image
2170
+ break
2171
+ end
2172
+ end
2173
+ best_image = images.first if best_image.nil?
2174
+ xml_builder.image("rdf:about" => CGI.escapeHTML(best_image.url)) do
2175
+ if best_image.title != nil && best_image.title != ""
2176
+ xml_builder.title(best_image.title)
2177
+ elsif self.title != nil && self.title != ""
2178
+ xml_builder.title(self.title)
1873
2179
  else
1874
2180
  xml_builder.title
1875
2181
  end
1876
- unless image_link.nil? || image_link == ""
1877
- xml_builder.url(image_link)
2182
+ unless best_image.url.nil? || best_image.url == ""
2183
+ xml_builder.url(best_image.url)
1878
2184
  end
1879
- unless link.nil? || link == ""
1880
- xml_builder.link(link)
2185
+ if best_image.link != nil && best_image.link != ""
2186
+ xml_builder.link(best_image.link)
2187
+ elsif self.link != nil && self.link != ""
2188
+ xml_builder.link(self.link)
1881
2189
  else
1882
2190
  xml_builder.link
1883
2191
  end
@@ -1891,7 +2199,7 @@ module FeedTools
1891
2199
  end
1892
2200
  elsif feed_type == "rss"
1893
2201
  # normal rss format
1894
- return xml_builder.rss("version" => version.to_s) do
2202
+ return xml_builder.rss("version" => version) do
1895
2203
  unless title.nil? || title == ""
1896
2204
  xml_builder.title(title)
1897
2205
  end
@@ -1913,7 +2221,7 @@ module FeedTools
1913
2221
  elsif feed_type == "atom"
1914
2222
  # normal atom format
1915
2223
  return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
1916
- "version" => version.to_s,
2224
+ "version" => version,
1917
2225
  "xml:lang" => language) do
1918
2226
  unless title.nil? || title == ""
1919
2227
  xml_builder.title(title,
@@ -2112,12 +2420,18 @@ module FeedTools
2112
2420
  end
2113
2421
  return false
2114
2422
  end
2423
+
2424
+ alias_method :link, :url
2425
+ alias_method :link=, :url=
2115
2426
  end
2116
- EnclosureCategory = Struct.new( "EnclosureCategory", :category, :scheme, :label )
2427
+
2428
+ # TODO: Make these actual classes instead of structs
2429
+ # ==================================================
2117
2430
  EnclosureHash = Struct.new( "EnclosureHash", :hash, :type )
2118
2431
  EnclosurePlayer = Struct.new( "EnclosurePlayer", :url, :height, :width )
2119
2432
  EnclosureCredit = Struct.new( "EnclosureCredit", :name, :role )
2120
- EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height, :width )
2433
+ EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height,
2434
+ :width )
2121
2435
 
2122
2436
  # Returns the parent feed of this feed item
2123
2437
  def feed
@@ -2186,19 +2500,27 @@ module FeedTools
2186
2500
  def title
2187
2501
  if @title.nil?
2188
2502
  repair_entities = false
2189
- if XPath.first(root_node, "title/@type").to_s == "xhtml" ||
2190
- XPath.first(root_node, "title/@mode").to_s == "xhtml" ||
2191
- XPath.first(root_node, "title/@type").to_s == "xml" ||
2192
- XPath.first(root_node, "title/@mode").to_s == "xml" ||
2193
- XPath.first(root_node, "title/@type").to_s ==
2194
- "application/xhtml+xml"
2195
- @title = XPath.first(root_node, "title").inner_xml
2196
- elsif XPath.first(root_node, "title/@type").to_s == "escaped" ||
2197
- XPath.first(root_node, "title/@mode").to_s == "escaped"
2503
+ title_node = XPath.first(root_node, "title")
2504
+ if title_node.nil?
2505
+ title_node = XPath.first(root_node, "dc:title")
2506
+ end
2507
+ if title_node.nil?
2508
+ title_node = XPath.first(root_node, "TITLE")
2509
+ end
2510
+ if title_node.nil?
2511
+ return nil
2512
+ end
2513
+ if XPath.first(title_node, "@type").to_s == "xhtml" ||
2514
+ XPath.first(title_node, "@mode").to_s == "xhtml" ||
2515
+ XPath.first(title_node, "@type").to_s == "xml" ||
2516
+ XPath.first(title_node, "@mode").to_s == "xml" ||
2517
+ XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
2518
+ @title = title_node.inner_xml
2519
+ elsif XPath.first(title_node, "@type").to_s == "escaped" ||
2520
+ XPath.first(title_node, "@mode").to_s == "escaped"
2198
2521
  @title = FeedTools.unescape_entities(
2199
- XPath.first(root_node, "title/text()").to_s)
2522
+ XPath.first(title_node, "text()").to_s)
2200
2523
  else
2201
- title_node = XPath.first(root_node, "title")
2202
2524
  @title = title_node.inner_xml
2203
2525
  repair_entities = true
2204
2526
  end
@@ -2252,16 +2574,25 @@ module FeedTools
2252
2574
  if description_node.nil?
2253
2575
  description_node = XPath.first(root_node, "abstract")
2254
2576
  end
2577
+ if description_node.nil?
2578
+ description_node = XPath.first(root_node, "ABSTRACT")
2579
+ end
2255
2580
  if description_node.nil?
2256
2581
  description_node = XPath.first(root_node, "content:encoded")
2257
2582
  end
2258
2583
  if description_node.nil?
2259
2584
  description_node = XPath.first(root_node, "content")
2260
2585
  end
2586
+ if description_node.nil?
2587
+ description_node = XPath.first(root_node, "fullitem")
2588
+ end
2261
2589
  if description_node.nil?
2262
2590
  description_node = XPath.first(root_node, "info")
2263
2591
  @bozo = true unless description_node.nil?
2264
2592
  end
2593
+ if description_node.nil?
2594
+ return nil
2595
+ end
2265
2596
  unless description_node.nil?
2266
2597
  if XPath.first(description_node, "@encoding").to_s != ""
2267
2598
  @description =
@@ -2297,7 +2628,6 @@ module FeedTools
2297
2628
  @description = FeedTools.tidy_html(@description)
2298
2629
  end
2299
2630
 
2300
- @description.gsub!(/\n/, " ") if @description.size < 80
2301
2631
  @description = @description.strip unless @description.nil?
2302
2632
  @description = nil if @description == ""
2303
2633
  end
@@ -2385,6 +2715,18 @@ module FeedTools
2385
2715
  if @link == ""
2386
2716
  @link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
2387
2717
  end
2718
+ if @link == ""
2719
+ @link = XPath.first(root_node, "@href").to_s
2720
+ end
2721
+ if @link == ""
2722
+ @link = XPath.first(root_node, "a/@href").to_s
2723
+ end
2724
+ if @link == ""
2725
+ @link = XPath.first(root_node, "@HREF").to_s
2726
+ end
2727
+ if @link == ""
2728
+ @link = XPath.first(root_node, "A/@HREF").to_s
2729
+ end
2388
2730
  if @link == ""
2389
2731
  if FeedTools.is_url? self.guid
2390
2732
  @link = self.guid
@@ -2410,7 +2752,7 @@ module FeedTools
2410
2752
  @link = new_link
2411
2753
  end
2412
2754
 
2413
- # Returns the feed comment link
2755
+ # Returns the feed item comment link
2414
2756
  def comment_link
2415
2757
  if @comment_link.nil?
2416
2758
  # get the feed item comment link from the xml document
@@ -2423,64 +2765,104 @@ module FeedTools
2423
2765
  return @comment_link
2424
2766
  end
2425
2767
 
2426
- # Sets the feed comment link
2768
+ # Sets the feed item comment link
2427
2769
  def comment_link=(new_comment_link)
2428
2770
  @comment_link = new_comment_link
2429
2771
  end
2430
2772
 
2431
- # Returns the feed image link
2432
- def image_link
2433
- if @image_link.nil?
2434
- # get the feed image link from the xml document
2435
- if @image_link == ""
2436
- @image_link = XPath.first(root_node, "link[@type='image/jpeg']/@href").to_s
2437
- end
2438
- if @image_link == ""
2439
- @image_link = XPath.first(root_node, "link[@type='image/gif']/@href").to_s
2440
- end
2441
- if @image_link == ""
2442
- @image_link = XPath.first(root_node, "link[@type='image/png']/@href").to_s
2773
+ # Returns a list of the feed item's categories
2774
+ def categories
2775
+ if @categories.nil?
2776
+ @categories = []
2777
+ category_nodes = XPath.match(root_node, "category")
2778
+ if category_nodes.nil? || category_nodes.empty?
2779
+ category_nodes = XPath.match(root_node, "dc:subject")
2780
+ end
2781
+ unless category_nodes.nil?
2782
+ for category_node in category_nodes
2783
+ category = FeedTools::Feed::Category.new
2784
+ category.term = XPath.first(category_node, "@term").to_s
2785
+ if category.term == ""
2786
+ category.term = XPath.first(category_node, "text()").to_s
2787
+ end
2788
+ category.term.strip! unless category.term.nil?
2789
+ category.term = nil if category.term == ""
2790
+ category.label = XPath.first(category_node, "@label").to_s
2791
+ category.label.strip! unless category.label.nil?
2792
+ category.label = nil if category.label == ""
2793
+ category.scheme = XPath.first(category_node, "@scheme").to_s
2794
+ if category.scheme == ""
2795
+ category.scheme = XPath.first(category_node, "@domain").to_s
2796
+ end
2797
+ category.scheme.strip! unless category.scheme.nil?
2798
+ category.scheme = nil if category.scheme == ""
2799
+ @categories << category
2800
+ end
2443
2801
  end
2444
- # The following two should technically never occur, but have been included
2445
- # simply because I've seen both occuring in the wild at least once.
2446
- if @image_link == ""
2447
- @image_link = XPath.first(root_node, "image/url/text()").to_s
2802
+ end
2803
+ return @categories
2804
+ end
2805
+
2806
+ # Returns a list of the feed item's images
2807
+ def images
2808
+ if @images.nil?
2809
+ @images = []
2810
+ image_nodes = XPath.match(root_node, "link")
2811
+ if image_nodes.nil? || image_nodes.empty?
2812
+ image_nodes = XPath.match(root_node, "logo")
2448
2813
  end
2449
- if @image_link == ""
2450
- @image_link = XPath.first(root_node, "image/@rdf:resource").to_s
2814
+ if image_nodes.nil? || image_nodes.empty?
2815
+ image_nodes = XPath.match(root_node, "LOGO")
2451
2816
  end
2452
- if @image_link == ""
2453
- # If there's only a media thumbnail, we can just borrow it. Technically, this isn't
2454
- # ideal, but chances are very good that anything that makes use of this image is
2455
- # simply not going to care anyhow.
2456
- @image_link = XPath.first(root_node, "media:thumbnail/@url").to_s
2457
- if @image_link == ""
2458
- @media_image_link = @image_link
2459
- end
2817
+ if image_nodes.nil? || image_nodes.empty?
2818
+ image_nodes = XPath.match(root_node, "image")
2460
2819
  end
2461
- if @image_link == ""
2462
- # If there's only an itunes image, we can just borrow it. See comment above regarding
2463
- # less-than-ideal-ness.
2464
- if @itunes_image_link == ""
2465
- @image_link = XPath.first(root_node, "itunes:image/@href").to_s
2466
- if @image_link == ""
2467
- @image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
2820
+ unless image_nodes.nil?
2821
+ for image_node in image_nodes
2822
+ image = FeedTools::Feed::Image.new
2823
+ image.url = XPath.first(image_node, "url/text()").to_s
2824
+ if image.url != ""
2825
+ self.feed.bozo = true
2468
2826
  end
2469
- @itunes_image_link = @image_link
2470
- else
2471
- @image_link = @itunes_image_link
2827
+ if image.url == ""
2828
+ image.url = XPath.first(image_node, "@rdf:resource").to_s
2829
+ end
2830
+ if image.url == "" && (image_node.name == "logo" ||
2831
+ (image_node.attributes['type'] =~ /^image/) == 0)
2832
+ image.url = XPath.first(image_node, "@href").to_s
2833
+ end
2834
+ if image.url == "" && image_node.name == "LOGO"
2835
+ image.url = XPath.first(image_node, "@HREF").to_s
2836
+ end
2837
+ image.url.strip! unless image.url.nil?
2838
+ image.url = nil if image.url == ""
2839
+ image.title = XPath.first(image_node, "title/text()").to_s
2840
+ image.title.strip! unless image.title.nil?
2841
+ image.title = nil if image.title == ""
2842
+ image.description =
2843
+ XPath.first(image_node, "description/text()").to_s
2844
+ image.description.strip! unless image.description.nil?
2845
+ image.description = nil if image.description == ""
2846
+ image.link = XPath.first(image_node, "link/text()").to_s
2847
+ image.link.strip! unless image.link.nil?
2848
+ image.link = nil if image.link == ""
2849
+ image.height = XPath.first(image_node, "height/text()").to_s.to_i
2850
+ image.height = nil if image.height <= 0
2851
+ image.width = XPath.first(image_node, "width/text()").to_s.to_i
2852
+ image.width = nil if image.width <= 0
2853
+ image.style = XPath.first(image_node, "@style").to_s.downcase
2854
+ if image.style == ""
2855
+ image.style = XPath.first(image_node, "@STYLE").to_s.downcase
2856
+ end
2857
+ image.style.strip! unless image.style.nil?
2858
+ image.style = nil if image.style == ""
2859
+ @images << image
2472
2860
  end
2473
2861
  end
2474
- @image_link = FeedTools.normalize_url(@image_link)
2475
2862
  end
2476
- return @image_link
2477
- end
2478
-
2479
- # Sets the feed image link
2480
- def image_link=(new_image_link)
2481
- @image_link = new_image_link
2863
+ return @images
2482
2864
  end
2483
-
2865
+
2484
2866
  # Returns the feed item itunes image link
2485
2867
  #
2486
2868
  # NOTE: if it's not present, this no longer falls back to the normal image link.
@@ -2494,9 +2876,6 @@ module FeedTools
2494
2876
  if @itunes_image_link == ""
2495
2877
  @itunes_image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
2496
2878
  end
2497
- if @itunes_image_link == ""
2498
- @itunes_image_link = self.image_link
2499
- end
2500
2879
  @itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
2501
2880
  end
2502
2881
  return @itunes_image_link
@@ -2514,9 +2893,6 @@ module FeedTools
2514
2893
  if @media_thumbnail_link.nil?
2515
2894
  # get the feed item media thumbnail link from the xml document
2516
2895
  @media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
2517
- if @media_thumbnail_link == ""
2518
- @media_thumbnail_link = image_link
2519
- end
2520
2896
  @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
2521
2897
  end
2522
2898
  return @media_thumbnail_link
@@ -2527,6 +2903,21 @@ module FeedTools
2527
2903
  @media_thumbnail_link = new_media_thumbnail_link
2528
2904
  end
2529
2905
 
2906
+ # Returns the feed item's copyright information
2907
+ def copyright
2908
+ if @copyright.nil?
2909
+ @copyright = XPath.first(root_node, "dc:rights/text()").to_s
2910
+ @copyright = FeedTools.sanitize_html(@copyright, :strip)
2911
+ @copyright = nil if @copyright == ""
2912
+ end
2913
+ return @copyright
2914
+ end
2915
+
2916
+ # Sets the feed item's copyright information
2917
+ def copyright=(new_copyright)
2918
+ @copyright = new_copyright
2919
+ end
2920
+
2530
2921
  # Returns all feed item enclosures
2531
2922
  def enclosures
2532
2923
  if @enclosures.nil?
@@ -2621,11 +3012,13 @@ module FeedTools
2621
3012
  end
2622
3013
  enclosure.categories = []
2623
3014
  for category in XPath.match(enclosure_node, "media:category")
2624
- enclosure.categories << EnclosureCategory.new(
2625
- FeedTools.unescape_entities(category.text),
2626
- FeedTools.unescape_entities(category.attributes["scheme"].to_s),
3015
+ enclosure.categories << FeedTools::Feed::Category.new
3016
+ enclosure.categories.last.term =
3017
+ FeedTools.unescape_entities(category.text)
3018
+ enclosure.categories.last.scheme =
3019
+ FeedTools.unescape_entities(category.attributes["scheme"].to_s)
3020
+ enclosure.categories.last.label =
2627
3021
  FeedTools.unescape_entities(category.attributes["label"].to_s)
2628
- )
2629
3022
  if enclosure.categories.last.scheme == ""
2630
3023
  enclosure.categories.last.scheme = nil
2631
3024
  end
@@ -2714,11 +3107,13 @@ module FeedTools
2714
3107
  if (enclosure.categories.nil? || enclosure.categories.size == 0)
2715
3108
  enclosure.categories = []
2716
3109
  for category in XPath.match(media_group, "media:category")
2717
- enclosure.categories << EnclosureCategory.new(
2718
- FeedTools.unescape_entities(category.text),
2719
- FeedTools.unescape_entities(category.attributes["scheme"].to_s),
3110
+ enclosure.categories << FeedTools::Feed::Category.new
3111
+ enclosure.categories.last.term =
3112
+ FeedTools.unescape_entities(category.text)
3113
+ enclosure.categories.last.scheme =
3114
+ FeedTools.unescape_entities(category.attributes["scheme"].to_s)
3115
+ enclosure.categories.last.label =
2720
3116
  FeedTools.unescape_entities(category.attributes["label"].to_s)
2721
- )
2722
3117
  if enclosure.categories.last.scheme == ""
2723
3118
  enclosure.categories.last.scheme = nil
2724
3119
  end
@@ -2880,7 +3275,7 @@ module FeedTools
2880
3275
  # if it's missing.
2881
3276
  if @enclosures.size == 1
2882
3277
  if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
2883
- @enclosures.first.duration = self.duration
3278
+ @enclosures.first.duration = self.itunes_duration
2884
3279
  end
2885
3280
  end
2886
3281
 
@@ -2910,6 +3305,10 @@ module FeedTools
2910
3305
  @author.raw = FeedTools.unescape_entities(
2911
3306
  XPath.first(root_node, "dc:author/text()").to_s)
2912
3307
  end
3308
+ if @author.raw == ""
3309
+ @author.raw = FeedTools.unescape_entities(
3310
+ XPath.first(root_node, "managingEditor/text()").to_s)
3311
+ end
2913
3312
  unless @author.raw == ""
2914
3313
  raw_scan = @author.raw.scan(
2915
3314
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -2983,6 +3382,76 @@ module FeedTools
2983
3382
  end
2984
3383
  end
2985
3384
 
3385
+ # Returns the feed publisher
3386
+ def publisher
3387
+ if @publisher.nil?
3388
+ @publisher = FeedTools::Feed::Author.new
3389
+
3390
+ # Set the publisher name
3391
+ @publisher.raw = FeedTools.unescape_entities(
3392
+ XPath.first(root_node, "dc:publisher/text()").to_s)
3393
+ if @publisher.raw == ""
3394
+ @publisher.raw = FeedTools.unescape_entities(
3395
+ XPath.first(root_node, "webMaster/text()").to_s)
3396
+ end
3397
+ unless @publisher.raw == ""
3398
+ raw_scan = @publisher.raw.scan(
3399
+ /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
3400
+ if raw_scan.nil? || raw_scan.size == 0
3401
+ raw_scan = @publisher.raw.scan(
3402
+ /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
3403
+ unless raw_scan.size == 0
3404
+ publisher_raw_pair = raw_scan.first.reverse
3405
+ end
3406
+ else
3407
+ publisher_raw_pair = raw_scan.first
3408
+ end
3409
+ if raw_scan.nil? || raw_scan.size == 0
3410
+ email_scan = @publisher.raw.scan(
3411
+ /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
3412
+ if email_scan != nil && email_scan.size > 0
3413
+ @publisher.email = email_scan.first.strip
3414
+ end
3415
+ end
3416
+ unless publisher_raw_pair.nil? || publisher_raw_pair.size == 0
3417
+ @publisher.name = publisher_raw_pair.first.strip
3418
+ @publisher.email = publisher_raw_pair.last.strip
3419
+ else
3420
+ unless @publisher.raw.include?("@")
3421
+ # We can be reasonably sure we are looking at something
3422
+ # that the creator didn't intend to contain an email address if
3423
+ # it got through the preceeding regexes and it doesn't
3424
+ # contain the tell-tale '@' symbol.
3425
+ @publisher.name = @publisher.raw
3426
+ end
3427
+ end
3428
+ end
3429
+
3430
+ @publisher.name = nil if @publisher.name == ""
3431
+ @publisher.raw = nil if @publisher.raw == ""
3432
+ @publisher.email = nil if @publisher.email == ""
3433
+ @publisher.url = nil if @publisher.url == ""
3434
+ end
3435
+ return @publisher
3436
+ end
3437
+
3438
+ # Sets the feed publisher
3439
+ def publisher=(new_publisher)
3440
+ if new_publisher.respond_to?(:name) &&
3441
+ new_publisher.respond_to?(:email) &&
3442
+ new_publisher.respond_to?(:url)
3443
+ # It's a complete Author object, just set it.
3444
+ @publisher = new_publisher
3445
+ else
3446
+ # We're not looking at an Author object, this is probably a string,
3447
+ # default to setting the publisher's name.
3448
+ if @publisher.nil?
3449
+ @publisher = FeedTools::Feed::Author.new
3450
+ end
3451
+ @publisher.name = new_publisher
3452
+ end
3453
+ end
3454
+
2986
3455
  # Returns the contents of the itunes:author element
2987
3456
  #
2988
3457
  # This inherits from any incorrectly placed channel-level itunes:author
@@ -3026,10 +3495,6 @@ module FeedTools
3026
3495
  @itunes_duration = new_itunes_duration
3027
3496
  end
3028
3497
 
3029
- # Sets the itunes:summary
3030
- def itunes_summary=(new_itunes_summary)
3031
- end
3032
-
3033
3498
  # Returns the feed item time
3034
3499
  def time
3035
3500
  if @time.nil?
@@ -3054,6 +3519,32 @@ module FeedTools
3054
3519
  @time = new_time
3055
3520
  end
3056
3521
 
3522
+ # Returns the url for posting comments
3523
+ def comments
3524
+ if @comments.nil?
3525
+ @comments = XPath.first(root_node, "comments/text()").to_s
3526
+ @comments = nil if @comments == ""
3527
+ end
3528
+ return @comments
3529
+ end
3530
+
3531
+ # Sets the url for posting comments
3532
+ def comments=(new_comments)
3533
+ @comments = new_comments
3534
+ end
3535
+
3536
+ # The source that this post was based on
3537
+ def source
3538
+ if @source.nil?
3539
+ @source = FeedTools::Feed::Link.new
3540
+ @source.url = XPath.first(root_node, "source/@url").to_s
3541
+ @source.url = nil if @source.url == ""
3542
+ @source.value = XPath.first(root_node, "source/text()").to_s
3543
+ @source.value = nil if @source.value == ""
3544
+ end
3545
+ return @source
3546
+ end
3547
+
3057
3548
  # Returns the feed item tags
3058
3549
  def tags
3059
3550
  # TODO: support the rel="tag" microformat
@@ -3131,7 +3622,7 @@ module FeedTools
3131
3622
  "itunes:explicit/text()").to_s.downcase == "yes" ||
3132
3623
  XPath.first(root_node,
3133
3624
  "itunes:explicit/text()").to_s.downcase == "true" ||
3134
- feed.explicit
3625
+ feed.explicit?
3135
3626
  @explicit = true
3136
3627
  else
3137
3628
  @explicit = false
@@ -3152,8 +3643,13 @@ module FeedTools
3152
3643
  end
3153
3644
 
3154
3645
  # Generates xml based on the content of the feed item
3155
- def build_xml(feed_type=(self.feed.feed_type or "rss"), version=0.0,
3646
+ def build_xml(feed_type=(self.feed.feed_type or "rss"), version=nil,
3156
3647
  xml_builder=Builder::XmlMarkup.new(:indent => 2))
3648
+ if feed_type == "rss" && (version == nil || version == 0.0)
3649
+ version = 1.0
3650
+ elsif feed_type == "atom" && (version == nil || version == 0.0)
3651
+ version = 0.3
3652
+ end
3157
3653
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
3158
3654
  # RDF-based rss format
3159
3655
  if link.nil?
@@ -3253,6 +3749,8 @@ module FeedTools
3253
3749
  alias_method :tagline=, :description=
3254
3750
  alias_method :subtitle, :description
3255
3751
  alias_method :subtitle=, :description=
3752
+ alias_method :summary, :description
3753
+ alias_method :summary=, :description=
3256
3754
  alias_method :abstract, :description
3257
3755
  alias_method :abstract=, :description=
3258
3756
  alias_method :content, :description
@@ -3264,12 +3762,26 @@ end
3264
3762
 
3265
3763
  module REXML # :nodoc:
3266
3764
  class Element # :nodoc:
3267
- def inner_xml # :nodoc:
3268
- result = ""
3269
- self.each_child do |child|
3270
- result << child.to_s
3765
+ unless REXML::Element.public_instance_methods.include? :inner_xml
3766
+ def inner_xml # :nodoc:
3767
+ result = ""
3768
+ self.each_child do |child|
3769
+ result << child.to_s
3770
+ end
3771
+ return result
3772
+ end
3773
+ end
3774
+
3775
+ unless REXML::Element.public_instance_methods.include? :base_uri
3776
+ def base_uri # :nodoc:
3777
+ if not attribute('xml:base')
3778
+ return parent.base_uri
3779
+ elsif parent
3780
+ return URI.join(parent.base_uri, attribute('xml:base').value).to_s
3781
+ else
3782
+ return (attribute('xml:base').value or '')
3783
+ end
3271
3784
  end
3272
- return result
3273
3785
  end
3274
3786
  end
3275
3787
  end