feedtools 0.2.17 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,35 @@
1
+ #--
2
+ # Copyright (c) 2005 Robert Aman
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
1
24
  module FeedTools
2
25
  # The <tt>FeedTools::FeedItem</tt> class represents the structure of
3
26
  # a single item within a web feed.
4
27
  class FeedItem
28
+ # :stopdoc:
5
29
  include REXML
30
+ include GenericHelper
31
+ private :validate_options
32
+ # :startdoc:
6
33
 
7
34
  # This class stores information about a feed item's file enclosures.
8
35
  class Enclosure
@@ -125,7 +152,6 @@ module FeedTools
125
152
  # Initialize the feed object
126
153
  def initialize
127
154
  super
128
- @feed = nil
129
155
  @feed_data = nil
130
156
  @feed_data_type = :xml
131
157
  @xml_doc = nil
@@ -136,15 +162,31 @@ module FeedTools
136
162
  end
137
163
 
138
164
  # Returns the parent feed of this feed item
165
+ # Warning, this method may be slow if you have a
166
+ # large number of FeedTools::Feed objects. Can't
167
+ # use a direct reference to the parent because it plays
168
+ # havoc with the garbage collector.
139
169
  def feed
140
- return @feed
170
+ parent_feed = nil
171
+ ObjectSpace.each_object(FeedTools::Feed) do |feed|
172
+ if feed.instance_variable_get("@items").nil?
173
+ feed.items
174
+ end
175
+ unsorted_items = feed.instance_variable_get("@items")
176
+ for item in unsorted_items
177
+ if item.object_id == self.object_id
178
+ if parent_feed.nil?
179
+ parent_feed = feed
180
+ break
181
+ else
182
+ raise "Multiple parent feeds found."
183
+ end
184
+ end
185
+ end
186
+ end
187
+ return parent_feed
141
188
  end
142
189
 
143
- # Sets the parent feed of this feed item
144
- def feed=(new_feed)
145
- @feed = new_feed
146
- end
147
-
148
190
  # Returns the feed item's raw data.
149
191
  def feed_data
150
192
  return @feed_data
@@ -154,33 +196,6 @@ module FeedTools
154
196
  def feed_data=(new_feed_data)
155
197
  @time = nil
156
198
  @feed_data = new_feed_data
157
-
158
- # We need an immediate parse of the time so we don't mess up sort orders
159
- unless root_node.nil?
160
- repair_entities = false
161
- time_node = XPath.first(root_node, "pubDate")
162
- if time_node.nil?
163
- time_node = XPath.first(root_node, "dc:date")
164
- end
165
- if time_node.nil?
166
- time_node = XPath.first(root_node, "dc:date", FEED_TOOLS_NAMESPACES)
167
- end
168
- if time_node.nil?
169
- time_node = XPath.first(root_node, "issued")
170
- end
171
- if time_node.nil?
172
- time_node = XPath.first(root_node, "updated")
173
- end
174
- if time_node.nil?
175
- time_node = XPath.first(root_node, "time")
176
- end
177
- end
178
- unless time_node.nil?
179
- begin
180
- @time = Time.parse(time_node.inner_xml)
181
- rescue
182
- end
183
- end
184
199
  end
185
200
 
186
201
  # Returns the feed item's data type.
@@ -253,10 +268,22 @@ module FeedTools
253
268
  if @title.nil?
254
269
  unless root_node.nil?
255
270
  repair_entities = false
256
- title_node = XPath.first(root_node, "title")
271
+ title_node = XPath.first(root_node, "atom10:title",
272
+ FEED_TOOLS_NAMESPACES)
273
+ if title_node.nil?
274
+ title_node = XPath.first(root_node, "title")
275
+ end
276
+ if title_node.nil?
277
+ title_node = XPath.first(root_node, "atom03:title",
278
+ FEED_TOOLS_NAMESPACES)
279
+ end
257
280
  if title_node.nil?
258
281
  title_node = XPath.first(root_node, "atom:title")
259
282
  end
283
+ if title_node.nil?
284
+ title_node = XPath.first(root_node, "dc:title",
285
+ FEED_TOOLS_NAMESPACES)
286
+ end
260
287
  if title_node.nil?
261
288
  title_node = XPath.first(root_node, "dc:title")
262
289
  end
@@ -267,16 +294,21 @@ module FeedTools
267
294
  if title_node.nil?
268
295
  return nil
269
296
  end
270
- if XPath.first(title_node, "@type").to_s == "xhtml" ||
271
- XPath.first(title_node, "@mode").to_s == "xhtml" ||
272
- XPath.first(title_node, "@type").to_s == "xml" ||
273
- XPath.first(title_node, "@mode").to_s == "xml" ||
274
- XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
297
+ title_type = XPath.first(title_node, "@type").to_s
298
+ title_mode = XPath.first(title_node, "@mode").to_s
299
+ title_encoding = XPath.first(title_node, "@encoding").to_s
300
+
301
+ # Note that we're checking for misuse of type, mode and encoding here
302
+ if title_type == "base64" || title_mode == "base64" ||
303
+ title_encoding == "base64"
304
+ @title = Base64.decode64(title_node.inner_xml.strip)
305
+ elsif title_type == "xhtml" || title_mode == "xhtml" ||
306
+ title_type == "xml" || title_mode == "xml" ||
307
+ title_type == "application/xhtml+xml"
275
308
  @title = title_node.inner_xml
276
- elsif XPath.first(title_node, "@type").to_s == "escaped" ||
277
- XPath.first(title_node, "@mode").to_s == "escaped"
309
+ elsif title_type == "escaped" || title_mode == "escaped"
278
310
  @title = FeedTools.unescape_entities(
279
- XPath.first(title_node, "text()").to_s)
311
+ title_node.inner_xml)
280
312
  else
281
313
  @title = title_node.inner_xml
282
314
  repair_entities = true
@@ -366,27 +398,29 @@ module FeedTools
366
398
  if description_node.nil?
367
399
  return nil
368
400
  end
369
- unless description_node.nil?
370
- if XPath.first(description_node, "@encoding").to_s != ""
371
- @description =
372
- "[Embedded data objects are not currently supported.]"
373
- elsif description_node.cdatas.size > 0
374
- @description = description_node.cdatas.first.value
375
- elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
376
- XPath.first(description_node, "@mode").to_s == "xhtml" ||
377
- XPath.first(description_node, "@type").to_s == "xml" ||
378
- XPath.first(description_node, "@mode").to_s == "xml" ||
379
- XPath.first(description_node, "@type").to_s ==
380
- "application/xhtml+xml"
381
- @description = description_node.inner_xml
382
- elsif XPath.first(description_node, "@type").to_s == "escaped" ||
383
- XPath.first(description_node, "@mode").to_s == "escaped"
384
- @description = FeedTools.unescape_entities(
385
- description_node.inner_xml)
386
- else
387
- @description = description_node.inner_xml
388
- repair_entities = true
389
- end
401
+ description_type = XPath.first(description_node, "@type").to_s
402
+ description_mode = XPath.first(description_node, "@mode").to_s
403
+ description_encoding = XPath.first(description_node, "@encoding").to_s
404
+
405
+ # Note that we're checking for misuse of type, mode and encoding here
406
+ if description_encoding != ""
407
+ @description =
408
+ "[Embedded data objects are not currently supported.]"
409
+ elsif description_node.cdatas.size > 0
410
+ @description = description_node.cdatas.first.value
411
+ elsif description_type == "base64" || description_mode == "base64" ||
412
+ description_encoding == "base64"
413
+ @description = Base64.decode64(description_node.inner_xml.strip)
414
+ elsif description_type == "xhtml" || description_mode == "xhtml" ||
415
+ description_type == "xml" || description_mode == "xml" ||
416
+ description_type == "application/xhtml+xml"
417
+ @description = description_node.inner_xml
418
+ elsif description_type == "escaped" || description_mode == "escaped"
419
+ @description = FeedTools.unescape_entities(
420
+ description_node.inner_xml)
421
+ else
422
+ @description = description_node.inner_xml
423
+ repair_entities = true
390
424
  end
391
425
  if @description == ""
392
426
  @description = self.itunes_summary
@@ -666,19 +700,70 @@ module FeedTools
666
700
  def copyright
667
701
  if @copyright.nil?
668
702
  unless root_node.nil?
669
- @copyright = XPath.first(root_node, "dc:rights/text()").to_s
670
- if @copyright == ""
671
- @copyright = XPath.first(root_node, "rights/text()").to_s
703
+ repair_entities = false
704
+
705
+ copyright_node = XPath.first(root_node, "dc:rights")
706
+ if copyright_node.nil?
707
+ copyright_node = XPath.first(root_node, "dc:rights",
708
+ FEED_TOOLS_NAMESPACES)
709
+ end
710
+ if copyright_node.nil?
711
+ copyright_node = XPath.first(root_node, "rights",
712
+ FEED_TOOLS_NAMESPACES)
713
+ end
714
+ if copyright_node.nil?
715
+ copyright_node = XPath.first(root_node, "copyright",
716
+ FEED_TOOLS_NAMESPACES)
717
+ end
718
+ if copyright_node.nil?
719
+ copyright_node = XPath.first(root_node, "atom03:copyright",
720
+ FEED_TOOLS_NAMESPACES)
672
721
  end
673
- if @copyright == ""
674
- @copyright = XPath.first(root_node, "copyright/text()").to_s
722
+ if copyright_node.nil?
723
+ copyright_node = XPath.first(root_node, "atom10:copyright",
724
+ FEED_TOOLS_NAMESPACES)
675
725
  end
676
- if @copyright == ""
677
- @copyright = XPath.first(root_node, "copyrights/text()").to_s
726
+ if copyright_node.nil?
727
+ copyright_node = XPath.first(root_node, "copyrights",
728
+ FEED_TOOLS_NAMESPACES)
678
729
  end
730
+ end
731
+ if copyright_node.nil?
732
+ return nil
733
+ end
734
+ copyright_type = XPath.first(copyright_node, "@type").to_s
735
+ copyright_mode = XPath.first(copyright_node, "@mode").to_s
736
+ copyright_encoding = XPath.first(copyright_node, "@encoding").to_s
737
+
738
+ # Note that we're checking for misuse of type, mode and encoding here
739
+ if copyright_encoding != ""
740
+ @copyright =
741
+ "[Embedded data objects are not currently supported.]"
742
+ elsif copyright_node.cdatas.size > 0
743
+ @copyright = copyright_node.cdatas.first.value
744
+ elsif copyright_type == "base64" || copyright_mode == "base64" ||
745
+ copyright_encoding == "base64"
746
+ @copyright = Base64.decode64(copyright_node.inner_xml.strip)
747
+ elsif copyright_type == "xhtml" || copyright_mode == "xhtml" ||
748
+ copyright_type == "xml" || copyright_mode == "xml" ||
749
+ copyright_type == "application/xhtml+xml"
750
+ @copyright = copyright_node.inner_xml
751
+ elsif copyright_type == "escaped" || copyright_mode == "escaped"
752
+ @copyright = FeedTools.unescape_entities(
753
+ copyright_node.inner_xml)
754
+ else
755
+ @copyright = copyright_node.inner_xml
756
+ repair_entities = true
757
+ end
758
+
759
+ unless @copyright.nil?
679
760
  @copyright = FeedTools.sanitize_html(@copyright, :strip)
680
- @copyright = nil if @copyright == ""
761
+ @copyright = FeedTools.unescape_entities(@copyright) if repair_entities
762
+ @copyright = FeedTools.tidy_html(@copyright)
681
763
  end
764
+
765
+ @copyright = @copyright.strip unless @copyright.nil?
766
+ @copyright = nil if @copyright == ""
682
767
  end
683
768
  return @copyright
684
769
  end
@@ -963,11 +1048,13 @@ module FeedTools
963
1048
  if enclosure.categories.nil?
964
1049
  enclosure.categories = []
965
1050
  end
966
- enclosure.categories << EnclosureCategory.new(
967
- FeedTools.unescape_entities(category_path),
968
- FeedTools.unescape_entities("http://www.apple.com/itunes/store/"),
969
- FeedTools.unescape_entities("iTunes Music Store Categories")
970
- )
1051
+ enclosure.categories << FeedTools::Feed::Category.new
1052
+ enclosure.categories.last.term =
1053
+ FeedTools.unescape_entities(category_path)
1054
+ enclosure.categories.last.scheme =
1055
+ "http://www.apple.com/itunes/store/"
1056
+ enclosure.categories.last.label =
1057
+ "iTunes Music Store Categories"
971
1058
  end
972
1059
  end
973
1060
 
@@ -1061,18 +1148,34 @@ module FeedTools
1061
1148
  if @author.nil?
1062
1149
  @author = FeedTools::Feed::Author.new
1063
1150
  unless root_node.nil?
1064
- author_node = XPath.first(root_node, "author")
1151
+ author_node = XPath.first(root_node, "atom10:author",
1152
+ FEED_TOOLS_NAMESPACES)
1153
+ if author_node.nil?
1154
+ author_node = XPath.first(root_node, "atom03:author",
1155
+ FEED_TOOLS_NAMESPACES)
1156
+ end
1157
+ if author_node.nil?
1158
+ author_node = XPath.first(root_node, "atom:author")
1159
+ end
1160
+ if author_node.nil?
1161
+ author_node = XPath.first(root_node, "author")
1162
+ end
1065
1163
  if author_node.nil?
1066
1164
  author_node = XPath.first(root_node, "managingEditor")
1067
1165
  end
1166
+ if author_node.nil?
1167
+ author_node = XPath.first(root_node, "dc:author",
1168
+ FEED_TOOLS_NAMESPACES)
1169
+ end
1068
1170
  if author_node.nil?
1069
1171
  author_node = XPath.first(root_node, "dc:author")
1070
1172
  end
1071
1173
  if author_node.nil?
1072
- author_node = XPath.first(root_node, "dc:creator")
1174
+ author_node = XPath.first(root_node, "dc:creator",
1175
+ FEED_TOOLS_NAMESPACES)
1073
1176
  end
1074
1177
  if author_node.nil?
1075
- author_node = XPath.first(root_node, "atom:author")
1178
+ author_node = XPath.first(root_node, "dc:creator")
1076
1179
  end
1077
1180
  end
1078
1181
  unless author_node.nil?
@@ -1280,7 +1383,10 @@ module FeedTools
1280
1383
  end
1281
1384
 
1282
1385
  # Returns the feed item time
1283
- def time
1386
+ def time(options = {})
1387
+ validate_options([ :estimate_timestamp ],
1388
+ options.keys)
1389
+ options = { :estimate_timestamp => true }.merge(options)
1284
1390
  if @time.nil?
1285
1391
  unless root_node.nil?
1286
1392
  time_string = XPath.first(root_node, "pubDate/text()").to_s
@@ -1300,22 +1406,24 @@ module FeedTools
1300
1406
  begin
1301
1407
  time_string = "" if time_string.nil?
1302
1408
  if time_string != ""
1303
- @time = Time.parse(time_string)
1304
- else
1305
- @time = succ_time
1306
- if @time.nil?
1307
- @time = prev_time
1308
- end
1409
+ @time = Time.parse(time_string).gmtime
1309
1410
  end
1310
1411
  rescue
1311
- @time = succ_time
1412
+ end
1413
+ if options[:estimate_timestamp]
1312
1414
  if @time.nil?
1313
- @time = prev_time
1415
+ begin
1416
+ @time = succ_time
1417
+ if @time.nil?
1418
+ @time = prev_time
1419
+ end
1420
+ rescue
1421
+ end
1422
+ if @time.nil?
1423
+ @time = Time.now.gmtime
1424
+ end
1314
1425
  end
1315
1426
  end
1316
- if @time.nil?
1317
- @time = Time.now.gmtime
1318
- end
1319
1427
  end
1320
1428
  return @time
1321
1429
  end
@@ -1328,13 +1436,14 @@ module FeedTools
1328
1436
  # Returns 1 second after the previous item's time.
1329
1437
  def succ_time #:nodoc:
1330
1438
  begin
1331
- if feed.nil?
1439
+ parent_feed = self.feed
1440
+ if parent_feed.nil?
1332
1441
  return nil
1333
1442
  end
1334
- if feed.instance_variable_get("@items").nil?
1335
- feed.items
1443
+ if parent_feed.instance_variable_get("@items").nil?
1444
+ parent_feed.items
1336
1445
  end
1337
- unsorted_items = feed.instance_variable_get("@items")
1446
+ unsorted_items = parent_feed.instance_variable_get("@items")
1338
1447
  item_index = unsorted_items.index(self)
1339
1448
  if item_index.nil?
1340
1449
  return nil
@@ -1343,7 +1452,7 @@ module FeedTools
1343
1452
  return nil
1344
1453
  end
1345
1454
  previous_item = unsorted_items[item_index - 1]
1346
- return (previous_item.time + 1)
1455
+ return (previous_item.time(:estimate_timestamp => false) + 1)
1347
1456
  rescue
1348
1457
  return nil
1349
1458
  end
@@ -1353,13 +1462,14 @@ module FeedTools
1353
1462
  # Returns 1 second before the succeeding item's time.
1354
1463
  def prev_time #:nodoc:
1355
1464
  begin
1356
- if feed.nil?
1465
+ parent_feed = self.feed
1466
+ if parent_feed.nil?
1357
1467
  return nil
1358
1468
  end
1359
- if feed.instance_variable_get("@items").nil?
1360
- feed.items
1469
+ if parent_feed.instance_variable_get("@items").nil?
1470
+ parent_feed.items
1361
1471
  end
1362
- unsorted_items = feed.instance_variable_get("@items")
1472
+ unsorted_items = parent_feed.instance_variable_get("@items")
1363
1473
  item_index = unsorted_items.index(self)
1364
1474
  if item_index.nil?
1365
1475
  return nil
@@ -1368,7 +1478,7 @@ module FeedTools
1368
1478
  return nil
1369
1479
  end
1370
1480
  succeeding_item = unsorted_items[item_index + 1]
1371
- return (succeeding_item.time - 1)
1481
+ return (succeeding_item.time(:estimate_timestamp => false) - 1)
1372
1482
  rescue
1373
1483
  return nil
1374
1484
  end
@@ -1385,7 +1495,7 @@ module FeedTools
1385
1495
  end
1386
1496
  end
1387
1497
  if updated_string != nil && updated_string != ""
1388
- @updated = Time.parse(updated_string) rescue nil
1498
+ @updated = Time.parse(updated_string).gmtime rescue nil
1389
1499
  else
1390
1500
  @updated = nil
1391
1501
  end
@@ -1414,7 +1524,7 @@ module FeedTools
1414
1524
  end
1415
1525
  end
1416
1526
  if issued_string != nil && issued_string != ""
1417
- @issued = Time.parse(issued_string) rescue nil
1527
+ @issued = Time.parse(issued_string).gmtime rescue nil
1418
1528
  else
1419
1529
  @issued = nil
1420
1530
  end
@@ -1631,7 +1741,8 @@ module FeedTools
1631
1741
  end
1632
1742
  elsif feed_type == "atom" && version == 0.3
1633
1743
  # normal atom format
1634
- return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
1744
+ return xml_builder.entry("xmlns" =>
1745
+ FEED_TOOLS_NAMESPACES['atom03']) do
1635
1746
  unless title.nil? || title == ""
1636
1747
  xml_builder.title(title,
1637
1748
  "mode" => "escaped",
@@ -1673,7 +1784,8 @@ module FeedTools
1673
1784
  end
1674
1785
  elsif feed_type == "atom" && version == 1.0
1675
1786
  # normal atom format
1676
- return xml_builder.entry("xmlns" => "http://www.w3.org/2005/Atom") do
1787
+ return xml_builder.entry("xmlns" =>
1788
+ FEED_TOOLS_NAMESPACES['atom10']) do
1677
1789
  unless title.nil? || title == ""
1678
1790
  xml_builder.title(title,
1679
1791
  "type" => "html")