feedtools 0.2.17 → 0.2.18

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,8 +1,35 @@
1
+ #--
2
+ # Copyright (c) 2005 Robert Aman
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
1
24
  module FeedTools
2
25
  # The <tt>FeedTools::FeedItem</tt> class represents the structure of
3
26
  # a single item within a web feed.
4
27
  class FeedItem
28
+ # :stopdoc:
5
29
  include REXML
30
+ include GenericHelper
31
+ private :validate_options
32
+ # :startdoc:
6
33
 
7
34
  # This class stores information about a feed item's file enclosures.
8
35
  class Enclosure
@@ -125,7 +152,6 @@ module FeedTools
125
152
  # Initialize the feed item object
126
153
  def initialize
127
154
  super
128
- @feed = nil
129
155
  @feed_data = nil
130
156
  @feed_data_type = :xml
131
157
  @xml_doc = nil
@@ -136,15 +162,31 @@ module FeedTools
136
162
  end
137
163
 
138
164
  # Returns the parent feed of this feed item
165
+ # Warning, this method may be slow if you have a
166
+ # large number of FeedTools::Feed objects. Can't
167
+ # use a direct reference to the parent because it plays
168
+ # havoc with the garbage collector.
139
169
  def feed
140
- return @feed
170
+ parent_feed = nil
171
+ ObjectSpace.each_object(FeedTools::Feed) do |feed|
172
+ if feed.instance_variable_get("@items").nil?
173
+ feed.items
174
+ end
175
+ unsorted_items = feed.instance_variable_get("@items")
176
+ for item in unsorted_items
177
+ if item.object_id == self.object_id
178
+ if parent_feed.nil?
179
+ parent_feed = feed
180
+ break
181
+ else
182
+ raise "Multiple parent feeds found."
183
+ end
184
+ end
185
+ end
186
+ end
187
+ return parent_feed
141
188
  end
142
189
 
143
- # Sets the parent feed of this feed item
144
- def feed=(new_feed)
145
- @feed = new_feed
146
- end
147
-
148
190
  # Returns the feed item's raw data.
149
191
  def feed_data
150
192
  return @feed_data
@@ -154,33 +196,6 @@ module FeedTools
154
196
  def feed_data=(new_feed_data)
155
197
  @time = nil
156
198
  @feed_data = new_feed_data
157
-
158
- # We need an immediate parse of the time so we don't mess up sort orders
159
- unless root_node.nil?
160
- repair_entities = false
161
- time_node = XPath.first(root_node, "pubDate")
162
- if time_node.nil?
163
- time_node = XPath.first(root_node, "dc:date")
164
- end
165
- if time_node.nil?
166
- time_node = XPath.first(root_node, "dc:date", FEED_TOOLS_NAMESPACES)
167
- end
168
- if time_node.nil?
169
- time_node = XPath.first(root_node, "issued")
170
- end
171
- if time_node.nil?
172
- time_node = XPath.first(root_node, "updated")
173
- end
174
- if time_node.nil?
175
- time_node = XPath.first(root_node, "time")
176
- end
177
- end
178
- unless time_node.nil?
179
- begin
180
- @time = Time.parse(time_node.inner_xml)
181
- rescue
182
- end
183
- end
184
199
  end
185
200
 
186
201
  # Returns the feed item's data type.
@@ -253,10 +268,22 @@ module FeedTools
253
268
  if @title.nil?
254
269
  unless root_node.nil?
255
270
  repair_entities = false
256
- title_node = XPath.first(root_node, "title")
271
+ title_node = XPath.first(root_node, "atom10:title",
272
+ FEED_TOOLS_NAMESPACES)
273
+ if title_node.nil?
274
+ title_node = XPath.first(root_node, "title")
275
+ end
276
+ if title_node.nil?
277
+ title_node = XPath.first(root_node, "atom03:title",
278
+ FEED_TOOLS_NAMESPACES)
279
+ end
257
280
  if title_node.nil?
258
281
  title_node = XPath.first(root_node, "atom:title")
259
282
  end
283
+ if title_node.nil?
284
+ title_node = XPath.first(root_node, "dc:title",
285
+ FEED_TOOLS_NAMESPACES)
286
+ end
260
287
  if title_node.nil?
261
288
  title_node = XPath.first(root_node, "dc:title")
262
289
  end
@@ -267,16 +294,21 @@ module FeedTools
267
294
  if title_node.nil?
268
295
  return nil
269
296
  end
270
- if XPath.first(title_node, "@type").to_s == "xhtml" ||
271
- XPath.first(title_node, "@mode").to_s == "xhtml" ||
272
- XPath.first(title_node, "@type").to_s == "xml" ||
273
- XPath.first(title_node, "@mode").to_s == "xml" ||
274
- XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
297
+ title_type = XPath.first(title_node, "@type").to_s
298
+ title_mode = XPath.first(title_node, "@mode").to_s
299
+ title_encoding = XPath.first(title_node, "@encoding").to_s
300
+
301
+ # Note that we're checking for misuse of type, mode and encoding here
302
+ if title_type == "base64" || title_mode == "base64" ||
303
+ title_encoding == "base64"
304
+ @title = Base64.decode64(title_node.inner_xml.strip)
305
+ elsif title_type == "xhtml" || title_mode == "xhtml" ||
306
+ title_type == "xml" || title_mode == "xml" ||
307
+ title_type == "application/xhtml+xml"
275
308
  @title = title_node.inner_xml
276
- elsif XPath.first(title_node, "@type").to_s == "escaped" ||
277
- XPath.first(title_node, "@mode").to_s == "escaped"
309
+ elsif title_type == "escaped" || title_mode == "escaped"
278
310
  @title = FeedTools.unescape_entities(
279
- XPath.first(title_node, "text()").to_s)
311
+ title_node.inner_xml)
280
312
  else
281
313
  @title = title_node.inner_xml
282
314
  repair_entities = true
@@ -366,27 +398,29 @@ module FeedTools
366
398
  if description_node.nil?
367
399
  return nil
368
400
  end
369
- unless description_node.nil?
370
- if XPath.first(description_node, "@encoding").to_s != ""
371
- @description =
372
- "[Embedded data objects are not currently supported.]"
373
- elsif description_node.cdatas.size > 0
374
- @description = description_node.cdatas.first.value
375
- elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
376
- XPath.first(description_node, "@mode").to_s == "xhtml" ||
377
- XPath.first(description_node, "@type").to_s == "xml" ||
378
- XPath.first(description_node, "@mode").to_s == "xml" ||
379
- XPath.first(description_node, "@type").to_s ==
380
- "application/xhtml+xml"
381
- @description = description_node.inner_xml
382
- elsif XPath.first(description_node, "@type").to_s == "escaped" ||
383
- XPath.first(description_node, "@mode").to_s == "escaped"
384
- @description = FeedTools.unescape_entities(
385
- description_node.inner_xml)
386
- else
387
- @description = description_node.inner_xml
388
- repair_entities = true
389
- end
401
+ description_type = XPath.first(description_node, "@type").to_s
402
+ description_mode = XPath.first(description_node, "@mode").to_s
403
+ description_encoding = XPath.first(description_node, "@encoding").to_s
404
+
405
+ # Note that we're checking for misuse of type, mode and encoding here
406
+ if description_encoding != ""
407
+ @description =
408
+ "[Embedded data objects are not currently supported.]"
409
+ elsif description_node.cdatas.size > 0
410
+ @description = description_node.cdatas.first.value
411
+ elsif description_type == "base64" || description_mode == "base64" ||
412
+ description_encoding == "base64"
413
+ @description = Base64.decode64(description_node.inner_xml.strip)
414
+ elsif description_type == "xhtml" || description_mode == "xhtml" ||
415
+ description_type == "xml" || description_mode == "xml" ||
416
+ description_type == "application/xhtml+xml"
417
+ @description = description_node.inner_xml
418
+ elsif description_type == "escaped" || description_mode == "escaped"
419
+ @description = FeedTools.unescape_entities(
420
+ description_node.inner_xml)
421
+ else
422
+ @description = description_node.inner_xml
423
+ repair_entities = true
390
424
  end
391
425
  if @description == ""
392
426
  @description = self.itunes_summary
@@ -666,19 +700,70 @@ module FeedTools
666
700
  def copyright
667
701
  if @copyright.nil?
668
702
  unless root_node.nil?
669
- @copyright = XPath.first(root_node, "dc:rights/text()").to_s
670
- if @copyright == ""
671
- @copyright = XPath.first(root_node, "rights/text()").to_s
703
+ repair_entities = false
704
+
705
+ copyright_node = XPath.first(root_node, "dc:rights")
706
+ if copyright_node.nil?
707
+ copyright_node = XPath.first(root_node, "dc:rights",
708
+ FEED_TOOLS_NAMESPACES)
709
+ end
710
+ if copyright_node.nil?
711
+ copyright_node = XPath.first(root_node, "rights",
712
+ FEED_TOOLS_NAMESPACES)
713
+ end
714
+ if copyright_node.nil?
715
+ copyright_node = XPath.first(root_node, "copyright",
716
+ FEED_TOOLS_NAMESPACES)
717
+ end
718
+ if copyright_node.nil?
719
+ copyright_node = XPath.first(root_node, "atom03:copyright",
720
+ FEED_TOOLS_NAMESPACES)
672
721
  end
673
- if @copyright == ""
674
- @copyright = XPath.first(root_node, "copyright/text()").to_s
722
+ if copyright_node.nil?
723
+ copyright_node = XPath.first(root_node, "atom10:copyright",
724
+ FEED_TOOLS_NAMESPACES)
675
725
  end
676
- if @copyright == ""
677
- @copyright = XPath.first(root_node, "copyrights/text()").to_s
726
+ if copyright_node.nil?
727
+ copyright_node = XPath.first(root_node, "copyrights",
728
+ FEED_TOOLS_NAMESPACES)
678
729
  end
730
+ end
731
+ if copyright_node.nil?
732
+ return nil
733
+ end
734
+ copyright_type = XPath.first(copyright_node, "@type").to_s
735
+ copyright_mode = XPath.first(copyright_node, "@mode").to_s
736
+ copyright_encoding = XPath.first(copyright_node, "@encoding").to_s
737
+
738
+ # Note that we're checking for misuse of type, mode and encoding here
739
+ if copyright_encoding != ""
740
+ @copyright =
741
+ "[Embedded data objects are not currently supported.]"
742
+ elsif copyright_node.cdatas.size > 0
743
+ @copyright = copyright_node.cdatas.first.value
744
+ elsif copyright_type == "base64" || copyright_mode == "base64" ||
745
+ copyright_encoding == "base64"
746
+ @copyright = Base64.decode64(copyright_node.inner_xml.strip)
747
+ elsif copyright_type == "xhtml" || copyright_mode == "xhtml" ||
748
+ copyright_type == "xml" || copyright_mode == "xml" ||
749
+ copyright_type == "application/xhtml+xml"
750
+ @copyright = copyright_node.inner_xml
751
+ elsif copyright_type == "escaped" || copyright_mode == "escaped"
752
+ @copyright = FeedTools.unescape_entities(
753
+ copyright_node.inner_xml)
754
+ else
755
+ @copyright = copyright_node.inner_xml
756
+ repair_entities = true
757
+ end
758
+
759
+ unless @copyright.nil?
679
760
  @copyright = FeedTools.sanitize_html(@copyright, :strip)
680
- @copyright = nil if @copyright == ""
761
+ @copyright = FeedTools.unescape_entities(@copyright) if repair_entities
762
+ @copyright = FeedTools.tidy_html(@copyright)
681
763
  end
764
+
765
+ @copyright = @copyright.strip unless @copyright.nil?
766
+ @copyright = nil if @copyright == ""
682
767
  end
683
768
  return @copyright
684
769
  end
@@ -963,11 +1048,13 @@ module FeedTools
963
1048
  if enclosure.categories.nil?
964
1049
  enclosure.categories = []
965
1050
  end
966
- enclosure.categories << EnclosureCategory.new(
967
- FeedTools.unescape_entities(category_path),
968
- FeedTools.unescape_entities("http://www.apple.com/itunes/store/"),
969
- FeedTools.unescape_entities("iTunes Music Store Categories")
970
- )
1051
+ enclosure.categories << FeedTools::Feed::Category.new
1052
+ enclosure.categories.last.term =
1053
+ FeedTools.unescape_entities(category_path)
1054
+ enclosure.categories.last.scheme =
1055
+ "http://www.apple.com/itunes/store/"
1056
+ enclosure.categories.last.label =
1057
+ "iTunes Music Store Categories"
971
1058
  end
972
1059
  end
973
1060
 
@@ -1061,18 +1148,34 @@ module FeedTools
1061
1148
  if @author.nil?
1062
1149
  @author = FeedTools::Feed::Author.new
1063
1150
  unless root_node.nil?
1064
- author_node = XPath.first(root_node, "author")
1151
+ author_node = XPath.first(root_node, "atom10:author",
1152
+ FEED_TOOLS_NAMESPACES)
1153
+ if author_node.nil?
1154
+ author_node = XPath.first(root_node, "atom03:author",
1155
+ FEED_TOOLS_NAMESPACES)
1156
+ end
1157
+ if author_node.nil?
1158
+ author_node = XPath.first(root_node, "atom:author")
1159
+ end
1160
+ if author_node.nil?
1161
+ author_node = XPath.first(root_node, "author")
1162
+ end
1065
1163
  if author_node.nil?
1066
1164
  author_node = XPath.first(root_node, "managingEditor")
1067
1165
  end
1166
+ if author_node.nil?
1167
+ author_node = XPath.first(root_node, "dc:author",
1168
+ FEED_TOOLS_NAMESPACES)
1169
+ end
1068
1170
  if author_node.nil?
1069
1171
  author_node = XPath.first(root_node, "dc:author")
1070
1172
  end
1071
1173
  if author_node.nil?
1072
- author_node = XPath.first(root_node, "dc:creator")
1174
+ author_node = XPath.first(root_node, "dc:creator",
1175
+ FEED_TOOLS_NAMESPACES)
1073
1176
  end
1074
1177
  if author_node.nil?
1075
- author_node = XPath.first(root_node, "atom:author")
1178
+ author_node = XPath.first(root_node, "dc:creator")
1076
1179
  end
1077
1180
  end
1078
1181
  unless author_node.nil?
@@ -1280,7 +1383,10 @@ module FeedTools
1280
1383
  end
1281
1384
 
1282
1385
  # Returns the feed item time
1283
- def time
1386
+ def time(options = {})
1387
+ validate_options([ :estimate_timestamp ],
1388
+ options.keys)
1389
+ options = { :estimate_timestamp => true }.merge(options)
1284
1390
  if @time.nil?
1285
1391
  unless root_node.nil?
1286
1392
  time_string = XPath.first(root_node, "pubDate/text()").to_s
@@ -1300,22 +1406,24 @@ module FeedTools
1300
1406
  begin
1301
1407
  time_string = "" if time_string.nil?
1302
1408
  if time_string != ""
1303
- @time = Time.parse(time_string)
1304
- else
1305
- @time = succ_time
1306
- if @time.nil?
1307
- @time = prev_time
1308
- end
1409
+ @time = Time.parse(time_string).gmtime
1309
1410
  end
1310
1411
  rescue
1311
- @time = succ_time
1412
+ end
1413
+ if options[:estimate_timestamp]
1312
1414
  if @time.nil?
1313
- @time = prev_time
1415
+ begin
1416
+ @time = succ_time
1417
+ if @time.nil?
1418
+ @time = prev_time
1419
+ end
1420
+ rescue
1421
+ end
1422
+ if @time.nil?
1423
+ @time = Time.now.gmtime
1424
+ end
1314
1425
  end
1315
1426
  end
1316
- if @time.nil?
1317
- @time = Time.now.gmtime
1318
- end
1319
1427
  end
1320
1428
  return @time
1321
1429
  end
@@ -1328,13 +1436,14 @@ module FeedTools
1328
1436
  # Returns 1 second after the previous item's time.
1329
1437
  def succ_time #:nodoc:
1330
1438
  begin
1331
- if feed.nil?
1439
+ parent_feed = self.feed
1440
+ if parent_feed.nil?
1332
1441
  return nil
1333
1442
  end
1334
- if feed.instance_variable_get("@items").nil?
1335
- feed.items
1443
+ if parent_feed.instance_variable_get("@items").nil?
1444
+ parent_feed.items
1336
1445
  end
1337
- unsorted_items = feed.instance_variable_get("@items")
1446
+ unsorted_items = parent_feed.instance_variable_get("@items")
1338
1447
  item_index = unsorted_items.index(self)
1339
1448
  if item_index.nil?
1340
1449
  return nil
@@ -1343,7 +1452,7 @@ module FeedTools
1343
1452
  return nil
1344
1453
  end
1345
1454
  previous_item = unsorted_items[item_index - 1]
1346
- return (previous_item.time + 1)
1455
+ return (previous_item.time(:estimate_timestamp => false) + 1)
1347
1456
  rescue
1348
1457
  return nil
1349
1458
  end
@@ -1353,13 +1462,14 @@ module FeedTools
1353
1462
  # Returns 1 second before the succeeding item's time.
1354
1463
  def prev_time #:nodoc:
1355
1464
  begin
1356
- if feed.nil?
1465
+ parent_feed = self.feed
1466
+ if parent_feed.nil?
1357
1467
  return nil
1358
1468
  end
1359
- if feed.instance_variable_get("@items").nil?
1360
- feed.items
1469
+ if parent_feed.instance_variable_get("@items").nil?
1470
+ parent_feed.items
1361
1471
  end
1362
- unsorted_items = feed.instance_variable_get("@items")
1472
+ unsorted_items = parent_feed.instance_variable_get("@items")
1363
1473
  item_index = unsorted_items.index(self)
1364
1474
  if item_index.nil?
1365
1475
  return nil
@@ -1368,7 +1478,7 @@ module FeedTools
1368
1478
  return nil
1369
1479
  end
1370
1480
  succeeding_item = unsorted_items[item_index + 1]
1371
- return (succeeding_item.time - 1)
1481
+ return (succeeding_item.time(:estimate_timestamp => false) - 1)
1372
1482
  rescue
1373
1483
  return nil
1374
1484
  end
@@ -1385,7 +1495,7 @@ module FeedTools
1385
1495
  end
1386
1496
  end
1387
1497
  if updated_string != nil && updated_string != ""
1388
- @updated = Time.parse(updated_string) rescue nil
1498
+ @updated = Time.parse(updated_string).gmtime rescue nil
1389
1499
  else
1390
1500
  @updated = nil
1391
1501
  end
@@ -1414,7 +1524,7 @@ module FeedTools
1414
1524
  end
1415
1525
  end
1416
1526
  if issued_string != nil && issued_string != ""
1417
- @issued = Time.parse(issued_string) rescue nil
1527
+ @issued = Time.parse(issued_string).gmtime rescue nil
1418
1528
  else
1419
1529
  @issued = nil
1420
1530
  end
@@ -1631,7 +1741,8 @@ module FeedTools
1631
1741
  end
1632
1742
  elsif feed_type == "atom" && version == 0.3
1633
1743
  # normal atom format
1634
- return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
1744
+ return xml_builder.entry("xmlns" =>
1745
+ FEED_TOOLS_NAMESPACES['atom03']) do
1635
1746
  unless title.nil? || title == ""
1636
1747
  xml_builder.title(title,
1637
1748
  "mode" => "escaped",
@@ -1673,7 +1784,8 @@ module FeedTools
1673
1784
  end
1674
1785
  elsif feed_type == "atom" && version == 1.0
1675
1786
  # normal atom format
1676
- return xml_builder.entry("xmlns" => "http://www.w3.org/2005/Atom") do
1787
+ return xml_builder.entry("xmlns" =>
1788
+ FEED_TOOLS_NAMESPACES['atom10']) do
1677
1789
  unless title.nil? || title == ""
1678
1790
  xml_builder.title(title,
1679
1791
  "type" => "html")