feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -8,5 +8,7 @@
8
8
  `feed_data_type` varchar(20) default NULL,
9
9
  `http_headers` text default NULL,
10
10
  `last_retrieved` datetime default NULL,
11
+ `time_to_live` int(10) unsigned NULL,
12
+ `serialized` longtext default NULL,
11
13
  PRIMARY KEY (`id`)
12
14
  )
@@ -7,5 +7,7 @@
7
7
  feed_data text default NULL,
8
8
  feed_data_type varchar(20) default NULL,
9
9
  http_headers text default NULL,
10
- last_retrieved timestamp default NULL
10
+ last_retrieved timestamp default NULL,
11
+ time_to_live integer(10) default NULL,
12
+ serialized text default NULL
11
13
  );
@@ -7,5 +7,7 @@
7
7
  feed_data TEXT DEFAULT NULL,
8
8
  feed_data_type VARCHAR(20) DEFAULT NULL,
9
9
  http_headers TEXT DEFAULT NULL,
10
- last_retrieved DATETIME DEFAULT NULL
10
+ last_retrieved DATETIME DEFAULT NULL,
11
+ time_to_live INTEGER DEFAULT NULL,
12
+ serialized TEXT DEFAULT NULL
11
13
  );
@@ -33,6 +33,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
33
33
  'development' # :nodoc:
34
34
 
35
35
  FEED_TOOLS_NAMESPACES = {
36
+ "access" => "http://www.bloglines.com/about/specs/fac-1.0",
36
37
  "admin" => "http://webns.net/mvcb/",
37
38
  "ag" => "http://purl.org/rss/1.0/modules/aggregation/",
38
39
  "annotate" => "http://purl.org/rss/1.0/modules/annotate/",
@@ -54,10 +55,13 @@ FEED_TOOLS_NAMESPACES = {
54
55
  "ev" => "http://purl.org/rss/1.0/modules/event/",
55
56
  "icbm" => "http://postneo.com/icbm/",
56
57
  "image" => "http://purl.org/rss/1.0/modules/image/",
58
+ "indexing" => "urn:atom-extension:indexing",
57
59
  "feedburner" => "http://rssnamespace.org/feedburner/ext/1.0",
58
60
  "foaf" => "http://xmlns.com/foaf/0.1/",
59
61
  "foo" => "http://hsivonen.iki.fi/FooML",
60
62
  "fm" => "http://freshmeat.net/rss/fm/",
63
+ "gd" => "http://schemas.google.com/g/2005",
64
+ "gr" => "http://www.google.com/schemas/reader/atom/",
61
65
  "itunes" => "http://www.itunes.com/dtds/podcast-1.0.dtd",
62
66
  "l" => "http://purl.org/rss/1.0/modules/link/",
63
67
  "media" => "http://search.yahoo.com/mrss",
@@ -90,7 +94,8 @@ FEED_TOOLS_NAMESPACES = {
90
94
  }
91
95
 
92
96
  $:.unshift(File.dirname(__FILE__))
93
- $:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
97
+ $: << (File.dirname(__FILE__) + "/feed_tools/vendor")
98
+ $: << (File.dirname(__FILE__) + "/feed_tools/vendor/html5/lib")
94
99
 
95
100
  begin
96
101
  require 'feed_tools/version'
@@ -104,8 +109,16 @@ begin
104
109
  end
105
110
 
106
111
  require 'rubygems'
107
-
108
- require_gem('builder', '>= 1.2.4')
112
+
113
+ if !defined?(Builder)
114
+ gem('builder', '>= 1.2.4')
115
+ require 'builder'
116
+ end
117
+
118
+ if !defined?(ActiveRecord)
119
+ gem('activerecord', '>= 1.11.1')
120
+ require 'active_record'
121
+ end
109
122
 
110
123
  # Preload optional libraries.
111
124
  begin
@@ -117,7 +130,15 @@ begin
117
130
  rescue Object
118
131
  end
119
132
 
120
- require 'feed_tools/vendor/htree'
133
+ require 'html5'
134
+ require 'html5/html5parser'
135
+ require 'html5/liberalxmlparser'
136
+ require 'html5/treewalkers'
137
+ require 'html5/treebuilders'
138
+ require 'html5/serializer'
139
+ require 'html5/sanitizer'
140
+
141
+ require 'feed_tools/vendor/uri'
121
142
 
122
143
  require 'net/http'
123
144
 
@@ -134,15 +155,8 @@ begin
134
155
  require 'yaml'
135
156
  require 'base64'
136
157
 
137
- if !defined?(ActiveSupport)
138
- require_gem('activesupport', '>= 1.1.1')
139
- end
140
- if !defined?(ActiveRecord)
141
- require_gem('activerecord', '>= 1.11.1')
142
- end
143
-
144
158
  begin
145
- require_gem('uuidtools', '>= 0.1.2')
159
+ gem('uuidtools', '>= 0.1.2')
146
160
  rescue Gem::LoadError
147
161
  begin
148
162
  require 'uuidtools'
@@ -198,6 +212,10 @@ module FeedTools
198
212
  :proxy_port => nil,
199
213
  :proxy_user => nil,
200
214
  :proxy_password => nil,
215
+ :auth_user => nil,
216
+ :auth_password => nil,
217
+ :auth_scheme => nil,
218
+ :http_timeout => nil,
201
219
  :user_agent =>
202
220
  "FeedTools/#{FeedTools::FEED_TOOLS_VERSION::STRING} " +
203
221
  "+http://www.sporkmonger.com/projects/feedtools/",
@@ -207,6 +225,8 @@ module FeedTools
207
225
  "http://www.sporkmonger.com/projects/feedtools/",
208
226
  :tidy_enabled => false,
209
227
  :tidy_options => {},
228
+ :lazy_parsing_enabled => true,
229
+ :serialization_enabled => false,
210
230
  :idn_enabled => true,
211
231
  :sanitization_enabled => true,
212
232
  :sanitize_with_nofollow => true,
@@ -217,6 +237,7 @@ module FeedTools
217
237
  :strip_comment_count => false,
218
238
  :tab_spaces => 2,
219
239
  :max_ttl => 3.days.to_s,
240
+ :default_ttl => 1.hour.to_s,
220
241
  :output_encoding => "utf-8"
221
242
  }.merge(config_hash)
222
243
  end
@@ -318,6 +339,7 @@ module FeedTools
318
339
  FeedTools::GenericHelper.validate_options([ :multi_threaded ],
319
340
  options.keys)
320
341
  options = { :multi_threaded => false }.merge(options)
342
+ warn("FeedTools.build_merged_feed is deprecated.")
321
343
  return nil if url_array.nil?
322
344
  merged_feed = FeedTools::Feed.new
323
345
  retrieved_feeds = []
@@ -339,12 +361,13 @@ module FeedTools
339
361
  end
340
362
  end
341
363
  retrieved_feeds.each do |feed|
342
- merged_feed.entries.concat(
364
+ merged_feed.entries = merged_feed.entries.concat(
343
365
  feed.entries.collect do |entry|
344
366
  new_entry = entry.dup
345
367
  new_entry.title = "#{feed.title}: #{entry.title}"
346
368
  new_entry
347
- end )
369
+ end
370
+ )
348
371
  end
349
372
  return merged_feed
350
373
  end
@@ -51,6 +51,8 @@ module FeedTools
51
51
  ActiveRecord::Base.default_timezone = :utc
52
52
  ActiveRecord::Base.connection
53
53
  rescue
54
+ end
55
+ if !ActiveRecord::Base.connected?
54
56
  begin
55
57
  possible_config_files = [
56
58
  "./config/database.yml",
@@ -66,6 +68,7 @@ module FeedTools
66
68
  for file in possible_config_files
67
69
  if File.exists?(File.expand_path(file))
68
70
  database_config_file = file
71
+ @config_path = database_config_file
69
72
  break
70
73
  end
71
74
  end
@@ -84,6 +87,14 @@ module FeedTools
84
87
  end
85
88
  return nil
86
89
  end
90
+
91
+ # Returns the path to the database.yml config file that FeedTools loaded.
92
+ def DatabaseFeedCache.config_path
93
+ if !defined?(@config_path) || @config_path.blank?
94
+ @config_path = nil
95
+ end
96
+ return @config_path
97
+ end
87
98
 
88
99
  # Returns true if a connection to the database has been established and the
89
100
  # required table structure is in place.
@@ -115,9 +126,9 @@ module FeedTools
115
126
  # True if the appropriate database table already exists
116
127
  def DatabaseFeedCache.table_exists?
117
128
  begin
118
- ActiveRecord::Base.connection.execute("select id, href, title, " +
129
+ ActiveRecord::Base.connection.select_one("select id, href, title, " +
119
130
  "link, feed_data, feed_data_type, http_headers, last_retrieved " +
120
- "from #{self.table_name()} limit 1")
131
+ "from #{self.table_name()}")
121
132
  rescue ActiveRecord::StatementInvalid
122
133
  return false
123
134
  rescue
@@ -53,34 +53,112 @@ module FeedTools
53
53
  @live = false
54
54
  @encoding = nil
55
55
  @options = nil
56
+ @version = FeedTools::FEED_TOOLS_VERSION::STRING
57
+ end
58
+
59
+ # Breaks any references that the feed may be keeping around, thus making
60
+ # the job of the garbage collector much, much easier. Call this
61
+ # method prior to feeds going out of scope to prevent memory leaks.
62
+ def dispose()
63
+ self.entries.each do |entry|
64
+ entry.instance_variable_set("@root_node", nil)
65
+ entry.instance_variable_set("@feed", nil)
66
+ entry.instance_variable_set("@parent_feed", nil)
67
+ entry.dispose if entry.respond_to?(:dispose)
68
+ end
69
+ self.entries = []
70
+
71
+ @cache_object = nil
72
+ @http_headers = nil
73
+ @xml_document = nil
74
+ @feed_data = nil
75
+ @feed_data_type = nil
76
+ @root_node = nil
77
+ @channel_node = nil
78
+ @href = nil
79
+ @id = nil
80
+ @title = nil
81
+ @subtitle = nil
82
+ @link = nil
83
+ @last_retrieved = nil
84
+ @time_to_live = nil
85
+ @entries = nil
86
+ @live = false
87
+ @encoding = nil
88
+ @options = nil
89
+
90
+ GC.start()
91
+ self
56
92
  end
57
93
 
58
94
  # Loads the feed specified by the url, pulling the data from the
59
95
  # cache if it hasn't expired. Options supplied will override the
60
96
  # default options.
61
- def Feed.open(url, options={})
97
+ def Feed.open(href, options={})
62
98
  FeedTools::GenericHelper.validate_options(
63
99
  FeedTools.configurations.keys, options.keys)
64
100
 
65
- # create the new feed
66
- feed = FeedTools::Feed.new
101
+ # clean up the url
102
+ href = FeedTools::UriHelper.normalize_url(href)
67
103
 
68
- feed.configurations = FeedTools.configurations.merge(options)
104
+ feed_configurations = FeedTools.configurations.merge(options)
105
+ cache_object = nil
106
+ deserialized_feed = nil
69
107
 
70
- if feed.configurations[:feed_cache] != nil && FeedTools.feed_cache.nil?
108
+ if feed_configurations[:feed_cache] != nil && FeedTools.feed_cache.nil?
71
109
  raise(ArgumentError, "There is currently no caching mechanism set. " +
72
110
  "Cannot retrieve cached feeds.")
111
+ elsif feed_configurations[:serialization_enabled] == true
112
+ # We've got a caching mechanism available
113
+ cache_object = FeedTools.feed_cache.find_by_href(href)
114
+ begin
115
+ if cache_object != nil && cache_object.serialized != nil
116
+ # If we've got a cache hit, deserialize
117
+ expired = true
118
+ if cache_object.time_to_live == nil
119
+ cache_object.time_to_live =
120
+ feed_configurations[:default_ttl].to_i
121
+ cache_object.save
122
+ end
123
+ if (cache_object.last_retrieved == nil)
124
+ expired = true
125
+ elsif (cache_object.time_to_live < 30.minutes)
126
+ expired =
127
+ (cache_object.last_retrieved + 30.minutes) < Time.now.gmtime
128
+ else
129
+ expired =
130
+ (cache_object.last_retrieved + cache_object.time_to_live) <
131
+ Time.now.gmtime
132
+ end
133
+ if !expired
134
+ require 'yaml'
135
+ deserialized_feed = YAML.load(cache_object.serialized)
136
+ deserialized_feed.cache_object = cache_object
137
+ Thread.pass
138
+ end
139
+ end
140
+ rescue Exception
141
+ end
73
142
  end
74
143
 
75
- # clean up the url
76
- url = FeedTools::UriHelper.normalize_url(url)
144
+ if deserialized_feed == nil
145
+ # create the new feed
146
+ feed = FeedTools::Feed.new
147
+
148
+ feed.configurations = feed_configurations
77
149
 
78
- # load the new feed
79
- feed.href = url
80
- feed.update! unless feed.configurations[:disable_update_from_remote]
81
- Thread.pass
150
+ # load the new feed
151
+ feed.href = href
152
+ if cache_object != nil
153
+ feed.cache_object = cache_object
154
+ end
155
+ feed.update! unless feed.configurations[:disable_update_from_remote]
156
+ Thread.pass
82
157
 
83
- return feed
158
+ return feed
159
+ else
160
+ return deserialized_feed
161
+ end
84
162
  end
85
163
 
86
164
  # Returns the load options for this feed.
@@ -99,10 +177,9 @@ module FeedTools
99
177
  # Loads the feed from the remote url if the feed has expired from the
100
178
  # cache or cannot be retrieved from the cache for some reason.
101
179
  def update!
102
- if self.configurations[:disable_update_from_remote]
103
- # Don't do anything if this option is set
104
- return
105
- end
180
+ # Don't do anything if this option is set
181
+ return if self.configurations[:disable_update_from_remote]
182
+
106
183
  if !FeedTools.feed_cache.nil? &&
107
184
  !FeedTools.feed_cache.set_up_correctly?
108
185
  FeedTools.feed_cache.initialize_cache()
@@ -129,25 +206,23 @@ module FeedTools
129
206
  self.http_headers['content-type'] =~ /application\/xhtml\+xml/
130
207
 
131
208
  autodiscovered_url = nil
132
- autodiscovered_url =
133
- FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
134
- "application/atom+xml")
135
- if autodiscovered_url.nil?
209
+ ['atom', 'rss', 'rdf'].each do |type|
136
210
  autodiscovered_url =
137
211
  FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
138
- "application/rss+xml")
212
+ "application/#{type}+xml")
213
+ break unless autodiscovered_url.nil?
139
214
  end
140
- if autodiscovered_url.nil?
141
- autodiscovered_url =
142
- FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
143
- "application/rdf+xml")
144
- end
145
- unless autodiscovered_url.nil?
215
+
216
+ if autodiscovered_url != nil
146
217
  begin
147
218
  autodiscovered_url = FeedTools::UriHelper.resolve_relative_uri(
148
219
  autodiscovered_url, [self.href])
149
220
  rescue Exception
150
221
  end
222
+ if self.href == autodiscovered_url
223
+ raise FeedAccessError,
224
+ "Autodiscovery loop detected: #{autodiscovered_url}"
225
+ end
151
226
  self.feed_data = nil
152
227
  self.href = autodiscovered_url
153
228
  if FeedTools.feed_cache.nil?
@@ -157,12 +232,24 @@ module FeedTools
157
232
  FeedTools.feed_cache.find_by_href(autodiscovered_url)
158
233
  end
159
234
  self.update!
235
+ else
236
+ html_body = FeedTools::XmlHelper.try_xpaths(self.xml_document, [
237
+ "html/body"
238
+ ])
239
+ if html_body != nil
240
+ raise FeedAccessError,
241
+ "#{self.href} does not appear to be a feed."
242
+ end
160
243
  end
161
244
  else
162
245
  ugly_redirect = FeedTools::XmlHelper.try_xpaths(self.xml_document, [
163
246
  "redirect/newLocation/text()"
164
247
  ], :select_result_value => true)
165
248
  if !ugly_redirect.blank?
249
+ if self.href == ugly_redirect
250
+ raise FeedAccessError,
251
+ "Ugly redirect loop detected: #{ugly_redirect}"
252
+ end
166
253
  self.feed_data = nil
167
254
  self.href = ugly_redirect
168
255
  if FeedTools.feed_cache.nil?
@@ -187,6 +274,10 @@ module FeedTools
187
274
  @link = nil
188
275
  @time_to_live = nil
189
276
  @entries = nil
277
+
278
+ if self.configurations[:lazy_parsing_enabled] == false
279
+ self.full_parse()
280
+ end
190
281
  end
191
282
  end
192
283
 
@@ -338,7 +429,101 @@ module FeedTools
338
429
  end
339
430
  end
340
431
  end
432
+
433
+ # Does a full parse of the feed.
434
+ def full_parse
435
+ self.href
436
+
437
+ self.cache_object
341
438
 
439
+ self.http_headers
440
+ self.encoding
441
+ self.feed_data_utf_8
442
+ self.xml_document
443
+ self.root_node
444
+ self.channel_node
445
+
446
+ self.base_uri
447
+ self.feed_type
448
+ self.feed_version
449
+
450
+ self.entries
451
+
452
+ self.id
453
+ self.title
454
+ self.subtitle
455
+ self.links
456
+ self.link
457
+ self.icon
458
+ self.favicon
459
+ self.author
460
+ self.publisher
461
+ self.time
462
+ self.updated
463
+ self.published
464
+ self.categories
465
+ self.images
466
+ self.rights
467
+ self.time_to_live
468
+ self.generator
469
+ self.language
470
+
471
+ self.docs
472
+ self.text_input
473
+ self.cloud
474
+
475
+ self.itunes_summary
476
+ self.itunes_subtitle
477
+ self.itunes_author
478
+
479
+ self.media_text
480
+
481
+ self.explicit?
482
+
483
+ self.entries.each do |entry|
484
+ entry.full_parse()
485
+ end
486
+
487
+ nil
488
+ end
489
+
490
+ # Does a full parse, then serializes the feed object directly to the
491
+ # cache.
492
+ def serialize_to_cache
493
+ @cache_object = nil
494
+ require 'yaml'
495
+ serialized_feed = YAML.dump(self.serializable)
496
+ if self.cache_object != nil
497
+ begin
498
+ self.cache_object.serialized = serialized_feed
499
+ self.cache_object.save
500
+ rescue Exception
501
+ end
502
+ end
503
+ return nil
504
+ end
505
+
506
+ # Returns a duplicate object suitable for serialization
507
+ def serializable
508
+ self.full_parse()
509
+ entries_to_dump = self.entries
510
+ # This prevents errors due to temporarily having feed items with
511
+ # multiple parent feeds.
512
+ self.entries = []
513
+ feed_to_dump = self.dup
514
+ feed_to_dump.instance_variable_set("@xml_document", nil)
515
+ feed_to_dump.instance_variable_set("@root_node", nil)
516
+ feed_to_dump.instance_variable_set("@channel_node", nil)
517
+ feed_to_dump.entries = entries_to_dump.collect do |entry|
518
+ entry.serializable
519
+ end
520
+ self.entries = entries_to_dump
521
+ feed_to_dump.entries.each do |entry|
522
+ entry.instance_variable_set("@root_node", nil)
523
+ end
524
+ return feed_to_dump
525
+ end
526
+
342
527
  # Returns the relevant information from an http request.
343
528
  def http_response
344
529
  return @http_response
@@ -360,8 +545,12 @@ module FeedTools
360
545
  # Returns the encoding that the feed was parsed with
361
546
  def encoding
362
547
  if @encoding.blank?
363
- unless self.http_headers.blank?
364
- @encoding = "utf-8"
548
+ if !self.http_headers.blank?
549
+ if self.http_headers['content-type'] =~ /charset=([\w\d-]+)/
550
+ @encoding = $1.downcase
551
+ else
552
+ @encoding = self.encoding_from_feed_data
553
+ end
365
554
  else
366
555
  @encoding = self.encoding_from_feed_data
367
556
  end
@@ -377,7 +566,7 @@ module FeedTools
377
566
  return nil if raw_data.nil?
378
567
  encoding_from_xml_instruct =
379
568
  raw_data.scan(
380
- /^<\?xml [^>]*encoding="([\w]*)"[^>]*\?>/
569
+ /^<\?xml [^>]*encoding="([^\"]*)"[^>]*\?>/
381
570
  ).flatten.first
382
571
  unless encoding_from_xml_instruct.blank?
383
572
  encoding_from_xml_instruct.downcase!
@@ -446,11 +635,26 @@ module FeedTools
446
635
  if FeedTools.feed_cache.nil?
447
636
  self.cache_object = nil
448
637
  else
449
- self.cache_object =
450
- FeedTools.feed_cache.find_by_href(ugly_redirect)
638
+ begin
639
+ self.cache_object =
640
+ FeedTools.feed_cache.find_by_href(ugly_redirect)
641
+ rescue RuntimeError => error
642
+ if error.message =~ /sorry, too many clients already/
643
+ warn("There are too many connections to the database open.")
644
+ end
645
+ raise error
646
+ end
451
647
  end
452
648
  self.update!
453
649
  end
650
+
651
+ # Get these things parsed in the correct order to avoid the retardedly
652
+ # painful corecursion issues.
653
+ self.href
654
+ @links = nil
655
+ @link = nil
656
+ self.links
657
+ self.link
454
658
  end
455
659
 
456
660
  # Returns the feed's raw data as utf-8.
@@ -462,11 +666,11 @@ module FeedTools
462
666
  else
463
667
  use_encoding = force_encoding
464
668
  end
465
- if use_encoding != "utf-8"
669
+ if use_encoding != "utf-8" && use_encoding != nil && raw_data != nil
466
670
  begin
467
671
  @feed_data_utf_8 =
468
672
  Iconv.new('utf-8', use_encoding).iconv(raw_data)
469
- rescue
673
+ rescue Exception => error
470
674
  return raw_data
471
675
  end
472
676
  else
@@ -495,24 +699,23 @@ module FeedTools
495
699
  unless self.cache_object.nil?
496
700
  self.cache_object.feed_data_type = new_feed_data_type
497
701
  end
702
+ if self.feed_data_type != :xml
703
+ @xml_document = nil
704
+ end
498
705
  end
499
-
706
+
500
707
  # Returns a REXML Document of the feed_data
501
708
  def xml_document
502
- if self.feed_data_type != :xml
503
- @xml_document = nil
504
- else
505
- if @xml_document.nil?
709
+ if @xml_document.nil?
710
+ return nil if self.feed_data.blank?
711
+ if self.feed_data_type != :xml
712
+ @xml_document = nil
713
+ else
506
714
  begin
507
- begin
508
- @xml_document = REXML::Document.new(self.feed_data_utf_8)
509
- rescue Object
510
- # Something failed, attempt to repair the xml with htree.
511
- @xml_document = HTree.parse(self.feed_data_utf_8).to_rexml
512
- end
513
- rescue Object
514
- @xml_document = nil
515
- raise
715
+ @xml_document = REXML::Document.new(self.feed_data_utf_8)
716
+ rescue Exception
717
+ # Something failed, attempt to repair the xml with htree.
718
+ @xml_document = HTree.parse(self.feed_data_utf_8).to_rexml
516
719
  end
517
720
  end
518
721
  end
@@ -551,7 +754,7 @@ module FeedTools
551
754
  else
552
755
  @root_node = self.xml_document.root
553
756
  end
554
- rescue
757
+ rescue Exception
555
758
  return nil
556
759
  end
557
760
  end
@@ -564,7 +767,8 @@ module FeedTools
564
767
  @channel_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
565
768
  "channel",
566
769
  "CHANNEL",
567
- "feedinfo"
770
+ "feedinfo",
771
+ "news"
568
772
  ])
569
773
  if @channel_node == nil
570
774
  @channel_node = self.root_node
@@ -584,9 +788,17 @@ module FeedTools
584
788
  if @href != nil
585
789
  begin
586
790
  @cache_object = FeedTools.feed_cache.find_by_href(@href)
587
- rescue
791
+ rescue RuntimeError => error
792
+ if error.message =~ /sorry, too many clients already/
793
+ warn("There are too many connections to the database open.")
794
+ raise error
795
+ else
796
+ raise error
797
+ end
798
+ rescue => error
588
799
  warn("The feed cache seems to be having trouble with the " +
589
800
  "find_by_href method. This may cause unexpected results.")
801
+ raise error
590
802
  end
591
803
  end
592
804
  if @cache_object.nil?
@@ -778,19 +990,33 @@ module FeedTools
778
990
  end
779
991
  end
780
992
  if override_href.call(@href) && self.feed_data != nil
781
- for link_object in self.links
782
- if link_object.rel == 'self'
783
- if link_object.href != self.link ||
784
- (link_object.href =~ /xml/ ||
785
- link_object.href =~ /atom/ ||
786
- link_object.href =~ /feed/)
787
- @href = link_object.href
788
- @href_overridden = true
789
- @link = nil
790
- return @href
993
+ begin
994
+ links = FeedTools::GenericHelper.recursion_trap(:feed_href) do
995
+ self.links
996
+ end
997
+ link = FeedTools::GenericHelper.recursion_trap(:feed_href) do
998
+ self.link
999
+ end
1000
+ if links != nil
1001
+ for link_object in links
1002
+ if link_object.rel == 'self'
1003
+ if link_object.href != link ||
1004
+ (link_object.href =~ /xml/ ||
1005
+ link_object.href =~ /atom/ ||
1006
+ link_object.href =~ /feed/)
1007
+ @href = link_object.href
1008
+ @href_overridden = true
1009
+ @links = nil
1010
+ @link = nil
1011
+ return @href
1012
+ end
1013
+ end
791
1014
  end
792
1015
  end
1016
+ rescue Exception
793
1017
  end
1018
+ @links = nil
1019
+ @link = nil
794
1020
 
795
1021
  # rdf:about is ordered last because a lot of people put the url to
796
1022
  # the feed inside it instead of a link to their blog.
@@ -804,7 +1030,8 @@ module FeedTools
804
1030
  "feed/@resource",
805
1031
  "@rdf:about",
806
1032
  "@about",
807
- "newLocation/text()"
1033
+ "newLocation/text()",
1034
+ "atom10:link[@rel='self']/@href"
808
1035
  ], :select_result_value => true) do |result|
809
1036
  override_href.call(FeedTools::UriHelper.normalize_url(result))
810
1037
  end
@@ -831,6 +1058,7 @@ module FeedTools
831
1058
  @href_overridden = false
832
1059
  end
833
1060
  if @href_overridden == true
1061
+ @links = nil
834
1062
  @link = nil
835
1063
  end
836
1064
  end
@@ -858,7 +1086,7 @@ module FeedTools
858
1086
  "TITLE"
859
1087
  ])
860
1088
  @title = FeedTools::HtmlHelper.process_text_construct(title_node,
861
- self.feed_type, self.feed_version)
1089
+ self.feed_type, self.feed_version, [self.base_uri])
862
1090
  if self.feed_type == "atom" ||
863
1091
  self.configurations[:always_strip_wrapper_elements]
864
1092
  @title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
@@ -901,7 +1129,7 @@ module FeedTools
901
1129
  "info"
902
1130
  ])
903
1131
  @subtitle = FeedTools::HtmlHelper.process_text_construct(
904
- subtitle_node, self.feed_type, self.feed_version)
1132
+ subtitle_node, self.feed_type, self.feed_version, [self.base_uri])
905
1133
  if self.feed_type == "atom" ||
906
1134
  self.configurations[:always_strip_wrapper_elements]
907
1135
  @subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle)
@@ -1119,6 +1347,13 @@ module FeedTools
1119
1347
  "@href",
1120
1348
  "text()"
1121
1349
  ], :select_result_value => true)
1350
+ if link_object.href == "atom10:" ||
1351
+ link_object.href == "atom03:" ||
1352
+ link_object.href == "atom:"
1353
+ link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [
1354
+ "@href"
1355
+ ], :select_result_value => true)
1356
+ end
1122
1357
  if link_object.href.nil? && link_node.base_uri != nil
1123
1358
  link_object.href = ""
1124
1359
  end
@@ -1143,6 +1378,13 @@ module FeedTools
1143
1378
  "@atom:hreflang",
1144
1379
  "@hreflang"
1145
1380
  ], :select_result_value => true)
1381
+ if link_object.hreflang == "atom10:" ||
1382
+ link_object.hreflang == "atom03:" ||
1383
+ link_object.hreflang == "atom:"
1384
+ link_object.hreflang = FeedTools::XmlHelper.try_xpaths(link_node, [
1385
+ "@hreflang"
1386
+ ], :select_result_value => true)
1387
+ end
1146
1388
  unless link_object.hreflang.nil?
1147
1389
  link_object.hreflang = link_object.hreflang.downcase
1148
1390
  end
@@ -1152,6 +1394,13 @@ module FeedTools
1152
1394
  "@atom:rel",
1153
1395
  "@rel"
1154
1396
  ], :select_result_value => true)
1397
+ if link_object.rel == "atom10:" ||
1398
+ link_object.rel == "atom03:" ||
1399
+ link_object.rel == "atom:"
1400
+ link_object.rel = FeedTools::XmlHelper.try_xpaths(link_node, [
1401
+ "@rel"
1402
+ ], :select_result_value => true)
1403
+ end
1155
1404
  unless link_object.rel.nil?
1156
1405
  link_object.rel = link_object.rel.downcase
1157
1406
  end
@@ -1164,6 +1413,13 @@ module FeedTools
1164
1413
  "@atom:type",
1165
1414
  "@type"
1166
1415
  ], :select_result_value => true)
1416
+ if link_object.type == "atom10:" ||
1417
+ link_object.type == "atom03:" ||
1418
+ link_object.type == "atom:"
1419
+ link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
1420
+ "@type"
1421
+ ], :select_result_value => true)
1422
+ end
1167
1423
  unless link_object.type.nil?
1168
1424
  link_object.type = link_object.type.downcase
1169
1425
  end
@@ -1174,6 +1430,13 @@ module FeedTools
1174
1430
  "@title",
1175
1431
  "text()"
1176
1432
  ], :select_result_value => true)
1433
+ if link_object.title == "atom10:" ||
1434
+ link_object.title == "atom03:" ||
1435
+ link_object.title == "atom:"
1436
+ link_object.title = FeedTools::XmlHelper.try_xpaths(link_node, [
1437
+ "@title"
1438
+ ], :select_result_value => true)
1439
+ end
1177
1440
  # This catches the ambiguities between atom, rss, and cdf
1178
1441
  if link_object.title == link_object.href
1179
1442
  link_object.title = nil
@@ -1184,6 +1447,13 @@ module FeedTools
1184
1447
  "@atom:length",
1185
1448
  "@length"
1186
1449
  ], :select_result_value => true)
1450
+ if link_object.length == "atom10:" ||
1451
+ link_object.length == "atom03:" ||
1452
+ link_object.length == "atom:"
1453
+ link_object.length = FeedTools::XmlHelper.try_xpaths(link_node, [
1454
+ "@length"
1455
+ ], :select_result_value => true)
1456
+ end
1187
1457
  if !link_object.length.nil?
1188
1458
  link_object.length = link_object.length.to_i
1189
1459
  else
@@ -1211,7 +1481,10 @@ module FeedTools
1211
1481
  def base_uri
1212
1482
  if @base_uri.nil?
1213
1483
  @base_uri = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
1214
- "@base"
1484
+ "@base",
1485
+ "base/@href",
1486
+ "base/text()",
1487
+ "@xml:base"
1215
1488
  ], :select_result_value => true)
1216
1489
  if @base_uri.blank?
1217
1490
  begin
@@ -1222,9 +1495,26 @@ module FeedTools
1222
1495
  rescue Exception
1223
1496
  end
1224
1497
  end
1498
+ if @base_uri.blank?
1499
+ @base_uri = FeedTools::XmlHelper.try_xpaths(self.root_node, [
1500
+ "@xml:base"
1501
+ ], :select_result_value => true)
1502
+ end
1225
1503
  if !@base_uri.blank?
1226
1504
  @base_uri = FeedTools::UriHelper.normalize_url(@base_uri)
1227
1505
  end
1506
+ if !@base_uri.blank?
1507
+ parsed_uri = FeedTools::URI.parse(@base_uri)
1508
+ # Feedburner is almost never the base uri that was intended
1509
+ # Use the actual site instead
1510
+ if parsed_uri.host =~ /feedburner/
1511
+ site_uri =
1512
+ FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
1513
+ FeedTools::UriHelper.normalize_url(self.link)
1514
+ end
1515
+ @base_uri = site_uri if !site_uri.blank?
1516
+ end
1517
+ end
1228
1518
  end
1229
1519
  return @base_uri
1230
1520
  end
@@ -1425,6 +1715,12 @@ module FeedTools
1425
1715
  rescue
1426
1716
  end
1427
1717
  end
1718
+ if FeedTools::XmlHelper.try_xpaths(author_node,
1719
+ ["@gr:unknown-author"], :select_result_value => true) == "true"
1720
+ if @author.name == "(author unknown)"
1721
+ @author.name = nil
1722
+ end
1723
+ end
1428
1724
  end
1429
1725
  # Fallback on the itunes module if we didn't find an author name
1430
1726
  begin
@@ -1764,35 +2060,7 @@ module FeedTools
1764
2060
  end
1765
2061
  return @images
1766
2062
  end
1767
-
1768
- # Returns the feed's text input field
1769
- def text_input
1770
- if @text_input.nil?
1771
- @text_input = FeedTools::TextInput.new
1772
- text_input_node =
1773
- FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
1774
- unless text_input_node.nil?
1775
- @text_input.title =
1776
- FeedTools::XmlHelper.try_xpaths(text_input_node,
1777
- ["title/text()"],
1778
- :select_result_value => true)
1779
- @text_input.description =
1780
- FeedTools::XmlHelper.try_xpaths(text_input_node,
1781
- ["description/text()"],
1782
- :select_result_value => true)
1783
- @text_input.link =
1784
- FeedTools::XmlHelper.try_xpaths(text_input_node,
1785
- ["link/text()"],
1786
- :select_result_value => true)
1787
- @text_input.name =
1788
- FeedTools::XmlHelper.try_xpaths(text_input_node,
1789
- ["name/text()"],
1790
- :select_result_value => true)
1791
- end
1792
- end
1793
- return @text_input
1794
- end
1795
-
2063
+
1796
2064
  # Returns the feed's copyright information
1797
2065
  def rights
1798
2066
  if @rights.nil?
@@ -1807,7 +2075,7 @@ module FeedTools
1807
2075
  "rights"
1808
2076
  ])
1809
2077
  @rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
1810
- self.feed_type, self.feed_version)
2078
+ self.feed_type, self.feed_version, [self.base_uri])
1811
2079
  if self.feed_type == "atom" ||
1812
2080
  self.configurations[:always_strip_wrapper_elements]
1813
2081
  @rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
@@ -1821,12 +2089,24 @@ module FeedTools
1821
2089
  @rights = new_rights
1822
2090
  end
1823
2091
 
1824
- def license #:nodoc:
1825
- raise "Not implemented yet."
2092
+ # Returns the first license link for the feed.
2093
+ def license
2094
+ return self.licenses.first
2095
+ end
2096
+
2097
+ # Returns all licenses linked from this feed.
2098
+ def licenses
2099
+ if @licenses.nil?
2100
+ @licenses = self.links.select do |link|
2101
+ link.rel == "license"
2102
+ end
2103
+ end
2104
+ return @licenses
1826
2105
  end
1827
2106
 
1828
- def license=(new_license) #:nodoc:
1829
- raise "Not implemented yet."
2107
+ # Sets the feed's licenses.
2108
+ def licenses=(new_licenses)
2109
+ @licenses = new_licenses
1830
2110
  end
1831
2111
 
1832
2112
  # Returns the number of seconds before the feed should expire
@@ -1910,14 +2190,14 @@ module FeedTools
1910
2190
  @time_to_live = @time_to_live + update_frequency_seconds.to_i
1911
2191
  end
1912
2192
  if @time_to_live == 0
1913
- @time_to_live = 1.hour
2193
+ @time_to_live = self.configurations[:default_ttl].to_i
1914
2194
  end
1915
2195
  end
1916
2196
  end
1917
2197
  end
1918
2198
  if @time_to_live.nil? || @time_to_live == 0
1919
2199
  # Fall back to the configured default time to live (:default_ttl)
1920
- @time_to_live = 1.hour
2200
+ @time_to_live = self.configurations[:default_ttl].to_i
1921
2201
  elsif self.configurations[:max_ttl] != nil &&
1922
2202
  self.configurations[:max_ttl] != 0 &&
1923
2203
  @time_to_live >= self.configurations[:max_ttl].to_i
@@ -1930,7 +2210,7 @@ module FeedTools
1930
2210
  # Sets the feed time to live
1931
2211
  def time_to_live=(new_time_to_live)
1932
2212
  @time_to_live = new_time_to_live.round
1933
- @time_to_live = 1.hour if @time_to_live < 1.hour
2213
+ @time_to_live = 30.minutes if @time_to_live < 30.minutes
1934
2214
  end
1935
2215
 
1936
2216
  # Returns the feed's cloud
@@ -1966,6 +2246,34 @@ module FeedTools
1966
2246
  @cloud = new_cloud
1967
2247
  end
1968
2248
 
2249
+ # Returns the feed's text input field
2250
+ def text_input
2251
+ if @text_input.nil?
2252
+ @text_input = FeedTools::TextInput.new
2253
+ text_input_node =
2254
+ FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
2255
+ unless text_input_node.nil?
2256
+ @text_input.title =
2257
+ FeedTools::XmlHelper.try_xpaths(text_input_node,
2258
+ ["title/text()"],
2259
+ :select_result_value => true)
2260
+ @text_input.description =
2261
+ FeedTools::XmlHelper.try_xpaths(text_input_node,
2262
+ ["description/text()"],
2263
+ :select_result_value => true)
2264
+ @text_input.link =
2265
+ FeedTools::XmlHelper.try_xpaths(text_input_node,
2266
+ ["link/text()"],
2267
+ :select_result_value => true)
2268
+ @text_input.name =
2269
+ FeedTools::XmlHelper.try_xpaths(text_input_node,
2270
+ ["name/text()"],
2271
+ :select_result_value => true)
2272
+ end
2273
+ end
2274
+ return @text_input
2275
+ end
2276
+
1969
2277
  # Returns the feed generator
1970
2278
  def generator
1971
2279
  if @generator.nil?
@@ -2096,7 +2404,8 @@ module FeedTools
2096
2404
  "atom10:entry",
2097
2405
  "atom03:entry",
2098
2406
  "atom:entry",
2099
- "entry"
2407
+ "entry",
2408
+ "story"
2100
2409
  ]),
2101
2410
  FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
2102
2411
  "rss10:item",
@@ -2104,7 +2413,8 @@ module FeedTools
2104
2413
  "rss11:items/item",
2105
2414
  "items/rss11:item",
2106
2415
  "items/item",
2107
- "item"
2416
+ "item",
2417
+ "story"
2108
2418
  ])
2109
2419
  ])
2110
2420
 
@@ -2234,6 +2544,17 @@ module FeedTools
2234
2544
  def build_xml(feed_type=(self.feed_type or "atom"), feed_version=nil,
2235
2545
  xml_builder=Builder::XmlMarkup.new(
2236
2546
  :indent => 2, :escape_attrs => false))
2547
+
2548
+ if self.find_node("access:restriction/@relationship").to_s == "deny"
2549
+ raise StandardError,
2550
+ "Operation not permitted. This feed denies redistribution."
2551
+ elsif self.find_node("@indexing:index").to_s == "no"
2552
+ raise StandardError,
2553
+ "Operation not permitted. This feed denies redistribution."
2554
+ end
2555
+
2556
+ self.full_parse()
2557
+
2237
2558
  xml_builder.instruct! :xml, :version => "1.0",
2238
2559
  :encoding => (self.configurations[:output_encoding] or "utf-8")
2239
2560
  if feed_type.nil?
@@ -2392,6 +2713,9 @@ module FeedTools
2392
2713
  unless self.copyright.blank?
2393
2714
  xml_builder.copyright(self.copyright)
2394
2715
  end
2716
+ unless self.language.blank?
2717
+ xml_builder.language(self.language)
2718
+ end
2395
2719
  xml_builder.ttl((time_to_live / 1.minute).to_s)
2396
2720
  xml_builder.generator(
2397
2721
  self.configurations[:generator_href])
@@ -2467,6 +2791,8 @@ module FeedTools
2467
2791
  end
2468
2792
  elsif self.link != nil
2469
2793
  xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
2794
+ elsif self.url != nil
2795
+ xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.url))
2470
2796
  else
2471
2797
  raise "Cannot build feed, missing feed unique id."
2472
2798
  end