feedtools 0.2.26 → 0.2.27
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
data/db/schema.mysql.sql
CHANGED
data/db/schema.postgresql.sql
CHANGED
@@ -7,5 +7,7 @@
|
|
7
7
|
feed_data text default NULL,
|
8
8
|
feed_data_type varchar(20) default NULL,
|
9
9
|
http_headers text default NULL,
|
10
|
-
last_retrieved timestamp default NULL
|
10
|
+
last_retrieved timestamp default NULL,
|
11
|
+
time_to_live integer(10) default NULL,
|
12
|
+
serialized text default NULL
|
11
13
|
);
|
data/db/schema.sqlite.sql
CHANGED
data/lib/feed_tools.rb
CHANGED
@@ -33,6 +33,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
33
33
|
'development' # :nodoc:
|
34
34
|
|
35
35
|
FEED_TOOLS_NAMESPACES = {
|
36
|
+
"access" => "http://www.bloglines.com/about/specs/fac-1.0",
|
36
37
|
"admin" => "http://webns.net/mvcb/",
|
37
38
|
"ag" => "http://purl.org/rss/1.0/modules/aggregation/",
|
38
39
|
"annotate" => "http://purl.org/rss/1.0/modules/annotate/",
|
@@ -54,10 +55,13 @@ FEED_TOOLS_NAMESPACES = {
|
|
54
55
|
"ev" => "http://purl.org/rss/1.0/modules/event/",
|
55
56
|
"icbm" => "http://postneo.com/icbm/",
|
56
57
|
"image" => "http://purl.org/rss/1.0/modules/image/",
|
58
|
+
"indexing" => "urn:atom-extension:indexing",
|
57
59
|
"feedburner" => "http://rssnamespace.org/feedburner/ext/1.0",
|
58
60
|
"foaf" => "http://xmlns.com/foaf/0.1/",
|
59
61
|
"foo" => "http://hsivonen.iki.fi/FooML",
|
60
62
|
"fm" => "http://freshmeat.net/rss/fm/",
|
63
|
+
"gd" => "http://schemas.google.com/g/2005",
|
64
|
+
"gr" => "http://www.google.com/schemas/reader/atom/",
|
61
65
|
"itunes" => "http://www.itunes.com/dtds/podcast-1.0.dtd",
|
62
66
|
"l" => "http://purl.org/rss/1.0/modules/link/",
|
63
67
|
"media" => "http://search.yahoo.com/mrss",
|
@@ -90,7 +94,8 @@ FEED_TOOLS_NAMESPACES = {
|
|
90
94
|
}
|
91
95
|
|
92
96
|
$:.unshift(File.dirname(__FILE__))
|
93
|
-
|
97
|
+
$: << (File.dirname(__FILE__) + "/feed_tools/vendor")
|
98
|
+
$: << (File.dirname(__FILE__) + "/feed_tools/vendor/html5/lib")
|
94
99
|
|
95
100
|
begin
|
96
101
|
require 'feed_tools/version'
|
@@ -104,8 +109,16 @@ begin
|
|
104
109
|
end
|
105
110
|
|
106
111
|
require 'rubygems'
|
107
|
-
|
108
|
-
|
112
|
+
|
113
|
+
if !defined?(Builder)
|
114
|
+
gem('builder', '>= 1.2.4')
|
115
|
+
require 'builder'
|
116
|
+
end
|
117
|
+
|
118
|
+
if !defined?(ActiveRecord)
|
119
|
+
gem('activerecord', '>= 1.11.1')
|
120
|
+
require 'active_record'
|
121
|
+
end
|
109
122
|
|
110
123
|
# Preload optional libraries.
|
111
124
|
begin
|
@@ -117,7 +130,15 @@ begin
|
|
117
130
|
rescue Object
|
118
131
|
end
|
119
132
|
|
120
|
-
require '
|
133
|
+
require 'html5'
|
134
|
+
require 'html5/html5parser'
|
135
|
+
require 'html5/liberalxmlparser'
|
136
|
+
require 'html5/treewalkers'
|
137
|
+
require 'html5/treebuilders'
|
138
|
+
require 'html5/serializer'
|
139
|
+
require 'html5/sanitizer'
|
140
|
+
|
141
|
+
require 'feed_tools/vendor/uri'
|
121
142
|
|
122
143
|
require 'net/http'
|
123
144
|
|
@@ -134,15 +155,8 @@ begin
|
|
134
155
|
require 'yaml'
|
135
156
|
require 'base64'
|
136
157
|
|
137
|
-
if !defined?(ActiveSupport)
|
138
|
-
require_gem('activesupport', '>= 1.1.1')
|
139
|
-
end
|
140
|
-
if !defined?(ActiveRecord)
|
141
|
-
require_gem('activerecord', '>= 1.11.1')
|
142
|
-
end
|
143
|
-
|
144
158
|
begin
|
145
|
-
|
159
|
+
gem('uuidtools', '>= 0.1.2')
|
146
160
|
rescue Gem::LoadError
|
147
161
|
begin
|
148
162
|
require 'uuidtools'
|
@@ -198,6 +212,10 @@ module FeedTools
|
|
198
212
|
:proxy_port => nil,
|
199
213
|
:proxy_user => nil,
|
200
214
|
:proxy_password => nil,
|
215
|
+
:auth_user => nil,
|
216
|
+
:auth_password => nil,
|
217
|
+
:auth_scheme => nil,
|
218
|
+
:http_timeout => nil,
|
201
219
|
:user_agent =>
|
202
220
|
"FeedTools/#{FeedTools::FEED_TOOLS_VERSION::STRING} " +
|
203
221
|
"+http://www.sporkmonger.com/projects/feedtools/",
|
@@ -207,6 +225,8 @@ module FeedTools
|
|
207
225
|
"http://www.sporkmonger.com/projects/feedtools/",
|
208
226
|
:tidy_enabled => false,
|
209
227
|
:tidy_options => {},
|
228
|
+
:lazy_parsing_enabled => true,
|
229
|
+
:serialization_enabled => false,
|
210
230
|
:idn_enabled => true,
|
211
231
|
:sanitization_enabled => true,
|
212
232
|
:sanitize_with_nofollow => true,
|
@@ -217,6 +237,7 @@ module FeedTools
|
|
217
237
|
:strip_comment_count => false,
|
218
238
|
:tab_spaces => 2,
|
219
239
|
:max_ttl => 3.days.to_s,
|
240
|
+
:default_ttl => 1.hour.to_s,
|
220
241
|
:output_encoding => "utf-8"
|
221
242
|
}.merge(config_hash)
|
222
243
|
end
|
@@ -318,6 +339,7 @@ module FeedTools
|
|
318
339
|
FeedTools::GenericHelper.validate_options([ :multi_threaded ],
|
319
340
|
options.keys)
|
320
341
|
options = { :multi_threaded => false }.merge(options)
|
342
|
+
warn("FeedTools.build_merged_feed is deprecated.")
|
321
343
|
return nil if url_array.nil?
|
322
344
|
merged_feed = FeedTools::Feed.new
|
323
345
|
retrieved_feeds = []
|
@@ -339,12 +361,13 @@ module FeedTools
|
|
339
361
|
end
|
340
362
|
end
|
341
363
|
retrieved_feeds.each do |feed|
|
342
|
-
merged_feed.entries.concat(
|
364
|
+
merged_feed.entries = merged_feed.entries.concat(
|
343
365
|
feed.entries.collect do |entry|
|
344
366
|
new_entry = entry.dup
|
345
367
|
new_entry.title = "#{feed.title}: #{entry.title}"
|
346
368
|
new_entry
|
347
|
-
end
|
369
|
+
end
|
370
|
+
)
|
348
371
|
end
|
349
372
|
return merged_feed
|
350
373
|
end
|
data/lib/feed_tools/database_feed_cache.rb
CHANGED
@@ -51,6 +51,8 @@ module FeedTools
|
|
51
51
|
ActiveRecord::Base.default_timezone = :utc
|
52
52
|
ActiveRecord::Base.connection
|
53
53
|
rescue
|
54
|
+
end
|
55
|
+
if !ActiveRecord::Base.connected?
|
54
56
|
begin
|
55
57
|
possible_config_files = [
|
56
58
|
"./config/database.yml",
|
@@ -66,6 +68,7 @@ module FeedTools
|
|
66
68
|
for file in possible_config_files
|
67
69
|
if File.exists?(File.expand_path(file))
|
68
70
|
database_config_file = file
|
71
|
+
@config_path = database_config_file
|
69
72
|
break
|
70
73
|
end
|
71
74
|
end
|
@@ -84,6 +87,14 @@ module FeedTools
|
|
84
87
|
end
|
85
88
|
return nil
|
86
89
|
end
|
90
|
+
|
91
|
+
# Returns the path to the database.yml config file that FeedTools loaded.
|
92
|
+
def DatabaseFeedCache.config_path
|
93
|
+
if !defined?(@config_path) || @config_path.blank?
|
94
|
+
@config_path = nil
|
95
|
+
end
|
96
|
+
return @config_path
|
97
|
+
end
|
87
98
|
|
88
99
|
# Returns true if a connection to the database has been established and the
|
89
100
|
# required table structure is in place.
|
@@ -115,9 +126,9 @@ module FeedTools
|
|
115
126
|
# True if the appropriate database table already exists
|
116
127
|
def DatabaseFeedCache.table_exists?
|
117
128
|
begin
|
118
|
-
ActiveRecord::Base.connection.
|
129
|
+
ActiveRecord::Base.connection.select_one("select id, href, title, " +
|
119
130
|
"link, feed_data, feed_data_type, http_headers, last_retrieved " +
|
120
|
-
"from #{self.table_name()}
|
131
|
+
"from #{self.table_name()}")
|
121
132
|
rescue ActiveRecord::StatementInvalid
|
122
133
|
return false
|
123
134
|
rescue
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -53,34 +53,112 @@ module FeedTools
|
|
53
53
|
@live = false
|
54
54
|
@encoding = nil
|
55
55
|
@options = nil
|
56
|
+
@version = FeedTools::FEED_TOOLS_VERSION::STRING
|
57
|
+
end
|
58
|
+
|
59
|
+
# Breaks any references that the feed may be keeping around, thus making
|
60
|
+
# the job of the garbage collector much, much easier. Call this
|
61
|
+
# method prior to feeds going out of scope to prevent memory leaks.
|
62
|
+
def dispose()
|
63
|
+
self.entries.each do |entry|
|
64
|
+
entry.instance_variable_set("@root_node", nil)
|
65
|
+
entry.instance_variable_set("@feed", nil)
|
66
|
+
entry.instance_variable_set("@parent_feed", nil)
|
67
|
+
entry.dispose if entry.respond_to?(:dispose)
|
68
|
+
end
|
69
|
+
self.entries = []
|
70
|
+
|
71
|
+
@cache_object = nil
|
72
|
+
@http_headers = nil
|
73
|
+
@xml_document = nil
|
74
|
+
@feed_data = nil
|
75
|
+
@feed_data_type = nil
|
76
|
+
@root_node = nil
|
77
|
+
@channel_node = nil
|
78
|
+
@href = nil
|
79
|
+
@id = nil
|
80
|
+
@title = nil
|
81
|
+
@subtitle = nil
|
82
|
+
@link = nil
|
83
|
+
@last_retrieved = nil
|
84
|
+
@time_to_live = nil
|
85
|
+
@entries = nil
|
86
|
+
@live = false
|
87
|
+
@encoding = nil
|
88
|
+
@options = nil
|
89
|
+
|
90
|
+
GC.start()
|
91
|
+
self
|
56
92
|
end
|
57
93
|
|
58
94
|
# Loads the feed specified by the url, pulling the data from the
|
59
95
|
# cache if it hasn't expired. Options supplied will override the
|
60
96
|
# default options.
|
61
|
-
def Feed.open(
|
97
|
+
def Feed.open(href, options={})
|
62
98
|
FeedTools::GenericHelper.validate_options(
|
63
99
|
FeedTools.configurations.keys, options.keys)
|
64
100
|
|
65
|
-
#
|
66
|
-
|
101
|
+
# clean up the url
|
102
|
+
href = FeedTools::UriHelper.normalize_url(href)
|
67
103
|
|
68
|
-
|
104
|
+
feed_configurations = FeedTools.configurations.merge(options)
|
105
|
+
cache_object = nil
|
106
|
+
deserialized_feed = nil
|
69
107
|
|
70
|
-
if
|
108
|
+
if feed_configurations[:feed_cache] != nil && FeedTools.feed_cache.nil?
|
71
109
|
raise(ArgumentError, "There is currently no caching mechanism set. " +
|
72
110
|
"Cannot retrieve cached feeds.")
|
111
|
+
elsif feed_configurations[:serialization_enabled] == true
|
112
|
+
# We've got a caching mechanism available
|
113
|
+
cache_object = FeedTools.feed_cache.find_by_href(href)
|
114
|
+
begin
|
115
|
+
if cache_object != nil && cache_object.serialized != nil
|
116
|
+
# If we've got a cache hit, deserialize
|
117
|
+
expired = true
|
118
|
+
if cache_object.time_to_live == nil
|
119
|
+
cache_object.time_to_live =
|
120
|
+
feed_configurations[:default_ttl].to_i
|
121
|
+
cache_object.save
|
122
|
+
end
|
123
|
+
if (cache_object.last_retrieved == nil)
|
124
|
+
expired = true
|
125
|
+
elsif (cache_object.time_to_live < 30.minutes)
|
126
|
+
expired =
|
127
|
+
(cache_object.last_retrieved + 30.minutes) < Time.now.gmtime
|
128
|
+
else
|
129
|
+
expired =
|
130
|
+
(cache_object.last_retrieved + cache_object.time_to_live) <
|
131
|
+
Time.now.gmtime
|
132
|
+
end
|
133
|
+
if !expired
|
134
|
+
require 'yaml'
|
135
|
+
deserialized_feed = YAML.load(cache_object.serialized)
|
136
|
+
deserialized_feed.cache_object = cache_object
|
137
|
+
Thread.pass
|
138
|
+
end
|
139
|
+
end
|
140
|
+
rescue Exception
|
141
|
+
end
|
73
142
|
end
|
74
143
|
|
75
|
-
|
76
|
-
|
144
|
+
if deserialized_feed == nil
|
145
|
+
# create the new feed
|
146
|
+
feed = FeedTools::Feed.new
|
147
|
+
|
148
|
+
feed.configurations = feed_configurations
|
77
149
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
150
|
+
# load the new feed
|
151
|
+
feed.href = href
|
152
|
+
if cache_object != nil
|
153
|
+
feed.cache_object = cache_object
|
154
|
+
end
|
155
|
+
feed.update! unless feed.configurations[:disable_update_from_remote]
|
156
|
+
Thread.pass
|
82
157
|
|
83
|
-
|
158
|
+
return feed
|
159
|
+
else
|
160
|
+
return deserialized_feed
|
161
|
+
end
|
84
162
|
end
|
85
163
|
|
86
164
|
# Returns the load options for this feed.
|
@@ -99,10 +177,9 @@ module FeedTools
|
|
99
177
|
# Loads the feed from the remote url if the feed has expired from the
|
100
178
|
# cache or cannot be retrieved from the cache for some reason.
|
101
179
|
def update!
|
102
|
-
if
|
103
|
-
|
104
|
-
|
105
|
-
end
|
180
|
+
# Don't do anything if this option is set
|
181
|
+
return if self.configurations[:disable_update_from_remote]
|
182
|
+
|
106
183
|
if !FeedTools.feed_cache.nil? &&
|
107
184
|
!FeedTools.feed_cache.set_up_correctly?
|
108
185
|
FeedTools.feed_cache.initialize_cache()
|
@@ -129,25 +206,23 @@ module FeedTools
|
|
129
206
|
self.http_headers['content-type'] =~ /application\/xhtml\+xml/
|
130
207
|
|
131
208
|
autodiscovered_url = nil
|
132
|
-
|
133
|
-
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
134
|
-
"application/atom+xml")
|
135
|
-
if autodiscovered_url.nil?
|
209
|
+
['atom', 'rss', 'rdf'].each do |type|
|
136
210
|
autodiscovered_url =
|
137
211
|
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
138
|
-
"application
|
212
|
+
"application/#{type}+xml")
|
213
|
+
break unless autodiscovered_url.nil?
|
139
214
|
end
|
140
|
-
|
141
|
-
|
142
|
-
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
143
|
-
"application/rdf+xml")
|
144
|
-
end
|
145
|
-
unless autodiscovered_url.nil?
|
215
|
+
|
216
|
+
if autodiscovered_url != nil
|
146
217
|
begin
|
147
218
|
autodiscovered_url = FeedTools::UriHelper.resolve_relative_uri(
|
148
219
|
autodiscovered_url, [self.href])
|
149
220
|
rescue Exception
|
150
221
|
end
|
222
|
+
if self.href == autodiscovered_url
|
223
|
+
raise FeedAccessError,
|
224
|
+
"Autodiscovery loop detected: #{autodiscovered_url}"
|
225
|
+
end
|
151
226
|
self.feed_data = nil
|
152
227
|
self.href = autodiscovered_url
|
153
228
|
if FeedTools.feed_cache.nil?
|
@@ -157,12 +232,24 @@ module FeedTools
|
|
157
232
|
FeedTools.feed_cache.find_by_href(autodiscovered_url)
|
158
233
|
end
|
159
234
|
self.update!
|
235
|
+
else
|
236
|
+
html_body = FeedTools::XmlHelper.try_xpaths(self.xml_document, [
|
237
|
+
"html/body"
|
238
|
+
])
|
239
|
+
if html_body != nil
|
240
|
+
raise FeedAccessError,
|
241
|
+
"#{self.href} does not appear to be a feed."
|
242
|
+
end
|
160
243
|
end
|
161
244
|
else
|
162
245
|
ugly_redirect = FeedTools::XmlHelper.try_xpaths(self.xml_document, [
|
163
246
|
"redirect/newLocation/text()"
|
164
247
|
], :select_result_value => true)
|
165
248
|
if !ugly_redirect.blank?
|
249
|
+
if self.href == ugly_redirect
|
250
|
+
raise FeedAccessError,
|
251
|
+
"Ugly redirect loop detected: #{ugly_redirect}"
|
252
|
+
end
|
166
253
|
self.feed_data = nil
|
167
254
|
self.href = ugly_redirect
|
168
255
|
if FeedTools.feed_cache.nil?
|
@@ -187,6 +274,10 @@ module FeedTools
|
|
187
274
|
@link = nil
|
188
275
|
@time_to_live = nil
|
189
276
|
@entries = nil
|
277
|
+
|
278
|
+
if self.configurations[:lazy_parsing_enabled] == false
|
279
|
+
self.full_parse()
|
280
|
+
end
|
190
281
|
end
|
191
282
|
end
|
192
283
|
|
@@ -338,7 +429,101 @@ module FeedTools
|
|
338
429
|
end
|
339
430
|
end
|
340
431
|
end
|
432
|
+
|
433
|
+
# Does a full parse of the feed.
|
434
|
+
def full_parse
|
435
|
+
self.href
|
436
|
+
|
437
|
+
self.cache_object
|
341
438
|
|
439
|
+
self.http_headers
|
440
|
+
self.encoding
|
441
|
+
self.feed_data_utf_8
|
442
|
+
self.xml_document
|
443
|
+
self.root_node
|
444
|
+
self.channel_node
|
445
|
+
|
446
|
+
self.base_uri
|
447
|
+
self.feed_type
|
448
|
+
self.feed_version
|
449
|
+
|
450
|
+
self.entries
|
451
|
+
|
452
|
+
self.id
|
453
|
+
self.title
|
454
|
+
self.subtitle
|
455
|
+
self.links
|
456
|
+
self.link
|
457
|
+
self.icon
|
458
|
+
self.favicon
|
459
|
+
self.author
|
460
|
+
self.publisher
|
461
|
+
self.time
|
462
|
+
self.updated
|
463
|
+
self.published
|
464
|
+
self.categories
|
465
|
+
self.images
|
466
|
+
self.rights
|
467
|
+
self.time_to_live
|
468
|
+
self.generator
|
469
|
+
self.language
|
470
|
+
|
471
|
+
self.docs
|
472
|
+
self.text_input
|
473
|
+
self.cloud
|
474
|
+
|
475
|
+
self.itunes_summary
|
476
|
+
self.itunes_subtitle
|
477
|
+
self.itunes_author
|
478
|
+
|
479
|
+
self.media_text
|
480
|
+
|
481
|
+
self.explicit?
|
482
|
+
|
483
|
+
self.entries.each do |entry|
|
484
|
+
entry.full_parse()
|
485
|
+
end
|
486
|
+
|
487
|
+
nil
|
488
|
+
end
|
489
|
+
|
490
|
+
# Does a full parse, then serializes the feed object directly to the
|
491
|
+
# cache.
|
492
|
+
def serialize_to_cache
|
493
|
+
@cache_object = nil
|
494
|
+
require 'yaml'
|
495
|
+
serialized_feed = YAML.dump(self.serializable)
|
496
|
+
if self.cache_object != nil
|
497
|
+
begin
|
498
|
+
self.cache_object.serialized = serialized_feed
|
499
|
+
self.cache_object.save
|
500
|
+
rescue Exception
|
501
|
+
end
|
502
|
+
end
|
503
|
+
return nil
|
504
|
+
end
|
505
|
+
|
506
|
+
# Returns a duplicate object suitable for serialization
|
507
|
+
def serializable
|
508
|
+
self.full_parse()
|
509
|
+
entries_to_dump = self.entries
|
510
|
+
# This prevents errors due to temporarily having feed items with
|
511
|
+
# multiple parent feeds.
|
512
|
+
self.entries = []
|
513
|
+
feed_to_dump = self.dup
|
514
|
+
feed_to_dump.instance_variable_set("@xml_document", nil)
|
515
|
+
feed_to_dump.instance_variable_set("@root_node", nil)
|
516
|
+
feed_to_dump.instance_variable_set("@channel_node", nil)
|
517
|
+
feed_to_dump.entries = entries_to_dump.collect do |entry|
|
518
|
+
entry.serializable
|
519
|
+
end
|
520
|
+
self.entries = entries_to_dump
|
521
|
+
feed_to_dump.entries.each do |entry|
|
522
|
+
entry.instance_variable_set("@root_node", nil)
|
523
|
+
end
|
524
|
+
return feed_to_dump
|
525
|
+
end
|
526
|
+
|
342
527
|
# Returns the relevant information from an http request.
|
343
528
|
def http_response
|
344
529
|
return @http_response
|
@@ -360,8 +545,12 @@ module FeedTools
|
|
360
545
|
# Returns the encoding that the feed was parsed with
|
361
546
|
def encoding
|
362
547
|
if @encoding.blank?
|
363
|
-
|
364
|
-
|
548
|
+
if !self.http_headers.blank?
|
549
|
+
if self.http_headers['content-type'] =~ /charset=([\w\d-]+)/
|
550
|
+
@encoding = $1.downcase
|
551
|
+
else
|
552
|
+
@encoding = self.encoding_from_feed_data
|
553
|
+
end
|
365
554
|
else
|
366
555
|
@encoding = self.encoding_from_feed_data
|
367
556
|
end
|
@@ -377,7 +566,7 @@ module FeedTools
|
|
377
566
|
return nil if raw_data.nil?
|
378
567
|
encoding_from_xml_instruct =
|
379
568
|
raw_data.scan(
|
380
|
-
/^<\?xml [^>]*encoding="([
|
569
|
+
/^<\?xml [^>]*encoding="([^\"]*)"[^>]*\?>/
|
381
570
|
).flatten.first
|
382
571
|
unless encoding_from_xml_instruct.blank?
|
383
572
|
encoding_from_xml_instruct.downcase!
|
@@ -446,11 +635,26 @@ module FeedTools
|
|
446
635
|
if FeedTools.feed_cache.nil?
|
447
636
|
self.cache_object = nil
|
448
637
|
else
|
449
|
-
|
450
|
-
|
638
|
+
begin
|
639
|
+
self.cache_object =
|
640
|
+
FeedTools.feed_cache.find_by_href(ugly_redirect)
|
641
|
+
rescue RuntimeError => error
|
642
|
+
if error.message =~ /sorry, too many clients already/
|
643
|
+
warn("There are too many connections to the database open.")
|
644
|
+
end
|
645
|
+
raise error
|
646
|
+
end
|
451
647
|
end
|
452
648
|
self.update!
|
453
649
|
end
|
650
|
+
|
651
|
+
# Get these things parsed in the correct order to avoid the retardedly
|
652
|
+
# painful corecursion issues.
|
653
|
+
self.href
|
654
|
+
@links = nil
|
655
|
+
@link = nil
|
656
|
+
self.links
|
657
|
+
self.link
|
454
658
|
end
|
455
659
|
|
456
660
|
# Returns the feed's raw data as utf-8.
|
@@ -462,11 +666,11 @@ module FeedTools
|
|
462
666
|
else
|
463
667
|
use_encoding = force_encoding
|
464
668
|
end
|
465
|
-
if use_encoding != "utf-8"
|
669
|
+
if use_encoding != "utf-8" && use_encoding != nil && raw_data != nil
|
466
670
|
begin
|
467
671
|
@feed_data_utf_8 =
|
468
672
|
Iconv.new('utf-8', use_encoding).iconv(raw_data)
|
469
|
-
rescue
|
673
|
+
rescue Exception => error
|
470
674
|
return raw_data
|
471
675
|
end
|
472
676
|
else
|
@@ -495,24 +699,23 @@ module FeedTools
|
|
495
699
|
unless self.cache_object.nil?
|
496
700
|
self.cache_object.feed_data_type = new_feed_data_type
|
497
701
|
end
|
702
|
+
if self.feed_data_type != :xml
|
703
|
+
@xml_document = nil
|
704
|
+
end
|
498
705
|
end
|
499
|
-
|
706
|
+
|
500
707
|
# Returns a REXML Document of the feed_data
|
501
708
|
def xml_document
|
502
|
-
if
|
503
|
-
|
504
|
-
|
505
|
-
|
709
|
+
if @xml_document.nil?
|
710
|
+
return nil if self.feed_data.blank?
|
711
|
+
if self.feed_data_type != :xml
|
712
|
+
@xml_document = nil
|
713
|
+
else
|
506
714
|
begin
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
@xml_document = HTree.parse(self.feed_data_utf_8).to_rexml
|
512
|
-
end
|
513
|
-
rescue Object
|
514
|
-
@xml_document = nil
|
515
|
-
raise
|
715
|
+
@xml_document = REXML::Document.new(self.feed_data_utf_8)
|
716
|
+
rescue Exception
|
717
|
+
# Something failed, attempt to repair the xml with htree.
|
718
|
+
@xml_document = HTree.parse(self.feed_data_utf_8).to_rexml
|
516
719
|
end
|
517
720
|
end
|
518
721
|
end
|
@@ -551,7 +754,7 @@ module FeedTools
|
|
551
754
|
else
|
552
755
|
@root_node = self.xml_document.root
|
553
756
|
end
|
554
|
-
rescue
|
757
|
+
rescue Exception
|
555
758
|
return nil
|
556
759
|
end
|
557
760
|
end
|
@@ -564,7 +767,8 @@ module FeedTools
|
|
564
767
|
@channel_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
565
768
|
"channel",
|
566
769
|
"CHANNEL",
|
567
|
-
"feedinfo"
|
770
|
+
"feedinfo",
|
771
|
+
"news"
|
568
772
|
])
|
569
773
|
if @channel_node == nil
|
570
774
|
@channel_node = self.root_node
|
@@ -584,9 +788,17 @@ module FeedTools
|
|
584
788
|
if @href != nil
|
585
789
|
begin
|
586
790
|
@cache_object = FeedTools.feed_cache.find_by_href(@href)
|
587
|
-
rescue
|
791
|
+
rescue RuntimeError => error
|
792
|
+
if error.message =~ /sorry, too many clients already/
|
793
|
+
warn("There are too many connections to the database open.")
|
794
|
+
raise error
|
795
|
+
else
|
796
|
+
raise error
|
797
|
+
end
|
798
|
+
rescue => error
|
588
799
|
warn("The feed cache seems to be having trouble with the " +
|
589
800
|
"find_by_href method. This may cause unexpected results.")
|
801
|
+
raise error
|
590
802
|
end
|
591
803
|
end
|
592
804
|
if @cache_object.nil?
|
@@ -778,19 +990,33 @@ module FeedTools
|
|
778
990
|
end
|
779
991
|
end
|
780
992
|
if override_href.call(@href) && self.feed_data != nil
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
993
|
+
begin
|
994
|
+
links = FeedTools::GenericHelper.recursion_trap(:feed_href) do
|
995
|
+
self.links
|
996
|
+
end
|
997
|
+
link = FeedTools::GenericHelper.recursion_trap(:feed_href) do
|
998
|
+
self.link
|
999
|
+
end
|
1000
|
+
if links != nil
|
1001
|
+
for link_object in links
|
1002
|
+
if link_object.rel == 'self'
|
1003
|
+
if link_object.href != link ||
|
1004
|
+
(link_object.href =~ /xml/ ||
|
1005
|
+
link_object.href =~ /atom/ ||
|
1006
|
+
link_object.href =~ /feed/)
|
1007
|
+
@href = link_object.href
|
1008
|
+
@href_overridden = true
|
1009
|
+
@links = nil
|
1010
|
+
@link = nil
|
1011
|
+
return @href
|
1012
|
+
end
|
1013
|
+
end
|
791
1014
|
end
|
792
1015
|
end
|
1016
|
+
rescue Exception
|
793
1017
|
end
|
1018
|
+
@links = nil
|
1019
|
+
@link = nil
|
794
1020
|
|
795
1021
|
# rdf:about is ordered last because a lot of people put the url to
|
796
1022
|
# the feed inside it instead of a link to their blog.
|
@@ -804,7 +1030,8 @@ module FeedTools
|
|
804
1030
|
"feed/@resource",
|
805
1031
|
"@rdf:about",
|
806
1032
|
"@about",
|
807
|
-
"newLocation/text()"
|
1033
|
+
"newLocation/text()",
|
1034
|
+
"atom10:link[@rel='self']/@href"
|
808
1035
|
], :select_result_value => true) do |result|
|
809
1036
|
override_href.call(FeedTools::UriHelper.normalize_url(result))
|
810
1037
|
end
|
@@ -831,6 +1058,7 @@ module FeedTools
|
|
831
1058
|
@href_overridden = false
|
832
1059
|
end
|
833
1060
|
if @href_overridden == true
|
1061
|
+
@links = nil
|
834
1062
|
@link = nil
|
835
1063
|
end
|
836
1064
|
end
|
@@ -858,7 +1086,7 @@ module FeedTools
|
|
858
1086
|
"TITLE"
|
859
1087
|
])
|
860
1088
|
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
|
861
|
-
self.feed_type, self.feed_version)
|
1089
|
+
self.feed_type, self.feed_version, [self.base_uri])
|
862
1090
|
if self.feed_type == "atom" ||
|
863
1091
|
self.configurations[:always_strip_wrapper_elements]
|
864
1092
|
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
|
@@ -901,7 +1129,7 @@ module FeedTools
|
|
901
1129
|
"info"
|
902
1130
|
])
|
903
1131
|
@subtitle = FeedTools::HtmlHelper.process_text_construct(
|
904
|
-
subtitle_node, self.feed_type, self.feed_version)
|
1132
|
+
subtitle_node, self.feed_type, self.feed_version, [self.base_uri])
|
905
1133
|
if self.feed_type == "atom" ||
|
906
1134
|
self.configurations[:always_strip_wrapper_elements]
|
907
1135
|
@subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle)
|
@@ -1119,6 +1347,13 @@ module FeedTools
|
|
1119
1347
|
"@href",
|
1120
1348
|
"text()"
|
1121
1349
|
], :select_result_value => true)
|
1350
|
+
if link_object.href == "atom10:" ||
|
1351
|
+
link_object.href == "atom03:" ||
|
1352
|
+
link_object.href == "atom:"
|
1353
|
+
link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1354
|
+
"@href"
|
1355
|
+
], :select_result_value => true)
|
1356
|
+
end
|
1122
1357
|
if link_object.href.nil? && link_node.base_uri != nil
|
1123
1358
|
link_object.href = ""
|
1124
1359
|
end
|
@@ -1143,6 +1378,13 @@ module FeedTools
|
|
1143
1378
|
"@atom:hreflang",
|
1144
1379
|
"@hreflang"
|
1145
1380
|
], :select_result_value => true)
|
1381
|
+
if link_object.hreflang == "atom10:" ||
|
1382
|
+
link_object.hreflang == "atom03:" ||
|
1383
|
+
link_object.hreflang == "atom:"
|
1384
|
+
link_object.hreflang = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1385
|
+
"@hreflang"
|
1386
|
+
], :select_result_value => true)
|
1387
|
+
end
|
1146
1388
|
unless link_object.hreflang.nil?
|
1147
1389
|
link_object.hreflang = link_object.hreflang.downcase
|
1148
1390
|
end
|
@@ -1152,6 +1394,13 @@ module FeedTools
|
|
1152
1394
|
"@atom:rel",
|
1153
1395
|
"@rel"
|
1154
1396
|
], :select_result_value => true)
|
1397
|
+
if link_object.rel == "atom10:" ||
|
1398
|
+
link_object.rel == "atom03:" ||
|
1399
|
+
link_object.rel == "atom:"
|
1400
|
+
link_object.rel = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1401
|
+
"@rel"
|
1402
|
+
], :select_result_value => true)
|
1403
|
+
end
|
1155
1404
|
unless link_object.rel.nil?
|
1156
1405
|
link_object.rel = link_object.rel.downcase
|
1157
1406
|
end
|
@@ -1164,6 +1413,13 @@ module FeedTools
|
|
1164
1413
|
"@atom:type",
|
1165
1414
|
"@type"
|
1166
1415
|
], :select_result_value => true)
|
1416
|
+
if link_object.type == "atom10:" ||
|
1417
|
+
link_object.type == "atom03:" ||
|
1418
|
+
link_object.type == "atom:"
|
1419
|
+
link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1420
|
+
"@type"
|
1421
|
+
], :select_result_value => true)
|
1422
|
+
end
|
1167
1423
|
unless link_object.type.nil?
|
1168
1424
|
link_object.type = link_object.type.downcase
|
1169
1425
|
end
|
@@ -1174,6 +1430,13 @@ module FeedTools
|
|
1174
1430
|
"@title",
|
1175
1431
|
"text()"
|
1176
1432
|
], :select_result_value => true)
|
1433
|
+
if link_object.title == "atom10:" ||
|
1434
|
+
link_object.title == "atom03:" ||
|
1435
|
+
link_object.title == "atom:"
|
1436
|
+
link_object.title = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1437
|
+
"@title"
|
1438
|
+
], :select_result_value => true)
|
1439
|
+
end
|
1177
1440
|
# This catches the ambiguities between atom, rss, and cdf
|
1178
1441
|
if link_object.title == link_object.href
|
1179
1442
|
link_object.title = nil
|
@@ -1184,6 +1447,13 @@ module FeedTools
|
|
1184
1447
|
"@atom:length",
|
1185
1448
|
"@length"
|
1186
1449
|
], :select_result_value => true)
|
1450
|
+
if link_object.length == "atom10:" ||
|
1451
|
+
link_object.length == "atom03:" ||
|
1452
|
+
link_object.length == "atom:"
|
1453
|
+
link_object.length = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1454
|
+
"@length"
|
1455
|
+
], :select_result_value => true)
|
1456
|
+
end
|
1187
1457
|
if !link_object.length.nil?
|
1188
1458
|
link_object.length = link_object.length.to_i
|
1189
1459
|
else
|
@@ -1211,7 +1481,10 @@ module FeedTools
|
|
1211
1481
|
def base_uri
|
1212
1482
|
if @base_uri.nil?
|
1213
1483
|
@base_uri = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1214
|
-
"@base"
|
1484
|
+
"@base",
|
1485
|
+
"base/@href",
|
1486
|
+
"base/text()",
|
1487
|
+
"@xml:base"
|
1215
1488
|
], :select_result_value => true)
|
1216
1489
|
if @base_uri.blank?
|
1217
1490
|
begin
|
@@ -1222,9 +1495,26 @@ module FeedTools
|
|
1222
1495
|
rescue Exception
|
1223
1496
|
end
|
1224
1497
|
end
|
1498
|
+
if @base_uri.blank?
|
1499
|
+
@base_uri = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1500
|
+
"@xml:base"
|
1501
|
+
], :select_result_value => true)
|
1502
|
+
end
|
1225
1503
|
if !@base_uri.blank?
|
1226
1504
|
@base_uri = FeedTools::UriHelper.normalize_url(@base_uri)
|
1227
1505
|
end
|
1506
|
+
if !@base_uri.blank?
|
1507
|
+
parsed_uri = FeedTools::URI.parse(@base_uri)
|
1508
|
+
# Feedburner is almost never the base uri that was intended
|
1509
|
+
# Use the actual site instead
|
1510
|
+
if parsed_uri.host =~ /feedburner/
|
1511
|
+
site_uri =
|
1512
|
+
FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
|
1513
|
+
FeedTools::UriHelper.normalize_url(self.link)
|
1514
|
+
end
|
1515
|
+
@base_uri = site_uri if !site_uri.blank?
|
1516
|
+
end
|
1517
|
+
end
|
1228
1518
|
end
|
1229
1519
|
return @base_uri
|
1230
1520
|
end
|
@@ -1425,6 +1715,12 @@ module FeedTools
|
|
1425
1715
|
rescue
|
1426
1716
|
end
|
1427
1717
|
end
|
1718
|
+
if FeedTools::XmlHelper.try_xpaths(author_node,
|
1719
|
+
["@gr:unknown-author"], :select_result_value => true) == "true"
|
1720
|
+
if @author.name == "(author unknown)"
|
1721
|
+
@author.name = nil
|
1722
|
+
end
|
1723
|
+
end
|
1428
1724
|
end
|
1429
1725
|
# Fallback on the itunes module if we didn't find an author name
|
1430
1726
|
begin
|
@@ -1764,35 +2060,7 @@ module FeedTools
|
|
1764
2060
|
end
|
1765
2061
|
return @images
|
1766
2062
|
end
|
1767
|
-
|
1768
|
-
# Returns the feed's text input field
|
1769
|
-
def text_input
|
1770
|
-
if @text_input.nil?
|
1771
|
-
@text_input = FeedTools::TextInput.new
|
1772
|
-
text_input_node =
|
1773
|
-
FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
|
1774
|
-
unless text_input_node.nil?
|
1775
|
-
@text_input.title =
|
1776
|
-
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1777
|
-
["title/text()"],
|
1778
|
-
:select_result_value => true)
|
1779
|
-
@text_input.description =
|
1780
|
-
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1781
|
-
["description/text()"],
|
1782
|
-
:select_result_value => true)
|
1783
|
-
@text_input.link =
|
1784
|
-
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1785
|
-
["link/text()"],
|
1786
|
-
:select_result_value => true)
|
1787
|
-
@text_input.name =
|
1788
|
-
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1789
|
-
["name/text()"],
|
1790
|
-
:select_result_value => true)
|
1791
|
-
end
|
1792
|
-
end
|
1793
|
-
return @text_input
|
1794
|
-
end
|
1795
|
-
|
2063
|
+
|
1796
2064
|
# Returns the feed's copyright information
|
1797
2065
|
def rights
|
1798
2066
|
if @rights.nil?
|
@@ -1807,7 +2075,7 @@ module FeedTools
|
|
1807
2075
|
"rights"
|
1808
2076
|
])
|
1809
2077
|
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
|
1810
|
-
self.feed_type, self.feed_version)
|
2078
|
+
self.feed_type, self.feed_version, [self.base_uri])
|
1811
2079
|
if self.feed_type == "atom" ||
|
1812
2080
|
self.configurations[:always_strip_wrapper_elements]
|
1813
2081
|
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
|
@@ -1821,12 +2089,24 @@ module FeedTools
|
|
1821
2089
|
@rights = new_rights
|
1822
2090
|
end
|
1823
2091
|
|
1824
|
-
|
1825
|
-
|
2092
|
+
# Returns the first license link for the feed item.
|
2093
|
+
def license
|
2094
|
+
return self.licenses.first
|
2095
|
+
end
|
2096
|
+
|
2097
|
+
# Returns all licenses linked from this feed item.
|
2098
|
+
def licenses
|
2099
|
+
if @licenses.nil?
|
2100
|
+
@licenses = self.links.select do |link|
|
2101
|
+
link.rel == "license"
|
2102
|
+
end
|
2103
|
+
end
|
2104
|
+
return @licenses
|
1826
2105
|
end
|
1827
2106
|
|
1828
|
-
|
1829
|
-
|
2107
|
+
# Sets the feed item's licenses.
|
2108
|
+
def licenses=(new_licenses)
|
2109
|
+
@licenses = new_licenses
|
1830
2110
|
end
|
1831
2111
|
|
1832
2112
|
# Returns the number of seconds before the feed should expire
|
@@ -1910,14 +2190,14 @@ module FeedTools
|
|
1910
2190
|
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1911
2191
|
end
|
1912
2192
|
if @time_to_live == 0
|
1913
|
-
@time_to_live =
|
2193
|
+
@time_to_live = self.configurations[:default_ttl].to_i
|
1914
2194
|
end
|
1915
2195
|
end
|
1916
2196
|
end
|
1917
2197
|
end
|
1918
2198
|
if @time_to_live.nil? || @time_to_live == 0
|
1919
2199
|
# Default to one hour
|
1920
|
-
@time_to_live =
|
2200
|
+
@time_to_live = self.configurations[:default_ttl].to_i
|
1921
2201
|
elsif self.configurations[:max_ttl] != nil &&
|
1922
2202
|
self.configurations[:max_ttl] != 0 &&
|
1923
2203
|
@time_to_live >= self.configurations[:max_ttl].to_i
|
@@ -1930,7 +2210,7 @@ module FeedTools
|
|
1930
2210
|
# Sets the feed time to live
|
1931
2211
|
def time_to_live=(new_time_to_live)
|
1932
2212
|
@time_to_live = new_time_to_live.round
|
1933
|
-
@time_to_live =
|
2213
|
+
@time_to_live = 30.minutes if @time_to_live < 30.minutes
|
1934
2214
|
end
|
1935
2215
|
|
1936
2216
|
# Returns the feed's cloud
|
@@ -1966,6 +2246,34 @@ module FeedTools
|
|
1966
2246
|
@cloud = new_cloud
|
1967
2247
|
end
|
1968
2248
|
|
2249
|
+
# Returns the feed's text input field
|
2250
|
+
def text_input
|
2251
|
+
if @text_input.nil?
|
2252
|
+
@text_input = FeedTools::TextInput.new
|
2253
|
+
text_input_node =
|
2254
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
|
2255
|
+
unless text_input_node.nil?
|
2256
|
+
@text_input.title =
|
2257
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
2258
|
+
["title/text()"],
|
2259
|
+
:select_result_value => true)
|
2260
|
+
@text_input.description =
|
2261
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
2262
|
+
["description/text()"],
|
2263
|
+
:select_result_value => true)
|
2264
|
+
@text_input.link =
|
2265
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
2266
|
+
["link/text()"],
|
2267
|
+
:select_result_value => true)
|
2268
|
+
@text_input.name =
|
2269
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
2270
|
+
["name/text()"],
|
2271
|
+
:select_result_value => true)
|
2272
|
+
end
|
2273
|
+
end
|
2274
|
+
return @text_input
|
2275
|
+
end
|
2276
|
+
|
1969
2277
|
# Returns the feed generator
|
1970
2278
|
def generator
|
1971
2279
|
if @generator.nil?
|
@@ -2096,7 +2404,8 @@ module FeedTools
|
|
2096
2404
|
"atom10:entry",
|
2097
2405
|
"atom03:entry",
|
2098
2406
|
"atom:entry",
|
2099
|
-
"entry"
|
2407
|
+
"entry",
|
2408
|
+
"story"
|
2100
2409
|
]),
|
2101
2410
|
FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
|
2102
2411
|
"rss10:item",
|
@@ -2104,7 +2413,8 @@ module FeedTools
|
|
2104
2413
|
"rss11:items/item",
|
2105
2414
|
"items/rss11:item",
|
2106
2415
|
"items/item",
|
2107
|
-
"item"
|
2416
|
+
"item",
|
2417
|
+
"story"
|
2108
2418
|
])
|
2109
2419
|
])
|
2110
2420
|
|
@@ -2234,6 +2544,17 @@ module FeedTools
|
|
2234
2544
|
def build_xml(feed_type=(self.feed_type or "atom"), feed_version=nil,
|
2235
2545
|
xml_builder=Builder::XmlMarkup.new(
|
2236
2546
|
:indent => 2, :escape_attrs => false))
|
2547
|
+
|
2548
|
+
if self.find_node("access:restriction/@relationship").to_s == "deny"
|
2549
|
+
raise StandardError,
|
2550
|
+
"Operation not permitted. This feed denies redistribution."
|
2551
|
+
elsif self.find_node("@indexing:index").to_s == "no"
|
2552
|
+
raise StandardError,
|
2553
|
+
"Operation not permitted. This feed denies redistribution."
|
2554
|
+
end
|
2555
|
+
|
2556
|
+
self.full_parse()
|
2557
|
+
|
2237
2558
|
xml_builder.instruct! :xml, :version => "1.0",
|
2238
2559
|
:encoding => (self.configurations[:output_encoding] or "utf-8")
|
2239
2560
|
if feed_type.nil?
|
@@ -2392,6 +2713,9 @@ module FeedTools
|
|
2392
2713
|
unless self.copyright.blank?
|
2393
2714
|
xml_builder.copyright(self.copyright)
|
2394
2715
|
end
|
2716
|
+
unless self.language.blank?
|
2717
|
+
xml_builder.language(self.language)
|
2718
|
+
end
|
2395
2719
|
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
2396
2720
|
xml_builder.generator(
|
2397
2721
|
self.configurations[:generator_href])
|
@@ -2467,6 +2791,8 @@ module FeedTools
|
|
2467
2791
|
end
|
2468
2792
|
elsif self.link != nil
|
2469
2793
|
xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
|
2794
|
+
elsif self.url != nil
|
2795
|
+
xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.url))
|
2470
2796
|
else
|
2471
2797
|
raise "Cannot build feed, missing feed unique id."
|
2472
2798
|
end
|