feedtools 0.2.26 → 0.2.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +232 -216
- data/db/migration.rb +2 -0
- data/db/schema.mysql.sql +2 -0
- data/db/schema.postgresql.sql +3 -1
- data/db/schema.sqlite.sql +3 -1
- data/lib/feed_tools.rb +37 -14
- data/lib/feed_tools/database_feed_cache.rb +13 -2
- data/lib/feed_tools/feed.rb +430 -104
- data/lib/feed_tools/feed_item.rb +533 -268
- data/lib/feed_tools/helpers/generic_helper.rb +1 -1
- data/lib/feed_tools/helpers/html_helper.rb +78 -116
- data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
- data/lib/feed_tools/helpers/uri_helper.rb +46 -54
- data/lib/feed_tools/monkey_patch.rb +27 -1
- data/lib/feed_tools/vendor/html5/History.txt +10 -0
- data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
- data/lib/feed_tools/vendor/html5/README +45 -0
- data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
- data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
- data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
- data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
- data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
- data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
- data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
- data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
- data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
- data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
- data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
- data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
- data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
- data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
- data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
- data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
- data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
- data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
- data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
- data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
- data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
- data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
- data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
- data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
- data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
- data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
- data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
- data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
- data/lib/feed_tools/vendor/uri.rb +781 -0
- data/lib/feed_tools/version.rb +1 -1
- data/rakefile +27 -6
- data/test/unit/atom_test.rb +298 -210
- data/test/unit/helper_test.rb +7 -12
- data/test/unit/rdf_test.rb +51 -1
- data/test/unit/rss_test.rb +13 -3
- metadata +239 -116
- data/lib/feed_tools/vendor/htree.rb +0 -97
- data/lib/feed_tools/vendor/htree/container.rb +0 -10
- data/lib/feed_tools/vendor/htree/context.rb +0 -67
- data/lib/feed_tools/vendor/htree/display.rb +0 -27
- data/lib/feed_tools/vendor/htree/doc.rb +0 -149
- data/lib/feed_tools/vendor/htree/elem.rb +0 -262
- data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
- data/lib/feed_tools/vendor/htree/equality.rb +0 -218
- data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
- data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
- data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
- data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
- data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
- data/lib/feed_tools/vendor/htree/loc.rb +0 -367
- data/lib/feed_tools/vendor/htree/modules.rb +0 -48
- data/lib/feed_tools/vendor/htree/name.rb +0 -124
- data/lib/feed_tools/vendor/htree/output.rb +0 -207
- data/lib/feed_tools/vendor/htree/parse.rb +0 -409
- data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
- data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
- data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
- data/lib/feed_tools/vendor/htree/scan.rb +0 -166
- data/lib/feed_tools/vendor/htree/tag.rb +0 -111
- data/lib/feed_tools/vendor/htree/template.rb +0 -909
- data/lib/feed_tools/vendor/htree/text.rb +0 -115
- data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
data/db/schema.mysql.sql
CHANGED
data/db/schema.postgresql.sql
CHANGED
|
@@ -7,5 +7,7 @@
|
|
|
7
7
|
feed_data text default NULL,
|
|
8
8
|
feed_data_type varchar(20) default NULL,
|
|
9
9
|
http_headers text default NULL,
|
|
10
|
-
last_retrieved timestamp default NULL
|
|
10
|
+
last_retrieved timestamp default NULL,
|
|
11
|
+
time_to_live integer(10) default NULL,
|
|
12
|
+
serialized text default NULL
|
|
11
13
|
);
|
data/db/schema.sqlite.sql
CHANGED
data/lib/feed_tools.rb
CHANGED
|
@@ -33,6 +33,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
|
|
33
33
|
'development' # :nodoc:
|
|
34
34
|
|
|
35
35
|
FEED_TOOLS_NAMESPACES = {
|
|
36
|
+
"access" => "http://www.bloglines.com/about/specs/fac-1.0",
|
|
36
37
|
"admin" => "http://webns.net/mvcb/",
|
|
37
38
|
"ag" => "http://purl.org/rss/1.0/modules/aggregation/",
|
|
38
39
|
"annotate" => "http://purl.org/rss/1.0/modules/annotate/",
|
|
@@ -54,10 +55,13 @@ FEED_TOOLS_NAMESPACES = {
|
|
|
54
55
|
"ev" => "http://purl.org/rss/1.0/modules/event/",
|
|
55
56
|
"icbm" => "http://postneo.com/icbm/",
|
|
56
57
|
"image" => "http://purl.org/rss/1.0/modules/image/",
|
|
58
|
+
"indexing" => "urn:atom-extension:indexing",
|
|
57
59
|
"feedburner" => "http://rssnamespace.org/feedburner/ext/1.0",
|
|
58
60
|
"foaf" => "http://xmlns.com/foaf/0.1/",
|
|
59
61
|
"foo" => "http://hsivonen.iki.fi/FooML",
|
|
60
62
|
"fm" => "http://freshmeat.net/rss/fm/",
|
|
63
|
+
"gd" => "http://schemas.google.com/g/2005",
|
|
64
|
+
"gr" => "http://www.google.com/schemas/reader/atom/",
|
|
61
65
|
"itunes" => "http://www.itunes.com/dtds/podcast-1.0.dtd",
|
|
62
66
|
"l" => "http://purl.org/rss/1.0/modules/link/",
|
|
63
67
|
"media" => "http://search.yahoo.com/mrss",
|
|
@@ -90,7 +94,8 @@ FEED_TOOLS_NAMESPACES = {
|
|
|
90
94
|
}
|
|
91
95
|
|
|
92
96
|
$:.unshift(File.dirname(__FILE__))
|
|
93
|
-
|
|
97
|
+
$: << (File.dirname(__FILE__) + "/feed_tools/vendor")
|
|
98
|
+
$: << (File.dirname(__FILE__) + "/feed_tools/vendor/html5/lib")
|
|
94
99
|
|
|
95
100
|
begin
|
|
96
101
|
require 'feed_tools/version'
|
|
@@ -104,8 +109,16 @@ begin
|
|
|
104
109
|
end
|
|
105
110
|
|
|
106
111
|
require 'rubygems'
|
|
107
|
-
|
|
108
|
-
|
|
112
|
+
|
|
113
|
+
if !defined?(Builder)
|
|
114
|
+
gem('builder', '>= 1.2.4')
|
|
115
|
+
require 'builder'
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
if !defined?(ActiveRecord)
|
|
119
|
+
gem('activerecord', '>= 1.11.1')
|
|
120
|
+
require 'active_record'
|
|
121
|
+
end
|
|
109
122
|
|
|
110
123
|
# Preload optional libraries.
|
|
111
124
|
begin
|
|
@@ -117,7 +130,15 @@ begin
|
|
|
117
130
|
rescue Object
|
|
118
131
|
end
|
|
119
132
|
|
|
120
|
-
require '
|
|
133
|
+
require 'html5'
|
|
134
|
+
require 'html5/html5parser'
|
|
135
|
+
require 'html5/liberalxmlparser'
|
|
136
|
+
require 'html5/treewalkers'
|
|
137
|
+
require 'html5/treebuilders'
|
|
138
|
+
require 'html5/serializer'
|
|
139
|
+
require 'html5/sanitizer'
|
|
140
|
+
|
|
141
|
+
require 'feed_tools/vendor/uri'
|
|
121
142
|
|
|
122
143
|
require 'net/http'
|
|
123
144
|
|
|
@@ -134,15 +155,8 @@ begin
|
|
|
134
155
|
require 'yaml'
|
|
135
156
|
require 'base64'
|
|
136
157
|
|
|
137
|
-
if !defined?(ActiveSupport)
|
|
138
|
-
require_gem('activesupport', '>= 1.1.1')
|
|
139
|
-
end
|
|
140
|
-
if !defined?(ActiveRecord)
|
|
141
|
-
require_gem('activerecord', '>= 1.11.1')
|
|
142
|
-
end
|
|
143
|
-
|
|
144
158
|
begin
|
|
145
|
-
|
|
159
|
+
gem('uuidtools', '>= 0.1.2')
|
|
146
160
|
rescue Gem::LoadError
|
|
147
161
|
begin
|
|
148
162
|
require 'uuidtools'
|
|
@@ -198,6 +212,10 @@ module FeedTools
|
|
|
198
212
|
:proxy_port => nil,
|
|
199
213
|
:proxy_user => nil,
|
|
200
214
|
:proxy_password => nil,
|
|
215
|
+
:auth_user => nil,
|
|
216
|
+
:auth_password => nil,
|
|
217
|
+
:auth_scheme => nil,
|
|
218
|
+
:http_timeout => nil,
|
|
201
219
|
:user_agent =>
|
|
202
220
|
"FeedTools/#{FeedTools::FEED_TOOLS_VERSION::STRING} " +
|
|
203
221
|
"+http://www.sporkmonger.com/projects/feedtools/",
|
|
@@ -207,6 +225,8 @@ module FeedTools
|
|
|
207
225
|
"http://www.sporkmonger.com/projects/feedtools/",
|
|
208
226
|
:tidy_enabled => false,
|
|
209
227
|
:tidy_options => {},
|
|
228
|
+
:lazy_parsing_enabled => true,
|
|
229
|
+
:serialization_enabled => false,
|
|
210
230
|
:idn_enabled => true,
|
|
211
231
|
:sanitization_enabled => true,
|
|
212
232
|
:sanitize_with_nofollow => true,
|
|
@@ -217,6 +237,7 @@ module FeedTools
|
|
|
217
237
|
:strip_comment_count => false,
|
|
218
238
|
:tab_spaces => 2,
|
|
219
239
|
:max_ttl => 3.days.to_s,
|
|
240
|
+
:default_ttl => 1.hour.to_s,
|
|
220
241
|
:output_encoding => "utf-8"
|
|
221
242
|
}.merge(config_hash)
|
|
222
243
|
end
|
|
@@ -318,6 +339,7 @@ module FeedTools
|
|
|
318
339
|
FeedTools::GenericHelper.validate_options([ :multi_threaded ],
|
|
319
340
|
options.keys)
|
|
320
341
|
options = { :multi_threaded => false }.merge(options)
|
|
342
|
+
warn("FeedTools.build_merged_feed is deprecated.")
|
|
321
343
|
return nil if url_array.nil?
|
|
322
344
|
merged_feed = FeedTools::Feed.new
|
|
323
345
|
retrieved_feeds = []
|
|
@@ -339,12 +361,13 @@ module FeedTools
|
|
|
339
361
|
end
|
|
340
362
|
end
|
|
341
363
|
retrieved_feeds.each do |feed|
|
|
342
|
-
merged_feed.entries.concat(
|
|
364
|
+
merged_feed.entries = merged_feed.entries.concat(
|
|
343
365
|
feed.entries.collect do |entry|
|
|
344
366
|
new_entry = entry.dup
|
|
345
367
|
new_entry.title = "#{feed.title}: #{entry.title}"
|
|
346
368
|
new_entry
|
|
347
|
-
end
|
|
369
|
+
end
|
|
370
|
+
)
|
|
348
371
|
end
|
|
349
372
|
return merged_feed
|
|
350
373
|
end
|
|
@@ -51,6 +51,8 @@ module FeedTools
|
|
|
51
51
|
ActiveRecord::Base.default_timezone = :utc
|
|
52
52
|
ActiveRecord::Base.connection
|
|
53
53
|
rescue
|
|
54
|
+
end
|
|
55
|
+
if !ActiveRecord::Base.connected?
|
|
54
56
|
begin
|
|
55
57
|
possible_config_files = [
|
|
56
58
|
"./config/database.yml",
|
|
@@ -66,6 +68,7 @@ module FeedTools
|
|
|
66
68
|
for file in possible_config_files
|
|
67
69
|
if File.exists?(File.expand_path(file))
|
|
68
70
|
database_config_file = file
|
|
71
|
+
@config_path = database_config_file
|
|
69
72
|
break
|
|
70
73
|
end
|
|
71
74
|
end
|
|
@@ -84,6 +87,14 @@ module FeedTools
|
|
|
84
87
|
end
|
|
85
88
|
return nil
|
|
86
89
|
end
|
|
90
|
+
|
|
91
|
+
# Returns the path to the database.yml config file that FeedTools loaded.
|
|
92
|
+
def DatabaseFeedCache.config_path
|
|
93
|
+
if !defined?(@config_path) || @config_path.blank?
|
|
94
|
+
@config_path = nil
|
|
95
|
+
end
|
|
96
|
+
return @config_path
|
|
97
|
+
end
|
|
87
98
|
|
|
88
99
|
# Returns true if a connection to the database has been established and the
|
|
89
100
|
# required table structure is in place.
|
|
@@ -115,9 +126,9 @@ module FeedTools
|
|
|
115
126
|
# True if the appropriate database table already exists
|
|
116
127
|
def DatabaseFeedCache.table_exists?
|
|
117
128
|
begin
|
|
118
|
-
ActiveRecord::Base.connection.
|
|
129
|
+
ActiveRecord::Base.connection.select_one("select id, href, title, " +
|
|
119
130
|
"link, feed_data, feed_data_type, http_headers, last_retrieved " +
|
|
120
|
-
"from #{self.table_name()}
|
|
131
|
+
"from #{self.table_name()}")
|
|
121
132
|
rescue ActiveRecord::StatementInvalid
|
|
122
133
|
return false
|
|
123
134
|
rescue
|
data/lib/feed_tools/feed.rb
CHANGED
|
@@ -53,34 +53,112 @@ module FeedTools
|
|
|
53
53
|
@live = false
|
|
54
54
|
@encoding = nil
|
|
55
55
|
@options = nil
|
|
56
|
+
@version = FeedTools::FEED_TOOLS_VERSION::STRING
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Breaks any references that the feed may be keeping around, thus making
|
|
60
|
+
# the job of the garbage collector much, much easier. Call this
|
|
61
|
+
# method prior to feeds going out of scope to prevent memory leaks.
|
|
62
|
+
def dispose()
|
|
63
|
+
self.entries.each do |entry|
|
|
64
|
+
entry.instance_variable_set("@root_node", nil)
|
|
65
|
+
entry.instance_variable_set("@feed", nil)
|
|
66
|
+
entry.instance_variable_set("@parent_feed", nil)
|
|
67
|
+
entry.dispose if entry.respond_to?(:dispose)
|
|
68
|
+
end
|
|
69
|
+
self.entries = []
|
|
70
|
+
|
|
71
|
+
@cache_object = nil
|
|
72
|
+
@http_headers = nil
|
|
73
|
+
@xml_document = nil
|
|
74
|
+
@feed_data = nil
|
|
75
|
+
@feed_data_type = nil
|
|
76
|
+
@root_node = nil
|
|
77
|
+
@channel_node = nil
|
|
78
|
+
@href = nil
|
|
79
|
+
@id = nil
|
|
80
|
+
@title = nil
|
|
81
|
+
@subtitle = nil
|
|
82
|
+
@link = nil
|
|
83
|
+
@last_retrieved = nil
|
|
84
|
+
@time_to_live = nil
|
|
85
|
+
@entries = nil
|
|
86
|
+
@live = false
|
|
87
|
+
@encoding = nil
|
|
88
|
+
@options = nil
|
|
89
|
+
|
|
90
|
+
GC.start()
|
|
91
|
+
self
|
|
56
92
|
end
|
|
57
93
|
|
|
58
94
|
# Loads the feed specified by the url, pulling the data from the
|
|
59
95
|
# cache if it hasn't expired. Options supplied will override the
|
|
60
96
|
# default options.
|
|
61
|
-
def Feed.open(
|
|
97
|
+
def Feed.open(href, options={})
|
|
62
98
|
FeedTools::GenericHelper.validate_options(
|
|
63
99
|
FeedTools.configurations.keys, options.keys)
|
|
64
100
|
|
|
65
|
-
#
|
|
66
|
-
|
|
101
|
+
# clean up the url
|
|
102
|
+
href = FeedTools::UriHelper.normalize_url(href)
|
|
67
103
|
|
|
68
|
-
|
|
104
|
+
feed_configurations = FeedTools.configurations.merge(options)
|
|
105
|
+
cache_object = nil
|
|
106
|
+
deserialized_feed = nil
|
|
69
107
|
|
|
70
|
-
if
|
|
108
|
+
if feed_configurations[:feed_cache] != nil && FeedTools.feed_cache.nil?
|
|
71
109
|
raise(ArgumentError, "There is currently no caching mechanism set. " +
|
|
72
110
|
"Cannot retrieve cached feeds.")
|
|
111
|
+
elsif feed_configurations[:serialization_enabled] == true
|
|
112
|
+
# We've got a caching mechanism available
|
|
113
|
+
cache_object = FeedTools.feed_cache.find_by_href(href)
|
|
114
|
+
begin
|
|
115
|
+
if cache_object != nil && cache_object.serialized != nil
|
|
116
|
+
# If we've got a cache hit, deserialize
|
|
117
|
+
expired = true
|
|
118
|
+
if cache_object.time_to_live == nil
|
|
119
|
+
cache_object.time_to_live =
|
|
120
|
+
feed_configurations[:default_ttl].to_i
|
|
121
|
+
cache_object.save
|
|
122
|
+
end
|
|
123
|
+
if (cache_object.last_retrieved == nil)
|
|
124
|
+
expired = true
|
|
125
|
+
elsif (cache_object.time_to_live < 30.minutes)
|
|
126
|
+
expired =
|
|
127
|
+
(cache_object.last_retrieved + 30.minutes) < Time.now.gmtime
|
|
128
|
+
else
|
|
129
|
+
expired =
|
|
130
|
+
(cache_object.last_retrieved + cache_object.time_to_live) <
|
|
131
|
+
Time.now.gmtime
|
|
132
|
+
end
|
|
133
|
+
if !expired
|
|
134
|
+
require 'yaml'
|
|
135
|
+
deserialized_feed = YAML.load(cache_object.serialized)
|
|
136
|
+
deserialized_feed.cache_object = cache_object
|
|
137
|
+
Thread.pass
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
rescue Exception
|
|
141
|
+
end
|
|
73
142
|
end
|
|
74
143
|
|
|
75
|
-
|
|
76
|
-
|
|
144
|
+
if deserialized_feed == nil
|
|
145
|
+
# create the new feed
|
|
146
|
+
feed = FeedTools::Feed.new
|
|
147
|
+
|
|
148
|
+
feed.configurations = feed_configurations
|
|
77
149
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
150
|
+
# load the new feed
|
|
151
|
+
feed.href = href
|
|
152
|
+
if cache_object != nil
|
|
153
|
+
feed.cache_object = cache_object
|
|
154
|
+
end
|
|
155
|
+
feed.update! unless feed.configurations[:disable_update_from_remote]
|
|
156
|
+
Thread.pass
|
|
82
157
|
|
|
83
|
-
|
|
158
|
+
return feed
|
|
159
|
+
else
|
|
160
|
+
return deserialized_feed
|
|
161
|
+
end
|
|
84
162
|
end
|
|
85
163
|
|
|
86
164
|
# Returns the load options for this feed.
|
|
@@ -99,10 +177,9 @@ module FeedTools
|
|
|
99
177
|
# Loads the feed from the remote url if the feed has expired from the
|
|
100
178
|
# cache or cannot be retrieved from the cache for some reason.
|
|
101
179
|
def update!
|
|
102
|
-
if
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
end
|
|
180
|
+
# Don't do anything if this option is set
|
|
181
|
+
return if self.configurations[:disable_update_from_remote]
|
|
182
|
+
|
|
106
183
|
if !FeedTools.feed_cache.nil? &&
|
|
107
184
|
!FeedTools.feed_cache.set_up_correctly?
|
|
108
185
|
FeedTools.feed_cache.initialize_cache()
|
|
@@ -129,25 +206,23 @@ module FeedTools
|
|
|
129
206
|
self.http_headers['content-type'] =~ /application\/xhtml\+xml/
|
|
130
207
|
|
|
131
208
|
autodiscovered_url = nil
|
|
132
|
-
|
|
133
|
-
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
|
134
|
-
"application/atom+xml")
|
|
135
|
-
if autodiscovered_url.nil?
|
|
209
|
+
['atom', 'rss', 'rdf'].each do |type|
|
|
136
210
|
autodiscovered_url =
|
|
137
211
|
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
|
138
|
-
"application
|
|
212
|
+
"application/#{type}+xml")
|
|
213
|
+
break unless autodiscovered_url.nil?
|
|
139
214
|
end
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
|
143
|
-
"application/rdf+xml")
|
|
144
|
-
end
|
|
145
|
-
unless autodiscovered_url.nil?
|
|
215
|
+
|
|
216
|
+
if autodiscovered_url != nil
|
|
146
217
|
begin
|
|
147
218
|
autodiscovered_url = FeedTools::UriHelper.resolve_relative_uri(
|
|
148
219
|
autodiscovered_url, [self.href])
|
|
149
220
|
rescue Exception
|
|
150
221
|
end
|
|
222
|
+
if self.href == autodiscovered_url
|
|
223
|
+
raise FeedAccessError,
|
|
224
|
+
"Autodiscovery loop detected: #{autodiscovered_url}"
|
|
225
|
+
end
|
|
151
226
|
self.feed_data = nil
|
|
152
227
|
self.href = autodiscovered_url
|
|
153
228
|
if FeedTools.feed_cache.nil?
|
|
@@ -157,12 +232,24 @@ module FeedTools
|
|
|
157
232
|
FeedTools.feed_cache.find_by_href(autodiscovered_url)
|
|
158
233
|
end
|
|
159
234
|
self.update!
|
|
235
|
+
else
|
|
236
|
+
html_body = FeedTools::XmlHelper.try_xpaths(self.xml_document, [
|
|
237
|
+
"html/body"
|
|
238
|
+
])
|
|
239
|
+
if html_body != nil
|
|
240
|
+
raise FeedAccessError,
|
|
241
|
+
"#{self.href} does not appear to be a feed."
|
|
242
|
+
end
|
|
160
243
|
end
|
|
161
244
|
else
|
|
162
245
|
ugly_redirect = FeedTools::XmlHelper.try_xpaths(self.xml_document, [
|
|
163
246
|
"redirect/newLocation/text()"
|
|
164
247
|
], :select_result_value => true)
|
|
165
248
|
if !ugly_redirect.blank?
|
|
249
|
+
if self.href == ugly_redirect
|
|
250
|
+
raise FeedAccessError,
|
|
251
|
+
"Ugly redirect loop detected: #{ugly_redirect}"
|
|
252
|
+
end
|
|
166
253
|
self.feed_data = nil
|
|
167
254
|
self.href = ugly_redirect
|
|
168
255
|
if FeedTools.feed_cache.nil?
|
|
@@ -187,6 +274,10 @@ module FeedTools
|
|
|
187
274
|
@link = nil
|
|
188
275
|
@time_to_live = nil
|
|
189
276
|
@entries = nil
|
|
277
|
+
|
|
278
|
+
if self.configurations[:lazy_parsing_enabled] == false
|
|
279
|
+
self.full_parse()
|
|
280
|
+
end
|
|
190
281
|
end
|
|
191
282
|
end
|
|
192
283
|
|
|
@@ -338,7 +429,101 @@ module FeedTools
|
|
|
338
429
|
end
|
|
339
430
|
end
|
|
340
431
|
end
|
|
432
|
+
|
|
433
|
+
# Does a full parse of the feed.
|
|
434
|
+
def full_parse
|
|
435
|
+
self.href
|
|
436
|
+
|
|
437
|
+
self.cache_object
|
|
341
438
|
|
|
439
|
+
self.http_headers
|
|
440
|
+
self.encoding
|
|
441
|
+
self.feed_data_utf_8
|
|
442
|
+
self.xml_document
|
|
443
|
+
self.root_node
|
|
444
|
+
self.channel_node
|
|
445
|
+
|
|
446
|
+
self.base_uri
|
|
447
|
+
self.feed_type
|
|
448
|
+
self.feed_version
|
|
449
|
+
|
|
450
|
+
self.entries
|
|
451
|
+
|
|
452
|
+
self.id
|
|
453
|
+
self.title
|
|
454
|
+
self.subtitle
|
|
455
|
+
self.links
|
|
456
|
+
self.link
|
|
457
|
+
self.icon
|
|
458
|
+
self.favicon
|
|
459
|
+
self.author
|
|
460
|
+
self.publisher
|
|
461
|
+
self.time
|
|
462
|
+
self.updated
|
|
463
|
+
self.published
|
|
464
|
+
self.categories
|
|
465
|
+
self.images
|
|
466
|
+
self.rights
|
|
467
|
+
self.time_to_live
|
|
468
|
+
self.generator
|
|
469
|
+
self.language
|
|
470
|
+
|
|
471
|
+
self.docs
|
|
472
|
+
self.text_input
|
|
473
|
+
self.cloud
|
|
474
|
+
|
|
475
|
+
self.itunes_summary
|
|
476
|
+
self.itunes_subtitle
|
|
477
|
+
self.itunes_author
|
|
478
|
+
|
|
479
|
+
self.media_text
|
|
480
|
+
|
|
481
|
+
self.explicit?
|
|
482
|
+
|
|
483
|
+
self.entries.each do |entry|
|
|
484
|
+
entry.full_parse()
|
|
485
|
+
end
|
|
486
|
+
|
|
487
|
+
nil
|
|
488
|
+
end
|
|
489
|
+
|
|
490
|
+
# Does a full parse, then serializes the feed object directly to the
|
|
491
|
+
# cache.
|
|
492
|
+
def serialize_to_cache
|
|
493
|
+
@cache_object = nil
|
|
494
|
+
require 'yaml'
|
|
495
|
+
serialized_feed = YAML.dump(self.serializable)
|
|
496
|
+
if self.cache_object != nil
|
|
497
|
+
begin
|
|
498
|
+
self.cache_object.serialized = serialized_feed
|
|
499
|
+
self.cache_object.save
|
|
500
|
+
rescue Exception
|
|
501
|
+
end
|
|
502
|
+
end
|
|
503
|
+
return nil
|
|
504
|
+
end
|
|
505
|
+
|
|
506
|
+
# Returns a duplicate object suitable for serialization
|
|
507
|
+
def serializable
|
|
508
|
+
self.full_parse()
|
|
509
|
+
entries_to_dump = self.entries
|
|
510
|
+
# This prevents errors due to temporarily having feed items with
|
|
511
|
+
# multiple parent feeds.
|
|
512
|
+
self.entries = []
|
|
513
|
+
feed_to_dump = self.dup
|
|
514
|
+
feed_to_dump.instance_variable_set("@xml_document", nil)
|
|
515
|
+
feed_to_dump.instance_variable_set("@root_node", nil)
|
|
516
|
+
feed_to_dump.instance_variable_set("@channel_node", nil)
|
|
517
|
+
feed_to_dump.entries = entries_to_dump.collect do |entry|
|
|
518
|
+
entry.serializable
|
|
519
|
+
end
|
|
520
|
+
self.entries = entries_to_dump
|
|
521
|
+
feed_to_dump.entries.each do |entry|
|
|
522
|
+
entry.instance_variable_set("@root_node", nil)
|
|
523
|
+
end
|
|
524
|
+
return feed_to_dump
|
|
525
|
+
end
|
|
526
|
+
|
|
342
527
|
# Returns the relevant information from an http request.
|
|
343
528
|
def http_response
|
|
344
529
|
return @http_response
|
|
@@ -360,8 +545,12 @@ module FeedTools
|
|
|
360
545
|
# Returns the encoding that the feed was parsed with
|
|
361
546
|
def encoding
|
|
362
547
|
if @encoding.blank?
|
|
363
|
-
|
|
364
|
-
|
|
548
|
+
if !self.http_headers.blank?
|
|
549
|
+
if self.http_headers['content-type'] =~ /charset=([\w\d-]+)/
|
|
550
|
+
@encoding = $1.downcase
|
|
551
|
+
else
|
|
552
|
+
@encoding = self.encoding_from_feed_data
|
|
553
|
+
end
|
|
365
554
|
else
|
|
366
555
|
@encoding = self.encoding_from_feed_data
|
|
367
556
|
end
|
|
@@ -377,7 +566,7 @@ module FeedTools
|
|
|
377
566
|
return nil if raw_data.nil?
|
|
378
567
|
encoding_from_xml_instruct =
|
|
379
568
|
raw_data.scan(
|
|
380
|
-
/^<\?xml [^>]*encoding="([
|
|
569
|
+
/^<\?xml [^>]*encoding="([^\"]*)"[^>]*\?>/
|
|
381
570
|
).flatten.first
|
|
382
571
|
unless encoding_from_xml_instruct.blank?
|
|
383
572
|
encoding_from_xml_instruct.downcase!
|
|
@@ -446,11 +635,26 @@ module FeedTools
|
|
|
446
635
|
if FeedTools.feed_cache.nil?
|
|
447
636
|
self.cache_object = nil
|
|
448
637
|
else
|
|
449
|
-
|
|
450
|
-
|
|
638
|
+
begin
|
|
639
|
+
self.cache_object =
|
|
640
|
+
FeedTools.feed_cache.find_by_href(ugly_redirect)
|
|
641
|
+
rescue RuntimeError => error
|
|
642
|
+
if error.message =~ /sorry, too many clients already/
|
|
643
|
+
warn("There are too many connections to the database open.")
|
|
644
|
+
end
|
|
645
|
+
raise error
|
|
646
|
+
end
|
|
451
647
|
end
|
|
452
648
|
self.update!
|
|
453
649
|
end
|
|
650
|
+
|
|
651
|
+
# Get these things parsed in the correct order to avoid the retardedly
|
|
652
|
+
# painful corecursion issues.
|
|
653
|
+
self.href
|
|
654
|
+
@links = nil
|
|
655
|
+
@link = nil
|
|
656
|
+
self.links
|
|
657
|
+
self.link
|
|
454
658
|
end
|
|
455
659
|
|
|
456
660
|
# Returns the feed's raw data as utf-8.
|
|
@@ -462,11 +666,11 @@ module FeedTools
|
|
|
462
666
|
else
|
|
463
667
|
use_encoding = force_encoding
|
|
464
668
|
end
|
|
465
|
-
if use_encoding != "utf-8"
|
|
669
|
+
if use_encoding != "utf-8" && use_encoding != nil && raw_data != nil
|
|
466
670
|
begin
|
|
467
671
|
@feed_data_utf_8 =
|
|
468
672
|
Iconv.new('utf-8', use_encoding).iconv(raw_data)
|
|
469
|
-
rescue
|
|
673
|
+
rescue Exception => error
|
|
470
674
|
return raw_data
|
|
471
675
|
end
|
|
472
676
|
else
|
|
@@ -495,24 +699,23 @@ module FeedTools
|
|
|
495
699
|
unless self.cache_object.nil?
|
|
496
700
|
self.cache_object.feed_data_type = new_feed_data_type
|
|
497
701
|
end
|
|
702
|
+
if self.feed_data_type != :xml
|
|
703
|
+
@xml_document = nil
|
|
704
|
+
end
|
|
498
705
|
end
|
|
499
|
-
|
|
706
|
+
|
|
500
707
|
# Returns a REXML Document of the feed_data
|
|
501
708
|
def xml_document
|
|
502
|
-
if
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
709
|
+
if @xml_document.nil?
|
|
710
|
+
return nil if self.feed_data.blank?
|
|
711
|
+
if self.feed_data_type != :xml
|
|
712
|
+
@xml_document = nil
|
|
713
|
+
else
|
|
506
714
|
begin
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
@xml_document = HTree.parse(self.feed_data_utf_8).to_rexml
|
|
512
|
-
end
|
|
513
|
-
rescue Object
|
|
514
|
-
@xml_document = nil
|
|
515
|
-
raise
|
|
715
|
+
@xml_document = REXML::Document.new(self.feed_data_utf_8)
|
|
716
|
+
rescue Exception
|
|
717
|
+
# Something failed, attempt to repair the xml with htree.
|
|
718
|
+
@xml_document = HTree.parse(self.feed_data_utf_8).to_rexml
|
|
516
719
|
end
|
|
517
720
|
end
|
|
518
721
|
end
|
|
@@ -551,7 +754,7 @@ module FeedTools
|
|
|
551
754
|
else
|
|
552
755
|
@root_node = self.xml_document.root
|
|
553
756
|
end
|
|
554
|
-
rescue
|
|
757
|
+
rescue Exception
|
|
555
758
|
return nil
|
|
556
759
|
end
|
|
557
760
|
end
|
|
@@ -564,7 +767,8 @@ module FeedTools
|
|
|
564
767
|
@channel_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
|
565
768
|
"channel",
|
|
566
769
|
"CHANNEL",
|
|
567
|
-
"feedinfo"
|
|
770
|
+
"feedinfo",
|
|
771
|
+
"news"
|
|
568
772
|
])
|
|
569
773
|
if @channel_node == nil
|
|
570
774
|
@channel_node = self.root_node
|
|
@@ -584,9 +788,17 @@ module FeedTools
|
|
|
584
788
|
if @href != nil
|
|
585
789
|
begin
|
|
586
790
|
@cache_object = FeedTools.feed_cache.find_by_href(@href)
|
|
587
|
-
rescue
|
|
791
|
+
rescue RuntimeError => error
|
|
792
|
+
if error.message =~ /sorry, too many clients already/
|
|
793
|
+
warn("There are too many connections to the database open.")
|
|
794
|
+
raise error
|
|
795
|
+
else
|
|
796
|
+
raise error
|
|
797
|
+
end
|
|
798
|
+
rescue => error
|
|
588
799
|
warn("The feed cache seems to be having trouble with the " +
|
|
589
800
|
"find_by_href method. This may cause unexpected results.")
|
|
801
|
+
raise error
|
|
590
802
|
end
|
|
591
803
|
end
|
|
592
804
|
if @cache_object.nil?
|
|
@@ -778,19 +990,33 @@ module FeedTools
|
|
|
778
990
|
end
|
|
779
991
|
end
|
|
780
992
|
if override_href.call(@href) && self.feed_data != nil
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
993
|
+
begin
|
|
994
|
+
links = FeedTools::GenericHelper.recursion_trap(:feed_href) do
|
|
995
|
+
self.links
|
|
996
|
+
end
|
|
997
|
+
link = FeedTools::GenericHelper.recursion_trap(:feed_href) do
|
|
998
|
+
self.link
|
|
999
|
+
end
|
|
1000
|
+
if links != nil
|
|
1001
|
+
for link_object in links
|
|
1002
|
+
if link_object.rel == 'self'
|
|
1003
|
+
if link_object.href != link ||
|
|
1004
|
+
(link_object.href =~ /xml/ ||
|
|
1005
|
+
link_object.href =~ /atom/ ||
|
|
1006
|
+
link_object.href =~ /feed/)
|
|
1007
|
+
@href = link_object.href
|
|
1008
|
+
@href_overridden = true
|
|
1009
|
+
@links = nil
|
|
1010
|
+
@link = nil
|
|
1011
|
+
return @href
|
|
1012
|
+
end
|
|
1013
|
+
end
|
|
791
1014
|
end
|
|
792
1015
|
end
|
|
1016
|
+
rescue Exception
|
|
793
1017
|
end
|
|
1018
|
+
@links = nil
|
|
1019
|
+
@link = nil
|
|
794
1020
|
|
|
795
1021
|
# rdf:about is ordered last because a lot of people put the url to
|
|
796
1022
|
# the feed inside it instead of a link to their blog.
|
|
@@ -804,7 +1030,8 @@ module FeedTools
|
|
|
804
1030
|
"feed/@resource",
|
|
805
1031
|
"@rdf:about",
|
|
806
1032
|
"@about",
|
|
807
|
-
"newLocation/text()"
|
|
1033
|
+
"newLocation/text()",
|
|
1034
|
+
"atom10:link[@rel='self']/@href"
|
|
808
1035
|
], :select_result_value => true) do |result|
|
|
809
1036
|
override_href.call(FeedTools::UriHelper.normalize_url(result))
|
|
810
1037
|
end
|
|
@@ -831,6 +1058,7 @@ module FeedTools
|
|
|
831
1058
|
@href_overridden = false
|
|
832
1059
|
end
|
|
833
1060
|
if @href_overridden == true
|
|
1061
|
+
@links = nil
|
|
834
1062
|
@link = nil
|
|
835
1063
|
end
|
|
836
1064
|
end
|
|
@@ -858,7 +1086,7 @@ module FeedTools
|
|
|
858
1086
|
"TITLE"
|
|
859
1087
|
])
|
|
860
1088
|
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
|
|
861
|
-
self.feed_type, self.feed_version)
|
|
1089
|
+
self.feed_type, self.feed_version, [self.base_uri])
|
|
862
1090
|
if self.feed_type == "atom" ||
|
|
863
1091
|
self.configurations[:always_strip_wrapper_elements]
|
|
864
1092
|
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
|
|
@@ -901,7 +1129,7 @@ module FeedTools
|
|
|
901
1129
|
"info"
|
|
902
1130
|
])
|
|
903
1131
|
@subtitle = FeedTools::HtmlHelper.process_text_construct(
|
|
904
|
-
subtitle_node, self.feed_type, self.feed_version)
|
|
1132
|
+
subtitle_node, self.feed_type, self.feed_version, [self.base_uri])
|
|
905
1133
|
if self.feed_type == "atom" ||
|
|
906
1134
|
self.configurations[:always_strip_wrapper_elements]
|
|
907
1135
|
@subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle)
|
|
@@ -1119,6 +1347,13 @@ module FeedTools
|
|
|
1119
1347
|
"@href",
|
|
1120
1348
|
"text()"
|
|
1121
1349
|
], :select_result_value => true)
|
|
1350
|
+
if link_object.href == "atom10:" ||
|
|
1351
|
+
link_object.href == "atom03:" ||
|
|
1352
|
+
link_object.href == "atom:"
|
|
1353
|
+
link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [
|
|
1354
|
+
"@href"
|
|
1355
|
+
], :select_result_value => true)
|
|
1356
|
+
end
|
|
1122
1357
|
if link_object.href.nil? && link_node.base_uri != nil
|
|
1123
1358
|
link_object.href = ""
|
|
1124
1359
|
end
|
|
@@ -1143,6 +1378,13 @@ module FeedTools
|
|
|
1143
1378
|
"@atom:hreflang",
|
|
1144
1379
|
"@hreflang"
|
|
1145
1380
|
], :select_result_value => true)
|
|
1381
|
+
if link_object.hreflang == "atom10:" ||
|
|
1382
|
+
link_object.hreflang == "atom03:" ||
|
|
1383
|
+
link_object.hreflang == "atom:"
|
|
1384
|
+
link_object.hreflang = FeedTools::XmlHelper.try_xpaths(link_node, [
|
|
1385
|
+
"@hreflang"
|
|
1386
|
+
], :select_result_value => true)
|
|
1387
|
+
end
|
|
1146
1388
|
unless link_object.hreflang.nil?
|
|
1147
1389
|
link_object.hreflang = link_object.hreflang.downcase
|
|
1148
1390
|
end
|
|
@@ -1152,6 +1394,13 @@ module FeedTools
|
|
|
1152
1394
|
"@atom:rel",
|
|
1153
1395
|
"@rel"
|
|
1154
1396
|
], :select_result_value => true)
|
|
1397
|
+
if link_object.rel == "atom10:" ||
|
|
1398
|
+
link_object.rel == "atom03:" ||
|
|
1399
|
+
link_object.rel == "atom:"
|
|
1400
|
+
link_object.rel = FeedTools::XmlHelper.try_xpaths(link_node, [
|
|
1401
|
+
"@rel"
|
|
1402
|
+
], :select_result_value => true)
|
|
1403
|
+
end
|
|
1155
1404
|
unless link_object.rel.nil?
|
|
1156
1405
|
link_object.rel = link_object.rel.downcase
|
|
1157
1406
|
end
|
|
@@ -1164,6 +1413,13 @@ module FeedTools
|
|
|
1164
1413
|
"@atom:type",
|
|
1165
1414
|
"@type"
|
|
1166
1415
|
], :select_result_value => true)
|
|
1416
|
+
if link_object.type == "atom10:" ||
|
|
1417
|
+
link_object.type == "atom03:" ||
|
|
1418
|
+
link_object.type == "atom:"
|
|
1419
|
+
link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
|
|
1420
|
+
"@type"
|
|
1421
|
+
], :select_result_value => true)
|
|
1422
|
+
end
|
|
1167
1423
|
unless link_object.type.nil?
|
|
1168
1424
|
link_object.type = link_object.type.downcase
|
|
1169
1425
|
end
|
|
@@ -1174,6 +1430,13 @@ module FeedTools
|
|
|
1174
1430
|
"@title",
|
|
1175
1431
|
"text()"
|
|
1176
1432
|
], :select_result_value => true)
|
|
1433
|
+
if link_object.title == "atom10:" ||
|
|
1434
|
+
link_object.title == "atom03:" ||
|
|
1435
|
+
link_object.title == "atom:"
|
|
1436
|
+
link_object.title = FeedTools::XmlHelper.try_xpaths(link_node, [
|
|
1437
|
+
"@title"
|
|
1438
|
+
], :select_result_value => true)
|
|
1439
|
+
end
|
|
1177
1440
|
# This catches the ambiguities between atom, rss, and cdf
|
|
1178
1441
|
if link_object.title == link_object.href
|
|
1179
1442
|
link_object.title = nil
|
|
@@ -1184,6 +1447,13 @@ module FeedTools
|
|
|
1184
1447
|
"@atom:length",
|
|
1185
1448
|
"@length"
|
|
1186
1449
|
], :select_result_value => true)
|
|
1450
|
+
if link_object.length == "atom10:" ||
|
|
1451
|
+
link_object.length == "atom03:" ||
|
|
1452
|
+
link_object.length == "atom:"
|
|
1453
|
+
link_object.length = FeedTools::XmlHelper.try_xpaths(link_node, [
|
|
1454
|
+
"@length"
|
|
1455
|
+
], :select_result_value => true)
|
|
1456
|
+
end
|
|
1187
1457
|
if !link_object.length.nil?
|
|
1188
1458
|
link_object.length = link_object.length.to_i
|
|
1189
1459
|
else
|
|
@@ -1211,7 +1481,10 @@ module FeedTools
|
|
|
1211
1481
|
def base_uri
|
|
1212
1482
|
if @base_uri.nil?
|
|
1213
1483
|
@base_uri = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
|
1214
|
-
"@base"
|
|
1484
|
+
"@base",
|
|
1485
|
+
"base/@href",
|
|
1486
|
+
"base/text()",
|
|
1487
|
+
"@xml:base"
|
|
1215
1488
|
], :select_result_value => true)
|
|
1216
1489
|
if @base_uri.blank?
|
|
1217
1490
|
begin
|
|
@@ -1222,9 +1495,26 @@ module FeedTools
|
|
|
1222
1495
|
rescue Exception
|
|
1223
1496
|
end
|
|
1224
1497
|
end
|
|
1498
|
+
if @base_uri.blank?
|
|
1499
|
+
@base_uri = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
|
1500
|
+
"@xml:base"
|
|
1501
|
+
], :select_result_value => true)
|
|
1502
|
+
end
|
|
1225
1503
|
if !@base_uri.blank?
|
|
1226
1504
|
@base_uri = FeedTools::UriHelper.normalize_url(@base_uri)
|
|
1227
1505
|
end
|
|
1506
|
+
if !@base_uri.blank?
|
|
1507
|
+
parsed_uri = FeedTools::URI.parse(@base_uri)
|
|
1508
|
+
# Feedburner is almost never the base uri that was intended
|
|
1509
|
+
# Use the actual site instead
|
|
1510
|
+
if parsed_uri.host =~ /feedburner/
|
|
1511
|
+
site_uri =
|
|
1512
|
+
FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
|
|
1513
|
+
FeedTools::UriHelper.normalize_url(self.link)
|
|
1514
|
+
end
|
|
1515
|
+
@base_uri = site_uri if !site_uri.blank?
|
|
1516
|
+
end
|
|
1517
|
+
end
|
|
1228
1518
|
end
|
|
1229
1519
|
return @base_uri
|
|
1230
1520
|
end
|
|
@@ -1425,6 +1715,12 @@ module FeedTools
|
|
|
1425
1715
|
rescue
|
|
1426
1716
|
end
|
|
1427
1717
|
end
|
|
1718
|
+
if FeedTools::XmlHelper.try_xpaths(author_node,
|
|
1719
|
+
["@gr:unknown-author"], :select_result_value => true) == "true"
|
|
1720
|
+
if @author.name == "(author unknown)"
|
|
1721
|
+
@author.name = nil
|
|
1722
|
+
end
|
|
1723
|
+
end
|
|
1428
1724
|
end
|
|
1429
1725
|
# Fallback on the itunes module if we didn't find an author name
|
|
1430
1726
|
begin
|
|
@@ -1764,35 +2060,7 @@ module FeedTools
|
|
|
1764
2060
|
end
|
|
1765
2061
|
return @images
|
|
1766
2062
|
end
|
|
1767
|
-
|
|
1768
|
-
# Returns the feed's text input field
|
|
1769
|
-
def text_input
|
|
1770
|
-
if @text_input.nil?
|
|
1771
|
-
@text_input = FeedTools::TextInput.new
|
|
1772
|
-
text_input_node =
|
|
1773
|
-
FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
|
|
1774
|
-
unless text_input_node.nil?
|
|
1775
|
-
@text_input.title =
|
|
1776
|
-
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
|
1777
|
-
["title/text()"],
|
|
1778
|
-
:select_result_value => true)
|
|
1779
|
-
@text_input.description =
|
|
1780
|
-
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
|
1781
|
-
["description/text()"],
|
|
1782
|
-
:select_result_value => true)
|
|
1783
|
-
@text_input.link =
|
|
1784
|
-
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
|
1785
|
-
["link/text()"],
|
|
1786
|
-
:select_result_value => true)
|
|
1787
|
-
@text_input.name =
|
|
1788
|
-
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
|
1789
|
-
["name/text()"],
|
|
1790
|
-
:select_result_value => true)
|
|
1791
|
-
end
|
|
1792
|
-
end
|
|
1793
|
-
return @text_input
|
|
1794
|
-
end
|
|
1795
|
-
|
|
2063
|
+
|
|
1796
2064
|
# Returns the feed's copyright information
|
|
1797
2065
|
def rights
|
|
1798
2066
|
if @rights.nil?
|
|
@@ -1807,7 +2075,7 @@ module FeedTools
|
|
|
1807
2075
|
"rights"
|
|
1808
2076
|
])
|
|
1809
2077
|
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
|
|
1810
|
-
self.feed_type, self.feed_version)
|
|
2078
|
+
self.feed_type, self.feed_version, [self.base_uri])
|
|
1811
2079
|
if self.feed_type == "atom" ||
|
|
1812
2080
|
self.configurations[:always_strip_wrapper_elements]
|
|
1813
2081
|
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
|
|
@@ -1821,12 +2089,24 @@ module FeedTools
|
|
|
1821
2089
|
@rights = new_rights
|
|
1822
2090
|
end
|
|
1823
2091
|
|
|
1824
|
-
|
|
1825
|
-
|
|
2092
|
+
# Returns the first license link for the feed.
|
|
2093
|
+
def license
|
|
2094
|
+
return self.licenses.first
|
|
2095
|
+
end
|
|
2096
|
+
|
|
2097
|
+
# Returns all licenses linked from this feed.
|
|
2098
|
+
def licenses
|
|
2099
|
+
if @licenses.nil?
|
|
2100
|
+
@licenses = self.links.select do |link|
|
|
2101
|
+
link.rel == "license"
|
|
2102
|
+
end
|
|
2103
|
+
end
|
|
2104
|
+
return @licenses
|
|
1826
2105
|
end
|
|
1827
2106
|
|
|
1828
|
-
|
|
1829
|
-
|
|
2107
|
+
# Sets the feed's licenses.
|
|
2108
|
+
def licenses=(new_licenses)
|
|
2109
|
+
@licenses = new_licenses
|
|
1830
2110
|
end
|
|
1831
2111
|
|
|
1832
2112
|
# Returns the number of seconds before the feed should expire
|
|
@@ -1910,14 +2190,14 @@ module FeedTools
|
|
|
1910
2190
|
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
|
1911
2191
|
end
|
|
1912
2192
|
if @time_to_live == 0
|
|
1913
|
-
@time_to_live =
|
|
2193
|
+
@time_to_live = self.configurations[:default_ttl].to_i
|
|
1914
2194
|
end
|
|
1915
2195
|
end
|
|
1916
2196
|
end
|
|
1917
2197
|
end
|
|
1918
2198
|
if @time_to_live.nil? || @time_to_live == 0
|
|
1919
2199
|
# Fall back to the configured :default_ttl
|
|
1920
|
-
@time_to_live =
|
|
2200
|
+
@time_to_live = self.configurations[:default_ttl].to_i
|
|
1921
2201
|
elsif self.configurations[:max_ttl] != nil &&
|
|
1922
2202
|
self.configurations[:max_ttl] != 0 &&
|
|
1923
2203
|
@time_to_live >= self.configurations[:max_ttl].to_i
|
|
@@ -1930,7 +2210,7 @@ module FeedTools
|
|
|
1930
2210
|
# Sets the feed time to live
|
|
1931
2211
|
def time_to_live=(new_time_to_live)
|
|
1932
2212
|
@time_to_live = new_time_to_live.round
|
|
1933
|
-
@time_to_live =
|
|
2213
|
+
@time_to_live = 30.minutes if @time_to_live < 30.minutes
|
|
1934
2214
|
end
|
|
1935
2215
|
|
|
1936
2216
|
# Returns the feed's cloud
|
|
@@ -1966,6 +2246,34 @@ module FeedTools
|
|
|
1966
2246
|
@cloud = new_cloud
|
|
1967
2247
|
end
|
|
1968
2248
|
|
|
2249
|
+
# Returns the feed's text input field
|
|
2250
|
+
def text_input
|
|
2251
|
+
if @text_input.nil?
|
|
2252
|
+
@text_input = FeedTools::TextInput.new
|
|
2253
|
+
text_input_node =
|
|
2254
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
|
|
2255
|
+
unless text_input_node.nil?
|
|
2256
|
+
@text_input.title =
|
|
2257
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
|
2258
|
+
["title/text()"],
|
|
2259
|
+
:select_result_value => true)
|
|
2260
|
+
@text_input.description =
|
|
2261
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
|
2262
|
+
["description/text()"],
|
|
2263
|
+
:select_result_value => true)
|
|
2264
|
+
@text_input.link =
|
|
2265
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
|
2266
|
+
["link/text()"],
|
|
2267
|
+
:select_result_value => true)
|
|
2268
|
+
@text_input.name =
|
|
2269
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
|
2270
|
+
["name/text()"],
|
|
2271
|
+
:select_result_value => true)
|
|
2272
|
+
end
|
|
2273
|
+
end
|
|
2274
|
+
return @text_input
|
|
2275
|
+
end
|
|
2276
|
+
|
|
1969
2277
|
# Returns the feed generator
|
|
1970
2278
|
def generator
|
|
1971
2279
|
if @generator.nil?
|
|
@@ -2096,7 +2404,8 @@ module FeedTools
|
|
|
2096
2404
|
"atom10:entry",
|
|
2097
2405
|
"atom03:entry",
|
|
2098
2406
|
"atom:entry",
|
|
2099
|
-
"entry"
|
|
2407
|
+
"entry",
|
|
2408
|
+
"story"
|
|
2100
2409
|
]),
|
|
2101
2410
|
FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
|
|
2102
2411
|
"rss10:item",
|
|
@@ -2104,7 +2413,8 @@ module FeedTools
|
|
|
2104
2413
|
"rss11:items/item",
|
|
2105
2414
|
"items/rss11:item",
|
|
2106
2415
|
"items/item",
|
|
2107
|
-
"item"
|
|
2416
|
+
"item",
|
|
2417
|
+
"story"
|
|
2108
2418
|
])
|
|
2109
2419
|
])
|
|
2110
2420
|
|
|
@@ -2234,6 +2544,17 @@ module FeedTools
|
|
|
2234
2544
|
def build_xml(feed_type=(self.feed_type or "atom"), feed_version=nil,
|
|
2235
2545
|
xml_builder=Builder::XmlMarkup.new(
|
|
2236
2546
|
:indent => 2, :escape_attrs => false))
|
|
2547
|
+
|
|
2548
|
+
if self.find_node("access:restriction/@relationship").to_s == "deny"
|
|
2549
|
+
raise StandardError,
|
|
2550
|
+
"Operation not permitted. This feed denies redistribution."
|
|
2551
|
+
elsif self.find_node("@indexing:index").to_s == "no"
|
|
2552
|
+
raise StandardError,
|
|
2553
|
+
"Operation not permitted. This feed denies redistribution."
|
|
2554
|
+
end
|
|
2555
|
+
|
|
2556
|
+
self.full_parse()
|
|
2557
|
+
|
|
2237
2558
|
xml_builder.instruct! :xml, :version => "1.0",
|
|
2238
2559
|
:encoding => (self.configurations[:output_encoding] or "utf-8")
|
|
2239
2560
|
if feed_type.nil?
|
|
@@ -2392,6 +2713,9 @@ module FeedTools
|
|
|
2392
2713
|
unless self.copyright.blank?
|
|
2393
2714
|
xml_builder.copyright(self.copyright)
|
|
2394
2715
|
end
|
|
2716
|
+
unless self.language.blank?
|
|
2717
|
+
xml_builder.language(self.language)
|
|
2718
|
+
end
|
|
2395
2719
|
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
|
2396
2720
|
xml_builder.generator(
|
|
2397
2721
|
self.configurations[:generator_href])
|
|
@@ -2467,6 +2791,8 @@ module FeedTools
|
|
|
2467
2791
|
end
|
|
2468
2792
|
elsif self.link != nil
|
|
2469
2793
|
xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
|
|
2794
|
+
elsif self.url != nil
|
|
2795
|
+
xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.url))
|
|
2470
2796
|
else
|
|
2471
2797
|
raise "Cannot build feed, missing feed unique id."
|
|
2472
2798
|
end
|