feedtools 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +11 -0
- data/lib/feed_tools.rb +2496 -810
- data/lib/feed_tools/vendor/builder.rb +2 -0
- data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
- data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
- data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
- data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
- data/lib/feed_tools/vendor/htree.rb +97 -0
- data/lib/feed_tools/vendor/htree/container.rb +10 -0
- data/lib/feed_tools/vendor/htree/context.rb +67 -0
- data/lib/feed_tools/vendor/htree/display.rb +27 -0
- data/lib/feed_tools/vendor/htree/doc.rb +149 -0
- data/lib/feed_tools/vendor/htree/elem.rb +262 -0
- data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
- data/lib/feed_tools/vendor/htree/equality.rb +218 -0
- data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
- data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
- data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
- data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
- data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
- data/lib/feed_tools/vendor/htree/loc.rb +367 -0
- data/lib/feed_tools/vendor/htree/modules.rb +48 -0
- data/lib/feed_tools/vendor/htree/name.rb +124 -0
- data/lib/feed_tools/vendor/htree/output.rb +207 -0
- data/lib/feed_tools/vendor/htree/parse.rb +407 -0
- data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
- data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
- data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
- data/lib/feed_tools/vendor/htree/scan.rb +166 -0
- data/lib/feed_tools/vendor/htree/tag.rb +111 -0
- data/lib/feed_tools/vendor/htree/template.rb +909 -0
- data/lib/feed_tools/vendor/htree/text.rb +115 -0
- data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
- data/rakefile +1 -1
- data/test/rss_test.rb +97 -0
- metadata +30 -1
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== FeedTools 0.2.0
|
2
|
+
* more complete support for rss, atom, cdf
|
3
|
+
* modular caching mechanism
|
4
|
+
* lazy parsing
|
5
|
+
* HTML sanitization of possibly dangerous fields
|
6
|
+
* HTML tidy support
|
7
|
+
* support for podcasts and vidlogs
|
8
|
+
* corrected handling of http redirection
|
9
|
+
* made http header information available
|
10
|
+
* file: protocol support
|
11
|
+
* custom parsing can be done using the find_node and find_all_nodes methods
|
1
12
|
== FeedTools 0.1.0
|
2
13
|
* basic support for rss, atom, cdf
|
3
14
|
* basic caching using active record
|
data/lib/feed_tools.rb
CHANGED
@@ -21,987 +21,2669 @@
|
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
24
|
+
FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
25
|
+
ENV['RAILS_ENV'] ||
|
26
|
+
'production' # :nodoc:
|
27
|
+
|
28
|
+
FEED_TOOLS_VERSION = "0.2.0"
|
25
29
|
|
26
30
|
$:.unshift(File.dirname(__FILE__))
|
27
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
32
|
+
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
33
|
+
|
34
|
+
require 'rubygems'
|
35
|
+
require 'active_record'
|
28
36
|
|
29
37
|
begin
|
30
|
-
require '
|
38
|
+
require 'builder'
|
31
39
|
rescue LoadError
|
32
|
-
|
33
|
-
|
40
|
+
# RubyGems version is not available, use included Builder
|
41
|
+
require 'feed_tools/vendor/builder'
|
34
42
|
end
|
35
43
|
|
36
44
|
begin
|
37
|
-
require '
|
38
|
-
require 'builder'
|
45
|
+
require 'tidy'
|
39
46
|
rescue LoadError
|
40
|
-
#
|
41
|
-
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
42
|
-
require 'feed_tools/vendor/builder'
|
47
|
+
# Ignore the error for now.
|
43
48
|
end
|
44
49
|
|
45
|
-
require '
|
46
|
-
|
50
|
+
require 'feed_tools/vendor/htree'
|
51
|
+
|
52
|
+
require 'net/http'
|
53
|
+
require 'net/https'
|
54
|
+
require 'net/ftp'
|
55
|
+
|
47
56
|
require 'rexml/document'
|
48
|
-
|
57
|
+
|
58
|
+
require 'iconv'
|
59
|
+
require 'uri'
|
60
|
+
require 'time'
|
49
61
|
require 'cgi'
|
62
|
+
require 'pp'
|
63
|
+
require 'yaml'
|
50
64
|
|
65
|
+
#= feed_tools.rb
|
66
|
+
#
|
67
|
+
# FeedTools was designed to be a simple XML feed parser, generator, and translator with a built-in
|
68
|
+
# caching system.
|
69
|
+
#
|
70
|
+
#== Example
|
71
|
+
# slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
72
|
+
# slashdot_feed.title
|
73
|
+
# => "Slashdot"
|
74
|
+
# slashdot_feed.description
|
75
|
+
# => "News for nerds, stuff that matters"
|
76
|
+
# slashdot_feed.link
|
77
|
+
# => "http://slashdot.org/"
|
78
|
+
# slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
|
79
|
+
# => "43,37,28,23,11,3,1"
|
51
80
|
module FeedTools
|
52
|
-
class Feed < ActiveRecord::Base
|
53
|
-
include REXML
|
54
81
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
def
|
70
|
-
#
|
71
|
-
if (url =~ /feed:/) == 0
|
72
|
-
url = url.gsub(/feed:\/\/http:\/\/\//, "http://")
|
73
|
-
url = url.gsub(/feed:\/\/http:\/\//, "http://")
|
74
|
-
url = url.gsub(/feed:http:\/\/\//, "http://")
|
75
|
-
url = url.gsub(/feed:http:\/\//, "http://")
|
76
|
-
url = url.gsub(/feed:\/\/\//, "http://")
|
77
|
-
url = url.gsub(/feed:\/\//, "http://")
|
78
|
-
url = url.gsub(/feed:\//, "http://")
|
79
|
-
url = url.gsub(/feed:/, "http://")
|
80
|
-
end
|
81
|
-
if (url =~ /rss:/) == 0
|
82
|
-
url = url.gsub(/rss:\/\/http:\/\/\//, "http://")
|
83
|
-
url = url.gsub(/rss:\/\/http:\/\//, "http://")
|
84
|
-
url = url.gsub(/rss:http:\/\/\//, "http://")
|
85
|
-
url = url.gsub(/rss:http:\/\//, "http://")
|
86
|
-
url = url.gsub(/rss:\/\/\//, "http://")
|
87
|
-
url = url.gsub(/rss:\/\//, "http://")
|
88
|
-
url = url.gsub(/rss:\//, "http://")
|
89
|
-
url = url.gsub(/rss:/, "http://")
|
90
|
-
end
|
91
|
-
|
92
|
-
feed = nil
|
82
|
+
# The default caching mechanism for the FeedTools module
|
83
|
+
class DatabaseFeedCache < ActiveRecord::Base
|
84
|
+
# Overrides the default table name to use the "feeds" table.
|
85
|
+
def self.table_name() "feeds" end
|
86
|
+
|
87
|
+
# If ActiveRecord is not already connected, attempts to find a configuration file and use
|
88
|
+
# it to open a connection for ActiveRecord.
|
89
|
+
# This method is probably unnecessary for anything but testing and debugging purposes.
|
90
|
+
# In a Rails environment, the connection will already have been established
|
91
|
+
# and this method will simply do nothing.
|
92
|
+
#
|
93
|
+
# This method should not raise any exceptions because it's designed to be run only when
|
94
|
+
# the module is first loaded. If it fails, the user should get an exception when they
|
95
|
+
# try to perform some action that makes use of the caching functionality, and not until.
|
96
|
+
def DatabaseFeedCache.initialize_cache
|
97
|
+
# Establish a connection if we don't already have one
|
93
98
|
begin
|
94
|
-
|
95
|
-
rescue
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
99
|
+
ActiveRecord::Base.connection
|
100
|
+
rescue
|
101
|
+
begin
|
102
|
+
possible_config_files = [
|
103
|
+
"./config/database.yml",
|
104
|
+
"../database.yml",
|
105
|
+
"./database.yml"
|
106
|
+
]
|
107
|
+
database_config_file = nil
|
108
|
+
for file in possible_config_files
|
109
|
+
if File.exists? file
|
110
|
+
database_config_file = file
|
111
|
+
break
|
112
|
+
end
|
113
|
+
end
|
114
|
+
database_config_hash = File.open(database_config_file) do |file|
|
115
|
+
config_hash = YAML::load(file)
|
116
|
+
unless config_hash[FEED_TOOLS_ENV].nil?
|
117
|
+
config_hash = config_hash[FEED_TOOLS_ENV]
|
118
|
+
end
|
119
|
+
config_hash
|
120
|
+
end
|
121
|
+
ActiveRecord::Base.configurations = database_config_hash
|
122
|
+
ActiveRecord::Base.establish_connection(database_config_hash)
|
123
|
+
ActiveRecord::Base.connection
|
124
|
+
rescue
|
100
125
|
end
|
101
|
-
feed = Feed.find_by_url(url)
|
102
126
|
end
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
feed.url = url
|
108
|
-
feed.load_remote_feed
|
127
|
+
# Verify that the necessary database tables are in place
|
128
|
+
# and if they're missing, create them
|
129
|
+
unless DatabaseFeedCache.table_exists?
|
130
|
+
DatabaseFeedCache.create_table
|
109
131
|
end
|
110
|
-
return
|
111
|
-
end
|
112
|
-
|
113
|
-
# Checks if the feed has expired and updates if it has
|
114
|
-
def update_if_needed
|
115
|
-
if expired?
|
116
|
-
load_remote_feed
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
# Verifies that the table structure exists
|
121
|
-
def Feed.cache_exists?
|
122
|
-
return Feed.table_exists? && FeedItem.table_exists?
|
132
|
+
return nil
|
123
133
|
end
|
124
|
-
|
125
|
-
#
|
126
|
-
def
|
134
|
+
|
135
|
+
# True if the appropriate database table already exists
|
136
|
+
def DatabaseFeedCache.table_exists?
|
127
137
|
begin
|
128
|
-
connection.execute "select id, url,
|
129
|
-
"
|
138
|
+
ActiveRecord::Base.connection.execute "select id, url, title, " +
|
139
|
+
"link, xml_data, http_headers, last_retrieved " +
|
140
|
+
"from feeds limit 1"
|
130
141
|
rescue ActiveRecord::StatementInvalid
|
131
142
|
return false
|
143
|
+
rescue
|
144
|
+
return false
|
132
145
|
end
|
133
146
|
return true
|
134
147
|
end
|
135
|
-
|
136
|
-
#
|
137
|
-
def
|
138
|
-
unless
|
139
|
-
feed_items_mysql = <<-SQL_END
|
140
|
-
CREATE TABLE `feed_items` (
|
141
|
-
`id` int(6) unsigned NOT NULL auto_increment,
|
142
|
-
`feed_id` int(6) unsigned NOT NULL default '0',
|
143
|
-
`link` varchar(255) default NULL,
|
144
|
-
`title` varchar(255) default NULL,
|
145
|
-
`author` varchar(255) default NULL,
|
146
|
-
`description` text default NULL,
|
147
|
-
`time` datetime NOT NULL default '0000-00-00 00:00:00',
|
148
|
-
`tags` varchar(255) default NULL,
|
149
|
-
PRIMARY KEY (`id`)
|
150
|
-
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
|
151
|
-
SQL_END
|
152
|
-
feed_items_sqlite = <<-SQL_END
|
153
|
-
CREATE TABLE 'feed_items' (
|
154
|
-
'id' INTEGER PRIMARY KEY NOT NULL,
|
155
|
-
'feed_id' INTEGER NOT NULL,
|
156
|
-
'link' VARCHAR(255) DEFAULT NULL,
|
157
|
-
'title' VARCHAR(255) DEFAULT NULL,
|
158
|
-
'author' VARCHAR(255) DEFAULT NULL,
|
159
|
-
'description' TEXT DEFAULT NULL,
|
160
|
-
'time' DATETIME DEFAULT NULL,
|
161
|
-
'tags' VARCHAR(255) DEFAULT NULL
|
162
|
-
);
|
163
|
-
SQL_END
|
164
|
-
feed_items_psql = <<-SQL_END
|
165
|
-
CREATE TABLE feed_items (
|
166
|
-
id SERIAL PRIMARY KEY NOT NULL,
|
167
|
-
feed_id int REFERENCES feeds,
|
168
|
-
link varchar(255) default NULL,
|
169
|
-
title varchar(255) default NULL,
|
170
|
-
author varchar(255) default NULL,
|
171
|
-
description text default NULL,
|
172
|
-
time datetime default NULL,
|
173
|
-
tags varchar(255) default NULL
|
174
|
-
);
|
175
|
-
SQL_END
|
176
|
-
unless FeedItem.table_exists?
|
177
|
-
table_creation_sql = nil
|
178
|
-
if configurations["adapter"] == "mysql"
|
179
|
-
table_creation_sql = feed_items_mysql
|
180
|
-
elsif configurations["adapter"] == "sqlite"
|
181
|
-
table_creation_sql = feed_items_sqlite
|
182
|
-
elsif configurations["adapter"] == "postgresql"
|
183
|
-
table_creation_sql = feeds_psql
|
184
|
-
end
|
185
|
-
if table_creation_sql.nil?
|
186
|
-
raise "Could not build feed_items table."
|
187
|
-
else
|
188
|
-
connection.execute table_creation_sql
|
189
|
-
end
|
190
|
-
end
|
148
|
+
|
149
|
+
# Creates the appropriate database table
|
150
|
+
def DatabaseFeedCache.create_table
|
151
|
+
unless DatabaseFeedCache.table_exists?
|
191
152
|
feeds_mysql = <<-SQL_END
|
192
153
|
CREATE TABLE `feeds` (
|
193
|
-
`id`
|
194
|
-
`url`
|
195
|
-
`
|
196
|
-
`
|
197
|
-
`
|
198
|
-
`
|
199
|
-
`
|
200
|
-
`last_updated` datetime default NULL,
|
201
|
-
`etag` varchar(255) default NULL,
|
202
|
-
`time_to_live` int(4) default NULL,
|
154
|
+
`id` int(10) unsigned NOT NULL auto_increment,
|
155
|
+
`url` varchar(255) default NULL,
|
156
|
+
`title` varchar(255) default NULL,
|
157
|
+
`link` varchar(255) default NULL,
|
158
|
+
`xml_data` longtext default NULL,
|
159
|
+
`http_headers` text default NULL,
|
160
|
+
`last_retrieved` datetime default NULL,
|
203
161
|
PRIMARY KEY (`id`)
|
204
162
|
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
|
205
163
|
SQL_END
|
206
164
|
feeds_sqlite = <<-SQL_END
|
207
165
|
CREATE TABLE 'feeds' (
|
208
|
-
'id'
|
209
|
-
'url'
|
210
|
-
'
|
211
|
-
'
|
212
|
-
'
|
213
|
-
'
|
214
|
-
'
|
215
|
-
'
|
216
|
-
'etag' VARCHAR(255) DEFAULT NULL,
|
217
|
-
'time_to_live' INTEGER DEFAULT NULL
|
166
|
+
'id' INTEGER PRIMARY KEY NOT NULL,
|
167
|
+
'url' VARCHAR(255) DEFAULT NULL,
|
168
|
+
'title' VARCHAR(255) DEFAULT NULL,
|
169
|
+
'link' VARCHAR(255) DEFAULT NULL,
|
170
|
+
'image_link' VARCHAR(255) DEFAULT NULL,
|
171
|
+
'xml_data' TEXT DEFAULT NULL,
|
172
|
+
'http_headers' TEXT DEFAULT NULL,
|
173
|
+
'last_retrieved' DATETIME DEFAULT NULL,
|
218
174
|
);
|
219
175
|
SQL_END
|
220
176
|
feeds_psql = <<-SQL_END
|
221
177
|
CREATE TABLE feeds (
|
222
|
-
id
|
223
|
-
url
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
last_updated datetime default NULL,
|
230
|
-
etag varchar(255) default NULL,
|
231
|
-
time_to_live int default NULL
|
178
|
+
id SERIAL PRIMARY KEY NOT NULL,
|
179
|
+
url varchar(255) default NULL,
|
180
|
+
title varchar(255) default NULL,
|
181
|
+
link varchar(255) default NULL,
|
182
|
+
xml_data text default NULL,
|
183
|
+
http_headers text default NULL,
|
184
|
+
last_retrieved datetime default NULL,
|
232
185
|
);
|
233
186
|
SQL_END
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
connection.execute table_creation_sql
|
247
|
-
end
|
187
|
+
table_creation_sql = nil
|
188
|
+
if configurations["adapter"] == "mysql"
|
189
|
+
table_creation_sql = feeds_mysql
|
190
|
+
elsif configurations["adapter"] == "sqlite"
|
191
|
+
table_creation_sql = feeds_sqlite
|
192
|
+
elsif configurations["adapter"] == "postgresql"
|
193
|
+
table_creation_sql = feeds_psql
|
194
|
+
end
|
195
|
+
if table_creation_sql.nil?
|
196
|
+
raise "Could not build feed_items table."
|
197
|
+
else
|
198
|
+
connection.execute table_creation_sql
|
248
199
|
end
|
249
200
|
end
|
250
201
|
end
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
202
|
+
end
|
203
|
+
|
204
|
+
# Error raised when a feed cannot be retrieved
|
205
|
+
class FeedAccessError < StandardError
|
206
|
+
end
|
207
|
+
|
208
|
+
# Quick method of enabling small classes to have their attributes
|
209
|
+
# accessible as a dictionary.
|
210
|
+
module AttributeDictionary # :nodoc:
|
211
|
+
# Access the attributes as a dictionary
|
212
|
+
def [](key)
|
213
|
+
# Assignment and destructive methods should not be
|
214
|
+
# accessed like this.
|
215
|
+
return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
|
216
|
+
return nil unless self.method(key).arity == 0
|
217
|
+
return self.send(key)
|
258
218
|
end
|
259
|
-
|
260
|
-
#
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
219
|
+
|
220
|
+
# Assign to the attributes as a dictionary
|
221
|
+
def []=(key, value)
|
222
|
+
# Assignment and destructive methods should not be
|
223
|
+
# accessed like this.
|
224
|
+
return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
|
225
|
+
return nil unless self.method(key + "=").arity == 1
|
226
|
+
return self.send(key + "=", value)
|
265
227
|
end
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
228
|
+
end
|
229
|
+
|
230
|
+
@feed_cache = DatabaseFeedCache
|
231
|
+
@user_agent = "FeedTools/#{FEED_TOOLS_VERSION} " +
|
232
|
+
"+http://www.sporkmonger.com/projects/feedtools/"
|
233
|
+
|
234
|
+
# Returns the current caching mechanism.
|
235
|
+
def FeedTools.feed_cache
|
236
|
+
return @feed_cache
|
237
|
+
end
|
238
|
+
|
239
|
+
# Sets the current caching mechanism. If set to nil, disables caching.
|
240
|
+
# Default is the DatabaseFeedCache class.
|
241
|
+
#
|
242
|
+
# Objects of this class must accept the following messages:
|
243
|
+
# url
|
244
|
+
# url=
|
245
|
+
# title
|
246
|
+
# title=
|
247
|
+
# link
|
248
|
+
# link=
|
249
|
+
# xml_data
|
250
|
+
# xml_data=
|
251
|
+
# etag
|
252
|
+
# etag=
|
253
|
+
# last_modified
|
254
|
+
# last_modified=
|
255
|
+
# save
|
256
|
+
#
|
257
|
+
# Additionally, the class itself must accept the following messages:
|
258
|
+
# find_by_id
|
259
|
+
# find_by_url
|
260
|
+
# initialize_cache
|
261
|
+
def FeedTools.feed_cache=(new_feed_cache)
|
262
|
+
# TODO: ensure that the feed cache class actually does those things.
|
263
|
+
# ==================================================================
|
264
|
+
@feed_cache = new_feed_cache
|
265
|
+
end
|
266
|
+
|
267
|
+
# Returns the currently used user agent string.
|
268
|
+
def FeedTools.user_agent
|
269
|
+
return @user_agent
|
270
|
+
end
|
271
|
+
|
272
|
+
# Sets the user agent string to send in the http headers.
|
273
|
+
def FeedTools.user_agent=(new_user_agent)
|
274
|
+
@user_agent = new_user_agent
|
275
|
+
end
|
276
|
+
|
277
|
+
# Returns true if the html tidy module can be used.
|
278
|
+
#
|
279
|
+
# Obviously, you need the tidy gem installed in order to run with html
|
280
|
+
# tidy features turned on.
|
281
|
+
#
|
282
|
+
# This method does a fairly complicated, and probably unnecessarily
|
283
|
+
# desperate search for the libtidy library. If you want this thing to
|
284
|
+
# execute fast, the best thing to do is to set Tidy.path ahead of time.
|
285
|
+
# If Tidy.path is set, this method doesn't do much. If it's not set,
|
286
|
+
# it will do its darnedest to find the libtidy library. If you set
|
287
|
+
# the LIBTIDYPATH environment variable to the libtidy library, it should
|
288
|
+
# be able to find it.
|
289
|
+
#
|
290
|
+
# Once the library is located, this method will run much faster.
|
291
|
+
def FeedTools.tidy_enabled?
|
292
|
+
# This is an override variable to keep tidy from being used even if it
|
293
|
+
# is available.
|
294
|
+
if @force_tidy_enabled == false
|
295
|
+
return false
|
271
296
|
end
|
272
|
-
|
273
|
-
|
274
|
-
# it to open a connection for ActiveRecord.
|
275
|
-
# This method is probably unnecessary for anything but testing and debugging purposes.
|
276
|
-
def Feed.prepare_connection
|
297
|
+
if @tidy_enabled.nil? || @tidy_enabled == false
|
298
|
+
@tidy_enabled = false
|
277
299
|
begin
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
300
|
+
require 'tidy'
|
301
|
+
if Tidy.path.nil?
|
302
|
+
# *Shrug*, just brute force it, I guess. There's a lot of places
|
303
|
+
# this thing might be hiding in, depending on platform and general
|
304
|
+
# sanity of the person who installed the thing. Most of these are
|
305
|
+
# probably unlikely, but it's not like checking unlikely locations
|
306
|
+
# hurts. Much. Especially if you actually find it.
|
307
|
+
libtidy_locations = [
|
308
|
+
'/usr/local/lib/libtidy.dylib',
|
309
|
+
'/opt/local/lib/libtidy.dylib',
|
310
|
+
'/usr/lib/libtidy.dylib',
|
311
|
+
'/usr/local/lib/tidylib.dylib',
|
312
|
+
'/opt/local/lib/tidylib.dylib',
|
313
|
+
'/usr/lib/tidylib.dylib',
|
314
|
+
'/usr/local/lib/tidy.dylib',
|
315
|
+
'/opt/local/lib/tidy.dylib',
|
316
|
+
'/usr/lib/tidy.dylib',
|
317
|
+
'/usr/local/lib/libtidy.so',
|
318
|
+
'/opt/local/lib/libtidy.so',
|
319
|
+
'/usr/lib/libtidy.so',
|
320
|
+
'/usr/local/lib/tidylib.so',
|
321
|
+
'/opt/local/lib/tidylib.so',
|
322
|
+
'/usr/lib/tidylib.so',
|
323
|
+
'/usr/local/lib/tidy.so',
|
324
|
+
'/opt/local/lib/tidy.so',
|
325
|
+
'/usr/lib/tidy.so',
|
326
|
+
'C:\Program Files\Tidy\tidy.dll',
|
327
|
+
'C:\Tidy\tidy.dll',
|
328
|
+
'/usr/local/lib',
|
329
|
+
'/opt/local/lib',
|
330
|
+
'/usr/lib'
|
331
|
+
]
|
332
|
+
# We just made this thing up, but if someone sets it, we'll
|
333
|
+
# go ahead and check it
|
334
|
+
unless ENV['LIBTIDYPATH'].nil?
|
335
|
+
libtidy_locations =
|
336
|
+
libtidy_locations.reverse.push(ENV['LIBTIDYPATH'])
|
337
|
+
end
|
338
|
+
for path in libtidy_locations
|
339
|
+
if File.exists? path
|
340
|
+
if File.ftype(path) == "file"
|
341
|
+
Tidy.path = path
|
342
|
+
@tidy_enabled = true
|
343
|
+
break
|
344
|
+
elsif File.ftype(path) == "directory"
|
345
|
+
# Ok, now perhaps we're getting a bit more desperate
|
346
|
+
lib_paths =
|
347
|
+
`find #{path} -name '*tidy*' | grep '\\.\\(so\\|dylib\\)$'`
|
348
|
+
# If there's more than one, grab the first one and
|
349
|
+
# hope for the best, and if it doesn't work, then blame the
|
350
|
+
# user for not specifying more accurately.
|
351
|
+
tidy_path = lib_paths.split("\n").first
|
352
|
+
unless tidy_path.nil?
|
353
|
+
Tidy.path = tidy_path
|
354
|
+
@tidy_enabled = true
|
355
|
+
break
|
356
|
+
end
|
357
|
+
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
# Still couldn't find it.
|
361
|
+
unless @tidy_enabled
|
362
|
+
@tidy_enabled = false
|
363
|
+
end
|
364
|
+
else
|
365
|
+
@tidy_enabled = true
|
366
|
+
end
|
367
|
+
rescue LoadError
|
368
|
+
# Tidy not installed, disable features that rely on tidy.
|
369
|
+
@tidy_enabled = false
|
301
370
|
end
|
302
371
|
end
|
303
|
-
|
304
|
-
|
305
|
-
|
372
|
+
return @tidy_enabled
|
373
|
+
end
|
374
|
+
|
375
|
+
# Turns html tidy support on or off. Be aware that setting this to true
|
376
|
+
# does not mean tidy will be enabled. It simply means that tidy will be
|
377
|
+
# enabled if it is available to be enabled.
|
378
|
+
def FeedTools.tidy_enabled=(new_tidy_enabled)
|
379
|
+
@force_tidy_enabled = new_tidy_enabled
|
380
|
+
end
|
381
|
+
|
382
|
+
# Attempts to ensure that the passed url is valid and sane. Accepts very, very ugly urls
|
383
|
+
# and makes every effort to figure out what it was supposed to be. Also translates from
|
384
|
+
# the feed: and rss: pseudo-protocols to the http: protocol.
|
385
|
+
def FeedTools.normalize_url(url)
|
386
|
+
if url.nil? || url == ""
|
387
|
+
return nil
|
306
388
|
end
|
389
|
+
normalized_url = url
|
307
390
|
|
308
|
-
|
309
|
-
|
391
|
+
# if a url begins with the '/' character, it only makes sense that they
|
392
|
+
# meant to be using a file:// url. Fix it for them.
|
393
|
+
if normalized_url.length > 0 && normalized_url[0..0] == "/"
|
394
|
+
normalized_url = "file://" + normalized_url
|
310
395
|
end
|
311
396
|
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
|
316
|
-
|
317
|
-
return (self["language"] or "en-US")
|
318
|
-
rescue
|
319
|
-
return "en-US"
|
320
|
-
end
|
397
|
+
# if a url begins with javascript:, it's quite possibly an attempt at
|
398
|
+
# doing something malicious. Let's keep that from getting anywhere,
|
399
|
+
# shall we?
|
400
|
+
if (normalized_url.downcase =~ /javascript:/) != nil
|
401
|
+
return "#"
|
321
402
|
end
|
322
403
|
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
404
|
+
# deal with all of the many ugly possibilities involved in the rss:
|
405
|
+
# and feed: pseudo-protocols (incidentally, whose crazy idea was this
|
406
|
+
# mess?)
|
407
|
+
normalized_url.gsub!(/^http:\/*(feed:\/*)?/, "http://")
|
408
|
+
normalized_url.gsub!(/^http:\/*(rss:\/*)?/, "http://")
|
409
|
+
normalized_url.gsub!(/^feed:\/*(http:\/*)?/, "http://")
|
410
|
+
normalized_url.gsub!(/^rss:\/*(http:\/*)?/, "http://")
|
411
|
+
normalized_url.gsub!(/^file:\/*/, "file:///")
|
412
|
+
normalized_url.gsub!(/^https:\/*/, "https://")
|
413
|
+
# fix (very) bad urls (usually of the user-entered sort)
|
414
|
+
normalized_url.gsub!(/^http:\/*(http:\/*)*/, "http://")
|
415
|
+
if (normalized_url =~ /^file:/) == 0
|
416
|
+
# fix bad Windows-based entries
|
417
|
+
normalized_url.gsub!(/file:\/\/\/([a-zA-Z]):/, 'file:///\1|')
|
330
418
|
|
331
|
-
|
332
|
-
|
419
|
+
# maybe this is too aggressive?
|
420
|
+
normalized_url.gsub!(/\\/, '/')
|
421
|
+
return normalized_url
|
422
|
+
else
|
423
|
+
if (normalized_url =~ /https?:\/\//) == nil
|
424
|
+
normalized_url = "http://" + normalized_url
|
425
|
+
end
|
426
|
+
if normalized_url == "http://"
|
427
|
+
return nil
|
428
|
+
end
|
429
|
+
begin
|
430
|
+
feed_uri = URI.parse(normalized_url)
|
431
|
+
if feed_uri.scheme == nil
|
432
|
+
feed_uri.scheme = "http"
|
433
|
+
end
|
434
|
+
if feed_uri.path == nil || feed_uri.path == ""
|
435
|
+
feed_uri.path = "/"
|
436
|
+
end
|
437
|
+
if (feed_uri.path =~ /^[\/]+/) == 0
|
438
|
+
feed_uri.path.gsub!(/^[\/]+/, "/")
|
439
|
+
end
|
440
|
+
return feed_uri.to_s
|
441
|
+
rescue URI::InvalidURIError
|
442
|
+
return normalized_url
|
443
|
+
end
|
333
444
|
end
|
445
|
+
end
|
334
446
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
447
|
+
# Returns true if the parameter appears to be a valid url
|
448
|
+
def FeedTools.is_url?(url)
|
449
|
+
return false if url.nil?
|
450
|
+
begin
|
451
|
+
uri = URI.parse(url)
|
452
|
+
rescue URI::InvalidURIError
|
453
|
+
return false
|
341
454
|
end
|
455
|
+
return true
|
456
|
+
end
|
342
457
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
458
|
+
# Removes all html tags from the html formatted text.
|
459
|
+
def FeedTools.strip_html(html)
|
460
|
+
# TODO: do this properly
|
461
|
+
# ======================
|
462
|
+
stripped_html = html.gsub(/<\/?[^>]+>/, "")
|
463
|
+
return stripped_html
|
464
|
+
end
|
348
465
|
|
349
|
-
|
350
|
-
|
466
|
+
# Tidies up the html
|
467
|
+
def FeedTools.tidy_html(html)
|
468
|
+
if FeedTools.tidy_enabled?
|
469
|
+
is_fragment = true
|
470
|
+
if (html.strip =~ /<html>(.|\n)*<body>/) != nil ||
|
471
|
+
(html.strip =~ /<\/body>(.|\n)*<\/html>$/) != nil
|
472
|
+
is_fragment = false
|
473
|
+
end
|
474
|
+
if (html.strip =~ /<\?xml(.|\n)*\?>/) != nil
|
475
|
+
is_fragment = false
|
476
|
+
end
|
477
|
+
tidy_html = Tidy.open(:show_warnings=>false) do |tidy|
|
478
|
+
tidy.options.output_xml = true
|
479
|
+
tidy.options.indent = false
|
480
|
+
tidy.options.wrap_attributes = true
|
481
|
+
tidy.options.logical_emphasis = true
|
482
|
+
tidy.options.doctype = "omit"
|
483
|
+
xml = tidy.clean(html)
|
484
|
+
xml
|
485
|
+
end
|
486
|
+
if is_fragment
|
487
|
+
# Tidy puts <html>...<body>[our html]</body>...</html> in.
|
488
|
+
# We don't want this.
|
489
|
+
tidy_html.strip!
|
490
|
+
tidy_html.gsub!(/^<html>(.|\n)*<body>/, "")
|
491
|
+
tidy_html.gsub!(/<\/body>(.|\n)*<\/html>$/, "")
|
492
|
+
tidy_html.strip!
|
493
|
+
end
|
494
|
+
else
|
495
|
+
tidy_html = html
|
351
496
|
end
|
497
|
+
return tidy_html
|
498
|
+
end
|
352
499
|
|
353
|
-
|
354
|
-
|
355
|
-
|
500
|
+
# Removes all dangerous html tags from the html formatted text.
|
501
|
+
# If mode is set to :escape, dangerous and unknown elements will
|
502
|
+
# be escaped. If mode is set to :strip, dangerous and unknown
|
503
|
+
# elements and all children will be removed entirely.
|
504
|
+
# Dangerous or unknown attributes are always removed.
|
505
|
+
def FeedTools.sanitize_html(html, mode=:escape)
|
506
|
+
|
507
|
+
# Lists borrowed from Mark Pilgrim's feedparser
|
508
|
+
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
|
509
|
+
'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
|
510
|
+
'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl',
|
511
|
+
'dt', 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4',
|
512
|
+
'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend',
|
513
|
+
'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's',
|
514
|
+
'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
|
515
|
+
'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt',
|
516
|
+
'u', 'ul', 'var']
|
356
517
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
518
|
+
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
|
519
|
+
'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
|
520
|
+
'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
|
521
|
+
'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
|
522
|
+
'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
|
523
|
+
'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
|
524
|
+
'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
|
525
|
+
'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
|
526
|
+
'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
|
527
|
+
'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
|
528
|
+
'type', 'usemap', 'valign', 'value', 'vspace', 'width']
|
364
529
|
|
365
|
-
#
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
530
|
+
# Stupid hack to pass this unit test:
|
531
|
+
# http://feedparser.org/tests/wellformed/rss/
|
532
|
+
# item_description_not_a_doctype.xml
|
533
|
+
html.gsub!(/<!'/, "<!'")
|
534
|
+
|
535
|
+
# The closer we are to proper xhtml, the more accurate the
|
536
|
+
# sanitization will be.
|
537
|
+
html = FeedTools.tidy_html(html)
|
538
|
+
|
539
|
+
# Hackity hack. But it works, and it seems plenty fast enough.
|
540
|
+
html_doc = HTree.parse_xml("<root>" + html + "</root>").to_rexml
|
541
|
+
|
542
|
+
sanitize_node = lambda do |html_node|
|
543
|
+
if html_node.respond_to? :children
|
544
|
+
for child in html_node.children
|
545
|
+
if child.kind_of? REXML::Element
|
546
|
+
unless acceptable_elements.include? child.name
|
547
|
+
if mode == :strip
|
548
|
+
html_node.delete_element(child)
|
549
|
+
else
|
550
|
+
new_child = REXML::Text.new(CGI.escapeHTML(child.to_s))
|
551
|
+
html_node.insert_after(child, new_child)
|
552
|
+
html_node.delete_element(child)
|
553
|
+
end
|
554
|
+
end
|
555
|
+
for attribute in child.attributes.keys
|
556
|
+
unless acceptable_attributes.include? attribute
|
557
|
+
child.delete_attribute(attribute)
|
558
|
+
end
|
559
|
+
end
|
560
|
+
end
|
561
|
+
sanitize_node.call(child)
|
379
562
|
end
|
380
563
|
end
|
564
|
+
html_node
|
381
565
|
end
|
566
|
+
sanitize_node.call(html_doc.root)
|
567
|
+
return html_doc.root.inner_xml
|
568
|
+
end
|
569
|
+
|
570
|
+
class Feed
|
571
|
+
include REXML
|
572
|
+
include AttributeDictionary
|
382
573
|
|
383
|
-
#
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
574
|
+
# Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
|
575
|
+
def Feed.open(url)
|
576
|
+
# clean up the url
|
577
|
+
url = FeedTools.normalize_url(url)
|
578
|
+
|
579
|
+
# create and load the new feed
|
580
|
+
feed = Feed.new
|
581
|
+
feed.url = url
|
582
|
+
feed.update
|
583
|
+
return feed
|
584
|
+
end
|
585
|
+
|
586
|
+
# Loads the feed from the remote url if the feed has expired from the cache or cannot be
|
587
|
+
# retrieved from the cache for some reason.
|
588
|
+
def update
|
589
|
+
if self.http_headers.nil? && !(self.cache_object.nil?) &&
|
590
|
+
!(self.cache_object.http_headers.nil?)
|
591
|
+
@http_headers = YAML.load(self.cache_object.http_headers)
|
592
|
+
end
|
593
|
+
if expired?
|
594
|
+
load_remote_feed
|
398
595
|
else
|
399
|
-
|
400
|
-
etag = http.meta['etag']
|
401
|
-
parse_feed(http.read)
|
402
|
-
end
|
596
|
+
@live = false
|
403
597
|
end
|
404
598
|
end
|
405
|
-
|
406
|
-
def parse_feed_hook(feed_data)
|
407
|
-
return nil
|
408
|
-
end
|
409
|
-
|
410
|
-
def parse_feed(feed_data)
|
411
|
-
root_node = Document.new(feed_data).root
|
412
|
-
metadata_node = XPath.first(root_node, "channel")
|
413
|
-
if metadata_node == nil
|
414
|
-
metadata_node = root_node
|
415
|
-
end
|
416
599
|
|
417
|
-
|
418
|
-
|
600
|
+
# Attempts to load the feed from the remote location. Requires the url
|
601
|
+
# field to be set. If an etag or the last_modified date has been set,
|
602
|
+
# attempts to use them to prevent unnecessary reloading of identical
|
603
|
+
# content.
|
604
|
+
# Attempts to load the feed from the remote location.  Requires the url
# field to be set.  If an etag or the last_modified date has been set,
# attempts to use them to prevent unnecessary reloading of identical
# content.
#
# @live is set to true up front and reset to false whenever retrieval
# fails.  After a failed http retrieval the error is re-raised only when
# there is no xml_data to fall back on; a failed file read is always
# re-raised.  When a cache object is available, #save is attempted at the
# end and any error it raises is ignored.
#
# Raises FeedAccessError when the url has no scheme or a scheme other than
# http, ftp or file.
def load_remote_feed
  @live = true
  if self.http_headers.nil? && !(self.cache_object.nil?) &&
      !(self.cache_object.http_headers.nil?)
    # NOTE(review): YAML.load can instantiate arbitrary objects; confirm the
    # cache contents are trusted.
    @http_headers = YAML.load(self.cache_object.http_headers)
  end

  if (self.url =~ /^feed:/) == 0
    # The feed: pseudo-protocol should already have been corrected by the
    # time we get here, so fix the url up again.
    self.url = FeedTools.normalize_url(self.url)
  end

  # Find out what method we're going to be using to obtain this feed.
  uri = URI.parse(self.url)
  retrieval_method = "http"
  case uri.scheme
  when "http", "ftp", "file"
    retrieval_method = uri.scheme
  when nil
    raise FeedAccessError,
      "No protocol was specified in the url."
  else
    raise FeedAccessError,
      "Cannot retrieve feed using unrecognized protocol: " + uri.scheme
  end

  # No need for http headers unless we're actually doing http
  if retrieval_method == "http"
    # Set up the appropriate http headers
    headers = {}
    unless self.http_headers.nil?
      headers["If-None-Match"] =
        self.http_headers['etag'] unless self.http_headers['etag'].nil?
      headers["If-Modified-Since"] =
        self.http_headers['last-modified'] unless
          self.http_headers['last-modified'].nil?
    end
    headers["User-Agent"] =
      FeedTools.user_agent unless FeedTools.user_agent.nil?

    begin
      @http_response = http_fetch(self.url, headers)
      @http_headers = {}
      # each_header yields the name and the value as two block arguments.
      self.http_response.each_header do |name, value|
        self.http_headers[name.downcase] = value
      end
      self.last_retrieved = Time.now
      self.xml_data = self.http_response.body
    rescue FeedAccessError, Timeout::Error, Errno::ECONNRESET
      # Access errors, timeouts and connection resets all fall back to the
      # xml_data stored in the cache, if there is any.
      @live = false
      raise if self.xml_data.nil?
    rescue => error
      # heck, if anything at all bad happens, fall back to the xml_data
      # stored in the cache.

      # If we can, get the HTTPResponse...
      @http_response = nil
      if error.respond_to?(:each_header)
        @http_response = error
      end
      if error.respond_to?(:response) &&
          error.response.respond_to?(:each_header)
        @http_response = error.response
      end
      if @http_response != nil
        @http_headers = {}
        self.http_response.each_header do |name, value|
          self.http_headers[name] = value
        end
        # A 304 (raised via response.error! in http_fetch) means the cached
        # copy is still current, so the retrieval time is refreshed.
        if self.http_response.code.to_i == 304
          self.last_retrieved = Time.now
        end
      end
      @live = false
      raise if self.xml_data.nil?
    end
  elsif retrieval_method == "https"
    # Not supported... yet
    # NOTE(review): unreachable at present, because the case statement above
    # raises FeedAccessError for the https scheme.
  elsif retrieval_method == "ftp"
    # Not supported... yet
    # Technically, CDF feeds are supposed to be able to be accessed directly
    # from an ftp server.  This is silly, but we'll humor Microsoft.
    #
    # Eventually.
  elsif retrieval_method == "file"
    # Now that we've gone to all that trouble to ensure the url begins
    # with 'file://', strip the 'file://' off the front of the url.
    file_name = self.url.gsub(/^file:\/\//, "")
    begin
      # File.open rather than Kernel#open, so a name beginning with '|' can
      # never be run as a command.
      File.open(file_name) do |file|
        @http_response = nil
        @http_headers = {}
        self.last_retrieved = Time.now
        self.xml_data = file.read
      end
    rescue
      @live = false
      # In this case, pulling from the cache is probably not going
      # to help at all, and the user should probably be immediately
      # apprised of the problem.  Raise the exception.
      raise
    end
  end
  unless self.cache_object.nil?
    begin
      self.save
    rescue
      # Saving to the cache is best-effort; a failure here is ignored.
    end
  end
end

# The http feed access method.
#
# Issues a GET for +feed_url+ with +http_headers+ (the Host header is
# overwritten), following redirects until +redirect_limit+ reaches zero.
# +response_chain+ accumulates [requested_url, response, target_url]
# entries for the redirects followed so far.
#
# Returns the final Net::HTTPSuccess response.  Raises FeedAccessError on
# too many redirects, a missing Location header or a redirection loop; any
# other response (including 304) is raised through response.error!.
def http_fetch(feed_url, http_headers, redirect_limit = 10,
    response_chain = []) # :nodoc:
  raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
  feed_uri = nil
  begin
    feed_uri = URI.parse(feed_url)
  rescue URI::InvalidURIError
    # Uh, maybe try to fix it?
    feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
  end

  # Borrowed from open-uri:
  # According to RFC2616 14.23, Host: request-header field should be
  # set to an origin server.
  # But net/http wrongly set a proxy server if an absolute URI is
  # specified as a request URI.
  # So override it here explicitly.
  http_headers['Host'] = feed_uri.host
  http_headers['Host'] += ":#{feed_uri.port}" if feed_uri.port

  # request_uri keeps the query string and is "/" for a bare host, whereas
  # path alone would drop the query and could be empty.
  request_target =
    feed_uri.respond_to?(:request_uri) ? feed_uri.request_uri : feed_uri.path

  Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
    response = http.request_get(request_target, http_headers)

    case response
    when Net::HTTPSuccess
      # We've reached the final destination, process all previous
      # redirections, and see if we need to update the url.
      response_chain.each do |requested_url, redirect_response, target_url|
        # Jump out as soon as we hit anything that isn't a
        # permanently moved redirection.
        break unless redirect_response.code.to_i == 301
        # A permanent redirect moves the feed to the redirect target.
        self.url = target_url
      end
      return response
    when Net::HTTPRedirection
      if response.code.to_i == 304
        response.error!
      else
        if response['Location'].nil?
          raise FeedAccessError,
            "No location to redirect to supplied: " + response.code
        end
        new_location = response['location']
        begin
          # Resolve relative Location headers against the current url.
          new_location = feed_uri.merge(new_location).to_s
        rescue URI::Error
          # Leave the location as supplied; the recursive call will try to
          # normalize it.
        end
        response_chain << [feed_url, response, new_location]
        if response_chain.assoc(new_location) != nil
          raise FeedAccessError, "Redirection loop detected."
        end
        http_fetch(new_location, http_headers,
          redirect_limit - 1, response_chain)
      end
    else
      response.error!
    end
  end
end
|
802
|
+
|
803
|
+
# Returns the relevant information from an http request.
|
804
|
+
# Reader for the @http_response instance variable.
def http_response
  @http_response
end
|
807
|
+
|
808
|
+
# Returns a hash of the http headers from the response.
|
809
|
+
# Reader for the @http_headers instance variable.
def http_headers
  @http_headers
end
|
812
|
+
|
813
|
+
# Returns the feed's raw xml data.
|
814
|
+
# Returns the feed's raw xml data.  When none is held in memory and a cache
# object is available, the value is read from the cache object first.
def xml_data
  if @xml_data.nil? && !self.cache_object.nil?
    @xml_data = self.cache_object.xml_data
  end
  @xml_data
end
|
822
|
+
|
823
|
+
# Sets the feed's xml data.
|
824
|
+
# Stores the feed's raw xml data and copies it to the cache object when one
# is available.
def xml_data=(new_xml_data)
  @xml_data = new_xml_data
  return if self.cache_object.nil?
  self.cache_object.xml_data = new_xml_data
end
|
476
830
|
|
477
|
-
|
478
|
-
|
479
|
-
if
|
480
|
-
|
831
|
+
# Returns a REXML Document of the xml_data
|
832
|
+
# Returns the parsed document for xml_data, memoized in @xml_doc.
#
# Parsing is attempted with Document.new first; if that raises, the data is
# run through HTree.parse(...).to_rexml instead.
def xml
  return @xml_doc unless @xml_doc.nil?

  begin
    @xml_doc = Document.new(xml_data)
  rescue
    # Something failed, attempt to repair the xml with htree.
    @xml_doc = HTree.parse(xml_data).to_rexml
  end
  @xml_doc
end
|
843
|
+
|
844
|
+
# Returns the first node within the channel_node that matches the xpath query.
|
845
|
+
# Returns XPath.first evaluated for +xpath+ against the channel_node.
def find_node(xpath)
  XPath.first(channel_node, xpath)
end
|
848
|
+
|
849
|
+
# Returns all nodes within the channel_node that match the xpath query.
|
850
|
+
# Returns XPath.match evaluated for +xpath+ against the channel_node.
def find_all_nodes(xpath)
  XPath.match(channel_node, xpath)
end
|
853
|
+
|
854
|
+
# Returns the root node of the feed.
|
855
|
+
# Returns the root of the parsed xml document, memoized in @root_node.
def root_node
  @root_node = xml.root if @root_node.nil?
  @root_node
end
|
861
|
+
|
862
|
+
# Returns the channel node of the feed.
|
863
|
+
# Returns the node that holds the feed-level metadata, memoized in
# @channel_node: the root's "channel" child, else its "feedinfo" child,
# else the root node itself.
def channel_node
  if @channel_node.nil?
    @channel_node = XPath.first(root_node, "channel") ||
      XPath.first(root_node, "feedinfo") ||
      root_node
  end
  @channel_node
end
|
875
|
+
|
876
|
+
# The cache object that handles the feed persistence.
|
877
|
+
# The cache object that handles the feed persistence.
#
# When FeedTools.feed_cache is set and no cache object is held yet, one is
# looked up by @id, or failing that by @url, and a new one is created if the
# lookup finds nothing.  Any error raised during lookup or creation is
# swallowed.  Returns @cache_object, which may still be nil.
def cache_object
  return @cache_object if FeedTools.feed_cache.nil?
  return @cache_object unless @cache_object.nil?

  begin
    if !@id.nil?
      @cache_object = FeedTools.feed_cache.find_by_id(@id)
    elsif !@url.nil?
      @cache_object = FeedTools.feed_cache.find_by_url(@url)
    end
    @cache_object = FeedTools.feed_cache.new if @cache_object.nil?
  rescue
  end
  @cache_object
end
|
895
|
+
|
896
|
+
# Sets the cache object for this feed.
|
897
|
+
#
|
898
|
+
# This can be any object, but it must accept the following messages:
|
899
|
+
# url
|
900
|
+
# url=
|
901
|
+
# title
|
902
|
+
# title=
|
903
|
+
# link
|
904
|
+
# link=
|
905
|
+
# xml_data
|
906
|
+
# xml_data=
|
907
|
+
# etag
|
908
|
+
# etag=
|
909
|
+
# last_modified
|
910
|
+
# last_modified=
|
911
|
+
# save
|
912
|
+
# Replaces the cache object held by this feed with +new_cache_object+.
def cache_object=(new_cache_object)
  @cache_object = new_cache_object
end
|
915
|
+
|
916
|
+
# Returns the feed's unique id
|
917
|
+
# Returns the feed's unique id, memoized in @id.
#
# The value is the text of the root node's "id" child, or of its "guid"
# child when that is empty; nil when both are empty.
def id
  if @id.nil?
    found_id = XPath.first(root_node, "id/text()").to_s
    found_id = XPath.first(root_node, "guid/text()").to_s if found_id == ""
    @id = (found_id == "" ? nil : found_id)
  end
  @id
end
|
927
|
+
|
928
|
+
# Sets the feed's unique id
|
929
|
+
# Assigns +new_id+ as the feed's unique id.
def id=(new_id)
  @id = new_id
end
|
932
|
+
|
933
|
+
# Returns the feed url.
|
934
|
+
# Returns the feed url.
#
# When no url is set but xml data is available, the href of the channel's
# link[@rel='self'] is used; an empty result leaves the url nil.
def url
  if @url.nil? && !self.xml_data.nil?
    self_href = XPath.first(channel_node, "link[@rel='self']/@href").to_s
    @url = (self_href == "" ? nil : self_href)
  end
  @url
end
|
497
941
|
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
942
|
+
# Sets the feed url and prepares the cache_object if necessary.
|
943
|
+
# Sets the feed url and prepares the cache_object if necessary.
#
# +new_url+ is run through FeedTools.normalize_url.  The normalized value is
# stored in @url and, when a cache object is available, written to it as
# well, so the cached url matches the one #cache_object uses for lookups.
def url=(new_url)
  @url = FeedTools.normalize_url(new_url)
  self.cache_object.url = @url unless self.cache_object.nil?
end
|
947
|
+
|
948
|
+
# Returns the feed title
|
949
|
+
# Returns the feed title, memoized in @title.
#
# A title whose type or mode attribute is "xhtml" is taken from the title
# node's inner_xml; otherwise the title text is HTML-unescaped.  The value
# is then passed through FeedTools.sanitize_html (with :strip), unescaped
# again, stripped of html, and has its newlines replaced with spaces.  An
# empty result becomes nil.  The computed title is also written to the cache
# object when one is available.
def title
  if @title.nil?
    if XPath.first(channel_node, "title/@type").to_s == "xhtml" ||
        XPath.first(channel_node, "title/@mode").to_s == "xhtml"
      @title = XPath.first(channel_node, "title").inner_xml
    else
      # Escaped and untyped titles are both unescaped text.
      @title = CGI.unescapeHTML(
        XPath.first(channel_node, "title/text()").to_s)
    end
    unless @title.nil?
      @title = CGI.unescapeHTML(FeedTools.sanitize_html(@title, :strip))
    end
    if @title != "" && !(@title.nil?)
      @title = FeedTools.strip_html(@title).strip
    end
    # Guarded like the steps above, so a nil title cannot raise here.
    @title = @title.gsub(/\n/, " ") unless @title.nil?
    @title = nil if @title == ""
    self.cache_object.title = @title unless self.cache_object.nil?
  end
  return @title
end
|
974
|
+
|
975
|
+
# Sets the feed title
|
976
|
+
# Assigns the feed title and copies it to the cache object when one is
# available.
def title=(new_title)
  @title = new_title
  return if self.cache_object.nil?
  self.cache_object.title = new_title
end
|
980
|
+
|
981
|
+
# Returns the feed description
|
982
|
+
# Returns the feed description, memoized in @description.
#
# Sources are tried in order until one yields a non-empty string: the
# description text, subtitle, tagline, info (only when there is no tagline
# element), abstract, summary, content:encoded (which also sets @bozo), the
# description node's inner_xml, itunes_summary and itunes_subtitle.  The
# result is passed through FeedTools.sanitize_html, has wrapping divs
# removed from its very start and end, has newlines replaced with spaces
# when shorter than 80 characters, and is stripped.  An empty result
# becomes nil.
def description
  if @description.nil?
    # get the feed description from the xml document
    @description = XPath.first(channel_node, "description/text()").to_s
    if @description != ""
      if XPath.first(channel_node, "description/@encoding").to_s != ""
        @description = "[Embedded data objects are not supported.]"
      else
        @description = CGI.unescapeHTML(@description)
      end
    end

    # Reads an element's text, unescaping it only when mode='escaped'.
    mode_aware_text = lambda do |element_name|
      text = XPath.first(channel_node, "#{element_name}/text()").to_s
      if text != "" &&
          XPath.first(channel_node, "#{element_name}/@mode").to_s == "escaped"
        text = CGI.unescapeHTML(text)
      end
      text
    end
    @description = mode_aware_text.call("subtitle") if @description == ""
    @description = mode_aware_text.call("tagline") if @description == ""
    if @description == "" && XPath.first(channel_node, "tagline") == nil
      @description = mode_aware_text.call("info")
    end

    if @description == ""
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "abstract/text()").to_s)
    end
    if @description == ""
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "summary/text()").to_s)
    end
    if @description == ""
      # I don't think this is valid for anyone to do, but this is probably
      # what they meant if they do it.
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "content:encoded/text()").to_s)
      if @description != ""
        @bozo = true
      end
    end
    if @description == ""
      begin
        description_node = XPath.first(channel_node, "description")
        unless description_node.nil?
          @description = description_node.inner_xml
        end
      rescue
        # Extracting the inner xml is best-effort; carry on without it.
      end
      @description = "" if @description.nil?
    end
    if @description == ""
      @description = self.itunes_summary
      @description = "" if @description.nil?
    end
    if @description == ""
      @description = self.itunes_subtitle
      @description = "" if @description.nil?
    end

    @description =
      FeedTools.sanitize_html(@description) unless @description.nil?
    unless @description.nil?
      # If it started with a bunch of divs, hack them right off.  We can put
      # them back later if they're needed.  \A and \z anchor to the whole
      # string, so divs that merely begin or end an inner line are kept.
      @description = @description.strip
      @description = @description.sub(/\A(<div[^>]*>)*/, "")
      @description = @description.sub(/(<\/div>)*\z/, "")

      @description = @description.gsub(/\n/, " ") if @description.size < 80
      @description = @description.strip
    end
    @description = nil if @description == ""
  end
  return @description
end
|
1059
|
+
|
1060
|
+
# Sets the feed description
|
1061
|
+
# Assigns +new_description+ as the feed description.
def description=(new_description)
  @description = new_description
end
|
1064
|
+
|
1065
|
+
# Returns the contents of the itunes:summary element
|
1066
|
+
# Returns the contents of the itunes:summary element, memoized in
# @itunes_summary.
#
# The element is looked up under the channel node first, then under the root
# node.  Its text is HTML-unescaped and passed through
# FeedTools.sanitize_html; nil is returned when the text is empty.
def itunes_summary
  if @itunes_summary.nil?
    raw_summary = XPath.first(channel_node, "itunes:summary/text()").to_s
    if raw_summary == ""
      raw_summary = XPath.first(root_node, "itunes:summary/text()").to_s
    end
    @itunes_summary = CGI.unescapeHTML(raw_summary)
    if @itunes_summary == ""
      @itunes_summary = nil
    end
    @itunes_summary =
      FeedTools.sanitize_html(@itunes_summary) unless @itunes_summary.nil?
  end
  return @itunes_summary
end
|
1078
|
+
|
1079
|
+
# Sets the contents of the itunes:summary element
|
1080
|
+
# Assigns +new_itunes_summary+ as the itunes:summary contents.
def itunes_summary=(new_itunes_summary)
  @itunes_summary = new_itunes_summary
end
|
1083
|
+
|
1084
|
+
# Returns the contents of the itunes:subtitle element
|
1085
|
+
# Returns the contents of the itunes:subtitle element, memoized in
# @itunes_subtitle.
#
# The element is looked up under the channel node first, then under the root
# node.  Its text is HTML-unescaped and passed through
# FeedTools.sanitize_html; nil is returned when the text is empty.
def itunes_subtitle
  if @itunes_subtitle.nil?
    raw_subtitle = XPath.first(channel_node, "itunes:subtitle/text()").to_s
    if raw_subtitle == ""
      raw_subtitle = XPath.first(root_node, "itunes:subtitle/text()").to_s
    end
    @itunes_subtitle = CGI.unescapeHTML(raw_subtitle)
    if @itunes_subtitle == ""
      @itunes_subtitle = nil
    end
    unless @itunes_subtitle.nil?
      @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
    end
  end
  return @itunes_subtitle
end
|
1098
|
+
|
1099
|
+
# Sets the contents of the itunes:subtitle element
|
1100
|
+
# Assigns +new_itunes_subtitle+ as the itunes:subtitle contents.
def itunes_subtitle=(new_itunes_subtitle)
  @itunes_subtitle = new_itunes_subtitle
end
|
1103
|
+
|
1104
|
+
# Returns the feed link
|
1105
|
+
# Returns the feed link, memoized in @link.
#
# Candidates are tried in order until one is non-empty: the href of an
# alternate text/html link, the href of any alternate link, any link href,
# the link text, the channel's own href attribute, the guid (when
# FeedTools.is_url? accepts it) and finally the channel's base attribute.
# The result is passed through FeedTools.normalize_url and written to the
# cache object when one is available.
def link
  if @link.nil?
    # get the feed link from the xml document
    link_queries = [
      # Both predicates must be joined with 'and' to form a valid XPath.
      "link[@rel='alternate' and @type='text/html']/@href",
      "link[@rel='alternate']/@href",
      "link/@href",
      "link/text()",
      "@href"
    ]
    link_queries.each do |query|
      @link = XPath.first(channel_node, query).to_s
      break unless @link == ""
    end
    if @link == ""
      # NOTE(review): no guid method is visible for this class in this part
      # of the file -- confirm it is defined.
      if FeedTools.is_url? self.guid
        @link = self.guid
      end
    end
    if @link == ""
      # Technically, we shouldn't use the base attribute for this, but if the href attribute
      # is missing, it's already a given that we're looking at a messed up CDF file.  We can
      # always pray it's correct.
      @link = XPath.first(channel_node, "@base").to_s
    end
    @link = FeedTools.normalize_url(@link)
    unless self.cache_object.nil?
      self.cache_object.link = @link
    end
  end
  return @link
end
|
1139
|
+
|
1140
|
+
# Sets the feed link
|
1141
|
+
# Assigns the feed link and copies it to the cache object when one is
# available.
def link=(new_link)
  @link = new_link
  return if self.cache_object.nil?
  self.cache_object.link = new_link
end
|
1147
|
+
|
1148
|
+
# Returns the feed image link
|
1149
|
+
# Returns the feed image link, memoized in @image_link.
#
# The channel node is queried with a series of XPath expressions and the
# first non-empty result is kept, then passed through
# FeedTools.normalize_url.
def image_link
  if @image_link.nil?
    image_queries = [
      "image/url/text()",
      "image/@rdf:resource",
      "link[@type='image/jpeg']/@href",
      "link[@type='image/gif']/@href",
      "link[@type='image/png']/@href",
      "logo[@style='image']/@href",
      "logo/@href"
    ]
    image_queries.each do |query|
      @image_link = XPath.first(channel_node, query).to_s
      break if @image_link != ""
    end
    @image_link = FeedTools.normalize_url(@image_link)
  end
  @image_link
end
|
1175
|
+
|
1176
|
+
# Sets the feed image link
|
1177
|
+
# Assigns +new_image_link+ as the feed image link.
def image_link=(new_image_link)
  @image_link = new_image_link
end
|
1180
|
+
|
1181
|
+
# Returns the url to the icon file for this feed.
|
1182
|
+
#
|
1183
|
+
# This method uses the url from the link field in order to avoid grabbing
|
1184
|
+
# the favicon for services like feedburner.
|
1185
|
+
# Returns the url to the icon file for this feed, memoized in @icon_link.
#
# The channel node is queried for an explicit icon first.  If none is
# found, "/favicon.ico" on the scheme and host of the feed's link is used;
# the link field is used in order to avoid grabbing the favicon for services
# like feedburner.  Returns nil when the link has no scheme or host to build
# that url from.
def icon_link
  if @icon_link.nil?
    icon_queries = [
      "link[@rel='icon']/@href",
      "link[@rel='shortcut icon']/@href",
      "link[@type='image/x-icon']/@href",
      "icon/@href",
      "icon/text()"
    ]
    icon_queries.each do |query|
      @icon_link = XPath.first(channel_node, query).to_s
      break unless @icon_link == ""
    end
    if @icon_link == ""
      normalized_link = FeedTools.normalize_url(self.link)
      link_uri = nil
      unless normalized_link.nil? || normalized_link == ""
        link_uri = URI.parse(normalized_link)
      end
      if link_uri.nil? || link_uri.scheme.nil? || link_uri.host.nil?
        # Nothing to derive a favicon location from.
        @icon_link = nil
      else
        @icon_link =
          link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
      end
    end
  end
  return @icon_link
end
|
1213
|
+
|
1214
|
+
# Returns the number of seconds before the feed should expire
|
1215
|
+
# Returns the number of seconds before the feed should expire.
#
# The value is kept in @time_to_live as a whole number of hours and is read
# from, in order: syn:updateFrequency / syn:updatePeriod, ttl (minutes), and
# the schedule/intervaltime attributes.  Fractions of an hour are rounded,
# and anything below one hour, or no value at all, becomes one hour.
def time_to_live
  if @time_to_live.nil?
    # get the feed time to live from the xml document
    update_frequency =
      XPath.first(channel_node, "syn:updateFrequency/text()").to_s
    if update_frequency != ""
      update_period =
        XPath.first(channel_node, "syn:updatePeriod/text()").to_s
      hours_per_period =
        case update_period
        when "daily" then 24
        when "weekly" then 24 * 7
        when "monthly" then 24 * 30
        when "yearly" then 24 * 365
        else 1 # hourly
        end
      # NOTE(review): the frequency is multiplied by the period here;
      # confirm this is the intended reading of syn:updateFrequency.
      @time_to_live = update_frequency.to_i * hours_per_period
    end
  end
  if @time_to_live.nil?
    # expressed in minutes
    update_frequency = XPath.first(channel_node, "ttl/text()").to_s
    if update_frequency != ""
      # Float division, so partial hours survive until the rounding below.
      @time_to_live = update_frequency.to_f / 60
    end
  end
  if @time_to_live.nil?
    interval_hours = 0.0
    update_frequency_days =
      XPath.first(channel_node, "schedule/intervaltime/@days").to_s
    update_frequency_hours =
      XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
    update_frequency_minutes =
      XPath.first(channel_node, "schedule/intervaltime/@min").to_s
    update_frequency_seconds =
      XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
    if update_frequency_days != ""
      interval_hours += update_frequency_days.to_i * 24
    end
    if update_frequency_hours != ""
      interval_hours += update_frequency_hours.to_i
    end
    if update_frequency_minutes != ""
      interval_hours += update_frequency_minutes.to_f / 60
    end
    if update_frequency_seconds != ""
      interval_hours += update_frequency_seconds.to_f / 3600
    end
    @time_to_live = interval_hours unless interval_hours == 0
  end
  # Default to one hour, and never go below it after rounding.
  @time_to_live = @time_to_live.nil? ? 1 : @time_to_live.round
  @time_to_live = 1 if @time_to_live < 1
  return @time_to_live * 3600
end
|
1271
|
+
|
1272
|
+
# Sets the feed time to live
|
1273
|
+
# Sets the feed time to live from a number of seconds.
#
# The value is stored as a whole number of hours, rounded to the nearest
# hour and never less than one.
def time_to_live=(new_time_to_live)
  # Float division, so that rounding sees the fractional hours.
  @time_to_live = (new_time_to_live.to_f / 3600).round
  @time_to_live = 1 if @time_to_live < 1
end
|
1277
|
+
|
1278
|
+
# Returns the feed language
|
1279
|
+
# Returns the feed language in lower case, memoized in @language.
#
# Looked up, in order, from the channel's language element, its dc:language
# element, the xml:lang attribute of the channel node and the xml:lang
# attribute of the root node.  Defaults to "en-us".
def language
  if @language.nil?
    @language = XPath.first(channel_node, "language/text()").to_s
    if @language == ""
      @language = XPath.first(channel_node, "dc:language/text()").to_s
    end
    # xml:lang is an attribute, so it is read from the node's attributes
    # rather than queried as a child element.
    if @language == "" && channel_node.respond_to?(:attributes)
      @language = channel_node.attributes["xml:lang"].to_s
    end
    if @language == "" && root_node.respond_to?(:attributes)
      @language = root_node.attributes["xml:lang"].to_s
    end
    if @language == ""
      @language = "en-us"
    end
    @language = @language.downcase
  end
  return @language
end
|
1298
|
+
|
1299
|
+
# Sets the feed language
|
1300
|
+
# Assigns +new_language+ as the feed language.
def language=(new_language)
  @language = new_language
end
|
1303
|
+
|
1304
|
+
# Returns true if this feed contains explicit material.
|
1305
|
+
# Returns true if this feed contains explicit material, memoized in
# @explicit.
#
# True when the channel's media:adult text is "true", or its
# itunes:explicit text is "yes" or "true" (compared case-insensitively).
def explicit
  if @explicit.nil?
    @explicit =
      XPath.first(channel_node,
        "media:adult/text()").to_s.downcase == "true" ||
      ["yes", "true"].include?(
        XPath.first(channel_node, "itunes:explicit/text()").to_s.downcase)
  end
  @explicit
end
|
1320
|
+
|
1321
|
+
# Sets whether or not the feed contains explicit material
|
1322
|
+
# Sets whether or not the feed contains explicit material.  The value is
# coerced to true or false.
def explicit=(new_explicit)
  @explicit = !!new_explicit
end
|
1325
|
+
|
1326
|
+
# Returns the feed items
|
1327
|
+
# Returns the feed items, newest first.
#
# On first use the item nodes are collected from the root node's "item"
# children, else the channel node's "item" children, else its "entry"
# children, and each is wrapped in a FeedItem that points back at this
# feed.  The list is re-sorted on every call by descending time; items
# without a time sort as if dated Time.mktime(1970).
def items
  if @items.nil?
    raw_items = XPath.match(root_node, "item")
    if raw_items.nil? || raw_items == []
      raw_items = XPath.match(channel_node, "item")
    end
    if raw_items.nil? || raw_items == []
      raw_items = XPath.match(channel_node, "entry")
    end

    # create the individual feed items
    @items = []
    unless raw_items.nil?
      raw_items.each do |item_node|
        feed_item = FeedItem.new
        feed_item.xml_data = item_node.to_s
        feed_item.feed = self
        @items << feed_item
      end
    end
  end

  # Sort the items
  @items = @items.sort do |first, second|
    (second.time or Time.mktime(1970)) <=> (first.time or Time.mktime(1970))
  end
  @items
end
|
1355
|
+
|
1356
|
+
# The time that the feed was last requested from the remote server. Nil if it has
|
1357
|
+
# never been pulled, or if it was created from scratch.
|
1358
|
+
# Returns the time that the feed was last requested from the remote server.
# When a cache object is available, its last_retrieved value replaces the
# one held in memory before it is returned.
def last_retrieved
  cached = self.cache_object
  @last_retrieved = cached.last_retrieved unless cached.nil?
  @last_retrieved
end
|
1364
|
+
|
1365
|
+
# Sets the time that the feed was last updated.
|
1366
|
+
# Records the time that the feed was last retrieved and copies it to the
# cache object when one is available.
def last_retrieved=(new_last_retrieved)
  @last_retrieved = new_last_retrieved
  return if self.cache_object.nil?
  self.cache_object.last_retrieved = new_last_retrieved
end
|
1372
|
+
|
1373
|
+
# True if this feed contains audio content enclosures
|
1374
|
+
# True if this feed contains audio content enclosures, i.e. if any
# enclosure of any of this feed's items answers true to audio?.
def podcast?
  # Inspect this feed's own items rather than a global test feed.
  items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.audio? }
  end
end
|
1383
|
+
|
1384
|
+
# True if this feed contains video content enclosures
|
1385
|
+
# True if this feed contains video content enclosures, i.e. if any
# enclosure of any of this feed's items answers true to video?.
def vidlog?
  # Inspect this feed's own items rather than a global test feed.
  items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.video? }
  end
end
|
1394
|
+
|
1395
|
+
# True if the feed was not last retrieved from the cache.
|
1396
|
+
# Reader for the @live flag, which the retrieval code sets to true when a
# remote load is attempted and to false when it is skipped or fails.
def live?
  @live
end
|
1399
|
+
|
1400
|
+
# True if the feed has expired and must be reacquired from the remote server.
|
1401
|
+
# True if the feed has expired and must be reacquired from the remote
# server: either it has never been retrieved, or more than time_to_live
# seconds have passed since it was.
def expired?
  retrieved_at = last_retrieved
  return true if retrieved_at.nil?
  # time_to_live is already expressed in seconds.
  (retrieved_at + time_to_live) < Time.now
end
|
1404
|
+
|
1405
|
+
# Forces this feed to expire.
|
1406
|
+
# Forces this feed to expire by moving its last retrieval time back to
# Time.mktime(1970) and saving the feed.
def expire
  epoch = Time.mktime(1970)
  self.last_retrieved = epoch
  self.save
end
|
1410
|
+
|
1411
|
+
# A hook method that is called during the feed generation process. Overriding this method
|
1412
|
+
# will enable additional content to be inserted into the feed.
|
1413
|
+
# A hook method that is called during the feed generation process.
# Overriding this method will enable additional content to be inserted into
# the feed.  This default implementation ignores its arguments and returns
# nil.
def build_xml_hook(feed_type, version, xml_builder)
  nil
end
|
1416
|
+
|
1417
|
+
# Generates xml based on the content of the feed
|
1418
|
+
def build_xml(feed_type="rss", version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
1419
|
+
if feed_type == "rss" && version == 0.0
|
1420
|
+
version = 1.0
|
1421
|
+
elsif feed_type == "atom" && version == 0.0
|
1422
|
+
version = 0.3
|
1423
|
+
end
|
1424
|
+
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
1425
|
+
# RDF-based rss format
|
1426
|
+
return xml_builder.tag!("rdf:RDF") do
|
1427
|
+
xml_builder.channel("rdf:about" => CGI.escapeHTML(link)) do
|
1428
|
+
unless title.nil? || title == ""
|
1429
|
+
xml_builder.title(title)
|
1430
|
+
else
|
1431
|
+
xml_builder.title
|
1432
|
+
end
|
1433
|
+
unless link.nil? || link == ""
|
1434
|
+
xml_builder.link(link)
|
1435
|
+
else
|
1436
|
+
xml_builder.link
|
1437
|
+
end
|
1438
|
+
unless image_link.nil? || image_link == ""
|
1439
|
+
xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
|
1440
|
+
end
|
1441
|
+
unless description.nil? || description == ""
|
1442
|
+
xml_builder.description(description)
|
1443
|
+
else
|
1444
|
+
xml_builder.description
|
1445
|
+
end
|
1446
|
+
unless language.nil? || language == ""
|
1447
|
+
xml_builder.tag!("dc:language", language)
|
1448
|
+
end
|
1449
|
+
xml_builder.tag!("syn:updatePeriod", "hourly")
|
1450
|
+
xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
|
1451
|
+
xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
|
1452
|
+
xml_builder.items do
|
1453
|
+
xml_builder.tag!("rdf:Seq") do
|
1454
|
+
unless items.nil?
|
1455
|
+
for item in items
|
1456
|
+
if item.link.nil?
|
1457
|
+
raise "Cannot generate an rdf-based feed with a nil item link field."
|
1458
|
+
end
|
1459
|
+
xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
|
1460
|
+
end
|
1461
|
+
end
|
1462
|
+
end
|
1463
|
+
end
|
1464
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1465
|
+
end
|
1466
|
+
unless image_link.nil? || image_link == ""
|
1467
|
+
xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
|
1468
|
+
unless title.nil? || title == ""
|
1469
|
+
xml_builder.title(title)
|
1470
|
+
else
|
1471
|
+
xml_builder.title
|
1472
|
+
end
|
1473
|
+
unless image_link.nil? || image_link == ""
|
1474
|
+
xml_builder.url(image_link)
|
1475
|
+
end
|
1476
|
+
unless link.nil? || link == ""
|
1477
|
+
xml_builder.link(link)
|
1478
|
+
else
|
1479
|
+
xml_builder.link
|
1480
|
+
end
|
1481
|
+
end
|
1482
|
+
end
|
1483
|
+
unless items.nil?
|
1484
|
+
for item in items
|
1485
|
+
item.build_xml(feed_type, version, xml_builder)
|
1486
|
+
end
|
1487
|
+
end
|
1488
|
+
end
|
1489
|
+
elsif feed_type == "rss"
|
1490
|
+
# normal rss format
|
1491
|
+
return xml_builder.rss("version" => version.to_s) do
|
1492
|
+
unless title.nil? || title == ""
|
1493
|
+
xml_builder.title(title)
|
1494
|
+
end
|
1495
|
+
unless link.nil? || link == ""
|
1496
|
+
xml_builder.link(link)
|
1497
|
+
end
|
1498
|
+
unless description.nil? || description == ""
|
1499
|
+
xml_builder.description(description)
|
1500
|
+
end
|
1501
|
+
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
1502
|
+
xml_builder.generator("http://www.sporkmonger.com/projects/feedtools")
|
1503
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1504
|
+
unless items.nil?
|
1505
|
+
for item in items
|
1506
|
+
item.build_xml(feed_type, version, xml_builder)
|
1507
|
+
end
|
1508
|
+
end
|
1509
|
+
end
|
1510
|
+
elsif feed_type == "atom"
|
1511
|
+
# normal atom format
|
1512
|
+
return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
|
1513
|
+
"version" => version.to_s,
|
1514
|
+
"xml:lang" => language) do
|
1515
|
+
unless title.nil? || title == ""
|
1516
|
+
xml_builder.title(title,
|
1517
|
+
"mode" => "escaped",
|
1518
|
+
"type" => "text/html")
|
1519
|
+
end
|
1520
|
+
unless link.nil? || link == ""
|
1521
|
+
xml_builder.link("href" => link,
|
1522
|
+
"rel" => "alternate",
|
1523
|
+
"type" => "text/html",
|
1524
|
+
"title" => title)
|
1525
|
+
end
|
1526
|
+
unless description.nil? || description == ""
|
1527
|
+
xml_builder.tagline(description,
|
1528
|
+
"mode" => "escaped",
|
1529
|
+
"type" => "text/html")
|
1530
|
+
end
|
1531
|
+
xml_builder.generator("FeedTools",
|
1532
|
+
"url" => "http://www.sporkmonger.com/projects/feedtools")
|
1533
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1534
|
+
unless items.nil?
|
1535
|
+
for item in items
|
1536
|
+
item.build_xml(feed_type, version, xml_builder)
|
1537
|
+
end
|
1538
|
+
end
|
1539
|
+
end
|
1540
|
+
end
|
1541
|
+
end
|
1542
|
+
|
1543
|
+
# Persists the current feed state to the cache.
|
1544
|
+
def save
|
1545
|
+
if FeedTools.feed_cache.nil?
|
1546
|
+
raise "Caching is currently disabled. Cannot save to cache."
|
1547
|
+
elsif self.url.nil?
|
1548
|
+
raise "The url field must be set to save to the cache."
|
1549
|
+
elsif self.xml_data.nil?
|
1550
|
+
raise "The xml_data field must be set to save to the cache."
|
1551
|
+
elsif self.cache_object.nil?
|
1552
|
+
raise "The cache_object is currently nil. Cannot save to cache."
|
559
1553
|
else
|
560
|
-
self.
|
1554
|
+
self.cache_object.url = self.url
|
1555
|
+
self.cache_object.title = self.title
|
1556
|
+
self.cache_object.link = self.link
|
1557
|
+
self.cache_object.xml_data = self.xml_data
|
1558
|
+
unless self.http_response.nil?
|
1559
|
+
self.cache_object.http_headers = self.http_headers.to_yaml
|
1560
|
+
end
|
1561
|
+
self.cache_object.last_retrieved = self.last_retrieved
|
1562
|
+
self.cache_object.save
|
1563
|
+
end
|
1564
|
+
end
|
1565
|
+
|
1566
|
+
alias_method :tagline, :description
|
1567
|
+
alias_method :tagline=, :description=
|
1568
|
+
alias_method :subtitle, :description
|
1569
|
+
alias_method :subtitle=, :description=
|
1570
|
+
alias_method :abstract, :description
|
1571
|
+
alias_method :abstract=, :description=
|
1572
|
+
alias_method :content, :description
|
1573
|
+
alias_method :content=, :description=
|
1574
|
+
alias_method :ttl, :time_to_live
|
1575
|
+
alias_method :ttl=, :time_to_live=
|
1576
|
+
alias_method :guid, :id
|
1577
|
+
alias_method :guid=, :id=
|
1578
|
+
alias_method :entries, :items
|
1579
|
+
|
1580
|
+
# passes missing methods to the cache_object
|
1581
|
+
def method_missing(msg, *params, &block)
  # Delegates any unknown message to the cache_object.
  #
  # msg    - the name of the method that was called
  # params - the arguments it was called with
  # block  - an optional block, forwarded unchanged
  #
  # Raises NoMethodError when there is no cache_object to delegate to.
  if cache_object.nil?
    raise NoMethodError, "Invalid method #{msg.to_s}"
  end
  # The arguments must be splatted: send(msg, params) would hand the callee
  # a single Array argument instead of the original argument list.
  return cache_object.send(msg, *params, &block)
end
|
1587
|
+
|
1588
|
+
# passes missing methods to the FeedTools.feed_cache
|
1589
|
+
def Feed.method_missing(msg, *params, &block)
  # Delegates unknown class-level messages to FeedTools.feed_cache.
  #
  # Raises NoMethodError when caching is disabled (feed_cache is nil).
  if FeedTools.feed_cache.nil?
    raise NoMethodError, "Invalid method Feed.#{msg.to_s}"
  end
  # The arguments must be splatted: send(msg, params) would hand the cache
  # a single Array argument instead of the original argument list.
  result = FeedTools.feed_cache.send(msg, *params, &block)
  # When the cache hands back one of its own records, re-open it as a Feed
  # through its url so the caller receives a Feed rather than a cache record.
  if result.kind_of? FeedTools.feed_cache
    result = Feed.open(result.url)
  end
  return result
end
|
1599
|
+
end
|
1600
|
+
|
1601
|
+
class FeedItem
|
1602
|
+
include REXML
|
1603
|
+
include AttributeDictionary
|
1604
|
+
|
1605
|
+
# This class stores information about a feed item's file enclosures.
|
1606
|
+
class Enclosure
  include AttributeDictionary

  # File extensions that mark an enclosure url as audio content.
  # TODO: create a more complete list
  AUDIO_EXTENSIONS = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma'].freeze
  # File extensions that mark an enclosure url as video content.
  # TODO: create a more complete list
  VIDEO_EXTENSIONS = ['mov', 'mp4', 'avi', 'wmv', 'asf'].freeze
  # The values accepted by expression=
  EXPRESSIONS = ['sample', 'full', 'nonstop'].freeze

  # The url for the enclosure
  attr_accessor :url
  # The MIME type of the file referenced by the enclosure
  attr_accessor :type
  # The size of the file referenced by the enclosure
  attr_accessor :file_size
  # The total play time of the file referenced by the enclosure
  attr_accessor :duration
  # The height in pixels of the enclosed media
  attr_accessor :height
  # The width in pixels of the enclosed media
  attr_accessor :width
  # The bitrate of the enclosed media
  attr_accessor :bitrate
  # The framerate of the enclosed media
  attr_accessor :framerate
  # The thumbnail for this enclosure
  attr_accessor :thumbnail
  # The categories for this enclosure
  attr_accessor :categories
  # A hash of the enclosed file
  #
  # NOTE(review): this accessor replaces Object#hash, so Enclosure instances
  # do not behave normally as Hash keys or in Array#uniq. It is kept because
  # callers assign to it; consider renaming in a later release.
  attr_accessor :hash
  # A website containing some kind of media player instead of a direct
  # link to the media file.
  attr_accessor :player
  # A list of credits for the enclosed media
  attr_accessor :credits
  # A text rendition of the enclosed media
  attr_accessor :text
  # A list of alternate versions of the enclosed media file
  attr_accessor :versions
  # The default version of the enclosed media file
  attr_accessor :default_version

  # Returns true if this is the default enclosure
  def is_default?
    return @is_default
  end

  # Sets whether this is the default enclosure for the media group
  def is_default=(new_is_default)
    @is_default = new_is_default
  end

  # Returns true if the enclosure contains explicit material
  def explicit?
    return @explicit
  end

  # Sets the explicit attribute on the enclosure
  def explicit=(new_explicit)
    @explicit = new_explicit
  end

  # Determines if the object is a sample, or the full version of the
  # object, or if it is a stream.
  # Possible values are 'sample', 'full', 'nonstop', or nil when unset.
  def expression
    return @expression
  end

  # Sets the expression attribute on the enclosure.
  # Allowed values are 'sample', 'full', 'nonstop' (case-insensitive).
  #
  # nil or a blank string clears the attribute instead of raising. The
  # enclosure parser assigns attributes["expression"].to_s, which is ""
  # whenever the attribute is absent, and that used to raise ArgumentError.
  #
  # Raises ArgumentError for any other value.
  def expression=(new_expression)
    if new_expression.nil? || new_expression.to_s.strip == ""
      @expression = nil
      return
    end
    normalized_expression = new_expression.to_s.downcase
    unless EXPRESSIONS.include? normalized_expression
      raise ArgumentError,
        "Permitted values are 'sample', 'full', 'nonstop'."
    end
    @expression = normalized_expression
  end

  # Returns true if this enclosure contains audio content, judged first by
  # the MIME type and then by the file extension of the url.
  def audio?
    unless self.type.nil?
      return true if (self.type =~ /^audio/) != nil
    end
    return url_has_extension?(AUDIO_EXTENSIONS)
  end

  # Returns true if this enclosure contains video content, judged first by
  # the MIME type and then by the file extension of the url.
  def video?
    unless self.type.nil?
      return true if (self.type =~ /^video/) != nil
      # Some feeds label QuickTime movies with this non-standard type.
      return true if self.type == "image/mov"
    end
    return url_has_extension?(VIDEO_EXTENSIONS)
  end

  private

  # Returns true if the url's path ends in a dot followed by one of the
  # given extensions. The comparison ignores case and any query string or
  # fragment, and requires the dot so that a url merely ending in the
  # letters "mp3" is not treated as an mp3 file.
  def url_has_extension?(extensions)
    return false if url.nil?
    path = url.to_s.sub(/[?#].*\z/m, "")
    return extensions.any? do |extension|
      (path =~ /\.#{Regexp.escape(extension)}\z/i) != nil
    end
  end
end
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
1713
|
+
EnclosureCategory = Struct.new( "EnclosureCategory", :category, :scheme, :label )
|
1714
|
+
EnclosureHash = Struct.new( "EnclosureHash", :hash, :type )
|
1715
|
+
EnclosurePlayer = Struct.new( "EnclosurePlayer", :url, :height, :width )
|
1716
|
+
EnclosureCredit = Struct.new( "EnclosureCredit", :name, :role )
|
1717
|
+
EnclosureThumbnail = Struct.new( "EnclosureThumbnail", :url, :height, :width )
|
590
1718
|
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
1719
|
+
# Returns the parent feed of this feed item
|
1720
|
+
def feed
|
1721
|
+
return @feed
|
1722
|
+
end
|
1723
|
+
|
1724
|
+
# Sets the parent feed of this feed item
|
1725
|
+
def feed=(new_feed)
|
1726
|
+
@feed = new_feed
|
1727
|
+
end
|
1728
|
+
|
1729
|
+
# Returns the feed item's raw xml data.
|
1730
|
+
def xml_data
|
1731
|
+
return @xml_data
|
1732
|
+
end
|
1733
|
+
|
1734
|
+
# Sets the feed item's xml data.
|
1735
|
+
def xml_data=(new_xml_data)
|
1736
|
+
@xml_data = new_xml_data
|
1737
|
+
end
|
1738
|
+
|
1739
|
+
# Returns a REXML Document of the xml_data
|
1740
|
+
def xml
|
1741
|
+
if @xml_doc.nil?
|
1742
|
+
@xml_doc = Document.new(xml_data)
|
1743
|
+
end
|
1744
|
+
return @xml_doc
|
1745
|
+
end
|
1746
|
+
|
1747
|
+
# Returns the first node within the root_node that matches the xpath query.
|
1748
|
+
def find_node(xpath)
|
1749
|
+
return XPath.first(root_node, xpath)
|
1750
|
+
end
|
1751
|
+
|
1752
|
+
# Returns all nodes within the root_node that match the xpath query.
|
1753
|
+
def find_all_nodes(xpath)
|
1754
|
+
return XPath.match(root_node, xpath)
|
1755
|
+
end
|
1756
|
+
|
1757
|
+
# Returns the root node of the feed item.
|
1758
|
+
def root_node
|
1759
|
+
if @root_node.nil?
|
1760
|
+
@root_node = xml.root
|
1761
|
+
end
|
1762
|
+
return @root_node
|
1763
|
+
end
|
1764
|
+
|
1765
|
+
# Returns the feed item title
|
1766
|
+
def title
|
1767
|
+
if @title.nil?
|
1768
|
+
if XPath.first(root_node, "title/@type").to_s == "xhtml" ||
|
1769
|
+
XPath.first(root_node, "title/@mode").to_s == "xhtml"
|
1770
|
+
@title = XPath.first(root_node, "title").inner_xml
|
1771
|
+
elsif XPath.first(root_node, "title/@type").to_s == "escaped" ||
|
1772
|
+
XPath.first(root_node, "title/@mode").to_s == "escaped"
|
1773
|
+
@title = CGI.unescapeHTML(
|
1774
|
+
XPath.first(root_node, "title/text()").to_s)
|
1775
|
+
else
|
1776
|
+
@title = CGI.unescapeHTML(
|
1777
|
+
XPath.first(root_node, "title/text()").to_s)
|
1778
|
+
end
|
1779
|
+
unless @title.nil?
|
1780
|
+
@title = CGI.unescapeHTML(FeedTools.sanitize_html(@title, :strip))
|
1781
|
+
end
|
1782
|
+
if @title != ""
|
1783
|
+
# Some blogging tools include the number of comments in a post
|
1784
|
+
# in the title... this is supremely ugly, and breaks any
|
1785
|
+
# applications which expect the title to be static, so we're
|
1786
|
+
# gonna strip them out.
|
1787
|
+
#
|
1788
|
+
# If for some incredibly weird reason you need the actual
|
1789
|
+
# unstripped title, just use find_node("title/text()").to_s
|
1790
|
+
@title = FeedTools.strip_html(
|
1791
|
+
@title.strip.gsub(/\[\d*\]$/, "")).strip
|
1792
|
+
@title.gsub!(/\n/, " ")
|
1793
|
+
end
|
1794
|
+
@title = nil if @title == ""
|
1795
|
+
end
|
1796
|
+
return @title
|
1797
|
+
end
|
1798
|
+
|
1799
|
+
# Sets the feed item title
|
1800
|
+
def title=(new_title)
|
1801
|
+
@title = new_title
|
1802
|
+
end
|
1803
|
+
|
1804
|
+
# Returns the feed item description
|
1805
|
+
def description
|
1806
|
+
if @description.nil?
|
1807
|
+
# get the item content
|
1808
|
+
@description = ""
|
1809
|
+
body_node = XPath.first(root_node, "xhtml:body")
|
1810
|
+
if body_node == nil
|
1811
|
+
body_node = XPath.first(root_node, "body")
|
1812
|
+
end
|
1813
|
+
if body_node != nil
|
1814
|
+
@description = body_node.inner_xml
|
1815
|
+
end
|
1816
|
+
if @description == ""
|
1817
|
+
@description =
|
1818
|
+
CGI.unescapeHTML(XPath.first(root_node, "content:encoded/text()").to_s)
|
1819
|
+
end
|
1820
|
+
if @description == ""
|
1821
|
+
begin
|
1822
|
+
@description = XPath.first(root_node, "description").cdatas.first.to_s
|
1823
|
+
rescue
|
1824
|
+
@description = ""
|
1825
|
+
end
|
1826
|
+
if @description == ""
|
1827
|
+
@description = XPath.first(root_node, "description/text()").to_s
|
1828
|
+
end
|
1829
|
+
if @description != ""
|
1830
|
+
if XPath.first(root_node, "description/@encoding").to_s != ""
|
1831
|
+
# Not supported... yet.
|
1832
|
+
@description = "[Embedded data objects are not supported.]"
|
1833
|
+
else
|
1834
|
+
@description = CGI.unescapeHTML(@description)
|
1835
|
+
end
|
1836
|
+
end
|
1837
|
+
end
|
1838
|
+
if @description == ""
|
1839
|
+
@description = XPath.first(root_node, "content/text()").to_s
|
1840
|
+
if @description != "" &&
|
1841
|
+
(XPath.first(root_node, "content/@mode").to_s == "escaped" ||
|
1842
|
+
XPath.first(root_node, "content/@type").to_s == "escaped")
|
1843
|
+
@description = CGI.unescapeHTML(@description)
|
1844
|
+
end
|
1845
|
+
if XPath.first(root_node, "content/@mode").to_s == "xhtml" ||
|
1846
|
+
XPath.first(root_node, "content/@type").to_s == "xhtml"
|
1847
|
+
@description = XPath.first(root_node, "content").inner_xml
|
1848
|
+
end
|
1849
|
+
end
|
1850
|
+
if @description == ""
|
1851
|
+
begin
|
1852
|
+
@description = XPath.first(root_node, "description").inner_xml
|
1853
|
+
rescue
|
1854
|
+
end
|
1855
|
+
end
|
1856
|
+
if @description == ""
|
1857
|
+
@description = self.itunes_summary
|
1858
|
+
@description = "" if @description.nil?
|
1859
|
+
end
|
1860
|
+
if @description == ""
|
1861
|
+
@description = self.itunes_subtitle
|
1862
|
+
@description = "" if @description.nil?
|
1863
|
+
end
|
1864
|
+
if @description == ""
|
1865
|
+
@description = self.media_text
|
1866
|
+
@description = "" if @description.nil?
|
1867
|
+
end
|
1868
|
+
|
1869
|
+
unless @description.nil?
|
1870
|
+
@description = FeedTools.sanitize_html(@description)
|
1871
|
+
end
|
1872
|
+
|
1873
|
+
# If it started with a bunch of divs, hack them right off. We can put
|
1874
|
+
# them back later if they're needed.
|
1875
|
+
@description.gsub!(/^(<div[^>]*>)*/, "")
|
1876
|
+
@description.gsub!(/(<\/div>)*$/, "")
|
1877
|
+
|
1878
|
+
@description.gsub!(/\n/, " ") if @description.size < 80
|
1879
|
+
@description = @description.strip unless @description.nil?
|
1880
|
+
@description = nil if @description == ""
|
595
1881
|
end
|
596
|
-
|
597
|
-
|
1882
|
+
return @description
|
1883
|
+
end
|
1884
|
+
|
1885
|
+
# Sets the feed item description
|
1886
|
+
def description=(new_description)
|
1887
|
+
@description = new_description
|
1888
|
+
end
|
1889
|
+
|
1890
|
+
# Returns the feed item link
|
1891
|
+
def link
|
1892
|
+
if @link.nil?
|
1893
|
+
@link = XPath.first(root_node, "link[@rel='alternate']/@href").to_s
|
1894
|
+
if @link == ""
|
1895
|
+
@link = XPath.first(root_node, "link/@href").to_s
|
1896
|
+
end
|
1897
|
+
if @link == ""
|
1898
|
+
@link = XPath.first(root_node, "link/text()").to_s
|
1899
|
+
end
|
1900
|
+
if @link == ""
|
1901
|
+
@link = XPath.first(root_node, "@rdf:about").to_s
|
1902
|
+
end
|
1903
|
+
if @link == ""
|
1904
|
+
@link = XPath.first(root_node, "guid[@isPermaLink='true']/text()").to_s
|
1905
|
+
end
|
1906
|
+
if @link == ""
|
1907
|
+
if FeedTools.is_url? self.guid
|
1908
|
+
@link = self.guid
|
1909
|
+
end
|
1910
|
+
end
|
1911
|
+
if @link != ""
|
1912
|
+
@link = CGI.unescapeHTML(@link)
|
1913
|
+
end
|
1914
|
+
if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
|
1915
|
+
if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
|
1916
|
+
@link = @link[1..-1]
|
1917
|
+
end
|
1918
|
+
# prepend the base to the link since they seem to have used a relative path
|
1919
|
+
@link = feed.base + @link
|
1920
|
+
end
|
1921
|
+
@link = FeedTools.normalize_url(@link)
|
598
1922
|
end
|
599
|
-
|
600
|
-
|
1923
|
+
return @link
|
1924
|
+
end
|
1925
|
+
|
1926
|
+
# Sets the feed item link
|
1927
|
+
def link=(new_link)
|
1928
|
+
@link = new_link
|
1929
|
+
end
|
1930
|
+
|
1931
|
+
# Returns the feed item comment link
|
1932
|
+
def comment_link
|
1933
|
+
if @comment_link.nil?
|
1934
|
+
# get the feed comment link from the xml document
|
1935
|
+
@comment_link = XPath.first(root_node, "comments/text()").to_s
|
1936
|
+
if @comment_link == ""
|
1937
|
+
@comment_link = self.link
|
1938
|
+
end
|
1939
|
+
@comment_link = FeedTools.normalize_url(@comment_link)
|
601
1940
|
end
|
602
|
-
|
603
|
-
|
1941
|
+
return @comment_link
|
1942
|
+
end
|
1943
|
+
|
1944
|
+
# Sets the feed item comment link
|
1945
|
+
def comment_link=(new_comment_link)
|
1946
|
+
@comment_link = new_comment_link
|
1947
|
+
end
|
1948
|
+
|
1949
|
+
# Returns the feed item image link
|
1950
|
+
def image_link
  # Resolves the image link lazily from the item's xml and memoizes it.
  #
  # The previous implementation guarded every lookup with
  # `@image_link == ""` while @image_link was still nil, so no lookup ever
  # ran and nil was handed straight to FeedTools.normalize_url.
  if @image_link.nil?
    image_queries = [
      "link[@type='image/jpeg']/@href",
      "link[@type='image/gif']/@href",
      "link[@type='image/png']/@href",
      # The following two should technically never occur, but have been
      # included simply because both have been seen occurring in the wild
      # at least once.
      "image/url/text()",
      "image/@rdf:resource",
      # If there's only a media thumbnail, we can just borrow it.
      # Technically, this isn't ideal, but chances are very good that
      # anything that makes use of this image is simply not going to care.
      "media:thumbnail/@url"
    ]
    resolved_link = ""
    image_queries.each do |query|
      resolved_link = XPath.first(root_node, query).to_s
      break unless resolved_link == ""
    end
    if resolved_link == ""
      # If there's only an itunes image, we can just borrow it. An itunes
      # image link that was already set or resolved takes precedence over
      # querying the xml again.
      if @itunes_image_link.nil? || @itunes_image_link == ""
        resolved_link = XPath.first(root_node, "itunes:image/@href").to_s
        if resolved_link == ""
          resolved_link =
            XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
        end
      else
        resolved_link = @itunes_image_link
      end
    end
    # @itunes_image_link is deliberately not written here: itunes_image_link
    # memoizes on nil and would otherwise return this un-normalized (or
    # empty) value.
    @image_link = FeedTools.normalize_url(resolved_link)
  end
  return @image_link
end
|
1996
|
+
|
1997
|
+
# Sets the feed item image link
|
1998
|
+
def image_link=(new_image_link)
|
1999
|
+
@image_link = new_image_link
|
2000
|
+
end
|
2001
|
+
|
2002
|
+
# Returns the feed item itunes image link
|
2003
|
+
#
|
2004
|
+
# If it's not present, falls back to the normal image link.
|
2005
|
+
# Technically, the itunes spec says that the image needs to be
|
2006
|
+
# square and larger than 300x300, but hey, if there's an image
|
2007
|
+
# to be had, it's better than none at all.
|
2008
|
+
def itunes_image_link
|
2009
|
+
if @itunes_image_link.nil?
|
2010
|
+
# get the feed item itunes image link from the xml document
|
2011
|
+
@itunes_image_link = XPath.first(root_node, "itunes:image/@href").to_s
|
2012
|
+
if @itunes_image_link == ""
|
2013
|
+
@itunes_image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
|
2014
|
+
end
|
2015
|
+
if @itunes_image_link == ""
|
2016
|
+
@itunes_image_link = self.image_link
|
2017
|
+
end
|
2018
|
+
@itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
|
2019
|
+
end
|
2020
|
+
return @itunes_image_link
|
2021
|
+
end
|
2022
|
+
|
2023
|
+
# Sets the feed item itunes image link
|
2024
|
+
def itunes_image_link=(new_itunes_image_link)
|
2025
|
+
@itunes_image_link = new_itunes_image_link
|
2026
|
+
end
|
2027
|
+
|
2028
|
+
# Returns the feed item media thumbnail link
|
2029
|
+
#
|
2030
|
+
# If it's not present, falls back to the normal image link.
|
2031
|
+
def media_thumbnail_link
|
2032
|
+
if @media_thumbnail_link.nil?
|
2033
|
+
# get the feed item media thumbnail link from the xml document
|
2034
|
+
@media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
|
2035
|
+
if @media_thumbnail_link == ""
|
2036
|
+
@media_thumbnail_link = image_link
|
2037
|
+
end
|
2038
|
+
@media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
|
604
2039
|
end
|
605
|
-
|
2040
|
+
return @media_thumbnail_link
|
2041
|
+
end
|
2042
|
+
|
2043
|
+
# Sets the feed item media thumbnail url
|
2044
|
+
def media_thumbnail_link=(new_media_thumbnail_link)
|
2045
|
+
@media_thumbnail_link = new_media_thumbnail_link
|
2046
|
+
end
|
606
2047
|
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
item_title = item_title.gsub(/\[\d*\]/,"").strip
|
614
|
-
existing_title = existing_title.gsub(/\[\d*\]/,"").strip
|
615
|
-
item_title = item_title.gsub(/\(\d*\)/,"").strip
|
616
|
-
existing_title = existing_title.gsub(/\(\d*\)/,"").strip
|
617
|
-
item_title = item_title.gsub(/\{\d*\}/,"").strip
|
618
|
-
existing_title = existing_title.gsub(/\{\d*\}/,"").strip
|
619
|
-
if existing_title != item_title
|
620
|
-
feed_item = nil
|
2048
|
+
# Returns the feed item's unique id
|
2049
|
+
def id
|
2050
|
+
if @id.nil?
|
2051
|
+
@id = XPath.first(root_node, "id/text()").to_s
|
2052
|
+
if @id == ""
|
2053
|
+
@id = XPath.first(root_node, "guid/text()").to_s
|
621
2054
|
end
|
2055
|
+
@id = nil if @id == ""
|
622
2056
|
end
|
623
|
-
return
|
2057
|
+
return @id
|
624
2058
|
end
|
625
2059
|
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
feed_item = FeedItem.new
|
630
|
-
end
|
631
|
-
feed_item.feed = self
|
632
|
-
feed_item.parse_item(item_data)
|
633
|
-
return feed_item
|
634
|
-
end
|
635
|
-
|
636
|
-
def build_feed_hook(feed_type, version, xml_builder)
|
637
|
-
return nil
|
2060
|
+
# Sets the feed item's unique id
|
2061
|
+
def id=(new_id)
|
2062
|
+
@id = new_id
|
638
2063
|
end
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
2064
|
+
|
2065
|
+
# Returns all feed item enclosures
|
2066
|
+
def enclosures
|
2067
|
+
if @enclosures.nil?
|
2068
|
+
@enclosures = []
|
2069
|
+
|
2070
|
+
# First, load up all the different possible sources of enclosures
|
2071
|
+
rss_enclosures = XPath.match(root_node, "enclosure")
|
2072
|
+
atom_enclosures = XPath.match(root_node, "link[@rel='enclosure']")
|
2073
|
+
media_content_enclosures = XPath.match(root_node, "media:content")
|
2074
|
+
media_group_enclosures = XPath.match(root_node, "media:group")
|
2075
|
+
|
2076
|
+
# Parse RSS-type enclosures. Thanks to a few buggy enclosures implementations,
|
2077
|
+
# sometimes these also manage to show up in atom files.
|
2078
|
+
for enclosure_node in rss_enclosures
|
2079
|
+
enclosure = Enclosure.new
|
2080
|
+
enclosure.url = CGI.unescapeHTML(enclosure_node.attributes["url"].to_s)
|
2081
|
+
enclosure.type = enclosure_node.attributes["type"].to_s
|
2082
|
+
enclosure.file_size = enclosure_node.attributes["length"].to_i
|
2083
|
+
enclosure.credits = []
|
2084
|
+
enclosure.explicit = false
|
2085
|
+
@enclosures << enclosure
|
2086
|
+
end
|
2087
|
+
|
2088
|
+
# Parse atom-type enclosures. If there are repeats of the same enclosure object,
|
2089
|
+
# we merge the two together.
|
2090
|
+
for enclosure_node in atom_enclosures
|
2091
|
+
enclosure_url = CGI.unescapeHTML(enclosure_node.attributes["href"].to_s)
|
2092
|
+
enclosure = nil
|
2093
|
+
new_enclosure = false
|
2094
|
+
for existing_enclosure in @enclosures
|
2095
|
+
if existing_enclosure.url == enclosure_url
|
2096
|
+
enclosure = existing_enclosure
|
2097
|
+
break
|
654
2098
|
end
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
2099
|
+
end
|
2100
|
+
if enclosure.nil?
|
2101
|
+
new_enclosure = true
|
2102
|
+
enclosure = Enclosure.new
|
2103
|
+
end
|
2104
|
+
enclosure.url = enclosure_url
|
2105
|
+
enclosure.type = enclosure_node.attributes["type"].to_s
|
2106
|
+
enclosure.file_size = enclosure_node.attributes["length"].to_i
|
2107
|
+
enclosure.credits = []
|
2108
|
+
enclosure.explicit = false
|
2109
|
+
if new_enclosure
|
2110
|
+
@enclosures << enclosure
|
2111
|
+
end
|
2112
|
+
end
|
2113
|
+
|
2114
|
+
# Creates an anonymous method to parse content objects from the media module. We
|
2115
|
+
# do this to avoid excessive duplication of code since we have to do identical
|
2116
|
+
# processing for content objects within group objects.
|
2117
|
+
parse_media_content = lambda do |media_content_nodes|
|
2118
|
+
affected_enclosures = []
|
2119
|
+
for enclosure_node in media_content_nodes
|
2120
|
+
enclosure_url = CGI.unescapeHTML(enclosure_node.attributes["url"].to_s)
|
2121
|
+
enclosure = nil
|
2122
|
+
new_enclosure = false
|
2123
|
+
for existing_enclosure in @enclosures
|
2124
|
+
if existing_enclosure.url == enclosure_url
|
2125
|
+
enclosure = existing_enclosure
|
2126
|
+
break
|
2127
|
+
end
|
659
2128
|
end
|
660
|
-
|
661
|
-
|
2129
|
+
if enclosure.nil?
|
2130
|
+
new_enclosure = true
|
2131
|
+
enclosure = Enclosure.new
|
662
2132
|
end
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
2133
|
+
enclosure.url = enclosure_url
|
2134
|
+
enclosure.type = enclosure_node.attributes["type"].to_s
|
2135
|
+
enclosure.file_size = enclosure_node.attributes["fileSize"].to_i
|
2136
|
+
enclosure.duration = enclosure_node.attributes["duration"].to_s
|
2137
|
+
enclosure.height = enclosure_node.attributes["height"].to_i
|
2138
|
+
enclosure.width = enclosure_node.attributes["width"].to_i
|
2139
|
+
enclosure.bitrate = enclosure_node.attributes["bitrate"].to_i
|
2140
|
+
enclosure.framerate = enclosure_node.attributes["framerate"].to_i
|
2141
|
+
enclosure.expression = enclosure_node.attributes["expression"].to_s
|
2142
|
+
enclosure.is_default =
|
2143
|
+
(enclosure_node.attributes["isDefault"].to_s.downcase == "true")
|
2144
|
+
if XPath.first(enclosure_node, "media:thumbnail/@url").to_s != ""
|
2145
|
+
enclosure.thumbnail = EnclosureThumbnail.new(
|
2146
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@url").to_s),
|
2147
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@height").to_s),
|
2148
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@width").to_s)
|
2149
|
+
)
|
2150
|
+
if enclosure.thumbnail.height == ""
|
2151
|
+
enclosure.thumbnail.height = nil
|
2152
|
+
end
|
2153
|
+
if enclosure.thumbnail.width == ""
|
2154
|
+
enclosure.thumbnail.width = nil
|
2155
|
+
end
|
667
2156
|
end
|
668
|
-
|
669
|
-
|
2157
|
+
enclosure.categories = []
|
2158
|
+
for category in XPath.match(enclosure_node, "media:category")
|
2159
|
+
enclosure.categories << EnclosureCategory.new(
|
2160
|
+
CGI.unescapeHTML(category.text),
|
2161
|
+
CGI.unescapeHTML(category.attributes["scheme"].to_s),
|
2162
|
+
CGI.unescapeHTML(category.attributes["label"].to_s)
|
2163
|
+
)
|
2164
|
+
if enclosure.categories.last.scheme == ""
|
2165
|
+
enclosure.categories.last.scheme = nil
|
2166
|
+
end
|
2167
|
+
if enclosure.categories.last.label == ""
|
2168
|
+
enclosure.categories.last.label = nil
|
2169
|
+
end
|
670
2170
|
end
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
2171
|
+
if XPath.first(enclosure_node, "media:hash/text()").to_s != ""
|
2172
|
+
enclosure.hash = EnclosureHash.new(
|
2173
|
+
FeedTools.sanitize_html(CGI.unescapeHTML(XPath.first(
|
2174
|
+
enclosure_node, "media:hash/text()").to_s), :strip),
|
2175
|
+
"md5"
|
2176
|
+
)
|
2177
|
+
end
|
2178
|
+
if XPath.first(enclosure_node, "media:player/@url").to_s != ""
|
2179
|
+
enclosure.player = EnclosurePlayer.new(
|
2180
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@url").to_s),
|
2181
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@height").to_s),
|
2182
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@width").to_s)
|
2183
|
+
)
|
2184
|
+
if enclosure.player.height == ""
|
2185
|
+
enclosure.player.height = nil
|
2186
|
+
end
|
2187
|
+
if enclosure.player.width == ""
|
2188
|
+
enclosure.player.width = nil
|
2189
|
+
end
|
2190
|
+
end
|
2191
|
+
enclosure.credits = []
|
2192
|
+
for credit in XPath.match(enclosure_node, "media:credit")
|
2193
|
+
enclosure.credits << EnclosureCredit.new(
|
2194
|
+
CGI.unescapeHTML(CGI.unescapeHTML(credit.text)),
|
2195
|
+
CGI.unescapeHTML(credit.attributes["role"].to_s.downcase)
|
2196
|
+
)
|
2197
|
+
if enclosure.credits.last.role == ""
|
2198
|
+
enclosure.credits.last.role = nil
|
684
2199
|
end
|
685
2200
|
end
|
686
|
-
|
2201
|
+
enclosure.explicit = (XPath.first(enclosure_node,
|
2202
|
+
"media:adult/text()").to_s.downcase == "true")
|
2203
|
+
if XPath.first(enclosure_node, "media:text/text()").to_s != ""
|
2204
|
+
enclosure.text = CGI.unescapeHTML(XPath.first(enclosure_node,
|
2205
|
+
"media:text/text()").to_s)
|
2206
|
+
end
|
2207
|
+
affected_enclosures << enclosure
|
2208
|
+
if new_enclosure
|
2209
|
+
@enclosures << enclosure
|
2210
|
+
end
|
687
2211
|
end
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
2212
|
+
affected_enclosures
|
2213
|
+
end
|
2214
|
+
|
2215
|
+
# Parse the independent content objects.
|
2216
|
+
parse_media_content.call(media_content_enclosures)
|
2217
|
+
|
2218
|
+
media_groups = []
|
2219
|
+
|
2220
|
+
# Parse the group objects.
|
2221
|
+
for media_group in media_group_enclosures
|
2222
|
+
group_media_content_enclosures =
|
2223
|
+
XPath.match(media_group, "media:content")
|
2224
|
+
|
2225
|
+
# Parse the content objects within the group objects.
|
2226
|
+
affected_enclosures =
|
2227
|
+
parse_media_content.call(group_media_content_enclosures)
|
2228
|
+
|
2229
|
+
# Now make sure that content objects inherit certain properties from
|
2230
|
+
# the group objects.
|
2231
|
+
for enclosure in affected_enclosures
|
2232
|
+
if enclosure.thumbnail.nil? &&
|
2233
|
+
XPath.first(media_group, "media:thumbnail/@url").to_s != ""
|
2234
|
+
enclosure.thumbnail = EnclosureThumbnail.new(
|
2235
|
+
CGI.unescapeHTML(
|
2236
|
+
XPath.first(media_group, "media:thumbnail/@url").to_s),
|
2237
|
+
CGI.unescapeHTML(
|
2238
|
+
XPath.first(media_group, "media:thumbnail/@height").to_s),
|
2239
|
+
CGI.unescapeHTML(
|
2240
|
+
XPath.first(media_group, "media:thumbnail/@width").to_s)
|
2241
|
+
)
|
2242
|
+
if enclosure.thumbnail.height == ""
|
2243
|
+
enclosure.thumbnail.height = nil
|
694
2244
|
end
|
695
|
-
|
696
|
-
|
2245
|
+
if enclosure.thumbnail.width == ""
|
2246
|
+
enclosure.thumbnail.width = nil
|
697
2247
|
end
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
2248
|
+
end
|
2249
|
+
if (enclosure.categories.nil? || enclosure.categories.size == 0)
|
2250
|
+
enclosure.categories = []
|
2251
|
+
for category in XPath.match(media_group, "media:category")
|
2252
|
+
enclosure.categories << EnclosureCategory.new(
|
2253
|
+
CGI.unescapeHTML(category.text),
|
2254
|
+
CGI.unescapeHTML(category.attributes["scheme"].to_s),
|
2255
|
+
CGI.unescapeHTML(category.attributes["label"].to_s)
|
2256
|
+
)
|
2257
|
+
if enclosure.categories.last.scheme == ""
|
2258
|
+
enclosure.categories.last.scheme = nil
|
2259
|
+
end
|
2260
|
+
if enclosure.categories.last.label == ""
|
2261
|
+
enclosure.categories.last.label = nil
|
2262
|
+
end
|
702
2263
|
end
|
703
2264
|
end
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
2265
|
+
if enclosure.hash.nil? &&
|
2266
|
+
XPath.first(media_group, "media:hash/text()").to_s != ""
|
2267
|
+
enclosure.hash = EnclosureHash.new(
|
2268
|
+
CGI.unescapeHTML(XPath.first(media_group, "media:hash/text()").to_s),
|
2269
|
+
"md5"
|
2270
|
+
)
|
2271
|
+
end
|
2272
|
+
if enclosure.player.nil? &&
|
2273
|
+
XPath.first(media_group, "media:player/@url").to_s != ""
|
2274
|
+
enclosure.player = EnclosurePlayer.new(
|
2275
|
+
CGI.unescapeHTML(XPath.first(media_group, "media:player/@url").to_s),
|
2276
|
+
CGI.unescapeHTML(XPath.first(media_group, "media:player/@height").to_s),
|
2277
|
+
CGI.unescapeHTML(XPath.first(media_group, "media:player/@width").to_s)
|
2278
|
+
)
|
2279
|
+
if enclosure.player.height == ""
|
2280
|
+
enclosure.player.height = nil
|
2281
|
+
end
|
2282
|
+
if enclosure.player.width == ""
|
2283
|
+
enclosure.player.width = nil
|
2284
|
+
end
|
2285
|
+
end
|
2286
|
+
if enclosure.credits.nil? || enclosure.credits.size == 0
|
2287
|
+
enclosure.credits = []
|
2288
|
+
for credit in XPath.match(media_group, "media:credit")
|
2289
|
+
enclosure.credits << EnclosureCredit.new(
|
2290
|
+
CGI.unescapeHTML(CGI.unescapeHTML(credit.text)),
|
2291
|
+
CGI.unescapeHTML(credit.attributes["role"].to_s.downcase)
|
2292
|
+
)
|
2293
|
+
if enclosure.credits.last.role == ""
|
2294
|
+
enclosure.credits.last.role = nil
|
2295
|
+
end
|
2296
|
+
end
|
2297
|
+
end
|
2298
|
+
if enclosure.explicit?.nil?
|
2299
|
+
enclosure.explicit = (XPath.first(media_group,
|
2300
|
+
"media:adult/text()").to_s.downcase == "true") ? true : false
|
2301
|
+
end
|
2302
|
+
if enclosure.text.nil? &&
|
2303
|
+
XPath.first(media_group, "media:text/text()").to_s != ""
|
2304
|
+
enclosure.text = FeedTools.sanitize_html(CGI.unescapeHTML(
|
2305
|
+
XPath.first(media_group, "media:text/text()").to_s), :strip)
|
708
2306
|
end
|
709
2307
|
end
|
2308
|
+
|
2309
|
+
# Keep track of the media groups
|
2310
|
+
media_groups << affected_enclosures
|
710
2311
|
end
|
711
|
-
|
712
|
-
#
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
end
|
717
|
-
unless link.nil? || link == ""
|
718
|
-
xml_builder.link(link)
|
2312
|
+
|
2313
|
+
# Now we need to inherit any relevant item level information.
|
2314
|
+
if self.explicit?
|
2315
|
+
for enclosure in @enclosures
|
2316
|
+
enclosure.explicit = true
|
719
2317
|
end
|
720
|
-
|
721
|
-
|
2318
|
+
end
|
2319
|
+
|
2320
|
+
# Add all the itunes categories
|
2321
|
+
for itunes_category in XPath.match(root_node, "itunes:category")
|
2322
|
+
genre = "Podcasts"
|
2323
|
+
category = itunes_category.attributes["text"].to_s
|
2324
|
+
subcategory = XPath.first(itunes_category, "itunes:category/@text").to_s
|
2325
|
+
category_path = genre
|
2326
|
+
if category != ""
|
2327
|
+
category_path << "/" + category
|
722
2328
|
end
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
2329
|
+
if subcategory != ""
|
2330
|
+
category_path << "/" + subcategory
|
2331
|
+
end
|
2332
|
+
for enclosure in @enclosures
|
2333
|
+
if enclosure.categories.nil?
|
2334
|
+
enclosure.categories = []
|
729
2335
|
end
|
2336
|
+
enclosure.categories << EnclosureCategory.new(
|
2337
|
+
CGI.unescapeHTML(category_path),
|
2338
|
+
CGI.unescapeHTML("http://www.apple.com/itunes/store/"),
|
2339
|
+
CGI.unescapeHTML("iTunes Music Store Categories")
|
2340
|
+
)
|
730
2341
|
end
|
731
2342
|
end
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
xml_builder.title(title,
|
739
|
-
"mode" => "escaped",
|
740
|
-
"type" => "text/html")
|
2343
|
+
|
2344
|
+
for enclosure in @enclosures
|
2345
|
+
# Clean up any of those attributes that incorrectly have ""
|
2346
|
+
# or 0 as their values
|
2347
|
+
if enclosure.type == ""
|
2348
|
+
enclosure.type = nil
|
741
2349
|
end
|
742
|
-
|
743
|
-
|
744
|
-
"rel" => "alternate",
|
745
|
-
"type" => "text/html",
|
746
|
-
"title" => title)
|
2350
|
+
if enclosure.file_size == 0
|
2351
|
+
enclosure.file_size = nil
|
747
2352
|
end
|
748
|
-
|
749
|
-
|
750
|
-
"mode" => "escaped",
|
751
|
-
"type" => "text/html")
|
2353
|
+
if enclosure.duration == 0
|
2354
|
+
enclosure.duration = nil
|
752
2355
|
end
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
2356
|
+
if enclosure.height == 0
|
2357
|
+
enclosure.height = nil
|
2358
|
+
end
|
2359
|
+
if enclosure.width == 0
|
2360
|
+
enclosure.width = nil
|
2361
|
+
end
|
2362
|
+
if enclosure.bitrate == 0
|
2363
|
+
enclosure.bitrate = nil
|
2364
|
+
end
|
2365
|
+
if enclosure.framerate == 0
|
2366
|
+
enclosure.framerate = nil
|
2367
|
+
end
|
2368
|
+
if enclosure.expression == "" || enclosure.expression.nil?
|
2369
|
+
enclosure.expression = "full"
|
2370
|
+
end
|
2371
|
+
|
2372
|
+
# If an enclosure is missing the text field, fall back on the itunes:summary field
|
2373
|
+
if enclosure.text.nil? || enclosure.text = ""
|
2374
|
+
enclosure.text = self.itunes_summary
|
2375
|
+
end
|
2376
|
+
|
2377
|
+
# Make sure we don't have duplicate categories
|
2378
|
+
unless enclosure.categories.nil?
|
2379
|
+
enclosure.categories.uniq!
|
2380
|
+
end
|
2381
|
+
end
|
2382
|
+
|
2383
|
+
# And finally, now things get complicated. This is where we make
|
2384
|
+
# sure that the enclosures method only returns either default
|
2385
|
+
# enclosures or enclosures with only one version. Any enclosures
|
2386
|
+
# that are wrapped in a media:group will be placed in the appropriate
|
2387
|
+
# versions field.
|
2388
|
+
affected_enclosure_urls = []
|
2389
|
+
for media_group in media_groups
|
2390
|
+
affected_enclosure_urls =
|
2391
|
+
affected_enclosure_urls | (media_group.map do |enclosure|
|
2392
|
+
enclosure.url
|
2393
|
+
end)
|
2394
|
+
end
|
2395
|
+
@enclosures.delete_if do |enclosure|
|
2396
|
+
(affected_enclosure_urls.include? enclosure.url)
|
2397
|
+
end
|
2398
|
+
for media_group in media_groups
|
2399
|
+
default_enclosure = nil
|
2400
|
+
for enclosure in media_group
|
2401
|
+
if enclosure.is_default?
|
2402
|
+
default_enclosure = enclosure
|
759
2403
|
end
|
760
2404
|
end
|
2405
|
+
for enclosure in media_group
|
2406
|
+
enclosure.default_version = default_enclosure
|
2407
|
+
enclosure.versions = media_group.clone
|
2408
|
+
enclosure.versions.delete(enclosure)
|
2409
|
+
end
|
2410
|
+
@enclosures << default_enclosure
|
761
2411
|
end
|
762
2412
|
end
|
763
|
-
end
|
764
|
-
|
765
|
-
# Saves the current state of the feed to the database unless the feed lacks a remote location
|
766
|
-
def save
|
767
|
-
unless url.nil? || url == ""
|
768
|
-
super
|
769
|
-
end
|
770
|
-
end
|
771
|
-
end
|
772
2413
|
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
connection.execute "select id, feed_id, link, title, author, description, " +
|
780
|
-
"time, tags from feed_items limit 1"
|
781
|
-
rescue ActiveRecord::StatementInvalid
|
782
|
-
return false
|
2414
|
+
# If we have a single enclosure, it's safe to inherit the itunes:duration field
|
2415
|
+
# if it's missing.
|
2416
|
+
if @enclosures.size == 1
|
2417
|
+
if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
|
2418
|
+
@enclosures.first.duration = self.duration
|
2419
|
+
end
|
783
2420
|
end
|
784
|
-
|
2421
|
+
|
2422
|
+
return @enclosures
|
785
2423
|
end
|
786
2424
|
|
787
|
-
def
|
788
|
-
|
789
|
-
return @feed
|
790
|
-
elsif @feed_id != nil
|
791
|
-
@feed = Feed.find_by_id(self.feed_id)
|
792
|
-
return @feed
|
793
|
-
else
|
794
|
-
return nil
|
795
|
-
end
|
2425
|
+
def enclosures=(new_enclosures)
|
2426
|
+
@enclosures = new_enclosures
|
796
2427
|
end
|
797
2428
|
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
2429
|
+
# Returns the feed item author
|
2430
|
+
def author_name
|
2431
|
+
# TODO: make this not suck, actually ensure we're looking at a name
|
2432
|
+
# and not an email address.
|
2433
|
+
# Also, factor in itunes module.
|
2434
|
+
# =================================================================
|
2435
|
+
if @author_name.nil?
|
2436
|
+
@author_name = CGI.unescapeHTML(XPath.first(root_node, "author/name/text()").to_s)
|
2437
|
+
if @author_name == ""
|
2438
|
+
@author_name = CGI.unescapeHTML(XPath.first(root_node, "dc:creator/text()").to_s)
|
2439
|
+
end
|
2440
|
+
if @author_name == ""
|
2441
|
+
@author_name = CGI.unescapeHTML(XPath.first(root_node, "author/text()").to_s)
|
2442
|
+
end
|
2443
|
+
end
|
2444
|
+
return @author_name
|
805
2445
|
end
|
806
|
-
|
807
|
-
|
808
|
-
|
2446
|
+
|
2447
|
+
# Sets the feed item author
|
2448
|
+
def author_name=(new_author_name)
|
2449
|
+
@author_name = new_author_name
|
809
2450
|
end
|
810
|
-
|
811
|
-
|
812
|
-
|
2451
|
+
|
2452
|
+
# Returns the contents of the itunes:summary element
|
2453
|
+
def itunes_summary
|
2454
|
+
if @itunes_summary.nil?
|
2455
|
+
@itunes_summary = CGI.unescapeHTML(XPath.first(root_node,
|
2456
|
+
"itunes:summary/text()").to_s)
|
2457
|
+
if @itunes_summary == ""
|
2458
|
+
@itunes_summary = nil
|
2459
|
+
end
|
2460
|
+
unless @itunes_summary.nil?
|
2461
|
+
@itunes_summary = FeedTools.sanitize_html(@itunes_summary)
|
2462
|
+
end
|
2463
|
+
end
|
2464
|
+
return @itunes_summary
|
813
2465
|
end
|
814
2466
|
|
815
|
-
|
816
|
-
|
2467
|
+
# Sets the contents of the itunes:summary element
|
2468
|
+
def itunes_summary=(new_itunes_summary)
|
2469
|
+
@itunes_summary = new_itunes_summary
|
817
2470
|
end
|
818
2471
|
|
819
|
-
|
820
|
-
|
2472
|
+
# Returns the contents of the itunes:subtitle element
|
2473
|
+
def itunes_subtitle
|
2474
|
+
if @itunes_subtitle.nil?
|
2475
|
+
@itunes_subtitle = CGI.unescapeHTML(XPath.first(root_node,
|
2476
|
+
"itunes:subtitle/text()").to_s)
|
2477
|
+
if @itunes_subtitle == ""
|
2478
|
+
@itunes_subtitle = nil
|
2479
|
+
end
|
2480
|
+
unless @itunes_subtitle.nil?
|
2481
|
+
@itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
|
2482
|
+
end
|
2483
|
+
end
|
2484
|
+
return @itunes_subtitle
|
821
2485
|
end
|
822
2486
|
|
823
|
-
|
824
|
-
|
2487
|
+
# Sets the contents of the itunes:subtitle element
|
2488
|
+
def itunes_subtitle=(new_itunes_subtitle)
|
2489
|
+
@itunes_subtitle = new_itunes_subtitle
|
825
2490
|
end
|
826
2491
|
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
end
|
838
|
-
if link == ""
|
839
|
-
link = XPath.first(item_node, "link/text()").to_s
|
840
|
-
end
|
841
|
-
if link == ""
|
842
|
-
link = XPath.first(item_node, "@rdf:about").to_s
|
843
|
-
end
|
844
|
-
if link == ""
|
845
|
-
link = XPath.first(item_node, "guid/text()").to_s
|
846
|
-
end
|
847
|
-
if link != ""
|
848
|
-
link = CGI.unescapeHTML(link)
|
849
|
-
end
|
850
|
-
if link != "" && (link =~ /http:\/\//) != 0 && (link =~ /https:\/\//) != 0
|
851
|
-
# ensure that we don't end up with 'http://www.foobar.com//path/to/entry'
|
852
|
-
# future-proofed this so that it doesn't break when Ruby 1.9/2.0 starts
|
853
|
-
# returning single character Strings instead of FixNums
|
854
|
-
if (base[-1] == 47 && link[0] == 47) || (base[-1] == "/" && link[0] == "/")
|
855
|
-
link = link[1..-1]
|
2492
|
+
# Returns the contents of the media:text element
|
2493
|
+
def media_text
|
2494
|
+
if @media_text.nil?
|
2495
|
+
@media_text = CGI.unescapeHTML(XPath.first(root_node,
|
2496
|
+
"itunes:subtitle/text()").to_s)
|
2497
|
+
if @media_text == ""
|
2498
|
+
@media_text = nil
|
2499
|
+
end
|
2500
|
+
unless @media_text.nil?
|
2501
|
+
@media_text = FeedTools.sanitize_html(@media_text)
|
856
2502
|
end
|
857
|
-
# prepend the base to the link since they seem to have used a relative path
|
858
|
-
link = base + link
|
859
|
-
end
|
860
|
-
|
861
|
-
title = XPath.first(item_node, "title/text()").to_s
|
862
|
-
if title != ""
|
863
|
-
# some blogging tools (notably TextPattern I believe) include the number of
|
864
|
-
# comments in a post in the title... this is ugly, so we're gonna strip them out
|
865
|
-
title = title.gsub(/\[\d*\]/,"").strip
|
866
|
-
end
|
867
|
-
|
868
|
-
# get the item author
|
869
|
-
author = CGI.unescapeHTML(XPath.first(item_node, "author/name/text()").to_s)
|
870
|
-
if author == ""
|
871
|
-
author = CGI.unescapeHTML(XPath.first(item_node, "dc:creator/text()").to_s)
|
872
|
-
end
|
873
|
-
if author == ""
|
874
|
-
author = CGI.unescapeHTML(XPath.first(item_node, "author/text()").to_s)
|
875
2503
|
end
|
2504
|
+
return @media_text
|
2505
|
+
end
|
876
2506
|
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
2507
|
+
# Sets the contents of the media:text element
|
2508
|
+
def media_text=(new_media_text)
|
2509
|
+
@media_text = new_media_text
|
2510
|
+
end
|
2511
|
+
|
2512
|
+
# Returns the contents of the itunes:author element
|
2513
|
+
#
|
2514
|
+
# This inherits from any incorrectly placed channel-level itunes:author
|
2515
|
+
# elements. They're actually amazingly common. People don't read specs.
|
2516
|
+
def itunes_author
|
2517
|
+
if @itunes_author.nil?
|
2518
|
+
@itunes_author = CGI.unescapeHTML(XPath.first(root_node,
|
2519
|
+
"itunes:author/text()").to_s)
|
2520
|
+
if @itunes_author == ""
|
2521
|
+
@itunes_author = CGI.unescapeHTML(XPath.first(feed.channel_node,
|
2522
|
+
"itunes:author/text()").to_s)
|
2523
|
+
end
|
2524
|
+
if @itunes_author == ""
|
2525
|
+
@itunes_author = nil
|
2526
|
+
end
|
888
2527
|
end
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
2528
|
+
return @itunes_author
|
2529
|
+
end
|
2530
|
+
|
2531
|
+
# Sets the contents of the itunes:author element
|
2532
|
+
def itunes_author=(new_itunes_author)
|
2533
|
+
@itunes_author = new_itunes_author
|
2534
|
+
end
|
2535
|
+
|
2536
|
+
# Returns the number of seconds that the associated media runs for
|
2537
|
+
def duration
|
2538
|
+
if @duration.nil?
|
2539
|
+
itunes_duration = CGI.unescapeHTML(XPath.first(root_node,
|
2540
|
+
"itunes:duration/text()").to_s)
|
2541
|
+
if itunes_duration != ""
|
2542
|
+
hms = itunes_duration.split(":").map { |x| x.to_i }
|
2543
|
+
if hms.size == 3
|
2544
|
+
@duration = hms[0].hour + hms[1].minute + hms[2]
|
2545
|
+
elsif hms.size == 2
|
2546
|
+
@duration = hms[0].minute + hms[1]
|
2547
|
+
elsif hms.size == 1
|
2548
|
+
@duration = hms[0]
|
896
2549
|
end
|
897
2550
|
end
|
898
2551
|
end
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
2552
|
+
return @duration
|
2553
|
+
end
|
2554
|
+
|
2555
|
+
# Sets the number of seconds that the associated media runs for
|
2556
|
+
def duration=(new_duration)
|
2557
|
+
@duration = new_duration
|
2558
|
+
end
|
2559
|
+
|
2560
|
+
# Sets the itunes:summary
|
2561
|
+
def itunes_summary=(new_itunes_summary)
|
2562
|
+
end
|
2563
|
+
|
2564
|
+
# Returns the feed item time
|
2565
|
+
def time
|
2566
|
+
if @time.nil?
|
2567
|
+
time_string = XPath.first(root_node, "pubDate/text()").to_s
|
2568
|
+
if time_string == ""
|
2569
|
+
time_string = XPath.first(root_node, "dc:date/text()").to_s
|
2570
|
+
end
|
2571
|
+
if time_string == ""
|
2572
|
+
time_string = XPath.first(root_node, "issued/text()").to_s
|
2573
|
+
end
|
2574
|
+
if time_string != ""
|
2575
|
+
@time = Time.parse(time_string) rescue Time.now
|
2576
|
+
elsif time_string == nil
|
2577
|
+
@time = Time.now
|
903
2578
|
end
|
904
2579
|
end
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
#
|
916
|
-
|
917
|
-
if
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
2580
|
+
return @time
|
2581
|
+
end
|
2582
|
+
|
2583
|
+
# Sets the feed item time
|
2584
|
+
def time=(new_time)
|
2585
|
+
@time = new_time
|
2586
|
+
end
|
2587
|
+
|
2588
|
+
# Returns the feed item tags
|
2589
|
+
def tags
|
2590
|
+
# TODO: support the rel="tag" microformat
|
2591
|
+
# =======================================
|
2592
|
+
if @tags.nil?
|
2593
|
+
@tags = []
|
2594
|
+
if @tags.nil? || @tags.size == 0
|
2595
|
+
@tags = []
|
2596
|
+
tag_list = XPath.match(root_node, "dc:subject/rdf:Bag/rdf:li/text()")
|
2597
|
+
if tag_list.size > 1
|
2598
|
+
for tag in tag_list
|
2599
|
+
@tags << tag.to_s.downcase.strip
|
2600
|
+
end
|
923
2601
|
end
|
924
2602
|
end
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
2603
|
+
if @tags.nil? || @tags.size == 0
|
2604
|
+
# messy effort to find ourselves some tags, mainly for del.icio.us
|
2605
|
+
@tags = []
|
2606
|
+
rdf_bag = XPath.match(root_node, "taxo:topics/rdf:Bag/rdf:li")
|
2607
|
+
if rdf_bag != nil && rdf_bag.size > 0
|
2608
|
+
for tag_node in rdf_bag
|
2609
|
+
begin
|
2610
|
+
tag_url = XPath.first(root_node, "@resource").to_s
|
2611
|
+
tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
|
2612
|
+
if tag_match.size > 0
|
2613
|
+
@tags << tag_match.first.last.downcase.strip
|
2614
|
+
end
|
2615
|
+
rescue
|
2616
|
+
end
|
2617
|
+
end
|
2618
|
+
end
|
2619
|
+
end
|
2620
|
+
if @tags.nil? || @tags.size == 0
|
2621
|
+
@tags = []
|
2622
|
+
tag_list = XPath.match(root_node, "category/text()")
|
930
2623
|
for tag in tag_list
|
931
|
-
|
2624
|
+
@tags << tag.to_s.downcase.strip
|
932
2625
|
end
|
933
2626
|
end
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
tag_list = XPath.match(item_node, "dc:subject/text()")
|
938
|
-
if tag_list.size > 1
|
2627
|
+
if @tags.nil? || @tags.size == 0
|
2628
|
+
@tags = []
|
2629
|
+
tag_list = XPath.match(root_node, "dc:subject/text()")
|
939
2630
|
for tag in tag_list
|
940
|
-
|
2631
|
+
@tags << tag.to_s.downcase.strip
|
941
2632
|
end
|
942
2633
|
end
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
if tags_array == nil || tags_array.size == 0
|
949
|
-
begin
|
950
|
-
tags_array = XPath.first(item_node,
|
951
|
-
"dc:subject/text()").to_s.downcase.split(" ")
|
952
|
-
rescue
|
953
|
-
tags_array = []
|
954
|
-
end
|
955
|
-
end
|
956
|
-
if tags_array == nil || tags_array.size == 0
|
957
|
-
tags_array = []
|
958
|
-
rdf_bag = XPath.match(item_node,
|
959
|
-
"taxo:topics/rdf:Bag/rdf:li")
|
960
|
-
if rdf_bag != nil && rdf_bag.size > 0
|
961
|
-
for tag_node in rdf_bag
|
962
|
-
begin
|
963
|
-
tag_url = XPath.first(tag_node, "@resource").to_s
|
964
|
-
tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
|
965
|
-
if tag_match.size > 0
|
966
|
-
tags_array << tag_match.first.last.downcase.strip
|
967
|
-
end
|
968
|
-
rescue
|
969
|
-
end
|
2634
|
+
if @tags.nil? || @tags.size == 0
|
2635
|
+
begin
|
2636
|
+
@tags = XPath.first(root_node, "itunes:keywords/text()").to_s.downcase.split(" ")
|
2637
|
+
rescue
|
2638
|
+
@tags = []
|
970
2639
|
end
|
971
2640
|
end
|
2641
|
+
if @tags.nil?
|
2642
|
+
@tags = []
|
2643
|
+
end
|
2644
|
+
@tags.uniq!
|
972
2645
|
end
|
2646
|
+
return @tags
|
2647
|
+
end
|
973
2648
|
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
if
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
if Feed.cache_enabled?
|
996
|
-
save
|
2649
|
+
# Sets the feed item tags
|
2650
|
+
def tags=(new_tags)
|
2651
|
+
@tags = new_tags
|
2652
|
+
end
|
2653
|
+
|
2654
|
+
# Returns true if this feed item contains explicit material. If the whole
|
2655
|
+
# feed has been marked as explicit, this will return true even if the item
|
2656
|
+
# isn't explicitly marked as explicit.
|
2657
|
+
def explicit?
|
2658
|
+
if @explicit.nil?
|
2659
|
+
if XPath.first(root_node,
|
2660
|
+
"media:adult/text()").to_s.downcase == "true" ||
|
2661
|
+
XPath.first(root_node,
|
2662
|
+
"itunes:explicit/text()").to_s.downcase == "yes" ||
|
2663
|
+
XPath.first(root_node,
|
2664
|
+
"itunes:explicit/text()").to_s.downcase == "true" ||
|
2665
|
+
feed.explicit
|
2666
|
+
@explicit = true
|
2667
|
+
else
|
2668
|
+
@explicit = false
|
2669
|
+
end
|
997
2670
|
end
|
998
|
-
return
|
2671
|
+
return @explicit
|
999
2672
|
end
|
1000
2673
|
|
1001
|
-
|
2674
|
+
# Sets whether or not the feed item contains explicit material
|
2675
|
+
def explicit=(new_explicit)
|
2676
|
+
@explicit = (new_explicit ? true : false)
|
1002
2677
|
end
|
1003
2678
|
|
1004
|
-
|
2679
|
+
# A hook method that is called during the feed generation process. Overriding this method
|
2680
|
+
# will enable additional content to be inserted into the feed.
|
2681
|
+
def build_xml_hook(feed_type, version, xml_builder)
|
2682
|
+
return nil
|
2683
|
+
end
|
2684
|
+
|
2685
|
+
# Generates xml based on the content of the feed item
|
2686
|
+
def build_xml(feed_type="rss", version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
1005
2687
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
1006
2688
|
# RDF-based rss format
|
1007
2689
|
if link.nil?
|
@@ -1026,15 +2708,17 @@ module FeedTools
|
|
1026
2708
|
unless time.nil?
|
1027
2709
|
xml_builder.tag!("dc:date", time.iso8601)
|
1028
2710
|
end
|
1029
|
-
unless tags.nil?
|
2711
|
+
unless tags.nil? || tags.size == 0
|
1030
2712
|
xml_builder.tag!("dc:subject") do
|
1031
2713
|
xml_builder.tag!("rdf:Bag") do
|
1032
|
-
for tag in
|
2714
|
+
for tag in tags
|
1033
2715
|
xml_builder.tag!("rdf:li", tag)
|
1034
2716
|
end
|
1035
2717
|
end
|
1036
2718
|
end
|
2719
|
+
xml_builder.tag!("itunes:keywords", tags.join(" "))
|
1037
2720
|
end
|
2721
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1038
2722
|
end
|
1039
2723
|
elsif feed_type == "rss"
|
1040
2724
|
# normal rss format
|
@@ -1051,15 +2735,17 @@ module FeedTools
|
|
1051
2735
|
unless time.nil?
|
1052
2736
|
xml_builder.pubDate(time.rfc822)
|
1053
2737
|
end
|
1054
|
-
unless tags.nil?
|
2738
|
+
unless tags.nil? || tags.size == 0
|
1055
2739
|
xml_builder.tag!("dc:subject") do
|
1056
2740
|
xml_builder.tag!("rdf:Bag") do
|
1057
|
-
for tag in
|
2741
|
+
for tag in tags
|
1058
2742
|
xml_builder.tag!("rdf:li", tag)
|
1059
2743
|
end
|
1060
2744
|
end
|
1061
2745
|
end
|
2746
|
+
xml_builder.tag!("itunes:keywords", tags.join(" "))
|
1062
2747
|
end
|
2748
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1063
2749
|
end
|
1064
2750
|
elsif feed_type == "atom"
|
1065
2751
|
# normal atom format
|
@@ -1083,31 +2769,34 @@ module FeedTools
|
|
1083
2769
|
unless time.nil?
|
1084
2770
|
xml_builder.issued(time.iso8601)
|
1085
2771
|
end
|
1086
|
-
unless tags.nil?
|
1087
|
-
for tag in
|
2772
|
+
unless tags.nil? || tags.size == 0
|
2773
|
+
for tag in tags
|
1088
2774
|
xml_builder.category(tag)
|
1089
2775
|
end
|
1090
2776
|
end
|
2777
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1091
2778
|
end
|
1092
2779
|
end
|
1093
2780
|
end
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
2781
|
+
|
2782
|
+
alias_method :tagline, :description
|
2783
|
+
alias_method :tagline=, :description=
|
2784
|
+
alias_method :subtitle, :description
|
2785
|
+
alias_method :subtitle=, :description=
|
2786
|
+
alias_method :abstract, :description
|
2787
|
+
alias_method :abstract=, :description=
|
2788
|
+
alias_method :content, :description
|
2789
|
+
alias_method :content=, :description=
|
2790
|
+
alias_method :guid, :id
|
2791
|
+
alias_method :guid=, :id=
|
1102
2792
|
end
|
1103
2793
|
end
|
1104
2794
|
|
1105
|
-
module REXML
|
1106
|
-
class Element
|
1107
|
-
|
1108
|
-
def inner_xml
|
2795
|
+
module REXML #:nodoc:
|
2796
|
+
class Element #:nodoc:
|
2797
|
+
def inner_xml #:nodoc:
|
1109
2798
|
result = ""
|
1110
|
-
each_child do |child|
|
2799
|
+
self.each_child do |child|
|
1111
2800
|
result << child.to_s
|
1112
2801
|
end
|
1113
2802
|
return result
|
@@ -1116,11 +2805,8 @@ module REXML
|
|
1116
2805
|
end
|
1117
2806
|
|
1118
2807
|
begin
|
1119
|
-
FeedTools
|
1120
|
-
|
1121
|
-
FeedTools::Feed.create_cache
|
2808
|
+
unless FeedTools.feed_cache.nil?
|
2809
|
+
FeedTools.feed_cache.initialize_cache
|
1122
2810
|
end
|
1123
2811
|
rescue
|
1124
|
-
# Nothing can be done until someone sets up the database connection.
|
1125
|
-
# We'll just assume for now that the user will take care of that.
|
1126
2812
|
end
|