feedtools 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +11 -0
- data/lib/feed_tools.rb +2496 -810
- data/lib/feed_tools/vendor/builder.rb +2 -0
- data/lib/feed_tools/vendor/builder/blankslate.rb +2 -0
- data/lib/feed_tools/vendor/builder/xmlbase.rb +2 -1
- data/lib/feed_tools/vendor/builder/xmlevents.rb +2 -0
- data/lib/feed_tools/vendor/builder/xmlmarkup.rb +4 -2
- data/lib/feed_tools/vendor/htree.rb +97 -0
- data/lib/feed_tools/vendor/htree/container.rb +10 -0
- data/lib/feed_tools/vendor/htree/context.rb +67 -0
- data/lib/feed_tools/vendor/htree/display.rb +27 -0
- data/lib/feed_tools/vendor/htree/doc.rb +149 -0
- data/lib/feed_tools/vendor/htree/elem.rb +262 -0
- data/lib/feed_tools/vendor/htree/encoder.rb +163 -0
- data/lib/feed_tools/vendor/htree/equality.rb +218 -0
- data/lib/feed_tools/vendor/htree/extract_text.rb +37 -0
- data/lib/feed_tools/vendor/htree/fstr.rb +33 -0
- data/lib/feed_tools/vendor/htree/gencode.rb +97 -0
- data/lib/feed_tools/vendor/htree/htmlinfo.rb +672 -0
- data/lib/feed_tools/vendor/htree/inspect.rb +108 -0
- data/lib/feed_tools/vendor/htree/leaf.rb +94 -0
- data/lib/feed_tools/vendor/htree/loc.rb +367 -0
- data/lib/feed_tools/vendor/htree/modules.rb +48 -0
- data/lib/feed_tools/vendor/htree/name.rb +124 -0
- data/lib/feed_tools/vendor/htree/output.rb +207 -0
- data/lib/feed_tools/vendor/htree/parse.rb +407 -0
- data/lib/feed_tools/vendor/htree/raw_string.rb +124 -0
- data/lib/feed_tools/vendor/htree/regexp-util.rb +15 -0
- data/lib/feed_tools/vendor/htree/rexml.rb +130 -0
- data/lib/feed_tools/vendor/htree/scan.rb +166 -0
- data/lib/feed_tools/vendor/htree/tag.rb +111 -0
- data/lib/feed_tools/vendor/htree/template.rb +909 -0
- data/lib/feed_tools/vendor/htree/text.rb +115 -0
- data/lib/feed_tools/vendor/htree/traverse.rb +465 -0
- data/rakefile +1 -1
- data/test/rss_test.rb +97 -0
- metadata +30 -1
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== FeedTools 0.2.0
|
2
|
+
* more complete support for rss, atom, cdf
|
3
|
+
* modular caching mechanism
|
4
|
+
* lazy parsing
|
5
|
+
* HTML sanitization of possibly dangerous fields
|
6
|
+
* HTML tidy support
|
7
|
+
* support for podcasts and vidlogs
|
8
|
+
* corrected handling of http redirection
|
9
|
+
* made http header information available
|
10
|
+
* file: protocol support
|
11
|
+
* custom parsing can be done using the find_node and find_all_nodes methods
|
1
12
|
== FeedTools 0.1.0
|
2
13
|
* basic support for rss, atom, cdf
|
3
14
|
* basic caching using active record
|
data/lib/feed_tools.rb
CHANGED
@@ -21,987 +21,2669 @@
|
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
24
|
+
FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
|
25
|
+
ENV['RAILS_ENV'] ||
|
26
|
+
'production' # :nodoc:
|
27
|
+
|
28
|
+
FEED_TOOLS_VERSION = "0.2.0"
|
25
29
|
|
26
30
|
$:.unshift(File.dirname(__FILE__))
|
27
31
|
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
32
|
+
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
33
|
+
|
34
|
+
require 'rubygems'
|
35
|
+
require 'active_record'
|
28
36
|
|
29
37
|
begin
|
30
|
-
require '
|
38
|
+
require 'builder'
|
31
39
|
rescue LoadError
|
32
|
-
|
33
|
-
|
40
|
+
# RubyGems version is not available, use included Builder
|
41
|
+
require 'feed_tools/vendor/builder'
|
34
42
|
end
|
35
43
|
|
36
44
|
begin
|
37
|
-
require '
|
38
|
-
require 'builder'
|
45
|
+
require 'tidy'
|
39
46
|
rescue LoadError
|
40
|
-
#
|
41
|
-
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
42
|
-
require 'feed_tools/vendor/builder'
|
47
|
+
# Ignore the error for now.
|
43
48
|
end
|
44
49
|
|
45
|
-
require '
|
46
|
-
|
50
|
+
require 'feed_tools/vendor/htree'
|
51
|
+
|
52
|
+
require 'net/http'
|
53
|
+
require 'net/https'
|
54
|
+
require 'net/ftp'
|
55
|
+
|
47
56
|
require 'rexml/document'
|
48
|
-
|
57
|
+
|
58
|
+
require 'iconv'
|
59
|
+
require 'uri'
|
60
|
+
require 'time'
|
49
61
|
require 'cgi'
|
62
|
+
require 'pp'
|
63
|
+
require 'yaml'
|
50
64
|
|
65
|
+
#= feed_tools.rb
|
66
|
+
#
|
67
|
+
# FeedTools was designed to be a simple XML feed parser, generator, and translator with a built-in
|
68
|
+
# caching system.
|
69
|
+
#
|
70
|
+
#== Example
|
71
|
+
# slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
72
|
+
# slashdot_feed.title
|
73
|
+
# => "Slashdot"
|
74
|
+
# slashdot_feed.description
|
75
|
+
# => "News for nerds, stuff that matters"
|
76
|
+
# slashdot_feed.link
|
77
|
+
# => "http://slashdot.org/"
|
78
|
+
# slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
|
79
|
+
# => "43,37,28,23,11,3,1"
|
51
80
|
module FeedTools
|
52
|
-
class Feed < ActiveRecord::Base
|
53
|
-
include REXML
|
54
81
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
def
|
70
|
-
#
|
71
|
-
if (url =~ /feed:/) == 0
|
72
|
-
url = url.gsub(/feed:\/\/http:\/\/\//, "http://")
|
73
|
-
url = url.gsub(/feed:\/\/http:\/\//, "http://")
|
74
|
-
url = url.gsub(/feed:http:\/\/\//, "http://")
|
75
|
-
url = url.gsub(/feed:http:\/\//, "http://")
|
76
|
-
url = url.gsub(/feed:\/\/\//, "http://")
|
77
|
-
url = url.gsub(/feed:\/\//, "http://")
|
78
|
-
url = url.gsub(/feed:\//, "http://")
|
79
|
-
url = url.gsub(/feed:/, "http://")
|
80
|
-
end
|
81
|
-
if (url =~ /rss:/) == 0
|
82
|
-
url = url.gsub(/rss:\/\/http:\/\/\//, "http://")
|
83
|
-
url = url.gsub(/rss:\/\/http:\/\//, "http://")
|
84
|
-
url = url.gsub(/rss:http:\/\/\//, "http://")
|
85
|
-
url = url.gsub(/rss:http:\/\//, "http://")
|
86
|
-
url = url.gsub(/rss:\/\/\//, "http://")
|
87
|
-
url = url.gsub(/rss:\/\//, "http://")
|
88
|
-
url = url.gsub(/rss:\//, "http://")
|
89
|
-
url = url.gsub(/rss:/, "http://")
|
90
|
-
end
|
91
|
-
|
92
|
-
feed = nil
|
82
|
+
# The default caching mechanism for the FeedTools module
|
83
|
+
class DatabaseFeedCache < ActiveRecord::Base
|
84
|
+
# Overrides the default table name to use the "feeds" table.
|
85
|
+
def self.table_name() "feeds" end
|
86
|
+
|
87
|
+
# If ActiveRecord is not already connected, attempts to find a configuration file and use
|
88
|
+
# it to open a connection for ActiveRecord.
|
89
|
+
# This method is probably unnecessary for anything but testing and debugging purposes.
|
90
|
+
# In a Rails environment, the connection will already have been established
|
91
|
+
# and this method will simply do nothing.
|
92
|
+
#
|
93
|
+
# This method should not raise any exceptions because it's designed to be run only when
|
94
|
+
# the module is first loaded. If it fails, the user should get an exception when they
|
95
|
+
# try to perform some action that makes use of the caching functionality, and not until.
|
96
|
+
def DatabaseFeedCache.initialize_cache
|
97
|
+
# Establish a connection if we don't already have one
|
93
98
|
begin
|
94
|
-
|
95
|
-
rescue
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
99
|
+
ActiveRecord::Base.connection
|
100
|
+
rescue
|
101
|
+
begin
|
102
|
+
possible_config_files = [
|
103
|
+
"./config/database.yml",
|
104
|
+
"../database.yml",
|
105
|
+
"./database.yml"
|
106
|
+
]
|
107
|
+
database_config_file = nil
|
108
|
+
for file in possible_config_files
|
109
|
+
if File.exists? file
|
110
|
+
database_config_file = file
|
111
|
+
break
|
112
|
+
end
|
113
|
+
end
|
114
|
+
database_config_hash = File.open(database_config_file) do |file|
|
115
|
+
config_hash = YAML::load(file)
|
116
|
+
unless config_hash[FEED_TOOLS_ENV].nil?
|
117
|
+
config_hash = config_hash[FEED_TOOLS_ENV]
|
118
|
+
end
|
119
|
+
config_hash
|
120
|
+
end
|
121
|
+
ActiveRecord::Base.configurations = database_config_hash
|
122
|
+
ActiveRecord::Base.establish_connection(database_config_hash)
|
123
|
+
ActiveRecord::Base.connection
|
124
|
+
rescue
|
100
125
|
end
|
101
|
-
feed = Feed.find_by_url(url)
|
102
126
|
end
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
feed.url = url
|
108
|
-
feed.load_remote_feed
|
127
|
+
# Verify that the necessary database tables are in place
|
128
|
+
# and if they're missing, create them
|
129
|
+
unless DatabaseFeedCache.table_exists?
|
130
|
+
DatabaseFeedCache.create_table
|
109
131
|
end
|
110
|
-
return
|
111
|
-
end
|
112
|
-
|
113
|
-
# Checks if the feed has expired and updates if it has
|
114
|
-
def update_if_needed
|
115
|
-
if expired?
|
116
|
-
load_remote_feed
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
# Verifies that the table structure exists
|
121
|
-
def Feed.cache_exists?
|
122
|
-
return Feed.table_exists? && FeedItem.table_exists?
|
132
|
+
return nil
|
123
133
|
end
|
124
|
-
|
125
|
-
#
|
126
|
-
def
|
134
|
+
|
135
|
+
# True if the appropriate database table already exists
|
136
|
+
def DatabaseFeedCache.table_exists?
|
127
137
|
begin
|
128
|
-
connection.execute "select id, url,
|
129
|
-
"
|
138
|
+
ActiveRecord::Base.connection.execute "select id, url, title, " +
|
139
|
+
"link, xml_data, http_headers, last_retrieved " +
|
140
|
+
"from feeds limit 1"
|
130
141
|
rescue ActiveRecord::StatementInvalid
|
131
142
|
return false
|
143
|
+
rescue
|
144
|
+
return false
|
132
145
|
end
|
133
146
|
return true
|
134
147
|
end
|
135
|
-
|
136
|
-
#
|
137
|
-
def
|
138
|
-
unless
|
139
|
-
feed_items_mysql = <<-SQL_END
|
140
|
-
CREATE TABLE `feed_items` (
|
141
|
-
`id` int(6) unsigned NOT NULL auto_increment,
|
142
|
-
`feed_id` int(6) unsigned NOT NULL default '0',
|
143
|
-
`link` varchar(255) default NULL,
|
144
|
-
`title` varchar(255) default NULL,
|
145
|
-
`author` varchar(255) default NULL,
|
146
|
-
`description` text default NULL,
|
147
|
-
`time` datetime NOT NULL default '0000-00-00 00:00:00',
|
148
|
-
`tags` varchar(255) default NULL,
|
149
|
-
PRIMARY KEY (`id`)
|
150
|
-
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
|
151
|
-
SQL_END
|
152
|
-
feed_items_sqlite = <<-SQL_END
|
153
|
-
CREATE TABLE 'feed_items' (
|
154
|
-
'id' INTEGER PRIMARY KEY NOT NULL,
|
155
|
-
'feed_id' INTEGER NOT NULL,
|
156
|
-
'link' VARCHAR(255) DEFAULT NULL,
|
157
|
-
'title' VARCHAR(255) DEFAULT NULL,
|
158
|
-
'author' VARCHAR(255) DEFAULT NULL,
|
159
|
-
'description' TEXT DEFAULT NULL,
|
160
|
-
'time' DATETIME DEFAULT NULL,
|
161
|
-
'tags' VARCHAR(255) DEFAULT NULL
|
162
|
-
);
|
163
|
-
SQL_END
|
164
|
-
feed_items_psql = <<-SQL_END
|
165
|
-
CREATE TABLE feed_items (
|
166
|
-
id SERIAL PRIMARY KEY NOT NULL,
|
167
|
-
feed_id int REFERENCES feeds,
|
168
|
-
link varchar(255) default NULL,
|
169
|
-
title varchar(255) default NULL,
|
170
|
-
author varchar(255) default NULL,
|
171
|
-
description text default NULL,
|
172
|
-
time datetime default NULL,
|
173
|
-
tags varchar(255) default NULL
|
174
|
-
);
|
175
|
-
SQL_END
|
176
|
-
unless FeedItem.table_exists?
|
177
|
-
table_creation_sql = nil
|
178
|
-
if configurations["adapter"] == "mysql"
|
179
|
-
table_creation_sql = feed_items_mysql
|
180
|
-
elsif configurations["adapter"] == "sqlite"
|
181
|
-
table_creation_sql = feed_items_sqlite
|
182
|
-
elsif configurations["adapter"] == "postgresql"
|
183
|
-
table_creation_sql = feeds_psql
|
184
|
-
end
|
185
|
-
if table_creation_sql.nil?
|
186
|
-
raise "Could not build feed_items table."
|
187
|
-
else
|
188
|
-
connection.execute table_creation_sql
|
189
|
-
end
|
190
|
-
end
|
148
|
+
|
149
|
+
# Creates the appropriate database table
|
150
|
+
def DatabaseFeedCache.create_table
|
151
|
+
unless DatabaseFeedCache.table_exists?
|
191
152
|
feeds_mysql = <<-SQL_END
|
192
153
|
CREATE TABLE `feeds` (
|
193
|
-
`id`
|
194
|
-
`url`
|
195
|
-
`
|
196
|
-
`
|
197
|
-
`
|
198
|
-
`
|
199
|
-
`
|
200
|
-
`last_updated` datetime default NULL,
|
201
|
-
`etag` varchar(255) default NULL,
|
202
|
-
`time_to_live` int(4) default NULL,
|
154
|
+
`id` int(10) unsigned NOT NULL auto_increment,
|
155
|
+
`url` varchar(255) default NULL,
|
156
|
+
`title` varchar(255) default NULL,
|
157
|
+
`link` varchar(255) default NULL,
|
158
|
+
`xml_data` longtext default NULL,
|
159
|
+
`http_headers` text default NULL,
|
160
|
+
`last_retrieved` datetime default NULL,
|
203
161
|
PRIMARY KEY (`id`)
|
204
162
|
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
|
205
163
|
SQL_END
|
206
164
|
feeds_sqlite = <<-SQL_END
|
207
165
|
CREATE TABLE 'feeds' (
|
208
|
-
'id'
|
209
|
-
'url'
|
210
|
-
'
|
211
|
-
'
|
212
|
-
'
|
213
|
-
'
|
214
|
-
'
|
215
|
-
'
|
216
|
-
'etag' VARCHAR(255) DEFAULT NULL,
|
217
|
-
'time_to_live' INTEGER DEFAULT NULL
|
166
|
+
'id' INTEGER PRIMARY KEY NOT NULL,
|
167
|
+
'url' VARCHAR(255) DEFAULT NULL,
|
168
|
+
'title' VARCHAR(255) DEFAULT NULL,
|
169
|
+
'link' VARCHAR(255) DEFAULT NULL,
|
170
|
+
'image_link' VARCHAR(255) DEFAULT NULL,
|
171
|
+
'xml_data' TEXT DEFAULT NULL,
|
172
|
+
'http_headers' TEXT DEFAULT NULL,
|
173
|
+
'last_retrieved' DATETIME DEFAULT NULL,
|
218
174
|
);
|
219
175
|
SQL_END
|
220
176
|
feeds_psql = <<-SQL_END
|
221
177
|
CREATE TABLE feeds (
|
222
|
-
id
|
223
|
-
url
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
last_updated datetime default NULL,
|
230
|
-
etag varchar(255) default NULL,
|
231
|
-
time_to_live int default NULL
|
178
|
+
id SERIAL PRIMARY KEY NOT NULL,
|
179
|
+
url varchar(255) default NULL,
|
180
|
+
title varchar(255) default NULL,
|
181
|
+
link varchar(255) default NULL,
|
182
|
+
xml_data text default NULL,
|
183
|
+
http_headers text default NULL,
|
184
|
+
last_retrieved datetime default NULL,
|
232
185
|
);
|
233
186
|
SQL_END
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
connection.execute table_creation_sql
|
247
|
-
end
|
187
|
+
table_creation_sql = nil
|
188
|
+
if configurations["adapter"] == "mysql"
|
189
|
+
table_creation_sql = feeds_mysql
|
190
|
+
elsif configurations["adapter"] == "sqlite"
|
191
|
+
table_creation_sql = feeds_sqlite
|
192
|
+
elsif configurations["adapter"] == "postgresql"
|
193
|
+
table_creation_sql = feeds_psql
|
194
|
+
end
|
195
|
+
if table_creation_sql.nil?
|
196
|
+
raise "Could not build feed_items table."
|
197
|
+
else
|
198
|
+
connection.execute table_creation_sql
|
248
199
|
end
|
249
200
|
end
|
250
201
|
end
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
202
|
+
end
|
203
|
+
|
204
|
+
# Error raised when a feed cannot be retrieved
|
205
|
+
class FeedAccessError < StandardError
|
206
|
+
end
|
207
|
+
|
208
|
+
# Quick method of enabling small classes to have their attributes
|
209
|
+
# accessible as a dictionary.
|
210
|
+
module AttributeDictionary # :nodoc:
|
211
|
+
# Access the attributes as a dictionary
|
212
|
+
def [](key)
|
213
|
+
# Assignment and destructive methods should not be
|
214
|
+
# accessed like this.
|
215
|
+
return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
|
216
|
+
return nil unless self.method(key).arity == 0
|
217
|
+
return self.send(key)
|
258
218
|
end
|
259
|
-
|
260
|
-
#
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
219
|
+
|
220
|
+
# Access the attributes as a dictionary
|
221
|
+
def []=(key, value)
|
222
|
+
# Assignment and destructive methods should not be
|
223
|
+
# accessed like this.
|
224
|
+
return nil if key[-1..-1] == "=" || key[-1..-1] == "!"
|
225
|
+
return nil unless self.method(key + "=").arity == 1
|
226
|
+
return self.send(key + "=", value)
|
265
227
|
end
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
228
|
+
end
|
229
|
+
|
230
|
+
@feed_cache = DatabaseFeedCache
|
231
|
+
@user_agent = "FeedTools/#{FEED_TOOLS_VERSION} " +
|
232
|
+
"+http://www.sporkmonger.com/projects/feedtools/"
|
233
|
+
|
234
|
+
# Returns the current caching mechanism.
|
235
|
+
def FeedTools.feed_cache
|
236
|
+
return @feed_cache
|
237
|
+
end
|
238
|
+
|
239
|
+
# Sets the current caching mechanism. If set to nil, disables caching.
|
240
|
+
# Default is the DatabaseFeedCache class.
|
241
|
+
#
|
242
|
+
# Objects of this class must accept the following messages:
|
243
|
+
# url
|
244
|
+
# url=
|
245
|
+
# title
|
246
|
+
# title=
|
247
|
+
# link
|
248
|
+
# link=
|
249
|
+
# xml_data
|
250
|
+
# xml_data=
|
251
|
+
# etag
|
252
|
+
# etag=
|
253
|
+
# last_modified
|
254
|
+
# last_modified=
|
255
|
+
# save
|
256
|
+
#
|
257
|
+
# Additionally, the class itself must accept the following messages:
|
258
|
+
# find_by_id
|
259
|
+
# find_by_url
|
260
|
+
# initialize_cache
|
261
|
+
def FeedTools.feed_cache=(new_feed_cache)
|
262
|
+
# TODO: ensure that the feed cache class actually does those things.
|
263
|
+
# ==================================================================
|
264
|
+
@feed_cache = new_feed_cache
|
265
|
+
end
|
266
|
+
|
267
|
+
# Returns the currently used user agent string.
|
268
|
+
def FeedTools.user_agent
|
269
|
+
return @user_agent
|
270
|
+
end
|
271
|
+
|
272
|
+
# Sets the user agent string to send in the http headers.
|
273
|
+
def FeedTools.user_agent=(new_user_agent)
|
274
|
+
@user_agent = new_user_agent
|
275
|
+
end
|
276
|
+
|
277
|
+
# Returns true if the html tidy module can be used.
|
278
|
+
#
|
279
|
+
# Obviously, you need the tidy gem installed in order to run with html
|
280
|
+
# tidy features turned on.
|
281
|
+
#
|
282
|
+
# This method does a fairly complicated, and probably unnecessarily
|
283
|
+
# desperate search for the libtidy library. If you want this thing to
|
284
|
+
# execute fast, the best thing to do is to set Tidy.path ahead of time.
|
285
|
+
# If Tidy.path is set, this method doesn't do much. If it's not set,
|
286
|
+
# it will do its darnedest to find the libtidy library. If you set
|
287
|
+
# the LIBTIDYPATH environment variable to the libtidy library, it should
|
288
|
+
# be able to find it.
|
289
|
+
#
|
290
|
+
# Once the library is located, this method will run much faster.
|
291
|
+
def FeedTools.tidy_enabled?
|
292
|
+
# This is an override variable to keep tidy from being used even if it
|
293
|
+
# is available.
|
294
|
+
if @force_tidy_enabled == false
|
295
|
+
return false
|
271
296
|
end
|
272
|
-
|
273
|
-
|
274
|
-
# it to open a connection for ActiveRecord.
|
275
|
-
# This method is probably unnecessary for anything but testing and debugging purposes.
|
276
|
-
def Feed.prepare_connection
|
297
|
+
if @tidy_enabled.nil? || @tidy_enabled == false
|
298
|
+
@tidy_enabled = false
|
277
299
|
begin
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
300
|
+
require 'tidy'
|
301
|
+
if Tidy.path.nil?
|
302
|
+
# *Shrug*, just brute force it, I guess. There's a lot of places
|
303
|
+
# this thing might be hiding in, depending on platform and general
|
304
|
+
# sanity of the person who installed the thing. Most of these are
|
305
|
+
# probably unlikely, but it's not like checking unlikely locations
|
306
|
+
# hurts. Much. Especially if you actually find it.
|
307
|
+
libtidy_locations = [
|
308
|
+
'/usr/local/lib/libtidy.dylib',
|
309
|
+
'/opt/local/lib/libtidy.dylib',
|
310
|
+
'/usr/lib/libtidy.dylib',
|
311
|
+
'/usr/local/lib/tidylib.dylib',
|
312
|
+
'/opt/local/lib/tidylib.dylib',
|
313
|
+
'/usr/lib/tidylib.dylib',
|
314
|
+
'/usr/local/lib/tidy.dylib',
|
315
|
+
'/opt/local/lib/tidy.dylib',
|
316
|
+
'/usr/lib/tidy.dylib',
|
317
|
+
'/usr/local/lib/libtidy.so',
|
318
|
+
'/opt/local/lib/libtidy.so',
|
319
|
+
'/usr/lib/libtidy.so',
|
320
|
+
'/usr/local/lib/tidylib.so',
|
321
|
+
'/opt/local/lib/tidylib.so',
|
322
|
+
'/usr/lib/tidylib.so',
|
323
|
+
'/usr/local/lib/tidy.so',
|
324
|
+
'/opt/local/lib/tidy.so',
|
325
|
+
'/usr/lib/tidy.so',
|
326
|
+
'C:\Program Files\Tidy\tidy.dll',
|
327
|
+
'C:\Tidy\tidy.dll',
|
328
|
+
'/usr/local/lib',
|
329
|
+
'/opt/local/lib',
|
330
|
+
'/usr/lib'
|
331
|
+
]
|
332
|
+
# We just made this thing up, but if someone sets it, we'll
|
333
|
+
# go ahead and check it
|
334
|
+
unless ENV['LIBTIDYPATH'].nil?
|
335
|
+
libtidy_locations =
|
336
|
+
libtidy_locations.reverse.push(ENV['LIBTIDYPATH'])
|
337
|
+
end
|
338
|
+
for path in libtidy_locations
|
339
|
+
if File.exists? path
|
340
|
+
if File.ftype(path) == "file"
|
341
|
+
Tidy.path = path
|
342
|
+
@tidy_enabled = true
|
343
|
+
break
|
344
|
+
elsif File.ftype(path) == "directory"
|
345
|
+
# Ok, now perhaps we're getting a bit more desperate
|
346
|
+
lib_paths =
|
347
|
+
`find #{path} -name '*tidy*' | grep '\\.\\(so\\|dylib\\)$'`
|
348
|
+
# If there's more than one, grab the first one and
|
349
|
+
# hope for the best, and if it doesn't work, then blame the
|
350
|
+
# user for not specifying more accurately.
|
351
|
+
tidy_path = lib_paths.split("\n").first
|
352
|
+
unless tidy_path.nil?
|
353
|
+
Tidy.path = tidy_path
|
354
|
+
@tidy_enabled = true
|
355
|
+
break
|
356
|
+
end
|
357
|
+
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
# Still couldn't find it.
|
361
|
+
unless @tidy_enabled
|
362
|
+
@tidy_enabled = false
|
363
|
+
end
|
364
|
+
else
|
365
|
+
@tidy_enabled = true
|
366
|
+
end
|
367
|
+
rescue LoadError
|
368
|
+
# Tidy not installed, disable features that rely on tidy.
|
369
|
+
@tidy_enabled = false
|
301
370
|
end
|
302
371
|
end
|
303
|
-
|
304
|
-
|
305
|
-
|
372
|
+
return @tidy_enabled
|
373
|
+
end
|
374
|
+
|
375
|
+
# Turns html tidy support on or off. Be aware that setting this to true
|
376
|
+
# does not mean tidy will be enabled. It simply means that tidy will be
|
377
|
+
# enabled if it is available to be enabled.
|
378
|
+
def FeedTools.tidy_enabled=(new_tidy_enabled)
|
379
|
+
@force_tidy_enabled = new_tidy_enabled
|
380
|
+
end
|
381
|
+
|
382
|
+
# Attempts to ensure that the passed url is valid and sane. Accepts very, very ugly urls
|
383
|
+
# and makes every effort to figure out what it was supposed to be. Also translates from
|
384
|
+
# the feed: and rss: pseudo-protocols to the http: protocol.
|
385
|
+
def FeedTools.normalize_url(url)
|
386
|
+
if url.nil? || url == ""
|
387
|
+
return nil
|
306
388
|
end
|
389
|
+
normalized_url = url
|
307
390
|
|
308
|
-
|
309
|
-
|
391
|
+
# if a url begins with the '/' character, it only makes sense that they
|
392
|
+
# meant to be using a file:// url. Fix it for them.
|
393
|
+
if normalized_url.length > 0 && normalized_url[0..0] == "/"
|
394
|
+
normalized_url = "file://" + normalized_url
|
310
395
|
end
|
311
396
|
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
|
316
|
-
|
317
|
-
return (self["language"] or "en-US")
|
318
|
-
rescue
|
319
|
-
return "en-US"
|
320
|
-
end
|
397
|
+
# if a url begins with javascript:, it's quite possibly an attempt at
|
398
|
+
# doing something malicious. Let's keep that from getting anywhere,
|
399
|
+
# shall we?
|
400
|
+
if (normalized_url.downcase =~ /javascript:/) != nil
|
401
|
+
return "#"
|
321
402
|
end
|
322
403
|
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
404
|
+
# deal with all of the many ugly possibilities involved in the rss:
|
405
|
+
# and feed: pseudo-protocols (incidentally, whose crazy idea was this
|
406
|
+
# mess?)
|
407
|
+
normalized_url.gsub!(/^http:\/*(feed:\/*)?/, "http://")
|
408
|
+
normalized_url.gsub!(/^http:\/*(rss:\/*)?/, "http://")
|
409
|
+
normalized_url.gsub!(/^feed:\/*(http:\/*)?/, "http://")
|
410
|
+
normalized_url.gsub!(/^rss:\/*(http:\/*)?/, "http://")
|
411
|
+
normalized_url.gsub!(/^file:\/*/, "file:///")
|
412
|
+
normalized_url.gsub!(/^https:\/*/, "https://")
|
413
|
+
# fix (very) bad urls (usually of the user-entered sort)
|
414
|
+
normalized_url.gsub!(/^http:\/*(http:\/*)*/, "http://")
|
415
|
+
if (normalized_url =~ /^file:/) == 0
|
416
|
+
# fix bad Windows-based entries
|
417
|
+
normalized_url.gsub!(/file:\/\/\/([a-zA-Z]):/, 'file:///\1|')
|
330
418
|
|
331
|
-
|
332
|
-
|
419
|
+
# maybe this is too aggressive?
|
420
|
+
normalized_url.gsub!(/\\/, '/')
|
421
|
+
return normalized_url
|
422
|
+
else
|
423
|
+
if (normalized_url =~ /https?:\/\//) == nil
|
424
|
+
normalized_url = "http://" + normalized_url
|
425
|
+
end
|
426
|
+
if normalized_url == "http://"
|
427
|
+
return nil
|
428
|
+
end
|
429
|
+
begin
|
430
|
+
feed_uri = URI.parse(normalized_url)
|
431
|
+
if feed_uri.scheme == nil
|
432
|
+
feed_uri.scheme = "http"
|
433
|
+
end
|
434
|
+
if feed_uri.path == nil || feed_uri.path == ""
|
435
|
+
feed_uri.path = "/"
|
436
|
+
end
|
437
|
+
if (feed_uri.path =~ /^[\/]+/) == 0
|
438
|
+
feed_uri.path.gsub!(/^[\/]+/, "/")
|
439
|
+
end
|
440
|
+
return feed_uri.to_s
|
441
|
+
rescue URI::InvalidURIError
|
442
|
+
return normalized_url
|
443
|
+
end
|
333
444
|
end
|
445
|
+
end
|
334
446
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
447
|
+
# Returns true if the parameter appears to be a valid url
|
448
|
+
def FeedTools.is_url?(url)
|
449
|
+
return false if url.nil?
|
450
|
+
begin
|
451
|
+
uri = URI.parse(url)
|
452
|
+
rescue URI::InvalidURIError
|
453
|
+
return false
|
341
454
|
end
|
455
|
+
return true
|
456
|
+
end
|
342
457
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
458
|
+
# Removes all html tags from the html formatted text.
|
459
|
+
def FeedTools.strip_html(html)
|
460
|
+
# TODO: do this properly
|
461
|
+
# ======================
|
462
|
+
stripped_html = html.gsub(/<\/?[^>]+>/, "")
|
463
|
+
return stripped_html
|
464
|
+
end
|
348
465
|
|
349
|
-
|
350
|
-
|
466
|
+
# Tidies up the html
|
467
|
+
def FeedTools.tidy_html(html)
|
468
|
+
if FeedTools.tidy_enabled?
|
469
|
+
is_fragment = true
|
470
|
+
if (html.strip =~ /<html>(.|\n)*<body>/) != nil ||
|
471
|
+
(html.strip =~ /<\/body>(.|\n)*<\/html>$/) != nil
|
472
|
+
is_fragment = false
|
473
|
+
end
|
474
|
+
if (html.strip =~ /<\?xml(.|\n)*\?>/) != nil
|
475
|
+
is_fragment = false
|
476
|
+
end
|
477
|
+
tidy_html = Tidy.open(:show_warnings=>false) do |tidy|
|
478
|
+
tidy.options.output_xml = true
|
479
|
+
tidy.options.indent = false
|
480
|
+
tidy.options.wrap_attributes = true
|
481
|
+
tidy.options.logical_emphasis = true
|
482
|
+
tidy.options.doctype = "omit"
|
483
|
+
xml = tidy.clean(html)
|
484
|
+
xml
|
485
|
+
end
|
486
|
+
if is_fragment
|
487
|
+
# Tidy puts <html>...<body>[our html]</body>...</html> in.
|
488
|
+
# We don't want this.
|
489
|
+
tidy_html.strip!
|
490
|
+
tidy_html.gsub!(/^<html>(.|\n)*<body>/, "")
|
491
|
+
tidy_html.gsub!(/<\/body>(.|\n)*<\/html>$/, "")
|
492
|
+
tidy_html.strip!
|
493
|
+
end
|
494
|
+
else
|
495
|
+
tidy_html = html
|
351
496
|
end
|
497
|
+
return tidy_html
|
498
|
+
end
|
352
499
|
|
353
|
-
|
354
|
-
|
355
|
-
|
500
|
+
# Removes all dangerous html tags from the html formatted text.
|
501
|
+
# If mode is set to :escape, dangerous and unknown elements will
|
502
|
+
# be escaped. If mode is set to :strip, dangerous and unknown
|
503
|
+
# elements and all children will be removed entirely.
|
504
|
+
# Dangerous or unknown attributes are always removed.
|
505
|
+
def FeedTools.sanitize_html(html, mode=:escape)
|
506
|
+
|
507
|
+
# Lists borrowed from Mark Pilgrim's feedparser
|
508
|
+
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
|
509
|
+
'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
|
510
|
+
'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl',
|
511
|
+
'dt', 'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4',
|
512
|
+
'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend',
|
513
|
+
'li', 'map', 'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's',
|
514
|
+
'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
|
515
|
+
'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt',
|
516
|
+
'u', 'ul', 'var']
|
356
517
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
518
|
+
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
|
519
|
+
'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
|
520
|
+
'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
|
521
|
+
'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
|
522
|
+
'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
|
523
|
+
'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
|
524
|
+
'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
|
525
|
+
'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
|
526
|
+
'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
|
527
|
+
'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
|
528
|
+
'type', 'usemap', 'valign', 'value', 'vspace', 'width']
|
364
529
|
|
365
|
-
#
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
530
|
+
# Stupid hack to pass this unit test:
|
531
|
+
# http://feedparser.org/tests/wellformed/rss/
|
532
|
+
# item_description_not_a_doctype.xml
|
533
|
+
html.gsub!(/<!'/, "<!'")
|
534
|
+
|
535
|
+
# The closer we are to proper xhtml, the more accurate the
|
536
|
+
# sanitization will be.
|
537
|
+
html = FeedTools.tidy_html(html)
|
538
|
+
|
539
|
+
# Hackity hack. But it works, and it seems plenty fast enough.
|
540
|
+
html_doc = HTree.parse_xml("<root>" + html + "</root>").to_rexml
|
541
|
+
|
542
|
+
sanitize_node = lambda do |html_node|
|
543
|
+
if html_node.respond_to? :children
|
544
|
+
for child in html_node.children
|
545
|
+
if child.kind_of? REXML::Element
|
546
|
+
unless acceptable_elements.include? child.name
|
547
|
+
if mode == :strip
|
548
|
+
html_node.delete_element(child)
|
549
|
+
else
|
550
|
+
new_child = REXML::Text.new(CGI.escapeHTML(child.to_s))
|
551
|
+
html_node.insert_after(child, new_child)
|
552
|
+
html_node.delete_element(child)
|
553
|
+
end
|
554
|
+
end
|
555
|
+
for attribute in child.attributes.keys
|
556
|
+
unless acceptable_attributes.include? attribute
|
557
|
+
child.delete_attribute(attribute)
|
558
|
+
end
|
559
|
+
end
|
560
|
+
end
|
561
|
+
sanitize_node.call(child)
|
379
562
|
end
|
380
563
|
end
|
564
|
+
html_node
|
381
565
|
end
|
566
|
+
sanitize_node.call(html_doc.root)
|
567
|
+
return html_doc.root.inner_xml
|
568
|
+
end
|
569
|
+
|
570
|
+
class Feed
|
571
|
+
include REXML
|
572
|
+
include AttributeDictionary
|
382
573
|
|
383
|
-
#
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
574
|
+
# Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
|
575
|
+
def Feed.open(url)
|
576
|
+
# clean up the url
|
577
|
+
url = FeedTools.normalize_url(url)
|
578
|
+
|
579
|
+
# create and load the new feed
|
580
|
+
feed = Feed.new
|
581
|
+
feed.url = url
|
582
|
+
feed.update
|
583
|
+
return feed
|
584
|
+
end
|
585
|
+
|
586
|
+
# Loads the feed from the remote url if the feed has expired from the cache or cannot be
|
587
|
+
# retrieved from the cache for some reason.
|
588
|
+
def update
|
589
|
+
if self.http_headers.nil? && !(self.cache_object.nil?) &&
|
590
|
+
!(self.cache_object.http_headers.nil?)
|
591
|
+
@http_headers = YAML.load(self.cache_object.http_headers)
|
592
|
+
end
|
593
|
+
if expired?
|
594
|
+
load_remote_feed
|
398
595
|
else
|
399
|
-
|
400
|
-
etag = http.meta['etag']
|
401
|
-
parse_feed(http.read)
|
402
|
-
end
|
596
|
+
@live = false
|
403
597
|
end
|
404
598
|
end
|
405
|
-
|
406
|
-
def parse_feed_hook(feed_data)
|
407
|
-
return nil
|
408
|
-
end
|
409
|
-
|
410
|
-
def parse_feed(feed_data)
|
411
|
-
root_node = Document.new(feed_data).root
|
412
|
-
metadata_node = XPath.first(root_node, "channel")
|
413
|
-
if metadata_node == nil
|
414
|
-
metadata_node = root_node
|
415
|
-
end
|
416
599
|
|
417
|
-
|
418
|
-
|
600
|
+
# Attempts to load the feed from the remote location. Requires the url
|
601
|
+
# field to be set. If an etag or the last_modified date has been set,
|
602
|
+
# attempts to use them to prevent unnecessary reloading of identical
|
603
|
+
# content.
|
604
|
+
# Attempts to load the feed from the location stored in +url+.
#
# Only http and file urls are actually retrieved; ftp is accepted but not
# implemented yet, and every other scheme (including https) raises
# FeedAccessError. For http, a previously stored etag / last-modified value
# is sent as If-None-Match / If-Modified-Since. When retrieval fails and
# xml_data is already available, the error is swallowed and the feed is
# flagged as not live; when no xml_data is available the error is re-raised.
# Afterwards the feed is saved if a cache object exists.
def load_remote_feed
  @live = true
  if http_headers.nil? && !cache_object.nil? &&
      !cache_object.http_headers.nil?
    # NOTE(review): YAML.load is applied to data read back from the cache
    # object; confirm that store is trusted.
    @http_headers = YAML.load(cache_object.http_headers)
  end

  if (url =~ /^feed:/) == 0
    # The feed: pseudo-protocol should already have been corrected; fix it.
    self.url = FeedTools.normalize_url(url)
  end

  # Find out what method we're going to be using to obtain this feed.
  uri = URI.parse(url)
  retrieval_method =
    case uri.scheme
    when "http", "ftp", "file"
      uri.scheme
    when nil
      raise FeedAccessError,
        "No protocol was specified in the url."
    else
      raise FeedAccessError,
        "Cannot retrieve feed using unrecognized protocol: " + uri.scheme
    end

  case retrieval_method
  when "http"
    # No need for http headers unless we're actually doing http.
    request_headers = {}
    unless http_headers.nil?
      unless http_headers['etag'].nil?
        request_headers["If-None-Match"] = http_headers['etag']
      end
      unless http_headers['last-modified'].nil?
        request_headers["If-Modified-Since"] = http_headers['last-modified']
      end
    end
    unless FeedTools.user_agent.nil?
      request_headers["User-Agent"] = FeedTools.user_agent
    end

    begin
      @http_response = http_fetch(url, request_headers)
      @http_headers = {}
      # each_header yields the name and the value as two block arguments.
      http_response.each_header do |name, value|
        http_headers[name.downcase] = value
      end
      self.last_retrieved = Time.now
      self.xml_data = http_response.body
    rescue FeedAccessError, Timeout::Error, Errno::ECONNRESET
      # Fall back to the xml_data we already hold, if there is any.
      @live = false
      raise if xml_data.nil?
    rescue => error
      # Anything else: keep whatever response the error carries, then fall
      # back to the xml_data we already hold.
      @http_response = nil
      @http_response = error if error.respond_to?(:each_header)
      if error.respond_to?(:response) &&
          error.response.respond_to?(:each_header)
        @http_response = error.response
      end
      unless @http_response.nil?
        @http_headers = {}
        # Same lower-cased keys as the success path, so 'etag' and
        # 'last-modified' lookups keep working after an error response.
        http_response.each_header do |name, value|
          http_headers[name.downcase] = value
        end
        # 304 means our copy is still current, so it counts as a retrieval.
        self.last_retrieved = Time.now if http_response.code.to_i == 304
      end
      @live = false
      raise if xml_data.nil?
    end
  when "ftp"
    # Not supported... yet. CDF feeds are technically allowed to live on an
    # ftp server.
  when "file"
    # Strip the 'file://' prefix to get a plain file name.
    file_name = url.gsub(/^file:\/\//, "")
    begin
      # File.open instead of Kernel#open: the name comes from a url and must
      # never be treated as a "|command" pipe.
      File.open(file_name) do |file|
        @http_response = nil
        @http_headers = {}
        self.last_retrieved = Time.now
        self.xml_data = file.read
      end
    rescue
      # The cache is unlikely to help for a local file; report immediately.
      @live = false
      raise
    end
  end

  unless cache_object.nil?
    begin
      save
    rescue
      # Saving is best-effort; a failed save must not lose the loaded feed.
    end
  end
end

# The http feed access method. Issues a GET for +feed_url+ with
# +http_headers+ and follows up to +redirect_limit+ redirects, collecting
# each hop in +response_chain+ as [requested_url, response, target_url].
# Returns the final successful response; raises FeedAccessError on a
# redirect loop, a missing Location header or too many redirects, and lets
# response.error! raise for every other non-success response (304 included).
def http_fetch(feed_url, http_headers, redirect_limit = 10,
    response_chain = []) # :nodoc:
  raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
  feed_uri = nil
  begin
    feed_uri = URI.parse(feed_url)
  rescue URI::InvalidURIError
    # Uh, maybe try to fix it?
    feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
  end

  # Borrowed from open-uri: net/http sets the Host header wrongly when an
  # absolute URI is used as the request URI, so set it explicitly.
  http_headers['Host'] = feed_uri.host
  http_headers['Host'] += ":#{feed_uri.port}" if feed_uri.port

  # request_uri keeps the query string and is never empty; plain #path would
  # silently drop "?..." from the feed address.
  request_target =
    feed_uri.respond_to?(:request_uri) ? feed_uri.request_uri : feed_uri.path

  Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
    response = http.request_get(request_target, http_headers)

    case response
    when Net::HTTPSuccess
      # Final destination reached. Every leading permanent redirect moves
      # the feed url to that redirect's target; stop at the first hop that
      # is not a 301.
      response_chain.each do |_requested, redirect, target|
        break unless redirect.code.to_i == 301
        self.url = target unless target.nil?
      end
      return response
    when Net::HTTPRedirection
      if response.code.to_i == 304
        response.error!
      else
        if response['Location'].nil?
          raise FeedAccessError,
            "No location to redirect to supplied: " + response.code
        end
        # Resolve relative Location headers against the url just requested.
        new_location =
          begin
            URI.join(feed_uri.to_s, response['location']).to_s
          rescue URI::Error
            response['location']
          end
        response_chain << [feed_url, response, new_location]
        if response_chain.assoc(new_location) != nil
          raise FeedAccessError, "Redirection loop detected."
        end
        http_fetch(new_location, http_headers,
          redirect_limit - 1, response_chain)
      end
    else
      response.error!
    end
  end
end
|
802
|
+
|
803
|
+
# Returns the relevant information from an http request.
|
804
|
+
# Reader for @http_response, which load_remote_feed fills in after an http
# request (and resets to nil when reading a local file).
def http_response
  @http_response
end
|
807
|
+
|
808
|
+
# Returns a hash of the http headers from the response.
|
809
|
+
# Reader for @http_headers, the hash of response headers kept by this feed.
def http_headers
  @http_headers
end
|
812
|
+
|
813
|
+
# Returns the feed's raw xml data.
|
814
|
+
# Returns the raw xml of the feed. When nothing is held in memory yet, the
# value is pulled from the cache object, if there is one.
def xml_data
  if @xml_data.nil? && !cache_object.nil?
    @xml_data = cache_object.xml_data
  end
  @xml_data
end
|
822
|
+
|
823
|
+
# Sets the feed's xml data.
|
824
|
+
# Stores the raw xml for the feed and mirrors it onto the cache object when
# one exists.
def xml_data=(new_xml_data)
  @xml_data = new_xml_data
  cache = cache_object
  cache.xml_data = new_xml_data unless cache.nil?
end
|
476
830
|
|
477
|
-
|
478
|
-
|
479
|
-
if
|
480
|
-
|
831
|
+
# Returns a REXML Document of the xml_data
|
832
|
+
# Returns the parsed document for xml_data, parsing it on first use. If the
# REXML parse raises, the data is run through HTree and converted to REXML
# instead.
def xml
  return @xml_doc unless @xml_doc.nil?
  begin
    @xml_doc = Document.new(xml_data)
  rescue
    # Something failed, attempt to repair the xml with htree.
    @xml_doc = HTree.parse(xml_data).to_rexml
  end
  @xml_doc
end
|
843
|
+
|
844
|
+
# Returns the first node within the channel_node that matches the xpath query.
|
845
|
+
# Runs +xpath+ against channel_node and returns the first match
# (XPath.first), or nil when nothing matches.
def find_node(xpath)
  XPath.first(channel_node, xpath)
end
|
848
|
+
|
849
|
+
# Returns all nodes within the channel_node that match the xpath query.
|
850
|
+
# Runs +xpath+ against channel_node and returns every match (XPath.match).
def find_all_nodes(xpath)
  XPath.match(channel_node, xpath)
end
|
853
|
+
|
854
|
+
# Returns the root node of the feed.
|
855
|
+
# Returns the root element of the parsed feed document, remembering it in
# @root_node once found.
def root_node
  @root_node = xml.root if @root_node.nil?
  @root_node
end
|
861
|
+
|
862
|
+
# Returns the channel node of the feed.
|
863
|
+
# Returns the element that carries the feed-level metadata: the first
# "channel" child of the root, else the first "feedinfo" child, else the
# root node itself. The result is remembered in @channel_node.
def channel_node
  if @channel_node.nil?
    @channel_node = XPath.first(root_node, "channel")
    @channel_node = XPath.first(root_node, "feedinfo") if @channel_node == nil
    @channel_node = root_node if @channel_node == nil
  end
  @channel_node
end
|
875
|
+
|
876
|
+
# The cache object that handles the feed persistence.
|
877
|
+
# Returns the object that persists this feed, or nil when
# FeedTools.feed_cache is not configured. On first use the cache is
# searched by @id, or by @url when no id is set; a fresh cache instance is
# created when nothing is found. Any error raised during that lookup is
# swallowed, leaving @cache_object as it was.
def cache_object
  if !FeedTools.feed_cache.nil? && @cache_object.nil?
    begin
      if @id != nil
        @cache_object = FeedTools.feed_cache.find_by_id(@id)
      elsif @url != nil
        @cache_object = FeedTools.feed_cache.find_by_url(@url)
      end
      @cache_object = FeedTools.feed_cache.new if @cache_object.nil?
    rescue
      # Lookup failures are deliberately ignored.
    end
  end
  @cache_object
end
|
895
|
+
|
896
|
+
# Sets the cache object for this feed.
|
897
|
+
#
|
898
|
+
# This can be any object, but it must accept the following messages:
|
899
|
+
# url
|
900
|
+
# url=
|
901
|
+
# title
|
902
|
+
# title=
|
903
|
+
# link
|
904
|
+
# link=
|
905
|
+
# xml_data
|
906
|
+
# xml_data=
|
907
|
+
# etag
|
908
|
+
# etag=
|
909
|
+
# last_modified
|
910
|
+
# last_modified=
|
911
|
+
# save
|
912
|
+
# Replaces the cache object used by this feed with +new_cache_object+.
def cache_object=(new_cache_object)
  @cache_object = new_cache_object
end
|
915
|
+
|
916
|
+
# Returns the feed's unique id
|
917
|
+
# Returns the feed's unique id. When not set yet, it is read from the text
# of the root node's "id" child, falling back to "guid"; an empty result is
# stored as nil.
def id
  if @id.nil?
    @id = XPath.first(root_node, "id/text()").to_s
    @id = XPath.first(root_node, "guid/text()").to_s if @id == ""
    @id = nil if @id == ""
  end
  @id
end
|
927
|
+
|
928
|
+
# Sets the feed's unique id
|
929
|
+
# Assigns the feed's unique id.
def id=(new_id)
  @id = new_id
end
|
932
|
+
|
933
|
+
# Returns the feed url.
|
934
|
+
# Returns the feed url. When none is set but xml data is available, the
# href of the channel's link[@rel='self'] is used; an empty result is
# stored as nil.
def url
  if @url.nil? && xml_data != nil
    @url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
    @url = nil if @url == ""
  end
  @url
end
|
497
941
|
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
942
|
+
# Sets the feed url and prepares the cache_object if necessary.
|
943
|
+
# Sets the feed url and mirrors it onto the cache object when one exists.
#
# new_url:: the feed location; it is passed through FeedTools.normalize_url.
#
# The normalized value is what gets written to the cache object too:
# cache_object looks records up by @url, so storing the raw string there
# would leave the cached url out of step with the one used for lookups.
def url=(new_url)
  @url = FeedTools.normalize_url(new_url)
  cache_object.url = @url unless cache_object.nil?
end
|
947
|
+
|
948
|
+
# Returns the feed title
|
949
|
+
# Returns the feed title, computing it on first use. A title whose type or
# mode attribute is "xhtml" is taken as inner xml; otherwise the title text
# is HTML-unescaped. The value is then sanitized (:strip mode), unescaped
# again, stripped of markup and surrounding whitespace, has newlines
# replaced by spaces, becomes nil when empty, and is copied to the cache
# object when one exists.
def title
  if @title.nil?
    is_xhtml = XPath.first(channel_node, "title/@type").to_s == "xhtml" ||
      XPath.first(channel_node, "title/@mode").to_s == "xhtml"
    if is_xhtml
      @title = XPath.first(channel_node, "title").inner_xml
    else
      # "escaped" and untyped titles are treated the same way.
      @title = CGI.unescapeHTML(
        XPath.first(channel_node, "title/text()").to_s)
    end
    unless @title.nil?
      @title = CGI.unescapeHTML(FeedTools.sanitize_html(@title, :strip))
    end
    unless @title == "" || @title.nil?
      @title = FeedTools.strip_html(@title).strip
    end
    @title.gsub!(/\n/, " ")
    @title = nil if @title == ""
    cache_object.title = @title unless cache_object.nil?
  end
  @title
end
|
974
|
+
|
975
|
+
# Sets the feed title
|
976
|
+
# Assigns the feed title and copies it to the cache object when one exists.
def title=(new_title)
  @title = new_title
  cache = cache_object
  cache.title = new_title unless cache.nil?
end
|
980
|
+
|
981
|
+
# Returns the feed description
|
982
|
+
# Returns the feed description, computing it on first use. Sources are
# tried in this order until one yields a non-empty string: description
# text, subtitle, tagline, info (only when no tagline element exists),
# abstract, summary, content:encoded (which also sets @bozo), the inner
# xml of the description element, itunes_summary and itunes_subtitle. The
# winner is sanitized, has leading/trailing div tags removed, has newlines
# flattened when shorter than 80 characters, is stripped, and becomes nil
# when empty.
def description
  if @description.nil?
    # Reads an element's text and unescapes it only when the element is
    # flagged mode="escaped".
    escaped_aware_text = lambda do |text_path, mode_path|
      text = XPath.first(channel_node, text_path).to_s
      if text != "" &&
          XPath.first(channel_node, mode_path).to_s == "escaped"
        text = CGI.unescapeHTML(text)
      end
      text
    end

    # get the feed description from the xml document
    @description = XPath.first(channel_node, "description/text()").to_s
    unless @description == ""
      if XPath.first(channel_node, "description/@encoding").to_s != ""
        @description = "[Embedded data objects are not supported.]"
      else
        @description = CGI.unescapeHTML(@description)
      end
    end
    if @description == ""
      @description =
        escaped_aware_text.call("subtitle/text()", "subtitle/@mode")
    end
    if @description == ""
      @description =
        escaped_aware_text.call("tagline/text()", "tagline/@mode")
    end
    if @description == "" && XPath.first(channel_node, "tagline") == nil
      @description = escaped_aware_text.call("info/text()", "info/@mode")
    end
    if @description == ""
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "abstract/text()").to_s)
    end
    if @description == ""
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "summary/text()").to_s)
    end
    if @description == ""
      # Probably not valid at the feed level, but this is most likely what
      # was meant; flag the feed as bozo when it is used.
      @description = CGI.unescapeHTML(
        XPath.first(channel_node, "content:encoded/text()").to_s)
      @bozo = true if @description != ""
    end
    if @description == ""
      begin
        @description = XPath.first(channel_node, "description").inner_xml
      rescue
        # No usable description element; keep the empty string.
      end
    end
    if @description == ""
      @description = itunes_summary
      @description = "" if @description.nil?
    end
    if @description == ""
      @description = itunes_subtitle
      @description = "" if @description.nil?
    end

    unless @description.nil?
      @description = FeedTools.sanitize_html(@description)
    end
    # If it started with a bunch of divs, hack them right off. We can put
    # them back later if they're needed.
    @description.gsub!(/^(<div[^>]*>)*/, "")
    @description.gsub!(/(<\/div>)*$/, "")

    @description.gsub!(/\n/, " ") if @description.size < 80
    @description = @description.strip unless @description.nil?
    @description = nil if @description == ""
  end
  @description
end
|
1059
|
+
|
1060
|
+
# Sets the feed description
|
1061
|
+
# Assigns the feed description.
def description=(new_description)
  @description = new_description
end
|
1064
|
+
|
1065
|
+
# Returns the contents of the itunes:summary element
|
1066
|
+
# Returns the sanitized, HTML-unescaped text of the root node's
# itunes:summary element, or nil when that text is empty. Computed on
# first use.
def itunes_summary
  if @itunes_summary.nil?
    raw = CGI.unescapeHTML(XPath.first(root_node,
      "itunes:summary/text()").to_s)
    @itunes_summary = (raw == "" ? nil : FeedTools.sanitize_html(raw))
  end
  @itunes_summary
end
|
1078
|
+
|
1079
|
+
# Sets the contents of the itunes:summary element
|
1080
|
+
# Assigns the contents of the itunes:summary element.
def itunes_summary=(new_itunes_summary)
  @itunes_summary = new_itunes_summary
end
|
1083
|
+
|
1084
|
+
# Returns the contents of the itunes:subtitle element
|
1085
|
+
# Returns the sanitized, HTML-unescaped text of the root node's
# itunes:subtitle element, or nil when that text is empty. Computed on
# first use.
def itunes_subtitle
  if @itunes_subtitle.nil?
    raw = CGI.unescapeHTML(XPath.first(root_node,
      "itunes:subtitle/text()").to_s)
    @itunes_subtitle = (raw == "" ? nil : FeedTools.sanitize_html(raw))
  end
  @itunes_subtitle
end
|
1098
|
+
|
1099
|
+
# Sets the contents of the itunes:subtitle element
|
1100
|
+
# Assigns the contents of the itunes:subtitle element.
def itunes_subtitle=(new_itunes_subtitle)
  @itunes_subtitle = new_itunes_subtitle
end
|
1103
|
+
|
1104
|
+
# Returns the feed link
|
1105
|
+
# Returns the feed link, computing it on first use.
#
# Candidates are tried in order until one is non-empty: the href of an
# alternate text/html link, of any alternate link, of any link, the link
# text, the channel's own href attribute, the guid when FeedTools.is_url?
# accepts it, and finally the channel's base attribute. The result is
# passed through FeedTools.normalize_url and copied to the cache object
# when one exists.
def link
  if @link.nil?
    # get the feed link from the xml document
    [
      # Both conditions need an explicit "and"; without it the predicate is
      # not a valid XPath expression.
      "link[@rel='alternate' and @type='text/html']/@href",
      "link[@rel='alternate']/@href",
      "link/@href",
      "link/text()",
      "@href"
    ].each do |xpath|
      @link = XPath.first(channel_node, xpath).to_s
      break unless @link == ""
    end
    if @link == ""
      # NOTE(review): guid is not defined in the visible part of this
      # class; confirm it is provided elsewhere.
      @link = guid if FeedTools.is_url? guid
    end
    if @link == ""
      # Technically the base attribute shouldn't be used for this, but if
      # href is missing we are already looking at a messed up CDF file.
      @link = XPath.first(channel_node, "@base").to_s
    end
    @link = FeedTools.normalize_url(@link)
    cache_object.link = @link unless cache_object.nil?
  end
  @link
end
|
1139
|
+
|
1140
|
+
# Sets the feed link
|
1141
|
+
# Assigns the feed link and copies it to the cache object when one exists.
def link=(new_link)
  @link = new_link
  cache = cache_object
  cache.link = new_link unless cache.nil?
end
|
1147
|
+
|
1148
|
+
# Returns the feed image link
|
1149
|
+
# Returns the feed image link, computing it on first use. The channel is
# probed with a fixed list of XPath expressions and the first non-empty
# result wins; that value is passed through FeedTools.normalize_url.
def image_link
  if @image_link.nil?
    # get the feed image link from the xml document
    [
      "image/url/text()",
      "image/@rdf:resource",
      "link[@type='image/jpeg']/@href",
      "link[@type='image/gif']/@href",
      "link[@type='image/png']/@href",
      "logo[@style='image']/@href",
      "logo/@href"
    ].each do |xpath|
      @image_link = XPath.first(channel_node, xpath).to_s
      break unless @image_link == ""
    end
    @image_link = FeedTools.normalize_url(@image_link)
  end
  @image_link
end
|
1175
|
+
|
1176
|
+
# Sets the feed image link
|
1177
|
+
# Assigns the feed image link.
def image_link=(new_image_link)
  @image_link = new_image_link
end
|
1180
|
+
|
1181
|
+
# Returns the url to the icon file for this feed.
|
1182
|
+
#
|
1183
|
+
# This method uses the url from the link field in order to avoid grabbing
|
1184
|
+
# the favicon for services like feedburner.
|
1185
|
+
# Returns the url of the feed's icon, computing it on first use. The
# channel is probed with a fixed list of XPath expressions; when none
# yields a value, "/favicon.ico" on the scheme and host of the feed's link
# is used.
def icon_link
  if @icon_link.nil?
    [
      "link[@rel='icon']/@href",
      "link[@rel='shortcut icon']/@href",
      "link[@type='image/x-icon']/@href",
      "icon/@href",
      "icon/text()"
    ].each do |xpath|
      @icon_link = XPath.first(channel_node, xpath).to_s
      break unless @icon_link == ""
    end
    if @icon_link == ""
      site = URI.parse(FeedTools.normalize_url(link))
      @icon_link = site.scheme + "://" + site.host + "/favicon.ico"
    end
  end
  @icon_link
end
|
1213
|
+
|
1214
|
+
# Returns the number of seconds before the feed should expire
|
1215
|
+
# Returns the feed's time to live as +@time_to_live.hour+, where
# @time_to_live is a whole number of hours. When not set yet, the hours
# come from syn:updateFrequency scaled by syn:updatePeriod, else from ttl
# (minutes, integer-divided by 60), else from the attributes of
# schedule/intervaltime; a missing or zero value defaults to one hour.
def time_to_live
  if @time_to_live.nil?
    # get the feed time to live from the xml document
    frequency =
      XPath.first(channel_node, "syn:updateFrequency/text()").to_s
    unless frequency == ""
      period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
      hours_per_period =
        case period
        when "daily"   then 24
        when "weekly"  then 24 * 7
        when "monthly" then 24 * 30
        when "yearly"  then 24 * 365
        else 1 # hourly
        end
      @time_to_live = frequency.to_i * hours_per_period
    end
  end
  if @time_to_live.nil?
    # expressed in minutes
    ttl_minutes = XPath.first(channel_node, "ttl/text()").to_s
    @time_to_live = (ttl_minutes.to_i / 60) unless ttl_minutes == ""
  end
  if @time_to_live.nil?
    days = XPath.first(channel_node, "schedule/intervaltime/@days").to_s
    hours = XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
    minutes = XPath.first(channel_node, "schedule/intervaltime/@min").to_s
    seconds = XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
    total = 0
    total += days.to_i * 24 unless days == ""
    total += hours.to_i * 1 unless hours == ""
    total += minutes.to_i / 60 unless minutes == ""
    total += seconds.to_i / 3600 unless seconds == ""
    @time_to_live = (total == 0 ? nil : total)
  end
  # Default to one hour
  @time_to_live = 1 if @time_to_live.nil? || @time_to_live == 0
  @time_to_live = @time_to_live.round
  @time_to_live.hour
end
|
1271
|
+
|
1272
|
+
# Sets the feed time to live
|
1273
|
+
# Sets the feed time to live. +new_time_to_live+ is divided by 3600 and
# rounded to get the stored hour count, which is never allowed below 1.
def time_to_live=(new_time_to_live)
  hours = (new_time_to_live / 3600).round
  @time_to_live = hours < 1 ? 1 : hours
end
|
1277
|
+
|
1278
|
+
# Returns the feed language
|
1279
|
+
# Returns the feed language in lower case, computing it on first use. The
# channel's language, dc:language and xml:lang texts are tried, then the
# root node's xml:lang text; "en-us" is used when all are empty.
def language
  if @language.nil?
    [
      [:channel, "language/text()"],
      [:channel, "dc:language/text()"],
      [:channel, "xml:lang/text()"],
      [:root, "xml:lang/text()"]
    ].each do |scope, xpath|
      node = (scope == :root ? root_node : channel_node)
      @language = XPath.first(node, xpath).to_s
      break unless @language == ""
    end
    @language = "en-us" if @language == ""
    @language = @language.downcase
  end
  @language
end
|
1298
|
+
|
1299
|
+
# Sets the feed language
|
1300
|
+
# Assigns the feed language.
def language=(new_language)
  @language = new_language
end
|
1303
|
+
|
1304
|
+
# Returns true if this feed contains explicit material.
|
1305
|
+
# Returns true when the channel flags explicit material: media:adult is
# "true", or itunes:explicit is "yes" or "true" (compared in lower case).
# Computed on first use.
def explicit
  if @explicit.nil?
    flagged =
      XPath.first(channel_node,
        "media:adult/text()").to_s.downcase == "true" ||
      XPath.first(channel_node,
        "itunes:explicit/text()").to_s.downcase == "yes" ||
      XPath.first(channel_node,
        "itunes:explicit/text()").to_s.downcase == "true"
    @explicit = flagged ? true : false
  end
  @explicit
end
|
1320
|
+
|
1321
|
+
# Sets whether or not the feed contains explicit material
|
1322
|
+
# Sets the explicit flag, coercing +new_explicit+ to true or false.
def explicit=(new_explicit)
  @explicit = !!new_explicit
end
|
1325
|
+
|
1326
|
+
# Returns the feed items
|
1327
|
+
# Returns the feed items, newest first. On first use a FeedItem is built
# for every "item" under the root node, else every "item" under the
# channel node, else every "entry" under the channel node. On every call
# the list is re-sorted by descending time, with items lacking a time
# treated as dated Time.mktime(1970).
def items
  if @items.nil?
    raw_items = XPath.match(root_node, "item")
    if raw_items == nil || raw_items == []
      raw_items = XPath.match(channel_node, "item")
    end
    if raw_items == nil || raw_items == []
      raw_items = XPath.match(channel_node, "entry")
    end

    # create the individual feed items
    @items = []
    unless raw_items == nil
      raw_items.each do |item_node|
        feed_item = FeedItem.new
        feed_item.xml_data = item_node.to_s
        feed_item.feed = self
        @items << feed_item
      end
    end
  end

  # Sort the items
  epoch = Time.mktime(1970)
  @items = @items.sort { |a, b| (b.time or epoch) <=> (a.time or epoch) }
  @items
end
|
1355
|
+
|
1356
|
+
# The time that the feed was last requested from the remote server. Nil if it has
|
1357
|
+
# never been pulled, or if it was created from scratch.
|
1358
|
+
# Returns the time the feed was last requested from the remote server.
# When a cache object exists its value replaces the one held in memory.
def last_retrieved
  cache = cache_object
  @last_retrieved = cache.last_retrieved unless cache.nil?
  @last_retrieved
end
|
1364
|
+
|
1365
|
+
# Sets the time that the feed was last updated.
|
1366
|
+
# Records when the feed was last retrieved and copies the value to the
# cache object when one exists.
def last_retrieved=(new_last_retrieved)
  @last_retrieved = new_last_retrieved
  cache = cache_object
  cache.last_retrieved = new_last_retrieved unless cache.nil?
end
|
1372
|
+
|
1373
|
+
# True if this feed contains audio content enclosures
|
1374
|
+
# True if any item of this feed has an enclosure for which audio? is true.
#
# This inspects the feed's own items; the earlier code walked the global
# $test_feed, so the answer never depended on the receiver.
def podcast?
  items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.audio? }
  end
end
|
1383
|
+
|
1384
|
+
# True if this feed contains video content enclosures
|
1385
|
+
# True if any item of this feed has an enclosure for which video? is true.
#
# This inspects the feed's own items; the earlier code walked the global
# $test_feed, so the answer never depended on the receiver.
def vidlog?
  items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.video? }
  end
end
|
1394
|
+
|
1395
|
+
# True if the feed was not last retrieved from the cache.
|
1396
|
+
# Returns the @live flag: set to true at the start of load_remote_feed and
# reset to false when the feed is served from existing data or a fetch
# fails.
def live?
  @live
end
|
1399
|
+
|
1400
|
+
# True if the feed has expired and must be reacquired from the remote server.
|
1401
|
+
# True if the feed has never been retrieved, or if last_retrieved plus
# time_to_live lies in the past.
#
# time_to_live already returns its hour count converted with .hour, so it
# is added as-is; converting it a second time would stretch a one-hour
# lifetime by a further factor of 3600.
def expired?
  retrieved_at = last_retrieved
  retrieved_at.nil? || (retrieved_at + time_to_live) < Time.now
end
|
1404
|
+
|
1405
|
+
# Forces this feed to expire.
|
1406
|
+
# Forces the feed to expire by moving last_retrieved back to
# Time.mktime(1970) and saving.
def expire
  self.last_retrieved = Time.mktime(1970)
  save
end
|
1410
|
+
|
1411
|
+
# A hook method that is called during the feed generation process. Overriding this method
|
1412
|
+
# will enable additional content to be inserted into the feed.
|
1413
|
+
# Extension point invoked by build_xml with the feed type, version and xml
# builder. This default implementation adds nothing and returns nil.
def build_xml_hook(feed_type, version, xml_builder)
  nil
end
|
1416
|
+
|
1417
|
+
# Generates xml based on the content of the feed
|
1418
|
+
def build_xml(feed_type="rss", version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
  # A version of 0.0 stands for "unspecified": substitute the default
  # version of the requested format.
  if version == 0.0
    case feed_type
    when "rss" then version = 1.0
    when "atom" then version = 0.3
    end
  end

  # True for values that should be treated as "not set".
  blank = lambda { |value| value.nil? || value == "" }

  # The value of the matching branch (whatever the outermost builder call
  # returns) is the method's result; an unknown feed type yields nil.
  if feed_type == "rss" && [0.9, 1.0, 1.1].include?(version)
    # RDF-based rss format
    xml_builder.tag!("rdf:RDF") do
      xml_builder.channel("rdf:about" => CGI.escapeHTML(link)) do
        # title, link and description are always emitted, empty when unset
        if blank.call(title)
          xml_builder.title
        else
          xml_builder.title(title)
        end
        if blank.call(link)
          xml_builder.link
        else
          xml_builder.link(link)
        end
        unless blank.call(image_link)
          xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
        end
        if blank.call(description)
          xml_builder.description
        else
          xml_builder.description(description)
        end
        xml_builder.tag!("dc:language", language) unless blank.call(language)
        xml_builder.tag!("syn:updatePeriod", "hourly")
        xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
        xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
        # Table of contents: one rdf:li per item, keyed by the item link
        xml_builder.items do
          xml_builder.tag!("rdf:Seq") do
            unless items.nil?
              items.each do |item|
                if item.link.nil?
                  raise "Cannot generate an rdf-based feed with a nil item link field."
                end
                xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
              end
            end
          end
        end
        build_xml_hook(feed_type, version, xml_builder)
      end
      # In the RDF formats the image is a sibling of the channel
      unless blank.call(image_link)
        xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
          if blank.call(title)
            xml_builder.title
          else
            xml_builder.title(title)
          end
          xml_builder.url(image_link) unless blank.call(image_link)
          if blank.call(link)
            xml_builder.link
          else
            xml_builder.link(link)
          end
        end
      end
      unless items.nil?
        items.each { |item| item.build_xml(feed_type, version, xml_builder) }
      end
    end
  elsif feed_type == "rss"
    # normal rss format
    xml_builder.rss("version" => version.to_s) do
      xml_builder.title(title) unless blank.call(title)
      xml_builder.link(link) unless blank.call(link)
      xml_builder.description(description) unless blank.call(description)
      xml_builder.ttl((time_to_live / 1.minute).to_s)
      xml_builder.generator("http://www.sporkmonger.com/projects/feedtools")
      build_xml_hook(feed_type, version, xml_builder)
      unless items.nil?
        items.each { |item| item.build_xml(feed_type, version, xml_builder) }
      end
    end
  elsif feed_type == "atom"
    # normal atom format
    xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
                     "version" => version.to_s,
                     "xml:lang" => language) do
      unless blank.call(title)
        xml_builder.title(title,
                          "mode" => "escaped",
                          "type" => "text/html")
      end
      unless blank.call(link)
        xml_builder.link("href" => link,
                         "rel" => "alternate",
                         "type" => "text/html",
                         "title" => title)
      end
      unless blank.call(description)
        xml_builder.tagline(description,
                            "mode" => "escaped",
                            "type" => "text/html")
      end
      xml_builder.generator("FeedTools",
                            "url" => "http://www.sporkmonger.com/projects/feedtools")
      build_xml_hook(feed_type, version, xml_builder)
      unless items.nil?
        items.each { |item| item.build_xml(feed_type, version, xml_builder) }
      end
    end
  end
end
|
1542
|
+
|
1543
|
+
# Persists the current feed state to the cache.
|
1544
|
+
def save
  # Preconditions, checked in this order: caching enabled, url present,
  # xml_data present, cache record present. Each failure raises a
  # RuntimeError with its own message.
  raise "Caching is currently disabled. Cannot save to cache." if FeedTools.feed_cache.nil?
  raise "The url field must be set to save to the cache." if self.url.nil?
  raise "The xml_data field must be set to save to the cache." if self.xml_data.nil?
  raise "The cache_object is currently nil. Cannot save to cache." if self.cache_object.nil?

  # Copy the current feed state onto the cache record and persist it.
  self.cache_object.url = self.url
  self.cache_object.title = self.title
  self.cache_object.link = self.link
  self.cache_object.xml_data = self.xml_data
  # Headers are stored as YAML, and only when an http response is held.
  self.cache_object.http_headers = self.http_headers.to_yaml unless self.http_response.nil?
  self.cache_object.last_retrieved = self.last_retrieved
  self.cache_object.save
end
|
1565
|
+
|
1566
|
+
# Alternate accessor names. Each listed name is aliased to the target
# accessor, reader and writer alike; entries is a read-only alias of items.
{
  :description  => [:tagline, :subtitle, :abstract, :content],
  :time_to_live => [:ttl],
  :id           => [:guid]
}.each do |target, alternate_names|
  alternate_names.each do |alternate_name|
    alias_method alternate_name, target
    alias_method "#{alternate_name}=", "#{target}="
  end
end
alias_method :entries, :items
|
1579
|
+
|
1580
|
+
# passes missing methods to the cache_object
|
1581
|
+
def method_missing(msg, *params, &block)
  # Without a cache record there is nothing to delegate to.
  if self.cache_object.nil?
    raise NoMethodError, "Invalid method #{msg.to_s}"
  end
  # Splat the collected arguments back out (and pass any block along).
  # Sending `params` itself handed the target a single Array argument,
  # which broke every delegated call.
  return self.cache_object.send(msg, *params, &block)
end
|
1587
|
+
|
1588
|
+
# passes missing methods to the FeedTools.feed_cache
|
1589
|
+
def Feed.method_missing(msg, *params, &block)
  # With caching disabled there is no class to delegate to.
  if FeedTools.feed_cache.nil?
    raise NoMethodError, "Invalid method Feed.#{msg.to_s}"
  end
  # Splat the collected arguments back out (and pass any block along).
  # Sending `params` itself handed the cache a single Array argument.
  result = FeedTools.feed_cache.send(msg, *params, &block)
  # A returned cache record is swapped for the Feed opened from its url.
  if result.kind_of? FeedTools.feed_cache
    result = Feed.open(result.url)
  end
  return result
end
|
1599
|
+
end
|
1600
|
+
|
1601
|
+
class FeedItem
|
1602
|
+
include REXML
|
1603
|
+
include AttributeDictionary
|
1604
|
+
|
1605
|
+
# This class stores information about a feed item's file enclosures.
|
1606
|
+
# This class stores information about a feed item's file enclosures.
class Enclosure
  include AttributeDictionary

  # Values accepted by expression=.
  EXPRESSIONS = ['sample', 'full', 'nonstop'].freeze
  # File extensions (without the dot) that mark an enclosure url as audio.
  # TODO: create a more complete list
  AUDIO_EXTENSIONS = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma'].freeze
  # File extensions (without the dot) that mark an enclosure url as video.
  # TODO: create a more complete list
  VIDEO_EXTENSIONS = ['mov', 'mp4', 'avi', 'wmv', 'asf'].freeze

  # The url for the enclosure
  attr_accessor :url
  # The MIME type of the file referenced by the enclosure
  attr_accessor :type
  # The size of the file referenced by the enclosure
  attr_accessor :file_size
  # The total play time of the file referenced by the enclosure
  attr_accessor :duration
  # The height in pixels of the enclosed media
  attr_accessor :height
  # The width in pixels of the enclosed media
  attr_accessor :width
  # The bitrate of the enclosed media
  attr_accessor :bitrate
  # The framerate of the enclosed media
  attr_accessor :framerate
  # The thumbnail for this enclosure
  attr_accessor :thumbnail
  # The categories for this enclosure
  attr_accessor :categories
  # A hash of the enclosed file
  #
  # NOTE(review): this accessor replaces Object#hash on Enclosure
  # instances, so they are not reliable as Hash keys or with Array#uniq.
  # The name is part of the public interface and is kept as is.
  attr_accessor :hash
  # A website containing some kind of media player instead of a direct
  # link to the media file.
  attr_accessor :player
  # A list of credits for the enclosed media
  attr_accessor :credits
  # A text rendition of the enclosed media
  attr_accessor :text
  # A list of alternate version of the enclosed media file
  attr_accessor :versions
  # The default version of the enclosed media file
  attr_accessor :default_version

  # Returns true if this is the default enclosure
  def is_default?
    return @is_default
  end

  # Sets whether this is the default enclosure for the media group
  def is_default=(new_is_default)
    @is_default = new_is_default
  end

  # Returns true if the enclosure contains explicit material
  def explicit?
    return @explicit
  end

  # Sets the explicit attribute on the enclosure
  def explicit=(new_explicit)
    @explicit = new_explicit
  end

  # Determines if the object is a sample, or the full version of the
  # object, or if it is a stream.
  # Possible values are 'sample', 'full', 'nonstop', or nil when unset.
  def expression
    return @expression
  end

  # Sets the expression attribute on the enclosure.
  # Allowed values are 'sample', 'full', 'nonstop' (case-insensitive).
  # nil or a blank string clears the attribute: the value is typically fed
  # from an xml attribute's to_s, which is "" when the attribute is absent,
  # and that used to raise.
  # Raises ArgumentError for any other value.
  def expression=(new_expression)
    normalized = new_expression.to_s.strip.downcase
    if normalized == ""
      @expression = nil
      return
    end
    unless EXPRESSIONS.include? normalized
      raise ArgumentError,
        "Permitted values are 'sample', 'full', 'nonstop'."
    end
    @expression = normalized
  end

  # Returns true if this enclosure contains audio content
  def audio?
    # An audio/* MIME type settles it; otherwise judge by the url extension.
    unless self.type.nil?
      return true if (self.type =~ /^audio/) != nil
    end
    return url_has_extension?(AUDIO_EXTENSIONS)
  end

  # Returns true if this enclosure contains video content
  def video?
    # A video/* MIME type (or the "image/mov" type this library also counts
    # as video) settles it; otherwise judge by the url extension.
    unless self.type.nil?
      return true if (self.type =~ /^video/) != nil
      return true if self.type == "image/mov"
    end
    return url_has_extension?(VIDEO_EXTENSIONS)
  end

  private

  # True if the url ends in a dot followed by one of the given extensions,
  # ignoring case and surrounding whitespace. Requiring the dot keeps urls
  # such as ".../blogg" from counting as "ogg"; a missing url never matches.
  def url_has_extension?(extensions)
    location = self.url.to_s.strip
    extensions.any? do |extension|
      (location =~ /\.#{Regexp.escape(extension)}\z/i) != nil
    end
  end
end
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
1713
|
+
# Value objects attached to an Enclosure. The string passed as the first
# argument additionally registers each class under the Struct namespace
# (for example Struct::EnclosureCategory).
EnclosureCategory = Struct.new(
  "EnclosureCategory", :category, :scheme, :label
)
EnclosureHash = Struct.new(
  "EnclosureHash", :hash, :type
)
EnclosurePlayer = Struct.new(
  "EnclosurePlayer", :url, :height, :width
)
EnclosureCredit = Struct.new(
  "EnclosureCredit", :name, :role
)
EnclosureThumbnail = Struct.new(
  "EnclosureThumbnail", :url, :height, :width
)
|
590
1718
|
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
1719
|
+
# Returns the parent feed of this feed item
|
1720
|
+
def feed
  # Plain reader for the parent feed reference.
  @feed
end
|
1723
|
+
|
1724
|
+
# Sets the parent feed of this feed item
|
1725
|
+
def feed=(new_feed)
  # Stores the reference as given; no validation is performed.
  @feed = new_feed
end
|
1728
|
+
|
1729
|
+
# Returns the feed item's raw xml data.
|
1730
|
+
def xml_data
  # Plain reader for the stored xml source.
  @xml_data
end
|
1733
|
+
|
1734
|
+
# Sets the feed item's xml data.
|
1735
|
+
def xml_data=(new_xml_data)
  # Stores the value as given; documents already parsed from earlier data
  # are not invalidated here.
  @xml_data = new_xml_data
end
|
1738
|
+
|
1739
|
+
# Returns a REXML Document of the xml_data
|
1740
|
+
def xml
  # Parse xml_data on first use and keep the resulting document.
  @xml_doc = Document.new(xml_data) if @xml_doc.nil?
  @xml_doc
end
|
1746
|
+
|
1747
|
+
# Returns the first node within the root_node that matches the xpath query.
|
1748
|
+
def find_node(xpath)
  # The query is evaluated relative to root_node; nil when nothing matches.
  XPath.first(root_node, xpath)
end
|
1751
|
+
|
1752
|
+
# Returns all nodes within the root_node that match the xpath query.
|
1753
|
+
def find_all_nodes(xpath)
  # The query is evaluated relative to root_node; empty when nothing matches.
  XPath.match(root_node, xpath)
end
|
1756
|
+
|
1757
|
+
# Returns the root node of the feed item.
|
1758
|
+
def root_node
  # Resolve the document root on first use and keep it.
  @root_node = xml.root if @root_node.nil?
  @root_node
end
|
1764
|
+
|
1765
|
+
# Returns the feed item title
|
1766
|
+
def title
  if @title.nil?
    title_type = XPath.first(root_node, "title/@type").to_s
    title_mode = XPath.first(root_node, "title/@mode").to_s
    if title_type == "xhtml" || title_mode == "xhtml"
      # xhtml titles keep their inner markup for the sanitizer below
      @title = XPath.first(root_node, "title").inner_xml
    else
      # "escaped" titles and titles without a declared type are decoded
      # the same way
      @title = CGI.unescapeHTML(
        XPath.first(root_node, "title/text()").to_s)
    end
    unless @title.nil?
      @title = CGI.unescapeHTML(FeedTools.sanitize_html(@title, :strip))
    end
    if @title != ""
      # Some blogging tools include the number of comments in a post
      # in the title... this is supremely ugly, and breaks any
      # applications which expect the title to be static, so we're
      # gonna strip them out.
      #
      # If for some incredibly weird reason you need the actual
      # unstripped title, just use find_node("title/text()").to_s
      without_counter = @title.strip.gsub(/\[\d*\]$/, "")
      @title = FeedTools.strip_html(without_counter).strip
      @title.gsub!(/\n/, " ")
    end
    # An empty title is reported as nil
    @title = nil if @title == ""
  end
  @title
end
|
1798
|
+
|
1799
|
+
# Sets the feed item title
|
1800
|
+
def title=(new_title)
  # Overrides whatever would otherwise be parsed from the xml.
  @title = new_title
end
|
1803
|
+
|
1804
|
+
# Returns the feed item description
|
1805
|
+
def description
  if @description.nil?
    # get the item content; each source below is only consulted while the
    # description is still empty
    @description = ""

    # An embedded (x)html body element comes first
    body_node = XPath.first(root_node, "xhtml:body") || XPath.first(root_node, "body")
    @description = body_node.inner_xml unless body_node.nil?

    if @description == ""
      @description =
        CGI.unescapeHTML(XPath.first(root_node, "content:encoded/text()").to_s)
    end

    # The description element: CDATA content first, then plain text
    if @description == ""
      @description = begin
        XPath.first(root_node, "description").cdatas.first.to_s
      rescue
        ""
      end
      if @description == ""
        @description = XPath.first(root_node, "description/text()").to_s
      end
      if @description != ""
        if XPath.first(root_node, "description/@encoding").to_s == ""
          @description = CGI.unescapeHTML(@description)
        else
          # Not supported... yet.
          @description = "[Embedded data objects are not supported.]"
        end
      end
    end

    # The content element, honouring its mode/type attributes
    if @description == ""
      @description = XPath.first(root_node, "content/text()").to_s
      content_mode = XPath.first(root_node, "content/@mode").to_s
      content_type = XPath.first(root_node, "content/@type").to_s
      if @description != "" && (content_mode == "escaped" || content_type == "escaped")
        @description = CGI.unescapeHTML(@description)
      end
      if content_mode == "xhtml" || content_type == "xhtml"
        @description = XPath.first(root_node, "content").inner_xml
      end
    end

    # Raw inner markup of the description element; a failure here (for
    # example no such element) leaves the description empty
    if @description == ""
      begin
        @description = XPath.first(root_node, "description").inner_xml
      rescue
      end
    end

    # Finally fall back on the itunes and media module fields
    [:itunes_summary, :itunes_subtitle, :media_text].each do |fallback|
      next unless @description == ""
      @description = self.send(fallback)
      @description = "" if @description.nil?
    end

    @description = FeedTools.sanitize_html(@description) unless @description.nil?

    # If it started with a bunch of divs, hack them right off.  We can put
    # them back later if they're needed.
    @description.gsub!(/^(<div[^>]*>)*/, "")
    @description.gsub!(/(<\/div>)*$/, "")

    # Short descriptions are flattened onto a single line
    @description.gsub!(/\n/, " ") if @description.size < 80
    @description = @description.strip unless @description.nil?
    # An empty description is reported as nil
    @description = nil if @description == ""
  end
  @description
end
|
1884
|
+
|
1885
|
+
# Sets the feed item description
|
1886
|
+
def description=(new_description)
  # Overrides whatever would otherwise be parsed from the xml.
  @description = new_description
end
|
1889
|
+
|
1890
|
+
# Returns the feed item link
|
1891
|
+
def link
  if @link.nil?
    # Try each known location for the item link in turn, stopping at the
    # first one that yields a value.
    @link = ""
    [
      "link[@rel='alternate']/@href",
      "link/@href",
      "link/text()",
      "@rdf:about",
      "guid[@isPermaLink='true']/text()"
    ].each do |query|
      break unless @link == ""
      @link = XPath.first(root_node, query).to_s
    end
    # Last resort: a guid that is itself a url
    if @link == "" && FeedTools.is_url?(self.guid)
      @link = self.guid
    end
    @link = CGI.unescapeHTML(@link) if @link != ""
    if @link != "" && !((@link =~ /http:\/\//) == 0 || (@link =~ /https:\/\//) == 0)
      # avoid a doubled slash where base and link meet
      if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
        @link = @link[1..-1]
      end
      # prepend the base to the link since they seem to have used a relative path
      @link = feed.base + @link
    end
    @link = FeedTools.normalize_url(@link)
  end
  @link
end
|
1925
|
+
|
1926
|
+
# Sets the feed item link
|
1927
|
+
def link=(new_link)
  # Overrides whatever would otherwise be parsed from the xml.
  @link = new_link
end
|
1930
|
+
|
1931
|
+
# Returns the feed comment link
|
1932
|
+
def comment_link
  if @comment_link.nil?
    # get the comment link from the xml document, falling back on the
    # item's own link when there is no comments element
    @comment_link = XPath.first(root_node, "comments/text()").to_s
    @comment_link = self.link if @comment_link == ""
    @comment_link = FeedTools.normalize_url(@comment_link)
  end
  @comment_link
end
|
1943
|
+
|
1944
|
+
# Sets the feed comment link
|
1945
|
+
def comment_link=(new_comment_link)
  # Overrides whatever would otherwise be parsed from the xml.
  @comment_link = new_comment_link
end
|
1948
|
+
|
1949
|
+
# Returns the feed image link
|
1950
|
+
def image_link
  if @image_link.nil?
    # get the image link from the xml document
    #
    # Start from an empty string so the fallback chain below actually runs.
    # Previously @image_link was still nil at this point, every `== ""`
    # guard was false, no lookup was attempted and nil was normalized.
    @image_link = ""
    [
      "link[@type='image/jpeg']/@href",
      "link[@type='image/gif']/@href",
      "link[@type='image/png']/@href",
      # The following two should technically never occur, but have been
      # included simply because both have been seen occurring in the wild
      # at least once.
      "image/url/text()",
      "image/@rdf:resource"
    ].each do |query|
      break unless @image_link == ""
      @image_link = XPath.first(root_node, query).to_s
    end
    if @image_link == ""
      # If there's only a media thumbnail, we can just borrow it.  Technically, this isn't
      # ideal, but chances are very good that anything that makes use of this image is
      # simply not going to care anyhow.
      @image_link = XPath.first(root_node, "media:thumbnail/@url").to_s
      if @image_link == ""
        # NOTE(review): kept as written. This only ever stores an empty
        # string in @media_image_link, which nothing visible here reads;
        # the condition looks inverted -- confirm the intent.
        @media_image_link = @image_link
      end
    end
    if @image_link == ""
      # If there's only an itunes image, we can just borrow it.  See comment above regarding
      # less-than-ideal-ness.
      cached_itunes_link = @itunes_image_link
      if cached_itunes_link.nil? || cached_itunes_link == ""
        @image_link = XPath.first(root_node, "itunes:image/@href").to_s
        if @image_link == ""
          @image_link = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
        end
        # A nil @itunes_image_link used to take the branch below and wipe
        # the result. It is left nil here so that itunes_image_link can
        # still compute (and normalize) its own value; only an already
        # empty cache entry is overwritten, as before.
        @itunes_image_link = @image_link if cached_itunes_link == ""
      else
        @image_link = cached_itunes_link
      end
    end
    @image_link = FeedTools.normalize_url(@image_link)
  end
  return @image_link
end
|
1996
|
+
|
1997
|
+
# Sets the feed image link
|
1998
|
+
def image_link=(new_image_link)
  # Overrides whatever would otherwise be parsed from the xml.
  @image_link = new_image_link
end
|
2001
|
+
|
2002
|
+
# Returns the feed item itunes image link
|
2003
|
+
#
|
2004
|
+
# If it's not present, falls back to the normal image link.
|
2005
|
+
# Technically, the itunes spec says that the image needs to be
|
2006
|
+
# square and larger than 300x300, but hey, if there's an image
|
2007
|
+
# to be had, it's better than none at all.
|
2008
|
+
def itunes_image_link
  if @itunes_image_link.nil?
    # get the feed item itunes image link from the xml document, trying the
    # image element before the link element
    ["itunes:image/@href", "itunes:link[@rel='image']/@href"].each do |query|
      @itunes_image_link = XPath.first(root_node, query).to_s
      break unless @itunes_image_link == ""
    end
    # nothing itunes-specific: settle for the regular image link
    @itunes_image_link = self.image_link if @itunes_image_link == ""
    @itunes_image_link = FeedTools.normalize_url(@itunes_image_link)
  end
  @itunes_image_link
end
|
2022
|
+
|
2023
|
+
# Sets the feed item itunes image link
|
2024
|
+
def itunes_image_link=(new_itunes_image_link)
  # Overrides whatever would otherwise be parsed from the xml.
  @itunes_image_link = new_itunes_image_link
end
|
2027
|
+
|
2028
|
+
# Returns the feed item media thumbnail link
|
2029
|
+
#
|
2030
|
+
# If it's not present, falls back to the normal image link.
|
2031
|
+
def media_thumbnail_link
  if @media_thumbnail_link.nil?
    # get the feed item media thumbnail link from the xml document, settling
    # for the regular image link when there is none
    @media_thumbnail_link = XPath.first(root_node, "media:thumbnail/@url").to_s
    @media_thumbnail_link = image_link if @media_thumbnail_link == ""
    @media_thumbnail_link = FeedTools.normalize_url(@media_thumbnail_link)
  end
  @media_thumbnail_link
end
|
2042
|
+
|
2043
|
+
# Sets the feed item media thumbnail url
|
2044
|
+
def media_thumbnail_link=(new_media_thumbnail_link)
  # Overrides whatever would otherwise be parsed from the xml.
  @media_thumbnail_link = new_media_thumbnail_link
end
|
606
2047
|
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
item_title = item_title.gsub(/\[\d*\]/,"").strip
|
614
|
-
existing_title = existing_title.gsub(/\[\d*\]/,"").strip
|
615
|
-
item_title = item_title.gsub(/\(\d*\)/,"").strip
|
616
|
-
existing_title = existing_title.gsub(/\(\d*\)/,"").strip
|
617
|
-
item_title = item_title.gsub(/\{\d*\}/,"").strip
|
618
|
-
existing_title = existing_title.gsub(/\{\d*\}/,"").strip
|
619
|
-
if existing_title != item_title
|
620
|
-
feed_item = nil
|
2048
|
+
# Returns the feed item's unique id
|
2049
|
+
def id
  if @id.nil?
    # the id element is preferred; the guid element is the fallback
    ["id/text()", "guid/text()"].each do |query|
      @id = XPath.first(root_node, query).to_s
      break unless @id == ""
    end
    # an empty id is reported as nil
    @id = nil if @id == ""
  end
  @id
end
|
625
2059
|
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
feed_item = FeedItem.new
|
630
|
-
end
|
631
|
-
feed_item.feed = self
|
632
|
-
feed_item.parse_item(item_data)
|
633
|
-
return feed_item
|
634
|
-
end
|
635
|
-
|
636
|
-
def build_feed_hook(feed_type, version, xml_builder)
|
637
|
-
return nil
|
2060
|
+
# Sets the feed item's unique id
|
2061
|
+
def id=(new_id)
  # Overrides whatever would otherwise be parsed from the xml.
  @id = new_id
end
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
2064
|
+
|
2065
|
+
# Returns all feed item enclosures
|
2066
|
+
def enclosures
|
2067
|
+
if @enclosures.nil?
|
2068
|
+
@enclosures = []
|
2069
|
+
|
2070
|
+
# First, load up all the different possible sources of enclosures
|
2071
|
+
rss_enclosures = XPath.match(root_node, "enclosure")
|
2072
|
+
atom_enclosures = XPath.match(root_node, "link[@rel='enclosure']")
|
2073
|
+
media_content_enclosures = XPath.match(root_node, "media:content")
|
2074
|
+
media_group_enclosures = XPath.match(root_node, "media:group")
|
2075
|
+
|
2076
|
+
# Parse RSS-type enclosures. Thanks to a few buggy enclosures implementations,
|
2077
|
+
# sometimes these also manage to show up in atom files.
|
2078
|
+
for enclosure_node in rss_enclosures
|
2079
|
+
enclosure = Enclosure.new
|
2080
|
+
enclosure.url = CGI.unescapeHTML(enclosure_node.attributes["url"].to_s)
|
2081
|
+
enclosure.type = enclosure_node.attributes["type"].to_s
|
2082
|
+
enclosure.file_size = enclosure_node.attributes["length"].to_i
|
2083
|
+
enclosure.credits = []
|
2084
|
+
enclosure.explicit = false
|
2085
|
+
@enclosures << enclosure
|
2086
|
+
end
|
2087
|
+
|
2088
|
+
# Parse atom-type enclosures. If there are repeats of the same enclosure object,
|
2089
|
+
# we merge the two together.
|
2090
|
+
for enclosure_node in atom_enclosures
|
2091
|
+
enclosure_url = CGI.unescapeHTML(enclosure_node.attributes["href"].to_s)
|
2092
|
+
enclosure = nil
|
2093
|
+
new_enclosure = false
|
2094
|
+
for existing_enclosure in @enclosures
|
2095
|
+
if existing_enclosure.url == enclosure_url
|
2096
|
+
enclosure = existing_enclosure
|
2097
|
+
break
|
654
2098
|
end
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
2099
|
+
end
|
2100
|
+
if enclosure.nil?
|
2101
|
+
new_enclosure = true
|
2102
|
+
enclosure = Enclosure.new
|
2103
|
+
end
|
2104
|
+
enclosure.url = enclosure_url
|
2105
|
+
enclosure.type = enclosure_node.attributes["type"].to_s
|
2106
|
+
enclosure.file_size = enclosure_node.attributes["length"].to_i
|
2107
|
+
enclosure.credits = []
|
2108
|
+
enclosure.explicit = false
|
2109
|
+
if new_enclosure
|
2110
|
+
@enclosures << enclosure
|
2111
|
+
end
|
2112
|
+
end
|
2113
|
+
|
2114
|
+
# Creates an anonymous method to parse content objects from the media module. We
|
2115
|
+
# do this to avoid excessive duplication of code since we have to do identical
|
2116
|
+
# processing for content objects within group objects.
|
2117
|
+
parse_media_content = lambda do |media_content_nodes|
|
2118
|
+
affected_enclosures = []
|
2119
|
+
for enclosure_node in media_content_nodes
|
2120
|
+
enclosure_url = CGI.unescapeHTML(enclosure_node.attributes["url"].to_s)
|
2121
|
+
enclosure = nil
|
2122
|
+
new_enclosure = false
|
2123
|
+
for existing_enclosure in @enclosures
|
2124
|
+
if existing_enclosure.url == enclosure_url
|
2125
|
+
enclosure = existing_enclosure
|
2126
|
+
break
|
2127
|
+
end
|
659
2128
|
end
|
660
|
-
|
661
|
-
|
2129
|
+
if enclosure.nil?
|
2130
|
+
new_enclosure = true
|
2131
|
+
enclosure = Enclosure.new
|
662
2132
|
end
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
2133
|
+
enclosure.url = enclosure_url
|
2134
|
+
enclosure.type = enclosure_node.attributes["type"].to_s
|
2135
|
+
enclosure.file_size = enclosure_node.attributes["fileSize"].to_i
|
2136
|
+
enclosure.duration = enclosure_node.attributes["duration"].to_s
|
2137
|
+
enclosure.height = enclosure_node.attributes["height"].to_i
|
2138
|
+
enclosure.width = enclosure_node.attributes["width"].to_i
|
2139
|
+
enclosure.bitrate = enclosure_node.attributes["bitrate"].to_i
|
2140
|
+
enclosure.framerate = enclosure_node.attributes["framerate"].to_i
|
2141
|
+
enclosure.expression = enclosure_node.attributes["expression"].to_s
|
2142
|
+
enclosure.is_default =
|
2143
|
+
(enclosure_node.attributes["isDefault"].to_s.downcase == "true")
|
2144
|
+
if XPath.first(enclosure_node, "media:thumbnail/@url").to_s != ""
|
2145
|
+
enclosure.thumbnail = EnclosureThumbnail.new(
|
2146
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@url").to_s),
|
2147
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@height").to_s),
|
2148
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:thumbnail/@width").to_s)
|
2149
|
+
)
|
2150
|
+
if enclosure.thumbnail.height == ""
|
2151
|
+
enclosure.thumbnail.height = nil
|
2152
|
+
end
|
2153
|
+
if enclosure.thumbnail.width == ""
|
2154
|
+
enclosure.thumbnail.width = nil
|
2155
|
+
end
|
667
2156
|
end
|
668
|
-
|
669
|
-
|
2157
|
+
enclosure.categories = []
|
2158
|
+
for category in XPath.match(enclosure_node, "media:category")
|
2159
|
+
enclosure.categories << EnclosureCategory.new(
|
2160
|
+
CGI.unescapeHTML(category.text),
|
2161
|
+
CGI.unescapeHTML(category.attributes["scheme"].to_s),
|
2162
|
+
CGI.unescapeHTML(category.attributes["label"].to_s)
|
2163
|
+
)
|
2164
|
+
if enclosure.categories.last.scheme == ""
|
2165
|
+
enclosure.categories.last.scheme = nil
|
2166
|
+
end
|
2167
|
+
if enclosure.categories.last.label == ""
|
2168
|
+
enclosure.categories.last.label = nil
|
2169
|
+
end
|
670
2170
|
end
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
2171
|
+
if XPath.first(enclosure_node, "media:hash/text()").to_s != ""
|
2172
|
+
enclosure.hash = EnclosureHash.new(
|
2173
|
+
FeedTools.sanitize_html(CGI.unescapeHTML(XPath.first(
|
2174
|
+
enclosure_node, "media:hash/text()").to_s), :strip),
|
2175
|
+
"md5"
|
2176
|
+
)
|
2177
|
+
end
|
2178
|
+
if XPath.first(enclosure_node, "media:player/@url").to_s != ""
|
2179
|
+
enclosure.player = EnclosurePlayer.new(
|
2180
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@url").to_s),
|
2181
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@height").to_s),
|
2182
|
+
CGI.unescapeHTML(XPath.first(enclosure_node, "media:player/@width").to_s)
|
2183
|
+
)
|
2184
|
+
if enclosure.player.height == ""
|
2185
|
+
enclosure.player.height = nil
|
2186
|
+
end
|
2187
|
+
if enclosure.player.width == ""
|
2188
|
+
enclosure.player.width = nil
|
2189
|
+
end
|
2190
|
+
end
|
2191
|
+
enclosure.credits = []
|
2192
|
+
for credit in XPath.match(enclosure_node, "media:credit")
|
2193
|
+
enclosure.credits << EnclosureCredit.new(
|
2194
|
+
CGI.unescapeHTML(CGI.unescapeHTML(credit.text)),
|
2195
|
+
CGI.unescapeHTML(credit.attributes["role"].to_s.downcase)
|
2196
|
+
)
|
2197
|
+
if enclosure.credits.last.role == ""
|
2198
|
+
enclosure.credits.last.role = nil
|
684
2199
|
end
|
685
2200
|
end
|
686
|
-
|
2201
|
+
enclosure.explicit = (XPath.first(enclosure_node,
|
2202
|
+
"media:adult/text()").to_s.downcase == "true")
|
2203
|
+
if XPath.first(enclosure_node, "media:text/text()").to_s != ""
|
2204
|
+
enclosure.text = CGI.unescapeHTML(XPath.first(enclosure_node,
|
2205
|
+
"media:text/text()").to_s)
|
2206
|
+
end
|
2207
|
+
affected_enclosures << enclosure
|
2208
|
+
if new_enclosure
|
2209
|
+
@enclosures << enclosure
|
2210
|
+
end
|
687
2211
|
end
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
2212
|
+
affected_enclosures
|
2213
|
+
end
|
2214
|
+
|
2215
|
+
# Parse the independent content objects.
|
2216
|
+
parse_media_content.call(media_content_enclosures)
|
2217
|
+
|
2218
|
+
media_groups = []
|
2219
|
+
|
2220
|
+
# Parse the group objects.
|
2221
|
+
for media_group in media_group_enclosures
|
2222
|
+
group_media_content_enclosures =
|
2223
|
+
XPath.match(media_group, "media:content")
|
2224
|
+
|
2225
|
+
# Parse the content objects within the group objects.
|
2226
|
+
affected_enclosures =
|
2227
|
+
parse_media_content.call(group_media_content_enclosures)
|
2228
|
+
|
2229
|
+
# Now make sure that content objects inherit certain properties from
|
2230
|
+
# the group objects.
|
2231
|
+
for enclosure in affected_enclosures
|
2232
|
+
if enclosure.thumbnail.nil? &&
|
2233
|
+
XPath.first(media_group, "media:thumbnail/@url").to_s != ""
|
2234
|
+
enclosure.thumbnail = EnclosureThumbnail.new(
|
2235
|
+
CGI.unescapeHTML(
|
2236
|
+
XPath.first(media_group, "media:thumbnail/@url").to_s),
|
2237
|
+
CGI.unescapeHTML(
|
2238
|
+
XPath.first(media_group, "media:thumbnail/@height").to_s),
|
2239
|
+
CGI.unescapeHTML(
|
2240
|
+
XPath.first(media_group, "media:thumbnail/@width").to_s)
|
2241
|
+
)
|
2242
|
+
if enclosure.thumbnail.height == ""
|
2243
|
+
enclosure.thumbnail.height = nil
|
694
2244
|
end
|
695
|
-
|
696
|
-
|
2245
|
+
if enclosure.thumbnail.width == ""
|
2246
|
+
enclosure.thumbnail.width = nil
|
697
2247
|
end
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
2248
|
+
end
|
2249
|
+
if (enclosure.categories.nil? || enclosure.categories.size == 0)
|
2250
|
+
enclosure.categories = []
|
2251
|
+
for category in XPath.match(media_group, "media:category")
|
2252
|
+
enclosure.categories << EnclosureCategory.new(
|
2253
|
+
CGI.unescapeHTML(category.text),
|
2254
|
+
CGI.unescapeHTML(category.attributes["scheme"].to_s),
|
2255
|
+
CGI.unescapeHTML(category.attributes["label"].to_s)
|
2256
|
+
)
|
2257
|
+
if enclosure.categories.last.scheme == ""
|
2258
|
+
enclosure.categories.last.scheme = nil
|
2259
|
+
end
|
2260
|
+
if enclosure.categories.last.label == ""
|
2261
|
+
enclosure.categories.last.label = nil
|
2262
|
+
end
|
702
2263
|
end
|
703
2264
|
end
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
2265
|
+
if enclosure.hash.nil? &&
|
2266
|
+
XPath.first(media_group, "media:hash/text()").to_s != ""
|
2267
|
+
enclosure.hash = EnclosureHash.new(
|
2268
|
+
CGI.unescapeHTML(XPath.first(media_group, "media:hash/text()").to_s),
|
2269
|
+
"md5"
|
2270
|
+
)
|
2271
|
+
end
|
2272
|
+
if enclosure.player.nil? &&
|
2273
|
+
XPath.first(media_group, "media:player/@url").to_s != ""
|
2274
|
+
enclosure.player = EnclosurePlayer.new(
|
2275
|
+
CGI.unescapeHTML(XPath.first(media_group, "media:player/@url").to_s),
|
2276
|
+
CGI.unescapeHTML(XPath.first(media_group, "media:player/@height").to_s),
|
2277
|
+
CGI.unescapeHTML(XPath.first(media_group, "media:player/@width").to_s)
|
2278
|
+
)
|
2279
|
+
if enclosure.player.height == ""
|
2280
|
+
enclosure.player.height = nil
|
2281
|
+
end
|
2282
|
+
if enclosure.player.width == ""
|
2283
|
+
enclosure.player.width = nil
|
2284
|
+
end
|
2285
|
+
end
|
2286
|
+
if enclosure.credits.nil? || enclosure.credits.size == 0
|
2287
|
+
enclosure.credits = []
|
2288
|
+
for credit in XPath.match(media_group, "media:credit")
|
2289
|
+
enclosure.credits << EnclosureCredit.new(
|
2290
|
+
CGI.unescapeHTML(CGI.unescapeHTML(credit.text)),
|
2291
|
+
CGI.unescapeHTML(credit.attributes["role"].to_s.downcase)
|
2292
|
+
)
|
2293
|
+
if enclosure.credits.last.role == ""
|
2294
|
+
enclosure.credits.last.role = nil
|
2295
|
+
end
|
2296
|
+
end
|
2297
|
+
end
|
2298
|
+
if enclosure.explicit?.nil?
|
2299
|
+
enclosure.explicit = (XPath.first(media_group,
|
2300
|
+
"media:adult/text()").to_s.downcase == "true") ? true : false
|
2301
|
+
end
|
2302
|
+
if enclosure.text.nil? &&
|
2303
|
+
XPath.first(media_group, "media:text/text()").to_s != ""
|
2304
|
+
enclosure.text = FeedTools.sanitize_html(CGI.unescapeHTML(
|
2305
|
+
XPath.first(media_group, "media:text/text()").to_s), :strip)
|
708
2306
|
end
|
709
2307
|
end
|
2308
|
+
|
2309
|
+
# Keep track of the media groups
|
2310
|
+
media_groups << affected_enclosures
|
710
2311
|
end
|
711
|
-
|
712
|
-
#
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
end
|
717
|
-
unless link.nil? || link == ""
|
718
|
-
xml_builder.link(link)
|
2312
|
+
|
2313
|
+
# Now we need to inherit any relevant item level information.
|
2314
|
+
if self.explicit?
|
2315
|
+
for enclosure in @enclosures
|
2316
|
+
enclosure.explicit = true
|
719
2317
|
end
|
720
|
-
|
721
|
-
|
2318
|
+
end
|
2319
|
+
|
2320
|
+
# Add all the itunes categories
|
2321
|
+
for itunes_category in XPath.match(root_node, "itunes:category")
|
2322
|
+
genre = "Podcasts"
|
2323
|
+
category = itunes_category.attributes["text"].to_s
|
2324
|
+
subcategory = XPath.first(itunes_category, "itunes:category/@text").to_s
|
2325
|
+
category_path = genre
|
2326
|
+
if category != ""
|
2327
|
+
category_path << "/" + category
|
722
2328
|
end
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
2329
|
+
if subcategory != ""
|
2330
|
+
category_path << "/" + subcategory
|
2331
|
+
end
|
2332
|
+
for enclosure in @enclosures
|
2333
|
+
if enclosure.categories.nil?
|
2334
|
+
enclosure.categories = []
|
729
2335
|
end
|
2336
|
+
enclosure.categories << EnclosureCategory.new(
|
2337
|
+
CGI.unescapeHTML(category_path),
|
2338
|
+
CGI.unescapeHTML("http://www.apple.com/itunes/store/"),
|
2339
|
+
CGI.unescapeHTML("iTunes Music Store Categories")
|
2340
|
+
)
|
730
2341
|
end
|
731
2342
|
end
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
xml_builder.title(title,
|
739
|
-
"mode" => "escaped",
|
740
|
-
"type" => "text/html")
|
2343
|
+
|
2344
|
+
for enclosure in @enclosures
|
2345
|
+
# Clean up any of those attributes that incorrectly have ""
|
2346
|
+
# or 0 as their values
|
2347
|
+
if enclosure.type == ""
|
2348
|
+
enclosure.type = nil
|
741
2349
|
end
|
742
|
-
|
743
|
-
|
744
|
-
"rel" => "alternate",
|
745
|
-
"type" => "text/html",
|
746
|
-
"title" => title)
|
2350
|
+
if enclosure.file_size == 0
|
2351
|
+
enclosure.file_size = nil
|
747
2352
|
end
|
748
|
-
|
749
|
-
|
750
|
-
"mode" => "escaped",
|
751
|
-
"type" => "text/html")
|
2353
|
+
if enclosure.duration == 0
|
2354
|
+
enclosure.duration = nil
|
752
2355
|
end
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
2356
|
+
if enclosure.height == 0
|
2357
|
+
enclosure.height = nil
|
2358
|
+
end
|
2359
|
+
if enclosure.width == 0
|
2360
|
+
enclosure.width = nil
|
2361
|
+
end
|
2362
|
+
if enclosure.bitrate == 0
|
2363
|
+
enclosure.bitrate = nil
|
2364
|
+
end
|
2365
|
+
if enclosure.framerate == 0
|
2366
|
+
enclosure.framerate = nil
|
2367
|
+
end
|
2368
|
+
if enclosure.expression == "" || enclosure.expression.nil?
|
2369
|
+
enclosure.expression = "full"
|
2370
|
+
end
|
2371
|
+
|
2372
|
+
# If an enclosure is missing the text field, fall back on the itunes:summary field
|
2373
|
+
if enclosure.text.nil? || enclosure.text = ""
|
2374
|
+
enclosure.text = self.itunes_summary
|
2375
|
+
end
|
2376
|
+
|
2377
|
+
# Make sure we don't have duplicate categories
|
2378
|
+
unless enclosure.categories.nil?
|
2379
|
+
enclosure.categories.uniq!
|
2380
|
+
end
|
2381
|
+
end
|
2382
|
+
|
2383
|
+
# And finally, now things get complicated. This is where we make
|
2384
|
+
# sure that the enclosures method only returns either default
|
2385
|
+
# enclosures or enclosures with only one version. Any enclosures
|
2386
|
+
# that are wrapped in a media:group will be placed in the appropriate
|
2387
|
+
# versions field.
|
2388
|
+
affected_enclosure_urls = []
|
2389
|
+
for media_group in media_groups
|
2390
|
+
affected_enclosure_urls =
|
2391
|
+
affected_enclosure_urls | (media_group.map do |enclosure|
|
2392
|
+
enclosure.url
|
2393
|
+
end)
|
2394
|
+
end
|
2395
|
+
@enclosures.delete_if do |enclosure|
|
2396
|
+
(affected_enclosure_urls.include? enclosure.url)
|
2397
|
+
end
|
2398
|
+
for media_group in media_groups
|
2399
|
+
default_enclosure = nil
|
2400
|
+
for enclosure in media_group
|
2401
|
+
if enclosure.is_default?
|
2402
|
+
default_enclosure = enclosure
|
759
2403
|
end
|
760
2404
|
end
|
2405
|
+
for enclosure in media_group
|
2406
|
+
enclosure.default_version = default_enclosure
|
2407
|
+
enclosure.versions = media_group.clone
|
2408
|
+
enclosure.versions.delete(enclosure)
|
2409
|
+
end
|
2410
|
+
@enclosures << default_enclosure
|
761
2411
|
end
|
762
2412
|
end
|
763
|
-
end
|
764
|
-
|
765
|
-
# Saves the current state of the feed to the database unless the feed lacks a remote location
|
766
|
-
def save
|
767
|
-
unless url.nil? || url == ""
|
768
|
-
super
|
769
|
-
end
|
770
|
-
end
|
771
|
-
end
|
772
2413
|
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
connection.execute "select id, feed_id, link, title, author, description, " +
|
780
|
-
"time, tags from feed_items limit 1"
|
781
|
-
rescue ActiveRecord::StatementInvalid
|
782
|
-
return false
|
2414
|
+
# If we have a single enclosure, it's safe to inherit the itunes:duration field
|
2415
|
+
# if it's missing.
|
2416
|
+
if @enclosures.size == 1
|
2417
|
+
if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
|
2418
|
+
@enclosures.first.duration = self.duration
|
2419
|
+
end
|
783
2420
|
end
|
784
|
-
|
2421
|
+
|
2422
|
+
return @enclosures
|
785
2423
|
end
|
786
2424
|
|
787
|
-
def
|
788
|
-
|
789
|
-
return @feed
|
790
|
-
elsif @feed_id != nil
|
791
|
-
@feed = Feed.find_by_id(self.feed_id)
|
792
|
-
return @feed
|
793
|
-
else
|
794
|
-
return nil
|
795
|
-
end
|
2425
|
+
# Replaces the memoized enclosure list with +new_enclosures+.
# The value is stored as given; no parsing or validation happens here.
def enclosures=(new_enclosures)
  @enclosures = new_enclosures
end
|
797
2428
|
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
2429
|
+
# Returns the feed item author
|
2430
|
+
# Looks the author up in author/name, then dc:creator, then author, and
# memoizes the first non-empty, HTML-unescaped value. When none of the
# elements has text the result is the empty string.
#
# TODO: actually ensure we're looking at a name and not an email address.
# Also, factor in itunes module.
def author_name
  if @author_name.nil?
    candidate_paths = ["author/name/text()", "dc:creator/text()", "author/text()"]
    candidate_paths.each do |path|
      @author_name = CGI.unescapeHTML(XPath.first(root_node, path).to_s)
      break unless @author_name == ""
    end
  end
  return @author_name
end
|
806
|
-
|
807
|
-
|
808
|
-
|
2446
|
+
|
2447
|
+
# Sets the feed item author
|
2448
|
+
# Stores +new_author_name+ as the memoized author name; a non-nil value
# stops author_name from consulting the feed item's XML.
def author_name=(new_author_name)
  @author_name = new_author_name
end
|
810
|
-
|
811
|
-
|
812
|
-
|
2451
|
+
|
2452
|
+
# Returns the contents of the itunes:summary element
|
2453
|
+
# Memoized reader: the HTML-unescaped text of itunes:summary passed through
# FeedTools.sanitize_html, or nil when the element is missing or empty.
def itunes_summary
  if @itunes_summary.nil?
    raw_summary = CGI.unescapeHTML(
      XPath.first(root_node, "itunes:summary/text()").to_s)
    @itunes_summary =
      (raw_summary == "") ? nil : FeedTools.sanitize_html(raw_summary)
  end
  return @itunes_summary
end
|
814
2466
|
|
815
|
-
|
816
|
-
|
2467
|
+
# Sets the contents of the itunes:summary element
|
2468
|
+
# Stores +new_itunes_summary+ as the memoized itunes:summary value.
def itunes_summary=(new_itunes_summary)
  @itunes_summary = new_itunes_summary
end
|
818
2471
|
|
819
|
-
|
820
|
-
|
2472
|
+
# Returns the contents of the itunes:subtitle element
|
2473
|
+
# Memoized reader: the HTML-unescaped text of itunes:subtitle passed through
# FeedTools.sanitize_html, or nil when the element is missing or empty.
def itunes_subtitle
  if @itunes_subtitle.nil?
    subtitle_text = XPath.first(root_node, "itunes:subtitle/text()").to_s
    subtitle_text = CGI.unescapeHTML(subtitle_text)
    if subtitle_text == ""
      @itunes_subtitle = nil
    else
      @itunes_subtitle = FeedTools.sanitize_html(subtitle_text)
    end
  end
  return @itunes_subtitle
end
|
822
2486
|
|
823
|
-
|
824
|
-
|
2487
|
+
# Sets the contents of the itunes:subtitle element
|
2488
|
+
# Stores +new_itunes_subtitle+ as the memoized itunes:subtitle value.
def itunes_subtitle=(new_itunes_subtitle)
  @itunes_subtitle = new_itunes_subtitle
end
|
826
2491
|
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
end
|
838
|
-
if link == ""
|
839
|
-
link = XPath.first(item_node, "link/text()").to_s
|
840
|
-
end
|
841
|
-
if link == ""
|
842
|
-
link = XPath.first(item_node, "@rdf:about").to_s
|
843
|
-
end
|
844
|
-
if link == ""
|
845
|
-
link = XPath.first(item_node, "guid/text()").to_s
|
846
|
-
end
|
847
|
-
if link != ""
|
848
|
-
link = CGI.unescapeHTML(link)
|
849
|
-
end
|
850
|
-
if link != "" && (link =~ /http:\/\//) != 0 && (link =~ /https:\/\//) != 0
|
851
|
-
# ensure that we don't end up with 'http://www.foobar.com//path/to/entry'
|
852
|
-
# future-proofed this so that it doesn't break when Ruby 1.9/2.0 starts
|
853
|
-
# returning single character Strings instead of FixNums
|
854
|
-
if (base[-1] == 47 && link[0] == 47) || (base[-1] == "/" && link[0] == "/")
|
855
|
-
link = link[1..-1]
|
2492
|
+
# Returns the contents of the media:text element
|
2493
|
+
# Memoized reader for the item-level media:text element.
#
# Returns the HTML-unescaped text passed through FeedTools.sanitize_html, or
# nil when the element is missing or empty.
#
# This previously queried "itunes:subtitle/text()", so it returned the
# subtitle rather than the media:text content it is documented to return.
def media_text
  if @media_text.nil?
    @media_text = CGI.unescapeHTML(XPath.first(root_node,
      "media:text/text()").to_s)
    if @media_text == ""
      @media_text = nil
    else
      @media_text = FeedTools.sanitize_html(@media_text)
    end
  end
  return @media_text
end
|
876
2506
|
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
2507
|
+
# Sets the contents of the media:text element
|
2508
|
+
# Stores +new_media_text+ as the memoized media:text value.
def media_text=(new_media_text)
  @media_text = new_media_text
end
|
2511
|
+
|
2512
|
+
# Returns the contents of the itunes:author element
|
2513
|
+
#
|
2514
|
+
# This inherits from any incorrectly placed channel-level itunes:author
|
2515
|
+
# elements. They're actually amazingly common. People don't read specs.
|
2516
|
+
# Memoized reader: uses the item's own itunes:author text and, only when
# that is empty, the itunes:author found under feed.channel_node.
# Returns nil when neither location has any text.
def itunes_author
  if @itunes_author.nil?
    author_text = CGI.unescapeHTML(
      XPath.first(root_node, "itunes:author/text()").to_s)
    if author_text == ""
      # The channel node is only consulted when the item has no author.
      author_text = CGI.unescapeHTML(
        XPath.first(feed.channel_node, "itunes:author/text()").to_s)
    end
    @itunes_author = (author_text == "") ? nil : author_text
  end
  return @itunes_author
end
|
2530
|
+
|
2531
|
+
# Sets the contents of the itunes:author element
|
2532
|
+
# Stores +new_itunes_author+ as the memoized itunes:author value.
def itunes_author=(new_itunes_author)
  @itunes_author = new_itunes_author
end
|
2535
|
+
|
2536
|
+
# Returns the number of seconds that the associated media runs for
|
2537
|
+
# Memoized reader for itunes:duration, converted to a whole number of
# seconds.
#
# Accepts "H:M:S", "M:S" or "S". Each field is converted with to_i, so
# non-numeric fields count as 0. Returns nil when the element is missing or
# empty, or when it does not have one to three colon-separated fields.
#
# The conversion uses plain integer arithmetic instead of the
# Integer#hour / Integer#minute core extensions, so the result is always an
# Integer and does not depend on those extensions being loaded.
def duration
  if @duration.nil?
    itunes_duration = CGI.unescapeHTML(XPath.first(root_node,
      "itunes:duration/text()").to_s)
    if itunes_duration != ""
      hms = itunes_duration.split(":").map { |x| x.to_i }
      case hms.size
      when 3
        @duration = (hms[0] * 3600) + (hms[1] * 60) + hms[2]
      when 2
        @duration = (hms[0] * 60) + hms[1]
      when 1
        @duration = hms[0]
      end
    end
  end
  return @duration
end
|
2554
|
+
|
2555
|
+
# Sets the number of seconds that the associated media runs for
|
2556
|
+
# Stores +new_duration+ as the memoized duration value.
def duration=(new_duration)
  @duration = new_duration
end
|
2559
|
+
|
2560
|
+
# Sets the itunes:summary
|
2561
|
+
# Stores +new_itunes_summary+ as the memoized itunes:summary value.
#
# This is a second definition of itunes_summary= and, being later in the
# class, it is the one that takes effect. It used to have an empty body,
# which silently discarded every assignment; it now stores the value.
def itunes_summary=(new_itunes_summary)
  @itunes_summary = new_itunes_summary
end
|
2563
|
+
|
2564
|
+
# Returns the feed item time
|
2565
|
+
# Memoized reader for the item's timestamp.
#
# The first non-empty value among pubDate, dc:date and issued is parsed with
# Time.parse. If parsing raises, the current time is used instead. When none
# of the elements has any text the result is nil.
#
# The former "elsif time_string == nil" branch was removed: time_string
# always comes from to_s and so can never be nil, which made the branch
# unreachable. Results are unchanged.
def time
  if @time.nil?
    time_string = ""
    ["pubDate/text()", "dc:date/text()", "issued/text()"].each do |path|
      time_string = XPath.first(root_node, path).to_s
      break if time_string != ""
    end
    if time_string != ""
      begin
        @time = Time.parse(time_string)
      rescue
        # An unparseable date falls back on the current time.
        @time = Time.now
      end
    end
  end
  return @time
end
|
2582
|
+
|
2583
|
+
# Sets the feed item time
|
2584
|
+
# Stores +new_time+ as the memoized item time.
def time=(new_time)
  @time = new_time
end
|
2587
|
+
|
2588
|
+
# Returns the feed item tags
|
2589
|
+
# Memoized reader for the item's tags: an array of lowercased, stripped,
# de-duplicated strings (empty when nothing is found).
#
# Sources are tried in this order, stopping at the first that yields tags:
#   1. dc:subject/rdf:Bag/rdf:li
#   2. taxo:topics/rdf:Bag/rdf:li (tag taken from the @resource URL)
#   3. category
#   4. dc:subject
#   5. itunes:keywords (split on whitespace)
#
# Fixes over the previous version:
# * the taxo:topics loop read @resource from root_node instead of from each
#   rdf:li node, so it never found a tag;
# * a dc:subject rdf:Bag holding exactly one rdf:li was ignored because of a
#   "size > 1" guard.
def tags
  # TODO: support the rel="tag" microformat
  # =======================================
  if @tags.nil?
    @tags = []
    (XPath.match(root_node,
      "dc:subject/rdf:Bag/rdf:li/text()") || []).each do |tag|
      @tags << tag.to_s.downcase.strip
    end
    if @tags.size == 0
      # messy effort to find ourselves some tags, mainly for del.icio.us
      (XPath.match(root_node,
        "taxo:topics/rdf:Bag/rdf:li") || []).each do |tag_node|
        begin
          tag_url = XPath.first(tag_node, "@resource").to_s
          tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
          if tag_match.size > 0
            @tags << tag_match.first.last.downcase.strip
          end
        rescue
          # Best effort: a topic that cannot be read is skipped.
        end
      end
    end
    if @tags.size == 0
      (XPath.match(root_node, "category/text()") || []).each do |tag|
        @tags << tag.to_s.downcase.strip
      end
    end
    if @tags.size == 0
      (XPath.match(root_node, "dc:subject/text()") || []).each do |tag|
        @tags << tag.to_s.downcase.strip
      end
    end
    if @tags.size == 0
      begin
        @tags = XPath.first(root_node,
          "itunes:keywords/text()").to_s.downcase.split(" ")
      rescue
        @tags = []
      end
    end
    @tags.uniq!
  end
  return @tags
end
|
973
2648
|
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
if
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
if Feed.cache_enabled?
|
996
|
-
save
|
2649
|
+
# Sets the feed item tags
|
2650
|
+
# Stores +new_tags+ as the memoized tag list.
def tags=(new_tags)
  @tags = new_tags
end
|
2653
|
+
|
2654
|
+
# Returns true if this feed item contains explicit material. If the whole
|
2655
|
+
# feed has been marked as explicit, this will return true even if the item
|
2656
|
+
# isn't explicitly marked as explicit.
|
2657
|
+
# Memoized predicate. True when media:adult is "true", when itunes:explicit
# is "yes" or "true" (all compared case-insensitively), or when
# feed.explicit is truthy; false otherwise. feed.explicit is only consulted
# when the item's own elements do not mark it as explicit.
def explicit?
  if @explicit.nil?
    @explicit =
      if XPath.first(root_node, "media:adult/text()").to_s.downcase == "true"
        true
      elsif ["yes", "true"].include?(
          XPath.first(root_node, "itunes:explicit/text()").to_s.downcase)
        true
      elsif feed.explicit
        true
      else
        false
      end
  end
  return @explicit
end
|
1000
2673
|
|
1001
|
-
|
2674
|
+
# Sets whether or not the feed contains explicit material
|
2675
|
+
# Stores the truthiness of +new_explicit+ as a strict true/false flag.
def explicit=(new_explicit)
  @explicit = !!new_explicit
end
|
1003
2678
|
|
1004
|
-
|
2679
|
+
# A hook method that is called during the feed generation process. Overriding this method
|
2680
|
+
# will enable additional content to be inserted into the feed.
|
2681
|
+
# Default implementation of the generation hook: ignores its arguments,
# adds nothing to the output and returns nil.
def build_xml_hook(feed_type, version, xml_builder)
  nil
end
|
2684
|
+
|
2685
|
+
# Generates xml based on the content of the feed item
|
2686
|
+
def build_xml(feed_type="rss", version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
|
1005
2687
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
|
1006
2688
|
# RDF-based rss format
|
1007
2689
|
if link.nil?
|
@@ -1026,15 +2708,17 @@ module FeedTools
|
|
1026
2708
|
unless time.nil?
|
1027
2709
|
xml_builder.tag!("dc:date", time.iso8601)
|
1028
2710
|
end
|
1029
|
-
unless tags.nil?
|
2711
|
+
unless tags.nil? || tags.size == 0
|
1030
2712
|
xml_builder.tag!("dc:subject") do
|
1031
2713
|
xml_builder.tag!("rdf:Bag") do
|
1032
|
-
for tag in
|
2714
|
+
for tag in tags
|
1033
2715
|
xml_builder.tag!("rdf:li", tag)
|
1034
2716
|
end
|
1035
2717
|
end
|
1036
2718
|
end
|
2719
|
+
xml_builder.tag!("itunes:keywords", tags.join(" "))
|
1037
2720
|
end
|
2721
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1038
2722
|
end
|
1039
2723
|
elsif feed_type == "rss"
|
1040
2724
|
# normal rss format
|
@@ -1051,15 +2735,17 @@ module FeedTools
|
|
1051
2735
|
unless time.nil?
|
1052
2736
|
xml_builder.pubDate(time.rfc822)
|
1053
2737
|
end
|
1054
|
-
unless tags.nil?
|
2738
|
+
unless tags.nil? || tags.size == 0
|
1055
2739
|
xml_builder.tag!("dc:subject") do
|
1056
2740
|
xml_builder.tag!("rdf:Bag") do
|
1057
|
-
for tag in
|
2741
|
+
for tag in tags
|
1058
2742
|
xml_builder.tag!("rdf:li", tag)
|
1059
2743
|
end
|
1060
2744
|
end
|
1061
2745
|
end
|
2746
|
+
xml_builder.tag!("itunes:keywords", tags.join(" "))
|
1062
2747
|
end
|
2748
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1063
2749
|
end
|
1064
2750
|
elsif feed_type == "atom"
|
1065
2751
|
# normal atom format
|
@@ -1083,31 +2769,34 @@ module FeedTools
|
|
1083
2769
|
unless time.nil?
|
1084
2770
|
xml_builder.issued(time.iso8601)
|
1085
2771
|
end
|
1086
|
-
unless tags.nil?
|
1087
|
-
for tag in
|
2772
|
+
unless tags.nil? || tags.size == 0
|
2773
|
+
for tag in tags
|
1088
2774
|
xml_builder.category(tag)
|
1089
2775
|
end
|
1090
2776
|
end
|
2777
|
+
build_xml_hook(feed_type, version, xml_builder)
|
1091
2778
|
end
|
1092
2779
|
end
|
1093
2780
|
end
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
2781
|
+
|
2782
|
+
# tagline, subtitle, abstract and content are alternate names for
# description (reader and writer); guid is an alternate name for id.
[:tagline, :subtitle, :abstract, :content].each do |synonym|
  alias_method synonym, :description
  alias_method "#{synonym}=", :description=
end
alias_method :guid, :id
alias_method :guid=, :id=
|
1102
2792
|
end
|
1103
2793
|
end
|
1104
2794
|
|
1105
|
-
module REXML
|
1106
|
-
class Element
|
1107
|
-
|
1108
|
-
def inner_xml
|
2795
|
+
module REXML #:nodoc:
|
2796
|
+
class Element #:nodoc:
|
2797
|
+
def inner_xml #:nodoc:
|
1109
2798
|
result = ""
|
1110
|
-
each_child do |child|
|
2799
|
+
self.each_child do |child|
|
1111
2800
|
result << child.to_s
|
1112
2801
|
end
|
1113
2802
|
return result
|
@@ -1116,11 +2805,8 @@ module REXML
|
|
1116
2805
|
end
|
1117
2806
|
|
1118
2807
|
# Runs when the file is loaded: initializes the feed cache if one is
# configured.
begin
  FeedTools.feed_cache.initialize_cache unless FeedTools.feed_cache.nil?
rescue
  # Best effort: any StandardError raised while setting up the cache is
  # ignored so that loading the library does not fail.
end
|