feedtools 0.2.22 → 0.2.23
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +28 -0
- data/README +23 -2
- data/db/migration.rb +19 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +71 -388
- data/lib/feed_tools/database_feed_cache.rb +4 -3
- data/lib/feed_tools/feed.rb +809 -607
- data/lib/feed_tools/feed_item.rb +551 -574
- data/lib/feed_tools/feed_structures.rb +252 -0
- data/lib/feed_tools/helpers/feed_tools_helper.rb +6 -5
- data/lib/feed_tools/helpers/generic_helper.rb +16 -158
- data/lib/feed_tools/helpers/html_helper.rb +629 -0
- data/lib/feed_tools/helpers/retrieval_helper.rb +5 -0
- data/lib/feed_tools/helpers/uri_helper.rb +223 -0
- data/lib/feed_tools/helpers/xml_helper.rb +239 -0
- data/rakefile +10 -237
- data/test/unit/amp_test.rb +102 -94
- data/test/unit/atom_test.rb +239 -6
- data/test/unit/cache_test.rb +1 -1
- data/test/unit/encoding_test.rb +5 -5
- data/test/unit/generation_test.rb +34 -1
- data/test/unit/helper_test.rb +111 -17
- data/test/unit/rss_test.rb +21 -2
- metadata +7 -3
- data/lib/feed_tools/helpers/module_helper.rb +0 -27
@@ -0,0 +1,252 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2005 Robert Aman
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
module FeedTools
|
25
|
+
# Represents a category attached to a feed or to a feed item.
class Category

  # The category term value
  attr_accessor :term
  # The categorization scheme
  attr_accessor :scheme
  # A human-readable description of the category
  attr_accessor :label

  # +value+ and +category+ are synonyms for +term+, and +domain+ is a
  # synonym for +scheme+.  Both the reader and the writer are aliased so
  # that every synonym can be assigned through as well as read, which is
  # how the +url+ / +uri+ synonyms on the other structures behave.
  alias_method :value, :term
  alias_method :value=, :term=
  alias_method :category, :term
  alias_method :category=, :term=
  alias_method :domain, :scheme
  alias_method :domain=, :scheme=
end
|
39
|
+
|
40
|
+
# Holds the details of the author of a feed or of a feed item.
#
# Attributes:
# name::  the author's real name
# email:: the author's email address
# href::  the url of the author's homepage
# raw::   the raw value of the author tag if present
class Author
  attr_accessor :name, :email, :href, :raw

  # +url+ and +uri+ read and write the same value as +href+.
  alias url href
  alias url= href=
  alias uri href
  alias uri= href=
end
|
57
|
+
|
58
|
+
# Describes the image associated with a feed.
#
# Attributes:
# title::       the image's title
# description:: the image's description
# href::        the url of the image that is being linked to
# link::        the url to link the image to
# width::       the width of the image
# height::      the height of the image
# style::       the style of the image; possible values are "icon",
#               "image", or "image-wide"
class Image
  attr_accessor :title, :description, :href, :link, :width, :height, :style

  # +url+ reads and writes the same value as +href+.
  alias url href
  alias url= href=
end
|
80
|
+
|
81
|
+
# Describes a feed's text input element.
#
# This element is ignored when feeds are generated: aggregators usually
# ignore it and it has no equivalent in all feed types.
#
# Attributes:
# title::       the label of the Submit button in the text input area
# description:: the description that explains the text input area
# link::        the URL of the CGI script that processes text input requests
# name::        the name of the text object in the text input area
class TextInput
  attr_accessor :title, :description, :link, :name
end
|
96
|
+
|
97
|
+
# Describes a feed's cloud.  Ignored when feeds are generated.
#
# Attributes:
# domain::             the domain of the cloud
# path::               the path for the cloud
# port::               the port the cloud is listening on
# protocol::           the web services protocol the cloud uses; possible
#                      values are either "xml-rpc" or "soap"
# register_procedure:: the procedure to use to request notification
class Cloud
  attr_accessor :domain, :path, :port, :protocol, :register_procedure
end
|
113
|
+
|
114
|
+
# Describes a simple hyperlink.
#
# Attributes:
# href::     the url that is being linked to
# hreflang:: the language of the resource being linked to
# rel::      the relation type of the link
# type::     the mime type of the link
# title::    the title of the hyperlink
# length::   the length of the resource being linked to in bytes
class Link
  attr_accessor :href, :hreflang, :rel, :type, :title, :length

  # +url+ reads and writes the same value as +href+.
  alias url href
  alias url= href=
end
|
132
|
+
|
133
|
+
# This class stores information about a feed item's file enclosures.
class Enclosure
  # File extensions (without the leading dot) that audio? recognises.
  # TODO: create a more complete list
  AUDIO_EXTENSIONS = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma'].freeze
  # File extensions (without the leading dot) that video? recognises.
  # TODO: create a more complete list
  VIDEO_EXTENSIONS = ['mov', 'mp4', 'avi', 'wmv', 'asf'].freeze
  # The values accepted by expression=
  EXPRESSIONS = ['sample', 'full', 'nonstop'].freeze

  # The url for the enclosure
  attr_accessor :href
  # The MIME type of the file referenced by the enclosure
  attr_accessor :type
  # The size of the file referenced by the enclosure
  attr_accessor :file_size
  # The total play time of the file referenced by the enclosure
  attr_accessor :duration
  # The height in pixels of the enclosed media
  attr_accessor :height
  # The width in pixels of the enclosed media
  attr_accessor :width
  # The bitrate of the enclosed media
  attr_accessor :bitrate
  # The framerate of the enclosed media
  attr_accessor :framerate
  # The thumbnail for this enclosure
  attr_accessor :thumbnail
  # The categories for this enclosure
  attr_accessor :categories
  # A hash of the enclosed file.
  # NOTE: because this accessor is named +hash+ it replaces Object#hash on
  # Enclosure instances, so it returns the stored value (nil until one is
  # assigned) rather than an Integer hash code.  Avoid using enclosures as
  # Hash keys.
  attr_accessor :hash
  # A website containing some kind of media player instead of a direct
  # link to the media file.
  attr_accessor :player
  # A list of credits for the enclosed media
  attr_accessor :credits
  # A text rendition of the enclosed media
  attr_accessor :text
  # A list of alternate version of the enclosed media file
  attr_accessor :versions
  # The default version of the enclosed media file
  attr_accessor :default_version

  # Determines if the object is a sample, or the full version of the
  # object, or if it is a stream.
  # Possible values are 'sample', 'full', 'nonstop'.
  attr_reader :expression

  # Sets whether this is the default enclosure for the media group
  attr_writer :is_default
  # Sets the explicit attribute on the enclosure
  attr_writer :explicit

  # +url+ and +link+ read and write the same value as +href+.
  alias_method :url, :href
  alias_method :url=, :href=
  alias_method :link, :href
  alias_method :link=, :href=

  # Returns the value last assigned through is_default= (nil if it was
  # never assigned), which is truthy when this is the default enclosure.
  def is_default?
    return @is_default
  end

  # Returns the value last assigned through explicit= (nil if it was never
  # assigned), which is truthy when the enclosure contains explicit
  # material.
  def explicit?
    return @explicit
  end

  # Sets the expression attribute on the enclosure.
  # Allowed values are 'sample', 'full', 'nonstop', compared without regard
  # to case; the value is stored downcased.
  #
  # Raises ArgumentError for anything else.  The value is converted with
  # to_s first so that nil and other non-String input produce that
  # ArgumentError instead of a NoMethodError from calling downcase on them.
  def expression=(new_expression)
    normalized = new_expression.to_s.downcase
    unless EXPRESSIONS.include?(normalized)
      raise ArgumentError,
        "Permitted values are 'sample', 'full', 'nonstop'."
    end
    @expression = normalized
  end

  # Returns true if this enclosure contains audio content: either the MIME
  # type starts with "audio" or the url ends in a known audio extension.
  def audio?
    return true if (self.type.to_s =~ /\Aaudio/i) != nil
    return url_has_extension?(AUDIO_EXTENSIONS)
  end

  # Returns true if this enclosure contains video content: either the MIME
  # type starts with "video", is "image/mov", or the url ends in a known
  # video extension.
  def video?
    mime_type = self.type.to_s.downcase
    return true if (mime_type =~ /\Avideo/) != nil
    return true if mime_type == "image/mov"
    return url_has_extension?(VIDEO_EXTENSIONS)
  end

  private

  # Returns true if the url ends in a dot followed by one of the supplied
  # extensions.  The dot is required so that a url such as ".../temp3" is
  # not mistaken for an mp3 file, and case is ignored so that ".MP3" is
  # recognised.
  def url_has_extension?(extensions)
    location = url.to_s.strip
    extensions.any? do |extension|
      (location =~ /\.#{Regexp.escape(extension)}\z/i) != nil
    end
  end
end
|
244
|
+
|
245
|
+
# TODO: Make these actual classes instead of structs
# ==================================================
# Passing a name string as the first argument to Struct.new also registers
# each generated class as a constant under Struct (Struct::EnclosureHash
# and so on).

# Members: hash, type
EnclosureHash = Struct.new("EnclosureHash", :hash, :type)

# Members: url, height, width
EnclosurePlayer = Struct.new("EnclosurePlayer", :url, :height, :width)

# Members: name, role
EnclosureCredit = Struct.new("EnclosureCredit", :name, :role)

# Members: url, height, width
EnclosureThumbnail = Struct.new("EnclosureThumbnail", :url, :height, :width)
|
252
|
+
end
|
@@ -31,8 +31,6 @@ require 'feed_tools/helpers/generic_helper'
|
|
31
31
|
# the FeedTools library.
|
32
32
|
module FeedTools
|
33
33
|
module FeedToolsHelper
|
34
|
-
include FeedTools::GenericHelper
|
35
|
-
private :validate_options
|
36
34
|
|
37
35
|
@@default_local_path = File.expand_path('.')
|
38
36
|
|
@@ -45,12 +43,15 @@ module FeedTools
|
|
45
43
|
def self.default_local_path=(new_default_local_path)
|
46
44
|
@@default_local_path = new_default_local_path
|
47
45
|
end
|
48
|
-
|
49
|
-
protected
|
46
|
+
|
47
|
+
protected
|
50
48
|
# Loads a feed within a block for more consistent syntax and control
|
51
49
|
# over the FeedTools environment.
|
52
50
|
def with_feed(options={})
|
53
|
-
validate_options([ :from_file,
|
51
|
+
FeedTools::GenericHelper.validate_options([ :from_file,
|
52
|
+
:from_url,
|
53
|
+
:from_data,
|
54
|
+
:feed_cache ],
|
54
55
|
options.keys)
|
55
56
|
options = { :feed_cache =>
|
56
57
|
FeedTools.configurations[:feed_cache] }.merge(options)
|
@@ -22,179 +22,37 @@
|
|
22
22
|
#++
|
23
23
|
|
24
24
|
require 'feed_tools'
|
25
|
-
require 'rexml/document'
|
26
25
|
|
27
26
|
module FeedTools
|
28
27
|
# Generic methods needed in numerous places throughout FeedTools
|
29
28
|
module GenericHelper
|
30
29
|
# Raises an exception if an invalid option has been specified to prevent
|
31
30
|
# misspellings from slipping through
|
32
|
-
def validate_options(valid_option_keys, supplied_option_keys)
|
31
|
+
def self.validate_options(valid_option_keys, supplied_option_keys)
  # Anything supplied that is not in the list of valid keys is rejected,
  # so a misspelled option name fails loudly instead of being ignored.
  unrecognized = supplied_option_keys - valid_option_keys
  return nil if unrecognized.empty?
  raise "Unknown options: #{unrecognized}"
end
|
38
37
|
|
39
|
-
#
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
unless result.nil? || blank_result
|
49
|
-
return result
|
50
|
-
end
|
51
|
-
end
|
52
|
-
return nil
|
53
|
-
end
|
54
|
-
|
55
|
-
# Runs through a list of XPath queries on an element or document and
|
56
|
-
# returns the first non-blank result. Subsequent XPath queries will
|
57
|
-
# not be evaluated.
|
58
|
-
def try_xpaths(element, xpath_list,
|
59
|
-
options={}, &block)
|
60
|
-
validate_options([ :select_result_value ],
|
61
|
-
options.keys)
|
62
|
-
options = { :select_result_value => false }.merge(options)
|
63
|
-
|
64
|
-
result = nil
|
65
|
-
if element.nil?
|
38
|
+
# Nifty little method that takes a block and returns nil if recursion
|
39
|
+
# occurs or the block's result value if it doesn't.
|
40
|
+
def self.recursion_trap(lock_object, &block)
|
41
|
+
if @lock_ids.nil?
|
42
|
+
@lock_ids = []
|
43
|
+
end
|
44
|
+
if !@lock_ids.include?(lock_object.object_id)
|
45
|
+
@lock_ids << lock_object.object_id
|
46
|
+
else
|
66
47
|
return nil
|
67
48
|
end
|
68
|
-
|
69
|
-
result =
|
70
|
-
|
71
|
-
|
72
|
-
if result.respond_to?(:value)
|
73
|
-
result = result.value
|
74
|
-
else
|
75
|
-
result = result.to_s
|
76
|
-
end
|
77
|
-
end
|
78
|
-
blank_result = false
|
79
|
-
if block_given?
|
80
|
-
blank_result = yield(result)
|
81
|
-
else
|
82
|
-
blank_result = result.to_s.blank?
|
83
|
-
end
|
84
|
-
if !blank_result
|
85
|
-
if result.respond_to? :strip
|
86
|
-
result.strip!
|
87
|
-
end
|
88
|
-
return result
|
89
|
-
end
|
90
|
-
end
|
91
|
-
for xpath in xpath_list
|
92
|
-
result = REXML::XPath.liberal_first(element, xpath)
|
93
|
-
if options[:select_result_value] && !result.nil?
|
94
|
-
if result.respond_to?(:value)
|
95
|
-
result = result.value
|
96
|
-
else
|
97
|
-
result = result.to_s
|
98
|
-
end
|
99
|
-
end
|
100
|
-
blank_result = false
|
101
|
-
if block_given?
|
102
|
-
blank_result = yield(result)
|
103
|
-
else
|
104
|
-
blank_result = result.to_s.blank?
|
105
|
-
end
|
106
|
-
if !blank_result
|
107
|
-
if result.respond_to? :strip
|
108
|
-
result.strip!
|
109
|
-
end
|
110
|
-
return result
|
111
|
-
end
|
112
|
-
end
|
113
|
-
for xpath in xpath_list
|
114
|
-
if xpath =~ /^\w+$/
|
115
|
-
for child in element.children
|
116
|
-
if child.class == REXML::Element
|
117
|
-
if child.name.downcase == xpath.downcase
|
118
|
-
result = child
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|
122
|
-
if options[:select_result_value] && !result.nil?
|
123
|
-
if result.respond_to?(:value)
|
124
|
-
result = result.value
|
125
|
-
else
|
126
|
-
result = result.to_s
|
127
|
-
end
|
128
|
-
end
|
129
|
-
blank_result = false
|
130
|
-
if block_given?
|
131
|
-
blank_result = yield(result)
|
132
|
-
else
|
133
|
-
blank_result = result.to_s.blank?
|
134
|
-
end
|
135
|
-
if !blank_result
|
136
|
-
if result.respond_to? :strip
|
137
|
-
result.strip!
|
138
|
-
end
|
139
|
-
return result
|
140
|
-
end
|
141
|
-
end
|
142
|
-
end
|
143
|
-
return nil
|
144
|
-
end
|
145
|
-
|
146
|
-
# Runs through a list of XPath queries on an element or document and
|
147
|
-
# returns the first non-empty result. Subsequent XPath queries will
|
148
|
-
# not be evaluated.
|
149
|
-
def try_xpaths_all(element, xpath_list, options={})
|
150
|
-
validate_options([ :select_result_value ],
|
151
|
-
options.keys)
|
152
|
-
options = { :select_result_value => false }.merge(options)
|
153
|
-
|
154
|
-
results = []
|
155
|
-
if element.nil?
|
156
|
-
return []
|
157
|
-
end
|
158
|
-
for xpath in xpath_list
|
159
|
-
results = REXML::XPath.liberal_match(element, xpath,
|
160
|
-
FEED_TOOLS_NAMESPACES)
|
161
|
-
if options[:select_result_value] && !results.nil? && !results.empty?
|
162
|
-
results =
|
163
|
-
results.map { |x| x.respond_to?(:value) ? x.value : x.to_s }
|
164
|
-
end
|
165
|
-
if results.blank?
|
166
|
-
results = REXML::XPath.liberal_match(element, xpath)
|
167
|
-
else
|
168
|
-
return results
|
169
|
-
end
|
170
|
-
if options[:select_result_value] && !results.nil? && !results.empty?
|
171
|
-
results =
|
172
|
-
results.map { |x| x.respond_to?(:value) ? x.value : x.to_s }
|
173
|
-
end
|
174
|
-
if !results.blank?
|
175
|
-
return results
|
176
|
-
end
|
177
|
-
end
|
178
|
-
for xpath in xpath_list
|
179
|
-
if xpath =~ /^\w+$/
|
180
|
-
results = []
|
181
|
-
for child in element.children
|
182
|
-
if child.class == REXML::Element
|
183
|
-
if child.name.downcase == xpath.downcase
|
184
|
-
results << child
|
185
|
-
end
|
186
|
-
end
|
187
|
-
end
|
188
|
-
if options[:select_result_value] && !results.nil? && !results.empty?
|
189
|
-
results =
|
190
|
-
results.map { |x| x.inner_xml }
|
191
|
-
end
|
192
|
-
if !results.blank?
|
193
|
-
return results
|
194
|
-
end
|
195
|
-
end
|
49
|
+
begin
|
50
|
+
result = block.call
|
51
|
+
rescue SystemStackError
|
52
|
+
result = nil
|
196
53
|
end
|
197
|
-
|
54
|
+
@lock_ids.delete(lock_object.object_id)
|
55
|
+
return result
|
198
56
|
end
|
199
57
|
end
|
200
58
|
end
|