feedtools 0.2.22 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +28 -0
- data/README +23 -2
- data/db/migration.rb +19 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +71 -388
- data/lib/feed_tools/database_feed_cache.rb +4 -3
- data/lib/feed_tools/feed.rb +809 -607
- data/lib/feed_tools/feed_item.rb +551 -574
- data/lib/feed_tools/feed_structures.rb +252 -0
- data/lib/feed_tools/helpers/feed_tools_helper.rb +6 -5
- data/lib/feed_tools/helpers/generic_helper.rb +16 -158
- data/lib/feed_tools/helpers/html_helper.rb +629 -0
- data/lib/feed_tools/helpers/retrieval_helper.rb +5 -0
- data/lib/feed_tools/helpers/uri_helper.rb +223 -0
- data/lib/feed_tools/helpers/xml_helper.rb +239 -0
- data/rakefile +10 -237
- data/test/unit/amp_test.rb +102 -94
- data/test/unit/atom_test.rb +239 -6
- data/test/unit/cache_test.rb +1 -1
- data/test/unit/encoding_test.rb +5 -5
- data/test/unit/generation_test.rb +34 -1
- data/test/unit/helper_test.rb +111 -17
- data/test/unit/rss_test.rb +21 -2
- metadata +7 -3
- data/lib/feed_tools/helpers/module_helper.rb +0 -27
@@ -0,0 +1,252 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2005 Robert Aman
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
module FeedTools
|
25
|
+
# Represents a feed/feed item's category.
#
# +term+ and +scheme+ are the canonical attributes.  +value+ and +category+
# are aliases for +term+, and +domain+ is an alias for +scheme+.  Each alias
# is available both as a reader and as a writer.
class Category
  # The category term value
  attr_accessor :term
  # The categorization scheme
  attr_accessor :scheme
  # A human-readable description of the category
  attr_accessor :label

  alias_method :value, :term
  alias_method :value=, :term=
  alias_method :category, :term
  alias_method :category=, :term=
  alias_method :domain, :scheme
  alias_method :domain=, :scheme=
end
|
39
|
+
|
40
|
+
# An author of a feed or of an individual feed item.
#
# +url+ and +uri+ (readers and writers) are synonyms for +href+.
class Author
  # Real name of the author
  attr_accessor :name
  # Email address of the author
  attr_accessor :email
  # Url of the author's homepage
  attr_accessor :href
  # Raw value of the author tag, if one was present
  attr_accessor :raw

  alias url href
  alias url= href=
  alias uri href
  alias uri= href=
end
|
57
|
+
|
58
|
+
# An image attached to a feed.
#
# +url+ (reader and writer) is a synonym for +href+.
class Image
  # Title of the image
  attr_accessor :title
  # Description of the image
  attr_accessor :description
  # Url of the image that is being linked to
  attr_accessor :href
  # Url that the image should link to
  attr_accessor :link
  # Width of the image
  attr_accessor :width
  # Height of the image
  attr_accessor :height
  # Style of the image.
  # Possible values are "icon", "image", or "image-wide"
  attr_accessor :style

  alias url href
  alias url= href=
end
|
80
|
+
|
81
|
+
# Represents a feed's text input element.
# Be aware that this will be ignored for feed generation.  It's a
# pointless element that aggregators usually ignore and it doesn't have an
# equivalent in all feed types.
#
# Attributes:
# title::       The label of the Submit button in the text input area.
# description:: Explains the text input area.
# link::        The URL of the CGI script that processes text input
#               requests.
# name::        The name of the text object in the text input area.
class TextInput
  attr_accessor :title, :description, :link, :name
end
|
96
|
+
|
97
|
+
# Represents a feed's cloud.
# Be aware that this will be ignored for feed generation.
#
# Attributes:
# domain::             The domain of the cloud.
# path::               The path for the cloud.
# port::               The port the cloud is listening on.
# protocol::           The web services protocol the cloud uses.
#                      Possible values are either "xml-rpc" or "soap".
# register_procedure:: The procedure to use to request notification.
class Cloud
  attr_accessor :domain, :path, :port, :protocol, :register_procedure
end
|
113
|
+
|
114
|
+
# A simple hyperlink.
#
# +url+ (reader and writer) is a synonym for +href+.
class Link
  # Url that is being linked to
  attr_accessor :href
  # Language of the resource being linked to
  attr_accessor :hreflang
  # Relation type of the link
  attr_accessor :rel
  # Mime type of the link
  attr_accessor :type
  # Title of the hyperlink
  attr_accessor :title
  # Length, in bytes, of the resource being linked to
  attr_accessor :length

  alias url href
  alias url= href=
end
|
132
|
+
|
133
|
+
# This class stores information about a feed item's file enclosures.
#
# +url+ and +link+ (readers and writers) are synonyms for +href+.
class Enclosure
  # File extensions (without the leading dot) that #audio? treats as audio.
  # TODO: create a more complete list
  AUDIO_EXTENSIONS = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma'].freeze

  # File extensions (without the leading dot) that #video? treats as video.
  # TODO: create a more complete list
  VIDEO_EXTENSIONS = ['mov', 'mp4', 'avi', 'wmv', 'asf'].freeze

  # The values accepted by #expression=.
  EXPRESSIONS = ['sample', 'full', 'nonstop'].freeze

  # The url for the enclosure
  attr_accessor :href
  # The MIME type of the file referenced by the enclosure
  attr_accessor :type
  # The size of the file referenced by the enclosure
  attr_accessor :file_size
  # The total play time of the file referenced by the enclosure
  attr_accessor :duration
  # The height in pixels of the enclosed media
  attr_accessor :height
  # The width in pixels of the enclosed media
  attr_accessor :width
  # The bitrate of the enclosed media
  attr_accessor :bitrate
  # The framerate of the enclosed media
  attr_accessor :framerate
  # The thumbnail for this enclosure
  attr_accessor :thumbnail
  # The categories for this enclosure
  attr_accessor :categories
  # A hash of the enclosed file.
  # NOTE(review): this accessor replaces Object#hash for Enclosure
  # instances, so they should not be used as Hash keys; it is kept
  # because callers read the file hash through this name.
  attr_accessor :hash
  # A website containing some kind of media player instead of a direct
  # link to the media file.
  attr_accessor :player
  # A list of credits for the enclosed media
  attr_accessor :credits
  # A text rendition of the enclosed media
  attr_accessor :text
  # A list of alternate versions of the enclosed media file
  attr_accessor :versions
  # The default version of the enclosed media file
  attr_accessor :default_version

  alias_method :url, :href
  alias_method :url=, :href=
  alias_method :link, :href
  alias_method :link=, :href=

  # Returns true if this is the default enclosure
  def is_default?
    return @is_default
  end

  # Sets whether this is the default enclosure for the media group
  def is_default=(new_is_default)
    @is_default = new_is_default
  end

  # Returns true if the enclosure contains explicit material
  def explicit?
    return @explicit
  end

  # Sets the explicit attribute on the enclosure
  def explicit=(new_explicit)
    @explicit = new_explicit
  end

  # Determines if the object is a sample, or the full version of the
  # object, or if it is a stream.
  # Possible values are 'sample', 'full', 'nonstop'.
  def expression
    return @expression
  end

  # Sets the expression attribute on the enclosure.
  # Allowed values are 'sample', 'full', 'nonstop'; matching is
  # case-insensitive and the value is stored downcased.
  #
  # Raises ArgumentError for any other value, including nil (the value is
  # converted with to_s first, so nil no longer fails with NoMethodError).
  def expression=(new_expression)
    normalized_expression = new_expression.to_s.downcase
    unless EXPRESSIONS.include?(normalized_expression)
      raise ArgumentError,
        "Permitted values are 'sample', 'full', 'nonstop'."
    end
    @expression = normalized_expression
  end

  # Returns true if this enclosure contains audio content: either the MIME
  # type starts with "audio", or the url ends in one of AUDIO_EXTENSIONS.
  def audio?
    return true if mime_type_starts_with?('audio')
    return url_has_extension?(AUDIO_EXTENSIONS)
  end

  # Returns true if this enclosure contains video content: either the MIME
  # type starts with "video" or is "image/mov", or the url ends in one of
  # VIDEO_EXTENSIONS.
  def video?
    return true if mime_type_starts_with?('video')
    return true if self.type.to_s.downcase == "image/mov"
    return url_has_extension?(VIDEO_EXTENSIONS)
  end

  private

  # True when the MIME type begins with +prefix+, ignoring case.
  # A nil type never matches.
  def mime_type_starts_with?(prefix)
    return (self.type.to_s =~ /\A#{Regexp.escape(prefix)}/i) != nil
  end

  # True when the url ends with a dot followed by one of +extensions+,
  # ignoring case.  The dot is required so that a url such as ".../blogg"
  # is not mistaken for an ".ogg" file.  A nil url never matches.
  def url_has_extension?(extensions)
    location = url.to_s.strip
    return extensions.any? do |extension|
      (location =~ /\.#{Regexp.escape(extension)}\z/i) != nil
    end
  end
end
|
244
|
+
|
245
|
+
# TODO: Make these actual classes instead of structs
# ==================================================
# Because a name string is passed as the first argument, each struct is also
# registered as a constant under Struct (for example Struct::EnclosureHash).

# Members: hash, type
EnclosureHash = Struct.new("EnclosureHash", :hash, :type)

# Members: url, height, width
EnclosurePlayer = Struct.new("EnclosurePlayer", :url, :height, :width)

# Members: name, role
EnclosureCredit = Struct.new("EnclosureCredit", :name, :role)

# Members: url, height, width
EnclosureThumbnail =
  Struct.new("EnclosureThumbnail", :url, :height, :width)
|
252
|
+
end
|
@@ -31,8 +31,6 @@ require 'feed_tools/helpers/generic_helper'
|
|
31
31
|
# the FeedTools library.
|
32
32
|
module FeedTools
|
33
33
|
module FeedToolsHelper
|
34
|
-
include FeedTools::GenericHelper
|
35
|
-
private :validate_options
|
36
34
|
|
37
35
|
@@default_local_path = File.expand_path('.')
|
38
36
|
|
@@ -45,12 +43,15 @@ module FeedTools
|
|
45
43
|
def self.default_local_path=(new_default_local_path)
|
46
44
|
@@default_local_path = new_default_local_path
|
47
45
|
end
|
48
|
-
|
49
|
-
protected
|
46
|
+
|
47
|
+
protected
|
50
48
|
# Loads a feed within a block for more consistent syntax and control
|
51
49
|
# over the FeedTools environment.
|
52
50
|
def with_feed(options={})
|
53
|
-
validate_options([ :from_file,
|
51
|
+
FeedTools::GenericHelper.validate_options([ :from_file,
|
52
|
+
:from_url,
|
53
|
+
:from_data,
|
54
|
+
:feed_cache ],
|
54
55
|
options.keys)
|
55
56
|
options = { :feed_cache =>
|
56
57
|
FeedTools.configurations[:feed_cache] }.merge(options)
|
@@ -22,179 +22,37 @@
|
|
22
22
|
#++
|
23
23
|
|
24
24
|
require 'feed_tools'
|
25
|
-
require 'rexml/document'
|
26
25
|
|
27
26
|
module FeedTools
|
28
27
|
# Generic methods needed in numerous places throughout FeedTools
|
29
28
|
module GenericHelper
|
30
29
|
# Guards against misspelled option names: raises if any of the supplied
# option keys is not in the list of valid option keys, and returns nil
# otherwise.
def self.validate_options(valid_option_keys, supplied_option_keys)
  unknown_option_keys = supplied_option_keys - valid_option_keys
  return if unknown_option_keys.empty?
  raise "Unknown options: #{unknown_option_keys}"
end
|
38
37
|
|
39
|
-
#
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
unless result.nil? || blank_result
|
49
|
-
return result
|
50
|
-
end
|
51
|
-
end
|
52
|
-
return nil
|
53
|
-
end
|
54
|
-
|
55
|
-
# Runs through a list of XPath queries on an element or document and
|
56
|
-
# returns the first non-blank result. Subsequent XPath queries will
|
57
|
-
# not be evaluated.
|
58
|
-
def try_xpaths(element, xpath_list,
|
59
|
-
options={}, &block)
|
60
|
-
validate_options([ :select_result_value ],
|
61
|
-
options.keys)
|
62
|
-
options = { :select_result_value => false }.merge(options)
|
63
|
-
|
64
|
-
result = nil
|
65
|
-
if element.nil?
|
38
|
+
# Nifty little method that takes a block and returns nil if recursion
# occurs or the block's result value if it doesn't.
#
# A call counts as recursion when another call for the same lock_object
# (compared by object_id) is still running.  In that case the block is not
# run.  A SystemStackError raised by the block is also turned into nil.
#
# The object_id is released in an ensure clause, so any other exception
# raised by the block propagates to the caller without leaving the object
# marked as in progress (which would make every later call return nil).
def self.recursion_trap(lock_object, &block)
  @lock_ids ||= []
  lock_id = lock_object.object_id
  return nil if @lock_ids.include?(lock_id)
  @lock_ids << lock_id
  begin
    block.call
  rescue SystemStackError
    nil
  ensure
    @lock_ids.delete(lock_id)
  end
end
|
199
57
|
end
|
200
58
|
end
|