yyyc514-syndication 0.6.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +10 -0
- data/DEVELOPER +5 -0
- data/IMPLEMENTATION +55 -0
- data/README +228 -0
- data/examples/apple.rb +24 -0
- data/examples/google.rb +23 -0
- data/examples/yahoo.rb +21 -0
- data/lib/syndication/atom.rb +531 -0
- data/lib/syndication/common.rb +289 -0
- data/lib/syndication/content.rb +44 -0
- data/lib/syndication/dublincore.rb +98 -0
- data/lib/syndication/feedburner.rb +18 -0
- data/lib/syndication/google.rb +58 -0
- data/lib/syndication/podcast.rb +90 -0
- data/lib/syndication/rss.rb +332 -0
- data/lib/syndication/syndication.rb +49 -0
- data/lib/syndication/tagsoup.rb +51 -0
- data/rakefile +60 -0
- data/test/atomtest.rb +190 -0
- data/test/feedburntest.rb +79 -0
- data/test/google.rb +91 -0
- data/test/rsstest.rb +422 -0
- data/test/tagsouptest.rb +86 -0
- metadata +83 -0
@@ -0,0 +1,332 @@
|
|
1
|
+
# This module provides classes and methods for parsing RSS web syndication
|
2
|
+
# feeds.
|
3
|
+
#
|
4
|
+
# Copyright (c) mathew <meta@pobox.com> 2005-2006.
|
5
|
+
# Licensed under the same terms as Ruby.
|
6
|
+
|
7
|
+
require 'uri'
|
8
|
+
require 'rexml/parsers/streamparser'
|
9
|
+
require 'rexml/streamlistener'
|
10
|
+
require 'rexml/document'
|
11
|
+
require 'date'
|
12
|
+
require 'syndication/common'
|
13
|
+
|
14
|
+
module Syndication
|
15
|
+
class Container

  # Record one <category> value on this object.
  #
  # This method is used by objects in RSS feeds that accept <category>
  # elements; they expose it as their +category=+ writer.
  #
  # cat:: the category text. Anything that is not a String is ignored.
  #
  # Returns the Array of categories collected so far, or nil when +cat+
  # was ignored.
  def store_category(cat)
    return unless cat.kind_of?(String)
    # ||= (rather than a defined? check) also copes with @category having
    # been explicitly initialized to nil before the first category arrives.
    @category ||= []
    @category << cat
  end
end
|
28
|
+
|
29
|
+
# RSS is a method of syndicating web site content.
|
30
|
+
#
|
31
|
+
# There are nine different versions of RSS; see
|
32
|
+
# <URL:http://diveintomark.org/archives/2004/02/04/incompatible-rss>
|
33
|
+
#
|
34
|
+
# This code attempts to parse all of them, and provide the same API via
|
35
|
+
# the same data model regardless of the particular flavor of RSS fed in.
|
36
|
+
#
|
37
|
+
# One thing to be aware of is that RSS 0.9x and 2.0x have no mechanism for
|
38
|
+
# indicating the type of text in a description, whether plain text or HTML.
|
39
|
+
# As a result, this library leaves it to you to write code to 'sniff'
|
40
|
+
# the data returned and decide whether you think it looks like text or HTML.
|
41
|
+
#
|
42
|
+
# RSS 1.0 solves the problem via the content module, which is supported
|
43
|
+
# via Syndication::Content. Atom solves the problem too.
|
44
|
+
module RSS
|
45
|
+
|
46
|
+
# Represents an individual story or entry in an RSS feed.
|
47
|
+
class Item < Container
  # Item title, a String.
  attr_accessor :title
  # URL of the item, a String.
  attr_accessor :link
  # Textual description of the item, a String.
  attr_accessor :description
  # E-mail address of the item's author.
  attr_accessor :author
  # URL where feedback on this item can be left, a String.
  attr_accessor :comments
  # Media object attached to the item, a Syndication::Enclosure.
  attr_accessor :enclosure
  # Globally unique identifier for this item, a String.
  attr_accessor :guid
  # RSS channel this item was copied from, used to give credit for
  # copied links. A URL String.
  attr_accessor :source

  # Categories for the item, as an Array of Strings. Assigning through
  # category= appends to the list (it is an alias for store_category).
  attr_reader :category
  alias_method :category=, :store_category

  # Raw publication date for this item. Accepts anything DateTime can
  # parse, which includes RFC822-style dates as specified by the RSS
  # standards.
  attr_writer :pubdate

  # Publication date as a DateTime if possible; if it won't parse,
  # the original string is returned. The conversion is delegated to
  # parse_date.
  def pubdate
    parse_date(@pubdate)
  end
end
|
79
|
+
|
80
|
+
# Used to represent graphical images provided in an RSS feed, with the
|
81
|
+
# intent that they be used to represent the channel in a graphical user
|
82
|
+
# interface, or on a web page.
|
83
|
+
#
|
84
|
+
# Typically found via Syndication::Channel#image
|
85
|
+
class Image < Container
  # URL of the image.
  attr_accessor :url
  # Title of the image, for use as ALT text.
  attr_accessor :title
  # Link to follow when the image is clicked on.
  attr_accessor :link
  # Image width in pixels, as an integer.
  attr_reader :width
  # Image height in pixels, as an integer.
  attr_reader :height

  # Set the width in pixels from a String; other kinds of value are ignored.
  def width=(x)
    @width = x.to_i if x.kind_of?(String)
  end

  # Set the height in pixels from a String; other kinds of value are ignored.
  def height=(x)
    @height = x.to_i if x.kind_of?(String)
  end
end
|
111
|
+
|
112
|
+
# Represents a text input box to be used in association with an RSS feed, for
|
113
|
+
# example a search box or e-mail subscription input box.
|
114
|
+
#
|
115
|
+
# Typically found via Syndication::Channel#textinput method.
|
116
|
+
class TextInput < Container
  # Label for the Submit button in the text input area.
  attr_accessor :title
  # Label explaining the purpose of the text input area.
  attr_accessor :description
  # Name of the text object in the input area, for form submission.
  attr_accessor :name
  # URL to submit the data to via HTTP POST.
  attr_accessor :link
end
|
125
|
+
|
126
|
+
# Represents metadata about an RSS feed as a whole.
|
127
|
+
# Typically found via the Syndication::RSS::Feed#channel method.
|
128
|
+
class Channel < Container
  # The title of the channel.
  attr_accessor :title
  # The URL of the web site this is a channel for.
  attr_accessor :link
  # A textual description of the channel.
  attr_accessor :description
  # Copyright statement for channel.
  attr_accessor :copyright
  # ISO code for the language the channel is written in.
  attr_accessor :language
  # E-mail address of person responsible for editorial content.
  attr_accessor :managingeditor
  # E-mail address of person responsible for technical issues with feed.
  attr_accessor :webmaster
  # Publication date of content in channel.
  attr_writer :pubdate
  # Last time content in channel changed.
  attr_writer :lastbuilddate
  # The graphical image to represent the channel, as a
  # Syndication::Image object.
  attr_accessor :image
  # One or more categories for the channel, as an Array of Strings.
  # Only a reader is generated here: the writer is store_category, aliased
  # on the next line. Generating a writer as well would just be redefined
  # (and reported as such when warnings are enabled).
  attr_reader :category
  alias category= store_category
  # The software that generated the channel.
  attr_accessor :generator
  # The URL of some documentation on what the RSS format is.
  attr_accessor :docs
  # Time to live for this copy of the channel.
  attr_accessor :ttl
  # rssCloud interface (for Radio UserLand).
  attr_accessor :cloud
  # PICS rating for channel.
  attr_accessor :rating
  # The TextInput area as a Syndication::TextInput object.
  attr_accessor :textinput
  # Hours when the feed can be skipped (because it will not have new content).
  # Returned as an Array of values in the range 0..23 (even if parsing the
  # UserLand variant of RSS 0.91).
  attr_reader :skiphours
  # Full names (in English) of days when the feed can be skipped.
  attr_reader :skipdays

  # Publication date of content in channel, as a DateTime object if it
  # can be parsed by DateTime; otherwise, as a String.
  def pubdate
    parse_date(@pubdate)
  end

  # Last time content in channel changed, as a DateTime object if it
  # can be parsed by DateTime; otherwise, as a String.
  def lastbuilddate
    parse_date(@lastbuilddate)
  end

  # Add an hour to the list of hours to skip.
  #
  # The <hour> element in fact comes inside <skipHours>, but we don't enforce
  # that; we just make the Channel recognize it and store the values.
  #
  # hr:: the hour as a String; other kinds of value are ignored.
  def hour=(hr)
    return unless hr.kind_of?(String)
    # ||= also copes with @skiphours having been pre-set to nil.
    @skiphours ||= []
    h = hr.to_i
    # An hour of 24 is folded to 0 so stored values stay within 0..23.
    @skiphours << (h == 24 ? 0 : h)
  end

  # Add a day name to the list of days to skip.
  #
  # The <day> element in fact comes inside <skipDays>, but we don't enforce
  # that; we just make the Channel recognize it and store the values.
  #
  # dayname:: the day name as a String; other kinds of value are ignored.
  def day=(dayname)
    return unless dayname.kind_of?(String)
    # ||= also copes with @skipdays having been pre-set to nil.
    @skipdays ||= []
    @skipdays << dayname
  end
end
|
211
|
+
|
212
|
+
# The <cloud> element is very rarely used. It was added to the RSS standards
|
213
|
+
# to support the rssCloud protocol of Radio UserLand.
|
214
|
+
class Cloud < Container
  # Hostname to connect to.
  attr_accessor :domain
  # Request path.
  attr_accessor :path
  # Registration method.
  attr_accessor :registerprocedure
  # Protocol to use.
  attr_accessor :protocol
  # TCP/IP port number.
  attr_reader :port

  # Set the port number; the value is converted with to_i.
  def port=(x)
    @port = x.to_i
  end

  # Remember the tag and parent, then hand every XML attribute of the
  # element (if any were supplied) to store as a key/value pair.
  def initialize(parent, tag, attrs = nil)
    @tag = tag
    @parent = parent
    return unless attrs
    attrs.each_pair do |name, val|
      self.store(name, val)
    end
  end
end
|
241
|
+
|
242
|
+
# Represents a multimedia enclosure in an RSS item.
|
243
|
+
# Typically found as Syndication::Item#enclosure
|
244
|
+
class Enclosure < Container
  # URL of the multimedia file.
  attr_accessor :url
  # MIME type of the file.
  attr_accessor :type
  # Length of the file, in bytes.
  attr_reader :length

  # Set the length in bytes; the value is converted with to_i.
  def length=(x)
    @length = x.to_i
  end

  # Remember the tag and parent, then hand every XML attribute of the
  # element (if any were supplied) to store as a key/value pair.
  def initialize(parent, tag, attrs = nil)
    @tag = tag
    @parent = parent
    return unless attrs
    attrs.each_pair do |name, val|
      self.store(name, val)
    end
  end
end
|
267
|
+
|
268
|
+
# Represents a parsed RSS feed, as returned by Syndication::RSS::Parser.
|
269
|
+
class Feed < Container
  # Channel metadata and contents of the feed, as a
  # Syndication::Channel object.
  attr_accessor :channel
  # Items in the feed, as an Array of Syndication::Item objects.
  attr_reader :items
  # Text input area, as a Syndication::TextInput object.
  attr_accessor :textinput
  # Image for the feed, as a Syndication::Image object.
  attr_accessor :image

  # Arguments are forwarded unchanged to the superclass initializer.
  def initialize(parent, tag = nil, attrs = nil)
    # Give these an explicit nil first so they never trigger warnings.
    @items = nil
    @category = nil
    @skiphours = nil
    @skipdays = nil
    super
  end

  # Append an item to the feed, creating the list on first use.
  def item=(obj)
    @items = Array.new if @items.nil?
    @items << obj
  end
end
|
294
|
+
|
295
|
+
# A parser for RSS feeds.
|
296
|
+
# See Syndication::Parser in common.rb for the abstract class this
|
297
|
+
# specializes.
|
298
|
+
class Parser < AbstractParser
  include REXML::StreamListener

  #:stopdoc:
  # Maps each tag that needs an object of its own to the class used to
  # build that object.
  CLASS_FOR_TAG = {
    'item'      => Item,
    'entry'     => Item,
    'image'     => Image,
    'channel'   => Channel,
    'cloud'     => Cloud,
    'textinput' => TextInput,
    'textInput' => TextInput,
    'enclosure' => Enclosure
  }
  #:startdoc:

  # Reset the parser so it is ready to parse a new feed.
  def reset
    # Use the RSS tag-to-class table
    @class_for_tag = CLASS_FOR_TAG
    # Start from an empty RSS::Feed object as the current object
    @parsetree = Feed.new(nil)
    # The remaining setup is shared by both kinds of parser
    super
  end

  # The most recently parsed feed as a Syndication::RSS::Feed object.
  def feed
    @parsetree
  end
end
|
331
|
+
end
|
332
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Copyright (c) mathew <meta@pobox.com> 2005.
|
2
|
+
# Licensed under the same terms as Ruby.
|
3
|
+
#
|
4
|
+
# $Header$
|
5
|
+
|
6
|
+
require 'date'
|
7
|
+
|
8
|
+
module Syndication

  # Mixin for RSS 1.0 syndication data (draft standard for RSS 1.0).
  #
  # If you require 'syndication/syndication' these methods are added to the
  # Syndication::Channel class.
  #
  # Access methods are named after the XML elements, prefixed with sy_.
  #
  module Syndication
    # The period over which the channel is updated. Allowed values are
    # 'hourly', 'daily', 'weekly', 'monthly', 'yearly'. If omitted, 'daily'
    # is assumed.
    attr_accessor :sy_updateperiod

    # Frequency of updates, in relation to sy_updateperiod. Indicates how many
    # times in each sy_updateperiod the channel is updated. For example,
    # sy_updateperiod = 'daily' and sy_updatefrequency = 4 means four times
    # per day.
    attr_accessor :sy_updatefrequency

    # Base date used to calculate publishing times. When combined with
    # sy_updateperiod and sy_updatefrequency, the publishing schedule can
    # be derived. Returned as a DateTime if possible, otherwise as a String.
    attr_reader :sy_updatebase

    # Set the base date. The value is stored as a DateTime when
    # DateTime.parse accepts it, and stored unchanged otherwise.
    def sy_updatebase=(x)
      # DateTime.parse signals unparseable input by raising rather than by
      # returning nil, so the fallback has to be a rescue.
      @sy_updatebase = begin
        DateTime.parse(x)
      rescue ArgumentError, TypeError
        x
      end
    end
  end

  #:enddoc:
  class Channel
    include Syndication
  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright (c) mathew <meta@pobox.com> 2005-2006.
|
2
|
+
# Licensed under the same terms as Ruby.
|
3
|
+
|
4
|
+
require 'cgi'
|
5
|
+
|
6
|
+
module Syndication
|
7
|
+
|
8
|
+
# TagSoup is a tiny completely non-validating XML parser which implements the
|
9
|
+
# tag_start, tag_end and text methods of the REXML StreamListener interface.
|
10
|
+
#
|
11
|
+
# It's designed for permissive parsing of RSS and Atom feeds; using it for
|
12
|
+
# anything more complex (like HTML with CSS and JavaScript) is not advised.
|
13
|
+
class TagSoup

  # Parse the String +data+ and send events to +listener+.
  #
  # data::     the markup to scan.
  # listener:: an object responding to tag_start(name, attrs_hash),
  #            tag_end(name) and text(string).
  #
  # Text is stripped of surrounding whitespace and HTML-unescaped before it
  # is passed on. Tags that have no name at all (for example "<>") are
  # skipped rather than reported.
  def TagSoup.parse_stream(data, listener)
    data.scan(/(<\/[^>]*>|<[^>]*>|[^<>]*)/m) do |match|
      thing = match.first.strip
      if thing[0,1] == '<'
        # It's a tag_start or tag_end
        parsed = thing.match(/<\/?([^>\s]+)([^>]*)/)
        # Soup such as "<>" or "< >" has no tag name to report; ignore it
        # instead of failing on a nil match.
        next unless parsed
        tag, rest = parsed[1,2]
        if thing[1,1] == '/'
          listener.tag_end(tag)
        else
          # For "<br/>" the name capture swallows the closing slash; drop it
          # so tag_start and tag_end both see "br".
          tag = tag.chomp('/')
          # Parse the attr=val pairs
          pairs = Hash.new
          rest.scan(/([\w:]+)=("([^"]*)"|'([^']*)')/) {|a,j,v1,v2|
            # v1 is set for a double-quoted value, v2 for a single-quoted one
            v = (v1 == nil) ? v2 : v1
            if a
              pairs[a] = v
            end
          }
          listener.tag_start(tag, pairs)
          # Tags with end tag built in, XML style
          if thing[-2,1] == '/'
            listener.tag_end(tag)
          end
        end
      else
        # It's text
        listener.text(CGI.unescapeHTML(thing))
      end
    end
  end

end
|
51
|
+
end
|
data/rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
|
2
|
+
require 'rake/rdoctask'
|
3
|
+
require 'rake/packagetask'
|
4
|
+
require 'rake/gempackagetask'
|
5
|
+
require 'rake/testtask'
|
6
|
+
require 'rubygems'
|
7
|
+
|
8
|
+
# Version used for both the tarball and the gem.
PKG_VERSION = "0.6.1"

desc "Create HTML documentation from RDOC"
Rake::RDocTask.new { |rd|
  rd.main = "README"
  rd.rdoc_files.include(
    "README", "CHANGES", "IMPLEMENTATION", "DEVELOPER",
    "lib/**/*.rb", "test/**/*.rb", "examples/**/*.rb"
  )
}

desc "Make tar distribution"
Rake::PackageTask.new('syndication', PKG_VERSION) { |t|
  t.need_tar_bz2 = true
  t.package_files.include(
    "README", "CHANGES", "IMPLEMENTATION", "DEVELOPER",
    "lib/**/*.rb", "test/**/*.rb", "examples/**/*.rb",
    "rakefile", "setup.rb"
  )
  t.package_dir = "pkg"
}

# Gem specification shared with the gem packaging task below.
spec = Gem::Specification.new { |s|
  s.name = "syndication"
  s.version = PKG_VERSION
  s.author = "mathew"
  s.email = "meta@pobox.com"
  s.homepage = "http://www.pobox.com/~meta/"
  s.platform = Gem::Platform::RUBY
  s.summary = "A web syndication parser for Atom and RSS with a uniform API"
  # Ship everything under the listed directories plus the rakefile, leaving
  # out any path that mentions CVS or html.
  candidates = Dir.glob("{bin,docs,lib,test,examples}/**/*")
  candidates << "rakefile"
  s.files = candidates.reject { |path|
    path.include?("CVS") || path.include?("html")
  }
  s.require_path = "lib"
  s.test_files = [
    "test/atomtest.rb", "test/rsstest.rb", "test/google.rb",
    "test/tagsouptest.rb", "test/feedburntest.rb"
  ]
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "IMPLEMENTATION", "CHANGES", "DEVELOPER"]
}

desc "Make RubyGems gem distribution"
Rake::GemPackageTask.new(spec) { |pkg|
  pkg.need_zip = true
  pkg.need_tar = true
}

desc "Run unit tests"
Rake::TestTask.new("test") do |t|
  t.pattern = 'test/*.rb'
  t.verbose = true
  t.warning = true
end

# The default task only prints a hint; there is nothing to build.
task :default do
  puts "This is a pure Ruby library, no compilation is required."
  puts "Try rake --tasks"
end
|
data/test/atomtest.rb
ADDED
@@ -0,0 +1,190 @@
|
|
1
|
+
# Copyright (c) mathew <meta@pobox.com> 2005.
|
2
|
+
# Licensed under the same terms as Ruby.
|
3
|
+
#
|
4
|
+
# $Header: /var/cvs/syndication/syndication/test/atomtest.rb,v 1.2 2005/10/17 20:06:51 meta Exp $
|
5
|
+
|
6
|
+
require 'syndication/atom'
|
7
|
+
require 'test/unit'
|
8
|
+
|
9
|
+
module Syndication
|
10
|
+
|
11
|
+
# This class contains the unit tests for the Syndication module.
|
12
|
+
class Tests < Test::Unit::TestCase
|
13
|
+
|
14
|
+
# A set of minimal assertions that can be applied to every well-formed parsed
|
15
|
+
# feed.
|
16
|
+
def baseline_atom_assertions(feed)
  # Feed-level requirements: right class, with title, id and a DateTime
  # update stamp.
  assert_not_nil(feed, 'Parser returned nil')
  assert_kind_of(Syndication::Atom::Feed, feed)
  assert_not_nil(feed.title, 'Feed#title was nil')
  assert_not_nil(feed.id, 'Feed#id was nil')
  assert_not_nil(feed.updated, 'Feed#updated was nil')
  assert_kind_of(DateTime, feed.updated)
  assert(feed.entries.length > 0, 'No entries in feed')
  # Entry-level requirements, checked for every entry in the feed.
  feed.entries.each do |entry|
    assert_not_nil(entry.title, 'Entry#title was nil')
    assert_not_nil(entry.id, 'Entry#id was nil')
    assert(entry.links.length > 0, 'No links in entry')
    assert_not_nil(entry.links[0], 'Entry#links[0] was nil')
    assert_not_nil(entry.updated, 'Entry#updated was nil')
    assert_kind_of(DateTime, entry.updated)
  end
end
|
33
|
+
|
34
|
+
# Minimal test
|
35
|
+
def test_atom_minimal
  # Smallest feed exercised here: one entry carrying a title, link, id and
  # update stamp.
  xml = <<-EOF
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>One good turn usually gets most of the blanket.</title>
<updated>2005-08-20T21:14:38Z</updated>
<id>urn:uuid:035d3aa3022c1b1b2a17e37ae2dcc376</id>
<entry>
<title>Quidquid latine dictum sit, altum viditur.</title>
<link href="http://example.com/05/08/20/2114.html"/>
<id>urn:uuid:89d96d76a99426264f6f1f520c1b93c2</id>
<updated>2005-08-20T21:14:38Z</updated>
</entry>
</feed>
EOF
  feed = Syndication::Atom::Parser.new.parse(xml)
  baseline_atom_assertions(feed)

  # Feed-level values
  assert(feed.title.txt == 'One good turn usually gets most of the blanket.')
  assert(feed.updated.strftime('%F %T') == '2005-08-20 21:14:38')
  assert(feed.entries.length == 1, 'Wrong number of entries in feed')
  assert(feed.id == 'urn:uuid:035d3aa3022c1b1b2a17e37ae2dcc376')

  # The single entry and its single link
  entry = feed.entries[0]
  assert(entry.title.txt == 'Quidquid latine dictum sit, altum viditur.')
  assert(entry.links.length == 1, 'Wrong number of links in entry')
  link = entry.links[0]
  assert(link.href == 'http://example.com/05/08/20/2114.html')
  assert(entry.id == 'urn:uuid:89d96d76a99426264f6f1f520c1b93c2')
  assert(entry.updated.strftime('%F %T') == '2005-08-20 21:14:38')
end
|
64
|
+
|
65
|
+
# Test a well-formed Atom feed with all possible elements
|
66
|
+
def test_atom_wf_full
  # Parses a feed that uses feed-level metadata, two entries and a nested
  # <source>, then checks the parsed values.
  # NOTE(review): the html-typed <title> in the second entry appears with
  # literal <b> markup here; confirm against the upstream file whether it
  # should be entity-escaped.
  xml = <<-EOF
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title type="text">It is the quality rather than the quantity that matters.</title>
<updated>2005-08-20T21:43:44Z</updated>
<id>urn:uuid:dc03a676cc5f04b9f0c728592270c8b7</id>
<author>
<name>mathew</name>
<email>meta@pobox.com</email>
<uri>http://www.pobox.com/~meta/</uri>
</author>
<category term="test"/>
<category term="Ruby"/>
<contributor>
<name>Phil Space</name>
<email>space@example.com</email>
</contributor>
<contributor>
<name>Anne Example</name>
<email>anne@example.com</email>
</contributor>
<generator uri="http://example.com/ruby/syndication" version="1.0">
Ruby Syndication Library
</generator>
<icon>http://www.example.com/goatseicon.gif</icon>
<link rel="self" type="application/ruby" href="file://atom.rb"/>
<logo>http://www.example.com/goatse.jpg</logo>
<rights>Copyright (c) meta@pobox.com 2005</rights>
<subtitle type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<p>This is <b>XHTML</b> content.</p>
</div>
</subtitle>
<entry>
<title>Cleanliness is next to impossible.</title>
<summary type="xhtml">
<xhtml:div xmlns:xhtml="http://www.w3.org/1999/xhtml">
This is <xhtml:b>XHTML</xhtml:b> content.
</xhtml:div>
</summary>
<link href="http://example.com/05/08/20/2143.html"/>
<id>urn:uuid:380b651e97c2e6ecc68eaa66c90939b6</id>
<published>1978-03-12T10:22:11Z</published>
<updated>2005-08-20T21:43:44Z</updated>
<author>
<name>Stu Dapples</name>
<email>stu@example.com</email>
</author>
<category term="fortune"/>
<category term="aphorism"/>
<content type="text">
Cleanliness of code is certainly next to impossible if you have to parse
Atom feeds with all their features.
</content>
<contributor>
<name>Ben Dover</name>
</contributor>
<contributor>
<name>Eileen Dover</name>
</contributor>
<rights>This test entry is in the public domain.</rights>
</entry>
<entry>
<title type="html"><b>WE HAVE TACOS</b></title>
<link href="http://www.pobox.com/~meta/"/>
<id>urn:uuid:13be6c856fac98d9a7fd144b61dee06d</id>
<updated>2004-12-23T21:22:23-06:00</updated>
<source>
<author><name>Rick O'Shea</name></author>
<category term="example"/>
<contributor><name>Hugh Cares</name></contributor>
<generator uri="http://www.pobox.com/~meta/" version="1">
Typed in by hand by some poor guy.
</generator>
<icon>http://www.example.com/icon2.png</icon>
<id>urn:uuid:1234decafbad7890deadbeef5678304</id>
<link rel="alternate" type="text/html"
href="http://www.pobox.com/~meta/"/>
<logo>http://www.example.com/logo.svg</logo>
<rights>Some rights reserved, some not</rights>
<title>More example stuff</title>
<subtitle>MAKE IT STOP!</subtitle>
<updated>2005-08-20T22:11-05:00</updated>
</source>
</entry>
</feed>
EOF
  f = Syndication::Atom::Parser.new.parse(xml)
  baseline_atom_assertions(f)
  assert(f.categories.length == 2)
  assert(f.contributors.length == 2)
  assert(f.contributors[0].name == 'Phil Space', "Feed#contributors name didn't match")
  assert(f.contributors[1].name == 'Anne Example', "Feed#contributors name didn't match")
  # These two must compare with ==; a single = would assign the term and
  # make the assertion pass unconditionally.
  assert(f.categories[0].term == 'test', "Feed#categories didn't match")
  assert(f.categories[1].term == 'Ruby', "Feed#categories didn't match")
  assert(f.title.txt == 'It is the quality rather than the quantity that matters.')
  assert(f.updated == DateTime.parse('2005-08-20 21:43:44Z'), 'Feed#updated incorrectly parsed')
  assert(f.author.name == 'mathew')
  assert(f.author.email == 'meta@pobox.com')
  assert(f.author.uri == 'http://www.pobox.com/~meta/')
  assert(f.generator == 'Ruby Syndication Library')
  assert(f.icon == 'http://www.example.com/goatseicon.gif')
  assert(f.links.length == 1)
  assert(f.links[0].rel == 'self')
  assert(f.links[0].href == 'file://atom.rb')
  assert(f.links[0].type == 'application/ruby')
  assert(f.logo == 'http://www.example.com/goatse.jpg')
  assert(f.rights == 'Copyright (c) meta@pobox.com 2005')
  assert(f.subtitle.xhtml == '<p>This is <b>XHTML</b> content.</p>')
  assert(f.entries.length == 2)
  # First entry: xhtml summary and categories
  e1 = f.entries[0]
  assert(e1.summary.xhtml == 'This is <b>XHTML</b> content.')
  assert(e1.categories.length == 2)
  assert(e1.categories[0].term == 'fortune')
  assert(e1.categories[1].term == 'aphorism')
  # Second entry: html title and nested <source>, which is parsed as a Feed
  e2 = f.entries[1]
  assert(e2.title.html == '<b>WE HAVE TACOS</b>')
  s = e2.source
  assert(s.kind_of?(Syndication::Atom::Feed))
  assert(s.title.txt == 'More example stuff')
  assert(s.updated == DateTime.parse('2005-08-20 22:11:00-0500'))
end
|
189
|
+
end
|
190
|
+
end
|