atom-tools 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/COPYING +18 -0
  2. data/README +103 -0
  3. data/Rakefile +77 -0
  4. data/bin/atom-client.rb +246 -0
  5. data/bin/atom-server.rb~ +71 -0
  6. data/doc/classes/Atom/App.html +217 -0
  7. data/doc/classes/Atom/Author.html +130 -0
  8. data/doc/classes/Atom/Category.html +128 -0
  9. data/doc/classes/Atom/Collection.html +322 -0
  10. data/doc/classes/Atom/Content.html +129 -0
  11. data/doc/classes/Atom/Contributor.html +119 -0
  12. data/doc/classes/Atom/Element.html +325 -0
  13. data/doc/classes/Atom/Entry.html +365 -0
  14. data/doc/classes/Atom/Feed.html +585 -0
  15. data/doc/classes/Atom/HTTP.html +374 -0
  16. data/doc/classes/Atom/Link.html +137 -0
  17. data/doc/classes/Atom/Text.html +229 -0
  18. data/doc/classes/XHTML.html +118 -0
  19. data/doc/created.rid +1 -0
  20. data/doc/files/README.html +213 -0
  21. data/doc/files/lib/atom/app_rb.html +110 -0
  22. data/doc/files/lib/atom/collection_rb.html +110 -0
  23. data/doc/files/lib/atom/element_rb.html +109 -0
  24. data/doc/files/lib/atom/entry_rb.html +111 -0
  25. data/doc/files/lib/atom/feed_rb.html +112 -0
  26. data/doc/files/lib/atom/http_rb.html +109 -0
  27. data/doc/files/lib/atom/text_rb.html +108 -0
  28. data/doc/files/lib/atom/xml_rb.html +110 -0
  29. data/doc/files/lib/atom/yaml_rb.html +109 -0
  30. data/doc/fr_class_index.html +39 -0
  31. data/doc/fr_file_index.html +36 -0
  32. data/doc/fr_method_index.html +62 -0
  33. data/doc/index.html +24 -0
  34. data/doc/rdoc-style.css +208 -0
  35. data/lib/atom/app.rb +87 -0
  36. data/lib/atom/collection.rb +75 -0
  37. data/lib/atom/element.rb +277 -0
  38. data/lib/atom/entry.rb +135 -0
  39. data/lib/atom/feed.rb +229 -0
  40. data/lib/atom/http.rb +132 -0
  41. data/lib/atom/text.rb +163 -0
  42. data/lib/atom/xml.rb +200 -0
  43. data/lib/atom/yaml.rb +101 -0
  44. data/setup.rb +1585 -0
  45. data/test/conformance/order.rb +117 -0
  46. data/test/conformance/title.rb +108 -0
  47. data/test/conformance/updated.rb +33 -0
  48. data/test/conformance/xhtmlcontentdiv.rb +18 -0
  49. data/test/conformance/xmlnamespace.rb +54 -0
  50. data/test/runtests.rb +14 -0
  51. data/test/test_constructs.rb +91 -0
  52. data/test/test_feed.rb +128 -0
  53. data/test/test_general.rb +99 -0
  54. data/test/test_http.rb +86 -0
  55. data/test/test_protocol.rb +69 -0
  56. data/test/test_xml.rb +353 -0
  57. metadata +107 -0
@@ -0,0 +1,277 @@
1
+ require "time"
2
+ require "rexml/element"
3
+
4
+ module Atom # :nodoc:
5
+ class Time < ::Time # :nodoc:
6
# Returns +date+ (or, when it has no +iso8601+ method, the Time parsed
# from its string form) with +to_s+ overridden on that one object to
# produce an ISO 8601 timestamp. Returns nil when +date+ is nil.
def self.new(date)
  return if date.nil?

  stamp = date.respond_to?(:iso8601) ? date : Time.parse(date.to_s)

  # singleton override: only this object's to_s changes
  def stamp.to_s
    iso8601
  end

  stamp
end
21
+ end
22
+
23
# Builds an anonymous Array subclass that collects +klass+ items.
#
# The generated class answers +holds+ (the item class) and +single?+;
# its instances gain +new+ (create, append and return a fresh item) and
# +to_element+ (the +to_element+ of every item, as an Array).
def self.Multiple(klass)
  Class.new(Array) do
    @class = klass

    class << self
      def holds
        @class
      end

      def single?
        true
      end
    end

    def new
      created = self.class.holds.new
      push(created)
      created
    end

    def to_element
      map { |item| item.to_element }
    end

    def taguri
    end
  end
end
44
+
45
+ class Element < Hash
46
+ # a REXML::Element that shares this element's extension attributes
47
+ # and child elements
48
+ attr_reader :extensions
49
+
50
+ # this element's xml:base
51
+ attr_accessor :base
52
+
53
+ # The following is a DSL for describing an atom element.
54
+
55
# the [name, required] pairs declared on this class with +attrb+
# (an empty Array when nothing has been declared)
def self.attrs # :nodoc:
  return [] unless @attrs

  @attrs
end
59
+
60
# the [name, kind, required] triples declared on this class with
# +element+ (an empty Array when nothing has been declared)
def self.elements # :nodoc:
  return [] unless @elements

  @elements
end
64
+
65
# the first required child element, as a [name, kind, req] triple, or
# nil when this class declares no required children (including classes
# that declare no children at all, which used to raise on a nil list)
#
# NOTE(review): the name suggests every required child was meant to be
# returned, but only the first match is -- confirm before relying on it.
def self.required # :nodoc:
  (@elements || []).find { |_name, _kind, req| req }
end
69
+
70
# re-declare this class's child elements and attributes on each new
# subclass so inheritance works
def self.inherited(klass) # :nodoc:
  elements.each { |name, kind, req| klass.element(name, kind, req) }
  attrs.each { |name, req| klass.attrb(name, req) }
end
79
+
80
# declare a child element: adds a reader, records the
# [name, kind, req] triple and, unless +kind+ is a collection class
# (one answering +single?+), generates a coercing writer
def self.element(name, kind, req = false) # :nodoc:
  attr_reader name

  (@elements ||= []) << [name, kind, req]

  define_accessor(name, kind) unless kind.respond_to?(:single?)
end
91
+
92
# declare an attribute by recording its [name, req] pair on this class
def self.attrb(name, req = false) # :nodoc:
  (@attrs ||= []) << [name, req]
end
98
+
99
# generate the "name=" writer for a child element. The writer ignores
# nil/false, otherwise wraps the value in +kind+ (Atom::Element
# subclasses are also handed the element name) and stores the result.
def self.define_accessor(name, kind) # :nodoc:
  define_method("#{name}=") do |value|
    if value
      wrapped = if kind.ancestors.include?(Atom::Element)
                  kind.new(value, name.to_s)
                else
                  kind.new(value)
                end

      set(name, wrapped)
    end
  end
end
113
+
114
# get the value of an attribute; raises (via test_key) when this
# element does not define an attribute called +key+
def [](key)
  test_key(key)

  super(key)
end
120
+
121
# set the value of an attribute; raises (via test_key) when this
# element does not define an attribute called +key+
def []=(key, value)
  test_key(key)

  super(key, value)
end
127
+
128
# internal junk you probably don't care about: creates the extensions
# holder, remembers the element name and gives every collection-valued
# child (a kind answering +single?+) a fresh, empty collection
def initialize(name = nil) # :nodoc:
  @extensions = REXML::Element.new("extensions")
  @local_name = name

  # the block parameter no longer reuses the name of the +name+ argument,
  # and only one collection is built per child (the old code built two)
  self.class.elements.each do |child, kind, _req|
    set(child, kind.new) if kind.respond_to?(:single?)
  end
end
140
+
141
# eg. "feed" or "entry" or "updated" or "title" or ...
# (the name given at construction, else the downcased class name
# without its namespace)
def local_name # :nodoc:
  return @local_name if @local_name

  self.class.name.split("::").last.downcase
end
145
+
146
# convert to a REXML::Element (with no namespace)
#
# Children are emitted in declaration order, then declared attributes
# that have a value, then copies of the extension elements, and last
# xml:base when one is set.
def to_element
  elem = REXML::Element.new(local_name)

  self.class.elements.each do |child, _kind, _req|
    value = get(child)
    next if value.nil?

    if value.respond_to?(:to_element)
      # collections hand back an Array of elements, single constructs
      # just one; Array() is not used because it would call to_a on a
      # lone element rather than wrap it
      built = value.to_element
      built = [built] unless built.is_a?(Array)

      built.each { |bit| elem << bit }
    else
      # plain values become a text-only child attached to elem
      REXML::Element.new(child.to_s, elem).text = value
    end
  end

  self.class.attrs.each do |attr, _req|
    value = self[attr.to_s]
    elem.attributes[attr.to_s] = value if value
  end

  extensions.children.each do |extension|
    elem << extension.dup # otherwise they get removed from @extensions
  end

  if self.base and not self.base.empty?
    elem.attributes["xml:base"] = self.base
  end

  elem
end
181
+
182
# convert to a REXML::Document whose root is this element, declared in
# the Atom namespace
def to_xml
  root = to_element
  root.add_namespace(Atom::NS)

  document = REXML::Document.new
  document << root
  document
end
190
+
191
# convert to an XML string (the string form of #to_xml)
def to_s
  document = to_xml
  document.to_s
end
195
+
196
+ private
197
+
198
# like +valid_key?+ but raises RuntimeError on failure
def test_key(key)
  return if valid_key?(key)

  raise RuntimeError, "this element (#{local_name}) doesn't have that attribute '#{key}'"
end
204
+
205
# tests that an attribute 'key' has been defined; gives back the
# matching [name, req] declaration (truthy) or nil. The comparison is
# against the String form of each declared name.
def valid_key?(key)
  self.class.attrs.detect { |attr, _req| attr.to_s == key }
end
209
+
210
# read the instance variable that backs child element +name+
def get(name)
  ivar = "@#{name}"
  instance_variable_get(ivar)
end
213
+
214
# write the instance variable that backs child element +name+;
# returns +value+
def set(name, value)
  ivar = "@#{name}"
  instance_variable_set(ivar, value)
end
217
+ end
218
+
219
+ # this facilitates YAML output
220
+ class AttrEl < Atom::Element # :nodoc:
221
+ end
222
+
223
+ # A link has the following attributes:
224
+ #
225
+ # href (required):: the link's IRI
226
+ # rel:: the relationship of the linked item to the current item
227
+ # type:: a hint about the media type of the linked item
228
+ # hreflang:: the language of the linked item (RFC3066)
229
+ # title:: human-readable information about the link
230
+ # length:: a hint about the length (in octets) of the linked item
231
class Link < Atom::AttrEl
  attrb :href, true # required
  attrb :rel, false
  attrb :type, false
  attrb :hreflang, false
  attrb :title, false
  attrb :length, false

  # every new link starts out with rel="alternate"
  def initialize(name = nil) # :nodoc:
    super(name)

    self["rel"] = "alternate"
  end
end
246
+
247
+ # A category has the following attributes:
248
+ #
249
+ # term (required):: a string that identifies the category
250
+ # scheme:: an IRI that identifies a categorization scheme
251
+ # label:: a human-readable label
252
class Category < Atom::AttrEl
  attrb :term, true # required
  attrb :scheme, false
  attrb :label, false
end
257
+
258
+ # A person construct has the following child elements:
259
+ #
260
+ # name (required):: a human-readable name
261
+ # uri:: an IRI associated with the person
262
+ # email:: an email address associated with the person
263
class Author < Atom::Element
  element :name, String, true # required
  element :uri, String, false
  element :email, String, false
end
268
+
269
+ # same as Atom::Author
270
class Contributor < Atom::Element
  # Author and Contributor should probably inherit from Person, but
  # oh well.
  element :name, String, true # required
  element :uri, String, false
  element :email, String, false
end
277
+ end
data/lib/atom/entry.rb ADDED
@@ -0,0 +1,135 @@
1
+ require "rexml/document"
2
+
3
+ require "atom/element"
4
+ require "atom/text"
5
+
6
+ module Atom
7
+ NS = "http://www.w3.org/2005/Atom"
8
+
9
+ # An individual entry in a feed. As an Atom::Element, it can be
10
+ # manipulated using accessors for each of its child elements. You
11
+ # should be able to set them using an instance of any class that
12
+ # makes sense
13
+ #
14
+ # Entries have the following children:
15
+ #
16
+ # id:: a universally unique IRI which permanently identifies the entry
17
+ # title:: a human-readable title (Atom::Text)
18
+ # content:: contains or links to the content of an entry (Atom::Content)
19
+ # rights:: information about rights held in and over an entry (Atom::Text)
20
+ # source:: the source feed's metadata (unimplemented)
21
+ # published:: a Time "early in the life cycle of an entry"
22
+ # updated:: the most recent Time an entry was modified in a way the publisher considers significant
23
+ # summary:: a summary, abstract or excerpt of an entry (Atom::Text)
24
+ #
25
+ # There are also +categories+, +links+, +authors+ and +contributors+,
26
+ # each of which is an Array of its respective type and can be used
27
+ # thusly:
28
+ #
29
+ # author = entry.authors.new
30
+ # author.name = "Captain Kangaroo"
31
+ class Entry < Atom::Element
32
+ # the master list of standard children and the types they map to
33
+ element :id, String, true
34
+ element :title, Atom::Text, true
35
+ element :content, Atom::Content, true
36
+
37
+ element :rights, Atom::Text
38
+ # element :source, Atom::Feed # complicated, eg. serialization
39
+
40
+ element :authors, Atom::Multiple(Atom::Author)
41
+ element :contributors, Atom::Multiple(Atom::Contributor)
42
+
43
+ element :categories, Atom::Multiple(Atom::Category)
44
+ element :links, Atom::Multiple(Atom::Link)
45
+
46
+ element :published, Atom::Time
47
+ element :updated, Atom::Time, true
48
+
49
+ element :summary, Atom::Text
50
+
51
# builds an empty "entry" element and, when a block is given, yields
# the new entry to it
def initialize # :nodoc:
  super("entry")

  # XXX I don't think I've ever actually used this
  return unless block_given?

  yield self
end
57
+
58
# parses XML fetched from +base+ into an Atom::Entry
#
# +xml+ is used directly when it responds to +to_atom_entry+; anything
# else is turned into a REXML::Document from its string form first.
def self.parse(xml, base = "")
  source = xml.respond_to?(:to_atom_entry) ? xml : REXML::Document.new(xml.to_s)
  source.to_atom_entry(base)
end
66
+
67
# short debugging form showing only the entry's id
def inspect # :nodoc:
  entry_id = self.id
  "#<Atom::Entry id:'#{entry_id}'>"
end
70
+
71
# declare that this entry has updated (stamps it with the current time)
def updated!
  now = Time.now
  self.updated = now
end
75
+
76
# categorize the entry based on a space-separated string: one new
# category is added per word, with the word as its "term"
# (does nothing for nil)
def tag_with(string)
  return if string.nil?

  string.split.each do |term|
    category = categories.new
    category["term"] = term
  end
end
84
+
85
+ # XXX this needs a test suite before it can be trusted.
86
+ =begin
87
+ # tests the entry's validity
88
+ def valid?
89
+ self.class.required.each do |element|
90
+ unless instance_variable_get "@#{element}"
91
+ return [ false, "required element atom:#{element} missing" ]
92
+ end
93
+ end
94
+
95
+ if @authors.length == 0
96
+ return [ false, "required element atom:author missing" ]
97
+ end
98
+
99
+ alternates = @links.find_all do |link|
100
+ link["rel"] == "alternate"
101
+ end
102
+
103
+ unless @content or alternates
104
+ return [ false, "no atom:content or atom:link[rel='alternate']" ]
105
+ end
106
+
107
+ alternates.each do |link|
108
+ if alternates.find do |x|
109
+ not x == link and
110
+ x["type"] == link["type"] and
111
+ x["hreflang"] == link["hreflang"]
112
+ end
113
+
114
+ return [ false, 'more than one atom:link with a rel attribute value of "alternate" that has the same combination of type and hreflang attribute values.' ]
115
+ end
116
+ end
117
+
118
+ type = @content["type"]
119
+
120
+ base64ed = (not ["", "text", "html", "xhtml"].member? type) and
121
+ type.match(/^text\/.*/).nil? and # not text
122
+ type.match(/.*[\+\/]xml$/).nil? # not XML
123
+
124
+ if (@content["src"] or base64ed) and not summary
125
+ return [ false, "out-of-line or base64ed atom:content and no atom:summary" ]
126
+ end
127
+
128
+ true
129
+ end
130
+ =end
131
+ end
132
+ end
133
+
134
+ # this is here solely so that you don't have to require it
135
+ require "atom/xml"
data/lib/atom/feed.rb ADDED
@@ -0,0 +1,229 @@
1
+ require "atom/element"
2
+ require "atom/text"
3
+ require "atom/entry"
4
+
5
+ require "atom/http"
6
+
7
+ module Atom
8
+ class HTTPException < RuntimeError # :nodoc:
9
+ end
10
+ class FeedGone < RuntimeError # :nodoc:
11
+ end
12
+
13
+ # A feed of entries. As an Atom::Element, it can be manipulated using
14
+ # accessors for each of its child elements. You can set them with any
15
+ # object that makes sense; they will be returned in the types listed.
16
+ #
17
+ # Feeds have the following children:
18
+ #
19
+ # id:: a universally unique IRI which permanently identifies the feed
20
+ # title:: a human-readable title (Atom::Text)
21
+ # subtitle:: a human-readable description or subtitle (Atom::Text)
22
+ # updated:: the most recent Time the feed was modified in a way the publisher considers significant
23
+ # generator:: the agent used to generate a feed
24
+ # icon:: an IRI identifying an icon which visually identifies a feed (1:1 aspect ratio, looks OK small)
25
+ # logo:: an IRI identifying an image which visually identifies a feed (2:1 aspect ratio)
26
+ # rights:: rights held in and over a feed (Atom::Text)
27
+ #
28
+ # There are also +links+, +categories+, +authors+, +contributors+
29
+ # and +entries+, each of which is an Array of its respective type and
30
+ # can be used thusly:
31
+ #
32
+ # entry = feed.entries.new
33
+ # entry.title = "blah blah blah"
34
+ class Feed < Atom::Element
35
+ attr_reader :uri
36
+
37
+ # the Atom::Feed pointed to by link[@rel='previous']
38
+ attr_reader :prev
39
+ # the Atom::Feed pointed to by link[@rel='next']
40
+ attr_reader :next
41
+
42
+ # conditional get information from the last fetch
43
+ attr_reader :etag, :last_modified
44
+
45
+ element :id, String, true
46
+ element :title, Atom::Text, true
47
+ element :subtitle, Atom::Text
48
+
49
+ element :updated, Atom::Time, true
50
+
51
+ element :links, Atom::Multiple(Atom::Link)
52
+ element :categories, Atom::Multiple(Atom::Category)
53
+
54
+ element :authors, Atom::Multiple(Atom::Author)
55
+ element :contributors, Atom::Multiple(Atom::Contributor)
56
+
57
+ element :generator, String # XXX with uri and version attributes!
58
+ element :icon, String
59
+ element :logo, String
60
+
61
+ element :rights, Atom::Text
62
+
63
+ element :entries, Atom::Multiple(Atom::Entry)
64
+
65
+ include Enumerable
66
+
67
# short debugging form: the feed's uri, entry count and title
def inspect # :nodoc:
  count = entries.length
  "<#{@uri} entries: #{count} title='#{title}'>"
end
70
+
71
# parses XML fetched from +base+ into an Atom::Feed
#
# +xml+ is used directly when it responds to +to_atom_feed+; anything
# else is turned into a REXML::Document from its string form first.
def self.parse(xml, base = "")
  # test for the method that is actually called below (the check used
  # to look for to_atom_entry, which says nothing about to_atom_feed)
  if xml.respond_to?(:to_atom_feed)
    xml.to_atom_feed(base)
  else
    REXML::Document.new(xml.to_s).to_atom_feed(base)
  end
end
79
+
80
# Create a new Feed that can be found at feed_uri and retrieved
# using an Atom::HTTP object http. With a feed_uri, it is also used
# as the feed's xml:base.
def initialize(feed_uri = nil, http = Atom::HTTP.new)
  # NOTE(review): the super call below appears to replace @entries with
  # a fresh collection, which would make this assignment redundant --
  # confirm before removing
  @entries = []
  @http = http

  if feed_uri
    @uri = feed_uri.to_uri
    self.base = feed_uri
  end

  super("feed")
end
93
+
94
# iterates over a feed's entries, handing each one to the given block
def each(&visitor)
  @entries.each(&visitor)
end
98
+
99
# gets everything in the logical feed (could be a lot of stuff)
# (see <http://www.ietf.org/internet-drafts/draft-nottingham-atompub-feed-history-05.txt>)
#
# Updates this feed, then follows the chain of previous feeds and then
# the chain of next feeds, updating each one and merging its entries
# into this feed. Returns self.
def get_everything!
  update!

  # walk one chain of linked feeds, folding each page's entries in
  follow = lambda do |page, direction|
    while page
      page.update!

      merge_entries!(page)
      page = page.send(direction)
    end
  end

  follow.call(@prev, :prev)
  follow.call(@next, :next)

  self
end
122
+
123
# merges the entries from another feed into this one, adding each
# entry through #<<
def merge_entries!(other_feed)
  # TODO: add atom:source elements
  other_feed.each { |entry| self << entry }
end
130
+
131
# like #merge, but in place: takes over the other feed's id, title,
# subtitle, updated and rights, appends its links, categories, authors
# and contributors, then merges its entries
def merge!(other_feed)
  %w[id title subtitle updated rights].each do |property|
    send("#{property}=", other_feed.send(property))
  end

  %w[links categories authors contributors].each do |collection|
    other_feed.send(collection).each do |item|
      send(collection) << item
    end
  end

  merge_entries!(other_feed)
end
145
+
146
# merges "important" properties of this feed with another one,
# returning a new feed (a clone of this one with #merge! applied)
def merge(other_feed)
  combined = clone
  combined.merge!(other_feed)
  combined
end
155
+
156
# fetches this feed's URL, parses the result and #merge!s
# changes, new entries, &c.
#
# Sends If-None-Match / If-Modified-Since when an etag or modification
# date is known from an earlier fetch. Returns self, unchanged when the
# server answers 304 or when the fetched atom:updated is not newer than
# this feed's.
#
# Raises RuntimeError when the feed has no uri, Atom::FeedGone on a 410
# response, and Atom::HTTPException on any other non-200 status or on a
# Content-Type other than application/atom+xml.
def update!
  raise(RuntimeError, "can't fetch without a uri.") unless @uri

  headers = {}
  headers["If-None-Match"] = @etag if @etag
  headers["If-Modified-Since"] = @last_modified if @last_modified

  res = @http.get(@uri, headers)

  if res.code == "304"
    # we're already all up to date
    return self
  elsif res.code == "410"
    raise Atom::FeedGone, "410 Gone (#{@uri})"
  elsif res.code != "200"
    raise Atom::HTTPException, "Unexpected HTTP response code: #{res.code}"
  end

  # to_s: a response without a Content-Type should be reported as an
  # unexpected type instead of failing with NoMethodError on nil
  unless res.content_type.to_s.match(/^application\/atom\+xml/)
    raise Atom::HTTPException, "Unexpected HTTP response Content-Type: #{res.content_type} (wanted application/atom+xml)"
  end

  @etag = res["Etag"] if res["Etag"]
  @last_modified = res["Last-Modified"] if res["Last-Modified"]

  xml = res.body

  coll = REXML::Document.new(xml)

  update_time = Time.parse(REXML::XPath.first(coll, "/atom:feed/atom:updated", { "atom" => Atom::NS } ).text)

  # the feed hasn't been updated, don't bother
  if self.updated and self.updated >= update_time
    return self
  end

  coll = Atom::Feed.parse(coll, self.base.to_s)
  merge! coll

  # compare rel with == here: the previous `l["rel"] = "next"` was an
  # assignment, which overwrote the rel of the links it visited and
  # matched on the type alone
  link = coll.links.find { |l| l["rel"] == "next" && l["type"] == "application/atom+xml" }
  if link
    abs_uri = @uri + link["href"]
    @next = Feed.new(abs_uri.to_s, @http)
  end

  link = coll.links.find { |l| l["rel"] == "previous" && l["type"] == "application/atom+xml" }
  if link
    abs_uri = @uri + link["href"]
    @prev = Feed.new(abs_uri.to_s, @http)
  end

  self
end
211
+
212
# adds an entry to this feed. if this feed already contains an
# entry with the same id, the newest one is used: the stored entry is
# replaced when it has no updated time, or when the incoming entry's
# updated time is the same or later.
def <<(entry)
  current = entries.find { |candidate| candidate.id == entry.id }

  return @entries << entry unless current

  replace = !current.updated ||
            (entry.updated && entry.updated >= current.updated)

  @entries[@entries.index(current)] = entry if replace
end
225
+ end
226
+ end
227
+
228
+ # this is here solely so you don't have to require it
229
+ require "atom/xml"