atom-tools 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/COPYING +18 -0
  2. data/README +103 -0
  3. data/Rakefile +77 -0
  4. data/bin/atom-client.rb +246 -0
  5. data/bin/atom-server.rb~ +71 -0
  6. data/doc/classes/Atom/App.html +217 -0
  7. data/doc/classes/Atom/Author.html +130 -0
  8. data/doc/classes/Atom/Category.html +128 -0
  9. data/doc/classes/Atom/Collection.html +322 -0
  10. data/doc/classes/Atom/Content.html +129 -0
  11. data/doc/classes/Atom/Contributor.html +119 -0
  12. data/doc/classes/Atom/Element.html +325 -0
  13. data/doc/classes/Atom/Entry.html +365 -0
  14. data/doc/classes/Atom/Feed.html +585 -0
  15. data/doc/classes/Atom/HTTP.html +374 -0
  16. data/doc/classes/Atom/Link.html +137 -0
  17. data/doc/classes/Atom/Text.html +229 -0
  18. data/doc/classes/XHTML.html +118 -0
  19. data/doc/created.rid +1 -0
  20. data/doc/files/README.html +213 -0
  21. data/doc/files/lib/atom/app_rb.html +110 -0
  22. data/doc/files/lib/atom/collection_rb.html +110 -0
  23. data/doc/files/lib/atom/element_rb.html +109 -0
  24. data/doc/files/lib/atom/entry_rb.html +111 -0
  25. data/doc/files/lib/atom/feed_rb.html +112 -0
  26. data/doc/files/lib/atom/http_rb.html +109 -0
  27. data/doc/files/lib/atom/text_rb.html +108 -0
  28. data/doc/files/lib/atom/xml_rb.html +110 -0
  29. data/doc/files/lib/atom/yaml_rb.html +109 -0
  30. data/doc/fr_class_index.html +39 -0
  31. data/doc/fr_file_index.html +36 -0
  32. data/doc/fr_method_index.html +62 -0
  33. data/doc/index.html +24 -0
  34. data/doc/rdoc-style.css +208 -0
  35. data/lib/atom/app.rb +87 -0
  36. data/lib/atom/collection.rb +75 -0
  37. data/lib/atom/element.rb +277 -0
  38. data/lib/atom/entry.rb +135 -0
  39. data/lib/atom/feed.rb +229 -0
  40. data/lib/atom/http.rb +132 -0
  41. data/lib/atom/text.rb +163 -0
  42. data/lib/atom/xml.rb +200 -0
  43. data/lib/atom/yaml.rb +101 -0
  44. data/setup.rb +1585 -0
  45. data/test/conformance/order.rb +117 -0
  46. data/test/conformance/title.rb +108 -0
  47. data/test/conformance/updated.rb +33 -0
  48. data/test/conformance/xhtmlcontentdiv.rb +18 -0
  49. data/test/conformance/xmlnamespace.rb +54 -0
  50. data/test/runtests.rb +14 -0
  51. data/test/test_constructs.rb +91 -0
  52. data/test/test_feed.rb +128 -0
  53. data/test/test_general.rb +99 -0
  54. data/test/test_http.rb +86 -0
  55. data/test/test_protocol.rb +69 -0
  56. data/test/test_xml.rb +353 -0
  57. metadata +107 -0
@@ -0,0 +1,277 @@
1
+ require "time"
2
+ require "rexml/element"
3
+
4
+ module Atom # :nodoc:
5
+ class Time < ::Time # :nodoc:
6
# Coerces +date+ into a time-like object whose #to_s is its ISO 8601 form.
#
# date:: nil, an object responding to #iso8601, or anything whose #to_s
#        output Time.parse understands
#
# Returns nil when +date+ is nil; otherwise a time-like object carrying a
# singleton #to_s that delegates to #iso8601.
def self.new(date)
  return if date.nil?

  # Work on a copy when the caller handed us a ready-made time object: the
  # singleton #to_s defined below must not leak onto the caller's object
  # (and could not be defined at all on a frozen one).
  stamp = date.respond_to?(:iso8601) ? date.dup : Time.parse(date.to_s)

  def stamp.to_s
    iso8601
  end

  stamp
end
21
+ end
22
+
23
+ # ignore the man behind the curtain.
24
# Builds an anonymous Array subclass whose instances collect items of
# +klass+.
#
# The generated class answers +holds+ (the item class) and +single?+
# (always true); its instances gain #new, which appends and returns a
# freshly built item, and #to_element, which converts every item.
def self.Multiple(klass)
  Class.new(Array) do
    @class = klass

    # the class of the items stored in this collection
    def self.holds
      @class
    end

    def self.single?
      true
    end

    # builds a new item with no arguments, appends it and hands it back
    def new
      fresh = self.class.holds.new
      self << fresh
      fresh
    end

    # an Array holding the result of #to_element for every item
    def to_element
      map { |member| member.to_element }
    end

    def taguri
    end
  end
end
44
+
45
+ class Element < Hash
46
+ # a REXML::Element that shares this element's extension attributes
47
+ # and child elements
48
+ attr_reader :extensions
49
+
50
+ # this element's xml:base
51
+ attr_accessor :base
52
+
53
+ # The following is a DSL for describing an atom element.
54
+
55
+ # this element's attributes
56
# Returns the [name, required] pairs registered through +attrb+, or an
# empty Array when none have been registered on this class.
def self.attrs # :nodoc:
  return @attrs if @attrs

  []
end
59
+
60
+ # this element's child elements
61
# Returns the [name, kind, required] triples registered through +element+,
# or an empty Array when none have been registered on this class.
def self.elements # :nodoc:
  return @elements if @elements

  []
end
64
+
65
+ # required child elements
66
# Returns the names of every child element declared as required.
#
# The previous implementation used +find+, which handed back only the
# first matching [name, kind, req] triple and raised NoMethodError on a
# class that had declared no elements at all. An empty Array is returned
# when nothing is required.
def self.required # :nodoc:
  declared = @elements || []

  declared.select { |_name, _kind, req| req }.map { |name, _kind, _req| name }
end
69
+
70
+ # copy defined elements and attributes so inheritance works
71
# Hook run when +klass+ subclasses this class: re-declares every element
# and attribute on the subclass so it starts out with its own copy of the
# parent's definitions.
def self.inherited(klass) # :nodoc:
  elements.each { |name, kind, req| klass.element(name, kind, req) }
  attrs.each { |name, req| klass.attrb(name, req) }
end
79
+
80
+ # define a child element
81
# Declares a child element called +name+ holding values of class +kind+.
#
# A reader is always generated. A writer is generated (through
# +define_accessor+) only when +kind+ does not respond to +single?+.
# +req+ records whether the child is required.
def self.element(name, kind, req = false) # :nodoc:
  attr_reader name

  (@elements ||= []) << [name, kind, req]

  define_accessor(name, kind) unless kind.respond_to?(:single?)
end
91
+
92
+ # define an attribute
93
# Declares an XML attribute called +name+; +req+ records whether it is
# required. Returns the updated list of [name, required] pairs.
def self.attrb(name, req = false) # :nodoc:
  (@attrs ||= []) << [name, req]
end
98
+
99
+ # a little bit of magic
100
# Generates the writer "<name>=" for a child element.
#
# The writer ignores nil/false. Any other value is wrapped in +kind+:
# Atom::Element subclasses are built with the value and the element name,
# every other class with the value alone. The result is stored via +set+.
def self.define_accessor(name, kind) # :nodoc:
  define_method :"#{name}=" do |value|
    return unless value

    wrapped =
      if kind.ancestors.include?(Atom::Element)
        kind.new(value, name.to_s)
      else
        kind.new(value)
      end

    set(name, wrapped)
  end
end
113
+
114
+ # get the value of an attribute
115
# Reads the attribute stored under +key+.
#
# Raises RuntimeError (via +test_key+) when +key+ is not an attribute
# declared for this element.
def [](key)
  test_key(key)

  super(key)
end
120
+
121
+ # set the value of an attribute
122
# Stores +value+ as the attribute +key+.
#
# Raises RuntimeError (via +test_key+) when +key+ is not an attribute
# declared for this element.
def []=(key, value)
  test_key(key)

  super(key, value)
end
127
+
128
+ # internal junk you probably don't care about
129
# Sets up the extension container and the element's local name, then gives
# every collection-style child (a kind responding to +single?+) a fresh,
# empty collection.
#
# name:: the local name to serialize under; when nil, #local_name falls
#        back to the class name
def initialize(name = nil) # :nodoc:
  @extensions = REXML::Element.new("extensions")
  @local_name = name

  # Build exactly one collection per child (the old code constructed and
  # discarded an extra one) and keep the block variable distinct from the
  # +name+ parameter.
  self.class.elements.each do |child, kind, _req|
    set(child, kind.new) if kind.respond_to?(:single?)
  end
end
140
+
141
+ # eg. "feed" or "entry" or "updated" or "title" or ...
142
# The name this element serializes under: the explicit name given at
# construction or, failing that, the downcased last segment of the class
# name.
def local_name # :nodoc:
  return @local_name if @local_name

  self.class.name.split("::").last.downcase
end
145
+
146
+ # convert to a REXML::Element (with no namespace)
147
# Serializes this element into a REXML::Element without a namespace.
#
# Declared children come first (in declaration order), then declared
# attributes that have a value, then copies of the extension children, and
# finally xml:base when a non-empty base is set.
def to_element
  root = REXML::Element.new(local_name)

  self.class.elements.each do |name, _kind, _req|
    value = get(name)
    next if value.nil?

    if value.respond_to?(:to_element)
      # a collection answers with an Array of elements, a single child
      # with just one
      converted = value.to_element
      converted = [converted] unless converted.is_a?(Array)
      converted.each { |child| root << child }
    else
      # plain values become a text-only child named after the accessor
      REXML::Element.new(name.to_s, root).text = value
    end
  end

  self.class.attrs.each do |name, _req|
    attr_value = self[name.to_s]
    root.attributes[name.to_s] = attr_value if attr_value
  end

  # add copies: otherwise they get removed from @extensions
  extensions.children.each { |extension| root << extension.dup }

  xml_base = base
  root.attributes["xml:base"] = xml_base if xml_base && !xml_base.empty?

  root
end
181
+
182
+ # convert to a REXML::Document (properly namespaced)
183
# Wraps #to_element in a new REXML::Document, declaring Atom::NS as the
# namespace of the root element. Returns the document.
def to_xml
  document = REXML::Document.new

  root = to_element
  root.add_namespace(Atom::NS)

  document << root
  document
end
190
+
191
+ # convert to an XML string
192
# The String form of the document built by #to_xml.
def to_s
  document = to_xml
  document.to_s
end
195
+
196
+ private
197
+
198
+ # like +valid_key?+ but raises on failure
199
# Guard form of +valid_key?+: returns nil for a declared attribute and
# raises RuntimeError for anything else.
def test_key(key)
  return if valid_key?(key)

  raise RuntimeError, "this element (#{local_name}) doesn't have that attribute '#{key}'"
end
204
+
205
+ # tests that an attribute 'key' has been defined
206
# Looks +key+ up among the attributes declared on this class, comparing
# each declared name's String form with +key+. Returns the matching
# [name, required] pair, or nil when there is none.
def valid_key?(key)
  declared = self.class.attrs
  declared.find { |attr_name, _required| attr_name.to_s == key }
end
209
+
210
# Reads the instance variable backing the child element +name+.
def get(name)
  instance_variable_get(:"@#{name}")
end
213
+
214
# Writes +value+ into the instance variable backing the child element
# +name+ and returns +value+.
def set(name, value)
  instance_variable_set(:"@#{name}", value)
end
217
+ end
218
+
219
+ # this facilitates YAML output
220
# Base class for elements described purely by attributes; Link and
# Category derive from it. It adds no behavior of its own.
class AttrEl < Atom::Element # :nodoc:
end
222
+
223
+ # A link has the following attributes:
224
+ #
225
+ # href (required):: the link's IRI
226
+ # rel:: the relationship of the linked item to the current item
227
+ # type:: a hint about the media type of the linked item
228
+ # hreflang:: the language of the linked item (RFC3066)
229
+ # title:: human-readable information about the link
230
+ # length:: a hint about the length (in octets) of the linked item
231
# An atom:link. +href+ is the only required attribute; +rel+ starts out as
# "alternate" on every new instance.
class Link < Atom::AttrEl
  attrb(:href, true)
  attrb(:rel)
  attrb(:type)
  attrb(:hreflang)
  attrb(:title)
  attrb(:length)

  def initialize(name = nil) # :nodoc:
    super(name)

    # just setting a default
    self["rel"] = "alternate"
  end
end
246
+
247
+ # A category has the following attributes:
248
+ #
249
+ # term (required):: a string that identifies the category
250
+ # scheme:: an IRI that identifies a categorization scheme
251
+ # label:: a human-readable label
252
# An atom:category. +term+ is required; +scheme+ and +label+ are optional.
class Category < Atom::AttrEl
  attrb(:term, true)
  attrb(:scheme)
  attrb(:label)
end
257
+
258
+ # A person construct has the following child elements:
259
+ #
260
+ # name (required):: a human-readable name
261
+ # uri:: an IRI associated with the person
262
+ # email:: an email address associated with the person
263
# An atom:author person construct: a required +name+ plus optional +uri+
# and +email+, all stored as Strings.
class Author < Atom::Element
  element(:name, String, true)
  element(:uri, String)
  element(:email, String)
end
268
+
269
+ # same as Atom::Author
270
# An atom:contributor person construct, declared with the same children as
# Atom::Author.
class Contributor < Atom::Element
  # Author and Contributor should probably inherit from Person, but
  # oh well.
  element(:name, String, true)
  element(:uri, String)
  element(:email, String)
end
277
+ end
data/lib/atom/entry.rb ADDED
@@ -0,0 +1,135 @@
1
+ require "rexml/document"
2
+
3
+ require "atom/element"
4
+ require "atom/text"
5
+
6
+ module Atom
7
+ NS = "http://www.w3.org/2005/Atom"
8
+
9
+ # An individual entry in a feed. As an Atom::Element, it can be
10
+ # manipulated using accessors for each of its child elements. You
11
+ # should be able to set them using an instance of any class that
12
+ # makes sense
13
+ #
14
+ # Entries have the following children:
15
+ #
16
+ # id:: a universally unique IRI which permanently identifies the entry
17
+ # title:: a human-readable title (Atom::Text)
18
+ # content:: contains or links to the content of an entry (Atom::Content)
19
+ # rights:: information about rights held in and over an entry (Atom::Text)
20
+ # source:: the source feed's metadata (unimplemented)
21
+ # published:: a Time "early in the life cycle of an entry"
22
+ # updated:: the most recent Time an entry was modified in a way the publisher considers significant
23
+ # summary:: a summary, abstract or excerpt of an entry (Atom::Text)
24
+ #
25
+ # There are also +categories+, +links+, +authors+ and +contributors+,
26
+ # each of which is an Array of its respective type and can be used
27
+ # thusly:
28
+ #
29
+ # author = entry.authors.new
30
+ # author.name = "Captain Kangaroo"
31
+ class Entry < Atom::Element
32
+ # the master list of standard children and the types they map to
33
+ element :id, String, true
34
+ element :title, Atom::Text, true
35
+ element :content, Atom::Content, true
36
+
37
+ element :rights, Atom::Text
38
+ # element :source, Atom::Feed # complicated, eg. serialization
39
+
40
+ element :authors, Atom::Multiple(Atom::Author)
41
+ element :contributors, Atom::Multiple(Atom::Contributor)
42
+
43
+ element :categories, Atom::Multiple(Atom::Category)
44
+ element :links, Atom::Multiple(Atom::Link)
45
+
46
+ element :published, Atom::Time
47
+ element :updated, Atom::Time, true
48
+
49
+ element :summary, Atom::Text
50
+
51
# Creates an empty entry whose local name is "entry". When a block is
# given, the new entry is yielded to it.
def initialize # :nodoc:
  super("entry")

  # XXX I don't think I've ever actually used this
  yield(self) if block_given?
end
57
+
58
+ # parses XML fetched from +base+ into an Atom::Entry
59
# Parses +xml+ (fetched from +base+) into an Atom::Entry.
#
# xml::  an object responding to +to_atom_entry+, or anything whose #to_s
#        is an XML document
# base:: passed through to +to_atom_entry+
def self.parse(xml, base = "")
  source = xml.respond_to?(:to_atom_entry) ? xml : REXML::Document.new(xml.to_s)

  source.to_atom_entry(base)
end
66
+
67
# A short description showing only the entry's id.
def inspect # :nodoc:
  entry_id = self.id

  "#<Atom::Entry id:'#{entry_id}'>"
end
70
+
71
+ # declare that this entry has updated
72
# Stamps the entry's +updated+ child with the current time.
def updated!
  now = Time.now
  self.updated = now
end
75
+
76
+ # categorize the entry based on a space-separated string
77
# Adds one category per whitespace-separated word in +string+, using the
# word as the category's "term". Does nothing when +string+ is nil.
def tag_with(string)
  return if string.nil?

  string.split.each { |tag| categories.new["term"] = tag }
end
84
+
85
+ # XXX this needs a test suite before it can be trusted.
86
+ =begin
87
+ # tests the entry's validity
88
+ def valid?
89
+ self.class.required.each do |element|
90
+ unless instance_variable_get "@#{element}"
91
+ return [ false, "required element atom:#{element} missing" ]
92
+ end
93
+ end
94
+
95
+ if @authors.length == 0
96
+ return [ false, "required element atom:author missing" ]
97
+ end
98
+
99
+ alternates = @links.find_all do |link|
100
+ link["rel"] == "alternate"
101
+ end
102
+
103
+ unless @content or alternates
104
+ return [ false, "no atom:content or atom:link[rel='alternate']" ]
105
+ end
106
+
107
+ alternates.each do |link|
108
+ if alternates.find do |x|
109
+ not x == link and
110
+ x["type"] == link["type"] and
111
+ x["hreflang"] == link["hreflang"]
112
+ end
113
+
114
+ return [ false, 'more than one atom:link with a rel attribute value of "alternate" that has the same combination of type and hreflang attribute values.' ]
115
+ end
116
+ end
117
+
118
+ type = @content["type"]
119
+
120
+ base64ed = (not ["", "text", "html", "xhtml"].member? type) and
121
+ type.match(/^text\/.*/).nil? and # not text
122
+ type.match(/.*[\+\/]xml$/).nil? # not XML
123
+
124
+ if (@content["src"] or base64ed) and not summary
125
+ return [ false, "out-of-line or base64ed atom:content and no atom:summary" ]
126
+ end
127
+
128
+ true
129
+ end
130
+ =end
131
+ end
132
+ end
133
+
134
+ # this is here solely so that you don't have to require it
135
+ require "atom/xml"
data/lib/atom/feed.rb ADDED
@@ -0,0 +1,229 @@
1
+ require "atom/element"
2
+ require "atom/text"
3
+ require "atom/entry"
4
+
5
+ require "atom/http"
6
+
7
+ module Atom
8
# Raised by Feed#update! when the response carries an unexpected status
# code or Content-Type.
class HTTPException < RuntimeError # :nodoc:
end
10
# Raised by Feed#update! when the server answers 410 Gone.
class FeedGone < RuntimeError # :nodoc:
end
12
+
13
+ # A feed of entries. As an Atom::Element, it can be manipulated using
14
+ # accessors for each of its child elements. You can set them with any
15
+ # object that makes sense; they will be returned in the types listed.
16
+ #
17
+ # Feeds have the following children:
18
+ #
19
+ # id:: a universally unique IRI which permanently identifies the feed
20
+ # title:: a human-readable title (Atom::Text)
21
+ # subtitle:: a human-readable description or subtitle (Atom::Text)
22
+ # updated:: the most recent Time the feed was modified in a way the publisher considers significant
23
+ # generator:: the agent used to generate a feed
24
+ # icon:: an IRI identifying an icon which visually identifies a feed (1:1 aspect ratio, looks OK small)
25
+ # logo:: an IRI identifying an image which visually identifies a feed (2:1 aspect ratio)
26
+ # rights:: rights held in and over a feed (Atom::Text)
27
+ #
28
+ # There are also +links+, +categories+, +authors+, +contributors+
29
+ # and +entries+, each of which is an Array of its respective type and
30
+ # can be used thusly:
31
+ #
32
+ # entry = feed.entries.new
33
+ # entry.title = "blah blah blah"
34
+ class Feed < Atom::Element
35
+ attr_reader :uri
36
+
37
+ # the Atom::Feed pointed to by link[@rel='previous']
38
+ attr_reader :prev
39
+ # the Atom::Feed pointed to by link[@rel='next']
40
+ attr_reader :next
41
+
42
+ # conditional get information from the last fetch
43
+ attr_reader :etag, :last_modified
44
+
45
+ element :id, String, true
46
+ element :title, Atom::Text, true
47
+ element :subtitle, Atom::Text
48
+
49
+ element :updated, Atom::Time, true
50
+
51
+ element :links, Atom::Multiple(Atom::Link)
52
+ element :categories, Atom::Multiple(Atom::Category)
53
+
54
+ element :authors, Atom::Multiple(Atom::Author)
55
+ element :contributors, Atom::Multiple(Atom::Contributor)
56
+
57
+ element :generator, String # XXX with uri and version attributes!
58
+ element :icon, String
59
+ element :logo, String
60
+
61
+ element :rights, Atom::Text
62
+
63
+ element :entries, Atom::Multiple(Atom::Entry)
64
+
65
+ include Enumerable
66
+
67
# A short description: the feed's URI, its entry count and its title.
def inspect # :nodoc:
  entry_count = entries.length

  "<#{@uri} entries: #{entry_count} title='#{title}'>"
end
70
+
71
+ # parses XML fetched from +base+ into an Atom::Feed
72
# Parses +xml+ (fetched from +base+) into an Atom::Feed.
#
# xml::  an object responding to +to_atom_feed+, or anything whose #to_s
#        is an XML document
# base:: passed through to +to_atom_feed+
def self.parse(xml, base = "")
  # Probe for the method that is actually called; the guard used to test
  # for to_atom_entry instead.
  if xml.respond_to?(:to_atom_feed)
    xml.to_atom_feed(base)
  else
    REXML::Document.new(xml.to_s).to_atom_feed(base)
  end
end
79
+
80
+ # Create a new Feed that can be found at feed_uri and retrieved
81
+ # using an Atom::HTTP object http
82
# Creates a feed that lives at +feed_uri+ and is fetched through +http+.
#
# feed_uri:: optional; must respond to +to_uri+. It also becomes the
#            feed's base.
# http::     the object used for fetching (defaults to a new Atom::HTTP)
def initialize(feed_uri = nil, http = Atom::HTTP.new)
  @entries = []
  @http = http

  if feed_uri
    @uri = feed_uri.to_uri
    self.base = feed_uri
  end

  super("feed")
end
93
+
94
+ # iterates over a feed's entries
95
# Yields each of the feed's entries in turn.
def each(&block)
  @entries.each(&block)
end
98
+
99
+ # gets everything in the logical feed (could be a lot of stuff)
100
+ # (see <http://www.ietf.org/internet-drafts/draft-nottingham-atompub-feed-history-05.txt>)
101
# Updates this feed, then follows the chain of previous feeds and after
# that the chain of next feeds, updating each one and merging its entries
# into this feed. Returns self.
def get_everything!
  self.update!

  [:prev, :next].each do |direction|
    neighbour = instance_variable_get("@#{direction}")

    while neighbour
      neighbour.update!

      merge_entries!(neighbour)
      neighbour = neighbour.send(direction)
    end
  end

  self
end
122
+
123
+ # merges the entries from another feed into this one
124
# Adds every entry of +other_feed+ to this feed through #<<.
def merge_entries!(other_feed)
  # TODO: add atom:source elements
  other_feed.each { |entry| self << entry }
end
130
+
131
+ # like #merge, but in place
132
# Merges +other_feed+ into this feed in place: id, title, subtitle,
# updated and rights are overwritten; links, categories, authors and
# contributors are appended; entries are merged via #merge_entries!.
def merge!(other_feed)
  %w[id title subtitle updated rights].each do |property|
    send("#{property}=", other_feed.send(property))
  end

  %w[links categories authors contributors].each do |collection|
    other_feed.send(collection).each do |item|
      send(collection) << item
    end
  end

  merge_entries!(other_feed)
end
145
+
146
+ # merges "important" properties of this feed with another one,
147
+ # returning a new feed
148
# Returns a new feed combining this feed with +other_feed+ (see #merge!),
# leaving the receiver untouched.
def merge(other_feed)
  feed = clone

  # clone is shallow, so the copy would share this feed's collection
  # arrays and merge! would append to them as well. Give the copy its own
  # arrays; the items themselves are still shared.
  [:links, :categories, :authors, :contributors, :entries].each do |name|
    ivar = "@#{name}"
    shared = feed.instance_variable_get(ivar)
    feed.instance_variable_set(ivar, shared.dup) if shared
  end

  feed.merge! other_feed

  feed
end
155
+
156
+ # fetches this feed's URL, parses the result and #merge!s
157
+ # changes, new entries, &c.
158
# Fetches this feed's URI, parses the response and #merge!s it into this
# feed. Uses the ETag / Last-Modified values of the previous fetch for a
# conditional GET, and records links with rel "next" / "previous" and type
# "application/atom+xml" as @next / @prev. Returns self.
#
# Raises RuntimeError when the feed has no URI, Atom::FeedGone on a 410
# response and Atom::HTTPException on any other non-200/304 status or a
# Content-Type that is not application/atom+xml.
def update!
  raise(RuntimeError, "can't fetch without a uri.") unless @uri

  headers = {}
  headers["If-None-Match"] = @etag if @etag
  headers["If-Modified-Since"] = @last_modified if @last_modified

  res = @http.get(@uri, headers)

  if res.code == "304"
    # we're already all up to date
    return self
  elsif res.code == "410"
    raise Atom::FeedGone, "410 Gone (#{@uri})"
  elsif res.code != "200"
    raise Atom::HTTPException, "Unexpected HTTP response code: #{res.code}"
  end

  # to_s: a response without any Content-Type must fail with the exception
  # below rather than a NoMethodError on nil
  unless res.content_type.to_s.match(/^application\/atom\+xml/)
    raise Atom::HTTPException, "Unexpected HTTP response Content-Type: #{res.content_type} (wanted application/atom+xml)"
  end

  @etag = res["Etag"] if res["Etag"]
  @last_modified = res["Last-Modified"] if res["Last-Modified"]

  xml = res.body

  coll = REXML::Document.new(xml)

  update_time = Time.parse(REXML::XPath.first(coll, "/atom:feed/atom:updated", { "atom" => Atom::NS } ).text)

  # the feed hasn't been updated, don't bother
  if self.updated and self.updated >= update_time
    return self
  end

  coll = Atom::Feed.parse(coll, self.base.to_s)
  merge! coll

  # Compare rel with ==. The previous code assigned to it, which rewrote
  # the rel of the links it inspected and matched on type alone.
  link = coll.links.find { |l| l["rel"] == "next" && l["type"] == "application/atom+xml" }
  if link
    abs_uri = @uri + link["href"]
    @next = Feed.new(abs_uri.to_s, @http)
  end

  link = coll.links.find { |l| l["rel"] == "previous" && l["type"] == "application/atom+xml" }
  if link
    abs_uri = @uri + link["href"]
    @prev = Feed.new(abs_uri.to_s, @http)
  end

  self
end
211
+
212
+ # adds an entry to this feed. if this feed already contains an
213
+ # entry with the same id, the newest one is used.
214
# Adds +entry+ to this feed. When the feed already holds an entry with the
# same id, the stored one is replaced only if it has no +updated+ value or
# +entry+ is at least as recent; otherwise the feed is left unchanged.
def <<(entry)
  # Locate the slot by id. Looking the stored entry up again with
  # Array#index(object) compares with ==, and entries are Hash subclasses,
  # so a different entry with equal attributes could be replaced instead.
  position = @entries.index { |e| e.id == entry.id }

  if position.nil?
    @entries << entry
  else
    existing = @entries[position]
    newer = existing.updated && entry.updated && entry.updated >= existing.updated

    @entries[position] = entry if !existing.updated || newer
  end
end
225
+ end
226
+ end
227
+
228
+ # this is here solely so you don't have to require it
229
+ require "atom/xml"