thehack-atom-tools 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,106 @@
1
+ require "uri"
2
+
3
+ require "atom/http"
4
+ require "atom/element"
5
+ require "atom/collection"
6
+
7
+ module Atom
8
# Error type raised by the service autodiscovery code when no usable
# service document can be located.
class AutodiscoveryFailure < RuntimeError
end
9
+
10
# An Atom::Workspace is declared with a #title (an Atom::Title) and
# #collections, an Array of Atom::Collection objects.
class Workspace < Atom::Element
  is_element(PP_NS, :workspace)

  elements(['app', PP_NS], :collection, :collections, Atom::Collection)
  atom_element(:title, Atom::Title)
end
17
+
18
# Atom::Service represents an Atom Publishing Protocol service
# document. Its only child is #workspaces, which is an Array of
# Atom::Workspace objects.
class Service < Atom::Element
  is_element PP_NS, :service

  elements ['app', PP_NS], :workspace, :workspaces, Atom::Workspace

  # Retrieves and parses an Atom service document.
  #
  # service_url:: URL of the service document; when it is empty no request
  #               is made and the object is left unpopulated
  # http::        the object used to perform the GET (an Atom::HTTP by
  #               default)
  #
  # Raises Atom::HTTPException unless the response code is "200".
  def initialize(service_url = "", http = Atom::HTTP.new)
    super()

    @http = http

    return if service_url.empty?

    base = URI.parse(service_url)

    res = @http.get(base, "Accept" => "application/atomsvc+xml")
    res.validate_content_type(["application/atomsvc+xml"])

    unless res.code == "200"
      raise Atom::HTTPException, "Unexpected HTTP response code: #{res.code}"
    end

    # parse the response body into this very object
    self.class.parse(res.body, base, self)
  end

  # Returns the collections of every workspace as one flat Array.
  def collections
    self.workspaces.map { |ws| ws.collections }.flatten
  end

  # Given a URL, attempts to find a service document.
  #
  # A response served as application/atomsvc+xml is parsed directly. An
  # HTML response is searched (using Hpricot) for a <link> whose rel
  # contains "service", then for one whose rel contains "EditURI" (which
  # is followed via ::from_rsd).
  #
  # Raises AutodiscoveryFailure when the HTML has neither kind of link or
  # when the response has any other content type, and a RuntimeError when
  # Hpricot cannot be loaded.
  def self.discover url, http = Atom::HTTP.new
    res = http.get(url, 'Accept' => 'application/atomsvc+xml, text/html')

    case res.content_type
    when /application\/atomsvc\+xml/
      Service.parse res.body, url
    when /html/
      begin
        require 'hpricot'
      rescue LoadError
        # a failed require raises LoadError, which is not a StandardError
        # and so is never caught by a bare rescue
        raise 'autodiscovering from HTML requires Hpricot.'
      end

      links = Hpricot(res.body).search('//link')

      # interpolating (instead of String#+) keeps this from raising a
      # TypeError should l['rel'] be nil for a <link> without a rel
      service_links = links.select { |l| " #{l['rel']} ".match(/ service /i) }

      unless service_links.empty?
        url = url.to_uri + service_links.first['href']
        return Service.new(url.to_s, http)
      end

      rsd_links = links.select { |l| " #{l['rel']} ".match(/ EditURI /i) }

      unless rsd_links.empty?
        url = url.to_uri + rsd_links.first['href']
        return Service.from_rsd(url, http)
      end

      raise AutodiscoveryFailure, "couldn't find any autodiscovery links in the HTML"
    else
      raise AutodiscoveryFailure, "can't autodiscover from a document of type #{res.content_type}"
    end
  end

  # Fetches the RSD document at +url+ and builds a Service from the
  # apiLink attribute of its api element named "Atom" (resolved against
  # +url+).
  #
  # Raises AutodiscoveryFailure when the RSD has no such element.
  def self.from_rsd url, http = Atom::HTTP.new
    rsd = http.get(url)

    doc = REXML::Document.new(rsd.body)

    atom = REXML::XPath.first(doc, '/rsd/service/apis/api[@name="Atom"]')

    unless atom
      raise AutodiscoveryFailure, "couldn't find an Atom link in the RSD"
    end

    url = url.to_uri + atom.attributes['apiLink']

    Service.new(url.to_s, http)
  end
end
106
+ end
data/lib/atom/text.rb ADDED
@@ -0,0 +1,231 @@
1
+ require "atom/element"
2
+
3
# Holds the XHTML namespace constant.
module XHTML
  # the XHTML namespace URI
  NS = 'http://www.w3.org/1999/xhtml'
end
6
+
7
+ module Atom
8
# An Atom::Element representing a text construct.
# It has a single attribute, "type", which specifies how to interpret
# the element's content. Different types are:
#
# text::  a plain string, without any markup (default)
# html::  a chunk of HTML
# xhtml:: a chunk of *well-formed* XHTML
#
# You should set this attribute appropriately after you set a Text
# element (entry.content, entry.title or entry.summary).
#
# The content of this element can be retrieved in different formats, see
# #html and #xml
class Text < Atom::Element
  atom_attrb :type

  include AttrEl

  # parse hook: works out the content of the parsed node +x+ and stores
  # it in +e+'s @content
  on_parse_root do |e,x|
    type = e.type

    if x.is_a? REXML::Element
      if type == 'xhtml'
        x = e.get_elem x, XHTML::NS, 'div'

        raise Atom::ParseError, 'xhtml content needs div wrapper' unless x

        c = x.dup

        unless x.prefix.empty?
          # content has a namespace prefix, strip prefixes from it and all
          # XHTML children
          #
          # NOTE(review): re-assigning an element's own local name looks
          # like a deliberate way to make REXML drop the prefix -- confirm
          REXML::XPath.each(c, './/xhtml:*', 'xhtml' => XHTML::NS) do |child|
            child.name = child.name
          end
        end
      else
        c = x[0] ? x[0].value : nil
      end
    else
      c = x.to_s
    end

    e.instance_variable_set("@content", c)
  end

  # build hook: writes @content into the output node +x+, as a child node
  # when it responds to #parent and as text otherwise
  on_build do |e,x|
    c = e.instance_variable_get('@content')

    if c.respond_to? :parent
      x << c.dup
    elsif c
      x.text = c.to_s
    end
  end

  # value:: the initial content. Anything responding to #to_xml is stored
  #         as the first item of that result; nil becomes the empty string.
  def initialize value = nil
    super()

    @content = if value.respond_to? :to_xml
      value.to_xml[0]
    elsif value
      value
    else
      ''
    end
  end

  # the "type" attribute, defaulting to 'text' when unset
  def type
    @type ? @type : 'text'
  end

  # the content as a String; for xhtml content wrapped in a div, only the
  # div's children are included
  def to_s
    if type == 'xhtml' && @content && @content.name == 'div'
      @content.children.to_s
    else
      @content.to_s
    end
  end

  # returns a string suitable for dumping into an HTML document.
  # (or nil if that's impossible)
  #
  # if you're storing the content of a Text construct, you probably
  # want this representation.
  def html
    if self["type"] == "xhtml" or self["type"] == "html"
      to_s
    elsif self["type"] == "text"
      REXML::Text.new(to_s).to_s
    end
  end

  # attempts to parse the content of this element as XML and return it
  # wrapped in a REXML::Element named 'div'.
  #
  # If self["type"] is "html" the content is run through Hpricot first;
  # a RuntimeError is raised when Hpricot cannot be loaded or when the
  # type is none of xhtml, text and html.
  def xml
    xml = REXML::Element.new 'div'

    if self["type"] == "xhtml"
      # NOTE(review): appending the child nodes themselves (not copies)
      # presumably re-parents them away from @content -- confirm whether
      # callers rely on #xml being non-destructive
      @content.children.each { |child| xml << child }
    elsif self["type"] == "text"
      xml.text = self.to_s
    elsif self["type"] == "html"
      begin
        require "hpricot"
      rescue LoadError
        # a failed require raises LoadError, which is not a StandardError
        # and so is never caught by a bare rescue
        raise "Turning HTML content into XML requires Hpricot."
      end

      fixed = Hpricot(self.to_s, :xhtml_strict => true)
      xml = REXML::Document.new("<div>#{fixed}</div>").root
    else
      # XXX check that @type is an XML mimetype and parse it
      raise "I haven't implemented this yet"
    end

    xml
  end

  def inspect # :nodoc:
    "'#{to_s}'##{self['type']}"
  end

  # Sets the "type" attribute. Setting it to "xhtml" also converts the
  # current content into an XHTML div element.
  #
  # Raises Atom::ParseError for a type #valid_type? rejects, or when the
  # content cannot be parsed as XML.
  def type= value
    unless valid_type? value
      raise Atom::ParseError, "atomTextConstruct type '#{value}' is meaningless"
    end

    @type = value
    if @type == "xhtml"
      begin
        parse_xhtml_content
      rescue REXML::ParseException
        raise Atom::ParseError, "#{@content.inspect} can't be parsed as XML"
      end
    end
  end

  private
  # converts @content based on the value of self["type"]
  # (the +e+ argument is not used here)
  def convert_contents e
    if self["type"] == "xhtml"
      @content
    elsif self["type"] == "text" or self["type"].nil? or self["type"] == "html"
      @content.to_s
    end
  end

  # a text construct only allows these three types
  def valid_type? type
    ["text", "xhtml", "html"].member? type
  end

  # Replaces @content with an XHTML div element built from +xhtml+
  # (or from the current @content when +xhtml+ is nil).
  def parse_xhtml_content xhtml = nil
    xhtml ||= @content

    @content = if xhtml.is_a? REXML::Element
      if xhtml.name == "div" and xhtml.namespace == XHTML::NS
        xhtml.dup
      else
        elem = REXML::Element.new("div")
        elem.add_namespace(XHTML::NS)

        elem << xhtml.dup

        elem
      end
    elsif xhtml.is_a? REXML::Document
      parse_xhtml_content xhtml.root
    else
      # wrap the given markup (not necessarily @content) in a div
      div = REXML::Document.new("<div>#{xhtml}</div>")
      div.root.add_namespace(XHTML::NS)

      div.root
    end
  end
end
187
+
188
# Atom::Content behaves the same as an Atom::Text, but for two things:
#
# * the "type" attribute can be an arbitrary media type
# * there is a "src" attribute which is an IRI that points to the content
#   of the entry (in which case the content element will be empty)
class Content < Atom::Text
  is_atom_element :content

  atom_attrb :src

  # Sets the "src" attribute and discards any inline content. When this
  # element has a base, +v+ is resolved against it.
  def src= v
    @content = nil

    @src = self.base ? (self.base.to_uri + v).to_s : v
  end

  private
  # accepts everything Atom::Text does, plus anything containing a slash
  def valid_type? type
    super || type.match(/\//)
  end

  # Uses Atom::Text's conversion when it yields a value; otherwise falls
  # back on the root of a Document, an Element as-is, or normalized text.
  def convert_contents e
    converted = super
    return converted if converted

    case @content
    when REXML::Document
      @content.root
    when REXML::Element
      @content
    else
      REXML::Text.normalize(@content.to_s)
    end
  end
end
226
+
227
# the Atom::Text declared as the atom "title" element
class Title < Atom::Text
  is_atom_element :title
end
228
# the Atom::Text declared as the atom "subtitle" element
class Subtitle < Atom::Text
  is_atom_element :subtitle
end
229
# the Atom::Text declared as the atom "summary" element
class Summary < Atom::Text
  is_atom_element :summary
end
230
# the Atom::Text declared as the atom "rights" element
class Rights < Atom::Text
  is_atom_element :rights
end
231
+ end
data/lib/atom/tools.rb ADDED
@@ -0,0 +1,163 @@
1
+ require 'atom/collection'
2
+
3
+ # methods to make writing commandline Atom tools more convenient
4
+
5
+ module Atom::Tools
6
# Builds an Atom::Feed for a URL, loads it and returns the entries found.
#
# url::           the feed URL
# complete_feed:: when true the feed is loaded with #get_everything!,
#                 otherwise with #update!
# http::          the HTTP object handed to the feed
def http_to_entries(url, complete_feed = false, http = Atom::HTTP.new)
  feed = Atom::Feed.new(url, http)

  complete_feed ? feed.get_everything! : feed.update!

  feed.entries
end
18
+
19
# Parses every *.atom file directly inside +path+ with Atom::Entry.parse
# and returns the results as an Array.
#
# Raises ArgumentError when +path+ is not a directory.
def dir_to_entries(path)
  unless File.directory?(path)
    raise ArgumentError, "#{path} is not a directory"
  end

  atom_files = Dir[path + '/*.atom']

  atom_files.map { |file| Atom::Entry.parse(File.read(file)) }
end
27
+
28
# Parses a Feed from stdin and returns its entries.
def stdin_to_entries
  feed = Atom::Feed.parse($stdin)
  feed.entries
end
32
+
33
# POSTs each Atom::Entry in +entries+ to the Atom Collection at +url+,
# using +http+ for the requests. Returns +entries+.
def entries_to_http(entries, url, http = Atom::HTTP.new)
  collection = Atom::Collection.new(url, http)

  entries.each do |entry|
    collection.post!(entry)
  end
end
39
+
40
# Saves an Array of Atom::Entrys to a new directory, one file per entry.
#
# Each file is named "0x" plus the first 8 hex digits of the MD5 of the
# entry's string form, with an ".atom" extension.
#
# entries:: the entries to write (each is serialized with #to_s)
# path::    the directory to create; it must not exist yet
#
# Raises a RuntimeError when +path+ already exists. Returns +entries+.
def entries_to_dir entries, path
  # loaded here so the method does not rely on the long-gone top-level
  # MD5 class
  require 'digest/md5'

  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere
  raise "directory #{path} already exists" if File.exist?(path)

  Dir.mkdir path

  entries.each do |entry|
    e = entry.to_s

    new_filename = "#{path}/0x#{Digest::MD5.hexdigest(e)[0, 8]}.atom"

    File.open(new_filename, 'w') { |f| f.write e }
  end
end
56
+
57
# Dumps an Array of Atom::Entrys into a Feed on stdout.
#
# Only the serialized feed is written, so the output stays a single
# document that another tool can read back in (the per-entry #inspect
# dump that used to precede the feed has been dropped).
def entries_to_stdout entries
  feed = Atom::Feed.new

  entries.each { |entry| feed.entries << entry }

  puts feed.to_s
end
68
+
69
# turns a collection of Atom Entries into an Array of Atom::Entrys
#
# source: a URL, a directory or "-" for an Atom Feed on stdin
# options:
#   :complete - whether to fetch the complete logical feed
#   :user - username to use for HTTP requests (if required)
#   :pass - password to use for HTTP requests (if required)
#   :verbose - when set, prints "got <title>" for every entry
#
# Only sources starting with "http://" or "https://" are treated as URLs,
# so a directory whose name merely begins with "http" is read as a
# directory.
def parse_input source, options
  entries = if source =~ %r{\Ahttps?://}
    http = Atom::HTTP.new

    setup_http http, options

    http_to_entries source, options[:complete], http
  elsif source == '-'
    stdin_to_entries
  else
    dir_to_entries source
  end

  if options[:verbose]
    entries.each do |entry|
      puts "got #{entry.title}"
    end
  end

  entries
end
97
+
98
# turns an Array of Atom::Entrys into a collection of Atom Entries
#
# entries: an Array of Atom::Entrys
# dest: a URL, a directory or "-" for an Atom Feed on stdout
# options:
#   :user - username to use for HTTP requests (if required)
#   :pass - password to use for HTTP requests (if required)
#
# Only destinations starting with "http://" or "https://" are treated as
# URLs, so a directory whose name merely begins with "http" is written as
# a directory.
def write_output entries, dest, options
  if dest =~ %r{\Ahttps?://}
    http = Atom::HTTP.new

    setup_http http, options

    entries_to_http entries, dest, http
  elsif dest == '-'
    entries_to_stdout entries
  else
    entries_to_dir entries, dest
  end
end
118
+
119
# Registers some common switches on the OptionParser +opts+:
#
# -u/--user NAME::           stores NAME in options[:user]
# -h/--help::                prints the usage statement and exits
# -p/--password [PASSWORD]:: stores the (optional) value in options[:pass]
def atom_options(opts, options)
  opts.on('-u', '--user NAME', 'username for HTTP auth') do |name|
    options[:user] = name
  end

  opts.on_tail('-h', '--help', 'show this usage statement') do |_flag|
    puts opts
    exit
  end

  opts.on_tail('-p', '--password [PASSWORD]', 'password for HTTP auth') do |password|
    options[:pass] = password
  end
end
129
+
130
+
131
# obtain a password from the TTY, hiding the user's input
# this will fail if you don't have the program 'stty'
#
# Returns the line that was read, without its line terminator (an empty
# String when nothing could be read). The terminal settings are put back
# and /dev/tty is closed even if reading is interrupted.
def obtain_password
  File.open('/dev/tty', 'w+') do |tty|
    tty.print 'Password: '

    # store original settings (without the trailing newline)
    state = `stty -F /dev/tty -g`.chomp

    begin
      # don't echo input
      system "stty -F /dev/tty -echo"

      # gets returns nil at end of input
      tty.gets.to_s.chomp
    ensure
      # restore original settings; if they could not be captured, at
      # least switch echo back on
      system "stty -F /dev/tty #{state.empty? ? 'echo' : state}"
    end
  end
end
151
+
152
# Copies the credentials from +options+ onto +http+.
#
# Nothing happens without options[:user]. When a user is given but
# options[:pass] is not set, the password is obtained with
# #obtain_password and stored back into +options+.
def setup_http(http, options)
  return unless options[:user]

  http.user = options[:user]

  options[:pass] ||= obtain_password

  http.pass = options[:pass]
end
163
+ end