agio 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/.rspec +2 -0
- data/History.rdoc +3 -0
- data/License.rdoc +23 -0
- data/Manifest.txt +23 -0
- data/README.rdoc +97 -0
- data/Rakefile +35 -0
- data/agio.gemspec +53 -0
- data/bin/agio +17 -0
- data/lib/agio.rb +171 -0
- data/lib/agio/block.rb +132 -0
- data/lib/agio/bourse.rb +340 -0
- data/lib/agio/broker.rb +415 -0
- data/lib/agio/data.rb +90 -0
- data/lib/agio/flags.rb +317 -0
- data/lib/agio/html_element_description.rb +126 -0
- data/spec/block_spec.rb +168 -0
- data/spec/bourse_spec.rb +10 -0
- data/spec/broker_spec.rb +539 -0
- data/spec/data_spec.rb +341 -0
- data/spec/flags_spec.rb +473 -0
- data/spec/html_element_description_spec.rb +52 -0
- data/spec/pmh_spec.rb +31 -0
- data/spec/spec_helper.rb +308 -0
- metadata +216 -0
data/lib/agio/bourse.rb
ADDED
@@ -0,0 +1,340 @@
|
|
1
|
+
# -*- ruby encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'text/format'
|
4
|
+
require 'digest/md5'
|
5
|
+
|
6
|
+
##
|
7
|
+
# The Bourse is where the incoming HTML document, after parsing through the
|
8
|
+
# Broker, will be transformed into Markdown.
|
9
|
+
class Agio::Bourse
|
10
|
+
extend Agio::Flags
|
11
|
+
|
12
|
+
##
|
13
|
+
# :attr_accessor: link_count
|
14
|
+
# The counter for non-inline links and images.
|
15
|
+
|
16
|
+
##
|
17
|
+
# :method: link_count?
|
18
|
+
# Returns +true+ if the link_count is non-zero.
|
19
|
+
|
20
|
+
##
|
21
|
+
# :method: incr_link_count
|
22
|
+
# Increments the link count by the provided value.
|
23
|
+
|
24
|
+
##
|
25
|
+
# :method: decr_link_count
|
26
|
+
# Decrements the link count by the provided value.
|
27
|
+
integer_flag :link_count
|
28
|
+
|
29
|
+
##
|
30
|
+
# :attr_accessor: list_stack
|
31
|
+
# A stack for lists being managed.
|
32
|
+
|
33
|
+
##
|
34
|
+
# :method: list_stack?
|
35
|
+
# Returns +true+ if the list_stack is not empty.
|
36
|
+
array_flag :list_stack
|
37
|
+
|
38
|
+
##
|
39
|
+
# The Bourse is initialized with both the Broker instance and the Agio
|
40
|
+
# instance; the latter is used because it controls how some of the
|
41
|
+
# conversions should be performed.
|
42
|
+
def initialize(broker, agio)
  @broker = broker
  @agio   = agio

  # Paragraph formatter; the first line of a paragraph is not indented.
  @formatter = Text::Format.new
  @formatter.first_indent = 0

  # Passing nil selects the default placement (:inline); see
  # #link_placement=.
  self.link_placement = nil

  # NOTE(review): this file does not require 'stringio' itself; presumably
  # it is loaded elsewhere in the library -- confirm.
  @output = StringIO.new("")

  # Abbreviations and reference-style links collected while transforming.
  @abbr  = { }
  @links = { }

  reset_flags(true)
end
|
56
|
+
|
57
|
+
attr_reader :abbr
|
58
|
+
private :abbr
|
59
|
+
|
60
|
+
attr_reader :links
|
61
|
+
private :links
|
62
|
+
|
63
|
+
##
|
64
|
+
# An instance of Text::Format that is used to cleanly format the text
|
65
|
+
# output by the Bourse.
|
66
|
+
attr_reader :formatter
|
67
|
+
|
68
|
+
##
|
69
|
+
# The output StringIO.
|
70
|
+
attr_reader :output
|
71
|
+
|
72
|
+
##
|
73
|
+
# :attr_accessor: base_url
|
74
|
+
# The base URL for implicit (or local) link references. If not provided,
|
75
|
+
# links will remain implicit. This is a String value.
|
76
|
+
|
77
|
+
##
|
78
|
+
# :method: base_url?
|
79
|
+
# Returns +true+ if the base URL has been set.
|
80
|
+
string_flag :base_url, :public => true
|
81
|
+
|
82
|
+
##
|
83
|
+
# :attr_accessor: skip_local_fragments
|
84
|
+
# Controls whether local link references containing fragments will be
|
85
|
+
# output in the final document.
|
86
|
+
#
|
87
|
+
# A local link reference is either an implicit link reference (one missing
|
88
|
+
# the protocol and host, such as '<a href="about.html">' or '<a
|
89
|
+
# href="/about.html">') or one that points to the #base_url.
|
90
|
+
#
|
91
|
+
# If this value is +true+, links that refer to fragments on local URIs
|
92
|
+
# will be ignored (such as '<a href="about.html#address">').
|
93
|
+
|
94
|
+
##
|
95
|
+
# :method: skip_local_fragments?
|
96
|
+
# Returns +true+ if local fragments are supposed to be skipped. See
|
97
|
+
# #skip_local_fragments.
|
98
|
+
boolean_flag :skip_local_fragments, :public => true
|
99
|
+
|
100
|
+
##
|
101
|
+
# :attr_reader:
|
102
|
+
# Controls how links are placed in the Markdown document.
|
103
|
+
def link_placement
  @link_placement
end

# :attr_writer: link_placement
# Selects where link targets are written in the Markdown document.
#
# In-Line::   The target follows the link text directly, like "[See the
#             example](http://example.org/)". This is the default; it is
#             chosen for +nil+, <tt>:inline</tt>, and (after a warning) for
#             any unrecognized value.
# Paragraph:: The body uses linked references, like "[See the example][1]",
#             and the reference "[1]: http://example.org" is written
#             immediately after the paragraph in which the link first
#             appears. Chosen with <tt>:paragraph</tt>.
# Endnote::   The body uses linked references, like "[See the example][1]",
#             and the reference "[1]: http://example.org" is written at the
#             end of the document. Chosen with <tt>:endnote</tt>.
def link_placement=(value)
  @link_placement =
    if [ :inline, :paragraph, :endnote ].include?(value)
      value
    elsif value.nil?
      :inline
    else
      warn "Invalid value for link placement: #{value}; using inline."
      :inline
    end
end
|
134
|
+
|
135
|
+
attr_reader :broker
|
136
|
+
private :broker
|
137
|
+
|
138
|
+
attr_reader :agio
|
139
|
+
private :agio
|
140
|
+
|
141
|
+
##
# Transforms every block collected by the broker into Markdown and writes
# the result to #output, with one blank line between blocks.
#
# With <tt>:paragraph</tt> link placement, the references for links first
# seen in a block are written directly after that block; with
# <tt>:endnote</tt> placement all references are written once, after the
# last block.
#
# Returns whatever <tt>output.write</tt> returns.
def transform
  per_paragraph = (:paragraph == link_placement)

  blocks = broker.blocks.map { |block|
    body = transform_block(block)
    per_paragraph ? [ body, link_references ] : body
  }.flatten

  blocks << link_references if :endnote == link_placement

  # Blocks that produce no Markdown, and reference lists with nothing left
  # to write, come back as nil or ""; joining them would leave runs of
  # superfluous blank lines in the document.
  blocks = blocks.reject { |text| text.nil? || text.to_s.empty? }

  output.write(blocks.join("\n\n"))
end
|
159
|
+
|
160
|
+
##
# Builds the reference definitions ("[id]: href "title"") for every
# collected link that has not been written yet, ordered by link id, one per
# line. Each link emitted is marked as written so that it is produced only
# once.
#
# Returns +nil+ when there is nothing left to write, so callers can drop the
# entry instead of emitting an empty block. Links that were already written
# are skipped entirely rather than leaving blank lines in the result.
#
# The +clear+ parameter is accepted for interface compatibility; it is not
# used.
def link_references(clear = nil)
  pending = links.values.
    reject { |link| link[:written] }.
    sort_by { |link| link[:id] }

  return nil if pending.empty?

  pending.map { |link|
    link[:written] = true

    reference = %Q( [#{link[:id]}]: #{link[:href]})
    reference += %Q( "#{link[:title]}") if link[:title]
    reference
  }.join("\n")
end
|
171
|
+
private :link_references
|
172
|
+
|
173
|
+
##
# Backslash-escapes the characters in +string+ that Markdown would
# otherwise treat as markup: "*", "`" and "_" anywhere, and a leading
# "<digits>. " at the start of a line (which would start an ordered list).
#
# +parents+ is the list of enclosing element names; text inside a "pre"
# element is returned untouched.
def escape(string, parents)
  return string if parents.include? "pre"

  string.
    gsub(/[*`_]/) { |char| "\\#{char}" }.
    # In the block form of gsub the replacement is not scanned for
    # back-references, so the captured number has to be interpolated
    # explicitly. Markdown escapes the period, not the number: "1\. ".
    gsub(/^(\d+)\. /) { "#{Regexp.last_match(1)}\\. " }
end
|
183
|
+
|
184
|
+
def transform_block(block, parents = [])
|
185
|
+
contents = block.contents.map { |object|
|
186
|
+
case object
|
187
|
+
when String
|
188
|
+
escape(object, parents)
|
189
|
+
when Agio::Data
|
190
|
+
escape(object.value, parents)
|
191
|
+
when Agio::Block
|
192
|
+
transform_block(object, parents + [ block.name ])
|
193
|
+
end
|
194
|
+
}
|
195
|
+
|
196
|
+
case block.name
|
197
|
+
when /^h([1-6])$/
|
198
|
+
"#{"#" * $1.to_i} #{contents.join}"
|
199
|
+
when "p", "div"
|
200
|
+
formatter.format_one_paragraph(contents.join).chomp
|
201
|
+
when "br"
|
202
|
+
" "
|
203
|
+
when "hr"
|
204
|
+
"* * *"
|
205
|
+
when "head", "style", "script"
|
206
|
+
nil
|
207
|
+
when "em", "i", "u"
|
208
|
+
"_#{contents.join}_"
|
209
|
+
when "strong", "b"
|
210
|
+
"**#{contents.join}**"
|
211
|
+
when "blockquote"
|
212
|
+
contents.map { |line|
|
213
|
+
line.split($/).map { |part| "> #{part}" }.join("\n")
|
214
|
+
}.join("\n")
|
215
|
+
when "code"
|
216
|
+
if parents.include? "pre"
|
217
|
+
contents.join
|
218
|
+
else
|
219
|
+
"`#{contents.join}`"
|
220
|
+
end
|
221
|
+
when "abbr"
|
222
|
+
abbr[block.options[:attrs]["title"]] = contents.join
|
223
|
+
when "a"
|
224
|
+
attrs = block.options[:attrs]
|
225
|
+
text = contents.join
|
226
|
+
|
227
|
+
if attrs and attrs["href"]
|
228
|
+
href, title = attrs["href"], attrs["title"]
|
229
|
+
|
230
|
+
if :inline == link_placement
|
231
|
+
if title
|
232
|
+
%Q([#{text}](#{href} "#{title}"))
|
233
|
+
else
|
234
|
+
%Q([#{text}](#{href}))
|
235
|
+
end
|
236
|
+
else
|
237
|
+
key = Digest::MD5.hexdigest(href + title.to_s)
|
238
|
+
link = links[key]
|
239
|
+
|
240
|
+
unless link
|
241
|
+
incr_link_count 1
|
242
|
+
link = {
|
243
|
+
:title => title,
|
244
|
+
:href => href,
|
245
|
+
:id => link_count
|
246
|
+
}
|
247
|
+
|
248
|
+
links[key] = link
|
249
|
+
end
|
250
|
+
|
251
|
+
%Q([#{text}][#{link[:id]}])
|
252
|
+
end
|
253
|
+
else
|
254
|
+
text
|
255
|
+
end
|
256
|
+
when "img"
|
257
|
+
attrs = block.options[:attrs]
|
258
|
+
attrs["href"] = attrs["src"]
|
259
|
+
text = attrs["alt"] || contents.join
|
260
|
+
|
261
|
+
if attrs and attrs["href"]
|
262
|
+
href, title = attrs["href"], attrs["title"]
|
263
|
+
|
264
|
+
if :inline == link_placement
|
265
|
+
if title
|
266
|
+
%Q()
|
267
|
+
else
|
268
|
+
%Q()
|
269
|
+
end
|
270
|
+
else
|
271
|
+
key = Digest::MD5.hexdigest(href + title.to_s)
|
272
|
+
link = links[key]
|
273
|
+
|
274
|
+
unless link
|
275
|
+
incr_link_count 1
|
276
|
+
link = {
|
277
|
+
:title => title,
|
278
|
+
:href => href,
|
279
|
+
:id => link_count
|
280
|
+
}
|
281
|
+
|
282
|
+
links[key] = link
|
283
|
+
end
|
284
|
+
|
285
|
+
%Q(![#{text}][#{link[:id]}])
|
286
|
+
end
|
287
|
+
else
|
288
|
+
text
|
289
|
+
end
|
290
|
+
when "dl"
|
291
|
+
contents.join
|
292
|
+
when "dt"
|
293
|
+
contents.join("\n")
|
294
|
+
when "dd"
|
295
|
+
": #{contents.join}"
|
296
|
+
when "ol"
|
297
|
+
count = 0
|
298
|
+
contents.map { |line|
|
299
|
+
next if line.strip.empty?
|
300
|
+
first, *rest = line.split($/)
|
301
|
+
|
302
|
+
fpref = "#{count += 1}. "
|
303
|
+
first = "#{fpref}#{first}"
|
304
|
+
|
305
|
+
if rest.empty?
|
306
|
+
first
|
307
|
+
else
|
308
|
+
rpref = " " * fpref.size
|
309
|
+
rest = rest.map { |part| "#{rpref}#{part}" }
|
310
|
+
[ first, rest ].flatten.join("\n")
|
311
|
+
end
|
312
|
+
}.compact.join("\n")
|
313
|
+
when "ul"
|
314
|
+
contents.map { |line|
|
315
|
+
next if line.strip.empty?
|
316
|
+
first, *rest = line.split($/)
|
317
|
+
|
318
|
+
fpref = " * "
|
319
|
+
first = "#{fpref}#{first}"
|
320
|
+
|
321
|
+
if rest.empty?
|
322
|
+
first
|
323
|
+
else
|
324
|
+
rpref = " " * fpref.size
|
325
|
+
rest = rest.map { |part| "#{rpref}#{part}" }
|
326
|
+
[ first, rest ].flatten.join("\n")
|
327
|
+
end
|
328
|
+
}.compact.join("\n")
|
329
|
+
when "li"
|
330
|
+
contents.join
|
331
|
+
when "pre"
|
332
|
+
contents.map { |line|
|
333
|
+
line.split($/).map { |part| " #{part}" }.join("\n")
|
334
|
+
}.join("\n")
|
335
|
+
end
|
336
|
+
end
|
337
|
+
private :transform_block
|
338
|
+
end
|
339
|
+
|
340
|
+
# vim: ft=ruby
|
data/lib/agio/broker.rb
ADDED
@@ -0,0 +1,415 @@
|
|
1
|
+
# -*- ruby encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'agio/block'
|
4
|
+
require 'agio/data'
|
5
|
+
|
6
|
+
##
|
7
|
+
# The Broker class is the object that transforms HTML into an intermediate
|
8
|
+
# format for Agio so that the intermediate format can be converted into
|
9
|
+
# Markdown text.
|
10
|
+
#
|
11
|
+
# The Broker has two primary data structures it keeps: the block list
|
12
|
+
# (#blocks) and the block stack (#stack).
|
13
|
+
#
|
14
|
+
# The block list is an array of completed blocks for the document that, when
|
15
|
+
# processed correctly, will allow the meaningful creation of the Markdown
|
16
|
+
# text.
|
17
|
+
#
|
18
|
+
# The block stack is where the blocks reside during creation.
|
19
|
+
#
|
20
|
+
# Agio::Broker is a Nokogiri::XML::SAX::Document and can be used by the
|
21
|
+
# Nokogiri SAX parser to fill the block list.
|
22
|
+
#
|
23
|
+
# == Algorithm
|
24
|
+
# Assume a fairly simple HTML document:
|
25
|
+
#
|
26
|
+
# <h1>Title</h1>
|
27
|
+
# <p>Lorem ipsum dolor sit amet,
|
28
|
+
# <strong>consectetur</strong> adipiscing.</p>
|
29
|
+
#
|
30
|
+
# When the first element ("h1") is observed, a new block will be created on
|
31
|
+
# the stack:
|
32
|
+
#
|
33
|
+
# Blocks[ ]
|
34
|
+
# Stack [ block(h1) ]
|
35
|
+
#
|
36
|
+
# The text will be appended to the block:
|
37
|
+
#
|
38
|
+
# Blocks[ ]
|
39
|
+
# Stack [ block(h1, Title) ]
|
40
|
+
#
|
41
|
+
# When the closing tag for the element is observed, the block will be popped
|
42
|
+
# from the stack and pushed to the end of the blocks list.
|
43
|
+
#
|
44
|
+
# Blocks[ block(h1, Title) ]
|
45
|
+
# Stack [ ]
|
46
|
+
#
|
47
|
+
# The same happens for the second element ("p") and its text:
|
48
|
+
#
|
49
|
+
# Blocks[ block(h1, Title) ]
|
50
|
+
# Stack [ block(p, Lorem ipsum dolor sit amet) ]
|
51
|
+
#
|
52
|
+
# When the "strong" element is received, though, it and its text are pushed
|
53
|
+
# onto the stack:
|
54
|
+
#
|
55
|
+
# Blocks[ block(h1, Title) ]
|
56
|
+
# Stack [ block(p, Lorem ipsum dolor sit amet),
|
57
|
+
# block(strong, consectetur)
|
58
|
+
# ]
|
59
|
+
#
|
60
|
+
# When the closing tag for the "strong" element is received, the "strong"
|
61
|
+
# block is popped off the stack and appended to the block at the top of the
|
62
|
+
# stack.
|
63
|
+
#
|
64
|
+
# Blocks[ block(h1, Title) ]
|
65
|
+
# Stack [ block(p, Lorem ipsum dolor sit amet,
|
66
|
+
# block(strong, consectetur)
|
67
|
+
# ]
|
68
|
+
#
|
69
|
+
# Finally, the text is appended, the closing tag for the "p" element shows
|
70
|
+
# up, and that block is popped off the stack and appended to the blocks
|
71
|
+
# list:
|
72
|
+
#
|
73
|
+
# Blocks[ block(h1, Title),
|
74
|
+
# block(p, Lorem ipsum dolor sit amet,
|
75
|
+
# block(strong, consectetur), adipiscing)
|
76
|
+
# ]
|
77
|
+
# Stack [ ]
|
78
|
+
#
|
79
|
+
# === Handling Broken HTML
|
80
|
+
# Agio tries to be sane when dealing with broken HTML.
|
81
|
+
#
|
82
|
+
# ==== Missing Block Elements
|
83
|
+
# It is possible to have missing block elements. In this case, an implicit
|
84
|
+
# "p" block element will be assumed.
|
85
|
+
#
|
86
|
+
# Lorem ipsum dolor sit amet,
|
87
|
+
#
|
88
|
+
# When encountered, this will be treated as:
|
89
|
+
#
|
90
|
+
# Stack [ block(p, Lorem ipsum dolor sit amet,) ]
|
91
|
+
#
|
92
|
+
# If a span element is encountered, an implicit "p" block element will still
|
93
|
+
# be assumed.
|
94
|
+
#
|
95
|
+
# <em>Lorem ipsum dolor sit amet,</em>
|
96
|
+
#
|
97
|
+
# Will produce:
|
98
|
+
#
|
99
|
+
# Stack [ block(p),
|
100
|
+
# block(em, Lorem ipsum dolor sit amet,)
|
101
|
+
# ]
|
102
|
+
#
|
103
|
+
# A special case exists for the "li", "dt", and "dd" tags; if they are
|
104
|
+
# encountered outside of lists ("ul", "ol", or "dl"), implicit list tags
|
105
|
+
# will be inserted ("ul" for "li"; "dl" for "dt" or "dd").
|
106
|
+
#
|
107
|
+
# ==== Unclosed Elements Inside a Block
|
108
|
+
# Things are a little more complex when dealing with broken HTML.
|
109
|
+
# Agio::Broker tries to deal with them sanely. Assume the following HTML:
|
110
|
+
#
|
111
|
+
# <p>Lorem ipsum dolor sit amet,
|
112
|
+
# <strong>consectetur adipiscing.</p>
|
113
|
+
#
|
114
|
+
# Before the closing "p" tag is observed, the stack looks like this:
|
115
|
+
#
|
116
|
+
# Stack [ block(p, Lorem ipsum dolor sit amet),
|
117
|
+
# block(strong, consectetur adipiscing)
|
118
|
+
# ]
|
119
|
+
#
|
120
|
+
# When the "p" tag is observed, the Broker sees that the topmost block was
|
121
|
+
# not opened with a "p" tag, so it *implicitly* closes the topmost block as
|
122
|
+
# defined above, resulting in:
|
123
|
+
#
|
124
|
+
# Blocks[ block(p, Lorem ipsum dolor sit amet,
|
125
|
+
# block(strong, consectetur adipiscing)
|
126
|
+
# ]
|
127
|
+
#
|
128
|
+
# ==== Unclosed Elements Between Blocks
|
129
|
+
# If an HTML element is not nestable (see below), then observing another
|
130
|
+
# element start of that type will cause the existing block to be closed and
|
131
|
+
# a new one to be opened. For example:
|
132
|
+
#
|
133
|
+
# <p>Lorem ipsum dolor sit amet,
|
134
|
+
# <p>consectetur adipiscing.</p>
|
135
|
+
#
|
136
|
+
# If the Broker has processed the first "p" element:
|
137
|
+
#
|
138
|
+
# Blocks[ ]
|
139
|
+
# Stack [ block(p, Lorem ipsum dolor sit amet,) ]
|
140
|
+
#
|
141
|
+
# When the second "p" opening tag is seen, Agio::Broker treats this as
|
142
|
+
# having an implicit closing "p" tag:
|
143
|
+
#
|
144
|
+
# Blocks[ block(p, Lorem ipsum dolor sit amet,) ]
|
145
|
+
# Stack [ block(p) ]
|
146
|
+
#
|
147
|
+
# This behaviour does not apply to a nestable element.
|
148
|
+
#
|
149
|
+
# === Nestable HTML Elements
|
150
|
+
# Some HTML elements are considered nestable by Agio::Broker. These
|
151
|
+
# currently include "blockquote", "ol", and "ul". When opening tags for
|
152
|
+
# these types are observed, these tags do not cause a current block of the
|
153
|
+
# same type to be shifted as outlined above. Nestable elements can contain
|
154
|
+
# other HTML block elements; "li" elements are special in that they cannot
|
155
|
+
# directly contain another "li", but they can contain other HTML block
|
156
|
+
# elements.
|
157
|
+
class Agio::Broker < Nokogiri::XML::SAX::Document
|
158
|
+
##
|
159
|
+
# The array of completed document subsections. Each entry is a root object
|
160
|
+
# for contained contents. When HTML parsing is complete, this attribute
|
161
|
+
# should be read for the structures that must be translated into Markdown.
|
162
|
+
attr_reader :blocks
|
163
|
+
|
164
|
+
##
|
165
|
+
# The operating stack.
|
166
|
+
attr_reader :stack
|
167
|
+
private :stack
|
168
|
+
|
169
|
+
##
|
170
|
+
# Errors found while parsing the document. For example,
|
171
|
+
# "<p><em>Foo</p>", will produce an error when the
|
172
|
+
# "</p>" is encountered because the "<em>" has not been
|
173
|
+
# closed. The logic for the Agio::Broker is such that this sort of error
|
174
|
+
# is not a problem; it implicitly closes the "<em>".
|
175
|
+
attr_reader :errors
|
176
|
+
##
|
177
|
+
# Warnings found while parsing the document.
|
178
|
+
attr_reader :warnings
|
179
|
+
|
180
|
+
def initialize
  # Four independent, initially empty arrays: the finished blocks, the
  # working stack, and the collected parser warnings and errors.
  @blocks, @stack, @warnings, @errors = [], [], [], []
end
|
186
|
+
|
187
|
+
##
|
188
|
+
# Push the object onto the stack. Some objects may cause the stack to be
|
189
|
+
# modified in other ways. 'html' objects will be ignored. 'body' objects
|
190
|
+
# will cause 'head' objects to be popped from the stack and then be
|
191
|
+
# ignored.
|
192
|
+
def push(object)
  # Bare strings are wrapped so that only Agio objects are handled below.
  object = Agio::Data.new(object) if object.kind_of? String

  case object
  when Agio::XMLDecl
    # An XML declaration bypasses the stack and goes straight to the
    # completed blocks.
    blocks.push object
  when Agio::Data
    # Only Agio::Block objects live on the stack. Data is appended to the
    # block on top of the stack; when there is none, an implicit "p" block
    # is opened to hold it.
    push Agio::Block.new('p') if stack.empty?
    stack.last.append object
  when Agio::Block
    # The outer 'html' element is never recorded.
    return nil if object.name == 'html'

    # Neither is the 'body' element, but because 'head' is collected, any
    # open 'head' block has to be closed when 'body' starts.
    if object.name == 'body'
      pop 'head'
      return nil
    end

    # Close whatever is open on the stack that cannot hold the new block.
    # This may shrink or empty the stack, so it has to happen before
    # deciding how the new block is added.
    until stack.empty?
      current = stack.last

      break if current.nil?

      if current.sibling_of? object
        # A sibling is closed; then the new top is examined.
        pop
      elsif (current.can_contain?(object) ||
             (object.li? && current.can_contain?('ul')) ||
             (object.definition? && current.can_contain?('dl')))
        # The top block can hold the new block, either directly or, for
        # <li> and definition items (<dt>/<dd>), through the list block
        # ("ul" or "dl") that is inserted implicitly further down.
        break
      elsif ((current.inline? && !object.inline?) ||
             (current.dl? && !object.definition?) ||
             (current.ul_ol? && !object.li?) ||
             (current.block? && object.block?))
        # Close the top block when it is
        # - a span and the new block is not a span,
        # - a definition list and the new block is not a definition item,
        # - a list and the new block is not a list item, or
        # - a block and the new block is a block as well.
        pop

        # Stop when the stack did not change.
        break if stack.empty? || current.equal?(stack.last)
      elsif object.inline?
        # A span is simply pushed on top of what is there.
        break
      else
        # No rule applied and nothing was popped: the stack is settled.
        break
      end
    end

    if stack.empty?
      # Nothing is open: supply the implicit container for the new block.
      if object.li?
        push Agio::Block.new("ul")
      elsif object.definition?
        push Agio::Block.new("dl")
      elsif object.inline?
        push Agio::Block.new("p")
      end
    else
      current = stack.last

      if object.li? && !current.ul_ol?
        # A list item whose parent is not a "ul" or "ol" gets an implicit
        # "ul".
        push Agio::Block.new("ul")
      elsif object.definition? && !current.dl?
        # A definition item ("dt" or "dd") whose parent is not a "dl" gets
        # an implicit "dl".
        push Agio::Block.new("dl")
      end
    end

    stack.push object
  else
    raise ArgumentError, "Unknown object type being pushed."
  end

  object
end
|
296
|
+
private :push
|
297
|
+
|
298
|
+
##
|
299
|
+
# Pop one or more blocks from the stack and process the popped blocks.
|
300
|
+
# Returns the last block popped.
|
301
|
+
#
|
302
|
+
# === Pop Control
|
303
|
+
# If +until_element+ is +nil+, only the top item on the stack will be
|
304
|
+
# popped and processed.
|
305
|
+
#
|
306
|
+
# If +until_element+ is *not* +nil+, the stack will be popped and
|
307
|
+
# processed until either the stack is empty or the popped item's block
|
308
|
+
# name matches the value of +until_element+.
|
309
|
+
#
|
310
|
+
# === Agio::Block Processing
|
311
|
+
# For each block popped off the stack:
|
312
|
+
#
|
313
|
+
# 1. If the stack is empty, append the block to the #blocks array.
|
314
|
+
# 2. If the stack is not empty, append the block to the top item in the
|
315
|
+
# stack.
|
316
|
+
def pop(until_element = nil, options = nil)
  return nil if stack.empty?

  popped = nil

  loop do
    # Only reachable while looking for +until_element+: the stack ran out
    # on an earlier pass.
    return popped if stack.empty?

    popped = stack.pop

    # The bottom-most block is complete; it joins the finished blocks.
    if stack.empty?
      blocks.push popped
      break
    end

    # Otherwise the block becomes part of the block that is now on top.
    stack.last.append popped

    # Without a target element exactly one block is popped.
    break if until_element.nil?

    next unless popped.name == until_element

    # The name matches; when namespace options were given, the prefix and
    # URI have to match as well.
    break if options.nil?
    break if ((popped.options[:prefix] == options[:prefix]) &&
              (popped.options[:uri] == options[:uri]))
  end

  popped
end
|
354
|
+
private :pop
|
355
|
+
|
356
|
+
##
# SAX callback for a CDATA section: wraps +string+ in an Agio::CData and
# pushes it.
def cdata_block(string)
  cdata = Agio::CData.new(string)
  push cdata
end
|
359
|
+
|
360
|
+
def characters(string)
|
361
|
+
return if (stack.empty? or stack[-1].pre?) and string =~ /\A\s+\Z/
|
362
|
+
push Agio::Data.new(string)
|
363
|
+
end
|
364
|
+
|
365
|
+
##
# SAX callback for a comment: wraps +string+ in an Agio::Comment and pushes
# it.
def comment(string)
  remark = Agio::Comment.new(string)
  push remark
end
|
368
|
+
|
369
|
+
##
# SAX callback for the end of the document: pops until the stack is empty.
def end_document
  pop until stack.empty?
end
|
372
|
+
|
373
|
+
##
# SAX callback for a closing tag: pops the stack until a block named +name+
# has been popped or the stack is empty.
def end_element(name)
  pop name
end
|
376
|
+
|
377
|
+
##
# SAX callback for a namespaced closing tag: pops the stack until a block
# named +name+ with the same +prefix+ and +uri+ has been popped or the stack
# is empty.
def end_element_namespace(name, prefix = nil, uri = nil)
  namespace = { :prefix => prefix, :uri => uri }
  pop(name, namespace)
end
|
380
|
+
|
381
|
+
##
# SAX callback for a parse error: records +string+ in #errors.
def error(string)
  errors.push(string)
end
|
384
|
+
|
385
|
+
# When we start a new document, pop whatever is still on the stack.
|
386
|
+
def start_document
  # Pop until nothing is left on the stack.
  pop until stack.empty?
end
|
389
|
+
|
390
|
+
##
# SAX callback for an opening tag: pushes a new Agio::Block named +name+.
# When +attrs+ is not empty it is converted to a Hash and stored under the
# <tt>:attrs</tt> option of the block.
def start_element(name, attrs = [])
  options = { }
  options[:attrs] = Hash[attrs] unless attrs.empty?

  push Agio::Block.new(name, options)
end
|
399
|
+
|
400
|
+
##
# SAX callback for a namespaced opening tag: pushes a new Agio::Block named
# +name+ that carries +attrs+, +prefix+, +uri+ and +ns+ as its options.
# Unlike #start_element, +attrs+ is stored exactly as it was received.
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
  options = {
    :attrs  => attrs,
    :prefix => prefix,
    :uri    => uri,
    :ns     => ns
  }

  push Agio::Block.new(name, options)
end
|
404
|
+
|
405
|
+
##
# SAX callback for a parse warning: records +string+ in #warnings.
def warning(string)
  warnings.push(string)
end
|
408
|
+
|
409
|
+
##
# SAX callback for the XML declaration: pushes an Agio::XMLDecl built from
# +version+, +encoding+ and +standalone+.
def xmldecl(version, encoding, standalone)
  declaration = Agio::XMLDecl.new(:version => version,
                                  :encoding => encoding,
                                  :standalone => standalone)
  push declaration
end
|
413
|
+
end
|
414
|
+
|
415
|
+
# vim: ft=ruby
|