asciidoctor-asciidoc 0.0.2.dev
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/asciidoctor-asciidoc/const.rb +8 -0
- data/lib/asciidoctor-asciidoc/conv-node.rb +74 -0
- data/lib/asciidoctor-asciidoc/linked-list.rb +126 -0
- data/lib/asciidoctor-asciidoc/unescape.rb +407 -0
- data/lib/asciidoctor-asciidoc/version.rb +3 -0
- data/lib/asciidoctor-asciidoc.rb +925 -0
- metadata +128 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5c3a7406ea5ca33ac04d2f852f99babb9288d77ad6218badf24c601ace327b10
|
4
|
+
data.tar.gz: cce24466950c5d6a94b796813ba43ec8781e5bcc5d2954720fb80f864d872e47
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c0e08c4a75fb09c8a7be40b95f2dd44d0b8ecdca7831b5b7bccb7a39d4fda41152c82b3b91b6312648255fe1a44ed47a3eddc64aa9eb2b38e2aa6ccd8abdca3e
|
7
|
+
data.tar.gz: b12afcda64eaf2dc1b71e432f8df7ca2cdbb815146a598d69c5a4386313a1a4635f7279fd59a9875a99b156bed2b15299c20170b0a9985a152ff273c998d63ce
|
@@ -0,0 +1,74 @@
|
|
1
|
+
|
2
|
+
# A node of the conversion tree. A node is either a structural node
# (TYPE_NODE, owns a list of children) or a text leaf (TYPE_TEXT, carries
# text and has no children list).
class AsciiDoctorAsciiDocNode
  TYPE_NODE = :node
  TYPE_TEXT = :text

  attr_reader :parent, :children, :node, :transform, :text, :type

  attr_accessor :is_list, :is_anchor, :anchor

  # parent    - owning node, or nil for a root
  # node      - value stored as-is and exposed through #node
  # transform - value stored as-is and exposed through #transform
  # text      - when given, the node becomes a text leaf without children
  #
  # Creating a node does not register it with its parent; use #add_child.
  def initialize(parent:, node: nil, transform: nil, text: nil)
    super()
    @parent = parent
    @node = node
    @transform = transform
    @text = text

    leaf = !text.nil?
    @children = leaf ? nil : []
    @type = leaf ? TYPE_TEXT : TYPE_NODE
  end

  # Appends an already built node to this node's children.
  def add_child(node)
    @children.push(node)
  end

  # Returns the parent's is_list flag, or the (falsy) parent itself when
  # there is no parent.
  def parent_is_list?
    return @parent unless @parent

    @parent.is_list
  end

  # Returns the sibling registered right before this node in the parent's
  # children, nil for the first child or for a node without parent.
  # Raises when the parent does not list this node among its children.
  def prev_sibling
    return nil unless @parent

    siblings = parent.children
    position = siblings.index { |candidate| candidate === self }
    raise "I am not a child of my parent!" if position.nil?

    position.zero? ? nil : siblings[position - 1]
  end

  # Runs the given block and returns [block result, first child appended
  # while the block ran (nil when the block added none)].
  def next_child
    count_before = @children.length
    result = yield
    appended = count_before == @children.length ? nil : @children[count_before]
    [result, appended]
  end

  # Wraps the text into a new text leaf and appends it to the children.
  def add_text_child(text)
    leaf = AsciiDoctorAsciiDocNode.new(parent: self, text: text)
    @children.push(leaf)
  end

  # True when this node is a text leaf.
  def is_text?
    TYPE_TEXT == @type
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# Doubly linked list whose elements are LinkedListItem wrappers.
#
# Iteration (#each and every Enumerable method) yields the LinkedListItem
# wrappers, not their contents; call #contents on an item to get the value.
class LinkedList
  include Enumerable

  attr_reader :head, :tail

  # copy_from - optional collection whose #each yields objects responding
  #             to #contents; every contents value is appended to the new list
  # copy_fun  - optional callable applied to each contents value before it
  #             is appended
  def initialize(copy_from = nil, copy_fun = nil)
    super()
    @head = nil
    @tail = nil
    return unless copy_from

    copy_from.each do |item|
      contents = item.contents
      contents = copy_fun.call(contents) if copy_fun
      add(contents)
    end
  end

  # Inserts contents right after the element el. Returns the inserted item.
  def add_after(el, contents)
    add_between(el, el.next, contents)
  end

  # Inserts contents right before the element el. Returns the inserted item.
  def add_before(el, contents)
    add_between(el.prev, el, contents)
  end

  # adds at the end
  def add(contents)
    add_between(@tail, nil, contents)
  end

  # adds at the beginning
  def insert(contents)
    add_between(nil, @head, contents)
  end

  # Yields every item from head to tail. The current item may be deleted
  # from inside the block. Without a block an Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?

    item = @head
    until item.nil?
      # cache next value in case the item's deleted
      following = item.next
      yield item
      item = following
    end
    self
  end

  # Detaches item from this list and returns its contents. An item that is
  # not attached to any list is left alone (its contents are still returned).
  # Raises when the item is attached to a different list, because unlinking
  # it here would corrupt both lists.
  def delete(item)
    owner = item.list
    return item.contents unless owner

    raise "Attempting to delete a list item that is attached to another list" unless owner.equal?(self)

    if @head.equal?(item)
      @head = item.next
    else
      item.prev.next = item.next
    end

    if @tail.equal?(item)
      @tail = item.prev
    else
      item.next.prev = item.prev
    end

    item.list = nil
    item.next = nil
    item.prev = nil
    item.contents
  end

  private

  # Links contents between e_prev and e_next (either may be nil to denote
  # the corresponding end of the list). A detached LinkedListItem is reused
  # as the element; any other value is wrapped into a new LinkedListItem.
  # Returns the linked item.
  def add_between(e_prev, e_next, contents)
    if contents.is_a?(LinkedListItem)
      item = contents
      raise "Attempting to add a list item that is already attached to a list" if item.list

      item.list = self
      item.next = e_next
      item.prev = e_prev
    else
      item = LinkedListItem.new(contents, self, e_next, e_prev)
    end

    if e_prev.nil?
      @head = item
    else
      e_prev.next = item
    end

    if e_next.nil?
      @tail = item
    else
      e_next.prev = item
    end

    item
  end
end
|
98
|
+
|
99
|
+
# Element of a LinkedList: holds the stored contents together with the
# links to its neighbours and to the list it is attached to. An item whose
# list is nil is detached.
class LinkedListItem
  attr_accessor :next, :prev, :list, :contents

  # contents  - the stored value
  # list      - the list this item is attached to (nil when detached)
  # next_item - following item, or nil
  # prev_item - preceding item, or nil
  def initialize(contents, list, next_item, prev_item)
    super()
    @list = list
    @next = next_item
    @prev = prev_item
    @contents = contents
  end

  # Asks the owning list to insert contents right after this item.
  def add_after(contents)
    @list.add_after(self, contents)
  end

  # Asks the owning list to insert contents right before this item.
  def add_before(contents)
    @list.add_before(self, contents)
  end

  # Detaches this item from its list and returns the contents. A detached
  # item has nothing to unlink, so its contents are returned directly
  # (the same result LinkedList#delete gives for a detached item) instead
  # of failing on the missing list.
  def delete
    return @contents if @list.nil?

    @list.delete(self)
  end
end
|
@@ -0,0 +1,407 @@
|
|
1
|
+
|
2
|
+
require 'set'

require 'asciidoctor-asciidoc/linked-list'
require 'asciidoctor-asciidoc/const'
|
4
|
+
|
5
|
+
class Unescape
|
6
|
+
|
7
|
+
include AsciiDoctorAsciiDocConst
|
8
|
+
|
9
|
+
FEED_MORE = :more
|
10
|
+
FEED_NO_MATCH = :no_match
|
11
|
+
FEED_DONE = :done
|
12
|
+
|
13
|
+
# Scanning state shared by all patterns: the input string, the read
# position, the characters consumed since the last reset (buffer) and the
# output accumulated so far.
class Context
  attr_accessor :last_char, :buffer_len, :buffer, :curve_quote_index, :pos, :out
  attr_reader :encode, :str

  # str    - the text to scan
  # encode - flag exposed through #encode and consulted by the patterns
  def initialize(str, encode = false)
    super()
    @str = str
    @curve_quote_index = []
    @len = str.length
    @pos = 0
    # unary plus keeps the accumulator mutable even if string literals
    # are frozen in this file
    @out = +''
    @encode = encode
    reset
  end

  # Replaces the scanned text and refreshes the cached length so that
  # #has_next_char? stays consistent with the new text.
  def str=(value)
    @str = value
    @len = value.length
  end

  # Forgets the characters buffered since the previous reset.
  def reset
    @buffer = +''
    @buffer_len = 0
  end

  # True while the read position is inside the text.
  def has_next_char?
    @pos < @len
  end

  # The character at the read position (not consumed yet).
  def next_char
    @str[@pos]
  end

  # The character right before the last consumed one, or nil when fewer
  # than two characters have been consumed. Without the guard a negative
  # index would silently wrap around to the end of the text.
  def prev_char
    return nil if @pos < 2

    @str[@pos - 2]
  end

  # Consumes one character: records it as last_char, appends it to the
  # buffer and moves the read position forward.
  def advance
    @last_char = next_char
    @buffer << @last_char
    @pos += 1
    @buffer_len += 1
  end

  # Moves the read position back to where the current buffer started and
  # returns the pattern.len characters found there ('' when the pattern
  # length is not positive).
  def back_track(pattern)
    @pos -= buffer_len
    return '' unless pattern.len > 0

    @str[@pos..@pos + pattern.len - 1]
  end

  # Moves the read position forward by the pattern length.
  def forward(pattern)
    @pos += pattern.len
  end

  # The text from the read position to the end.
  def remainder
    str[pos..-1]
  end
end
|
74
|
+
|
75
|
+
# Base class of the matchers driven by the unescape loop. It keeps the
# number of matched characters (len) and a done flag. This class calls
# #produce(piece) but does not define it, so subclasses must supply it.
class Pattern
  include AsciiDoctorAsciiDocConst

  attr_reader :len

  # context - the shared scanning context this pattern reads from
  def initialize(context)
    super()
    @ctx = context
    reset
  end

  # Clears the match progress and returns the pattern itself, so the call
  # can be used as the copy function when cloning a pattern list.
  def reset
    @len = 0
    @done = false
    self
  end

  # True once the pattern has completed its match. The ended argument is
  # ignored at this level.
  def is_done?(ended)
    @done
  end

  # Returns what #produce yields for the piece, passed through #encode_bin
  # when the context asks for encoding.
  def encode(piece)
    produced = produce(piece)
    return produced unless @ctx.encode

    encode_bin(produced)
  end

  # Renders text as two hex digits per character, prefixed with ESC and
  # terminated with ESC_E (both constants come from the included module).
  # Raises when a character has a code point above 255.
  def encode_bin(text)
    digits = text.each_char.map do |chr|
      code = chr.ord
      raise "Non-ASCII character #{code}" if code > 255

      format("%02x", code)
    end

    encoded = "#{ESC}"
    encoded << digits.join
    encoded << ESC_E
  end
end
|
117
|
+
|
118
|
+
# Pattern that matches one fixed string, character by character, and
# produces a fixed replacement for it.
class StringPattern < Pattern
  # context     - the shared scanning context
  # pattern     - the exact text to recognise
  # replacement - the text produced once the pattern has matched
  def initialize(context, pattern, replacement)
    super(context)
    @pattern = pattern
    @replacement = replacement
    @max_len = pattern.length
  end

  # Checks the last consumed character against the next expected one.
  # Returns FEED_NO_MATCH on a mismatch, FEED_DONE when the whole pattern
  # has been seen and FEED_MORE otherwise.
  def feed
    expected = @pattern[@len]
    return FEED_NO_MATCH unless @ctx.last_char == expected

    @len += 1
    return FEED_MORE unless @len == @max_len

    @done = true
    FEED_DONE
  end

  # The replacement does not depend on the matched piece.
  def produce(piece)
    @replacement
  end
end
|
144
|
+
|
145
|
+
# Matches an opening curved single quote. Besides producing the
# replacement it records, in the context's curve_quote_index, the output
# position at which the replacement is about to be written.
class CurveQuoteStart < StringPattern
  def initialize(context)
    super(context, "‘", %('`))
  end

  def produce(piece)
    @ctx.curve_quote_index << { pos: @ctx.out.length, start: true }
    super(piece)
  end
end
|
157
|
+
|
158
|
+
# Matches a closing curved single quote. Besides producing the
# replacement it records, in the context's curve_quote_index, the output
# position at which the replacement is about to be written.
class CurveQuoteEnd < StringPattern
  def initialize(context)
    super(context, "’", %(`'))
  end

  def produce(piece)
    @ctx.curve_quote_index << { pos: @ctx.out.length, start: false }
    super(piece)
  end
end
|
170
|
+
|
171
|
+
# Pattern that collects a run of inline formatting marks and wraps the
# run into a pass:[...] macro.
#
# CHAR_SET is a Set, so the file has to load the 'set' library (Set is
# only available without a require on recent Ruby versions).
class SpecialGobbler < Pattern
  # these are the characters that we care about escaping
  CHAR_SET = %w[* _ ` # ~ ^].to_set.freeze
  # a mark that directly follows one of these characters is not escaped
  VOID_SET = /[[:alpha:][:digit:]:;}_]/

  # Decides whether the last consumed character extends (or starts) a run
  # of marks. Returns FEED_MORE while the run grows, FEED_DONE when a
  # non-mark character ends a non-empty run, FEED_NO_MATCH otherwise.
  def feed
    unless CHAR_SET.include?(@ctx.last_char)
      return FEED_NO_MATCH unless @len > 0

      @done = true
      return FEED_DONE
    end

    # we don't need to escape if:
    # - the character is singular
    # - the character directly follows:
    # -- a colon, semicolon, or closing curly bracket
    # -- a letter, number, or underscore
    # -- there is a space from this mark to the next mark.
    # - but we need to make exceptions for first character being '*' because of lists

    # if we have accumulated something already continue doing so no matter what
    return gobble if @len > 0

    # list exception - gobble up chars if we are consuming from the
    # start of text
    return gobble if @ctx.buffer_len == @ctx.pos

    # the same mark repeats right away
    return gobble if @ctx.has_next_char? && @ctx.next_char == @ctx.last_char

    # here we are guaranteed that there is a character before ours
    return FEED_NO_MATCH if @ctx.prev_char.match?(VOID_SET)

    # if there are no more control characters, no need to escape
    rest = @ctx.remainder
    next_mark = rest.index(@ctx.last_char)
    return FEED_NO_MATCH if next_mark.nil?

    # whitespace before the next occurrence of the mark: no need to escape
    next_ws = rest.index(/[[:space:]]/)
    return FEED_NO_MATCH if !next_ws.nil? && next_ws < next_mark

    gobble
  end

  # A non-empty run also counts as done when the input has ended.
  def is_done?(ended)
    (ended && @len > 0) || super
  end

  # Wraps the collected run into a pass macro.
  def produce(piece)
    %(pass:[#{piece}])
  end

  private

  # Accepts the last consumed character into the run.
  def gobble
    @len += 1
    FEED_MORE
  end
end
|
234
|
+
|
235
|
+
# Rewrites str by replacing every sequence recognised by one of the
# registered patterns with what that pattern produces, then post-processes
# the curved single quotes so that unpaired closing quotes become plain
# apostrophes.
#
# str    - the text to process
# encode - passed to the Context; when set, pattern output goes through
#          Pattern#encode_bin and apostrophes are emitted in encoded form
#
# Returns the rewritten text.
def self.unescape(str, encode = false)
  # Note - it's generally OK for us to leave the &#xxxx; sequences in the
  # Asciidoc, because they are re-rendered as is. We try our best to resolve
  # these reference, but if we are unsure on how to, we can leave them in.

  # $TODO I'm sure there is more to this than that.
  # quote_release = Set[
  #   ',', ';', '"', '.', '?', '!', ' ', '\n'
  # ]

  # NOTE(review): as written, the first three patterns replace a character
  # with itself and the others match literal Unicode characters, while the
  # note above talks about &#xxxx; sequences. These literals may have been
  # HTML entities that got decoded somewhere along the way - confirm
  # against the upstream source. Two literals ("—​" and "…​") appear to end
  # with an invisible character, presumably a zero-width space - verify.

  ctx = Context.new(str, encode)

  register = ->(list, pairs) { pairs.each { |from, to| list.add(StringPattern.new(ctx, from, to)) } }

  all = LinkedList.new
  # https://docs.asciidoctor.org/asciidoc/latest/subs/special-characters/
  register.call(all, [["<", "<"], [">", ">"], ["&", "&"]])
  # https://docs.asciidoctor.org/asciidoc/latest/subs/quotes/
  register.call(all, [["“", %("`)], ["”", %(`")]])
  all.add(CurveQuoteEnd.new(ctx))
  all.add(CurveQuoteStart.new(ctx))
  # https://docs.asciidoctor.org/asciidoc/latest/subs/replacements/
  register.call(all, [
    ["©", "(C)"],
    ["®", "(R)"],
    ["™", "(TM)"],
    ["—", "--"],
    ["—​", "--"],
    [" — ", " -- "],
    ["…", "..."],
    ["…​", "..."],
    ["→", "->"],
    ["⇒", "=>"],
    ["←", "<-"],
    ["⇐", "<="]
  ])

  all.add(SpecialGobbler.new(ctx))

  active = nil

  # start a fresh matching round: empty buffer, every pattern reset and
  # active again
  restart = lambda do
    ctx.reset
    active = LinkedList.new(all, ->(candidate) { candidate.reset })
  end

  restart.call

  loop do
    ended = !ctx.has_next_char?
    ctx.advance unless ended

    done = nil
    has_more = false

    active.each do |item|
      pattern = item.contents

      # keep a single finished pattern; on a tie in length the one seen
      # later in the list wins, the loser leaves the active list
      record_done = lambda do
        if done.nil?
          done = item
        elsif done.contents.len > item.contents.len
          active.delete(item)
        else
          active.delete(done)
          done = item
        end
      end

      if ended
        record_done.call if pattern.is_done?(true)
        next
      end

      if pattern.is_done?(false)
        record_done.call
        next
      end

      outcome = pattern.feed
      if outcome == FEED_NO_MATCH
        active.delete(item)
      elsif outcome == FEED_DONE
        record_done.call
      elsif outcome == FEED_MORE
        has_more = true
      end
    end

    # if we found some more patterns to match
    # we need to continue
    next if has_more

    if done.nil?
      # there is nothing done, so we just flush the
      # buffer out
      # TODO: this should move to the next char, and not
      # jump over the entire buffer.
      ctx.out << ctx.buffer
    else
      winner = done.contents
      piece = ctx.back_track(winner)
      ctx.out << winner.encode(piece)
      ctx.forward(winner)
    end

    restart.call

    break unless ctx.has_next_char?
  end

  # deal with apostrophes
  result = +''
  last_pos = 0
  has_start = false
  q_len = ctx.encode ? 6 : 2
  quotes = ctx.curve_quote_index

  quotes.each_with_index do |quote, idx|
    quote_pos = quote[:pos]
    result << ctx.out[last_pos...quote_pos] unless quote_pos <= 0
    last_pos = quote_pos

    if quote[:start]
      result << ctx.out[last_pos, q_len]
      has_start = true
    else
      # ok, then this is the end quote,
      # or it's in the middle of something
      # or a simple one
      # We need to replace it with the single apostrophe
      # if we are not ending the quote. We are not ending the quote
      # if there is an end to it later, but before any new quote starts.
      replace = !has_start || (idx != quotes.length - 1 && quotes[idx + 1][:start])

      if replace
        result << (ctx.encode ? "#{ESC}27#{ESC_E}" : "'")
      else
        has_start = false
        result << ctx.out[last_pos, q_len]
      end
    end

    last_pos += q_len
  end

  return ctx.out if last_pos == 0

  result << ctx.out[last_pos..-1]
end
|
406
|
+
|
407
|
+
end
|