asciidoctor-asciidoc 0.0.2.dev

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5c3a7406ea5ca33ac04d2f852f99babb9288d77ad6218badf24c601ace327b10
4
+ data.tar.gz: cce24466950c5d6a94b796813ba43ec8781e5bcc5d2954720fb80f864d872e47
5
+ SHA512:
6
+ metadata.gz: c0e08c4a75fb09c8a7be40b95f2dd44d0b8ecdca7831b5b7bccb7a39d4fda41152c82b3b91b6312648255fe1a44ed47a3eddc64aa9eb2b38e2aa6ccd8abdca3e
7
+ data.tar.gz: b12afcda64eaf2dc1b71e432f8df7ca2cdbb815146a598d69c5a4386313a1a4635f7279fd59a9875a99b156bed2b15299c20170b0a9985a152ff273c998d63ce
@@ -0,0 +1,8 @@
1
+
2
+ module AsciiDoctorAsciiDocConst
3
+
4
+ ESC = "\x00"
5
+ ESC_E = "x"
6
+
7
+ end
8
+
@@ -0,0 +1,74 @@
1
+
2
# One node of a parent/children tree.
#
# A node is either a container (TYPE_NODE, owns a +children+ array) or a
# text leaf (TYPE_TEXT, +children+ is nil). Which one is decided solely by
# whether +text+ was supplied to the constructor.
class AsciiDoctorAsciiDocNode

  TYPE_NODE = :node
  TYPE_TEXT = :text

  # Values handed to the constructor (+type+ and +children+ are derived).
  # NOTE(review): +node+ and +transform+ are stored untouched; presumably the
  # source document node and its conversion routine - confirm with callers.
  attr_reader :parent, :children, :node, :transform, :text, :type

  # Free-form flags/values maintained by whoever builds the tree.
  attr_accessor :is_list, :is_anchor, :anchor

  # parent::    owning node, or nil for a root
  # node::      stored as-is
  # transform:: stored as-is
  # text::      when non-nil the node becomes a text leaf without children
  def initialize(parent:, node: nil, transform: nil, text: nil)
    super()
    @parent = parent
    @node = node
    @transform = transform
    @text = text

    leaf = !text.nil?
    @type = leaf ? TYPE_TEXT : TYPE_NODE
    @children = leaf ? nil : []
  end

  # Appends +node+ to the children; returns the children array.
  def add_child(node)
    @children.push(node)
  end

  # Value of the parent's +is_list+ flag, or the (falsy) parent itself when
  # there is no parent.
  def parent_is_list?
    return @parent unless @parent

    @parent.is_list
  end

  # Returns the sibling located right before this node in the parent's
  # children, or nil when there is no parent or this node comes first.
  # Raises when the parent's children do not contain this node.
  def prev_sibling
    return nil unless @parent

    siblings = parent.children
    position = siblings.index { |candidate| candidate === self }
    raise "I am not a child of my parent!" if position.nil?

    position.zero? ? nil : siblings[position - 1]
  end

  # Runs the given block and returns a pair: the block's result and the
  # child now sitting at the position that was one past the end of the
  # children before the block ran (nil when nothing is there).
  def next_child
    count_before = @children.length
    result = yield
    [result, @children[count_before]]
  end

  # Wraps +text+ in a new text leaf owned by this node and appends it.
  def add_text_child(text)
    leaf = AsciiDoctorAsciiDocNode.new(parent: self, text: text)
    @children.push(leaf)
  end

  # True for text leaves.
  def is_text?
    @type == TYPE_TEXT
  end

end
@@ -0,0 +1,126 @@
1
# Doubly linked list whose elements are LinkedListItem wrappers.
#
# Every method that adds something accepts either a plain value (wrapped in
# a new LinkedListItem) or a detached LinkedListItem, and returns the item
# that now lives in the list.
class LinkedList

  # First and last LinkedListItem, nil when the list is empty.
  attr_reader :head
  attr_reader :tail

  # copy_from:: optional object whose +each+ yields items responding to
  #             +contents+; their contents are appended in order
  # copy_fun::  optional callable applied to each copied contents value
  def initialize(copy_from = nil, copy_fun = nil)
    super()
    @head = nil
    @tail = nil

    return unless copy_from

    copy_from.each do |item|
      contents = item.contents
      contents = copy_fun.call(contents) if copy_fun
      add(contents)
    end
  end

  # Inserts +contents+ right after the item +el+.
  def add_after(el, contents)
    add_between(el, el.next, contents)
  end

  # Inserts +contents+ right before the item +el+.
  def add_before(el, contents)
    add_between(el.prev, el, contents)
  end

  # adds at the end
  def add(contents)
    add_between(@tail, nil, contents)
  end

  # adds at the beginning
  def insert(contents)
    add_between(nil, @head, contents)
  end

  # Yields every LinkedListItem (not its contents) from head to tail.
  # The current item may be deleted from inside the block.
  # Without a block an Enumerator over the items is returned.
  def each
    return to_enum(:each) unless block_given?

    item = @head
    until item.nil?
      # cache next value in case the item's deleted
      next_el = item.next
      yield item
      item = next_el
    end
  end

  # Unlinks +item+ and returns its contents. A detached item is left alone.
  # An item that belongs to a different list is removed from that list, so
  # neither list ends up with dangling head/tail/neighbour references.
  def delete(item)
    owner = item.list
    return item.contents if owner.nil?
    return owner.delete(item) unless owner.equal?(self)

    if @head.equal?(item)
      @head = item.next
    else
      item.prev.next = item.next
    end

    if @tail.equal?(item)
      @tail = item.prev
    else
      item.next.prev = item.prev
    end

    item.list = nil
    item.next = nil
    item.prev = nil
    item.contents
  end

  private

  # Links +contents+ between the neighbours +e_prev+ and +e_next+ (either
  # may be nil to denote the corresponding end of the list) and returns the
  # linked item. Raises when handed an item that is still attached to a list.
  def add_between(e_prev, e_next, contents)
    if contents.is_a?(LinkedListItem)
      item = contents
      raise "Attempting to add a list item that is already attached to a list" if item.list
      item.list = self
      item.next = e_next
      item.prev = e_prev
    else
      item = LinkedListItem.new(contents, self, e_next, e_prev)
    end

    if e_prev.nil?
      @head = item
    else
      e_prev.next = item
    end

    if e_next.nil?
      @tail = item
    else
      e_next.prev = item
    end

    item
  end

end
98
+
99
# Element of a LinkedList: wraps one +contents+ value together with the
# links to its neighbours and to the list that owns it.
class LinkedListItem

  # Neighbouring items (nil at the ends of the list or when detached).
  attr_accessor :next
  attr_accessor :prev
  # Owning list, nil while the item is detached.
  attr_accessor :list
  # The wrapped value.
  attr_accessor :contents

  def initialize(contents, list, next_item, prev_item)
    super()
    @list = list
    @next = next_item
    @prev = prev_item
    @contents = contents
  end

  # Inserts +contents+ right after this item in the owning list.
  # Raises when the item is not attached to a list.
  def add_after(contents)
    owning_list.add_after(self, contents)
  end

  # Inserts +contents+ right before this item in the owning list.
  # Raises when the item is not attached to a list.
  def add_before(contents)
    owning_list.add_before(self, contents)
  end

  # Removes this item from its list and returns the contents. A detached
  # item has nothing to be removed from, so its contents are returned
  # directly (the same result LinkedList#delete gives for detached items).
  def delete
    return @contents if @list.nil?

    @list.delete(self)
  end

  private

  # The owning list; fails with a descriptive error for detached items
  # instead of an incidental NoMethodError on nil.
  def owning_list
    @list || raise("Attempting to add next to a list item that is not attached to a list")
  end

end
@@ -0,0 +1,407 @@
1
+
2
require 'set'

require 'asciidoctor-asciidoc/linked-list'
require 'asciidoctor-asciidoc/const'
4
+
5
+ class Unescape
6
+
7
+ include AsciiDoctorAsciiDocConst
8
+
9
+ FEED_MORE = :more
10
+ FEED_NO_MATCH = :no_match
11
+ FEED_DONE = :done
12
+
13
# Scanning state shared by all patterns: the input string with a read
# position, the characters consumed since the last reset, and the output
# produced so far.
class Context

  # Character consumed by the most recent #advance.
  attr_accessor :last_char
  # Characters consumed since the last #reset, and how many there are.
  attr_accessor :buffer_len
  attr_accessor :buffer
  # Array that patterns may append bookkeeping entries to (starts empty).
  attr_accessor :curve_quote_index
  # Index of the next character of +str+ to be consumed.
  attr_accessor :pos
  # Input string.
  attr_accessor :str
  # Output accumulated so far.
  attr_accessor :out
  # The +encode+ flag given to the constructor.
  attr_reader :encode

  def initialize(str, encode = false)
    super()
    @str = str
    @curve_quote_index = []
    @len = str.length
    @pos = 0
    # unary plus: keep the accumulators mutable even under frozen literals
    @out = +''
    @encode = encode
    reset
  end

  # Forgets the characters consumed so far (the read position is kept).
  def reset
    @buffer = +''
    @buffer_len = 0
  end

  # True while there is input left to consume.
  def has_next_char?
    @pos < @len
  end

  # Character that the next #advance will consume (nil at end of input).
  def next_char
    @str[@pos]
  end

  # Character preceding the one consumed last, i.e. two positions behind
  # the read position. Returns nil when fewer than two characters have been
  # consumed; a negative index would otherwise wrap around and return a
  # character from the end of the input.
  def prev_char
    return nil if @pos < 2

    @str[@pos - 2]
  end

  # Consumes one character: records it as +last_char+, appends it to the
  # buffer and moves the read position forward.
  def advance
    @last_char = next_char
    @buffer << @last_char
    @pos += 1
    @buffer_len += 1
  end

  # Rewinds the read position to where the current buffer started and
  # returns the +pattern.len+ characters found there ('' for a zero length).
  def back_track(pattern)
    @pos -= buffer_len
    return @str[@pos..@pos + pattern.len - 1] if pattern.len > 0

    ''
  end

  # Moves the read position forward by +pattern.len+ characters.
  def forward(pattern)
    @pos += pattern.len
  end

  # Input that has not been consumed yet.
  def remainder
    str[pos..-1]
  end

end
74
+
75
# Base class of everything the scanner can recognise.
#
# Subclasses are expected to provide +feed+ (look at the character the
# context consumed last and answer with one of the FEED_* values) and
# +produce+ (turn the matched piece of input into its replacement).
class Pattern

  include AsciiDoctorAsciiDocConst

  # Number of input characters matched so far.
  attr_reader :len

  def initialize(context)
    super()
    @ctx = context
    reset
  end

  # Puts the pattern back into its initial state; returns self so the
  # call can be chained or used as a mapping function.
  def reset
    @len = 0
    @done = false
    self
  end

  # Whether the pattern has matched completely. +ended+ tells whether the
  # input is exhausted; this base implementation does not look at it.
  def is_done?(ended)
    @done
  end

  # Replacement for +piece+, run through #encode_bin when the context was
  # created with encoding switched on.
  def encode(piece)
    produced = produce(piece)
    return produced unless @ctx.encode

    encode_bin(produced)
  end

  # Returns ESC, then two lowercase hex digits per character of +text+,
  # then ESC_E. Raises for characters whose code point exceeds 255.
  def encode_bin(text)
    digits = text.each_char.map do |chr|
      code = chr.ord
      raise "Non-ASCII character #{code}" if code > 255
      "%02x" % code
    end
    "#{ESC}#{digits.join}#{ESC_E}"
  end

end
117
+
118
# Recognises one fixed string and replaces it with another fixed string.
class StringPattern < Pattern

  # context::     scanning context to read characters from
  # pattern::     literal text to recognise
  # replacement:: text produced once the literal has matched completely
  def initialize(context, pattern, replacement)
    super(context)
    @pattern = pattern
    @replacement = replacement
    @max_len = pattern.length
  end

  # Compares the character consumed last with the next expected character
  # of the literal.
  def feed
    expected = @pattern[@len]
    return FEED_NO_MATCH unless @ctx.last_char == expected

    @len += 1
    return FEED_MORE unless @len == @max_len

    @done = true
    FEED_DONE
  end

  # The replacement does not depend on the matched piece.
  def produce(piece)
    @replacement
  end

end
144
+
145
# Matches "&#8216;" and produces %('`). Each time a replacement is produced
# the current output length is noted in the context's curve quote index,
# flagged as a starting quote.
class CurveQuoteStart < StringPattern

  def initialize(context)
    super(context, "&#8216;", %('`))
  end

  def produce(piece)
    marker = { pos: @ctx.out.length, start: true }
    @ctx.curve_quote_index << marker
    super
  end

end
157
+
158
# Matches "&#8217;" and produces %(`'). Each time a replacement is produced
# the current output length is noted in the context's curve quote index,
# flagged as not being a starting quote.
class CurveQuoteEnd < StringPattern

  def initialize(context)
    super(context, "&#8217;", %(`'))
  end

  def produce(piece)
    marker = { pos: @ctx.out.length, start: false }
    @ctx.curve_quote_index << marker
    super
  end

end
170
+
171
# Collects a run of formatting marks and produces it wrapped in pass:[...].
class SpecialGobbler < Pattern

  # these are the characters that we care about escaping
  # (Array#to_set needs the 'set' library, required at the top of the file)
  CHAR_SET = %w[* _ ` # ~ ^].to_set.freeze
  # a mark directly following one of these characters is left alone
  VOID_SET = /[[:alpha:][:digit:]:;}_]/.freeze

  # Looks at the character consumed last. A character outside CHAR_SET
  # finishes a run that is already in progress (FEED_DONE) or, with nothing
  # collected yet, rules this pattern out (FEED_NO_MATCH).
  def feed

    unless CHAR_SET.include?(@ctx.last_char)
      return FEED_NO_MATCH unless @len > 0
      @done = true
      return FEED_DONE
    end

    # we don't need to escape if:
    # - the character is singular
    # - the character directly follows:
    # -- a colon, semicolon, or closing curly bracket
    # -- a letter, number, or underscore
    # -- there is a space from this mark to the next mark.
    # - but we need to make exceptions for first character being '*' because of lists

    # if we have accumulated something already continue doing so no matter what
    return gobble if @len > 0

    # list exception - gobble up chars if we are consuming from the
    # start of text
    return gobble if @ctx.buffer_len == @ctx.pos

    # the same mark repeated right away
    return gobble if @ctx.has_next_char? && @ctx.next_char == @ctx.last_char

    # here we are guaranteed that there is a character before ours
    return FEED_NO_MATCH if @ctx.prev_char.match?(VOID_SET)

    # if there are no more control characters, no need to escape
    rest = @ctx.remainder
    next_mark = rest.index(@ctx.last_char)
    return FEED_NO_MATCH if next_mark.nil?

    # whitespace before the next occurrence of the mark: no need to escape
    next_ws = rest.index(/[[:space:]]/)
    return FEED_NO_MATCH if !next_ws.nil? && next_ws < next_mark

    gobble

  end

  # A run that is still open when the input ends counts as complete.
  def is_done?(ended)
    (ended && @len > 0) || super
  end

  # Wraps the collected marks in an inline pass macro.
  def produce(piece)
    %(pass:[#{piece}])
  end

  private

  # Accepts the current character into the run and asks for more input.
  def gobble
    @len += 1
    FEED_MORE
  end

end
234
+
235
# Scans +str+ and returns a copy in which known character references are
# replaced by their plain-text spelling (e.g. "&lt;" becomes "<",
# "&#8212;" becomes "--") and runs of formatting marks picked up by
# SpecialGobbler are wrapped in pass:[...].
#
# str::    text to process
# encode:: when true every replacement is emitted through
#          Pattern#encode_bin instead of as plain text
#
# When several patterns complete on the same stretch of input the longest
# match is used. Input that no pattern completes on is copied unchanged.
def self.unescape(str, encode = false)

  # Note - it's generally OK for us to leave the &#xxxx; sequences in the
  # Asciidoc, because they are re-rendered as is. We try our best to resolve
  # these references, but if we are unsure on how to, we can leave them in.

  # $TODO I'm sure there is more to this than that.
  # quote_release = Set[
  #  ',', ';', '"', '.', '?', '!', ' ', '\n'
  # ]

  ctx = Context.new(str, encode)

  catalog = LinkedList.new
  register = lambda do |pairs|
    pairs.each { |from, to| catalog.add(StringPattern.new(ctx, from, to)) }
  end

  # https://docs.asciidoctor.org/asciidoc/latest/subs/special-characters/
  register.call([
    ["&lt;", "<"],
    ["&gt;", ">"],
    ["&amp;", "&"],
  ])
  # https://docs.asciidoctor.org/asciidoc/latest/subs/quotes/
  register.call([
    ["&#8220;", %("`)],
    ["&#8221;", %(`")],
  ])
  catalog.add(CurveQuoteEnd.new(ctx))
  catalog.add(CurveQuoteStart.new(ctx))
  # https://docs.asciidoctor.org/asciidoc/latest/subs/replacements/
  register.call([
    ["&#169;", "(C)"],
    ["&#174;", "(R)"],
    ["&#8482;", "(TM)"],
    ["&#8212;", "--"],
    ["&#8212;&#8203;", "--"],
    ["&#8201;&#8212;&#8201;", " -- "],
    ["&#8230;", "..."],
    ["&#8230;&#8203;", "..."],
    ["&#8594;", "->"],
    ["&#8658;", "=>"],
    ["&#8592;", "<-"],
    ["&#8656;", "<="],
  ])

  catalog.add(SpecialGobbler.new(ctx))

  # Candidates still in the race for the current stretch of input.
  candidates = nil

  # Starts a new stretch: empties the context buffer and puts every known
  # pattern, reset to its initial state, back into the race.
  start_over = lambda do
    ctx.reset
    candidates = LinkedList.new(catalog, ->(known) { known.reset })
  end

  start_over.call

  loop do

    ended = !ctx.has_next_char?
    ctx.advance unless ended

    winner = nil
    undecided = false

    candidates.each do |entry|

      candidate = entry.contents

      # What this candidate has to say about the current character.
      # A candidate that completed earlier is not fed again.
      verdict =
        if ended
          candidate.is_done?(true) ? FEED_DONE : nil
        elsif candidate.is_done?(false)
          FEED_DONE
        else
          candidate.feed
        end

      case verdict
      when FEED_NO_MATCH
        candidates.delete(entry)
      when FEED_MORE
        undecided = true
      when FEED_DONE
        # keep only the longest completed candidate in the race; on equal
        # lengths the one seen later wins
        if winner.nil?
          winner = entry
        elsif winner.contents.len > candidate.len
          candidates.delete(entry)
        else
          candidates.delete(winner)
          winner = entry
        end
      end

    end

    # if we found some more patterns to match
    # we need to continue
    next if undecided

    if winner.nil?

      # there is nothing done, so we just flush the
      # buffer out
      # TODO: this should move to the next char, and not
      # jump over the entire buffer.
      ctx.out << ctx.buffer

    else

      # rewind to the start of the stretch, emit the replacement and
      # skip exactly the characters the winning pattern matched
      matched_by = winner.contents
      matched_text = ctx.back_track(matched_by)
      ctx.out << matched_by.encode(matched_text)
      ctx.forward(matched_by)

    end

    start_over.call

    break unless ctx.has_next_char?

  end

  # deal with apostrophes
  result = ''
  cursor = 0
  quote_open = false
  # length of one curved-quote replacement inside ctx.out
  quote_len = ctx.encode ? 6 : 2
  quotes = ctx.curve_quote_index

  quotes.each_with_index do |quote, idx|

    at = quote[:pos]
    # copy what lies between the previous quote and this one
    result << ctx.out[cursor...at] if at > 0
    cursor = at

    if quote[:start]
      result << ctx.out[cursor, quote_len]
      quote_open = true
    else
      # ok, then this is the end quote,
      # or it's in the middle of something
      # or a simple one
      # We need to replace it with the single apostrophe
      # if we are not ending the quote. We are not ending the quote
      # if there is an end to it later, but before any new quote starts.
      upcoming = quotes[idx + 1]
      as_apostrophe = !quote_open || (!upcoming.nil? && upcoming[:start])

      if as_apostrophe
        result << (ctx.encode ? "#{ESC}27#{ESC_E}" : "'")
      else
        quote_open = false
        result << ctx.out[cursor, quote_len]
      end
    end

    cursor += quote_len

  end

  # no curved quotes at all: the scanner output is the result
  return ctx.out if cursor == 0

  result << ctx.out[cursor..-1]

end
406
+
407
+ end