asciidoctor-asciidoc 0.0.2.dev

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5c3a7406ea5ca33ac04d2f852f99babb9288d77ad6218badf24c601ace327b10
4
+ data.tar.gz: cce24466950c5d6a94b796813ba43ec8781e5bcc5d2954720fb80f864d872e47
5
+ SHA512:
6
+ metadata.gz: c0e08c4a75fb09c8a7be40b95f2dd44d0b8ecdca7831b5b7bccb7a39d4fda41152c82b3b91b6312648255fe1a44ed47a3eddc64aa9eb2b38e2aa6ccd8abdca3e
7
+ data.tar.gz: b12afcda64eaf2dc1b71e432f8df7ca2cdbb815146a598d69c5a4386313a1a4635f7279fd59a9875a99b156bed2b15299c20170b0a9985a152ff273c998d63ce
@@ -0,0 +1,8 @@
1
+
2
# Marker constants shared by the classes that emit "binary encoded" text.
#
# An encoded run is written as ESC, then two lowercase hex digits per
# character, then ESC_E. Both constants are frozen so that code mixing them
# into output strings can never mutate the shared instances by accident.
module AsciiDoctorAsciiDocConst
  # Opening marker of an encoded run (a single NUL character).
  ESC = "\x00".freeze

  # Closing marker of an encoded run.
  ESC_E = "x".freeze
end
8
+
@@ -0,0 +1,74 @@
1
+
2
# A node of the conversion tree.
#
# A node is either a container (+TYPE_NODE+), which owns an ordered list of
# children, or a text leaf (+TYPE_TEXT+), which carries a string and has no
# child list at all (+children+ is +nil+). The kind is decided once, at
# construction time, by whether +text+ was supplied.
class AsciiDoctorAsciiDocNode
  TYPE_NODE = :node
  TYPE_TEXT = :text

  attr_reader :parent, :children, :node, :transform, :text, :type

  # Free-form flags set by whoever builds the tree; all default to nil.
  attr_accessor :is_list, :is_anchor, :anchor

  # @param parent [AsciiDoctorAsciiDocNode, nil] owning node, nil for a root
  # @param node [Object, nil] payload associated with this tree node
  # @param transform [Object, nil] transform associated with this tree node
  # @param text [String, nil] when non-nil the node becomes a text leaf
  def initialize(parent:, node: nil, transform: nil, text: nil)
    @parent = parent
    @node = node
    @transform = transform
    @text = text

    if text.nil?
      @type = TYPE_NODE
      @children = []
    else
      @type = TYPE_TEXT
      @children = nil
    end
  end

  # Appends +node+ to this node's children.
  #
  # @return [Array] the updated child list
  # @raise [RuntimeError] if this node is a text leaf
  def add_child(node)
    child_list.push(node)
  end

  # @return the parent's +is_list+ flag, or a falsy value when there is no
  #   parent
  def parent_is_list?
    @parent && @parent.is_list
  end

  # Finds the child that directly precedes this node in its parent.
  #
  # @return [AsciiDoctorAsciiDocNode, nil] the previous sibling; nil when this
  #   node is the first child or has no parent
  # @raise [RuntimeError] if the parent does not list this node as a child
  def prev_sibling
    return nil unless @parent

    siblings = @parent.children
    # Identity, not equality: we are looking for this very object.
    position = siblings.index { |sibling| sibling.equal?(self) }
    raise "I am not a child of my parent!" if position.nil?

    position.zero? ? nil : siblings[position - 1]
  end

  # Runs the given block and reports the first child it appended, if any.
  #
  # @yield arbitrary work that may add children to this node
  # @return [Array] a pair of the block's result and the child sitting at the
  #   index that was "one past the end" before the block ran (nil when the
  #   block added nothing)
  # @raise [RuntimeError] if this node is a text leaf
  def next_child
    first_new_index = child_list.length
    result = yield
    # Indexing at or past the end yields nil, which covers "nothing added".
    [result, @children[first_new_index]]
  end

  # Appends a new text leaf holding +text+.
  #
  # @return [Array] the updated child list
  # @raise [RuntimeError] if this node is a text leaf
  def add_text_child(text)
    child_list.push(AsciiDoctorAsciiDocNode.new(parent: self, text: text))
  end

  # @return [Boolean] true when this node is a text leaf
  def is_text?
    @type == TYPE_TEXT
  end

  private

  # Child list accessor that fails loudly on text leaves instead of letting a
  # NoMethodError on nil surface from deep inside the caller.
  def child_list
    raise "Text nodes cannot have children" if @children.nil?

    @children
  end
end
@@ -0,0 +1,126 @@
1
# A minimal doubly linked list.
#
# Values are wrapped in LinkedListItem objects that remember their owning
# list and neighbours, which allows items to be removed while the list is
# being iterated.
class LinkedList
  attr_reader :head, :tail

  # @param copy_from [#each, nil] optional source whose yielded items respond
  #   to +contents+; their contents are appended to the new list in order
  # @param copy_fun [#call, nil] optional mapper applied to each contents
  #   value before it is appended
  def initialize(copy_from = nil, copy_fun = nil)
    @head = nil
    @tail = nil
    return unless copy_from

    copy_from.each do |item|
      contents = item.contents
      contents = copy_fun.call(contents) if copy_fun
      add(contents)
    end
  end

  # Inserts +contents+ right after the item +el+.
  #
  # @return [LinkedListItem] the inserted item
  # @raise [RuntimeError] if +el+ is not attached to this list
  def add_after(el, contents)
    ensure_member(el)
    add_between(el, el.next, contents)
  end

  # Inserts +contents+ right before the item +el+.
  #
  # @return [LinkedListItem] the inserted item
  # @raise [RuntimeError] if +el+ is not attached to this list
  def add_before(el, contents)
    ensure_member(el)
    add_between(el.prev, el, contents)
  end

  # adds at the end
  #
  # @return [LinkedListItem] the inserted item
  def add(contents)
    add_between(@tail, nil, contents)
  end

  # adds at the beginning
  #
  # @return [LinkedListItem] the inserted item
  def insert(contents)
    add_between(nil, @head, contents)
  end

  # Yields every item (not its contents) from head to tail. Does nothing when
  # called without a block.
  def each
    return unless block_given?

    item = @head
    until item.nil?
      # cache next value in case the item's deleted
      following = item.next
      yield item
      item = following
    end
  end

  # Detaches +item+ from the list that owns it.
  #
  # A detached item is left untouched. An item owned by a different list is
  # removed through that list, so the owner's head/tail stay consistent
  # instead of being silently corrupted.
  #
  # @return the item's contents
  def delete(item)
    owner = item.list
    return item.contents if owner.nil?
    return owner.delete(item) unless owner.equal?(self)

    if @head.equal?(item)
      @head = item.next
    else
      item.prev.next = item.next
    end

    if @tail.equal?(item)
      @tail = item.prev
    else
      item.next.prev = item.prev
    end

    item.list = nil
    item.next = nil
    item.prev = nil
    item.contents
  end

  private

  # Rejects reference items that belong to another list (or to none); linking
  # next to them would leave both lists inconsistent.
  def ensure_member(el)
    raise "Reference item is not attached to this list" unless el.list.equal?(self)
  end

  # Links +contents+ between two neighbouring items (either may be nil to
  # denote the list boundary). A bare value is wrapped in a new
  # LinkedListItem; a detached LinkedListItem is re-used as is.
  #
  # @return [LinkedListItem] the linked item
  def add_between(e_prev, e_next, contents)
    if contents.is_a?(LinkedListItem)
      item = contents
      raise "Attempting to add a list item that is already attached to a list" if item.list

      item.list = self
      item.next = e_next
      item.prev = e_prev
    else
      item = LinkedListItem.new(contents, self, e_next, e_prev)
    end

    if e_prev.nil?
      @head = item
    else
      e_prev.next = item
    end

    if e_next.nil?
      @tail = item
    else
      e_next.prev = item
    end

    item
  end
end
98
+
99
# One cell of a LinkedList: a value plus links to its neighbours and to the
# list that currently owns it (+list+ is nil while the item is detached).
class LinkedListItem
  attr_accessor :next, :prev, :list, :contents

  # @param contents the wrapped value
  # @param list the owning list, or nil for a detached item
  # @param next_item [LinkedListItem, nil] following item
  # @param prev_item [LinkedListItem, nil] preceding item
  def initialize(contents, list, next_item, prev_item)
    @list = list
    @next = next_item
    @prev = prev_item
    @contents = contents
  end

  # Inserts +contents+ into the owning list right after this item.
  #
  # @raise [RuntimeError] if this item is detached
  def add_after(contents)
    owning_list.add_after(self, contents)
  end

  # Inserts +contents+ into the owning list right before this item.
  #
  # @raise [RuntimeError] if this item is detached
  def add_before(contents)
    owning_list.add_before(self, contents)
  end

  # Removes this item from its list. Deleting an item that is already
  # detached is a harmless no-op, matching the behaviour of
  # LinkedList#delete for detached items.
  #
  # @return the wrapped contents
  def delete
    return @contents if @list.nil?

    @list.delete(self)
  end

  private

  # The owning list, or a descriptive error instead of a NoMethodError on nil.
  def owning_list
    raise "List item is not attached to a list" if @list.nil?

    @list
  end
end
@@ -0,0 +1,407 @@
1
+
2
+ require 'asciidoctor-asciidoc/linked-list'
3
+ require 'asciidoctor-asciidoc/const'
4
+
5
+ class Unescape
6
+
7
+ include AsciiDoctorAsciiDocConst
8
+
9
+ FEED_MORE = :more
10
+ FEED_NO_MATCH = :no_match
11
+ FEED_DONE = :done
12
+
13
# Cursor and scratch state for one pass over an input string.
#
# The context tracks the read position, the characters consumed since the
# last reset (the "buffer"), the output produced so far, and the positions
# in the output where curved single quotes were emitted.
class Context
  attr_accessor :last_char, :buffer_len, :buffer, :curve_quote_index, :pos, :str, :out
  attr_reader :encode

  # @param str [String] text to scan; its length is captured here
  # @param encode [Boolean] flag read by the patterns to decide whether
  #   replacements are emitted in encoded form
  def initialize(str, encode = false)
    @str = str
    @curve_quote_index = []
    @len = str.length
    @pos = 0
    # Unary plus guarantees a mutable buffer even under frozen string literals.
    @out = +''
    @encode = encode
    reset
  end

  # Forgets the characters consumed so far (the position is left alone).
  def reset
    @buffer = +''
    @buffer_len = 0
  end

  # @return [Boolean] true while there is unread input
  def has_next_char?
    @pos < @len
  end

  # @return [String, nil] the character at the read position (not consumed)
  def next_char
    @str[@pos]
  end

  # The character just before the most recently consumed one.
  #
  # @return [String, nil] nil when fewer than two characters have been
  #   consumed; without this guard the negative index would wrap around and
  #   return a character from the END of the string
  def prev_char
    return nil if @pos < 2

    @str[@pos - 2]
  end

  # Consumes one character: records it as +last_char+, appends it to the
  # buffer and moves the read position forward.
  def advance
    @last_char = next_char
    @buffer << @last_char
    @pos += 1
    @buffer_len += 1
  end

  # Rewinds the read position to where the current buffer started.
  #
  # @param pattern [#len] the pattern that matched
  # @return [String] the first +pattern.len+ characters at the rewound
  #   position, or an empty string for a zero-length pattern
  def back_track(pattern)
    @pos -= buffer_len
    return @str[@pos..@pos + pattern.len - 1] if pattern.len > 0

    ''
  end

  # Moves the read position past the text matched by +pattern+.
  def forward(pattern)
    @pos += pattern.len
  end

  # @return [String] the unread part of the input
  def remainder
    str[pos..-1]
  end
end
74
+
75
# Base class for the matchers that are fed the input one character at a time.
#
# Subclasses are expected to provide +feed+ and +produce(piece)+; this class
# supplies the shared bookkeeping (+len+, done flag) and the optional
# encoding of whatever +produce+ returns.
class Pattern
  include AsciiDoctorAsciiDocConst

  # Number of characters matched so far.
  attr_reader :len

  # @param context the shared scanning context
  def initialize(context)
    @ctx = context
    reset
  end

  # Clears the match state so the instance can be reused.
  #
  # @return [Pattern] self
  def reset
    @len = 0
    @done = false
    self
  end

  # @param ended whether the input is exhausted (ignored by the base class)
  # @return [Boolean] true once the pattern has fully matched
  def is_done?(ended)
    @done
  end

  # Produces the replacement for +piece+, encoded when the context asks
  # for it.
  def encode(piece)
    replacement = produce(piece)
    return replacement unless @ctx.encode

    encode_bin(replacement)
  end

  # Writes +text+ as ESC, two lowercase hex digits per character, ESC_E.
  #
  # @raise [RuntimeError] if a character's code point is above 255
  def encode_bin(text)
    encoded = text.each_char.reduce(ESC.dup) do |acc, char|
      code = char.ord
      raise "Non-ASCII character #{code}" if code > 255

      acc << format("%02x", code)
    end
    encoded << ESC_E
  end
end
117
+
118
# Matches one fixed string and replaces it with another fixed string.
class StringPattern < Pattern
  # @param context the shared scanning context
  # @param pattern [String] literal text to recognise
  # @param replacement [String] text emitted in place of a match
  def initialize(context, pattern, replacement)
    super(context)
    @replacement = replacement
    @pattern = pattern
    @max_len = pattern.length
  end

  # Consumes the context's most recent character.
  #
  # @return [Symbol] FEED_NO_MATCH when the character is not the next one
  #   expected, FEED_DONE when it completes the pattern, FEED_MORE otherwise
  def feed
    expected = @pattern[@len]
    return FEED_NO_MATCH unless @ctx.last_char == expected

    @len += 1
    return FEED_MORE unless @len == @max_len

    @done = true
    FEED_DONE
  end

  # @return [String] the fixed replacement; +piece+ is not consulted
  def produce(piece)
    @replacement
  end
end
144
+
145
# Recognises the opening curved single quote reference and emits the
# replacement markup, remembering where in the output it was written.
class CurveQuoteStart < StringPattern
  def initialize(context)
    super(context, "&#8216;", %('`))
  end

  # Logs the current output length as the position of an opening quote,
  # then defers to the fixed replacement.
  def produce(piece)
    marker = { pos: @ctx.out.length, start: true }
    @ctx.curve_quote_index.push(marker)
    super
  end
end
157
+
158
# Recognises the closing curved single quote reference and emits the
# replacement markup, remembering where in the output it was written.
class CurveQuoteEnd < StringPattern
  def initialize(context)
    super(context, "&#8217;", %(`'))
  end

  # Logs the current output length as the position of a closing quote,
  # then defers to the fixed replacement.
  def produce(piece)
    marker = { pos: @ctx.out.length, start: false }
    @ctx.curve_quote_index.push(marker)
    super
  end
end
170
+
171
# Collects a run of formatting characters and wraps it in a pass:[] macro.
class SpecialGobbler < Pattern
  # these are the character that we care about escaping
  # (a frozen array: six entries do not justify depending on the `set`
  # library, which this file never requires)
  CHAR_SET = %w[* _ ` # ~ ^].freeze
  VOID_SET = /[[:alpha:][:digit:]:;}_]/

  # Consumes the context's most recent character.
  #
  # @return [Symbol] FEED_MORE while the run is being collected, FEED_DONE
  #   when a non-formatting character ends a non-empty run, FEED_NO_MATCH
  #   when the character needs no escaping
  def feed
    current = @ctx.last_char

    unless CHAR_SET.include?(current)
      return FEED_NO_MATCH unless @len > 0

      @done = true
      return FEED_DONE
    end

    # we don't need to escape if:
    # - the character is singular
    # - the character directly follows:
    # -- a colon, semicolon, or closing curly bracket
    # -- a letter, number, or underscore
    # -- there is a space from this mark to the next mark.
    # - but we need to make exceptions for first character being '*' because of lists

    # if we have accumulated something already continue doing so no matter what
    return gobble if @len > 0

    # list exception - gobble up chars if we are consuming from the
    # start of text
    return gobble if @ctx.buffer_len == @ctx.pos

    return gobble if @ctx.has_next_char? && @ctx.next_char == current

    # here we are guaranteed that there is a character before ours
    # (safe navigation only guards against a context that reports none)
    return FEED_NO_MATCH if @ctx.prev_char&.match?(VOID_SET)

    # if there are no more control characters, no need to escape
    rest = @ctx.remainder
    next_mark = rest.index(current)
    return FEED_NO_MATCH if next_mark.nil?

    next_ws = rest.index(/[[:space:]]/)
    return FEED_NO_MATCH if !next_ws.nil? && next_ws < next_mark

    gobble
  end

  # A non-empty run also counts as complete when the input has ended.
  def is_done?(ended)
    (ended && @len > 0) || super
  end

  # @param piece [String] the collected run of formatting characters
  # @return [String] the run wrapped in a pass macro
  def produce(piece)
    %(pass:[#{piece}])
  end

  private

  # Accepts the current character into the run.
  def gobble
    @len += 1
    FEED_MORE
  end
end
234
+
235
# Rewrites +str+ by replacing the character references listed below with
# their AsciiDoc source form and by wrapping runs of formatting characters
# in pass:[] (see SpecialGobbler).
#
# The input is fed character by character to every candidate pattern; when
# no pattern can continue, the longest finished one wins (ties go to the
# pattern registered later). Closing curved quotes are resolved in a second
# pass, see resolve_curve_quotes.
#
# @param str [String] text to convert
# @param encode [Boolean] when true each replacement is emitted through
#   Pattern#encode_bin instead of as plain text
# @return [String] the converted text
def self.unescape(str, encode = false)
  # Note - it's generally OK for us to leave the &#xxxx; sequences in the
  # Asciidoc, because they are re-rendered as is. We try our best to resolve
  # these reference, but if we are unsure on how to, we can leave them in.

  # $TODO I'm sure there is more to this than that.
  # quote_release = Set[
  #   ',', ';', '"', '.', '?', '!', ' ', '\n'
  # ]

  ctx = Context.new(str, encode)

  all = LinkedList.new
  # https://docs.asciidoctor.org/asciidoc/latest/subs/special-characters/
  all.add(StringPattern.new(ctx, "&lt;", "<"))
  all.add(StringPattern.new(ctx, "&gt;", ">"))
  all.add(StringPattern.new(ctx, "&amp;", "&"))
  # https://docs.asciidoctor.org/asciidoc/latest/subs/quotes/
  all.add(StringPattern.new(ctx, "&#8220;", %("`)))
  all.add(StringPattern.new(ctx, "&#8221;", %(`")))
  all.add(CurveQuoteEnd.new(ctx))
  all.add(CurveQuoteStart.new(ctx))
  # https://docs.asciidoctor.org/asciidoc/latest/subs/replacements/
  all.add(StringPattern.new(ctx, "&#169;", "(C)"))
  all.add(StringPattern.new(ctx, "&#174;", "(R)"))
  all.add(StringPattern.new(ctx, "&#8482;", "(TM)"))
  all.add(StringPattern.new(ctx, "&#8212;", "--"))
  all.add(StringPattern.new(ctx, "&#8212;&#8203;", "--"))
  all.add(StringPattern.new(ctx, "&#8201;&#8212;&#8201;", " -- "))
  all.add(StringPattern.new(ctx, "&#8230;", "..."))
  all.add(StringPattern.new(ctx, "&#8230;&#8203;", "..."))
  all.add(StringPattern.new(ctx, "&#8594;", "->"))
  all.add(StringPattern.new(ctx, "&#8658;", "=>"))
  all.add(StringPattern.new(ctx, "&#8592;", "<-"))
  all.add(StringPattern.new(ctx, "&#8656;", "<="))

  all.add(SpecialGobbler.new(ctx))

  # Both are declared up front so the lambdas below close over them.
  active = nil
  done = nil

  # Starts a fresh attempt: empty buffer, every pattern reset and active.
  reset = lambda do
    ctx.reset
    active = LinkedList.new(all, ->(p) { p.reset })
  end

  # Remembers +item+ as the winner unless a strictly longer finished pattern
  # is already known; the loser is dropped from the active list.
  record_done = lambda do |item|
    if done.nil?
      done = item
    elsif done.contents.len > item.contents.len
      active.delete(item)
    else
      active.delete(done)
      done = item
    end
  end

  reset.call

  loop do
    ended = !ctx.has_next_char?
    ctx.advance unless ended

    done = nil
    has_more = false

    active.each do |item|
      pattern = item.contents

      if ended
        record_done.call(item) if pattern.is_done?(true)
        next
      end

      # A pattern that finished on an earlier character stays a candidate
      # and is not fed again.
      if pattern.is_done?(false)
        record_done.call(item)
        next
      end

      case pattern.feed
      when FEED_NO_MATCH then active.delete(item)
      when FEED_DONE then record_done.call(item)
      when FEED_MORE then has_more = true
      end
    end

    # if we found some more patterns to match
    # we need to continue
    next if has_more

    if done.nil?
      # there is nothing done, so we just flush the
      # buffer out
      # TODO: this should move to the next char, and not
      # jump over the entire buffer.
      ctx.out << ctx.buffer
    else
      pattern = done.contents
      piece = ctx.back_track(pattern)
      ctx.out << pattern.encode(piece)
      ctx.forward(pattern)
    end

    reset.call

    break unless ctx.has_next_char?
  end

  resolve_curve_quotes(ctx)
end

# Second pass over the output: decides, for every closing curved quote that
# was emitted, whether it really closes a quotation or is an apostrophe.
#
# A closing quote is kept only when a quotation is open AND no further
# closing quote follows before the next opening quote (or the end of the
# list); otherwise it is replaced by a plain apostrophe. Opening quotes are
# always kept.
#
# Internal helper of unescape.
#
# @param ctx [#out, #curve_quote_index, #encode] scanning context whose
#   +curve_quote_index+ holds hashes with +:pos+ (offset into +out+) and
#   +:start+ (true for an opening quote)
# @return [String] +ctx.out+ itself when no curved quotes were recorded,
#   otherwise a new string with the quotes resolved
def self.resolve_curve_quotes(ctx)
  quotes = ctx.curve_quote_index
  return ctx.out if quotes.empty?

  text = ctx.out
  # Width of one emitted quote: two characters in plain form; in encoded
  # form each of them becomes two hex digits between the two markers.
  q_len = ctx.encode ? 6 : 2

  result = +''
  cursor = 0
  quote_open = false

  quotes.each_with_index do |quote, idx|
    pos = quote[:pos]
    result << text[cursor...pos] if pos > 0
    emitted = text[pos, q_len]

    if quote[:start]
      quote_open = true
      result << emitted
    else
      following = quotes[idx + 1]
      if quote_open && (following.nil? || following[:start])
        # Genuine closing quote: nothing else closes this quotation later.
        quote_open = false
        result << emitted
      else
        # Apostrophe: either no quotation is open, or another closing quote
        # follows before any new quotation starts.
        result << (ctx.encode ? "#{ESC}27#{ESC_E}" : "'")
      end
    end

    cursor = pos + q_len
  end

  result << text[cursor..-1]
end
406
+
407
+ end