mmmd 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +3 -0
- data/architecture.md +278 -0
- data/bin/mmmdpp +168 -0
- data/lib/mmmd/blankshell.rb +1895 -0
- data/lib/mmmd/entities.json +2233 -0
- data/lib/mmmd/renderers/html.rb +356 -0
- data/lib/mmmd/renderers/plainterm.rb +452 -0
- data/lib/mmmd/renderers.rb +11 -0
- data/lib/mmmd/util.rb +61 -0
- data/lib/mmmd.rb +14 -0
- data/security.md +21 -0
- metadata +61 -0
|
@@ -0,0 +1,1895 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'util'
|
|
4
|
+
require 'uri'
|
|
5
|
+
|
|
6
|
+
# Modular, extensible approach to parsing markdown as close as
|
|
7
|
+
# it gets to CommonMark spec (as of version 0.31.2).
|
|
8
|
+
module PointBlank
|
|
9
|
+
module Parsing
|
|
10
|
+
# Shared helpers for reading the parts of a link: label, destination, title.
#
# Every `read_*` method takes the text still to be parsed and returns a
# two-element array: the parsed value (nil when nothing matched) followed by
# the text left over. On failure the leftover is the text that was passed in.
module LinkSharedMethods
  # Normalize a label: case-fold, trim, and collapse whitespace runs.
  # @param string [String]
  # @return [String, nil] nil when nothing is left after normalization
  def normalize_label(string)
    folded = string.downcase(:fold).strip.gsub(/\s+/, " ")
    folded.empty? ? nil : folded
  end

  # Read a bracketed link label from the start of the text.
  # Returns the normalized label (or nil) and the remainder of the string.
  # @param text [String]
  # @return [Array(<String, nil>, String)]
  def read_return_label(text)
    return nil, text unless text.start_with?('[')

    span, balanced = scan_label_span(text)
    return [nil, text] unless balanced

    label = normalize_label(span[1..-2])
    label ? [label, text.delete_prefix(span)] : [nil, text]
  end

  # Read a link label that must be followed by a colon
  # (the opening of a link reference definition).
  # Returns the normalized label (or nil) and the remainder of the string
  # with the colon and the whitespace after it removed.
  # @param text [String]
  # @return [Array(<String, nil>, String)]
  def read_label(text)
    return nil, text unless text.start_with?('[')

    # An unbalanced label swallows the whole text, so the colon check
    # below fails for it without a separate balance test.
    span, = scan_label_span(text)
    rest = text.delete_prefix(span)
    label = normalize_label(span[1..-2])
    return [nil, text] unless label && rest.start_with?(':')

    [label, rest[1..].lstrip]
  end

  # Read link destination (URI).
  # Returns the processed destination (or nil) and the remainder of the string.
  # @param text [String]
  # @return [Array(<String, nil>, String)]
  def read_destination(text)
    if (bracketed = text[/\A<.*?(?<![^\\]\\)>/m]) &&
       !bracketed[1..].match?(/(?<![^\\]\\)</)
      # <...> form: drop the escapes on angle brackets, then the brackets
      [process_destination(bracketed.gsub(/\\(?=[><])/, '')[1..-2]),
       text.delete_prefix(bracketed).lstrip]
    elsif (bare = text[/\A\S+/]) && !bare.start_with?('<') && balanced?(bare)
      # bare form: a run of non-space characters with balanced parentheses
      [process_destination(bare), text.delete_prefix(bare).lstrip]
    else
      [nil, text]
    end
  end

  # Read link title, delimited by '...', "..." or (...).
  # Returns the processed title (or nil) and the remainder of the string.
  # @param text [String]
  # @return [Array(<String, nil>, String)]
  def read_title(text)
    quoted = case text[0]
             when "'" then text[/\A'.*?(?<!\\)'/m]
             when '"' then text[/\A".*?(?<!\\)"/m]
             end
    if quoted
      return [process_title(quoted[1..-2]),
              text.delete_prefix(quoted).lstrip]
    end

    if text.start_with?('(') && (close_at = find_balanced_end(text))
      return [process_title(text[1..(close_at - 1)]),
              text[(close_at + 1)..].lstrip]
    end

    [nil, text]
  end

  # Read link properties: either a `[label]` reference or an inline
  # `(destination title)` group.
  # Returns matched parameters as hash or nil, and remainder of the string.
  #
  # NOTE(review): a destination directly followed by the closing paren, e.g.
  # `(/url)`, is rejected here: read_destination takes the whole non-space
  # run `/url)` and balanced? fails on it. Confirm whether callers
  # pre-process the text or whether this needs a fix.
  # @param text [String]
  # @return [Array([Hash, nil], String)]
  def read_properties(text)
    if text.start_with?('[')
      # The :label key is always present, even when the label did not parse.
      label, rest = read_return_label(text)
      [{ label: label }, rest]
    elsif text.start_with?('(')
      destination, rest = read_destination(text[1..])
      return [nil, text] unless destination

      title, rest = read_title(rest)
      return [nil, text] unless rest.start_with?(')')

      [{ uri: destination, title: title }, rest[1..]]
    else
      [nil, text]
    end
  end

  # Check if unescaped parentheses are balanced
  # @param text [String]
  # @return [Boolean]
  def balanced?(text)
    depth = 0
    text.split(/(?<!\\)([()])/).each do |part|
      case part
      when '(' then depth += 1
      when ')'
        depth -= 1
        return false if depth.negative?
      end
    end
    depth.zero?
  end

  # Find index at which the first balanced parenthesis group closes
  # @param text [String]
  # @return [Integer, nil] index of the closing paren, nil if never closed
  def find_balanced_end(text)
    depth = 0
    offset = 0
    text.split(/(?<!\\)([()])/).each do |part|
      case part
      when '(' then depth += 1
      when ')'
        depth -= 1
        return offset if depth.zero?
      end
      offset += part.length
    end
    nil
  end

  # Process destination string: unescape, flatten newlines,
  # decode entities, then URI-encode.
  # @param string [String]
  # @return [String]
  def process_destination(string)
    MMMD::EntityUtils.encode_uri(
      MMMD::EntityUtils.decode_entities(unescape_and_flatten(string))
    )
  end

  # Process title string: unescape, flatten newlines, decode entities.
  # @param string [String]
  # @return [String]
  def process_title(string)
    MMMD::EntityUtils.decode_entities(unescape_and_flatten(string))
  end

  private

  # Collect the leading bracketed span of a text that starts with `[`.
  # Brackets preceded by a backslash are not counted.
  # @param text [String]
  # @return [Array(String, Boolean)] raw span (brackets included; the whole
  #   text when the brackets never balance) and whether they balanced
  def scan_label_span(text)
    depth = 0
    span = String.new
    text.split(/(?<!\\)([\[\]])/).each do |part|
      span << part
      case part
      when '[' then depth += 1
      when ']'
        depth -= 1
        break if depth.zero?
      end
    end
    [span, depth.zero?]
  end

  # Drop the backslash from backslash-escaped ASCII punctuation and
  # replace every newline with a space.
  # @param string [String]
  # @return [String]
  def unescape_and_flatten(string)
    string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/, '\\1')
          .gsub("\n", " ")
  end
end
|
|
196
|
+
|
|
197
|
+
# Line-oriented block scanner.
#
# Feeds the text line by line to the parsers of the currently open blocks
# (kept on a stack, with the document at the bottom), opens new child
# blocks where a child parser accepts the line, and closes blocks whose
# parser stops accepting lines.
class LineScanner
  # @param text [String] source text
  # @param doc [Object] root block that becomes the bottom of the stack
  def initialize(text, doc)
    @text = text
    @document = doc
    @stack = [@document]
    @depth = 0
    @topdepth = 0
  end

  # Scan document and return scanned structure
  # @return [Object] the root block (bottom of the stack)
  def scan
    @text.each_line do |raw|
      # Let the open blocks consume their markers to stay open
      line = consume_markers(raw)
      # A blank remainder never opens anything
      line = nil if line&.strip&.empty?
      # Keep opening nested blocks for as long as something accepts the line
      while line
        opened, line = try_open(line)
        break unless opened
      end
    end
    close_up(0)
    @stack.first
  end

  private

  # Try to open a new block on the line
  # @param line [String, nil]
  # @return [Array(Boolean, <String, nil>)] whether a block was opened,
  #   and the line left after the new block's parser consumed it
  def try_open(line)
    owner = topclass
    return [false, line] unless owner.parser && line
    return [false, line] unless [nil, self.class].include?(owner.scanner)

    chosen = owner.valid_children.find { |cand| cand.parser.begin?(line) }
    return [false, line] unless chosen

    parent = @stack[@depth]
    @depth += 1
    @topdepth = @depth if @topdepth < @depth
    node = chosen.new
    @stack[@depth] = node
    parent.append_child(node)
    node.parser = chosen.parser.new
    remainder, = node.parser.consume(line, parent)
    [true, remainder]
  end

  # Attempt to consume markers for all valid stack elements
  # @param line [String]
  # @return [String, nil] what is left of the line; nil when the line was
  #   taken as a lazy continuation of the deepest paragraph
  def consume_markers(line)
    climb = -1
    previous = nil
    implicit = nil
    @stack[..@depth].each do |element|
      rest, flag = element.parser.consume(line, previous)
      implicit = flag unless flag.nil?
      break unless rest

      line = rest
      climb += 1
      previous = element
    end
    return line unless climb < @depth

    # Some open block rejected the line
    if implicit && @stack[@topdepth].is_a?(::PointBlank::DOM::Paragraph)
      backref = @stack[@topdepth]
      leftover, = backref.parser.consume(line, previous, lazy: true)
      return nil if leftover
    end
    close_up(climb)
    line
  end

  # Close upper levels than picked level
  # @param level [Integer] deepest stack index that stays open
  def close_up(level)
    @stack.drop(level + 1).each do |node|
      node.content = node.parser.parsed_content
      node.parser.applyprops(node) if node.parser.respond_to?(:applyprops)
      replacement = node.parser.close(node)
      node.parser = nil
      # The parser may ask for the block to become another class
      node = transfer(node, replacement) if replacement
      node.parse_inner if node.respond_to?(:parse_inner)
    end
    @topdepth = @depth = level
    @stack = @stack[..level]
  end

  # Transfer data from class to another class (morph class)
  # @param block [Object] block being replaced
  # @param switchclass [Class] class of the replacement block
  # @return [Object] the replacement, stored at the old block's position
  def transfer(block, switchclass)
    switchclass.new.tap do |fresh|
      fresh.content = block.content
      fresh.parser = nil
      fresh.parent = block.parent
      block.parent[block.position] = fresh
    end
  end

  # Get top level element at the current moment
  def toplevel
    @stack[@depth]
  end

  # Get top level element class
  def topclass
    toplevel.class
  end

  # Debug info: print current depths and the line to stderr
  def debug(line)
    warn "#{@depth}:#{@topdepth} #{line.inspect}"
  end
end
|
|
306
|
+
|
|
307
|
+
# Null parser: the base block parser. It never begins a block, passes
# every line through untouched, and buffers whatever is pushed to it.
class NullParser
  class << self
    attr_accessor :parser_for

    # Check that a parser parses this line as a beginning of a block
    # @param line [String]
    # @return [Boolean] always false for the null parser
    def begin?(_line)
      false
    end
  end

  # Instantiate a new parser object with an empty line buffer
  def initialize
    @buffer = []
  end

  # Close parser: run the block class's overlays in order and return the
  # first truthy result.
  # @param block [::PointBlank::DOM::DOMObject]
  # @param lazy [Boolean] forwarded to every overlay
  # @return [nil, Class]
  def close(block, lazy: false)
    block.class.valid_overlays.each do |overlay_class|
      verdict = overlay_class.new.process(block, lazy: lazy)
      return verdict if verdict
    end
    nil
  end

  # Return parsed content: all pushed lines concatenated
  # @return [String]
  def parsed_content
    @buffer.join('')
  end

  # Consume line markers
  # @param line [String]
  # @return [Array(String, Boolean)] the line unchanged, and false
  def consume(line, _parent = nil, **_hargs)
    [line, false]
  end

  private

  # Push a new parsed line
  # @param line [String]
  # @return [void]
  def push(line)
    @buffer << line
  end
end
|
|
359
|
+
|
|
360
|
+
# Paragraph parser: collects consecutive lines until a blank line, an
# underline, or the start of a block that takes precedence over paragraphs.
class ParagraphParser < NullParser
  # (see ::PointBlank::Parsing::NullParser#begin?)
  def self.begin?(line)
    line.match?(/\A {0,3}\S/)
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  # @param lazy [Boolean] marks the line as a lazy continuation; once set,
  #   the flag stays set for the lifetime of this parser
  def consume(line, parent = nil, lazy: false)
    @lazy_triggered = lazy if lazy
    # Order matters: check_candidates may mark the paragraph as closed
    rejected = line.match?(/\A {0,3}\Z/) ||
               @closed ||
               check_candidates(line, parent)
    return [nil, nil] if rejected

    push(line)
    ["", nil]
  end

  # (see ::PointBlank::Parsing::NullParser#close)
  def close(block, **_lazy)
    super(block, lazy: @lazy_triggered)
  end

  private

  # Check that there are no other candidates for line beginning
  # @param line [String]
  # @param parent [Object, nil] block whose class lists the competing children
  # @return [Boolean] true when a block listed before Paragraph begins here
  def check_candidates(line, parent)
    return false unless parent

    paragraph = ::PointBlank::DOM::Paragraph
    # Only children listed ahead of Paragraph can interrupt it
    rivals = parent.class.valid_children.take_while { |cls| cls != paragraph }
    underlined = paragraph.valid_children.any? do |child|
      child.parser.begin?(line)
    end
    if underlined && !@lazy_triggered
      # Accept the underline as the final line, then refuse anything further
      @closed = true
      return false
    end
    rivals.any? { |cls| cls.parser.begin?(line) }
  end
end
|
|
406
|
+
|
|
407
|
+
# ATX heading: a single line opened by a run of `#` characters.
# Subclasses set the heading level through `self.level=`.
# @abstract
class ATXParser < NullParser
  class << self
    attr_accessor :level

    # (see ::PointBlank::Parsing::NullParser#begin?)
    # Matches exactly `level` hashes followed by a space or the end of line.
    def begin?(line)
      line.match?(/^ {0,3}\#{#{level}}(?: .*|)$/)
    end
  end

  def initialize
    super
    @matched = false
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  # Accepts one line only: any later call reports the block as finished.
  def consume(line, _parent, **_hargs)
    return [nil, false] if @matched

    @matched = true
    # Drop the opening hashes, then an optional closing hash sequence
    heading = line.gsub(/\A {0,3}\#{#{self.class.level}} */, '')
    push(heading.gsub(/( #+|)\Z/, ''))
    [line, false]
  end
end
|
|
435
|
+
|
|
436
|
+
# ATX heading parser bound to level 1 (one `#`)
class ATXParserLV1 < ATXParser
  self.level = 1
end
|
|
440
|
+
|
|
441
|
+
# ATX heading parser bound to level 2 (two `#`)
class ATXParserLV2 < ATXParser
  self.level = 2
end
|
|
445
|
+
|
|
446
|
+
# ATX heading parser bound to level 3 (three `#`)
class ATXParserLV3 < ATXParser
  self.level = 3
end
|
|
450
|
+
|
|
451
|
+
# ATX heading parser bound to level 4 (four `#`)
class ATXParserLV4 < ATXParser
  self.level = 4
end
|
|
455
|
+
|
|
456
|
+
# ATX heading parser bound to level 5 (five `#`)
class ATXParserLV5 < ATXParser
  self.level = 5
end
|
|
460
|
+
|
|
461
|
+
# ATX heading parser bound to level 6 (six `#`)
class ATXParserLV6 < ATXParser
  self.level = 6
end
|
|
465
|
+
|
|
466
|
+
# Underline parser: base class for parsers that recognise the underline
# line closing a paragraph. This base never matches.
# @abstract
class UnderlineParser < NullParser
  # Checks whether a paragraph underline is on this line.
  # Should match an entire underline.
  # @param line [String]
  # @return [boolean] always false here; subclasses override
  def self.begin?(_line)
    false
  end
end
|
|
477
|
+
|
|
478
|
+
# Setext parser level 1: recognises an underline made of `=` characters.
# NOTE(review): CommonMark accepts an underline of any length (one or more
# `=`); this pattern requires at least three. Confirm this is intended.
class SetextParserLV1 < UnderlineParser
  # (see ::PointBlank::Parsing::UnderlineParser)
  # @param line [String]
  # @return [Boolean]
  def self.begin?(line)
    line.match?(/\A {0,3}={3,}\s*\z/)
  end
end
|
|
485
|
+
|
|
486
|
+
# Setext parser level 2: recognises an underline made of `-` characters.
# NOTE(review): CommonMark accepts an underline of any length (one or more
# `-`); this pattern requires at least three. Confirm this is intended.
class SetextParserLV2 < UnderlineParser
  # (see ::PointBlank::Parsing::UnderlineParser)
  # @param line [String]
  # @return [Boolean]
  def self.begin?(line)
    line.match?(/\A {0,3}-{3,}\s*\z/)
  end
end
|
|
493
|
+
|
|
494
|
+
# Unordered list block (group): keeps a bullet list open while lines start
# with the same bullet character or carry continuation indentation.
class ULParser < NullParser
  # (see ::PointBlank::Parsing::NullParser#begin?)
  def self.begin?(line)
    line.match?(/\A {0,3}([-+*])(\s+)/)
  end

  # Record the bullet character on every child of the block
  # @param block [#each] the list block being closed
  def applyprops(block)
    block.each { |child| child.properties["marker"] = @marker[-1] }
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  def consume(line, _parent = nil, **_hargs)
    # Explicit receiver keeps this visibly distinct from Kernel#open
    self.open(line)
    continues?(line) ? [normalize(line), true] : [nil, true]
  end

  attr_reader :preoff

  private

  # Open block if it hasn't been opened yet: remember the indentation
  # before the bullet, the bullet (regex-escaped for `+` and `*`) and the
  # whitespace after it.
  def open(line)
    return if @open

    parts = line.match(/\A( {0,3})([-+*])(\s+)/)&.captures
    return unless parts

    @preoff, bullet, @offset = parts
    @marker ||= ['+', '*'].include?(bullet) ? "\\#{bullet}" : bullet
    @open = true
  end

  # Check if a line continues this ULParser block
  def continues?(line)
    return false if ::PointBlank::Parsing::ThematicBreakParser.begin?(line)
    return true if line.start_with?(/\A(?: {0,3}#{@marker}| )#{@offset}/)

    line.strip.empty?
  end

  # Strip off pre-marker offset
  def normalize(line)
    line.delete_prefix(@preoff)
  end
end
|
|
546
|
+
|
|
547
|
+
# Unordered list block (element): a single bullet item. The first line
# carries the bullet; later lines must carry continuation indentation
# or be blank.
class ULElementParser < NullParser
  # (see ::PointBlank::Parsing::NullParser#begin?)
  def self.begin?(line)
    line.match?(/\A {0,3}([-+*])(\s+)/)
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  def consume(line, parent = nil, **_hargs)
    @parent ||= parent
    # Before the item is opened there is no marker yet, so continues? passes
    return [nil, true] unless continues?(line)

    self.open(line)
    [normalize(line), true]
  end

  private

  # Open block if it hasn't been opened yet: remember the bullet
  # (regex-escaped for `+` and `*`) and the whitespace after it.
  def open(line)
    return if @open

    @marker, @offset = line.match(/\A {0,3}([-+*])(\s+)/)&.captures
    @marker = "\\#{@marker}" if ['+', '*'].include?(@marker)
    @open = true
  end

  # Check if a line continues this ULElementParser block
  def continues?(line)
    return true unless @marker
    return true if line.start_with?(/\A\s#{@offset}/)

    line.strip.empty?
  end

  # Normalize the line: strip the bullet on the first line,
  # the continuation indent on every later one.
  def normalize(line)
    if @opening_stripped
      line.sub(/\A\s#{@offset}/, '')
    else
      @opening_stripped = true
      line.sub(/\A(?: {0,3}#{@marker}| )#{@offset}/, '')
    end
  end
end
|
|
593
|
+
|
|
594
|
+
# Ordered list block (group): keeps a numbered list open while lines start
# with a number and the same delimiter, or carry continuation indentation.
class OLParser < NullParser
  # (see ::PointBlank::Parsing::NullParser#begin?)
  def self.begin?(line)
    line.match?(/\A {0,3}(\d+)([).])(\s+)/)
  end

  # Record the delimiter character (`.` or `)`) on every child of the block
  # @param block [#each] the list block being closed
  def applyprops(block)
    block.each { |child| child.properties["marker"] = @mark[-1] }
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  def consume(line, _parent = nil, **_hargs)
    # Explicit receiver keeps this visibly distinct from Kernel#open
    self.open(line)
    continues?(line) ? [normalize(line), true] : [nil, true]
  end

  private

  # Open block if it hasn't been opened yet: remember the indentation
  # before the number, padding as wide as number plus delimiter, the
  # regex-escaped delimiter and the whitespace after it.
  def open(line)
    return if @open

    parts = line.match(/\A( {0,3})(\d+)([).])(\s+)/)&.captures
    return unless parts

    @preoff, digits, delimiter, @offset = parts
    @num = " " * (digits.length + 1)
    @mark ||= "\\#{delimiter}"
    @open = true
  end

  # Check if a line continues this OLParser block
  def continues?(line)
    return false if ::PointBlank::Parsing::ThematicBreakParser.begin?(line)
    return true if line.start_with?(
      /\A(?: {0,3}(\d+)#{@mark}|#{@num})#{@offset}/
    )

    line.strip.empty?
  end

  # Strip off pre-marker offset
  def normalize(line)
    line.delete_prefix(@preoff)
  end
end
|
|
646
|
+
|
|
647
|
+
# Ordered list block (element): a single numbered item. The first line
# carries the number and delimiter; later lines must carry continuation
# indentation or be blank.
class OLElementParser < NullParser
  # (see ::PointBlank::Parsing::NullParser#begin?)
  def self.begin?(line)
    line.match?(/\A {0,3}(\d+)([).])(\s+)/)
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  def consume(line, _parent = nil, **_hargs)
    # Before the item is opened there is no marker yet, so continues? passes
    return [nil, true] unless continues?(line)

    self.open(line)
    [normalize(line), true]
  end

  # Store the item's number on the block as an Integer
  # @param block [Object] the list item being closed
  def applyprops(block)
    block.properties["number"] = @num.to_i
  end

  private

  # Open block if it hasn't been opened yet: remember the number, padding
  # as wide as number plus delimiter, the regex-escaped delimiter and the
  # whitespace after it.
  def open(line)
    return if @open

    digits, delimiter, spacing =
      line.match(/\A {0,3}(\d+)([).])(\s+)/)&.captures
    @num = digits
    @numoffset = " " * (@num.length + 1)
    @marker = "\\#{delimiter}"
    @offset = spacing
    @open = true
  end

  # Check if a line continues this OLElementParser block
  def continues?(line)
    return true unless @marker
    return true if line.start_with?(/\A#{@numoffset}#{@offset}/)

    line.strip.empty?
  end

  # Normalize the line: strip the number marker on the first line,
  # the continuation indent on every later one.
  def normalize(line)
    if @opening_stripped
      line.sub(/\A#{@numoffset}#{@offset}/, '')
    else
      @opening_stripped = true
      line.sub(/\A(?: {0,3}\d+#{@marker}|#{@numoffset})#{@offset}/, '')
    end
  end
end
|
|
701
|
+
|
|
702
|
+
# Quote block: stays open while lines begin with a `>` marker and strips
# that marker from each accepted line.
class QuoteParser < NullParser
  # (see ::PointBlank::Parsing::NullParser#begin?)
  def self.begin?(line)
    line.start_with?(/\A {0,3}>(?: \S|)/)
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  def consume(line, _parent = nil, **_hargs)
    quoted = line.start_with?(/\A {0,3}>(?: \S|)/)
    quoted ? [normalize(line), true] : [nil, true]
  end

  private

  # Normalize line in quoteblock: drop the marker and one following space
  def normalize(line)
    line.sub(/\A {0,3}> ?/, '')
  end
end
|
|
723
|
+
|
|
724
|
+
# Fenced code block
|
|
725
|
+
class FencedCodeBlock < NullParser
|
|
726
|
+
# (see ::PointBlank::Parsing::NullParser#begin?)
# Matches an opening code fence: up to three spaces of indentation, then
# three or more backticks or tildes and an optional info string.
# A backtick fence may not have backticks in its info string; a tilde
# fence may contain anything. A fence on a final line without a trailing
# newline is accepted too.
# @param line [String]
# @return [Boolean]
def self.begin?(line)
  line.match?(/\A {0,3}(?:`{3,}[^`]*$|~{3,}.*$)/)
end
|
|
730
|
+
|
|
731
|
+
# Store the info string captured from the opening fence on the block
# under the "infoline" property.
# @param block [Object] the code block being closed
def applyprops(block)
  props = block.properties
  props["infoline"] = @infoline
end
|
|
735
|
+
|
|
736
|
+
# (see ::PointBlank::Parsing::NullParser#consume)
# The first call opens the fence without buffering it. Later calls buffer
# the line with up to the fence's own indentation removed, until a closing
# fence is seen. After that every call reports the block as finished.
def consume(line, _parent = nil, **_hargs)
  return [nil, false] if @closed

  try_close(line)
  inside = @open && !@closed
  push(line.gsub(/^ {0,#{@space}}/, '')) if inside
  # No-op once the fence is open
  self.open(line)
  ["", false]
end
|
|
745
|
+
|
|
746
|
+
private
|
|
747
|
+
|
|
748
|
+
# Mark the block as closed when the line is a closing fence: the opening
# marker, optionally lengthened with more of its last character, followed
# only by spaces or tabs.
# @param line [String]
# @return [void]
def try_close(line)
  @closed = true if @open && line.match?(/\A {0,3}#{@marker}+[ \t]*$/)
end
|
|
751
|
+
|
|
752
|
+
# Open the block if it hasn't been opened yet: remember the fence's
# indentation width, the fence marker and the info string.
# @param line [String] the opening fence line
# @return [void]
def open(line)
  return if @open

  indent, fence, @infoline =
    line.match(/\A( {0,3})(`{3,}|~{3,})(.*)/).captures
  @space = indent.length
  @marker = fence
  @open = true
end
|
|
760
|
+
end
|
|
761
|
+
|
|
762
|
+
# Indented code block
|
|
763
|
+
class IndentedBlock < NullParser
|
|
764
|
+
# (see ::PointBlank::Parsing::NullParser#begin?)
# An indented code line starts with at least four spaces.
# @param line [String]
# @return [Boolean]
def self.begin?(line)
  line.match?(/\A {4}/)
end
|
|
768
|
+
|
|
769
|
+
# (see ::PointBlank::Parsing::NullParser#consume)
# Accepts indented lines and blank lines; anything else ends the block.
def consume(line, _parent = nil, **_hargs)
  accepted = self.class.begin?(line) || line.strip.empty?
  return [nil, nil] unless accepted

  push(normalize(line))
  ["", false]
end
|
|
777
|
+
|
|
778
|
+
private
|
|
779
|
+
|
|
780
|
+
# Strip one level of code indentation (four spaces or one tab) from the
# start of the line. Lines without that prefix are returned unchanged.
#
# The pattern must be a Regexp: a String pattern is matched literally and
# would never strip anything.
# @param line [String]
# @return [String]
def normalize(line)
  line.sub(/\A(?: {4}|\t)/, '')
end
|
|
783
|
+
end
|
|
784
|
+
|
|
785
|
+
# Thematic break parser
|
|
786
|
+
class ThematicBreakParser < NullParser
|
|
787
|
+
# (see PointBlank::Parsing::NullParser#begin?)
# A thematic break is a line with up to three spaces of indentation and
# three or more of the same marker (`-`, `*` or `_`), optionally separated
# and followed by spaces or tabs, and nothing else. The trailing newline is
# optional so a break on the last line of the input is recognised.
# @param line [String]
# @return [Boolean]
def self.begin?(line)
  line.match?(/\A {0,3}([-*_])[ \t]*(?:\1[ \t]*){2,}\r?\n?\z/)
end
|
|
791
|
+
|
|
792
|
+
# (see PointBlank::Parsing::NullParser#consume)
# Takes exactly one line: the first call swallows it, every later call
# reports the block as finished.
def consume(_line, _parent = nil, **_hargs)
  seen_before = @closed
  @closed = true
  seen_before ? [nil, nil] : ["", nil]
end
|
|
799
|
+
end
|
|
800
|
+
|
|
801
|
+
# Class of parsers that process the paragraph after it finished collection.
# This base overlay does nothing.
class NullOverlay < NullParser
  # Stub: overlays never begin a block
  def self.begin?(_line)
    false
  end

  # Process block after it closed
  # @param block [::PointBlank::DOM::DOMObject]
  # @param lazy [Boolean]
  # @return [nil, Class] always nil for the null overlay
  def process(_block, lazy: false)
    nil
  end
end
|
|
814
|
+
|
|
815
|
+
# Overlay for processing underline classes of paragraph: when the last
# line of a closed paragraph is an underline, the underline is removed
# from the content and the class to morph the paragraph into is returned.
class ParagraphUnderlineOverlay < NullOverlay
  # (see ::PointBlank::Parsing::NullOverlay#process)
  def process(block, lazy: false)
    lines = block.content.lines
    # An underline needs at least one line of text above it. This also
    # covers empty content, where there is no last line to inspect.
    return nil if lines.length < 2

    output = check_underlines(lines.last, lazy)
    block.content = lines[0..-2].join("") if output
    output
  end

  private

  # Check if the current line is an underline (morphs class)
  # @param line [String, nil]
  # @param lazy [Boolean] lazily continued paragraphs never morph
  # @return [Class, nil] the matching Paragraph child class, if any
  def check_underlines(line, lazy)
    return nil if lazy || line.nil?

    ::PointBlank::DOM::Paragraph.valid_children.find do |underline|
      parser = underline.parser
      parser < ::PointBlank::Parsing::UnderlineParser && parser.begin?(line)
    end
  end
end
|
|
840
|
+
|
|
841
|
+
# Overlay for link reference definitions: strips leading
# `[label]: destination "title"` definitions from a closed block and
# records them in the root block's :linkdefs property.
class LinkReferenceOverlay < NullOverlay
  include LinkSharedMethods

  def initialize
    super
    @definitions = {}
  end

  # (see ::PointBlank::Parsing::NullOverlay#process)
  # @return [nil] this overlay never morphs the block
  def process(block, **_lazy)
    text = block.content
    loop do
      # `text` only advances once a whole definition has been read, so a
      # label without a usable destination is left in the content intact.
      label, rest = read_label(text)
      break unless label

      destination, rest = read_destination(rest)
      break unless destination

      title, text = read_title(rest)
      push_definition(label, destination, title)
    end
    modify(block, text)
    nil
  end

  private

  # Walk up the parent chain to the topmost block
  def root(block)
    top = block
    top = top.parent while top.parent
    top
  end

  # Publish collected definitions on the root block and replace the
  # block's content with the text left after the definitions.
  # Definitions already present on the root win over new ones.
  def modify(block, text)
    top = root(block)
    known = top.properties[:linkdefs]
    top.properties[:linkdefs] =
      known ? @definitions.merge(known) : @definitions.dup
    block.content = text
  end

  # Remember a definition unless its label is already defined
  # @param label [String]
  # @param uri [String]
  # @param title [String, nil]
  def push_definition(label, uri, title = nil)
    key = label.strip.downcase.gsub(/\s+/, ' ')
    return if @definitions[key]

    @definitions[key] = { uri: uri, title: title }
  end
end
|
|
897
|
+
|
|
898
|
+
# Inline scanner
|
|
899
|
+
class StackScanner
|
|
900
|
+
# @param doc [Object] block whose inline content is to be scanned
# @param init_tokens [Array, nil] tokens to start from; when nil,
#   scanning starts from the block's content
def initialize(doc, init_tokens: nil)
  @doc, @init_tokens = doc, init_tokens
end
|
|
904
|
+
|
|
905
|
+
# Scan document: run every round of parsers over the tokens
# (tokenize, forward walk, reverse walk), then append the finalized
# structure to the document as children.
# @return [Array] the finalized structure
def scan
  rounds = quantize(@doc.class.unsorted_children)
  tokens = @init_tokens || [@doc.content]
  rounds.each do |round|
    @valid_parsers = round
    tokens = reverse_walk(forward_walk(tokenize(tokens)))
  end
  finalize(tokens).each { |child| @doc.append_child(child) }
end
|
|
918
|
+
|
|
919
|
+
private
|
|
920
|
+
|
|
921
|
+
# Finalize structure: unwrap token arrays to their first element, merge
# each run of adjacent strings into one Text object (empty runs are
# dropped), and keep every other element as it is.
# @param structure [Array<String, Array, ::PointBlank::DOM::DOMObject>]
# @return [Array<::PointBlank::DOM::DOMObject>]
def finalize(structure)
  items = structure.map { |entry| entry.is_a?(Array) ? entry.first : entry }
  runs = items.chunk_while do |left, right|
    left.is_a?(String) && right.is_a?(String)
  end
  runs.flat_map do |run|
    # A run is either consecutive strings or a single non-string element
    next run unless run.first.is_a?(String)

    merged = run.join
    merged.empty? ? [] : [construct_text(merged)]
  end
end
|
|
940
|
+
|
|
941
|
+
# Construct text object for a string: backslash escapes on ASCII
# punctuation are removed, newlines become spaces, and entities are decoded.
# @param string [String]
# @return [::PointBlank::DOM::Text]
def construct_text(string)
  ::PointBlank::DOM::Text.new.tap do |node|
    unescaped = string.gsub(
      /\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/, '\\1'
    )
    flattened = unescaped.gsub("\n", " ")
    node.content = MMMD::EntityUtils.decode_entities(flattened)
  end
end
|
|
953
|
+
|
|
954
|
+
# Transform text into a list of tokens
|
|
955
|
+
def tokenize(tokens)
|
|
956
|
+
parts = tokens
|
|
957
|
+
@valid_parsers.each do |parser|
|
|
958
|
+
newparts = []
|
|
959
|
+
parts.each do |x|
|
|
960
|
+
if x.is_a? String
|
|
961
|
+
newparts.append(*parser.tokenize(x))
|
|
962
|
+
else
|
|
963
|
+
newparts.append(x)
|
|
964
|
+
end
|
|
965
|
+
end
|
|
966
|
+
parts = newparts
|
|
967
|
+
end
|
|
968
|
+
parts
|
|
969
|
+
end
|
|
970
|
+
|
|
971
|
+
# Process parsed tokens (callback on open, forward search direction)
|
|
972
|
+
def forward_walk(parts)
|
|
973
|
+
parts = parts.dup
|
|
974
|
+
newparts = []
|
|
975
|
+
while (part = parts.shift)
|
|
976
|
+
next newparts.append(part) if part.is_a? String
|
|
977
|
+
|
|
978
|
+
if part[1].respond_to?(:forward_walk) && part.last == :open
|
|
979
|
+
part, parts = part[1].forward_walk([part] + parts)
|
|
980
|
+
end
|
|
981
|
+
newparts.append(part)
|
|
982
|
+
end
|
|
983
|
+
newparts
|
|
984
|
+
end
|
|
985
|
+
|
|
986
|
+
# Process parsed tokens (callback on close, inverse search direction)
|
|
987
|
+
def reverse_walk(parts)
|
|
988
|
+
backlog = []
|
|
989
|
+
parts.each do |part|
|
|
990
|
+
backlog.append(part)
|
|
991
|
+
next unless part.is_a? Array
|
|
992
|
+
next unless part.last == :close
|
|
993
|
+
next unless part[1].respond_to?(:reverse_walk)
|
|
994
|
+
|
|
995
|
+
backlog = part[1].reverse_walk(backlog, doc: @doc)
|
|
996
|
+
end
|
|
997
|
+
backlog
|
|
998
|
+
end
|
|
999
|
+
|
|
1000
|
+
# Quantize valid children
|
|
1001
|
+
def quantize(children)
|
|
1002
|
+
children.group_by(&:last).map { |_, v| v.map(&:first).map(&:parser) }
|
|
1003
|
+
end
|
|
1004
|
+
end
|
|
1005
|
+
|
|
1006
|
+
# Null inline scanner element.
# Base class for inline parsers: provides escape-aware tokenizing helpers
# and construction of the DOM object the parser is registered for.
# @abstract
class NullInline
  class << self
    # @return [Class, nil] DOM class this parser builds (assigned by
    #   DOMObject.define_parser)
    attr_accessor :parser_for
  end

  # Tokenize a string
  # @param string [String]
  # @return [Array<Array(String, Class, Symbol), String>]
  def self.tokenize(string)
    [string]
  end

  # @!method self.reverse_walk(backlog)
  #   Reverse-walk the backlog and construct a valid element from it
  #   @param backlog [Array<Array(String, Class, Symbol), String>]
  #   @return [Array<Array(String, Class, Symbol), String>]

  # @!method self.forward_walk(backlog)
  #   Forward-walk the backlog starting from the current valid element
  #   @param backlog [Array<Array(String, Class, Symbol), String>]
  #   @return [Array<Array(String, Class, Symbol), String>]

  # Check that the symbol at this index is not escaped, i.e. that it is
  # preceded by an even number of backslashes.
  # @param index [Integer]
  # @param string [String]
  # @return [Integer, Boolean] 0 (truthy) for index 0, otherwise true/false
  def self.check_unescaped(index, string)
    return index if index.zero?

    cursor = index - 1
    backslashes = 0
    while cursor >= 0 && string[cursor] == "\\"
      backslashes += 1
      cursor -= 1
    end
    backslashes.even?
  end

  # Find the first occurrence of an unescaped pattern
  # @param string [String]
  # @param pattern [Regexp, String]
  # @return [Integer, nil]
  def self.find_unescaped(string, pattern)
    initial = 0
    while (index = string.index(pattern, initial))
      return index if check_unescaped(index, string)

      initial = index + 1
    end
    nil
  end

  # Iterate over every string/unescaped token part.
  # For each pattern hit the block receives the text before the hit, the
  # text from the hit onward, and whether the hit is unescaped; it returns
  # either a token array or a plain string. The length of the returned text
  # decides how far the scan advances.
  # @param string [String]
  # @param pattern [Regexp, String]
  # @yieldparam before [String]
  # @yieldparam current [String]
  # @yieldparam unescaped [Integer, Boolean]
  # @yieldreturn [String, Array]
  # @return [Array<String, Array(String, Class, Symbol)>]
  def self.iterate_tokens(string, pattern, &filter)
    tokens = []
    initial = 0
    while (index = string.index(pattern, initial))
      # Text between the previous hit and this one. Adjacent hits leave
      # nothing in between, and no empty token is emitted for them.
      prefix = string[initial...index]
      tokens.append(prefix) unless prefix.empty?
      unescaped = check_unescaped(index, string)
      match = filter.call(string[0...index], string[index..], unescaped)
      tokens.append(match)
      match = match.first if match.is_a? Array
      initial = index + match.length
    end
    remaining = string[initial..] || ""
    tokens.append(remaining) unless remaining.empty?
    tokens
  end

  # Build child.
  # When the target class allows no children, the given parts are appended
  # directly (strings become Text objects); otherwise they are handed to a
  # StackScanner so nested inlines get parsed.
  # @param children [Array]
  # @return [::PointBlank::DOM::DOMObject]
  def self.build(children)
    obj = parser_for.new
    if parser_for.valid_children.empty?
      children.each do |child|
        child = child.first if child.is_a? Array
        child = construct_text(child) if child.is_a? String
        obj.append_child(child)
      end
    else
      tokens = children.map do |child|
        child.is_a?(Array) ? child.first : child
      end
      StackScanner.new(obj, init_tokens: tokens).scan
    end
    obj
  end

  # Construct text object for a string: backslash escapes of ASCII
  # punctuation are resolved, newlines become spaces, entities are decoded.
  # @param string [String]
  # @return [::PointBlank::DOM::Text]
  def self.construct_text(string)
    obj = ::PointBlank::DOM::Text.new
    string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
                         '\\1')
    string = string.gsub("\n", " ")
    string = MMMD::EntityUtils.decode_entities(string)
    obj.content = string
    obj
  end

  # Construct text literal for a string (content is stored untouched)
  # @param string [String]
  # @return [::PointBlank::DOM::Text]
  def self.construct_literal(string)
    obj = ::PointBlank::DOM::Text.new
    obj.content = string
    obj
  end

  # Check that contents can be contained within this element.
  # Only already-built DOM objects are checked; strings and tokens pass.
  # @param elements [Array<String, Array(String, Class, Symbol)>]
  # @return [Boolean]
  def self.check_contents(elements)
    elements.all? do |element|
      !element.is_a?(::PointBlank::DOM::DOMObject) ||
        parser_for.valid_children.include?(element.class)
    end
  end
end
|
|
1140
|
+
|
|
1141
|
+
# Code inline parser
class CodeInline < NullInline
  # (see ::PointBlank::Parsing::NullInline#tokenize)
  # Every unescaped backtick run becomes a token. The first run of a given
  # length opens, the next run of the same length closes, and so on.
  def self.tokenize(string)
    pending = {}
    iterate_tokens(string, "`") do |_before, rest, unescaped|
      next rest[0] unless unescaped

      run = rest[/^`+/]
      state = pending[run] ? :close : :open
      pending[run] = (state == :open ? true : nil)
      [run, self, state]
    end
  end

  # (see ::PointBlank::Parsing::NullInline#forward_walk)
  # Collects the raw text up to the closing run matching the opener and
  # builds a code object from it. If there is no such closer before the
  # end of the stream or before an already-built DOM object (which has no
  # literal text to contribute), the opener is returned as plain text.
  # @param parts [Array] token stream whose first element is the opener
  # @return [Array(Object, Array)] replacement element and remaining stream
  def self.forward_walk(parts)
    opening = parts.first.first
    inner = +""
    closer_at = nil
    parts.each_with_index do |part, idx|
      next if idx.zero? # the opener itself

      case part
      when String
        inner << part
      when Array
        if part.first == opening && part.last == :close
          closer_at = idx
          break
        end
        inner << part.first
      else
        break
      end
    end
    return [opening, parts[1..]] unless closer_at

    [build([construct_literal(inner)]), parts[(closer_at + 1)..]]
  end
end
|
|
1180
|
+
|
|
1181
|
+
# Autolink inline parser
class AutolinkInline < NullInline
  # Whole-string shape of an autolink: "<scheme:target>" without spaces or
  # angle brackets inside. Anchored with \A/\z so that a line in the middle
  # of a multi-line buffer cannot satisfy it.
  AUTOLINK_PATTERN = /\A<[\w\-_+]+:[^<>\s]+>\z/

  # (see ::PointBlank::Parsing::NullInline#tokenize)
  # Unescaped "<" opens, unescaped ">" closes; escaped ones stay text.
  def self.tokenize(string)
    iterate_tokens(string, /[<>]/) do |_before, rest, unescaped|
      next rest[0] unless unescaped

      rest.start_with?("<") ? ["<", self, :open] : [">", self, :close]
    end
  end

  # (see ::PointBlank::Parsing::NullInline#forward_walk)
  # Gathers the raw text from the opener to the first ">" closing token.
  # An autolink is built only when such a token exists and the gathered
  # text has autolink shape; otherwise the opener is returned as plain text.
  # Gathering stops at an already-built DOM object, which has no literal
  # text to contribute.
  # @param parts [Array] token stream whose first element is the opener
  # @return [Array(Object, Array)] replacement element and remaining stream
  def self.forward_walk(parts)
    buffer = +""
    closer_at = nil
    parts.each_with_index do |part, idx|
      break unless part.is_a?(String) || part.is_a?(Array)

      buffer << (part.is_a?(Array) ? part.first : part)
      next unless part.is_a?(Array) && part.first == ">" && part.last == :close

      closer_at = idx
      break
    end
    return '<', parts[1..] unless closer_at && buffer.match?(AUTOLINK_PATTERN)

    target = buffer[1..-2]
    obj = build([target])
    obj.properties[:uri] = MMMD::EntityUtils.encode_uri(target)
    [obj, parts[(closer_at + 1)..]]
  end
end
|
|
1217
|
+
|
|
1218
|
+
# Hyperreference inline superclass.
# Closing tokens produced by subclasses carry this class and the parsed
# link info: ["]...", HyperlinkInline, info, :close].
# @abstract
class HyperlinkInline < NullInline
  # Parse link properties according to given link suffix
  # @param input [String] text following the closing "]"
  # @return [Array(<Hash, String, nil>, String)] properties and the part of
  #   the input they were read from; [nil, ""] when nothing could be read
  def self.parse_linkinfo(input)
    props, remainder = read_properties(input)
    return nil, "" unless props

    # Explicit length, so that zero consumed characters yields "" rather
    # than the whole input.
    [props, input[0, input.length - remainder.length]]
  end

  # Build object and apply link info to it.
  # Info carrying a :label is resolved through the root's :linkdefs
  # property; an unknown label (or missing table) yields nil.
  # @param capture [Array<String, Array(String, Class, Symbol)>]
  #   opener token, contents, closer token
  # @param doc [::PointBlank::DOM::DOMObject]
  # @return [::PointBlank::DOM::DOMObject, nil]
  def self.build_w_linkinfo(capture, doc)
    linkinfo = capture[-1][2]
    if linkinfo[:label]
      linkinfo = doc.root.properties[:linkdefs]&.[](linkinfo[:label])
      return nil unless linkinfo
    end
    # Built only after the reference is known to resolve
    obj = build(capture[1..-2])
    obj.properties = linkinfo
    obj
  end

  # (see ::PointBlank::Parsing::NullInline#reverse_walk)
  # Pairs the closing token at the end of the backlog with the nearest
  # preceding opener of a subclass and replaces the span with the built
  # object. Earlier, unrelated openers are left untouched and do not
  # influence which class is built.
  # @param backlog [Array]
  # @param doc [::PointBlank::DOM::DOMObject]
  # @return [Array] new backlog, or the given one when nothing was built
  def self.reverse_walk(backlog, doc:)
    opener_at = backlog.rindex { |blk| blk.is_a?(Array) && blk[1] < self }
    return backlog unless opener_at

    cls = backlog[opener_at][1]
    capture = backlog[opener_at..]
    return backlog unless cls.check_contents(capture)

    block = cls.build_w_linkinfo(capture, doc)
    block ? backlog[0...opener_at] + [block] : backlog
  end
end
|
|
1271
|
+
|
|
1272
|
+
# Image inline parser
class ImageInline < HyperlinkInline
  extend ::PointBlank::Parsing::LinkSharedMethods

  # (see ::PointBlank::Parsing::NullInline#tokenize)
  # "![" opens an image; "](" closes one when link info can be read from
  # what follows the bracket. Anything else stays a one-character string.
  def self.tokenize(string)
    iterate_tokens(string, /(?:!\[|\]\()/) do |_before, rest, unescaped|
      if !unescaped
        rest[0]
      elsif rest.start_with?("![")
        ["![", self, :open]
      elsif rest.start_with?("]")
        info, consumed = parse_linkinfo(rest[1..])
        info ? ["]#{consumed}", HyperlinkInline, info, :close] : rest[0]
      else
        rest[0]
      end
    end
  end
end
|
|
1290
|
+
|
|
1291
|
+
# Link inline parser
class LinkInline < HyperlinkInline
  extend ::PointBlank::Parsing::LinkSharedMethods

  # (see ::PointBlank::Parsing::NullInline#tokenize)
  # "[" opens a link; "](" or "][" closes one when link info can be read
  # from what follows the bracket. Anything else stays a one-character
  # string.
  def self.tokenize(string)
    iterate_tokens(string, /(?:\[|\][(\[])/) do |_before, rest, unescaped|
      if !unescaped
        rest[0]
      elsif rest.start_with?("[")
        ["[", self, :open]
      elsif rest.start_with?("]")
        info, consumed = parse_linkinfo(rest[1..])
        info ? ["]#{consumed}", HyperlinkInline, info, :close] : rest[0]
      else
        rest[0]
      end
    end
  end
end
|
|
1309
|
+
|
|
1310
|
+
# Emphasis and strong emphasis inline parser
class EmphInline < NullInline
  # Unicode whitespace in the CommonMark sense: category Zs plus tab,
  # line feed, form feed and carriage return.
  WHITESPACE = /[\p{Zs}\t\n\f\r]/
  # Punctuation or symbol character.
  PUNCTUATION = /[\p{P}\p{S}]/
  # An underscore run with a word character directly on both sides
  # (intraword use). Whitespace of any kind - including a line break in
  # front of the run - is not a word character.
  INFIX_TOKENS = /\A[^\p{S}\p{P}\p{Zs}\s_]_++[^\p{S}\p{P}\p{Zs}\s_]\z/

  # (see ::PointBlank::Parsing::NullInline#tokenize)
  def self.tokenize(string)
    iterate_tokens(string, /(?:_++|\*++)/) do |bfr, text, matched|
      # afr is the character after the run ("" at end of text or before a
      # newline, since "." does not match newlines)
      token, afr = text.match(/^(_++|\*++)(.?)/)[1..2]
      prev = bfr[-1] || ""
      break_into_elements(token, "#{prev}#{token}#{afr}",
                          left_token?(prev, token, afr),
                          right_token?(prev, token, afr), matched)
    end
  end

  # Is this token, given these surrounding characters, left-flanking?
  # @param bfr [String] character before the run ("" at start of text)
  # @param _token [String]
  # @param afr [String] character after the run ("" at end of text)
  # @return [Boolean]
  def self.left_token?(bfr, _token, afr)
    bfr_white = bfr.empty? || bfr.match?(WHITESPACE)
    afr_white = afr.empty? || afr.match?(WHITESPACE)
    bfr_symbol = bfr.match?(PUNCTUATION)
    afr_symbol = afr.match?(PUNCTUATION)
    !afr_white && (!afr_symbol || bfr_symbol || bfr_white)
  end

  # Is this token, given these surrounding characters, right-flanking?
  # @param bfr [String] character before the run ("" at start of text)
  # @param _token [String]
  # @param afr [String] character after the run ("" at end of text)
  # @return [Boolean]
  def self.right_token?(bfr, _token, afr)
    bfr_white = bfr.empty? || bfr.match?(WHITESPACE)
    afr_white = afr.empty? || afr.match?(WHITESPACE)
    bfr_symbol = bfr.match?(PUNCTUATION)
    afr_symbol = afr.match?(PUNCTUATION)
    !bfr_white && (!bfr_symbol || afr_symbol || afr_white)
  end

  # Break token string into elements
  # @param token_inner [String] the delimiter run itself
  # @param token [String] the run together with its neighbouring characters
  # @param left [Boolean] run is left-flanking
  # @param right [Boolean] run is right-flanking
  # @param matched [Boolean] run is unescaped
  # @return [String, Array] plain text, or a token flagged :open and/or :close
  def self.break_into_elements(token_inner, token, left, right, matched)
    # Escaped: only the first character is text, the rest is rescanned
    return token_inner[0] unless matched

    star_token = token_inner.match?(/^\*+$/)
    infix_token = token.match(INFIX_TOKENS)
    # Intraword underscores never delimit emphasis
    return token_inner if !star_token && infix_token

    if left && right
      [token_inner, self, :open, :close]
    elsif left
      [token_inner, self, :open]
    elsif right
      [token_inner, self, :close]
    else
      token_inner
    end
  end

  # (see ::PointBlank::Parsing::NullInline#reverse_walk)
  # Repeatedly pairs the closing run at the end of the backlog with the
  # nearest compatible opener, consuming one (emphasis) or two (strong)
  # delimiter characters from each per pass, until the closer is used up
  # or no opener is left.
  def self.reverse_walk(backlog, **_doc)
    until backlog.last.first.empty?
      capture = []
      before = []
      closer = backlog.last
      star = closer.first.match?(/^\*+$/)
      open = true
      backlog[..-2].reverse_each do |blk|
        # An opener is compatible when it is an opening token of this
        # parser with the same delimiter character and the combined run
        # length is not a multiple of 3 (unless both lengths are).
        open = false if blk.is_a?(Array) && blk[2] == :open &&
                        blk.first.match?(/^\*+$/) == star &&
                        blk[1] == self &&
                        ((blk.first.length + closer.first.length) % 3 != 0 ||
                         ((blk.first.length % 3).zero? &&
                          (closer.first.length % 3).zero?))
        # The opener itself lands at the end of `before`
        (open ? capture : before).prepend(blk)
        next unless blk.is_a?(Array)
        return backlog unless blk[1].check_contents(capture)
      end
      return backlog if open

      opener = before[-1]
      strong = if closer.first.length > 1 && opener.first.length > 1
                 # Strong emphasis
                 closer[0] = closer.first[2..]
                 opener[0] = opener.first[2..]
                 true
               else
                 # Emphasis
                 closer[0] = closer.first[1..]
                 opener[0] = opener.first[1..]
                 false
               end
      # Drop the opener once all of its delimiter characters are consumed
      before = before[..-2] if opener.first.empty?
      backlog = before + [build_emph(capture, strong)] + [closer]
    end
    backlog
  end

  # Build strong or normal emphasis depending on the boolean flag
  # @param children [Array<String, ::PointBlank::DOM::DOMObject>]
  # @param strong [Boolean]
  # @return [::PointBlank::DOM::DOMObject]
  def self.build_emph(children, strong)
    klass = strong ? ::PointBlank::DOM::InlineStrong : ::PointBlank::DOM::InlineEmphasis
    obj = klass.new
    tokens = children.map { |child| child.is_a?(Array) ? child.first : child }
    StackScanner.new(obj, init_tokens: tokens).scan
    obj
  end
end
|
|
1429
|
+
|
|
1430
|
+
# Hard break
class HardBreakInline < NullInline
  # (see ::PointBlank::Parsing::NullInline#tokenize)
  # Emits a closing break token for every pattern hit that is either the
  # space+newline form or an unescaped backslash+newline; any other hit
  # (an escaped backslash) is replaced by a one-space string.
  # NOTE(review): the number of spaces in the pattern and in the literals
  # below is reproduced as found here; CommonMark hard breaks require two
  # or more trailing spaces - confirm against the upstream file.
  def self.tokenize(string)
    iterate_tokens(string, /(?: \n|\\\n)/) do |_before, token, matched|
      next ["\n", self, :close] if token.start_with?(" \n")
      next ["\n", self, :close] if matched

      " "
    end
  end

  # (see ::PointBlank::Parsing::NullInline#reverse_walk)
  # Replaces the break token at the end of the backlog (in place) with a
  # freshly built, childless node and returns the backlog.
  def self.reverse_walk(backlog, **_doc)
    backlog[-1] = build([])
    backlog
  end
end
|
|
1448
|
+
end
|
|
1449
|
+
|
|
1450
|
+
# Domain object model elements
|
|
1451
|
+
module DOM
|
|
1452
|
+
# Raised when something that is not a DOMObject is inserted into the tree.
class DOMError < StandardError; end
|
|
1453
|
+
|
|
1454
|
+
# DOM Object.
# Node of the document tree. The class side holds the parsing
# configuration (scanner, parser, valid children, overlays); instances
# hold content, properties and an ordered list of children.
class DOMObject
  class << self
    attr_accessor :scanner, :parser,
                  :unsorted_children,
                  :unsorted_overlays

    # Make subclasses inherit scanner and valid children
    # @param subclass [Class]
    def inherited(subclass)
      subclass.parser ||= @parser
      subclass.scanner ||= @scanner
      subclass.unsorted_children ||= @unsorted_children.dup || []
      super(subclass)
    end

    # Sort children by priority
    # @return [void]
    def sort_children
      @valid_children = @unsorted_children&.sort_by(&:last)&.map(&:first) ||
                        []
    end

    # Define valid child for this DOMObject class
    # @param child [Class]
    # @param priority [Integer] lower values sort first
    # @return [void]
    def define_child(child, priority = 9999)
      # Drop the memoized sorted list so the new child becomes visible
      @valid_children = nil
      @unsorted_children ||= []
      @unsorted_children.append([child, priority])
    end

    # Define child element scanner for this DOMObject class
    # @param scanner [Class]
    # @return [void]
    def define_scanner(scanner)
      @scanner = scanner
    end

    # Define self parser for this DOMObject class.
    # Also records this class as the parser's build target.
    # @param parser [::PointBlank::Parsing::NullParser]
    # @return [void]
    def define_parser(parser)
      parser.parser_for = self
      @parser = parser
    end

    # Define an overlay - a parsing strategy that occurs once a block is closed.
    # May transform block if #process method of the overlay class returns
    # a class.
    # @param overlay [::PointBlank::Parsing::NullOverlay]
    # @param priority [Integer] lower values sort first
    # @return [void]
    def define_overlay(overlay, priority = 9999)
      # Drop the memoized sorted list so the new overlay becomes visible
      @valid_overlays = nil
      @unsorted_overlays ||= []
      @unsorted_overlays.append([overlay, priority])
    end

    # Sort overlays by priority
    # @return [void]
    def sort_overlays
      @valid_overlays = @unsorted_overlays&.sort_by(&:last)&.map(&:first) ||
                        []
    end

    # Parse a document
    # @param doc [Object] input handed to the scanner
    # @return [Object] whatever the scanner's #scan returns
    def parse(doc)
      newdoc = new
      newdoc.parser = parser.new
      scan = @scanner.new(doc, newdoc)
      scan.scan
    end

    # Source parameters from parent (fixes recursive dependency)
    # Re-copies scanner, parser, children and overlays from the superclass
    # and refreshes the derived lists.
    def upsource
      superclass&.tap do |sc|
        @scanner = sc.scanner
        @parser = sc.parser
        @unsorted_children = sc.unsorted_children.dup
        @unsorted_overlays = sc.unsorted_overlays.dup
      end
      @valid_overlays = nil
      sort_children
    end

    # Get array of valid overlays sorted by priority
    # @return [Array<::PointBlank::Parsing::NullOverlay>]
    def valid_overlays
      sort_overlays unless @valid_overlays
      @valid_overlays
    end

    # Get array of valid children sorted by priority
    # @return [Array<Class>]
    def valid_children
      sort_children unless @valid_children
      @valid_children
    end
  end

  include ::Enumerable

  attr_accessor :content, :parser, :parent, :position, :properties
  attr_reader :temp_children

  def initialize
    @children = []
    @temp_children = []
    @properties = {}
    @content = ""
  end

  # Set element at position.
  # Like #append_child, the element is linked to this node: its parent and
  # position are updated.
  # @param index [Integer]
  # @param element [DOMObject]
  # @return [DOMObject]
  # @raise [DOMError] if element is not a DOMObject
  def []=(index, element)
    unless element.is_a? ::PointBlank::DOM::DOMObject
      raise DOMError, "invalid DOM class #{element.class}"
    end

    @children[index] = element
    element.parent = self
    element.position = index.negative? ? @children.length + index : index
  end

  # Get element at position
  # @param index [Integer]
  # @return [DOMObject]
  def [](index)
    @children[index]
  end

  # Iterate over each child of DOMObject
  # @param block [#call]
  def each(&block)
    @children.each(&block)
  end

  # Return an array duplicate of all children
  # @return [Array<DOMObject>]
  def children
    @children.dup
  end

  # Get root element containing this child
  # @return [::PointBlank::DOM::DOMObject]
  def root
    current_root = self
    current_root = current_root.parent while current_root.parent
    current_root
  end

  # Append child
  # @param child [DOMObject]
  # @raise [DOMError] if child is not a DOMObject
  def append_child(child)
    unless child.is_a? ::PointBlank::DOM::DOMObject
      raise DOMError, "invalid DOM class #{child.class}"
    end

    child.parent = self
    child.position = @children.length
    @children.append(child)
  end

  # Append temp child
  # @param child [DOMObject]
  def append_temp_child(child)
    @temp_children.append(child)
  end
end
|
|
1620
|
+
|
|
1621
|
+
# Temporary text node. Declares no parser, children or behavior of its
# own in this file section.
class TempText < DOMObject
end
|
|
1624
|
+
|
|
1625
|
+
# Inline text. Leaf node whose #content holds the text; created by the
# inline scanners for plain text runs.
class Text < DOMObject
end
|
|
1628
|
+
|
|
1629
|
+
# Inline preformatted text (code span), built by CodeInline.
class InlinePre < DOMObject
  define_parser ::PointBlank::Parsing::CodeInline
end
|
|
1633
|
+
|
|
1634
|
+
# Hard line break, built by HardBreakInline.
class InlineBreak < DOMObject
  define_parser ::PointBlank::Parsing::HardBreakInline
end
|
|
1638
|
+
|
|
1639
|
+
# Autolink, built by AutolinkInline (which stores the target under
# properties[:uri]).
class InlineAutolink < DOMObject
  define_parser ::PointBlank::Parsing::AutolinkInline
end
|
|
1643
|
+
|
|
1644
|
+
# Inline formattable text. Common superclass of InlineImage and InlineLink.
class InlineFormattable < DOMObject
end
|
|
1647
|
+
|
|
1648
|
+
# Image. May contain code spans and hard breaks; emphasis children are
# registered further down, once those classes exist.
class InlineImage < InlineFormattable
  define_parser ::PointBlank::Parsing::ImageInline
  define_child ::PointBlank::DOM::InlinePre, 4000
  define_child ::PointBlank::DOM::InlineBreak, 9999
  ## Images nested inside images are deliberately left disabled:
  # define_child ::PointBlank::DOM::InlineImage
end
|
|
1656
|
+
|
|
1657
|
+
# Hyperreferenced text (link). May contain code spans, images and hard
# breaks; emphasis children are registered further down, once those
# classes exist.
class InlineLink < InlineFormattable
  define_parser ::PointBlank::Parsing::LinkInline
  define_child ::PointBlank::DOM::InlinePre, 4000
  define_child ::PointBlank::DOM::InlineImage, 5000
  define_child ::PointBlank::DOM::InlineBreak, 9999
  ## Autolinks inside links are left disabled (unclear whether useful):
  # define_child ::PointBlank::DOM::InlineAutolink
end
|
|
1666
|
+
|
|
1667
|
+
# Inline root. Container scanned by StackScanner; the numbers are the
# priorities by which StackScanner groups child parsers into rounds.
# Emphasis children are registered further down, once those classes exist.
class InlineRoot < DOMObject
  define_scanner ::PointBlank::Parsing::StackScanner
  define_child ::PointBlank::DOM::InlinePre, 4000
  define_child ::PointBlank::DOM::InlineAutolink, 4000
  define_child ::PointBlank::DOM::InlineImage, 5000
  define_child ::PointBlank::DOM::InlineLink, 6000
  define_child ::PointBlank::DOM::InlineBreak, 9999
end
|
|
1676
|
+
|
|
1677
|
+
# Strong emphasis. Subclass of InlineRoot, so it accepts the same children.
class InlineStrong < InlineRoot
end
|
|
1680
|
+
|
|
1681
|
+
# Emphasis. Subclass of InlineRoot, so it accepts the same children.
class InlineEmphasis < InlineRoot
end
|
|
1684
|
+
|
|
1685
|
+
# Late wiring of the emphasis classes. They subclass InlineRoot and are
# also its children, so registration has to happen after all of them exist.
InlineRoot.class_eval do
  define_child ::PointBlank::DOM::InlineStrong, 8000
  define_child ::PointBlank::DOM::InlineEmphasis, 8000
end

# The subclasses copied InlineRoot's child list when they were created,
# before the two entries above were added; re-copy it now.
InlineRoot.subclasses.each(&:upsource)

# upsource also re-copied InlineRoot's parser, so the emphasis parser is
# assigned afterwards. define_parser records the class on the parser as
# well, so EmphInline.parser_for ends up being the class assigned last.
InlineStrong.class_eval do
  define_parser ::PointBlank::Parsing::EmphInline
end

InlineEmphasis.class_eval do
  define_parser ::PointBlank::Parsing::EmphInline
end

# Images and links may contain emphasis, too.
InlineImage.class_eval do
  define_child ::PointBlank::DOM::InlineStrong, 8000
  define_child ::PointBlank::DOM::InlineEmphasis, 8000
end

InlineLink.class_eval do
  define_child ::PointBlank::DOM::InlineStrong, 8000
  define_child ::PointBlank::DOM::InlineEmphasis, 8000
end
|
|
1709
|
+
# Block root (virtual). Declared empty here so other classes can subclass
# it; its scanner, parser and children are configured later in the file.
class Block < DOMObject
end
|
|
1712
|
+
|
|
1713
|
+
# Leaf block (virtual)
class LeafBlock < DOMObject
  # Virtual hook to delay inline processing: trims the content (when
  # present) and registers the block with the root as a temp child.
  def parse_inner
    self.content &&= content.strip
    root.append_temp_child(self)
  end
end
|
|
1721
|
+
|
|
1722
|
+
# Leaf literal block (virtual)
class LeafLiteralBlock < LeafBlock
  # Virtual hook to push inlines in place of leaf blocks: the content is
  # wrapped, unprocessed, in a single Text child.
  def parse_inner
    literal = ::PointBlank::DOM::Text.new
    literal.content = content
    append_child(literal)
  end
end
|
|
1731
|
+
|
|
1732
|
+
# Document root
class Document < Block
  # (see ::PointBlank::DOM::DOMObject#parse)
  def self.parse(doc)
    super(doc).tap do |output|
      # Inline parsing is deferred until the whole document has been
      # processed, because of the way link definitions have to be handled.
      output.temp_children.each do |block|
        inline = ::PointBlank::DOM::InlineRoot.new
        inline.parent = block.parent
        inline.content = block.content
        ::PointBlank::Parsing::StackScanner.new(inline).scan
        block.content = ""
        inline.each { |node| block.append_child(node) }
      end
      output.temp_children.clear
    end
  end
end
|
|
1753
|
+
|
|
1754
|
+
# Paragraph in a document (separated by 2 newlines)
class Paragraph < DOMObject
  define_parser ::PointBlank::Parsing::ParagraphParser
  define_overlay ::PointBlank::Parsing::ParagraphUnderlineOverlay, 0
  define_overlay ::PointBlank::Parsing::LinkReferenceOverlay

  # Virtual hook to delay inline processing: trims the content (when
  # present) and registers the paragraph with the root as a temp child.
  def parse_inner
    self.content &&= content.strip
    root.append_temp_child(self)
  end
end
|
|
1766
|
+
|
|
1767
|
+
# Setext heading, level 1
class SetextHeading1 < LeafBlock
  define_parser ::PointBlank::Parsing::SetextParserLV1
end
|
|
1771
|
+
|
|
1772
|
+
# Setext heading, level 2. Subclasses level 1 and replaces its parser.
class SetextHeading2 < SetextHeading1
  define_parser ::PointBlank::Parsing::SetextParserLV2
end
|
|
1776
|
+
|
|
1777
|
+
# ATX heading, level 1. Superclass of the other ATX heading levels.
class ATXHeading1 < LeafBlock
  define_parser ::PointBlank::Parsing::ATXParserLV1
end
|
|
1781
|
+
|
|
1782
|
+
# ATX heading, level 2
class ATXHeading2 < ATXHeading1
  define_parser ::PointBlank::Parsing::ATXParserLV2
end
|
|
1786
|
+
|
|
1787
|
+
# ATX heading, level 3
class ATXHeading3 < ATXHeading1
  define_parser ::PointBlank::Parsing::ATXParserLV3
end
|
|
1791
|
+
|
|
1792
|
+
# ATX heading, level 4
class ATXHeading4 < ATXHeading1
  define_parser ::PointBlank::Parsing::ATXParserLV4
end
|
|
1796
|
+
|
|
1797
|
+
# ATX heading, level 5
class ATXHeading5 < ATXHeading1
  define_parser ::PointBlank::Parsing::ATXParserLV5
end
|
|
1801
|
+
|
|
1802
|
+
# ATX heading, level 6
class ATXHeading6 < ATXHeading1
  define_parser ::PointBlank::Parsing::ATXParserLV6
end
|
|
1806
|
+
|
|
1807
|
+
# Preformatted fenced code block. As a LeafLiteralBlock its content is
# kept as one literal Text child instead of being inline-parsed.
class CodeBlock < LeafLiteralBlock
  define_parser ::PointBlank::Parsing::FencedCodeBlock
end
|
|
1811
|
+
|
|
1812
|
+
# Quote block.
# Declared empty here; its parser is assigned further down in this file
# through QuoteBlock.class_eval.
class QuoteBlock < Block; end
|
|
1815
|
+
|
|
1816
|
+
# Element of an unordered list.
# Declared empty here; its parser is assigned further down in this file
# through ULListElement.class_eval.
class ULListElement < Block; end
|
|
1819
|
+
|
|
1820
|
+
# Element of an ordered list.
# Declared empty here; its parser is assigned further down in this file
# through OLListElement.class_eval.
class OLListElement < Block; end
|
|
1823
|
+
|
|
1824
|
+
# Unordered list block.
#
# Uses the LineScanner, is parsed by ULParser, and accepts
# ULListElement as its only declared child type.
class ULBlock < DOMObject
  define_scanner ::PointBlank::Parsing::LineScanner
  define_parser ::PointBlank::Parsing::ULParser
  define_child ::PointBlank::DOM::ULListElement
end
|
|
1830
|
+
|
|
1831
|
+
# Ordered list block.
#
# Uses the LineScanner, is parsed by OLParser, and accepts
# OLListElement as its only declared child type.
class OLBlock < DOMObject
  define_scanner ::PointBlank::Parsing::LineScanner
  # Ordered-list parser, matching the assignment that OLBlock.class_eval
  # makes further down in this file (an unordered-list parser here would
  # leave the class misconfigured until that later override runs).
  define_parser ::PointBlank::Parsing::OLParser
  define_child ::PointBlank::DOM::OLListElement
end
|
|
1837
|
+
|
|
1838
|
+
# Indent block.
# Literal leaf block whose parser is IndentedBlock.
class IndentBlock < LeafLiteralBlock
  define_parser ::PointBlank::Parsing::IndentedBlock
end
|
|
1842
|
+
|
|
1843
|
+
# Horizontal rule.
# DOM object whose parser is ThematicBreakParser.
class HorizontalRule < DOMObject
  define_parser ::PointBlank::Parsing::ThematicBreakParser
end
|
|
1847
|
+
|
|
1848
|
+
# Block root (real).
#
# Gives Block the LineScanner and the NullParser, then registers every
# block type it may contain and finishes with sort_children. The number
# paired with each child is passed to define_child unchanged (presumably
# a sort priority consumed by sort_children - confirm against DOMObject).
# Registration order is kept exactly as listed.
Block.class_eval do
  define_scanner ::PointBlank::Parsing::LineScanner
  define_parser ::PointBlank::Parsing::NullParser

  [
    [::PointBlank::DOM::IndentBlock, 9999],
    [::PointBlank::DOM::Paragraph, 9998],
    [::PointBlank::DOM::ATXHeading1, 600],
    [::PointBlank::DOM::ATXHeading2, 600],
    [::PointBlank::DOM::ATXHeading3, 600],
    [::PointBlank::DOM::ATXHeading4, 600],
    [::PointBlank::DOM::ATXHeading5, 600],
    [::PointBlank::DOM::ATXHeading6, 600],
    [::PointBlank::DOM::QuoteBlock, 600],
    [::PointBlank::DOM::ULBlock, 700],
    [::PointBlank::DOM::OLBlock, 700],
    [::PointBlank::DOM::CodeBlock, 600],
    [::PointBlank::DOM::HorizontalRule, 300]
  ].each { |child, priority| define_child(child, priority) }

  sort_children
end
|
|
1867
|
+
|
|
1868
|
+
# Registers both setext heading classes as children of Paragraph, with
# second arguments 1 and 2 respectively. This happens after the class
# bodies because the heading classes are declared later than Paragraph.
Paragraph.class_eval do
  define_child(::PointBlank::DOM::SetextHeading1, 1)
  define_child(::PointBlank::DOM::SetextHeading2, 2)
end
|
|
1872
|
+
|
|
1873
|
+
# Calls +upsource+ on every subclass of Block, now that Block itself is
# fully configured (presumably this refreshes the settings each subclass
# takes from Block - confirm against the definition of upsource).
Block.subclasses.each { |subclass| subclass.upsource }
|
|
1874
|
+
|
|
1875
|
+
# Assigns QuoteParser to QuoteBlock; runs after the upsource pass over
# the subclasses of Block.
QuoteBlock.class_eval { define_parser ::PointBlank::Parsing::QuoteParser }
|
|
1878
|
+
|
|
1879
|
+
# Assigns ULParser to ULBlock.
# NOTE(review): the ULBlock class body already declares this same parser,
# so this looks redundant - confirm before removing.
ULBlock.class_eval { define_parser ::PointBlank::Parsing::ULParser }
|
|
1882
|
+
|
|
1883
|
+
# Assigns ULElementParser to ULListElement; runs after the upsource pass
# over the subclasses of Block.
ULListElement.class_eval { define_parser ::PointBlank::Parsing::ULElementParser }
|
|
1886
|
+
|
|
1887
|
+
# Assigns OLParser to OLBlock, replacing whichever parser the class body
# declared.
OLBlock.class_eval { define_parser ::PointBlank::Parsing::OLParser }
|
|
1890
|
+
|
|
1891
|
+
# Assigns OLElementParser to OLListElement; runs after the upsource pass
# over the subclasses of Block.
OLListElement.class_eval { define_parser ::PointBlank::Parsing::OLElementParser }
|
|
1894
|
+
end
|
|
1895
|
+
end
|