mmmd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1895 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'util'
4
+ require 'uri'
5
+
6
+ # Modular, extensible approach to parsing markdown as close as
7
+ # it gets to CommonMark spec (as of version 0.31.2).
8
+ module PointBlank
9
+ module Parsing
10
+ # Shared methods for parsing links
11
+ module LinkSharedMethods
12
+ # Normalize a label
13
+ # @param string [String]
14
+ # @return [String]
15
+ def normalize_label(string)
16
+ string = string.downcase(:fold).strip.gsub(/\s+/, " ")
17
+ return nil if string.empty?
18
+
19
+ string
20
+ end
21
+
22
+ # Read link label.
23
+ # Returns matched label or nil, and remainder of the string
24
+ # @param text [String]
25
+ # @return [Array(<String, nil>, String)]
26
+ def read_return_label(text)
27
+ prev = text
28
+ label = ""
29
+ return nil, text unless text.start_with?('[')
30
+
31
+ bracketcount = 0
32
+ text.split(/(?<!\\)([\[\]])/).each do |part|
33
+ if part == '['
34
+ bracketcount += 1
35
+ elsif part == ']'
36
+ bracketcount -= 1
37
+ break (label += part) if bracketcount.zero?
38
+ end
39
+ label += part
40
+ end
41
+ return [nil, text] unless bracketcount.zero?
42
+
43
+ text = text.delete_prefix(label)
44
+ label = normalize_label(label[1..-2])
45
+ label ? [label, text] : [nil, prev]
46
+ end
47
+
48
+ # Read link label.
49
+ # Returns matched label or nil, and remainder of the string
50
+ # @param text [String]
51
+ # @return [Array(<String, nil>, String)]
52
+ def read_label(text)
53
+ prev = text
54
+ label = ""
55
+ return nil, text unless text.start_with?('[')
56
+
57
+ bracketcount = 0
58
+ text.split(/(?<!\\)([\[\]])/).each do |part|
59
+ if part == '['
60
+ bracketcount += 1
61
+ elsif part == ']'
62
+ bracketcount -= 1
63
+ break (label += part) if bracketcount.zero?
64
+ end
65
+ label += part
66
+ end
67
+ text = text.delete_prefix(label)
68
+ label = normalize_label(label[1..-2])
69
+ text.start_with?(':') && label ? [label, text[1..].lstrip] : [nil, prev]
70
+ end
71
+
72
+ # Read link destination (URI).
73
+ # Returns matched label or nil, and remainder of the string
74
+ # @param text [String]
75
+ # @return [Array(<String, nil>, String)]
76
+ def read_destination(text)
77
+ if (result = text.match(/\A<.*?(?<![^\\]\\)>/m)) &&
78
+ !result[0][1..].match?(/(?<![^\\]\\)</)
79
+ [process_destination(result[0].gsub(/\\(?=[><])/, '')[1..-2]),
80
+ text.delete_prefix(result[0]).lstrip]
81
+ elsif (result = text.match(/\A\S+/)) &&
82
+ !result[0].start_with?('<') &&
83
+ result &&
84
+ balanced?(result[0])
85
+ [process_destination(result[0]),
86
+ text.delete_prefix(result[0]).lstrip]
87
+ else
88
+ [nil, text]
89
+ end
90
+ end
91
+
92
+ # Read link title.
93
+ # Returns matched label or nil, and remainder of the string
94
+ # @param text [String]
95
+ # @return [Array(<String, nil>, String)]
96
+ def read_title(text)
97
+ if text.start_with?("'") &&
98
+ (result = text.match(/\A'.*?(?<!\\)'/m))
99
+ [process_title(result[0][1..-2]),
100
+ text.delete_prefix(result[0]).lstrip]
101
+ elsif text.start_with?('"') &&
102
+ (result = text.match(/\A".*?(?<!\\)"/m))
103
+ [process_title(result[0][1..-2]),
104
+ text.delete_prefix(result[0]).lstrip]
105
+ elsif text.start_with?('(') &&
106
+ (result = find_balanced_end(text))
107
+ [process_title(text[1..(result - 1)]),
108
+ text.delete_prefix(text[..result]).lstrip]
109
+ else
110
+ [nil, text]
111
+ end
112
+ end
113
+
114
+ # Read link properties.
115
+ # Returns matched parameters as hash or nil, and remainder of the string
116
+ # @param text [String]
117
+ # @return [Array([Hash, nil], String)]
118
+ def read_properties(text)
119
+ properties = {}
120
+ remaining = text
121
+ if text.start_with? '[' # link label
122
+ properties[:label], remaining = read_return_label(remaining)
123
+ close_bracket = false
124
+ elsif text.start_with? '(' # link properties
125
+ destination, remaining = read_destination(remaining[1..])
126
+ return [nil, text] unless destination
127
+
128
+ title, remaining = read_title(remaining)
129
+ properties[:uri] = destination
130
+ properties[:title] = title
131
+ close_bracket = true
132
+ end
133
+ if properties.empty? || (close_bracket && !remaining.start_with?(')'))
134
+ [nil, text]
135
+ else
136
+ [properties, close_bracket ? remaining[1..] : remaining]
137
+ end
138
+ end
139
+
140
+ # Check if brackets are balanced
141
+ # @param text [String]
142
+ # @return [Boolean]
143
+ def balanced?(text)
144
+ bracketcount = 0
145
+ text.split(/(?<!\\)([()])/).each do |part|
146
+ if part == '('
147
+ bracketcount += 1
148
+ elsif part == ')'
149
+ bracketcount -= 1
150
+ return false if bracketcount.negative?
151
+ end
152
+ end
153
+ bracketcount.zero?
154
+ end
155
+
156
+ # Find index at which balanced part of a bracket closes
157
+ # @param text [String]
158
+ # @return [Integer, nil]
159
+ def find_balanced_end(text)
160
+ bracketcount = 0
161
+ index = 0
162
+ text.split(/(?<!\\)([()])/).each do |part|
163
+ if part == '('
164
+ bracketcount += 1
165
+ elsif part == ')'
166
+ bracketcount -= 1
167
+ return index if bracketcount.zero?
168
+ end
169
+ index += part.length
170
+ end
171
+ nil
172
+ end
173
+
174
+ # Process destination string
175
+ # @param string [String]
176
+ # @return [String]
177
+ def process_destination(string)
178
+ string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
179
+ '\\1')
180
+ string = string.gsub("\n", " ")
181
+ MMMD::EntityUtils.encode_uri(
182
+ MMMD::EntityUtils.decode_entities(string)
183
+ )
184
+ end
185
+
186
+ # Process title string
187
+ # @param string [String]
188
+ # @return [String]
189
+ def process_title(string)
190
+ string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
191
+ '\\1')
192
+ string = string.gsub("\n", " ")
193
+ MMMD::EntityUtils.decode_entities(string)
194
+ end
195
+ end
196
+
197
+ class LineScanner
198
+ def initialize(text, doc)
199
+ @text = text
200
+ @document = doc
201
+ @stack = [@document]
202
+ @depth = 0
203
+ @topdepth = 0
204
+ end
205
+
206
+ # Scan document and return scanned structure
207
+ def scan
208
+ @text.each_line do |line|
209
+ # Consume markers from lines to keep the levels open
210
+ line = consume_markers(line)
211
+ # DO NOT RHEDEEM line if it's empty
212
+ line = line&.strip&.empty? ? nil : line
213
+ # Open up a new block on the line out of all allowed child types
214
+ while line && (status, line = try_open(line)) && status; end
215
+ end
216
+ close_up(0)
217
+ @stack.first
218
+ end
219
+
220
+ private
221
+
222
+ # Try to open a new block on the line
223
+ def try_open(line)
224
+ return [false, line] unless topclass.parser && line
225
+ return [false, line] unless [nil, self.class].include? topclass.scanner
226
+
227
+ topclass.valid_children.each do |cand|
228
+ next unless cand.parser.begin?(line)
229
+
230
+ @depth += 1
231
+ @topdepth = @depth if @topdepth < @depth
232
+ @stack[@depth] = cand.new
233
+ @stack[@depth - 1].append_child(toplevel)
234
+ toplevel.parser = cand.parser.new
235
+ line, _implicit = toplevel.parser.consume(line, @stack[@depth - 1])
236
+ return [true, line]
237
+ end
238
+ [false, line]
239
+ end
240
+
241
+ # Attempt to consume markers for all valid stack elements
242
+ def consume_markers(line)
243
+ climb = -1
244
+ previous = nil
245
+ implicit = nil
246
+ @stack[..@depth].each do |element|
247
+ newline, impl = element.parser.consume(line, previous)
248
+ implicit = impl unless impl.nil?
249
+ line = newline if newline
250
+ break unless newline
251
+
252
+ climb += 1
253
+ previous = element
254
+ end
255
+ if climb < @depth
256
+ if implicit && @stack[@topdepth].is_a?(::PointBlank::DOM::Paragraph)
257
+ backref = @stack[@topdepth]
258
+ remaining, = backref.parser.consume(line, previous, lazy: true)
259
+ return nil if remaining
260
+ end
261
+ close_up(climb)
262
+ end
263
+ line
264
+ end
265
+
266
+ # Close upper levels than picked level
267
+ def close_up(level)
268
+ ((level + 1)..(@stack.length - 1)).each do |index|
269
+ x = @stack[index]
270
+ x.content = x.parser.parsed_content
271
+ x.parser.applyprops(x) if x.parser.respond_to? :applyprops
272
+ switch = x.parser.close(x)
273
+ x.parser = nil
274
+ x = transfer(x, switch) if switch
275
+ x.parse_inner if x.respond_to? :parse_inner
276
+ end
277
+ @topdepth = @depth = level
278
+ @stack = @stack[..level]
279
+ end
280
+
281
+ # Transfer data from class to another class (morph class)
282
+ def transfer(block, switchclass)
283
+ newblock = switchclass.new
284
+ newblock.content = block.content
285
+ newblock.parser = nil
286
+ newblock.parent = block.parent
287
+ block.parent[block.position] = newblock
288
+ newblock
289
+ end
290
+
291
+ # Get top level element at the current moment
292
+ def toplevel
293
+ @stack[@depth]
294
+ end
295
+
296
+ # Get top level element class
297
+ def topclass
298
+ @stack[@depth].class
299
+ end
300
+
301
+ # Debug ifno
302
+ def debug(line)
303
+ warn "#{@depth}:#{@topdepth} #{line.inspect}"
304
+ end
305
+ end
306
+
307
+ # Null parser
308
+ class NullParser
309
+ class << self
310
+ attr_accessor :parser_for
311
+
312
+ # Check that a parser parses this line as a beginning of a block
313
+ # @param line [String]
314
+ # @return [Boolean]
315
+ def begin?(_line)
316
+ false
317
+ end
318
+ end
319
+
320
+ # Instantiate a new parser object
321
+ def initialize
322
+ @buffer = []
323
+ end
324
+
325
+ # Close parser
326
+ # @param block [::PointBlank::DOM::DOMObject]
327
+ # @return [nil, Class]
328
+ def close(block, lazy: false)
329
+ block.class.valid_overlays.each do |overlay_class|
330
+ overlay = overlay_class.new
331
+ output = overlay.process(block, lazy: lazy)
332
+ return output if output
333
+ end
334
+ nil
335
+ end
336
+
337
+ # Return parsed content
338
+ # @return [String]
339
+ def parsed_content
340
+ @buffer.join('')
341
+ end
342
+
343
+ # Consume line markers
344
+ # @param line [String]
345
+ # @return [Array(String, Boolean)]
346
+ def consume(line, _parent = nil, **_hargs)
347
+ [line, false]
348
+ end
349
+
350
+ private
351
+
352
+ # Push a new parsed line
353
+ # @param line [String]
354
+ # @return [void]
355
+ def push(line)
356
+ @buffer.append(line)
357
+ end
358
+ end
359
+
360
+ # Paragraph parser
361
+ class ParagraphParser < NullParser
362
+ # (see ::PointBlank::Parsing::NullParser#begin?)
363
+ def self.begin?(line)
364
+ line.match?(/\A {0,3}\S/)
365
+ end
366
+
367
+ # (see ::PointBlank::Parsing::NullParser#consume)
368
+ def consume(line, parent = nil, lazy: false)
369
+ @lazy_triggered = lazy || @lazy_triggered
370
+ return [nil, nil] if line.match?(/\A {0,3}\Z/)
371
+ return [nil, nil] if @closed
372
+ return [nil, nil] if check_candidates(line, parent)
373
+
374
+ push(line)
375
+ ["", nil]
376
+ end
377
+
378
+ # (see ::PointBlank::Parsing::NullParser#close)
379
+ def close(block, **_lazy)
380
+ super(block, lazy: @lazy_triggered)
381
+ end
382
+
383
+ private
384
+
385
+ # Check that there are no other candidates for line beginning
386
+ def check_candidates(line, parent)
387
+ return false unless parent
388
+
389
+ classes = parent.class.valid_children
390
+ once = false
391
+ other = classes.filter do |cls|
392
+ !(once ||= (cls == ::PointBlank::DOM::Paragraph))
393
+ end
394
+ underlines_match = ::PointBlank::DOM::Paragraph.valid_children.any? do |x|
395
+ x.parser.begin?(line)
396
+ end
397
+ if underlines_match && !@lazy_triggered
398
+ @closed = true
399
+ return false
400
+ end
401
+ other.any? do |x|
402
+ x.parser.begin? line
403
+ end
404
+ end
405
+ end
406
+
407
+ # ATX heading
408
+ # @abstract
409
+ class ATXParser < NullParser
410
+ class << self
411
+ attr_accessor :level
412
+
413
+ # (see ::PointBlank::Parsing::NullParser#begin?)
414
+ def begin?(line)
415
+ line.match?(/^ {0,3}\#{#{@level}}(?: .*|)$/)
416
+ end
417
+ end
418
+
419
+ def initialize
420
+ super
421
+ @matched = false
422
+ end
423
+
424
+ # (see ::PointBlank::Parsing::NullParser#consume)
425
+ def consume(line, _parent, **_hargs)
426
+ return [nil, false] if @matched
427
+
428
+ @matched = true
429
+ push(line
430
+ .gsub(/\A {0,3}\#{#{self.class.level}} */, '')
431
+ .gsub(/( #+|)\Z/, ''))
432
+ [line, false]
433
+ end
434
+ end
435
+
436
+ # ATX heading level 1
437
+ class ATXParserLV1 < ATXParser
438
+ self.level = 1
439
+ end
440
+
441
+ # ATX heading level 2
442
+ class ATXParserLV2 < ATXParser
443
+ self.level = 2
444
+ end
445
+
446
+ # ATX heading level 3
447
+ class ATXParserLV3 < ATXParser
448
+ self.level = 3
449
+ end
450
+
451
+ # ATX heading level 4
452
+ class ATXParserLV4 < ATXParser
453
+ self.level = 4
454
+ end
455
+
456
+ # ATX heading level 5
457
+ class ATXParserLV5 < ATXParser
458
+ self.level = 5
459
+ end
460
+
461
+ # ATX heading level 6
462
+ class ATXParserLV6 < ATXParser
463
+ self.level = 6
464
+ end
465
+
466
+ # Underline parser
467
+ # @abstract
468
+ class UnderlineParser < NullParser
469
+ # Checks whether a paragraph underline is on this line.
470
+ # Should match an entire underline.
471
+ # @param line [String]
472
+ # @return [boolean]
473
+ def self.begin?(_line)
474
+ false
475
+ end
476
+ end
477
+
478
+ # Setext parser level 1
479
+ class SetextParserLV1 < UnderlineParser
480
+ # (see ::PointBlank::Parsing::UnderlineParser)
481
+ def self.begin?(line)
482
+ line.match?(/\A {0,3}={3,}\s*\z/)
483
+ end
484
+ end
485
+
486
+ # Setext parser level 2
487
+ class SetextParserLV2 < UnderlineParser
488
+ # (see ::PointBlank::Parsing::UnderlineParser)
489
+ def self.begin?(line)
490
+ line.match?(/\A {0,3}-{3,}\s*\z/)
491
+ end
492
+ end
493
+
494
+ # Unordered list block (group)
495
+ class ULParser < NullParser
496
+ # (see ::PointBlank::Parsing::NullParser#begin?)
497
+ def self.begin?(line)
498
+ line.match?(/\A {0,3}([-+*])(\s+)/)
499
+ end
500
+
501
+ # (see ::PointBlank::Parsing::NullParser#close)
502
+ def applyprops(block)
503
+ block.each do |child|
504
+ child.properties["marker"] = @marker[-1]
505
+ end
506
+ end
507
+
508
+ # (see ::PointBlank::Parsing::NullParser#consume)
509
+ def consume(line, _parent = nil, **_hargs)
510
+ self.open(line)
511
+ return [nil, true] unless continues?(line)
512
+
513
+ [normalize(line), true]
514
+ end
515
+
516
+ attr_reader :preoff
517
+
518
+ private
519
+
520
+ # Open block if it hasn't been opened yet
521
+ def open(line)
522
+ return if @open
523
+
524
+ preoff, mark, off = line.match(/\A( {0,3})([-+*])(\s+)/)&.captures
525
+ return unless mark
526
+
527
+ @preoff = preoff
528
+ @marker ||= ['+', '*'].include?(mark) ? "\\#{mark}" : mark
529
+ @offset = off
530
+ @open = true
531
+ end
532
+
533
+ # Check if a line continues this ULParser block
534
+ def continues?(line)
535
+ return false if ::PointBlank::Parsing::ThematicBreakParser.begin?(line)
536
+
537
+ line.start_with?(/\A(?: {0,3}#{@marker}| )#{@offset}/) ||
538
+ line.strip.empty?
539
+ end
540
+
541
+ # Strip off pre-marker offset
542
+ def normalize(line)
543
+ line.delete_prefix(@preoff)
544
+ end
545
+ end
546
+
547
+ # Unorder list block (element)
548
+ class ULElementParser < NullParser
549
+ # (see ::PointBlank::Parsing::NullParser#begin?)
550
+ def self.begin?(line)
551
+ line.match?(/\A {0,3}([-+*])(\s+)/)
552
+ end
553
+
554
+ # (see ::PointBlank::Parsing::NullParser#consume)
555
+ def consume(line, parent = nil, **_hargs)
556
+ @parent ||= parent
557
+ return [nil, true] unless continues?(line)
558
+
559
+ self.open(line)
560
+
561
+ [normalize(line), true]
562
+ end
563
+
564
+ private
565
+
566
+ # Open block if it hasn't been opened yet
567
+ def open(line)
568
+ return if @open
569
+
570
+ @marker, @offset = line.match(/\A {0,3}([-+*])(\s+)/)&.captures
571
+ @marker = "\\#{@marker}" if ['+', '*'].include? @marker
572
+ @open = true
573
+ end
574
+
575
+ # Check if a line continues this ULParser block
576
+ def continues?(line)
577
+ return true unless @marker
578
+
579
+ line.start_with?(/\A\s#{@offset}/) ||
580
+ line.strip.empty?
581
+ end
582
+
583
+ # Normalize the line
584
+ def normalize(line)
585
+ if !@opening_stripped
586
+ @opening_stripped = true
587
+ line.gsub(/\A(?: {0,3}#{@marker}| )#{@offset}/, '')
588
+ else
589
+ line.gsub(/\A\s#{@offset}/, '')
590
+ end
591
+ end
592
+ end
593
+
594
+ # Ordered list block (group)
595
+ class OLParser < NullParser
596
+ # (see ::PointBlank::Parsing::NullParser#begin?)
597
+ def self.begin?(line)
598
+ line.match?(/\A {0,3}(\d+)([).])(\s+)/)
599
+ end
600
+
601
+ # (see ::PointBlank::Parsing::NullParser#close)
602
+ def applyprops(block)
603
+ block.each do |child|
604
+ child.properties["marker"] = @mark[-1]
605
+ end
606
+ end
607
+
608
+ # (see ::PointBlank::Parsing::NullParser#consume)
609
+ def consume(line, _parent = nil, **_hargs)
610
+ self.open(line)
611
+ return [nil, true] unless continues?(line)
612
+
613
+ [normalize(line), true]
614
+ end
615
+
616
+ private
617
+
618
+ # Open block if it hasn't been opened yet
619
+ def open(line)
620
+ return if @open
621
+
622
+ pre, num, marker, off = line.match(/\A( {0,3})(\d+)([).])(\s+)/)
623
+ &.captures
624
+ return unless marker
625
+
626
+ @preoff = pre
627
+ @num = " " * (num.length + 1)
628
+ @mark ||= "\\#{marker}"
629
+ @offset = off
630
+ @open = true
631
+ end
632
+
633
+ # Check if a line continues this ULParser block
634
+ def continues?(line)
635
+ return false if ::PointBlank::Parsing::ThematicBreakParser.begin?(line)
636
+
637
+ line.start_with?(/\A(?: {0,3}(\d+)#{@mark}|#{@num})#{@offset}/) ||
638
+ line.strip.empty?
639
+ end
640
+
641
+ # Strip off pre-marker offset
642
+ def normalize(line)
643
+ line.delete_prefix(@preoff)
644
+ end
645
+ end
646
+
647
+ # Unorder list block (element)
648
+ class OLElementParser < NullParser
649
+ # (see ::PointBlank::Parsing::NullParser#begin?)
650
+ def self.begin?(line)
651
+ line.match?(/\A {0,3}(\d+)([).])(\s+)/)
652
+ end
653
+
654
+ # (see ::PointBlank::Parsing::NullParser#consume)
655
+ def consume(line, _parent = nil, **_hargs)
656
+ return [nil, true] unless continues?(line)
657
+
658
+ self.open(line)
659
+
660
+ [normalize(line), true]
661
+ end
662
+
663
+ # (see ::PointBlank::Parsing::NullParser#applyprops)
664
+ def applyprops(block)
665
+ block.properties["number"] = @num.to_i
666
+ end
667
+
668
+ private
669
+
670
+ # Open block if it hasn't been opened yet
671
+ def open(line)
672
+ return if @open
673
+
674
+ num, marker, off = line.match(/\A {0,3}(\d+)([).])(\s+)/)
675
+ &.captures
676
+ @num = num
677
+ @numoffset = " " * (@num.length + 1)
678
+ @marker = "\\#{marker}"
679
+ @offset = off
680
+ @open = true
681
+ end
682
+
683
+ # Check if a line continues this ULParser block
684
+ def continues?(line)
685
+ return true unless @marker
686
+
687
+ line.start_with?(/\A#{@numoffset}#{@offset}/) ||
688
+ line.strip.empty?
689
+ end
690
+
691
+ # Normalize the line
692
+ def normalize(line)
693
+ if !@opening_stripped
694
+ @opening_stripped = true
695
+ line.gsub(/\A(?: {0,3}\d+#{@marker}|#{@numoffset})#{@offset}/, '')
696
+ else
697
+ line.gsub(/\A#{@numoffset}#{@offset}/, '')
698
+ end
699
+ end
700
+ end
701
+
702
+ # Quote block
703
+ class QuoteParser < NullParser
704
+ # (see ::PointBlank::Parsing::NullParser#begin?)
705
+ def self.begin?(line)
706
+ line.start_with?(/\A {0,3}>(?: \S|)/)
707
+ end
708
+
709
+ # (see ::PointBlank::Parsing::NullParser#consume)
710
+ def consume(line, _parent = nil, **_hargs)
711
+ return [nil, true] unless line.start_with?(/\A {0,3}>(?: \S|)/)
712
+
713
+ [normalize(line), true]
714
+ end
715
+
716
+ private
717
+
718
+ # Normalize line in quoteblock
719
+ def normalize(line)
720
+ line.gsub(/\A {0,3}> ?/, '')
721
+ end
722
+ end
723
+
724
+ # Fenced code block
725
+ class FencedCodeBlock < NullParser
726
+ # (see ::PointBlank::Parsing::NullParser#begin?)
727
+ def self.begin?(line)
728
+ line.start_with?(/\A {0,3}(?:`{3,}[^`]+$|~{3,}[^~]+$)/)
729
+ end
730
+
731
+ # (see ::PointBlank::Parsing::NullParser#applyprops)
732
+ def applyprops(block)
733
+ block.properties["infoline"] = @infoline
734
+ end
735
+
736
+ # (see ::PointBlank::Parsing::NullParser#consume)
737
+ def consume(line, _parent = nil, **_hargs)
738
+ return [nil, false] if @closed
739
+
740
+ try_close(line)
741
+ push(line.gsub(/^ {0,#{@space}}/, '')) if @open && !@closed
742
+ self.open(line)
743
+ ["", false]
744
+ end
745
+
746
+ private
747
+
748
+ def try_close(line)
749
+ @closed = true if @open && line.match?(/\A {0,3}#{@marker}+$/)
750
+ end
751
+
752
+ def open(line)
753
+ return if @open
754
+
755
+ s, m, @infoline = line.match(/\A( {0,3})(`{3,}|~{3,})(.*)/)[1..3]
756
+ @space = s.length
757
+ @marker = m
758
+ @open = true
759
+ end
760
+ end
761
+
762
+ # Indented code block
763
+ class IndentedBlock < NullParser
764
+ # (see ::PointBlank::Parsing::NullParser#begin?)
765
+ def self.begin?(line)
766
+ line.start_with?(/\A {4}/)
767
+ end
768
+
769
+ # (see ::PointBlank::Parsing::NullParser#consume)
770
+ def consume(line, _parent = nil, **_hargs)
771
+ return [nil, nil] unless self.class.begin?(line) ||
772
+ line.strip.empty?
773
+
774
+ push(normalize(line))
775
+ ["", false]
776
+ end
777
+
778
+ private
779
+
780
+ def normalize(line)
781
+ line.gsub("\A(?: |\t)", '')
782
+ end
783
+ end
784
+
785
+ # Thematic break parser
786
+ class ThematicBreakParser < NullParser
787
+ # (see PointBlank::Parsing::NullParser#begin?)
788
+ def self.begin?(line)
789
+ line.match?(/\A {0,3}(?:[- ]+|[* ]+|[_ ]+)\n/)
790
+ end
791
+
792
+ # (see PointBlank::Parsing::NullParser#consume)
793
+ def consume(_line, _parent = nil, **_hargs)
794
+ return [nil, nil] if @closed
795
+
796
+ @closed = true
797
+ ["", nil]
798
+ end
799
+ end
800
+
801
+ # Class of parsers that process the paragraph after it finished collection
802
+ class NullOverlay < NullParser
803
+ # Stub
804
+ def self.begin?(_line)
805
+ false
806
+ end
807
+
808
+ # Process block after it closed
809
+ # @param block [::PointBlank::DOM::DOMObject]
810
+ # @param lazy [Boolean]
811
+ # @return [nil, Class]
812
+ def process(_block, lazy: false); end
813
+ end
814
+
815
+ # Overlay for processing underline classes of paragraph
816
+ class ParagraphUnderlineOverlay < NullOverlay
817
+ # (see ::PointBlank::Parsing::NullOverlay#process)
818
+ def process(block, lazy: false)
819
+ output = check_underlines(block.content.lines.last, lazy)
820
+ block.content = block.content.lines[0..-2].join("") if output
821
+ output
822
+ end
823
+
824
+ private
825
+
826
+ # Check if the current line is an underline (morphs class)
827
+ def check_underlines(line, lazy)
828
+ return nil if lazy
829
+
830
+ ::PointBlank::DOM::Paragraph.valid_children.each do |underline|
831
+ parser = underline.parser
832
+ next unless parser < ::PointBlank::Parsing::UnderlineParser
833
+ next unless parser.begin? line
834
+
835
+ return underline
836
+ end
837
+ nil
838
+ end
839
+ end
840
+
841
+ # Overlay for link reference definitions
842
+ class LinkReferenceOverlay < NullOverlay
843
+ include LinkSharedMethods
844
+
845
+ def initialize
846
+ super
847
+ @definitions = {}
848
+ end
849
+
850
+ # (see ::PointBlank::Parsing::NullOverlay#process)
851
+ def process(block, **_lazy)
852
+ text = block.content
853
+ loop do
854
+ prev = text
855
+ label, text = read_label(text)
856
+ break prev unless label
857
+
858
+ destination, text = read_destination(text)
859
+ break prev unless destination
860
+
861
+ title, text = read_title(text)
862
+ push_definition(label, destination, title)
863
+ end
864
+ modify(block, text)
865
+ nil
866
+ end
867
+
868
+ private
869
+
870
+ def root(block)
871
+ current_root = block
872
+ current_root = current_root.parent while current_root.parent
873
+ current_root
874
+ end
875
+
876
+ def modify(block, text)
877
+ rootblock = root(block)
878
+ rootblock.properties[:linkdefs] =
879
+ if rootblock.properties[:linkdefs]
880
+ @definitions.merge(rootblock.properties[:linkdefs])
881
+ else
882
+ @definitions.dup
883
+ end
884
+ block.content = text
885
+ end
886
+
887
+ def push_definition(label, uri, title = nil)
888
+ labelname = label.strip.downcase.gsub(/\s+/, ' ')
889
+ return if @definitions[labelname]
890
+
891
+ @definitions[labelname] = {
892
+ uri: uri,
893
+ title: title
894
+ }
895
+ end
896
+ end
897
+
898
+ # Inline scanner
899
+ class StackScanner
900
+ def initialize(doc, init_tokens: nil)
901
+ @doc = doc
902
+ @init_tokens = init_tokens
903
+ end
904
+
905
+ # Scan document
906
+ def scan
907
+ rounds = quantize(@doc.class.unsorted_children)
908
+ tokens = @init_tokens || [@doc.content]
909
+ rounds.each do |valid_parsers|
910
+ @valid_parsers = valid_parsers
911
+ tokens = tokenize(tokens)
912
+ tokens = forward_walk(tokens)
913
+ tokens = reverse_walk(tokens)
914
+ end
915
+ structure = finalize(tokens)
916
+ structure.each { |child| @doc.append_child(child) }
917
+ end
918
+
919
+ private
920
+
921
+ # Finalize structure, concatenate adjacent text parts,
922
+ # transform into Text objects
923
+ # @param parts [Array<String, ::PointBlank::DOM::DOMObject>]
924
+ # @return [Array<::PointBlank::DOM::DOMObject>]
925
+ def finalize(structure)
926
+ structnew = []
927
+ buffer = ""
928
+ structure.each do |block|
929
+ block = block.first if block.is_a? Array
930
+ buffer += block if block.is_a? String
931
+ next if block.is_a? String
932
+
933
+ structnew.append(construct_text(buffer)) unless buffer.empty?
934
+ buffer = ""
935
+ structnew.append(block)
936
+ end
937
+ structnew.append(construct_text(buffer)) unless buffer.empty?
938
+ structnew
939
+ end
940
+
941
+ # Construct text object for a string
942
+ # @param string [String]
943
+ # @return [::PointBlank::DOM::Text]
944
+ def construct_text(string)
945
+ obj = ::PointBlank::DOM::Text.new
946
+ string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
947
+ '\\1')
948
+ string = string.gsub("\n", " ")
949
+ string = MMMD::EntityUtils.decode_entities(string)
950
+ obj.content = string
951
+ obj
952
+ end
953
+
954
+ # Transform text into a list of tokens
955
+ def tokenize(tokens)
956
+ parts = tokens
957
+ @valid_parsers.each do |parser|
958
+ newparts = []
959
+ parts.each do |x|
960
+ if x.is_a? String
961
+ newparts.append(*parser.tokenize(x))
962
+ else
963
+ newparts.append(x)
964
+ end
965
+ end
966
+ parts = newparts
967
+ end
968
+ parts
969
+ end
970
+
971
+ # Process parsed tokens (callback on open, forward search direction)
972
+ def forward_walk(parts)
973
+ parts = parts.dup
974
+ newparts = []
975
+ while (part = parts.shift)
976
+ next newparts.append(part) if part.is_a? String
977
+
978
+ if part[1].respond_to?(:forward_walk) && part.last == :open
979
+ part, parts = part[1].forward_walk([part] + parts)
980
+ end
981
+ newparts.append(part)
982
+ end
983
+ newparts
984
+ end
985
+
986
+ # Process parsed tokens (callback on close, inverse search direction)
987
+ def reverse_walk(parts)
988
+ backlog = []
989
+ parts.each do |part|
990
+ backlog.append(part)
991
+ next unless part.is_a? Array
992
+ next unless part.last == :close
993
+ next unless part[1].respond_to?(:reverse_walk)
994
+
995
+ backlog = part[1].reverse_walk(backlog, doc: @doc)
996
+ end
997
+ backlog
998
+ end
999
+
1000
+ # Quantize valid children
1001
+ def quantize(children)
1002
+ children.group_by(&:last).map { |_, v| v.map(&:first).map(&:parser) }
1003
+ end
1004
+ end
1005
+
1006
+ # Null inline scanner element
1007
+ # @abstract
1008
+ class NullInline
1009
+ class << self
1010
+ attr_accessor :parser_for
1011
+ end
1012
+
1013
+ # Tokenize a string
1014
+ # @param string [String]
1015
+ # @return [Array<Array(String, Class, Symbol), String>]
1016
+ def self.tokenize(string)
1017
+ [string]
1018
+ end
1019
+
1020
+ # @!method self.reverse_walk(backlog)
1021
+ # Reverse-walk the backlog and construct a valid element from it
1022
+ # @param backlog [Array<Array(String, Class, Symbol), String>]
1023
+ # @return [Array<Array(String, Class, Symbol), String>]
1024
+
1025
+ # @!method self.forward_walk(backlog)
1026
+ # Forward-walk the backlog starting from the current valid element
1027
+ # @param backlog [Array<Array(String, Class, Symbol), String>]
1028
+ # @return [Array<Array(String, Class, Symbol), String>]
1029
+
1030
+ # Check that the symbol at this index is not escaped
1031
+ # @param index [Integer]
1032
+ # @param string [String]
1033
+ # @return [nil, Integer]
1034
+ def self.check_unescaped(index, string)
1035
+ return index if index.zero?
1036
+
1037
+ count = 0
1038
+ index -= 1
1039
+ while index >= 0 && string[index] == "\\"
1040
+ count += 1
1041
+ index -= 1
1042
+ end
1043
+ (count % 2).zero?
1044
+ end
1045
+
1046
+ # Find the first occurence of an unescaped pattern
1047
+ # @param string [String]
1048
+ # @param pattern [Regexp, String]
1049
+ # @return [Integer, nil]
1050
+ def self.find_unescaped(string, pattern)
1051
+ initial = 0
1052
+ while (index = string.index(pattern, initial))
1053
+ return index if check_unescaped(index, string)
1054
+
1055
+ initial = index + 1
1056
+ end
1057
+ nil
1058
+ end
1059
+
1060
+ # Iterate over every string/unescaped token part
1061
+ # @param string [String]
1062
+ # @param pattern [Regexp]
1063
+ # @param callback [#call]
1064
+ # @return [Array<String, Array(String, Class, Symbol)>]
1065
+ def self.iterate_tokens(string, pattern, &filter)
1066
+ tokens = []
1067
+ initial = 0
1068
+ while (index = string.index(pattern, initial))
1069
+ prefix = (index.zero? ? nil : string[initial..(index - 1)])
1070
+ tokens.append(prefix) if prefix
1071
+ unescaped = check_unescaped(index, string)
1072
+ match = filter.call(index.positive? ? string[..(index - 1)] : "",
1073
+ string[index..],
1074
+ unescaped)
1075
+ tokens.append(match)
1076
+ match = match.first if match.is_a? Array
1077
+ initial = index + match.length
1078
+ end
1079
+ remaining = string[initial..] || ""
1080
+ tokens.append(remaining) unless remaining.empty?
1081
+ tokens
1082
+ end
1083
+
1084
+ # Build child
1085
+ # @param children [Array]
1086
+ # @return [::PointBlank::DOM::DOMObject]
1087
+ def self.build(children)
1088
+ obj = parser_for.new
1089
+ if parser_for.valid_children.empty?
1090
+ children.each do |child|
1091
+ child = child.first if child.is_a? Array
1092
+ child = construct_text(child) if child.is_a? String
1093
+ obj.append_child(child)
1094
+ end
1095
+ else
1096
+ tokens = children.map do |child|
1097
+ child.is_a?(Array) ? child.first : child
1098
+ end
1099
+ scanner = StackScanner.new(obj, init_tokens: tokens)
1100
+ scanner.scan
1101
+ end
1102
+ obj
1103
+ end
1104
+
1105
+ # Construct text object for a string
1106
+ # @param string [String]
1107
+ # @return [::PointBlank::DOM::Text]
1108
+ def self.construct_text(string)
1109
+ obj = ::PointBlank::DOM::Text.new
1110
+ string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
1111
+ '\\1')
1112
+ string = string.gsub("\n", " ")
1113
+ string = MMMD::EntityUtils.decode_entities(string)
1114
+ obj.content = string
1115
+ obj
1116
+ end
1117
+
1118
+ # Construct text literal for a string
1119
+ # @param string [String]
1120
+ # @return [::PointBlank::DOM::Text]
1121
+ def self.construct_literal(string)
1122
+ obj = ::PointBlank::DOM::Text.new
1123
+ obj.content = string
1124
+ obj
1125
+ end
1126
+
1127
+ # Check that contents can be contained within this element
1128
+ # @param elements [Array<String, Array(String, Class, Symbol)>]
1129
+ # @return [Boolean]
1130
+ def self.check_contents(elements)
1131
+ elements.each do |element|
1132
+ next unless element.is_a? ::PointBlank::DOM::DOMObject
1133
+ next if parser_for.valid_children.include? element.class
1134
+
1135
+ return false
1136
+ end
1137
+ true
1138
+ end
1139
+ end
1140
+
1141
+ # Code inline parser
1142
class CodeInline < NullInline
  # (see ::PointBlank::Parsing::NullInline#tokenize)
  def self.tokenize(string)
    # Backtick runs that have been opened and not yet closed, keyed by the
    # run itself, so a run only closes an opener of identical length.
    pending = {}
    iterate_tokens(string, "`") do |_before, current_text, matched|
      next current_text[0] unless matched

      run = current_text[/\A`+/]
      if pending.delete(run)
        [run, self, :close]
      else
        pending[run] = true
        [run, self, :open]
      end
    end
  end

  # Looks for the closing run first and only then assembles the span text,
  # so nothing is buffered (or built) when the opener has no closer.
  # (see ::PointBlank::Parsing::NullInline#forward_walk)
  def self.forward_walk(parts)
    opening = parts.first.first
    closer_at = parts.index do |part|
      part.is_a?(Array) && part.first == opening && part.last == :close
    end
    # No closer: the opener degrades to plain text.
    return [opening, parts[1..]] unless closer_at&.positive?

    span = +""
    parts[0..closer_at].each do |part|
      span << (part.is_a?(Array) ? part.first : part)
    end
    # Strip the opening and closing runs, keep the inside verbatim.
    literal = construct_literal(span[opening.length..(-1 - opening.length)])
    [build([literal]), parts[(closer_at + 1)..]]
  end
end
1180
+
1181
+ # Autolink inline parser
1182
class AutolinkInline < NullInline
  # Shape of a URI autolink: "<scheme:target>" with no whitespace or angle
  # brackets inside. Anchored to the whole string (\A..\z) so a later line
  # of a multi-line buffer can never satisfy the check on its own.
  URI_AUTOLINK = /\A<[\w\-_+]+:[^<>\s]+>\z/

  # (see ::PointBlank::Parsing::NullInline#tokenize)
  def self.tokenize(string)
    iterate_tokens(string, /[<>]/) do |_before, current_text, matched|
      next current_text[0] unless matched

      current_text.start_with?("<") ? ["<", self, :open] : [">", self, :close]
    end
  end

  # (see ::PointBlank::Parsing::NullInline#forward_walk)
  def self.forward_walk(parts)
    closer_at = parts.index do |part|
      part.is_a?(Array) && part.first == ">" && part.last == :close
    end
    # Without a real closing token there is no autolink; emit "<" as text.
    return ['<', parts[1..]] unless closer_at

    buffer = +""
    parts[0..closer_at].each do |part|
      buffer << (part.is_a?(Array) ? part.first : part)
    end
    return ['<', parts[1..]] unless buffer.match?(URI_AUTOLINK)

    target = buffer[1..-2]
    obj = build([target])
    obj.properties[:uri] = MMMD::EntityUtils.encode_uri(target)
    [obj, parts[(closer_at + 1)..]]
  end
end
1217
+
1218
+ # Hyperreference inline superclass
1219
+ # @abstract
1220
class HyperlinkInline < NullInline
  # Parse link properties according to given link suffix
  # @param input [String] text following the closing "]"
  # @return [Array(<Hash, nil>, String)] properties and the part of the
  #   input they were read from, or nil and "" when nothing parses
  def self.parse_linkinfo(input)
    props, remainder = read_properties(input)
    return nil, "" unless props

    [props, input[..(input.length - remainder.length - 1)]]
  end

  # Build object and apply link info to it
  # @param capture [Array<String, Array(String, Class, Symbol)>]
  #   opener token, contents and closer token (which carries the link
  #   info in its third slot)
  # @param doc [::PointBlank::DOM::DOMObject]
  # @return [::PointBlank::DOM::DOMObject, nil] nil when the link refers
  #   to a label that has no definition
  def self.build_w_linkinfo(capture, doc)
    linkinfo = capture[-1][2]
    obj = build(capture[1..-2])
    if (label = linkinfo[:label])
      # :linkdefs may be absent when the document defines no references;
      # treat that the same as an unknown label instead of raising.
      linkdefs = doc.root.properties[:linkdefs]
      linkinfo = linkdefs && linkdefs[label]
      return nil unless linkinfo
    end
    obj.properties = linkinfo
    obj
  end

  # Pairs the closing token at the end of the backlog with the nearest
  # preceding opener. Only that opener decides which class gets built;
  # openers further back are left in place as ordinary backlog entries.
  # (see ::PointBlank::Parsing::NullInline#reverse_walk)
  def self.reverse_walk(backlog, doc:)
    opener_at = backlog.rindex do |block|
      block.is_a?(Array) && block[1] < self
    end
    return backlog unless opener_at

    cls = backlog[opener_at][1]
    capture = backlog[opener_at..]
    return backlog unless cls.check_contents(capture)

    block = cls.build_w_linkinfo(capture, doc)
    block ? backlog.first(opener_at) + [block] : backlog
  end
end
1271
+
1272
+ # Image inline parser
1273
class ImageInline < HyperlinkInline
  singleton_class.include(::PointBlank::Parsing::LinkSharedMethods)

  # Emits an opening token for "![" and, where the text after "]" parses
  # as link info, a closing token that carries that info.
  # (see ::PointBlank::Parsing::NullInline#tokenize)
  def self.tokenize(string)
    iterate_tokens(string, /(?:!\[|\]\()/) do |_before, text, matched|
      if !matched
        text[0]
      elsif text.start_with?("![")
        ["![", self, :open]
      elsif text.start_with?("]")
        props, consumed = parse_linkinfo(text[1..])
        props ? ["]#{consumed}", HyperlinkInline, props, :close] : text[0]
      else
        text[0]
      end
    end
  end
end
1290
+
1291
+ # Link inline parser
1292
class LinkInline < HyperlinkInline
  singleton_class.include(::PointBlank::Parsing::LinkSharedMethods)

  # Emits an opening token for "[" and, where the text after "]" parses
  # as link info, a closing token that carries that info.
  # (see ::PointBlank::Parsing::NullInline#tokenize)
  def self.tokenize(string)
    iterate_tokens(string, /(?:\[|\][(\[])/) do |_before, text, matched|
      if !matched
        text[0]
      elsif text.start_with?("[")
        ["[", self, :open]
      elsif text.start_with?("]")
        props, consumed = parse_linkinfo(text[1..])
        props ? ["]#{consumed}", HyperlinkInline, props, :close] : text[0]
      else
        text[0]
      end
    end
  end
end
1309
+
1310
+ # Emphasis and strong emphasis inline parser
1311
class EmphInline < NullInline
  # An underscore run wedged between two word-like characters (as in
  # "snake_case"); such a run never delimits emphasis. Whitespace is
  # excluded explicitly (\s) and the pattern is anchored to the whole
  # string, so a "_" that merely follows a line break is not mistaken
  # for an intraword underscore.
  INFIX_TOKENS = /\A[^\p{S}\p{P}\p{Zs}\s_]_++[^\p{S}\p{P}\p{Zs}\s_]\z/
  # Unicode whitespace as CommonMark defines it: Zs, tab, LF, FF, CR.
  FLANK_WHITESPACE = /[\p{Zs}\t\n\f\r]/
  # Unicode punctuation as CommonMark defines it: categories P and S.
  FLANK_PUNCTUATION = /[\p{P}\p{S}]/
  # A delimiter run made only of asterisks.
  STAR_RUN = /\A\*+\z/

  # (see ::PointBlank::Parsing::NullInline#tokenize)
  def self.tokenize(string)
    iterate_tokens(string, /(?:_++|\*++)/) do |bfr, text, matched|
      # afr is the single character after the run ("" at end of text or
      # before a line break, since "." does not match a newline).
      token, afr = text.match(/\A(_++|\*++)(.?)/)[1..2]
      prev = bfr[-1] || ""
      break_into_elements(token, "#{prev}#{token}#{afr}",
                          left_token?(prev, token, afr),
                          right_token?(prev, token, afr), matched)
    end
  end

  # Is this token, given these surrounding characters, left-flanking?
  # @param bfr [String] character before the run ("" at start of text)
  # @param token [String] the delimiter run (unused)
  # @param afr [String] character after the run ("" at end of text)
  # @return [Boolean]
  def self.left_token?(bfr, _token, afr)
    return false if afr.empty? || afr.match?(FLANK_WHITESPACE)

    !afr.match?(FLANK_PUNCTUATION) ||
      bfr.empty? || bfr.match?(FLANK_WHITESPACE) ||
      bfr.match?(FLANK_PUNCTUATION)
  end

  # Is this token, given these surrounding characters, right-flanking?
  # @param bfr [String] character before the run ("" at start of text)
  # @param token [String] the delimiter run (unused)
  # @param afr [String] character after the run ("" at end of text)
  # @return [Boolean]
  def self.right_token?(bfr, _token, afr)
    return false if bfr.empty? || bfr.match?(FLANK_WHITESPACE)

    !bfr.match?(FLANK_PUNCTUATION) ||
      afr.empty? || afr.match?(FLANK_WHITESPACE) ||
      afr.match?(FLANK_PUNCTUATION)
  end

  # Break token string into elements
  # @param token_inner [String] the delimiter run itself
  # @param token [String] the run together with its neighbouring characters
  # @param left [Boolean] whether the run is left-flanking
  # @param right [Boolean] whether the run is right-flanking
  # @param matched [Boolean]
  # @return [String, Array] plain text, or a token array tagged with
  #   :open and/or :close
  def self.break_into_elements(token_inner, token, left, right, matched)
    return token_inner[0] unless matched
    # Intraword underscores stay literal; asterisks are exempt.
    return token_inner if !token_inner.match?(STAR_RUN) &&
                          token.match?(INFIX_TOKENS)

    roles = []
    roles << :open if left
    roles << :close if right
    roles.empty? ? token_inner : [token_inner, self, *roles]
  end

  # Pairs the closing run at the end of the backlog with openers before it,
  # one emphasis level per pass, until the closing run is used up or no
  # opener qualifies.
  # (see ::PointBlank::Parsing::NullInline#reverse_walk)
  def self.reverse_walk(backlog, **_doc)
    until backlog.last.first.empty?
      capture = []
      before = []
      closer = backlog.last
      star = closer.first.match?(STAR_RUN)
      open = true
      backlog[..-2].reverse_each do |blk|
        # An opener qualifies when it uses the same delimiter character
        # and the combined run lengths are not a multiple of 3 (unless
        # both lengths are).
        open = false if blk.is_a?(Array) && blk[2] == :open &&
                        blk.first.match?(STAR_RUN) == star &&
                        blk[1] == self &&
                        ((blk.first.length + closer.first.length) % 3 != 0 ||
                         ((blk.first.length % 3).zero? &&
                          (closer.first.length % 3).zero?))
        (open ? capture : before).prepend(blk)
        next unless blk.is_a?(Array)
        return backlog unless blk[1].check_contents(capture)
      end
      return backlog if open

      opener = before[-1]
      # Two delimiters on each side make strong emphasis, otherwise plain.
      strong = closer.first.length > 1 && opener.first.length > 1
      used = strong ? 2 : 1
      # Consume the delimiters in place; the token arrays are shared with
      # the backlog, so the shortened runs stay visible to later passes.
      closer[0] = closer.first[used..]
      opener[0] = opener.first[used..]
      before = before[..-2] if opener.first.empty?
      backlog = before + [build_emph(capture, strong)] + [closer]
    end
    backlog
  end

  # Build strong or normal emphasis depending on the boolean flag
  # @param children [Array<String, ::PointBlank::DOM::DOMObject>]
  # @param strong [Boolean]
  # @return [::PointBlank::DOM::DOMObject]
  def self.build_emph(children, strong)
    klass = if strong
              ::PointBlank::DOM::InlineStrong
            else
              ::PointBlank::DOM::InlineEmphasis
            end
    obj = klass.new
    tokens = children.map { |child| child.is_a?(Array) ? child.first : child }
    StackScanner.new(obj, init_tokens: tokens).scan
    obj
  end
end
1429
+
1430
+ # Hard break
1431
class HardBreakInline < NullInline
  # NOTE(review): CommonMark requires two or more spaces before the line
  # ending for a hard break; the literals below contain one space.
  # Confirm they were not whitespace-collapsed on the way here.
  # (see ::PointBlank::Parsing::NullInline#tokenize)
  def self.tokenize(string)
    iterate_tokens(string, /(?: \n|\\\n)/) do |_before, token, matched|
      if matched || token.start_with?(" \n")
        ["\n", self, :close]
      else
        " "
      end
    end
  end

  # Replaces the closing token at the end of the backlog with an empty
  # break element.
  # (see ::PointBlank::Parsing::NullInline#reverse_walk)
  def self.reverse_walk(backlog, **_doc)
    backlog[-1] = build([])
    backlog
  end
end
1448
+ end
1449
+
1450
+ # Document object model elements
1451
+ module DOM
1452
# Raised when something that is not a DOM object is inserted into the tree
class DOMError < StandardError
end
1453
+
1454
+ # DOM Object
1455
class DOMObject
  class << self
    attr_accessor :scanner, :parser,
                  :unsorted_children,
                  :unsorted_overlays

    # Make subclasses inherit scanner, parser and valid children
    def inherited(subclass)
      subclass.parser ||= @parser
      subclass.scanner ||= @scanner
      subclass.unsorted_children ||= @unsorted_children.dup || []
      super(subclass)
    end

    # Sort children by priority
    # @return [void]
    def sort_children
      @valid_children = (@unsorted_children || []).sort_by(&:last)
                                                  .map(&:first)
    end

    # Define valid child for this DOMObject class
    # @param child [Class]
    # @param priority [Integer] lower values sort first
    # @return [void]
    def define_child(child, priority = 9999)
      # Drop the memoized ordering so #valid_children sees the new child
      # even if the sorted list was already computed.
      @valid_children = nil
      @unsorted_children ||= []
      @unsorted_children.append([child, priority])
    end

    # Define child element scanner for this DOMObject class
    # @param scanner [Class]
    # @return [void]
    def define_scanner(scanner)
      @scanner = scanner
    end

    # Define self parser for this DOMObject class
    # @param parser [::PointBlank::Parsing::NullParser]
    # @return [void]
    def define_parser(parser)
      parser.parser_for = self
      @parser = parser
    end

    # Define an overlay - a parsing strategy that occurs once a block is
    # closed. May transform block if #process method of the overlay class
    # returns a class.
    # @param overlay [::PointBlank::Parsing::NullOverlay]
    # @param priority [Integer] lower values sort first
    # @return [void]
    def define_overlay(overlay, priority = 9999)
      # Same invalidation as in #define_child, for the overlay ordering.
      @valid_overlays = nil
      @unsorted_overlays ||= []
      @unsorted_overlays.append([overlay, priority])
    end

    # Sort overlays by priority
    # @return [void]
    def sort_overlays
      @valid_overlays = (@unsorted_overlays || []).sort_by(&:last)
                                                  .map(&:first)
    end

    # Parse a document
    # @param doc [Object] input handed to the scanner as is
    # @return [Object] whatever the scanner's #scan returns (presumably
    #   the populated document - verify against the scanner)
    def parse(doc)
      newdoc = new
      newdoc.parser = parser.new
      @scanner.new(doc, newdoc).scan
    end

    # Source parameters from parent (fixes recursive dependency)
    def upsource
      superclass&.tap do |sc|
        @scanner = sc.scanner
        @parser = sc.parser
        @unsorted_children = sc.unsorted_children.dup
        @unsorted_overlays = sc.unsorted_overlays.dup
      end
      # The overlay list was just replaced; forget any memoized ordering.
      @valid_overlays = nil
      sort_children
    end

    # Get array of valid overlays sorted by priority
    # @return [Array<::PointBlank::Parsing::NullOverlay>]
    def valid_overlays
      sort_overlays unless @valid_overlays
      @valid_overlays
    end

    # Get array of valid children sorted by priority
    # @return [Array<Class>]
    def valid_children
      sort_children unless @valid_children
      @valid_children
    end
  end

  include ::Enumerable

  attr_accessor :content, :parser, :parent, :position, :properties
  attr_reader :temp_children

  def initialize
    @children = []
    @temp_children = []
    @properties = {}
    @content = ""
  end

  # Set element at position
  # NOTE(review): unlike #append_child this does not update the element's
  # parent or position - confirm whether callers rely on that.
  # @param index [Integer]
  # @param element [DOMObject]
  # @return [DOMObject]
  # @raise [DOMError] when element is not a DOMObject
  def []=(index, element)
    unless element.is_a? ::PointBlank::DOM::DOMObject
      raise DOMError, "invalid DOM class #{element.class}"
    end

    @children[index] = element
  end

  # Get element at position
  # @param index [Integer]
  # @return [DOMObject]
  def [](index)
    @children[index]
  end

  # Iterate over each child of DOMObject
  # @param block [#call]
  def each(&block)
    @children.each(&block)
  end

  # Return an array duplicate of all children
  # @return [Array<DOMObject>]
  def children
    @children.dup
  end

  # Get root element containing this child
  # @return [::PointBlank::DOM::DOMObject]
  def root
    node = self
    node = node.parent while node.parent
    node
  end

  # Append child, recording this object as its parent and its index as
  # its position
  # @param child [DOMObject]
  # @raise [DOMError] when child is not a DOMObject
  def append_child(child)
    unless child.is_a? ::PointBlank::DOM::DOMObject
      raise DOMError, "invalid DOM class #{child.class}"
    end

    child.parent = self
    child.position = @children.length
    @children.append(child)
  end

  # Append temp child
  # @param child [DOMObject]
  def append_temp_child(child)
    @temp_children.append(child)
  end
end
1620
+
1621
+ # Temp. text class
1622
class TempText < DOMObject; end

# Inline text
class Text < DOMObject; end

# Inline preformatted text (parsed by CodeInline)
class InlinePre < DOMObject
  define_parser(::PointBlank::Parsing::CodeInline)
end

# Hard linebreak
class InlineBreak < DOMObject
  define_parser(::PointBlank::Parsing::HardBreakInline)
end

# Autolink
class InlineAutolink < DOMObject
  define_parser(::PointBlank::Parsing::AutolinkInline)
end

# Inline formattable text
class InlineFormattable < DOMObject; end
1647
+
1648
+ # Image
1649
class InlineImage < InlineFormattable
  define_parser(::PointBlank::Parsing::ImageInline)
  define_child(::PointBlank::DOM::InlinePre, 4000)
  define_child(::PointBlank::DOM::InlineBreak, 9999)
  # Images are deliberately not registered as children of images:
  # define_child(::PointBlank::DOM::InlineImage)
end
1656
+
1657
+ # Hyperreferenced text
1658
class InlineLink < InlineFormattable
  define_parser(::PointBlank::Parsing::LinkInline)
  define_child(::PointBlank::DOM::InlinePre, 4000)
  define_child(::PointBlank::DOM::InlineImage, 5000)
  define_child(::PointBlank::DOM::InlineBreak, 9999)
  # Autolinks inside links are left unregistered for now:
  # define_child(::PointBlank::DOM::InlineAutolink)
end
1666
+
1667
+ # Inline root
1668
class InlineRoot < DOMObject
  define_scanner(::PointBlank::Parsing::StackScanner)
  # Children in registration order; the number is the sort priority.
  define_child(::PointBlank::DOM::InlinePre, 4000)
  define_child(::PointBlank::DOM::InlineAutolink, 4000)
  define_child(::PointBlank::DOM::InlineImage, 5000)
  define_child(::PointBlank::DOM::InlineLink, 6000)
  define_child(::PointBlank::DOM::InlineBreak, 9999)
end
1676
+
1677
+ # Strong emphasis
1678
class InlineStrong < InlineRoot; end

# Emphasis
class InlineEmphasis < InlineRoot; end
1684
+
1685
# Emphasis classes subclass InlineRoot, so they can only be registered as
# its children once they exist.
InlineRoot.define_child(::PointBlank::DOM::InlineStrong, 8000)
InlineRoot.define_child(::PointBlank::DOM::InlineEmphasis, 8000)

# Re-copy scanner, parser and children from InlineRoot into its subclasses
# now that the child list is complete.
InlineRoot.subclasses.each(&:upsource)

# upsource replaced the subclasses' parser, so it is assigned afterwards.
InlineStrong.define_parser(::PointBlank::Parsing::EmphInline)
InlineEmphasis.define_parser(::PointBlank::Parsing::EmphInline)

# Images and links may contain emphasis as well.
[InlineImage, InlineLink].each do |container|
  container.define_child(::PointBlank::DOM::InlineStrong, 8000)
  container.define_child(::PointBlank::DOM::InlineEmphasis, 8000)
end
1709
+ # Block root (virtual)
1710
class Block < DOMObject; end
1712
+
1713
+ # Leaf block (virtual)
1714
class LeafBlock < DOMObject
  # Virtual hook to delay inline processing: trims the content and queues
  # this block on the root's temporary children.
  def parse_inner
    self.content &&= content.strip
    root.append_temp_child(self)
  end
end
1721
+
1722
+ # Leaf literal block (virtual)
1723
class LeafLiteralBlock < LeafBlock
  # Virtual hook to push inlines in place of leaf blocks: the content is
  # wrapped, unparsed, in a single text child.
  def parse_inner
    append_child(
      ::PointBlank::DOM::Text.new.tap { |literal| literal.content = content }
    )
  end
end
1731
+
1732
+ # Document root
1733
class Document < Block
  # (see ::PointBlank::DOM::DOMObject#parse)
  def self.parse(doc)
    super(doc).tap do |output|
      # Inline parsing runs only after the whole document has been
      # processed, because of the way link definitions have to be handled.
      output.temp_children.each do |block|
        inline_root = ::PointBlank::DOM::InlineRoot.new
        inline_root.parent = block.parent
        inline_root.content = block.content
        ::PointBlank::Parsing::StackScanner.new(inline_root).scan
        # The raw text is replaced by the parsed inline children.
        block.content = ""
        inline_root.each { |node| block.append_child(node) }
      end
      output.temp_children.clear
    end
  end
end
1753
+
1754
+ # Paragraph in a document (separated by 2 newlines)
1755
class Paragraph < DOMObject
  define_parser(::PointBlank::Parsing::ParagraphParser)
  define_overlay(::PointBlank::Parsing::ParagraphUnderlineOverlay, 0)
  define_overlay(::PointBlank::Parsing::LinkReferenceOverlay)

  # Virtual hook to delay inline processing: trims the content and queues
  # this paragraph on the root's temporary children.
  def parse_inner
    self.content &&= content.strip
    root.append_temp_child(self)
  end
end
1766
+
1767
+ # Heading level 1
1768
class SetextHeading1 < LeafBlock
  define_parser(::PointBlank::Parsing::SetextParserLV1)
end

# Setext heading level 2
class SetextHeading2 < SetextHeading1
  define_parser(::PointBlank::Parsing::SetextParserLV2)
end
1776
+
1777
+ # Heading level 1
1778
class ATXHeading1 < LeafBlock
  define_parser(::PointBlank::Parsing::ATXParserLV1)
end

# ATX heading level 2
class ATXHeading2 < ATXHeading1
  define_parser(::PointBlank::Parsing::ATXParserLV2)
end

# ATX heading level 3
class ATXHeading3 < ATXHeading1
  define_parser(::PointBlank::Parsing::ATXParserLV3)
end

# ATX heading level 4
class ATXHeading4 < ATXHeading1
  define_parser(::PointBlank::Parsing::ATXParserLV4)
end

# ATX heading level 5
class ATXHeading5 < ATXHeading1
  define_parser(::PointBlank::Parsing::ATXParserLV5)
end

# ATX heading level 6
class ATXHeading6 < ATXHeading1
  define_parser(::PointBlank::Parsing::ATXParserLV6)
end
1805
+ end
1806
+
1807
+ # Preformatted fenced code block
1808
class CodeBlock < LeafLiteralBlock
  define_parser(::PointBlank::Parsing::FencedCodeBlock)
end

# Quote block
class QuoteBlock < Block; end

# Unordered list element
class ULListElement < Block; end

# Ordered list element
class OLListElement < Block; end
1823
+
1824
+ # Unordered list
1825
class ULBlock < DOMObject
  define_scanner(::PointBlank::Parsing::LineScanner)
  define_parser(::PointBlank::Parsing::ULParser)
  # The only valid child is an unordered list element.
  define_child(::PointBlank::DOM::ULListElement)
end
1830
+
1831
+ # Ordered list block
1832
class OLBlock < DOMObject
  define_scanner ::PointBlank::Parsing::LineScanner
  # Ordered lists use the ordered-list parser. Registering ULParser here
  # would also re-point ULParser.parser_for at this class, because
  # define_parser assigns parser_for on the parser it is given.
  define_parser ::PointBlank::Parsing::OLParser
  define_child ::PointBlank::DOM::OLListElement
end
1837
+
1838
+ # Indent block
1839
class IndentBlock < LeafLiteralBlock
  define_parser(::PointBlank::Parsing::IndentedBlock)
end

# Horizontal rule (parsed by ThematicBreakParser)
class HorizontalRule < DOMObject
  define_parser(::PointBlank::Parsing::ThematicBreakParser)
end
1847
+
1848
+ # Block root (real)
1849
Block.define_scanner(::PointBlank::Parsing::LineScanner)
Block.define_parser(::PointBlank::Parsing::NullParser)
# Children of a block with their sort priority, in registration order.
[
  [::PointBlank::DOM::IndentBlock, 9999],
  [::PointBlank::DOM::Paragraph, 9998],
  [::PointBlank::DOM::ATXHeading1, 600],
  [::PointBlank::DOM::ATXHeading2, 600],
  [::PointBlank::DOM::ATXHeading3, 600],
  [::PointBlank::DOM::ATXHeading4, 600],
  [::PointBlank::DOM::ATXHeading5, 600],
  [::PointBlank::DOM::ATXHeading6, 600],
  [::PointBlank::DOM::QuoteBlock, 600],
  [::PointBlank::DOM::ULBlock, 700],
  [::PointBlank::DOM::OLBlock, 700],
  [::PointBlank::DOM::CodeBlock, 600],
  [::PointBlank::DOM::HorizontalRule, 300]
].each { |child, priority| Block.define_child(child, priority) }
Block.sort_children

Paragraph.define_child(::PointBlank::DOM::SetextHeading1, 1)
Paragraph.define_child(::PointBlank::DOM::SetextHeading2, 2)

# Re-copy scanner, parser and children from Block into its subclasses now
# that the child list is complete.
Block.subclasses.each(&:upsource)

# Parsers are assigned after upsource, which replaces each subclass's
# parser with the one from Block.
{
  QuoteBlock => ::PointBlank::Parsing::QuoteParser,
  ULBlock => ::PointBlank::Parsing::ULParser,
  ULListElement => ::PointBlank::Parsing::ULElementParser,
  OLBlock => ::PointBlank::Parsing::OLParser,
  OLListElement => ::PointBlank::Parsing::OLElementParser
}.each { |klass, parser| klass.define_parser(parser) }
1894
+ end
1895
+ end