coradoc-markdown 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/lib/coradoc/markdown/errors.rb +28 -0
- data/lib/coradoc/markdown/model/abbreviation.rb +27 -0
- data/lib/coradoc/markdown/model/attribute_list.rb +98 -0
- data/lib/coradoc/markdown/model/base.rb +86 -0
- data/lib/coradoc/markdown/model/blockquote.rb +21 -0
- data/lib/coradoc/markdown/model/code.rb +11 -0
- data/lib/coradoc/markdown/model/code_block.rb +24 -0
- data/lib/coradoc/markdown/model/definition_item.rb +24 -0
- data/lib/coradoc/markdown/model/definition_list.rb +47 -0
- data/lib/coradoc/markdown/model/definition_term.rb +21 -0
- data/lib/coradoc/markdown/model/document.rb +39 -0
- data/lib/coradoc/markdown/model/emphasis.rb +11 -0
- data/lib/coradoc/markdown/model/extension.rb +92 -0
- data/lib/coradoc/markdown/model/footnote.rb +31 -0
- data/lib/coradoc/markdown/model/footnote_reference.rb +22 -0
- data/lib/coradoc/markdown/model/heading.rb +44 -0
- data/lib/coradoc/markdown/model/highlight.rb +18 -0
- data/lib/coradoc/markdown/model/horizontal_rule.rb +16 -0
- data/lib/coradoc/markdown/model/image.rb +19 -0
- data/lib/coradoc/markdown/model/link.rb +19 -0
- data/lib/coradoc/markdown/model/list.rb +22 -0
- data/lib/coradoc/markdown/model/list_item.rb +29 -0
- data/lib/coradoc/markdown/model/math.rb +50 -0
- data/lib/coradoc/markdown/model/paragraph.rb +28 -0
- data/lib/coradoc/markdown/model/strikethrough.rb +18 -0
- data/lib/coradoc/markdown/model/strong.rb +11 -0
- data/lib/coradoc/markdown/model/table.rb +13 -0
- data/lib/coradoc/markdown/model/text.rb +15 -0
- data/lib/coradoc/markdown/parser/ast_processor.rb +543 -0
- data/lib/coradoc/markdown/parser/block_parser.rb +745 -0
- data/lib/coradoc/markdown/parser/html_entities.rb +2149 -0
- data/lib/coradoc/markdown/parser/inline_parser.rb +274 -0
- data/lib/coradoc/markdown/parser/parslet_extras.rb +215 -0
- data/lib/coradoc/markdown/parser.rb +11 -0
- data/lib/coradoc/markdown/parser_util.rb +90 -0
- data/lib/coradoc/markdown/serializer.rb +199 -0
- data/lib/coradoc/markdown/toc_generator.rb +215 -0
- data/lib/coradoc/markdown/transform/from_core_model.rb +325 -0
- data/lib/coradoc/markdown/transform/text_extraction.rb +19 -0
- data/lib/coradoc/markdown/transform/to_core_model.rb +287 -0
- data/lib/coradoc/markdown/transformer.rb +463 -0
- data/lib/coradoc/markdown/version.rb +7 -0
- data/lib/coradoc/markdown.rb +190 -0
- metadata +173 -0
|
@@ -0,0 +1,745 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Markdown
|
|
5
|
+
module Parser
|
|
6
|
+
autoload :ParsletExtras, "#{__dir__}/parslet_extras"
|
|
7
|
+
|
|
8
|
+
class BlockParser < Parslet::Parser
|
|
9
|
+
using ParsletExtras
|
|
10
|
+
|
|
11
|
+
# Development aid for grammar work. Returns an atom that, when it is reached
# during a parse, prints +msg+ with the current line/column followed by the
# capture context. The atom it hands back is zero-width and succeeds both
# mid-input and at end of input, so it can be spliced into any sequence.
#
# @param msg [String] label printed in front of the position
def debug(msg)
  dynamic do |source, context|
    puts "#{msg} @ #{source.line_and_column}:"
    pp context.captures
    any.present? | any.absent?
  end
end
|
|
20
|
+
|
|
21
|
+
# One line terminator: LF, CRLF or a lone CR. Marked with `.ignore`.
rule(:line_ending) do
  (str("\n") | str("\r\n") | str("\r")).ignore
end

# A line terminator, or the end of input.
rule(:line_ending_or_eof) do
  line_ending | any.absent?
end

# A single space or tab.
rule(:whitespace) do
  match[" \t"]
end

# A whitespace-only line, marked with `.ignore`.
# NOTE: the end-of-input branch needs repeat(1); with a bare repeat it would
# match the empty string at EOF and loop forever.
rule(:blank_line) do
  (whitespace.repeat(1) >> any.absent? | whitespace.repeat >> line_ending).ignore
end

# Same shape as blank_line, but the whitespace is kept under :ln.
rule(:blank_line_verbatim) do
  spaces_then_eof = whitespace.repeat(1).as(:ln) >> any.absent?
  spaces_then_newline = whitespace.repeat.as(:ln) >> line_ending
  spaces_then_eof | spaces_then_newline
end

# Any character that is not part of a line terminator.
rule(:line_char) do
  match["^\r\n"]
end

# A non-empty line captured under :ln, followed by its terminator or EOF.
rule(:line_verbatim) do
  line_char.repeat(1).as(:ln) >> line_ending_or_eof
end

# Zero to three leading spaces.
rule(:non_indent_space) do
  str(' ').repeat(0, 3)
end
|
|
34
|
+
|
|
35
|
+
# Block nesting is handled with Parslet's `dynamic` and `scope`: every open
# block records, in the capture context, the atom that must match at the
# start of a line for that block to still be open. The line-level rules run
# inside the innermost parser expression and consult that record at runtime.
#
# This must stay a plain method rather than a `rule`; as a rule it would be
# cached in a failure state and break nested alternatives.
#
# Returns an atom that applies the recorded :cont atom (marked `.ignore`),
# or, when no :cont capture exists, merely requires more input to be present.
def continuation
  dynamic do |_source, context|
    captures = context.captures
    captures.key?(:cont) ? captures[:cont].ignore : any.present?
  end
end
|
|
53
|
+
|
|
54
|
+
# Records in the current capture scope that a block of type +kind+ has been
# opened and which atom continues it on following lines.
#
# The stored :cont atom is +cont_rule+ on its own, or the parent scope's
# :cont followed by +cont_rule+ when the parent scope has one. The atom
# returned to the grammar is zero-width and always succeeds.
#
# @param kind [Symbol] block type stored under :block
# @param cont_rule [Parslet::Atoms::Base] per-line continuation atom
def open_block(kind, cont_rule)
  dynamic do |_source, context|
    captures = context.captures
    enclosing = captures.current.parent
    captures[:cont] = enclosing.key?(:cont) ? (enclosing[:cont] >> cont_rule) : cont_rule
    captures[:block] = kind
    any.present? | any.absent?
  end
end
|
|
64
|
+
|
|
65
|
+
# Optional closing run of an ATX heading: at least one whitespace character,
# any number of '#', trailing whitespace, then the end of the line
# (checked by look-ahead only) or the end of input.
rule(:atx_ending_seq) do
  end_of_line_ahead = line_ending.present? | any.absent?
  whitespace.repeat(1) >> str('#').repeat >> whitespace.repeat >> end_of_line_ahead
end

# A backslash-escaped hash, which does not start a heading.
rule(:escaped_hash) do
  str('\\') >> str('#')
end

# ATX heading: one to six '#' captured under :heading, optional text
# captured under :text, and an optional closing sequence.
rule(:atx_heading) do
  text_char = atx_ending_seq.absent? >> line_char
  # The look-ahead in front of the spaces handles a heading whose only
  # content is the closing sequence; otherwise repeat(1) would eat the
  # single space the closing sequence needs.
  spaced_text = atx_ending_seq.absent? >> str(' ').repeat(1) >> text_char.repeat(1).as(:text)

  non_indent_space >>
    escaped_hash.absent? >>
    str('#').repeat(1, 6).as(:heading) >>
    str('#').absent? >>
    spaced_text.maybe >>
    atx_ending_seq.maybe >>
    line_ending_or_eof
end
|
|
94
|
+
|
|
95
|
+
# Three or more occurrences of the marker +c+, each optionally followed by
# whitespace.
#
# @param c [String] the marker character
def thematic_break_char(c)
  marker_and_gap = str(c) >> whitespace.repeat
  marker_and_gap.repeat(3)
end

# Thematic break: up to three leading spaces, then a run of '-', '_' or '*'
# markers, up to the end of the line. The marker run is replaced through
# `.output(hr: true)`.
rule(:thematic_break) do
  marker_run = %w[- _ *]
               .map { |marker| thematic_break_char(marker) }
               .reduce { |alternatives, atom| alternatives | atom }

  non_indent_space >> marker_run.output(hr: true) >> line_ending_or_eof
end
|
|
106
|
+
|
|
107
|
+
# One line of indented code: four spaces, then a non-empty line under :ln.
rule(:indented_code_line) do
  str('    ') >> line_verbatim
end

# A blank line inside an indented code block. It only counts when the
# following line (after the enclosing blocks' continuation) is again
# indented by four spaces or blank; that is checked by look-ahead.
rule(:indented_code_blank_line) do
  following_line = continuation >> (str('    ') | blank_line_verbatim)
  blank_line_verbatim.output(ln: '') >> following_line.present?
end

# Indented code block: a first code line, then any number of code or
# qualifying blank lines; the whole run is captured under :code_block.
rule(:indented_code_block) do
  further_line = continuation >> (indented_code_line | indented_code_blank_line)
  (indented_code_line >> further_line.repeat).as(:code_block)
end
|
|
128
|
+
|
|
129
|
+
# Info string after an opening fence, captured under :info when present.
# Built dynamically because the allowed characters depend on the fence that
# was captured: when the captured :fence starts with a backtick, the info
# string may not contain a backtick.
def code_fence_info
  dynamic do |_source, context|
    info_char =
      if context.captures[:fence].to_s.chr == '`'
        str('`').absent? >> line_char
      else
        line_char
      end
    info_char.repeat(1).as(:info).maybe
  end
end

# Opening fence: up to three spaces (captured as :fence_indent), three or
# more backticks or tildes (captured as :fence), an optional info string,
# then the end of the line.
rule(:code_fence_open) do
  fence_run = str('`').repeat(3) | str('~').repeat(3)

  non_indent_space.capture(:fence_indent) >>
    fence_run.capture(:fence).ignore >>
    code_fence_info >>
    line_ending_or_eof
end
|
|
145
|
+
|
|
146
|
+
# Closing fence: up to three leading spaces, the captured opening :fence,
# any number of additional fence characters of the same kind, optional
# trailing whitespace, then the end of the line.
#
# Trailing spaces/tabs after the fence are accepted (CommonMark allows a
# closing fence to be followed by spaces or tabs). Without this, a line
# such as "``` " was read as code content and the block ran on to the end
# of its container.
rule(:code_fence_close) do
  fence_run = dynamic do |_src, ctx|
    opening = ctx.captures[:fence]
    str(opening) >> str(opening.to_s.chr).repeat
  end

  non_indent_space >> fence_run.ignore >> whitespace.repeat >> line_ending_or_eof
end
|
|
152
|
+
|
|
153
|
+
# Consumes, on a content line of a fenced code block, up to as many leading
# spaces as were captured in :fence_indent for the opening fence. When the
# opening fence was not indented, the atom only requires that input remains.
def consume_fenced_indent
  dynamic do |_source, context|
    width = context.captures[:fence_indent].to_s.length
    width.positive? ? str(' ').repeat(0, width) : any.present?
  end
end
|
|
163
|
+
|
|
164
|
+
# Fenced code block: opening fence, content lines captured under
# :code_block, and a terminator. The block ends at a closing fence, where
# the enclosing blocks stop continuing, or at the end of input.
rule(:fenced_code_block) do
  line_content = line_verbatim | blank_line_verbatim.output(ln: '')
  content_line = continuation >> code_fence_close.absent? >> consume_fenced_indent >> line_content
  terminator = (continuation >> code_fence_close) | continuation.absent? | any.absent?

  code_fence_open >> content_line.repeat.as(:code_block) >> terminator
end
|
|
176
|
+
|
|
177
|
+
# Block quote marker: up to three spaces, '>', and one optional space.
rule(:block_quote_marker) do
  non_indent_space >> str('>') >> str(' ').maybe
end
|
|
182
|
+
|
|
183
|
+
# Per-line continuation test for a block quote, including laziness.
# Laziness is context-sensitive: it "only applies to lines that would have
# been continuations of paragraphs had they been prepended with block quote
# markers", so a lazy line is accepted only while the innermost open block
# (the :block capture) is a paragraph.
#
# Uses `dynamic`, so it has to be a method, not a `rule`.
def block_quote_cont
  dynamic do |_source, context|
    lazy_line =
      if context.captures[:block] == :paragraph
        paragraph_interrupt.absent? >> paragraph_continued_line.present?
      else
        any.absent? >> any.present? # can never match
      end
    block_quote_marker | lazy_line
  end
end
|
|
199
|
+
|
|
200
|
+
# Block quote: a marker, then, inside a fresh capture scope, one or more
# nested blocks captured under :block_quote. Each further block must be
# preceded by a successful continuation check. An empty quote at the end of
# input is represented through `any.absent?.output('')`.
rule(:block_quote) do
  quoted = scope do
    later_block = continuation >> (block | any.absent?.output(''))
    open_block(:block_quote, block_quote_cont) >>
      ((block | any.absent?.output('')) >> later_block.repeat).as(:block_quote)
  end
  block_quote_marker >> quoted
end
|
|
212
|
+
|
|
213
|
+
# An IAL or ALD standing alone on a line, preceded by at most three
# whitespace characters.
rule(:ial_block) do
  attribute_list = ial | ald
  whitespace.repeat(0, 3) >> attribute_list >> line_ending_or_eof
end
|
|
217
|
+
|
|
218
|
+
# Anything that ends a running paragraph when it appears at the start of a
# line. The alternatives are tried in the order listed.
rule(:paragraph_interrupt) do
  [
    blank_line, atx_heading, thematic_break,
    code_fence_open, block_quote, ial_block, extension,
    unordered_list_marker, ordered_list_marker, definition_marker
  ].reduce { |alternatives, atom| alternatives | atom }
end
|
|
223
|
+
|
|
224
|
+
# One paragraph line captured under :ln. At the end of input the line must
# be non-empty; before a line terminator it may be empty.
rule(:paragraph_line) do
  last_line = line_char.repeat(1).as(:ln) >> any.absent?
  terminated_line = line_char.repeat.as(:ln) >> line_ending
  last_line | terminated_line
end

# A follow-on paragraph line; leading whitespace is marked with `.ignore`.
rule(:paragraph_continued_line) do
  whitespace.repeat.ignore >> paragraph_line
end
|
|
232
|
+
|
|
233
|
+
# Paragraph: a first line plus continued lines, captured under :p.
#
# A paragraph is a leaf block, yet it still opens its own scope: the
# laziness rules of block quotes and lists need to know whether the
# innermost open block is a paragraph that could be continued.
rule(:paragraph) do
  body = scope do
    continued = continuation >> paragraph_continued_line
    open_block(:paragraph, paragraph_interrupt.absent?) >>
      (paragraph_line >> continued.repeat).as(:p)
  end
  non_indent_space >> body
end
|
|
248
|
+
|
|
249
|
+
# Setext underline: up to three spaces, a run of '-' or a run of '='
# (captured under :heading), optional trailing whitespace, end of line.
rule(:setext_underline) do
  underline = str('-').repeat(1) | str('=').repeat(1)
  non_indent_space >> underline.as(:heading) >> whitespace.repeat.ignore >> line_ending_or_eof
end
|
|
257
|
+
|
|
258
|
+
# Setext heading: one or more text lines (captured under :text) followed by
# an underline. A text line may be neither a paragraph interrupt nor an
# underline itself.
rule(:setext_heading) do
  is_text_line = paragraph_interrupt.absent? >> setext_underline.absent?
  later_line = continuation >> is_text_line >> paragraph_continued_line

  is_text_line >>
    non_indent_space >>
    (paragraph_line >> later_line.repeat).as(:text) >>
    continuation >>
    setext_underline
end
|
|
273
|
+
|
|
274
|
+
# ===== KRAMDOWN EXTENSIONS =====
|
|
275
|
+
|
|
276
|
+
# Inline Attribute List (IAL): {:.class #id key="value"}
# Can follow a block element to attach attributes to it.

# ".name" - a class reference.
rule(:ial_class) do
  str('.') >> match['\\w\\-'].repeat(1)
end

# "#name" - an id reference.
rule(:ial_id) do
  str('#') >> match['\\w\\-'].repeat(1)
end

# key=value, where the value is double-quoted, single-quoted, or a bare run
# of characters other than whitespace and '}'.
rule(:ial_key_value) do
  double_quoted = str('"') >> match['^"'].repeat(0) >> str('"')
  single_quoted = str("'") >> match["^'"].repeat(0) >> str("'")
  bare = match['^\\s\\}'].repeat(1)

  match['\\w\\-'].repeat(1) >> str('=') >> (double_quoted | single_quoted | bare)
end

# One or more attributes, each optionally preceded by whitespace.
rule(:ial_content) do
  attribute = whitespace.repeat >> (ial_class | ial_id | ial_key_value)
  attribute.repeat(1)
end

# The complete IAL; its content is captured under :ial.
rule(:ial) do
  str('{:') >> ial_content.as(:ial) >> str('}')
end
|
|
305
|
+
|
|
306
|
+
# Attribute List Definition (ALD): {:name: #id .class key="value"}
# Gives a name to an attribute list so that it can be referenced.

# The definition name: word characters followed by a colon.
rule(:ald_name) do
  match['\\w'].repeat(1) >> str(':')
end

# The complete ALD; name captured under :ald_name, attributes under :ial.
rule(:ald) do
  str('{:') >>
    ald_name.as(:ald_name) >>
    whitespace.repeat(1) >>
    ial_content.as(:ial) >>
    str('}')
end
|
|
315
|
+
|
|
316
|
+
# Block-level extension: {::extension_name options /}
# Common extensions: {::toc}, {::options ... /}

# Extension name: one or more lowercase ASCII letters.
rule(:extension_name) do
  match['a-z'].repeat(1)
end

# A key=value option; the value is double-quoted, single-quoted, or a bare
# run of characters other than whitespace, '/' and '}'.
rule(:extension_option) do
  double_quoted = str('"') >> match['^"'].repeat(0) >> str('"')
  single_quoted = str("'") >> match["^'"].repeat(0) >> str("'")
  bare = match['^\\s/\\}'].repeat(1)

  match['\\w\\-'].repeat(1) >> str('=') >> (double_quoted | single_quoted | bare)
end

# Zero or more options, each preceded by at least one whitespace character.
rule(:extension_options) do
  spaced_option = whitespace.repeat(1) >> extension_option
  spaced_option.repeat
end

# Self-closing form: {::name options /}
rule(:extension_self_closing) do
  header = str('{::') >> extension_name.as(:ext_name) >> extension_options.as(:ext_options)
  header >> whitespace.repeat >> str('/}')
end
|
|
338
|
+
|
|
339
|
+
# Extension with a body: {::name options}body{:/} or {::name options}body{:/name}
#
# The body (captured under :ext_body) runs up to the first "{:/". The end
# tag may carry an extension name, as kramdown allows; previously only the
# bare "{:/}" was accepted and a named end tag made the whole rule fail.
# The name in the end tag is not captured and is not checked against the
# opening name.
rule(:extension_with_body) do
  opening = str('{::') >> extension_name.as(:ext_name) >> extension_options.as(:ext_options) >> str('}')
  body_char = str('{:/').absent? >> any

  opening >>
    body_char.repeat.as(:ext_body) >>
    str('{:/') >> extension_name.maybe >> str('}')
end
|
|
344
|
+
|
|
345
|
+
# Either extension form, captured under :extension. The self-closing form
# is tried first.
rule(:extension) do
  either_form = extension_self_closing | extension_with_body
  either_form.as(:extension)
end
|
|
348
|
+
|
|
349
|
+
# Block math: "$$" followed by a line terminator, then everything up to the
# next "$$" (captured under :math_content), then the closing "$$".
rule(:block_math) do
  content_char = str('$$').absent? >> any
  str('$$') >> line_ending >> content_char.repeat.as(:math_content) >> str('$$')
end
|
|
355
|
+
|
|
356
|
+
# ===== GFM TABLE PARSING RULES =====
|
|
357
|
+
|
|
358
|
+
# Table cell: any run (possibly empty) of characters other than '|' and
# line terminators, captured under :cell.
rule(:table_cell) do
  cell_char = str('|').absent? >> line_char
  cell_char.repeat.as(:cell)
end

# Table row, in one of two shapes (tried in this order):
#   * leading pipe: '|' then one or more "cell |" groups, captured as :row
#   * no leading pipe: "cell |", further "cell |" groups (:row_rest) and an
#     optional final cell (:last_cell), all captured as :row
rule(:table_row) do
  piped_cell = table_cell >> whitespace.maybe >> str('|') >> whitespace.maybe
  with_leading_pipe = str('|') >> whitespace.maybe >> piped_cell.repeat(1).as(:row)

  further_cell = table_cell >> whitespace.maybe >> str('|') >> whitespace.maybe
  without_leading_pipe =
    table_cell >>
    whitespace.maybe >>
    str('|') >>
    whitespace.maybe >>
    further_cell.repeat.as(:row_rest) >>
    table_cell.maybe.as(:last_cell)

  with_leading_pipe | without_leading_pipe.as(:row)
end
|
|
388
|
+
|
|
389
|
+
# Separator cell: one or more dashes with an optional colon on either side.
rule(:table_separator_cell) do
  str(':').maybe >> str('-').repeat(1) >> str(':').maybe
end

# Separator row, with or without a leading pipe (tried in that order).
# Every separator cell is captured under :sep.
rule(:table_separator_row) do
  piped_sep = table_separator_cell.as(:sep) >> whitespace.maybe >> str('|') >> whitespace.maybe
  with_leading_pipe = str('|') >> whitespace.maybe >> piped_sep.repeat(1)

  further_sep = table_separator_cell.as(:sep) >> whitespace.maybe >> str('|') >> whitespace.maybe
  without_leading_pipe =
    table_separator_cell.as(:sep) >>
    whitespace.maybe >>
    str('|') >>
    whitespace.maybe >>
    further_sep.repeat >>
    table_separator_cell.as(:sep).maybe

  with_leading_pipe | without_leading_pipe
end
|
|
419
|
+
|
|
420
|
+
# GFM table: header row (:table_header), separator row (:table_separator)
# and one or more body rows (:table_body, each row as :table_body_row).
#
# A body row may end with a line terminator or with the end of input.
# Previously each body row required a line terminator, so a table whose
# last row ended the input without a trailing newline failed to match and
# fell through to `paragraph`. The repetition cannot loop at EOF because
# `table_row` always needs at least one '|'.
rule(:table) do
  body_row = table_row.as(:table_body_row) >> line_ending_or_eof

  table_row.as(:table_header) >>
    line_ending >>
    table_separator_row.as(:table_separator) >>
    line_ending >>
    body_row.repeat(1).as(:table_body)
end
|
|
430
|
+
|
|
431
|
+
# Any block-level construct. The order is significant: alternatives are
# tried top to bottom and `paragraph` is the final fallback.
rule(:block) do
  [
    blank_line, eob_marker, atx_heading, thematic_break,
    indented_code_block, fenced_code_block,
    block_quote, setext_heading,
    unordered_list, ordered_list, definition_list,
    footnote_definition, abbreviation_definition,
    ial_block, extension, block_math,
    table, paragraph
  ].reduce { |alternatives, atom| alternatives | atom }
end
|
|
440
|
+
|
|
441
|
+
# ===== LIST PARSING RULES =====
|
|
442
|
+
|
|
443
|
+
# Constructs that end the current list item text when they appear at the
# start of a line. Alternatives are tried in the order listed.
rule(:list_interrupt) do
  [
    blank_line.repeat(1), atx_heading, thematic_break,
    code_fence_open, block_quote,
    unordered_list_marker, ordered_list_marker
  ].reduce { |alternatives, atom| alternatives | atom }
end
|
|
449
|
+
|
|
450
|
+
# Bullet marker: up to three spaces, one of '-', '*' or '+', then at least
# one space.
rule(:unordered_list_marker) do
  bullet = match['-*+']
  non_indent_space >> bullet >> str(' ').repeat(1)
end
|
|
456
|
+
|
|
457
|
+
# Ordered list marker: up to three spaces, one to nine digits, '.' or ')',
# then at least one space.
#
# The digit run follows the CommonMark definition (1-9 arabic digits, any
# of which may be 0). The previous pattern required a non-zero first digit
# and allowed an unbounded number of digits, so markers such as "0." or
# "007." were not recognised.
rule(:ordered_list_marker) do
  non_indent_space >>
    match['0-9'].repeat(1, 9) >>
    match['\\.)'] >>
    str(' ').repeat(1)
end
|
|
465
|
+
|
|
466
|
+
# Continuation line of a list item's paragraph. Two shapes, tried in order:
#   * four spaces or a tab, then a line that does not start with a nested
#     list marker
#   * a line that does not start with a nested list marker
# NOTE(review): in the first shape the nested-marker check runs after the
# indent has been consumed; list_item_paragraph also checks before calling
# this rule. Confirm that placement is intended.
rule(:list_continuation_line) do
  indented_line = (str('    ') | str("\t")) >> nested_list_marker.absent? >> line_verbatim
  plain_line = nested_list_marker.absent? >> line_verbatim
  indented_line | plain_line
end
|
|
475
|
+
|
|
476
|
+
# Detects a nested list marker: four spaces or a tab followed by either a
# bullet marker or a numbered marker, each with at least one trailing space.
rule(:nested_list_marker) do
  bullet = match['-*+'] >> str(' ').repeat(1)
  numbered = match['1-9'] >> match['0-9'].repeat >> match['\\.)'] >> str(' ').repeat(1)

  (str('    ') | str("\t")) >> ((bullet) | (numbered))
end
|
|
484
|
+
|
|
485
|
+
# Thematic break used as the content of a list item (e.g. "- * * *").
#
# Built on thematic_break_char so that it accepts three or more markers,
# consistent with the top-level thematic_break rule. The earlier version
# hard-coded exactly three markers, so "- * * * *" was read as paragraph
# text. Whitespace after each marker, including the last one, is consumed
# by thematic_break_char.
rule(:thematic_break_in_list) do
  marker_run = thematic_break_char('*') | thematic_break_char('-') | thematic_break_char('_')
  marker_run >> line_ending_or_eof
end
|
|
493
|
+
|
|
494
|
+
# Bullet list item. The marker is captured as :list_marker; the content,
# captured under :li, is either a thematic break (replaced through
# `.output(hr: true)`) or regular list item content.
rule(:unordered_list_item) do
  rule_content = thematic_break_in_list.output(hr: true).as(:li)
  regular_content = list_item_content.as(:li)

  unordered_list_marker.capture(:list_marker) >> (rule_content | regular_content)
end
|
|
503
|
+
|
|
504
|
+
# Content of a list item: its paragraph, then any number of nested blocks.
# Each nested block must pass the continuation check and must not begin
# with a list interrupt.
rule(:list_item_content) do
  nested = continuation >> list_interrupt.absent? >> nested_block
  list_item_paragraph >> nested.repeat
end
|
|
513
|
+
|
|
514
|
+
# A nested list: four spaces or a tab, then a nested bullet list or, failing
# that, a nested numbered list.
rule(:nested_block) do
  bullet_list = (str('    ') | str("\t")) >> nested_unordered_list
  numbered_list = (str('    ') | str("\t")) >> nested_ordered_list
  bullet_list | numbered_list
end
|
|
519
|
+
|
|
520
|
+
# Nested bullet list, captured under :ul. Items after the first must pass
# the continuation check and be indented by four spaces or a tab.
rule(:nested_unordered_list) do
  later_item = continuation >> (str('    ') | str("\t")) >> nested_unordered_list_item
  (nested_unordered_list_item >> later_item.repeat).as(:ul)
end
|
|
531
|
+
|
|
532
|
+
# Nested ordered list (4-space indented), captured under :ol. Items after
# the first must pass the continuation check and be indented by four spaces
# or a tab.
rule(:nested_ordered_list) do
  (
    nested_ordered_list_item >>
    (
      continuation >>
      (str('    ') | str("\t")) >>
      nested_ordered_list_item
    ).repeat
  ).as(:ol)
end

# Nested unordered list item (simpler format - just text content).
# The text after the marker is captured under :li.
#
# This rule used to be defined twice in this file. The earlier definition
# (`unordered_list_marker >> line_verbatim`) was replaced by this one when
# the class was loaded and so never took effect; it has been removed.
rule(:nested_unordered_list_item) do
  unordered_list_marker >> line_text.as(:li)
end
|
|
553
|
+
|
|
554
|
+
# A non-empty line and its terminator (or EOF), without the :ln wrapper.
rule(:line_text) do
  text = line_char.repeat(1)
  text >> line_ending_or_eof
end

# Nested numbered list item: the marker, then the line text under :li.
rule(:nested_ordered_list_item) do
  item_text = line_text.as(:li)
  ordered_list_marker >> item_text
end
|
|
563
|
+
|
|
564
|
+
# Paragraph of a list item, captured under :p: the rest of the marker line
# plus continuation lines. A continuation line must pass the continuation
# check and must start with neither a list interrupt nor a nested marker.
rule(:list_item_paragraph) do
  follow_on =
    continuation >>
    list_interrupt.absent? >>
    nested_list_marker.absent? >>
    list_continuation_line

  (line_verbatim >> follow_on.repeat).as(:p)
end
|
|
576
|
+
|
|
577
|
+
# Numbered list item: the marker, then the item content under :li.
rule(:ordered_list_item) do
  content = list_item_content.as(:li)
  ordered_list_marker >> content
end
|
|
582
|
+
|
|
583
|
+
# Bullet list, captured under :ul. Each further item must pass the
# continuation check, must not be a thematic break, and may be preceded by
# one blank line.
rule(:unordered_list) do
  later_item = continuation >> thematic_break.absent? >> blank_line.maybe >> unordered_list_item
  (unordered_list_item >> later_item.repeat).as(:ul)
end

# Numbered list, captured under :ol; same item-separation rules as the
# bullet list.
rule(:ordered_list) do
  later_item = continuation >> thematic_break.absent? >> blank_line.maybe >> ordered_list_item
  (ordered_list_item >> later_item.repeat).as(:ol)
end
|
|
608
|
+
|
|
609
|
+
# ===== KRAMDOWN DEFINITION LIST PARSING RULES =====
|
|
610
|
+
|
|
611
|
+
# Definition marker: up to three spaces, ':', then at least one space.
rule(:definition_marker) do
  colon = str(':')
  non_indent_space >> colon >> str(' ').repeat(1)
end
|
|
617
|
+
|
|
618
|
+
# One line of a definition term: up to three spaces, then a non-empty line
# that does not start with ':'.
rule(:definition_term_line) do
  non_indent_space >> str(':').absent? >> line_verbatim
end

# Definition term, captured under :def_term. It may span several lines;
# further lines must pass the continuation check and be neither a
# definition marker nor blank.
rule(:definition_term) do
  further_line =
    continuation >>
    definition_marker.absent? >>
    blank_line.absent? >>
    definition_term_line

  (definition_term_line >> further_line.repeat).as(:def_term)
end
|
|
638
|
+
|
|
639
|
+
# Text of one definition (what follows the ':'), captured under
# :def_content. Further lines must pass the continuation check, be neither
# a definition marker nor blank, and may start with four spaces or a tab.
rule(:definition_content) do
  further_line =
    continuation >>
    definition_marker.absent? >>
    blank_line.absent? >>
    (str('    ') | str("\t")).maybe >>
    line_verbatim

  (line_verbatim >> further_line.repeat).as(:def_content)
end

# One definition: the marker followed by its content.
rule(:definition_item) do
  definition_marker >> definition_content
end
|
|
657
|
+
|
|
658
|
+
# A term followed by one or more definitions; every definition must pass
# the continuation check.
rule(:definition_list_item) do
  definition = continuation >> definition_item
  definition_term >> definition.repeat(1)
end

# Definition list, captured under :dl: one or more term/definition groups.
# Groups after the first must pass the continuation check and may be
# preceded by one blank line.
rule(:definition_list) do
  later_group = continuation >> blank_line.maybe >> definition_list_item
  (definition_list_item >> later_group.repeat).as(:dl)
end
|
|
678
|
+
|
|
679
|
+
# ===== KRAMDOWN FOOTNOTE PARSING RULES =====
|
|
680
|
+
|
|
681
|
+
# Footnote label: "[^name]"; the name (anything except ']') is captured
# under :fn_id.
rule(:footnote_id) do
  label = match['^\]'].repeat(1)
  str('[^') >> label.as(:fn_id) >> str(']')
end

# Footnote definition: [^name]: content
# The first line is captured under :fn_content. Following lines that pass
# the continuation check and start with one to four indent units (four
# spaces or a tab each) are captured under :fn_content_continued.
rule(:footnote_definition) do
  continued_line = continuation >> (str('    ') | str("\t")).repeat(1, 4) >> line_verbatim

  non_indent_space >>
    footnote_id >>
    str(':') >>
    whitespace.repeat >>
    line_verbatim.as(:fn_content) >>
    continued_line.repeat.as(:fn_content_continued)
end
|
|
698
|
+
|
|
699
|
+
# ===== KRAMDOWN ABBREVIATION PARSING RULES =====
|
|
700
|
+
|
|
701
|
+
# Abbreviation label: "*[TERM]"; the term (anything except ']') is captured
# under :abbr_term.
rule(:abbreviation_term) do
  term = match['^\]'].repeat(1)
  str('*[') >> term.as(:abbr_term) >> str(']')
end

# Abbreviation definition: *[TERM]: definition
# The rest of the line (possibly empty) is captured under :abbr_def.
rule(:abbreviation_definition) do
  non_indent_space >>
    abbreviation_term >>
    str(':') >>
    whitespace.repeat >>
    line_char.repeat.as(:abbr_def) >>
    line_ending_or_eof
end
|
|
714
|
+
|
|
715
|
+
# ===== KRAMDOWN EOB (End of Block) MARKER =====
|
|
716
|
+
|
|
717
|
+
# End-of-block marker: a '^' alone on its line, with optional whitespace on
# either side.
rule(:eob_marker) do
  caret = str('^')
  whitespace.repeat >> caret >> whitespace.repeat >> line_ending_or_eof
end
|
|
721
|
+
|
|
722
|
+
# Entry point of the grammar.
root :document

# A document is any number of blocks.
rule(:document) { block.repeat }
|
|
726
|
+
|
|
727
|
+
# Reads +filename+ and parses its content with a fresh parser instance.
#
# @param filename [String] path of the file to read
# @return [Object, nil] the Parslet parse result, or nil when parsing
#   fails (the failure cause tree is printed to standard output)
def self.parse(filename)
  content = File.read(filename)
  new.parse(content)
rescue Parslet::ParseFailed => e
  puts e.parse_failure_cause.ascii_tree
  # Explicit nil, as in parse_with_processing, rather than relying on the
  # return value of puts.
  nil
end
|
|
733
|
+
|
|
734
|
+
# Parses +content+ and passes the resulting tree through
# AstProcessor.process (escape sequences, etc.).
#
# @param content [String] source text to parse
# @return [Object, nil] the processed tree, or nil when parsing fails
#   (the failure cause tree is printed to standard output)
def self.parse_with_processing(content)
  AstProcessor.process(new.parse(content))
rescue Parslet::ParseFailed => failure
  puts failure.parse_failure_cause.ascii_tree
  nil
end
|
|
742
|
+
end
|
|
743
|
+
end
|
|
744
|
+
end
|
|
745
|
+
end
|