coradoc-markdown 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/lib/coradoc/markdown/errors.rb +28 -0
  4. data/lib/coradoc/markdown/model/abbreviation.rb +27 -0
  5. data/lib/coradoc/markdown/model/attribute_list.rb +98 -0
  6. data/lib/coradoc/markdown/model/base.rb +86 -0
  7. data/lib/coradoc/markdown/model/blockquote.rb +21 -0
  8. data/lib/coradoc/markdown/model/code.rb +11 -0
  9. data/lib/coradoc/markdown/model/code_block.rb +24 -0
  10. data/lib/coradoc/markdown/model/definition_item.rb +24 -0
  11. data/lib/coradoc/markdown/model/definition_list.rb +47 -0
  12. data/lib/coradoc/markdown/model/definition_term.rb +21 -0
  13. data/lib/coradoc/markdown/model/document.rb +39 -0
  14. data/lib/coradoc/markdown/model/emphasis.rb +11 -0
  15. data/lib/coradoc/markdown/model/extension.rb +92 -0
  16. data/lib/coradoc/markdown/model/footnote.rb +31 -0
  17. data/lib/coradoc/markdown/model/footnote_reference.rb +22 -0
  18. data/lib/coradoc/markdown/model/heading.rb +44 -0
  19. data/lib/coradoc/markdown/model/highlight.rb +18 -0
  20. data/lib/coradoc/markdown/model/horizontal_rule.rb +16 -0
  21. data/lib/coradoc/markdown/model/image.rb +19 -0
  22. data/lib/coradoc/markdown/model/link.rb +19 -0
  23. data/lib/coradoc/markdown/model/list.rb +22 -0
  24. data/lib/coradoc/markdown/model/list_item.rb +29 -0
  25. data/lib/coradoc/markdown/model/math.rb +50 -0
  26. data/lib/coradoc/markdown/model/paragraph.rb +28 -0
  27. data/lib/coradoc/markdown/model/strikethrough.rb +18 -0
  28. data/lib/coradoc/markdown/model/strong.rb +11 -0
  29. data/lib/coradoc/markdown/model/table.rb +13 -0
  30. data/lib/coradoc/markdown/model/text.rb +15 -0
  31. data/lib/coradoc/markdown/parser/ast_processor.rb +543 -0
  32. data/lib/coradoc/markdown/parser/block_parser.rb +745 -0
  33. data/lib/coradoc/markdown/parser/html_entities.rb +2149 -0
  34. data/lib/coradoc/markdown/parser/inline_parser.rb +274 -0
  35. data/lib/coradoc/markdown/parser/parslet_extras.rb +215 -0
  36. data/lib/coradoc/markdown/parser.rb +11 -0
  37. data/lib/coradoc/markdown/parser_util.rb +90 -0
  38. data/lib/coradoc/markdown/serializer.rb +199 -0
  39. data/lib/coradoc/markdown/toc_generator.rb +215 -0
  40. data/lib/coradoc/markdown/transform/from_core_model.rb +325 -0
  41. data/lib/coradoc/markdown/transform/text_extraction.rb +19 -0
  42. data/lib/coradoc/markdown/transform/to_core_model.rb +287 -0
  43. data/lib/coradoc/markdown/transformer.rb +463 -0
  44. data/lib/coradoc/markdown/version.rb +7 -0
  45. data/lib/coradoc/markdown.rb +190 -0
  46. metadata +173 -0
@@ -0,0 +1,745 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Markdown
5
+ module Parser
6
+ autoload :ParsletExtras, "#{__dir__}/parslet_extras"
7
+
8
+ class BlockParser < Parslet::Parser
9
+ using ParsletExtras
10
+
11
# Debugging aid for grammar development. Returns a zero-width atom that, each
# time the parser reaches it, prints +msg+ together with the current
# line/column and pretty-prints the capture scope.
#
# @param msg [String] label printed in front of the position
# @return [Parslet::Atoms::Base] an atom that always succeeds and consumes nothing
def debug(msg)
  dynamic do |source, context|
    puts "#{msg} @ #{source.line_and_column}:"
    pp context.captures
    # One of the two lookaheads always holds (mid-input or end of input),
    # so the atom never fails and never advances.
    any.present? | any.absent?
  end
end
20
+
21
# One line terminator: LF, CRLF or a lone CR. Dropped from the parse tree.
rule(:line_ending) do
  terminator = str("\n") | str("\r\n") | str("\r")
  terminator.ignore
end

# A line terminator, or the end of input (zero-width).
rule(:line_ending_or_eof) do
  line_ending | any.absent?
end

# A single space or tab.
rule(:whitespace) do
  match[" \t"]
end

# A whitespace-only line, dropped from the parse tree.
# NOTE: the end-of-input branch insists on at least one whitespace character;
# a zero-width match at EOF would let an enclosing repeat loop forever.
rule(:blank_line) do
  dangling = whitespace.repeat(1) >> any.absent?
  terminated = whitespace.repeat >> line_ending
  (dangling | terminated).ignore
end

# Same shape as :blank_line, but the whitespace is kept under :ln.
rule(:blank_line_verbatim) do
  dangling = whitespace.repeat(1).as(:ln) >> any.absent?
  terminated = whitespace.repeat.as(:ln) >> line_ending
  dangling | terminated
end

# Any character that is not CR or LF.
rule(:line_char) do
  match["^\r\n"]
end

# A non-empty line captured under :ln, followed by its terminator or EOF.
rule(:line_verbatim) do
  line_char.repeat(1).as(:ln) >> line_ending_or_eof
end

# Up to three leading spaces.
rule(:non_indent_space) do
  str(' ').repeat(0, 3)
end
34
+
35
# Block nesting relies on Parslet's `dynamic` and `scope`: every container
# block records, in the capture scope, a parser that recognises "this line
# still belongs to me". Line-level rules run inside the innermost parser
# expression, and by applying that recorded parser at the start of each line
# they learn at runtime which blocks they are nested in.
#
# `continuation` MUST NOT be a `rule`: a rule's result would be cached in a
# failure state and nested alternatives would stop working.
#
# @return [Parslet::Atoms::Base] applies the recorded :cont parser (output
#   discarded) when one exists; otherwise only requires that input remains
def continuation
  dynamic do |_source, context|
    captures = context.captures
    captures.key?(:cont) ? captures[:cont].ignore : any.present?
  end
end
53
+
54
# Registers a newly opened block in the current capture scope.
#
# Stores +kind+ under :block and the line-continuation parser under :cont.
# When the parent scope already carries a :cont parser, the stored parser is
# the parent's followed by +cont_rule+, so every enclosing block is checked
# before this one.
#
# @param kind [Symbol] block kind, e.g. :block_quote or :paragraph
# @param cont_rule [Parslet::Atoms::Base] parser that accepts a continuing line
# @return [Parslet::Atoms::Base] zero-width atom that always succeeds
def open_block(kind, cont_rule)
  dynamic do |_source, context|
    captures = context.captures
    enclosing = captures.current.parent
    captures[:cont] =
      if enclosing.key?(:cont)
        enclosing[:cont] >> cont_rule
      else
        cont_rule
      end
    captures[:block] = kind
    any.present? | any.absent?
  end
end
64
+
65
# Optional closing sequence of an ATX heading: whitespace, any number of '#',
# trailing whitespace, then a line ending (not consumed) or end of input.
rule(:atx_ending_seq) do
  end_of_line = line_ending.present? | any.absent?
  whitespace.repeat(1) >> str('#').repeat >> whitespace.repeat >> end_of_line
end

# A backslash-escaped hash, which must not start a heading.
rule(:escaped_hash) { str('\\') >> str('#') }
76
+
77
# ATX heading: up to three spaces, one to six '#' (captured as :heading),
# optional heading text (captured as :text), optional closing sequence.
rule(:atx_heading) do
  heading_char = atx_ending_seq.absent? >> line_char
  # The leading lookahead catches the case where a single space is all that
  # separates the opening hashes from the closing sequence; without it
  # repeat(1) below would swallow that space.
  title = atx_ending_seq.absent? >>
          str(' ').repeat(1) >>
          heading_char.repeat(1).as(:text)

  non_indent_space >>
    escaped_hash.absent? >>
    str('#').repeat(1, 6).as(:heading) >>
    str('#').absent? >>
    title.maybe >>
    atx_ending_seq.maybe >>
    line_ending_or_eof
end
94
+
95
# Three or more occurrences of +c+, each optionally followed by whitespace.
#
# @param c [String] the marker character
# @return [Parslet::Atoms::Base]
def thematic_break_char(c)
  spaced_marker = str(c) >> whitespace.repeat
  spaced_marker.repeat(3)
end

# Thematic break: up to three spaces, then a run of '-', '_' or '*' markers.
# The matched text is replaced by { hr: true } in the parse tree.
rule(:thematic_break) do
  marker_run = thematic_break_char('-') | thematic_break_char('_') | thematic_break_char('*')
  non_indent_space >> marker_run.output(hr: true) >> line_ending_or_eof
end
106
+
107
# One line of indented code: the indent prefix followed by a verbatim line.
# NOTE(review): the space literals in these three rules are reproduced as
# shown in the registry diff, which may have collapsed a run of spaces;
# confirm the indent width against the released file.
rule(:indented_code_line) { str(' ') >> line_verbatim }

# A blank line inside indented code, emitted as an empty :ln. It only matches
# when the next line (after the block continuation) is more indented code or
# another blank line, so trailing blank lines stay outside the block.
rule(:indented_code_blank_line) do
  more_code_ahead = continuation >> (str(' ') | blank_line_verbatim)
  blank_line_verbatim.output(ln: '') >> more_code_ahead.present?
end

# Indented code block: a first code line plus any continued code/blank lines,
# captured under :code_block.
rule(:indented_code_block) do
  later_line = continuation >> (indented_code_line | indented_code_blank_line)
  (indented_code_line >> later_line.repeat).as(:code_block)
end
128
+
129
# Optional info string after an opening code fence, captured under :info.
#
# Built with `dynamic` because the allowed characters depend on the fence
# captured under :fence: after a backtick fence the info string may not
# contain a backtick, after a tilde fence any line character is accepted.
def code_fence_info
  dynamic do |_source, context|
    fence_char = context.captures[:fence].to_s.chr
    info_char =
      if fence_char == '`'
        str('`').absent? >> line_char
      else
        line_char
      end
    info_char.repeat(1).as(:info).maybe
  end
end
138
+
139
# Opening code fence. Captures the indentation as :fence_indent and the fence
# itself (three or more backticks or tildes) as :fence, then the info string.
rule(:code_fence_open) do
  fence_run = str('`').repeat(3) | str('~').repeat(3)
  non_indent_space.capture(:fence_indent) >>
    fence_run.capture(:fence).ignore >>
    code_fence_info >>
    line_ending_or_eof
end

# Closing code fence: the captured opening fence, optionally followed by more
# of the same fence character, alone on its line.
rule(:code_fence_close) do
  closing_run = dynamic do |_source, context|
    opener = context.captures[:fence]
    str(opener) >> str(opener.to_s.chr).repeat
  end
  non_indent_space >> closing_run.ignore >> line_ending_or_eof
end
152
+
153
# Strips from a fenced-code content line at most as many leading spaces as
# the opening fence was indented by (captured as :fence_indent). With an
# unindented fence it only requires that input remains.
def consume_fenced_indent
  dynamic do |_source, context|
    width = context.captures[:fence_indent].to_s.length
    width.positive? ? str(' ').repeat(0, width) : any.present?
  end
end
163
+
164
# Fenced code block: opening fence, content lines captured under :code_block,
# then a terminator.
rule(:fenced_code_block) do
  # A content line must still belong to the enclosing blocks, must not be
  # the closing fence, and loses the fence's indentation.
  content_line = continuation >>
                 code_fence_close.absent? >>
                 consume_fenced_indent >>
                 (line_verbatim | blank_line_verbatim.output(ln: ''))

  # The block ends at a closing fence, where the enclosing block stops
  # continuing, or at end of input.
  terminator = (continuation >> code_fence_close) | continuation.absent? | any.absent?

  code_fence_open >> content_line.repeat.as(:code_block) >> terminator
end
176
+
177
# Block quote marker: up to three spaces, '>', and one optional space.
rule(:block_quote_marker) { non_indent_space >> str('>') >> str(' ').maybe }
182
+
183
# Continuation check for block quotes, including laziness, which is
# context-sensitive: it "only applies to lines that would have been
# continuations of paragraphs had they been prepended with block quote
# markers", so we must *actually* be inside a continuable paragraph.
#
# A line continues the quote when it starts with a marker, or, when the
# innermost open block is a paragraph, when it is not a paragraph interrupt
# and looks like a continued paragraph line (checked without consuming it).
#
# Cannot be a `rule`, as usual with `dynamic`.
def block_quote_cont
  dynamic do |_source, context|
    lazy_line =
      if context.captures[:block] == :paragraph
        paragraph_interrupt.absent? >> paragraph_continued_line.present?
      else
        any.absent? >> any.present? # never match
      end
    block_quote_marker | lazy_line
  end
end
199
+
200
# Block quote: a marker, then a new capture scope in which the quote
# registers its continuation parser and parses nested blocks, captured under
# :block_quote. A marker directly followed by end of input yields ''.
rule(:block_quote) do
  block_quote_marker >> scope do
    # Called once per position so each occurrence is its own atom.
    quoted_content = -> { block | any.absent?.output('') }
    continued_content = continuation >> quoted_content.call

    open_block(:block_quote, block_quote_cont) >>
      (quoted_content.call >> continued_content.repeat).as(:block_quote)
  end
end
212
+
213
# An IAL or ALD alone on its line, preceded by at most three whitespace
# characters (applies to the next block, or defines a named attribute list).
rule(:ial_block) do
  attribute_list = ial | ald
  whitespace.repeat(0, 3) >> attribute_list >> line_ending_or_eof
end
217
+
218
# Constructs that end a paragraph when they start the next line.
# Alternatives are tried in the order listed.
rule(:paragraph_interrupt) do
  blank_line |
    atx_heading |
    thematic_break |
    code_fence_open |
    block_quote |
    ial_block |
    extension |
    unordered_list_marker |
    ordered_list_marker |
    definition_marker
end
223
+
224
# One paragraph line captured under :ln. At end of input the line must be
# non-empty; before a line ending it may be empty.
rule(:paragraph_line) do
  final_line = line_char.repeat(1).as(:ln) >> any.absent?
  terminated_line = line_char.repeat.as(:ln) >> line_ending
  final_line | terminated_line
end

# A follow-on paragraph line; its leading whitespace is discarded.
rule(:paragraph_continued_line) do
  whitespace.repeat.ignore >> paragraph_line
end
232
+
233
# Paragraph: a first line plus continued lines, captured under :p.
#
# `paragraph` is a leaf block, so skipping `scope` is tempting, but the
# laziness rules for block quotes and lists need to know whether we are
# actually inside a paragraph that could be continued.
rule(:paragraph) do
  non_indent_space >> scope do
    further_line = continuation >> paragraph_continued_line
    open_block(:paragraph, paragraph_interrupt.absent?) >>
      (paragraph_line >> further_line.repeat).as(:p)
  end
end
248
+
249
# Setext underline: up to three spaces, a run of '-' or of '=' (captured as
# :heading), optional trailing whitespace.
rule(:setext_underline) do
  underline = str('-').repeat(1) | str('=').repeat(1)
  non_indent_space >>
    underline.as(:heading) >>
    whitespace.repeat.ignore >>
    line_ending_or_eof
end

# Setext heading: one or more text lines (captured as :text) followed by an
# underline. No text line may be a paragraph interrupt or an underline.
rule(:setext_heading) do
  plain_text_ahead = paragraph_interrupt.absent? >> setext_underline.absent?
  extra_line = continuation >> plain_text_ahead >> paragraph_continued_line

  plain_text_ahead >>
    non_indent_space >>
    (paragraph_line >> extra_line.repeat).as(:text) >>
    continuation >>
    setext_underline
end
273
+
274
+ # ===== KRAMDOWN EXTENSIONS =====
275
+
276
# Inline Attribute List (IAL): {:.class #id key="value"}
# Can appear after any block element to add attributes.

# Class attribute: '.' followed by word characters or hyphens.
rule(:ial_class) { str('.') >> match['\\w\\-'].repeat(1) }

# Id attribute: '#' followed by word characters or hyphens.
rule(:ial_id) { str('#') >> match['\\w\\-'].repeat(1) }

# key=value attribute; the value is double-quoted, single-quoted, or a bare
# run of characters other than whitespace and '}'.
rule(:ial_key_value) do
  double_quoted = str('"') >> match['^"'].repeat(0) >> str('"')
  single_quoted = str("'") >> match["^'"].repeat(0) >> str("'")
  bare_value = match['^\\s\\}'].repeat(1)
  match['\\w\\-'].repeat(1) >> str('=') >> (double_quoted | single_quoted | bare_value)
end

# One or more attributes, each optionally preceded by whitespace.
rule(:ial_content) do
  attribute = whitespace.repeat >> (ial_class | ial_id | ial_key_value)
  attribute.repeat(1)
end

# The braces around the attributes; the content is captured under :ial.
rule(:ial) { str('{:') >> ial_content.as(:ial) >> str('}') }
305
+
306
# Attribute List Definition (ALD): {:name: #id .class key="value"}
# Defines a named attribute list that can be referenced.

# The definition name: word characters followed by ':'.
rule(:ald_name) { match['\\w'].repeat(1) >> str(':') }

# Name captured under :ald_name, attributes under :ial.
rule(:ald) do
  str('{:') >>
    ald_name.as(:ald_name) >>
    whitespace.repeat(1) >>
    ial_content.as(:ial) >>
    str('}')
end
315
+
316
# Block-level extension: {::extension_name options /}
# Common extensions: {::toc}, {::options ... /}

# Extension name: one or more lowercase ASCII letters.
rule(:extension_name) { match['a-z'].repeat(1) }

# key=value option; the value is double-quoted, single-quoted, or a bare run
# of characters other than whitespace, '/' and '}'.
rule(:extension_option) do
  double_quoted = str('"') >> match['^"'].repeat(0) >> str('"')
  single_quoted = str("'") >> match["^'"].repeat(0) >> str("'")
  bare_value = match['^\\s/\\}'].repeat(1)
  match['\\w\\-'].repeat(1) >> str('=') >> (double_quoted | single_quoted | bare_value)
end

# Zero or more options, each preceded by whitespace.
rule(:extension_options) do
  spaced_option = whitespace.repeat(1) >> extension_option
  spaced_option.repeat
end

# Self-closing form; name under :ext_name, options under :ext_options.
rule(:extension_self_closing) do
  str('{::') >>
    extension_name.as(:ext_name) >>
    extension_options.as(:ext_options) >>
    whitespace.repeat >>
    str('/}')
end
338
+
339
# Extension with a body: {::name options} ... {:/} or {::name options} ... {:/name}
#
# Captures the name under :ext_name, the options under :ext_options and
# everything up to the first '{:/' under :ext_body. The end tag may repeat
# the extension name ({:/comment}) or omit it ({:/}); both forms are
# accepted. The name in the end tag is not checked against the opening name.
rule(:extension_with_body) do
  str('{::') >> extension_name.as(:ext_name) >> extension_options.as(:ext_options) >> str('}') >>
    (str('{:/').absent? >> any).repeat.as(:ext_body) >>
    str('{:/') >> extension_name.maybe >> str('}')
end
344
+
345
# Either extension form, captured under :extension. The self-closing form is
# tried first.
rule(:extension) do
  either_form = extension_self_closing | extension_with_body
  either_form.as(:extension)
end
348
+
349
# Block math: an opening '$$' followed by a line ending, then everything up
# to the next '$$', captured under :math_content, then the closing '$$'.
rule(:block_math) do
  math_char = str('$$').absent? >> any
  str('$$') >> line_ending >> math_char.repeat.as(:math_content) >> str('$$')
end
355
+
356
+ # ===== GFM TABLE PARSING RULES =====
357
+
358
# Table cell: any run of characters except '|' and line endings (may be
# empty), captured under :cell.
rule(:table_cell) do
  cell_char = str('|').absent? >> line_char
  cell_char.repeat.as(:cell)
end

# Table row, in two accepted shapes:
#   | cell | cell |   leading pipe, then one or more pipe-terminated cells
#   cell | cell       no leading pipe; an optional last cell without a pipe
rule(:table_row) do
  # Shape 1: only the pipe-terminated cells are captured under :row.
  terminated_cells = (
    table_cell >> whitespace.maybe >> str('|') >> whitespace.maybe
  ).repeat(1).as(:row)
  with_leading_pipe = str('|') >> whitespace.maybe >> terminated_cells

  # Shape 2: first cell, further pipe-terminated cells under :row_rest,
  # optional unterminated cell under :last_cell; all captured under :row.
  further_cells = (
    table_cell >> whitespace.maybe >> str('|') >> whitespace.maybe
  ).repeat.as(:row_rest)
  without_leading_pipe = (
    table_cell >>
    whitespace.maybe >>
    str('|') >>
    whitespace.maybe >>
    further_cells >>
    table_cell.maybe.as(:last_cell)
  ).as(:row)

  with_leading_pipe | without_leading_pipe
end
388
+
389
# Separator cell: one or more dashes with an optional colon on either side.
rule(:table_separator_cell) do
  str(':').maybe >> str('-').repeat(1) >> str(':').maybe
end

# Separator row, in the same two shapes as a table row:
#   |---|---|   leading pipe, then one or more pipe-terminated cells
#   ---|---     no leading pipe; an optional last cell without a pipe
# Every separator cell is captured under :sep.
rule(:table_separator_row) do
  terminated_cells = (
    table_separator_cell.as(:sep) >> whitespace.maybe >> str('|') >> whitespace.maybe
  ).repeat(1)
  with_leading_pipe = str('|') >> whitespace.maybe >> terminated_cells

  further_cells = (
    table_separator_cell.as(:sep) >> whitespace.maybe >> str('|') >> whitespace.maybe
  ).repeat
  without_leading_pipe = table_separator_cell.as(:sep) >>
                         whitespace.maybe >>
                         str('|') >>
                         whitespace.maybe >>
                         further_cells >>
                         table_separator_cell.as(:sep).maybe

  with_leading_pipe | without_leading_pipe
end
419
+
420
# GFM table: a header row (:table_header), a separator row
# (:table_separator) and one or more body rows (each :table_body_row,
# collected under :table_body).
#
# A body row may be terminated by a line ending or by the end of input, so a
# table whose last row has no trailing newline keeps that row. A row always
# consumes at least one '|', so the repeat cannot spin at end of input.
rule(:table) do
  body_row = table_row.as(:table_body_row) >> line_ending_or_eof

  table_row.as(:table_header) >>
    line_ending >>
    table_separator_row.as(:table_separator) >>
    line_ending >>
    body_row.repeat(1).as(:table_body)
end
430
+
431
# Any block-level construct. Alternatives are tried in the order listed and
# the first match wins; :paragraph is the final fallback.
rule(:block) do
  blank_line |
    eob_marker |
    atx_heading |
    thematic_break |
    indented_code_block |
    fenced_code_block |
    block_quote |
    setext_heading |
    unordered_list |
    ordered_list |
    definition_list |
    footnote_definition |
    abbreviation_definition |
    ial_block |
    extension |
    block_math |
    table |
    paragraph
end
440
+
441
+ # ===== LIST PARSING RULES =====
442
+
443
# List interrupt: constructs that end the current list item's content when
# they start the next line. Alternatives are tried in the order listed.
rule(:list_interrupt) do
  blank_line.repeat(1) |
    atx_heading |
    thematic_break |
    code_fence_open |
    block_quote |
    unordered_list_marker |
    ordered_list_marker
end
449
+
450
# Unordered list marker: up to three spaces, one of '-', '*' or '+', then
# one or more spaces.
rule(:unordered_list_marker) do
  bullet = match['-*+']
  non_indent_space >> bullet >> str(' ').repeat(1)
end
456
+
457
# Ordered list marker: up to three spaces, a run of one to nine digits
# (0-9), a '.' or ')' delimiter, then one or more spaces.
#
# The digit run follows the CommonMark definition: any digit may lead, so
# "0." and "01." are markers, and a run longer than nine digits is not.
rule(:ordered_list_marker) do
  non_indent_space >>
    match['0-9'].repeat(1, 9) >>
    match['\\.)'] >>
    str(' ').repeat(1)
end
465
+
466
# List item continuation line: a verbatim line, with or without an indent
# prefix (space or tab), that does not look like a nested list marker.
rule(:list_continuation_line) do
  indented = (str(' ') | str("\t")) >> nested_list_marker.absent? >> line_verbatim
  unindented = nested_list_marker.absent? >> line_verbatim
  indented | unindented
end
475
+
476
# Nested list marker detection: an indent prefix (space or tab) followed by
# an unordered or ordered list marker with its trailing spaces.
# NOTE(review): the header in the original calls this "4-space indented";
# the space literal is reproduced as shown in the registry diff, which may
# have collapsed a run of spaces. Confirm against the released file.
rule(:nested_list_marker) do
  bullet_marker = match['-*+'] >> str(' ').repeat(1)
  number_marker = match['1-9'] >> match['0-9'].repeat >> match['\\.)'] >> str(' ').repeat(1)
  (str(' ') | str("\t")) >> (bullet_marker | number_marker)
end
484
+
485
# Thematic break as list item content (e.g., "- * * *").
#
# Uses the same marker run as :thematic_break (three or more of one marker
# character, each optionally followed by whitespace), so "- * * * *" is
# recognised just like a top-level "* * * *".
rule(:thematic_break_in_list) do
  (
    thematic_break_char('*') | thematic_break_char('-') | thematic_break_char('_')
  ) >> line_ending_or_eof
end
493
+
494
# Unordered list item, captured under :li. The marker is stored in the
# capture scope as :list_marker. The content is either a thematic break
# (emitted as { hr: true }) or regular list item content.
rule(:unordered_list_item) do
  break_item = thematic_break_in_list.output(hr: true).as(:li)
  regular_item = list_item_content.as(:li)
  unordered_list_marker.capture(:list_marker) >> (break_item | regular_item)
end
503
+
504
# List item content: the item's paragraph, then any nested blocks that are
# still inside the enclosing blocks and are not list interrupts.
rule(:list_item_content) do
  nested_part = continuation >> list_interrupt.absent? >> nested_block
  list_item_paragraph >> nested_part.repeat
end

# Nested block: an indent prefix (space or tab) followed by a nested
# unordered or ordered list.
# NOTE(review): the space literal is reproduced as shown in the registry
# diff, which may have collapsed a run of spaces; confirm the indent width.
rule(:nested_block) do
  nested_bullets = (str(' ') | str("\t")) >> nested_unordered_list
  nested_numbers = (str(' ') | str("\t")) >> nested_ordered_list
  nested_bullets | nested_numbers
end
519
+
520
# Nested unordered list (4-space indented): a first item plus any further
# items that continue the enclosing blocks and carry the indent prefix,
# captured under :ul.
#
# NOTE: :nested_unordered_list_item is defined once, further down, next to
# :nested_ordered_list_item. A second definition that used to follow this
# rule was overridden by that later one and never took effect, so it has
# been removed.
rule(:nested_unordered_list) do
  (
    nested_unordered_list_item >>
    (
      continuation >>
      (str(' ') | str("\t")) >>
      nested_unordered_list_item
    ).repeat
  ).as(:ul)
end
536
+
537
# Nested ordered list (4-space indented): a first item plus any further
# items that continue the enclosing blocks and carry the indent prefix,
# captured under :ol.
rule(:nested_ordered_list) do
  further_item = continuation >> (str(' ') | str("\t")) >> nested_ordered_list_item
  (nested_ordered_list_item >> further_item.repeat).as(:ol)
end
548
+
549
# Nested unordered list item: a marker and one line of text, captured as :li.
rule(:nested_unordered_list_item) { unordered_list_marker >> line_text.as(:li) }

# A non-empty line and its terminator, without the :ln wrapper.
rule(:line_text) { line_char.repeat(1) >> line_ending_or_eof }

# Nested ordered list item: a marker and one line of text, captured as :li.
rule(:nested_ordered_list_item) { ordered_list_marker >> line_text.as(:li) }
563
+
564
# List item paragraph: the first line plus continuation lines, captured
# under :p. A continuation line must still belong to the enclosing blocks
# and be neither a list interrupt nor a nested list marker.
rule(:list_item_paragraph) do
  continued_line = continuation >>
                   list_interrupt.absent? >>
                   nested_list_marker.absent? >>
                   list_continuation_line
  (line_verbatim >> continued_line.repeat).as(:p)
end
576
+
577
# Ordered list item: a marker followed by list item content, captured as :li.
rule(:ordered_list_item) { ordered_list_marker >> list_item_content.as(:li) }
582
+
583
# Unordered list: a first item plus further items, captured under :ul.
# A further item must continue the enclosing blocks, must not be a thematic
# break (which interrupts the list), and may follow one blank line.
rule(:unordered_list) do
  further_item = continuation >>
                 thematic_break.absent? >>
                 blank_line.maybe >>
                 unordered_list_item
  (unordered_list_item >> further_item.repeat).as(:ul)
end

# Ordered list: same shape as the unordered list, captured under :ol.
rule(:ordered_list) do
  further_item = continuation >>
                 thematic_break.absent? >>
                 blank_line.maybe >>
                 ordered_list_item
  (ordered_list_item >> further_item.repeat).as(:ol)
end
608
+
609
+ # ===== KRAMDOWN DEFINITION LIST PARSING RULES =====
610
+
611
# Definition list marker: up to three spaces, ':', then one or more spaces.
rule(:definition_marker) do
  non_indent_space >> str(':') >> str(' ').repeat(1)
end

# One line of a definition term: up to three spaces, then a verbatim line
# that does not begin with ':'.
rule(:definition_term_line) do
  non_indent_space >> str(':').absent? >> line_verbatim
end
625
+
626
# Definition term: a first term line plus further term lines, captured under
# :def_term. A further line must continue the enclosing blocks and be
# neither a definition marker nor a blank line.
rule(:definition_term) do
  further_line = continuation >>
                 definition_marker.absent? >>
                 blank_line.absent? >>
                 definition_term_line
  (definition_term_line >> further_line.repeat).as(:def_term)
end

# Definition content (the text after the ':'): a first line plus further
# lines, captured under :def_content. A further line follows the same
# restrictions as a term line and may start with one space or tab.
rule(:definition_content) do
  further_line = continuation >>
                 definition_marker.absent? >>
                 blank_line.absent? >>
                 (str(' ') | str("\t")).maybe >>
                 line_verbatim
  (line_verbatim >> further_line.repeat).as(:def_content)
end
652
+
653
# Definition item: a marker followed by its content.
rule(:definition_item) { definition_marker >> definition_content }

# Definition list item: a term followed by one or more definitions, each of
# which must continue the enclosing blocks.
rule(:definition_list_item) do
  continued_definition = continuation >> definition_item
  definition_term >> continued_definition.repeat(1)
end

# Definition list: term+definition groups, captured under :dl. Groups after
# the first must continue the enclosing blocks and may follow one blank line.
rule(:definition_list) do
  further_group = continuation >> blank_line.maybe >> definition_list_item
  (definition_list_item >> further_group.repeat).as(:dl)
end
678
+
679
+ # ===== KRAMDOWN FOOTNOTE PARSING RULES =====
680
+
681
# Footnote label: [^name], with the name (anything except ']') captured
# under :fn_id.
rule(:footnote_id) do
  label = match['^\]'].repeat(1)
  str('[^') >> label.as(:fn_id) >> str(']')
end

# Footnote definition: [^name]: content
# The first line is captured under :fn_content. Following lines that
# continue the enclosing blocks and start with one to four spaces/tabs are
# collected under :fn_content_continued.
rule(:footnote_definition) do
  indented_line = continuation >>
                  (str(' ') | str("\t")).repeat(1, 4) >>
                  line_verbatim

  non_indent_space >>
    footnote_id >>
    str(':') >>
    whitespace.repeat >>
    line_verbatim.as(:fn_content) >>
    indented_line.repeat.as(:fn_content_continued)
end
698
+
699
+ # ===== KRAMDOWN ABBREVIATION PARSING RULES =====
700
+
701
# Abbreviation label: *[TERM], with the term (anything except ']') captured
# under :abbr_term.
rule(:abbreviation_term) do
  term = match['^\]'].repeat(1)
  str('*[') >> term.as(:abbr_term) >> str(']')
end

# Abbreviation definition: *[TERM]: definition
# The rest of the line (possibly empty) is captured under :abbr_def.
rule(:abbreviation_definition) do
  expansion = line_char.repeat.as(:abbr_def)
  non_indent_space >>
    abbreviation_term >>
    str(':') >>
    whitespace.repeat >>
    expansion >>
    line_ending_or_eof
end
714
+
715
+ # ===== KRAMDOWN EOB (End of Block) MARKER =====
716
+
717
# EOB marker: a '^' alone on its line, with optional surrounding whitespace
# (terminates blocks explicitly).
rule(:eob_marker) do
  padding = whitespace.repeat
  padding >> str('^') >> whitespace.repeat >> line_ending_or_eof
end
721
+
722
# The grammar's entry point: a document is any number of blocks.
root :document
rule(:document) { block.repeat }
726
+
727
# Reads +filename+ and parses its contents with a fresh parser instance.
#
# On a parse failure the failure cause tree is printed to standard output
# and nil is returned, matching .parse_with_processing.
#
# @param filename [String] path of the Markdown file to read
# @return [Object, nil] the raw parse tree, or nil when parsing failed
def self.parse(filename)
  content = File.read(filename)
  new.parse(content)
rescue Parslet::ParseFailed => e
  puts e.parse_failure_cause.ascii_tree
  nil
end
733
+
734
# Parses +content+ and hands the resulting tree to AstProcessor.process
# (escape sequences, etc.).
#
# On a parse failure the failure cause tree is printed to standard output
# and nil is returned.
#
# @param content [String] Markdown source text
# @return [Object, nil] the processed tree, or nil when parsing failed
def self.parse_with_processing(content)
  AstProcessor.process(new.parse(content))
rescue Parslet::ParseFailed => failure
  puts failure.parse_failure_cause.ascii_tree
  nil
end
742
+ end
743
+ end
744
+ end
745
+ end