coradoc-adoc 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/lib/coradoc/asciidoc/model/admonition.rb +37 -0
  4. data/lib/coradoc/asciidoc/model/anchorable.rb +64 -0
  5. data/lib/coradoc/asciidoc/model/attached.rb +26 -0
  6. data/lib/coradoc/asciidoc/model/attribute.rb +22 -0
  7. data/lib/coradoc/asciidoc/model/attribute_list/matchers.rb +45 -0
  8. data/lib/coradoc/asciidoc/model/attribute_list.rb +230 -0
  9. data/lib/coradoc/asciidoc/model/attribute_list_attribute.rb +11 -0
  10. data/lib/coradoc/asciidoc/model/audio.rb +44 -0
  11. data/lib/coradoc/asciidoc/model/author.rb +36 -0
  12. data/lib/coradoc/asciidoc/model/base.rb +141 -0
  13. data/lib/coradoc/asciidoc/model/bibliography.rb +37 -0
  14. data/lib/coradoc/asciidoc/model/bibliography_entry.rb +38 -0
  15. data/lib/coradoc/asciidoc/model/block/core.rb +139 -0
  16. data/lib/coradoc/asciidoc/model/block/example.rb +14 -0
  17. data/lib/coradoc/asciidoc/model/block/listing.rb +14 -0
  18. data/lib/coradoc/asciidoc/model/block/literal.rb +14 -0
  19. data/lib/coradoc/asciidoc/model/block/open.rb +14 -0
  20. data/lib/coradoc/asciidoc/model/block/pass.rb +14 -0
  21. data/lib/coradoc/asciidoc/model/block/quote.rb +14 -0
  22. data/lib/coradoc/asciidoc/model/block/reviewer_comment.rb +14 -0
  23. data/lib/coradoc/asciidoc/model/block/side.rb +14 -0
  24. data/lib/coradoc/asciidoc/model/block/source_code.rb +14 -0
  25. data/lib/coradoc/asciidoc/model/block.rb +21 -0
  26. data/lib/coradoc/asciidoc/model/break.rb +33 -0
  27. data/lib/coradoc/asciidoc/model/comment_block.rb +33 -0
  28. data/lib/coradoc/asciidoc/model/comment_line.rb +30 -0
  29. data/lib/coradoc/asciidoc/model/content_list.rb +334 -0
  30. data/lib/coradoc/asciidoc/model/document.rb +197 -0
  31. data/lib/coradoc/asciidoc/model/document_attributes.rb +43 -0
  32. data/lib/coradoc/asciidoc/model/glossaries.rb +11 -0
  33. data/lib/coradoc/asciidoc/model/header.rb +57 -0
  34. data/lib/coradoc/asciidoc/model/highlight.rb +11 -0
  35. data/lib/coradoc/asciidoc/model/image/block_image/attribute_list.rb +23 -0
  36. data/lib/coradoc/asciidoc/model/image/block_image.rb +25 -0
  37. data/lib/coradoc/asciidoc/model/image/core/attribute_list.rb +43 -0
  38. data/lib/coradoc/asciidoc/model/image/core.rb +72 -0
  39. data/lib/coradoc/asciidoc/model/image/inline_image.rb +17 -0
  40. data/lib/coradoc/asciidoc/model/image.rb +14 -0
  41. data/lib/coradoc/asciidoc/model/include.rb +66 -0
  42. data/lib/coradoc/asciidoc/model/inline/anchor.rb +41 -0
  43. data/lib/coradoc/asciidoc/model/inline/attribute_reference.rb +25 -0
  44. data/lib/coradoc/asciidoc/model/inline/base.rb +15 -0
  45. data/lib/coradoc/asciidoc/model/inline/bold.rb +38 -0
  46. data/lib/coradoc/asciidoc/model/inline/cross_reference.rb +29 -0
  47. data/lib/coradoc/asciidoc/model/inline/cross_reference_arg.rb +15 -0
  48. data/lib/coradoc/asciidoc/model/inline/footnote.rb +34 -0
  49. data/lib/coradoc/asciidoc/model/inline/hard_line_break.rb +24 -0
  50. data/lib/coradoc/asciidoc/model/inline/highlight.rb +36 -0
  51. data/lib/coradoc/asciidoc/model/inline/italic.rb +38 -0
  52. data/lib/coradoc/asciidoc/model/inline/link.rb +46 -0
  53. data/lib/coradoc/asciidoc/model/inline/monospace.rb +39 -0
  54. data/lib/coradoc/asciidoc/model/inline/quotation.rb +25 -0
  55. data/lib/coradoc/asciidoc/model/inline/small.rb +25 -0
  56. data/lib/coradoc/asciidoc/model/inline/span.rb +38 -0
  57. data/lib/coradoc/asciidoc/model/inline/stem.rb +24 -0
  58. data/lib/coradoc/asciidoc/model/inline/strikethrough.rb +39 -0
  59. data/lib/coradoc/asciidoc/model/inline/subscript.rb +33 -0
  60. data/lib/coradoc/asciidoc/model/inline/superscript.rb +33 -0
  61. data/lib/coradoc/asciidoc/model/inline/underline.rb +25 -0
  62. data/lib/coradoc/asciidoc/model/inline.rb +31 -0
  63. data/lib/coradoc/asciidoc/model/line_break.rb +11 -0
  64. data/lib/coradoc/asciidoc/model/list/core.rb +61 -0
  65. data/lib/coradoc/asciidoc/model/list/definition.rb +27 -0
  66. data/lib/coradoc/asciidoc/model/list/definition_item.rb +43 -0
  67. data/lib/coradoc/asciidoc/model/list/item.rb +72 -0
  68. data/lib/coradoc/asciidoc/model/list/nestable.rb +14 -0
  69. data/lib/coradoc/asciidoc/model/list/ordered.rb +34 -0
  70. data/lib/coradoc/asciidoc/model/list/unordered.rb +34 -0
  71. data/lib/coradoc/asciidoc/model/list.rb +29 -0
  72. data/lib/coradoc/asciidoc/model/named_attribute.rb +12 -0
  73. data/lib/coradoc/asciidoc/model/paragraph.rb +59 -0
  74. data/lib/coradoc/asciidoc/model/rejected_positional_attribute.rb +12 -0
  75. data/lib/coradoc/asciidoc/model/resolvable.rb +71 -0
  76. data/lib/coradoc/asciidoc/model/resolver.rb +430 -0
  77. data/lib/coradoc/asciidoc/model/reviewer_note.rb +54 -0
  78. data/lib/coradoc/asciidoc/model/revision.rb +47 -0
  79. data/lib/coradoc/asciidoc/model/section.rb +109 -0
  80. data/lib/coradoc/asciidoc/model/serialization/asciidoc_adapter.rb +28 -0
  81. data/lib/coradoc/asciidoc/model/serialization/asciidoc_mapping.rb +42 -0
  82. data/lib/coradoc/asciidoc/model/serialization/asciidoc_mapping_rule.rb +41 -0
  83. data/lib/coradoc/asciidoc/model/serialization/asciidoc_transform.rb +211 -0
  84. data/lib/coradoc/asciidoc/model/serialization/errors.rb +57 -0
  85. data/lib/coradoc/asciidoc/model/serialization.rb +39 -0
  86. data/lib/coradoc/asciidoc/model/spacing.rb +282 -0
  87. data/lib/coradoc/asciidoc/model/table.rb +44 -0
  88. data/lib/coradoc/asciidoc/model/table_cell.rb +122 -0
  89. data/lib/coradoc/asciidoc/model/table_row.rb +26 -0
  90. data/lib/coradoc/asciidoc/model/tag.rb +36 -0
  91. data/lib/coradoc/asciidoc/model/term.rb +48 -0
  92. data/lib/coradoc/asciidoc/model/text_element.rb +66 -0
  93. data/lib/coradoc/asciidoc/model/title.rb +85 -0
  94. data/lib/coradoc/asciidoc/model/video/attribute_list.rb +43 -0
  95. data/lib/coradoc/asciidoc/model/video.rb +49 -0
  96. data/lib/coradoc/asciidoc/model.rb +75 -0
  97. data/lib/coradoc/asciidoc/parse_error.rb +161 -0
  98. data/lib/coradoc/asciidoc/parser/admonition.rb +26 -0
  99. data/lib/coradoc/asciidoc/parser/attribute_list.rb +110 -0
  100. data/lib/coradoc/asciidoc/parser/base.rb +159 -0
  101. data/lib/coradoc/asciidoc/parser/bibliography.rb +31 -0
  102. data/lib/coradoc/asciidoc/parser/block.rb +186 -0
  103. data/lib/coradoc/asciidoc/parser/block_assembler.rb +183 -0
  104. data/lib/coradoc/asciidoc/parser/cache.rb +155 -0
  105. data/lib/coradoc/asciidoc/parser/citation.rb +32 -0
  106. data/lib/coradoc/asciidoc/parser/content.rb +76 -0
  107. data/lib/coradoc/asciidoc/parser/document_attributes.rb +27 -0
  108. data/lib/coradoc/asciidoc/parser/fix_files.rb +76 -0
  109. data/lib/coradoc/asciidoc/parser/header.rb +31 -0
  110. data/lib/coradoc/asciidoc/parser/inline.rb +199 -0
  111. data/lib/coradoc/asciidoc/parser/list.rb +130 -0
  112. data/lib/coradoc/asciidoc/parser/metadata_detector.rb +164 -0
  113. data/lib/coradoc/asciidoc/parser/paragraph.rb +64 -0
  114. data/lib/coradoc/asciidoc/parser/section.rb +62 -0
  115. data/lib/coradoc/asciidoc/parser/stem.rb +19 -0
  116. data/lib/coradoc/asciidoc/parser/table.rb +166 -0
  117. data/lib/coradoc/asciidoc/parser/term.rb +70 -0
  118. data/lib/coradoc/asciidoc/parser/text.rb +156 -0
  119. data/lib/coradoc/asciidoc/parser.rb +10 -0
  120. data/lib/coradoc/asciidoc/serializer/adoc_serializer.rb +86 -0
  121. data/lib/coradoc/asciidoc/serializer/element_registry.rb +95 -0
  122. data/lib/coradoc/asciidoc/serializer/fallback_serializer.rb +21 -0
  123. data/lib/coradoc/asciidoc/serializer/formatter.rb +144 -0
  124. data/lib/coradoc/asciidoc/serializer/registrations.rb +108 -0
  125. data/lib/coradoc/asciidoc/serializer/serialization_context.rb +238 -0
  126. data/lib/coradoc/asciidoc/serializer/serializers/admonition.rb +19 -0
  127. data/lib/coradoc/asciidoc/serializer/serializers/attribute.rb +23 -0
  128. data/lib/coradoc/asciidoc/serializer/serializers/attribute_list.rb +40 -0
  129. data/lib/coradoc/asciidoc/serializer/serializers/attribute_list_attribute.rb +18 -0
  130. data/lib/coradoc/asciidoc/serializer/serializers/audio.rb +33 -0
  131. data/lib/coradoc/asciidoc/serializer/serializers/author.rb +20 -0
  132. data/lib/coradoc/asciidoc/serializer/serializers/base.rb +152 -0
  133. data/lib/coradoc/asciidoc/serializer/serializers/bibliography.rb +35 -0
  134. data/lib/coradoc/asciidoc/serializer/serializers/bibliography_entry.rb +24 -0
  135. data/lib/coradoc/asciidoc/serializer/serializers/block/core.rb +70 -0
  136. data/lib/coradoc/asciidoc/serializer/serializers/block/example.rb +17 -0
  137. data/lib/coradoc/asciidoc/serializer/serializers/block/listing.rb +22 -0
  138. data/lib/coradoc/asciidoc/serializer/serializers/block/literal.rb +17 -0
  139. data/lib/coradoc/asciidoc/serializer/serializers/block/open.rb +22 -0
  140. data/lib/coradoc/asciidoc/serializer/serializers/block/pass.rb +17 -0
  141. data/lib/coradoc/asciidoc/serializer/serializers/block/quote.rb +17 -0
  142. data/lib/coradoc/asciidoc/serializer/serializers/block/reviewer_comment.rb +17 -0
  143. data/lib/coradoc/asciidoc/serializer/serializers/block/side.rb +22 -0
  144. data/lib/coradoc/asciidoc/serializer/serializers/block/source_code.rb +22 -0
  145. data/lib/coradoc/asciidoc/serializer/serializers/block.rb +23 -0
  146. data/lib/coradoc/asciidoc/serializer/serializers/break.rb +18 -0
  147. data/lib/coradoc/asciidoc/serializer/serializers/comment_block.rb +22 -0
  148. data/lib/coradoc/asciidoc/serializer/serializers/comment_line.rb +22 -0
  149. data/lib/coradoc/asciidoc/serializer/serializers/document.rb +65 -0
  150. data/lib/coradoc/asciidoc/serializer/serializers/document_attributes.rb +21 -0
  151. data/lib/coradoc/asciidoc/serializer/serializers/header.rb +24 -0
  152. data/lib/coradoc/asciidoc/serializer/serializers/highlight.rb +23 -0
  153. data/lib/coradoc/asciidoc/serializer/serializers/image/core.rb +30 -0
  154. data/lib/coradoc/asciidoc/serializer/serializers/image.rb +14 -0
  155. data/lib/coradoc/asciidoc/serializer/serializers/include.rb +19 -0
  156. data/lib/coradoc/asciidoc/serializer/serializers/inline/anchor.rb +20 -0
  157. data/lib/coradoc/asciidoc/serializer/serializers/inline/attribute_reference.rb +20 -0
  158. data/lib/coradoc/asciidoc/serializer/serializers/inline/bold.rb +26 -0
  159. data/lib/coradoc/asciidoc/serializer/serializers/inline/cross_reference.rb +30 -0
  160. data/lib/coradoc/asciidoc/serializer/serializers/inline/cross_reference_arg.rb +20 -0
  161. data/lib/coradoc/asciidoc/serializer/serializers/inline/footnote.rb +24 -0
  162. data/lib/coradoc/asciidoc/serializer/serializers/inline/hard_line_break.rb +20 -0
  163. data/lib/coradoc/asciidoc/serializer/serializers/inline/highlight.rb +26 -0
  164. data/lib/coradoc/asciidoc/serializer/serializers/inline/italic.rb +26 -0
  165. data/lib/coradoc/asciidoc/serializer/serializers/inline/link.rb +38 -0
  166. data/lib/coradoc/asciidoc/serializer/serializers/inline/monospace.rb +26 -0
  167. data/lib/coradoc/asciidoc/serializer/serializers/inline/quotation.rb +21 -0
  168. data/lib/coradoc/asciidoc/serializer/serializers/inline/small.rb +20 -0
  169. data/lib/coradoc/asciidoc/serializer/serializers/inline/span.rb +35 -0
  170. data/lib/coradoc/asciidoc/serializer/serializers/inline/stem.rb +23 -0
  171. data/lib/coradoc/asciidoc/serializer/serializers/inline/strikethrough.rb +29 -0
  172. data/lib/coradoc/asciidoc/serializer/serializers/inline/subscript.rb +29 -0
  173. data/lib/coradoc/asciidoc/serializer/serializers/inline/superscript.rb +26 -0
  174. data/lib/coradoc/asciidoc/serializer/serializers/inline/underline.rb +20 -0
  175. data/lib/coradoc/asciidoc/serializer/serializers/inline.rb +32 -0
  176. data/lib/coradoc/asciidoc/serializer/serializers/line_break.rb +18 -0
  177. data/lib/coradoc/asciidoc/serializer/serializers/list/core.rb +47 -0
  178. data/lib/coradoc/asciidoc/serializer/serializers/list/definition.rb +35 -0
  179. data/lib/coradoc/asciidoc/serializer/serializers/list/definition_item.rb +38 -0
  180. data/lib/coradoc/asciidoc/serializer/serializers/list/item.rb +120 -0
  181. data/lib/coradoc/asciidoc/serializer/serializers/list/ordered.rb +24 -0
  182. data/lib/coradoc/asciidoc/serializer/serializers/list/unordered.rb +29 -0
  183. data/lib/coradoc/asciidoc/serializer/serializers/list.rb +19 -0
  184. data/lib/coradoc/asciidoc/serializer/serializers/named_attribute.rb +22 -0
  185. data/lib/coradoc/asciidoc/serializer/serializers/paragraph.rb +65 -0
  186. data/lib/coradoc/asciidoc/serializer/serializers/reviewer_note.rb +28 -0
  187. data/lib/coradoc/asciidoc/serializer/serializers/revision.rb +26 -0
  188. data/lib/coradoc/asciidoc/serializer/serializers/section.rb +37 -0
  189. data/lib/coradoc/asciidoc/serializer/serializers/table.rb +24 -0
  190. data/lib/coradoc/asciidoc/serializer/serializers/table_cell.rb +75 -0
  191. data/lib/coradoc/asciidoc/serializer/serializers/table_row.rb +24 -0
  192. data/lib/coradoc/asciidoc/serializer/serializers/tag.rb +19 -0
  193. data/lib/coradoc/asciidoc/serializer/serializers/term.rb +20 -0
  194. data/lib/coradoc/asciidoc/serializer/serializers/text_element.rb +23 -0
  195. data/lib/coradoc/asciidoc/serializer/serializers/title.rb +55 -0
  196. data/lib/coradoc/asciidoc/serializer/serializers/video.rb +33 -0
  197. data/lib/coradoc/asciidoc/serializer/spacing_strategy.rb +70 -0
  198. data/lib/coradoc/asciidoc/serializer.rb +75 -0
  199. data/lib/coradoc/asciidoc/transform/from_core_model.rb +502 -0
  200. data/lib/coradoc/asciidoc/transform/from_core_model_registrations.rb +126 -0
  201. data/lib/coradoc/asciidoc/transform/registry.rb +146 -0
  202. data/lib/coradoc/asciidoc/transform/to_core_model.rb +564 -0
  203. data/lib/coradoc/asciidoc/transform/to_core_model_registrations.rb +257 -0
  204. data/lib/coradoc/asciidoc/transform.rb +13 -0
  205. data/lib/coradoc/asciidoc/transformer/block_rules.rb +101 -0
  206. data/lib/coradoc/asciidoc/transformer/header_rules.rb +91 -0
  207. data/lib/coradoc/asciidoc/transformer/inline_rules.rb +179 -0
  208. data/lib/coradoc/asciidoc/transformer/list_rules.rb +131 -0
  209. data/lib/coradoc/asciidoc/transformer/misc_rules.rb +196 -0
  210. data/lib/coradoc/asciidoc/transformer/structural_rules.rb +216 -0
  211. data/lib/coradoc/asciidoc/transformer/text_rules.rb +107 -0
  212. data/lib/coradoc/asciidoc/transformer.rb +406 -0
  213. data/lib/coradoc/asciidoc/version.rb +7 -0
  214. data/lib/coradoc/asciidoc.rb +148 -0
  215. data/lib/coradoc/util/asciidoc.rb +71 -0
  216. data/lib/coradoc/util.rb +8 -0
  217. metadata +343 -0
@@ -0,0 +1,130 @@
# frozen_string_literal: true

module Coradoc
  module AsciiDoc
    module Parser
      # Grammar rules for AsciiDoc lists: ordered ("."), unordered ("*") and
      # definition ("term:: text") lists.
      #
      # Every method returns a parser assembled from combinators (+str+,
      # +match+) and sibling rules (+attribute_list+, +text_line+,
      # +paragraph+, +block+, ...) that are not defined in this module; the
      # including parser is expected to provide them.
      module List
        # Any list at the given depth, tagged +:list+.
        # Alternatives are tried in order: unordered, ordered, definition.
        #
        # @param nesting_level [Integer] number of marker characters expected
        def list(nesting_level = 1)
          (
            unordered_list(nesting_level) |
            ordered_list(nesting_level) |
            definition_list
          ).as(:list)
        end

        # The "+" line that attaches the following block to a list item.
        def list_continuation
          line_start? >> str("+\n")
        end

        # Optional attribute list line followed by one or more ordered items,
        # tagged +:ordered+.
        #
        # @param nesting_level [Integer] number of "." marker characters
        def ordered_list(nesting_level = 1)
          attrs = (attribute_list >> newline).maybe
          r = olist_item(nesting_level)
          attrs >> olist_item(nesting_level).present? >> r.repeat(1).as(:ordered)
        end

        # Optional attribute list line followed by one or more unordered
        # items, tagged +:unordered+.
        #
        # @param nesting_level [Integer] number of "*" marker characters
        def unordered_list(nesting_level = 1)
          attrs = (attribute_list >> newline).maybe
          r = ulist_item(nesting_level)
          attrs >> r.repeat(1).as(:unordered)
        end

        # Optional attribute list line followed by one or more definition
        # items, tagged +:definition_list+; must not be directly followed by
        # another definition item.
        #
        # @param delimiter [String] forwarded to +dlist_item+
        def definition_list(delimiter = '::')
          (attribute_list >> newline).maybe >>
            dlist_item(delimiter).repeat(1).as(:definition_list) >>
            dlist_item(delimiter).absent?
        end

        # Either kind of list marker at the given depth.
        def list_marker(nesting_level = 1)
          olist_marker(nesting_level) | ulist_marker(nesting_level)
        end

        # Ordered-list marker: exactly +nesting_level+ dots at line start.
        #
        # Table cells may carry a format spec such as ".2+^.^|" in front of
        # the cell delimiter, so the marker is rejected when the dots are
        # followed by up to three spec characters and then "|".
        def olist_marker(nesting_level = 1)
          line_start? >>
            str('.' * nesting_level) >>
            str('.').absent? >>
            (
              (match['0-9.<>^'] | str('+')).repeat(0, 3) >> str('|')
            ).absent?
        end

        # One ordered item: marker, a space, the item text, any blocks
        # attached through list continuations and, up to depth 5, nested
        # lists. Tagged +:list_item+.
        def olist_item(nesting_level = 1)
          item = olist_marker(nesting_level).as(:marker) >>
                 match("\n").absent? >> space >> text_line(true, unguarded: true)

          att = (list_continuation.present? >>
                 list_continuation >>
                 (admonition_line | paragraph | block)
                ).repeat(0).as(:attached)
          item >>= att.maybe

          # Deeper levels are only attempted while nesting_level <= 4, which
          # bounds the recursion through #list.
          if nesting_level <= 4
            item >>= (list_marker(nesting_level + 1).present? >>
                      list(nesting_level + 1)).repeat(0).as(:nested)
          end
          olist_marker(nesting_level).present? >> item.as(:list_item)
        end

        # Unordered-list marker: exactly +nesting_level+ asterisks at line
        # start, not followed by "===".
        def ulist_marker(nesting_level = 1)
          line_start? >>
            str('*' * nesting_level) >>
            str('*').absent? >>
            str('===').absent?
        end

        # One unordered item; same shape as #olist_item, except that an item
        # whose text starts with " [[[" is rejected.
        # NOTE(review): " [[[" looks like the bibliography-entry anchor
        # syntax handled by another rule - confirm.
        def ulist_item(nesting_level = 1)
          item = ulist_marker(nesting_level).as(:marker) >>
                 str(' [[[').absent? >>
                 match("\n").absent? >> space >> text_line(true, unguarded: true)

          att = (list_continuation.present? >>
                 list_continuation >>
                 (admonition_line | paragraph | block)
                ).repeat(0).as(:attached)
          item >>= att.maybe

          if nesting_level <= 4
            item >>= (list_marker(nesting_level + 1).present? >>
                      list(nesting_level + 1)).repeat(0).as(:nested)
          end
          ulist_marker(nesting_level).present? >> item.as(:list_item)
        end

        # Definition-list delimiter, tagged +:delimiter+.
        #
        # Alternation is ordered choice: the first alternative that matches
        # wins and later ones are never tried at that position. The longest
        # delimiter therefore has to come first, otherwise "::" would match
        # the front of ":::" / "::::" and leave stray colons behind.
        def dlist_delimiter
          (str('::::') | str(':::') | str('::') | str(';;')).as(:delimiter)
        end

        # A term: one or more characters other than newline and ":", tagged
        # +:dlist_term+, followed by the delimiter.
        #
        # @param _delimiter [String] accepted for interface compatibility;
        #   the delimiter actually matched is whatever #dlist_delimiter allows
        def dlist_term(_delimiter)
          match("[^\n:]").repeat(1).as(:dlist_term) >> dlist_delimiter
        end

        # The definition text, tagged +:definition+, with its line ending and
        # any empty lines after it.
        def dlist_definition
          text.as(:definition) >> line_ending >> empty_line.repeat(0)
        end

        # One definition item, tagged +:definition_list_item+. Two layouts
        # are accepted, tried in this order:
        # 1. one or more term lines, then the definition on its own line;
        # 2. a single term, a space and the definition on the same line.
        def dlist_item(delimiter)
          (((dlist_term(delimiter).as(:terms).repeat(1) >> line_ending >>
              empty_line.repeat(0)).repeat(1) >>
              dlist_definition) |
            (dlist_term(delimiter).repeat(1, 1).as(:terms) >> space >>
              dlist_definition)
          ).as(:definition_list_item)
        end
      end
    end
  end
end
@@ -0,0 +1,164 @@
# frozen_string_literal: true

module Coradoc
  module AsciiDoc
    module Parser
      # Detects metadata markers (block titles, attribute lists, element IDs
      # and block delimiters) in raw AsciiDoc text. It uses regular
      # expressions only, keeps no state and calls no other parser rules.
      class MetadataDetector
        # A complete block delimiter line. Anchored at both ends so that text
        # which merely begins with delimiter characters (e.g. "--verbose") is
        # not reported as a delimiter.
        BLOCK_DELIMITER_PATTERN = /^(\*{4,}|={4,}|_{4,}|\+{4,}|-{4,}|--)$/.freeze

        # Line classifiers, tried in insertion order; the first match wins.
        # The element ID forms must precede :attribute_list because "[[id]]"
        # and "[#id]" also satisfy the generic "[...]" pattern.
        METADATA_TYPES = {
          block_title: /^\.[^ \n].*/,
          element_id_double: /^\[\[.+\]\]$/,
          element_id_single: /^\[#.+\]$/,
          attribute_list: /^\[.+\]$/,
          block_delimiter: BLOCK_DELIMITER_PATTERN
        }.freeze

        # Scan the leading lines of +input+ and report every metadata marker.
        # Blank lines and lines matching no pattern are skipped. Scanning
        # stops once a block delimiter has been recorded.
        #
        # @param input [String] The input text to scan
        # @param max_lines [Integer] Maximum lines to scan ahead
        # @return [Array<Hash>] Array of
        #   {type:, content:, line:, position:, length:}; +content+ is the
        #   stripped line, +line+ the zero-based line index, +position+ the
        #   character offset of the line start and +length+ the length of the
        #   unstripped line
        def self.scan(input, max_lines: 20)
          metadata = []
          offset = 0

          input.each_line.first(max_lines).each_with_index do |line, index|
            # Track the running offset instead of re-summing earlier lines.
            position = offset
            offset += line.length

            content = line.strip
            next if content.empty?

            type, = METADATA_TYPES.find { |_type, pattern| content.match?(pattern) }
            next unless type

            metadata << {
              type: type,
              content: content,
              line: index,
              position: position,
              length: line.length
            }

            # The delimiter marks the start of block content.
            break if type == :block_delimiter
          end

          metadata
        end

        # Detect a block title line (".Title").
        #
        # @param line [String] Line to check
        # @return [Hash, nil] {text:} or nil
        def self.detect_block_title(line)
          return nil unless /^\.[^ \n]/.match?(line)

          # Title text is everything after the leading '.'
          { text: line.sub(/^\./, '').strip }
        end

        # Detect an attribute list line ("[a,b]") and split it on commas.
        # Only this simple split is performed.
        #
        # @param line [String] Line to check
        # @return [Hash, nil] {content:, attributes:} or nil
        def self.detect_attribute_list(line)
          return nil unless /^\[.+\]$/.match?(line)

          content = line.strip
          inner = content[1...-1] # Remove [ and ]

          {
            content: content,
            attributes: inner.split(',').map(&:strip)
          }
        end

        # Detect an element ID line in either "[[id]]" or "[#id]" form.
        #
        # @param line [String] Line to check
        # @return [Hash, nil] {id:, style:} with style :double or :single,
        #   or nil
        def self.detect_element_id(line)
          if (double = /^\[\[(.+)\]\]$/.match(line))
            return { id: double[1], style: :double }
          end

          if (single = /^\[#(.+)\]$/.match(line))
            return { id: single[1], style: :single }
          end

          nil
        end

        # Detect a block delimiter line and map it to a block type.
        #
        # @param line [String] Line to check
        # @return [Hash, nil] {char:, count:, type:, delimiter:} or nil
        def self.detect_block_delimiter(line)
          found = BLOCK_DELIMITER_PATTERN.match(line)
          return nil unless found

          delimiter = found[1]
          char = delimiter[0]
          count = delimiter.length

          type = case char
                 when '*' then :sidebar
                 when '=' then :example
                 when '_' then :quote
                 when '+' then :pass
                 when '-' then count == 2 ? :open : :source
                 end

          {
            char: char,
            count: count,
            type: type,
            delimiter: delimiter
          }
        end

        # Summarise the block introduced by the scanned metadata. A block
        # delimiter is required; the pattern name records whether a title
        # and/or an attribute list were seen as well.
        #
        # @param metadata [Array<Hash>] Metadata from scan()
        # @return [Hash, nil]
        #   {pattern:, title:, attributes:, delimiter:, delimiter_line:}
        #   or nil when there is no delimiter
        def self.analyze_block_structure(metadata)
          return nil if metadata.empty?

          delim_meta = metadata.find { |m| m[:type] == :block_delimiter }
          return nil unless delim_meta

          title_meta = metadata.find { |m| m[:type] == :block_title }
          attr_meta = metadata.find { |m| m[:type] == :attribute_list }

          pattern = if title_meta && attr_meta
                      :title_attr_delim
                    elsif title_meta
                      :title_delim
                    elsif attr_meta
                      :attr_delim
                    else
                      :plain_delim
                    end

          {
            pattern: pattern,
            title: title_meta ? detect_block_title(title_meta[:content]) : nil,
            attributes: attr_meta ? detect_attribute_list(attr_meta[:content]) : nil,
            delimiter: detect_block_delimiter(delim_meta[:content]),
            delimiter_line: delim_meta[:line]
          }
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
# frozen_string_literal: true

module Coradoc
  module AsciiDoc
    module Parser
      # Grammar rules for paragraphs. The combinators and sibling rules used
      # here (+str+, +newline+, +list+, +element_id+, ...) come from the
      # including parser.
      module Paragraph
        # Look-ahead guard: at line start, rejects lines that begin an
        # attribute list, block delimiter, table, list, list continuation,
        # element id or section heading. Consumes nothing beyond what
        # +line_start?+ itself consumes.
        def line_not_text?
          rejected = [
            (attribute_list >> newline),
            block_delimiter,
            (str('|===') >> newline),
            list,
            list_prefix,
            list_continuation,
            element_id,
            section_prefix
          ]
          rejected.reduce(line_start?) { |guard, rule| guard >> rule.absent? }
        end

        # One line of paragraph text, tagged +:text+.
        #
        # +many_breaks+ deliberately has three states:
        # - 0:     the line must be followed by end of input
        # - true:  the line ends with +newline+ (tagged +:line_break+) or
        #          end of input
        # - false: the line ends with +newline_single+ (tagged
        #          +:line_break+) or end of input
        # rubocop:disable Style/OptionalBooleanParameter, Style/NumericPredicate
        def paragraph_text_line(many_breaks = false)
          opening = asciidoc_char_with_id.absent? |
                    (element_id_inline >> literal_space?) |
                    (line_start? >> line_not_text?)
          content = line_not_text? >> opening >> text_any.as(:text)

          # `== 0` rather than `.zero?`: many_breaks may be true/false.
          terminator =
            if many_breaks == 0
              eof?
            elsif many_breaks
              newline.as(:line_break) | eof?
            else
              newline_single.as(:line_break) | eof?
            end

          content >> terminator
        end
        # rubocop:enable Style/OptionalBooleanParameter, Style/NumericPredicate

        # A paragraph, tagged +:paragraph+: optional element id, block title
        # and attribute list line, then the text lines (tagged +:lines+) and
        # any trailing newlines.
        #
        # The lines are either a single line ending the input, or one or
        # more single-break lines optionally closed by one final line.
        def paragraph
          closing = -> { newline.repeat(1).as(:line_break) | eof? }

          lone_line = paragraph_text_line(0).repeat(1, 1) >> closing.call
          final_line = (paragraph_text_line(true).repeat(1, 1) >> closing.call).repeat(0, 1)
          line_run = paragraph_text_line(false).repeat(1) >> final_line

          header = element_id.maybe >>
                   block_title.maybe >>
                   (attribute_list >> newline).maybe
          body = (lone_line | line_run).as(:lines)

          (header >> body >> (newline.repeat(0) | eof?)).as(:paragraph)
        end

        # A "[key=value]" line: keyword tagged +:key+, word tagged +:value+.
        def paragraph_attributes
          key = keyword.as(:key)
          value = word.as(:value)
          str('[') >> key >> str('=') >> value >> str(']') >> newline
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
# frozen_string_literal: true

module Coradoc
  module AsciiDoc
    module Parser
      # Grammar rules for sections and their headings. Combinators and the
      # content rules referenced here are supplied by the including parser.
      module Section
        # One or more content elements. Alternatives are tried in the order
        # listed, so earlier rules take priority over later ones.
        def contents
          elements = [
            bib_entry,
            block_image,
            tag,
            comment_block,
            comment_line,
            include_directive,
            admonition_line,
            block,
            table.as(:table),
            page_break.as(:page_break),
            paragraph,
            list,
            empty_line.as(:line_break)
          ]
          elements.reduce(:|).repeat(1)
        end

        # Heading of the given level (tagged +:title+) with optional
        # attribute list lines / element id before it and optional contents
        # (tagged +:contents+) after it.
        #
        # @param level [Integer] minimum number of "=" in the heading
        # @return the rule, or nil when +level+ exceeds 8
        def section_block(level = 2)
          return nil if level > 8

          attrs_line = -> { (attribute_list >> newline).maybe }

          attrs_line.call >>
            element_id.maybe >>
            attrs_line.call >>
            section_title(level).as(:title) >>
            contents.as(:contents).maybe
        end

        # Start of a heading line: one or more "=" followed by a character
        # that is not a newline.
        def section_prefix
          equals_run = match('^[=]') >> str('=').repeat(0)
          (line_start? >> equals_run >> match('[^\n]'))
        end

        # Heading line: between +level+ and +max_level+ "=" characters
        # (tagged +:level+), optional space, the title text (tagged +:text+)
        # and the end of line (tagged +:line_break+).
        def section_title(level = 2, max_level = 8)
          marker = match('=').repeat(level, max_level).as(:level)
          line_start? >> marker >> str('=').absent? >>
            space? >> text.as(:text) >> endline.as(:line_break)
        end

        # A section plus any number of deeper sections (tagged +:sections+).
        # Recursion stops at level 8; only the level-2 entry point tags its
        # result +:section+, deeper levels are tagged by their parent.
        def section(level = 2)
          rule = section_block(level)
          if level < 8
            deeper = section(level + 1).as(:section).repeat(0).as(:sections)
            rule >>= deeper
          end
          level == 2 ? rule.as(:section) : rule
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
# frozen_string_literal: true

module Coradoc
  module AsciiDoc
    module Parser
      # Grammar rules for inline stem macros such as "stem:[x^2]".
      module Stem
        # Macro name, tagged +:stem_type+; tried in the order listed.
        def stem_type
          names = %w[stem latexmath asciimath]
          names.map { |name| str(name) }.reduce(:|).as(:stem_type)
        end

        # "name:[content]", tagged +:stem+. The content (tagged +:content+)
        # is one or more characters other than "]".
        def stem
          body = match('[^\]]').repeat(1).as(:content)
          (stem_type >> str(':[') >> body >> str(']')).as(:stem)
        end
      end
    end
  end
end
@@ -0,0 +1,166 @@
# frozen_string_literal: true

module Coradoc
  module AsciiDoc
    module Parser
      # Grammar rules for AsciiDoc tables.
      #
      # Shape recognised by these rules:
      # - a table opens with a delimiter character (one of | ! , : ;)
      #   followed by "===" and closes with the same character plus "==="
      # - cells start with that delimiter character, optionally preceded by
      #   a format spec made of digits, ". < > ^ +" and the style letters
      #   d s e m a l h v
      # - the cells found on one line form one row
      #
      # Example:
      #   |===
      #   | A | B | C
      #   | D
      #   | E
      #   |===
      #
      # Combinators (+str+, +match+, +dynamic+, +any+) and sibling rules
      # (+element_id+, +attribute_list+, +newline+, ...) come from the
      # including parser.
      module Table
        # Whole table: optional element id, attribute list lines and block
        # title, the opening delimiter (captured as +:table_delim+ for the
        # rules below), the rows (tagged +:rows+) and the closing delimiter.
        def table
          attrs_line = -> { (attribute_list >> newline).maybe }
          preamble = element_id.maybe >>
                     attrs_line.call >>
                     block_title.maybe >>
                     attrs_line.call

          preamble >>
            table_start.capture(:table_delim) >>
            line_ending >>
            table_rows.as(:rows) >>
            table_end >>
            (line_ending | eof?)
        end

        # Opening delimiter: one of | ! , : ; (tagged +:delim_char+)
        # followed by "===".
        def table_start
          delimiter = match['|!,:;'].as(:delim_char)
          delimiter >> str('===')
        end

        # Closing delimiter, built at parse time from the captured opening
        # delimiter; falls back to "|===" when no delimiter char was
        # captured.
        def table_end
          dynamic do |_source, context|
            captured = context.captures[:table_delim]
            next str('|===') unless captured.is_a?(Hash) && captured[:delim_char]

            str(captured[:delim_char]) >> str('===')
          end
        end

        # One or more rows (each tagged +:row+) up to, but not including,
        # the closing delimiter. The delimiter char defaults to "|" when
        # none was captured.
        def table_rows
          dynamic do |_source, context|
            captured = context.captures[:table_delim]
            has_char = captured.is_a?(Hash) && captured[:delim_char]
            cell_delim = has_char ? captured[:delim_char] : '|'
            closer = "#{cell_delim}==="

            row = str(closer).absent? >> table_row(cell_delim, closer).as(:row)
            row.repeat(1)
          end
        end

        # A single row: one or more cells (tagged +:cell+, collected under
        # +:cells+) followed by a mandatory newline. Cell matching stops
        # before the closing delimiter and before a newline that is
        # directly followed by the delimiter char.
        def table_row(delim_char, closing_delim)
          dynamic do
            one_cell = str(closing_delim).absent? >>
                       (newline >> str(delim_char)).absent? >>
                       table_cell(delim_char, closing_delim).as(:cell)

            one_cell.repeat(1).as(:cells) >> newline
          end
        end

        # A single cell: optional leading space, optional format spec
        # (tagged +:cell_format+), the delimiter char, then the content
        # (tagged +:text+).
        def table_cell(delim_char, closing_delim)
          dynamic do
            spec = cell_format_spec.maybe.as(:cell_format)
            body = cell_content(delim_char, closing_delim).as(:text)

            literal_space?.maybe >> spec >> str(delim_char) >> body
          end
        end

        # Cell content: zero or more characters, stopping before any of
        # - the closing delimiter
        # - a newline followed by the delimiter char or by a format spec
        #   plus delimiter char (the next line starts a new row)
        # - the delimiter char itself
        # - a format spec immediately followed by the delimiter char, so
        #   the spec is left for the next cell
        def cell_content(delim_char, closing_delim)
          dynamic do
            # Alternation instead of one character class, because "^" has a
            # special meaning inside a class.
            spec_char = [
              match['0-9'],
              match['.<>'],
              match['dsemalhv'],
              str('+'),
              str('^')
            ].reduce(:|)
            spec_then_delim = spec_char >> spec_char.repeat(0) >> str(delim_char)
            next_row_ahead = newline >> (str(delim_char) | spec_then_delim)

            content_char = str(closing_delim).absent? >>
                           next_row_ahead.absent? >>
                           str(delim_char).absent? >>
                           spec_then_delim.absent? >>
                           any
            content_char.repeat(0)
          end
        end

        # Cell format spec: one or more of digits, ". < > ^ +" and the
        # style letters d s e m a l h v.
        def cell_format_spec
          spec_char = [
            match['0-9'],
            match['.<>'],
            match['dsemalhv'],
            str('+'),
            str('^')
          ].reduce(:|)
          spec_char.repeat(1)
        end
      end
    end
  end
end
@@ -0,0 +1,70 @@
# frozen_string_literal: true

module Coradoc
  module AsciiDoc
    module Parser
      # Grammar rules for term macros ("term:[...]", "alt:[...]", ...),
      # footnotes and the bracket-balanced macro content they may contain.
      # Combinators (+str+, +match+) and the +line_start?+ / +keyword+ rules
      # come from the including parser.
      module Term
        # Macro name of a term macro, tagged +:term_type+.
        def term_type
          (str('term') |
            str('alt') |
            str('deprecated') |
            str('domain')).as(:term_type)
        end

        # Block-level term macro: "type:[text]" at line start followed by
        # one or more newlines (tagged +:line_break+). The text (tagged
        # +:term+) is one or more characters other than "]".
        def term
          line_start? >>
            term_type >> str(':[') >>
            match('[^\]]').repeat(1).as(:term) >>
            str(']') >> str("\n").repeat(1).as(:line_break)
        end

        # Content that may contain nested "keyword:[...]" macros, so that
        # brackets belonging to e.g. stem:[x] or term:[y] stay balanced.
        #
        # Each step consumes either
        # 1. one character other than "]" that does not begin a
        #    "keyword:[" opener, or
        # 2. one complete nested macro (whose own content is again
        #    macro content).
        #
        # The guard looks for the keyword together with ":[". Checking the
        # keyword alone would stop on ordinary words that merely contain
        # one - "system" contains "stem", "determine" contains "term" - and
        # the enclosing macro would then fail to find its closing "]".
        def macro_content
          (
            ((macro_keyword >> str(':[')).absent? >> match('[^\]]')) |
            nested_macro
          ).repeat(0)
        end

        # Keywords that start macros
        def macro_keyword
          str('stem') | str('term') | str('footnote') |
            str('latexmath') | str('asciimath') | str('alt') |
            str('deprecated') | str('domain')
        end

        # A nested macro: keyword:[content]
        def nested_macro
          macro_keyword >> str(':[') >> macro_content >> str(']')
        end

        # "footnote:" with an optional id keyword (tagged +:id+), then the
        # bracketed macro content (tagged +:footnote+).
        def footnote
          str('footnote:') >>
            keyword.as(:id).maybe >>
            str('[') >>
            macro_content.as(:footnote) >>
            str(']')
        end

        # Inline term macro: "type:[text]" without line anchoring or a
        # trailing newline. The text is tagged +:term+.
        def term_inline
          term_type >> str(':[') >>
            match('[^\]]').repeat(1).as(:term) >>
            str(']')
        end

        # Alternative inline form at line start: "[type]#text#". The text
        # (tagged +:term2+) is one or more characters other than "#".
        def term_inline2
          line_start? >>
            match('^\[') >> term_type >> str(']#') >>
            match('[^\#]').repeat(1).as(:term2) >> str('#')
        end
      end
    end
  end
end