coradoc-adoc 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/lib/coradoc/asciidoc/model/admonition.rb +37 -0
  4. data/lib/coradoc/asciidoc/model/anchorable.rb +64 -0
  5. data/lib/coradoc/asciidoc/model/attached.rb +26 -0
  6. data/lib/coradoc/asciidoc/model/attribute.rb +22 -0
  7. data/lib/coradoc/asciidoc/model/attribute_list/matchers.rb +45 -0
  8. data/lib/coradoc/asciidoc/model/attribute_list.rb +230 -0
  9. data/lib/coradoc/asciidoc/model/attribute_list_attribute.rb +11 -0
  10. data/lib/coradoc/asciidoc/model/audio.rb +44 -0
  11. data/lib/coradoc/asciidoc/model/author.rb +36 -0
  12. data/lib/coradoc/asciidoc/model/base.rb +141 -0
  13. data/lib/coradoc/asciidoc/model/bibliography.rb +37 -0
  14. data/lib/coradoc/asciidoc/model/bibliography_entry.rb +38 -0
  15. data/lib/coradoc/asciidoc/model/block/core.rb +139 -0
  16. data/lib/coradoc/asciidoc/model/block/example.rb +14 -0
  17. data/lib/coradoc/asciidoc/model/block/listing.rb +14 -0
  18. data/lib/coradoc/asciidoc/model/block/literal.rb +14 -0
  19. data/lib/coradoc/asciidoc/model/block/open.rb +14 -0
  20. data/lib/coradoc/asciidoc/model/block/pass.rb +14 -0
  21. data/lib/coradoc/asciidoc/model/block/quote.rb +14 -0
  22. data/lib/coradoc/asciidoc/model/block/reviewer_comment.rb +14 -0
  23. data/lib/coradoc/asciidoc/model/block/side.rb +14 -0
  24. data/lib/coradoc/asciidoc/model/block/source_code.rb +14 -0
  25. data/lib/coradoc/asciidoc/model/block.rb +21 -0
  26. data/lib/coradoc/asciidoc/model/break.rb +33 -0
  27. data/lib/coradoc/asciidoc/model/comment_block.rb +33 -0
  28. data/lib/coradoc/asciidoc/model/comment_line.rb +30 -0
  29. data/lib/coradoc/asciidoc/model/content_list.rb +334 -0
  30. data/lib/coradoc/asciidoc/model/document.rb +197 -0
  31. data/lib/coradoc/asciidoc/model/document_attributes.rb +43 -0
  32. data/lib/coradoc/asciidoc/model/glossaries.rb +11 -0
  33. data/lib/coradoc/asciidoc/model/header.rb +57 -0
  34. data/lib/coradoc/asciidoc/model/highlight.rb +11 -0
  35. data/lib/coradoc/asciidoc/model/image/block_image/attribute_list.rb +23 -0
  36. data/lib/coradoc/asciidoc/model/image/block_image.rb +25 -0
  37. data/lib/coradoc/asciidoc/model/image/core/attribute_list.rb +43 -0
  38. data/lib/coradoc/asciidoc/model/image/core.rb +72 -0
  39. data/lib/coradoc/asciidoc/model/image/inline_image.rb +17 -0
  40. data/lib/coradoc/asciidoc/model/image.rb +14 -0
  41. data/lib/coradoc/asciidoc/model/include.rb +66 -0
  42. data/lib/coradoc/asciidoc/model/inline/anchor.rb +41 -0
  43. data/lib/coradoc/asciidoc/model/inline/attribute_reference.rb +25 -0
  44. data/lib/coradoc/asciidoc/model/inline/base.rb +15 -0
  45. data/lib/coradoc/asciidoc/model/inline/bold.rb +38 -0
  46. data/lib/coradoc/asciidoc/model/inline/cross_reference.rb +29 -0
  47. data/lib/coradoc/asciidoc/model/inline/cross_reference_arg.rb +15 -0
  48. data/lib/coradoc/asciidoc/model/inline/footnote.rb +34 -0
  49. data/lib/coradoc/asciidoc/model/inline/hard_line_break.rb +24 -0
  50. data/lib/coradoc/asciidoc/model/inline/highlight.rb +36 -0
  51. data/lib/coradoc/asciidoc/model/inline/italic.rb +38 -0
  52. data/lib/coradoc/asciidoc/model/inline/link.rb +46 -0
  53. data/lib/coradoc/asciidoc/model/inline/monospace.rb +39 -0
  54. data/lib/coradoc/asciidoc/model/inline/quotation.rb +25 -0
  55. data/lib/coradoc/asciidoc/model/inline/small.rb +25 -0
  56. data/lib/coradoc/asciidoc/model/inline/span.rb +38 -0
  57. data/lib/coradoc/asciidoc/model/inline/stem.rb +24 -0
  58. data/lib/coradoc/asciidoc/model/inline/strikethrough.rb +39 -0
  59. data/lib/coradoc/asciidoc/model/inline/subscript.rb +33 -0
  60. data/lib/coradoc/asciidoc/model/inline/superscript.rb +33 -0
  61. data/lib/coradoc/asciidoc/model/inline/underline.rb +25 -0
  62. data/lib/coradoc/asciidoc/model/inline.rb +31 -0
  63. data/lib/coradoc/asciidoc/model/line_break.rb +11 -0
  64. data/lib/coradoc/asciidoc/model/list/core.rb +61 -0
  65. data/lib/coradoc/asciidoc/model/list/definition.rb +27 -0
  66. data/lib/coradoc/asciidoc/model/list/definition_item.rb +43 -0
  67. data/lib/coradoc/asciidoc/model/list/item.rb +72 -0
  68. data/lib/coradoc/asciidoc/model/list/nestable.rb +14 -0
  69. data/lib/coradoc/asciidoc/model/list/ordered.rb +34 -0
  70. data/lib/coradoc/asciidoc/model/list/unordered.rb +34 -0
  71. data/lib/coradoc/asciidoc/model/list.rb +29 -0
  72. data/lib/coradoc/asciidoc/model/named_attribute.rb +12 -0
  73. data/lib/coradoc/asciidoc/model/paragraph.rb +59 -0
  74. data/lib/coradoc/asciidoc/model/rejected_positional_attribute.rb +12 -0
  75. data/lib/coradoc/asciidoc/model/resolvable.rb +71 -0
  76. data/lib/coradoc/asciidoc/model/resolver.rb +430 -0
  77. data/lib/coradoc/asciidoc/model/reviewer_note.rb +54 -0
  78. data/lib/coradoc/asciidoc/model/revision.rb +47 -0
  79. data/lib/coradoc/asciidoc/model/section.rb +109 -0
  80. data/lib/coradoc/asciidoc/model/serialization/asciidoc_adapter.rb +28 -0
  81. data/lib/coradoc/asciidoc/model/serialization/asciidoc_mapping.rb +42 -0
  82. data/lib/coradoc/asciidoc/model/serialization/asciidoc_mapping_rule.rb +41 -0
  83. data/lib/coradoc/asciidoc/model/serialization/asciidoc_transform.rb +211 -0
  84. data/lib/coradoc/asciidoc/model/serialization/errors.rb +57 -0
  85. data/lib/coradoc/asciidoc/model/serialization.rb +39 -0
  86. data/lib/coradoc/asciidoc/model/spacing.rb +282 -0
  87. data/lib/coradoc/asciidoc/model/table.rb +44 -0
  88. data/lib/coradoc/asciidoc/model/table_cell.rb +122 -0
  89. data/lib/coradoc/asciidoc/model/table_row.rb +26 -0
  90. data/lib/coradoc/asciidoc/model/tag.rb +36 -0
  91. data/lib/coradoc/asciidoc/model/term.rb +48 -0
  92. data/lib/coradoc/asciidoc/model/text_element.rb +66 -0
  93. data/lib/coradoc/asciidoc/model/title.rb +85 -0
  94. data/lib/coradoc/asciidoc/model/video/attribute_list.rb +43 -0
  95. data/lib/coradoc/asciidoc/model/video.rb +49 -0
  96. data/lib/coradoc/asciidoc/model.rb +75 -0
  97. data/lib/coradoc/asciidoc/parse_error.rb +161 -0
  98. data/lib/coradoc/asciidoc/parser/admonition.rb +26 -0
  99. data/lib/coradoc/asciidoc/parser/attribute_list.rb +110 -0
  100. data/lib/coradoc/asciidoc/parser/base.rb +159 -0
  101. data/lib/coradoc/asciidoc/parser/bibliography.rb +31 -0
  102. data/lib/coradoc/asciidoc/parser/block.rb +186 -0
  103. data/lib/coradoc/asciidoc/parser/block_assembler.rb +183 -0
  104. data/lib/coradoc/asciidoc/parser/cache.rb +155 -0
  105. data/lib/coradoc/asciidoc/parser/citation.rb +32 -0
  106. data/lib/coradoc/asciidoc/parser/content.rb +76 -0
  107. data/lib/coradoc/asciidoc/parser/document_attributes.rb +27 -0
  108. data/lib/coradoc/asciidoc/parser/fix_files.rb +76 -0
  109. data/lib/coradoc/asciidoc/parser/header.rb +31 -0
  110. data/lib/coradoc/asciidoc/parser/inline.rb +199 -0
  111. data/lib/coradoc/asciidoc/parser/list.rb +130 -0
  112. data/lib/coradoc/asciidoc/parser/metadata_detector.rb +164 -0
  113. data/lib/coradoc/asciidoc/parser/paragraph.rb +64 -0
  114. data/lib/coradoc/asciidoc/parser/section.rb +62 -0
  115. data/lib/coradoc/asciidoc/parser/stem.rb +19 -0
  116. data/lib/coradoc/asciidoc/parser/table.rb +166 -0
  117. data/lib/coradoc/asciidoc/parser/term.rb +70 -0
  118. data/lib/coradoc/asciidoc/parser/text.rb +156 -0
  119. data/lib/coradoc/asciidoc/parser.rb +10 -0
  120. data/lib/coradoc/asciidoc/serializer/adoc_serializer.rb +86 -0
  121. data/lib/coradoc/asciidoc/serializer/element_registry.rb +95 -0
  122. data/lib/coradoc/asciidoc/serializer/fallback_serializer.rb +21 -0
  123. data/lib/coradoc/asciidoc/serializer/formatter.rb +144 -0
  124. data/lib/coradoc/asciidoc/serializer/registrations.rb +108 -0
  125. data/lib/coradoc/asciidoc/serializer/serialization_context.rb +238 -0
  126. data/lib/coradoc/asciidoc/serializer/serializers/admonition.rb +19 -0
  127. data/lib/coradoc/asciidoc/serializer/serializers/attribute.rb +23 -0
  128. data/lib/coradoc/asciidoc/serializer/serializers/attribute_list.rb +40 -0
  129. data/lib/coradoc/asciidoc/serializer/serializers/attribute_list_attribute.rb +18 -0
  130. data/lib/coradoc/asciidoc/serializer/serializers/audio.rb +33 -0
  131. data/lib/coradoc/asciidoc/serializer/serializers/author.rb +20 -0
  132. data/lib/coradoc/asciidoc/serializer/serializers/base.rb +152 -0
  133. data/lib/coradoc/asciidoc/serializer/serializers/bibliography.rb +35 -0
  134. data/lib/coradoc/asciidoc/serializer/serializers/bibliography_entry.rb +24 -0
  135. data/lib/coradoc/asciidoc/serializer/serializers/block/core.rb +70 -0
  136. data/lib/coradoc/asciidoc/serializer/serializers/block/example.rb +17 -0
  137. data/lib/coradoc/asciidoc/serializer/serializers/block/listing.rb +22 -0
  138. data/lib/coradoc/asciidoc/serializer/serializers/block/literal.rb +17 -0
  139. data/lib/coradoc/asciidoc/serializer/serializers/block/open.rb +22 -0
  140. data/lib/coradoc/asciidoc/serializer/serializers/block/pass.rb +17 -0
  141. data/lib/coradoc/asciidoc/serializer/serializers/block/quote.rb +17 -0
  142. data/lib/coradoc/asciidoc/serializer/serializers/block/reviewer_comment.rb +17 -0
  143. data/lib/coradoc/asciidoc/serializer/serializers/block/side.rb +22 -0
  144. data/lib/coradoc/asciidoc/serializer/serializers/block/source_code.rb +22 -0
  145. data/lib/coradoc/asciidoc/serializer/serializers/block.rb +23 -0
  146. data/lib/coradoc/asciidoc/serializer/serializers/break.rb +18 -0
  147. data/lib/coradoc/asciidoc/serializer/serializers/comment_block.rb +22 -0
  148. data/lib/coradoc/asciidoc/serializer/serializers/comment_line.rb +22 -0
  149. data/lib/coradoc/asciidoc/serializer/serializers/document.rb +65 -0
  150. data/lib/coradoc/asciidoc/serializer/serializers/document_attributes.rb +21 -0
  151. data/lib/coradoc/asciidoc/serializer/serializers/header.rb +24 -0
  152. data/lib/coradoc/asciidoc/serializer/serializers/highlight.rb +23 -0
  153. data/lib/coradoc/asciidoc/serializer/serializers/image/core.rb +30 -0
  154. data/lib/coradoc/asciidoc/serializer/serializers/image.rb +14 -0
  155. data/lib/coradoc/asciidoc/serializer/serializers/include.rb +19 -0
  156. data/lib/coradoc/asciidoc/serializer/serializers/inline/anchor.rb +20 -0
  157. data/lib/coradoc/asciidoc/serializer/serializers/inline/attribute_reference.rb +20 -0
  158. data/lib/coradoc/asciidoc/serializer/serializers/inline/bold.rb +26 -0
  159. data/lib/coradoc/asciidoc/serializer/serializers/inline/cross_reference.rb +30 -0
  160. data/lib/coradoc/asciidoc/serializer/serializers/inline/cross_reference_arg.rb +20 -0
  161. data/lib/coradoc/asciidoc/serializer/serializers/inline/footnote.rb +24 -0
  162. data/lib/coradoc/asciidoc/serializer/serializers/inline/hard_line_break.rb +20 -0
  163. data/lib/coradoc/asciidoc/serializer/serializers/inline/highlight.rb +26 -0
  164. data/lib/coradoc/asciidoc/serializer/serializers/inline/italic.rb +26 -0
  165. data/lib/coradoc/asciidoc/serializer/serializers/inline/link.rb +38 -0
  166. data/lib/coradoc/asciidoc/serializer/serializers/inline/monospace.rb +26 -0
  167. data/lib/coradoc/asciidoc/serializer/serializers/inline/quotation.rb +21 -0
  168. data/lib/coradoc/asciidoc/serializer/serializers/inline/small.rb +20 -0
  169. data/lib/coradoc/asciidoc/serializer/serializers/inline/span.rb +35 -0
  170. data/lib/coradoc/asciidoc/serializer/serializers/inline/stem.rb +23 -0
  171. data/lib/coradoc/asciidoc/serializer/serializers/inline/strikethrough.rb +29 -0
  172. data/lib/coradoc/asciidoc/serializer/serializers/inline/subscript.rb +29 -0
  173. data/lib/coradoc/asciidoc/serializer/serializers/inline/superscript.rb +26 -0
  174. data/lib/coradoc/asciidoc/serializer/serializers/inline/underline.rb +20 -0
  175. data/lib/coradoc/asciidoc/serializer/serializers/inline.rb +32 -0
  176. data/lib/coradoc/asciidoc/serializer/serializers/line_break.rb +18 -0
  177. data/lib/coradoc/asciidoc/serializer/serializers/list/core.rb +47 -0
  178. data/lib/coradoc/asciidoc/serializer/serializers/list/definition.rb +35 -0
  179. data/lib/coradoc/asciidoc/serializer/serializers/list/definition_item.rb +38 -0
  180. data/lib/coradoc/asciidoc/serializer/serializers/list/item.rb +120 -0
  181. data/lib/coradoc/asciidoc/serializer/serializers/list/ordered.rb +24 -0
  182. data/lib/coradoc/asciidoc/serializer/serializers/list/unordered.rb +29 -0
  183. data/lib/coradoc/asciidoc/serializer/serializers/list.rb +19 -0
  184. data/lib/coradoc/asciidoc/serializer/serializers/named_attribute.rb +22 -0
  185. data/lib/coradoc/asciidoc/serializer/serializers/paragraph.rb +65 -0
  186. data/lib/coradoc/asciidoc/serializer/serializers/reviewer_note.rb +28 -0
  187. data/lib/coradoc/asciidoc/serializer/serializers/revision.rb +26 -0
  188. data/lib/coradoc/asciidoc/serializer/serializers/section.rb +37 -0
  189. data/lib/coradoc/asciidoc/serializer/serializers/table.rb +24 -0
  190. data/lib/coradoc/asciidoc/serializer/serializers/table_cell.rb +75 -0
  191. data/lib/coradoc/asciidoc/serializer/serializers/table_row.rb +24 -0
  192. data/lib/coradoc/asciidoc/serializer/serializers/tag.rb +19 -0
  193. data/lib/coradoc/asciidoc/serializer/serializers/term.rb +20 -0
  194. data/lib/coradoc/asciidoc/serializer/serializers/text_element.rb +23 -0
  195. data/lib/coradoc/asciidoc/serializer/serializers/title.rb +55 -0
  196. data/lib/coradoc/asciidoc/serializer/serializers/video.rb +33 -0
  197. data/lib/coradoc/asciidoc/serializer/spacing_strategy.rb +70 -0
  198. data/lib/coradoc/asciidoc/serializer.rb +75 -0
  199. data/lib/coradoc/asciidoc/transform/from_core_model.rb +502 -0
  200. data/lib/coradoc/asciidoc/transform/from_core_model_registrations.rb +126 -0
  201. data/lib/coradoc/asciidoc/transform/registry.rb +146 -0
  202. data/lib/coradoc/asciidoc/transform/to_core_model.rb +564 -0
  203. data/lib/coradoc/asciidoc/transform/to_core_model_registrations.rb +257 -0
  204. data/lib/coradoc/asciidoc/transform.rb +13 -0
  205. data/lib/coradoc/asciidoc/transformer/block_rules.rb +101 -0
  206. data/lib/coradoc/asciidoc/transformer/header_rules.rb +91 -0
  207. data/lib/coradoc/asciidoc/transformer/inline_rules.rb +179 -0
  208. data/lib/coradoc/asciidoc/transformer/list_rules.rb +131 -0
  209. data/lib/coradoc/asciidoc/transformer/misc_rules.rb +196 -0
  210. data/lib/coradoc/asciidoc/transformer/structural_rules.rb +216 -0
  211. data/lib/coradoc/asciidoc/transformer/text_rules.rb +107 -0
  212. data/lib/coradoc/asciidoc/transformer.rb +406 -0
  213. data/lib/coradoc/asciidoc/version.rb +7 -0
  214. data/lib/coradoc/asciidoc.rb +148 -0
  215. data/lib/coradoc/util/asciidoc.rb +71 -0
  216. data/lib/coradoc/util.rb +8 -0
  217. metadata +343 -0
@@ -0,0 +1,406 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'parslet'
4
+
5
+ module Coradoc
6
+ module AsciiDoc
7
+ # Parslet::Transform subclass that converts AST to AsciiDoc model objects.
8
+ #
9
+ # This transformer uses a modular rule system where each group of rules
10
+ # is defined in a separate file for maintainability.
11
+ #
12
+ # Rule modules (each autoloaded):
13
+ # - HeaderRules: Document header, author, revision
14
+ # - InlineRules: Inline formatting (bold, italic, etc.)
15
+ # - TextRules: Text elements and paragraphs
16
+ # - BlockRules: Block elements (example, admonition, etc.)
17
+ # - ListRules: List items and list types
18
+ # - StructuralRules: Sections, tables, documents
19
+ # - MiscRules: Comments, attributes, media elements
20
+ #
21
+ class Transformer < Parslet::Transform
22
# Rule modules are registered for lazy loading at the class level.
# Each rule file reopens this class and defines a module inside it,
# so the file path mirrors the constant path.
rules_dir = File.join(__dir__, 'transformer')
autoload :HeaderRules, File.join(rules_dir, 'header_rules')
autoload :InlineRules, File.join(rules_dir, 'inline_rules')
autoload :TextRules, File.join(rules_dir, 'text_rules')
autoload :BlockRules, File.join(rules_dir, 'block_rules')
autoload :ListRules, File.join(rules_dir, 'list_rules')
autoload :StructuralRules, File.join(rules_dir, 'structural_rules')
autoload :MiscRules, File.join(rules_dir, 'misc_rules')

# Apply every rule module in declaration order. Resolving each constant
# triggers its autoload right before that module's rules are applied.
%i[
  HeaderRules InlineRules TextRules BlockRules
  ListRules StructuralRules MiscRules
].each do |rules_name|
  const_get(rules_name).apply(self)
end
41
+
42
# Unwraps inline content from parser output (used by InlineRules).
#
# * A Hash with a :content key yields the value stored under that key.
# * An Array is mapped element by element: a Hash element with a :text key
#   is unwrapped (a Model::Base exposing a :content attribute yields its
#   content, any other Model::Base is kept as is, everything else is
#   stringified); all other elements pass through untouched.
# * Any other input is returned unchanged.
#
# @param data [Hash, Array, Object] parser output for an inline node
# @return [Object] the unwrapped content
def self.extract_inline_content(data)
  return data[:content] if data.is_a?(Hash) && data.key?(:content)
  return data unless data.is_a?(Array)

  data.map do |item|
    next item unless item.is_a?(Hash) && item.key?(:text)

    text = item[:text]
    if !text.is_a?(Model::Base)
      text.to_s
    elsif text.class.attributes.key?(:content)
      text.content
    else
      text
    end
  end
end
65
+
66
# Flattens simple inline content into plain values.
#
# A Hash with a :content key yields that value. An Array is joined into a
# single String, with Hash elements that carry a :text key contributing the
# stringified text. Any other input is returned unchanged.
#
# @param data [Hash, Array, Object] parser output for an inline node
# @return [Object] content value, joined String, or the input itself
def self.extract_simple_inline_content(data)
  return data[:content] if data.is_a?(Hash) && data.key?(:content)
  return data unless data.is_a?(Array)

  fragments = data.map do |item|
    next item unless item.is_a?(Hash) && item.key?(:text)

    item[:text].to_s
  end
  fragments.join
end
78
+
79
# Parses raw text into inline content. Used for table cells, whose content
# is captured as raw text by the parser.
#
# @param text [String, nil] Raw text to parse
# @param style [String, nil] Cell style: 'a' delegates to
#   {.parse_block_content}, 'l' keeps the text verbatim, anything else is
#   parsed as inline content
# @return [Array] Parsed content; a single TextElement in every branch
#   except style 'a', which returns whatever {.parse_block_content} returns
def self.parse_inline_content(text, style = nil)
  text_element = Coradoc::AsciiDoc::Model::TextElement
  raw = text.to_s

  # Nil and whitespace-only input collapse to one empty element.
  return [text_element.new(content: '')] if text.nil? || raw.strip.empty?

  # AsciiDoc-style cells hold block content.
  return parse_block_content(text) if style == 'a'

  # Literal cells are never interpreted.
  return [text_element.new(content: raw)] if style == 'l'

  # Default cells: run the inline grammar, then the transformer rules.
  parser = Coradoc::AsciiDoc::Parser::Base.new
  begin
    transformed = new.apply(parser.text_any.parse(raw))
    wrapped = transformed.is_a?(Array) ? transformed : [transformed]
    [text_element.new(content: wrapped)]
  rescue Parslet::ParseFailed
    # Unparseable text is kept as plain text rather than dropped.
    [text_element.new(content: raw)]
  end
end
108
+
109
# Parses block-level AsciiDoc content (used for 'a' style table cells).
#
# If some line starts with a list marker (`*`, `-` or `1.`), everything
# from the first such line onwards is parsed as a list, and any text before
# it is parsed as inline content. If there is no list, or the list parse
# fails, the whole text is parsed as inline content. Text that cannot be
# parsed at all is returned verbatim inside a TextElement.
#
# @param text [String, nil] Raw text containing AsciiDoc blocks
# @return [Array] An optional leading TextElement followed by the
#   transformed list, or a single TextElement
def self.parse_block_content(text)
  text_element = Coradoc::AsciiDoc::Model::TextElement
  return [text_element.new(content: '')] if text.nil? || text.to_s.strip.empty?

  parser = Coradoc::AsciiDoc::Parser::Base.new
  text_str = text.to_s

  # Inline parse with a verbatim fallback; shared by the "text before the
  # list" path and the "no list" path.
  parse_inline = lambda do |source|
    begin
      parsed = new.apply(parser.text_any.parse(source))
      [text_element.new(content: parsed.is_a?(Array) ? parsed : [parsed])]
    rescue Parslet::ParseFailed
      [text_element.new(content: source)]
    end
  end

  # Accept a marker at the very start of the text as well as after a
  # newline. Requiring the newline skipped a list that opens the cell and
  # misfiled its first item as plain text.
  list_match = text_str.match(/(?:\A|\n)(\*+|-+|\d+\.)(.*)$/m)
  if list_match
    begin
      list = new.apply(parser.list.parse(list_match[1] + list_match[2]))

      # Whatever precedes the list (e.g. "Title:\n\n* item") is inline text.
      leading_text = list_match.pre_match.strip
      leading_elements = leading_text.empty? ? [] : parse_inline.call(leading_text)

      return leading_elements + [list]
    rescue Parslet::ParseFailed
      # Not a parseable list after all: fall through to inline parsing.
    end
  end

  parse_inline.call(text_str)
end
161
+
162
# Builds a table cell from a cell format specification and raw content.
#
# The specification is either a Hash with the optional keys :colspan,
# :rowspan, :halign, :valign, :style and :repeat, or a String such as
# ".2+^.^", "4+^", "2.3+" or ".3+a". Any other value (including nil)
# produces a cell with content only.
#
# @param format [Hash, String, Object] Cell format specification from parser
# @param content [Object] Cell content; parsed according to the cell style
# @return [Model::TableCell] Table cell model with parsed attributes
def self.build_table_cell(format, content)
  cell_opts = {}

  # The style is needed after the branches to decide how content is parsed.
  style = nil

  if format.is_a?(Hash)
    cell_opts[:colspan] = format[:colspan].to_i if format[:colspan]

    # Rowspan arrives with its leading dot (".2"); accept digits only.
    if format[:rowspan]
      rowspan_str = format[:rowspan].to_s.sub(/^\./, '')
      cell_opts[:rowspan] = rowspan_str.to_i if rowspan_str.match?(/^\d+$/)
    end

    cell_opts[:halign] = format[:halign].to_s if format[:halign]

    # Valign arrives with its leading dot (".^"); accept known operators only.
    if format[:valign]
      valign_str = format[:valign].to_s.sub(/^\./, '')
      cell_opts[:valign] = valign_str if %w[< ^ >].include?(valign_str)
    end

    style = format[:style].to_s if format[:style]
    cell_opts[:style] = style

    cell_opts[:repeat] = true if format[:repeat]
  elsif format.is_a?(String)
    # Format: [colspan][.rowspan]+[halign][.valign][style][*]

    # Colspan is the number opening the span prefix. The prefix may also
    # carry a rowspan ("2.3+"), which must not hide the colspan.
    colspan = format[/\A(\d+)(?:\.\d+)?\+/, 1]
    cell_opts[:colspan] = colspan.to_i if colspan

    # Rowspan is the number that follows a dot.
    rowspan = format[/\.(\d+)/, 1]
    cell_opts[:rowspan] = rowspan.to_i if rowspan

    # Horizontal alignment: an operator NOT preceded by a dot (a dotted
    # operator is the vertical alignment). ^ is center, < left, > right.
    halign = format[/(?<!\.)[<>^]/]
    cell_opts[:halign] = halign if halign

    # Vertical alignment: the operator directly after a dot.
    valign = format[/\.([<>^])/, 1]
    cell_opts[:valign] = valign if valign

    # Style letter (d=decimal, s=strong, e=emphasis, m=monospace,
    # a=asciidoc, l=literal, h=header, v).
    style = format[/[dsemalhv]/]
    cell_opts[:style] = style

    cell_opts[:repeat] = true if format.include?('*')
  end

  cell_opts[:content] = parse_inline_content(content, style)

  Model::TableCell.new(**cell_opts)
end
232
+
233
# Derives the column count from a table's `cols` attribute.
#
# Supported values (surrounding quotes and spaces are ignored):
# - a single number:          "3"     -> 3 columns
# - a list of column specs:   "1,2,1" -> 3 columns
# - a repeat factor:          "3*"    -> 3 columns (also "3*<")
# - repeat factors in a list: "2*,1"  -> 3 columns
#
# @param attrs [Model::AttributeList, Hash, nil] Table attributes
# @return [Integer, nil] Column count, or nil when it is not specified or
#   not recognised
def self.parse_cols_attribute(attrs)
  return nil if attrs.nil?

  cols_value =
    case attrs
    when Hash
      attrs['cols'] || attrs[:cols]
    when Model::AttributeList
      attrs.named.find { |n| n.name.to_s == 'cols' }&.value
    end
  return nil if cols_value.nil?

  cols_str = cols_value.is_a?(Array) ? cols_value.first.to_s : cols_value.to_s

  # Drop surrounding quotes, then spaces, so that " 3 " and "1, 2*" work.
  cols_str = cols_str.gsub(/\A["']|["']\z/, '').delete(' ')

  # A bare number is a column count, not the width of a single column.
  return cols_str.to_i if /\A\d+\z/.match?(cols_str)

  # In a list every spec is one column, or N columns when it starts
  # with an "N*" repeat factor.
  if cols_str.include?(',')
    return cols_str.split(',').sum { |spec| (spec[/\A(\d+)\*/, 1] || 1).to_i }
  end

  # A lone spec only tells us the count when it carries a repeat factor.
  cols_str[/\A(\d+)\*/, 1]&.to_i
end
267
+
268
# Splits a flat list of cells into table rows.
#
# Row boundaries follow the column count: a row is closed as soon as the
# column slots taken by its cells (a cell's colspan, or 1 when it has
# none) reach that count. The count comes from +explicit_col_count+, else
# from {.infer_column_count}, else all cells form a single row. Cells left
# over at the end form a final, shorter row.
#
# @param cells [Array<Model::TableCell, Hash, Object>, nil] Flat list of
#   cells; anything that is not a TableCell is wrapped in one
# @param explicit_col_count [Integer, nil] Column count from cols attribute
# @return [Array<Model::TableRow>] Grouped rows
def self.group_cells_into_rows(cells, explicit_col_count = nil)
  return [] if cells.nil? || cells.empty?

  # Make sure every entry is a TableCell before counting slots.
  table_cells = cells.map do |cell|
    if cell.is_a?(Model::TableCell)
      cell
    else
      raw = cell.is_a?(Hash) ? (cell[:text] || cell[:content] || '') : cell
      Model::TableCell.new(content: parse_inline_content(raw))
    end
  end

  # Explicit count first, then inference, then "everything is one row".
  width = explicit_col_count
  width = infer_column_count(table_cells) if width.nil? || width.zero?
  width = table_cells.size if width.nil? || width.zero?

  rows = []
  pending = []
  filled = 0

  table_cells.each do |cell|
    pending << cell
    filled += cell.colspan || 1

    # The row is complete once its slots reach the column count.
    if filled >= width
      rows << Model::TableRow.new(columns: pending)
      pending = []
      filled = 0
    end
  end

  # Keep an incomplete trailing row rather than dropping its cells.
  rows << Model::TableRow.new(columns: pending) unless pending.empty?

  rows
end
333
+
334
# Guesses the column count from the cells alone.
#
# Every cell takes its colspan in column slots (1 when it is not a
# TableCell or has no colspan). A width between 1 and 12 is acceptable
# when it divides the total number of slots and the cells, taken in order,
# fill each row exactly without one of them overflowing a row. The largest
# acceptable width wins; if there is none, the first cell's slot count is
# returned.
#
# @param cells [Array, nil] Flat list of cells
# @return [Integer, nil] Inferred column count, nil for nil/empty input
def self.infer_column_count(cells)
  return nil if cells.nil? || cells.empty?

  spans = cells.map do |cell|
    cell.is_a?(Model::TableCell) && cell.colspan ? cell.colspan : 1
  end
  total_slots = spans.sum

  # True when consecutive cells fill rows of +width+ slots exactly.
  fills_rows = lambda do |width|
    running = 0
    spans.each do |span|
      running += span
      return false if running > width

      running = 0 if running == width
    end
    running.zero?
  end

  # Search from the widest candidate down, so the first hit is the maximum.
  widest = [total_slots, 12].min.downto(1).find do |width|
    (total_slots % width).zero? && fills_rows.call(width)
  end

  widest || spans.first || 1
end
368
+
369
# Rebuilds table rows from parser-level rows.
#
# The cells of every Model::TableRow in +rows+ are flattened into one list
# (other entries are ignored) and regrouped by {.group_cells_into_rows},
# using the column count from the cols attribute when there is one. The
# first regrouped row is flagged as the header.
#
# NOTE(review): without a cols attribute the incoming row boundaries are
# discarded and the column count is inferred from the flattened cells.
# Confirm this is the intended reading of "column count is determined by
# the first row".
#
# @param rows [Array<Model::TableRow>, nil] Parser-level rows
# @param attrs [Model::AttributeList, nil] Table attributes containing cols
# @return [Array<Model::TableRow>, nil] Regrouped rows with the header flag
#   set; +rows+ itself when it is nil, empty or contains no cells
def self.regroup_table_rows(rows, attrs = nil)
  return rows if rows.nil? || rows.empty?

  col_count = parse_cols_attribute(attrs)
  cells = rows.grep(Model::TableRow).flat_map(&:columns)
  return rows if cells.empty?

  group_cells_into_rows(cells, col_count).tap do |grouped|
    grouped.first.header = true unless grouped.empty?
  end
end
390
+
391
# Runs a syntax tree through a fresh instance of this transformer.
#
# @param syntax_tree [Hash, Array] The AST from the parser
# @return [Object] The transformed model object(s)
def self.transform(syntax_tree)
  transformer = new
  transformer.apply(syntax_tree)
end
398
+
399
# Legacy entry point: applies a fresh transformer instance to the tree.
#
# @deprecated Use {.transform} instead
# @param syntax_tree [Hash, Array] The AST from the parser
# @return [Object] The transformed model object(s)
def self.legacy_transform(syntax_tree)
  transformer = new
  transformer.apply(syntax_tree)
end
404
+ end
405
+ end
406
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module AsciiDoc
    # Release version of the coradoc-adoc gem.
    VERSION = '2.0.0'
  end
end
@@ -0,0 +1,148 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'parslet'
4
+ require 'lutaml/model'
5
+ require 'coradoc/core_model' # Required for CoreModel types in transformers
6
+
7
module Coradoc
  # Utility helpers; the file is only required on first reference to Util.
  autoload :Util, "#{__dir__}/util"

  module AsciiDoc
    # Root error class of the AsciiDoc gem. It descends from Coradoc::Error so
    # callers can rescue errors from every Coradoc gem uniformly.
    class Error < Coradoc::Error
    end
  end
end
17
+
18
+ # Load the version eagerly; ParseError is registered for autoload below
19
+ require_relative 'asciidoc/version'
20
+
21
module Coradoc
  module AsciiDoc
    # Every constant below is registered for lazy loading: its file is required
    # the first time the constant is referenced.

    # Error raised for parse failures
    autoload :ParseError, "#{__dir__}/asciidoc/parse_error"

    # Main components
    autoload :Model, "#{__dir__}/asciidoc/model"
    autoload :Parser, "#{__dir__}/asciidoc/parser"
    autoload :Transformer, "#{__dir__}/asciidoc/transformer"
    autoload :Serializer, "#{__dir__}/asciidoc/serializer"
    autoload :Transform, "#{__dir__}/asciidoc/transform"
  end
end
37
+
38
+ # Define the module-level API now that the autoloads above are registered
39
module Coradoc
  module AsciiDoc
    class << self
      # Parse AsciiDoc source text into this gem's document model.
      #
      # @param text [String] AsciiDoc content to parse
      # @return [Coradoc::AsciiDoc::Model::Document] Parsed document model
      def parse(text)
        syntax_tree = Coradoc::AsciiDoc::Parser::Base.parse(text)
        Coradoc::AsciiDoc::Transformer.transform(syntax_tree)
      end

      # Parse AsciiDoc source text and convert the result to CoreModel.
      #
      # @param text [String] AsciiDoc content to parse
      # @return [Coradoc::CoreModel::Document] CoreModel document
      def parse_to_core(text)
        to_core(parse(text))
      end

      # Tell whether this format is responsible for the given model.
      #
      # @param model [Object] The model to check
      # @return [Boolean] true when the model is an AsciiDoc model instance
      def handles_model?(model)
        model.is_a?(Coradoc::AsciiDoc::Model::Base)
      end

      # Convert an AsciiDoc model into its CoreModel counterpart.
      #
      # @param document [Coradoc::AsciiDoc::Model::Base] AsciiDoc document model
      # @return [Coradoc::CoreModel::Base] CoreModel
      def to_core(document)
        Coradoc::AsciiDoc::Transform::ToCoreModel.transform(document)
      end

      # Render a document model as AsciiDoc text.
      #
      # @param document [Coradoc::AsciiDoc::Model::Document, Coradoc::CoreModel::Base]
      #   Document model to serialize
      # @return [String] AsciiDoc representation
      def serialize(document)
        # CoreModel input is first converted to an AsciiDoc model; anything
        # else is asked for its AsciiDoc text directly.
        adoc_model =
          if document.is_a?(Coradoc::CoreModel::Base)
            Coradoc::AsciiDoc::Transform::FromCoreModel.transform(document)
          else
            document
          end
        adoc_model.to_adoc
      end
    end

    # Backward-compatible shortcuts to the model classes, so that tests and
    # legacy code can write Coradoc::AsciiDoc::Document instead of
    # Coradoc::AsciiDoc::Model::Document.
    Base = Model::Base
    Document = Model::Document
    Section = Model::Section
    Paragraph = Model::Paragraph
    TextElement = Model::TextElement
    Title = Model::Title
    Header = Model::Header
    Admonition = Model::Admonition
    Table = Model::Table
    TableRow = Model::TableRow
    TableCell = Model::TableCell
    Term = Model::Term
    Break = Model::Break
    Audio = Model::Audio
    Video = Model::Video
    Bibliography = Model::Bibliography
    BibliographyEntry = Model::BibliographyEntry
    CommentBlock = Model::CommentBlock
    CommentLine = Model::CommentLine
    LineBreak = Model::LineBreak
    Include = Model::Include
    Attribute = Model::Attribute
    AttributeList = Model::AttributeList
    Author = Model::Author
    Revision = Model::Revision
    NamedAttribute = Model::NamedAttribute
    ContentList = Model::ContentList
    Tag = Model::Tag
    Highlight = Model::Highlight
    DocumentAttributes = Model::DocumentAttributes

    # Shortcuts to nested namespaces
    Inline = Model::Inline
    Block = Model::Block
    List = Model::List
    Image = Model::Image

    # Shortcuts to mixin modules
    Anchorable = Model::Anchorable
    Attached = Model::Attached
    Spacing = Model::Spacing
  end
end
139
+
140
# Register this gem as the :asciidoc format once the module is fully defined.
# The call is unconditional, so Coradoc.register_format must already be
# defined when this file is loaded.
Coradoc.register_format(
  :asciidoc,
  Coradoc::AsciiDoc,
  aliases: %w[adoc asciidoc],
  extensions: %w[.adoc .asciidoc]
)

# Legacy alias: code written against Coradoc::Model keeps working, because the
# constant now points at Coradoc::AsciiDoc::Model. It is only assigned when
# Coradoc::Model is not already defined.
Coradoc::Model = Coradoc::AsciiDoc::Model unless defined?(Coradoc::Model)
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Util
    # AsciiDoc-specific utility functions
    module AsciiDoc
      # Serialize a Coradoc model to AsciiDoc string
      #
      # nil becomes an empty string and Strings are returned as-is. AsciiDoc
      # models are rendered through #to_adoc, Arrays are serialized
      # element-wise and joined with newlines, Hashes become "key: value"
      # lines, and anything else falls back to #to_s.
      #
      # @param model [Object] The model to serialize
      # @return [String] The AsciiDoc representation
      #
      # @example Serialize a document
      #   serialize(document) # => "= Title\n\nContent"
      #
      def self.serialize(model)
        return '' if model.nil?
        return model if model.is_a?(String)

        case model
        when Coradoc::AsciiDoc::Model::Base
          model.to_adoc
        when Array
          model.map { |item| serialize(item) }.join("\n")
        when Hash
          model.map { |key, value| "#{key}: #{serialize(value)}" }.join("\n")
        else
          model.to_s
        end
      end

      # Escape special AsciiDoc characters in content
      #
      # Each listed character is prefixed with a backslash unless it is
      # already preceded by one.
      #
      # @param content [String] The content to escape
      # @param escape_chars [Array<String>] Characters to escape (e.g., ["*", "_", "#"])
      # @return [String] The escaped content ('' when content is nil)
      #
      # @example Escape asterisks for bold text
      #   escape_characters("2 * 3 = 6", escape_chars: ["*"])
      #   # => "2 \\* 3 = 6"
      #
      def self.escape_characters(content, escape_chars: [])
        return '' if content.nil?
        return content if escape_chars.empty?

        escape_chars.reduce(content.to_s) do |text, char|
          unescaped = /(?<!\\)#{Regexp.escape(char)}/
          # Block form on purpose: a replacement *string* such as "\\&", "\\'"
          # or "\\1" is interpreted by gsub as a back-reference, which would
          # leave characters like "&", "'" or digits unescaped or mangled.
          text.gsub(unescaped) { |hit| "\\#{hit}" }
        end
      end

      # Unescape AsciiDoc characters in content
      #
      # Removes the backslash in front of every listed character.
      #
      # @param content [String] The content to unescape
      # @param escape_chars [Array<String>] Characters to unescape
      # @return [String] The unescaped content ('' when content is nil)
      #
      def self.unescape_characters(content, escape_chars: [])
        return '' if content.nil?
        return content if escape_chars.empty?

        escape_chars.reduce(content.to_s) do |text, char|
          # Block form keeps the replacement literal even if char contains a
          # backslash sequence that gsub would otherwise interpret.
          text.gsub("\\#{char}") { char }
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  # Utility modules for Coradoc
  module Util
    # Anchored on this file's directory, like the autoloads in
    # coradoc/asciidoc.rb, so resolution does not depend on the gem's lib
    # directory being on $LOAD_PATH.
    autoload :AsciiDoc, "#{__dir__}/util/asciidoc"
  end
end