coradoc 1.1.8 → 2.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. checksums.yaml +4 -4
  2. data/.rspec +1 -1
  3. data/Rakefile +3 -12
  4. data/exe/coradoc +21 -2
  5. data/lib/coradoc/cli.rb +185 -91
  6. data/lib/coradoc/configurable.rb +527 -0
  7. data/lib/coradoc/coradoc.rb +463 -0
  8. data/lib/coradoc/core_model/annotation_block.rb +57 -0
  9. data/lib/coradoc/core_model/base.rb +172 -0
  10. data/lib/coradoc/core_model/bibliography.rb +41 -0
  11. data/lib/coradoc/core_model/bibliography_entry.rb +48 -0
  12. data/lib/coradoc/core_model/block.rb +63 -0
  13. data/lib/coradoc/core_model/children_content.rb +53 -0
  14. data/lib/coradoc/core_model/comment_block.rb +10 -0
  15. data/lib/coradoc/core_model/definition_item.rb +46 -0
  16. data/lib/coradoc/core_model/definition_list.rb +28 -0
  17. data/lib/coradoc/core_model/element_attribute.rb +26 -0
  18. data/lib/coradoc/core_model/example_block.rb +10 -0
  19. data/lib/coradoc/core_model/footnote.rb +92 -0
  20. data/lib/coradoc/core_model/horizontal_rule_block.rb +10 -0
  21. data/lib/coradoc/core_model/id_generator.rb +16 -0
  22. data/lib/coradoc/core_model/image.rb +66 -0
  23. data/lib/coradoc/core_model/inline_element.rb +140 -0
  24. data/lib/coradoc/core_model/list_block.rb +135 -0
  25. data/lib/coradoc/core_model/list_item.rb +142 -0
  26. data/lib/coradoc/core_model/listing_block.rb +13 -0
  27. data/lib/coradoc/core_model/literal_block.rb +10 -0
  28. data/lib/coradoc/core_model/metadata.rb +79 -0
  29. data/lib/coradoc/core_model/open_block.rb +10 -0
  30. data/lib/coradoc/core_model/paragraph_block.rb +10 -0
  31. data/lib/coradoc/core_model/pass_block.rb +10 -0
  32. data/lib/coradoc/core_model/quote_block.rb +12 -0
  33. data/lib/coradoc/core_model/reviewer_block.rb +10 -0
  34. data/lib/coradoc/core_model/sidebar_block.rb +10 -0
  35. data/lib/coradoc/core_model/source_block.rb +10 -0
  36. data/lib/coradoc/core_model/structural_element.rb +94 -0
  37. data/lib/coradoc/core_model/table.rb +148 -0
  38. data/lib/coradoc/core_model/term.rb +53 -0
  39. data/lib/coradoc/core_model/text_content.rb +22 -0
  40. data/lib/coradoc/core_model/toc.rb +105 -0
  41. data/lib/coradoc/core_model/toc_generator.rb +151 -0
  42. data/lib/coradoc/core_model/verse_block.rb +12 -0
  43. data/lib/coradoc/core_model.rb +77 -0
  44. data/lib/coradoc/document_builder.rb +184 -0
  45. data/lib/coradoc/document_manipulator.rb +203 -0
  46. data/lib/coradoc/errors.rb +312 -0
  47. data/lib/coradoc/format_module.rb +49 -0
  48. data/lib/coradoc/hooks.rb +176 -0
  49. data/lib/coradoc/input.rb +17 -7
  50. data/lib/coradoc/logger.rb +54 -0
  51. data/lib/coradoc/output.rb +17 -6
  52. data/lib/coradoc/performance_regression.rb +109 -0
  53. data/lib/coradoc/processor_registry.rb +50 -0
  54. data/lib/coradoc/query.rb +455 -0
  55. data/lib/coradoc/registry.rb +156 -0
  56. data/lib/coradoc/serializer/registry.rb +150 -0
  57. data/lib/coradoc/transform.rb +11 -0
  58. data/lib/coradoc/validation.rb +646 -0
  59. data/lib/coradoc/version.rb +1 -1
  60. data/lib/coradoc/visitor.rb +283 -0
  61. data/lib/coradoc.rb +40 -19
  62. metadata +67 -277
  63. data/.editorconfig +0 -15
  64. data/.envrc +0 -1
  65. data/.irbrc +0 -1
  66. data/.pryrc.sample +0 -1
  67. data/.rubocop.yml +0 -14
  68. data/.rubocop_todo.yml +0 -179
  69. data/CHANGELOG.md +0 -9
  70. data/CODE_OF_CONDUCT.md +0 -84
  71. data/Dockerfile +0 -19
  72. data/Gemfile +0 -16
  73. data/LICENSE.txt +0 -21
  74. data/Makefile +0 -35
  75. data/README.Docker.adoc +0 -57
  76. data/README.adoc +0 -119
  77. data/coradoc.gemspec +0 -40
  78. data/docker-compose.yml +0 -14
  79. data/exe/reverse_adoc +0 -81
  80. data/exe/w2a +0 -60
  81. data/flake.lock +0 -114
  82. data/flake.nix +0 -135
  83. data/lib/coradoc/converter.rb +0 -144
  84. data/lib/coradoc/document.rb +0 -77
  85. data/lib/coradoc/element/admonition.rb +0 -18
  86. data/lib/coradoc/element/attribute.rb +0 -36
  87. data/lib/coradoc/element/attribute_list.rb +0 -138
  88. data/lib/coradoc/element/audio.rb +0 -33
  89. data/lib/coradoc/element/author.rb +0 -24
  90. data/lib/coradoc/element/base.rb +0 -92
  91. data/lib/coradoc/element/bibliography.rb +0 -24
  92. data/lib/coradoc/element/bibliography_entry.rb +0 -24
  93. data/lib/coradoc/element/block/core.rb +0 -76
  94. data/lib/coradoc/element/block/example.rb +0 -23
  95. data/lib/coradoc/element/block/listing.rb +0 -21
  96. data/lib/coradoc/element/block/literal.rb +0 -21
  97. data/lib/coradoc/element/block/open.rb +0 -22
  98. data/lib/coradoc/element/block/pass.rb +0 -21
  99. data/lib/coradoc/element/block/quote.rb +0 -19
  100. data/lib/coradoc/element/block/reviewer_comment.rb +0 -19
  101. data/lib/coradoc/element/block/side.rb +0 -19
  102. data/lib/coradoc/element/block/sourcecode.rb +0 -21
  103. data/lib/coradoc/element/block.rb +0 -17
  104. data/lib/coradoc/element/break.rb +0 -11
  105. data/lib/coradoc/element/comment_block.rb +0 -22
  106. data/lib/coradoc/element/comment_line.rb +0 -18
  107. data/lib/coradoc/element/document_attributes.rb +0 -33
  108. data/lib/coradoc/element/header.rb +0 -22
  109. data/lib/coradoc/element/image/block_image.rb +0 -32
  110. data/lib/coradoc/element/image/core.rb +0 -58
  111. data/lib/coradoc/element/image/inline_image.rb +0 -12
  112. data/lib/coradoc/element/image.rb +0 -10
  113. data/lib/coradoc/element/include.rb +0 -18
  114. data/lib/coradoc/element/inline/anchor.rb +0 -19
  115. data/lib/coradoc/element/inline/attribute_reference.rb +0 -19
  116. data/lib/coradoc/element/inline/bold.rb +0 -25
  117. data/lib/coradoc/element/inline/cross_reference.rb +0 -46
  118. data/lib/coradoc/element/inline/footnote.rb +0 -24
  119. data/lib/coradoc/element/inline/hard_line_break.rb +0 -11
  120. data/lib/coradoc/element/inline/highlight.rb +0 -25
  121. data/lib/coradoc/element/inline/italic.rb +0 -25
  122. data/lib/coradoc/element/inline/link.rb +0 -42
  123. data/lib/coradoc/element/inline/monospace.rb +0 -25
  124. data/lib/coradoc/element/inline/quotation.rb +0 -20
  125. data/lib/coradoc/element/inline/small.rb +0 -19
  126. data/lib/coradoc/element/inline/span.rb +0 -37
  127. data/lib/coradoc/element/inline/subscript.rb +0 -20
  128. data/lib/coradoc/element/inline/superscript.rb +0 -20
  129. data/lib/coradoc/element/inline/underline.rb +0 -19
  130. data/lib/coradoc/element/inline.rb +0 -23
  131. data/lib/coradoc/element/list/core.rb +0 -51
  132. data/lib/coradoc/element/list/definition.rb +0 -29
  133. data/lib/coradoc/element/list/ordered.rb +0 -17
  134. data/lib/coradoc/element/list/unordered.rb +0 -17
  135. data/lib/coradoc/element/list.rb +0 -13
  136. data/lib/coradoc/element/list_item.rb +0 -98
  137. data/lib/coradoc/element/list_item_definition.rb +0 -32
  138. data/lib/coradoc/element/paragraph.rb +0 -37
  139. data/lib/coradoc/element/revision.rb +0 -27
  140. data/lib/coradoc/element/section.rb +0 -62
  141. data/lib/coradoc/element/table.rb +0 -91
  142. data/lib/coradoc/element/tag.rb +0 -19
  143. data/lib/coradoc/element/term.rb +0 -22
  144. data/lib/coradoc/element/text_element.rb +0 -92
  145. data/lib/coradoc/element/title.rb +0 -62
  146. data/lib/coradoc/element/video.rb +0 -50
  147. data/lib/coradoc/generator.rb +0 -19
  148. data/lib/coradoc/input/adoc.rb +0 -30
  149. data/lib/coradoc/input/docx.rb +0 -64
  150. data/lib/coradoc/input/html/LICENSE.txt +0 -25
  151. data/lib/coradoc/input/html/README.adoc +0 -308
  152. data/lib/coradoc/input/html/cleaner.rb +0 -142
  153. data/lib/coradoc/input/html/config.rb +0 -77
  154. data/lib/coradoc/input/html/converters/a.rb +0 -52
  155. data/lib/coradoc/input/html/converters/aside.rb +0 -16
  156. data/lib/coradoc/input/html/converters/audio.rb +0 -29
  157. data/lib/coradoc/input/html/converters/base.rb +0 -108
  158. data/lib/coradoc/input/html/converters/blockquote.rb +0 -22
  159. data/lib/coradoc/input/html/converters/br.rb +0 -15
  160. data/lib/coradoc/input/html/converters/bypass.rb +0 -81
  161. data/lib/coradoc/input/html/converters/code.rb +0 -23
  162. data/lib/coradoc/input/html/converters/div.rb +0 -19
  163. data/lib/coradoc/input/html/converters/dl.rb +0 -62
  164. data/lib/coradoc/input/html/converters/drop.rb +0 -26
  165. data/lib/coradoc/input/html/converters/em.rb +0 -21
  166. data/lib/coradoc/input/html/converters/figure.rb +0 -25
  167. data/lib/coradoc/input/html/converters/h.rb +0 -42
  168. data/lib/coradoc/input/html/converters/head.rb +0 -23
  169. data/lib/coradoc/input/html/converters/hr.rb +0 -15
  170. data/lib/coradoc/input/html/converters/ignore.rb +0 -20
  171. data/lib/coradoc/input/html/converters/img.rb +0 -110
  172. data/lib/coradoc/input/html/converters/li.rb +0 -17
  173. data/lib/coradoc/input/html/converters/mark.rb +0 -19
  174. data/lib/coradoc/input/html/converters/markup.rb +0 -31
  175. data/lib/coradoc/input/html/converters/math.rb +0 -38
  176. data/lib/coradoc/input/html/converters/ol.rb +0 -65
  177. data/lib/coradoc/input/html/converters/p.rb +0 -23
  178. data/lib/coradoc/input/html/converters/pass_through.rb +0 -17
  179. data/lib/coradoc/input/html/converters/pre.rb +0 -55
  180. data/lib/coradoc/input/html/converters/q.rb +0 -16
  181. data/lib/coradoc/input/html/converters/strong.rb +0 -20
  182. data/lib/coradoc/input/html/converters/sub.rb +0 -22
  183. data/lib/coradoc/input/html/converters/sup.rb +0 -22
  184. data/lib/coradoc/input/html/converters/table.rb +0 -319
  185. data/lib/coradoc/input/html/converters/td.rb +0 -81
  186. data/lib/coradoc/input/html/converters/text.rb +0 -32
  187. data/lib/coradoc/input/html/converters/th.rb +0 -18
  188. data/lib/coradoc/input/html/converters/tr.rb +0 -22
  189. data/lib/coradoc/input/html/converters/video.rb +0 -29
  190. data/lib/coradoc/input/html/converters.rb +0 -59
  191. data/lib/coradoc/input/html/errors.rb +0 -14
  192. data/lib/coradoc/input/html/html_converter.rb +0 -168
  193. data/lib/coradoc/input/html/plugin.rb +0 -131
  194. data/lib/coradoc/input/html/plugins/plateau.rb +0 -213
  195. data/lib/coradoc/input/html/postprocessor.rb +0 -220
  196. data/lib/coradoc/input/html.rb +0 -61
  197. data/lib/coradoc/legacy_parser.rb +0 -200
  198. data/lib/coradoc/oscal.rb +0 -99
  199. data/lib/coradoc/output/adoc.rb +0 -19
  200. data/lib/coradoc/output/coradoc_tree_debug.rb +0 -21
  201. data/lib/coradoc/parser/asciidoc/admonition.rb +0 -24
  202. data/lib/coradoc/parser/asciidoc/attribute_list.rb +0 -89
  203. data/lib/coradoc/parser/asciidoc/base.rb +0 -87
  204. data/lib/coradoc/parser/asciidoc/bibliography.rb +0 -29
  205. data/lib/coradoc/parser/asciidoc/block.rb +0 -94
  206. data/lib/coradoc/parser/asciidoc/citation.rb +0 -30
  207. data/lib/coradoc/parser/asciidoc/content.rb +0 -64
  208. data/lib/coradoc/parser/asciidoc/document_attributes.rb +0 -25
  209. data/lib/coradoc/parser/asciidoc/header.rb +0 -29
  210. data/lib/coradoc/parser/asciidoc/inline.rb +0 -195
  211. data/lib/coradoc/parser/asciidoc/list.rb +0 -115
  212. data/lib/coradoc/parser/asciidoc/paragraph.rb +0 -54
  213. data/lib/coradoc/parser/asciidoc/section.rb +0 -61
  214. data/lib/coradoc/parser/asciidoc/table.rb +0 -32
  215. data/lib/coradoc/parser/asciidoc/term.rb +0 -41
  216. data/lib/coradoc/parser/asciidoc/text.rb +0 -158
  217. data/lib/coradoc/parser/base.rb +0 -40
  218. data/lib/coradoc/parser.rb +0 -11
  219. data/lib/coradoc/reverse_adoc.rb +0 -18
  220. data/lib/coradoc/transformer.rb +0 -476
  221. data/lib/coradoc/util.rb +0 -12
  222. data/lib/reverse_adoc.rb +0 -20
  223. data/utils/inspect_asciidoc.rb +0 -29
  224. data/utils/parser_analyzer.rb +0 -66
  225. data/utils/round_trip.rb +0 -53
@@ -0,0 +1,463 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'lutaml/model'
4
+
5
+ # Coradoc - A hub-and-spoke document transformation library
6
+ #
7
+ # Coradoc provides a unified document model (CoreModel) and transformation
8
+ # infrastructure for converting between document formats such as AsciiDoc,
9
+ # HTML, and Markdown.
10
+ #
11
+ # ## Architecture
12
+ #
13
+ # Coradoc uses a hub-and-spoke architecture where CoreModel acts as the
14
+ # canonical document representation. Each format (AsciiDoc, HTML, Markdown)
15
+ # has its own model and transformers to/from CoreModel.
16
+ #
17
+ # ```
18
+ # Source Format → Source Model → CoreModel → Target Model → Target Format
19
+ # ```
20
+ #
21
+ # ## Quick Start
22
+ #
23
+ # @example Parsing documents
24
+ # require 'coradoc'
25
+ #
26
+ # # Parse AsciiDoc to CoreModel
27
+ # doc = Coradoc.parse("= Title\n\nContent", format: :asciidoc)
28
+ #
29
+ # @example Converting between formats
30
+ # # Convert AsciiDoc to HTML
31
+ # html = Coradoc.convert(adoc_text, from: :asciidoc, to: :html)
32
+ #
33
+ # # Convert Markdown to AsciiDoc
34
+ # adoc = Coradoc.convert(md_text, from: :markdown, to: :asciidoc)
35
+ #
36
+ # @example Using the hooks system
37
+ # Coradoc::Hooks.register(:before_parse) do |content, format:|
38
+ # puts "Parsing #{format} document..."
39
+ # content
40
+ # end
41
+ #
42
+ # @see Coradoc::CoreModel The canonical document model
43
+ # @see Coradoc::Hooks Plugin lifecycle hooks system
44
+ # @see Coradoc::FormatModule Interface contract for format modules
45
+ #
46
+ module Coradoc
47
+ # Base error class - defined in errors.rb
48
+ # @see Coradoc::Error Base error class
49
+ # @see Coradoc::ParseError Parsing errors with source context
50
+ # @see Coradoc::ValidationError Document validation errors
51
+ # @see Coradoc::TransformationError Model transformation errors
52
+ # @see Coradoc::UnsupportedFormatError Unsupported format errors
53
+
54
+ class << self
55
# Return the shared format registry, creating it on first use.
#
# @return [Registry] the format registry
def registry
  return @registry if @registry

  @registry = Registry.new
end
61
+
62
# Register a format gem
#
# The module is extended with FormatModule::Interface when it does not
# already include it. Validation runs *before* the module is stored, so
# a module for which validation raises is never left in the registry.
#
# @param format_name [Symbol] the format name (e.g., :asciidoc, :html, :markdown)
# @param format_module [Module] the format module
# @param options [Hash] optional configuration (e.g., extensions: [])
# @return [void]
def register_format(format_name, format_module, **options)
  format_module.extend(FormatModule::Interface) unless format_module.is_a?(FormatModule::Interface)

  # Validate first: if this raises, nothing has been registered yet.
  validation = FormatModule.validate!(format_module, format_name)
  registry.register(format_name, format_module, options)

  # Keep returning whatever validate! returned, as before.
  validation
end
73
+
74
# Look up the module registered under a format name.
#
# @param format_name [Symbol] the format name
# @return [Module, nil] the format module or nil if not found
def get_format(format_name)
  formats = registry
  formats.get(format_name)
end
81
+
82
# Names of every format currently held by the registry.
#
# @return [Array<Symbol>] list of registered format names
def registered_formats
  formats = registry
  formats.list
end
88
+
89
# Parse document text with the module registered for +format+.
#
# The text is passed through the :before_parse hooks, handed to the
# format module's +parse_to_core+, and the result is passed through the
# :after_parse hooks; the value returned by the last hook call is the
# return value.
#
# @param text [String] the document text to parse
# @param format [Symbol] the source format (:asciidoc, :html, :markdown)
# @return [Coradoc::CoreModel::Base, Object] the parsed document model
# @raise [UnsupportedFormatError] if the format is not registered
#
# @example Parse AsciiDoc
#   doc = Coradoc.parse("= Title\n\nContent", format: :asciidoc)
#   doc = Coradoc.parse(File.read("doc.adoc"), format: :asciidoc)
def parse(text, format:)
  handler = get_format(format)
  if !handler
    message = "Format '#{format}' is not registered. " \
              "Available formats: #{registered_formats.join(', ')}"
    raise UnsupportedFormatError, message
  end

  prepared = Hooks.invoke(:before_parse, text, format: format)
  parsed = handler.parse_to_core(prepared)
  Hooks.invoke(:after_parse, parsed, format: format)
end
117
+
118
# Convert document text from one format to another.
#
# Parses +text+ with the source format and serializes the parsed result
# with the target format.
#
# @param text [String] the source document text
# @param from [Symbol] the source format (:asciidoc, :html, :markdown)
# @param to [Symbol] the target format (:asciidoc, :html, :markdown)
# @param options [Hash] additional options forwarded to serialization
# @return [String] the converted document text
# @raise [UnsupportedFormatError] if a format is not registered
#
# @example Convert AsciiDoc to HTML
#   html = Coradoc.convert(adoc_text, from: :asciidoc, to: :html)
#
# @example Convert HTML to AsciiDoc
#   adoc = Coradoc.convert(html_text, from: :html, to: :asciidoc)
def convert(text, from:, to:, **options)
  serialize(parse(text, format: from), to: to, **options)
end
142
+
143
# Transform a format-specific model into its CoreModel representation.
#
# A model that already is a CoreModel::Base is returned untouched.
# Otherwise the first registered format module that reports handling
# the model performs the conversion.
#
# @param model [Object] a format-specific model
# @return [Coradoc::CoreModel::Base] the CoreModel representation
# @raise [TransformationError] if no registered module handles the model
def to_core(model)
  return model if model.is_a?(CoreModel::Base)

  registry.each_value do |candidate|
    return candidate.to_core(model) if candidate.handles_model?(model)
  end

  raise TransformationError, "No transformer found for #{model.class}"
end
158
+
159
# Serialize a CoreModel with the module registered for the target format.
#
# The model is passed through the :before_serialize hooks, serialized by
# the format module, and the output is passed through the
# :after_serialize hooks.
#
# @param model [Coradoc::CoreModel::Base] the CoreModel to serialize
# @param to [Symbol] the target format
# @param options [Hash] additional options forwarded to the format module
# @return [String] the serialized document
# @raise [UnsupportedFormatError] if the target format is not registered
def serialize(model, to:, **options)
  target = get_format(to)
  unless target
    raise UnsupportedFormatError, "Format '#{to}' is not registered"
  end

  prepared = Hooks.invoke(:before_serialize, model, format: to)
  output = target.serialize(prepared, **options)
  Hooks.invoke(:after_serialize, output, format: to)
end
173
+
174
# Wrap a document in a DocumentManipulator for chainable operations.
#
# @param document [Coradoc::CoreModel::Base] the document to manipulate
# @return [DocumentManipulator] a new manipulator instance
#
# @example Chainable document manipulation
#   html = Coradoc.manipulate(doc)
#     .transform_text(&:upcase)
#     .add_toc
#     .to_html
def manipulate(document)
  manipulator = DocumentManipulator.new(document)
  manipulator
end
187
+
188
# Detect format from a file extension
#
# Accepts either a filename ("document.adoc") or a bare extension
# (".adoc"). The lower-cased extension is compared against the
# +:extensions+ option each format was registered with.
#
# @param filename [String] Filename or extension to detect
# @return [Symbol, nil] the detected format symbol, or nil when no
#   registered format lists the extension
#
# @example
#   Coradoc.detect_format("document.adoc")  # => :asciidoc
#   Coradoc.detect_format("file.md")        # => :markdown
#   Coradoc.detect_format(".adoc")          # => :asciidoc
def detect_format(filename)
  ext = File.extname(filename)
  if ext.empty?
    # File.extname treats a name such as ".adoc" as a dotfile without an
    # extension, so a bare extension would otherwise never match.
    base = File.basename(filename)
    ext = base if base.start_with?('.')
  end
  ext = ext.downcase

  registry.each_key do |name|
    opts = registry.options_for(name)
    # Formats may carry no options at all; skip them instead of failing.
    return name if opts && opts[:extensions]&.include?(ext)
  end
  nil
end
204
+
205
# Parse a document from a file path.
#
# Formats flagged as binary receive the path itself via the format
# module's +parse_to_core+; for every other format the file content is
# read and handed to {parse}.
#
# @param path [String] path to the document file
# @param format [Symbol, nil] source format (auto-detected if nil)
# @return [Coradoc::CoreModel::Base] the parsed CoreModel document
# @raise [FileNotFoundError] if +path+ does not exist
# @raise [UnsupportedFormatError] if format is not detected or registered
#
# @example
#   doc = Coradoc.parse_file("document.adoc")
#   doc = Coradoc.parse_file("report.docx", format: :docx)
def parse_file(path, format: nil)
  raise FileNotFoundError, path unless File.exist?(path)

  source_format = format || detect_format(path)
  unless source_format
    raise UnsupportedFormatError, "Could not detect format for: #{path}"
  end

  handler = get_format(source_format)
  unless handler
    raise UnsupportedFormatError, "Format '#{source_format}' is not registered"
  end

  # Binary formats are given the path, not the file content.
  return handler.parse_to_core(path) if binary_format?(source_format)

  parse(File.read(path), format: source_format)
end
234
+
235
# Convert a file from one format to another.
#
# @param path [String] path to the source document file
# @param from [Symbol, nil] source format (auto-detected if nil)
# @param to [Symbol] target format
# @param options [Hash] additional options forwarded to serialization
# @return [String] the converted document text
# @raise [UnsupportedFormatError] if the source format cannot be detected
#
# @example
#   html = Coradoc.convert_file("document.adoc", to: :html)
#   adoc = Coradoc.convert_file("report.docx", to: :asciidoc)
def convert_file(path, to:, from: nil, **options)
  source_format = from || detect_format(path)
  unless source_format
    raise UnsupportedFormatError, "Could not detect format for: #{path}"
  end

  serialize(parse_file(path, format: source_format), to: to, **options)
end
253
+
254
# Whether a format was registered with +binary: true+, i.e. expects a
# file path rather than text as input.
#
# @param format [Symbol] the format to check
# @return [Boolean] true if the format is binary
def binary_format?(format)
  opts = registry.options_for(format)
  return false if opts.nil?

  opts.fetch(:binary, false) == true
end
262
+
263
# Normalize a format name to a symbol.
#
# The lower-cased name is matched against the +:aliases+ option of each
# registered format (e.g. "adoc" → :asciidoc, "md" → :markdown); when no
# alias matches, the lower-cased name itself is returned as a symbol.
#
# @param name [String, Symbol, nil] the format name to normalize
# @return [Symbol, nil] the normalized format symbol, or nil
def normalize_format(name)
  return nil unless name

  wanted = name.to_s.downcase
  registry.each_key do |candidate|
    aliases = registry.options_for(candidate)[:aliases]
    return candidate if aliases&.include?(wanted)
  end

  wanted.to_sym
end
279
+
280
# Whether a registered format reports that it can serialize (write output).
#
# @param format [Symbol] the format to check
# @return [Boolean] false when the format is not registered, otherwise
#   the format module's own +serialize?+ answer
def serialize_format?(format)
  handler = get_format(format)
  handler ? handler.serialize? : false
end
290
+
291
# Check if a format supports parsing (reading input)
#
# A format can parse when its module publicly responds to
# +parse_to_core+ or +parse+.
#
# @param format [Symbol] the format to check
# @return [Boolean] true if the format can parse; false when the format
#   is not registered
def parse_format?(format)
  mod = get_format(format)
  return false unless mod

  # respond_to? answers directly instead of building the module's full
  # public method list twice.
  mod.respond_to?(:parse_to_core) || mod.respond_to?(:parse)
end
301
+
302
# Summarize what every registered format can do.
#
# Maps each format name to a hash with +:parse+ and +:serialize+
# booleans. Useful for CLI display and introspection.
#
# @return [Hash<Symbol, Hash<Symbol, Boolean>>]
def format_capabilities
  summary = {}
  registered_formats.each do |format_name|
    can_parse = parse_format?(format_name)
    can_serialize = serialize_format?(format_name)
    summary[format_name] = { parse: can_parse, serialize: can_serialize }
  end
  summary
end
316
+
317
# Pick the output format for a filename, falling back to a default.
#
# @param output_file [String, nil] output filename to detect from
# @param default [Symbol] format used when no filename is given or
#   detection fails (default: :html)
# @return [Symbol] the resolved format
def resolve_output_format(output_file, default: :html)
  if output_file
    detect_format(output_file) || default
  else
    default
  end
end
327
+
328
# Get file metadata for display
#
# @param path [String] path to the file
# @return [Hash] metadata including :size, :format, and :lines
#   (:lines is only present for formats not flagged as binary)
def file_info(path)
  fmt = detect_format(path)
  info = { size: File.size(path), format: fmt }
  # Stream the file line by line rather than loading it whole just to
  # count lines.
  info[:lines] = File.foreach(path).count unless binary_format?(fmt)
  info
end
338
+
339
# Validate a document file.
#
# Parses the file, asks Validation::SchemaGenerator for a schema for the
# parsed document's class, and validates against it. When no schema is
# generated, a fresh Validation::Result is returned instead.
#
# @param path [String] path to the document file
# @param format [Symbol, nil] source format (auto-detected if nil)
# @return [Coradoc::Validation::Result] validation result
# @raise [UnsupportedFormatError] if format is not detected or registered
def validate_file(path, format: nil)
  document = parse_file(path, format: format)
  schema = Validation::SchemaGenerator.generate(document.class)

  schema ? schema.validate(document) : Validation::Result.new
end
356
+
357
# Gather statistics about a parsed document.
#
# +:title+ is included when the document has one. +:child_count+ and
# +:element_counts+ are included only for structural elements.
#
# @param doc [CoreModel::Base] parsed document
# @return [Hash] statistics including element counts, title, etc.
def document_stats(doc)
  stats = {}

  title = doc.title
  stats[:title] = title if title

  return stats unless doc.is_a?(CoreModel::StructuralElement)

  stats[:child_count] = count_elements(doc)
  stats[:element_counts] = count_element_types(doc)
  stats
end
373
+
374
# Describe an element for display
#
# Non-CoreModel values are rendered with +to_s+. CoreModel elements are
# rendered as their unqualified class name, followed by the title when
# present, or (for blocks with content) by a preview of the content
# limited to 50 characters, with "..." appended only when the content
# was actually cut.
#
# @param elem [Object] element to describe
# @return [String] human-readable description
def describe_element(elem)
  return elem.to_s unless elem.is_a?(CoreModel::Base)

  type = elem.class.name.split('::').last
  return "#{type}: #{elem.title}" if elem.title
  return type unless elem.is_a?(CoreModel::Block) && elem.content

  limit = 50
  text = elem.content.to_s
  # Slice by length, not by inclusive range: [0..50] would keep 51
  # characters and still add the ellipsis for a 51-character content.
  preview = text.length > limit ? "#{text[0, limit]}..." : text
  "#{type}: #{preview}"
end
392
+
393
# Strip unicode whitespace from a string
#
# Removes characters in the Unicode "space separator" category (Zs)
# from the very start and/or very end of the string. Line breaks are not
# in that category, so whitespace next to an interior or final newline
# is left untouched.
#
# @param string [String, nil] the string to strip
# @param only [Symbol, nil] what to strip: :begin, :end, or nil for both
# @return [String, nil] the stripped string (nil when given nil)
def strip_unicode(string, only: nil)
  return string if string.nil?

  # \A and \z anchor to the whole string; ^ and $ would also match at
  # every line boundary and strip whitespace in the middle of the text.
  case only
  when :begin
    string.sub(/\A\p{Zs}+/, '')
  when :end
    string.sub(/\p{Zs}+\z/, '')
  else
    string.sub(/\A\p{Zs}+/, '').sub(/\p{Zs}+\z/, '')
  end
end
410
+
411
+ private
412
+
413
# Count all descendants of a structural element, recursing only into
# children that are themselves structural elements.
#
# @param doc [Object] element to count below
# @return [Integer] number of descendants; 0 for non-structural input
def count_elements(doc)
  return 0 unless doc.is_a?(CoreModel::StructuralElement)

  # The guard above yields 0 for non-structural children, so each child
  # contributes itself plus whatever it contains.
  doc.children.sum { |child| 1 + count_elements(child) }
end
420
+
421
# Tally how often each element type occurs in a document.
#
# An anonymous Visitor::Base subclass counts every CoreModel::Base it is
# asked to visit and then defers to the inherited +visit+. Structural
# elements and blocks are keyed by their +element_type+ when it is set;
# everything else is keyed by the snake_cased unqualified class name.
#
# @param doc [Object] root element to visit
# @return [Hash] counts per type key
def count_element_types(doc)
  tally = Hash.new(0)

  counting_visitor = Class.new(Visitor::Base) do
    define_method(:visit) do |node|
      if node.is_a?(CoreModel::Base)
        typed = node.is_a?(CoreModel::StructuralElement) || node.is_a?(CoreModel::Block)
        key = (typed && node.element_type) ||
              node.class.name.split('::').last
                  .gsub(/([A-Z])/, '_\1').downcase.sub(/^_/, '')
        tally[key] += 1
      end
      # define_method needs explicit arguments for super.
      super(node)
    end
  end

  counting_visitor.new.visit(doc)
  tally.delete_if { |_, occurrences| occurrences.zero? }
end
442
+ end
443
+
444
# Lazily load Coradoc components the first time their constant is used.
autoload :Error, "#{__dir__}/errors"
autoload :Version, "#{__dir__}/version"
autoload :Logger, "#{__dir__}/logger"
autoload :Hooks, "#{__dir__}/hooks"
autoload :Query, "#{__dir__}/query"
autoload :Validation, "#{__dir__}/validation"
autoload :Configurable, "#{__dir__}/configurable"
autoload :FormatModule, "#{__dir__}/format_module"
autoload :CoreModel, "#{__dir__}/core_model"
autoload :Registry, "#{__dir__}/registry"
autoload :Transform, "#{__dir__}/transform"
autoload :Input, "#{__dir__}/input"
autoload :Output, "#{__dir__}/output"
autoload :PerformanceRegression, "#{__dir__}/performance_regression"

# Constants referenced by the methods above that had no autoload entry.
# Without these, e.g. `raise UnsupportedFormatError` only resolves if
# something else has already loaded errors.rb.
# NOTE(review): presumably FileNotFoundError also lives in errors.rb and
# document_manipulator.rb / visitor.rb define the constants below — confirm.
autoload :UnsupportedFormatError, "#{__dir__}/errors"
autoload :TransformationError, "#{__dir__}/errors"
autoload :FileNotFoundError, "#{__dir__}/errors"
autoload :DocumentManipulator, "#{__dir__}/document_manipulator"
autoload :Visitor, "#{__dir__}/visitor"
458
+ end
459
+
460
+ # Format gems self-register via Coradoc.register_format when they are required.
461
+ # No hardcoded registration needed here — each gem's entry file handles its own
462
+ # registration (e.g., coradoc-adoc/lib/coradoc/asciidoc.rb calls
463
+ # Coradoc.register_format(:asciidoc, Coradoc::AsciiDoc)).
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module CoreModel
5
# Block subclass for annotations and admonitions.
#
# Covers blocks that carry a semantic label, such as:
# - NOTE
# - WARNING
# - CAUTION
# - IMPORTANT
# - TIP
# - Reviewer notes
# - Sidebar blocks (when used for annotations)
#
# On top of what Block provides, it declares two string attributes that
# set these blocks apart from generic delimited blocks.
#
# @example Creating a NOTE annotation
#   note = CoreModel::AnnotationBlock.new(
#     annotation_type: "note",
#     content: "This is important information."
#   )
#
# @example Creating a reviewer note
#   reviewer = CoreModel::AnnotationBlock.new(
#     annotation_type: "reviewer",
#     annotation_label: "john.doe",
#     content: "Please review this section."
#   )
class AnnotationBlock < Block
  # @!attribute annotation_type
  #   @return [String, nil] the kind of annotation
  #     (e.g., 'note', 'warning', 'reviewer', 'sidebar')
  attribute :annotation_type, :string

  # @!attribute annotation_label
  #   @return [String, nil] optional custom label or identifier
  #     (e.g., reviewer ID, custom note label)
  attribute :annotation_label, :string

  private

  # Attributes taken into account for semantic equivalence.
  #
  # Extends the inherited list with annotation_type and annotation_label,
  # so two blocks with identical content but different annotation types
  # or labels are not considered equivalent.
  #
  # @return [Array<Symbol>] list of comparable attributes
  def comparable_attributes
    [*super, :annotation_type, :annotation_label]
  end
end
56
+ end
57
+ end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'lutaml/model'
4
+
5
module Coradoc
  module CoreModel
    # Base class for all core models
    #
    # Provides common functionality for schema-agnostic document models:
    # shared attributes (id, title, element attributes, metadata entries),
    # hash-style accessors for attributes and metadata, semantic
    # equivalence comparison, and a visitor entry point.
    #
    # @example Creating a base model
    #   model = CoreModel::Base.new(
    #     id: "example-1",
    #     title: "Example Title",
    #     element_attributes: [
    #       CoreModel::ElementAttribute.new(name: "role", value: "note")
    #     ]
    #   )
    #
    # @example Semantic comparison
    #   model1 = CoreModel::Base.new(id: "test", title: "Test")
    #   model2 = CoreModel::Base.new(id: "test", title: "Test")
    #   model1.semantically_equivalent?(model2) # => true
    class Base < Lutaml::Model::Serializable
      # @!attribute id
      #   @return [String, nil] unique identifier for the element
      attribute :id, :string

      # @!attribute title
      #   @return [String, nil] title of the element
      attribute :title, :string

      # @!attribute element_attributes
      #   @return [Array<ElementAttribute>, nil] collection of element attributes
      attribute :element_attributes, ElementAttribute, collection: true

      # @!attribute metadata_entries
      #   @return [Array<MetadataEntry>, nil] additional metadata entries
      attribute :metadata_entries, MetadataEntry, collection: true

      # Get all metadata as a hash, or a specific metadata value by key
      #
      # An unset (nil) metadata collection is treated as empty.
      #
      # @overload metadata
      #   @return [Hash] All metadata as key-value pairs; when a key occurs
      #     more than once, the last entry wins
      # @overload metadata(key)
      #   @param key [String] The metadata key
      #   @return [String, nil] The value of the first matching entry, or nil
      def metadata(key = nil)
        entries = metadata_entries || []
        if key.nil?
          # Return all metadata as hash
          entries.each_with_object({}) { |e, h| h[e.key] = e.value }
        else
          # Return specific value
          entries.find { |e| e.key == key }&.value
        end
      end

      # Convenience method to set metadata
      #
      # Updates the first entry with a matching key in place, or appends a
      # new MetadataEntry when none exists. The return value differs
      # between the two branches and should not be relied upon.
      #
      # @param key [String] The metadata key
      # @param value [String] The value to set
      def set_metadata(key, value)
        # The collection may be unset; make sure there is an array to append to.
        self.metadata_entries ||= []
        existing = metadata_entries.find { |e| e.key == key }
        if existing
          existing.value = value
        else
          metadata_entries << MetadataEntry.new(key: key, value: value)
        end
      end

      # Get all element attributes as a hash, or a specific attribute value by name
      #
      # An unset (nil) attribute collection is treated as empty.
      #
      # @overload attr
      #   @return [Hash] All attributes as key-value pairs; when a name
      #     occurs more than once, the last entry wins
      # @overload attr(name)
      #   @param name [String] The attribute name
      #   @return [String, nil] The value of the first matching attribute, or nil
      def attr(name = nil)
        attrs = element_attributes || []
        if name.nil?
          # Return all attributes as hash
          attrs.each_with_object({}) { |a, h| h[a.name] = a.value }
        else
          # Return specific value
          attrs.find { |a| a.name == name }&.value
        end
      end

      # Set attribute value
      #
      # Updates the first attribute with a matching name in place, or
      # appends a new ElementAttribute when none exists. The return value
      # differs between the two branches and should not be relied upon.
      #
      # @param name [String] The attribute name
      # @param value [String] The value to set
      def set_attr(name, value)
        # The collection may be unset; make sure there is an array to append to.
        self.element_attributes ||= []
        existing = element_attributes.find { |a| a.name == name }
        if existing
          existing.value = value
        else
          element_attributes << ElementAttribute.new(name: name, value: value)
        end
      end

      # Compare this model with another for semantic equivalence
      #
      # Two models are equivalent when +other+ is an instance of this
      # object's class (or a subclass of it) and every attribute listed by
      # #comparable_attributes compares as equivalent. Attributes outside
      # that list are ignored, which is what distinguishes this check from
      # strict equality.
      #
      # @param other [Object] the object to compare with
      # @return [Boolean] true if semantically equivalent, false otherwise
      def semantically_equivalent?(other)
        return false unless other.is_a?(self.class)

        # The block parameter is deliberately not called `attr`, which
        # would shadow the public #attr accessor defined above.
        comparable_attributes.all? do |attribute_name|
          compare_attribute(attribute_name, other)
        end
      end

      # Accept a visitor to traverse this element
      #
      # Implements the visitor pattern for document traversal.
      # The visitor's visit method will be called with this element.
      #
      # @param visitor [Coradoc::Visitor::Base] Visitor to accept
      # @return [Object] whatever the visitor's #visit returns
      def accept(visitor)
        visitor.visit(self)
      end

      private

      # List of attributes to compare for semantic equivalence
      #
      # Override in subclasses to define which attributes matter for
      # equivalence. By default, only id and title are compared.
      #
      # @return [Array<Symbol>] list of attribute names to compare
      def comparable_attributes
        %i[id title]
      end

      # Compare a single attribute between this model and another
      #
      # Arrays are compared element-wise, nested models are compared
      # semantically, and everything else falls back to ==.
      #
      # @param name [Symbol] attribute reader to call on both objects
      # @param other [Base] the model being compared against
      # @return [Boolean] true when both values are equivalent
      def compare_attribute(name, other)
        self_value = public_send(name)
        other_value = other.public_send(name)

        case self_value
        when Array
          compare_arrays(self_value, other_value)
        when Base
          self_value.semantically_equivalent?(other_value)
        else
          self_value == other_value
        end
      end

      # Compare two arrays for semantic equivalence
      #
      # @param arr1 [Array] this model's collection
      # @param arr2 [Array, nil, Object] the other model's value for the
      #   same attribute; may be nil when the collection was never set
      # @return [Boolean] true when both are arrays of equal length whose
      #   elements are pairwise equivalent
      def compare_arrays(arr1, arr2)
        # Collections can be nil on the other side (see the `|| []`
        # fallbacks above); that is a mismatch, not an error.
        return false unless arr2.is_a?(Array)
        return false unless arr1.size == arr2.size

        arr1.zip(arr2).all? do |item1, item2|
          if item1.is_a?(Base)
            item1.semantically_equivalent?(item2)
          else
            item1 == item2
          end
        end
      end
    end
  end
end