coradoc 1.1.8 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec +1 -1
  3. data/Rakefile +3 -12
  4. data/exe/coradoc +21 -2
  5. data/lib/coradoc/cli.rb +205 -91
  6. data/lib/coradoc/configurable.rb +527 -0
  7. data/lib/coradoc/coradoc.rb +467 -0
  8. data/lib/coradoc/core_model/annotation_block.rb +57 -0
  9. data/lib/coradoc/core_model/base.rb +172 -0
  10. data/lib/coradoc/core_model/bibliography.rb +41 -0
  11. data/lib/coradoc/core_model/bibliography_entry.rb +48 -0
  12. data/lib/coradoc/core_model/block.rb +63 -0
  13. data/lib/coradoc/core_model/children_content.rb +53 -0
  14. data/lib/coradoc/core_model/comment_block.rb +12 -0
  15. data/lib/coradoc/core_model/definition_item.rb +46 -0
  16. data/lib/coradoc/core_model/definition_list.rb +28 -0
  17. data/lib/coradoc/core_model/element_attribute.rb +26 -0
  18. data/lib/coradoc/core_model/example_block.rb +12 -0
  19. data/lib/coradoc/core_model/footnote.rb +92 -0
  20. data/lib/coradoc/core_model/horizontal_rule_block.rb +12 -0
  21. data/lib/coradoc/core_model/id_generator.rb +20 -0
  22. data/lib/coradoc/core_model/image.rb +66 -0
  23. data/lib/coradoc/core_model/inline_element.rb +172 -0
  24. data/lib/coradoc/core_model/list_block.rb +135 -0
  25. data/lib/coradoc/core_model/list_item.rb +142 -0
  26. data/lib/coradoc/core_model/listing_block.rb +15 -0
  27. data/lib/coradoc/core_model/literal_block.rb +12 -0
  28. data/lib/coradoc/core_model/metadata.rb +79 -0
  29. data/lib/coradoc/core_model/open_block.rb +12 -0
  30. data/lib/coradoc/core_model/paragraph_block.rb +12 -0
  31. data/lib/coradoc/core_model/pass_block.rb +12 -0
  32. data/lib/coradoc/core_model/quote_block.rb +14 -0
  33. data/lib/coradoc/core_model/reviewer_block.rb +12 -0
  34. data/lib/coradoc/core_model/sidebar_block.rb +12 -0
  35. data/lib/coradoc/core_model/source_block.rb +12 -0
  36. data/lib/coradoc/core_model/structural_element.rb +111 -0
  37. data/lib/coradoc/core_model/table.rb +148 -0
  38. data/lib/coradoc/core_model/term.rb +53 -0
  39. data/lib/coradoc/core_model/text_content.rb +22 -0
  40. data/lib/coradoc/core_model/toc.rb +105 -0
  41. data/lib/coradoc/core_model/toc_generator.rb +151 -0
  42. data/lib/coradoc/core_model/verse_block.rb +14 -0
  43. data/lib/coradoc/core_model.rb +77 -0
  44. data/lib/coradoc/document_builder.rb +184 -0
  45. data/lib/coradoc/document_manipulator.rb +203 -0
  46. data/lib/coradoc/errors.rb +312 -0
  47. data/lib/coradoc/format_module.rb +60 -0
  48. data/lib/coradoc/hooks.rb +176 -0
  49. data/lib/coradoc/input.rb +17 -7
  50. data/lib/coradoc/logger.rb +54 -0
  51. data/lib/coradoc/output.rb +17 -6
  52. data/lib/coradoc/performance_regression.rb +109 -0
  53. data/lib/coradoc/processor_registry.rb +50 -0
  54. data/lib/coradoc/query.rb +455 -0
  55. data/lib/coradoc/registry.rb +156 -0
  56. data/lib/coradoc/serializer/registry.rb +150 -0
  57. data/lib/coradoc/transform.rb +11 -0
  58. data/lib/coradoc/validation.rb +646 -0
  59. data/lib/coradoc/version.rb +1 -1
  60. data/lib/coradoc/visitor.rb +283 -0
  61. data/lib/coradoc.rb +40 -19
  62. metadata +67 -277
  63. data/.editorconfig +0 -15
  64. data/.envrc +0 -1
  65. data/.irbrc +0 -1
  66. data/.pryrc.sample +0 -1
  67. data/.rubocop.yml +0 -14
  68. data/.rubocop_todo.yml +0 -179
  69. data/CHANGELOG.md +0 -9
  70. data/CODE_OF_CONDUCT.md +0 -84
  71. data/Dockerfile +0 -19
  72. data/Gemfile +0 -16
  73. data/LICENSE.txt +0 -21
  74. data/Makefile +0 -35
  75. data/README.Docker.adoc +0 -57
  76. data/README.adoc +0 -119
  77. data/coradoc.gemspec +0 -40
  78. data/docker-compose.yml +0 -14
  79. data/exe/reverse_adoc +0 -81
  80. data/exe/w2a +0 -60
  81. data/flake.lock +0 -114
  82. data/flake.nix +0 -135
  83. data/lib/coradoc/converter.rb +0 -144
  84. data/lib/coradoc/document.rb +0 -77
  85. data/lib/coradoc/element/admonition.rb +0 -18
  86. data/lib/coradoc/element/attribute.rb +0 -36
  87. data/lib/coradoc/element/attribute_list.rb +0 -138
  88. data/lib/coradoc/element/audio.rb +0 -33
  89. data/lib/coradoc/element/author.rb +0 -24
  90. data/lib/coradoc/element/base.rb +0 -92
  91. data/lib/coradoc/element/bibliography.rb +0 -24
  92. data/lib/coradoc/element/bibliography_entry.rb +0 -24
  93. data/lib/coradoc/element/block/core.rb +0 -76
  94. data/lib/coradoc/element/block/example.rb +0 -23
  95. data/lib/coradoc/element/block/listing.rb +0 -21
  96. data/lib/coradoc/element/block/literal.rb +0 -21
  97. data/lib/coradoc/element/block/open.rb +0 -22
  98. data/lib/coradoc/element/block/pass.rb +0 -21
  99. data/lib/coradoc/element/block/quote.rb +0 -19
  100. data/lib/coradoc/element/block/reviewer_comment.rb +0 -19
  101. data/lib/coradoc/element/block/side.rb +0 -19
  102. data/lib/coradoc/element/block/sourcecode.rb +0 -21
  103. data/lib/coradoc/element/block.rb +0 -17
  104. data/lib/coradoc/element/break.rb +0 -11
  105. data/lib/coradoc/element/comment_block.rb +0 -22
  106. data/lib/coradoc/element/comment_line.rb +0 -18
  107. data/lib/coradoc/element/document_attributes.rb +0 -33
  108. data/lib/coradoc/element/header.rb +0 -22
  109. data/lib/coradoc/element/image/block_image.rb +0 -32
  110. data/lib/coradoc/element/image/core.rb +0 -58
  111. data/lib/coradoc/element/image/inline_image.rb +0 -12
  112. data/lib/coradoc/element/image.rb +0 -10
  113. data/lib/coradoc/element/include.rb +0 -18
  114. data/lib/coradoc/element/inline/anchor.rb +0 -19
  115. data/lib/coradoc/element/inline/attribute_reference.rb +0 -19
  116. data/lib/coradoc/element/inline/bold.rb +0 -25
  117. data/lib/coradoc/element/inline/cross_reference.rb +0 -46
  118. data/lib/coradoc/element/inline/footnote.rb +0 -24
  119. data/lib/coradoc/element/inline/hard_line_break.rb +0 -11
  120. data/lib/coradoc/element/inline/highlight.rb +0 -25
  121. data/lib/coradoc/element/inline/italic.rb +0 -25
  122. data/lib/coradoc/element/inline/link.rb +0 -42
  123. data/lib/coradoc/element/inline/monospace.rb +0 -25
  124. data/lib/coradoc/element/inline/quotation.rb +0 -20
  125. data/lib/coradoc/element/inline/small.rb +0 -19
  126. data/lib/coradoc/element/inline/span.rb +0 -37
  127. data/lib/coradoc/element/inline/subscript.rb +0 -20
  128. data/lib/coradoc/element/inline/superscript.rb +0 -20
  129. data/lib/coradoc/element/inline/underline.rb +0 -19
  130. data/lib/coradoc/element/inline.rb +0 -23
  131. data/lib/coradoc/element/list/core.rb +0 -51
  132. data/lib/coradoc/element/list/definition.rb +0 -29
  133. data/lib/coradoc/element/list/ordered.rb +0 -17
  134. data/lib/coradoc/element/list/unordered.rb +0 -17
  135. data/lib/coradoc/element/list.rb +0 -13
  136. data/lib/coradoc/element/list_item.rb +0 -98
  137. data/lib/coradoc/element/list_item_definition.rb +0 -32
  138. data/lib/coradoc/element/paragraph.rb +0 -37
  139. data/lib/coradoc/element/revision.rb +0 -27
  140. data/lib/coradoc/element/section.rb +0 -62
  141. data/lib/coradoc/element/table.rb +0 -91
  142. data/lib/coradoc/element/tag.rb +0 -19
  143. data/lib/coradoc/element/term.rb +0 -22
  144. data/lib/coradoc/element/text_element.rb +0 -92
  145. data/lib/coradoc/element/title.rb +0 -62
  146. data/lib/coradoc/element/video.rb +0 -50
  147. data/lib/coradoc/generator.rb +0 -19
  148. data/lib/coradoc/input/adoc.rb +0 -30
  149. data/lib/coradoc/input/docx.rb +0 -64
  150. data/lib/coradoc/input/html/LICENSE.txt +0 -25
  151. data/lib/coradoc/input/html/README.adoc +0 -308
  152. data/lib/coradoc/input/html/cleaner.rb +0 -142
  153. data/lib/coradoc/input/html/config.rb +0 -77
  154. data/lib/coradoc/input/html/converters/a.rb +0 -52
  155. data/lib/coradoc/input/html/converters/aside.rb +0 -16
  156. data/lib/coradoc/input/html/converters/audio.rb +0 -29
  157. data/lib/coradoc/input/html/converters/base.rb +0 -108
  158. data/lib/coradoc/input/html/converters/blockquote.rb +0 -22
  159. data/lib/coradoc/input/html/converters/br.rb +0 -15
  160. data/lib/coradoc/input/html/converters/bypass.rb +0 -81
  161. data/lib/coradoc/input/html/converters/code.rb +0 -23
  162. data/lib/coradoc/input/html/converters/div.rb +0 -19
  163. data/lib/coradoc/input/html/converters/dl.rb +0 -62
  164. data/lib/coradoc/input/html/converters/drop.rb +0 -26
  165. data/lib/coradoc/input/html/converters/em.rb +0 -21
  166. data/lib/coradoc/input/html/converters/figure.rb +0 -25
  167. data/lib/coradoc/input/html/converters/h.rb +0 -42
  168. data/lib/coradoc/input/html/converters/head.rb +0 -23
  169. data/lib/coradoc/input/html/converters/hr.rb +0 -15
  170. data/lib/coradoc/input/html/converters/ignore.rb +0 -20
  171. data/lib/coradoc/input/html/converters/img.rb +0 -110
  172. data/lib/coradoc/input/html/converters/li.rb +0 -17
  173. data/lib/coradoc/input/html/converters/mark.rb +0 -19
  174. data/lib/coradoc/input/html/converters/markup.rb +0 -31
  175. data/lib/coradoc/input/html/converters/math.rb +0 -38
  176. data/lib/coradoc/input/html/converters/ol.rb +0 -65
  177. data/lib/coradoc/input/html/converters/p.rb +0 -23
  178. data/lib/coradoc/input/html/converters/pass_through.rb +0 -17
  179. data/lib/coradoc/input/html/converters/pre.rb +0 -55
  180. data/lib/coradoc/input/html/converters/q.rb +0 -16
  181. data/lib/coradoc/input/html/converters/strong.rb +0 -20
  182. data/lib/coradoc/input/html/converters/sub.rb +0 -22
  183. data/lib/coradoc/input/html/converters/sup.rb +0 -22
  184. data/lib/coradoc/input/html/converters/table.rb +0 -319
  185. data/lib/coradoc/input/html/converters/td.rb +0 -81
  186. data/lib/coradoc/input/html/converters/text.rb +0 -32
  187. data/lib/coradoc/input/html/converters/th.rb +0 -18
  188. data/lib/coradoc/input/html/converters/tr.rb +0 -22
  189. data/lib/coradoc/input/html/converters/video.rb +0 -29
  190. data/lib/coradoc/input/html/converters.rb +0 -59
  191. data/lib/coradoc/input/html/errors.rb +0 -14
  192. data/lib/coradoc/input/html/html_converter.rb +0 -168
  193. data/lib/coradoc/input/html/plugin.rb +0 -131
  194. data/lib/coradoc/input/html/plugins/plateau.rb +0 -213
  195. data/lib/coradoc/input/html/postprocessor.rb +0 -220
  196. data/lib/coradoc/input/html.rb +0 -61
  197. data/lib/coradoc/legacy_parser.rb +0 -200
  198. data/lib/coradoc/oscal.rb +0 -99
  199. data/lib/coradoc/output/adoc.rb +0 -19
  200. data/lib/coradoc/output/coradoc_tree_debug.rb +0 -21
  201. data/lib/coradoc/parser/asciidoc/admonition.rb +0 -24
  202. data/lib/coradoc/parser/asciidoc/attribute_list.rb +0 -89
  203. data/lib/coradoc/parser/asciidoc/base.rb +0 -87
  204. data/lib/coradoc/parser/asciidoc/bibliography.rb +0 -29
  205. data/lib/coradoc/parser/asciidoc/block.rb +0 -94
  206. data/lib/coradoc/parser/asciidoc/citation.rb +0 -30
  207. data/lib/coradoc/parser/asciidoc/content.rb +0 -64
  208. data/lib/coradoc/parser/asciidoc/document_attributes.rb +0 -25
  209. data/lib/coradoc/parser/asciidoc/header.rb +0 -29
  210. data/lib/coradoc/parser/asciidoc/inline.rb +0 -195
  211. data/lib/coradoc/parser/asciidoc/list.rb +0 -115
  212. data/lib/coradoc/parser/asciidoc/paragraph.rb +0 -54
  213. data/lib/coradoc/parser/asciidoc/section.rb +0 -61
  214. data/lib/coradoc/parser/asciidoc/table.rb +0 -32
  215. data/lib/coradoc/parser/asciidoc/term.rb +0 -41
  216. data/lib/coradoc/parser/asciidoc/text.rb +0 -158
  217. data/lib/coradoc/parser/base.rb +0 -40
  218. data/lib/coradoc/parser.rb +0 -11
  219. data/lib/coradoc/reverse_adoc.rb +0 -18
  220. data/lib/coradoc/transformer.rb +0 -476
  221. data/lib/coradoc/util.rb +0 -12
  222. data/lib/reverse_adoc.rb +0 -20
  223. data/utils/inspect_asciidoc.rb +0 -29
  224. data/utils/parser_analyzer.rb +0 -66
  225. data/utils/round_trip.rb +0 -53
@@ -0,0 +1,467 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'lutaml/model'
4
+
5
+ # Coradoc - A hub-and-spoke document transformation library
6
+ #
7
+ # Coradoc provides a unified document model (CoreModel) and transformation
8
+ # infrastructure for converting between document formats such as AsciiDoc,
9
+ # HTML, and Markdown.
10
+ #
11
+ # ## Architecture
12
+ #
13
+ # Coradoc uses a hub-and-spoke architecture where CoreModel acts as the
14
+ # canonical document representation. Each format (AsciiDoc, HTML, Markdown)
15
+ # has its own model and transformers to/from CoreModel.
16
+ #
17
+ # ```
18
+ # Source Format → Source Model → CoreModel → Target Model → Target Format
19
+ # ```
20
+ #
21
+ # ## Quick Start
22
+ #
23
+ # @example Parsing documents
24
+ # require 'coradoc'
25
+ #
26
+ # # Parse AsciiDoc to CoreModel
27
+ # doc = Coradoc.parse("= Title\n\nContent", format: :asciidoc)
28
+ #
29
+ # @example Converting between formats
30
+ # # Convert AsciiDoc to HTML
31
+ # html = Coradoc.convert(adoc_text, from: :asciidoc, to: :html)
32
+ #
33
+ # # Convert Markdown to AsciiDoc
34
+ # adoc = Coradoc.convert(md_text, from: :markdown, to: :asciidoc)
35
+ #
36
+ # @example Using the hooks system
37
+ # Coradoc::Hooks.register(:before_parse) do |content, format:|
38
+ # puts "Parsing #{format} document..."
39
+ # content
40
+ # end
41
+ #
42
+ # @see Coradoc::CoreModel The canonical document model
43
+ # @see Coradoc::Hooks Plugin lifecycle hooks system
44
+ # @see Coradoc::FormatModule Interface contract for format modules
45
+ #
46
+ module Coradoc
47
+ # Base error class - defined in errors.rb
48
+ # @see Coradoc::Error Base error class
49
+ # @see Coradoc::ParseError Parsing errors with source context
50
+ # @see Coradoc::ValidationError Document validation errors
51
+ # @see Coradoc::TransformationError Model transformation errors
52
+ # @see Coradoc::UnsupportedFormatError Unsupported format errors
53
+
54
+ class << self
55
# Return the shared format registry, creating it on first use.
#
# The instance is memoized in @registry, so every call after the first
# returns the same object.
#
# @return [Registry] the format registry
def registry
  return @registry if @registry

  @registry = Registry.new
end
61
+
62
# Register a format module under a name.
#
# The module is extended with FormatModule::Interface when it does not
# already carry it, stored in the registry together with its options, and
# finally checked with FormatModule.validate!.
#
# @param format_name [Symbol] the format name (e.g., :asciidoc, :html, :markdown)
# @param format_module [Module] the format module
# @param options [Hash] optional configuration (e.g., extensions: [])
# @return [void]
def register_format(format_name, format_module, **options)
  needs_interface = !format_module.is_a?(FormatModule::Interface)
  format_module.extend(FormatModule::Interface) if needs_interface

  registry.register(format_name, format_module, options)
  FormatModule.validate!(format_module, format_name)
end
73
+
74
# Look up the module registered for a format name.
#
# Delegates to the registry's `get`.
#
# @param format_name [Symbol] the format name
# @return [Module, nil] the format module or nil if not found
def get_format(format_name)
  formats = registry
  formats.get(format_name)
end
81
+
82
# Names of every format currently known to the registry.
#
# Delegates to the registry's `list`.
#
# @return [Array<Symbol>] list of registered format names
def registered_formats
  formats = registry
  formats.list
end
88
+
89
# Parse document text through the module registered for +format+.
#
# Pipeline: the text goes through the :before_parse hook, the hook's result
# is handed to the format module's `parse_to_core`, and the parsed result is
# passed through the :after_parse hook. The value returned by that last
# hook invocation is the method's return value.
#
# @param text [String] the document text to parse
# @param format [Symbol] the source format (:asciidoc, :html, :markdown)
# @return [Coradoc::CoreModel::Base, Object] the parsed document model
# @raise [UnsupportedFormatError] if the format is not registered
#
# @example Parse AsciiDoc
#   doc = Coradoc.parse("= Title\n\nContent", format: :asciidoc)
#   doc = Coradoc.parse(File.read("doc.adoc"), format: :asciidoc)
def parse(text, format:)
  format_module = get_format(format) ||
                  raise(UnsupportedFormatError,
                        "Format '#{format}' is not registered. " \
                        "Available formats: #{registered_formats.join(', ')}")

  prepared = Hooks.invoke(:before_parse, text, format: format)
  parsed = format_module.parse_to_core(prepared)
  Hooks.invoke(:after_parse, parsed, format: format)
end
117
+
118
# Convert document text from one format to another.
#
# Parses +text+ with the +from+ format and serializes the result with the
# +to+ format; any extra keyword options are forwarded to #serialize.
#
# @param text [String] the source document text
# @param from [Symbol] the source format (:asciidoc, :html, :markdown)
# @param to [Symbol] the target format (:asciidoc, :html, :markdown)
# @param options [Hash] additional options for the conversion
# @return [String] the converted document text
# @raise [UnsupportedFormatError] if a format is not registered
#
# @example Convert AsciiDoc to HTML
#   html = Coradoc.convert(adoc_text, from: :asciidoc, to: :html)
#
# @example Convert HTML to AsciiDoc
#   adoc = Coradoc.convert(html_text, from: :html, to: :asciidoc)
def convert(text, from:, to:, **options)
  serialize(parse(text, format: from), to: to, **options)
end
142
+
143
# Transform a format-specific model into its CoreModel representation.
#
# A model that already is a CoreModel::Base is returned untouched.
# Otherwise the registered format modules are walked in registry order and
# the first one whose `handles_model?` accepts the model performs the
# conversion.
#
# @param model [Object] a format-specific model
# @return [Coradoc::CoreModel::Base] the CoreModel representation
# @raise [TransformationError] if no registered format handles the model
def to_core(model)
  return model if model.is_a?(CoreModel::Base)

  registry.each_value do |candidate|
    return candidate.to_core(model) if candidate.handles_model?(model)
  end

  raise TransformationError, "No transformer found for #{model.class}"
end
158
+
159
# Serialize a CoreModel document with the module registered for +to+.
#
# The model goes through the :before_serialize hook, the format module's
# `serialize` receives the hook's result plus any extra options, and the
# output is passed through the :after_serialize hook, whose return value is
# returned.
#
# @param model [Coradoc::CoreModel::Base] the CoreModel to serialize
# @param to [Symbol] the target format
# @param options [Hash] additional options
# @return [String] the serialized document
# @raise [UnsupportedFormatError] if the target format is not registered
def serialize(model, to:, **options)
  target = get_format(to) ||
           raise(UnsupportedFormatError, "Format '#{to}' is not registered")

  prepared = Hooks.invoke(:before_serialize, model, format: to)
  output = target.serialize(prepared, **options)
  Hooks.invoke(:after_serialize, output, format: to)
end
173
+
174
# Wrap a document in a DocumentManipulator for chainable operations.
#
# @param document [Coradoc::CoreModel::Base] the document to manipulate
# @return [DocumentManipulator] a new manipulator instance
#
# @example Chainable document manipulation
#   html = Coradoc.manipulate(doc)
#     .transform_text(&:upcase)
#     .add_toc
#     .to_html
def manipulate(document)
  manipulator = DocumentManipulator.new(document)
  manipulator
end
187
+
188
# Detect a format from a filename's extension.
#
# The extension is taken with File.extname (so it includes the leading dot,
# e.g. ".adoc") and lower-cased, then compared against the :extensions
# option of each registered format in registry order. A string without an
# extension part yields "" from File.extname.
#
# @param filename [String] filename whose extension is looked up
# @return [Symbol, nil] the first matching format name, or nil if none match
#
# @example
#   Coradoc.detect_format("document.adoc")  # => :asciidoc
#   Coradoc.detect_format("file.md")        # => :markdown
def detect_format(filename)
  wanted = File.extname(filename).downcase

  registry.each_key do |format_name|
    known = registry.options_for(format_name)[:extensions]
    return format_name if known&.include?(wanted)
  end

  nil
end
204
+
205
# Parse a document from a file path.
#
# For formats flagged as binary the path itself is handed to the format
# module's `parse_to_core`, and no parse hooks are invoked on that branch.
# For all other formats the file is read, the content goes through the
# :before_parse hook, `parse_file_to_core(path, content)` is called, and the
# result is passed through the :after_parse hook.
#
# @param path [String] path to the document file
# @param format [Symbol, nil] source format (auto-detected if nil)
# @return [Coradoc::CoreModel::Base] the parsed CoreModel document
# @raise [FileNotFoundError] if the path does not exist
# @raise [UnsupportedFormatError] if format is not detected or registered
#
# @example
#   doc = Coradoc.parse_file("document.adoc")
#   doc = Coradoc.parse_file("report.docx", format: :docx)
def parse_file(path, format: nil)
  raise FileNotFoundError, path unless File.exist?(path)

  source_format = format || detect_format(path)
  raise UnsupportedFormatError, "Could not detect format for: #{path}" unless source_format

  format_module = get_format(source_format)
  raise UnsupportedFormatError, "Format '#{source_format}' is not registered" unless format_module

  # Binary formats are parsed straight from the path.
  return format_module.parse_to_core(path) if binary_format?(source_format)

  raw = File.read(path)
  prepared = Hooks.invoke(:before_parse, raw, format: source_format)
  parsed = format_module.parse_file_to_core(path, prepared)
  Hooks.invoke(:after_parse, parsed, format: source_format)
end
236
+
237
# Convert a file from one format to another.
#
# The source format is +from+ when given, otherwise it is detected from the
# path. The file is parsed with #parse_file and the result serialized with
# #serialize, which also receives any extra keyword options.
#
# @param path [String] path to the source document file
# @param from [Symbol, nil] source format (auto-detected if nil)
# @param to [Symbol] target format
# @param options [Hash] additional options
# @return [String] the converted document text
# @raise [UnsupportedFormatError] if the source format cannot be detected
#
# @example
#   html = Coradoc.convert_file("document.adoc", to: :html)
#   adoc = Coradoc.convert_file("report.docx", to: :asciidoc)
def convert_file(path, to:, from: nil, **options)
  source_format = from || detect_format(path)
  unless source_format
    raise UnsupportedFormatError, "Could not detect format for: #{path}"
  end

  serialize(parse_file(path, format: source_format), to: to, **options)
end
255
+
256
# Tell whether a format takes a file path instead of text content.
#
# True only when the format's registry options hold `binary: true`
# (exactly `true`); a format with no options is not binary.
#
# @param format [Symbol] the format to check
# @return [Boolean] true if the format is binary
def binary_format?(format)
  opts = registry.options_for(format)
  return false if opts.nil?

  flag = opts.fetch(:binary, false)
  flag == true
end
264
+
265
# Normalize a format name to a symbol.
#
# The name is stringified and lower-cased, then looked up in the :aliases
# option of each registered format (e.g. "adoc" → :asciidoc, "md" →
# :markdown). When no alias matches, the lower-cased name itself is
# returned as a symbol.
#
# @param name [String, Symbol, nil] the format name to normalize
# @return [Symbol, nil] the normalized format symbol, or nil for a nil/false name
def normalize_format(name)
  return unless name

  key = name.to_s.downcase

  registry.each_key do |fmt_name|
    aliases = registry.options_for(fmt_name)[:aliases]
    return fmt_name if aliases&.include?(key)
  end

  key.to_sym
end
281
+
282
# Tell whether a format supports serialization (writing output).
#
# Unregistered formats answer false; registered ones answer with the format
# module's own `serialize?`.
#
# @param format [Symbol] the format to check
# @return [Boolean] true if the format can serialize
def serialize_format?(format)
  mod = get_format(format)
  mod ? mod.serialize? : false
end
292
+
293
# Tell whether a format supports parsing (reading input).
#
# A registered format can parse when its module exposes a public
# `parse_to_core` or `parse` method. Unregistered formats answer false.
#
# @param format [Symbol] the format to check
# @return [Boolean] true if the format can parse
def parse_format?(format)
  mod = get_format(format)
  return false unless mod

  exposed = mod.public_methods
  %i[parse_to_core parse].any? { |entry_point| exposed.include?(entry_point) }
end
303
+
304
# Capability summary for all registered formats.
#
# Maps each registered format name to `{ parse: bool, serialize: bool }`,
# using #parse_format? and #serialize_format?. Useful for CLI display and
# introspection.
#
# @return [Hash<Symbol, Hash<Symbol, Boolean>>]
def format_capabilities
  registered_formats.to_h do |name|
    [name, { parse: parse_format?(name), serialize: serialize_format?(name) }]
  end
end
318
+
319
# Resolve the output format from a filename, falling back to a default.
#
# @param output_file [String, nil] output filename to detect from
# @param default [Symbol] format used when no file is given or detection
#   fails (default: :html)
# @return [Symbol] the resolved format
def resolve_output_format(output_file, default: :html)
  detected = output_file && detect_format(output_file)
  detected || default
end
329
+
330
# Collect file metadata for display.
#
# The line count is only taken when the detected format is not flagged as
# binary; a file whose format cannot be detected is treated as text.
#
# @param path [String] path to the file
# @return [Hash] metadata including :size, :format, and :lines (for text formats)
def file_info(path)
  fmt = detect_format(path)
  details = { size: File.size(path), format: fmt }
  return details if binary_format?(fmt)

  details[:lines] = File.foreach(path).count
  details
end
340
+
341
# Validate a document file.
#
# The file is parsed with #parse_file, a schema is generated for the parsed
# document's class via Validation::SchemaGenerator, and the document is
# validated against it. When no schema is produced, a fresh
# Validation::Result is returned instead.
#
# @param path [String] path to the document file
# @param format [Symbol, nil] source format (auto-detected if nil)
# @return [Coradoc::Validation::Result] validation result
# @raise [UnsupportedFormatError] if format is not detected or registered
def validate_file(path, format: nil)
  doc = parse_file(path, format: format)
  schema = Validation::SchemaGenerator.generate(doc.class)

  schema ? schema.validate(doc) : Validation::Result.new
end
358
+
359
# Gather statistics about a parsed document.
#
# :title is included only when the document has one. For structural
# elements, :child_count holds the result of count_elements (which recurses
# into nested structural elements) and :element_counts holds the per-type
# tally from count_element_types.
#
# @param doc [CoreModel::Base] parsed document
# @return [Hash] statistics including element counts, title, etc.
def document_stats(doc)
  stats = {}
  stats[:title] = doc.title if doc.title
  return stats unless doc.is_a?(CoreModel::StructuralElement)

  stats[:child_count] = count_elements(doc)
  stats[:element_counts] = count_element_types(doc)
  stats
end
375
+
376
# Describe an element for display.
#
# Non-CoreModel objects are rendered with `to_s`. CoreModel elements are
# labelled with their unqualified class name, followed by the title when
# present, or (for blocks with content) by a preview of the content.
#
# The preview keeps at most 50 characters; "..." is appended only when the
# content was actually cut. (The previous slice `[0..50]` kept 51
# characters, so a 51-character content was shown in full yet still marked
# as truncated.)
#
# @param elem [Object] element to describe
# @return [String] human-readable description
def describe_element(elem)
  return elem.to_s unless elem.is_a?(CoreModel::Base)

  type = elem.class.name.split('::').last
  return "#{type}: #{elem.title}" if elem.title
  return type unless elem.is_a?(CoreModel::Block) && elem.content

  text = elem.content.to_s
  preview = text.length > 50 ? "#{text[0, 50]}..." : text
  "#{type}: #{preview}"
end
394
+
395
# Strip Unicode space separators from the start and/or end of a string.
#
# Only characters in the Unicode Zs category (space, no-break space,
# ideographic space, ...) are removed; tabs and newlines are left alone.
#
# The patterns are anchored with \A and \z so that only the very beginning
# and very end of the string are affected. The line anchors ^ and $ match
# at every line boundary, which made `sub` strip whitespace around an
# interior line of a multi-line string instead.
#
# @param string [String, nil] the string to strip
# @param only [Symbol, nil] what to strip: :begin, :end, or nil for both
# @return [String, nil] the stripped string (nil is passed through)
def strip_unicode(string, only: nil)
  return string if string.nil?

  case only
  when :begin then string.sub(/\A\p{Zs}+/, '')
  when :end   then string.sub(/\p{Zs}+\z/, '')
  else string.sub(/\A\p{Zs}+/, '').sub(/\p{Zs}+\z/, '')
  end
end
412
+
413
+ private
414
+
415
# Count every descendant of a structural element.
#
# Each child counts as one, and children that are themselves structural
# elements additionally contribute their own descendants. Anything that is
# not a CoreModel::StructuralElement has no countable descendants.
#
# @param doc [Object] the element whose descendants are counted
# @return [Integer] total number of nested children
def count_elements(doc)
  return 0 unless doc.is_a?(CoreModel::StructuralElement)

  # `children` may be unset (nil) on an element without content — assumed
  # from how other CoreModel collection attributes are guarded; confirm
  # against StructuralElement. Treat that case as "no children".
  (doc.children || []).sum { |child| 1 + count_elements(child) }
end
422
+
423
# Tally the elements of a document by type.
#
# An anonymous Visitor::Base subclass overrides `visit` to record every
# CoreModel::Base it is handed before delegating to the inherited `visit`.
# Structural elements and blocks are keyed by their `element_type` when it
# is set; everything else is keyed by the snake_cased unqualified class
# name.
#
# @param doc [CoreModel::Base] the document to walk
# @return [Hash] counts per type key
def count_element_types(doc)
  tally = Hash.new(0)

  counting_visitor = Class.new(Visitor::Base) do
    define_method(:visit) do |node|
      if node.is_a?(CoreModel::Base)
        typed = node.is_a?(CoreModel::StructuralElement) || node.is_a?(CoreModel::Block)
        label = (node.element_type if typed) ||
                node.class.name.split('::').last
                    .gsub(/([A-Z])/, '_\1').downcase.sub(/^_/, '')
        tally[label] += 1
      end
      # Explicit argument: implicit-argument super is not allowed inside define_method.
      super(node)
    end
  end

  counting_visitor.new.visit(doc)
  tally.delete_if { |_, seen| seen.zero? }
end
444
+ end
445
+
446
+ autoload :Error, "#{__dir__}/errors"
447
+ autoload :Version, "#{__dir__}/version"
448
+ autoload :Logger, "#{__dir__}/logger"
449
+ autoload :Hooks, "#{__dir__}/hooks"
450
+ autoload :Query, "#{__dir__}/query"
451
+ autoload :Validation, "#{__dir__}/validation"
452
+ autoload :Configurable, "#{__dir__}/configurable"
453
+ autoload :FormatModule, "#{__dir__}/format_module"
454
+ autoload :CoreModel, "#{__dir__}/core_model"
455
+ autoload :Registry, "#{__dir__}/registry"
456
+ autoload :Transform, "#{__dir__}/transform"
457
+ autoload :Input, "#{__dir__}/input"
458
+ autoload :Output, "#{__dir__}/output"
459
+ autoload :DocumentManipulator, "#{__dir__}/document_manipulator"
460
+ autoload :Visitor, "#{__dir__}/visitor"
461
+ autoload :PerformanceRegression, "#{__dir__}/performance_regression"
462
+ end
463
+
464
+ # Format gems self-register via Coradoc.register_format when they are required.
465
+ # No hardcoded registration needed here — each gem's entry file handles its own
466
+ # registration (e.g., coradoc-adoc/lib/coradoc/asciidoc.rb calls
467
+ # Coradoc.register_format(:asciidoc, Coradoc::AsciiDoc)).
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module CoreModel
5
# Block subclass for annotations and admonitions.
#
# Covers blocks that carry special semantic meaning, such as NOTE, WARNING,
# CAUTION, IMPORTANT and TIP admonitions, reviewer notes, and sidebar
# blocks used as annotations. It adds annotation-specific attributes on top
# of Block so these blocks can be told apart from generic delimited blocks.
#
# @example Creating a NOTE annotation
#   note = CoreModel::AnnotationBlock.new(
#     annotation_type: "note",
#     content: "This is important information."
#   )
#
# @example Creating a reviewer note
#   reviewer = CoreModel::AnnotationBlock.new(
#     annotation_type: "reviewer",
#     annotation_label: "john.doe",
#     content: "Please review this section."
#   )
class AnnotationBlock < Block
  # @!attribute annotation_type
  #   @return [String, nil] kind of annotation
  #     (e.g., 'note', 'warning', 'reviewer', 'sidebar')
  attribute :annotation_type, :string

  # @!attribute annotation_label
  #   @return [String, nil] optional custom label or identifier
  #     (e.g., reviewer ID, custom note label)
  attribute :annotation_label, :string

  private

  # Attributes taken into account for semantic equivalence.
  #
  # Extends the inherited list with the annotation attributes, so two blocks
  # with identical content but different annotation types or labels do not
  # compare as equivalent.
  #
  # @return [Array<Symbol>] list of comparable attributes
  def comparable_attributes
    [*super, :annotation_type, :annotation_label]
  end
end
56
+ end
57
+ end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'lutaml/model'
4
+
5
+ module Coradoc
6
+ module CoreModel
7
# Base class for all core models
#
# Provides common functionality for schema-agnostic document models:
# the shared attributes (id, title, element attributes, metadata entries),
# hash-style accessors for the two collections, semantic equivalence
# comparison, and a visitor entry point.
#
# @example Creating a base model
#   model = CoreModel::Base.new(
#     id: "example-1",
#     title: "Example Title",
#     element_attributes: [
#       CoreModel::ElementAttribute.new(name: "role", value: "note")
#     ]
#   )
#
# @example Semantic comparison
#   model1 = CoreModel::Base.new(id: "test", title: "Test")
#   model2 = CoreModel::Base.new(id: "test", title: "Test")
#   model1.semantically_equivalent?(model2) # => true
class Base < Lutaml::Model::Serializable
  # @!attribute id
  #   @return [String, nil] unique identifier for the element
  attribute :id, :string

  # @!attribute title
  #   @return [String, nil] title of the element
  attribute :title, :string

  # @!attribute element_attributes
  #   @return [Array<ElementAttribute>] collection of element attributes
  attribute :element_attributes, ElementAttribute, collection: true

  # @!attribute metadata_entries
  #   @return [Array<MetadataEntry>] additional metadata entries
  attribute :metadata_entries, MetadataEntry, collection: true

  # Get all metadata as a hash, or a specific metadata value by key
  #
  # A missing (nil) entries collection is treated as empty. Keys are
  # matched with ==, so the lookup key must have the same type as the
  # stored key.
  #
  # @overload metadata
  #   @return [Hash] All metadata as key-value pairs
  # @overload metadata(key)
  #   @param key [String] The metadata key
  #   @return [String, nil] The value of the first matching entry, or nil
  def metadata(key = nil)
    entries = metadata_entries || []
    if key.nil?
      # Return all metadata as hash (a later duplicate key overwrites an earlier one)
      entries.each_with_object({}) { |e, h| h[e.key] = e.value }
    else
      # Return specific value
      entries.find { |e| e.key == key }&.value
    end
  end

  # Convenience method to set metadata
  #
  # Updates the first entry whose key matches, or appends a new
  # MetadataEntry when there is none.
  #
  # @param key [String] The metadata key
  # @param value [String] The value to set
  def set_metadata(key, value)
    self.metadata_entries ||= []
    existing = metadata_entries.find { |e| e.key == key }
    if existing
      existing.value = value
    else
      metadata_entries << MetadataEntry.new(key: key, value: value)
    end
  end

  # Get all element attributes as a hash, or a specific attribute value by name
  #
  # A missing (nil) attribute collection is treated as empty. Names are
  # matched with ==, so the lookup name must have the same type as the
  # stored name.
  #
  # @overload attr
  #   @return [Hash] All attributes as key-value pairs
  # @overload attr(name)
  #   @param name [String] The attribute name
  #   @return [String, nil] The value of the first matching attribute, or nil
  def attr(name = nil)
    attrs = element_attributes || []
    if name.nil?
      # Return all attributes as hash (a later duplicate name overwrites an earlier one)
      attrs.each_with_object({}) { |a, h| h[a.name] = a.value }
    else
      # Return specific value
      attrs.find { |a| a.name == name }&.value
    end
  end

  # Set attribute value
  #
  # Updates the first attribute whose name matches, or appends a new
  # ElementAttribute when there is none.
  #
  # @param name [String] The attribute name
  # @param value [String] The value to set
  def set_attr(name, value)
    self.element_attributes ||= []
    existing = element_attributes.find { |a| a.name == name }
    if existing
      existing.value = value
    else
      element_attributes << ElementAttribute.new(name: name, value: value)
    end
  end

  # Compare this model with another for semantic equivalence
  #
  # Semantic equivalence means the models represent the same semantic
  # content, even if their exact structure differs. This is different
  # from equality, which requires exact matching.
  #
  # The type check is `other.is_a?(self.class)`, so +other+ may be an
  # instance of a subclass; only the attributes listed by this object's
  # comparable_attributes are compared.
  #
  # @param other [Object] the object to compare with
  # @return [Boolean] true if semantically equivalent, false otherwise
  def semantically_equivalent?(other)
    return false unless other.is_a?(self.class)

    # The block parameter is deliberately not called `attr`, to avoid
    # shadowing the public #attr method.
    comparable_attributes.all? do |attribute_name|
      compare_attribute(attribute_name, other)
    end
  end

  # Accept a visitor to traverse this element
  #
  # Implements the visitor pattern for document traversal.
  # The visitor's visit method will be called with this element.
  #
  # @param visitor [Coradoc::Visitor::Base] Visitor to accept
  # @return [void]
  def accept(visitor)
    visitor.visit(self)
  end

  private

  # List of attributes to compare for semantic equivalence
  #
  # Override in subclasses to define which attributes matter for
  # equivalence. By default, only id and title are compared.
  #
  # @return [Array<Symbol>] list of attribute names to compare
  def comparable_attributes
    %i[id title]
  end

  # Compare a single attribute between this model and another
  #
  # Dispatches on the type of this model's value: arrays are compared
  # element-wise, nested models by semantic equivalence, and anything
  # else with ==.
  #
  # @param attr [Symbol] name of the attribute reader to call on both models
  # @param other [Base] the model to compare against
  # @return [Boolean] true if the attribute values match
  def compare_attribute(attr, other)
    self_value = public_send(attr)
    other_value = other.public_send(attr)

    case self_value
    when Array
      compare_arrays(self_value, other_value)
    when Base
      self_value.semantically_equivalent?(other_value)
    else
      self_value == other_value
    end
  end

  # Compare two arrays for semantic equivalence
  #
  # Elements are compared pairwise, in order: models by semantic
  # equivalence, everything else with ==.
  #
  # @param arr1 [Array] this model's value
  # @param arr2 [Object] the other model's value; not guaranteed to be an
  #   Array, because compare_attribute passes it through unchecked
  # @return [Boolean] true if both are arrays of equal size whose elements
  #   all match
  def compare_arrays(arr1, arr2)
    # A nil or scalar on the other side is a mismatch, not an error.
    # Without this guard `arr2.size` raises NoMethodError, while the
    # reverse comparison (nil here, Array there) quietly returns false.
    return false unless arr2.is_a?(Array)
    return false unless arr1.size == arr2.size

    arr1.zip(arr2).all? do |item1, item2|
      if item1.is_a?(Base)
        item1.semantically_equivalent?(item2)
      else
        item1 == item2
      end
    end
  end
end
171
+ end
172
+ end