coradoc 2.0.23 → 2.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/coradoc.rb +35 -429
- data/lib/coradoc/core_model/inline_content.rb +77 -0
- data/lib/coradoc/core_model/inline_element.rb +33 -27
- data/lib/coradoc/core_model.rb +1 -0
- data/lib/coradoc/dispatch.rb +95 -0
- data/lib/coradoc/format_catalog.rb +83 -0
- data/lib/coradoc/introspection/element_counter.rb +48 -0
- data/lib/coradoc/introspection.rb +72 -0
- data/lib/coradoc/pipeline.rb +108 -0
- data/lib/coradoc/version.rb +1 -1
- data/lib/coradoc.rb +5 -13
- metadata +7 -9
- data/lib/coradoc/document_builder.rb +0 -184
- data/lib/coradoc/document_manipulator.rb +0 -203
- data/lib/coradoc/input.rb +0 -22
- data/lib/coradoc/output.rb +0 -22
- data/lib/coradoc/processor_registry.rb +0 -50
- data/lib/coradoc/serializer/registry.rb +0 -150
- data/lib/coradoc/transform/base.rb +0 -21
- data/lib/coradoc/transform.rb +0 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bcce76c634ae51a857397a301fc2638950cb25e1a97048589ee5510f8a7aee40
|
|
4
|
+
data.tar.gz: 6cd3b391ae83e360ed282b40f778dad1c4290202b386b457b881e264ddca464e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f424dfb3267d2497dc46b9e764fab868083add0ef5cd82163aa89a35b884e75b81c7426a47d9c0b81abd15659dcc15b49f6f309b786848cba7c0aed42557cf3a
|
|
7
|
+
data.tar.gz: 07fe25f772cf02b7673f25f4bb41e9abb65e25631898af071407eb68961bba9c2a74c19c7d20d85544c722a4ad07404c4846d389278fb989d2a186caab36bdbb
|
data/lib/coradoc/coradoc.rb
CHANGED
|
@@ -52,429 +52,67 @@ module Coradoc
|
|
|
52
52
|
# @see Coradoc::UnsupportedFormatError Unsupported format errors
|
|
53
53
|
|
|
54
54
|
class << self
|
|
55
|
-
#
|
|
56
|
-
#
|
|
57
|
-
# @return [Registry] the format registry
|
|
58
|
-
def registry
|
|
59
|
-
@registry ||= Registry.new
|
|
60
|
-
end
|
|
55
|
+
# ---- Format registry (delegates to FormatCatalog) ----
|
|
61
56
|
|
|
62
|
-
|
|
63
|
-
#
|
|
64
|
-
# @param format_name [Symbol] the format name (e.g., :asciidoc, :html, :markdown)
|
|
65
|
-
# @param format_module [Module] the format module
|
|
66
|
-
# @param options [Hash] optional configuration (e.g., extensions: [])
|
|
67
|
-
# @return [void]
|
|
68
|
-
def register_format(format_name, format_module, **options)
|
|
69
|
-
format_module.extend(FormatModule::Interface) unless format_module.is_a?(FormatModule::Interface)
|
|
70
|
-
registry.register(format_name, format_module, options)
|
|
71
|
-
FormatModule.validate!(format_module, format_name)
|
|
72
|
-
end
|
|
57
|
+
def registry = FormatCatalog.registry
|
|
73
58
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
# @param format_name [Symbol] the format name
|
|
77
|
-
# @return [Module, nil] the format module or nil if not found
|
|
78
|
-
def get_format(format_name)
|
|
79
|
-
registry.get(format_name)
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
# List all registered formats
|
|
83
|
-
#
|
|
84
|
-
# @return [Array<Symbol>] list of registered format names
|
|
85
|
-
def registered_formats
|
|
86
|
-
registry.list
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# Parse text to a document model.
|
|
90
|
-
#
|
|
91
|
-
# Graph mode is the only mode: +include::+ directives survive as
|
|
92
|
-
# +CoreModel::Include+ link nodes pointing at other files. NO file
|
|
93
|
-
# I/O happens during parse. The result is a single document that
|
|
94
|
-
# references other documents via Include edges — a text graph.
|
|
95
|
-
#
|
|
96
|
-
# To splice included content inline, call +Coradoc.resolve_includes+
|
|
97
|
-
# on the parsed document. This is an explicit, separate step so the
|
|
98
|
-
# caller controls when (and whether) file I/O happens.
|
|
99
|
-
#
|
|
100
|
-
# @param text [String] the document text to parse
|
|
101
|
-
# @param format [Symbol] the source format (:asciidoc, :html, :markdown)
|
|
102
|
-
# @return [Coradoc::CoreModel::Base, Object] the parsed document model
|
|
103
|
-
# @raise [UnsupportedFormatError] if the format is not registered
|
|
104
|
-
#
|
|
105
|
-
# @example Parse — Include directives stay as link nodes
|
|
106
|
-
# doc = Coradoc.parse(text, format: :asciidoc)
|
|
107
|
-
#
|
|
108
|
-
# @example Then flatten — splice included files inline
|
|
109
|
-
# flat = Coradoc.resolve_includes(doc, base_dir: Dir.pwd)
|
|
110
|
-
def parse(text, format:)
|
|
111
|
-
format_module = get_format(format)
|
|
112
|
-
unless format_module
|
|
113
|
-
raise UnsupportedFormatError,
|
|
114
|
-
"Format '#{format}' is not registered. " \
|
|
115
|
-
"Available formats: #{registered_formats.join(', ')}"
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
text = Hooks.invoke(:before_parse, text, format: format)
|
|
119
|
-
result = format_module.parse_to_core(text)
|
|
120
|
-
Hooks.invoke(:after_parse, result, format: format)
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
# Resolve +include::+ directives in a parsed document.
|
|
124
|
-
#
|
|
125
|
-
# Walks the document tree and replaces every +CoreModel::Include+
|
|
126
|
-
# link node with the parsed content of its target file, recursing
|
|
127
|
-
# into the result. The original document is left unchanged; a new
|
|
128
|
-
# subtree is constructed.
|
|
129
|
-
#
|
|
130
|
-
# This is the explicit "flatten" step that turns a text graph into
|
|
131
|
-
# a single spliced document. Callers control:
|
|
132
|
-
# - +base_dir+ — where to root relative include paths
|
|
133
|
-
# - +missing_include+ — what to do when a target is missing
|
|
134
|
-
# - +max_depth+ — recursion cap
|
|
135
|
-
# - +allow_unsafe+ — opt out of path-traversal protection
|
|
136
|
-
# - +resolver+ — custom resolution strategy (e.g. HTTP, in-memory)
|
|
137
|
-
#
|
|
138
|
-
# @param document [Coradoc::CoreModel::Base] parsed document
|
|
139
|
-
# @param base_dir [String] base directory for relative include paths
|
|
140
|
-
# @param missing_include [Symbol] :error (default), :warn, :silent, :passthrough
|
|
141
|
-
# @param max_depth [Integer] recursion cap (default 64)
|
|
142
|
-
# @param allow_unsafe [Boolean] disable path-traversal protection
|
|
143
|
-
# @param resolver [Object, nil] custom resolver. Defaults to
|
|
144
|
-
# +Coradoc::IncludeResolver::Filesystem+ rooted at +base_dir+.
|
|
145
|
-
# @return [Coradoc::CoreModel::Base] new document with includes expanded
|
|
146
|
-
# @raise [Coradoc::IncludeNotFoundError] when a target is missing
|
|
147
|
-
# and policy is :error
|
|
148
|
-
# @raise [Coradoc::IncludeDepthExceededError] when +max_depth+ is hit
|
|
149
|
-
# @raise [Coradoc::CircularIncludeError] when an include cycle is detected
|
|
150
|
-
#
|
|
151
|
-
# @example
|
|
152
|
-
# doc = Coradoc.parse(text, format: :asciidoc)
|
|
153
|
-
# flat = Coradoc.resolve_includes(doc, base_dir: Dir.pwd)
|
|
154
|
-
def resolve_includes(document, base_dir:,
|
|
155
|
-
missing_include: :error,
|
|
156
|
-
max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH,
|
|
157
|
-
allow_unsafe: false,
|
|
158
|
-
resolver: nil)
|
|
159
|
-
resolver = Coradoc::IncludeResolver.coerce(
|
|
160
|
-
resolver,
|
|
161
|
-
base_dir: base_dir,
|
|
162
|
-
allow_unsafe: allow_unsafe
|
|
163
|
-
)
|
|
164
|
-
Coradoc::ResolveIncludes.call(
|
|
165
|
-
document,
|
|
166
|
-
resolver: resolver,
|
|
167
|
-
base_dir: base_dir,
|
|
168
|
-
missing_include: missing_include,
|
|
169
|
-
max_depth: max_depth
|
|
170
|
-
)
|
|
171
|
-
end
|
|
172
|
-
|
|
173
|
-
# Rewrite every link/xref target in a parsed document.
|
|
174
|
-
#
|
|
175
|
-
# Walks the document tree and invokes the supplied rewriter for each
|
|
176
|
-
# link and cross-reference target. The original document is never
|
|
177
|
-
# mutated — a NEW document is returned.
|
|
178
|
-
#
|
|
179
|
-
# Verbatim blocks (+SourceBlock+, +ListingBlock+, +LiteralBlock+,
|
|
180
|
-
# +PassBlock+, +StemBlock+) are skipped entirely so link-shaped text
|
|
181
|
-
# inside code/math bodies is never rewritten.
|
|
182
|
-
#
|
|
183
|
-
# The rewriter responds to +#call(target:, kind:, context:)+ and
|
|
184
|
-
# returns the new target String. +kind+ is +:link+ or +:xref+; the
|
|
185
|
-
# block form is supported for one-liners.
|
|
186
|
-
#
|
|
187
|
-
# @param document [Coradoc::CoreModel::Base] parsed document
|
|
188
|
-
# @param rewriter [#call, nil] callable rewriter; ignored when a block is given
|
|
189
|
-
# @return [Coradoc::CoreModel::Base] new document with rewritten targets
|
|
190
|
-
#
|
|
191
|
-
# @example Canonicalize snake_case targets to kebab-case
|
|
192
|
-
# doc = Coradoc.parse(adoc, format: :asciidoc)
|
|
193
|
-
# rewritten = Coradoc.rewrite_links(doc) do |target:, kind:, **|
|
|
194
|
-
# target.tr('_', '-')
|
|
195
|
-
# end
|
|
196
|
-
def rewrite_links(document, rewriter: nil, &block)
|
|
197
|
-
Coradoc::LinkRewriter.rewrite(document, rewriter: rewriter, &block)
|
|
198
|
-
end
|
|
199
|
-
|
|
200
|
-
# Convert document text from one format to another
|
|
201
|
-
#
|
|
202
|
-
# This is the main entry point for format conversion. It handles the
|
|
203
|
-
# complete pipeline: parse -> transform to CoreModel -> transform to target -> serialize
|
|
204
|
-
#
|
|
205
|
-
# @param text [String] the source document text
|
|
206
|
-
# @param from [Symbol] the source format (:asciidoc, :html, :markdown)
|
|
207
|
-
# @param to [Symbol] the target format (:asciidoc, :html, :markdown)
|
|
208
|
-
# @param options [Hash] additional options for the conversion
|
|
209
|
-
# @return [String] the converted document text
|
|
210
|
-
# @raise [UnsupportedFormatError] if a format is not registered
|
|
211
|
-
#
|
|
212
|
-
# @example Convert AsciiDoc to HTML
|
|
213
|
-
# html = Coradoc.convert(adoc_text, from: :asciidoc, to: :html)
|
|
214
|
-
#
|
|
215
|
-
# @example Convert HTML to AsciiDoc
|
|
216
|
-
# adoc = Coradoc.convert(html_text, from: :html, to: :asciidoc)
|
|
217
|
-
def convert(text, from:, to:, **)
|
|
218
|
-
# Parse to CoreModel
|
|
219
|
-
core = parse(text, format: from)
|
|
220
|
-
|
|
221
|
-
# Convert to target format
|
|
222
|
-
serialize(core, to: to, **)
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
# Transform a model to CoreModel
|
|
226
|
-
#
|
|
227
|
-
# @param model [Object] a format-specific model
|
|
228
|
-
# @return [Coradoc::CoreModel::Base] the CoreModel representation
|
|
229
|
-
def to_core(model)
|
|
230
|
-
return model if model.is_a?(CoreModel::Base)
|
|
231
|
-
|
|
232
|
-
registry.each_value do |format_module|
|
|
233
|
-
next unless format_module.handles_model?(model)
|
|
234
|
-
|
|
235
|
-
return format_module.to_core(model)
|
|
236
|
-
end
|
|
237
|
-
|
|
238
|
-
raise TransformationError, "No transformer found for #{model.class}"
|
|
59
|
+
def register_format(format_name, format_module, **options)
|
|
60
|
+
FormatCatalog.register_format(format_name, format_module, **options)
|
|
239
61
|
end
|
|
240
62
|
|
|
241
|
-
|
|
242
|
-
#
|
|
243
|
-
# @param model [Coradoc::CoreModel::Base] the CoreModel to serialize
|
|
244
|
-
# @param to [Symbol] the target format
|
|
245
|
-
# @param options [Hash] additional options
|
|
246
|
-
# @return [String] the serialized document
|
|
247
|
-
def serialize(model, to:, **)
|
|
248
|
-
format_module = get_format(to)
|
|
249
|
-
raise UnsupportedFormatError, "Format '#{to}' is not registered" unless format_module
|
|
250
|
-
|
|
251
|
-
model = Hooks.invoke(:before_serialize, model, format: to)
|
|
252
|
-
result = format_module.serialize(model, **)
|
|
253
|
-
Hooks.invoke(:after_serialize, result, format: to)
|
|
254
|
-
end
|
|
63
|
+
def get_format(format_name) = FormatCatalog.get_format(format_name)
|
|
255
64
|
|
|
256
|
-
|
|
257
|
-
#
|
|
258
|
-
# @param document [Coradoc::CoreModel::Base] the document to manipulate
|
|
259
|
-
# @return [DocumentManipulator] a new manipulator instance
|
|
260
|
-
#
|
|
261
|
-
# @example Chainable document manipulation
|
|
262
|
-
# html = Coradoc.manipulate(doc)
|
|
263
|
-
# .transform_text(&:upcase)
|
|
264
|
-
# .add_toc
|
|
265
|
-
# .to_html
|
|
266
|
-
def manipulate(document)
|
|
267
|
-
DocumentManipulator.new(document)
|
|
268
|
-
end
|
|
65
|
+
def registered_formats = FormatCatalog.registered_formats
|
|
269
66
|
|
|
270
|
-
#
|
|
271
|
-
#
|
|
272
|
-
# @param filename [String] Filename or extension to detect
|
|
273
|
-
# @return [Symbol, nil] the detected format symbol
|
|
274
|
-
#
|
|
275
|
-
# @example
|
|
276
|
-
# Coradoc.detect_format("document.adoc") # => :asciidoc
|
|
277
|
-
# Coradoc.detect_format("file.md") # => :markdown
|
|
278
|
-
def detect_format(filename)
|
|
279
|
-
ext = File.extname(filename).downcase
|
|
280
|
-
registry.each_key do |name|
|
|
281
|
-
opts = registry.options_for(name)
|
|
282
|
-
return name if opts[:extensions]&.include?(ext)
|
|
283
|
-
end
|
|
284
|
-
nil
|
|
285
|
-
end
|
|
67
|
+
# ---- Pipeline (delegates to Pipeline) ----
|
|
286
68
|
|
|
287
|
-
|
|
288
|
-
#
|
|
289
|
-
# Handles both text formats (reads file content) and binary formats
|
|
290
|
-
# (passes file path directly to the format module).
|
|
291
|
-
#
|
|
292
|
-
# @param path [String] path to the document file
|
|
293
|
-
# @param format [Symbol, nil] source format (auto-detected if nil)
|
|
294
|
-
# @return [Coradoc::CoreModel::Base] the parsed CoreModel document
|
|
295
|
-
# @raise [UnsupportedFormatError] if format is not detected or registered
|
|
296
|
-
#
|
|
297
|
-
# @example
|
|
298
|
-
# doc = Coradoc.parse_file("document.adoc")
|
|
299
|
-
# doc = Coradoc.parse_file("report.docx", format: :docx)
|
|
300
|
-
def parse_file(path, format: nil)
|
|
301
|
-
raise FileNotFoundError, path unless File.exist?(path)
|
|
69
|
+
def parse(text, format:) = Pipeline.parse(text, format: format)
|
|
302
70
|
|
|
303
|
-
|
|
304
|
-
raise UnsupportedFormatError, "Could not detect format for: #{path}" unless source_format
|
|
71
|
+
def resolve_includes(document, **) = Pipeline.resolve_includes(document, **)
|
|
305
72
|
|
|
306
|
-
|
|
307
|
-
raise UnsupportedFormatError, "Format '#{source_format}' is not registered" unless format_module
|
|
73
|
+
def rewrite_links(...) = Pipeline.rewrite_links(...)
|
|
308
74
|
|
|
309
|
-
|
|
310
|
-
format_module.parse_to_core(path)
|
|
311
|
-
else
|
|
312
|
-
content = File.read(path)
|
|
313
|
-
content = Hooks.invoke(:before_parse, content, format: source_format)
|
|
314
|
-
result = format_module.parse_file_to_core(path, content)
|
|
315
|
-
Hooks.invoke(:after_parse, result, format: source_format)
|
|
316
|
-
end
|
|
317
|
-
end
|
|
75
|
+
def convert(text, **) = Pipeline.convert(text, **)
|
|
318
76
|
|
|
319
|
-
|
|
320
|
-
#
|
|
321
|
-
# @param path [String] path to the source document file
|
|
322
|
-
# @param from [Symbol, nil] source format (auto-detected if nil)
|
|
323
|
-
# @param to [Symbol] target format
|
|
324
|
-
# @param options [Hash] additional options
|
|
325
|
-
# @return [String] the converted document text
|
|
326
|
-
#
|
|
327
|
-
# @example
|
|
328
|
-
# html = Coradoc.convert_file("document.adoc", to: :html)
|
|
329
|
-
# adoc = Coradoc.convert_file("report.docx", to: :asciidoc)
|
|
330
|
-
def convert_file(path, to:, from: nil, **)
|
|
331
|
-
source_format = from || detect_format(path)
|
|
332
|
-
raise UnsupportedFormatError, "Could not detect format for: #{path}" unless source_format
|
|
333
|
-
|
|
334
|
-
core = parse_file(path, format: source_format)
|
|
335
|
-
serialize(core, to: to, **)
|
|
336
|
-
end
|
|
77
|
+
def to_core(model) = Pipeline.to_core(model)
|
|
337
78
|
|
|
338
|
-
|
|
339
|
-
#
|
|
340
|
-
# @param format [Symbol] the format to check
|
|
341
|
-
# @return [Boolean] true if the format is binary
|
|
342
|
-
def binary_format?(format)
|
|
343
|
-
opts = registry.options_for(format)
|
|
344
|
-
opts&.fetch(:binary, false) == true
|
|
345
|
-
end
|
|
79
|
+
def serialize(model, **) = Pipeline.serialize(model, **)
|
|
346
80
|
|
|
347
|
-
|
|
348
|
-
#
|
|
349
|
-
# Handles common aliases like "adoc" → :asciidoc, "md" → :markdown.
|
|
350
|
-
#
|
|
351
|
-
# @param name [String, Symbol, nil] the format name to normalize
|
|
352
|
-
# @return [Symbol, nil] the normalized format symbol, or nil
|
|
353
|
-
def normalize_format(name)
|
|
354
|
-
return nil unless name
|
|
355
|
-
|
|
356
|
-
key = name.to_s.downcase
|
|
357
|
-
registry.each_key do |fmt_name|
|
|
358
|
-
opts = registry.options_for(fmt_name)
|
|
359
|
-
return fmt_name if opts[:aliases]&.include?(key)
|
|
360
|
-
end
|
|
361
|
-
key.to_sym
|
|
362
|
-
end
|
|
81
|
+
def build(...) = Pipeline.build(...)
|
|
363
82
|
|
|
364
|
-
|
|
365
|
-
#
|
|
366
|
-
# @param format [Symbol] the format to check
|
|
367
|
-
# @return [Boolean] true if the format can serialize
|
|
368
|
-
def serialize_format?(format)
|
|
369
|
-
mod = get_format(format)
|
|
370
|
-
return false unless mod
|
|
83
|
+
def parse_file(path, **) = Pipeline.parse_file(path, **)
|
|
371
84
|
|
|
372
|
-
|
|
373
|
-
end
|
|
85
|
+
def convert_file(path, **) = Pipeline.convert_file(path, **)
|
|
374
86
|
|
|
375
|
-
#
|
|
376
|
-
#
|
|
377
|
-
# @param format [Symbol] the format to check
|
|
378
|
-
# @return [Boolean] true if the format can parse
|
|
379
|
-
def parse_format?(format)
|
|
380
|
-
mod = get_format(format)
|
|
381
|
-
return false unless mod
|
|
87
|
+
# ---- Format detection (delegates to FormatCatalog) ----
|
|
382
88
|
|
|
383
|
-
|
|
384
|
-
end
|
|
89
|
+
def detect_format(filename) = FormatCatalog.detect_format(filename)
|
|
385
90
|
|
|
386
|
-
|
|
387
|
-
#
|
|
388
|
-
# Returns a hash mapping each format name to its capabilities
|
|
389
|
-
# (parse: bool, serialize: bool). Useful for CLI display and introspection.
|
|
390
|
-
#
|
|
391
|
-
# @return [Hash<Symbol, Hash<Symbol, Boolean>>]
|
|
392
|
-
def format_capabilities
|
|
393
|
-
registered_formats.each_with_object({}) do |name, caps|
|
|
394
|
-
caps[name] = {
|
|
395
|
-
parse: parse_format?(name),
|
|
396
|
-
serialize: serialize_format?(name)
|
|
397
|
-
}
|
|
398
|
-
end
|
|
399
|
-
end
|
|
91
|
+
def binary_format?(format) = FormatCatalog.binary_format?(format)
|
|
400
92
|
|
|
401
|
-
|
|
402
|
-
#
|
|
403
|
-
# @param output_file [String, nil] output filename to detect from
|
|
404
|
-
# @param default [Symbol] default format when detection fails (default: :html)
|
|
405
|
-
# @return [Symbol] the resolved format
|
|
406
|
-
def resolve_output_format(output_file, default: :html)
|
|
407
|
-
return default unless output_file
|
|
93
|
+
def normalize_format(name) = FormatCatalog.normalize_format(name)
|
|
408
94
|
|
|
409
|
-
|
|
410
|
-
end
|
|
95
|
+
def serialize_format?(format) = FormatCatalog.serialize_format?(format)
|
|
411
96
|
|
|
412
|
-
|
|
413
|
-
#
|
|
414
|
-
# @param path [String] path to the file
|
|
415
|
-
# @return [Hash] metadata including :size, :format, and :lines (for text formats)
|
|
416
|
-
def file_info(path)
|
|
417
|
-
fmt = detect_format(path)
|
|
418
|
-
info = { size: File.size(path), format: fmt }
|
|
419
|
-
info[:lines] = File.foreach(path).count unless binary_format?(fmt)
|
|
420
|
-
info
|
|
421
|
-
end
|
|
97
|
+
def parse_format?(format) = FormatCatalog.parse_format?(format)
|
|
422
98
|
|
|
423
|
-
|
|
424
|
-
#
|
|
425
|
-
# Parses the file and validates against auto-generated schema.
|
|
426
|
-
# Returns a Coradoc::Validation::Result.
|
|
427
|
-
#
|
|
428
|
-
# @param path [String] path to the document file
|
|
429
|
-
# @param format [Symbol, nil] source format (auto-detected if nil)
|
|
430
|
-
# @return [Coradoc::Validation::Result] validation result
|
|
431
|
-
# @raise [UnsupportedFormatError] if format is not detected or registered
|
|
432
|
-
def validate_file(path, format: nil)
|
|
433
|
-
doc = parse_file(path, format: format)
|
|
99
|
+
def format_capabilities = FormatCatalog.capabilities
|
|
434
100
|
|
|
435
|
-
|
|
436
|
-
return schema.validate(doc) if schema
|
|
101
|
+
def resolve_output_format(output_file, **) = FormatCatalog.resolve_output_format(output_file, **)
|
|
437
102
|
|
|
438
|
-
|
|
439
|
-
end
|
|
103
|
+
# ---- Introspection (delegates to Introspection) ----
|
|
440
104
|
|
|
441
|
-
|
|
442
|
-
#
|
|
443
|
-
# @param doc [CoreModel::Base] parsed document
|
|
444
|
-
# @return [Hash] statistics including element counts, title, etc.
|
|
445
|
-
def document_stats(doc)
|
|
446
|
-
stats = {}
|
|
105
|
+
def file_info(path) = Introspection.file_info(path)
|
|
447
106
|
|
|
448
|
-
|
|
107
|
+
def validate_file(path, **) = Introspection.validate_file(path, **)
|
|
449
108
|
|
|
450
|
-
|
|
451
|
-
stats[:child_count] = count_elements(doc)
|
|
452
|
-
stats[:element_counts] = count_element_types(doc)
|
|
453
|
-
end
|
|
109
|
+
def document_stats(doc) = Introspection.document_stats(doc)
|
|
454
110
|
|
|
455
|
-
|
|
456
|
-
end
|
|
111
|
+
def describe_element(elem) = Introspection.describe_element(elem)
|
|
457
112
|
|
|
458
|
-
#
|
|
459
|
-
#
|
|
460
|
-
# @param elem [Object] element to describe
|
|
461
|
-
# @return [String] human-readable description
|
|
462
|
-
def describe_element(elem)
|
|
463
|
-
return elem.to_s unless elem.is_a?(CoreModel::Base)
|
|
464
|
-
|
|
465
|
-
type = elem.class.name.split('::').last
|
|
466
|
-
if elem.title
|
|
467
|
-
"#{type}: #{elem.title}"
|
|
468
|
-
elsif elem.is_a?(CoreModel::Block) && elem.content
|
|
469
|
-
preview = elem.content.to_s[0..50]
|
|
470
|
-
preview += '...' if elem.content.to_s.length > 50
|
|
471
|
-
"#{type}: #{preview}"
|
|
472
|
-
else
|
|
473
|
-
type
|
|
474
|
-
end
|
|
475
|
-
end
|
|
113
|
+
# ---- Utilities that stay on the top-level façade ----
|
|
476
114
|
|
|
477
|
-
# Strip unicode whitespace from a string
|
|
115
|
+
# Strip unicode whitespace from a string.
|
|
478
116
|
#
|
|
479
117
|
# @param string [String] the string to strip
|
|
480
118
|
# @param only [Symbol, nil] what to strip: :begin, :end, or nil for both
|
|
@@ -491,38 +129,6 @@ module Coradoc
|
|
|
491
129
|
string.sub(/^\p{Zs}+/, '').sub(/\p{Zs}+$/, '')
|
|
492
130
|
end
|
|
493
131
|
end
|
|
494
|
-
|
|
495
|
-
private
|
|
496
|
-
|
|
497
|
-
def count_elements(doc)
|
|
498
|
-
return 0 unless doc.is_a?(CoreModel::StructuralElement)
|
|
499
|
-
|
|
500
|
-
doc.children.sum do |child|
|
|
501
|
-
1 + (child.is_a?(CoreModel::StructuralElement) ? count_elements(child) : 0)
|
|
502
|
-
end
|
|
503
|
-
end
|
|
504
|
-
|
|
505
|
-
def count_element_types(doc)
|
|
506
|
-
counts = Hash.new(0)
|
|
507
|
-
visitor = Class.new(Visitor::Base) do
|
|
508
|
-
define_method(:visit) do |element|
|
|
509
|
-
if element.is_a?(CoreModel::Base)
|
|
510
|
-
has_element_type = element.is_a?(CoreModel::StructuralElement) || element.is_a?(CoreModel::Block)
|
|
511
|
-
type_key = if has_element_type && element.element_type
|
|
512
|
-
element.element_type
|
|
513
|
-
else
|
|
514
|
-
element.class.name.split('::').last
|
|
515
|
-
.gsub(/([A-Z])/, '_\1').downcase.sub(/^_/, '')
|
|
516
|
-
end
|
|
517
|
-
counts[type_key] += 1
|
|
518
|
-
end
|
|
519
|
-
super(element)
|
|
520
|
-
end
|
|
521
|
-
end.new
|
|
522
|
-
visitor.visit(doc)
|
|
523
|
-
counts.reject! { |_, v| v.zero? }
|
|
524
|
-
counts
|
|
525
|
-
end
|
|
526
132
|
end
|
|
527
133
|
|
|
528
134
|
autoload :Error, "#{__dir__}/errors"
|
|
@@ -535,15 +141,15 @@ module Coradoc
|
|
|
535
141
|
autoload :FormatModule, "#{__dir__}/format_module"
|
|
536
142
|
autoload :CoreModel, "#{__dir__}/core_model"
|
|
537
143
|
autoload :Registry, "#{__dir__}/registry"
|
|
538
|
-
autoload :Transform, "#{__dir__}/transform"
|
|
539
|
-
autoload :Input, "#{__dir__}/input"
|
|
540
|
-
autoload :Output, "#{__dir__}/output"
|
|
541
|
-
autoload :DocumentManipulator, "#{__dir__}/document_manipulator"
|
|
542
144
|
autoload :Visitor, "#{__dir__}/visitor"
|
|
543
145
|
autoload :PerformanceRegression, "#{__dir__}/performance_regression"
|
|
544
146
|
autoload :IncludeResolver, "#{__dir__}/include_resolver"
|
|
545
147
|
autoload :IncludeSelectors, "#{__dir__}/include_selectors"
|
|
546
148
|
autoload :ResolveIncludes, "#{__dir__}/resolve_includes"
|
|
149
|
+
autoload :Pipeline, "#{__dir__}/pipeline"
|
|
150
|
+
autoload :FormatCatalog, "#{__dir__}/format_catalog"
|
|
151
|
+
autoload :Introspection, "#{__dir__}/introspection"
|
|
152
|
+
autoload :Dispatch, "#{__dir__}/dispatch"
|
|
547
153
|
end
|
|
548
154
|
|
|
549
155
|
# Format gems self-register via Coradoc.register_format when they are required.
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
  module CoreModel
    # Operations on mixed arrays of inline content (String /
    # InlineElement / CoreModel::Base). Single source of truth for text
    # extraction and edge cleanup, replacing parallel implementations
    # that previously lived in the HTML converters.
    #
    # No method here mutates its inputs: edge InlineElements are rebuilt
    # via #with_content and edge Strings are replaced with new instances.
    # Items that need no change are passed through as the same objects.
    module InlineContent
      class << self
        # Extract plain text from a mixed content value.
        #
        # Mapping:
        # - nil               → ''
        # - String            → itself
        # - Array             → each item converted recursively, then joined
        #                       (nested arrays are flattened, not inspected)
        # - InlineElement     → #content.to_s
        # - StructuralElement → recurse on #children
        # - Block             → recurse on non-empty Array #children,
        #                       otherwise #content.to_s
        # - other Base        → #content if it is a String, else #title.to_s
        # - anything else     → #to_s
        #
        # @param content [nil, String, Array, Object] value to flatten
        # @return [String] the extracted text
        def text_of(content)
          return '' if content.nil?

          text_of_one(content)
        end

        # Return a new array with leading whitespace stripped from the
        # first text-carrying item (String or InlineElement) and trailing
        # whitespace stripped from the last one.
        #
        # Only those two edge items are rebuilt; every other item,
        # including interior text carriers, is returned untouched so its
        # content is never coerced or duplicated needlessly. Inputs are
        # not mutated.
        #
        # @param content [Array, Object] mixed inline content
        # @return [Array, Object] a new array with cleaned edges; the input
        #   itself when it is not an Array or holds no text-carrying item
        def strip_edges(content)
          return content unless content.is_a?(Array)

          first_idx = content.index { |item| text_carrier?(item) }
          # Also covers the empty array: nothing carries text.
          return content if first_idx.nil?

          last_idx = content.rindex { |item| text_carrier?(item) }

          content.map.with_index do |item, idx|
            at_start = idx == first_idx
            at_end = idx == last_idx
            next item unless at_start || at_end

            text = item_text(item)
            text = text.lstrip if at_start
            text = text.rstrip if at_end
            # Strings are replaced by the stripped copy; InlineElements are
            # rebuilt through #with_content so the original stays intact.
            item.is_a?(String) ? text : item.with_content(text)
          end
        end

        private

        # Convert a single content value (or a nested Array of them) to text.
        # Branch order matters: the more specific model classes are tested
        # before the generic CoreModel::Base fallback.
        def text_of_one(item)
          case item
          when String then item
          when Array then item.map { |child| text_of_one(child) }.join
          when CoreModel::InlineElement then item.content.to_s
          when CoreModel::StructuralElement then text_of(Array(item.children))
          when CoreModel::Block
            children = item.children
            children.is_a?(Array) && !children.empty? ? text_of(children) : item.content.to_s
          when CoreModel::Base
            item.content.is_a?(String) ? item.content : item.title.to_s
          else
            item.to_s
          end
        end

        # True for items whose own text can be edge-stripped.
        def text_carrier?(item)
          item.is_a?(String) || item.is_a?(CoreModel::InlineElement)
        end

        # Text of a text carrier: the String itself or the element's content.
        def item_text(item)
          item.is_a?(String) ? item : item.content.to_s
        end
      end
    end
  end
end
|