coradoc 2.0.23 → 2.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,13 @@ module Coradoc
24
24
  self.class.format_type || format_type
25
25
  end
26
26
 
27
+ # Return a duplicate of this element with its content replaced.
28
+ # Used by InlineContent.strip_edges (and other edge-cleanup
29
+ # callers) so that mutations never leak through the public API.
30
def with_content(new_content)
  # Mutate a copy produced by #dup so the receiver keeps its own content.
  replica = dup
  replica.content = new_content
  replica
end
33
+
27
34
  # Polymorphic classification used by LinkRewriter::Visitor. Returns
28
35
  # :link / :xref when this node carries a rewrite-able target, nil
29
36
  # otherwise. Generic InlineElement instances defer to their
@@ -36,14 +43,6 @@ module Coradoc
36
43
  end
37
44
  end
38
45
 
39
- FORMAT_TYPES = %w[
40
- bold italic monospace underline strikethrough
41
- subscript superscript highlight
42
- link xref stem footnote
43
- hard_line_break text span term
44
- line_break quotation
45
- ].freeze
46
-
47
46
  attribute :format_type, :string
48
47
  attribute :content, :string
49
48
  attribute :nested_elements, InlineElement, collection: true
@@ -169,24 +168,31 @@ module Coradoc
169
168
  end
170
169
  end
171
170
 
172
- FORMAT_TYPE_CLASS_MAP = {
173
- 'bold' => BoldElement,
174
- 'italic' => ItalicElement,
175
- 'monospace' => MonospaceElement,
176
- 'underline' => UnderlineElement,
177
- 'strikethrough' => StrikethroughElement,
178
- 'subscript' => SubscriptElement,
179
- 'superscript' => SuperscriptElement,
180
- 'highlight' => HighlightElement,
181
- 'link' => LinkElement,
182
- 'xref' => CrossReferenceElement,
183
- 'stem' => StemElement,
184
- 'footnote' => FootnoteElement,
185
- 'hard_line_break' => HardLineBreakElement,
186
- 'text' => TextElement,
187
- 'span' => SpanElement,
188
- 'term' => TermElement,
189
- 'line_break' => LineBreakElement
190
- }.freeze
171
+ # Wire-name table: bidirectional string ↔ class index. Single source
172
+ # of truth for serialization names and runtime dispatch. Reopened
173
+ # onto InlineElement so the subclasses above are defined first.
174
+ class InlineElement
175
# Maps each format-type string to the InlineElement subclass that
# represents it. Frozen so the table cannot be modified at runtime.
+ FORMAT_TYPE_CLASS_MAP = {
176
+ 'bold' => BoldElement,
177
+ 'italic' => ItalicElement,
178
+ 'monospace' => MonospaceElement,
179
+ 'underline' => UnderlineElement,
180
+ 'strikethrough' => StrikethroughElement,
181
+ 'subscript' => SubscriptElement,
182
+ 'superscript' => SuperscriptElement,
183
+ 'highlight' => HighlightElement,
184
+ 'link' => LinkElement,
185
+ 'xref' => CrossReferenceElement,
186
+ 'stem' => StemElement,
187
+ 'footnote' => FootnoteElement,
188
+ 'hard_line_break' => HardLineBreakElement,
189
+ 'text' => TextElement,
190
+ 'span' => SpanElement,
191
+ 'term' => TermElement,
192
+ 'line_break' => LineBreakElement
193
+ }.freeze
194
+
195
# List of format-type names, derived from the keys of the map above.
# NOTE(review): the hand-written FORMAT_TYPES list this replaces also
# contained 'quotation', which has no entry in FORMAT_TYPE_CLASS_MAP, so
# 'quotation' is no longer part of FORMAT_TYPES - confirm this is intended.
+ FORMAT_TYPES = FORMAT_TYPE_CLASS_MAP.keys.freeze
196
+ end
191
197
  end
192
198
  end
@@ -13,6 +13,7 @@ module Coradoc
13
13
  autoload :Base, "#{__dir__}/core_model/base"
14
14
  autoload :ChildrenContent, "#{__dir__}/core_model/children_content"
15
15
  autoload :HasChildren, "#{__dir__}/core_model/has_children"
16
+ autoload :InlineContent, "#{__dir__}/core_model/inline_content"
16
17
  autoload :Callout, "#{__dir__}/core_model/callout"
17
18
  autoload :CalloutText, "#{__dir__}/core_model/callout_text"
18
19
  autoload :Block, "#{__dir__}/core_model/block"
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ # Type-keyed handler dispatch.
5
+ #
6
+ # Replaces the bespoke `register/lookup` hashes that recur across gems with
7
+ # one cohesive registry. Each gem that needs type-keyed dispatch configures
8
+ # a single Dispatch instance and exposes its legacy DSL as thin delegates.
9
+ #
10
+ # Two resolution policies cover the common shapes:
11
+ #
12
+ # - `Coradoc::Dispatch.strict` — exact key match, raises on miss
13
+ # - `Coradoc::Dispatch.hierarchical` — walks ancestors on miss, returns nil
14
+ #
15
+ # Registries that need priority ordering, predicate matching, or lazy
16
+ # loading do not fit this shape and stay as they are; the friction there is
17
+ # genuine semantic difference, not duplicated mechanism.
18
# Registry that maps keys (typically classes) to handlers, with an
# optional ancestor walk and a configurable policy for unresolved keys.
class Dispatch
  # Ancestors at which the hierarchical walk stops.
  TERMINAL_ANCESTORS = [Object, BasicObject].freeze
  private_constant :TERMINAL_ANCESTORS

  class << self
    # Exact key match only. Because +miss+ defaults to +:raise+, #lookup
    # raises Coradoc::Error when no handler is registered for the key.
    def strict = new(walk_ancestors: false)

    # Falls back to the key's class ancestors when there is no exact
    # entry; #lookup returns nil when nothing matches.
    def hierarchical = new(walk_ancestors: true, miss: :return_nil)
  end

  # @param walk_ancestors [Boolean] when true, Class keys without an exact
  #   entry are resolved through their ancestors
  # @param miss [Symbol] +:raise+ makes #lookup raise Coradoc::Error for an
  #   unresolved key (unless a default block is given); any other value
  #   makes #lookup return nil instead
  # @yieldparam key the unresolved key; the block's result becomes the
  #   lookup result
  def initialize(walk_ancestors: false, miss: :raise, &default)
    @walk_ancestors = walk_ancestors
    @miss = miss
    @default = default
    @entries = {}
  end

  # Store +handler+ under +key+, replacing any existing entry.
  def register(key, handler)
    @entries[key] = handler
  end

  # Replace the handler for +key+ and return the previous one (nil when
  # there was none), so a wrapper can chain to the handler it replaces.
  def override(key, handler)
    previous = @entries[key]
    @entries[key] = handler
    previous
  end

  # Remove the entry for +key+; returns the removed handler or nil.
  def unregister(key)
    @entries.delete(key)
  end

  # Resolve the handler for +key+.
  #
  # Resolution order: exact entry, then (when enabled and +key+ is a
  # Class) the ancestor walk, then the default block. When nothing
  # resolves, the result depends on the miss policy: with +miss: :raise+
  # and no default block a Coradoc::Error is raised, otherwise the
  # unresolved value (nil, or whatever the default block returned) is
  # returned.
  #
  # @raise [Coradoc::Error] on an unresolved key under the +:raise+ policy
  def lookup(key)
    handler = resolve(key)
    # A default block is itself the miss handler, so its result is final.
    return handler if handler || @default || @miss != :raise

    raise Coradoc::Error, miss_message(key)
  end

  # Resolve the handler for +key+, raising Coradoc::Error on a miss
  # regardless of the configured miss policy.
  def lookup!(key)
    resolve(key) || raise(Coradoc::Error, miss_message(key))
  end

  # True when an exact entry exists for +key+ (ancestors are not consulted).
  def registered?(key) = @entries.key?(key)

  # Keys that have an exact entry.
  def registered_keys = @entries.keys

  # Remove every entry.
  def clear! = @entries.clear

  private

  # Policy-free resolution shared by #lookup and #lookup!.
  def resolve(key)
    exact = @entries[key]
    return exact if exact
    return walk(key) if @walk_ancestors && key.is_a?(Class)

    apply_default(key)
  end

  # Return the entry of the nearest registered ancestor of +klass+,
  # stopping before Object/BasicObject; falls back to the default block.
  def walk(klass)
    klass.ancestors.each do |ancestor|
      next if ancestor == klass
      break if TERMINAL_ANCESTORS.include?(ancestor)

      entry = @entries[ancestor]
      return entry if entry
    end
    apply_default(klass)
  end

  # Result of the default block for +key+, or nil when none was given.
  def apply_default(key)
    return @default.call(key) if @default

    nil
  end

  def miss_message(key)
    "no handler registered for #{key.inspect}"
  end
end
95
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ # Format registry, detection, and capability introspection. Single
5
+ # source of truth for "what formats exist and what can they do?",
6
+ # extracted from the top-level Coradoc façade. Public API on
7
+ # +Coradoc+ delegates here.
8
# Format registry, detection and capability queries, exposed as
# module-level methods.
module FormatCatalog
  class << self
    # Lazily created registry instance shared by every method below.
    def registry
      @registry ||= Registry.new
    end

    # Register +format_module+ under +format_name+ with the given options.
    # The module is extended with FormatModule::Interface when it does not
    # already include it.
    #
    # NOTE(review): the module is registered before FormatModule.validate!
    # runs, so a module that fails validation appears to stay registered -
    # confirm whether that ordering is intended.
    def register_format(format_name, format_module, **options)
      format_module.extend(FormatModule::Interface) unless format_module.is_a?(FormatModule::Interface)
      registry.register(format_name, format_module, options)
      FormatModule.validate!(format_module, format_name)
    end

    # @return the module registered for +format_name+ (as returned by the registry)
    def get_format(format_name)
      registry.get(format_name)
    end

    def registered_formats
      registry.list
    end

    # Name of the first registered format whose +:extensions+ option
    # includes the lower-cased extension of +filename+, or nil.
    def detect_format(filename)
      ext = File.extname(filename).downcase
      find_format { |opts| opts[:extensions]&.include?(ext) }
    end

    # True only when the format's options set +:binary+ to exactly true.
    def binary_format?(format)
      opts = registry.options_for(format)
      opts&.fetch(:binary, false) == true
    end

    # Map a user-supplied name to a registered format name via the
    # +:aliases+ option; falls back to the lower-cased name as a Symbol.
    # Returns nil for a nil/false +name+.
    def normalize_format(name)
      return nil unless name

      key = name.to_s.downcase
      find_format { |opts| opts[:aliases]&.include?(key) } || key.to_sym
    end

    # Whether the registered module reports +serialize?+; false when the
    # format is not registered.
    def serialize_format?(format)
      mod = get_format(format)
      return false unless mod

      mod.serialize?
    end

    # Whether the registered module responds to +parse_to_core+ or
    # +parse+; false when the format is not registered.
    def parse_format?(format)
      mod = get_format(format)
      return false unless mod

      mod.respond_to?(:parse_to_core) || mod.respond_to?(:parse)
    end

    # @return [Hash] format name => { parse: Boolean, serialize: ... }
    def capabilities
      registered_formats.each_with_object({}) do |name, caps|
        caps[name] = {
          parse: parse_format?(name),
          serialize: serialize_format?(name)
        }
      end
    end

    # Format detected from +output_file+, or +default+ when the file is
    # nil or its format cannot be detected.
    def resolve_output_format(output_file, default: :html)
      return default unless output_file

      detect_format(output_file) || default
    end

    private

    # First registered format name whose options satisfy the block.
    # Formats without options are skipped rather than raising, matching
    # the nil-tolerant handling in #binary_format?.
    def find_format
      registry.each_key do |name|
        opts = registry.options_for(name)
        return name if opts && yield(opts)
      end
      nil
    end
  end
end
83
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Introspection
5
+ # Visitor that walks a document and counts each CoreModel node by
6
+ # its type key. Used by Introspection.count_element_types to back
7
+ # the +Coradoc.document_stats+ API.
8
+ #
9
+ # Typed StructuralElement / Block nodes are counted under their
10
+ # +element_type+ (semantic identity). Other nodes fall back to a
11
+ # snake_case rendering of their class name.
12
class ElementCounter < Visitor::Base
  # Hash of type key => number of visited nodes (missing keys read as 0).
  attr_reader :counts

  def initialize
    @counts = Hash.new(0)
  end

  # Tally the node when it is a CoreModel::Base, then always delegate to
  # the superclass implementation of #visit.
  def visit(element)
    @counts[type_key_for(element)] += 1 if element.is_a?(CoreModel::Base)
    super(element)
  end

  private

  # Key under which +element+ is counted: its +element_type+ for typed
  # nodes that have one, otherwise the snake_cased class name.
  def type_key_for(element)
    return snake_case(element.class.name) unless typed_node?(element) && element.element_type

    element.element_type
  end

  # True for StructuralElement and Block nodes.
  def typed_node?(element)
    return true if element.is_a?(CoreModel::StructuralElement)

    element.is_a?(CoreModel::Block)
  end

  # Convert the last segment of a namespaced class name to snake_case by
  # prefixing every capital letter with an underscore.
  def snake_case(class_name)
    base_name = class_name.split('::').last
    underscored = base_name.gsub(/([A-Z])/, '_\1').downcase
    underscored.sub(/^_/, '')
  end
end
47
+ end
48
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ # Document introspection: file metadata, validation, stats. Extracted
5
+ # from the top-level Coradoc façade so the document-counting visitor
6
+ # and file-metadata helpers have their own home. Public API on
7
+ # +Coradoc+ delegates here.
8
# Document introspection helpers: file metadata, validation and
# per-document statistics, exposed as module-level methods.
module Introspection
  autoload :ElementCounter, "#{__dir__}/introspection/element_counter"

  # Maximum number of content characters shown by describe_element.
  PREVIEW_LIMIT = 50
  private_constant :PREVIEW_LIMIT

  class << self
    # @param path [String] file to inspect
    # @return [Hash] +:size+ and +:format+, plus +:lines+ for formats
    #   that FormatCatalog does not flag as binary
    def file_info(path)
      fmt = FormatCatalog.detect_format(path)
      info = { size: File.size(path), format: fmt }
      info[:lines] = File.foreach(path).count unless FormatCatalog.binary_format?(fmt)
      info
    end

    # Parse +path+ and validate the document against the schema generated
    # for its class; returns a fresh Validation::Result when no schema
    # is generated.
    def validate_file(path, format: nil)
      doc = Pipeline.parse_file(path, format: format)

      schema = Validation::SchemaGenerator.generate(doc.class)
      return schema.validate(doc) if schema

      Validation::Result.new
    end

    # @return [Hash] +:title+ when the document has one; for structural
    #   documents also +:child_count+ and +:element_counts+
    def document_stats(doc)
      stats = {}
      stats[:title] = doc.title if doc.title

      if doc.is_a?(CoreModel::StructuralElement)
        stats[:child_count] = count_elements(doc)
        stats[:element_counts] = count_element_types(doc)
      end

      stats
    end

    # One-line description of +elem+: "Type: title" when it has a title,
    # "Type: preview" for a Block with content, otherwise just the type.
    # Anything that is not a CoreModel::Base is described by its +to_s+.
    def describe_element(elem)
      return elem.to_s unless elem.is_a?(CoreModel::Base)

      type = elem.class.name.split('::').last
      return "#{type}: #{elem.title}" if elem.title
      return type unless elem.is_a?(CoreModel::Block) && elem.content

      text = elem.content.to_s
      # Slice by length, not by the inclusive range 0..50 (51 characters),
      # so the ellipsis only appears when characters were really dropped.
      preview = text.length > PREVIEW_LIMIT ? "#{text[0, PREVIEW_LIMIT]}..." : text
      "#{type}: #{preview}"
    end

    private

    # Number of descendants of +doc+, recursing only into children that
    # are themselves structural elements.
    def count_elements(doc)
      return 0 unless doc.is_a?(CoreModel::StructuralElement)

      doc.children.sum do |child|
        1 + (child.is_a?(CoreModel::StructuralElement) ? count_elements(child) : 0)
      end
    end

    # Per-type node counts gathered by ElementCounter, without zero entries.
    def count_element_types(doc)
      counter = ElementCounter.new
      counter.visit(doc)
      counter.counts.reject { |_, v| v.zero? }
    end
  end
end
72
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ # Parse / serialize / convert pipeline. Single source of truth for
5
+ # the document transformation flow, extracted from the top-level
6
+ # Coradoc façade so pipeline logic has its own home and its own
7
+ # spec surface. Public API on +Coradoc+ delegates here.
8
# Parse / serialize / convert entry points, exposed as module-level
# methods that look formats up in FormatCatalog and run Hooks around
# each step.
module Pipeline
  class << self
    # Parse +text+ with the module registered for +format+, running the
    # +:before_parse+ hook on the text and the +:after_parse+ hook on the
    # result.
    #
    # @raise [UnsupportedFormatError] when +format+ is not registered
    def parse(text, format:)
      handler = FormatCatalog.get_format(format) ||
                raise(UnsupportedFormatError,
                      "Format '#{format}' is not registered. " \
                      "Available formats: #{FormatCatalog.registered_formats.join(', ')}")

      prepared = Hooks.invoke(:before_parse, text, format: format)
      Hooks.invoke(:after_parse, handler.parse_to_core(prepared), format: format)
    end

    # Coerce +resolver+ through Coradoc::IncludeResolver and hand the
    # document to Coradoc::ResolveIncludes with the remaining options.
    def resolve_includes(document, base_dir:,
                         missing_include: :error,
                         max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH,
                         allow_unsafe: false,
                         resolver: nil)
      coerced = Coradoc::IncludeResolver.coerce(resolver, base_dir: base_dir, allow_unsafe: allow_unsafe)

      Coradoc::ResolveIncludes.call(document,
                                    resolver: coerced,
                                    base_dir: base_dir,
                                    missing_include: missing_include,
                                    max_depth: max_depth)
    end

    # Delegate to Coradoc::LinkRewriter.rewrite, forwarding the block.
    def rewrite_links(document, rewriter: nil, &block)
      Coradoc::LinkRewriter.rewrite(document, rewriter: rewriter, &block)
    end

    # Parse +text+ as +from+ and serialize the result as +to+; extra
    # keyword options are forwarded to #serialize.
    def convert(text, from:, to:, **options)
      serialize(parse(text, format: from), to: to, **options)
    end

    # Return +model+ unchanged when it already is a CoreModel::Base;
    # otherwise convert it with the first registered format module that
    # reports handling it.
    #
    # @raise [TransformationError] when no registered module handles +model+
    def to_core(model)
      return model if model.is_a?(CoreModel::Base)

      FormatCatalog.registry.each_value do |candidate|
        return candidate.to_core(model) if candidate.handles_model?(model)
      end

      raise TransformationError, "No transformer found for #{model.class}"
    end

    # Serialize +model+ with the module registered for +to+, wrapped in
    # the +:before_serialize+ / +:after_serialize+ hooks.
    #
    # @raise [UnsupportedFormatError] when +to+ is not registered
    def serialize(model, to:, **options)
      handler = FormatCatalog.get_format(to)
      raise UnsupportedFormatError, "Format '#{to}' is not registered" unless handler

      prepared = Hooks.invoke(:before_serialize, model, format: to)
      output = handler.serialize(prepared, **options)
      Hooks.invoke(:after_serialize, output, format: to)
    end

    # Build a DocumentElement starting from an empty children list.
    def build(&block)
      CoreModel::DocumentElement.build(children: [], &block)
    end

    # Parse the file at +path+. The format is taken from +format+ or
    # detected from the path. Binary formats receive the path itself and
    # bypass the parse hooks; text formats are read, passed through
    # +:before_parse+, parsed, then passed through +:after_parse+.
    #
    # @raise [FileNotFoundError] when +path+ does not exist
    # @raise [UnsupportedFormatError] when the format is unknown or unregistered
    def parse_file(path, format: nil)
      raise FileNotFoundError, path unless File.exist?(path)

      source_format = format || FormatCatalog.detect_format(path)
      raise UnsupportedFormatError, "Could not detect format for: #{path}" unless source_format

      handler = FormatCatalog.get_format(source_format)
      raise UnsupportedFormatError, "Format '#{source_format}' is not registered" unless handler

      return handler.parse_to_core(path) if FormatCatalog.binary_format?(source_format)

      raw = File.read(path)
      prepared = Hooks.invoke(:before_parse, raw, format: source_format)
      parsed = handler.parse_file_to_core(path, prepared)
      Hooks.invoke(:after_parse, parsed, format: source_format)
    end

    # Parse the file at +path+ (format from +from+ or detected) and
    # serialize it as +to+; extra keyword options go to #serialize.
    #
    # @raise [UnsupportedFormatError] when the source format cannot be determined
    def convert_file(path, to:, from: nil, **options)
      source_format = from || FormatCatalog.detect_format(path)
      raise UnsupportedFormatError, "Could not detect format for: #{path}" unless source_format

      serialize(parse_file(path, format: source_format), to: to, **options)
    end
  end
end
108
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coradoc
4
- VERSION = '2.0.23'
4
+ VERSION = '2.0.24'
5
5
  end
data/lib/coradoc.rb CHANGED
@@ -20,18 +20,13 @@
20
20
  #
21
21
  # @example Manipulating documents
22
22
  # doc = Coradoc.parse(text, format: :asciidoc)
23
- # html = Coradoc.manipulate(doc)
24
- # .transform_text(&:upcase)
25
- # .add_toc
26
- # .to_html
23
+ # html = Coradoc.serialize(doc, to: :html)
27
24
  #
28
25
  # @example Building documents programmatically
29
- # doc = Coradoc.build do
30
- # title "My Document"
31
- # section "Intro" do
32
- # paragraph "Hello world"
33
- # end
34
- # end.to_core
26
+ # doc = Coradoc.build do |d|
27
+ # d.title = "My Document"
28
+ # d.children << Coradoc::CoreModel::ParagraphBlock.new(content: "Hello")
29
+ # end
35
30
  # Coradoc.serialize(doc, to: :html)
36
31
 
37
32
  require_relative 'coradoc/coradoc'
@@ -39,10 +34,7 @@ require_relative 'coradoc/version'
39
34
 
40
35
  module Coradoc
41
36
  autoload :CLI, 'coradoc/cli'
42
- autoload :DocumentBuilder, 'coradoc/document_builder'
43
- autoload :DocumentManipulator, 'coradoc/document_manipulator'
44
37
  autoload :Visitor, 'coradoc/visitor'
45
- autoload :Serializer, 'coradoc/serializer/registry'
46
38
  autoload :LinkRewriter, 'coradoc/link_rewriter'
47
39
  autoload :RelativePath, 'coradoc/relative_path'
48
40
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.23
4
+ version: 2.0.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -83,6 +83,7 @@ files:
83
83
  - lib/coradoc/core_model/include.rb
84
84
  - lib/coradoc/core_model/include_level_offset.rb
85
85
  - lib/coradoc/core_model/include_options.rb
86
+ - lib/coradoc/core_model/inline_content.rb
86
87
  - lib/coradoc/core_model/inline_element.rb
87
88
  - lib/coradoc/core_model/list_block.rb
88
89
  - lib/coradoc/core_model/list_item.rb
@@ -106,9 +107,9 @@ files:
106
107
  - lib/coradoc/core_model/toc.rb
107
108
  - lib/coradoc/core_model/toc_generator.rb
108
109
  - lib/coradoc/core_model/verse_block.rb
109
- - lib/coradoc/document_builder.rb
110
- - lib/coradoc/document_manipulator.rb
110
+ - lib/coradoc/dispatch.rb
111
111
  - lib/coradoc/errors.rb
112
+ - lib/coradoc/format_catalog.rb
112
113
  - lib/coradoc/format_module.rb
113
114
  - lib/coradoc/hooks.rb
114
115
  - lib/coradoc/include_resolver.rb
@@ -118,21 +119,18 @@ files:
118
119
  - lib/coradoc/include_selectors/level_offset.rb
119
120
  - lib/coradoc/include_selectors/lines.rb
120
121
  - lib/coradoc/include_selectors/tags.rb
121
- - lib/coradoc/input.rb
122
+ - lib/coradoc/introspection.rb
123
+ - lib/coradoc/introspection/element_counter.rb
122
124
  - lib/coradoc/link_rewriter.rb
123
125
  - lib/coradoc/link_rewriter/identity.rb
124
126
  - lib/coradoc/link_rewriter/visitor.rb
125
127
  - lib/coradoc/logger.rb
126
- - lib/coradoc/output.rb
127
128
  - lib/coradoc/performance_regression.rb
128
- - lib/coradoc/processor_registry.rb
129
+ - lib/coradoc/pipeline.rb
129
130
  - lib/coradoc/query.rb
130
131
  - lib/coradoc/registry.rb
131
132
  - lib/coradoc/relative_path.rb
132
133
  - lib/coradoc/resolve_includes.rb
133
- - lib/coradoc/serializer/registry.rb
134
- - lib/coradoc/transform.rb
135
- - lib/coradoc/transform/base.rb
136
134
  - lib/coradoc/validation.rb
137
135
  - lib/coradoc/version.rb
138
136
  - lib/coradoc/visitor.rb