coradoc 2.0.22 → 2.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a33595a0aa6205f5a5d1adabd8ede0dc8f3d4237c2376a910800822400af2f59
4
- data.tar.gz: ecb898a27a6cb1540b553fdb3aa97bef73077a85a13175099738fefe3307faf2
3
+ metadata.gz: 7370b3dabd4535faf0592e455d0bfd13c582b37941c411a0c4c7b86528843896
4
+ data.tar.gz: c61e126d88a2dc69c507657201b9b3e7875a7d6beffac957d5b805a81ffcc6d5
5
5
  SHA512:
6
- metadata.gz: db01c4272054b9f2907ba0af6444e741c6d3151bf916f22aeb11dc25380ea7b0a9d63f88ba2fe2eeacd07bfcf08e50a30b8eeac914605ac394d4bccc9d3d282a
7
- data.tar.gz: c40699fd199f0f74a8f657ff7ee26036703e0865862919f2787acd52f532e40c825687cd52c4ef4c4a01337f3819121813f6f291dcd76d264c27bd916f1bc823
6
+ metadata.gz: 548b69fbd08a23cabf10e60ed599bafb904c87e857678bd539f820fc824ff2d407761728c10a2c6be2359a40b8d6973211adaf7c41171e7928c83a8816f43360
7
+ data.tar.gz: 04e7b4f63d935208df375f2ecf979104db5b0b7de9e95b6c04d77939d1a293114f2c285ccea7cc7a7e0709e364d7fe1c58d2038e2b6adc923591421be4d2ade8
@@ -170,6 +170,33 @@ module Coradoc
170
170
  )
171
171
  end
172
172
 
173
+ # Rewrite every link/xref target in a parsed document.
174
+ #
175
+ # Walks the document tree and invokes the supplied rewriter for each
176
+ # link and cross-reference target. The original document is never
177
+ # mutated — a NEW document is returned.
178
+ #
179
+ # Verbatim blocks (+SourceBlock+, +ListingBlock+, +LiteralBlock+,
180
+ # +PassBlock+, +StemBlock+) are skipped entirely so link-shaped text
181
+ # inside code/math bodies is never rewritten.
182
+ #
183
+ # The rewriter responds to +#call(target:, kind:, context:)+ and
184
+ # returns the new target String. +kind+ is +:link+ or +:xref+; the
185
+ # block form is supported for one-liners.
186
+ #
187
+ # @param document [Coradoc::CoreModel::Base] parsed document
188
+ # @param rewriter [#call, nil] callable rewriter; ignored when a block is given
189
+ # @return [Coradoc::CoreModel::Base] new document with rewritten targets
190
+ #
191
+ # @example Canonicalize snake_case targets to kebab-case
192
+ # doc = Coradoc.parse(adoc, format: :asciidoc)
193
+ # rewritten = Coradoc.rewrite_links(doc) do |target:, kind:, **|
194
+ # target.tr('_', '-')
195
+ # end
196
+ def rewrite_links(document, rewriter: nil, &block)
197
+ Coradoc::LinkRewriter.rewrite(document, rewriter: rewriter, &block)
198
+ end
199
+
173
200
  # Convert document text from one format to another
174
201
  #
175
202
  # This is the main entry point for format conversion. It handles the
@@ -41,6 +41,28 @@ module Coradoc
41
41
  # @return [Array<MetadataEntry>] additional metadata entries
42
42
  attribute :metadata_entries, MetadataEntry, collection: true
43
43
 
44
+ # Construct an instance and yield it for in-place mutation.
45
+ #
46
+ # This is the programmatic-construction entry point for CoreModel
47
+ # nodes. It calls +new+ exactly as a caller would, then yields
48
+ # the resulting instance for append-style construction. No new
49
+ # class hierarchy, no +method_missing+ — the block operates on
50
+ # the real model object.
51
+ #
52
+ # Per-class fluent helpers (e.g., +ListBlock#add_item+,
53
+ # +ListItem#add_text+) compose naturally with +build+:
54
+ #
55
+ # list = ListBlock.build do |ul|
56
+ # children.each { |c| ul.add_item { |li| li.add_link(c[:slug], text: c[:title]) } }
57
+ # end
58
+ #
59
+ # Without a block, +build(**attrs)+ is identical to +new(**attrs)+.
60
+ def self.build(**attrs)
61
+ instance = new(**attrs)
62
+ yield instance if block_given?
63
+ instance
64
+ end
65
+
44
66
  # Get all metadata as a hash, or a specific metadata value by key
45
67
  # @overload metadata
46
68
  # @return [Hash] All metadata as key-value pairs
@@ -145,6 +167,23 @@ module Coradoc
145
167
  visitor.visit(self)
146
168
  end
147
169
 
170
+ # True when this node counts as "real body content" for the
171
+ # purposes of empty-document detection and similar structural
172
+ # queries. Default is true; metadata and ephemeral nodes
173
+ # (FrontmatterBlock, CommentBlock, CommentLine) override to
174
+ # false. Polymorphic dispatch keeps the predicate open for
175
+ # future "skip-me" types — no central walker to edit (OCP).
176
+ def body_content?
177
+ true
178
+ end
179
+
180
+ # True when this node is structurally present but carries no
181
+ # visible characters. Default is false; inline text and
182
+ # paragraph blocks override to inspect their text content.
183
+ def whitespace_only?
184
+ false
185
+ end
186
+
148
187
  private
149
188
 
150
189
  # List of attributes to compare for semantic equivalence
@@ -7,6 +7,10 @@ module Coradoc
7
7
  def self.semantic_type
8
8
  :comment
9
9
  end
10
+
11
+ def body_content?
12
+ false
13
+ end
10
14
  end
11
15
  end
12
16
  end
@@ -13,6 +13,10 @@ module Coradoc
13
13
  :comment_line
14
14
  end
15
15
 
16
+ def body_content?
17
+ false
18
+ end
19
+
16
20
  attribute :text, :string
17
21
  end
18
22
  end
@@ -10,42 +10,89 @@ module Coradoc
10
10
  # No other code in any gem may call YAML directly for frontmatter.
11
11
  # This isolates permitted-classes configuration and error handling
12
12
  # in one MECE location (DRY).
13
+ #
14
+ # The Codec emits flat YAML — values rendered with their natural
15
+ # YAML type. This is what Jekyll, Hugo, VitePress, VuePress, 11ty
16
+ # and every SSG expects: +title: Foo+ / +date: 2024-01-01+.
17
+ # Round-trip fidelity for typed values (Date, Time, Symbol) is
18
+ # preserved by Psych's permitted-classes mechanism, not by a
19
+ # custom discriminator scheme.
20
+ #
21
+ # For the typed-tree representation used by the coradoc-mirror JSON
22
+ # pipeline, see +Coradoc::Mirror::Node::FrontmatterValue+ and
23
+ # +Coradoc::Mirror::Handlers::Frontmatter+. The typed-tree concern
24
+ # lives in the mirror gem; this Codec stays focused on YAML.
13
25
  module Codec
14
26
  PERMITTED_CLASSES = [Date, Time, DateTime, Symbol].freeze
15
27
 
16
28
  class << self
17
- # Parse a YAML string into a FrontmatterBlock.
18
- # Returns an empty FrontmatterBlock on malformed YAML (graceful
19
- # degradation body parsing continues).
29
+ # Parse YAML text into a FrontmatterBlock. Returns an empty
30
+ # FrontmatterBlock on malformed YAML or non-Hash payload.
31
+ # Logs a warning so the conversion pipeline can surface the
32
+ # skip rather than silently dropping user-authored content.
20
33
  def from_yaml(yaml_text)
21
34
  return FrontmatterBlock.new if yaml_text.nil? || yaml_text.strip.empty?
22
35
 
23
- parsed = YAML.safe_load(
24
- yaml_text,
25
- permitted_classes: PERMITTED_CLASSES,
26
- aliases: true
27
- )
28
- return FrontmatterBlock.new unless parsed.is_a?(Hash)
29
-
30
- schema = parsed['$schema']
31
- data = parsed.except('$schema')
32
- FrontmatterBlock.new(schema: schema&.to_s, data: data)
33
- rescue YAML::SyntaxError, Psych::DisallowedClass
36
+ build_from_loaded(load_yaml(yaml_text))
37
+ rescue YAML::SyntaxError, Psych::DisallowedClass => e
38
+ Coradoc::Logger.warn("frontmatter parse failed: #{e.message}")
34
39
  FrontmatterBlock.new
35
40
  end
36
41
 
42
+ # Build a FrontmatterBlock from a Ruby hash with native-typed
43
+ # values (String, Integer, Date, …). Returns an empty block
44
+ # for non-Hash input.
45
+ def from_hash(hash)
46
+ return FrontmatterBlock.new unless hash.is_a?(Hash)
47
+
48
+ build_from_loaded(hash)
49
+ end
50
+
37
51
  # Serialize a FrontmatterBlock to canonical YAML text.
38
- # Does NOT include leading/trailing `---` delimiters; the caller
39
- # wraps the output.
52
+ # Does NOT include leading/trailing +---+ delimiters; the
53
+ # caller wraps the output. Returns +''+ for empty blocks.
40
54
  def to_yaml(block)
41
55
  return '' unless block.is_a?(FrontmatterBlock)
42
56
 
57
+ payload = flat_tree(block)
58
+ return '' if payload.empty?
59
+
60
+ YAML.dump(payload).delete_prefix("---\n").delete_suffix("\n...")
61
+ end
62
+
63
+ # Return the frontmatter as a native-typed Ruby hash.
64
+ # +$schema+ is included when present.
65
+ def to_hash(block)
66
+ return {} unless block.is_a?(FrontmatterBlock)
67
+
68
+ flat_tree(block)
69
+ end
70
+
71
+ private
72
+
73
+ def load_yaml(yaml_text)
74
+ YAML.safe_load(
75
+ yaml_text,
76
+ permitted_classes: PERMITTED_CLASSES,
77
+ aliases: true
78
+ )
79
+ end
80
+
81
+ def build_from_loaded(loaded)
82
+ return FrontmatterBlock.new unless loaded.is_a?(Hash)
83
+
84
+ schema = loaded['$schema']
85
+ FrontmatterBlock.new(
86
+ schema: schema&.to_s,
87
+ data: loaded.except('$schema')
88
+ )
89
+ end
90
+
91
+ def flat_tree(block)
43
92
  tree = {}
44
93
  tree['$schema'] = block.schema if block.schema
45
94
  tree.merge!(block.data || {})
46
- return '' if tree.empty?
47
-
48
- YAML.dump(tree).delete_prefix("---\n").delete_suffix("\n...")
95
+ tree
49
96
  end
50
97
  end
51
98
  end
@@ -50,6 +50,10 @@ module Coradoc
50
50
  schema.nil? && (data.nil? || data.empty?)
51
51
  end
52
52
 
53
+ def body_content?
54
+ false
55
+ end
56
+
53
57
  # Sub-namespaces (Codec, SchemaResolver, FieldTransform, TextSplitter)
54
58
  # live under FrontmatterBlock and autoload lazily.
55
59
  autoload :Codec, "#{__dir__}/frontmatter/codec"
@@ -24,6 +24,18 @@ module Coradoc
24
24
  self.class.format_type || format_type
25
25
  end
26
26
 
27
+ # Polymorphic classification used by LinkRewriter::Visitor. Returns
28
+ # :link / :xref when this node carries a rewrite-able target, nil
29
+ # otherwise. Generic InlineElement instances defer to their
30
+ # resolved format_type; typed subclasses override with a literal.
31
+ # Keeps the visitor free of class-keyed case/when (OCP).
32
+ def link_kind
33
+ case resolve_format_type
34
+ when 'link' then :link
35
+ when 'xref' then :xref
36
+ end
37
+ end
38
+
27
39
  FORMAT_TYPES = %w[
28
40
  bold italic monospace underline strikethrough
29
41
  subscript superscript highlight
@@ -99,12 +111,20 @@ module Coradoc
99
111
  def self.format_type
100
112
  'link'
101
113
  end
114
+
115
+ def link_kind
116
+ :link
117
+ end
102
118
  end
103
119
 
104
120
  class CrossReferenceElement < InlineElement
105
121
  def self.format_type
106
122
  'xref'
107
123
  end
124
+
125
+ def link_kind
126
+ :xref
127
+ end
108
128
  end
109
129
 
110
130
  class StemElement < InlineElement
@@ -108,6 +108,23 @@ module Coradoc
108
108
  # @return [Array<ListItem>] collection of list items
109
109
  attribute :items, ListItem, collection: true
110
110
 
111
+ # -- Fluent construction helpers (paired with Base.build) --
112
+
113
+ # Append a new ListItem, built via ListItem.build. The block
114
+ # (if given) is yielded the new item so callers can chain
115
+ # +add_text+ / +add_link+ on it inline:
116
+ #
117
+ # ListBlock.build do |ul|
118
+ # children.each { |c| ul.add_item { |li| li.add_link(c[:slug], text: c[:title]) } }
119
+ # end
120
+ #
121
+ # Returns self for chaining at the list level.
122
+ def add_item(marker: self.marker_type == 'ordered' ? '.' : '*')
123
+ item = ListItem.build(marker: marker) { |li| yield li if block_given? }
124
+ self.items = Array(items) + [item]
125
+ self
126
+ end
127
+
111
128
  private
112
129
 
113
130
  # Attributes to compare for semantic equivalence
@@ -109,6 +109,27 @@ module Coradoc
109
109
  true
110
110
  end
111
111
 
112
+ # -- Fluent construction helpers (paired with Base.build) --
113
+
114
+ # Append a plain-text inline to this item's children. Returns
115
+ # self for chaining. Pairs naturally with ListItem.build:
116
+ #
117
+ # ListItem.build do |li|
118
+ # li.add_text("See ")
119
+ # li.add_link("foo.adoc", text: "Foo")
120
+ # end
121
+ def add_text(text)
122
+ self.children = Array(children) + [TextContent.new(text: text)]
123
+ self
124
+ end
125
+
126
+ # Append a +link:+ inline to this item's children. +text+ becomes
127
+ # the link's visible label; +target+ is the URL/anchor.
128
+ def add_link(target, text: nil)
129
+ self.children = Array(children) + [LinkElement.new(target: target, content: text)]
130
+ self
131
+ end
132
+
112
133
  private
113
134
 
114
135
  # Compare two list blocks for equivalence
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module CoreModel
5
+ # Output-side state object for host-system emitters (VitePress, Hugo,
6
+ # Astro, plain ERB, etc.).
7
+ #
8
+ # coradoc's source side has +IncludeResolver::Filesystem+ to resolve
9
+ # include targets without coupling to a storage layer. The output side
10
+ # has no analogous state object — until now. +OutputArtifact+ captures
11
+ # the three pieces of state coradoc genuinely needs to hand to a
12
+ # downstream emitter:
13
+ #
14
+ # - +output_key+ — site-relative key (e.g. "author/iso/ref/foo")
15
+ # - +frontmatter_block+ — parsed YAML frontmatter (may be empty)
16
+ # - +core_document+ — the canonical CoreModel document
17
+ #
18
+ # The consumer takes these and renders whatever wrapper it needs in
19
+ # its host system's native template language. coradoc does not know
20
+ # about VitePress, ERB, or Liquid. Symmetric with the source side:
21
+ # minimal protocol object, not an engine.
22
+ #
23
+ # A mirror-tree document is deliberately NOT bundled here. coradoc
24
+ # core has no runtime dependency on coradoc-mirror; consumers that
25
+ # target the mirror JSON pipeline pair an +OutputArtifact+ with a
26
+ # separately-computed +Coradoc::Mirror.transform(core)+ result.
27
+ class OutputArtifact < Base
28
+ # @!attribute output_key
29
+ # @return [String, nil] site-relative key with no leading slash
30
+ # and no trailing extension. SSGs map this to their URL space.
31
+ attribute :output_key, :string
32
+
33
+ # @!attribute frontmatter_block
34
+ # @return [FrontmatterBlock, nil] parsed YAML frontmatter
35
+ attribute :frontmatter_block, FrontmatterBlock
36
+
37
+ # @!attribute core_document
38
+ # @return [DocumentElement, nil] canonical CoreModel document
39
+ attribute :core_document, DocumentElement
40
+
41
+ private
42
+
43
+ def comparable_attributes
44
+ %i[output_key frontmatter_block core_document]
45
+ end
46
+ end
47
+ end
48
+ end
@@ -7,6 +7,10 @@ module Coradoc
7
7
  def self.semantic_type
8
8
  :paragraph
9
9
  end
10
+
11
+ def whitespace_only?
12
+ flat_text.strip.empty?
13
+ end
10
14
  end
11
15
  end
12
16
  end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module CoreModel
5
+ # STEM block — mathematical/scientific content authored in LaTeX,
6
+ # AsciiMath, or another STEM markup. Carries a +language+ attribute so
7
+ # downstream renderers know which interpreter to invoke.
8
+ #
9
+ # AsciiDoc surface forms:
10
+ # [stem]\n++++\nx^2\n++++ # language: "latex" (default)
11
+ # [latexmath]\n++++\nx^2\n++++ # language: "latex"
12
+ # [asciimath]\n++++\nx^2\n++++ # language: "asciimath"
13
+ class StemBlock < Block
14
+ attribute :language, :string, default: -> { 'latex' }
15
+
16
+ def self.semantic_type
17
+ :stem
18
+ end
19
+ end
20
+ end
21
+ end
@@ -52,6 +52,38 @@ module Coradoc
52
52
  false
53
53
  end
54
54
 
55
+ # Override in subclasses that carry document-title semantics.
56
+ # HeaderElement at level 0 represents the document title (`= Title`
57
+ # in AsciiDoc). Consumers that walk the body — TOC builders,
58
+ # section numbering — skip these so the title is not counted as
59
+ # "section 1". Polymorphic dispatch (vs. an is_a? guard at the
60
+ # call site) keeps the predicate open for future subclasses.
61
+ def document_title?
62
+ false
63
+ end
64
+
65
+ # Children that count as body content and aren't whitespace-only.
66
+ # Derived from per-node {#body_content?} and {#whitespace_only?}
67
+ # predicates — no central walker, no is_a? switch to maintain.
68
+ def visible_children
69
+ Array(children).select(&:body_content?).reject(&:whitespace_only?)
70
+ end
71
+
72
+ # True when the body has no visible content anywhere in its subtree.
73
+ # A document with only frontmatter + comments returns true; a
74
+ # document with one non-whitespace paragraph returns false.
75
+ def empty_body?
76
+ return true if children.nil? || children.empty?
77
+
78
+ children.all? do |child|
79
+ next true unless child.body_content?
80
+ next true if child.whitespace_only?
81
+ next child.empty_body? if child.is_a?(StructuralElement)
82
+
83
+ false
84
+ end
85
+ end
86
+
55
87
  # Derived element_type string for backward compatibility with
56
88
  # templates and legacy consumers. Subclasses override this.
57
89
  def element_type
@@ -108,6 +140,15 @@ module Coradoc
108
140
  true
109
141
  end
110
142
 
143
+ # A level-0 HeaderElement represents the document title (the `= Title`
144
+ # line in AsciiDoc, the `<h1>` in HTML). It is structurally part of
145
+ # the body but semantically the document's title, not a section —
146
+ # section numbering, TOC builders, and other section-aware logic
147
+ # skip these so the title is not counted as "section 1".
148
+ def document_title?
149
+ level.to_i.zero?
150
+ end
151
+
111
152
  def self.element_type_name
112
153
  'header'
113
154
  end
@@ -17,6 +17,10 @@ module Coradoc
17
17
  def to_s
18
18
  text.to_s
19
19
  end
20
+
21
+ def whitespace_only?
22
+ text.to_s.strip.empty?
23
+ end
20
24
  end
21
25
  end
22
26
  end
@@ -70,6 +70,7 @@ module Coradoc
70
70
  autoload :SidebarBlock, "#{__dir__}/core_model/sidebar_block"
71
71
  autoload :LiteralBlock, "#{__dir__}/core_model/literal_block"
72
72
  autoload :PassBlock, "#{__dir__}/core_model/pass_block"
73
+ autoload :StemBlock, "#{__dir__}/core_model/stem_block"
73
74
  autoload :ListingBlock, "#{__dir__}/core_model/listing_block"
74
75
  autoload :OpenBlock, "#{__dir__}/core_model/open_block"
75
76
  autoload :VerseBlock, "#{__dir__}/core_model/verse_block"
@@ -82,5 +83,6 @@ module Coradoc
82
83
  autoload :Include, "#{__dir__}/core_model/include"
83
84
  autoload :IncludeOptions, "#{__dir__}/core_model/include_options"
84
85
  autoload :IncludeLevelOffset, "#{__dir__}/core_model/include_level_offset"
86
+ autoload :OutputArtifact, "#{__dir__}/core_model/output_artifact"
85
87
  end
86
88
  end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module LinkRewriter
5
+ # Default no-op rewriter. Returns every target unchanged. Used when
6
+ # the caller only wants the visitor's immutable-copy guarantee.
7
+ class Identity
8
+ def call(target:, **)
9
+ target
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module LinkRewriter
5
+ # Focused rewriting visitor for the CoreModel tree.
6
+ #
7
+ # The set of node types that carry link/xref targets is closed and
8
+ # small: +LinkElement+, +CrossReferenceElement+, plus generic
9
+ # +InlineElement+ instances whose +format_type+ is +'link'+ or
10
+ # +'xref'+. That classification is owned polymorphically by
11
+ # +InlineElement#link_kind+ — the visitor just calls it. Adding a
12
+ # new link-bearing subclass means overriding +link_kind+ on it,
13
+ # not editing a case/when here (OCP).
14
+ #
15
+ # Verbatim block types are also closed: +SourceBlock+, +ListingBlock+,
16
+ # +LiteralBlock+, +PassBlock+, +StemBlock+. The visitor returns them
17
+ # unchanged so the rewriter never sees link-shaped text that is, in
18
+ # fact, raw code/math.
19
+ #
20
+ # Dispatch is class-based (no +respond_to?+ duck-typing). Unrecognized
21
+ # classes are returned unchanged — the visitor is closed by design.
22
+ class Visitor
23
+ # Verbatim block classes — content is raw, no link semantics.
24
+ VERBATIM_TYPES = [
25
+ Coradoc::CoreModel::SourceBlock,
26
+ Coradoc::CoreModel::ListingBlock,
27
+ Coradoc::CoreModel::LiteralBlock,
28
+ Coradoc::CoreModel::PassBlock,
29
+ Coradoc::CoreModel::StemBlock
30
+ ].freeze
31
+
32
+ # Structural/container classes that own a child collection. Each
33
+ # entry maps the class to the reader method that exposes its
34
+ # children. MECE — every "recurse into the children" case lands
35
+ # in this table. Exposed as a public constant so spec helpers
36
+ # and other visitors can mirror the same paths without restating
37
+ # the dispatch (DRY).
38
+ CONTAINER_TYPES = {
39
+ Coradoc::CoreModel::DocumentElement => :children,
40
+ Coradoc::CoreModel::SectionElement => :children,
41
+ Coradoc::CoreModel::PreambleElement => :children,
42
+ Coradoc::CoreModel::HeaderElement => :children,
43
+ Coradoc::CoreModel::Block => :children,
44
+ Coradoc::CoreModel::ListBlock => :items,
45
+ Coradoc::CoreModel::ListItem => :children,
46
+ Coradoc::CoreModel::Table => :rows,
47
+ Coradoc::CoreModel::TableRow => :cells,
48
+ Coradoc::CoreModel::TableCell => :children,
49
+ Coradoc::CoreModel::DefinitionList => :items,
50
+ Coradoc::CoreModel::Toc => :entries,
51
+ Coradoc::CoreModel::Bibliography => :entries,
52
+ Coradoc::CoreModel::AnnotationBlock => :children
53
+ }.freeze
54
+
55
+ def initialize(rewriter)
56
+ @rewriter = rewriter
57
+ end
58
+
59
+ # Entry point. Always returns a NEW root node — even Identity
60
+ # callers can rely on object identity to confirm the rewrite ran.
61
+ def visit_document(document)
62
+ return document unless document.is_a?(Coradoc::CoreModel::Base)
63
+
64
+ result = visit_subtree(document)
65
+ result.equal?(document) ? document.dup : result
66
+ end
67
+
68
+ private
69
+
70
+ def visit_subtree(node)
71
+ return node if VERBATIM_TYPES.any? { |type| node.is_a?(type) }
72
+ return rewrite_inline(node) if node.is_a?(Coradoc::CoreModel::InlineElement)
73
+
74
+ reader = reader_for(node)
75
+ return node unless reader
76
+
77
+ rewrite_collection(node, reader)
78
+ end
79
+
80
+ # Look up the children-reader method for +node+. Returns nil for
81
+ # unrecognized classes (no duck-typing — the CONTAINER_TYPES
82
+ # table is the single source of truth).
83
+ def reader_for(node)
84
+ CONTAINER_TYPES.each do |klass, reader|
85
+ return reader if node.is_a?(klass)
86
+ end
87
+ nil
88
+ end
89
+
90
+ def rewrite_collection(node, attr_name)
91
+ original = node.public_send(attr_name)
92
+ return node if original.nil? || original.empty?
93
+
94
+ rewritten = original.map { |child| visit_subtree(child) }
95
+ return node if unchanged?(rewritten, original)
96
+
97
+ rebuild_with(node, attr_name => rewritten)
98
+ end
99
+
100
+ # Inline dispatch. Typed LinkElement / CrossReferenceElement are
101
+ # always candidates; generic InlineElement instances must declare a
102
+ # matching format_type. Other typed subclasses (Bold, Italic, …)
103
+ # are walked for nested inlines instead of being rewritten.
104
+ def rewrite_inline(inline)
105
+ kind = inline.link_kind
106
+
107
+ rewritten_target = rewrite_target(inline, kind)
108
+ rewritten_nested = rewrite_nested(inline)
109
+
110
+ return inline if rewritten_target.nil? && rewritten_nested.nil?
111
+
112
+ overrides = {}
113
+ overrides[:target] = rewritten_target unless rewritten_target.nil?
114
+ overrides[:nested_elements] = rewritten_nested unless rewritten_nested.nil?
115
+ rebuild_with(inline, overrides)
116
+ end
117
+
118
+ def rewrite_target(inline, kind)
119
+ return nil unless kind
120
+
121
+ target = inline.target
122
+ return nil if target.nil? || target.empty?
123
+
124
+ new_target = @rewriter.call(
125
+ target: target,
126
+ kind: kind,
127
+ context: { in_verbatim: false }
128
+ )
129
+ return nil if new_target == target
130
+
131
+ new_target
132
+ end
133
+
134
+ def rewrite_nested(inline)
135
+ nested = inline.nested_elements
136
+ return nil if nested.nil? || nested.empty?
137
+
138
+ rewritten = nested.map { |child| visit_subtree(child) }
139
+ return nil if unchanged?(rewritten, nested)
140
+
141
+ rewritten
142
+ end
143
+
144
+ def rebuild_with(node, overrides)
145
+ duplicate = node.dup
146
+ overrides.each { |key, value| duplicate.public_send("#{key}=", value) }
147
+ duplicate
148
+ end
149
+
150
+ def unchanged?(rewritten, original)
151
+ return false unless rewritten.length == original.length
152
+
153
+ rewritten.each_with_index.all? { |node, i| node.equal?(original[i]) }
154
+ end
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ # Post-parse link/xref rewriting.
5
+ #
6
+ # Consumers that need to canonicalize link and xref targets (snake→kebab,
7
+ # strip +.adoc+, redirect maps, dialect translation) get a single
8
+ # immutable entry point: +Coradoc.rewrite_links(doc, rewriter:, &)+.
9
+ # The visitor walks the parsed CoreModel, invokes the supplied rewriter
10
+ # for every link/xref target, and returns a NEW document. Verbatim
11
+ # blocks (source, listing, literal, pass, stem) are skipped entirely —
12
+ # coradoc owns the parse and guarantees those bodies never reach the
13
+ # rewriter, removing the "track parser state to avoid verbatim bodies"
14
+ # footgun that plagues regex-based rewriting.
15
+ #
16
+ # Two-step API mirrors +Coradoc.resolve_includes+: parse produces the
17
+ # document, rewrite is a separate explicit step the caller controls.
18
+ module LinkRewriter
19
+ autoload :Identity, "#{__dir__}/link_rewriter/identity"
20
+ autoload :Visitor, "#{__dir__}/link_rewriter/visitor"
21
+
22
+ class << self
23
+ # Rewrite every link/xref target in +doc+.
24
+ #
25
+ # +rewriter+ responds to +#call(target:, kind:, context:)+ and returns
26
+ # the new target String. If a block is given it is used as the
27
+ # rewriter. Omitting both falls back to {Identity} (no-op) — useful
28
+ # for "give me a structurally identical copy" cases.
29
+ #
30
+ # Returns a NEW document; the input is never mutated.
31
+ def rewrite(doc, rewriter: nil, &block)
32
+ callable = rewriter || block || Identity.new
33
+ Visitor.new(callable).visit_document(doc)
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ # Pure-function module for relative-path arithmetic across output keys.
5
+ #
6
+ # Every SSG wrapper (VitePress, Hugo, Astro) needs to compute "how many
7
+ # directories up do I walk to reach the site root from this output?".
8
+ # The answer is the segment count of the source output_key. Everything
9
+ # else (template, imports) is host-system-specific; this module owns
10
+ # only the one piece of arithmetic that is genuinely shared.
11
+ #
12
+ # No state. No class. No knowledge of any specific SSG.
13
+ #
14
+ # @example Compute a VitePress import path
15
+ # Coradoc::RelativePath.from("author/iso/ref/foo", to: ".vitepress/theme")
16
+ # # => "../../../.vitepress/theme"
17
+ module RelativePath
18
+ module_function
19
+
20
+ # Compute a relative path from an output_key to a site-root-relative
21
+ # target.
22
+ #
23
+ # @param output_key [String, nil] site-relative key for the source
24
+ # page (e.g. "author/iso/ref/foo"). No leading slash, no extension.
25
+ # @param to [String] destination path relative to the site root.
26
+ # @return [String] the composed relative path.
27
+ def from(output_key, to:)
28
+ depth = output_key.to_s.count('/')
29
+ ('../' * depth) + to.to_s
30
+ end
31
+ end
32
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coradoc
4
- VERSION = '2.0.22'
4
+ VERSION = '2.0.23'
5
5
  end
data/lib/coradoc.rb CHANGED
@@ -43,4 +43,6 @@ module Coradoc
43
43
  autoload :DocumentManipulator, 'coradoc/document_manipulator'
44
44
  autoload :Visitor, 'coradoc/visitor'
45
45
  autoload :Serializer, 'coradoc/serializer/registry'
46
+ autoload :LinkRewriter, 'coradoc/link_rewriter'
47
+ autoload :RelativePath, 'coradoc/relative_path'
46
48
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.22
4
+ version: 2.0.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -90,6 +90,7 @@ files:
90
90
  - lib/coradoc/core_model/literal_block.rb
91
91
  - lib/coradoc/core_model/metadata.rb
92
92
  - lib/coradoc/core_model/open_block.rb
93
+ - lib/coradoc/core_model/output_artifact.rb
93
94
  - lib/coradoc/core_model/paragraph_block.rb
94
95
  - lib/coradoc/core_model/pass_block.rb
95
96
  - lib/coradoc/core_model/quote_block.rb
@@ -97,6 +98,7 @@ files:
97
98
  - lib/coradoc/core_model/reviewer_block.rb
98
99
  - lib/coradoc/core_model/sidebar_block.rb
99
100
  - lib/coradoc/core_model/source_block.rb
101
+ - lib/coradoc/core_model/stem_block.rb
100
102
  - lib/coradoc/core_model/structural_element.rb
101
103
  - lib/coradoc/core_model/table.rb
102
104
  - lib/coradoc/core_model/term.rb
@@ -117,12 +119,16 @@ files:
117
119
  - lib/coradoc/include_selectors/lines.rb
118
120
  - lib/coradoc/include_selectors/tags.rb
119
121
  - lib/coradoc/input.rb
122
+ - lib/coradoc/link_rewriter.rb
123
+ - lib/coradoc/link_rewriter/identity.rb
124
+ - lib/coradoc/link_rewriter/visitor.rb
120
125
  - lib/coradoc/logger.rb
121
126
  - lib/coradoc/output.rb
122
127
  - lib/coradoc/performance_regression.rb
123
128
  - lib/coradoc/processor_registry.rb
124
129
  - lib/coradoc/query.rb
125
130
  - lib/coradoc/registry.rb
131
+ - lib/coradoc/relative_path.rb
126
132
  - lib/coradoc/resolve_includes.rb
127
133
  - lib/coradoc/serializer/registry.rb
128
134
  - lib/coradoc/transform.rb