coradoc 2.0.22 → 2.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/coradoc.rb +27 -0
- data/lib/coradoc/core_model/base.rb +39 -0
- data/lib/coradoc/core_model/comment_block.rb +4 -0
- data/lib/coradoc/core_model/comment_line.rb +4 -0
- data/lib/coradoc/core_model/frontmatter/codec.rb +66 -19
- data/lib/coradoc/core_model/frontmatter.rb +4 -0
- data/lib/coradoc/core_model/inline_element.rb +20 -0
- data/lib/coradoc/core_model/list_block.rb +17 -0
- data/lib/coradoc/core_model/list_item.rb +21 -0
- data/lib/coradoc/core_model/output_artifact.rb +48 -0
- data/lib/coradoc/core_model/paragraph_block.rb +4 -0
- data/lib/coradoc/core_model/stem_block.rb +21 -0
- data/lib/coradoc/core_model/structural_element.rb +41 -0
- data/lib/coradoc/core_model/text_content.rb +4 -0
- data/lib/coradoc/core_model.rb +2 -0
- data/lib/coradoc/link_rewriter/identity.rb +13 -0
- data/lib/coradoc/link_rewriter/visitor.rb +157 -0
- data/lib/coradoc/link_rewriter.rb +37 -0
- data/lib/coradoc/relative_path.rb +32 -0
- data/lib/coradoc/version.rb +1 -1
- data/lib/coradoc.rb +2 -0
- metadata +7 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7370b3dabd4535faf0592e455d0bfd13c582b37941c411a0c4c7b86528843896
|
|
4
|
+
data.tar.gz: c61e126d88a2dc69c507657201b9b3e7875a7d6beffac957d5b805a81ffcc6d5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 548b69fbd08a23cabf10e60ed599bafb904c87e857678bd539f820fc824ff2d407761728c10a2c6be2359a40b8d6973211adaf7c41171e7928c83a8816f43360
|
|
7
|
+
data.tar.gz: 04e7b4f63d935208df375f2ecf979104db5b0b7de9e95b6c04d77939d1a293114f2c285ccea7cc7a7e0709e364d7fe1c58d2038e2b6adc923591421be4d2ade8
|
data/lib/coradoc/coradoc.rb
CHANGED
|
@@ -170,6 +170,33 @@ module Coradoc
|
|
|
170
170
|
)
|
|
171
171
|
end
|
|
172
172
|
|
|
173
|
+
# Rewrite every link/xref target in a parsed document.
|
|
174
|
+
#
|
|
175
|
+
# Walks the document tree and invokes the supplied rewriter for each
|
|
176
|
+
# link and cross-reference target. The original document is never
|
|
177
|
+
# mutated — a NEW document is returned.
|
|
178
|
+
#
|
|
179
|
+
# Verbatim blocks (+SourceBlock+, +ListingBlock+, +LiteralBlock+,
|
|
180
|
+
# +PassBlock+, +StemBlock+) are skipped entirely so link-shaped text
|
|
181
|
+
# inside code/math bodies is never rewritten.
|
|
182
|
+
#
|
|
183
|
+
# The rewriter responds to +#call(target:, kind:, context:)+ and
|
|
184
|
+
# returns the new target String. +kind+ is +:link+ or +:xref+; the
|
|
185
|
+
# block form is supported for one-liners.
|
|
186
|
+
#
|
|
187
|
+
# @param document [Coradoc::CoreModel::Base] parsed document
|
|
188
|
+
# @param rewriter [#call, nil] callable rewriter; takes precedence over the block when both are given
|
|
189
|
+
# @return [Coradoc::CoreModel::Base] new document with rewritten targets
|
|
190
|
+
#
|
|
191
|
+
# @example Canonicalize snake_case targets to kebab-case
|
|
192
|
+
# doc = Coradoc.parse(adoc, format: :asciidoc)
|
|
193
|
+
# rewritten = Coradoc.rewrite_links(doc) do |target:, kind:, **|
|
|
194
|
+
# target.tr('_', '-')
|
|
195
|
+
# end
|
|
196
|
+
def rewrite_links(document, rewriter: nil, &block)
|
|
197
|
+
Coradoc::LinkRewriter.rewrite(document, rewriter: rewriter, &block)
|
|
198
|
+
end
|
|
199
|
+
|
|
173
200
|
# Convert document text from one format to another
|
|
174
201
|
#
|
|
175
202
|
# This is the main entry point for format conversion. It handles the
|
|
@@ -41,6 +41,28 @@ module Coradoc
|
|
|
41
41
|
# @return [Array<MetadataEntry>] additional metadata entries
|
|
42
42
|
attribute :metadata_entries, MetadataEntry, collection: true
|
|
43
43
|
|
|
44
|
+
# Construct an instance and yield it for in-place mutation.
|
|
45
|
+
#
|
|
46
|
+
# This is the programmatic-construction entry point for CoreModel
|
|
47
|
+
# nodes. It calls +new+ exactly as a caller would, then yields
|
|
48
|
+
# the resulting instance for append-style construction. No new
|
|
49
|
+
# class hierarchy, no +method_missing+ — the block operates on
|
|
50
|
+
# the real model object.
|
|
51
|
+
#
|
|
52
|
+
# Per-class fluent helpers (e.g., +ListBlock#add_item+,
|
|
53
|
+
# +ListItem#add_text+) compose naturally with +build+:
|
|
54
|
+
#
|
|
55
|
+
# list = ListBlock.build do |ul|
|
|
56
|
+
# children.each { |c| ul.add_item { |li| li.add_link(c[:slug], text: c[:title]) } }
|
|
57
|
+
# end
|
|
58
|
+
#
|
|
59
|
+
# Without a block, +build(**attrs)+ is identical to +new(**attrs)+.
|
|
60
|
+
def self.build(**attrs)
|
|
61
|
+
instance = new(**attrs)
|
|
62
|
+
yield instance if block_given?
|
|
63
|
+
instance
|
|
64
|
+
end
|
|
65
|
+
|
|
44
66
|
# Get all metadata as a hash, or a specific metadata value by key
|
|
45
67
|
# @overload metadata
|
|
46
68
|
# @return [Hash] All metadata as key-value pairs
|
|
@@ -145,6 +167,23 @@ module Coradoc
|
|
|
145
167
|
visitor.visit(self)
|
|
146
168
|
end
|
|
147
169
|
|
|
170
|
+
# True when this node counts as "real body content" for the
|
|
171
|
+
# purposes of empty-document detection and similar structural
|
|
172
|
+
# queries. Default is true; metadata and ephemeral nodes
|
|
173
|
+
# (FrontmatterBlock, CommentBlock, CommentLine) override to
|
|
174
|
+
# false. Polymorphic dispatch keeps the predicate open for
|
|
175
|
+
# future "skip-me" types — no central walker to edit (OCP).
|
|
176
|
+
def body_content?
|
|
177
|
+
true
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
# True when this node is structurally present but carries no
|
|
181
|
+
# visible characters. Default is false; inline text and
|
|
182
|
+
# paragraph blocks override to inspect their text content.
|
|
183
|
+
def whitespace_only?
|
|
184
|
+
false
|
|
185
|
+
end
|
|
186
|
+
|
|
148
187
|
private
|
|
149
188
|
|
|
150
189
|
# List of attributes to compare for semantic equivalence
|
|
@@ -10,42 +10,89 @@ module Coradoc
|
|
|
10
10
|
# No other code in any gem may call YAML directly for frontmatter.
|
|
11
11
|
# This isolates permitted-classes configuration and error handling
|
|
12
12
|
# in one MECE location (DRY).
|
|
13
|
+
#
|
|
14
|
+
# The Codec emits flat YAML — values rendered with their natural
|
|
15
|
+
# YAML type. This is what Jekyll, Hugo, VitePress, VuePress, 11ty
|
|
16
|
+
# and every SSG expects: +title: Foo+ / +date: 2024-01-01+.
|
|
17
|
+
# Round-trip fidelity for typed values (Date, Time, Symbol) is
|
|
18
|
+
# preserved by Psych's permitted-classes mechanism, not by a
|
|
19
|
+
# custom discriminator scheme.
|
|
20
|
+
#
|
|
21
|
+
# For the typed-tree representation used by the coradoc-mirror JSON
|
|
22
|
+
# pipeline, see +Coradoc::Mirror::Node::FrontmatterValue+ and
|
|
23
|
+
# +Coradoc::Mirror::Handlers::Frontmatter+. The typed-tree concern
|
|
24
|
+
# lives in the mirror gem; this Codec stays focused on YAML.
|
|
13
25
|
module Codec
|
|
14
26
|
PERMITTED_CLASSES = [Date, Time, DateTime, Symbol].freeze
|
|
15
27
|
|
|
16
28
|
class << self
|
|
17
|
-
# Parse
|
|
18
|
-
#
|
|
19
|
-
#
|
|
29
|
+
# Parse YAML text into a FrontmatterBlock. Returns an empty
|
|
30
|
+
# FrontmatterBlock on malformed YAML or non-Hash payload.
|
|
31
|
+
# YAML parse errors are logged as a warning so the pipeline can surface the
|
|
32
|
+
# skip rather than silently dropping user-authored content.
|
|
20
33
|
def from_yaml(yaml_text)
|
|
21
34
|
return FrontmatterBlock.new if yaml_text.nil? || yaml_text.strip.empty?
|
|
22
35
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
aliases: true
|
|
27
|
-
)
|
|
28
|
-
return FrontmatterBlock.new unless parsed.is_a?(Hash)
|
|
29
|
-
|
|
30
|
-
schema = parsed['$schema']
|
|
31
|
-
data = parsed.except('$schema')
|
|
32
|
-
FrontmatterBlock.new(schema: schema&.to_s, data: data)
|
|
33
|
-
rescue YAML::SyntaxError, Psych::DisallowedClass
|
|
36
|
+
build_from_loaded(load_yaml(yaml_text))
|
|
37
|
+
rescue YAML::SyntaxError, Psych::DisallowedClass => e
|
|
38
|
+
Coradoc::Logger.warn("frontmatter parse failed: #{e.message}")
|
|
34
39
|
FrontmatterBlock.new
|
|
35
40
|
end
|
|
36
41
|
|
|
42
|
+
# Build a FrontmatterBlock from a Ruby hash with native-typed
|
|
43
|
+
# values (String, Integer, Date, …). Returns an empty block
|
|
44
|
+
# for non-Hash input.
|
|
45
|
+
def from_hash(hash)
|
|
46
|
+
return FrontmatterBlock.new unless hash.is_a?(Hash)
|
|
47
|
+
|
|
48
|
+
build_from_loaded(hash)
|
|
49
|
+
end
|
|
50
|
+
|
|
37
51
|
# Serialize a FrontmatterBlock to canonical YAML text.
|
|
38
|
-
# Does NOT include leading/trailing
|
|
39
|
-
# wraps the output.
|
|
52
|
+
# Does NOT include leading/trailing +---+ delimiters; the
|
|
53
|
+
# caller wraps the output. Returns +''+ for empty blocks.
|
|
40
54
|
def to_yaml(block)
|
|
41
55
|
return '' unless block.is_a?(FrontmatterBlock)
|
|
42
56
|
|
|
57
|
+
payload = flat_tree(block)
|
|
58
|
+
return '' if payload.empty?
|
|
59
|
+
|
|
60
|
+
YAML.dump(payload).delete_prefix("---\n").delete_suffix("\n...")
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Return the frontmatter as a native-typed Ruby hash.
|
|
64
|
+
# +$schema+ is included when present.
|
|
65
|
+
def to_hash(block)
|
|
66
|
+
return {} unless block.is_a?(FrontmatterBlock)
|
|
67
|
+
|
|
68
|
+
flat_tree(block)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
private
|
|
72
|
+
|
|
73
|
+
def load_yaml(yaml_text)
|
|
74
|
+
YAML.safe_load(
|
|
75
|
+
yaml_text,
|
|
76
|
+
permitted_classes: PERMITTED_CLASSES,
|
|
77
|
+
aliases: true
|
|
78
|
+
)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def build_from_loaded(loaded)
|
|
82
|
+
return FrontmatterBlock.new unless loaded.is_a?(Hash)
|
|
83
|
+
|
|
84
|
+
schema = loaded['$schema']
|
|
85
|
+
FrontmatterBlock.new(
|
|
86
|
+
schema: schema&.to_s,
|
|
87
|
+
data: loaded.except('$schema')
|
|
88
|
+
)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def flat_tree(block)
|
|
43
92
|
tree = {}
|
|
44
93
|
tree['$schema'] = block.schema if block.schema
|
|
45
94
|
tree.merge!(block.data || {})
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
YAML.dump(tree).delete_prefix("---\n").delete_suffix("\n...")
|
|
95
|
+
tree
|
|
49
96
|
end
|
|
50
97
|
end
|
|
51
98
|
end
|
|
@@ -50,6 +50,10 @@ module Coradoc
|
|
|
50
50
|
schema.nil? && (data.nil? || data.empty?)
|
|
51
51
|
end
|
|
52
52
|
|
|
53
|
+
def body_content?
|
|
54
|
+
false
|
|
55
|
+
end
|
|
56
|
+
|
|
53
57
|
# Sub-namespaces (Codec, SchemaResolver, FieldTransform, TextSplitter)
|
|
54
58
|
# live under FrontmatterBlock and autoload lazily.
|
|
55
59
|
autoload :Codec, "#{__dir__}/frontmatter/codec"
|
|
@@ -24,6 +24,18 @@ module Coradoc
|
|
|
24
24
|
self.class.format_type || format_type
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
# Polymorphic classification used by LinkRewriter::Visitor. Returns
|
|
28
|
+
# :link / :xref when this node carries a rewrite-able target, nil
|
|
29
|
+
# otherwise. Generic InlineElement instances defer to their
|
|
30
|
+
# resolved format_type; typed subclasses override with a literal.
|
|
31
|
+
# Keeps the visitor free of class-keyed case/when (OCP).
|
|
32
|
+
def link_kind
|
|
33
|
+
case resolve_format_type
|
|
34
|
+
when 'link' then :link
|
|
35
|
+
when 'xref' then :xref
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
27
39
|
FORMAT_TYPES = %w[
|
|
28
40
|
bold italic monospace underline strikethrough
|
|
29
41
|
subscript superscript highlight
|
|
@@ -99,12 +111,20 @@ module Coradoc
|
|
|
99
111
|
def self.format_type
|
|
100
112
|
'link'
|
|
101
113
|
end
|
|
114
|
+
|
|
115
|
+
def link_kind
|
|
116
|
+
:link
|
|
117
|
+
end
|
|
102
118
|
end
|
|
103
119
|
|
|
104
120
|
class CrossReferenceElement < InlineElement
|
|
105
121
|
def self.format_type
|
|
106
122
|
'xref'
|
|
107
123
|
end
|
|
124
|
+
|
|
125
|
+
def link_kind
|
|
126
|
+
:xref
|
|
127
|
+
end
|
|
108
128
|
end
|
|
109
129
|
|
|
110
130
|
class StemElement < InlineElement
|
|
@@ -108,6 +108,23 @@ module Coradoc
|
|
|
108
108
|
# @return [Array<ListItem>] collection of list items
|
|
109
109
|
attribute :items, ListItem, collection: true
|
|
110
110
|
|
|
111
|
+
# -- Fluent construction helpers (paired with Base.build) --
|
|
112
|
+
|
|
113
|
+
# Append a new ListItem, built via ListItem.build. The block
|
|
114
|
+
# (if given) is yielded the new item so callers can chain
|
|
115
|
+
# +add_text+ / +add_link+ on it inline:
|
|
116
|
+
#
|
|
117
|
+
# ListBlock.build do |ul|
|
|
118
|
+
# children.each { |c| ul.add_item { |li| li.add_link(c[:slug], text: c[:title]) } }
|
|
119
|
+
# end
|
|
120
|
+
#
|
|
121
|
+
# Returns self for chaining at the list level.
|
|
122
|
+
def add_item(marker: self.marker_type == 'ordered' ? '.' : '*')
|
|
123
|
+
item = ListItem.build(marker: marker) { |li| yield li if block_given? }
|
|
124
|
+
self.items = Array(items) + [item]
|
|
125
|
+
self
|
|
126
|
+
end
|
|
127
|
+
|
|
111
128
|
private
|
|
112
129
|
|
|
113
130
|
# Attributes to compare for semantic equivalence
|
|
@@ -109,6 +109,27 @@ module Coradoc
|
|
|
109
109
|
true
|
|
110
110
|
end
|
|
111
111
|
|
|
112
|
+
# -- Fluent construction helpers (paired with Base.build) --
|
|
113
|
+
|
|
114
|
+
# Append a plain-text inline to this item's children. Returns
|
|
115
|
+
# self for chaining. Pairs naturally with ListItem.build:
|
|
116
|
+
#
|
|
117
|
+
# ListItem.build do |li|
|
|
118
|
+
# li.add_text("See ")
|
|
119
|
+
# li.add_link("foo.adoc", text: "Foo")
|
|
120
|
+
# end
|
|
121
|
+
def add_text(text)
|
|
122
|
+
self.children = Array(children) + [TextContent.new(text: text)]
|
|
123
|
+
self
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Append a +link:+ inline to this item's children. +text+ becomes
|
|
127
|
+
# the link's visible label; +target+ is the URL/anchor.
|
|
128
|
+
def add_link(target, text: nil)
|
|
129
|
+
self.children = Array(children) + [LinkElement.new(target: target, content: text)]
|
|
130
|
+
self
|
|
131
|
+
end
|
|
132
|
+
|
|
112
133
|
private
|
|
113
134
|
|
|
114
135
|
# Compare two list blocks for equivalence
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module CoreModel
|
|
5
|
+
# Output-side state object for host-system emitters (VitePress, Hugo,
|
|
6
|
+
# Astro, plain ERB, etc.).
|
|
7
|
+
#
|
|
8
|
+
# coradoc's source side has +IncludeResolver::Filesystem+ to resolve
|
|
9
|
+
# include targets without coupling to a storage layer. The output side
|
|
10
|
+
# has no analogous state object — until now. +OutputArtifact+ captures
|
|
11
|
+
# the three pieces of state coradoc genuinely needs to hand to a
|
|
12
|
+
# downstream emitter:
|
|
13
|
+
#
|
|
14
|
+
# - +output_key+ — site-relative key (e.g. "author/iso/ref/foo")
|
|
15
|
+
# - +frontmatter_block+ — parsed YAML frontmatter (may be empty)
|
|
16
|
+
# - +core_document+ — the canonical CoreModel document
|
|
17
|
+
#
|
|
18
|
+
# The consumer takes these and renders whatever wrapper it needs in
|
|
19
|
+
# its host system's native template language. coradoc does not know
|
|
20
|
+
# about VitePress, ERB, or Liquid. Symmetric with the source side:
|
|
21
|
+
# minimal protocol object, not an engine.
|
|
22
|
+
#
|
|
23
|
+
# A mirror-tree document is deliberately NOT bundled here. coradoc
|
|
24
|
+
# core has no runtime dependency on coradoc-mirror; consumers that
|
|
25
|
+
# target the mirror JSON pipeline pair an +OutputArtifact+ with a
|
|
26
|
+
# separately-computed +Coradoc::Mirror.transform(core)+ result.
|
|
27
|
+
class OutputArtifact < Base
|
|
28
|
+
# @!attribute output_key
|
|
29
|
+
# @return [String, nil] site-relative key with no leading slash
|
|
30
|
+
# and no trailing extension. SSGs map this to their URL space.
|
|
31
|
+
attribute :output_key, :string
|
|
32
|
+
|
|
33
|
+
# @!attribute frontmatter_block
|
|
34
|
+
# @return [FrontmatterBlock, nil] parsed YAML frontmatter
|
|
35
|
+
attribute :frontmatter_block, FrontmatterBlock
|
|
36
|
+
|
|
37
|
+
# @!attribute core_document
|
|
38
|
+
# @return [DocumentElement, nil] canonical CoreModel document
|
|
39
|
+
attribute :core_document, DocumentElement
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def comparable_attributes
|
|
44
|
+
%i[output_key frontmatter_block core_document]
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module CoreModel
|
|
5
|
+
# STEM block — mathematical/scientific content authored in LaTeX,
|
|
6
|
+
# AsciiMath, or another STEM markup. Carries a +language+ attribute so
|
|
7
|
+
# downstream renderers know which interpreter to invoke.
|
|
8
|
+
#
|
|
9
|
+
# AsciiDoc surface forms:
|
|
10
|
+
# [stem]\n++++\nx^2\n++++ # language: "latex" (default)
|
|
11
|
+
# [latexmath]\n++++\nx^2\n++++ # language: "latex"
|
|
12
|
+
# [asciimath]\n++++\nx^2\n++++ # language: "asciimath"
|
|
13
|
+
class StemBlock < Block
|
|
14
|
+
attribute :language, :string, default: -> { 'latex' }
|
|
15
|
+
|
|
16
|
+
def self.semantic_type
|
|
17
|
+
:stem
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -52,6 +52,38 @@ module Coradoc
|
|
|
52
52
|
false
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
+
# Override in subclasses that carry document-title semantics.
|
|
56
|
+
# HeaderElement at level 0 represents the document title (`= Title`
|
|
57
|
+
# in AsciiDoc). Consumers that walk the body — TOC builders,
|
|
58
|
+
# section numbering — skip these so the title is not counted as
|
|
59
|
+
# "section 1". Polymorphic dispatch (vs. an is_a? guard at the
|
|
60
|
+
# call site) keeps the predicate open for future subclasses.
|
|
61
|
+
def document_title?
|
|
62
|
+
false
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Children that count as body content and aren't whitespace-only.
|
|
66
|
+
# Derived from per-node {#body_content?} and {#whitespace_only?}
|
|
67
|
+
# predicates — no central walker, no is_a? switch to maintain.
|
|
68
|
+
def visible_children
|
|
69
|
+
Array(children).select(&:body_content?).reject(&:whitespace_only?)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# True when the body has no visible content anywhere in its subtree.
|
|
73
|
+
# A document with only frontmatter + comments returns true; a
|
|
74
|
+
# document with one non-whitespace paragraph returns false.
|
|
75
|
+
def empty_body?
|
|
76
|
+
return true if children.nil? || children.empty?
|
|
77
|
+
|
|
78
|
+
children.all? do |child|
|
|
79
|
+
next true unless child.body_content?
|
|
80
|
+
next true if child.whitespace_only?
|
|
81
|
+
next child.empty_body? if child.is_a?(StructuralElement)
|
|
82
|
+
|
|
83
|
+
false
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
55
87
|
# Derived element_type string for backward compatibility with
|
|
56
88
|
# templates and legacy consumers. Subclasses override this.
|
|
57
89
|
def element_type
|
|
@@ -108,6 +140,15 @@ module Coradoc
|
|
|
108
140
|
true
|
|
109
141
|
end
|
|
110
142
|
|
|
143
|
+
# A level-0 HeaderElement represents the document title (the `= Title`
|
|
144
|
+
# line in AsciiDoc, the `<h1>` in HTML). It is structurally part of
|
|
145
|
+
# the body but semantically the document's title, not a section —
|
|
146
|
+
# section numbering, TOC builders, and other section-aware logic
|
|
147
|
+
# skip these so the title is not counted as "section 1".
|
|
148
|
+
def document_title?
|
|
149
|
+
level.to_i.zero?
|
|
150
|
+
end
|
|
151
|
+
|
|
111
152
|
def self.element_type_name
|
|
112
153
|
'header'
|
|
113
154
|
end
|
data/lib/coradoc/core_model.rb
CHANGED
|
@@ -70,6 +70,7 @@ module Coradoc
|
|
|
70
70
|
autoload :SidebarBlock, "#{__dir__}/core_model/sidebar_block"
|
|
71
71
|
autoload :LiteralBlock, "#{__dir__}/core_model/literal_block"
|
|
72
72
|
autoload :PassBlock, "#{__dir__}/core_model/pass_block"
|
|
73
|
+
autoload :StemBlock, "#{__dir__}/core_model/stem_block"
|
|
73
74
|
autoload :ListingBlock, "#{__dir__}/core_model/listing_block"
|
|
74
75
|
autoload :OpenBlock, "#{__dir__}/core_model/open_block"
|
|
75
76
|
autoload :VerseBlock, "#{__dir__}/core_model/verse_block"
|
|
@@ -82,5 +83,6 @@ module Coradoc
|
|
|
82
83
|
autoload :Include, "#{__dir__}/core_model/include"
|
|
83
84
|
autoload :IncludeOptions, "#{__dir__}/core_model/include_options"
|
|
84
85
|
autoload :IncludeLevelOffset, "#{__dir__}/core_model/include_level_offset"
|
|
86
|
+
autoload :OutputArtifact, "#{__dir__}/core_model/output_artifact"
|
|
85
87
|
end
|
|
86
88
|
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module LinkRewriter
|
|
5
|
+
# Default no-op rewriter. Returns every target unchanged. Used when
|
|
6
|
+
# the caller only wants the visitor's immutable-copy guarantee.
|
|
7
|
+
class Identity
|
|
8
|
+
def call(target:, **)
|
|
9
|
+
target
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module LinkRewriter
|
|
5
|
+
# Focused rewriting visitor for the CoreModel tree.
|
|
6
|
+
#
|
|
7
|
+
# The set of node types that carry link/xref targets is closed and
|
|
8
|
+
# small: +LinkElement+, +CrossReferenceElement+, plus generic
|
|
9
|
+
# +InlineElement+ instances whose +format_type+ is +'link'+ or
|
|
10
|
+
# +'xref'+. That classification is owned polymorphically by
|
|
11
|
+
# +InlineElement#link_kind+ — the visitor just calls it. Adding a
|
|
12
|
+
# new link-bearing subclass means overriding +link_kind+ on it,
|
|
13
|
+
# not editing a case/when here (OCP).
|
|
14
|
+
#
|
|
15
|
+
# Verbatim block types are also closed: +SourceBlock+, +ListingBlock+,
|
|
16
|
+
# +LiteralBlock+, +PassBlock+, +StemBlock+. The visitor returns them
|
|
17
|
+
# unchanged so the rewriter never sees link-shaped text that is, in
|
|
18
|
+
# fact, raw code/math.
|
|
19
|
+
#
|
|
20
|
+
# Dispatch is class-based (no +respond_to?+ duck-typing). Unrecognized
|
|
21
|
+
# classes are returned unchanged — the visitor is closed by design.
|
|
22
|
+
class Visitor
|
|
23
|
+
# Verbatim block classes — content is raw, no link semantics.
|
|
24
|
+
VERBATIM_TYPES = [
|
|
25
|
+
Coradoc::CoreModel::SourceBlock,
|
|
26
|
+
Coradoc::CoreModel::ListingBlock,
|
|
27
|
+
Coradoc::CoreModel::LiteralBlock,
|
|
28
|
+
Coradoc::CoreModel::PassBlock,
|
|
29
|
+
Coradoc::CoreModel::StemBlock
|
|
30
|
+
].freeze
|
|
31
|
+
|
|
32
|
+
# Structural/container classes that own a child collection. Each
|
|
33
|
+
# entry maps the class to the reader method that exposes its
|
|
34
|
+
# children. MECE — every "recurse into the children" case lands
|
|
35
|
+
# in this table. Exposed as a public constant so spec helpers
|
|
36
|
+
# and other visitors can mirror the same paths without restating
|
|
37
|
+
# the dispatch (DRY).
|
|
38
|
+
CONTAINER_TYPES = {
|
|
39
|
+
Coradoc::CoreModel::DocumentElement => :children,
|
|
40
|
+
Coradoc::CoreModel::SectionElement => :children,
|
|
41
|
+
Coradoc::CoreModel::PreambleElement => :children,
|
|
42
|
+
Coradoc::CoreModel::HeaderElement => :children,
|
|
43
|
+
Coradoc::CoreModel::Block => :children,
|
|
44
|
+
Coradoc::CoreModel::ListBlock => :items,
|
|
45
|
+
Coradoc::CoreModel::ListItem => :children,
|
|
46
|
+
Coradoc::CoreModel::Table => :rows,
|
|
47
|
+
Coradoc::CoreModel::TableRow => :cells,
|
|
48
|
+
Coradoc::CoreModel::TableCell => :children,
|
|
49
|
+
Coradoc::CoreModel::DefinitionList => :items,
|
|
50
|
+
Coradoc::CoreModel::Toc => :entries,
|
|
51
|
+
Coradoc::CoreModel::Bibliography => :entries,
|
|
52
|
+
Coradoc::CoreModel::AnnotationBlock => :children
|
|
53
|
+
}.freeze
|
|
54
|
+
|
|
55
|
+
def initialize(rewriter)
|
|
56
|
+
@rewriter = rewriter
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Entry point. For CoreModel input, always returns a NEW root node — even Identity
|
|
60
|
+
# callers can rely on object identity to confirm the rewrite ran.
|
|
61
|
+
def visit_document(document)
|
|
62
|
+
return document unless document.is_a?(Coradoc::CoreModel::Base)
|
|
63
|
+
|
|
64
|
+
result = visit_subtree(document)
|
|
65
|
+
result.equal?(document) ? document.dup : result
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
private
|
|
69
|
+
|
|
70
|
+
def visit_subtree(node)
|
|
71
|
+
return node if VERBATIM_TYPES.any? { |type| node.is_a?(type) }
|
|
72
|
+
return rewrite_inline(node) if node.is_a?(Coradoc::CoreModel::InlineElement)
|
|
73
|
+
|
|
74
|
+
reader = reader_for(node)
|
|
75
|
+
return node unless reader
|
|
76
|
+
|
|
77
|
+
rewrite_collection(node, reader)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Look up the children-reader method for +node+. Returns nil for
|
|
81
|
+
# unrecognized classes (no duck-typing — the CONTAINER_TYPES
|
|
82
|
+
# table is the single source of truth).
|
|
83
|
+
def reader_for(node)
|
|
84
|
+
CONTAINER_TYPES.each do |klass, reader|
|
|
85
|
+
return reader if node.is_a?(klass)
|
|
86
|
+
end
|
|
87
|
+
nil
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def rewrite_collection(node, attr_name)
|
|
91
|
+
original = node.public_send(attr_name)
|
|
92
|
+
return node if original.nil? || original.empty?
|
|
93
|
+
|
|
94
|
+
rewritten = original.map { |child| visit_subtree(child) }
|
|
95
|
+
return node if unchanged?(rewritten, original)
|
|
96
|
+
|
|
97
|
+
rebuild_with(node, attr_name => rewritten)
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Inline dispatch. Typed LinkElement / CrossReferenceElement are
|
|
101
|
+
# always candidates; generic InlineElement instances must declare a
|
|
102
|
+
# matching format_type. Other typed subclasses (Bold, Italic, …)
|
|
103
|
+
# are walked for nested inlines instead of being rewritten.
|
|
104
|
+
def rewrite_inline(inline)
|
|
105
|
+
kind = inline.link_kind
|
|
106
|
+
|
|
107
|
+
rewritten_target = rewrite_target(inline, kind)
|
|
108
|
+
rewritten_nested = rewrite_nested(inline)
|
|
109
|
+
|
|
110
|
+
return inline if rewritten_target.nil? && rewritten_nested.nil?
|
|
111
|
+
|
|
112
|
+
overrides = {}
|
|
113
|
+
overrides[:target] = rewritten_target unless rewritten_target.nil?
|
|
114
|
+
overrides[:nested_elements] = rewritten_nested unless rewritten_nested.nil?
|
|
115
|
+
rebuild_with(inline, overrides)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def rewrite_target(inline, kind)
|
|
119
|
+
return nil unless kind
|
|
120
|
+
|
|
121
|
+
target = inline.target
|
|
122
|
+
return nil if target.nil? || target.empty?
|
|
123
|
+
|
|
124
|
+
new_target = @rewriter.call(
|
|
125
|
+
target: target,
|
|
126
|
+
kind: kind,
|
|
127
|
+
context: { in_verbatim: false }
|
|
128
|
+
)
|
|
129
|
+
return nil if new_target == target
|
|
130
|
+
|
|
131
|
+
new_target
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
def rewrite_nested(inline)
|
|
135
|
+
nested = inline.nested_elements
|
|
136
|
+
return nil if nested.nil? || nested.empty?
|
|
137
|
+
|
|
138
|
+
rewritten = nested.map { |child| visit_subtree(child) }
|
|
139
|
+
return nil if unchanged?(rewritten, nested)
|
|
140
|
+
|
|
141
|
+
rewritten
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def rebuild_with(node, overrides)
|
|
145
|
+
duplicate = node.dup
|
|
146
|
+
overrides.each { |key, value| duplicate.public_send("#{key}=", value) }
|
|
147
|
+
duplicate
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
def unchanged?(rewritten, original)
|
|
151
|
+
return false unless rewritten.length == original.length
|
|
152
|
+
|
|
153
|
+
rewritten.each_with_index.all? { |node, i| node.equal?(original[i]) }
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
# Post-parse link/xref rewriting.
|
|
5
|
+
#
|
|
6
|
+
# Consumers that need to canonicalize link and xref targets (snake→kebab,
|
|
7
|
+
# strip +.adoc+, redirect maps, dialect translation) get a single
|
|
8
|
+
# immutable entry point: +Coradoc.rewrite_links(doc, rewriter:, &)+.
|
|
9
|
+
# The visitor walks the parsed CoreModel, invokes the supplied rewriter
|
|
10
|
+
# for every link/xref target, and returns a NEW document. Verbatim
|
|
11
|
+
# blocks (source, listing, literal, pass, stem) are skipped entirely —
|
|
12
|
+
# coradoc owns the parse and guarantees those bodies never reach the
|
|
13
|
+
# rewriter, removing the "track parser state to avoid verbatim bodies"
|
|
14
|
+
# footgun that plagues regex-based rewriting.
|
|
15
|
+
#
|
|
16
|
+
# Two-step API mirrors +Coradoc.resolve_includes+: parse produces the
|
|
17
|
+
# document, rewrite is a separate explicit step the caller controls.
|
|
18
|
+
module LinkRewriter
|
|
19
|
+
autoload :Identity, "#{__dir__}/link_rewriter/identity"
|
|
20
|
+
autoload :Visitor, "#{__dir__}/link_rewriter/visitor"
|
|
21
|
+
|
|
22
|
+
class << self
|
|
23
|
+
# Rewrite every link/xref target in +doc+.
|
|
24
|
+
#
|
|
25
|
+
# +rewriter+ responds to +#call(target:, kind:, context:)+ and returns
|
|
26
|
+
# the new target String. If no +rewriter+ is given, the block is used as the
|
|
27
|
+
# rewriter. Omitting both falls back to {Identity} (no-op) — useful
|
|
28
|
+
# for "give me a structurally identical copy" cases.
|
|
29
|
+
#
|
|
30
|
+
# Returns a NEW document; the input is never mutated.
|
|
31
|
+
def rewrite(doc, rewriter: nil, &block)
|
|
32
|
+
callable = rewriter || block || Identity.new
|
|
33
|
+
Visitor.new(callable).visit_document(doc)
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
# Pure-function module for relative-path arithmetic across output keys.
|
|
5
|
+
#
|
|
6
|
+
# Every SSG wrapper (VitePress, Hugo, Astro) needs to compute "how many
|
|
7
|
+
# directories up do I walk to reach the site root from this output?".
|
|
8
|
+
# The answer is the directory depth (count of '/') of the source output_key. Everything
|
|
9
|
+
# else (template, imports) is host-system-specific; this module owns
|
|
10
|
+
# only the one piece of arithmetic that is genuinely shared.
|
|
11
|
+
#
|
|
12
|
+
# No state. No class. No knowledge of any specific SSG.
|
|
13
|
+
#
|
|
14
|
+
# @example Compute a VitePress import path
|
|
15
|
+
# Coradoc::RelativePath.from("author/iso/ref/foo", to: ".vitepress/theme")
|
|
16
|
+
# # => "../../../.vitepress/theme"
|
|
17
|
+
module RelativePath
|
|
18
|
+
module_function
|
|
19
|
+
|
|
20
|
+
# Compute a relative path from an output_key to a site-root-relative
|
|
21
|
+
# target.
|
|
22
|
+
#
|
|
23
|
+
# @param output_key [String, nil] site-relative key for the source
|
|
24
|
+
# page (e.g. "author/iso/ref/foo"). No leading slash, no extension.
|
|
25
|
+
# @param to [String] destination path relative to the site root.
|
|
26
|
+
# @return [String] the composed relative path.
|
|
27
|
+
def from(output_key, to:)
|
|
28
|
+
depth = output_key.to_s.count('/')
|
|
29
|
+
('../' * depth) + to.to_s
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
data/lib/coradoc/version.rb
CHANGED
data/lib/coradoc.rb
CHANGED
|
@@ -43,4 +43,6 @@ module Coradoc
|
|
|
43
43
|
autoload :DocumentManipulator, 'coradoc/document_manipulator'
|
|
44
44
|
autoload :Visitor, 'coradoc/visitor'
|
|
45
45
|
autoload :Serializer, 'coradoc/serializer/registry'
|
|
46
|
+
autoload :LinkRewriter, 'coradoc/link_rewriter'
|
|
47
|
+
autoload :RelativePath, 'coradoc/relative_path'
|
|
46
48
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: coradoc
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.22
|
|
4
|
+
version: 2.0.23
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
@@ -90,6 +90,7 @@ files:
|
|
|
90
90
|
- lib/coradoc/core_model/literal_block.rb
|
|
91
91
|
- lib/coradoc/core_model/metadata.rb
|
|
92
92
|
- lib/coradoc/core_model/open_block.rb
|
|
93
|
+
- lib/coradoc/core_model/output_artifact.rb
|
|
93
94
|
- lib/coradoc/core_model/paragraph_block.rb
|
|
94
95
|
- lib/coradoc/core_model/pass_block.rb
|
|
95
96
|
- lib/coradoc/core_model/quote_block.rb
|
|
@@ -97,6 +98,7 @@ files:
|
|
|
97
98
|
- lib/coradoc/core_model/reviewer_block.rb
|
|
98
99
|
- lib/coradoc/core_model/sidebar_block.rb
|
|
99
100
|
- lib/coradoc/core_model/source_block.rb
|
|
101
|
+
- lib/coradoc/core_model/stem_block.rb
|
|
100
102
|
- lib/coradoc/core_model/structural_element.rb
|
|
101
103
|
- lib/coradoc/core_model/table.rb
|
|
102
104
|
- lib/coradoc/core_model/term.rb
|
|
@@ -117,12 +119,16 @@ files:
|
|
|
117
119
|
- lib/coradoc/include_selectors/lines.rb
|
|
118
120
|
- lib/coradoc/include_selectors/tags.rb
|
|
119
121
|
- lib/coradoc/input.rb
|
|
122
|
+
- lib/coradoc/link_rewriter.rb
|
|
123
|
+
- lib/coradoc/link_rewriter/identity.rb
|
|
124
|
+
- lib/coradoc/link_rewriter/visitor.rb
|
|
120
125
|
- lib/coradoc/logger.rb
|
|
121
126
|
- lib/coradoc/output.rb
|
|
122
127
|
- lib/coradoc/performance_regression.rb
|
|
123
128
|
- lib/coradoc/processor_registry.rb
|
|
124
129
|
- lib/coradoc/query.rb
|
|
125
130
|
- lib/coradoc/registry.rb
|
|
131
|
+
- lib/coradoc/relative_path.rb
|
|
126
132
|
- lib/coradoc/resolve_includes.rb
|
|
127
133
|
- lib/coradoc/serializer/registry.rb
|
|
128
134
|
- lib/coradoc/transform.rb
|