coradoc 2.0.21 → 2.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/cli.rb +26 -1
- data/lib/coradoc/coradoc.rb +66 -8
- data/lib/coradoc/core_model/children_content.rb +5 -0
- data/lib/coradoc/core_model/frontmatter/frontmatter_value.rb +61 -0
- data/lib/coradoc/core_model/has_children.rb +23 -0
- data/lib/coradoc/core_model/include.rb +43 -0
- data/lib/coradoc/core_model/include_level_offset.rb +71 -0
- data/lib/coradoc/core_model/include_options.rb +100 -0
- data/lib/coradoc/core_model/structural_element.rb +5 -0
- data/lib/coradoc/core_model.rb +4 -0
- data/lib/coradoc/errors.rb +56 -0
- data/lib/coradoc/include_resolver/filesystem.rb +84 -0
- data/lib/coradoc/include_resolver.rb +67 -0
- data/lib/coradoc/include_selectors/indent.rb +54 -0
- data/lib/coradoc/include_selectors/level_offset.rb +86 -0
- data/lib/coradoc/include_selectors/lines.rb +60 -0
- data/lib/coradoc/include_selectors/tags.rb +138 -0
- data/lib/coradoc/include_selectors.rb +26 -0
- data/lib/coradoc/resolve_includes.rb +202 -0
- data/lib/coradoc/version.rb +1 -1
- metadata +14 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a33595a0aa6205f5a5d1adabd8ede0dc8f3d4237c2376a910800822400af2f59
|
|
4
|
+
data.tar.gz: ecb898a27a6cb1540b553fdb3aa97bef73077a85a13175099738fefe3307faf2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: db01c4272054b9f2907ba0af6444e741c6d3151bf916f22aeb11dc25380ea7b0a9d63f88ba2fe2eeacd07bfcf08e50a30b8eeac914605ac394d4bccc9d3d282a
|
|
7
|
+
data.tar.gz: c40699fd199f0f74a8f657ff7ee26036703e0865862919f2787acd52f532e40c825687cd52c4ef4c4a01337f3819121813f6f291dcd76d264c27bd916f1bc823
|
data/lib/coradoc/cli.rb
CHANGED
|
@@ -22,6 +22,16 @@ module Coradoc
|
|
|
22
22
|
option :section_numbers, desc: 'Enable section numbering', type: :boolean, default: false
|
|
23
23
|
option :section_number_levels, desc: 'Section numbering depth (1-6)', type: :numeric, default: 3
|
|
24
24
|
option :lang, desc: 'Document language code', type: :string, default: 'en'
|
|
25
|
+
option :resolve_includes, desc: 'Resolve include:: directives inline (default: leave as link nodes)',
|
|
26
|
+
type: :boolean, default: false
|
|
27
|
+
option :base_dir, desc: 'Base directory for include resolution (default: dirname of FILE)',
|
|
28
|
+
type: :string
|
|
29
|
+
option :missing_include, desc: 'Policy for missing includes: error, warn, silent, passthrough',
|
|
30
|
+
type: :string, default: 'error'
|
|
31
|
+
option :max_include_depth, desc: 'Maximum include nesting depth', type: :numeric,
|
|
32
|
+
default: 64
|
|
33
|
+
option :allow_unsafe_includes, desc: 'Disable path-traversal protection (asciidoctor :unsafe mode)',
|
|
34
|
+
type: :boolean, default: false
|
|
25
35
|
def convert(file)
|
|
26
36
|
source_format = resolve_format(file, :from)
|
|
27
37
|
target_format = options[:to] ? Coradoc.normalize_format(options[:to]) : Coradoc.resolve_output_format(options[:output])
|
|
@@ -38,8 +48,11 @@ module Coradoc
|
|
|
38
48
|
|
|
39
49
|
verbose_log "Converting #{file} (#{source_format}) to #{target_format}"
|
|
40
50
|
|
|
51
|
+
doc = Coradoc.parse_file(file, format: source_format)
|
|
52
|
+
doc = resolve_includes!(doc, file) if options[:resolve_includes]
|
|
53
|
+
|
|
41
54
|
opts = build_convert_options
|
|
42
|
-
result = Coradoc.
|
|
55
|
+
result = Coradoc.serialize(doc, to: target_format, **opts)
|
|
43
56
|
write_output(result, options[:output])
|
|
44
57
|
rescue Coradoc::Error => e
|
|
45
58
|
error "Error: #{e.message}"
|
|
@@ -215,5 +228,17 @@ module Coradoc
|
|
|
215
228
|
opts[key] = SYMBOL_OPTIONS.include?(key) ? value.to_sym : value
|
|
216
229
|
end
|
|
217
230
|
end
|
|
231
|
+
|
|
232
|
+
# Expand the include directives of +doc+ according to the CLI options.
#
# The base directory is the +base_dir+ option when given, otherwise the
# directory containing +source_file+. Either way it is expanded to an
# absolute path, so the verbose log line and the resolver receive the
# same form regardless of which source supplied it.
#
# @param doc [Object] parsed document handed to Coradoc.resolve_includes
# @param source_file [String] path of the file being converted
# @return [Object] the value returned by Coradoc.resolve_includes
def resolve_includes!(doc, source_file)
  base_dir = File.expand_path(options[:base_dir] || File.dirname(source_file))
  verbose_log "Resolving includes against #{base_dir}"
  Coradoc.resolve_includes(
    doc,
    base_dir: base_dir,
    # The option arrives as a string; the library API takes a symbol.
    missing_include: options[:missing_include].to_sym,
    max_depth: options[:max_include_depth],
    allow_unsafe: options[:allow_unsafe_includes]
  )
end
|
|
218
243
|
end
|
|
219
244
|
end
|
data/lib/coradoc/coradoc.rb
CHANGED
|
@@ -86,22 +86,27 @@ module Coradoc
|
|
|
86
86
|
registry.list
|
|
87
87
|
end
|
|
88
88
|
|
|
89
|
-
# Parse text to a document model
|
|
89
|
+
# Parse text to a document model.
|
|
90
90
|
#
|
|
91
|
-
#
|
|
92
|
-
#
|
|
91
|
+
# Graph mode is the only mode: +include::+ directives survive as
|
|
92
|
+
# +CoreModel::Include+ link nodes pointing at other files. NO file
|
|
93
|
+
# I/O happens during parse. The result is a single document that
|
|
94
|
+
# references other documents via Include edges — a text graph.
|
|
95
|
+
#
|
|
96
|
+
# To splice included content inline, call +Coradoc.resolve_includes+
|
|
97
|
+
# on the parsed document. This is an explicit, separate step so the
|
|
98
|
+
# caller controls when (and whether) file I/O happens.
|
|
93
99
|
#
|
|
94
100
|
# @param text [String] the document text to parse
|
|
95
101
|
# @param format [Symbol] the source format (:asciidoc, :html, :markdown)
|
|
96
102
|
# @return [Coradoc::CoreModel::Base, Object] the parsed document model
|
|
97
103
|
# @raise [UnsupportedFormatError] if the format is not registered
|
|
98
104
|
#
|
|
99
|
-
# @example Parse
|
|
100
|
-
# doc = Coradoc.parse(
|
|
101
|
-
# doc = Coradoc.parse(File.read("doc.adoc"), format: :asciidoc)
|
|
105
|
+
# @example Parse — Include directives stay as link nodes
|
|
106
|
+
# doc = Coradoc.parse(text, format: :asciidoc)
|
|
102
107
|
#
|
|
103
|
-
# @example
|
|
104
|
-
#
|
|
108
|
+
# @example Then flatten — splice included files inline
|
|
109
|
+
# flat = Coradoc.resolve_includes(doc, base_dir: Dir.pwd)
|
|
105
110
|
def parse(text, format:)
|
|
106
111
|
format_module = get_format(format)
|
|
107
112
|
unless format_module
|
|
@@ -115,6 +120,56 @@ module Coradoc
|
|
|
115
120
|
Hooks.invoke(:after_parse, result, format: format)
|
|
116
121
|
end
|
|
117
122
|
|
|
123
|
+
# Flatten a parsed document by resolving its +include::+ directives.
#
# The supplied +resolver+ is first coerced through
# +Coradoc::IncludeResolver.coerce+ (together with +base_dir+ and
# +allow_unsafe+), and the result is handed to
# +Coradoc::ResolveIncludes.call+ along with the remaining settings.
# Per the library's description of this step, each +CoreModel::Include+
# link node is replaced by the parsed content of its target and the
# input document is left unchanged.
#
# @param document [Coradoc::CoreModel::Base] parsed document
# @param base_dir [String] base directory for relative include paths
# @param missing_include [Symbol] :error (default), :warn, :silent, :passthrough
# @param max_depth [Integer] recursion cap
#   (defaults to +Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH+)
# @param allow_unsafe [Boolean] disable path-traversal protection
# @param resolver [Object, nil] custom resolver; nil lets
#   +IncludeResolver.coerce+ choose the default
# @return [Coradoc::CoreModel::Base] document with includes expanded
# @raise [Coradoc::IncludeNotFoundError] when a target is missing and the
#   policy is :error
# @raise [Coradoc::IncludeDepthExceededError] when +max_depth+ is hit
# @raise [Coradoc::CircularIncludeError] when an include cycle is detected
#
# @example
#   doc  = Coradoc.parse(text, format: :asciidoc)
#   flat = Coradoc.resolve_includes(doc, base_dir: Dir.pwd)
def resolve_includes(document, base_dir:,
                     missing_include: :error,
                     max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH,
                     allow_unsafe: false,
                     resolver: nil)
  effective_resolver = Coradoc::IncludeResolver.coerce(
    resolver, base_dir: base_dir, allow_unsafe: allow_unsafe
  )
  walk_settings = {
    resolver: effective_resolver,
    base_dir: base_dir,
    missing_include: missing_include,
    max_depth: max_depth
  }
  Coradoc::ResolveIncludes.call(document, **walk_settings)
end
|
|
172
|
+
|
|
118
173
|
# Convert document text from one format to another
|
|
119
174
|
#
|
|
120
175
|
# This is the main entry point for format conversion. It handles the
|
|
@@ -459,6 +514,9 @@ module Coradoc
|
|
|
459
514
|
autoload :DocumentManipulator, "#{__dir__}/document_manipulator"
|
|
460
515
|
autoload :Visitor, "#{__dir__}/visitor"
|
|
461
516
|
autoload :PerformanceRegression, "#{__dir__}/performance_regression"
|
|
517
|
+
autoload :IncludeResolver, "#{__dir__}/include_resolver"
|
|
518
|
+
autoload :IncludeSelectors, "#{__dir__}/include_selectors"
|
|
519
|
+
autoload :ResolveIncludes, "#{__dir__}/resolve_includes"
|
|
462
520
|
end
|
|
463
521
|
|
|
464
522
|
# Format gems self-register via Coradoc.register_format when they are required.
|
|
@@ -11,7 +11,12 @@ module Coradoc
|
|
|
11
11
|
# attribute :children, Base, collection: true
|
|
12
12
|
# on each including class. This module overrides the setter to
|
|
13
13
|
# auto-wrap raw strings as TextContent, keeping all callers simple.
|
|
14
|
+
#
|
|
15
|
+
# Includes HasChildren so all mixed-content classes also satisfy
|
|
16
|
+
# the structural predicate (OCP — no subclass enumeration needed
|
|
17
|
+
# for children-based dispatch).
|
|
14
18
|
module ChildrenContent
|
|
19
|
+
include HasChildren
|
|
15
20
|
# Override the children= setter to auto-wrap strings as TextContent.
|
|
16
21
|
# This is defined via define_method so it always overrides the
|
|
17
22
|
# lutaml-generated setter, regardless of include order.
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
  module CoreModel
    class FrontmatterBlock
      # One typed value inside a FrontmatterBlock entry tree.
      #
      # +value_type+ is the discriminator. For scalar types the matching
      # +<type>_value+ slot holds the payload; for the container types
      # +items+ (array) or +entries+ (map) hold nested nodes.
      #
      #   scalar    -> string, integer, float, boolean, date, datetime, symbol, nil
      #   container -> array, map
      class FrontmatterValue < Base
        CONTAINER_TYPES = %w[array map].freeze
        SCALAR_TYPES = %w[string integer float boolean date datetime symbol nil].freeze
        ALL_TYPES = (SCALAR_TYPES + CONTAINER_TYPES).freeze

        # Discriminator naming which slot below is populated.
        attribute :value_type, :string

        # Scalar payload slots, one per scalar type except 'nil'.
        attribute :string_value, :string
        attribute :integer_value, :integer
        attribute :float_value, :float
        attribute :boolean_value, :boolean
        attribute :date_value, :date
        attribute :datetime_value, :date_time
        attribute :symbol_value, :symbol

        # Container payload slots for 'array' and 'map'.
        attribute :items, FrontmatterValue, collection: true
        attribute :entries, FrontmatterEntry, collection: true

        # Ruby-native scalar held by this node.
        #
        # @return [Object, nil] the populated scalar slot; nil for the 'nil'
        #   type, for containers, and for an unknown or unset value_type
        def ruby_value
          return nil if value_type == 'nil'
          return nil unless SCALAR_TYPES.include?(value_type)

          # Every remaining scalar type has a slot named "<type>_value".
          public_send("#{value_type}_value")
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
  module CoreModel
    # Marker mixin: "this model exposes a +children+ collection".
    #
    # Traversal code is meant to test +is_a?(HasChildren)+ instead of
    # listing concrete classes, so a new children-bearing class only has
    # to include this module. ChildrenContent includes it as well, and
    # classes with typed block children only (such as StructuralElement)
    # include it directly.
    module HasChildren
      # @return [true] always, for any object that mixes this module in
      def has_children?
        true
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
  module CoreModel
    # Include directive as a first-class, block-level node.
    #
    # The node is a link from the current document to another file. It is
    # kept as-is by the parser (no file is read while parsing); splicing
    # the target's content in is done separately by
    # +Coradoc.resolve_includes(doc, base_dir:)+.
    #
    # Attributes:
    #   target       String          path or URL as authored
    #   options      IncludeOptions  parsed selectors
    #                                (tags/lines/leveloffset/indent/encoding)
    #   raw_options  String          original bracket body, kept for
    #                                verbatim round-trip (default "")
    #   line_break   String          trailing line break (default "\n")
    class Include < Base
      attribute :target, :string
      attribute :options, Coradoc::CoreModel::IncludeOptions,
                default: -> { Coradoc::CoreModel::IncludeOptions.new }
      attribute :raw_options, :string, default: -> { '' }
      attribute :line_break, :string, default: -> { "\n" }

      class << self
        # @return [Symbol] the semantic type tag for this node class
        def semantic_type
          :include
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
  module CoreModel
    # Parsed +leveloffset+ value of an include directive.
    #
    # Two wire forms are recognised:
    #   "+N" / "-N"  relative shift  (mode "relative", delta = signed N)
    #   "N"          absolute level  (mode "absolute", delta = N)
    #
    # Mode and delta are stored separately so the raw string only has to
    # be parsed once.
    class IncludeLevelOffset < Base
      # "relative" or "absolute".
      attribute :mode, :string

      # Signed shift in relative mode; target level in absolute mode.
      attribute :delta, :integer

      # Build an offset from its wire form.
      #
      # @param raw [String, nil] e.g. "+2", "-1", "3"
      # @return [IncludeLevelOffset, nil] nil when +raw+ is nil, blank, or
      #   not an optionally signed run of digits
      def self.parse(raw)
        return nil if raw.nil?

        trimmed = raw.strip
        trimmed.empty? ? nil : matched_offset(trimmed)
      end

      # Match the trimmed wire form and construct the offset.
      #
      # @param trimmed [String] input with surrounding whitespace removed
      # @return [IncludeLevelOffset, nil] nil when the pattern does not match
      def self.matched_offset(trimmed)
        found = /\A(?<sign>[+-]?)(?<digits>\d+)\z/.match(trimmed)
        return nil unless found

        magnitude = found[:digits].to_i
        sign = found[:sign]
        new(
          mode: sign.empty? ? 'absolute' : 'relative',
          delta: sign == '-' ? -magnitude : magnitude
        )
      end
      private_class_method :matched_offset

      # Apply the offset to a section level.
      #
      # @param level [Integer] original section level
      # @return [Integer] shifted (relative) or replaced (absolute) level,
      #   floored at 0; +level+ itself when mode is neither
      def apply(level)
        return [level + delta, 0].max if mode == 'relative'
        return [delta, 0].max if mode == 'absolute'

        level
      end

      # Wire form of this offset.
      #
      # @return [String] explicitly signed for relative mode, bare number
      #   for absolute mode, empty string otherwise
      def to_s
        if mode == 'relative'
          format('%+d', delta)
        elsif mode == 'absolute'
          delta.to_s
        else
          ''
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
  module CoreModel
    # Typed options of an include directive, parsed once at construction
    # so downstream selectors never re-parse the raw strings.
    #
    #   tags           Array<String>        ["body"], [] when unspecified
    #   tags_wildcard  Boolean              true for tags=*
    #   tags_inverted  Boolean              true for tags=**
    #   lines_spec     String?              raw "1..2;5;7..8"
    #   leveloffset    IncludeLevelOffset?
    #   indent         Integer?             nil when absent or not a decimal integer
    #   file_encoding  String?              nil when absent or blank
    class IncludeOptions < Base
      attribute :tags, :string, collection: true, default: -> { [] }
      attribute :tags_wildcard, :boolean, default: -> { false }
      attribute :tags_inverted, :boolean, default: -> { false }
      attribute :lines_spec, :string
      attribute :leveloffset, Coradoc::CoreModel::IncludeLevelOffset
      attribute :indent, :integer
      attribute :file_encoding, :string

      # @return [Boolean] true when a non-blank lines selector is present
      def lines?
        !lines_spec.nil? && !lines_spec.strip.empty?
      end

      # @return [Boolean] true when any tag selector is present
      def tags?
        tags_wildcard || tags_inverted || !tags.empty?
      end

      # @return [Boolean] true when both lines and tags were specified
      def conflict_resolved_to_lines?
        lines? && tags?
      end

      # Build from a flat hash of key/value strings. Keys and values are
      # whitespace-trimmed before interpretation.
      #
      # @param attrs [Hash{String=>String}, nil]
      #   e.g. {"tags"=>"a;b", "leveloffset"=>"+2"}; nil is treated as {}
      # @return [IncludeOptions]
      def self.from_hash(attrs)
        new(build_args(attrs || {}))
      end

      class << self
        private

        # Translate the cleaned hash into constructor arguments.
        def build_args(attrs)
          cleaned = clean_keys(attrs)
          {
            tags: parse_tags(cleaned['tags']),
            tags_wildcard: wildcard?(cleaned['tags']),
            tags_inverted: inverted?(cleaned['tags']),
            lines_spec: cleaned['lines'],
            leveloffset: CoreModel::IncludeLevelOffset.parse(cleaned['leveloffset']),
            indent: parse_integer(cleaned['indent']),
            # A blank encoding name can never be valid; store nil so
            # consumers fall back to their default instead of failing on ''.
            file_encoding: presence(cleaned['encoding'])
          }
        end

        # Stringify and trim every key and value.
        def clean_keys(attrs)
          attrs.each_with_object({}) do |(k, v), h|
            h[k.to_s.strip] = v.to_s.strip
          end
        end

        # Split "a;b" into tag names; the bare wildcards yield no names.
        def parse_tags(raw)
          return [] if raw.nil?

          trimmed = raw.strip
          return [] if trimmed.empty? || trimmed == '*' || trimmed == '**'

          trimmed.split(';').map(&:strip).reject(&:empty?)
        end

        def wildcard?(raw)
          !raw.nil? && raw.strip == '*'
        end

        def inverted?(raw)
          !raw.nil? && raw.strip == '**'
        end

        # Parse a decimal integer; nil for absent, blank or malformed input.
        # The explicit base 10 stops Kernel#Integer from honouring radix
        # prefixes ("010" would otherwise be octal 8, "0x10" hex 16).
        def parse_integer(raw)
          return nil if raw.nil? || raw.strip.empty?

          Integer(raw.strip, 10)
        rescue ArgumentError
          nil
        end

        # @return [String, nil] +raw+ unless it is nil or blank
        def presence(raw)
          raw.nil? || raw.strip.empty? ? nil : raw
        end
      end
    end
  end
end
|
|
@@ -11,6 +11,11 @@ module Coradoc
|
|
|
11
11
|
# Structural elements can contain other elements (blocks, lists, etc.)
|
|
12
12
|
# and can be nested hierarchically to represent document structure.
|
|
13
13
|
class StructuralElement < Base
|
|
14
|
+
# StructuralElements carry typed block children (sections, paragraphs,
|
|
15
|
+
# etc.) rather than mixed inline content, so they don't include
|
|
16
|
+
# ChildrenContent. HasChildren marks the structural predicate that
|
|
17
|
+
# downstream traversal dispatches on (OCP).
|
|
18
|
+
include HasChildren
|
|
14
19
|
# @!attribute level
|
|
15
20
|
# @return [Integer, nil] hierarchical level (1-6 for sections)
|
|
16
21
|
attribute :level, :integer
|
data/lib/coradoc/core_model.rb
CHANGED
|
@@ -12,6 +12,7 @@ module Coradoc
|
|
|
12
12
|
# Autoload submodules lazily using relative paths
|
|
13
13
|
autoload :Base, "#{__dir__}/core_model/base"
|
|
14
14
|
autoload :ChildrenContent, "#{__dir__}/core_model/children_content"
|
|
15
|
+
autoload :HasChildren, "#{__dir__}/core_model/has_children"
|
|
15
16
|
autoload :Callout, "#{__dir__}/core_model/callout"
|
|
16
17
|
autoload :CalloutText, "#{__dir__}/core_model/callout_text"
|
|
17
18
|
autoload :Block, "#{__dir__}/core_model/block"
|
|
@@ -78,5 +79,8 @@ module Coradoc
|
|
|
78
79
|
autoload :CommentLine, "#{__dir__}/core_model/comment_line"
|
|
79
80
|
autoload :HorizontalRuleBlock, "#{__dir__}/core_model/horizontal_rule_block"
|
|
80
81
|
autoload :IdGenerator, "#{__dir__}/core_model/id_generator"
|
|
82
|
+
autoload :Include, "#{__dir__}/core_model/include"
|
|
83
|
+
autoload :IncludeOptions, "#{__dir__}/core_model/include_options"
|
|
84
|
+
autoload :IncludeLevelOffset, "#{__dir__}/core_model/include_level_offset"
|
|
81
85
|
end
|
|
82
86
|
end
|
data/lib/coradoc/errors.rb
CHANGED
|
@@ -283,6 +283,62 @@ module Coradoc
|
|
|
283
283
|
end
|
|
284
284
|
end
|
|
285
285
|
|
|
286
|
+
# Raised when the target of an include directive cannot be located.
# Under the +missing_include+ policy only +:error+ lets this propagate;
# +:warn+, +:silent+ and +:passthrough+ swallow it.
class IncludeNotFoundError < Error
  # @return [Object] the include target that was not found
  attr_reader :target

  # @param target [Object] include target, interpolated into the message
  def initialize(target)
    super("Include target not found: #{target}")
    @target = target
  end
end
|
|
297
|
+
|
|
298
|
+
# Error raised when an include chain exceeds the configured depth limit.
class IncludeDepthExceededError < Error
  # @return [Object] the include target being processed when the limit hit
  # @return [Object] depth reached / configured maximum, as passed in
  attr_reader :depth, :max, :target

  # @param target [Object] include target at which the limit was exceeded
  # @param depth [Integer] nesting depth that was reached
  # @param max [Integer] configured maximum depth; exposed via #max so
  #   handlers do not have to parse it back out of the message
  def initialize(target:, depth:, max:)
    @target = target
    @depth = depth
    @max = max
    super("Include depth #{depth} exceeds max #{max} at #{target}")
  end
end
|
|
308
|
+
|
|
309
|
+
# Raised when the include graph contains a cycle. +chain+ is the list of
# files leading back to the repeated +target+.
class CircularIncludeError < Error
  # @return [Object] chain of includes / repeated target, as passed in
  attr_reader :chain, :target

  # @param target [Object] the include target that closes the cycle
  # @param chain [#join] files visited on the way to +target+
  def initialize(target:, chain:)
    trail = chain.join(' -> ')
    super("Circular include detected: #{trail} -> #{target}")
    @chain = chain
    @target = target
  end
end
|
|
320
|
+
|
|
321
|
+
# Raised when an include target falls outside the resolver's safe base
# directory and +allow_unsafe_includes+ is not set.
class UnsafeIncludeError < Error
  # @return [Object] the blocked path
  attr_reader :target

  # @param target [Object] blocked path, interpolated into the message
  def initialize(target)
    super("Unsafe include path blocked: #{target}")
    @target = target
  end
end
|
|
331
|
+
|
|
332
|
+
# Raised when an include target is larger than the resolver's size limit.
class IncludeTooLargeError < Error
  # @return [Object] the oversized include target
  attr_reader :target

  # @param target [Object] include target, interpolated into the message
  def initialize(target)
    super("Include target too large to read: #{target}")
    @target = target
  end
end
|
|
341
|
+
|
|
286
342
|
# Error raised when a requested format is not supported
|
|
287
343
|
#
|
|
288
344
|
# @example
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'pathname'
|
|
4
|
+
|
|
5
|
+
module Coradoc
  class IncludeResolver
    # Default include resolver: reads include targets from the local
    # filesystem. Path-traversal protection is on unless
    # +allow_unsafe: true+ is passed.
    class Filesystem < IncludeResolver
      attr_reader :base_dir, :allow_unsafe, :max_bytes

      # @param base_dir [String] directory includes are resolved against;
      #   stored as an absolute path
      # @param allow_unsafe [Boolean] when false (default), a resolved path
      #   outside the base directory is refused
      # @param max_bytes [Integer, nil] when set, files larger than this
      #   are refused
      def initialize(base_dir:, allow_unsafe: false, max_bytes: nil)
        @base_dir = File.expand_path(base_dir)
        @allow_unsafe = allow_unsafe
        @max_bytes = max_bytes
      end

      # Read one include target and return its text.
      #
      # NOTE(review): both path expansion and the safety check use the
      # per-call +base_dir+ argument, not the +base_dir+ given to the
      # constructor — confirm callers pass the directory they intend to
      # be the containment root.
      #
      # @param target [String] include path as authored
      # @param base_dir [String] directory +target+ is resolved against
      # @param options [#file_encoding, nil] include options; a nil or
      #   blank encoding means UTF-8
      # @param context [Object] unused by this resolver
      # @return [String] file content; UTF-8 with LF line endings unless
      #   the encoding is "binary", in which case the raw bytes
      # @raise [Coradoc::UnsafeIncludeError] path escapes +base_dir+
      # @raise [Coradoc::IncludeNotFoundError] path is not a regular file
      # @raise [Coradoc::IncludeTooLargeError] file exceeds +max_bytes+
      # @raise [Coradoc::Error] encoding is unknown or cannot be converted
      def call(target:, base_dir:, options:, context:)
        full = File.expand_path(target, base_dir)
        enforce_safety!(full, base_dir) unless allow_unsafe
        raise Coradoc::IncludeNotFoundError, target unless File.file?(full)

        enforce_size!(full, target)
        read_with_encoding(full, effective_encoding(options))
      end

      private

      # Encoding requested by +options+, falling back to UTF-8 when the
      # options object or the encoding name is nil or blank.
      def effective_encoding(options)
        requested = (options&.file_encoding).to_s.strip
        requested.empty? ? 'utf-8' : requested
      end

      # Refuse any path that is neither the base directory itself nor
      # located beneath it. Comparison is purely textual on expanded
      # paths; symlinks are not resolved.
      def enforce_safety!(full_path, base_dir)
        base_expanded = File.expand_path(base_dir)
        # A filesystem root already ends with the separator; appending a
        # second one would make the prefix match nothing.
        base_with_sep =
          if base_expanded.end_with?(File::SEPARATOR)
            base_expanded
          else
            "#{base_expanded}#{File::SEPARATOR}"
          end

        return if full_path == base_expanded || full_path.start_with?(base_with_sep)

        raise Coradoc::UnsafeIncludeError, full_path
      end

      # Refuse files larger than +max_bytes+ (no-op when no limit is set).
      def enforce_size!(full_path, target)
        return unless max_bytes
        return unless File.exist?(full_path)

        size = File.size(full_path)
        return if size <= max_bytes

        raise Coradoc::IncludeTooLargeError, target
      end

      # Read the file as bytes, tag them with the requested encoding and
      # transcode to UTF-8, replacing invalid or unconvertible sequences.
      # "binary" short-circuits and returns the untouched bytes.
      def read_with_encoding(full_path, encoding_name)
        content = File.binread(full_path)
        return content if encoding_name.to_s.downcase == 'binary'

        content.force_encoding(clean_encoding_name(encoding_name))
        encoded = content.encode('utf-8', invalid: :replace, undef: :replace)
        normalize_line_endings(encoded)
      rescue ArgumentError, EncodingError => e
        # ArgumentError: unknown encoding name. EncodingError: a known
        # encoding for which no converter to UTF-8 exists.
        raise Coradoc::Error, "Unsupported encoding #{encoding_name.inspect}: #{e.message}"
      end

      # Convert CRLF and lone CR to LF.
      def normalize_line_endings(text)
        text.gsub(/\r\n?/, "\n")
      end

      # Lower-case the name and map the common hyphen-less spellings
      # "utf8" / "utf_8", which Ruby does not recognise, to "utf-8".
      def clean_encoding_name(name)
        name.to_s.downcase.sub(/\Autf_?8\z/, 'utf-8')
      end
    end
  end
end
|