coradoc 2.0.21 → 2.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a4ebd4f989cb1c3bc9c6adcede592e91c6013a29bf94c3d12319e03c5c42ab38
4
- data.tar.gz: d321f36297bb31aac76fe30a6195aa9cd1456c840dfb1888405c0e36f474c4b7
3
+ metadata.gz: a33595a0aa6205f5a5d1adabd8ede0dc8f3d4237c2376a910800822400af2f59
4
+ data.tar.gz: ecb898a27a6cb1540b553fdb3aa97bef73077a85a13175099738fefe3307faf2
5
5
  SHA512:
6
- metadata.gz: 1bba8d765b1af164d1713f16013eddbdbc906340f37d1918777fd3ec1c6f1ec784e869155ccae100b6d4a4306bea003c4031fbf6276890608471a97d5a407bd2
7
- data.tar.gz: e76a4e152ebbc25ee03732c6870fbfbdf062d8a3e0af8423b4da9486d14483952a56a3af6b25c256a6350e60e475afa89112e25931b08b47209884ceb1e8089a
6
+ metadata.gz: db01c4272054b9f2907ba0af6444e741c6d3151bf916f22aeb11dc25380ea7b0a9d63f88ba2fe2eeacd07bfcf08e50a30b8eeac914605ac394d4bccc9d3d282a
7
+ data.tar.gz: c40699fd199f0f74a8f657ff7ee26036703e0865862919f2787acd52f532e40c825687cd52c4ef4c4a01337f3819121813f6f291dcd76d264c27bd916f1bc823
data/lib/coradoc/cli.rb CHANGED
@@ -22,6 +22,16 @@ module Coradoc
22
22
  option :section_numbers, desc: 'Enable section numbering', type: :boolean, default: false
23
23
  option :section_number_levels, desc: 'Section numbering depth (1-6)', type: :numeric, default: 3
24
24
  option :lang, desc: 'Document language code', type: :string, default: 'en'
25
+ option :resolve_includes, desc: 'Resolve include:: directives inline (default: leave as link nodes)',
26
+ type: :boolean, default: false
27
+ option :base_dir, desc: 'Base directory for include resolution (default: dirname of FILE)',
28
+ type: :string
29
+ option :missing_include, desc: 'Policy for missing includes: error, warn, silent, passthrough',
30
+ type: :string, default: 'error'
31
+ option :max_include_depth, desc: 'Maximum include nesting depth', type: :numeric,
32
+ default: 64
33
+ option :allow_unsafe_includes, desc: 'Disable path-traversal protection (asciidoctor :unsafe mode)',
34
+ type: :boolean, default: false
25
35
  def convert(file)
26
36
  source_format = resolve_format(file, :from)
27
37
  target_format = options[:to] ? Coradoc.normalize_format(options[:to]) : Coradoc.resolve_output_format(options[:output])
@@ -38,8 +48,11 @@ module Coradoc
38
48
 
39
49
  verbose_log "Converting #{file} (#{source_format}) to #{target_format}"
40
50
 
51
+ doc = Coradoc.parse_file(file, format: source_format)
52
+ doc = resolve_includes!(doc, file) if options[:resolve_includes]
53
+
41
54
  opts = build_convert_options
42
- result = Coradoc.convert_file(file, from: source_format, to: target_format, **opts)
55
+ result = Coradoc.serialize(doc, to: target_format, **opts)
43
56
  write_output(result, options[:output])
44
57
  rescue Coradoc::Error => e
45
58
  error "Error: #{e.message}"
@@ -215,5 +228,17 @@ module Coradoc
215
228
  opts[key] = SYMBOL_OPTIONS.include?(key) ? value.to_sym : value
216
229
  end
217
230
  end
231
+
232
# Expand +include::+ directives in +doc+ using the CLI include options.
#
# Reads :base_dir, :missing_include, :max_include_depth and
# :allow_unsafe_includes from +options+ and forwards them to
# +Coradoc.resolve_includes+.
#
# @param doc [Object] parsed document to resolve
# @param source_file [String] path of the file being converted; its
#   directory is the base directory when --base-dir is absent or blank
# @return [Object] whatever +Coradoc.resolve_includes+ returns
def resolve_includes!(doc, source_file)
  requested = options[:base_dir]
  requested = File.dirname(source_file) if requested.nil? || requested.to_s.empty?
  # Expand in both cases: a relative --base-dir is anchored to the current
  # working directory here, exactly like the default, so the logged path and
  # the path handed to the resolver are the same absolute directory.
  base_dir = File.expand_path(requested.to_s)
  verbose_log "Resolving includes against #{base_dir}"
  Coradoc.resolve_includes(
    doc,
    base_dir: base_dir,
    missing_include: options[:missing_include].to_sym,
    max_depth: options[:max_include_depth],
    allow_unsafe: options[:allow_unsafe_includes]
  )
end
218
243
  end
219
244
  end
@@ -86,22 +86,27 @@ module Coradoc
86
86
  registry.list
87
87
  end
88
88
 
89
- # Parse text to a document model
89
+ # Parse text to a document model.
90
90
  #
91
- # This is the main entry point for parsing documents. It automatically
92
- # selects the appropriate parser based on the format.
91
+ # Graph mode is the only mode: +include::+ directives survive as
92
+ # +CoreModel::Include+ link nodes pointing at other files. NO file
93
+ # I/O happens during parse. The result is a single document that
94
+ # references other documents via Include edges — a text graph.
95
+ #
96
+ # To splice included content inline, call +Coradoc.resolve_includes+
97
+ # on the parsed document. This is an explicit, separate step so the
98
+ # caller controls when (and whether) file I/O happens.
93
99
  #
94
100
  # @param text [String] the document text to parse
95
101
  # @param format [Symbol] the source format (:asciidoc, :html, :markdown)
96
102
  # @return [Coradoc::CoreModel::Base, Object] the parsed document model
97
103
  # @raise [UnsupportedFormatError] if the format is not registered
98
104
  #
99
- # @example Parse AsciiDoc
100
- # doc = Coradoc.parse("= Title\n\nContent", format: :asciidoc)
101
- # doc = Coradoc.parse(File.read("doc.adoc"), format: :asciidoc)
105
+ # @example Parse — Include directives stay as link nodes
106
+ # doc = Coradoc.parse(text, format: :asciidoc)
102
107
  #
103
- # @example Parse and get CoreModel
104
- # core = Coradoc.parse(text, format: :asciidoc) # Returns CoreModel
108
+ # @example Then flatten: splice included files inline
109
+ # flat = Coradoc.resolve_includes(doc, base_dir: Dir.pwd)
105
110
  def parse(text, format:)
106
111
  format_module = get_format(format)
107
112
  unless format_module
@@ -115,6 +120,56 @@ module Coradoc
115
120
  Hooks.invoke(:after_parse, result, format: format)
116
121
  end
117
122
 
123
+ # Resolve +include::+ directives in a parsed document.
124
+ #
125
+ # Walks the document tree and replaces every +CoreModel::Include+
126
+ # link node with the parsed content of its target file, recursing
127
+ # into the result. The original document is left unchanged; a new
128
+ # subtree is constructed.
129
+ #
130
+ # This is the explicit "flatten" step that turns a text graph into
131
+ # a single spliced document. Callers control:
132
+ # - +base_dir+ — where to root relative include paths
133
+ # - +missing_include+ — what to do when a target is missing
134
+ # - +max_depth+ — recursion cap
135
+ # - +allow_unsafe+ — opt out of path-traversal protection
136
+ # - +resolver+ — custom resolution strategy (e.g. HTTP, in-memory)
137
+ #
138
+ # @param document [Coradoc::CoreModel::Base] parsed document
139
+ # @param base_dir [String] base directory for relative include paths
140
+ # @param missing_include [Symbol] :error (default), :warn, :silent, :passthrough
141
+ # @param max_depth [Integer] recursion cap (default 64)
142
+ # @param allow_unsafe [Boolean] disable path-traversal protection
143
+ # @param resolver [Object, nil] custom resolver. Defaults to
144
+ # +Coradoc::IncludeResolver::Filesystem+ rooted at +base_dir+.
145
+ # @return [Coradoc::CoreModel::Base] new document with includes expanded
146
+ # @raise [Coradoc::IncludeNotFoundError] when a target is missing
147
+ # and policy is :error
148
+ # @raise [Coradoc::IncludeDepthExceededError] when +max_depth+ is hit
149
+ # @raise [Coradoc::CircularIncludeError] when an include cycle is detected
150
+ #
151
+ # @example
152
+ # doc = Coradoc.parse(text, format: :asciidoc)
153
+ # flat = Coradoc.resolve_includes(doc, base_dir: Dir.pwd)
154
def resolve_includes(document, base_dir:,
                     missing_include: :error,
                     max_depth: Coradoc::ResolveIncludes::DEFAULT_MAX_DEPTH,
                     allow_unsafe: false,
                     resolver: nil)
  # Turn the caller's resolver (possibly nil) into a usable one first;
  # base_dir and allow_unsafe are handed to the coercion step.
  effective_resolver = Coradoc::IncludeResolver.coerce(
    resolver, base_dir: base_dir, allow_unsafe: allow_unsafe
  )

  # Everything the tree walk needs besides the document itself.
  walk_options = {
    resolver: effective_resolver,
    base_dir: base_dir,
    missing_include: missing_include,
    max_depth: max_depth
  }
  Coradoc::ResolveIncludes.call(document, **walk_options)
end
172
+
118
173
  # Convert document text from one format to another
119
174
  #
120
175
  # This is the main entry point for format conversion. It handles the
@@ -459,6 +514,9 @@ module Coradoc
459
514
  autoload :DocumentManipulator, "#{__dir__}/document_manipulator"
460
515
  autoload :Visitor, "#{__dir__}/visitor"
461
516
  autoload :PerformanceRegression, "#{__dir__}/performance_regression"
517
+ autoload :IncludeResolver, "#{__dir__}/include_resolver"
518
+ autoload :IncludeSelectors, "#{__dir__}/include_selectors"
519
+ autoload :ResolveIncludes, "#{__dir__}/resolve_includes"
462
520
  end
463
521
 
464
522
  # Format gems self-register via Coradoc.register_format when they are required.
@@ -11,7 +11,12 @@ module Coradoc
11
11
  # attribute :children, Base, collection: true
12
12
  # on each including class. This module overrides the setter to
13
13
  # auto-wrap raw strings as TextContent, keeping all callers simple.
14
+ #
15
+ # Includes HasChildren so all mixed-content classes also satisfy
16
+ # the structural predicate (OCP — no subclass enumeration needed
17
+ # for children-based dispatch).
14
18
  module ChildrenContent
19
+ include HasChildren
15
20
  # Override the children= setter to auto-wrap strings as TextContent.
16
21
  # This is defined via define_method so it always overrides the
17
22
  # lutaml-generated setter, regardless of include order.
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module CoreModel
    class FrontmatterBlock
      # One typed value inside a FrontmatterBlock entry tree.
      #
      # Takes the place of the earlier `attribute :data, :hash` form. A node
      # records its kind in `value_type` and fills the single slot that
      # matches it. The container kinds (`array`, `map`) hold nested
      # FrontmatterValue / FrontmatterEntry children instead of a scalar.
      #
      # Value types (mirroring what YAML.safe_load yields under the Codec's
      # PERMITTED_CLASSES):
      #
      #   scalar    -> string, integer, float, boolean, date, datetime, symbol, nil
      #   container -> array, map
      #
      # A new scalar type is additive: declare its slot and extend the case
      # in Codec::ValueBridge (OCP).
      class FrontmatterValue < Base
        SCALAR_TYPES = %w[
          string integer float boolean date datetime symbol nil
        ].freeze
        CONTAINER_TYPES = %w[array map].freeze
        ALL_TYPES = (SCALAR_TYPES + CONTAINER_TYPES).freeze

        # value_type => reader of the slot holding that scalar. The 'nil'
        # type and the container types are absent on purpose: they have no
        # scalar slot, so #ruby_value answers nil for them.
        SCALAR_READERS = {
          'string' => :string_value,
          'integer' => :integer_value,
          'float' => :float_value,
          'boolean' => :boolean_value,
          'date' => :date_value,
          'datetime' => :datetime_value,
          'symbol' => :symbol_value
        }.freeze
        private_constant :SCALAR_READERS

        # Discriminator naming which slot below is populated.
        attribute :value_type, :string

        # Scalar slots; value_type picks the one in use.
        attribute :string_value, :string
        attribute :integer_value, :integer
        attribute :float_value, :float
        attribute :boolean_value, :boolean
        attribute :date_value, :date
        attribute :datetime_value, :date_time
        attribute :symbol_value, :symbol

        # Container slots, used when value_type is array / map.
        attribute :items, FrontmatterValue, collection: true
        attribute :entries, FrontmatterEntry, collection: true

        # Ruby-native scalar held by this node. Containers, nil-typed nodes
        # and unrecognised types all yield nil. Meant for callers that do
        # not need the type discriminator.
        def ruby_value
          reader = SCALAR_READERS[value_type]
          reader && public_send(reader)
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module CoreModel
    # Marker mixin: "instances of this model expose a +children+ collection".
    #
    # Mixing it in is how a CoreModel class opts into children-based
    # traversal. Consumers (for example mirror's
    # CoreModelToMirror#element_children) test +is_a?(HasChildren)+ instead
    # of listing concrete classes, so a new children-bearing class needs no
    # change on the consumer side (OCP).
    #
    # ChildrenContent (the mixed-content auto-wrap behaviour) includes this
    # module, so its includers satisfy the predicate too. Classes whose
    # children are typed blocks only (StructuralElement, etc.) include
    # HasChildren directly.
    module HasChildren
      # @return [true] always; the presence of the mixin is the signal
      def has_children?
        true
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module CoreModel
    # Include directive as a first-class node of the canonical model.
    #
    # The node is a LINK from the containing document to another file.
    # Parsing keeps it as-is and performs no file I/O, so a parsed document
    # is a text graph whose edges are Include nodes.
    #
    # Inlining the linked content is a separate, explicit step:
    # +Coradoc.resolve_includes(doc, base_dir:)+ walks the tree, swaps each
    # Include for the parsed content of its target and recurses into it.
    #
    # Keeping the two steps apart lets a caller:
    #   - look at the graph before choosing to flatten it
    #   - resolve against different base dirs / resolvers without re-parsing
    #   - leave includes as external links (e.g. when parsing a site)
    #
    # Attributes:
    #   target       String          path or URL as authored
    #   options      IncludeOptions  parsed selectors (tags/lines/leveloffset/indent/encoding)
    #   raw_options  String          original bracket body, kept for verbatim round-trip
    #   line_break   String          trailing line break, default "\n"
    #
    # Block-level: it sits in the +content+ / +children+ array of any block
    # container (Document, Section, Paragraph, List item, Table cell, etc.)
    # next to other block-level nodes.
    class Include < Base
      class << self
        # Symbol identifying this node kind.
        def semantic_type
          :include
        end
      end

      attribute :target, :string
      attribute :options,
                Coradoc::CoreModel::IncludeOptions,
                default: -> { Coradoc::CoreModel::IncludeOptions.new }
      attribute :raw_options, :string, default: -> { '' }
      attribute :line_break, :string, default: -> { "\n" }
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module CoreModel
    # Parsed +leveloffset+ value of an include directive.
    #
    # Two asciidoctor forms are recognised:
    #   leveloffset=+N  / leveloffset=-N  -> relative shift of the heading level
    #   leveloffset=N                     -> absolute heading level
    #
    # Mode and number are stored separately so the code applying the offset
    # never has to re-parse the string while walking sections (DRY).
    class IncludeLevelOffset < Base
      # "relative" for +N/-N, "absolute" for a bare N.
      attribute :mode, :string

      # Signed shift in relative mode; the target level in absolute mode.
      attribute :delta, :integer

      # Build from the raw asciidoctor string ("+2", "-1", "3").
      #
      # @param raw [String, nil]
      # @return [IncludeLevelOffset, nil] nil for nil, blank or unparsable input
      def self.parse(raw)
        trimmed = raw&.strip
        return nil if trimmed.nil? || trimmed.empty?

        matched_offset(trimmed)
      end

      # Compute the heading level after this offset.
      #
      # @param level [Integer] original section level
      # @return [Integer] resulting level, never below 0; +level+ itself
      #   when the mode is neither "relative" nor "absolute"
      def apply(level)
        return [level + delta, 0].max if mode == 'relative'
        return [delta, 0].max if mode == 'absolute'

        level
      end

      # Serialise to the asciidoctor wire form ("+2", "-1", "3").
      #
      # @return [String] empty string when the mode is unrecognised
      def to_s
        if mode == 'relative'
          format('%+d', delta)
        elsif mode == 'absolute'
          delta.to_s
        else
          ''
        end
      end

      class << self
        private

        # Match an optionally signed run of digits covering the whole
        # string; any explicit sign means "relative".
        def matched_offset(trimmed)
          found = /\A(?<sign>[+-]?)(?<digits>\d+)\z/.match(trimmed)
          return nil unless found

          magnitude = found[:digits].to_i
          new(
            mode: found[:sign].empty? ? 'absolute' : 'relative',
            delta: found[:sign] == '-' ? -magnitude : magnitude
          )
        end
      end
    end
  end
end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module CoreModel
    # Typed options for an include directive, parsed once at construction.
    #
    # Each asciidoctor selector (tags / lines / leveloffset / indent /
    # encoding) gets one typed attribute. Selectors downstream operate on
    # the typed form, never re-parsing the raw string (DRY).
    #
    #   tags            Array<String>   ["body"], [] when unspecified
    #   tags_wildcard   Boolean         true for tags=*
    #   tags_inverted   Boolean         true for tags=**
    #   lines_spec      String?         raw "1..2;5;7..8" — parsed by Lines selector
    #   leveloffset     IncludeLevelOffset?
    #   indent          Integer?        0 = strip, N = re-indent, nil = passthrough
    #   file_encoding   String?         passed through to resolver for File.read
    class IncludeOptions < Base
      attribute :tags, :string, collection: true, default: -> { [] }
      attribute :tags_wildcard, :boolean, default: -> { false }
      attribute :tags_inverted, :boolean, default: -> { false }
      attribute :lines_spec, :string
      attribute :leveloffset, Coradoc::CoreModel::IncludeLevelOffset
      attribute :indent, :integer
      attribute :file_encoding, :string

      # Whether the lines selector is in effect. Tags are ignored when
      # lines is set — matches asciidoctor precedence (SPEC 3.5).
      def lines?
        !lines_spec.nil? && !lines_spec.strip.empty?
      end

      # Whether any tag selector is in effect.
      def tags?
        tags_wildcard || tags_inverted || !tags.empty?
      end

      # True when both selectors were specified (lines wins).
      def conflict_resolved_to_lines?
        lines? && tags?
      end

      # Build from a flat hash of asciidoctor-style key/value strings.
      # Whitespace around keys and values is trimmed (SPEC 6.3).
      #
      # @param attrs [Hash{String=>String}, nil] e.g.
      #   {"tags"=>"a;b", "leveloffset"=>"+2"}; nil is treated as "no options"
      # @return [IncludeOptions]
      def self.from_hash(attrs)
        new(build_args(attrs || {}))
      end

      class << self
        private

        # Translate the cleaned attribute hash into constructor arguments.
        def build_args(attrs)
          cleaned = clean_keys(attrs)
          {
            tags: parse_tags(cleaned['tags']),
            tags_wildcard: wildcard?(cleaned['tags']),
            tags_inverted: inverted?(cleaned['tags']),
            lines_spec: cleaned['lines'],
            leveloffset: CoreModel::IncludeLevelOffset.parse(cleaned['leveloffset']),
            indent: parse_integer(cleaned['indent']),
            file_encoding: cleaned['encoding']
          }
        end

        # Stringify and strip every key and value.
        def clean_keys(attrs)
          attrs.each_with_object({}) do |(k, v), h|
            h[k.to_s.strip] = v.to_s.strip
          end
        end

        # Split a ';'-separated tag list. The bare wildcards "*" and "**"
        # are reported through wildcard? / inverted? and yield no names.
        def parse_tags(raw)
          return [] if raw.nil?

          trimmed = raw.strip
          return [] if trimmed.empty? || trimmed == '*' || trimmed == '**'

          trimmed.split(';').map(&:strip).reject(&:empty?)
        end

        def wildcard?(raw)
          !raw.nil? && raw.strip == '*'
        end

        def inverted?(raw)
          !raw.nil? && raw.strip == '**'
        end

        # Parse a decimal integer, returning nil for nil, blank or
        # non-numeric input.
        #
        # The radix is given explicitly: without it Kernel#Integer honours
        # prefixes, so "010" would become 8 (octal) and "0x10" would be
        # accepted as 16.
        def parse_integer(raw)
          return nil if raw.nil?

          Integer(raw.strip, 10, exception: false)
        end
      end
    end
  end
end
@@ -11,6 +11,11 @@ module Coradoc
11
11
  # Structural elements can contain other elements (blocks, lists, etc.)
12
12
  # and can be nested hierarchically to represent document structure.
13
13
  class StructuralElement < Base
14
+ # StructuralElements carry typed block children (sections, paragraphs,
15
+ # etc.) rather than mixed inline content, so they don't include
16
+ # ChildrenContent. HasChildren marks the structural predicate that
17
+ # downstream traversal dispatches on (OCP).
18
+ include HasChildren
14
19
  # @!attribute level
15
20
  # @return [Integer, nil] hierarchical level (1-6 for sections)
16
21
  attribute :level, :integer
@@ -12,6 +12,7 @@ module Coradoc
12
12
  # Autoload submodules lazily using relative paths
13
13
  autoload :Base, "#{__dir__}/core_model/base"
14
14
  autoload :ChildrenContent, "#{__dir__}/core_model/children_content"
15
+ autoload :HasChildren, "#{__dir__}/core_model/has_children"
15
16
  autoload :Callout, "#{__dir__}/core_model/callout"
16
17
  autoload :CalloutText, "#{__dir__}/core_model/callout_text"
17
18
  autoload :Block, "#{__dir__}/core_model/block"
@@ -78,5 +79,8 @@ module Coradoc
78
79
  autoload :CommentLine, "#{__dir__}/core_model/comment_line"
79
80
  autoload :HorizontalRuleBlock, "#{__dir__}/core_model/horizontal_rule_block"
80
81
  autoload :IdGenerator, "#{__dir__}/core_model/id_generator"
82
+ autoload :Include, "#{__dir__}/core_model/include"
83
+ autoload :IncludeOptions, "#{__dir__}/core_model/include_options"
84
+ autoload :IncludeLevelOffset, "#{__dir__}/core_model/include_level_offset"
81
85
  end
82
86
  end
@@ -283,6 +283,62 @@ module Coradoc
283
283
  end
284
284
  end
285
285
 
286
+ # Error raised when an include directive's target cannot be located.
287
+ # Honors the +missing_include+ policy: +:error+ raises this; +:warn+,
288
+ # +:silent+, and +:passthrough+ swallow it.
289
+ class IncludeNotFoundError < Error
290
+ attr_reader :target
291
+
292
+ def initialize(target)
293
+ @target = target
294
+ super("Include target not found: #{target}")
295
+ end
296
+ end
297
+
298
+ # Error raised when an include chain exceeds the configured depth limit.
299
class IncludeDepthExceededError < Error
  # target: include target being resolved when the limit was hit
  # depth:  nesting depth that was reached
  # max:    configured limit that +depth+ exceeded; exposed so handlers can
  #         report or act on the limit without parsing the message
  attr_reader :depth, :max, :target

  # @param target [String] include target (presumably as authored — confirm with caller)
  # @param depth [Integer] depth reached
  # @param max [Integer] configured maximum depth
  def initialize(target:, depth:, max:)
    @target = target
    @depth = depth
    @max = max
    super("Include depth #{depth} exceeds max #{max} at #{target}")
  end
end
308
+
309
+ # Error raised when a cycle is detected in the include graph.
310
+ # The chain is the list of files leading back to the repeated target.
311
+ class CircularIncludeError < Error
312
+ attr_reader :chain, :target
313
+
314
+ def initialize(target:, chain:)
315
+ @target = target
316
+ @chain = chain
317
+ super("Circular include detected: #{chain.join(' -> ')} -> #{target}")
318
+ end
319
+ end
320
+
321
+ # Error raised when an include target escapes the resolver's safe base
322
+ # directory and +allow_unsafe_includes+ is not set.
323
+ class UnsafeIncludeError < Error
324
+ attr_reader :target
325
+
326
+ def initialize(target)
327
+ @target = target
328
+ super("Unsafe include path blocked: #{target}")
329
+ end
330
+ end
331
+
332
+ # Error raised when an include target exceeds the resolver's size limit.
333
+ class IncludeTooLargeError < Error
334
+ attr_reader :target
335
+
336
+ def initialize(target)
337
+ @target = target
338
+ super("Include target too large to read: #{target}")
339
+ end
340
+ end
341
+
286
342
  # Error raised when a requested format is not supported
287
343
  #
288
344
  # @example
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
5
module Coradoc
  class IncludeResolver
    # Default include resolver: reads files from the local filesystem,
    # rooted at +base_dir+. Path-traversal protection is ON by default
    # to match asciidoctor's +:safe+ mode.
    #
    # Pass +allow_unsafe: true+ to opt out (matches +:unsafe+ mode).
    class Filesystem < IncludeResolver
      attr_reader :base_dir, :allow_unsafe, :max_bytes

      # @param base_dir [String] directory includes are resolved against;
      #   stored expanded to an absolute path. Usually the directory of the
      #   including document.
      # @param allow_unsafe [Boolean] when false (default), refuses any
      #   resolved path that escapes the base directory via .. or that is
      #   an absolute path outside it.
      # @param max_bytes [Integer, nil] if set, refuses files larger
      #   than this.
      def initialize(base_dir:, allow_unsafe: false, max_bytes: nil)
        @base_dir = File.expand_path(base_dir)
        @allow_unsafe = allow_unsafe
        @max_bytes = max_bytes
      end

      # Read one include target and return its text.
      #
      # @param target [String] path as written in the directive
      # @param base_dir [String, nil] directory +target+ is relative to;
      #   nil falls back to the directory given to the constructor
      # @param options [#file_encoding, nil] include options; only
      #   +file_encoding+ is read here
      # @param context [Object] accepted for interface compatibility, unused
      # @return [String] UTF-8 text with LF line endings, or the raw bytes
      #   when the requested encoding is "binary"
      # @raise [Coradoc::UnsafeIncludeError] path escapes the base directory
      # @raise [Coradoc::IncludeNotFoundError] target is not a regular file
      # @raise [Coradoc::IncludeTooLargeError] file exceeds +max_bytes+
      # @raise [Coradoc::Error] the requested encoding is not usable
      def call(target:, base_dir:, options:, context:)
        root = base_dir || @base_dir
        full = File.expand_path(target, root)
        enforce_safety!(full, root) unless allow_unsafe
        raise Coradoc::IncludeNotFoundError, target unless File.file?(full)

        enforce_size!(full, target)

        # A blank encoding= value means "not specified", not "unknown".
        requested = options&.file_encoding.to_s.strip
        read_with_encoding(full, requested.empty? ? 'utf-8' : requested)
      end

      private

      # Lexical containment check: +full_path+ must be the base directory
      # itself or live below it. The separator is appended so that a
      # sibling such as "/base-other" does not pass as a child of "/base".
      def enforce_safety!(full_path, base_dir)
        base_expanded = File.expand_path(base_dir)
        base_with_sep = "#{base_expanded}#{File::SEPARATOR}"

        return if full_path == base_expanded || full_path.start_with?(base_with_sep)

        raise Coradoc::UnsafeIncludeError, full_path
      end

      # Reject files above +max_bytes+. The caller has already verified
      # that +full_path+ is a regular file.
      def enforce_size!(full_path, target)
        return if max_bytes.nil? || File.size(full_path) <= max_bytes

        raise Coradoc::IncludeTooLargeError, target
      end

      # Read +full_path+ as bytes, interpret them in +encoding_name+ and
      # return valid UTF-8 with normalized line endings.
      def read_with_encoding(full_path, encoding_name)
        bytes = File.binread(full_path)
        return bytes if encoding_name.to_s.downcase == 'binary'

        bytes.force_encoding(clean_encoding_name(encoding_name))
        text = if bytes.encoding == Encoding::UTF_8
                 # String#encode to the SAME encoding is a no-op and would
                 # leave invalid bytes in place (making the gsub below
                 # raise); scrub is what actually replaces them.
                 bytes.scrub
               else
                 bytes.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
               end
        normalize_line_endings(text)
      rescue ArgumentError, Encoding::ConverterNotFoundError => e
        # ArgumentError: unknown encoding name; ConverterNotFoundError:
        # known encoding that cannot be transcoded to UTF-8.
        raise Coradoc::Error, "Unsupported encoding #{encoding_name.inspect}: #{e.message}"
      end

      # asciidoctor parity: normalize CRLF and lone CR to LF so the parser
      # sees consistent line endings regardless of the source platform.
      def normalize_line_endings(text)
        text.gsub(/\r\n?/, "\n")
      end

      # Canonicalize an encoding name: trim, lowercase, and map the common
      # spellings "utf8" / "utf_8" (which Ruby does not recognise) to
      # "utf-8".
      def clean_encoding_name(name)
        cleaned = name.to_s.strip.downcase
        cleaned.match?(/\Autf[-_]?8\z/) ? 'utf-8' : cleaned
      end
    end
  end
end