dms-parser 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE-APACHE +15 -0
- data/LICENSE-MIT +21 -0
- data/README.md +166 -0
- data/bin/dms-encoder +234 -0
- data/lib/dms/emitter.rb +674 -0
- data/lib/dms/parser.rb +3007 -0
- data/lib/dms/tier1.rb +1750 -0
- data/lib/dms/types.rb +129 -0
- data/lib/dms.rb +161 -0
- metadata +56 -0
data/lib/dms/tier1.rb
ADDED
|
@@ -0,0 +1,1750 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# DMS Tier-1: decorators, dialect imports, and the tier-1 encoder.
|
|
4
|
+
#
|
|
5
|
+
# Entry points:
|
|
6
|
+
# Dms::Tier1.parse(src) -> DocumentT1
|
|
7
|
+
#
|
|
8
|
+
# The tier-1 parser wraps the existing tier-0 parser, intercepting
|
|
9
|
+
# decorator calls at leading/inner/trailing/flow-inner positions and
|
|
10
|
+
# collecting them into a sidecar.
|
|
11
|
+
|
|
12
|
+
require "json"
require "set"
|
|
13
|
+
|
|
14
|
+
module Dms
|
|
15
|
+
module Tier1
|
|
16
|
+
# Reserved decorator sigil characters (tier-0 set, no underscore).
|
|
17
|
+
# ! @ $ % ^ & * | ~ ` . , > < ? ; =
|
|
18
|
+
RESERVED_SIGIL_CHARS = "!@$%^&*|~`.,><?;=".chars.to_set.freeze
|
|
19
|
+
|
|
20
|
+
# ── Reserved Emoji Set helpers (frozen Unicode 15.1) ────────────────────
|
|
21
|
+
|
|
22
|
+
# Extended_Pictographic=Yes ranges (frozen UCD 15.1), sourced from Rust ref.
|
|
23
|
+
EXTENDED_PICTOGRAPHIC_RANGES = [
|
|
24
|
+
[0x00A9, 0x00A9], [0x00AE, 0x00AE], [0x203C, 0x203C], [0x2049, 0x2049],
|
|
25
|
+
[0x2122, 0x2122], [0x2139, 0x2139], [0x2194, 0x2199], [0x21A9, 0x21AA],
|
|
26
|
+
[0x231A, 0x231B], [0x2328, 0x2328], [0x2388, 0x2388], [0x23CF, 0x23CF],
|
|
27
|
+
[0x23E9, 0x23F3], [0x23F8, 0x23FA], [0x24C2, 0x24C2], [0x25AA, 0x25AB],
|
|
28
|
+
[0x25B6, 0x25B6], [0x25C0, 0x25C0], [0x25FB, 0x25FE], [0x2600, 0x2605],
|
|
29
|
+
[0x2607, 0x2612], [0x2614, 0x2685], [0x2690, 0x2705], [0x2708, 0x2712],
|
|
30
|
+
[0x2714, 0x2714], [0x2716, 0x2716], [0x271D, 0x271D], [0x2721, 0x2721],
|
|
31
|
+
[0x2728, 0x2728], [0x2733, 0x2734], [0x2744, 0x2744], [0x2747, 0x2747],
|
|
32
|
+
[0x274C, 0x274C], [0x274E, 0x274E], [0x2753, 0x2755], [0x2757, 0x2757],
|
|
33
|
+
[0x2763, 0x2767], [0x2795, 0x2797], [0x27A1, 0x27A1], [0x27B0, 0x27B0],
|
|
34
|
+
[0x27BF, 0x27BF], [0x2934, 0x2935], [0x2B05, 0x2B07], [0x2B1B, 0x2B1C],
|
|
35
|
+
[0x2B50, 0x2B50], [0x2B55, 0x2B55], [0x3030, 0x3030], [0x303D, 0x303D],
|
|
36
|
+
[0x3297, 0x3297], [0x3299, 0x3299], [0x1F000, 0x1F0FF], [0x1F10D, 0x1F10F],
|
|
37
|
+
[0x1F12F, 0x1F12F], [0x1F16C, 0x1F171], [0x1F17E, 0x1F17F], [0x1F18E, 0x1F18E],
|
|
38
|
+
[0x1F191, 0x1F19A], [0x1F1AD, 0x1F1E5], [0x1F201, 0x1F20F], [0x1F21A, 0x1F21A],
|
|
39
|
+
[0x1F22F, 0x1F22F], [0x1F232, 0x1F23A], [0x1F23C, 0x1F23F], [0x1F249, 0x1F3FA],
|
|
40
|
+
[0x1F400, 0x1F53D], [0x1F546, 0x1F64F], [0x1F680, 0x1F6FF], [0x1F774, 0x1F77F],
|
|
41
|
+
[0x1F7D5, 0x1F7FF], [0x1F80C, 0x1F80F], [0x1F848, 0x1F84F], [0x1F85A, 0x1F85F],
|
|
42
|
+
[0x1F888, 0x1F88F], [0x1F8AE, 0x1F8FF], [0x1F90C, 0x1F93A], [0x1F93C, 0x1F945],
|
|
43
|
+
[0x1F947, 0x1FAFF], [0x1FC00, 0x1FFFD],
|
|
44
|
+
].freeze
|
|
45
|
+
|
|
46
|
+
# Whether codepoint +cp+ falls inside one of the inclusive [low, high] pairs
# of EXTENDED_PICTOGRAPHIC_RANGES.
#
# The lookup is a binary search, so it relies on the table being sorted in
# ascending order with non-overlapping ranges.
#
# @param cp [Integer] Unicode codepoint
# @return [Boolean]
def self.extended_pictographic?(cp)
  # Nothing below U+00A9 (the first table entry) can match.
  return false if cp < 0xA9

  # Array#bsearch in find-any mode: the block answers 0 when the range
  # contains cp, a negative number when the wanted range lies to the left
  # of this one, and a positive number when it lies to the right.
  hit = EXTENDED_PICTOGRAPHIC_RANGES.bsearch do |(low, high)|
    if cp < low
      -1
    elsif cp > high
      1
    else
      0
    end
  end
  !hit.nil?
end
|
|
63
|
+
|
|
64
|
+
# Whether codepoint +cp+ belongs to the reserved emoji set: a regional
# indicator, a skin-tone modifier, the keycap combiner, or anything accepted
# by extended_pictographic?.
#
# @param cp [Integer] Unicode codepoint
# @return [Boolean]
def self.reserved_emoji_codepoint?(cp)
  if cp.between?(0x1F1E6, 0x1F1FF) || # regional indicator
     cp.between?(0x1F3FB, 0x1F3FF) || # skin-tone modifier
     cp == 0x20E3                     # keycap combiner
    true
  else
    extended_pictographic?(cp)
  end
end
|
|
70
|
+
|
|
71
|
+
# Whether +cp+ lies in U+1F1E6..U+1F1FF (the regional-indicator symbols).
#
# @param cp [Integer] Unicode codepoint
# @return [Boolean]
def self.regional_indicator?(cp)
  cp.between?(0x1F1E6, 0x1F1FF)
end
|
|
74
|
+
|
|
75
|
+
# Whether +cp+ lies in U+1F3FB..U+1F3FF (the skin-tone modifiers).
#
# @param cp [Integer] Unicode codepoint
# @return [Boolean]
def self.emoji_modifier?(cp)
  cp.between?(0x1F3FB, 0x1F3FF)
end
|
|
78
|
+
|
|
79
|
+
# Decode the single UTF-8 character that starts at byte offset +pos+ of +s+.
# Returns the character as a String, or nil when +pos+ is at/after the end of
# the string or the bytes there do not form a valid UTF-8 character.
def self.utf8_char_at(s, pos)
  return nil if pos >= s.bytesize

  # A UTF-8 character is at most 4 bytes, so a 4-byte window is enough and
  # avoids slicing the whole remainder of the string on every call.
  window = s.byteslice(pos, 4)
  return nil if window.nil? || window.empty?

  ch = window.force_encoding(Encoding::UTF_8)[0]
  ch if ch&.valid_encoding?
end

# Read one reserved-emoji cluster starting at byte offset +start+ in +s+.
#
# Shapes recognised:
# * a regional indicator, optionally paired with a second one;
# * a reserved emoji codepoint followed by any run of skin-tone modifiers,
#   U+FE0F or U+20E3, and ZWJ (U+200D) links to a following
#   Extended_Pictographic codepoint.
#
# Bytes that are not valid UTF-8 never start or extend a cluster; they end
# the scan instead of raising.
#
# @param s [String] UTF-8 text (the bytes are reinterpreted as UTF-8)
# @param start [Integer] byte offset at which to start reading
# @return [Integer, nil] exclusive end byte offset of the cluster, or nil if
#   no emoji cluster starts at +start+
def self.read_reserved_emoji_atom(s, start)
  first = utf8_char_at(s, start)
  return nil if first.nil?

  cp0 = first.ord
  return nil unless reserved_emoji_codepoint?(cp0)

  end_pos = start + first.bytesize

  # Regional-indicator pair (GB12/GB13): at most one partner is consumed.
  if regional_indicator?(cp0)
    partner = utf8_char_at(s, end_pos)
    end_pos += partner.bytesize if partner && regional_indicator?(partner.ord)
    return end_pos
  end

  # GB9/GB9a/GB11 loop
  loop do
    ch = utf8_char_at(s, end_pos)
    break if ch.nil?

    cp = ch.ord
    if emoji_modifier?(cp) || cp == 0xFE0F || cp == 0x20E3
      # GB9/GB9a - modifier, variation selector or keycap combiner extends the cluster
      end_pos += ch.bytesize
    elsif cp == 0x200D
      # GB11 - ZWJ x Extended_Pictographic
      after_zwj = end_pos + ch.bytesize
      joined = utf8_char_at(s, after_zwj)
      # ZWJ not followed by E_P: cluster ends before the ZWJ
      break unless joined && extended_pictographic?(joined.ord)

      end_pos = after_zwj + joined.bytesize
    else
      break
    end
  end
  end_pos
end
|
|
141
|
+
|
|
142
|
+
# Whether a sigil atom starts at byte offset +pos+ of +s+: either an ASCII
# character from RESERVED_SIGIL_CHARS or a reserved emoji codepoint.
#
# Bytes at +pos+ that do not decode as a valid UTF-8 character (for example
# an offset in the middle of a multi-byte character) are reported as "not a
# sigil" rather than raising from String#ord.
#
# @param s [String] UTF-8 text (the bytes are reinterpreted as UTF-8)
# @param pos [Integer] byte offset to inspect
# @return [Boolean]
def self.sigil_atom_start_at?(s, pos)
  return false if pos >= s.bytesize

  lead = s.getbyte(pos)
  return false if lead.nil?

  # ASCII: the answer is plain set membership.
  return RESERVED_SIGIL_CHARS.include?(lead.chr) if lead < 0x80

  # Multi-byte: decode one character (at most 4 bytes) and classify it.
  ch = s.byteslice(pos, 4).force_encoding(Encoding::UTF_8)[0]
  return false unless ch&.valid_encoding?

  reserved_emoji_codepoint?(ch.ord)
end
|
|
159
|
+
|
|
160
|
+
# Measure the sigil atom that begins at byte offset +pos+ of +s+.
#
# @param s [String] UTF-8 text
# @param pos [Integer] byte offset to lex at
# @return [Integer, nil] atom length in bytes (1 for an ASCII sigil character,
#   the cluster length reported by read_reserved_emoji_atom otherwise), or
#   nil when no sigil atom starts at +pos+
def self.lex_sigil_atom_at(s, pos)
  return nil if pos >= s.bytesize

  lead = s.getbyte(pos)
  return nil if lead.nil?

  if lead >= 0x80
    # Multi-byte lead: only an emoji cluster can match.
    stop = read_reserved_emoji_atom(s, pos)
    stop.nil? ? nil : stop - pos
  elsif RESERVED_SIGIL_CHARS.include?(lead.chr)
    1
  end
end
|
|
176
|
+
|
|
177
|
+
# Check that +sigil+ is made up exclusively of sigil atoms, as recognised by
# lex_sigil_atom_at.
#
# @param sigil [String] the `bind` key being validated
# @param idx [Integer] index of the enclosing `_dms_imports` entry; only used
#   in the error text
# @return [String, nil] nil when every atom is valid, otherwise an error
#   message naming the first offending character
def self.validate_sigil_atoms(sigil, idx)
  # Work on a UTF-8 copy when transcoding is possible. Only encoding failures
  # fall back to the raw string; any other error is allowed to surface.
  s =
    begin
      sigil.encode("UTF-8")
    rescue EncodingError
      sigil
    end

  pos = 0
  while pos < s.bytesize
    atom_len = lex_sigil_atom_at(s, pos)
    if atom_len.nil?
      # Decode the character at pos purely for the error message.
      offender = s.byteslice(pos, s.bytesize - pos).force_encoding("UTF-8")[0] || "?"
      if offender == "_"
        return "_dms_imports[#{idx}].bind key \"#{sigil}\" (or containing '_') " \
               "is invalid: underscore is not in the tier-0 reserved decorator sigil set"
      end
      return "_dms_imports[#{idx}].bind key \"#{sigil}\" contains '#{offender}' " \
             "which is not in the tier-0 reserved decorator sigil set " \
             "nor in the Reserved Emoji Set"
    end
    pos += atom_len
  end
  nil
end
|
|
200
|
+
|
|
201
|
+
# ── Import types ────────────────────────────────────────────────────────
|
|
202
|
+
|
|
203
|
+
ImportSpec = Struct.new(:dialect, :version, :ns, :bind, :allow, :deny, :alias_map,
|
|
204
|
+
keyword_init: true)
|
|
205
|
+
|
|
206
|
+
# ── Sidecar types ───────────────────────────────────────────────────────
|
|
207
|
+
|
|
208
|
+
# path: Array of {key: String} or {index: Integer} hashes (JSON-ready)
|
|
209
|
+
# calls: Hash sigil => Array of DecoratorCall
|
|
210
|
+
# comments: Array (always empty in this implementation)
|
|
211
|
+
DecoratorEntry = Struct.new(:path, :calls, :comments)
|
|
212
|
+
|
|
213
|
+
# family: String (canonical family name, "" if unresolved)
|
|
214
|
+
# fn_name: String (post-alias-resolution function name)
|
|
215
|
+
# ns: String or nil
|
|
216
|
+
# position: :leading, :inner, :trailing, :floating
|
|
217
|
+
# params: Array of ParamGroup
|
|
218
|
+
# params_dec: Array (always empty in this implementation)
|
|
219
|
+
# sigil: String (the literal sigil string)
|
|
220
|
+
DecoratorCall = Struct.new(:sigil, :family, :fn_name, :ns, :position, :params, :params_dec)
|
|
221
|
+
|
|
222
|
+
# kind: :named or :positional
|
|
223
|
+
# value: Hash (for named) or Array (for positional)
|
|
224
|
+
ParamGroup = Struct.new(:kind, :value)
|
|
225
|
+
|
|
226
|
+
# ── Tier-1 document ─────────────────────────────────────────────────────
|
|
227
|
+
|
|
228
|
+
# Read-only result of a tier-1 parse.
#
# t0            - the wrapped tier-0 document
# decorators    - the decorator sidecar collected while parsing
# imports       - Array of ImportSpec
# observed_tier - the tier recorded for the source by the parser
class DocumentT1
  attr_reader :t0, :decorators, :imports, :observed_tier

  def initialize(t0, decorators, imports, observed_tier)
    @t0, @decorators, @imports, @observed_tier = t0, decorators, imports, observed_tier
  end
end
|
|
238
|
+
|
|
239
|
+
# ── Semver helpers ──────────────────────────────────────────────────────
|
|
240
|
+
|
|
241
|
+
RANGE_SPECIFIER_PREFIXES = %w[^ ~ >= > < <= =].freeze
|
|
242
|
+
|
|
243
|
+
# Whether +s+ is a plain semantic version: MAJOR.MINOR.PATCH with an optional
# "-pre.release" part and an optional "+build" part.
#
# Build metadata (everything from the first "+") is dropped without being
# inspected. Numeric identifiers must not have leading zeros; pre-release
# identifiers must be non-empty and consist of [0-9A-Za-z-].
#
# Inputs such as "" or "+build" return false instead of raising, and a
# trailing empty pre-release identifier ("1.2.3-alpha.") is rejected.
#
# @param s [String] candidate version string
# @return [Boolean]
def self.valid_semver?(s)
  # partition never yields nil pieces, unlike split on an empty/leading-separator string.
  core_str, dash, pre_str = s.partition("+").first.partition("-")

  core = core_str.split(".", -1)
  return false unless core.length == 3
  return false unless core.all? { |part| part.match?(/\A(?:0|[1-9]\d*)\z/) }

  # No "-" at all means there is no pre-release part to check.
  return true if dash.empty?
  return false if pre_str.empty?

  # limit -1 keeps trailing empty identifiers so they can be rejected.
  pre_str.split(".", -1).all? do |id|
    if id.match?(/\A\d+\z/)
      id == "0" || !id.start_with?("0")
    else
      id.match?(/\A[A-Za-z0-9-]+\z/)
    end
  end
end
|
|
268
|
+
|
|
269
|
+
# Whether +s+, ignoring leading whitespace, begins with one of the
# RANGE_SPECIFIER_PREFIXES operators.
#
# @param s [String] version string as written in the import
# @return [Boolean]
def self.has_range_specifier?(s)
  s.lstrip.start_with?(*RANGE_SPECIFIER_PREFIXES)
end
|
|
273
|
+
|
|
274
|
+
# ── Import extraction and validation ────────────────────────────────────
|
|
275
|
+
|
|
276
|
+
# Fetch required String field +field+ of import entry +idx+; raises
# DecodeError when it is missing, not a String, or empty.
def self.import_required_string(entry, idx, field)
  value = entry[field]
  case value
  when nil
    raise DecodeError.new(0, 0, "_dms_imports[#{idx}] is missing required field '#{field}'")
  when String
    raise DecodeError.new(0, 0, "_dms_imports[#{idx}].#{field} must be a non-empty string") if value.empty?

    value
  else
    raise DecodeError.new(0, 0, "_dms_imports[#{idx}].#{field} must be a string")
  end
end

# Read the required `version` field and insist on a plain semver string
# without range-specifier syntax.
def self.import_version(entry, idx)
  version = import_required_string(entry, idx, "version")
  if has_range_specifier?(version)
    raise DecodeError.new(0, 0,
                          "range-specifier syntax in version not supported " \
                          "(_dms_imports[#{idx}].version \"#{version}\"): write a plain semver string")
  end
  unless valid_semver?(version)
    raise DecodeError.new(0, 0,
                          "_dms_imports[#{idx}].version \"#{version}\" is not a valid semver string " \
                          "(expected MAJOR.MINOR.PATCH with optional -pre and +build)")
  end
  version
end

# Read the optional `ns` field: nil when absent, otherwise a non-empty String.
def self.import_namespace(entry, idx)
  ns_val = entry["ns"]
  case ns_val
  when nil
    nil
  when String
    raise DecodeError.new(0, 0, "_dms_imports[#{idx}].ns must be a non-empty string when present") if ns_val.empty?

    ns_val
  else
    raise DecodeError.new(0, 0, "_dms_imports[#{idx}].ns must be a string")
  end
end

# Fetch optional table-valued +field+: an empty Hash when the key is absent,
# DecodeError for any other non-table value (including `false`).
def self.import_optional_table(entry, idx, field)
  table = entry[field]
  return {} if table.nil?
  raise DecodeError.new(0, 0, "_dms_imports[#{idx}].#{field} must be a table") unless table.is_a?(Hash)

  table
end

# Validate the optional `bind` table (sigil => list of family names).
def self.import_bind_table(entry, idx)
  bind = {}
  import_optional_table(entry, idx, "bind").each do |sigil, families_val|
    raise DecodeError.new(0, 0, "_dms_imports[#{idx}].bind has an empty sigil key") if sigil.empty?

    err = validate_sigil_atoms(sigil, idx)
    raise DecodeError.new(0, 0, err) if err

    unless families_val.is_a?(Array)
      raise DecodeError.new(0, 0,
                            "_dms_imports[#{idx}].bind[\"#{sigil}\"] must be a list " \
                            "(use list form even for a single family)")
    end
    bind[sigil] = families_val.map do |item|
      next item if item.is_a?(String)

      raise DecodeError.new(0, 0, "_dms_imports[#{idx}].bind[\"#{sigil}\"] must be a list of strings")
    end
  end
  bind
end

# Validate an optional `allow`/`deny` table (family => list of names).
def self.import_name_table(entry, idx, field)
  names = {}
  import_optional_table(entry, idx, field).each do |family, names_val|
    names[family] = extract_string_list(names_val, idx, field, family)
  end
  names
end

# Validate the optional `alias` table (family => table of alias => canonical).
def self.import_alias_table(entry, idx)
  alias_map = {}
  import_optional_table(entry, idx, "alias").each do |family, inner_val|
    unless inner_val.is_a?(Hash)
      raise DecodeError.new(0, 0,
                            "_dms_imports[#{idx}].alias[\"#{family}\"] must be a table (alias → canonical)")
    end
    inner_map = {}
    inner_val.each do |alias_name, canonical_val|
      unless canonical_val.is_a?(String)
        raise DecodeError.new(0, 0,
                              "_dms_imports[#{idx}].alias[\"#{family}\"][\"#{alias_name}\"] must be a string")
      end
      inner_map[alias_name] = canonical_val
    end
    alias_map[family] = inner_map
  end
  alias_map
end

# Extract and validate the `_dms_imports` list of the front-matter table.
#
# Per entry the fields are checked in this order: dialect, version, ns, bind,
# allow, deny, allow/deny overlap, alias, and finally collisions with
# bindings seen earlier. The first violation raises.
#
# @param meta [Hash] front-matter table; only the "_dms_imports" key is read
# @return [Array<ImportSpec>] one spec per entry in source order; [] when the
#   key is absent
# @raise [DecodeError] (constructed with line 0, column 0) for any malformed
#   entry, or when the same (sigil, ns, family) triple is bound twice
def self.extract_imports(meta)
  raw_list = meta["_dms_imports"]
  return [] if raw_list.nil?
  raise DecodeError.new(0, 0, "_dms_imports must be a list") unless raw_list.is_a?(Array)

  specs = []
  # [sigil, ns, family] => index of the import that bound it first. An Array
  # key keeps the three parts distinct: a joined string is ambiguous because
  # "|" is itself a sigil character and ns/family are free-form strings.
  seen_triples = {}

  raw_list.each_with_index do |entry, idx|
    raise DecodeError.new(0, 0, "_dms_imports[#{idx}] must be a table") unless entry.is_a?(Hash)

    dialect = import_required_string(entry, idx, "dialect")
    version = import_version(entry, idx)
    ns = import_namespace(entry, idx)
    bind = import_bind_table(entry, idx)
    allow_map = import_name_table(entry, idx, "allow")
    deny_map = import_name_table(entry, idx, "deny")

    # allow/deny mutual exclusion
    allow_map.each_key do |family|
      next unless deny_map.key?(family)

      raise DecodeError.new(0, 0,
                            "_dms_imports[#{idx}]: family \"#{family}\" appears in both " \
                            "'allow' and 'deny' — they are mutually exclusive for the same family")
    end

    alias_map = import_alias_table(entry, idx)

    # Cross-import collision detection
    ns_repr = ns || "<unset>"
    bind.each do |sigil, families|
      families.each do |family|
        triple = [sigil, ns, family]
        prev_idx = seen_triples[triple]
        if prev_idx
          # The current entry is not in `specs` yet, so a family repeated
          # inside one import must be described from the locals.
          prev_dialect, prev_version =
            if prev_idx == idx
              [dialect, version]
            else
              [specs[prev_idx].dialect, specs[prev_idx].version]
            end
          raise DecodeError.new(0, 0,
                                "Decorator binding collision on (sigil='#{sigil}', ns=#{ns_repr}, " \
                                "family='#{family}'): " \
                                "import ##{prev_idx} dialect '#{prev_dialect}' v#{prev_version} and " \
                                "import ##{idx} dialect '#{dialect}' v#{version} both bind " \
                                "'#{sigil}' → '#{family}'. Resolve by remapping one.")
        end
        seen_triples[triple] = idx
      end
    end

    specs << ImportSpec.new(
      dialect: dialect,
      version: version,
      ns: ns,
      bind: bind,
      allow: allow_map,
      deny: deny_map,
      alias_map: alias_map
    )
  end

  specs
end
|
|
448
|
+
|
|
449
|
+
# Validate that +val+ is a list containing only Strings.
#
# @param val [Object] value found under _dms_imports[idx].field[family]
# @param idx [Integer] import index, used in error text
# @param field [String] name of the enclosing field, used in error text
# @param family [String] family key, used in error text
# @return [Array<String>] a new array with the same elements
# @raise [DecodeError] when +val+ is not an Array or holds a non-String
def self.extract_string_list(val, idx, field, family)
  where = "_dms_imports[#{idx}].#{field}[\"#{family}\"]"
  raise DecodeError.new(0, 0, "#{where} must be a list") unless val.is_a?(Array)

  val.map do |item|
    next item if item.is_a?(String)

    raise DecodeError.new(0, 0, "#{where} must be a list of strings")
  end
end
|
|
460
|
+
|
|
461
|
+
# ── Family resolution ────────────────────────────────────────────────────
|
|
462
|
+
|
|
463
|
+
# Resolve which family a decorator call belongs to.
#
# Imports are considered in order (restricted to those whose ns equals +ns+
# when +ns+ is given), and for each import the families bound to +sigil+ are
# tried in order. For every family the name is first mapped through that
# family's alias table, then dropped if the family's deny list contains it or
# the family has an allow list that does not contain it. The first survivor
# wins.
#
# @param sigil [String] literal sigil of the call
# @param fn_name [String] function name as written (possibly an alias)
# @param ns [String, nil] namespace qualifier of the call, if any
# @param imports [Array<ImportSpec>] imports of the document
# @return [Array(String, String)] [family_name, canonical_fn_name]
# @raise [DecodeError] when +ns+ matches no import, when the name was
#   rejected by a deny list, or when no bound family provides the name
def self.resolve_family(sigil, fn_name, ns, imports)
  candidate_imports = imports
  if ns
    candidate_imports = imports.select { |imp| imp.ns == ns }
    raise DecodeError.new(0, 0, "unknown namespace '#{ns}'") if candidate_imports.empty?
  end

  # First family (in iteration order) whose deny list rejected the name;
  # remembered so the failure can be reported as "denied" rather than
  # "not found" without walking the imports a second time.
  denied_by = nil

  candidate_imports.each do |imp|
    families = imp.bind[sigil]
    next unless families

    families.each do |family|
      # fn_name might be an alias for a canonical name of this family.
      family_aliases = imp.alias_map[family]
      canonical = (family_aliases && family_aliases[fn_name]) || fn_name

      deny_list = imp.deny[family]
      if deny_list && deny_list.include?(canonical)
        denied_by ||= family
        next
      end

      allow_list = imp.allow[family]
      next if allow_list && !allow_list.include?(canonical)

      # Use the first accepted family.
      return [family, canonical]
    end
  end

  if denied_by
    raise DecodeError.new(0, 0,
                          "decorator '#{fn_name}' is denied by family '#{denied_by}' deny list")
  end

  # Sigil not bound at all, or the name was filtered out by an allow list.
  raise DecodeError.new(0, 0,
                        "name '#{fn_name}' not found in any family bound to sigil '#{sigil}'")
end
|
|
542
|
+
|
|
543
|
+
# ── Tier-1 parser ────────────────────────────────────────────────────────
|
|
544
|
+
|
|
545
|
+
# Parse a DMS source string in tier-1 mode by handing it to a fresh T1Parser.
#
# @param src [String] DMS source text
# @return [DocumentT1]
def self.parse(src)
  parser = T1Parser.new(src)
  parser.parse
end
|
|
550
|
+
|
|
551
|
+
class T1Parser
|
|
552
|
+
# Byte constants (reused from Parser)
|
|
553
|
+
SP = 0x20
|
|
554
|
+
TAB = 0x09
|
|
555
|
+
LF = 0x0A
|
|
556
|
+
CR = 0x0D
|
|
557
|
+
LBRACK = 0x5B
|
|
558
|
+
RBRACK = 0x5D
|
|
559
|
+
LBRACE = 0x7B
|
|
560
|
+
RBRACE = 0x7D
|
|
561
|
+
LPAREN = 0x28 # '('
|
|
562
|
+
RPAREN = 0x29 # ')'
|
|
563
|
+
COMMA = 0x2C
|
|
564
|
+
DOT = 0x2E
|
|
565
|
+
PLUS = 0x2B
|
|
566
|
+
COLON = 0x3A
|
|
567
|
+
DQUOTE = 0x22
|
|
568
|
+
SQUOTE = 0x27
|
|
569
|
+
UNDERSCORE = 0x5F
|
|
570
|
+
DIGIT0 = 0x30
|
|
571
|
+
DIGIT9 = 0x39
|
|
572
|
+
LOWER_A = 0x61
|
|
573
|
+
LOWER_Z = 0x7A
|
|
574
|
+
UPPER_A = 0x41
|
|
575
|
+
UPPER_Z = 0x5A
|
|
576
|
+
MINUS = 0x2D
|
|
577
|
+
HASH = 0x23
|
|
578
|
+
|
|
579
|
+
BARE_KEY_BYTE = Array.new(256, false)
|
|
580
|
+
(DIGIT0..DIGIT9).each { |b| BARE_KEY_BYTE[b] = true }
|
|
581
|
+
(LOWER_A..LOWER_Z).each { |b| BARE_KEY_BYTE[b] = true }
|
|
582
|
+
(UPPER_A..UPPER_Z).each { |b| BARE_KEY_BYTE[b] = true }
|
|
583
|
+
BARE_KEY_BYTE[UNDERSCORE] = true
|
|
584
|
+
BARE_KEY_BYTE[MINUS] = true
|
|
585
|
+
BARE_KEY_BYTE.freeze
|
|
586
|
+
|
|
587
|
+
RESERVED_SIGIL_BYTE = Array.new(256, false)
|
|
588
|
+
"!@$%^&*|~`.,><?;=".each_byte { |b| RESERVED_SIGIL_BYTE[b] = true }
|
|
589
|
+
RESERVED_SIGIL_BYTE.freeze
|
|
590
|
+
|
|
591
|
+
# Whether a sigil atom starts at byte offset +pos+ of @src.
#
# ASCII bytes are answered straight from the RESERVED_SIGIL_BYTE table; any
# other lead byte is handed to Tier1.sigil_atom_start_at?.
#
# @param pos [Integer] byte offset into @src
# @return [Boolean]
def sigil_at?(pos)
  return false if pos >= @len

  lead = @src.getbyte(pos)
  return false if lead.nil?

  if lead < 0x80
    RESERVED_SIGIL_BYTE[lead]
  else
    Tier1.sigil_atom_start_at?(@src, pos)
  end
end
|
|
601
|
+
|
|
602
|
+
# Remember the untouched source and reset the parse results.
#
# @param src [String] DMS source text; stored as given
def initialize(src)
  @src_original = src
  # Sidecar of DecoratorEntry values, and the document's imports.
  @decorators, @imports = [], []
  @observed_tier = 0
end
|
|
608
|
+
|
|
609
|
+
# Parse the source handed to the constructor.
#
# The tier is pre-scanned with detect_tier so that tier-0 input can be given
# to Dms::Parser unchanged. Any other tier is recorded in @observed_tier and
# parsed by parse_tier1_document.
#
# @return [DocumentT1]
def parse
  tier = detect_tier(@src_original)

  unless tier == 0
    @observed_tier = tier
    # Handles _dms_tier: 1 in the front matter and collects decorators.
    return parse_tier1_document
  end

  # Tier-0 input: wrap the plain tier-0 result, with no decorators or imports.
  DocumentT1.new(Dms::Parser.parse_document(@src_original), [], [], 0)
end
|
|
624
|
+
|
|
625
|
+
# Quick scan for the `_dms_tier` value without a full parse.
#
# Only the first `+++ ... +++` block is inspected, and it must be preceded by
# nothing but whitespace. Inside it, the first line that starts with
# `_dms_tier:` followed by digits supplies the value.
#
# Bytes that are not valid UTF-8 are replaced before matching, so the scan
# itself cannot fail with "invalid byte sequence".
#
# @param src [String] DMS source text; not modified
# @return [Integer] the declared tier, or 0 when there is no front matter or
#   no `_dms_tier` line in it
def detect_tier(src)
  text = src.dup.force_encoding("UTF-8")
  text = text.scrub unless text.valid_encoding?

  front_matter = text[/\A\s*\+\+\+[^\n]*\n(.*?)\n\+\+\+/m, 1]
  return 0 unless front_matter

  # `^` is deliberately line-anchored: the key may sit on any line of the block.
  declared = front_matter[/^_dms_tier\s*:\s*(\d+)/, 1]
  declared ? declared.to_i : 0
end
|
|
637
|
+
|
|
638
|
+
private
|
|
639
|
+
|
|
640
|
+
# Run the tier-1 parse over @src_original.
#
# Steps: normalise the source to NFC UTF-8, reset the scanner state, read the
# front matter, extract the imports, parse the body with decorator
# awareness, and wrap everything in a DocumentT1.
#
# @return [DocumentT1]
def parse_tier1_document
  src = @src_original.dup
  if src.encoding == Encoding::BINARY
    # Binary-tagged input (BINARY and ASCII_8BIT are the same encoding) is
    # relabelled as UTF-8 without touching the bytes.
    src.force_encoding("UTF-8")
  elsif src.encoding != Encoding::UTF_8
    src = src.encode("UTF-8")
  end
  src = src.unicode_normalize(:nfc) unless src.ascii_only?

  # Scanner state.
  @src = src
  @len = src.bytesize
  @pos = 0
  @line = 1
  @line_start = 0

  # The front matter is read by our own reader, which accepts _dms_tier: 1.
  # Only the validated meta hash is needed here.
  _raw_table, meta_hash = parse_front_matter_accepting_t1
  @imports = Tier1.extract_imports(meta_hash)

  # Parse body with decorator awareness
  body, decorators = parse_t1_body

  # The tier-0 Document meta keeps only keys without a leading "_", and is
  # nil when none remain.
  meta_for_doc = meta_hash.reject { |key, _value| key.start_with?("_") }
  doc = Document.new(meta_for_doc.empty? ? nil : meta_for_doc, body, [], [])

  # NOTE(review): the tier is hard-coded to 1 here even though the caller
  # stored the detected value in @observed_tier - confirm this is intended.
  DocumentT1.new(doc, decorators, @imports, 1)
end
|
|
674
|
+
|
|
675
|
+
# Read the optional `+++ ... +++` front matter at the current position,
# accepting the tier-1 reserved keys.
#
# The lines between the opener and the closing `+++` are gathered into a
# buffer and parsed as a table by a separate Dms::Parser created with
# `lite: true`. Each key is then checked: `_dms_tier` must be a non-negative
# Integer, `_dms_imports` is passed through as-is, any other key starting
# with "_" is rejected, and everything else is kept.
#
# @return [Array] [nil, {}] when the input does not start with front matter,
#   otherwise [raw_table, meta_hash]
# @raise [DecodeError] for a malformed opener, a missing closing `+++`, a
#   bad `_dms_tier` value, or an unknown reserved key
def parse_front_matter_accepting_t1
  skip_trivia
  # No front matter
  return [nil, {}] unless starts_bytes?("+++")

  opener_line = @line
  @pos += 3
  skip_inline_ws
  unless consume_eol || eof?
    raise DecodeError.new(@line, col, "front matter opener must be on its own line")
  end

  inner_buf = String.new(encoding: "UTF-8")
  loop do
    if eof?
      raise DecodeError.new(opener_line, 1, "unterminated front matter: missing closing '+++'")
    end

    # Advance to the end of the current line (LF, CR, or end of input).
    line_begin = @pos
    until @pos >= @len
      byte = @src.getbyte(@pos)
      break if byte == LF || byte == CR

      @pos += 1
    end
    line_text = @src.byteslice(line_begin, @pos - line_begin).force_encoding("UTF-8")

    if line_text.strip == "+++"
      consume_eol
      break
    end

    inner_buf << line_text
    inner_buf << "\n" if consume_eol
  end

  # Parse the collected lines with the tier-0 parser.
  table = Dms::Parser.new(inner_buf, lite: true).parse_body_as_table

  # Validate the meta keys: allow _dms_tier and _dms_imports.
  meta_hash = {}
  table.each do |key, value|
    case key
    when "_dms_tier"
      # In Ruby true/false are not Integers, so the type test alone also
      # excludes booleans.
      unless value.is_a?(Integer)
        raise DecodeError.new(opener_line, 1, "_dms_tier must be a non-negative integer")
      end
      raise DecodeError.new(opener_line, 1, "_dms_tier must be non-negative") if value < 0
    when "_dms_imports"
      # Passed through; validated later by the import extraction.
    else
      raise DecodeError.new(opener_line, 1, "unknown reserved key: #{key}") if key.start_with?("_")
    end
    meta_hash[key] = value
  end

  [table, meta_hash]
end
|
|
734
|
+
|
|
735
|
+
# Parse the document body, collecting decorator calls along the way.
#
# Resets @dec_entries (path key => DecoratorEntry) and @pending_leading (an
# Array of {sigil:, fn_name:, ns:, params:} hashes waiting for the next
# value), then parses either a list root (when the body starts with "+" and
# peek_after_plus? agrees) or a table root.
#
# NOTE(review): when the input ends right after leading decorators, the
# entries left in @pending_leading are not attached to anything here -
# confirm that is intended.
#
# @return [Array] [body_value, decorator_entries]; [{}, []] for an empty body
def parse_t1_body
  @dec_entries = {}
  @pending_leading = []

  skip_trivia
  return [{}, []] if eof?

  # Leading decorators (sigil at line start) before the first value.
  if sigil_at?(@pos)
    collect_leading_decorators([])
    skip_trivia
  end

  return [{}, []] if eof?

  body =
    if @src.getbyte(@pos) == PLUS && peek_after_plus?
      # List root
      parse_t1_list_block(0, [])
    else
      # Table root
      parse_t1_table_block(0, [])
    end

  [body, @dec_entries.values]
end
|
|
770
|
+
|
|
771
|
+
# Collect the run of leading decorator calls that starts at the current
# position.
#
# Each call is parsed with parse_decorator_call and queued in
# @pending_leading as a {sigil:, fn_name:, ns:, params:} hash, because the
# key of the value they decorate has not been read yet. Trivia between calls
# is skipped; the run ends at end of input or at the first position that
# does not start a sigil.
#
# @param path_prefix [Array] path of the enclosing container (not used by
#   this method)
# @return [nil]
def collect_leading_decorators(path_prefix)
  until eof? || !sigil_at?(@pos)
    sigil, fn_name, ns, params = parse_decorator_call
    @pending_leading << { sigil: sigil, fn_name: fn_name, ns: ns, params: params }
    skip_trivia_no_consume_leading
  end
end
|
|
784
|
+
|
|
785
|
+
# Skip inline whitespace, LF / CRLF line ends, '#' and '//' line comments and
# '/* */' block comments. Stops at the first byte that is none of those, so
# a following sigil is left in place for the caller.
def skip_trivia_no_consume_leading
  loop do
    skip_inline_ws
    lead = @src.getbyte(@pos)
    follower = @src.getbyte(@pos + 1)

    if lead == LF
      @pos += 1
      advance_line
    elsif lead == CR && follower == LF
      @pos += 2
      advance_line
    elsif lead == HASH || (lead == 0x2F && follower == 0x2F)
      skip_line_comment
    elsif lead == 0x2F && follower == 0x2A
      skip_block_comment
    else
      break
    end
  end
end
|
|
805
|
+
|
|
806
|
+
# Parse a block-form table whose entries start at exactly `indent` spaces.
#
# For each entry this reads any leading decorator lines (queued in
# @pending_leading and attached to the next key), the key, the ':' and then
# one of: an indented child block, inner decorator(s) with an optional
# inline value, or an inline value with optional trailing decorators.
#
# indent      - column (in spaces) at which every entry of this table starts
# path_prefix - path segments of the enclosing value
#
# Returns the table as a Hash. Stops (without consuming) at end of input, at
# a line indented less than `indent`, or at a '+' list marker.
#
# Raises DecodeError on inconsistent indentation, a missing ':' or missing
# whitespace after it, a missing child block, a key repeated within this
# table, or non-trivia content left on a line after a leading decorator or
# after an entry's value.
def parse_t1_table_block(indent, path_prefix)
  t = {}
  entry_line_start = nil # @line_start of the line holding the last parsed entry

  loop do
    skip_trivia
    break if eof?

    # Still on the line of the entry just parsed: something other than
    # trivia follows its value. Resetting @pos below would re-parse the same
    # line forever, so reject it here.
    if @line_start == entry_line_start
      raise DecodeError.new(@line, col, "unexpected content after value")
    end

    li = measure_line_indent
    break if li < indent

    if li != indent
      raise DecodeError.new(@line, col, "inconsistent indent: expected #{indent} spaces, got #{li}")
    end
    @pos = @line_start + indent

    b = @src.getbyte(@pos)

    # Leading decorators (sigil at line start): queue them for the next key.
    if sigil_at?(@pos)
      loop do
        sigil, fn_name, ns, params = parse_decorator_call
        @pending_leading << { sigil: sigil, fn_name: fn_name, ns: ns, params: params }
        call_line = @line

        # Consume the rest of the decorator line (including a lone CR).
        skip_inline_ws
        consume_eol
        skip_trivia
        break if eof?

        # If the line did not advance, real content follows the decorator on
        # its own line; rewinding to the line start below would parse the
        # same decorator again without end.
        if @line == call_line
          raise DecodeError.new(@line, col, "expected end of line after leading decorator")
        end

        li2 = measure_line_indent
        break if li2 < indent
        if li2 != indent
          raise DecodeError.new(@line, col, "inconsistent indent")
        end
        @pos = @line_start + indent
        break unless sigil_at?(@pos)
      end
      next
    end

    break if b.nil?
    break if b == PLUS && peek_after_plus?

    # Must be a key-value pair.
    entry_line_start = @line_start
    key_line = @line
    key_col = col
    key = parse_key_t1
    raise DecodeError.new(@line, col, "expected ':' after key") if @src.getbyte(@pos) != COLON
    # Checked before the value is stored: comparing after assignment can
    # never detect a repeat and misfires for values that are not equal to
    # themselves (NaN).
    raise DecodeError.new(key_line, key_col, "duplicate key: #{key}") if t.key?(key)
    @pos += 1 # consume ':'

    current_path = path_prefix + [{ "key" => key }]

    # Flush pending leading decorators onto this key's path.
    unless @pending_leading.empty?
      pending = @pending_leading.dup
      @pending_leading.clear
      pending.each do |dec|
        family, canonical_fn = resolve_call(dec[:sigil], dec[:fn_name], dec[:ns])
        add_decorator_call(current_path, dec[:sigil], family, canonical_fn, dec[:ns], :leading, dec[:params])
      end
    end

    b = @src.getbyte(@pos)
    unless b.nil? || b == SP || b == TAB || b == LF || b == CR
      raise DecodeError.new(@line, col, "expected whitespace after ':'")
    end
    if b == SP || b == TAB
      @pos += 1
      skip_inline_ws
    end

    nb = @src.getbyte(@pos)
    if nb.nil? || nb == LF || nb == CR
      # Nothing after the ':' on this line: an indented child block follows.
      consume_eol
      skip_trivia
      raise DecodeError.new(@line, col, "expected indented child block") if eof?
      child_indent = measure_line_indent
      raise DecodeError.new(@line, col, "expected indented child block") if child_indent <= indent
      v = parse_t1_block_value(child_indent, current_path)
    elsif sigil_at?(@pos)
      # Inner decorator(s); trailing decorators are consumed by the callee.
      v, = parse_t1_inner_and_value(current_path)
      skip_inline_ws
      consume_eol
    else
      # Inline value, possibly followed by trailing decorators.
      v = parse_t1_inline_value(current_path)
      skip_inline_ws
      parse_t1_trailing_decorators(current_path) if sigil_at?(@pos)
      skip_inline_ws
      consume_eol
    end

    t[key] = v
  end
  t
end
|
|
918
|
+
|
|
919
|
+
# Parse a block-form list whose '+' markers start at exactly `indent` spaces.
#
# Each item is either an indented block on the following lines (empty '+'
# marker), inner decorator(s) with an optional inline value, or whatever
# parse_t1_list_item_value reads from the rest of the marker line.
#
# Returns the items as an Array. Stops at end of input, at a line indented
# less than `indent`, or at a line that does not open with a '+' marker.
# Raises DecodeError on inconsistent indentation or a missing item block.
def parse_t1_list_block(indent, path_prefix)
  items = []
  loop do
    skip_trivia
    break if eof?

    line_indent = measure_line_indent
    break if line_indent < indent
    unless line_indent == indent
      raise DecodeError.new(@line, col, "inconsistent indent: expected #{indent} spaces, got #{line_indent}")
    end

    @pos = @line_start + indent
    break unless @src.getbyte(@pos) == PLUS && peek_after_plus?

    item_path = path_prefix + [{ "index" => items.length }]
    @pos += 1 # consume '+'

    after_marker = @src.getbyte(@pos)
    spaced = after_marker == SP || after_marker == TAB
    unless spaced || after_marker.nil? || after_marker == LF || after_marker == CR
      raise DecodeError.new(@line, col, "expected space after '+'")
    end
    if spaced
      @pos += 1
      skip_inline_ws
    end

    head = @src.getbyte(@pos)
    if head.nil? || head == LF || head == CR
      # Empty marker: the item is the indented block that follows.
      consume_eol
      skip_trivia
      raise DecodeError.new(@line, col, "expected indented block after empty '+' marker") if eof?
      inner_indent = measure_line_indent
      raise DecodeError.new(@line, col, "expected indented block") if inner_indent <= indent
      item = parse_t1_block_value(inner_indent, item_path)
    elsif sigil_at?(@pos)
      item, = parse_t1_inner_and_value(item_path)
      skip_inline_ws
      consume_eol
    else
      item = parse_t1_list_item_value(indent, item_path)
    end

    items << item
  end
  items
end
|
|
969
|
+
|
|
970
|
+
# Parse the block value that starts on the current line at column `indent`:
# a list block when the line opens with a '+' marker, a table block
# otherwise. Positions @pos at that column first.
def parse_t1_block_value(indent, path_prefix)
  @pos = @line_start + indent
  opens_list = @src.getbyte(@pos) == PLUS && peek_after_plus?
  opens_list ? parse_t1_list_block(indent, path_prefix) : parse_t1_table_block(indent, path_prefix)
end
|
|
977
|
+
|
|
978
|
+
# Parse what follows a '+ ' list marker on the same line.
#
# When the text looks like `key:` the item becomes a table holding that one
# pair; its value may be an indented child block, inner decorator(s) with an
# optional value, or an inline value with optional trailing decorators.
# Otherwise the item is a single inline value with optional trailing
# decorators.
#
# NOTE(review): only the first key/value pair is read here; sibling pairs on
# later lines are not collected by this method.
#
# list_indent - column of the '+' marker; child blocks must be indented deeper
# path_prefix - path of the list item
#
# Returns the item value (a one-entry Hash for the key/value form).
def parse_t1_list_item_value(list_indent, path_prefix)
  unless line_starts_kvpair_t1?
    # Scalar or flow value
    scalar = parse_t1_inline_value(path_prefix)
    skip_inline_ws
    parse_t1_trailing_decorators(path_prefix) if sigil_at?(@pos)
    skip_inline_ws
    consume_eol
    return scalar
  end

  key = parse_key_t1
  raise DecodeError.new(@line, col, "expected ':' after key") if @src.getbyte(@pos) != COLON
  @pos += 1
  entry_path = path_prefix + [{ "key" => key }]

  sep = @src.getbyte(@pos)
  if sep == SP || sep == TAB
    @pos += 1
    skip_inline_ws
    head = @src.getbyte(@pos)
    if head.nil? || head == LF || head == CR
      consume_eol
      skip_trivia
      raise DecodeError.new(@line, col, "expected indented child block") if eof?
      child_indent = measure_line_indent
      raise DecodeError.new(@line, col, "expected indented child block") if child_indent <= list_indent
      value = parse_t1_block_value(child_indent, entry_path)
    elsif sigil_at?(@pos)
      value, = parse_t1_inner_and_value(entry_path)
      skip_inline_ws
      consume_eol
    else
      value = parse_t1_inline_value(entry_path)
      skip_inline_ws
      parse_t1_trailing_decorators(entry_path) if sigil_at?(@pos)
      skip_inline_ws
      consume_eol
    end
  elsif sep.nil? || sep == LF || sep == CR
    consume_eol
    skip_trivia
    child_indent = measure_line_indent
    raise DecodeError.new(@line, col, "expected indented child block") if child_indent <= list_indent
    value = parse_t1_block_value(child_indent, entry_path)
  else
    raise DecodeError.new(@line, col, "expected whitespace after ':'")
  end

  { key => value }
end
|
|
1025
|
+
|
|
1026
|
+
# Parse one or more inner decorator calls, then the value they decorate if
# one follows on the same line.
#
# Expects @pos to sit on the first sigil. Every call is recorded against
# `path` with position :inner. When the line ends right after the
# decorators, the decoration-only placeholder is used as the value;
# otherwise an inline value is parsed, followed by any trailing decorators.
#
# Returns [value, has_explicit_value].
def parse_t1_inner_and_value(path)
  until eof? || !sigil_at?(@pos)
    sigil, fn_name, ns, params = parse_decorator_call
    family, canonical_fn = resolve_call(sigil, fn_name, ns)
    add_decorator_call(path, sigil, family, canonical_fn, ns, :inner, params)
    skip_inline_ws
  end

  upcoming = @src.getbyte(@pos)
  if upcoming.nil? || upcoming == LF || upcoming == CR
    return [parse_decoration_only_placeholder(path), false]
  end

  value = parse_t1_inline_value(path)
  skip_inline_ws
  parse_t1_trailing_decorators(path) if sigil_at?(@pos)
  [value, true]
end
|
|
1052
|
+
|
|
1053
|
+
# Value used for a decoration-only entry (decorators with no explicit value).
#
# Always returns a new empty table; `path` is not inspected and no input is
# consumed.
def parse_decoration_only_placeholder(path)
  placeholder = {}
  placeholder
end
|
|
1059
|
+
|
|
1060
|
+
# Parse every decorator call that follows a value on the same line and
# record each against `path` with position :trailing. Inline whitespace
# after each call is skipped; parsing stops at the first non-sigil byte.
def parse_t1_trailing_decorators(path)
  loop do
    break if eof? || !sigil_at?(@pos)

    sigil, fn_name, ns, params = parse_decorator_call
    family, canonical_fn = resolve_call(sigil, fn_name, ns)
    add_decorator_call(path, sigil, family, canonical_fn, ns, :trailing, params)
    skip_inline_ws
  end
end
|
|
1068
|
+
|
|
1069
|
+
# Parse one decorator call: a run of sigil atoms, a bare name that may be
# qualified as `ns.name`, then zero or more parenthesised parameter groups.
#
# An empty `()` group, and a call with no groups at all, both produce a
# single named group holding an empty map.
#
# Returns [sigil, fn_name, ns, params]; ns is nil when the name carries no
# '.' qualifier. Raises DecodeError when the sigil or a name is missing, or
# when a parameter group is not closed by ')'.
def parse_decorator_call
  # Sigil: consecutive atoms as recognised by Tier1.lex_sigil_atom_at.
  sigil_from = @pos
  until eof?
    atom_width = Tier1.lex_sigil_atom_at(@src, @pos)
    break if atom_width.nil?
    @pos += atom_width
  end
  sigil = @src.byteslice(sigil_from, @pos - sigil_from).force_encoding("UTF-8")
  raise DecodeError.new(@line, col, "empty decorator sigil") if sigil.empty?

  # Name: either `name` or `ns.name`.
  head = parse_bare_ident
  raise DecodeError.new(@line, col, "expected decorator name after sigil '#{sigil}'") if head.empty?

  ns = nil
  fn_name = head
  if @src.getbyte(@pos) == DOT
    @pos += 1 # consume '.'
    tail = parse_bare_ident
    raise DecodeError.new(@line, col, "expected name after '.' in decorator call") if tail.empty?
    ns = head
    fn_name = tail
  end

  # Parameter groups: '(' ... ')' repeated.
  params = []
  skip_inline_ws
  while @src.getbyte(@pos) == LPAREN
    @pos += 1 # consume '('
    skip_flow_ws_t1
    if @src.getbyte(@pos) == RPAREN
      @pos += 1
      params << ParamGroup.new(:named, {})
    else
      params << parse_param_group
      skip_flow_ws_t1
      raise DecodeError.new(@line, col, "expected ')' after params") if @src.getbyte(@pos) != RPAREN
      @pos += 1
    end
    skip_inline_ws
  end

  params << ParamGroup.new(:named, {}) if params.empty?

  [sigil, fn_name, ns, params]
end
|
|
1127
|
+
|
|
1128
|
+
# Parse the contents of one parameter group; the opening '(' has already
# been consumed and the closing ')' is left for the caller.
#
# The group is named when it opens with `ident :` (see
# looks_like_named_params?) and positional otherwise. Entries are separated
# by ','; a trailing ',' before ')' is accepted.
#
# Returns ParamGroup.new(:named, Hash) or ParamGroup.new(:positional, Array).
def parse_param_group
  skip_flow_ws_t1

  if looks_like_named_params?
    named = {}
    loop do
      skip_flow_ws_t1
      upcoming = @src.getbyte(@pos)
      break if upcoming.nil? || upcoming == RPAREN

      name = parse_bare_ident
      raise DecodeError.new(@line, col, "expected key in named params") if name.empty?
      skip_flow_ws_t1
      raise DecodeError.new(@line, col, "expected ':' after param key") if @src.getbyte(@pos) != COLON
      @pos += 1
      skip_flow_ws_t1
      value = parse_t1_flow_value([])
      named[name] = value

      skip_flow_ws_t1
      sep = @src.getbyte(@pos)
      break if sep.nil? || sep == RPAREN
      unless sep == COMMA
        raise DecodeError.new(@line, col, "expected ',' or ')' in named params, got '#{sep.nil? ? "EOF" : sep.chr}'")
      end
      @pos += 1
    end
    ParamGroup.new(:named, named)
  else
    values = []
    loop do
      skip_flow_ws_t1
      upcoming = @src.getbyte(@pos)
      break if upcoming.nil? || upcoming == RPAREN

      values << parse_t1_flow_value([])

      skip_flow_ws_t1
      sep = @src.getbyte(@pos)
      break if sep.nil? || sep == RPAREN
      raise DecodeError.new(@line, col, "expected ',' or ')' in positional params") unless sep == COMMA
      @pos += 1
    end
    ParamGroup.new(:positional, values)
  end
end
|
|
1183
|
+
|
|
1184
|
+
# Look ahead, without moving @pos, to decide whether a parameter group is
# written in named form: at least one identifier byte (a BARE_KEY_BYTE entry
# or any byte >= 128), optional spaces/tabs, then ':'.
def looks_like_named_params?
  cursor = @pos
  while cursor < @len
    byte = @src.getbyte(cursor)
    break if byte.nil? || !(BARE_KEY_BYTE[byte] || byte >= 128)
    cursor += 1
  end
  return false if cursor == @pos # no identifier bytes at all

  cursor += 1 while cursor < @len && [SP, TAB].include?(@src.getbyte(cursor))

  cursor < @len && @src.getbyte(cursor) == COLON
end
|
|
1204
|
+
|
|
1205
|
+
# Parse one value in flow context (decorator params, flow arrays and flow
# tables): '[' opens a flow array, '{' a flow table, anything else is read
# as a raw inline value.
def parse_t1_flow_value(path)
  case @src.getbyte(@pos)
  when LBRACK then parse_t1_flow_array(path)
  when LBRACE then parse_t1_flow_table(path)
  else parse_t1_inline_value_raw
  end
end
|
|
1215
|
+
|
|
1216
|
+
# Parse the inline value at @pos. Flow arrays ('[') and flow tables ('{')
# are handled by the tier-1 flow parsers so that `path` is threaded through;
# every other value is read by parse_t1_inline_value_raw.
def parse_t1_inline_value(path)
  opener = @src.getbyte(@pos)
  return parse_t1_flow_array(path) if opener == LBRACK
  return parse_t1_flow_table(path) if opener == LBRACE

  parse_t1_inline_value_raw
end
|
|
1226
|
+
|
|
1227
|
+
# Parse a flow array `[ ... ]` with decorator awareness; @pos sits on '['.
#
# An element may be preceded by inner decorator calls, which are recorded
# against that element's index path. An element made of decorators only
# (followed directly by ',', ']' or end of input) is stored as an empty
# table. A trailing ',' before ']' is accepted.
#
# Returns the elements as an Array. Raises DecodeError when the array is
# not terminated or an unexpected byte follows an element.
def parse_t1_flow_array(path)
  @pos += 1 # consume '['
  items = []
  loop do
    skip_flow_ws_t1
    if @src.getbyte(@pos) == RBRACK
      @pos += 1
      return items
    end

    item_path = path + [{ "index" => items.length }]

    decorated = sigil_at?(@pos)
    while sigil_at?(@pos)
      sigil, fn_name, ns, params = parse_decorator_call
      family, canonical_fn = resolve_call(sigil, fn_name, ns)
      add_decorator_call(item_path, sigil, family, canonical_fn, ns, :inner, params)
      skip_inline_ws
    end

    upcoming = @src.getbyte(@pos)
    if decorated && (upcoming == COMMA || upcoming == RBRACK || upcoming.nil?)
      # Decoration-only element: empty table placeholder.
      items << {}
    else
      items << parse_t1_flow_value(item_path)
    end

    skip_flow_ws_t1
    sep = @src.getbyte(@pos)
    case sep
    when COMMA
      @pos += 1
    when RBRACK
      @pos += 1
      return items
    when nil
      raise DecodeError.new(@line, col, "unterminated flow array")
    else
      raise DecodeError.new(@line, col, "unexpected '#{sep.chr}' in flow array")
    end
  end
end
|
|
1280
|
+
|
|
1281
|
+
# Parse a flow table `{ key: value, ... }`; @pos sits on '{'.
#
# Each value is parsed with the path extended by its key. A trailing ','
# before '}' is accepted, and a repeated key keeps the last value.
#
# Returns the table as a Hash. Raises DecodeError on a missing ':', an
# unterminated table, or an unexpected byte after a value.
def parse_t1_flow_table(path)
  @pos += 1 # consume '{'
  table = {}
  loop do
    skip_flow_ws_t1
    if @src.getbyte(@pos) == RBRACE
      @pos += 1
      return table
    end

    key = parse_key_t1
    skip_flow_ws_t1
    raise DecodeError.new(@line, col, "expected ':' after flow-table key") unless @src.getbyte(@pos) == COLON
    @pos += 1
    skip_flow_ws_t1
    value = parse_t1_flow_value(path + [{ "key" => key }])
    table[key] = value

    skip_flow_ws_t1
    sep = @src.getbyte(@pos)
    case sep
    when COMMA
      @pos += 1
    when RBRACE
      @pos += 1
      return table
    when nil
      raise DecodeError.new(@line, col, "unterminated flow table")
    else
      raise DecodeError.new(@line, col, "unexpected '#{sep.chr}' in flow table")
    end
  end
end
|
|
1313
|
+
|
|
1314
|
+
# ── Low-level helpers ────────────────────────────────────────────────
|
|
1315
|
+
|
|
1316
|
+
# Parse one non-flow inline value by handing the source and the current
# position to an InlineValueParser, then adopt the position, line number and
# line start it reports once the value has been read.
def parse_t1_inline_value_raw
  helper = InlineValueParser.new(@src, @pos, @line, @line_start, self)
  helper.parse_one_value.tap do
    @pos = helper.pos
    @line = helper.line
    @line_start = helper.line_start
  end
end
|
|
1328
|
+
|
|
1329
|
+
# Record one decorator call against `path`.
#
# The DecoratorEntry for the path is created on first use (holding a copy of
# the path) and the call is appended to that entry's list for `sigil`, so
# calls sharing a sigil keep their source order.
def add_decorator_call(path, sigil, family, fn_name, ns, position, params)
  entry = (@dec_entries[path_to_key(path)] ||= DecoratorEntry.new(path.dup, {}, []))
  call = DecoratorCall.new(sigil, family, fn_name, ns, position, params, [])
  (entry.calls[sigil] ||= []) << call
end
|
|
1340
|
+
|
|
1341
|
+
# Resolve a decorator call through Tier1.resolve_family using this parser's
# imports, and return that method's result unchanged. Callers in this file
# destructure it as [family, canonical_fn].
def resolve_call(sigil, fn_name, ns)
  active_imports = @imports
  Tier1.resolve_family(sigil, fn_name, ns, active_imports)
end
|
|
1344
|
+
|
|
1345
|
+
# Build the string used to index @dec_entries for a path.
#
# Key segments render as "k:<key>", index segments as "i:<n>", joined by
# "/". Inside a key, "\" and "/" are backslash-escaped so that a key which
# itself contains "/" (possible for quoted keys) cannot produce the same
# string as a deeper path; without the escape, the key "a/k:b" and the path
# a -> b would share one decorator entry. Keys without those two characters
# render exactly as before.
#
# path - Array of segments, each {"key" => String} or {"index" => Integer}
#
# Returns a String.
def path_to_key(path)
  path.map do |seg|
    next "i:#{seg["index"]}" unless seg.key?("key")

    key = seg["key"].to_s
    # Escape on a binary view so keys with invalid UTF-8 cannot make the
    # regexp match raise, then restore the key's own encoding.
    escaped = key.b.gsub(/[\\\/]/n) { |ch| "\\#{ch}" }.force_encoding(key.encoding)
    "k:#{escaped}"
  end.join("/")
end
|
|
1348
|
+
|
|
1349
|
+
# Skip everything that carries no value: inline whitespace, LF and CRLF
# line ends (updating the line bookkeeping), '#' and '//' line comments and
# '/* */' block comments. A CR not followed by LF is not consumed.
def skip_trivia
  loop do
    skip_inline_ws
    case @src.getbyte(@pos)
    when LF
      @pos += 1
      advance_line
    when CR
      break unless @src.getbyte(@pos + 1) == LF
      @pos += 2
      advance_line
    when HASH
      skip_line_comment
    when 0x2F # '/'
      follower = @src.getbyte(@pos + 1)
      if follower == 0x2F
        skip_line_comment
      elsif follower == 0x2A
        skip_block_comment
      else
        break
      end
    else
      break
    end
  end
end
|
|
1368
|
+
|
|
1369
|
+
# Advance @pos past any run of spaces and tabs.
def skip_inline_ws
  loop do
    byte = @src.getbyte(@pos)
    break unless byte == SP || byte == TAB
    @pos += 1
  end
end
|
|
1372
|
+
|
|
1373
|
+
# Consume a line comment through the end of its line. The terminator may be
# LF, CRLF or a lone CR and is consumed too, with the line bookkeeping
# updated; at end of input the comment simply ends.
def skip_line_comment
  until @pos >= @len
    byte = @src.getbyte(@pos)
    @pos += 1
    next unless byte == LF || byte == CR

    @pos += 1 if byte == CR && @src.getbyte(@pos) == LF
    advance_line
    break
  end
end
|
|
1385
|
+
|
|
1386
|
+
# Consume a '/* ... */' comment; @pos sits on the opening '/*'. Line ends
# inside the comment (LF, CRLF or lone CR) update the line bookkeeping.
# Raises DecodeError when the input ends before the closing '*/'.
def skip_block_comment
  @pos += 2 # consume '/*'
  loop do
    raise DecodeError.new(@line, col, "unterminated block comment") if eof?

    case @src.getbyte(@pos)
    when 0x2A # '*'
      if @src.getbyte(@pos + 1) == 0x2F
        @pos += 2
        break
      end
      @pos += 1
    when LF
      @pos += 1
      advance_line
    when CR
      @pos += 1
      @pos += 1 if @src.getbyte(@pos) == LF
      advance_line
    else
      @pos += 1
    end
  end
end
|
|
1406
|
+
|
|
1407
|
+
# Skip whitespace inside flow constructs and parameter lists: spaces, tabs,
# and LF / CRLF line ends (updating the line bookkeeping). Comments and a CR
# not followed by LF are not consumed.
def skip_flow_ws_t1
  loop do
    case @src.getbyte(@pos)
    when SP, TAB
      @pos += 1
    when LF
      @pos += 1
      advance_line
    when CR
      break unless @src.getbyte(@pos + 1) == LF
      @pos += 2
      advance_line
    else
      break
    end
  end
end
|
|
1421
|
+
|
|
1422
|
+
# Consume one line terminator at @pos (LF, CRLF or a lone CR) and update
# the line bookkeeping. Returns true when a terminator was consumed and
# false, without moving, otherwise.
def consume_eol
  lead = @src.getbyte(@pos)
  return false unless lead == LF || lead == CR

  @pos += 1
  @pos += 1 if lead == CR && @src.getbyte(@pos) == LF
  advance_line
  true
end
|
|
1434
|
+
|
|
1435
|
+
# Note that a new line begins at @pos: bump the line counter and remember
# @pos as the byte offset where that line starts.
def advance_line
  @line = @line.succ
  @line_start = @pos
end
|
|
1439
|
+
|
|
1440
|
+
# True once @pos has reached (or passed) the end of the source.
def eof?
  @len <= @pos
end
|
|
1443
|
+
|
|
1444
|
+
# 1-based column of @pos on the current line, counted in bytes from the
# start of that line.
def col
  1 + (@pos - @line_start)
end
|
|
1448
|
+
|
|
1449
|
+
# True when the bytes of the source at @pos equal the string `s`.
def starts_bytes?(s)
  window = @src.byteslice(@pos, s.bytesize)
  window == s
end
|
|
1452
|
+
|
|
1453
|
+
# Count the space characters at the start of the current line (from
# @line_start). Tabs are not counted. @pos is left untouched.
def measure_line_indent
  cursor = @line_start
  cursor += 1 while cursor < @len && @src.getbyte(cursor) == SP
  cursor - @line_start
end
|
|
1461
|
+
|
|
1462
|
+
# With @pos on a '+', report whether it can be a list marker: the byte after
# it must be end of input, a space, a tab, or a line terminator.
def peek_after_plus?
  follower = @src.getbyte(@pos + 1)
  follower.nil? || [SP, TAB, LF, CR].include?(follower)
end
|
|
1466
|
+
|
|
1467
|
+
# Parse a key at @pos, choosing the reader from its first byte: a
# double-quoted key, a single-quoted (literal) key, or a bare key.
def parse_key_t1
  case @src.getbyte(@pos)
  when DQUOTE then parse_dquote_key
  when SQUOTE then parse_squote_key
  else parse_bare_ident_key
  end
end
|
|
1477
|
+
|
|
1478
|
+
# Parse a bare (unquoted) key at @pos. ASCII bytes must be listed in
# BARE_KEY_BYTE; every byte >= 128 is accepted as part of the key.
#
# Returns the key tagged as UTF-8 and leaves @pos just past it.
# Raises DecodeError when no key byte is present.
def parse_bare_ident_key
  first = @pos
  cursor = first
  while cursor < @len
    byte = @src.getbyte(cursor)
    break if byte.nil? || (byte < 128 && !BARE_KEY_BYTE[byte])
    cursor += 1
  end
  raise DecodeError.new(@line, col, "expected key") if cursor == first

  @pos = cursor
  @src.byteslice(first, cursor - first).force_encoding("UTF-8")
end
|
|
1489
|
+
|
|
1490
|
+
# Parse an ASCII-only bare identifier at @pos (bytes listed in
# BARE_KEY_BYTE). Returns the identifier tagged as UTF-8, which is empty
# when no identifier byte is present, and leaves @pos just past it.
def parse_bare_ident
  first = @pos
  cursor = first
  while cursor < @len
    byte = @src.getbyte(cursor)
    break if byte.nil? || byte >= 128 || !BARE_KEY_BYTE[byte]
    cursor += 1
  end
  @pos = cursor
  @src.byteslice(first, cursor - first).force_encoding("UTF-8")
end
|
|
1500
|
+
|
|
1501
|
+
# Parse a double-quoted key; @pos sits on the opening '"'. Backslash escapes
# are decoded by parse_escape and every other byte is copied as is.
#
# Returns the key text and leaves @pos just past the closing quote.
# Raises DecodeError when a line feed or the end of input is reached first.
def parse_dquote_key
  @pos += 1 # consume '"'
  key = +""
  loop do
    byte = @src.getbyte(@pos)
    raise DecodeError.new(@line, col, "unterminated quoted key") if byte.nil? || byte == LF

    @pos += 1
    case byte
    when 0x5C # backslash
      key << parse_escape
    when DQUOTE
      return key
    else
      key << @src.byteslice(@pos - 1, 1).force_encoding("UTF-8")
    end
  end
end
|
|
1519
|
+
|
|
1520
|
+
# Parse a single-quoted (literal) key; @pos sits on the opening "'". The
# text between the quotes is taken verbatim, with no escape processing.
#
# Returns the key tagged as UTF-8 and leaves @pos just past the closing
# quote. Raises DecodeError when a line feed or the end of input is reached
# first.
def parse_squote_key
  @pos += 1 # consume "'"
  opening = @pos
  loop do
    byte = @src.getbyte(@pos)
    raise DecodeError.new(@line, col, "unterminated literal key") if byte.nil? || byte == LF

    @pos += 1
    return @src.byteslice(opening, @pos - 1 - opening).force_encoding("UTF-8") if byte == SQUOTE
  end
end
|
|
1534
|
+
|
|
1535
|
+
# Decode the escape whose introducing backslash has already been consumed;
# @pos sits on the escape character.
#
# Recognised escapes are \b \f \n \r \t \" and \\. Any other byte is kept
# verbatim together with its backslash.
#
# Returns the decoded text as a UTF-8 tagged String and advances @pos by
# one byte. Raises DecodeError when the input ends right after the
# backslash.
def parse_escape
  b = @src.getbyte(@pos)
  # A dangling backslash at end of input used to fail with NoMethodError on
  # nil; report it as a decode error like every other malformed input.
  raise DecodeError.new(@line, col, "unterminated escape sequence") if b.nil?

  @pos += 1
  case b
  when 0x62 then "\b"
  when 0x66 then "\f"
  when 0x6E then "\n"
  when 0x72 then "\r"
  when 0x74 then "\t"
  when DQUOTE then "\""
  when 0x5C then "\\"
  else
    # Built from raw bytes and tagged UTF-8: Integer#chr yields an
    # ASCII-8BIT string for bytes >= 128, which cannot be appended to a
    # UTF-8 buffer that already holds non-ASCII text.
    [0x5C, b].pack("C2").force_encoding("UTF-8")
  end
end
|
|
1549
|
+
|
|
1550
|
+
# Look ahead, without moving @pos, to decide whether the text at @pos opens
# a `key:` pair: a double-quoted, single-quoted or bare key directly
# followed by ':' and then whitespace, a line terminator or end of input.
#
# The bare-key scan accepts the same bytes as parse_bare_ident_key (ASCII
# bytes listed in BARE_KEY_BYTE plus every byte >= 128), so a non-ASCII bare
# key is recognised in a list item just as it is in a table block.
def line_starts_kvpair_t1?
  p = @pos
  s = @src
  n = @len

  case s.getbyte(p)
  when DQUOTE
    p += 1
    while p < n
      b = s.getbyte(p)
      if b == 0x5C
        p += 2 # skip the backslash and the escaped byte
      elsif b == DQUOTE
        p += 1
        break
      elsif b == LF || b == CR
        return false
      else
        p += 1
      end
    end
  when SQUOTE
    p += 1
    while p < n
      b = s.getbyte(p)
      if b == SQUOTE
        p += 1
        break
      elsif b == LF || b == CR
        return false
      else
        p += 1
      end
    end
  else
    start = p
    while p < n
      b = s.getbyte(p)
      break unless b < 128 ? BARE_KEY_BYTE[b] : true
      p += 1
    end
    return false if p == start
  end

  return false if p >= n || s.getbyte(p) != COLON

  nxt = s.getbyte(p + 1)
  nxt.nil? || nxt == SP || nxt == TAB || nxt == LF || nxt == CR
end
|
|
1587
|
+
end
|
|
1588
|
+
|
|
1589
|
+
# ── InlineValueParser ────────────────────────────────────────────────────
|
|
1590
|
+
#
|
|
1591
|
+
# A thin helper that parses one inline value starting at a given byte offset
# of the source. The text from that offset to the end of the source is
# handed to a SingleValueParser; the number of bytes it reports as consumed
# is mapped back to an absolute position, line number and line start.
class InlineValueParser
  # Absolute byte offset, 1-based line number and byte offset of the line
  # start, valid after parse_one_value.
  attr_reader :pos, :line, :line_start

  # src              - full source text
  # start_pos        - byte offset at which the value begins
  # start_line       - line number at start_pos
  # start_line_start - byte offset of the start of that line
  # outer            - the parser that created this helper (stored only)
  def initialize(src, start_pos, start_line, start_line_start, outer)
    @src = src
    @pos = start_pos
    @line = start_line
    @line_start = start_line_start
    @outer = outer
    @len = src.bytesize
  end

  # Parse one value at the current offset and return it.
  #
  # Afterwards pos is just past the consumed bytes, line is 1 plus the
  # number of line feeds before pos, and line_start is the byte offset that
  # follows the last such line feed (0 when there is none).
  def parse_one_value
    sub_src = @src.byteslice(@pos, @len - @pos).force_encoding("UTF-8")
    value_parser = SingleValueParser.new(sub_src)
    val = value_parser.parse_value_at_start
    new_src_pos = @pos + value_parser.consumed

    # Recount from the start of the source on a binary view, so that rindex
    # yields a byte offset (on UTF-8 text it would be a character index,
    # which differs once multi-byte characters precede the line feed).
    # The earlier per-byte pass was dropped: its results were overwritten
    # here, and it called force_encoding on a frozen string literal, which
    # raises FrozenError as soon as a consumed value contains a line feed.
    prefix = @src.byteslice(0, new_src_pos).force_encoding(Encoding::BINARY)
    @line = 1 + prefix.count("\n")
    last_nl = prefix.rindex("\n")
    @line_start = last_nl ? last_nl + 1 : 0
    @pos = new_src_pos
    val
  end
end
|
|
1639
|
+
|
|
1640
|
+
# SingleValueParser: reads one value from the start of a string by running a
# fresh Dms::Parser (created with lite: true) over it.
class SingleValueParser
  # Value of the tier-0 parser's @pos after the value was read; 0 before
  # parse_value_at_start has run. Callers treat it as a byte count.
  attr_reader :consumed

  def initialize(src)
    @src = src
    @parser = nil
    @consumed = 0
  end

  # Parse the value at the start of the string via
  # Dms::Parser#parse_inline_value_or_heredoc, record how far that parser
  # advanced, and return the value.
  def parse_value_at_start
    tier0 = Dms::Parser.new(@src, lite: true)
    tier0.parse_inline_value_or_heredoc.tap do
      @consumed = tier0.instance_variable_get(:@pos)
    end
  end
end
|
|
1664
|
+
|
|
1665
|
+
# ── JSON emission ────────────────────────────────────────────────────────
|
|
1666
|
+
|
|
1667
|
+
# Builds the JSON-ready Hash for a tier-1 document.
#
# @param doc_t1 [#imports, #t0, #decorators, #observed_tier] parsed document
# @param tag_fn [#call] applied to the tier-0 body and passed on to
#   entry_to_json for each decorator entry
# @return [Hash] with the keys "tier", "imports", "body" and "decorators"
def self.emit_t1_json(doc_t1, tag_fn)
  imports = doc_t1.imports.map do |import|
    import_to_json(import)
  end
  body = tag_fn.call(doc_t1.t0.body)
  decorators = doc_t1.decorators.map do |decorator_entry|
    entry_to_json(decorator_entry, tag_fn)
  end

  result = {}
  result["tier"] = doc_t1.observed_tier
  result["imports"] = imports
  result["body"] = body
  result["decorators"] = decorators
  result
end
|
|
1679
|
+
|
|
1680
|
+
# Converts one dialect import into a JSON-ready Hash.
#
# The bind / allow / deny / alias_map collections are each copied pair by
# pair into a fresh Hash (values are not duplicated), so the result does not
# share its top-level containers with the import object.
#
# @param imp [#dialect, #version, #ns, #bind, #allow, #deny, #alias_map]
# @return [Hash] with the keys "dialect", "version", "ns", "bind", "allow",
#   "deny" and "alias"
def self.import_to_json(imp)
  copy_pairs = lambda do |pairs|
    pairs.each_with_object({}) { |(name, payload), copy| copy[name] = payload }
  end

  bindings = copy_pairs.call(imp.bind)
  allowed = copy_pairs.call(imp.allow)
  denied = copy_pairs.call(imp.deny)
  aliases = copy_pairs.call(imp.alias_map)

  {
    "dialect" => imp.dialect,
    "version" => imp.version,
    "ns" => imp.ns,
    "bind" => bindings,
    "allow" => allowed,
    "deny" => denied,
    "alias" => aliases
  }
end
|
|
1705
|
+
|
|
1706
|
+
# Converts one decorator sidecar entry into a JSON-ready Hash.
#
# Each path segment is reduced to a single-key Hash: "key" when the segment
# has a "key" entry, otherwise "index". Calls are grouped per sigil and
# converted with call_to_json. "comments" is always emitted as an empty
# Array.
#
# @param entry [#path, #calls] decorator entry
# @param tag_fn [#call] forwarded to call_to_json
# @return [Hash] with the keys "path", "calls" and "comments"
def self.entry_to_json(entry, tag_fn)
  segments = entry.path.map do |segment|
    field = segment.key?("key") ? "key" : "index"
    { field => segment[field] }
  end

  calls_by_sigil = entry.calls.each_with_object({}) do |(sigil, sigil_calls), grouped|
    grouped[sigil] = sigil_calls.map { |single_call| call_to_json(single_call, tag_fn) }
  end

  { "path" => segments, "calls" => calls_by_sigil, "comments" => [] }
end
|
|
1726
|
+
|
|
1727
|
+
# Converts one decorator call into a JSON-ready Hash.
#
# Parameter groups are converted with param_group_to_json; the call's
# position is stringified with #to_s. "params_dec" is always emitted as an
# empty Array.
#
# @param call [#family, #fn_name, #ns, #position, #params] decorator call
# @param tag_fn [#call] forwarded to param_group_to_json
# @return [Hash] with the keys "family", "fn", "ns", "position", "params"
#   and "params_dec"
def self.call_to_json(call, tag_fn)
  encoded_params = call.params.map do |group|
    param_group_to_json(group, tag_fn)
  end

  encoded = {}
  encoded["family"] = call.family
  encoded["fn"] = call.fn_name
  encoded["ns"] = call.ns
  encoded["position"] = call.position.to_s
  encoded["params"] = encoded_params
  encoded["params_dec"] = []
  encoded
end
|
|
1738
|
+
|
|
1739
|
+
# Converts one parameter group into a JSON-ready Hash.
#
# A :named group yields a Hash mapping each name to its tagged value; a
# :positional group yields an Array of tagged values. Any other kind
# produces nil.
#
# @param pg [#kind, #value] parameter group
# @param tag_fn [#call] applied to every parameter value
# @return [Hash, nil] { "kind" => ..., "value" => ... }, or nil for an
#   unrecognised kind
def self.param_group_to_json(pg, tag_fn)
  label, tagged =
    case pg.kind
    when :named
      ["named", pg.value.map { |name, raw| [name, tag_fn.call(raw)] }.to_h]
    when :positional
      ["positional", pg.value.map { |raw| tag_fn.call(raw) }]
    end

  { "kind" => label, "value" => tagged } if label
end
|
|
1749
|
+
end
|
|
1750
|
+
end
|