odin-foundation 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/odin/diff/differ.rb +115 -0
- data/lib/odin/diff/patcher.rb +64 -0
- data/lib/odin/export.rb +330 -0
- data/lib/odin/parsing/parser.rb +1193 -0
- data/lib/odin/parsing/token.rb +26 -0
- data/lib/odin/parsing/token_type.rb +40 -0
- data/lib/odin/parsing/tokenizer.rb +825 -0
- data/lib/odin/parsing/value_parser.rb +322 -0
- data/lib/odin/resolver/import_resolver.rb +137 -0
- data/lib/odin/serialization/canonicalize.rb +112 -0
- data/lib/odin/serialization/stringify.rb +582 -0
- data/lib/odin/transform/format_exporters.rb +819 -0
- data/lib/odin/transform/source_parsers.rb +385 -0
- data/lib/odin/transform/transform_engine.rb +2837 -0
- data/lib/odin/transform/transform_parser.rb +979 -0
- data/lib/odin/transform/transform_types.rb +278 -0
- data/lib/odin/transform/verb_context.rb +87 -0
- data/lib/odin/transform/verbs/aggregation_verbs.rb +106 -0
- data/lib/odin/transform/verbs/collection_verbs.rb +640 -0
- data/lib/odin/transform/verbs/datetime_verbs.rb +602 -0
- data/lib/odin/transform/verbs/financial_verbs.rb +356 -0
- data/lib/odin/transform/verbs/geo_verbs.rb +125 -0
- data/lib/odin/transform/verbs/numeric_verbs.rb +434 -0
- data/lib/odin/transform/verbs/object_verbs.rb +123 -0
- data/lib/odin/types/array_item.rb +42 -0
- data/lib/odin/types/diff.rb +89 -0
- data/lib/odin/types/directive.rb +28 -0
- data/lib/odin/types/document.rb +92 -0
- data/lib/odin/types/document_builder.rb +67 -0
- data/lib/odin/types/dyn_value.rb +270 -0
- data/lib/odin/types/errors.rb +149 -0
- data/lib/odin/types/modifiers.rb +45 -0
- data/lib/odin/types/ordered_map.rb +79 -0
- data/lib/odin/types/schema.rb +262 -0
- data/lib/odin/types/value_type.rb +28 -0
- data/lib/odin/types/values.rb +618 -0
- data/lib/odin/types.rb +12 -0
- data/lib/odin/utils/format_utils.rb +186 -0
- data/lib/odin/utils/path_utils.rb +25 -0
- data/lib/odin/utils/security_limits.rb +17 -0
- data/lib/odin/validation/format_validators.rb +238 -0
- data/lib/odin/validation/redos_protection.rb +102 -0
- data/lib/odin/validation/schema_parser.rb +813 -0
- data/lib/odin/validation/schema_serializer.rb +262 -0
- data/lib/odin/validation/validator.rb +1061 -0
- data/lib/odin/version.rb +5 -0
- data/lib/odin.rb +90 -0
- metadata +160 -0
|
@@ -0,0 +1,1193 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Odin
|
|
4
|
+
module Parsing
|
|
5
|
+
class OdinParser
|
|
6
|
+
MAX_NESTING_DEPTH = Utils::SecurityLimits::MAX_DEPTH
|
|
7
|
+
MAX_ARRAY_INDEX = Utils::SecurityLimits::MAX_ARRAY_INDEX
|
|
8
|
+
|
|
9
|
+
# Parses ODIN source +text+ into a document structure.
#
# The input is normalized to UTF-8 before tokenization; +options+ is passed
# through to build_document unchanged.
def parse(text, options = nil)
  if text.is_a?(String) && text.encoding != Encoding::UTF_8
    text = text.encode("UTF-8")
  end
  build_document(Tokenizer.new(text).tokenize, text, options)
end
|
|
14
|
+
|
|
15
|
+
private
|
|
16
|
+
|
|
17
|
+
# Main parse loop: walks the token stream, dispatching on token type to the
# header/assignment/tabular/directive sub-parsers, then finalizes the
# accumulated document(s).
#
# All parser state lives in instance variables initialized here:
# context tracking, duplicate-path bookkeeping, tabular-mode state,
# document-chaining buffers, and collected directives.
def build_document(tokens, source, options)
  @tokens = tokens
  @source = source
  @pos = 0

  # State
  @context = ""
  @previous_context = ""
  @metadata_mode = false
  @assigned_paths = {}
  @array_indices = {}

  # Tabular state
  @tabular_mode = false
  @tabular_primitive = false
  @tabular_columns = []
  @tabular_array_path = ""
  @tabular_row_index = 0

  # Document chaining
  @documents = []
  @current_builder = Types::OdinDocumentBuilder.new
  @current_metadata = {}
  @current_modifiers = {}
  @current_comments = {}

  # Directives
  @directives = []

  while @pos < @tokens.length
    token = @tokens[@pos]

    case token.type
    when TokenType::EOF
      break
    when TokenType::NEWLINE
      @pos += 1
      # Blank line after {$} metadata exits metadata mode (Java parity)
      if @metadata_mode && @context.empty?
        nt = @tokens[@pos]
        if nt && nt.type == TokenType::NEWLINE
          @metadata_mode = false
        end
      end
      next
    when TokenType::COMMENT
      @pos += 1
      next
    when TokenType::HEADER_OPEN
      exit_tabular_mode!
      parse_header
      next
    when TokenType::REFERENCE
      # Check for @import, @schema, @if directives
      if %w[import schema if].include?(token.value)
        parse_at_directive_from_ref(token)
        next
      end
      # Check for invalid @directive
      if token.value.empty? || !token.value.match?(/\A[a-zA-Z]/)
        # Bare @ or @unknown at line start — check if followed by =
        nt = peek_token
        if nt&.type == TokenType::EQUALS
          raise Errors::ParseError.new(
            Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
            token.line, token.column,
            "@ cannot be used as a path on the left side of assignment"
          )
        end
        # Unknown @directive
        raise Errors::ParseError.new(
          Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
          token.line, token.column,
          "Invalid directive: @#{token.value}"
        )
      end
      # Otherwise fall through to tabular/assignment handling
      if @tabular_mode
        parse_tabular_row
        next
      end
      @pos += 1
      next
    when TokenType::PATH
      if token.value.start_with?("---")
        handle_doc_separator
        next
      end
      # Check for @directive at document level
      if token.value.start_with?("@")
        parse_at_directive(token)
        next
      end
      exit_tabular_mode!
      parse_assignment
      next
    when TokenType::PIPE
      # Pipe-based tabular (not used in current golden tests, but handle)
      skip_to_newline
      next
    when TokenType::ERROR
      handle_error_token(token)
      next
    else
      # In tabular mode, data rows start with a value token
      if @tabular_mode
        parse_tabular_row
        next
      end

      # Check for --- separator as standalone token
      if token.type == TokenType::MODIFIER && token.value == "-"
        if peek_is_doc_separator?
          handle_doc_separator
          next
        end
      end

      @pos += 1
    end
  end

  validate_array_contiguity!
  finalize_documents
end
|
|
142
|
+
|
|
143
|
+
# Returns the token under the cursor, or nil when the cursor is past the end.
def current_token
  @tokens.dig(@pos)
end
|
|
146
|
+
|
|
147
|
+
# Returns the token +offset+ positions ahead of the cursor without moving it,
# or nil when that position is past the end of the stream.
def peek_token(offset = 1)
  target = @pos + offset
  return nil unless target < @tokens.length

  @tokens[target]
end
|
|
151
|
+
|
|
152
|
+
# Returns the token at the cursor and moves the cursor forward one position.
def advance
  @tokens[@pos].tap { @pos += 1 }
end
|
|
157
|
+
|
|
158
|
+
# Consumes and returns the current token when its type matches +type+;
# otherwise raises a ParseError (line/column default to 0 at end of stream).
def expect(type)
  tok = current_token
  return advance if tok && tok.type == type

  raise Errors::ParseError.new(
    Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
    tok&.line || 0, tok&.column || 0,
    "Expected #{type}, got #{tok&.type}"
  )
end
|
|
171
|
+
|
|
172
|
+
# Advances the cursor past any run of consecutive NEWLINE tokens.
def skip_newlines
  @pos += 1 while @pos < @tokens.length && @tokens[@pos].type == TokenType::NEWLINE
end
|
|
177
|
+
|
|
178
|
+
# Advances to the next NEWLINE token (consuming it) or stops at EOF / end of
# stream without consuming.
def skip_to_newline
  until @pos >= @tokens.length ||
        @tokens[@pos].type == TokenType::NEWLINE ||
        @tokens[@pos].type == TokenType::EOF
    @pos += 1
  end
  @pos += 1 if @pos < @tokens.length && @tokens[@pos].type == TokenType::NEWLINE
end
|
|
185
|
+
|
|
186
|
+
# --- Header Parsing ---
|
|
187
|
+
|
|
188
|
+
# Parses a `{...}` section header. An empty `{}` resets the context to root;
# a PATH payload is validated and resolved via resolve_header_path; anything
# else (unclosed brace, non-path payload) raises INVALID_HEADER_SYNTAX.
def parse_header
  open_token = advance # consume HEADER_OPEN

  # Collect the path content between { and }
  if current_token&.type == TokenType::HEADER_CLOSE
    # Empty header {} - reset to root
    advance
    @context = ""
    @previous_context = ""
    @metadata_mode = false
    return
  end

  if current_token&.type == TokenType::PATH
    path_token = advance
    raw_path = path_token.value.strip

    # Expect HEADER_CLOSE
    if current_token&.type == TokenType::HEADER_CLOSE
      advance
    else
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_HEADER_SYNTAX,
        open_token.line, open_token.column,
        "Missing closing brace"
      )
    end

    validate_header_path!(raw_path, path_token)
    resolve_header_path(raw_path, path_token)
  else
    # Try to read whatever is there until HEADER_CLOSE
    if current_token&.type == TokenType::HEADER_CLOSE
      # NOTE(review): this branch leaves @previous_context untouched, unlike
      # the empty-{} branch above — confirm whether that asymmetry is intended
      advance
      @context = ""
      @metadata_mode = false
    else
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_HEADER_SYNTAX,
        open_token.line, open_token.column,
        "Invalid header"
      )
    end
  end
end
|
|
233
|
+
|
|
234
|
+
# Resolves a header path into parser context. Handles, in order:
#   {$}            — enter metadata mode at root
#   {$key}/{$.key} — enter metadata mode scoped to a sub-key
#   {p[] : cols}   — enter tabular mode (delegated to setup_tabular)
#   {.relative}    — append to the previous absolute context
#   {absolute}     — set context and remember it as the new previous context
# Non-tabular, non-metadata paths are depth- and index-validated.
def resolve_header_path(raw_path, token)
  # Check for metadata header
  if raw_path == "$"
    @context = ""
    @metadata_mode = true
    return
  end

  # Check for metadata sub-path: $key or $.key
  if raw_path.start_with?("$")
    @metadata_mode = true
    sub = raw_path[1..]
    sub = sub[1..] if sub.start_with?(".")
    @context = sub || ""
    return
  end

  @metadata_mode = false

  # Check for tabular: path[] : col1, col2
  if raw_path =~ /\A(.+)\[\]\s*:\s*(.+)\z/
    array_path = $1
    columns_str = $2
    setup_tabular(array_path, columns_str, token)
    return
  end

  # Check for relative header
  if raw_path.start_with?(".")
    relative = raw_path[1..]
    if @previous_context.empty?
      @context = relative
    else
      @context = "#{@previous_context}.#{relative}"
    end
  else
    @context = raw_path
    @previous_context = raw_path
  end

  # Validate depth
  validate_depth!(@context, token)

  # Validate array indices in header path
  validate_path_indices!(@context, token)
end
|
|
280
|
+
|
|
281
|
+
# Enters tabular mode for a header of the form `path[] : col1, col2`, or
# `path[] : ~` for primitive (single-value-per-row) arrays.
#
# Relative array paths (leading ".") resolve against the previous absolute
# context; absolute paths are used as-is and become the new previous context,
# mirroring resolve_header_path.
def setup_tabular(array_path, columns_str, token)
  relative = array_path.start_with?(".")
  resolved_path =
    if !relative
      array_path
    elsif @previous_context.empty?
      array_path[1..]
    else
      "#{@previous_context}#{array_path}"
    end
  @previous_context = resolved_path unless relative

  @tabular_mode = true
  @tabular_array_path = resolved_path
  @tabular_row_index = 0

  spec = columns_str.strip
  if spec == "~"
    # `~` marks a primitive array: one bare value per row, no columns
    @tabular_primitive = true
    @tabular_columns = []
  else
    @tabular_primitive = false
    @tabular_columns = resolve_tabular_columns(spec.split(",").map(&:strip))
  end

  @context = resolved_path
end
|
|
312
|
+
|
|
313
|
+
# Expands relative column names (leading ".") against the parent path of the
# most recent absolute column, returning the fully-qualified column list.
def resolve_tabular_columns(raw_cols)
  prefix = ""
  raw_cols.map do |col|
    if col.start_with?(".")
      # Relative column: qualify with the current prefix
      "#{prefix}#{col}"
    else
      # Absolute column: its parent path becomes the prefix for what follows
      prefix = col.include?(".") ? col.sub(/\.[^.]+\z/, "") : ""
      col
    end
  end
end
|
|
334
|
+
|
|
335
|
+
# Leaves tabular mode and clears its per-table state. No-op when the parser
# is not currently in tabular mode.
def exit_tabular_mode!
  if @tabular_mode
    @tabular_mode = false
    @tabular_primitive = false
    @tabular_columns = []
  end
end
|
|
341
|
+
|
|
342
|
+
# --- Assignment Parsing ---
|
|
343
|
+
|
|
344
|
+
# Parses one `path = [modifiers] value [directives] [# comment]` line.
#
# The resolved full path (context-prefixed, array indices normalized) is
# checked for duplicates, then recorded either into the current document's
# metadata (when in {$} metadata mode) or into the document builder.
# Raises ParseError on missing '=', duplicate paths, or invalid values.
def parse_assignment
  path_token = advance # consume PATH

  # Validate error tokens from tokenizer
  check_for_error_before_equals!

  # Expect EQUALS
  eq = current_token
  unless eq&.type == TokenType::EQUALS
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      path_token.line, path_token.column,
      "Expected '=' after path"
    )
  end
  advance # consume EQUALS

  # Parse modifiers
  mods = parse_modifiers

  # Parse value
  value = parse_value(path_token)

  # Parse trailing directives
  directives = parse_trailing_directives

  # Parse trailing comment
  comment = nil
  if current_token&.type == TokenType::COMMENT
    comment = current_token.value
    advance
  end

  # Apply modifiers to value
  value = value.with_modifiers(mods) if mods.any?

  # Apply directives to value
  value = value.with_directives(directives) unless directives.empty?

  # Resolve full path
  raw_path = path_token.value
  full_path = resolve_path(raw_path)

  # Normalize leading zeros in array indices: [007] -> [7]
  full_path = full_path.gsub(/\[(\d+)\]/) { |m| "[#{$1.to_i}]" }

  # Validate depth
  validate_depth!(full_path, path_token)

  # Track array indices
  track_array_index(full_path, path_token)

  if @metadata_mode
    # Check duplicate in metadata
    if @current_metadata.key?(full_path)
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::DUPLICATE_PATH_ASSIGNMENT,
        path_token.line, path_token.column,
        "Duplicate metadata key: #{full_path}"
      )
    end
    @current_metadata[full_path] = value
  else
    # Check duplicate
    if @assigned_paths.key?(full_path)
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::DUPLICATE_PATH_ASSIGNMENT,
        path_token.line, path_token.column,
        "Duplicate path: #{full_path}"
      )
    end
    @assigned_paths[full_path] = true
    @current_builder.set(full_path, value, modifiers: mods.any? ? mods : nil, comment: comment)
    @current_modifiers[full_path] = mods if mods.any?
  end
end
|
|
420
|
+
|
|
421
|
+
# Raises a ParseError when the tokenizer flagged an ERROR token between the
# path and the '=' sign. The "@#" sequence gets a dedicated message; any
# other error token's value is reported verbatim. No-op otherwise.
def check_for_error_before_equals!
  err = current_token
  return unless err&.type == TokenType::ERROR

  message = err.value == "@#" ? "@# is invalid" : err.value
  raise Errors::ParseError.new(
    Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
    err.line, err.column,
    message
  )
end
|
|
441
|
+
|
|
442
|
+
# Prefixes +raw_path+ with the active header context (dot-joined), or returns
# it unchanged when at root context.
def resolve_path(raw_path)
  @context.empty? ? raw_path : "#{@context}.#{raw_path}"
end
|
|
449
|
+
|
|
450
|
+
# Consumes consecutive MODIFIER tokens — '!' (required), '*' (confidential),
# '-' (deprecated) — and returns the matching OdinModifiers, or
# OdinModifiers::NONE when no modifiers are present.
def parse_modifiers
  flags = { "!" => false, "*" => false, "-" => false }
  while current_token&.type == TokenType::MODIFIER
    mark = current_token.value
    flags[mark] = true if flags.key?(mark)
    advance
  end

  if flags.values.any?
    Types::OdinModifiers.new(
      required: flags["!"], confidential: flags["*"], deprecated: flags["-"]
    )
  else
    Types::OdinModifiers::NONE
  end
end
|
|
470
|
+
|
|
471
|
+
# Consumes and parses one value token via ValueParser.
#
# Raises UNEXPECTED_CHARACTER when the line/stream ends before a value, and
# BARE_STRING_NOT_ALLOWED (P002) for unquoted strings unless +allow_bare+ is
# set (verb-argument contexts). +context_token+ supplies line/column for the
# missing-value error.
def parse_value(context_token, allow_bare: false)
  t = current_token

  if t.nil? || t.type == TokenType::NEWLINE || t.type == TokenType::EOF
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      context_token.line, context_token.column,
      "Expected value"
    )
  end

  if t.type == TokenType::ERROR
    handle_error_token(t)
  end

  # Check for bare strings (unquoted) — raise P002 unless in verb arg context
  # NOTE(review): relies on the tokenizer marking unquoted strings with
  # raw == "bare" — confirm against Tokenizer
  if !allow_bare && t.type == TokenType::STRING && t.raw == "bare"
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::BARE_STRING_NOT_ALLOWED,
      t.line, t.column,
      "Strings must be quoted"
    )
  end

  advance
  ValueParser.parse_value(t)
end
|
|
498
|
+
|
|
499
|
+
# Collects DIRECTIVE tokens trailing a value, each optionally followed by a
# STRING argument. Returns an array of OdinDirective (possibly empty).
def parse_trailing_directives
  collected = []
  while current_token&.type == TokenType::DIRECTIVE
    name = advance.value
    arg = nil
    arg = advance.value if current_token&.type == TokenType::STRING
    collected << Types::OdinDirective.new(name, arg)
  end
  collected
end
|
|
516
|
+
|
|
517
|
+
# --- Tabular Row Parsing ---
|
|
518
|
+
|
|
519
|
+
# Dispatches a tabular data row to the primitive-array or object-row parser
# depending on the active tabular header form.
def parse_tabular_row
  @tabular_primitive ? parse_tabular_primitive_row : parse_tabular_object_row
end
|
|
526
|
+
|
|
527
|
+
# Parses one row of a primitive tabular array (`path[] : ~` form): a single
# value appended at path[row_index]. Raises on duplicate paths and advances
# the row counter.
def parse_tabular_primitive_row
  # Single value per row
  t = current_token
  return skip_to_newline if t.nil? || t.type == TokenType::NEWLINE || t.type == TokenType::EOF

  value = parse_tabular_cell_value
  full_path = "#{@tabular_array_path}[#{@tabular_row_index}]"

  track_array_index(full_path, t)

  if @assigned_paths.key?(full_path)
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::DUPLICATE_PATH_ASSIGNMENT,
      t.line, t.column,
      "Duplicate path: #{full_path}"
    )
  end

  @assigned_paths[full_path] = true
  @current_builder.set(full_path, value)

  @tabular_row_index += 1
  skip_to_newline
end
|
|
551
|
+
|
|
552
|
+
# Parses one row of an object tabular array: comma-separated cells mapped to
# the declared columns at path[row_index].column. Cells absent between commas
# are skipped (the column simply gets no assignment).
#
# NOTE(review): unlike parse_tabular_primitive_row, this records into
# @assigned_paths without raising on duplicates — confirm whether that is
# intentional (tabular columns cannot collide by construction?).
def parse_tabular_object_row
  row_token = current_token
  col_idx = 0
  row_idx = @tabular_row_index

  while col_idx < @tabular_columns.length
    t = current_token
    break if t.nil? || t.type == TokenType::NEWLINE || t.type == TokenType::EOF || t.type == TokenType::HEADER_OPEN

    # Check for comma (separator between cells)
    # An absent cell is indicated by consecutive commas or trailing comma

    if is_value_token?(t)
      value = parse_tabular_cell_value
      col_name = @tabular_columns[col_idx]
      full_path = "#{@tabular_array_path}[#{row_idx}].#{col_name}"

      track_array_index(full_path, row_token)

      @assigned_paths[full_path] = true
      @current_builder.set(full_path, value)
    end
    # else: absent cell, skip

    col_idx += 1

    # Skip comma separator
    # After a value or absent cell, look for comma
    t = current_token
    if t&.type == TokenType::PATH && t.value == ","
      advance
    elsif t&.type == TokenType::COMMENT
      break
    elsif t&.type == TokenType::NEWLINE || t&.type == TokenType::EOF
      break
    end
  end

  @tabular_row_index += 1
  skip_to_newline
end
|
|
593
|
+
|
|
594
|
+
# True when +t+ can begin a tabular cell value. PATH tokens qualify only as
# bare booleans ("true"/"false"); MODIFIER qualifies because cell values may
# carry leading modifiers. (Name kept for caller compatibility despite the
# non-idiomatic is_ prefix.)
def is_value_token?(t)
  return t.value == "true" || t.value == "false" if t.type == TokenType::PATH

  [
    TokenType::STRING, TokenType::NUMBER, TokenType::INTEGER,
    TokenType::CURRENCY, TokenType::PERCENT, TokenType::BOOLEAN,
    TokenType::NULL, TokenType::REFERENCE, TokenType::BINARY,
    TokenType::DATE, TokenType::TIMESTAMP, TokenType::TIME,
    TokenType::DURATION, TokenType::VERB, TokenType::MODIFIER
  ].include?(t.type)
end
|
|
609
|
+
|
|
610
|
+
# Parses a single tabular cell: optional leading modifiers, then either a
# bare boolean (PATH token "true"/"false" — only legal in tabular context)
# or a regular value via ValueParser. Returns Types::NULL when the cell is
# empty (end of line/stream).
def parse_tabular_cell_value
  t = current_token
  return Types::NULL if t.nil?

  # Handle modifiers on cell values
  mods = parse_modifiers

  t = current_token
  return Types::NULL if t.nil? || t.type == TokenType::NEWLINE

  # Handle PATH tokens that are bare booleans (true/false) in tabular context
  if t.type == TokenType::PATH && (t.value == "true" || t.value == "false")
    advance
    value = t.value == "true" ? Types::TRUE_VAL : Types::FALSE_VAL
    value = value.with_modifiers(mods) if mods.any?
    return value
  end

  advance
  value = ValueParser.parse_value(t)
  value = value.with_modifiers(mods) if mods.any?
  value
end
|
|
633
|
+
|
|
634
|
+
# --- At-Directive Parsing (@import, @schema, @if) ---
|
|
635
|
+
|
|
636
|
+
# Handles a document-level @directive tokenized as a PATH token ("@import",
# "@schema", "@if"); any other directive name is a parse error.
def parse_at_directive(token)
  name = token.value
  advance # consume the PATH token

  handler = {
    "@import" => :parse_import_directive,
    "@schema" => :parse_schema_directive,
    "@if" => :parse_if_directive
  }[name]

  unless handler
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      token.line, token.column,
      "Invalid directive: #{name}"
    )
  end

  send(handler, token)
end
|
|
655
|
+
|
|
656
|
+
# Handle @import/@schema/@if when tokenized as REFERENCE tokens
|
|
657
|
+
# Handles @import/@schema/@if when the tokenizer produced a REFERENCE token
# (token value is the bare directive name, without the leading '@').
def parse_at_directive_from_ref(token)
  name = token.value # "import", "schema", "if"
  advance # consume the REFERENCE token

  case name
  when "import" then parse_import_directive_from_tokens(token)
  when "schema" then parse_schema_directive(token)
  when "if"     then parse_if_directive(token)
  else
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      token.line, token.column,
      "Invalid directive: @#{name}"
    )
  end
end
|
|
676
|
+
|
|
677
|
+
# Parses `@import <path> [as <alias>]` when the directive arrived as a
# REFERENCE token. The path was fragmented across tokens by the tokenizer, so
# the remaining line tokens are concatenated (no separator) to rebuild it;
# a token exactly equal to "as" (not at index 0) splits path from alias.
# Records an import directive hash; raises INVALID_DIRECTIVE for a missing
# path or an empty alias.
def parse_import_directive_from_tokens(token)
  # Collect all remaining tokens on this line as the import path
  parts = []
  alias_name = nil

  while current_token && current_token.type != TokenType::NEWLINE &&
        current_token.type != TokenType::EOF && current_token.type != TokenType::COMMENT
    t = advance
    parts << t.value.to_s
  end

  # Skip trailing comment
  advance if current_token&.type == TokenType::COMMENT

  if parts.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_DIRECTIVE,
      token.line, token.column,
      "Import directive requires a path"
    )
  end

  # Reconstruct the import path, handling "as" alias
  # The path was split across multiple tokens. We need to rejoin them.
  # Look for "as" keyword
  full_text = parts.join("")

  # Check for "as" in the token values
  as_idx = nil
  parts.each_with_index do |p, i|
    if p == "as" && i > 0
      as_idx = i
      break
    end
  end

  if as_idx
    # Path is everything before "as", alias is everything after
    import_path = parts[0...as_idx].join("")
    remaining = parts[as_idx + 1..]
    if remaining.empty? || remaining.join("").strip.empty?
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_DIRECTIVE,
        token.line, token.column,
        "Import alias requires identifier"
      )
    end
    alias_name = remaining.join("").strip
    import_path = import_path.strip
  else
    import_path = full_text.strip
  end

  # Handle path reconstruction: tokenizer splits "./other.odin" into multiple tokens
  # We may need to add dots/slashes back
  @directives << { type: "import", path: import_path, alias: alias_name }
end
|
|
734
|
+
|
|
735
|
+
# Parses `@import <path> [as <alias>]` when the directive arrived as a PATH
# token. Remaining line tokens are collected (EQUALS rendered as "=") and
# rejoined with spaces; a literal "as" part splits path from alias.
#
# NOTE(review): unlike parse_import_directive_from_tokens, the "as" search
# here uses index() (so an "as" at position 0 matches) and joins with spaces
# rather than "" — confirm the two code paths are meant to differ.
def parse_import_directive(token)
  # Expect: PATH (file path) [PATH("as") PATH(alias)]
  # The tokenizer puts the rest of the line as subsequent tokens
  # We need to collect the import path
  path_parts = []
  alias_name = nil

  # Read tokens until newline/EOF/comment
  while current_token && current_token.type != TokenType::NEWLINE &&
        current_token.type != TokenType::EOF && current_token.type != TokenType::COMMENT
    t = advance
    if t.type == TokenType::PATH || t.type == TokenType::STRING
      path_parts << t.value
    elsif t.type == TokenType::EQUALS
      path_parts << "="
    else
      path_parts << t.value.to_s
    end
  end

  # Skip trailing comment
  advance if current_token&.type == TokenType::COMMENT

  if path_parts.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_DIRECTIVE,
      token.line, token.column,
      "Import directive requires a path"
    )
  end

  # Check for alias: "path as alias"
  as_idx = path_parts.index("as")
  if as_idx
    import_path = path_parts[0...as_idx].join(" ")
    if as_idx + 1 < path_parts.length
      alias_name = path_parts[as_idx + 1]
    else
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_DIRECTIVE,
        token.line, token.column,
        "Invalid import alias syntax"
      )
    end
  else
    import_path = path_parts.join(" ")
  end

  @directives << { type: "import", path: import_path, alias: alias_name }
end
|
|
785
|
+
|
|
786
|
+
# Parses `@schema <url>`: concatenates the rest of the line (no separator)
# into a URL and records a schema directive. Raises INVALID_DIRECTIVE when
# no URL follows.
def parse_schema_directive(token)
  pieces = []
  until current_token.nil? || current_token.type == TokenType::NEWLINE ||
        current_token.type == TokenType::EOF || current_token.type == TokenType::COMMENT
    pieces << advance.value.to_s
  end
  advance if current_token&.type == TokenType::COMMENT

  if pieces.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_DIRECTIVE,
      token.line, token.column,
      "Schema directive requires a URL"
    )
  end

  @directives << { type: "schema", url: pieces.join("") }
end
|
|
804
|
+
|
|
805
|
+
# Parses `@if <condition>`: joins the rest of the line with spaces (the
# tokenizer discarded original spacing) and records an if directive. Raises
# INVALID_DIRECTIVE when no condition follows.
def parse_if_directive(token)
  pieces = []
  until current_token.nil? || current_token.type == TokenType::NEWLINE ||
        current_token.type == TokenType::EOF || current_token.type == TokenType::COMMENT
    pieces << advance.value.to_s
  end
  advance if current_token&.type == TokenType::COMMENT

  if pieces.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_DIRECTIVE,
      token.line, token.column,
      "If directive requires a condition"
    )
  end

  # Reconstruct condition with proper spacing
  @directives << { type: "if", condition: pieces.join(" ") }
end
|
|
826
|
+
|
|
827
|
+
# --- Document Chaining ---
|
|
828
|
+
|
|
829
|
+
# Handles a `---` document separator: snapshots the current document, then
# resets every piece of per-document parser state for the next one.
def handle_doc_separator
  skip_to_newline # consume the rest of the --- line

  finalize_current_document

  # Fresh state for the next chained document
  @context = ""
  @previous_context = ""
  @metadata_mode = false
  @assigned_paths = {}
  @array_indices = {}
  @current_builder = Types::OdinDocumentBuilder.new
  @current_metadata = {}
  @current_modifiers = {}
  @current_comments = {}
  @directives = []
end
|
|
848
|
+
|
|
849
|
+
# Always false: `---` separators reach the parser as PATH tokens (handled in
# build_document), so there is never a raw-token separator to detect here.
def peek_is_doc_separator?
  false
end
|
|
854
|
+
|
|
855
|
+
# Snapshots the in-progress document (metadata, assignments, modifiers,
# directives) onto @documents after validating array contiguity.
def finalize_current_document
  validate_array_contiguity!

  @documents << {
    metadata: @current_metadata.dup,
    # NOTE(review): reaches into the builder's internals; a public accessor
    # on OdinDocumentBuilder would be cleaner
    assignments: @current_builder.instance_variable_get(:@assignments).dup,
    modifiers: @current_modifiers.dup,
    directives: @directives.dup
  }
end
|
|
866
|
+
|
|
867
|
+
# Produces the final parse result. When no `---` separator was seen,
# builds a single OdinDocument; otherwise the trailing document is
# still open, so close it and wrap the whole chain in a ParseResult.
def finalize_documents
  return build_single_document if @documents.empty?

  # At least one separator occurred: record the in-progress document
  # before assembling the chained result.
  finalize_current_document
  build_chained_result
end
|
|
877
|
+
|
|
878
|
+
# Builds the OdinDocument for a source with no `---` separators,
# pulling the accumulated assignments and comments out of the builder.
def build_single_document
  Types::OdinDocument.new(
    assignments: @current_builder.instance_variable_get(:@assignments),
    metadata:    @current_metadata,
    modifiers:   @current_modifiers,
    comments:    @current_builder.instance_variable_get(:@comments)
  )
end
|
|
889
|
+
|
|
890
|
+
# Assembles the result of a chained (multi-document) parse.
#
# The first recorded document acts as the primary one; every document
# (including the primary) is also materialized as an OdinDocument so
# callers can walk the full chain. Per-document comments are not
# tracked across separators, hence the empty comment hashes.
def build_chained_result
  first = @documents.first
  primary_doc = Types::OdinDocument.new(
    assignments: first[:assignments],
    metadata:    first[:metadata],
    modifiers:   first[:modifiers],
    comments:    {}
  )

  chain = @documents.map do |data|
    Types::OdinDocument.new(
      assignments: data[:assignments],
      metadata:    data[:metadata],
      modifiers:   data[:modifiers],
      comments:    {}
    )
  end

  # Wrapper exposing the primary document plus the whole chain.
  ParseResult.new(primary_doc, chain, @documents)
end
|
|
916
|
+
|
|
917
|
+
# --- Validation ---
|
|
918
|
+
|
|
919
|
+
# Validates array-index syntax appearing in a header path.
#
# Rejects unclosed brackets (`a[0`) and non-numeric indices (`a[x]`).
# Empty or whitespace-only brackets (`a[]` — tabular syntax) pass.
#
# @param raw_path [String] the header path as written in the source
# @param token [Token] source location used for error reporting
# @raise [Errors::ParseError] INVALID_ARRAY_INDEX on malformed indices
def validate_header_path!(raw_path, token)
  return unless raw_path.include?("[")

  # The closing bracket is optional in the pattern so that unclosed
  # brackets still produce a capture we can inspect.
  raw_path.scan(/\[([^\]]*)\]?/).each do |(inner)|
    # A well-formed pair must appear verbatim somewhere in the path;
    # otherwise this bracket was never closed.
    unless raw_path.include?("[#{inner}]")
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_ARRAY_INDEX,
        token.line, token.column,
        "Invalid array index in header"
      )
    end

    # Empty/blank content is tabular syntax; otherwise digits only.
    next if inner.strip.empty? || inner.match?(/\A\d+\z/)

    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_ARRAY_INDEX,
      token.line, token.column,
      "Invalid array index: #{inner}"
    )
  end
end
|
|
946
|
+
|
|
947
|
+
# Ensures a path does not nest deeper than MAX_NESTING_DEPTH.
#
# @param path [String] dotted/bracketed assignment path
# @param token [Token] source location used for error reporting
# @raise [Errors::ParseError] MAXIMUM_DEPTH_EXCEEDED when too deep
def validate_depth!(path, token)
  depth = path_depth(path)
  return if depth <= MAX_NESTING_DEPTH

  raise Errors::ParseError.new(
    Errors::ParseErrorCode::MAXIMUM_DEPTH_EXCEEDED,
    token.line, token.column,
    "Path depth #{depth} exceeds maximum #{MAX_NESTING_DEPTH}"
  )
end
|
|
957
|
+
|
|
958
|
+
# Nesting depth of a dotted/bracketed path: 1 for the root segment
# plus one for every `.` or `[` separator (e.g. "a.b[0]" => 3).
def path_depth(path)
  # String#count with a character set tallies '.' and '[' in one pass.
  path.count(".[") + 1
end
|
|
965
|
+
|
|
966
|
+
# Validates every `[n]` index in a path: numeric indices must not
# exceed MAX_ARRAY_INDEX, and negative indices are rejected outright.
#
# @param path [String] dotted/bracketed assignment path
# @param token [Token] source location used for error reporting
# @raise [Errors::ParseError] ARRAY_INDEX_OUT_OF_RANGE or INVALID_ARRAY_INDEX
def validate_path_indices!(path, token)
  path.scan(/\[(\d+)\]/).each do |(digits)|
    index = digits.to_i
    next if index <= MAX_ARRAY_INDEX

    raise Errors::ParseError.new(
      Errors::ParseErrorCode::ARRAY_INDEX_OUT_OF_RANGE,
      token.line, token.column,
      "Array index #{index} exceeds maximum"
    )
  end

  # `[-` can never open a valid (non-negative) index.
  return unless path.include?("[-")

  raise Errors::ParseError.new(
    Errors::ParseErrorCode::INVALID_ARRAY_INDEX,
    token.line, token.column,
    "Negative array index"
  )
end
|
|
987
|
+
|
|
988
|
+
# Validates and records the array indices appearing in a full path.
#
# Every `[n]` occurrence is checked for sign and range, and the running
# sum of indices along the path is also bounded by MAX_ARRAY_INDEX.
# The first (top-level) array index is remembered in @array_indices so
# validate_array_contiguity! can later verify 0..n coverage.
#
# @param full_path [String] complete assignment path
# @param token [Token] source location used for error reporting
# @raise [Errors::ParseError] on negative or out-of-range indices
def track_array_index(full_path, token)
  running_total = 0
  full_path.scan(/\[(\-?\d+)\]/).each do |(digits)|
    index = digits.to_i

    if index.negative?
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_ARRAY_INDEX,
        token.line, token.column,
        "Negative array index: #{index}"
      )
    end

    if index > MAX_ARRAY_INDEX
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::ARRAY_INDEX_OUT_OF_RANGE,
        token.line, token.column,
        "Array index #{index} out of range"
      )
    end

    running_total += index
    next if running_total <= MAX_ARRAY_INDEX

    raise Errors::ParseError.new(
      Errors::ParseErrorCode::ARRAY_INDEX_OUT_OF_RANGE,
      token.line, token.column,
      "Cumulative array index #{running_total} out of range"
    )
  end

  # Remember the first top-level index for the contiguity check.
  head = full_path.match(/\A([^\[]*)\[(\d+)\]/)
  return unless head

  base = head[1]
  first_index = head[2].to_i
  bucket = (@array_indices[base] ||= [])
  bucket << first_index unless bucket.include?(first_index)
end
|
|
1029
|
+
|
|
1030
|
+
# Verifies that each tracked array was assigned contiguous indices
# starting at 0 (i.e. exactly 0, 1, 2, ...).
#
# @raise [Errors::ParseError] NON_CONTIGUOUS_ARRAY_INDICES on any gap
#   (line/column are unknown at this point, hence 0, 0)
def validate_array_contiguity!
  @array_indices.each do |path, indices|
    next if indices.empty?

    sorted = indices.sort

    # Distinct message when the array does not begin at index 0.
    unless sorted.first.zero?
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::NON_CONTIGUOUS_ARRAY_INDICES,
        0, 0,
        "Array '#{path}' does not start at index 0"
      )
    end

    sorted.each_with_index do |index, expected|
      next if index == expected

      raise Errors::ParseError.new(
        Errors::ParseErrorCode::NON_CONTIGUOUS_ARRAY_INDICES,
        0, 0,
        "Non-contiguous array indices for '#{path}': expected #{expected}, got #{index}"
      )
    end
  end
end
|
|
1052
|
+
|
|
1053
|
+
# --- Error Handling ---
|
|
1054
|
+
|
|
1055
|
+
# Maps a tokenizer ERROR token to the appropriate Errors::ParseError.
#
# The token's value carries the tokenizer's human-readable message; we
# dispatch on its prefix to choose the error code. Branch order
# matters: /\AUnterminated header/ MUST be tested before the generic
# /\AUnterminated/ prefix — in the previous ordering the generic
# pattern matched first, so unterminated-header errors were
# misclassified as UNTERMINATED_STRING instead of INVALID_HEADER_SYNTAX
# (that branch was unreachable). The redundant /\AUnterminated string/
# branch is folded into the generic one, which raised identically.
#
# @param token [Token] the ERROR token (value, line, column)
# @raise [Errors::ParseError] always
def handle_error_token(token)
  val = token.value

  code =
    case val
    when "@#"
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER
    when /\AUnterminated header/
      # Must precede /\AUnterminated/ — see method comment.
      Errors::ParseErrorCode::INVALID_HEADER_SYNTAX
    when /\AUnterminated/
      # Covers "Unterminated string" and any other unterminated literal.
      Errors::ParseErrorCode::UNTERMINATED_STRING
    when /\AInvalid escape/, /\AInvalid unicode/
      Errors::ParseErrorCode::INVALID_ESCAPE_SEQUENCE
    when /\AInvalid boolean/, /\AInvalid numeric/
      Errors::ParseErrorCode::INVALID_TYPE_PREFIX
    when /\AEmpty directive/
      Errors::ParseErrorCode::INVALID_DIRECTIVE
    when /\AEmpty verb/
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER
    else
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER
    end

  # "@#" gets a fixed message; every other case forwards the
  # tokenizer's own message verbatim.
  message = val == "@#" ? "@# is invalid" : val
  raise Errors::ParseError.new(code, token.line, token.column, message)
end
|
|
1121
|
+
end
|
|
1122
|
+
|
|
1123
|
+
# Result wrapper for chained documents
|
|
1124
|
+
class ParseResult
|
|
1125
|
+
attr_reader :metadata, :assignments, :modifiers, :chained_documents, :raw_documents
|
|
1126
|
+
|
|
1127
|
+
def initialize(primary_doc, chained_docs, raw_docs)
|
|
1128
|
+
@primary = primary_doc
|
|
1129
|
+
@chained_documents = chained_docs
|
|
1130
|
+
@raw_documents = raw_docs
|
|
1131
|
+
@assignments = primary_doc.assignments
|
|
1132
|
+
@metadata = primary_doc.metadata
|
|
1133
|
+
@modifiers = primary_doc.all_modifiers
|
|
1134
|
+
end
|
|
1135
|
+
|
|
1136
|
+
def get(path)
|
|
1137
|
+
@primary.get(path)
|
|
1138
|
+
end
|
|
1139
|
+
|
|
1140
|
+
def [](path)
|
|
1141
|
+
get(path)
|
|
1142
|
+
end
|
|
1143
|
+
|
|
1144
|
+
def include?(path)
|
|
1145
|
+
@primary.include?(path)
|
|
1146
|
+
end
|
|
1147
|
+
|
|
1148
|
+
def size
|
|
1149
|
+
@primary.size
|
|
1150
|
+
end
|
|
1151
|
+
|
|
1152
|
+
def paths
|
|
1153
|
+
@primary.paths
|
|
1154
|
+
end
|
|
1155
|
+
|
|
1156
|
+
def empty?
|
|
1157
|
+
@primary.empty?
|
|
1158
|
+
end
|
|
1159
|
+
|
|
1160
|
+
def each_assignment(&block)
|
|
1161
|
+
@primary.each_assignment(&block)
|
|
1162
|
+
end
|
|
1163
|
+
|
|
1164
|
+
def each_metadata(&block)
|
|
1165
|
+
@primary.each_metadata(&block)
|
|
1166
|
+
end
|
|
1167
|
+
|
|
1168
|
+
def modifiers_for(path)
|
|
1169
|
+
@primary.modifiers_for(path)
|
|
1170
|
+
end
|
|
1171
|
+
|
|
1172
|
+
def all_modifiers
|
|
1173
|
+
@primary.all_modifiers
|
|
1174
|
+
end
|
|
1175
|
+
|
|
1176
|
+
def comment_for(path)
|
|
1177
|
+
@primary.comment_for(path)
|
|
1178
|
+
end
|
|
1179
|
+
|
|
1180
|
+
def all_comments
|
|
1181
|
+
@primary.all_comments
|
|
1182
|
+
end
|
|
1183
|
+
|
|
1184
|
+
def metadata_value(key)
|
|
1185
|
+
@metadata[key]
|
|
1186
|
+
end
|
|
1187
|
+
|
|
1188
|
+
def documents
|
|
1189
|
+
@chained_documents
|
|
1190
|
+
end
|
|
1191
|
+
end
|
|
1192
|
+
end
|
|
1193
|
+
end
|