rip-lang 3.15.0 → 3.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1805 @@
1
+ // Schema reaches sideways to the host's parser table to re-parse @ensure
2
+ // predicate bodies. This is the one host coupling point — the host's lexer
3
+ // and compiler import `installSchemaSupport` from us, and we import the
4
+ // parser back from them. Same compilation unit, no package boundary.
5
+ import { parser } from '../parser.js';
6
+
7
// Runtime-string composition is delegated to a registered provider so the
// bundler can tree-shake server-only fragments out of the browser bundle.
// One of `./loader-server.js` or `./loader-browser.js` must be
// side-effect-imported before any compileToJS call that emits schemas.
// (`src/browser.js` imports loader-browser; CLI / typecheck / test runner
// import loader-server.)
let _schemaRuntimeProvider = null;

// Register the function that composes the schema runtime source string.
// Called once, as a side effect, by whichever loader module the host
// imports; the most recent registration wins.
export function setSchemaRuntimeProvider(provider) {
  _schemaRuntimeProvider = provider;
}
15
+
16
+ // Schema System — inline `schema` declarations compile to runtime validator
17
+ // and ORM plans.
18
+ //
19
+ // Architecture (parallels types.js and components.js sidecars):
20
+ //
21
+ // installSchemaSupport(Lexer, CodeEmitter)
22
+ // Adds rewriteSchema() to Lexer.prototype and emitSchema() to
23
+ // CodeEmitter.prototype.
24
+ //
25
+ // rewriteSchema()
26
+ // Token-stream pass. Recognizes `schema [:kind] INDENT ... OUTDENT`
27
+ // blocks at expression-start positions, parses the body with a
28
+ // schema-specific sub-parser, and collapses the whole region into
29
+ // `SCHEMA SCHEMA_BODY` where SCHEMA_BODY carries a structured
30
+ // descriptor on its .data. The main Rip grammar only sees two
31
+ // tiny productions. Schema body syntax never reaches the main
32
+ // parser.
33
+ //
34
+ // emitSchema(head, rest, context)
35
+ // CodeEmitter dispatch. Reads the structured descriptor off the
36
+ // SCHEMA_BODY node's metadata and emits a `__schema({...})` runtime
37
+ // call. For Phase 1 the emission is a self-describing object; the
38
+ // runtime (__schema) lands in Phase 3.
39
+ //
40
+ // hasSchemas(source)
41
+ // Cheap regex probe for the presence of a schema declaration.
42
+ // Parallels hasTypeAnnotations. Used by typecheck.js and the LSP to
43
+ // skip work on files without schemas.
44
+ //
45
+ // Two body sub-modes:
46
+ //
47
+ // fielded — kinds :input, :shape, :model, :mixin. Permitted line forms:
48
+ // field IDENTIFIER[!|?|#]* TYPE [, constraints] [, attrs]
49
+ // directive @NAME [args]
50
+ // callable NAME: (-> | ~>) body
51
+ //
52
+ // enum — kind :enum. Permitted line forms:
53
+ // bare IDENTIFIER
54
+ // valued IDENTIFIER : Literal
55
+ //
56
+ // Anything else at schema top level is a schema-mode-aware compile error
57
+ // with a helpful message.
58
+
59
// Schema kinds accepted after `schema :<kind>`. Anything else is rejected
// with a compile error at collapse time (see collapseSchemaAt).
const VALID_KINDS = new Set(['input', 'shape', 'model', 'mixin', 'enum']);
// Kind used when no `:kind` symbol is written. May still be promoted to
// 'enum' by body inference in parseSchemaBody (first line starts with a
// SYMBOL token).
const KIND_DEFAULT = 'input';

// Recognized lifecycle-hook names.
// NOTE(review): not referenced anywhere in this chunk — presumably
// consumed by the callable/hook line parser further down the file;
// confirm before removing.
const HOOK_NAMES = new Set([
  'beforeValidation', 'afterValidation',
  'beforeSave', 'afterSave',
  'beforeCreate', 'afterCreate',
  'beforeUpdate', 'afterUpdate',
  'beforeDestroy', 'afterDestroy',
]);

// Positions where `schema` can legitimately start an expression.
// If the prev token is one of these tags, the identifier `schema` is a
// candidate for retagging to SCHEMA.
const EXPR_START_PREV = new Set([
  'TERMINATOR', 'INDENT', 'OUTDENT',
  '=', '+=', '-=', '*=', '/=', '%=', '**=', '//=', '%%=',
  '?=', '??=', '&&=', '||=', '&=', '|=', '^=', '<<=', '>>=', '>>>=',
  'READONLY_ASSIGN', 'REACTIVE_ASSIGN', 'COMPUTED_ASSIGN',
  'RETURN', 'THROW', 'YIELD', 'AWAIT', 'EXPORT',
  ',', '(', '[', '{', 'CALL_START', 'PARAM_START', 'INDEX_START',
  '->', '=>', ':', 'WHEN', 'THEN', 'IF', 'UNLESS',
  'UNARY', '!', 'NOT',
]);
83
+
84
+ // ============================================================================
85
+ // hasSchemas — fast probe
86
+ // ============================================================================
87
+
88
// True when source looks like it contains a schema declaration. We look
// for `schema` followed by either a `:kind` symbol or by a newline +
// deeper indent. Conservative: a false positive just means typecheck
// pays a bit more work, never wrong behavior.
export function hasSchemas(source) {
  if (typeof source !== 'string') return false;
  // Cheap word probe first; the fuller positional regex only runs on a hit.
  return /\bschema\b/.test(source) &&
    /(?:^|[\s=,(\[{:])schema(?:\s*:[A-Za-z_$][\w$]*|\s*\n[ \t]+\S)/m.test(source);
}
97
+
98
+ // ============================================================================
99
+ // installSchemaSupport — prototype installation
100
+ // ============================================================================
101
+
102
// Wires schema support into the host compiler's prototypes. Either argument
// may be null/undefined to install only one side.
//   Lexer       — gains rewriteSchema(), the token-stream collapse pass.
//   CodeEmitter — gains emitSchema() dispatch and getSchemaRuntime().
export function installSchemaSupport(Lexer, CodeEmitter) {
  if (Lexer) {
    Lexer.prototype.rewriteSchema = function () {
      rewriteSchema(this);
    };
    // Captured body tokens need the tail rewriter passes before parsing.
    // parseBodyTokens runs those passes on a fresh Lexer instance.
    parseBodyTokens._LexerCtor = Lexer;
  }
  if (!CodeEmitter) return;
  CodeEmitter.prototype.emitSchema = function (head, rest, context) {
    return emitSchemaNode(this, head, rest, context);
  };
  CodeEmitter.prototype.getSchemaRuntime = function () {
    // Compiler-controlled mode. Defaults to 'migration' (everything) for
    // compatibility with existing CLI / Node compilation, where the user
    // might invoke any schema feature including .toSQL(). Browser-bundle
    // build overrides to 'browser' for size reduction — see Phase 2 step 3.
    const mode = this.options?.schemaMode || 'migration';
    return getSchemaRuntime({ mode });
  };
}
125
+
126
+ // ============================================================================
127
+ // Lexer pass: rewriteSchema
128
+ // ============================================================================
129
+
130
// Known keys for the `schema.<key> = <value>` file-level pragma. Each
// pragma takes effect from its declaration forward and is scoped to the
// current compilation unit — schemas in other files are unaffected.
// Extend this map when new pragma keys land.
const SCHEMA_PRAGMA_KEYS = new Set(['defaultMaxString']);

// Single pass over the lexer's token stream: applies (and removes)
// file-level `schema.<key> = n` pragmas, and collapses every `schema …`
// declaration into the two-token `SCHEMA SCHEMA_BODY` form that the main
// grammar expects.
function rewriteSchema(lexer) {
  const tokens = lexer.tokens;
  // File-scoped config, updated in-place as pragmas are encountered, then
  // snapshotted into each schema descriptor at collapse time so post-pragma
  // changes don't mutate earlier schemas retroactively.
  const config = { defaultMaxString: null };
  // Top-level INDENT/OUTDENT depth. Pragmas are file-level only so we
  // reject them inside function / class / block bodies — otherwise a
  // pragma nested in `foo = ->` would leak to module-scope schemas
  // declared later on. Schemas themselves get collapsed out of the
  // token stream before their internal INDENT/OUTDENT reach this
  // counter, so depth reflects only user-written nesting.
  let depth = 0;
  for (let i = 0; i < tokens.length; ) {
    const tag = tokens[i][0];
    if (tag === 'INDENT') depth += 1;
    else if (tag === 'OUTDENT') depth -= 1;
    const consumed = matchSchemaPragma(tokens, i, config, depth);
    if (consumed > 0) {
      // Pragma handled: drop its tokens and re-examine the same index.
      tokens.splice(i, consumed);
      continue;
    }
    if (isSchemaStart(tokens, i)) collapseSchemaAt(lexer, tokens, i, config);
    i += 1;
  }
}

// Recognize `schema.<key> = <value>` at statement position. Returns the
// number of tokens consumed (including any trailing TERMINATOR) when the
// pragma is applied, or 0 when the sequence isn't a pragma. Unknown keys
// and non-literal values error loudly — silently ignoring a typo like
// `schema.defaultMacString = 100` would bake a wrong value into every
// downstream schema.
function matchSchemaPragma(tokens, i, config, depth) {
  const head = tokens[i];
  if (!head || head[0] !== 'IDENTIFIER' || head[1] !== 'schema') return 0;
  if (tokens[i + 1]?.[0] !== '.') return 0;
  const propTok = tokens[i + 2];
  if (!propTok || propTok[0] !== 'PROPERTY') return 0;
  if (tokens[i + 3]?.[0] !== '=') return 0;
  // Pragmas must start a statement — the `schema` identifier must be
  // preceded by nothing, TERMINATOR, INDENT, or OUTDENT so we don't
  // accidentally rewrite `foo.schema.defaultMaxString = 100` or similar.
  const before = tokens[i - 1];
  if (before && before[0] !== 'TERMINATOR' &&
      before[0] !== 'INDENT' && before[0] !== 'OUTDENT') {
    return 0;
  }
  const key = propTok[1];
  if (!SCHEMA_PRAGMA_KEYS.has(key)) {
    throw schemaError(propTok,
      `Unknown schema pragma 'schema.${key}'. Known pragmas: ${[...SCHEMA_PRAGMA_KEYS].join(', ')}.`);
  }
  if (depth > 0) {
    throw schemaError(propTok,
      `Schema pragma 'schema.${key}' must be declared at file top level. It was found inside a nested block (function / class / if / loop body), where it would leak into later top-level schemas.`);
  }
  const numTok = tokens[i + 4];
  if (!numTok || numTok[0] !== 'NUMBER') {
    throw schemaError(numTok || propTok,
      `Pragma 'schema.${key}' requires a number literal. Example: schema.${key} = 100.`);
  }
  const value = Number(numTok[1]);
  // Number.isInteger rejects NaN and non-finite values too, so this is the
  // same gate as isFinite + isInteger + sign check.
  if (!Number.isInteger(value) || value < 0) {
    throw schemaError(numTok,
      `Pragma 'schema.${key}' expects a non-negative integer (got ${numTok[1]}). Use 0 to disable.`);
  }
  // `0` means "no default cap" — explicit way to reset a pragma mid-file.
  config[key] = value === 0 ? null : value;
  // Consume trailing TERMINATOR so the pragma line leaves no blank statement behind.
  return tokens[i + 5]?.[0] === 'TERMINATOR' ? 6 : 5;
}
213
+
214
// True when tokens[i] is the identifier `schema` beginning a schema
// declaration (either an indented block body or an inline `;`-separated
// one-liner), as opposed to `schema` used as an ordinary value.
function isSchemaStart(tokens, i) {
  const tok = tokens[i];
  if (!tok || tok[0] !== 'IDENTIFIER' || tok[1] !== 'schema') return false;
  // Skip property access — `x.schema` is lexed as PROPERTY, not IDENTIFIER.
  // Still guard against generated IDENTIFIER tokens in odd positions.
  const prev = tokens[i - 1];
  if (prev) {
    const ptag = prev[0];
    if (ptag === '.' || ptag === '?.') return false;
    const valueLike =
      ptag === 'IDENTIFIER' || ptag === 'PROPERTY' ||
      ptag === ')' || ptag === ']' || ptag === '}' ||
      ptag === 'STRING' || ptag === 'NUMBER';
    // `x schema` is an implicit call of x on schema — not a decl.
    if (valueLike && !EXPR_START_PREV.has(ptag)) return false;
  }
  // What follows determines the body form:
  //   SYMBOL? then INDENT          — indented block body.
  //   SYMBOL? then `TERMINATOR ;`  — inline body (one-liner), with field
  //                                  entries separated by more `;`
  //                                  terminators up to the newline.
  let j = i + 1;
  if (tokens[j]?.[0] === 'SYMBOL') j += 1;
  const next = tokens[j];
  if (next?.[0] === 'TERMINATOR') {
    if (next[1] === ';') return true;
    j += 1;
  }
  return tokens[j]?.[0] === 'INDENT';
}
243
+
244
// Collapse `IDENTIFIER 'schema' [SYMBOL kind] [TERMINATOR] INDENT ... OUTDENT`
// at position i into `SCHEMA SCHEMA_BODY`. SCHEMA_BODY carries a structured
// descriptor on .data. `config` snapshots any `schema.<key>` pragmas in
// effect at this point so later pragma changes don't retroactively alter
// earlier schemas.
//
// Params:
//   lexer  — host Lexer instance (passed through; not read directly here).
//   tokens — the full token stream; mutated in place via splice.
//   i      — index of the `schema` IDENTIFIER token.
//   config — file-scoped pragma state (see matchSchemaPragma).
// Throws schemaError on unknown kind, missing/unterminated body, empty
// inline body, or arrow forms inside an inline body.
function collapseSchemaAt(lexer, tokens, i, config) {
  let schemaTok = tokens[i];
  let kindToken = null;
  let kind = KIND_DEFAULT;
  let j = i + 1;

  if (tokens[j]?.[0] === 'SYMBOL') {
    kindToken = tokens[j];
    let k = kindToken[1];
    if (!VALID_KINDS.has(k)) {
      throw schemaError(kindToken,
        `Unknown schema kind :${k}. Expected one of :input, :shape, :model, :mixin, :enum.`);
    }
    kind = k;
    j++;
  }

  let bodyTokens;
  let endIdx;
  if (tokens[j]?.[0] === 'TERMINATOR' && tokens[j][1] === ';') {
    // Inline one-liner: `schema [:kind]; field; field; ...` up to the
    // next `\n` TERMINATOR at depth 0. The `;` separators are already
    // TERMINATOR tokens, so splitBodyLines handles them unchanged.
    // Arrows (`->`, `~>`, `!>`) would make the body ambiguous with
    // subsequent `;`-separated fields, so methods/computed/hooks/
    // transforms are rejected on the inline form.
    let inlineStart = j + 1;
    let end = inlineStart;
    let depth = 0;
    // Rip's lexer collapses `;\n` into a single `;`-valued TERMINATOR,
    // so value-based "end of inline" detection alone misses trailing
    // `X = schema :shape; name!;\ny = 1`. We track the inline body's
    // starting row and break the moment a token's row advances past
    // it at depth 0 — that captures both plain `\n` and the folded
    // `;\n` case.
    let startRow = tokens[inlineStart]?.loc?.r ?? null;
    while (end < tokens.length) {
      let tk = tokens[end];
      let tag = tk[0];
      // Row-advance check runs before bracket bookkeeping: a token on a
      // later row ends the inline body regardless of its tag.
      if (depth === 0 && startRow != null && tk.loc && tk.loc.r > startRow) break;
      if (tag === '(' || tag === '[' || tag === '{' ||
          tag === 'CALL_START' || tag === 'INDEX_START' || tag === 'PARAM_START') depth++;
      else if (tag === ')' || tag === ']' || tag === '}' ||
          tag === 'CALL_END' || tag === 'INDEX_END' || tag === 'PARAM_END') depth--;
      // Inline body ends at the first depth-0 newline OR at any
      // INDENT/OUTDENT — INDENT would mean the user opened a block
      // (incompatible with inline), and OUTDENT means we're exiting
      // a surrounding block and must leave that token in place for
      // the outer scanner's depth bookkeeping.
      else if (depth === 0 && tag === 'TERMINATOR' && tk[1] !== ';') break;
      else if (depth === 0 && (tag === 'INDENT' || tag === 'OUTDENT')) break;
      // Arrows (`->` method/hook/transform, `~>` computed, `!>` eager
      // derived) make field bodies ambiguous with subsequent
      // `;`-separated entries on the same line, so reject them early
      // with a clear message that points users at the indented form.
      // `~>` lexes as EFFECT; `!>` lexes as UNARY_MATH '!' + COMPARE '>'.
      else if (depth === 0 && tag === '->') {
        throw schemaError(tk, `Inline schema body does not support '->' (method/hook/transform). Use the indented form.`);
      }
      else if (depth === 0 && tag === 'EFFECT') {
        throw schemaError(tk, `Inline schema body does not support '~>' (computed getter). Use the indented form.`);
      }
      else if (depth === 0 && tag === 'UNARY_MATH' && tk[1] === '!' &&
          tokens[end + 1]?.[0] === 'COMPARE' && tokens[end + 1][1] === '>') {
        throw schemaError(tk, `Inline schema body does not support '!>' (eager derived). Use the indented form.`);
      }
      end++;
    }
    // A trailing TERMINATOR at the boundary (`;` that the lexer folded
    // with `\n`, or a plain `\n` that happened to land inside our
    // capture range) must remain in the token stream as a statement
    // separator between this schema and whatever follows on the next
    // line. Trim it out of the body / splice span so the parser
    // keeps seeing it. splitBodyLines is safe with a body that
    // doesn't end in TERMINATOR.
    while (end > inlineStart && tokens[end - 1][0] === 'TERMINATOR') end--;
    bodyTokens = tokens.slice(inlineStart, end);
    endIdx = end;
    // Empty inline body (`X = schema :shape;` with nothing after the
    // leading `;`) is almost always a typo — an indented body that
    // wasn't written, or a stray `;` on an otherwise complete decl.
    // Fail loud rather than emit a schema with no entries.
    if (!bodyTokens.length) {
      throw schemaError(schemaTok,
        `Inline schema body is empty. Either add '; field; …' entries after 'schema${kindToken ? ' :' + kind : ''};' or switch to the indented form.`);
    }
  } else {
    // Indented form: optional newline, then a mandatory INDENT whose
    // matching OUTDENT bounds the body.
    if (tokens[j]?.[0] === 'TERMINATOR') j++;
    if (tokens[j]?.[0] !== 'INDENT') {
      throw schemaError(schemaTok,
        `Expected indented schema body after 'schema${kindToken ? ' :' + kind : ''}'.`);
    }
    let indentIdx = j;
    let outdentIdx = findMatchingOutdent(tokens, indentIdx);
    if (outdentIdx < 0) {
      throw schemaError(tokens[indentIdx], 'Unterminated schema body.');
    }
    bodyTokens = tokens.slice(indentIdx + 1, outdentIdx);
    endIdx = outdentIdx + 1; // include the OUTDENT itself in the replaced span
  }

  let descriptor = parseSchemaBody(kind, bodyTokens, {
    schemaLoc: schemaTok.loc,
    kindLoc: kindToken?.loc ?? null,
    kind,
    // Snapshot pragmas in effect at this decl so later pragma writes
    // don't retroactively change already-parsed schemas.
    defaultMaxString: config?.defaultMaxString ?? null,
  });

  // Replace range `[i, endIdx-1]` with `SCHEMA SCHEMA_BODY`.
  let schemaNewTok = mkToken('SCHEMA', 'schema', schemaTok);
  let bodyNewTok = mkToken('SCHEMA_BODY', kind, schemaTok);
  bodyNewTok.data = { descriptor };
  tokens.splice(i, endIdx - i, schemaNewTok, bodyNewTok);
}
365
+
366
+ // ============================================================================
367
+ // Sub-parser — fielded and enum modes
368
+ // ============================================================================
369
+
370
// Parse a captured schema body into a structured descriptor.
//
// Params:
//   kind       — declared (or default) schema kind; may be promoted to
//                'enum' by inference below.
//   bodyTokens — raw token sub-stream between INDENT/OUTDENT (or the
//                inline `;`-separated run).
//   ctx        — { schemaLoc, kindLoc, kind, defaultMaxString } from
//                collapseSchemaAt; ctx.kind is updated on promotion.
// Returns { kind, loc, kindLoc, entries }.
// Throws schemaError when an entry violates the kind's capability matrix.
// NOTE: per-entry checks run in declaration order, and within an entry the
// checks run method/computed/hook → ensure → directive; which diagnostic
// fires for a multi-violation body depends on that ordering.
function parseSchemaBody(kind, bodyTokens, ctx) {
  let entries = [];
  let lines = splitBodyLines(bodyTokens);

  // Kind inference: a body whose first non-empty line begins with a
  // SYMBOL token is unambiguously an enum. Promote the default :input
  // kind to :enum so `schema\n :draft\n :active` needs no marker.
  // Explicit `:input` or any other kind stays as written.
  if (kind === KIND_DEFAULT && !ctx.kindLoc && lines.length > 0 &&
      lines[0][0]?.[0] === 'SYMBOL') {
    kind = 'enum';
    ctx.kind = 'enum';
  }

  if (kind === 'enum') {
    for (let line of lines) {
      parseEnumLine(line, entries);
    }
  } else {
    for (let line of lines) {
      parseFieldedLine(kind, line, entries, ctx);
    }
    // Capability-matrix enforcement by kind. `@mixin` is allowed as a
    // field-inclusion directive on every fielded kind because it adds
    // fields (not behavior). Other directives are restricted per the
    // matrix in the language reference.
    if (kind === 'mixin') {
      for (let e of entries) {
        if (e.tag === 'method' || e.tag === 'computed' || e.tag === 'hook') {
          throw schemaError({ loc: e.headerLoc || e.loc },
            `:mixin schemas are fields-only. '${e.name}' is a ${e.tag}; move it to a :shape or :model.`);
        }
        if (e.tag === 'ensure') {
          throw schemaError({ loc: e.headerLoc || e.loc },
            `:mixin schemas don't accept @ensure refinements. Move the invariant to a :shape or :model that composes this mixin.`);
        }
        if (e.tag === 'directive' && e.name !== 'mixin') {
          throw schemaError({ loc: e.loc },
            `:mixin schemas only accept '@mixin Name' directives. '@${e.name}' is not allowed.`);
        }
      }
    } else if (kind === 'input') {
      // :input accepts fields, @mixin, and @ensure (cross-field predicates
      // are a natural fit for form validation — "passwords must match").
      // Other methods, computed getters, hooks, and non-mixin directives
      // are rejected.
      for (let e of entries) {
        if (e.tag === 'method' || e.tag === 'computed' || e.tag === 'hook') {
          throw schemaError({ loc: e.headerLoc || e.loc },
            `:input schemas are fields-only. '${e.name}' is a ${e.tag}; use :shape or :model if you need behavior.`);
        }
        if (e.tag === 'directive' && e.name !== 'mixin') {
          throw schemaError({ loc: e.loc },
            `:input schemas only accept '@mixin Name' and '@ensure'. '@${e.name}' is not allowed.`);
        }
      }
    } else if (kind === 'shape') {
      // :shape accepts fields, methods, computed, and @mixin. Hooks
      // and ORM-bound directives (timestamps, softDelete, index,
      // belongs_to, has_many, has_one, link) are :model-only.
      // (:model itself falls through with no restrictions here.)
      for (let e of entries) {
        if (e.tag === 'hook') {
          throw schemaError({ loc: e.headerLoc || e.loc },
            `:shape schemas don't have lifecycle hooks. '${e.name}' runs only on :model; move it or remove it.`);
        }
        if (e.tag === 'directive' && e.name !== 'mixin') {
          throw schemaError({ loc: e.loc },
            `:shape schemas only accept '@mixin Name'. '@${e.name}' is :model-only.`);
        }
      }
    }
  }

  return {
    kind,
    loc: ctx.schemaLoc,
    kindLoc: ctx.kindLoc,
    entries,
  };
}
450
+
451
// Split top-level lines inside a schema body. Nested INDENT/OUTDENT stays
// inside its owning line (belongs to a callable body, multi-line
// constraints, etc.). Each returned line is the raw sub-stream of tokens
// for that line (no outer TERMINATORs).
function splitBodyLines(tokens) {
  const lines = [];
  let current = [];
  let nesting = 0;
  // Push the in-progress line (if non-empty) and start a fresh one.
  const flush = () => {
    if (current.length) {
      lines.push(current);
      current = [];
    }
  };
  for (const tok of tokens) {
    const tag = tok[0];
    if (tag === 'INDENT') nesting += 1;
    else if (tag === 'OUTDENT') nesting -= 1;
    if (tag === 'TERMINATOR' && nesting === 0) {
      // Depth-0 terminators separate lines and are dropped; nested ones
      // belong to the line's own sub-block and are kept.
      flush();
    } else {
      current.push(tok);
    }
  }
  flush();
  return lines;
}
472
+
473
+ // Fielded body: field, directive, or callable.
474
+ // Field-line grammar (v2, locked):
475
+ //
476
+ // name[!|?|#]* [type] [range] [default] [regex] [attrs] [, -> transform]
477
+ //
478
+ // Invariants enforced here:
479
+ // 1. Line classification: IDENTIFIER-start = field; PROPERTY-start (the
480
+ // lexer absorbs trailing `:` into the identifier's tag) = callable.
481
+ // 2. Type slot is optional — default is `string`. Identifier types
482
+ // (`email`, `integer`, …), array suffix (`string[]`), and string-
483
+ // literal unions (`"M" | "F" | "U"`) are the three valid shapes.
484
+ // 3. Literal unions require 2+ members, all string literals, no mixing
485
+ // with identifier types or null. Nullability is carried by the `?`
486
+ // modifier, not by union membership.
487
+ // 4. The `->` transform is TERMINAL — nothing follows it on the line.
488
+ // 5. Comma before `->` is required when anything precedes the arrow
489
+ // (type, range, regex, default, attrs). Only the bare form
490
+ // `name! -> body` parses comma-less, because there's nothing to
491
+ // elide.
492
+ // 6. Each comma-separated rest part is one of: `[…]` default,
493
+ // `{…}` attrs, `/regex/` pattern, `n..n` range, `-> transform`.
494
+ // The head token uniquely identifies the form. Duplicates of any
495
+ // single form are rejected.
496
// VARCHAR-like primitive types — the `schema.defaultMaxString` pragma
// applies a default `max` to these when no explicit range/regex/literals
// are declared. `text` stays uncapped by design (it's the opt-out for
// long-form content); `uuid` has fixed length; `json`/`any` aren't strings.
// NOTE(review): the consumer of this set (the constraint step that applies
// the pragma's cap) is outside this chunk — verify usage at the call site.
const VARCHAR_TYPES = new Set(['string', 'email', 'url', 'phone', 'zip']);
501
+
502
+ function parseFieldedLine(kind, line, entries, ctx) {
503
+ let first = line[0];
504
+ if (!first) return;
505
+
506
+ // Directive: @NAME [args]
507
+ if (first[0] === '@') {
508
+ let nameTok = line[1];
509
+ if (!nameTok || (nameTok[0] !== 'IDENTIFIER' && nameTok[0] !== 'PROPERTY')) {
510
+ throw schemaError(first, "Expected directive name after '@'.");
511
+ }
512
+ let argTokens = line.slice(2);
513
+ let dname = nameTok[1];
514
+
515
+ // `@ensure` is a refinement directive with its own grammar — it takes
516
+ // either an inline `"msg", (args) -> body` or a bracketed array of
517
+ // those pairs. Emits one `tag: "ensure"` entry per refinement; the
518
+ // per-entry shape mirrors methods so compileCallableFn-style codegen
519
+ // can fire.
520
+ if (dname === 'ensure') {
521
+ let pairs = parseEnsurePairs(argTokens, first);
522
+ for (let p of pairs) {
523
+ entries.push({
524
+ tag: 'ensure',
525
+ name: 'ensure',
526
+ message: p.message,
527
+ paramTokens: p.paramTokens,
528
+ bodyTokens: p.bodyTokens,
529
+ loc: p.loc,
530
+ headerLoc: first.loc,
531
+ });
532
+ }
533
+ return;
534
+ }
535
+
536
+ // Pre-parse structured args so shadow-TS and runtime-codegen share
537
+ // the same descriptor shape. Relation and mixin directives get a
538
+ // `[{target, optional?}]` array; other directives leave `args` unset.
539
+ let args = null;
540
+ if (dname === 'belongs_to' || dname === 'has_many' || dname === 'has_one' ||
541
+ dname === 'one' || dname === 'many' || dname === 'mixin') {
542
+ let t0 = argTokens[0];
543
+ if (t0 && (t0[0] === 'IDENTIFIER' || t0[0] === 'PROPERTY')) {
544
+ let optional = t0.data?.predicate === true;
545
+ if (!optional && argTokens[1]?.[0] === '?') optional = true;
546
+ args = [{ target: t0[1], optional }];
547
+ }
548
+ }
549
+ entries.push({
550
+ tag: 'directive',
551
+ name: dname,
552
+ args,
553
+ argTokens,
554
+ loc: first.loc,
555
+ });
556
+ return;
557
+ }
558
+
559
+ // The identifier regex absorbs a trailing `:` by retagging the ident as
560
+ // PROPERTY and emitting a separate `:` token. So a line starting with
561
+ // PROPERTY is always a callable (`name: -> body` or `name: ~> body`);
562
+ // a line starting with IDENTIFIER is always a field.
563
+ if (first[0] === 'PROPERTY') {
564
+ parseCallableLine(kind, first, line, entries);
565
+ return;
566
+ }
567
+ if (first[0] !== 'IDENTIFIER') {
568
+ throw schemaError(first,
569
+ `Unexpected ${first[0]} at schema top level. Allowed: fields ('name! type'), directives ('@name'), methods ('name: -> body'), or computed getters ('name: ~> body').`);
570
+ }
571
+
572
+ let name = first[1];
573
+
574
+ // Guard: `name:` without the colon absorbed — shouldn't happen but
575
+ // produces a friendly error if it does.
576
+ if (line[1]?.[0] === ':') {
577
+ throw schemaError(line[1],
578
+ `Schema fields use 'name type' (space, no colon). For methods or computed use 'name: -> body' or 'name: ~> body'.`);
579
+ }
580
+
581
+ // Field: IDENTIFIER [modifiers] TYPE [, constraints] [, attrs]
582
+ let modifiers = collectModifiers(first);
583
+ let pos = 1;
584
+
585
+ // Adjacent `!`, `#`, `?` modifier tokens. `!` and `?` are absorbed into
586
+ // the IDENTIFIER's data by the main lexer. `#` arrives as a standalone
587
+ // token because the schema commentToken exception kicks in when `#` is
588
+ // adjacent to an identifier. A modifier must be unspaced from the
589
+ // token it follows, so we check the preceding token's `.spaced` flag
590
+ // (which the whitespace pass sets to true when whitespace follows).
591
+ while (pos < line.length) {
592
+ let tk = line[pos];
593
+ let adjacent = line[pos - 1] && !line[pos - 1].spaced;
594
+ if (!adjacent) break;
595
+ if (tk[0] === '#' || tk[0] === '?' || tk[0] === '!') {
596
+ modifiers.push(tk[0]);
597
+ pos++;
598
+ continue;
599
+ }
600
+ break;
601
+ }
602
+
603
+ // Reject a stray colon here — gives a clear diagnostic for the common
604
+ // mistake `name: type` instead of `name type`.
605
+ let typeFirst = line[pos];
606
+ if (typeFirst?.[0] === ':') {
607
+ throw schemaError(typeFirst,
608
+ `Schema fields use 'name type' (space, no colon). Got 'name:'. For methods/computed use 'name: -> body' or 'name: ~> body'.`);
609
+ }
610
+
611
+ // Type: IDENTIFIER (optionally followed by `[]` for array) OR a
612
+ // string-literal union like `"M" | "F" | "U"`. The type slot is
613
+ // OPTIONAL — if the next token isn't a type-starting token, the
614
+ // field defaults to `string` and we fall through to constraint
615
+ // parsing.
616
+ let typeName = 'string';
617
+ let literals = null;
618
+ if (typeFirst?.[0] === 'IDENTIFIER') {
619
+ typeName = typeFirst[1];
620
+ pos++;
621
+ } else if (typeFirst?.[0] === 'STRING') {
622
+ // Literal union: collect alternating STRING | STRING | STRING...
623
+ literals = [JSON.parse(typeFirst[1])];
624
+ pos++;
625
+ while (line[pos]?.[0] === '|' && line[pos + 1]?.[0] === 'STRING') {
626
+ pos++; // consume '|'
627
+ literals.push(JSON.parse(line[pos][1]));
628
+ pos++;
629
+ }
630
+ // Forbid mixing with identifier types or null/undefined.
631
+ if (line[pos]?.[0] === '|') {
632
+ let next = line[pos + 1];
633
+ let tag = next?.[0] ?? '<end>';
634
+ throw schemaError(next || line[pos],
635
+ `Literal unions contain string literals only. '${tag}' is not allowed as a union member. Use the '?' modifier for nullability.`);
636
+ }
637
+ if (literals.length < 2) {
638
+ throw schemaError(typeFirst,
639
+ `Literal union needs at least two string literals. Use '${JSON.stringify(literals[0])}' as a default with '[${JSON.stringify(literals[0])}]' instead.`);
640
+ }
641
+ typeName = 'literal-union';
642
+ }
643
+ let array = false;
644
+ // `string[]` tokenizes as IDENTIFIER INDEX_START INDEX_END (or `[` `]`
645
+ // depending on context; closeOpenIndexes retags the empty bracket pair
646
+ // as INDEX_START/INDEX_END when it follows an indexable token).
647
+ let openTag = line[pos]?.[0];
648
+ let closeTag = line[pos + 1]?.[0];
649
+ if ((openTag === '[' || openTag === 'INDEX_START') &&
650
+ (closeTag === ']' || closeTag === 'INDEX_END')) {
651
+ array = true;
652
+ pos += 2;
653
+ }
654
+
655
+ // Remaining tokens on the line are a mix of `[…]` constraints (default,
656
+ // regex), `{…}` attrs, and `n..n` range constraints. Each form is
657
+ // self-identifying by its head token shape. Raw token slices are
658
+ // captured here and semantic-parsed at compile time.
659
+ let rest = line.slice(pos);
660
+
661
+ // Comma-required rule: if a type was consumed and the next token is
662
+ // `->` (no comma separator), reject with a clear diagnostic. The
663
+ // comma is a structural boundary between the field declaration and
664
+ // the transform; skipping it makes `email!# email -> fn` read as
665
+ // if 'email' were an argument to the arrow, which it isn't.
666
+ let typeConsumed = typeFirst?.[0] === 'IDENTIFIER' || typeFirst?.[0] === 'STRING';
667
+ if (typeConsumed && rest[0]?.[0] === '->') {
668
+ throw schemaError(rest[0],
669
+ `Field '${name}' has a transform after the type; a comma is required before '->'. Write '${name} ${typeName}, -> …'.`);
670
+ }
671
+ let constraintTokens = null;
672
+ let attrsTokens = null;
673
+ let rangeTokens = null;
674
+ let regexToken = null;
675
+ let transformTokens = null;
676
+
677
+ if (rest.length > 0) {
678
+ // The leading comma is only required when a type was consumed. If
679
+ // the type slot was empty, constraints may follow the modifiers
680
+ // directly (`name? [1, 20]`). Both shapes produce the same parts.
681
+ if (rest[0]?.[0] === ',') {
682
+ rest = rest.slice(1);
683
+ }
684
+ // Split top-level by commas. Multi-line trailers (`name! type,\n
685
+ // [8, 100]`) introduce surrounding INDENT/OUTDENT tokens that
686
+ // don't affect semantics — strip them from each part so the head
687
+ // is the literal `[` or `{`.
688
+ let parts = splitTopLevelByComma(rest);
689
+ for (let i = 0; i < parts.length; i++) {
690
+ let part = parts[i];
691
+ // Strip leading INDENT/TERMINATOR so we can inspect the head token.
692
+ while (part.length && (part[0][0] === 'INDENT' || part[0][0] === 'TERMINATOR')) {
693
+ part = part.slice(1);
694
+ }
695
+ if (!part.length) continue;
696
+
697
+ // A `->` at the head of a part is the transform arrow — the
698
+ // preceding comma separated it out. `->` elsewhere in the part
699
+ // (after content) means the user wrote something like
700
+ // `email -> fn` without the separator; the comma is required
701
+ // as a structural boundary between the field declaration and
702
+ // the transform.
703
+ if (part[0][0] !== '->') {
704
+ let innerArrow = findTopLevelArrowIdx(part);
705
+ if (innerArrow > 0) {
706
+ throw schemaError(part[innerArrow],
707
+ `Field '${name}' has a transform after other content; a comma is required before '->'. Write 'name! <constraints>, -> <body>'.`);
708
+ }
709
+ }
710
+ let head = part[0];
711
+ // For non-transform parts, also strip trailing OUTDENT/TERMINATOR.
712
+ // Transform parts own their INDENT/OUTDENT wrapping — parseBodyTokens
713
+ // handles it.
714
+ if (head[0] !== '->') {
715
+ while (part.length && (part[part.length - 1][0] === 'OUTDENT' || part[part.length - 1][0] === 'TERMINATOR')) {
716
+ part = part.slice(0, -1);
717
+ }
718
+ if (!part.length) continue;
719
+ head = part[0];
720
+ }
721
+ if (head[0] === '[' || head[0] === 'INDEX_START') {
722
+ if (constraintTokens) {
723
+ throw schemaError(head,
724
+ `Field '${name}' has more than one '[…]' constraint. At most one default / regex bracket per field.`);
725
+ }
726
+ constraintTokens = part;
727
+ } else if (head[0] === '{') {
728
+ if (attrsTokens) {
729
+ throw schemaError(head,
730
+ `Field '${name}' has more than one '{…}' attrs bracket.`);
731
+ }
732
+ attrsTokens = part;
733
+ } else if (isRangeConstraintTokens(part)) {
734
+ if (rangeTokens) {
735
+ throw schemaError(head,
736
+ `Field '${name}' has more than one range constraint. Only one 'min..max' per field.`);
737
+ }
738
+ rangeTokens = part;
739
+ } else if (head[0] === 'REGEX' && part.length === 1) {
740
+ if (regexToken) {
741
+ throw schemaError(head,
742
+ `Field '${name}' has more than one regex constraint.`);
743
+ }
744
+ regexToken = head;
745
+ } else if (head[0] === '->') {
746
+ // Transform part. Must be the last comma-separated part on the
747
+ // line (transform is terminal).
748
+ if (i !== parts.length - 1) {
749
+ throw schemaError(head,
750
+ `Transform '-> …' must be the last element on the field line for '${name}'.`);
751
+ }
752
+ transformTokens = part.slice(1);
753
+ } else {
754
+ throw schemaError(head,
755
+ `Unexpected trailer for field '${name}'. Expected '[…]' default, '{…}' attrs, '/regex/', 'min..max' range, or '-> transform'.`);
756
+ }
757
+ }
758
+ }
759
+
760
+ // Array suffix is incompatible with literal-union types in v2.
761
+ if (array && literals) {
762
+ throw schemaError(typeFirst,
763
+ `Array-of-literal-union is not supported. Use 'string[]' if you need an array of strings.`);
764
+ }
765
+
766
+ // The `schema.defaultMaxString` pragma baked into this schema's ctx
767
+ // is a candidate for any VARCHAR-like primitive that isn't already
768
+ // narrowed by a regex or literal-union. The final "fill it in only
769
+ // if max is still absent" decision happens in mergeFieldConstraints
770
+ // so open-ended ranges (`5..` → only min) still get the pragma's max.
771
+ // Using `!= null` (not truthy) keeps future non-positive pragma
772
+ // values valid if more keys land here.
773
+ let defaultMax = null;
774
+ if (ctx?.defaultMaxString != null && !regexToken && !literals &&
775
+ VARCHAR_TYPES.has(typeName)) {
776
+ defaultMax = ctx.defaultMaxString;
777
+ }
778
+
779
+ entries.push({
780
+ tag: 'field',
781
+ name,
782
+ modifiers,
783
+ typeName,
784
+ array,
785
+ literals,
786
+ constraintTokens,
787
+ attrsTokens,
788
+ rangeTokens,
789
+ regexToken,
790
+ transformTokens,
791
+ defaultMax,
792
+ loc: first.loc,
793
+ });
794
+ }
795
+
796
// Locate a depth-zero `->` in a token slice. Bracket tokens (including
// the lexer's CALL/INDEX/PARAM start/end pairs) adjust nesting depth;
// only an arrow outside all of them counts. Returns the arrow's index,
// or -1 when no top-level arrow exists. Used to detect parts like
// `8..100 -> transform` written without a separating comma.
function findTopLevelArrowIdx(tokens) {
  const OPENERS = ['(', '[', '{', 'CALL_START', 'INDEX_START', 'PARAM_START'];
  const CLOSERS = [')', ']', '}', 'CALL_END', 'INDEX_END', 'PARAM_END'];
  let nesting = 0;
  let idx = 0;
  for (const tok of tokens) {
    const tag = tok[0];
    if (OPENERS.includes(tag)) {
      nesting += 1;
    } else if (CLOSERS.includes(tag)) {
      nesting -= 1;
    } else if (nesting === 0 && tag === '->') {
      return idx;
    }
    idx += 1;
  }
  return -1;
}
813
+
814
// Shape-test for a `min..max` range part: an optional (possibly
// `-`-negated) numeric endpoint, the `..` token, then another optional
// endpoint. Open-ended forms are allowed — `..N` is "at most N", `N..`
// is "at least N" — but at least one endpoint must be present, so a
// bare `..` is rejected, and nothing may trail the range. The caller
// has already stripped any surrounding INDENT/OUTDENT from the part.
function isRangeConstraintTokens(tokens) {
  let pos = 0;
  // Consume one optional endpoint; reports whether one was present.
  const takeEndpoint = () => {
    if (tokens[pos]?.[0] === '-' && tokens[pos + 1]?.[0] === 'NUMBER') {
      pos += 2;
      return true;
    }
    if (tokens[pos]?.[0] === 'NUMBER') {
      pos += 1;
      return true;
    }
    return false;
  };
  const hasLeft = takeEndpoint();
  if (tokens[pos]?.[0] !== '..') return false;
  pos += 1;
  const hasRight = takeEndpoint();
  return (hasLeft || hasRight) && pos === tokens.length;
}
836
+
837
// Parse a schema top-level callable line into one entry. Three arrow
// forms are recognized after the mandatory `name:` prefix:
//   name: -> body   — method (or hook, for known hook names on :model)
//   name: ~> body   — lazy computed getter (lexes as an EFFECT token)
//   name: !> body   — eager derived field (lexes as the unspaced pair
//                     UNARY_MATH '!' + COMPARE '>')
// Pushes a {tag, name, arrow, paramTokens, bodyTokens, …} entry.
function parseCallableLine(kind, headerTok, line, entries) {
  const name = headerTok[1];
  const colonTok = line[1];
  if (!colonTok || colonTok[0] !== ':') {
    throw schemaError(headerTok,
      `Expected ':' after '${name}' before arrow.`);
  }
  const arrowTok = line[2];
  const nextTok = line[3];
  let arrow = null;
  let bodyStart = 3;
  if (arrowTok?.[0] === '->') {
    arrow = '->';
  } else if (arrowTok?.[0] === 'EFFECT') {
    arrow = '~>';
  } else if (arrowTok?.[0] === 'UNARY_MATH' && arrowTok[1] === '!' &&
             nextTok?.[0] === 'COMPARE' && nextTok[1] === '>' &&
             !arrowTok.spaced) {
    arrow = '!>';
    bodyStart = 4; // skip both halves of the two-token `!>`
  }
  if (arrow === null) {
    throw schemaError(colonTok,
      `Schema top-level '${name}:' must be followed by '->' (method/hook), '~>' (computed getter), or '!>' (eager derived).`);
  }
  // Entry classification: the arrow decides computed/derived; a plain
  // `->` becomes a hook only when this is a :model and the name is a
  // recognized hook name.
  let entryTag;
  if (arrow === '~>') {
    entryTag = 'computed';
  } else if (arrow === '!>') {
    entryTag = 'derived';
  } else {
    entryTag = (kind === 'model' && HOOK_NAMES.has(name)) ? 'hook' : 'method';
  }
  entries.push({
    tag: entryTag,
    name,
    arrow,
    paramTokens: [],
    bodyTokens: line.slice(bodyStart),
    headerLoc: headerTok.loc,
    arrowLoc: arrowTok.loc,
  });
}
891
+
892
// Parse `@ensure` arguments into one or more refinement pairs. Two
// accepted source forms:
//
//   inline: `@ensure "msg", (args) -> body`
//   array:  `@ensure [ "msg", (args) -> body
//                    , "msg", (args) -> body
//                    , ... ]`
//
// Both compile to the SAME entry shape — every pair yields one
// `{tag: "ensure", message, paramTokens, bodyTokens}` entry, so
// downstream runtime code cannot tell the source forms apart.
//
// Rip wraps `@ensure args...` in an implicit CALL_START/CALL_END pair
// (it reads as a call); that wrapper is stripped before inspecting the
// first real token for the array bracket or the inline string.
function parseEnsurePairs(argTokens, directiveTok) {
  let tokens = argTokens;
  const emptyArgsError = () => schemaError(directiveTok,
    "@ensure requires 'message, (x) -> body' or '[...]' array of pairs.");
  if (!tokens.length) throw emptyArgsError();

  // Remove the implicit call wrapper when present.
  if (tokens[0]?.[0] === 'CALL_START' &&
      tokens[tokens.length - 1]?.[0] === 'CALL_END') {
    tokens = tokens.slice(1, -1);
  }
  if (!tokens.length) throw emptyArgsError();

  const first = tokens[0];

  // Array form — leading `[` (or INDEX_START).
  if (first[0] === '[' || first[0] === 'INDEX_START') {
    const elements = splitEnsureElements(extractEnsureBracketInner(tokens, first));
    if (elements.length === 0) {
      throw schemaError(first, "@ensure [...] must contain at least one 'message, fn' pair.");
    }
    if (elements.length % 2 !== 0) {
      throw schemaError(first,
        `@ensure [...] must have pairs of 'message, fn' (got ${elements.length} elements; odd count).`);
    }
    const pairs = [];
    for (let i = 0; i < elements.length; i += 2) {
      pairs.push(extractEnsurePair(elements[i], elements[i + 1], first));
    }
    return pairs;
  }

  // Inline form — STRING, (args) -> body.
  const pieces = splitTopLevelByComma(tokens);
  if (pieces.length < 2) {
    throw schemaError(first,
      "@ensure inline form must be 'message, (x) -> body'. Did you forget the comma?");
  }
  if (pieces.length > 2) {
    throw schemaError(first,
      `@ensure inline form takes exactly 'message, fn' (got ${pieces.length} comma-separated parts). Use '@ensure [...]' for multiple refinements.`);
  }
  return [extractEnsurePair(pieces[0], pieces[1], first)];
}
954
+
955
// Walk `[ ... ]` tokens and return the inner slice. Rejects any tokens
// trailing the close bracket. If the bracket body is multi-line, Rip
// wraps the contents in one INDENT/OUTDENT pair; when that pair spans
// the entire inner region it is stripped, because @ensure splits pairs
// at depth 0 and the wrap would hide every internal comma/newline.
function extractEnsureBracketInner(tokens, openTok) {
  const collected = [];
  let level = 0;
  for (let i = 0; i < tokens.length; i++) {
    const tok = tokens[i];
    const tag = tok[0];
    if (tag === '[' || tag === 'INDEX_START') {
      level++;
      if (level === 1) continue; // the opening bracket itself
    }
    if (tag === ']' || tag === 'INDEX_END') {
      level--;
      if (level === 0) {
        if (i < tokens.length - 1) {
          throw schemaError(tokens[i + 1],
            "@ensure [...] must be the only argument — extra tokens after ']'.");
        }
        // Strip a single INDENT/OUTDENT pair only when it wraps the
        // whole inner region (the matching OUTDENT is the last token).
        const n = collected.length;
        if (n >= 2 &&
            collected[0][0] === 'INDENT' &&
            collected[n - 1][0] === 'OUTDENT') {
          let wd = 0;
          let spansWhole = false;
          for (let k = 0; k < n; k++) {
            const t2 = collected[k][0];
            if (t2 === 'INDENT') wd++;
            else if (t2 === 'OUTDENT') {
              wd--;
              if (wd === 0) { spansWhole = (k === n - 1); break; }
            }
          }
          if (spansWhole) return collected.slice(1, -1);
        }
        return collected;
      }
    }
    if (level >= 1) collected.push(tok);
  }
  throw schemaError(openTok, "@ensure: unclosed '['.");
}
999
+
1000
// Split an @ensure array body into elements. Mirrors Rip's
// array-literal rule: both `,` and newlines (TERMINATOR) act as
// element separators at depth 0, so users can write rows without
// trailing commas:
//
//   @ensure [
//     "msg1", (u) -> body
//     "msg2", (u) -> body    <-- no comma needed between pairs
//   ]
function splitEnsureElements(tokens) {
  const OPENERS = ['(', '[', '{', 'CALL_START', 'INDEX_START', 'PARAM_START', 'INDENT'];
  const CLOSERS = [')', ']', '}', 'CALL_END', 'INDEX_END', 'PARAM_END', 'OUTDENT'];
  const elements = [];
  let current = [];
  let nesting = 0;
  for (const tok of tokens) {
    const tag = tok[0];
    if (OPENERS.includes(tag)) nesting++;
    if (CLOSERS.includes(tag)) nesting--;
    if (nesting === 0 && (tag === ',' || tag === 'TERMINATOR')) {
      // Separator at depth 0 — close out the current element (if any).
      if (current.length) {
        elements.push(current);
        current = [];
      }
    } else {
      current.push(tok);
    }
  }
  if (current.length) elements.push(current);
  return elements;
}
1029
+
1030
// Extract one refinement pair from `messagePart` and `fnPart` (token
// slices already split by splitTopLevelByComma). Shape is validated at
// parse time so typos surface as targeted diagnostics instead of
// runtime "expected function" noise. Returns
// {message, paramTokens, bodyTokens, loc}.
function extractEnsurePair(messagePart, fnPart, refTok) {
  if (!messagePart?.length) {
    throw schemaError(refTok, "@ensure: missing message (expected a string literal).");
  }
  if (messagePart.length !== 1 || messagePart[0][0] !== 'STRING') {
    throw schemaError(messagePart[0] || refTok,
      "@ensure: each refinement's first element must be a string literal message.");
  }
  const msgTok = messagePart[0];
  // STRING token text is JSON-compatible — decode to the plain string.
  const message = JSON.parse(msgTok[1]);

  if (!fnPart?.length) {
    throw schemaError(msgTok, "@ensure: missing function after message.");
  }
  // The fn part must open with `(` / PARAM_START — a bare `-> body`
  // with no params is rejected; refinements declare their object
  // parameter explicitly.
  const opener = fnPart[0];
  if (opener[0] !== '(' && opener[0] !== 'PARAM_START') {
    throw schemaError(opener,
      "@ensure: expected '(args) -> body' after the message. Predicates must declare their parameter explicitly — '(u) -> ...'.");
  }
  // Collect parameter tokens up to the matching close paren.
  const paramTokens = [];
  let nesting = 1;
  let cursor = 1;
  while (cursor < fnPart.length) {
    const tok = fnPart[cursor];
    const tag = tok[0];
    if (tag === '(' || tag === 'PARAM_START') nesting++;
    else if (tag === ')' || tag === 'PARAM_END') {
      nesting--;
      if (nesting === 0) { cursor++; break; }
    }
    paramTokens.push(tok);
    cursor++;
  }
  if (nesting !== 0) {
    throw schemaError(opener, "@ensure: unclosed '(' in predicate parameters.");
  }
  const arrowTok = fnPart[cursor];
  if (arrowTok?.[0] !== '->') {
    throw schemaError(arrowTok || fnPart[cursor - 1] || msgTok,
      "@ensure: expected '->' after predicate parameters.");
  }
  const bodyTokens = fnPart.slice(cursor + 1);
  if (!bodyTokens.length) {
    throw schemaError(arrowTok, "@ensure: predicate function body is empty.");
  }
  return { message, paramTokens, bodyTokens, loc: msgTok.loc };
}
1085
+
1086
// Extract param names from a `(u)` or `(u, opts)` token slice. Only
// plain identifiers are accepted — no destructuring, defaults, or rest
// args; refinements don't need that complexity yet.
function ensureParamNames(paramTokens, refTok) {
  if (!paramTokens.length) return [];
  const names = [];
  for (const piece of splitTopLevelByComma(paramTokens)) {
    // Newline tokens inside multi-line param lists carry no meaning.
    const meaningful = piece.filter(t => t[0] !== 'TERMINATOR');
    if (meaningful.length !== 1 || meaningful[0][0] !== 'IDENTIFIER') {
      throw schemaError(meaningful[0] || refTok,
        "@ensure: predicate parameters must be plain identifiers.");
    }
    names.push(meaningful[0][1]);
  }
  return names;
}
1101
+
1102
// Parse one :enum body line into an enum-member entry. Member forms:
//   :admin       bare symbol → maps to the name string "admin"
//   :pending 0   valued symbol → maps "pending" (and 0) to 0
//
// Values are any literal (number, string, boolean, null, regex).
// Mixing bare and valued members in one enum is permitted but unusual:
// the resulting Map is heterogeneous — bare entries hold name strings,
// valued entries hold their literal. Keep members uniform if that
// matters downstream. Directives (`@…`) are rejected outright.
function parseEnumLine(line, entries) {
  const head = line[0];
  if (!head) return;
  if (head[0] === '@') {
    const nameTok = line[1];
    const isNameLike = nameTok &&
      (nameTok[0] === 'IDENTIFIER' || nameTok[0] === 'PROPERTY');
    const dname = isNameLike ? nameTok[1] : 'directive';
    throw schemaError(head,
      `:enum schemas don't accept '@${dname}'. Enums hold only :symbol members. Move the invariant to a :shape or :model that uses this enum as a field type.`);
  }
  if (head[0] !== 'SYMBOL') {
    throw schemaError(head,
      `Enum member must be a :symbol. Use ':${head[1] ?? 'name'}' for a bare member or ':${head[1] ?? 'name'} value' for a valued one.`);
  }
  const name = head[1];
  const valueTok = line[1];
  // Bare member — no value token follows.
  if (!valueTok) {
    entries.push({ tag: 'enum-member', name, value: undefined, loc: head.loc });
    return;
  }
  if (valueTok[0] === ':') {
    throw schemaError(valueTok,
      `Enum member ':${name}' — drop the ':' before the value. Use ':${name} value'.`);
  }
  if (line.length > 2) {
    throw schemaError(line[2],
      `Extra tokens after enum member ':${name}' value.`);
  }
  entries.push({
    tag: 'enum-member',
    name,
    value: literalOf(valueTok),
    loc: head.loc,
  });
}
1146
+
1147
+ // ============================================================================
1148
+ // Codegen — emitSchema
1149
+ // ============================================================================
1150
+
1151
// Emit the runtime `__schema({...})` call for a SCHEMA node. rest[0] is
// the SCHEMA_BODY node; the parser metadata bridge wraps the token
// value in `new String()` and copies token.data fields onto it, so the
// descriptor surfaces as `node.descriptor` (or `node.data.descriptor`).
// The binding name is threaded through `emitter._schemaName` by
// emitAssignment (parallels `_componentName`); when present it is
// embedded so SchemaError, the generated class name, and debug output
// share a stable identity.
function emitSchemaNode(emitter, head, rest, context) {
  const bodyNode = rest[0];
  const descriptor = readDescriptor(bodyNode);
  if (!descriptor) {
    throw new Error('schema: missing descriptor on SCHEMA_BODY token');
  }
  emitter.usesSchemas = true;

  const schemaName = emitter._schemaName || null;
  const fields = [`kind: ${JSON.stringify(descriptor.kind)}`];
  if (schemaName) {
    fields.push(`name: ${JSON.stringify(schemaName)}`);
  }
  const entryCode = descriptor.entries.map(entry => entryLiteral(emitter, entry));
  fields.push(`entries: [${entryCode.join(', ')}]`);
  return `__schema({${fields.join(', ')}})`;
}
1172
+
1173
// Pull the schema descriptor off a SCHEMA_BODY node. It may live
// directly on the node or under node.data (depending on how the
// metadata bridge copied it); non-object nodes yield null.
function readDescriptor(node) {
  if (!node || typeof node !== 'object') return null;
  return node.descriptor || node.data?.descriptor || null;
}
1180
+
1181
// Serialize one descriptor entry to its JS object-literal source text.
// Constraints captured as raw token slices during the lexer pass are
// semantic-compiled here (range, bracket default, bare regex) and
// merged; transform / ensure / callable bodies are compiled to function
// expressions via the emitter.
function entryLiteral(emitter, e) {
  const render = (pairs) => `{${pairs.join(', ')}}`;
  switch (e.tag) {
    case 'field': {
      const pairs = [
        `tag: "field"`,
        `name: ${JSON.stringify(e.name)}`,
        `modifiers: ${JSON.stringify(e.modifiers)}`,
        `typeName: ${JSON.stringify(e.typeName)}`,
        `array: ${e.array ? 'true' : 'false'}`,
      ];
      if (e.literals) {
        pairs.push(`literals: ${JSON.stringify(e.literals)}`);
      }
      // Each constraint source is compiled independently, then merged
      // into one {min?, max?, default?, regex?} literal.
      const range = e.rangeTokens ? compileRangeTokens(e.rangeTokens, e) : null;
      const bracket = e.constraintTokens ? compileConstraintsLiteral(e.constraintTokens, e) : null;
      const regex = e.regexToken ? regexLiteralOf(e.regexToken) : null;
      const merged = mergeFieldConstraints(range, bracket, regex, e);
      if (merged) pairs.push(`constraints: ${merged}`);
      if (e.transformTokens) {
        pairs.push(`transform: ${compileTransformFn(emitter, e.transformTokens)}`);
      }
      return render(pairs);
    }
    case 'directive': {
      const pairs = [`tag: "directive"`, `name: ${JSON.stringify(e.name)}`];
      const args = compileDirectiveArgsLiteral(e.name, e.argTokens || []);
      if (args) pairs.push(`args: ${args}`);
      return render(pairs);
    }
    case 'ensure':
      return render([
        `tag: "ensure"`,
        `message: ${JSON.stringify(e.message)}`,
        `fn: ${compileEnsureFn(emitter, e)}`,
      ]);
    case 'computed':
    case 'method':
    case 'hook':
    case 'derived':
      return render([
        `tag: ${JSON.stringify(e.tag)}`,
        `name: ${JSON.stringify(e.name)}`,
        `fn: ${compileCallableFn(emitter, e)}`,
      ]);
    case 'enum-member': {
      const pairs = [`tag: "enum-member"`, `name: ${JSON.stringify(e.name)}`];
      if (e.value !== undefined) pairs.push(`value: ${JSON.stringify(e.value)}`);
      return render(pairs);
    }
    default:
      return `{tag: "unknown"}`;
  }
}
1240
+
1241
// Compile a callable body (`-> body` or `~> body`) to a JS
// `function(...)` expression with dynamic `this`. Both computed getters
// and methods go through the Rip thin-arrow codegen, which produces a
// `function() { ... }` (Rip `->` is NOT a JS arrow) — giving the right
// `this` semantics for instance-attached methods and proto getters.
function compileCallableFn(emitter, entry) {
  const bodySexpr = parseBodyTokens(entry.bodyTokens);
  // An empty body compiles to a no-op.
  if (!bodySexpr) return `(function() {})`;
  // Zero-param thin arrow; `emit` in value context yields a
  // parenthesized function expression.
  return emitter.emit(['->', [], bodySexpr], 'value');
}
1257
+
1258
// Compile an inline field transform body (`-> body`). The body receives
// the raw input object via Rip's implicit `it` parameter; no explicit
// params are emitted. Transforms run on .parse() only, not on hydrate.
function compileTransformFn(emitter, bodyTokens) {
  const bodySexpr = parseBodyTokens(bodyTokens);
  if (!bodySexpr) return `(function() { return undefined; })`;
  return emitter.emit(['->', [], bodySexpr], 'value');
}
1269
+
1270
// Compile an `@ensure` predicate — `(args) -> body` — into a thin-arrow
// function expression with explicit params. Unlike transforms (which
// use implicit `it`), refinements require the parameter to be named so
// the contract of "what the predicate sees" is visible at the call site.
function compileEnsureFn(emitter, entry) {
  const bodySexpr = parseBodyTokens(entry.bodyTokens);
  if (!bodySexpr) return `(function() { return undefined; })`;
  const params = ensureParamNames(entry.paramTokens, entry);
  return emitter.emit(['->', params, bodySexpr], 'value');
}
1283
+
1284
+ // ----------------------------------------------------------------------------
1285
+ // Compile-time constraint + directive argument evaluation
1286
+ // ----------------------------------------------------------------------------
1287
+ //
1288
+ // Constraints are captured as raw token slices during the lexer pass; this
1289
+ // layer evaluates them into a normalized {min?, max?, default?, regex?}
1290
+ // shape shared by runtime validation and DDL emission. Only literal-
1291
+ // deterministic values are accepted — identifiers, calls, and arbitrary
1292
+ // expressions are rejected.
1293
+ //
1294
+ // v2 constraint grammar (each form is self-identifying by token shape):
1295
+ // `min..max` — range: string length / array length / numeric value
1296
+ // `[value]` — default: a single literal payload in brackets
1297
+ // `/regex/` — pattern: bare regex literal, no wrapping brackets
1298
+ // `{key: val}` — attrs: object literal for `unique`, `index`, etc.
1299
+ // `-> body` — transform: terminal, comma-required before arrow
1300
+ // when anything precedes (see parseFieldedLine)
1301
+ //
1302
+ // Pre-v2 multi-element bracket forms (`[n, n]`, `[n, n, n]`, `[/re/]`) are
1303
+ // explicitly rejected with migration diagnostics pointing at the new form.
1304
// Evaluate a `[…]` constraint bracket. In v2 the bracket holds exactly
// one literal default value; pre-v2 multi-element forms (`[n, n]`,
// `[n, n, n]`) and bracketed regexes are rejected with migration
// diagnostics pointing at the replacement syntax. Returns
// {c: {default}} or {c: null} for an empty bracket.
function compileConstraintsLiteral(tokens, fieldEntry) {
  const items = splitTopLevelByComma(tokens.slice(1, -1));
  if (!items.length) return { c: null };

  const values = items.map(item => evalLiteralTokens(item, fieldEntry));
  const allNumbers = values.every(v => typeof v === 'number');

  if (values.length === 1) {
    const v = values[0];
    if (v instanceof RegExp) {
      throw schemaError(tokens[0],
        `Regex constraints are written bare, not in brackets. Replace '[${v}]' with '${v}'.`);
    }
    return { c: { default: v } };
  }
  if (values.length === 2 && allNumbers) {
    throw schemaError(tokens[0],
      `Size/value ranges use 'min..max' syntax, not brackets. Replace '[${values[0]}, ${values[1]}]' with '${values[0]}..${values[1]}'.`);
  }
  if (values.length === 3 && allNumbers) {
    throw schemaError(tokens[0],
      `Range + default is two separate constraints in v2. Replace '[${values[0]}, ${values[1]}, ${values[2]}]' with '${values[0]}..${values[1]}, [${values[2]}]'.`);
  }
  throw schemaError(tokens[0],
    `Constraint bracket takes a single default value in v2. Got ${values.length} elements.`);
}
1331
+
1332
// Build a RegExp from a bare REGEX token. The lexer's raw text includes
// the surrounding `/.../` delimiters plus any flags; both malformed raw
// text and patterns the RegExp constructor rejects produce schema
// diagnostics anchored on the token.
function regexLiteralOf(tok) {
  const raw = tok[1];
  const parsed = /^\/((?:\\.|[^\\/])+)\/([a-z]*)$/.exec(raw);
  if (!parsed) {
    throw schemaError(tok, `Invalid regex literal ${JSON.stringify(raw)}.`);
  }
  const [, source, flags] = parsed;
  try {
    return new RegExp(source, flags);
  } catch (err) {
    throw schemaError(tok, `Invalid regex '${raw}': ${err.message}`);
  }
}
1344
+
1345
// Evaluate a range token slice into {min?, max?}. The caller has
// already verified the shape via isRangeConstraintTokens. Open-ended
// forms omit the corresponding key rather than emitting undefined, so
// downstream constraint serialization stays clean. A reversed range
// (min > max) is rejected here.
function compileRangeTokens(tokens, fieldEntry) {
  let cursor = 0;
  // Consume one endpoint at the cursor: optional `-`, then a numeric
  // literal.
  const readEndpoint = () => {
    let negate = false;
    if (tokens[cursor]?.[0] === '-') {
      negate = true;
      cursor++;
    }
    const numTok = tokens[cursor++];
    const value = evalLiteralTokens([numTok], fieldEntry);
    if (typeof value !== 'number') {
      throw schemaError(numTok, `Range endpoints must be numeric literals.`);
    }
    return negate ? -value : value;
  };
  let min;
  if (tokens[cursor]?.[0] !== '..') min = readEndpoint();
  cursor++; // step over `..`
  const max = cursor < tokens.length ? readEndpoint() : undefined;
  if (min !== undefined && max !== undefined && min > max) {
    throw schemaError(tokens[0],
      `Range '${min}..${max}' is reversed. Write the smaller endpoint first.`);
  }
  const result = {};
  if (min !== undefined) result.min = min;
  if (max !== undefined) result.max = max;
  return result;
}
1375
+
1376
// Merge the optional range, bracket-default, and bare-regex constraints
// into a single serialized constraint literal (or null when nothing
// constrains the field). Each source contributes disjoint keys by
// construction — range sets min/max, bracket sets default, regex sets
// regex — with two sugar layers applied on top (implicit min=1, pragma
// default max) and a post-merge sanity check.
function mergeFieldConstraints(range, bracketLiteral, regex, fieldEntry) {
  // Shallow-copy the bracket's contribution instead of aliasing it: the
  // merge below writes min/max/regex keys into `c`, and writing through
  // an alias would mutate the caller-owned `bracketLiteral.c` object as
  // a hidden side effect.
  let c = { ...((bracketLiteral && bracketLiteral.c) || {}) };
  // Track whether this field's range used open-left shorthand (`..N`).
  // The implicit-min sugar is gated on *syntax* (range omitted its
  // min) rather than on merged state, so a future sugar that also
  // writes to c.min can't accidentally trigger the implicit.
  let openLeftRange = range && range.min === undefined;
  if (range) {
    if (range.min !== undefined) c.min = range.min;
    if (range.max !== undefined) c.max = range.max;
    // Open-min shorthand (`..N`) with a `!` modifier implies min=1 —
    // "required and non-empty" is the default reading for required
    // varchar-like fields. Gated on openLeftRange syntactically so
    // adding more sugar layers later doesn't trigger this by accident.
    if (openLeftRange && c.min === undefined && fieldEntry?.modifiers?.includes('!')) {
      c.min = 1;
    }
  }
  if (regex) {
    c.regex = regex;
  }
  // File-level `schema.defaultMaxString` pragma fills in max only when
  // the field didn't narrow the max any other way — parseFieldedLine
  // suppresses defaultMax on regex / literal-union fields already, so
  // this last check covers the open-ended `N..` case (min set, max
  // still unbounded) where the pragma should fill the gap.
  if (fieldEntry?.defaultMax != null && c.max === undefined) {
    c.max = fieldEntry.defaultMax;
  }
  // Post-merge consistency check. Sugar (`!` implicit min=1) and the
  // pragma default max can compose with a user-written explicit max to
  // produce min > max — e.g. `name! ..0` would naively emit
  // `{min: 1, max: 0}`, a constraint no value can satisfy. The
  // parse-time reversed-range check only sees syntactically-present
  // endpoints, so we re-validate here after every sugar has been
  // applied. Error message names the actual sources so the user can
  // pinpoint which side to fix.
  if (c.min !== undefined && c.max !== undefined && c.min > c.max) {
    let minSrc = (range && range.min !== undefined) ? `range min ${range.min}` : 'implicit min=1 from `!`';
    let maxSrc = (range && range.max !== undefined)
      ? `range max ${range.max}`
      : `pragma defaultMaxString=${fieldEntry?.defaultMax}`;
    throw schemaError({ loc: fieldEntry?.loc },
      `Field '${fieldEntry?.name}' would have impossible constraints min=${c.min} > max=${c.max} after sugar is applied (${minSrc} vs ${maxSrc}). Write an explicit range or drop the conflicting pragma.`);
  }
  if (c.min === undefined && c.max === undefined && c.default === undefined && c.regex === undefined) {
    return null;
  }
  return constraintLiteral(c);
}
1430
+
1431
// Render a merged {min?, max?, default?, regex?} constraint object as
// JS object-literal source text; null when every key is absent.
function constraintLiteral(c) {
  const rendered = [];
  if (c.min !== undefined) rendered.push(`min: ${serializeLiteral(c.min)}`);
  if (c.max !== undefined) rendered.push(`max: ${serializeLiteral(c.max)}`);
  if (c.default !== undefined) rendered.push(`default: ${serializeLiteral(c.default)}`);
  if (c.regex !== undefined) rendered.push(`regex: ${c.regex.toString()}`);
  if (rendered.length === 0) return null;
  return `{${rendered.join(', ')}}`;
}
1439
+
1440
// Render a compile-time literal value as JS source text. Strings are
// JSON-quoted, numbers/booleans stringified, regexes rendered via
// toString, and anything else (arrays, plain objects) falls through to
// JSON.stringify.
function serializeLiteral(v) {
  if (v === null) return 'null';
  if (v === undefined) return 'undefined';
  if (v instanceof RegExp) return v.toString();
  switch (typeof v) {
    case 'number':
    case 'boolean':
      return String(v);
    default:
      return JSON.stringify(v);
  }
}
1448
+
1449
// Compile directive args to a JS literal list or null. Each directive has
// its own arg shape — we centralize the parsing here so Layer 2 can rely
// on normalized structures.
//
// `tokens` is a flat slice of lexer tokens (each an array [tag, value] with
// optional `.data` / `.loc` properties) covering only the directive's args.
// Returns a string of JS source for the args array (e.g. `[{target: "Org"}]`)
// or null for flag-style directives that take no args.
// Throws SchemaSyntaxError (via schemaError) on malformed args.
function compileDirectiveArgsLiteral(name, tokens) {
  // @idStart requires its arg, so validate before the generic empty-bail.
  if (name === 'idStart' && !tokens.length) {
    throw schemaError(null,
      '@idStart requires an integer literal, e.g. @idStart 10001.');
  }
  if (!tokens.length) return null;

  // Relation directives: `@belongs_to Org`, `@belongs_to Org?`,
  // `@has_many Order`, `@has_one Profile`, `@one X`, `@many X`.
  if (name === 'belongs_to' || name === 'has_many' || name === 'has_one' ||
      name === 'one' || name === 'many' || name === 'mixin') {
    let t0 = tokens[0];
    if (!t0 || (t0[0] !== 'IDENTIFIER' && t0[0] !== 'PROPERTY')) {
      throw schemaError(t0 || tokens[tokens.length - 1],
        `@${name} requires a target name.`);
    }
    let target = t0[1];
    // `@belongs_to User?` tokenizes as IDENTIFIER "User" with
    // data.predicate=true. A trailing `?` in a later token position is
    // also accepted for robustness.
    let optional = t0.data?.predicate === true;
    let pos = 1;
    if (!optional && tokens[pos]?.[0] === '?') { optional = true; pos++; }
    let parts = [`target: ${JSON.stringify(target)}`];
    if (optional) parts.push('optional: true');
    return `[{${parts.join(', ')}}]`;
  }

  // `@index field` or `@index [a, b]` or `@index [a, b] #` for unique.
  if (name === 'index') {
    let fields = [];
    let unique = false;
    let pos = 0;
    if (tokens[pos]?.[0] === 'IDENTIFIER' || tokens[pos]?.[0] === 'PROPERTY') {
      // Single bare field name.
      fields.push(tokens[pos][1]);
      pos++;
    } else if (tokens[pos]?.[0] === '[' || tokens[pos]?.[0] === 'INDEX_START') {
      // Bracketed composite index: collect the tokens between the matching
      // bracket pair (depth-tracked so nested brackets don't end the scan).
      let inner = [];
      let depth = 1;
      pos++;
      while (pos < tokens.length && depth > 0) {
        let t = tokens[pos];
        if (t[0] === '[' || t[0] === 'INDEX_START') depth++;
        if (t[0] === ']' || t[0] === 'INDEX_END') {
          depth--;
          if (depth === 0) { pos++; break; }
        }
        inner.push(t);
        pos++;
      }
      // Only the first identifier of each comma-separated part is a field
      // name; anything else in a part is silently ignored (conservative).
      for (let part of splitTopLevelByComma(inner)) {
        if (part[0] && (part[0][0] === 'IDENTIFIER' || part[0][0] === 'PROPERTY')) {
          fields.push(part[0][1]);
        }
      }
    }
    // A trailing `#` marks the index unique.
    if (tokens[pos]?.[0] === '#') unique = true;
    let parts = [`fields: ${JSON.stringify(fields)}`];
    if (unique) parts.push('unique: true');
    return `[{${parts.join(', ')}}]`;
  }

  // @idStart N sets the seed value for the table's auto-id sequence.
  // Accepts a single integer literal (optionally negative). Consumed by
  // .toSQL(); models that never call .toSQL() simply ignore it.
  if (name === 'idStart') {
    let tok = tokens[0];
    let sign = 1;
    let numTok = tok;
    // Unary minus arrives as a separate '-' token before the NUMBER.
    if (tok && tok[0] === '-' && tokens[1] && tokens[1][0] === 'NUMBER') {
      sign = -1;
      numTok = tokens[1];
    }
    if (!numTok || numTok[0] !== 'NUMBER') {
      throw schemaError(tok || tokens[tokens.length - 1],
        '@idStart requires an integer literal, e.g. @idStart 10001.');
    }
    let n = sign * Number(numTok[1]);
    // Rejects floats like `@idStart 1.5` after numeric conversion.
    if (!Number.isInteger(n)) {
      throw schemaError(numTok,
        '@idStart requires an integer literal; got ' + numTok[1] + '.');
    }
    return '[{value: ' + n + '}]';
  }

  // Bare flag-like directives (@timestamps, @softDelete) don't take args.
  // Anything else — capture as raw literal tokens conservatively.
  return null;
}
1542
+
1543
+ // Evaluate a small expression as a literal. Accepts NUMBER, STRING, BOOL,
1544
+ // NULL, UNDEFINED, REGEX, SYMBOL (returns its name string — for enum-member
1545
+ // defaults like `[:draft]`), and unary minus on NUMBER. Anything else throws.
1546
// Evaluate a tiny token slice as a JS literal value. Accepts exactly one
// token of tag NUMBER / STRING / BOOL / NULL / UNDEFINED / REGEX / SYMBOL
// (SYMBOL yields its name string, for enum-member defaults like `[:draft]`),
// or the two-token form `- NUMBER` for a negative number. Any other shape
// raises a SchemaSyntaxError — identifier references are deliberately not
// supported, so constraint values stay deterministic at compile time.
function evalLiteralTokens(tokens, fieldEntry) {
  if (tokens.length === 0) {
    throw schemaError(null, 'Empty constraint value.');
  }
  const head = tokens[0];
  const tag = head[0];
  if (tokens.length === 1) {
    switch (tag) {
      case 'NUMBER':    return Number(head[1]);
      case 'STRING':    return JSON.parse(head[1]);
      case 'BOOL':      return head[1] === 'true';
      case 'NULL':      return null;
      case 'UNDEFINED': return undefined;
      case 'REGEX':     return parseRegexLiteral(head[1]);
      case 'SYMBOL':    return head[1];
      // Unrecognized single tags fall through to the error below.
    }
  } else if (tokens.length === 2 && tag === '-' && tokens[1][0] === 'NUMBER') {
    return -Number(tokens[1][1]);
  }
  throw schemaError(head,
    `Constraint values must be literals (number, string, boolean, null, regex, :symbol). Got ${tag}.`);
}
1568
+
1569
// Parse a regex-literal source string (`/pattern/flags`) into a RegExp.
// Fix: the flag class previously read `[gimsuy]`, which rejects the modern
// `d` (hasIndices, ES2021) and `v` (unicodeSets, ES2024) flags — a literal
// like `/foo/d` then fell into the bare `new RegExp(s)` fallback, turning
// the delimiters and flag into part of the pattern. The class is now the
// full `[dgimsuvy]` set.
// Input that doesn't look like a delimited literal is treated as a bare
// pattern with no flags (preserved fallback behavior).
function parseRegexLiteral(val) {
  let s = typeof val === 'string' ? val : String(val);
  // `s` flag on the outer match lets the pattern body itself contain
  // newlines; anchored so trailing junk falls through to the fallback.
  let m = s.match(/^\/(.*)\/([dgimsuvy]*)$/s);
  return m ? new RegExp(m[1], m[2]) : new RegExp(s);
}
1574
+
1575
// Run the tail rewriter passes on a captured body token slice, then feed
// the result through parser.parse() via a temporary lex adapter. The
// returned s-expression is the parsed body — either a single statement or
// a block of statements — ready to wrap in `['->', [], body]`.
//
// Returns null for an empty slice or an empty/unrecognized parse result.
// Requires `parseBodyTokens._LexerCtor` to have been assigned by the host
// (the Lexer constructor) before first use; throws otherwise. Temporarily
// replaces the shared `parser.lexer` while parsing — restored in `finally`,
// so callers must not parse concurrently. NOTE(review): assumes single-
// threaded compilation; confirm no re-entrant compileToJS path exists.
function parseBodyTokens(bodyTokens) {
  if (!bodyTokens || !bodyTokens.length) return null;

  // The body tokens were captured by rewriteSchema BEFORE rewriteTypes,
  // tagPostfixConditionals, rewriteTaggedTemplates, addImplicitBracesAndParens,
  // and addImplicitCallCommas ran. Run those tail passes on a sub-lexer
  // whose `this.tokens` is the body slice.
  let LexerCtor = parseBodyTokens._LexerCtor;
  if (!LexerCtor) {
    throw new Error('schema: parseBodyTokens called before Lexer was wired');
  }
  // Object.create gives us the prototype's pass methods without running the
  // Lexer constructor (which would expect real source text).
  let sub = Object.create(LexerCtor.prototype);
  let toks = bodyTokens.slice();
  // Multi-line callable bodies open with a matched INDENT ... OUTDENT pair
  // wrapping the statements. parser.parse() expects a Body (list of Lines),
  // not a leading INDENT, so strip the outer pair when the first INDENT's
  // matching OUTDENT is the last token.
  if (toks.length >= 2 && toks[0]?.[0] === 'INDENT') {
    let depth = 0;
    let lastOutdent = -1;
    for (let k = 0; k < toks.length; k++) {
      if (toks[k][0] === 'INDENT') depth++;
      else if (toks[k][0] === 'OUTDENT') {
        depth--;
        if (depth === 0) { lastOutdent = k; break; }
      }
    }
    if (lastOutdent === toks.length - 1) {
      toks = toks.slice(1, -1);
    }
  }
  // Minimal lexer state the tail passes read; everything else untouched.
  sub.tokens = toks;
  sub.seenFor = sub.seenImport = sub.seenExport = false;
  sub.ends = [];
  sub.indent = 0;
  sub.outdebt = 0;
  sub.indents = [];
  // Ensure a terminating TERMINATOR so parser.parse() sees a clean EOF.
  let lastTag = sub.tokens[sub.tokens.length - 1]?.[0];
  if (lastTag !== 'TERMINATOR') {
    sub.tokens.push(mkToken('TERMINATOR', '\n', bodyTokens[bodyTokens.length - 1]));
  }
  try {
    // Optional-call (`?.()`) so older/partial Lexer builds that lack a pass
    // simply skip it rather than crash.
    sub.rewriteTypes?.();
    sub.tagPostfixConditionals?.();
    sub.rewriteTaggedTemplates?.();
    sub.addImplicitBracesAndParens?.();
    sub.addImplicitCallCommas?.();
  } catch (e) {
    // If a tail pass throws, surface a clean schema error.
    throw schemaError(bodyTokens[0], `schema: failed to compile body: ${e.message}`);
  }
  // TYPE_DECL tokens are compile-time-only; the grammar has no production
  // for them, so drop them before parsing.
  let tokens = sub.tokens.filter(t => t[0] !== 'TYPE_DECL');

  // Swap parser.lexer, parse, restore.
  let savedLexer = parser.lexer;
  parser.lexer = {
    tokens, pos: 0,
    setInput() {},
    // Jison-style lex(): returns the token tag, exposes value via this.text.
    // Returning 1 signals EOF to the parser.
    lex() {
      if (this.pos >= this.tokens.length) return 1;
      let token = this.tokens[this.pos++];
      let val = token[1];
      if (token.data) {
        // Boxed String carries the token's side-channel data (e.g.
        // predicate/await markers) through the parser alongside the text.
        val = new String(val);
        Object.assign(val, token.data);
      }
      this.text = val;
      this.loc = token.loc;
      this.line = token.loc?.r;
      return token[0];
    },
  };
  let sexpr;
  try {
    sexpr = parser.parse('');
  } finally {
    // Always restore the host lexer, even when the parse throws.
    parser.lexer = savedLexer;
  }

  // sexpr is `['program', ...statements]`. Unwrap to a body we can feed
  // a thin-arrow AST. One statement → the statement itself. Multiple →
  // ['block', ...].
  if (!Array.isArray(sexpr) || sexpr[0] !== 'program') return null;
  let stmts = sexpr.slice(1);
  if (stmts.length === 0) return null;
  if (stmts.length === 1) return stmts[0];
  return ['block', ...stmts];
}
1668
+
1669
+ // ============================================================================
1670
+ // Helpers
1671
+ // ============================================================================
1672
+
1673
// Read the sugar modifiers tagged onto an identifier token's `.data`:
// `await: true` → '!', `predicate: true` → '?'. Order is fixed ('!' first).
function collectModifiers(identToken) {
  const data = identToken.data;
  const modifiers = [];
  if (data?.await === true) modifiers.push('!');
  if (data?.predicate === true) modifiers.push('?');
  return modifiers;
}
1680
+
1681
// Scan forward from the INDENT at `indentIdx` and return the index of its
// matching OUTDENT (depth-balanced), or -1 if the stream ends unbalanced.
function findMatchingOutdent(tokens, indentIdx) {
  let open = 0;
  for (let i = indentIdx; i < tokens.length; i++) {
    const tag = tokens[i][0];
    if (tag === 'INDENT') {
      open++;
    } else if (tag === 'OUTDENT') {
      open--;
      if (open === 0) return i;
    }
  }
  return -1;
}
1692
+
1693
// Split a token slice on commas that sit at nesting depth 0, where depth is
// tracked across every bracket-ish pair the lexer emits (parens, brackets,
// braces, CALL/INDEX/PARAM pairs, and INDENT/OUTDENT). Empty segments
// (leading/trailing/doubled commas) are dropped.
function splitTopLevelByComma(tokens) {
  const openers = new Set(['(', '[', '{', 'CALL_START', 'INDEX_START', 'PARAM_START', 'INDENT']);
  const closers = new Set([')', ']', '}', 'CALL_END', 'INDEX_END', 'PARAM_END', 'OUTDENT']);
  const groups = [];
  let current = [];
  let nesting = 0;
  for (const tok of tokens) {
    const tag = tok[0];
    if (openers.has(tag)) nesting++;
    if (closers.has(tag)) nesting--;
    if (tag === ',' && nesting === 0) {
      if (current.length) groups.push(current);
      current = [];
    } else {
      current.push(tok);
    }
  }
  if (current.length) groups.push(current);
  return groups;
}
1715
+
1716
// Convert a single literal token to its JS value. Unknown tags pass the
// raw token text through unchanged.
function literalOf(tok) {
  const [tag, val] = tok;
  switch (tag) {
    case 'NUMBER':    return Number(val);
    case 'STRING':    return JSON.parse(val);
    case 'BOOL':      return val === 'true';
    case 'NULL':      return null;
    case 'UNDEFINED': return undefined;
    default:          return val;
  }
}
1725
+
1726
// Build a synthetic lexer token: a [tag, value] array carrying the standard
// side-channel properties. Marked `generated: true` so downstream passes can
// tell it apart from source tokens; location is borrowed from `origin` when
// one is provided, else a zeroed loc.
function mkToken(tag, value, origin) {
  const token = [tag, value];
  Object.assign(token, {
    pre: 0,
    data: null,
    loc: origin?.loc ?? { r: 0, c: 0, n: 0 },
    spaced: false,
    newLine: false,
    generated: true,
  });
  if (origin) token.origin = origin;
  return token;
}
1737
+
1738
// Construct (not throw) a SchemaSyntaxError-flavored Error, stamped with the
// offending token's location plus stable `phase`/`code` markers so the host's
// error reporter can route and format it. Callers `throw schemaError(...)`.
function schemaError(tok, message) {
  const loc = tok?.loc || { r: 0, c: 0 };
  return Object.assign(new Error(message), {
    name: 'SchemaSyntaxError',
    loc,
    line: loc.r,
    column: loc.c,
    phase: 'schema',
    code: 'E_SCHEMA',
  });
}
1749
+
1750
+ // ============================================================================
1751
+ // Runtime — injected into compiled output when the source uses `schema`
1752
+ // ============================================================================
1753
+ //
1754
+ // Four-layer architecture (D22):
1755
+ // Layer 1 — Descriptor: the object passed to `__schema({...})`. Raw
1756
+ // metadata from compiler, plus real functions for callables.
1757
+ // Layer 2 — Normalized: fields map / methods map / computed map / hooks
1758
+ // map / directives / enum members. Built lazily on first
1759
+ // downstream need. Collision and kind-legality checks live
1760
+ // here (Phase 4 tightens them).
1761
+ // Layer 3 — Validator plan: compiled validator tree. Built on first
1762
+ // `.parse` / `.safe` / `.ok`.
1763
+ // Layer 4 — ORM plan (Phase 4) and DDL plan (Phase 4) — not in Phase 3.
1764
+ //
1765
+ // Public API per kind (v1):
1766
+ // .parse(data) throws SchemaError on failure, returns value
1767
+ // .safe(data) {ok: true, value, errors: null} | {ok: false, value: null, errors: [...]}
1768
+ // .ok(data) boolean, fast path (no allocation)
1769
+ //
1770
+ // Result `value` shape:
1771
+ // :shape — generated class instance (fields enumerable own props,
1772
+ // methods non-enumerable prototype fns, computed non-enumerable
1773
+ // prototype getters)
1774
+ // :input — plain object (same class-instance plumbing; Phase 3 treats
1775
+ // :input like :shape sans methods for consistency)
1776
+ // :enum — the member value (or name when the enum is bare)
1777
+ // :mixin — non-instantiable; raises `Cannot parse :mixin`
1778
+ // :model — Phase 4 (the class additionally wires ORM methods)
1779
+
1780
+ // =============================================================================
1781
+ // Runtime composition (delegated to registered provider)
1782
+ // =============================================================================
1783
+ // Mode matrix:
1784
+ //
1785
+ // validate = VALIDATE (pure)
1786
+ // browser = VALIDATE + BROWSER_STUBS (browser bundle)
1787
+ // server = VALIDATE + DB_NAMING + ORM (server runtime)
1788
+ // migration = VALIDATE + DB_NAMING + ORM + DDL (migration tool)
1789
+ //
1790
+ // The actual fragment imports + composition live in the loader files so
1791
+ // only the fragments needed by a given entry are bundled. Browser bundles
1792
+ // import loader-browser.js (validate + browser-stubs only); CLI / server
1793
+ // imports loader-server.js (all five fragments).
1794
+
1795
// Return the composed schema-runtime source for the requested mode by
// delegating to the provider registered via setSchemaRuntimeProvider().
// Throws with setup instructions when no loader module has been
// side-effect-imported yet (see the mode matrix above).
export function getSchemaRuntime(opts = {}) {
  if (_schemaRuntimeProvider) {
    return _schemaRuntimeProvider(opts);
  }
  throw new Error(
    "schema runtime provider not registered. Side-effect-import either " +
    "'./schema/loader-server.js' (CLI / server / tests) or " +
    "'./schema/loader-browser.js' (browser bundle) before calling " +
    "any compileToJS that emits schemas."
  );
}