rip-lang 3.15.0 → 3.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/bin/rip +1 -1
- package/docs/RIP-SCHEMA.md +4 -4
- package/docs/dist/rip.js +6 -6
- package/docs/dist/rip.min.js +83 -83
- package/docs/dist/rip.min.js.br +0 -0
- package/docs/extensions/duckdb/manifest.json +1 -1
- package/docs/extensions/duckdb/v1.5.2/linux_amd64/ripdb.duckdb_extension.gz +0 -0
- package/docs/extensions/duckdb/v1.5.2/osx_arm64/ripdb.duckdb_extension.gz +0 -0
- package/package.json +6 -8
- package/scripts/postinstall.js +27 -0
- package/src/AGENTS.md +27 -25
- package/src/browser.js +1 -1
- package/src/compiler.js +1 -1
- package/src/grammar/grammar.rip +1 -1
- package/src/lexer.js +1 -1
- package/src/schema/dts-emit.js +329 -0
- package/src/schema/loader-browser.js +55 -0
- package/src/schema/loader-server.js +65 -0
- package/src/schema/runtime-browser-stubs.js +51 -0
- package/src/schema/runtime-db-naming.js +34 -0
- package/src/schema/runtime-ddl.js +124 -0
- package/src/schema/runtime-orm.js +294 -0
- package/src/schema/runtime-validate.js +816 -0
- package/src/schema/runtime.generated.js +1315 -0
- package/src/schema/schema.js +1805 -0
- package/src/typecheck.js +2 -2
- package/src/types-emit.js +1 -1
|
@@ -0,0 +1,1805 @@
|
|
|
1
|
+
// Schema reaches sideways to the host's parser table to re-parse @ensure
|
|
2
|
+
// predicate bodies. This is the one host coupling point — the host's lexer
|
|
3
|
+
// and compiler import `installSchemaSupport` from us, and we import the
|
|
4
|
+
// parser back from them. Same compilation unit, no package boundary.
|
|
5
|
+
import { parser } from '../parser.js';
|
|
6
|
+
|
|
7
|
+
// Runtime-string composition is delegated to a registered provider so the
|
|
8
|
+
// bundler can tree-shake server-only fragments out of the browser bundle.
|
|
9
|
+
// One of `./loader-server.js` or `./loader-browser.js` must be
|
|
10
|
+
// side-effect-imported before any compileToJS call that emits schemas.
|
|
11
|
+
// (`src/browser.js` imports loader-browser; CLI / typecheck / test runner
|
|
12
|
+
// import loader-server.)
|
|
13
|
+
// Currently registered runtime-string provider. Stays null until one of the
// loader modules (see the note above) is side-effect-imported and registers
// itself via setSchemaRuntimeProvider.
let _schemaRuntimeProvider = null;
// Registration hook for the environment-specific runtime composer.
// NOTE(review): presumably invoked by loader-server.js / loader-browser.js
// at import time — confirm against those files.
export function setSchemaRuntimeProvider(fn) { _schemaRuntimeProvider = fn; }
|
|
15
|
+
|
|
16
|
+
// Schema System — inline `schema` declarations compile to runtime validator
|
|
17
|
+
// and ORM plans.
|
|
18
|
+
//
|
|
19
|
+
// Architecture (parallels types.js and components.js sidecars):
|
|
20
|
+
//
|
|
21
|
+
// installSchemaSupport(Lexer, CodeEmitter)
|
|
22
|
+
// Adds rewriteSchema() to Lexer.prototype and emitSchema() to
|
|
23
|
+
// CodeEmitter.prototype.
|
|
24
|
+
//
|
|
25
|
+
// rewriteSchema()
|
|
26
|
+
// Token-stream pass. Recognizes `schema [:kind] INDENT ... OUTDENT`
|
|
27
|
+
// blocks at expression-start positions, parses the body with a
|
|
28
|
+
// schema-specific sub-parser, and collapses the whole region into
|
|
29
|
+
// `SCHEMA SCHEMA_BODY` where SCHEMA_BODY carries a structured
|
|
30
|
+
// descriptor on its .data. The main Rip grammar only sees two
|
|
31
|
+
// tiny productions. Schema body syntax never reaches the main
|
|
32
|
+
// parser.
|
|
33
|
+
//
|
|
34
|
+
// emitSchema(head, rest, context)
|
|
35
|
+
// CodeEmitter dispatch. Reads the structured descriptor off the
|
|
36
|
+
// SCHEMA_BODY node's metadata and emits a `__schema({...})` runtime
|
|
37
|
+
// call. For Phase 1 the emission is a self-describing object; the
|
|
38
|
+
// runtime (__schema) lands in Phase 3.
|
|
39
|
+
//
|
|
40
|
+
// hasSchemas(source)
|
|
41
|
+
// Cheap regex probe for the presence of a schema declaration.
|
|
42
|
+
// Parallels hasTypeAnnotations. Used by typecheck.js and the LSP to
|
|
43
|
+
// skip work on files without schemas.
|
|
44
|
+
//
|
|
45
|
+
// Two body sub-modes:
|
|
46
|
+
//
|
|
47
|
+
// fielded — kinds :input, :shape, :model, :mixin. Permitted line forms:
|
|
48
|
+
// field IDENTIFIER[!|?|#]* TYPE [, constraints] [, attrs]
|
|
49
|
+
// directive @NAME [args]
|
|
50
|
+
// callable NAME: (-> | ~>) body
|
|
51
|
+
//
|
|
52
|
+
// enum — kind :enum. Permitted line forms:
|
|
53
|
+
// bare IDENTIFIER
|
|
54
|
+
// valued IDENTIFIER : Literal
|
|
55
|
+
//
|
|
56
|
+
// Anything else at schema top level is a schema-mode-aware compile error
|
|
57
|
+
// with a helpful message.
|
|
58
|
+
|
|
59
|
+
// The five legal schema kinds; any other `:kind` symbol after `schema`
// is rejected with a compile error (see collapseSchemaAt).
const VALID_KINDS = new Set(['input', 'shape', 'model', 'mixin', 'enum']);
// Kind assumed when no `:kind` symbol follows `schema`. May still be
// promoted to 'enum' by body-shape inference (see parseSchemaBody).
const KIND_DEFAULT = 'input';

// Recognized lifecycle hook names. Hooks are :model-only per the
// capability checks in parseSchemaBody. NOTE(review): this set is not
// referenced anywhere in this chunk — presumably consulted by the
// callable-line parser further down the file; confirm before relying on it.
const HOOK_NAMES = new Set([
  'beforeValidation', 'afterValidation',
  'beforeSave', 'afterSave',
  'beforeCreate', 'afterCreate',
  'beforeUpdate', 'afterUpdate',
  'beforeDestroy', 'afterDestroy',
]);

// Positions where `schema` can legitimately start an expression.
// If the prev token is one of these tags, the identifier `schema` is a
// candidate for retagging to SCHEMA (consulted by isSchemaStart when the
// preceding token is value-like).
const EXPR_START_PREV = new Set([
  'TERMINATOR', 'INDENT', 'OUTDENT',
  '=', '+=', '-=', '*=', '/=', '%=', '**=', '//=', '%%=',
  '?=', '??=', '&&=', '||=', '&=', '|=', '^=', '<<=', '>>=', '>>>=',
  'READONLY_ASSIGN', 'REACTIVE_ASSIGN', 'COMPUTED_ASSIGN',
  'RETURN', 'THROW', 'YIELD', 'AWAIT', 'EXPORT',
  ',', '(', '[', '{', 'CALL_START', 'PARAM_START', 'INDEX_START',
  '->', '=>', ':', 'WHEN', 'THEN', 'IF', 'UNLESS',
  'UNARY', '!', 'NOT',
]);
|
|
83
|
+
|
|
84
|
+
// ============================================================================
|
|
85
|
+
// hasSchemas — fast probe
|
|
86
|
+
// ============================================================================
|
|
87
|
+
|
|
88
|
+
// True when source looks like it contains a schema declaration. We look
|
|
89
|
+
// for `schema` followed by either a `:kind` symbol or by a newline +
|
|
90
|
+
// deeper indent. Conservative: a false positive just means typecheck
|
|
91
|
+
// pays a bit more work, never wrong behavior.
|
|
92
|
+
/**
 * Fast, conservative probe for schema declarations in raw source text.
 * A false positive only costs typecheck/LSP a little extra work; a miss
 * would skip real schemas, so the pattern errs toward matching.
 *
 * @param {string} source - raw Rip source (non-strings report false)
 * @returns {boolean} true when the text looks like it declares a schema
 *   (`schema` followed by a `:kind` symbol or a newline + deeper indent)
 */
export function hasSchemas(source) {
  // Cheap whole-word pre-filter before running the heavier pattern.
  if (typeof source !== 'string' || !/\bschema\b/.test(source)) {
    return false;
  }
  return /(?:^|[\s=,(\[{:])schema(?:\s*:[A-Za-z_$][\w$]*|\s*\n[ \t]+\S)/m.test(source);
}
|
|
97
|
+
|
|
98
|
+
// ============================================================================
|
|
99
|
+
// installSchemaSupport — prototype installation
|
|
100
|
+
// ============================================================================
|
|
101
|
+
|
|
102
|
+
/**
 * Install schema support onto the host's Lexer and/or CodeEmitter
 * prototypes. Either argument may be omitted (falsy) to install only
 * one side.
 *
 * @param {Function} Lexer       host lexer constructor, or falsy to skip
 * @param {Function} CodeEmitter host emitter constructor, or falsy to skip
 */
export function installSchemaSupport(Lexer, CodeEmitter) {
  if (Lexer) {
    // Expose the token-stream pass as a prototype method so the host
    // lexer can run it like its other rewriter passes.
    Lexer.prototype.rewriteSchema = function() {
      rewriteSchema(this);
    };
    // Captured body tokens need the tail rewriter passes before parsing;
    // parseBodyTokens builds a fresh instance from this constructor.
    parseBodyTokens._LexerCtor = Lexer;
  }
  if (CodeEmitter) {
    // CodeEmitter dispatch for SCHEMA nodes.
    CodeEmitter.prototype.emitSchema = function(head, rest, context) {
      return emitSchemaNode(this, head, rest, context);
    };
    CodeEmitter.prototype.getSchemaRuntime = function() {
      // Compiler-controlled mode. Defaults to 'migration' (everything)
      // for compatibility with existing CLI / Node compilation, where the
      // user might invoke any schema feature including .toSQL(). The
      // browser-bundle build overrides to 'browser' for size reduction.
      let mode = this.options?.schemaMode;
      if (!mode) mode = 'migration';
      return getSchemaRuntime({ mode });
    };
  }
}
|
|
125
|
+
|
|
126
|
+
// ============================================================================
|
|
127
|
+
// Lexer pass: rewriteSchema
|
|
128
|
+
// ============================================================================
|
|
129
|
+
|
|
130
|
+
// Known keys for the `schema.<key> = <value>` file-level pragma. Each
|
|
131
|
+
// pragma takes effect from its declaration forward and is scoped to the
|
|
132
|
+
// current compilation unit — schemas in other files are unaffected.
|
|
133
|
+
// Extend this map when new pragma keys land.
|
|
134
|
+
// Known keys for the `schema.<key> = <value>` file-level pragma. A pragma
// takes effect from its declaration forward and is scoped to the current
// compilation unit — other files are unaffected. Extend this set when new
// pragma keys land.
const SCHEMA_PRAGMA_KEYS = new Set(['defaultMaxString']);

/**
 * Token-stream pass: strip `schema.<key> = n` pragmas and collapse every
 * `schema …` declaration into `SCHEMA SCHEMA_BODY`. Mutates lexer.tokens
 * in place.
 *
 * @param {object} lexer host lexer whose .tokens array is rewritten
 */
function rewriteSchema(lexer) {
  const tokens = lexer.tokens;
  // File-scoped pragma state, updated in place as pragmas are found and
  // snapshotted into each schema descriptor at collapse time — so a later
  // pragma never retroactively mutates an earlier schema.
  const config = { defaultMaxString: null };
  // Top-level INDENT/OUTDENT depth. Pragmas are file-level only; tracking
  // depth lets matchSchemaPragma reject pragmas nested inside function /
  // class / block bodies, which would otherwise leak into later top-level
  // schemas. Collapsed schemas remove their own INDENT/OUTDENT before this
  // counter sees them, so depth reflects only user-written nesting.
  let depth = 0;
  for (let idx = 0; idx < tokens.length; ) {
    const tag = tokens[idx][0];
    if (tag === 'INDENT') depth += 1;
    else if (tag === 'OUTDENT') depth -= 1;
    const eaten = matchSchemaPragma(tokens, idx, config, depth);
    if (eaten > 0) {
      // Remove the pragma tokens and re-examine the same index.
      tokens.splice(idx, eaten);
      continue;
    }
    if (isSchemaStart(tokens, idx)) {
      collapseSchemaAt(lexer, tokens, idx, config);
    }
    idx += 1;
  }
}
|
|
165
|
+
|
|
166
|
+
// Recognize `schema.<key> = <value>` at statement position. Returns the
|
|
167
|
+
// number of tokens consumed (including any trailing TERMINATOR) when the
|
|
168
|
+
// pragma is applied, or 0 when the sequence isn't a pragma. Unknown keys
|
|
169
|
+
// and non-literal values error loudly — silently ignoring a typo like
|
|
170
|
+
// `schema.defaultMacString = 100` would bake a wrong value into every
|
|
171
|
+
// downstream schema.
|
|
172
|
+
/**
 * Recognize `schema.<key> = <value>` at statement position.
 *
 * Unknown keys, nested placement, and non-literal values error loudly —
 * silently ignoring a typo like `schema.defaultMacString = 100` would bake
 * a wrong value into every downstream schema.
 *
 * @param {Array}  tokens full token stream
 * @param {number} i      index of the candidate `schema` IDENTIFIER
 * @param {object} config mutable pragma state; updated on a match
 * @param {number} depth  current top-level nesting depth (pragmas need 0)
 * @returns {number} tokens consumed (including a trailing TERMINATOR) when
 *   the pragma applied, or 0 when the sequence isn't a pragma at all
 * @throws schemaError for unknown keys, nested pragmas, or bad values
 */
function matchSchemaPragma(tokens, i, config, depth) {
  // Structural shape: IDENTIFIER('schema') '.' PROPERTY '=' …
  const head = tokens[i];
  if (!head || head[0] !== 'IDENTIFIER' || head[1] !== 'schema') return 0;
  if (tokens[i + 1]?.[0] !== '.') return 0;
  const keyTok = tokens[i + 2];
  if (keyTok?.[0] !== 'PROPERTY') return 0;
  if (tokens[i + 3]?.[0] !== '=') return 0;
  // Pragmas must start a statement — preceded by nothing, TERMINATOR,
  // INDENT, or OUTDENT — so we never rewrite `foo.schema.defaultMaxString`.
  const before = tokens[i - 1];
  if (before) {
    const btag = before[0];
    if (btag !== 'TERMINATOR' && btag !== 'INDENT' && btag !== 'OUTDENT') return 0;
  }
  const key = keyTok[1];
  if (!SCHEMA_PRAGMA_KEYS.has(key)) {
    throw schemaError(keyTok,
      `Unknown schema pragma 'schema.${key}'. Known pragmas: ${[...SCHEMA_PRAGMA_KEYS].join(', ')}.`);
  }
  if (depth > 0) {
    throw schemaError(keyTok,
      `Schema pragma 'schema.${key}' must be declared at file top level. It was found inside a nested block (function / class / if / loop body), where it would leak into later top-level schemas.`);
  }
  const valTok = tokens[i + 4];
  if (!valTok || valTok[0] !== 'NUMBER') {
    throw schemaError(valTok || keyTok,
      `Pragma 'schema.${key}' requires a number literal. Example: schema.${key} = 100.`);
  }
  const value = Number(valTok[1]);
  // Number.isInteger already implies finiteness.
  if (!Number.isInteger(value) || value < 0) {
    throw schemaError(valTok,
      `Pragma 'schema.${key}' expects a non-negative integer (got ${valTok[1]}). Use 0 to disable.`);
  }
  // `0` means "no default cap" — the explicit way to reset a pragma mid-file.
  config[key] = value === 0 ? null : value;
  // Swallow the trailing TERMINATOR so no blank statement remains.
  let consumed = 5;
  if (tokens[i + 5]?.[0] === 'TERMINATOR') consumed = 6;
  return consumed;
}
|
|
213
|
+
|
|
214
|
+
/**
 * Decide whether the token at index i begins a schema declaration.
 *
 * Requires the IDENTIFIER 'schema' at an expression-start position,
 * optionally followed by a SYMBOL kind, then either an inline `;`
 * TERMINATOR (one-liner body) or an INDENT (block body).
 *
 * @param {Array}  tokens token stream
 * @param {number} i      candidate index
 * @returns {boolean}
 */
function isSchemaStart(tokens, i) {
  const tok = tokens[i];
  if (!tok || tok[0] !== 'IDENTIFIER' || tok[1] !== 'schema') return false;
  // `x.schema` lexes as PROPERTY, not IDENTIFIER, but guard against
  // generated IDENTIFIER tokens landing in odd positions anyway.
  const prev = tokens[i - 1];
  if (prev) {
    const ptag = prev[0];
    if (ptag === '.' || ptag === '?.') return false;
    // A value-like predecessor means `x schema` — an implicit call of x
    // on schema, not a declaration — unless it's an expression-start tag.
    const valueLike =
      ptag === 'IDENTIFIER' || ptag === 'PROPERTY' ||
      ptag === ')' || ptag === ']' || ptag === '}' ||
      ptag === 'STRING' || ptag === 'NUMBER';
    if (valueLike && !EXPR_START_PREV.has(ptag)) return false;
  }
  // Body form: SYMBOL? then INDENT (block) or SYMBOL? then `;` TERMINATOR
  // (inline one-liner whose fields are `;`-separated up to the newline).
  let j = i + 1;
  if (tokens[j]?.[0] === 'SYMBOL') j += 1;
  const next = tokens[j];
  if (next?.[0] === 'TERMINATOR') {
    if (next[1] === ';') return true;
    j += 1;
  }
  return tokens[j]?.[0] === 'INDENT';
}
|
|
243
|
+
|
|
244
|
+
// Collapse `IDENTIFIER 'schema' [SYMBOL kind] [TERMINATOR] INDENT ... OUTDENT`
|
|
245
|
+
// at position i into `SCHEMA SCHEMA_BODY`. SCHEMA_BODY carries a structured
|
|
246
|
+
// descriptor on .data. `config` snapshots any `schema.<key>` pragmas in
|
|
247
|
+
// effect at this point so later pragma changes don't retroactively alter
|
|
248
|
+
// earlier schemas.
|
|
249
|
+
/**
 * Collapse one schema declaration starting at token index `i` into the
 * two-token form `SCHEMA SCHEMA_BODY`, mutating `tokens` in place.
 * SCHEMA_BODY carries the parsed structured descriptor on `.data`.
 *
 * @param {object} lexer  host lexer (not referenced in this body; kept for
 *                        signature parity with the other pass helpers)
 * @param {Array}  tokens full token stream being rewritten
 * @param {number} i      index of the `schema` IDENTIFIER token
 * @param {object} config pragma snapshot in effect at this declaration
 * @throws schemaError on unknown kinds, malformed or empty bodies, and
 *         inline bodies containing '->' / '~>' / '!>' callables
 */
function collapseSchemaAt(lexer, tokens, i, config) {
  let schemaTok = tokens[i];
  let kindToken = null;
  let kind = KIND_DEFAULT;
  let j = i + 1;

  // Optional `:kind` symbol immediately after `schema`.
  if (tokens[j]?.[0] === 'SYMBOL') {
    kindToken = tokens[j];
    let k = kindToken[1];
    if (!VALID_KINDS.has(k)) {
      throw schemaError(kindToken,
        `Unknown schema kind :${k}. Expected one of :input, :shape, :model, :mixin, :enum.`);
    }
    kind = k;
    j++;
  }

  let bodyTokens;
  let endIdx; // one past the last token of the span we will replace
  if (tokens[j]?.[0] === 'TERMINATOR' && tokens[j][1] === ';') {
    // Inline one-liner: `schema [:kind]; field; field; ...` up to the
    // next `\n` TERMINATOR at depth 0. The `;` separators are already
    // TERMINATOR tokens, so splitBodyLines handles them unchanged.
    // Arrows (`->`, `~>`, `!>`) would make the body ambiguous with
    // subsequent `;`-separated fields, so methods/computed/hooks/
    // transforms are rejected on the inline form.
    let inlineStart = j + 1;
    let end = inlineStart;
    let depth = 0;
    // Rip's lexer collapses `;\n` into a single `;`-valued TERMINATOR,
    // so value-based "end of inline" detection alone misses trailing
    // `X = schema :shape; name!;\ny = 1`. We track the inline body's
    // starting row and break the moment a token's row advances past
    // it at depth 0 — that captures both plain `\n` and the folded
    // `;\n` case.
    let startRow = tokens[inlineStart]?.loc?.r ?? null;
    while (end < tokens.length) {
      let tk = tokens[end];
      let tag = tk[0];
      // Row advanced past the opening line at depth 0 → inline body over.
      if (depth === 0 && startRow != null && tk.loc && tk.loc.r > startRow) break;
      if (tag === '(' || tag === '[' || tag === '{' ||
          tag === 'CALL_START' || tag === 'INDEX_START' || tag === 'PARAM_START') depth++;
      else if (tag === ')' || tag === ']' || tag === '}' ||
          tag === 'CALL_END' || tag === 'INDEX_END' || tag === 'PARAM_END') depth--;
      // Inline body ends at the first depth-0 newline OR at any
      // INDENT/OUTDENT — INDENT would mean the user opened a block
      // (incompatible with inline), and OUTDENT means we're exiting
      // a surrounding block and must leave that token in place for
      // the outer scanner's depth bookkeeping.
      else if (depth === 0 && tag === 'TERMINATOR' && tk[1] !== ';') break;
      else if (depth === 0 && (tag === 'INDENT' || tag === 'OUTDENT')) break;
      // Arrows (`->` method/hook/transform, `~>` computed, `!>` eager
      // derived) make field bodies ambiguous with subsequent
      // `;`-separated entries on the same line, so reject them early
      // with a clear message that points users at the indented form.
      // `~>` lexes as EFFECT; `!>` lexes as UNARY_MATH '!' + COMPARE '>'.
      else if (depth === 0 && tag === '->') {
        throw schemaError(tk, `Inline schema body does not support '->' (method/hook/transform). Use the indented form.`);
      }
      else if (depth === 0 && tag === 'EFFECT') {
        throw schemaError(tk, `Inline schema body does not support '~>' (computed getter). Use the indented form.`);
      }
      else if (depth === 0 && tag === 'UNARY_MATH' && tk[1] === '!' &&
          tokens[end + 1]?.[0] === 'COMPARE' && tokens[end + 1][1] === '>') {
        throw schemaError(tk, `Inline schema body does not support '!>' (eager derived). Use the indented form.`);
      }
      end++;
    }
    // A trailing TERMINATOR at the boundary (`;` that the lexer folded
    // with `\n`, or a plain `\n` that happened to land inside our
    // capture range) must remain in the token stream as a statement
    // separator between this schema and whatever follows on the next
    // line. Trim it out of the body / splice span so the parser
    // keeps seeing it. splitBodyLines is safe with a body that
    // doesn't end in TERMINATOR.
    while (end > inlineStart && tokens[end - 1][0] === 'TERMINATOR') end--;
    bodyTokens = tokens.slice(inlineStart, end);
    endIdx = end;
    // Empty inline body (`X = schema :shape;` with nothing after the
    // leading `;`) is almost always a typo — an indented body that
    // wasn't written, or a stray `;` on an otherwise complete decl.
    // Fail loud rather than emit a schema with no entries.
    if (!bodyTokens.length) {
      throw schemaError(schemaTok,
        `Inline schema body is empty. Either add '; field; …' entries after 'schema${kindToken ? ' :' + kind : ''};' or switch to the indented form.`);
    }
  } else {
    // Block form: optional TERMINATOR then a mandatory INDENT...OUTDENT.
    if (tokens[j]?.[0] === 'TERMINATOR') j++;
    if (tokens[j]?.[0] !== 'INDENT') {
      throw schemaError(schemaTok,
        `Expected indented schema body after 'schema${kindToken ? ' :' + kind : ''}'.`);
    }
    let indentIdx = j;
    let outdentIdx = findMatchingOutdent(tokens, indentIdx);
    if (outdentIdx < 0) {
      throw schemaError(tokens[indentIdx], 'Unterminated schema body.');
    }
    bodyTokens = tokens.slice(indentIdx + 1, outdentIdx);
    endIdx = outdentIdx + 1; // include the OUTDENT itself in the replaced span
  }

  let descriptor = parseSchemaBody(kind, bodyTokens, {
    schemaLoc: schemaTok.loc,
    kindLoc: kindToken?.loc ?? null,
    kind,
    // Snapshot pragmas in effect at this decl so later pragma writes
    // don't retroactively change already-parsed schemas.
    defaultMaxString: config?.defaultMaxString ?? null,
  });

  // Replace range `[i, endIdx-1]` with `SCHEMA SCHEMA_BODY`.
  let schemaNewTok = mkToken('SCHEMA', 'schema', schemaTok);
  let bodyNewTok = mkToken('SCHEMA_BODY', kind, schemaTok);
  bodyNewTok.data = { descriptor };
  tokens.splice(i, endIdx - i, schemaNewTok, bodyNewTok);
}
|
|
365
|
+
|
|
366
|
+
// ============================================================================
|
|
367
|
+
// Sub-parser — fielded and enum modes
|
|
368
|
+
// ============================================================================
|
|
369
|
+
|
|
370
|
+
/**
 * Parse a captured schema body into a structured descriptor.
 *
 * Splits the body into top-level lines, infers :enum when appropriate,
 * delegates per-line parsing to parseEnumLine / parseFieldedLine, then
 * enforces the per-kind capability matrix over the collected entries.
 *
 * @param {string} kind       declared (or default) schema kind
 * @param {Array}  bodyTokens raw tokens between the body delimiters
 * @param {object} ctx        {schemaLoc, kindLoc, kind, defaultMaxString}
 * @returns {{kind: string, loc: object, kindLoc: (object|null), entries: Array}}
 * @throws schemaError when an entry violates its kind's capability matrix
 */
function parseSchemaBody(kind, bodyTokens, ctx) {
  let entries = [];
  let lines = splitBodyLines(bodyTokens);

  // Kind inference: a body whose first non-empty line begins with a
  // SYMBOL token is unambiguously an enum. Promote the default :input
  // kind to :enum so `schema\n :draft\n :active` needs no marker.
  // Explicit `:input` or any other kind stays as written (no kindLoc
  // means the kind was never spelled out by the user).
  if (kind === KIND_DEFAULT && !ctx.kindLoc && lines.length > 0 &&
      lines[0][0]?.[0] === 'SYMBOL') {
    kind = 'enum';
    ctx.kind = 'enum';
  }

  if (kind === 'enum') {
    for (let line of lines) {
      parseEnumLine(line, entries);
    }
  } else {
    for (let line of lines) {
      parseFieldedLine(kind, line, entries, ctx);
    }
    // Capability-matrix enforcement by kind. `@mixin` is allowed as a
    // field-inclusion directive on every fielded kind because it adds
    // fields (not behavior). Other directives are restricted per the
    // matrix in the language reference. :model has no loop here — it
    // accepts every entry tag.
    if (kind === 'mixin') {
      for (let e of entries) {
        if (e.tag === 'method' || e.tag === 'computed' || e.tag === 'hook') {
          throw schemaError({ loc: e.headerLoc || e.loc },
            `:mixin schemas are fields-only. '${e.name}' is a ${e.tag}; move it to a :shape or :model.`);
        }
        if (e.tag === 'ensure') {
          throw schemaError({ loc: e.headerLoc || e.loc },
            `:mixin schemas don't accept @ensure refinements. Move the invariant to a :shape or :model that composes this mixin.`);
        }
        if (e.tag === 'directive' && e.name !== 'mixin') {
          throw schemaError({ loc: e.loc },
            `:mixin schemas only accept '@mixin Name' directives. '@${e.name}' is not allowed.`);
        }
      }
    } else if (kind === 'input') {
      // :input accepts fields, @mixin, and @ensure (cross-field predicates
      // are a natural fit for form validation — "passwords must match").
      // Other methods, computed getters, hooks, and non-mixin directives
      // are rejected.
      for (let e of entries) {
        if (e.tag === 'method' || e.tag === 'computed' || e.tag === 'hook') {
          throw schemaError({ loc: e.headerLoc || e.loc },
            `:input schemas are fields-only. '${e.name}' is a ${e.tag}; use :shape or :model if you need behavior.`);
        }
        if (e.tag === 'directive' && e.name !== 'mixin') {
          throw schemaError({ loc: e.loc },
            `:input schemas only accept '@mixin Name' and '@ensure'. '@${e.name}' is not allowed.`);
        }
      }
    } else if (kind === 'shape') {
      // :shape accepts fields, methods, computed, and @mixin. Hooks
      // and ORM-bound directives (timestamps, softDelete, index,
      // belongs_to, has_many, has_one, link) are :model-only.
      for (let e of entries) {
        if (e.tag === 'hook') {
          throw schemaError({ loc: e.headerLoc || e.loc },
            `:shape schemas don't have lifecycle hooks. '${e.name}' runs only on :model; move it or remove it.`);
        }
        if (e.tag === 'directive' && e.name !== 'mixin') {
          throw schemaError({ loc: e.loc },
            `:shape schemas only accept '@mixin Name'. '@${e.name}' is :model-only.`);
        }
      }
    }
  }

  return {
    kind,
    loc: ctx.schemaLoc,
    kindLoc: ctx.kindLoc,
    entries,
  };
}
|
|
450
|
+
|
|
451
|
+
// Split top-level lines inside a schema body. Nested INDENT/OUTDENT stays
|
|
452
|
+
// inside its owning line (belongs to a callable body, multi-line
|
|
453
|
+
// constraints, etc.). Each returned line is the raw sub-stream of tokens
|
|
454
|
+
// for that line (no outer TERMINATORs).
|
|
455
|
+
/**
 * Split a schema body's token stream into top-level lines.
 *
 * A depth-0 TERMINATOR ends a line (and is dropped); nested
 * INDENT/OUTDENT pairs — callable bodies, multi-line constraints —
 * stay inside their owning line along with any TERMINATORs they
 * contain. Empty lines are never emitted.
 *
 * @param {Array} tokens raw body tokens (no outer delimiters)
 * @returns {Array<Array>} one token sub-stream per top-level line
 */
function splitBodyLines(tokens) {
  const lines = [];
  let current = [];
  let nesting = 0;
  for (const tok of tokens) {
    const tag = tok[0];
    if (tag === 'INDENT') nesting += 1;
    else if (tag === 'OUTDENT') nesting -= 1;
    if (nesting === 0 && tag === 'TERMINATOR') {
      // Top-level line break: flush the accumulated line, drop the
      // separator itself.
      if (current.length > 0) {
        lines.push(current);
        current = [];
      }
    } else {
      current.push(tok);
    }
  }
  if (current.length > 0) lines.push(current);
  return lines;
}
|
|
472
|
+
|
|
473
|
+
// Fielded body: field, directive, or callable.
|
|
474
|
+
// Field-line grammar (v2, locked):
|
|
475
|
+
//
|
|
476
|
+
// name[!|?|#]* [type] [range] [default] [regex] [attrs] [, -> transform]
|
|
477
|
+
//
|
|
478
|
+
// Invariants enforced here:
|
|
479
|
+
// 1. Line classification: IDENTIFIER-start = field; PROPERTY-start (the
|
|
480
|
+
// lexer absorbs trailing `:` into the identifier's tag) = callable.
|
|
481
|
+
// 2. Type slot is optional — default is `string`. Identifier types
|
|
482
|
+
// (`email`, `integer`, …), array suffix (`string[]`), and string-
|
|
483
|
+
// literal unions (`"M" | "F" | "U"`) are the three valid shapes.
|
|
484
|
+
// 3. Literal unions require 2+ members, all string literals, no mixing
|
|
485
|
+
// with identifier types or null. Nullability is carried by the `?`
|
|
486
|
+
// modifier, not by union membership.
|
|
487
|
+
// 4. The `->` transform is TERMINAL — nothing follows it on the line.
|
|
488
|
+
// 5. Comma before `->` is required when anything precedes the arrow
|
|
489
|
+
// (type, range, regex, default, attrs). Only the bare form
|
|
490
|
+
// `name! -> body` parses comma-less, because there's nothing to
|
|
491
|
+
// elide.
|
|
492
|
+
// 6. Each comma-separated rest part is one of: `[…]` default,
|
|
493
|
+
// `{…}` attrs, `/regex/` pattern, `n..n` range, `-> transform`.
|
|
494
|
+
// The head token uniquely identifies the form. Duplicates of any
|
|
495
|
+
// single form are rejected.
|
|
496
|
+
// VARCHAR-like primitive types — the `schema.defaultMaxString` pragma
|
|
497
|
+
// applies a default `max` to these when no explicit range/regex/literals
|
|
498
|
+
// are declared. `text` stays uncapped by design (it's the opt-out for
|
|
499
|
+
// long-form content); `uuid` has fixed length; `json`/`any` aren't strings.
|
|
500
|
+
// String-like primitive types that receive the `schema.defaultMaxString`
// default cap (see the VARCHAR note above for the exclusions).
const VARCHAR_TYPES = new Set(['string', 'email', 'url', 'phone', 'zip']);
|
|
501
|
+
|
|
502
|
+
function parseFieldedLine(kind, line, entries, ctx) {
|
|
503
|
+
let first = line[0];
|
|
504
|
+
if (!first) return;
|
|
505
|
+
|
|
506
|
+
// Directive: @NAME [args]
|
|
507
|
+
if (first[0] === '@') {
|
|
508
|
+
let nameTok = line[1];
|
|
509
|
+
if (!nameTok || (nameTok[0] !== 'IDENTIFIER' && nameTok[0] !== 'PROPERTY')) {
|
|
510
|
+
throw schemaError(first, "Expected directive name after '@'.");
|
|
511
|
+
}
|
|
512
|
+
let argTokens = line.slice(2);
|
|
513
|
+
let dname = nameTok[1];
|
|
514
|
+
|
|
515
|
+
// `@ensure` is a refinement directive with its own grammar — it takes
|
|
516
|
+
// either an inline `"msg", (args) -> body` or a bracketed array of
|
|
517
|
+
// those pairs. Emits one `tag: "ensure"` entry per refinement; the
|
|
518
|
+
// per-entry shape mirrors methods so compileCallableFn-style codegen
|
|
519
|
+
// can fire.
|
|
520
|
+
if (dname === 'ensure') {
|
|
521
|
+
let pairs = parseEnsurePairs(argTokens, first);
|
|
522
|
+
for (let p of pairs) {
|
|
523
|
+
entries.push({
|
|
524
|
+
tag: 'ensure',
|
|
525
|
+
name: 'ensure',
|
|
526
|
+
message: p.message,
|
|
527
|
+
paramTokens: p.paramTokens,
|
|
528
|
+
bodyTokens: p.bodyTokens,
|
|
529
|
+
loc: p.loc,
|
|
530
|
+
headerLoc: first.loc,
|
|
531
|
+
});
|
|
532
|
+
}
|
|
533
|
+
return;
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
// Pre-parse structured args so shadow-TS and runtime-codegen share
|
|
537
|
+
// the same descriptor shape. Relation and mixin directives get a
|
|
538
|
+
// `[{target, optional?}]` array; other directives leave `args` unset.
|
|
539
|
+
let args = null;
|
|
540
|
+
if (dname === 'belongs_to' || dname === 'has_many' || dname === 'has_one' ||
|
|
541
|
+
dname === 'one' || dname === 'many' || dname === 'mixin') {
|
|
542
|
+
let t0 = argTokens[0];
|
|
543
|
+
if (t0 && (t0[0] === 'IDENTIFIER' || t0[0] === 'PROPERTY')) {
|
|
544
|
+
let optional = t0.data?.predicate === true;
|
|
545
|
+
if (!optional && argTokens[1]?.[0] === '?') optional = true;
|
|
546
|
+
args = [{ target: t0[1], optional }];
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
entries.push({
|
|
550
|
+
tag: 'directive',
|
|
551
|
+
name: dname,
|
|
552
|
+
args,
|
|
553
|
+
argTokens,
|
|
554
|
+
loc: first.loc,
|
|
555
|
+
});
|
|
556
|
+
return;
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
// The identifier regex absorbs a trailing `:` by retagging the ident as
|
|
560
|
+
// PROPERTY and emitting a separate `:` token. So a line starting with
|
|
561
|
+
// PROPERTY is always a callable (`name: -> body` or `name: ~> body`);
|
|
562
|
+
// a line starting with IDENTIFIER is always a field.
|
|
563
|
+
if (first[0] === 'PROPERTY') {
|
|
564
|
+
parseCallableLine(kind, first, line, entries);
|
|
565
|
+
return;
|
|
566
|
+
}
|
|
567
|
+
if (first[0] !== 'IDENTIFIER') {
|
|
568
|
+
throw schemaError(first,
|
|
569
|
+
`Unexpected ${first[0]} at schema top level. Allowed: fields ('name! type'), directives ('@name'), methods ('name: -> body'), or computed getters ('name: ~> body').`);
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
let name = first[1];
|
|
573
|
+
|
|
574
|
+
// Guard: `name:` without the colon absorbed — shouldn't happen but
|
|
575
|
+
// produces a friendly error if it does.
|
|
576
|
+
if (line[1]?.[0] === ':') {
|
|
577
|
+
throw schemaError(line[1],
|
|
578
|
+
`Schema fields use 'name type' (space, no colon). For methods or computed use 'name: -> body' or 'name: ~> body'.`);
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
// Field: IDENTIFIER [modifiers] TYPE [, constraints] [, attrs]
|
|
582
|
+
let modifiers = collectModifiers(first);
|
|
583
|
+
let pos = 1;
|
|
584
|
+
|
|
585
|
+
// Adjacent `!`, `#`, `?` modifier tokens. `!` and `?` are absorbed into
|
|
586
|
+
// the IDENTIFIER's data by the main lexer. `#` arrives as a standalone
|
|
587
|
+
// token because the schema commentToken exception kicks in when `#` is
|
|
588
|
+
// adjacent to an identifier. A modifier must be unspaced from the
|
|
589
|
+
// token it follows, so we check the preceding token's `.spaced` flag
|
|
590
|
+
// (which the whitespace pass sets to true when whitespace follows).
|
|
591
|
+
while (pos < line.length) {
|
|
592
|
+
let tk = line[pos];
|
|
593
|
+
let adjacent = line[pos - 1] && !line[pos - 1].spaced;
|
|
594
|
+
if (!adjacent) break;
|
|
595
|
+
if (tk[0] === '#' || tk[0] === '?' || tk[0] === '!') {
|
|
596
|
+
modifiers.push(tk[0]);
|
|
597
|
+
pos++;
|
|
598
|
+
continue;
|
|
599
|
+
}
|
|
600
|
+
break;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
// Reject a stray colon here — gives a clear diagnostic for the common
|
|
604
|
+
// mistake `name: type` instead of `name type`.
|
|
605
|
+
let typeFirst = line[pos];
|
|
606
|
+
if (typeFirst?.[0] === ':') {
|
|
607
|
+
throw schemaError(typeFirst,
|
|
608
|
+
`Schema fields use 'name type' (space, no colon). Got 'name:'. For methods/computed use 'name: -> body' or 'name: ~> body'.`);
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
// Type: IDENTIFIER (optionally followed by `[]` for array) OR a
|
|
612
|
+
// string-literal union like `"M" | "F" | "U"`. The type slot is
|
|
613
|
+
// OPTIONAL — if the next token isn't a type-starting token, the
|
|
614
|
+
// field defaults to `string` and we fall through to constraint
|
|
615
|
+
// parsing.
|
|
616
|
+
let typeName = 'string';
|
|
617
|
+
let literals = null;
|
|
618
|
+
if (typeFirst?.[0] === 'IDENTIFIER') {
|
|
619
|
+
typeName = typeFirst[1];
|
|
620
|
+
pos++;
|
|
621
|
+
} else if (typeFirst?.[0] === 'STRING') {
|
|
622
|
+
// Literal union: collect alternating STRING | STRING | STRING...
|
|
623
|
+
literals = [JSON.parse(typeFirst[1])];
|
|
624
|
+
pos++;
|
|
625
|
+
while (line[pos]?.[0] === '|' && line[pos + 1]?.[0] === 'STRING') {
|
|
626
|
+
pos++; // consume '|'
|
|
627
|
+
literals.push(JSON.parse(line[pos][1]));
|
|
628
|
+
pos++;
|
|
629
|
+
}
|
|
630
|
+
// Forbid mixing with identifier types or null/undefined.
|
|
631
|
+
if (line[pos]?.[0] === '|') {
|
|
632
|
+
let next = line[pos + 1];
|
|
633
|
+
let tag = next?.[0] ?? '<end>';
|
|
634
|
+
throw schemaError(next || line[pos],
|
|
635
|
+
`Literal unions contain string literals only. '${tag}' is not allowed as a union member. Use the '?' modifier for nullability.`);
|
|
636
|
+
}
|
|
637
|
+
if (literals.length < 2) {
|
|
638
|
+
throw schemaError(typeFirst,
|
|
639
|
+
`Literal union needs at least two string literals. Use '${JSON.stringify(literals[0])}' as a default with '[${JSON.stringify(literals[0])}]' instead.`);
|
|
640
|
+
}
|
|
641
|
+
typeName = 'literal-union';
|
|
642
|
+
}
|
|
643
|
+
let array = false;
|
|
644
|
+
// `string[]` tokenizes as IDENTIFIER INDEX_START INDEX_END (or `[` `]`
|
|
645
|
+
// depending on context; closeOpenIndexes retags the empty bracket pair
|
|
646
|
+
// as INDEX_START/INDEX_END when it follows an indexable token).
|
|
647
|
+
let openTag = line[pos]?.[0];
|
|
648
|
+
let closeTag = line[pos + 1]?.[0];
|
|
649
|
+
if ((openTag === '[' || openTag === 'INDEX_START') &&
|
|
650
|
+
(closeTag === ']' || closeTag === 'INDEX_END')) {
|
|
651
|
+
array = true;
|
|
652
|
+
pos += 2;
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
// Remaining tokens on the line are a mix of `[…]` constraints (default,
|
|
656
|
+
// regex), `{…}` attrs, and `n..n` range constraints. Each form is
|
|
657
|
+
// self-identifying by its head token shape. Raw token slices are
|
|
658
|
+
// captured here and semantic-parsed at compile time.
|
|
659
|
+
let rest = line.slice(pos);
|
|
660
|
+
|
|
661
|
+
// Comma-required rule: if a type was consumed and the next token is
|
|
662
|
+
// `->` (no comma separator), reject with a clear diagnostic. The
|
|
663
|
+
// comma is a structural boundary between the field declaration and
|
|
664
|
+
// the transform; skipping it makes `email!# email -> fn` read as
|
|
665
|
+
// if 'email' were an argument to the arrow, which it isn't.
|
|
666
|
+
let typeConsumed = typeFirst?.[0] === 'IDENTIFIER' || typeFirst?.[0] === 'STRING';
|
|
667
|
+
if (typeConsumed && rest[0]?.[0] === '->') {
|
|
668
|
+
throw schemaError(rest[0],
|
|
669
|
+
`Field '${name}' has a transform after the type; a comma is required before '->'. Write '${name} ${typeName}, -> …'.`);
|
|
670
|
+
}
|
|
671
|
+
let constraintTokens = null;
|
|
672
|
+
let attrsTokens = null;
|
|
673
|
+
let rangeTokens = null;
|
|
674
|
+
let regexToken = null;
|
|
675
|
+
let transformTokens = null;
|
|
676
|
+
|
|
677
|
+
if (rest.length > 0) {
|
|
678
|
+
// The leading comma is only required when a type was consumed. If
|
|
679
|
+
// the type slot was empty, constraints may follow the modifiers
|
|
680
|
+
// directly (`name? [1, 20]`). Both shapes produce the same parts.
|
|
681
|
+
if (rest[0]?.[0] === ',') {
|
|
682
|
+
rest = rest.slice(1);
|
|
683
|
+
}
|
|
684
|
+
// Split top-level by commas. Multi-line trailers (`name! type,\n
|
|
685
|
+
// [8, 100]`) introduce surrounding INDENT/OUTDENT tokens that
|
|
686
|
+
// don't affect semantics — strip them from each part so the head
|
|
687
|
+
// is the literal `[` or `{`.
|
|
688
|
+
let parts = splitTopLevelByComma(rest);
|
|
689
|
+
for (let i = 0; i < parts.length; i++) {
|
|
690
|
+
let part = parts[i];
|
|
691
|
+
// Strip leading INDENT/TERMINATOR so we can inspect the head token.
|
|
692
|
+
while (part.length && (part[0][0] === 'INDENT' || part[0][0] === 'TERMINATOR')) {
|
|
693
|
+
part = part.slice(1);
|
|
694
|
+
}
|
|
695
|
+
if (!part.length) continue;
|
|
696
|
+
|
|
697
|
+
// A `->` at the head of a part is the transform arrow — the
|
|
698
|
+
// preceding comma separated it out. `->` elsewhere in the part
|
|
699
|
+
// (after content) means the user wrote something like
|
|
700
|
+
// `email -> fn` without the separator; the comma is required
|
|
701
|
+
// as a structural boundary between the field declaration and
|
|
702
|
+
// the transform.
|
|
703
|
+
if (part[0][0] !== '->') {
|
|
704
|
+
let innerArrow = findTopLevelArrowIdx(part);
|
|
705
|
+
if (innerArrow > 0) {
|
|
706
|
+
throw schemaError(part[innerArrow],
|
|
707
|
+
`Field '${name}' has a transform after other content; a comma is required before '->'. Write 'name! <constraints>, -> <body>'.`);
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
let head = part[0];
|
|
711
|
+
// For non-transform parts, also strip trailing OUTDENT/TERMINATOR.
|
|
712
|
+
// Transform parts own their INDENT/OUTDENT wrapping — parseBodyTokens
|
|
713
|
+
// handles it.
|
|
714
|
+
if (head[0] !== '->') {
|
|
715
|
+
while (part.length && (part[part.length - 1][0] === 'OUTDENT' || part[part.length - 1][0] === 'TERMINATOR')) {
|
|
716
|
+
part = part.slice(0, -1);
|
|
717
|
+
}
|
|
718
|
+
if (!part.length) continue;
|
|
719
|
+
head = part[0];
|
|
720
|
+
}
|
|
721
|
+
if (head[0] === '[' || head[0] === 'INDEX_START') {
|
|
722
|
+
if (constraintTokens) {
|
|
723
|
+
throw schemaError(head,
|
|
724
|
+
`Field '${name}' has more than one '[…]' constraint. At most one default / regex bracket per field.`);
|
|
725
|
+
}
|
|
726
|
+
constraintTokens = part;
|
|
727
|
+
} else if (head[0] === '{') {
|
|
728
|
+
if (attrsTokens) {
|
|
729
|
+
throw schemaError(head,
|
|
730
|
+
`Field '${name}' has more than one '{…}' attrs bracket.`);
|
|
731
|
+
}
|
|
732
|
+
attrsTokens = part;
|
|
733
|
+
} else if (isRangeConstraintTokens(part)) {
|
|
734
|
+
if (rangeTokens) {
|
|
735
|
+
throw schemaError(head,
|
|
736
|
+
`Field '${name}' has more than one range constraint. Only one 'min..max' per field.`);
|
|
737
|
+
}
|
|
738
|
+
rangeTokens = part;
|
|
739
|
+
} else if (head[0] === 'REGEX' && part.length === 1) {
|
|
740
|
+
if (regexToken) {
|
|
741
|
+
throw schemaError(head,
|
|
742
|
+
`Field '${name}' has more than one regex constraint.`);
|
|
743
|
+
}
|
|
744
|
+
regexToken = head;
|
|
745
|
+
} else if (head[0] === '->') {
|
|
746
|
+
// Transform part. Must be the last comma-separated part on the
|
|
747
|
+
// line (transform is terminal).
|
|
748
|
+
if (i !== parts.length - 1) {
|
|
749
|
+
throw schemaError(head,
|
|
750
|
+
`Transform '-> …' must be the last element on the field line for '${name}'.`);
|
|
751
|
+
}
|
|
752
|
+
transformTokens = part.slice(1);
|
|
753
|
+
} else {
|
|
754
|
+
throw schemaError(head,
|
|
755
|
+
`Unexpected trailer for field '${name}'. Expected '[…]' default, '{…}' attrs, '/regex/', 'min..max' range, or '-> transform'.`);
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
// Array suffix is incompatible with literal-union types in v2.
|
|
761
|
+
if (array && literals) {
|
|
762
|
+
throw schemaError(typeFirst,
|
|
763
|
+
`Array-of-literal-union is not supported. Use 'string[]' if you need an array of strings.`);
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
// The `schema.defaultMaxString` pragma baked into this schema's ctx
|
|
767
|
+
// is a candidate for any VARCHAR-like primitive that isn't already
|
|
768
|
+
// narrowed by a regex or literal-union. The final "fill it in only
|
|
769
|
+
// if max is still absent" decision happens in mergeFieldConstraints
|
|
770
|
+
// so open-ended ranges (`5..` → only min) still get the pragma's max.
|
|
771
|
+
// Using `!= null` (not truthy) keeps future non-positive pragma
|
|
772
|
+
// values valid if more keys land here.
|
|
773
|
+
let defaultMax = null;
|
|
774
|
+
if (ctx?.defaultMaxString != null && !regexToken && !literals &&
|
|
775
|
+
VARCHAR_TYPES.has(typeName)) {
|
|
776
|
+
defaultMax = ctx.defaultMaxString;
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
entries.push({
|
|
780
|
+
tag: 'field',
|
|
781
|
+
name,
|
|
782
|
+
modifiers,
|
|
783
|
+
typeName,
|
|
784
|
+
array,
|
|
785
|
+
literals,
|
|
786
|
+
constraintTokens,
|
|
787
|
+
attrsTokens,
|
|
788
|
+
rangeTokens,
|
|
789
|
+
regexToken,
|
|
790
|
+
transformTokens,
|
|
791
|
+
defaultMax,
|
|
792
|
+
loc: first.loc,
|
|
793
|
+
});
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
// Locate a depth-zero `->` in a token slice. Returns the arrow's index,
// or -1 when no top-level arrow exists. Callers use this to detect shapes
// like `8..100 -> transform` where no comma separates the range from the
// transform.
function findTopLevelArrowIdx(tokens) {
  const OPENERS = ['(', '[', '{', 'CALL_START', 'INDEX_START', 'PARAM_START'];
  const CLOSERS = [')', ']', '}', 'CALL_END', 'INDEX_END', 'PARAM_END'];
  let nesting = 0;
  for (let idx = 0; idx < tokens.length; idx++) {
    const tag = tokens[idx][0];
    if (OPENERS.includes(tag)) {
      nesting++;
    } else if (CLOSERS.includes(tag)) {
      nesting--;
    } else if (nesting === 0 && tag === '->') {
      // Only an arrow outside every bracket pair counts as "top level".
      return idx;
    }
  }
  return -1;
}
|
|
813
|
+
|
|
814
|
+
// Recognize a `min..max` range constraint part. Endpoints are optional
// (open-ended ranges: `..N` is "at most N", `N..` is "at least N"), each
// may carry a leading `-`, and at least one must be present — a bare `..`
// is rejected. The part must be consumed entirely; trailing tokens
// disqualify it. The caller strips any surrounding INDENT/OUTDENT first.
function isRangeConstraintTokens(tokens) {
  // Consume an optional (possibly negated) NUMBER endpoint starting at
  // `at`; returns the cursor after the endpoint, or `at` unchanged when
  // no endpoint is present.
  const eatEndpoint = (at) => {
    if (tokens[at]?.[0] === '-' && tokens[at + 1]?.[0] === 'NUMBER') return at + 2;
    if (tokens[at]?.[0] === 'NUMBER') return at + 1;
    return at;
  };
  let cursor = eatEndpoint(0);
  const hasLeft = cursor > 0;
  if (tokens[cursor]?.[0] !== '..') return false;
  cursor++;
  const afterRight = eatEndpoint(cursor);
  const hasRight = afterRight > cursor;
  // Require at least one endpoint and no leftover tokens.
  return (hasLeft || hasRight) && afterRight === tokens.length;
}
|
|
836
|
+
|
|
837
|
+
// Parse a `name: <arrow> body` schema line into a callable entry. The
// arrow spelling selects the entry kind: `~>` is a lazy computed getter,
// `!>` an eager derived field, and `->` is a hook when the name is in
// HOOK_NAMES on a :model schema, otherwise a plain method.
function parseCallableLine(kind, headerTok, line, entries) {
  const name = headerTok[1];
  const colonTok = line[1];
  if (colonTok?.[0] !== ':') {
    throw schemaError(headerTok,
      `Expected ':' after '${name}' before arrow.`);
  }
  // Three arrow spellings arrive as different token shapes:
  //   name: -> body   method / hook           ('->' token)
  //   name: ~> body   lazy computed getter    (EFFECT token)
  //   name: !> body   eager derived field     (UNARY_MATH '!' + COMPARE '>')
  const arrowTok = line[2];
  const afterArrow = line[3];
  let arrow = null;
  let bodyStart = 3;
  if (arrowTok?.[0] === '->') {
    arrow = '->';
  } else if (arrowTok?.[0] === 'EFFECT') {
    arrow = '~>';
  } else if (arrowTok?.[0] === 'UNARY_MATH' && arrowTok[1] === '!' && !arrowTok.spaced &&
             afterArrow?.[0] === 'COMPARE' && afterArrow[1] === '>') {
    // `!>` only counts when the '!' is unspaced and glued to the '>'.
    arrow = '!>';
    bodyStart = 4;
  }
  if (arrow === null) {
    throw schemaError(colonTok,
      `Schema top-level '${name}:' must be followed by '->' (method/hook), '~>' (computed getter), or '!>' (eager derived).`);
  }

  let entryTag;
  if (arrow === '~>') {
    entryTag = 'computed';
  } else if (arrow === '!>') {
    entryTag = 'derived';
  } else {
    entryTag = (kind === 'model' && HOOK_NAMES.has(name)) ? 'hook' : 'method';
  }

  entries.push({
    tag: entryTag,
    name,
    arrow,
    paramTokens: [],
    bodyTokens: line.slice(bodyStart),
    headerLoc: headerTok.loc,
    arrowLoc: arrowTok.loc,
  });
}
|
|
891
|
+
|
|
892
|
+
// Parse `@ensure` arguments into one or more refinement pairs. Two source
// forms are accepted and both compile to identical entries — each pair
// becomes one {tag: "ensure", message, paramTokens, bodyTokens}:
//
//   inline: `@ensure "msg", (args) -> body`
//   array:  `@ensure [ "msg", (args) -> body, ... ]`
//
// Rip parses `@ensure args…` as a call, so the args may arrive wrapped in
// an implicit CALL_START/CALL_END pair; that wrapper is peeled off before
// dispatching on the first remaining token.
function parseEnsurePairs(argTokens, directiveTok) {
  const USAGE = "@ensure requires 'message, (x) -> body' or '[...]' array of pairs.";
  let tokens = argTokens;
  if (!tokens.length) throw schemaError(directiveTok, USAGE);
  if (tokens[0]?.[0] === 'CALL_START' &&
      tokens[tokens.length - 1]?.[0] === 'CALL_END') {
    tokens = tokens.slice(1, -1);
  }
  if (!tokens.length) throw schemaError(directiveTok, USAGE);

  const head = tokens[0];

  // Array form: `[ pair, pair, ... ]`.
  if (head[0] === '[' || head[0] === 'INDEX_START') {
    const elements = splitEnsureElements(extractEnsureBracketInner(tokens, head));
    if (elements.length === 0) {
      throw schemaError(head, "@ensure [...] must contain at least one 'message, fn' pair.");
    }
    if (elements.length % 2 !== 0) {
      throw schemaError(head,
        `@ensure [...] must have pairs of 'message, fn' (got ${elements.length} elements; odd count).`);
    }
    const pairs = [];
    for (let i = 0; i < elements.length; i += 2) {
      pairs.push(extractEnsurePair(elements[i], elements[i + 1], head));
    }
    return pairs;
  }

  // Inline form: `message, (args) -> body` — exactly two comma parts.
  const parts = splitTopLevelByComma(tokens);
  if (parts.length < 2) {
    throw schemaError(head,
      "@ensure inline form must be 'message, (x) -> body'. Did you forget the comma?");
  }
  if (parts.length > 2) {
    throw schemaError(head,
      `@ensure inline form takes exactly 'message, fn' (got ${parts.length} comma-separated parts). Use '@ensure [...]' for multiple refinements.`);
  }
  return [extractEnsurePair(parts[0], parts[1], head)];
}
|
|
954
|
+
|
|
955
|
+
// Walk a `[ ... ]` token slice and return the inner tokens. Rejects
// trailing tokens after the close bracket and throws on an unbalanced
// open. A multi-line bracket body arrives wrapped in one outer
// INDENT/OUTDENT pair (Rip wraps multi-line array contents); that wrap is
// peeled so @ensure's depth-0 pair splitting can see the internal
// separators.
function extractEnsureBracketInner(tokens, openTok) {
  const collected = [];
  let depth = 0;
  for (let i = 0; i < tokens.length; i++) {
    const tok = tokens[i];
    const tag = tok[0];
    if (tag === '[' || tag === 'INDEX_START') {
      depth++;
      // The outermost open bracket itself is not content.
      if (depth === 1) continue;
    } else if (tag === ']' || tag === 'INDEX_END') {
      depth--;
      if (depth === 0) {
        if (i !== tokens.length - 1) {
          throw schemaError(tokens[i + 1],
            "@ensure [...] must be the only argument — extra tokens after ']'.");
        }
        return stripWholeIndentWrap(collected);
      }
    }
    if (depth >= 1) collected.push(tok);
  }
  throw schemaError(openTok, "@ensure: unclosed '['.");
}

// If a single INDENT…OUTDENT pair wraps the ENTIRE slice, peel it;
// otherwise return the slice untouched. The pair wraps everything only
// when the opening INDENT's matching OUTDENT is the final token.
function stripWholeIndentWrap(inner) {
  if (inner.length < 2 ||
      inner[0][0] !== 'INDENT' ||
      inner[inner.length - 1][0] !== 'OUTDENT') {
    return inner;
  }
  let wd = 0;
  for (let k = 0; k < inner.length; k++) {
    if (inner[k][0] === 'INDENT') {
      wd++;
    } else if (inner[k][0] === 'OUTDENT') {
      wd--;
      if (wd === 0) {
        return k === inner.length - 1 ? inner.slice(1, -1) : inner;
      }
    }
  }
  return inner;
}
|
|
999
|
+
|
|
1000
|
+
// Split an @ensure array body into elements. Mirrors Rip's array-literal
// rule: both `,` and newlines (TERMINATOR) separate elements at depth 0,
// so rows need no trailing commas:
//
//   @ensure [
//     "msg1", (u) -> body
//     "msg2", (u) -> body     <-- no comma needed between pairs
//   ]
function splitEnsureElements(tokens) {
  const OPEN = new Set(['(', '[', '{', 'CALL_START', 'INDEX_START', 'PARAM_START', 'INDENT']);
  const CLOSE = new Set([')', ']', '}', 'CALL_END', 'INDEX_END', 'PARAM_END', 'OUTDENT']);
  const elements = [];
  let current = [];
  let depth = 0;
  for (const tok of tokens) {
    const tag = tok[0];
    if (OPEN.has(tag)) depth++;
    if (CLOSE.has(tag)) depth--;
    if (depth === 0 && (tag === ',' || tag === 'TERMINATOR')) {
      // Separator at top level — flush the current element (if any) and
      // drop the separator itself.
      if (current.length) elements.push(current);
      current = [];
      continue;
    }
    current.push(tok);
  }
  if (current.length) elements.push(current);
  return elements;
}
|
|
1029
|
+
|
|
1030
|
+
// Extract one refinement pair from `messagePart` and `fnPart` (token
// slices already split by splitTopLevelByComma). The shape is validated
// here, at parse time, so typos surface as targeted diagnostics instead
// of runtime "expected function" noise. Returns
// {message, paramTokens, bodyTokens, loc}.
function extractEnsurePair(messagePart, fnPart, refTok) {
  if (!messagePart?.length) {
    throw schemaError(refTok, "@ensure: missing message (expected a string literal).");
  }
  if (messagePart.length !== 1 || messagePart[0][0] !== 'STRING') {
    throw schemaError(messagePart[0] || refTok,
      "@ensure: each refinement's first element must be a string literal message.");
  }
  const msgTok = messagePart[0];
  const message = JSON.parse(msgTok[1]);

  if (!fnPart?.length) {
    throw schemaError(msgTok, "@ensure: missing function after message.");
  }
  // Predicates must open with an explicit parameter list and contain `->`.
  // A bare `-> true` with no params is rejected on purpose — refinements
  // declare the object parameter explicitly.
  const opener = fnPart[0];
  if (opener[0] !== '(' && opener[0] !== 'PARAM_START') {
    throw schemaError(opener,
      "@ensure: expected '(args) -> body' after the message. Predicates must declare their parameter explicitly — '(u) -> ...'.");
  }
  // Collect everything up to the matching close paren as the params.
  const paramTokens = [];
  let depth = 1;
  let cursor = 1;
  while (cursor < fnPart.length) {
    const tok = fnPart[cursor];
    const tag = tok[0];
    if (tag === '(' || tag === 'PARAM_START') {
      depth++;
    } else if (tag === ')' || tag === 'PARAM_END') {
      depth--;
      if (depth === 0) { cursor++; break; }
    }
    paramTokens.push(tok);
    cursor++;
  }
  if (depth !== 0) {
    throw schemaError(opener, "@ensure: unclosed '(' in predicate parameters.");
  }
  const arrowTok = fnPart[cursor];
  if (arrowTok?.[0] !== '->') {
    throw schemaError(arrowTok || fnPart[cursor - 1] || msgTok,
      "@ensure: expected '->' after predicate parameters.");
  }
  const bodyTokens = fnPart.slice(cursor + 1);
  if (!bodyTokens.length) {
    throw schemaError(arrowTok, "@ensure: predicate function body is empty.");
  }
  return { message, paramTokens, bodyTokens, loc: msgTok.loc };
}
|
|
1085
|
+
|
|
1086
|
+
// Extract parameter names from a `(u)` / `(u, opts)` token slice. Only
// plain identifiers are accepted — no destructuring, defaults, or rest
// args (refinements don't need that complexity yet). Returns [] for an
// empty slice.
function ensureParamNames(paramTokens, refTok) {
  if (paramTokens.length === 0) return [];
  return splitTopLevelByComma(paramTokens).map((part) => {
    // Line-continuation TERMINATORs are noise here.
    const significant = part.filter((t) => t[0] !== 'TERMINATOR');
    if (significant.length !== 1 || significant[0][0] !== 'IDENTIFIER') {
      throw schemaError(significant[0] || refTok,
        "@ensure: predicate parameters must be plain identifiers.");
    }
    return significant[0][1];
  });
}
|
|
1101
|
+
|
|
1102
|
+
// Parse one :enum schema line into an enum-member entry. Member forms:
//   :admin       bare symbol  → downstream maps it to the name string
//   :pending 0   valued symbol → carries the literal value
// Values are any literal handled by literalOf. Mixing bare and valued
// members is permitted but yields a heterogeneous value Map downstream.
function parseEnumLine(line, entries) {
  const first = line[0];
  if (!first) return;

  // Directives belong on :shape/:model schemas — enums are pure member lists.
  if (first[0] === '@') {
    const nameTok = line[1];
    const dname = (nameTok && (nameTok[0] === 'IDENTIFIER' || nameTok[0] === 'PROPERTY'))
      ? nameTok[1]
      : 'directive';
    throw schemaError(first,
      `:enum schemas don't accept '@${dname}'. Enums hold only :symbol members. Move the invariant to a :shape or :model that uses this enum as a field type.`);
  }
  if (first[0] !== 'SYMBOL') {
    throw schemaError(first,
      `Enum member must be a :symbol. Use ':${first[1] ?? 'name'}' for a bare member or ':${first[1] ?? 'name'} value' for a valued one.`);
  }

  const name = first[1];
  const valueTok = line[1];

  // Bare member — no value token on the line.
  if (!valueTok) {
    entries.push({ tag: 'enum-member', name, value: undefined, loc: first.loc });
    return;
  }
  if (valueTok[0] === ':') {
    throw schemaError(valueTok,
      `Enum member ':${name}' — drop the ':' before the value. Use ':${name} value'.`);
  }
  if (line.length > 2) {
    throw schemaError(line[2],
      `Extra tokens after enum member ':${name}' value.`);
  }
  entries.push({
    tag: 'enum-member',
    name,
    value: literalOf(valueTok),
    loc: first.loc,
  });
}
|
|
1146
|
+
|
|
1147
|
+
// ============================================================================
|
|
1148
|
+
// Codegen — emitSchema
|
|
1149
|
+
// ============================================================================
|
|
1150
|
+
|
|
1151
|
+
// Emit a `__schema({...})` call for a SCHEMA node. rest[0] is the
// SCHEMA_BODY node; the parser metadata bridge wraps the token value in
// `new String()` and copies token.data fields onto it, so the lexer-built
// descriptor surfaces via readDescriptor.
function emitSchemaNode(emitter, head, rest, context) {
  const descriptor = readDescriptor(rest[0]);
  if (!descriptor) {
    throw new Error('schema: missing descriptor on SCHEMA_BODY token');
  }
  emitter.usesSchemas = true;

  // emitAssignment threads the binding name through `_schemaName` (the
  // same trick as `_componentName`); when present it's embedded so
  // SchemaError, the generated class name, and debug output share one
  // stable identity.
  const schemaName = emitter._schemaName || null;

  const fields = [`kind: ${JSON.stringify(descriptor.kind)}`];
  if (schemaName) fields.push(`name: ${JSON.stringify(schemaName)}`);
  const entryCode = descriptor.entries.map((e) => entryLiteral(emitter, e));
  fields.push(`entries: [${entryCode.join(', ')}]`);
  return `__schema({${fields.join(', ')}})`;
}
|
|
1172
|
+
|
|
1173
|
+
// Pull the lexer-built descriptor off a SCHEMA_BODY node. It may live
// directly on the node or under `.data` (depending on how the metadata
// bridge attached it). Returns null for non-objects or when absent.
function readDescriptor(node) {
  if (!node || typeof node !== 'object') return null;
  if (node.descriptor) return node.descriptor;
  return node.data?.descriptor || null;
}
|
|
1180
|
+
|
|
1181
|
+
// Render one parsed schema entry as a JS object-literal source string for
// the generated `__schema({entries: [...]})` call. Dispatches on the
// entry tag; unknown tags degrade to `{tag: "unknown"}`.
function entryLiteral(emitter, e) {
  switch (e.tag) {
    case 'field': {
      const props = [
        `tag: "field"`,
        `name: ${JSON.stringify(e.name)}`,
        `modifiers: ${JSON.stringify(e.modifiers)}`,
        `typeName: ${JSON.stringify(e.typeName)}`,
        `array: ${e.array ? 'true' : 'false'}`,
      ];
      if (e.literals) {
        props.push(`literals: ${JSON.stringify(e.literals)}`);
      }
      // Evaluate each captured token slice, then merge into one
      // normalized constraints literal via mergeFieldConstraints.
      const range = e.rangeTokens ? compileRangeTokens(e.rangeTokens, e) : null;
      const bracket = e.constraintTokens ? compileConstraintsLiteral(e.constraintTokens, e) : null;
      const regex = e.regexToken ? regexLiteralOf(e.regexToken) : null;
      const constraints = mergeFieldConstraints(range, bracket, regex, e);
      if (constraints) props.push(`constraints: ${constraints}`);
      if (e.transformTokens) {
        props.push(`transform: ${compileTransformFn(emitter, e.transformTokens)}`);
      }
      return `{${props.join(', ')}}`;
    }
    case 'directive': {
      const props = [`tag: "directive"`, `name: ${JSON.stringify(e.name)}`];
      const args = compileDirectiveArgsLiteral(e.name, e.argTokens || []);
      if (args) props.push(`args: ${args}`);
      return `{${props.join(', ')}}`;
    }
    case 'ensure': {
      const fnCode = compileEnsureFn(emitter, e);
      return `{tag: "ensure", message: ${JSON.stringify(e.message)}, fn: ${fnCode}}`;
    }
    case 'computed':
    case 'method':
    case 'hook':
    case 'derived': {
      const fnCode = compileCallableFn(emitter, e);
      return `{tag: ${JSON.stringify(e.tag)}, name: ${JSON.stringify(e.name)}, fn: ${fnCode}}`;
    }
    case 'enum-member': {
      const props = [`tag: "enum-member"`, `name: ${JSON.stringify(e.name)}`];
      if (e.value !== undefined) props.push(`value: ${JSON.stringify(e.value)}`);
      return `{${props.join(', ')}}`;
    }
    default:
      return `{tag: "unknown"}`;
  }
}
|
|
1240
|
+
|
|
1241
|
+
// Compile a callable body (`-> body` or `~> body`) into a JS function
// expression with dynamic `this`. Both computed getters and methods go
// through Rip's thin-arrow codegen, which produces a real `function`
// (Rip `->` is NOT a JS arrow) — the right `this` semantics for
// instance-attached methods and proto getters.
function compileCallableFn(emitter, entry) {
  const bodySexpr = parseBodyTokens(entry.bodyTokens);
  // An empty body compiles to a no-op.
  if (!bodySexpr) return `(function() {})`;
  // Thin-arrow with no params; `emit` in value context yields a
  // parenthesized function expression.
  return emitter.emit(['->', [], bodySexpr], 'value');
}
|
|
1257
|
+
|
|
1258
|
+
// Compile an inline field transform body (`-> body`). The body receives
// the raw input via Rip's implicit `it` parameter, so no explicit params
// are emitted. Transforms run on .parse() only, not on hydrate.
function compileTransformFn(emitter, bodyTokens) {
  const bodySexpr = parseBodyTokens(bodyTokens);
  if (!bodySexpr) return `(function() { return undefined; })`;
  return emitter.emit(['->', [], bodySexpr], 'value');
}
|
|
1269
|
+
|
|
1270
|
+
// Compile an `@ensure` predicate — `(args) -> body` — into a thin-arrow
// function expression with explicit params. Unlike transforms (which use
// implicit `it`), refinements name their parameter so the contract of
// "what the predicate sees" is visible at the call site.
function compileEnsureFn(emitter, entry) {
  const bodySexpr = parseBodyTokens(entry.bodyTokens);
  if (!bodySexpr) return `(function() { return undefined; })`;
  const params = ensureParamNames(entry.paramTokens, entry);
  return emitter.emit(['->', params, bodySexpr], 'value');
}
|
|
1283
|
+
|
|
1284
|
+
// ----------------------------------------------------------------------------
|
|
1285
|
+
// Compile-time constraint + directive argument evaluation
|
|
1286
|
+
// ----------------------------------------------------------------------------
|
|
1287
|
+
//
|
|
1288
|
+
// Constraints are captured as raw token slices during the lexer pass; this
|
|
1289
|
+
// layer evaluates them into a normalized {min?, max?, default?, regex?}
|
|
1290
|
+
// shape shared by runtime validation and DDL emission. Only literal-
|
|
1291
|
+
// deterministic values are accepted — identifiers, calls, and arbitrary
|
|
1292
|
+
// expressions are rejected.
|
|
1293
|
+
//
|
|
1294
|
+
// v2 constraint grammar (each form is self-identifying by token shape):
|
|
1295
|
+
// `min..max` — range: string length / array length / numeric value
|
|
1296
|
+
// `[value]` — default: a single literal payload in brackets
|
|
1297
|
+
// `/regex/` — pattern: bare regex literal, no wrapping brackets
|
|
1298
|
+
// `{key: val}` — attrs: object literal for `unique`, `index`, etc.
|
|
1299
|
+
// `-> body` — transform: terminal, comma-required before arrow
|
|
1300
|
+
// when anything precedes (see parseFieldedLine)
|
|
1301
|
+
//
|
|
1302
|
+
// Pre-v2 multi-element bracket forms (`[n, n]`, `[n, n, n]`, `[/re/]`) are
|
|
1303
|
+
// explicitly rejected with migration diagnostics pointing at the new form.
|
|
1304
|
+
// Evaluate a bracketed constraint (`[...]`) into `{c}` where `c` is either
// null (empty brackets) or `{default: value}`. Pre-v2 multi-element forms
// are rejected with migration diagnostics pointing at the v2 syntax.
//
// tokens — the full bracket slice including `[` and `]`;
// fieldEntry — field metadata forwarded to literal evaluation.
// Throws SchemaSyntaxError for every non-single-default shape.
function compileConstraintsLiteral(tokens, fieldEntry) {
  const parts = splitTopLevelByComma(tokens.slice(1, -1));
  if (parts.length === 0) return { c: null };

  const values = parts.map(part => evalLiteralTokens(part, fieldEntry));
  const at = tokens[0];
  const allNumbers = values.every(v => typeof v === 'number');

  switch (values.length) {
    case 1: {
      const v = values[0];
      // Regexes are written bare in v2 — a bracketed regex is a migration error.
      if (v instanceof RegExp) {
        throw schemaError(at,
          `Regex constraints are written bare, not in brackets. Replace '[${v}]' with '${v}'.`);
      }
      return { c: { default: v } };
    }
    case 2:
      // Old `[min, max]` range form → v2 `min..max`.
      if (allNumbers) {
        throw schemaError(at,
          `Size/value ranges use 'min..max' syntax, not brackets. Replace '[${values[0]}, ${values[1]}]' with '${values[0]}..${values[1]}'.`);
      }
      break;
    case 3:
      // Old `[min, max, default]` form → split into range + bracket default.
      if (allNumbers) {
        throw schemaError(at,
          `Range + default is two separate constraints in v2. Replace '[${values[0]}, ${values[1]}, ${values[2]}]' with '${values[0]}..${values[1]}, [${values[2]}]'.`);
      }
      break;
  }
  throw schemaError(at,
    `Constraint bracket takes a single default value in v2. Got ${values.length} elements.`);
}
|
|
1331
|
+
|
|
1332
|
+
// Extract a RegExp from a bare REGEX token. The lexer's raw text includes
// the surrounding `/.../` plus any trailing flags; both malformed raw text
// and a pattern RegExp itself rejects raise a SchemaSyntaxError.
//
// tok — a [tag, raw] token whose raw text is a slash-delimited literal.
function regexLiteralOf(tok) {
  const raw = tok[1];
  const match = /^\/((?:\\.|[^\\/])+)\/([a-z]*)$/.exec(raw);
  if (match === null) {
    throw schemaError(tok, `Invalid regex literal ${JSON.stringify(raw)}.`);
  }
  const [, source, flags] = match;
  try {
    return new RegExp(source, flags);
  } catch (e) {
    // Pattern parsed as a literal but RegExp rejected it (bad group, flag…).
    throw schemaError(tok, `Invalid regex '${raw}': ${e.message}`);
  }
}
|
|
1344
|
+
|
|
1345
|
+
// Evaluate a range token slice into {min?, max?}. The caller has already
// verified the shape via isRangeConstraintTokens. Open-ended forms simply
// omit the corresponding key (never emit undefined), keeping downstream
// constraint serialization clean. Reversed ranges are rejected here.
//
// tokens — the `min..max` token slice; fieldEntry — forwarded to literal
// evaluation for error context.
function compileRangeTokens(tokens, fieldEntry) {
  let cursor = 0;

  // Read one numeric endpoint at the cursor, honoring a leading `-`.
  const readEndpoint = () => {
    let negate = false;
    if (tokens[cursor]?.[0] === '-') {
      negate = true;
      cursor++;
    }
    const numTok = tokens[cursor++];
    const value = evalLiteralTokens([numTok], fieldEntry);
    if (typeof value !== 'number') {
      throw schemaError(numTok, `Range endpoints must be numeric literals.`);
    }
    return negate ? -value : value;
  };

  let min;
  let max;
  if (tokens[cursor]?.[0] !== '..') min = readEndpoint();
  cursor++; // step over `..`
  if (cursor < tokens.length) max = readEndpoint();

  if (min !== undefined && max !== undefined && min > max) {
    throw schemaError(tokens[0],
      `Range '${min}..${max}' is reversed. Write the smaller endpoint first.`);
  }

  const out = {};
  if (min !== undefined) out.min = min;
  if (max !== undefined) out.max = max;
  return out;
}
|
|
1375
|
+
|
|
1376
|
+
// Merge the optional range, bracket-default, and bare-regex constraints
// into a single literal object. Each source contributes disjoint keys
// by construction — range sets min/max, bracket sets default, regex
// sets regex.
//
// Params:
//   range          — {min?, max?} from compileRangeTokens, or falsy
//   bracketLiteral — {c: {default?}} from compileConstraintsLiteral, or falsy
//   regex          — a RegExp from a bare regex constraint, or falsy
//   fieldEntry     — field metadata; read for modifiers (`!`), defaultMax,
//                    and name/loc (error reporting only)
// Returns: a JS object-literal string via constraintLiteral, or null when
//          no constraint survives the merge.
// Throws:  SchemaSyntaxError when sugar composition yields min > max.
function mergeFieldConstraints(range, bracketLiteral, regex, fieldEntry) {
  // NOTE: when bracketLiteral.c exists we extend it in place below —
  // presumably callers don't reuse it after this merge; confirm if that
  // invariant ever changes.
  let c = (bracketLiteral && bracketLiteral.c) || {};
  // Track whether this field's range used open-left shorthand (`..N`).
  // The implicit-min sugar is gated on *syntax* (range omitted its
  // min) rather than on merged state, so a future sugar that also
  // writes to c.min can't accidentally trigger the implicit.
  let openLeftRange = range && range.min === undefined;
  if (range) {
    if (range.min !== undefined) c.min = range.min;
    if (range.max !== undefined) c.max = range.max;
    // Open-min shorthand (`..N`) with a `!` modifier implies min=1 —
    // "required and non-empty" is the default reading for required
    // varchar-like fields. Gated on openLeftRange syntactically so
    // adding more sugar layers later doesn't trigger this by accident.
    if (openLeftRange && c.min === undefined && fieldEntry?.modifiers?.includes('!')) {
      c.min = 1;
    }
  }
  if (regex) {
    c.regex = regex;
  }
  // File-level `schema.defaultMaxString` pragma fills in max only when
  // the field didn't narrow the max any other way — parseFieldedLine
  // suppresses defaultMax on regex / literal-union fields already, so
  // this last check covers the open-ended `N..` case (min set, max
  // still unbounded) where the pragma should fill the gap.
  if (fieldEntry?.defaultMax != null && c.max === undefined) {
    c.max = fieldEntry.defaultMax;
  }
  // Post-merge consistency check. Sugar (`!` implicit min=1) and the
  // pragma default max can compose with a user-written explicit max to
  // produce min > max — e.g. `name! ..0` would naively emit
  // `{min: 1, max: 0}`, a constraint no value can satisfy. The
  // parse-time reversed-range check only sees syntactically-present
  // endpoints, so we re-validate here after every sugar has been
  // applied. Error message names the actual sources so the user can
  // pinpoint which side to fix.
  if (c.min !== undefined && c.max !== undefined && c.min > c.max) {
    let minSrc = (range && range.min !== undefined) ? `range min ${range.min}` : 'implicit min=1 from `!`';
    let maxSrc = (range && range.max !== undefined)
      ? `range max ${range.max}`
      : `pragma defaultMaxString=${fieldEntry?.defaultMax}`;
    throw schemaError({ loc: fieldEntry?.loc },
      `Field '${fieldEntry?.name}' would have impossible constraints min=${c.min} > max=${c.max} after sugar is applied (${minSrc} vs ${maxSrc}). Write an explicit range or drop the conflicting pragma.`);
  }
  // All four possible keys absent → no constraint at all; callers treat
  // null as "emit nothing".
  if (c.min === undefined && c.max === undefined && c.default === undefined && c.regex === undefined) {
    return null;
  }
  return constraintLiteral(c);
}
|
|
1430
|
+
|
|
1431
|
+
// Serialize a normalized constraint object {min?, max?, default?, regex?}
// into a JS object-literal string, or null when every key is absent.
// Key order is fixed (min, max, default, regex) so emitted output is stable.
function constraintLiteral(c) {
  const entries = [];
  for (const key of ['min', 'max', 'default']) {
    if (c[key] !== undefined) entries.push(`${key}: ${serializeLiteral(c[key])}`);
  }
  // Regexes serialize via their own toString — JSON.stringify would quote them.
  if (c.regex !== undefined) entries.push(`regex: ${c.regex.toString()}`);
  return entries.length === 0 ? null : `{${entries.join(', ')}}`;
}
|
|
1439
|
+
|
|
1440
|
+
// Serialize a literal value into JS source text. Strings are JSON-quoted,
// numbers/booleans pass through String(), RegExps keep their /.../flags
// form, and anything else falls back to JSON.stringify.
function serializeLiteral(v) {
  if (v === null) return 'null';
  switch (typeof v) {
    case 'undefined':
      return 'undefined';
    case 'string':
      return JSON.stringify(v);
    case 'number':
    case 'boolean':
      return String(v);
    default:
      return v instanceof RegExp ? v.toString() : JSON.stringify(v);
  }
}
|
|
1448
|
+
|
|
1449
|
+
// Compile directive args to a JS literal list or null. Each directive has
// its own arg shape — we centralize the parsing here so Layer 2 can rely
// on normalized structures.
//
// Params:
//   name   — directive name without the `@` (e.g. 'belongs_to', 'index')
//   tokens — the raw token slice after the directive name
// Returns: a JS array-literal string like `[{target: "Org"}]`, or null for
//          directives with no (or unrecognized) args.
// Throws:  SchemaSyntaxError for malformed relation / @idStart args.
function compileDirectiveArgsLiteral(name, tokens) {
  // @idStart requires its arg, so validate before the generic empty-bail.
  if (name === 'idStart' && !tokens.length) {
    throw schemaError(null,
      '@idStart requires an integer literal, e.g. @idStart 10001.');
  }
  // All other directives tolerate bare usage (flag-like).
  if (!tokens.length) return null;

  // Relation directives: `@belongs_to Org`, `@belongs_to Org?`,
  // `@has_many Order`, `@has_one Profile`, `@one X`, `@many X`.
  if (name === 'belongs_to' || name === 'has_many' || name === 'has_one' ||
      name === 'one' || name === 'many' || name === 'mixin') {
    let t0 = tokens[0];
    if (!t0 || (t0[0] !== 'IDENTIFIER' && t0[0] !== 'PROPERTY')) {
      throw schemaError(t0 || tokens[tokens.length - 1],
        `@${name} requires a target name.`);
    }
    let target = t0[1];
    // `@belongs_to User?` tokenizes as IDENTIFIER "User" with
    // data.predicate=true. A trailing `?` in a later token position is
    // also accepted for robustness.
    let optional = t0.data?.predicate === true;
    let pos = 1;
    if (!optional && tokens[pos]?.[0] === '?') { optional = true; pos++; }
    let parts = [`target: ${JSON.stringify(target)}`];
    if (optional) parts.push('optional: true');
    return `[{${parts.join(', ')}}]`;
  }

  // `@index field` or `@index [a, b]` or `@index [a, b] #` for unique.
  if (name === 'index') {
    let fields = [];
    let unique = false;
    let pos = 0;
    // Single-field form: one identifier names the indexed column.
    if (tokens[pos]?.[0] === 'IDENTIFIER' || tokens[pos]?.[0] === 'PROPERTY') {
      fields.push(tokens[pos][1]);
      pos++;
    } else if (tokens[pos]?.[0] === '[' || tokens[pos]?.[0] === 'INDEX_START') {
      // Composite form: scan to the bracket's matching close, tracking
      // nesting depth so inner brackets don't end the scan early.
      let inner = [];
      let depth = 1;
      pos++;
      while (pos < tokens.length && depth > 0) {
        let t = tokens[pos];
        if (t[0] === '[' || t[0] === 'INDEX_START') depth++;
        if (t[0] === ']' || t[0] === 'INDEX_END') {
          depth--;
          if (depth === 0) { pos++; break; }
        }
        inner.push(t);
        pos++;
      }
      // Only bare identifiers are accepted as composite members; other
      // token shapes are silently skipped.
      for (let part of splitTopLevelByComma(inner)) {
        if (part[0] && (part[0][0] === 'IDENTIFIER' || part[0][0] === 'PROPERTY')) {
          fields.push(part[0][1]);
        }
      }
    }
    // A trailing `#` after the field(s) marks the index unique.
    if (tokens[pos]?.[0] === '#') unique = true;
    let parts = [`fields: ${JSON.stringify(fields)}`];
    if (unique) parts.push('unique: true');
    return `[{${parts.join(', ')}}]`;
  }

  // @idStart N sets the seed value for the table's auto-id sequence.
  // Accepts a single integer literal (optionally negative). Consumed by
  // .toSQL(); models that never call .toSQL() simply ignore it.
  if (name === 'idStart') {
    let tok = tokens[0];
    let sign = 1;
    let numTok = tok;
    // Negative literal arrives as a `-` token followed by NUMBER.
    if (tok && tok[0] === '-' && tokens[1] && tokens[1][0] === 'NUMBER') {
      sign = -1;
      numTok = tokens[1];
    }
    if (!numTok || numTok[0] !== 'NUMBER') {
      throw schemaError(tok || tokens[tokens.length - 1],
        '@idStart requires an integer literal, e.g. @idStart 10001.');
    }
    let n = sign * Number(numTok[1]);
    // Reject floats explicitly — a fractional sequence seed is meaningless.
    if (!Number.isInteger(n)) {
      throw schemaError(numTok,
        '@idStart requires an integer literal; got ' + numTok[1] + '.');
    }
    return '[{value: ' + n + '}]';
  }

  // Bare flag-like directives (@timestamps, @softDelete) don't take args.
  // Anything else — capture as raw literal tokens conservatively.
  return null;
}
|
|
1542
|
+
|
|
1543
|
+
// Evaluate a small expression as a literal. Accepts NUMBER, STRING, BOOL,
// NULL, UNDEFINED, REGEX, SYMBOL (returns its name string — for enum-member
// defaults like `[:draft]`), and unary minus on NUMBER. Anything else throws
// a SchemaSyntaxError: constraint values must be literal-deterministic.
//
// tokens — a token slice; fieldEntry — reserved for error context.
function evalLiteralTokens(tokens, fieldEntry) {
  if (!tokens.length) {
    throw schemaError(null, 'Empty constraint value.');
  }
  const first = tokens[0];
  const tag = first[0];
  if (tokens.length === 1) {
    switch (tag) {
      case 'NUMBER':    return Number(first[1]);
      case 'STRING':    return JSON.parse(first[1]);
      case 'BOOL':      return first[1] === 'true';
      case 'NULL':      return null;
      case 'UNDEFINED': return undefined;
      case 'REGEX':     return parseRegexLiteral(first[1]);
      case 'SYMBOL':    return first[1];
      // Unrecognized single token falls through to the throw below.
    }
  } else if (tokens.length === 2 && tag === '-' && tokens[1][0] === 'NUMBER') {
    // Unary minus on a numeric literal.
    return -Number(tokens[1][1]);
  }
  // Deterministic but not literal — IDENTIFIER references aren't supported.
  throw schemaError(first,
    `Constraint values must be literals (number, string, boolean, null, regex, :symbol). Got ${tag}.`);
}
|
|
1568
|
+
|
|
1569
|
+
// Build a RegExp from captured literal text. Slash-delimited input
// (`/pat/flags`) is split into source + flags; any other text is treated
// as the pattern itself with no flags.
function parseRegexLiteral(val) {
  const s = typeof val === 'string' ? val : String(val);
  const m = /^\/(.*)\/([gimsuy]*)$/s.exec(s);
  if (m !== null) {
    const [, source, flags] = m;
    return new RegExp(source, flags);
  }
  // No surrounding slashes — the whole text is the pattern.
  return new RegExp(s);
}
|
|
1574
|
+
|
|
1575
|
+
// Run the tail rewriter passes on a captured body token slice, then feed
// the result through parser.parse() via a temporary lex adapter. The
// returned s-expression is the parsed body — either a single statement or
// a block of statements — ready to wrap in `['->', [], body]`.
//
// Params:
//   bodyTokens — token slice captured by the schema lexer pass, or falsy.
// Returns: a statement s-expr, `['block', ...stmts]` for multiple
//          statements, or null for an empty/unparseable body.
// Throws:  Error if called before `parseBodyTokens._LexerCtor` is wired;
//          SchemaSyntaxError when a tail rewriter pass fails.
// NOTE: temporarily replaces the shared `parser.lexer` — not reentrant;
//       the original lexer is restored in a finally block.
function parseBodyTokens(bodyTokens) {
  if (!bodyTokens || !bodyTokens.length) return null;

  // The body tokens were captured by rewriteSchema BEFORE rewriteTypes,
  // tagPostfixConditionals, rewriteTaggedTemplates, addImplicitBracesAndParens,
  // and addImplicitCallCommas ran. Run those tail passes on a sub-lexer
  // whose `this.tokens` is the body slice.
  let LexerCtor = parseBodyTokens._LexerCtor;
  if (!LexerCtor) {
    throw new Error('schema: parseBodyTokens called before Lexer was wired');
  }
  // Prototype-only instance: we want the rewriter methods without running
  // the Lexer constructor (which presumably expects real source text).
  let sub = Object.create(LexerCtor.prototype);
  let toks = bodyTokens.slice();
  // Multi-line callable bodies open with a matched INDENT ... OUTDENT pair
  // wrapping the statements. parser.parse() expects a Body (list of Lines),
  // not a leading INDENT, so strip the outer pair when the first INDENT's
  // matching OUTDENT is the last token.
  if (toks.length >= 2 && toks[0]?.[0] === 'INDENT') {
    let depth = 0;
    let lastOutdent = -1;
    for (let k = 0; k < toks.length; k++) {
      if (toks[k][0] === 'INDENT') depth++;
      else if (toks[k][0] === 'OUTDENT') {
        depth--;
        if (depth === 0) { lastOutdent = k; break; }
      }
    }
    if (lastOutdent === toks.length - 1) {
      toks = toks.slice(1, -1);
    }
  }
  sub.tokens = toks;
  // Minimal lexer state the tail passes read; everything else stays unset.
  sub.seenFor = sub.seenImport = sub.seenExport = false;
  sub.ends = [];
  sub.indent = 0;
  sub.outdebt = 0;
  sub.indents = [];
  // Ensure a terminating TERMINATOR so parser.parse() sees a clean EOF.
  let lastTag = sub.tokens[sub.tokens.length - 1]?.[0];
  if (lastTag !== 'TERMINATOR') {
    sub.tokens.push(mkToken('TERMINATOR', '\n', bodyTokens[bodyTokens.length - 1]));
  }
  try {
    // Optional-call every pass: older Lexer builds may lack some of them.
    sub.rewriteTypes?.();
    sub.tagPostfixConditionals?.();
    sub.rewriteTaggedTemplates?.();
    sub.addImplicitBracesAndParens?.();
    sub.addImplicitCallCommas?.();
  } catch (e) {
    // If a tail pass throws, surface a clean schema error.
    throw schemaError(bodyTokens[0], `schema: failed to compile body: ${e.message}`);
  }
  // TYPE_DECL tokens carry no runtime meaning for a body — drop them.
  let tokens = sub.tokens.filter(t => t[0] !== 'TYPE_DECL');

  // Swap parser.lexer, parse, restore.
  let savedLexer = parser.lexer;
  parser.lexer = {
    tokens, pos: 0,
    setInput() {},
    lex() {
      // Returning 1 signals EOF to the parser.
      if (this.pos >= this.tokens.length) return 1;
      let token = this.tokens[this.pos++];
      let val = token[1];
      if (token.data) {
        // Box the value so per-token metadata rides along on the String
        // object the parser receives as yytext.
        val = new String(val);
        Object.assign(val, token.data);
      }
      this.text = val;
      this.loc = token.loc;
      this.line = token.loc?.r;
      return token[0];
    },
  };
  let sexpr;
  try {
    sexpr = parser.parse('');
  } finally {
    // Always restore the host's lexer, even when parse throws.
    parser.lexer = savedLexer;
  }

  // sexpr is `['program', ...statements]`. Unwrap to a body we can feed
  // a thin-arrow AST. One statement → the statement itself. Multiple →
  // ['block', ...].
  if (!Array.isArray(sexpr) || sexpr[0] !== 'program') return null;
  let stmts = sexpr.slice(1);
  if (stmts.length === 0) return null;
  if (stmts.length === 1) return stmts[0];
  return ['block', ...stmts];
}
|
|
1668
|
+
|
|
1669
|
+
// ============================================================================
|
|
1670
|
+
// Helpers
|
|
1671
|
+
// ============================================================================
|
|
1672
|
+
|
|
1673
|
+
// Read field modifiers off an identifier token's metadata: `data.await`
// maps to the `!` (required) marker and `data.predicate` to `?` (optional).
// Returns the modifier strings in fixed order: '!' before '?'.
function collectModifiers(identToken) {
  const data = identToken.data ?? {};
  const mods = [];
  if (data.await === true) mods.push('!');
  if (data.predicate === true) mods.push('?');
  return mods;
}
|
|
1680
|
+
|
|
1681
|
+
// Given the index of an INDENT token, return the index of its matching
// OUTDENT (tracking nested pairs), or -1 when the stream ends unbalanced.
function findMatchingOutdent(tokens, indentIdx) {
  let nesting = 0;
  for (let idx = indentIdx; idx < tokens.length; idx++) {
    const tag = tokens[idx][0];
    if (tag === 'INDENT') {
      nesting += 1;
    } else if (tag === 'OUTDENT') {
      nesting -= 1;
      if (nesting === 0) return idx;
    }
  }
  return -1;
}
|
|
1692
|
+
|
|
1693
|
+
// Split a token slice on commas that sit at nesting depth zero. Every
// bracket-like pair — (), [], {}, CALL/INDEX/PARAM markers, and
// INDENT/OUTDENT — contributes to the depth, so commas inside nested
// structures are kept with their group. Empty groups are dropped.
function splitTopLevelByComma(tokens) {
  const OPENERS = new Set(['(', '[', '{', 'CALL_START', 'INDEX_START', 'PARAM_START', 'INDENT']);
  const CLOSERS = new Set([')', ']', '}', 'CALL_END', 'INDEX_END', 'PARAM_END', 'OUTDENT']);
  const parts = [];
  let current = [];
  let depth = 0;
  for (const tok of tokens) {
    const tag = tok[0];
    if (OPENERS.has(tag)) depth++;
    if (CLOSERS.has(tag)) depth--;
    if (tag === ',' && depth === 0) {
      // Top-level separator — close out the current group.
      if (current.length) parts.push(current);
      current = [];
    } else {
      current.push(tok);
    }
  }
  if (current.length) parts.push(current);
  return parts;
}
|
|
1715
|
+
|
|
1716
|
+
// Convert one token to its JS literal value. Unknown tags pass the raw
// token text through unchanged.
function literalOf(tok) {
  const [tag, val] = tok;
  switch (tag) {
    case 'NUMBER':    return Number(val);
    case 'STRING':    return JSON.parse(val);
    case 'BOOL':      return val === 'true';
    case 'NULL':      return null;
    case 'UNDEFINED': return undefined;
    default:          return val;
  }
}
|
|
1725
|
+
|
|
1726
|
+
// Build a synthetic lexer token: a [tag, value] array decorated with the
// bookkeeping props real tokens carry. `generated: true` marks it as
// compiler-made; when an origin token is supplied, its location is reused
// and a back-reference is attached.
function mkToken(tag, value, origin) {
  const tok = Object.assign([tag, value], {
    pre: 0,
    data: null,
    loc: origin?.loc ?? { r: 0, c: 0, n: 0 },
    spaced: false,
    newLine: false,
    generated: true,
  });
  if (origin) tok.origin = origin;
  return tok;
}
|
|
1737
|
+
|
|
1738
|
+
// Construct (but do not throw) a SchemaSyntaxError carrying the offending
// token's location plus the phase/code fields the host error reporter reads.
// A null token yields a 0,0 location.
function schemaError(tok, message) {
  const loc = tok?.loc || { r: 0, c: 0 };
  return Object.assign(new Error(message), {
    name: 'SchemaSyntaxError',
    loc,
    line: loc.r,
    column: loc.c,
    phase: 'schema',
    code: 'E_SCHEMA',
  });
}
|
|
1749
|
+
|
|
1750
|
+
// ============================================================================
|
|
1751
|
+
// Runtime — injected into compiled output when the source uses `schema`
|
|
1752
|
+
// ============================================================================
|
|
1753
|
+
//
|
|
1754
|
+
// Four-layer architecture (D22):
|
|
1755
|
+
// Layer 1 — Descriptor: the object passed to `__schema({...})`. Raw
|
|
1756
|
+
// metadata from compiler, plus real functions for callables.
|
|
1757
|
+
// Layer 2 — Normalized: fields map / methods map / computed map / hooks
|
|
1758
|
+
// map / directives / enum members. Built lazily on first
|
|
1759
|
+
// downstream need. Collision and kind-legality checks live
|
|
1760
|
+
// here (Phase 4 tightens them).
|
|
1761
|
+
// Layer 3 — Validator plan: compiled validator tree. Built on first
|
|
1762
|
+
// `.parse` / `.safe` / `.ok`.
|
|
1763
|
+
// Layer 4 — ORM plan (Phase 4) and DDL plan (Phase 4) — not in Phase 3.
|
|
1764
|
+
//
|
|
1765
|
+
// Public API per kind (v1):
|
|
1766
|
+
// .parse(data) throws SchemaError on failure, returns value
|
|
1767
|
+
// .safe(data) {ok: true, value, errors: null} | {ok: false, value: null, errors: [...]}
|
|
1768
|
+
// .ok(data) boolean, fast path (no allocation)
|
|
1769
|
+
//
|
|
1770
|
+
// Result `value` shape:
|
|
1771
|
+
// :shape — generated class instance (fields enumerable own props,
|
|
1772
|
+
// methods non-enumerable prototype fns, computed non-enumerable
|
|
1773
|
+
// prototype getters)
|
|
1774
|
+
// :input — plain object (same class-instance plumbing; Phase 3 treats
|
|
1775
|
+
// :input like :shape sans methods for consistency)
|
|
1776
|
+
// :enum — the member value (or name when the enum is bare)
|
|
1777
|
+
// :mixin — non-instantiable; raises `Cannot parse :mixin`
|
|
1778
|
+
// :model — Phase 4 (the class additionally wires ORM methods)
|
|
1779
|
+
|
|
1780
|
+
// =============================================================================
|
|
1781
|
+
// Runtime composition (delegated to registered provider)
|
|
1782
|
+
// =============================================================================
|
|
1783
|
+
// Mode matrix:
|
|
1784
|
+
//
|
|
1785
|
+
// validate = VALIDATE (pure)
|
|
1786
|
+
// browser = VALIDATE + BROWSER_STUBS (browser bundle)
|
|
1787
|
+
// server = VALIDATE + DB_NAMING + ORM (server runtime)
|
|
1788
|
+
// migration = VALIDATE + DB_NAMING + ORM + DDL (migration tool)
|
|
1789
|
+
//
|
|
1790
|
+
// The actual fragment imports + composition live in the loader files so
|
|
1791
|
+
// only the fragments needed by a given entry are bundled. Browser bundles
|
|
1792
|
+
// import loader-browser.js (validate + browser-stubs only); CLI / server
|
|
1793
|
+
// imports loader-server.js (all five fragments).
|
|
1794
|
+
|
|
1795
|
+
// Return the composed schema runtime from the registered provider. A loader
// module (loader-server.js or loader-browser.js) must have been side-effect
// imported first to register the provider; otherwise this throws with
// instructions naming both loaders.
export function getSchemaRuntime(opts = {}) {
  if (_schemaRuntimeProvider) {
    return _schemaRuntimeProvider(opts);
  }
  throw new Error(
    "schema runtime provider not registered. Side-effect-import either " +
    "'./schema/loader-server.js' (CLI / server / tests) or " +
    "'./schema/loader-browser.js' (browser bundle) before calling " +
    "any compileToJS that emits schemas."
  );
}
|