shapecraft 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/CLAUDE.md +227 -0
  2. package/README.md +22 -0
  3. package/apps/cli/node_modules/.bin/prettier +21 -0
  4. package/apps/cli/node_modules/.bin/tsc +21 -0
  5. package/apps/cli/node_modules/.bin/tsserver +21 -0
  6. package/apps/cli/node_modules/.bin/tsx +21 -0
  7. package/apps/cli/node_modules/.bin/vitest +21 -0
  8. package/apps/cli/package.json +47 -0
  9. package/apps/cli/src/index.ts +98 -0
  10. package/apps/cli/tsconfig.cjs.json +10 -0
  11. package/apps/cli/tsconfig.esm.json +10 -0
  12. package/apps/cli/tsconfig.json +22 -0
  13. package/package.json +16 -0
  14. package/packages/core/node_modules/.bin/prettier +21 -0
  15. package/packages/core/node_modules/.bin/tsc +21 -0
  16. package/packages/core/node_modules/.bin/tsserver +21 -0
  17. package/packages/core/node_modules/.bin/tsx +21 -0
  18. package/packages/core/node_modules/.bin/vitest +21 -0
  19. package/packages/core/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  20. package/packages/core/package.json +44 -0
  21. package/packages/core/src/common/array.test.ts +19 -0
  22. package/packages/core/src/common/array.ts +15 -0
  23. package/packages/core/src/common/index.ts +5 -0
  24. package/packages/core/src/common/is.ts +23 -0
  25. package/packages/core/src/common/object.ts +35 -0
  26. package/packages/core/src/common/phantom.ts +1 -0
  27. package/packages/core/src/common/result.ts +43 -0
  28. package/packages/core/src/common/string.ts +28 -0
  29. package/packages/core/src/common/types.ts +34 -0
  30. package/packages/core/src/index.ts +1 -0
  31. package/packages/core/src/shape/annotate.ts +139 -0
  32. package/packages/core/src/shape/annotation.ts +47 -0
  33. package/packages/core/src/shape/base.ts +71 -0
  34. package/packages/core/src/shape/builder.test.ts +728 -0
  35. package/packages/core/src/shape/builder.ts +475 -0
  36. package/packages/core/src/shape/error.ts +4 -0
  37. package/packages/core/src/shape/index.ts +3 -0
  38. package/packages/core/src/shape/number.ts +118 -0
  39. package/packages/core/src/shape/shape.test.ts +792 -0
  40. package/packages/core/src/shape/shape.ts +377 -0
  41. package/packages/core/src/shape/tags.ts +14 -0
  42. package/packages/core/src/shape/transforms/index.ts +3 -0
  43. package/packages/core/src/shape/transforms/json-schema/index.ts +2 -0
  44. package/packages/core/src/shape/transforms/json-schema/transform.test.ts +850 -0
  45. package/packages/core/src/shape/transforms/json-schema/transform.ts +882 -0
  46. package/packages/core/src/shape/transforms/json-schema/types.ts +132 -0
  47. package/packages/core/src/shape/transforms/sql/dialects/dialect.ts +89 -0
  48. package/packages/core/src/shape/transforms/sql/dialects/index.ts +14 -0
  49. package/packages/core/src/shape/transforms/sql/dialects/postgres.ts +392 -0
  50. package/packages/core/src/shape/transforms/sql/dialects/sqlite.ts +333 -0
  51. package/packages/core/src/shape/transforms/sql/from-sql.test.ts +704 -0
  52. package/packages/core/src/shape/transforms/sql/from-sql.ts +210 -0
  53. package/packages/core/src/shape/transforms/sql/index.ts +3 -0
  54. package/packages/core/src/shape/transforms/sql/options.ts +6 -0
  55. package/packages/core/src/shape/transforms/sql/parser/check-decoder.ts +457 -0
  56. package/packages/core/src/shape/transforms/sql/parser/create-domain.ts +105 -0
  57. package/packages/core/src/shape/transforms/sql/parser/create-table.ts +809 -0
  58. package/packages/core/src/shape/transforms/sql/parser/create-type.ts +91 -0
  59. package/packages/core/src/shape/transforms/sql/parser/cursor.ts +179 -0
  60. package/packages/core/src/shape/transforms/sql/parser/default-decoder.ts +129 -0
  61. package/packages/core/src/shape/transforms/sql/parser/lexer.ts +289 -0
  62. package/packages/core/src/shape/transforms/sql/parser/pg-types.ts +247 -0
  63. package/packages/core/src/shape/transforms/sql/parser/sqlite-types.ts +103 -0
  64. package/packages/core/src/shape/transforms/sql/parser/statements.ts +127 -0
  65. package/packages/core/src/shape/transforms/sql/parser/type-spec.ts +159 -0
  66. package/packages/core/src/shape/transforms/sql/transform.sqlite.test.ts +448 -0
  67. package/packages/core/src/shape/transforms/sql/transform.test.ts +880 -0
  68. package/packages/core/src/shape/transforms/sql/transform.ts +295 -0
  69. package/packages/core/src/shape/transforms/typescript/index.ts +1 -0
  70. package/packages/core/src/shape/transforms/typescript/transform.ts +211 -0
  71. package/packages/core/src/shape/tuple.test.ts +171 -0
  72. package/packages/core/src/shape/validate.ts +413 -0
  73. package/packages/core/tsconfig.cjs.json +11 -0
  74. package/packages/core/tsconfig.esm.json +10 -0
  75. package/packages/core/tsconfig.json +23 -0
  76. package/packages/samples/node_modules/.bin/prettier +21 -0
  77. package/packages/samples/node_modules/.bin/tsc +21 -0
  78. package/packages/samples/node_modules/.bin/tsserver +21 -0
  79. package/packages/samples/node_modules/.bin/tsx +21 -0
  80. package/packages/samples/node_modules/.bin/vitest +21 -0
  81. package/packages/samples/package.json +47 -0
  82. package/packages/samples/src/blog.ts +49 -0
  83. package/packages/samples/src/config.ts +50 -0
  84. package/packages/samples/src/ecommerce.ts +65 -0
  85. package/packages/samples/src/embeddings.ts +43 -0
  86. package/packages/samples/src/events.ts +52 -0
  87. package/packages/samples/src/geometry.ts +62 -0
  88. package/packages/samples/src/index.ts +9 -0
  89. package/packages/samples/src/relational.ts +17 -0
  90. package/packages/samples/src/tuples.ts +67 -0
  91. package/packages/samples/src/user.ts +9 -0
  92. package/packages/samples/tsconfig.cjs.json +11 -0
  93. package/packages/samples/tsconfig.esm.json +10 -0
  94. package/packages/samples/tsconfig.json +23 -0
  95. package/pnpm-workspace.yaml +3 -0
  96. package/test-data/json-schema/address.json +35 -0
  97. package/test-data/json-schema/array-of-things.json +36 -0
  98. package/test-data/json-schema/basic.json +21 -0
  99. package/test-data/json-schema/blog-post.json +29 -0
  100. package/test-data/json-schema/calendar.json +48 -0
  101. package/test-data/json-schema/complex-object-with-nested-properties.json +41 -0
  102. package/test-data/json-schema/ecommerce-complex.json +344 -0
  103. package/test-data/json-schema/ecommerce-system.json +27 -0
  104. package/test-data/json-schema/enumerated-values.json +11 -0
  105. package/test-data/json-schema/fstab-entry.json +92 -0
  106. package/test-data/json-schema/geographical-location.json +20 -0
  107. package/test-data/json-schema/health-record.json +41 -0
  108. package/test-data/json-schema/job-posting.json +33 -0
  109. package/test-data/json-schema/movie.json +35 -0
  110. package/test-data/json-schema/regular-expression-pattern.json +12 -0
  111. package/test-data/json-schema/user-profile.json +33 -0
  112. package/test-data/sql/ecommerce.sql +641 -0
@@ -0,0 +1,210 @@
1
+ import { R } from "../../../common/result";
2
+ import { annotate, shapes } from "../../shape";
3
+ import { resolveDialect, SQLOptions } from "./options";
4
+ import { decodeCheck } from "./parser/check-decoder";
5
+ import { parseCreateDomain } from "./parser/create-domain";
6
+ import { parseCreateTable, ParsedTable } from "./parser/create-table";
7
+ import { parseCreateType } from "./parser/create-type";
8
+ import { decodeDefault } from "./parser/default-decoder";
9
+ import { LexError, tokenize } from "./parser/lexer";
10
+ import {
11
+ parsePgType,
12
+ Registries,
13
+ resolvePgType,
14
+ ResolvedDomain,
15
+ } from "./parser/pg-types";
16
+ import { classifyStatement, splitStatements } from "./parser/statements";
17
+ import { ParseError } from "./parser/cursor";
18
+
19
/** Create an empty set of lookup tables for enums, composites and domains. */
const newRegistries = (): Registries => {
  const registries: Registries = {
    enums: new Map(),
    composites: new Map(),
    domains: new Map(),
  };
  return registries;
};
24
+
25
/**
 * Resolve a parsed CREATE DOMAIN statement into a shape and store it in
 * `registries.domains` under both its schema-qualified name and its bare name.
 *
 * The shape starts as the resolved base type. Each CHECK that `decodeCheck`
 * fully decodes contributes `min`, `max` and `pattern` annotations; all other
 * checks are dropped. The shape then receives a `pgtype:<qualified name>`
 * format hint and, when the domain has a DEFAULT, either a literal `default`
 * or a raw `defaultExpr`.
 *
 * @param registries Registries to read base types from and write the domain to.
 * @param raw        Output of `parseCreateDomain` for one statement.
 */
const registerDomain = (
  registries: Registries,
  raw: ReturnType<typeof parseCreateDomain>,
): void => {
  let shape = resolvePgType(parsePgType(raw.baseTypeTokens), registries).shape;

  for (const checkTokens of raw.checks) {
    const { fullyDecoded, updates } = decodeCheck(checkTokens, {
      // Domain checks are matched against the identifier `value`.
      columnName: "value",
      // NOTE(review): any numeric shape is flagged as integer here, so the
      // decoder's integer-only handling of `>` / `<` also applies to
      // non-integer numeric domains — confirm this is intended.
      isInteger: shape.type === "number",
      isString: shape.type === "string",
      isArray: shape.type === "array",
    });

    // Undecoded domain checks are dropped — they tend to reference VALUE
    // rather than a specific column, so the column-oriented check-decoder
    // wouldn't be able to round-trip them onto a column anyway.
    if (!fullyDecoded) continue;

    const { min, max, pattern } = updates;
    if (min !== undefined) shape = annotate.as(shape, { min });
    if (max !== undefined) shape = annotate.as(shape, { max });
    if (pattern !== undefined) shape = annotate.as(shape, { pattern });
  }

  // Tag the shape with the domain name as a format hint.
  const qualified =
    raw.schema === undefined ? raw.name : `${raw.schema}.${raw.name}`;
  shape = annotate.as(shape, { format: `pgtype:${qualified}` });

  if (raw.defaultTokens !== undefined) {
    const decoded = decodeDefault(raw.defaultTokens, null);
    if (decoded.kind === "literal") {
      shape = annotate.as(shape, { default: decoded.value });
    } else if (decoded.sql.length > 0) {
      shape = annotate.as(shape, { defaultExpr: decoded.sql });
    }
  }

  // `raw.notNull` is deliberately not applied: nullability stays a
  // column-level concern, so the domain's shape is left alone.

  const entry: ResolvedDomain = { name: raw.name, shape };
  if (raw.schema !== undefined) entry.schema = raw.schema;
  for (const key of [qualified, raw.name]) {
    registries.domains.set(key, entry);
  }
};
71
+
72
/**
 * Combine parsed tables into the final shape.
 *
 * - No tables: an error result.
 * - Exactly one table: that table's mapping, returned directly.
 * - Several tables: a module keyed by table name. A name that is already
 *   taken falls back to `<schema>_<name>` (or `<name>_2` when the table has no
 *   schema), followed by `_2`, `_3`, … until the key is unique.
 */
const assemble = (
  parsedTables: ParsedTable[],
): R.Result<shapes.Shape, string[]> => {
  if (parsedTables.length === 0) {
    return R.err(["no CREATE TABLE statements found"]);
  }
  if (parsedTables.length === 1) {
    return R.ok(makeMapping(parsedTables[0]!));
  }

  const taken = new Set<string>();

  // Pick the module key for a table given the keys handed out so far.
  const keyFor = (t: ParsedTable): string => {
    if (!taken.has(t.name)) return t.name;
    const base =
      t.schema !== undefined ? `${t.schema}_${t.name}` : `${t.name}_2`;
    let candidate = base;
    for (let n = 2; taken.has(candidate); n += 1) {
      candidate = `${base}_${n}`;
    }
    return candidate;
  };

  const tables: Record<string, shapes.ShapeMapping> = {};
  for (const table of parsedTables) {
    const key = keyFor(table);
    taken.add(key);
    tables[key] = makeMapping(table);
  }
  return R.ok(shapes.module(tables));
};
106
+
107
/**
 * Build the mapping shape for one parsed table: one field per column, then the
 * table-level annotations (title, and — when present — schema, check,
 * description and non-empty unique constraints), applied in that order.
 */
const makeMapping = (t: ParsedTable): shapes.ShapeMapping => {
  const fields: Record<string, shapes.Shape> = {};
  for (const column of t.columns) {
    fields[column.key] = column.shape;
  }

  const anno = t.mappingAnno;
  const bare: shapes.ShapeMapping = shapes.mapping(fields);
  let mapping = annotate.titled(bare, anno.title) as shapes.ShapeMapping;

  if (anno.schema !== undefined) {
    mapping = annotate.as(mapping, {
      schema: anno.schema,
    }) as shapes.ShapeMapping;
  }
  if (anno.check !== undefined) {
    mapping = annotate.as(mapping, {
      check: anno.check,
    }) as shapes.ShapeMapping;
  }
  if (anno.description !== undefined) {
    mapping = annotate.described(
      mapping,
      anno.description,
    ) as shapes.ShapeMapping;
  }

  const uniques = t.uniqueConstraints;
  if (uniques !== undefined && uniques.length > 0) {
    mapping = annotate.as(mapping, {
      uniqueConstraints: uniques,
    }) as shapes.ShapeMapping;
  }
  return mapping;
};
128
+
129
/**
 * Parse SQL DDL text into a shape.
 *
 * The statements are processed in two passes: CREATE TYPE and CREATE DOMAIN
 * statements first (to fill the type registries), then CREATE TABLE
 * statements, which are parsed against those registries. Any other statement
 * kind is ignored.
 *
 * Parsing is best-effort: per-statement parser errors are collected, but they
 * are only returned when no table could be parsed at all. Otherwise the
 * successfully parsed tables are assembled and the collected errors are
 * discarded.
 *
 * @param sql  DDL source text.
 * @param opts Optional settings; the dialect is resolved by `resolveDialect`.
 * @returns An ok result with the assembled shape, or an error result with a
 *          list of messages (lexing failure, parser errors, or no tables).
 */
export const fromSQL = (
  sql: string,
  opts?: SQLOptions,
): R.Result<shapes.Shape, string[]> => {
  const dialect = resolveDialect(opts);

  let tokens: ReturnType<typeof tokenize>;
  try {
    tokens = tokenize(sql);
  } catch (e) {
    const detail = e instanceof LexError ? e.message : String(e);
    return R.err([`lex error: ${detail}`]);
  }

  const stmts = splitStatements(tokens).map(classifyStatement);
  const registries = newRegistries();
  const errors: string[] = [];

  // First pass: types and domains. Each definition is registered under both
  // its schema-qualified name and its bare name.
  for (const stmt of stmts) {
    if (stmt.kind === "createType") {
      try {
        const parsed = parseCreateType(stmt.tokens);
        if (parsed === null) {
          // nothing to register for this statement
        } else if (parsed.kind === "enum") {
          const enumDef = parsed.def;
          const qualified =
            enumDef.schema !== undefined
              ? `${enumDef.schema}.${enumDef.name}`
              : enumDef.name;
          registries.enums.set(qualified, enumDef);
          registries.enums.set(enumDef.name, enumDef);
        } else {
          const compositeDef = parsed.def;
          const qualified =
            compositeDef.schema !== undefined
              ? `${compositeDef.schema}.${compositeDef.name}`
              : compositeDef.name;
          registries.composites.set(qualified, compositeDef);
          registries.composites.set(compositeDef.name, compositeDef);
        }
      } catch (e) {
        errors.push(formatParserError(e, "CREATE TYPE"));
      }
    } else if (stmt.kind === "createDomain") {
      try {
        registerDomain(registries, parseCreateDomain(stmt.tokens));
      } catch (e) {
        errors.push(formatParserError(e, "CREATE DOMAIN"));
      }
    }
    // createTable is handled in the second pass; createTablePartitionOf and
    // skip are ignored.
  }

  // Second pass: tables, in source order.
  const tableStmts: typeof stmts = stmts.filter(
    (stmt) => stmt.kind === "createTable",
  );
  const parsedTables: ParsedTable[] = [];
  for (const stmt of tableStmts) {
    try {
      parsedTables.push(parseCreateTable(stmt.tokens, { registries, dialect }));
    } catch (e) {
      errors.push(formatParserError(e, "CREATE TABLE"));
    }
  }

  // Errors are only surfaced when nothing usable was parsed.
  const nothingParsed = parsedTables.length === 0;
  if (nothingParsed && errors.length > 0) {
    return R.err(errors);
  }

  return assemble(parsedTables);
};
204
+
205
/**
 * Render a caught value as `<ctx>: <detail>`. The detail is the `message` of a
 * `ParseError`, `LexError` or `Error`; any other thrown value is stringified.
 */
const formatParserError = (e: unknown, ctx: string): string => {
  const detail =
    e instanceof ParseError || e instanceof LexError || e instanceof Error
      ? e.message
      : String(e);
  return `${ctx}: ${detail}`;
};
@@ -0,0 +1,3 @@
1
+ export * from "./transform";
2
+ export * from "./from-sql";
3
+ export * from "./options";
@@ -0,0 +1,6 @@
1
/** SQL dialects the transforms understand. */
export type SQLDialect = "postgres" | "sqlite";

/** Options accepted by the SQL transforms. */
export type SQLOptions = { dialect?: SQLDialect };

/** Return the requested dialect, falling back to `"postgres"` when unset. */
export const resolveDialect = (opts?: SQLOptions): SQLDialect => {
  const requested = opts?.dialect;
  return requested ?? "postgres";
};
@@ -0,0 +1,457 @@
1
+ import { Token } from "./lexer";
2
+
3
/**
 * Annotation changes extracted from a decoded CHECK expression. Every field is
 * optional; a decoder sets only the fields its pattern implies.
 */
export type CheckUpdate = {
  /** Lower bound (value, string length or array size, depending on the check). */
  min?: number;
  /** Upper bound (value, string length or array size, depending on the check). */
  max?: number;
  /** Regular-expression source taken from a `~` or `REGEXP` check. */
  pattern?: string;
  /** Set when the check requires array elements to be distinct. */
  uniqueItems?: boolean;
  /** String literals listed in a `col IN ('a', 'b', ...)` check. */
  unionLiterals?: string[];

  // SQLite-specific hints — interpreted by the column-meta applier.

  /** `col IN (0, 1)` on an integer column. */
  isBooleanInt?: boolean;
  /** `json_valid(col)` (column is JSON-encoded). */
  isJsonValid?: boolean;
  /** `json_array_length(col) = N` together with `isJsonValid`. */
  tupleLength?: number;
};
14
+
15
/** Outcome of trying to decode one CHECK expression. */
export type DecodedCheck = {
  /** Annotation changes implied by the expression; empty when nothing decoded. */
  updates: CheckUpdate;
  /**
   * True if the matcher consumed the whole expression. False means the
   * expression could not be fully decoded and the caller should keep the raw
   * text on `anno.check`.
   */
  fullyDecoded: boolean;
};
21
+
22
/**
 * Re-emit a token stream as readable SQL. Used when a check could not be
 * decoded and the raw expression is preserved on `anno.check`.
 *
 * String tokens are single-quoted with `'` doubled; quoted identifiers are
 * double-quoted with `"` doubled; everything else is emitted verbatim. Tokens
 * are separated by one space except around `.`, after `(`, before `,` / `)`,
 * and between an identifier and a following `(`.
 */
export const checkExprToSql = (tokens: Token[]): string => {
  const isPunct = (t: Token, ...values: string[]): boolean =>
    t.kind === "punct" && values.some((v) => v === t.value);

  const render = (t: Token): string => {
    if (t.kind === "string") return `'${t.value.replace(/'/g, "''")}'`;
    if (t.kind === "ident" && t.quoted) {
      return `"${t.value.replace(/"/g, '""')}"`;
    }
    return t.value;
  };

  // True when `cur` must be glued to `prev` without a separating space.
  const glued = (prev: Token, cur: Token): boolean =>
    isPunct(cur, ",", ")", ".") ||
    isPunct(prev, "(", ".") ||
    (isPunct(cur, "(") && prev.kind === "ident");

  return tokens
    .map((cur, i) => {
      const prev = tokens[i - 1];
      const text = render(cur);
      return prev === undefined || glued(prev, cur) ? text : " " + text;
    })
    .join("");
};
45
+
46
/**
 * True when `t` is an identifier token naming column `col`. Quoted
 * identifiers must match `col` exactly; unquoted identifiers are compared
 * against the lower-cased column name.
 */
const isColRef = (t: Token | undefined, col: string): boolean => {
  if (t === undefined || t.kind !== "ident") return false;
  const expected = t.quoted === true ? col : col.toLowerCase();
  return t.value === expected;
};
51
+
52
/**
 * Numeric value of a `number` token, or null when the token is missing, is
 * not a number token, or does not convert to a finite number.
 */
const numLit = (t: Token | undefined): number | null => {
  if (t === undefined || t.kind !== "number") return null;
  const parsed = Number(t.value);
  return Number.isFinite(parsed) ? parsed : null;
};
61
+
62
/**
 * Read a numeric literal at `tokens[start]`, optionally preceded by a `+` or
 * `-` operator token.
 *
 * @returns The value and the number of tokens consumed (2 with a sign, 1
 *          without), or null when no finite numeric literal starts there.
 */
const numLitWithSign = (
  tokens: Token[],
  start: number,
): { value: number; consumed: number } | null => {
  const head = tokens[start];
  if (head === undefined) return null;

  const following = tokens[start + 1];
  if (
    head.kind === "op" &&
    (head.value === "-" || head.value === "+") &&
    following?.kind === "number"
  ) {
    const signed = Number(`${head.value}${following.value}`);
    return isFinite(signed) ? { value: signed, consumed: 2 } : null;
  }

  const plain = numLit(head);
  return plain === null ? null : { value: plain, consumed: 1 };
};
82
+
83
/**
 * Remove parentheses that wrap the whole token list, repeatedly, for as long
 * as the first `(` is not closed before the final token. A list such as
 * `(a) OR (b)` is therefore returned unchanged.
 */
const stripParens = (tokens: Token[]): Token[] => {
  const isPunct = (t: Token | undefined, v: string): boolean =>
    t !== undefined && t.kind === "punct" && t.value === v;

  // True when the paren depth returns to zero before the last token, i.e. the
  // opening paren does not pair with the final token.
  const closesBeforeEnd = (body: Token[]): boolean => {
    let depth = 0;
    for (let i = 0; i < body.length - 1; i += 1) {
      if (isPunct(body[i], "(")) {
        depth += 1;
      } else if (isPunct(body[i], ")")) {
        depth -= 1;
        if (depth === 0) return true;
      }
    }
    return false;
  };

  let body = tokens;
  while (
    body.length >= 2 &&
    isPunct(body[0], "(") &&
    isPunct(body[body.length - 1], ")") &&
    !closesBeforeEnd(body)
  ) {
    body = body.slice(1, -1);
  }
  return body;
};
112
+
113
/** Context describing the column a CHECK expression is decoded against. */
type CheckCtx = {
  /** Name of the column the expression is expected to reference. */
  columnName: string;
  /** Hint for how to interpret bounds (e.g. integer columns can decode `> N` → min=N+1). */
  isInteger: boolean;
  /** Whether the column is a string-typed column eligible for `IN ('...')` → union conversion. */
  isString: boolean;
  /** Whether the column is an array-typed column eligible for cardinality/uniqueItems. */
  isArray: boolean;
};
122
+
123
/**
 * Some pg checks wrap the real predicate as `<col> IS NULL OR <real>` so that
 * a CHECK on a nullable column does not fail for NULL values. When `tokens`
 * starts with that prefix, return the remainder with any wrapping parentheses
 * stripped; otherwise return null.
 */
const peelIsNullOr = (tokens: Token[], ctx: CheckCtx): Token[] | null => {
  const prefixKeywords = ["is", "null", "or"];
  const hasPrefix =
    tokens.length >= 5 &&
    isColRef(tokens[0], ctx.columnName) &&
    prefixKeywords.every((keyword, i) => {
      const t = tokens[i + 1];
      return t?.kind === "ident" && t.value === keyword;
    });
  return hasPrefix ? stripParens(tokens.slice(4)) : null;
};
135
+
136
/** True when `t` is present and has exactly this kind and value. */
const tokenIs = (t: Token | undefined, kind: string, value: string): boolean =>
  t !== undefined && t.kind === kind && t.value === value;

/**
 * Value of the (optionally signed) numeric literal starting at `start`, but
 * only when that literal runs to the very end of `tokens`; otherwise null.
 */
const trailingNumber = (tokens: Token[], start: number): number | null => {
  const lit = numLitWithSign(tokens, start);
  if (lit === null || start + lit.consumed !== tokens.length) return null;
  return lit.value;
};

/** True when `tokens[at..at+3]` spell `<fn>(<col>)` for one of `fnNames`. */
const isColumnCall = (
  tokens: Token[],
  at: number,
  fnNames: readonly string[],
  col: string,
): boolean => {
  const fn = tokens[at];
  return (
    fn !== undefined &&
    fn.kind === "ident" &&
    fnNames.some((name) => name === fn.value) &&
    tokenIs(tokens[at + 1], "punct", "(") &&
    isColRef(tokens[at + 2], col) &&
    tokenIs(tokens[at + 3], "punct", ")")
  );
};

/**
 * Try to decode a whole CHECK expression (already stripped of wrapping
 * parentheses) into annotation updates.
 *
 * Recognised forms, where `<col>` is `ctx.columnName`:
 * - `<col> >= N`, `<col> <= N` → `min` / `max`
 * - `<col> > N`, `<col> < N` → `min` / `max`, integer columns only
 * - `<col> ~ '<pat>'`, `<col> REGEXP '<pat>'` → `pattern`
 * - `<col> BETWEEN N AND M` → `min` and `max`
 * - `<col> IN (0, 1)` on an integer column → `isBooleanInt`
 * - `<col> IN ('a', 'b', ...)` on a string column → `unionLiterals`
 * - `char_length|length|cardinality|json_array_length(<col>) >= N` / `<= N`
 *   → `min` / `max`
 * - `json_valid(<col>)` → `isJsonValid`
 * - `json_valid(<col>) AND json_array_length(<col>) = N` → `isJsonValid` and
 *   `tupleLength`
 * - `shapecraft_array_is_unique(<col>)` or the SQLite COUNT/COUNT DISTINCT
 *   subquery comparison → `uniqueItems`
 *
 * @returns The updates when the entire token list matches one form, else null.
 */
const tryDecode = (tokens: Token[], ctx: CheckCtx): CheckUpdate | null => {
  if (tokens.length === 0) return null;
  const col = ctx.columnName;

  if (isColRef(tokens[0], col)) {
    const second = tokens[1];
    const third = tokens[2];

    if (second?.kind === "op") {
      if (second.value === "~") {
        // <col> ~ '<pat>'
        if (third?.kind === "string" && tokens.length === 3) {
          return { pattern: third.value };
        }
      } else {
        // <col> <op> <num>
        const n = trailingNumber(tokens, 2);
        if (n !== null) {
          switch (second.value) {
            case ">=":
              return { min: n };
            case "<=":
              return { max: n };
            case ">":
              // Exclusive bounds only convert to inclusive ones on integer
              // columns. floor/ceil keep the result correct for fractional
              // literals: `col > 1.5` admits 2, so min is 2, not 2.5.
              if (ctx.isInteger) return { min: Math.floor(n) + 1 };
              break;
            case "<":
              if (ctx.isInteger) return { max: Math.ceil(n) - 1 };
              break;
            default:
              break;
          }
        }
      }
    }

    if (second?.kind === "ident") {
      // <col> REGEXP '<pat>' (SQLite). REGEXP is parsed as an ident keyword.
      if (
        second.value === "regexp" &&
        third?.kind === "string" &&
        tokens.length === 3
      ) {
        return { pattern: third.value };
      }

      // <col> BETWEEN <num> AND <num>
      if (second.value === "between") {
        const lo = numLitWithSign(tokens, 2);
        if (lo !== null) {
          const andIdx = 2 + lo.consumed;
          if (tokenIs(tokens[andIdx], "ident", "and")) {
            const hi = trailingNumber(tokens, andIdx + 1);
            if (hi !== null) return { min: lo.value, max: hi };
          }
        }
      }

      if (second.value === "in" && tokenIs(third, "punct", "(")) {
        // <col> IN (0, 1) → boolean hint (SQLite encodes booleans as INTEGER).
        if (
          ctx.isInteger &&
          tokens.length === 7 &&
          tokenIs(tokens[6], "punct", ")")
        ) {
          const a = numLitWithSign(tokens, 3);
          if (a !== null) {
            const commaIdx = 3 + a.consumed;
            const b = numLitWithSign(tokens, commaIdx + 1);
            if (
              b !== null &&
              tokenIs(tokens[commaIdx], "punct", ",") &&
              ((a.value === 0 && b.value === 1) ||
                (a.value === 1 && b.value === 0))
            ) {
              return { isBooleanInt: true };
            }
          }
        }

        // <col> IN ('a', 'b', ...) → union of string literals. The list must
        // be `'x' (, 'y')* )` and end the expression; a trailing comma or a
        // missing closing paren leaves the check undecoded.
        if (ctx.isString) {
          const lits: string[] = [];
          let i = 3;
          let closed = false;
          while (i < tokens.length) {
            const item = tokens[i];
            if (item?.kind !== "string") break;
            lits.push(item.value);
            const separator = tokens[i + 1];
            i += 2;
            if (tokenIs(separator, "punct", ",")) continue;
            closed = tokenIs(separator, "punct", ")");
            break;
          }
          if (closed && i === tokens.length) return { unionLiterals: lits };
        }
      }
    }
  }

  // char_length(<col>) / length(<col>) >= N, <= N → string min/max
  // cardinality(<col>) >= N, <= N → array min/max (postgres)
  // json_array_length(<col>) >= N, <= N → array min/max (sqlite)
  const sizeFns = ["char_length", "length", "cardinality", "json_array_length"];
  if (isColumnCall(tokens, 0, sizeFns, col)) {
    const cmp = tokens[4];
    if (cmp?.kind === "op" && (cmp.value === ">=" || cmp.value === "<=")) {
      const n = trailingNumber(tokens, 5);
      if (n !== null) return cmp.value === ">=" ? { min: n } : { max: n };
    }
  }

  if (isColumnCall(tokens, 0, ["json_valid"], col)) {
    // json_valid(<col>) → isJsonValid (SQLite JSON-encoded column).
    if (tokens.length === 4) return { isJsonValid: true };

    // json_valid(<col>) AND json_array_length(<col>) = N → tuple of length N.
    if (
      tokenIs(tokens[4], "ident", "and") &&
      isColumnCall(tokens, 5, ["json_array_length"], col) &&
      tokenIs(tokens[9], "op", "=")
    ) {
      const n = trailingNumber(tokens, 10);
      // A length must be a non-negative integer; anything else is left
      // undecoded so the caller keeps the raw check text.
      if (n !== null && Number.isInteger(n) && n >= 0) {
        return { isJsonValid: true, tupleLength: n };
      }
    }
  }

  // shapecraft_array_is_unique(<col>) → uniqueItems
  if (
    isColumnCall(tokens, 0, ["shapecraft_array_is_unique"], col) &&
    tokens.length === 4
  ) {
    return { uniqueItems: true };
  }

  // SQLite uniqueItems pattern:
  //   (SELECT COUNT(*) FROM json_each(<col>))
  //     = (SELECT COUNT(DISTINCT value) FROM json_each(<col>))
  if (matchSqliteUniqueItems(tokens, ctx)) {
    return { uniqueItems: true };
  }

  return null;
};
374
+
375
/**
 * Match the exact token shape of the SQLite unique-items check:
 *
 *   ( SELECT COUNT ( * ) FROM json_each ( <col> ) )
 *     = ( SELECT COUNT ( DISTINCT value ) FROM json_each ( <col> ) )
 *
 * Wrapping parentheses around the whole expression are expected to have been
 * peeled already; the body itself is two paren-wrapped subqueries joined by
 * `=`. Returns true only when `tokens` matches this sequence token for token
 * with nothing left over.
 */
const matchSqliteUniqueItems = (tokens: Token[], ctx: CheckCtx): boolean => {
  type Matcher = (t: Token | undefined) => boolean;

  const ident =
    (v: string): Matcher =>
    (t) =>
      t !== undefined && t.kind === "ident" && t.value === v;
  const punct =
    (v: string): Matcher =>
    (t) =>
      t !== undefined && t.kind === "punct" && t.value === v;
  const op =
    (v: string): Matcher =>
    (t) =>
      t !== undefined && t.kind === "op" && t.value === v;
  const column: Matcher = (t) => isColRef(t, ctx.columnName);

  // `FROM json_each ( <col> ) )` — shared tail of both subqueries.
  const fromJsonEach: Matcher[] = [
    ident("from"),
    ident("json_each"),
    punct("("),
    column,
    punct(")"),
    punct(")"),
  ];

  const expected: Matcher[] = [
    punct("("),
    ident("select"),
    ident("count"),
    punct("("),
    op("*"),
    punct(")"),
    ...fromJsonEach,
    op("="),
    punct("("),
    ident("select"),
    ident("count"),
    punct("("),
    ident("distinct"),
    ident("value"),
    punct(")"),
    ...fromJsonEach,
  ];

  return (
    tokens.length === expected.length &&
    expected.every((matches, i) => matches(tokens[i]))
  );
};
443
+
444
/**
 * Decode one CHECK expression against a column.
 *
 * Wrapping parentheses are stripped, a leading `<col> IS NULL OR` guard is
 * peeled off when present, and the remaining tokens are matched against the
 * known patterns.
 *
 * @returns The decoded updates with `fullyDecoded: true`, or empty updates
 *          with `fullyDecoded: false` when no pattern matched the whole
 *          expression.
 */
export const decodeCheck = (
  rawTokens: Token[],
  ctx: CheckCtx,
): DecodedCheck => {
  const unwrapped = stripParens(rawTokens);
  const body = peelIsNullOr(unwrapped, ctx) ?? unwrapped;

  const updates = tryDecode(body, ctx);
  return updates === null
    ? { updates: {}, fullyDecoded: false }
    : { updates, fullyDecoded: true };
};