shapecraft 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/CLAUDE.md +227 -0
  2. package/README.md +22 -0
  3. package/apps/cli/node_modules/.bin/prettier +21 -0
  4. package/apps/cli/node_modules/.bin/tsc +21 -0
  5. package/apps/cli/node_modules/.bin/tsserver +21 -0
  6. package/apps/cli/node_modules/.bin/tsx +21 -0
  7. package/apps/cli/node_modules/.bin/vitest +21 -0
  8. package/apps/cli/package.json +47 -0
  9. package/apps/cli/src/index.ts +98 -0
  10. package/apps/cli/tsconfig.cjs.json +10 -0
  11. package/apps/cli/tsconfig.esm.json +10 -0
  12. package/apps/cli/tsconfig.json +22 -0
  13. package/package.json +16 -0
  14. package/packages/core/node_modules/.bin/prettier +21 -0
  15. package/packages/core/node_modules/.bin/tsc +21 -0
  16. package/packages/core/node_modules/.bin/tsserver +21 -0
  17. package/packages/core/node_modules/.bin/tsx +21 -0
  18. package/packages/core/node_modules/.bin/vitest +21 -0
  19. package/packages/core/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
  20. package/packages/core/package.json +44 -0
  21. package/packages/core/src/common/array.test.ts +19 -0
  22. package/packages/core/src/common/array.ts +15 -0
  23. package/packages/core/src/common/index.ts +5 -0
  24. package/packages/core/src/common/is.ts +23 -0
  25. package/packages/core/src/common/object.ts +35 -0
  26. package/packages/core/src/common/phantom.ts +1 -0
  27. package/packages/core/src/common/result.ts +43 -0
  28. package/packages/core/src/common/string.ts +28 -0
  29. package/packages/core/src/common/types.ts +34 -0
  30. package/packages/core/src/index.ts +1 -0
  31. package/packages/core/src/shape/annotate.ts +139 -0
  32. package/packages/core/src/shape/annotation.ts +47 -0
  33. package/packages/core/src/shape/base.ts +71 -0
  34. package/packages/core/src/shape/builder.test.ts +728 -0
  35. package/packages/core/src/shape/builder.ts +475 -0
  36. package/packages/core/src/shape/error.ts +4 -0
  37. package/packages/core/src/shape/index.ts +3 -0
  38. package/packages/core/src/shape/number.ts +118 -0
  39. package/packages/core/src/shape/shape.test.ts +792 -0
  40. package/packages/core/src/shape/shape.ts +377 -0
  41. package/packages/core/src/shape/tags.ts +14 -0
  42. package/packages/core/src/shape/transforms/index.ts +3 -0
  43. package/packages/core/src/shape/transforms/json-schema/index.ts +2 -0
  44. package/packages/core/src/shape/transforms/json-schema/transform.test.ts +850 -0
  45. package/packages/core/src/shape/transforms/json-schema/transform.ts +882 -0
  46. package/packages/core/src/shape/transforms/json-schema/types.ts +132 -0
  47. package/packages/core/src/shape/transforms/sql/dialects/dialect.ts +89 -0
  48. package/packages/core/src/shape/transforms/sql/dialects/index.ts +14 -0
  49. package/packages/core/src/shape/transforms/sql/dialects/postgres.ts +392 -0
  50. package/packages/core/src/shape/transforms/sql/dialects/sqlite.ts +333 -0
  51. package/packages/core/src/shape/transforms/sql/from-sql.test.ts +704 -0
  52. package/packages/core/src/shape/transforms/sql/from-sql.ts +210 -0
  53. package/packages/core/src/shape/transforms/sql/index.ts +3 -0
  54. package/packages/core/src/shape/transforms/sql/options.ts +6 -0
  55. package/packages/core/src/shape/transforms/sql/parser/check-decoder.ts +457 -0
  56. package/packages/core/src/shape/transforms/sql/parser/create-domain.ts +105 -0
  57. package/packages/core/src/shape/transforms/sql/parser/create-table.ts +809 -0
  58. package/packages/core/src/shape/transforms/sql/parser/create-type.ts +91 -0
  59. package/packages/core/src/shape/transforms/sql/parser/cursor.ts +179 -0
  60. package/packages/core/src/shape/transforms/sql/parser/default-decoder.ts +129 -0
  61. package/packages/core/src/shape/transforms/sql/parser/lexer.ts +289 -0
  62. package/packages/core/src/shape/transforms/sql/parser/pg-types.ts +247 -0
  63. package/packages/core/src/shape/transforms/sql/parser/sqlite-types.ts +103 -0
  64. package/packages/core/src/shape/transforms/sql/parser/statements.ts +127 -0
  65. package/packages/core/src/shape/transforms/sql/parser/type-spec.ts +159 -0
  66. package/packages/core/src/shape/transforms/sql/transform.sqlite.test.ts +448 -0
  67. package/packages/core/src/shape/transforms/sql/transform.test.ts +880 -0
  68. package/packages/core/src/shape/transforms/sql/transform.ts +295 -0
  69. package/packages/core/src/shape/transforms/typescript/index.ts +1 -0
  70. package/packages/core/src/shape/transforms/typescript/transform.ts +211 -0
  71. package/packages/core/src/shape/tuple.test.ts +171 -0
  72. package/packages/core/src/shape/validate.ts +413 -0
  73. package/packages/core/tsconfig.cjs.json +11 -0
  74. package/packages/core/tsconfig.esm.json +10 -0
  75. package/packages/core/tsconfig.json +23 -0
  76. package/packages/samples/node_modules/.bin/prettier +21 -0
  77. package/packages/samples/node_modules/.bin/tsc +21 -0
  78. package/packages/samples/node_modules/.bin/tsserver +21 -0
  79. package/packages/samples/node_modules/.bin/tsx +21 -0
  80. package/packages/samples/node_modules/.bin/vitest +21 -0
  81. package/packages/samples/package.json +47 -0
  82. package/packages/samples/src/blog.ts +49 -0
  83. package/packages/samples/src/config.ts +50 -0
  84. package/packages/samples/src/ecommerce.ts +65 -0
  85. package/packages/samples/src/embeddings.ts +43 -0
  86. package/packages/samples/src/events.ts +52 -0
  87. package/packages/samples/src/geometry.ts +62 -0
  88. package/packages/samples/src/index.ts +9 -0
  89. package/packages/samples/src/relational.ts +17 -0
  90. package/packages/samples/src/tuples.ts +67 -0
  91. package/packages/samples/src/user.ts +9 -0
  92. package/packages/samples/tsconfig.cjs.json +11 -0
  93. package/packages/samples/tsconfig.esm.json +10 -0
  94. package/packages/samples/tsconfig.json +23 -0
  95. package/pnpm-workspace.yaml +3 -0
  96. package/test-data/json-schema/address.json +35 -0
  97. package/test-data/json-schema/array-of-things.json +36 -0
  98. package/test-data/json-schema/basic.json +21 -0
  99. package/test-data/json-schema/blog-post.json +29 -0
  100. package/test-data/json-schema/calendar.json +48 -0
  101. package/test-data/json-schema/complex-object-with-nested-properties.json +41 -0
  102. package/test-data/json-schema/ecommerce-complex.json +344 -0
  103. package/test-data/json-schema/ecommerce-system.json +27 -0
  104. package/test-data/json-schema/enumerated-values.json +11 -0
  105. package/test-data/json-schema/fstab-entry.json +92 -0
  106. package/test-data/json-schema/geographical-location.json +20 -0
  107. package/test-data/json-schema/health-record.json +41 -0
  108. package/test-data/json-schema/job-posting.json +33 -0
  109. package/test-data/json-schema/movie.json +35 -0
  110. package/test-data/json-schema/regular-expression-pattern.json +12 -0
  111. package/test-data/json-schema/user-profile.json +33 -0
  112. package/test-data/sql/ecommerce.sql +641 -0
@@ -0,0 +1,91 @@
1
+ import { ParseError, TokenCursor } from "./cursor";
2
+ import { Token } from "./lexer";
3
+
4
/** An enum type read from `CREATE TYPE ... AS ENUM (...)`. */
export type EnumDef = {
  /** Leading part of a dotted type name; absent for an unqualified name. */
  schema?: string;
  /** Type name without the leading schema part. */
  name: string;
  /** Enum labels in the order they were declared. */
  values: string[];
};

/** A composite type read from `CREATE TYPE ... AS (field type, ...)`. */
export type CompositeDef = {
  /** Leading part of a dotted type name; absent for an unqualified name. */
  schema?: string;
  /** Type name without the leading schema part. */
  name: string;
  // Raw field definitions are kept as token slices so they can be mapped later
  // if composites are ever unpacked into mappings. v1 does not use them.
  fields: { name: string; typeTokens: Token[] }[];
};

/** Outcome of parsing a `CREATE TYPE` statement; `null` means "unsupported form". */
export type ParsedType =
  | null
  | { kind: "enum"; def: EnumDef }
  | { kind: "composite"; def: CompositeDef };
22
+
23
/**
 * Splits the parts of a dotted name into an optional schema and a name.
 *
 * A single part is the bare name. Otherwise the first part is the schema and
 * all remaining parts are re-joined with "." to form the name.
 */
const qualifiedName = (parts: string[]): { schema?: string; name: string } => {
  const [head, ...rest] = parts;
  if (parts.length === 1) return { name: head! };
  return { schema: head!, name: rest.join(".") };
};
27
+
28
/**
 * Parses the tokens of one `CREATE TYPE` statement.
 *
 * Accepted header: `CREATE [OR REPLACE] TYPE [IF NOT EXISTS] name AS`.
 * After `AS`, an `ENUM ( 'a', ... )` list yields an enum definition and a
 * parenthesised `( field type, ... )` list yields a composite definition.
 * Anything else after `AS` yields `null`.
 *
 * @param tokens - Tokens of a single statement.
 * @returns The parsed definition, or `null` for an unsupported form.
 * @throws ParseError when an expected keyword, identifier, punctuation mark
 *   or enum string literal is missing.
 */
export const parseCreateType = (tokens: Token[]): ParsedType => {
  const cur = new TokenCursor(tokens);

  // Header: CREATE [OR REPLACE] TYPE [IF NOT EXISTS]
  cur.expectIdent("create");
  if (cur.consumeIdent("or")) cur.expectIdent("replace");
  cur.expectIdent("type");
  if (cur.consumeIdent("if")) {
    cur.expectIdent("not");
    cur.expectIdent("exists");
  }

  const { schema, name } = qualifiedName(cur.readDottedName());
  cur.expectIdent("as");

  if (cur.consumeIdent("enum")) {
    cur.expectPunct("(");
    const values: string[] = [];
    // Keep reading labels until the list closes or a label is not followed by ",".
    let more = !cur.isPunct(")");
    while (more) {
      const label = cur.peek();
      if (label.kind !== "string") {
        throw new ParseError(
          `expected string literal in ENUM but found ${label.kind}`,
          label.pos,
        );
      }
      cur.consume();
      values.push(label.value);
      more = cur.consumePunct(",") && !cur.isPunct(")");
    }
    cur.expectPunct(")");

    const def: EnumDef = { name, values };
    if (schema !== undefined) def.schema = schema;
    return { kind: "enum", def };
  }

  if (!cur.consumePunct("(")) {
    // Unknown form (e.g. CREATE TYPE name AS RANGE (...)) — skip
    return null;
  }

  const fields: Array<{ name: string; typeTokens: Token[] }> = [];
  let more = !cur.isPunct(")");
  while (more) {
    const field = cur.expectAnyIdent();
    const typeTokens: Token[] = [];
    // The field's type runs up to the next "," or ")" that is not nested
    // inside parentheses (type arguments such as numeric(10, 2) are nested).
    let depth = 0;
    for (let tok = cur.peek(); tok.kind !== "eof"; tok = cur.peek()) {
      if (tok.kind === "punct") {
        if (depth === 0 && (tok.value === "," || tok.value === ")")) break;
        if (tok.value === "(") depth += 1;
        else if (tok.value === ")") depth -= 1;
      }
      typeTokens.push(tok);
      cur.consume();
    }
    fields.push({ name: field.name, typeTokens });
    more = cur.consumePunct(",") && !cur.isPunct(")");
  }
  cur.expectPunct(")");

  const def: CompositeDef = { name, fields };
  if (schema !== undefined) def.schema = schema;
  return { kind: "composite", def };
};
@@ -0,0 +1,179 @@
1
+ import { Token } from "./lexer";
2
+
3
/**
 * Error thrown when the token stream does not match what a parser expects.
 * The message is suffixed with ` at offset <pos>`; `pos` is also exposed as a
 * field.
 */
export class ParseError extends Error {
  public readonly pos: number;

  constructor(message: string, pos: number) {
    super(`${message} at offset ${pos}`);
    this.pos = pos;
    this.name = "ParseError";
  }
}
12
+
13
/**
 * Read cursor over a token array, with look-ahead, conditional consumption
 * and "expect or throw" helpers.
 */
export class TokenCursor {
  private idx = 0;

  constructor(private readonly tokens: Token[]) {}

  /** Index of the next token to be read. */
  get pos(): number {
    return this.idx;
  }

  /** Moves the read index to `idx`. No bounds check is performed. */
  reset(idx: number): void {
    this.idx = idx;
  }

  /**
   * Returns the token `off` places ahead without consuming it. Past the end
   * of the array a synthetic `eof` token is returned, positioned at the last
   * token's `pos` (or 0 when the array is empty).
   */
  peek(off = 0): Token {
    const found = this.tokens[this.idx + off];
    if (found !== undefined) return found;
    const tail = this.tokens[this.tokens.length - 1];
    return { kind: "eof", value: "", pos: tail?.pos ?? 0 };
  }

  /** Returns the current token and advances, unless the current token is `eof`. */
  consume(): Token {
    const current = this.peek();
    if (current.kind === "eof") return current;
    this.idx += 1;
    return current;
  }

  /** True when the current token is `eof`. */
  done(): boolean {
    return this.peek().kind === "eof";
  }

  /** True when the token at `off` is an unquoted identifier equal to `value`. */
  isIdent(value: string, off = 0): boolean {
    const tok = this.peek(off);
    if (tok.kind !== "ident" || tok.quoted) return false;
    return tok.value === value;
  }

  /** True when the token at `off` is the punctuation mark `value`. */
  isPunct(value: string, off = 0): boolean {
    const tok = this.peek(off);
    return tok.kind === "punct" ? tok.value === value : false;
  }

  /** True when the token at `off` is the operator `value`. */
  isOp(value: string, off = 0): boolean {
    const tok = this.peek(off);
    return tok.kind === "op" ? tok.value === value : false;
  }

  /** Consumes the current token if it is the unquoted identifier `value`. */
  consumeIdent(value: string): boolean {
    const hit = this.isIdent(value);
    if (hit) this.consume();
    return hit;
  }

  /** Consumes the current token if it is the punctuation mark `value`. */
  consumePunct(value: string): boolean {
    const hit = this.isPunct(value);
    if (hit) this.consume();
    return hit;
  }

  /** Consumes the current token if it is the operator `value`. */
  consumeOp(value: string): boolean {
    const hit = this.isOp(value);
    if (hit) this.consume();
    return hit;
  }

  /** Consumes the punctuation mark `value` or throws a ParseError. */
  expectPunct(value: string): void {
    if (this.consumePunct(value)) return;
    const got = this.peek();
    throw new ParseError(
      `expected punct '${value}' but found ${describe(got)}`,
      got.pos,
    );
  }

  /** Consumes the unquoted keyword `value` or throws a ParseError. */
  expectIdent(value: string): void {
    if (this.consumeIdent(value)) return;
    const got = this.peek();
    throw new ParseError(
      `expected keyword '${value}' but found ${describe(got)}`,
      got.pos,
    );
  }

  // Read a possibly-quoted ident. Throws if next token isn't an ident.
  expectAnyIdent(): { name: string; quoted: boolean; pos: number } {
    const tok = this.peek();
    if (tok.kind === "ident") {
      this.consume();
      return { name: tok.value, quoted: tok.quoted === true, pos: tok.pos };
    }
    throw new ParseError(
      `expected identifier but found ${describe(tok)}`,
      tok.pos,
    );
  }

  // Read a dotted ident chain (e.g. `shop.users` or `shop`.users) into [parts...]
  readDottedName(): string[] {
    const parts = [this.expectAnyIdent().name];
    while (this.consumePunct(".")) {
      parts.push(this.expectAnyIdent().name);
    }
    return parts;
  }

  // Skip a balanced paren group. Cursor must be at the opening '('.
  // Returns the tokens between the outer parentheses. If the input ends
  // before the group closes, the tokens read so far are returned.
  skipParens(): Token[] {
    const opener = this.peek();
    if (!(opener.kind === "punct" && opener.value === "(")) {
      throw new ParseError(
        `expected '(' but found ${describe(opener)}`,
        opener.pos,
      );
    }
    this.consume();

    const inner: Token[] = [];
    let depth = 1;
    for (;;) {
      const tok = this.consume();
      if (tok.kind === "eof") break;
      if (tok.kind === "punct") {
        if (tok.value === "(") {
          depth += 1;
        } else if (tok.value === ")") {
          depth -= 1;
          // The matching close paren is consumed but not collected.
          if (depth === 0) break;
        }
      }
      inner.push(tok);
    }
    return inner;
  }

  // Skip everything up to (but not including) a top-level matching token.
  // Also stops, without consuming, at a ')' that closes a group the cursor
  // did not open.
  skipUntilTopLevel(pred: (t: Token) => boolean): void {
    let nesting = 0;
    for (let tok = this.peek(); tok.kind !== "eof"; tok = this.peek()) {
      if (tok.kind === "punct") {
        if (tok.value === "(") {
          nesting += 1;
        } else if (tok.value === ")") {
          if (nesting === 0) return;
          nesting -= 1;
        }
      }
      if (nesting === 0 && pred(tok)) return;
      this.consume();
    }
  }
}
169
+
170
/** Renders a token as a short human-readable phrase for error messages. */
const describe = (t: Token): string => {
  switch (t.kind) {
    case "eof":
      return "end of input";
    case "ident":
      return `ident '${t.value}'`;
    case "punct":
      return `'${t.value}'`;
    case "op":
      return `op '${t.value}'`;
    case "number":
      return `number ${t.value}`;
    case "string":
      return `string '${t.value}'`;
    case "dollarString":
      return "dollar-quoted string";
    default:
      // Any other kind is reported by its raw kind name.
      return t.kind;
  }
};
@@ -0,0 +1,129 @@
1
+ import { Token } from "./lexer";
2
+
3
/**
 * Result of decoding a column DEFAULT clause: either the SQL text of an
 * expression that was not reduced to a value, or a decoded literal value.
 */
export type DecodedDefault =
  | { kind: "expr"; sql: string }
  | { kind: "literal"; value: unknown };
6
+
7
/**
 * Renders a token list back into SQL text.
 *
 * String literals and quoted identifiers are re-quoted with their quote
 * character doubled, unquoted identifiers are upper-cased, and dollar-quoted
 * strings are wrapped in dollar delimiters again. Tokens are separated by a
 * single space, except that no space is placed on either side of a `::` cast
 * operator. `eof` tokens are ignored.
 *
 * @param tokens - Tokens of the expression to render.
 * @returns The SQL text of the expression.
 */
const tokensToSql = (tokens: Token[]): string => {
  // Wraps a dollar-quoted body in a delimiter that cannot close early: the
  // first occurrence of the delimiter in `body + delimiter` must be the
  // closing one, otherwise a longer tag is tried.
  const dollarQuote = (body: string): string => {
    let tag = "";
    let delim = "$$";
    while ((body + delim).indexOf(delim) !== body.length) {
      tag += "q";
      delim = "$" + tag + "$";
    }
    return delim + body + delim;
  };

  const render = (t: Token): string => {
    if (t.kind === "string") return `'${t.value.replace(/'/g, "''")}'`;
    if (t.kind === "dollarString") return dollarQuote(t.value);
    if (t.kind === "ident") {
      return t.quoted
        ? `"${t.value.replace(/"/g, '""')}"`
        : t.value.toUpperCase();
    }
    return t.value;
  };

  let sql = "";
  // Separator to emit before the next piece. The cast spacing is decided per
  // token (not by a regex over the joined text) so that "::" appearing inside
  // a string literal or quoted identifier is never rewritten.
  let glue = "";
  for (const t of tokens) {
    if (t.kind === "eof") continue;
    const isCast = t.kind === "op" && t.value === "::";
    sql += (isCast ? "" : glue) + render(t);
    glue = isCast ? "" : " ";
  }
  return sql;
};
18
+
19
/**
 * Matches a single string literal optionally followed by `::type`.
 *
 * The cast type is the concatenation of the identifier and "." tokens after
 * the `::`, lower-cased; parenthesised type arguments are skipped. Returns
 * `null` when the tokens do not have exactly this shape.
 */
const matchStringCast = (
  tokens: Token[],
): { value: string; castType?: string } | null => {
  const [literal, castOp] = tokens;
  if (literal === undefined || literal.kind !== "string") return null;
  if (tokens.length === 1) return { value: literal.value };
  if (castOp === undefined || castOp.kind !== "op" || castOp.value !== "::") {
    return null;
  }

  // Accumulate the type name from everything after the cast operator.
  let typeName = "";
  let at = 2;
  while (at < tokens.length) {
    const tok = tokens[at]!;
    if (tok.kind === "ident") {
      typeName += tok.value;
      at += 1;
    } else if (tok.kind === "punct" && tok.value === ".") {
      typeName += ".";
      at += 1;
    } else if (tok.kind === "punct" && tok.value === "(") {
      // Type arguments: step over the whole parenthesised group.
      let open = 1;
      at += 1;
      for (; at < tokens.length && open > 0; at += 1) {
        const inner = tokens[at]!;
        if (inner.kind !== "punct") continue;
        if (inner.value === "(") open += 1;
        else if (inner.value === ")") open -= 1;
      }
    } else {
      break;
    }
  }

  // Anything left over means this is not a plain literal-with-cast.
  if (at !== tokens.length) return null;
  return { value: literal.value, castType: typeName.toLowerCase() };
};
52
+
53
/**
 * True for the cast type `date` and for any cast type whose name starts with
 * `timestamp` (which covers `timestamp` and `timestamptz` themselves).
 */
const isTimestampType = (t: string): boolean => {
  if (t === "date") return true;
  return t.startsWith("timestamp");
};
58
+
59
/**
 * Decodes the tokens of a column DEFAULT clause.
 *
 * Recognised literal forms:
 * - the unquoted keywords `null`, `true` and `false`;
 * - a number, optionally preceded by a `+` or `-` operator token;
 * - the string `'{}'` on a column whose type ends in `[]` (decoded to `[]`);
 * - a string with no cast, or cast to `text` / `varchar`;
 * - a string cast to `json` / `jsonb` that parses as JSON;
 * - a string cast to `date` or a `timestamp...` type that `Date` can parse.
 *
 * Everything else is returned as an expression with its SQL text.
 *
 * @param tokens - Tokens of the default expression.
 * @param columnPgType - Declared column type, or `null` when unknown.
 * @returns The decoded literal, or `{ kind: "expr" }` with the rendered SQL.
 */
export const decodeDefault = (
  tokens: Token[],
  columnPgType: string | null,
): DecodedDefault => {
  if (tokens.length === 0) return { kind: "expr", sql: "" };
  const first = tokens[0]!;

  if (tokens.length === 1) {
    // NULL / TRUE / FALSE are keywords only when written unquoted; a quoted
    // identifier spelled "null" or "true" is a name, not a value.
    if (first.kind === "ident" && first.quoted !== true) {
      if (first.value === "null") return { kind: "literal", value: null };
      if (first.value === "true" || first.value === "false") {
        return { kind: "literal", value: first.value === "true" };
      }
    }

    // Bare number.
    // NOTE(review): integers beyond Number.MAX_SAFE_INTEGER lose precision
    // here — confirm whether callers need exact bigint defaults.
    if (first.kind === "number") {
      const n = Number(first.value);
      if (Number.isFinite(n)) return { kind: "literal", value: n };
    }

    // Postgres array literal '{}' on a column declared as `T[]` — decode to [].
    // This has to run before the generic string handling below, which would
    // otherwise return the uncast string "{}" as a plain string literal.
    if (
      first.kind === "string" &&
      first.value === "{}" &&
      columnPgType !== null &&
      columnPgType.endsWith("[]")
    ) {
      return { kind: "literal", value: [] };
    }
  }

  // Number with a leading sign (the lexer emits the sign as its own token).
  if (
    tokens.length === 2 &&
    first.kind === "op" &&
    (first.value === "-" || first.value === "+") &&
    tokens[1]?.kind === "number"
  ) {
    const n = Number(`${first.value}${tokens[1].value}`);
    if (Number.isFinite(n)) return { kind: "literal", value: n };
  }

  // String (possibly with cast)
  const sc = matchStringCast(tokens);
  if (sc !== null) {
    const ct = sc.castType ?? "";
    if (ct === "" || ct === "text" || ct === "varchar") {
      return { kind: "literal", value: sc.value };
    }
    if (ct === "jsonb" || ct === "json") {
      try {
        return { kind: "literal", value: JSON.parse(sc.value) };
      } catch {
        // Not valid JSON: keep the default as an opaque SQL expression.
        return { kind: "expr", sql: tokensToSql(tokens) };
      }
    }
    if (isTimestampType(ct)) {
      const d = new Date(sc.value);
      if (!Number.isNaN(d.getTime())) return { kind: "literal", value: d };
    }
    // Fall through to expr for other casts
  }

  return { kind: "expr", sql: tokensToSql(tokens) };
};
@@ -0,0 +1,289 @@
1
/** Categories of token produced by the lexer. */
export type TokenKind =
  | "ident" // bare or quoted identifier / keyword
  | "string" // single-quoted string literal
  | "dollarString" // dollar-quoted string literal
  | "number"
  | "punct"
  | "op"
  | "eof";

/** One lexical token. */
export type Token = {
  kind: TokenKind;
  /** Token text; for strings and quoted identifiers, the unescaped content. */
  value: string;
  /** Set to `true` on identifiers that were written in quotes. */
  quoted?: boolean;
  /** Character offset of the token's first character in the source text. */
  pos: number;
};
16
+
17
/** Single characters that are emitted as `punct` tokens. */
const PUNCT_CHARS = new Set(["(", ")", ",", ";", "[", "]", "."]);

/**
 * True when `c` may start an unquoted identifier: an ASCII letter, an
 * underscore, or any character above U+007F. PostgreSQL and SQLite both allow
 * non-ASCII letters in unquoted identifiers, so those must not be rejected as
 * unexpected characters.
 */
const isIdentStart = (c: string): boolean =>
  (c >= "a" && c <= "z") ||
  (c >= "A" && c <= "Z") ||
  c === "_" ||
  c > "\u007f";

/** True when `c` may continue an identifier: a start character, a digit or `$`. */
const isIdentCont = (c: string): boolean =>
  isIdentStart(c) || (c >= "0" && c <= "9") || c === "$";

/** True for the ASCII digits 0-9. */
const isDigit = (c: string): boolean => c >= "0" && c <= "9";

/** True for space, tab, line feed, carriage return and form feed. */
const isSpace = (c: string): boolean =>
  c === " " || c === "\t" || c === "\n" || c === "\r" || c === "\f";
29
+
30
/**
 * Error thrown when the input text cannot be tokenized. The message is
 * suffixed with ` at offset <pos>`; `pos` is also exposed as a field.
 */
export class LexError extends Error {
  public readonly pos: number;

  constructor(message: string, pos: number) {
    super(`${message} at offset ${pos}`);
    this.pos = pos;
    this.name = "LexError";
  }
}
39
+
40
// Operators made of two characters; these are matched before any
// single-character token.
const TWO_CHAR_OPS = new Set([
  "<=",
  ">=",
  "<>",
  "!=",
  "||",
  "::",
  "&&",
  "@>",
  "<@",
]);

// Operators made of a single character.
const SINGLE_CHAR_OPS = new Set([
  "=",
  "<",
  ">",
  "+",
  "-",
  "*",
  "/",
  "%",
  "~",
  "!",
  "@",
  "#",
  "&",
  "|",
  "^",
  "?",
]);

/**
 * Splits SQL text into tokens.
 *
 * Whitespace, `--` line comments and (nestable) block comments are dropped.
 * Unquoted identifiers are lower-cased; identifiers quoted with `"` or a
 * backtick keep their case and are flagged `quoted`. String and quoted
 * identifier values have doubled quote characters collapsed. The returned
 * list always ends with an `eof` token.
 *
 * @param sql - Source text.
 * @returns The tokens, each carrying the offset of its first character.
 * @throws LexError on an unterminated comment, string, quoted identifier or
 *   dollar-quoted string, and on any character that starts no token.
 */
export const tokenize = (sql: string): Token[] => {
  const tokens: Token[] = [];
  const N = sql.length;
  let i = 0;

  const peek = (off = 0): string => sql[i + off] ?? "";

  // Reads a run delimited by `quote`, starting at the opening quote, in which
  // a doubled quote character stands for one literal quote. Leaves `i` just
  // past the closing quote and returns the unescaped content.
  const readQuoted = (quote: string, unterminated: string): string => {
    const start = i;
    i += 1;
    let buf = "";
    while (i < N) {
      if (sql[i] === quote) {
        if (sql[i + 1] !== quote) break;
        buf += quote;
        i += 2;
        continue;
      }
      buf += sql[i];
      i += 1;
    }
    if (sql[i] !== quote) throw new LexError(unterminated, start);
    i += 1;
    return buf;
  };

  while (i < N) {
    const c = peek();

    // whitespace
    if (isSpace(c)) {
      i += 1;
      continue;
    }

    // line comment
    if (c === "-" && peek(1) === "-") {
      while (i < N && sql[i] !== "\n") i += 1;
      continue;
    }

    // block comment (nestable per pg)
    if (c === "/" && peek(1) === "*") {
      let depth = 1;
      i += 2;
      while (i < N && depth > 0) {
        if (sql[i] === "/" && sql[i + 1] === "*") {
          depth += 1;
          i += 2;
        } else if (sql[i] === "*" && sql[i + 1] === "/") {
          depth -= 1;
          i += 2;
        } else {
          i += 1;
        }
      }
      if (depth > 0) throw new LexError("unterminated block comment", i);
      continue;
    }

    // dollar-quoted string: $tag$...$tag$ (the tag may be empty; otherwise it
    // starts like an identifier and continues with identifier-start
    // characters or digits)
    if (c === "$") {
      let j = i + 1;
      if (j < N && isIdentStart(sql[j] ?? "")) {
        j += 1;
        while (j < N) {
          const cc = sql[j] ?? "";
          if (isIdentStart(cc) || isDigit(cc)) j += 1;
          else break;
        }
      }
      if (sql[j] === "$") {
        const tag = sql.slice(i + 1, j);
        const opener = "$" + tag + "$";
        const start = j + 1;
        const end = sql.indexOf(opener, start);
        if (end < 0)
          throw new LexError(
            `unterminated dollar-quoted string with tag '${tag}'`,
            i,
          );
        tokens.push({
          kind: "dollarString",
          value: sql.slice(start, end),
          pos: i,
        });
        i = end + opener.length;
        continue;
      }
      // Not a dollar-quote. `$` cannot start a bare identifier, but parameter
      // placeholders like $1 use it, so emit it as punctuation.
      tokens.push({ kind: "punct", value: "$", pos: i });
      i += 1;
      continue;
    }

    // single-quoted string
    if (c === "'") {
      const start = i;
      const value = readQuoted("'", "unterminated string");
      tokens.push({ kind: "string", value, pos: start });
      continue;
    }

    // double-quoted identifier, or backtick-quoted identifier (SQLite/MySQL)
    if (c === '"' || c === "`") {
      const start = i;
      const value = readQuoted(
        c,
        c === '"'
          ? "unterminated quoted identifier"
          : "unterminated backtick-quoted identifier",
      );
      tokens.push({ kind: "ident", value, quoted: true, pos: start });
      continue;
    }

    // identifier / keyword
    if (isIdentStart(c)) {
      const start = i;
      while (i < N && isIdentCont(sql[i] ?? "")) i += 1;
      tokens.push({
        kind: "ident",
        value: sql.slice(start, i).toLowerCase(),
        pos: start,
      });
      continue;
    }

    // number (no sign — sign is consumed by parser if needed)
    if (isDigit(c) || (c === "." && isDigit(peek(1)))) {
      const start = i;
      while (i < N && isDigit(sql[i] ?? "")) i += 1;
      if (sql[i] === ".") {
        i += 1;
        while (i < N && isDigit(sql[i] ?? "")) i += 1;
      }
      if (sql[i] === "e" || sql[i] === "E") {
        // An exponent marker only belongs to the number when at least one
        // digit follows it (after an optional sign). Otherwise the "e" is
        // left for the next token instead of producing a malformed number.
        let j = i + 1;
        if (sql[j] === "+" || sql[j] === "-") j += 1;
        if (isDigit(sql[j] ?? "")) {
          i = j;
          while (i < N && isDigit(sql[i] ?? "")) i += 1;
        }
      }
      tokens.push({ kind: "number", value: sql.slice(start, i), pos: start });
      continue;
    }

    // operators, two-char first
    const two = sql.slice(i, i + 2);
    if (TWO_CHAR_OPS.has(two)) {
      tokens.push({ kind: "op", value: two, pos: i });
      i += 2;
      continue;
    }

    if (PUNCT_CHARS.has(c)) {
      tokens.push({ kind: "punct", value: c, pos: i });
      i += 1;
      continue;
    }

    if (SINGLE_CHAR_OPS.has(c)) {
      tokens.push({ kind: "op", value: c, pos: i });
      i += 1;
      continue;
    }

    throw new LexError(`unexpected character ${JSON.stringify(c)}`, i);
  }

  tokens.push({ kind: "eof", value: "", pos: i });
  return tokens;
};
277
+
278
/**
 * Renders tokens back into SQL text, separated by single spaces.
 *
 * String literals and quoted identifiers are re-quoted with their quote
 * character doubled. Dollar-quoted strings are wrapped in `$$`, or in a
 * tagged delimiter when `$$` would close early (the original tag is not
 * kept on the token, and the body may contain `$$` or end in `$`). All other
 * tokens are emitted as their value; `eof` tokens are skipped.
 *
 * @param tokens - Tokens to render.
 * @returns The rendered text.
 */
export const tokensToText = (tokens: Token[]): string => {
  // Picks a delimiter whose first occurrence in `body + delimiter` is the
  // closing one, so the rendered text lexes back to the same body.
  const dollarQuote = (body: string): string => {
    let tag = "";
    let delim = "$$";
    while ((body + delim).indexOf(delim) !== body.length) {
      tag += "q";
      delim = "$" + tag + "$";
    }
    return delim + body + delim;
  };

  const parts: string[] = [];
  for (const t of tokens) {
    if (t.kind === "eof") continue;
    if (t.kind === "string") parts.push(`'${t.value.replace(/'/g, "''")}'`);
    else if (t.kind === "ident" && t.quoted)
      parts.push(`"${t.value.replace(/"/g, '""')}"`);
    else if (t.kind === "dollarString") parts.push(dollarQuote(t.value));
    else parts.push(t.value);
  }
  return parts.join(" ");
};