@kuindji/typed-sql 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +11 -3
  2. package/dist/columns.d.ts +11 -3
  3. package/dist/columns.d.ts.map +1 -1
  4. package/dist/expressions.d.ts +84 -13
  5. package/dist/expressions.d.ts.map +1 -1
  6. package/dist/parsing/extract.d.ts +13 -9
  7. package/dist/parsing/extract.d.ts.map +1 -1
  8. package/dist/parsing/normalize.d.ts +9 -3
  9. package/dist/parsing/normalize.d.ts.map +1 -1
  10. package/dist/parsing/pg-literals.d.ts +10 -2
  11. package/dist/parsing/pg-literals.d.ts.map +1 -1
  12. package/dist/parsing/split.d.ts +27 -3
  13. package/dist/parsing/split.d.ts.map +1 -1
  14. package/dist/parsing/string-utils.d.ts +2 -4
  15. package/dist/parsing/string-utils.d.ts.map +1 -1
  16. package/dist/parsing/tokenize.d.ts +27 -17
  17. package/dist/parsing/tokenize.d.ts.map +1 -1
  18. package/dist/partial.d.ts +6 -6
  19. package/dist/partial.d.ts.map +1 -1
  20. package/dist/tables.d.ts +58 -13
  21. package/dist/tables.d.ts.map +1 -1
  22. package/dist/validation/dispatch.d.ts +7 -5
  23. package/dist/validation/dispatch.d.ts.map +1 -1
  24. package/dist/validation/joins.d.ts +3 -3
  25. package/dist/validation/joins.d.ts.map +1 -1
  26. package/dist/validation/return-derived.d.ts +8 -4
  27. package/dist/validation/return-derived.d.ts.map +1 -1
  28. package/dist/validation/return-derived.js.map +1 -1
  29. package/dist/validation/return-types.d.ts +1 -1
  30. package/dist/validation/return-types.d.ts.map +1 -1
  31. package/dist/validation/validate-columns.d.ts +27 -16
  32. package/dist/validation/validate-columns.d.ts.map +1 -1
  33. package/package.json +1 -1
  34. package/src/columns.ts +168 -32
  35. package/src/expressions.ts +589 -63
  36. package/src/parsing/extract.ts +72 -32
  37. package/src/parsing/normalize.ts +114 -10
  38. package/src/parsing/pg-literals.ts +32 -10
  39. package/src/parsing/split.ts +236 -72
  40. package/src/parsing/string-utils.ts +15 -15
  41. package/src/parsing/tokenize.ts +224 -146
  42. package/src/partial.ts +9 -15
  43. package/src/tables.ts +546 -183
  44. package/src/validation/dispatch.ts +58 -52
  45. package/src/validation/joins.ts +15 -19
  46. package/src/validation/return-derived.ts +60 -4
  47. package/src/validation/return-types.ts +9 -3
  48. package/src/validation/validate-columns.ts +161 -67
@@ -1,103 +1,166 @@
1
1
  // Tokenization, sentinels, operators, and SQL keyword sets.
2
- import type { CleanIdent, CleanLooseToken, CollapseSpaces, ReplaceAll, Split, Trim, TrimPunctuation } from "./string-utils.js";
2
+ import type { CleanIdent, CollapseSpaces, ReplaceAll, Trim, TrimPunctuation } from "./string-utils.js";
3
3
  import type { ExceedsLengthBudget, HasLineBreaks } from "./normalize.js";
4
4
  // Tokenization & parsing helpers
5
5
 
6
- export type Tokenize<N extends string> = CleanFilterTokens<Split<N, " ">>;
7
-
8
6
  // Sentinel token standing in for a TOP-LEVEL comma. It survives `MapClean`
9
7
  // (no stripped punctuation, non-empty identifier) whereas a bare `,` does not,
10
8
  // so it cleanly distinguishes a FROM-source separator from a comma nested in
11
9
  // parens / a string literal — which must still be dropped as before.
12
- export type CommaSep = "__tsqlcomma__";
10
+ // A control char unrepresentable in real SQL: 1 char instead of the old
11
+ // 13-char `__tsqlcomma__`, so every marked query string and every token list
12
+ // it flows through interns ~14 fewer chars per top-level comma. Neutral to
13
+ // the pipeline: not in `Punct`/`Whitespace`/`DQuotedPunct`/`OperatorToken`,
14
+ // and `Lowercase`/`CleanIdent` leave it intact.
15
+ export type CommaSep = "";
13
16
 
14
17
  // Replace only TOP-LEVEL commas (paren depth 0, outside single OR double quotes)
15
18
  // with the `CommaSep` sentinel (space-padded so it tokenizes on its own). Commas
16
19
  // nested inside parens (`count(a, b)`, FROM subqueries, `insert (x, y)`, value
17
20
  // tuples), string literals, or quoted identifiers (`users as "u,1"`) are left
18
- // verbatim and get stripped by `MapClean` as today. The `InDString` arm tracks
19
- // double-quoted identifiers so a comma inside a quoted table/column alias is not
20
- // mistaken for a FROM-source separator. Char-walk mirrors `SplitTopLevel` /
21
- // `StripComments`; step-bounded.
22
- export type MarkTopLevelCommas<
21
+ // verbatim and get stripped by `MapClean` as today.
22
+ //
23
+ // Segment-jump, not per-char (the old walk minted one growing-`Acc` string PER
24
+ // CHARACTER on every under-budget query). Each step advances to the LEFTMOST of
25
+ // the five state chars `,` `'` `"` `(` `)`, copying the whole run before it in a
26
+ // single mint; inside a quote it jumps straight to the closing quote, exactly
27
+ // like `LowercaseOutsideQuotesWorker` (`''` escapes exit+re-enter across two
28
+ // jumps; an unterminated quote at EOF copies the rest verbatim). The `Steps` cap
29
+ // counts JUMPS and yields `{ __c: [...] }` to the driver, so arbitrarily
30
+ // boundary-dense inputs still complete without a partial-output bail.
31
+ export type MarkTopLevelCommas<S extends string> =
32
+ string extends S
33
+ ? S
34
+ : MtcDrive<MtcWorker<S, [], false, false, "", []>>;
35
+
36
+ type MtcDrive<R> =
37
+ R extends { __c: [infer S extends string, infer D extends any[], infer Q1 extends boolean, infer Q2 extends boolean, infer Acc extends string] }
38
+ ? MtcDrive<MtcWorker<S, D, Q1, Q2, Acc, []>>
39
+ : R;
40
+
41
+ type MtcHasStruct<S extends string> =
42
+ S extends `${string}'${string}` ? true
43
+ : S extends `${string}"${string}` ? true
44
+ : S extends `${string}(${string}` ? true
45
+ : S extends `${string})${string}` ? true
46
+ : false;
47
+
48
+ type MtcWorker<
23
49
  S extends string,
24
- Depth extends any[] = [],
25
- InString extends boolean = false,
26
- Acc extends string = "",
27
- Steps extends any[] = [],
28
- InDString extends boolean = false
29
- > = string extends S
30
- ? S
31
- : Steps["length"] extends 1500
32
- ? `${Acc}${S}`
33
- : S extends `${infer C}${infer Rest}`
34
- ? InDString extends true
35
- ? MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], C extends `"` ? false : true>
36
- : C extends "'"
37
- ? MarkTopLevelCommas<Rest, Depth, InString extends true ? false : true, `${Acc}${C}`, [any, ...Steps], InDString>
38
- : InString extends true
39
- ? MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], InDString>
40
- : C extends `"`
41
- ? MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], true>
42
- : C extends "("
43
- ? MarkTopLevelCommas<Rest, [any, ...Depth], InString, `${Acc}${C}`, [any, ...Steps], InDString>
44
- : C extends ")"
45
- ? MarkTopLevelCommas<Rest, Depth extends [any, ...infer D] ? D : [], InString, `${Acc}${C}`, [any, ...Steps], InDString>
46
- : C extends ","
47
- ? Depth["length"] extends 0
48
- ? MarkTopLevelCommas<Rest, Depth, InString, `${Acc} ${CommaSep} `, [any, ...Steps], InDString>
49
- : MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], InDString>
50
- : MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], InDString>
51
- : Acc;
50
+ Depth extends any[],
51
+ InString extends boolean,
52
+ InDString extends boolean,
53
+ Acc extends string,
54
+ Steps extends any[]
55
+ > = Steps["length"] extends 450
56
+ ? { __c: [S, Depth, InString, InDString, Acc] }
57
+ : InString extends true
58
+ ? S extends `${infer P}'${infer R}`
59
+ ? MtcWorker<R, Depth, false, InDString, `${Acc}${P}'`, [any, ...Steps]>
60
+ : `${Acc}${S}`
61
+ : InDString extends true
62
+ ? S extends `${infer P}"${infer R}`
63
+ ? MtcWorker<R, Depth, InString, false, `${Acc}${P}"`, [any, ...Steps]>
64
+ : `${Acc}${S}`
65
+ : S extends `${infer P},${infer R}`
66
+ // a structural char in the run before the first comma → it is
67
+ // leftmost; defer to the struct jump
68
+ ? MtcHasStruct<P> extends true
69
+ ? MtcStructJump<S, Depth, Acc, Steps>
70
+ : Depth["length"] extends 0
71
+ ? MtcWorker<R, Depth, false, false, `${Acc}${P} ${CommaSep} `, [any, ...Steps]>
72
+ : MtcWorker<R, Depth, false, false, `${Acc}${P},`, [any, ...Steps]>
73
+ : MtcHasStruct<S> extends true
74
+ ? MtcStructJump<S, Depth, Acc, Steps>
75
+ : `${Acc}${S}`;
52
76
 
53
- // Token stream for the table/alias collectors: identical to `Tokenize` except
54
- // top-level commas survive as `CommaSep` tokens (so `from a, b` exposes its
55
- // source boundary). Used ONLY by `TablesInQuery` / `AliasesInQuery`.
77
+ // Leftmost of `'` / `"` / `(` / `)` (the caller guarantees at least one occurs
78
+ // before any comma). Pairwise narrowing: split on a candidate; if an
79
+ // earlier-class char appears in its prefix, that one is leftmost instead.
80
+ type MtcStructJump<
81
+ S extends string,
82
+ Depth extends any[],
83
+ Acc extends string,
84
+ Steps extends any[]
85
+ > = S extends `${infer P}'${infer R}`
86
+ ? P extends `${string}"${string}` | `${string}(${string}` | `${string})${string}`
87
+ ? MtcStructJump2<S, Depth, Acc, Steps>
88
+ : MtcWorker<R, Depth, true, false, `${Acc}${P}'`, [any, ...Steps]>
89
+ : MtcStructJump2<S, Depth, Acc, Steps>;
90
+
91
+ type MtcStructJump2<
92
+ S extends string,
93
+ Depth extends any[],
94
+ Acc extends string,
95
+ Steps extends any[]
96
+ > = S extends `${infer P}"${infer R}`
97
+ ? P extends `${string}(${string}` | `${string})${string}`
98
+ ? MtcStructJump3<S, Depth, Acc, Steps>
99
+ : MtcWorker<R, Depth, false, true, `${Acc}${P}"`, [any, ...Steps]>
100
+ : MtcStructJump3<S, Depth, Acc, Steps>;
101
+
102
+ type MtcStructJump3<
103
+ S extends string,
104
+ Depth extends any[],
105
+ Acc extends string,
106
+ Steps extends any[]
107
+ > = S extends `${infer P}(${infer R}`
108
+ ? P extends `${string})${string}`
109
+ ? S extends `${infer P2})${infer R2}`
110
+ ? MtcWorker<R2, Depth extends [any, ...infer D] ? D : [], false, false, `${Acc}${P2})`, [any, ...Steps]>
111
+ : `${Acc}${S}`
112
+ : MtcWorker<R, [any, ...Depth], false, false, `${Acc}${P}(`, [any, ...Steps]>
113
+ : S extends `${infer P2})${infer R2}`
114
+ ? MtcWorker<R2, Depth extends [any, ...infer D] ? D : [], false, false, `${Acc}${P2})`, [any, ...Steps]>
115
+ : `${Acc}${S}`;
116
+
117
+ // String view for the table/alias collectors: identical content to plain
118
+ // `Tokenize` input except top-level commas survive as `CommaSep` sentinels (so
119
+ // `from a, b` exposes its source boundary). The collectors walk this string
120
+ // DIRECTLY, word by word (the `Ct`/`Ca`/`Cn`/`Ta` scan walkers in tables.ts) —
121
+ // the former `SplitCollectorTokens` token-ARRAY build (and the
122
+ // collector-relevance filter that existed only to keep that array small) is
123
+ // gone: per the round-8/9 census, every array build/destructure step minted a
124
+ // unique-content tuple plus its apparent-`Array` types, while a word-jump
125
+ // string walk interns its substrings and counter tuples.
56
126
  //
57
127
  // Report-scale queries (multi-line, or very long) skip the comma-marking
58
- // char-walk and fall back to plain `Tokenize` — the same big-query light path
128
+ // walk and use the raw normalized string — the same big-query light path
59
129
  // `ValidateSQLNormalizedLightSelect` already takes. A comma cross-join in such a
60
130
  // query is negligibly rare, and avoiding the extra instantiation depth keeps the
61
131
  // largest analytics queries under the TS recursion limit.
62
- export type TokenizeTables<N extends string> =
132
+ export type CollectorScanView<N extends string> =
63
133
  HasLineBreaks<N> extends true
64
- ? Tokenize<N>
134
+ ? N
65
135
  : ExceedsLengthBudget<N> extends true
66
- ? Tokenize<N>
67
- : RestoreCleanFilterTokens<Split<MaybeMarkDQuotedSpaces<MarkTopLevelCommas<N>>, " ">>;
68
-
69
- export type TokenizeLoose<N extends string> =
70
- RestoreCleanLooseFilterTokens<
71
- Split<CollapseSpaces<RestoreWildcards<PadOperators<ProtectWildcards<MaybeMarkDQuotedSpaces<MaybeStripDQuotedPunct<N>>>>>>, " ">
72
- > extends infer Toks extends string[]
73
- ? N extends `${string}distinct ${string}`
74
- ? DropDistinctFrom<Toks>
75
- : Toks
76
- : [];
77
-
78
- // `IS [NOT] DISTINCT FROM` is a comparison operator: its `from` is operator text,
79
- // NOT a FROM clause / table-source boundary. The column ref-scanner skips a token
80
- // whose `Prev` is `from` (treating it as a table source), so the RHS expression of
81
- // the operator (`price IS DISTINCT FROM bogus_col`) escapes validation entirely
82
- // (round-13 D1/D2). Drop the operator `from` — the one directly preceded by
83
- // `distinct` from the token list so the RHS's `Prev` becomes `distinct`, which
84
- // `CanPrecedeColumn` already blesses, and the column is validated like any other.
85
- // `distinct` is immediately followed by the bare token `from` ONLY in this
86
- // operator, so the rewrite is unambiguous. The real FROM-clause `from` is untouched.
87
- export type DropDistinctFrom<
88
- Tokens extends string[],
89
- Acc extends string[] = [],
90
- Prev extends string = "",
91
- Steps extends any[] = []
92
- > = Steps["length"] extends 400
93
- ? [...Acc, ...Tokens]
94
- : Tokens extends [infer H extends string, ...infer R extends string[]]
95
- ? H extends "from"
96
- ? Prev extends "distinct"
97
- ? DropDistinctFrom<R, Acc, "from", [any, ...Steps]>
98
- : DropDistinctFrom<R, [...Acc, H], H, [any, ...Steps]>
99
- : DropDistinctFrom<R, [...Acc, H], H, [any, ...Steps]>
100
- : Acc;
136
+ ? N
137
+ : MaybeMarkDQuotedSpaces<MarkTopLevelCommas<N>>;
138
+
139
+ // The collector token for one raw word of `CollectorScanView`: sentinel-restored,
140
+ // then exactly the value the old split pushed (`TrimPunctuation<Trim<H>>`); `""`
141
+ // means the word is punctuation/whitespace-only and never occupied a token
142
+ // position (the old `CleanIdent<H> extends ""` empty-token filter — a non-empty
143
+ // `CleanIdent` guarantees a non-empty `TrimPunctuation<Trim<H>>`, so `""` is a
144
+ // safe drop sentinel). On the raw big-query path no sentinel can occur and the
145
+ // restore is a single failed template match.
146
+ export type CollectorToken<H extends string> =
147
+ ReplaceAll<H, DQuoteSpaceSentinel, " "> extends infer R extends string
148
+ ? CleanIdent<R> extends ""
149
+ ? ""
150
+ : TrimPunctuation<Trim<R>>
151
+ : never;
152
+
153
+ // The padded, space-collapsed string the column ref-scanners walk DIRECTLY —
154
+ // the string→string prefix of the old `TokenizeLoose` pipeline. The split into a
155
+ // token ARRAY (and the separate `DropDistinctFrom` array pass) is gone: per round-8
156
+ // census, every token-array build/destructure step minted a unique-content tuple
157
+ // plus its apparent-`Array` types, while the word-jump string walks that replaced
158
+ // them (`QualifiedRefScan` / `UnqualifiedRefScan` in columns.ts) intern their
159
+ // substrings and counters. Token semantics (per-word `CleanLooseToken` transform,
160
+ // sentinel restore, empty-token drop, `IS [NOT] DISTINCT FROM` handling) are
161
+ // reproduced verbatim inside the scan walkers.
162
+ export type LooseScanView<N extends string> =
163
+ CollapseSpaces<RestoreWildcards<PadOperators<ProtectWildcards<MaybePadModulo<MaybeMarkDQuotedSpaces<MaybeStripDQuotedPunct<N>>>>>>>;
101
164
 
102
165
  // Operator/comma characters that `PadOperators` would split on. Inside a
103
166
  // double-quoted identifier (`"u,1"`) these are part of the identifier, not
@@ -105,7 +168,7 @@ export type DropDistinctFrom<
105
168
  // ref-scan and falsely rejects an otherwise valid query. We drop them from inside
106
169
  // double-quoted spans before padding so the identifier stays a single token.
107
170
  export type DQuotedPunct =
108
- "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "|" | "&" | "!" | "?";
171
+ "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "%" | "|" | "&" | "!" | "?";
109
172
 
110
173
  // Only pay for the char-walk when there is actually a double quote to handle —
111
174
  // the overwhelmingly common no-quote query short-circuits to identity.
@@ -115,24 +178,35 @@ export type MaybeStripDQuotedPunct<S extends string> =
115
178
  // Quote-aware walk that removes `DQuotedPunct` characters located INSIDE a
116
179
  // double-quoted span while leaving the quote characters and everything outside
117
180
  // the quotes untouched. `"u,1"` -> `"u1"`; `"u1".id` (no inner punctuation) is
118
- // unchanged. Step-bounded against runaway.
181
+ // unchanged.
182
+ //
183
+ // Span-jump, not per-char: nothing outside a double-quoted span changes, so each
184
+ // step jumps to the leftmost `"`, copies the whole preceding run in one mint,
185
+ // finds the closing `"` and rewrites only the (short) span interior. Like the
186
+ // old walk, single quotes are NOT tracked — every `"` toggles. An unterminated
187
+ // `"` at EOF keeps stripping to the end (the old InDQ-at-EOF behavior).
119
188
  export type StripDQuotedPunct<
120
189
  S extends string,
121
- InDQ extends boolean = false,
122
190
  Acc extends string = "",
123
191
  Steps extends any[] = []
124
192
  > = string extends S
125
193
  ? S
126
- : Steps["length"] extends 1500
194
+ : Steps["length"] extends 300
195
+ ? `${Acc}${S}`
196
+ : S extends `${infer P}"${infer R}`
197
+ ? R extends `${infer Span}"${infer R2}`
198
+ ? StripDQuotedPunct<R2, `${Acc}${P}"${StripPunctChars<Span>}"`, [any, ...Steps]>
199
+ : `${Acc}${P}"${StripPunctChars<R>}`
200
+ : `${Acc}${S}`;
201
+
202
+ // Per-char strip over a (short) double-quoted span interior only.
203
+ type StripPunctChars<S extends string, Acc extends string = "", Steps extends any[] = []> =
204
+ Steps["length"] extends 200
127
205
  ? `${Acc}${S}`
128
206
  : S extends `${infer C}${infer Rest}`
129
- ? C extends `"`
130
- ? StripDQuotedPunct<Rest, InDQ extends true ? false : true, `${Acc}${C}`, [any, ...Steps]>
131
- : InDQ extends true
132
- ? C extends DQuotedPunct
133
- ? StripDQuotedPunct<Rest, InDQ, Acc, [any, ...Steps]>
134
- : StripDQuotedPunct<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
135
- : StripDQuotedPunct<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
207
+ ? C extends DQuotedPunct
208
+ ? StripPunctChars<Rest, Acc, [any, ...Steps]>
209
+ : StripPunctChars<Rest, `${Acc}${C}`, [any, ...Steps]>
136
210
  : Acc;
137
211
 
138
212
  // Sentinel standing in for a SPACE located INSIDE a double-quoted identifier.
@@ -142,72 +216,33 @@ export type StripDQuotedPunct<
142
216
  // tokens. Marking the inner spaces keeps the identifier a single token through
143
217
  // the space-split; `RestoreDQuotedSpaces` turns each sentinel back into a real
144
218
  // space per-token before `CleanIdent`/`MapClean` runs. Mirrors `StripDQuotedPunct`.
145
- export type DQuoteSpaceSentinel = "__tsqldqsp__";
219
+ // 1-char control sentinel (was the 12-char `__tsqldqsp__`) — same neutrality
220
+ // argument as `CommaSep`.
221
+ export type DQuoteSpaceSentinel = "";
146
222
 
147
223
  // Only pay for the char-walk when there is actually a double quote present — the
148
224
  // overwhelmingly common no-quote query short-circuits to identity.
149
225
  export type MaybeMarkDQuotedSpaces<S extends string> =
150
226
  S extends `${string}"${string}` ? MarkDQuotedSpaces<S> : S;
151
227
 
228
+ // Span-jump sibling of `StripDQuotedPunct`: copy the run before the leftmost
229
+ // `"` in one mint, then mark the span interior's spaces via `ReplaceAll`
230
+ // (spans are short identifiers). Single quotes are NOT tracked — every `"`
231
+ // toggles, exactly like the old per-char walk; an unterminated `"` keeps
232
+ // marking to EOF.
152
233
  export type MarkDQuotedSpaces<
153
234
  S extends string,
154
- InDQ extends boolean = false,
155
235
  Acc extends string = "",
156
236
  Steps extends any[] = []
157
237
  > = string extends S
158
238
  ? S
159
- : Steps["length"] extends 1500
239
+ : Steps["length"] extends 300
160
240
  ? `${Acc}${S}`
161
- : S extends `${infer C}${infer Rest}`
162
- ? C extends `"`
163
- ? MarkDQuotedSpaces<Rest, InDQ extends true ? false : true, `${Acc}${C}`, [any, ...Steps]>
164
- : InDQ extends true
165
- ? C extends " "
166
- ? MarkDQuotedSpaces<Rest, InDQ, `${Acc}${DQuoteSpaceSentinel}`, [any, ...Steps]>
167
- : MarkDQuotedSpaces<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
168
- : MarkDQuotedSpaces<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
169
- : Acc;
170
-
171
- // Fused token post-passes: one walk instead of the old
172
- // `FilterEmpty<MapClean<RestoreDQuotedSpaces<…>>>` three-walk chain. Each pass was
173
- // an independent element-wise map/filter, so composing them per token yields the
174
- // identical list (ordering preserved) while building the result spine once.
175
- //
176
- // The DQuote-space sentinel restore (`ReplaceAll<H, DQuoteSpaceSentinel, " ">`) lets
177
- // a quoted identifier that survived the space-split as one token (`"Order ID"`,
178
- // `"user alias".id`) clean to its true value. `CleanFilterTokens` is the no-restore
179
- // variant (plain `Tokenize`, which never marks sentinels).
180
- //
181
- // MapClean maps each token to `CleanIdent<H> extends "" ? "" : TrimPunctuation<Trim<H>>`
182
- // and FilterEmpty drops the `""`s. Since `CleanIdent = Lowercase<Unquote<TrimPunctuation<
183
- // Trim<S>>>>`, a non-empty `CleanIdent<H>` guarantees a non-empty `TrimPunctuation<Trim<H>>`,
184
- // so the kept value is never empty — the empty-token filter collapses to the single
185
- // `CleanIdent<H> extends ""` test. (The loose variant keeps an explicit empty filter
186
- // because `CleanLooseToken` can return `""` for a non-operator empty ident.)
187
- export type CleanFilterTokens<Tokens extends string[], Acc extends string[] = []> =
188
- Tokens extends [infer H extends string, ...infer R extends string[]]
189
- ? CleanIdent<H> extends ""
190
- ? CleanFilterTokens<R, Acc>
191
- : CleanFilterTokens<R, [...Acc, TrimPunctuation<Trim<H>>]>
192
- : Acc;
193
-
194
- export type RestoreCleanFilterTokens<Tokens extends string[], Acc extends string[] = []> =
195
- Tokens extends [infer H0 extends string, ...infer R extends string[]]
196
- ? ReplaceAll<H0, DQuoteSpaceSentinel, " "> extends infer H extends string
197
- ? CleanIdent<H> extends ""
198
- ? RestoreCleanFilterTokens<R, Acc>
199
- : RestoreCleanFilterTokens<R, [...Acc, TrimPunctuation<Trim<H>>]>
200
- : never
201
- : Acc;
202
-
203
- export type RestoreCleanLooseFilterTokens<Tokens extends string[], Acc extends string[] = []> =
204
- Tokens extends [infer H0 extends string, ...infer R extends string[]]
205
- ? CleanLooseToken<ReplaceAll<H0, DQuoteSpaceSentinel, " ">> extends infer M extends string
206
- ? M extends ""
207
- ? RestoreCleanLooseFilterTokens<R, Acc>
208
- : RestoreCleanLooseFilterTokens<R, [...Acc, M]>
209
- : never
210
- : Acc;
241
+ : S extends `${infer P}"${infer R}`
242
+ ? R extends `${infer Span}"${infer R2}`
243
+ ? MarkDQuotedSpaces<R2, `${Acc}${P}"${ReplaceAll<Span, " ", DQuoteSpaceSentinel>}"`, [any, ...Steps]>
244
+ : `${Acc}${P}"${ReplaceAll<R, " ", DQuoteSpaceSentinel>}`
245
+ : `${Acc}${S}`;
211
246
 
212
247
  // A validation-only view of a query: blank the CONTENTS of every single-quoted
213
248
  // string literal (`'anything'` -> `''`) and mask the interior spaces of every
@@ -224,7 +259,7 @@ export type RestoreCleanLooseFilterTokens<Tokens extends string[], Acc extends s
224
259
  // Blanking the literal removes both problems at once (round-12 S1–S5). Masking
225
260
  // double-quoted spaces stops the same markers matching inside a quoted output
226
261
  // alias (round-12 A1) while leaving the identifier intact for ref validation
227
- // (`TokenizeLoose` restores the sentinel). The caller gates this behind a quote
262
+ // (the ref-scan walkers restore the sentinel). The caller gates this behind a quote
228
263
  // and within-budget pre-check so report-scale queries never run the walk.
229
264
  export type ValidationScanView<S extends string> =
230
265
  S extends `${string}'${string}`
@@ -253,7 +288,7 @@ export type BlankSingleQuotedLiterals<
253
288
  : `${Acc}${S}`;
254
289
 
255
290
  export type OperatorToken =
256
- | "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "|" | "&" | "!" | "?"
291
+ | "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "%" | "|" | "&" | "!" | "?"
257
292
  // `~` / `!~` are PostgreSQL regex-match operators; `[` / `]` delimit array
258
293
  // literals/subscripts. Treating them as operators makes `CanPrecedeColumn`
259
294
  // bless the RHS expression so a column ref there is validated (e.g.
@@ -264,11 +299,54 @@ export type OperatorToken =
264
299
  export type PadOperator<S extends string, Op extends string> =
265
300
  ReplaceAll<S, Op, ` ${Op} `>;
266
301
 
302
+ // `.` followed by a control-character sentinel (was `.__wildcard__`) keeps the qualified `.*`
303
+ // out of `PadOperators`' `*` padding; the sentinel character itself is never padded.
267
304
  export type ProtectWildcards<S extends string> =
268
- ReplaceAll<S, ".*", ".__wildcard__">;
305
+ ReplaceAll<S, ".*", ".">;
269
306
 
270
307
  export type RestoreWildcards<S extends string> =
271
- ReplaceAll<S, ".__wildcard__", ".*">;
308
+ ReplaceAll<S, ".", ".*">;
309
+
310
+ // `%` is the modulo operator, but it is also the single most common character
311
+ // inside LIKE/ILIKE pattern literals (`'%smith%'`). The plain `PadOperator`
312
+ // chain pads EVERYWHERE — acceptable for the operators above because the
313
+ // validation path blanks string literals first on small queries — but
314
+ // `LooseScanView` also runs on NON-neutralized inputs (multi-line /
315
+ // over-budget queries skip `ValidationScanView`), where padding inside a
316
+ // literal would leak its words as blessed column candidates
317
+ // (`'%smith%'` -> `' % smith % '` -> `smith` validated -> false reject).
318
+ // So `%` gets its own quote-aware pad: literal interiors are copied
319
+ // verbatim, `%` is padded only between them. `%`-free strings (the
320
+ // overwhelming majority) short-circuit on a single pattern match.
321
+ export type MaybePadModulo<S extends string> =
322
+ S extends `${string}%${string}`
323
+ ? S extends `${string}'${string}`
324
+ ? PadModuloQuoteAware<S>
325
+ : PadOperator<S, "%">
326
+ : S;
327
+
328
+ // Pairwise span-jump (same shape as `BlankSingleQuotedLiterals`): hop to the
329
+ // leftmost `'`, pad the run BEFORE it, copy the `'…'` span verbatim, recurse
330
+ // on the tail. The `''` SQL escape pairs leftmost exactly like the blanking
331
+ // walk. An UNTERMINATED opener pads the run before it, then copies the
332
+ // literal tail verbatim (lenient: no padding inside what is textually a
333
+ // string literal). Depth is the NUMBER OF
334
+ // LITERALS, not string length; the step cap is a runaway backstop only — on
335
+ // cap the remainder passes through UNPADDED (pre-round behavior, so a cap
336
+ // hit can never cause a new rejection).
337
+ type PadModuloQuoteAware<
338
+ S extends string,
339
+ Acc extends string = "",
340
+ Steps extends any[] = []
341
+ > = string extends S
342
+ ? S
343
+ : Steps["length"] extends 300
344
+ ? `${Acc}${S}`
345
+ : S extends `${infer Pre}'${infer Rest}`
346
+ ? Rest extends `${infer Lit}'${infer After}`
347
+ ? PadModuloQuoteAware<After, `${Acc}${PadOperator<Pre, "%">}'${Lit}'`, [any, ...Steps]>
348
+ : `${Acc}${PadOperator<Pre, "%">}'${Rest}`
349
+ : `${Acc}${PadOperator<S, "%">}`;
272
350
 
273
351
  export type PadOperators<S extends string> =
274
352
  PadOperator<
package/src/partial.ts CHANGED
@@ -16,11 +16,10 @@ import type {
16
16
  NormalizeQuery,
17
17
  SplitOnDotClean,
18
18
  SplitTopLevel,
19
- TokenizeLoose,
20
19
  Trim
21
20
  } from "./parsing.js";
22
21
  import type {
23
- QualifiedColumnRefs,
22
+ QualifiedRefScan,
24
23
  ResolveAlias,
25
24
  StripDoubleQuotes,
26
25
  TableKeysByName
@@ -76,13 +75,13 @@ export type ColumnRefValidPartialWith<
76
75
  : true
77
76
  : true;
78
77
 
79
- // Validate every qualified column ref in a token list, partial-mode.
78
+ // Validate every qualified column ref in a fragment, partial-mode.
80
79
  export type QualifiedColumnRefsValidPartialFor<
81
80
  S extends DatabaseSchema,
82
81
  Tables extends string,
83
82
  Aliases extends string,
84
- LooseTokens extends string[]
85
- > = QualifiedColumnRefs<LooseTokens, S, Tables, Aliases> extends infer Cols
83
+ RefSeg extends string
84
+ > = QualifiedRefScan<RefSeg> extends infer Cols
86
85
  ? AllTrue<Cols extends string ? ColumnRefValidPartialWith<Cols, Tables, Aliases, S> : true>
87
86
  : true;
88
87
 
@@ -97,9 +96,7 @@ export type ValidateTableSourcePart<N extends string, S extends DatabaseSchema>
97
96
  TablesInQuery<N, S> extends infer Tables extends string
98
97
  ? AliasesInQuery<N, S> extends infer Aliases extends string
99
98
  ? AllPartTablesValid<Tables, S> extends true
100
- ? TokenizeLoose<N> extends infer Toks extends string[]
101
- ? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, Toks>
102
- : true
99
+ ? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, N>
103
100
  : false
104
101
  : true
105
102
  : true;
@@ -130,9 +127,7 @@ export type ValidateClausePart<Part extends string, S extends DatabaseSchema> =
130
127
  string extends Part
131
128
  ? false
132
129
  : NormalizeQuery<Part> extends infer N extends string
133
- ? TokenizeLoose<N> extends infer Toks extends string[]
134
- ? QualifiedColumnRefsValidPartialFor<S, never, never, Toks>
135
- : true
130
+ ? QualifiedColumnRefsValidPartialFor<S, never, never, N>
136
131
  : false;
137
132
 
138
133
  // Scope-aware clause validation: identical to ValidateClausePart, but the
@@ -147,13 +142,12 @@ export type ValidateClausePartScoped<
147
142
  string extends Part
148
143
  ? false
149
144
  : NormalizeQuery<Part> extends infer N extends string
150
- ? TokenizeLoose<N> extends infer Toks extends string[]
151
- ? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, Toks>
152
- : true
145
+ ? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, N>
153
146
  : false;
154
147
 
155
148
  // Expression-detector for a single SELECT-item token. HasSpecial covers space,
156
- // parens, arithmetic/comparison operators, comma, `::`, `||`. We additionally
149
+ // parens, arithmetic operators (+ - * / %), comparison operators, comma,
150
+ // `::`, `||`. We additionally
157
151
  // reject `[ ] " ' :` so array-indexing, quoted-with-space idents, json arrows,
158
152
  // and param/cast colons are treated as expressions (skipped, never falsely
159
153
  // rejected). A token clearing this guard is a plain identifier piece.