@kuindji/typed-sql 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -3
- package/dist/columns.d.ts +11 -3
- package/dist/columns.d.ts.map +1 -1
- package/dist/expressions.d.ts +73 -8
- package/dist/expressions.d.ts.map +1 -1
- package/dist/parsing/extract.d.ts +13 -9
- package/dist/parsing/extract.d.ts.map +1 -1
- package/dist/parsing/normalize.d.ts +3 -1
- package/dist/parsing/normalize.d.ts.map +1 -1
- package/dist/parsing/pg-literals.d.ts +10 -2
- package/dist/parsing/pg-literals.d.ts.map +1 -1
- package/dist/parsing/split.d.ts +27 -3
- package/dist/parsing/split.d.ts.map +1 -1
- package/dist/parsing/string-utils.d.ts +2 -4
- package/dist/parsing/string-utils.d.ts.map +1 -1
- package/dist/parsing/tokenize.d.ts +27 -17
- package/dist/parsing/tokenize.d.ts.map +1 -1
- package/dist/partial.d.ts +6 -6
- package/dist/partial.d.ts.map +1 -1
- package/dist/tables.d.ts +58 -13
- package/dist/tables.d.ts.map +1 -1
- package/dist/validation/dispatch.d.ts +7 -5
- package/dist/validation/dispatch.d.ts.map +1 -1
- package/dist/validation/joins.d.ts +3 -3
- package/dist/validation/joins.d.ts.map +1 -1
- package/dist/validation/validate-columns.d.ts +14 -14
- package/dist/validation/validate-columns.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/columns.ts +168 -32
- package/src/expressions.ts +550 -57
- package/src/parsing/extract.ts +72 -32
- package/src/parsing/normalize.ts +54 -8
- package/src/parsing/pg-literals.ts +32 -10
- package/src/parsing/split.ts +236 -72
- package/src/parsing/string-utils.ts +15 -15
- package/src/parsing/tokenize.ts +224 -146
- package/src/partial.ts +9 -15
- package/src/tables.ts +546 -214
- package/src/validation/dispatch.ts +58 -52
- package/src/validation/joins.ts +15 -19
- package/src/validation/validate-columns.ts +54 -64
|
@@ -69,6 +69,7 @@ export type HasSpecial<S extends string> =
|
|
|
69
69
|
S extends `${string}-${string}` ? true :
|
|
70
70
|
S extends `${string}*${string}` ? true :
|
|
71
71
|
S extends `${string}/${string}` ? true :
|
|
72
|
+
S extends `${string}%${string}` ? true :
|
|
72
73
|
S extends `${string}=${string}` ? true :
|
|
73
74
|
S extends `${string}<${string}` ? true :
|
|
74
75
|
S extends `${string}>${string}` ? true :
|
|
@@ -136,17 +137,21 @@ export type SplitLast<S extends string, Delim extends string> =
|
|
|
136
137
|
: [Head, Tail]
|
|
137
138
|
: [S, ""];
|
|
138
139
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
140
|
+
// Direct template-match split of a (≤3-part) dotted ref into cleaned segments.
|
|
141
|
+
// Replaces the old recursive `SplitOnDot` array build (its `[S]` base case and
|
|
142
|
+
// `[A, ...rest]` prepend minted tuples per qualified ref, and the 1/2/3-arm
|
|
143
|
+
// dispatch re-matched the built array three times). `${infer A}.${infer R}`
|
|
144
|
+
// binds the LEFTMOST dot, so A is the first segment exactly as before; a 4th
|
|
145
|
+
// segment (a dot remaining after the third split) yields `[]`, matching the old
|
|
146
|
+
// "no arm matches a 4+-tuple" fall-through.
|
|
142
147
|
export type SplitOnDotClean<S extends string> =
|
|
143
|
-
|
|
144
|
-
?
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
148
|
+
S extends `${infer A}.${infer R}`
|
|
149
|
+
? R extends `${infer B}.${infer R2}`
|
|
150
|
+
? R2 extends `${string}.${string}`
|
|
151
|
+
? []
|
|
152
|
+
: [CleanIdent<A>, CleanIdent<B>, CleanIdent<R2>]
|
|
153
|
+
: [CleanIdent<A>, CleanIdent<R>]
|
|
154
|
+
: [CleanIdent<S>];
|
|
150
155
|
|
|
151
156
|
export type MapClean<Tokens extends string[], Acc extends string[] = []> =
|
|
152
157
|
Tokens extends [infer H extends string, ...infer R extends string[]]
|
|
@@ -154,11 +159,6 @@ export type MapClean<Tokens extends string[], Acc extends string[] = []> =
|
|
|
154
159
|
: Acc;
|
|
155
160
|
|
|
156
161
|
|
|
157
|
-
export type MapCleanLoose<Tokens extends string[], Acc extends string[] = []> =
|
|
158
|
-
Tokens extends [infer H extends string, ...infer R extends string[]]
|
|
159
|
-
? MapCleanLoose<R, [...Acc, CleanLooseToken<H>]>
|
|
160
|
-
: Acc;
|
|
161
|
-
|
|
162
162
|
export type CleanLooseToken<S extends string> =
|
|
163
163
|
S extends OperatorToken
|
|
164
164
|
? S
|
package/src/parsing/tokenize.ts
CHANGED
|
@@ -1,103 +1,166 @@
|
|
|
1
1
|
// Tokenization, sentinels, operators, and SQL keyword sets.
|
|
2
|
-
import type { CleanIdent,
|
|
2
|
+
import type { CleanIdent, CollapseSpaces, ReplaceAll, Trim, TrimPunctuation } from "./string-utils.js";
|
|
3
3
|
import type { ExceedsLengthBudget, HasLineBreaks } from "./normalize.js";
|
|
4
4
|
// Tokenization & parsing helpers
|
|
5
5
|
|
|
6
|
-
export type Tokenize<N extends string> = CleanFilterTokens<Split<N, " ">>;
|
|
7
|
-
|
|
8
6
|
// Sentinel token standing in for a TOP-LEVEL comma. It survives `MapClean`
|
|
9
7
|
// (no stripped punctuation, non-empty identifier) whereas a bare `,` does not,
|
|
10
8
|
// so it cleanly distinguishes a FROM-source separator from a comma nested in
|
|
11
9
|
// parens / a string literal — which must still be dropped as before.
|
|
12
|
-
|
|
10
|
+
// A control char unrepresentable in real SQL: 1 char instead of the old
|
|
11
|
+
// 13-char `__tsqlcomma__`, so every marked query string and every token list
|
|
12
|
+
// it flows through interns 12 fewer chars per top-level comma. Neutral to
|
|
13
|
+
// the pipeline: not in `Punct`/`Whitespace`/`DQuotedPunct`/`OperatorToken`,
|
|
14
|
+
// and `Lowercase`/`CleanIdent` leave it intact.
|
|
15
|
+
export type CommaSep = "";
|
|
13
16
|
|
|
14
17
|
// Replace only TOP-LEVEL commas (paren depth 0, outside single OR double quotes)
|
|
15
18
|
// with the `CommaSep` sentinel (space-padded so it tokenizes on its own). Commas
|
|
16
19
|
// nested inside parens (`count(a, b)`, FROM subqueries, `insert (x, y)`, value
|
|
17
20
|
// tuples), string literals, or quoted identifiers (`users as "u,1"`) are left
|
|
18
|
-
// verbatim and get stripped by `MapClean` as today.
|
|
19
|
-
//
|
|
20
|
-
//
|
|
21
|
-
//
|
|
22
|
-
|
|
21
|
+
// verbatim and get stripped by `MapClean` as today.
|
|
22
|
+
//
|
|
23
|
+
// Segment-jump, not per-char (the old walk minted one growing-`Acc` string PER
|
|
24
|
+
// CHARACTER on every under-budget query). Each step advances to the LEFTMOST of
|
|
25
|
+
// the five state chars `,` `'` `"` `(` `)`, copying the whole run before it in a
|
|
26
|
+
// single mint; inside a quote it jumps straight to the closing quote, exactly
|
|
27
|
+
// like `LowercaseOutsideQuotesWorker` (`''` escapes exit+re-enter across two
|
|
28
|
+
// jumps; an unterminated quote at EOF copies the rest verbatim). The `Steps` cap
|
|
29
|
+
// counts JUMPS and yields `{ __c: [...] }` to the driver, so arbitrarily
|
|
30
|
+
// boundary-dense inputs still complete without a partial-output bail.
|
|
31
|
+
export type MarkTopLevelCommas<S extends string> =
|
|
32
|
+
string extends S
|
|
33
|
+
? S
|
|
34
|
+
: MtcDrive<MtcWorker<S, [], false, false, "", []>>;
|
|
35
|
+
|
|
36
|
+
type MtcDrive<R> =
|
|
37
|
+
R extends { __c: [infer S extends string, infer D extends any[], infer Q1 extends boolean, infer Q2 extends boolean, infer Acc extends string] }
|
|
38
|
+
? MtcDrive<MtcWorker<S, D, Q1, Q2, Acc, []>>
|
|
39
|
+
: R;
|
|
40
|
+
|
|
41
|
+
type MtcHasStruct<S extends string> =
|
|
42
|
+
S extends `${string}'${string}` ? true
|
|
43
|
+
: S extends `${string}"${string}` ? true
|
|
44
|
+
: S extends `${string}(${string}` ? true
|
|
45
|
+
: S extends `${string})${string}` ? true
|
|
46
|
+
: false;
|
|
47
|
+
|
|
48
|
+
type MtcWorker<
|
|
23
49
|
S extends string,
|
|
24
|
-
Depth extends any[]
|
|
25
|
-
InString extends boolean
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
> =
|
|
30
|
-
? S
|
|
31
|
-
:
|
|
32
|
-
? `${
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
: MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], InDString>
|
|
51
|
-
: Acc;
|
|
50
|
+
Depth extends any[],
|
|
51
|
+
InString extends boolean,
|
|
52
|
+
InDString extends boolean,
|
|
53
|
+
Acc extends string,
|
|
54
|
+
Steps extends any[]
|
|
55
|
+
> = Steps["length"] extends 450
|
|
56
|
+
? { __c: [S, Depth, InString, InDString, Acc] }
|
|
57
|
+
: InString extends true
|
|
58
|
+
? S extends `${infer P}'${infer R}`
|
|
59
|
+
? MtcWorker<R, Depth, false, InDString, `${Acc}${P}'`, [any, ...Steps]>
|
|
60
|
+
: `${Acc}${S}`
|
|
61
|
+
: InDString extends true
|
|
62
|
+
? S extends `${infer P}"${infer R}`
|
|
63
|
+
? MtcWorker<R, Depth, InString, false, `${Acc}${P}"`, [any, ...Steps]>
|
|
64
|
+
: `${Acc}${S}`
|
|
65
|
+
: S extends `${infer P},${infer R}`
|
|
66
|
+
// a structural char in the run before the first comma → it is
|
|
67
|
+
// leftmost; defer to the struct jump
|
|
68
|
+
? MtcHasStruct<P> extends true
|
|
69
|
+
? MtcStructJump<S, Depth, Acc, Steps>
|
|
70
|
+
: Depth["length"] extends 0
|
|
71
|
+
? MtcWorker<R, Depth, false, false, `${Acc}${P} ${CommaSep} `, [any, ...Steps]>
|
|
72
|
+
: MtcWorker<R, Depth, false, false, `${Acc}${P},`, [any, ...Steps]>
|
|
73
|
+
: MtcHasStruct<S> extends true
|
|
74
|
+
? MtcStructJump<S, Depth, Acc, Steps>
|
|
75
|
+
: `${Acc}${S}`;
|
|
52
76
|
|
|
53
|
-
//
|
|
54
|
-
//
|
|
55
|
-
//
|
|
77
|
+
// Leftmost of `'` / `"` / `(` / `)` (the caller guarantees at least one occurs
|
|
78
|
+
// before any comma). Pairwise narrowing: split on a candidate; if an
|
|
79
|
+
// earlier-class char appears in its prefix, that one is leftmost instead.
|
|
80
|
+
type MtcStructJump<
|
|
81
|
+
S extends string,
|
|
82
|
+
Depth extends any[],
|
|
83
|
+
Acc extends string,
|
|
84
|
+
Steps extends any[]
|
|
85
|
+
> = S extends `${infer P}'${infer R}`
|
|
86
|
+
? P extends `${string}"${string}` | `${string}(${string}` | `${string})${string}`
|
|
87
|
+
? MtcStructJump2<S, Depth, Acc, Steps>
|
|
88
|
+
: MtcWorker<R, Depth, true, false, `${Acc}${P}'`, [any, ...Steps]>
|
|
89
|
+
: MtcStructJump2<S, Depth, Acc, Steps>;
|
|
90
|
+
|
|
91
|
+
type MtcStructJump2<
|
|
92
|
+
S extends string,
|
|
93
|
+
Depth extends any[],
|
|
94
|
+
Acc extends string,
|
|
95
|
+
Steps extends any[]
|
|
96
|
+
> = S extends `${infer P}"${infer R}`
|
|
97
|
+
? P extends `${string}(${string}` | `${string})${string}`
|
|
98
|
+
? MtcStructJump3<S, Depth, Acc, Steps>
|
|
99
|
+
: MtcWorker<R, Depth, false, true, `${Acc}${P}"`, [any, ...Steps]>
|
|
100
|
+
: MtcStructJump3<S, Depth, Acc, Steps>;
|
|
101
|
+
|
|
102
|
+
type MtcStructJump3<
|
|
103
|
+
S extends string,
|
|
104
|
+
Depth extends any[],
|
|
105
|
+
Acc extends string,
|
|
106
|
+
Steps extends any[]
|
|
107
|
+
> = S extends `${infer P}(${infer R}`
|
|
108
|
+
? P extends `${string})${string}`
|
|
109
|
+
? S extends `${infer P2})${infer R2}`
|
|
110
|
+
? MtcWorker<R2, Depth extends [any, ...infer D] ? D : [], false, false, `${Acc}${P2})`, [any, ...Steps]>
|
|
111
|
+
: `${Acc}${S}`
|
|
112
|
+
: MtcWorker<R, [any, ...Depth], false, false, `${Acc}${P}(`, [any, ...Steps]>
|
|
113
|
+
: S extends `${infer P2})${infer R2}`
|
|
114
|
+
? MtcWorker<R2, Depth extends [any, ...infer D] ? D : [], false, false, `${Acc}${P2})`, [any, ...Steps]>
|
|
115
|
+
: `${Acc}${S}`;
|
|
116
|
+
|
|
117
|
+
// String view for the table/alias collectors: identical content to plain
|
|
118
|
+
// `Tokenize` input except top-level commas survive as `CommaSep` sentinels (so
|
|
119
|
+
// `from a, b` exposes its source boundary). The collectors walk this string
|
|
120
|
+
// DIRECTLY, word by word (the `Ct`/`Ca`/`Cn`/`Ta` scan walkers in tables.ts) —
|
|
121
|
+
// the former `SplitCollectorTokens` token-ARRAY build (and the
|
|
122
|
+
// collector-relevance filter that existed only to keep that array small) is
|
|
123
|
+
// gone: per the round-8/9 census, every array build/destructure step minted a
|
|
124
|
+
// unique-content tuple plus its apparent-`Array` types, while a word-jump
|
|
125
|
+
// string walk interns its substrings and counter tuples.
|
|
56
126
|
//
|
|
57
127
|
// Report-scale queries (multi-line, or very long) skip the comma-marking
|
|
58
|
-
// char-walk and
|
|
128
|
+
// walk and use the raw normalized string — the same big-query light path
|
|
59
129
|
// `ValidateSQLNormalizedLightSelect` already takes. A comma cross-join in such a
|
|
60
130
|
// query is negligibly rare, and avoiding the extra instantiation depth keeps the
|
|
61
131
|
// largest analytics queries under the TS recursion limit.
|
|
62
|
-
export type
|
|
132
|
+
export type CollectorScanView<N extends string> =
|
|
63
133
|
HasLineBreaks<N> extends true
|
|
64
|
-
?
|
|
134
|
+
? N
|
|
65
135
|
: ExceedsLengthBudget<N> extends true
|
|
66
|
-
?
|
|
67
|
-
:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
//
|
|
84
|
-
//
|
|
85
|
-
//
|
|
86
|
-
//
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
: Tokens extends [infer H extends string, ...infer R extends string[]]
|
|
95
|
-
? H extends "from"
|
|
96
|
-
? Prev extends "distinct"
|
|
97
|
-
? DropDistinctFrom<R, Acc, "from", [any, ...Steps]>
|
|
98
|
-
: DropDistinctFrom<R, [...Acc, H], H, [any, ...Steps]>
|
|
99
|
-
: DropDistinctFrom<R, [...Acc, H], H, [any, ...Steps]>
|
|
100
|
-
: Acc;
|
|
136
|
+
? N
|
|
137
|
+
: MaybeMarkDQuotedSpaces<MarkTopLevelCommas<N>>;
|
|
138
|
+
|
|
139
|
+
// The collector token for one raw word of `CollectorScanView`: sentinel-restored,
|
|
140
|
+
// then exactly the value the old split pushed (`TrimPunctuation<Trim<H>>`); `""`
|
|
141
|
+
// means the word is punctuation/whitespace-only and never occupied a token
|
|
142
|
+
// position (the old `CleanIdent<H> extends ""` empty-token filter — a non-empty
|
|
143
|
+
// `CleanIdent` guarantees a non-empty `TrimPunctuation<Trim<H>>`, so `""` is a
|
|
144
|
+
// safe drop sentinel). On the raw big-query path no sentinel can occur and the
|
|
145
|
+
// restore is a single failed template match.
|
|
146
|
+
export type CollectorToken<H extends string> =
|
|
147
|
+
ReplaceAll<H, DQuoteSpaceSentinel, " "> extends infer R extends string
|
|
148
|
+
? CleanIdent<R> extends ""
|
|
149
|
+
? ""
|
|
150
|
+
: TrimPunctuation<Trim<R>>
|
|
151
|
+
: never;
|
|
152
|
+
|
|
153
|
+
// The padded, space-collapsed string the column ref-scanners walk DIRECTLY —
|
|
154
|
+
// the string→string prefix of the old `TokenizeLoose` pipeline. The split into a
|
|
155
|
+
// token ARRAY (and the separate `DropDistinctFrom` array pass) is gone: per round-8
|
|
156
|
+
// census, every token-array build/destructure step minted a unique-content tuple
|
|
157
|
+
// plus its apparent-`Array` types, while the word-jump string walks that replaced
|
|
158
|
+
// them (`QualifiedRefScan` / `UnqualifiedRefScan` in columns.ts) intern their
|
|
159
|
+
// substrings and counters. Token semantics (per-word `CleanLooseToken` transform,
|
|
160
|
+
// sentinel restore, empty-token drop, `IS [NOT] DISTINCT FROM` handling) are
|
|
161
|
+
// reproduced verbatim inside the scan walkers.
|
|
162
|
+
export type LooseScanView<N extends string> =
|
|
163
|
+
CollapseSpaces<RestoreWildcards<PadOperators<ProtectWildcards<MaybePadModulo<MaybeMarkDQuotedSpaces<MaybeStripDQuotedPunct<N>>>>>>>;
|
|
101
164
|
|
|
102
165
|
// Operator/comma characters that `PadOperators` would split on. Inside a
|
|
103
166
|
// double-quoted identifier (`"u,1"`) these are part of the identifier, not
|
|
@@ -105,7 +168,7 @@ export type DropDistinctFrom<
|
|
|
105
168
|
// ref-scan and falsely rejects an otherwise valid query. We drop them from inside
|
|
106
169
|
// double-quoted spans before padding so the identifier stays a single token.
|
|
107
170
|
export type DQuotedPunct =
|
|
108
|
-
"(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "|" | "&" | "!" | "?";
|
|
171
|
+
"(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "%" | "|" | "&" | "!" | "?";
|
|
109
172
|
|
|
110
173
|
// Only pay for the quote-span walk when there is actually a double quote to handle —
|
|
111
174
|
// the overwhelmingly common no-quote query short-circuits to identity.
|
|
@@ -115,24 +178,35 @@ export type MaybeStripDQuotedPunct<S extends string> =
|
|
|
115
178
|
// Quote-aware walk that removes `DQuotedPunct` characters located INSIDE a
|
|
116
179
|
// double-quoted span while leaving the quote characters and everything outside
|
|
117
180
|
// the quotes untouched. `"u,1"` -> `"u1"`; `"u1".id` (no inner punctuation) is
|
|
118
|
-
// unchanged.
|
|
181
|
+
// unchanged.
|
|
182
|
+
//
|
|
183
|
+
// Span-jump, not per-char: nothing outside a double-quoted span changes, so each
|
|
184
|
+
// step jumps to the leftmost `"`, copies the whole preceding run in one mint,
|
|
185
|
+
// finds the closing `"` and rewrites only the (short) span interior. Like the
|
|
186
|
+
// old walk, single quotes are NOT tracked — every `"` toggles. An unterminated
|
|
187
|
+
// `"` at EOF keeps stripping to the end (the old InDQ-at-EOF behavior).
|
|
119
188
|
export type StripDQuotedPunct<
|
|
120
189
|
S extends string,
|
|
121
|
-
InDQ extends boolean = false,
|
|
122
190
|
Acc extends string = "",
|
|
123
191
|
Steps extends any[] = []
|
|
124
192
|
> = string extends S
|
|
125
193
|
? S
|
|
126
|
-
: Steps["length"] extends
|
|
194
|
+
: Steps["length"] extends 300
|
|
195
|
+
? `${Acc}${S}`
|
|
196
|
+
: S extends `${infer P}"${infer R}`
|
|
197
|
+
? R extends `${infer Span}"${infer R2}`
|
|
198
|
+
? StripDQuotedPunct<R2, `${Acc}${P}"${StripPunctChars<Span>}"`, [any, ...Steps]>
|
|
199
|
+
: `${Acc}${P}"${StripPunctChars<R>}`
|
|
200
|
+
: `${Acc}${S}`;
|
|
201
|
+
|
|
202
|
+
// Per-char strip over a (short) double-quoted span interior only.
|
|
203
|
+
type StripPunctChars<S extends string, Acc extends string = "", Steps extends any[] = []> =
|
|
204
|
+
Steps["length"] extends 200
|
|
127
205
|
? `${Acc}${S}`
|
|
128
206
|
: S extends `${infer C}${infer Rest}`
|
|
129
|
-
? C extends
|
|
130
|
-
?
|
|
131
|
-
:
|
|
132
|
-
? C extends DQuotedPunct
|
|
133
|
-
? StripDQuotedPunct<Rest, InDQ, Acc, [any, ...Steps]>
|
|
134
|
-
: StripDQuotedPunct<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
|
|
135
|
-
: StripDQuotedPunct<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
|
|
207
|
+
? C extends DQuotedPunct
|
|
208
|
+
? StripPunctChars<Rest, Acc, [any, ...Steps]>
|
|
209
|
+
: StripPunctChars<Rest, `${Acc}${C}`, [any, ...Steps]>
|
|
136
210
|
: Acc;
|
|
137
211
|
|
|
138
212
|
// Sentinel standing in for a SPACE located INSIDE a double-quoted identifier.
|
|
@@ -142,72 +216,33 @@ export type StripDQuotedPunct<
|
|
|
142
216
|
// tokens. Marking the inner spaces keeps the identifier a single token through
|
|
143
217
|
// the space-split; `RestoreDQuotedSpaces` turns each sentinel back into a real
|
|
144
218
|
// space per-token before `CleanIdent`/`MapClean` runs. Mirrors `StripDQuotedPunct`.
|
|
145
|
-
|
|
219
|
+
// 1-char control sentinel (was the 12-char `__tsqldqsp__`) — same neutrality
|
|
220
|
+
// argument as `CommaSep`.
|
|
221
|
+
export type DQuoteSpaceSentinel = "";
|
|
146
222
|
|
|
147
223
|
// Only pay for the quote-span walk when there is actually a double quote present — the
|
|
148
224
|
// overwhelmingly common no-quote query short-circuits to identity.
|
|
149
225
|
export type MaybeMarkDQuotedSpaces<S extends string> =
|
|
150
226
|
S extends `${string}"${string}` ? MarkDQuotedSpaces<S> : S;
|
|
151
227
|
|
|
228
|
+
// Span-jump sibling of `StripDQuotedPunct`: copy the run before the leftmost
|
|
229
|
+
// `"` in one mint, then mark the span interior's spaces via `ReplaceAll`
|
|
230
|
+
// (spans are short identifiers). Single quotes are NOT tracked — every `"`
|
|
231
|
+
// toggles, exactly like the old per-char walk; an unterminated `"` keeps
|
|
232
|
+
// marking to EOF.
|
|
152
233
|
export type MarkDQuotedSpaces<
|
|
153
234
|
S extends string,
|
|
154
|
-
InDQ extends boolean = false,
|
|
155
235
|
Acc extends string = "",
|
|
156
236
|
Steps extends any[] = []
|
|
157
237
|
> = string extends S
|
|
158
238
|
? S
|
|
159
|
-
: Steps["length"] extends
|
|
239
|
+
: Steps["length"] extends 300
|
|
160
240
|
? `${Acc}${S}`
|
|
161
|
-
: S extends `${infer
|
|
162
|
-
?
|
|
163
|
-
? MarkDQuotedSpaces<
|
|
164
|
-
:
|
|
165
|
-
|
|
166
|
-
? MarkDQuotedSpaces<Rest, InDQ, `${Acc}${DQuoteSpaceSentinel}`, [any, ...Steps]>
|
|
167
|
-
: MarkDQuotedSpaces<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
|
|
168
|
-
: MarkDQuotedSpaces<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
|
|
169
|
-
: Acc;
|
|
170
|
-
|
|
171
|
-
// Fused token post-passes: one walk instead of the old
|
|
172
|
-
// `FilterEmpty<MapClean<RestoreDQuotedSpaces<…>>>` three-walk chain. Each pass was
|
|
173
|
-
// an independent element-wise map/filter, so composing them per token yields the
|
|
174
|
-
// identical list (ordering preserved) while building the result spine once.
|
|
175
|
-
//
|
|
176
|
-
// The DQuote-space sentinel restore (`ReplaceAll<H, DQuoteSpaceSentinel, " ">`) lets
|
|
177
|
-
// a quoted identifier that survived the space-split as one token (`"Order ID"`,
|
|
178
|
-
// `"user alias".id`) clean to its true value. `CleanFilterTokens` is the no-restore
|
|
179
|
-
// variant (plain `Tokenize`, which never marks sentinels).
|
|
180
|
-
//
|
|
181
|
-
// MapClean maps each token to `CleanIdent<H> extends "" ? "" : TrimPunctuation<Trim<H>>`
|
|
182
|
-
// and FilterEmpty drops the `""`s. Since `CleanIdent = Lowercase<Unquote<TrimPunctuation<
|
|
183
|
-
// Trim<S>>>>`, a non-empty `CleanIdent<H>` guarantees a non-empty `TrimPunctuation<Trim<H>>`,
|
|
184
|
-
// so the kept value is never empty — the empty-token filter collapses to the single
|
|
185
|
-
// `CleanIdent<H> extends ""` test. (The loose variant keeps an explicit empty filter
|
|
186
|
-
// because `CleanLooseToken` can return `""` for a non-operator empty ident.)
|
|
187
|
-
export type CleanFilterTokens<Tokens extends string[], Acc extends string[] = []> =
|
|
188
|
-
Tokens extends [infer H extends string, ...infer R extends string[]]
|
|
189
|
-
? CleanIdent<H> extends ""
|
|
190
|
-
? CleanFilterTokens<R, Acc>
|
|
191
|
-
: CleanFilterTokens<R, [...Acc, TrimPunctuation<Trim<H>>]>
|
|
192
|
-
: Acc;
|
|
193
|
-
|
|
194
|
-
export type RestoreCleanFilterTokens<Tokens extends string[], Acc extends string[] = []> =
|
|
195
|
-
Tokens extends [infer H0 extends string, ...infer R extends string[]]
|
|
196
|
-
? ReplaceAll<H0, DQuoteSpaceSentinel, " "> extends infer H extends string
|
|
197
|
-
? CleanIdent<H> extends ""
|
|
198
|
-
? RestoreCleanFilterTokens<R, Acc>
|
|
199
|
-
: RestoreCleanFilterTokens<R, [...Acc, TrimPunctuation<Trim<H>>]>
|
|
200
|
-
: never
|
|
201
|
-
: Acc;
|
|
202
|
-
|
|
203
|
-
export type RestoreCleanLooseFilterTokens<Tokens extends string[], Acc extends string[] = []> =
|
|
204
|
-
Tokens extends [infer H0 extends string, ...infer R extends string[]]
|
|
205
|
-
? CleanLooseToken<ReplaceAll<H0, DQuoteSpaceSentinel, " ">> extends infer M extends string
|
|
206
|
-
? M extends ""
|
|
207
|
-
? RestoreCleanLooseFilterTokens<R, Acc>
|
|
208
|
-
: RestoreCleanLooseFilterTokens<R, [...Acc, M]>
|
|
209
|
-
: never
|
|
210
|
-
: Acc;
|
|
241
|
+
: S extends `${infer P}"${infer R}`
|
|
242
|
+
? R extends `${infer Span}"${infer R2}`
|
|
243
|
+
? MarkDQuotedSpaces<R2, `${Acc}${P}"${ReplaceAll<Span, " ", DQuoteSpaceSentinel>}"`, [any, ...Steps]>
|
|
244
|
+
: `${Acc}${P}"${ReplaceAll<R, " ", DQuoteSpaceSentinel>}`
|
|
245
|
+
: `${Acc}${S}`;
|
|
211
246
|
|
|
212
247
|
// A validation-only view of a query: blank the CONTENTS of every single-quoted
|
|
213
248
|
// string literal (`'anything'` -> `''`) and mask the interior spaces of every
|
|
@@ -224,7 +259,7 @@ export type RestoreCleanLooseFilterTokens<Tokens extends string[], Acc extends s
|
|
|
224
259
|
// Blanking the literal removes both problems at once (round-12 S1–S5). Masking
|
|
225
260
|
// double-quoted spaces stops the same markers matching inside a quoted output
|
|
226
261
|
// alias (round-12 A1) while leaving the identifier intact for ref validation
|
|
227
|
-
// (
|
|
262
|
+
// (the ref-scan walkers restore the sentinel). The caller gates this behind a quote
|
|
228
263
|
// and within-budget pre-check so report-scale queries never run the walk.
|
|
229
264
|
export type ValidationScanView<S extends string> =
|
|
230
265
|
S extends `${string}'${string}`
|
|
@@ -253,7 +288,7 @@ export type BlankSingleQuotedLiterals<
|
|
|
253
288
|
: `${Acc}${S}`;
|
|
254
289
|
|
|
255
290
|
export type OperatorToken =
|
|
256
|
-
| "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "|" | "&" | "!" | "?"
|
|
291
|
+
| "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "%" | "|" | "&" | "!" | "?"
|
|
257
292
|
// `~` / `!~` are PostgreSQL regex-match operators; `[` / `]` delimit array
|
|
258
293
|
// literals/subscripts. Treating them as operators makes `CanPrecedeColumn`
|
|
259
294
|
// bless the RHS expression so a column ref there is validated (e.g.
|
|
@@ -264,11 +299,54 @@ export type OperatorToken =
|
|
|
264
299
|
export type PadOperator<S extends string, Op extends string> =
|
|
265
300
|
ReplaceAll<S, Op, ` ${Op} `>;
|
|
266
301
|
|
|
302
|
+
// `.` followed by a 1-char control sentinel (was `.__wildcard__`) keeps the qualified `.*`
|
|
303
|
+
// out of `PadOperators`' `*` padding; the control character itself is never padded.
|
|
267
304
|
export type ProtectWildcards<S extends string> =
|
|
268
|
-
ReplaceAll<S, ".*", "
|
|
305
|
+
ReplaceAll<S, ".*", ".">;
|
|
269
306
|
|
|
270
307
|
export type RestoreWildcards<S extends string> =
|
|
271
|
-
ReplaceAll<S, "
|
|
308
|
+
ReplaceAll<S, ".", ".*">;
|
|
309
|
+
|
|
310
|
+
// `%` is the modulo operator, but it is also the single most common character
|
|
311
|
+
// inside LIKE/ILIKE pattern literals (`'%smith%'`). The plain `PadOperator`
|
|
312
|
+
// chain pads EVERYWHERE — acceptable for the operators above because the
|
|
313
|
+
// validation path blanks string literals first on small queries — but
|
|
314
|
+
// `LooseScanView` also runs on NON-neutralized inputs (multi-line /
|
|
315
|
+
// over-budget queries skip `ValidationScanView`), where padding inside a
|
|
316
|
+
// literal would leak its words as blessed column candidates
|
|
317
|
+
// (`'%smith%'` -> `' % smith % '` -> `smith` validated -> false reject).
|
|
318
|
+
// So `%` gets its own quote-aware pad: literal interiors are copied
|
|
319
|
+
// verbatim, `%` is padded only between them. `%`-free strings (the
|
|
320
|
+
// overwhelming majority) short-circuit on a single pattern match.
|
|
321
|
+
export type MaybePadModulo<S extends string> =
|
|
322
|
+
S extends `${string}%${string}`
|
|
323
|
+
? S extends `${string}'${string}`
|
|
324
|
+
? PadModuloQuoteAware<S>
|
|
325
|
+
: PadOperator<S, "%">
|
|
326
|
+
: S;
|
|
327
|
+
|
|
328
|
+
// Pairwise span-jump (same shape as `BlankSingleQuotedLiterals`): hop to the
|
|
329
|
+
// leftmost `'`, pad the run BEFORE it, copy the `'…'` span verbatim, recurse
|
|
330
|
+
// on the tail. The `''` SQL escape pairs leftmost exactly like the blanking
|
|
331
|
+
// walk. An UNTERMINATED opener pads the run before it, then copies the
|
|
332
|
+
// literal tail verbatim (lenient: no padding inside what is textually a
|
|
333
|
+
// string literal). Recursion depth is the NUMBER OF
|
|
334
|
+
// LITERALS, not string length; the step cap is a runaway backstop only — on
|
|
335
|
+
// cap the remainder passes through UNPADDED (pre-round behavior, so a cap
|
|
336
|
+
// hit can never cause a new rejection).
|
|
337
|
+
type PadModuloQuoteAware<
|
|
338
|
+
S extends string,
|
|
339
|
+
Acc extends string = "",
|
|
340
|
+
Steps extends any[] = []
|
|
341
|
+
> = string extends S
|
|
342
|
+
? S
|
|
343
|
+
: Steps["length"] extends 300
|
|
344
|
+
? `${Acc}${S}`
|
|
345
|
+
: S extends `${infer Pre}'${infer Rest}`
|
|
346
|
+
? Rest extends `${infer Lit}'${infer After}`
|
|
347
|
+
? PadModuloQuoteAware<After, `${Acc}${PadOperator<Pre, "%">}'${Lit}'`, [any, ...Steps]>
|
|
348
|
+
: `${Acc}${PadOperator<Pre, "%">}'${Rest}`
|
|
349
|
+
: `${Acc}${PadOperator<S, "%">}`;
|
|
272
350
|
|
|
273
351
|
export type PadOperators<S extends string> =
|
|
274
352
|
PadOperator<
|
package/src/partial.ts
CHANGED
|
@@ -16,11 +16,10 @@ import type {
|
|
|
16
16
|
NormalizeQuery,
|
|
17
17
|
SplitOnDotClean,
|
|
18
18
|
SplitTopLevel,
|
|
19
|
-
TokenizeLoose,
|
|
20
19
|
Trim
|
|
21
20
|
} from "./parsing.js";
|
|
22
21
|
import type {
|
|
23
|
-
|
|
22
|
+
QualifiedRefScan,
|
|
24
23
|
ResolveAlias,
|
|
25
24
|
StripDoubleQuotes,
|
|
26
25
|
TableKeysByName
|
|
@@ -76,13 +75,13 @@ export type ColumnRefValidPartialWith<
|
|
|
76
75
|
: true
|
|
77
76
|
: true;
|
|
78
77
|
|
|
79
|
-
// Validate every qualified column ref in a
|
|
78
|
+
// Validate every qualified column ref in a fragment, partial-mode.
|
|
80
79
|
export type QualifiedColumnRefsValidPartialFor<
|
|
81
80
|
S extends DatabaseSchema,
|
|
82
81
|
Tables extends string,
|
|
83
82
|
Aliases extends string,
|
|
84
|
-
|
|
85
|
-
> =
|
|
83
|
+
RefSeg extends string
|
|
84
|
+
> = QualifiedRefScan<RefSeg> extends infer Cols
|
|
86
85
|
? AllTrue<Cols extends string ? ColumnRefValidPartialWith<Cols, Tables, Aliases, S> : true>
|
|
87
86
|
: true;
|
|
88
87
|
|
|
@@ -97,9 +96,7 @@ export type ValidateTableSourcePart<N extends string, S extends DatabaseSchema>
|
|
|
97
96
|
TablesInQuery<N, S> extends infer Tables extends string
|
|
98
97
|
? AliasesInQuery<N, S> extends infer Aliases extends string
|
|
99
98
|
? AllPartTablesValid<Tables, S> extends true
|
|
100
|
-
?
|
|
101
|
-
? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, Toks>
|
|
102
|
-
: true
|
|
99
|
+
? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, N>
|
|
103
100
|
: false
|
|
104
101
|
: true
|
|
105
102
|
: true;
|
|
@@ -130,9 +127,7 @@ export type ValidateClausePart<Part extends string, S extends DatabaseSchema> =
|
|
|
130
127
|
string extends Part
|
|
131
128
|
? false
|
|
132
129
|
: NormalizeQuery<Part> extends infer N extends string
|
|
133
|
-
?
|
|
134
|
-
? QualifiedColumnRefsValidPartialFor<S, never, never, Toks>
|
|
135
|
-
: true
|
|
130
|
+
? QualifiedColumnRefsValidPartialFor<S, never, never, N>
|
|
136
131
|
: false;
|
|
137
132
|
|
|
138
133
|
// Scope-aware clause validation: identical to ValidateClausePart, but the
|
|
@@ -147,13 +142,12 @@ export type ValidateClausePartScoped<
|
|
|
147
142
|
string extends Part
|
|
148
143
|
? false
|
|
149
144
|
: NormalizeQuery<Part> extends infer N extends string
|
|
150
|
-
?
|
|
151
|
-
? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, Toks>
|
|
152
|
-
: true
|
|
145
|
+
? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, N>
|
|
153
146
|
: false;
|
|
154
147
|
|
|
155
148
|
// Expression-detector for a single SELECT-item token. HasSpecial covers space,
|
|
156
|
-
// parens, arithmetic/comparison operators, comma,
|
|
149
|
+
// parens, arithmetic operators (+ - * / %), comparison operators, comma,
|
|
150
|
+
// `::`, `||`. We additionally
|
|
157
151
|
// reject `[ ] " ' :` so array-indexing, quoted-with-space idents, json arrows,
|
|
158
152
|
// and param/cast colons are treated as expressions (skipped, never falsely
|
|
159
153
|
// rejected). A token clearing this guard is a plain identifier piece.
|