npm - @kuindji/typed-sql - Versions diffs - 0.2.0 → 0.4.0 - Mend

@kuindji/typed-sql 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/README.md +11 -3
package/dist/columns.d.ts +11 -3
package/dist/columns.d.ts.map +1 -1
package/dist/expressions.d.ts +84 -13
package/dist/expressions.d.ts.map +1 -1
package/dist/parsing/extract.d.ts +13 -9
package/dist/parsing/extract.d.ts.map +1 -1
package/dist/parsing/normalize.d.ts +9 -3
package/dist/parsing/normalize.d.ts.map +1 -1
package/dist/parsing/pg-literals.d.ts +10 -2
package/dist/parsing/pg-literals.d.ts.map +1 -1
package/dist/parsing/split.d.ts +27 -3
package/dist/parsing/split.d.ts.map +1 -1
package/dist/parsing/string-utils.d.ts +2 -4
package/dist/parsing/string-utils.d.ts.map +1 -1
package/dist/parsing/tokenize.d.ts +27 -17
package/dist/parsing/tokenize.d.ts.map +1 -1
package/dist/partial.d.ts +6 -6
package/dist/partial.d.ts.map +1 -1
package/dist/tables.d.ts +58 -13
package/dist/tables.d.ts.map +1 -1
package/dist/validation/dispatch.d.ts +7 -5
package/dist/validation/dispatch.d.ts.map +1 -1
package/dist/validation/joins.d.ts +3 -3
package/dist/validation/joins.d.ts.map +1 -1
package/dist/validation/return-derived.d.ts +8 -4
package/dist/validation/return-derived.d.ts.map +1 -1
package/dist/validation/return-derived.js.map +1 -1
package/dist/validation/return-types.d.ts +1 -1
package/dist/validation/return-types.d.ts.map +1 -1
package/dist/validation/validate-columns.d.ts +27 -16
package/dist/validation/validate-columns.d.ts.map +1 -1
package/package.json +1 -1
package/src/columns.ts +168 -32
package/src/expressions.ts +589 -63
package/src/parsing/extract.ts +72 -32
package/src/parsing/normalize.ts +114 -10
package/src/parsing/pg-literals.ts +32 -10
package/src/parsing/split.ts +236 -72
package/src/parsing/string-utils.ts +15 -15
package/src/parsing/tokenize.ts +224 -146
package/src/partial.ts +9 -15
package/src/tables.ts +546 -183
package/src/validation/dispatch.ts +58 -52
package/src/validation/joins.ts +15 -19
package/src/validation/return-derived.ts +60 -4
package/src/validation/return-types.ts +9 -3
package/src/validation/validate-columns.ts +161 -67

package/src/parsing/tokenize.ts CHANGED Viewed

@@ -1,103 +1,166 @@
 // Tokenization, sentinels, operators, and SQL keyword sets.
-import type { CleanIdent, CleanLooseToken, CollapseSpaces, ReplaceAll, Split, Trim, TrimPunctuation } from "./string-utils.js";
+import type { CleanIdent, CollapseSpaces, ReplaceAll, Trim, TrimPunctuation } from "./string-utils.js";
 import type { ExceedsLengthBudget, HasLineBreaks } from "./normalize.js";
 // Tokenization & parsing helpers
-export type Tokenize<N extends string> = CleanFilterTokens<Split<N, " ">>;
 // Sentinel token standing in for a TOP-LEVEL comma. It survives `MapClean`
 // (no stripped punctuation, non-empty identifier) whereas a bare `,` does not,
 // so it cleanly distinguishes a FROM-source separator from a comma nested in
 // parens / a string literal — which must still be dropped as before.
-export type CommaSep = "__tsqlcomma__";
+// A control char unrepresentable in real SQL: 1 char instead of the old
+// 13-char `__tsqlcomma__`, so every marked query string and every token list
+// it flows through interns 12 fewer chars per top-level comma. Neutral to
+// the pipeline: not in `Punct`/`Whitespace`/`DQuotedPunct`/`OperatorToken`,
+// and `Lowercase`/`CleanIdent` leave it intact.
+export type CommaSep = "";
 // Replace only TOP-LEVEL commas (paren depth 0, outside single OR double quotes)
 // with the `CommaSep` sentinel (space-padded so it tokenizes on its own). Commas
 // nested inside parens (`count(a, b)`, FROM subqueries, `insert (x, y)`, value
 // tuples), string literals, or quoted identifiers (`users as "u,1"`) are left
-// verbatim and get stripped by `MapClean` as today. The `InDString` arm tracks
-// double-quoted identifiers so a comma inside a quoted table/column alias is not
-// mistaken for a FROM-source separator. Char-walk mirrors `SplitTopLevel` /
-// `StripComments`; step-bounded.
-export type MarkTopLevelCommas<
+// verbatim and get stripped by `MapClean` as today.
+//
+// Segment-jump, not per-char (the old walk minted one growing-`Acc` string PER
+// CHARACTER on every under-budget query). Each step advances to the LEFTMOST of
+// the five state chars `,` `'` `"` `(` `)`, copying the whole run before it in a
+// single mint; inside a quote it jumps straight to the closing quote, exactly
+// like `LowercaseOutsideQuotesWorker` (`''` escapes exit+re-enter across two
+// jumps; an unterminated quote at EOF copies the rest verbatim). The `Steps` cap
+// counts JUMPS and yields `{ __c: [...] }` to the driver, so arbitrarily
+// boundary-dense inputs still complete without a partial-output bail.
+export type MarkTopLevelCommas<S extends string> =
+    string extends S
+        ? S
+        : MtcDrive<MtcWorker<S, [], false, false, "", []>>;
+type MtcDrive<R> =
+    R extends { __c: [infer S extends string, infer D extends any[], infer Q1 extends boolean, infer Q2 extends boolean, infer Acc extends string] }
+        ? MtcDrive<MtcWorker<S, D, Q1, Q2, Acc, []>>
+        : R;
+type MtcHasStruct<S extends string> =
+    S extends `${string}'${string}` ? true
+    : S extends `${string}"${string}` ? true
+    : S extends `${string}(${string}` ? true
+    : S extends `${string})${string}` ? true
+    : false;
+type MtcWorker<
     S extends string,
-    Depth extends any[] = [],
-    InString extends boolean = false,
-    Acc extends string = "",
-    Steps extends any[] = [],
-    InDString extends boolean = false
-> = string extends S
-    ? S
-    : Steps["length"] extends 1500
-        ? `${Acc}${S}`
-        : S extends `${infer C}${infer Rest}`
-            ? InDString extends true
-                ? MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], C extends `"` ? false : true>
-                : C extends "'"
-                    ? MarkTopLevelCommas<Rest, Depth, InString extends true ? false : true, `${Acc}${C}`, [any, ...Steps], InDString>
-                    : InString extends true
-                        ? MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], InDString>
-                        : C extends `"`
-                            ? MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], true>
-                            : C extends "("
-                                ? MarkTopLevelCommas<Rest, [any, ...Depth], InString, `${Acc}${C}`, [any, ...Steps], InDString>
-                                : C extends ")"
-                                    ? MarkTopLevelCommas<Rest, Depth extends [any, ...infer D] ? D : [], InString, `${Acc}${C}`, [any, ...Steps], InDString>
-                                    : C extends ","
-                                        ? Depth["length"] extends 0
-                                            ? MarkTopLevelCommas<Rest, Depth, InString, `${Acc} ${CommaSep} `, [any, ...Steps], InDString>
-                                            : MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], InDString>
-                                        : MarkTopLevelCommas<Rest, Depth, InString, `${Acc}${C}`, [any, ...Steps], InDString>
-            : Acc;
+    Depth extends any[],
+    InString extends boolean,
+    InDString extends boolean,
+    Acc extends string,
+    Steps extends any[]
+> = Steps["length"] extends 450
+    ? { __c: [S, Depth, InString, InDString, Acc] }
+    : InString extends true
+        ? S extends `${infer P}'${infer R}`
+            ? MtcWorker<R, Depth, false, InDString, `${Acc}${P}'`, [any, ...Steps]>
+            : `${Acc}${S}`
+        : InDString extends true
+            ? S extends `${infer P}"${infer R}`
+                ? MtcWorker<R, Depth, InString, false, `${Acc}${P}"`, [any, ...Steps]>
+                : `${Acc}${S}`
+            : S extends `${infer P},${infer R}`
+                // a structural char in the run before the first comma → it is
+                // leftmost; defer to the struct jump
+                ? MtcHasStruct<P> extends true
+                    ? MtcStructJump<S, Depth, Acc, Steps>
+                    : Depth["length"] extends 0
+                        ? MtcWorker<R, Depth, false, false, `${Acc}${P} ${CommaSep} `, [any, ...Steps]>
+                        : MtcWorker<R, Depth, false, false, `${Acc}${P},`, [any, ...Steps]>
+                : MtcHasStruct<S> extends true
+                    ? MtcStructJump<S, Depth, Acc, Steps>
+                    : `${Acc}${S}`;
-// Token stream for the table/alias collectors: identical to `Tokenize` except
-// top-level commas survive as `CommaSep` tokens (so `from a, b` exposes its
-// source boundary). Used ONLY by `TablesInQuery` / `AliasesInQuery`.
+// Leftmost of `'` / `"` / `(` / `)` (the caller guarantees at least one occurs
+// before any comma). Pairwise narrowing: split on a candidate; if a char of a
+// not-yet-tried class appears in its prefix, the next stage handles it instead.
+type MtcStructJump<
+    S extends string,
+    Depth extends any[],
+    Acc extends string,
+    Steps extends any[]
+> = S extends `${infer P}'${infer R}`
+    ? P extends `${string}"${string}` | `${string}(${string}` | `${string})${string}`
+        ? MtcStructJump2<S, Depth, Acc, Steps>
+        : MtcWorker<R, Depth, true, false, `${Acc}${P}'`, [any, ...Steps]>
+    : MtcStructJump2<S, Depth, Acc, Steps>;
+type MtcStructJump2<
+    S extends string,
+    Depth extends any[],
+    Acc extends string,
+    Steps extends any[]
+> = S extends `${infer P}"${infer R}`
+    ? P extends `${string}(${string}` | `${string})${string}`
+        ? MtcStructJump3<S, Depth, Acc, Steps>
+        : MtcWorker<R, Depth, false, true, `${Acc}${P}"`, [any, ...Steps]>
+    : MtcStructJump3<S, Depth, Acc, Steps>;
+type MtcStructJump3<
+    S extends string,
+    Depth extends any[],
+    Acc extends string,
+    Steps extends any[]
+> = S extends `${infer P}(${infer R}`
+    ? P extends `${string})${string}`
+        ? S extends `${infer P2})${infer R2}`
+            ? MtcWorker<R2, Depth extends [any, ...infer D] ? D : [], false, false, `${Acc}${P2})`, [any, ...Steps]>
+            : `${Acc}${S}`
+        : MtcWorker<R, [any, ...Depth], false, false, `${Acc}${P}(`, [any, ...Steps]>
+    : S extends `${infer P2})${infer R2}`
+        ? MtcWorker<R2, Depth extends [any, ...infer D] ? D : [], false, false, `${Acc}${P2})`, [any, ...Steps]>
+        : `${Acc}${S}`;
+// String view for the table/alias collectors: identical content to plain
+// `Tokenize` input except top-level commas survive as `CommaSep` sentinels (so
+// `from a, b` exposes its source boundary). The collectors walk this string
+// DIRECTLY, word by word (the `Ct`/`Ca`/`Cn`/`Ta` scan walkers in tables.ts) —
+// the former `SplitCollectorTokens` token-ARRAY build (and the
+// collector-relevance filter that existed only to keep that array small) is
+// gone: per the round-8/9 census, every array build/destructure step minted a
+// unique-content tuple plus its apparent-`Array` types, while a word-jump
+// string walk interns its substrings and counter tuples.
 //
 // Report-scale queries (multi-line, or very long) skip the comma-marking
-// char-walk and fall back to plain `Tokenize` — the same big-query light path
+// walk and use the raw normalized string — the same big-query light path
 // `ValidateSQLNormalizedLightSelect` already takes. A comma cross-join in such a
 // query is negligibly rare, and avoiding the extra instantiation depth keeps the
 // largest analytics queries under the TS recursion limit.
-export type TokenizeTables<N extends string> =
+export type CollectorScanView<N extends string> =
     HasLineBreaks<N> extends true
-        ? Tokenize<N>
+        ? N
         : ExceedsLengthBudget<N> extends true
-            ? Tokenize<N>
-            : RestoreCleanFilterTokens<Split<MaybeMarkDQuotedSpaces<MarkTopLevelCommas<N>>, " ">>;
-export type TokenizeLoose<N extends string> =
-    RestoreCleanLooseFilterTokens<
-        Split<CollapseSpaces<RestoreWildcards<PadOperators<ProtectWildcards<MaybeMarkDQuotedSpaces<MaybeStripDQuotedPunct<N>>>>>>, " ">
-    > extends infer Toks extends string[]
-        ? N extends `${string}distinct ${string}`
-            ? DropDistinctFrom<Toks>
-            : Toks
-        : [];
-// `IS [NOT] DISTINCT FROM` is a comparison operator: its `from` is operator text,
-// NOT a FROM clause / table-source boundary. The column ref-scanner skips a token
-// whose `Prev` is `from` (treating it as a table source), so the RHS expression of
-// the operator (`price IS DISTINCT FROM bogus_col`) escapes validation entirely
-// (round-13 D1/D2). Drop the operator `from` — the one directly preceded by
-// `distinct` — from the token list so the RHS's `Prev` becomes `distinct`, which
-// `CanPrecedeColumn` already blesses, and the column is validated like any other.
-// `distinct` is immediately followed by the bare token `from` ONLY in this
-// operator, so the rewrite is unambiguous. The real FROM-clause `from` is untouched.
-export type DropDistinctFrom<
-    Tokens extends string[],
-    Acc extends string[] = [],
-    Prev extends string = "",
-    Steps extends any[] = []
-> = Steps["length"] extends 400
-    ? [...Acc, ...Tokens]
-    : Tokens extends [infer H extends string, ...infer R extends string[]]
-        ? H extends "from"
-            ? Prev extends "distinct"
-                ? DropDistinctFrom<R, Acc, "from", [any, ...Steps]>
-                : DropDistinctFrom<R, [...Acc, H], H, [any, ...Steps]>
-            : DropDistinctFrom<R, [...Acc, H], H, [any, ...Steps]>
-        : Acc;
+            ? N
+            : MaybeMarkDQuotedSpaces<MarkTopLevelCommas<N>>;
+// The collector token for one raw word of `CollectorScanView`: sentinel-restored,
+// then exactly the value the old split pushed (`TrimPunctuation<Trim<H>>`); `""`
+// means the word is punctuation/whitespace-only and never occupied a token
+// position (the old `CleanIdent<H> extends ""` empty-token filter — a non-empty
+// `CleanIdent` guarantees a non-empty `TrimPunctuation<Trim<H>>`, so `""` is a
+// safe drop sentinel). On the raw big-query path no sentinel can occur and the
+// restore is a single failed template match.
+export type CollectorToken<H extends string> =
+    ReplaceAll<H, DQuoteSpaceSentinel, " "> extends infer R extends string
+        ? CleanIdent<R> extends ""
+            ? ""
+            : TrimPunctuation<Trim<R>>
+        : never;
+// The padded, space-collapsed string the column ref-scanners walk DIRECTLY —
+// the string→string prefix of the old `TokenizeLoose` pipeline. The split into a
+// token ARRAY (and the separate `DropDistinctFrom` array pass) is gone: per round-8
+// census, every token-array build/destructure step minted a unique-content tuple
+// plus its apparent-`Array` types, while the word-jump string walks that replaced
+// them (`QualifiedRefScan` / `UnqualifiedRefScan` in columns.ts) intern their
+// substrings and counters. Token semantics (per-word `CleanLooseToken` transform,
+// sentinel restore, empty-token drop, `IS [NOT] DISTINCT FROM` handling) are
+// reproduced verbatim inside the scan walkers.
+export type LooseScanView<N extends string> =
+    CollapseSpaces<RestoreWildcards<PadOperators<ProtectWildcards<MaybePadModulo<MaybeMarkDQuotedSpaces<MaybeStripDQuotedPunct<N>>>>>>>;
 // Operator/comma characters that `PadOperators` would split on. Inside a
 // double-quoted identifier (`"u,1"`) these are part of the identifier, not
@@ -105,7 +168,7 @@ export type DropDistinctFrom<
 // ref-scan and falsely rejects an otherwise valid query. We drop them from inside
 // double-quoted spans before padding so the identifier stays a single token.
 export type DQuotedPunct =
-    "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "|" | "&" | "!" | "?";
+    "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "%" | "|" | "&" | "!" | "?";
 // Only pay for the char-walk when there is actually a double quote to handle —
 // the overwhelmingly common no-quote query short-circuits to identity.
@@ -115,24 +178,35 @@ export type MaybeStripDQuotedPunct<S extends string> =
 // Quote-aware walk that removes `DQuotedPunct` characters located INSIDE a
 // double-quoted span while leaving the quote characters and everything outside
 // the quotes untouched. `"u,1"` -> `"u1"`; `"u1".id` (no inner punctuation) is
-// unchanged. Step-bounded against runaway.
+// unchanged.
+//
+// Span-jump, not per-char: nothing outside a double-quoted span changes, so each
+// step jumps to the leftmost `"`, copies the whole preceding run in one mint,
+// finds the closing `"` and rewrites only the (short) span interior. Like the
+// old walk, single quotes are NOT tracked — every `"` toggles. An unterminated
+// `"` at EOF keeps stripping to the end (the old InDQ-at-EOF behavior).
 export type StripDQuotedPunct<
     S extends string,
-    InDQ extends boolean = false,
     Acc extends string = "",
     Steps extends any[] = []
 > = string extends S
     ? S
-    : Steps["length"] extends 1500
+    : Steps["length"] extends 300
+        ? `${Acc}${S}`
+        : S extends `${infer P}"${infer R}`
+            ? R extends `${infer Span}"${infer R2}`
+                ? StripDQuotedPunct<R2, `${Acc}${P}"${StripPunctChars<Span>}"`, [any, ...Steps]>
+                : `${Acc}${P}"${StripPunctChars<R>}`
+            : `${Acc}${S}`;
+// Per-char strip over a (short) double-quoted span interior only.
+type StripPunctChars<S extends string, Acc extends string = "", Steps extends any[] = []> =
+    Steps["length"] extends 200
         ? `${Acc}${S}`
         : S extends `${infer C}${infer Rest}`
-            ? C extends `"`
-                ? StripDQuotedPunct<Rest, InDQ extends true ? false : true, `${Acc}${C}`, [any, ...Steps]>
-                : InDQ extends true
-                    ? C extends DQuotedPunct
-                        ? StripDQuotedPunct<Rest, InDQ, Acc, [any, ...Steps]>
-                        : StripDQuotedPunct<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
-                    : StripDQuotedPunct<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
+            ? C extends DQuotedPunct
+                ? StripPunctChars<Rest, Acc, [any, ...Steps]>
+                : StripPunctChars<Rest, `${Acc}${C}`, [any, ...Steps]>
             : Acc;
 // Sentinel standing in for a SPACE located INSIDE a double-quoted identifier.
@@ -142,72 +216,33 @@ export type StripDQuotedPunct<
 // tokens. Marking the inner spaces keeps the identifier a single token through
 // the space-split; `RestoreDQuotedSpaces` turns each sentinel back into a real
 // space per-token before `CleanIdent`/`MapClean` runs. Mirrors `StripDQuotedPunct`.
-export type DQuoteSpaceSentinel = "__tsqldqsp__";
+// 1-char control sentinel (was the 12-char `__tsqldqsp__`) — same neutrality
+// argument as `CommaSep`.
+export type DQuoteSpaceSentinel = "";
 // Only pay for the char-walk when there is actually a double quote present — the
 // overwhelmingly common no-quote query short-circuits to identity.
 export type MaybeMarkDQuotedSpaces<S extends string> =
     S extends `${string}"${string}` ? MarkDQuotedSpaces<S> : S;
+// Span-jump sibling of `StripDQuotedPunct`: copy the run before the leftmost
+// `"` in one mint, then mark the span interior's spaces via `ReplaceAll`
+// (spans are short identifiers). Single quotes are NOT tracked — every `"`
+// toggles, exactly like the old per-char walk; an unterminated `"` keeps
+// marking to EOF.
 export type MarkDQuotedSpaces<
     S extends string,
-    InDQ extends boolean = false,
     Acc extends string = "",
     Steps extends any[] = []
 > = string extends S
     ? S
-    : Steps["length"] extends 1500
+    : Steps["length"] extends 300
         ? `${Acc}${S}`
-        : S extends `${infer C}${infer Rest}`
-            ? C extends `"`
-                ? MarkDQuotedSpaces<Rest, InDQ extends true ? false : true, `${Acc}${C}`, [any, ...Steps]>
-                : InDQ extends true
-                    ? C extends " "
-                        ? MarkDQuotedSpaces<Rest, InDQ, `${Acc}${DQuoteSpaceSentinel}`, [any, ...Steps]>
-                        : MarkDQuotedSpaces<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
-                    : MarkDQuotedSpaces<Rest, InDQ, `${Acc}${C}`, [any, ...Steps]>
-            : Acc;
-// Fused token post-passes: one walk instead of the old
-// `FilterEmpty<MapClean<RestoreDQuotedSpaces<…>>>` three-walk chain. Each pass was
-// an independent element-wise map/filter, so composing them per token yields the
-// identical list (ordering preserved) while building the result spine once.
-//
-// The DQuote-space sentinel restore (`ReplaceAll<H, DQuoteSpaceSentinel, " ">`) lets
-// a quoted identifier that survived the space-split as one token (`"Order ID"`,
-// `"user alias".id`) clean to its true value. `CleanFilterTokens` is the no-restore
-// variant (plain `Tokenize`, which never marks sentinels).
-//
-// MapClean maps each token to `CleanIdent<H> extends "" ? "" : TrimPunctuation<Trim<H>>`
-// and FilterEmpty drops the `""`s. Since `CleanIdent = Lowercase<Unquote<TrimPunctuation<
-// Trim<S>>>>`, a non-empty `CleanIdent<H>` guarantees a non-empty `TrimPunctuation<Trim<H>>`,
-// so the kept value is never empty — the empty-token filter collapses to the single
-// `CleanIdent<H> extends ""` test. (The loose variant keeps an explicit empty filter
-// because `CleanLooseToken` can return `""` for a non-operator empty ident.)
-export type CleanFilterTokens<Tokens extends string[], Acc extends string[] = []> =
-    Tokens extends [infer H extends string, ...infer R extends string[]]
-        ? CleanIdent<H> extends ""
-            ? CleanFilterTokens<R, Acc>
-            : CleanFilterTokens<R, [...Acc, TrimPunctuation<Trim<H>>]>
-        : Acc;
-export type RestoreCleanFilterTokens<Tokens extends string[], Acc extends string[] = []> =
-    Tokens extends [infer H0 extends string, ...infer R extends string[]]
-        ? ReplaceAll<H0, DQuoteSpaceSentinel, " "> extends infer H extends string
-            ? CleanIdent<H> extends ""
-                ? RestoreCleanFilterTokens<R, Acc>
-                : RestoreCleanFilterTokens<R, [...Acc, TrimPunctuation<Trim<H>>]>
-            : never
-        : Acc;
-export type RestoreCleanLooseFilterTokens<Tokens extends string[], Acc extends string[] = []> =
-    Tokens extends [infer H0 extends string, ...infer R extends string[]]
-        ? CleanLooseToken<ReplaceAll<H0, DQuoteSpaceSentinel, " ">> extends infer M extends string
-            ? M extends ""
-                ? RestoreCleanLooseFilterTokens<R, Acc>
-                : RestoreCleanLooseFilterTokens<R, [...Acc, M]>
-            : never
-        : Acc;
+        : S extends `${infer P}"${infer R}`
+            ? R extends `${infer Span}"${infer R2}`
+                ? MarkDQuotedSpaces<R2, `${Acc}${P}"${ReplaceAll<Span, " ", DQuoteSpaceSentinel>}"`, [any, ...Steps]>
+                : `${Acc}${P}"${ReplaceAll<R, " ", DQuoteSpaceSentinel>}`
+            : `${Acc}${S}`;
 // A validation-only view of a query: blank the CONTENTS of every single-quoted
 // string literal (`'anything'` -> `''`) and mask the interior spaces of every
@@ -224,7 +259,7 @@ export type RestoreCleanLooseFilterTokens<Tokens extends string[], Acc extends s
 // Blanking the literal removes both problems at once (round-12 S1–S5). Masking
 // double-quoted spaces stops the same markers matching inside a quoted output
 // alias (round-12 A1) while leaving the identifier intact for ref validation
-// (`TokenizeLoose` restores the sentinel). The caller gates this behind a quote
+// (the ref-scan walkers restore the sentinel). The caller gates this behind a quote
 // and within-budget pre-check so report-scale queries never run the walk.
 export type ValidationScanView<S extends string> =
     S extends `${string}'${string}`
@@ -253,7 +288,7 @@ export type BlankSingleQuotedLiterals<
             : `${Acc}${S}`;
 export type OperatorToken =
-    | "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "|" | "&" | "!" | "?"
+    | "(" | ")" | "," | "=" | "<" | ">" | "+" | "-" | "*" | "/" | "%" | "|" | "&" | "!" | "?"
     // `~` / `!~` are PostgreSQL regex-match operators; `[` / `]` delimit array
     // literals/subscripts. Treating them as operators makes `CanPrecedeColumn`
     // bless the RHS expression so a column ref there is validated (e.g.
@@ -264,11 +299,54 @@ export type OperatorToken =
 export type PadOperator<S extends string, Op extends string> =
     ReplaceAll<S, Op, ` ${Op} `>;
+// `.` + a 1-char control sentinel (was `.__wildcard__`) keeps the qualified `.*`
+// out of `PadOperators`' `*` padding; the control char itself is never padded.
 export type ProtectWildcards<S extends string> =
-    ReplaceAll<S, ".*", ".__wildcard__">;
+    ReplaceAll<S, ".*", ".">;
 export type RestoreWildcards<S extends string> =
-    ReplaceAll<S, ".__wildcard__", ".*">;
+    ReplaceAll<S, ".", ".*">;
+// `%` is the modulo operator, but it is also the single most common character
+// inside LIKE/ILIKE pattern literals (`'%smith%'`). The plain `PadOperator`
+// chain pads EVERYWHERE — acceptable for the operators above because the
+// validation path blanks string literals first on small queries — but
+// `LooseScanView` also runs on NON-neutralized inputs (multi-line /
+// over-budget queries skip `ValidationScanView`), where padding inside a
+// literal would leak its words as blessed column candidates
+// (`'%smith%'` -> `' % smith % '` -> `smith` validated -> false reject).
+// So `%` gets its own quote-aware pad: literal interiors are copied
+// verbatim, `%` is padded only between them. `%`-free strings (the
+// overwhelming majority) short-circuit on a single pattern match.
+export type MaybePadModulo<S extends string> =
+    S extends `${string}%${string}`
+        ? S extends `${string}'${string}`
+            ? PadModuloQuoteAware<S>
+            : PadOperator<S, "%">
+        : S;
+// Pairwise span-jump (same shape as `BlankSingleQuotedLiterals`): hop to the
+// leftmost `'`, pad the run BEFORE it, copy the `'…'` span verbatim, recurse
+// on the tail. The `''` SQL escape pairs leftmost exactly like the blanking
+// walk. An UNTERMINATED opener pads the run before it, then copies the
+// literal tail verbatim (lenient: no padding inside what is textually a
+// string literal). Recursion depth scales with the NUMBER OF LITERALS, not
+// the string length; the step cap is a runaway backstop only — on cap the
+// remainder passes through UNPADDED (the behavior before `%` padding existed,
+// so a cap hit can never cause a new rejection).
+type PadModuloQuoteAware<
+    S extends string,
+    Acc extends string = "",
+    Steps extends any[] = []
+> = string extends S
+    ? S
+    : Steps["length"] extends 300
+        ? `${Acc}${S}`
+        : S extends `${infer Pre}'${infer Rest}`
+            ? Rest extends `${infer Lit}'${infer After}`
+                ? PadModuloQuoteAware<After, `${Acc}${PadOperator<Pre, "%">}'${Lit}'`, [any, ...Steps]>
+                : `${Acc}${PadOperator<Pre, "%">}'${Rest}`
+            : `${Acc}${PadOperator<S, "%">}`;
 export type PadOperators<S extends string> =
     PadOperator<

package/src/partial.ts CHANGED Viewed

@@ -16,11 +16,10 @@ import type {
     NormalizeQuery,
     SplitOnDotClean,
     SplitTopLevel,
-    TokenizeLoose,
     Trim
 } from "./parsing.js";
 import type {
-    QualifiedColumnRefs,
+    QualifiedRefScan,
     ResolveAlias,
     StripDoubleQuotes,
     TableKeysByName
@@ -76,13 +75,13 @@ export type ColumnRefValidPartialWith<
                     : true
                 : true;
-// Validate every qualified column ref in a token list, partial-mode.
+// Validate every qualified column ref in a fragment, partial-mode.
 export type QualifiedColumnRefsValidPartialFor<
     S extends DatabaseSchema,
     Tables extends string,
     Aliases extends string,
-    LooseTokens extends string[]
-> = QualifiedColumnRefs<LooseTokens, S, Tables, Aliases> extends infer Cols
+    RefSeg extends string
+> = QualifiedRefScan<RefSeg> extends infer Cols
     ? AllTrue<Cols extends string ? ColumnRefValidPartialWith<Cols, Tables, Aliases, S> : true>
     : true;
@@ -97,9 +96,7 @@ export type ValidateTableSourcePart<N extends string, S extends DatabaseSchema>
     TablesInQuery<N, S> extends infer Tables extends string
         ? AliasesInQuery<N, S> extends infer Aliases extends string
             ? AllPartTablesValid<Tables, S> extends true
-                ? TokenizeLoose<N> extends infer Toks extends string[]
-                    ? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, Toks>
-                    : true
+                ? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, N>
                 : false
             : true
         : true;
@@ -130,9 +127,7 @@ export type ValidateClausePart<Part extends string, S extends DatabaseSchema> =
     string extends Part
         ? false
         : NormalizeQuery<Part> extends infer N extends string
-            ? TokenizeLoose<N> extends infer Toks extends string[]
-                ? QualifiedColumnRefsValidPartialFor<S, never, never, Toks>
-                : true
+            ? QualifiedColumnRefsValidPartialFor<S, never, never, N>
             : false;
 // Scope-aware clause validation: identical to ValidateClausePart, but the
@@ -147,13 +142,12 @@ export type ValidateClausePartScoped<
     string extends Part
         ? false
         : NormalizeQuery<Part> extends infer N extends string
-            ? TokenizeLoose<N> extends infer Toks extends string[]
-                ? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, Toks>
-                : true
+            ? QualifiedColumnRefsValidPartialFor<S, Tables, Aliases, N>
             : false;
 // Expression-detector for a single SELECT-item token. HasSpecial covers space,
-// parens, arithmetic/comparison operators, comma, `::`, `||`. We additionally
+// parens, arithmetic operators (+ - * / %), comparison operators, comma,
+// `::`, `||`. We additionally
 // reject `[ ] " ' :` so array-indexing, quoted-with-space idents, json arrows,
 // and param/cast colons are treated as expressions (skipped, never falsely
 // rejected). A token clearing this guard is a plain identifier piece.