neonctl 2.22.2 → 2.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -0
- package/analytics.js +5 -2
- package/commands/branches.js +9 -1
- package/commands/connection_string.js +9 -1
- package/commands/functions.js +277 -0
- package/commands/index.js +4 -0
- package/commands/neon_auth.js +1013 -0
- package/commands/projects.js +9 -1
- package/commands/psql.js +6 -1
- package/functions_api.js +44 -0
- package/package.json +15 -5
- package/psql/cli.js +51 -0
- package/psql/command/cmd_cond.js +437 -0
- package/psql/command/cmd_connect.js +815 -0
- package/psql/command/cmd_copy.js +1025 -0
- package/psql/command/cmd_describe.js +1810 -0
- package/psql/command/cmd_format.js +909 -0
- package/psql/command/cmd_io.js +2187 -0
- package/psql/command/cmd_lo.js +385 -0
- package/psql/command/cmd_meta.js +970 -0
- package/psql/command/cmd_misc.js +187 -0
- package/psql/command/cmd_pipeline.js +1141 -0
- package/psql/command/cmd_restrict.js +171 -0
- package/psql/command/cmd_show.js +751 -0
- package/psql/command/dispatch.js +343 -0
- package/psql/command/inputQueue.js +42 -0
- package/psql/command/shared.js +71 -0
- package/psql/complete/filenames.js +139 -0
- package/psql/complete/index.js +104 -0
- package/psql/complete/matcher.js +314 -0
- package/psql/complete/psqlVars.js +247 -0
- package/psql/complete/queries.js +491 -0
- package/psql/complete/rules.js +2387 -0
- package/psql/core/common.js +1250 -0
- package/psql/core/help.js +576 -0
- package/psql/core/mainloop.js +1353 -0
- package/psql/core/prompt.js +437 -0
- package/psql/core/settings.js +684 -0
- package/psql/core/sqlHelp.js +1066 -0
- package/psql/core/startup.js +840 -0
- package/psql/core/syncVars.js +116 -0
- package/psql/core/variables.js +287 -0
- package/psql/describe/formatters.js +1277 -0
- package/psql/describe/processNamePattern.js +270 -0
- package/psql/describe/queries.js +2373 -0
- package/psql/describe/versionGate.js +43 -0
- package/psql/index.js +2005 -0
- package/psql/io/history.js +299 -0
- package/psql/io/input.js +120 -0
- package/psql/io/lineEditor/buffer.js +323 -0
- package/psql/io/lineEditor/complete.js +227 -0
- package/psql/io/lineEditor/filename.js +159 -0
- package/psql/io/lineEditor/index.js +891 -0
- package/psql/io/lineEditor/keymap.js +738 -0
- package/psql/io/lineEditor/vt100.js +363 -0
- package/psql/io/pgpass.js +202 -0
- package/psql/io/pgservice.js +194 -0
- package/psql/io/psqlrc.js +422 -0
- package/psql/print/aligned.js +1756 -0
- package/psql/print/asciidoc.js +248 -0
- package/psql/print/crosstab.js +460 -0
- package/psql/print/csv.js +92 -0
- package/psql/print/html.js +258 -0
- package/psql/print/json.js +96 -0
- package/psql/print/latex.js +396 -0
- package/psql/print/pager.js +265 -0
- package/psql/print/troff.js +258 -0
- package/psql/print/unaligned.js +118 -0
- package/psql/print/units.js +135 -0
- package/psql/scanner/slash.js +513 -0
- package/psql/scanner/sql.js +910 -0
- package/psql/scanner/stringutils.js +390 -0
- package/psql/types/backslash.js +1 -0
- package/psql/types/connection.js +1 -0
- package/psql/types/index.js +7 -0
- package/psql/types/printer.js +1 -0
- package/psql/types/repl.js +1 -0
- package/psql/types/scanner.js +24 -0
- package/psql/types/settings.js +1 -0
- package/psql/types/variables.js +1 -0
- package/psql/wire/connection.js +2844 -0
- package/psql/wire/copy.js +108 -0
- package/psql/wire/notify.js +59 -0
- package/psql/wire/pipeline.js +519 -0
- package/psql/wire/protocol.js +466 -0
- package/psql/wire/sasl.js +296 -0
- package/psql/wire/tls.js +596 -0
- package/test_utils/fixtures.js +1 -0
- package/utils/esbuild.js +147 -0
- package/utils/psql.js +107 -11
- package/utils/zip.js +4 -0
- package/writer.js +1 -1
- package/commands/auth.test.js +0 -211
- package/commands/branches.test.js +0 -460
- package/commands/checkout.test.js +0 -170
- package/commands/connection_string.test.js +0 -196
- package/commands/data_api.test.js +0 -169
- package/commands/databases.test.js +0 -39
- package/commands/help.test.js +0 -9
- package/commands/init.test.js +0 -56
- package/commands/ip_allow.test.js +0 -59
- package/commands/link.test.js +0 -381
- package/commands/operations.test.js +0 -7
- package/commands/orgs.test.js +0 -7
- package/commands/projects.test.js +0 -144
- package/commands/psql.test.js +0 -49
- package/commands/roles.test.js +0 -37
- package/commands/set_context.test.js +0 -159
- package/commands/vpc_endpoints.test.js +0 -69
- package/context.test.js +0 -119
- package/env.test.js +0 -55
- package/utils/formats.test.js +0 -32
- package/writer.test.js +0 -104
|
@@ -0,0 +1,910 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* psql SQL scanner — statement boundary detection.
|
|
3
|
+
*
|
|
4
|
+
* Hand-port of PostgreSQL's `src/fe_utils/psqlscan.l`. The upstream is a
|
|
5
|
+
* flex-generated state machine concerned with one thing: finding the end of a
|
|
6
|
+
* SQL statement — a semicolon that is *not* inside a quote, comment, paren or
|
|
7
|
+
* dollar-quoted block. Backslash commands at the top level are also detected.
|
|
8
|
+
*
|
|
9
|
+
* We deliberately do **not** mechanically translate the flex rules; that would
|
|
10
|
+
* produce dense, untestable code. Instead we collapse the upstream exclusive
|
|
11
|
+
* states into a single integer-tagged state machine inside {@link scanSql} and
|
|
12
|
+
* cover behaviour with a >40-case differential corpus in `sql.test.ts`.
|
|
13
|
+
*
|
|
14
|
+
* Upstream exclusive states and our mapping:
|
|
15
|
+
*
|
|
16
|
+
* - `<xc>` extended C-style comment (with nested-depth tracking) → `Mode.BlockComment`
|
|
17
|
+
* - `<xq>` standard single-quoted string → `Mode.SingleQuote`
|
|
18
|
+
* - `<xe>` extended single-quoted string (`E'…'`) → `Mode.SingleQuote` + `escape=true`
|
|
19
|
+
* - `<xqs>` quote-stop (lookahead for continuation across newlines) → folded into the
|
|
20
|
+
* `SingleQuote` exit logic via {@link tryQuoteContinue}: after the closing
|
|
21
|
+
* `'` of a single-quoted string we look ahead through whitespace; if we
|
|
22
|
+
* find a newline followed by another `'`, we re-enter the SingleQuote
|
|
23
|
+
* state so the two pieces concatenate per SQL standard.
|
|
24
|
+
* - `<xd>` double-quoted identifier → `Mode.DoubleQuote`
|
|
25
|
+
* - `<xdolq>` `$tag$…$tag$` dollar-quoted string → `Mode.DollarQuote`
|
|
26
|
+
* - `<xb>`, `<xh>`, `<xui>`, `<xus>` (bit / hex / unicode-quoted identifiers and strings)
|
|
27
|
+
* are folded into the standard single-/double-quoted paths because for
|
|
28
|
+
* statement-boundary purposes only the surrounding quote characters matter —
|
|
29
|
+
* no escapes inside them affect whether the closing quote is found.
|
|
30
|
+
*
|
|
31
|
+
* Scope notes (what is deliberately left out, or handled only in simplified form):
|
|
32
|
+
*
|
|
33
|
+
* - `COPY … FROM STDIN` data-line handling. Upstream's `<xcopy>` state is
|
|
34
|
+
* **mainloop-owned, not scanner-owned**: once libpq returns `PGRES_COPY_IN`
|
|
35
|
+
* the mainloop bypasses the SQL scanner entirely and forwards raw lines to
|
|
36
|
+
* `PQputCopyData` until it sees `\.` on a line by itself. Our mainloop has
|
|
37
|
+
* that wiring stubbed as WP-16 (see `src/psql/core/mainloop.ts`, comment
|
|
38
|
+
* near the top). The scanner state machine therefore has *nothing to do*
|
|
39
|
+
* here — when the mainloop is in copy mode it never calls `scanSql` until
|
|
40
|
+
* after `\.`. The contract is: `ScanState.promptStatus = 'copy'` is set by
|
|
41
|
+
* the mainloop (not by the scanner) while copy mode is active; the scanner
|
|
42
|
+
* only consumes it for PROMPT3 selection. See {@link computePromptStatus}.
|
|
43
|
+
* No scanner API change is required for COPY support to land — only the
|
|
44
|
+
* mainloop bypass logic.
|
|
45
|
+
* - Variable substitution `:var`, `:'var'`, `:"var"`. Upstream expands these
|
|
46
|
+
* inline via callbacks; we do the same when {@link scanSql} is given a
|
|
47
|
+
* `varLookup`. Substitution fires at top-level only — never inside SQL
|
|
48
|
+
* string literals, double-quoted identifiers, dollar-quoted blocks, or
|
|
49
|
+
* comments (matches upstream's `<INITIAL>` flex rule scope). The token
|
|
50
|
+
* `::` (PostgreSQL cast operator) is preserved verbatim. When no
|
|
51
|
+
* `varLookup` is supplied (legacy call site or `\set NEW :OLD` chains
|
|
52
|
+
* that should keep the literal), no substitution happens.
|
|
53
|
+
* - Tab-completion helpers (`psqlscan_test_*`). Not needed for the REPL.
|
|
54
|
+
* - U&'…' and U&"…" Unicode-escape forms — folded into the standard quoted paths;
|
|
55
|
+
* the `u&` prefix is treated as two identifier characters and the following quote
|
|
56
|
+
* starts the regular quoted run. Boundary detection is unaffected.
|
|
57
|
+
*
|
|
58
|
+
* CREATE FUNCTION / PROCEDURE bodies (fully handled — noted here because the
|
|
59
|
+
* surrounding flex machinery can make it look like an open question):
|
|
60
|
+
*
|
|
61
|
+
* - All three body shapes split correctly, so an embedded `;` never terminates
|
|
62
|
+
* the surrounding CREATE:
|
|
63
|
+
* • dollar-quoted bodies (`AS $$…$$`) — the modern idiom — via `<xdolq>`;
|
|
64
|
+
* • plain single-quoted bodies (`AS '…'`) — the pre-SQL-standard legacy
|
|
65
|
+
* idiom — via `<xq>` (the quote-state machine swallows the body, `;`s
|
|
66
|
+
* and all, with no special-casing required);
|
|
67
|
+
* • unquoted SQL-standard bodies (`BEGIN ATOMIC … END`, PG14+) — via the
|
|
68
|
+
* `begin_depth` tracking implemented in {@link maybeTrackBeginEnd}, which
|
|
69
|
+
* mirrors upstream's `begin_depth` counter.
|
|
70
|
+
* The first two are covered by the quote machine and the third by
|
|
71
|
+
* `begin_depth`; together they span every body form a real query writes —
|
|
72
|
+
* there is no remaining "legacy shape" gap here. Pinned by the
|
|
73
|
+
* "Plain-string … CREATE FUNCTION bodies" and "BEGIN ATOMIC" corpus cases
|
|
74
|
+
* in `sql.test.ts`.
|
|
75
|
+
*
|
|
76
|
+
* Incremental API:
|
|
77
|
+
*
|
|
78
|
+
* Callers thread {@link ScanState} between calls. On each call we return the first
|
|
79
|
+
* boundary in `input`. For statement terminators we hand back the SQL up to and
|
|
80
|
+
* including the `;`. For backslash commands at the top of the buffer we return
|
|
81
|
+
* the command name and the rest of the line (without consuming further input).
|
|
82
|
+
* When the chunk ends mid-statement we return `'incomplete'` (still inside a
|
|
83
|
+
* quote/comment/paren/dollar) or `'eof'` (clean break at end of buffer) with the
|
|
84
|
+
* residue and current state, and the caller is expected to read more input and
|
|
85
|
+
* call again.
|
|
86
|
+
*/
|
|
87
|
+
import { initialScanState } from '../types/scanner.js';
|
|
88
|
+
import { tryConsumeVarSubstitution } from './stringutils.js';
|
|
89
|
+
// ---------------------------------------------------------------------------
// Character classes.
//
// The upstream flex rules use POSIX character classes; here the same classes
// are expressed as regular expressions applied to one JS string element at a
// time. Code units >= 0x80 are accepted in identifier positions, mirroring
// upstream's `\200-\377` range, so dollar-quote tags containing non-ASCII
// identifier characters are recognised.
// ---------------------------------------------------------------------------
const IDENT_START_RE = /[A-Za-z_\u0080-\uffff]/;
const IDENT_CONT_RE = /[A-Za-z0-9_\u0080-\uffff]/;
/** True when `c` is defined and may begin an identifier. */
const isIdentStart = (c) => (c === undefined ? false : IDENT_START_RE.test(c));
/** True when `c` is defined and may continue an identifier. */
const isIdentCont = (c) => (c === undefined ? false : IDENT_CONT_RE.test(c));
|
|
101
|
+
// ---------------------------------------------------------------------------
// State cloning.
//
// Callers treat a ScanState as immutable: every scan result carries its own
// `nextState`. Inside scanSql() a private working copy is mutated instead, and
// this helper produces that copy.
// ---------------------------------------------------------------------------
/**
 * Copy the known ScanState fields of `s` into a new object.
 *
 * Only the fields listed here are carried over. `identifierLetters` is rebuilt
 * as a new four-element array so the copy never shares it with `s`.
 *
 * @param s state to copy
 * @returns an independent copy of `s`
 */
const cloneState = (s) => {
    const {
        promptStatus,
        parenDepth,
        dollarTag,
        inLineComment,
        inBlockComment,
        inSingleQuote,
        inDoubleQuote,
        inEscapeString,
        beginDepth,
        identifierLetters: sourceLetters,
        identifierCount,
    } = s;
    // Always exactly four slots, read by index.
    const identifierLetters = Array.from({ length: 4 }, (_, slot) => sourceLetters[slot]);
    return {
        promptStatus,
        parenDepth,
        dollarTag,
        inLineComment,
        inBlockComment,
        inSingleQuote,
        inDoubleQuote,
        inEscapeString,
        beginDepth,
        identifierLetters,
        identifierCount,
    };
};
|
|
126
|
+
// ---------------------------------------------------------------------------
// `BEGIN ... END` block tracking for SQL function bodies.
//
// Upstream `psqlscan.l` keeps a per-statement counter (`begin_depth`) plus the
// first letters of the leading identifiers (`identifiers[0..N]`,
// `identifier_count`). A slot is only filled when the identifier IS one of the
// keywords CREATE, FUNCTION, PROCEDURE, OR, REPLACE; any other identifier
// leaves its slot empty. The counter is adjusted only while the recorded
// prefix reads `c f`, `c p`, `c o r f` or `c o r p`, i.e. the statement starts
// with `CREATE [OR REPLACE] {FUNCTION|PROCEDURE}`. Inside that window `BEGIN`
// increments depth, `CASE` increments depth only when already inside a BEGIN
// (so a bare `SELECT CASE ... END` cannot unbalance the counter), and `END`
// decrements it. While the depth is > 0, a top-level `;` is not a statement
// terminator (see the depth-gated `;` boundary in `scanSql`).
//
// {@link maybeTrackBeginEnd} is expected to be fed each identifier lexed at
// top level. Recorded letters are lower-case.
// ---------------------------------------------------------------------------
// Whole keywords that may occupy a prefix slot. Matching the full word, not
// just its first letter, stops statements such as `CREATE POLICY ...`,
// `COPY foo ...` or `COMMENT ON ROLE foo ...` from being mistaken for
// CREATE FUNCTION / PROCEDURE.
const CREATE_PREFIX_KEYWORDS = new Set([
    'create',
    'function',
    'procedure',
    'or',
    'replace',
]);
// First letters of the keywords above. No longer consulted by
// maybeTrackBeginEnd; retained unchanged in case other code in this module
// still refers to it.
const KEYWORD_PREFIX_LETTERS = new Set([
    'c',
    'f',
    'p',
    'o',
    'r', // replace
]);
/**
 * True when the recorded prefix letters spell
 * `CREATE [OR REPLACE] {FUNCTION|PROCEDURE}`.
 *
 * @param letters the four prefix slots ('' for an empty slot)
 */
const PREFIX_MATCHES_CREATE_FN_OR_PROC = (letters) => {
    if (letters[0] !== 'c') {
        return false;
    }
    // CREATE FUNCTION / CREATE PROCEDURE
    if (letters[1] === 'f' || letters[1] === 'p') {
        return true;
    }
    // CREATE OR REPLACE FUNCTION / PROCEDURE
    return (letters[1] === 'o' &&
        letters[2] === 'r' &&
        (letters[3] === 'f' || letters[3] === 'p'));
};
/**
 * Update the BEGIN/END bookkeeping of `st` for one identifier.
 *
 * Mutates `st.identifierLetters`, `st.identifierCount` and `st.beginDepth`.
 * Empty words, and any word seen while `st.parenDepth !== 0`, are ignored
 * entirely: they neither occupy a prefix slot nor change the depth.
 *
 * NOTE(review): upstream appears to apply its `paren_depth == 0` test only to
 * the depth adjustment, whereas this port also skips slot recording inside
 * parentheses. Confirm the difference is intended.
 *
 * @param st working scan state (mutated in place)
 * @param word identifier text as it appeared in the input
 */
const maybeTrackBeginEnd = (st, word) => {
    if (word.length === 0) {
        return;
    }
    if (st.parenDepth !== 0) {
        return;
    }
    const lower = word.toLowerCase();
    // First identifier of a statement: start from empty prefix slots.
    if (st.identifierCount === 0) {
        st.identifierLetters = ['', '', '', ''];
    }
    // Only the first four identifiers can occupy a slot, and only when they
    // are one of the CREATE-prefix keywords. Every identifier still advances
    // the count so the slots stay aligned with identifier positions.
    if (st.identifierCount < st.identifierLetters.length &&
        CREATE_PREFIX_KEYWORDS.has(lower)) {
        st.identifierLetters[st.identifierCount] = lower[0];
    }
    st.identifierCount++;
    if (!PREFIX_MATCHES_CREATE_FN_OR_PROC(st.identifierLetters)) {
        return;
    }
    if (lower === 'begin') {
        st.beginDepth++;
    }
    else if (lower === 'case') {
        // CASE also ends with END; it only needs tracking once inside a BEGIN.
        if (st.beginDepth >= 1) {
            st.beginDepth++;
        }
    }
    else if (lower === 'end') {
        // Never go negative on a stray END.
        if (st.beginDepth > 0) {
            st.beginDepth--;
        }
    }
};
|
|
205
|
+
/**
 * Try to read a dollar-quote delimiter (`$$` or `$tag$`) starting at `s[i]`.
 *
 * @param s text being scanned
 * @param i index of the candidate opening `$`
 * @returns `{ tag, end }`, where `tag` is the text between the two `$`
 *   characters ('' for `$$`) and `end` is the index just past the closing
 *   `$`; or `null` when no delimiter starts at `i` (for example `$1`, or a
 *   tag that is never closed by a `$`).
 */
const matchDollarDelim = (s, i) => {
    const tagStart = i + 1;
    if (s[i] !== '$') {
        return null;
    }
    const first = s[tagStart];
    // Empty tag: `$$`
    if (first === '$') {
        return { tag: '', end: tagStart + 1 };
    }
    if (!isIdentStart(first)) {
        return null;
    }
    // Walk over the remaining tag characters.
    let close = tagStart + 1;
    for (; close < s.length; close++) {
        if (!isIdentCont(s[close])) {
            break;
        }
    }
    // The tag must be terminated by a `$` to count as a delimiter.
    return s[close] === '$'
        ? { tag: s.slice(tagStart, close), end: close + 1 }
        : null;
};
|
|
220
|
+
// ---------------------------------------------------------------------------
// Does the `'` at `quotePos` open an extended string (E'…')?
//
// Upstream `{xestart}` is `[eE]{quote}`. Here the `E` must additionally stand
// at a token boundary, so an identifier that merely ends in `E` (such as
// `THREE'foo'`) does not switch on backslash escapes. Flex gets this by
// preferring the longer identifier match; this port approximates it by
// requiring that the character before the `E` is not an identifier
// continuation character.
// ---------------------------------------------------------------------------
/**
 * @param input text being scanned
 * @param quotePos index of the opening `'`
 * @returns true when the quote is directly preceded by a standalone `E`/`e`
 */
const isExtendedStringStart = (input, quotePos) => {
    // A quote at the very start of the input has no prefix at all.
    if (quotePos === 0) {
        return false;
    }
    const prefix = input[quotePos - 1];
    const hasEPrefix = prefix === 'E' || prefix === 'e';
    // The E only counts when it is not the tail of a longer identifier.
    return (hasEPrefix &&
        !(quotePos >= 2 && isIdentCont(input[quotePos - 2])));
};
|
|
240
|
+
// ---------------------------------------------------------------------------
// Escape measurement inside an extended single-quoted string. `\\`, `\'` and
// similar sequences must be recognised so that an escaped quote does **not**
// close the string. The return value is the number of characters the escape
// occupies: >= 2 when a character follows the backslash, or 1 when the
// backslash is the last character of the input.
// ---------------------------------------------------------------------------
/**
 * @param input text being scanned
 * @param i index of the backslash (the caller guarantees `input[i] === '\\'`)
 * @returns number of characters making up the escape sequence at `i`
 */
const consumeXeEscape = (input, i) => {
    const lead = input[i + 1];
    if (lead === undefined) {
        return 1; // trailing backslash at end of input; caller stays open
    }
    const isOctal = (ch) => ch >= '0' && ch <= '7';
    const isHex = (ch) => /[0-9a-fA-F]/.test(ch);
    // Length of `\` + lead + up to `limit` following digits accepted by `isDigit`.
    const withDigits = (limit, isDigit) => {
        let end = i + 2;
        for (let taken = 0; taken < limit && end < input.length && isDigit(input[end]); taken++) {
            end++;
        }
        return end - i;
    };
    if (isOctal(lead)) {
        // Octal \ooo: the lead digit plus at most two more.
        return withDigits(2, isOctal);
    }
    switch (lead) {
        case 'x':
            // \xH or \xHH; a bare \x still spans two characters.
            return withDigits(2, isHex);
        case 'u':
            return withDigits(4, isHex);
        case 'U':
            return withDigits(8, isHex);
        default:
            // Any other character (`'`, `\`, `n`, `t`, ...): backslash + that char.
            return 2;
    }
};
|
|
295
|
+
// ---------------------------------------------------------------------------
// `<xqs>` quote-continuation lookahead. Per the SQL standard, two
// single-quoted strings separated only by whitespace that **contains at least
// one newline** form a single logical literal (`'abc'\n'def'` == `'abcdef'`).
// Whitespace without a newline is **not** a continuation; the strings stay
// separate at the lexer level, and whether that is valid SQL is the parser's
// concern.
//
// Only the characters space, tab, form feed, vertical tab, `\n` and `\r` are
// skipped. `--` line comments and `/* */` block comments in the gap are not
// skipped, so a gap containing a comment is never a continuation here.
// NOTE(review): upstream's `quotecontinue` rule appears to permit `--`
// comments before the newline. Confirm whether this divergence matters.
// ---------------------------------------------------------------------------
/**
 * @param input text being scanned
 * @param i index just past the closing `'` of the previous string
 * @returns index of the next opening `'` when a continuation is found,
 *   otherwise `null`
 */
const tryQuoteContinue = (input, i) => {
    let pos = i;
    let newlineSeen = false;
    gap: for (; pos < input.length; pos++) {
        switch (input[pos]) {
            case '\n':
            case '\r':
                newlineSeen = true;
                break;
            case ' ':
            case '\t':
            case '\f':
            case '\v':
                break;
            default:
                // First non-whitespace character ends the gap.
                break gap;
        }
    }
    return newlineSeen && input[pos] === "'" ? pos : null;
};
|
|
333
|
+
// ---------------------------------------------------------------------------
// Find the end of a `--` line comment that starts at position `i`. The
// result is the index of the first `\n` or `\r` after the `--` (the newline
// itself is NOT consumed), or the end of the input when the comment is not
// terminated. Boundary semantics: the whole comment span belongs to the
// surrounding SQL.
// ---------------------------------------------------------------------------
/**
 * @param input text being scanned
 * @param i index of the first `-` (the caller guarantees `--` at `i`)
 * @returns index of the terminating newline character, or of end of input
 */
const skipLineComment = (input, i) => {
    // Start just past the `--` introducer.
    let pos = i + 2;
    for (; pos < input.length; pos++) {
        const ch = input[pos];
        if (ch === '\n' || ch === '\r') {
            break;
        }
    }
    return pos;
};
|
|
345
|
+
// ---------------------------------------------------------------------------
// Work out the prompt status for an incomplete chunk: the *reason* more input
// is needed. Modelled on upstream's `promptStatus_t`, which drives the `%R`
// rendering under PROMPT2 (single quote → `'`, double quote → `"`, dollar
// quote → `$`, block comment → `*`, paren → `(`, otherwise → `-`).
//
// Checks are made in a fixed order: open block comment first, then the quote
// states (single, double, dollar), then paren depth. A state with none of
// these open yields plain `'continue'`.
// ---------------------------------------------------------------------------
/**
 * @param state scan state to classify
 * @returns one of `'comment'`, `'continue-quote'`, `'continue-dquote'`,
 *   `'continue-dollar'`, `'paren'` or `'continue'`
 */
const computePromptStatus = (state) => {
    // Ordered [condition, status] pairs; the first condition that holds wins.
    const reasons = [
        [state.inBlockComment > 0, 'comment'],
        [state.inSingleQuote, 'continue-quote'],
        [state.inDoubleQuote, 'continue-dquote'],
        [state.dollarTag !== null, 'continue-dollar'],
        [state.parenDepth > 0, 'paren'],
    ];
    const hit = reasons.find(([active]) => active);
    return hit ? hit[1] : 'continue';
};
|
|
369
|
+
export const scanSql = (input, state, varLookup, slashCmdMode, options) => {
|
|
370
|
+
const singleline = options?.singleline ?? false;
|
|
371
|
+
// Local working copy; we mutate freely and clone at exit.
|
|
372
|
+
const st = cloneState(state ?? initialScanState());
|
|
373
|
+
// SQL accumulator. We append characters as we scan; this matches upstream's
|
|
374
|
+
// `output_buf` which receives all ECHOed text.
|
|
375
|
+
let sql = '';
|
|
376
|
+
let i = 0;
|
|
377
|
+
// Convenience: emit characters from `from` (inclusive) up to `to` (exclusive)
|
|
378
|
+
// into the SQL accumulator and advance the cursor.
|
|
379
|
+
const emit = (from, to) => {
|
|
380
|
+
sql += input.slice(from, to);
|
|
381
|
+
i = to;
|
|
382
|
+
};
|
|
383
|
+
while (i < input.length) {
|
|
384
|
+
const c = input[i];
|
|
385
|
+
// --- Inside a block comment: look for nested opens and closes. ---
|
|
386
|
+
if (st.inBlockComment > 0) {
|
|
387
|
+
if (c === '/' && input[i + 1] === '*') {
|
|
388
|
+
st.inBlockComment++;
|
|
389
|
+
sql += '/*';
|
|
390
|
+
i += 2;
|
|
391
|
+
continue;
|
|
392
|
+
}
|
|
393
|
+
if (c === '*' && input[i + 1] === '/') {
|
|
394
|
+
st.inBlockComment--;
|
|
395
|
+
sql += '*/';
|
|
396
|
+
i += 2;
|
|
397
|
+
continue;
|
|
398
|
+
}
|
|
399
|
+
sql += c;
|
|
400
|
+
i++;
|
|
401
|
+
continue;
|
|
402
|
+
}
|
|
403
|
+
// --- Inside a single-quoted string (standard or extended). ---
|
|
404
|
+
if (st.inSingleQuote) {
|
|
405
|
+
if (st.inEscapeString && c === '\\') {
|
|
406
|
+
const n = consumeXeEscape(input, i);
|
|
407
|
+
emit(i, i + n);
|
|
408
|
+
continue;
|
|
409
|
+
}
|
|
410
|
+
if (c === "'") {
|
|
411
|
+
// Doubled quote is a literal.
|
|
412
|
+
if (input[i + 1] === "'") {
|
|
413
|
+
sql += "''";
|
|
414
|
+
i += 2;
|
|
415
|
+
continue;
|
|
416
|
+
}
|
|
417
|
+
sql += "'";
|
|
418
|
+
i++;
|
|
419
|
+
// <xqs> quote-continuation: SQL standard merges two single-quoted
|
|
420
|
+
// strings separated by whitespace containing at least one newline.
|
|
421
|
+
// Look ahead; if we find one, re-enter the single-quote state at the
|
|
422
|
+
// new opening `'` and keep going as if nothing happened. The gap
|
|
423
|
+
// (whitespace + newline) is preserved verbatim in the SQL accumulator
|
|
424
|
+
// so the round-tripped text matches the input.
|
|
425
|
+
const cont = tryQuoteContinue(input, i);
|
|
426
|
+
if (cont !== null) {
|
|
427
|
+
sql += input.slice(i, cont + 1);
|
|
428
|
+
i = cont + 1;
|
|
429
|
+
// Re-derive escape-string status from the new opening `'` position;
|
|
430
|
+
// each piece picks its own prefix per the lexical spec, so
|
|
431
|
+
// `E'a'\n'b'` keeps escape mode off for the second piece while
|
|
432
|
+
// `E'a'\nE'b'` keeps it on.
|
|
433
|
+
st.inEscapeString = isExtendedStringStart(input, cont);
|
|
434
|
+
continue;
|
|
435
|
+
}
|
|
436
|
+
st.inSingleQuote = false;
|
|
437
|
+
st.inEscapeString = false;
|
|
438
|
+
continue;
|
|
439
|
+
}
|
|
440
|
+
sql += c;
|
|
441
|
+
i++;
|
|
442
|
+
continue;
|
|
443
|
+
}
|
|
444
|
+
// --- Inside a double-quoted identifier. ---
|
|
445
|
+
if (st.inDoubleQuote) {
|
|
446
|
+
if (c === '"') {
|
|
447
|
+
if (input[i + 1] === '"') {
|
|
448
|
+
sql += '""';
|
|
449
|
+
i += 2;
|
|
450
|
+
continue;
|
|
451
|
+
}
|
|
452
|
+
sql += '"';
|
|
453
|
+
i++;
|
|
454
|
+
st.inDoubleQuote = false;
|
|
455
|
+
continue;
|
|
456
|
+
}
|
|
457
|
+
sql += c;
|
|
458
|
+
i++;
|
|
459
|
+
continue;
|
|
460
|
+
}
|
|
461
|
+
// --- Inside a dollar-quoted string. ---
|
|
462
|
+
if (st.dollarTag !== null) {
|
|
463
|
+
if (c === '$') {
|
|
464
|
+
const m = matchDollarDelim(input, i);
|
|
465
|
+
if (m !== null && m.tag === st.dollarTag) {
|
|
466
|
+
sql += input.slice(i, m.end);
|
|
467
|
+
i = m.end;
|
|
468
|
+
st.dollarTag = null;
|
|
469
|
+
continue;
|
|
470
|
+
}
|
|
471
|
+
// Either not a delim or a non-matching tag: consume just the $ and
|
|
472
|
+
// keep scanning. This matches upstream's `<xdolq>.` fallback which
|
|
473
|
+
// ECHOes the `$` and continues.
|
|
474
|
+
sql += '$';
|
|
475
|
+
i++;
|
|
476
|
+
continue;
|
|
477
|
+
}
|
|
478
|
+
sql += c;
|
|
479
|
+
i++;
|
|
480
|
+
continue;
|
|
481
|
+
}
|
|
482
|
+
// --- Top-level / INITIAL state. ---
|
|
483
|
+
// SINGLELINE (-S): a top-level newline is an implicit statement
|
|
484
|
+
// terminator, behaving exactly like `;`. Upstream's `MainLoop()` adds the
|
|
485
|
+
// implicit semicolon when `pset.singleline` is set; we apply it here so
|
|
486
|
+
// the boundary logic lives in one place. We're guaranteed to be at the
|
|
487
|
+
// INITIAL top level here (the quote / comment / paren / dollar-quote
|
|
488
|
+
// guards above already `continue`d), but still gate on `beginDepth === 0`
|
|
489
|
+
// so a newline inside a `CREATE FUNCTION … BEGIN … END` body doesn't split
|
|
490
|
+
// the surrounding statement — matching the `;` rule below. Only fire when
|
|
491
|
+
// the statement scanned so far carries non-whitespace SQL; a blank or
|
|
492
|
+
// whitespace-only line emits the newline and keeps scanning so it never
|
|
493
|
+
// dispatches an empty statement.
|
|
494
|
+
if (singleline &&
|
|
495
|
+
(c === '\n' || c === '\r') &&
|
|
496
|
+
st.beginDepth === 0 &&
|
|
497
|
+
sql.trim().length > 0) {
|
|
498
|
+
sql += c;
|
|
499
|
+
i++;
|
|
500
|
+
// Reset per-statement state, mirroring the `;` boundary below.
|
|
501
|
+
return {
|
|
502
|
+
kind: 'semicolon',
|
|
503
|
+
sql,
|
|
504
|
+
consumed: i,
|
|
505
|
+
nextState: initialScanState(),
|
|
506
|
+
};
|
|
507
|
+
}
|
|
508
|
+
// Block comment start.
|
|
509
|
+
if (c === '/' && input[i + 1] === '*') {
|
|
510
|
+
st.inBlockComment = 1;
|
|
511
|
+
sql += '/*';
|
|
512
|
+
i += 2;
|
|
513
|
+
continue;
|
|
514
|
+
}
|
|
515
|
+
// Line comment.
|
|
516
|
+
if (c === '-' && input[i + 1] === '-') {
|
|
517
|
+
const end = skipLineComment(input, i);
|
|
518
|
+
sql += input.slice(i, end);
|
|
519
|
+
i = end;
|
|
520
|
+
continue;
|
|
521
|
+
}
|
|
522
|
+
// Double-quoted identifier start (including u&"…" form which lexes as
|
|
523
|
+
// `u&` + `"…"` for boundary purposes).
|
|
524
|
+
if (c === '"') {
|
|
525
|
+
sql += '"';
|
|
526
|
+
i++;
|
|
527
|
+
st.inDoubleQuote = true;
|
|
528
|
+
continue;
|
|
529
|
+
}
|
|
530
|
+
// Single-quoted string start. Detect E'…' for escape-aware lex; bit/hex
|
|
531
|
+
// (B'…', X'…') and N'…' / U&'…' need no special handling for boundary
|
|
532
|
+
// detection — only the surrounding `'` matters.
|
|
533
|
+
if (c === "'") {
|
|
534
|
+
sql += "'";
|
|
535
|
+
i++;
|
|
536
|
+
st.inSingleQuote = true;
|
|
537
|
+
st.inEscapeString = isExtendedStringStart(input, i - 1);
|
|
538
|
+
continue;
|
|
539
|
+
}
|
|
540
|
+
// Dollar-quoted string start.
|
|
541
|
+
if (c === '$') {
|
|
542
|
+
const m = matchDollarDelim(input, i);
|
|
543
|
+
if (m !== null) {
|
|
544
|
+
sql += input.slice(i, m.end);
|
|
545
|
+
i = m.end;
|
|
546
|
+
st.dollarTag = m.tag;
|
|
547
|
+
continue;
|
|
548
|
+
}
|
|
549
|
+
// Lone `$` (e.g. param `$1` or just bare `$`): emit and continue.
|
|
550
|
+
sql += '$';
|
|
551
|
+
i++;
|
|
552
|
+
continue;
|
|
553
|
+
}
|
|
554
|
+
// Parentheses tracking.
|
|
555
|
+
if (c === '(') {
|
|
556
|
+
sql += '(';
|
|
557
|
+
i++;
|
|
558
|
+
st.parenDepth++;
|
|
559
|
+
continue;
|
|
560
|
+
}
|
|
561
|
+
if (c === ')') {
|
|
562
|
+
sql += ')';
|
|
563
|
+
i++;
|
|
564
|
+
if (st.parenDepth > 0)
|
|
565
|
+
st.parenDepth--;
|
|
566
|
+
continue;
|
|
567
|
+
}
|
|
568
|
+
// Top-level semicolon — boundary, but only when we are NOT inside a
|
|
569
|
+
// `BEGIN ... END` function body. Upstream `psqlscan.l` gates `LEXRES_SEMI`
|
|
570
|
+
// on `paren_depth == 0 && begin_depth == 0` so that semicolons separating
|
|
571
|
+
// statements inside a SQL function body (`CREATE FUNCTION f() ... BEGIN
|
|
572
|
+
// ATOMIC SELECT 1; SELECT 2; END;`) do not terminate the surrounding
|
|
573
|
+
// CREATE statement. The depth-gated case falls through to the catch-all,
|
|
574
|
+
// which emits the `;` and continues scanning.
|
|
575
|
+
if (c === ';' && st.parenDepth === 0 && st.beginDepth === 0) {
|
|
576
|
+
sql += ';';
|
|
577
|
+
i++;
|
|
578
|
+
// Reset per-statement state. (parenDepth, dollarTag, comment depths,
|
|
579
|
+
// quote flags, beginDepth, and identifier tracking are all zero here
|
|
580
|
+
// by construction — but use initialScanState() so future fields are
|
|
581
|
+
// wiped automatically.)
|
|
582
|
+
const next = initialScanState();
|
|
583
|
+
// The post-semicolon residue stays unread; the caller passes it back in
|
|
584
|
+
// on the next call. We do NOT continue scanning — upstream returns
|
|
585
|
+
// immediately on LEXRES_SEMI to let the mainloop dispatch.
|
|
586
|
+
// The residue *includes* anything after the `;` that the caller hasn't
|
|
587
|
+
// looked at yet; we hand that back inside `sql` only if we'd consumed
|
|
588
|
+
// it. Since we returned right after the `;`, sql ends in `;`.
|
|
589
|
+
return {
|
|
590
|
+
kind: 'semicolon',
|
|
591
|
+
sql,
|
|
592
|
+
consumed: i,
|
|
593
|
+
nextState: next,
|
|
594
|
+
};
|
|
595
|
+
}
|
|
596
|
+
// Backslash — always recognised as a backslash-command boundary at top
|
|
597
|
+
// level, regardless of whether the SQL buffer is empty.
|
|
598
|
+
//
|
|
599
|
+
// Upstream `psqlscan.l` recognises the boundary in the scanner; the
|
|
600
|
+
// mainloop decides whether the dispatched command CONSUMES the buffered
|
|
601
|
+
// SQL (`\g`, `\gx`, `\gset`, `\gexec`, `\gdesc`, `\crosstabview`,
|
|
602
|
+
// `\watch`, `\bind`) or leaves it intact (`\set`, `\echo`, `\!`, …). We
|
|
603
|
+
// mirror that split: the scanner ALWAYS returns `kind: 'backslash'` and
|
|
604
|
+
// hands back the buffered SQL alongside the command name + remaining
|
|
605
|
+
// args; the mainloop forwards it into `BackslashContext.queryBuf` and the
|
|
606
|
+
// command's `run()` is free to read or ignore it.
|
|
607
|
+
if (c === '\\') {
|
|
608
|
+
// Upstream special: `\;` and `\:` are forced into the query buffer (so a
|
|
609
|
+
// user can write `SELECT 1\;` to suppress immediate dispatch). We honour
|
|
610
|
+
// those by emitting just the second char and not breaking.
|
|
611
|
+
const nxt = input[i + 1];
|
|
612
|
+
if (nxt === ';' || nxt === ':') {
|
|
613
|
+
sql += nxt;
|
|
614
|
+
i += 2;
|
|
615
|
+
continue;
|
|
616
|
+
}
|
|
617
|
+
// True backslash command. Lex `\cmd` followed by the rest of the line.
|
|
618
|
+
// The slash arg lexer (WP-05) handles arg splitting; we only need to
|
|
619
|
+
// peel off the command name and hand the remainder over.
|
|
620
|
+
i++; // consume the `\`
|
|
621
|
+
// Command name: contiguous non-whitespace, non-`\` chars. Upstream
|
|
622
|
+
// also breaks on `;` here? No — see the `xslashcmd` state: it accepts
|
|
623
|
+
// ASCII letters + a few specials. We match alnum + a small set of
|
|
624
|
+
// standalone-cmd punctuation (`?`, `!`, `+`, etc.) which covers
|
|
625
|
+
// `\?`, `\!`, `\d+`, etc.
|
|
626
|
+
let cmdEnd = i;
|
|
627
|
+
// Allow a single non-alnum punctuation char like `?` or `!` to be the
|
|
628
|
+
// whole command name (matches `\?` and `\!`). Otherwise accept any
|
|
629
|
+
// run of identifier chars + `+` (which is the trailing modifier on
|
|
630
|
+
// `\d+`, `\dt+` etc.).
|
|
631
|
+
const first = input[i];
|
|
632
|
+
if (first !== undefined && /[A-Za-z]/.test(first)) {
|
|
633
|
+
// Backslash command names are ASCII alnum + `_` + `+` (the trailing
|
|
634
|
+
// modifier on `\d+`/`\dt+`). Underscore is required for psql's
|
|
635
|
+
// multi-word commands: `\lo_import`, `\lo_export`, `\lo_list`,
|
|
636
|
+
// `\lo_unlink`, `\bind_named`, `\close_prepared`.
|
|
637
|
+
while (cmdEnd < input.length && /[A-Za-z0-9_+]/.test(input[cmdEnd])) {
|
|
638
|
+
cmdEnd++;
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
else if (first !== undefined && /[?!|]/.test(first)) {
|
|
642
|
+
cmdEnd = i + 1;
|
|
643
|
+
}
|
|
644
|
+
else {
|
|
645
|
+
// Empty or strange char: treat as a zero-length command and let the
|
|
646
|
+
// dispatcher report "unknown command".
|
|
647
|
+
cmdEnd = i;
|
|
648
|
+
}
|
|
649
|
+
const cmd = input.slice(i, cmdEnd);
|
|
650
|
+
// Determine the command's argument-mode hint (if any). Upstream's
|
|
651
|
+
// psqlscanslash.l flips between `<xslasharg>` (default; breaks on `\`)
|
|
652
|
+
// and `<xslashwholeline>` (no `\` break) based on `option_type`. We
|
|
653
|
+
// recreate that decision here so commands declared with `argMode:
|
|
654
|
+
// 'whole-line'` (`\!`, `\sf`, `\sv`, `\copy`, `\help`, etc.) capture
|
|
655
|
+
// embedded `\else` / `\endif` etc. as plain argument text instead of
|
|
656
|
+
// having the scanner treat them as a new command boundary. Filepipe
|
|
657
|
+
// commands enter whole-line mode only when the first non-whitespace
|
|
658
|
+
// character is `|` — matching upstream's `<xslashargstart>` rule.
|
|
659
|
+
const argMode = slashCmdMode?.(cmd);
|
|
660
|
+
let consumeWholeLine = argMode === 'whole-line';
|
|
661
|
+
if (argMode === 'filepipe') {
|
|
662
|
+
// Skip leading whitespace inside the arg; if the next char is `|`,
|
|
663
|
+
// upstream switches into `<xslashwholeline>` for the rest of the line.
|
|
664
|
+
let p = cmdEnd;
|
|
665
|
+
while (p < input.length && (input[p] === ' ' || input[p] === '\t')) {
|
|
666
|
+
p++;
|
|
667
|
+
}
|
|
668
|
+
if (input[p] === '|')
|
|
669
|
+
consumeWholeLine = true;
|
|
670
|
+
}
|
|
671
|
+
// Rest of the line — up to a newline OR (for normal-mode commands)
|
|
672
|
+
// the next unquoted backslash command on the same line. Mirrors
|
|
673
|
+
// upstream `psqlscanslash.l`'s <xslasharg> exit-on-`\` rule: a second
|
|
674
|
+
// backslash on the same line STARTS a new slash command and
|
|
675
|
+
// terminates the previous one's arg list. We track minimal quote
|
|
676
|
+
// state (single, double, back) so backslashes inside arg quotes
|
|
677
|
+
// don't trigger the boundary.
|
|
678
|
+
//
|
|
679
|
+
// Upstream special: `\\` (two consecutive backslashes) is the
|
|
680
|
+
// "flush-and-continue" separator. It terminates the current command's
|
|
681
|
+
// args AND is itself consumed silently — the next iteration's input
|
|
682
|
+
// starts immediately after the `\\` pair. Without this, our scanner
|
|
683
|
+
// would surface the second `\` as a fresh empty-name backslash command
|
|
684
|
+
// (and the dispatcher would log "invalid command \"), spuriously
|
|
685
|
+
// doubling diagnostics for shapes like `\gset pref \\ \echo foo`.
|
|
686
|
+
// See psqlscanslash.l <xslasharg> rule for the upstream equivalent.
|
|
687
|
+
//
|
|
688
|
+
// Whole-line mode (`\!`, `\sf`, `\sv`, `\help`, `\copy`, and filepipe
|
|
689
|
+
// commands with a leading `|`) skips the `\` boundary entirely:
|
|
690
|
+
// upstream's `<xslashwholeline>` state only matches `{space}+` and
|
|
691
|
+
// `{other}`, with no rule for `\` — so `\` characters end up in the
|
|
692
|
+
// ECHO sink as plain argument text.
|
|
693
|
+
let restEnd = cmdEnd;
|
|
694
|
+
let inSingle = false;
|
|
695
|
+
let inDouble = false;
|
|
696
|
+
let inBack = false;
|
|
697
|
+
// Set true when we exit the loop via the `\\` separator branch. In that
|
|
698
|
+
// case `consumed` skips both backslashes so the next scan doesn't see
|
|
699
|
+
// the trailing `\` as a fresh empty-name backslash command.
|
|
700
|
+
let sawDoubleBackslashSeparator = false;
|
|
701
|
+
while (restEnd < input.length &&
|
|
702
|
+
input[restEnd] !== '\n' &&
|
|
703
|
+
input[restEnd] !== '\r') {
|
|
704
|
+
const ch = input[restEnd];
|
|
705
|
+
if (consumeWholeLine) {
|
|
706
|
+
// Whole-line / filepipe-pipe path: never break on `\`. Just walk
|
|
707
|
+
// to end of line so the entire tail lands in `rest`.
|
|
708
|
+
restEnd++;
|
|
709
|
+
continue;
|
|
710
|
+
}
|
|
711
|
+
if (inSingle) {
|
|
712
|
+
// C-style `\'` escape inside single quotes.
|
|
713
|
+
if (ch === '\\' && input[restEnd + 1] !== undefined) {
|
|
714
|
+
restEnd += 2;
|
|
715
|
+
continue;
|
|
716
|
+
}
|
|
717
|
+
if (ch === "'")
|
|
718
|
+
inSingle = false;
|
|
719
|
+
}
|
|
720
|
+
else if (inDouble) {
|
|
721
|
+
if (ch === '"')
|
|
722
|
+
inDouble = false;
|
|
723
|
+
}
|
|
724
|
+
else if (inBack) {
|
|
725
|
+
if (ch === '`')
|
|
726
|
+
inBack = false;
|
|
727
|
+
}
|
|
728
|
+
else {
|
|
729
|
+
if (ch === '\\') {
|
|
730
|
+
// `\\` = "flush and continue" separator: end this command's
|
|
731
|
+
// args HERE, but consume both backslashes so the next scan
|
|
732
|
+
// resumes immediately after the pair. A lone `\` is a regular
|
|
733
|
+
// next-slash-cmd boundary; we stop at it without consuming so
|
|
734
|
+
// the next iteration picks it up.
|
|
735
|
+
if (input[restEnd + 1] === '\\') {
|
|
736
|
+
sawDoubleBackslashSeparator = true;
|
|
737
|
+
}
|
|
738
|
+
break;
|
|
739
|
+
}
|
|
740
|
+
if (ch === "'")
|
|
741
|
+
inSingle = true;
|
|
742
|
+
else if (ch === '"')
|
|
743
|
+
inDouble = true;
|
|
744
|
+
else if (ch === '`')
|
|
745
|
+
inBack = true;
|
|
746
|
+
}
|
|
747
|
+
restEnd++;
|
|
748
|
+
}
|
|
749
|
+
const rest = input.slice(cmdEnd, restEnd);
|
|
750
|
+
const consumed = sawDoubleBackslashSeparator ? restEnd + 2 : restEnd;
|
|
751
|
+
// Note: we *don't* consume the newline; it's left for the next chunk
|
|
752
|
+
// so caller can see PROMPT1 reset cleanly. Upstream's
|
|
753
|
+
// `psql_scan_slash_command_end()` does eat the trailing newline, but
|
|
754
|
+
// doing so HERE would lose the inter-line `\n` separator when a
|
|
755
|
+
// non-buffer-consuming slash command (e.g. `\echo`) sits between two
|
|
756
|
+
// lines of a continuing multi-line query. We compensate elsewhere:
|
|
757
|
+
// the mainloop's `reset-buf` branch strips a residual leading `\n`
|
|
758
|
+
// from the working chunk so the next statement's queryBuf doesn't
|
|
759
|
+
// pick it up via the `eof` accumulation path.
|
|
760
|
+
return {
|
|
761
|
+
kind: 'backslash',
|
|
762
|
+
cmd,
|
|
763
|
+
rest,
|
|
764
|
+
sql,
|
|
765
|
+
consumed,
|
|
766
|
+
nextState: cloneState(st),
|
|
767
|
+
};
|
|
768
|
+
}
|
|
769
|
+
// Variable substitution (`:NAME`, `:'NAME'`, `:"NAME"`). Only fires at
|
|
770
|
+
// top level — not inside strings / dollar-quoted blocks / identifiers /
|
|
771
|
+
// comments, all of which are handled above this point.
|
|
772
|
+
//
|
|
773
|
+
// `::` (PostgreSQL cast operator): emit BOTH colons as a single unit so
|
|
774
|
+
// the second `:` doesn't get re-examined on the next iteration and
|
|
775
|
+
// wrongly interpreted as the start of a `:NAME` substitution. This is
|
|
776
|
+
// load-bearing for `'foo'::int`-style casts and matches upstream
|
|
777
|
+
// `psqlscan.l`, which absorbs the `::` via its `{op_chars}+` operator
|
|
778
|
+
// rule before the `:{variable}` rule can fire. We do not need to gate
|
|
779
|
+
// this on `varLookup` — without substitution the result is byte-
|
|
780
|
+
// identical to the catch-all path.
|
|
781
|
+
if (c === ':' && input[i + 1] === ':') {
|
|
782
|
+
sql += '::';
|
|
783
|
+
i += 2;
|
|
784
|
+
continue;
|
|
785
|
+
}
|
|
786
|
+
if (c === ':') {
|
|
787
|
+
const sub = tryConsumeVarSubstitution(input, i, varLookup);
|
|
788
|
+
if (sub !== null) {
|
|
789
|
+
sql += sub.text;
|
|
790
|
+
i = sub.end;
|
|
791
|
+
continue;
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
// Identifier — lexed as a whole token so we can run upstream's
|
|
795
|
+
// `psqlscan.l` `{identifier}` rule that gates `BEGIN`/`CASE`/`END`
|
|
796
|
+
// depth tracking on a leading `CREATE [OR REPLACE] {FUNCTION|PROCEDURE}`
|
|
797
|
+
// signature. We consume `[A-Za-z_][A-Za-z0-9_]*` greedily; ASCII letters
|
|
798
|
+
// are sufficient for the keywords we recognise (upstream's `identifier`
|
|
799
|
+
// class is wider but the BEGIN-tracking logic only cares about lowercased
|
|
800
|
+
// letter prefixes of these specific keywords).
|
|
801
|
+
if (/[A-Za-z_]/.test(c)) {
|
|
802
|
+
let j = i + 1;
|
|
803
|
+
while (j < input.length && /[A-Za-z0-9_]/.test(input[j]))
|
|
804
|
+
j++;
|
|
805
|
+
const word = input.slice(i, j);
|
|
806
|
+
maybeTrackBeginEnd(st, word);
|
|
807
|
+
sql += word;
|
|
808
|
+
i = j;
|
|
809
|
+
continue;
|
|
810
|
+
}
|
|
811
|
+
// Anything else: just emit. This is the catch-all matching upstream's
|
|
812
|
+
// `{self}`, `{operator}`, `{numeric}`, `{other}` rules — none of which
|
|
813
|
+
// can change scanner state at the top level.
|
|
814
|
+
sql += c;
|
|
815
|
+
i++;
|
|
816
|
+
}
|
|
817
|
+
// --- End of input. ---
|
|
818
|
+
//
|
|
819
|
+
// Decide between 'incomplete' (still inside something) and 'eof' (clean
|
|
820
|
+
// break, but no semicolon was found in this chunk).
|
|
821
|
+
const hasOpenContext = st.inBlockComment > 0 ||
|
|
822
|
+
st.inSingleQuote ||
|
|
823
|
+
st.inDoubleQuote ||
|
|
824
|
+
st.dollarTag !== null ||
|
|
825
|
+
st.parenDepth > 0;
|
|
826
|
+
if (hasOpenContext) {
|
|
827
|
+
const promptStatus = computePromptStatus(st);
|
|
828
|
+
st.promptStatus = promptStatus;
|
|
829
|
+
return {
|
|
830
|
+
kind: 'incomplete',
|
|
831
|
+
sql,
|
|
832
|
+
consumed: i,
|
|
833
|
+
nextState: cloneState(st),
|
|
834
|
+
promptStatus,
|
|
835
|
+
};
|
|
836
|
+
}
|
|
837
|
+
// COPY-data handling is a mainloop concern, not a scanner concern.
|
|
838
|
+
// After a `COPY ... FROM STDIN` statement libpq returns PGRES_COPY_IN; the
|
|
839
|
+
// mainloop bypasses the scanner and forwards raw lines until `\.`. See the
|
|
840
|
+
// file header for the contract.
|
|
841
|
+
return {
|
|
842
|
+
kind: 'eof',
|
|
843
|
+
sql,
|
|
844
|
+
consumed: i,
|
|
845
|
+
nextState: cloneState(st),
|
|
846
|
+
};
|
|
847
|
+
};
|
|
848
|
+
/**
 * Break an entire script into its individual statement strings.
 *
 * Intended for non-streaming input where the whole text is available up
 * front. The script is fed to {@link scanSql} repeatedly; every boundary the
 * scanner reports becomes one entry in the returned array, and the scan
 * resumes right after the consumed prefix with the scanner's `nextState`.
 *
 * What gets pushed for each scanner result:
 *  - `semicolon`: the scanner's `sql` when `varLookup` is supplied (so any
 *    substitutions it applied are kept), otherwise the raw consumed slice.
 *  - `backslash`: always the raw consumed slice — i.e. any SQL buffered
 *    before the `\`, the command itself and its argument text — left
 *    untouched for the caller to dispatch.
 *  - `eof` / `incomplete`: the trailing unterminated residue (scanner `sql`
 *    with `varLookup`, raw remainder without). This ends the split.
 *
 * When `varLookup` is omitted every entry is a raw slice of `input`, so
 * concatenating the result reproduces `input` byte for byte. With
 * `varLookup` that round-trip no longer holds.
 *
 * @param input Full script text to split.
 * @param varLookup Optional variable resolver, forwarded to `scanSql`; its
 *   presence also selects scanner output over raw slices (see above).
 * @param slashCmdMode Forwarded to `scanSql` unchanged.
 * @param options Forwarded to `scanSql` unchanged.
 * @returns Array of statement / backslash-command strings in input order.
 */
export const splitStatements = (input, varLookup, slashCmdMode, options) => {
    const statements = [];
    let pending = input;
    let scanState = initialScanState();
    // Defensive bound: every boundary consumes at least one character, so a
    // correct scanner can never need more passes than this. Hitting the cap
    // means the scanner failed to make progress; we stop rather than spin.
    for (let pass = 1; pending.length > 0 && pass <= input.length + 10; pass++) {
        const result = scanSql(pending, scanState, varLookup, slashCmdMode, options);
        switch (result.kind) {
            case 'semicolon':
            case 'backslash': {
                // Only terminated SQL statements take the scanner's rewritten
                // text; backslash commands are always passed through raw.
                const preferScannerText = result.kind === 'semicolon' && varLookup;
                statements.push(preferScannerText
                    ? result.sql
                    : pending.slice(0, result.consumed));
                pending = pending.slice(result.consumed);
                scanState = result.nextState;
                break;
            }
            case 'eof':
            case 'incomplete':
                // No further boundary exists. `pending` is non-empty here (loop
                // condition), so the residue is always emitted.
                statements.push(varLookup ? result.sql : pending);
                return statements;
            default:
                // Unrecognised result kind: nothing is consumed; the pass cap
                // above eventually terminates the loop.
                break;
        }
    }
    return statements;
};
|