@blamejs/core 0.14.26 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +2 -2
  3. package/index.js +4 -0
  4. package/lib/agent-envelope-mac.js +104 -0
  5. package/lib/agent-event-bus.js +105 -4
  6. package/lib/agent-posture-chain.js +8 -42
  7. package/lib/ai-content-detect.js +9 -10
  8. package/lib/api-key.js +107 -74
  9. package/lib/atomic-file.js +62 -4
  10. package/lib/audit-chain.js +47 -11
  11. package/lib/audit-sign.js +77 -2
  12. package/lib/audit-tools.js +79 -51
  13. package/lib/audit.js +249 -123
  14. package/lib/auth/openid-federation.js +108 -47
  15. package/lib/backup/index.js +13 -10
  16. package/lib/break-glass.js +202 -144
  17. package/lib/cache.js +174 -105
  18. package/lib/chain-writer.js +38 -16
  19. package/lib/cli.js +19 -14
  20. package/lib/cluster-provider-db.js +130 -104
  21. package/lib/cluster-storage.js +119 -22
  22. package/lib/cluster.js +119 -71
  23. package/lib/compliance.js +169 -4
  24. package/lib/consent.js +73 -24
  25. package/lib/constants.js +16 -11
  26. package/lib/crypto-field.js +474 -92
  27. package/lib/db-declare-row-policy.js +35 -22
  28. package/lib/db-file-lifecycle.js +3 -2
  29. package/lib/db-query.js +497 -255
  30. package/lib/db-schema.js +209 -44
  31. package/lib/db.js +176 -95
  32. package/lib/error-page.js +14 -1
  33. package/lib/external-db-migrate.js +229 -139
  34. package/lib/external-db.js +25 -15
  35. package/lib/file-upload.js +52 -7
  36. package/lib/framework-error.js +14 -1
  37. package/lib/framework-files.js +73 -0
  38. package/lib/framework-schema.js +695 -394
  39. package/lib/gate-contract.js +649 -1
  40. package/lib/guard-agent-registry.js +26 -44
  41. package/lib/guard-all.js +1 -0
  42. package/lib/guard-auth.js +42 -112
  43. package/lib/guard-cidr.js +33 -154
  44. package/lib/guard-csv.js +46 -113
  45. package/lib/guard-domain.js +34 -157
  46. package/lib/guard-dsn.js +27 -43
  47. package/lib/guard-email.js +47 -69
  48. package/lib/guard-envelope.js +19 -32
  49. package/lib/guard-event-bus-payload.js +24 -42
  50. package/lib/guard-event-bus-topic.js +25 -43
  51. package/lib/guard-filename.js +42 -106
  52. package/lib/guard-graphql.js +42 -123
  53. package/lib/guard-html.js +53 -108
  54. package/lib/guard-idempotency-key.js +24 -42
  55. package/lib/guard-image.js +46 -103
  56. package/lib/guard-imap-command.js +18 -32
  57. package/lib/guard-jmap.js +16 -30
  58. package/lib/guard-json.js +38 -108
  59. package/lib/guard-jsonpath.js +38 -171
  60. package/lib/guard-jwt.js +49 -179
  61. package/lib/guard-list-id.js +25 -41
  62. package/lib/guard-list-unsubscribe.js +27 -43
  63. package/lib/guard-mail-compose.js +24 -42
  64. package/lib/guard-mail-move.js +26 -44
  65. package/lib/guard-mail-query.js +28 -46
  66. package/lib/guard-mail-reply.js +24 -42
  67. package/lib/guard-mail-sieve.js +24 -42
  68. package/lib/guard-managesieve-command.js +17 -31
  69. package/lib/guard-markdown.js +37 -104
  70. package/lib/guard-message-id.js +26 -45
  71. package/lib/guard-mime.js +39 -151
  72. package/lib/guard-oauth.js +54 -135
  73. package/lib/guard-pdf.js +45 -101
  74. package/lib/guard-pop3-command.js +21 -31
  75. package/lib/guard-posture-chain.js +24 -42
  76. package/lib/guard-regex.js +33 -107
  77. package/lib/guard-saga-config.js +24 -42
  78. package/lib/guard-shell.js +42 -172
  79. package/lib/guard-smtp-command.js +48 -54
  80. package/lib/guard-snapshot-envelope.js +24 -42
  81. package/lib/guard-sql.js +1491 -0
  82. package/lib/guard-stream-args.js +24 -43
  83. package/lib/guard-svg.js +47 -65
  84. package/lib/guard-template.js +35 -172
  85. package/lib/guard-tenant-id.js +26 -45
  86. package/lib/guard-time.js +32 -154
  87. package/lib/guard-trace-context.js +25 -44
  88. package/lib/guard-uuid.js +32 -153
  89. package/lib/guard-xml.js +38 -113
  90. package/lib/guard-yaml.js +51 -163
  91. package/lib/http-client.js +37 -9
  92. package/lib/inbox.js +120 -107
  93. package/lib/legal-hold.js +107 -50
  94. package/lib/log-stream-cloudwatch.js +47 -31
  95. package/lib/log-stream-otlp.js +32 -18
  96. package/lib/mail-crypto-smime.js +2 -6
  97. package/lib/mail-greylist.js +2 -6
  98. package/lib/mail-helo.js +2 -6
  99. package/lib/mail-journal.js +85 -64
  100. package/lib/mail-rbl.js +2 -6
  101. package/lib/mail-scan.js +2 -6
  102. package/lib/mail-server-jmap.js +117 -12
  103. package/lib/mail-spam-score.js +2 -6
  104. package/lib/mail-store.js +287 -154
  105. package/lib/middleware/body-parser.js +71 -25
  106. package/lib/middleware/csrf-protect.js +19 -8
  107. package/lib/middleware/fetch-metadata.js +17 -7
  108. package/lib/middleware/idempotency-key.js +54 -38
  109. package/lib/middleware/rate-limit.js +102 -32
  110. package/lib/middleware/security-headers.js +21 -5
  111. package/lib/migrations.js +108 -66
  112. package/lib/network-heartbeat.js +7 -0
  113. package/lib/nonce-store.js +31 -9
  114. package/lib/object-store/azure-blob-bucket-ops.js +9 -4
  115. package/lib/object-store/azure-blob.js +57 -3
  116. package/lib/object-store/sigv4.js +10 -0
  117. package/lib/observability.js +87 -0
  118. package/lib/otel-export.js +25 -1
  119. package/lib/outbox.js +136 -82
  120. package/lib/parsers/safe-xml.js +47 -7
  121. package/lib/pqc-agent.js +44 -0
  122. package/lib/pubsub-cluster.js +42 -20
  123. package/lib/queue-local.js +202 -139
  124. package/lib/queue-redis.js +9 -1
  125. package/lib/queue-sqs.js +6 -0
  126. package/lib/redact.js +68 -11
  127. package/lib/redis-client.js +160 -31
  128. package/lib/retention.js +82 -39
  129. package/lib/router.js +212 -5
  130. package/lib/safe-dns.js +29 -45
  131. package/lib/safe-ical.js +18 -33
  132. package/lib/safe-icap.js +27 -43
  133. package/lib/safe-sieve.js +21 -40
  134. package/lib/safe-sql.js +124 -3
  135. package/lib/safe-vcard.js +18 -33
  136. package/lib/scheduler.js +35 -12
  137. package/lib/seeders.js +122 -74
  138. package/lib/session-stores.js +42 -14
  139. package/lib/session.js +109 -72
  140. package/lib/sql.js +3885 -0
  141. package/lib/ssrf-guard.js +51 -4
  142. package/lib/static.js +177 -34
  143. package/lib/subject.js +55 -17
  144. package/lib/vault/index.js +3 -2
  145. package/lib/vault/passphrase-ops.js +3 -2
  146. package/lib/vault/rotate.js +104 -64
  147. package/lib/vendor-data.js +2 -0
  148. package/lib/websocket.js +35 -5
  149. package/package.json +1 -1
  150. package/sbom.cdx.json +6 -6
@@ -0,0 +1,1491 @@
1
+ "use strict";
2
+ /**
3
+ * @module b.guardSql
4
+ * @nav Guards
5
+ * @title Guard SQL
6
+ * @order 460
7
+ *
8
+ * @intro
9
+ * Raw-SQL content-safety primitive. Gates the residual SQL surface
10
+ * the `b.sql` builder cannot structurally protect — the operator
11
+ * escape hatches that take a SQL string verbatim: `whereRaw` /
12
+ * `setRaw` / `fromRaw` fragments, operator-supplied single-statement
13
+ * SQL, and migration scripts. Everything `b.sql` composes by
14
+ * construction (column-membership gate, `?`-placeholder binding,
15
+ * dialect-final quoting) is already injection-safe; this guard
16
+ * defends only the bytes a human handed the framework as opaque SQL.
17
+ *
18
+ * ## Tokenizer-first, never regex-over-raw
19
+ *
20
+ * Every detector runs on a NORMALIZED token stream, not the raw
21
+ * string. Naive regex over raw SQL is bypassable in three ways this
22
+ * guard closes:
23
+ *
24
+ * 1. Comment splitting — `LOAD/**` + `**`/`_FILE` reads as
25
+ * `LOAD_FILE` to MySQL but `LOAD` `_FILE` to a raw-regex scan.
26
+ * The normalizer strips comments and collapses the residue so
27
+ * the keyword detector sees the post-comment token.
28
+ * 2. String-literal smuggling — a keyword inside `'pg_read_file'`
29
+ * is data, not a call; the normalizer masks literal + dollar-
30
+ * quoted (`$tag$...$tag$`) spans so a detector never fires on
31
+ * bytes the engine treats as a value.
32
+ * 3. Encoding bypass — invalid / non-shortest UTF-8 lets a multi-
33
+ * byte sequence decode to an ASCII metacharacter past a byte-
34
+ * level filter (the libpq client-encoding class, CVE-2025-1094,
35
+ * CVSS 8.1, actively exploited via BeyondTrust, public PoC).
36
+ * The encoding gate refuses the bytes before any token scan.
37
+ *
38
+ * Pipeline: (1) encoding gate → (2) normalizer (comment strip +
39
+ * literal/dollar-quote mask + intra-keyword-comment collapse) →
40
+ * (3) keyword + structural detectors on the normalized stream.
41
+ *
42
+ * ## Context modes
43
+ *
44
+ * The same byte string means different things depending on where it
45
+ * was handed in, so the gate takes a `ctx.mode`:
46
+ *
47
+ * - `fragment` (default; `whereRaw` / `setRaw` / `fromRaw`) — the
48
+ * bytes must be a single value expression. A top-level `;`, any
49
+ * statement-introducing verb, an embedded string literal, or any
50
+ * dangerous token refuses. This is the strictest context because
51
+ * the fragment lands inside a query the framework built.
52
+ * - `operator-sql` — one complete statement. Stacked statements
53
+ * refuse; the verb may be any single read or write.
54
+ * - `migration` — a multi-statement DDL script. Multiple statements
55
+ * and comments are permitted (and audited); each statement is
56
+ * re-classified and only the DDL-verb allowlist (`CREATE` /
57
+ * `ALTER` / `CREATE INDEX` / `DROP`) plus reads pass. The OS-reach
58
+ * floor (file / exec / FDW / privilege-pivot / extension /
59
+ * attach) still refuses — a migration never needs `COPY ...
60
+ * PROGRAM` or `load_extension`.
61
+ *
62
+ * ## Universal refuse floor (every profile, like the always-throw
63
+ * classes in guard-filename)
64
+ *
65
+ * These classes refuse under every profile including `permissive` —
66
+ * they are structurally unambiguous OS-reach / data-exfiltration /
67
+ * statement-smuggling, and no profile downgrades them:
68
+ *
69
+ * - Stacked top-level `;` (a second statement past the first).
70
+ * - Comment smuggling — an unterminated `/*` and the MySQL
71
+ * executable-comment form `/*!...`.
72
+ * - Embedded string literal in `fragment` mode.
73
+ * - Postgres OS reach — `COPY ... PROGRAM`, `COPY TO/FROM <file>`,
74
+ * `lo_import` / `lo_export` / `lo_get` / `lo_put` / `loread` /
75
+ * `lowrite`, `pg_read_file` / `pg_read_binary_file` /
76
+ * `pg_ls_*` / `pg_stat_file`, adminpack `pg_file_write` /
77
+ * `pg_file_unlink` / `pg_file_rename`, `dblink*` /
78
+ * `postgres_fdw` / `CREATE SERVER` / `CREATE SUBSCRIPTION`,
79
+ * `CREATE EXTENSION`, `CREATE [OR REPLACE] FUNCTION ... LANGUAGE`
80
+ * (plperlu / plpython3u / c), `DO` blocks, `SET ROLE` / `SET
81
+ * SESSION AUTHORIZATION` / `SET search_path`, `ALTER SYSTEM`.
82
+ * - SQLite OS reach — `ATTACH` / `DETACH DATABASE`,
83
+ * `load_extension`, `PRAGMA writable_schema`, `PRAGMA
84
+ * trusted_schema=ON`, `PRAGMA key` / `PRAGMA rekey`,
85
+ * `fts3_tokenizer`, `writefile` / `readfile` / `edit`, writes to
86
+ * `sqlite_master` / `sqlite_*`.
87
+ * - MySQL OS reach — `LOAD_FILE`, `INTO OUTFILE` / `INTO DUMPFILE`,
88
+ * `LOAD DATA [LOCAL] INFILE`, `CREATE FUNCTION ... SONAME`,
89
+ * `sys_exec` / `sys_eval` / `do_system`, `SET GLOBAL` of a
90
+ * sensitive variable (`general_log` / `local_infile` /
91
+ * `log_bin_trust_function_creators` / `secure_file_priv`).
92
+ * - Cross-dialect — time-based blind probes (`SLEEP` / `pg_sleep` /
93
+ * `WAITFOR DELAY` / `BENCHMARK` / `GET_LOCK`) and a set-operation
94
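+  * (`UNION` / `INTERSECT` / `EXCEPT`) inside a predicate fragment. Exception: the time-based probes are profile-dependent — `strict` refuses them, `balanced` / `permissive` audit (see Profiles).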
95
+ *
96
+ * ## Profiles
97
+ *
98
+ * `strict` (default for request-path `whereRaw`) refuses the whole
99
+  * floor plus non-UTF-8 plus schema-recon references
100
+  * (`information_schema` / `performance_schema` / `mysql.` /
101
+  * `pg_catalog` / `sys.`). `balanced` refuses the RCE / file / exec /
102
+ * FDW / privilege-pivot / stacked / embedded-literal / comment /
103
+ * invalid-encoding classes and audits schema-recon + time-based.
104
+ * `permissive` audits the keyword families but STILL hard-refuses the
105
+ * stacked-statement, invalid-encoding, and irreducible OS-reach floor
106
+ * — the structurally-unambiguous classes never relax.
107
+ *
108
+ * ## Compliance postures + audit
109
+ *
110
+ * `hipaa` / `pci-dss` / `gdpr` / `soc2` all map to the `strict`
111
+ * floor. Every decision emits a signed audit entry (PCI-DSS 10.2 /
112
+ * SOC 2 CC7 evidence). Under `gdpr` the audited fragment body is
113
+ * replaced with a salted hash fingerprint — a raw `whereRaw`
114
+ * predicate may carry personal data, so the audit records a stable
115
+ * identifier without the plaintext.
116
+ *
117
+ * ## Threat grounding
118
+ *
119
+ * Encoding-bypass: CVE-2025-1094 (PostgreSQL libpq, CVSS 8.1, KEV /
120
+ * actively exploited via BeyondTrust, public PoC). SQLite memory
121
+ * corruption reachable from crafted SQL: CVE-2025-6965 (CVSS 9.8,
122
+ * active) — the connection-hardening notes pin `node:sqlite`
123
+ * >= 3.50.2. MySQL `LOCAL INFILE` client-side file read:
124
+ * CVE-2025-62611. Injection leading to compromise, CISA KEV:
125
+ * CVE-2025-25181. The file / exec / FDW / extension constructs this
126
+ * guard refuses are by-design-dangerous SQL features, not patchable
127
+ * product defects — the defense is refusing them at the raw-SQL
128
+ * boundary, never accepting them from operator-supplied SQL.
129
+ *
130
+ * Source file is pure ASCII; every attack character (dollar markers,
131
+ * multibyte encoding-bypass bytes, control bytes) is composed from
132
+ * numeric codepoints, never embedded as a literal.
133
+ *
134
+ * @card
135
+ * Raw-SQL content-safety primitive. Tokenizer-first defense for the
136
+ * `whereRaw` / operator-SQL / migration surface b.sql cannot guard by
137
+ * construction — refuses stacked statements, comment smuggling,
138
+ * invalid encoding (CVE-2025-1094), and the file / exec / FDW /
139
+ * privilege-pivot OS-reach floor across Postgres / SQLite / MySQL.
140
+ */
141
+
142
+ var gateContract = require("./gate-contract");
143
+ var safeSql = require("./safe-sql");
144
+ var C = require("./constants");
145
+ var bCrypto = require("./crypto");
146
+ var lazyRequire = require("./lazy-require");
147
+ var { GuardSqlError } = require("./framework-error");
148
+
149
+ var audit = lazyRequire(function () { return require("./audit"); });
150
+
151
+ var _err = GuardSqlError.factory;
152
+
153
+ // ---- Context modes ----
154
+
155
+ // fragment — whereRaw / setRaw / fromRaw value expression. Strictest.
156
+ // operator-sql — one complete statement.
157
+ // migration — multi-statement DDL script (each statement re-classified).
158
// The default context is `fragment`; it is also the first entry of the
// accepted set (fragment, then operator-sql, then migration).
var DEFAULT_CONTEXT_MODE = "fragment";
var CONTEXT_MODES = Object.freeze([
  DEFAULT_CONTEXT_MODE,
  "operator-sql",
  "migration",
]);
160
+
161
+ // ---- Statement verbs that introduce a statement (refused inside a
162
+ // fragment, which must be a bare value expression). ----
163
// Frozen lookup table (VERB -> true) of the keywords that open a
// statement. Built from an ordered list so the key order matches the
// order the verbs are written in.
var STATEMENT_VERBS = Object.freeze([
  "SELECT", "INSERT", "UPDATE", "DELETE", "MERGE",
  "UPSERT", "REPLACE", "CREATE", "ALTER", "DROP",
  "TRUNCATE", "GRANT", "REVOKE", "WITH", "VALUES",
  "TABLE", "COPY", "CALL", "EXECUTE", "DO",
  "ATTACH", "DETACH", "PRAGMA", "SET", "RESET",
  "BEGIN", "COMMIT", "ROLLBACK", "SAVEPOINT", "VACUUM",
  "ANALYZE", "REINDEX", "EXPLAIN", "SHOW", "USE",
  "DESCRIBE", "LOAD",
].reduce(function (table, verb) {
  table[verb] = true;
  return table;
}, {}));
173
+
174
+ // Statement verbs that are a floor refusal when they LEAD a statement,
175
+ // regardless of profile or context. A procedural-execution verb (DO
176
+ // anonymous block, CALL / EXECUTE a stored routine) runs code through
177
+ // the raw-SQL surface — operator SQL never legitimately does this, and
178
+ // the DO body is masked by the dollar-quote normalizer so a token-level
179
+ // detector cannot see into it. The leading-verb scan is the reliable
180
+ // catch for the whole class.
181
// Frozen lookup table (VERB -> true) of the procedural-execution verbs
// that are a floor refusal whenever they lead a statement.
var LEADING_VERB_FLOOR = Object.freeze(
  ["DO", "CALL", "EXECUTE"].reduce(function (table, verb) {
    table[verb] = true;
    return table;
  }, {})
);
184
+
185
+ // Migration DDL-verb allowlist — the only statement verbs a migration
186
+ // script may use (plus reads, classified separately). CREATE INDEX is a
187
+ // CREATE; the OS-reach floor still strips CREATE EXTENSION / SERVER /
188
+ // SUBSCRIPTION / FUNCTION-LANGUAGE out of the CREATE family.
189
// Frozen lookup table (VERB -> true) of the DDL verbs on the migration
// allowlist.
var MIGRATION_DDL_VERBS = Object.freeze(
  ["CREATE", "ALTER", "DROP", "RENAME", "COMMENT", "TRUNCATE"]
    .reduce(function (table, verb) {
      table[verb] = true;
      return table;
    }, {})
);
193
+
194
+ // Non-DDL verbs a migration script may also run: reads (a migration often
195
+ // SELECTs to backfill; the read entries mirror external-db's READ classification), session / transaction control, and DML writes.
196
// Frozen lookup table (VERB -> true), listed in three groups: reads,
// session / transaction control, and DML writes.
var MIGRATION_READ_VERBS = Object.freeze([
  // reads
  "SELECT", "VALUES", "TABLE", "WITH",
  // session / transaction control
  "SET", "RESET", "BEGIN", "START", "COMMIT",
  "ROLLBACK", "SAVEPOINT", "RELEASE",
  // DML writes
  "INSERT", "UPDATE", "DELETE", "MERGE", "UPSERT",
  "REPLACE",
].reduce(function (table, verb) {
  table[verb] = true;
  return table;
}, {}));
203
+
204
+ // ---- Numeric codepoints for the attack/marker bytes (source stays
205
+ // pure ASCII — no attack character appears as a literal). ----
206
// Marker characters as numeric codepoints, grouped by role.

// Quotes and identifier delimiters.
var CP_SQUOTE = 0x27;   // single quote - string-literal quote
var CP_DQUOTE = 0x22;   // double quote - double-quoted identifier
var CP_BACKTICK = 0x60; // backtick - MySQL identifier quote
var CP_DOLLAR = 0x24;   // dollar sign - Postgres dollar-quote marker

// Comment introducers.
var CP_DASH = 0x2D;     // hyphen - line-comment lead char
var CP_SLASH = 0x2F;    // slash - block-comment lead char
var CP_HASH = 0x23;     // hash - MySQL line comment
var CP_BANG = 0x21;     // exclamation mark - MySQL executable-comment marker

// Statement separator.
var CP_SEMI = 0x3B;     // semicolon

// One-character strings for the markers that are also needed as text.
var SQUOTE = String.fromCharCode(CP_SQUOTE);
var DQUOTE = String.fromCharCode(CP_DQUOTE);
var DOLLAR = String.fromCharCode(CP_DOLLAR);

// Replacement the normalizer leaves where a comment was removed, so the
// tokens on either side of the comment stay separate instead of fusing.
// A comment placed inside a keyword is dealt with by a secondary
// collapse step, not by this constant.
var MASK_SPACE = " ";
224
+
225
+ // ---- Dangerous-construct detector table ----
226
+ //
227
+ // Each entry: { code, severity, kind, family, dialect, re, reason }. `re` runs against
227
+ // the NORMALIZED stream (comments stripped, literal/dollar-quote spans
228
+ // masked, lower/upper-insensitive). `family` names the taxonomy bucket the
229
+ // profile decision keys on. Regexes are word-boundary + whitespace-
231
+ // tolerant; they never run on raw bytes.
232
+ //
233
+ // Family taxonomy (used by the profile decision):
234
+ // floor — refuse at EVERY profile (irreducible OS-reach /
235
+ // statement-smuggling).
236
+ // rce-file — file read/write, code exec, FDW, extension, priv-
237
+ // pivot. strict + balanced refuse; permissive audits
238
+ // (except the floor subset, which the floor list covers).
239
+ // recon — schema-enumeration reads. strict refuses; balanced /
240
+ // permissive audit.
241
+ // timing — time-based blind probes. strict refuses; balanced /
242
+ // permissive audit.
243
+ // exfil — UNION/INTERSECT/EXCEPT inside a predicate. floor in
244
+ // fragment mode (a value expression has no set op).
245
+
246
/**
 * Compile one detector pattern.
 *
 * Patterns are supplied as plain source strings rather than regex
 * literals, and are compiled case-insensitive and without the global
 * flag, so a detector only ever answers "is there a match".
 *
 * @param {string} source  regular-expression source text
 * @returns {RegExp} the compiled, case-insensitive, non-global regex
 */
function _re(source) {
  var flags = "i";
  return new RegExp(source, flags); // allow:dynamic-regex — detector source is a compile-time ASCII literal table below
}
252
+
253
+ // \b word-boundary + optional whitespace/paren tolerance baked into
254
+ // each source string. `[\s]` spans the comment-collapsed single spaces.
255
// Dangerous-construct detector table.
//
// Entry shape:
//   code     - machine-readable issue code ("sql.*").
//   severity - "critical" or "high".
//   kind     - unique short name of the individual detector.
//   family   - taxonomy bucket ("floor", "timing", "recon").
//   dialect  - engine the construct belongs to ("postgres", "sqlite",
//              "mysql", "mssql", or "cross").
//   re       - case-insensitive, non-global regex built by _re().
//   reason   - human-readable explanation.
//
// Patterns are written for the normalized stream (comments removed,
// literal spans masked), so they only look at keyword / identifier
// tokens and the whitespace between them.
var DETECTORS = [
  // ---- Postgres OS reach ----
  { code: "sql.copy-program", severity: "critical", kind: "copy-program",
    family: "floor", dialect: "postgres",
    re: _re("\\bCOPY\\b[\\s\\S]{0,4000}?\\bPROGRAM\\b"),
    reason: "COPY ... PROGRAM executes a shell command (Postgres RCE)" },
  // The lookahead also rejects a whitespace character. Without that, the
  // greedy \s+ can give back one character and the lookahead then sits on
  // whitespace instead of the keyword, so "FROM  STDIN" (two or more
  // whitespace characters) was reported as file access.
  // NOTE(review): "COPY (SELECT ... FROM t) TO STDOUT" still matches,
  // because the first FROM found is the one inside the subquery.
  { code: "sql.file-access", severity: "critical", kind: "copy-file",
    family: "floor", dialect: "postgres",
    re: _re("\\bCOPY\\b[\\s\\S]{0,4000}?\\b(?:TO|FROM)\\b\\s+(?!\\s|STDIN\\b|STDOUT\\b)"),
    reason: "COPY TO/FROM <file> reads or writes a server-side file" },
  { code: "sql.file-access", severity: "critical", kind: "large-object",
    family: "floor", dialect: "postgres",
    re: _re("\\b(?:lo_import|lo_export|lo_get|lo_put|loread|lowrite)\\s*\\("),
    reason: "large-object file primitive (Postgres server-side file I/O)" },
  { code: "sql.file-access", severity: "critical", kind: "pg-read-file",
    family: "floor", dialect: "postgres",
    re: _re("\\b(?:pg_read_file|pg_read_binary_file|pg_stat_file)\\s*\\("),
    reason: "pg_read_file / pg_stat_file reads a server-side file" },
  { code: "sql.file-access", severity: "critical", kind: "pg-ls",
    family: "floor", dialect: "postgres",
    re: _re("\\bpg_ls_[a-z_]+\\s*\\("),
    reason: "pg_ls_* enumerates server-side directories" },
  { code: "sql.file-access", severity: "critical", kind: "adminpack",
    family: "floor", dialect: "postgres",
    re: _re("\\bpg_file_(?:write|unlink|rename)\\s*\\("),
    reason: "adminpack pg_file_* writes / renames / unlinks server files" },
  { code: "sql.outbound-fdw", severity: "critical", kind: "dblink",
    family: "floor", dialect: "postgres",
    re: _re("\\bdblink[a-z_]*\\s*\\("),
    reason: "dblink opens an outbound connection (data exfil / SSRF)" },
  { code: "sql.outbound-fdw", severity: "critical", kind: "fdw",
    family: "floor", dialect: "postgres",
    re: _re("\\b(?:postgres_fdw|CREATE\\s+SERVER|CREATE\\s+SUBSCRIPTION)\\b"),
    reason: "foreign-data-wrapper / subscription opens an outbound channel" },
  { code: "sql.load-extension", severity: "critical", kind: "create-extension",
    family: "floor", dialect: "postgres",
    re: _re("\\bCREATE\\s+EXTENSION\\b"),
    reason: "CREATE EXTENSION loads server-side code" },
  { code: "sql.load-extension", severity: "critical", kind: "create-language-fn",
    family: "floor", dialect: "postgres",
    re: _re("\\bCREATE\\s+(?:OR\\s+REPLACE\\s+)?FUNCTION\\b[\\s\\S]{0,4000}?\\bLANGUAGE\\s+(?:plperlu|plpython3?u|c)\\b"),
    reason: "CREATE FUNCTION in an untrusted procedural language (RCE)" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "do-block",
    family: "floor", dialect: "postgres",
    re: _re("\\bDO\\s+(?:LANGUAGE\\s+[a-z0-9_]+\\s+)?(?:\\$|'|\\bBEGIN\\b)"),
    reason: "DO block runs an anonymous procedural-language body" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "set-role",
    family: "floor", dialect: "postgres",
    re: _re("\\bSET\\s+(?:LOCAL\\s+|SESSION\\s+)?ROLE\\b"),
    reason: "SET ROLE pivots to another database role" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "set-session-auth",
    family: "floor", dialect: "postgres",
    re: _re("\\bSET\\s+SESSION\\s+AUTHORIZATION\\b"),
    reason: "SET SESSION AUTHORIZATION pivots the session identity" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "set-search-path",
    family: "floor", dialect: "postgres",
    re: _re("\\bSET\\s+(?:LOCAL\\s+|SESSION\\s+)?search_path\\b"),
    reason: "SET search_path redirects unqualified name resolution (hijack)" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "alter-system",
    family: "floor", dialect: "postgres",
    re: _re("\\bALTER\\s+SYSTEM\\b"),
    reason: "ALTER SYSTEM rewrites server configuration" },

  // ---- SQLite OS reach ----
  // The three PRAGMA detectors accept an optional "schema." qualifier in
  // front of the pragma name (PRAGMA main.writable_schema = 1).
  { code: "sql.attach", severity: "critical", kind: "attach-db",
    family: "floor", dialect: "sqlite",
    re: _re("\\b(?:ATTACH|DETACH)\\s+(?:DATABASE\\b)?"),
    reason: "ATTACH / DETACH DATABASE mounts an external database file" },
  { code: "sql.load-extension", severity: "critical", kind: "sqlite-load-extension",
    family: "floor", dialect: "sqlite",
    re: _re("\\bload_extension\\s*\\("),
    reason: "load_extension() loads a shared library (RCE)" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "writable-schema",
    family: "floor", dialect: "sqlite",
    re: _re("\\bPRAGMA\\s+(?:[a-z_][a-z0-9_]*\\s*\\.\\s*)?writable_schema\\b"),
    reason: "PRAGMA writable_schema lets a write corrupt the schema table" },
  // SQLite booleans are 1 / yes / true / on, and a pragma value may be
  // given as "= value" or "(value)"; all enabling spellings are covered.
  { code: "sql.privilege-pivot", severity: "critical", kind: "trusted-schema",
    family: "floor", dialect: "sqlite",
    re: _re("\\bPRAGMA\\s+(?:[a-z_][a-z0-9_]*\\s*\\.\\s*)?trusted_schema\\s*[=(]?\\s*(?:on|yes|1|true)\\b"),
    reason: "PRAGMA trusted_schema=ON re-enables unsafe schema functions" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "sqlite-key",
    family: "floor", dialect: "sqlite",
    re: _re("\\bPRAGMA\\s+(?:[a-z_][a-z0-9_]*\\s*\\.\\s*)?(?:re)?key\\b"),
    reason: "PRAGMA key / rekey changes the database encryption key" },
  { code: "sql.file-access", severity: "critical", kind: "fts3-tokenizer",
    family: "floor", dialect: "sqlite",
    re: _re("\\bfts3_tokenizer\\s*\\("),
    reason: "fts3_tokenizer() is a known SQLite memory-corruption vector" },
  { code: "sql.file-access", severity: "critical", kind: "sqlite-fileio",
    family: "floor", dialect: "sqlite",
    re: _re("\\b(?:writefile|readfile|edit)\\s*\\("),
    reason: "writefile / readfile / edit perform host file I/O" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "sqlite-master-write",
    family: "floor", dialect: "sqlite",
    re: _re("\\b(?:INSERT|UPDATE|DELETE|REPLACE)\\b[\\s\\S]{0,4000}?\\bsqlite_(?:master|schema|sequence|stat[0-9]?)\\b"),
    reason: "write to sqlite_master / sqlite_* internal table" },

  // ---- MySQL OS reach ----
  { code: "sql.file-access", severity: "critical", kind: "mysql-load-file",
    family: "floor", dialect: "mysql",
    re: _re("\\bLOAD_FILE\\s*\\("),
    reason: "LOAD_FILE() reads a server-side file" },
  { code: "sql.file-access", severity: "critical", kind: "into-outfile",
    family: "floor", dialect: "mysql",
    re: _re("\\bINTO\\s+(?:OUTFILE|DUMPFILE)\\b"),
    reason: "INTO OUTFILE / DUMPFILE writes a server-side file" },
  // MySQL grammar: LOAD {DATA|XML} [LOW_PRIORITY|CONCURRENT] [LOCAL] INFILE.
  // The optional priority modifier and the XML variant are covered too.
  { code: "sql.file-access", severity: "critical", kind: "load-data-infile",
    family: "floor", dialect: "mysql",
    re: _re("\\bLOAD\\s+(?:DATA|XML)\\b(?:\\s+(?:LOW_PRIORITY|CONCURRENT))?(?:\\s+LOCAL)?\\s+INFILE\\b"),
    reason: "LOAD DATA [LOCAL] INFILE reads a client / server file (CVE-2025-62611)" },
  { code: "sql.load-extension", severity: "critical", kind: "create-fn-soname",
    family: "floor", dialect: "mysql",
    re: _re("\\bCREATE\\s+(?:AGGREGATE\\s+)?FUNCTION\\b[\\s\\S]{0,2000}?\\bSONAME\\b"),
    reason: "CREATE FUNCTION ... SONAME loads a UDF shared library (RCE)" },
  { code: "sql.privilege-pivot", severity: "critical", kind: "mysql-sys-exec",
    family: "floor", dialect: "mysql",
    re: _re("\\b(?:sys_exec|sys_eval|do_system)\\s*\\("),
    reason: "sys_exec / sys_eval / do_system run an OS command (UDF RCE)" },
  // A global variable can be assigned as SET GLOBAL x, SET @@GLOBAL.x,
  // SET PERSIST x, SET PERSIST_ONLY x or SET @@PERSIST.x - every spelling
  // is matched.
  { code: "sql.privilege-pivot", severity: "critical", kind: "set-global-sensitive",
    family: "floor", dialect: "mysql",
    re: _re("\\bSET\\s+(?:GLOBAL\\s+|PERSIST(?:_ONLY)?\\s+|@@(?:GLOBAL|PERSIST(?:_ONLY)?)\\s*\\.\\s*)(?:general_log|local_infile|log_bin_trust_function_creators|secure_file_priv)\\b"),
    reason: "SET GLOBAL of a sensitive variable enables file / log / UDF abuse" },

  // ---- Cross-dialect timing probes ----
  { code: "sql.time-dos", severity: "high", kind: "time-sleep",
    family: "timing", dialect: "cross",
    re: _re("\\b(?:SLEEP|pg_sleep|BENCHMARK|GET_LOCK)\\s*\\("),
    reason: "time-based blind probe / DoS (SLEEP / pg_sleep / BENCHMARK / GET_LOCK)" },
  { code: "sql.time-dos", severity: "high", kind: "time-waitfor",
    family: "timing", dialect: "mssql",
    re: _re("\\bWAITFOR\\s+DELAY\\b"),
    reason: "WAITFOR DELAY time-based blind probe" },

  // ---- Schema recon ----
  { code: "sql.privilege-pivot", severity: "high", kind: "schema-recon",
    family: "recon", dialect: "cross",
    re: _re("\\b(?:information_schema|performance_schema|pg_catalog|sys)\\s*\\.|\\bmysql\\s*\\.\\s*[a-z_]+"),
    reason: "schema / catalog enumeration (recon)" },
];
394
+
395
+ // UNION/INTERSECT/EXCEPT set operation — handled as its own detector
396
+ // because it is the floor only in fragment mode (a value expression has
397
+ // no business carrying a set operation) and exfil-class otherwise.
398
// Set-operation keywords, joined into one alternation and bounded by
// word boundaries on both sides.
var SETOP_RE = _re(
  "\\b(?:" + ["UNION(?:\\s+ALL)?", "INTERSECT", "EXCEPT"].join("|") + ")\\b"
);
399
+
400
+ // ---- Profile presets ----
401
+ //
402
+ // Each profile declares the ACTION for every dangerous family:
403
+ // refuse — flip ok:false, action:refuse.
404
+ // audit — keep ok:true, surface the issue, emit audit.
405
+ // serve — ignore (no profile uses this for a dangerous family;
406
+ // a clean stream serves implicitly).
407
+ // The floor families are NEVER serve/audit at any profile — every
408
+ // profile sets them to "refuse" (encoded explicitly so the table is
409
+ // self-documenting and the validator can't silently drop one).
410
+
411
// Profile presets, written as one explicit matrix: one row per setting,
// one column per profile. Every cell is spelled out (nothing is derived
// from another profile), and the row order is the key order of each
// resulting preset object. Only the outer PROFILES object is frozen.
var PROFILES = (function () {
  var R = "refuse";
  var A = "audit";
  //  setting                strict  balanced  permissive
  var matrix = [
    ["floor",                R,      R,        R],
    ["rceFile",              R,      R,        A],
    ["fdw",                  R,      R,        A],
    ["privPivot",            R,      R,        A],
    ["stacked",              R,      R,        R],
    ["comment",              R,      R,        A],
    ["literal",              R,      R,        A],
    ["encoding",             R,      R,        R],
    ["recon",                R,      A,        A],
    ["timing",               R,      A,        A],
    ["setop",                R,      A,        A],
    ["allowComments",        false,  false,    true],
    ["allowMultiStatement",  false,  false,    false],
  ];
  var presets = { strict: {}, balanced: {}, permissive: {} };
  matrix.forEach(function (row) {
    var setting = row[0];
    presets.strict[setting] = row[1];
    presets.balanced[setting] = row[2];
    presets.permissive[setting] = row[3];
  });
  return Object.freeze(presets);
})();
461
+
462
// Baseline options: a copy of the strict profile with the non-profile
// settings layered on top, frozen once assembled.
var DEFAULTS = (function () {
  var settings = Object.assign({}, PROFILES.strict);
  settings.mode = "enforce";
  settings.contextMode = DEFAULT_CONTEXT_MODE;
  settings.maxBytes = C.BYTES.bytes(1048576); // 1 MiB raw-SQL cap
  settings.maxRuntimeMs = C.TIME.seconds(5);
  // When true, the audited fragment body is replaced by a salted hash
  // fingerprint; the gdpr posture overlay is what switches it on.
  settings.gdprRedact = false;
  return Object.freeze(settings);
})();
471
+
472
+ // All four postures map to the strict floor — a regulated deployment
473
+ // gets the tightest raw-SQL gate regardless of which framework it cites.
474
+ // gdpr additionally redacts the fragment body in the audit trail
475
+ // (a whereRaw predicate may carry personal data).
476
// Posture overlays. Each posture is its own copy of the strict profile;
// gdpr additionally turns on gdprRedact.
var COMPLIANCE_POSTURES = (function () {
  function strictCopy(extra) {
    // Object.assign skips an undefined source, so a bare call yields a
    // plain copy of the strict profile.
    return Object.assign({}, PROFILES.strict, extra);
  }
  return Object.freeze({
    hipaa: strictCopy(),
    "pci-dss": strictCopy(),
    gdpr: strictCopy({ gdprRedact: true }),
    soc2: strictCopy(),
  });
})();
482
+
483
+ // ---- Opts resolution ----
484
+
485
// Resolve caller opts against this guard's profile / posture / default
// tables by delegating to the shared gate-contract resolver. Errors the
// resolver raises use GuardSqlError with the "sql" code prefix.
function _resolveOpts(opts) {
  var tables = {
    profiles: PROFILES,
    compliancePostures: COMPLIANCE_POSTURES,
    defaults: DEFAULTS,
    errorClass: GuardSqlError,
    errCodePrefix: "sql",
  };
  return gateContract.resolveProfileAndPosture(opts, tables);
}
494
+
495
// Pick the effective context mode: an explicit ctxMode wins, then
// opts.contextMode, then the module default. Anything outside
// CONTEXT_MODES throws a "sql.bad-opt" error built by _err.
function _resolveContextMode(opts, ctxMode) {
  var chosen = ctxMode;
  if (!chosen) chosen = opts && opts.contextMode;
  if (!chosen) chosen = DEFAULT_CONTEXT_MODE;
  if (CONTEXT_MODES.indexOf(chosen) !== -1) return chosen;
  var message = "guardSql: contextMode must be one of " + CONTEXT_MODES.join("/") +
    ", got " + JSON.stringify(chosen);
  throw _err("sql.bad-opt", message);
}
504
+
505
+ // ---- Stage 1: encoding gate ----
506
+ //
507
+ // Reject bytes that fail UTF-8 validation (the libpq client-encoding
508
+ // bypass class, CVE-2025-1094) and bytes that decode to a valid string
509
+ // but contain a non-shortest / surrogate / invalid-continuation
510
+ // sequence. A byte sequence that round-trips through Buffer.toString
511
+ // with a replacement char (U+FFFD) lost information — refuse rather
512
+ // than scan a lossy decode. High-bit bytes are refused in the ASCII SQL
513
+ // context: a SQL keyword / metacharacter is always 7-bit, so any
514
+ // high-bit byte outside a (masked) literal is either an encoding-bypass
515
+ // attempt or belongs inside a string the operator should bind, not
516
+ // embed.
517
+
518
// Stage-1 encoding gate. Accepts a string or Buffer and returns null when
// the bytes are well-formed UTF-8, otherwise a "sql.invalid-encoding"
// issue object describing the first problem found. Any other input type
// yields the same issue shape rather than being coerced to text.
function _encodingIssue(input) {
  var bytes;
  if (Buffer.isBuffer(input)) {
    bytes = input;
  } else if (typeof input === "string") {
    bytes = Buffer.from(input, "utf8");
  } else {
    // Never String()-coerce a number / object / null into bytes: that
    // would fabricate input and hide a caller-shape bug.
    return reject("input is not a string or Buffer");
  }

  // Byte walk over every multibyte sequence: rejects overlong forms,
  // out-of-range leads, surrogates, stray continuations and truncation.
  var pos = 0;
  while (pos < bytes.length) {
    var lead = bytes[pos];
    if (lead < 0x80) { pos += 1; continue; } // ASCII

    // 0xC0 / 0xC1 can only encode an overlong ASCII byte; 0xF5..0xFF sit
    // above the Unicode maximum. Neither is ever a valid lead.
    if (lead === 0xC0 || lead === 0xC1 || lead >= 0xF5) {
      return reject("non-shortest / out-of-range UTF-8 lead byte 0x" + lead.toString(16));
    }
    // What remains below 0xC2 is 0x80..0xBF: a continuation with no lead.
    if (lead < 0xC2) {
      return reject("stray continuation byte 0x" + lead.toString(16));
    }

    // Number of continuation bytes, plus the allowed range of the FIRST
    // continuation (narrowed for E0 / F0 overlongs, ED surrogates and F4
    // beyond U+10FFFF). Later continuations are always 0x80..0xBF.
    var tail;
    var firstMin = 0x80;
    var firstMax = 0xBF;
    if (lead >= 0xF0) {
      tail = 3;
      if (lead === 0xF0) firstMin = 0x90;
      if (lead === 0xF4) firstMax = 0x8F;
    } else if (lead >= 0xE0) {
      tail = 2;
      if (lead === 0xE0) firstMin = 0xA0;
      if (lead === 0xED) firstMax = 0x9F;
    } else {
      tail = 1;
    }

    if (pos + tail >= bytes.length) return reject("truncated multibyte UTF-8 sequence");

    var first = bytes[pos + 1];
    if (first < firstMin || first > firstMax) {
      return reject("invalid UTF-8 continuation byte 0x" + first.toString(16));
    }
    for (var off = 2; off <= tail; off += 1) {
      var cont = bytes[pos + off];
      if (cont < 0x80 || cont > 0xBF) {
        return reject("invalid UTF-8 continuation byte 0x" + cont.toString(16));
      }
    }
    pos += tail + 1;
  }

  // Belt-and-suspenders: refuse any decode that contains U+FFFD. The
  // marker is built numerically so this source stays pure ASCII.
  var marker = String.fromCharCode(0xFFFD);
  if (bytes.toString("utf8").indexOf(marker) !== -1) {
    return reject("decoded SQL contains the Unicode replacement character (lossy decode)");
  }
  return null;

  // Build the issue object for an encoding refusal.
  function reject(detail) {
    return {
      code: "sql.invalid-encoding", severity: "critical",
      kind: "invalid-encoding", ruleId: "sql.invalid-encoding",
      snippet: "SQL bytes fail UTF-8 validation (" + detail +
        ") — encoding-bypass defense (CVE-2025-1094 class)",
    };
  }
}
581
+
582
+ // ---- Stage 2: normalizer ----
583
+ //
584
+ // Produce a token stream the detectors run on:
585
+ // - Strip -- line comments, # MySQL line comments, /* */ block
586
+ // comments. An unterminated /* and the executable /*! form are
587
+ // flagged before stripping (returned as signals).
588
+ // - Mask string-literal ('...'), double-quoted-identifier ("..."),
589
+ // backtick-identifier (`...`), and Postgres dollar-quote
590
+ // ($tag$...$tag$) spans to spaces so a keyword inside data never
591
+ // fires a detector.
592
+ // - Collapse an intra-keyword comment: `LOAD/**/_FILE` -> `LOAD_FILE`
593
+ // by removing the comment with NO separating space when both
594
+ // neighbors are identifier characters (so the engine's own token
595
+ // fusion is reproduced for the detector).
596
+ //
597
+ // Returns { normalized, signals } where signals carries the comment /
598
+ // literal flags the detectors / floor need.
599
+
600
// True when `ch` is an identifier character for comment-fusion purposes:
// an ASCII letter, an ASCII digit, an underscore, or the dollar sign.
function _isIdentByte(ch) {
  if (ch === "_" || ch === DOLLAR) return true;
  var isLower = ch >= "a" && ch <= "z";
  var isUpper = ch >= "A" && ch <= "Z";
  var isDigit = ch >= "0" && ch <= "9";
  return isLower || isUpper || isDigit;
}
604
+
605
// Stage-2 normalizer. Walks `text` once and produces the stream the
// detectors run on:
//   - `--`, `#` and `/* */` comments are removed (fused or replaced by a
//     single space, see _collapseOrSpace);
//   - '...' literals, "..." / `...` quoted identifiers and $tag$...$tag$
//     dollar-quotes are masked to spaces, one mask per source character.
//
// Returns { normalized, signals }, where signals holds the booleans
// hadComment, hadExecutableComment, unterminatedComment, hadLiteral and
// unterminatedLiteral.
//
// unterminatedLiteral is raised for EVERY single-quoted literal that
// runs off the end of the input. That includes a literal whose opening
// quote is the last character and one whose body ends in an escaped ''
// pair, not only literals that end on an ordinary body character.
function _normalize(text) {
  var n = text.length;
  var out = [];
  var signals = {
    hadComment: false,
    hadExecutableComment: false,
    unterminatedComment: false,
    hadLiteral: false,
    unterminatedLiteral: false,
  };
  var i = 0;
  while (i < n) {
    var ch = text.charAt(i);
    var cc = text.charCodeAt(i);
    var next = i + 1 < n ? text.charAt(i + 1) : "";

    // -- line comment: drop through the end of the line.
    if (cc === CP_DASH && next === "-") {
      signals.hadComment = true;
      _collapseOrSpace(out, text, i);
      var nl = text.indexOf("\n", i + 2);
      i = nl === -1 ? n : nl + 1;
      continue;
    }
    // # MySQL line comment: same treatment.
    if (cc === CP_HASH) {
      signals.hadComment = true;
      _collapseOrSpace(out, text, i);
      var nl2 = text.indexOf("\n", i + 1);
      i = nl2 === -1 ? n : nl2 + 1;
      continue;
    }
    // /* block comment (incl. executable /*! ... */).
    if (cc === CP_SLASH && next === "*") {
      signals.hadComment = true;
      if (i + 2 < n && text.charCodeAt(i + 2) === CP_BANG) {
        signals.hadExecutableComment = true;
      }
      var end = text.indexOf("*/", i + 2);
      if (end === -1) {
        signals.unterminatedComment = true;
        i = n; // consume the rest; the caller refuses on the signal
        continue;
      }
      _collapseOrSpace(out, text, i, end + 2);
      i = end + 2;
      continue;
    }
    // ' string literal: mask delimiters and body to spaces.
    if (cc === CP_SQUOTE) {
      signals.hadLiteral = true;
      out.push(MASK_SPACE);
      i += 1;
      var literalClosed = false;
      while (i < n) {
        if (text.charCodeAt(i) === CP_SQUOTE) {
          if (i + 1 < n && text.charCodeAt(i + 1) === CP_SQUOTE) { // '' escaped quote
            out.push(MASK_SPACE); out.push(MASK_SPACE); i += 2; continue;
          }
          out.push(MASK_SPACE); i += 1; literalClosed = true; break;
        }
        out.push(MASK_SPACE); i += 1;
      }
      // Reaching here without a closing quote means the literal ran off
      // the input, whatever the last consumed character was.
      if (!literalClosed) signals.unterminatedLiteral = true;
      continue;
    }
    // " double-quoted identifier: mask to spaces so a keyword-shaped
    // column name never fires a detector.
    if (cc === CP_DQUOTE) {
      out.push(MASK_SPACE);
      i += 1;
      while (i < n) {
        if (text.charCodeAt(i) === CP_DQUOTE) {
          if (i + 1 < n && text.charCodeAt(i + 1) === CP_DQUOTE) {
            out.push(MASK_SPACE); out.push(MASK_SPACE); i += 2; continue;
          }
          out.push(MASK_SPACE); i += 1; break;
        }
        out.push(MASK_SPACE); i += 1;
      }
      continue;
    }
    // ` backtick identifier (MySQL): mask to spaces.
    if (cc === CP_BACKTICK) {
      out.push(MASK_SPACE);
      i += 1;
      while (i < n) {
        if (text.charCodeAt(i) === CP_BACKTICK) { out.push(MASK_SPACE); i += 1; break; }
        out.push(MASK_SPACE); i += 1;
      }
      continue;
    }
    // $tag$ dollar-quote: mask the whole span. A `$` followed by word
    // bytes that is not closed by a second `$` (e.g. the $1 placeholder)
    // is not an opening tag and is kept verbatim.
    if (cc === CP_DOLLAR) {
      var tagEnd = i + 1;
      while (tagEnd < n && _isWordByte(text.charAt(tagEnd))) tagEnd += 1;
      if (tagEnd < n && text.charCodeAt(tagEnd) === CP_DOLLAR) {
        var tag = text.slice(i, tagEnd + 1);
        var closeTag = text.indexOf(tag, tagEnd + 1);
        if (closeTag === -1) {
          // Opening tag with no matching close: treated as an
          // unterminated literal and the rest of the input is consumed.
          signals.hadLiteral = true;
          signals.unterminatedLiteral = true;
          i = n;
          continue;
        }
        signals.hadLiteral = true;
        var maskLen = (closeTag + tag.length) - i;
        for (var m = 0; m < maskLen; m += 1) out.push(MASK_SPACE);
        i = closeTag + tag.length;
        continue;
      }
      // Bare $: keep it so detectors that care about `$` still see it.
      out.push(ch);
      i += 1;
      continue;
    }
    out.push(ch);
    i += 1;
  }
  return { normalized: out.join(""), signals: signals };
}
729
+
730
// Word character for dollar-quote tags and leading verbs: an ASCII
// letter, an ASCII digit or an underscore. The dollar sign is excluded.
function _isWordByte(ch) {
  if (ch === "_") return true;
  if (ch >= "0" && ch <= "9") return true;
  if (ch >= "A" && ch <= "Z") return true;
  return ch >= "a" && ch <= "z";
}
736
+
737
// Emit the replacement for a stripped comment. If the last emitted
// character and the character at the resume position are both identifier
// characters, emit nothing so the two runs fuse (`LOAD/**/_FILE` ->
// `LOAD_FILE`); otherwise append one MASK_SPACE to `out`. The resume
// position is `end` when it is a number, else `start` (line comments,
// where the character examined is the comment marker itself).
function _collapseOrSpace(out, text, start, end) {
  var lastChunk = out.length > 0 ? out[out.length - 1] : "";
  var before = lastChunk.length > 0 ? lastChunk.charAt(lastChunk.length - 1) : "";
  var resumeAt = typeof end === "number" ? end : start;
  var after = resumeAt < text.length ? text.charAt(resumeAt) : "";
  var fuses = Boolean(before && after) && _isIdentByte(before) && _isIdentByte(after);
  if (!fuses) out.push(MASK_SPACE);
}
753
+
754
+ // ---- Stage 3: structural scans on the normalized stream ----
755
+
756
// Stacked-statement scan over the normalized stream. Locates the first
// `;` and returns a "sql.stacked" issue if anything other than space,
// tab, CR or LF follows it; returns null when there is no `;` or only
// such whitespace after it.
function _stackedStatementIssue(normalized) {
  var semiAt = -1;
  for (var p = 0; p < normalized.length && semiAt === -1; p += 1) {
    if (normalized.charCodeAt(p) === CP_SEMI) semiAt = p;
  }
  if (semiAt === -1) return null;

  var tail = normalized.slice(semiAt + 1);
  if (!/[^ \t\r\n]/.test(tail)) return null;
  return {
    code: "sql.stacked", severity: "critical", kind: "stacked-statement",
    ruleId: "sql.stacked",
    snippet: "stacked statement after top-level ';' — only one statement permitted",
  };
}
781
+
782
// Upper-cased first word of the normalized stream. Leading spaces, tabs,
// CR / LF and opening parentheses are skipped first; the result is ""
// when no word character follows.
function _leadingVerb(normalized) {
  var SKIPPABLE = " \t\r\n(";
  var total = normalized.length;
  var from = 0;
  while (from < total && SKIPPABLE.indexOf(normalized.charAt(from)) !== -1) from += 1;
  var to = from;
  while (to < total && _isWordByte(normalized.charAt(to))) to += 1;
  return normalized.slice(from, to).toUpperCase();
}
796
+
797
// Split the normalized stream on every `;` and return the pieces that
// contain something other than whitespace, in source order. The pieces
// are returned untrimmed.
function _splitStatements(normalized) {
  var separator = String.fromCharCode(CP_SEMI);
  return normalized.split(separator).filter(function (piece) {
    return piece.trim().length > 0;
  });
}
813
+
814
+ // ---- Detector application ----
815
+
816
// Look up the profile action for a detector family name. Known families
// read the matching profile field; any other value yields "refuse".
function _profileActionFor(family, profile) {
  var FIELD_FOR_FAMILY = {
    "floor": "floor",
    "rce-file": "rceFile",
    "fdw": "fdw",
    "recon": "recon",
    "timing": "timing",
    "exfil": "setop",
  };
  var known = typeof family === "string" &&
    Object.prototype.hasOwnProperty.call(FIELD_FOR_FAMILY, family);
  if (!known) return "refuse";
  return profile[FIELD_FOR_FAMILY[family]];
}
827
+
828
// Map a detector's declared family onto the profile decision: "floor",
// "recon" and "timing" read the profile field of the same name, and
// every other family reads profile.privPivot.
function _decideDetector(det, profile) {
  switch (det.family) {
    case "floor":
      return profile.floor;
    case "recon":
      return profile.recon;
    case "timing":
      return profile.timing;
    default:
      return profile.privPivot;
  }
}
842
+
843
// Run every entry of DETECTORS against the normalized stream and return
// one issue per matching detector, in table order. The action comes from
// _decideDetector; a detector that is not refused is reported with
// severity "warn" instead of its declared severity.
function _runDetectors(normalized, profile) {
  var found = [];
  DETECTORS.forEach(function (rule) {
    if (!rule.re.test(normalized)) return;
    var verdict = _decideDetector(rule, profile);
    var severity = verdict === "refuse" ? rule.severity : "warn";
    found.push({
      code: rule.code,
      kind: rule.kind,
      ruleId: rule.code,
      severity: severity,
      action: verdict,
      dialect: rule.dialect,
      snippet: rule.reason,
    });
  });
  return found;
}
862
+
863
+ // ---- Core inspection ----
864
+ //
865
+ // Runs the three stages and returns a structured issues array. Pure —
866
+ // never throws on input shape (the gate / validate wrappers decide how
867
+ // to surface refusals).
868
+
869
// Core inspection: runs the encoding gate, the normalizer and the
// structural / keyword scans, and returns the collected issues array.
//
//   input       - string or Buffer holding the raw SQL
//   opts        - resolved options (reads maxBytes, allowComments,
//                 comment, setop; also passed to the detectors and the
//                 context-mode inspectors)
//   contextMode - "fragment" | "operator-sql" | "migration"
//
// Two conditions stop the inspection early with a single issue: an
// encoding failure (a lossy decode cannot be scanned safely) and an
// input larger than opts.maxBytes. Returning on the size cap keeps the
// normalizer walk and every regex below bounded by maxBytes, which is
// the bound the SETOP_RE length-cap annotation relies on.
function _inspect(input, opts, contextMode) {
  var issues = [];

  // Stage 1: encoding gate, on the raw bytes and before any
  // decode-dependent scan.
  var encIssue = _encodingIssue(input);
  if (encIssue) {
    issues.push(encIssue);
    return issues;
  }

  var text = Buffer.isBuffer(input) ? input.toString("utf8") : String(input);

  // Size cap, measured in UTF-8 bytes. Oversize input is refused without
  // being scanned.
  var byteLen = Buffer.byteLength(text, "utf8");
  if (opts.maxBytes && byteLen > opts.maxBytes) {
    issues.push({
      code: "sql.refuse", severity: "high", kind: "oversize",
      ruleId: "sql.oversize",
      snippet: "raw SQL " + byteLen + " bytes exceeds maxBytes " + opts.maxBytes,
    });
    return issues;
  }

  // Stage 2: normalize.
  var norm = _normalize(text);
  var normalized = norm.normalized;
  var sig = norm.signals;

  // Comment-smuggling floor: an unterminated /* and an executable /*!
  // are reported as critical whatever the profile says.
  if (sig.unterminatedComment) {
    issues.push({
      code: "sql.stacked", severity: "critical", kind: "unterminated-comment",
      ruleId: "sql.unterminated-comment",
      snippet: "unterminated /* block comment (comment-smuggling defense)",
    });
  }
  if (sig.hadExecutableComment) {
    issues.push({
      code: "sql.stacked", severity: "critical", kind: "executable-comment",
      ruleId: "sql.executable-comment",
      snippet: "MySQL executable comment /*! ... */ (version-gated injection vector)",
    });
  }
  if (sig.unterminatedLiteral) {
    issues.push({
      code: "sql.embedded-literal", severity: "critical", kind: "unterminated-literal",
      ruleId: "sql.unterminated-literal",
      snippet: "unterminated string literal / dollar-quote",
    });
  }
  // Ordinary comments: audited in migration mode or when
  // opts.allowComments is set; otherwise opts.comment decides between a
  // critical refusal and an audited warn.
  if (sig.hadComment && !sig.unterminatedComment && !sig.hadExecutableComment) {
    var commentAction = (contextMode === "migration" || opts.allowComments)
      ? "audit" : opts.comment;
    if (commentAction === "refuse") {
      issues.push({
        code: "sql.stacked", severity: "critical", kind: "comment",
        ruleId: "sql.comment",
        snippet: "SQL comment in raw fragment (comment-smuggling surface)",
      });
    } else {
      issues.push({
        code: "sql.stacked", severity: "warn", kind: "comment", action: "audit",
        ruleId: "sql.comment",
        snippet: "SQL comment present (audited)",
      });
    }
  }

  // Quoted-identifier hygiene (CVE-2025-8715 class), run on the raw
  // text because the normalizer masks identifier bodies.
  issues.push.apply(issues, _identifierHygieneIssues(text));

  // Stage 3: structural + keyword detectors on the normalized stream.

  // Stacked statements are checked in every mode except migration, which
  // permits several statements and classifies each one instead.
  if (contextMode !== "migration") {
    var stackedIssue = _stackedStatementIssue(normalized);
    if (stackedIssue) issues.push(stackedIssue);
  }

  // Keyword detectors.
  issues.push.apply(issues, _runDetectors(normalized, opts));

  // Leading-verb floor: a statement that leads with a verb listed in
  // LEADING_VERB_FLOOR is always critical. Migration mode checks every
  // top-level statement; the other modes check the stream as a whole.
  var verbStmts = (contextMode === "migration")
    ? _splitStatements(normalized) : [normalized];
  for (var vi = 0; vi < verbStmts.length; vi += 1) {
    var lv = _leadingVerb(verbStmts[vi]);
    if (LEADING_VERB_FLOOR[lv] === true) {
      issues.push({
        code: "sql.privilege-pivot", severity: "critical", kind: "procedural-exec",
        ruleId: "sql.procedural-exec",
        snippet: "statement leads with procedural-execution verb " +
          JSON.stringify(lv) + " (DO / CALL / EXECUTE run code / routines)",
      });
    }
  }

  // Set operations: critical in fragment mode, decided by opts.setop in
  // the other modes.
  if (SETOP_RE.test(normalized)) { // allow:regex-no-length-cap - normalized bounded by opts.maxBytes (1 MiB) at entry; SETOP_RE is a linear alternation
    if (contextMode === "fragment") {
      issues.push({
        code: "sql.union-exfil", severity: "critical", kind: "setop-in-fragment",
        ruleId: "sql.union-exfil",
        snippet: "UNION / INTERSECT / EXCEPT inside a value-expression fragment (exfil shape)",
      });
    } else {
      var setopAction = opts.setop;
      issues.push({
        code: "sql.union-exfil",
        severity: setopAction === "refuse" ? "high" : "warn",
        action: setopAction,
        kind: "setop", ruleId: "sql.union-exfil",
        snippet: "set operation (UNION / INTERSECT / EXCEPT)",
      });
    }
  }

  // Context-mode structural rules.
  if (contextMode === "fragment") {
    _inspectFragment(text, normalized, opts, issues);
  } else if (contextMode === "operator-sql") {
    _inspectOperatorSql(normalized, issues);
  } else if (contextMode === "migration") {
    _inspectMigration(normalized, opts, issues);
  }

  return issues;
}
1019
+
1020
// Fragment-mode rules, appended to `issues`:
//   1. the fragment must not lead with a verb listed in STATEMENT_VERBS;
//   2. unless opts.allowLiterals is set, the raw text must not contain a
//      single-quoted string literal. A fragment is a static template
//      whose values bind through ? placeholders, so an embedded '...' is
//      the signature of concatenated input (CWE-89).
function _inspectFragment(rawText, normalized, opts, issues) {
  var leading = _leadingVerb(normalized);
  var isStatement = Boolean(leading) && STATEMENT_VERBS[leading] === true;
  if (isStatement) {
    issues.push({
      code: "sql.refuse", severity: "critical", kind: "verb-in-fragment",
      ruleId: "sql.verb-in-fragment",
      snippet: "statement verb " + JSON.stringify(leading) +
        " in a value-expression fragment (whereRaw must be an expression)",
    });
  }

  if (opts.allowLiterals) return;
  if (!_hasEmbeddedStringLiteral(rawText)) return;
  issues.push({
    code: "sql.embedded-literal", severity: "critical", kind: "embedded-literal",
    ruleId: "sql.embedded-literal",
    snippet: "raw fragment embeds a string literal ('...') — bind every value " +
      "with a ? placeholder, or pass allowLiterals:true for a static literal",
  });
}
1047
+
1048
// operator-sql rules. The only check here is informational: when the
// stream has no resolvable leading verb (empty or parenthesized-only), a
// "no-verb" audit issue with severity "warn" is appended to `issues`.
function _inspectOperatorSql(normalized, issues) {
  if (_leadingVerb(normalized)) return;
  issues.push({
    code: "sql.refuse", severity: "warn", kind: "no-verb", action: "audit",
    ruleId: "sql.no-verb",
    snippet: "operator-sql has no resolvable leading verb",
  });
}
1064
+
1065
// Migration-mode rules. Every top-level statement is classified by its
// leading verb: a statement with no verb, or whose verb is listed in
// MIGRATION_DDL_VERBS or MIGRATION_READ_VERBS, passes; any other verb
// appends a critical "migration-verb" issue. `opts` is accepted for
// signature parity with the other inspectors and is not read.
function _inspectMigration(normalized, opts, issues) {
  _splitStatements(normalized).forEach(function (statement) {
    var verb = _leadingVerb(statement);
    var permitted = !verb ||
      MIGRATION_DDL_VERBS[verb] === true ||
      MIGRATION_READ_VERBS[verb] === true;
    if (permitted) return;
    issues.push({
      code: "sql.refuse", severity: "critical", kind: "migration-verb",
      ruleId: "sql.migration-verb",
      snippet: "statement verb " + JSON.stringify(verb) +
        " not in the migration DDL allowlist (CREATE / ALTER / DROP / reads)",
    });
  });
}
1089
+
1090
// Boolean scan for a single-quoted string literal in `sql`. Double-quoted
// identifiers (with "" escapes), `--` line comments and `/* */` block
// comments are skipped, so a quote inside any of those does not count.
// Returns true at the first single quote seen outside those spans.
function _hasEmbeddedStringLiteral(sql) {
  var total = sql.length;
  var at = 0;
  while (at < total) {
    var cur = sql.charAt(at);
    var peek = at + 1 < total ? sql.charAt(at + 1) : "";

    if (cur === SQUOTE) return true;

    if (cur === DQUOTE) {
      // Walk to the closing quote, stepping over doubled "" escapes.
      at += 1;
      var closed = false;
      while (at < total && !closed) {
        if (sql.charAt(at) !== DQUOTE) {
          at += 1;
        } else if (sql.charAt(at + 1) === DQUOTE) {
          at += 2;
        } else {
          at += 1;
          closed = true;
        }
      }
    } else if (cur === "-" && peek === "-") {
      // Line comment: resume at the newline (or the end of input).
      var lineEnd = sql.indexOf("\n", at);
      at = lineEnd === -1 ? total : lineEnd;
    } else if (cur === "/" && peek === "*") {
      // Block comment: resume just past the terminator (or past the end).
      var terminator = sql.indexOf("*/", at + 2);
      at = (terminator === -1 ? total : terminator) + 2;
    } else {
      at += 1;
    }
  }
  return false;
}
1127
+
1128
// Quoted-identifier hygiene scan. Extracts every double-quoted identifier
// span from `sql`, classifies its body with _identifierHazard, and
// returns one critical "identifier-hazard" issue per hazardous span (an
// empty array when there are none).
//
// The walk is quote- AND comment-aware:
//   - '...' string literals (with '' escapes) are skipped, so a `"`
//     inside a literal is data, not an identifier delimiter;
//   - `--` line comments and `/* */` block comments are skipped, so an
//     apostrophe or double quote written inside a comment can neither
//     open a bogus literal that hides later identifiers nor open a bogus
//     identifier span that swallows a newline. Comments are permitted in
//     migration mode, which is where this matters.
function _identifierHygieneIssues(sql) {
  var CODE_DASH = 0x2D;   // -
  var CODE_SLASH = 0x2F;  // /
  var CODE_STAR = 0x2A;   // *
  var issues = [];
  var i = 0;
  var len = sql.length;
  while (i < len) {
    var ch = sql.charCodeAt(i);
    var nextCode = i + 1 < len ? sql.charCodeAt(i + 1) : -1;

    // -- line comment: resume after the newline (or at end of input).
    if (ch === CODE_DASH && nextCode === CODE_DASH) {
      var lineEnd = sql.indexOf("\n", i + 2);
      i = lineEnd === -1 ? len : lineEnd + 1;
      continue;
    }
    // /* block comment */: resume after the terminator. An unterminated
    // comment consumes the rest of the input.
    if (ch === CODE_SLASH && nextCode === CODE_STAR) {
      var terminator = sql.indexOf("*/", i + 2);
      i = terminator === -1 ? len : terminator + 2;
      continue;
    }
    // '...' string literal: skip the body.
    if (ch === CP_SQUOTE) {
      i += 1;
      while (i < len) {
        if (sql.charCodeAt(i) === CP_SQUOTE) {
          if (i + 1 < len && sql.charCodeAt(i + 1) === CP_SQUOTE) { i += 2; continue; }
          i += 1; break;
        }
        i += 1;
      }
      continue;
    }
    // "..." quoted identifier: take the body between the delimiters
    // (doubled "" escapes stay in the body) and classify it. An
    // unterminated identifier yields everything up to end of input.
    if (ch === CP_DQUOTE) {
      var start = i + 1;
      var j = start;
      while (j < len) {
        if (sql.charCodeAt(j) === CP_DQUOTE) {
          if (j + 1 < len && sql.charCodeAt(j + 1) === CP_DQUOTE) { j += 2; continue; }
          break;
        }
        j += 1;
      }
      var ident = sql.slice(start, j);
      var bad = _identifierHazard(ident);
      if (bad) {
        issues.push({
          code: "sql.refuse", severity: "critical", kind: "identifier-hazard",
          ruleId: "sql.identifier-hazard",
          snippet: "quoted identifier " + bad + " (CVE-2025-8715 class)",
        });
      }
      i = j + 1;
      continue;
    }
    i += 1;
  }
  return issues;
}
1180
+
1181
// Classify the body of a quoted identifier. Returns a short description
// of the hazard, or null when the body is acceptable. Checks, in order:
//   1. a newline or carriage return anywhere in the body;
//   2. a leading backslash;
//   3. any other C0 control character or DEL (the null byte gets its own
//      wording, the rest are reported by code);
//   4. safeSql.validateIdentifier, called with reserved words and SQLite
//      internal names allowed. Only its "sql/too-long" and
//      "sql/null-byte" verdicts count as hazards; any other error it
//      throws is ignored, because a quoted identifier is allowed to
//      escape the bare-identifier shape.
function _identifierHazard(ident) {
  if (/[\n\r]/.test(ident)) return "contains a newline";
  if (ident.charAt(0) === "\\") return "starts with a backslash";

  var control = /[\x00-\x1f\x7f]/.exec(ident);
  if (control) {
    var code = control[0].charCodeAt(0);
    if (code === 0) return "contains a null byte";
    return "contains a control byte 0x" + code.toString(16);
  }

  var verdict = null;
  try {
    safeSql.validateIdentifier(ident, { allowReserved: true, allowSqliteInternal: true });
  } catch (err) {
    var isBreakout = err instanceof safeSql.SafeSqlError &&
      (err.code === "sql/too-long" || err.code === "sql/null-byte");
    if (isBreakout) {
      verdict = "rejected by safeSql.validateIdentifier (" + err.code + ")";
    }
  }
  return verdict;
}
1227
+
1228
+ // ---- Public surface ----
1229
+
1230
+ /**
1231
+ * @primitive b.guardSql.validate
1232
+ * @signature b.guardSql.validate(input, opts?)
1233
+ * @since 0.14.29
1234
+ * @status stable
1235
+ * @compliance hipaa, pci-dss, gdpr, soc2
1236
+ * @related b.guardSql.gate, b.guardSql.sanitize, b.safeSql.validateIdentifier
1237
+ *
1238
+ * Inspect a raw SQL string or Buffer and return `{ ok, issues }`. Each
1239
+ * issue carries `{ code, kind, ruleId, severity, snippet }` with
1240
+ * severity in `"warn"|"high"|"critical"`. `ok` is `true` only when no
1241
+ * issue is `high` or `critical`. Pure inspection — never throws on input.
1242
+ *
1243
+ * The inspection runs three stages: a UTF-8 encoding gate (defends the
1244
+ * libpq client-encoding bypass class, CVE-2025-1094), a comment-and-
1245
+ * literal normalizer, and keyword + structural detectors on the
1246
+ * normalized stream. The detected classes are stacked statements,
1247
+ * comment smuggling, embedded string literals (fragment mode), the
1248
+ * Postgres / SQLite / MySQL file / exec / FDW / extension / privilege-
1249
+ * pivot constructs, time-based probes, schema recon, and set operations
1250
+ * inside a predicate.
1251
+ *
1252
+ * @opts
1253
+ * profile: "strict"|"balanced"|"permissive",
1254
+ * compliancePosture: "hipaa"|"pci-dss"|"gdpr"|"soc2",
1255
+ * contextMode: "fragment"|"operator-sql"|"migration", // default "fragment"
1256
+ * allowLiterals: boolean, // permit a static '...' literal in a fragment
1257
+ * maxBytes: number, // raw-SQL byte cap (default 1 MiB)
1258
+ *
1259
+ * @example
1260
+ * var rv = b.guardSql.validate("id = ? AND tenant = ?", { profile: "strict" });
1261
+ * rv.ok; // → true
1262
+ *
1263
+ * var bad = b.guardSql.validate("1; DROP TABLE users", { profile: "strict" });
1264
+ * bad.ok; // → false
1265
+ * bad.issues.some(function (i) { return i.kind === "stacked-statement"; }); // → true
1266
+ */
1267
function validate(input, opts) {
  // Options and context mode are resolved first, before the input is
  // examined, so option handling is the same for every input.
  var resolved = _resolveOpts(opts);
  var mode = _resolveContextMode(resolved);

  // When the gate-contract helper reports a bad (non-string /
  // non-Buffer) input, hand its result object straight back.
  var rejected = gateContract.badInputResultIfNotStringOrBuffer(input);
  if (rejected) {
    return rejected;
  }

  // Otherwise inspect and fold the raw issue list into the result shape.
  var found = _inspect(input, resolved, mode);
  return gateContract.aggregateIssues(found);
}
1274
+
1275
+ /**
1276
+ * @primitive b.guardSql.sanitize
1277
+ * @signature b.guardSql.sanitize(input, opts?)
1278
+ * @since 0.14.29
1279
+ * @status stable
1280
+ * @related b.guardSql.validate, b.guardSql.gate
1281
+ *
1282
+ * Return the comment-stripped, literal-masked NORMALIZED form of a raw
1283
+ * SQL string — the internal representation the detectors run on, not a
1284
+ * "made-safe" query. Hostile SQL is unrepairable: there is no
1285
+ * transform that turns `COPY ... PROGRAM` or a stacked `;DROP` into a
1286
+ * safe statement, so `sanitize` never serves its output as a query.
1287
+ * Throws `GuardSqlError` when the input refuses under the resolved
1288
+ * profile (invalid encoding, the OS-reach floor, stacked statements),
1289
+ * mirroring the entries-class guards whose hostile input has no
1290
+ * sanitize action.
1291
+ *
1292
+ * Use it to inspect what the tokenizer saw (debugging a false-positive
1293
+ * detector, building a redacted audit fingerprint) — not to feed the
1294
+ * result back to a driver.
1295
+ *
1296
+ * @opts
1297
+ * profile: "strict"|"balanced"|"permissive",
1298
+ * compliancePosture: "hipaa"|"pci-dss"|"gdpr"|"soc2",
1299
+ * contextMode: "fragment"|"operator-sql"|"migration",
1300
+ *
1301
+ * @example
1302
+ * var normalized = b.guardSql.sanitize(
1303
+ * "id = ? -- note\n AND active = ?",
1304
+ * { profile: "permissive" });
1305
+ * // → "id = ? AND active = ?" (comment stripped)
1306
+ *
1307
+ * try {
1308
+ * b.guardSql.sanitize("SELECT pg_read_file('/etc/passwd')");
1309
+ * } catch (e) {
1310
+ * e.code; // → "sql.file-access"
1311
+ * }
1312
+ */
1313
function sanitize(input, opts) {
  var resolved = _resolveOpts(opts);
  var mode = _resolveContextMode(resolved);

  // Only strings and Buffers can be normalized; anything else is a
  // caller error and throws.
  var isBuffer = Buffer.isBuffer(input);
  if (typeof input !== "string" && !isBuffer) {
    throw _err("sql.bad-input", "sanitize requires string or Buffer input");
  }

  // Run the detectors first: a refusing issue means there is nothing to
  // hand back, so surface it as an error carrying the issue's code.
  var blocking = _firstRefusal(_inspect(input, resolved, mode));
  if (blocking) {
    throw _err(blocking.code, "guardSql.sanitize: " + blocking.snippet);
  }

  // Clean input: return the normalized form produced by _normalize.
  var sqlText = isBuffer ? input.toString("utf8") : String(input);
  return _normalize(sqlText).normalized;
}
1327
+
1328
// Return the first issue that flips the result to refuse, or null when
// no issue does. Scanning is in list order, per issue:
//   - action "audit"  -> never refuses, whatever its severity;
//   - action "refuse" -> always refuses, whatever its severity;
//   - otherwise       -> refuses when severity is "critical" or "high".
//
// @param issues  array of issue objects (reads .action and .severity)
// @returns       the first refusing issue object, or null
function _firstRefusal(issues) {
  for (var idx = 0; idx < issues.length; idx += 1) {
    var issue = issues[idx];
    if (issue.action === "audit") continue;
    // Honor the explicit refuse action the contract documents; without
    // this a warn-severity issue marked "refuse" would slip through.
    if (issue.action === "refuse") return issue;
    if (issue.severity === "critical" || issue.severity === "high") return issue;
  }
  return null;
}
1338
+
1339
+ /**
1340
+ * @primitive b.guardSql.gate
1341
+ * @signature b.guardSql.gate(opts?)
1342
+ * @since 0.14.29
1343
+ * @status stable
1344
+ * @compliance hipaa, pci-dss, gdpr, soc2
1345
+ * @related b.guardSql.validate, b.guardSql.sanitize, b.gateContract.buildGuardGate
1346
+ *
1347
+ * Build a `b.gateContract` gate that consumes `ctx.sql` (or
1348
+ * `ctx.bytes`). Action chain: `serve` (no SQL or clean) → `audit-only`
1349
+ * (warn-level issues, every reject-class off) → `refuse` (any
1350
+ * critical / high issue, or an explicit refuse action). There is no
1351
+ * `sanitize` action — hostile SQL is unrepairable, so a refusal is the
1352
+ * only safe non-serve outcome. The gate honors `ctx.mode` (one of the
1353
+ * context modes) over the opts default, so one gate instance can guard
1354
+ * a `fragment` whereRaw and an `operator-sql` path with the right
1355
+ * strictness per call.
1356
+ *
1357
+ * Every decision emits a signed audit entry; under the `gdpr` posture
1358
+ * the audited SQL is replaced with a salted hash fingerprint (a
1359
+ * `whereRaw` predicate may carry personal data).
1360
+ *
1361
+ * @opts
1362
+ * profile: "strict"|"balanced"|"permissive",
1363
+ * compliancePosture: "hipaa"|"pci-dss"|"gdpr"|"soc2",
1364
+ * contextMode: "fragment"|"operator-sql"|"migration",
1365
+ * name: string, // gate identity for audit / observability
1366
+ *
1367
+ * @example
1368
+ * var sqlGate = b.guardSql.gate({ profile: "strict" });
1369
+ * var verdict = await sqlGate.check({ sql: "id = ?", mode: "fragment" });
1370
+ * verdict.action; // → "serve"
1371
+ *
1372
+ * var blocked = await sqlGate.check({ sql: "1; DROP TABLE users" });
1373
+ * blocked.action; // → "refuse"
1374
+ */
1375
function gate(opts) {
  opts = _resolveOpts(opts);

  // Gate identity: the caller-supplied name, else one derived from the
  // profile.
  var gateName = opts.name || "guardSql:" + (opts.profile || "default");

  return gateContract.buildGuardGate(gateName, opts, async function (ctx) {
    // Pull the SQL payload from ctx.sql, falling back to ctx.bytes. A
    // missing ctx, an empty payload, or a payload that is neither a
    // string nor a Buffer is served without inspection.
    var candidate = ctx ? (ctx.sql || ctx.bytes || "") : ctx;
    var inspectable = typeof candidate === "string" || Buffer.isBuffer(candidate);
    if (!candidate || !inspectable) {
      return { ok: true, action: "serve" };
    }

    // A per-call ctx.mode is handed to the resolver alongside the opts.
    var mode = _resolveContextMode(opts, ctx && ctx.mode);
    var found = _inspect(candidate, opts, mode);

    // Audit every inspected decision (PCI-DSS 10.2 / SOC 2 CC7) before
    // the verdict is returned.
    _emitDecisionAudit(candidate, found, opts, mode, ctx);

    if (_firstRefusal(found)) {
      return { ok: false, action: "refuse", issues: found };
    }
    if (found.length > 0) {
      return { ok: true, action: "audit-only", issues: found };
    }
    return { ok: true, action: "serve" };
  });
}
1402
+
1403
// Emit one audit entry describing a gate decision. Everything runs
// inside a try/catch that deliberately swallows errors: a failing audit
// sink must not take down the request whose SQL is being checked.
//
// @param sql          string or Buffer that was inspected
// @param issues       issue list produced for that SQL
// @param opts         resolved guard options (reads .gdprRedact, .profile)
// @param contextMode  context mode used for the inspection
// @param ctx          gate context (reads .actor and .route when present)
function _emitDecisionAudit(sql, issues, opts, contextMode, ctx) {
  try {
    var wasRefused = _firstRefusal(issues) !== null;

    // Decision verb used as the action suffix.
    var verb;
    if (wasRefused) {
      verb = "refused";
    } else if (issues.length > 0) {
      verb = "audited";
    } else {
      verb = "served";
    }

    // With gdprRedact set the SQL text is replaced by its fingerprint;
    // otherwise a length-capped copy of the text is recorded.
    var sqlText = Buffer.isBuffer(sql) ? sql.toString("utf8") : String(sql);
    var redact = !!opts.gdprRedact;
    var auditedSql = redact ? _fingerprint(sqlText) : _truncateForAudit(sqlText);

    audit().safeEmit({
      action: "guardSql.gate." + verb,
      actor: ctx && ctx.actor,
      outcome: wasRefused ? "denied" : "success",
      metadata: {
        contextMode: contextMode,
        profile: opts.profile || "strict",
        route: ctx && ctx.route,
        sql: auditedSql,
        sqlRedacted: redact,
        issues: gateContract.summarizeIssues(issues),
      },
    });
  } catch (_e) { /* drop-silent — audit sinks must never crash the producer */ }
}
1426
+
1427
// Hash fingerprint of a SQL body for the gdpr audit path: a stable
// identifier that stands in for the plaintext predicate (which may
// contain personal data). The digest comes from the framework crypto
// primitive's sha3Hash; only the first 16 hex chars (64 bits) are kept,
// which is ample for correlation.
// NOTE(review): this fingerprint has been described as "salted", but no
// salt is mixed in by this function — confirm whether bCrypto.sha3Hash
// salts internally; if not, low-entropy predicates are guessable.
function _fingerprint(text) {
  var digestHex = bCrypto.sha3Hash(Buffer.from(text, "utf8"), "hex");
  return "sha3:" + digestHex.slice(0, 16);
}
1434
+
1435
// Maximum number of UTF-16 code units of SQL text kept in an audit entry.
var AUDIT_SNIPPET_CHARS = 200;

// Cap a SQL string for the audit record.
//
// @param text  SQL text (string)
// @returns     `text` unchanged when it is at most AUDIT_SNIPPET_CHARS
//              code units long; otherwise its leading portion followed
//              by the "...(truncated)" marker
function _truncateForAudit(text) {
  if (text.length <= AUDIT_SNIPPET_CHARS) return text;

  // Never end the kept portion on a high surrogate: cutting between the
  // two halves of a surrogate pair would leave a lone surrogate, which
  // is not valid Unicode and is replaced with U+FFFD when the audit
  // entry is encoded as UTF-8.
  var cut = AUDIT_SNIPPET_CHARS;
  var lastUnit = text.charCodeAt(cut - 1);
  if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) cut -= 1;

  return text.slice(0, cut) + "...(truncated)";
}
1440
+
1441
+ // buildProfile / compliancePosture / loadRulePack are assembled by
1442
+ // gateContract.defineGuard below — their wiki sections render from the
1443
+ // single-sourced @abiTemplate blocks in gate-contract.js.
1444
+
1445
+ // ---- adaptive integration-test fixtures (consumed by layer-5 host harness) ----
1446
var INTEGRATION_FIXTURES = Object.freeze({
  kind: "sql",

  // Benign sample: a predicate fragment in which every value is a bound
  // placeholder and no statement verb appears (the shape a whereRaw
  // call would pass in the default fragment mode).
  benignSql: "id = ? AND status = ?",

  // Hostile sample: a second statement stacked behind the predicate
  // (CWE-89 class).
  hostileSql: "id = 1; DROP TABLE users",
});
1455
+
1456
+ // Assembled from the gate-contract guard factory: error class, registry
1457
+ // exports (NAME / KIND / INTEGRATION_FIXTURES), buildProfile /
1458
+ // compliancePosture / loadRulePack wiring, plus the per-guard inspection
1459
+ // surface (validate / sanitize / bespoke gate) and SQL extras
1460
+ // (CONTEXT_MODES / DETECTORS) passed through verbatim. The custom KIND
1461
+ // ("sql") is accepted because the bespoke gate reads its own ctx fields
1462
+ // (ctx.sql / ctx.bytes).
1463
+ //
1464
+ // Raw SQL is a non-content axis (operators apply it to whereRaw /
1465
+ // operator-SQL / migration strings, not to a request body routed by
1466
+ // Content-Type), so guard-sql is a STANDALONE primitive — it does NOT
1467
+ // register into b.guardAll's content-type-routed dispatch. The
1468
+ // MIME_TYPES / EXTENSIONS exports describe the media class (so a host
1469
+ // that DOES carry SQL as an upload can find this guard by type) but the
1470
+ // registration in lib/guard-all.js is STANDALONE_GUARDS, and the
1471
+ // integration harness routes it through the ctx.sql dispatcher. They
1472
+ // ride in `extra` (not the factory's content-kind MIME/EXTENSIONS path,
1473
+ // which keys off KIND === "content").
1474
+ module.exports = gateContract.defineGuard({
1475
+ name: "sql",
1476
+ kind: "sql", // raw-SQL guard (consumes ctx.sql)
1477
+ errorClass: GuardSqlError,
1478
+ profiles: PROFILES,
1479
+ defaults: DEFAULTS,
1480
+ postures: COMPLIANCE_POSTURES,
1481
+ integrationFixtures: INTEGRATION_FIXTURES,
1482
+ validate: validate,
1483
+ sanitize: sanitize,
1484
+ gate: gate,
1485
+ extra: {
1486
+ MIME_TYPES: Object.freeze(["application/sql"]),
1487
+ EXTENSIONS: Object.freeze([".sql"]),
1488
+ CONTEXT_MODES: CONTEXT_MODES,
1489
+ DETECTORS: Object.freeze(DETECTORS.slice()),
1490
+ },
1491
+ });