@malloydata/db-duckdb 0.0.376 → 0.0.378

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,487 @@
1
+ # DuckDB Config Maintenance Notes
2
+
3
+ This file is the self-contained maintainer handoff for Malloy's native DuckDB
4
+ configuration and restricted-execution policy code. A future maintainer should
5
+ be able to understand the policy entities, their purpose, and the safe way to
6
+ extend them from this file plus the implementation.
7
+
8
+ Historical design notes may exist elsewhere in the repository, but this file
9
+ must not depend on them for essential context.
10
+
11
+ ## Mental Model
12
+
13
+ Malloy exposes two user-facing policy axes for native DuckDB:
14
+
15
+ - `filesystemPolicy: "open" | "sandboxed"`
16
+ - `networkPolicy: "open" | "closed"`
17
+
18
+ Those fields are Malloy policy controls. They are not raw DuckDB option names
19
+ and they are not a general policy language.
20
+
21
+ The reviewed strict recipe for untrusted Malloy is:
22
+
23
+ - `filesystemPolicy: "sandboxed"`
24
+ - `networkPolicy: "closed"`
25
+
26
+ The implementation compiles those public policy values into an internal
27
+ `NormalizedDuckDBSafetyPolicy`. That internal object records the derived
28
+ enforcement requirements, such as locked configuration, no setup SQL,
29
+ temp-file encryption, extension restrictions, and secret neutralization. Keep
30
+ the public surface small and make new enforcement consequences explicit in the
31
+ derived policy object.
32
+
33
+ Restricted execution is about DuckDB filesystem reach, network reach, mutable
34
+ configuration, extension loading, instance sharing, and ambient persistent
35
+ secrets. It is not a complete sandbox. It does not provide CPU, memory,
36
+ temp-space, query-timeout, or denial-of-service isolation. Hosts that need
37
+ resource isolation must configure controls such as `threads`, `memoryLimit`,
38
+ process isolation, query cancellation, and host-level quotas separately.
39
+
40
+ ## Code Map
41
+
42
+ - Native connection schema: `src/native.ts`
43
+ - Registers native DuckDB properties.
44
+ - `filesystemPolicy` and `networkPolicy` are `requireLiteralString` so
45
+ invalid reference-shaped or non-string values reach registry validation
46
+ instead of being silently dropped by generic config compilation.
47
+ - Config compiler literal guard: `../malloy/src/api/foundation/config_compile.ts`
48
+ - Preserves invalid literal-required values as values after warning, allowing
49
+ registry lookup to fail closed before the DuckDB factory runs.
50
+ - Normalization and policy derivation: `src/duckdb_config.ts`
51
+ - Parses raw effective config.
52
+ - Derives `NormalizedDuckDBSafetyPolicy`.
53
+ - Applies conflict checks, policy-derived defaults, canonicalization, secret
54
+ directory derivation, and share-key construction.
55
+ - Path security helpers: `src/path_security.ts`
56
+ - Canonicalizes paths and performs containment checks.
57
+ - Path handling is part of the security boundary.
58
+ - Native lifecycle and baseline setup: `src/duckdb_connection.ts`
59
+ - Normalizes before opening DuckDB.
60
+ - Builds the share key.
61
+ - Opens or reuses a native instance.
62
+ - Applies the final baseline, then any `setupSQL` (unrestricted connections only), then
63
+ `lock_configuration=true` when policy requires it.
64
+ - Policy tests:
65
+ - `src/duckdb_config.spec.ts`
66
+ - `src/duckdb_restricted.spec.ts`
67
+
68
+ ## Core Invariants
69
+
70
+ - Restricted execution must fail closed. Unknown policy values, conflicting
71
+ explicit settings, missing required values, or inability to apply the
72
+ baseline must fail connection creation.
73
+ - Policy validation must run early enough that invalid raw values such as
74
+ `true`, `42`, or `{env: "POLICY"}` cannot be silently dropped and interpreted
75
+ as the default `"open"` policy.
76
+ - A restricted DuckDB connection must never share a live instance with a less
77
+ restrictive or otherwise semantically different connection.
78
+ - The fixed baseline must be established before any Malloy-derived SQL runs.
79
+ - Configuration must be locked whenever the derived safety policy requires it.
80
+ - Ordinary DuckDB connections must remain ordinary. Do not implicitly turn them
81
+ into locked connections. Mixed Malloy/SQL workflows that rely on later DuckDB
82
+ `SET` statements remain unrestricted workflows.
83
+ - `duckdb_wasm` does not receive these native policy guarantees. WASM hardening
84
+ is host-owned unless a separate WASM-specific policy design is implemented.
85
+
86
+ ## User-Facing Policy Behavior
87
+
88
+ `filesystemPolicy: "open"` means Malloy does not derive a filesystem sandbox.
89
+
90
+ `filesystemPolicy: "sandboxed"` means Malloy derives and enforces a native
91
+ DuckDB filesystem boundary:
92
+
93
+ - `allowedDirectories` must be set explicitly or is derived from `workingDirectory`.
94
+ - `tempDirectory` must be set explicitly or is derived as `workingDirectory/.tmp`.
95
+ - `workingDirectory`, when present, must be inside `allowedDirectories`.
96
+ - `tempDirectory` must be inside `allowedDirectories`.
97
+ - Non-POSIX hosts are rejected. Do not approximate Windows path behavior for
98
+ the current policy.
99
+
100
+ `networkPolicy: "open"` means network-capable DuckDB behavior may remain
101
+ available.
102
+
103
+ `networkPolicy: "closed"` means Malloy disables network-capable DuckDB behavior:
104
+
105
+ - `enableExternalAccess` is forced to `false`.
106
+ - `httpfs` must not load.
107
+ - DuckDB must not `INSTALL` extensions.
108
+ - network-requiring `databasePath` values, including MotherDuck paths, are
109
+ rejected.
110
+ - `motherDuckToken` is rejected.
111
+
112
+ The two axes can be used independently:
113
+
114
+ - sandboxed filesystem plus open network is allowed, but it is not the reviewed
115
+ strict recipe.
116
+ - open filesystem plus closed network is allowed, for hosts that trust an
117
+ external filesystem/container boundary but want Malloy to close DuckDB's
118
+ network-capable surface.
119
+
120
+ ## Registered Config Surface
121
+
122
+ Native DuckDB supports the existing Malloy-facing properties:
123
+
124
+ - `databasePath`
125
+ - `workingDirectory`
126
+ - `motherDuckToken`
127
+ - `additionalExtensions`
128
+ - `readOnly`
129
+ - `setupSQL`
130
+
131
+ The config extension adds policy properties:
132
+
133
+ - `filesystemPolicy`
134
+ - `networkPolicy`
135
+
136
+ It also adds curated DuckDB-level properties:
137
+
138
+ - `allowedDirectories`
139
+ - `enableExternalAccess`
140
+ - `lockConfiguration`
141
+ - `autoloadKnownExtensions`
142
+ - `autoinstallKnownExtensions`
143
+ - `allowCommunityExtensions`
144
+ - `allowUnsignedExtensions`
145
+ - `tempFileEncryption`
146
+ - `threads`
147
+ - `memoryLimit`
148
+ - `tempDirectory`
149
+ - `extensionDirectory`
150
+
151
+ `allowedDirectories` is intentionally registered as `json` for now because the
152
+ connection property model does not yet have a first-class string-array type.
153
+ Even though the registry metadata says `json`, DuckDB normalization must require
154
+ the value to be a JSON array of strings. Do not add a registry default for
155
+ `allowedDirectories`; its only defaulting behavior belongs to
156
+ `filesystemPolicy: "sandboxed"` normalization.
157
+
158
+ `workingDirectory` defaults to `{config: "rootDirectory"}` in the native DuckDB
159
+ registry. Hosts that know the project root should populate `config.rootDirectory`
160
+ so relative DuckDB file paths resolve against the project root rather than the
161
+ location of an individual Malloy file or config file. If a sandboxed connection
162
+ expected this overlay and it is missing, normalization should fail with an error
163
+ that points at `workingDirectory`/`allowedDirectories` and mentions the
164
+ `config.rootDirectory` overlay.
165
+
166
+ `memoryLimit` remains a string because DuckDB accepts values such as `1GB`.
167
+
168
+ ## Normalization Rules
169
+
170
+ All policy reasoning belongs in `normalizeDuckDBConfig()`.
171
+
172
+ Policy parsing:
173
+
174
+ - Missing `filesystemPolicy` defaults to `"open"`.
175
+ - Missing `networkPolicy` defaults to `"open"`.
176
+ - Accepted policy values are exact documented strings only.
177
+ - Unknown strings, strings with whitespace or different casing, non-strings,
178
+ and reference-shaped values must fail closed.
179
+
180
+ Derived safety policy:
181
+
182
+ - If either policy is restricted, derive:
183
+ - `requiresLockedConfiguration: true`
184
+ - `requiresNoSetupSQL: true`
185
+ - `requiresTempFileEncryption: true`
186
+ - `requiresSecretNeutralization: true`
187
+ - `forbidAdditionalExtensions: true`
188
+ - required baseline extensions `icu` and `json`
189
+ - If `filesystemPolicy === "sandboxed"`, also derive:
190
+ - POSIX host required
191
+ - sandboxed path validation required
192
+ - derived temp directory name `.tmp`
193
+ - If `networkPolicy === "closed"`, also derive:
194
+ - `allowHttpfs: false`
195
+ - no extension install or auto-install/autoload expansion
196
+
197
+ Conflict checks:
198
+
199
+ - Reject `setupSQL` whenever a restricted policy requires a locked baseline.
200
+ - Reject non-empty `additionalExtensions` whenever a restricted policy forbids
201
+ extension broadening.
202
+ - Reject `lockConfiguration: false` under any restricted policy.
203
+ - Reject `tempFileEncryption: false` under any restricted policy.
204
+ - Under `networkPolicy: "closed"`, reject:
205
+ - `enableExternalAccess: true`
206
+ - `autoloadKnownExtensions: true`
207
+ - `autoinstallKnownExtensions: true`
208
+ - `allowCommunityExtensions: true`
209
+ - `allowUnsignedExtensions: true`
210
+ - `motherDuckToken`
211
+ - remote or network-requiring `databasePath`
212
+ - Redundant matching values are allowed. For example,
213
+ `networkPolicy: "closed"` plus `enableExternalAccess: false` is valid.
214
+
215
+ Derived defaults:
216
+
217
+ - Under `networkPolicy: "closed"`, force:
218
+ - `enableExternalAccess = false`
219
+ - `autoloadKnownExtensions = false`
220
+ - `autoinstallKnownExtensions = false`
221
+ - `allowCommunityExtensions = false`
222
+ - `allowUnsignedExtensions = false`
223
+ - Under any restricted policy, force:
224
+ - `lockConfiguration = true`
225
+ - `tempFileEncryption = true`
226
+ - Under `filesystemPolicy: "sandboxed"`:
227
+ - If `allowedDirectories` is omitted, derive it to exactly the canonical
228
+ `workingDirectory`, and nothing broader.
229
+ - If `tempDirectory` is omitted, derive it to `workingDirectory/.tmp`.
230
+ - If Malloy cannot derive the required paths safely, fail closed with a
231
+ field-specific error.
232
+ - Outside `filesystemPolicy: "sandboxed"`, do not invent an
233
+ `allowedDirectories` default.
234
+ - With `databasePath: ":memory:"`, normalize `readOnly` to `false`.
235
+ This intentionally preserves existing behavior. A user-facing warning for
236
+ ignored `readOnly: true` is deferred until host warning plumbing has a
237
+ reviewed path for normalized connection options.
238
+
239
+ Empty text/list handling:
240
+
241
+ - Empty or whitespace-only `setupSQL` is treated as absent.
242
+ - Empty or whitespace-only `motherDuckToken` is treated as absent.
243
+ - An empty `additionalExtensions` list is treated as absent.
244
+
245
+ ## Path Handling
246
+
247
+ Canonicalize path-bearing values before validation and before share-key
248
+ comparison:
249
+
250
+ - `databasePath`, except `:memory:` and recognized remote paths
251
+ - `workingDirectory`
252
+ - `allowedDirectories`
253
+ - `tempDirectory`
254
+ - `extensionDirectory`
255
+
256
+ Canonicalization must:
257
+
258
+ - resolve `.` and `..`
259
+ - normalize separators and remove trailing separators
260
+ - resolve symlinks when the path or its nearest existing parent exists
261
+ - deduplicate and sort list-valued paths before identity comparison
262
+
263
+ Containment checks must operate only on canonicalized values. Treat path
264
+ normalization as security logic, not cosmetic cleanup.
265
+
266
+ `allowedDirectories` is not read-only. Hosts must assume DuckDB may read and
267
+ write inside every allowed directory through features that remain available
268
+ inside the boundary, including `COPY TO`, `EXPORT DATABASE`, and attached
269
+ writable databases. Do not point `allowedDirectories` at shared writable
270
+ locations unless that write surface is acceptable.
271
+
272
+ ## Sharing And Identity
273
+
274
+ Do not cache native DuckDB instances by `databasePath` alone.
275
+
276
+ Use the derived share key from `buildDuckDBShareKey()` and include every
277
+ effective setting that can affect runtime behavior, safety, or semantics:
278
+
279
+ - `databasePath`
280
+ - `readOnly`
281
+ - `filesystemPolicy`
282
+ - `networkPolicy`
283
+ - `setupSQL`
284
+ - canonicalized `allowedDirectories`
285
+ - `enableExternalAccess`
286
+ - `lockConfiguration`
287
+ - `autoloadKnownExtensions`
288
+ - `autoinstallKnownExtensions`
289
+ - `allowCommunityExtensions`
290
+ - `allowUnsignedExtensions`
291
+ - `tempFileEncryption`
292
+ - `threads`
293
+ - `memoryLimit`
294
+ - `tempDirectory`
295
+ - `workingDirectory`
296
+ - normalized `additionalExtensions`
297
+ - `extensionDirectory`
298
+ - `motherDuckToken`
299
+
300
+ List-valued inputs must be canonicalized, sorted, and deduplicated so
301
+ semantically identical configs share and semantically different configs do not.
302
+
303
+ Keep the share key separate from `getDigest()`:
304
+
305
+ - `getDigest()` is about build/result identity.
306
+ - the share key is about safe native instance reuse.
307
+
308
+ This share-key behavior intentionally affects ordinary connections too:
309
+ connections with the same `databasePath` but different effective settings
310
+ should not share a live instance.
311
+
312
+ Share keys are sensitive because `motherDuckToken` contributes to them. Use
313
+ `makeDigest(...)` and do not log raw share-key inputs.
314
+
315
+ ## Baseline Setup
316
+
317
+ Prefer open-time DuckDB config through `DuckDBInstance.create(path, options)`
318
+ when the option is supported and serializes cleanly. Keep post-connect setup as
319
+ small as practical.
320
+
321
+ The final baseline must happen before Malloy-derived SQL. When locking is
322
+ required, `lock_configuration=true` is the final baseline step.
323
+
324
+ Current baseline mapping:
325
+
326
+ - `FILE_SEARCH_PATH`
327
+ - set before user SQL when `workingDirectory` exists
328
+ - currently post-connect because open-time behavior is not verified
329
+ - `allowed_directories`
330
+ - set before lock when normalized config has `allowedDirectories`
331
+ - currently post-connect because the DuckDB Node API rejects the list-valued
332
+ option during local verification
333
+ - `secret_directory`
334
+ - set before lock when restricted secret neutralization derives a directory
335
+ - currently post-connect because open-time behavior is not verified
336
+ - `enable_external_access`
337
+ - set at open time when no `allowed_directories` baseline SET is required
338
+ - otherwise set immediately after `allowed_directories`
339
+ - DuckDB rejects changing `allowed_directories` after
340
+ `enable_external_access=false`, so the strict sandboxed recipe has this
341
+ small post-connect ordering constraint until the Node API can apply the
342
+ allowlist at instance creation
343
+ - `TimeZone='UTC'`
344
+ - always part of Malloy's fixed correctness baseline
345
+ - not user-configurable
346
+ - must be established before lock
347
+ - built-in extension loading
348
+ - preserve ordinary compatibility outside restricted modes
349
+ - under `networkPolicy: "closed"`, do not `INSTALL`, do not load `httpfs`,
350
+ and load only the fixed Malloy baseline extensions
351
+ - `setupSQL`
352
+ - preserve outside restricted modes
353
+ - reject when a restricted policy requires a locked baseline
354
+ - run only after fixed baseline steps and before optional lock
355
+
356
+ Do not introduce later Malloy-emitted DuckDB `SET` statements that mutate
357
+ configuration after `lock_configuration=true`.
358
+
359
+ ## Extensions
360
+
361
+ `icu` and `json` are part of the fixed Malloy DuckDB baseline.
362
+
363
+ `httpfs` is not part of that baseline. It broadens remote/network-capable
364
+ behavior and is controlled by `networkPolicy`.
365
+
366
+ Under `networkPolicy: "closed"`:
367
+
368
+ - do not load `httpfs`
369
+ - do not `INSTALL` extensions
370
+ - do not allow `additionalExtensions` (forbidden under any restricted policy)
371
+ - load only fixed baseline extensions `icu` and `json`
372
+ - fail closed if a required baseline extension is unavailable locally
373
+
374
+ Outside `networkPolicy: "closed"`, preserve ordinary compatibility unless a
375
+ separate product decision changes it.
376
+
377
+ ## Secrets
378
+
379
+ The full DuckDB secrets product story is deferred. Do not add public
380
+ secrets-related config surface as part of restricted-policy maintenance unless
381
+ there is a reviewed product design.
382
+
383
+ Even without a public secrets product, restricted execution must not expose
384
+ ambient persistent DuckDB secrets from the host environment or another tenant.
385
+
386
+ Current interim behavior:
387
+
388
+ - Any restricted policy derives a private `secretDirectory`.
389
+ - If `tempDirectory` exists, derive secrets under
390
+ `tempDirectory/.duckdb-secrets`.
391
+ - Otherwise, if `workingDirectory` exists, derive secrets under
392
+ `workingDirectory/.duckdb-secrets`.
393
+ - If restricted mode cannot derive a scoped secret directory, fail closed.
394
+ - Apply `secret_directory` before lock.
395
+
396
+ If this behavior changes, update both the safety policy derivation and the
397
+ restricted-mode tests.
398
+
399
+ ## WASM Scope
400
+
401
+ The policy system described here targets native DuckDB.
402
+
403
+ Do not register native-only policy fields in the `duckdb_wasm` connection
404
+ schema in this pass:
405
+
406
+ - `filesystemPolicy`
407
+ - `networkPolicy`
408
+ - native-only hardening properties
409
+
410
+ Do not add native restricted-policy behavior to `DuckDBWASMConnection` as a
411
+ side effect of native DuckDB maintenance. WASM runs in a host-provided
412
+ JavaScript/WASM environment, where the host controls the virtual filesystem,
413
+ browser file APIs, fetch/network reach, OPFS, registered files, remote URL
414
+ registration, and credential injection.
415
+
416
+ If Malloy later wants restricted execution for `duckdb_wasm`, design it
417
+ explicitly for WASM instead of copying native policy semantics.
418
+
419
+ ## Adding A New DuckDB Config Property
420
+
421
+ When adding a new native DuckDB config property, update all relevant layers:
422
+
423
+ - Register the property in `src/native.ts` with the correct config metadata.
424
+ - Parse and validate it in `normalizeDuckDBConfig()`.
425
+ - Decide whether it conflicts with `filesystemPolicy` or `networkPolicy`.
426
+ - Decide whether any restricted policy must derive or force a value.
427
+ - If it is path-like, canonicalize it before validation and identity
428
+ comparison.
429
+ - Include it in `buildDuckDBShareKey()` if it can affect runtime behavior,
430
+ security posture, or semantics.
431
+ - Decide whether it belongs in open-time options or final baseline setup.
432
+ - Add tests for validation, derived behavior, conflict behavior, and sharing
433
+ identity when relevant.
434
+ - Update this file if the property changes the policy model or maintainer
435
+ checklist.
436
+
437
+ Be conservative: new settings that broaden filesystem, network, extension,
438
+ credential, or mutable-configuration behavior should usually be rejected or
439
+ forced to a safe value under restricted policies.
440
+
441
+ ## Changing Policy Behavior
442
+
443
+ When changing `filesystemPolicy`, `networkPolicy`, or
444
+ `NormalizedDuckDBSafetyPolicy`, review these together:
445
+
446
+ - user-facing policy contract
447
+ - normalizer parsing and conflict checks
448
+ - derived defaults
449
+ - path canonicalization and containment rules
450
+ - secret neutralization
451
+ - open-time options
452
+ - final baseline ordering
453
+ - extension install/load behavior
454
+ - lock timing
455
+ - share-key inputs
456
+ - native restricted tests
457
+ - WASM non-claim boundary (`duckdb_wasm` does not receive the native guarantees)
458
+ - public documentation
459
+
460
+ Do not make a policy change only in `DuckDBConnection`. The normalizer should
461
+ remain the central place where raw config becomes effective policy and runtime
462
+ state.
463
+
464
+ ## Test Expectations
465
+
466
+ Keep focused tests for:
467
+
468
+ - `allowedDirectories` accepted as a JSON array of strings
469
+ - `allowedDirectories` rejected for non-array or non-string JSON values
470
+ - exact policy value parsing
471
+ - policy fields rejected when provided as non-literal or reference-shaped
472
+ values
473
+ - missing policy-required values failing closed
474
+ - conflict checks
475
+ - redundant matching explicit values accepted
476
+ - sandboxed derived defaults
477
+ - `tempDirectory` containment
478
+ - network-requiring `databasePath` rejection
479
+ - `readOnly: true` with `:memory:` normalizing to `false`
480
+ - share keys differing when safety-relevant settings differ
481
+ - semantically identical allowlists sharing
482
+ - restricted baseline order
483
+ - `networkPolicy: "closed"` not loading `httpfs`
484
+ - `networkPolicy: "closed"` not running `INSTALL`
485
+ - required baseline extensions loaded or failing closed
486
+ - later config-changing `SET` statements failing after lock
487
+ - `closeAllInstances()` clearing all share-keyed native instances
@@ -0,0 +1,48 @@
1
+ import type { ConnectionConfig } from '@malloydata/malloy';
2
+ export type DuckDBFilesystemPolicy = 'open' | 'sandboxed'; // 'open': Malloy derives no filesystem sandbox; 'sandboxed': Malloy derives and enforces a native DuckDB filesystem boundary.
3
+ export type DuckDBNetworkPolicy = 'open' | 'closed'; // 'open': network-capable DuckDB behavior may remain available; 'closed': Malloy disables network-capable DuckDB behavior.
4
+ export interface NormalizedDuckDBSafetyPolicy { // Internal record of the enforcement requirements derived from the public filesystemPolicy/networkPolicy values.
5
+ requiresPosixHost: boolean; // Derived for filesystemPolicy "sandboxed": non-POSIX hosts are rejected.
6
+ requiresLockedConfiguration: boolean; // Derived true when either policy is restricted; lock_configuration=true is then the final baseline step.
7
+ requiresNoSetupSQL: boolean; // Derived true when either policy is restricted; setupSQL is rejected in that case.
8
+ requiresSandboxedPaths: boolean; // Derived for filesystemPolicy "sandboxed": sandboxed path validation is required.
9
+ requiresTempFileEncryption: boolean; // Derived true when either policy is restricted; tempFileEncryption: false is rejected.
10
+ requiresSecretNeutralization: boolean; // Derived true when either policy is restricted; a private secret directory is derived.
11
+ requiredBaselineExtensions: readonly ['icu', 'json']; // Fixed Malloy baseline extensions; httpfs is deliberately not part of this baseline.
12
+ allowHttpfs: boolean; // Derived false for networkPolicy "closed"; NOTE(review): value for other policy combinations is not stated in the notes - confirm in duckdb_config.ts.
13
+ forbidAdditionalExtensions: boolean; // Derived true when either policy is restricted; non-empty additionalExtensions is rejected.
14
+ derivedTempDirectoryName: '.tmp'; // Directory name used when tempDirectory is derived as workingDirectory/.tmp.
15
+ }
16
+ export interface NormalizedDuckDBConfig { // Effective native DuckDB configuration; the return type of normalizeDuckDBConfig().
17
+ name: string; // NOTE(review): presumably the connection name; not described in the maintenance notes - confirm.
18
+ databasePath: string; // Canonicalized, except ":memory:" and recognized remote paths.
19
+ readOnly: boolean; // Normalized to false when databasePath is ":memory:".
20
+ workingDirectory?: string; // Canonicalized; the native registry default is {config: "rootDirectory"}.
21
+ filesystemPolicy: DuckDBFilesystemPolicy; // Defaults to "open" when missing from the raw config.
22
+ networkPolicy: DuckDBNetworkPolicy; // Defaults to "open" when missing from the raw config.
23
+ safetyPolicy?: NormalizedDuckDBSafetyPolicy; // Derived enforcement requirements; NOTE(review): presumably absent when both policies are "open" - confirm.
24
+ allowedDirectories?: string[]; // Canonicalized path list; under "sandboxed" it defaults to exactly the canonical workingDirectory.
25
+ enableExternalAccess?: boolean; // Forced to false under networkPolicy "closed".
26
+ lockConfiguration?: boolean; // Forced to true under any restricted policy.
27
+ autoloadKnownExtensions?: boolean; // Forced to false under networkPolicy "closed".
28
+ autoinstallKnownExtensions?: boolean; // Forced to false under networkPolicy "closed".
29
+ allowCommunityExtensions?: boolean; // Forced to false under networkPolicy "closed".
30
+ allowUnsignedExtensions?: boolean; // Forced to false under networkPolicy "closed".
31
+ tempFileEncryption?: boolean; // Forced to true under any restricted policy.
32
+ threads?: number; // Host-configured resource control; the policies themselves do not provide CPU isolation.
33
+ memoryLimit?: string; // Kept as a string because DuckDB accepts values such as "1GB".
34
+ tempDirectory?: string; // Canonicalized; under "sandboxed" it defaults to workingDirectory/.tmp and must be inside allowedDirectories.
35
+ secretDirectory?: string; // Derived under any restricted policy: tempDirectory/.duckdb-secrets, else workingDirectory/.duckdb-secrets.
36
+ extensionDirectory?: string; // Canonicalized path-bearing value.
37
+ motherDuckToken?: string; // Rejected under networkPolicy "closed"; empty or whitespace-only input is treated as absent.
38
+ additionalExtensions: string[]; // Must be empty under a restricted policy; NOTE(review): non-optional here, so "absent" presumably normalizes to [] - confirm.
39
+ setupSQL?: string; // Rejected under a restricted policy; empty or whitespace-only input is treated as absent.
40
+ }
41
+ export declare class DuckDBConfigValidationError extends Error { // Error subclass for DuckDB config validation failures; NOTE(review): presumably thrown by normalizeDuckDBConfig() - throw sites are not visible here.
42
+ constructor(message: string); // message: human-readable description of the validation failure.
43
+ }
44
+ export declare function normalizeDuckDBConfig(config: ConnectionConfig): NormalizedDuckDBConfig; // Turns the raw effective config into a NormalizedDuckDBConfig; per the maintenance notes, policy parsing, conflict checks, derived defaults and path canonicalization all belong here.
45
+ export declare function buildDuckDBShareKey(config: NormalizedDuckDBConfig): string; // Returns the key that decides safe native-instance reuse (separate from getDigest()); its inputs include motherDuckToken, so never log them raw.
46
+ export declare function sqlStringLiteral(value: string): string; // NOTE(review): presumably renders `value` as a quoted SQL string literal; escaping rules are not visible in this declaration - verify.
47
+ export declare function sqlStringListLiteral(values: string[]): string; // NOTE(review): presumably renders `values` as a SQL list of string literals (e.g. for allowed_directories); exact format is not visible here - verify.
48
+ export declare function stringifyDuckDBOption(value: string | number | boolean): string; // NOTE(review): presumably converts an option value to the string form passed to DuckDB; conversion rules are not visible here - verify.