@fuzdev/fuz_app 0.81.0 → 0.83.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"account_queries.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/auth/account_queries.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,OAAO,KAAK,EAAC,SAAS,EAAC,MAAM,qBAAqB,CAAC;AAEnD,OAAO,EAEN,KAAK,OAAO,EACZ,KAAK,KAAK,EACV,KAAK,kBAAkB,EACvB,KAAK,qBAAqB,EAC1B,MAAM,qBAAqB,CAAC;AAG7B;;;;;;;GAOG;AACH,eAAO,MAAM,oBAAoB,GAChC,MAAM,SAAS,EACf,OAAO,kBAAkB,KACvB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,OAAO,GAAG,SAAS,CAI7B,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,yBAAyB,GACrC,MAAM,SAAS,EACf,UAAU,MAAM,KACd,OAAO,CAAC,OAAO,GAAG,SAAS,CAI7B,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,sBAAsB,GAClC,MAAM,SAAS,EACf,OAAO,MAAM,KACX,OAAO,CAAC,OAAO,GAAG,SAAS,CAI7B,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,kCAAkC,GAC9C,MAAM,SAAS,EACf,OAAO,MAAM,KACX,OAAO,CAAC,OAAO,GAAG,SAAS,CAS7B,CAAC;AAEF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,eAAO,MAAM,6BAA6B,GACzC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,eAAe,MAAM,EACrB,YAAY,MAAM,GAAG,IAAI,EACzB,eAAe,MAAM,KACnB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;GAIG;AACH,MAAM,WAAW,uBAAuB;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CACrB;AAED;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,yBAAyB,GACrC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,YAAY,MAAM,GAAG,IAAI,KACvB,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAO7C,CAAC;AAEF;;;;;;;;;;;;;;GAcG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAK7C,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,uBAAuB,GACnC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,YAAY,MAAM,GAAG,IAAI,KACvB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,sBAAsB,GAClC,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAO7C,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,oBAAoB,GAAU,MAAM,SAAS,EAAE,IAAI,MAAM,KAAG,OAAO,CAAC,OAAO,CAQvF,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,qBAAqB,GAAU,MAAM,SAAS,KAAG,OAAO,CAAC,OAAO,CAK5E,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,kBAAkB,GAC9B,MAAM,SAAS,EACf,YAAY,MAAM,EAClB,MAAM,MAAM,KACV,OAAO,CAAC,KAAK,CAMf,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,uBAAuB,GACnC,MAAM,SAAS,EACf,YAAY,MAAM,KAChB
,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAKtB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,iBAAiB,GAC7B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,KAAK,GAAG,SAAS,CAE3B,CAAC;AAEF;;;;;;;;;GASG;AACH,eAAO,MAAM,+BAA+B,GAC3C,MAAM,SAAS,EACf,OAAO,kBAAkB,KACvB,OAAO,CAAC;IAAC,OAAO,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,KAAK,CAAA;CAAC,CAI1C,CAAC;AA2BF,8CAA8C;AAC9C,MAAM,WAAW,uBAAuB;IACvC;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB;;;;;OAKG;IACH,eAAe,CAAC,EAAE,OAAO,GAAG,IAAI,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,SAAS,EACf,UAAU,uBAAuB,KAC/B,OAAO,CAAC,KAAK,CAAC,qBAAqB,CAAC,CA8GtC,CAAC"}
1
+ {"version":3,"file":"account_queries.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/auth/account_queries.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,OAAO,KAAK,EAAC,SAAS,EAAC,MAAM,qBAAqB,CAAC;AAEnD,OAAO,EAEN,KAAK,OAAO,EACZ,KAAK,KAAK,EACV,KAAK,kBAAkB,EACvB,KAAK,qBAAqB,EAC1B,MAAM,qBAAqB,CAAC;AAqB7B;;;;;;;GAOG;AACH,eAAO,MAAM,oBAAoB,GAChC,MAAM,SAAS,EACf,OAAO,kBAAkB,KACvB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,OAAO,GAAG,SAAS,CAK7B,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,yBAAyB,GACrC,MAAM,SAAS,EACf,UAAU,MAAM,KACd,OAAO,CAAC,OAAO,GAAG,SAAS,CAK7B,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,sBAAsB,GAClC,MAAM,SAAS,EACf,OAAO,MAAM,KACX,OAAO,CAAC,OAAO,GAAG,SAAS,CAK7B,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,kCAAkC,GAC9C,MAAM,SAAS,EACf,OAAO,MAAM,KACX,OAAO,CAAC,OAAO,GAAG,SAAS,CAS7B,CAAC;AAEF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,eAAO,MAAM,6BAA6B,GACzC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,eAAe,MAAM,EACrB,YAAY,MAAM,GAAG,IAAI,EACzB,eAAe,MAAM,KACnB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;GAIG;AACH,MAAM,WAAW,uBAAuB;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CACrB;AAED;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,yBAAyB,GACrC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,YAAY,MAAM,GAAG,IAAI,KACvB,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAO7C,CAAC;AAEF;;;;;;;;;;;;;;GAcG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAK7C,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,uBAAuB,GACnC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,YAAY,MAAM,GAAG,IAAI,KACvB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,sBAAsB,GAClC,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAO7C,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,oBAAoB,GAAU,MAAM,SAAS,EAAE,IAAI,MAAM,KAAG,OAAO,CAAC,OAAO,CAQvF,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,qBAAqB,GAAU,MAAM,SAAS,KAAG,OAAO,CAAC,OAAO,CAK5E,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,kBAAkB,GAC9B,MAAM,SAAS,EACf,YAAY,MAAM,EAClB,MAAM,MAAM,KACV,OAAO,CAAC,KAAK,CAMf,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,uBAAuB,GACnC,MAAM,SAAS,EACf,YAAY,MAAM,KACh
B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAKtB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,iBAAiB,GAC7B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,KAAK,GAAG,SAAS,CAE3B,CAAC;AAEF;;;;;;;;;GASG;AACH,eAAO,MAAM,+BAA+B,GAC3C,MAAM,SAAS,EACf,OAAO,kBAAkB,KACvB,OAAO,CAAC;IAAC,OAAO,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,KAAK,CAAA;CAAC,CAI1C,CAAC;AA2BF,8CAA8C;AAC9C,MAAM,WAAW,uBAAuB;IACvC;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB;;;;;OAKG;IACH,eAAe,CAAC,EAAE,OAAO,GAAG,IAAI,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,SAAS,EACf,UAAU,uBAAuB,KAC/B,OAAO,CAAC,KAAK,CAAC,qBAAqB,CAAC,CA8GtC,CAAC"}
@@ -9,6 +9,22 @@
9
9
  import { assert_row } from '../db/assert_row.js';
10
10
  import { to_admin_account, } from './account_schema.js';
11
11
  import { ADMIN_ACCOUNT_LIST_DEFAULT_LIMIT } from './admin_action_specs.js';
12
+ /**
13
+ * The full `account` column set, named explicitly so a row read fails loud
14
+ * on schema drift.
15
+ *
16
+ * `SELECT *` silently omits a dropped column, which the login lookups then
17
+ * misread: `query_account_by_username_or_email` filters its result with
18
+ * `account.deleted_at === null`, so a missing `deleted_at` column reads back
19
+ * as `undefined`, `undefined === null` is `false`, and *every* login resolves
20
+ * to "not found" (401) — a silent, total auth outage instead of an error.
21
+ * Selecting named columns turns that drift into a hard Postgres
22
+ * `column "..." does not exist`. Mirrors the Rust side
23
+ * (`fuz_auth/src/account_queries.rs`), which selects named columns and
24
+ * decodes them positionally. Keep in sync with `Account` and the `account`
25
+ * DDL in `auth/auth_ddl.ts`.
26
+ */
27
+ const ACCOUNT_COLUMNS = 'id, username, email, email_verified, password_hash, created_at, created_by, updated_at, updated_by, deleted_at, deleted_by';
12
28
  /**
13
29
  * Create a new account.
14
30
  *
@@ -33,25 +49,19 @@ export const query_create_account = async (deps, input) => {
33
49
  * soft-deleted rows too, uses `query_purge_account` directly.
34
50
  */
35
51
  export const query_account_by_id = async (deps, id) => {
36
- return deps.db.query_one(`SELECT * FROM account WHERE id = $1 AND deleted_at IS NULL`, [
37
- id,
38
- ]);
52
+ return deps.db.query_one(`SELECT ${ACCOUNT_COLUMNS} FROM account WHERE id = $1 AND deleted_at IS NULL`, [id]);
39
53
  };
40
54
  /**
41
55
  * Find an account by username (case-insensitive).
42
56
  */
43
57
  export const query_account_by_username = async (deps, username) => {
44
- return deps.db.query_one(`SELECT * FROM account WHERE LOWER(username) = LOWER($1)`, [
45
- username,
46
- ]);
58
+ return deps.db.query_one(`SELECT ${ACCOUNT_COLUMNS} FROM account WHERE LOWER(username) = LOWER($1)`, [username]);
47
59
  };
48
60
  /**
49
61
  * Find an account by email (case-insensitive).
50
62
  */
51
63
  export const query_account_by_email = async (deps, email) => {
52
- return deps.db.query_one(`SELECT * FROM account WHERE LOWER(email) = LOWER($1)`, [
53
- email,
54
- ]);
64
+ return deps.db.query_one(`SELECT ${ACCOUNT_COLUMNS} FROM account WHERE LOWER(email) = LOWER($1)`, [email]);
55
65
  };
56
66
  /**
57
67
  * Find an account by username or email.
package/dist/db/CLAUDE.md CHANGED
@@ -92,11 +92,28 @@ DO NOTHING`), `_put_fact_refs`, `_get_fact` / `_get_fact_meta` / `_has_fact`
92
92
  external unlink), and the cell-coupled orphan queries `query_orphan_facts_list`
93
93
  / `_select_for_delete` (a fact is orphan when no active `cell.refs` names it).
94
94
  - **`fact_store.ts`** — `PgFactStore implements FactStore` (the interface lives
95
- in `@fuzdev/fuz_util/fact_store.js`): embedded-vs-`put_ref` split by
96
- `embedded_threshold`, JSON ref auto-extract, idempotent put, verify-on-read
97
- for external content via an injected `FactExternalFetcher`. The filesystem
98
- fetcher + write/serve plumbing live under `server/` (`file_fact_url.ts`,
99
- `file_fact_fetcher.ts`, `fact_write.ts`, `serve_fact_route.ts`).
95
+ in `@fuzdev/fuz_util/fact_store.js`): size-routed writes (embedded ≤
96
+ `embedded_threshold` / disk CAS above it / `put_ref` for an externally-managed
97
+ URL), JSON ref auto-extract, idempotent put, verify-on-read for external
98
+ content via an injected `FactExternalFetcher`. With `disk_root` + `fs` (the
99
+ `runtime/*Deps`) configured, oversize `put` and the streaming `put_stream`
100
+ write to the `<shard>/<rest>` disk CAS and the default fetcher reads from it.
101
+ - **`file_fact_url.ts`** — the canonical `file:<shard>/<rest>` URL shape
102
+ (`FileFactUrl` brand, `mint_file_fact_url` / `parse_file_fact_url` /
103
+ `FILE_FACT_URL_PATTERN`) plus `fact_disk_path(hash) → {shard, rest}`, the
104
+ single source of truth for the on-disk layout (twins the Rust `fuz_fact`).
105
+ - **`fact_disk_storage.ts`** — the filesystem CAS over `runtime/{FsStream,FsWrite,FsRemove,FsRead}Deps`
106
+ (not raw `node:fs`): `stream_fact_to_disk` (bounded-memory blake3+sha256 single
107
+ pass, buffer→spill, fsync-then-atomic-rename, dedup-drop if the CAS path already
108
+ exists), `write_fact_bytes_to_disk` (buffering twin), `create_disk_fact_fetcher`,
109
+ and `sweep_orphan_temps` (reaps stale `.tmp` spills by mtime). The temp is
110
+ `fsync`ed before the rename publishes it (twins the Rust `fuz_fact` §fsync
111
+ posture: data-sync before rename, parent-dir fsync waived) — the serve path
112
+ streams the file without re-hashing, so write-time durability is the guard.
113
+ - **`fact_store_errors.ts`** — `PayloadTooLargeError` / `StorageFullError` (+
114
+ `is_enospc_error`) thrown by `put_stream`, for a consumer route's 413 / 507.
115
+ - The read-side fetcher + write/serve plumbing also live under `server/`
116
+ (`file_fact_fetcher.ts`, `fact_write.ts`, `serve_fact_route.ts`).
100
117
 
101
118
  ### Migration namespace order
102
119
 
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Filesystem CAS for externally-stored fact bytes — the disk half of
3
+ * `PgFactStore`, threaded over the injectable `runtime/*Deps` rather than raw
4
+ * `node:fs`, so it runs unchanged under Node, Deno, and a mock runtime.
5
+ *
6
+ * Large facts (over the embedded threshold) live on disk at the canonical
7
+ * sharded layout `<facts_dir>/<shard>/<rest>` — `<shard>` is the first 2 hex
8
+ * chars of the blake3 digest, `<rest>` the remaining 62 — with the `fact` row
9
+ * carrying `external_url = file:<shard>/<rest>` (disk-root-relative). The layout
10
+ * is single-sourced by `fact_disk_path` in `db/file_fact_url.ts`, so the write
11
+ * path here and the URL minted into the row can't drift. The TS twin of the
12
+ * Rust `fuz_fact` disk CAS.
13
+ *
14
+ * Writes land through `<facts_dir>/.tmp/<rand>.tmp`, are `fsync`ed, then
15
+ * `rename`d into the content-addressed final path. The `rename` is atomic on
16
+ * POSIX (a *concurrent reader* observing the path sees either the full content
17
+ * or nothing), but atomicity is not durability — the `fsync` before the rename
18
+ * is what guards against a *host crash* leaving a torn/zero file at a published
19
+ * CAS path, because the serving path streams the hash-named file without
20
+ * re-hashing it (`server/serve_fact_route.ts`). This twins the Rust `fuz_fact`
21
+ * §fsync posture: data-sync before the rename; the parent-dir fsync stays
22
+ * deliberately waived (a lost dirent is regenerable under content addressing).
23
+ * If the final path already exists the temp is dropped instead of renamed over
24
+ * — idempotent dedup (same hash → byte-identical content), mirroring the Rust
25
+ * commit path. `.tmp/` is a sibling of `<shard>/` under the same `facts_dir` so
26
+ * `rename` is always same-filesystem (no EXDEV).
27
+ *
28
+ * @module
29
+ */
30
+ import { type FactHash } from '@fuzdev/fuz_util/fact_hash.js';
31
+ import type { Logger } from '@fuzdev/fuz_util/log.js';
32
+ import type { FsReadDeps, FsWriteDeps, FsStreamDeps, FsRemoveDeps } from '../runtime/deps.js';
33
+ import { type FileFactUrl } from './file_fact_url.js';
34
+ import type { FactExternalFetcher } from './fact_store.js';
35
+ /** Subdirectory under `facts_dir` for in-flight atomic temp files. */
36
+ export declare const FACT_TMP_DIRNAME = ".tmp";
37
+ /** Default age (1 hour) past which a `.tmp/*` file is considered orphaned. */
38
+ export declare const FACT_TMP_ORPHAN_MAX_AGE_MS: number;
39
+ /**
40
+ * Filesystem capabilities the disk CAS needs, drawn from `runtime/deps.ts`. A
41
+ * full `RuntimeDeps` (Node or Deno) satisfies this; each function below picks
42
+ * the narrow subset it actually uses.
43
+ */
44
+ export type FactDiskStorageDeps = Pick<FsReadDeps, 'stat' | 'readdir' | 'read_file'> & Pick<FsWriteDeps, 'mkdir' | 'rename' | 'write_file' | 'fsync'> & Pick<FsStreamDeps, 'write_file_stream' | 'read_file_stream'> & Pick<FsRemoveDeps, 'remove'>;
45
+ /**
46
+ * Where a streamed body landed — `embedded` carries the in-memory bytes (under
47
+ * the embedded threshold, bound for the PG `fact.bytes` column); `disk` means
48
+ * the bytes are already at `<facts_dir>/<shard>/<rest>` and the row carries the
49
+ * `file:` URL.
50
+ */
51
+ export type StreamPlacement = {
52
+ kind: 'embedded';
53
+ bytes: Uint8Array;
54
+ } | {
55
+ kind: 'disk';
56
+ external_url: FileFactUrl;
57
+ };
58
+ /**
59
+ * Outcome of streaming an upload to storage: the `blake3:`-prefixed fact hash,
60
+ * the bare-hex SHA-256, the byte count, and where the bytes landed.
61
+ * `PgFactStore.put_stream` turns this into the `fact` row insert.
62
+ */
63
+ export interface StreamFactToDiskResult {
64
+ hash: FactHash;
65
+ sha256: string;
66
+ size: number;
67
+ placement: StreamPlacement;
68
+ }
69
+ /**
70
+ * Stream `source` to storage with bounded memory: hash BLAKE3 + SHA-256
71
+ * incrementally in one pass, buffer in memory until the bytes cross
72
+ * `embedded_threshold`, then spill the buffer + remaining chunks through a temp
73
+ * file and atomically land it in the disk CAS. Peak heap is
74
+ * `O(chunk + embedded_threshold)`, never `O(artifact)`, so a multi-GB upload
75
+ * never buffers in RAM.
76
+ *
77
+ * - **Embedded vs disk.** A body `<= embedded_threshold` stays in memory and is
78
+ * returned as `{kind: 'embedded'}` for the PG `bytes` column. Above it (with a
79
+ * `facts_dir`), the buffer + remaining chunks spill to `<facts_dir>/.tmp/…`,
80
+ * then `rename` into `<facts_dir>/<shard>/<rest>` once the hash is known —
81
+ * `{kind: 'disk'}`. A body over the threshold with `facts_dir === undefined`
82
+ * throws `PayloadTooLargeError` (matches `PgFactStore.put`).
83
+ * - **Cap enforcement.** Aborts with `PayloadTooLargeError` the moment the
84
+ * running byte count passes `max_bytes` — the mid-stream backstop for a
85
+ * chunked or mis-declared `Content-Length`.
86
+ * - **Disk-full.** An `ENOSPC` from the temp-file write surfaces as
87
+ * `StorageFullError`.
88
+ *
89
+ * @mutates `facts_dir` filesystem
90
+ */
91
+ export declare const stream_fact_to_disk: (deps: Pick<FactDiskStorageDeps, "mkdir" | "rename" | "remove" | "write_file_stream" | "fsync" | "stat">, facts_dir: string | undefined, source: ReadableStream<Uint8Array>, max_bytes: number, embedded_threshold: number) => Promise<StreamFactToDiskResult>;
92
+ /**
93
+ * Write fully-buffered `bytes` for `hash` to the canonical
94
+ * `<facts_dir>/<shard>/<rest>` path, then publish via `commit_temp_to_cas`
95
+ * (fsync'd temp + atomic rename, dedup-aware). The buffering twin of
96
+ * `stream_fact_to_disk`, used by `PgFactStore.put` for oversize sync bytes.
97
+ * Returns the `file:` `external_url` for the `fact` row.
98
+ *
99
+ * @mutates `facts_dir` filesystem
100
+ */
101
+ export declare const write_fact_bytes_to_disk: (deps: Pick<FactDiskStorageDeps, "mkdir" | "rename" | "remove" | "write_file" | "fsync" | "stat">, facts_dir: string, hash: FactHash, bytes: Uint8Array) => Promise<FileFactUrl>;
102
+ /**
103
+ * `FactExternalFetcher` reading from the `<facts_dir>/<shard>/<rest>` layout the
104
+ * writers above produce, over the injected `*Deps`. Does NOT verify hash content
105
+ * — `PgFactStore.get` calls `fact_hash_verify(hash, bytes)` after the fetch and
106
+ * returns `null` on mismatch.
107
+ *
108
+ * Defense at the read seam is the `FILE_FACT_URL_PATTERN` regex (via
109
+ * `parse_file_fact_url`) — `..` segments, foreign schemes, and non-hex chars
110
+ * fail before any disk access.
111
+ */
112
+ export declare const create_disk_fact_fetcher: (deps: Pick<FactDiskStorageDeps, "read_file" | "read_file_stream">, facts_dir: string) => FactExternalFetcher;
113
+ /**
114
+ * Reap stale temp files left under `<facts_dir>/.tmp/` by a hard crash (SIGKILL
115
+ * / OOM / host crash) mid-write — the `finally` cleanup in the writers above
116
+ * never ran. Removes `.tmp` entries whose mtime is older than `max_age_ms` (so
117
+ * an in-flight upload isn't yanked out from under itself). The TS twin of the
118
+ * Rust `sweep_orphan_temps`; call on startup + on an interval.
119
+ *
120
+ * Best-effort: a missing `.tmp/` dir (no oversize upload has ever run) is a
121
+ * no-op; a runtime that doesn't report `mtime_ms` (a mock) leaves every temp
122
+ * untouched; a per-file stat/remove failure is logged and skipped rather than
123
+ * aborting the sweep. Returns the count removed.
124
+ *
125
+ * @mutates `facts_dir` filesystem
126
+ */
127
+ export declare const sweep_orphan_temps: (deps: Pick<FactDiskStorageDeps, "readdir" | "stat" | "remove">, facts_dir: string, options?: {
128
+ max_age_ms?: number;
129
+ log?: Pick<Logger, "warn">;
130
+ }) => Promise<number>;
131
+ //# sourceMappingURL=fact_disk_storage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fact_disk_storage.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/db/fact_disk_storage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,OAAO,EAAmB,KAAK,QAAQ,EAAC,MAAM,+BAA+B,CAAC;AAC9E,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,yBAAyB,CAAC;AAEpD,OAAO,KAAK,EAAC,UAAU,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAC,MAAM,oBAAoB,CAAC;AAE5F,OAAO,EAIN,KAAK,WAAW,EAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,KAAK,EAAC,mBAAmB,EAAC,MAAM,iBAAiB,CAAC;AAGzD,sEAAsE;AACtE,eAAO,MAAM,gBAAgB,SAAS,CAAC;AAEvC,8EAA8E;AAC9E,eAAO,MAAM,0BAA0B,QAAiB,CAAC;AAEzD;;;;GAIG;AACH,MAAM,MAAM,mBAAmB,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,GAAG,SAAS,GAAG,WAAW,CAAC,GACnF,IAAI,CAAC,WAAW,EAAE,OAAO,GAAG,QAAQ,GAAG,YAAY,GAAG,OAAO,CAAC,GAC9D,IAAI,CAAC,YAAY,EAAE,mBAAmB,GAAG,kBAAkB,CAAC,GAC5D,IAAI,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;AAE9B;;;;;GAKG;AACH,MAAM,MAAM,eAAe,GACxB;IAAC,IAAI,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,UAAU,CAAA;CAAC,GACrC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,WAAW,CAAA;CAAC,CAAC;AAE7C;;;;GAIG;AACH,MAAM,WAAW,sBAAsB;IACtC,IAAI,EAAE,QAAQ,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,eAAe,CAAC;CAC3B;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,IAAI,CACT,mBAAmB,EACnB,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,mBAAmB,GAAG,OAAO,GAAG,MAAM,CACtE,EACD,WAAW,MAAM,GAAG,SAAS,EAC7B,QAAQ,cAAc,CAAC,UAAU,CAAC,EAClC,WAAW,MAAM,EACjB,oBAAoB,MAAM,KACxB,OAAO,CAAC,sBAAsB,CA8GhC,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,IAAI,CAAC,mBAAmB,EAAE,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,YAAY,GAAG,OAAO,GAAG,MAAM,CAAC,EAChG,WAAW,MAAM,EACjB,MAAM,QAAQ,EACd,OAAO,UAAU,KACf,OAAO,CAAC,WAAW,CAgBrB,CAAC;AAEF;;;;;;;;;GASG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,IAAI,CAAC,mBAAmB,EAAE,WAAW,GAAG,kBAAkB,CAAC,EACjE,WAAW,MAAM,KACf,mBAWF,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,kBAAkB,GAC9B,MAAM,IAAI,CAAC,mBAAmB,EAAE,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC,EAC9D,WAAW,MAAM,EACjB,UAAU;IAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAC,KACzD,OAAO,CAAC,MAAM,CA6BhB,CAAC"}
@@ -0,0 +1,315 @@
1
+ /**
2
+ * Filesystem CAS for externally-stored fact bytes — the disk half of
3
+ * `PgFactStore`, threaded over the injectable `runtime/*Deps` rather than raw
4
+ * `node:fs`, so it runs unchanged under Node, Deno, and a mock runtime.
5
+ *
6
+ * Large facts (over the embedded threshold) live on disk at the canonical
7
+ * sharded layout `<facts_dir>/<shard>/<rest>` — `<shard>` is the first 2 hex
8
+ * chars of the blake3 digest, `<rest>` the remaining 62 — with the `fact` row
9
+ * carrying `external_url = file:<shard>/<rest>` (disk-root-relative). The layout
10
+ * is single-sourced by `fact_disk_path` in `db/file_fact_url.ts`, so the write
11
+ * path here and the URL minted into the row can't drift. The TS twin of the
12
+ * Rust `fuz_fact` disk CAS.
13
+ *
14
+ * Writes land through `<facts_dir>/.tmp/<rand>.tmp`, are `fsync`ed, then
15
+ * `rename`d into the content-addressed final path. The `rename` is atomic on
16
+ * POSIX (a *concurrent reader* observing the path sees either the full content
17
+ * or nothing), but atomicity is not durability — the `fsync` before the rename
18
+ * is what guards against a *host crash* leaving a torn/zero file at a published
19
+ * CAS path, because the serving path streams the hash-named file without
20
+ * re-hashing it (`server/serve_fact_route.ts`). This twins the Rust `fuz_fact`
21
+ * §fsync posture: data-sync before the rename; the parent-dir fsync stays
22
+ * deliberately waived (a lost dirent is regenerable under content addressing).
23
+ * If the final path already exists the temp is dropped instead of renamed over
24
+ * — idempotent dedup (same hash → byte-identical content), mirroring the Rust
25
+ * commit path. `.tmp/` is a sibling of `<shard>/` under the same `facts_dir` so
26
+ * `rename` is always same-filesystem (no EXDEV).
27
+ *
28
+ * @module
29
+ */
30
+ import { createHash } from 'node:crypto';
31
+ import { join } from 'node:path';
32
+ import { Blake3Hasher } from '@fuzdev/blake3_wasm';
33
+ import { blake3_ready } from '@fuzdev/fuz_util/hash_blake3.js';
34
+ import { to_hex } from '@fuzdev/fuz_util/hex.js';
35
+ import { FACT_HASH_PREFIX } from '@fuzdev/fuz_util/fact_hash.js';
36
+ import { generate_random_base64url } from '../crypto.js';
37
+ import { fact_disk_path, mint_file_fact_url, parse_file_fact_url, } from './file_fact_url.js';
38
+ import { is_enospc_error, PayloadTooLargeError, StorageFullError } from './fact_store_errors.js';
39
+ /** Subdirectory under `facts_dir` for in-flight atomic temp files. */
40
+ export const FACT_TMP_DIRNAME = '.tmp';
41
+ /** Default age (1 hour) past which a `.tmp/*` file is considered orphaned. */
42
+ export const FACT_TMP_ORPHAN_MAX_AGE_MS = 60 * 60 * 1000;
43
+ /**
44
+ * Stream `source` to storage with bounded memory: hash BLAKE3 + SHA-256
45
+ * incrementally in one pass, buffer in memory until the bytes cross
46
+ * `embedded_threshold`, then spill the buffer + remaining chunks through a temp
47
+ * file and atomically land it in the disk CAS. Peak heap is
48
+ * `O(chunk + embedded_threshold)`, never `O(artifact)`, so a multi-GB upload
49
+ * never buffers in RAM.
50
+ *
51
+ * - **Embedded vs disk.** A body `<= embedded_threshold` stays in memory and is
52
+ * returned as `{kind: 'embedded'}` for the PG `bytes` column. Above it (with a
53
+ * `facts_dir`), the buffer + remaining chunks spill to `<facts_dir>/.tmp/…`,
54
+ * then `rename` into `<facts_dir>/<shard>/<rest>` once the hash is known —
55
+ * `{kind: 'disk'}`. A body over the threshold with `facts_dir === undefined`
56
+ * throws `PayloadTooLargeError` (matches `PgFactStore.put`).
57
+ * - **Cap enforcement.** Aborts with `PayloadTooLargeError` the moment the
58
+ * running byte count passes `max_bytes` — the mid-stream backstop for a
59
+ * chunked or mis-declared `Content-Length`.
60
+ * - **Disk-full.** An `ENOSPC` from the temp-file write surfaces as
61
+ * `StorageFullError`.
62
+ *
63
+ * @mutates `facts_dir` filesystem
64
+ */
65
+ export const stream_fact_to_disk = async (deps, facts_dir, source, max_bytes, embedded_threshold) => {
66
+ await blake3_ready;
67
+ const blake3 = new Blake3Hasher();
68
+ const sha256 = createHash('sha256');
69
+ let size = 0;
70
+ // Buffer leading bytes until they cross the embedded threshold; small facts
71
+ // stay embedded (no disk), large ones never buffer past the threshold.
72
+ const buffered = [];
73
+ let buffered_len = 0;
74
+ const reader = source.getReader();
75
+ const hash_and_count = (chunk) => {
76
+ size += chunk.length;
77
+ if (size > max_bytes)
78
+ throw new PayloadTooLargeError(size, max_bytes);
79
+ blake3.update(chunk);
80
+ sha256.update(chunk);
81
+ };
82
+ try {
83
+ // Phase 1: read + hash + buffer until the threshold is crossed or the
84
+ // stream ends. The crossing chunk is hashed + buffered here, then emitted
85
+ // (not re-read) by the spill stream below.
86
+ let spill_needed = false;
87
+ for (;;) {
88
+ const { done, value } = await reader.read();
89
+ if (done)
90
+ break;
91
+ if (!value || value.length === 0)
92
+ continue;
93
+ hash_and_count(value);
94
+ buffered.push(value);
95
+ buffered_len += value.length;
96
+ if (buffered_len > embedded_threshold) {
97
+ spill_needed = true;
98
+ break;
99
+ }
100
+ }
101
+ if (!spill_needed) {
102
+ const hash = (FACT_HASH_PREFIX + to_hex(blake3.finalize()));
103
+ return {
104
+ hash,
105
+ sha256: sha256.digest('hex'),
106
+ size,
107
+ placement: { kind: 'embedded', bytes: concat_chunks(buffered, buffered_len) },
108
+ };
109
+ }
110
+ if (facts_dir === undefined) {
111
+ // Over the embedded threshold with nowhere to spill — same shape as the
112
+ // `PgFactStore.put` oversize-without-disk_root reject.
113
+ throw new PayloadTooLargeError(size, embedded_threshold);
114
+ }
115
+ // Phase 2: spill. A combined stream emits the already-hashed buffered
116
+ // chunks, then continues pulling from `reader`, hashing each remaining
117
+ // chunk as it flows. `write_file_stream` consumes it with backpressure
118
+ // (peak memory one chunk).
119
+ let buffer_index = 0;
120
+ const combined = new ReadableStream({
121
+ async pull(controller) {
122
+ if (buffer_index < buffered.length) {
123
+ controller.enqueue(buffered[buffer_index++]);
124
+ return;
125
+ }
126
+ for (;;) {
127
+ const { done, value } = await reader.read();
128
+ if (done) {
129
+ controller.close();
130
+ return;
131
+ }
132
+ if (!value || value.length === 0)
133
+ continue;
134
+ try {
135
+ hash_and_count(value);
136
+ }
137
+ catch (err) {
138
+ controller.error(err);
139
+ return;
140
+ }
141
+ controller.enqueue(value);
142
+ return;
143
+ }
144
+ },
145
+ cancel: (reason) => reader.cancel(reason),
146
+ });
147
+ const tmp_dir = join(facts_dir, FACT_TMP_DIRNAME);
148
+ const tmp_path = join(tmp_dir, `${generate_random_base64url(16)}.tmp`);
149
+ await deps.mkdir(tmp_dir, { recursive: true });
150
+ try {
151
+ await deps.write_file_stream(tmp_path, combined);
152
+ }
153
+ catch (err) {
154
+ await deps.remove(tmp_path).catch(() => undefined);
155
+ if (is_enospc_error(err))
156
+ throw new StorageFullError(err);
157
+ throw err; // includes a mid-stream PayloadTooLargeError surfaced via the stream
158
+ }
159
+ const hash = (FACT_HASH_PREFIX + to_hex(blake3.finalize()));
160
+ const { shard, rest } = await commit_temp_to_cas(deps, tmp_path, facts_dir, hash);
161
+ return {
162
+ hash,
163
+ sha256: sha256.digest('hex'),
164
+ size,
165
+ placement: { kind: 'disk', external_url: mint_file_fact_url(shard, rest) },
166
+ };
167
+ }
168
+ finally {
169
+ blake3.free();
170
+ try {
171
+ reader.releaseLock();
172
+ }
173
+ catch {
174
+ // Already released/cancelled by the spill stream's cancel path.
175
+ }
176
+ }
177
+ };
178
+ /**
179
+ * Write fully-buffered `bytes` for `hash` to the canonical
180
+ * `<facts_dir>/<shard>/<rest>` path, then publish via `commit_temp_to_cas`
181
+ * (fsync'd temp + atomic rename, dedup-aware). The buffering twin of
182
+ * `stream_fact_to_disk`, used by `PgFactStore.put` for oversize sync bytes.
183
+ * Returns the `file:` `external_url` for the `fact` row.
184
+ *
185
+ * @mutates `facts_dir` filesystem
186
+ */
187
+ export const write_fact_bytes_to_disk = async (deps, facts_dir, hash, bytes) => {
188
+ const tmp_dir = join(facts_dir, FACT_TMP_DIRNAME);
189
+ const tmp_path = join(tmp_dir, `${generate_random_base64url(16)}.tmp`);
190
+ await deps.mkdir(tmp_dir, { recursive: true });
191
+ // Write the temp first (mapping disk-full), then publish — the same
192
+ // write-then-commit shape as the streaming twin.
193
+ try {
194
+ await deps.write_file(tmp_path, bytes);
195
+ }
196
+ catch (err) {
197
+ await deps.remove(tmp_path).catch(() => undefined);
198
+ if (is_enospc_error(err))
199
+ throw new StorageFullError(err);
200
+ throw err;
201
+ }
202
+ const { shard, rest } = await commit_temp_to_cas(deps, tmp_path, facts_dir, hash);
203
+ return mint_file_fact_url(shard, rest);
204
+ };
205
+ /**
206
+ * `FactExternalFetcher` reading from the `<facts_dir>/<shard>/<rest>` layout the
207
+ * writers above produce, over the injected `*Deps`. Does NOT verify hash content
208
+ * — `PgFactStore.get` calls `fact_hash_verify(hash, bytes)` after the fetch and
209
+ * returns `null` on mismatch.
210
+ *
211
+ * Defense at the read seam is the `FILE_FACT_URL_PATTERN` regex (via
212
+ * `parse_file_fact_url`) — `..` segments, foreign schemes, and non-hex chars
213
+ * fail before any disk access.
214
+ */
215
+ export const create_disk_fact_fetcher = (deps, facts_dir) => {
216
+ const resolve_path = (url) => {
217
+ const parsed = parse_file_fact_url(url);
218
+ if (!parsed)
219
+ throw new Error(`invalid file fact url: ${url}`);
220
+ return join(facts_dir, parsed.shard, parsed.rest);
221
+ };
222
+ return {
223
+ fetch_bytes: (url) => deps.read_file(resolve_path(url)),
224
+ // `async` funnels a synchronous `resolve_path` throw into a rejection.
225
+ fetch_stream: async (url) => deps.read_file_stream(resolve_path(url)),
226
+ };
227
+ };
228
+ /**
229
+ * Reap stale temp files left under `<facts_dir>/.tmp/` by a hard crash (SIGKILL
230
+ * / OOM / host crash) mid-write — the `finally` cleanup in the writers above
231
+ * never ran. Removes `.tmp` entries whose mtime is older than `max_age_ms` (so
232
+ * an in-flight upload isn't yanked out from under itself). The TS twin of the
233
+ * Rust `sweep_orphan_temps`; call on startup + on an interval.
234
+ *
235
+ * Best-effort: a missing `.tmp/` dir (no oversize upload has ever run) is a
236
+ * no-op; a runtime that doesn't report `mtime_ms` (a mock) leaves every temp
237
+ * untouched; a per-file stat/remove failure is logged and skipped rather than
238
+ * aborting the sweep. Returns the count removed.
239
+ *
240
+ * @mutates `facts_dir` filesystem
241
+ */
242
+ export const sweep_orphan_temps = async (deps, facts_dir, options) => {
243
+ const max_age_ms = options?.max_age_ms ?? FACT_TMP_ORPHAN_MAX_AGE_MS;
244
+ const tmp_dir = join(facts_dir, FACT_TMP_DIRNAME);
245
+ let entries;
246
+ try {
247
+ entries = await deps.readdir(tmp_dir);
248
+ }
249
+ catch {
250
+ return 0; // `.tmp/` doesn't exist yet — nothing to sweep.
251
+ }
252
+ const cutoff = Date.now() - max_age_ms;
253
+ let removed = 0;
254
+ for (const entry of entries) {
255
+ if (!entry.endsWith('.tmp'))
256
+ continue;
257
+ const path = join(tmp_dir, entry);
258
+ try {
259
+ const info = await deps.stat(path);
260
+ // Unknown age (missing file, or a runtime that doesn't report mtime) →
261
+ // leave it; never reap something we can't prove is stale.
262
+ if (!info || info.mtime_ms === undefined || info.mtime_ms >= cutoff)
263
+ continue;
264
+ await deps.remove(path);
265
+ removed++;
266
+ }
267
+ catch (err) {
268
+ options?.log?.warn(`sweep_orphan_temps: failed to reap ${path}:`, err instanceof Error ? err.message : String(err));
269
+ }
270
+ }
271
+ return removed;
272
+ };
273
+ /**
274
+ * Publish a written temp file into the CAS at `<facts_dir>/<shard>/<rest>`:
275
+ * `fsync` the temp's data (durability before the rename — the serve path streams
276
+ * the file without re-hashing, so the bytes must be stable before they become
277
+ * the canonical body), then either drop the temp (byte-identical content already
278
+ * present — idempotent dedup) or atomically `rename` it into place. On any
279
+ * failure the temp is unlinked and an `ENOSPC` is surfaced as `StorageFullError`.
280
+ * The single commit path shared by both writers above — twins the Rust `fuz_fact`
281
+ * `SpillFile::rename_into_cas` (data-sync before rename; parent-dir fsync waived).
282
+ *
283
+ * @mutates `facts_dir` filesystem
284
+ */
285
+ const commit_temp_to_cas = async (deps, tmp_path, facts_dir, hash) => {
286
+ const { shard, rest } = fact_disk_path(hash);
287
+ const final_path = join(facts_dir, shard, rest);
288
+ try {
289
+ await deps.fsync(tmp_path);
290
+ if (await deps.stat(final_path)) {
291
+ await deps.remove(tmp_path).catch(() => undefined);
292
+ }
293
+ else {
294
+ await deps.mkdir(join(facts_dir, shard), { recursive: true });
295
+ await deps.rename(tmp_path, final_path);
296
+ }
297
+ }
298
+ catch (err) {
299
+ await deps.remove(tmp_path).catch(() => undefined);
300
+ if (is_enospc_error(err))
301
+ throw new StorageFullError(err);
302
+ throw err;
303
+ }
304
+ return { shard, rest };
305
+ };
306
+ /** Concatenate buffered chunks into a single `Uint8Array` of `total` bytes. */
307
+ const concat_chunks = (chunks, total) => {
308
+ const out = new Uint8Array(total);
309
+ let offset = 0;
310
+ for (const chunk of chunks) {
311
+ out.set(chunk, offset);
312
+ offset += chunk.length;
313
+ }
314
+ return out;
315
+ };
@@ -14,21 +14,23 @@
14
14
  * - mismatched external bytes return `null` + log warning (treat as
15
15
  * unavailable; GC / repair is a separate concern)
16
16
  *
17
- * Embedded vs referenced split: callers route by size. `put` rejects
18
- * `bytes.length > embedded_threshold` so oversized content takes the
19
- * `put_ref` path explicitly. Auto-split inside `put` is a future option.
20
- *
21
- * Wired with a filesystem `file:`-URL fetcher (`create_file_fact_fetcher`)
22
- * at server assembly: bytes at or under the threshold embed via `put`, larger bytes go
23
- * through atomic temp+rename onto disk then `put_ref('file:<shard>/<rest>',
24
- * size)` for verified registration.
17
+ * Embedded vs disk split: writes route by size. Bytes `<= embedded_threshold`
18
+ * land in the PG `bytes` column; larger bytes go to the disk CAS at
19
+ * `<facts_dir>/<shard>/<rest>` (`db/fact_disk_storage.ts`) and the row records a
20
+ * `file:<shard>/<rest>` `external_url`. `put` takes fully-buffered bytes;
21
+ * `put_stream` is the bounded-memory streaming twin (hash BLAKE3 + SHA-256 in
22
+ * one pass, spill past the threshold, enforce `max_bytes` / `ENOSPC`). Both need
23
+ * `disk_root` + `fs` (the `runtime/*Deps`) configured for the over-threshold
24
+ * path; without them, an oversize `put` throws and the caller must `put_ref`
25
+ * against an externally-managed URL (federation / stub-fetcher tests).
25
26
  *
26
27
  * @module
27
28
  */
28
29
  import type { QueryDeps } from './query_deps.js';
29
30
  import type { Logger } from '@fuzdev/fuz_util/log.js';
30
31
  import { type FactHash } from '@fuzdev/fuz_util/fact_hash.js';
31
- import type { FactMeta, FactPutOptions, FactStore } from '@fuzdev/fuz_util/fact_store.js';
32
+ import type { FactMeta, FactPutOptions, FactStore, PutStreamOutcome } from '@fuzdev/fuz_util/fact_store.js';
33
+ import { type FactDiskStorageDeps } from './fact_disk_storage.js';
32
34
  /** Default embedded-vs-referenced cutoff (1 MiB). */
33
35
  export declare const FACT_EMBEDDED_THRESHOLD_DEFAULT: number;
34
36
  /** Fetcher abstraction so tests can stub external URL retrieval. */
@@ -43,16 +45,24 @@ export declare const create_default_fetcher: () => FactExternalFetcher;
43
45
  *
44
46
  * `embedded_threshold` (bytes) is the inline-vs-external cutoff: payloads
45
47
  * at or under it store embedded in the `fact` row, larger ones route to
46
- * the external fetcher. Defaults to `FACT_EMBEDDED_THRESHOLD_DEFAULT`
48
+ * the disk CAS. Defaults to `FACT_EMBEDDED_THRESHOLD_DEFAULT`
47
49
  * (1 MiB). Consumers tune it per workload — e.g. a much lower bound
48
50
  * (~16 KiB) keeps only small JSON inline and routes image originals +
49
- * thumbnails external. `fetcher` defaults to a `globalThis.fetch`-backed
50
- * implementation; tests inject a stub. `log` is optional — the only call
51
- * site is the verify-mismatch warning path.
51
+ * thumbnails to disk.
52
+ *
53
+ * `disk_root` is the facts directory backing the `<shard>/<rest>` disk CAS;
54
+ * `fs` supplies the filesystem capabilities (a `RuntimeDeps` satisfies it).
55
+ * When both are set, oversize `put` + `put_stream` write to disk and the
56
+ * default `fetcher` reads from it. When unset, an oversize `put` or a
57
+ * `put_stream` spill throws, and reads fall back to the `globalThis.fetch`-backed default
58
+ * fetcher (or an injected stub). `log` is optional — the only call site is the
59
+ * verify-mismatch warning path.
52
60
  */
53
61
  export interface PgFactStoreDeps {
54
62
  deps: QueryDeps;
55
63
  embedded_threshold?: number; // inline-vs-disk cutoff in bytes; defaults to FACT_EMBEDDED_THRESHOLD_DEFAULT
64
+ disk_root?: string; // facts directory backing the `<shard>/<rest>` disk CAS
65
+ fs?: FactDiskStorageDeps; // filesystem capabilities used for the disk CAS
56
66
  fetcher?: FactExternalFetcher; // external-URL retrieval; tests can inject a stub
57
67
  log?: Logger; // optional; the only call site is the verify-mismatch warning path
58
68
  }
@@ -64,11 +74,32 @@ export declare class PgFactStore implements FactStore {
64
74
  #private;
65
75
  constructor(options: PgFactStoreDeps);
66
76
  /**
67
- * Store small bytes embedded in PG. Rejects oversized content so the
68
- * caller routes it through `put_ref` explicitly — implicit splitting
69
- * hides the size decision from the caller.
77
+ * Store fully-buffered bytes, routing by size: `<= embedded_threshold` into
78
+ * the PG `bytes` column; larger into the disk CAS (when `disk_root` + `fs`
79
+ * are configured) at `<facts_dir>/<shard>/<rest>` with a `file:` URL. Oversize
80
+ * without a disk root throws so the caller routes it through `put_ref`
81
+ * explicitly. Idempotent — `ON CONFLICT DO NOTHING` + content-addressed disk
82
+ * filenames make a re-write a no-op.
70
83
  */
71
84
  put(bytes: Uint8Array, options?: FactPutOptions): Promise<FactHash>;
85
+ /**
86
+ * Stream bytes into the store with bounded memory, returning the finalized
87
+ * digests + size. Delegates the byte path to `stream_fact_to_disk` (hash
88
+ * BLAKE3 + SHA-256 in one pass, buffer to the embedded threshold, spill to the
89
+ * disk CAS), then inserts the `fact` row by placement — embedded bytes go to
90
+ * the PG `bytes` column, disk-spilled bytes record the `file:` `external_url`.
91
+ * The cap is enforced mid-stream (`PayloadTooLargeError`); a full disk
92
+ * mid-stream throws `StorageFullError`.
93
+ *
94
+ * Refs: explicit `options.refs` are recorded; JSON auto-extraction is NOT
95
+ * attempted (it would need a buffered re-read, defeating the bounded-memory
96
+ * contract) — streamed uploads are opaque blobs.
97
+ *
98
+ * Requires `fs` (and, for the over-threshold spill, `disk_root`) to be
99
+ * configured. The streaming twin of `put`; mirrors the Rust
100
+ * `FactStore::put_stream`.
101
+ */
102
+ put_stream(stream: ReadableStream<Uint8Array>, max_bytes: number, options?: FactPutOptions): Promise<PutStreamOutcome>;
72
103
  /**
73
104
  * Stream-hash external content and record `(hash, external_url, size)`.
74
105
  * Throws when the streamed byte count disagrees with the caller's