@fuzdev/fuz_app 0.81.0 → 0.83.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/account_queries.d.ts.map +1 -1
- package/dist/auth/account_queries.js +19 -9
- package/dist/db/CLAUDE.md +22 -5
- package/dist/db/fact_disk_storage.d.ts +131 -0
- package/dist/db/fact_disk_storage.d.ts.map +1 -0
- package/dist/db/fact_disk_storage.js +315 -0
- package/dist/db/fact_store.d.ts +47 -16
- package/dist/db/fact_store.d.ts.map +1 -1
- package/dist/db/fact_store.js +75 -16
- package/dist/db/fact_store_errors.d.ts +38 -0
- package/dist/db/fact_store_errors.d.ts.map +1 -0
- package/dist/db/fact_store_errors.js +48 -0
- package/dist/{server → db}/file_fact_url.d.ts +22 -9
- package/dist/db/file_fact_url.d.ts.map +1 -0
- package/dist/{server → db}/file_fact_url.js +22 -9
- package/dist/runtime/deno.d.ts.map +1 -1
- package/dist/runtime/deno.js +15 -1
- package/dist/runtime/deps.d.ts +21 -0
- package/dist/runtime/deps.d.ts.map +1 -1
- package/dist/runtime/mock.d.ts.map +1 -1
- package/dist/runtime/mock.js +3 -0
- package/dist/runtime/node.d.ts.map +1 -1
- package/dist/runtime/node.js +17 -1
- package/dist/server/fact_write.js +1 -1
- package/dist/server/file_fact_fetcher.js +1 -1
- package/dist/server/serve_fact_route.js +1 -1
- package/package.json +16 -13
- package/dist/server/file_fact_url.d.ts.map +0 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"account_queries.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/auth/account_queries.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,OAAO,KAAK,EAAC,SAAS,EAAC,MAAM,qBAAqB,CAAC;AAEnD,OAAO,EAEN,KAAK,OAAO,EACZ,KAAK,KAAK,EACV,KAAK,kBAAkB,EACvB,KAAK,qBAAqB,EAC1B,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"account_queries.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/auth/account_queries.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,OAAO,KAAK,EAAC,SAAS,EAAC,MAAM,qBAAqB,CAAC;AAEnD,OAAO,EAEN,KAAK,OAAO,EACZ,KAAK,KAAK,EACV,KAAK,kBAAkB,EACvB,KAAK,qBAAqB,EAC1B,MAAM,qBAAqB,CAAC;AAqB7B;;;;;;;GAOG;AACH,eAAO,MAAM,oBAAoB,GAChC,MAAM,SAAS,EACf,OAAO,kBAAkB,KACvB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,OAAO,GAAG,SAAS,CAK7B,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,yBAAyB,GACrC,MAAM,SAAS,EACf,UAAU,MAAM,KACd,OAAO,CAAC,OAAO,GAAG,SAAS,CAK7B,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,sBAAsB,GAClC,MAAM,SAAS,EACf,OAAO,MAAM,KACX,OAAO,CAAC,OAAO,GAAG,SAAS,CAK7B,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,kCAAkC,GAC9C,MAAM,SAAS,EACf,OAAO,MAAM,KACX,OAAO,CAAC,OAAO,GAAG,SAAS,CAS7B,CAAC;AAEF;;;;;;;;;;;;;;;;;;;GAmBG;AACH,eAAO,MAAM,6BAA6B,GACzC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,eAAe,MAAM,EACrB,YAAY,MAAM,GAAG,IAAI,EACzB,eAAe,MAAM,KACnB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;GAIG;AACH,MAAM,WAAW,uBAAuB;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CACrB;AAED;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,yBAAyB,GACrC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,YAAY,MAAM,GAAG,IAAI,KACvB,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAO7C,CAAC;AAEF;;;;;;;;;;;;;;GAcG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAK7C,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,uBAAuB,GACnC,MAAM,SAAS,EACf,IAAI,MAAM,EACV,YAAY,MAAM,GAAG,IAAI,KACvB,OAAO,CAAC,OAAO,CAQjB,CAAC;AAEF;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,sBAAsB,GAClC,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,uBAAuB,GAAG,SAAS,CAO7C,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,oBAAoB,GAAU,MAAM,SAAS,EAAE,IAAI,MAAM,KAAG,OAAO,CAAC,OAAO,CAQvF,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,qBAAqB,GAAU,MAAM,SAAS,KAAG,OAAO,CAAC,OAAO,CAK5E,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,kBAAkB,GAC9B,MAAM,SAAS,EACf,YAAY,MAAM,EAClB,MAAM,MAAM,KACV,OAAO,CAAC,KAAK,CAMf,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,uBAAuB,GACnC,MAAM,SAAS,EACf,YAAY,MAAM,KAChB,
OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAKtB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,iBAAiB,GAC7B,MAAM,SAAS,EACf,IAAI,MAAM,KACR,OAAO,CAAC,KAAK,GAAG,SAAS,CAE3B,CAAC;AAEF;;;;;;;;;GASG;AACH,eAAO,MAAM,+BAA+B,GAC3C,MAAM,SAAS,EACf,OAAO,kBAAkB,KACvB,OAAO,CAAC;IAAC,OAAO,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,KAAK,CAAA;CAAC,CAI1C,CAAC;AA2BF,8CAA8C;AAC9C,MAAM,WAAW,uBAAuB;IACvC;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,wCAAwC;IACxC,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB;;;;;OAKG;IACH,eAAe,CAAC,EAAE,OAAO,GAAG,IAAI,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,SAAS,EACf,UAAU,uBAAuB,KAC/B,OAAO,CAAC,KAAK,CAAC,qBAAqB,CAAC,CA8GtC,CAAC"}
|
|
@@ -9,6 +9,22 @@
|
|
|
9
9
|
import { assert_row } from '../db/assert_row.js';
|
|
10
10
|
import { to_admin_account, } from './account_schema.js';
|
|
11
11
|
import { ADMIN_ACCOUNT_LIST_DEFAULT_LIMIT } from './admin_action_specs.js';
|
|
12
|
+
/**
|
|
13
|
+
* The full `account` column set, named explicitly so a row read fails loud
|
|
14
|
+
* on schema drift.
|
|
15
|
+
*
|
|
16
|
+
* `SELECT *` silently omits a dropped column, which the login lookups then
|
|
17
|
+
* misread: `query_account_by_username_or_email` filters its result with
|
|
18
|
+
* `account.deleted_at === null`, so a missing `deleted_at` column reads back
|
|
19
|
+
* as `undefined`, `undefined === null` is `false`, and *every* login resolves
|
|
20
|
+
* to "not found" (401) — a silent, total auth outage instead of an error.
|
|
21
|
+
* Selecting named columns turns that drift into a hard Postgres
|
|
22
|
+
* `column "..." does not exist`. Mirrors the Rust side
|
|
23
|
+
* (`fuz_auth/src/account_queries.rs`), which selects named columns and
|
|
24
|
+
* decodes them positionally. Keep in sync with `Account` and the `account`
|
|
25
|
+
* DDL in `auth/auth_ddl.ts`.
|
|
26
|
+
*/
|
|
27
|
+
const ACCOUNT_COLUMNS = 'id, username, email, email_verified, password_hash, created_at, created_by, updated_at, updated_by, deleted_at, deleted_by';
|
|
12
28
|
/**
|
|
13
29
|
* Create a new account.
|
|
14
30
|
*
|
|
@@ -33,25 +49,19 @@ export const query_create_account = async (deps, input) => {
|
|
|
33
49
|
* soft-deleted rows too, uses `query_purge_account` directly.
|
|
34
50
|
*/
|
|
35
51
|
export const query_account_by_id = async (deps, id) => {
|
|
36
|
-
return deps.db.query_one(`SELECT
|
|
37
|
-
id,
|
|
38
|
-
]);
|
|
52
|
+
return deps.db.query_one(`SELECT ${ACCOUNT_COLUMNS} FROM account WHERE id = $1 AND deleted_at IS NULL`, [id]);
|
|
39
53
|
};
|
|
40
54
|
/**
|
|
41
55
|
* Find an account by username (case-insensitive).
|
|
42
56
|
*/
|
|
43
57
|
export const query_account_by_username = async (deps, username) => {
|
|
44
|
-
return deps.db.query_one(`SELECT
|
|
45
|
-
username,
|
|
46
|
-
]);
|
|
58
|
+
return deps.db.query_one(`SELECT ${ACCOUNT_COLUMNS} FROM account WHERE LOWER(username) = LOWER($1)`, [username]);
|
|
47
59
|
};
|
|
48
60
|
/**
|
|
49
61
|
* Find an account by email (case-insensitive).
|
|
50
62
|
*/
|
|
51
63
|
export const query_account_by_email = async (deps, email) => {
|
|
52
|
-
return deps.db.query_one(`SELECT
|
|
53
|
-
email,
|
|
54
|
-
]);
|
|
64
|
+
return deps.db.query_one(`SELECT ${ACCOUNT_COLUMNS} FROM account WHERE LOWER(email) = LOWER($1)`, [email]);
|
|
55
65
|
};
|
|
56
66
|
/**
|
|
57
67
|
* Find an account by username or email.
|
package/dist/db/CLAUDE.md
CHANGED
|
@@ -92,11 +92,28 @@ DO NOTHING`), `_put_fact_refs`, `_get_fact` / `_get_fact_meta` / `_has_fact`
|
|
|
92
92
|
external unlink), and the cell-coupled orphan queries `query_orphan_facts_list`
|
|
93
93
|
/ `_select_for_delete` (a fact is orphan when no active `cell.refs` names it).
|
|
94
94
|
- **`fact_store.ts`** — `PgFactStore implements FactStore` (the interface lives
|
|
95
|
-
in `@fuzdev/fuz_util/fact_store.js`):
|
|
96
|
-
`embedded_threshold
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
`
|
|
95
|
+
in `@fuzdev/fuz_util/fact_store.js`): size-routed writes (embedded ≤
|
|
96
|
+
`embedded_threshold` / disk CAS above it / `put_ref` for an externally-managed
|
|
97
|
+
URL), JSON ref auto-extract, idempotent put, verify-on-read for external
|
|
98
|
+
content via an injected `FactExternalFetcher`. With `disk_root` + `fs` (the
|
|
99
|
+
`runtime/*Deps`) configured, oversize `put` and the streaming `put_stream`
|
|
100
|
+
write to the `<shard>/<rest>` disk CAS and the default fetcher reads from it.
|
|
101
|
+
- **`file_fact_url.ts`** — the canonical `file:<shard>/<rest>` URL shape
|
|
102
|
+
(`FileFactUrl` brand, `mint_file_fact_url` / `parse_file_fact_url` /
|
|
103
|
+
`FILE_FACT_URL_PATTERN`) plus `fact_disk_path(hash) → {shard, rest}`, the
|
|
104
|
+
single source of truth for the on-disk layout (twins the Rust `fuz_fact`).
|
|
105
|
+
- **`fact_disk_storage.ts`** — the filesystem CAS over `runtime/{FsStream,FsWrite,FsRemove,FsRead}Deps`
|
|
106
|
+
(not raw `node:fs`): `stream_fact_to_disk` (bounded-memory blake3+sha256 single
|
|
107
|
+
pass, buffer→spill, fsync-then-atomic-rename, dedup-drop if the CAS path already
|
|
108
|
+
exists), `write_fact_bytes_to_disk` (buffering twin), `create_disk_fact_fetcher`,
|
|
109
|
+
and `sweep_orphan_temps` (reaps stale `.tmp` spills by mtime). The temp is
|
|
110
|
+
`fsync`ed before the rename publishes it (twins the Rust `fuz_fact` §fsync
|
|
111
|
+
posture: data-sync before rename, parent-dir fsync waived) — the serve path
|
|
112
|
+
streams the file without re-hashing, so write-time durability is the guard.
|
|
113
|
+
- **`fact_store_errors.ts`** — `PayloadTooLargeError` / `StorageFullError` (+
|
|
114
|
+
`is_enospc_error`) thrown by `put_stream`, for a consumer route's 413 / 507.
|
|
115
|
+
- The read-side fetcher + write/serve plumbing also live under `server/`
|
|
116
|
+
(`file_fact_fetcher.ts`, `fact_write.ts`, `serve_fact_route.ts`).
|
|
100
117
|
|
|
101
118
|
### Migration namespace order
|
|
102
119
|
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Filesystem CAS for externally-stored fact bytes — the disk half of
|
|
3
|
+
* `PgFactStore`, threaded over the injectable `runtime/*Deps` rather than raw
|
|
4
|
+
* `node:fs`, so it runs unchanged under Node, Deno, and a mock runtime.
|
|
5
|
+
*
|
|
6
|
+
* Large facts (over the embedded threshold) live on disk at the canonical
|
|
7
|
+
* sharded layout `<facts_dir>/<shard>/<rest>` — `<shard>` is the first 2 hex
|
|
8
|
+
* chars of the blake3 digest, `<rest>` the remaining 62 — with the `fact` row
|
|
9
|
+
* carrying `external_url = file:<shard>/<rest>` (disk-root-relative). The layout
|
|
10
|
+
* is single-sourced by `fact_disk_path` in `db/file_fact_url.ts`, so the write
|
|
11
|
+
* path here and the URL minted into the row can't drift. The TS twin of the
|
|
12
|
+
* Rust `fuz_fact` disk CAS.
|
|
13
|
+
*
|
|
14
|
+
* Writes land through `<facts_dir>/.tmp/<rand>.tmp`, are `fsync`ed, then
|
|
15
|
+
* `rename`d into the content-addressed final path. The `rename` is atomic on
|
|
16
|
+
* POSIX (a *concurrent reader* observing the path sees either the full content
|
|
17
|
+
* or nothing), but atomicity is not durability — the `fsync` before the rename
|
|
18
|
+
* is what guards against a *host crash* leaving a torn/zero file at a published
|
|
19
|
+
* CAS path, because the serving path streams the hash-named file without
|
|
20
|
+
* re-hashing it (`server/serve_fact_route.ts`). This twins the Rust `fuz_fact`
|
|
21
|
+
* §fsync posture: data-sync before the rename; the parent-dir fsync stays
|
|
22
|
+
* deliberately waived (a lost dirent is regenerable under content addressing).
|
|
23
|
+
* If the final path already exists the temp is dropped instead of renamed over
|
|
24
|
+
* — idempotent dedup (same hash → byte-identical content), mirroring the Rust
|
|
25
|
+
* commit path. `.tmp/` is a sibling of `<shard>/` under the same `facts_dir` so
|
|
26
|
+
* `rename` is always same-filesystem (no EXDEV).
|
|
27
|
+
*
|
|
28
|
+
* @module
|
|
29
|
+
*/
|
|
30
|
+
import { type FactHash } from '@fuzdev/fuz_util/fact_hash.js';
|
|
31
|
+
import type { Logger } from '@fuzdev/fuz_util/log.js';
|
|
32
|
+
import type { FsReadDeps, FsWriteDeps, FsStreamDeps, FsRemoveDeps } from '../runtime/deps.js';
|
|
33
|
+
import { type FileFactUrl } from './file_fact_url.js';
|
|
34
|
+
import type { FactExternalFetcher } from './fact_store.js';
|
|
35
|
+
/** Subdirectory under `facts_dir` for in-flight atomic temp files. */
|
|
36
|
+
export declare const FACT_TMP_DIRNAME = ".tmp";
|
|
37
|
+
/** Default age (1 hour) past which a `.tmp/*` file is considered orphaned. */
|
|
38
|
+
export declare const FACT_TMP_ORPHAN_MAX_AGE_MS: number;
|
|
39
|
+
/**
|
|
40
|
+
* Filesystem capabilities the disk CAS needs, drawn from `runtime/deps.ts`. A
|
|
41
|
+
* full `RuntimeDeps` (Node or Deno) satisfies this; each function below picks
|
|
42
|
+
* the narrow subset it actually uses.
|
|
43
|
+
*/
|
|
44
|
+
export type FactDiskStorageDeps = Pick<FsReadDeps, 'stat' | 'readdir' | 'read_file'> & Pick<FsWriteDeps, 'mkdir' | 'rename' | 'write_file' | 'fsync'> & Pick<FsStreamDeps, 'write_file_stream' | 'read_file_stream'> & Pick<FsRemoveDeps, 'remove'>;
|
|
45
|
+
/**
|
|
46
|
+
* Where a streamed body landed — `embedded` carries the in-memory bytes (under
|
|
47
|
+
* the embedded threshold, bound for the PG `fact.bytes` column); `disk` means
|
|
48
|
+
* the bytes are already at `<facts_dir>/<shard>/<rest>` and the row carries the
|
|
49
|
+
* `file:` URL.
|
|
50
|
+
*/
|
|
51
|
+
export type StreamPlacement = {
|
|
52
|
+
kind: 'embedded';
|
|
53
|
+
bytes: Uint8Array;
|
|
54
|
+
} | {
|
|
55
|
+
kind: 'disk';
|
|
56
|
+
external_url: FileFactUrl;
|
|
57
|
+
};
|
|
58
|
+
/**
|
|
59
|
+
* Outcome of streaming an upload to storage: the `blake3:`-prefixed fact hash,
|
|
60
|
+
* the bare-hex SHA-256, the byte count, and where the bytes landed.
|
|
61
|
+
* `PgFactStore.put_stream` turns this into the `fact` row insert.
|
|
62
|
+
*/
|
|
63
|
+
export interface StreamFactToDiskResult {
|
|
64
|
+
hash: FactHash;
|
|
65
|
+
sha256: string;
|
|
66
|
+
size: number;
|
|
67
|
+
placement: StreamPlacement;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Stream `source` to storage with bounded memory: hash BLAKE3 + SHA-256
|
|
71
|
+
* incrementally in one pass, buffer in memory until the bytes cross
|
|
72
|
+
* `embedded_threshold`, then spill the buffer + remaining chunks through a temp
|
|
73
|
+
* file and atomically land it in the disk CAS. Peak heap is
|
|
74
|
+
* `O(chunk + embedded_threshold)`, never `O(artifact)`, so a multi-GB upload
|
|
75
|
+
* never buffers in RAM.
|
|
76
|
+
*
|
|
77
|
+
* - **Embedded vs disk.** A body `<= embedded_threshold` stays in memory and is
|
|
78
|
+
* returned as `{kind: 'embedded'}` for the PG `bytes` column. Above it (with a
|
|
79
|
+
* `facts_dir`), the buffer + remaining chunks spill to `<facts_dir>/.tmp/…`,
|
|
80
|
+
* then `rename` into `<facts_dir>/<shard>/<rest>` once the hash is known —
|
|
81
|
+
* `{kind: 'disk'}`. A body over the threshold with `facts_dir === undefined`
|
|
82
|
+
* throws `PayloadTooLargeError` (matches `PgFactStore.put`).
|
|
83
|
+
* - **Cap enforcement.** Aborts with `PayloadTooLargeError` the moment the
|
|
84
|
+
* running byte count passes `max_bytes` — the mid-stream backstop for a
|
|
85
|
+
* chunked body or a mis-declared `Content-Length`.
|
|
86
|
+
* - **Disk-full.** An `ENOSPC` from the temp-file write surfaces as
|
|
87
|
+
* `StorageFullError`.
|
|
88
|
+
*
|
|
89
|
+
* @mutates `facts_dir` filesystem
|
|
90
|
+
*/
|
|
91
|
+
export declare const stream_fact_to_disk: (deps: Pick<FactDiskStorageDeps, "mkdir" | "rename" | "remove" | "write_file_stream" | "fsync" | "stat">, facts_dir: string | undefined, source: ReadableStream<Uint8Array>, max_bytes: number, embedded_threshold: number) => Promise<StreamFactToDiskResult>;
|
|
92
|
+
/**
|
|
93
|
+
* Write fully-buffered `bytes` for `hash` to a temp file, then publish it at the
|
|
94
|
+
* canonical `<facts_dir>/<shard>/<rest>` path via `commit_temp_to_cas`
|
|
95
|
+
* (fsync'd temp + atomic rename, dedup-aware). The buffering twin of
|
|
96
|
+
* `stream_fact_to_disk`, used by `PgFactStore.put` for oversize sync bytes.
|
|
97
|
+
* Returns the `file:` `external_url` for the `fact` row.
|
|
98
|
+
*
|
|
99
|
+
* @mutates `facts_dir` filesystem
|
|
100
|
+
*/
|
|
101
|
+
export declare const write_fact_bytes_to_disk: (deps: Pick<FactDiskStorageDeps, "mkdir" | "rename" | "remove" | "write_file" | "fsync" | "stat">, facts_dir: string, hash: FactHash, bytes: Uint8Array) => Promise<FileFactUrl>;
|
|
102
|
+
/**
|
|
103
|
+
* `FactExternalFetcher` reading from the `<facts_dir>/<shard>/<rest>` layout the
|
|
104
|
+
* writers above produce, over the injected `*Deps`. Does NOT verify hash content
|
|
105
|
+
* — `PgFactStore.get` calls `fact_hash_verify(hash, bytes)` after the fetch and
|
|
106
|
+
* returns `null` on mismatch.
|
|
107
|
+
*
|
|
108
|
+
* Defense at the read seam is the `FILE_FACT_URL_PATTERN` regex (via
|
|
109
|
+
* `parse_file_fact_url`) — `..` segments, foreign schemes, and non-hex chars
|
|
110
|
+
* fail before any disk access.
|
|
111
|
+
*/
|
|
112
|
+
export declare const create_disk_fact_fetcher: (deps: Pick<FactDiskStorageDeps, "read_file" | "read_file_stream">, facts_dir: string) => FactExternalFetcher;
|
|
113
|
+
/**
|
|
114
|
+
* Reap stale temp files left under `<facts_dir>/.tmp/` by a hard crash (SIGKILL
|
|
115
|
+
* / OOM / host crash) mid-write — the `finally` cleanup in the writers above
|
|
116
|
+
* never ran. Removes `.tmp` entries whose mtime is older than `max_age_ms` (so
|
|
117
|
+
* an in-flight upload isn't yanked out from under itself). The TS twin of the
|
|
118
|
+
* Rust `sweep_orphan_temps`; call on startup + on an interval.
|
|
119
|
+
*
|
|
120
|
+
* Best-effort: a missing `.tmp/` dir (no oversize upload has ever run) is a
|
|
121
|
+
* no-op; a runtime that doesn't report `mtime_ms` (a mock) leaves every temp
|
|
122
|
+
* untouched; a per-file stat/remove failure is logged and skipped rather than
|
|
123
|
+
* aborting the sweep. Returns the count removed.
|
|
124
|
+
*
|
|
125
|
+
* @mutates `facts_dir` filesystem
|
|
126
|
+
*/
|
|
127
|
+
export declare const sweep_orphan_temps: (deps: Pick<FactDiskStorageDeps, "readdir" | "stat" | "remove">, facts_dir: string, options?: {
|
|
128
|
+
max_age_ms?: number;
|
|
129
|
+
log?: Pick<Logger, "warn">;
|
|
130
|
+
}) => Promise<number>;
|
|
131
|
+
//# sourceMappingURL=fact_disk_storage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fact_disk_storage.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/db/fact_disk_storage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,OAAO,EAAmB,KAAK,QAAQ,EAAC,MAAM,+BAA+B,CAAC;AAC9E,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,yBAAyB,CAAC;AAEpD,OAAO,KAAK,EAAC,UAAU,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAC,MAAM,oBAAoB,CAAC;AAE5F,OAAO,EAIN,KAAK,WAAW,EAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,KAAK,EAAC,mBAAmB,EAAC,MAAM,iBAAiB,CAAC;AAGzD,sEAAsE;AACtE,eAAO,MAAM,gBAAgB,SAAS,CAAC;AAEvC,8EAA8E;AAC9E,eAAO,MAAM,0BAA0B,QAAiB,CAAC;AAEzD;;;;GAIG;AACH,MAAM,MAAM,mBAAmB,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,GAAG,SAAS,GAAG,WAAW,CAAC,GACnF,IAAI,CAAC,WAAW,EAAE,OAAO,GAAG,QAAQ,GAAG,YAAY,GAAG,OAAO,CAAC,GAC9D,IAAI,CAAC,YAAY,EAAE,mBAAmB,GAAG,kBAAkB,CAAC,GAC5D,IAAI,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;AAE9B;;;;;GAKG;AACH,MAAM,MAAM,eAAe,GACxB;IAAC,IAAI,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,UAAU,CAAA;CAAC,GACrC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,WAAW,CAAA;CAAC,CAAC;AAE7C;;;;GAIG;AACH,MAAM,WAAW,sBAAsB;IACtC,IAAI,EAAE,QAAQ,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,eAAe,CAAC;CAC3B;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,IAAI,CACT,mBAAmB,EACnB,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,mBAAmB,GAAG,OAAO,GAAG,MAAM,CACtE,EACD,WAAW,MAAM,GAAG,SAAS,EAC7B,QAAQ,cAAc,CAAC,UAAU,CAAC,EAClC,WAAW,MAAM,EACjB,oBAAoB,MAAM,KACxB,OAAO,CAAC,sBAAsB,CA8GhC,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,IAAI,CAAC,mBAAmB,EAAE,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,YAAY,GAAG,OAAO,GAAG,MAAM,CAAC,EAChG,WAAW,MAAM,EACjB,MAAM,QAAQ,EACd,OAAO,UAAU,KACf,OAAO,CAAC,WAAW,CAgBrB,CAAC;AAEF;;;;;;;;;GASG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,IAAI,CAAC,mBAAmB,EAAE,WAAW,GAAG,kBAAkB,CAAC,EACjE,WAAW,MAAM,KACf,mBAWF,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,kBAAkB,GAC9B,MAAM,IAAI,CAAC,mBAAmB,EAAE,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC,EAC9D,WAAW,MAAM,EACjB,UAAU;IAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAC,KACzD,OAAO,CAAC,MAAM,CA6BhB,CAAC"}
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Filesystem CAS for externally-stored fact bytes — the disk half of
|
|
3
|
+
* `PgFactStore`, threaded over the injectable `runtime/*Deps` rather than raw
|
|
4
|
+
* `node:fs`, so it runs unchanged under Node, Deno, and a mock runtime.
|
|
5
|
+
*
|
|
6
|
+
* Large facts (over the embedded threshold) live on disk at the canonical
|
|
7
|
+
* sharded layout `<facts_dir>/<shard>/<rest>` — `<shard>` is the first 2 hex
|
|
8
|
+
* chars of the blake3 digest, `<rest>` the remaining 62 — with the `fact` row
|
|
9
|
+
* carrying `external_url = file:<shard>/<rest>` (disk-root-relative). The layout
|
|
10
|
+
* is single-sourced by `fact_disk_path` in `db/file_fact_url.ts`, so the write
|
|
11
|
+
* path here and the URL minted into the row can't drift. The TS twin of the
|
|
12
|
+
* Rust `fuz_fact` disk CAS.
|
|
13
|
+
*
|
|
14
|
+
* Writes land through `<facts_dir>/.tmp/<rand>.tmp`, are `fsync`ed, then
|
|
15
|
+
* `rename`d into the content-addressed final path. The `rename` is atomic on
|
|
16
|
+
* POSIX (a *concurrent reader* observing the path sees either the full content
|
|
17
|
+
* or nothing), but atomicity is not durability — the `fsync` before the rename
|
|
18
|
+
* is what guards against a *host crash* leaving a torn/zero file at a published
|
|
19
|
+
* CAS path, because the serving path streams the hash-named file without
|
|
20
|
+
* re-hashing it (`server/serve_fact_route.ts`). This twins the Rust `fuz_fact`
|
|
21
|
+
* §fsync posture: data-sync before the rename; the parent-dir fsync stays
|
|
22
|
+
* deliberately waived (a lost dirent is regenerable under content addressing).
|
|
23
|
+
* If the final path already exists the temp is dropped instead of renamed over
|
|
24
|
+
* — idempotent dedup (same hash → byte-identical content), mirroring the Rust
|
|
25
|
+
* commit path. `.tmp/` is a sibling of `<shard>/` under the same `facts_dir` so
|
|
26
|
+
* `rename` is always same-filesystem (no EXDEV).
|
|
27
|
+
*
|
|
28
|
+
* @module
|
|
29
|
+
*/
|
|
30
|
+
import { createHash } from 'node:crypto';
|
|
31
|
+
import { join } from 'node:path';
|
|
32
|
+
import { Blake3Hasher } from '@fuzdev/blake3_wasm';
|
|
33
|
+
import { blake3_ready } from '@fuzdev/fuz_util/hash_blake3.js';
|
|
34
|
+
import { to_hex } from '@fuzdev/fuz_util/hex.js';
|
|
35
|
+
import { FACT_HASH_PREFIX } from '@fuzdev/fuz_util/fact_hash.js';
|
|
36
|
+
import { generate_random_base64url } from '../crypto.js';
|
|
37
|
+
import { fact_disk_path, mint_file_fact_url, parse_file_fact_url, } from './file_fact_url.js';
|
|
38
|
+
import { is_enospc_error, PayloadTooLargeError, StorageFullError } from './fact_store_errors.js';
|
|
39
|
+
/** Subdirectory under `facts_dir` for in-flight atomic temp files. */
|
|
40
|
+
export const FACT_TMP_DIRNAME = '.tmp';
|
|
41
|
+
/** Default age (1 hour) past which a `.tmp/*` file is considered orphaned. */
|
|
42
|
+
export const FACT_TMP_ORPHAN_MAX_AGE_MS = 60 * 60 * 1000;
|
|
43
|
+
/**
 * Consume `source` once, feeding every chunk to a BLAKE3 and a SHA-256 hasher
 * as it arrives, and place the bytes according to how many there are.
 *
 * - At most `embedded_threshold` bytes: nothing touches disk; the chunks are
 *   concatenated and returned as `{kind: 'embedded', bytes}`.
 * - More than `embedded_threshold` bytes: the chunks held so far, followed by
 *   the rest of `source`, are written to `<facts_dir>/.tmp/<random>.tmp` and
 *   handed to `commit_temp_to_cas`; the result is
 *   `{kind: 'disk', external_url}`. Only the leading chunks (up to the one
 *   that crosses the threshold) are kept in memory.
 *
 * @param deps - filesystem capabilities; `mkdir`, `write_file_stream` and
 *   `remove` are used here, and `deps` is passed on to `commit_temp_to_cas`
 * @param facts_dir - root of the on-disk layout, or `undefined` when no disk
 *   storage is configured
 * @param source - the body to store
 * @param max_bytes - hard cap on the total byte count
 * @param embedded_threshold - largest size that is returned in memory
 * @returns `{hash, sha256, size, placement}`, where `hash` is
 *   `FACT_HASH_PREFIX` + the hex BLAKE3 digest and `sha256` is the hex SHA-256
 * @throws PayloadTooLargeError as soon as the running byte count exceeds
 *   `max_bytes`, or when the body exceeds `embedded_threshold` and `facts_dir`
 *   is `undefined`
 * @throws StorageFullError when `write_file_stream` fails with an error that
 *   `is_enospc_error` recognizes
 *
 * @mutates `facts_dir` filesystem
 */
export const stream_fact_to_disk = async (deps, facts_dir, source, max_bytes, embedded_threshold) => {
    await blake3_ready;
    const blake3_hasher = new Blake3Hasher();
    const sha256_hasher = createHash('sha256');
    let total_bytes = 0;
    // Leading chunks, kept until we know whether the body stays embedded.
    const head_chunks = [];
    let head_bytes = 0;
    const reader = source.getReader();
    // Count first so a chunk that breaks the cap is rejected before hashing.
    const absorb = (chunk) => {
        total_bytes += chunk.length;
        if (total_bytes > max_bytes) {
            throw new PayloadTooLargeError(total_bytes, max_bytes);
        }
        blake3_hasher.update(chunk);
        sha256_hasher.update(chunk);
    };
    const finalize_fact_hash = () => (FACT_HASH_PREFIX + to_hex(blake3_hasher.finalize()));
    try {
        // Phase 1: read, hash and hold chunks until the stream ends or the
        // held bytes cross the threshold. The crossing chunk is already hashed
        // and held, so phase 2 replays it instead of reading it again.
        let over_threshold = false;
        while (!over_threshold) {
            const next = await reader.read();
            if (next.done) {
                break;
            }
            const chunk = next.value;
            if (!chunk || chunk.length === 0) {
                continue;
            }
            absorb(chunk);
            head_chunks.push(chunk);
            head_bytes += chunk.length;
            over_threshold = head_bytes > embedded_threshold;
        }
        if (!over_threshold) {
            return {
                hash: finalize_fact_hash(),
                sha256: sha256_hasher.digest('hex'),
                size: total_bytes,
                placement: { kind: 'embedded', bytes: concat_chunks(head_chunks, head_bytes) },
            };
        }
        if (facts_dir === undefined) {
            // Too big to embed and there is no disk root to spill to.
            throw new PayloadTooLargeError(total_bytes, embedded_threshold);
        }
        // Phase 2: spill. One chunk per pull: first the held chunks (already
        // hashed), then fresh chunks from `reader`, hashed as they pass.
        let replay_cursor = 0;
        const spill_source = new ReadableStream({
            async pull(controller) {
                if (replay_cursor < head_chunks.length) {
                    const held = head_chunks[replay_cursor];
                    replay_cursor += 1;
                    controller.enqueue(held);
                    return;
                }
                // Skip over empty reads until there is data or the end.
                let next = await reader.read();
                while (!next.done && (!next.value || next.value.length === 0)) {
                    next = await reader.read();
                }
                if (next.done) {
                    controller.close();
                    return;
                }
                try {
                    absorb(next.value);
                }
                catch (err) {
                    // Hand the cap violation to whoever is consuming the stream.
                    controller.error(err);
                    return;
                }
                controller.enqueue(next.value);
            },
            cancel: (reason) => reader.cancel(reason),
        });
        const tmp_dir = join(facts_dir, FACT_TMP_DIRNAME);
        const tmp_path = join(tmp_dir, `${generate_random_base64url(16)}.tmp`);
        await deps.mkdir(tmp_dir, { recursive: true });
        try {
            await deps.write_file_stream(tmp_path, spill_source);
        }
        catch (err) {
            // Best-effort removal of the partial temp; a failure here is ignored.
            await deps.remove(tmp_path).catch(() => undefined);
            // Anything that is not disk-full (including a mid-stream
            // PayloadTooLargeError surfaced via the stream) is rethrown as-is.
            throw is_enospc_error(err) ? new StorageFullError(err) : err;
        }
        const hash = finalize_fact_hash();
        const location = await commit_temp_to_cas(deps, tmp_path, facts_dir, hash);
        return {
            hash,
            sha256: sha256_hasher.digest('hex'),
            size: total_bytes,
            placement: { kind: 'disk', external_url: mint_file_fact_url(location.shard, location.rest) },
        };
    }
    finally {
        blake3_hasher.free();
        try {
            reader.releaseLock();
        }
        catch {
            // Already released/cancelled by the spill stream's cancel path.
        }
    }
};
|
|
178
|
+
/**
 * Store already-buffered `bytes` for `hash` on disk: write them to
 * `<facts_dir>/.tmp/<random>.tmp`, then hand that temp file to
 * `commit_temp_to_cas` to publish it under `facts_dir`. The buffering twin of
 * `stream_fact_to_disk`, used by `PgFactStore.put` for oversize sync bytes.
 *
 * @param deps - filesystem capabilities; `mkdir`, `write_file` and `remove`
 *   are used here, and `deps` is passed on to `commit_temp_to_cas`
 * @param facts_dir - root of the on-disk layout
 * @param hash - fact hash the bytes are published under
 * @param bytes - the full content to store
 * @returns the `file:` `external_url` for the `fact` row
 * @throws StorageFullError when `write_file` fails with an error that
 *   `is_enospc_error` recognizes; any other write error is rethrown unchanged
 *
 * @mutates `facts_dir` filesystem
 */
export const write_fact_bytes_to_disk = async (deps, facts_dir, hash, bytes) => {
    const staging_dir = join(facts_dir, FACT_TMP_DIRNAME);
    const staging_path = join(staging_dir, `${generate_random_base64url(16)}.tmp`);
    await deps.mkdir(staging_dir, { recursive: true });
    // Write the temp first (mapping disk-full), then publish it.
    try {
        await deps.write_file(staging_path, bytes);
    }
    catch (err) {
        // Best-effort removal of the partial temp; a failure here is ignored.
        await deps.remove(staging_path).catch(() => undefined);
        throw is_enospc_error(err) ? new StorageFullError(err) : err;
    }
    const location = await commit_temp_to_cas(deps, staging_path, facts_dir, hash);
    return mint_file_fact_url(location.shard, location.rest);
};
|
|
205
|
+
/**
 * `FactExternalFetcher` reading from the `<facts_dir>/<shard>/<rest>` layout the
 * writers above produce, over the injected `*Deps`. Does NOT verify hash content
 * — `PgFactStore.get` calls `fact_hash_verify(hash, bytes)` after the fetch and
 * returns `null` on mismatch.
 *
 * Defense at the read seam is the `FILE_FACT_URL_PATTERN` regex (via
 * `parse_file_fact_url`) — `..` segments, foreign schemes, and non-hex chars
 * fail before any disk access.
 *
 * Both methods are `async`, so a URL that `parse_file_fact_url` rejects
 * surfaces as a rejected promise from either one, never as a synchronous
 * throw.
 *
 * @param deps - provides `read_file` and `read_file_stream`
 * @param facts_dir - root directory of the on-disk fact layout
 * @returns a fetcher exposing `fetch_bytes` and `fetch_stream`
 */
export const create_disk_fact_fetcher = (deps, facts_dir) => {
    // Map a `file:` fact URL to its path under `facts_dir`; throws on any URL
    // that `parse_file_fact_url` does not accept.
    const resolve_path = (url) => {
        const parsed = parse_file_fact_url(url);
        if (!parsed)
            throw new Error(`invalid file fact url: ${url}`);
        return join(facts_dir, parsed.shard, parsed.rest);
    };
    return {
        // `async` funnels a synchronous `resolve_path` throw into a rejection.
        fetch_bytes: async (url) => deps.read_file(resolve_path(url)),
        fetch_stream: async (url) => deps.read_file_stream(resolve_path(url)),
    };
};
|
|
228
|
+
/**
 * Removes orphaned temp files from `<facts_dir>/.tmp/` — leftovers from a hard
 * crash (SIGKILL / OOM / host crash) that interrupted a write before the
 * writers' own cleanup could run. Only `.tmp` entries whose mtime is older than
 * `max_age_ms` are reaped, so an upload still in flight keeps its temp file.
 * The TS twin of the Rust `sweep_orphan_temps`; call on startup + on an interval.
 *
 * Best-effort throughout: if the `.tmp/` directory cannot be listed (e.g. it
 * was never created) the sweep is a no-op returning `0`; an entry with no
 * `stat` result or no reported `mtime_ms` is left alone; a stat/remove failure
 * on one entry is logged via `options.log` (when provided) and the sweep moves
 * on to the next entry.
 *
 * @param deps - filesystem capabilities; `readdir`, `stat` and `remove` are used
 * @param facts_dir - root directory of the disk CAS
 * @param options - optional `max_age_ms` (defaults to `FACT_TMP_ORPHAN_MAX_AGE_MS`) and `log`
 * @returns the number of temp files removed
 * @mutates `facts_dir` filesystem
 */
export const sweep_orphan_temps = async (deps, facts_dir, options) => {
	const max_age_ms = options?.max_age_ms ?? FACT_TMP_ORPHAN_MAX_AGE_MS;
	const tmp_dir = join(facts_dir, FACT_TMP_DIRNAME);
	let names;
	try {
		names = await deps.readdir(tmp_dir);
	}
	catch {
		// `.tmp/` doesn't exist yet — nothing to sweep.
		return 0;
	}
	// Anything last modified at or after this instant is considered live.
	const stale_before = Date.now() - max_age_ms;
	let reaped = 0;
	for (const name of names) {
		if (!name.endsWith('.tmp'))
			continue;
		const path = join(tmp_dir, name);
		try {
			const info = await deps.stat(path);
			// Unknown age (missing file, or a runtime that doesn't report mtime) →
			// leave it; never reap something we can't prove is stale.
			if (!info)
				continue;
			const modified_at = info.mtime_ms;
			if (modified_at === undefined)
				continue;
			if (modified_at >= stale_before)
				continue;
			await deps.remove(path);
			reaped += 1;
		}
		catch (err) {
			options?.log?.warn(`sweep_orphan_temps: failed to reap ${path}:`, err instanceof Error ? err.message : String(err));
		}
	}
	return reaped;
};
|
|
273
|
+
/**
 * Moves an already-written temp file to its content-addressed home at
 * `<facts_dir>/<shard>/<rest>`.
 *
 * Sequence: `fsync` the temp first (the serve path streams the file without
 * re-hashing, so the bytes must be durable before they become the canonical
 * body); then, if the final path already exists, discard the temp (the content
 * is already stored — idempotent dedup); otherwise create the shard directory
 * and atomically `rename` the temp into place.
 *
 * Any failure unlinks the temp (best-effort) and rethrows, with `ENOSPC`
 * surfaced as `StorageFullError`. This is the single commit path shared by both
 * writers above — twins the Rust `fuz_fact` `SpillFile::rename_into_cas`
 * (data-sync before rename; parent-dir fsync waived).
 *
 * @param deps - filesystem capabilities; `fsync`, `stat`, `mkdir`, `rename` and `remove` are used
 * @param tmp_path - path of the fully written temp file
 * @param facts_dir - root directory of the disk CAS
 * @param hash - content hash, mapped to `<shard>/<rest>` by `fact_disk_path`
 * @returns the `{ shard, rest }` location of the committed file
 * @mutates `facts_dir` filesystem
 */
const commit_temp_to_cas = async (deps, tmp_path, facts_dir, hash) => {
	const location = fact_disk_path(hash);
	const final_path = join(facts_dir, location.shard, location.rest);
	try {
		await deps.fsync(tmp_path);
		const already_present = await deps.stat(final_path);
		if (!already_present) {
			// First copy of this content: make sure the shard dir exists, then publish.
			await deps.mkdir(join(facts_dir, location.shard), { recursive: true });
			await deps.rename(tmp_path, final_path);
		}
		else {
			// Content already stored under this hash — the temp is redundant.
			await deps.remove(tmp_path).catch(() => undefined);
		}
	}
	catch (commit_error) {
		await deps.remove(tmp_path).catch(() => undefined);
		throw is_enospc_error(commit_error) ? new StorageFullError(commit_error) : commit_error;
	}
	return { shard: location.shard, rest: location.rest };
};
|
|
306
|
+
/**
 * Joins buffered chunks, in order, into one freshly allocated `Uint8Array` of
 * `total` bytes. Any bytes of the result not covered by a chunk stay zero.
 */
const concat_chunks = (chunks, total) => {
	const joined = new Uint8Array(total);
	// Write position for the next chunk.
	let cursor = 0;
	for (const part of chunks) {
		joined.set(part, cursor);
		cursor = cursor + part.length;
	}
	return joined;
};
|
package/dist/db/fact_store.d.ts
CHANGED
|
@@ -14,21 +14,23 @@
|
|
|
14
14
|
* - mismatched external bytes return `null` + log warning (treat as
|
|
15
15
|
* unavailable; GC / repair is a separate concern)
|
|
16
16
|
*
|
|
17
|
-
* Embedded vs
|
|
18
|
-
* `bytes
|
|
19
|
-
* `
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
17
|
+
* Embedded vs disk split: writes route by size. Bytes `<= embedded_threshold`
|
|
18
|
+
* land in the PG `bytes` column; larger bytes go to the disk CAS at
|
|
19
|
+
* `<facts_dir>/<shard>/<rest>` (`db/fact_disk_storage.ts`) and the row records a
|
|
20
|
+
* `file:<shard>/<rest>` `external_url`. `put` takes fully-buffered bytes;
|
|
21
|
+
* `put_stream` is the bounded-memory streaming twin (hash BLAKE3 + SHA-256 in
|
|
22
|
+
* one pass, spill past the threshold, enforce `max_bytes` / `ENOSPC`). Both need
|
|
23
|
+
* `disk_root` + `fs` (the `runtime/*Deps`) configured for the over-threshold
|
|
24
|
+
* path; without them, an oversize `put` throws and the caller must `put_ref`
|
|
25
|
+
* against an externally-managed URL (federation / stub-fetcher tests).
|
|
25
26
|
*
|
|
26
27
|
* @module
|
|
27
28
|
*/
|
|
28
29
|
import type { QueryDeps } from './query_deps.js';
|
|
29
30
|
import type { Logger } from '@fuzdev/fuz_util/log.js';
|
|
30
31
|
import { type FactHash } from '@fuzdev/fuz_util/fact_hash.js';
|
|
31
|
-
import type { FactMeta, FactPutOptions, FactStore } from '@fuzdev/fuz_util/fact_store.js';
|
|
32
|
+
import type { FactMeta, FactPutOptions, FactStore, PutStreamOutcome } from '@fuzdev/fuz_util/fact_store.js';
|
|
33
|
+
import { type FactDiskStorageDeps } from './fact_disk_storage.js';
|
|
32
34
|
/** Default embedded-vs-referenced cutoff (1 MiB). */
|
|
33
35
|
export declare const FACT_EMBEDDED_THRESHOLD_DEFAULT: number;
|
|
34
36
|
/** Fetcher abstraction so tests can stub external URL retrieval. */
|
|
@@ -43,16 +45,24 @@ export declare const create_default_fetcher: () => FactExternalFetcher;
|
|
|
43
45
|
*
|
|
44
46
|
* `embedded_threshold` (bytes) is the inline-vs-external cutoff: payloads
|
|
45
47
|
* at or under it store embedded in the `fact` row, larger ones route to
|
|
46
|
-
* the
|
|
48
|
+
* the disk CAS. Defaults to `FACT_EMBEDDED_THRESHOLD_DEFAULT`
|
|
47
49
|
* (1 MiB). Consumers tune it per workload — e.g. a much lower bound
|
|
48
50
|
* (~16 KiB) keeps only small JSON inline and routes image originals +
|
|
49
|
-
* thumbnails
|
|
50
|
-
*
|
|
51
|
-
*
|
|
51
|
+
* thumbnails to disk.
|
|
52
|
+
*
|
|
53
|
+
* `disk_root` is the facts directory backing the `<shard>/<rest>` disk CAS;
|
|
54
|
+
* `fs` supplies the filesystem capabilities (a `RuntimeDeps` satisfies it).
|
|
55
|
+
* When both are set, oversize `put` + `put_stream` write to disk and the
|
|
56
|
+
* default `fetcher` reads from it. When unset, oversize `put`/`put_stream`
|
|
57
|
+
* spill throws and reads fall back to the `globalThis.fetch`-backed default
|
|
58
|
+
* fetcher (or an injected stub). `log` is optional — the only call site is the
|
|
59
|
+
* verify-mismatch warning path.
|
|
52
60
|
*/
|
|
53
61
|
export interface PgFactStoreDeps {
|
|
54
62
|
deps: QueryDeps;
|
|
55
63
|
embedded_threshold?: number;
|
|
64
|
+
disk_root?: string;
|
|
65
|
+
fs?: FactDiskStorageDeps;
|
|
56
66
|
fetcher?: FactExternalFetcher;
|
|
57
67
|
log?: Logger;
|
|
58
68
|
}
|
|
@@ -64,11 +74,32 @@ export declare class PgFactStore implements FactStore {
|
|
|
64
74
|
#private;
|
|
65
75
|
constructor(options: PgFactStoreDeps);
|
|
66
76
|
/**
|
|
67
|
-
* Store
|
|
68
|
-
*
|
|
69
|
-
*
|
|
77
|
+
* Store fully-buffered bytes, routing by size: `<= embedded_threshold` into
|
|
78
|
+
* the PG `bytes` column; larger into the disk CAS (when `disk_root` + `fs`
|
|
79
|
+
* are configured) at `<facts_dir>/<shard>/<rest>` with a `file:` URL. Oversize
|
|
80
|
+
* without a disk root throws so the caller routes it through `put_ref`
|
|
81
|
+
* explicitly. Idempotent — `ON CONFLICT DO NOTHING` + content-addressed disk
|
|
82
|
+
* filenames make a re-write a no-op.
|
|
70
83
|
*/
|
|
71
84
|
put(bytes: Uint8Array, options?: FactPutOptions): Promise<FactHash>;
|
|
85
|
+
/**
|
|
86
|
+
* Stream bytes into the store with bounded memory, returning the finalized
|
|
87
|
+
* digests + size. Delegates the byte path to `stream_fact_to_disk` (hash
|
|
88
|
+
* BLAKE3 + SHA-256 in one pass, buffer to the embedded threshold, spill to the
|
|
89
|
+
* disk CAS), then inserts the `fact` row by placement — embedded bytes go to
|
|
90
|
+
* the PG `bytes` column, disk-spilled bytes record the `file:` `external_url`.
|
|
91
|
+
* The cap is enforced mid-stream (`PayloadTooLargeError`); a disk-full mid-
|
|
92
|
+
* stream throws `StorageFullError`.
|
|
93
|
+
*
|
|
94
|
+
* Refs: explicit `options.refs` are recorded; JSON auto-extraction is NOT
|
|
95
|
+
* attempted (it would need a buffered re-read, defeating the bounded-memory
|
|
96
|
+
* contract) — streamed uploads are opaque blobs.
|
|
97
|
+
*
|
|
98
|
+
* Requires `fs` (and, for the over-threshold spill, `disk_root`) to be
|
|
99
|
+
* configured. The streaming twin of `put`; mirrors the Rust
|
|
100
|
+
* `FactStore::put_stream`.
|
|
101
|
+
*/
|
|
102
|
+
put_stream(stream: ReadableStream<Uint8Array>, max_bytes: number, options?: FactPutOptions): Promise<PutStreamOutcome>;
|
|
72
103
|
/**
|
|
73
104
|
* Stream-hash external content and record `(hash, external_url, size)`.
|
|
74
105
|
* Throws when the streamed byte count disagrees with the caller's
|