@fuzdev/fuz_app 0.81.0 → 0.82.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db/CLAUDE.md +22 -5
- package/dist/db/fact_disk_storage.d.ts +131 -0
- package/dist/db/fact_disk_storage.d.ts.map +1 -0
- package/dist/db/fact_disk_storage.js +315 -0
- package/dist/db/fact_store.d.ts +47 -16
- package/dist/db/fact_store.d.ts.map +1 -1
- package/dist/db/fact_store.js +75 -16
- package/dist/db/fact_store_errors.d.ts +38 -0
- package/dist/db/fact_store_errors.d.ts.map +1 -0
- package/dist/db/fact_store_errors.js +48 -0
- package/dist/{server → db}/file_fact_url.d.ts +22 -9
- package/dist/db/file_fact_url.d.ts.map +1 -0
- package/dist/{server → db}/file_fact_url.js +22 -9
- package/dist/runtime/deno.d.ts.map +1 -1
- package/dist/runtime/deno.js +15 -1
- package/dist/runtime/deps.d.ts +21 -0
- package/dist/runtime/deps.d.ts.map +1 -1
- package/dist/runtime/mock.d.ts.map +1 -1
- package/dist/runtime/mock.js +3 -0
- package/dist/runtime/node.d.ts.map +1 -1
- package/dist/runtime/node.js +17 -1
- package/dist/server/fact_write.js +1 -1
- package/dist/server/file_fact_fetcher.js +1 -1
- package/dist/server/serve_fact_route.js +1 -1
- package/package.json +1 -1
- package/dist/server/file_fact_url.d.ts.map +0 -1
package/dist/db/CLAUDE.md
CHANGED
|
@@ -92,11 +92,28 @@ DO NOTHING`), `_put_fact_refs`, `_get_fact` / `_get_fact_meta` / `_has_fact`
|
|
|
92
92
|
external unlink), and the cell-coupled orphan queries `query_orphan_facts_list`
|
|
93
93
|
/ `_select_for_delete` (a fact is orphan when no active `cell.refs` names it).
|
|
94
94
|
- **`fact_store.ts`** — `PgFactStore implements FactStore` (the interface lives
|
|
95
|
-
in `@fuzdev/fuz_util/fact_store.js`):
|
|
96
|
-
`embedded_threshold
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
`
|
|
95
|
+
in `@fuzdev/fuz_util/fact_store.js`): size-routed writes (embedded ≤
|
|
96
|
+
`embedded_threshold` / disk CAS above it / `put_ref` for an externally-managed
|
|
97
|
+
URL), JSON ref auto-extract, idempotent put, verify-on-read for external
|
|
98
|
+
content via an injected `FactExternalFetcher`. With `disk_root` + `fs` (the
|
|
99
|
+
`runtime/*Deps`) configured, oversize `put` and the streaming `put_stream`
|
|
100
|
+
write to the `<shard>/<rest>` disk CAS and the default fetcher reads from it.
|
|
101
|
+
- **`file_fact_url.ts`** — the canonical `file:<shard>/<rest>` URL shape
|
|
102
|
+
(`FileFactUrl` brand, `mint_file_fact_url` / `parse_file_fact_url` /
|
|
103
|
+
`FILE_FACT_URL_PATTERN`) plus `fact_disk_path(hash) → {shard, rest}`, the
|
|
104
|
+
single source of truth for the on-disk layout (twins the Rust `fuz_fact`).
|
|
105
|
+
- **`fact_disk_storage.ts`** — the filesystem CAS over `runtime/{FsStream,FsWrite,FsRemove,FsRead}Deps`
|
|
106
|
+
(not raw `node:fs`): `stream_fact_to_disk` (bounded-memory blake3+sha256 single
|
|
107
|
+
pass, buffer→spill, fsync-then-atomic-rename, dedup-drop if the CAS path already
|
|
108
|
+
exists), `write_fact_bytes_to_disk` (buffering twin), `create_disk_fact_fetcher`,
|
|
109
|
+
and `sweep_orphan_temps` (reaps stale `.tmp` spills by mtime). The temp is
|
|
110
|
+
`fsync`ed before the rename publishes it (twins the Rust `fuz_fact` §fsync
|
|
111
|
+
posture: data-sync before rename, parent-dir fsync waived) — the serve path
|
|
112
|
+
streams the file without re-hashing, so write-time durability is the guard.
|
|
113
|
+
- **`fact_store_errors.ts`** — `PayloadTooLargeError` / `StorageFullError` (+
|
|
114
|
+
`is_enospc_error`) thrown by `put_stream`, for a consumer route's 413 / 507.
|
|
115
|
+
- The read-side fetcher + write/serve plumbing also live under `server/`
|
|
116
|
+
(`file_fact_fetcher.ts`, `fact_write.ts`, `serve_fact_route.ts`).
|
|
100
117
|
|
|
101
118
|
### Migration namespace order
|
|
102
119
|
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Filesystem CAS for externally-stored fact bytes — the disk half of
|
|
3
|
+
* `PgFactStore`, threaded over the injectable `runtime/*Deps` rather than raw
|
|
4
|
+
* `node:fs`, so it runs unchanged under Node, Deno, and a mock runtime.
|
|
5
|
+
*
|
|
6
|
+
* Large facts (over the embedded threshold) live on disk at the canonical
|
|
7
|
+
* sharded layout `<facts_dir>/<shard>/<rest>` — `<shard>` is the first 2 hex
|
|
8
|
+
* chars of the blake3 digest, `<rest>` the remaining 62 — with the `fact` row
|
|
9
|
+
* carrying `external_url = file:<shard>/<rest>` (disk-root-relative). The layout
|
|
10
|
+
* is single-sourced by `fact_disk_path` in `db/file_fact_url.ts`, so the write
|
|
11
|
+
* path here and the URL minted into the row can't drift. The TS twin of the
|
|
12
|
+
* Rust `fuz_fact` disk CAS.
|
|
13
|
+
*
|
|
14
|
+
* Writes land through `<facts_dir>/.tmp/<rand>.tmp`, are `fsync`ed, then
|
|
15
|
+
* `rename`d into the content-addressed final path. The `rename` is atomic on
|
|
16
|
+
* POSIX (a *concurrent reader* observing the path sees either the full content
|
|
17
|
+
* or nothing), but atomicity is not durability — the `fsync` before the rename
|
|
18
|
+
* is what guards against a *host crash* leaving a torn/zero file at a published
|
|
19
|
+
* CAS path, because the serving path streams the hash-named file without
|
|
20
|
+
* re-hashing it (`server/serve_fact_route.ts`). This twins the Rust `fuz_fact`
|
|
21
|
+
* §fsync posture: data-sync before the rename; the parent-dir fsync stays
|
|
22
|
+
* deliberately waived (a lost dirent is regenerable under content addressing).
|
|
23
|
+
* If the final path already exists the temp is dropped instead of renamed over
|
|
24
|
+
* — idempotent dedup (same hash → byte-identical content), mirroring the Rust
|
|
25
|
+
* commit path. `.tmp/` is a sibling of `<shard>/` under the same `facts_dir` so
|
|
26
|
+
* `rename` is always same-filesystem (no EXDEV).
|
|
27
|
+
*
|
|
28
|
+
* @module
|
|
29
|
+
*/
|
|
30
|
+
import { type FactHash } from '@fuzdev/fuz_util/fact_hash.js';
|
|
31
|
+
import type { Logger } from '@fuzdev/fuz_util/log.js';
|
|
32
|
+
import type { FsReadDeps, FsWriteDeps, FsStreamDeps, FsRemoveDeps } from '../runtime/deps.js';
|
|
33
|
+
import { type FileFactUrl } from './file_fact_url.js';
|
|
34
|
+
import type { FactExternalFetcher } from './fact_store.js';
|
|
35
|
+
/** Directory name, relative to `facts_dir`, under which in-flight atomic temp files are staged. */
export declare const FACT_TMP_DIRNAME = '.tmp';
/** Default staleness cutoff (1 hour, in milliseconds): a `.tmp/*` file older than this is treated as orphaned. */
export declare const FACT_TMP_ORPHAN_MAX_AGE_MS: number;
|
|
39
|
+
/**
 * The filesystem operations the disk CAS depends on, each picked from a
 * capability interface in `runtime/deps.ts`. A complete `RuntimeDeps` (Node or
 * Deno) satisfies this type; every function in this module narrows it further
 * to only the operations it actually uses.
 */
export type FactDiskStorageDeps =
	& Pick<FsReadDeps, 'stat' | 'readdir' | 'read_file'>
	& Pick<FsWriteDeps, 'mkdir' | 'rename' | 'write_file' | 'fsync'>
	& Pick<FsStreamDeps, 'write_file_stream' | 'read_file_stream'>
	& Pick<FsRemoveDeps, 'remove'>;
|
|
45
|
+
/**
 * Destination of a streamed body, discriminated by `kind`:
 *
 * - `'embedded'` — the body was at or under the embedded threshold; `bytes`
 *   carries it in memory, bound for the PG `fact.bytes` column.
 * - `'disk'` — the bytes are already at `<facts_dir>/<shard>/<rest>`;
 *   `external_url` is the `file:` URL the row carries.
 */
export type StreamPlacement =
	| {kind: 'embedded'; bytes: Uint8Array}
	| {kind: 'disk'; external_url: FileFactUrl};
|
|
58
|
+
/**
 * What streaming an upload to storage produced. `PgFactStore.put_stream` turns
 * this into the `fact` row insert.
 */
export interface StreamFactToDiskResult {
	/** The `blake3:`-prefixed fact hash of the body. */
	hash: FactHash;
	/** SHA-256 of the body as bare hex (no prefix). */
	sha256: string;
	/** Number of bytes consumed from the stream. */
	size: number;
	/** Where the bytes landed — embedded in memory or in the disk CAS. */
	placement: StreamPlacement;
}
|
|
69
|
+
/**
 * Consume `source` once with bounded memory, hashing BLAKE3 + SHA-256
 * incrementally, and place the bytes by size. Bytes are buffered in memory
 * until they cross `embedded_threshold`; past that point the buffer and the
 * remaining chunks spill through a temp file that is atomically landed in the
 * disk CAS. Peak heap is `O(chunk + embedded_threshold)`, never `O(artifact)`,
 * so a multi-GB upload is never buffered in RAM.
 *
 * - **Embedded vs disk.** A body `<= embedded_threshold` stays in memory and
 *   comes back as `{kind: 'embedded'}` for the PG `bytes` column. A larger body
 *   (with a `facts_dir`) spills to `<facts_dir>/.tmp/…` and is `rename`d into
 *   `<facts_dir>/<shard>/<rest>` once the hash is known — `{kind: 'disk'}`.
 *   A larger body with `facts_dir === undefined` throws `PayloadTooLargeError`
 *   (matching `PgFactStore.put`).
 * - **Cap enforcement.** Aborts with `PayloadTooLargeError` as soon as the
 *   running byte count passes `max_bytes` — the mid-stream backstop for a
 *   chunked or mis-declared `Content-Length`.
 * - **Disk-full.** An `ENOSPC` from the temp-file write surfaces as
 *   `StorageFullError`.
 *
 * @mutates `facts_dir` filesystem
 */
export declare const stream_fact_to_disk: (
	deps: Pick<
		FactDiskStorageDeps,
		'mkdir' | 'rename' | 'remove' | 'write_file_stream' | 'fsync' | 'stat'
	>,
	facts_dir: string | undefined,
	source: ReadableStream<Uint8Array>,
	max_bytes: number,
	embedded_threshold: number,
) => Promise<StreamFactToDiskResult>;
|
|
92
|
+
/**
 * Buffering counterpart of `stream_fact_to_disk`: writes already-buffered
 * `bytes` for `hash` and publishes them at the canonical
 * `<facts_dir>/<shard>/<rest>` path through `commit_temp_to_cas` (fsync'd temp,
 * then atomic rename, dedup-aware). `PgFactStore.put` uses it for oversize
 * synchronous bytes.
 *
 * Resolves to the `file:` `external_url` to record on the `fact` row.
 *
 * @mutates `facts_dir` filesystem
 */
export declare const write_fact_bytes_to_disk: (
	deps: Pick<
		FactDiskStorageDeps,
		'mkdir' | 'rename' | 'remove' | 'write_file' | 'fsync' | 'stat'
	>,
	facts_dir: string,
	hash: FactHash,
	bytes: Uint8Array,
) => Promise<FileFactUrl>;
|
|
102
|
+
/**
 * Build a `FactExternalFetcher` that reads the `<facts_dir>/<shard>/<rest>`
 * layout produced by the writers in this module, using the injected `*Deps`.
 *
 * The fetcher does NOT verify content against the hash — `PgFactStore.get`
 * calls `fact_hash_verify(hash, bytes)` after the fetch and returns `null` on
 * a mismatch.
 *
 * The guard at the read seam is the `FILE_FACT_URL_PATTERN` regex (applied via
 * `parse_file_fact_url`): `..` segments, foreign schemes, and non-hex
 * characters are rejected before any disk access.
 */
export declare const create_disk_fact_fetcher: (
	deps: Pick<FactDiskStorageDeps, 'read_file' | 'read_file_stream'>,
	facts_dir: string,
) => FactExternalFetcher;
|
|
113
|
+
/**
 * Remove stale temp files that a hard crash (SIGKILL / OOM / host crash) left
 * under `<facts_dir>/.tmp/` mid-write, when the writers' own cleanup never
 * ran. Only `.tmp` entries whose mtime is older than `max_age_ms` are removed,
 * so an upload still in flight is not pulled out from under itself. The TS
 * twin of the Rust `sweep_orphan_temps`; call it on startup and on an interval.
 *
 * Best-effort by design:
 * - a missing `.tmp/` directory (no oversize upload has ever run) is a no-op;
 * - a runtime that does not report `mtime_ms` (a mock) leaves every temp untouched;
 * - a per-file stat/remove failure is logged and skipped instead of aborting the sweep.
 *
 * Resolves to the number of files removed.
 *
 * @mutates `facts_dir` filesystem
 */
export declare const sweep_orphan_temps: (
	deps: Pick<FactDiskStorageDeps, 'readdir' | 'stat' | 'remove'>,
	facts_dir: string,
	options?: {
		max_age_ms?: number;
		log?: Pick<Logger, 'warn'>;
	},
) => Promise<number>;
|
|
131
|
+
//# sourceMappingURL=fact_disk_storage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fact_disk_storage.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/db/fact_disk_storage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,OAAO,EAAmB,KAAK,QAAQ,EAAC,MAAM,+BAA+B,CAAC;AAC9E,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,yBAAyB,CAAC;AAEpD,OAAO,KAAK,EAAC,UAAU,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAC,MAAM,oBAAoB,CAAC;AAE5F,OAAO,EAIN,KAAK,WAAW,EAChB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,KAAK,EAAC,mBAAmB,EAAC,MAAM,iBAAiB,CAAC;AAGzD,sEAAsE;AACtE,eAAO,MAAM,gBAAgB,SAAS,CAAC;AAEvC,8EAA8E;AAC9E,eAAO,MAAM,0BAA0B,QAAiB,CAAC;AAEzD;;;;GAIG;AACH,MAAM,MAAM,mBAAmB,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,GAAG,SAAS,GAAG,WAAW,CAAC,GACnF,IAAI,CAAC,WAAW,EAAE,OAAO,GAAG,QAAQ,GAAG,YAAY,GAAG,OAAO,CAAC,GAC9D,IAAI,CAAC,YAAY,EAAE,mBAAmB,GAAG,kBAAkB,CAAC,GAC5D,IAAI,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;AAE9B;;;;;GAKG;AACH,MAAM,MAAM,eAAe,GACxB;IAAC,IAAI,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,UAAU,CAAA;CAAC,GACrC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,WAAW,CAAA;CAAC,CAAC;AAE7C;;;;GAIG;AACH,MAAM,WAAW,sBAAsB;IACtC,IAAI,EAAE,QAAQ,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,eAAe,CAAC;CAC3B;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,eAAO,MAAM,mBAAmB,GAC/B,MAAM,IAAI,CACT,mBAAmB,EACnB,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,mBAAmB,GAAG,OAAO,GAAG,MAAM,CACtE,EACD,WAAW,MAAM,GAAG,SAAS,EAC7B,QAAQ,cAAc,CAAC,UAAU,CAAC,EAClC,WAAW,MAAM,EACjB,oBAAoB,MAAM,KACxB,OAAO,CAAC,sBAAsB,CA8GhC,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,IAAI,CAAC,mBAAmB,EAAE,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,YAAY,GAAG,OAAO,GAAG,MAAM,CAAC,EAChG,WAAW,MAAM,EACjB,MAAM,QAAQ,EACd,OAAO,UAAU,KACf,OAAO,CAAC,WAAW,CAgBrB,CAAC;AAEF;;;;;;;;;GASG;AACH,eAAO,MAAM,wBAAwB,GACpC,MAAM,IAAI,CAAC,mBAAmB,EAAE,WAAW,GAAG,kBAAkB,CAAC,EACjE,WAAW,MAAM,KACf,mBAWF,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,eAAO,MAAM,kBAAkB,GAC9B,MAAM,IAAI,CAAC,mBAAmB,EAAE,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC,EAC9D,WAAW,MAAM,EACjB,UAAU;IAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAAC,KACzD,OAAO,CAAC,MAAM,CA6BhB,CAAC"}
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Filesystem CAS for externally-stored fact bytes — the disk half of
|
|
3
|
+
* `PgFactStore`, threaded over the injectable `runtime/*Deps` rather than raw
|
|
4
|
+
* `node:fs`, so it runs unchanged under Node, Deno, and a mock runtime.
|
|
5
|
+
*
|
|
6
|
+
* Large facts (over the embedded threshold) live on disk at the canonical
|
|
7
|
+
* sharded layout `<facts_dir>/<shard>/<rest>` — `<shard>` is the first 2 hex
|
|
8
|
+
* chars of the blake3 digest, `<rest>` the remaining 62 — with the `fact` row
|
|
9
|
+
* carrying `external_url = file:<shard>/<rest>` (disk-root-relative). The layout
|
|
10
|
+
* is single-sourced by `fact_disk_path` in `db/file_fact_url.ts`, so the write
|
|
11
|
+
* path here and the URL minted into the row can't drift. The TS twin of the
|
|
12
|
+
* Rust `fuz_fact` disk CAS.
|
|
13
|
+
*
|
|
14
|
+
* Writes land through `<facts_dir>/.tmp/<rand>.tmp`, are `fsync`ed, then
|
|
15
|
+
* `rename`d into the content-addressed final path. The `rename` is atomic on
|
|
16
|
+
* POSIX (a *concurrent reader* observing the path sees either the full content
|
|
17
|
+
* or nothing), but atomicity is not durability — the `fsync` before the rename
|
|
18
|
+
* is what guards against a *host crash* leaving a torn/zero file at a published
|
|
19
|
+
* CAS path, because the serving path streams the hash-named file without
|
|
20
|
+
* re-hashing it (`server/serve_fact_route.ts`). This twins the Rust `fuz_fact`
|
|
21
|
+
* §fsync posture: data-sync before the rename; the parent-dir fsync stays
|
|
22
|
+
* deliberately waived (a lost dirent is regenerable under content addressing).
|
|
23
|
+
* If the final path already exists the temp is dropped instead of renamed over
|
|
24
|
+
* — idempotent dedup (same hash → byte-identical content), mirroring the Rust
|
|
25
|
+
* commit path. `.tmp/` is a sibling of `<shard>/` under the same `facts_dir` so
|
|
26
|
+
* `rename` is always same-filesystem (no EXDEV).
|
|
27
|
+
*
|
|
28
|
+
* @module
|
|
29
|
+
*/
|
|
30
|
+
import { createHash } from 'node:crypto';
|
|
31
|
+
import { join } from 'node:path';
|
|
32
|
+
import { Blake3Hasher } from '@fuzdev/blake3_wasm';
|
|
33
|
+
import { blake3_ready } from '@fuzdev/fuz_util/hash_blake3.js';
|
|
34
|
+
import { to_hex } from '@fuzdev/fuz_util/hex.js';
|
|
35
|
+
import { FACT_HASH_PREFIX } from '@fuzdev/fuz_util/fact_hash.js';
|
|
36
|
+
import { generate_random_base64url } from '../crypto.js';
|
|
37
|
+
import { fact_disk_path, mint_file_fact_url, parse_file_fact_url, } from './file_fact_url.js';
|
|
38
|
+
import { is_enospc_error, PayloadTooLargeError, StorageFullError } from './fact_store_errors.js';
|
|
39
|
+
/** Directory name, relative to `facts_dir`, under which in-flight atomic temp files are staged. */
export const FACT_TMP_DIRNAME = '.tmp';
/** Default staleness cutoff (1 hour, in milliseconds): a `.tmp/*` file older than this is treated as orphaned. */
export const FACT_TMP_ORPHAN_MAX_AGE_MS = 1000 * 60 * 60;
|
|
43
|
+
/**
 * Stream `source` to storage with bounded memory: BLAKE3 + SHA-256 are updated
 * incrementally in one pass, chunks are buffered in memory until their total
 * crosses `embedded_threshold`, and from then on the buffered chunks plus the
 * rest of the stream are written through a temp file and published into the
 * disk CAS. Peak heap is `O(chunk + embedded_threshold)`, never `O(artifact)`.
 *
 * - **Embedded vs disk.** A body `<= embedded_threshold` is returned as
 *   `{kind: 'embedded', bytes}` for the PG `bytes` column. A larger body (with a
 *   `facts_dir`) spills to `<facts_dir>/.tmp/…`, is published via
 *   `commit_temp_to_cas`, and is returned as `{kind: 'disk', external_url}`.
 *   A larger body with `facts_dir === undefined` throws `PayloadTooLargeError`
 *   reported against `embedded_threshold` (matches `PgFactStore.put`).
 * - **Cap enforcement.** Throws `PayloadTooLargeError` the moment the running
 *   byte count passes `max_bytes`, in either phase — the mid-stream backstop
 *   for a chunked or mis-declared `Content-Length`.
 * - **Disk-full.** An `ENOSPC` from the temp-file write is rethrown as
 *   `StorageFullError`; any other write error is rethrown unchanged. Either
 *   way the temp file is removed (best-effort) first.
 * - **Cleanup.** The hasher is freed and the reader lock released on every
 *   exit path. The reader is taken before the hasher is allocated, so a
 *   `source` that is already locked fails without leaking a hasher.
 *
 * @param deps filesystem capabilities; this function calls `mkdir`, `write_file_stream`, and `remove`, and hands `deps` on to `commit_temp_to_cas`
 * @param facts_dir root of the disk CAS, or `undefined` when there is nowhere to spill
 * @param source the body to consume; locked for the duration of the call
 * @param max_bytes hard cap on the total number of bytes accepted
 * @param embedded_threshold largest body size that stays embedded
 * @returns the prefixed fact hash, hex SHA-256, byte count, and placement
 * @mutates `facts_dir` filesystem
 */
export const stream_fact_to_disk = async (deps, facts_dir, source, max_bytes, embedded_threshold) => {
	await blake3_ready;
	// Take the reader before allocating the hasher: `getReader()` throws when
	// `source` is already locked, and only what is allocated inside the `try`
	// below is covered by the `finally` cleanup.
	const reader = source.getReader();
	let blake3;
	try {
		blake3 = new Blake3Hasher();
		const sha256 = createHash('sha256');
		let size = 0;
		// Buffer leading bytes until they cross the embedded threshold; small facts
		// stay embedded (no disk), large ones never buffer past the threshold.
		const buffered = [];
		let buffered_len = 0;
		// Count first so an over-cap chunk is rejected before it is hashed.
		const hash_and_count = (chunk) => {
			size += chunk.length;
			if (size > max_bytes) {
				throw new PayloadTooLargeError(size, max_bytes);
			}
			blake3.update(chunk);
			sha256.update(chunk);
		};
		// Phase 1: read + hash + buffer until the threshold is crossed or the
		// stream ends. The crossing chunk is hashed + buffered here, then emitted
		// (not re-read) by the spill stream below.
		let spill_needed = false;
		for (;;) {
			const {done, value} = await reader.read();
			if (done) {
				break;
			}
			if (!value || value.length === 0) {
				continue;
			}
			hash_and_count(value);
			buffered.push(value);
			buffered_len += value.length;
			if (buffered_len > embedded_threshold) {
				spill_needed = true;
				break;
			}
		}
		if (!spill_needed) {
			const hash = FACT_HASH_PREFIX + to_hex(blake3.finalize());
			return {
				hash,
				sha256: sha256.digest('hex'),
				size,
				placement: {kind: 'embedded', bytes: concat_chunks(buffered, buffered_len)},
			};
		}
		if (facts_dir === undefined) {
			// Over the embedded threshold with nowhere to spill — same shape as the
			// `PgFactStore.put` oversize-without-disk_root reject.
			throw new PayloadTooLargeError(size, embedded_threshold);
		}
		// Phase 2: spill. A combined stream emits the already-hashed buffered
		// chunks, then continues pulling from `reader`, hashing each remaining
		// chunk as it flows, one chunk per `pull`.
		let buffer_index = 0;
		const combined = new ReadableStream({
			async pull(controller) {
				if (buffer_index < buffered.length) {
					controller.enqueue(buffered[buffer_index++]);
					return;
				}
				// Skip empty reads so each `pull` either enqueues a chunk or ends the stream.
				for (;;) {
					const {done, value} = await reader.read();
					if (done) {
						controller.close();
						return;
					}
					if (!value || value.length === 0) {
						continue;
					}
					try {
						hash_and_count(value);
					} catch (err) {
						// Surface the cap violation to the consumer of `combined`.
						controller.error(err);
						return;
					}
					controller.enqueue(value);
					return;
				}
			},
			cancel: (reason) => reader.cancel(reason),
		});
		const tmp_dir = join(facts_dir, FACT_TMP_DIRNAME);
		const tmp_path = join(tmp_dir, `${generate_random_base64url(16)}.tmp`);
		await deps.mkdir(tmp_dir, {recursive: true});
		try {
			await deps.write_file_stream(tmp_path, combined);
		} catch (err) {
			await deps.remove(tmp_path).catch(() => undefined);
			if (is_enospc_error(err)) {
				throw new StorageFullError(err);
			}
			throw err; // includes a mid-stream PayloadTooLargeError surfaced via the stream
		}
		const hash = FACT_HASH_PREFIX + to_hex(blake3.finalize());
		const {shard, rest} = await commit_temp_to_cas(deps, tmp_path, facts_dir, hash);
		return {
			hash,
			sha256: sha256.digest('hex'),
			size,
			placement: {kind: 'disk', external_url: mint_file_fact_url(shard, rest)},
		};
	} finally {
		blake3?.free();
		try {
			reader.releaseLock();
		} catch {
			// Already released/cancelled by the spill stream's cancel path.
		}
	}
};
|
|
178
|
+
/**
 * Buffering counterpart of `stream_fact_to_disk`: stages already-buffered
 * `bytes` in a randomly named temp file under `<facts_dir>/.tmp/`, then
 * publishes it at the canonical `<facts_dir>/<shard>/<rest>` path through
 * `commit_temp_to_cas` (fsync'd temp + atomic rename, dedup-aware). Used by
 * `PgFactStore.put` for oversize sync bytes.
 *
 * `hash` is taken as given — this function does not recompute it from `bytes`.
 *
 * A failed temp write removes the temp (best-effort) and rethrows; `ENOSPC`
 * is rethrown as `StorageFullError`.
 *
 * @returns the `file:` `external_url` for the `fact` row
 * @mutates `facts_dir` filesystem
 */
export const write_fact_bytes_to_disk = async (deps, facts_dir, hash, bytes) => {
	const staging_dir = join(facts_dir, FACT_TMP_DIRNAME);
	const staging_path = join(staging_dir, `${generate_random_base64url(16)}.tmp`);
	await deps.mkdir(staging_dir, {recursive: true});
	// Stage first (mapping disk-full), publish second — the same
	// write-then-commit shape as the streaming twin.
	try {
		await deps.write_file(staging_path, bytes);
	} catch (write_error) {
		await deps.remove(staging_path).catch(() => undefined);
		throw is_enospc_error(write_error) ? new StorageFullError(write_error) : write_error;
	}
	const location = await commit_temp_to_cas(deps, staging_path, facts_dir, hash);
	return mint_file_fact_url(location.shard, location.rest);
};
|
|
205
|
+
/**
 * Build a `FactExternalFetcher` that reads the `<facts_dir>/<shard>/<rest>`
 * layout the writers in this module produce, over the injected `*Deps`.
 *
 * The fetcher does NOT verify content against the hash — `PgFactStore.get`
 * calls `fact_hash_verify(hash, bytes)` after the fetch and returns `null` on
 * a mismatch.
 *
 * The guard at the read seam is `parse_file_fact_url` (the
 * `FILE_FACT_URL_PATTERN` regex): a URL it does not accept is refused with
 * `invalid file fact url: …` before any disk access. Both methods report that
 * refusal the same way — as a rejected promise, never a synchronous throw.
 *
 * @param deps `read_file` backs `fetch_bytes`; `read_file_stream` backs `fetch_stream`
 * @param facts_dir root directory the `file:` URLs are relative to
 * @returns a fetcher with `fetch_bytes` and `fetch_stream`
 */
export const create_disk_fact_fetcher = (deps, facts_dir) => {
	// Map a `file:<shard>/<rest>` URL to its on-disk path, refusing anything the parser rejects.
	const resolve_path = (url) => {
		const parsed = parse_file_fact_url(url);
		if (!parsed) {
			throw new Error(`invalid file fact url: ${url}`);
		}
		return join(facts_dir, parsed.shard, parsed.rest);
	};
	return {
		// `async` funnels a synchronous `resolve_path` throw into a rejection, so
		// callers handle a bad URL and a failed read through the same channel.
		fetch_bytes: async (url) => deps.read_file(resolve_path(url)),
		fetch_stream: async (url) => deps.read_file_stream(resolve_path(url)),
	};
};
|
|
228
|
+
/**
 * Remove stale temp files that a hard crash (SIGKILL / OOM / host crash) left
 * under `<facts_dir>/.tmp/` mid-write, when the writers' own cleanup never
 * ran. Only entries ending in `.tmp` whose mtime is older than `max_age_ms`
 * are removed, so an upload still in flight is not pulled out from under
 * itself. The TS twin of the Rust `sweep_orphan_temps`; call it on startup and
 * on an interval.
 *
 * Best-effort by design — the sweep never throws for a filesystem failure:
 * - a `.tmp/` directory that cannot be listed yields `0`; a missing directory
 *   (no oversize upload has ever run) is silent, any other listing failure is
 *   reported through `options.log` so it cannot hide indefinitely;
 * - a `stat` that reports nothing or no `mtime_ms` (e.g. a mock runtime)
 *   leaves that temp untouched;
 * - a per-file stat/remove failure is logged and skipped.
 *
 * @param deps `readdir`, `stat`, and `remove` capabilities
 * @param facts_dir root of the disk CAS
 * @param options `max_age_ms` overrides `FACT_TMP_ORPHAN_MAX_AGE_MS`; `log` receives warnings
 * @returns the number of temp files removed
 * @mutates `facts_dir` filesystem
 */
export const sweep_orphan_temps = async (deps, facts_dir, options) => {
	const max_age_ms = options?.max_age_ms ?? FACT_TMP_ORPHAN_MAX_AGE_MS;
	const tmp_dir = join(facts_dir, FACT_TMP_DIRNAME);
	let entries;
	try {
		entries = await deps.readdir(tmp_dir);
	} catch (err) {
		// NOTE(review): assumes the runtime's `readdir` reports a missing directory
		// as Node's `ENOENT` code or Deno's `NotFound` error name — confirm against
		// the `runtime/*` implementations.
		const missing = err?.code === 'ENOENT' || err?.name === 'NotFound';
		if (!missing) {
			// Not the normal "never spilled" case (e.g. a permission error): temps
			// could pile up unnoticed, so surface it while staying best-effort.
			options?.log?.warn(
				`sweep_orphan_temps: failed to list ${tmp_dir}:`,
				err instanceof Error ? err.message : String(err),
			);
		}
		return 0;
	}
	const cutoff = Date.now() - max_age_ms;
	let removed = 0;
	for (const entry of entries) {
		if (!entry.endsWith('.tmp')) {
			continue;
		}
		const path = join(tmp_dir, entry);
		try {
			const info = await deps.stat(path);
			// Unknown age (missing file, or a runtime that doesn't report mtime) →
			// leave it; never reap something we can't prove is stale.
			if (!info || info.mtime_ms === undefined || info.mtime_ms >= cutoff) {
				continue;
			}
			await deps.remove(path);
			removed++;
		} catch (err) {
			options?.log?.warn(
				`sweep_orphan_temps: failed to reap ${path}:`,
				err instanceof Error ? err.message : String(err),
			);
		}
	}
	return removed;
};
|
|
273
|
+
/**
 * Publish an already-written temp file into the CAS at
 * `<facts_dir>/<shard>/<rest>`, where `shard`/`rest` come from
 * `fact_disk_path(hash)`. The single commit path shared by both writers in
 * this module — twins the Rust `fuz_fact` `SpillFile::rename_into_cas`
 * (data-sync before rename; parent-dir fsync waived).
 *
 * Steps, in order:
 * 1. `fsync` the temp, so its bytes are durable before they can become the
 *    canonical body (the serve path streams the file without re-hashing).
 * 2. If the final path already exists, drop the temp instead — same hash means
 *    byte-identical content, so this is idempotent dedup.
 * 3. Otherwise create the shard directory and `rename` the temp into place.
 *
 * If any step fails the temp is unlinked (best-effort) and the error is
 * rethrown, with `ENOSPC` surfaced as `StorageFullError`.
 *
 * @returns the `{shard, rest}` pair the file was published under
 * @mutates `facts_dir` filesystem
 */
const commit_temp_to_cas = async (deps, tmp_path, facts_dir, hash) => {
	const {shard, rest} = fact_disk_path(hash);
	const final_path = join(facts_dir, shard, rest);
	// Best-effort unlink: a temp that is already gone is not an error.
	const discard_temp = () => deps.remove(tmp_path).catch(() => undefined);
	try {
		await deps.fsync(tmp_path);
		const already_published = await deps.stat(final_path);
		if (already_published) {
			await discard_temp();
		} else {
			await deps.mkdir(join(facts_dir, shard), {recursive: true});
			await deps.rename(tmp_path, final_path);
		}
	} catch (commit_error) {
		await discard_temp();
		throw is_enospc_error(commit_error) ? new StorageFullError(commit_error) : commit_error;
	}
	return {shard, rest};
};
|
|
306
|
+
/** Join `chunks` end-to-end, in order, into one freshly allocated `Uint8Array` of `total` bytes. */
const concat_chunks = (chunks, total) => {
	const joined = new Uint8Array(total);
	// Thread the write cursor through the fold; each chunk lands right after the previous one.
	chunks.reduce((cursor, piece) => {
		joined.set(piece, cursor);
		return cursor + piece.length;
	}, 0);
	return joined;
};
|
package/dist/db/fact_store.d.ts
CHANGED
|
@@ -14,21 +14,23 @@
|
|
|
14
14
|
* - mismatched external bytes return `null` + log warning (treat as
|
|
15
15
|
* unavailable; GC / repair is a separate concern)
|
|
16
16
|
*
|
|
17
|
-
* Embedded vs
|
|
18
|
-
* `bytes
|
|
19
|
-
* `
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
17
|
+
* Embedded vs disk split: writes route by size. Bytes `<= embedded_threshold`
|
|
18
|
+
* land in the PG `bytes` column; larger bytes go to the disk CAS at
|
|
19
|
+
* `<facts_dir>/<shard>/<rest>` (`db/fact_disk_storage.ts`) and the row records a
|
|
20
|
+
* `file:<shard>/<rest>` `external_url`. `put` takes fully-buffered bytes;
|
|
21
|
+
* `put_stream` is the bounded-memory streaming twin (hash BLAKE3 + SHA-256 in
|
|
22
|
+
* one pass, spill past the threshold, enforce `max_bytes` / `ENOSPC`). Both need
|
|
23
|
+
* `disk_root` + `fs` (the `runtime/*Deps`) configured for the over-threshold
|
|
24
|
+
* path; without them, an oversize `put` throws and the caller must `put_ref`
|
|
25
|
+
* against an externally-managed URL (federation / stub-fetcher tests).
|
|
25
26
|
*
|
|
26
27
|
* @module
|
|
27
28
|
*/
|
|
28
29
|
import type { QueryDeps } from './query_deps.js';
|
|
29
30
|
import type { Logger } from '@fuzdev/fuz_util/log.js';
|
|
30
31
|
import { type FactHash } from '@fuzdev/fuz_util/fact_hash.js';
|
|
31
|
-
import type { FactMeta, FactPutOptions, FactStore } from '@fuzdev/fuz_util/fact_store.js';
|
|
32
|
+
import type { FactMeta, FactPutOptions, FactStore, PutStreamOutcome } from '@fuzdev/fuz_util/fact_store.js';
|
|
33
|
+
import { type FactDiskStorageDeps } from './fact_disk_storage.js';
|
|
32
34
|
/** Default embedded-vs-referenced cutoff (1 MiB). */
|
|
33
35
|
export declare const FACT_EMBEDDED_THRESHOLD_DEFAULT: number;
|
|
34
36
|
/** Fetcher abstraction so tests can stub external URL retrieval. */
|
|
@@ -43,16 +45,24 @@ export declare const create_default_fetcher: () => FactExternalFetcher;
|
|
|
43
45
|
*
|
|
44
46
|
* `embedded_threshold` (bytes) is the inline-vs-external cutoff: payloads
|
|
45
47
|
* at or under it store embedded in the `fact` row, larger ones route to
|
|
46
|
-
* the
|
|
48
|
+
* the disk CAS. Defaults to `FACT_EMBEDDED_THRESHOLD_DEFAULT`
|
|
47
49
|
* (1 MiB). Consumers tune it per workload — e.g. a much lower bound
|
|
48
50
|
* (~16 KiB) keeps only small JSON inline and routes image originals +
|
|
49
|
-
* thumbnails
|
|
50
|
-
*
|
|
51
|
-
*
|
|
51
|
+
* thumbnails to disk.
|
|
52
|
+
*
|
|
53
|
+
* `disk_root` is the facts directory backing the `<shard>/<rest>` disk CAS;
|
|
54
|
+
* `fs` supplies the filesystem capabilities (a `RuntimeDeps` satisfies it).
|
|
55
|
+
* When both are set, oversize `put` + `put_stream` write to disk and the
|
|
56
|
+
* default `fetcher` reads from it. When either is unset, oversize `put`/`put_stream`
|
|
57
|
+
* spill throws and reads fall back to the `globalThis.fetch`-backed default
|
|
58
|
+
* fetcher (or an injected stub). `log` is optional — the only call site is the
|
|
59
|
+
* verify-mismatch warning path.
|
|
52
60
|
*/
|
|
53
61
|
export interface PgFactStoreDeps {
|
|
54
62
|
deps: QueryDeps;
|
|
55
63
|
embedded_threshold?: number;
|
|
64
|
+
disk_root?: string;
|
|
65
|
+
fs?: FactDiskStorageDeps;
|
|
56
66
|
fetcher?: FactExternalFetcher;
|
|
57
67
|
log?: Logger;
|
|
58
68
|
}
|
|
@@ -64,11 +74,32 @@ export declare class PgFactStore implements FactStore {
|
|
|
64
74
|
#private;
|
|
65
75
|
constructor(options: PgFactStoreDeps);
|
|
66
76
|
/**
|
|
67
|
-
* Store
|
|
68
|
-
*
|
|
69
|
-
*
|
|
77
|
+
* Store fully-buffered bytes, routing by size: `<= embedded_threshold` into
|
|
78
|
+
* the PG `bytes` column; larger into the disk CAS (when `disk_root` + `fs`
|
|
79
|
+
* are configured) at `<facts_dir>/<shard>/<rest>` with a `file:` URL. Oversize
|
|
80
|
+
* without a disk root throws so the caller routes it through `put_ref`
|
|
81
|
+
* explicitly. Idempotent — `ON CONFLICT DO NOTHING` + content-addressed disk
|
|
82
|
+
* filenames make a re-write a no-op.
|
|
70
83
|
*/
|
|
71
84
|
put(bytes: Uint8Array, options?: FactPutOptions): Promise<FactHash>;
|
|
85
|
+
/**
|
|
86
|
+
* Stream bytes into the store with bounded memory, returning the finalized
|
|
87
|
+
* digests + size. Delegates the byte path to `stream_fact_to_disk` (hash
|
|
88
|
+
* BLAKE3 + SHA-256 in one pass, buffer to the embedded threshold, spill to the
|
|
89
|
+
* disk CAS), then inserts the `fact` row by placement — embedded bytes go to
|
|
90
|
+
* the PG `bytes` column, disk-spilled bytes record the `file:` `external_url`.
|
|
91
|
+
* The cap is enforced mid-stream (`PayloadTooLargeError`); a disk-full mid-
|
|
92
|
+
* stream throws `StorageFullError`.
|
|
93
|
+
*
|
|
94
|
+
* Refs: explicit `options.refs` are recorded; JSON auto-extraction is NOT
|
|
95
|
+
* attempted (it would need a buffered re-read, defeating the bounded-memory
|
|
96
|
+
* contract) — streamed uploads are opaque blobs.
|
|
97
|
+
*
|
|
98
|
+
* Requires `fs` (and, for the over-threshold spill, `disk_root`) to be
|
|
99
|
+
* configured. The streaming twin of `put`; mirrors the Rust
|
|
100
|
+
* `FactStore::put_stream`.
|
|
101
|
+
*/
|
|
102
|
+
put_stream(stream: ReadableStream<Uint8Array>, max_bytes: number, options?: FactPutOptions): Promise<PutStreamOutcome>;
|
|
72
103
|
/**
|
|
73
104
|
* Stream-hash external content and record `(hash, external_url, size)`.
|
|
74
105
|
* Throws when the streamed byte count disagrees with the caller's
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fact_store.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/db/fact_store.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"fact_store.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/db/fact_store.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,OAAO,KAAK,EAAC,SAAS,EAAC,MAAM,iBAAiB,CAAC;AAC/C,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,yBAAyB,CAAC;AAEpD,OAAO,EAKN,KAAK,QAAQ,EACb,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACX,QAAQ,EACR,cAAc,EACd,SAAS,EACT,gBAAgB,EAChB,MAAM,gCAAgC,CAAC;AAWxC,OAAO,EAIN,KAAK,mBAAmB,EACxB,MAAM,wBAAwB,CAAC;AAEhC,qDAAqD;AACrD,eAAO,MAAM,+BAA+B,QAAc,CAAC;AAE3D,oEAAoE;AACpE,MAAM,WAAW,mBAAmB;IACnC,YAAY,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC,CAAC;IACnE,WAAW,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CAClD;AAED,oDAAoD;AACpD,eAAO,MAAM,sBAAsB,QAAO,mBAkBxC,CAAC;AAEH;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,SAAS,CAAC;IAChB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,EAAE,CAAC,EAAE,mBAAmB,CAAC;IACzB,OAAO,CAAC,EAAE,mBAAmB,CAAC;IAC9B,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED;;;GAGG;AACH,qBAAa,WAAY,YAAW,SAAS;;gBAQhC,OAAO,EAAE,eAAe;IAapC;;;;;;;OAOG;IACG,GAAG,CAAC,KAAK,EAAE,UAAU,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC;IA6BzE;;;;;;;;;;;;;;;;OAgBG;IACG,UAAU,CACf,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,EAClC,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,cAAc,GACtB,OAAO,CAAC,gBAAgB,CAAC;IA6B5B;;;;;OAKG;IACG,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC;IAqBrF;;;;OAIG;IACG,GAAG,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IA4B/C,GAAG,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC;IAIrC,QAAQ,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC;IAWlD,QAAQ,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAIxD;;;;;;;;;;;;;;;;;;OAkBG;IACG,MAAM,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,GAAG,IAAI,CAAA;KAAC,GAAG,IAAI,CAAC;CAGzF"}
|
package/dist/db/fact_store.js
CHANGED
|
@@ -14,19 +14,21 @@
|
|
|
14
14
|
* - mismatched external bytes return `null` + log warning (treat as
|
|
15
15
|
* unavailable; GC / repair is a separate concern)
|
|
16
16
|
*
|
|
17
|
-
* Embedded vs
|
|
18
|
-
* `bytes
|
|
19
|
-
* `
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
17
|
+
* Embedded vs disk split: writes route by size. Bytes `<= embedded_threshold`
|
|
18
|
+
* land in the PG `bytes` column; larger bytes go to the disk CAS at
|
|
19
|
+
* `<facts_dir>/<shard>/<rest>` (`db/fact_disk_storage.ts`) and the row records a
|
|
20
|
+
* `file:<shard>/<rest>` `external_url`. `put` takes fully-buffered bytes;
|
|
21
|
+
* `put_stream` is the bounded-memory streaming twin (hash BLAKE3 + SHA-256 in
|
|
22
|
+
* one pass, spill past the threshold, enforce `max_bytes` / `ENOSPC`). Both need
|
|
23
|
+
* `disk_root` + `fs` (the `runtime/*Deps`) configured for the over-threshold
|
|
24
|
+
* path; without them, an oversize `put` throws and the caller must `put_ref`
|
|
25
|
+
* against an externally-managed URL (federation / stub-fetcher tests).
|
|
25
26
|
*
|
|
26
27
|
* @module
|
|
27
28
|
*/
|
|
28
29
|
import { fact_hash_bytes, fact_hash_stream, fact_hash_verify, fact_hash_extract_refs, } from '@fuzdev/fuz_util/fact_hash.js';
|
|
29
30
|
import { query_delete_fact, query_get_fact, query_get_fact_meta, query_get_fact_refs, query_has_fact, query_put_fact, query_put_fact_refs, } from './fact_queries.js';
|
|
31
|
+
import { create_disk_fact_fetcher, stream_fact_to_disk, write_fact_bytes_to_disk, } from './fact_disk_storage.js';
|
|
30
32
|
/** Default embedded-vs-referenced cutoff (1 MiB). */
|
|
31
33
|
export const FACT_EMBEDDED_THRESHOLD_DEFAULT = 1024 * 1024;
|
|
32
34
|
/** Default fetcher backed by `globalThis.fetch`. */
|
|
@@ -56,28 +58,49 @@ export const create_default_fetcher = () => ({
|
|
|
56
58
|
export class PgFactStore {
|
|
57
59
|
#deps;
|
|
58
60
|
#embedded_threshold;
|
|
61
|
+
#disk_root;
|
|
62
|
+
#fs;
|
|
59
63
|
#fetcher;
|
|
60
64
|
#log;
|
|
61
65
|
constructor(options) {
|
|
62
66
|
this.#deps = options.deps;
|
|
63
67
|
this.#embedded_threshold = options.embedded_threshold ?? FACT_EMBEDDED_THRESHOLD_DEFAULT;
|
|
64
|
-
this.#
|
|
68
|
+
this.#disk_root = options.disk_root;
|
|
69
|
+
this.#fs = options.fs;
|
|
70
|
+
this.#fetcher =
|
|
71
|
+
options.fetcher ??
|
|
72
|
+
(options.disk_root !== undefined && options.fs !== undefined
|
|
73
|
+
? create_disk_fact_fetcher(options.fs, options.disk_root)
|
|
74
|
+
: create_default_fetcher());
|
|
65
75
|
this.#log = options.log;
|
|
66
76
|
}
|
|
67
77
|
/**
|
|
68
|
-
* Store
|
|
69
|
-
*
|
|
70
|
-
*
|
|
78
|
+
* Store fully-buffered bytes, routing by size: `<= embedded_threshold` into
|
|
79
|
+
* the PG `bytes` column; larger into the disk CAS (when `disk_root` + `fs`
|
|
80
|
+
* are configured) at `<facts_dir>/<shard>/<rest>` with a `file:` URL. Oversize
|
|
81
|
+
* without a disk root throws so the caller routes it through `put_ref`
|
|
82
|
+
* explicitly. Idempotent — `ON CONFLICT DO NOTHING` + content-addressed disk
|
|
83
|
+
* filenames make a re-write a no-op.
|
|
71
84
|
*/
|
|
72
85
|
async put(bytes, options) {
|
|
86
|
+
const hash = fact_hash_bytes(bytes);
|
|
87
|
+
let row_bytes;
|
|
88
|
+
let row_external_url;
|
|
73
89
|
if (bytes.length > this.#embedded_threshold) {
|
|
74
|
-
|
|
90
|
+
if (this.#disk_root === undefined || this.#fs === undefined) {
|
|
91
|
+
throw new Error(`fact bytes exceed embedded threshold (${bytes.length} > ${this.#embedded_threshold}); configure disk_root or use put_ref for external storage`);
|
|
92
|
+
}
|
|
93
|
+
row_bytes = null;
|
|
94
|
+
row_external_url = await write_fact_bytes_to_disk(this.#fs, this.#disk_root, hash, bytes);
|
|
95
|
+
}
|
|
96
|
+
else {
|
|
97
|
+
row_bytes = bytes;
|
|
98
|
+
row_external_url = null;
|
|
75
99
|
}
|
|
76
|
-
const hash = fact_hash_bytes(bytes);
|
|
77
100
|
const inserted = await query_put_fact(this.#deps, {
|
|
78
101
|
hash,
|
|
79
|
-
bytes,
|
|
80
|
-
external_url:
|
|
102
|
+
bytes: row_bytes,
|
|
103
|
+
external_url: row_external_url,
|
|
81
104
|
content_type: options?.content_type ?? null,
|
|
82
105
|
size: bytes.length,
|
|
83
106
|
});
|
|
@@ -86,6 +109,42 @@ export class PgFactStore {
|
|
|
86
109
|
}
|
|
87
110
|
return hash;
|
|
88
111
|
}
|
|
112
|
+
/**
|
|
113
|
+
* Stream bytes into the store with bounded memory, returning the finalized
|
|
114
|
+
* digests + size. Delegates the byte path to `stream_fact_to_disk` (hash
|
|
115
|
+
* BLAKE3 + SHA-256 in one pass, buffer to the embedded threshold, spill to the
|
|
116
|
+
* disk CAS), then inserts the `fact` row by placement — embedded bytes go to
|
|
117
|
+
* the PG `bytes` column, disk-spilled bytes record the `file:` `external_url`.
|
|
118
|
+
* The cap is enforced mid-stream (`PayloadTooLargeError`); a disk-full mid-
|
|
119
|
+
* stream throws `StorageFullError`.
|
|
120
|
+
*
|
|
121
|
+
* Refs: explicit `options.refs` are recorded; JSON auto-extraction is NOT
|
|
122
|
+
* attempted (it would need a buffered re-read, defeating the bounded-memory
|
|
123
|
+
* contract) — streamed uploads are opaque blobs.
|
|
124
|
+
*
|
|
125
|
+
* Requires `fs` (and, for the over-threshold spill, `disk_root`) to be
|
|
126
|
+
* configured. The streaming twin of `put`; mirrors the Rust
|
|
127
|
+
* `FactStore::put_stream`.
|
|
128
|
+
*/
|
|
129
|
+
async put_stream(stream, max_bytes, options) {
|
|
130
|
+
if (this.#fs === undefined) {
|
|
131
|
+
throw new Error('PgFactStore.put_stream requires `fs` (FactDiskStorageDeps) to be configured');
|
|
132
|
+
}
|
|
133
|
+
const streamed = await stream_fact_to_disk(this.#fs, this.#disk_root, stream, max_bytes, this.#embedded_threshold);
|
|
134
|
+
const row_bytes = streamed.placement.kind === 'embedded' ? streamed.placement.bytes : null;
|
|
135
|
+
const row_external_url = streamed.placement.kind === 'disk' ? streamed.placement.external_url : null;
|
|
136
|
+
const inserted = await query_put_fact(this.#deps, {
|
|
137
|
+
hash: streamed.hash,
|
|
138
|
+
bytes: row_bytes,
|
|
139
|
+
external_url: row_external_url,
|
|
140
|
+
content_type: options?.content_type ?? null,
|
|
141
|
+
size: streamed.size,
|
|
142
|
+
});
|
|
143
|
+
if (inserted && options?.refs && options.refs.length > 0) {
|
|
144
|
+
await query_put_fact_refs(this.#deps, streamed.hash, options.refs);
|
|
145
|
+
}
|
|
146
|
+
return { hash: streamed.hash, sha256: streamed.sha256, size: streamed.size };
|
|
147
|
+
}
|
|
89
148
|
/**
|
|
90
149
|
* Stream-hash external content and record `(hash, external_url, size)`.
|
|
91
150
|
* Throws when the streamed byte count disagrees with the caller's
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed errors thrown by `PgFactStore.put_stream` so a file-store route can
|
|
3
|
+
* map them to the canonical wire responses.
|
|
4
|
+
*
|
|
5
|
+
* The Rust twin uses `FactError::PayloadTooLarge` / `::StorageFull` (`fuz_fact`);
|
|
6
|
+
* these TS classes carry the same two cases so the upload handler can branch
|
|
7
|
+
* identically and return the same status + body shape (`413` / `507`).
|
|
8
|
+
*
|
|
9
|
+
* @module
|
|
10
|
+
*/
|
|
11
|
+
/**
|
|
12
|
+
* The streamed upload exceeded the byte cap. Thrown by `put_stream` when its
|
|
13
|
+
* mid-stream counter passes `max_bytes` — the backstop for a chunked or
|
|
14
|
+
* mis-declared `Content-Length` that the cheap header pre-check can't catch.
|
|
15
|
+
* A consumer route maps this to `413`.
|
|
16
|
+
*/
|
|
17
|
+
export declare class PayloadTooLargeError extends Error {
|
|
18
|
+
/** Bytes read before the cap tripped (may exceed `max_bytes` by up to one chunk). */
|
|
19
|
+
readonly bytes_read: number;
|
|
20
|
+
readonly max_bytes: number;
|
|
21
|
+
constructor(bytes_read: number, max_bytes: number);
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* The disk filled mid-stream (`ENOSPC`). Thrown by `put_stream` when the
|
|
25
|
+
* temp-file write fails for lack of space — the real disk-full guarantee that
|
|
26
|
+
* a best-effort free-space preflight can't promise (chunked uploads, TOCTOU
|
|
27
|
+
* races). A consumer route maps this to `507`.
|
|
28
|
+
*/
|
|
29
|
+
export declare class StorageFullError extends Error {
|
|
30
|
+
constructor(cause?: unknown);
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Whether a thrown value is a Node filesystem `ENOSPC` (no space left on
|
|
34
|
+
* device). Used by the streaming disk write to translate the raw FS error
|
|
35
|
+
* into a `StorageFullError`.
|
|
36
|
+
*/
|
|
37
|
+
export declare const is_enospc_error: (err: unknown) => boolean;
|
|
38
|
+
//# sourceMappingURL=fact_store_errors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fact_store_errors.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/db/fact_store_errors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH;;;;;GAKG;AACH,qBAAa,oBAAqB,SAAQ,KAAK;IAC9C,+EAA+E;IAC/E,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;gBACf,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;CAMjD;AAED;;;;;GAKG;AACH,qBAAa,gBAAiB,SAAQ,KAAK;gBAC9B,KAAK,CAAC,EAAE,OAAO;CAI3B;AAED;;;;GAIG;AACH,eAAO,MAAM,eAAe,GAAI,KAAK,OAAO,KAAG,OAIH,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed errors thrown by `PgFactStore.put_stream` so a file-store route can
|
|
3
|
+
* map them to the canonical wire responses.
|
|
4
|
+
*
|
|
5
|
+
* The Rust twin uses `FactError::PayloadTooLarge` / `::StorageFull` (`fuz_fact`);
|
|
6
|
+
* these TS classes carry the same two cases so the upload handler can branch
|
|
7
|
+
* identically and return the same status + body shape (`413` / `507`).
|
|
8
|
+
*
|
|
9
|
+
* @module
|
|
10
|
+
*/
|
|
11
|
+
/**
|
|
12
|
+
* The streamed upload exceeded the byte cap. Thrown by `put_stream` when its
|
|
13
|
+
* mid-stream counter passes `max_bytes` — the backstop for a chunked or
|
|
14
|
+
* mis-declared `Content-Length` that the cheap header pre-check can't catch.
|
|
15
|
+
* A consumer route maps this to `413`.
|
|
16
|
+
*/
|
|
17
|
+
export class PayloadTooLargeError extends Error {
|
|
18
|
+
/** Bytes read before the cap tripped (may exceed `max_bytes` by up to one chunk). */
|
|
19
|
+
bytes_read;
|
|
20
|
+
max_bytes;
|
|
21
|
+
constructor(bytes_read, max_bytes) {
|
|
22
|
+
super(`payload too large: read ${bytes_read} bytes, exceeds ${max_bytes} byte limit`);
|
|
23
|
+
this.name = 'PayloadTooLargeError';
|
|
24
|
+
this.bytes_read = bytes_read;
|
|
25
|
+
this.max_bytes = max_bytes;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* The disk filled mid-stream (`ENOSPC`). Thrown by `put_stream` when the
|
|
30
|
+
* temp-file write fails for lack of space — the real disk-full guarantee that
|
|
31
|
+
* a best-effort free-space preflight can't promise (chunked uploads, TOCTOU
|
|
32
|
+
* races). A consumer route maps this to `507`.
|
|
33
|
+
*/
|
|
34
|
+
export class StorageFullError extends Error {
|
|
35
|
+
constructor(cause) {
|
|
36
|
+
super('storage_full', cause === undefined ? undefined : { cause });
|
|
37
|
+
this.name = 'StorageFullError';
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Whether a thrown value is a Node filesystem `ENOSPC` (no space left on
|
|
42
|
+
* device). Used by the streaming disk write to translate the raw FS error
|
|
43
|
+
* into a `StorageFullError`.
|
|
44
|
+
*/
|
|
45
|
+
export const is_enospc_error = (err) => typeof err === 'object' &&
|
|
46
|
+
err !== null &&
|
|
47
|
+
'code' in err &&
|
|
48
|
+
err.code === 'ENOSPC';
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Canonical filesystem-fact URL shape.
|
|
2
|
+
* Canonical filesystem-fact URL shape + on-disk layout.
|
|
3
3
|
*
|
|
4
|
-
* `external_url` on the generic `
|
|
4
|
+
* `external_url` on the generic `fact` row is `string | null` because the
|
|
5
5
|
* `FactStore` interface stays federation-friendly (future
|
|
6
6
|
* `https://...` / `s3://...` shapes). Filesystem-minted URLs are exactly
|
|
7
7
|
* `file:<shard>/<rest>` where `<shard>` is the first 2 hex chars of the
|
|
@@ -9,10 +9,11 @@
|
|
|
9
9
|
* `<facts_dir>/<shard>/<rest>` after the writer atomically temp+renames
|
|
10
10
|
* them in.
|
|
11
11
|
*
|
|
12
|
-
* Centralizing the regex keeps the shape in one
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
12
|
+
* Centralizing the regex + the `fact_disk_path` split keeps the shape in one
|
|
13
|
+
* place: `PgFactStore`'s disk CAS (`db/fact_disk_storage.ts`), the
|
|
14
|
+
* `serve_fact_route` defense-in-depth check, and the `file_fact_fetcher`
|
|
15
|
+
* resolver all derive the layout here, so the write path and the read path
|
|
16
|
+
* can't drift. The TS twin of the Rust `fact_disk_path` (`fuz_fact`).
|
|
16
17
|
*
|
|
17
18
|
* Defense-in-depth: a `..` segment can't match (`.` isn't in `[0-9a-f]`),
|
|
18
19
|
* neither can absolute paths, query strings, or any non-hex character.
|
|
@@ -21,6 +22,7 @@
|
|
|
21
22
|
*
|
|
22
23
|
* @module
|
|
23
24
|
*/
|
|
25
|
+
import { type FactHash } from '@fuzdev/fuz_util/fact_hash.js';
|
|
24
26
|
import { z } from 'zod';
|
|
25
27
|
/** Anchored, capture-group form: `^file:(<shard>)/(<rest>)$`. */
|
|
26
28
|
export declare const FILE_FACT_URL_PATTERN: RegExp;
|
|
@@ -33,6 +35,17 @@ export declare const FileFactUrl: z.core.$ZodBranded<z.ZodString, "FileFactUrl",
|
|
|
33
35
|
export type FileFactUrl = z.infer<typeof FileFactUrl>;
|
|
34
36
|
/** Type guard. Useful when discriminating a `string | null` column. */
|
|
35
37
|
export declare const is_file_fact_url: (s: string) => s is FileFactUrl;
|
|
38
|
+
/**
|
|
39
|
+
* Split a `FactHash` into its on-disk `<shard>/<rest>` parts — the first 2
|
|
40
|
+
* hex chars of the digest (shard subdir) + the remaining 62. The single
|
|
41
|
+
* source of truth for the disk layout, so the write path (`put` /
|
|
42
|
+
* `put_stream`) and the URL minted into the `fact` row can't disagree.
|
|
43
|
+
* Mirrors the Rust `fact_disk_path` in `fuz_fact`.
|
|
44
|
+
*/
|
|
45
|
+
export declare const fact_disk_path: (hash: FactHash) => {
|
|
46
|
+
shard: string;
|
|
47
|
+
rest: string;
|
|
48
|
+
};
|
|
36
49
|
/**
|
|
37
50
|
* Validate a string against the canonical shape. Returns the branded URL
|
|
38
51
|
* plus its parsed parts, or `null` on shape mismatch — callers decide
|
|
@@ -45,9 +58,9 @@ export declare const parse_file_fact_url: (url: string) => {
|
|
|
45
58
|
} | null;
|
|
46
59
|
/**
|
|
47
60
|
* Construct a canonical `file:<shard>/<rest>` URL. The writer side
|
|
48
|
-
* (`
|
|
49
|
-
* hash
|
|
50
|
-
* a single edit.
|
|
61
|
+
* (`db/fact_disk_storage.ts`) assembles the shape from a freshly-computed
|
|
62
|
+
* hash via `fact_disk_path`; this helper centralizes the literal so a
|
|
63
|
+
* future shape change is a single edit.
|
|
51
64
|
*/
|
|
52
65
|
export declare const mint_file_fact_url: (shard: string, rest: string) => FileFactUrl;
|
|
53
66
|
//# sourceMappingURL=file_fact_url.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file_fact_url.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/db/file_fact_url.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,EAAmB,KAAK,QAAQ,EAAC,MAAM,+BAA+B,CAAC;AAC9E,OAAO,EAAC,CAAC,EAAC,MAAM,KAAK,CAAC;AAEtB,iEAAiE;AACjE,eAAO,MAAM,qBAAqB,QAAyC,CAAC;AAE5E;;;;GAIG;AACH,eAAO,MAAM,WAAW,uDAA+D,CAAC;AACxF,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC;AAEtD,uEAAuE;AACvE,eAAO,MAAM,gBAAgB,GAAI,GAAG,MAAM,KAAG,CAAC,IAAI,WAA4C,CAAC;AAE/F;;;;;;GAMG;AACH,eAAO,MAAM,cAAc,GAAI,MAAM,QAAQ,KAAG;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAG3E,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,mBAAmB,GAC/B,KAAK,MAAM,KACT;IAAC,GAAG,EAAE,WAAW,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAC,GAAG,IAIpD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,kBAAkB,GAAI,OAAO,MAAM,EAAE,MAAM,MAAM,KAAG,WAC1B,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Canonical filesystem-fact URL shape.
|
|
2
|
+
* Canonical filesystem-fact URL shape + on-disk layout.
|
|
3
3
|
*
|
|
4
|
-
* `external_url` on the generic `
|
|
4
|
+
* `external_url` on the generic `fact` row is `string | null` because the
|
|
5
5
|
* `FactStore` interface stays federation-friendly (future
|
|
6
6
|
* `https://...` / `s3://...` shapes). Filesystem-minted URLs are exactly
|
|
7
7
|
* `file:<shard>/<rest>` where `<shard>` is the first 2 hex chars of the
|
|
@@ -9,10 +9,11 @@
|
|
|
9
9
|
* `<facts_dir>/<shard>/<rest>` after the writer atomically temp+renames
|
|
10
10
|
* them in.
|
|
11
11
|
*
|
|
12
|
-
* Centralizing the regex keeps the shape in one
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
12
|
+
* Centralizing the regex + the `fact_disk_path` split keeps the shape in one
|
|
13
|
+
* place: `PgFactStore`'s disk CAS (`db/fact_disk_storage.ts`), the
|
|
14
|
+
* `serve_fact_route` defense-in-depth check, and the `file_fact_fetcher`
|
|
15
|
+
* resolver all derive the layout here, so the write path and the read path
|
|
16
|
+
* can't drift. The TS twin of the Rust `fact_disk_path` (`fuz_fact`).
|
|
16
17
|
*
|
|
17
18
|
* Defense-in-depth: a `..` segment can't match (`.` isn't in `[0-9a-f]`),
|
|
18
19
|
* neither can absolute paths, query strings, or any non-hex character.
|
|
@@ -21,6 +22,7 @@
|
|
|
21
22
|
*
|
|
22
23
|
* @module
|
|
23
24
|
*/
|
|
25
|
+
import { FACT_HASH_PREFIX } from '@fuzdev/fuz_util/fact_hash.js';
|
|
24
26
|
import { z } from 'zod';
|
|
25
27
|
/** Anchored, capture-group form: `^file:(<shard>)/(<rest>)$`. */
|
|
26
28
|
export const FILE_FACT_URL_PATTERN = /^file:([0-9a-f]{2})\/([0-9a-f]{62})$/;
|
|
@@ -32,6 +34,17 @@ export const FILE_FACT_URL_PATTERN = /^file:([0-9a-f]{2})\/([0-9a-f]{62})$/;
|
|
|
32
34
|
export const FileFactUrl = z.string().regex(FILE_FACT_URL_PATTERN).brand('FileFactUrl');
|
|
33
35
|
/** Type guard. Useful when discriminating a `string | null` column. */
|
|
34
36
|
export const is_file_fact_url = (s) => FILE_FACT_URL_PATTERN.test(s);
|
|
37
|
+
/**
|
|
38
|
+
* Split a `FactHash` into its on-disk `<shard>/<rest>` parts — the first 2
|
|
39
|
+
* hex chars of the digest (shard subdir) + the remaining 62. The single
|
|
40
|
+
* source of truth for the disk layout, so the write path (`put` /
|
|
41
|
+
* `put_stream`) and the URL minted into the `fact` row can't disagree.
|
|
42
|
+
* Mirrors the Rust `fact_disk_path` in `fuz_fact`.
|
|
43
|
+
*/
|
|
44
|
+
export const fact_disk_path = (hash) => {
|
|
45
|
+
const hex = hash.slice(FACT_HASH_PREFIX.length);
|
|
46
|
+
return { shard: hex.slice(0, 2), rest: hex.slice(2) };
|
|
47
|
+
};
|
|
35
48
|
/**
|
|
36
49
|
* Validate a string against the canonical shape. Returns the branded URL
|
|
37
50
|
* plus its parsed parts, or `null` on shape mismatch — callers decide
|
|
@@ -45,8 +58,8 @@ export const parse_file_fact_url = (url) => {
|
|
|
45
58
|
};
|
|
46
59
|
/**
|
|
47
60
|
* Construct a canonical `file:<shard>/<rest>` URL. The writer side
|
|
48
|
-
* (`
|
|
49
|
-
* hash
|
|
50
|
-
* a single edit.
|
|
61
|
+
* (`db/fact_disk_storage.ts`) assembles the shape from a freshly-computed
|
|
62
|
+
* hash via `fact_disk_path`; this helper centralizes the literal so a
|
|
63
|
+
* future shape change is a single edit.
|
|
51
64
|
*/
|
|
52
65
|
export const mint_file_fact_url = (shard, rest) => `file:${shard}/${rest}`;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"deno.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/runtime/deno.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAC,WAAW,EAA4B,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"deno.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/runtime/deno.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAC,WAAW,EAA4B,MAAM,WAAW,CAAC;AA6DtE;;;;;;;;GAQG;AACH,eAAO,MAAM,mBAAmB,GAAI,MAAM,aAAa,CAAC,MAAM,CAAC,KAAG,WAgJhE,CAAC"}
|
package/dist/runtime/deno.js
CHANGED
|
@@ -29,7 +29,12 @@ export const create_deno_runtime = (args) => ({
|
|
|
29
29
|
stat: async (path) => {
|
|
30
30
|
try {
|
|
31
31
|
const s = await Deno.stat(path);
|
|
32
|
-
return {
|
|
32
|
+
return {
|
|
33
|
+
is_file: s.isFile,
|
|
34
|
+
is_directory: s.isDirectory,
|
|
35
|
+
size: s.size,
|
|
36
|
+
mtime_ms: s.mtime?.getTime(),
|
|
37
|
+
};
|
|
33
38
|
}
|
|
34
39
|
catch {
|
|
35
40
|
return null;
|
|
@@ -75,6 +80,15 @@ export const create_deno_runtime = (args) => ({
|
|
|
75
80
|
write_text_file: (path, content) => Deno.writeTextFile(path, content),
|
|
76
81
|
write_file: (path, data) => Deno.writeFile(path, data),
|
|
77
82
|
rename: (old_path, new_path) => Deno.rename(old_path, new_path),
|
|
83
|
+
fsync: async (path) => {
|
|
84
|
+
const file = await Deno.open(path, { read: true });
|
|
85
|
+
try {
|
|
86
|
+
await file.sync();
|
|
87
|
+
}
|
|
88
|
+
finally {
|
|
89
|
+
file.close();
|
|
90
|
+
}
|
|
91
|
+
},
|
|
78
92
|
remove: (path, options) => Deno.remove(path, options),
|
|
79
93
|
// === HTTP ===
|
|
80
94
|
fetch: globalThis.fetch,
|
package/dist/runtime/deps.d.ts
CHANGED
|
@@ -24,6 +24,15 @@ export interface StatResult {
|
|
|
24
24
|
* `Content-Length`) read it from a real runtime, where it is always present.
|
|
25
25
|
*/
|
|
26
26
|
size?: number;
|
|
27
|
+
/**
|
|
28
|
+
* Last-modification time in epoch milliseconds, when the runtime reports it.
|
|
29
|
+
* Populated by `create_node_runtime` / `create_deno_runtime`;
|
|
30
|
+
* `create_mock_runtime` omits it (so a mock-backed sweep treats every temp as
|
|
31
|
+
* unknown-age and never reaps). Optional so loose test stubs that only assert
|
|
32
|
+
* `is_file` / `is_directory` don't have to supply it. The orphan-temp sweep
|
|
33
|
+
* (`db/fact_disk_storage.ts`) reads it to age out stale `.tmp` spill files.
|
|
34
|
+
*/
|
|
35
|
+
mtime_ms?: number;
|
|
27
36
|
}
|
|
28
37
|
/**
|
|
29
38
|
* Result of executing a command.
|
|
@@ -105,6 +114,18 @@ export interface FsWriteDeps {
|
|
|
105
114
|
write_file: (path: string, data: Uint8Array) => Promise<void>;
|
|
106
115
|
/** Rename (move) a file. */
|
|
107
116
|
rename: (old_path: string, new_path: string) => Promise<void>;
|
|
117
|
+
/**
|
|
118
|
+
* Flush a file's data to stable storage (fsync). Call on a temp file after
|
|
119
|
+
* writing it and *before* `rename`-ing it into place when the renamed path is
|
|
120
|
+
* later served without re-verification — otherwise a host crash after the
|
|
121
|
+
* rename can surface a torn/zero file as authentic content. The fact disk CAS
|
|
122
|
+
* (`db/fact_disk_storage.ts`) is the one such path; it twins the Rust
|
|
123
|
+
* `fuz_fact` §fsync posture (data-sync before rename; the parent-dir fsync
|
|
124
|
+
* stays deliberately waived — a lost dirent is regenerable under content
|
|
125
|
+
* addressing). Real runtimes open the path, fsync, and close;
|
|
126
|
+
* `create_mock_runtime` no-ops (it models no durability).
|
|
127
|
+
*/
|
|
128
|
+
fsync: (path: string) => Promise<void>;
|
|
108
129
|
}
|
|
109
130
|
/**
|
|
110
131
|
* Streaming file I/O — read a file as a byte stream, or write a byte stream to
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"deps.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/runtime/deps.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,OAAO,CAAC;IACtB;;;;;;;;;OASG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"deps.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/runtime/deps.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,OAAO,CAAC;IACtB;;;;;;;;;OASG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;;;;OAOG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,aAAa;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,+CAA+C;IAC/C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,kDAAkD;IAClD,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,kFAAkF;IAClF,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,OAAO;IACvB,yCAAyC;IACzC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAAG,SAAS,CAAC;IAC9C,mCAAmC;IACnC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CAC/C;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACxC,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAChB,qCAAqC;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,0EAA0E;IAC1E,SAAS,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,+DAA+D;IAC/D,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC;IACnD,8DAA8D;IAC9D,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IAClD,+DAA+D;IAC/D,SAAS,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;IACjD;;;;;;OAMG;IACH,qBAAqB,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,wBAAwB,CAAC,CAAC;IAC3F,8FAA8F;IAC9F,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;CAClD;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B,0BAA0B;IAC1B,KAAK,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAC,SAAS,CAAC,EAAE,OAAO,CAAA;KAAC,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACxE,4BAA4B;IAC5B,eAAe,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAClE,6BAA6B;IAC7B,UAAU,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9D,4BAA4B;IAC5B,MAAM,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9D;;;;;;;;;;OAUG;IACH,KAAK,EAAE
,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CACvC;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,YAAY;IAC5B;;;;;OAKG;IACH,gBAAgB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC,CAAC;IACxE;;;;;OAKG;IACH,iBAAiB,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,cAAc,CAAC,UAAU,CAAC,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CACrF;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC5B,kCAAkC;IAClC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAC,SAAS,CAAC,EAAE,OAAO,CAAA;KAAC,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CACzE;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B;;;;;;;OAOG;IACH,WAAW,EAAE,CACZ,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,CAAC,EAAE,iBAAiB,KACvB,OAAO,CAAC,aAAa,CAAC,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB,yDAAyD;IACzD,KAAK,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,OAAO;IACvB,6BAA6B;IAC7B,IAAI,EAAE,CAAC,GAAG,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC;CACxC;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC5B,6BAA6B;IAC7B,YAAY,EAAE,CAAC,IAAI,EAAE,UAAU,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IACpD,6CAA6C;IAC7C,UAAU,EAAE,CAAC,MAAM,EAAE,UAAU,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;CAC3D;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC3B,oCAAoC;IACpC,IAAI,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,KAAK,CAAC;CAC9B;AAED;;;;;;GAMG;AACH,MAAM,WAAW,WAChB,SACC,OAAO,EACP,UAAU,EACV,WAAW,EACX,YAAY,EACZ,YAAY,EACZ,WAAW,EACX,SAAS,EACT,YAAY,EACZ,WAAW,EACX,OAAO;IACR,qCAAqC;IACrC,OAAO,EAAE,MAAM,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,2CAA2C;IAC3C,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IACrC,qCAAqC;IACrC,GAAG,EAAE,MAAM,MAAM,CAAC;IAClB,qFAAqF;IACrF,mBAAmB,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CAC3E"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mock.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/runtime/mock.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAC,WAAW,EAAc,aAAa,EAAE,iBAAiB,EAAC,MAAM,WAAW,CAAC;AAIzF;;GAEG;AACH,MAAM,WAAW,WAAY,SAAQ,WAAW;IAC/C,kCAAkC;IAClC,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,0CAA0C;IAC1C,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,+CAA+C;IAC/C,aAAa,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACvC,mCAAmC;IACnC,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACvB,wCAAwC;IACxC,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1B,gGAAgG;IAChG,aAAa,EAAE,KAAK,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAAC,OAAO,CAAC,EAAE,iBAAiB,CAAA;KAAC,CAAC,CAAC;IACtF,sCAAsC;IACtC,qBAAqB,EAAE,KAAK,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;KAAC,CAAC,CAAC;IACjE,8BAA8B;IAC9B,aAAa,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7B,4CAA4C;IAC5C,oBAAoB,EAAE,GAAG,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IACjD,yCAAyC;IACzC,YAAY,EAAE,UAAU,GAAG,IAAI,CAAC;IAChC,4BAA4B;IAC5B,WAAW,EAAE,KAAK,CAAC;QAAC,KAAK,EAAE,MAAM,GAAG,GAAG,GAAG,OAAO,CAAC;QAAC,IAAI,CAAC,EAAE,WAAW,CAAA;KAAC,CAAC,CAAC;IACxE,wDAAwD;IACxD,oBAAoB,EAAE,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;CAC5C;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,mBAAmB,GAAI,OAAM,KAAK,CAAC,MAAM,CAAM,KAAG,
|
|
1
|
+
{"version":3,"file":"mock.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/runtime/mock.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAC,WAAW,EAAc,aAAa,EAAE,iBAAiB,EAAC,MAAM,WAAW,CAAC;AAIzF;;GAEG;AACH,MAAM,WAAW,WAAY,SAAQ,WAAW;IAC/C,kCAAkC;IAClC,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,0CAA0C;IAC1C,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,+CAA+C;IAC/C,aAAa,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACvC,mCAAmC;IACnC,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACvB,wCAAwC;IACxC,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1B,gGAAgG;IAChG,aAAa,EAAE,KAAK,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAAC,OAAO,CAAC,EAAE,iBAAiB,CAAA;KAAC,CAAC,CAAC;IACtF,sCAAsC;IACtC,qBAAqB,EAAE,KAAK,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,CAAA;KAAC,CAAC,CAAC;IACjE,8BAA8B;IAC9B,aAAa,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7B,4CAA4C;IAC5C,oBAAoB,EAAE,GAAG,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IACjD,yCAAyC;IACzC,YAAY,EAAE,UAAU,GAAG,IAAI,CAAC;IAChC,4BAA4B;IAC5B,WAAW,EAAE,KAAK,CAAC;QAAC,KAAK,EAAE,MAAM,GAAG,GAAG,GAAG,OAAO,CAAC;QAAC,IAAI,CAAC,EAAE,WAAW,CAAA;KAAC,CAAC,CAAC;IACxE,wDAAwD;IACxD,oBAAoB,EAAE,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;CAC5C;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,mBAAmB,GAAI,OAAM,KAAK,CAAC,MAAM,CAAM,KAAG,WAuR9D,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,kBAAkB,GAAI,SAAS,WAAW,KAAG,IAazD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,cAAc,GAAI,SAAS,WAAW,EAAE,OAAO,MAAM,KAAG,IAEpE,CAAC;AAEF;;;;GAIG;AACH,qBAAa,aAAc,SAAQ,KAAK;IACvC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;gBAEV,IAAI,EAAE,MAAM;CAKxB"}
|
package/dist/runtime/mock.js
CHANGED
|
@@ -222,6 +222,9 @@ export const create_mock_runtime = (args = []) => {
|
|
|
222
222
|
}
|
|
223
223
|
mock_fs_bytes.set(path, merged);
|
|
224
224
|
},
|
|
225
|
+
// The mock models no real disk, so durability is a no-op (mirrors how it
|
|
226
|
+
// omits `mtime_ms`). The fact disk CAS sweep + fsync stay deps-based.
|
|
227
|
+
fsync: async () => { },
|
|
225
228
|
rename: async (old_path, new_path) => {
|
|
226
229
|
const content = mock_fs.get(old_path);
|
|
227
230
|
if (content !== undefined) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"node.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/runtime/node.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAWH,OAAO,KAAK,EAAC,WAAW,EAA4B,MAAM,WAAW,CAAC;AAEtE;;;;;GAKG;AACH,eAAO,MAAM,mBAAmB,GAC/B,OAAM,aAAa,CAAC,MAAM,CAAyB,KACjD,
|
|
1
|
+
{"version":3,"file":"node.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/runtime/node.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAWH,OAAO,KAAK,EAAC,WAAW,EAA4B,MAAM,WAAW,CAAC;AAEtE;;;;;GAKG;AACH,eAAO,MAAM,mBAAmB,GAC/B,OAAM,aAAa,CAAC,MAAM,CAAyB,KACjD,WAkMD,CAAC"}
|
package/dist/runtime/node.js
CHANGED
|
@@ -34,7 +34,12 @@ export const create_node_runtime = (args = process.argv.slice(2)) => ({
|
|
|
34
34
|
stat: async (path) => {
|
|
35
35
|
try {
|
|
36
36
|
const s = await stat(path);
|
|
37
|
-
return {
|
|
37
|
+
return {
|
|
38
|
+
is_file: s.isFile(),
|
|
39
|
+
is_directory: s.isDirectory(),
|
|
40
|
+
size: s.size,
|
|
41
|
+
mtime_ms: s.mtimeMs,
|
|
42
|
+
};
|
|
38
43
|
}
|
|
39
44
|
catch {
|
|
40
45
|
return null;
|
|
@@ -82,6 +87,17 @@ export const create_node_runtime = (args = process.argv.slice(2)) => ({
|
|
|
82
87
|
write_text_file: (path, content) => writeFile(path, content, 'utf-8'),
|
|
83
88
|
write_file: (path, data) => writeFile(path, data),
|
|
84
89
|
rename: (old_path, new_path) => rename(old_path, new_path),
|
|
90
|
+
fsync: async (path) => {
|
|
91
|
+
// fsync flushes the inode's dirty pages regardless of the fd's open mode,
|
|
92
|
+
// so a read handle is enough (and needs no write permission).
|
|
93
|
+
const handle = await open(path, 'r');
|
|
94
|
+
try {
|
|
95
|
+
await handle.sync();
|
|
96
|
+
}
|
|
97
|
+
finally {
|
|
98
|
+
await handle.close();
|
|
99
|
+
}
|
|
100
|
+
},
|
|
85
101
|
remove: (path, options) => rm(path, options),
|
|
86
102
|
// === HTTP ===
|
|
87
103
|
fetch: globalThis.fetch,
|
|
@@ -14,7 +14,7 @@ import { randomBytes } from 'node:crypto';
|
|
|
14
14
|
import { writeFile, rename, mkdir, unlink } from 'node:fs/promises';
|
|
15
15
|
import { join } from 'node:path';
|
|
16
16
|
import { FACT_HASH_PREFIX, fact_hash_bytes } from '@fuzdev/fuz_util/fact_hash.js';
|
|
17
|
-
import { mint_file_fact_url } from '
|
|
17
|
+
import { mint_file_fact_url } from '../db/file_fact_url.js';
|
|
18
18
|
/**
|
|
19
19
|
* Write `bytes` as a fact, choosing embedded (PG) vs external (disk +
|
|
20
20
|
* `put_ref`) based on `embedded_threshold`. Returns the canonical
|
|
@@ -27,7 +27,7 @@ import { readFile } from 'node:fs/promises';
|
|
|
27
27
|
import { createReadStream } from 'node:fs';
|
|
28
28
|
import { Readable } from 'node:stream';
|
|
29
29
|
import { join } from 'node:path';
|
|
30
|
-
import { parse_file_fact_url } from '
|
|
30
|
+
import { parse_file_fact_url } from '../db/file_fact_url.js';
|
|
31
31
|
/**
|
|
32
32
|
* Build a `FactExternalFetcher` that resolves `file:` URLs against the
|
|
33
33
|
* filesystem. Throws on a malformed URL before touching the disk so
|
|
@@ -89,7 +89,7 @@ import { query_get_fact, query_get_fact_meta } from '../db/fact_queries.js';
|
|
|
89
89
|
import { query_cell_get } from '../db/cell_queries.js';
|
|
90
90
|
import { query_cell_grant_list_for_cell } from '../db/cell_grant_queries.js';
|
|
91
91
|
import { can_view_cell } from '../auth/cell_authorize.js';
|
|
92
|
-
import { parse_file_fact_url } from '
|
|
92
|
+
import { parse_file_fact_url } from '../db/file_fact_url.js';
|
|
93
93
|
/** `Cache-Control` for fact responses — 5 min revocation window. */
|
|
94
94
|
const CACHE_CONTROL = 'private, max-age=300';
|
|
95
95
|
/**
|
package/package.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"file_fact_url.d.ts","sourceRoot":"../src/lib/","sources":["../../src/lib/server/file_fact_url.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAC,CAAC,EAAC,MAAM,KAAK,CAAC;AAEtB,iEAAiE;AACjE,eAAO,MAAM,qBAAqB,QAAyC,CAAC;AAE5E;;;;GAIG;AACH,eAAO,MAAM,WAAW,uDAA+D,CAAC;AACxF,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC;AAEtD,uEAAuE;AACvE,eAAO,MAAM,gBAAgB,GAAI,GAAG,MAAM,KAAG,CAAC,IAAI,WAA4C,CAAC;AAE/F;;;;GAIG;AACH,eAAO,MAAM,mBAAmB,GAC/B,KAAK,MAAM,KACT;IAAC,GAAG,EAAE,WAAW,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAC,GAAG,IAIpD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,kBAAkB,GAAI,OAAO,MAAM,EAAE,MAAM,MAAM,KAAG,WAC1B,CAAC"}
|