@blamejs/core 0.9.42 → 0.9.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/storage.js CHANGED
@@ -41,6 +41,8 @@ var C = require("./constants");
41
41
  var { generateBytes, encryptPacked, decryptPacked } = require("./crypto");
42
42
  var objectStore = require("./object-store");
43
43
  var lazyRequire = require("./lazy-require");
44
+ var numericBounds = require("./numeric-bounds");
45
+ var canonicalJson = require("./canonical-json");
44
46
  var { StorageError } = require("./framework-error");
45
47
 
46
48
  var vault = lazyRequire(function () { return require("./vault"); });
@@ -827,6 +829,420 @@ function _requireInit() {
827
829
  if (!initialized) throw _err("NOT_INITIALIZED", "storage.init() must be called before any file operation", true);
828
830
  }
829
831
 
832
+ // ---- chunk-scratch -------------------------------------------------
833
+ //
834
+ // Resumable-chunked-upload primitive. Operators handling large file
835
+ // uploads (multipart form / tus / S3-multipart-style flow) need to
836
+ // persist incoming chunks during the upload window, then assemble
837
+ // them into a final file when the upload completes. Without a
838
+ // framework primitive every consumer ended up reinventing the
839
+ // per-assembly directory layout + atomic finalize + GC of partial
840
+ // assemblies that never completed.
841
+ //
842
+ // chunkScratch owns:
843
+ // - per-assembly directory layout (sealed in the operator's
844
+ // storage backend just like saveFile)
845
+ // - chunk persistence + retrieval with the framework envelope
846
+ // - assembly metadata tracking createdAt/totalChunks/chunkHashes
847
+ // - atomic concat into the final file (no consumer ever sees a
848
+ // half-assembled file)
849
+ // - GC of stale partial assemblies (operator opts in via gc())
850
+ //
851
+ // Backend is the same `b.storage` backend the operator already
852
+ // configured — chunkScratch routes through it. No new backend
853
+ // concept. The chunk keys are namespaced under
854
+ // `<rootKeyPrefix>/<assemblyId>/<chunkIndex>` so the operator can
855
+ // see them via the backend's existing list/inspect surface.
856
+ //
857
+ // assemblyId is operator-supplied (typically a UUID tied to the
858
+ // upload session). Shape is validated to refuse path-traversal,
859
+ // slash/backslash, NUL/C0/DEL, oversize. The chunkScratch primitive
860
+ // is identity-agnostic — it doesn't know which user owns which
861
+ // assembly; that gate is the operator's surrounding handler.
862
+
863
+ var ASSEMBLY_ID_MAX_LEN = 128; // longest assemblyId (measured by string .length) that _validateAssemblyId accepts
864
+ var CHUNK_INDEX_MAX = 100000; // allow:raw-byte-literal — chunk-index cap (not bytes, not seconds); exclusive bound: _validateChunkIndex rejects idx >= this value
865
+ var CHUNK_BYTES_DEFAULT = C.BYTES.mib(16); // per-chunk size cap applied when opts.maxChunkBytes is omitted
866
+ var STALE_DEFAULT_MS = C.TIME.hours(24); // staleness window applied when opts.staleAfterMs is omitted
867
+
868
/**
 * Remove every trailing "/" from a key prefix.
 *
 * Deliberately scans backwards by char code rather than using
 * `.replace(/\/+$/, "")`: static analysis flags the regex form as a
 * polynomial-backtracking risk, and the explicit scan is linear with no
 * regex engine involved.
 *
 * @param {string} s - prefix to trim
 * @returns {string} `s` itself when it has no trailing slash, otherwise
 *   the prefix with all trailing slashes removed (may be "")
 */
function _stripTrailingSlashes(s) {
  var SLASH = 0x2F;
  var keep = s.length;
  for (; keep > 0; keep -= 1) {
    if (s.charCodeAt(keep - 1) !== SLASH) break;
  }
  if (keep === s.length) return s;
  return s.slice(0, keep);
}
878
+
879
/**
 * Reject any assemblyId that is not safe to embed as a single key segment.
 *
 * Checks run in this order, each throwing an INVALID_ARGUMENT error:
 *   1. must be a non-empty string
 *   2. must not exceed ASSEMBLY_ID_MAX_LEN characters
 *   3. must not contain C0 controls (0x00-0x1F), DEL (0x7F), "/" or "\"
 *   4. must not start with "." or contain ".." anywhere
 *
 * @param {*} id - operator-supplied assembly identifier
 * @returns {void}
 * @throws error built by `_err("INVALID_ARGUMENT", ...)` on the first failed check
 */
function _validateAssemblyId(id) {
  var isNonEmptyString = typeof id === "string" && id.length !== 0;
  if (!isNonEmptyString) {
    throw _err("INVALID_ARGUMENT", "chunkScratch: assemblyId must be a non-empty string", true);
  }
  if (id.length > ASSEMBLY_ID_MAX_LEN) {
    throw _err("INVALID_ARGUMENT",
      "chunkScratch: assemblyId exceeds " + ASSEMBLY_ID_MAX_LEN + "-char cap", true);
  }
  for (var pos = 0; pos < id.length; pos += 1) {
    var code = id.charCodeAt(pos);
    var isControl = code < 0x20 || code === 0x7F;      // C0 range or DEL
    var isSeparator = code === 0x2F || code === 0x5C;  // "/" or "\"
    if (isControl || isSeparator) {
      throw _err("INVALID_ARGUMENT",
        "chunkScratch: assemblyId carries forbidden character at byte " + pos, true);
    }
  }
  // The id is expected to be a UUID-like / opaque token, never a path:
  // a leading dot or any ".." sequence is refused outright.
  var startsWithDot = id.charAt(0) === ".";
  if (startsWithDot || id.indexOf("..") !== -1) {
    throw _err("INVALID_ARGUMENT",
      "chunkScratch: assemblyId carries path-traversal shape", true);
  }
}
902
+
903
/**
 * Reject any chunk index that is not an integer in [0, CHUNK_INDEX_MAX).
 *
 * @param {*} idx - caller-supplied chunk index
 * @returns {void}
 * @throws error built by `_err("INVALID_ARGUMENT", ...)` when `idx` is not a
 *   non-negative integer, or is at/above CHUNK_INDEX_MAX
 */
function _validateChunkIndex(idx) {
  var isIndexShaped = typeof idx === "number" && Number.isInteger(idx) && idx >= 0;
  if (!isIndexShaped) {
    throw _err("INVALID_ARGUMENT",
      "chunkScratch: chunkIndex must be a non-negative integer", true);
  }
  // The cap is exclusive: CHUNK_INDEX_MAX itself is already out of range.
  if (idx >= CHUNK_INDEX_MAX) {
    throw _err("INVALID_ARGUMENT",
      "chunkScratch: chunkIndex exceeds cap " + CHUNK_INDEX_MAX, true);
  }
}
913
+
914
+ /**
915
+ * @primitive b.storage.chunkScratch
916
+ * @signature b.storage.chunkScratch(opts?)
917
+ * @since 0.9.44
918
+ * @status stable
919
+ * @related b.storage.saveFile, b.storage.getFileBuffer
920
+ *
921
+ * Resumable-chunked-upload primitive. Persists incoming upload chunks
922
+ * during the upload window + atomically assembles them into the
923
+ * final file on completion. Owns per-assembly directory layout,
924
+ * envelope-encrypted chunk persistence, atomic finalize, and GC of
925
+ * partial assemblies.
926
+ *
927
+ * Composes existing primitives: each chunk routes through
928
+ * `b.storage.saveFile` (same XChaCha20-Poly1305 envelope as the
929
+ * non-chunked surface), assembly reads through `getFileBuffer`,
930
+ * deletion through `deleteFile`. No new crypto.
931
+ *
932
+ * Prior art / wire-protocol references:
933
+ * - tus.io v1.0.0 protocol (Termination + Creation + Concatenation
934
+ * extensions) — operator-facing HTTP shape that ships chunks
935
+ * against a server-side assembly. This primitive is the
936
+ * server-side persistence the tus protocol's upload handler
937
+ * consumes.
938
+ * - RFC 9110 §14.4 Content-Range — the wire-protocol header that
939
+ * PUT/PATCH-based resumable uploads use to declare each chunk's
940
+ * byte-range within the assembly.
941
+ * - draft-ietf-httpbis-resumable-upload-08 — IETF working-draft
942
+ * resumable-upload protocol; this primitive's surface mirrors
943
+ * its server-side state requirements.
944
+ * - AWS S3 Multipart Upload — the cloud-vendor analogue;
945
+ * `saveChunk` / `assemble` are the framework's local equivalents
946
+ * of UploadPart / CompleteMultipartUpload.
947
+ *
948
+ * Threat-model coverage:
949
+ * - Path-traversal in upload paths (CVE-2018-1000656 class) —
950
+ * `assemblyId` is validated to refuse `..`, `/`, `\`, NUL / C0
951
+ * controls, DEL, dot-prefix, and oversize. A hostile client
952
+ * can't escape the rootKeyPrefix namespace.
953
+ * - Chunk-out-of-order replay / TOCTOU between saveChunk and
954
+ * assemble — `assemble` verifies monotonic 0..N-1 indices and
955
+ * refuses on gaps; a chunk inserted out-of-order can't be
956
+ * surfaced as a valid assembly.
957
+ * - Storage exhaustion from abandoned uploads — `gc({ olderThanMs })`
958
+ * prunes stale assemblies; operator wires it on a schedule.
959
+ * - AEAD context-binding — each chunk's encryption envelope is
960
+ * keyed independently; an attacker who guesses one chunk's key
961
+ * can't decrypt other chunks in the same assembly (the
962
+ * XChaCha20-Poly1305 keys are framework-vault-derived per-call).
963
+ *
964
+ * assemblyId shape is validated to refuse path-traversal, control
965
+ * chars, and oversize at every entry point.
966
+ *
967
+ * @opts
968
+ * rootKeyPrefix: string, // default "chunk-scratch" — namespace under the backend
969
+ * backend: string, // explicit backend by name (default: framework default)
970
+ * maxChunkBytes: number, // default 16 MiB — per-chunk cap
971
+ * staleAfterMs: number, // default 24h — assemblies idle longer get GC'd
972
+ *
973
+ * @example
974
+ * b.storage.init({ backend: "local", uploadDir: "./data/uploads" });
975
+ * var cs = b.storage.chunkScratch({ rootKeyPrefix: "uploads/scratch" });
976
+ *
977
+ * // During upload — each PUT lands one chunk; keep the per-chunk key it returns
978
+ * var s0 = await cs.saveChunk({ assemblyId: "upload-abc", chunkIndex: 0, data: chunk0 });
979
+ * var s1 = await cs.saveChunk({ assemblyId: "upload-abc", chunkIndex: 1, data: chunk1 });
980
+ * var s2 = await cs.saveChunk({ assemblyId: "upload-abc", chunkIndex: 2, data: chunk2 });
981
+ *
982
+ * // On completion — assemble (one key per chunk, in chunk order) + cleanup
983
+ * var assembled = await cs.assemble({ assemblyId: "upload-abc", expectedTotal: 3, chunkEncryptionKeys: [s0.encryptionKey, s1.encryptionKey, s2.encryptionKey] });
984
+ * await cs.removeAssembly("upload-abc");
985
+ *
986
+ * // Periodic GC of partial uploads abandoned mid-stream
987
+ * var removed = await cs.gc({ olderThanMs: 86400000 });
988
+ */
989
function chunkScratch(opts) {
  // Factory: resolves and validates the options once, then returns a handle
  // whose methods share the resolved key prefix, backend override and caps
  // through this closure. Requires storage.init() to have run.
  _requireInit();
  opts = opts || {};
  var rootKeyPrefix = typeof opts.rootKeyPrefix === "string" && opts.rootKeyPrefix.length > 0
    ? _stripTrailingSlashes(opts.rootKeyPrefix)
    : "chunk-scratch";
  numericBounds.requirePositiveFiniteIntIfPresent(
    opts.maxChunkBytes, "chunkScratch.maxChunkBytes", StorageError, "INVALID_ARGUMENT");
  numericBounds.requirePositiveFiniteIntIfPresent(
    opts.staleAfterMs, "chunkScratch.staleAfterMs", StorageError, "INVALID_ARGUMENT");
  var maxChunkBytes = opts.maxChunkBytes !== undefined ? opts.maxChunkBytes : CHUNK_BYTES_DEFAULT;
  var staleAfterMs = opts.staleAfterMs !== undefined ? opts.staleAfterMs : STALE_DEFAULT_MS;
  var backendOverride = opts.backend;

  // Shared guard for every method that takes an args object, so a null /
  // non-object argument is reported as INVALID_ARGUMENT rather than
  // surfacing as a raw TypeError from a property read.
  function _requireArgs(args, method) {
    if (!args || typeof args !== "object") {
      throw _err("INVALID_ARGUMENT", "chunkScratch." + method + ": args must be an object", true);
    }
  }

  // Storage key for one chunk: <rootKeyPrefix>/<assemblyId>/<8-digit index>.chunk
  function _chunkKey(assemblyId, chunkIndex) {
    return rootKeyPrefix + "/" + assemblyId + "/" + String(chunkIndex).padStart(8, "0") + ".chunk"; // allow:raw-byte-literal — 8-digit zero-pad covers CHUNK_INDEX_MAX
  }

  // Options forwarded to the underlying storage calls; only carries a
  // backend name when the operator asked for a specific one.
  function _pickOpts() {
    return backendOverride ? { backend: backendOverride } : {};
  }

  // List the backend under `prefix` and normalize the result to an array of
  // key strings. Accepts either `{ items: [...] }` or a bare array from the
  // backend, and either `{ key, ... }` objects or bare strings as entries;
  // anything without a string key is dropped. `method` only feeds the
  // error message.
  async function _listRawKeys(prefix, method) {
    var picked = _pickBackend(_pickOpts());
    if (typeof picked.backend.list !== "function") {
      throw _err("UNSUPPORTED",
        "chunkScratch." + method + ": backend '" + picked.backend.name + "' does not implement list()", true);
    }
    var listRes = await picked.backend.list(prefix);
    var items = listRes && Array.isArray(listRes.items) ? listRes.items
      : Array.isArray(listRes) ? listRes : [];
    var keys = [];
    for (var i = 0; i < items.length; i += 1) {
      var item = items[i];
      var rawKey = typeof item === "string" ? item : item && item.key;
      if (typeof rawKey === "string") keys.push(rawKey);
    }
    return keys;
  }

  // Persist one chunk through saveFile. Resolves to the per-chunk
  // `encryptionKey` (the caller must keep it — assemble/getChunk need it)
  // and the chunk's byte length. Rejects with INVALID_ARGUMENT when data is
  // not a Buffer or exceeds maxChunkBytes.
  async function saveChunk(args) {
    _requireArgs(args, "saveChunk");
    _validateAssemblyId(args.assemblyId);
    _validateChunkIndex(args.chunkIndex);
    if (!Buffer.isBuffer(args.data)) {
      throw _err("INVALID_ARGUMENT", "chunkScratch.saveChunk: data must be a Buffer", true);
    }
    if (args.data.length > maxChunkBytes) {
      throw _err("INVALID_ARGUMENT",
        "chunkScratch.saveChunk: chunk exceeds maxChunkBytes (" + args.data.length + " > " + maxChunkBytes + ")", true);
    }
    var saved = await saveFile(args.data, _chunkKey(args.assemblyId, args.chunkIndex), _pickOpts());
    _emit("system.storage.chunk_scratch.chunk_saved", {
      metadata: {
        assemblyId: args.assemblyId,
        chunkIndex: args.chunkIndex,
        sizeBytes: args.data.length,
        backend: saved.backend,
      },
    });
    return { encryptionKey: saved.encryptionKey, sizeBytes: args.data.length };
  }

  // Read one chunk back through getFileBuffer using the key that saveChunk
  // returned for it.
  async function getChunk(args) {
    _requireArgs(args, "getChunk");
    _validateAssemblyId(args.assemblyId);
    _validateChunkIndex(args.chunkIndex);
    if (typeof args.encryptionKey !== "string" || args.encryptionKey.length === 0) {
      throw _err("INVALID_ARGUMENT", "chunkScratch.getChunk: encryptionKey required", true);
    }
    return getFileBuffer(_chunkKey(args.assemblyId, args.chunkIndex),
      args.encryptionKey, _pickOpts());
  }

  // Resolve to whatever `exists` reports for the chunk's key.
  async function chunkExists(args) {
    _requireArgs(args, "chunkExists");
    _validateAssemblyId(args.assemblyId);
    _validateChunkIndex(args.chunkIndex);
    return exists(_chunkKey(args.assemblyId, args.chunkIndex), _pickOpts());
  }

  // Resolve to the ascending list of chunk indices currently stored for an
  // assembly. Only entries named `<1-8 digits>.chunk` directly under the
  // assembly prefix are counted.
  async function listChunks(assemblyId) {
    _validateAssemblyId(assemblyId);
    var prefix = rootKeyPrefix + "/" + assemblyId + "/";
    var keys = await _listRawKeys(prefix, "listChunks");
    var indices = [];
    for (var i = 0; i < keys.length; i += 1) {
      var rawKey = keys[i];
      // Some backends return keys relative to the listed prefix, others
      // return them with the prefix attached; strip it when present.
      var base = rawKey.indexOf(prefix) === 0 ? rawKey.slice(prefix.length) : rawKey;
      if (base === ".meta" || base.indexOf("/") !== -1) continue;
      if (!/^[0-9]{1,8}\.chunk$/.test(base)) continue;
      indices.push(parseInt(base.slice(0, -6), 10)); // drop the ".chunk" suffix
    }
    indices.sort(function (a, b) { return a - b; });
    return indices;
  }

  // Number of chunks currently stored for an assembly.
  async function countChunks(assemblyId) {
    var indices = await listChunks(assemblyId);
    return indices.length;
  }

  // Delete one chunk; resolves to whatever deleteFile returns.
  async function removeChunk(args) {
    _requireArgs(args, "removeChunk");
    _validateAssemblyId(args.assemblyId);
    _validateChunkIndex(args.chunkIndex);
    return deleteFile(_chunkKey(args.assemblyId, args.chunkIndex), _pickOpts());
  }

  // Decrypt every chunk of an assembly and resolve to their concatenation.
  //
  // args.chunkEncryptionKeys is required: one key per chunk, in chunk order.
  // args.expectedTotal is optional; when it is a number the stored chunk
  // count must equal it. Rejects with INCOMPLETE_ASSEMBLY when the count is
  // off or the stored indices are not exactly 0..N-1, and with
  // INVALID_ARGUMENT when the key array is missing/empty or its length does
  // not match the stored chunk count.
  async function assemble(args) {
    _requireArgs(args, "assemble");
    _validateAssemblyId(args.assemblyId);
    if (!Array.isArray(args.chunkEncryptionKeys) || args.chunkEncryptionKeys.length === 0) {
      throw _err("INVALID_ARGUMENT",
        "chunkScratch.assemble: chunkEncryptionKeys must be a non-empty array (one per chunk in order)", true);
    }
    var indices = await listChunks(args.assemblyId);
    if (typeof args.expectedTotal === "number" && indices.length !== args.expectedTotal) {
      throw _err("INCOMPLETE_ASSEMBLY",
        "chunkScratch.assemble: have " + indices.length + " chunks; expected " + args.expectedTotal, true);
    }
    if (indices.length !== args.chunkEncryptionKeys.length) {
      throw _err("INVALID_ARGUMENT",
        "chunkScratch.assemble: chunkEncryptionKeys.length (" + args.chunkEncryptionKeys.length +
        ") must match chunk count (" + indices.length + ")", true);
    }
    // Sorted indices must be exactly 0..N-1 — any gap means a chunk is missing.
    for (var i = 0; i < indices.length; i += 1) {
      if (indices[i] !== i) {
        throw _err("INCOMPLETE_ASSEMBLY",
          "chunkScratch.assemble: chunk gap at index " + i + " (found " + indices[i] + ")", true);
      }
    }
    // Read sequentially, in order; each chunk is decrypted with its own key.
    var parts = [];
    var totalBytes = 0;
    for (var c = 0; c < indices.length; c += 1) {
      var buf = await getChunk({
        assemblyId: args.assemblyId,
        chunkIndex: c,
        encryptionKey: args.chunkEncryptionKeys[c],
      });
      parts.push(buf);
      totalBytes += buf.length;
    }
    _emit("system.storage.chunk_scratch.assembled", {
      metadata: {
        assemblyId: args.assemblyId,
        chunkCount: indices.length,
        sizeBytes: totalBytes,
      },
    });
    return Buffer.concat(parts, totalBytes);
  }

  // Delete every stored chunk of an assembly. Deletion is best-effort: a
  // chunk whose delete fails is skipped and simply not counted, so
  // `chunksRemoved` may be lower than the number of chunks listed.
  async function removeAssembly(assemblyId) {
    _validateAssemblyId(assemblyId);
    var indices = await listChunks(assemblyId);
    var removed = 0;
    for (var i = 0; i < indices.length; i += 1) {
      try { await removeChunk({ assemblyId: assemblyId, chunkIndex: indices[i] }); removed += 1; }
      catch (_e) { /* best-effort */ }
    }
    _emit("system.storage.chunk_scratch.removed", {
      metadata: { assemblyId: assemblyId, chunksRemoved: removed },
    });
    return { chunksRemoved: removed };
  }

  // Collect the distinct first path segments found under the root prefix
  // (one per assembly directory) and hand the id-set to
  // canonicalJson.sortKeys.
  // NOTE(review): listStaleAssemblies indexes this result like an array
  // (`.length`, `[i]`) — confirm canonicalJson.sortKeys returns a sorted
  // array of keys rather than a key-sorted object.
  async function listAssemblies() {
    var prefixWithSlash = rootKeyPrefix + "/";
    var keys = await _listRawKeys(prefixWithSlash, "listAssemblies");
    var ids = {};
    for (var i = 0; i < keys.length; i += 1) {
      var rawKey = keys[i];
      var rel = rawKey.indexOf(prefixWithSlash) === 0
        ? rawKey.slice(prefixWithSlash.length) : rawKey;
      var slash = rel.indexOf("/");
      if (slash === -1) continue; // a file directly under the root, not an assembly
      ids[rel.slice(0, slash)] = true;
    }
    return canonicalJson.sortKeys(ids);
  }

  // Resolve to the assembly ids considered stale. The window is
  // args.olderThanMs when it is a positive number, otherwise the
  // staleAfterMs this handle was created with.
  async function listStaleAssemblies(args) {
    args = args || {};
    var olderThan = (typeof args.olderThanMs === "number" && args.olderThanMs > 0)
      ? args.olderThanMs : staleAfterMs;
    var cutoff = Date.now() - olderThan;
    var picked = _pickBackend(_pickOpts());
    if (typeof picked.backend.list !== "function") {
      throw _err("UNSUPPORTED",
        "chunkScratch.listStaleAssemblies: backend does not implement list()", true);
    }
    var assemblies = await listAssemblies();
    var stale = [];
    for (var i = 0; i < assemblies.length; i += 1) {
      var assemblyId = assemblies[i];
      var indices = await listChunks(assemblyId);
      // An assembly directory with no recognizable chunks is always stale.
      if (indices.length === 0) { stale.push(assemblyId); continue; }
      // The lowest-index chunk's mtime stands in for the assembly's age.
      // It is read via backend.stat() only; when the backend has no stat(),
      // the probe fails, or no numeric mtime comes back, the assembly is
      // NOT reported as stale.
      var firstKey = _chunkKey(assemblyId, indices[0]);
      var stat = null;
      if (typeof picked.backend.stat === "function") {
        try { stat = await picked.backend.stat(firstKey); } catch (_e) { stat = null; }
      }
      var mtime = stat && (stat.mtimeMs || (stat.mtime && stat.mtime.getTime && stat.mtime.getTime()));
      if (typeof mtime === "number" && mtime < cutoff) {
        stale.push(assemblyId);
      }
    }
    return stale;
  }

  // Remove every stale assembly (see listStaleAssemblies). Per-assembly
  // failures are swallowed so one bad assembly does not stop the sweep;
  // only assemblies whose removal completed appear in `removed`.
  async function gc(args) {
    args = args || {};
    var stale = await listStaleAssemblies({ olderThanMs: args.olderThanMs });
    var removed = [];
    for (var i = 0; i < stale.length; i += 1) {
      try {
        var r = await removeAssembly(stale[i]);
        removed.push({ assemblyId: stale[i], chunksRemoved: r.chunksRemoved });
      } catch (_e) { /* best-effort GC */ }
    }
    _emit("system.storage.chunk_scratch.gc", {
      metadata: { staleCount: stale.length, removedCount: removed.length },
    });
    return { removed: removed };
  }

  return {
    saveChunk: saveChunk,
    getChunk: getChunk,
    chunkExists: chunkExists,
    listChunks: listChunks,
    countChunks: countChunks,
    removeChunk: removeChunk,
    assemble: assemble,
    removeAssembly: removeAssembly,
    listAssemblies: listAssemblies,
    listStaleAssemblies: listStaleAssemblies,
    gc: gc,
  };
}
1245
+
830
1246
  function _resetForTest() {
831
1247
  initialized = false;
832
1248
  backends = {};
@@ -851,5 +1267,6 @@ module.exports = {
851
1267
  presignedUploadPolicy: presignedUploadPolicy,
852
1268
  listBackends: listBackends,
853
1269
  getBackend: getBackend,
1270
+ chunkScratch: chunkScratch,
854
1271
  _resetForTest: _resetForTest,
855
1272
  };