@remnic/core 9.3.519 → 9.3.520

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,10 +26,17 @@ interface PipelineLogger {
26
26
  error(msg: string): void;
27
27
  }
28
28
 
29
+ type ReadMarkdownFile = (filePath: string) => Promise<string>;
30
+ type WriteMarkdownFile = (filePath: string, content: string) => Promise<void>;
31
+
29
32
  interface PipelineOptions {
30
33
  dryRun?: boolean;
31
34
  /** Force-clean all files past grace period, ignoring redirect status. */
32
35
  forceClean?: boolean;
36
+ /** Test hook for deterministic markdown read failures. */
37
+ readMarkdownFile?: ReadMarkdownFile;
38
+ /** Test hook for deterministic markdown write failures. */
39
+ writeMarkdownFile?: WriteMarkdownFile;
33
40
  }
34
41
 
35
42
  // ---------------------------------------------------------------------------
@@ -63,6 +70,27 @@ function escapeRegex(s: string): string {
63
70
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
64
71
  }
65
72
 
73
+ function resolveManifestAssetPath(memoryDir: string, originalPath: string): string | null {
74
+ if (
75
+ originalPath.length === 0 ||
76
+ originalPath.includes("\0") ||
77
+ originalPath.includes("\\") ||
78
+ path.isAbsolute(originalPath) ||
79
+ path.win32.isAbsolute(originalPath)
80
+ ) {
81
+ return null;
82
+ }
83
+
84
+ const memoryRoot = path.resolve(memoryDir);
85
+ const fullPath = path.resolve(memoryRoot, originalPath);
86
+ const relative = path.relative(memoryRoot, fullPath);
87
+ if (relative === "" || relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)) {
88
+ return null;
89
+ }
90
+
91
+ return fullPath;
92
+ }
93
+
66
94
  function validateBinaryLifecycleConfig(config: BinaryLifecycleConfig): void {
67
95
  if (
68
96
  typeof config.gracePeriodDays !== "number" ||
@@ -74,6 +102,18 @@ function validateBinaryLifecycleConfig(config: BinaryLifecycleConfig): void {
74
102
  }
75
103
  }
76
104
 
105
+ function remotePathForAsset(backend: BinaryStorageBackend, relPath: string): string {
106
+ const normalized = relPath.split(path.sep).join("/");
107
+ if (backend.type === "filesystem") {
108
+ return `.binary-lifecycle/mirrors/${normalized}`;
109
+ }
110
+ return normalized;
111
+ }
112
+
113
+ function markdownTargetForAsset(asset: BinaryAssetRecord): string {
114
+ return asset.redirectPath ?? asset.mirroredPath;
115
+ }
116
+
77
117
  // ---------------------------------------------------------------------------
78
118
  // Pipeline stages
79
119
  // ---------------------------------------------------------------------------
@@ -96,16 +136,18 @@ async function stageMirror(
96
136
  const contentHash = await hashFile(fullPath);
97
137
  const ext = path.extname(relPath);
98
138
  const mimeType = guessMimeType(ext);
99
- const remotePath = relPath;
139
+ const remotePath = remotePathForAsset(backend, relPath);
100
140
 
101
141
  let backendLocation = remotePath;
102
142
  if (!dryRun) {
103
143
  backendLocation = await backend.upload(fullPath, remotePath);
104
144
  }
145
+ const redirectPath = backend.getRedirectTarget?.(backendLocation);
105
146
 
106
147
  const record: BinaryAssetRecord = {
107
148
  originalPath: relPath,
108
149
  mirroredPath: backendLocation,
150
+ ...(redirectPath ? { redirectPath } : {}),
109
151
  contentHash,
110
152
  sizeBytes: stat.size,
111
153
  mimeType,
@@ -137,93 +179,247 @@ async function stageRedirect(
137
179
  assets: BinaryAssetRecord[],
138
180
  log: PipelineLogger,
139
181
  dryRun: boolean,
182
+ readMarkdownFile: ReadMarkdownFile,
183
+ writeMarkdownFile: WriteMarkdownFile,
140
184
  ): Promise<{ redirected: number; errors: string[] }> {
141
185
  let redirected = 0;
142
186
  const errors: string[] = [];
143
187
 
144
- // Only redirect assets that are mirrored but not yet redirected.
145
- const candidates = assets.filter((a) => a.status === "mirrored");
188
+ // Redirect mirrored assets and retry prior redirect errors. Clean-stage errors
189
+ // remain safe because the redirect path validation below will keep rejecting
190
+ // invalid manifest records.
191
+ const candidates = assets.filter((a) => a.status === "mirrored" || a.status === "error");
146
192
  if (candidates.length === 0) return { redirected, errors };
147
193
 
148
194
  // Find all markdown files in memoryDir (recursive).
149
195
  const mdFiles = await findMarkdownFiles(memoryDir);
150
196
 
151
197
  for (const asset of candidates) {
152
- let matchCount = 0;
153
- let writeFailCount = 0;
198
+ const assetAbsolute = resolveManifestAssetPath(memoryDir, asset.originalPath);
199
+ if (assetAbsolute === null) {
200
+ const msg = `redirect blocked for ${asset.originalPath}: manifest path is outside memoryDir`;
201
+ log.error(`[binary-lifecycle] ${msg}`);
202
+ errors.push(msg);
203
+ if (!dryRun) {
204
+ asset.status = "error";
205
+ }
206
+ continue;
207
+ }
208
+
209
+ const updates: Array<{ mdPath: string; content: string }> = [];
210
+ let scanFailCount = 0;
154
211
  for (const mdPath of mdFiles) {
155
212
  try {
156
- const content = await fsp.readFile(mdPath, "utf-8");
157
-
158
- // Build the match path relative to this markdown file's directory.
159
- // Markdown links like `![img](./image.png)` are file-relative, but
160
- // asset.originalPath is memory-root relative (e.g. `sub/image.png`).
161
- // Resolve the asset path relative to the markdown file's directory
162
- // so both forms match correctly.
163
- const mdDir = path.dirname(mdPath);
164
- const assetAbsolute = path.join(memoryDir, asset.originalPath);
165
- const relativeToMd = path.relative(mdDir, assetAbsolute);
166
- // Normalise to forward slashes for regex matching (markdown uses /).
167
- const relativeForward = relativeToMd.split(path.sep).join("/");
168
- const escaped = escapeRegex(relativeForward);
169
-
170
- // Build a regex that matches markdown image/link references to the file.
171
- // Handles: ![alt](./path) , ![alt](path) , [text](./path)
172
- const pattern = new RegExp(
173
- `(!?\\[[^\\]]*\\]\\()(\\.\\/)?(${escaped})(\\))`,
174
- "g",
175
- );
213
+ const content = await readMarkdownFile(mdPath);
214
+
215
+ const pattern = markdownReferencePattern(asset, assetAbsolute, mdPath);
176
216
 
177
217
  if (!pattern.test(content)) continue;
178
- matchCount++;
179
218
 
180
- if (!dryRun) {
181
- // Reset lastIndex after test().
182
- pattern.lastIndex = 0;
183
- const updated = content.replace(pattern, (_match, open, _dotSlash, _file, close) => {
184
- return `${open as string}${asset.mirroredPath}${close as string}`;
185
- });
186
- await fsp.writeFile(mdPath, updated, "utf-8");
187
- }
219
+ // Reset lastIndex after test().
220
+ pattern.lastIndex = 0;
221
+ const updated = content.replace(pattern, (_match, open, _target, close) => {
222
+ return `${open as string}${markdownTargetForAsset(asset)}${close as string}`;
223
+ });
224
+ updates.push({ mdPath, content: updated });
188
225
  } catch (err) {
189
- // Track write failures separately so we don't transition status
190
- // when some markdown rewrites failed (P1: block redirect on failure).
191
- writeFailCount++;
226
+ scanFailCount++;
192
227
  const msg = `redirect scan failed for ${mdPath}: ${err instanceof Error ? err.message : String(err)}`;
193
228
  log.error(`[binary-lifecycle] ${msg}`);
194
229
  errors.push(msg);
195
230
  }
196
231
  }
197
232
 
198
- // Only transition to "redirected" when at least one reference was found
199
- // AND all matched files were rewritten successfully.
200
- if (matchCount > 0 && writeFailCount === 0) {
233
+ if (scanFailCount > 0) {
201
234
  if (!dryRun) {
202
- asset.status = "redirected";
203
- asset.redirectedAt = new Date().toISOString();
235
+ asset.status = "error";
236
+ }
237
+ log.warn(
238
+ `[binary-lifecycle] redirect blocked for ${asset.originalPath}: ` +
239
+ `${scanFailCount} markdown scan failure(s)` +
240
+ `${dryRun ? "" : " — status set to error"}`,
241
+ );
242
+ continue;
243
+ }
244
+
245
+ if (updates.length === 0) {
246
+ if (asset.status === "error") {
247
+ const verifyResult = await countRemainingLocalReferences(
248
+ memoryDir,
249
+ asset,
250
+ assetAbsolute,
251
+ mdFiles,
252
+ readMarkdownFile,
253
+ );
254
+ if (verifyResult.errors.length > 0 || verifyResult.remaining > 0) {
255
+ if (!dryRun) {
256
+ asset.status = "error";
257
+ }
258
+ for (const msg of verifyResult.errors) {
259
+ log.error(`[binary-lifecycle] ${msg}`);
260
+ errors.push(msg);
261
+ }
262
+ if (verifyResult.remaining > 0) {
263
+ const msg = `redirect verification failed for ${asset.originalPath}: ${verifyResult.remaining} local reference(s) remain`;
264
+ log.warn(`[binary-lifecycle] ${msg}`);
265
+ errors.push(msg);
266
+ }
267
+ continue;
268
+ }
269
+
270
+ if (asset.redirectedAt === undefined) {
271
+ if (!dryRun) {
272
+ asset.status = "mirrored";
273
+ }
274
+ log.info(`[binary-lifecycle] preserved mirrored asset without redirected marker: ${asset.originalPath}${dryRun ? " [dry-run]" : ""}`);
275
+ continue;
276
+ }
277
+
278
+ if (!Number.isFinite(new Date(asset.mirroredAt).getTime())) {
279
+ const msg = `redirect blocked for ${asset.originalPath}: manifest mirroredAt is invalid`;
280
+ log.error(`[binary-lifecycle] ${msg}`);
281
+ errors.push(msg);
282
+ if (!dryRun) {
283
+ asset.status = "error";
284
+ }
285
+ continue;
286
+ }
287
+
288
+ if (!dryRun) {
289
+ asset.status = "redirected";
290
+ asset.redirectedAt = new Date().toISOString();
291
+ }
292
+ redirected++;
293
+ log.info(`[binary-lifecycle] redirected: ${asset.originalPath}${dryRun ? " [dry-run]" : ""}`);
204
294
  }
295
+ continue;
296
+ }
297
+
298
+ if (dryRun) {
205
299
  redirected++;
206
- log.info(`[binary-lifecycle] redirected: ${asset.originalPath}${dryRun ? " [dry-run]" : ""}`);
207
- } else if (matchCount > 0 && writeFailCount > 0) {
208
- // Some rewrites failed — set error status so the asset is not cleaned
209
- // prematurely. It can be retried on the next pipeline run.
300
+ log.info(`[binary-lifecycle] redirected: ${asset.originalPath} [dry-run]`);
301
+ continue;
302
+ }
303
+
304
+ let writeFailCount = 0;
305
+ for (const update of updates) {
306
+ try {
307
+ await writeMarkdownFile(update.mdPath, update.content);
308
+ } catch (err) {
309
+ writeFailCount++;
310
+ const msg = `redirect write failed for ${update.mdPath}: ${err instanceof Error ? err.message : String(err)}`;
311
+ log.error(`[binary-lifecycle] ${msg}`);
312
+ errors.push(msg);
313
+ }
314
+ }
315
+
316
+ if (writeFailCount > 0) {
210
317
  if (!dryRun) {
211
318
  asset.status = "error";
212
319
  }
213
320
  log.warn(
214
- `[binary-lifecycle] redirect partial failure for ${asset.originalPath}: ` +
215
- `${matchCount} match(es), ${writeFailCount} write failure(s)` +
216
- `${dryRun ? "" : " — status set to error"}`,
321
+ `[binary-lifecycle] redirect write failure for ${asset.originalPath}: ` +
322
+ `${writeFailCount} write failure(s) — status set to error`,
217
323
  );
324
+ continue;
325
+ }
326
+
327
+ const redirectedAt = new Date().toISOString();
328
+ asset.redirectedAt = redirectedAt;
329
+
330
+ const verifyResult = await countRemainingLocalReferences(
331
+ memoryDir,
332
+ asset,
333
+ assetAbsolute,
334
+ mdFiles,
335
+ readMarkdownFile,
336
+ );
337
+ if (verifyResult.errors.length > 0 || verifyResult.remaining > 0) {
338
+ asset.status = "error";
339
+ for (const msg of verifyResult.errors) {
340
+ log.error(`[binary-lifecycle] ${msg}`);
341
+ errors.push(msg);
342
+ }
343
+ if (verifyResult.remaining > 0) {
344
+ const msg = `redirect verification failed for ${asset.originalPath}: ${verifyResult.remaining} local reference(s) remain`;
345
+ log.warn(`[binary-lifecycle] ${msg}`);
346
+ errors.push(msg);
347
+ }
348
+ continue;
218
349
  }
350
+ asset.status = "redirected";
351
+ asset.redirectedAt = redirectedAt;
352
+ redirected++;
353
+ log.info(`[binary-lifecycle] redirected: ${asset.originalPath}`);
219
354
  }
220
355
 
221
356
  return { redirected, errors };
222
357
  }
223
358
 
359
+ async function countRemainingLocalReferences(
360
+ memoryDir: string,
361
+ asset: BinaryAssetRecord,
362
+ assetAbsolute: string,
363
+ mdFiles: string[],
364
+ readMarkdownFile: ReadMarkdownFile,
365
+ ): Promise<{ remaining: number; errors: string[] }> {
366
+ let remaining = 0;
367
+ const errors: string[] = [];
368
+
369
+ for (const mdPath of mdFiles) {
370
+ try {
371
+ const content = await readMarkdownFile(mdPath);
372
+ const pattern = markdownReferencePattern(asset, assetAbsolute, mdPath);
373
+ if (pattern.test(content)) {
374
+ remaining++;
375
+ }
376
+ } catch (err) {
377
+ errors.push(`redirect verification failed for ${mdPath}: ${err instanceof Error ? err.message : String(err)}`);
378
+ }
379
+ }
380
+
381
+ return { remaining, errors };
382
+ }
383
+
384
+ function markdownReferencePattern(
385
+ asset: BinaryAssetRecord,
386
+ assetAbsolute: string,
387
+ mdPath: string,
388
+ ): RegExp {
389
+ const mdDir = path.dirname(mdPath);
390
+ const candidates = new Set<string>();
391
+ const addCandidate = (candidate: string): void => {
392
+ const normalized = candidate.split(path.sep).join("/");
393
+ if (normalized.length === 0) return;
394
+ candidates.add(normalized);
395
+ const isParentTraversal = normalized === ".." || normalized.startsWith("../");
396
+ if (!normalized.startsWith("./") && !normalized.startsWith("/") && !isParentTraversal) {
397
+ candidates.add(`./${normalized}`);
398
+ }
399
+ };
400
+
401
+ // Markdown links may be file-relative to the note or memory-root-relative in
402
+ // Remnic notes. Match both forms so verification cannot miss a live local ref.
403
+ addCandidate(path.relative(mdDir, assetAbsolute));
404
+ const originalPath = asset.originalPath.split(path.sep).join("/");
405
+ const originalAsFileRelative = path.resolve(mdDir, ...originalPath.split("/"));
406
+ if (path.resolve(originalAsFileRelative) === path.resolve(assetAbsolute)) {
407
+ addCandidate(originalPath);
408
+ }
409
+ addCandidate(`/${originalPath}`);
410
+
411
+ const alternatives = [...candidates]
412
+ .sort((a, b) => b.length - a.length)
413
+ .map(escapeRegex)
414
+ .join("|");
415
+
416
+ return new RegExp(`(!?\\[[^\\]]*\\]\\()(${alternatives})(\\))`, "g");
417
+ }
418
+
224
419
  async function stageClean(
225
420
  memoryDir: string,
226
421
  assets: BinaryAssetRecord[],
422
+ backend: BinaryStorageBackend,
227
423
  gracePeriodDays: number,
228
424
  log: PipelineLogger,
229
425
  dryRun: boolean,
@@ -243,6 +439,15 @@ async function stageClean(
243
439
 
244
440
  for (const asset of candidates) {
245
441
  const mirroredMs = new Date(asset.mirroredAt).getTime();
442
+ if (!Number.isFinite(mirroredMs)) {
443
+ const msg = `clean blocked for ${asset.originalPath}: manifest mirroredAt is invalid`;
444
+ log.error(`[binary-lifecycle] ${msg}`);
445
+ errors.push(msg);
446
+ if (!dryRun) {
447
+ asset.status = "error";
448
+ }
449
+ continue;
450
+ }
246
451
  const ageMs = now - mirroredMs;
247
452
 
248
453
  if (!forceClean && ageMs < graceMs) {
@@ -250,7 +455,34 @@ async function stageClean(
250
455
  continue;
251
456
  }
252
457
 
253
- const fullPath = path.join(memoryDir, asset.originalPath);
458
+ const fullPath = resolveManifestAssetPath(memoryDir, asset.originalPath);
459
+ if (fullPath === null) {
460
+ const msg = `clean blocked for ${asset.originalPath}: manifest path is outside memoryDir`;
461
+ log.error(`[binary-lifecycle] ${msg}`);
462
+ errors.push(msg);
463
+ if (!dryRun) {
464
+ asset.status = "error";
465
+ }
466
+ continue;
467
+ }
468
+
469
+ let remoteExists: boolean;
470
+ try {
471
+ remoteExists = await backend.exists(asset.mirroredPath);
472
+ } catch (err) {
473
+ const msg = `clean blocked for ${asset.originalPath}: failed to verify mirrored copy: ${err instanceof Error ? err.message : String(err)}`;
474
+ log.error(`[binary-lifecycle] ${msg}`);
475
+ errors.push(msg);
476
+ continue;
477
+ }
478
+
479
+ if (!remoteExists) {
480
+ const msg = `clean blocked for ${asset.originalPath}: mirrored copy is missing`;
481
+ log.error(`[binary-lifecycle] ${msg}`);
482
+ errors.push(msg);
483
+ continue;
484
+ }
485
+
254
486
  try {
255
487
  const currentHash = await hashFile(fullPath);
256
488
  if (currentHash !== asset.contentHash) {
@@ -269,9 +501,11 @@ async function stageClean(
269
501
  } catch (err) {
270
502
  if ((err as NodeJS.ErrnoException).code === "ENOENT") {
271
503
  // Already gone — mark as cleaned.
272
- asset.status = "cleaned";
273
- asset.cleanedAt = new Date().toISOString();
274
- cleaned++;
504
+ if (!dryRun) {
505
+ asset.status = "cleaned";
506
+ asset.cleanedAt = new Date().toISOString();
507
+ cleaned++;
508
+ }
275
509
  } else {
276
510
  const msg = `clean failed for ${asset.originalPath}: ${err instanceof Error ? err.message : String(err)}`;
277
511
  log.error(`[binary-lifecycle] ${msg}`);
@@ -359,12 +593,20 @@ export async function runBinaryLifecyclePipeline(
359
593
  );
360
594
 
361
595
  // Stage 2: Redirect
362
- const redirectResult = await stageRedirect(memoryDir, manifest.assets, log, dryRun);
596
+ const redirectResult = await stageRedirect(
597
+ memoryDir,
598
+ manifest.assets,
599
+ log,
600
+ dryRun,
601
+ opts?.readMarkdownFile ?? ((filePath: string) => fsp.readFile(filePath, "utf-8")),
602
+ opts?.writeMarkdownFile ?? ((filePath: string, content: string) => fsp.writeFile(filePath, content, "utf-8")),
603
+ );
363
604
 
364
605
  // Stage 3: Clean
365
606
  const cleanResult = await stageClean(
366
607
  memoryDir,
367
608
  manifest.assets,
609
+ backend,
368
610
  config.gracePeriodDays,
369
611
  log,
370
612
  dryRun,
@@ -43,6 +43,8 @@ export interface BinaryAssetRecord {
43
43
  originalPath: string;
44
44
  /** Path (or URL) in the backend storage. */
45
45
  mirroredPath: string;
46
+ /** Optional user-resolvable target to write into markdown links. */
47
+ redirectPath?: string;
46
48
  /** SHA-256 hex digest of file content. */
47
49
  contentHash: string;
48
50
  /** File size in bytes. */