@symbiosis-lab/moss-plugin-matters 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +88 -0
  2. package/README.md +18 -0
  3. package/assets/icon.svg +1 -0
  4. package/assets/manifest.json +36 -0
  5. package/codegen.ts +26 -0
  6. package/e2e/moss-cli.test.ts +338 -0
  7. package/features/api/fetch-articles.feature +39 -0
  8. package/features/auth/wallet-auth.feature +27 -0
  9. package/features/download/retry-logic.feature +36 -0
  10. package/features/download/self-correcting.feature +83 -0
  11. package/features/download/worker-pool.feature +29 -0
  12. package/features/social/fetch-social-data.feature +40 -0
  13. package/features/steps/api.steps.ts +180 -0
  14. package/features/steps/download.steps.ts +423 -0
  15. package/features/steps/incremental-sync.steps.ts +105 -0
  16. package/features/steps/self-correcting.steps.ts +575 -0
  17. package/features/steps/social.steps.ts +257 -0
  18. package/features/steps/syndication.steps.ts +264 -0
  19. package/features/steps/wallet-auth.steps.ts +185 -0
  20. package/features/sync/article-sync.feature +49 -0
  21. package/features/sync/homepage-grid.feature +43 -0
  22. package/features/sync/incremental-sync.feature +28 -0
  23. package/features/syndication/create-draft.feature +35 -0
  24. package/package.json +58 -0
  25. package/src/__generated__/schema.graphql +4289 -0
  26. package/src/__generated__/types.ts +5355 -0
  27. package/src/__tests__/api.test.ts +678 -0
  28. package/src/__tests__/auth-route.test.ts +38 -0
  29. package/src/__tests__/auth-routing.test.ts +462 -0
  30. package/src/__tests__/auto-detect.test.ts +412 -0
  31. package/src/__tests__/binding-guard.test.ts +256 -0
  32. package/src/__tests__/config.test.ts +212 -0
  33. package/src/__tests__/converter.test.ts +289 -0
  34. package/src/__tests__/credential.test.ts +332 -0
  35. package/src/__tests__/domain.test.ts +341 -0
  36. package/src/__tests__/downloader.test.ts +679 -0
  37. package/src/__tests__/folder-detection.test.ts +289 -0
  38. package/src/__tests__/force-fresh-login.test.ts +236 -0
  39. package/src/__tests__/main.test.ts +2437 -0
  40. package/src/__tests__/progress.test.ts +93 -0
  41. package/src/__tests__/session.test.ts +375 -0
  42. package/src/__tests__/social-integration.test.ts +386 -0
  43. package/src/__tests__/social-sync-logic.test.ts +107 -0
  44. package/src/__tests__/social.test.ts +788 -0
  45. package/src/__tests__/sync.test.ts +1273 -0
  46. package/src/__tests__/syndication-toast-law.test.ts +649 -0
  47. package/src/__tests__/syndication.test.ts +125 -0
  48. package/src/__tests__/test-profile-escape.test.ts +209 -0
  49. package/src/__tests__/url-detect.test.ts +79 -0
  50. package/src/__tests__/utils.test.ts +226 -0
  51. package/src/api.ts +1366 -0
  52. package/src/auth-route.ts +38 -0
  53. package/src/config.ts +80 -0
  54. package/src/converter.ts +305 -0
  55. package/src/credential.ts +329 -0
  56. package/src/domain.ts +183 -0
  57. package/src/downloader.ts +761 -0
  58. package/src/main.ts +2092 -0
  59. package/src/progress.ts +89 -0
  60. package/src/queries/user.graphql +85 -0
  61. package/src/queries/viewer.graphql +104 -0
  62. package/src/social.ts +413 -0
  63. package/src/sync.ts +818 -0
  64. package/src/types.ts +477 -0
  65. package/src/url-detect.ts +49 -0
  66. package/src/utils.ts +305 -0
  67. package/test-fixtures/syndication-test-site/input/index.md +8 -0
  68. package/test-fixtures/syndication-test-site/input/posts/rich-test-article.md +90 -0
  69. package/test-helpers/TEST_ACCOUNT.md +151 -0
  70. package/test-helpers/api-client.ts +252 -0
  71. package/test-helpers/fixtures/articles.ts +147 -0
  72. package/test-helpers/wallet-auth.ts +305 -0
  73. package/test-setup/e2e.ts +93 -0
  74. package/tsconfig.json +23 -0
  75. package/vitest.config.ts +39 -0
@@ -0,0 +1,761 @@
1
+ /**
2
+ * Asset download functionality with incremental file updates
3
+ *
4
+ * DESIGN PRINCIPLE: Write files incrementally, not in a batch at the end.
5
+ *
6
+ * This ensures:
7
+ * 1. If interrupted, completed files are already saved
8
+ * 2. Running again skips already-updated files (self-correcting)
9
+ * 3. No "Phase 3" batch write that can silently fail
10
+ */
11
+
12
+ import {
13
+ reportError,
14
+ sleep,
15
+ } from "./utils";
16
+ import { overallProgress, type ProgressReporter } from "./progress";
17
+ import { downloadAsset as downloadAssetRust } from "@symbiosis-lab/moss-api";
18
+ import { extractRemoteImageUrls, extractMarkdownLinks } from "./converter";
19
+ import { isInternalMattersLink as isDomainInternalLink, extractShortHash } from "./domain";
20
+ import { listFiles, readFile, writeFile } from "@symbiosis-lab/moss-api";
21
+
22
+ // ============================================================================
23
+ // Constants
24
+ // ============================================================================
25
+
26
/**
 * Upper bound on download attempts per asset. The retry loop counts from 1,
 * so the first try is attempt 1 and the last is attempt MAX_RETRIES.
 */
const MAX_RETRIES = 3;
// Concurrency is not limited here: the Rust side gates downloads with a
// Semaphore (DOWNLOAD_CONCURRENCY_LIMIT=5).
// Timeouts are likewise enforced Rust-side via tokio::time::timeout (default 30s).
29
+
30
+ // ============================================================================
31
+ // Pure Helper Functions (exported for testing)
32
+ // ============================================================================
33
+
34
/**
 * Find the first UUID-shaped segment in a URL (Matters asset IDs are UUIDs).
 * Works for URLs from both assets.matters.news and imagedelivery.net, since
 * only the UUID shape is inspected — not the host.
 *
 * @param url - URL (or path) to search
 * @returns the UUID exactly as it appears in the input (case preserved), or
 *          `null` when the input has no UUID segment
 */
export function extractAssetUuid(url: string): string | null {
  const uuidPattern = /([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})/i;
  const found = uuidPattern.exec(url);
  if (found === null) {
    return null;
  }
  return found[1];
}
42
+
43
/**
 * Backslash-escape every regex metacharacter in `str` so the result can be
 * embedded in a `RegExp` source and match the input literally.
 *
 * @param str - raw text to be matched literally
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by `\`
 */
export function escapeRegex(str: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaChars, (ch) => `\\${ch}`);
}
49
+
50
/**
 * Create a global regex matching any http(s) URL that contains `assetId`.
 * Used to update references when one asset is served from several CDN URLs.
 *
 * A URL is taken to end at the first `)`, whitespace, or `"` character.
 *
 * @param assetId - asset identifier; matched literally (regex-escaped)
 * @returns a fresh `RegExp` with the `g` flag set
 */
export function buildAssetUrlPattern(assetId: string): RegExp {
  const urlChars = '[^)\\s"]*';
  const source = 'https?://' + urlChars + escapeRegex(assetId) + urlChars;
  return new RegExp(source, 'g');
}
60
+
61
/**
 * Replace every URL containing the asset ID with a local path.
 *
 * @param content - text to search (typically a markdown body)
 * @param assetId - asset identifier the URL must contain
 * @param localPath - text substituted for each matching URL, inserted verbatim
 * @returns the (possibly) modified content and whether any replacement was made
 */
export function replaceAssetUrls(
  content: string,
  assetId: string,
  localPath: string
): { content: string; replaced: boolean } {
  const pattern = buildAssetUrlPattern(assetId);

  if (!pattern.test(content)) {
    return { content, replaced: false };
  }

  // test() on a global regex advances lastIndex; rewind before reusing it.
  pattern.lastIndex = 0;

  // A replacer FUNCTION is used instead of a replacement string so that `$`
  // sequences in localPath (`$&`, `$$`, `$'`, ...) are inserted literally
  // rather than being interpreted as substitution patterns.
  const newContent = content.replace(pattern, () => localPath);
  return { content: newContent, replaced: true };
}
82
+
83
/**
 * Replace a full markdown image token `![alt](url)` whose URL contains the
 * asset id with a filename-only wikilink `![[filename]]` (B2).
 *
 * Unlike `replaceAssetUrls` — which swaps only the URL substring and leaves the
 * `![alt](...)` wrapper plus a depth-dependent relative path (`../assets/…` vs
 * `../../assets/…`) — this replaces the ENTIRE image token so moss's shared
 * filename-stem asset resolver (`resolve::asset_class::resolve_asset_ref`)
 * resolves it from ANY article depth with no `../` chains. The basename carries
 * the real extension, so the extensionless-ref bug (B8) disappears too. Alt
 * text is dropped to match moss/Obsidian `![[file]]` embed syntax.
 *
 * @param content - markdown text to search
 * @param assetId - asset identifier the image URL must contain
 * @param filename - basename to place inside `![[...]]`, inserted verbatim
 * @returns the (possibly) modified content and whether any token was replaced
 */
export function replaceImageWithWikilink(
  content: string,
  assetId: string,
  filename: string
): { content: string; replaced: boolean } {
  // `!\[[^\]]*\]` = the `![alt]` part (alt may be empty); then `(url[ "title"])`
  // where the url contains the asset id. The optional ` "title"` trailer matches
  // htmd's `![alt](url "title")` output for <img title=...> (else the CDN URL
  // would be left in the body — an orphaned-asset / broken-image leak).
  const pattern = new RegExp(
    `!\\[[^\\]]*\\]\\(https?://[^)\\s"]*${escapeRegex(assetId)}[^)\\s"]*(?:\\s+"[^"]*")?\\)`,
    'g'
  );
  if (!pattern.test(content)) {
    return { content, replaced: false };
  }

  // test() on a global regex advances lastIndex; rewind before reusing it.
  pattern.lastIndex = 0;

  // Replacer function (not a replacement string) so a `$` in the filename is
  // never interpreted as a substitution pattern such as `$&` or `$$`.
  const wikilink = `![[${filename}]]`;
  const newContent = content.replace(pattern, () => wikilink);
  return { content: newContent, replaced: true };
}
115
+
116
/**
 * Replace a full markdown image token whose URL is the EXACT given URL with a
 * filename-only wikilink `![[filename]]` (B6 — legacy non-UUID CDN assets).
 *
 * `replaceImageWithWikilink` keys on a Matters asset UUID; legacy cloudfront
 * images (e.g. `assets.matters.news/.../image.jpg` with no UUID segment) have
 * no UUID to key on, so their references were never rewritten — the dead remote
 * CDN URL leaked into the published body. This matches on the literal URL
 * instead, so a downloaded legacy asset still localizes. The optional ` "title"`
 * trailer matches htmd's `![alt](url "title")` output.
 *
 * @param content - markdown text to search
 * @param url - exact image URL to match (regex-escaped before use)
 * @param filename - basename to place inside `![[...]]`, inserted verbatim
 * @returns the (possibly) modified content and whether any token was replaced
 */
export function replaceImageUrlWithWikilink(
  content: string,
  url: string,
  filename: string
): { content: string; replaced: boolean } {
  const pattern = new RegExp(
    `!\\[[^\\]]*\\]\\(${escapeRegex(url)}(?:\\s+"[^"]*")?\\)`,
    'g'
  );
  if (!pattern.test(content)) {
    return { content, replaced: false };
  }

  // test() on a global regex advances lastIndex; rewind before reusing it.
  pattern.lastIndex = 0;

  // Replacer function (not a replacement string) so a `$` in the filename is
  // never interpreted as a substitution pattern such as `$&` or `$$`.
  const wikilink = `![[${filename}]]`;
  const newContent = content.replace(pattern, () => wikilink);
  return { content: newContent, replaced: true };
}
143
+
144
+ // ============================================================================
145
+ // Fibonacci Backoff
146
+ // ============================================================================
147
+
148
/**
 * Backoff delay for a retry attempt, following the Fibonacci sequence.
 *
 * @param attempt - attempt number; anything up to 2 yields the base delay
 * @returns delay in milliseconds: 1000, 1000, 2000, 3000, 5000, 8000, 13000, 21000...
 */
function getFibonacciDelay(attempt: number): number {
  let previous = 1;
  let current = 1;
  // For attempt <= 2 the loop never runs and the base value (1) is returned.
  for (let step = 2; step < attempt; step++) {
    const next = previous + current;
    previous = current;
    current = next;
  }
  return current * 1000;
}
160
+
161
/**
 * Decide whether an HTTP status code denotes a transient failure worth retrying.
 * Retryable: 408 (Request Timeout), 429 (Too Many Requests), and any 5xx.
 *
 * @param status - HTTP status code
 * @returns `true` when the request may be retried
 */
function isRetryableHttpStatus(status: number): boolean {
  if (status === 408 || status === 429) {
    return true;
  }
  const isServerError = status >= 500 && status < 600;
  return isServerError;
}
168
+
169
+ // ============================================================================
170
+ // Asset Download
171
+ // ============================================================================
172
+
173
/**
 * Download failure that optionally carries the HTTP status, so callers can
 * classify it as transient (retry) or permanent (give up).
 */
class DownloadError extends Error {
  /** HTTP status of the failed response; `undefined` when there was none. */
  public readonly httpStatus?: number;

  constructor(message: string, httpStatus?: number) {
    super(message);
    this.httpStatus = httpStatus;
    this.name = "DownloadError";
  }

  /**
   * Whether the failure is transient. Errors without a status (network-level
   * failures) always count as retryable; otherwise the status code decides.
   */
  isRetryable(): boolean {
    return this.httpStatus === undefined || isRetryableHttpStatus(this.httpStatus);
  }
}
187
+
188
/**
 * Download one asset, retrying transient failures with Fibonacci backoff.
 *
 * The download itself is delegated to the moss API (`downloadAssetRust`),
 * which saves into the "assets" folder and reports the path it actually wrote.
 * Per the notes in this module, timeout (30s) and concurrency (5 parallel)
 * are enforced on the Rust side, not here.
 *
 * Never throws for a failed download: exceptions from the download call are
 * caught and retried, and the final outcome is reported in the return value.
 *
 * Log markers:
 * - [↓] Attempt N/M: starting an attempt
 * - [✓] Downloaded: success
 * - [!] HTTP {status}: non-ok response (retryable or final)
 * - [✗] TIMEOUT: thrown error whose message mentions "timeout"
 * - [✗] ERROR: any other thrown error
 * - [↻] Retrying: backoff announcement with delay
 * - [✗] FAILED: gave up
 *
 * @param url - remote asset URL
 * @returns `success: true` with `actualPath` on success; otherwise
 *          `success: false`, an empty `actualPath`, and an `error` description
 */
async function downloadAssetWithRetry(
  url: string
): Promise<{ actualPath: string; success: boolean; error?: string }> {
  // Announce the backoff for the attempt that just failed, then wait it out.
  const pauseBeforeRetry = async (failedAttempt: number): Promise<void> => {
    const delay = getFibonacciDelay(failedAttempt);
    console.warn(` [↻] Retrying in ${delay}ms (attempt ${failedAttempt + 1}/${MAX_RETRIES})`);
    await sleep(delay);
  };

  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    const outOfAttempts = attempt === MAX_RETRIES;

    try {
      console.log(` [↓] Attempt ${attempt}/${MAX_RETRIES}: ${url}`);

      // Rust handles timeout (30s) and concurrency (5 parallel)
      const result = await downloadAssetRust(url, "assets");

      if (result.ok) {
        console.log(` [✓] Downloaded: ${result.actualPath}`);
        return { actualPath: result.actualPath, success: true };
      }

      // Non-ok response: wrap the status so retryability can be classified.
      const httpFailure = new DownloadError(`HTTP ${result.status}`, result.status);
      console.warn(` [!] HTTP ${result.status} for ${url}`);

      if (outOfAttempts || !httpFailure.isRetryable()) {
        console.error(` [✗] FAILED after ${attempt} attempts: ${url} - HTTP ${result.status}`);
        return { actualPath: "", success: false, error: `HTTP ${result.status}` };
      }

      await pauseBeforeRetry(attempt);
    } catch (fetchError: unknown) {
      // Thrown errors are always treated as retryable until attempts run out.
      const message = fetchError instanceof Error ? fetchError.message : String(fetchError);

      if (message.toLowerCase().includes("timeout")) {
        console.error(` [✗] TIMEOUT: ${url} - ${message}`);
      } else {
        console.error(` [✗] ERROR: ${url} - ${message}`);
      }

      if (outOfAttempts) {
        console.error(` [✗] FAILED after ${MAX_RETRIES} attempts: ${url}`);
        return { actualPath: "", success: false, error: message };
      }

      await pauseBeforeRetry(attempt);
    }
  }

  // Defensive fallback: every path through the final attempt returns above.
  console.error(` [✗] FAILED after ${MAX_RETRIES} attempts: ${url}`);
  return { actualPath: "", success: false, error: "Max retries exceeded" };
}
259
+
260
+ // ============================================================================
261
+ // Types
262
+ // ============================================================================
263
+
264
/** A remote media reference discovered while scanning one markdown file. */
interface MediaUrl {
  /** The remote URL exactly as it appears in the file. */
  url: string;
  /** UUID extracted from `url`, or `null` when the URL carries none. */
  uuid: string | null;
  /** True when the reference occurs in the markdown body. */
  inBody: boolean;
  /** True when the reference is the frontmatter `cover` value. */
  inCover: boolean;
}
271
+
272
/** Parsed snapshot of a markdown file queued for media-reference updates. */
interface FileState {
  /** Project path of the markdown file. */
  path: string;
  /** Frontmatter as returned by the frontmatter parser. */
  frontmatter: Record<string, unknown>;
  /** Markdown body (everything after the frontmatter). */
  body: string;
  /** Remote media references found in the body and/or the cover field. */
  mediaUrls: MediaUrl[];
}
279
+
280
+ // ============================================================================
281
+ // Main Function: downloadMediaAndUpdate
282
+ // ============================================================================
283
+
284
/**
 * Download all media for all markdown files in a project and update references.
 *
 * DESIGN: Fire all downloads in parallel, let Rust handle concurrency.
 *
 * 1. **Parallel downloads**: All downloads start immediately via Promise.allSettled
 * 2. **Rust-side concurrency**: Semaphore limits to 5 concurrent downloads
 * 3. **Rust-side timeout**: tokio::time::timeout enforces 30s cumulative timeout
 * 4. **Self-correcting**: Running again skips already-downloaded assets (by UUID)
 *
 * Flow:
 * 1. Scan all files to collect unique media URLs needing download
 * 2. Fire all downloads in parallel (Rust handles concurrency/timeout)
 * 3. After all complete, update references in each file
 * 4. Write modified files to disk
 *
 * Deduplication: each asset is downloaded at most once per run. UUID-bearing
 * URLs are keyed by UUID; legacy URLs without a UUID are keyed by the literal
 * URL, so a legacy image referenced from several places is fetched (and, on
 * failure, reported) only once.
 *
 * @param onProgress - optional reporter, called as each download completes
 *                     and once more at the end of the phase
 * @returns counters plus error details. `imagesSkipped` counts REFERENCES to
 *          assets already present under `assets/` (one per reference, not per
 *          unique asset). Failures are collected in `errors`; this function
 *          does not throw for list/download/write failures.
 */
export async function downloadMediaAndUpdate(
  onProgress?: ProgressReporter,
): Promise<{
  filesProcessed: number;
  imagesDownloaded: number;
  imagesSkipped: number;
  errors: string[];
  /**
   * Source URLs of images that failed to download (the dead CDN references
   * still sitting in the article bodies). The caller turns each into a
   * per-image advisory so the user sees WHICH image broke — not an opaque
   * "N failed" count. A subset of `errors` carrying just the image-download
   * failures (not list/write failures).
   */
  failedImageUrls: string[];
}> {
  const result = {
    filesProcessed: 0,
    imagesDownloaded: 0,
    imagesSkipped: 0,
    errors: [] as string[],
    failedImageUrls: [] as string[],
  };

  console.log("📸 Downloading media assets and updating references...");

  // Get all project files once
  let allProjectFiles: string[];
  try {
    allProjectFiles = await listFiles();
  } catch (err) {
    console.error(`Failed to list project files: ${err}`);
    result.errors.push(`Failed to list files: ${err}`);
    return result;
  }

  const allMdFiles = allProjectFiles.filter(f => f.endsWith(".md"));
  console.log(` Found ${allMdFiles.length} markdown files`);

  // Build UUID→asset path mapping for existing assets
  // This allows us to skip downloads when assets already exist
  const existingAssetsByUuid = new Map<string, string>();
  for (const assetPath of allProjectFiles.filter(f => f.startsWith("assets/"))) {
    const uuid = extractAssetUuid(assetPath);
    if (uuid) {
      existingAssetsByUuid.set(uuid, assetPath);
    }
  }
  console.log(` Found ${existingAssetsByUuid.size} existing assets`);

  const { parseFrontmatter, regenerateFrontmatter } = await import("./converter");

  // ========================================================================
  // Phase 1: Scan files to find those with remote media
  // ========================================================================

  const filesToProcess: FileState[] = [];

  for (const filePath of allMdFiles) {
    try {
      const content = await readFile(filePath);
      const parsed = parseFrontmatter(content);
      if (!parsed) continue;

      const mediaUrls: MediaUrl[] = [];

      // Extract body media
      const bodyMedia = extractRemoteImageUrls(parsed.body);
      for (const media of bodyMedia) {
        mediaUrls.push({
          url: media.url,
          uuid: extractAssetUuid(media.url),
          inBody: true,
          inCover: false,
        });
      }

      // Extract cover media
      const cover = parsed.frontmatter.cover;
      if (typeof cover === "string" && (cover.startsWith("http://") || cover.startsWith("https://"))) {
        mediaUrls.push({
          url: cover,
          uuid: extractAssetUuid(cover),
          inBody: false,
          inCover: true,
        });
      }

      // Skip files with no remote media
      if (mediaUrls.length === 0) continue;

      filesToProcess.push({
        path: filePath,
        frontmatter: parsed.frontmatter,
        body: parsed.body,
        mediaUrls,
      });
    } catch (err) {
      // Best-effort: a file that can't be read or parsed is skipped, but the
      // skip is logged so it is not completely invisible.
      console.warn(` [!] Skipping ${filePath}: ${err}`);
    }
  }

  console.log(` Found ${filesToProcess.length} files with remote media`);

  if (filesToProcess.length === 0) {
    return result;
  }

  // Count total unique media (for progress reporting). UUID assets are keyed
  // by UUID, legacy non-UUID assets by their literal URL, so a repeated
  // reference is counted once either way.
  const uniqueMediaKeys = new Set<string>();
  for (const file of filesToProcess) {
    for (const media of file.mediaUrls) {
      uniqueMediaKeys.add(media.uuid ?? media.url);
    }
  }
  const totalUrls = uniqueMediaKeys.size;
  console.log(` Total unique media URLs: ${totalUrls}`);

  // ========================================================================
  // Phase 2: Download all images in parallel (Rust handles concurrency)
  // ========================================================================

  // Collect all unique media that needs downloading (not already in existing assets)
  const mediaToDownload: { url: string; uuid: string | null }[] = [];
  const queuedKeys = new Set<string>();

  for (const file of filesToProcess) {
    for (const media of file.mediaUrls) {
      if (media.uuid) {
        // Skip if already queued in this batch
        if (queuedKeys.has(media.uuid)) continue;

        // Skip if asset already exists
        if (existingAssetsByUuid.has(media.uuid)) {
          result.imagesSkipped++;
          continue;
        }
        queuedKeys.add(media.uuid);
      } else {
        // Legacy asset without a UUID: dedupe on the literal URL so the same
        // image is not downloaded (or reported as failed) more than once.
        if (queuedKeys.has(media.url)) continue;
        queuedKeys.add(media.url);
      }

      mediaToDownload.push({ url: media.url, uuid: media.uuid });
    }
  }

  console.log(` Downloading ${mediaToDownload.length} media files (${result.imagesSkipped} skipped)...`);

  // Fire all downloads in parallel - Rust Semaphore limits to 5 concurrent
  // Promise.allSettled ensures we get results for all, even if some fail
  let completedCount = 0;
  const downloadPromises = mediaToDownload.map(async (media) => {
    const downloadResult = await downloadAssetWithRetry(media.url);

    // Report progress as each download COMPLETES. Count completions (single-
    // threaded `++` in the continuation is atomic), NOT the creation index —
    // downloads finish out of order, so an index-based fraction would jump the
    // hairline forward then back. media download is the heaviest phase
    // (weight 35/100); feeding the unified task here keeps the hairline
    // advancing monotonically instead of stalling through it.
    completedCount++;
    onProgress?.(
      "downloading_media",
      overallProgress("downloading_media", completedCount, mediaToDownload.length),
      100,
      `Downloading ${completedCount}/${mediaToDownload.length}...`
    );

    return { media, downloadResult };
  });

  const downloadResults = await Promise.allSettled(downloadPromises);

  // Build uuid → localPath map from successful downloads. Also key by the
  // literal URL so legacy non-UUID assets (no UUID to key on) can still be
  // localized in Phase 3 (B6).
  const downloadedUuids = new Map<string, string>();
  const downloadedByUrl = new Map<string, string>();

  for (const settled of downloadResults) {
    if (settled.status === "fulfilled") {
      const { media, downloadResult } = settled.value;
      if (downloadResult.success) {
        result.imagesDownloaded++;
        downloadedByUrl.set(media.url, downloadResult.actualPath);
        // Track by UUID for dedup and reference updates
        if (media.uuid) {
          downloadedUuids.set(media.uuid, downloadResult.actualPath);
          existingAssetsByUuid.set(media.uuid, downloadResult.actualPath);
        }
      } else {
        // Surface the failure as a user-visible diagnostic (not just a count +
        // a console line). A failed download leaves the dead CDN URL in the
        // body, so the user needs to know which image broke (B6). Non-fatal:
        // sync continues, partial success is allowed.
        const msg = `Image download failed (${downloadResult.error}): ${media.url}`;
        result.errors.push(`${media.url}: ${downloadResult.error}`);
        result.failedImageUrls.push(media.url);
        await reportError(msg, "downloading_media", false);
      }
    } else {
      // Promise rejected (shouldn't happen with our try/catch in
      // downloadAssetWithRetry). No `media.url` is available here, so this stays
      // in `errors` only — it is intentionally NOT pushed to `failedImageUrls`,
      // whose entries must be real image URLs for the per-image advisory.
      const msg = `Image download failed: ${settled.reason}`;
      result.errors.push(`Download failed: ${settled.reason}`);
      await reportError(msg, "downloading_media", false);
    }
  }

  console.log(` Downloaded ${result.imagesDownloaded}/${mediaToDownload.length} media files`);

  // ========================================================================
  // Phase 3: Update references in files
  // ========================================================================

  for (const file of filesToProcess) {
    let modified = false;
    let { frontmatter, body } = file;

    // Deduplicate URLs within this file by UUID, merging inBody/inCover flags
    const mediaByKey = new Map<string, MediaUrl>();
    for (const media of file.mediaUrls) {
      const key = media.uuid || media.url;
      const existing = mediaByKey.get(key);
      if (existing) {
        existing.inBody = existing.inBody || media.inBody;
        existing.inCover = existing.inCover || media.inCover;
      } else {
        mediaByKey.set(key, { ...media });
      }
    }
    const uniqueMedia = Array.from(mediaByKey.values());

    // Update references for each media
    for (const media of uniqueMedia) {
      // Resolve the downloaded/existing local path. UUID assets key on UUID;
      // legacy non-UUID CDN assets (no UUID segment) key on the literal URL
      // (B6) — previously these were skipped (`if (!media.uuid) continue;`)
      // and their dead CDN URL leaked into the published body.
      const localPath = media.uuid
        ? (downloadedUuids.get(media.uuid) || existingAssetsByUuid.get(media.uuid))
        : downloadedByUrl.get(media.url);
      if (!localPath) continue;

      // Emit a filename-only wikilink (B2/B8): depth-independent, resolved by
      // moss's shared filename-stem asset resolver from any article depth — no
      // `../` chains. The basename carries the real extension. Replaces the
      // prior depth-dependent `calculateRelativePath` + URL-substring rewrite.
      const filename = localPath.split('/').pop() || localPath;

      // Update body references → `![[filename]]`. UUID assets match any CDN URL
      // carrying the UUID; non-UUID assets match the exact URL.
      if (media.inBody) {
        const { content: newBody, replaced } = media.uuid
          ? replaceImageWithWikilink(body, media.uuid, filename)
          : replaceImageUrlWithWikilink(body, media.url, filename);
        if (replaced) {
          body = newBody;
          modified = true;
        }
      }

      // Update cover reference → bare filename (frontmatter; resolver finds it).
      if (media.inCover) {
        const coverStr = String(frontmatter.cover || '');
        const coverMatches = media.uuid
          ? coverStr.includes(media.uuid)
          : coverStr === media.url;
        if (coverMatches) {
          frontmatter = { ...frontmatter, cover: filename };
          modified = true;
        }
      }
    }

    // Write file if modified
    if (modified) {
      try {
        const newContent = regenerateFrontmatter(frontmatter) + "\n" + body;
        await writeFile(file.path, newContent);
        result.filesProcessed++;
        console.log(` [📝] Wrote: ${file.path}`);
      } catch (err) {
        result.errors.push(`Failed to write ${file.path}: ${err}`);
        console.error(` [✗] Failed to write: ${file.path} - ${err}`);
      }
    }
  }

  // Final report. Snap the band to 100% using `totalUrls` (ALL unique media,
  // including already-cached ones) — NOT `mediaToDownload.length`, which is 0
  // when everything was cached and would jump progress BACK to the band start.
  onProgress?.(
    "downloading_media",
    overallProgress("downloading_media", totalUrls, totalUrls),
    100,
    `Downloaded ${result.imagesDownloaded} media, updated ${result.filesProcessed} files`
  );

  console.log(` ✅ Downloaded ${result.imagesDownloaded}, skipped ${result.imagesSkipped}, updated ${result.filesProcessed} files`);

  return result;
}
609
+
610
+ // ============================================================================
611
+ // Internal Link Rewriting
612
+ // ============================================================================
613
+
614
/**
 * Tell whether a URL points to the current user's own Matters content.
 * Thin local alias: the decision is delegated entirely to the domain module.
 *
 * @param url - link target to classify
 * @param userName - Matters user name of the current user
 */
function isInternalMattersLink(url: string, userName: string): boolean {
  const pointsToOwnContent = isDomainInternalLink(url, userName);
  return pointsToOwnContent;
}
620
+
621
/**
 * Rewrite internal Matters links to local paths in a single file's content.
 *
 * For every markdown link that points at the current user's own Matters
 * content and has a known local counterpart, the link destination is replaced
 * with a path relative to `currentFilePath`. Link text is left untouched.
 *
 * NOTE(review): assumes each `fullMatch` from `extractMarkdownLinks` is the
 * complete `[text](url ...)` token as it appears in `content` — confirm
 * against the converter module.
 *
 * @param content - markdown body to process
 * @param articlePathMap - lookup from article URL or shortHash to local path
 * @param userName - Matters user name owning the links to rewrite
 * @param currentFilePath - project path of the file `content` belongs to
 * @returns the rewritten content and the number of links actually rewritten
 */
function rewriteLinksInContent(
  content: string,
  articlePathMap: Map<string, string>,
  userName: string,
  currentFilePath: string
): { content: string; linksRewritten: number } {
  let modifiedContent = content;
  let linksRewritten = 0;

  for (const { url, fullMatch } of extractMarkdownLinks(content)) {
    // Only own-user canonical `/@userName/...` links are rewritten. `/a/<shortHash>`
    // short-links never pass this guard, so the shared extractShortHash's short-link
    // support is intentionally unreachable here — body cross-links are canonical form.
    if (!isInternalMattersLink(url, userName)) continue;

    // Try exact URL match first, then fall back to the shortHash.
    let localPath = articlePathMap.get(url);
    if (!localPath) {
      const shortHash = extractShortHash(url);
      if (shortHash) {
        localPath = articlePathMap.get(shortHash);
      }
    }
    if (!localPath) continue;

    // Calculate relative path from current file to target file
    const relativePath = calculateRelativePath(currentFilePath, localPath);

    // Locate the DESTINATION occurrence of the URL inside the link token.
    // When the link text equals the URL (`[https://…](https://…)`), the first
    // occurrence is the text — rewriting that one would leave the href remote.
    // Prefer the occurrence right after `](`; otherwise take the last one.
    const destinationMarker = fullMatch.lastIndexOf(`](${url}`);
    const urlStart = destinationMarker >= 0
      ? destinationMarker + 2
      : fullMatch.lastIndexOf(url);
    if (urlStart < 0) continue;

    const newLink =
      fullMatch.slice(0, urlStart) +
      relativePath +
      fullMatch.slice(urlStart + url.length);

    // Splice by index rather than String.replace with a replacement string, so
    // `$` sequences in the link text or path are never treated as substitution
    // patterns. Only count the link when it was really found and replaced.
    const position = modifiedContent.indexOf(fullMatch);
    if (position < 0) continue;

    modifiedContent =
      modifiedContent.slice(0, position) +
      newLink +
      modifiedContent.slice(position + fullMatch.length);
    linksRewritten++;
  }

  return { content: modifiedContent, linksRewritten };
}
662
+
663
/**
 * Compute the relative path leading from one file to another.
 * e.g., from "article/collection/post.md" to "article/other.md" → "../other.md"
 *
 * Both paths are treated as `/`-separated; the last segment of each is taken
 * to be a filename.
 *
 * @param fromPath - path of the file containing the reference
 * @param toPath - path of the file being referenced
 * @returns `toPath` expressed relative to the directory of `fromPath`
 *          (falls back to `toPath` if the computed path is empty)
 */
export function calculateRelativePath(fromPath: string, toPath: string): string {
  // Directory segments of the source file (filename dropped).
  const fromDirs = fromPath.split("/");
  fromDirs.pop();
  const toSegments = toPath.split("/");

  // Count leading directories shared by both paths; the target's filename
  // (its last segment) never takes part in the comparison.
  const maxShared = Math.min(fromDirs.length, toSegments.length - 1);
  let shared = 0;
  while (shared < maxShared && fromDirs[shared] === toSegments[shared]) {
    shared++;
  }

  // Climb out of the unshared source directories, then descend to the target.
  const climb = "../".repeat(fromDirs.length - shared);
  const descend = toSegments.slice(shared).join("/");
  const relative = climb + descend;

  return relative !== "" ? relative : toPath;
}
688
+
689
/**
 * Rewrite internal Matters links to local paths in every markdown file of the
 * project. Pure string manipulation on file contents — no network I/O.
 *
 * Should be run AFTER downloadMediaAndUpdate() to avoid overwriting image refs.
 *
 * @param articlePathMap - lookup from article URL or shortHash to local path;
 *                         when empty, nothing is scanned
 * @param userName - Matters user name owning the links to rewrite
 * @returns number of files written, total links rewritten, and per-file or
 *          listing errors (failures are collected, not thrown)
 */
export async function rewriteAllInternalLinks(
  articlePathMap: Map<string, string>,
  userName: string
): Promise<{
  filesProcessed: number;
  linksRewritten: number;
  errors: string[];
}> {
  const summary = {
    filesProcessed: 0,
    linksRewritten: 0,
    errors: [] as string[],
  };

  // Nothing to map links onto — skip the scan entirely.
  if (articlePathMap.size === 0) {
    console.log("🔗 No articles to rewrite links for");
    return summary;
  }

  console.log("🔗 Rewriting internal Matters links...");

  let markdownFiles: string[];
  try {
    const projectFiles = await listFiles();
    markdownFiles = projectFiles.filter((f: string) => f.endsWith(".md"));
  } catch (err) {
    console.error(`Failed to list project files: ${err}`);
    summary.errors.push(`Failed to list files: ${err}`);
    return summary;
  }

  console.log(` Scanning ${markdownFiles.length} markdown files for internal links...`);

  // Loaded dynamically to avoid a circular dependency with the converter module.
  const converter = await import("./converter");

  for (const file of markdownFiles) {
    try {
      const raw = await readFile(file);

      const parsed = converter.parseFrontmatter(raw);
      if (!parsed) continue;

      const rewritten = rewriteLinksInContent(parsed.body, articlePathMap, userName, file);

      // Leave files without rewritten links untouched on disk.
      if (rewritten.linksRewritten > 0) {
        const newContent = converter.regenerateFrontmatter(parsed.frontmatter) + "\n" + rewritten.content;

        await writeFile(file, newContent);

        summary.filesProcessed++;
        summary.linksRewritten += rewritten.linksRewritten;
      }
    } catch (err) {
      summary.errors.push(`Failed to process ${file}: ${err}`);
    }
  }

  console.log(` Rewrote ${summary.linksRewritten} links in ${summary.filesProcessed} files`);

  return summary;
}