@symbiosis-lab/moss-plugin-matters 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +88 -0
- package/README.md +18 -0
- package/assets/icon.svg +1 -0
- package/assets/manifest.json +36 -0
- package/codegen.ts +26 -0
- package/e2e/moss-cli.test.ts +338 -0
- package/features/api/fetch-articles.feature +39 -0
- package/features/auth/wallet-auth.feature +27 -0
- package/features/download/retry-logic.feature +36 -0
- package/features/download/self-correcting.feature +83 -0
- package/features/download/worker-pool.feature +29 -0
- package/features/social/fetch-social-data.feature +40 -0
- package/features/steps/api.steps.ts +180 -0
- package/features/steps/download.steps.ts +423 -0
- package/features/steps/incremental-sync.steps.ts +105 -0
- package/features/steps/self-correcting.steps.ts +575 -0
- package/features/steps/social.steps.ts +257 -0
- package/features/steps/syndication.steps.ts +264 -0
- package/features/steps/wallet-auth.steps.ts +185 -0
- package/features/sync/article-sync.feature +49 -0
- package/features/sync/homepage-grid.feature +43 -0
- package/features/sync/incremental-sync.feature +28 -0
- package/features/syndication/create-draft.feature +35 -0
- package/package.json +58 -0
- package/src/__generated__/schema.graphql +4289 -0
- package/src/__generated__/types.ts +5355 -0
- package/src/__tests__/api.test.ts +678 -0
- package/src/__tests__/auth-route.test.ts +38 -0
- package/src/__tests__/auth-routing.test.ts +462 -0
- package/src/__tests__/auto-detect.test.ts +412 -0
- package/src/__tests__/binding-guard.test.ts +256 -0
- package/src/__tests__/config.test.ts +212 -0
- package/src/__tests__/converter.test.ts +289 -0
- package/src/__tests__/credential.test.ts +332 -0
- package/src/__tests__/domain.test.ts +341 -0
- package/src/__tests__/downloader.test.ts +679 -0
- package/src/__tests__/folder-detection.test.ts +289 -0
- package/src/__tests__/force-fresh-login.test.ts +236 -0
- package/src/__tests__/main.test.ts +2437 -0
- package/src/__tests__/progress.test.ts +93 -0
- package/src/__tests__/session.test.ts +375 -0
- package/src/__tests__/social-integration.test.ts +386 -0
- package/src/__tests__/social-sync-logic.test.ts +107 -0
- package/src/__tests__/social.test.ts +788 -0
- package/src/__tests__/sync.test.ts +1273 -0
- package/src/__tests__/syndication-toast-law.test.ts +649 -0
- package/src/__tests__/syndication.test.ts +125 -0
- package/src/__tests__/test-profile-escape.test.ts +209 -0
- package/src/__tests__/url-detect.test.ts +79 -0
- package/src/__tests__/utils.test.ts +226 -0
- package/src/api.ts +1366 -0
- package/src/auth-route.ts +38 -0
- package/src/config.ts +80 -0
- package/src/converter.ts +305 -0
- package/src/credential.ts +329 -0
- package/src/domain.ts +183 -0
- package/src/downloader.ts +761 -0
- package/src/main.ts +2092 -0
- package/src/progress.ts +89 -0
- package/src/queries/user.graphql +85 -0
- package/src/queries/viewer.graphql +104 -0
- package/src/social.ts +413 -0
- package/src/sync.ts +818 -0
- package/src/types.ts +477 -0
- package/src/url-detect.ts +49 -0
- package/src/utils.ts +305 -0
- package/test-fixtures/syndication-test-site/input/index.md +8 -0
- package/test-fixtures/syndication-test-site/input/posts/rich-test-article.md +90 -0
- package/test-helpers/TEST_ACCOUNT.md +151 -0
- package/test-helpers/api-client.ts +252 -0
- package/test-helpers/fixtures/articles.ts +147 -0
- package/test-helpers/wallet-auth.ts +305 -0
- package/test-setup/e2e.ts +93 -0
- package/tsconfig.json +23 -0
- package/vitest.config.ts +39 -0
|
@@ -0,0 +1,761 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Asset download functionality with incremental file updates
|
|
3
|
+
*
|
|
4
|
+
* DESIGN PRINCIPLE: Write files incrementally, not in a batch at the end.
|
|
5
|
+
*
|
|
6
|
+
* This ensures:
|
|
7
|
+
* 1. If interrupted, completed files are already saved
|
|
8
|
+
* 2. Running again skips already-updated files (self-correcting)
|
|
9
|
+
* 3. No "Phase 3" batch write that can silently fail
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import {
|
|
13
|
+
reportError,
|
|
14
|
+
sleep,
|
|
15
|
+
} from "./utils";
|
|
16
|
+
import { overallProgress, type ProgressReporter } from "./progress";
|
|
17
|
+
import { downloadAsset as downloadAssetRust } from "@symbiosis-lab/moss-api";
|
|
18
|
+
import { extractRemoteImageUrls, extractMarkdownLinks } from "./converter";
|
|
19
|
+
import { isInternalMattersLink as isDomainInternalLink, extractShortHash } from "./domain";
|
|
20
|
+
import { listFiles, readFile, writeFile } from "@symbiosis-lab/moss-api";
|
|
21
|
+
|
|
22
|
+
// ============================================================================
|
|
23
|
+
// Constants
|
|
24
|
+
// ============================================================================
|
|
25
|
+
|
|
26
|
+
/** Maximum number of download attempts made for a single asset before giving up. */
const MAX_RETRIES = 3;
// Neither concurrency nor timeouts are managed in this module:
// - concurrency is capped by a Rust-side Semaphore (DOWNLOAD_CONCURRENCY_LIMIT=5)
// - timeouts are enforced by Rust-side tokio::time::timeout (default 30s)
|
|
29
|
+
|
|
30
|
+
// ============================================================================
|
|
31
|
+
// Pure Helper Functions (exported for testing)
|
|
32
|
+
// ============================================================================
|
|
33
|
+
|
|
34
|
+
/**
 * Pull the first UUID-shaped segment out of a string.
 *
 * Matters asset IDs are UUIDs, so this works for URLs served from either
 * assets.matters.news or imagedelivery.net. Matching is case-insensitive and
 * the UUID is returned exactly as it appears in the input.
 *
 * @param url - URL (or path) to inspect
 * @returns the first UUID found, or `null` when the input contains none
 */
export function extractAssetUuid(url: string): string | null {
  const uuidPattern = /([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})/i;
  const found = uuidPattern.exec(url);
  if (found === null) {
    return null;
  }
  return found[1];
}
|
|
42
|
+
|
|
43
|
+
/**
 * Backslash-escape every regex metacharacter in a string so the result can be
 * embedded in a `RegExp` source and match the input literally.
 *
 * @param str - text to be matched literally
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash
 */
export function escapeRegex(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, (special) => `\\${special}`);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Build a global regex matching any http(s) URL that contains the given asset ID.
 *
 * The same asset can be served from several CDN URLs; keying on the ID lets
 * one pattern update every reference. A URL is taken to end at the first `)`,
 * whitespace character or `"`.
 *
 * @param assetId - asset identifier, matched literally
 * @returns a fresh `RegExp` with the `g` flag set
 */
export function buildAssetUrlPattern(assetId: string): RegExp {
  // Characters allowed on either side of the asset ID inside the URL.
  const urlChars = '[^)\\s"]*';
  const source = 'https?://' + urlChars + escapeRegex(assetId) + urlChars;
  return new RegExp(source, 'g');
}
|
|
60
|
+
|
|
61
|
+
/**
 * Replace every URL containing the asset ID with a local path.
 *
 * Only the URL substring is swapped; surrounding markdown (for example the
 * `![alt](` … `)` wrapper) is left untouched.
 *
 * @param content - text to rewrite
 * @param assetId - asset identifier to look for inside URLs
 * @param localPath - replacement text, inserted literally
 * @returns the rewritten content and whether at least one URL matched; when
 *          nothing matched, `content` is returned unchanged
 */
export function replaceAssetUrls(
  content: string,
  assetId: string,
  localPath: string
): { content: string; replaced: boolean } {
  const pattern = buildAssetUrlPattern(assetId);

  if (!pattern.test(content)) {
    return { content, replaced: false };
  }

  // test() on a global regex advances lastIndex; rewind before reusing it.
  pattern.lastIndex = 0;

  // A replacer function keeps localPath literal. A replacement *string* would
  // expand `$&`, `$$`, `` $` `` and `$'`, corrupting paths that contain `$`.
  const newContent = content.replace(pattern, () => localPath);
  return { content: newContent, replaced: true };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Replace a full markdown image token `![alt](url)` whose URL contains the
 * asset id with a filename-only wikilink `![[filename]]` (B2).
 *
 * Unlike `replaceAssetUrls` — which swaps only the URL substring and leaves the
 * `![alt](…)` wrapper plus a depth-dependent relative path (`../assets/…` vs
 * `../../assets/…`) — this replaces the ENTIRE image token so moss's shared
 * filename-stem asset resolver (`resolve::asset_class::resolve_asset_ref`)
 * resolves it from ANY article depth with no `../` chains. The basename carries
 * the real extension, so the extensionless-ref bug (B8) disappears too. Alt
 * text is dropped to match moss/Obsidian `![[file]]` embed syntax.
 *
 * @param content - markdown text to rewrite
 * @param assetId - asset identifier to look for inside image URLs
 * @param filename - asset file name placed inside the wikilink, inserted literally
 * @returns the rewritten content and whether at least one image token matched;
 *          when nothing matched, `content` is returned unchanged
 */
export function replaceImageWithWikilink(
  content: string,
  assetId: string,
  filename: string
): { content: string; replaced: boolean } {
  // `!\[[^\]]*\]` = the `![alt]` part (alt may be empty); then `(url[ "title"])`
  // where the url contains the asset id. The optional ` "title"` trailer matches
  // htmd's `![alt](url "title")` output for <img title=...> (else the CDN URL
  // would be left in the body — an orphaned-asset / broken-image leak).
  const pattern = new RegExp(
    `!\\[[^\\]]*\\]\\(https?://[^)\\s"]*${escapeRegex(assetId)}[^)\\s"]*(?:\\s+"[^"]*")?\\)`,
    'g'
  );
  if (!pattern.test(content)) {
    return { content, replaced: false };
  }

  // test() on a global regex advances lastIndex; rewind before reusing it.
  pattern.lastIndex = 0;

  // Build the wikilink once and return it from a replacer function so a `$`
  // in the filename is never read as a replacement pattern (`$&`, `$$`, …).
  const wikilink = `![[${filename}]]`;
  const newContent = content.replace(pattern, () => wikilink);
  return { content: newContent, replaced: true };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Replace a full markdown image token whose URL is the EXACT given URL with a
 * filename-only wikilink `![[filename]]` (B6 — legacy non-UUID CDN assets).
 *
 * `replaceImageWithWikilink` keys on a Matters asset UUID; legacy cloudfront
 * images (e.g. `assets.matters.news/.../image.jpg` with no UUID segment) have
 * no UUID to key on, so their references were never rewritten — the dead remote
 * CDN URL leaked into the published body. This matches on the literal URL
 * instead, so a downloaded legacy asset still localizes. The optional ` "title"`
 * trailer matches htmd's `![alt](url "title")` output.
 *
 * @param content - markdown text to rewrite
 * @param url - image URL to match, compared literally
 * @param filename - asset file name placed inside the wikilink, inserted literally
 * @returns the rewritten content and whether at least one image token matched;
 *          when nothing matched, `content` is returned unchanged
 */
export function replaceImageUrlWithWikilink(
  content: string,
  url: string,
  filename: string
): { content: string; replaced: boolean } {
  const pattern = new RegExp(
    `!\\[[^\\]]*\\]\\(${escapeRegex(url)}(?:\\s+"[^"]*")?\\)`,
    'g'
  );
  if (!pattern.test(content)) {
    return { content, replaced: false };
  }

  // test() on a global regex advances lastIndex; rewind before reusing it.
  pattern.lastIndex = 0;

  // Build the wikilink once and return it from a replacer function so a `$`
  // in the filename is never read as a replacement pattern (`$&`, `$$`, …).
  const wikilink = `![[${filename}]]`;
  const newContent = content.replace(pattern, () => wikilink);
  return { content: newContent, replaced: true };
}
|
|
143
|
+
|
|
144
|
+
// ============================================================================
|
|
145
|
+
// Fibonacci Backoff
|
|
146
|
+
// ============================================================================
|
|
147
|
+
|
|
148
|
+
/**
 * Back-off delay for a retry attempt, following the Fibonacci sequence.
 *
 * Attempts 1, 2, 3, 4, 5, 6, … map to 1000, 1000, 2000, 3000, 5000, 8000, … ms.
 * Any attempt number of 2 or less yields 1000 ms.
 *
 * @param attempt - 1-based attempt number
 * @returns delay in milliseconds
 */
function getFibonacciDelay(attempt: number): number {
  let previous = 1;
  let current = 1;
  // Each step past the second attempt advances the sequence by one term.
  for (let stepsLeft = attempt - 2; stepsLeft > 0; stepsLeft--) {
    const next = previous + current;
    previous = current;
    current = next;
  }
  return current * 1000;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Decide whether an HTTP status denotes a transient failure worth retrying.
 *
 * Retryable: 408 (Request Timeout), 429 (Too Many Requests) and every 5xx
 * server error. Everything else is treated as permanent.
 *
 * @param status - HTTP status code
 * @returns `true` when the request may be retried
 */
function isRetryableHttpStatus(status: number): boolean {
  if (status === 408 || status === 429) {
    return true;
  }
  return status >= 500 && status < 600;
}
|
|
168
|
+
|
|
169
|
+
// ============================================================================
|
|
170
|
+
// Asset Download
|
|
171
|
+
// ============================================================================
|
|
172
|
+
|
|
173
|
+
/**
 * Download failure that optionally carries the HTTP status of the response,
 * so callers can classify it as transient or permanent.
 */
class DownloadError extends Error {
  /**
   * @param message - human-readable description of the failure
   * @param httpStatus - HTTP status of the failed response; omit for failures
   *                     that have no status (e.g. network errors)
   */
  constructor(message: string, public readonly httpStatus?: number) {
    super(message);
    this.name = "DownloadError";
  }

  /**
   * Whether the failure is transient. A missing status (network error) is
   * always retryable; otherwise the status code decides.
   */
  isRetryable(): boolean {
    return this.httpStatus === undefined || isRetryableHttpStatus(this.httpStatus);
  }
}
|
|
187
|
+
|
|
188
|
+
/**
 * Download one asset, retrying transient failures with Fibonacci back-off.
 *
 * The download itself is delegated to the Rust-backed `downloadAsset`, which
 * saves straight to disk (avoids JS base64 blocking); moss derives the file
 * name and extension from the content type. Timeout and concurrency are
 * handled on the Rust side:
 *   - a Semaphore limits concurrent downloads to 5
 *   - tokio::time::timeout enforces a 30s cumulative timeout
 *
 * Retry policy:
 *   - a non-ok response is retried only when its status is retryable
 *   - a thrown error (timeout, network, …) is always retried
 *   - at most MAX_RETRIES attempts are made
 *
 * Log markers:
 *   - [↓] Attempt N/M: starting a download attempt
 *   - [✓] Downloaded: successful download
 *   - [!] HTTP {status}: HTTP error (retryable or final)
 *   - [✗] TIMEOUT: download timeout reported by Rust
 *   - [✗] ERROR: other errors (network, etc.)
 *   - [↻] Retrying: retry announcement with delay
 *   - [✗] FAILED: final failure
 *
 * @param url - remote asset URL
 * @returns `success: true` with the saved `actualPath`, or `success: false`
 *          with an empty path and an `error` description. Never rejects for a
 *          failed download.
 */
async function downloadAssetWithRetry(
  url: string
): Promise<{ actualPath: string; success: boolean; error?: string }> {
  // Announce the upcoming retry and wait out its back-off delay.
  const backOff = async (finishedAttempt: number): Promise<void> => {
    const waitMs = getFibonacciDelay(finishedAttempt);
    console.warn(` [↻] Retrying in ${waitMs}ms (attempt ${finishedAttempt + 1}/${MAX_RETRIES})`);
    await sleep(waitMs);
  };

  let attempt = 0;
  while (attempt < MAX_RETRIES) {
    attempt += 1;

    try {
      console.log(` [↓] Attempt ${attempt}/${MAX_RETRIES}: ${url}`);

      // Rust handles timeout (30s) and concurrency (5 parallel)
      const response = await downloadAssetRust(url, "assets");

      if (response.ok) {
        console.log(` [✓] Downloaded: ${response.actualPath}`);
        return { actualPath: response.actualPath, success: true };
      }

      const httpFailure = new DownloadError(`HTTP ${response.status}`, response.status);
      console.warn(` [!] HTTP ${response.status} for ${url}`);

      // Give up on a permanent status, or once the attempt budget is spent.
      if (!httpFailure.isRetryable() || attempt === MAX_RETRIES) {
        console.error(` [✗] FAILED after ${attempt} attempts: ${url} - HTTP ${response.status}`);
        return { actualPath: "", success: false, error: `HTTP ${response.status}` };
      }

      await backOff(attempt);
    } catch (fetchError: unknown) {
      const message = fetchError instanceof Error ? fetchError.message : String(fetchError);

      // Timeouts are recognised by their message text and logged separately.
      const label = message.toLowerCase().includes("timeout") ? "TIMEOUT" : "ERROR";
      console.error(` [✗] ${label}: ${url} - ${message}`);

      if (attempt === MAX_RETRIES) {
        console.error(` [✗] FAILED after ${MAX_RETRIES} attempts: ${url}`);
        return { actualPath: "", success: false, error: message };
      }

      await backOff(attempt);
    }
  }

  // Fallback for a loop that exits without returning.
  console.error(` [✗] FAILED after ${MAX_RETRIES} attempts: ${url}`);
  return { actualPath: "", success: false, error: "Max retries exceeded" };
}
|
|
259
|
+
|
|
260
|
+
// ============================================================================
|
|
261
|
+
// Types
|
|
262
|
+
// ============================================================================
|
|
263
|
+
|
|
264
|
+
/** A remote media reference discovered in a markdown file. */
interface MediaUrl {
  /** The remote URL exactly as it appears in the file. */
  url: string;
  /** UUID extracted from the URL, or `null` when the URL carries none. */
  uuid: string | null;
  /** True when the reference occurs in the markdown body. */
  inBody: boolean;
  /** True when the reference is the frontmatter `cover`. */
  inCover: boolean;
}
|
|
271
|
+
|
|
272
|
+
/** Parsed state of one markdown file queued for media processing. */
interface FileState {
  /** Project path of the markdown file. */
  path: string;
  /** Parsed frontmatter key/value pairs. */
  frontmatter: Record<string, unknown>;
  /** Markdown body (the content after the frontmatter). */
  body: string;
  /** Remote media references found in the body and the cover. */
  mediaUrls: MediaUrl[];
}
|
|
279
|
+
|
|
280
|
+
// ============================================================================
|
|
281
|
+
// Main Function: downloadMediaAndUpdate
|
|
282
|
+
// ============================================================================
|
|
283
|
+
|
|
284
|
+
/**
 * Download all media for all markdown files in a project and update references.
 *
 * DESIGN: Fire all downloads in parallel, let Rust handle concurrency.
 *
 * 1. **Parallel downloads**: All downloads start immediately via Promise.allSettled
 * 2. **Rust-side concurrency**: Semaphore limits to 5 concurrent downloads
 * 3. **Rust-side timeout**: tokio::time::timeout enforces 30s cumulative timeout
 * 4. **Self-correcting**: Running again skips already-downloaded assets (by UUID)
 *
 * Flow:
 * 1. Scan all files to collect unique media URLs needing download
 * 2. Fire all downloads in parallel (Rust handles concurrency/timeout)
 * 3. After all complete, update references in each file
 * 4. Write modified files to disk
 *
 * Media is de-duplicated by UUID when the URL carries one, and by the literal
 * URL otherwise, so an asset referenced from several places is downloaded once.
 *
 * Failures are non-fatal: a file that cannot be read is skipped with a
 * warning, and a failed download or write is recorded in `errors` while the
 * remaining work continues.
 *
 * @param onProgress - optional progress callback, invoked as each download
 *                     completes and once more at the end
 * @returns counters for written files, downloaded and skipped images, plus
 *          the collected error messages and the URLs of failed downloads
 */
export async function downloadMediaAndUpdate(
  onProgress?: ProgressReporter,
): Promise<{
  filesProcessed: number;
  imagesDownloaded: number;
  imagesSkipped: number;
  errors: string[];
  /**
   * Source URLs of images that failed to download (the dead CDN references
   * still sitting in the article bodies). The caller turns each into a
   * per-image advisory so the user sees WHICH image broke — not an opaque
   * "N failed" count. A subset of `errors` carrying just the image-download
   * failures (not list/write failures).
   */
  failedImageUrls: string[];
}> {
  const result = {
    filesProcessed: 0,
    imagesDownloaded: 0,
    imagesSkipped: 0,
    errors: [] as string[],
    failedImageUrls: [] as string[],
  };

  console.log("📸 Downloading media assets and updating references...");

  // Get all project files once
  let allProjectFiles: string[];
  try {
    allProjectFiles = await listFiles();
  } catch (err) {
    console.error(`Failed to list project files: ${err}`);
    result.errors.push(`Failed to list files: ${err}`);
    return result;
  }

  const allMdFiles = allProjectFiles.filter(f => f.endsWith(".md"));
  console.log(` Found ${allMdFiles.length} markdown files`);

  // Build UUID→asset path mapping for existing assets.
  // This allows us to skip downloads when assets already exist.
  const existingAssetsByUuid = new Map<string, string>();
  for (const assetPath of allProjectFiles.filter(f => f.startsWith("assets/"))) {
    const uuid = extractAssetUuid(assetPath);
    if (uuid) {
      existingAssetsByUuid.set(uuid, assetPath);
    }
  }
  console.log(` Found ${existingAssetsByUuid.size} existing assets`);

  const { parseFrontmatter, regenerateFrontmatter } = await import("./converter");

  // ========================================================================
  // Phase 1: Scan files to find those with remote media
  // ========================================================================

  const filesToProcess: FileState[] = [];

  for (const filePath of allMdFiles) {
    try {
      const content = await readFile(filePath);
      const parsed = parseFrontmatter(content);
      if (!parsed) continue;

      const mediaUrls: MediaUrl[] = [];

      // Extract body media
      for (const media of extractRemoteImageUrls(parsed.body)) {
        mediaUrls.push({
          url: media.url,
          uuid: extractAssetUuid(media.url),
          inBody: true,
          inCover: false,
        });
      }

      // Extract cover media (only when the cover is still a remote URL)
      const cover = parsed.frontmatter.cover;
      if (typeof cover === "string" && (cover.startsWith("http://") || cover.startsWith("https://"))) {
        mediaUrls.push({
          url: cover,
          uuid: extractAssetUuid(cover),
          inBody: false,
          inCover: true,
        });
      }

      // Skip files with no remote media
      if (mediaUrls.length === 0) continue;

      filesToProcess.push({
        path: filePath,
        frontmatter: parsed.frontmatter,
        body: parsed.body,
        mediaUrls,
      });
    } catch (err: unknown) {
      // Best-effort scan: a file that can't be read or parsed is skipped, but
      // say so instead of dropping it silently.
      console.warn(` [!] Skipping ${filePath}: ${err}`);
    }
  }

  console.log(` Found ${filesToProcess.length} files with remote media`);

  if (filesToProcess.length === 0) {
    return result;
  }

  // Count total unique media (for progress reporting). UUID assets are keyed
  // by UUID, legacy non-UUID assets by their literal URL, so a URL referenced
  // from several places is counted once.
  const uniqueMediaKeys = new Set<string>();
  for (const file of filesToProcess) {
    for (const media of file.mediaUrls) {
      uniqueMediaKeys.add(media.uuid ?? media.url);
    }
  }
  const totalUrls = uniqueMediaKeys.size;
  console.log(` Total unique media URLs: ${totalUrls}`);

  // ========================================================================
  // Phase 2: Download all images in parallel (Rust handles concurrency)
  // ========================================================================

  // Collect all unique media that needs downloading (not already in existing assets)
  const mediaToDownload: { url: string; uuid: string | null }[] = [];
  const queuedKeys = new Set<string>();

  for (const file of filesToProcess) {
    for (const media of file.mediaUrls) {
      // Skip if already queued in this batch. Keyed like the count above, so
      // a non-UUID URL is never fetched more than once.
      const key = media.uuid ?? media.url;
      if (queuedKeys.has(key)) continue;

      // Skip if asset already exists
      if (media.uuid && existingAssetsByUuid.has(media.uuid)) {
        result.imagesSkipped++;
        continue;
      }

      mediaToDownload.push({ url: media.url, uuid: media.uuid });
      queuedKeys.add(key);
    }
  }

  console.log(` Downloading ${mediaToDownload.length} media files (${result.imagesSkipped} skipped)...`);

  // Fire all downloads in parallel - Rust Semaphore limits to 5 concurrent.
  // Promise.allSettled ensures we get results for all, even if some fail.
  let completedCount = 0;
  const downloadPromises = mediaToDownload.map(async (media) => {
    const downloadResult = await downloadAssetWithRetry(media.url);

    // Report progress as each download COMPLETES. Count completions (single-
    // threaded `++` in the continuation is atomic), NOT the creation index —
    // downloads finish out of order, so an index-based fraction would jump the
    // hairline forward then back. media download is the heaviest phase
    // (weight 35/100); feeding the unified task here keeps the hairline
    // advancing monotonically instead of stalling through it.
    completedCount++;
    onProgress?.(
      "downloading_media",
      overallProgress("downloading_media", completedCount, mediaToDownload.length),
      100,
      `Downloading ${completedCount}/${mediaToDownload.length}...`
    );

    return { media, downloadResult };
  });

  const downloadResults = await Promise.allSettled(downloadPromises);

  // Build uuid → localPath map from successful downloads. Also key by the
  // literal URL so legacy non-UUID assets (no UUID to key on) can still be
  // localized in Phase 3 (B6).
  const downloadedUuids = new Map<string, string>();
  const downloadedByUrl = new Map<string, string>();

  for (const settled of downloadResults) {
    if (settled.status === "fulfilled") {
      const { media, downloadResult } = settled.value;
      if (downloadResult.success) {
        result.imagesDownloaded++;
        downloadedByUrl.set(media.url, downloadResult.actualPath);
        // Track by UUID for dedup and reference updates
        if (media.uuid) {
          downloadedUuids.set(media.uuid, downloadResult.actualPath);
          existingAssetsByUuid.set(media.uuid, downloadResult.actualPath);
        }
      } else {
        // Surface the failure as a user-visible diagnostic (not just a count +
        // a console line). A failed download leaves the dead CDN URL in the
        // body, so the user needs to know which image broke (B6). Non-fatal:
        // sync continues, partial success is allowed.
        const msg = `Image download failed (${downloadResult.error}): ${media.url}`;
        result.errors.push(`${media.url}: ${downloadResult.error}`);
        result.failedImageUrls.push(media.url);
        await reportError(msg, "downloading_media", false);
      }
    } else {
      // Promise rejected (shouldn't happen with our try/catch in
      // downloadAssetWithRetry). No `media.url` is available here, so this stays
      // in `errors` only — it is intentionally NOT pushed to `failedImageUrls`,
      // whose entries must be real image URLs for the per-image advisory.
      const msg = `Image download failed: ${settled.reason}`;
      result.errors.push(`Download failed: ${settled.reason}`);
      await reportError(msg, "downloading_media", false);
    }
  }

  console.log(` Downloaded ${result.imagesDownloaded}/${mediaToDownload.length} media files`);

  // ========================================================================
  // Phase 3: Update references in files
  // ========================================================================

  for (const file of filesToProcess) {
    let modified = false;
    let { frontmatter, body } = file;

    // Deduplicate media within this file, merging inBody/inCover flags
    const mediaByKey = new Map<string, MediaUrl>();
    for (const media of file.mediaUrls) {
      const key = media.uuid || media.url;
      const existing = mediaByKey.get(key);
      if (existing) {
        existing.inBody = existing.inBody || media.inBody;
        existing.inCover = existing.inCover || media.inCover;
      } else {
        mediaByKey.set(key, { ...media });
      }
    }

    // Update references for each media
    for (const media of mediaByKey.values()) {
      // Resolve the downloaded/existing local path. UUID assets key on UUID;
      // legacy non-UUID CDN assets (no UUID segment) key on the literal URL
      // (B6) — previously these were skipped (`if (!media.uuid) continue;`)
      // and their dead CDN URL leaked into the published body.
      const localPath = media.uuid
        ? (downloadedUuids.get(media.uuid) || existingAssetsByUuid.get(media.uuid))
        : downloadedByUrl.get(media.url);
      if (!localPath) continue;

      // Emit a filename-only wikilink (B2/B8): depth-independent, resolved by
      // moss's shared filename-stem asset resolver from any article depth — no
      // `../` chains. The basename carries the real extension. Replaces the
      // prior depth-dependent `calculateRelativePath` + URL-substring rewrite.
      const filename = localPath.split('/').pop() || localPath;

      // Update body references → `![[filename]]`. UUID assets match any CDN URL
      // carrying the UUID; non-UUID assets match the exact URL.
      if (media.inBody) {
        const { content: newBody, replaced } = media.uuid
          ? replaceImageWithWikilink(body, media.uuid, filename)
          : replaceImageUrlWithWikilink(body, media.url, filename);
        if (replaced) {
          body = newBody;
          modified = true;
        }
      }

      // Update cover reference → bare filename (frontmatter; resolver finds it).
      if (media.inCover) {
        const coverStr = String(frontmatter.cover || '');
        const coverMatches = media.uuid
          ? coverStr.includes(media.uuid)
          : coverStr === media.url;
        if (coverMatches) {
          frontmatter = { ...frontmatter, cover: filename };
          modified = true;
        }
      }
    }

    // Write file if modified
    if (modified) {
      try {
        const newContent = regenerateFrontmatter(frontmatter) + "\n" + body;
        await writeFile(file.path, newContent);
        result.filesProcessed++;
        console.log(` [📝] Wrote: ${file.path}`);
      } catch (err) {
        result.errors.push(`Failed to write ${file.path}: ${err}`);
        console.error(` [✗] Failed to write: ${file.path} - ${err}`);
      }
    }
  }

  // Final report. Snap the band to 100% using `totalUrls` (ALL unique media,
  // including already-cached ones) — NOT `mediaToDownload.length`, which is 0
  // when everything was cached and would jump progress BACK to the band start.
  onProgress?.(
    "downloading_media",
    overallProgress("downloading_media", totalUrls, totalUrls),
    100,
    `Downloaded ${result.imagesDownloaded} media, updated ${result.filesProcessed} files`
  );

  console.log(` ✅ Downloaded ${result.imagesDownloaded}, skipped ${result.imagesSkipped}, updated ${result.filesProcessed} files`);

  return result;
}
|
|
609
|
+
|
|
610
|
+
// ============================================================================
|
|
611
|
+
// Internal Link Rewriting
|
|
612
|
+
// ============================================================================
|
|
613
|
+
|
|
614
|
+
/**
 * Tell whether a URL points at the current user's own Matters content.
 *
 * Thin local alias: the decision is delegated entirely to the domain
 * module's `isInternalMattersLink`.
 *
 * @param url - link target to classify
 * @param userName - Matters user name of the current user
 * @returns whatever the domain helper decides for this URL and user
 */
function isInternalMattersLink(url: string, userName: string): boolean {
  const ownedByUser = isDomainInternalLink(url, userName);
  return ownedByUser;
}
|
|
620
|
+
|
|
621
|
+
/**
 * Rewrite internal Matters links to local relative paths in one file's content.
 *
 * For every markdown link reported by `extractMarkdownLinks`, the link is
 * rewritten when it passes `isInternalMattersLink` and a local path can be
 * found in `articlePathMap` (looked up by full URL first, then by short hash).
 *
 * @param content - Markdown text to scan.
 * @param articlePathMap - Lookup from article URL or short hash to local file path.
 * @param userName - Matters user name whose links count as internal.
 * @param currentFilePath - Path of the file being rewritten; relative links are
 *   computed from its directory.
 * @returns The rewritten content and the number of links actually substituted.
 */
function rewriteLinksInContent(
  content: string,
  articlePathMap: Map<string, string>,
  userName: string,
  currentFilePath: string
): { content: string; linksRewritten: number } {
  const links = extractMarkdownLinks(content);
  let modifiedContent = content;
  let linksRewritten = 0;

  for (const { url, fullMatch } of links) {
    // Only own-user canonical `/@userName/...` links are rewritten. `/a/<shortHash>`
    // short-links never pass this guard, so the shared extractShortHash's short-link
    // support is intentionally unreachable here — body cross-links are canonical form.
    if (!isInternalMattersLink(url, userName)) continue;

    // Try exact URL match first, then fall back to the short hash.
    let localPath = articlePathMap.get(url);
    if (!localPath) {
      const shortHash = extractShortHash(url);
      if (shortHash) {
        localPath = articlePathMap.get(shortHash);
      }
    }
    if (!localPath) continue;

    // Calculate relative path from current file to target file.
    const relativePath = calculateRelativePath(currentFilePath, localPath);

    // Locate the link DESTINATION inside the matched markdown. When the link text
    // repeats the URL (`[https://…](https://…)`), the first occurrence is the
    // text, so prefer the occurrence that directly follows `](` and otherwise
    // fall back to the last occurrence.
    // NOTE(review): assumes fullMatch has the `[text](url…)` shape — confirm
    // against extractMarkdownLinks.
    const destinationMarker = fullMatch.lastIndexOf("](" + url);
    const urlStart =
      destinationMarker >= 0 ? destinationMarker + 2 : fullMatch.lastIndexOf(url);
    if (urlStart < 0) continue;

    const newLink =
      fullMatch.slice(0, urlStart) +
      relativePath +
      fullMatch.slice(urlStart + url.length);

    // Splice by index instead of String.prototype.replace with a string
    // replacement: replace() would interpret `$$`, `$&`, `` $` `` and `$'` inside
    // the link text or path as special patterns and corrupt the output.
    const matchStart = modifiedContent.indexOf(fullMatch);
    if (matchStart < 0) continue;

    modifiedContent =
      modifiedContent.slice(0, matchStart) +
      newLink +
      modifiedContent.slice(matchStart + fullMatch.length);

    // Count only links that were really substituted so callers do not rewrite
    // files whose content did not change.
    linksRewritten++;
  }

  return { content: modifiedContent, linksRewritten };
}
|
|
662
|
+
|
|
663
|
+
/**
 * Compute the "/"-separated relative path that leads from the directory of
 * `fromPath` to the file `toPath`.
 *
 * Example: from "article/collection/post.md" to "article/other.md" the result
 * is "../other.md".
 *
 * @param fromPath - Path of the file that contains the link.
 * @param toPath - Path of the file being linked to.
 * @returns The relative path, or `toPath` itself when the computed path is empty.
 */
export function calculateRelativePath(fromPath: string, toPath: string): string {
  // Directory segments of the source file (the trailing filename is dropped).
  const sourceDirs = fromPath.split("/");
  sourceDirs.pop();

  const targetSegments = toPath.split("/");

  // The target's last segment is its filename, so it never counts as a shared directory.
  const sharedLimit = Math.min(sourceDirs.length, targetSegments.length - 1);

  let shared = 0;
  for (; shared < sharedLimit; shared++) {
    if (sourceDirs[shared] !== targetSegments[shared]) break;
  }

  // Climb out of the unshared source directories, then descend into the target.
  const climb = "../".repeat(sourceDirs.length - shared);
  const descend = targetSegments.slice(shared).join("/");
  const relative = climb + descend;

  return relative || toPath;
}
|
|
688
|
+
|
|
689
|
+
/**
 * Walk every markdown file returned by `listFiles()` and rewrite internal
 * Matters links in its body to local relative paths.
 *
 * Only files in which at least one link was rewritten are written back; their
 * frontmatter is regenerated from the parsed value and re-joined with the body.
 * Per-file failures are collected in `errors` and do not stop the scan.
 *
 * Should be run AFTER downloadMediaAndUpdate() to avoid overwriting image refs.
 *
 * @param articlePathMap - Lookup from article URL or short hash to local file path.
 * @param userName - Matters user name whose links count as internal.
 * @returns Counts of files written and links rewritten, plus collected error messages.
 */
export async function rewriteAllInternalLinks(
  articlePathMap: Map<string, string>,
  userName: string
): Promise<{
  filesProcessed: number;
  linksRewritten: number;
  errors: string[];
}> {
  const result = {
    filesProcessed: 0,
    linksRewritten: 0,
    errors: [] as string[],
  };

  // Nothing to map links onto: bail out before touching the file system.
  if (articlePathMap.size === 0) {
    console.log("🔗 No articles to rewrite links for");
    return result;
  }

  console.log("🔗 Rewriting internal Matters links...");

  let allFiles: string[];
  try {
    const everyFile = await listFiles();
    allFiles = everyFile.filter((f: string) => f.endsWith(".md"));
  } catch (err) {
    console.error(`Failed to list project files: ${err}`);
    result.errors.push(`Failed to list files: ${err}`);
    return result;
  }

  console.log(` Scanning ${allFiles.length} markdown files for internal links...`);

  // Import parseFrontmatter dynamically to avoid circular dependency
  const converter = await import("./converter");
  const { parseFrontmatter, regenerateFrontmatter } = converter;

  for (const file of allFiles) {
    try {
      const raw = await readFile(file);
      const parsed = parseFrontmatter(raw);

      // Files whose frontmatter cannot be parsed are left untouched.
      if (!parsed) continue;

      const rewrite = rewriteLinksInContent(parsed.body, articlePathMap, userName, file);

      // Skip the write entirely when no link in this file changed.
      if (rewrite.linksRewritten <= 0) continue;

      const header = regenerateFrontmatter(parsed.frontmatter);
      await writeFile(file, header + "\n" + rewrite.content);

      result.filesProcessed++;
      result.linksRewritten += rewrite.linksRewritten;
    } catch (err) {
      result.errors.push(`Failed to process ${file}: ${err}`);
    }
  }

  console.log(` Rewrote ${result.linksRewritten} links in ${result.filesProcessed} files`);

  return result;
}
|