@jacobbubu/md-to-lark 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +171 -0
- package/dist/btt/build-tree.js +79 -0
- package/dist/btt/index.js +1 -0
- package/dist/btt/types.js +1 -0
- package/dist/cli/publish-md-to-lark.js +15 -0
- package/dist/commands/publish-md/args.js +224 -0
- package/dist/commands/publish-md/command.js +97 -0
- package/dist/commands/publish-md/index.js +1 -0
- package/dist/commands/publish-md/input-resolver.js +48 -0
- package/dist/commands/publish-md/mermaid-render.js +17 -0
- package/dist/commands/publish-md/pipeline-transform.js +4 -0
- package/dist/commands/publish-md/preset-loader.js +113 -0
- package/dist/commands/publish-md/presets/medium.js +7 -0
- package/dist/commands/publish-md/presets/zh-format.js +8 -0
- package/dist/commands/publish-md/title-policy.js +93 -0
- package/dist/index.js +1 -0
- package/dist/interop/btt-to-last.js +79 -0
- package/dist/interop/codec-btt-to-last.js +435 -0
- package/dist/interop/codec-last-to-btt.js +383 -0
- package/dist/interop/codec-shared.js +722 -0
- package/dist/interop/index.js +2 -0
- package/dist/interop/last-to-btt.js +17 -0
- package/dist/lark/block-types.js +42 -0
- package/dist/lark/client.js +36 -0
- package/dist/lark/docx/ops.js +596 -0
- package/dist/lark/docx/render-btt.js +156 -0
- package/dist/lark/docx/render-models.js +1 -0
- package/dist/lark/docx/render-payload.js +338 -0
- package/dist/lark/docx/render-post-process.js +98 -0
- package/dist/lark/docx/render-table.js +87 -0
- package/dist/lark/docx/render-types.js +7 -0
- package/dist/lark/index.js +2 -0
- package/dist/lark/types.js +1 -0
- package/dist/last/api.js +1687 -0
- package/dist/last/index.js +3 -0
- package/dist/last/preview-terminal.js +296 -0
- package/dist/last/textual-block-types.js +19 -0
- package/dist/last/to-markdown.js +303 -0
- package/dist/last/types.js +11 -0
- package/dist/pipeline/hast-to-last.js +946 -0
- package/dist/pipeline/index.js +3 -0
- package/dist/pipeline/markdown/md-to-hast.js +34 -0
- package/dist/pipeline/markdown/prepare-markdown.js +1049 -0
- package/dist/preview/index.js +1 -0
- package/dist/preview/markdown-terminal.js +350 -0
- package/dist/publish/asset-adapter.js +123 -0
- package/dist/publish/btt-patch.js +65 -0
- package/dist/publish/common.js +139 -0
- package/dist/publish/ids.js +9 -0
- package/dist/publish/index.js +7 -0
- package/dist/publish/last-normalize.js +327 -0
- package/dist/publish/process-file.js +228 -0
- package/dist/publish/runtime.js +133 -0
- package/dist/publish/stage-cache.js +56 -0
- package/dist/shared/rate-limiter.js +18 -0
- package/dist/shared/retry.js +141 -0
- package/package.json +78 -0
|
@@ -0,0 +1,1049 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import { createHash } from 'node:crypto';
|
|
3
|
+
import { createWriteStream } from 'node:fs';
|
|
4
|
+
import { access, mkdir, rename, rm, stat } from 'node:fs/promises';
|
|
5
|
+
import path from 'node:path';
|
|
6
|
+
import { Readable } from 'node:stream';
|
|
7
|
+
import { pipeline } from 'node:stream/promises';
|
|
8
|
+
import { parse as parseYaml } from 'yaml';
|
|
9
|
+
// Inline markdown image tag. Capture groups: (1) text up to the URL, (2) URL written
// inside angle brackets, (3) bare URL, (4) closing whitespace and parenthesis.
const MARKDOWN_IMAGE_TAG_RE = /(!\[[^\]\n]*\]\(\s*)(?:<([^>\n]+)>|([^\s)\n]+))(\s*\))/g;

// Defaults for remote downloads (all durations are in milliseconds).
const DEFAULT_TIMEOUT_MS = 15 * 1_000;
const DEFAULT_MAX_RETRIES = 3;
const DEFAULT_BACKOFF_BASE_MS = 500;
const DEFAULT_BACKOFF_MAX_MS = 5 * 1_000;
const DEFAULT_BACKOFF_JITTER_RATIO = 0.2;
const DEFAULT_YT_DLP_TIMEOUT_MS = 600_000;

// HTTP statuses below the 5xx range that are still treated as retryable.
const RETRYABLE_HTTP_STATUS = new Set([408, 425, 429]);

// Marker prepended to the lines this module asks yt-dlp to print.
const YT_DLP_PRINT_PREFIX = '__M2L__';

// File extension to use for a given (lower-cased) image content type.
const CONTENT_TYPE_TO_EXTENSION = {
    'image/png': '.png',
    'image/jpeg': '.jpg',
    'image/gif': '.gif',
    'image/webp': '.webp',
    'image/svg+xml': '.svg',
    'image/bmp': '.bmp',
    'image/x-icon': '.ico',
    'image/vnd.microsoft.icon': '.ico',
    'image/avif': '.avif',
};

// Extensions recognised as image files.
const IMAGE_FILE_EXTENSIONS = new Set([
    '.png',
    '.jpg',
    '.jpeg',
    '.gif',
    '.webp',
    '.svg',
    '.bmp',
    '.ico',
    '.avif',
]);
|
|
30
|
+
/**
 * Tells whether `value` parses as an absolute URL using the http or https scheme.
 *
 * @param {string} value - Candidate URL text.
 * @returns {boolean} `true` for parseable http/https URLs, `false` otherwise
 *   (including when parsing throws).
 */
function isRemoteHttpUrl(value) {
    let protocol;
    try {
        ({ protocol } = new URL(value));
    }
    catch {
        return false;
    }
    return protocol === 'http:' || protocol === 'https:';
}
|
|
39
|
+
/**
 * Parses `value` as a URL and returns it only when the scheme is http or https.
 *
 * @param {string} value - Candidate URL text.
 * @returns {URL | undefined} The parsed URL, or `undefined` when parsing fails or
 *   the scheme is something else.
 */
function parseHttpUrl(value) {
    let candidate;
    try {
        candidate = new URL(value);
    }
    catch {
        return undefined;
    }
    const isHttp = candidate.protocol === 'http:' || candidate.protocol === 'https:';
    return isHttp ? candidate : undefined;
}
|
|
50
|
+
/**
 * Replaces every backslash in `p` with a forward slash.
 *
 * @param {string} p - Path text.
 * @returns {string} The path with forward slashes only.
 */
function toPosixPath(p) {
    return p.split('\\').join('/');
}
|
|
53
|
+
/**
 * Turns a local file path into text usable as a markdown link target:
 * backslashes become forward slashes, then the result is passed through `encodeURI`.
 *
 * @param {string} localPath - Local file path.
 * @returns {string} URI-encoded, forward-slash path.
 */
function toMarkdownLocalPath(localPath) {
    const posixPath = toPosixPath(localPath);
    return encodeURI(posixPath);
}
|
|
56
|
+
/**
 * Reduces a Content-Type header value to its bare media type: everything after
 * the first `;` is dropped, and the rest is trimmed and lower-cased.
 *
 * @param {string | null | undefined} value - Raw header value.
 * @returns {string | undefined} The media type, or `undefined` when nothing is left.
 */
function normalizeContentType(value) {
    if (!value) {
        return undefined;
    }
    const [mediaType] = value.split(';', 1);
    const cleaned = mediaType?.trim().toLowerCase();
    return cleaned ? cleaned : undefined;
}
|
|
62
|
+
/**
 * Produces a printable message for an arbitrary thrown value.
 *
 * @param {unknown} error - Any caught value.
 * @returns {string} `'unknown error'` for falsy values, the string itself for strings,
 *   `error.message` for `Error` instances, and `String(error)` for anything else.
 */
function normalizeErrorMessage(error) {
    if (!error) {
        return 'unknown error';
    }
    if (error instanceof Error) {
        return error.message;
    }
    return typeof error === 'string' ? error : String(error);
}
|
|
71
|
+
/**
 * Tells whether `error` is an object whose `name` is `'AbortError'`.
 *
 * @param {unknown} error - Any caught value.
 * @returns {boolean} `true` only for non-null objects named `AbortError`.
 */
function isAbortLikeError(error) {
    return typeof error === 'object' && error !== null && error.name === 'AbortError';
}
|
|
77
|
+
/**
 * Decides whether a failure looks like a transient network problem.
 *
 * Abort-like errors always count. Otherwise the normalized, lower-cased error
 * message is searched for a fixed list of network-related markers.
 *
 * @param {unknown} error - A caught value or a plain message string.
 * @returns {boolean} `true` when the failure is considered recoverable.
 */
function isRecoverableNetworkError(error) {
    if (isAbortLikeError(error)) {
        return true;
    }
    const haystack = normalizeErrorMessage(error).toLowerCase();
    const markers = [
        'fetch failed',
        'network',
        'timeout',
        'timed out',
        'econnreset',
        'econnrefused',
        'enotfound',
        'eai_again',
        'etimedout',
        'ehostunreach',
        'und_err_connect_timeout',
    ];
    return markers.some((marker) => haystack.includes(marker));
}
|
|
93
|
+
/**
 * Tells whether an HTTP status is worth retrying: any status listed in
 * `RETRYABLE_HTTP_STATUS`, or any status in the 500-599 range.
 *
 * @param {number} status - HTTP status code.
 * @returns {boolean} `true` when the request may be retried.
 */
function isRetryableHttpStatus(status) {
    if (RETRYABLE_HTTP_STATUS.has(status)) {
        return true;
    }
    return status >= 500 && status <= 599;
}
|
|
96
|
+
/**
 * Returns the n-th Fibonacci number with fibonacci(1) = fibonacci(2) = 1.
 * Any `n` of 2 or less yields 1.
 *
 * @param {number} n - 1-based position in the sequence.
 * @returns {number} The Fibonacci number at position `n`.
 */
function fibonacci(n) {
    let previous = 1;
    let current = 1;
    // The loop body never runs for n <= 2, which leaves the result at 1.
    for (let position = 3; position <= n; position += 1) {
        [previous, current] = [current, previous + current];
    }
    return current;
}
|
|
108
|
+
/**
 * Computes how long to wait before a retry.
 *
 * The base delay is `fibonacci(retryIndex) * baseMs`, capped at `maxMs`. When
 * `jitterRatio` is positive the delay is scaled by a random factor in
 * `[1 - jitterRatio, 1 + jitterRatio)`, rounded, and clamped to be non-negative.
 *
 * @param {number} retryIndex - 1-based retry number.
 * @param {number} baseMs - Delay unit in milliseconds.
 * @param {number} maxMs - Upper bound applied before jitter.
 * @param {number} jitterRatio - Relative jitter; values <= 0 disable jitter.
 * @returns {number} Delay in milliseconds.
 */
function getBackoffMs(retryIndex, baseMs, maxMs, jitterRatio) {
    const cappedDelay = Math.min(maxMs, fibonacci(retryIndex) * baseMs);
    if (jitterRatio <= 0) {
        return cappedDelay;
    }
    const spread = (Math.random() * 2 - 1) * jitterRatio;
    const jittered = Math.round(cappedDelay * (1 + spread));
    return Math.max(0, jittered);
}
|
|
115
|
+
/**
 * Waits for `ms` milliseconds. Returns without scheduling a timer when `ms <= 0`.
 *
 * @param {number} ms - Time to wait in milliseconds.
 * @returns {Promise<void>} Resolves once the wait is over.
 */
async function delay(ms) {
    if (ms <= 0) {
        return;
    }
    await new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
120
|
+
/**
 * Returns the size of the file at `targetPath` in bytes.
 *
 * @param {string} targetPath - Path to inspect.
 * @returns {Promise<number>} The size reported by `stat`, or 0 when `stat` fails
 *   for any reason (for example, when the file does not exist).
 */
async function fileSize(targetPath) {
    let info;
    try {
        info = await stat(targetPath);
    }
    catch {
        return 0;
    }
    return info.size;
}
|
|
129
|
+
/**
 * Streams a fetch response body into a file.
 *
 * @param {Response} response - Fetch response whose body is written out.
 * @param {string} tempFilePath - Destination file path.
 * @param {boolean} append - When truthy the file is opened with flag `'a'` (append),
 *   otherwise with `'w'` (truncate).
 * @returns {Promise<void>} Resolves when the stream pipeline has finished.
 * @throws {Error} When the response has no body; stream failures reject the promise.
 */
async function writeResponseToTempFile(response, tempFilePath, append) {
    const { body } = response;
    if (!body) {
        throw new Error('response body is empty');
    }
    const flags = append ? 'a' : 'w';
    await pipeline(Readable.fromWeb(body), createWriteStream(tempFilePath, { flags }));
}
|
|
139
|
+
/**
 * Picks a file extension for a downloaded image.
 *
 * Order of preference: the extension of the URL path when it is a known image
 * extension, then the extension mapped from `contentType`, then `'.img'`.
 *
 * @param {string} url - Source URL; a value that does not parse is ignored.
 * @param {string | undefined} contentType - Media type of the response, if known.
 * @returns {string} Extension including the leading dot.
 */
function resolveImageExtension(url, contentType) {
    let urlExtension;
    try {
        urlExtension = path.extname(new URL(url).pathname).toLowerCase();
    }
    catch {
        // Unparseable URL: fall through to the content type.
        urlExtension = undefined;
    }
    if (urlExtension !== undefined && IMAGE_FILE_EXTENSIONS.has(urlExtension)) {
        return urlExtension;
    }
    const fromContentType = contentType ? CONTENT_TYPE_TO_EXTENSION[contentType.toLowerCase()] : undefined;
    return fromContentType || '.img';
}
|
|
155
|
+
/**
 * Performs a single attempt at downloading a remote image into `assetsDir`.
 *
 * The payload is streamed to `tempFilePath` and renamed to
 * `<assetsDir>/<fileKey><extension>` on success. When `allowResume` is set and a
 * non-empty temp file already exists, a `Range` request is sent; a 206 reply is
 * appended to the temp file, any other successful reply overwrites it.
 *
 * Failures after the assets directory has been created are reported through the
 * returned object rather than thrown.
 *
 * @param {string} url - Remote image URL.
 * @param {string} assetsDir - Directory that receives the final file (created if missing).
 * @param {string} tempFilePath - Partial-download file used for streaming and resuming.
 * @param {string} fileKey - Base name (without extension) of the final file.
 * @param {number} timeoutMs - Time after which the request is aborted.
 * @param {boolean} allowResume - Whether an existing temp file may be resumed.
 * @returns {Promise<object>} Result with `ok`, `retryable`, `attempts` (always 1) and
 *   `retries` (always 0); plus `localPath`/`size` on success, `error` on failure, and
 *   `httpStatus`, `contentType`, `resumeSupported` when known.
 */
async function downloadRemoteImageOnce(url, assetsDir, tempFilePath, fileKey, timeoutMs, allowResume) {
    await mkdir(assetsDir, { recursive: true });
    const existingTempSize = allowResume ? await fileSize(tempFilePath) : 0;
    const resumeFrom = existingTempSize > 0 ? existingTempSize : 0;
    const rangeHeader = resumeFrom > 0 ? `bytes=${resumeFrom}-` : undefined;
    const abortController = new AbortController();
    const timeoutHandle = setTimeout(() => abortController.abort(), timeoutMs);
    // A response that is rejected without reading its body must still have the body
    // cancelled, otherwise the underlying connection is not released promptly.
    const discardBody = async (response) => {
        try {
            await response.body?.cancel();
        }
        catch {
            // Best effort: a failed cancel must not mask the result being returned.
        }
    };
    try {
        const fetchInit = {
            method: 'GET',
            redirect: 'follow',
            signal: abortController.signal,
        };
        if (rangeHeader) {
            fetchInit.headers = { Range: rangeHeader };
        }
        const response = await fetch(url, fetchInit);
        const contentType = normalizeContentType(response.headers.get('content-type'));
        if (!response.ok) {
            await discardBody(response);
            if (response.status === 416 && resumeFrom > 0) {
                // The partial file cannot be resumed; the caller restarts from scratch.
                return {
                    ok: false,
                    retryable: true,
                    resumeSupported: false,
                    attempts: 1,
                    retries: 0,
                    httpStatus: response.status,
                    ...(contentType ? { contentType } : {}),
                    error: 'http status 416 (range not satisfiable)',
                };
            }
            return {
                ok: false,
                retryable: isRetryableHttpStatus(response.status),
                attempts: 1,
                retries: 0,
                httpStatus: response.status,
                ...(contentType ? { contentType } : {}),
                error: `http status ${response.status}`,
            };
        }
        const extension = resolveImageExtension(url, contentType);
        const isImageByType = contentType ? contentType.startsWith('image/') : false;
        const isImageByExt = IMAGE_FILE_EXTENSIONS.has(extension);
        if (!isImageByType && !isImageByExt) {
            await discardBody(response);
            return {
                ok: false,
                retryable: false,
                attempts: 1,
                retries: 0,
                ...(contentType ? { contentType } : {}),
                error: `unsupported content-type: ${contentType ?? 'unknown'}`,
            };
        }
        let appendMode = false;
        let resumeSupported;
        if (resumeFrom > 0) {
            // Only a 206 reply carries the requested tail; anything else is a full body.
            resumeSupported = response.status === 206;
            appendMode = resumeSupported;
        }
        try {
            await writeResponseToTempFile(response, tempFilePath, appendMode);
        }
        catch (streamError) {
            return {
                ok: false,
                retryable: isRecoverableNetworkError(streamError),
                ...(resumeSupported !== undefined ? { resumeSupported } : {}),
                attempts: 1,
                retries: 0,
                error: normalizeErrorMessage(streamError),
            };
        }
        const tempSize = await fileSize(tempFilePath);
        if (tempSize <= 0) {
            return {
                ok: false,
                retryable: false,
                attempts: 1,
                retries: 0,
                ...(resumeSupported !== undefined ? { resumeSupported } : {}),
                ...(contentType ? { contentType } : {}),
                error: 'downloaded file is empty',
            };
        }
        const localPath = path.join(assetsDir, `${fileKey}${extension}`);
        await rm(localPath, { force: true });
        await rename(tempFilePath, localPath);
        const finalSize = await fileSize(localPath);
        return {
            ok: true,
            retryable: false,
            attempts: 1,
            retries: 0,
            localPath,
            ...(resumeSupported !== undefined ? { resumeSupported } : {}),
            ...(contentType ? { contentType } : {}),
            size: finalSize,
        };
    }
    catch (error) {
        return {
            ok: false,
            retryable: isRecoverableNetworkError(error),
            attempts: 1,
            retries: 0,
            error: normalizeErrorMessage(error),
        };
    }
    finally {
        clearTimeout(timeoutHandle);
    }
}
|
|
274
|
+
/**
 * Downloads a remote image, retrying retryable failures with backoff.
 *
 * The file key is the first 16 hex characters of the SHA-1 of `url`, and the
 * partial download lives at `<assetsDir>/<fileKey>.part`. Once an attempt reports
 * `resumeSupported === false`, the partial file is removed and later attempts start
 * from scratch. The partial file is also removed when giving up.
 *
 * @param {string} url - Remote image URL.
 * @param {string} assetsDir - Directory that receives the downloaded file.
 * @param {number} timeoutMs - Timeout for each attempt.
 * @param {number} maxRetries - Maximum number of retries (negative values act as 0).
 * @param {number} backoffBaseMs - Backoff unit in milliseconds.
 * @param {number} backoffMaxMs - Backoff cap in milliseconds.
 * @param {number} backoffJitterRatio - Relative backoff jitter.
 * @returns {Promise<object>} The result of the last attempt with `attempts` and
 *   `retries` set to the real counts.
 */
async function downloadRemoteImageWithRetry(url, assetsDir, timeoutMs, maxRetries, backoffBaseMs, backoffMaxMs, backoffJitterRatio) {
    const retriesLimit = Math.max(0, maxRetries);
    const fileKey = createHash('sha1').update(url).digest('hex').slice(0, 16);
    const tempFilePath = path.join(assetsDir, `${fileKey}.part`);
    const totalAttempts = retriesLimit + 1;
    let allowResume = true;
    let attempt = 0;
    while (attempt < totalAttempts) {
        attempt += 1;
        const outcome = await downloadRemoteImageOnce(url, assetsDir, tempFilePath, fileKey, timeoutMs, allowResume);
        const retries = attempt - 1;
        const withCounts = { ...outcome, attempts: attempt, retries };
        if (outcome.ok) {
            return withCounts;
        }
        if (outcome.resumeSupported === false) {
            // The partial file cannot be resumed: drop it and stop sending Range requests.
            allowResume = false;
            await rm(tempFilePath, { force: true });
        }
        const outOfRetries = !(retries < retriesLimit);
        if (!outcome.retryable || outOfRetries) {
            await rm(tempFilePath, { force: true });
            return withCounts;
        }
        await delay(getBackoffMs(retries + 1, backoffBaseMs, backoffMaxMs, backoffJitterRatio));
    }
    return {
        ok: false,
        retryable: true,
        attempts: retriesLimit + 1,
        retries: retriesLimit,
        error: 'download failed',
    };
}
|
|
314
|
+
/**
 * Converts a configured prefix string into a matching rule.
 *
 * A prefix that already starts with `http://` or `https://` is used as-is; a bare
 * `host[/path]` form gets `https://` prepended; anything else is rejected.
 *
 * @param {string} rawPrefix - Prefix text as configured.
 * @returns {{originalPrefix: string, normalizedPrefix: string, host: string, pathPrefix: string} | undefined}
 *   The rule, or `undefined` when the prefix is empty or not a usable http(s) URL.
 *   `pathPrefix` is `''` when the URL path is just `/`.
 */
function normalizeYtDlpPrefix(rawPrefix) {
    const cleaned = rawPrefix.trim();
    if (!cleaned) {
        return undefined;
    }
    let candidate;
    if (cleaned.startsWith('http://') || cleaned.startsWith('https://')) {
        candidate = cleaned;
    }
    else if (/^[A-Za-z0-9.-]+\.[A-Za-z]{2,}(?:[/:?#].*)?$/i.test(cleaned)) {
        candidate = `https://${cleaned}`;
    }
    else {
        return undefined;
    }
    const parsed = parseHttpUrl(candidate);
    if (!parsed) {
        return undefined;
    }
    const { hostname, pathname } = parsed;
    return {
        originalPrefix: rawPrefix,
        normalizedPrefix: parsed.toString(),
        host: hostname.toLowerCase(),
        pathPrefix: pathname === '/' ? '' : pathname,
    };
}
|
|
336
|
+
/**
 * Reads `url_handlers.yt_dlp.prefixes` from the YAML frontmatter of a markdown document.
 *
 * `prefixes` may be a single string or an array; non-string and blank entries are
 * dropped. Each remaining prefix is normalized into a rule, and rules are
 * de-duplicated by their normalized prefix (the last duplicate wins).
 *
 * @param {string} sourceContent - Full markdown text.
 * @returns {{configured: boolean, prefixes: string[], rules: object[]}}
 *   `configured` is `false` (with empty lists) when there is no frontmatter, the
 *   frontmatter is blank or fails to parse, or `prefixes` is neither a string nor an array.
 */
function readYtDlpPrefixesFromFrontmatter(sourceContent) {
    const notConfigured = () => ({ configured: false, prefixes: [], rules: [] });
    const asObject = (value) => (value && typeof value === 'object' ? value : null);
    const frontmatter = sourceContent.match(/^---\s*\r?\n([\s\S]*?)\r?\n---(?:\s*\r?\n|$)/);
    const yamlText = frontmatter?.[1];
    if (!yamlText || yamlText.trim().length === 0) {
        return notConfigured();
    }
    let document;
    try {
        document = parseYaml(yamlText);
    }
    catch {
        return notConfigured();
    }
    const urlHandlers = asObject(asObject(document)?.url_handlers);
    const rawPrefixes = asObject(urlHandlers?.yt_dlp)?.prefixes;
    let candidates;
    if (typeof rawPrefixes === 'string') {
        candidates = [rawPrefixes];
    }
    else if (Array.isArray(rawPrefixes)) {
        candidates = rawPrefixes;
    }
    else {
        return notConfigured();
    }
    const prefixes = candidates
        .map((item) => (typeof item === 'string' ? item.trim() : ''))
        .filter((item) => item.length > 0);
    const rulesByPrefix = new Map();
    for (const prefix of prefixes) {
        const rule = normalizeYtDlpPrefix(prefix);
        if (rule) {
            rulesByPrefix.set(rule.normalizedPrefix, rule);
        }
    }
    return {
        configured: true,
        prefixes,
        rules: [...rulesByPrefix.values()],
    };
}
|
|
383
|
+
/**
 * Interprets a whole line of text as a single URL.
 *
 * The text may be wrapped in angle brackets and may omit the scheme (a bare
 * `host[/path]` form gets `https://` prepended). Text containing whitespace is rejected.
 *
 * @param {string} value - Line content.
 * @returns {URL | undefined} The parsed http(s) URL, or `undefined` when the text is
 *   not a standalone URL.
 */
function normalizeStandaloneUrl(value) {
    const trimmed = value.trim();
    if (!trimmed || /\s/.test(trimmed)) {
        return undefined;
    }
    const isAngleWrapped = trimmed.startsWith('<') && trimmed.endsWith('>');
    const unwrapped = isAngleWrapped ? trimmed.slice(1, -1).trim() : trimmed;
    if (!unwrapped || /\s/.test(unwrapped)) {
        return undefined;
    }
    if (unwrapped.startsWith('http://') || unwrapped.startsWith('https://')) {
        return parseHttpUrl(unwrapped);
    }
    if (/^[A-Za-z0-9.-]+\.[A-Za-z]{2,}(?:[/:?#].*)?$/i.test(unwrapped)) {
        return parseHttpUrl(`https://${unwrapped}`);
    }
    return undefined;
}
|
|
399
|
+
/**
 * Checks whether `candidateHost` equals `ruleHost` or is a subdomain of it.
 *
 * @param {string} candidateHost - Host being tested.
 * @param {string} ruleHost - Host from a rule.
 * @returns {number} `ruleHost.length` on a match, `-1` otherwise.
 */
function getHostSuffixMatchLength(candidateHost, ruleHost) {
    const isSameOrSubdomain = candidateHost === ruleHost || candidateHost.endsWith(`.${ruleHost}`);
    return isSameOrSubdomain ? ruleHost.length : -1;
}
|
|
406
|
+
/**
 * Tells whether `url` is covered by at least one yt-dlp prefix rule.
 *
 * A rule covers the URL when the URL host equals the rule host or is a subdomain
 * of it, and the URL path starts with the rule's `pathPrefix` (an empty
 * `pathPrefix` matches every path).
 *
 * @param {URL} url - Parsed URL to test.
 * @param {Array<{host: string, pathPrefix: string}>} rules - Normalized prefix rules.
 * @returns {boolean} `true` when any rule matches.
 */
function matchesYtDlpPrefix(url, rules) {
    const candidateHost = url.hostname.toLowerCase();
    const candidatePath = url.pathname || '/';
    return rules.some((rule) => {
        if (getHostSuffixMatchLength(candidateHost, rule.host) < 0) {
            return false;
        }
        return !rule.pathPrefix || candidatePath.startsWith(rule.pathPrefix);
    });
}
|
|
427
|
+
/**
 * Finds lines that consist solely of a URL matching one of the yt-dlp rules.
 *
 * Lines that open or close a fenced code block (three or more backticks or tildes)
 * and every line inside such a block are ignored. A fence is closed by a marker of
 * the same character that is at least as long as the opening marker.
 *
 * @param {string} content - Markdown text.
 * @param {object[]} rules - Normalized prefix rules; an empty list short-circuits to `[]`.
 * @returns {Array<{start: number, end: number, originalUrl: string, normalizedUrl: string}>}
 *   One entry per matching line. `start`/`end` are offsets of the line in `content`
 *   (line break excluded); `originalUrl` is the trimmed line text.
 */
function collectStandaloneYtDlpUrlLines(content, rules) {
    if (rules.length === 0) {
        return [];
    }
    const found = [];
    const linePattern = /([^\r\n]*)(\r?\n|$)/g;
    // Currently open fence as { char, length }, or null outside fenced blocks.
    let openFence = null;
    for (let hit = linePattern.exec(content); hit !== null; hit = linePattern.exec(content)) {
        // An empty match only happens at the end of the input.
        if (!hit[0]) {
            break;
        }
        const lineText = hit[1] ?? '';
        const trimmed = lineText.trim();
        const marker = trimmed.match(/^(`{3,}|~{3,})/)?.[1];
        if (marker) {
            if (openFence === null) {
                openFence = { char: marker[0], length: marker.length };
            }
            else if (openFence.char === marker[0] && marker.length >= openFence.length) {
                openFence = null;
            }
            continue;
        }
        if (openFence !== null) {
            continue;
        }
        const parsedUrl = normalizeStandaloneUrl(trimmed);
        if (parsedUrl && matchesYtDlpPrefix(parsedUrl, rules)) {
            found.push({
                start: hit.index,
                end: hit.index + lineText.length,
                originalUrl: trimmed,
                normalizedUrl: parsedUrl.toString(),
            });
        }
    }
    return found;
}
|
|
473
|
+
/**
 * Extracts a three-digit status code that follows "http", "http error" or "error"
 * (case-insensitive, separated by a colon or space) in free-form text.
 *
 * @param {string} text - Text to scan, such as process stderr.
 * @returns {number | undefined} The first such status, or `undefined` when none is found.
 */
function extractHttpStatusFromText(text) {
    const [, digits] = text.match(/\b(?:http(?:\s+error)?|error)\s*[: ]\s*(\d{3})\b/i) ?? [];
    if (!digits) {
        return undefined;
    }
    const status = Number.parseInt(digits, 10);
    return Number.isFinite(status) ? status : undefined;
}
|
|
481
|
+
/**
 * Decides whether a failed yt-dlp run should be retried.
 *
 * A known HTTP status decides first: retryable statuses return `true`, and
 * 401/403/404 return `false`. Otherwise stderr is checked for messages that
 * indicate a permanent failure, and finally for network-error markers.
 *
 * @param {string} stderr - Captured stderr of the yt-dlp process.
 * @param {number | undefined} status - HTTP status extracted from stderr, if any.
 * @returns {boolean} `true` when another attempt may succeed.
 */
function isRetryableYtDlpFailure(stderr, status) {
    if (status !== undefined) {
        if (isRetryableHttpStatus(status)) {
            return true;
        }
        if ([401, 403, 404].includes(status)) {
            return false;
        }
    }
    const lowered = stderr.toLowerCase();
    const permanentFailureMarkers = [
        'unsupported url',
        'private video',
        'sign in to confirm your age',
        'this video is unavailable',
        'copyright',
    ];
    if (permanentFailureMarkers.some((marker) => lowered.includes(marker))) {
        return false;
    }
    return isRecoverableNetworkError(stderr);
}
|
|
501
|
+
/**
 * Normalizes the source URL field printed by yt-dlp.
 *
 * Blank values and the placeholders `na`, `n/a`, `none`, `null` (case-insensitive)
 * are treated as "no URL".
 *
 * @param {string | undefined} rawUrl - Raw field text.
 * @returns {string | undefined} The serialized http(s) URL, or `undefined` when the
 *   field is blank, a placeholder, or not an http(s) URL.
 */
function normalizeYtDlpSourceUrl(rawUrl) {
    const trimmed = rawUrl?.trim();
    if (!trimmed) {
        return undefined;
    }
    const placeholders = ['na', 'n/a', 'none', 'null'];
    if (placeholders.includes(trimmed.toLowerCase())) {
        return undefined;
    }
    return parseHttpUrl(trimmed)?.toString();
}
|
|
512
|
+
/**
 * Makes a path reported by yt-dlp absolute.
 *
 * @param {string} rawPath - Path as printed by yt-dlp.
 * @param {string} downloadDir - Directory relative paths are resolved against.
 * @returns {string} `rawPath` unchanged when it is already absolute, otherwise the
 *   path resolved against `downloadDir`.
 */
function resolveYtDlpPath(rawPath, downloadDir) {
    if (path.isAbsolute(rawPath)) {
        return rawPath;
    }
    return path.resolve(downloadDir, rawPath);
}
|
|
515
|
+
/**
 * Parses yt-dlp stdout into a list of downloaded files.
 *
 * Blank lines and lines starting with `[` are skipped. Lines starting with
 * `YT_DLP_PRINT_PREFIX` carry tab-separated `path`, `source URL` and
 * `playlist index` fields; any other line is taken as a bare file path. Entries
 * are keyed by resolved path: a repeated path keeps its first position and only
 * fills in a `sourceUrl` / `playlistIndex` that was still missing.
 *
 * @param {string} stdout - Captured stdout of yt-dlp.
 * @param {string} downloadDir - Directory relative paths are resolved against.
 * @returns {Array<{localPath: string, order: number, sourceUrl?: string, playlistIndex?: number}>}
 *   Entries in order of first appearance.
 */
function parseYtDlpOutputEntries(stdout, downloadDir) {
    const entriesByPath = new Map();
    const record = (localPath, sourceUrl, playlistIndex) => {
        let entry = entriesByPath.get(localPath);
        if (!entry) {
            // Entries are never removed, so the map size is the next sequence number.
            entry = { localPath, order: entriesByPath.size };
            entriesByPath.set(localPath, entry);
        }
        if (!entry.sourceUrl && sourceUrl) {
            entry.sourceUrl = sourceUrl;
        }
        if (entry.playlistIndex === undefined && playlistIndex !== undefined) {
            entry.playlistIndex = playlistIndex;
        }
    };
    for (const rawLine of stdout.split(/\r?\n/)) {
        const line = rawLine.trim();
        if (!line || line.startsWith('[')) {
            continue;
        }
        if (!line.startsWith(YT_DLP_PRINT_PREFIX)) {
            record(resolveYtDlpPath(line, downloadDir), undefined, undefined);
            continue;
        }
        const fields = line.slice(YT_DLP_PRINT_PREFIX.length).split('\t');
        const [rawPath = '', rawSourceUrl = '', rawPlaylistIndex = ''] = fields;
        const reportedPath = rawPath.trim();
        if (!reportedPath) {
            continue;
        }
        const indexNumber = Number.parseInt(rawPlaylistIndex.trim(), 10);
        const hasValidIndex = Number.isFinite(indexNumber) && indexNumber > 0;
        record(resolveYtDlpPath(reportedPath, downloadDir), normalizeYtDlpSourceUrl(rawSourceUrl), hasValidIndex ? indexNumber : undefined);
    }
    // Map iteration follows insertion order, which is already ascending `order`.
    return [...entriesByPath.values()];
}
|
|
568
|
+
/**
 * Runs yt-dlp once for `url` and reports the files it downloaded.
 *
 * yt-dlp is asked to print one `YT_DLP_PRINT_PREFIX`-tagged line per finished file;
 * those lines are parsed and every reported file must exist with a non-zero size.
 * The process is killed with SIGKILL when it runs longer than `timeoutMs`.
 *
 * Failures after the download directory has been created are reported through the
 * returned object rather than thrown.
 *
 * @param {string} ytDlpPath - Executable to spawn.
 * @param {string | undefined} ytDlpCookiesPath - Optional cookies file passed via `--cookies`.
 * @param {string} url - Media URL handed to yt-dlp.
 * @param {string} downloadDir - Output directory (created if missing).
 * @param {number} timeoutMs - Maximum run time before the process is killed.
 * @returns {Promise<object>} Result with `ok`, `attempts` (always 1), `retries`
 *   (always 0) and `retryable`; plus `files` on success, `error` on failure, and
 *   `httpStatus` when one could be extracted from stderr.
 */
async function runYtDlpOnce(ytDlpPath, ytDlpCookiesPath, url, downloadDir, timeoutMs) {
    await mkdir(downloadDir, { recursive: true });
    const args = [
        '--ignore-config',
        '--no-warnings',
        '--no-progress',
        '--newline',
        '--restrict-filenames',
        '--paths',
        downloadDir,
        '--output',
        '%(title).80B-%(id)s.%(ext)s',
        '--print',
        `after_move:${YT_DLP_PRINT_PREFIX}%(filepath)s\t%(webpage_url)s\t%(playlist_index)s`,
    ];
    if (ytDlpCookiesPath) {
        args.push('--cookies', ytDlpCookiesPath);
    }
    args.push(url);
    const spawnResult = await new Promise((resolve) => {
        const child = spawn(ytDlpPath, args, {
            stdio: ['ignore', 'pipe', 'pipe'],
            env: process.env,
        });
        const stdoutChunks = [];
        const stderrChunks = [];
        let timedOut = false;
        let spawnError;
        let settled = false;
        child.stdout.on('data', (chunk) => {
            stdoutChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
        });
        child.stderr.on('data', (chunk) => {
            stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
        });
        const timeoutHandle = setTimeout(() => {
            timedOut = true;
            child.kill('SIGKILL');
        }, timeoutMs);
        // Resolves the promise exactly once and always disarms the timeout timer.
        const settle = (code) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timeoutHandle);
            const result = {
                code,
                stdout: Buffer.concat(stdoutChunks).toString('utf8'),
                stderr: Buffer.concat(stderrChunks).toString('utf8'),
                timedOut,
            };
            if (spawnError) {
                result.spawnError = spawnError;
            }
            resolve(result);
        };
        child.on('error', (error) => {
            spawnError = normalizeErrorMessage(error);
            // Without a pid the process never started, so do not depend on a later
            // 'close' event to finish; otherwise this call could wait forever.
            if (child.pid === undefined) {
                settle(null);
            }
        });
        child.on('close', (code) => {
            settle(code);
        });
    });
    if (spawnResult.timedOut) {
        return {
            ok: false,
            attempts: 1,
            retries: 0,
            retryable: true,
            error: `yt-dlp timed out after ${timeoutMs}ms`,
        };
    }
    if (spawnResult.spawnError) {
        return {
            ok: false,
            attempts: 1,
            retries: 0,
            retryable: false,
            error: `yt-dlp spawn failed: ${spawnResult.spawnError}`,
        };
    }
    if (spawnResult.code !== 0) {
        const httpStatus = extractHttpStatusFromText(spawnResult.stderr);
        const retryable = isRetryableYtDlpFailure(spawnResult.stderr, httpStatus);
        return {
            ok: false,
            attempts: 1,
            retries: 0,
            retryable,
            ...(httpStatus !== undefined ? { httpStatus } : {}),
            error: spawnResult.stderr.trim() || `yt-dlp exited with code ${String(spawnResult.code ?? 'unknown')}`,
        };
    }
    const parsedEntries = parseYtDlpOutputEntries(spawnResult.stdout, downloadDir);
    if (parsedEntries.length === 0) {
        return {
            ok: false,
            attempts: 1,
            retries: 0,
            retryable: false,
            error: 'yt-dlp returned success but no downloaded files were reported',
        };
    }
    const files = [];
    for (const entry of parsedEntries) {
        // Reported files that are missing or empty are dropped.
        const size = await fileSize(entry.localPath);
        if (size <= 0) {
            continue;
        }
        const file = {
            localPath: entry.localPath,
            title: path.basename(entry.localPath),
        };
        if (entry.sourceUrl) {
            file.sourceUrl = entry.sourceUrl;
        }
        if (entry.playlistIndex !== undefined) {
            file.playlistIndex = entry.playlistIndex;
        }
        files.push(file);
    }
    if (files.length === 0) {
        return {
            ok: false,
            attempts: 1,
            retries: 0,
            retryable: false,
            error: 'yt-dlp output files are empty or missing',
        };
    }
    return {
        ok: true,
        attempts: 1,
        retries: 0,
        retryable: false,
        files,
    };
}
|
|
697
|
+
/**
 * Downloads media with yt-dlp, retrying retryable failures with backoff.
 *
 * Files are stored under `<assetsDir>/yt-dlp/<hash>`, where `<hash>` is the first
 * 12 hex characters of the SHA-1 of `url`.
 *
 * @param {string} url - Media URL.
 * @param {string} ytDlpPath - yt-dlp executable.
 * @param {string | undefined} ytDlpCookiesPath - Optional cookies file.
 * @param {string} assetsDir - Root assets directory.
 * @param {number} timeoutMs - Timeout for each yt-dlp run.
 * @param {number} maxRetries - Maximum number of retries (negative values act as 0).
 * @param {number} backoffBaseMs - Backoff unit in milliseconds.
 * @param {number} backoffMaxMs - Backoff cap in milliseconds.
 * @param {number} backoffJitterRatio - Relative backoff jitter.
 * @returns {Promise<object>} The result of the last run with `attempts` and
 *   `retries` set to the real counts.
 */
async function downloadMediaWithYtDlpRetry(url, ytDlpPath, ytDlpCookiesPath, assetsDir, timeoutMs, maxRetries, backoffBaseMs, backoffMaxMs, backoffJitterRatio) {
    const retriesLimit = Math.max(0, maxRetries);
    const sourceHash = createHash('sha1').update(url).digest('hex').slice(0, 12);
    const downloadDir = path.join(assetsDir, 'yt-dlp', sourceHash);
    const totalAttempts = retriesLimit + 1;
    let attempt = 0;
    while (attempt < totalAttempts) {
        attempt += 1;
        const outcome = await runYtDlpOnce(ytDlpPath, ytDlpCookiesPath, url, downloadDir, timeoutMs);
        const retries = attempt - 1;
        const mayRetry = outcome.retryable && retries < retriesLimit;
        if (outcome.ok || !mayRetry) {
            return { ...outcome, attempts: attempt, retries };
        }
        await delay(getBackoffMs(retries + 1, backoffBaseMs, backoffMaxMs, backoffJitterRatio));
    }
    return {
        ok: false,
        attempts: retriesLimit + 1,
        retries: retriesLimit,
        retryable: true,
        error: 'yt-dlp download failed',
    };
}
|
|
731
|
+
/**
 * Locates every inline markdown image tag in `content`.
 *
 * @param {string} content - Markdown text.
 * @returns {Array<{start: number, end: number, prefix: string, suffix: string, originalUrl: string, wrappedByAngles: boolean}>}
 *   One entry per image tag: `start`/`end` are offsets of the whole tag,
 *   `prefix`/`suffix` are the text before and after the URL inside the tag, and
 *   `wrappedByAngles` tells whether the URL was written inside angle brackets.
 */
function collectImageTagMatches(content) {
    // Work on a private copy so the shared global regex keeps its own lastIndex.
    const pattern = new RegExp(MARKDOWN_IMAGE_TAG_RE.source, MARKDOWN_IMAGE_TAG_RE.flags);
    const matches = [];
    for (const hit of content.matchAll(pattern)) {
        const [full, prefix = '', angleWrappedUrl, plainUrl, suffix = ''] = hit;
        const originalUrl = angleWrappedUrl ?? plainUrl;
        if (!originalUrl) {
            continue;
        }
        const start = hit.index;
        matches.push({
            start,
            end: start + full.length,
            prefix,
            suffix,
            originalUrl,
            wrappedByAngles: angleWrappedUrl !== undefined,
        });
    }
    return matches;
}
|
|
757
|
+
/**
 * Turns arbitrary text into a safe, single-line markdown link label.
 *
 * Runs of CR/LF/TAB collapse to one space and the result is trimmed. If nothing
 * is left, the placeholder 'downloaded-video' is returned.
 *
 * @param {string} value - Raw label text.
 * @returns {string} The label with backslash, backtick, `*`, `_`, `[` and `]` backslash-escaped.
 */
function escapeMarkdownLinkText(value) {
    const singleLine = value.replace(/[\r\n\t]+/g, ' ').trim();
    if (singleLine.length === 0) {
        return 'downloaded-video';
    }
    // Escaping keeps the label literal, so a filename like a_b_c.mp4 is not
    // parsed as emphasis or inline code.
    return singleLine.replace(/[\\`*_[\]]/g, (special) => `\\${special}`);
}
|
|
764
|
+
/**
 * Renders downloaded files as markdown links, one link per file, separated by
 * a blank line.
 *
 * @param {Array<{localPath: string, title: string}>} files - Files to link to.
 * @returns {string} Links of the form `[title](<path>)`; empty string for no files.
 */
function formatYtDlpReplacementMarkdown(files) {
    const links = [];
    for (const file of files) {
        const target = toMarkdownLocalPath(file.localPath);
        const label = escapeMarkdownLinkText(file.title);
        // The destination is always emitted in angle brackets.
        links.push(`[${label}](<${target}>)`);
    }
    return links.join('\n\n');
}
|
|
773
|
+
/**
 * Reports whether `access(filePath)` succeeds.
 *
 * Any failure is deliberately mapped to `false` rather than propagated.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>} `true` when `access` resolves, `false` when it fails.
 */
async function tryAccess(filePath) {
    let reachable = true;
    try {
        await access(filePath);
    }
    catch {
        reachable = false;
    }
    return reachable;
}
|
|
782
|
+
/**
 * Localises remote assets referenced by a markdown document before publishing.
 *
 * Remote image tags are downloaded (only when `options.enabled` is truthy) and
 * standalone URL lines matching the frontmatter yt-dlp rules are downloaded via
 * yt-dlp (only when rules are configured and `options.ytDlpPath` is non-empty).
 * Each successful download replaces its original span in the content. The log
 * file is not written here; its path and content are returned to the caller.
 *
 * @param {string} sourcePath - Path of the markdown file; resolved to an absolute path.
 * @param {string} sourceContent - Markdown text to scan and rewrite.
 * @param {object} options
 * @param {string} options.prepareDir - Working directory; assets go to `<prepareDir>/assets`.
 * @param {boolean} options.enabled - Enables remote image downloading.
 * @param {number} [options.timeoutMs] - Image download timeout.
 * @param {number} [options.maxRetries] - Retry budget shared by both download kinds.
 * @param {number} [options.backoffBaseMs]
 * @param {number} [options.backoffMaxMs]
 * @param {number} [options.backoffJitterRatio]
 * @param {number} [options.ytDlpTimeoutMs] - Timeout passed to the yt-dlp downloader.
 * @param {string} [options.ytDlpPath] - yt-dlp executable; trimmed before use.
 * @param {string} [options.ytDlpCookiesPath] - Cookies file for yt-dlp; trimmed before use.
 * @returns {Promise<object>} `preparedContent`, `changed`, the counters, the resolved
 *   directories, `logEntries` and `logFileContent`. Nullish numeric options fall back
 *   to the module-level DEFAULT_* constants.
 */
export async function prepareMarkdownBeforePublish(sourcePath, sourceContent, options) {
    const absoluteSourcePath = path.resolve(sourcePath);
    const prepareDir = path.resolve(options.prepareDir);
    const assetsDir = path.join(prepareDir, 'assets');
    const logFilePath = path.join(prepareDir, 'download.log.json');

    const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
    const backoffBaseMs = options.backoffBaseMs ?? DEFAULT_BACKOFF_BASE_MS;
    const backoffMaxMs = options.backoffMaxMs ?? DEFAULT_BACKOFF_MAX_MS;
    const backoffJitterRatio = options.backoffJitterRatio ?? DEFAULT_BACKOFF_JITTER_RATIO;
    const ytDlpTimeoutMs = options.ytDlpTimeoutMs ?? DEFAULT_YT_DLP_TIMEOUT_MS;
    const ytDlpPath = options.ytDlpPath?.trim();
    const ytDlpCookiesPath = options.ytDlpCookiesPath?.trim();
    // Both downloaders take the retry settings as their trailing arguments, in this order.
    const retryArgs = [maxRetries, backoffBaseMs, backoffMaxMs, backoffJitterRatio];

    const remoteImageMatches = collectImageTagMatches(sourceContent)
        .filter((tag) => isRemoteHttpUrl(tag.originalUrl));
    const ytDlpFrontmatterConfig = readYtDlpPrefixesFromFrontmatter(sourceContent);
    const ytDlpConfigured = ytDlpFrontmatterConfig.configured && ytDlpFrontmatterConfig.rules.length > 0;
    const canRunYtDlp = ytDlpConfigured && Boolean(ytDlpPath);
    const ytDlpLineMatches = collectStandaloneYtDlpUrlLines(sourceContent, ytDlpFrontmatterConfig.rules);

    const logEntries = [];
    // Entries are numbered from 1 in the order they are recorded.
    const pushLogEntry = (entry) => {
        logEntries.push({ index: logEntries.length + 1, ...entry });
    };
    // yt-dlp section of the log; identical in the early-exit and the full report.
    const describeYtDlp = () => ({
        enabled: canRunYtDlp,
        configuredInFrontmatter: ytDlpFrontmatterConfig.configured,
        prefixes: ytDlpFrontmatterConfig.prefixes,
        executable: ytDlpPath ?? null,
        cookiesPath: ytDlpCookiesPath ?? null,
        timeoutMs: ytDlpTimeoutMs,
    });
    // "failed" log entry; attempt details are attached only when a download result exists.
    const failureEntry = (sourceType, originalUrl, outcome, fallbackError) => {
        const entry = {
            status: 'failed',
            sourceType,
            originalUrl,
            error: outcome?.error ?? fallbackError,
        };
        if (outcome) {
            entry.attempts = outcome.attempts;
            entry.retries = outcome.retries;
            entry.retryable = outcome.retryable;
            if (outcome.httpStatus !== undefined) {
                entry.httpStatus = outcome.httpStatus;
            }
        }
        return entry;
    };

    // Nothing remote to process: return the content untouched with an all-zero report.
    if (remoteImageMatches.length === 0 && ytDlpLineMatches.length === 0) {
        const zeroCounts = {
            remoteImageCount: 0,
            remoteYtDlpCount: 0,
            remoteFetchTotal: 0,
            rewrittenCount: 0,
            downloadedCount: 0,
            failedCount: 0,
            remoteFetchFailed: 0,
            ytDlpDownloadedCount: 0,
            ytDlpFailedCount: 0,
        };
        return {
            sourcePath: absoluteSourcePath,
            preparedContent: sourceContent,
            changed: false,
            ...zeroCounts,
            prepareDir,
            assetsDir,
            logFilePath,
            logEntries,
            logFileContent: {
                generatedAt: new Date().toISOString(),
                sourcePath: absoluteSourcePath,
                enabled: options.enabled,
                ytDlp: describeYtDlp(),
                ...zeroCounts,
                entries: [],
            },
        };
    }

    // Phase 1: fetch each distinct image URL once, all in parallel.
    const imageDownloadResults = new Map();
    if (options.enabled) {
        const distinctImageUrls = Array.from(new Set(remoteImageMatches.map((tag) => tag.originalUrl)));
        const fetched = await Promise.all(distinctImageUrls.map(async (url) => [
            url,
            await downloadRemoteImageWithRetry(url, assetsDir, timeoutMs, ...retryArgs),
        ]));
        for (const [url, outcome] of fetched) {
            imageDownloadResults.set(url, outcome);
        }
    }

    // Phase 2: run yt-dlp once per distinct normalized URL, one after another.
    const ytDlpResults = new Map();
    if (canRunYtDlp) {
        const distinctMediaUrls = Array.from(new Set(ytDlpLineMatches.map((line) => line.normalizedUrl)));
        for (const url of distinctMediaUrls) {
            ytDlpResults.set(url, await downloadMediaWithYtDlpRetry(url, ytDlpPath, ytDlpCookiesPath, assetsDir, ytDlpTimeoutMs, ...retryArgs));
        }
    }

    const replacements = [];
    const tally = { imageOk: 0, imageFailed: 0, mediaOk: 0, mediaFailed: 0 };

    // Phase 3a: one log entry (and, on success, one replacement) per image tag.
    for (const tag of remoteImageMatches) {
        const { originalUrl } = tag;
        if (!options.enabled) {
            pushLogEntry({ status: 'skipped-disabled', sourceType: 'image', originalUrl });
            continue;
        }
        const outcome = imageDownloadResults.get(originalUrl);
        if (!(outcome?.ok && outcome.localPath)) {
            tally.imageFailed += 1;
            pushLogEntry(failureEntry('image', originalUrl, outcome, 'download failed'));
            continue;
        }
        tally.imageOk += 1;
        const localRef = toMarkdownLocalPath(outcome.localPath);
        // Keep the tag's original shape: angle brackets are re-added only if it had them.
        replacements.push({
            start: tag.start,
            end: tag.end,
            replacement: `${tag.prefix}${tag.wrappedByAngles ? `<${localRef}>` : localRef}${tag.suffix}`,
        });
        const imageEntry = {
            status: 'downloaded',
            sourceType: 'image',
            originalUrl,
            attempts: outcome.attempts,
            retries: outcome.retries,
            retryable: outcome.retryable,
            localPath: outcome.localPath,
        };
        if (outcome.httpStatus !== undefined) {
            imageEntry.httpStatus = outcome.httpStatus;
        }
        if (outcome.contentType) {
            imageEntry.contentType = outcome.contentType;
        }
        if (outcome.size !== undefined) {
            imageEntry.size = outcome.size;
        }
        pushLogEntry(imageEntry);
    }

    // Phase 3b: same bookkeeping for yt-dlp URL lines.
    for (const line of ytDlpLineMatches) {
        const { originalUrl } = line;
        if (!canRunYtDlp) {
            pushLogEntry({ status: 'skipped-disabled', sourceType: 'yt_dlp', originalUrl });
            continue;
        }
        const outcome = ytDlpResults.get(line.normalizedUrl);
        if (!(outcome?.ok && outcome.files && outcome.files.length !== 0)) {
            tally.mediaFailed += 1;
            pushLogEntry(failureEntry('yt_dlp', originalUrl, outcome, 'yt-dlp download failed'));
            continue;
        }
        tally.mediaOk += 1;
        replacements.push({
            start: line.start,
            end: line.end,
            replacement: formatYtDlpReplacementMarkdown(outcome.files),
        });
        // Logged size is the sum over the files that are accessible at this point.
        let totalBytes = 0;
        for (const file of outcome.files) {
            if (await tryAccess(file.localPath)) {
                totalBytes += await fileSize(file.localPath);
            }
        }
        const mediaEntry = {
            status: 'downloaded',
            sourceType: 'yt_dlp',
            originalUrl,
            attempts: outcome.attempts,
            retries: outcome.retries,
            retryable: outcome.retryable,
            size: totalBytes,
            contentType: 'video/*',
        };
        const [firstFile] = outcome.files;
        if (firstFile?.localPath) {
            mediaEntry.localPath = firstFile.localPath;
        }
        pushLogEntry(mediaEntry);
    }

    // Phase 4: splice from the end of the document backwards so that earlier
    // offsets stay valid while later spans change length.
    replacements.sort((left, right) => right.start - left.start);
    let preparedContent = sourceContent;
    for (const { start, end, replacement } of replacements) {
        preparedContent = `${preparedContent.slice(0, start)}${replacement}${preparedContent.slice(end)}`;
    }

    const remoteImageCount = remoteImageMatches.length;
    const remoteYtDlpCount = ytDlpLineMatches.length;
    const failedCount = tally.imageFailed + tally.mediaFailed;
    const counts = {
        remoteImageCount,
        remoteYtDlpCount,
        // Only sources whose downloader was actually allowed to run count as fetches.
        remoteFetchTotal: (options.enabled ? remoteImageCount : 0) + (canRunYtDlp ? remoteYtDlpCount : 0),
        rewrittenCount: replacements.length,
        downloadedCount: tally.imageOk + tally.mediaOk,
        failedCount,
        remoteFetchFailed: failedCount,
        ytDlpDownloadedCount: tally.mediaOk,
        ytDlpFailedCount: tally.mediaFailed,
    };
    return {
        sourcePath: absoluteSourcePath,
        preparedContent,
        changed: preparedContent !== sourceContent,
        ...counts,
        prepareDir,
        assetsDir,
        logFilePath,
        logEntries,
        logFileContent: {
            generatedAt: new Date().toISOString(),
            sourcePath: absoluteSourcePath,
            enabled: options.enabled,
            retryPolicy: {
                maxRetries,
                backoffBaseMs,
                backoffMaxMs,
                backoffJitterRatio,
            },
            ytDlp: describeYtDlp(),
            ...counts,
            entries: logEntries,
        },
    };
}
|