@aholbreich/agent-skills 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,507 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ const fsp = require('fs/promises');
5
+ const os = require('os');
6
+ const path = require('path');
7
+ const { spawn } = require('child_process');
8
+ const { parseSize, formatBytes, slugify, safeName, extractPageId, sameVersion } = require('./lib');
9
+
10
/**
 * Print the command-line help text to stdout.
 *
 * Flags and descriptions are laid out in two aligned columns so the output is
 * readable in a terminal. The list covers every flag the argument parser in
 * this file accepts, including the `-h` alias, and `--space` is described only
 * in terms of what the code does with it (scoping `--title` lookups).
 */
function usage() {
  console.log(`Usage: confluence-browser-fetch <URL|PAGE-ID> [...] [options]

Fetch Confluence Cloud pages through an authenticated Chrome browser session.
No Atlassian API token is required; useful for Microsoft/SSO environments.

Options:
  --site URL                 Atlassian site base URL (or set CONFLUENCE_SITE), e.g. https://example.atlassian.net
  --raw-dir DIR              Output raw directory (default: CONFLUENCE_RAW_DIR or ./raw)
  --space KEY                Resolve --title inside this space
  --title TITLE              Resolve and fetch page by title; repeatable with --space
  --cql CQL                  Search Confluence with CQL and fetch matching pages
  --descendants              Fetch descendant pages of each requested page
  --max-search-results N     Max pages to add per CQL search (default: 200)
  --max-attachment-size S    Skip attachment downloads larger than S (default: 5mb; use unlimited to disable)
  --force                    Re-fetch even when local page version is current
  --no-skip-unchanged        Disable version/timestamp skip check
  --no-attachments           Do not download attachments
  --no-browser-html          Do not save rendered browser HTML
  --retries N                HTTP retry count for transient failures (default: 3)
  --request-timeout SEC      Per-request timeout (default: 60)
  --wait SEC                 Wait time for SSO/session (default: 900)
  --port PORT                Chrome DevTools port (default: 9224)
  --profile-dir DIR          Chrome profile dir (default: ~/.local/share/confluence-browser-fetch-chrome)
  -h, --help                 Show this help

Examples:
  confluence-browser-fetch 'https://example.atlassian.net/wiki/spaces/ABC/pages/123456/Page+Title' --site https://example.atlassian.net --raw-dir ./raw
  confluence-browser-fetch 123456 --site https://example.atlassian.net --raw-dir ./raw
  confluence-browser-fetch --space ABC --title 'Architecture Overview' --raw-dir ./raw
  confluence-browser-fetch --cql 'space = ABC and type = page and text ~ "billing"' --raw-dir ./raw
  confluence-browser-fetch 123456 --descendants --raw-dir ./raw
`);
}
44
+
45
/** Print an invocation error to stderr and exit with status 2 (bad usage). */
function failUsage(message) {
  console.error(message);
  process.exit(2);
}

/**
 * Convert a raw CLI/environment value to a number, or exit with a usage error.
 *
 * @param {string|number|undefined} raw - Value as given by the user (or the built-in default).
 * @param {string} label - Flag/variable name used in the error message.
 * @param {{min?: number, max?: number, integer?: boolean}} [rule] - Accepted range.
 * @returns {number} The validated number.
 */
function numberSetting(raw, label, { min = 0, max = Infinity, integer = false } = {}) {
  const text = raw === undefined || raw === null ? '' : String(raw).trim();
  // Number('') is 0, which would silently accept an empty value.
  const value = text === '' ? NaN : Number(text);
  if (!Number.isFinite(value) || value < min || value > max || (integer && !Number.isInteger(value))) {
    failUsage(`Invalid value for ${label}: ${raw}`);
  }
  return value;
}

// Effective settings: environment variables provide defaults, command-line
// flags override them. Numeric and size settings hold the raw text until the
// whole command line has been read, then they are validated in one place below.
const opts = {
  site: process.env.CONFLUENCE_SITE || '',
  rawDir: process.env.CONFLUENCE_RAW_DIR || path.resolve(process.cwd(), 'raw'),
  port: process.env.CONFLUENCE_CHROME_DEBUG_PORT || 9224,
  waitSec: process.env.CONFLUENCE_FETCH_WAIT_SEC || 900,
  profileDir: process.env.CONFLUENCE_CHROME_PROFILE || path.join(os.homedir(), '.local/share/confluence-browser-fetch-chrome'),
  maxSearchResults: process.env.CONFLUENCE_MAX_SEARCH_RESULTS || 200,
  retries: process.env.CONFLUENCE_RETRIES || 3,
  requestTimeoutSec: process.env.CONFLUENCE_REQUEST_TIMEOUT_SEC || 60,
  maxAttachmentBytes: process.env.CONFLUENCE_MAX_ATTACHMENT_SIZE || process.env.CONFLUENCE_MAX_ATTACHMENT_BYTES || '5mb',
  skipUnchanged: process.env.CONFLUENCE_SKIP_UNCHANGED !== '0',
  force: false,
  attachments: true,
  browserHtml: true,
  descendants: false,
  cqls: [],
  titles: [],
  space: null,
};
// Positional arguments: page URLs or numeric page ids.
const inputs = [];

const argv = process.argv.slice(2);
for (let i = 0; i < argv.length; i++) {
  const a = argv[i];
  // Consume the argument following a value-taking flag; a flag at the very end
  // of the command line has no value and is reported instead of yielding undefined.
  const value = () => {
    if (i + 1 >= argv.length) failUsage(`Missing value for ${a}`);
    return argv[++i];
  };
  if (a === '-h' || a === '--help') { usage(); process.exit(0); }
  else if (a === '--site') opts.site = value();
  else if (a === '--raw-dir') opts.rawDir = value();
  else if (a === '--space') opts.space = value();
  else if (a === '--title') opts.titles.push(value());
  else if (a === '--cql') opts.cqls.push(value());
  else if (a === '--descendants') opts.descendants = true;
  else if (a === '--max-search-results') opts.maxSearchResults = value();
  else if (a === '--max-attachment-size') opts.maxAttachmentBytes = value();
  else if (a === '--force') opts.force = true;
  else if (a === '--no-skip-unchanged') opts.skipUnchanged = false;
  else if (a === '--retries') opts.retries = value();
  else if (a === '--request-timeout') opts.requestTimeoutSec = value();
  else if (a === '--no-attachments') opts.attachments = false;
  else if (a === '--no-browser-html') opts.browserHtml = false;
  else if (a === '--wait') opts.waitSec = value();
  else if (a === '--port') opts.port = value();
  else if (a === '--profile-dir') opts.profileDir = value();
  else if (!a.startsWith('-')) inputs.push(a);
  else failUsage(`Unknown argument: ${a}`);
}

if (!inputs.length && !opts.titles.length && !opts.cqls.length) { usage(); process.exit(2); }

// Validate numeric settings up front; a NaN here would otherwise surface much
// later as a zero-length wait, an instantly aborted request, or a skipped retry loop.
opts.port = numberSetting(opts.port, '--port', { min: 1, max: 65535, integer: true });
opts.waitSec = numberSetting(opts.waitSec, '--wait');
opts.maxSearchResults = numberSetting(opts.maxSearchResults, '--max-search-results', { integer: true });
opts.retries = numberSetting(opts.retries, '--retries', { integer: true });
opts.requestTimeoutSec = numberSetting(opts.requestTimeoutSec, '--request-timeout');
try {
  opts.maxAttachmentBytes = parseSize(opts.maxAttachmentBytes);
} catch (e) {
  failUsage(`Invalid value for --max-attachment-size: ${e.message}`);
}

opts.site = String(opts.site).replace(/\/+$/, '');
if (!opts.site) {
  failUsage('Missing Atlassian site. Pass --site https://example.atlassian.net or set CONFLUENCE_SITE.');
}
// The site is parsed with `new URL` while polling for cookies; rejecting an
// unparsable value here avoids opening Chrome and then retrying a hopeless parse.
let siteUrl = null;
try { siteUrl = new URL(opts.site); } catch { siteUrl = null; }
if (!siteUrl || !/^https?:$/.test(siteUrl.protocol)) {
  failUsage(`Invalid --site URL: ${opts.site} (expected e.g. https://example.atlassian.net)`);
}
opts.rawDir = path.resolve(opts.rawDir);
// Base URL under which this script builds every Confluence REST and page URL.
const wikiBase = `${opts.site}/wiki`;
// Promise-based delay helper.
const sleep = ms => new Promise(r => setTimeout(r, ms));
99
+
100
/**
 * GET a path on the local Chrome DevTools HTTP endpoint and decode the reply as JSON.
 *
 * @param {string} pathname - Path appended to `http://127.0.0.1:<port>`, e.g. `/json/version`.
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} When the response is not OK.
 */
async function endpoint(pathname) {
  const target = `http://127.0.0.1:${opts.port}${pathname}`;
  const response = await fetch(target);
  if (response.ok) return response.json();
  throw new Error(`DevTools HTTP ${response.status} for ${pathname}`);
}
105
+
106
/**
 * Probe whether the DevTools HTTP endpoint answers `/json/version`.
 *
 * @returns {Promise<boolean>} `true` when the probe succeeds, `false` on any failure.
 */
async function devtoolsReady() {
  return endpoint('/json/version').then(
    () => true,
    () => false,
  );
}
109
+
110
/**
 * Poll the DevTools endpoint until it responds.
 *
 * Makes up to 80 probes, pausing 250 ms after each unsuccessful one.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no probe succeeds.
 */
async function waitDevtools() {
  let probesLeft = 80;
  while (probesLeft > 0) {
    probesLeft -= 1;
    const up = await devtoolsReady();
    if (up) return;
    await sleep(250);
  }
  throw new Error('Chrome DevTools endpoint did not start');
}
117
+
118
/**
 * Start a detached Chrome with remote debugging enabled and open `url` in it.
 *
 * The executable comes from the `CHROME` environment variable, falling back to
 * `/usr/bin/google-chrome`. The process is detached and unref'd so it is not
 * tied to this script's lifetime.
 *
 * @param {string} url - Page to open in the new browser window.
 */
function launchChrome(url) {
  const chrome = process.env.CHROME || '/usr/bin/google-chrome';
  const args = [
    `--remote-debugging-port=${opts.port}`,
    '--remote-debugging-address=127.0.0.1',
    '--remote-allow-origins=*',
    `--user-data-dir=${opts.profileDir}`,
    '--no-first-run',
    '--no-default-browser-check',
    url,
  ];
  const child = spawn(chrome, args, { detached: true, stdio: 'ignore' });
  // A spawn failure (e.g. the executable does not exist) is reported through the
  // child's 'error' event. Without a listener it is thrown as an unhandled event
  // with no hint about the CHROME variable, so report it and stop right away.
  child.once('error', (err) => {
    console.error(`\nERROR: could not start Chrome (${chrome}): ${err.message}. Set CHROME to the browser executable path.`);
    process.exit(1);
  });
  child.unref();
}
132
+
133
/**
 * Make sure a Chrome with a reachable DevTools endpoint is available.
 *
 * Launches Chrome when the endpoint does not answer, otherwise reuses the
 * running instance; in both cases it then waits for the endpoint.
 *
 * @param {string} [openUrl] - URL for a newly launched browser (defaults to the wiki base URL).
 * @returns {Promise<void>}
 */
async function ensureBrowser(openUrl) {
  const alreadyUp = await devtoolsReady();
  if (alreadyUp) {
    console.log(`Reusing Chrome DevTools on port ${opts.port}`);
  } else {
    console.log(`Opening Chrome with reusable profile: ${opts.profileDir}`);
    launchChrome(openUrl || wikiBase);
  }
  await waitDevtools();
}
142
+
143
/**
 * Pick the DevTools websocket URL of a browser tab to talk to.
 *
 * Only targets of type `page` that expose a websocket URL are considered. A tab
 * whose URL contains the configured site's host wins; otherwise the first
 * candidate is used.
 *
 * @returns {Promise<string|undefined>} Websocket URL, or `undefined` when no tab qualifies.
 */
async function getPageWsUrl() {
  const targets = await endpoint('/json/list');
  const candidates = targets.filter(t => t.type === 'page' && t.webSocketDebuggerUrl);
  const siteHost = new URL(opts.site).host;
  let chosen = candidates[0];
  for (const tab of candidates) {
    if ((tab.url || '').includes(siteHost)) {
      chosen = tab;
      break;
    }
  }
  return chosen && chosen.webSocketDebuggerUrl;
}
150
+
151
/**
 * Open a Chrome DevTools Protocol session over a websocket.
 *
 * The returned promise resolves once the socket is open with a small client:
 *   - `send(method, params)` issues a command and resolves with its `result`
 *     (or rejects with the protocol error);
 *   - `close()` closes the socket, ignoring errors.
 *
 * If the socket closes or errors while commands are still waiting for a reply,
 * those commands are rejected instead of staying pending forever, and later
 * `send` calls reject immediately.
 *
 * @param {string} wsUrl - `webSocketDebuggerUrl` of the target tab.
 * @returns {Promise<{send: Function, close: Function}>}
 * @throws Rejects when the socket errors, closes before opening, or does not open within 10 s.
 */
function connectCdp(wsUrl) {
  return new Promise((resolve, reject) => {
    const ws = new WebSocket(wsUrl);
    const pending = new Map();
    let id = 0;
    // Set once the socket is unusable; reused for every later rejection.
    let terminalError = null;

    const failPending = (error) => {
      terminalError = terminalError || error;
      for (const { rej } of pending.values()) rej(terminalError);
      pending.clear();
    };

    const failTimer = setTimeout(() => {
      reject(new Error('CDP websocket timeout'));
      // Do not leave a half-open socket behind after giving up.
      try { ws.close(); } catch {}
    }, 10000);

    ws.addEventListener('open', () => {
      clearTimeout(failTimer);
      resolve({
        send(method, params = {}) {
          return new Promise((res, rej) => {
            if (terminalError) { rej(terminalError); return; }
            const msgId = ++id;
            pending.set(msgId, { res, rej });
            try {
              ws.send(JSON.stringify({ id: msgId, method, params }));
            } catch (e) {
              pending.delete(msgId);
              rej(e);
            }
          });
        },
        close() { try { ws.close(); } catch {} },
      });
    });

    ws.addEventListener('message', ev => {
      let msg;
      try {
        const data = typeof ev.data === 'string' ? ev.data : Buffer.from(ev.data).toString('utf8');
        msg = JSON.parse(data);
      } catch {
        // A frame that is not JSON cannot be a command reply; ignore it rather
        // than throwing from inside the event listener.
        return;
      }
      // Protocol events carry no id; only replies to our own commands matter.
      if (!msg || !msg.id || !pending.has(msg.id)) return;
      const { res, rej } = pending.get(msg.id);
      pending.delete(msg.id);
      if (msg.error) rej(new Error(`${msg.error.message || 'CDP error'} ${JSON.stringify(msg.error)}`));
      else res(msg.result);
    });

    ws.addEventListener('error', err => {
      clearTimeout(failTimer);
      reject(err);
      failPending(new Error(`CDP websocket error: ${(err && err.message) || 'unknown error'}`));
    });

    ws.addEventListener('close', () => {
      clearTimeout(failTimer);
      // No effect when the promise has already been resolved by 'open'.
      reject(new Error('CDP websocket closed before it opened'));
      failPending(new Error('CDP websocket closed'));
    });
  });
}
186
+
187
/**
 * Read the browser's cookies for the configured site and format them as a
 * `Cookie` request-header value.
 *
 * Cookies are requested over CDP for the site root and the wiki base URL, then
 * limited to those whose domain equals the site host or ends with `.<host>`.
 * NOTE(review): a cookie scoped to a parent domain (e.g. `.atlassian.net`) does
 * not pass that filter — confirm this is intended.
 *
 * @returns {Promise<string>} `name=value` pairs joined by `; `; empty when no tab or cookie is available.
 */
async function getCookieHeader() {
  const wsUrl = await getPageWsUrl();
  if (!wsUrl) return '';
  const cdp = await connectCdp(wsUrl);
  try {
    await cdp.send('Network.enable');
    const siteHost = new URL(opts.site).host;
    const reply = await cdp.send('Network.getCookies', { urls: [`${opts.site}/`, wikiBase] });
    const pairs = [];
    for (const cookie of reply.cookies || []) {
      const domain = cookie.domain;
      if (!domain) continue;
      if (domain !== siteHost && !domain.endsWith(`.${siteHost}`)) continue;
      pairs.push(`${cookie.name}=${cookie.value}`);
    }
    return pairs.join('; ');
  } finally {
    // Always release the websocket, including when a CDP command fails.
    cdp.close();
  }
}
203
+
204
/**
 * Decide whether an HTTP status is worth retrying.
 *
 * @param {number} status - HTTP response status code.
 * @returns {boolean} `true` for 408, 429 and every status of 500 or above.
 */
function shouldRetry(status) {
  if (status >= 500) return true;
  return status === 429 || status === 408;
}
207
+
208
/**
 * `fetch` with a per-attempt timeout and exponential backoff.
 *
 * An attempt is retried when `fetch` throws (including a timeout abort) or the
 * response status is retryable per `shouldRetry`. The pause between attempts
 * doubles from 1 s and is capped at 30 s. The final attempt's response is
 * returned as-is, even when its status is retryable; a final thrown error is rethrown.
 *
 * Reads `opts.retries` and `opts.requestTimeoutSec`. Values that are not
 * numbers fall back to 3 retries / 60 s, so a bad setting can neither skip the
 * loop entirely nor abort every request immediately.
 *
 * @param {string} url - Request URL.
 * @param {object} [init] - `fetch` options; `signal` is supplied by this function.
 * @param {string} [label] - Name used in the error for a retryable status.
 * @returns {Promise<Response>}
 */
async function fetchWithRetry(url, init = {}, label = url) {
  // Largest delay a timer accepts; anything bigger is treated as 1 ms by Node.
  const MAX_TIMER_MS = 2 ** 31 - 1;
  const configuredRetries = Number(opts.retries);
  const retries = Number.isNaN(configuredRetries) ? 3 : configuredRetries;
  const attempts = Math.max(1, Math.floor(retries + 1));
  const configuredTimeout = Number(opts.requestTimeoutSec);
  const timeoutSec = Number.isNaN(configuredTimeout) ? 60 : Math.max(1, configuredTimeout);
  const timeoutMs = Math.min(MAX_TIMER_MS, timeoutSec * 1000);

  let lastErr = new Error(`${label} failed`);
  for (let attempt = 1; attempt <= attempts; attempt++) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const res = await fetch(url, { ...init, signal: controller.signal });
      if (!shouldRetry(res.status) || attempt === attempts) return res;
      lastErr = new Error(`${label} HTTP ${res.status}`);
      // This response is being thrown away: discard its body so the underlying
      // connection is released instead of waiting for garbage collection.
      try { if (res.body) await res.body.cancel(); } catch {}
    } catch (e) {
      lastErr = e;
      if (attempt === attempts) throw e;
    } finally {
      clearTimeout(timer);
    }
    await sleep(Math.min(30000, 1000 * 2 ** (attempt - 1)));
  }
  throw lastErr;
}
229
+
230
/**
 * Fetch a URL with the browser session cookie and return its body as text.
 *
 * @param {string} url - Request URL.
 * @param {string} cookie - Value for the `Cookie` header.
 * @param {string} [accept] - Value for the `Accept` header; defaults to any type.
 * @returns {Promise<{status: number, contentType: string, text: string}>}
 */
async function fetchText(url, cookie, accept) {
  const headers = {
    Cookie: cookie,
    Accept: accept || '*/*',
    'User-Agent': 'confluence-browser-fetch/1.0',
  };
  const response = await fetchWithRetry(url, { redirect: 'follow', headers }, url);
  const status = response.status;
  const contentType = response.headers.get('content-type') || '';
  const text = await response.text();
  return { status, contentType, text };
}
241
+
242
/**
 * Fetch a URL asking for JSON and attach the parsed body.
 *
 * A body that is not valid JSON is not an error here: `json` is `null` and the
 * caller can inspect `status` and `text`.
 *
 * @param {string} url - Request URL.
 * @param {string} cookie - Value for the `Cookie` header.
 * @returns {Promise<{status: number, contentType: string, text: string, json: any}>}
 */
async function fetchJson(url, cookie) {
  const response = await fetchText(url, cookie, 'application/json');
  const parseBody = () => {
    try {
      return JSON.parse(response.text);
    } catch {
      return null;
    }
  };
  return { ...response, json: parseBody() };
}
248
+
249
/**
 * Ensure the browser is running, then poll until it holds cookies for the site.
 *
 * Polls every 3 s for up to `opts.waitSec` seconds and rewrites a single status
 * line on stdout while waiting, so the user can finish SSO in Chrome.
 *
 * @param {string} [openUrl] - URL to open when a browser has to be launched (defaults to the wiki base URL).
 * @returns {Promise<string>} A non-empty `Cookie` header value.
 * @throws {Error} When no cookies appear before the deadline.
 */
async function getCookieWithWait(openUrl) {
  const target = openUrl || wikiBase;
  await ensureBrowser(target);
  console.log(`If prompted in Chrome, complete SSO for: ${target}`);
  const deadline = Date.now() + opts.waitSec * 1000;
  let last = '';
  let statusLineOpen = false;
  while (Date.now() < deadline) {
    try {
      const cookie = await getCookieHeader();
      if (cookie) {
        // Terminate the in-place status line so later output starts on a fresh line.
        if (statusLineOpen) process.stdout.write('\n');
        return cookie;
      }
      last = 'no Atlassian cookies yet';
    } catch (e) {
      // Not every rejection is an Error with a message (e.g. a websocket error
      // event); always end up with a string so the padding below cannot throw.
      last = String((e && e.message) || e);
    }
    process.stdout.write(`\r${new Date().toLocaleTimeString()} ${last.padEnd(120).slice(0, 120)}`);
    statusLineOpen = true;
    await sleep(3000);
  }
  process.stdout.write('\n');
  throw new Error(`Could not get browser cookies. Last result: ${last}`);
}
266
+
267
/**
 * Wrap a value in double quotes for use as a CQL string literal.
 *
 * Backslashes are escaped before double quotes: otherwise a value ending in `\`
 * would escape the closing quote, and a `\"` already present in the value would
 * have its meaning changed.
 *
 * @param {*} s - Value to quote; converted with `String`.
 * @returns {string} The quoted literal.
 */
function cqlQuote(s) {
  const escaped = String(s)
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"');
  return `"${escaped}"`;
}
270
+
271
/**
 * Run a CQL search and collect the ids of the matching content.
 *
 * Results are requested in pages of at most 100 until `opts.maxSearchResults`
 * ids are collected, the server reports no next page, or a page comes back empty.
 *
 * @param {string} cql - CQL query.
 * @param {string} cookie - Value for the `Cookie` header.
 * @returns {Promise<string[]>} Unique ids in first-seen order.
 * @throws {Error} When a request does not return HTTP 200 with a `results` array.
 */
async function searchCql(cql, cookie) {
  const ids = [];
  const pageSize = Math.min(100, Math.max(1, opts.maxSearchResults || 200));
  let offset = 0;
  while (ids.length < opts.maxSearchResults) {
    const limit = Math.min(pageSize, opts.maxSearchResults - ids.length);
    const query = `cql=${encodeURIComponent(cql)}&limit=${limit}&start=${offset}&expand=space,version`;
    const result = await fetchJson(`${wikiBase}/rest/api/content/search?${query}`, cookie);
    const body = result.json;
    const usable = result.status === 200 && body && Array.isArray(body.results);
    if (!usable) {
      throw new Error(`CQL failed HTTP ${result.status}: ${(result.text || '').slice(0, 300)}`);
    }
    body.results.forEach((item) => {
      if (item.id) ids.push(String(item.id));
    });
    const hasNext = Boolean(body._links && body._links.next);
    if (!hasNext || body.results.length === 0) break;
    offset += body.results.length;
  }
  return Array.from(new Set(ids));
}
288
+
289
/**
 * Turn a command-line input (page id or URL) into a Confluence page id.
 *
 * The id is first extracted from the input itself. For URLs where that is not
 * possible (short/tiny links) the page HTML is fetched and scanned for a
 * content-id marker.
 *
 * The HTML fetch sends the browser session cookies, so it is only performed for
 * URLs on the configured site's host; any other URL is rejected instead of
 * being handed the session.
 *
 * @param {string} input - Page id or URL given by the user.
 * @param {string} cookie - Value for the `Cookie` header.
 * @returns {Promise<string>} The page id.
 * @throws {Error} When no id can be determined or the URL is outside the site.
 */
async function resolveInputToPageId(input, cookie) {
  const direct = extractPageId(input);
  if (direct) return direct;

  // Last resort for short/tiny links: fetch browser HTML and scan for content id markers.
  if (/^https?:\/\//.test(String(input))) {
    let sameSite = false;
    try {
      sameSite = new URL(String(input)).host === new URL(opts.site).host;
    } catch {
      sameSite = false;
    }
    if (!sameSite) {
      throw new Error(`Refusing to send browser session cookies to a URL outside ${opts.site}: ${input}`);
    }
    const html = await fetchText(input, cookie, 'text/html');
    const m = html.text.match(/(?:ajs-page-id|content-id|contentId|pageId)["'=:\s]+(\d+)/i);
    if (m) return m[1];
    throw new Error(`Could not extract page id from URL: ${input}`);
  }

  throw new Error(`Input is not a page id or supported Confluence URL: ${input}`);
}
303
+
304
/**
 * Load one page from the content REST API with the expansions this tool stores.
 *
 * @param {string} pageId - Confluence content id.
 * @param {string} cookie - Value for the `Cookie` header.
 * @returns {Promise<{url: string, page: object}>} The REST URL used and the decoded page.
 * @throws {Error} When the response is not HTTP 200 JSON carrying an `id`.
 */
async function fetchPageJson(pageId, cookie) {
  const expansions = [
    'body.storage',
    'body.view',
    'version',
    'space',
    'ancestors',
    'metadata.labels',
    'children.attachment',
    'history',
  ];
  const url = `${wikiBase}/rest/api/content/${encodeURIComponent(pageId)}?expand=${encodeURIComponent(expansions.join(','))}`;
  const { status, json, text } = await fetchJson(url, cookie);
  const usable = status === 200 && json && json.id;
  if (!usable) {
    throw new Error(`Page ${pageId} failed HTTP ${status}: ${(text || '').slice(0, 300)}`);
  }
  return { url, page: json };
}
313
+
314
/**
 * Build the browser URL of a page.
 *
 * Uses the page's `_links.webui` path when present, otherwise the
 * `viewpage.action` URL for the page id.
 *
 * @param {object} page - Page object from the REST API.
 * @returns {string} Absolute URL under the wiki base.
 */
function pageWebUrl(page) {
  const links = page && page._links;
  const relative = links && links.webui;
  if (relative) return `${wikiBase}${relative}`;
  return `${wikiBase}/pages/viewpage.action?pageId=${page.id}`;
}
318
+
319
/**
 * Compute the directory a page is stored in:
 * `<rawDir>/confluence/<space slug>/<page id>-<title slug>`.
 *
 * The space segment uses the space key, then the space name, then `unknown-space`.
 *
 * @param {object} page - Page object from the REST API.
 * @returns {string} Output directory path.
 */
function outputDirForPage(page) {
  const spaceInfo = page.space;
  let spaceLabel = spaceInfo && (spaceInfo.key || spaceInfo.name);
  if (!spaceLabel) spaceLabel = 'unknown-space';
  const leaf = `${page.id}-${slugify(page.title)}`;
  return path.join(opts.rawDir, 'confluence', slugify(spaceLabel), leaf);
}
323
+
324
/**
 * Download a page's attachments into `<outDir>/attachments` and write an
 * `attachments.json` manifest next to them.
 *
 * Attachment handling is best-effort: a failed listing, an oversized file, a
 * non-OK download status and a download that fails with a network error are
 * each recorded in the manifest, and the remaining attachments are still
 * processed. Errors while writing local files are not caught.
 *
 * @param {object} page - Page object from the REST API (its `id` is used).
 * @param {string} cookie - Value for the `Cookie` header.
 * @param {string} outDir - Directory of the page on disk.
 * @returns {Promise<number>} Number of manifest entries.
 */
async function downloadAttachments(page, cookie, outDir) {
  const manifest = [];
  const attachDir = path.join(outDir, 'attachments');
  await fsp.mkdir(attachDir, { recursive: true });

  let url = `${wikiBase}/rest/api/content/${encodeURIComponent(page.id)}/child/attachment?limit=200&expand=version,metadata`;
  while (url) {
    const result = await fetchJson(url, cookie);
    if (result.status !== 200 || !result.json) {
      manifest.push({ error: `attachment listing HTTP ${result.status}`, url });
      break;
    }
    for (const att of result.json.results || []) {
      const download = att._links && att._links.download;
      if (!download) continue;
      // Download links may be absolute, rooted at the site (/wiki/...), or relative to the wiki base.
      const fullUrl = download.startsWith('http') ? download : `${download.startsWith('/wiki/') ? opts.site : wikiBase}${download}`;
      const filename = safeName(att.title || `${att.id}.bin`);
      const fileSize = att.extensions && typeof att.extensions.fileSize === 'number' ? att.extensions.fileSize : Number(att.extensions && att.extensions.fileSize);
      const baseEntry = {
        id: att.id,
        filename,
        url: fullUrl,
        mediaType: att.metadata && att.metadata.mediaType,
        fileSize: Number.isFinite(fileSize) ? fileSize : att.extensions && att.extensions.fileSize,
        version: att.version,
      };
      // The limit can only be enforced when the listing reports a usable size.
      if (Number.isFinite(fileSize) && fileSize > opts.maxAttachmentBytes) {
        console.log(`Attachment ${filename} ... skipped (${formatBytes(fileSize)} > ${formatBytes(opts.maxAttachmentBytes)})`);
        manifest.push({
          ...baseEntry,
          skipped: true,
          reason: 'larger-than-max-attachment-size',
          maxAttachmentBytes: opts.maxAttachmentBytes,
        });
        continue;
      }
      const target = path.join(attachDir, filename);
      process.stdout.write(`Attachment ${filename} ... `);

      let res;
      let buf = null;
      try {
        res = await fetchWithRetry(fullUrl, { redirect: 'follow', headers: { Cookie: cookie, 'User-Agent': 'confluence-browser-fetch/1.0' } }, `attachment ${filename}`);
        if (res.ok) buf = Buffer.from(await res.arrayBuffer());
      } catch (e) {
        // A transfer failure on one attachment is recorded like an HTTP error,
        // so the other attachments and the manifest are not lost.
        const message = (e && e.message) || String(e);
        console.log(`failed (${message})`);
        manifest.push({ ...baseEntry, error: message });
        continue;
      }
      if (!res.ok) {
        console.log(`HTTP ${res.status}`);
        manifest.push({ ...baseEntry, status: res.status });
        continue;
      }
      await fsp.writeFile(target, buf);
      console.log(`${buf.length} bytes`);
      manifest.push({ ...baseEntry, path: path.relative(outDir, target), downloadedBytes: buf.length, status: res.status });
    }
    const next = result.json._links && result.json._links.next;
    url = next ? `${wikiBase}${next}` : null;
  }

  await fsp.writeFile(path.join(outDir, 'attachments.json'), JSON.stringify(manifest, null, 2));
  return manifest.length;
}
380
+
381
/**
 * List the ids of all descendant pages of a page, following `_links.next`
 * until the listing is exhausted.
 *
 * @param {string} pageId - Confluence content id of the ancestor.
 * @param {string} cookie - Value for the `Cookie` header.
 * @returns {Promise<string[]>} Unique ids in first-seen order.
 * @throws {Error} When a listing request does not return HTTP 200 JSON.
 */
async function fetchDescendants(pageId, cookie) {
  const collected = new Set();
  let nextUrl = `${wikiBase}/rest/api/content/${encodeURIComponent(pageId)}/descendant/page?limit=200&expand=space,version`;
  do {
    const result = await fetchJson(nextUrl, cookie);
    const body = result.json;
    if (result.status !== 200 || !body) {
      throw new Error(`Descendants failed HTTP ${result.status}: ${(result.text || '').slice(0, 300)}`);
    }
    for (const child of body.results || []) {
      if (child.id) collected.add(String(child.id));
    }
    const nextPath = body._links && body._links.next;
    nextUrl = nextPath ? `${wikiBase}${nextPath}` : null;
  } while (nextUrl);
  return [...collected];
}
393
+
394
/**
 * Load `metadata.json` from a page directory written by an earlier run.
 *
 * @param {string} outDir - Directory of the page on disk.
 * @returns {Promise<object|null>} The parsed metadata, or `null` when the file cannot be read or parsed.
 */
async function readExistingMetadata(outDir) {
  let metadata = null;
  try {
    const file = path.join(outDir, 'metadata.json');
    const raw = await fsp.readFile(file, 'utf8');
    metadata = JSON.parse(raw);
  } catch {
    // Missing or unreadable metadata just means there is nothing to compare against.
    metadata = null;
  }
  return metadata;
}
398
+
399
/**
 * Fetch one page and store it on disk.
 *
 * Writes `page.json`, `page.storage.html` and `page.view.html`, then optionally
 * the rendered browser HTML (`opts.browserHtml`) and the attachments
 * (`opts.attachments`), and finally `metadata.json`. When skipping is enabled,
 * `--force` is not set, and the stored metadata matches the current page
 * version, nothing is written and the page is reported as skipped.
 *
 * @param {string} pageId - Confluence content id.
 * @param {string} cookie - Value for the `Cookie` header.
 * @returns {Promise<{page: object, outDir: string, skipped: boolean}>}
 */
async function fetchOnePage(pageId, cookie) {
  const loaded = await fetchPageJson(pageId, cookie);
  const page = loaded.page;
  const restUrl = loaded.url;
  const outDir = outputDirForPage(page);
  await fsp.mkdir(outDir, { recursive: true });

  const previous = await readExistingMetadata(outDir);
  const unchanged = opts.skipUnchanged && !opts.force && sameVersion(previous, page);
  if (unchanged) {
    console.log(`Skipped unchanged ${page.title} (${page.id}) version ${page.version && page.version.number} -> ${outDir}`);
    return { page, outDir, skipped: true };
  }

  const save = (name, content) => fsp.writeFile(path.join(outDir, name), content);
  const bodyOf = (kind) => (page.body && page.body[kind] && page.body[kind].value) || '';

  await save('page.json', JSON.stringify(page, null, 2));
  await save('page.storage.html', bodyOf('storage'));
  await save('page.view.html', bodyOf('view'));

  const webUrl = pageWebUrl(page);
  // Stays 0 when the rendered HTML is not requested.
  let browserStatus = 0;
  if (opts.browserHtml) {
    const rendered = await fetchText(webUrl, cookie, 'text/html');
    browserStatus = rendered.status;
    await save('page.browser.html', rendered.text);
  }

  const attachmentCount = opts.attachments ? await downloadAttachments(page, cookie, outDir) : 0;

  // metadata.json is written last; it is what a later run compares against to
  // decide whether the page can be skipped.
  const meta = {
    fetchedAt: new Date().toISOString(),
    id: page.id,
    title: page.title,
    type: page.type,
    status: page.status,
    space: page.space && { key: page.space.key, name: page.space.name },
    version: page.version,
    webUrl,
    restUrl,
    browserStatus,
    attachmentCount,
  };
  await save('metadata.json', JSON.stringify(meta, null, 2));

  console.log(`Saved ${page.title} (${page.id}) -> ${outDir}`);
  return { page, outDir, skipped: false };
}
443
+
444
/**
 * Program entry point.
 *
 * 1. Obtain session cookies from the browser (waiting for SSO if necessary).
 * 2. Build the work queue from positional inputs, `--title` lookups (turned
 *    into CQL) and `--cql` searches.
 * 3. Fetch every queued page once; with `--descendants`, newly discovered
 *    descendant pages are appended to the queue while it is being processed.
 * 4. Write a run summary to `<rawDir>/confluence-browser-fetch-run.json`.
 *
 * Failures of individual inputs, searches or pages are collected and reported
 * at the end; they do not stop the run.
 * NOTE(review): such failures are only logged — the process exit status is not
 * changed here. Confirm that is the intended contract for callers.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await fsp.mkdir(opts.rawDir, { recursive: true });
  const openUrl = inputs.find(i => /^https?:\/\//.test(i)) || wikiBase;
  const cookie = await getCookieWithWait(openUrl);

  const queue = [];
  // Ids that have ever been queued. Lets the descendant step test membership
  // without rescanning the whole queue for every descendant.
  const queuedIds = new Set();
  const enqueue = (id, from) => {
    queue.push({ id, from });
    queuedIds.add(id);
  };
  const failed = [];
  const searches = [];

  for (const input of inputs) {
    try { enqueue(await resolveInputToPageId(input, cookie), input); }
    catch (e) { failed.push({ input, error: e.message }); console.error(`FAILED resolving ${input}: ${e.message}`); }
  }

  // Title lookups are ordinary CQL searches, optionally scoped to --space.
  for (const title of opts.titles) {
    const cql = `${opts.space ? `space = ${cqlQuote(opts.space)} and ` : ''}type = page and title = ${cqlQuote(title)}`;
    opts.cqls.push(cql);
  }

  for (const cql of opts.cqls) {
    console.log(`Searching CQL: ${cql}`);
    try {
      const ids = await searchCql(cql, cookie);
      searches.push({ cql, ids });
      console.log(`CQL matched ${ids.length} page(s): ${ids.join(' ') || '(none)'}`);
      for (const id of ids) enqueue(id, `CQL: ${cql}`);
    } catch (e) {
      failed.push({ input: `CQL: ${cql}`, error: e.message });
      console.error(`CQL FAILED: ${e.message}`);
    }
  }

  const seen = new Set();
  const fetched = [];
  // Index-based loop on purpose: the queue grows while descendants are discovered.
  for (let i = 0; i < queue.length; i++) {
    const item = queue[i];
    if (!item.id || seen.has(item.id)) continue;
    seen.add(item.id);
    console.log(`\n===== Fetching Confluence page ${item.id}${item.from ? ` (${item.from})` : ''} =====`);
    try {
      const { page, outDir, skipped } = await fetchOnePage(item.id, cookie);
      fetched.push({ id: page.id, title: page.title, outDir, skipped });
      if (opts.descendants) {
        const descendants = await fetchDescendants(page.id, cookie);
        console.log(`Descendants: ${descendants.join(' ') || '(none)'}`);
        for (const id of descendants) if (!queuedIds.has(id)) enqueue(id, `descendant of ${page.id}`);
      }
    } catch (e) {
      failed.push({ input: item.id, error: e.message });
      console.error(`FAILED page ${item.id}: ${e.message}`);
    }
  }

  const runFile = path.join(opts.rawDir, 'confluence-browser-fetch-run.json');
  const runMeta = { fetchedAt: new Date().toISOString(), site: opts.site, rawDir: opts.rawDir, inputs, searches, fetched, failed };
  await fsp.writeFile(runFile, JSON.stringify(runMeta, null, 2));

  if (failed.length) console.error(`\nCompleted with ${failed.length} failure(s). See ${runFile}`);
  else console.log(`\nCompleted successfully. See ${runFile}`);
}
503
+
504
// Run the program. Any error that escapes main() is printed (with its stack
// when available) and the process exits with status 1.
main().catch((error) => {
  const detail = error.stack || error.message;
  console.error(`\nERROR: ${detail}`);
  process.exit(1);
});
@@ -0,0 +1,83 @@
1
+ 'use strict';
2
+
3
// Attachment size limit used when none is configured: 5 MiB.
const DEFAULT_MAX_ATTACHMENT_BYTES = 5 * 1024 * 1024;

/**
 * Parse a human-readable size such as `5mb`, `1.5 GiB`, `200k` or `1024`.
 *
 * Units are case-insensitive and binary (k = 1024). A bare number is a byte
 * count. The words `unlimited`, `infinite`, `inf`, `none` and `no-limit` mean
 * no limit.
 *
 * @param {string|number|null|undefined} value - Size text.
 * @returns {number} Size in whole bytes (rounded down), `Infinity` for no limit,
 *   or the default limit when `value` is undefined, null or empty.
 * @throws {Error} When the text is not a recognised size.
 */
function parseSize(value) {
  if (value === undefined || value === null || value === '') return DEFAULT_MAX_ATTACHMENT_BYTES;

  const text = String(value).trim().toLowerCase();
  const unlimitedWords = new Set(['unlimited', 'infinite', 'inf', 'none', 'no-limit']);
  if (unlimitedWords.has(text)) return Infinity;

  const match = /^([0-9]+(?:\.[0-9]+)?)\s*(b|bytes?|k|kb|kib|m|mb|mib|g|gb|gib)?$/.exec(text);
  if (!match) throw new Error(`Invalid size: ${value}`);

  const amount = Number(match[1]);
  const unit = match[2] || 'b';
  // Only the first letter of the unit decides the multiplier.
  const multipliers = { b: 1, k: 1024, m: 1024 ** 2, g: 1024 ** 3 };
  return Math.floor(amount * multipliers[unit[0]]);
}
16
+
17
/**
 * Format a byte count for display using binary units with one decimal place.
 *
 * @param {number} n - Byte count.
 * @returns {string} e.g. `512 B`, `1.5 KiB`, `5.0 MiB`, `2.0 GiB`; `unlimited`
 *   for `Infinity`; the plain string form of any other non-finite value.
 */
function formatBytes(n) {
  if (n === Infinity) return 'unlimited';
  if (!Number.isFinite(n)) return String(n);
  const scales = [
    ['GiB', 1024 ** 3],
    ['MiB', 1024 ** 2],
    ['KiB', 1024],
  ];
  for (const [suffix, size] of scales) {
    if (n >= size) return `${(n / size).toFixed(1)} ${suffix}`;
  }
  return `${n} B`;
}
25
+
26
/**
 * Reduce arbitrary text to a short name usable as a single path segment.
 *
 * Accents are stripped, every run of characters outside `a-z A-Z 0-9 . _ -`
 * becomes one hyphen, leading/trailing hyphens are removed and the result is
 * cut to 90 characters.
 *
 * A result made only of dots (`.`, `..`) is replaced by `untitled`: dots are
 * allowed characters, but such a name refers to the current or parent
 * directory rather than to a new directory when used as a path segment.
 *
 * @param {*} s - Text to convert; falsy values count as `untitled`.
 * @returns {string} A non-empty slug.
 */
function slugify(s) {
  const slug = String(s || 'untitled')
    .normalize('NFKD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-zA-Z0-9._-]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 90);
  if (!slug || /^\.+$/.test(slug)) return 'untitled';
  return slug;
}
34
+
35
/**
 * Make an attachment title usable as a file name.
 *
 * Path separators (`/`, `\`) and NUL characters are replaced by `_`; a name
 * that then consists only of dots becomes `_`.
 *
 * @param {*} name - Original name; falsy values count as `attachment`.
 * @returns {string} The sanitised file name.
 */
function safeName(name) {
  const original = String(name || 'attachment');
  const flattened = original.replace(/[\\/\0]/g, '_');
  return /^\.+$/.test(flattened) ? '_' : flattened;
}
38
+
39
/**
 * Extract a numeric Confluence page id from user input without any network access.
 *
 * Accepted forms:
 *   - a bare numeric id;
 *   - a URL with a `pageId` / `homepageId` query parameter (either letter case shown below);
 *   - a URL whose path contains `/pages/<id>`, followed by `/`, `?`, `#` or the end of the URL;
 *   - a URL containing `contentId=<id>`.
 *
 * @param {*} input - Page id or URL.
 * @returns {string|null} The id as a string, or `null` when none is found or the input is not a URL.
 */
function extractPageId(input) {
  const s = String(input).trim();
  if (/^\d+$/.test(s)) return s;
  try {
    const u = new URL(s);
    const qp = u.searchParams.get('pageId') || u.searchParams.get('pageid') || u.searchParams.get('homepageId') || u.searchParams.get('homepageid');
    if (qp && /^\d+$/.test(qp)) return qp;
    const patterns = [
      // The id may be followed directly by a query string or fragment, e.g.
      // `.../pages/123456?atlOrigin=...`, not only by `/` or the end of the URL.
      /\/pages\/(\d+)(?:[/?#]|$)/,
      /\/display\/[^/]+\/.*?[?&]pageId=(\d+)/,
      /contentId=(\d+)/,
    ];
    for (const re of patterns) {
      const m = u.href.match(re);
      if (m) return m[1];
    }
  } catch {
    // Not a parsable URL: fall through to the "no id" result.
  }
  return null;
}
58
+
59
/**
 * Tell whether stored metadata describes the same revision as a freshly fetched page.
 *
 * The page id (compared as strings), version number, version timestamp and
 * status must all match.
 *
 * @param {object|null} existing - Previously stored metadata.
 * @param {object|null} page - Page object from the REST API.
 * @returns {boolean} `true` only when both are present and all four fields agree.
 */
function sameVersion(existing, page) {
  if (!existing || !page) return false;
  const previous = existing.version || {};
  const current = page.version || {};
  if (String(existing.id) !== String(page.id)) return false;
  if (previous.number !== current.number) return false;
  if (previous.when !== current.when) return false;
  return existing.status === page.status;
}
68
+
69
/**
 * Decide whether an attachment exceeds the configured size limit.
 *
 * @param {number|string|undefined} size - Reported size in bytes; non-numbers are converted with `Number`.
 * @param {number} maxAttachmentBytes - Limit in bytes (`Infinity` for no limit).
 * @returns {boolean} `true` only when the size is a finite number above the limit.
 */
function shouldSkipAttachment(size, maxAttachmentBytes) {
  const bytes = typeof size === 'number' ? size : Number(size);
  if (!Number.isFinite(bytes)) return false;
  return bytes > maxAttachmentBytes;
}
73
+
74
+ module.exports = {
75
+ DEFAULT_MAX_ATTACHMENT_BYTES,
76
+ parseSize,
77
+ formatBytes,
78
+ slugify,
79
+ safeName,
80
+ extractPageId,
81
+ sameVersion,
82
+ shouldSkipAttachment,
83
+ };