@abraca/wiki 2.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,508 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * `abracadabra-wiki <title>` — fetch Wikipedia articles and seed them into the
4
+ * active Abracadabra space as a graph of docs.
5
+ *
6
+ * Streaming flow (no buffering): every newly-discovered title becomes a shell
7
+ * doc immediately (visible in the dashboard right away), then bodies are filled
8
+ * in one fetch at a time. The user sees the tree skeleton appear before the
9
+ * first body is written.
10
+ *
11
+ * Authenticates with its own Ed25519 key (`ABRA_KEY_FILE`, default
12
+ * `~/.abracadabra/cli.key`) via the modern `DocumentManager` API — the same
13
+ * identity the `abracadabra` CLI uses, so both tools share one account.
14
+ *
15
+ * Usage:
16
+ * abracadabra-wiki "<Article Title>" user-agent="you (you@example.com)" [options]
17
+ *
18
+ * Environment:
19
+ * ABRA_URL Server URL (required unless --dry-run)
20
+ * ABRA_KEY_FILE Ed25519 key file (~/.abracadabra/cli.key)
21
+ * ABRA_NAME / ABRA_COLOR Presence identity
22
+ * ABRA_INVITE_CODE Invite code for first-run registration
23
+ * ABRA_WIKI_USER_AGENT Default Api-User-Agent (or pass user-agent=...)
24
+ */
25
+ import type { DocumentManager } from '@abraca/dabra'
26
+ import { parseArgs, type ParsedArgs } from './parser.ts'
27
+ import { WikipediaClient } from './wikipedia.ts'
28
+ import { snapshotArticle, canonicalTitle, prettyCategoryLabel } from './snapshot.ts'
29
+ import {
30
+ ICONS,
31
+ pickSectionType,
32
+ renderArticleLead,
33
+ renderArticleSingleDoc,
34
+ renderInfoboxBody,
35
+ renderCategoryBody,
36
+ rewriteLinks,
37
+ } from './render.ts'
38
+ import { openSession } from './connect.ts'
39
+ import type { WikiOptions, ExtractMode, ExtractedArticle, ExtractedSection } from './types.ts'
40
+
41
/**
 * Help text printed by the bin entry when `--help` is passed or no article
 * title is supplied. Lists every option the CLI inspects, including the
 * `--quiet` and `--help` flags.
 */
const USAGE = [
  'abracadabra-wiki "<Article Title>" user-agent="<name (email)>" [options]',
  '',
  'Options:',
  ' mode=single|split single doc per article OR split into sections+infobox [split]',
  ' depth=<n> follow internal links to depth N [1]',
  ' category-depth=<n> recurse into sub-categories [1]',
  ' lang=<code> wiki language [en]',
  ' domain=<host> 3rd-party MediaWiki host (overrides lang)',
  ' parent=<docId> parent doc for the new graph [active space root]',
  ' user-agent=<str> Api-User-Agent header (REQUIRED by Wikimedia etiquette)',
  ' rate=<rps> max wikipedia requests per second [3]',
  ' --include-categories expand each article\'s categories into nested graphs',
  ' --dry-run fetch only the entry article, print outline, no writes',
  ' --quiet suppress [wiki] progress logging on stderr',
  ' --help print this usage text and exit',
  '',
  'Environment: ABRA_URL (required unless --dry-run), ABRA_KEY_FILE, ABRA_NAME,',
  ' ABRA_COLOR, ABRA_INVITE_CODE, ABRA_WIKI_USER_AGENT.',
].join('\n')
59
+
60
/**
 * Import a Wikipedia article graph from arguments that have already been parsed.
 *
 * Problems this function detects itself (invalid options, an entry article that
 * cannot be found during a dry run, a missing `ABRA_URL`) are returned as a
 * message string rather than thrown. Errors raised by `openSession` or
 * `runStreaming` are not caught here and reject the returned promise.
 *
 * @param args Parsed CLI arguments; the `quiet` / `q` flags silence progress logs.
 * @returns A human-readable summary, dry-run outline, or error/usage message.
 */
export async function runWiki(args: ParsedArgs): Promise<string> {
  const opts = parseOptions(args)
  // parseOptions reports validation failures as a plain message string.
  if (typeof opts === 'string') return opts

  const quiet = args.flags.has('quiet') || args.flags.has('q')
  // Progress goes to stderr so stdout carries only the final summary.
  const log = (msg: string) => {
    if (quiet) return
    console.error(`[wiki] ${msg}`)
  }

  const wp = new WikipediaClient({
    lang: opts.lang,
    domain: opts.domain,
    userAgent: opts.userAgent,
    rate: opts.rate,
  })

  if (opts.dryRun) {
    // Dry run: fetch the entry article only and describe it; no server session.
    log(`fetch ${opts.title}`)
    const doc = await wp.fetchArticle(opts.title)
    if (!doc) return `Article not found: "${opts.title}"`
    const snap = snapshotArticle(doc, canonicalTitle(doc.title?.() ?? opts.title))
    const hasInfobox = snap.infobox && snap.infobox.length > 0 ? 'yes' : 'no'
    const outline = [
      `Entry: ${snap.title}`,
      `URL: ${snap.url ?? '(none)'}`,
      `Internal links: ${snap.linkTitles.length}`,
      `Categories: ${snap.categories.length}`,
      `Sections: ${snap.sections.length}`,
      `Has infobox: ${hasInfobox}`,
      '',
      '── Sections ──',
      printSections(snap.sections, ''),
    ]
    return outline.join('\n')
  }

  // ── Connect ──────────────────────────────────────────────────────────
  // The environment is read through globalThis with optional chaining, so a
  // runtime without a `process` global yields an empty object instead of throwing.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const env = (globalThis as any).process?.env ?? {}
  const url = env['ABRA_URL']
  if (!url) {
    return 'ABRA_URL is required to write to the server. Set it or pass --dry-run.'
  }

  const session = await openSession({
    url,
    name: env['ABRA_NAME'],
    color: env['ABRA_COLOR'],
    inviteCode: env['ABRA_INVITE_CODE'],
    keyFile: env['ABRA_KEY_FILE'],
    quiet,
  })
  const dm = session.dm

  try {
    const result = await runStreaming(dm, wp, opts, log)
    const categorySuffix =
      result.categoryCount > 0 ? ` + ${result.categoryCount} categories` : ''
    const summary = [
      `Done. Created ${result.articleCount} articles${categorySuffix}.`,
      `Root: ${result.rootDocId}`,
    ]
    return summary.join('\n')
  } finally {
    // Always tear the session down; a failing destroy must not mask the outcome.
    await dm.destroy().catch(() => {})
  }
}
129
+
130
+ // ─────────────────────────────────────────────────────────────────────────
131
+ // Streaming orchestrator
132
+ // ─────────────────────────────────────────────────────────────────────────
133
+
134
/** Totals reported by `runStreaming` once the import has finished. */
interface StreamResult {
  /** Id of the top-level graph doc created for the entry article. */
  rootDocId: string
  /** Articles that were fetched and processed; stubs left by failed fetches are not counted. */
  articleCount: number
  /** Categories whose member list was fetched and rendered without an error. */
  categoryCount: number
}

/**
 * Breadth-first import: create a root graph doc and an entry article doc, then
 * walk linked articles up to `opts.depth`, allocating a shell doc for each
 * newly discovered title before its body is fetched and written.
 *
 * Per-article fetch and write failures are logged and skipped so one bad page
 * does not abort the import. Only a missing entry article is fatal.
 *
 * @param dm   Document manager used to create tree entries and write bodies.
 * @param wp   Wikipedia client used for article and category fetches.
 * @param opts Validated import options.
 * @param log  Progress sink.
 * @returns Root doc id plus the number of processed articles and categories.
 * @throws Error when the entry article cannot be found.
 */
async function runStreaming(
  dm: DocumentManager,
  wp: WikipediaClient,
  opts: WikiOptions,
  log: (msg: string) => void,
): Promise<StreamResult> {
  // Formats any thrown value for a log line: its `message` when it has one,
  // otherwise the value itself.
  const describeError = (err: unknown): string => {
    if (typeof err === 'object' && err !== null && 'message' in err) {
      const message = (err as { message?: unknown }).message
      if (message != null) return String(message)
    }
    return String(err)
  }

  // Title → docId map. Drives [[Title]] → [[docId|label]] rewriting at write time.
  const titleToDocId = new Map<string, string>()
  // Snapshots of articles we've already fetched (avoid re-fetching).
  const fetched = new Map<string, ExtractedArticle>()
  // Articles whose section/infobox children have been created (split mode).
  const childrenCreated = new Set<string>()
  // Categories whose shells have been created.
  const categoryToDocId = new Map<string, string>()
  let categoriesContainerId: string | null = null

  // ── Fetch entry first; we need its title to label the root graph ─────
  log(`fetch ${opts.title}`)
  const entryDoc = await wp.fetchArticle(opts.title)
  if (!entryDoc) {
    throw new Error(`Article not found: "${opts.title}"`)
  }
  const entryTitle = canonicalTitle(entryDoc.title?.() ?? opts.title)
  const entrySnap = snapshotArticle(entryDoc, entryTitle)
  fetched.set(entryTitle, entrySnap)

  // ── Step 1: create root graph doc (visible immediately) ──────────────
  const rootEntry = dm.tree.create({
    parentId: opts.parentDocId ?? null,
    label: entryTitle,
    type: 'graph',
    meta: { icon: ICONS.graph },
  })
  log(`+ ${rootEntry.id.slice(0, 8)}… ${entryTitle} (graph)`)

  // ── Step 2: create the entry article shell ───────────────────────────
  const entryArticleId = createArticleShell(dm, entrySnap, rootEntry.id, log)
  titleToDocId.set(entryTitle, entryArticleId)

  // Queue of (title, depth) to process. Each entry is guaranteed to have
  // a shell doc already in titleToDocId.
  const queue: Array<{ title: string; depth: number }> = [{ title: entryTitle, depth: 0 }]
  let articleCount = 0

  // ── Step 3: streaming process ───────────────────────────────────────
  // Array iteration re-reads the length on every step, so items pushed while
  // looping are visited in FIFO order without an O(n) `shift()` per article.
  for (const { title, depth } of queue) {
    const articleDocId = titleToDocId.get(title)
    // Every queued title had its shell registered before being enqueued.
    if (articleDocId === undefined) continue

    // Ensure we've fetched this article.
    let snap = fetched.get(title)
    if (!snap) {
      log(`fetch [d${depth}] ${title}`)
      try {
        const doc = await wp.fetchArticle(title)
        if (!doc) {
          log(` not found — leaving stub`)
          continue
        }
        snap = snapshotArticle(doc, canonicalTitle(doc.title?.() ?? title))
        fetched.set(title, snap)
      } catch (err: unknown) {
        log(`! fetch failed: ${describeError(err)}`)
        continue
      }
    }

    // Create this article's section/infobox children (split mode only,
    // and only once per article).
    if (opts.mode === 'split' && !childrenCreated.has(title)) {
      createArticleChildren(dm, snap, articleDocId, log)
      childrenCreated.add(title)
    }

    // Discover new linked titles and pre-allocate shells immediately.
    if (depth < opts.depth) {
      for (const linkTitle of snap.linkTitles) {
        if (titleToDocId.has(linkTitle)) continue
        const shell = dm.tree.create({
          parentId: rootEntry.id,
          label: linkTitle,
          type: 'doc',
          meta: { icon: ICONS.article },
        })
        titleToDocId.set(linkTitle, shell.id)
        queue.push({ title: linkTitle, depth: depth + 1 })
        log(`+ ${shell.id.slice(0, 8)}… ${linkTitle} (doc, shell)`)
      }
    }

    // Pre-allocate category shells when first discovered.
    if (opts.includeCategories && snap.categories.length > 0) {
      if (!categoriesContainerId) {
        const c = dm.tree.create({
          parentId: rootEntry.id,
          label: 'Categories',
          type: 'graph',
          meta: { icon: ICONS.categories },
        })
        categoriesContainerId = c.id
        log(`+ ${c.id.slice(0, 8)}… Categories (graph)`)
      }
      for (const catTitle of snap.categories) {
        if (categoryToDocId.has(catTitle)) continue
        const cat = dm.tree.create({
          parentId: categoriesContainerId,
          label: prettyCategoryLabel(catTitle),
          type: 'graph',
          meta: { icon: ICONS.category },
        })
        categoryToDocId.set(catTitle, cat.id)
        log(`+ ${cat.id.slice(0, 8)}… ${prettyCategoryLabel(catTitle)} (graph, cat)`)
      }
    }

    // Write this article's body now. Links resolve against the shells
    // allocated so far; titles without a shell fall back to plain text.
    const body =
      opts.mode === 'split' ? renderArticleLead(snap) : renderArticleSingleDoc(snap)
    if (body.trim().length > 0) {
      const rewritten = rewriteLinks(body, titleToDocId)
      try {
        await dm.content.write(articleDocId, rewritten)
        log(`✓ body ${title}`)
      } catch (err: unknown) {
        log(`! body write failed for ${title}: ${describeError(err)}`)
      }
    }

    // In split mode, also write each section/infobox doc body.
    if (opts.mode === 'split') {
      await writeChildrenBodies(dm, snap, articleDocId, titleToDocId, log)
    }

    articleCount++
  }

  // ── Step 4: fill in category bodies ─────────────────────────────────
  let categoryCount = 0
  if (opts.includeCategories && categoryToDocId.size > 0) {
    for (const [catTitle, catDocId] of categoryToDocId) {
      log(`category ${catTitle}`)
      try {
        const members = await wp.fetchCategoryPages(
          catTitle,
          opts.categoryDepth > 0,
          Math.max(0, opts.categoryDepth),
        )
        const memberArticles: string[] = []
        const subcats: string[] = []
        for (const m of members) {
          if (m.type === 'subcat') subcats.push(prettyCategoryLabel(m.title))
          else memberArticles.push(m.title)
        }
        const body = renderCategoryBody(memberArticles, subcats)
        const rewritten = rewriteLinks(body, titleToDocId)
        if (rewritten.trim().length > 0) {
          await dm.content.write(catDocId, rewritten)
          log(`✓ body category ${catTitle}`)
        }
        categoryCount++
      } catch (err: unknown) {
        log(`! category ${catTitle}: ${describeError(err)}`)
      }
    }
  }

  return { rootDocId: rootEntry.id, articleCount, categoryCount }
}
310
+
311
+ // ─────────────────────────────────────────────────────────────────────────
312
+ // Shell + body helpers
313
+ // ─────────────────────────────────────────────────────────────────────────
314
+
315
/**
 * Create an empty `doc` tree entry for an article under `parentId`.
 *
 * The entry's meta carries the article icon and, when the snapshot has one,
 * the source URL.
 *
 * @returns The id of the newly created entry.
 */
function createArticleShell(
  dm: DocumentManager,
  article: ExtractedArticle,
  parentId: string,
  log: (msg: string) => void,
): string {
  const meta: Record<string, unknown> = article.url
    ? { icon: ICONS.article, url: article.url }
    : { icon: ICONS.article }
  const { id } = dm.tree.create({ parentId, label: article.title, type: 'doc', meta })
  log(`+ ${id.slice(0, 8)}… ${article.title} (doc)`)
  return id
}
332
+
333
/**
 * Allocate the child docs of a split-mode article: an `Infobox` outline when
 * the article has infobox rows, followed by one shell per top-level section
 * (nested sections are handled by `createSectionShell`).
 *
 * No bodies are written here; `writeChildrenBodies` fills them in later.
 */
function createArticleChildren(
  dm: DocumentManager,
  article: ExtractedArticle,
  articleDocId: string,
  log: (msg: string) => void,
): void {
  const rows = article.infobox
  if (rows && rows.length > 0) {
    const infoboxEntry = dm.tree.create({
      parentId: articleDocId,
      label: 'Infobox',
      type: 'outline',
      meta: { icon: ICONS.infobox },
    })
    log(` + ${infoboxEntry.id.slice(0, 8)}… Infobox (outline)`)
    // Stash the new id on the snapshot itself so writeChildrenBodies can
    // locate the infobox doc without querying the tree again.
    ;(article as any)._infoboxDocId = infoboxEntry.id
  }
  article.sections.forEach((section) => {
    createSectionShell(dm, section, articleDocId, log)
  })
}
359
+
360
/**
 * Recursively create a tree entry for a section and each of its sub-sections.
 *
 * A section with neither body text nor children is skipped. The created
 * entry's id is stored on the section object as `_docId` so the body can be
 * written to it later.
 */
function createSectionShell(
  dm: DocumentManager,
  section: ExtractedSection,
  parentDocId: string,
  log: (msg: string) => void,
): void {
  const isBlank = section.body.trim() === ''
  if (isBlank && section.children.length === 0) return

  const picked = pickSectionType(section)
  const entry = dm.tree.create({
    parentId: parentDocId,
    label: section.title || 'Untitled section',
    type: picked.type,
    meta: { icon: picked.icon },
  })
  log(` + ${entry.id.slice(0, 8)}… ${entry.label} (${picked.type})`)
  ;(section as any)._docId = entry.id

  section.children.forEach((child) => {
    createSectionShell(dm, child, entry.id, log)
  })
}
381
+
382
/**
 * Write the bodies of a split-mode article's child docs: the infobox doc (when
 * one was allocated) and then every section doc, one write at a time.
 *
 * The infobox body goes through `rewriteLinks` like every other body, so any
 * `[[Title]]` placeholder in an infobox value resolves the same way here as it
 * does in single mode, where the infobox is inlined into the rewritten article
 * body. A failed infobox write is logged and does not stop the section writes.
 *
 * @param dm            Document manager used for content writes.
 * @param article       Snapshot whose `_infoboxDocId` / section `_docId` markers
 *                      were set when the shells were created.
 * @param _articleDocId Unused; kept for signature compatibility.
 * @param titleToDocId  Title → docId map used for link rewriting.
 * @param log           Progress sink.
 */
async function writeChildrenBodies(
  dm: DocumentManager,
  article: ExtractedArticle,
  _articleDocId: string,
  titleToDocId: Map<string, string>,
  log: (msg: string) => void,
): Promise<void> {
  const infoboxDocId = (article as any)._infoboxDocId as string | undefined
  if (infoboxDocId && article.infobox && article.infobox.length > 0) {
    try {
      const infoboxBody = rewriteLinks(renderInfoboxBody(article.infobox), titleToDocId)
      await dm.content.write(infoboxDocId, infoboxBody)
    } catch (err: any) {
      log(`! infobox body write failed: ${err?.message ?? err}`)
    }
  }
  for (const section of article.sections) {
    await writeSectionBody(dm, section, titleToDocId, log)
  }
}
401
+
402
/**
 * Write one section's body to the doc recorded in its `_docId` marker, then do
 * the same for each sub-section in order.
 *
 * Sections without a marker or with a whitespace-only body are not written,
 * but their children are still visited. Write failures are logged, not thrown.
 */
async function writeSectionBody(
  dm: DocumentManager,
  section: ExtractedSection,
  titleToDocId: Map<string, string>,
  log: (msg: string) => void,
): Promise<void> {
  const targetId = (section as any)._docId as string | undefined
  const hasBody = section.body.trim().length > 0

  if (targetId && hasBody) {
    const rewritten = () => rewriteLinks(section.body, titleToDocId)
    try {
      await dm.content.write(targetId, rewritten())
    } catch (err: any) {
      log(`! section body write failed for ${section.title}: ${err?.message ?? err}`)
    }
  }

  for (const sub of section.children) {
    await writeSectionBody(dm, sub, titleToDocId, log)
  }
}
420
+
421
+ // ─────────────────────────────────────────────────────────────────────────
422
+ // Argument parsing + dry-run printing
423
+ // ─────────────────────────────────────────────────────────────────────────
424
+
425
/**
 * Turn parsed CLI arguments into validated `WikiOptions`.
 *
 * The title comes from the first positional argument, falling back to the
 * `title` parameter. The user agent comes from `user-agent` / `userAgent` or
 * the `ABRA_WIKI_USER_AGENT` environment variable.
 *
 * @returns The options, or a message string describing the first problem found.
 */
function parseOptions(args: ParsedArgs): WikiOptions | string {
  const { positional, params, flags } = args

  const title = positional[0]?.trim() || params['title']
  if (!title) {
    return 'Missing required positional argument: <title>. Example: abracadabra-wiki "Toronto Raptors"'
  }

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const env = (globalThis as any).process?.env ?? {}
  const userAgent = params['user-agent'] || params['userAgent'] || env['ABRA_WIKI_USER_AGENT']
  if (!userAgent) {
    const complaint = [
      'Missing required parameter: user-agent="your-name (you@example.com)"',
      '(Wikimedia etiquette requires an Api-User-Agent header. Pass user-agent=... or set ABRA_WIKI_USER_AGENT.)',
    ]
    return complaint.join('\n')
  }

  const requestedMode = params['mode'] ?? 'split'
  const modeIsKnown = requestedMode === 'single' || requestedMode === 'split'
  if (!modeIsKnown) {
    return `Invalid mode "${requestedMode}". Use mode=single or mode=split.`
  }
  const mode = requestedMode as ExtractMode

  const rawCategoryDepth = params['category-depth'] ?? params['categoryDepth']

  return {
    title,
    mode,
    depth: parseIntOr(params['depth'], 1),
    categoryDepth: parseIntOr(rawCategoryDepth, 1),
    includeCategories: flags.has('include-categories') || flags.has('includeCategories'),
    lang: params['lang'] ?? 'en',
    domain: params['domain'],
    parentDocId: params['parent'],
    userAgent,
    rate: parseFloatOr(params['rate'], 3),
    dryRun: flags.has('dry-run') || flags.has('dryRun'),
  }
}
462
+
463
/**
 * Parse a base-10 integer option.
 *
 * @returns The parsed value when it is finite and not negative; `fallback`
 *          when the input is missing, empty, unparsable, or negative.
 */
function parseIntOr(s: string | undefined, fallback: number): number {
  if (s === undefined || s === '') return fallback
  const parsed = Number.parseInt(s, 10)
  const usable = Number.isFinite(parsed) && parsed >= 0
  if (!usable) return fallback
  return parsed
}
468
+
469
/**
 * Parse a floating-point option that must be strictly positive.
 *
 * @returns The parsed value when it is finite and greater than zero;
 *          `fallback` when the input is missing, empty, unparsable, zero, or negative.
 */
function parseFloatOr(s: string | undefined, fallback: number): number {
  if (s === undefined || s === '') return fallback
  const parsed = Number.parseFloat(s)
  const usable = Number.isFinite(parsed) && parsed > 0
  if (!usable) return fallback
  return parsed
}
474
+
475
/**
 * Render a section tree as an indented bullet outline for the dry-run report.
 *
 * Each line shows the section title, the body length in characters when the
 * body is non-empty, and the number of direct sub-sections when there are any.
 * Sub-sections are rendered recursively with one extra space of indent.
 */
function printSections(sections: ExtractedSection[], indent: string): string {
  return sections
    .flatMap((section) => {
      const subCount = section.children.length
      const sizeHint = section.body ? ` (${section.body.length}b)` : ''
      const subHint = subCount > 0 ? ` [${subCount} sub]` : ''
      const ownLine = `${indent}- ${section.title}${sizeHint}${subHint}`
      return subCount > 0
        ? [ownLine, printSections(section.children, indent + ' ')]
        : [ownLine]
    })
    .join('\n')
}
486
+
487
+ // ─────────────────────────────────────────────────────────────────────────
488
+ // Bin entry
489
+ // ─────────────────────────────────────────────────────────────────────────
490
+
491
/**
 * Bin entry: parse `process.argv`, print the usage text when help is requested
 * or no title was supplied, otherwise run the import and print whatever
 * `runWiki` returns.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv)

  const wantsHelp = args.flags.has('help') || args.flags.has('h')
  const hasTitle = Boolean(args.positional[0]?.trim()) || Boolean(args.params['title'])
  if (wantsHelp || !hasTitle) {
    console.log(USAGE)
    return
  }

  const output = await runWiki(args)
  if (output) {
    console.log(output)
  }
}

// Any rejection that escapes main is reported on stderr and ends the process
// with exit status 1.
main().catch((reason) => {
  const detail = reason?.message ?? reason
  console.error(`Fatal: ${detail}`)
  process.exit(1)
})
package/src/parser.ts ADDED
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Minimal argument parser for the `abracadabra-wiki` bin.
3
+ *
4
+ * Single-purpose: there is no subcommand — the first bare word is the article
5
+ * title (`positional[0]`). Supports `key=value`, `key="value with spaces"`,
6
+ * and `--flag` / `--key=value`.
7
+ *
8
+ * abracadabra-wiki "<Article Title>" [key=value ...] [--flags]
9
+ */
10
+
11
export interface ParsedArgs {
  /** Positional arguments — `positional[0]` is the article title. */
  positional: string[]
  /** Key-value parameters, e.g. `{ lang: "en", "user-agent": "..." }`. */
  params: Record<string, string>
  /** Boolean flags, e.g. `dry-run`, `include-categories`, `quiet`, `q`, `h`. */
  flags: Set<string>
}

/**
 * Remove one pair of matching surrounding quotes (`"…"` or `'…'`).
 * A value shorter than two characters is returned untouched, so a lone quote
 * character is not mistaken for an empty quoted string.
 */
function stripMatchingQuotes(value: string): string {
  if (value.length < 2) return value
  const first = value[0]
  const last = value[value.length - 1]
  const quoted = (first === '"' || first === "'") && first === last
  return quoted ? value.slice(1, -1) : value
}

/**
 * Parse CLI arguments into a structured object.
 *
 * Recognised forms, checked in this order:
 * - `--key=value` → `params[key]` (surrounding quotes stripped)
 * - `--flag`      → `flags`
 * - `-x`          → `flags` (a single ASCII letter, so `-h` / `-q` work)
 * - `key=value`   → `params[key]` (surrounding quotes stripped)
 * - anything else → `positional`
 *
 * @param argv Raw `process.argv` (includes node path and script path).
 */
export function parseArgs(argv: string[]): ParsedArgs {
  const result: ParsedArgs = { positional: [], params: {}, flags: new Set() }

  // Iterating values (not indices) keeps `arg` a plain string even under
  // `noUncheckedIndexedAccess`. The first two entries are node + script.
  for (const arg of argv.slice(2)) {
    // --flag or --key=value
    if (arg.startsWith('--')) {
      const stripped = arg.slice(2)
      const eqIdx = stripped.indexOf('=')
      if (eqIdx !== -1) {
        result.params[stripped.slice(0, eqIdx)] = stripMatchingQuotes(stripped.slice(eqIdx + 1))
      } else {
        result.flags.add(stripped)
      }
      continue
    }

    // Short flag such as -h or -q. Limited to one letter so that other
    // dash-prefixed words still reach the positional list.
    if (/^-[A-Za-z]$/.test(arg)) {
      result.flags.add(arg.slice(1))
      continue
    }

    // key=value pair (a leading "=" is not a key, so it stays positional)
    const eqIdx = arg.indexOf('=')
    if (eqIdx > 0) {
      result.params[arg.slice(0, eqIdx)] = stripMatchingQuotes(arg.slice(eqIdx + 1))
      continue
    }

    // Every bare word is positional (no subcommand for this single-purpose bin).
    result.positional.push(arg)
  }

  return result
}
package/src/render.ts ADDED
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Body rendering + page-type decisions for the streaming orchestrator.
3
+ *
4
+ * All rendering is title-driven: bodies are rendered with `[[Title]]`
5
+ * placeholders, and `rewriteLinks` rewrites them to `[[docId|label]]`
6
+ * using the live title→docId map at write time.
7
+ */
8
+ import type { ExtractedArticle, ExtractedSection } from './types.ts'
9
+
10
/**
 * Icon identifiers stored in the `meta.icon` field of created tree entries,
 * keyed by the role the entry plays in the imported graph.
 */
export const ICONS = {
  // Containers
  graph: 'git-fork',
  // Article docs
  article: 'book-open',
  // A single category graph
  category: 'tag',
  // Per-article infobox outline
  infobox: 'info',
  // Sections rendered as outlines
  outline: 'list',
  gallery: 'images',
  // Plain prose sections
  section: 'pilcrow',
  // The "Categories" container graph
  categories: 'tags',
} as const
20
+
21
/**
 * Choose the page type and icon for a section.
 *
 * A section becomes an `outline` when it has sub-sections, or when it is
 * flagged as a list with at least five items; otherwise it is a plain `doc`.
 */
export function pickSectionType(section: ExtractedSection): { type: string; icon: string } {
  const hasSubsections = section.children.length > 0
  const isLongList = section.isList && section.listLength >= 5
  return hasSubsections || isLongList
    ? { type: 'outline', icon: ICONS.outline }
    : { type: 'doc', icon: ICONS.section }
}
27
+
28
/**
 * Body for an article doc in split mode: the article's lead text, or an empty
 * string when the snapshot has no lead.
 */
export function renderArticleLead(article: ExtractedArticle): string {
  const { lead } = article
  if (lead === undefined || lead === null) return ''
  return lead
}
32
+
33
/**
 * Body for an article doc in single mode: the lead, an `## Infobox` block when
 * infobox rows exist, then every section inlined with headings starting at
 * level 2. Parts are separated by blank lines.
 */
export function renderArticleSingleDoc(article: ExtractedArticle): string {
  const rows = article.infobox
  const leadPart = article.lead ? [article.lead] : []
  const infoboxPart = rows && rows.length > 0 ? ['## Infobox', renderInfoboxBody(rows)] : []
  const sectionParts = article.sections.flatMap((section) => renderSectionInline(section, 2))
  return [...leadPart, ...infoboxPart, ...sectionParts].join('\n\n')
}
45
+
46
/**
 * Flatten a section and its descendants into markdown fragments: a heading
 * (when the section has a title), the body (when it is not blank), then the
 * fragments of each child one heading level deeper. Heading depth is capped at
 * six `#` characters.
 */
function renderSectionInline(section: ExtractedSection, level: number): string[] {
  const hashes = '#'.repeat(Math.min(6, level))
  const heading = section.title ? [`${hashes} ${section.title}`] : []
  const text = section.body.trim() ? [section.body] : []
  const nested = section.children.flatMap((child) => renderSectionInline(child, level + 1))
  return [...heading, ...text, ...nested]
}
56
+
57
/**
 * Render infobox rows as a markdown bullet list, one `- **key:** value` line
 * per row, joined with newlines.
 */
export function renderInfoboxBody(rows: Array<{ key: string; value: string }>): string {
  const bullets: string[] = []
  for (const { key, value } of rows) {
    bullets.push(`- **${key}:** ${value}`)
  }
  return bullets.join('\n')
}
60
+
61
/**
 * Render a category doc body: a `## Pages` list of `[[Title]]` placeholders
 * for member pages and a `## Sub-categories` list of plain labels. A heading
 * is emitted only when its list is non-empty; both empty yields `''`.
 */
export function renderCategoryBody(members: string[], subcategories: string[]): string {
  const bulletList = (items: string[], render: (item: string) => string): string =>
    items.map(render).join('\n')

  const pagesBlock =
    members.length > 0 ? ['## Pages', bulletList(members, (m) => `- [[${m}]]`)] : []
  const subcatBlock =
    subcategories.length > 0
      ? ['## Sub-categories', bulletList(subcategories, (s) => `- ${s}`)]
      : []

  return [...pagesBlock, ...subcatBlock].join('\n\n')
}
73
+
74
/**
 * Resolve `[[Title]]` and `[[Title|Alias]]` placeholders against the
 * title→docId map.
 *
 * A title found in the map becomes `[[docId|label]]`, where the label is the
 * trimmed alias, or the trimmed title when the alias is missing or blank. A
 * title not in the map is replaced by that label as plain text.
 */
export function rewriteLinks(
  markdown: string,
  titleToDocId: Map<string, string>,
): string {
  const placeholder = /\[\[([^\]|]+?)(?:\|([^\]]+?))?\]\]/g

  const resolve = (_whole: string, target: string, alias?: string): string => {
    const title = target.trim()
    const hasAlias = Boolean(alias) && (alias as string).trim().length > 0
    const label = (hasAlias ? (alias as string) : title).trim()
    const docId = titleToDocId.get(title)
    if (docId) return `[[${docId}|${label}]]`
    return label
  }

  return markdown.replace(placeholder, resolve)
}