@abraca/cli 2.26.0 → 2.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/abracadabra-cli.cjs +1 -696
- package/dist/abracadabra-cli.cjs.map +1 -1
- package/dist/abracadabra-cli.esm.js +2 -695
- package/dist/abracadabra-cli.esm.js.map +1 -1
- package/package.json +3 -5
- package/src/index.ts +0 -5
- package/src/commands/wiki/connect.ts +0 -69
- package/src/commands/wiki/index.ts +0 -471
- package/src/commands/wiki/render.ts +0 -91
- package/src/commands/wiki/snapshot.ts +0 -210
- package/src/commands/wiki/types.ts +0 -45
- package/src/commands/wiki/wikipedia.ts +0 -154
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@abraca/cli",
|
|
3
|
-
"version": "2.26.0",
|
|
3
|
+
"version": "2.27.0",
|
|
4
4
|
"description": "CLI for Abracadabra — interact with CRDT document workspaces from the terminal",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -29,9 +29,7 @@
|
|
|
29
29
|
],
|
|
30
30
|
"dependencies": {
|
|
31
31
|
"@noble/ed25519": "^3.1.0",
|
|
32
|
-
"@noble/hashes": "^2.2.0",
|
|
33
|
-
"wtf-plugin-api": "^2.0.1",
|
|
34
|
-
"wtf_wikipedia": "^10.4.1"
|
|
32
|
+
"@noble/hashes": "^2.2.0"
|
|
35
33
|
},
|
|
36
34
|
"peerDependencies": {
|
|
37
35
|
"@abraca/dabra": ">=2.0.0",
|
|
@@ -39,6 +37,6 @@
|
|
|
39
37
|
"yjs": "^13.6.8"
|
|
40
38
|
},
|
|
41
39
|
"devDependencies": {
|
|
42
|
-
"@abraca/dabra": "2.
|
|
40
|
+
"@abraca/dabra": "2.27.0"
|
|
43
41
|
}
|
|
44
42
|
}
|
package/src/index.ts
CHANGED
|
@@ -28,17 +28,12 @@ import './commands/awareness.ts'
|
|
|
28
28
|
import './commands/files.ts'
|
|
29
29
|
import './commands/permissions.ts'
|
|
30
30
|
import './commands/page-types.ts'
|
|
31
|
-
import './commands/wiki/index.ts'
|
|
32
31
|
|
|
33
32
|
// ── Commands that don't require a connection ─────────────────────────────────
|
|
34
|
-
// "wiki" opens its own DocumentManager session via wiki/connect.ts, so the
|
|
35
|
-
// parent harness should NOT pre-open a CLIConnection (which would authenticate
|
|
36
|
-
// twice and hold an unused root provider).
|
|
37
33
|
const NO_CONNECT_COMMANDS = new Set([
|
|
38
34
|
'help', 'h', '?',
|
|
39
35
|
'version', 'v',
|
|
40
36
|
'page-types', 'types', 'doctypes',
|
|
41
|
-
'wiki', 'wikipedia',
|
|
42
37
|
])
|
|
43
38
|
|
|
44
39
|
async function main() {
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Open a DocumentManager session for the wiki command, mirroring the
|
|
3
|
-
* auth/register flow that CLIConnection uses but using the modern public API.
|
|
4
|
-
*
|
|
5
|
-
* Reuses the CLI's Ed25519 keypair handling (loadOrCreateKeypair, signChallenge)
|
|
6
|
-
* so the wiki command authenticates with the same identity as every other
|
|
7
|
-
* subcommand.
|
|
8
|
-
*/
|
|
9
|
-
import { DocumentManager } from '@abraca/dabra'
|
|
10
|
-
import { loadOrCreateKeypair, signChallenge } from '../../crypto.ts'
|
|
11
|
-
|
|
12
|
-
export interface OpenSessionConfig {
|
|
13
|
-
url: string
|
|
14
|
-
name?: string
|
|
15
|
-
color?: string
|
|
16
|
-
inviteCode?: string
|
|
17
|
-
keyFile?: string
|
|
18
|
-
/** Suppress informational stderr logging. */
|
|
19
|
-
quiet?: boolean
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
export interface OpenSessionResult {
|
|
23
|
-
dm: DocumentManager
|
|
24
|
-
/** Active root doc id (the entry-point space). */
|
|
25
|
-
rootDocId: string
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
export async function openSession(config: OpenSessionConfig): Promise<OpenSessionResult> {
|
|
29
|
-
const keypair = await loadOrCreateKeypair(config.keyFile)
|
|
30
|
-
const sign = (challenge: string) => Promise.resolve(signChallenge(challenge, keypair.privateKey))
|
|
31
|
-
|
|
32
|
-
const dm = new DocumentManager({
|
|
33
|
-
url: config.url,
|
|
34
|
-
name: config.name ?? 'Wiki Extractor',
|
|
35
|
-
color: config.color,
|
|
36
|
-
quiet: config.quiet,
|
|
37
|
-
})
|
|
38
|
-
|
|
39
|
-
// Authenticate first; register on first run.
|
|
40
|
-
try {
|
|
41
|
-
await dm.client.loginWithKey(keypair.publicKeyB64, sign)
|
|
42
|
-
} catch (err: any) {
|
|
43
|
-
const status = err?.status ?? err?.response?.status
|
|
44
|
-
if (status === 404 || status === 422) {
|
|
45
|
-
if (!config.quiet) {
|
|
46
|
-
console.error('[abracadabra] Key not registered, creating new account...')
|
|
47
|
-
}
|
|
48
|
-
await dm.client.registerWithKey({
|
|
49
|
-
publicKey: keypair.publicKeyB64,
|
|
50
|
-
username: (config.name ?? 'wiki-extractor').replace(/\s+/g, '-').toLowerCase(),
|
|
51
|
-
displayName: config.name ?? 'Wiki Extractor',
|
|
52
|
-
deviceName: 'CLI Wiki',
|
|
53
|
-
inviteCode: config.inviteCode,
|
|
54
|
-
})
|
|
55
|
-
await dm.client.loginWithKey(keypair.publicKeyB64, sign)
|
|
56
|
-
} else {
|
|
57
|
-
throw err
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
await dm.connect()
|
|
62
|
-
|
|
63
|
-
const rootDocId = dm.rootDocId
|
|
64
|
-
if (!rootDocId) {
|
|
65
|
-
throw new Error('Connected but no rootDocId — server has no spaces.')
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
return { dm, rootDocId }
|
|
69
|
-
}
|
|
@@ -1,471 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* `abracadabra wiki <title>` — fetch Wikipedia articles and seed them into
|
|
3
|
-
* the active space as a graph of docs.
|
|
4
|
-
*
|
|
5
|
-
* Streaming flow (no buffering): every newly-discovered title becomes a
|
|
6
|
-
* shell doc immediately (visible in the dashboard right away), then bodies
|
|
7
|
-
* are filled in one fetch at a time. The user sees the tree skeleton appear
|
|
8
|
-
* before the first body is written.
|
|
9
|
-
*
|
|
10
|
-
* The command authenticates separately from the parent CLI (it's listed in
|
|
11
|
-
* NO_CONNECT_COMMANDS in src/index.ts) so the parent harness doesn't open a
|
|
12
|
-
* second connection. We use the modern DocumentManager API from @abraca/dabra.
|
|
13
|
-
*/
|
|
14
|
-
import type { DocumentManager } from '@abraca/dabra'
|
|
15
|
-
import { registerCommand } from '../../command.ts'
|
|
16
|
-
import type { CLIConnection } from '../../connection.ts'
|
|
17
|
-
import type { ParsedArgs } from '../../parser.ts'
|
|
18
|
-
import { WikipediaClient } from './wikipedia.ts'
|
|
19
|
-
import { snapshotArticle, canonicalTitle, prettyCategoryLabel } from './snapshot.ts'
|
|
20
|
-
import {
|
|
21
|
-
ICONS,
|
|
22
|
-
pickSectionType,
|
|
23
|
-
renderArticleLead,
|
|
24
|
-
renderArticleSingleDoc,
|
|
25
|
-
renderInfoboxBody,
|
|
26
|
-
renderCategoryBody,
|
|
27
|
-
rewriteLinks,
|
|
28
|
-
} from './render.ts'
|
|
29
|
-
import { openSession } from './connect.ts'
|
|
30
|
-
import type { WikiOptions, ExtractMode, ExtractedArticle, ExtractedSection } from './types.ts'
|
|
31
|
-
|
|
32
|
-
registerCommand({
|
|
33
|
-
name: 'wiki',
|
|
34
|
-
aliases: ['wikipedia'],
|
|
35
|
-
description: 'Fetch Wikipedia articles into a graph of docs (streaming).',
|
|
36
|
-
usage: [
|
|
37
|
-
'wiki "<Article Title>"',
|
|
38
|
-
' mode=single|split single doc per article OR split into sections+infobox [split]',
|
|
39
|
-
' depth=<n> follow internal links to depth N [1]',
|
|
40
|
-
' category-depth=<n> recurse into sub-categories [1]',
|
|
41
|
-
' lang=<code> wiki language [en]',
|
|
42
|
-
' domain=<host> 3rd-party MediaWiki host (overrides lang)',
|
|
43
|
-
' parent=<docId> parent doc for the new graph [active space root]',
|
|
44
|
-
' user-agent=<str> Api-User-Agent header (REQUIRED by Wikimedia etiquette)',
|
|
45
|
-
' rate=<rps> max wikipedia requests per second [3]',
|
|
46
|
-
' --include-categories expand each article\'s categories into nested graphs',
|
|
47
|
-
' --dry-run fetch only the entry article, print outline, no writes',
|
|
48
|
-
].join('\n'),
|
|
49
|
-
async run(_conn: CLIConnection | null, args: ParsedArgs): Promise<string> {
|
|
50
|
-
const opts = parseOptions(args)
|
|
51
|
-
if (typeof opts === 'string') return opts
|
|
52
|
-
|
|
53
|
-
const log = (msg: string) => {
|
|
54
|
-
if (!args.flags.has('quiet') && !args.flags.has('q')) {
|
|
55
|
-
console.error(`[wiki] ${msg}`)
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
const wp = new WikipediaClient({
|
|
60
|
-
lang: opts.lang,
|
|
61
|
-
domain: opts.domain,
|
|
62
|
-
userAgent: opts.userAgent,
|
|
63
|
-
rate: opts.rate,
|
|
64
|
-
})
|
|
65
|
-
|
|
66
|
-
if (opts.dryRun) {
|
|
67
|
-
// Dry-run: fetch only the entry, print its outline, no server.
|
|
68
|
-
log(`fetch ${opts.title}`)
|
|
69
|
-
const doc = await wp.fetchArticle(opts.title)
|
|
70
|
-
if (!doc) return `Article not found: "${opts.title}"`
|
|
71
|
-
const snap = snapshotArticle(doc, canonicalTitle(doc.title?.() ?? opts.title))
|
|
72
|
-
return [
|
|
73
|
-
`Entry: ${snap.title}`,
|
|
74
|
-
`URL: ${snap.url ?? '(none)'}`,
|
|
75
|
-
`Internal links: ${snap.linkTitles.length}`,
|
|
76
|
-
`Categories: ${snap.categories.length}`,
|
|
77
|
-
`Sections: ${snap.sections.length}`,
|
|
78
|
-
`Has infobox: ${snap.infobox && snap.infobox.length > 0 ? 'yes' : 'no'}`,
|
|
79
|
-
'',
|
|
80
|
-
'── Sections ──',
|
|
81
|
-
printSections(snap.sections, ''),
|
|
82
|
-
].join('\n')
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
// ── Connect ──────────────────────────────────────────────────────────
|
|
86
|
-
// process.env access uses bracket notation to satisfy noUncheckedIndexedAccess.
|
|
87
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
88
|
-
const env = (globalThis as any).process?.env ?? {}
|
|
89
|
-
const url = env['ABRA_URL']
|
|
90
|
-
if (!url) {
|
|
91
|
-
return 'ABRA_URL is required to write to the server. Set it or pass --dry-run.'
|
|
92
|
-
}
|
|
93
|
-
const { dm } = await openSession({
|
|
94
|
-
url,
|
|
95
|
-
name: env['ABRA_NAME'],
|
|
96
|
-
color: env['ABRA_COLOR'],
|
|
97
|
-
inviteCode: env['ABRA_INVITE_CODE'],
|
|
98
|
-
keyFile: env['ABRA_KEY_FILE'],
|
|
99
|
-
quiet: args.flags.has('quiet') || args.flags.has('q'),
|
|
100
|
-
})
|
|
101
|
-
|
|
102
|
-
try {
|
|
103
|
-
const result = await runStreaming(dm, wp, opts, log)
|
|
104
|
-
return [
|
|
105
|
-
`Done. Created ${result.articleCount} articles${
|
|
106
|
-
result.categoryCount > 0 ? ` + ${result.categoryCount} categories` : ''
|
|
107
|
-
}.`,
|
|
108
|
-
`Root: ${result.rootDocId}`,
|
|
109
|
-
].join('\n')
|
|
110
|
-
} finally {
|
|
111
|
-
await dm.destroy().catch(() => {})
|
|
112
|
-
}
|
|
113
|
-
},
|
|
114
|
-
})
|
|
115
|
-
|
|
116
|
-
// ─────────────────────────────────────────────────────────────────────────
|
|
117
|
-
// Streaming orchestrator
|
|
118
|
-
// ─────────────────────────────────────────────────────────────────────────
|
|
119
|
-
|
|
120
|
-
interface StreamResult {
|
|
121
|
-
rootDocId: string
|
|
122
|
-
articleCount: number
|
|
123
|
-
categoryCount: number
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
async function runStreaming(
|
|
127
|
-
dm: DocumentManager,
|
|
128
|
-
wp: WikipediaClient,
|
|
129
|
-
opts: WikiOptions,
|
|
130
|
-
log: (msg: string) => void,
|
|
131
|
-
): Promise<StreamResult> {
|
|
132
|
-
// Title → docId map. Drives [[Title]] → [[docId|label]] rewriting at write time.
|
|
133
|
-
const titleToDocId = new Map<string, string>()
|
|
134
|
-
// Snapshots of articles we've already fetched (avoid re-fetching).
|
|
135
|
-
const fetched = new Map<string, ExtractedArticle>()
|
|
136
|
-
// Articles whose section/infobox children have been created (split mode).
|
|
137
|
-
const childrenCreated = new Set<string>()
|
|
138
|
-
// Categories whose shells have been created.
|
|
139
|
-
const categoryToDocId = new Map<string, string>()
|
|
140
|
-
let categoriesContainerId: string | null = null
|
|
141
|
-
|
|
142
|
-
// ── Fetch entry first; we need its title to label the root graph ─────
|
|
143
|
-
log(`fetch ${opts.title}`)
|
|
144
|
-
const entryDoc = await wp.fetchArticle(opts.title)
|
|
145
|
-
if (!entryDoc) {
|
|
146
|
-
throw new Error(`Article not found: "${opts.title}"`)
|
|
147
|
-
}
|
|
148
|
-
const entryTitle = canonicalTitle(entryDoc.title?.() ?? opts.title)
|
|
149
|
-
const entrySnap = snapshotArticle(entryDoc, entryTitle)
|
|
150
|
-
fetched.set(entryTitle, entrySnap)
|
|
151
|
-
|
|
152
|
-
// ── Step 1: create root graph doc (visible immediately) ──────────────
|
|
153
|
-
const rootEntry = dm.tree.create({
|
|
154
|
-
parentId: opts.parentDocId ?? null,
|
|
155
|
-
label: entryTitle,
|
|
156
|
-
type: 'graph',
|
|
157
|
-
meta: { icon: ICONS.graph },
|
|
158
|
-
})
|
|
159
|
-
log(`+ ${rootEntry.id.slice(0, 8)}… ${entryTitle} (graph)`)
|
|
160
|
-
|
|
161
|
-
// ── Step 2: create the entry article shell ───────────────────────────
|
|
162
|
-
const entryArticleId = createArticleShell(dm, entrySnap, rootEntry.id, log)
|
|
163
|
-
titleToDocId.set(entryTitle, entryArticleId)
|
|
164
|
-
|
|
165
|
-
// Queue of (title, depth) to process. Each entry is guaranteed to have
|
|
166
|
-
// a shell doc already in titleToDocId.
|
|
167
|
-
const queue: Array<{ title: string; depth: number }> = [{ title: entryTitle, depth: 0 }]
|
|
168
|
-
let articleCount = 0
|
|
169
|
-
|
|
170
|
-
// ── Step 3: streaming process ───────────────────────────────────────
|
|
171
|
-
while (queue.length > 0) {
|
|
172
|
-
const { title, depth } = queue.shift()!
|
|
173
|
-
const articleDocId = titleToDocId.get(title)!
|
|
174
|
-
|
|
175
|
-
// Ensure we've fetched this article.
|
|
176
|
-
let snap = fetched.get(title)
|
|
177
|
-
if (!snap) {
|
|
178
|
-
log(`fetch [d${depth}] ${title}`)
|
|
179
|
-
try {
|
|
180
|
-
const doc = await wp.fetchArticle(title)
|
|
181
|
-
if (!doc) {
|
|
182
|
-
log(` not found — leaving stub`)
|
|
183
|
-
continue
|
|
184
|
-
}
|
|
185
|
-
snap = snapshotArticle(doc, canonicalTitle(doc.title?.() ?? title))
|
|
186
|
-
fetched.set(title, snap)
|
|
187
|
-
} catch (err: any) {
|
|
188
|
-
log(`! fetch failed: ${err?.message ?? err}`)
|
|
189
|
-
continue
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
// Create this article's section/infobox children (split mode only,
|
|
194
|
-
// and only once per article).
|
|
195
|
-
if (opts.mode === 'split' && !childrenCreated.has(title)) {
|
|
196
|
-
createArticleChildren(dm, snap, articleDocId, log)
|
|
197
|
-
childrenCreated.add(title)
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
// Discover new linked titles and pre-allocate shells immediately.
|
|
201
|
-
if (depth < opts.depth) {
|
|
202
|
-
for (const linkTitle of snap.linkTitles) {
|
|
203
|
-
if (titleToDocId.has(linkTitle)) continue
|
|
204
|
-
const shell = dm.tree.create({
|
|
205
|
-
parentId: rootEntry.id,
|
|
206
|
-
label: linkTitle,
|
|
207
|
-
type: 'doc',
|
|
208
|
-
meta: { icon: ICONS.article },
|
|
209
|
-
})
|
|
210
|
-
titleToDocId.set(linkTitle, shell.id)
|
|
211
|
-
queue.push({ title: linkTitle, depth: depth + 1 })
|
|
212
|
-
log(`+ ${shell.id.slice(0, 8)}… ${linkTitle} (doc, shell)`)
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
// Pre-allocate category shells when first discovered.
|
|
217
|
-
if (opts.includeCategories && snap.categories.length > 0) {
|
|
218
|
-
if (!categoriesContainerId) {
|
|
219
|
-
const c = dm.tree.create({
|
|
220
|
-
parentId: rootEntry.id,
|
|
221
|
-
label: 'Categories',
|
|
222
|
-
type: 'graph',
|
|
223
|
-
meta: { icon: ICONS.categories },
|
|
224
|
-
})
|
|
225
|
-
categoriesContainerId = c.id
|
|
226
|
-
log(`+ ${c.id.slice(0, 8)}… Categories (graph)`)
|
|
227
|
-
}
|
|
228
|
-
for (const catTitle of snap.categories) {
|
|
229
|
-
if (categoryToDocId.has(catTitle)) continue
|
|
230
|
-
const cat = dm.tree.create({
|
|
231
|
-
parentId: categoriesContainerId,
|
|
232
|
-
label: prettyCategoryLabel(catTitle),
|
|
233
|
-
type: 'graph',
|
|
234
|
-
meta: { icon: ICONS.category },
|
|
235
|
-
})
|
|
236
|
-
categoryToDocId.set(catTitle, cat.id)
|
|
237
|
-
log(`+ ${cat.id.slice(0, 8)}… ${prettyCategoryLabel(catTitle)} (graph, cat)`)
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
// Write this article's body NOW (links resolve to whatever shells we
|
|
242
|
-
// have allocated so far — that's all of this article's links since we
|
|
243
|
-
// just allocated them above).
|
|
244
|
-
const body =
|
|
245
|
-
opts.mode === 'split' ? renderArticleLead(snap) : renderArticleSingleDoc(snap)
|
|
246
|
-
if (body.trim().length > 0) {
|
|
247
|
-
const rewritten = rewriteLinks(body, titleToDocId)
|
|
248
|
-
try {
|
|
249
|
-
await dm.content.write(articleDocId, rewritten)
|
|
250
|
-
log(`✓ body ${title}`)
|
|
251
|
-
} catch (err: any) {
|
|
252
|
-
log(`! body write failed for ${title}: ${err?.message ?? err}`)
|
|
253
|
-
}
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
// In split mode, also write each section/infobox doc body.
|
|
257
|
-
if (opts.mode === 'split') {
|
|
258
|
-
await writeChildrenBodies(dm, snap, articleDocId, titleToDocId, log)
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
articleCount++
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
// ── Step 4: fill in category bodies ─────────────────────────────────
|
|
265
|
-
let categoryCount = 0
|
|
266
|
-
if (opts.includeCategories && categoryToDocId.size > 0) {
|
|
267
|
-
for (const [catTitle, catDocId] of categoryToDocId) {
|
|
268
|
-
log(`category ${catTitle}`)
|
|
269
|
-
try {
|
|
270
|
-
const members = await wp.fetchCategoryPages(
|
|
271
|
-
catTitle,
|
|
272
|
-
opts.categoryDepth > 0,
|
|
273
|
-
Math.max(0, opts.categoryDepth),
|
|
274
|
-
)
|
|
275
|
-
const memberArticles: string[] = []
|
|
276
|
-
const subcats: string[] = []
|
|
277
|
-
for (const m of members) {
|
|
278
|
-
if (m.type === 'subcat') subcats.push(prettyCategoryLabel(m.title))
|
|
279
|
-
else memberArticles.push(m.title)
|
|
280
|
-
}
|
|
281
|
-
const body = renderCategoryBody(memberArticles, subcats)
|
|
282
|
-
const rewritten = rewriteLinks(body, titleToDocId)
|
|
283
|
-
if (rewritten.trim().length > 0) {
|
|
284
|
-
await dm.content.write(catDocId, rewritten)
|
|
285
|
-
log(`✓ body category ${catTitle}`)
|
|
286
|
-
}
|
|
287
|
-
categoryCount++
|
|
288
|
-
} catch (err: any) {
|
|
289
|
-
log(`! category ${catTitle}: ${err?.message ?? err}`)
|
|
290
|
-
}
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
return { rootDocId: rootEntry.id, articleCount, categoryCount }
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
// ─────────────────────────────────────────────────────────────────────────
|
|
298
|
-
// Shell + body helpers
|
|
299
|
-
// ─────────────────────────────────────────────────────────────────────────
|
|
300
|
-
|
|
301
|
-
function createArticleShell(
|
|
302
|
-
dm: DocumentManager,
|
|
303
|
-
article: ExtractedArticle,
|
|
304
|
-
parentId: string,
|
|
305
|
-
log: (msg: string) => void,
|
|
306
|
-
): string {
|
|
307
|
-
const meta: Record<string, unknown> = { icon: ICONS.article }
|
|
308
|
-
if (article.url) meta.url = article.url
|
|
309
|
-
const entry = dm.tree.create({
|
|
310
|
-
parentId,
|
|
311
|
-
label: article.title,
|
|
312
|
-
type: 'doc',
|
|
313
|
-
meta,
|
|
314
|
-
})
|
|
315
|
-
log(`+ ${entry.id.slice(0, 8)}… ${article.title} (doc)`)
|
|
316
|
-
return entry.id
|
|
317
|
-
}
|
|
318
|
-
|
|
319
|
-
/**
|
|
320
|
-
* Create section + infobox child docs for a split-mode article. Returns nothing
|
|
321
|
-
* — children get bodies written later in writeChildrenBodies.
|
|
322
|
-
*/
|
|
323
|
-
function createArticleChildren(
|
|
324
|
-
dm: DocumentManager,
|
|
325
|
-
article: ExtractedArticle,
|
|
326
|
-
articleDocId: string,
|
|
327
|
-
log: (msg: string) => void,
|
|
328
|
-
): void {
|
|
329
|
-
if (article.infobox && article.infobox.length > 0) {
|
|
330
|
-
const ib = dm.tree.create({
|
|
331
|
-
parentId: articleDocId,
|
|
332
|
-
label: 'Infobox',
|
|
333
|
-
type: 'outline',
|
|
334
|
-
meta: { icon: ICONS.infobox },
|
|
335
|
-
})
|
|
336
|
-
log(` + ${ib.id.slice(0, 8)}… Infobox (outline)`)
|
|
337
|
-
// We attach the docId to the article object so writeChildrenBodies
|
|
338
|
-
// can find it without a second tree query.
|
|
339
|
-
;(article as any)._infoboxDocId = ib.id
|
|
340
|
-
}
|
|
341
|
-
for (const section of article.sections) {
|
|
342
|
-
createSectionShell(dm, section, articleDocId, log)
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
|
|
346
|
-
function createSectionShell(
|
|
347
|
-
dm: DocumentManager,
|
|
348
|
-
section: ExtractedSection,
|
|
349
|
-
parentDocId: string,
|
|
350
|
-
log: (msg: string) => void,
|
|
351
|
-
): void {
|
|
352
|
-
const hasChildren = section.children.length > 0
|
|
353
|
-
if (!section.body.trim() && !hasChildren) return
|
|
354
|
-
const { type, icon } = pickSectionType(section)
|
|
355
|
-
const entry = dm.tree.create({
|
|
356
|
-
parentId: parentDocId,
|
|
357
|
-
label: section.title || 'Untitled section',
|
|
358
|
-
type,
|
|
359
|
-
meta: { icon },
|
|
360
|
-
})
|
|
361
|
-
log(` + ${entry.id.slice(0, 8)}… ${entry.label} (${type})`)
|
|
362
|
-
;(section as any)._docId = entry.id
|
|
363
|
-
for (const child of section.children) {
|
|
364
|
-
createSectionShell(dm, child, entry.id, log)
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
async function writeChildrenBodies(
|
|
369
|
-
dm: DocumentManager,
|
|
370
|
-
article: ExtractedArticle,
|
|
371
|
-
_articleDocId: string,
|
|
372
|
-
titleToDocId: Map<string, string>,
|
|
373
|
-
log: (msg: string) => void,
|
|
374
|
-
): Promise<void> {
|
|
375
|
-
const infoboxDocId = (article as any)._infoboxDocId as string | undefined
|
|
376
|
-
if (infoboxDocId && article.infobox && article.infobox.length > 0) {
|
|
377
|
-
try {
|
|
378
|
-
await dm.content.write(infoboxDocId, renderInfoboxBody(article.infobox))
|
|
379
|
-
} catch (err: any) {
|
|
380
|
-
log(`! infobox body write failed: ${err?.message ?? err}`)
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
for (const section of article.sections) {
|
|
384
|
-
await writeSectionBody(dm, section, titleToDocId, log)
|
|
385
|
-
}
|
|
386
|
-
}
|
|
387
|
-
|
|
388
|
-
async function writeSectionBody(
|
|
389
|
-
dm: DocumentManager,
|
|
390
|
-
section: ExtractedSection,
|
|
391
|
-
titleToDocId: Map<string, string>,
|
|
392
|
-
log: (msg: string) => void,
|
|
393
|
-
): Promise<void> {
|
|
394
|
-
const docId = (section as any)._docId as string | undefined
|
|
395
|
-
if (docId && section.body.trim().length > 0) {
|
|
396
|
-
try {
|
|
397
|
-
await dm.content.write(docId, rewriteLinks(section.body, titleToDocId))
|
|
398
|
-
} catch (err: any) {
|
|
399
|
-
log(`! section body write failed for ${section.title}: ${err?.message ?? err}`)
|
|
400
|
-
}
|
|
401
|
-
}
|
|
402
|
-
for (const child of section.children) {
|
|
403
|
-
await writeSectionBody(dm, child, titleToDocId, log)
|
|
404
|
-
}
|
|
405
|
-
}
|
|
406
|
-
|
|
407
|
-
// ─────────────────────────────────────────────────────────────────────────
|
|
408
|
-
// Argument parsing + dry-run printing
|
|
409
|
-
// ─────────────────────────────────────────────────────────────────────────
|
|
410
|
-
|
|
411
|
-
function parseOptions(args: ParsedArgs): WikiOptions | string {
|
|
412
|
-
const title = args.positional[0]?.trim() || args.params['title']
|
|
413
|
-
if (!title) return 'Missing required positional argument: <title>. Example: abracadabra wiki "Toronto Raptors"'
|
|
414
|
-
|
|
415
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
416
|
-
const env = (globalThis as any).process?.env ?? {}
|
|
417
|
-
const userAgent = args.params['user-agent'] || args.params['userAgent'] || env['ABRA_WIKI_USER_AGENT']
|
|
418
|
-
if (!userAgent) {
|
|
419
|
-
return [
|
|
420
|
-
'Missing required parameter: user-agent="your-name (you@example.com)"',
|
|
421
|
-
'(Wikimedia etiquette requires an Api-User-Agent header. Pass user-agent=... or set ABRA_WIKI_USER_AGENT.)',
|
|
422
|
-
].join('\n')
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
const mode = (args.params['mode'] ?? 'split') as ExtractMode
|
|
426
|
-
if (mode !== 'single' && mode !== 'split') {
|
|
427
|
-
return `Invalid mode "${mode}". Use mode=single or mode=split.`
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
const depth = parseIntOr(args.params['depth'], 1)
|
|
431
|
-
const categoryDepth = parseIntOr(args.params['category-depth'] ?? args.params['categoryDepth'], 1)
|
|
432
|
-
const rate = parseFloatOr(args.params['rate'], 3)
|
|
433
|
-
|
|
434
|
-
return {
|
|
435
|
-
title,
|
|
436
|
-
mode,
|
|
437
|
-
depth,
|
|
438
|
-
categoryDepth,
|
|
439
|
-
includeCategories: args.flags.has('include-categories') || args.flags.has('includeCategories'),
|
|
440
|
-
lang: args.params['lang'] ?? 'en',
|
|
441
|
-
domain: args.params['domain'],
|
|
442
|
-
parentDocId: args.params['parent'],
|
|
443
|
-
userAgent,
|
|
444
|
-
rate,
|
|
445
|
-
dryRun: args.flags.has('dry-run') || args.flags.has('dryRun'),
|
|
446
|
-
}
|
|
447
|
-
}
|
|
448
|
-
|
|
449
|
-
function parseIntOr(s: string | undefined, fallback: number): number {
|
|
450
|
-
if (!s) return fallback
|
|
451
|
-
const n = Number.parseInt(s, 10)
|
|
452
|
-
return Number.isFinite(n) && n >= 0 ? n : fallback
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
function parseFloatOr(s: string | undefined, fallback: number): number {
|
|
456
|
-
if (!s) return fallback
|
|
457
|
-
const n = Number.parseFloat(s)
|
|
458
|
-
return Number.isFinite(n) && n > 0 ? n : fallback
|
|
459
|
-
}
|
|
460
|
-
|
|
461
|
-
function printSections(sections: ExtractedSection[], indent: string): string {
|
|
462
|
-
const lines: string[] = []
|
|
463
|
-
for (const s of sections) {
|
|
464
|
-
const hint = s.body ? ` (${s.body.length}b)` : ''
|
|
465
|
-
lines.push(`${indent}- ${s.title}${hint}${s.children.length > 0 ? ` [${s.children.length} sub]` : ''}`)
|
|
466
|
-
if (s.children.length > 0) {
|
|
467
|
-
lines.push(printSections(s.children, indent + ' '))
|
|
468
|
-
}
|
|
469
|
-
}
|
|
470
|
-
return lines.join('\n')
|
|
471
|
-
}
|
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Body rendering + page-type decisions for the streaming orchestrator.
|
|
3
|
-
*
|
|
4
|
-
* All rendering is title-driven: bodies are rendered with `[[Title]]`
|
|
5
|
-
* placeholders, and `rewriteLinks` rewrites them to `[[docId|label]]`
|
|
6
|
-
* using the live title→docId map at write time.
|
|
7
|
-
*/
|
|
8
|
-
import type { ExtractedArticle, ExtractedSection } from './types.ts'
|
|
9
|
-
|
|
10
|
-
export const ICONS = {
|
|
11
|
-
graph: 'git-fork',
|
|
12
|
-
article: 'book-open',
|
|
13
|
-
category: 'tag',
|
|
14
|
-
infobox: 'info',
|
|
15
|
-
outline: 'list',
|
|
16
|
-
gallery: 'images',
|
|
17
|
-
section: 'pilcrow',
|
|
18
|
-
categories: 'tags',
|
|
19
|
-
} as const
|
|
20
|
-
|
|
21
|
-
/** Decide a page type for a section based on its shape. */
|
|
22
|
-
export function pickSectionType(section: ExtractedSection): { type: string; icon: string } {
|
|
23
|
-
if (section.children.length > 0) return { type: 'outline', icon: ICONS.outline }
|
|
24
|
-
if (section.isList && section.listLength >= 5) return { type: 'outline', icon: ICONS.outline }
|
|
25
|
-
return { type: 'doc', icon: ICONS.section }
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
/** Render the lead paragraph as the article-doc body. */
|
|
29
|
-
export function renderArticleLead(article: ExtractedArticle): string {
|
|
30
|
-
return article.lead ?? ''
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
/** Render the article as a single doc, sections + infobox inlined. */
|
|
34
|
-
export function renderArticleSingleDoc(article: ExtractedArticle): string {
|
|
35
|
-
const parts: string[] = []
|
|
36
|
-
if (article.lead) parts.push(article.lead)
|
|
37
|
-
if (article.infobox && article.infobox.length > 0) {
|
|
38
|
-
parts.push('## Infobox', renderInfoboxBody(article.infobox))
|
|
39
|
-
}
|
|
40
|
-
for (const section of article.sections) {
|
|
41
|
-
parts.push(...renderSectionInline(section, 2))
|
|
42
|
-
}
|
|
43
|
-
return parts.join('\n\n')
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
function renderSectionInline(section: ExtractedSection, level: number): string[] {
|
|
47
|
-
const out: string[] = []
|
|
48
|
-
const prefix = '#'.repeat(Math.min(6, level))
|
|
49
|
-
if (section.title) out.push(`${prefix} ${section.title}`)
|
|
50
|
-
if (section.body.trim()) out.push(section.body)
|
|
51
|
-
for (const child of section.children) {
|
|
52
|
-
out.push(...renderSectionInline(child, level + 1))
|
|
53
|
-
}
|
|
54
|
-
return out
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
export function renderInfoboxBody(rows: Array<{ key: string; value: string }>): string {
|
|
58
|
-
return rows.map((r) => `- **${r.key}:** ${r.value}`).join('\n')
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
export function renderCategoryBody(members: string[], subcategories: string[]): string {
|
|
62
|
-
const parts: string[] = []
|
|
63
|
-
if (members.length > 0) {
|
|
64
|
-
parts.push('## Pages')
|
|
65
|
-
parts.push(members.map((m) => `- [[${m}]]`).join('\n'))
|
|
66
|
-
}
|
|
67
|
-
if (subcategories.length > 0) {
|
|
68
|
-
parts.push('## Sub-categories')
|
|
69
|
-
parts.push(subcategories.map((s) => `- ${s}`).join('\n'))
|
|
70
|
-
}
|
|
71
|
-
return parts.join('\n\n')
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
/**
|
|
75
|
-
* Replace `[[Title]]` / `[[Title|Alias]]` in markdown with
|
|
76
|
-
* `[[docId|label]]` using the title→docId map. Unresolved titles fall
|
|
77
|
-
* back to plain text (their alias or original title).
|
|
78
|
-
*/
|
|
79
|
-
export function rewriteLinks(
|
|
80
|
-
markdown: string,
|
|
81
|
-
titleToDocId: Map<string, string>,
|
|
82
|
-
): string {
|
|
83
|
-
const re = /\[\[([^\]|]+?)(?:\|([^\]]+?))?\]\]/g
|
|
84
|
-
return markdown.replace(re, (_match, target: string, alias?: string) => {
|
|
85
|
-
const title = target.trim()
|
|
86
|
-
const docId = titleToDocId.get(title)
|
|
87
|
-
const display = (alias && alias.trim().length > 0 ? alias : title).trim()
|
|
88
|
-
if (!docId) return display
|
|
89
|
-
return `[[${docId}|${display}]]`
|
|
90
|
-
})
|
|
91
|
-
}
|