@abraca/cli 1.9.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,210 @@
1
+ /**
2
+ * Snapshot a wtf_wikipedia Document into a plain-data shape that's easy to
3
+ * work with downstream. No BFS / no plan-building here — just a pure read
4
+ * of one parsed page.
5
+ */
6
+ import type { ExtractedArticle, ExtractedSection } from './types.ts'
7
+ import { canonicalTitle, isCategoryTitle, stripCategoryPrefix } from './wikipedia.ts'
8
+
9
+ export { canonicalTitle, isCategoryTitle, stripCategoryPrefix }
10
+
11
/**
 * Build a plain-data `ExtractedArticle` from one parsed wtf_wikipedia document.
 *
 * @param doc   Parsed document; every accessor is probed defensively, so a
 *              document lacking `sections`, `infobox` or `url` still works.
 * @param title Title stored on the snapshot as-is (it is not re-read from `doc`).
 * @returns The snapshot; `url` is `null` when the document exposes no `url()` method.
 */
export function snapshotArticle(doc: any, title: string): ExtractedArticle {
  const linkTitles = collectLinkTitles(doc)
  const categories = collectCategories(doc)
  const sections = snapshotSections(doc.sections?.() ?? [])
  const infobox = snapshotInfobox(doc.infobox?.())
  const lead = leadParagraph(doc)

  let url = null
  if (typeof doc.url === 'function') {
    url = doc.url()
  }

  return { title, linkTitles, categories, sections, infobox, lead, url }
}
22
+
23
/**
 * Display label for a category title: the title with its category
 * namespace prefix removed (delegates to `stripCategoryPrefix`).
 */
export function prettyCategoryLabel(catTitle: string): string {
  const label = stripCategoryPrefix(catTitle)
  return label
}
26
+
27
+ // ─────────────────────────────────────────────────────────────────────────
28
+ // Link / category extraction
29
+ // ─────────────────────────────────────────────────────────────────────────
30
+
31
/**
 * Collect the distinct page titles this document links to.
 *
 * Titles are normalized with `canonicalTitle` *before* filtering, so that
 * - page names that are empty or whitespace-only never produce an empty title, and
 * - the category check sees the same normalized form that is stored.
 *
 * @param doc Parsed document; a missing `links()` accessor yields `[]`.
 * @returns De-duplicated titles in first-seen order, excluding category pages.
 */
function collectLinkTitles(doc: any): string[] {
  const titles = new Set<string>()
  for (const link of doc.links?.() ?? []) {
    // Skip holes and link objects that do not expose a page accessor.
    if (!link || typeof link.page !== 'function') continue
    const page = link.page()
    if (typeof page !== 'string') continue

    const title = canonicalTitle(page)
    if (title.length === 0 || isCategoryTitle(title)) continue
    titles.add(title)
  }
  return [...titles]
}
43
+
44
/**
 * List the document's categories, each normalized with `canonicalTitle`.
 * Entries that normalize to an empty string are dropped; order is kept and
 * duplicates are not removed.
 */
function collectCategories(doc: any): string[] {
  const names = (doc.categories?.() as string[] | undefined) ?? []
  return names.map((name) => canonicalTitle(name)).filter((name) => name)
}
52
+
53
+ // ─────────────────────────────────────────────────────────────────────────
54
+ // Sections — flatten wtf's parent-child references into a real tree
55
+ // ─────────────────────────────────────────────────────────────────────────
56
+
57
/**
 * Turn the flat section list into a tree.
 *
 * Each raw section is wrapped in a node that records its title and the raw
 * section returned by its `parent()` accessor. A node whose parent is part of
 * the same list is materialized and appended to that parent's `children`;
 * every other node is treated as top-level and materialized at the end.
 *
 * The `children` array handed to `materialize` is the node's own array, so
 * descendants appended after a node was materialized still show up in it.
 */
function snapshotSections(rawSections: any[]): ExtractedSection[] {
  type SectionNode = {
    raw: any
    title: string
    parentRef: any
    children: ExtractedSection[]
  }

  const nodes: SectionNode[] = []
  const nodeFor = new Map<any, SectionNode>()
  for (const section of rawSections) {
    const node: SectionNode = {
      raw: section,
      title: section.title?.() || '',
      parentRef: typeof section.parent === 'function' ? section.parent() : null,
      children: [],
    }
    nodes.push(node)
    nodeFor.set(section, node)
  }

  const topLevel: SectionNode[] = []
  for (const node of nodes) {
    const parent = node.parentRef ? nodeFor.get(node.parentRef) : undefined
    if (parent === undefined) {
      topLevel.push(node)
      continue
    }
    parent.children.push(materialize(node))
  }

  return topLevel.map((node) => materialize(node))
}
78
+
79
/**
 * Convert one section node into its `ExtractedSection` snapshot.
 *
 * - `listLength` is the total number of lines across all lists of the section.
 * - `isList` is true when the section has at least one list and either has no
 *   paragraphs or has at least twice as many list lines as paragraphs.
 * - `body` is every non-empty paragraph rendering followed by every non-empty
 *   list line as a `- ` bullet, separated by blank lines.
 * - `children` is passed through by reference.
 */
function materialize(node: {
  raw: any
  title: string
  children: ExtractedSection[]
}): ExtractedSection {
  const lists: any[] = node.raw.lists?.() ?? []
  const paragraphs: any[] = node.raw.paragraphs?.() ?? []

  const listLength: number = lists.reduce<number>(
    (total, list) => total + (list.lines?.() ?? []).length,
    0,
  )

  let isList = false
  if (lists.length > 0) {
    isList = paragraphs.length === 0 || listLength >= paragraphs.length * 2
  }

  const bodyParts: string[] = []
  for (const paragraph of paragraphs) {
    const rendered = paragraphMarkdown(paragraph)
    if (rendered) bodyParts.push(rendered)
  }
  for (const list of lists) {
    for (const line of (list.lines?.() ?? []) as any[]) {
      const bullet = lineText(line)
      if (bullet) bodyParts.push(`- ${bullet}`)
    }
  }

  const body = bodyParts.join('\n\n')
  return { title: node.title, body, isList, listLength, children: node.children }
}
116
+
117
+ // ─────────────────────────────────────────────────────────────────────────
118
+ // Infobox
119
+ // ─────────────────────────────────────────────────────────────────────────
120
+
121
/**
 * Flatten an infobox into `{ key, value }` rows.
 *
 * Reads the infobox through its `json()` accessor, stringifies every value,
 * drops rows whose value is empty and prettifies the remaining keys.
 *
 * @returns The rows, or `undefined` when there is no infobox, no usable
 *          `json()` payload, or no non-empty row.
 */
function snapshotInfobox(box: any | null | undefined): Array<{ key: string; value: string }> | undefined {
  const data = box && typeof box.json === 'function' ? box.json() : null
  if (!data || typeof data !== 'object') return undefined

  const rows = Object.entries(data)
    .map(([key, raw]) => ({ key, value: stringifyInfoboxValue(raw) }))
    .filter((row) => row.value)
    .map((row) => ({ key: humanKey(row.key), value: row.value }))

  return rows.length === 0 ? undefined : rows
}
133
+
134
/**
 * Render one infobox value as text.
 *
 * - `null` / `undefined` → `''`
 * - strings are returned unchanged; numbers and booleans go through `String()`
 * - arrays: every element is rendered recursively, empty results are dropped,
 *   the rest are joined with `', '`
 * - objects: a string `text` property wins, otherwise a numeric `number`
 *   property is used
 * - anything else → `''`
 */
function stringifyInfoboxValue(val: unknown): string {
  if (val === null || val === undefined) return ''

  switch (typeof val) {
    case 'string':
      return val
    case 'number':
    case 'boolean':
      return String(val)
    case 'object':
      break
    default:
      return ''
  }

  if (Array.isArray(val)) {
    const pieces: string[] = []
    for (const item of val) {
      const piece = stringifyInfoboxValue(item)
      if (piece) pieces.push(piece)
    }
    return pieces.join(', ')
  }

  const record = val as Record<string, unknown>
  if (typeof record.text === 'string') return record.text
  return typeof record.number === 'number' ? String(record.number) : ''
}
148
+
149
/** Prettify an infobox key: underscores become spaces and the first character is upper-cased. */
function humanKey(k: string): string {
  const spaced = k.split('_').join(' ')
  return spaced.charAt(0).toUpperCase() + spaced.slice(1)
}
152
+
153
+ // ─────────────────────────────────────────────────────────────────────────
154
+ // Markdown rendering
155
+ // ─────────────────────────────────────────────────────────────────────────
156
+
157
/**
 * Markdown rendering of the document's first paragraph, or `''` when the
 * document has no paragraphs.
 */
function leadParagraph(doc: any): string {
  const opening = (doc.paragraphs?.() ?? [])[0]
  return opening ? paragraphMarkdown(opening) : ''
}
163
+
164
/**
 * Render a paragraph as markdown in which internal links appear as
 * `[[Title]]`. Each sentence is rendered on its own, the results are joined
 * with single spaces and the whole string is trimmed.
 *
 * The streaming orchestrator's link rewriter later swaps `[[Title]]` →
 * `[[docId|label]]` once IDs are known.
 */
function paragraphMarkdown(paragraph: any): string {
  const sentences: any[] = paragraph.sentences?.() ?? []
  return sentences
    .map((sentence) => sentenceWithWikilinks(sentence))
    .join(' ')
    .trim()
}
177
+
178
/**
 * Render a sentence's text with its internal links rewritten as wikilinks:
 * `[[Page]]` when the visible text equals the canonical page title,
 * `[[Page|shown text]]` otherwise. Links to category pages and links without
 * a page name are left as plain text.
 *
 * Each link claims the first occurrence of its visible text that is still
 * plain text. Longer visible texts are placed first, and text that already
 * belongs to an inserted wikilink is never matched again, so
 * - a short link text contained in a longer one ("York" / "New York City")
 *   cannot be wrapped inside the longer link's markup, and
 * - two links with the same visible text are applied to two different
 *   occurrences instead of nesting on the first one.
 *
 * Markup is spliced in by index rather than via `String.prototype.replace`,
 * so `$` sequences in titles are never interpreted as replacement patterns.
 *
 * @param sentence Object exposing optional `text()` and `links()` accessors.
 * @returns The sentence text with wikilink markup inserted.
 */
function sentenceWithWikilinks(sentence: any): string {
  const text: string = (sentence.text?.() ?? '').toString()
  const links: any[] = sentence.links?.() ?? []
  if (links.length === 0) return text

  const targets: Array<{ page: string; shown: string }> = []
  for (const link of links) {
    const page = typeof link?.page === 'function' ? link.page() : null
    if (typeof page !== 'string' || page.length === 0) continue
    if (isCategoryTitle(page)) continue
    const display = typeof link.text === 'function' ? link.text() : null
    const shown = typeof display === 'string' && display.length > 0 ? display : page
    targets.push({ page: canonicalTitle(page), shown })
  }
  // Longest visible text first, so the more specific link claims its span first.
  targets.sort((a, b) => b.shown.length - a.shown.length)

  // The sentence as alternating plain / already-linked pieces.
  const segments: Array<{ text: string; linked: boolean }> = [{ text, linked: false }]
  for (const { page, shown } of targets) {
    const index = segments.findIndex((seg) => !seg.linked && seg.text.includes(shown))
    if (index === -1) continue

    const plain = segments[index].text
    const at = plain.indexOf(shown)
    const markup = shown === page ? `[[${page}]]` : `[[${page}|${shown}]]`
    segments.splice(
      index,
      1,
      { text: plain.slice(0, at), linked: false },
      { text: markup, linked: true },
      { text: plain.slice(at + shown.length), linked: false },
    )
  }
  return segments.map((seg) => seg.text).join('')
}
203
+
204
/**
 * Text of one list line. Accepts a plain string, an object with a string
 * `text` property, or an object with a `text()` method; anything else
 * (including falsy input) yields `''`.
 */
function lineText(line: any): string {
  if (!line) return ''
  if (typeof line === 'string') return line

  switch (typeof line.text) {
    case 'string':
      return line.text
    case 'function':
      // Called as a method so the accessor keeps its `this`.
      return line.text()
    default:
      return ''
  }
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Internal types for the Wikipedia extractor command.
3
+ */
4
+
5
/**
 * Extraction mode selected for a run.
 * NOTE(review): presumably `'single'` writes one document per article and
 * `'split'` one per section — confirm against the command implementation.
 */
export type ExtractMode = 'single' | 'split'
6
+
7
/**
 * Options for one run of the Wikipedia extractor command.
 * The consumers of most fields are not visible here; notes marked
 * NOTE(review) are assumptions to confirm.
 */
export interface WikiOptions {
  /** NOTE(review): presumably the title of the article to start from — confirm. */
  title: string
  mode: ExtractMode
  /** NOTE(review): presumably how many link hops to follow from `title` — confirm. */
  depth: number
  /** NOTE(review): presumably whether category pages are extracted as well — confirm. */
  includeCategories: boolean
  /** NOTE(review): presumably the sub-category recursion depth — confirm. */
  categoryDepth: number
  /** NOTE(review): looks like the wiki language code (same name as `WikipediaClientConfig.lang`) — confirm. */
  lang: string
  /** NOTE(review): looks like an alternate wiki domain (same name as `WikipediaClientConfig.domain`) — confirm. */
  domain?: string
  /** NOTE(review): presumably the document under which extracted documents are created — confirm. */
  parentDocId?: string
  /** NOTE(review): looks like the User-Agent sent to the wiki API — confirm. */
  userAgent: string
  /** NOTE(review): looks like the request rate limit (requests per second) — confirm. */
  rate: number
  /** Print plan-tree to stderr without writing (no server connection). */
  dryRun: boolean
}
21
+
22
/** A simplified article snapshot extracted from a wtf_wikipedia Document. */
export interface ExtractedArticle {
  /** Title passed to the extractor for this article. */
  title: string
  /** Wikipedia internal links (page titles) referenced by this article. */
  linkTitles: string[]
  /** Categories this article belongs to. */
  categories: string[]
  /** Top-level sections; each section has nested subsections. */
  sections: ExtractedSection[]
  /** Infobox key-value rows (first infobox only); absent when there are no non-empty rows. */
  infobox?: Array<{ key: string; value: string }>
  /**
   * First paragraph of the article rendered as markdown, with internal links
   * written as `[[Title]]` / `[[Title|label]]`; `''` when there is none.
   */
  lead: string
  /** Source URL on Wikipedia; `null` when the document exposes none. */
  url: string | null
}
38
+
39
/** One section of an extracted article, with its subsections nested in `children`. */
export interface ExtractedSection {
  /** Section heading; `''` when the section has no title. */
  title: string
  /**
   * Markdown body: the section's paragraphs followed by its list lines as
   * `- ` bullets, separated by blank lines.
   */
  body: string
  /**
   * True when the section has at least one list and either no paragraphs or
   * at least twice as many list lines as paragraphs.
   */
  isList: boolean
  /** Total number of lines across all lists in the section. */
  listLength: number
  /** Direct subsections. */
  children: ExtractedSection[]
}
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Rate-limited wrapper around wtf_wikipedia + wtf-plugin-api.
3
+ *
4
+ * Responsibilities:
5
+ * - Throttle requests to respect Wikimedia API etiquette
6
+ * - Cache parsed Documents by canonical title
7
+ * - Resolve redirects so callers always see the redirect target
8
+ * - Expose getCategoryPages via wtf-plugin-api
9
+ */
10
+ // @ts-ignore — wtf_wikipedia ships its own types but they are imprecise; we
11
+ // cast Link/Section APIs as needed below.
12
+ import wtf from 'wtf_wikipedia'
13
+ // @ts-ignore — wtf-plugin-api is JS-only with no types; treat as opaque.
14
+ import wtfApiPlugin from 'wtf-plugin-api'
15
+
16
// wtf-plugin-api is registered lazily — on the first ensurePlugin() call, not
// at import time — so that getCategoryPages becomes available on `wtf`.
// The flag makes every later call a no-op.
let pluginExtended = false
function ensurePlugin(): void {
  if (!pluginExtended) {
    // @ts-ignore — extend is dynamically attached to the wtf default export.
    wtf.extend(wtfApiPlugin)
    pluginExtended = true
  }
}
24
+
25
/** Settings for a `WikipediaClient`. */
export interface WikipediaClientConfig {
  /** Forwarded to wtf_wikipedia as the `lang` option. */
  lang: string
  /** Forwarded to wtf_wikipedia as the `domain` option, only when set. */
  domain?: string
  /** Forwarded to wtf_wikipedia as the `Api-User-Agent` option. */
  userAgent: string
  /** Max requests per second. */
  rate: number
}
32
+
33
/** Shape of the options object that `WikipediaClient` passes to `wtf.fetch`. */
interface FetchOpts {
  lang?: string
  domain?: string
  'Api-User-Agent'?: string
  follow_redirects?: boolean
}
39
+
40
/**
 * Minimum-interval throttle: successive `wait()` calls resolve at least
 * `intervalMs` milliseconds apart, in call order.
 */
class RateLimiter {
  /** Time (ms since epoch) of the most recently reserved slot. */
  private lastTickMs = 0
  constructor(private intervalMs: number) {}

  /**
   * Resolve once the caller's slot has arrived.
   *
   * The slot is reserved synchronously, before any `await`, so callers that
   * invoke `wait()` concurrently queue up behind one another instead of all
   * computing the same slot and firing together.
   */
  async wait(): Promise<void> {
    const now = Date.now()
    const slot = Math.max(now, this.lastTickMs + this.intervalMs)
    this.lastTickMs = slot
    if (slot > now) {
      await new Promise<void>((resolve) => setTimeout(resolve, slot - now))
    }
  }
}
54
+
55
/**
 * Rate-limited, caching client for fetching parsed Wikipedia pages and
 * category members through wtf_wikipedia.
 */
export class WikipediaClient {
  /** Parsed documents keyed by their resolved canonical title. */
  private cache = new Map<string, any>()
  /** Requested canonical title → title it was found to redirect/resolve to. */
  private redirects = new Map<string, string>()
  private limiter: RateLimiter
  private fetchOpts: FetchOpts

  constructor(private config: WikipediaClientConfig) {
    ensurePlugin()
    // `rate` is floored at 0.1 req/s and the interval is never below 50 ms.
    this.limiter = new RateLimiter(Math.max(50, Math.floor(1000 / Math.max(0.1, config.rate))))
    this.fetchOpts = {
      lang: config.lang,
      'Api-User-Agent': config.userAgent,
      follow_redirects: true,
    }
    if (config.domain) this.fetchOpts.domain = config.domain
  }

  /**
   * Fetch and parse a Wikipedia article.
   * - Returns the cached Document if this title (or a title it is known to
   *   redirect to, through any number of hops) has been fetched before.
   * - Caches the Document under its resolved title and records a redirect
   *   entry for the requested title when the two differ.
   * - Returns null when the page does not exist, or when a previously
   *   recorded redirect leads to no cached Document.
   *
   * @throws Error when the underlying fetch fails; the message names the title.
   */
  async fetchArticle(rawTitle: string): Promise<any | null> {
    const title = canonicalTitle(rawTitle)
    if (this.cache.has(title)) return this.cache.get(title)
    if (this.redirects.has(title)) {
      return this.cache.get(this.followKnownRedirects(title)) ?? null
    }

    await this.limiter.wait()
    let doc: any
    try {
      doc = await (wtf as any).fetch(title, this.fetchOpts)
    } catch (err: any) {
      throw new Error(`Wikipedia fetch failed for "${title}": ${err?.message ?? err}`)
    }
    if (!doc) return null

    // wtf usually follows redirects automatically when follow_redirects=true,
    // but defensively handle the case where it surfaces a redirect doc.
    if (typeof doc.isRedirect === 'function' && doc.isRedirect()) {
      const target = doc.redirectTo?.()?.page
      if (typeof target === 'string') {
        this.redirects.set(title, canonicalTitle(target))
        return this.fetchArticle(target)
      }
    }

    const resolvedTitle = canonicalTitle(doc.title?.() ?? title)
    this.cache.set(resolvedTitle, doc)
    if (resolvedTitle !== title) this.redirects.set(title, resolvedTitle)
    return doc
  }

  /**
   * Fetch the member pages of a category (and optionally sub-categories).
   * @param category Category title (with or without "Category:" prefix).
   * @param recursive Whether to traverse sub-categories.
   * @param maxDepth Recursion depth when recursive=true.
   * @returns Members with canonical titles; any `type` other than `'subcat'`
   *          is reported as `'page'`.
   */
  async fetchCategoryPages(
    category: string,
    recursive: boolean,
    maxDepth: number,
  ): Promise<Array<{ title: string; type: 'page' | 'subcat' }>> {
    await this.limiter.wait()
    const opts: Record<string, unknown> = {
      lang: this.config.lang,
      'Api-User-Agent': this.config.userAgent,
      recursive,
      maxDepth,
    }
    if (this.config.domain) opts.domain = this.config.domain
    // @ts-ignore — getCategoryPages is attached at runtime via wtf.extend.
    const list: any[] = await wtf.getCategoryPages(category, opts)
    return (list ?? []).map((m) => ({
      title: canonicalTitle(m.title),
      type: m.type === 'subcat' ? 'subcat' : 'page',
    }))
  }

  /**
   * Follow recorded redirects from `title` to the last known hop.
   * A redirect that surfaces as a chain (A → B → C) is recorded one hop at a
   * time, so a single-hop lookup would miss the Document cached under C.
   * Stops at the first title seen twice so redirect loops terminate.
   */
  private followKnownRedirects(title: string): string {
    const seen = new Set<string>([title])
    let current = title
    for (;;) {
      const next = this.redirects.get(current)
      if (next === undefined || seen.has(next)) return current
      seen.add(next)
      current = next
    }
  }
}
139
+
140
/**
 * Normalize a Wikipedia title: underscores become spaces, every run of
 * whitespace collapses to a single space, and leading/trailing whitespace is
 * trimmed. `null` / `undefined` yield `''`.
 *
 * Colons and letter case are left untouched.
 */
export function canonicalTitle(s: string): string {
  const raw = (s ?? '').toString()
  return raw.replace(/_/g, ' ').replace(/\s+/g, ' ').trim()
}
144
+
145
/**
 * Category namespace prefix in the supported languages, matched
 * case-insensitively. Also accepts the leading-colon link form
 * (`:Category:Foo`, used to link to a category page) and stray whitespace
 * before the namespace name or before its colon.
 */
const CATEGORY_PREFIX =
  /^\s*:?\s*(Category|Catégorie|Kategorie|Categoría|Categoria|Categorie|Kategoria)\s*:/i

/** Detect a category-namespaced title. */
export function isCategoryTitle(title: string): boolean {
  return CATEGORY_PREFIX.test(title)
}
150
+
151
/**
 * Remove the category namespace prefix (as matched by `CATEGORY_PREFIX`) from
 * a title for display and trim the remainder. Titles without the prefix are
 * only trimmed.
 */
export function stripCategoryPrefix(title: string): string {
  const withoutPrefix = title.replace(CATEGORY_PREFIX, '')
  return withoutPrefix.trim()
}
package/src/connection.ts CHANGED
@@ -5,8 +5,8 @@
5
5
  * Reuses the same patterns as AbracadabraMCPServer but without MCP SDK dependency.
6
6
  */
7
7
  import * as Y from 'yjs'
8
- import { AbracadabraProvider, AbracadabraClient } from '@abraca/dabra'
9
- import type { ServerInfo, DocumentMeta, SpaceMeta } from '@abraca/dabra'
8
+ import { AbracadabraProvider, AbracadabraClient, Kind } from '@abraca/dabra'
9
+ import type { ServerInfo, DocumentMeta } from '@abraca/dabra'
10
10
  import { loadOrCreateKeypair, signChallenge } from './crypto.ts'
11
11
 
12
12
  export interface CLIConnectionConfig {
@@ -43,33 +43,13 @@ function waitForSync(
43
43
  })
44
44
  }
45
45
 
46
- /** Map a DocumentMeta to SpaceMeta shape for display compatibility. */
47
- function docToSpaceMeta(doc: DocumentMeta): SpaceMeta {
48
- const publicAccess = doc.public_access
49
- let visibility: SpaceMeta['visibility'] = 'private'
50
- if (publicAccess && publicAccess !== 'none') visibility = 'public'
51
-
52
- return {
53
- id: doc.id,
54
- doc_id: doc.id,
55
- name: doc.label ?? doc.id,
56
- description: doc.description ?? null,
57
- visibility,
58
- is_hub: doc.is_hub ?? false,
59
- owner_id: doc.owner_id ?? null,
60
- created_at: 0,
61
- updated_at: doc.updated_at ?? 0,
62
- public_access: publicAccess ?? null,
63
- }
64
- }
65
-
66
46
  export class CLIConnection {
67
47
  readonly config: CLIConnectionConfig
68
48
  readonly client: AbracadabraClient
69
49
 
70
50
  private _serverInfo: ServerInfo | null = null
71
51
  private _rootDocId: string | null = null
72
- private _spaces: SpaceMeta[] = []
52
+ private _spaces: DocumentMeta[] = []
73
53
  private _rootDoc: Y.Doc | null = null
74
54
  private _rootProvider: AbracadabraProvider | null = null
75
55
  private _userId: string | null = null
@@ -100,7 +80,7 @@ export class CLIConnection {
100
80
  return this._rootDocId
101
81
  }
102
82
 
103
- get spaces(): SpaceMeta[] {
83
+ get spaces(): DocumentMeta[] {
104
84
  return this._spaces
105
85
  }
106
86
 
@@ -154,35 +134,20 @@ export class CLIConnection {
154
134
  // Step 3: Discover server info
155
135
  this._serverInfo = await this.client.serverInfo()
156
136
 
157
- // Step 4: Discover root documents
158
- let initialDocId: string | null = this._serverInfo.index_doc_id ?? null
159
- try {
160
- const roots = await this.client.listRootDocuments()
161
- this._spaces = roots.map(docToSpaceMeta)
162
- const hub = roots.find((d: any) => d.is_hub)
163
- if (hub) {
164
- initialDocId = hub.id
165
- this.log(`Hub document: ${hub.label ?? hub.id} (${hub.id})`)
166
- } else if (roots.length > 0) {
167
- initialDocId = roots[0].id
168
- this.log(`No hub, using first root doc: ${roots[0].label ?? roots[0].id}`)
169
- }
170
- } catch {
171
- try {
172
- this._spaces = await this.client.listSpaces()
173
- const hub = this._spaces.find(s => s.is_hub)
174
- if (hub) {
175
- initialDocId = hub.doc_id
176
- } else if (this._spaces.length > 0) {
177
- initialDocId = this._spaces[0].doc_id
178
- }
179
- } catch {
180
- this.log('Neither /docs?root=true nor /spaces available, using index_doc_id')
181
- }
137
+ // Step 4: Pick an entry-point doc — first Space under the server root,
138
+ // falling back to the first top-level doc of any kind.
139
+ const roots = await this.client.listChildren()
140
+ this._spaces = roots.filter(d => d.kind === Kind.Space)
141
+ const first = this._spaces[0] ?? roots[0]
142
+ const initialDocId = first?.id ?? null
143
+ if (first) {
144
+ this.log(`Entry document: ${first.label ?? first.id} (${first.id})`)
182
145
  }
183
146
 
184
147
  if (!initialDocId) {
185
- throw new Error('No entry point found: server has neither spaces nor index_doc_id configured.')
148
+ throw new Error(
149
+ 'No entry point found: server has no top-level documents. Create a Space first.',
150
+ )
186
151
  }
187
152
 
188
153
  this._rootDocId = initialDocId
package/src/crypto.ts CHANGED
@@ -3,11 +3,12 @@
3
3
  * Mirrors @abraca/mcp/src/crypto.ts — standalone to avoid MCP SDK dependency.
4
4
  */
5
5
  import * as ed from '@noble/ed25519'
6
- import { sha512 } from '@noble/hashes/sha2'
6
+ import { sha512 } from '@noble/hashes/sha2.js'
7
7
  import { readFile, writeFile, mkdir } from 'node:fs/promises'
8
8
 
9
- // @noble/ed25519 v2+ requires explicit hash configuration
10
- ed.etc.sha512Sync = (...msgs: Uint8Array[]) => sha512(ed.etc.concatBytes(...msgs))
9
+ // @noble/ed25519 v3 hash hook
10
+ ed.hashes.sha512 = sha512
11
+ ed.hashes.sha512Async = (m: Uint8Array) => Promise.resolve(sha512(m))
11
12
  import { existsSync } from 'node:fs'
12
13
  import { homedir } from 'node:os'
13
14
  import { join, dirname } from 'node:path'
@@ -45,7 +46,7 @@ export async function loadOrCreateKeypair(keyPath?: string): Promise<CLIKeypair>
45
46
  }
46
47
 
47
48
  // Generate new keypair
48
- const privateKey = ed.utils.randomPrivateKey()
49
+ const privateKey = ed.utils.randomSecretKey()
49
50
  const publicKey = ed.getPublicKey(privateKey)
50
51
 
51
52
  // Ensure directory exists and write seed with restricted permissions
package/src/index.ts CHANGED
@@ -28,9 +28,18 @@ import './commands/awareness.ts'
28
28
  import './commands/files.ts'
29
29
  import './commands/permissions.ts'
30
30
  import './commands/page-types.ts'
31
+ import './commands/wiki/index.ts'
31
32
 
32
33
  // ── Commands that don't require a connection ─────────────────────────────────
33
- const NO_CONNECT_COMMANDS = new Set(['help', 'h', '?', 'version', 'v', 'page-types', 'types', 'doctypes'])
34
+ // "wiki" opens its own DocumentManager session via wiki/connect.ts, so the
35
+ // parent harness should NOT pre-open a CLIConnection (which would authenticate
36
+ // twice and hold an unused root provider).
37
+ const NO_CONNECT_COMMANDS = new Set([
38
+ 'help', 'h', '?',
39
+ 'version', 'v',
40
+ 'page-types', 'types', 'doctypes',
41
+ 'wiki', 'wikipedia',
42
+ ])
34
43
 
35
44
  async function main() {
36
45
  const args = parseArgs(process.argv)