@abraca/cli 2.25.0 → 2.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  import * as Y from "yjs";
3
- import { AbracadabraClient, AbracadabraProvider, DocumentManager, Kind } from "@abraca/dabra";
3
+ import { AbracadabraClient, AbracadabraProvider, Kind } from "@abraca/dabra";
4
4
  import * as ed from "@noble/ed25519";
5
5
  import { mkdir, readFile, writeFile } from "node:fs/promises";
6
6
  import * as fs from "node:fs";
@@ -8,8 +8,6 @@ import { existsSync } from "node:fs";
8
8
  import { homedir } from "node:os";
9
9
  import * as path from "node:path";
10
10
  import { dirname, join } from "node:path";
11
- import wtf from "wtf_wikipedia";
12
- import wtfApiPlugin from "wtf-plugin-api";
13
11
 
14
12
  //#region packages/cli/src/parser.ts
15
13
  /**
@@ -5531,695 +5529,6 @@ registerCommand({
5531
5529
  }
5532
5530
  });
5533
5531
 
5534
- //#endregion
5535
- //#region packages/cli/src/commands/wiki/wikipedia.ts
5536
- /**
5537
- * Rate-limited wrapper around wtf_wikipedia + wtf-plugin-api.
5538
- *
5539
- * Responsibilities:
5540
- * - Throttle requests to respect Wikimedia API etiquette
5541
- * - Cache parsed Documents by canonical title
5542
- * - Resolve redirects so callers always see the redirect target
5543
- * - Expose getCategoryPages via wtf-plugin-api
5544
- */
5545
- let pluginExtended = false;
5546
- function ensurePlugin() {
5547
- if (pluginExtended) return;
5548
- wtf.extend(wtfApiPlugin);
5549
- pluginExtended = true;
5550
- }
5551
- /** A token-bucket-ish throttle: at most `rate` calls per second, FIFO. */
5552
- var RateLimiter = class {
5553
- lastTickMs = 0;
5554
- constructor(intervalMs) {
5555
- this.intervalMs = intervalMs;
5556
- }
5557
- async wait() {
5558
- const now = Date.now();
5559
- const earliest = this.lastTickMs + this.intervalMs;
5560
- if (now < earliest) await new Promise((r) => setTimeout(r, earliest - now));
5561
- this.lastTickMs = Math.max(now, earliest);
5562
- }
5563
- };
5564
- var WikipediaClient = class {
5565
- cache = /* @__PURE__ */ new Map();
5566
- redirects = /* @__PURE__ */ new Map();
5567
- limiter;
5568
- fetchOpts;
5569
- constructor(config) {
5570
- this.config = config;
5571
- ensurePlugin();
5572
- this.limiter = new RateLimiter(Math.max(50, Math.floor(1e3 / Math.max(.1, config.rate))));
5573
- this.fetchOpts = {
5574
- lang: config.lang,
5575
- "Api-User-Agent": config.userAgent,
5576
- follow_redirects: true
5577
- };
5578
- if (config.domain) this.fetchOpts.domain = config.domain;
5579
- }
5580
- /**
5581
- * Fetch and parse a Wikipedia article.
5582
- * - Returns the cached Document if we've seen this title before.
5583
- * - Follows redirects and caches under both source and target titles.
5584
- * - Returns null when the page does not exist.
5585
- */
5586
- async fetchArticle(rawTitle) {
5587
- const title = canonicalTitle(rawTitle);
5588
- if (this.cache.has(title)) return this.cache.get(title);
5589
- if (this.redirects.has(title)) {
5590
- const target = this.redirects.get(title);
5591
- return this.cache.get(target) ?? null;
5592
- }
5593
- await this.limiter.wait();
5594
- let doc;
5595
- try {
5596
- doc = await wtf.fetch(title, this.fetchOpts);
5597
- } catch (err) {
5598
- throw new Error(`Wikipedia fetch failed for "${title}": ${err?.message ?? err}`);
5599
- }
5600
- if (!doc) return null;
5601
- if (typeof doc.isRedirect === "function" && doc.isRedirect()) {
5602
- const target = doc.redirectTo?.()?.page;
5603
- if (typeof target === "string") {
5604
- this.redirects.set(title, canonicalTitle(target));
5605
- return await this.fetchArticle(target);
5606
- }
5607
- }
5608
- const resolvedTitle = canonicalTitle(doc.title?.() ?? title);
5609
- this.cache.set(resolvedTitle, doc);
5610
- if (resolvedTitle !== title) this.redirects.set(title, resolvedTitle);
5611
- return doc;
5612
- }
5613
- /**
5614
- * Fetch the member pages of a category (and optionally sub-categories).
5615
- * @param category Category title (with or without "Category:" prefix).
5616
- * @param recursive Whether to traverse sub-categories.
5617
- * @param maxDepth Recursion depth when recursive=true.
5618
- */
5619
- async fetchCategoryPages(category, recursive, maxDepth) {
5620
- await this.limiter.wait();
5621
- const opts = {
5622
- lang: this.config.lang,
5623
- "Api-User-Agent": this.config.userAgent,
5624
- recursive,
5625
- maxDepth
5626
- };
5627
- if (this.config.domain) opts.domain = this.config.domain;
5628
- return (await wtf.getCategoryPages(category, opts) ?? []).map((m) => ({
5629
- title: canonicalTitle(m.title),
5630
- type: m.type === "subcat" ? "subcat" : "page"
5631
- }));
5632
- }
5633
- };
5634
- /** Normalize a Wikipedia title — trim, collapse spaces, strip leading/trailing colons. */
5635
- function canonicalTitle(s) {
5636
- return (s ?? "").toString().replace(/_/g, " ").replace(/\s+/g, " ").trim();
5637
- }
5638
- /** Detect a category-namespaced title. */
5639
- const CATEGORY_PREFIX = /^(Category|Catégorie|Kategorie|Categoría|Categoria|Categorie|Kategoria):/i;
5640
- function isCategoryTitle(title) {
5641
- return CATEGORY_PREFIX.test(title);
5642
- }
5643
- /** Strip the "Category:" prefix for display. */
5644
- function stripCategoryPrefix(title) {
5645
- return title.replace(CATEGORY_PREFIX, "").trim();
5646
- }
5647
-
5648
- //#endregion
5649
- //#region packages/cli/src/commands/wiki/snapshot.ts
5650
- function snapshotArticle(doc, title) {
5651
- return {
5652
- title,
5653
- linkTitles: collectLinkTitles(doc),
5654
- categories: collectCategories(doc),
5655
- sections: snapshotSections(doc.sections?.() ?? []),
5656
- infobox: snapshotInfobox(doc.infobox?.()),
5657
- lead: leadParagraph(doc),
5658
- url: typeof doc.url === "function" ? doc.url() : null
5659
- };
5660
- }
5661
- function prettyCategoryLabel(catTitle) {
5662
- return stripCategoryPrefix(catTitle);
5663
- }
5664
- function collectLinkTitles(doc) {
5665
- const links = doc.links?.() ?? [];
5666
- const out = /* @__PURE__ */ new Set();
5667
- for (const l of links) {
5668
- if (!l) continue;
5669
- const page = typeof l.page === "function" ? l.page() : null;
5670
- if (typeof page !== "string" || page.length === 0) continue;
5671
- if (isCategoryTitle(page)) continue;
5672
- out.add(canonicalTitle(page));
5673
- }
5674
- return [...out];
5675
- }
5676
- function collectCategories(doc) {
5677
- const out = [];
5678
- for (const c of doc.categories?.() ?? []) {
5679
- const norm = canonicalTitle(c);
5680
- if (norm) out.push(norm);
5681
- }
5682
- return out;
5683
- }
5684
- function snapshotSections(rawSections) {
5685
- const all = rawSections.map((s) => ({
5686
- raw: s,
5687
- title: s.title?.() || "",
5688
- parentRef: typeof s.parent === "function" ? s.parent() : null,
5689
- children: []
5690
- }));
5691
- const byRaw = /* @__PURE__ */ new Map();
5692
- for (const s of all) byRaw.set(s.raw, s);
5693
- const roots = [];
5694
- for (const s of all) if (s.parentRef && byRaw.has(s.parentRef)) byRaw.get(s.parentRef).children.push(materialize(s));
5695
- else roots.push(s);
5696
- return roots.map(materialize);
5697
- }
5698
- function materialize(node) {
5699
- const lists = node.raw.lists?.() ?? [];
5700
- const paragraphs = node.raw.paragraphs?.() ?? [];
5701
- let listLength = 0;
5702
- for (const l of lists) {
5703
- const lines = l.lines?.() ?? [];
5704
- listLength += lines.length;
5705
- }
5706
- const isList = lists.length > 0 && (paragraphs.length === 0 || listLength >= paragraphs.length * 2);
5707
- const bodyParts = [];
5708
- for (const p of paragraphs) {
5709
- const md = paragraphMarkdown(p);
5710
- if (md) bodyParts.push(md);
5711
- }
5712
- for (const l of lists) {
5713
- const lines = l.lines?.() ?? [];
5714
- for (const line of lines) {
5715
- const text = lineText(line);
5716
- if (text) bodyParts.push(`- ${text}`);
5717
- }
5718
- }
5719
- return {
5720
- title: node.title,
5721
- body: bodyParts.join("\n\n"),
5722
- isList,
5723
- listLength,
5724
- children: node.children
5725
- };
5726
- }
5727
- function snapshotInfobox(box) {
5728
- if (!box) return void 0;
5729
- const data = typeof box.json === "function" ? box.json() : null;
5730
- if (!data || typeof data !== "object") return void 0;
5731
- const rows = [];
5732
- for (const [key, val] of Object.entries(data)) {
5733
- const value = stringifyInfoboxValue(val);
5734
- if (!value) continue;
5735
- rows.push({
5736
- key: humanKey(key),
5737
- value
5738
- });
5739
- }
5740
- return rows.length > 0 ? rows : void 0;
5741
- }
5742
- function stringifyInfoboxValue(val) {
5743
- if (val == null) return "";
5744
- if (typeof val === "string") return val;
5745
- if (typeof val === "number" || typeof val === "boolean") return String(val);
5746
- if (Array.isArray(val)) return val.map(stringifyInfoboxValue).filter(Boolean).join(", ");
5747
- if (typeof val === "object") {
5748
- const o = val;
5749
- if (typeof o.text === "string") return o.text;
5750
- if (typeof o.number === "number") return String(o.number);
5751
- }
5752
- return "";
5753
- }
5754
- function humanKey(k) {
5755
- return k.replace(/_/g, " ").replace(/^./, (m) => m.toUpperCase());
5756
- }
5757
- function leadParagraph(doc) {
5758
- const first = (doc.paragraphs?.() ?? [])[0];
5759
- if (!first) return "";
5760
- return paragraphMarkdown(first);
5761
- }
5762
- /**
5763
- * Render a paragraph as markdown, replacing internal links with `[[Title]]`.
5764
- * The streaming orchestrator's link rewriter later swaps `[[Title]]` →
5765
- * `[[docId|label]]` once IDs are known.
5766
- */
5767
- function paragraphMarkdown(paragraph) {
5768
- const sentences = paragraph.sentences?.() ?? [];
5769
- const out = [];
5770
- for (const s of sentences) out.push(sentenceWithWikilinks(s));
5771
- return out.join(" ").trim();
5772
- }
5773
- function sentenceWithWikilinks(sentence) {
5774
- const text = (sentence.text?.() ?? "").toString();
5775
- const links = sentence.links?.() ?? [];
5776
- if (links.length === 0) return text;
5777
- let result = text;
5778
- const replacements = links.map((l) => {
5779
- const page = typeof l.page === "function" ? l.page() : null;
5780
- const display = typeof l.text === "function" ? l.text() : null;
5781
- if (typeof page !== "string" || page.length === 0) return null;
5782
- if (isCategoryTitle(page)) return null;
5783
- const shown = display && display.length > 0 ? display : page;
5784
- return {
5785
- page: canonicalTitle(page),
5786
- shown
5787
- };
5788
- }).filter((x) => x !== null).sort((a, b) => b.shown.length - a.shown.length);
5789
- for (const { page, shown } of replacements) {
5790
- if (!result.includes(shown)) continue;
5791
- const replacement = shown === page ? `[[${page}]]` : `[[${page}|${shown}]]`;
5792
- result = result.replace(shown, replacement);
5793
- }
5794
- return result;
5795
- }
5796
- function lineText(line) {
5797
- if (!line) return "";
5798
- if (typeof line === "string") return line;
5799
- if (typeof line.text === "string") return line.text;
5800
- if (typeof line.text === "function") return line.text();
5801
- return "";
5802
- }
5803
-
5804
- //#endregion
5805
- //#region packages/cli/src/commands/wiki/render.ts
5806
- const ICONS = {
5807
- graph: "git-fork",
5808
- article: "book-open",
5809
- category: "tag",
5810
- infobox: "info",
5811
- outline: "list",
5812
- gallery: "images",
5813
- section: "pilcrow",
5814
- categories: "tags"
5815
- };
5816
- /** Decide a page type for a section based on its shape. */
5817
- function pickSectionType(section) {
5818
- if (section.children.length > 0) return {
5819
- type: "outline",
5820
- icon: ICONS.outline
5821
- };
5822
- if (section.isList && section.listLength >= 5) return {
5823
- type: "outline",
5824
- icon: ICONS.outline
5825
- };
5826
- return {
5827
- type: "doc",
5828
- icon: ICONS.section
5829
- };
5830
- }
5831
- /** Render the lead paragraph as the article-doc body. */
5832
- function renderArticleLead(article) {
5833
- return article.lead ?? "";
5834
- }
5835
- /** Render the article as a single doc, sections + infobox inlined. */
5836
- function renderArticleSingleDoc(article) {
5837
- const parts = [];
5838
- if (article.lead) parts.push(article.lead);
5839
- if (article.infobox && article.infobox.length > 0) parts.push("## Infobox", renderInfoboxBody(article.infobox));
5840
- for (const section of article.sections) parts.push(...renderSectionInline(section, 2));
5841
- return parts.join("\n\n");
5842
- }
5843
- function renderSectionInline(section, level) {
5844
- const out = [];
5845
- const prefix = "#".repeat(Math.min(6, level));
5846
- if (section.title) out.push(`${prefix} ${section.title}`);
5847
- if (section.body.trim()) out.push(section.body);
5848
- for (const child of section.children) out.push(...renderSectionInline(child, level + 1));
5849
- return out;
5850
- }
5851
- function renderInfoboxBody(rows) {
5852
- return rows.map((r) => `- **${r.key}:** ${r.value}`).join("\n");
5853
- }
5854
- function renderCategoryBody(members, subcategories) {
5855
- const parts = [];
5856
- if (members.length > 0) {
5857
- parts.push("## Pages");
5858
- parts.push(members.map((m) => `- [[${m}]]`).join("\n"));
5859
- }
5860
- if (subcategories.length > 0) {
5861
- parts.push("## Sub-categories");
5862
- parts.push(subcategories.map((s) => `- ${s}`).join("\n"));
5863
- }
5864
- return parts.join("\n\n");
5865
- }
5866
- /**
5867
- * Replace `[[Title]]` / `[[Title|Alias]]` in markdown with
5868
- * `[[docId|label]]` using the title→docId map. Unresolved titles fall
5869
- * back to plain text (their alias or original title).
5870
- */
5871
- function rewriteLinks(markdown, titleToDocId) {
5872
- return markdown.replace(/\[\[([^\]|]+?)(?:\|([^\]]+?))?\]\]/g, (_match, target, alias) => {
5873
- const title = target.trim();
5874
- const docId = titleToDocId.get(title);
5875
- const display = (alias && alias.trim().length > 0 ? alias : title).trim();
5876
- if (!docId) return display;
5877
- return `[[${docId}|${display}]]`;
5878
- });
5879
- }
5880
-
5881
- //#endregion
5882
- //#region packages/cli/src/commands/wiki/connect.ts
5883
- /**
5884
- * Open a DocumentManager session for the wiki command, mirroring the
5885
- * auth/register flow that CLIConnection uses but using the modern public API.
5886
- *
5887
- * Reuses the CLI's Ed25519 keypair handling (loadOrCreateKeypair, signChallenge)
5888
- * so the wiki command authenticates with the same identity as every other
5889
- * subcommand.
5890
- */
5891
- async function openSession(config) {
5892
- const keypair = await loadOrCreateKeypair(config.keyFile);
5893
- const sign = (challenge) => Promise.resolve(signChallenge(challenge, keypair.privateKey));
5894
- const dm = new DocumentManager({
5895
- url: config.url,
5896
- name: config.name ?? "Wiki Extractor",
5897
- color: config.color,
5898
- quiet: config.quiet
5899
- });
5900
- try {
5901
- await dm.client.loginWithKey(keypair.publicKeyB64, sign);
5902
- } catch (err) {
5903
- const status = err?.status ?? err?.response?.status;
5904
- if (status === 404 || status === 422) {
5905
- if (!config.quiet) console.error("[abracadabra] Key not registered, creating new account...");
5906
- await dm.client.registerWithKey({
5907
- publicKey: keypair.publicKeyB64,
5908
- username: (config.name ?? "wiki-extractor").replace(/\s+/g, "-").toLowerCase(),
5909
- displayName: config.name ?? "Wiki Extractor",
5910
- deviceName: "CLI Wiki",
5911
- inviteCode: config.inviteCode
5912
- });
5913
- await dm.client.loginWithKey(keypair.publicKeyB64, sign);
5914
- } else throw err;
5915
- }
5916
- await dm.connect();
5917
- const rootDocId = dm.rootDocId;
5918
- if (!rootDocId) throw new Error("Connected but no rootDocId — server has no spaces.");
5919
- return {
5920
- dm,
5921
- rootDocId
5922
- };
5923
- }
5924
-
5925
- //#endregion
5926
- //#region packages/cli/src/commands/wiki/index.ts
5927
- registerCommand({
5928
- name: "wiki",
5929
- aliases: ["wikipedia"],
5930
- description: "Fetch Wikipedia articles into a graph of docs (streaming).",
5931
- usage: [
5932
- "wiki \"<Article Title>\"",
5933
- " mode=single|split single doc per article OR split into sections+infobox [split]",
5934
- " depth=<n> follow internal links to depth N [1]",
5935
- " category-depth=<n> recurse into sub-categories [1]",
5936
- " lang=<code> wiki language [en]",
5937
- " domain=<host> 3rd-party MediaWiki host (overrides lang)",
5938
- " parent=<docId> parent doc for the new graph [active space root]",
5939
- " user-agent=<str> Api-User-Agent header (REQUIRED by Wikimedia etiquette)",
5940
- " rate=<rps> max wikipedia requests per second [3]",
5941
- " --include-categories expand each article's categories into nested graphs",
5942
- " --dry-run fetch only the entry article, print outline, no writes"
5943
- ].join("\n"),
5944
- async run(_conn, args) {
5945
- const opts = parseOptions(args);
5946
- if (typeof opts === "string") return opts;
5947
- const log = (msg) => {
5948
- if (!args.flags.has("quiet") && !args.flags.has("q")) console.error(`[wiki] ${msg}`);
5949
- };
5950
- const wp = new WikipediaClient({
5951
- lang: opts.lang,
5952
- domain: opts.domain,
5953
- userAgent: opts.userAgent,
5954
- rate: opts.rate
5955
- });
5956
- if (opts.dryRun) {
5957
- log(`fetch ${opts.title}`);
5958
- const doc = await wp.fetchArticle(opts.title);
5959
- if (!doc) return `Article not found: "${opts.title}"`;
5960
- const snap = snapshotArticle(doc, canonicalTitle(doc.title?.() ?? opts.title));
5961
- return [
5962
- `Entry: ${snap.title}`,
5963
- `URL: ${snap.url ?? "(none)"}`,
5964
- `Internal links: ${snap.linkTitles.length}`,
5965
- `Categories: ${snap.categories.length}`,
5966
- `Sections: ${snap.sections.length}`,
5967
- `Has infobox: ${snap.infobox && snap.infobox.length > 0 ? "yes" : "no"}`,
5968
- "",
5969
- "── Sections ──",
5970
- printSections(snap.sections, "")
5971
- ].join("\n");
5972
- }
5973
- const env = globalThis.process?.env ?? {};
5974
- const url = env["ABRA_URL"];
5975
- if (!url) return "ABRA_URL is required to write to the server. Set it or pass --dry-run.";
5976
- const { dm } = await openSession({
5977
- url,
5978
- name: env["ABRA_NAME"],
5979
- color: env["ABRA_COLOR"],
5980
- inviteCode: env["ABRA_INVITE_CODE"],
5981
- keyFile: env["ABRA_KEY_FILE"],
5982
- quiet: args.flags.has("quiet") || args.flags.has("q")
5983
- });
5984
- try {
5985
- const result = await runStreaming(dm, wp, opts, log);
5986
- return [`Done. Created ${result.articleCount} articles${result.categoryCount > 0 ? ` + ${result.categoryCount} categories` : ""}.`, `Root: ${result.rootDocId}`].join("\n");
5987
- } finally {
5988
- await dm.destroy().catch(() => {});
5989
- }
5990
- }
5991
- });
5992
- async function runStreaming(dm, wp, opts, log) {
5993
- const titleToDocId = /* @__PURE__ */ new Map();
5994
- const fetched = /* @__PURE__ */ new Map();
5995
- const childrenCreated = /* @__PURE__ */ new Set();
5996
- const categoryToDocId = /* @__PURE__ */ new Map();
5997
- let categoriesContainerId = null;
5998
- log(`fetch ${opts.title}`);
5999
- const entryDoc = await wp.fetchArticle(opts.title);
6000
- if (!entryDoc) throw new Error(`Article not found: "${opts.title}"`);
6001
- const entryTitle = canonicalTitle(entryDoc.title?.() ?? opts.title);
6002
- const entrySnap = snapshotArticle(entryDoc, entryTitle);
6003
- fetched.set(entryTitle, entrySnap);
6004
- const rootEntry = dm.tree.create({
6005
- parentId: opts.parentDocId ?? null,
6006
- label: entryTitle,
6007
- type: "graph",
6008
- meta: { icon: ICONS.graph }
6009
- });
6010
- log(`+ ${rootEntry.id.slice(0, 8)}… ${entryTitle} (graph)`);
6011
- const entryArticleId = createArticleShell(dm, entrySnap, rootEntry.id, log);
6012
- titleToDocId.set(entryTitle, entryArticleId);
6013
- const queue = [{
6014
- title: entryTitle,
6015
- depth: 0
6016
- }];
6017
- let articleCount = 0;
6018
- while (queue.length > 0) {
6019
- const { title, depth } = queue.shift();
6020
- const articleDocId = titleToDocId.get(title);
6021
- let snap = fetched.get(title);
6022
- if (!snap) {
6023
- log(`fetch [d${depth}] ${title}`);
6024
- try {
6025
- const doc = await wp.fetchArticle(title);
6026
- if (!doc) {
6027
- log(` not found — leaving stub`);
6028
- continue;
6029
- }
6030
- snap = snapshotArticle(doc, canonicalTitle(doc.title?.() ?? title));
6031
- fetched.set(title, snap);
6032
- } catch (err) {
6033
- log(`! fetch failed: ${err?.message ?? err}`);
6034
- continue;
6035
- }
6036
- }
6037
- if (opts.mode === "split" && !childrenCreated.has(title)) {
6038
- createArticleChildren(dm, snap, articleDocId, log);
6039
- childrenCreated.add(title);
6040
- }
6041
- if (depth < opts.depth) for (const linkTitle of snap.linkTitles) {
6042
- if (titleToDocId.has(linkTitle)) continue;
6043
- const shell = dm.tree.create({
6044
- parentId: rootEntry.id,
6045
- label: linkTitle,
6046
- type: "doc",
6047
- meta: { icon: ICONS.article }
6048
- });
6049
- titleToDocId.set(linkTitle, shell.id);
6050
- queue.push({
6051
- title: linkTitle,
6052
- depth: depth + 1
6053
- });
6054
- log(`+ ${shell.id.slice(0, 8)}… ${linkTitle} (doc, shell)`);
6055
- }
6056
- if (opts.includeCategories && snap.categories.length > 0) {
6057
- if (!categoriesContainerId) {
6058
- const c = dm.tree.create({
6059
- parentId: rootEntry.id,
6060
- label: "Categories",
6061
- type: "graph",
6062
- meta: { icon: ICONS.categories }
6063
- });
6064
- categoriesContainerId = c.id;
6065
- log(`+ ${c.id.slice(0, 8)}… Categories (graph)`);
6066
- }
6067
- for (const catTitle of snap.categories) {
6068
- if (categoryToDocId.has(catTitle)) continue;
6069
- const cat = dm.tree.create({
6070
- parentId: categoriesContainerId,
6071
- label: prettyCategoryLabel(catTitle),
6072
- type: "graph",
6073
- meta: { icon: ICONS.category }
6074
- });
6075
- categoryToDocId.set(catTitle, cat.id);
6076
- log(`+ ${cat.id.slice(0, 8)}… ${prettyCategoryLabel(catTitle)} (graph, cat)`);
6077
- }
6078
- }
6079
- const body = opts.mode === "split" ? renderArticleLead(snap) : renderArticleSingleDoc(snap);
6080
- if (body.trim().length > 0) {
6081
- const rewritten = rewriteLinks(body, titleToDocId);
6082
- try {
6083
- await dm.content.write(articleDocId, rewritten);
6084
- log(`✓ body ${title}`);
6085
- } catch (err) {
6086
- log(`! body write failed for ${title}: ${err?.message ?? err}`);
6087
- }
6088
- }
6089
- if (opts.mode === "split") await writeChildrenBodies(dm, snap, articleDocId, titleToDocId, log);
6090
- articleCount++;
6091
- }
6092
- let categoryCount = 0;
6093
- if (opts.includeCategories && categoryToDocId.size > 0) for (const [catTitle, catDocId] of categoryToDocId) {
6094
- log(`category ${catTitle}`);
6095
- try {
6096
- const members = await wp.fetchCategoryPages(catTitle, opts.categoryDepth > 0, Math.max(0, opts.categoryDepth));
6097
- const memberArticles = [];
6098
- const subcats = [];
6099
- for (const m of members) if (m.type === "subcat") subcats.push(prettyCategoryLabel(m.title));
6100
- else memberArticles.push(m.title);
6101
- const rewritten = rewriteLinks(renderCategoryBody(memberArticles, subcats), titleToDocId);
6102
- if (rewritten.trim().length > 0) {
6103
- await dm.content.write(catDocId, rewritten);
6104
- log(`✓ body category ${catTitle}`);
6105
- }
6106
- categoryCount++;
6107
- } catch (err) {
6108
- log(`! category ${catTitle}: ${err?.message ?? err}`);
6109
- }
6110
- }
6111
- return {
6112
- rootDocId: rootEntry.id,
6113
- articleCount,
6114
- categoryCount
6115
- };
6116
- }
6117
- function createArticleShell(dm, article, parentId, log) {
6118
- const meta = { icon: ICONS.article };
6119
- if (article.url) meta.url = article.url;
6120
- const entry = dm.tree.create({
6121
- parentId,
6122
- label: article.title,
6123
- type: "doc",
6124
- meta
6125
- });
6126
- log(`+ ${entry.id.slice(0, 8)}… ${article.title} (doc)`);
6127
- return entry.id;
6128
- }
6129
- /**
6130
- * Create section + infobox child docs for a split-mode article. Returns nothing
6131
- * — children get bodies written later in writeChildrenBodies.
6132
- */
6133
- function createArticleChildren(dm, article, articleDocId, log) {
6134
- if (article.infobox && article.infobox.length > 0) {
6135
- const ib = dm.tree.create({
6136
- parentId: articleDocId,
6137
- label: "Infobox",
6138
- type: "outline",
6139
- meta: { icon: ICONS.infobox }
6140
- });
6141
- log(` + ${ib.id.slice(0, 8)}… Infobox (outline)`);
6142
- article._infoboxDocId = ib.id;
6143
- }
6144
- for (const section of article.sections) createSectionShell(dm, section, articleDocId, log);
6145
- }
6146
- function createSectionShell(dm, section, parentDocId, log) {
6147
- const hasChildren = section.children.length > 0;
6148
- if (!section.body.trim() && !hasChildren) return;
6149
- const { type, icon } = pickSectionType(section);
6150
- const entry = dm.tree.create({
6151
- parentId: parentDocId,
6152
- label: section.title || "Untitled section",
6153
- type,
6154
- meta: { icon }
6155
- });
6156
- log(` + ${entry.id.slice(0, 8)}… ${entry.label} (${type})`);
6157
- section._docId = entry.id;
6158
- for (const child of section.children) createSectionShell(dm, child, entry.id, log);
6159
- }
6160
- async function writeChildrenBodies(dm, article, _articleDocId, titleToDocId, log) {
6161
- const infoboxDocId = article._infoboxDocId;
6162
- if (infoboxDocId && article.infobox && article.infobox.length > 0) try {
6163
- await dm.content.write(infoboxDocId, renderInfoboxBody(article.infobox));
6164
- } catch (err) {
6165
- log(`! infobox body write failed: ${err?.message ?? err}`);
6166
- }
6167
- for (const section of article.sections) await writeSectionBody(dm, section, titleToDocId, log);
6168
- }
6169
- async function writeSectionBody(dm, section, titleToDocId, log) {
6170
- const docId = section._docId;
6171
- if (docId && section.body.trim().length > 0) try {
6172
- await dm.content.write(docId, rewriteLinks(section.body, titleToDocId));
6173
- } catch (err) {
6174
- log(`! section body write failed for ${section.title}: ${err?.message ?? err}`);
6175
- }
6176
- for (const child of section.children) await writeSectionBody(dm, child, titleToDocId, log);
6177
- }
6178
- function parseOptions(args) {
6179
- const title = args.positional[0]?.trim() || args.params["title"];
6180
- if (!title) return "Missing required positional argument: <title>. Example: abracadabra wiki \"Toronto Raptors\"";
6181
- const env = globalThis.process?.env ?? {};
6182
- const userAgent = args.params["user-agent"] || args.params["userAgent"] || env["ABRA_WIKI_USER_AGENT"];
6183
- if (!userAgent) return ["Missing required parameter: user-agent=\"your-name (you@example.com)\"", "(Wikimedia etiquette requires an Api-User-Agent header. Pass user-agent=... or set ABRA_WIKI_USER_AGENT.)"].join("\n");
6184
- const mode = args.params["mode"] ?? "split";
6185
- if (mode !== "single" && mode !== "split") return `Invalid mode "${mode}". Use mode=single or mode=split.`;
6186
- const depth = parseIntOr(args.params["depth"], 1);
6187
- const categoryDepth = parseIntOr(args.params["category-depth"] ?? args.params["categoryDepth"], 1);
6188
- const rate = parseFloatOr(args.params["rate"], 3);
6189
- return {
6190
- title,
6191
- mode,
6192
- depth,
6193
- categoryDepth,
6194
- includeCategories: args.flags.has("include-categories") || args.flags.has("includeCategories"),
6195
- lang: args.params["lang"] ?? "en",
6196
- domain: args.params["domain"],
6197
- parentDocId: args.params["parent"],
6198
- userAgent,
6199
- rate,
6200
- dryRun: args.flags.has("dry-run") || args.flags.has("dryRun")
6201
- };
6202
- }
6203
- function parseIntOr(s, fallback) {
6204
- if (!s) return fallback;
6205
- const n = Number.parseInt(s, 10);
6206
- return Number.isFinite(n) && n >= 0 ? n : fallback;
6207
- }
6208
- function parseFloatOr(s, fallback) {
6209
- if (!s) return fallback;
6210
- const n = Number.parseFloat(s);
6211
- return Number.isFinite(n) && n > 0 ? n : fallback;
6212
- }
6213
- function printSections(sections, indent) {
6214
- const lines = [];
6215
- for (const s of sections) {
6216
- const hint = s.body ? ` (${s.body.length}b)` : "";
6217
- lines.push(`${indent}- ${s.title}${hint}${s.children.length > 0 ? ` [${s.children.length} sub]` : ""}`);
6218
- if (s.children.length > 0) lines.push(printSections(s.children, indent + " "));
6219
- }
6220
- return lines.join("\n");
6221
- }
6222
-
6223
5532
  //#endregion
6224
5533
  //#region packages/cli/src/index.ts
6225
5534
  /**
@@ -6243,9 +5552,7 @@ const NO_CONNECT_COMMANDS = new Set([
6243
5552
  "v",
6244
5553
  "page-types",
6245
5554
  "types",
6246
- "doctypes",
6247
- "wiki",
6248
- "wikipedia"
5555
+ "doctypes"
6249
5556
  ]);
6250
5557
  async function main() {
6251
5558
  const args = parseArgs(process.argv);