@abraca/cli 2.26.0 → 2.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,10 +37,6 @@ node_fs = __toESM(node_fs);
37
37
  let node_os = require("node:os");
38
38
  let node_path = require("node:path");
39
39
  node_path = __toESM(node_path);
40
- let wtf_wikipedia = require("wtf_wikipedia");
41
- wtf_wikipedia = __toESM(wtf_wikipedia);
42
- let wtf_plugin_api = require("wtf-plugin-api");
43
- wtf_plugin_api = __toESM(wtf_plugin_api);
44
40
 
45
41
  //#region packages/cli/src/parser.ts
46
42
  /**
@@ -5562,695 +5558,6 @@ registerCommand({
5562
5558
  }
5563
5559
  });
5564
5560
 
5565
- //#endregion
5566
- //#region packages/cli/src/commands/wiki/wikipedia.ts
5567
- /**
5568
- * Rate-limited wrapper around wtf_wikipedia + wtf-plugin-api.
5569
- *
5570
- * Responsibilities:
5571
- * - Throttle requests to respect Wikimedia API etiquette
5572
- * - Cache parsed Documents by canonical title
5573
- * - Resolve redirects so callers always see the redirect target
5574
- * - Expose getCategoryPages via wtf-plugin-api
5575
- */
5576
- let pluginExtended = false;
5577
- function ensurePlugin() {
5578
- if (pluginExtended) return;
5579
- wtf_wikipedia.default.extend(wtf_plugin_api.default);
5580
- pluginExtended = true;
5581
- }
5582
- /** A token-bucket-ish throttle: at most `rate` calls per second, FIFO. */
5583
- var RateLimiter = class {
5584
- lastTickMs = 0;
5585
- constructor(intervalMs) {
5586
- this.intervalMs = intervalMs;
5587
- }
5588
- async wait() {
5589
- const now = Date.now();
5590
- const earliest = this.lastTickMs + this.intervalMs;
5591
- if (now < earliest) await new Promise((r) => setTimeout(r, earliest - now));
5592
- this.lastTickMs = Math.max(now, earliest);
5593
- }
5594
- };
5595
- var WikipediaClient = class {
5596
- cache = /* @__PURE__ */ new Map();
5597
- redirects = /* @__PURE__ */ new Map();
5598
- limiter;
5599
- fetchOpts;
5600
- constructor(config) {
5601
- this.config = config;
5602
- ensurePlugin();
5603
- this.limiter = new RateLimiter(Math.max(50, Math.floor(1e3 / Math.max(.1, config.rate))));
5604
- this.fetchOpts = {
5605
- lang: config.lang,
5606
- "Api-User-Agent": config.userAgent,
5607
- follow_redirects: true
5608
- };
5609
- if (config.domain) this.fetchOpts.domain = config.domain;
5610
- }
5611
- /**
5612
- * Fetch and parse a Wikipedia article.
5613
- * - Returns the cached Document if we've seen this title before.
5614
- * - Follows redirects and caches under both source and target titles.
5615
- * - Returns null when the page does not exist.
5616
- */
5617
- async fetchArticle(rawTitle) {
5618
- const title = canonicalTitle(rawTitle);
5619
- if (this.cache.has(title)) return this.cache.get(title);
5620
- if (this.redirects.has(title)) {
5621
- const target = this.redirects.get(title);
5622
- return this.cache.get(target) ?? null;
5623
- }
5624
- await this.limiter.wait();
5625
- let doc;
5626
- try {
5627
- doc = await wtf_wikipedia.default.fetch(title, this.fetchOpts);
5628
- } catch (err) {
5629
- throw new Error(`Wikipedia fetch failed for "${title}": ${err?.message ?? err}`);
5630
- }
5631
- if (!doc) return null;
5632
- if (typeof doc.isRedirect === "function" && doc.isRedirect()) {
5633
- const target = doc.redirectTo?.()?.page;
5634
- if (typeof target === "string") {
5635
- this.redirects.set(title, canonicalTitle(target));
5636
- return await this.fetchArticle(target);
5637
- }
5638
- }
5639
- const resolvedTitle = canonicalTitle(doc.title?.() ?? title);
5640
- this.cache.set(resolvedTitle, doc);
5641
- if (resolvedTitle !== title) this.redirects.set(title, resolvedTitle);
5642
- return doc;
5643
- }
5644
- /**
5645
- * Fetch the member pages of a category (and optionally sub-categories).
5646
- * @param category Category title (with or without "Category:" prefix).
5647
- * @param recursive Whether to traverse sub-categories.
5648
- * @param maxDepth Recursion depth when recursive=true.
5649
- */
5650
- async fetchCategoryPages(category, recursive, maxDepth) {
5651
- await this.limiter.wait();
5652
- const opts = {
5653
- lang: this.config.lang,
5654
- "Api-User-Agent": this.config.userAgent,
5655
- recursive,
5656
- maxDepth
5657
- };
5658
- if (this.config.domain) opts.domain = this.config.domain;
5659
- return (await wtf_wikipedia.default.getCategoryPages(category, opts) ?? []).map((m) => ({
5660
- title: canonicalTitle(m.title),
5661
- type: m.type === "subcat" ? "subcat" : "page"
5662
- }));
5663
- }
5664
- };
5665
- /** Normalize a Wikipedia title — trim, collapse spaces, strip leading/trailing colons. */
5666
- function canonicalTitle(s) {
5667
- return (s ?? "").toString().replace(/_/g, " ").replace(/\s+/g, " ").trim();
5668
- }
5669
- /** Detect a category-namespaced title. */
5670
- const CATEGORY_PREFIX = /^(Category|Catégorie|Kategorie|Categoría|Categoria|Categorie|Kategoria):/i;
5671
- function isCategoryTitle(title) {
5672
- return CATEGORY_PREFIX.test(title);
5673
- }
5674
- /** Strip the "Category:" prefix for display. */
5675
- function stripCategoryPrefix(title) {
5676
- return title.replace(CATEGORY_PREFIX, "").trim();
5677
- }
5678
-
5679
- //#endregion
5680
- //#region packages/cli/src/commands/wiki/snapshot.ts
5681
- function snapshotArticle(doc, title) {
5682
- return {
5683
- title,
5684
- linkTitles: collectLinkTitles(doc),
5685
- categories: collectCategories(doc),
5686
- sections: snapshotSections(doc.sections?.() ?? []),
5687
- infobox: snapshotInfobox(doc.infobox?.()),
5688
- lead: leadParagraph(doc),
5689
- url: typeof doc.url === "function" ? doc.url() : null
5690
- };
5691
- }
5692
- function prettyCategoryLabel(catTitle) {
5693
- return stripCategoryPrefix(catTitle);
5694
- }
5695
- function collectLinkTitles(doc) {
5696
- const links = doc.links?.() ?? [];
5697
- const out = /* @__PURE__ */ new Set();
5698
- for (const l of links) {
5699
- if (!l) continue;
5700
- const page = typeof l.page === "function" ? l.page() : null;
5701
- if (typeof page !== "string" || page.length === 0) continue;
5702
- if (isCategoryTitle(page)) continue;
5703
- out.add(canonicalTitle(page));
5704
- }
5705
- return [...out];
5706
- }
5707
- function collectCategories(doc) {
5708
- const out = [];
5709
- for (const c of doc.categories?.() ?? []) {
5710
- const norm = canonicalTitle(c);
5711
- if (norm) out.push(norm);
5712
- }
5713
- return out;
5714
- }
5715
- function snapshotSections(rawSections) {
5716
- const all = rawSections.map((s) => ({
5717
- raw: s,
5718
- title: s.title?.() || "",
5719
- parentRef: typeof s.parent === "function" ? s.parent() : null,
5720
- children: []
5721
- }));
5722
- const byRaw = /* @__PURE__ */ new Map();
5723
- for (const s of all) byRaw.set(s.raw, s);
5724
- const roots = [];
5725
- for (const s of all) if (s.parentRef && byRaw.has(s.parentRef)) byRaw.get(s.parentRef).children.push(materialize(s));
5726
- else roots.push(s);
5727
- return roots.map(materialize);
5728
- }
5729
- function materialize(node) {
5730
- const lists = node.raw.lists?.() ?? [];
5731
- const paragraphs = node.raw.paragraphs?.() ?? [];
5732
- let listLength = 0;
5733
- for (const l of lists) {
5734
- const lines = l.lines?.() ?? [];
5735
- listLength += lines.length;
5736
- }
5737
- const isList = lists.length > 0 && (paragraphs.length === 0 || listLength >= paragraphs.length * 2);
5738
- const bodyParts = [];
5739
- for (const p of paragraphs) {
5740
- const md = paragraphMarkdown(p);
5741
- if (md) bodyParts.push(md);
5742
- }
5743
- for (const l of lists) {
5744
- const lines = l.lines?.() ?? [];
5745
- for (const line of lines) {
5746
- const text = lineText(line);
5747
- if (text) bodyParts.push(`- ${text}`);
5748
- }
5749
- }
5750
- return {
5751
- title: node.title,
5752
- body: bodyParts.join("\n\n"),
5753
- isList,
5754
- listLength,
5755
- children: node.children
5756
- };
5757
- }
5758
- function snapshotInfobox(box) {
5759
- if (!box) return void 0;
5760
- const data = typeof box.json === "function" ? box.json() : null;
5761
- if (!data || typeof data !== "object") return void 0;
5762
- const rows = [];
5763
- for (const [key, val] of Object.entries(data)) {
5764
- const value = stringifyInfoboxValue(val);
5765
- if (!value) continue;
5766
- rows.push({
5767
- key: humanKey(key),
5768
- value
5769
- });
5770
- }
5771
- return rows.length > 0 ? rows : void 0;
5772
- }
5773
- function stringifyInfoboxValue(val) {
5774
- if (val == null) return "";
5775
- if (typeof val === "string") return val;
5776
- if (typeof val === "number" || typeof val === "boolean") return String(val);
5777
- if (Array.isArray(val)) return val.map(stringifyInfoboxValue).filter(Boolean).join(", ");
5778
- if (typeof val === "object") {
5779
- const o = val;
5780
- if (typeof o.text === "string") return o.text;
5781
- if (typeof o.number === "number") return String(o.number);
5782
- }
5783
- return "";
5784
- }
5785
- function humanKey(k) {
5786
- return k.replace(/_/g, " ").replace(/^./, (m) => m.toUpperCase());
5787
- }
5788
- function leadParagraph(doc) {
5789
- const first = (doc.paragraphs?.() ?? [])[0];
5790
- if (!first) return "";
5791
- return paragraphMarkdown(first);
5792
- }
5793
- /**
5794
- * Render a paragraph as markdown, replacing internal links with `[[Title]]`.
5795
- * The streaming orchestrator's link rewriter later swaps `[[Title]]` →
5796
- * `[[docId|label]]` once IDs are known.
5797
- */
5798
- function paragraphMarkdown(paragraph) {
5799
- const sentences = paragraph.sentences?.() ?? [];
5800
- const out = [];
5801
- for (const s of sentences) out.push(sentenceWithWikilinks(s));
5802
- return out.join(" ").trim();
5803
- }
5804
- function sentenceWithWikilinks(sentence) {
5805
- const text = (sentence.text?.() ?? "").toString();
5806
- const links = sentence.links?.() ?? [];
5807
- if (links.length === 0) return text;
5808
- let result = text;
5809
- const replacements = links.map((l) => {
5810
- const page = typeof l.page === "function" ? l.page() : null;
5811
- const display = typeof l.text === "function" ? l.text() : null;
5812
- if (typeof page !== "string" || page.length === 0) return null;
5813
- if (isCategoryTitle(page)) return null;
5814
- const shown = display && display.length > 0 ? display : page;
5815
- return {
5816
- page: canonicalTitle(page),
5817
- shown
5818
- };
5819
- }).filter((x) => x !== null).sort((a, b) => b.shown.length - a.shown.length);
5820
- for (const { page, shown } of replacements) {
5821
- if (!result.includes(shown)) continue;
5822
- const replacement = shown === page ? `[[${page}]]` : `[[${page}|${shown}]]`;
5823
- result = result.replace(shown, replacement);
5824
- }
5825
- return result;
5826
- }
5827
- function lineText(line) {
5828
- if (!line) return "";
5829
- if (typeof line === "string") return line;
5830
- if (typeof line.text === "string") return line.text;
5831
- if (typeof line.text === "function") return line.text();
5832
- return "";
5833
- }
5834
-
5835
- //#endregion
5836
- //#region packages/cli/src/commands/wiki/render.ts
5837
- const ICONS = {
5838
- graph: "git-fork",
5839
- article: "book-open",
5840
- category: "tag",
5841
- infobox: "info",
5842
- outline: "list",
5843
- gallery: "images",
5844
- section: "pilcrow",
5845
- categories: "tags"
5846
- };
5847
- /** Decide a page type for a section based on its shape. */
5848
- function pickSectionType(section) {
5849
- if (section.children.length > 0) return {
5850
- type: "outline",
5851
- icon: ICONS.outline
5852
- };
5853
- if (section.isList && section.listLength >= 5) return {
5854
- type: "outline",
5855
- icon: ICONS.outline
5856
- };
5857
- return {
5858
- type: "doc",
5859
- icon: ICONS.section
5860
- };
5861
- }
5862
- /** Render the lead paragraph as the article-doc body. */
5863
- function renderArticleLead(article) {
5864
- return article.lead ?? "";
5865
- }
5866
- /** Render the article as a single doc, sections + infobox inlined. */
5867
- function renderArticleSingleDoc(article) {
5868
- const parts = [];
5869
- if (article.lead) parts.push(article.lead);
5870
- if (article.infobox && article.infobox.length > 0) parts.push("## Infobox", renderInfoboxBody(article.infobox));
5871
- for (const section of article.sections) parts.push(...renderSectionInline(section, 2));
5872
- return parts.join("\n\n");
5873
- }
5874
- function renderSectionInline(section, level) {
5875
- const out = [];
5876
- const prefix = "#".repeat(Math.min(6, level));
5877
- if (section.title) out.push(`${prefix} ${section.title}`);
5878
- if (section.body.trim()) out.push(section.body);
5879
- for (const child of section.children) out.push(...renderSectionInline(child, level + 1));
5880
- return out;
5881
- }
5882
- function renderInfoboxBody(rows) {
5883
- return rows.map((r) => `- **${r.key}:** ${r.value}`).join("\n");
5884
- }
5885
- function renderCategoryBody(members, subcategories) {
5886
- const parts = [];
5887
- if (members.length > 0) {
5888
- parts.push("## Pages");
5889
- parts.push(members.map((m) => `- [[${m}]]`).join("\n"));
5890
- }
5891
- if (subcategories.length > 0) {
5892
- parts.push("## Sub-categories");
5893
- parts.push(subcategories.map((s) => `- ${s}`).join("\n"));
5894
- }
5895
- return parts.join("\n\n");
5896
- }
5897
- /**
5898
- * Replace `[[Title]]` / `[[Title|Alias]]` in markdown with
5899
- * `[[docId|label]]` using the title→docId map. Unresolved titles fall
5900
- * back to plain text (their alias or original title).
5901
- */
5902
- function rewriteLinks(markdown, titleToDocId) {
5903
- return markdown.replace(/\[\[([^\]|]+?)(?:\|([^\]]+?))?\]\]/g, (_match, target, alias) => {
5904
- const title = target.trim();
5905
- const docId = titleToDocId.get(title);
5906
- const display = (alias && alias.trim().length > 0 ? alias : title).trim();
5907
- if (!docId) return display;
5908
- return `[[${docId}|${display}]]`;
5909
- });
5910
- }
5911
-
5912
- //#endregion
5913
- //#region packages/cli/src/commands/wiki/connect.ts
5914
- /**
5915
- * Open a DocumentManager session for the wiki command, mirroring the
5916
- * auth/register flow that CLIConnection uses but using the modern public API.
5917
- *
5918
- * Reuses the CLI's Ed25519 keypair handling (loadOrCreateKeypair, signChallenge)
5919
- * so the wiki command authenticates with the same identity as every other
5920
- * subcommand.
5921
- */
5922
- async function openSession(config) {
5923
- const keypair = await loadOrCreateKeypair(config.keyFile);
5924
- const sign = (challenge) => Promise.resolve(signChallenge(challenge, keypair.privateKey));
5925
- const dm = new _abraca_dabra.DocumentManager({
5926
- url: config.url,
5927
- name: config.name ?? "Wiki Extractor",
5928
- color: config.color,
5929
- quiet: config.quiet
5930
- });
5931
- try {
5932
- await dm.client.loginWithKey(keypair.publicKeyB64, sign);
5933
- } catch (err) {
5934
- const status = err?.status ?? err?.response?.status;
5935
- if (status === 404 || status === 422) {
5936
- if (!config.quiet) console.error("[abracadabra] Key not registered, creating new account...");
5937
- await dm.client.registerWithKey({
5938
- publicKey: keypair.publicKeyB64,
5939
- username: (config.name ?? "wiki-extractor").replace(/\s+/g, "-").toLowerCase(),
5940
- displayName: config.name ?? "Wiki Extractor",
5941
- deviceName: "CLI Wiki",
5942
- inviteCode: config.inviteCode
5943
- });
5944
- await dm.client.loginWithKey(keypair.publicKeyB64, sign);
5945
- } else throw err;
5946
- }
5947
- await dm.connect();
5948
- const rootDocId = dm.rootDocId;
5949
- if (!rootDocId) throw new Error("Connected but no rootDocId — server has no spaces.");
5950
- return {
5951
- dm,
5952
- rootDocId
5953
- };
5954
- }
5955
-
5956
- //#endregion
5957
- //#region packages/cli/src/commands/wiki/index.ts
5958
- registerCommand({
5959
- name: "wiki",
5960
- aliases: ["wikipedia"],
5961
- description: "Fetch Wikipedia articles into a graph of docs (streaming).",
5962
- usage: [
5963
- "wiki \"<Article Title>\"",
5964
- " mode=single|split single doc per article OR split into sections+infobox [split]",
5965
- " depth=<n> follow internal links to depth N [1]",
5966
- " category-depth=<n> recurse into sub-categories [1]",
5967
- " lang=<code> wiki language [en]",
5968
- " domain=<host> 3rd-party MediaWiki host (overrides lang)",
5969
- " parent=<docId> parent doc for the new graph [active space root]",
5970
- " user-agent=<str> Api-User-Agent header (REQUIRED by Wikimedia etiquette)",
5971
- " rate=<rps> max wikipedia requests per second [3]",
5972
- " --include-categories expand each article's categories into nested graphs",
5973
- " --dry-run fetch only the entry article, print outline, no writes"
5974
- ].join("\n"),
5975
- async run(_conn, args) {
5976
- const opts = parseOptions(args);
5977
- if (typeof opts === "string") return opts;
5978
- const log = (msg) => {
5979
- if (!args.flags.has("quiet") && !args.flags.has("q")) console.error(`[wiki] ${msg}`);
5980
- };
5981
- const wp = new WikipediaClient({
5982
- lang: opts.lang,
5983
- domain: opts.domain,
5984
- userAgent: opts.userAgent,
5985
- rate: opts.rate
5986
- });
5987
- if (opts.dryRun) {
5988
- log(`fetch ${opts.title}`);
5989
- const doc = await wp.fetchArticle(opts.title);
5990
- if (!doc) return `Article not found: "${opts.title}"`;
5991
- const snap = snapshotArticle(doc, canonicalTitle(doc.title?.() ?? opts.title));
5992
- return [
5993
- `Entry: ${snap.title}`,
5994
- `URL: ${snap.url ?? "(none)"}`,
5995
- `Internal links: ${snap.linkTitles.length}`,
5996
- `Categories: ${snap.categories.length}`,
5997
- `Sections: ${snap.sections.length}`,
5998
- `Has infobox: ${snap.infobox && snap.infobox.length > 0 ? "yes" : "no"}`,
5999
- "",
6000
- "── Sections ──",
6001
- printSections(snap.sections, "")
6002
- ].join("\n");
6003
- }
6004
- const env = globalThis.process?.env ?? {};
6005
- const url = env["ABRA_URL"];
6006
- if (!url) return "ABRA_URL is required to write to the server. Set it or pass --dry-run.";
6007
- const { dm } = await openSession({
6008
- url,
6009
- name: env["ABRA_NAME"],
6010
- color: env["ABRA_COLOR"],
6011
- inviteCode: env["ABRA_INVITE_CODE"],
6012
- keyFile: env["ABRA_KEY_FILE"],
6013
- quiet: args.flags.has("quiet") || args.flags.has("q")
6014
- });
6015
- try {
6016
- const result = await runStreaming(dm, wp, opts, log);
6017
- return [`Done. Created ${result.articleCount} articles${result.categoryCount > 0 ? ` + ${result.categoryCount} categories` : ""}.`, `Root: ${result.rootDocId}`].join("\n");
6018
- } finally {
6019
- await dm.destroy().catch(() => {});
6020
- }
6021
- }
6022
- });
6023
- async function runStreaming(dm, wp, opts, log) {
6024
- const titleToDocId = /* @__PURE__ */ new Map();
6025
- const fetched = /* @__PURE__ */ new Map();
6026
- const childrenCreated = /* @__PURE__ */ new Set();
6027
- const categoryToDocId = /* @__PURE__ */ new Map();
6028
- let categoriesContainerId = null;
6029
- log(`fetch ${opts.title}`);
6030
- const entryDoc = await wp.fetchArticle(opts.title);
6031
- if (!entryDoc) throw new Error(`Article not found: "${opts.title}"`);
6032
- const entryTitle = canonicalTitle(entryDoc.title?.() ?? opts.title);
6033
- const entrySnap = snapshotArticle(entryDoc, entryTitle);
6034
- fetched.set(entryTitle, entrySnap);
6035
- const rootEntry = dm.tree.create({
6036
- parentId: opts.parentDocId ?? null,
6037
- label: entryTitle,
6038
- type: "graph",
6039
- meta: { icon: ICONS.graph }
6040
- });
6041
- log(`+ ${rootEntry.id.slice(0, 8)}… ${entryTitle} (graph)`);
6042
- const entryArticleId = createArticleShell(dm, entrySnap, rootEntry.id, log);
6043
- titleToDocId.set(entryTitle, entryArticleId);
6044
- const queue = [{
6045
- title: entryTitle,
6046
- depth: 0
6047
- }];
6048
- let articleCount = 0;
6049
- while (queue.length > 0) {
6050
- const { title, depth } = queue.shift();
6051
- const articleDocId = titleToDocId.get(title);
6052
- let snap = fetched.get(title);
6053
- if (!snap) {
6054
- log(`fetch [d${depth}] ${title}`);
6055
- try {
6056
- const doc = await wp.fetchArticle(title);
6057
- if (!doc) {
6058
- log(` not found — leaving stub`);
6059
- continue;
6060
- }
6061
- snap = snapshotArticle(doc, canonicalTitle(doc.title?.() ?? title));
6062
- fetched.set(title, snap);
6063
- } catch (err) {
6064
- log(`! fetch failed: ${err?.message ?? err}`);
6065
- continue;
6066
- }
6067
- }
6068
- if (opts.mode === "split" && !childrenCreated.has(title)) {
6069
- createArticleChildren(dm, snap, articleDocId, log);
6070
- childrenCreated.add(title);
6071
- }
6072
- if (depth < opts.depth) for (const linkTitle of snap.linkTitles) {
6073
- if (titleToDocId.has(linkTitle)) continue;
6074
- const shell = dm.tree.create({
6075
- parentId: rootEntry.id,
6076
- label: linkTitle,
6077
- type: "doc",
6078
- meta: { icon: ICONS.article }
6079
- });
6080
- titleToDocId.set(linkTitle, shell.id);
6081
- queue.push({
6082
- title: linkTitle,
6083
- depth: depth + 1
6084
- });
6085
- log(`+ ${shell.id.slice(0, 8)}… ${linkTitle} (doc, shell)`);
6086
- }
6087
- if (opts.includeCategories && snap.categories.length > 0) {
6088
- if (!categoriesContainerId) {
6089
- const c = dm.tree.create({
6090
- parentId: rootEntry.id,
6091
- label: "Categories",
6092
- type: "graph",
6093
- meta: { icon: ICONS.categories }
6094
- });
6095
- categoriesContainerId = c.id;
6096
- log(`+ ${c.id.slice(0, 8)}… Categories (graph)`);
6097
- }
6098
- for (const catTitle of snap.categories) {
6099
- if (categoryToDocId.has(catTitle)) continue;
6100
- const cat = dm.tree.create({
6101
- parentId: categoriesContainerId,
6102
- label: prettyCategoryLabel(catTitle),
6103
- type: "graph",
6104
- meta: { icon: ICONS.category }
6105
- });
6106
- categoryToDocId.set(catTitle, cat.id);
6107
- log(`+ ${cat.id.slice(0, 8)}… ${prettyCategoryLabel(catTitle)} (graph, cat)`);
6108
- }
6109
- }
6110
- const body = opts.mode === "split" ? renderArticleLead(snap) : renderArticleSingleDoc(snap);
6111
- if (body.trim().length > 0) {
6112
- const rewritten = rewriteLinks(body, titleToDocId);
6113
- try {
6114
- await dm.content.write(articleDocId, rewritten);
6115
- log(`✓ body ${title}`);
6116
- } catch (err) {
6117
- log(`! body write failed for ${title}: ${err?.message ?? err}`);
6118
- }
6119
- }
6120
- if (opts.mode === "split") await writeChildrenBodies(dm, snap, articleDocId, titleToDocId, log);
6121
- articleCount++;
6122
- }
6123
- let categoryCount = 0;
6124
- if (opts.includeCategories && categoryToDocId.size > 0) for (const [catTitle, catDocId] of categoryToDocId) {
6125
- log(`category ${catTitle}`);
6126
- try {
6127
- const members = await wp.fetchCategoryPages(catTitle, opts.categoryDepth > 0, Math.max(0, opts.categoryDepth));
6128
- const memberArticles = [];
6129
- const subcats = [];
6130
- for (const m of members) if (m.type === "subcat") subcats.push(prettyCategoryLabel(m.title));
6131
- else memberArticles.push(m.title);
6132
- const rewritten = rewriteLinks(renderCategoryBody(memberArticles, subcats), titleToDocId);
6133
- if (rewritten.trim().length > 0) {
6134
- await dm.content.write(catDocId, rewritten);
6135
- log(`✓ body category ${catTitle}`);
6136
- }
6137
- categoryCount++;
6138
- } catch (err) {
6139
- log(`! category ${catTitle}: ${err?.message ?? err}`);
6140
- }
6141
- }
6142
- return {
6143
- rootDocId: rootEntry.id,
6144
- articleCount,
6145
- categoryCount
6146
- };
6147
- }
6148
- function createArticleShell(dm, article, parentId, log) {
6149
- const meta = { icon: ICONS.article };
6150
- if (article.url) meta.url = article.url;
6151
- const entry = dm.tree.create({
6152
- parentId,
6153
- label: article.title,
6154
- type: "doc",
6155
- meta
6156
- });
6157
- log(`+ ${entry.id.slice(0, 8)}… ${article.title} (doc)`);
6158
- return entry.id;
6159
- }
6160
- /**
6161
- * Create section + infobox child docs for a split-mode article. Returns nothing
6162
- * — children get bodies written later in writeChildrenBodies.
6163
- */
6164
- function createArticleChildren(dm, article, articleDocId, log) {
6165
- if (article.infobox && article.infobox.length > 0) {
6166
- const ib = dm.tree.create({
6167
- parentId: articleDocId,
6168
- label: "Infobox",
6169
- type: "outline",
6170
- meta: { icon: ICONS.infobox }
6171
- });
6172
- log(` + ${ib.id.slice(0, 8)}… Infobox (outline)`);
6173
- article._infoboxDocId = ib.id;
6174
- }
6175
- for (const section of article.sections) createSectionShell(dm, section, articleDocId, log);
6176
- }
6177
- function createSectionShell(dm, section, parentDocId, log) {
6178
- const hasChildren = section.children.length > 0;
6179
- if (!section.body.trim() && !hasChildren) return;
6180
- const { type, icon } = pickSectionType(section);
6181
- const entry = dm.tree.create({
6182
- parentId: parentDocId,
6183
- label: section.title || "Untitled section",
6184
- type,
6185
- meta: { icon }
6186
- });
6187
- log(` + ${entry.id.slice(0, 8)}… ${entry.label} (${type})`);
6188
- section._docId = entry.id;
6189
- for (const child of section.children) createSectionShell(dm, child, entry.id, log);
6190
- }
6191
- async function writeChildrenBodies(dm, article, _articleDocId, titleToDocId, log) {
6192
- const infoboxDocId = article._infoboxDocId;
6193
- if (infoboxDocId && article.infobox && article.infobox.length > 0) try {
6194
- await dm.content.write(infoboxDocId, renderInfoboxBody(article.infobox));
6195
- } catch (err) {
6196
- log(`! infobox body write failed: ${err?.message ?? err}`);
6197
- }
6198
- for (const section of article.sections) await writeSectionBody(dm, section, titleToDocId, log);
6199
- }
6200
- async function writeSectionBody(dm, section, titleToDocId, log) {
6201
- const docId = section._docId;
6202
- if (docId && section.body.trim().length > 0) try {
6203
- await dm.content.write(docId, rewriteLinks(section.body, titleToDocId));
6204
- } catch (err) {
6205
- log(`! section body write failed for ${section.title}: ${err?.message ?? err}`);
6206
- }
6207
- for (const child of section.children) await writeSectionBody(dm, child, titleToDocId, log);
6208
- }
6209
- function parseOptions(args) {
6210
- const title = args.positional[0]?.trim() || args.params["title"];
6211
- if (!title) return "Missing required positional argument: <title>. Example: abracadabra wiki \"Toronto Raptors\"";
6212
- const env = globalThis.process?.env ?? {};
6213
- const userAgent = args.params["user-agent"] || args.params["userAgent"] || env["ABRA_WIKI_USER_AGENT"];
6214
- if (!userAgent) return ["Missing required parameter: user-agent=\"your-name (you@example.com)\"", "(Wikimedia etiquette requires an Api-User-Agent header. Pass user-agent=... or set ABRA_WIKI_USER_AGENT.)"].join("\n");
6215
- const mode = args.params["mode"] ?? "split";
6216
- if (mode !== "single" && mode !== "split") return `Invalid mode "${mode}". Use mode=single or mode=split.`;
6217
- const depth = parseIntOr(args.params["depth"], 1);
6218
- const categoryDepth = parseIntOr(args.params["category-depth"] ?? args.params["categoryDepth"], 1);
6219
- const rate = parseFloatOr(args.params["rate"], 3);
6220
- return {
6221
- title,
6222
- mode,
6223
- depth,
6224
- categoryDepth,
6225
- includeCategories: args.flags.has("include-categories") || args.flags.has("includeCategories"),
6226
- lang: args.params["lang"] ?? "en",
6227
- domain: args.params["domain"],
6228
- parentDocId: args.params["parent"],
6229
- userAgent,
6230
- rate,
6231
- dryRun: args.flags.has("dry-run") || args.flags.has("dryRun")
6232
- };
6233
- }
6234
- function parseIntOr(s, fallback) {
6235
- if (!s) return fallback;
6236
- const n = Number.parseInt(s, 10);
6237
- return Number.isFinite(n) && n >= 0 ? n : fallback;
6238
- }
6239
- function parseFloatOr(s, fallback) {
6240
- if (!s) return fallback;
6241
- const n = Number.parseFloat(s);
6242
- return Number.isFinite(n) && n > 0 ? n : fallback;
6243
- }
6244
- function printSections(sections, indent) {
6245
- const lines = [];
6246
- for (const s of sections) {
6247
- const hint = s.body ? ` (${s.body.length}b)` : "";
6248
- lines.push(`${indent}- ${s.title}${hint}${s.children.length > 0 ? ` [${s.children.length} sub]` : ""}`);
6249
- if (s.children.length > 0) lines.push(printSections(s.children, indent + " "));
6250
- }
6251
- return lines.join("\n");
6252
- }
6253
-
6254
5561
  //#endregion
6255
5562
  //#region packages/cli/src/index.ts
6256
5563
  /**
@@ -6274,9 +5581,7 @@ const NO_CONNECT_COMMANDS = new Set([
6274
5581
  "v",
6275
5582
  "page-types",
6276
5583
  "types",
6277
- "doctypes",
6278
- "wiki",
6279
- "wikipedia"
5584
+ "doctypes"
6280
5585
  ]);
6281
5586
  async function main() {
6282
5587
  const args = parseArgs(process.argv);