npm - llm-kb - Version diff - 0.4.2 → 0.6.0 - Mend

llm-kb 0.4.2 → 0.6.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +11 -6
package/bin/{chunk-DHOXVEIR.js → chunk-3WBSKCCH.js} +96 -119
package/bin/chunk-EZ7LPPEP.js +218 -0
package/bin/chunk-Y2764FFH.js +1356 -0
package/bin/cli.js +385 -874
package/bin/{indexer-KSYRIVVN.js → indexer-K37QM2HP.js} +2 -1
package/bin/public/index.html +949 -0
package/bin/server-QC5SN6T4.js +1069 -0
package/package.json +4 -3

package/README.md CHANGED

@@ -15,20 +15,25 @@ That's it. PDFs get parsed, an index is built, and an interactive chat opens —
 ## Authentication
-Two options (you need one):
+Three options (you need one):
-**Option 1 — Pi SDK (recommended)**
+**Option 1 — OpenRouter API key (recommended)**
+```bash
+export OPENROUTER_API_KEY=sk-or-...
+```
+**Option 2 — Pi SDK**
 ```bash
 npm install -g @mariozechner/pi-coding-agent
 pi   # run once to authenticate
 ```
-**Option 2 — Anthropic API key**
+**Option 3 — Anthropic API key**
 ```bash
 export ANTHROPIC_API_KEY=sk-ant-...
 ```
-If neither is configured, `llm-kb` shows a clear error with setup instructions.
+If none is configured, `llm-kb` shows a clear error with setup instructions.
 ## What It Does
@@ -39,7 +44,7 @@ llm-kb run ./my-documents
 ```
 ```
-llm-kb v0.4.1
+llm-kb v0.5.0
 Scanning ./my-documents...
   Found 9 files (9 PDF)
@@ -156,7 +161,7 @@ Knowledge Base Status
   Articles: 15 compiled
   Outputs: 2 saved answers
   Models:  claude-sonnet-4-6 (query)  claude-haiku-4-5 (index)
-  Auth:    Pi SDK
+  Auth:    OpenRouter
 ```
 ## The Three-Layer Architecture

package/bin/{chunk-DHOXVEIR.js → chunk-3WBSKCCH.js} RENAMED

@@ -6776,16 +6776,6 @@ var require_dist = __commonJS({
   }
 });
-// src/indexer.ts
-import {
-  createAgentSession,
-  createBashTool,
-  createReadTool,
-  createWriteTool,
-  DefaultResourceLoader,
-  SettingsManager
-} from "@mariozechner/pi-coding-agent";
 // node_modules/@mariozechner/pi-ai/dist/api-registry.js
 var apiProviderRegistry = /* @__PURE__ */ new Map();
 function wrapStream(api, stream) {
@@ -7112,8 +7102,99 @@ if (canUseRuntimeCodegen()) {
   }
 }
-// src/indexer.ts
-import { readdir, readFile } from "fs/promises";
+// src/model-resolver.ts
+import { AuthStorage } from "@mariozechner/pi-coding-agent";
+var ANTHROPIC_TO_OPENROUTER = {
+  "claude-haiku-4-5": "anthropic/claude-haiku-4.5",
+  "claude-sonnet-4-6": "anthropic/claude-sonnet-4.6",
+  "claude-sonnet-4-5": "anthropic/claude-sonnet-4.5",
+  "claude-sonnet-4-0": "anthropic/claude-sonnet-4",
+  "claude-opus-4-5": "anthropic/claude-opus-4.5"
+};
+var OPENROUTER_TO_ANTHROPIC = Object.fromEntries(
+  Object.entries(ANTHROPIC_TO_OPENROUTER).map(([anthropic, openrouter]) => [openrouter, anthropic])
+);
+var PURPOSE_FALLBACKS = {
+  index: ["claude-haiku-4-5", "anthropic/claude-haiku-4.5", "claude-sonnet-4-6", "anthropic/claude-sonnet-4.6"],
+  query: ["claude-sonnet-4-6", "anthropic/claude-sonnet-4.6", "claude-sonnet-4-5", "anthropic/claude-sonnet-4.5"],
+  wiki: ["claude-haiku-4-5", "anthropic/claude-haiku-4.5", "claude-sonnet-4-6", "anthropic/claude-sonnet-4.6"],
+  eval: ["claude-haiku-4-5", "anthropic/claude-haiku-4.5", "claude-sonnet-4-6", "anthropic/claude-sonnet-4.6"],
+  generic: ["claude-haiku-4-5", "anthropic/claude-haiku-4.5", "claude-sonnet-4-6", "anthropic/claude-sonnet-4.6"]
+};
+function stripOpenAI(prefixOrId) {
+  return prefixOrId.replace(/^openai\//, "");
+}
+function modelCandidates(modelId, purpose) {
+  const ids = /* @__PURE__ */ new Set();
+  ids.add(modelId);
+  const mappedOpenRouter = ANTHROPIC_TO_OPENROUTER[modelId];
+  if (mappedOpenRouter) ids.add(mappedOpenRouter);
+  const mappedAnthropic = OPENROUTER_TO_ANTHROPIC[modelId];
+  if (mappedAnthropic) ids.add(mappedAnthropic);
+  if (modelId.startsWith("openai/")) ids.add(stripOpenAI(modelId));
+  else ids.add(`openai/${modelId}`);
+  for (const fallback of PURPOSE_FALLBACKS[purpose]) ids.add(fallback);
+  return [...ids];
+}
+function providerOrder(modelId) {
+  if (modelId.startsWith("openai/") || modelId.startsWith("gpt-")) {
+    return ["openrouter", "openai", "anthropic"];
+  }
+  if (modelId.startsWith("anthropic/") || modelId.startsWith("claude-")) {
+    return ["openrouter", "anthropic", "openai"];
+  }
+  return ["openrouter", "openai", "anthropic"];
+}
+function resolveIdForProvider(provider, candidateId) {
+  switch (provider) {
+    case "anthropic": {
+      const ids = [candidateId];
+      const mapped = OPENROUTER_TO_ANTHROPIC[candidateId];
+      if (mapped) ids.push(mapped);
+      return [...new Set(ids.filter((id) => !id.startsWith("openai/")))];
+    }
+    case "openrouter": {
+      const ids = [candidateId];
+      const mapped = ANTHROPIC_TO_OPENROUTER[candidateId];
+      if (mapped) ids.unshift(mapped);
+      if (candidateId.startsWith("gpt-")) ids.unshift(`openai/${candidateId}`);
+      return [...new Set(ids)];
+    }
+    case "openai": {
+      return [stripOpenAI(candidateId)].filter((id) => !id.startsWith("claude-") && !id.startsWith("anthropic/"));
+    }
+  }
+}
+async function findModelForProvider(provider, candidateId, storage) {
+  const key = await storage.getApiKey(provider);
+  if (!key) return void 0;
+  const available = getModels(provider);
+  for (const id of resolveIdForProvider(provider, candidateId)) {
+    const model = available.find((m) => m.id === id);
+    if (model) return model;
+  }
+  return void 0;
+}
+async function resolveModelCandidates(modelId, authStorage, purpose = "generic") {
+  const storage = authStorage ?? AuthStorage.create();
+  const resolved = [];
+  const seen = /* @__PURE__ */ new Set();
+  for (const candidateId of modelCandidates(modelId, purpose)) {
+    for (const provider of providerOrder(candidateId)) {
+      const model = await findModelForProvider(provider, candidateId, storage);
+      if (!model) continue;
+      const key = `${provider}:${model.id}`;
+      if (seen.has(key)) continue;
+      seen.add(key);
+      resolved.push({ provider, candidateId, model });
+    }
+  }
+  return resolved;
+}
+async function getApiKeyForProvider(provider, authStorage) {
+  const storage = authStorage ?? AuthStorage.create();
+  return storage.getApiKey(provider);
+}
 // src/session-store.ts
 import { SessionManager } from "@mariozechner/pi-coding-agent";
@@ -7147,115 +7228,11 @@ function getNodeModulesPath() {
   return join2(process.cwd(), "node_modules");
 }
-// src/indexer.ts
-import { join as join3 } from "path";
-function buildAgentsContent(sourcesDir, files) {
-  const sourceList = files.filter((f) => f.endsWith(".md")).map((f) => `  - ${f}`).join("\n");
-  return `# llm-kb Knowledge Base
-## How to access documents
-### PDFs (pre-parsed)
-PDFs have been parsed to markdown with bounding boxes.
-Read the markdown versions in \`.llm-kb/wiki/sources/\` instead of the raw PDFs.
-Available parsed sources:
-${sourceList}
-### Other file types (Excel, Word, PowerPoint)
-You have bash and read tools. Use bash to run Node.js scripts.
-Libraries are pre-installed via require().
-For .docx (structured XML \u2014 ZIP containing word/document.xml):
-  const AdmZip = require('adm-zip');
-  const zip = new AdmZip('file.docx');
-  const xml = zip.readAsText('word/document.xml');
-  // Parse XML to extract headings and first paragraphs for summary
-For .xlsx use exceljs:
-  const ExcelJS = require('exceljs');
-  const wb = new ExcelJS.Workbook();
-  await wb.xlsx.readFile('file.xlsx');
-  const sheet = wb.getWorksheet(1);
-For .pptx use officeparser:
-  const officeparser = require('officeparser');
-  const text = await officeparser.parseOfficeAsync('file.pptx');
-## Index file
-Write the index to \`.llm-kb/wiki/index.md\`.
-The index should be a markdown file with:
-1. A title and last-updated timestamp
-2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics
-3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)
-4. Total word count across all sources
-`;
-}
-async function buildIndex(folder, sourcesDir, onOutput, authStorage, modelId) {
-  const files = await readdir(sourcesDir);
-  const mdFiles = files.filter((f) => f.endsWith(".md"));
-  if (mdFiles.length === 0) {
-    throw new Error("No source files found to index");
-  }
-  const agentsContent = buildAgentsContent(sourcesDir, files);
-  const nodeModulesPath = getNodeModulesPath();
-  process.env.NODE_PATH = nodeModulesPath;
-  const loader = new DefaultResourceLoader({
-    cwd: folder,
-    agentsFilesOverride: (current) => ({
-      agentsFiles: [
-        ...current.agentsFiles,
-        { path: ".llm-kb/AGENTS.md", content: agentsContent }
-      ]
-    })
-  });
-  await loader.reload();
-  const model = modelId ? getModels("anthropic").find((m) => m.id === modelId) : void 0;
-  const { session } = await createAgentSession({
-    cwd: folder,
-    resourceLoader: loader,
-    tools: [
-      createReadTool(folder),
-      createBashTool(folder),
-      createWriteTool(folder)
-    ],
-    sessionManager: await createKBSession(folder),
-    settingsManager: SettingsManager.inMemory({
-      compaction: { enabled: false }
-    }),
-    ...authStorage ? { authStorage } : {},
-    ...model ? { model } : {}
-  });
-  if (onOutput) {
-    session.subscribe((event) => {
-      if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
-        onOutput(event.assistantMessageEvent.delta);
-      }
-    });
-  }
-  session.setSessionName(`index: ${(/* @__PURE__ */ new Date()).toISOString()}`);
-  const prompt = `Read each file in .llm-kb/wiki/sources/ (one at a time, just the first 500 characters of each).
-Then write .llm-kb/wiki/index.md with a summary table of all sources.
-Include: Source filename, Type (PDF/Excel/Word/etc), Pages (from the JSON if available), a one-line summary, and key topics.
-Add a total word count estimate at the bottom.`;
-  await session.prompt(prompt);
-  const indexPath = join3(sourcesDir, "..", "index.md");
-  try {
-    const content = await readFile(indexPath, "utf-8");
-    session.dispose();
-    return content;
-  } catch {
-    session.dispose();
-    throw new Error("Agent did not create index.md");
-  }
-}
 export {
   completeSimple,
+  resolveModelCandidates,
+  getApiKeyForProvider,
   continueKBSession,
   createKBSession,
-  getNodeModulesPath,
-  buildIndex
+  getNodeModulesPath
 };

package/bin/chunk-EZ7LPPEP.js ADDED

@@ -0,0 +1,218 @@
+import {
+  createKBSession,
+  getNodeModulesPath,
+  resolveModelCandidates
+} from "./chunk-3WBSKCCH.js";
+// src/indexer.ts
+import {
+  createAgentSession,
+  createBashTool,
+  createReadTool,
+  createWriteTool,
+  DefaultResourceLoader,
+  SettingsManager
+} from "@mariozechner/pi-coding-agent";
+import { readdir, readFile } from "fs/promises";
+import { join } from "path";
+function buildAgentsContent(sourcesDir, files) {
+  const sourceList = files.filter((f) => f.endsWith(".md")).map((f) => `  - ${f}`).join("\n");
+  return `# llm-kb Knowledge Base
+## How to access documents
+### PDFs (pre-parsed)
+PDFs have been parsed to markdown with bounding boxes.
+Read the markdown versions in \`.llm-kb/wiki/sources/\` instead of the raw PDFs.
+Available parsed sources:
+${sourceList}
+### Other file types (Excel, Word, PowerPoint)
+You have bash and read tools. Use bash to run Node.js scripts.
+Libraries are pre-installed via require().
+For .docx (structured XML \u2014 ZIP containing word/document.xml):
+  const AdmZip = require('adm-zip');
+  const zip = new AdmZip('file.docx');
+  const xml = zip.readAsText('word/document.xml');
+  // Parse XML to extract headings and first paragraphs for summary
+For .xlsx use exceljs:
+  const ExcelJS = require('exceljs');
+  const wb = new ExcelJS.Workbook();
+  await wb.xlsx.readFile('file.xlsx');
+  const sheet = wb.getWorksheet(1);
+For .pptx use officeparser:
+  const officeparser = require('officeparser');
+  const text = await officeparser.parseOfficeAsync('file.pptx');
+## Index file
+Write the index to \`.llm-kb/wiki/index.md\`.
+The index should be a markdown file with:
+1. A title and last-updated timestamp
+2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics
+3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)
+4. Total word count across all sources
+`;
+}
+async function buildIndex(folder, sourcesDir, onOutput, authStorage, modelId) {
+  const files = await readdir(sourcesDir);
+  const mdFiles = files.filter((f) => f.endsWith(".md"));
+  const jsonFiles = files.filter((f) => f.endsWith(".json") && !f.endsWith(".pages"));
+  if (mdFiles.length === 0) {
+    throw new Error("No source files found to index");
+  }
+  const snippets = [];
+  const total = mdFiles.length;
+  const cols = process.stdout.columns || 80;
+  for (let i = 0; i < mdFiles.length; i++) {
+    const f = mdFiles[i];
+    const pct = Math.round((i + 1) / total * 100);
+    const name = f.length > 30 ? f.slice(0, 27) + "..." : f;
+    process.stdout.write(`\r  Reading sources... ${i + 1}/${total} (${pct}%) ${name}`.padEnd(cols));
+    try {
+      const content = await readFile(join(sourcesDir, f), "utf-8");
+      const preview = content.slice(0, 800);
+      const jsonName = f.replace(/\.md$/, ".json");
+      let pages = 0;
+      if (jsonFiles.includes(jsonName)) {
+        try {
+          const jsonHead = await readFile(join(sourcesDir, jsonName), "utf-8");
+          const match = jsonHead.match(/"totalPages"\s*:\s*(\d+)/);
+          if (match) pages = parseInt(match[1], 10);
+        } catch {
+        }
+      }
+      snippets.push(`### ${f}${pages > 0 ? ` (${pages} pages)` : ""}
+${preview}
+`);
+    } catch {
+      snippets.push(`### ${f}
+(could not read)
+`);
+    }
+  }
+  process.stdout.write(`\r${"".padEnd(cols)}\r`);
+  process.stdout.write(`  Read ${mdFiles.length} source previews
+`);
+  const BATCH_SIZE = 100;
+  const batches = [];
+  for (let i = 0; i < snippets.length; i += BATCH_SIZE) {
+    batches.push(snippets.slice(i, i + BATCH_SIZE));
+  }
+  const agentsContent = buildAgentsContent(sourcesDir, files);
+  const nodeModulesPath = getNodeModulesPath();
+  process.env.NODE_PATH = nodeModulesPath;
+  const candidates = modelId ? await resolveModelCandidates(modelId, authStorage, "index") : [];
+  if (modelId && candidates.length === 0) {
+    throw new Error(`No usable model found for '${modelId}'. Configure Anthropic, OpenRouter, or OpenAI credentials.`);
+  }
+  const indexPath = join(sourcesDir, "..", "index.md");
+  const attemptCandidates = candidates.length > 0 ? candidates : [{ provider: "default", candidateId: "default", model: void 0 }];
+  const batchResults = [];
+  for (let b = 0; b < batches.length; b++) {
+    const batch = batches[b];
+    const batchLabel = batches.length > 1 ? ` (batch ${b + 1}/${batches.length})` : "";
+    process.stdout.write(`  Generating index${batchLabel}...
+`);
+    const batchContent = batch.join("\n---\n\n");
+    const prompt = batches.length === 1 ? `Here are previews of all ${mdFiles.length} source files in this knowledge base. Generate a summary table in markdown.
+${batchContent}
+Write .llm-kb/wiki/index.md with:
+1. Title and last-updated timestamp
+2. A markdown table with columns: Source, Type, Pages, Summary, Key Topics
+3. One row per source with a one-line summary
+4. Total count at the bottom
+Do NOT read any files \u2014 all the data you need is above.` : b < batches.length - 1 ? `Here are previews of source files ${b * BATCH_SIZE + 1}-${Math.min((b + 1) * BATCH_SIZE, mdFiles.length)} of ${mdFiles.length}. Generate summary table rows ONLY (no header, no footer).
+${batchContent}
+Output ONLY markdown table rows \u2014 one per source. Columns: Source, Type, Pages, Summary, Key Topics.
+Do NOT read any files.` : `Here are the remaining source file previews (${b * BATCH_SIZE + 1}-${mdFiles.length} of ${mdFiles.length}).
+${batchContent}
+Output ONLY markdown table rows for these sources. Columns: Source, Type, Pages, Summary, Key Topics.
+Then combine with the previous batch results below and write the final .llm-kb/wiki/index.md:
+Previous batch rows:
+${batchResults.join("\n")}
+Write the complete index.md with title, timestamp, full table (header + all rows), and total count.`;
+    let lastError;
+    for (let i = 0; i < attemptCandidates.length; i++) {
+      const candidate = attemptCandidates[i];
+      const loader = new DefaultResourceLoader({
+        cwd: folder,
+        agentsFilesOverride: (current) => ({
+          agentsFiles: [
+            ...current.agentsFiles,
+            { path: ".llm-kb/AGENTS.md", content: agentsContent }
+          ]
+        })
+      });
+      await loader.reload();
+      const { session } = await createAgentSession({
+        cwd: folder,
+        resourceLoader: loader,
+        tools: [
+          createReadTool(folder),
+          createBashTool(folder),
+          createWriteTool(folder)
+        ],
+        sessionManager: await createKBSession(folder),
+        settingsManager: SettingsManager.inMemory({
+          compaction: { enabled: false }
+        }),
+        ...authStorage ? { authStorage } : {},
+        ...candidate.model ? { model: candidate.model } : {}
+      });
+      if (onOutput) {
+        session.subscribe((event) => {
+          if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
+            onOutput(event.assistantMessageEvent.delta);
+          }
+        });
+      }
+      session.setSessionName(`index: ${(/* @__PURE__ */ new Date()).toISOString()}`);
+      try {
+        await session.prompt(prompt);
+        if (batches.length === 1 || b === batches.length - 1) {
+          const content = await readFile(indexPath, "utf-8");
+          session.dispose();
+          return content;
+        } else {
+          const messages = session.state.messages;
+          const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant");
+          const text = lastAssistant?.content?.filter((b2) => b2.type === "text").map((b2) => b2.text).join("") ?? "";
+          batchResults.push(text);
+          session.dispose();
+          break;
+        }
+      } catch (error) {
+        lastError = error;
+        session.dispose();
+        const next = attemptCandidates[i + 1];
+        if (next) {
+          const detail = error instanceof Error ? error.message : String(error);
+          console.warn(`  Index attempt failed on ${candidate.provider}:${candidate.model?.id ?? candidate.candidateId} (${detail}). Retrying with ${next.provider}:${next.model?.id ?? next.candidateId}...`);
+          continue;
+        }
+        if (lastError instanceof Error) throw lastError;
+        throw new Error("Agent did not create index.md");
+      }
+    }
+  }
+  throw new Error("Agent did not create index.md");
+}
+export {
+  buildIndex
+};