llm-kb 0.4.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,20 +15,25 @@ That's it. PDFs get parsed, an index is built, and an interactive chat opens —
15
15
 
16
16
  ## Authentication
17
17
 
18
- Two options (you need one):
18
+ Three options (you need one):
19
19
 
20
- **Option 1 — Pi SDK (recommended)**
20
+ **Option 1 — OpenRouter API key (recommended)**
21
+ ```bash
22
+ export OPENROUTER_API_KEY=sk-or-...
23
+ ```
24
+
25
+ **Option 2 — Pi SDK**
21
26
  ```bash
22
27
  npm install -g @mariozechner/pi-coding-agent
23
28
  pi # run once to authenticate
24
29
  ```
25
30
 
26
- **Option 2 — Anthropic API key**
31
+ **Option 3 — Anthropic API key**
27
32
  ```bash
28
33
  export ANTHROPIC_API_KEY=sk-ant-...
29
34
  ```
30
35
 
31
- If neither is configured, `llm-kb` shows a clear error with setup instructions.
36
+ If none is configured, `llm-kb` shows a clear error with setup instructions.
32
37
 
33
38
  ## What It Does
34
39
 
@@ -39,7 +44,7 @@ llm-kb run ./my-documents
39
44
  ```
40
45
 
41
46
  ```
42
- llm-kb v0.4.1
47
+ llm-kb v0.5.0
43
48
 
44
49
  Scanning ./my-documents...
45
50
  Found 9 files (9 PDF)
@@ -156,7 +161,7 @@ Knowledge Base Status
156
161
  Articles: 15 compiled
157
162
  Outputs: 2 saved answers
158
163
  Models: claude-sonnet-4-6 (query) claude-haiku-4-5 (index)
159
- Auth: Pi SDK
164
+ Auth: OpenRouter
160
165
  ```
161
166
 
162
167
  ## The Three-Layer Architecture
@@ -6776,16 +6776,6 @@ var require_dist = __commonJS({
6776
6776
  }
6777
6777
  });
6778
6778
 
6779
- // src/indexer.ts
6780
- import {
6781
- createAgentSession,
6782
- createBashTool,
6783
- createReadTool,
6784
- createWriteTool,
6785
- DefaultResourceLoader,
6786
- SettingsManager
6787
- } from "@mariozechner/pi-coding-agent";
6788
-
6789
6779
  // node_modules/@mariozechner/pi-ai/dist/api-registry.js
6790
6780
  var apiProviderRegistry = /* @__PURE__ */ new Map();
6791
6781
  function wrapStream(api, stream) {
@@ -7112,8 +7102,99 @@ if (canUseRuntimeCodegen()) {
7112
7102
  }
7113
7103
  }
7114
7104
 
7115
- // src/indexer.ts
7116
- import { readdir, readFile } from "fs/promises";
7105
+ // src/model-resolver.ts
7106
+ import { AuthStorage } from "@mariozechner/pi-coding-agent";
7107
// Lookup tables translating model IDs between Anthropic's native naming and
// OpenRouter's "vendor/model" naming, plus the per-purpose fallback chains.
//
// The tables are null-prototype so that a caller-supplied model ID such as
// "constructor" or "toString" can never resolve to an inherited
// Object.prototype member, and frozen so the shared fallback arrays cannot be
// mutated by accident.
var ANTHROPIC_TO_OPENROUTER = Object.freeze(Object.assign(Object.create(null), {
  "claude-haiku-4-5": "anthropic/claude-haiku-4.5",
  "claude-sonnet-4-6": "anthropic/claude-sonnet-4.6",
  "claude-sonnet-4-5": "anthropic/claude-sonnet-4.5",
  "claude-sonnet-4-0": "anthropic/claude-sonnet-4",
  "claude-opus-4-5": "anthropic/claude-opus-4.5"
}));

// Exact inverse of ANTHROPIC_TO_OPENROUTER (OpenRouter ID -> Anthropic ID).
var OPENROUTER_TO_ANTHROPIC = Object.freeze(Object.assign(
  Object.create(null),
  Object.fromEntries(
    Object.entries(ANTHROPIC_TO_OPENROUTER).map(([anthropic, openrouter]) => [openrouter, anthropic])
  )
));

// Ordered fallback model IDs tried after the requested model, keyed by purpose.
// Every purpose except "query" shares the same haiku-first chain.
var PURPOSE_FALLBACKS = (() => {
  const haikuFirst = Object.freeze([
    "claude-haiku-4-5",
    "anthropic/claude-haiku-4.5",
    "claude-sonnet-4-6",
    "anthropic/claude-sonnet-4.6"
  ]);
  const sonnetFirst = Object.freeze([
    "claude-sonnet-4-6",
    "anthropic/claude-sonnet-4.6",
    "claude-sonnet-4-5",
    "anthropic/claude-sonnet-4.5"
  ]);
  return Object.freeze(Object.assign(Object.create(null), {
    index: haikuFirst,
    query: sonnetFirst,
    wiki: haikuFirst,
    eval: haikuFirst,
    generic: haikuFirst
  }));
})();
7124
/**
 * Removes one leading "openai/" vendor prefix from a model ID.
 * IDs without that prefix are returned unchanged.
 *
 * @param {string} prefixOrId - Model ID, optionally prefixed with "openai/".
 * @returns {string} The ID without the leading "openai/".
 */
function stripOpenAI(prefixOrId) {
  const vendorPrefix = "openai/";
  if (!prefixOrId.startsWith(vendorPrefix)) {
    return prefixOrId;
  }
  return prefixOrId.slice(vendorPrefix.length);
}
7127
/**
 * Builds the ordered, de-duplicated list of model IDs to try for a request:
 * the requested ID, its Anthropic/OpenRouter counterpart (when one is mapped),
 * its OpenAI-prefixed or un-prefixed variant, then the fallbacks for `purpose`.
 *
 * @param {string} modelId - The model ID requested by the caller.
 * @param {string} purpose - Key into PURPOSE_FALLBACKS; unknown or missing
 *   values use the "generic" chain instead of throwing.
 * @returns {string[]} Candidate IDs in priority order.
 */
function modelCandidates(modelId, purpose) {
  // Own-property check so IDs like "constructor" never pick up inherited members.
  const own = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : void 0;
  const ids = /* @__PURE__ */ new Set([modelId]);
  const mappedOpenRouter = own(ANTHROPIC_TO_OPENROUTER, modelId);
  if (mappedOpenRouter) ids.add(mappedOpenRouter);
  const mappedAnthropic = own(OPENROUTER_TO_ANTHROPIC, modelId);
  if (mappedAnthropic) ids.add(mappedAnthropic);
  if (modelId.startsWith("openai/")) {
    ids.add(stripOpenAI(modelId));
  } else if (!modelId.startsWith("claude-") && !modelId.startsWith("anthropic/")) {
    // Anthropic-family IDs are skipped: "openai/claude-..." and
    // "openai/anthropic/..." cannot match any provider's catalogue.
    ids.add(`openai/${modelId}`);
  }
  const fallbacks = own(PURPOSE_FALLBACKS, purpose) ?? PURPOSE_FALLBACKS.generic;
  for (const fallback of fallbacks) ids.add(fallback);
  return [...ids];
}
7139
/**
 * Chooses the order in which providers are tried for a model ID.
 * OpenRouter always comes first; Anthropic-family IDs ("anthropic/..." or
 * "claude-...") prefer Anthropic over OpenAI, every other ID prefers OpenAI.
 *
 * @param {string} modelId - Candidate model ID.
 * @returns {string[]} A new three-element array of provider names.
 */
function providerOrder(modelId) {
  const isAnthropicFamily = ["anthropic/", "claude-"].some((prefix) => modelId.startsWith(prefix));
  const directProviders = isAnthropicFamily ? ["anthropic", "openai"] : ["openai", "anthropic"];
  return ["openrouter", ...directProviders];
}
7148
/**
 * Translates a candidate model ID into the IDs worth looking up in one
 * provider's catalogue, most preferred first.
 *
 * - "anthropic": the ID plus its Anthropic-native mapping; "openai/..." IDs are dropped.
 * - "openrouter": the OpenRouter mapping and/or an "openai/" prefix for "gpt-*" IDs,
 *   followed by the ID itself.
 * - "openai": the ID without its "openai/" prefix; Anthropic-family IDs are dropped.
 *
 * @param {string} provider - Provider name.
 * @param {string} candidateId - Candidate model ID.
 * @returns {string[]} IDs to try; empty when the provider cannot serve the ID
 *   or the provider name is not recognised (so callers can always iterate).
 */
function resolveIdForProvider(provider, candidateId) {
  // Own-property check so IDs like "constructor" never pick up inherited members.
  const mappedFrom = (table) => Object.prototype.hasOwnProperty.call(table, candidateId) ? table[candidateId] : void 0;
  switch (provider) {
    case "anthropic": {
      const ids = [candidateId];
      const mapped = mappedFrom(OPENROUTER_TO_ANTHROPIC);
      if (mapped) ids.push(mapped);
      return [...new Set(ids.filter((id) => !id.startsWith("openai/")))];
    }
    case "openrouter": {
      const ids = [candidateId];
      const mapped = mappedFrom(ANTHROPIC_TO_OPENROUTER);
      if (mapped) ids.unshift(mapped);
      if (candidateId.startsWith("gpt-")) ids.unshift(`openai/${candidateId}`);
      return [...new Set(ids)];
    }
    case "openai": {
      return [stripOpenAI(candidateId)].filter((id) => !id.startsWith("claude-") && !id.startsWith("anthropic/"));
    }
    default:
      // Unknown provider: nothing to look up (previously fell through and returned undefined).
      return [];
  }
}
7168
/**
 * Looks up a candidate model in one provider's catalogue, but only when an
 * API key is available for that provider.
 *
 * @param {string} provider - Provider name passed to storage.getApiKey and getModels.
 * @param {string} candidateId - Candidate model ID to resolve for that provider.
 * @param {{ getApiKey(provider: string): unknown }} storage - Credential store.
 * @returns {Promise<object|undefined>} The first catalogue entry whose `id`
 *   matches one of the provider-specific IDs, or undefined when there is no
 *   key or no match.
 */
async function findModelForProvider(provider, candidateId, storage) {
  const apiKey = await storage.getApiKey(provider);
  if (!apiKey) {
    return void 0;
  }
  const catalogue = getModels(provider);
  const wantedIds = resolveIdForProvider(provider, candidateId);
  for (const wantedId of wantedIds) {
    const hit = catalogue.find((entry) => entry.id === wantedId);
    if (hit) {
      return hit;
    }
  }
  return void 0;
}
7178
/**
 * Resolves a requested model ID into every usable (provider, model) pair,
 * in priority order: candidates from modelCandidates() in order, and for each
 * candidate the providers from providerOrder() in order. A provider/model
 * pair is reported once, attributed to the first candidate that produced it.
 *
 * @param {string} modelId - Requested model ID.
 * @param {object} [authStorage] - Credential store; AuthStorage.create() is
 *   used when this is null or undefined.
 * @param {string} [purpose="generic"] - Fallback chain to append.
 * @returns {Promise<Array<{provider: string, candidateId: string, model: object}>>}
 */
async function resolveModelCandidates(modelId, authStorage, purpose = "generic") {
  const storage = authStorage ?? AuthStorage.create();
  // Map keeps insertion order, so first-seen pairs stay first.
  const uniquePairs = /* @__PURE__ */ new Map();
  for (const candidateId of modelCandidates(modelId, purpose)) {
    for (const provider of providerOrder(candidateId)) {
      const model = await findModelForProvider(provider, candidateId, storage);
      if (model) {
        const pairKey = `${provider}:${model.id}`;
        if (!uniquePairs.has(pairKey)) {
          uniquePairs.set(pairKey, { provider, candidateId, model });
        }
      }
    }
  }
  return [...uniquePairs.values()];
}
7194
/**
 * Fetches the API key for a provider from the given credential store,
 * falling back to AuthStorage.create() when no store is supplied.
 *
 * @param {string} provider - Provider name passed to getApiKey.
 * @param {object} [authStorage] - Credential store exposing getApiKey(provider).
 * @returns {Promise<unknown>} Whatever the store's getApiKey resolves to.
 */
async function getApiKeyForProvider(provider, authStorage) {
  return (authStorage ?? AuthStorage.create()).getApiKey(provider);
}
7117
7198
 
7118
7199
  // src/session-store.ts
7119
7200
  import { SessionManager } from "@mariozechner/pi-coding-agent";
@@ -7147,115 +7228,11 @@ function getNodeModulesPath() {
7147
7228
  return join2(process.cwd(), "node_modules");
7148
7229
  }
7149
7230
 
7150
- // src/indexer.ts
7151
- import { join as join3 } from "path";
7152
- function buildAgentsContent(sourcesDir, files) {
7153
- const sourceList = files.filter((f) => f.endsWith(".md")).map((f) => ` - ${f}`).join("\n");
7154
- return `# llm-kb Knowledge Base
7155
-
7156
- ## How to access documents
7157
-
7158
- ### PDFs (pre-parsed)
7159
- PDFs have been parsed to markdown with bounding boxes.
7160
- Read the markdown versions in \`.llm-kb/wiki/sources/\` instead of the raw PDFs.
7161
-
7162
- Available parsed sources:
7163
- ${sourceList}
7164
-
7165
- ### Other file types (Excel, Word, PowerPoint)
7166
- You have bash and read tools. Use bash to run Node.js scripts.
7167
- Libraries are pre-installed via require().
7168
-
7169
- For .docx (structured XML \u2014 ZIP containing word/document.xml):
7170
- const AdmZip = require('adm-zip');
7171
- const zip = new AdmZip('file.docx');
7172
- const xml = zip.readAsText('word/document.xml');
7173
- // Parse XML to extract headings and first paragraphs for summary
7174
-
7175
- For .xlsx use exceljs:
7176
- const ExcelJS = require('exceljs');
7177
- const wb = new ExcelJS.Workbook();
7178
- await wb.xlsx.readFile('file.xlsx');
7179
- const sheet = wb.getWorksheet(1);
7180
-
7181
- For .pptx use officeparser:
7182
- const officeparser = require('officeparser');
7183
- const text = await officeparser.parseOfficeAsync('file.pptx');
7184
-
7185
- ## Index file
7186
- Write the index to \`.llm-kb/wiki/index.md\`.
7187
-
7188
- The index should be a markdown file with:
7189
- 1. A title and last-updated timestamp
7190
- 2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics
7191
- 3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)
7192
- 4. Total word count across all sources
7193
- `;
7194
- }
7195
- async function buildIndex(folder, sourcesDir, onOutput, authStorage, modelId) {
7196
- const files = await readdir(sourcesDir);
7197
- const mdFiles = files.filter((f) => f.endsWith(".md"));
7198
- if (mdFiles.length === 0) {
7199
- throw new Error("No source files found to index");
7200
- }
7201
- const agentsContent = buildAgentsContent(sourcesDir, files);
7202
- const nodeModulesPath = getNodeModulesPath();
7203
- process.env.NODE_PATH = nodeModulesPath;
7204
- const loader = new DefaultResourceLoader({
7205
- cwd: folder,
7206
- agentsFilesOverride: (current) => ({
7207
- agentsFiles: [
7208
- ...current.agentsFiles,
7209
- { path: ".llm-kb/AGENTS.md", content: agentsContent }
7210
- ]
7211
- })
7212
- });
7213
- await loader.reload();
7214
- const model = modelId ? getModels("anthropic").find((m) => m.id === modelId) : void 0;
7215
- const { session } = await createAgentSession({
7216
- cwd: folder,
7217
- resourceLoader: loader,
7218
- tools: [
7219
- createReadTool(folder),
7220
- createBashTool(folder),
7221
- createWriteTool(folder)
7222
- ],
7223
- sessionManager: await createKBSession(folder),
7224
- settingsManager: SettingsManager.inMemory({
7225
- compaction: { enabled: false }
7226
- }),
7227
- ...authStorage ? { authStorage } : {},
7228
- ...model ? { model } : {}
7229
- });
7230
- if (onOutput) {
7231
- session.subscribe((event) => {
7232
- if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
7233
- onOutput(event.assistantMessageEvent.delta);
7234
- }
7235
- });
7236
- }
7237
- session.setSessionName(`index: ${(/* @__PURE__ */ new Date()).toISOString()}`);
7238
- const prompt = `Read each file in .llm-kb/wiki/sources/ (one at a time, just the first 500 characters of each).
7239
- Then write .llm-kb/wiki/index.md with a summary table of all sources.
7240
-
7241
- Include: Source filename, Type (PDF/Excel/Word/etc), Pages (from the JSON if available), a one-line summary, and key topics.
7242
- Add a total word count estimate at the bottom.`;
7243
- await session.prompt(prompt);
7244
- const indexPath = join3(sourcesDir, "..", "index.md");
7245
- try {
7246
- const content = await readFile(indexPath, "utf-8");
7247
- session.dispose();
7248
- return content;
7249
- } catch {
7250
- session.dispose();
7251
- throw new Error("Agent did not create index.md");
7252
- }
7253
- }
7254
-
7255
7231
  export {
7256
7232
  completeSimple,
7233
+ resolveModelCandidates,
7234
+ getApiKeyForProvider,
7257
7235
  continueKBSession,
7258
7236
  createKBSession,
7259
- getNodeModulesPath,
7260
- buildIndex
7237
+ getNodeModulesPath
7261
7238
  };
@@ -0,0 +1,218 @@
1
+ import {
2
+ createKBSession,
3
+ getNodeModulesPath,
4
+ resolveModelCandidates
5
+ } from "./chunk-3WBSKCCH.js";
6
+
7
+ // src/indexer.ts
8
+ import {
9
+ createAgentSession,
10
+ createBashTool,
11
+ createReadTool,
12
+ createWriteTool,
13
+ DefaultResourceLoader,
14
+ SettingsManager
15
+ } from "@mariozechner/pi-coding-agent";
16
+ import { readdir, readFile } from "fs/promises";
17
+ import { join } from "path";
18
/**
 * Renders the AGENTS.md text handed to the indexing agent: where the parsed
 * sources live, how to open Office files from Node.js, and what the index
 * file should contain.
 *
 * @param {string} sourcesDir - Not read by this function; kept for the caller's signature.
 * @param {string[]} files - File names in the sources directory; only names
 *   ending in ".md" are listed.
 * @returns {string} Markdown text ending with a newline.
 */
function buildAgentsContent(sourcesDir, files) {
  const listedSources = [];
  for (const fileName of files) {
    if (fileName.endsWith(".md")) {
      listedSources.push(` - ${fileName}`);
    }
  }
  const lines = [
    "# llm-kb Knowledge Base",
    "",
    "## How to access documents",
    "",
    "### PDFs (pre-parsed)",
    "PDFs have been parsed to markdown with bounding boxes.",
    "Read the markdown versions in `.llm-kb/wiki/sources/` instead of the raw PDFs.",
    "",
    "Available parsed sources:",
    listedSources.join("\n"),
    "",
    "### Other file types (Excel, Word, PowerPoint)",
    "You have bash and read tools. Use bash to run Node.js scripts.",
    "Libraries are pre-installed via require().",
    "",
    "For .docx (structured XML \u2014 ZIP containing word/document.xml):",
    "const AdmZip = require('adm-zip');",
    "const zip = new AdmZip('file.docx');",
    "const xml = zip.readAsText('word/document.xml');",
    "// Parse XML to extract headings and first paragraphs for summary",
    "",
    "For .xlsx use exceljs:",
    "const ExcelJS = require('exceljs');",
    "const wb = new ExcelJS.Workbook();",
    "await wb.xlsx.readFile('file.xlsx');",
    "const sheet = wb.getWorksheet(1);",
    "",
    "For .pptx use officeparser:",
    "const officeparser = require('officeparser');",
    "const text = await officeparser.parseOfficeAsync('file.pptx');",
    "",
    "## Index file",
    "Write the index to `.llm-kb/wiki/index.md`.",
    "",
    "The index should be a markdown file with:",
    "1. A title and last-updated timestamp",
    "2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics",
    "3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)",
    "4. Total word count across all sources",
    // Trailing empty entry produces the final newline.
    ""
  ];
  return lines.join("\n");
}
61
// Maximum number of source previews sent to the model in one prompt.
const INDEX_BATCH_SIZE = 100;
// Number of leading characters of each source used as its preview.
const PREVIEW_CHARS = 800;

/**
 * Reads a preview of every markdown source and formats each as a prompt
 * snippet, writing a single-line progress indicator to stdout while it works.
 *
 * @param {string} sourcesDir - Directory containing the parsed sources.
 * @param {string[]} mdFiles - Markdown file names to preview.
 * @param {Set<string>} jsonNames - Names of the ".json" files in sourcesDir.
 * @returns {Promise<string[]>} One snippet per markdown file, in input order.
 */
async function readSourcePreviews(sourcesDir, mdFiles, jsonNames) {
  const snippets = [];
  const total = mdFiles.length;
  const cols = process.stdout.columns || 80;
  for (const [i, f] of mdFiles.entries()) {
    const pct = Math.round((i + 1) / total * 100);
    const name = f.length > 30 ? `${f.slice(0, 27)}...` : f;
    process.stdout.write(`\r Reading sources... ${i + 1}/${total} (${pct}%) ${name}`.padEnd(cols));
    try {
      const content = await readFile(join(sourcesDir, f), "utf-8");
      const preview = content.slice(0, PREVIEW_CHARS);
      const jsonName = f.replace(/\.md$/, ".json");
      let pages = 0;
      if (jsonNames.has(jsonName)) {
        try {
          const jsonText = await readFile(join(sourcesDir, jsonName), "utf-8");
          const match = jsonText.match(/"totalPages"\s*:\s*(\d+)/);
          if (match) pages = Number.parseInt(match[1], 10);
        } catch {
          // The page count is optional; an unreadable sidecar just omits it.
        }
      }
      snippets.push(`### ${f}${pages > 0 ? ` (${pages} pages)` : ""}\n${preview}\n`);
    } catch {
      // Keep the source in the index even when its content cannot be read.
      snippets.push(`### ${f}\n(could not read)\n`);
    }
  }
  // Erase the progress line before printing the summary.
  process.stdout.write(`\r${"".padEnd(cols)}\r`);
  process.stdout.write(` Read ${mdFiles.length} source previews\n`);
  return snippets;
}

/**
 * Builds the prompt for one batch of previews. A single batch asks for the
 * whole index; otherwise intermediate batches ask for table rows only and the
 * final batch asks the agent to merge earlier rows and write index.md.
 *
 * @param {number} b - Zero-based batch index.
 * @param {number} batchCount - Total number of batches.
 * @param {number} totalSources - Total number of markdown sources.
 * @param {string} batchContent - Joined preview snippets for this batch.
 * @param {string[]} previousRows - Assistant text collected from earlier batches.
 * @returns {string} Prompt text.
 */
function composeIndexPrompt(b, batchCount, totalSources, batchContent, previousRows) {
  const firstSource = b * INDEX_BATCH_SIZE + 1;
  if (batchCount === 1) {
    return `Here are previews of all ${totalSources} source files in this knowledge base. Generate a summary table in markdown.

${batchContent}

Write .llm-kb/wiki/index.md with:
1. Title and last-updated timestamp
2. A markdown table with columns: Source, Type, Pages, Summary, Key Topics
3. One row per source with a one-line summary
4. Total count at the bottom

Do NOT read any files \u2014 all the data you need is above.`;
  }
  if (b < batchCount - 1) {
    const lastSource = Math.min((b + 1) * INDEX_BATCH_SIZE, totalSources);
    return `Here are previews of source files ${firstSource}-${lastSource} of ${totalSources}. Generate summary table rows ONLY (no header, no footer).

${batchContent}

Output ONLY markdown table rows \u2014 one per source. Columns: Source, Type, Pages, Summary, Key Topics.
Do NOT read any files.`;
  }
  return `Here are the remaining source file previews (${firstSource}-${totalSources} of ${totalSources}).

${batchContent}

Output ONLY markdown table rows for these sources. Columns: Source, Type, Pages, Summary, Key Topics.

Then combine with the previous batch results below and write the final .llm-kb/wiki/index.md:

Previous batch rows:
${previousRows.join("\n")}

Write the complete index.md with title, timestamp, full table (header + all rows), and total count.`;
}

/**
 * Generates `index.md` (one directory above `sourcesDir`) by sending source
 * previews to an agent session, in batches of INDEX_BATCH_SIZE, and returns
 * the written file's content. Each batch is retried on the next resolved
 * model candidate when an attempt fails.
 *
 * Side effects: writes progress to stdout, sets process.env.NODE_PATH, and
 * warns on the console before each retry.
 *
 * @param {string} folder - Working directory for the agent session and tools.
 * @param {string} sourcesDir - Directory containing the parsed sources.
 * @param {(delta: string) => void} [onOutput] - Receives streamed assistant text.
 * @param {object} [authStorage] - Credential store forwarded to the session
 *   and to model resolution.
 * @param {string} [modelId] - Preferred model; when omitted the session's
 *   default model is used.
 * @returns {Promise<string>} Content of the generated index.md.
 * @throws {Error} "No source files found to index" when there are no ".md"
 *   files; "No usable model found ..." when `modelId` cannot be resolved;
 *   "Agent did not create index.md" (with the read error as `cause`) when the
 *   last attempt finishes without producing the file; otherwise the error of
 *   the last failed attempt.
 */
async function buildIndex(folder, sourcesDir, onOutput, authStorage, modelId) {
  const files = await readdir(sourcesDir);
  const mdFiles = files.filter((f) => f.endsWith(".md"));
  if (mdFiles.length === 0) {
    throw new Error("No source files found to index");
  }
  const jsonNames = new Set(files.filter((f) => f.endsWith(".json")));
  const snippets = await readSourcePreviews(sourcesDir, mdFiles, jsonNames);
  const batches = [];
  for (let i = 0; i < snippets.length; i += INDEX_BATCH_SIZE) {
    batches.push(snippets.slice(i, i + INDEX_BATCH_SIZE));
  }
  const agentsContent = buildAgentsContent(sourcesDir, files);
  process.env.NODE_PATH = getNodeModulesPath();
  const candidates = modelId ? await resolveModelCandidates(modelId, authStorage, "index") : [];
  if (modelId && candidates.length === 0) {
    throw new Error(`No usable model found for '${modelId}'. Configure Anthropic, OpenRouter, or OpenAI credentials.`);
  }
  const indexPath = join(sourcesDir, "..", "index.md");
  // Without an explicit model, make exactly one attempt with the session default.
  const attemptCandidates = candidates.length > 0 ? candidates : [{ provider: "default", candidateId: "default", model: void 0 }];
  const batchResults = [];
  for (let b = 0; b < batches.length; b++) {
    const isFinalBatch = b === batches.length - 1;
    const batchLabel = batches.length > 1 ? ` (batch ${b + 1}/${batches.length})` : "";
    process.stdout.write(` Generating index${batchLabel}...\n`);
    const prompt = composeIndexPrompt(b, batches.length, mdFiles.length, batches[b].join("\n---\n\n"), batchResults);
    for (let i = 0; i < attemptCandidates.length; i++) {
      const candidate = attemptCandidates[i];
      const loader = new DefaultResourceLoader({
        cwd: folder,
        agentsFilesOverride: (current) => ({
          agentsFiles: [
            ...current.agentsFiles,
            { path: ".llm-kb/AGENTS.md", content: agentsContent }
          ]
        })
      });
      await loader.reload();
      const { session } = await createAgentSession({
        cwd: folder,
        resourceLoader: loader,
        tools: [
          createReadTool(folder),
          createBashTool(folder),
          createWriteTool(folder)
        ],
        sessionManager: await createKBSession(folder),
        settingsManager: SettingsManager.inMemory({
          compaction: { enabled: false }
        }),
        ...authStorage ? { authStorage } : {},
        ...candidate.model ? { model: candidate.model } : {}
      });
      if (onOutput) {
        session.subscribe((event) => {
          if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
            onOutput(event.assistantMessageEvent.delta);
          }
        });
      }
      session.setSessionName(`index: ${(/* @__PURE__ */ new Date()).toISOString()}`);
      try {
        await session.prompt(prompt);
        if (isFinalBatch) {
          try {
            return await readFile(indexPath, "utf-8");
          } catch (cause) {
            // Report the real problem instead of a bare ENOENT; the outer
            // catch still retries on the next candidate when there is one.
            throw new Error("Agent did not create index.md", { cause });
          }
        }
        // Intermediate batch: keep the assistant's rows for the final merge.
        const lastAssistant = [...session.state.messages].reverse().find((m) => m.role === "assistant");
        const text = lastAssistant?.content?.filter((part) => part.type === "text").map((part) => part.text).join("") ?? "";
        batchResults.push(text);
        break;
      } catch (error) {
        const next = attemptCandidates[i + 1];
        if (!next) {
          throw error instanceof Error ? error : new Error("Agent did not create index.md", { cause: error });
        }
        const detail = error instanceof Error ? error.message : String(error);
        console.warn(` Index attempt failed on ${candidate.provider}:${candidate.model?.id ?? candidate.candidateId} (${detail}). Retrying with ${next.provider}:${next.model?.id ?? next.candidateId}...`);
      } finally {
        // Runs on return, break, retry and throw alike, so the session is
        // always released exactly once.
        session.dispose();
      }
    }
  }
  throw new Error("Agent did not create index.md");
}
215
+
216
+ export {
217
+ buildIndex
218
+ };