llm-kb 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/PHASE2_SPEC.md ADDED
@@ -0,0 +1,274 @@
1
+ # llm-kb — Phase 2: Query Engine
2
+
3
+ > **Goal:** `llm-kb query "question" --folder ./research` works from the terminal.
4
+ > **Depends on:** Phase 1 (ingest pipeline — complete)
5
+ > **Blog:** Part 3 of the series
6
+
7
+ ---
8
+
9
+ ## What Success Looks Like
10
+
11
+ ```bash
12
+ llm-kb query "what are the reserve requirements?" --folder ./research
13
+ ```
14
+
15
+ ```
16
+ Reading index... 12 sources
17
+ Selected: reserve-policy.md, q3-results.md, board-deck.md
18
+ Reading 3 files...
19
+
20
+ Reserve requirements are defined in two documents:
21
+
22
+ 1. **Reserve Policy** (reserve-policy.md, p.3): Minimum reserve
23
+ ratio of 12% of total assets, reviewed quarterly.
24
+
25
+ 2. **Q3 Results** (q3-results.md, p.8): Current reserve ratio
26
+ is 14.2%, above the 12% minimum. Management notes this
27
+ provides a 2.2% buffer against regulatory changes.
28
+
29
+ Sources: reserve-policy.md (p.3), q3-results.md (p.8)
30
+ ```
31
+
32
+ That's the shape: file selection visible, citations inline, synthesis across sources.
33
+
34
+ ---
35
+
36
+ ## Two Modes
37
+
38
+ ### Query (read-only)
39
+
40
+ ```bash
41
+ llm-kb query "what changed in Q4 guidance?" --folder ./research
42
+ ```
43
+
44
+ The agent reads `index.md`, picks files, reads them, answers. **Cannot modify anything.** Tools: `createReadTool` only.
45
+
46
+ ### Research (read + write)
47
+
48
+ ```bash
49
+ llm-kb query "compare pipeline coverage to revenue target" --folder ./research --save
50
+ ```
51
+
52
+ Same as query, but the answer is also saved to `.llm-kb/wiki/outputs/`. The watcher detects the new file and re-indexes, so the next query can reference the analysis.
53
+
54
+ Tools: `createReadTool` + `createWriteTool` + `createBashTool`.
55
+
56
+ The `--save` flag switches from query mode to research mode.
57
+
58
+ ---
59
+
60
+ ## Architecture
61
+
62
+ Same pattern as the indexer — a Pi SDK session with different tools:
63
+
64
+ ```typescript
65
+ export async function query(
66
+ folder: string,
67
+ question: string,
68
+ options: { save?: boolean }
69
+ ) {
70
+ const sourcesDir = join(folder, ".llm-kb", "wiki", "sources");
71
+ const outputsDir = join(folder, ".llm-kb", "wiki", "outputs");
72
+
73
+ // Build AGENTS.md for query context
74
+ const agentsContent = buildQueryAgents(sourcesDir, options.save);
75
+
76
+ const loader = new DefaultResourceLoader({
77
+ cwd: folder,
78
+ agentsFilesOverride: (current) => ({
79
+ agentsFiles: [
80
+ ...current.agentsFiles,
81
+ { path: ".llm-kb/AGENTS.md", content: agentsContent },
82
+ ],
83
+ }),
84
+ });
85
+ await loader.reload();
86
+
87
+ const tools = [createReadTool(folder)];
88
+ if (options.save) {
89
+ tools.push(createWriteTool(folder), createBashTool(folder));
90
+ }
91
+
92
+ const { session } = await createAgentSession({
93
+ cwd: folder,
94
+ resourceLoader: loader,
95
+ tools,
96
+ sessionManager: SessionManager.inMemory(),
97
+ settingsManager: SettingsManager.inMemory({
98
+ compaction: { enabled: false },
99
+ }),
100
+ });
101
+
102
+ // Stream output to terminal
103
+ session.subscribe((event) => {
104
+ if (
105
+ event.type === "message_update" &&
106
+ event.assistantMessageEvent.type === "text_delta"
107
+ ) {
108
+ process.stdout.write(event.assistantMessageEvent.delta);
109
+ }
110
+ });
111
+
112
+ await session.prompt(question);
113
+ session.dispose();
114
+ }
115
+ ```
116
+
117
+ ### The Query AGENTS.md
118
+
119
+ The injected `AGENTS.md` for query mode tells the agent:
120
+
121
+ ```markdown
122
+ # llm-kb Knowledge Base — Query Mode
123
+
124
+ ## How to answer questions
125
+
126
+ 1. FIRST read .llm-kb/wiki/index.md to see all available sources
127
+ 2. Based on the question, select the most relevant source files
128
+ 3. Read those files in full (not just the first 500 chars)
129
+ 4. Answer with inline citations: (filename, page/section)
130
+ 5. If the answer requires cross-referencing, read additional files
131
+ 6. Prefer primary sources over previous analyses in outputs/
132
+
133
+ ## Available sources
134
+ (dynamically generated list of .md files in sources/)
135
+
136
+ ## Available libraries for non-PDF files
137
+ - exceljs — for .xlsx/.xls
138
+ - mammoth — for .docx
139
+ - officeparser — for .pptx
140
+ In research mode (`--save`), write a quick Node.js script via bash to read these when needed. Query mode is read-only and has no bash tool.
141
+
142
+ ## Rules
143
+ - Always cite sources with filename and page number
144
+ - If you can't find the answer, say so — don't hallucinate
145
+ - Read the FULL file, not just the beginning
146
+ ```
147
+
148
+ For research mode, add:
149
+
150
+ ```markdown
151
+ ## Research Mode
152
+ You can save your analysis to .llm-kb/wiki/outputs/.
153
+ Use a descriptive filename (e.g., coverage-analysis.md).
154
+ The file watcher will detect it and update the index.
155
+ ```
156
+
157
+ ---
158
+
159
+ ## CLI Integration
160
+
161
+ Add `query` command to Commander:
162
+
163
+ ```typescript
164
+ program
165
+ .command("query")
166
+ .description("Ask a question across your knowledge base")
167
+ .argument("<question>", "Your question")
168
+ .option("--folder <path>", "Path to document folder", ".")
169
+ .option("--save", "Save the answer to wiki/outputs/ (research mode)")
170
+ .action(async (question, options) => {
171
+ const folder = resolve(options.folder);
172
+
173
+ // Check if .llm-kb exists
174
+ if (!existsSync(join(folder, ".llm-kb"))) {
175
+ console.error(chalk.red("No knowledge base found. Run 'llm-kb run' first."));
176
+ process.exit(1);
177
+ }
178
+
179
+ await query(folder, question, { save: options.save });
180
+ });
181
+ ```
182
+
183
+ ---
184
+
185
+ ## Trace Logging (Prep for Eval — Phase 4)
186
+
187
+ Every query gets logged to `.llm-kb/traces/`:
188
+
189
+ ```json
190
+ {
191
+ "timestamp": "2026-04-05T14:30:00Z",
192
+ "question": "what are the reserve requirements?",
193
+ "mode": "query",
194
+ "filesRead": ["index.md", "reserve-policy.md", "q3-results.md"],
195
+ "filesAvailable": ["reserve-policy.md", "q3-results.md", "board-deck.md", "pipeline.md"],
196
+ "answer": "Reserve requirements are defined in two documents...",
197
+ "citations": [
198
+ { "file": "reserve-policy.md", "location": "p.3", "claim": "Minimum reserve ratio of 12%" },
199
+ { "file": "q3-results.md", "location": "p.8", "claim": "Current reserve ratio is 14.2%" }
200
+ ],
201
+ "tokensUsed": 3800,
202
+ "durationMs": 4200,
203
+ "model": "claude-sonnet-4"
204
+ }
205
+ ```
206
+
207
+ Implementation: wrap the session to intercept tool calls and capture which files were read. Save trace JSON after session completes.
208
+
209
+ The eval agent (Phase 4) reads these traces to check citations against sources.
210
+
211
+ ---
212
+
213
+ ## Streaming Output
214
+
215
+ Terminal queries should stream: the user sees the answer appear word by word instead of waiting for the full response. The `session.subscribe()` handler writes deltas to stdout.
216
+
217
+ For the `run` command (when we add query to the web UI in Phase 3), streaming goes through the Vercel AI SDK protocol.
218
+
219
+ ---
220
+
221
+ ## Constraints
222
+
223
+ 1. **Query must work without the web server running.** `llm-kb query` is standalone — it reads `.llm-kb/` directly. No dependency on `llm-kb run`.
224
+
225
+ 2. **Read-only by default.** Query mode cannot modify files. Only `--save` enables write.
226
+
227
+ 3. **Index must exist.** If `.llm-kb/wiki/index.md` doesn't exist, error out: "No knowledge base found. Run 'llm-kb run' first."
228
+
229
+ 4. **Graceful on empty results.** If the agent can't find relevant files, it should say "I couldn't find sources relevant to this question" — not hallucinate.
230
+
231
+ 5. **Token-conscious.** The agent reads index.md (~200 tokens for 50 sources) first, then only the files it selects (3-7 typically). Don't read all sources.
232
+
233
+ ---
234
+
235
+ ## Build Order (Slices)
236
+
237
+ | Slice | What | Demoable? |
238
+ |---|---|---|
239
+ | 1 | `query` command + read-only session + streaming | ✅ Ask questions, get answers |
240
+ | 2 | `--save` flag + research mode + write to outputs/ | ✅ Answers compound in wiki |
241
+ | 3 | Trace logging (JSON per query) | Prep for eval |
242
+ | 4 | `status` command (show KB stats) | ✅ Nice-to-have |
243
+
244
+ ---
245
+
246
+ ## Definition of Done
247
+
248
+ - [ ] `llm-kb query "question" --folder ./research` returns a cited answer
249
+ - [ ] Answer streams to terminal (word by word, not all at once)
250
+ - [ ] Agent reads index.md first, then selects and reads relevant source files
251
+ - [ ] `--save` flag saves the answer to `.llm-kb/wiki/outputs/`
252
+ - [ ] Saved answers get detected by watcher and re-indexed
253
+ - [ ] Query traces logged to `.llm-kb/traces/` as JSON
254
+ - [ ] Error if no `.llm-kb/` exists ("run 'llm-kb run' first")
255
+ - [ ] Non-PDF files (Excel, Word) readable by agent via bundled libraries
256
+ - [ ] Blog Part 3 written with real terminal output
257
+
258
+ ---
259
+
260
+ ## What This Enables
261
+
262
+ With query working, the demo becomes:
263
+
264
+ ```bash
265
+ npx llm-kb run ./my-documents # ingest
266
+ llm-kb query "what changed?" # ask
267
+ llm-kb query "compare X vs Y" --save # research (compounds)
268
+ ```
269
+
270
+ Three commands. Ingest → Query → Research. That's a product, not a script.
271
+
272
+ ---
273
+
274
+ *Phase 2 spec written April 4, 2026. DeltaXY.*
package/README.md CHANGED
@@ -22,12 +22,14 @@ Pi handles the LLM auth — no separate API key configuration needed.
22
22
 
23
23
  ## What It Does
24
24
 
25
- ```
25
+ ### Ingest
26
+
27
+ ```bash
26
28
  llm-kb run ./my-documents
27
29
  ```
28
30
 
29
31
  ```
30
- llm-kb v0.0.1
32
+ llm-kb v0.2.0
31
33
 
32
34
  Scanning ./my-documents...
33
35
  Found 9 files (9 PDF)
@@ -46,6 +48,24 @@ Scanning ./my-documents...
46
48
  3. **Builds an index** — Pi SDK agent reads all sources and writes `index.md` with summaries
47
49
  4. **Watches** — drop a new PDF in while it's running, it gets parsed and indexed automatically
48
50
 
51
+ ### Query
52
+
53
+ ```bash
54
+ # From inside the documents folder (auto-detects .llm-kb/)
55
+ llm-kb query "what are the key findings?"
56
+
57
+ # From anywhere, with explicit folder
58
+ llm-kb query "compare Q3 vs Q4" --folder ./my-documents
59
+
60
+ # Research mode — saves the answer to wiki/outputs/ and re-indexes
61
+ llm-kb query "summarize all revenue data" --save
62
+ ```
63
+
64
+ The agent reads `index.md`, selects relevant files, and streams a cited answer to the terminal.
65
+
66
+ - **Query mode** — read-only. The agent can only read your files.
67
+ - **Research mode** (`--save`) — read + write + bash. The agent saves answers to `outputs/`, re-indexes, and can write scripts to read Excel/Word files. Answers compound over time.
68
+
49
69
  ### What It Creates
50
70
 
51
71
  ```
@@ -0,0 +1,118 @@
1
+ // src/indexer.ts
2
+ import {
3
+ createAgentSession,
4
+ createBashTool,
5
+ createReadTool,
6
+ createWriteTool,
7
+ DefaultResourceLoader,
8
+ SessionManager,
9
+ SettingsManager
10
+ } from "@mariozechner/pi-coding-agent";
11
+ import { readdir, readFile } from "fs/promises";
12
+ import { join, dirname } from "path";
13
+ import { fileURLToPath } from "url";
14
+ var __filename = fileURLToPath(import.meta.url);
15
+ var __dirname = dirname(__filename);
16
+ function getNodeModulesPath() {
17
+ let dir = __dirname;
18
+ for (let i = 0; i < 5; i++) {
19
+ const candidate = join(dir, "node_modules");
20
+ try {
21
+ return candidate;
22
+ } catch {
23
+ dir = dirname(dir);
24
+ }
25
+ }
26
+ return join(process.cwd(), "node_modules");
27
+ }
28
+ function buildAgentsContent(sourcesDir, files) {
29
+ const sourceList = files.filter((f) => f.endsWith(".md")).map((f) => ` - ${f}`).join("\n");
30
+ return `# llm-kb Knowledge Base
31
+
32
+ ## How to access documents
33
+
34
+ ### PDFs (pre-parsed)
35
+ PDFs have been parsed to markdown with bounding boxes.
36
+ Read the markdown versions in \`.llm-kb/wiki/sources/\` instead of the raw PDFs.
37
+
38
+ Available parsed sources:
39
+ ${sourceList}
40
+
41
+ ### Other file types (Excel, Word, PowerPoint, CSV, images)
42
+ You have bash and read tools. These libraries are pre-installed and available:
43
+ - **exceljs** \u2014 for .xlsx/.xls files
44
+ - **mammoth** \u2014 for .docx files
45
+ - **officeparser** \u2014 for .pptx files
46
+ - **csv-parse** \u2014 built into Node.js, use fs + split for .csv
47
+
48
+ Write a quick Node.js script to extract content when needed.
49
+
50
+ ## Index file
51
+ Write the index to \`.llm-kb/wiki/index.md\`.
52
+
53
+ The index should be a markdown file with:
54
+ 1. A title and last-updated timestamp
55
+ 2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics
56
+ 3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)
57
+ 4. Total word count across all sources
58
+ `;
59
+ }
60
+ async function buildIndex(folder, sourcesDir, onOutput) {
61
+ const files = await readdir(sourcesDir);
62
+ const mdFiles = files.filter((f) => f.endsWith(".md"));
63
+ if (mdFiles.length === 0) {
64
+ throw new Error("No source files found to index");
65
+ }
66
+ const agentsContent = buildAgentsContent(sourcesDir, files);
67
+ const nodeModulesPath = getNodeModulesPath();
68
+ process.env.NODE_PATH = nodeModulesPath;
69
+ const loader = new DefaultResourceLoader({
70
+ cwd: folder,
71
+ agentsFilesOverride: (current) => ({
72
+ agentsFiles: [
73
+ ...current.agentsFiles,
74
+ { path: ".llm-kb/AGENTS.md", content: agentsContent }
75
+ ]
76
+ })
77
+ });
78
+ await loader.reload();
79
+ const { session } = await createAgentSession({
80
+ cwd: folder,
81
+ resourceLoader: loader,
82
+ tools: [
83
+ createReadTool(folder),
84
+ createBashTool(folder),
85
+ createWriteTool(folder)
86
+ ],
87
+ sessionManager: SessionManager.inMemory(),
88
+ settingsManager: SettingsManager.inMemory({
89
+ compaction: { enabled: false }
90
+ })
91
+ });
92
+ if (onOutput) {
93
+ session.subscribe((event) => {
94
+ if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
95
+ onOutput(event.assistantMessageEvent.delta);
96
+ }
97
+ });
98
+ }
99
+ const prompt = `Read each file in .llm-kb/wiki/sources/ (one at a time, just the first 500 characters of each).
100
+ Then write .llm-kb/wiki/index.md with a summary table of all sources.
101
+
102
+ Include: Source filename, Type (PDF/Excel/Word/etc), Pages (from the JSON if available), a one-line summary, and key topics.
103
+ Add a total word count estimate at the bottom.`;
104
+ await session.prompt(prompt);
105
+ const indexPath = join(sourcesDir, "..", "index.md");
106
+ try {
107
+ const content = await readFile(indexPath, "utf-8");
108
+ session.dispose();
109
+ return content;
110
+ } catch {
111
+ session.dispose();
112
+ throw new Error("Agent did not create index.md");
113
+ }
114
+ }
115
+
116
+ export {
117
+ buildIndex
118
+ };
package/bin/cli.js CHANGED
@@ -1,4 +1,7 @@
1
1
  #!/usr/bin/env node
2
+ import {
3
+ buildIndex
4
+ } from "./chunk-MYQ36JJB.js";
2
5
 
3
6
  // src/cli.ts
4
7
  import { Command } from "commander";
@@ -123,121 +126,6 @@ ${p.text}`).join("\n\n---\n\n");
123
126
  };
124
127
  }
125
128
 
126
- // src/indexer.ts
127
- import {
128
- createAgentSession,
129
- createBashTool,
130
- createReadTool,
131
- createWriteTool,
132
- DefaultResourceLoader,
133
- SessionManager,
134
- SettingsManager
135
- } from "@mariozechner/pi-coding-agent";
136
- import { readdir as readdir2, readFile } from "fs/promises";
137
- import { join as join2, dirname } from "path";
138
- import { fileURLToPath } from "url";
139
- var __filename = fileURLToPath(import.meta.url);
140
- var __dirname = dirname(__filename);
141
- function getNodeModulesPath() {
142
- let dir = __dirname;
143
- for (let i = 0; i < 5; i++) {
144
- const candidate = join2(dir, "node_modules");
145
- try {
146
- return candidate;
147
- } catch {
148
- dir = dirname(dir);
149
- }
150
- }
151
- return join2(process.cwd(), "node_modules");
152
- }
153
- function buildAgentsContent(sourcesDir, files) {
154
- const sourceList = files.filter((f) => f.endsWith(".md")).map((f) => ` - ${f}`).join("\n");
155
- return `# llm-kb Knowledge Base
156
-
157
- ## How to access documents
158
-
159
- ### PDFs (pre-parsed)
160
- PDFs have been parsed to markdown with bounding boxes.
161
- Read the markdown versions in \`.llm-kb/wiki/sources/\` instead of the raw PDFs.
162
-
163
- Available parsed sources:
164
- ${sourceList}
165
-
166
- ### Other file types (Excel, Word, PowerPoint, CSV, images)
167
- You have bash and read tools. These libraries are pre-installed and available:
168
- - **exceljs** \u2014 for .xlsx/.xls files
169
- - **mammoth** \u2014 for .docx files
170
- - **officeparser** \u2014 for .pptx files
171
- - **csv-parse** \u2014 built into Node.js, use fs + split for .csv
172
-
173
- Write a quick Node.js script to extract content when needed.
174
-
175
- ## Index file
176
- Write the index to \`.llm-kb/wiki/index.md\`.
177
-
178
- The index should be a markdown file with:
179
- 1. A title and last-updated timestamp
180
- 2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics
181
- 3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)
182
- 4. Total word count across all sources
183
- `;
184
- }
185
- async function buildIndex(folder, sourcesDir, onOutput) {
186
- const files = await readdir2(sourcesDir);
187
- const mdFiles = files.filter((f) => f.endsWith(".md"));
188
- if (mdFiles.length === 0) {
189
- throw new Error("No source files found to index");
190
- }
191
- const agentsContent = buildAgentsContent(sourcesDir, files);
192
- const nodeModulesPath = getNodeModulesPath();
193
- process.env.NODE_PATH = nodeModulesPath;
194
- const loader = new DefaultResourceLoader({
195
- cwd: folder,
196
- agentsFilesOverride: (current) => ({
197
- agentsFiles: [
198
- ...current.agentsFiles,
199
- { path: ".llm-kb/AGENTS.md", content: agentsContent }
200
- ]
201
- })
202
- });
203
- await loader.reload();
204
- const { session } = await createAgentSession({
205
- cwd: folder,
206
- resourceLoader: loader,
207
- tools: [
208
- createReadTool(folder),
209
- createBashTool(folder),
210
- createWriteTool(folder)
211
- ],
212
- sessionManager: SessionManager.inMemory(),
213
- settingsManager: SettingsManager.inMemory({
214
- compaction: { enabled: false }
215
- })
216
- });
217
- if (onOutput) {
218
- session.subscribe((event) => {
219
- if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
220
- onOutput(event.assistantMessageEvent.delta);
221
- }
222
- });
223
- }
224
- const prompt = `Read each file in .llm-kb/wiki/sources/ (one at a time, just the first 500 characters of each).
225
- Then write .llm-kb/wiki/index.md with a summary table of all sources.
226
-
227
- Include: Source filename, Type (PDF/Excel/Word/etc), Pages (from the JSON if available), a one-line summary, and key topics.
228
- Add a total word count estimate at the bottom.`;
229
- await session.prompt(prompt);
230
- const indexPath = join2(sourcesDir, "..", "index.md");
231
- try {
232
- const content = await readFile(indexPath, "utf-8");
233
- session.dispose();
234
- return content;
235
- } catch {
236
- session.dispose();
237
- throw new Error("Agent did not create index.md");
238
- }
239
- }
240
-
241
129
  // src/watcher.ts
242
130
  import { watch } from "chokidar";
243
131
  import { extname as extname2, basename as basename2 } from "path";
@@ -307,18 +195,145 @@ function startWatcher({ folder, sourcesDir, debounceMs = 2e3 }) {
307
195
  return watcher;
308
196
  }
309
197
 
310
- // src/cli.ts
198
+ // src/query.ts
199
+ import {
200
+ createAgentSession,
201
+ createBashTool,
202
+ createReadTool,
203
+ createWriteTool,
204
+ DefaultResourceLoader,
205
+ SessionManager,
206
+ SettingsManager
207
+ } from "@mariozechner/pi-coding-agent";
208
+ import { readdir as readdir2, mkdir as mkdir2 } from "fs/promises";
209
+ import { join as join3, dirname } from "path";
210
+ import { fileURLToPath } from "url";
211
+ var __dirname = dirname(fileURLToPath(import.meta.url));
212
+ function getNodeModulesPath() {
213
+ let dir = __dirname;
214
+ for (let i = 0; i < 5; i++) {
215
+ const candidate = join3(dir, "node_modules");
216
+ try {
217
+ return candidate;
218
+ } catch {
219
+ dir = dirname(dir);
220
+ }
221
+ }
222
+ return join3(process.cwd(), "node_modules");
223
+ }
224
+ function buildQueryAgents(sourceFiles, save) {
225
+ const sourceList = sourceFiles.map((f) => ` - ${f}`).join("\n");
226
+ let content = `# llm-kb Knowledge Base \u2014 Query Mode
227
+
228
+ ## How to answer questions
229
+
230
+ 1. FIRST read .llm-kb/wiki/index.md to understand all available sources
231
+ 2. Based on the question, select the most relevant source files (usually 2-5)
232
+ 3. Read those source files in full from .llm-kb/wiki/sources/
233
+ 4. Answer with inline citations: (filename, page number)
234
+ 5. If the answer requires cross-referencing multiple files, read additional ones
235
+ 6. If you can't find the answer, say so \u2014 don't hallucinate
236
+
237
+ ## Available parsed sources
238
+ ${sourceList}
239
+
240
+ ## Non-PDF files
241
+ If the user's folder has Excel, Word, or PowerPoint files, these libraries are available:
242
+ - **exceljs** \u2014 for .xlsx/.xls files
243
+ - **mammoth** \u2014 for .docx files
244
+ - **officeparser** \u2014 for .pptx files
245
+ Write a quick Node.js script via bash to read them.
246
+
247
+ ## Rules
248
+ - Always cite sources with filename and page number
249
+ - Read the FULL source file, not just the beginning
250
+ - Prefer primary sources over previous analyses
251
+ `;
252
+ if (save) {
253
+ content += `
254
+ ## Research Mode
255
+ Save your analysis to .llm-kb/wiki/outputs/ with a descriptive filename (e.g., comparison-analysis.md).
256
+ Include the question at the top and all citations.
257
+ `;
258
+ }
259
+ return content;
260
+ }
261
+ async function query(folder, question, options) {
262
+ const sourcesDir = join3(folder, ".llm-kb", "wiki", "sources");
263
+ const files = await readdir2(sourcesDir);
264
+ const mdFiles = files.filter((f) => f.endsWith(".md"));
265
+ if (mdFiles.length === 0) {
266
+ throw new Error("No sources found. Run 'llm-kb run' first to parse documents.");
267
+ }
268
+ if (options.save) {
269
+ await mkdir2(join3(folder, ".llm-kb", "wiki", "outputs"), { recursive: true });
270
+ }
271
+ process.env.NODE_PATH = getNodeModulesPath();
272
+ const agentsContent = buildQueryAgents(mdFiles, !!options.save);
273
+ const loader = new DefaultResourceLoader({
274
+ cwd: folder,
275
+ agentsFilesOverride: (current) => ({
276
+ agentsFiles: [
277
+ ...current.agentsFiles,
278
+ { path: ".llm-kb/AGENTS.md", content: agentsContent }
279
+ ]
280
+ })
281
+ });
282
+ await loader.reload();
283
+ const tools = [createReadTool(folder)];
284
+ if (options.save) {
285
+ tools.push(createBashTool(folder), createWriteTool(folder));
286
+ }
287
+ const { session } = await createAgentSession({
288
+ cwd: folder,
289
+ resourceLoader: loader,
290
+ tools,
291
+ sessionManager: SessionManager.inMemory(),
292
+ settingsManager: SettingsManager.inMemory({
293
+ compaction: { enabled: false }
294
+ })
295
+ });
296
+ session.subscribe((event) => {
297
+ if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
298
+ process.stdout.write(event.assistantMessageEvent.delta);
299
+ }
300
+ });
301
+ await session.prompt(question);
302
+ console.log();
303
+ session.dispose();
304
+ if (options.save) {
305
+ const { buildIndex: buildIndex2 } = await import("./indexer-LSYSZXZX.js");
306
+ await buildIndex2(folder, sourcesDir);
307
+ }
308
+ }
309
+
310
+ // src/resolve-kb.ts
311
311
  import { existsSync } from "fs";
312
- import { mkdir as mkdir2 } from "fs/promises";
313
- import { resolve as resolve2, join as join4 } from "path";
312
+ import { resolve as resolve2, join as join4, dirname as dirname2 } from "path";
313
+ function resolveKnowledgeBase(startDir) {
314
+ let dir = resolve2(startDir);
315
+ while (true) {
316
+ if (existsSync(join4(dir, ".llm-kb"))) {
317
+ return dir;
318
+ }
319
+ const parent = dirname2(dir);
320
+ if (parent === dir) return null;
321
+ dir = parent;
322
+ }
323
+ }
324
+
325
+ // src/cli.ts
326
+ import { existsSync as existsSync2 } from "fs";
327
+ import { mkdir as mkdir3 } from "fs/promises";
328
+ import { resolve as resolve3, join as join5 } from "path";
314
329
  import chalk2 from "chalk";
315
330
  var program = new Command();
316
- program.name("llm-kb").description("Drop files into a folder. Get a knowledge base you can query.").version("0.1.0");
331
+ program.name("llm-kb").description("Drop files into a folder. Get a knowledge base you can query.").version("0.2.0");
317
332
  program.command("run").description("Scan, parse, index, and watch a folder").argument("<folder>", "Path to your documents folder").action(async (folder) => {
318
333
  console.log(`
319
- ${chalk2.bold("llm-kb")} v0.1.0
334
+ ${chalk2.bold("llm-kb")} v0.2.0
320
335
  `);
321
- if (!existsSync(folder)) {
336
+ if (!existsSync2(folder)) {
322
337
  console.error(chalk2.red(`Error: Folder not found: ${folder}`));
323
338
  process.exit(1);
324
339
  }
@@ -331,16 +346,16 @@ ${chalk2.bold("llm-kb")} v0.1.0
331
346
  const pdfs = files.filter((f) => f.ext === ".pdf");
332
347
  console.log(` Found ${chalk2.bold(files.length.toString())} files (${summarize(files)})`);
333
348
  if (pdfs.length === 0) return;
334
- const root = resolve2(folder);
335
- const sourcesDir = join4(root, ".llm-kb", "wiki", "sources");
336
- await mkdir2(sourcesDir, { recursive: true });
349
+ const root = resolve3(folder);
350
+ const sourcesDir = join5(root, ".llm-kb", "wiki", "sources");
351
+ await mkdir3(sourcesDir, { recursive: true });
337
352
  let parsed = 0;
338
353
  let skipped = 0;
339
354
  let failed = 0;
340
355
  const errors = [];
341
356
  for (let i = 0; i < pdfs.length; i++) {
342
357
  const pdf = pdfs[i];
343
- const fullPath = join4(root, pdf.path);
358
+ const fullPath = join5(root, pdf.path);
344
359
  const progress = ` Parsing... ${i + 1}/${pdfs.length} \u2014 ${pdf.name}`;
345
360
  process.stdout.write(`\r${progress.padEnd(80)}`);
346
361
  try {
@@ -378,4 +393,17 @@ ${chalk2.bold("llm-kb")} v0.1.0
378
393
  Watching for new files... (Ctrl+C to stop)`));
379
394
  startWatcher({ folder: root, sourcesDir });
380
395
  });
396
+ program.command("query").description("Ask a question across your knowledge base").argument("<question>", "Your question").option("--folder <path>", "Path to document folder (auto-detects if omitted)").option("--save", "Save the answer to wiki/outputs/ (research mode)").action(async (question, options) => {
397
+ const root = resolveKnowledgeBase(options.folder || process.cwd());
398
+ if (!root) {
399
+ console.error(chalk2.red("No knowledge base found. Run 'llm-kb run <folder>' first."));
400
+ process.exit(1);
401
+ }
402
+ try {
403
+ await query(root, question, { save: options.save });
404
+ } catch (err) {
405
+ console.error(chalk2.red(err.message));
406
+ process.exit(1);
407
+ }
408
+ });
381
409
  program.parse();
@@ -0,0 +1,6 @@
1
+ import {
2
+ buildIndex
3
+ } from "./chunk-MYQ36JJB.js";
4
+ export {
5
+ buildIndex
6
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm-kb",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "LLM-powered knowledge base. Drop documents, build a wiki, ask questions. Inspired by Karpathy.",
5
5
  "bin": {
6
6
  "llm-kb": "./bin/cli.js"
package/plan.md CHANGED
@@ -45,7 +45,11 @@ Config file has no readers yet. Deferred to Phase 2/3. README updated instead.
45
45
  - OCR via env var (local Tesseract or remote Azure bridge)
46
46
  - Auth via Pi SDK (zero config)
47
47
 
48
- **What's next (Phase 2):**
49
- - `llm-kb query "question" --folder ./research` — terminal query
50
- - Pi SDK agent session for Q&A with read-only tools
51
- - Research mode agent writes answers to `outputs/`
48
+ **Phase 2 complete ✅:**
49
+ - `llm-kb query "question"` — auto-detects KB, streams cited answers
50
+ - `--save` flag enables research mode: saves the answer to `outputs/` and re-indexes
51
+ - Query mode is read-only (read tool only). Research mode adds bash + write.
52
+
53
+ **Deferred to Phase 4:**
54
+ - Trace logging (JSON per query: question, filesRead, citations, tokens, duration)
55
+ - Needed for eval, but no eval system yet to consume traces
package/src/cli.ts CHANGED
@@ -5,6 +5,8 @@ import { scan, summarize } from "./scan.js";
5
5
  import { parsePDF } from "./pdf.js";
6
6
  import { buildIndex } from "./indexer.js";
7
7
  import { startWatcher } from "./watcher.js";
8
+ import { query } from "./query.js";
9
+ import { resolveKnowledgeBase } from "./resolve-kb.js";
8
10
  import { existsSync } from "node:fs";
9
11
  import { mkdir } from "node:fs/promises";
10
12
  import { resolve, join } from "node:path";
@@ -15,14 +17,14 @@ const program = new Command();
15
17
  program
16
18
  .name("llm-kb")
17
19
  .description("Drop files into a folder. Get a knowledge base you can query.")
18
- .version("0.1.0");
20
+ .version("0.2.0");
19
21
 
20
22
  program
21
23
  .command("run")
22
24
  .description("Scan, parse, index, and watch a folder")
23
25
  .argument("<folder>", "Path to your documents folder")
24
26
  .action(async (folder: string) => {
25
- console.log(`\n${chalk.bold("llm-kb")} v0.1.0\n`);
27
+ console.log(`\n${chalk.bold("llm-kb")} v0.2.0\n`);
26
28
 
27
29
  if (!existsSync(folder)) {
28
30
  console.error(chalk.red(`Error: Folder not found: ${folder}`));
@@ -105,4 +107,26 @@ program
105
107
  startWatcher({ folder: root, sourcesDir });
106
108
  });
107
109
 
110
// `query` command: locate the knowledge base by walking up from --folder
// (or from the current working directory when the flag is omitted), then run
// the question through query(). Any failure is printed in red and the process
// exits with status 1.
program
  .command("query")
  .description("Ask a question across your knowledge base")
  .argument("<question>", "Your question")
  .option("--folder <path>", "Path to document folder (auto-detects if omitted)")
  .option("--save", "Save the answer to wiki/outputs/ (research mode)")
  .action(async (question: string, options: { folder?: string; save?: boolean }) => {
    const root = resolveKnowledgeBase(options.folder || process.cwd());

    if (!root) {
      console.error(chalk.red("No knowledge base found. Run 'llm-kb run <folder>' first."));
      process.exit(1);
    }

    try {
      await query(root, question, { save: options.save });
    } catch (err: unknown) {
      // Anything can be thrown, not only Error instances; narrow before reading
      // `.message` so a non-Error rejection is not reported as "undefined".
      const message = err instanceof Error ? err.message : String(err);
      console.error(chalk.red(message));
      process.exit(1);
    }
  });
131
+
108
132
  program.parse();
package/src/query.ts ADDED
@@ -0,0 +1,132 @@
1
import {
  createAgentSession,
  createBashTool,
  createReadTool,
  createWriteTool,
  DefaultResourceLoader,
  SessionManager,
  SettingsManager,
} from "@mariozechner/pi-coding-agent";
import { existsSync } from "node:fs";
import { readdir, mkdir } from "node:fs/promises";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
13
+
14
+ const __dirname = dirname(fileURLToPath(import.meta.url));
15
+
16
/**
 * Locate the nearest existing `node_modules` directory at or above `startDir`.
 *
 * The caller assigns the result to `process.env.NODE_PATH`.
 *
 * @param startDir - Directory to start searching from. Defaults to this
 *   module's own directory.
 * @returns The first `node_modules` directory that exists within five levels
 *   of `startDir` (inclusive), or `<cwd>/node_modules` when none is found.
 */
function getNodeModulesPath(startDir: string = __dirname): string {
  const maxLevels = 5;
  let dir = startDir;
  for (let i = 0; i < maxLevels; i++) {
    const candidate = join(dir, "node_modules");
    // join() only builds a string and never throws, so the directory has to be
    // probed explicitly; otherwise the first candidate is always returned.
    if (existsSync(candidate)) {
      return candidate;
    }
    const parent = dirname(dir);
    if (parent === dir) {
      break; // reached the filesystem root; nothing further up to inspect
    }
    dir = parent;
  }
  return join(process.cwd(), "node_modules");
}
24
+
25
/**
 * Build the AGENTS.md text injected into a query session.
 *
 * @param sourceFiles - File names of the parsed markdown sources, listed
 *   verbatim under "Available parsed sources".
 * @param save - `true` for research mode. This appends the "Research Mode"
 *   section and includes the instructions for reading Office files through
 *   bash. With `false` the agent is told that only the read tool is available,
 *   matching the tools the session is actually given.
 * @returns The complete markdown instructions for the agent.
 */
function buildQueryAgents(sourceFiles: string[], save: boolean): string {
  const sourceList = sourceFiles.map((f) => ` - ${f}`).join("\n");

  // The bash tool is only registered when --save is set, so instructions that
  // depend on running scripts must not be given to a read-only session.
  const nonPdfSection = save
    ? `## Non-PDF files
If the user's folder has Excel, Word, or PowerPoint files, these libraries are available:
- **exceljs** — for .xlsx/.xls files
- **mammoth** — for .docx files
- **officeparser** — for .pptx files
Write a quick Node.js script via bash to read them.`
    : `## Non-PDF files
This session is read-only: you only have the read tool.
Excel, Word, and PowerPoint files cannot be parsed in this mode.
If the answer depends on one of them, say so and suggest re-running the query with --save.`;

  let content = `# llm-kb Knowledge Base — Query Mode

## How to answer questions

1. FIRST read .llm-kb/wiki/index.md to understand all available sources
2. Based on the question, select the most relevant source files (usually 2-5)
3. Read those source files in full from .llm-kb/wiki/sources/
4. Answer with inline citations: (filename, page number)
5. If the answer requires cross-referencing multiple files, read additional ones
6. If you can't find the answer, say so — don't hallucinate

## Available parsed sources
${sourceList}

${nonPdfSection}

## Rules
- Always cite sources with filename and page number
- Read the FULL source file, not just the beginning
- Prefer primary sources over previous analyses
`;

  if (save) {
    content += `
## Research Mode
Save your analysis to .llm-kb/wiki/outputs/ with a descriptive filename (e.g., comparison-analysis.md).
Include the question at the top and all citations.
`;
  }

  return content;
}
65
+
66
/**
 * Answer `question` against the knowledge base rooted at `folder`, streaming
 * the assistant's text to stdout as it arrives.
 *
 * The session always gets the read tool. With `options.save` it also gets the
 * bash and write tools, `.llm-kb/wiki/outputs/` is created up front, and the
 * index is rebuilt after the answer completes.
 *
 * @param folder - Directory that contains the `.llm-kb/` folder.
 * @param question - The prompt sent to the agent session.
 * @param options - `save: true` switches to research mode.
 * @throws Error when `.llm-kb/wiki/sources/` is missing or holds no `.md`
 *   files. Errors from the agent session or the re-index propagate unchanged.
 */
export async function query(
  folder: string,
  question: string,
  options: { save?: boolean }
): Promise<void> {
  const sourcesDir = join(folder, ".llm-kb", "wiki", "sources");
  const noSourcesMessage = "No sources found. Run 'llm-kb run' first to parse documents.";

  let files: string[];
  try {
    files = await readdir(sourcesDir);
  } catch (err: unknown) {
    // A knowledge base without wiki/sources/ has nothing parsed yet; report it
    // the same way as an empty directory instead of leaking a raw ENOENT.
    if (err instanceof Error && "code" in err && err.code === "ENOENT") {
      throw new Error(noSourcesMessage);
    }
    throw err;
  }

  const mdFiles = files.filter((f) => f.endsWith(".md"));
  if (mdFiles.length === 0) {
    throw new Error(noSourcesMessage);
  }

  if (options.save) {
    await mkdir(join(folder, ".llm-kb", "wiki", "outputs"), { recursive: true });
  }

  process.env.NODE_PATH = getNodeModulesPath();

  const agentsContent = buildQueryAgents(mdFiles, !!options.save);

  const loader = new DefaultResourceLoader({
    cwd: folder,
    agentsFilesOverride: (current) => ({
      agentsFiles: [
        ...current.agentsFiles,
        { path: ".llm-kb/AGENTS.md", content: agentsContent },
      ],
    }),
  });
  await loader.reload();

  // Read-only by default; bash and write are added only in research mode.
  const tools = [createReadTool(folder)];
  if (options.save) {
    tools.push(createBashTool(folder), createWriteTool(folder));
  }

  const { session } = await createAgentSession({
    cwd: folder,
    resourceLoader: loader,
    tools,
    sessionManager: SessionManager.inMemory(),
    settingsManager: SettingsManager.inMemory({
      compaction: { enabled: false },
    }),
  });

  // Forward assistant text deltas to the terminal as they stream in.
  session.subscribe((event) => {
    if (
      event.type === "message_update" &&
      event.assistantMessageEvent.type === "text_delta"
    ) {
      process.stdout.write(event.assistantMessageEvent.delta);
    }
  });

  try {
    await session.prompt(question);
    console.log();
  } finally {
    // Release the session even when the prompt rejects.
    session.dispose();
  }

  // Re-index after save so the compounding loop works
  if (options.save) {
    const { buildIndex } = await import("./indexer.js");
    await buildIndex(folder, sourcesDir);
  }
}
@@ -0,0 +1,19 @@
1
+ import { existsSync } from "node:fs";
2
+ import { resolve, join, dirname } from "node:path";
3
+
4
/**
 * Find the knowledge-base root for `startDir`.
 *
 * Starting at the resolved `startDir`, each directory on the way up to the
 * filesystem root is checked for a `.llm-kb` entry.
 *
 * @param startDir - Directory to begin the upward search from.
 * @returns The absolute path of the first directory containing `.llm-kb`,
 *   or `null` when the root is reached without finding one.
 */
export function resolveKnowledgeBase(startDir: string): string | null {
  let candidate: string | null = resolve(startDir);

  while (candidate !== null) {
    if (existsSync(join(candidate, ".llm-kb"))) {
      return candidate;
    }
    const up = dirname(candidate);
    // dirname() of the root is the root itself, which ends the walk.
    candidate = up === candidate ? null : up;
  }

  return null;
}