llm-kb 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PHASE2_SPEC.md +274 -0
- package/README.md +22 -2
- package/bin/chunk-MYQ36JJB.js +118 -0
- package/bin/cli.js +153 -125
- package/bin/indexer-LSYSZXZX.js +6 -0
- package/package.json +1 -1
- package/plan.md +8 -4
- package/src/cli.ts +26 -2
- package/src/query.ts +132 -0
- package/src/resolve-kb.ts +19 -0
package/PHASE2_SPEC.md
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# llm-kb — Phase 2: Query Engine
|
|
2
|
+
|
|
3
|
+
> **Goal:** `llm-kb query "question" --folder ./research` works from the terminal.
|
|
4
|
+
> **Depends on:** Phase 1 (ingest pipeline — complete)
|
|
5
|
+
> **Blog:** Part 3 of the series
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What Success Looks Like
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
llm-kb query "what are the reserve requirements?" --folder ./research
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
Reading index... 12 sources
|
|
17
|
+
Selected: reserve-policy.md, q3-results.md, board-deck.md
|
|
18
|
+
Reading 3 files...
|
|
19
|
+
|
|
20
|
+
Reserve requirements are defined in two documents:
|
|
21
|
+
|
|
22
|
+
1. **Reserve Policy** (reserve-policy.md, p.3): Minimum reserve
|
|
23
|
+
ratio of 12% of total assets, reviewed quarterly.
|
|
24
|
+
|
|
25
|
+
2. **Q3 Results** (q3-results.md, p.8): Current reserve ratio
|
|
26
|
+
is 14.2%, above the 12% minimum. Management notes this
|
|
27
|
+
provides a 2.2% buffer against regulatory changes.
|
|
28
|
+
|
|
29
|
+
Sources: reserve-policy.md (p.3), q3-results.md (p.8)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
That's the shape: file selection visible, citations inline, synthesis across sources.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Two Modes
|
|
37
|
+
|
|
38
|
+
### Query (read-only)
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
llm-kb query "what changed in Q4 guidance?" --folder ./research
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
The agent reads `index.md`, picks files, reads them, answers. **Cannot modify anything.** Tools: `createReadTool` only.
|
|
45
|
+
|
|
46
|
+
### Research (read + write)
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
llm-kb query "compare pipeline coverage to revenue target" --folder ./research --save
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Same as query, but the answer is also saved to `.llm-kb/wiki/outputs/`. The watcher detects the new file and re-indexes, so the next query can reference the saved analysis.
|
|
53
|
+
|
|
54
|
+
Tools: `createReadTool` + `createWriteTool` + `createBashTool`.
|
|
55
|
+
|
|
56
|
+
The `--save` flag switches from query mode to research mode.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Architecture
|
|
61
|
+
|
|
62
|
+
Same pattern as the indexer — a Pi SDK session with different tools:
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
export async function query(
|
|
66
|
+
folder: string,
|
|
67
|
+
question: string,
|
|
68
|
+
options: { save?: boolean }
|
|
69
|
+
) {
|
|
70
|
+
const sourcesDir = join(folder, ".llm-kb", "wiki", "sources");
|
|
71
|
+
const outputsDir = join(folder, ".llm-kb", "wiki", "outputs");
|
|
72
|
+
|
|
73
|
+
// Build AGENTS.md for query context
|
|
74
|
+
const agentsContent = buildQueryAgents(sourcesDir, options.save);
|
|
75
|
+
|
|
76
|
+
const loader = new DefaultResourceLoader({
|
|
77
|
+
cwd: folder,
|
|
78
|
+
agentsFilesOverride: (current) => ({
|
|
79
|
+
agentsFiles: [
|
|
80
|
+
...current.agentsFiles,
|
|
81
|
+
{ path: ".llm-kb/AGENTS.md", content: agentsContent },
|
|
82
|
+
],
|
|
83
|
+
}),
|
|
84
|
+
});
|
|
85
|
+
await loader.reload();
|
|
86
|
+
|
|
87
|
+
const tools = [createReadTool(folder)];
|
|
88
|
+
if (options.save) {
|
|
89
|
+
tools.push(createWriteTool(folder), createBashTool(folder));
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const { session } = await createAgentSession({
|
|
93
|
+
cwd: folder,
|
|
94
|
+
resourceLoader: loader,
|
|
95
|
+
tools,
|
|
96
|
+
sessionManager: SessionManager.inMemory(),
|
|
97
|
+
settingsManager: SettingsManager.inMemory({
|
|
98
|
+
compaction: { enabled: false },
|
|
99
|
+
}),
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
// Stream output to terminal
|
|
103
|
+
session.subscribe((event) => {
|
|
104
|
+
if (
|
|
105
|
+
event.type === "message_update" &&
|
|
106
|
+
event.assistantMessageEvent.type === "text_delta"
|
|
107
|
+
) {
|
|
108
|
+
process.stdout.write(event.assistantMessageEvent.delta);
|
|
109
|
+
}
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
await session.prompt(question);
|
|
113
|
+
session.dispose();
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### The Query AGENTS.md
|
|
118
|
+
|
|
119
|
+
The injected `AGENTS.md` for query mode tells the agent:
|
|
120
|
+
|
|
121
|
+
```markdown
|
|
122
|
+
# llm-kb Knowledge Base — Query Mode
|
|
123
|
+
|
|
124
|
+
## How to answer questions
|
|
125
|
+
|
|
126
|
+
1. FIRST read .llm-kb/wiki/index.md to see all available sources
|
|
127
|
+
2. Based on the question, select the most relevant source files
|
|
128
|
+
3. Read those files in full (not just the first 500 chars)
|
|
129
|
+
4. Answer with inline citations: (filename, page/section)
|
|
130
|
+
5. If the answer requires cross-referencing, read additional files
|
|
131
|
+
6. Prefer primary sources over previous analyses in outputs/
|
|
132
|
+
|
|
133
|
+
## Available sources
|
|
134
|
+
(dynamically generated list of .md files in sources/)
|
|
135
|
+
|
|
136
|
+
## Available libraries for non-PDF files
|
|
137
|
+
- exceljs — for .xlsx/.xls
|
|
138
|
+
- mammoth — for .docx
|
|
139
|
+
- officeparser — for .pptx
|
|
140
|
+
Write a quick Node.js script via bash to read these when needed.
|
|
141
|
+
|
|
142
|
+
## Rules
|
|
143
|
+
- Always cite sources with filename and page number
|
|
144
|
+
- If you can't find the answer, say so — don't hallucinate
|
|
145
|
+
- Read the FULL file, not just the beginning
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
For research mode, add:
|
|
149
|
+
|
|
150
|
+
```markdown
|
|
151
|
+
## Research Mode
|
|
152
|
+
You can save your analysis to .llm-kb/wiki/outputs/.
|
|
153
|
+
Use a descriptive filename (e.g., coverage-analysis.md).
|
|
154
|
+
The file watcher will detect it and update the index.
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## CLI Integration
|
|
160
|
+
|
|
161
|
+
Add `query` command to Commander:
|
|
162
|
+
|
|
163
|
+
```typescript
|
|
164
|
+
program
|
|
165
|
+
.command("query")
|
|
166
|
+
.description("Ask a question across your knowledge base")
|
|
167
|
+
.argument("<question>", "Your question")
|
|
168
|
+
.option("--folder <path>", "Path to document folder", ".")
|
|
169
|
+
.option("--save", "Save the answer to wiki/outputs/ (research mode)")
|
|
170
|
+
.action(async (question, options) => {
|
|
171
|
+
const folder = resolve(options.folder);
|
|
172
|
+
|
|
173
|
+
// Check if .llm-kb exists
|
|
174
|
+
if (!existsSync(join(folder, ".llm-kb"))) {
|
|
175
|
+
console.error(chalk.red("No knowledge base found. Run 'llm-kb run' first."));
|
|
176
|
+
process.exit(1);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
await query(folder, question, { save: options.save });
|
|
180
|
+
});
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Trace Logging (Prep for Eval — Phase 4)
|
|
186
|
+
|
|
187
|
+
Every query gets logged to `.llm-kb/traces/`:
|
|
188
|
+
|
|
189
|
+
```json
|
|
190
|
+
{
|
|
191
|
+
"timestamp": "2026-04-05T14:30:00Z",
|
|
192
|
+
"question": "what are the reserve requirements?",
|
|
193
|
+
"mode": "query",
|
|
194
|
+
"filesRead": ["index.md", "reserve-policy.md", "q3-results.md"],
|
|
195
|
+
"filesAvailable": ["reserve-policy.md", "q3-results.md", "board-deck.md", "pipeline.md"],
|
|
196
|
+
"answer": "Reserve requirements are defined in two documents...",
|
|
197
|
+
"citations": [
|
|
198
|
+
{ "file": "reserve-policy.md", "location": "p.3", "claim": "Minimum reserve ratio of 12%" },
|
|
199
|
+
{ "file": "q3-results.md", "location": "p.8", "claim": "Current reserve ratio is 14.2%" }
|
|
200
|
+
],
|
|
201
|
+
"tokensUsed": 3800,
|
|
202
|
+
"durationMs": 4200,
|
|
203
|
+
"model": "claude-sonnet-4"
|
|
204
|
+
}
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
Implementation: wrap the session to intercept tool calls and capture which files were read. Save the trace JSON after the session completes.
|
|
208
|
+
|
|
209
|
+
The eval agent (Phase 4) reads these traces to check citations against sources.
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## Streaming Output
|
|
214
|
+
|
|
215
|
+
Terminal queries should stream — the user sees the answer appear word by word instead of waiting for the full response. The `session.subscribe()` handler writes deltas to stdout.
|
|
216
|
+
|
|
217
|
+
For the `run` command (when we add query to the web UI in Phase 3), streaming will go through the Vercel AI SDK protocol instead.
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## Constraints
|
|
222
|
+
|
|
223
|
+
1. **Query must work without the web server running.** `llm-kb query` is standalone — it reads `.llm-kb/` directly. No dependency on `llm-kb run`.
|
|
224
|
+
|
|
225
|
+
2. **Read-only by default.** Query mode cannot modify files. Only `--save` enables write.
|
|
226
|
+
|
|
227
|
+
3. **Index must exist.** If `.llm-kb/wiki/index.md` doesn't exist, error out: "No knowledge base found. Run 'llm-kb run' first."
|
|
228
|
+
|
|
229
|
+
4. **Graceful on empty results.** If the agent can't find relevant files, it should say "I couldn't find sources relevant to this question" rather than hallucinate an answer.
|
|
230
|
+
|
|
231
|
+
5. **Token-conscious.** The agent reads index.md (~200 tokens for 50 sources) first, then only the files it selects (3-7 typically). Don't read all sources.
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Build Order (Slices)
|
|
236
|
+
|
|
237
|
+
| Slice | What | Demoable? |
|
|
238
|
+
|---|---|---|
|
|
239
|
+
| 1 | `query` command + read-only session + streaming | ✅ Ask questions, get answers |
|
|
240
|
+
| 2 | `--save` flag + research mode + write to outputs/ | ✅ Answers compound in wiki |
|
|
241
|
+
| 3 | Trace logging (JSON per query) | Prep for eval |
|
|
242
|
+
| 4 | `status` command (show KB stats) | ✅ Nice-to-have |
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## Definition of Done
|
|
247
|
+
|
|
248
|
+
- [ ] `llm-kb query "question" --folder ./research` returns a cited answer
|
|
249
|
+
- [ ] Answer streams to terminal (word by word, not all at once)
|
|
250
|
+
- [ ] Agent reads index.md first, then selects and reads relevant source files
|
|
251
|
+
- [ ] `--save` flag saves the answer to `.llm-kb/wiki/outputs/`
|
|
252
|
+
- [ ] Saved answers get detected by watcher and re-indexed
|
|
253
|
+
- [ ] Query traces logged to `.llm-kb/traces/` as JSON
|
|
254
|
+
- [ ] Error if no `.llm-kb/` exists ("run 'llm-kb run' first")
|
|
255
|
+
- [ ] Non-PDF files (Excel, Word) readable by agent via bundled libraries
|
|
256
|
+
- [ ] Blog Part 3 written with real terminal output
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## What This Enables
|
|
261
|
+
|
|
262
|
+
With query working, the demo becomes:
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
npx llm-kb run ./my-documents # ingest
|
|
266
|
+
llm-kb query "what changed?" # ask
|
|
267
|
+
llm-kb query "compare X vs Y" --save # research (compounds)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
Three commands. Ingest → Query → Research. That's a product, not a script.
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
*Phase 2 spec written April 4, 2026. DeltaXY.*
|
package/README.md
CHANGED
|
@@ -22,12 +22,14 @@ Pi handles the LLM auth — no separate API key configuration needed.
|
|
|
22
22
|
|
|
23
23
|
## What It Does
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
### Ingest
|
|
26
|
+
|
|
27
|
+
```bash
|
|
26
28
|
llm-kb run ./my-documents
|
|
27
29
|
```
|
|
28
30
|
|
|
29
31
|
```
|
|
30
|
-
llm-kb v0.0
|
|
32
|
+
llm-kb v0.2.0
|
|
31
33
|
|
|
32
34
|
Scanning ./my-documents...
|
|
33
35
|
Found 9 files (9 PDF)
|
|
@@ -46,6 +48,24 @@ Scanning ./my-documents...
|
|
|
46
48
|
3. **Builds an index** — Pi SDK agent reads all sources and writes `index.md` with summaries
|
|
47
49
|
4. **Watches** — drop a new PDF in while it's running, it gets parsed and indexed automatically
|
|
48
50
|
|
|
51
|
+
### Query
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# From inside the documents folder (auto-detects .llm-kb/)
|
|
55
|
+
llm-kb query "what are the key findings?"
|
|
56
|
+
|
|
57
|
+
# From anywhere, with explicit folder
|
|
58
|
+
llm-kb query "compare Q3 vs Q4" --folder ./my-documents
|
|
59
|
+
|
|
60
|
+
# Research mode — saves the answer to wiki/outputs/ and re-indexes
|
|
61
|
+
llm-kb query "summarize all revenue data" --save
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
The agent reads `index.md`, selects relevant files, and streams a cited answer to the terminal.
|
|
65
|
+
|
|
66
|
+
**Query mode** — read-only. The agent can only read your files.
|
|
67
|
+
**Research mode** (`--save`) — read + write + bash. The agent saves answers to `outputs/`, re-indexes, and can write scripts to read Excel/Word files. Answers compound over time.
|
|
68
|
+
|
|
49
69
|
### What It Creates
|
|
50
70
|
|
|
51
71
|
```
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// src/indexer.ts
|
|
2
|
+
import {
|
|
3
|
+
createAgentSession,
|
|
4
|
+
createBashTool,
|
|
5
|
+
createReadTool,
|
|
6
|
+
createWriteTool,
|
|
7
|
+
DefaultResourceLoader,
|
|
8
|
+
SessionManager,
|
|
9
|
+
SettingsManager
|
|
10
|
+
} from "@mariozechner/pi-coding-agent";
|
|
11
|
+
import { readdir, readFile } from "fs/promises";
|
|
12
|
+
import { join, dirname } from "path";
|
|
13
|
+
import { fileURLToPath } from "url";
|
|
14
|
+
var __filename = fileURLToPath(import.meta.url);
|
|
15
|
+
var __dirname = dirname(__filename);
|
|
16
|
+
function getNodeModulesPath() {
|
|
17
|
+
let dir = __dirname;
|
|
18
|
+
for (let i = 0; i < 5; i++) {
|
|
19
|
+
const candidate = join(dir, "node_modules");
|
|
20
|
+
try {
|
|
21
|
+
return candidate;
|
|
22
|
+
} catch {
|
|
23
|
+
dir = dirname(dir);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
return join(process.cwd(), "node_modules");
|
|
27
|
+
}
|
|
28
|
+
function buildAgentsContent(sourcesDir, files) {
|
|
29
|
+
const sourceList = files.filter((f) => f.endsWith(".md")).map((f) => ` - ${f}`).join("\n");
|
|
30
|
+
return `# llm-kb Knowledge Base
|
|
31
|
+
|
|
32
|
+
## How to access documents
|
|
33
|
+
|
|
34
|
+
### PDFs (pre-parsed)
|
|
35
|
+
PDFs have been parsed to markdown with bounding boxes.
|
|
36
|
+
Read the markdown versions in \`.llm-kb/wiki/sources/\` instead of the raw PDFs.
|
|
37
|
+
|
|
38
|
+
Available parsed sources:
|
|
39
|
+
${sourceList}
|
|
40
|
+
|
|
41
|
+
### Other file types (Excel, Word, PowerPoint, CSV, images)
|
|
42
|
+
You have bash and read tools. These libraries are pre-installed and available:
|
|
43
|
+
- **exceljs** \u2014 for .xlsx/.xls files
|
|
44
|
+
- **mammoth** \u2014 for .docx files
|
|
45
|
+
- **officeparser** \u2014 for .pptx files
|
|
46
|
+
- **csv-parse** \u2014 built into Node.js, use fs + split for .csv
|
|
47
|
+
|
|
48
|
+
Write a quick Node.js script to extract content when needed.
|
|
49
|
+
|
|
50
|
+
## Index file
|
|
51
|
+
Write the index to \`.llm-kb/wiki/index.md\`.
|
|
52
|
+
|
|
53
|
+
The index should be a markdown file with:
|
|
54
|
+
1. A title and last-updated timestamp
|
|
55
|
+
2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics
|
|
56
|
+
3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)
|
|
57
|
+
4. Total word count across all sources
|
|
58
|
+
`;
|
|
59
|
+
}
|
|
60
|
+
async function buildIndex(folder, sourcesDir, onOutput) {
|
|
61
|
+
const files = await readdir(sourcesDir);
|
|
62
|
+
const mdFiles = files.filter((f) => f.endsWith(".md"));
|
|
63
|
+
if (mdFiles.length === 0) {
|
|
64
|
+
throw new Error("No source files found to index");
|
|
65
|
+
}
|
|
66
|
+
const agentsContent = buildAgentsContent(sourcesDir, files);
|
|
67
|
+
const nodeModulesPath = getNodeModulesPath();
|
|
68
|
+
process.env.NODE_PATH = nodeModulesPath;
|
|
69
|
+
const loader = new DefaultResourceLoader({
|
|
70
|
+
cwd: folder,
|
|
71
|
+
agentsFilesOverride: (current) => ({
|
|
72
|
+
agentsFiles: [
|
|
73
|
+
...current.agentsFiles,
|
|
74
|
+
{ path: ".llm-kb/AGENTS.md", content: agentsContent }
|
|
75
|
+
]
|
|
76
|
+
})
|
|
77
|
+
});
|
|
78
|
+
await loader.reload();
|
|
79
|
+
const { session } = await createAgentSession({
|
|
80
|
+
cwd: folder,
|
|
81
|
+
resourceLoader: loader,
|
|
82
|
+
tools: [
|
|
83
|
+
createReadTool(folder),
|
|
84
|
+
createBashTool(folder),
|
|
85
|
+
createWriteTool(folder)
|
|
86
|
+
],
|
|
87
|
+
sessionManager: SessionManager.inMemory(),
|
|
88
|
+
settingsManager: SettingsManager.inMemory({
|
|
89
|
+
compaction: { enabled: false }
|
|
90
|
+
})
|
|
91
|
+
});
|
|
92
|
+
if (onOutput) {
|
|
93
|
+
session.subscribe((event) => {
|
|
94
|
+
if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
|
|
95
|
+
onOutput(event.assistantMessageEvent.delta);
|
|
96
|
+
}
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
const prompt = `Read each file in .llm-kb/wiki/sources/ (one at a time, just the first 500 characters of each).
|
|
100
|
+
Then write .llm-kb/wiki/index.md with a summary table of all sources.
|
|
101
|
+
|
|
102
|
+
Include: Source filename, Type (PDF/Excel/Word/etc), Pages (from the JSON if available), a one-line summary, and key topics.
|
|
103
|
+
Add a total word count estimate at the bottom.`;
|
|
104
|
+
await session.prompt(prompt);
|
|
105
|
+
const indexPath = join(sourcesDir, "..", "index.md");
|
|
106
|
+
try {
|
|
107
|
+
const content = await readFile(indexPath, "utf-8");
|
|
108
|
+
session.dispose();
|
|
109
|
+
return content;
|
|
110
|
+
} catch {
|
|
111
|
+
session.dispose();
|
|
112
|
+
throw new Error("Agent did not create index.md");
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
export {
|
|
117
|
+
buildIndex
|
|
118
|
+
};
|
package/bin/cli.js
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
buildIndex
|
|
4
|
+
} from "./chunk-MYQ36JJB.js";
|
|
2
5
|
|
|
3
6
|
// src/cli.ts
|
|
4
7
|
import { Command } from "commander";
|
|
@@ -123,121 +126,6 @@ ${p.text}`).join("\n\n---\n\n");
|
|
|
123
126
|
};
|
|
124
127
|
}
|
|
125
128
|
|
|
126
|
-
// src/indexer.ts
|
|
127
|
-
import {
|
|
128
|
-
createAgentSession,
|
|
129
|
-
createBashTool,
|
|
130
|
-
createReadTool,
|
|
131
|
-
createWriteTool,
|
|
132
|
-
DefaultResourceLoader,
|
|
133
|
-
SessionManager,
|
|
134
|
-
SettingsManager
|
|
135
|
-
} from "@mariozechner/pi-coding-agent";
|
|
136
|
-
import { readdir as readdir2, readFile } from "fs/promises";
|
|
137
|
-
import { join as join2, dirname } from "path";
|
|
138
|
-
import { fileURLToPath } from "url";
|
|
139
|
-
var __filename = fileURLToPath(import.meta.url);
|
|
140
|
-
var __dirname = dirname(__filename);
|
|
141
|
-
function getNodeModulesPath() {
|
|
142
|
-
let dir = __dirname;
|
|
143
|
-
for (let i = 0; i < 5; i++) {
|
|
144
|
-
const candidate = join2(dir, "node_modules");
|
|
145
|
-
try {
|
|
146
|
-
return candidate;
|
|
147
|
-
} catch {
|
|
148
|
-
dir = dirname(dir);
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
return join2(process.cwd(), "node_modules");
|
|
152
|
-
}
|
|
153
|
-
function buildAgentsContent(sourcesDir, files) {
|
|
154
|
-
const sourceList = files.filter((f) => f.endsWith(".md")).map((f) => ` - ${f}`).join("\n");
|
|
155
|
-
return `# llm-kb Knowledge Base
|
|
156
|
-
|
|
157
|
-
## How to access documents
|
|
158
|
-
|
|
159
|
-
### PDFs (pre-parsed)
|
|
160
|
-
PDFs have been parsed to markdown with bounding boxes.
|
|
161
|
-
Read the markdown versions in \`.llm-kb/wiki/sources/\` instead of the raw PDFs.
|
|
162
|
-
|
|
163
|
-
Available parsed sources:
|
|
164
|
-
${sourceList}
|
|
165
|
-
|
|
166
|
-
### Other file types (Excel, Word, PowerPoint, CSV, images)
|
|
167
|
-
You have bash and read tools. These libraries are pre-installed and available:
|
|
168
|
-
- **exceljs** \u2014 for .xlsx/.xls files
|
|
169
|
-
- **mammoth** \u2014 for .docx files
|
|
170
|
-
- **officeparser** \u2014 for .pptx files
|
|
171
|
-
- **csv-parse** \u2014 built into Node.js, use fs + split for .csv
|
|
172
|
-
|
|
173
|
-
Write a quick Node.js script to extract content when needed.
|
|
174
|
-
|
|
175
|
-
## Index file
|
|
176
|
-
Write the index to \`.llm-kb/wiki/index.md\`.
|
|
177
|
-
|
|
178
|
-
The index should be a markdown file with:
|
|
179
|
-
1. A title and last-updated timestamp
|
|
180
|
-
2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics
|
|
181
|
-
3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)
|
|
182
|
-
4. Total word count across all sources
|
|
183
|
-
`;
|
|
184
|
-
}
|
|
185
|
-
async function buildIndex(folder, sourcesDir, onOutput) {
|
|
186
|
-
const files = await readdir2(sourcesDir);
|
|
187
|
-
const mdFiles = files.filter((f) => f.endsWith(".md"));
|
|
188
|
-
if (mdFiles.length === 0) {
|
|
189
|
-
throw new Error("No source files found to index");
|
|
190
|
-
}
|
|
191
|
-
const agentsContent = buildAgentsContent(sourcesDir, files);
|
|
192
|
-
const nodeModulesPath = getNodeModulesPath();
|
|
193
|
-
process.env.NODE_PATH = nodeModulesPath;
|
|
194
|
-
const loader = new DefaultResourceLoader({
|
|
195
|
-
cwd: folder,
|
|
196
|
-
agentsFilesOverride: (current) => ({
|
|
197
|
-
agentsFiles: [
|
|
198
|
-
...current.agentsFiles,
|
|
199
|
-
{ path: ".llm-kb/AGENTS.md", content: agentsContent }
|
|
200
|
-
]
|
|
201
|
-
})
|
|
202
|
-
});
|
|
203
|
-
await loader.reload();
|
|
204
|
-
const { session } = await createAgentSession({
|
|
205
|
-
cwd: folder,
|
|
206
|
-
resourceLoader: loader,
|
|
207
|
-
tools: [
|
|
208
|
-
createReadTool(folder),
|
|
209
|
-
createBashTool(folder),
|
|
210
|
-
createWriteTool(folder)
|
|
211
|
-
],
|
|
212
|
-
sessionManager: SessionManager.inMemory(),
|
|
213
|
-
settingsManager: SettingsManager.inMemory({
|
|
214
|
-
compaction: { enabled: false }
|
|
215
|
-
})
|
|
216
|
-
});
|
|
217
|
-
if (onOutput) {
|
|
218
|
-
session.subscribe((event) => {
|
|
219
|
-
if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
|
|
220
|
-
onOutput(event.assistantMessageEvent.delta);
|
|
221
|
-
}
|
|
222
|
-
});
|
|
223
|
-
}
|
|
224
|
-
const prompt = `Read each file in .llm-kb/wiki/sources/ (one at a time, just the first 500 characters of each).
|
|
225
|
-
Then write .llm-kb/wiki/index.md with a summary table of all sources.
|
|
226
|
-
|
|
227
|
-
Include: Source filename, Type (PDF/Excel/Word/etc), Pages (from the JSON if available), a one-line summary, and key topics.
|
|
228
|
-
Add a total word count estimate at the bottom.`;
|
|
229
|
-
await session.prompt(prompt);
|
|
230
|
-
const indexPath = join2(sourcesDir, "..", "index.md");
|
|
231
|
-
try {
|
|
232
|
-
const content = await readFile(indexPath, "utf-8");
|
|
233
|
-
session.dispose();
|
|
234
|
-
return content;
|
|
235
|
-
} catch {
|
|
236
|
-
session.dispose();
|
|
237
|
-
throw new Error("Agent did not create index.md");
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
|
|
241
129
|
// src/watcher.ts
|
|
242
130
|
import { watch } from "chokidar";
|
|
243
131
|
import { extname as extname2, basename as basename2 } from "path";
|
|
@@ -307,18 +195,145 @@ function startWatcher({ folder, sourcesDir, debounceMs = 2e3 }) {
|
|
|
307
195
|
return watcher;
|
|
308
196
|
}
|
|
309
197
|
|
|
310
|
-
// src/
|
|
198
|
+
// src/query.ts
|
|
199
|
+
import {
|
|
200
|
+
createAgentSession,
|
|
201
|
+
createBashTool,
|
|
202
|
+
createReadTool,
|
|
203
|
+
createWriteTool,
|
|
204
|
+
DefaultResourceLoader,
|
|
205
|
+
SessionManager,
|
|
206
|
+
SettingsManager
|
|
207
|
+
} from "@mariozechner/pi-coding-agent";
|
|
208
|
+
import { readdir as readdir2, mkdir as mkdir2 } from "fs/promises";
|
|
209
|
+
import { join as join3, dirname } from "path";
|
|
210
|
+
import { fileURLToPath } from "url";
|
|
211
|
+
var __dirname = dirname(fileURLToPath(import.meta.url));
|
|
212
|
+
function getNodeModulesPath() {
|
|
213
|
+
let dir = __dirname;
|
|
214
|
+
for (let i = 0; i < 5; i++) {
|
|
215
|
+
const candidate = join3(dir, "node_modules");
|
|
216
|
+
try {
|
|
217
|
+
return candidate;
|
|
218
|
+
} catch {
|
|
219
|
+
dir = dirname(dir);
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
return join3(process.cwd(), "node_modules");
|
|
223
|
+
}
|
|
224
|
+
function buildQueryAgents(sourceFiles, save) {
|
|
225
|
+
const sourceList = sourceFiles.map((f) => ` - ${f}`).join("\n");
|
|
226
|
+
let content = `# llm-kb Knowledge Base \u2014 Query Mode
|
|
227
|
+
|
|
228
|
+
## How to answer questions
|
|
229
|
+
|
|
230
|
+
1. FIRST read .llm-kb/wiki/index.md to understand all available sources
|
|
231
|
+
2. Based on the question, select the most relevant source files (usually 2-5)
|
|
232
|
+
3. Read those source files in full from .llm-kb/wiki/sources/
|
|
233
|
+
4. Answer with inline citations: (filename, page number)
|
|
234
|
+
5. If the answer requires cross-referencing multiple files, read additional ones
|
|
235
|
+
6. If you can't find the answer, say so \u2014 don't hallucinate
|
|
236
|
+
|
|
237
|
+
## Available parsed sources
|
|
238
|
+
${sourceList}
|
|
239
|
+
|
|
240
|
+
## Non-PDF files
|
|
241
|
+
If the user's folder has Excel, Word, or PowerPoint files, these libraries are available:
|
|
242
|
+
- **exceljs** \u2014 for .xlsx/.xls files
|
|
243
|
+
- **mammoth** \u2014 for .docx files
|
|
244
|
+
- **officeparser** \u2014 for .pptx files
|
|
245
|
+
Write a quick Node.js script via bash to read them.
|
|
246
|
+
|
|
247
|
+
## Rules
|
|
248
|
+
- Always cite sources with filename and page number
|
|
249
|
+
- Read the FULL source file, not just the beginning
|
|
250
|
+
- Prefer primary sources over previous analyses
|
|
251
|
+
`;
|
|
252
|
+
if (save) {
|
|
253
|
+
content += `
|
|
254
|
+
## Research Mode
|
|
255
|
+
Save your analysis to .llm-kb/wiki/outputs/ with a descriptive filename (e.g., comparison-analysis.md).
|
|
256
|
+
Include the question at the top and all citations.
|
|
257
|
+
`;
|
|
258
|
+
}
|
|
259
|
+
return content;
|
|
260
|
+
}
|
|
261
|
+
async function query(folder, question, options) {
|
|
262
|
+
const sourcesDir = join3(folder, ".llm-kb", "wiki", "sources");
|
|
263
|
+
const files = await readdir2(sourcesDir);
|
|
264
|
+
const mdFiles = files.filter((f) => f.endsWith(".md"));
|
|
265
|
+
if (mdFiles.length === 0) {
|
|
266
|
+
throw new Error("No sources found. Run 'llm-kb run' first to parse documents.");
|
|
267
|
+
}
|
|
268
|
+
if (options.save) {
|
|
269
|
+
await mkdir2(join3(folder, ".llm-kb", "wiki", "outputs"), { recursive: true });
|
|
270
|
+
}
|
|
271
|
+
process.env.NODE_PATH = getNodeModulesPath();
|
|
272
|
+
const agentsContent = buildQueryAgents(mdFiles, !!options.save);
|
|
273
|
+
const loader = new DefaultResourceLoader({
|
|
274
|
+
cwd: folder,
|
|
275
|
+
agentsFilesOverride: (current) => ({
|
|
276
|
+
agentsFiles: [
|
|
277
|
+
...current.agentsFiles,
|
|
278
|
+
{ path: ".llm-kb/AGENTS.md", content: agentsContent }
|
|
279
|
+
]
|
|
280
|
+
})
|
|
281
|
+
});
|
|
282
|
+
await loader.reload();
|
|
283
|
+
const tools = [createReadTool(folder)];
|
|
284
|
+
if (options.save) {
|
|
285
|
+
tools.push(createBashTool(folder), createWriteTool(folder));
|
|
286
|
+
}
|
|
287
|
+
const { session } = await createAgentSession({
|
|
288
|
+
cwd: folder,
|
|
289
|
+
resourceLoader: loader,
|
|
290
|
+
tools,
|
|
291
|
+
sessionManager: SessionManager.inMemory(),
|
|
292
|
+
settingsManager: SettingsManager.inMemory({
|
|
293
|
+
compaction: { enabled: false }
|
|
294
|
+
})
|
|
295
|
+
});
|
|
296
|
+
session.subscribe((event) => {
|
|
297
|
+
if (event.type === "message_update" && event.assistantMessageEvent.type === "text_delta") {
|
|
298
|
+
process.stdout.write(event.assistantMessageEvent.delta);
|
|
299
|
+
}
|
|
300
|
+
});
|
|
301
|
+
await session.prompt(question);
|
|
302
|
+
console.log();
|
|
303
|
+
session.dispose();
|
|
304
|
+
if (options.save) {
|
|
305
|
+
const { buildIndex: buildIndex2 } = await import("./indexer-LSYSZXZX.js");
|
|
306
|
+
await buildIndex2(folder, sourcesDir);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// src/resolve-kb.ts
|
|
311
311
|
import { existsSync } from "fs";
|
|
312
|
-
import {
|
|
313
|
-
|
|
312
|
+
import { resolve as resolve2, join as join4, dirname as dirname2 } from "path";
|
|
313
|
+
function resolveKnowledgeBase(startDir) {
|
|
314
|
+
let dir = resolve2(startDir);
|
|
315
|
+
while (true) {
|
|
316
|
+
if (existsSync(join4(dir, ".llm-kb"))) {
|
|
317
|
+
return dir;
|
|
318
|
+
}
|
|
319
|
+
const parent = dirname2(dir);
|
|
320
|
+
if (parent === dir) return null;
|
|
321
|
+
dir = parent;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
// src/cli.ts
|
|
326
|
+
import { existsSync as existsSync2 } from "fs";
|
|
327
|
+
import { mkdir as mkdir3 } from "fs/promises";
|
|
328
|
+
import { resolve as resolve3, join as join5 } from "path";
|
|
314
329
|
import chalk2 from "chalk";
|
|
315
330
|
var program = new Command();
|
|
316
|
-
program.name("llm-kb").description("Drop files into a folder. Get a knowledge base you can query.").version("0.
|
|
331
|
+
program.name("llm-kb").description("Drop files into a folder. Get a knowledge base you can query.").version("0.2.0");
|
|
317
332
|
program.command("run").description("Scan, parse, index, and watch a folder").argument("<folder>", "Path to your documents folder").action(async (folder) => {
|
|
318
333
|
console.log(`
|
|
319
|
-
${chalk2.bold("llm-kb")} v0.
|
|
334
|
+
${chalk2.bold("llm-kb")} v0.2.0
|
|
320
335
|
`);
|
|
321
|
-
if (!
|
|
336
|
+
if (!existsSync2(folder)) {
|
|
322
337
|
console.error(chalk2.red(`Error: Folder not found: ${folder}`));
|
|
323
338
|
process.exit(1);
|
|
324
339
|
}
|
|
@@ -331,16 +346,16 @@ ${chalk2.bold("llm-kb")} v0.1.0
|
|
|
331
346
|
const pdfs = files.filter((f) => f.ext === ".pdf");
|
|
332
347
|
console.log(` Found ${chalk2.bold(files.length.toString())} files (${summarize(files)})`);
|
|
333
348
|
if (pdfs.length === 0) return;
|
|
334
|
-
const root =
|
|
335
|
-
const sourcesDir =
|
|
336
|
-
await
|
|
349
|
+
const root = resolve3(folder);
|
|
350
|
+
const sourcesDir = join5(root, ".llm-kb", "wiki", "sources");
|
|
351
|
+
await mkdir3(sourcesDir, { recursive: true });
|
|
337
352
|
let parsed = 0;
|
|
338
353
|
let skipped = 0;
|
|
339
354
|
let failed = 0;
|
|
340
355
|
const errors = [];
|
|
341
356
|
for (let i = 0; i < pdfs.length; i++) {
|
|
342
357
|
const pdf = pdfs[i];
|
|
343
|
-
const fullPath =
|
|
358
|
+
const fullPath = join5(root, pdf.path);
|
|
344
359
|
const progress = ` Parsing... ${i + 1}/${pdfs.length} \u2014 ${pdf.name}`;
|
|
345
360
|
process.stdout.write(`\r${progress.padEnd(80)}`);
|
|
346
361
|
try {
|
|
@@ -378,4 +393,17 @@ ${chalk2.bold("llm-kb")} v0.1.0
|
|
|
378
393
|
Watching for new files... (Ctrl+C to stop)`));
|
|
379
394
|
startWatcher({ folder: root, sourcesDir });
|
|
380
395
|
});
|
|
396
|
+
// `query` subcommand: answer a question against an existing knowledge base.
// --folder picks the starting folder (the current directory is used when it is
// omitted); --save is forwarded to query() to enable research mode.
program
  .command("query")
  .description("Ask a question across your knowledge base")
  .argument("<question>", "Your question")
  .option("--folder <path>", "Path to document folder (auto-detects if omitted)")
  .option("--save", "Save the answer to wiki/outputs/ (research mode)")
  .action(async (question, options) => {
    const root = resolveKnowledgeBase(options.folder || process.cwd());
    if (!root) {
      console.error(chalk2.red("No knowledge base found. Run 'llm-kb run <folder>' first."));
      process.exit(1);
    }
    try {
      await query(root, question, { save: options.save });
    } catch (err) {
      // Anything can be thrown in JavaScript; only Error instances are
      // guaranteed to carry a `.message`, so fall back to String() otherwise
      // instead of printing "undefined".
      const message = err instanceof Error ? err.message : String(err);
      console.error(chalk2.red(message));
      process.exit(1);
    }
  });
program.parse();
|
package/package.json
CHANGED
package/plan.md
CHANGED
|
@@ -45,7 +45,11 @@ Config file has no readers yet. Deferred to Phase 2/3. README updated instead.
|
|
|
45
45
|
- OCR via env var (local Tesseract or remote Azure bridge)
|
|
46
46
|
- Auth via Pi SDK (zero config)
|
|
47
47
|
|
|
48
|
-
**
|
|
49
|
-
- `llm-kb query "question"
|
|
50
|
-
-
|
|
51
|
-
-
|
|
48
|
+
**Phase 2 complete ✅:**
|
|
49
|
+
- `llm-kb query "question"` — auto-detects KB, streams cited answers
|
|
50
|
+
- `--save` flag — research mode, saves to `outputs/`, re-indexes
|
|
51
|
+
- Query mode is read-only (read tool only). Research mode adds bash + write.
|
|
52
|
+
|
|
53
|
+
**Deferred to Phase 4:**
|
|
54
|
+
- Trace logging (JSON per query: question, filesRead, citations, tokens, duration)
|
|
55
|
+
- Needed for eval, but no eval system yet to consume traces
|
package/src/cli.ts
CHANGED
|
@@ -5,6 +5,8 @@ import { scan, summarize } from "./scan.js";
|
|
|
5
5
|
import { parsePDF } from "./pdf.js";
|
|
6
6
|
import { buildIndex } from "./indexer.js";
|
|
7
7
|
import { startWatcher } from "./watcher.js";
|
|
8
|
+
import { query } from "./query.js";
|
|
9
|
+
import { resolveKnowledgeBase } from "./resolve-kb.js";
|
|
8
10
|
import { existsSync } from "node:fs";
|
|
9
11
|
import { mkdir } from "node:fs/promises";
|
|
10
12
|
import { resolve, join } from "node:path";
|
|
@@ -15,14 +17,14 @@ const program = new Command();
|
|
|
15
17
|
program
|
|
16
18
|
.name("llm-kb")
|
|
17
19
|
.description("Drop files into a folder. Get a knowledge base you can query.")
|
|
18
|
-
.version("0.
|
|
20
|
+
.version("0.2.0");
|
|
19
21
|
|
|
20
22
|
program
|
|
21
23
|
.command("run")
|
|
22
24
|
.description("Scan, parse, index, and watch a folder")
|
|
23
25
|
.argument("<folder>", "Path to your documents folder")
|
|
24
26
|
.action(async (folder: string) => {
|
|
25
|
-
console.log(`\n${chalk.bold("llm-kb")} v0.
|
|
27
|
+
console.log(`\n${chalk.bold("llm-kb")} v0.2.0\n`);
|
|
26
28
|
|
|
27
29
|
if (!existsSync(folder)) {
|
|
28
30
|
console.error(chalk.red(`Error: Folder not found: ${folder}`));
|
|
@@ -105,4 +107,26 @@ program
|
|
|
105
107
|
startWatcher({ folder: root, sourcesDir });
|
|
106
108
|
});
|
|
107
109
|
|
|
110
|
+
// `query` subcommand: answer a question against an existing knowledge base.
// --folder picks the starting folder (the current directory is used when it is
// omitted); --save is forwarded to query() to enable research mode.
program
  .command("query")
  .description("Ask a question across your knowledge base")
  .argument("<question>", "Your question")
  .option("--folder <path>", "Path to document folder (auto-detects if omitted)")
  .option("--save", "Save the answer to wiki/outputs/ (research mode)")
  .action(async (question: string, options: { folder?: string; save?: boolean }) => {
    const root = resolveKnowledgeBase(options.folder || process.cwd());

    if (!root) {
      console.error(chalk.red("No knowledge base found. Run 'llm-kb run <folder>' first."));
      process.exit(1);
    }

    try {
      await query(root, question, { save: options.save });
    } catch (err: unknown) {
      // A thrown value is not necessarily an Error; narrow before reading
      // `.message` so non-Error throws are still reported meaningfully.
      const message = err instanceof Error ? err.message : String(err);
      console.error(chalk.red(message));
      process.exit(1);
    }
  });

program.parse();
|
package/src/query.ts
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import {
  createAgentSession,
  createBashTool,
  createReadTool,
  createWriteTool,
  DefaultResourceLoader,
  SessionManager,
  SettingsManager,
} from "@mariozechner/pi-coding-agent";
import { existsSync } from "node:fs";
import { readdir, mkdir } from "node:fs/promises";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
|
|
13
|
+
|
|
14
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
15
|
+
|
|
16
|
+
/**
 * Locate a `node_modules` directory by walking upward from `startDir`.
 *
 * At most five directory levels are inspected (the starting directory and
 * four ancestors). The first level that actually contains a `node_modules`
 * entry wins. If none does, the function falls back to
 * `<process.cwd()>/node_modules`, which is returned without an existence check.
 *
 * @param startDir Directory to start searching from; defaults to the
 *   directory of this module.
 * @returns Path of the `node_modules` directory that was found, or the
 *   cwd-based fallback.
 */
function getNodeModulesPath(startDir: string = __dirname): string {
  const maxLevels = 5;
  let dir = startDir;
  for (let i = 0; i < maxLevels; i++) {
    const candidate = join(dir, "node_modules");
    // Building a path never throws, so existence has to be tested explicitly.
    if (existsSync(candidate)) return candidate;
    const parent = dirname(dir);
    // dirname() of the filesystem root is the root itself: nothing left to try.
    if (parent === dir) break;
    dir = parent;
  }
  return join(process.cwd(), "node_modules");
}
|
|
24
|
+
|
|
25
|
+
/**
 * Compose the instruction text handed to the agent for a query.
 *
 * The text explains how to answer (read the index, pick sources, cite them),
 * lists every parsed source file as a bullet, and names the libraries
 * available for non-PDF documents. When `save` is true, a "Research Mode"
 * section telling the agent to write its analysis under
 * `.llm-kb/wiki/outputs/` is appended.
 *
 * @param sourceFiles File names to list under "Available parsed sources".
 * @param save Whether to append the research-mode instructions.
 * @returns The complete instruction document as a single string.
 */
function buildQueryAgents(sourceFiles: string[], save: boolean): string {
  const bullets: string[] = [];
  for (const name of sourceFiles) {
    bullets.push(` - ${name}`);
  }
  const sourceList = bullets.join("\n");

  const baseInstructions = `# llm-kb Knowledge Base — Query Mode

## How to answer questions

1. FIRST read .llm-kb/wiki/index.md to understand all available sources
2. Based on the question, select the most relevant source files (usually 2-5)
3. Read those source files in full from .llm-kb/wiki/sources/
4. Answer with inline citations: (filename, page number)
5. If the answer requires cross-referencing multiple files, read additional ones
6. If you can't find the answer, say so — don't hallucinate

## Available parsed sources
${sourceList}

## Non-PDF files
If the user's folder has Excel, Word, or PowerPoint files, these libraries are available:
- **exceljs** — for .xlsx/.xls files
- **mammoth** — for .docx files
- **officeparser** — for .pptx files
Write a quick Node.js script via bash to read them.

## Rules
- Always cite sources with filename and page number
- Read the FULL source file, not just the beginning
- Prefer primary sources over previous analyses
`;

  // Only research mode gets the extra section about persisting the answer.
  const researchInstructions = `
## Research Mode
Save your analysis to .llm-kb/wiki/outputs/ with a descriptive filename (e.g., comparison-analysis.md).
Include the question at the top and all citations.
`;

  return save ? baseInstructions + researchInstructions : baseInstructions;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Ask the agent a question about the knowledge base stored under `folder`
 * and stream its answer to stdout.
 *
 * The parsed sources are listed from `<folder>/.llm-kb/wiki/sources`. By
 * default the agent only receives a read tool; with `options.save` it also
 * gets bash and write tools, the `outputs` directory is created up front, and
 * the index is rebuilt once the answer has been produced.
 *
 * @param folder Root folder that contains the `.llm-kb` directory.
 * @param question The question to send to the agent.
 * @param options `save` switches on research mode.
 * @throws Error when no `.md` sources exist (including when the sources
 *   directory itself is missing); other filesystem and agent errors propagate.
 */
export async function query(
  folder: string,
  question: string,
  options: { save?: boolean }
): Promise<void> {
  const sourcesDir = join(folder, ".llm-kb", "wiki", "sources");

  let files: string[];
  try {
    files = await readdir(sourcesDir);
  } catch (err: unknown) {
    // A missing sources directory means nothing has been parsed yet; report
    // it with the same guidance as an empty directory rather than a raw ENOENT.
    if (err instanceof Error && "code" in err && err.code === "ENOENT") {
      files = [];
    } else {
      throw err;
    }
  }
  const mdFiles = files.filter((f) => f.endsWith(".md"));

  if (mdFiles.length === 0) {
    throw new Error("No sources found. Run 'llm-kb run' first to parse documents.");
  }

  if (options.save) {
    await mkdir(join(folder, ".llm-kb", "wiki", "outputs"), { recursive: true });
  }

  // Exported through the environment before the agent session is created.
  process.env.NODE_PATH = getNodeModulesPath();

  const agentsContent = buildQueryAgents(mdFiles, !!options.save);

  // Append the generated instructions to whatever agents files the loader
  // already discovered for this folder.
  const loader = new DefaultResourceLoader({
    cwd: folder,
    agentsFilesOverride: (current) => ({
      agentsFiles: [
        ...current.agentsFiles,
        { path: ".llm-kb/AGENTS.md", content: agentsContent },
      ],
    }),
  });
  await loader.reload();

  // Plain queries are read-only; research mode additionally gets bash + write.
  const tools = [createReadTool(folder)];
  if (options.save) {
    tools.push(createBashTool(folder), createWriteTool(folder));
  }

  const { session } = await createAgentSession({
    cwd: folder,
    resourceLoader: loader,
    tools,
    sessionManager: SessionManager.inMemory(),
    settingsManager: SettingsManager.inMemory({
      compaction: { enabled: false },
    }),
  });

  try {
    // Stream assistant text to the terminal as it arrives.
    session.subscribe((event) => {
      if (
        event.type === "message_update" &&
        event.assistantMessageEvent.type === "text_delta"
      ) {
        process.stdout.write(event.assistantMessageEvent.delta);
      }
    });

    await session.prompt(question);
    console.log();
  } finally {
    // Release the session even when the prompt fails.
    session.dispose();
  }

  // Re-index after save so the compounding loop works
  if (options.save) {
    const { buildIndex } = await import("./indexer.js");
    await buildIndex(folder, sourcesDir);
  }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { resolve, join, dirname } from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Find the nearest knowledge base at or above `startDir`.
 *
 * Starting from the absolute form of `startDir`, each directory on the way up
 * to the filesystem root is checked for a `.llm-kb` entry.
 *
 * @param startDir Directory to begin the upward search from.
 * @returns The first directory that contains `.llm-kb`, or `null` when the
 *   root is reached without finding one.
 */
export function resolveKnowledgeBase(startDir: string): string | null {
  let current = resolve(startDir);
  let previous: string | undefined;

  // dirname() of the root returns the root itself, which ends the walk.
  while (current !== previous) {
    if (existsSync(join(current, ".llm-kb"))) {
      return current;
    }
    previous = current;
    current = dirname(current);
  }

  return null;
}
|