@vpxa/kb 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/analyzers/dist/blast-radius-analyzer.js +13 -114
- package/packages/analyzers/dist/dependency-analyzer.js +11 -425
- package/packages/analyzers/dist/diagram-generator.js +4 -86
- package/packages/analyzers/dist/entry-point-analyzer.js +5 -239
- package/packages/analyzers/dist/index.js +1 -23
- package/packages/analyzers/dist/knowledge-producer.js +24 -113
- package/packages/analyzers/dist/pattern-analyzer.js +5 -359
- package/packages/analyzers/dist/regex-call-graph.js +1 -428
- package/packages/analyzers/dist/structure-analyzer.js +4 -258
- package/packages/analyzers/dist/symbol-analyzer.js +13 -442
- package/packages/analyzers/dist/ts-call-graph.js +1 -160
- package/packages/analyzers/dist/types.js +0 -1
- package/packages/chunker/dist/call-graph-extractor.js +1 -90
- package/packages/chunker/dist/chunker-factory.js +1 -36
- package/packages/chunker/dist/chunker.interface.js +0 -1
- package/packages/chunker/dist/code-chunker.js +14 -134
- package/packages/chunker/dist/generic-chunker.js +5 -72
- package/packages/chunker/dist/index.js +1 -21
- package/packages/chunker/dist/markdown-chunker.js +7 -119
- package/packages/chunker/dist/treesitter-chunker.js +8 -234
- package/packages/cli/dist/commands/analyze.js +3 -112
- package/packages/cli/dist/commands/context-cmds.js +1 -155
- package/packages/cli/dist/commands/environment.js +2 -204
- package/packages/cli/dist/commands/execution.js +1 -137
- package/packages/cli/dist/commands/graph.js +7 -81
- package/packages/cli/dist/commands/init.js +9 -87
- package/packages/cli/dist/commands/knowledge.js +1 -139
- package/packages/cli/dist/commands/search.js +8 -267
- package/packages/cli/dist/commands/system.js +4 -241
- package/packages/cli/dist/commands/workspace.js +2 -388
- package/packages/cli/dist/context.js +1 -14
- package/packages/cli/dist/helpers.js +3 -458
- package/packages/cli/dist/index.js +3 -69
- package/packages/cli/dist/kb-init.js +1 -82
- package/packages/cli/dist/types.js +0 -1
- package/packages/core/dist/constants.js +1 -43
- package/packages/core/dist/content-detector.js +1 -79
- package/packages/core/dist/errors.js +1 -40
- package/packages/core/dist/index.js +1 -9
- package/packages/core/dist/logger.js +1 -34
- package/packages/core/dist/types.js +0 -1
- package/packages/embeddings/dist/embedder.interface.js +0 -1
- package/packages/embeddings/dist/index.js +1 -5
- package/packages/embeddings/dist/onnx-embedder.js +1 -82
- package/packages/indexer/dist/file-hasher.js +1 -13
- package/packages/indexer/dist/filesystem-crawler.js +1 -125
- package/packages/indexer/dist/graph-extractor.js +1 -111
- package/packages/indexer/dist/incremental-indexer.js +1 -278
- package/packages/indexer/dist/index.js +1 -14
- package/packages/server/dist/api.js +1 -9
- package/packages/server/dist/config.js +1 -75
- package/packages/server/dist/curated-manager.js +9 -356
- package/packages/server/dist/index.js +1 -134
- package/packages/server/dist/replay-interceptor.js +1 -38
- package/packages/server/dist/resources/resources.js +2 -40
- package/packages/server/dist/server.js +1 -247
- package/packages/server/dist/tools/analyze.tools.js +1 -288
- package/packages/server/dist/tools/forge.tools.js +11 -499
- package/packages/server/dist/tools/forget.tool.js +3 -39
- package/packages/server/dist/tools/graph.tool.js +5 -110
- package/packages/server/dist/tools/list.tool.js +5 -53
- package/packages/server/dist/tools/lookup.tool.js +8 -51
- package/packages/server/dist/tools/onboard.tool.js +2 -112
- package/packages/server/dist/tools/produce.tool.js +4 -74
- package/packages/server/dist/tools/read.tool.js +4 -47
- package/packages/server/dist/tools/reindex.tool.js +2 -70
- package/packages/server/dist/tools/remember.tool.js +3 -42
- package/packages/server/dist/tools/replay.tool.js +6 -88
- package/packages/server/dist/tools/search.tool.js +17 -327
- package/packages/server/dist/tools/status.tool.js +3 -68
- package/packages/server/dist/tools/toolkit.tools.js +20 -1673
- package/packages/server/dist/tools/update.tool.js +3 -39
- package/packages/server/dist/tools/utility.tools.js +19 -456
- package/packages/store/dist/graph-store.interface.js +0 -1
- package/packages/store/dist/index.js +1 -9
- package/packages/store/dist/lance-store.js +1 -258
- package/packages/store/dist/sqlite-graph-store.js +8 -309
- package/packages/store/dist/store-factory.js +1 -14
- package/packages/store/dist/store.interface.js +0 -1
- package/packages/tools/dist/batch.js +1 -45
- package/packages/tools/dist/changelog.js +2 -112
- package/packages/tools/dist/check.js +2 -59
- package/packages/tools/dist/checkpoint.js +2 -43
- package/packages/tools/dist/codemod.js +2 -69
- package/packages/tools/dist/compact.js +3 -60
- package/packages/tools/dist/data-transform.js +1 -124
- package/packages/tools/dist/dead-symbols.js +2 -71
- package/packages/tools/dist/delegate.js +3 -128
- package/packages/tools/dist/diff-parse.js +3 -153
- package/packages/tools/dist/digest.js +7 -242
- package/packages/tools/dist/encode.js +1 -46
- package/packages/tools/dist/env-info.js +1 -58
- package/packages/tools/dist/eval.js +3 -79
- package/packages/tools/dist/evidence-map.js +3 -203
- package/packages/tools/dist/file-summary.js +2 -106
- package/packages/tools/dist/file-walk.js +1 -75
- package/packages/tools/dist/find-examples.js +3 -48
- package/packages/tools/dist/find.js +1 -120
- package/packages/tools/dist/forge-classify.js +2 -319
- package/packages/tools/dist/forge-ground.js +1 -184
- package/packages/tools/dist/git-context.js +3 -46
- package/packages/tools/dist/graph-query.js +1 -194
- package/packages/tools/dist/health.js +1 -118
- package/packages/tools/dist/http-request.js +1 -58
- package/packages/tools/dist/index.js +1 -273
- package/packages/tools/dist/lane.js +7 -227
- package/packages/tools/dist/measure.js +2 -119
- package/packages/tools/dist/onboard.js +42 -1136
- package/packages/tools/dist/parse-output.js +2 -158
- package/packages/tools/dist/process-manager.js +1 -69
- package/packages/tools/dist/queue.js +2 -126
- package/packages/tools/dist/regex-test.js +1 -39
- package/packages/tools/dist/rename.js +2 -70
- package/packages/tools/dist/replay.js +6 -108
- package/packages/tools/dist/schema-validate.js +1 -141
- package/packages/tools/dist/scope-map.js +1 -72
- package/packages/tools/dist/snippet.js +1 -80
- package/packages/tools/dist/stash.js +2 -60
- package/packages/tools/dist/stratum-card.js +5 -238
- package/packages/tools/dist/symbol.js +3 -87
- package/packages/tools/dist/test-run.js +2 -55
- package/packages/tools/dist/text-utils.js +2 -31
- package/packages/tools/dist/time-utils.js +1 -135
- package/packages/tools/dist/trace.js +2 -114
- package/packages/tools/dist/truncation.js +10 -41
- package/packages/tools/dist/watch.js +1 -61
- package/packages/tools/dist/web-fetch.js +9 -244
- package/packages/tools/dist/web-search.js +1 -46
- package/packages/tools/dist/workset.js +2 -77
- package/packages/tui/dist/App.js +260 -52468
- package/packages/tui/dist/index.js +286 -54551
- package/packages/tui/dist/panels/CuratedPanel.js +211 -34291
- package/packages/tui/dist/panels/LogPanel.js +259 -51703
- package/packages/tui/dist/panels/SearchPanel.js +212 -34824
- package/packages/tui/dist/panels/StatusPanel.js +211 -34304
|
@@ -1,278 +1 @@
|
|
|
1
|
-
import { availableParallelism } from "node:os";
|
|
2
|
-
import { createChunker } from "@kb/chunker";
|
|
3
|
-
import { detectContentType } from "@kb/core";
|
|
4
|
-
import { generateRecordId, hashContent } from "./file-hasher.js";
|
|
5
|
-
import { FilesystemCrawler } from "./filesystem-crawler.js";
|
|
6
|
-
import { extractGraph } from "./graph-extractor.js";
|
|
7
|
-
async function runConcurrent(items, fn, concurrency, onError) {
|
|
8
|
-
let nextIndex = 0;
|
|
9
|
-
async function worker() {
|
|
10
|
-
while (nextIndex < items.length) {
|
|
11
|
-
const i = nextIndex++;
|
|
12
|
-
try {
|
|
13
|
-
await fn(items[i]);
|
|
14
|
-
} catch (err) {
|
|
15
|
-
onError?.(items[i], err);
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
await Promise.all(Array.from({ length: Math.min(concurrency, items.length) }, () => worker()));
|
|
20
|
-
}
|
|
21
|
-
const DEFAULT_CONCURRENCY = Math.max(1, Math.floor(availableParallelism() / 2));
|
|
22
|
-
class IncrementalIndexer {
|
|
23
|
-
constructor(embedder, store) {
|
|
24
|
-
this.embedder = embedder;
|
|
25
|
-
this.store = store;
|
|
26
|
-
this.crawler = new FilesystemCrawler();
|
|
27
|
-
}
|
|
28
|
-
crawler;
|
|
29
|
-
indexing = false;
|
|
30
|
-
graphStore;
|
|
31
|
-
/** Set the graph store for auto-population during indexing and cleanup on re-index. */
|
|
32
|
-
setGraphStore(graphStore) {
|
|
33
|
-
this.graphStore = graphStore;
|
|
34
|
-
}
|
|
35
|
-
/**
|
|
36
|
-
* Index all configured sources. Only re-indexes files that have changed.
|
|
37
|
-
* Sources are crawled in parallel, and file processing runs concurrently
|
|
38
|
-
* up to `config.indexing.concurrency` (default: half of CPU cores).
|
|
39
|
-
*/
|
|
40
|
-
async index(config, onProgress) {
|
|
41
|
-
if (this.indexing) {
|
|
42
|
-
throw new Error("Indexing is already in progress");
|
|
43
|
-
}
|
|
44
|
-
this.indexing = true;
|
|
45
|
-
try {
|
|
46
|
-
return await this.doIndex(config, onProgress, {});
|
|
47
|
-
} finally {
|
|
48
|
-
this.indexing = false;
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
async doIndex(config, onProgress, opts = {}) {
|
|
52
|
-
const startTime = Date.now();
|
|
53
|
-
let filesProcessed = 0;
|
|
54
|
-
let filesSkipped = 0;
|
|
55
|
-
let chunksCreated = 0;
|
|
56
|
-
let filesRemoved = 0;
|
|
57
|
-
const concurrency = config.indexing.concurrency ?? DEFAULT_CONCURRENCY;
|
|
58
|
-
onProgress?.({
|
|
59
|
-
phase: "crawling",
|
|
60
|
-
filesTotal: 0,
|
|
61
|
-
filesProcessed: 0,
|
|
62
|
-
chunksTotal: 0,
|
|
63
|
-
chunksProcessed: 0
|
|
64
|
-
});
|
|
65
|
-
const crawlResults = await Promise.all(
|
|
66
|
-
config.sources.map(
|
|
67
|
-
(source) => this.crawler.crawl({
|
|
68
|
-
rootDir: source.path,
|
|
69
|
-
excludePatterns: source.excludePatterns
|
|
70
|
-
})
|
|
71
|
-
)
|
|
72
|
-
);
|
|
73
|
-
const allFiles = crawlResults.flat();
|
|
74
|
-
let filesToProcess;
|
|
75
|
-
let pathsToRemove;
|
|
76
|
-
if (opts.skipHashCheck) {
|
|
77
|
-
filesToProcess = allFiles;
|
|
78
|
-
pathsToRemove = [];
|
|
79
|
-
} else {
|
|
80
|
-
const existingPaths = await this.store.listSourcePaths();
|
|
81
|
-
const currentPathSet = new Set(allFiles.map((f) => f.relativePath));
|
|
82
|
-
pathsToRemove = existingPaths.filter(
|
|
83
|
-
(p) => !currentPathSet.has(p) && !p.startsWith("curated/")
|
|
84
|
-
);
|
|
85
|
-
filesToProcess = [];
|
|
86
|
-
await runConcurrent(
|
|
87
|
-
allFiles,
|
|
88
|
-
async (file) => {
|
|
89
|
-
const contentHash = hashContent(file.content);
|
|
90
|
-
const existingRecords = await this.store.getBySourcePath(file.relativePath);
|
|
91
|
-
if (existingRecords.length > 0 && existingRecords[0].fileHash === contentHash) {
|
|
92
|
-
filesSkipped++;
|
|
93
|
-
return;
|
|
94
|
-
}
|
|
95
|
-
filesToProcess.push(file);
|
|
96
|
-
},
|
|
97
|
-
concurrency,
|
|
98
|
-
(file, err) => console.error(`[indexer] hash check failed for ${file.relativePath}:`, err)
|
|
99
|
-
);
|
|
100
|
-
}
|
|
101
|
-
const totalFiles = filesToProcess.length;
|
|
102
|
-
const GRAPH_FLUSH_INTERVAL = 50;
|
|
103
|
-
let pendingNodes = [];
|
|
104
|
-
let pendingEdges = [];
|
|
105
|
-
let graphFilesAccumulated = 0;
|
|
106
|
-
const flushGraph = async () => {
|
|
107
|
-
if (!this.graphStore) return;
|
|
108
|
-
try {
|
|
109
|
-
if (pendingNodes.length > 0) {
|
|
110
|
-
await this.graphStore.upsertNodes(pendingNodes);
|
|
111
|
-
}
|
|
112
|
-
if (pendingEdges.length > 0) {
|
|
113
|
-
await this.graphStore.upsertEdges(pendingEdges);
|
|
114
|
-
}
|
|
115
|
-
} catch (err) {
|
|
116
|
-
console.error("[indexer] graph batch flush failed:", err);
|
|
117
|
-
}
|
|
118
|
-
pendingNodes = [];
|
|
119
|
-
pendingEdges = [];
|
|
120
|
-
graphFilesAccumulated = 0;
|
|
121
|
-
};
|
|
122
|
-
await runConcurrent(
|
|
123
|
-
filesToProcess,
|
|
124
|
-
async (file) => {
|
|
125
|
-
onProgress?.({
|
|
126
|
-
phase: "chunking",
|
|
127
|
-
filesTotal: totalFiles,
|
|
128
|
-
filesProcessed,
|
|
129
|
-
chunksTotal: chunksCreated,
|
|
130
|
-
chunksProcessed: chunksCreated,
|
|
131
|
-
currentFile: file.relativePath
|
|
132
|
-
});
|
|
133
|
-
const contentType = detectContentType(file.relativePath);
|
|
134
|
-
const chunker = createChunker(file.extension);
|
|
135
|
-
const chunks = chunker.chunk(file.content, {
|
|
136
|
-
sourcePath: file.relativePath,
|
|
137
|
-
contentType
|
|
138
|
-
});
|
|
139
|
-
if (chunks.length === 0) return;
|
|
140
|
-
onProgress?.({
|
|
141
|
-
phase: "embedding",
|
|
142
|
-
filesTotal: totalFiles,
|
|
143
|
-
filesProcessed,
|
|
144
|
-
chunksTotal: chunksCreated + chunks.length,
|
|
145
|
-
chunksProcessed: chunksCreated,
|
|
146
|
-
currentFile: file.relativePath
|
|
147
|
-
});
|
|
148
|
-
const vectors = await this.embedder.embedBatch(chunks.map((c) => c.text));
|
|
149
|
-
const fileHash = hashContent(file.content);
|
|
150
|
-
const records = chunks.map((chunk, i) => ({
|
|
151
|
-
id: generateRecordId(file.relativePath, i),
|
|
152
|
-
content: chunk.text,
|
|
153
|
-
sourcePath: chunk.sourcePath,
|
|
154
|
-
contentType: chunk.contentType,
|
|
155
|
-
headingPath: chunk.headingPath,
|
|
156
|
-
chunkIndex: chunk.chunkIndex,
|
|
157
|
-
totalChunks: chunk.totalChunks,
|
|
158
|
-
startLine: chunk.startLine,
|
|
159
|
-
endLine: chunk.endLine,
|
|
160
|
-
fileHash,
|
|
161
|
-
indexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
162
|
-
origin: "indexed",
|
|
163
|
-
tags: [],
|
|
164
|
-
version: 1
|
|
165
|
-
}));
|
|
166
|
-
onProgress?.({
|
|
167
|
-
phase: "storing",
|
|
168
|
-
filesTotal: totalFiles,
|
|
169
|
-
filesProcessed,
|
|
170
|
-
chunksTotal: chunksCreated + chunks.length,
|
|
171
|
-
chunksProcessed: chunksCreated,
|
|
172
|
-
currentFile: file.relativePath
|
|
173
|
-
});
|
|
174
|
-
await this.store.upsert(records, vectors);
|
|
175
|
-
if (this.graphStore) {
|
|
176
|
-
try {
|
|
177
|
-
if (!opts.graphCleared) {
|
|
178
|
-
await this.graphStore.deleteBySourcePath(file.relativePath);
|
|
179
|
-
}
|
|
180
|
-
const graph = extractGraph(file.content, file.relativePath);
|
|
181
|
-
if (graph.nodes.length > 0) pendingNodes.push(...graph.nodes);
|
|
182
|
-
if (graph.edges.length > 0) pendingEdges.push(...graph.edges);
|
|
183
|
-
graphFilesAccumulated++;
|
|
184
|
-
if (graphFilesAccumulated >= GRAPH_FLUSH_INTERVAL) {
|
|
185
|
-
await flushGraph();
|
|
186
|
-
}
|
|
187
|
-
} catch (err) {
|
|
188
|
-
console.error(`[indexer] graph extraction failed for ${file.relativePath}:`, err);
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
filesProcessed++;
|
|
192
|
-
chunksCreated += chunks.length;
|
|
193
|
-
},
|
|
194
|
-
concurrency,
|
|
195
|
-
(file, err) => console.error(`[indexer] processing failed for ${file.relativePath}:`, err)
|
|
196
|
-
);
|
|
197
|
-
await flushGraph();
|
|
198
|
-
if (pathsToRemove.length > 0) {
|
|
199
|
-
onProgress?.({
|
|
200
|
-
phase: "cleanup",
|
|
201
|
-
filesTotal: totalFiles,
|
|
202
|
-
filesProcessed,
|
|
203
|
-
chunksTotal: chunksCreated,
|
|
204
|
-
chunksProcessed: chunksCreated
|
|
205
|
-
});
|
|
206
|
-
await runConcurrent(
|
|
207
|
-
pathsToRemove,
|
|
208
|
-
async (path) => {
|
|
209
|
-
await this.store.deleteBySourcePath(path);
|
|
210
|
-
if (this.graphStore) {
|
|
211
|
-
await this.graphStore.deleteBySourcePath(path).catch((err) => console.error(`[indexer] graph cleanup failed for ${path}:`, err));
|
|
212
|
-
}
|
|
213
|
-
filesRemoved++;
|
|
214
|
-
},
|
|
215
|
-
concurrency,
|
|
216
|
-
(path, err) => console.error(`[indexer] cleanup failed for ${path}:`, err)
|
|
217
|
-
);
|
|
218
|
-
}
|
|
219
|
-
onProgress?.({
|
|
220
|
-
phase: "done",
|
|
221
|
-
filesTotal: totalFiles,
|
|
222
|
-
filesProcessed,
|
|
223
|
-
chunksTotal: chunksCreated,
|
|
224
|
-
chunksProcessed: chunksCreated
|
|
225
|
-
});
|
|
226
|
-
return {
|
|
227
|
-
filesProcessed,
|
|
228
|
-
filesSkipped,
|
|
229
|
-
chunksCreated,
|
|
230
|
-
filesRemoved,
|
|
231
|
-
durationMs: Date.now() - startTime
|
|
232
|
-
};
|
|
233
|
-
}
|
|
234
|
-
/**
|
|
235
|
-
* Force re-index all files (ignoring hashes).
|
|
236
|
-
*/
|
|
237
|
-
async reindexAll(config, onProgress) {
|
|
238
|
-
await this.store.dropTable();
|
|
239
|
-
if (this.graphStore) {
|
|
240
|
-
try {
|
|
241
|
-
const stats = await this.graphStore.getStats();
|
|
242
|
-
if (stats.nodeCount > 0) {
|
|
243
|
-
await this.graphStore.clear();
|
|
244
|
-
console.error(
|
|
245
|
-
`[indexer] Graph store cleared (was ${stats.nodeCount} nodes, ${stats.edgeCount} edges)`
|
|
246
|
-
);
|
|
247
|
-
}
|
|
248
|
-
} catch (err) {
|
|
249
|
-
console.error("[indexer] Graph store clear failed:", err);
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
return this.doReindex(config, onProgress);
|
|
253
|
-
}
|
|
254
|
-
async doReindex(config, onProgress) {
|
|
255
|
-
if (this.indexing) {
|
|
256
|
-
throw new Error("Indexing is already in progress");
|
|
257
|
-
}
|
|
258
|
-
this.indexing = true;
|
|
259
|
-
try {
|
|
260
|
-
return await this.doIndex(config, onProgress, {
|
|
261
|
-
skipHashCheck: true,
|
|
262
|
-
graphCleared: true
|
|
263
|
-
});
|
|
264
|
-
} finally {
|
|
265
|
-
this.indexing = false;
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
/**
|
|
269
|
-
* Get current index statistics.
|
|
270
|
-
*/
|
|
271
|
-
async getStats() {
|
|
272
|
-
return this.store.getStats();
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
|
-
export {
|
|
276
|
-
IncrementalIndexer
|
|
277
|
-
};
|
|
278
|
-
//# sourceMappingURL=incremental-indexer.js.map
|
|
1
|
+
import { availableParallelism } from "node:os";
import { createChunker } from "@kb/chunker";
import { detectContentType } from "@kb/core";
import { generateRecordId, hashContent } from "./file-hasher.js";
import { FilesystemCrawler } from "./filesystem-crawler.js";
import { extractGraph } from "./graph-extractor.js";

/**
 * Run `fn` over every item using a fixed pool of async workers.
 *
 * Each worker repeatedly claims the next unprocessed index until the list is
 * exhausted. A rejection from `fn` is reported through `onError` (when given)
 * and does not stop the remaining items.
 *
 * @param {Array} items - Items to process.
 * @param {(item: any) => Promise<void>} fn - Async handler invoked once per item.
 * @param {number} concurrency - Requested number of parallel workers.
 * @param {(item: any, err: unknown) => void} [onError] - Called when `fn` rejects.
 * @returns {Promise<void>} Resolves once every item has been attempted.
 */
async function runConcurrent(items, fn, concurrency, onError) {
  let nextIndex = 0;

  async function worker() {
    while (nextIndex < items.length) {
      const current = nextIndex++;
      try {
        await fn(items[current]);
      } catch (err) {
        onError?.(items[current], err);
      }
    }
  }

  // A concurrency of 0, a negative number or NaN would otherwise spawn zero
  // workers and silently skip every item, so fall back to a single worker.
  const requestedWorkers = Math.max(1, Math.floor(concurrency) || 1);
  const workerCount = Math.min(requestedWorkers, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
}

/** Default worker count: half of the available parallelism, at least 1. */
const DEFAULT_CONCURRENCY = Math.max(1, Math.floor(availableParallelism() / 2));

/** Number of files whose graph data is buffered before a batch flush. */
const GRAPH_FLUSH_INTERVAL = 50;

/**
 * Indexes configured sources into a record store, skipping files whose content
 * hash is unchanged, and optionally populates a graph store alongside.
 */
class IncrementalIndexer {
  crawler;
  indexing = false;
  graphStore;

  /**
   * @param embedder - Object exposing `embedBatch(texts)`.
   * @param store - Record store exposing `listSourcePaths`, `getBySourcePath`,
   *   `upsert`, `deleteBySourcePath`, `dropTable` and `getStats`.
   */
  constructor(embedder, store) {
    this.embedder = embedder;
    this.store = store;
    this.crawler = new FilesystemCrawler();
  }

  /** Set the graph store for auto-population during indexing and cleanup on re-index. */
  setGraphStore(graphStore) {
    this.graphStore = graphStore;
  }

  /**
   * Index all configured sources. Only re-indexes files that have changed.
   * Sources are crawled in parallel, and file processing runs concurrently
   * up to `config.indexing.concurrency` (default: `DEFAULT_CONCURRENCY`).
   *
   * @param config - Configuration providing `sources` and `indexing`.
   * @param {(progress: object) => void} [onProgress] - Progress callback.
   * @returns {Promise<object>} Counters and duration of the run.
   * @throws {Error} If another indexing run is already in progress.
   */
  async index(config, onProgress) {
    if (this.indexing) {
      throw new Error("Indexing is already in progress");
    }
    this.indexing = true;
    try {
      return await this.doIndex(config, onProgress, {});
    } finally {
      this.indexing = false;
    }
  }

  /**
   * Core indexing pipeline: crawl, select changed files, chunk, embed, store,
   * extract graph data, then remove records for files that no longer exist.
   *
   * @param config - Configuration providing `sources` and `indexing`.
   * @param {(progress: object) => void} [onProgress] - Progress callback.
   * @param {{skipHashCheck?: boolean, graphCleared?: boolean}} [opts]
   *   `skipHashCheck` processes every crawled file and removes nothing;
   *   `graphCleared` skips the per-file graph delete before re-extraction.
   * @returns {Promise<object>} `{ filesProcessed, filesSkipped, chunksCreated, filesRemoved, durationMs }`.
   */
  async doIndex(config, onProgress, opts = {}) {
    const startTime = Date.now();
    let filesProcessed = 0;
    let filesSkipped = 0;
    let chunksCreated = 0;
    let filesRemoved = 0;
    const concurrency = config.indexing.concurrency ?? DEFAULT_CONCURRENCY;

    onProgress?.({
      phase: "crawling",
      filesTotal: 0,
      filesProcessed: 0,
      chunksTotal: 0,
      chunksProcessed: 0,
    });

    const crawlResults = await Promise.all(
      config.sources.map((source) =>
        this.crawler.crawl({
          rootDir: source.path,
          excludePatterns: source.excludePatterns,
        }),
      ),
    );
    const allFiles = crawlResults.flat();

    let filesToProcess;
    let pathsToRemove;
    if (opts.skipHashCheck) {
      filesToProcess = allFiles;
      pathsToRemove = [];
    } else {
      const existingPaths = await this.store.listSourcePaths();
      const currentPathSet = new Set(allFiles.map((f) => f.relativePath));
      // Paths under "curated/" are never removed by the indexer.
      pathsToRemove = existingPaths.filter(
        (p) => !currentPathSet.has(p) && !p.startsWith("curated/"),
      );
      filesToProcess = [];
      await runConcurrent(
        allFiles,
        async (file) => {
          const contentHash = hashContent(file.content);
          const existingRecords = await this.store.getBySourcePath(file.relativePath);
          if (existingRecords.length > 0 && existingRecords[0].fileHash === contentHash) {
            filesSkipped++;
            return;
          }
          filesToProcess.push(file);
        },
        concurrency,
        (file, err) => console.error(`[indexer] hash check failed for ${file.relativePath}:`, err),
      );
    }

    const totalFiles = filesToProcess.length;
    let pendingNodes = [];
    let pendingEdges = [];
    let graphFilesAccumulated = 0;

    const flushGraph = async () => {
      if (!this.graphStore) return;
      // Detach the buffers BEFORE awaiting: other workers keep pushing while
      // the upserts are in flight, and resetting the shared arrays afterwards
      // would silently drop whatever they added in the meantime.
      const nodes = pendingNodes;
      const edges = pendingEdges;
      pendingNodes = [];
      pendingEdges = [];
      graphFilesAccumulated = 0;
      try {
        if (nodes.length > 0) {
          await this.graphStore.upsertNodes(nodes);
        }
        if (edges.length > 0) {
          await this.graphStore.upsertEdges(edges);
        }
      } catch (err) {
        // Best effort: a failed graph batch is logged and dropped.
        console.error("[indexer] graph batch flush failed:", err);
      }
    };

    await runConcurrent(
      filesToProcess,
      async (file) => {
        onProgress?.({
          phase: "chunking",
          filesTotal: totalFiles,
          filesProcessed,
          chunksTotal: chunksCreated,
          chunksProcessed: chunksCreated,
          currentFile: file.relativePath,
        });
        const contentType = detectContentType(file.relativePath);
        const chunker = createChunker(file.extension);
        const chunks = chunker.chunk(file.content, {
          sourcePath: file.relativePath,
          contentType,
        });
        // Files that yield no chunks are not counted as processed.
        if (chunks.length === 0) return;

        onProgress?.({
          phase: "embedding",
          filesTotal: totalFiles,
          filesProcessed,
          chunksTotal: chunksCreated + chunks.length,
          chunksProcessed: chunksCreated,
          currentFile: file.relativePath,
        });
        const vectors = await this.embedder.embedBatch(chunks.map((c) => c.text));
        const fileHash = hashContent(file.content);
        const records = chunks.map((chunk, i) => ({
          id: generateRecordId(file.relativePath, i),
          content: chunk.text,
          sourcePath: chunk.sourcePath,
          contentType: chunk.contentType,
          headingPath: chunk.headingPath,
          chunkIndex: chunk.chunkIndex,
          totalChunks: chunk.totalChunks,
          startLine: chunk.startLine,
          endLine: chunk.endLine,
          fileHash,
          indexedAt: new Date().toISOString(),
          origin: "indexed",
          tags: [],
          version: 1,
        }));

        onProgress?.({
          phase: "storing",
          filesTotal: totalFiles,
          filesProcessed,
          chunksTotal: chunksCreated + chunks.length,
          chunksProcessed: chunksCreated,
          currentFile: file.relativePath,
        });
        await this.store.upsert(records, vectors);

        if (this.graphStore) {
          try {
            if (!opts.graphCleared) {
              await this.graphStore.deleteBySourcePath(file.relativePath);
            }
            const graph = extractGraph(file.content, file.relativePath);
            if (graph.nodes.length > 0) pendingNodes.push(...graph.nodes);
            if (graph.edges.length > 0) pendingEdges.push(...graph.edges);
            graphFilesAccumulated++;
            if (graphFilesAccumulated >= GRAPH_FLUSH_INTERVAL) {
              await flushGraph();
            }
          } catch (err) {
            // Graph extraction is best effort; the file still counts as indexed.
            console.error(`[indexer] graph extraction failed for ${file.relativePath}:`, err);
          }
        }

        filesProcessed++;
        chunksCreated += chunks.length;
      },
      concurrency,
      (file, err) => console.error(`[indexer] processing failed for ${file.relativePath}:`, err),
    );

    // Flush whatever is left below the batch threshold.
    await flushGraph();

    if (pathsToRemove.length > 0) {
      onProgress?.({
        phase: "cleanup",
        filesTotal: totalFiles,
        filesProcessed,
        chunksTotal: chunksCreated,
        chunksProcessed: chunksCreated,
      });
      await runConcurrent(
        pathsToRemove,
        async (path) => {
          await this.store.deleteBySourcePath(path);
          if (this.graphStore) {
            await this.graphStore
              .deleteBySourcePath(path)
              .catch((err) => console.error(`[indexer] graph cleanup failed for ${path}:`, err));
          }
          filesRemoved++;
        },
        concurrency,
        (path, err) => console.error(`[indexer] cleanup failed for ${path}:`, err),
      );
    }

    onProgress?.({
      phase: "done",
      filesTotal: totalFiles,
      filesProcessed,
      chunksTotal: chunksCreated,
      chunksProcessed: chunksCreated,
    });

    return {
      filesProcessed,
      filesSkipped,
      chunksCreated,
      filesRemoved,
      durationMs: Date.now() - startTime,
    };
  }

  /**
   * Force re-index all files (ignoring hashes).
   *
   * The in-progress guard is taken BEFORE the record table is dropped and the
   * graph store is cleared, so a call made while another run is active throws
   * without destroying the data that run is writing.
   *
   * @param config - Configuration providing `sources` and `indexing`.
   * @param {(progress: object) => void} [onProgress] - Progress callback.
   * @returns {Promise<object>} Counters and duration of the run.
   * @throws {Error} If another indexing run is already in progress.
   */
  async reindexAll(config, onProgress) {
    if (this.indexing) {
      throw new Error("Indexing is already in progress");
    }
    this.indexing = true;
    try {
      await this.store.dropTable();
      if (this.graphStore) {
        try {
          const stats = await this.graphStore.getStats();
          if (stats.nodeCount > 0) {
            await this.graphStore.clear();
            console.error(
              `[indexer] Graph store cleared (was ${stats.nodeCount} nodes, ${stats.edgeCount} edges)`,
            );
          }
        } catch (err) {
          console.error("[indexer] Graph store clear failed:", err);
        }
      }
      return await this.doIndex(config, onProgress, {
        skipHashCheck: true,
        graphCleared: true,
      });
    } finally {
      this.indexing = false;
    }
  }

  /**
   * Guarded full pass over every crawled file, without dropping any data first.
   * Kept for callers that invoke it directly; `reindexAll` now holds the
   * in-progress guard itself and no longer delegates here.
   *
   * @throws {Error} If another indexing run is already in progress.
   */
  async doReindex(config, onProgress) {
    if (this.indexing) {
      throw new Error("Indexing is already in progress");
    }
    this.indexing = true;
    try {
      return await this.doIndex(config, onProgress, {
        skipHashCheck: true,
        graphCleared: true,
      });
    } finally {
      this.indexing = false;
    }
  }

  /**
   * Get current index statistics.
   */
  async getStats() {
    return this.store.getStats();
  }
}

export { IncrementalIndexer };
|
|
@@ -1,14 +1 @@
|
|
|
1
|
-
import { generateRecordId, hashContent } from "./file-hasher.js";
|
|
2
|
-
import { FilesystemCrawler } from "./filesystem-crawler.js";
|
|
3
|
-
import { extractGraph } from "./graph-extractor.js";
|
|
4
|
-
import {
|
|
5
|
-
IncrementalIndexer
|
|
6
|
-
} from "./incremental-indexer.js";
|
|
7
|
-
export {
|
|
8
|
-
FilesystemCrawler,
|
|
9
|
-
IncrementalIndexer,
|
|
10
|
-
extractGraph,
|
|
11
|
-
generateRecordId,
|
|
12
|
-
hashContent
|
|
13
|
-
};
|
|
14
|
-
//# sourceMappingURL=index.js.map
|
|
1
|
+
// Barrel module: re-exports the hashing helpers, the filesystem crawler,
// the graph extractor and the incremental indexer under their public names.
export { generateRecordId, hashContent } from "./file-hasher.js";
export { FilesystemCrawler } from "./filesystem-crawler.js";
export { extractGraph } from "./graph-extractor.js";
export { IncrementalIndexer } from "./incremental-indexer.js";
|
|
@@ -1,9 +1 @@
|
|
|
1
|
-
import { loadConfig } from "./config.js";
|
|
2
|
-
import { createMcpServer, createServer, initializeKnowledgeBase } from "./server.js";
|
|
3
|
-
export {
|
|
4
|
-
createMcpServer,
|
|
5
|
-
createServer,
|
|
6
|
-
initializeKnowledgeBase,
|
|
7
|
-
loadConfig
|
|
8
|
-
};
|
|
9
|
-
//# sourceMappingURL=api.js.map
|
|
1
|
+
// Barrel module: re-exports the configuration loader and the server
// factory/initialization functions under their public names.
export { loadConfig } from "./config.js";
export { createMcpServer, createServer, initializeKnowledgeBase } from "./server.js";
|
|
@@ -1,75 +1 @@
|
|
|
1
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
-
import { dirname, resolve } from "node:path";
|
|
3
|
-
import { fileURLToPath } from "node:url";
|
|
4
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
5
|
-
function guardContainedPath(resolved, root, label) {
|
|
6
|
-
const normalizedResolved = resolve(resolved);
|
|
7
|
-
const normalizedRoot = resolve(root);
|
|
8
|
-
if (!normalizedResolved.startsWith(normalizedRoot)) {
|
|
9
|
-
throw new Error(
|
|
10
|
-
`Config ${label} path escapes workspace root: ${resolved} is not under ${root}`
|
|
11
|
-
);
|
|
12
|
-
}
|
|
13
|
-
return normalizedResolved;
|
|
14
|
-
}
|
|
15
|
-
function loadConfig() {
|
|
16
|
-
const configPath = process.env.KB_CONFIG_PATH ?? (existsSync(resolve(process.cwd(), "kb.config.json")) ? resolve(process.cwd(), "kb.config.json") : resolve(__dirname, "..", "..", "..", "kb.config.json"));
|
|
17
|
-
try {
|
|
18
|
-
const raw = readFileSync(configPath, "utf-8");
|
|
19
|
-
const config = JSON.parse(raw);
|
|
20
|
-
if (!config.sources || !Array.isArray(config.sources) || config.sources.length === 0) {
|
|
21
|
-
throw new Error("Config must have at least one source");
|
|
22
|
-
}
|
|
23
|
-
if (!config.store?.path) {
|
|
24
|
-
throw new Error("Config must specify store.path");
|
|
25
|
-
}
|
|
26
|
-
const workspaceRoot = dirname(configPath);
|
|
27
|
-
config.sources = config.sources.map((source) => ({
|
|
28
|
-
...source,
|
|
29
|
-
path: guardContainedPath(resolve(workspaceRoot, source.path), workspaceRoot, "source")
|
|
30
|
-
}));
|
|
31
|
-
config.store.path = guardContainedPath(
|
|
32
|
-
resolve(workspaceRoot, config.store.path),
|
|
33
|
-
workspaceRoot,
|
|
34
|
-
"store"
|
|
35
|
-
);
|
|
36
|
-
config.curated = config.curated ?? { path: "curated" };
|
|
37
|
-
config.curated.path = guardContainedPath(
|
|
38
|
-
resolve(workspaceRoot, config.curated.path),
|
|
39
|
-
workspaceRoot,
|
|
40
|
-
"curated"
|
|
41
|
-
);
|
|
42
|
-
return config;
|
|
43
|
-
} catch (err) {
|
|
44
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
45
|
-
console.error(`[KB] Failed to load config from ${configPath}: ${message}`);
|
|
46
|
-
console.error("[KB] Falling back to default configuration");
|
|
47
|
-
return getDefaultConfig();
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
function getDefaultConfig() {
|
|
51
|
-
const workspaceRoot = process.env.KB_WORKSPACE_ROOT ?? process.cwd();
|
|
52
|
-
return {
|
|
53
|
-
sources: [
|
|
54
|
-
{
|
|
55
|
-
path: workspaceRoot,
|
|
56
|
-
excludePatterns: [
|
|
57
|
-
"node_modules/**",
|
|
58
|
-
"dist/**",
|
|
59
|
-
".git/**",
|
|
60
|
-
"coverage/**",
|
|
61
|
-
"*.lock",
|
|
62
|
-
"pnpm-lock.yaml"
|
|
63
|
-
]
|
|
64
|
-
}
|
|
65
|
-
],
|
|
66
|
-
indexing: { chunkSize: 1500, chunkOverlap: 200, minChunkSize: 100 },
|
|
67
|
-
embedding: { model: "mixedbread-ai/mxbai-embed-large-v1", dimensions: 1024 },
|
|
68
|
-
store: { backend: "lancedb", path: resolve(workspaceRoot, ".kb-data") },
|
|
69
|
-
curated: { path: resolve(workspaceRoot, "curated") }
|
|
70
|
-
};
|
|
71
|
-
}
|
|
72
|
-
export {
|
|
73
|
-
loadConfig
|
|
74
|
-
};
|
|
75
|
-
//# sourceMappingURL=config.js.map
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
import { dirname, isAbsolute, relative, resolve, sep } from "node:path";
import { fileURLToPath } from "node:url";

/** Directory containing this module; used to locate the bundled fallback config. */
const moduleDir = dirname(fileURLToPath(import.meta.url));

/**
 * Ensure `resolved` is `root` itself or a path nested inside it.
 *
 * Containment is decided on path segments via `path.relative`, not on a raw
 * string prefix: with a prefix check a sibling such as `/work/app-evil` would
 * wrongly count as being inside `/work/app`.
 *
 * @param {string} resolved - Candidate path.
 * @param {string} root - Workspace root the candidate must stay within.
 * @param {string} label - Config section name used in the error message.
 * @returns {string} The normalized absolute candidate path.
 * @throws {Error} If the candidate lies outside `root`.
 */
function guardContainedPath(resolved, root, label) {
  const normalizedResolved = resolve(resolved);
  const normalizedRoot = resolve(root);
  const fromRoot = relative(normalizedRoot, normalizedResolved);
  // `relative` yields "" for the root itself, a ".."-led path when the target
  // is above or beside the root, and an absolute path when no relative route
  // exists (e.g. a different drive on Windows).
  const escapesRoot =
    fromRoot === ".." || fromRoot.startsWith(`..${sep}`) || isAbsolute(fromRoot);
  if (escapesRoot) {
    throw new Error(
      `Config ${label} path escapes workspace root: ${resolved} is not under ${root}`,
    );
  }
  return normalizedResolved;
}

/**
 * Build the configuration used when no valid config file can be loaded.
 * The workspace root is `KB_WORKSPACE_ROOT` when set, otherwise the current
 * working directory.
 *
 * @returns {object} Default configuration with absolute store/curated paths.
 */
function getDefaultConfig() {
  const workspaceRoot = process.env.KB_WORKSPACE_ROOT ?? process.cwd();
  return {
    sources: [
      {
        path: workspaceRoot,
        excludePatterns: [
          "node_modules/**",
          "dist/**",
          ".git/**",
          "coverage/**",
          "*.lock",
          "pnpm-lock.yaml",
        ],
      },
    ],
    indexing: { chunkSize: 1500, chunkOverlap: 200, minChunkSize: 100 },
    embedding: { model: "mixedbread-ai/mxbai-embed-large-v1", dimensions: 1024 },
    store: { backend: "lancedb", path: resolve(workspaceRoot, ".kb-data") },
    curated: { path: resolve(workspaceRoot, "curated") },
  };
}

/**
 * Load and validate the knowledge-base configuration.
 *
 * The config file is looked up in this order: `KB_CONFIG_PATH`, then
 * `kb.config.json` in the current working directory, then `kb.config.json`
 * three directories above this module. Source, store and curated paths are
 * resolved against the config file's directory and must stay inside it.
 *
 * Any failure (unreadable file, invalid JSON, missing sources or store path,
 * a path escaping the workspace root) is logged to stderr and the default
 * configuration is returned instead; this function does not throw for them.
 *
 * @returns {object} The loaded configuration, or the default configuration.
 */
function loadConfig() {
  const cwdConfigPath = resolve(process.cwd(), "kb.config.json");
  const configPath =
    process.env.KB_CONFIG_PATH ??
    (existsSync(cwdConfigPath)
      ? cwdConfigPath
      : resolve(moduleDir, "..", "..", "..", "kb.config.json"));

  try {
    const config = JSON.parse(readFileSync(configPath, "utf-8"));
    if (!config.sources || !Array.isArray(config.sources) || config.sources.length === 0) {
      throw new Error("Config must have at least one source");
    }
    if (!config.store?.path) {
      throw new Error("Config must specify store.path");
    }

    // Every configured path is interpreted relative to the config file.
    const workspaceRoot = dirname(configPath);
    config.sources = config.sources.map((source) => ({
      ...source,
      path: guardContainedPath(resolve(workspaceRoot, source.path), workspaceRoot, "source"),
    }));
    config.store.path = guardContainedPath(
      resolve(workspaceRoot, config.store.path),
      workspaceRoot,
      "store",
    );
    config.curated = config.curated ?? { path: "curated" };
    config.curated.path = guardContainedPath(
      resolve(workspaceRoot, config.curated.path),
      workspaceRoot,
      "curated",
    );
    return config;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[KB] Failed to load config from ${configPath}: ${message}`);
    console.error("[KB] Falling back to default configuration");
    return getDefaultConfig();
  }
}

export { loadConfig };
|