xindex 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.ai/research/2026-04-10-file-watching.md +79 -0
- package/.ai/research/2026-04-10-mcp-output-format.md +129 -0
- package/.ai/task/INDEX.md +12 -0
- package/.ai/task/done/INDEX.md +3 -0
- package/.ai/task/done/task.2026-04-09-local-ai-research-protos.log.md +98 -0
- package/.ai/task/done/task.2026-04-09-local-ai-research-protos.md +102 -0
- package/.ai/task/task.2026-04-10-cluster-config.log.md +19 -0
- package/.ai/task/task.2026-04-10-cluster-config.md +118 -0
- package/.ai/task/task.2026-04-10-dir-indexing.log.md +8 -0
- package/.ai/task/task.2026-04-10-dir-indexing.md +92 -0
- package/.ai/task/task.2026-04-10-line-clustering.log.md +50 -0
- package/.ai/task/task.2026-04-10-line-clustering.md +176 -0
- package/.ai/task/task.2026-04-10-object-store.log.md +7 -0
- package/.ai/task/task.2026-04-10-object-store.md +81 -0
- package/.ai/task/task.2026-04-10-search-config.log.md +46 -0
- package/.ai/task/task.2026-04-10-search-config.md +274 -0
- package/.ai/task/task.2026-04-10-watch-indexing.log.md +32 -0
- package/.ai/task/task.2026-04-10-watch-indexing.md +101 -0
- package/.ai/task/task.2026-04-10-xindex-mcp.log.md +5 -0
- package/.ai/task/task.2026-04-10-xindex-mcp.md +92 -0
- package/.ai/task/task.2026-04-10-xindex-mcp.report.md +113 -0
- package/.claude/settings.local.json +73 -0
- package/.claude/skills/make-hof/SKILL.md +8 -0
- package/.claude/skills/make-hof/playbook.md +38 -0
- package/.cursor/mcp.json +8 -0
- package/.mcp.json +8 -0
- package/.xindex.json +22 -0
- package/CLAUDE.md +54 -0
- package/README.md +206 -0
- package/apps/indexApp.ts +31 -0
- package/apps/mcpApp.ts +119 -0
- package/apps/run.index.ts +19 -0
- package/apps/run.mcp.ts +49 -0
- package/apps/run.reset.ts +10 -0
- package/apps/run.search.ts +21 -0
- package/apps/run.watch.ts +44 -0
- package/apps/searchApp.ts +9 -0
- package/apps/watchApp.ts +53 -0
- package/apps/watchFileEventsApp.ts +39 -0
- package/bin/xindex-index +2 -0
- package/bin/xindex-mcp +2 -0
- package/bin/xindex-reset +2 -0
- package/bin/xindex-search +2 -0
- package/bin/xindex-watch +2 -0
- package/componets/IType.ts +1 -0
- package/componets/appId.ts +3 -0
- package/componets/buildComponents.ts +27 -0
- package/componets/config/loadConfig.ts +43 -0
- package/componets/config/xindexConfig.ts +4 -0
- package/componets/index/contentIndexDriver.ts +39 -0
- package/componets/index/formatSearchResults.ts +18 -0
- package/componets/index/getIndexStats.ts +11 -0
- package/componets/index/handleFileEvent.ts +25 -0
- package/componets/index/indexApi.ts +45 -0
- package/componets/index/vectraIndex.ts +11 -0
- package/componets/index/watcherLock.ts +107 -0
- package/componets/keywords/cleanUpKeywords.ts +38 -0
- package/componets/keywords/extractKeywords.ts +14 -0
- package/componets/keywords/refineKeywords.ts +16 -0
- package/componets/llm/embed.ts +18 -0
- package/componets/llm/queryLLM.ts +20 -0
- package/componets/logger.ts +34 -0
- package/componets/walkFiles.ts +51 -0
- package/componets/watchFiles.ts +106 -0
- package/features/indexContent.ts +16 -0
- package/features/removeContent.ts +9 -0
- package/features/resetIndex.ts +9 -0
- package/features/searchIndex.ts +33 -0
- package/package.json +32 -0
- package/packages/fun/src/IType.ts +5 -0
- package/packages/fun/src/array-finder.ts +55 -0
- package/packages/fun/src/array-index.ts +35 -0
- package/packages/fun/src/array.ts +112 -0
- package/packages/fun/src/assert.ts +5 -0
- package/packages/fun/src/asyncRequest.ts +35 -0
- package/packages/fun/src/callsites.ts +18 -0
- package/packages/fun/src/case-never.ts +9 -0
- package/packages/fun/src/casting.ts +41 -0
- package/packages/fun/src/collect.ts +13 -0
- package/packages/fun/src/concurrency.ts +186 -0
- package/packages/fun/src/container.ts +86 -0
- package/packages/fun/src/counter.ts +45 -0
- package/packages/fun/src/create-map.ts +2 -0
- package/packages/fun/src/dedupe.ts +2 -0
- package/packages/fun/src/defer.ts +55 -0
- package/packages/fun/src/delay.ts +5 -0
- package/packages/fun/src/discriminate.ts +34 -0
- package/packages/fun/src/enum-values.ts +12 -0
- package/packages/fun/src/exponential-backoff.ts +20 -0
- package/packages/fun/src/flatten.ts +11 -0
- package/packages/fun/src/hash.ts +67 -0
- package/packages/fun/src/hash128.ts +6 -0
- package/packages/fun/src/hash256.ts +6 -0
- package/packages/fun/src/hub.ts +53 -0
- package/packages/fun/src/id.ts +10 -0
- package/packages/fun/src/interval.ts +76 -0
- package/packages/fun/src/is-non-nullable.ts +2 -0
- package/packages/fun/src/isIterable.ts +3 -0
- package/packages/fun/src/mailbox.ts +13 -0
- package/packages/fun/src/map-record.ts +19 -0
- package/packages/fun/src/match-collections.ts +57 -0
- package/packages/fun/src/match-left-and-right-arrays.ts +78 -0
- package/packages/fun/src/mem.ts +26 -0
- package/packages/fun/src/memos.ts +28 -0
- package/packages/fun/src/normalizeError.ts +25 -0
- package/packages/fun/src/nothing.ts +3 -0
- package/packages/fun/src/pipe.ts +18 -0
- package/packages/fun/src/prettyJson.ts +3 -0
- package/packages/fun/src/project.ts +8 -0
- package/packages/fun/src/promise.ts +27 -0
- package/packages/fun/src/pubsub.ts +128 -0
- package/packages/fun/src/randomId.ts +14 -0
- package/packages/fun/src/regexp-escape.ts +13 -0
- package/packages/fun/src/retry.ts +15 -0
- package/packages/fun/src/serial.test.ts +107 -0
- package/packages/fun/src/serial.ts +17 -0
- package/packages/fun/src/sleep.ts +3 -0
- package/packages/fun/src/sort-object.ts +46 -0
- package/packages/fun/src/speed-test.ts +56 -0
- package/packages/fun/src/tick.ts +37 -0
- package/packages/fun/src/time-behavior.ts +50 -0
- package/packages/fun/src/time.ts +22 -0
- package/packages/fun/src/timedFallback.ts +37 -0
- package/packages/fun/src/timer.ts +30 -0
- package/packages/fun/src/value.ts +33 -0
- package/packages/fun/src/waitForCounter.ts +15 -0
- package/packages/streamx/src/batch.ts +23 -0
- package/packages/streamx/src/batchTimed.ts +113 -0
- package/packages/streamx/src/buffer.ts +72 -0
- package/packages/streamx/src/concatenate.ts +33 -0
- package/packages/streamx/src/filter.ts +14 -0
- package/packages/streamx/src/flat.ts +19 -0
- package/packages/streamx/src/flatMap.ts +9 -0
- package/packages/streamx/src/from.ts +30 -0
- package/packages/streamx/src/index.ts +49 -0
- package/packages/streamx/src/interval.ts +58 -0
- package/packages/streamx/src/loop.ts +8 -0
- package/packages/streamx/src/map.ts +12 -0
- package/packages/streamx/src/merge.ts +89 -0
- package/packages/streamx/src/nodeReadable.ts +6 -0
- package/packages/streamx/src/nodeTransform.ts +9 -0
- package/packages/streamx/src/nodeWritable.ts +38 -0
- package/packages/streamx/src/objectReader.ts +16 -0
- package/packages/streamx/src/polyfill.ts +20 -0
- package/packages/streamx/src/reader.ts +38 -0
- package/packages/streamx/src/reduce.ts +15 -0
- package/packages/streamx/src/scale.ts +93 -0
- package/packages/streamx/src/scaleSync.ts +13 -0
- package/packages/streamx/src/sequence.ts +7 -0
- package/packages/streamx/src/tap.ts +9 -0
- package/packages/streamx/src/toArray.ts +9 -0
- package/packages/streamx/src/writer.ts +96 -0
- package/rnd/hf.ts +14 -0
- package/rnd/keywords-compromise.ts +18 -0
- package/rnd/keywords-pipeline.ts +79 -0
- package/rnd/keywords.ts +38 -0
- package/rnd/test-vectra-memory.ts +63 -0
- package/rnd/vectra-keywords.ts +95 -0
- package/rnd/vectra.ts +50 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {BuildComponents} from "../componets/buildComponents.js";
|
|
2
|
+
import {BufferedLoggerToStdOut} from "../componets/logger.js";
|
|
3
|
+
import {SearchApp} from "./searchApp.js";
|
|
4
|
+
import {AppId} from "../componets/appId.js";
|
|
5
|
+
import {FormatSearchResults} from "../componets/index/formatSearchResults.js";
|
|
6
|
+
|
|
7
|
+
// CLI entry script: joins the command-line arguments into one query, searches the
// content index and logs the formatted results.
const appId = AppId();
const log = BufferedLoggerToStdOut();

// Validate the invocation first so a missing query fails before any component is built.
const query = process.argv.slice(2).join(" ");
if (!query) {
    log("Usage: xindex.search <query>");
    process.exit(1);
}

const {searchContentIndex} = await BuildComponents({log});
const search = SearchApp({searchContentIndex});
const format = FormatSearchResults();

log(`[${appId}] searching: "${query}"`);
const results = await search(query);
log(await format(query, results));
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import {BuildComponents} from "../componets/buildComponents.js";
|
|
2
|
+
import {HandleFileEvent} from "../componets/index/handleFileEvent.js";
|
|
3
|
+
import {BufferedLoggerToStdOut} from "../componets/logger.js";
|
|
4
|
+
import {WalkFiles} from "../componets/walkFiles.js";
|
|
5
|
+
import {WatchFiles} from "../componets/watchFiles.js";
|
|
6
|
+
import {WatcherLock} from "../componets/index/watcherLock.js";
|
|
7
|
+
import {WatchApp} from "./watchApp.js";
|
|
8
|
+
import {AppId} from "../componets/appId.js";
|
|
9
|
+
import {join} from "path";
|
|
10
|
+
|
|
11
|
+
// CLI entry script: indexes the given paths once, then keeps watching for changes.
const cwd = process.cwd();
const log = BufferedLoggerToStdOut();
const {indexContent, removeContent, getIndexStats, config} = await BuildComponents({log});
const {ignoreFiles} = config;
const walkFiles = WalkFiles({cwd, log, ignoreFiles});
const watchFiles = WatchFiles({cwd, log, ignoreFiles});
const handleFileEvent = HandleFileEvent({indexContent, removeContent, log});

const appId = AppId();
const lockPath = join(cwd, ".xindex", "lock.json");
const watcherLock = WatcherLock({lockPath, appId, log});

const app = WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLock});

// Paths come from the command line; with none given, the current directory is used.
const cliPaths = process.argv.slice(2);
const inputs = cliPaths.length ? cliPaths : ["."];

log(`[${appId}] started, indexing and watching: ${inputs.join(", ")}`);

const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

// SIGINT handler: stop the app, release the lock, wait 7 seconds, then exit with code 0.
async function shutdown(): Promise<void> {
    log(`\n[${appId}] shutting down — stopping watcher...`);
    app.stop();
    log(`[${appId}] releasing lock...`);
    await watcherLock.release();
    log(`[${appId}] waiting 7s for another watcher to take over...`);
    await pause(7000);
    log(`[${appId}] exiting`);
    process.exit(0);
}

process.on("SIGINT", shutdown);

await app.run(inputs);
log("done. indexed:", await getIndexStats());
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import {ISearchIndex, IIndexRecord} from "../features/searchIndex.js";
|
|
2
|
+
|
|
3
|
+
/** Search entry point: takes a query and an optional result limit, resolves to index records. */
export type ISearchApp = (query: string, limit?: number) => Promise<IIndexRecord[]>;

/**
 * Creates the search app around a content-index search function.
 *
 * @param searchContentIndex function that performs the actual index lookup
 * @returns a function forwarding `query` and `limit` to the index; `limit` defaults to 10
 */
export function SearchApp({searchContentIndex}: {searchContentIndex: ISearchIndex}): ISearchApp {
    const search: ISearchApp = async (query, limit = 10) => searchContentIndex(query, limit);
    return search;
}
|
package/apps/watchApp.ts
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import {from} from "../packages/streamx/src/from.js";
|
|
2
|
+
import {map} from "../packages/streamx/src/map.js";
|
|
3
|
+
import {tap} from "../packages/streamx/src/tap.js";
|
|
4
|
+
import {run} from "../packages/streamx/src/index.js";
|
|
5
|
+
import {IWalkFiles} from "../componets/walkFiles.js";
|
|
6
|
+
import {FileEventType, IWatchFiles} from "../componets/watchFiles.js";
|
|
7
|
+
import {IHandleFileEvent} from "../componets/index/handleFileEvent.js";
|
|
8
|
+
import {ILogger} from "../componets/logger.js";
|
|
9
|
+
import {IWatcherLock} from "../componets/index/watcherLock.js";
|
|
10
|
+
import {WatchFileEventsApp} from "./watchFileEventsApp.js";
|
|
11
|
+
|
|
12
|
+
/** Handle returned by `WatchApp`: `run` starts indexing and watching, `stop` halts the lock timers. */
export type IWatchApp = {
    run: (inputs: string[]) => Promise<void>;
    stop: () => void;
};

/**
 * Creates the watch application: an initial index pass over all walked files,
 * followed by a background watch process.
 *
 * @param walkFiles produces the paths to index for the given inputs
 * @param watchFiles file watcher factory handed to the watch process
 * @param handleFileEvent applies a single index/remove event
 * @param log logger
 * @param watcherLock lock handed to the watch process and stopped by `stop()`
 */
export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLock}: {
    walkFiles: IWalkFiles,
    watchFiles: IWatchFiles,
    handleFileEvent: IHandleFileEvent,
    log: ILogger,
    watcherLock: IWatcherLock,
}): IWatchApp {
    // Indexes one walked path and passes it through unchanged.
    const indexPath = async (id: string): Promise<string> => {
        await handleFileEvent({type: FileEventType.index, path: id});
        return id;
    };

    async function runApp(inputs: string[]): Promise<void> {
        // Phase 1: initial index — every walked file goes through handleFileEvent.
        const walked = from(walkFiles(inputs));
        const announced = walked.pipe(tap(id => log(`indexing: ${id}`)));
        await run(announced.pipe(map<string, string>(indexPath)));

        log("initial index complete, acquiring watcher lock...");

        // Phase 2: start the watch process; it is not awaited here.
        // NOTE(review): only inputs[0] is watched although every input was indexed above — confirm intent.
        const startWatch = WatchFileEventsApp({
            watchFiles,
            watchDir: inputs[0] ?? ".",
            handleFileEvent,
            log,
            watcherLock,
        });
        startWatch();
    }

    function stopApp(): void {
        watcherLock.stopHeartbeat();
        watcherLock.stopWaiting();
    }

    return {run: runApp, stop: stopApp};
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import {IWatchFiles} from "../componets/watchFiles.js";
|
|
2
|
+
import {IHandleFileEvent} from "../componets/index/handleFileEvent.js";
|
|
3
|
+
import {ILogger} from "../componets/logger.js";
|
|
4
|
+
import {IWatcherLock} from "../componets/index/watcherLock.js";
|
|
5
|
+
|
|
6
|
+
/** Starts the background watch process; returns immediately. */
export type IWatchFileEventsApp = () => void;

/**
 * Creates a starter for the watch process. While the lock is not stopped, the process
 * either holds the watcher lock and forwards file events to `handleFileEvent`, or waits
 * for the current holder to release it.
 *
 * @param watchFiles factory returning a watcher with an async `events` stream and `stop()`
 * @param watchDir directory passed to `watchFiles`
 * @param handleFileEvent applies a single file event
 * @param log logger
 * @param watcherLock lock deciding which process is the active watcher
 */
export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvent, log, watcherLock}: {
    watchFiles: IWatchFiles,
    watchDir: string,
    handleFileEvent: IHandleFileEvent,
    log: ILogger,
    watcherLock: IWatcherLock,
}): IWatchFileEventsApp {
    const errorText = (e: unknown): string => e instanceof Error ? e.message : String(e);

    return function startWatchProcess() {
        (async () => {
            // Random 500–1000 ms delay before the first acquire attempt
            // (presumably to de-synchronise processes started together — confirm).
            await new Promise(r => setTimeout(r, 500 + Math.random() * 500));
            while (!watcherLock.stopped) {
                if (await watcherLock.tryAcquire()) {
                    log(`watching for changes: ${watchDir}`);
                    const watcher = watchFiles([watchDir]);
                    const events = (async () => {
                        try {
                            for await (const e of watcher.events) {
                                // A failure on one event must not end the loop: the heartbeat
                                // below keeps holding the lock, so later events would be lost.
                                try {
                                    await handleFileEvent(e);
                                } catch (err) {
                                    log(`handle failed: ${e.path} — ${errorText(err)}`);
                                }
                            }
                        } catch (e) {
                            // Errors raised by the event stream itself end this watch round.
                            log(`watch error: ${errorText(e)}`);
                        }
                    })();
                    await watcherLock.heartbeatUntilLost();
                    watcher.stop();
                    await events;
                } else {
                    log("another watcher is active, standing by...");
                    await watcherLock.waitForRelease();
                }
            }
        })();
    };
}
|
package/bin/xindex-index
ADDED
package/bin/xindex-mcp
ADDED
package/bin/xindex-reset
ADDED
package/bin/xindex-watch
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/** Read-only view of an object type `T` that carries a string `type` discriminator field. */
export type IType<T extends { type: string }> = Readonly<T>
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import {Embed} from "./llm/embed.js";
|
|
2
|
+
import {ExtractKeywords} from "./keywords/extractKeywords.js";
|
|
3
|
+
import {CleanUpKeywords} from "./keywords/cleanUpKeywords.js";
|
|
4
|
+
import {ContentIndexDriver} from "./index/contentIndexDriver.js";
|
|
5
|
+
import {LoadConfig} from "./config/loadConfig.js";
|
|
6
|
+
import {ILogger} from "./logger.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Wires up the application's components: loads `.xindex.json`, creates the embedder and
 * keyword helpers, and builds the content index driver under `.xindex`.
 *
 * @param log logger passed to the config loader
 * @returns the keyword helpers, the index operations and the loaded config
 */
export async function BuildComponents({log}: {log: ILogger}) {
    const INDEX_DIR = ".xindex";

    const config = await LoadConfig({configPath: ".xindex.json", log})();

    const embed = Embed({pooling: "mean", normalize: true});
    const extractKeywords = ExtractKeywords();
    const cleanUpKeywords = CleanUpKeywords({
        maxNgrams: 2,
        minLength: 2,
        ignoreKeywords: config.ignoreKeywords,
    });

    const driver = await ContentIndexDriver({
        path: INDEX_DIR,
        embed,
        extractKeywords,
        cleanUpKeywords,
    });

    return {
        extractKeywords,
        cleanUpKeywords,
        indexContent: driver.indexContent,
        removeContent: driver.removeContent,
        getIndexStats: driver.getIndexStats,
        searchContentIndex: driver.searchContentIndex,
        resetIndex: driver.resetIndex,
        config,
    };
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import {readFile} from "fs/promises";
|
|
2
|
+
import {IXindexConfig} from "./xindexConfig.js";
|
|
3
|
+
import {ILogger} from "../logger.js";
|
|
4
|
+
|
|
5
|
+
/** Builds a fresh default config on every call so callers never share the default arrays. */
function defaultConfig(): IXindexConfig {
    return {
        ignoreKeywords: [],
        ignoreFiles: [],
    };
}

/** Loads the xindex configuration. */
export type ILoadConfig = () => Promise<IXindexConfig>;

/**
 * Creates a loader for the JSON config file at `configPath`.
 *
 * The loader returns defaults when the file is missing, empty, or does not hold a JSON
 * object (the last case is logged as a warning). Non-string entries of `ignoreKeywords`
 * and `ignoreFiles` are dropped, and `ignoreKeywords` entries of one character or less
 * are logged as warnings but kept.
 *
 * @param configPath path of the JSON config file
 * @param log logger used for warnings
 * @throws Error when the file cannot be read (other than ENOENT) or is not valid JSON
 */
export function LoadConfig({configPath, log}: {configPath: string, log: ILogger}): ILoadConfig {
    const errorText = (e: unknown): string => e instanceof Error ? e.message : String(e);
    const toStrings = (v: unknown): string[] =>
        Array.isArray(v) ? v.filter((e): e is string => typeof e === "string") : [];

    return async function loadConfig() {
        let raw: string;
        try {
            raw = await readFile(configPath, "utf8");
        } catch (e: unknown) {
            if ((e as {code?: unknown} | null)?.code === "ENOENT") return defaultConfig();
            throw new Error(`Cannot read ${configPath}: ${errorText(e)}`);
        }

        if (!raw.trim()) return defaultConfig();

        let parsed: unknown;
        try {
            parsed = JSON.parse(raw);
        } catch (e) {
            throw new Error(`Failed to parse ${configPath}: ${errorText(e)}`);
        }

        // JSON such as `null`, `[]` or `5` parses fine but has no config fields to read.
        if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
            log(`warning: ${configPath} does not contain a JSON object, using defaults`);
            return defaultConfig();
        }

        const fields = parsed as Record<string, unknown>;
        const config: IXindexConfig = {
            ignoreKeywords: toStrings(fields.ignoreKeywords),
            ignoreFiles: toStrings(fields.ignoreFiles),
        };

        for (const kw of config.ignoreKeywords) {
            if (kw.length <= 1) log(`warning: ignoreKeywords entry "${kw}" is <=1 char`);
        }

        return config;
    };
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import {IEmbed} from "../llm/embed.js";
|
|
2
|
+
import {IExtractKeywords} from "../keywords/extractKeywords.js";
|
|
3
|
+
import {ICleanUpKeywords} from "../keywords/cleanUpKeywords.js";
|
|
4
|
+
import {GetIndexStats, IGetIndexStats} from "./getIndexStats.js";
|
|
5
|
+
import {IndexContent, IIndexContent} from "../../features/indexContent.js";
|
|
6
|
+
import {SearchIndex, ISearchIndex} from "../../features/searchIndex.js";
|
|
7
|
+
import {RemoveContent, IRemoveContent} from "../../features/removeContent.js";
|
|
8
|
+
import {ResetIndex, IResetIndex} from "../../features/resetIndex.js";
|
|
9
|
+
import {VectraIndex} from "./vectraIndex.js";
|
|
10
|
+
import {IndexApi} from "./indexApi.js";
|
|
11
|
+
|
|
12
|
+
/** The set of operations exposed over one content index. */
export type IContentIndexDriver = Readonly<{
    getIndexStats: IGetIndexStats,
    indexContent: IIndexContent,
    removeContent: IRemoveContent,
    searchContentIndex: ISearchIndex,
    resetIndex: IResetIndex,
    flush: () => Promise<void>,
}>;

/**
 * Opens the vector index under `<path>/semantic` and builds the index operations on it.
 *
 * @param path base directory of the index
 * @param embed embedding function used for indexing and searching
 * @param extractKeywords keyword extractor shared by indexing and search
 * @param cleanUpKeywords keyword normaliser shared by indexing and search
 * @param scoreThreshold optional threshold forwarded to the search feature
 */
export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, scoreThreshold}: {
    path: string,
    embed: IEmbed,
    extractKeywords: IExtractKeywords,
    cleanUpKeywords: ICleanUpKeywords,
    scoreThreshold?: number,
}): Promise<IContentIndexDriver> {
    const index = await VectraIndex(`${path}/semantic`);
    const indexApi = IndexApi({index, embed});

    const getIndexStats = GetIndexStats({index});
    const indexContent = IndexContent({extractKeywords, cleanUpKeywords, indexApi});
    const removeContent = RemoveContent({indexApi});
    const searchContentIndex = SearchIndex({extractKeywords, cleanUpKeywords, embed, index, scoreThreshold});
    const resetIndex = ResetIndex({indexApi});
    const flush = () => indexApi.flush();

    return {getIndexStats, indexContent, removeContent, searchContentIndex, resetIndex, flush};
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import {IIndexRecord} from "../../features/searchIndex.js";
|
|
2
|
+
|
|
3
|
+
/** Renders search results for a query as human-readable text. */
export type IFormatSearchResults = (query: string, results: IIndexRecord[]) => Promise<string>;

/**
 * Creates the search-result formatter.
 *
 * The formatter returns a "No results" line for an empty result list; otherwise a header
 * with the result count followed by one numbered entry per record (its id, plus its
 * keywords when present), each entry preceded by a blank line.
 */
export function FormatSearchResults(): IFormatSearchResults {
    return async function formatSearchResults(query, results) {
        if (!results.length) return `No results for: "${query}"`;

        const entries = results.map((record, position) => {
            const suffix = record.keywords ? ` — ${record.keywords}` : "";
            // The leading newline is the blank line placed before each entry.
            return `\n${position + 1}. ${record.id}${suffix}`;
        });

        return `Search: "${query}" — ${results.length} result(s)\n\n${entries.join("\n")}`;
    };
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import {LocalIndex} from "vectra";
|
|
2
|
+
|
|
3
|
+
/** Summary of the index: how many items it currently holds. */
export type IIndexStats = { indexedAmount: number };
export type IGetIndexStats = () => Promise<IIndexStats>;

/**
 * Creates a stats reader for the given index.
 *
 * @param index index whose `getIndexStats()` supplies the item count
 * @returns a function resolving to `{indexedAmount}` taken from the index's `items` count
 */
export function GetIndexStats({index}: { index: LocalIndex }): IGetIndexStats {
    return async function getIndexStats() {
        const stats = await index.getIndexStats();
        const summary: IIndexStats = {indexedAmount: stats.items};
        return summary;
    }
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import {readFile} from "fs/promises";
|
|
2
|
+
import {IIndexContent} from "../../features/indexContent.js";
|
|
3
|
+
import {IRemoveContent} from "../../features/removeContent.js";
|
|
4
|
+
import {ILogger} from "../logger.js";
|
|
5
|
+
import {FileEventType, IFileEvent} from "../watchFiles.js";
|
|
6
|
+
|
|
7
|
+
/** Applies one file event to the index. */
export type IHandleFileEvent = (event: IFileEvent) => Promise<void>;

/**
 * Creates the file-event handler.
 *
 * Every event first removes the path's existing entry; a failed removal is logged and
 * otherwise ignored. An `index` event then reads the file as UTF-8 and indexes its text
 * with the path appended; any other event type only logs the removal.
 *
 * @param indexContent adds content to the index under an id
 * @param removeContent removes an id from the index
 * @param log logger
 */
export function HandleFileEvent({indexContent, removeContent, log}: {
    indexContent: IIndexContent,
    removeContent: IRemoveContent,
    log: ILogger,
}): IHandleFileEvent {
    // Best-effort removal: a failure is reported but never propagated.
    async function dropEntry(path: string): Promise<void> {
        try {
            await removeContent(path);
        } catch (e) {
            log(`remove failed: ${path} — ${(e as any)?.message ?? e}`);
        }
    }

    return async function handleFileEvent(event) {
        const {path} = event;
        await dropEntry(path);

        if (event.type !== FileEventType.index) {
            log(`remove: ${path}`);
            return;
        }

        const text = await readFile(path, "utf8");
        await indexContent(path, `${text}. ${path}`);
        log(`index: ${path}`);
    };
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import {LocalIndex} from "vectra";
|
|
2
|
+
import {IType} from "../IType.js";
|
|
3
|
+
import {ISerial, Serial} from "../../packages/fun/src/serial.js";
|
|
4
|
+
import {caseNever} from "../../packages/fun/src/case-never.js";
|
|
5
|
+
import {IEmbed} from "../llm/embed.js";
|
|
6
|
+
|
|
7
|
+
/** Kinds of command accepted by the index API. */
export enum IndexCommandType {
    index = 'index',
    delete = 'delete',
    reset = 'reset',
}

/** A command for the index, discriminated by its `type` field. */
export type IIndexCommand =
    | IType<{ type: IndexCommandType.index, id: string, content: string, keywords: string }>
    | IType<{ type: IndexCommandType.delete, id: string }>
    | IType<{ type: IndexCommandType.reset }>;

export type IIndexApi = ISerial<IIndexCommand, void>;

/**
 * Creates the command handler for the vector index, wrapped in `Serial`
 * (presumably so commands run one at a time — confirm against `Serial`).
 *
 * - `index`: embeds the content and upserts an item carrying the id and keywords as metadata
 * - `delete`: deletes the item with the given id
 * - `reset`: deletes the whole index and creates it again
 *
 * @param index vector index the commands are applied to
 * @param embed embedding function used for `index` commands
 */
export function IndexApi({index, embed}: { index: LocalIndex, embed: IEmbed }): IIndexApi {
    return Serial<IIndexCommand, void>(async command => {
        switch (command.type) {
            case IndexCommandType.index: {
                const {id, content, keywords} = command;
                const vector = await embed(content);
                await index.upsertItem({id, vector, metadata: {id, keywords}});
                return;
            }
            case IndexCommandType.delete:
                await index.deleteItem(command.id);
                return;
            case IndexCommandType.reset:
                await index.deleteIndex();
                await index.createIndex();
                return;
            default:
                // Compile-time exhaustiveness check: every command type is handled above.
                return caseNever(command);
        }
    });
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import {access, readFile, rm, writeFile} from "fs/promises";
|
|
2
|
+
import {ILogger} from "../logger.js";
|
|
3
|
+
|
|
4
|
+
/** Contents of the lock file: the owning app id and the time of its last write. */
export type LockData = { appId: string; lockedAt: number };

/** Handle returned by `WatcherLock`. */
export type IWatcherLock = {
    tryAcquire: () => Promise<boolean>;
    release: () => Promise<void>;
    heartbeatUntilLost: () => Promise<void>;
    stopHeartbeat: () => void;
    waitForRelease: () => Promise<void>;
    stopWaiting: () => void;
    readonly stopped: boolean;
};

/**
 * File-based lock that elects one active watcher among several processes.
 *
 * The holder rewrites the lock file every `heartbeatMs`; a lock whose timestamp is
 * `staleMs` old or older (or not a number) counts as stale and may be taken over.
 *
 * @param lockPath path of the JSON lock file
 * @param appId identity written into the lock and used to recognise our own lock
 * @param log logger for lock loss and write failures
 * @param heartbeatMs interval of the heartbeat and of the release polling (default 5000)
 * @param staleMs age after which a lock is considered stale (default 20000)
 */
export function WatcherLock({lockPath, appId, log, heartbeatMs = 5000, staleMs = 20000}: {
    lockPath: string,
    appId: string,
    log: ILogger,
    heartbeatMs?: number,
    staleMs?: number,
}): IWatcherLock {
    let heartbeat: { stop: () => void } | null = null;
    let wait: { stop: () => void } | null = null;
    let stopped = false;

    const errorText = (e: unknown): string => e instanceof Error ? e.message : String(e);

    // Returns the parsed lock file, or null when it is missing, unreadable or not valid JSON.
    async function readLock(): Promise<LockData | null> {
        try {
            return JSON.parse(await readFile(lockPath, "utf8")) as LockData;
        } catch {
            return null;
        }
    }

    // Writes (or overwrites) the lock file with our id and the current time.
    async function writeLock(): Promise<void> {
        await writeFile(lockPath, JSON.stringify({appId, lockedAt: Date.now()} satisfies LockData));
    }

    function isLockMine(current: LockData | null): boolean {
        return current?.appId === appId;
    }

    // A lock is live only while its age is below staleMs. A missing or non-numeric
    // timestamp makes the comparison false, so such a lock counts as stale.
    function isStale(lock: LockData): boolean {
        return !((Date.now() - lock.lockedAt) < staleMs);
    }

    return {
        /** Tries to take the lock; resolves to true when this instance now holds it. */
        async tryAcquire() {
            if (stopped) return false;
            const lock = await readLock();
            if (lock && !isStale(lock)) return false;
            try {
                if (!lock) {
                    // "wx" fails if the file already exists, so only one creator can win.
                    await writeFile(lockPath, JSON.stringify({appId, lockedAt: Date.now()} satisfies LockData), {flag: "wx"});
                } else {
                    // NOTE(review): a stale lock is overwritten with a plain write, so two
                    // processes taking over at the same moment can both report success.
                    await writeLock();
                }
                return true;
            } catch (e) {
                // EEXIST is ordinary contention; anything else (e.g. a missing directory)
                // would otherwise look like "another watcher is active" forever.
                if ((e as {code?: unknown} | null)?.code !== "EEXIST") {
                    log(`watcher lock write failed: ${lockPath} — ${errorText(e)}`);
                }
                const current = await readLock();
                const isMine = isLockMine(current);
                if (isMine) await writeLock();
                return isMine;
            }
        },

        /** Removes the lock file, but only if it currently belongs to this instance. */
        async release() {
            const lock = await readLock();
            if (isLockMine(lock)) await rm(lockPath, {force: true});
        },

        /**
         * Refreshes the lock every `heartbeatMs`. The returned promise resolves when the
         * lock is found to belong to another app id, or when the lock is stopped.
         */
        heartbeatUntilLost() {
            if (stopped) return Promise.resolve();
            return new Promise<void>(resolve => {
                const timer = setInterval(async () => {
                    if (stopped) return;
                    try {
                        const current = await readLock();
                        if (stopped) return;
                        if (current && !isLockMine(current)) {
                            log("watcher lock lost to another process");
                            heartbeat?.stop();
                            return;
                        }
                        await writeLock();
                    } catch (e) {
                        // Keep beating, but report the failure instead of hiding it.
                        log(`watcher lock heartbeat failed: ${errorText(e)}`);
                    }
                }, heartbeatMs);
                heartbeat = {stop: () => { clearInterval(timer); heartbeat = null; resolve(); }};
            });
        },

        /** Marks the lock as stopped and ends both the heartbeat and any pending wait. */
        stopHeartbeat() { stopped = true; heartbeat?.stop(); wait?.stop(); },

        /**
         * Polls the lock every `heartbeatMs`. The returned promise resolves when the lock
         * file is absent or stale, or when the lock is stopped.
         */
        waitForRelease() {
            if (stopped) return Promise.resolve();
            return new Promise<void>(resolve => {
                const timer = setInterval(async () => {
                    if (stopped) { wait?.stop(); return; }
                    const lock = await readLock();
                    if (!lock || isStale(lock)) wait?.stop();
                }, heartbeatMs);
                wait = {stop: () => { clearInterval(timer); wait = null; resolve(); }};
            });
        },

        /** Marks the lock as stopped and ends both any pending wait and the heartbeat. */
        stopWaiting() { stopped = true; wait?.stop(); heartbeat?.stop(); },

        get stopped() { return stopped; },
    };
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import {createRequire} from "module";
|
|
2
|
+
|
|
3
|
+
// keyword-extractor is loaded through a require function created for this ES module.
const require = createRequire(import.meta.url);
const keyword_extractor = require("keyword-extractor");

/** Normalises, filters and de-duplicates a list of keywords. */
export type ICleanUpKeywords = (keywords: string[]) => string[];

/**
 * Creates the keyword clean-up function.
 *
 * Keywords are joined, non-word characters become spaces, and the text is run through
 * keyword-extractor. Each extracted keyword is lower-cased and trimmed, then kept only
 * if it is longer than `minLength`, contains at least one letter a–z, is not in
 * `ignoreKeywords`, and has not been seen already.
 *
 * @param maxNgrams passed to the extractor as `return_max_ngrams`
 * @param minLength keywords of this length or shorter are dropped
 * @param ignoreKeywords keywords to exclude (compared lower-cased and trimmed)
 */
export function CleanUpKeywords({maxNgrams, minLength, ignoreKeywords = []}: {
    maxNgrams: number, minLength: number, ignoreKeywords?: string[]
}): ICleanUpKeywords {
    const normalize = (kw: string) => kw.toLowerCase().trim();
    const ignoreSet = new Set(ignoreKeywords.map(normalize));

    return function cleanUpKeywords(keywords) {
        const text = keywords.join(" ").replace(/\W+/gm, " ").trim();

        const candidates: string[] = keyword_extractor.extract(text, {
            language: "english",
            remove_digits: false,
            return_changed_case: true,
            remove_duplicates: true,
            return_max_ngrams: maxNgrams,
        });

        // A Set keeps first-seen order and drops repeats.
        const kept = new Set<string>();
        for (const candidate of candidates) {
            const kw = normalize(candidate);
            if (kw.length <= minLength) continue;
            if (!/[a-z]/i.test(kw)) continue;
            if (ignoreSet.has(kw)) continue;
            kept.add(kw);
        }
        return Array.from(kept);
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import nlp from "compromise";
|
|
2
|
+
|
|
3
|
+
/** Extracts candidate keywords from free text. */
export type IExtractKeywords = (text: string) => string[];

/**
 * Creates a keyword extractor that parses the text with `nlp` and returns its topics,
 * nouns and verbs, in that order, as one flat array (duplicates are not removed here).
 */
export function ExtractKeywords(): IExtractKeywords {
    return function extractKeywords(text) {
        const doc = nlp(text);
        const topics = doc.topics().out("array") as string[];
        const nouns = doc.nouns().out("array") as string[];
        const verbs = doc.verbs().out("array") as string[];
        return topics.concat(nouns, verbs);
    }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import {IQueryLLM} from "../llm/queryLLM.js";
|
|
2
|
+
import {ICleanUpKeywords} from "./cleanUpKeywords.js";
|
|
3
|
+
|
|
4
|
+
/** Refines a keyword list with the help of an LLM. */
export type IRefineKeywords = (keywords: string[]) => Promise<string[]>;

/**
 * Creates the keyword refiner.
 *
 * The keywords are sent to the LLM as one comma-separated string together with `prompt`;
 * the reply is split on commas, each part is trimmed, and the parts go through
 * `cleanUpKeywords`.
 *
 * @param queryLLM LLM query function
 * @param cleanUpKeywords clean-up applied to the LLM's reply
 * @param prompt prompt passed as the first argument of `queryLLM`
 */
export function RefineKeywords({queryLLM, cleanUpKeywords, prompt}: {
    queryLLM: IQueryLLM,
    cleanUpKeywords: ICleanUpKeywords,
    prompt: string,
}): IRefineKeywords {
    return async function refineKeywords(keywords) {
        const reply = await queryLLM(prompt, keywords.join(", "));
        const candidates = reply.split(",").map(part => part.trim());
        return cleanUpKeywords(candidates);
    }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import {pipeline} from "@huggingface/transformers";
|
|
2
|
+
|
|
3
|
+
// The feature-extraction pipeline is created once, when this module is loaded.
const embedder = await pipeline(
    "feature-extraction",
    "sentence-transformers/all-MiniLM-L6-v2",
);

/** Turns a text into an embedding vector. */
export type IEmbed = (text: string) => Promise<number[]>;

type Pooling = "none" | "mean" | "cls" | "first_token" | "eos" | "last_token";

/**
 * Creates an embedding function on top of the module-level pipeline.
 *
 * @param pooling pooling option passed to the pipeline
 * @param normalize normalize option passed to the pipeline
 * @returns a function resolving to the pipeline output's `data` copied into a plain number array
 */
export function Embed({pooling, normalize}: {pooling: Pooling, normalize: boolean}): IEmbed {
    return async function embed(text) {
        const {data} = await embedder(text, {pooling, normalize});
        return Array.from(data as Float32Array);
    }
}
|
|
18
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import {pipeline} from "@huggingface/transformers";
|
|
2
|
+
|
|
3
|
+
// The text-generation pipeline is created once, when this module is loaded.
const generator = await pipeline(
    "text-generation",
    "HuggingFaceTB/SmolLM2-135M-Instruct",
);

/** Sends a system prompt and a user input to the LLM and resolves to its reply text. */
export type IQueryLLM = (prompt: string, input: string, maxTokens?: number) => Promise<string>;

/**
 * Creates an LLM query function on top of the module-level pipeline.
 *
 * @param maxTokens default `max_new_tokens`, used when a call does not override it
 * @returns a function resolving to the content of the last message in the first output's
 *          `generated_text`, or "" when that content is null or undefined
 */
export function QueryLLM({maxTokens}: { maxTokens: number }): IQueryLLM {
    return async function queryLLM(prompt, input, overrideMaxTokens?) {
        const max_new_tokens = overrideMaxTokens ?? maxTokens;
        const messages = [
            {role: "system", content: prompt},
            {role: "user", content: input},
        ];
        const output = await generator(messages, {max_new_tokens});
        const lastMessage = output[0].generated_text.at(-1);
        return (lastMessage?.content as string) ?? "";
    }
}
|
|
20
|
+
|