xindex 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.xindex.json +2 -1
  2. package/CLAUDE.md +1 -0
  3. package/README.md +6 -0
  4. package/apps/indexApp.ts +9 -8
  5. package/apps/mcpApp.ts +6 -6
  6. package/apps/run.index.ts +2 -2
  7. package/apps/run.mcp.ts +6 -4
  8. package/apps/run.search.ts +1 -1
  9. package/apps/run.watch.ts +3 -3
  10. package/apps/searchApp.ts +4 -2
  11. package/apps/watchApp.ts +16 -8
  12. package/apps/watchFileEventsApp.ts +14 -4
  13. package/componets/buildComponents.ts +25 -9
  14. package/componets/config/DEFAULT_LOCATE_BATCH_SIZE.ts +1 -0
  15. package/componets/config/INDEXING_BATCH_SIZE.ts +1 -0
  16. package/componets/config/WATCH_FLUSH_MS.ts +1 -0
  17. package/componets/config/loadConfig.ts +10 -1
  18. package/componets/config/xindexConfig.ts +2 -0
  19. package/componets/ignore/loadIgnoreChain.ts +40 -0
  20. package/componets/index/contentIndexDriver.ts +7 -5
  21. package/componets/index/documentContentIndexDriver.ts +126 -0
  22. package/componets/index/documentIndex.ts +26 -0
  23. package/componets/index/formatSearchResults.ts +16 -2
  24. package/componets/index/handleFileEvent.ts +48 -3
  25. package/componets/index/indexApi.ts +39 -11
  26. package/componets/locate/bm25.ts +50 -0
  27. package/componets/locate/inMemoryIndex.ts +48 -0
  28. package/componets/locate/locateInFile.ts +148 -0
  29. package/componets/locate/windowsOf.ts +29 -0
  30. package/componets/watchFiles.ts +5 -16
  31. package/features/indexContent.ts +12 -5
  32. package/features/removeContent.ts +3 -3
  33. package/features/searchIndex.ts +22 -5
  34. package/package.json +15 -2
  35. package/packages/streamx/src/batchTimed.ts +1 -1
  36. package/packages/streamx/src/buffer.ts +1 -1
  37. package/packages/streamx/src/defer.ts +55 -0
  38. package/packages/streamx/src/interval.ts +1 -1
  39. package/packages/streamx/src/merge.ts +1 -1
  40. package/packages/streamx/src/nodeWritable.ts +1 -1
  41. package/packages/streamx/src/scale.ts +2 -2
  42. package/packages/streamx/src/writer.ts +1 -1
  43. package/.ai/research/.gitkeep +0 -0
  44. package/.ai/task/.gitkeep +0 -0
  45. package/.claude/settings.local.json +0 -73
  46. package/.claude/skills/make-hof/SKILL.md +0 -8
  47. package/.claude/skills/make-hof/playbook.md +0 -38
  48. package/.cursor/mcp.json +0 -8
  49. package/media/MEDIUM.md +0 -139
  50. package/media/SOCIAL.md +0 -102
package/.xindex.json CHANGED
@@ -18,5 +18,6 @@
18
18
  "utf8", "length", "map", "slice", "push", "join", "resolve", "stringify",
19
19
  "json", "settimeout", "path", "readfile"
20
20
  ],
21
- "ignoreFiles": [".xindex"]
21
+ "ignoreFiles": [".xindex", "media"],
22
+ "maxLines": 12
22
23
  }
package/CLAUDE.md CHANGED
@@ -9,6 +9,7 @@
9
9
  - **Text diagrams** — ASCII flows, hierarchies, tables. Keep minimal.
10
10
  - **research/search/ground** — search the Internet using DuckDuckGo MCP
11
11
  - **plan dev / go / dev / implement** — start implementation → triggers Pre-implementation check
12
+ - **check types / test compilation** — run `yarn test.compilation` to validate TypeScript compilation without relying on `tsconfig.json`
12
13
  - **recover** — find most recent `task.*.md` in `.ai/task/` (by date+mtime, exclude `*.log.md`/`*.report.md`), summarize state and next steps
13
14
  - **pull details / expand / flesh out** — enrich task with full detail while preserving shape (see Detail expansion)
14
15
 
package/README.md CHANGED
@@ -233,6 +233,12 @@ yarn install # or npm install
233
233
  npm link # exposes xindex-* binaries from your working copy
234
234
  ```
235
235
 
236
+ Check TypeScript compilation:
237
+
238
+ ```bash
239
+ yarn test.compilation
240
+ ```
241
+
236
242
  ## License
237
243
 
238
244
  MIT
package/apps/indexApp.ts CHANGED
@@ -1,30 +1,31 @@
1
1
  import {readFile} from "fs/promises";
2
2
  import {from} from "../packages/streamx/src/from.js";
3
+ import {batch} from "../packages/streamx/src/batch.js";
3
4
  import {map} from "../packages/streamx/src/map.js";
4
5
  import {tap} from "../packages/streamx/src/tap.js";
5
6
  import {run} from "../packages/streamx/src/index.js";
6
7
  import {IWalkFiles} from "../componets/walkFiles.js";
7
8
  import {IIndexContent} from "../features/indexContent.js";
8
- import {IRemoveContent} from "../features/removeContent.js";
9
9
  import {ILogger} from "../componets/logger.js";
10
+ import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE";
10
11
 
11
12
  export type IIndexApp = (inputs: string[]) => Promise<void>;
12
13
 
13
- export function IndexApp({walkFiles, indexContent, removeContent, log}: {
14
+ export function IndexApp({walkFiles, indexContent, log}: {
14
15
  walkFiles: IWalkFiles,
15
16
  indexContent: IIndexContent,
16
- removeContent: IRemoveContent,
17
17
  log: ILogger,
18
18
  }): IIndexApp {
19
19
  return async function indexApp(inputs) {
20
20
  await run(
21
21
  from(walkFiles(inputs))
22
22
  .pipe(tap(id => log(`indexing: ${id}`)))
23
- .pipe(map<string, string>(async (id) => {
24
- try { await removeContent(id); } catch (e) { log(`remove failed: ${id} — ${(e as any)?.message ?? e}`); }
25
- const text = await readFile(id, "utf8");
26
- await indexContent(id, `${text}. ${id}`);
27
- return id;
23
+ .pipe(batch(INDEXING_BATCH_SIZE))
24
+ .pipe(map<string[], string[]>(async (ids) => {
25
+ const texts = await Promise.all(ids.map(id => readFile(id, "utf8")));
26
+ const items = ids.map((id, i) => ({id, content: `${texts[i]}. ${id}`}));
27
+ await indexContent(items);
28
+ return ids;
28
29
  }))
29
30
  );
30
31
  }
package/apps/mcpApp.ts CHANGED
@@ -1,31 +1,31 @@
1
1
  import {McpServer} from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import {StdioServerTransport} from "@modelcontextprotocol/sdk/server/stdio.js";
3
3
  import {z} from "zod";
4
- import {ISearchIndex} from "../features/searchIndex.js";
5
4
  import {IIndexApp} from "./indexApp.js";
6
5
  import {IGetIndexStats} from "../componets/index/getIndexStats.js";
7
6
  import {IResetIndex} from "../features/resetIndex.js";
8
7
  import {IWatchFiles} from "../componets/watchFiles.js";
9
- import {IHandleFileEvent} from "../componets/index/handleFileEvent.js";
8
+ import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
10
9
  import {ILogger} from "../componets/logger.js";
11
10
  import {WatchFileEventsApp} from "./watchFileEventsApp.js";
12
11
  import {IWatcherLock} from "../componets/index/watcherLock.js";
13
12
  import {IXindexConfig} from "../componets/config/xindexConfig.js";
14
13
  import {FormatSearchResults} from "../componets/index/formatSearchResults.js";
14
+ import {ISearchApp} from "./searchApp.js";
15
15
 
16
16
  export type IMcpApp = () => Promise<void>;
17
17
 
18
18
  export type IMcpWatch = {
19
19
  watchFiles: IWatchFiles,
20
20
  watchDir: string,
21
- handleFileEvent: IHandleFileEvent,
21
+ handleFileEvents: IHandleFileEvents,
22
22
  watcherLock: IWatcherLock,
23
23
  };
24
24
 
25
25
  export function McpApp({
26
- searchContentIndex, indexApp, getIndexStats, resetIndex, log, watch, config,
26
+ search, indexApp, getIndexStats, resetIndex, log, watch, config,
27
27
  }: {
28
- searchContentIndex: ISearchIndex,
28
+ search: ISearchApp,
29
29
  indexApp: IIndexApp,
30
30
  getIndexStats: IGetIndexStats,
31
31
  resetIndex: IResetIndex,
@@ -51,7 +51,7 @@ export function McpApp({
51
51
  }, async ({query, limit}) => {
52
52
  try {
53
53
  const format = FormatSearchResults();
54
- const results = await searchContentIndex(query, limit);
54
+ const results = await search(query, limit);
55
55
  const text = await format(query, results);
56
56
  return {content: [{type: "text" as const, text}]};
57
57
  } catch (e) {
package/apps/run.index.ts CHANGED
@@ -7,9 +7,9 @@ import {AppId} from "../componets/appId.js";
7
7
  const appId = AppId();
8
8
  const cwd = process.cwd();
9
9
  const log = BufferedLoggerToStdOut();
10
- const {indexContent, removeContent, getIndexStats, config} = await BuildComponents({log});
10
+ const {indexContent, getIndexStats, config} = await BuildComponents({log});
11
11
  const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
12
- const indexApp = IndexApp({walkFiles, indexContent, removeContent, log});
12
+ const indexApp = IndexApp({walkFiles, indexContent, log});
13
13
 
14
14
  const inputs = process.argv.slice(2);
15
15
  if (!inputs.length) inputs.push(".");
package/apps/run.mcp.ts CHANGED
@@ -1,11 +1,12 @@
1
1
  import {BuildComponents} from "../componets/buildComponents.js";
2
- import {HandleFileEvent} from "../componets/index/handleFileEvent.js";
2
+ import {HandleFileEvents} from "../componets/index/handleFileEvent.js";
3
3
  import {BufferedLoggerToStdErr} from "../componets/logger.js";
4
4
  import {WalkFiles} from "../componets/walkFiles.js";
5
5
  import {WatchFiles} from "../componets/watchFiles.js";
6
6
  import {WatcherLock} from "../componets/index/watcherLock.js";
7
7
  import {IndexApp} from "./indexApp.js";
8
8
  import {McpApp} from "./mcpApp.js";
9
+ import {SearchApp} from "./searchApp.js";
9
10
  import {join} from "path";
10
11
  import {AppId} from "../componets/appId.js";
11
12
 
@@ -17,7 +18,8 @@ const cwd = process.cwd();
17
18
  const log = BufferedLoggerToStdErr();
18
19
  const {indexContent, removeContent, getIndexStats, searchContentIndex, resetIndex, config} = await BuildComponents({log});
19
20
  const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
20
- const indexApp = IndexApp({walkFiles, indexContent, removeContent, log});
21
+ const indexApp = IndexApp({walkFiles, indexContent, log});
22
+ const search = SearchApp({searchContentIndex});
21
23
 
22
24
  const appId = AppId();
23
25
  const watcherLock = WatcherLock({
@@ -29,7 +31,7 @@ const watcherLock = WatcherLock({
29
31
  const watch = watchDisabled ? undefined : {
30
32
  watchFiles: WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles}),
31
33
  watchDir: watchDirArg ? watchDirArg.split("=")[1] : ".",
32
- handleFileEvent: HandleFileEvent({indexContent, removeContent, log}),
34
+ handleFileEvents: HandleFileEvents({indexContent, removeContent, log}),
33
35
  watcherLock,
34
36
  };
35
37
 
@@ -45,5 +47,5 @@ process.on("SIGINT", async () => {
45
47
  });
46
48
 
47
49
  log(`[${appId}] started`);
48
- const mcpApp = McpApp({searchContentIndex, indexApp, getIndexStats, resetIndex, log, watch, config});
50
+ const mcpApp = McpApp({search, indexApp, getIndexStats, resetIndex, log, watch, config});
49
51
  await mcpApp();
@@ -4,7 +4,7 @@ import {SearchApp} from "./searchApp.js";
4
4
  import {FormatSearchResults} from "../componets/index/formatSearchResults.js";
5
5
 
6
6
  const log = BufferedLoggerToStdOut();
7
- const {searchContentIndex, config} = await BuildComponents({log});
7
+ const {searchContentIndex} = await BuildComponents({log});
8
8
  const search = SearchApp({searchContentIndex});
9
9
 
10
10
  const query = process.argv.slice(2).join(" ");
package/apps/run.watch.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import {BuildComponents} from "../componets/buildComponents.js";
2
- import {HandleFileEvent} from "../componets/index/handleFileEvent.js";
2
+ import {HandleFileEvents} from "../componets/index/handleFileEvent.js";
3
3
  import {BufferedLoggerToStdOut} from "../componets/logger.js";
4
4
  import {WalkFiles} from "../componets/walkFiles.js";
5
5
  import {WatchFiles} from "../componets/watchFiles.js";
@@ -13,7 +13,7 @@ const log = BufferedLoggerToStdOut();
13
13
  const {indexContent, removeContent, getIndexStats, config} = await BuildComponents({log});
14
14
  const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
15
15
  const watchFiles = WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles});
16
- const handleFileEvent = HandleFileEvent({indexContent, removeContent, log});
16
+ const handleFileEvents = HandleFileEvents({indexContent, removeContent, log});
17
17
 
18
18
  const appId = AppId();
19
19
  const watcherLock = WatcherLock({
@@ -22,7 +22,7 @@ const watcherLock = WatcherLock({
22
22
  log,
23
23
  });
24
24
 
25
- const app = WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLock});
25
+ const app = WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock});
26
26
 
27
27
  const inputs = process.argv.slice(2);
28
28
  if (!inputs.length) inputs.push(".");
package/apps/searchApp.ts CHANGED
@@ -2,8 +2,10 @@ import {ISearchIndex, IIndexRecord} from "../features/searchIndex.js";
2
2
 
3
3
  export type ISearchApp = (query: string, limit?: number) => Promise<IIndexRecord[]>;
4
4
 
5
- export function SearchApp({searchContentIndex}: {searchContentIndex: ISearchIndex}): ISearchApp {
5
+ export function SearchApp({searchContentIndex}: {
6
+ searchContentIndex: ISearchIndex;
7
+ }): ISearchApp {
6
8
  return async function search(query, limit = 7) {
7
- return await searchContentIndex(query, limit);
9
+ return searchContentIndex(query, limit);
8
10
  }
9
11
  }
package/apps/watchApp.ts CHANGED
@@ -1,23 +1,28 @@
1
1
  import {from} from "../packages/streamx/src/from.js";
2
+ import {batch} from "../packages/streamx/src/batch.js";
2
3
  import {map} from "../packages/streamx/src/map.js";
3
4
  import {tap} from "../packages/streamx/src/tap.js";
4
5
  import {run} from "../packages/streamx/src/index.js";
6
+ import {readFile} from "fs/promises";
5
7
  import {IWalkFiles} from "../componets/walkFiles.js";
6
- import {FileEventType, IWatchFiles} from "../componets/watchFiles.js";
7
- import {IHandleFileEvent} from "../componets/index/handleFileEvent.js";
8
+ import {IWatchFiles} from "../componets/watchFiles.js";
9
+ import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
8
10
  import {ILogger} from "../componets/logger.js";
9
11
  import {IWatcherLock} from "../componets/index/watcherLock.js";
10
12
  import {WatchFileEventsApp} from "./watchFileEventsApp.js";
13
+ import {IIndexContent} from "../features/indexContent.js";
14
+ import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE";
11
15
 
12
16
  export type IWatchApp = {
13
17
  run: (inputs: string[]) => Promise<void>;
14
18
  stop: () => void;
15
19
  };
16
20
 
17
- export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLock}: {
21
+ export function WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock}: {
18
22
  walkFiles: IWalkFiles,
19
23
  watchFiles: IWatchFiles,
20
- handleFileEvent: IHandleFileEvent,
24
+ handleFileEvents: IHandleFileEvents,
25
+ indexContent: IIndexContent,
21
26
  log: ILogger,
22
27
  watcherLock: IWatcherLock,
23
28
  }): IWatchApp {
@@ -27,9 +32,12 @@ export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLo
27
32
  await run(
28
33
  from(walkFiles(inputs))
29
34
  .pipe(tap(id => log(`indexing: ${id}`)))
30
- .pipe(map<string, string>(async (id) => {
31
- await handleFileEvent({type: FileEventType.index, path: id});
32
- return id;
35
+ .pipe(batch(INDEXING_BATCH_SIZE))
36
+ .pipe(map<string[], string[]>(async (ids) => {
37
+ const texts = await Promise.all(ids.map(id => readFile(id, "utf8")));
38
+ const items = ids.map((id, i) => ({id, content: `${texts[i]}. ${id}`}));
39
+ await indexContent(items);
40
+ return ids;
33
41
  }))
34
42
  );
35
43
 
@@ -39,7 +47,7 @@ export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLo
39
47
  const startWatch = WatchFileEventsApp({
40
48
  watchFiles,
41
49
  watchDir: inputs[0] ?? ".",
42
- handleFileEvent,
50
+ handleFileEvents,
43
51
  log,
44
52
  watcherLock,
45
53
  });
@@ -1,14 +1,20 @@
1
1
  import {IWatchFiles} from "../componets/watchFiles.js";
2
- import {IHandleFileEvent} from "../componets/index/handleFileEvent.js";
2
+ import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
3
3
  import {ILogger} from "../componets/logger.js";
4
4
  import {IWatcherLock} from "../componets/index/watcherLock.js";
5
+ import {from} from "../packages/streamx/src/from.js";
6
+ import {batchTimed} from "../packages/streamx/src/batchTimed.js";
7
+ import {map} from "../packages/streamx/src/map.js";
8
+ import {run} from "../packages/streamx/src/index.js";
9
+ import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE.js";
10
+ import {WATCH_FLUSH_MS} from "../componets/config/WATCH_FLUSH_MS.js";
5
11
 
6
12
  export type IWatchFileEventsApp = () => void;
7
13
 
8
- export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvent, log, watcherLock}: {
14
+ export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvents, log, watcherLock}: {
9
15
  watchFiles: IWatchFiles,
10
16
  watchDir: string,
11
- handleFileEvent: IHandleFileEvent,
17
+ handleFileEvents: IHandleFileEvents,
12
18
  log: ILogger,
13
19
  watcherLock: IWatcherLock,
14
20
  }): IWatchFileEventsApp {
@@ -21,7 +27,11 @@ export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvent, log,
21
27
  const watcher = watchFiles([watchDir]);
22
28
  const events = (async () => {
23
29
  try {
24
- for await (const e of watcher.events) await handleFileEvent(e);
30
+ await run(
31
+ from(watcher.events)
32
+ .pipe(batchTimed(INDEXING_BATCH_SIZE, WATCH_FLUSH_MS))
33
+ .pipe(map(handleFileEvents))
34
+ );
25
35
  } catch (e) {
26
36
  log(`watch error: ${(e as any)?.message ?? e}`);
27
37
  }
@@ -4,24 +4,40 @@ import {CleanUpKeywords} from "./keywords/cleanUpKeywords.js";
4
4
  import {ContentIndexDriver} from "./index/contentIndexDriver.js";
5
5
  import {LoadConfig} from "./config/loadConfig.js";
6
6
  import {ILogger} from "./logger.js";
7
+ import {LocateInFile} from "./locate/locateInFile.js";
7
8
 
8
- export async function BuildComponents({log}: {log: ILogger}) {
9
+ export async function BuildComponents({log}: { log: ILogger }) {
9
10
  const loadConfig = LoadConfig({configPath: ".xindex.json", log});
10
11
  const config = await loadConfig();
11
12
 
12
13
  const embed = Embed({pooling: "mean", normalize: true});
13
14
  const extractKeywords = ExtractKeywords();
14
- const cleanUpKeywords = CleanUpKeywords({maxNgrams: 2, minLength: 2, ignoreKeywords: config.ignoreKeywords});
15
+ const cleanUpKeywords = CleanUpKeywords({maxNgrams: 2, minLength: 1, ignoreKeywords: config.ignoreKeywords});
16
+
17
+ const locateInFile = LocateInFile({
18
+ embed,
19
+ extractKeywords,
20
+ cleanUpKeywords,
21
+ windowLines: config.maxLines,
22
+ maxFileBytes: config.maxFileBytes,
23
+ });
15
24
 
16
25
  const DEFAULT_INDEX_PATH = ".xindex";
17
26
 
27
+ const SCORE_THRESHOLD = 0.01;
28
+
18
29
  const {indexContent, removeContent, getIndexStats, searchContentIndex, resetIndex}
19
30
  = await ContentIndexDriver({
20
- path: DEFAULT_INDEX_PATH,
21
- embed,
22
- extractKeywords,
23
- cleanUpKeywords,
24
- });
25
- return {extractKeywords, cleanUpKeywords, indexContent, removeContent, getIndexStats,
26
- searchContentIndex, resetIndex, config};
31
+ path: DEFAULT_INDEX_PATH,
32
+ embed,
33
+ extractKeywords,
34
+ cleanUpKeywords,
35
+ locateInFile,
36
+ scoreThreshold: SCORE_THRESHOLD
37
+ });
38
+
39
+ return {
40
+ extractKeywords, cleanUpKeywords, indexContent, removeContent, getIndexStats,
41
+ searchContentIndex, resetIndex, locateInFile, config
42
+ };
27
43
  }
@@ -0,0 +1 @@
1
+ export const DEFAULT_LOCATE_BATCH_SIZE = 3;
@@ -0,0 +1 @@
1
+ export const INDEXING_BATCH_SIZE = 5;
@@ -0,0 +1 @@
1
+ export const WATCH_FLUSH_MS = 500;
@@ -2,14 +2,19 @@ import {readFile} from "fs/promises";
2
2
  import {IXindexConfig} from "./xindexConfig.js";
3
3
  import {ILogger} from "../logger.js";
4
4
 
5
+ const DEFAULT_MAX_LINES = 30;
6
+ const DEFAULT_MAX_FILE_BYTES = 5_000_000;
7
+
5
8
  const DEFAULTS: IXindexConfig = {
6
9
  ignoreKeywords: [],
7
10
  ignoreFiles: [],
11
+ maxLines: DEFAULT_MAX_LINES,
12
+ maxFileBytes: DEFAULT_MAX_FILE_BYTES,
8
13
  };
9
14
 
10
15
  export type ILoadConfig = () => Promise<IXindexConfig>;
11
16
 
12
- export function LoadConfig({configPath, log}: {configPath: string, log: ILogger}): ILoadConfig {
17
+ export function LoadConfig({configPath, log}: { configPath: string, log: ILogger }): ILoadConfig {
13
18
  return async function loadConfig() {
14
19
  let raw: string;
15
20
  try {
@@ -29,9 +34,13 @@ export function LoadConfig({configPath, log}: {configPath: string, log: ILogger}
29
34
  }
30
35
 
31
36
  const toStrings = (v: unknown) => Array.isArray(v) ? v.filter((e): e is string => typeof e === "string") : [];
37
+ const toNum = (v: unknown, def: number): number => typeof v === "number" ? v : def;
38
+
32
39
  const config: IXindexConfig = {
33
40
  ignoreKeywords: toStrings(parsed.ignoreKeywords),
34
41
  ignoreFiles: toStrings(parsed.ignoreFiles),
42
+ maxLines: toNum(parsed.maxLines, DEFAULT_MAX_LINES),
43
+ maxFileBytes: toNum(parsed.maxFileBytes, DEFAULT_MAX_FILE_BYTES),
35
44
  };
36
45
 
37
46
  for (const kw of config.ignoreKeywords) {
@@ -1,4 +1,6 @@
1
1
  export type IXindexConfig = {
2
2
  ignoreKeywords: string[];
3
3
  ignoreFiles: string[];
4
+ maxLines: number;
5
+ maxFileBytes: number;
4
6
  };
@@ -0,0 +1,40 @@
1
+ import {readFile} from "fs/promises";
2
+ import {join, dirname, relative} from "path";
3
+ import ignore from "ignore";
4
+
5
+ /**
6
+ * Build an `ignore` instance that accumulates `.gitignore` rules from `cwd`
7
+ * down to the directory containing `relPath`.
8
+ *
9
+ * Mirrors the per-directory parent-chain logic in `walkFiles.ts`.
10
+ *
11
+ * @param cwd Absolute root (same as the `cwd` passed to WalkFiles/WatchFiles)
12
+ * @param relPath Path of the FS event, relative to `cwd`
13
+ * @param ignoreFiles Additional glob patterns from config (applied on top)
14
+ */
15
+ export async function loadIgnoreChain(
16
+ cwd: string,
17
+ relPath: string,
18
+ ignoreFiles: string[] = [],
19
+ ): Promise<ReturnType<typeof ignore>> {
20
+ // Segments from cwd down to (but not including) the file itself
21
+ const fileDir = dirname(relPath); // e.g. "pkg/sub" or "."
22
+ const segments = fileDir === "." ? [] : fileDir.split("/");
23
+
24
+ const ig = ignore();
25
+ ig.add(".*");
26
+
27
+ // Walk from root down: cwd, cwd/seg0, cwd/seg0/seg1, …
28
+ const dirs = [cwd, ...segments.map((_, i) => join(cwd, ...segments.slice(0, i + 1)))];
29
+ for (const dir of dirs) {
30
+ try {
31
+ const content = await readFile(join(dir, ".gitignore"), "utf8");
32
+ if (content) ig.add(content);
33
+ } catch {
34
+ // no .gitignore in this dir — fine
35
+ }
36
+ }
37
+
38
+ for (const pattern of ignoreFiles) ig.add(pattern);
39
+ return ig;
40
+ }
@@ -8,6 +8,7 @@ import {RemoveContent, IRemoveContent} from "../../features/removeContent.js";
8
8
  import {ResetIndex, IResetIndex} from "../../features/resetIndex.js";
9
9
  import {VectraIndex} from "./vectraIndex.js";
10
10
  import {IndexApi} from "./indexApi.js";
11
+ import {ILocateInFile} from "../locate/locateInFile.js";
11
12
 
12
13
  export type IContentIndexDriver = Readonly<{
13
14
  getIndexStats: IGetIndexStats,
@@ -18,21 +19,22 @@ export type IContentIndexDriver = Readonly<{
18
19
  flush: () => Promise<void>,
19
20
  }>;
20
21
 
21
- export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, scoreThreshold}: {
22
+ export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, locateInFile, scoreThreshold}: {
22
23
  path: string,
23
24
  embed: IEmbed,
24
25
  extractKeywords: IExtractKeywords,
25
26
  cleanUpKeywords: ICleanUpKeywords,
26
- scoreThreshold?: number,
27
+ locateInFile: ILocateInFile,
28
+ scoreThreshold: number,
27
29
  }): Promise<IContentIndexDriver> {
28
30
  const index = await VectraIndex(path + "/semantic");
29
- const indexApi = IndexApi({index, embed});
31
+ const indexApi = IndexApi({index});
30
32
 
31
33
  return {
32
34
  getIndexStats: GetIndexStats({index}),
33
- indexContent: IndexContent({extractKeywords, cleanUpKeywords, indexApi}),
35
+ indexContent: IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi}),
34
36
  removeContent: RemoveContent({indexApi}),
35
- searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, scoreThreshold}),
37
+ searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, locateInFile, scoreThreshold}),
36
38
  resetIndex: ResetIndex({indexApi}),
37
39
  flush: () => indexApi.flush(),
38
40
  };
@@ -0,0 +1,126 @@
1
+ import {rm} from "node:fs/promises";
2
+ import {extname} from "node:path";
3
+ import type {IContentIndexDriver} from "./contentIndexDriver.js";
4
+ import type {IGetIndexStats} from "./getIndexStats.js";
5
+ import type {IIndexContent} from "../../features/indexContent.js";
6
+ import type {IRemoveContent} from "../../features/removeContent.js";
7
+ import type {IResetIndex} from "../../features/resetIndex.js";
8
+ import type {IIndexRecord, ISearchIndex} from "../../features/searchIndex.js";
9
+ import {DocumentIndex} from "./documentIndex.js";
10
+
11
+ export async function DocumentContentIndexDriver({path, model}: {
12
+ path: string,
13
+ model?: string,
14
+ }): Promise<IContentIndexDriver> {
15
+ const docsPath = `${path}/docs`;
16
+ let index = await DocumentIndex({path: docsPath, model});
17
+
18
+ const indexContent: IIndexContent = async items => {
19
+ for (const item of items) {
20
+ await index.upsertDocument(item.id, item.content, docTypeForPath(item.id), {});
21
+ }
22
+ };
23
+
24
+ const removeContent: IRemoveContent = async ids => {
25
+ for (const id of ids) {
26
+ await index.deleteDocument(id);
27
+ }
28
+ };
29
+
30
+ const searchContentIndex: ISearchIndex = async (query, limit) => {
31
+ const results = await index.queryDocuments(query, {
32
+ maxDocuments: limit,
33
+ maxChunks: 5,
34
+ isBm25: true,
35
+ });
36
+
37
+ const out: IIndexRecord[] = [];
38
+
39
+ for (const r of results) {
40
+ const text = await r.loadText();
41
+ const lineStarts = computeLineStarts(text);
42
+
43
+ for (const c of r.chunks) {
44
+ const {startPos, endPos} = c.item.metadata;
45
+ const snippet = text.slice(startPos, endPos);
46
+ const startLine = lineOf(lineStarts, startPos);
47
+ const endLine = lineOf(lineStarts, endPos);
48
+
49
+ out.push({
50
+ id: r.uri,
51
+ score: c.score,
52
+ startPos,
53
+ endPos,
54
+ snippet,
55
+ startLine,
56
+ endLine,
57
+ });
58
+ }
59
+ }
60
+
61
+ return out;
62
+ };
63
+
64
+ const resetIndex: IResetIndex = async () => {
65
+ /** Clears persisted catalog + chunks on disk; recreates empty index metadata. */
66
+ await rm(docsPath, {recursive: true, force: true});
67
+
68
+ /*
69
+ * Re-bind `index`: after `rm`, the prior LocalDocumentIndex instance may retain stale
70
+ * in-memory caches; `DocumentIndex` constructs a fresh instance against the recreated folder.
71
+ */
72
+ index = await DocumentIndex({path: docsPath, model});
73
+ };
74
+
75
+ const getIndexStats: IGetIndexStats = async () => {
76
+ const stats = await index.getCatalogStats();
77
+
78
+ return {indexedAmount: stats.documents ?? 0};
79
+ };
80
+
81
+ const flush = async () => {
82
+ /* LocalDocumentIndex persists updates without an explicit flush. */
83
+ };
84
+
85
+ return {
86
+ getIndexStats,
87
+ indexContent,
88
+ removeContent,
89
+ searchContentIndex,
90
+ resetIndex,
91
+ flush,
92
+ };
93
+ }
94
+
95
+ const DOC_TYPE_BY_EXT: Record<string, string> = {
96
+ ".ts": "ts", ".tsx": "tsx", ".mts": "ts", ".cts": "ts",
97
+ ".js": "js", ".jsx": "jsx", ".mjs": "js", ".cjs": "js",
98
+ ".py": "python",
99
+ ".go": "go",
100
+ ".java": "java",
101
+ ".cs": "csharp",
102
+ ".cpp": "cpp", ".cc": "cpp", ".cxx": "cpp", ".hpp": "cpp", ".h": "cpp",
103
+ ".md": "markdown",
104
+ };
105
+
106
+ function docTypeForPath(path: string): string {
107
+ return DOC_TYPE_BY_EXT[extname(path).toLowerCase()] ?? "text";
108
+ }
109
+
110
+ function computeLineStarts(text: string): number[] {
111
+ const starts = [0];
112
+ for (let i = 0; i < text.length; i++) {
113
+ if (text.charCodeAt(i) === 10) starts.push(i + 1);
114
+ }
115
+ return starts;
116
+ }
117
+
118
+ function lineOf(lineStarts: number[], pos: number): number {
119
+ let lo = 0, hi = lineStarts.length - 1;
120
+ while (lo < hi) {
121
+ const mid = (lo + hi + 1) >>> 1;
122
+ if (lineStarts[mid] <= pos) lo = mid;
123
+ else hi = mid - 1;
124
+ }
125
+ return lo + 1;
126
+ }
@@ -0,0 +1,26 @@
1
+ import { LocalDocumentIndex, TransformersEmbeddings } from "vectra";
2
+
3
+ export type IDocumentIndex = LocalDocumentIndex;
4
+
5
+ export async function DocumentIndex({
6
+ path,
7
+ model = "Xenova/all-MiniLM-L6-v2",
8
+ dtype = "q8",
9
+ }: {
10
+ path: string;
11
+ model?: string;
12
+ dtype?: "fp32" | "fp16" | "q8" | "q4";
13
+ }): Promise<LocalDocumentIndex> {
14
+ const embeddings = await TransformersEmbeddings.create({ model, dtype });
15
+ const index = new LocalDocumentIndex({
16
+ folderPath: path,
17
+ embeddings,
18
+ chunkingConfig: { chunkSize: 512, chunkOverlap: 0 },
19
+ });
20
+
21
+ if (!(await index.isIndexCreated())) {
22
+ await index.createIndex();
23
+ }
24
+
25
+ return index;
26
+ }