xindex 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.xindex.json +2 -1
  2. package/CLAUDE.md +1 -0
  3. package/README.md +33 -26
  4. package/apps/indexApp.ts +9 -8
  5. package/apps/mcpApp.ts +6 -6
  6. package/apps/run.index.ts +2 -2
  7. package/apps/run.mcp.ts +6 -4
  8. package/apps/run.search.ts +1 -1
  9. package/apps/run.watch.ts +3 -3
  10. package/apps/searchApp.ts +4 -2
  11. package/apps/watchApp.ts +16 -8
  12. package/apps/watchFileEventsApp.ts +14 -4
  13. package/componets/buildComponents.ts +25 -9
  14. package/componets/config/DEFAULT_LOCATE_BATCH_SIZE.ts +1 -0
  15. package/componets/config/INDEXING_BATCH_SIZE.ts +1 -0
  16. package/componets/config/WATCH_FLUSH_MS.ts +1 -0
  17. package/componets/config/loadConfig.ts +10 -1
  18. package/componets/config/xindexConfig.ts +2 -0
  19. package/componets/ignore/loadIgnoreChain.ts +40 -0
  20. package/componets/index/contentIndexDriver.ts +7 -5
  21. package/componets/index/documentContentIndexDriver.ts +126 -0
  22. package/componets/index/documentIndex.ts +26 -0
  23. package/componets/index/formatSearchResults.ts +16 -2
  24. package/componets/index/handleFileEvent.ts +48 -3
  25. package/componets/index/indexApi.ts +39 -11
  26. package/componets/locate/bm25.ts +50 -0
  27. package/componets/locate/inMemoryIndex.ts +48 -0
  28. package/componets/locate/locateInFile.ts +148 -0
  29. package/componets/locate/windowsOf.ts +29 -0
  30. package/componets/watchFiles.ts +5 -16
  31. package/features/indexContent.ts +12 -5
  32. package/features/removeContent.ts +3 -3
  33. package/features/searchIndex.ts +22 -5
  34. package/package.json +15 -2
  35. package/packages/streamx/src/batchTimed.ts +1 -1
  36. package/packages/streamx/src/buffer.ts +1 -1
  37. package/packages/streamx/src/defer.ts +55 -0
  38. package/packages/streamx/src/interval.ts +1 -1
  39. package/packages/streamx/src/merge.ts +1 -1
  40. package/packages/streamx/src/nodeWritable.ts +1 -1
  41. package/packages/streamx/src/scale.ts +2 -2
  42. package/packages/streamx/src/writer.ts +1 -1
  43. package/.ai/research/.gitkeep +0 -0
  44. package/.ai/task/.gitkeep +0 -0
  45. package/.claude/settings.local.json +0 -73
  46. package/.claude/skills/make-hof/SKILL.md +0 -8
  47. package/.claude/skills/make-hof/playbook.md +0 -38
  48. package/.cursor/mcp.json +0 -8
  49. package/media/MEDIUM.md +0 -139
  50. package/media/SOCIAL.md +0 -102
package/.xindex.json CHANGED
@@ -18,5 +18,6 @@
18
18
  "utf8", "length", "map", "slice", "push", "join", "resolve", "stringify",
19
19
  "json", "settimeout", "path", "readfile"
20
20
  ],
21
- "ignoreFiles": [".xindex"]
21
+ "ignoreFiles": [".xindex", "media"],
22
+ "maxLines": 12
22
23
  }
package/CLAUDE.md CHANGED
@@ -9,6 +9,7 @@
9
9
  - **Text diagrams** — ASCII flows, hierarchies, tables. Keep minimal.
10
10
  - **research/search/ground** — search the Internet using DuckDuckGo MCP
11
11
  - **plan dev / go / dev / implement** — start implementation → triggers Pre-implementation check
12
+ - **check types / test compilation** — run `yarn test.compilation` to validate TypeScript compilation without relying on `tsconfig.json`
12
13
  - **recover** — find most recent `task.*.md` in `.ai/task/` (by date+mtime, exclude `*.log.md`/`*.report.md`), summarize state and next steps
13
14
  - **pull details / expand / flesh out** — enrich task with full detail while preserving shape (see Detail expansion)
14
15
 
package/README.md CHANGED
@@ -39,6 +39,15 @@ Drop this into `.mcp.json` at your project root:
39
39
 
40
40
  Open the project in Claude Code — it picks up the xindex MCP server and can call `xindex_search`, `xindex_index`, and `xindex_reset` directly. Fewer hallucinations, fewer round-trips.
41
41
 
42
+ ## Features
43
+
44
+ - **Local** — everything runs on your machine; embeddings cached on disk
45
+ - **Semantic search** — natural-language queries, not substring match
46
+ - **MCP server** — plugs into Claude Code via `.mcp.json`
47
+ - **Watch mode** — keeps the index warm while you code
48
+ - **Gitignore-aware** — respects `.gitignore` + custom ignore rules
49
+ - **Zero config** — works with defaults; `.xindex.json` is optional
50
+
42
51
  ## Claude Code skills (`@xi`)
43
52
 
44
53
  Two optional [Claude Code skills](https://docs.claude.com/en/docs/claude-code/skills) wrap the MCP tools so you don't have to think about them:
@@ -73,13 +82,12 @@ argument-hint: "[question]"
73
82
  Surface-level codebase discovery via xindex. Tool: `xindex_search` (natural-language, meaning-based).
74
83
 
75
84
  **Steps:**
76
- 1. Draft 5–10 focused queries from $ARGUMENTS (entry points, routing, config, integrations, tests, related patterns).
77
- 2. Run `xindex_search` for each.
78
- 3. If results are empty/sparse/stale → scoped-index the most relevant content-heavy root folders (one path per `xindex_index` call, e.g. `src`, `apps`, `features`, `componets`), then re-search. Prefer scoped over full-repo.
79
- 4. Refine with 2–3 narrower follow-ups.
80
- 5. Return file paths + brief keywords showing why each matched.
85
+ 1. Draft 3–7 queries from $ARGUMENTS (entry points, routing, config, tests, patterns); run `xindex_search` in parallel.
86
+ 2. If empty/sparse/stale → scoped `xindex_index` on relevant root folders (one path per call, e.g. `src`, `skills`, `agents`), then re-search. Prefer scoped over full-repo.
87
+ 3. Run 3–7 narrower follow-ups in parallel based on round-1 hits.
88
+ 4. Return file paths + brief keywords showing why each matched.
81
89
 
82
- Output = file links + keywords, not analysis. For reset or full re-index, delegate to `/xindex` (owns safety rules).
90
+ Output = file links + keywords, not analysis. **Escalate to `/ask-cursor` by default** (cheap codebase reasoning); only go to `/ask-claude` for multi-file/pattern analysis or `/ask-claude-opus` for trade-offs. For reset or full re-index, delegate to `/xindex` (owns safety rules).
83
91
  ````
84
92
 
85
93
  `xindex/SKILL.md`:
@@ -90,7 +98,7 @@ name: xindex
90
98
  description: Manages xindex semantic search — index, search, reset via MCP tools. For research questions, use /ask-xi.
91
99
  argument-hint: "[search query | index | reset]"
92
100
  ---
93
- Full xindex tool management. For research, use `/ask-xi`. Install: `npm i -g xindex`.
101
+ Full xindex tool management. For research, use `/ask-xi`. Install: `npm i -g xindex` ([npm](https://www.npmjs.com/package/xindex)).
94
102
 
95
103
  **Tools:**
96
104
  - `xindex_search` — find files by meaning (synonyms, semantics). Try before grepping blindly.
@@ -106,15 +114,6 @@ $ARGUMENTS
106
114
 
107
115
  Both skills assume the `xindex` MCP server is registered (see the section above). Restart Claude Code after adding skills.
108
116
 
109
- ## Features
110
-
111
- - **Local** — everything runs on your machine; embeddings cached on disk
112
- - **Semantic search** — natural-language queries, not substring match
113
- - **MCP server** — plugs into Claude Code via `.mcp.json`
114
- - **Watch mode** — keeps the index warm while you code
115
- - **Gitignore-aware** — respects `.gitignore` + custom ignore rules
116
- - **Zero config** — works with defaults; `.xindex.json` is optional
117
-
118
117
  ---
119
118
 
120
119
  ## CLI reference
@@ -129,7 +128,7 @@ xindex-index apps features
129
128
  ```
130
129
 
131
130
  ### `xindex-search <query...>`
132
- Search the index. All args are joined into one query. Default limit: 10.
131
+ Search the index. All args are joined into one query. Default limit: 7.
133
132
  ```bash
134
133
  xindex-search "database migration logic"
135
134
  xindex-search file watcher debounce
@@ -157,19 +156,22 @@ xindex-mcp --watch-dir=./src # watch a specific dir
157
156
 
158
157
  ## MCP tools
159
158
 
160
- | Tool | What it does | Input |
161
- |------|--------------|-------|
162
- | `xindex_search` | Semantic search | `query: string`, `limit?: number` (default 5, max 100) |
163
- | `xindex_index` | Index paths | `inputs: string[]` (at least one) |
164
- | `xindex_reset` | Wipe index (destructive) | — |
159
+ - **`xindex_search`** — semantic search. `query: string`, `limit?: number` (default 7, max 50)
160
+ - **`xindex_index`** — index paths. `inputs: string[]` (at least one)
161
+ - **`xindex_reset`** — wipe index (destructive). No input
165
162
 
166
- Note: CLI `xindex-search` defaults to 10 results; MCP `xindex_search` defaults to 5.
163
+ Note: both CLI `xindex-search` and MCP `xindex_search` default to 7 results; MCP caps at 50.
167
164
 
168
165
  ## Configuration
169
166
 
170
167
  ### `.xindex.json` (optional)
171
168
 
172
- Place at your project root. Both fields are optional arrays; unknown keys are ignored.
169
+ Project-root file. All fields optional; unknown keys ignored; missing/empty → defaults.
170
+
171
+ - **`ignoreKeywords`** — `string[]`, default `[]`. Tokens stripped before embedding — add project slang/boilerplate polluting results. Entries ≤1 char warn.
172
+ - **`ignoreFiles`** — `string[]`, default `[]`. Extra globs excluded during walk/watch, on top of `.gitignore` — add vendored/generated folders.
173
+ - **`maxLines`** — `number`, default `30`. Lines per chunk — tune if chunks feel over/under-sized.
174
+ - **`maxFileBytes`** — `number`, default `5000000`. Skip files over this (5 MB) — lower for faster indexing on huge generated files.
173
175
 
174
176
  ```json
175
177
  {
@@ -178,8 +180,7 @@ Place at your project root. Both fields are optional arrays; unknown keys are ig
178
180
  }
179
181
  ```
180
182
 
181
- - **`ignoreKeywords`** — tokens stripped before embedding (noise words, language keywords, project slang). Keeps search focused on meaningful terms.
182
- - **`ignoreFiles`** — extra glob patterns excluded during walk/watch, on top of `.gitignore`.
183
+ Override only what you need; re-run `xindex-index .` (or let the watcher pick it up). Invalid JSON throws; wrong-typed fields fall back silently.
183
184
 
184
185
  ### `.xindex/` folder
185
186
 
@@ -233,6 +234,12 @@ yarn install # or npm install
233
234
  npm link # exposes xindex-* binaries from your working copy
234
235
  ```
235
236
 
237
+ Check TypeScript compilation:
238
+
239
+ ```bash
240
+ yarn test.compilation
241
+ ```
242
+
236
243
  ## License
237
244
 
238
245
  MIT
package/apps/indexApp.ts CHANGED
@@ -1,30 +1,31 @@
1
1
  import {readFile} from "fs/promises";
2
2
  import {from} from "../packages/streamx/src/from.js";
3
+ import {batch} from "../packages/streamx/src/batch.js";
3
4
  import {map} from "../packages/streamx/src/map.js";
4
5
  import {tap} from "../packages/streamx/src/tap.js";
5
6
  import {run} from "../packages/streamx/src/index.js";
6
7
  import {IWalkFiles} from "../componets/walkFiles.js";
7
8
  import {IIndexContent} from "../features/indexContent.js";
8
- import {IRemoveContent} from "../features/removeContent.js";
9
9
  import {ILogger} from "../componets/logger.js";
10
+ import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE";
10
11
 
11
12
  export type IIndexApp = (inputs: string[]) => Promise<void>;
12
13
 
13
- export function IndexApp({walkFiles, indexContent, removeContent, log}: {
14
+ export function IndexApp({walkFiles, indexContent, log}: {
14
15
  walkFiles: IWalkFiles,
15
16
  indexContent: IIndexContent,
16
- removeContent: IRemoveContent,
17
17
  log: ILogger,
18
18
  }): IIndexApp {
19
19
  return async function indexApp(inputs) {
20
20
  await run(
21
21
  from(walkFiles(inputs))
22
22
  .pipe(tap(id => log(`indexing: ${id}`)))
23
- .pipe(map<string, string>(async (id) => {
24
- try { await removeContent(id); } catch (e) { log(`remove failed: ${id} — ${(e as any)?.message ?? e}`); }
25
- const text = await readFile(id, "utf8");
26
- await indexContent(id, `${text}. ${id}`);
27
- return id;
23
+ .pipe(batch(INDEXING_BATCH_SIZE))
24
+ .pipe(map<string[], string[]>(async (ids) => {
25
+ const texts = await Promise.all(ids.map(id => readFile(id, "utf8")));
26
+ const items = ids.map((id, i) => ({id, content: `${texts[i]}. ${id}`}));
27
+ await indexContent(items);
28
+ return ids;
28
29
  }))
29
30
  );
30
31
  }
package/apps/mcpApp.ts CHANGED
@@ -1,31 +1,31 @@
1
1
  import {McpServer} from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import {StdioServerTransport} from "@modelcontextprotocol/sdk/server/stdio.js";
3
3
  import {z} from "zod";
4
- import {ISearchIndex} from "../features/searchIndex.js";
5
4
  import {IIndexApp} from "./indexApp.js";
6
5
  import {IGetIndexStats} from "../componets/index/getIndexStats.js";
7
6
  import {IResetIndex} from "../features/resetIndex.js";
8
7
  import {IWatchFiles} from "../componets/watchFiles.js";
9
- import {IHandleFileEvent} from "../componets/index/handleFileEvent.js";
8
+ import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
10
9
  import {ILogger} from "../componets/logger.js";
11
10
  import {WatchFileEventsApp} from "./watchFileEventsApp.js";
12
11
  import {IWatcherLock} from "../componets/index/watcherLock.js";
13
12
  import {IXindexConfig} from "../componets/config/xindexConfig.js";
14
13
  import {FormatSearchResults} from "../componets/index/formatSearchResults.js";
14
+ import {ISearchApp} from "./searchApp.js";
15
15
 
16
16
  export type IMcpApp = () => Promise<void>;
17
17
 
18
18
  export type IMcpWatch = {
19
19
  watchFiles: IWatchFiles,
20
20
  watchDir: string,
21
- handleFileEvent: IHandleFileEvent,
21
+ handleFileEvents: IHandleFileEvents,
22
22
  watcherLock: IWatcherLock,
23
23
  };
24
24
 
25
25
  export function McpApp({
26
- searchContentIndex, indexApp, getIndexStats, resetIndex, log, watch, config,
26
+ search, indexApp, getIndexStats, resetIndex, log, watch, config,
27
27
  }: {
28
- searchContentIndex: ISearchIndex,
28
+ search: ISearchApp,
29
29
  indexApp: IIndexApp,
30
30
  getIndexStats: IGetIndexStats,
31
31
  resetIndex: IResetIndex,
@@ -51,7 +51,7 @@ export function McpApp({
51
51
  }, async ({query, limit}) => {
52
52
  try {
53
53
  const format = FormatSearchResults();
54
- const results = await searchContentIndex(query, limit);
54
+ const results = await search(query, limit);
55
55
  const text = await format(query, results);
56
56
  return {content: [{type: "text" as const, text}]};
57
57
  } catch (e) {
package/apps/run.index.ts CHANGED
@@ -7,9 +7,9 @@ import {AppId} from "../componets/appId.js";
7
7
  const appId = AppId();
8
8
  const cwd = process.cwd();
9
9
  const log = BufferedLoggerToStdOut();
10
- const {indexContent, removeContent, getIndexStats, config} = await BuildComponents({log});
10
+ const {indexContent, getIndexStats, config} = await BuildComponents({log});
11
11
  const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
12
- const indexApp = IndexApp({walkFiles, indexContent, removeContent, log});
12
+ const indexApp = IndexApp({walkFiles, indexContent, log});
13
13
 
14
14
  const inputs = process.argv.slice(2);
15
15
  if (!inputs.length) inputs.push(".");
package/apps/run.mcp.ts CHANGED
@@ -1,11 +1,12 @@
1
1
  import {BuildComponents} from "../componets/buildComponents.js";
2
- import {HandleFileEvent} from "../componets/index/handleFileEvent.js";
2
+ import {HandleFileEvents} from "../componets/index/handleFileEvent.js";
3
3
  import {BufferedLoggerToStdErr} from "../componets/logger.js";
4
4
  import {WalkFiles} from "../componets/walkFiles.js";
5
5
  import {WatchFiles} from "../componets/watchFiles.js";
6
6
  import {WatcherLock} from "../componets/index/watcherLock.js";
7
7
  import {IndexApp} from "./indexApp.js";
8
8
  import {McpApp} from "./mcpApp.js";
9
+ import {SearchApp} from "./searchApp.js";
9
10
  import {join} from "path";
10
11
  import {AppId} from "../componets/appId.js";
11
12
 
@@ -17,7 +18,8 @@ const cwd = process.cwd();
17
18
  const log = BufferedLoggerToStdErr();
18
19
  const {indexContent, removeContent, getIndexStats, searchContentIndex, resetIndex, config} = await BuildComponents({log});
19
20
  const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
20
- const indexApp = IndexApp({walkFiles, indexContent, removeContent, log});
21
+ const indexApp = IndexApp({walkFiles, indexContent, log});
22
+ const search = SearchApp({searchContentIndex});
21
23
 
22
24
  const appId = AppId();
23
25
  const watcherLock = WatcherLock({
@@ -29,7 +31,7 @@ const watcherLock = WatcherLock({
29
31
  const watch = watchDisabled ? undefined : {
30
32
  watchFiles: WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles}),
31
33
  watchDir: watchDirArg ? watchDirArg.split("=")[1] : ".",
32
- handleFileEvent: HandleFileEvent({indexContent, removeContent, log}),
34
+ handleFileEvents: HandleFileEvents({indexContent, removeContent, log}),
33
35
  watcherLock,
34
36
  };
35
37
 
@@ -45,5 +47,5 @@ process.on("SIGINT", async () => {
45
47
  });
46
48
 
47
49
  log(`[${appId}] started`);
48
- const mcpApp = McpApp({searchContentIndex, indexApp, getIndexStats, resetIndex, log, watch, config});
50
+ const mcpApp = McpApp({search, indexApp, getIndexStats, resetIndex, log, watch, config});
49
51
  await mcpApp();
@@ -4,7 +4,7 @@ import {SearchApp} from "./searchApp.js";
4
4
  import {FormatSearchResults} from "../componets/index/formatSearchResults.js";
5
5
 
6
6
  const log = BufferedLoggerToStdOut();
7
- const {searchContentIndex, config} = await BuildComponents({log});
7
+ const {searchContentIndex} = await BuildComponents({log});
8
8
  const search = SearchApp({searchContentIndex});
9
9
 
10
10
  const query = process.argv.slice(2).join(" ");
package/apps/run.watch.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import {BuildComponents} from "../componets/buildComponents.js";
2
- import {HandleFileEvent} from "../componets/index/handleFileEvent.js";
2
+ import {HandleFileEvents} from "../componets/index/handleFileEvent.js";
3
3
  import {BufferedLoggerToStdOut} from "../componets/logger.js";
4
4
  import {WalkFiles} from "../componets/walkFiles.js";
5
5
  import {WatchFiles} from "../componets/watchFiles.js";
@@ -13,7 +13,7 @@ const log = BufferedLoggerToStdOut();
13
13
  const {indexContent, removeContent, getIndexStats, config} = await BuildComponents({log});
14
14
  const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
15
15
  const watchFiles = WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles});
16
- const handleFileEvent = HandleFileEvent({indexContent, removeContent, log});
16
+ const handleFileEvents = HandleFileEvents({indexContent, removeContent, log});
17
17
 
18
18
  const appId = AppId();
19
19
  const watcherLock = WatcherLock({
@@ -22,7 +22,7 @@ const watcherLock = WatcherLock({
22
22
  log,
23
23
  });
24
24
 
25
- const app = WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLock});
25
+ const app = WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock});
26
26
 
27
27
  const inputs = process.argv.slice(2);
28
28
  if (!inputs.length) inputs.push(".");
package/apps/searchApp.ts CHANGED
@@ -2,8 +2,10 @@ import {ISearchIndex, IIndexRecord} from "../features/searchIndex.js";
2
2
 
3
3
  export type ISearchApp = (query: string, limit?: number) => Promise<IIndexRecord[]>;
4
4
 
5
- export function SearchApp({searchContentIndex}: {searchContentIndex: ISearchIndex}): ISearchApp {
5
+ export function SearchApp({searchContentIndex}: {
6
+ searchContentIndex: ISearchIndex;
7
+ }): ISearchApp {
6
8
  return async function search(query, limit = 7) {
7
- return await searchContentIndex(query, limit);
9
+ return searchContentIndex(query, limit);
8
10
  }
9
11
  }
package/apps/watchApp.ts CHANGED
@@ -1,23 +1,28 @@
1
1
  import {from} from "../packages/streamx/src/from.js";
2
+ import {batch} from "../packages/streamx/src/batch.js";
2
3
  import {map} from "../packages/streamx/src/map.js";
3
4
  import {tap} from "../packages/streamx/src/tap.js";
4
5
  import {run} from "../packages/streamx/src/index.js";
6
+ import {readFile} from "fs/promises";
5
7
  import {IWalkFiles} from "../componets/walkFiles.js";
6
- import {FileEventType, IWatchFiles} from "../componets/watchFiles.js";
7
- import {IHandleFileEvent} from "../componets/index/handleFileEvent.js";
8
+ import {IWatchFiles} from "../componets/watchFiles.js";
9
+ import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
8
10
  import {ILogger} from "../componets/logger.js";
9
11
  import {IWatcherLock} from "../componets/index/watcherLock.js";
10
12
  import {WatchFileEventsApp} from "./watchFileEventsApp.js";
13
+ import {IIndexContent} from "../features/indexContent.js";
14
+ import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE";
11
15
 
12
16
  export type IWatchApp = {
13
17
  run: (inputs: string[]) => Promise<void>;
14
18
  stop: () => void;
15
19
  };
16
20
 
17
- export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLock}: {
21
+ export function WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock}: {
18
22
  walkFiles: IWalkFiles,
19
23
  watchFiles: IWatchFiles,
20
- handleFileEvent: IHandleFileEvent,
24
+ handleFileEvents: IHandleFileEvents,
25
+ indexContent: IIndexContent,
21
26
  log: ILogger,
22
27
  watcherLock: IWatcherLock,
23
28
  }): IWatchApp {
@@ -27,9 +32,12 @@ export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLo
27
32
  await run(
28
33
  from(walkFiles(inputs))
29
34
  .pipe(tap(id => log(`indexing: ${id}`)))
30
- .pipe(map<string, string>(async (id) => {
31
- await handleFileEvent({type: FileEventType.index, path: id});
32
- return id;
35
+ .pipe(batch(INDEXING_BATCH_SIZE))
36
+ .pipe(map<string[], string[]>(async (ids) => {
37
+ const texts = await Promise.all(ids.map(id => readFile(id, "utf8")));
38
+ const items = ids.map((id, i) => ({id, content: `${texts[i]}. ${id}`}));
39
+ await indexContent(items);
40
+ return ids;
33
41
  }))
34
42
  );
35
43
 
@@ -39,7 +47,7 @@ export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLo
39
47
  const startWatch = WatchFileEventsApp({
40
48
  watchFiles,
41
49
  watchDir: inputs[0] ?? ".",
42
- handleFileEvent,
50
+ handleFileEvents,
43
51
  log,
44
52
  watcherLock,
45
53
  });
@@ -1,14 +1,20 @@
1
1
  import {IWatchFiles} from "../componets/watchFiles.js";
2
- import {IHandleFileEvent} from "../componets/index/handleFileEvent.js";
2
+ import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
3
3
  import {ILogger} from "../componets/logger.js";
4
4
  import {IWatcherLock} from "../componets/index/watcherLock.js";
5
+ import {from} from "../packages/streamx/src/from.js";
6
+ import {batchTimed} from "../packages/streamx/src/batchTimed.js";
7
+ import {map} from "../packages/streamx/src/map.js";
8
+ import {run} from "../packages/streamx/src/index.js";
9
+ import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE.js";
10
+ import {WATCH_FLUSH_MS} from "../componets/config/WATCH_FLUSH_MS.js";
5
11
 
6
12
  export type IWatchFileEventsApp = () => void;
7
13
 
8
- export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvent, log, watcherLock}: {
14
+ export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvents, log, watcherLock}: {
9
15
  watchFiles: IWatchFiles,
10
16
  watchDir: string,
11
- handleFileEvent: IHandleFileEvent,
17
+ handleFileEvents: IHandleFileEvents,
12
18
  log: ILogger,
13
19
  watcherLock: IWatcherLock,
14
20
  }): IWatchFileEventsApp {
@@ -21,7 +27,11 @@ export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvent, log,
21
27
  const watcher = watchFiles([watchDir]);
22
28
  const events = (async () => {
23
29
  try {
24
- for await (const e of watcher.events) await handleFileEvent(e);
30
+ await run(
31
+ from(watcher.events)
32
+ .pipe(batchTimed(INDEXING_BATCH_SIZE, WATCH_FLUSH_MS))
33
+ .pipe(map(handleFileEvents))
34
+ );
25
35
  } catch (e) {
26
36
  log(`watch error: ${(e as any)?.message ?? e}`);
27
37
  }
@@ -4,24 +4,40 @@ import {CleanUpKeywords} from "./keywords/cleanUpKeywords.js";
4
4
  import {ContentIndexDriver} from "./index/contentIndexDriver.js";
5
5
  import {LoadConfig} from "./config/loadConfig.js";
6
6
  import {ILogger} from "./logger.js";
7
+ import {LocateInFile} from "./locate/locateInFile.js";
7
8
 
8
- export async function BuildComponents({log}: {log: ILogger}) {
9
+ export async function BuildComponents({log}: { log: ILogger }) {
9
10
  const loadConfig = LoadConfig({configPath: ".xindex.json", log});
10
11
  const config = await loadConfig();
11
12
 
12
13
  const embed = Embed({pooling: "mean", normalize: true});
13
14
  const extractKeywords = ExtractKeywords();
14
- const cleanUpKeywords = CleanUpKeywords({maxNgrams: 2, minLength: 2, ignoreKeywords: config.ignoreKeywords});
15
+ const cleanUpKeywords = CleanUpKeywords({maxNgrams: 2, minLength: 1, ignoreKeywords: config.ignoreKeywords});
16
+
17
+ const locateInFile = LocateInFile({
18
+ embed,
19
+ extractKeywords,
20
+ cleanUpKeywords,
21
+ windowLines: config.maxLines,
22
+ maxFileBytes: config.maxFileBytes,
23
+ });
15
24
 
16
25
  const DEFAULT_INDEX_PATH = ".xindex";
17
26
 
27
+ const SCORE_THRESHOLD = 0.01;
28
+
18
29
  const {indexContent, removeContent, getIndexStats, searchContentIndex, resetIndex}
19
30
  = await ContentIndexDriver({
20
- path: DEFAULT_INDEX_PATH,
21
- embed,
22
- extractKeywords,
23
- cleanUpKeywords,
24
- });
25
- return {extractKeywords, cleanUpKeywords, indexContent, removeContent, getIndexStats,
26
- searchContentIndex, resetIndex, config};
31
+ path: DEFAULT_INDEX_PATH,
32
+ embed,
33
+ extractKeywords,
34
+ cleanUpKeywords,
35
+ locateInFile,
36
+ scoreThreshold: SCORE_THRESHOLD
37
+ });
38
+
39
+ return {
40
+ extractKeywords, cleanUpKeywords, indexContent, removeContent, getIndexStats,
41
+ searchContentIndex, resetIndex, locateInFile, config
42
+ };
27
43
  }
@@ -0,0 +1 @@
1
+ export const DEFAULT_LOCATE_BATCH_SIZE = 3;
@@ -0,0 +1 @@
1
+ export const INDEXING_BATCH_SIZE = 5;
@@ -0,0 +1 @@
1
+ export const WATCH_FLUSH_MS = 500;
@@ -2,14 +2,19 @@ import {readFile} from "fs/promises";
2
2
  import {IXindexConfig} from "./xindexConfig.js";
3
3
  import {ILogger} from "../logger.js";
4
4
 
5
+ const DEFAULT_MAX_LINES = 30;
6
+ const DEFAULT_MAX_FILE_BYTES = 5_000_000;
7
+
5
8
  const DEFAULTS: IXindexConfig = {
6
9
  ignoreKeywords: [],
7
10
  ignoreFiles: [],
11
+ maxLines: DEFAULT_MAX_LINES,
12
+ maxFileBytes: DEFAULT_MAX_FILE_BYTES,
8
13
  };
9
14
 
10
15
  export type ILoadConfig = () => Promise<IXindexConfig>;
11
16
 
12
- export function LoadConfig({configPath, log}: {configPath: string, log: ILogger}): ILoadConfig {
17
+ export function LoadConfig({configPath, log}: { configPath: string, log: ILogger }): ILoadConfig {
13
18
  return async function loadConfig() {
14
19
  let raw: string;
15
20
  try {
@@ -29,9 +34,13 @@ export function LoadConfig({configPath, log}: {configPath: string, log: ILogger}
29
34
  }
30
35
 
31
36
  const toStrings = (v: unknown) => Array.isArray(v) ? v.filter((e): e is string => typeof e === "string") : [];
37
+ const toNum = (v: unknown, def: number): number => typeof v === "number" ? v : def;
38
+
32
39
  const config: IXindexConfig = {
33
40
  ignoreKeywords: toStrings(parsed.ignoreKeywords),
34
41
  ignoreFiles: toStrings(parsed.ignoreFiles),
42
+ maxLines: toNum(parsed.maxLines, DEFAULT_MAX_LINES),
43
+ maxFileBytes: toNum(parsed.maxFileBytes, DEFAULT_MAX_FILE_BYTES),
35
44
  };
36
45
 
37
46
  for (const kw of config.ignoreKeywords) {
@@ -1,4 +1,6 @@
1
1
  export type IXindexConfig = {
2
2
  ignoreKeywords: string[];
3
3
  ignoreFiles: string[];
4
+ maxLines: number;
5
+ maxFileBytes: number;
4
6
  };
@@ -0,0 +1,40 @@
1
+ import {readFile} from "fs/promises";
2
+ import {join, dirname, relative} from "path";
3
+ import ignore from "ignore";
4
+
5
+ /**
6
+ * Build an `ignore` instance that accumulates `.gitignore` rules from `cwd`
7
+ * down to the directory containing `relPath`.
8
+ *
9
+ * Mirrors the per-directory parent-chain logic in `walkFiles.ts`.
10
+ *
11
+ * @param cwd Absolute root (same as the `cwd` passed to WalkFiles/WatchFiles)
12
+ * @param relPath Path of the FS event, relative to `cwd`
13
+ * @param ignoreFiles Additional glob patterns from config (applied on top)
14
+ */
15
+ export async function loadIgnoreChain(
16
+ cwd: string,
17
+ relPath: string,
18
+ ignoreFiles: string[] = [],
19
+ ): Promise<ReturnType<typeof ignore>> {
20
+ // Segments from cwd down to (but not including) the file itself
21
+ const fileDir = dirname(relPath); // e.g. "pkg/sub" or "."
22
+ const segments = fileDir === "." ? [] : fileDir.split("/");
23
+
24
+ const ig = ignore();
25
+ ig.add(".*");
26
+
27
+ // Walk from root down: cwd, cwd/seg0, cwd/seg0/seg1, …
28
+ const dirs = [cwd, ...segments.map((_, i) => join(cwd, ...segments.slice(0, i + 1)))];
29
+ for (const dir of dirs) {
30
+ try {
31
+ const content = await readFile(join(dir, ".gitignore"), "utf8");
32
+ if (content) ig.add(content);
33
+ } catch {
34
+ // no .gitignore in this dir — fine
35
+ }
36
+ }
37
+
38
+ for (const pattern of ignoreFiles) ig.add(pattern);
39
+ return ig;
40
+ }
@@ -8,6 +8,7 @@ import {RemoveContent, IRemoveContent} from "../../features/removeContent.js";
8
8
  import {ResetIndex, IResetIndex} from "../../features/resetIndex.js";
9
9
  import {VectraIndex} from "./vectraIndex.js";
10
10
  import {IndexApi} from "./indexApi.js";
11
+ import {ILocateInFile} from "../locate/locateInFile.js";
11
12
 
12
13
  export type IContentIndexDriver = Readonly<{
13
14
  getIndexStats: IGetIndexStats,
@@ -18,21 +19,22 @@ export type IContentIndexDriver = Readonly<{
18
19
  flush: () => Promise<void>,
19
20
  }>;
20
21
 
21
- export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, scoreThreshold}: {
22
+ export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, locateInFile, scoreThreshold}: {
22
23
  path: string,
23
24
  embed: IEmbed,
24
25
  extractKeywords: IExtractKeywords,
25
26
  cleanUpKeywords: ICleanUpKeywords,
26
- scoreThreshold?: number,
27
+ locateInFile: ILocateInFile,
28
+ scoreThreshold: number,
27
29
  }): Promise<IContentIndexDriver> {
28
30
  const index = await VectraIndex(path + "/semantic");
29
- const indexApi = IndexApi({index, embed});
31
+ const indexApi = IndexApi({index});
30
32
 
31
33
  return {
32
34
  getIndexStats: GetIndexStats({index}),
33
- indexContent: IndexContent({extractKeywords, cleanUpKeywords, indexApi}),
35
+ indexContent: IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi}),
34
36
  removeContent: RemoveContent({indexApi}),
35
- searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, scoreThreshold}),
37
+ searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, locateInFile, scoreThreshold}),
36
38
  resetIndex: ResetIndex({indexApi}),
37
39
  flush: () => indexApi.flush(),
38
40
  };