xindex 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.ai/research/2026-04-10-file-watching.md +79 -0
  2. package/.ai/research/2026-04-10-mcp-output-format.md +129 -0
  3. package/.ai/task/INDEX.md +12 -0
  4. package/.ai/task/done/INDEX.md +3 -0
  5. package/.ai/task/done/task.2026-04-09-local-ai-research-protos.log.md +98 -0
  6. package/.ai/task/done/task.2026-04-09-local-ai-research-protos.md +102 -0
  7. package/.ai/task/task.2026-04-10-cluster-config.log.md +19 -0
  8. package/.ai/task/task.2026-04-10-cluster-config.md +118 -0
  9. package/.ai/task/task.2026-04-10-dir-indexing.log.md +8 -0
  10. package/.ai/task/task.2026-04-10-dir-indexing.md +92 -0
  11. package/.ai/task/task.2026-04-10-line-clustering.log.md +50 -0
  12. package/.ai/task/task.2026-04-10-line-clustering.md +176 -0
  13. package/.ai/task/task.2026-04-10-object-store.log.md +7 -0
  14. package/.ai/task/task.2026-04-10-object-store.md +81 -0
  15. package/.ai/task/task.2026-04-10-search-config.log.md +46 -0
  16. package/.ai/task/task.2026-04-10-search-config.md +274 -0
  17. package/.ai/task/task.2026-04-10-watch-indexing.log.md +32 -0
  18. package/.ai/task/task.2026-04-10-watch-indexing.md +101 -0
  19. package/.ai/task/task.2026-04-10-xindex-mcp.log.md +5 -0
  20. package/.ai/task/task.2026-04-10-xindex-mcp.md +92 -0
  21. package/.ai/task/task.2026-04-10-xindex-mcp.report.md +113 -0
  22. package/.claude/settings.local.json +73 -0
  23. package/.claude/skills/make-hof/SKILL.md +8 -0
  24. package/.claude/skills/make-hof/playbook.md +38 -0
  25. package/.cursor/mcp.json +8 -0
  26. package/.mcp.json +8 -0
  27. package/.xindex.json +22 -0
  28. package/CLAUDE.md +54 -0
  29. package/README.md +206 -0
  30. package/apps/indexApp.ts +31 -0
  31. package/apps/mcpApp.ts +119 -0
  32. package/apps/run.index.ts +19 -0
  33. package/apps/run.mcp.ts +49 -0
  34. package/apps/run.reset.ts +10 -0
  35. package/apps/run.search.ts +21 -0
  36. package/apps/run.watch.ts +44 -0
  37. package/apps/searchApp.ts +9 -0
  38. package/apps/watchApp.ts +53 -0
  39. package/apps/watchFileEventsApp.ts +39 -0
  40. package/bin/xindex-index +2 -0
  41. package/bin/xindex-mcp +2 -0
  42. package/bin/xindex-reset +2 -0
  43. package/bin/xindex-search +2 -0
  44. package/bin/xindex-watch +2 -0
  45. package/componets/IType.ts +1 -0
  46. package/componets/appId.ts +3 -0
  47. package/componets/buildComponents.ts +27 -0
  48. package/componets/config/loadConfig.ts +43 -0
  49. package/componets/config/xindexConfig.ts +4 -0
  50. package/componets/index/contentIndexDriver.ts +39 -0
  51. package/componets/index/formatSearchResults.ts +18 -0
  52. package/componets/index/getIndexStats.ts +11 -0
  53. package/componets/index/handleFileEvent.ts +25 -0
  54. package/componets/index/indexApi.ts +45 -0
  55. package/componets/index/vectraIndex.ts +11 -0
  56. package/componets/index/watcherLock.ts +107 -0
  57. package/componets/keywords/cleanUpKeywords.ts +38 -0
  58. package/componets/keywords/extractKeywords.ts +14 -0
  59. package/componets/keywords/refineKeywords.ts +16 -0
  60. package/componets/llm/embed.ts +18 -0
  61. package/componets/llm/queryLLM.ts +20 -0
  62. package/componets/logger.ts +34 -0
  63. package/componets/walkFiles.ts +51 -0
  64. package/componets/watchFiles.ts +106 -0
  65. package/features/indexContent.ts +16 -0
  66. package/features/removeContent.ts +9 -0
  67. package/features/resetIndex.ts +9 -0
  68. package/features/searchIndex.ts +33 -0
  69. package/package.json +32 -0
  70. package/packages/fun/src/IType.ts +5 -0
  71. package/packages/fun/src/array-finder.ts +55 -0
  72. package/packages/fun/src/array-index.ts +35 -0
  73. package/packages/fun/src/array.ts +112 -0
  74. package/packages/fun/src/assert.ts +5 -0
  75. package/packages/fun/src/asyncRequest.ts +35 -0
  76. package/packages/fun/src/callsites.ts +18 -0
  77. package/packages/fun/src/case-never.ts +9 -0
  78. package/packages/fun/src/casting.ts +41 -0
  79. package/packages/fun/src/collect.ts +13 -0
  80. package/packages/fun/src/concurrency.ts +186 -0
  81. package/packages/fun/src/container.ts +86 -0
  82. package/packages/fun/src/counter.ts +45 -0
  83. package/packages/fun/src/create-map.ts +2 -0
  84. package/packages/fun/src/dedupe.ts +2 -0
  85. package/packages/fun/src/defer.ts +55 -0
  86. package/packages/fun/src/delay.ts +5 -0
  87. package/packages/fun/src/discriminate.ts +34 -0
  88. package/packages/fun/src/enum-values.ts +12 -0
  89. package/packages/fun/src/exponential-backoff.ts +20 -0
  90. package/packages/fun/src/flatten.ts +11 -0
  91. package/packages/fun/src/hash.ts +67 -0
  92. package/packages/fun/src/hash128.ts +6 -0
  93. package/packages/fun/src/hash256.ts +6 -0
  94. package/packages/fun/src/hub.ts +53 -0
  95. package/packages/fun/src/id.ts +10 -0
  96. package/packages/fun/src/interval.ts +76 -0
  97. package/packages/fun/src/is-non-nullable.ts +2 -0
  98. package/packages/fun/src/isIterable.ts +3 -0
  99. package/packages/fun/src/mailbox.ts +13 -0
  100. package/packages/fun/src/map-record.ts +19 -0
  101. package/packages/fun/src/match-collections.ts +57 -0
  102. package/packages/fun/src/match-left-and-right-arrays.ts +78 -0
  103. package/packages/fun/src/mem.ts +26 -0
  104. package/packages/fun/src/memos.ts +28 -0
  105. package/packages/fun/src/normalizeError.ts +25 -0
  106. package/packages/fun/src/nothing.ts +3 -0
  107. package/packages/fun/src/pipe.ts +18 -0
  108. package/packages/fun/src/prettyJson.ts +3 -0
  109. package/packages/fun/src/project.ts +8 -0
  110. package/packages/fun/src/promise.ts +27 -0
  111. package/packages/fun/src/pubsub.ts +128 -0
  112. package/packages/fun/src/randomId.ts +14 -0
  113. package/packages/fun/src/regexp-escape.ts +13 -0
  114. package/packages/fun/src/retry.ts +15 -0
  115. package/packages/fun/src/serial.test.ts +107 -0
  116. package/packages/fun/src/serial.ts +17 -0
  117. package/packages/fun/src/sleep.ts +3 -0
  118. package/packages/fun/src/sort-object.ts +46 -0
  119. package/packages/fun/src/speed-test.ts +56 -0
  120. package/packages/fun/src/tick.ts +37 -0
  121. package/packages/fun/src/time-behavior.ts +50 -0
  122. package/packages/fun/src/time.ts +22 -0
  123. package/packages/fun/src/timedFallback.ts +37 -0
  124. package/packages/fun/src/timer.ts +30 -0
  125. package/packages/fun/src/value.ts +33 -0
  126. package/packages/fun/src/waitForCounter.ts +15 -0
  127. package/packages/streamx/src/batch.ts +23 -0
  128. package/packages/streamx/src/batchTimed.ts +113 -0
  129. package/packages/streamx/src/buffer.ts +72 -0
  130. package/packages/streamx/src/concatenate.ts +33 -0
  131. package/packages/streamx/src/filter.ts +14 -0
  132. package/packages/streamx/src/flat.ts +19 -0
  133. package/packages/streamx/src/flatMap.ts +9 -0
  134. package/packages/streamx/src/from.ts +30 -0
  135. package/packages/streamx/src/index.ts +49 -0
  136. package/packages/streamx/src/interval.ts +58 -0
  137. package/packages/streamx/src/loop.ts +8 -0
  138. package/packages/streamx/src/map.ts +12 -0
  139. package/packages/streamx/src/merge.ts +89 -0
  140. package/packages/streamx/src/nodeReadable.ts +6 -0
  141. package/packages/streamx/src/nodeTransform.ts +9 -0
  142. package/packages/streamx/src/nodeWritable.ts +38 -0
  143. package/packages/streamx/src/objectReader.ts +16 -0
  144. package/packages/streamx/src/polyfill.ts +20 -0
  145. package/packages/streamx/src/reader.ts +38 -0
  146. package/packages/streamx/src/reduce.ts +15 -0
  147. package/packages/streamx/src/scale.ts +93 -0
  148. package/packages/streamx/src/scaleSync.ts +13 -0
  149. package/packages/streamx/src/sequence.ts +7 -0
  150. package/packages/streamx/src/tap.ts +9 -0
  151. package/packages/streamx/src/toArray.ts +9 -0
  152. package/packages/streamx/src/writer.ts +96 -0
  153. package/rnd/hf.ts +14 -0
  154. package/rnd/keywords-compromise.ts +18 -0
  155. package/rnd/keywords-pipeline.ts +79 -0
  156. package/rnd/keywords.ts +38 -0
  157. package/rnd/test-vectra-memory.ts +63 -0
  158. package/rnd/vectra-keywords.ts +95 -0
  159. package/rnd/vectra.ts +50 -0
  160. package/tsconfig.json +14 -0
@@ -0,0 +1,274 @@
1
+ # Task: Search Result Config — Keyword Ignore, File Ignore & Inline Code Snippets
2
+
3
+ ## Context
4
+
5
+ Three improvements to xindex, all configurable via `.xindex.json`:
6
+
7
+ 1. **Keyword ignore list** — exclude noisy keywords at index time (case-insensitive exact match). Improves grouping relevance. Requires re-index after config change — acceptable as one-time setup.
8
+ 2. **File ignore list** — gitignore-style glob patterns to exclude files from indexing. Same semantics as `.gitignore` but defined in `.xindex.json`. Applied in `WalkFiles` and `WatchFiles` alongside existing `.gitignore` rules.
9
+ 3. **Inline code snippets** — when a search result is small (≤ N lines), include actual source code in the output. Configurable via `.xindex.json` defaults + MCP tool parameter overrides.
10
+
11
+ **Current state:**
12
+ - `.xindex.json` exists but is empty `{}` — file is optional, may not exist at all
13
+ - Keywords: `compromise` NLP → `keyword-extractor` cleanup in `componets/keywords/cleanUpKeywords.ts`
14
+ - File walking: `componets/walkFiles.ts` uses `ignore` package for `.gitignore` rules; `componets/watchFiles.ts` has its own `loadGitignore` + `ignore()` at line 22-31
15
+ - Search results: 1-line summaries only (`1. path:from-to (score) — keywords`)
16
+ - Cluster metadata stores `fromLine`/`toLine` in `componets/index/indexMeta.ts:11` (`IClusterMeta`)
17
+ - MCP `xindex_search` accepts `query` and `limit` only
18
+ - No config loading exists
19
+ - Entry points that create `WalkFiles`/`WatchFiles`: `run.mcp.ts:18,30`, `run.index.ts:10`, `run.watch.ts:13-14`
20
+
21
+ **Decisions:**
22
+ - `.xindex.json` is **optional** — missing file → all defaults, no error
23
+ - `ignoreKeywords`: exact strings, case-insensitive. No globs/patterns.
24
+ - `ignoreFiles`: gitignore-style glob patterns (reuses existing `ignore` package)
25
+ - `maxSnippetResults: 3`, `maxSnippetLines: 7` — confirmed defaults
26
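+ - Ignore lists are applied at **index time** (`ignoreKeywords` additionally filters query keywords at search time, because search shares the same `cleanUpKeywords` instance) — re-index + MCP restart required after config change. One-time setup; review in 3 months.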
27
+ - File-level results (no cluster) **also get snippets** if total file lines ≤ `maxSnippetLines`
28
+ - Config field names are explicit: `ignoreKeywords`, `ignoreFiles`, `maxSnippetLines`, `maxSnippetResults`
29
+
30
+ ## Diagram
31
+
32
+ ```
33
+ .xindex.json (optional) MCP xindex_search
34
+ ┌──────────────────────────┐ ┌──────────────────────────────┐
35
+ │ ignoreKeywords: [...] │ │ query, limit │
36
+ │ ignoreFiles: [...] │ │ maxSnippetResults: 3 │ ← override
37
+ │ maxSnippetLines: 7 │ │ maxSnippetLines: 7 │ ← override
38
+ │ maxSnippetResults: 3 │ └──────┬───────────────────────┘
39
+ └──────┬───────────────────┘ │
40
+ │ │
41
+ ├─ ignoreFiles ────┐ │
42
+ │ ▼ │
43
+ │ WalkFiles + WatchFiles │
44
+ │ (skip matching paths) │
45
+ │ │
46
+ ├─ ignoreKeywords ─┐ │
47
+ │ ▼ │
48
+ │ CleanUpKeywords │
49
+ │ (index time) │
50
+ │ │
51
+ └─ snippet config ────────────────────────┤
52
+
53
+ Format results
54
+ ├─ cluster ≤ maxSnippetLines? → readSnippet
55
+ └─ file ≤ maxSnippetLines? → readSnippet
56
+
57
+ Data flow (indexing):
58
+ walkFiles(inputs) ← ignoreFiles applied here (NEW)
59
+ → readFile
60
+ → ExtractKeywords (compromise NLP)
61
+ → CleanUpKeywords (keyword-extractor + ignoreKeywords filter) ← NEW
62
+ → embed → vectra upsert + objectStore write
63
+
64
+ Data flow (search):
65
+ query
66
+ → extractKeywords → cleanUpKeywords → embed → vectra query
67
+ → filter by scoreThreshold → objectStore.read for each hit
68
+ → format results + readSnippet for top N small results ← NEW
69
+ ```
70
+
71
+ ## Steps
72
+
73
+ ### 1. Config schema & loading
74
+
75
+ - **1.1 Define config type** — create `componets/config/xindexConfig.ts`
76
+ ```ts
77
+ export type IXindexConfig = {
78
+ ignoreKeywords: string[];
79
+ ignoreFiles: string[];
80
+ maxSnippetLines: number;
81
+ maxSnippetResults: number;
82
+ };
83
+ ```
84
+ All fields optional in the JSON file; defaults applied at load time.
85
+
86
+ - **1.2 Load config** — create `componets/config/loadConfig.ts` as HOF
87
+ ```ts
88
+ export type ILoadConfig = () => Promise<IXindexConfig>;
89
+ export function LoadConfig({configPath, log}: {configPath: string, log: ILogger}): ILoadConfig
90
+ ```
91
+ - Read `configPath` (`.xindex.json` in cwd)
92
+ - `JSON.parse`, apply defaults: `{ignoreKeywords: [], ignoreFiles: [], maxSnippetLines: 7, maxSnippetResults: 3}`
93
+ - If file missing or empty → return all defaults (no error)
94
+ - If JSON parse fails → throw with clear message including path
95
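+ - Validate: use `log` to warn if any `ignoreKeywords` entry has length ≤ 1 — such an entry can never match, because `CleanUpKeywords` already drops keywords with `length <= minLength` before the ignore check (no `console.*` — project uses `ILogger`)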
96
+
97
+ - **1.3 Wire into BuildComponents** — modify `componets/buildComponents.ts:6-22`
98
+ - Current: creates `embed`, `extractKeywords`, `cleanUpKeywords({maxNgrams: 2, minLength: 2})` then `ContentIndexDriver`
99
+ - `BuildComponents` currently takes no args. Add `{log}: {log: ILogger}` so `LoadConfig` can use it for warnings.
100
+ - Add: `const loadConfig = LoadConfig({configPath: ".xindex.json", log})` → `const config = await loadConfig()`
101
+ - Pass `config.ignoreKeywords` to `CleanUpKeywords`: `CleanUpKeywords({maxNgrams: 2, minLength: 2, ignoreKeywords: config.ignoreKeywords})`
102
+ - Return `config` in the output so callers can access snippet + file ignore settings
103
+ - `BuildComponents` return type gains `config: IXindexConfig`
104
+ - All callers need updating to pass `log` and destructure `config`:
105
+ - `apps/run.mcp.ts:19` — needs `config` for `McpApp` + `ignoreFiles` for `WalkFiles`/`WatchFiles`
106
+ - `apps/run.index.ts:11` — needs `config.ignoreFiles` for `WalkFiles`
107
+ - `apps/run.watch.ts:15` — needs `config.ignoreFiles` for `WalkFiles`/`WatchFiles`
108
+ - `apps/run.search.ts:8` — needs `config` for snippet settings
109
+
110
+ ### 2. Keyword ignore list
111
+
112
+ - **2.1 Extend CleanUpKeywords** — modify `componets/keywords/cleanUpKeywords.ts:8`
113
+ - Current signature: `CleanUpKeywords({maxNgrams, minLength}: {maxNgrams: number, minLength: number})`
114
+ - New signature: `CleanUpKeywords({maxNgrams, minLength, ignoreKeywords = []}: {maxNgrams: number, minLength: number, ignoreKeywords?: string[]})`
115
+ - Build `ignoreSet = new Set(ignoreKeywords.map(k => k.toLowerCase()))` at factory time (once, not per call)
116
+ - Add `if (ignoreSet.has(lower)) return false;` into existing filter chain at line 21-27, before the `seen` dedup check
117
+
118
+ Exact change at `cleanUpKeywords.ts`:
119
+ ```ts
120
+ export function CleanUpKeywords({maxNgrams, minLength, ignoreKeywords = []}: {
121
+ maxNgrams: number, minLength: number, ignoreKeywords?: string[]
122
+ }): ICleanUpKeywords {
123
+ const ignoreSet = new Set(ignoreKeywords.map(k => k.toLowerCase()));
124
+ return function cleanUpKeywords(keywords) {
125
+ // ... existing extraction ...
126
+ const seen = new Set<string>();
127
+ return extracted.filter((kw: string) => {
128
+ if (kw.length <= minLength || !/[a-z]/i.test(kw)) return false;
129
+ const lower = kw.toLowerCase();
130
+ if (ignoreSet.has(lower)) return false; // ← NEW
131
+ if (seen.has(lower)) return false;
132
+ seen.add(lower);
133
+ return true;
134
+ });
135
+ }
136
+ }
137
+ ```
138
+
139
+ - **2.2 Propagation** — single change at `buildComponents.ts:9` propagates to all consumers:
140
+ - `ContentIndexDriver` (`contentIndexDriver.ts:28`) passes `cleanUpKeywords` to:
141
+ - `ClusterLines` (`clusterLines.ts:20`) — uses at `:34-35` (top/bot split keywords) and `:56` (leaf keywords)
142
+ - `IndexFileContent` (`indexFileContent.ts:10`) — uses at `:19` (file-level keywords)
143
+ - `SearchContentIndex` (`searchContentIndex.ts:12`) — uses at `:22` (query keywords)
144
+ - All paths share the same `cleanUpKeywords` instance. No additional wiring needed.
145
+
146
+ ### 3. File ignore list
147
+
148
+ - **3.1 Extend WalkFiles** — modify `componets/walkFiles.ts:8`
149
+ - Current signature: `WalkFiles({cwd, log}: {cwd: string, log: ILogger})`
150
+ - New signature: `WalkFiles({cwd, log, ignoreFiles = []}: {cwd: string, log: ILogger, ignoreFiles?: string[]})`
151
+ - In `walk()` at line 18-22: the `ignore` instance `ig` is already constructed per-directory with accumulated `.gitignore` rules. Add `ignoreFiles` rules after existing rules:
152
+ ```ts
153
+ const ig = ignore();
154
+ for (const rule of rules) ig.add(rule);
155
+ for (const pattern of ignoreFiles) ig.add(pattern); // ← NEW
156
+ ```
157
+ - This makes `ignoreFiles` patterns behave identically to `.gitignore` entries — same glob syntax, same matching semantics (relative paths, directory trailing `/`, negation with `!`)
158
+ - The `ignore` package is already a dependency (`package.json:28`)
159
+
160
+ - **3.2 Extend WatchFiles** — modify `componets/watchFiles.ts:20`
161
+ - Current signature: `WatchFiles({cwd, log}: {cwd: string, log: ILogger})`
162
+ - New signature: `WatchFiles({cwd, log, ignoreFiles = []}: {cwd: string, log: ILogger, ignoreFiles?: string[]})`
163
+ - In `loadGitignore()` at line 22-31: creates its own `ignore()` instance per watched directory. Add `ignoreFiles` rules after `.gitignore` rules:
164
+ ```ts
165
+ async function loadGitignore(dir: string) {
166
+ const ig = ignore();
167
+ ig.add(".*");
168
+ try {
169
+ const content = await readFile(join(dir, ".gitignore"), "utf8");
170
+ ig.add(content);
171
+ } catch {}
172
+ for (const pattern of ignoreFiles) ig.add(pattern); // ← NEW
173
+ return ig;
174
+ }
175
+ ```
176
+
177
+ - **3.3 Wire ignoreFiles to all entry points** — pass `config.ignoreFiles` at each `WalkFiles`/`WatchFiles` construction:
178
+ - `apps/run.mcp.ts:18` — `WalkFiles({cwd, log})` → `WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles})`
179
+ - `apps/run.mcp.ts:30` — `WatchFiles({cwd, log})` → `WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles})`
180
+ - `apps/run.index.ts:10` — `WalkFiles({cwd, log})` → `WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles})`
181
+ - `apps/run.watch.ts:13` — `WalkFiles({cwd, log})` → `WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles})`
182
+ - `apps/run.watch.ts:14` — `WatchFiles({cwd, log})` → `WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles})`
183
+
184
+ ### 4. Inline code snippets
185
+
186
+ - **4.1 Add snippet params to MCP** — modify `apps/mcpApp.ts:34-58`
187
+ - `McpApp` factory gains `config: IXindexConfig` dependency (add to `mcpApp.ts:23` params)
188
+ - Extend `xindex_search` schema at line 37:
189
+ ```ts
190
+ inputSchema: z.object({
191
+ query: z.string().describe("Natural language search query"),
192
+ limit: z.number().int().min(1).max(100).default(10)
193
+ .describe("Max results to return, 10 by default, 100 max"),
194
+ maxSnippetResults: z.number().int().min(0).max(20).optional()
195
+ .describe("How many top results include inline code (default from .xindex.json, 3)"),
196
+ maxSnippetLines: z.number().int().min(0).max(50).optional()
197
+ .describe("Max lines in a result to qualify for inline code (default from .xindex.json, 7)"),
198
+ }),
199
+ ```
200
+ - In handler: resolve with config fallback:
201
+ ```ts
202
+ const sr = maxSnippetResults ?? config.maxSnippetResults;
203
+ const sl = maxSnippetLines ?? config.maxSnippetLines;
204
+ ```
205
+ - Update `apps/run.mcp.ts:48` — pass `config` to `McpApp`
206
+
207
+ - **4.2 Read source lines** — create `componets/index/readSnippet.ts`
208
+ ```ts
209
+ export type IReadSnippet = (record: IIndexRecord, maxLines: number) => Promise<string | null>;
210
+ export function ReadSnippet(): IReadSnippet
211
+ ```
212
+ Logic:
213
+ - **Cluster result** (`meta.type === StoreEntryType.cluster`): use `meta.fromLine`/`meta.toLine` directly from `IClusterMeta` (no need to parse from ID). Compute `lineCount = meta.toLine - meta.fromLine + 1`. If `lineCount > maxLines` → return `null`. Extract file path from `record.id` by splitting on last `:` (format `"path/to/file.ts:14-27"`). `readFile(filePath, "utf8")`, split lines, slice `[meta.fromLine-1, meta.toLine]`, return joined with `\n`.
214
+ - **File result** (`meta.type === StoreEntryType.meta`): `readFile(record.id, "utf8")`, split lines, count. If `lineCount > maxLines` → return `null`. Otherwise return full content.
215
+ - **Error handling**: on `readFile` failure (file deleted, moved, permission error) → return `null` silently. Search results still display; snippet is just omitted.
216
+
217
+ - **4.3 Format with code** — update result formatting in both entry points:
218
+
219
+ **MCP** (`apps/mcpApp.ts:47-51`): currently `results.map(...)` builds 1-line summaries. Replace with loop:
220
+ ```ts
221
+ const readSnippet = ReadSnippet();
222
+ const lines: string[] = [];
223
+ for (let i = 0; i < results.length; i++) {
224
+ const r = results[i];
225
+ const kw = r.meta.keywords ? ` — ${r.meta.keywords}` : "";
226
+ lines.push(`${i + 1}. ${r.id} (${r.score.toFixed(2)})${kw}`);
227
+ if (i < sr) {
228
+ const snippet = await readSnippet(r, sl);
229
+ if (snippet) lines.push("```\n" + snippet + "\n```");
230
+ }
231
+ }
232
+ ```
233
+
234
+ **CLI** (`apps/run.search.ts:8-31`): destructure config from `BuildComponents()`:
235
+ ```ts
236
+ const {searchContentIndex, config} = await BuildComponents({log});
237
+ ```
238
+ Then same snippet pattern using `config.maxSnippetResults` and `config.maxSnippetLines`:
239
+ ```ts
240
+ const readSnippet = ReadSnippet();
241
+ // ... in the result loop after existing log lines:
242
+ if (i < config.maxSnippetResults) {
243
+ const snippet = await readSnippet(results[i], config.maxSnippetLines);
244
+ if (snippet) log("```\n" + snippet + "\n```");
245
+ }
246
+ ```
247
+
248
+ ## Files Changed
249
+
250
+ | File | Change |
251
+ |------|--------|
252
+ | `componets/config/xindexConfig.ts` | **NEW** — `IXindexConfig` type with 4 fields |
253
+ | `componets/config/loadConfig.ts` | **NEW** — `LoadConfig` HOF, reads `.xindex.json` (optional), applies defaults, warns via `ILogger` |
254
+ | `componets/keywords/cleanUpKeywords.ts` | Add `ignoreKeywords` param + `ignoreSet` filter |
255
+ | `componets/walkFiles.ts` | Add `ignoreFiles` param, feed into `ignore()` instances |
256
+ | `componets/watchFiles.ts` | Add `ignoreFiles` param, feed into `loadGitignore()` `ignore()` instance |
257
+ | `componets/buildComponents.ts` | Add `{log}` param, load config, pass `ignoreKeywords` to `CleanUpKeywords`, return `config` |
258
+ | `componets/index/readSnippet.ts` | **NEW** — `ReadSnippet` HOF, reads file lines for a search result |
259
+ | `apps/mcpApp.ts` | Add `config` dep, `maxSnippetResults`/`maxSnippetLines` schema params, snippet formatting |
260
+ | `apps/run.mcp.ts` | Pass `config` to `McpApp`, `ignoreFiles` to `WalkFiles`/`WatchFiles`, `log` to `BuildComponents` |
261
+ | `apps/run.search.ts` | Pass `log` to `BuildComponents`, use `config` for snippet formatting |
262
+ | `apps/run.index.ts` | Pass `log` to `BuildComponents`, `ignoreFiles` to `WalkFiles` |
263
+ | `apps/run.watch.ts` | Pass `log` to `BuildComponents`, `ignoreFiles` to `WalkFiles`/`WatchFiles` |
264
+
265
+ ## Example `.xindex.json`
266
+
267
+ ```json
268
+ {
269
+ "ignoreKeywords": ["import", "export", "const", "function", "return", "async", "await"],
270
+ "ignoreFiles": ["*.test.ts", "*.spec.ts", "rnd/**", "dist/**"],
271
+ "maxSnippetLines": 7,
272
+ "maxSnippetResults": 3
273
+ }
274
+ ```
@@ -0,0 +1,32 @@
1
+ # Log: xindex-watch — Continuous Indexing with File Watcher
2
+
3
+ ### 2026-04-10
4
+
5
+ - Task created from user notes: "file watcher → apply to indexer → xindex-index runs → indexes provided or cwd → watches for changes → created/updated/moved/deleted → queued to stream → index content"
6
+ - Scouted codebase: IndexApp already uses streamx pipeline (`from → tap → map → run`), WalkFiles is async generator, Writer supports push-based streaming, merge combines streams
7
+ - Key decision: use `node:fs/promises` `watch()` (recursive, async iterable) — no external dep needed
8
+ - Key decision: tagged union `{type:"index"|"remove", path}` as common event shape for walk + watch streams
9
+ - Key decision: merge initial walk stream + watch stream into single pipeline
10
+ - Debounce needed: editors fire multiple events per save (write temp → rename → delete old)
11
+ - RemoveContent needed: Vectra has `deleteItem()` but no HOF wrapper exists yet
12
+ - **Clarification round resolved:**
13
+ - Watch is always on (no optional flag) — index all, then watch. Default behavior.
14
+ - Default to cwd when no args
15
+ - Event→Vectra: created→add, updated→delete+add, deleted→delete, moved→delete(old)+add(new)
16
+ - Binary filtering: deferred (TODO), keep simple for now
17
+ - Graceful shutdown: SIGINT → stop processing → ignore queued → exit
18
+ - Watching individual files: works fine with fs.watch, no issue
19
+ - **Consistency check:** fixed 6 issues — removed optional watch flag, added graceful shutdown to diagram/steps, clarified update=delete+add semantics, marked binary filtering as TODO
20
+ - **User clarification:** separate entry points — `xindex-watch` (new, continuous) vs `xindex-index` (existing, one-time). Both default to cwd.
21
+ - **Design decision:** Vectra `upsertItem` handles both add and update — no need for delete+add on updates, just upsert
22
+ - **Design decision:** WatchApp is a new HOF in `apps/watchApp.ts`; IndexApp stays unchanged; no modifications to MCP/search paths
23
+ - **Design decision:** WatchFiles uses `Writer<FileEvent>` to push events into streamx-compatible stream; `stop()` closes watchers + finishes writer
24
+ - **Implementation pivot:** streamx `Writer`/`merge`/`batchTimed` depend on `@handy/fun` (not installed in xindex). Rewrote to use plain async generators instead — simpler, no new deps. Two-phase approach: walk+index first, then watch+process.
25
+ - **Debounce approach:** collect events in Map (keyed by path, last event wins), flush after 150ms quiet period. Replaces batchTimed.
26
+ - **Watcher uses AbortController** for clean shutdown — `fs.watch` accepts `signal` option natively.
27
+ - **All steps implemented and verified:**
28
+ - Step 1: RemoveContent HOF + wired into ContentIndexDriver + BuildComponents ✓
29
+ - Step 2: WatchFiles component with debounced async generator ✓
30
+ - Step 3: WatchApp with two-phase (walk then watch) ✓
31
+ - Step 4: run.watch.ts + bin/xindex-watch + package.json + run.index.ts default ✓
32
+ - **Tested:** initial index, file create detection, file delete detection, SIGINT graceful shutdown — all pass
@@ -0,0 +1,101 @@
1
+ # Task: xindex-watch — Continuous Indexing with File Watcher
2
+
3
+ ## Context
4
+
5
+ Two entry points:
6
+ - `xindex-index` — one-time batch job: index all declared paths (default cwd), exit with status
7
+ - `xindex-watch` — **new**: index all, then watch for changes continuously, Ctrl+C to stop
8
+
9
+ Both default to cwd when no args. Watch handles create, update, move, delete events via a merged stream.
10
+
11
+ **Current pipeline** (`apps/indexApp.ts`):
12
+ ```
13
+ from(walkFiles(inputs))
14
+ .pipe(tap(log))
15
+ .pipe(map(readFile → extractKeywords → cleanUp → indexContent))
16
+ → run()
17
+ ```
18
+
19
+ **Key pieces already in place:**
20
+ - `WalkFiles` (`componets/walkFiles.ts`) — async generator yielding relative paths, .gitignore-aware
21
+ - `Writer` (`packages/streamx/src/writer.ts`) — push-based stream writer (backpressure-aware)
22
+ - `merge` (`packages/streamx/src/merge.ts`) — combines multiple streams
23
+ - `from` / `of` / `run` — streamx core
24
+
25
+ **Node.js `fs.watch`** (the `recursive` option is supported on all platforms in Node 22+; Linux support landed in v19.1.0):
26
+ - `fs.watch(dir, {recursive: true})` — macOS (FSEvents), Windows (ReadDirectoryChangesW), Linux (inotify, fd-per-dir)
27
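+ - The `node:fs/promises` `watch()` variant returns `AsyncIterable<FileChangeInfo>` with `.eventType` ("rename" | "change") and `.filename` (callback-style `fs.watch` returns an `FSWatcher` instead)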
28
+ - "rename" = create, delete, or move; "change" = content modified
29
+ - Need to `stat()` after event to distinguish create vs delete (file exists → create/update; doesn't exist → delete)
30
+ - Known issues: duplicate events per save → handled by batchTimed dedup; `filename` may be null → such events must be skipped (they carry no path to dedup on)
31
+
32
+ **No external dep needed** — `node:fs/promises` `watch()` returns async iterable directly. If issues arise, chokidar v5 (1 dep, ESM-only) is a drop-in upgrade. See [research](../research/2026-04-10-file-watching.md).
33
+
34
+ ## Goal
35
+
36
+ Add `xindex-watch` entry point: index all provided paths (or cwd by default), then watch for changes and keep the index up to date continuously. Ctrl+C gracefully stops. Update `xindex-index` to default to cwd but remain a one-time job.
37
+
38
+ ## Diagram
39
+
40
+ ```
41
+ bin/xindex-index → run.index.ts bin/xindex-watch → run.watch.ts
42
+ │ one-time, exits │ continuous, SIGINT to stop
43
+ │ │
44
+ ├─ inputs || [cwd] ├─ inputs || [cwd]
45
+ └─ IndexApp(inputs) ├─ SIGINT → app.stop()
46
+ │ └─ WatchApp(inputs)
47
+ └─ walkFiles → index → exit │
48
+ ├── INITIAL: from(walkFiles)
49
+ │ → map(path → {type:"index", path})
50
+
51
+ ├── WATCH: from(watchFiles)
52
+ │ → yields {type:"index"|"remove", path}
53
+
54
+ └── merge(initial, watch)
55
+
56
+ ├── batchTimed (debounce)
57
+ ├── dedup per path
58
+ ├── flat()
59
+ ├── tap(log)
60
+ ├── type:"index" → readFile → extractKeywords → cleanUp → indexContent
61
+ ├── type:"remove" → removeContent
62
+
63
+
64
+ runs until SIGINT
65
+
66
+ Event → Vectra:
67
+ created → upsert (add)
68
+ updated → upsert (overwrite)
69
+ deleted → deleteItem
70
+ moved → deleteItem(old) + upsert(new) (two fs.watch events)
71
+ ```
72
+
73
+ ## Steps
74
+
75
+ ### 1. RemoveContent + Wiring
76
+ - **RemoveContent HOF** — create `componets/index/removeContent.ts` wrapping Vectra `deleteItem(id)`, matching `indexContent.ts` pattern
77
+ - **ContentIndexDriver** — add `removeContent` to interface and factory
78
+ - **BuildComponents** — expose `removeContent` in return object
79
+
80
+ ### 2. WatchFiles Component
81
+ - **FileEvent type** — define `{type: "index"|"remove", path: string}` in `componets/watchFiles.ts` (exported, shared with WatchApp)
82
+ - **WatchFiles HOF** — create `componets/watchFiles.ts`, uses `Writer<FileEvent>` + `fs.watch(dir, {recursive:true})`; returns `{stream, stop}`
83
+ - **Event mapping** — "change" + stat exists → index; "rename" + stat exists → index; "rename" + stat throws → remove; filter through gitignore rules
84
+ - **Stop method** — close FSWatcher handles + `writer.finish()` to end the stream
85
+
86
+ ### 3. WatchApp
87
+ - **WatchApp HOF** — create `apps/watchApp.ts`: merge walk stream (mapped to FileEvents) + watch stream → `batchTimed(20, 150)` → dedup per path → `flat()` → `tap(log)` → process each event ("index" → readFile → extractKeywords → cleanUp → indexContent; "remove" → removeContent) → `run()`. Returns `{run, stop}`
88
+
89
+ ### 4. Entry Points
90
+ - **run.watch.ts** — new entry point: default to cwd, SIGINT → `app.stop()`, log stats on exit
91
+ - **bin/xindex-watch** + package.json — bin shim + scripts entry
92
+ - **run.index.ts** — default to cwd instead of error-exit; IndexApp unchanged (one-time job)
93
+
94
+ ## Edge Cases
95
+
96
+ - **Gitignore changes** — if `.gitignore` itself is modified, watcher should reload rules (or at minimum, not index newly-ignored files)
97
+ - **Binary files** — TODO: filter by extension or detect encoding; for now, readFile utf8 on everything (may produce garbage keywords for binaries)
98
+ - **Rapid renames** — editor save = delete old + create new; debounce window prevents double-processing
99
+ - **Symlinks** — `fs.watch` recursive doesn't follow symlinks; acceptable default
100
+ - **Index doesn't have the file** — "remove" for a file not in index should be a no-op (log warning, don't throw)
101
+ - **Watching individual files** — `fs.watch` works on single files too; not a primary use case but no issue supporting it
@@ -0,0 +1,5 @@
1
+ ### 2026-04-10 — Task created
2
+
3
+ - User researched MCP SDK pattern (Server + StdioServerTransport + tool handlers)
4
+ - Scope: wrap existing xindex BuildComponents as 2 MCP tools (index + search)
5
+ - Integration: add to .claude/settings.json for Claude Code auto-discovery
@@ -0,0 +1,92 @@
1
+ # Task: xindex-mcp — MCP Server for Semantic Code Search
2
+
3
+ ## Context
4
+
5
+ xindex is a working local semantic code search tool (index files → query by meaning). Goal: wrap it as an MCP server so Claude Code can search the codebase directly.
6
+
7
+ **Existing xindex pipeline:**
8
+ - `extractKeywords` → `cleanUpKeywords` → `embed` (MiniLM-L6) → `vectra` (upsert/query)
9
+ - `BuildComponents()` wires everything
10
+ - Entry points: `apps/run.index.ts`, `apps/run.search.ts`
11
+
12
+ **MCP SDK (modern API — `registerTool` + Zod):**
13
+ ```ts
14
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
15
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
16
+ import { z } from "zod";
17
+
18
+ const server = new McpServer({ name: "xindex", version: "0.1.0" });
19
+
20
+ server.registerTool("xindex_search", {
21
+ title: "Search codebase",
22
+ description: "Semantic search over indexed codebase files",
23
+ inputSchema: z.object({
24
+ query: z.string().describe("Natural language search query"),
25
+ limit: z.number().int().min(1).max(50).default(10).describe("Max results"),
26
+ }),
27
+ annotations: { readOnlyHint: true },
28
+ }, async ({ query, limit }) => {
29
+ // call searchContentIndex(query, limit)
30
+ return { content: [{ type: "text", text: JSON.stringify(results) }] };
31
+ });
32
+
33
+ const transport = new StdioServerTransport();
34
+ await server.connect(transport);
35
+ ```
36
+
37
+ **Integration:** `.mcp.json` in project root (project scope — shared via git).
38
+
39
+ **Decisions:**
40
+ - `BuildComponents()` inits once at startup — just factories, fast
41
+ - MCP exposes **search only** — indexing is a separate process (`bin/xindex-index`)
42
+ - Future: file watcher daemon to keep index up to date (out of scope for this task)
43
+
44
+ ## Goal
45
+
46
+ Create an MCP server that exposes `xindex_search` as a tool, so Claude Code can search the codebase semantically. Indexing runs separately via CLI.
47
+
48
+ ## Diagram
49
+
50
+ ```
51
+ ┌─────────────────────────┐
52
+ │ bin/xindex-index │
53
+ │ (separate process) │
54
+ │ indexes files → .xindex│
55
+ └──────────┬──────────────┘
56
+ │ writes
57
+
58
+ .xindex/ (vectra)
59
+
60
+ │ reads
61
+ ┌──────────┐ stdio ┌───────┴──────────────┐
62
+ │Claude Code├──────────┤ xindex-mcp server │
63
+ └──────────┘ │ │
64
+ │ tool: xindex_search │
65
+ │ query → extract │
66
+ │ → cleanUp → embed │
67
+ │ → vectra query │
68
+ │ → IIndexRecord[] │
69
+ └───────────────────────┘
70
+ ```
71
+
72
+ ## Steps
73
+
74
+ ### 1. MCP Server Setup
75
+ - Install `@modelcontextprotocol/sdk` and `zod` dependencies
76
+ - Create `apps/mcp-server.ts` — McpServer + StdioServerTransport
77
+ - `BuildComponents()` at top level (once), use `searchContentIndex` in tool handler
78
+
79
+ ### 2. Tool: xindex_search
80
+ - Input schema: Zod `z.object({query: z.string(), limit: z.number().int().min(1).max(50).default(10)})` with `.describe()` on each field
81
+ - Handler: call `searchContentIndex(query, limit)` → return `IIndexRecord[]` as text content
82
+ - Use `annotations: { readOnlyHint: true }` — search has no side effects
83
+
84
+ ### 3. Integration
85
+ - Add `bin/xindex-mcp` entry point (`#!/usr/bin/env tsx` + import)
86
+ - Add `.mcp.json` to project root: `{"mcpServers": {"xindex": {"command": "npx", "args": ["tsx", "apps/mcp-server.ts"]}}}`
87
+ - Test: index codebase via CLI, restart Claude Code, verify tool appears, search it
88
+ - Debug with: `npx @modelcontextprotocol/inspector npx tsx apps/mcp-server.ts`
89
+
90
+ ## Future (out of scope)
91
+ - File watcher daemon — observe fs events, keep index up to date automatically
92
+ - `xindex_index` tool — allow Claude to trigger indexing directly
@@ -0,0 +1,113 @@
1
+ # Research: How to Build an MCP Server (2026)
2
+
3
+ ## Findings
4
+
5
+ ### 1. SDK & API
6
+
7
+ - **Package:** `@modelcontextprotocol/sdk` (latest ^1.0.0)
8
+ - **Peer dep:** `zod` ^3.22.0
9
+
10
+ **Modern API** (use this — the old `server.tool()` overloads are deprecated, and hand-wiring `setRequestHandler` on the low-level `Server` is unnecessary here):
11
+
12
+ ```ts
13
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
14
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
15
+ import { z } from "zod";
16
+
17
+ const server = new McpServer({ name: "xindex", version: "0.1.0" });
18
+
19
+ server.registerTool(
20
+ "xindex_search",
21
+ {
22
+ title: "Search codebase",
23
+ description: "Semantic search over indexed codebase files",
24
+ inputSchema: z.object({
25
+ query: z.string().describe("Natural language search query"),
26
+ limit: z.number().int().min(1).max(50).default(10).describe("Max results"),
27
+ }),
28
+ annotations: { readOnlyHint: true },
29
+ },
30
+ async ({ query, limit }) => {
31
+ // call searchContentIndex(query, limit)
32
+ return {
33
+ content: [{ type: "text", text: JSON.stringify(results) }],
34
+ };
35
+ }
36
+ );
37
+
38
+ const transport = new StdioServerTransport();
39
+ await server.connect(transport);
40
+ ```
41
+
42
+ ### 2. Tool naming & annotations
43
+
44
+ - **snake_case** with service prefix: `xindex_search`, `xindex_index`
45
+ - **annotations**: `readOnlyHint: true` for search (signals no side effects), `destructiveHint: false` for index
46
+ - **inputSchema**: Zod objects with `.describe()` on each field — descriptions are shown to Claude
47
+
48
+ ### 3. Transport
49
+
50
+ **Stdio** is correct for local tools. Server reads JSON-RPC from stdin, writes to stdout. Use `console.error()` for logging (stdout is the protocol channel).
51
+
52
+ **Note on tsx**: works fine as a shebang (`#!/usr/bin/env tsx`), so no build step is needed.
53
+
54
+ ### 4. Claude Code Integration
55
+
56
+ **Three scopes:**
57
+
58
+ | Scope | File | Shared? |
59
+ |-------|------|---------|
60
+ | Local (default) | `~/.claude.json` under project path | No |
61
+ | Project | `.mcp.json` in project root | Yes (git) |
62
+ | User | `~/.claude.json` global | No |
63
+
64
+ **For this project — use `.mcp.json` (project scope)** so it's checked in and works for anyone who clones.
65
+
66
+ ```json
67
+ {
68
+ "mcpServers": {
69
+ "xindex": {
70
+ "command": "npx",
71
+ "args": ["tsx", "apps/mcp-server.ts"],
72
+ "env": {}
73
+ }
74
+ }
75
+ }
76
+ ```
77
+
78
+ Or add via CLI:
79
+ ```bash
80
+ claude mcp add --transport stdio --scope project xindex -- npx tsx apps/mcp-server.ts
81
+ ```
82
+
83
+ ### 5. Package requirements
84
+
85
+ - `"type": "module"` in package.json — already have this
86
+ - tsconfig: `module` and `moduleResolution` should be `Node16` or `NodeNext` (the SDK ships ES modules exposed through package.json subpath `exports`, e.g. `@modelcontextprotocol/sdk/server/mcp.js`)
87
+ - Current tsconfig has `"module": "Node16"` — compatible
88
+
89
+ ### 6. Testing
90
+
91
+ Use MCP Inspector for debugging:
92
+ ```bash
93
+ npx @modelcontextprotocol/inspector npx tsx apps/mcp-server.ts
94
+ ```
95
+
96
+ ## Recommendation
97
+
98
+ Minimal implementation — single file `apps/mcp-server.ts`:
99
+ 1. `BuildComponents()` at top level
100
+ 2. `server.registerTool("xindex_search", ...)` calling `searchContentIndex`
101
+ 3. `StdioServerTransport` + connect
102
+ 4. `.mcp.json` in project root for Claude Code discovery
103
+ 5. `bin/xindex-mcp` shebang entry point
104
+
105
+ No build step needed (tsx). No express/HTTP needed (stdio only). ~40 lines of code.
106
+
107
+ ## Sources
108
+
109
+ - [Official TypeScript SDK](https://github.com/modelcontextprotocol/typescript-sdk)
110
+ - [SDK server docs](https://github.com/modelcontextprotocol/typescript-sdk/blob/main/docs/server.md)
111
+ - [Claude Code MCP docs](https://code.claude.com/docs/en/mcp)
112
+ - [Anthropic MCP server reference](https://github.com/anthropics/skills/blob/main/skills/mcp-builder/reference/node_mcp_server.md)
113
+ - [npm package](https://www.npmjs.com/package/@modelcontextprotocol/sdk)