xindex 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.tmp/watch.log +9 -0
- package/README.md +31 -26
- package/apps/indexApp.ts +5 -5
- package/apps/run.index.ts +2 -2
- package/apps/run.mcp.ts +3 -3
- package/apps/run.watch.ts +3 -3
- package/apps/watchApp.ts +5 -5
- package/componets/buildComponents.ts +2 -1
- package/componets/config/INDEXING_BATCH_SIZE.ts +1 -1
- package/componets/config/WATCH_FLUSH_MS.ts +1 -1
- package/componets/config/loadConfig.ts +10 -1
- package/componets/config/xindexConfig.ts +1 -0
- package/componets/index/contentIndexDriver.ts +4 -2
- package/componets/index/handleFileEvent.ts +14 -34
- package/componets/io/safeIndexBatch.ts +30 -0
- package/componets/io/safeReadFile.ts +36 -0
- package/componets/locate/locateInFile.ts +11 -6
- package/componets/walkFiles.ts +34 -3
- package/features/indexContent.ts +16 -7
- package/package.json +6 -1
package/.tmp/watch.log
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
[xindex-1776976173089-3ab4dd18cfbfc8] started, indexing and watching: .
|
|
2
|
+
indexing: notes.md
|
|
3
|
+
indexing: util.ts
|
|
4
|
+
initial index complete, acquiring watcher lock...
|
|
5
|
+
done. indexed: {"indexedAmount":2}
|
|
6
|
+
watching for changes: .
|
|
7
|
+
index: wombat.md
|
|
8
|
+
remove: functional
|
|
9
|
+
remove batch failed: Error saving index: Error: ENOENT: no such file or directory, open '.xindex/semantic/index.json'
|
package/README.md
CHANGED
|
@@ -6,6 +6,8 @@ Local semantic code search for your codebase — plus an MCP server so Claude Co
|
|
|
6
6
|
|
|
7
7
|
## Install
|
|
8
8
|
|
|
9
|
+
Requires **Node ≥22**.
|
|
10
|
+
|
|
9
11
|
```bash
|
|
10
12
|
npm i -g xindex
|
|
11
13
|
```
|
|
@@ -39,6 +41,16 @@ Drop this into `.mcp.json` at your project root:
|
|
|
39
41
|
|
|
40
42
|
Open the project in Claude Code — it picks up the xindex MCP server and can call `xindex_search`, `xindex_index`, and `xindex_reset` directly. Fewer hallucinations, fewer round-trips.
|
|
41
43
|
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- **Local** — everything runs on your machine; embeddings cached on disk
|
|
47
|
+
- **Semantic search** — natural-language queries, not substring match
|
|
48
|
+
- **MCP server** — plugs into Claude Code via `.mcp.json`
|
|
49
|
+
- **Watch mode** — keeps the index warm while you code
|
|
50
|
+
- **Gitignore-aware** — respects `.gitignore` + custom ignore rules
|
|
51
|
+
- **Zero config** — works with defaults; `.xindex.json` is optional
|
|
52
|
+
- **Tolerant** — tolerates unreadable files, oversize files, empty files, binary files, and symlinks; each is skipped with a log line so the run always finishes
|
|
53
|
+
|
|
42
54
|
## Claude Code skills (`@xi`)
|
|
43
55
|
|
|
44
56
|
Two optional [Claude Code skills](https://docs.claude.com/en/docs/claude-code/skills) wrap the MCP tools so you don't have to think about them:
|
|
@@ -73,13 +85,12 @@ argument-hint: "[question]"
|
|
|
73
85
|
Surface-level codebase discovery via xindex. Tool: `xindex_search` (natural-language, meaning-based).
|
|
74
86
|
|
|
75
87
|
**Steps:**
|
|
76
|
-
1. Draft
|
|
77
|
-
2.
|
|
78
|
-
3.
|
|
79
|
-
4.
|
|
80
|
-
5. Return file paths + brief keywords showing why each matched.
|
|
88
|
+
1. Draft 3–7 queries from $ARGUMENTS (entry points, routing, config, tests, patterns); run `xindex_search` in parallel.
|
|
89
|
+
2. If empty/sparse/stale → scoped `xindex_index` on relevant root folders (one path per call, e.g. `src`, `skills`, `agents`), then re-search. Prefer scoped over full-repo.
|
|
90
|
+
3. Run 3–7 narrower follow-ups in parallel based on round-1 hits.
|
|
91
|
+
4. Return file paths + brief keywords showing why each matched.
|
|
81
92
|
|
|
82
|
-
Output = file links + keywords, not analysis. For reset or full re-index, delegate to `/xindex` (owns safety rules).
|
|
93
|
+
Output = file links + keywords, not analysis. **Escalate to `/ask-cursor` by default** (cheap codebase reasoning); only go to `/ask-claude` for multi-file/pattern analysis or `/ask-claude-opus` for trade-offs. For reset or full re-index, delegate to `/xindex` (owns safety rules).
|
|
83
94
|
````
|
|
84
95
|
|
|
85
96
|
`xindex/SKILL.md`:
|
|
@@ -90,7 +101,7 @@ name: xindex
|
|
|
90
101
|
description: Manages xindex semantic search — index, search, reset via MCP tools. For research questions, use /ask-xi.
|
|
91
102
|
argument-hint: "[search query | index | reset]"
|
|
92
103
|
---
|
|
93
|
-
Full xindex tool management. For research, use `/ask-xi`. Install: `npm i -g xindex
|
|
104
|
+
Full xindex tool management. For research, use `/ask-xi`. Install: `npm i -g xindex` ([npm](https://www.npmjs.com/package/xindex)).
|
|
94
105
|
|
|
95
106
|
**Tools:**
|
|
96
107
|
- `xindex_search` — find files by meaning (synonyms, semantics). Try before grepping blindly.
|
|
@@ -106,15 +117,6 @@ $ARGUMENTS
|
|
|
106
117
|
|
|
107
118
|
Both skills assume the `xindex` MCP server is registered (see the section above). Restart Claude Code after adding skills.
|
|
108
119
|
|
|
109
|
-
## Features
|
|
110
|
-
|
|
111
|
-
- **Local** — everything runs on your machine; embeddings cached on disk
|
|
112
|
-
- **Semantic search** — natural-language queries, not substring match
|
|
113
|
-
- **MCP server** — plugs into Claude Code via `.mcp.json`
|
|
114
|
-
- **Watch mode** — keeps the index warm while you code
|
|
115
|
-
- **Gitignore-aware** — respects `.gitignore` + custom ignore rules
|
|
116
|
-
- **Zero config** — works with defaults; `.xindex.json` is optional
|
|
117
|
-
|
|
118
120
|
---
|
|
119
121
|
|
|
120
122
|
## CLI reference
|
|
@@ -129,7 +131,7 @@ xindex-index apps features
|
|
|
129
131
|
```
|
|
130
132
|
|
|
131
133
|
### `xindex-search <query...>`
|
|
132
|
-
Search the index. All args are joined into one query. Default limit:
|
|
134
|
+
Search the index. All args are joined into one query. Default limit: 7.
|
|
133
135
|
```bash
|
|
134
136
|
xindex-search "database migration logic"
|
|
135
137
|
xindex-search file watcher debounce
|
|
@@ -157,19 +159,23 @@ xindex-mcp --watch-dir=./src # watch a specific dir
|
|
|
157
159
|
|
|
158
160
|
## MCP tools
|
|
159
161
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
| `xindex_index` | Index paths | `inputs: string[]` (at least one) |
|
|
164
|
-
| `xindex_reset` | Wipe index (destructive) | — |
|
|
162
|
+
- **`xindex_search`** — semantic search. `query: string`, `limit?: number` (default 7, max 50)
|
|
163
|
+
- **`xindex_index`** — index paths. `inputs: string[]` (at least one)
|
|
164
|
+
- **`xindex_reset`** — wipe index (destructive). No input
|
|
165
165
|
|
|
166
|
-
Note: CLI `xindex-search`
|
|
166
|
+
Note: both CLI `xindex-search` and MCP `xindex_search` default to 7 results; MCP caps at 50.
|
|
167
167
|
|
|
168
168
|
## Configuration
|
|
169
169
|
|
|
170
170
|
### `.xindex.json` (optional)
|
|
171
171
|
|
|
172
|
-
|
|
172
|
+
Project-root file. All fields optional; unknown keys ignored; missing/empty → defaults.
|
|
173
|
+
|
|
174
|
+
- **`ignoreKeywords`** — `string[]`, default `[]`. Tokens stripped before embedding — add project slang/boilerplate polluting results. Entries ≤1 char warn.
|
|
175
|
+
- **`ignoreFiles`** — `string[]`, default `[]`. Extra globs excluded during walk/watch, on top of `.gitignore` — add vendored/generated folders.
|
|
176
|
+
- **`maxLines`** — `number`, default `30`. Lines per chunk — tune if chunks feel over/under-sized.
|
|
177
|
+
- **`maxFileBytes`** — `number`, default `1000000`. Skip files over this (1 MB) — raise to index larger generated files.
|
|
178
|
+
- **`followSymlinks`** — `boolean`, default `false`. When `false`, symbolic links encountered during walk/watch are skipped with a log line. Set `true` to follow them (cycles are broken via `realpath` dedup).
|
|
173
179
|
|
|
174
180
|
```json
|
|
175
181
|
{
|
|
@@ -178,8 +184,7 @@ Place at your project root. Both fields are optional arrays; unknown keys are ig
|
|
|
178
184
|
}
|
|
179
185
|
```
|
|
180
186
|
|
|
181
|
-
|
|
182
|
-
- **`ignoreFiles`** — extra glob patterns excluded during walk/watch, on top of `.gitignore`.
|
|
187
|
+
Override only what you need; re-run `xindex-index .` (or let the watcher pick it up). Invalid JSON throws; wrong-typed fields fall back silently.
|
|
183
188
|
|
|
184
189
|
### `.xindex/` folder
|
|
185
190
|
|
package/apps/indexApp.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import {readFile} from "fs/promises";
|
|
2
1
|
import {from} from "../packages/streamx/src/from.js";
|
|
3
2
|
import {batch} from "../packages/streamx/src/batch.js";
|
|
4
3
|
import {map} from "../packages/streamx/src/map.js";
|
|
@@ -8,23 +7,24 @@ import {IWalkFiles} from "../componets/walkFiles.js";
|
|
|
8
7
|
import {IIndexContent} from "../features/indexContent.js";
|
|
9
8
|
import {ILogger} from "../componets/logger.js";
|
|
10
9
|
import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE";
|
|
10
|
+
import {SafeIndexBatch} from "../componets/io/safeIndexBatch.js";
|
|
11
11
|
|
|
12
12
|
export type IIndexApp = (inputs: string[]) => Promise<void>;
|
|
13
13
|
|
|
14
|
-
export function IndexApp({walkFiles, indexContent, log}: {
|
|
14
|
+
export function IndexApp({walkFiles, indexContent, log, maxFileBytes}: {
|
|
15
15
|
walkFiles: IWalkFiles,
|
|
16
16
|
indexContent: IIndexContent,
|
|
17
17
|
log: ILogger,
|
|
18
|
+
maxFileBytes: number,
|
|
18
19
|
}): IIndexApp {
|
|
20
|
+
const safeIndexBatch = SafeIndexBatch({indexContent, log, maxFileBytes});
|
|
19
21
|
return async function indexApp(inputs) {
|
|
20
22
|
await run(
|
|
21
23
|
from(walkFiles(inputs))
|
|
22
24
|
.pipe(tap(id => log(`indexing: ${id}`)))
|
|
23
25
|
.pipe(batch(INDEXING_BATCH_SIZE))
|
|
24
26
|
.pipe(map<string[], string[]>(async (ids) => {
|
|
25
|
-
|
|
26
|
-
const items = ids.map((id, i) => ({id, content: `${texts[i]}. ${id}`}));
|
|
27
|
-
await indexContent(items);
|
|
27
|
+
await safeIndexBatch(ids);
|
|
28
28
|
return ids;
|
|
29
29
|
}))
|
|
30
30
|
);
|
package/apps/run.index.ts
CHANGED
|
@@ -8,8 +8,8 @@ const appId = AppId();
|
|
|
8
8
|
const cwd = process.cwd();
|
|
9
9
|
const log = BufferedLoggerToStdOut();
|
|
10
10
|
const {indexContent, getIndexStats, config} = await BuildComponents({log});
|
|
11
|
-
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
|
|
12
|
-
const indexApp = IndexApp({walkFiles, indexContent, log});
|
|
11
|
+
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles, followSymlinks: config.followSymlinks});
|
|
12
|
+
const indexApp = IndexApp({walkFiles, indexContent, log, maxFileBytes: config.maxFileBytes});
|
|
13
13
|
|
|
14
14
|
const inputs = process.argv.slice(2);
|
|
15
15
|
if (!inputs.length) inputs.push(".");
|
package/apps/run.mcp.ts
CHANGED
|
@@ -17,8 +17,8 @@ const watchDisabled = args.includes("--watch-disabled");
|
|
|
17
17
|
const cwd = process.cwd();
|
|
18
18
|
const log = BufferedLoggerToStdErr();
|
|
19
19
|
const {indexContent, removeContent, getIndexStats, searchContentIndex, resetIndex, config} = await BuildComponents({log});
|
|
20
|
-
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
|
|
21
|
-
const indexApp = IndexApp({walkFiles, indexContent, log});
|
|
20
|
+
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles, followSymlinks: config.followSymlinks});
|
|
21
|
+
const indexApp = IndexApp({walkFiles, indexContent, log, maxFileBytes: config.maxFileBytes});
|
|
22
22
|
const search = SearchApp({searchContentIndex});
|
|
23
23
|
|
|
24
24
|
const appId = AppId();
|
|
@@ -31,7 +31,7 @@ const watcherLock = WatcherLock({
|
|
|
31
31
|
const watch = watchDisabled ? undefined : {
|
|
32
32
|
watchFiles: WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles}),
|
|
33
33
|
watchDir: watchDirArg ? watchDirArg.split("=")[1] : ".",
|
|
34
|
-
handleFileEvents: HandleFileEvents({indexContent, removeContent, log}),
|
|
34
|
+
handleFileEvents: HandleFileEvents({indexContent, removeContent, log, maxFileBytes: config.maxFileBytes}),
|
|
35
35
|
watcherLock,
|
|
36
36
|
};
|
|
37
37
|
|
package/apps/run.watch.ts
CHANGED
|
@@ -11,9 +11,9 @@ import {join} from "path";
|
|
|
11
11
|
const cwd = process.cwd();
|
|
12
12
|
const log = BufferedLoggerToStdOut();
|
|
13
13
|
const {indexContent, removeContent, getIndexStats, config} = await BuildComponents({log});
|
|
14
|
-
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
|
|
14
|
+
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles, followSymlinks: config.followSymlinks});
|
|
15
15
|
const watchFiles = WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles});
|
|
16
|
-
const handleFileEvents = HandleFileEvents({indexContent, removeContent, log});
|
|
16
|
+
const handleFileEvents = HandleFileEvents({indexContent, removeContent, log, maxFileBytes: config.maxFileBytes});
|
|
17
17
|
|
|
18
18
|
const appId = AppId();
|
|
19
19
|
const watcherLock = WatcherLock({
|
|
@@ -22,7 +22,7 @@ const watcherLock = WatcherLock({
|
|
|
22
22
|
log,
|
|
23
23
|
});
|
|
24
24
|
|
|
25
|
-
const app = WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock});
|
|
25
|
+
const app = WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock, maxFileBytes: config.maxFileBytes});
|
|
26
26
|
|
|
27
27
|
const inputs = process.argv.slice(2);
|
|
28
28
|
if (!inputs.length) inputs.push(".");
|
package/apps/watchApp.ts
CHANGED
|
@@ -3,7 +3,6 @@ import {batch} from "../packages/streamx/src/batch.js";
|
|
|
3
3
|
import {map} from "../packages/streamx/src/map.js";
|
|
4
4
|
import {tap} from "../packages/streamx/src/tap.js";
|
|
5
5
|
import {run} from "../packages/streamx/src/index.js";
|
|
6
|
-
import {readFile} from "fs/promises";
|
|
7
6
|
import {IWalkFiles} from "../componets/walkFiles.js";
|
|
8
7
|
import {IWatchFiles} from "../componets/watchFiles.js";
|
|
9
8
|
import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
|
|
@@ -12,20 +11,23 @@ import {IWatcherLock} from "../componets/index/watcherLock.js";
|
|
|
12
11
|
import {WatchFileEventsApp} from "./watchFileEventsApp.js";
|
|
13
12
|
import {IIndexContent} from "../features/indexContent.js";
|
|
14
13
|
import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE";
|
|
14
|
+
import {SafeIndexBatch} from "../componets/io/safeIndexBatch.js";
|
|
15
15
|
|
|
16
16
|
export type IWatchApp = {
|
|
17
17
|
run: (inputs: string[]) => Promise<void>;
|
|
18
18
|
stop: () => void;
|
|
19
19
|
};
|
|
20
20
|
|
|
21
|
-
export function WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock}: {
|
|
21
|
+
export function WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock, maxFileBytes}: {
|
|
22
22
|
walkFiles: IWalkFiles,
|
|
23
23
|
watchFiles: IWatchFiles,
|
|
24
24
|
handleFileEvents: IHandleFileEvents,
|
|
25
25
|
indexContent: IIndexContent,
|
|
26
26
|
log: ILogger,
|
|
27
27
|
watcherLock: IWatcherLock,
|
|
28
|
+
maxFileBytes: number,
|
|
28
29
|
}): IWatchApp {
|
|
30
|
+
const safeIndexBatch = SafeIndexBatch({indexContent, log, maxFileBytes});
|
|
29
31
|
return {
|
|
30
32
|
async run(inputs) {
|
|
31
33
|
// Phase 1: initial index — walk all files
|
|
@@ -34,9 +36,7 @@ export function WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent,
|
|
|
34
36
|
.pipe(tap(id => log(`indexing: ${id}`)))
|
|
35
37
|
.pipe(batch(INDEXING_BATCH_SIZE))
|
|
36
38
|
.pipe(map<string[], string[]>(async (ids) => {
|
|
37
|
-
|
|
38
|
-
const items = ids.map((id, i) => ({id, content: `${texts[i]}. ${id}`}));
|
|
39
|
-
await indexContent(items);
|
|
39
|
+
await safeIndexBatch(ids);
|
|
40
40
|
return ids;
|
|
41
41
|
}))
|
|
42
42
|
);
|
|
package/componets/config/INDEXING_BATCH_SIZE.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const INDEXING_BATCH_SIZE =
|
|
1
|
+
export const INDEXING_BATCH_SIZE = 16;
|
|
package/componets/config/WATCH_FLUSH_MS.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export const WATCH_FLUSH_MS =
|
|
1
|
+
export const WATCH_FLUSH_MS = 300;
|
|
package/componets/config/loadConfig.ts
CHANGED
|
@@ -3,13 +3,15 @@ import {IXindexConfig} from "./xindexConfig.js";
|
|
|
3
3
|
import {ILogger} from "../logger.js";
|
|
4
4
|
|
|
5
5
|
const DEFAULT_MAX_LINES = 30;
|
|
6
|
-
const DEFAULT_MAX_FILE_BYTES =
|
|
6
|
+
const DEFAULT_MAX_FILE_BYTES = 1_000_000;
|
|
7
|
+
const DEFAULT_FOLLOW_SYMLINKS = false;
|
|
7
8
|
|
|
8
9
|
const DEFAULTS: IXindexConfig = {
|
|
9
10
|
ignoreKeywords: [],
|
|
10
11
|
ignoreFiles: [],
|
|
11
12
|
maxLines: DEFAULT_MAX_LINES,
|
|
12
13
|
maxFileBytes: DEFAULT_MAX_FILE_BYTES,
|
|
14
|
+
followSymlinks: DEFAULT_FOLLOW_SYMLINKS,
|
|
13
15
|
};
|
|
14
16
|
|
|
15
17
|
export type ILoadConfig = () => Promise<IXindexConfig>;
|
|
@@ -41,11 +43,18 @@ export function LoadConfig({configPath, log}: { configPath: string, log: ILogger
|
|
|
41
43
|
ignoreFiles: toStrings(parsed.ignoreFiles),
|
|
42
44
|
maxLines: toNum(parsed.maxLines, DEFAULT_MAX_LINES),
|
|
43
45
|
maxFileBytes: toNum(parsed.maxFileBytes, DEFAULT_MAX_FILE_BYTES),
|
|
46
|
+
followSymlinks: typeof parsed.followSymlinks === "boolean" ? parsed.followSymlinks : DEFAULT_FOLLOW_SYMLINKS,
|
|
44
47
|
};
|
|
45
48
|
|
|
46
49
|
for (const kw of config.ignoreKeywords) {
|
|
47
50
|
if (kw.length <= 1) log(`warning: ignoreKeywords entry "${kw}" is <=1 char`);
|
|
48
51
|
}
|
|
52
|
+
if (config.maxFileBytes < 1024) {
|
|
53
|
+
log(`warning: maxFileBytes (${config.maxFileBytes}) < 1024 — likely a typo`);
|
|
54
|
+
}
|
|
55
|
+
if (parsed.followSymlinks !== undefined && typeof parsed.followSymlinks !== "boolean") {
|
|
56
|
+
log(`warning: followSymlinks must be boolean — using default (${DEFAULT_FOLLOW_SYMLINKS})`);
|
|
57
|
+
}
|
|
49
58
|
|
|
50
59
|
return config;
|
|
51
60
|
};
|
|
package/componets/index/contentIndexDriver.ts
CHANGED
|
@@ -9,6 +9,7 @@ import {ResetIndex, IResetIndex} from "../../features/resetIndex.js";
|
|
|
9
9
|
import {VectraIndex} from "./vectraIndex.js";
|
|
10
10
|
import {IndexApi} from "./indexApi.js";
|
|
11
11
|
import {ILocateInFile} from "../locate/locateInFile.js";
|
|
12
|
+
import {ILogger} from "../logger.js";
|
|
12
13
|
|
|
13
14
|
export type IContentIndexDriver = Readonly<{
|
|
14
15
|
getIndexStats: IGetIndexStats,
|
|
@@ -19,20 +20,21 @@ export type IContentIndexDriver = Readonly<{
|
|
|
19
20
|
flush: () => Promise<void>,
|
|
20
21
|
}>;
|
|
21
22
|
|
|
22
|
-
export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, locateInFile, scoreThreshold}: {
|
|
23
|
+
export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, locateInFile, scoreThreshold, log}: {
|
|
23
24
|
path: string,
|
|
24
25
|
embed: IEmbed,
|
|
25
26
|
extractKeywords: IExtractKeywords,
|
|
26
27
|
cleanUpKeywords: ICleanUpKeywords,
|
|
27
28
|
locateInFile: ILocateInFile,
|
|
28
29
|
scoreThreshold: number,
|
|
30
|
+
log: ILogger,
|
|
29
31
|
}): Promise<IContentIndexDriver> {
|
|
30
32
|
const index = await VectraIndex(path + "/semantic");
|
|
31
33
|
const indexApi = IndexApi({index});
|
|
32
34
|
|
|
33
35
|
return {
|
|
34
36
|
getIndexStats: GetIndexStats({index}),
|
|
35
|
-
indexContent: IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi}),
|
|
37
|
+
indexContent: IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi, log}),
|
|
36
38
|
removeContent: RemoveContent({indexApi}),
|
|
37
39
|
searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, locateInFile, scoreThreshold}),
|
|
38
40
|
resetIndex: ResetIndex({indexApi}),
|
|
package/componets/index/handleFileEvent.ts
CHANGED
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
import {readFile} from "fs/promises";
|
|
2
1
|
import {IIndexContent} from "../../features/indexContent.js";
|
|
3
2
|
import {IRemoveContent} from "../../features/removeContent.js";
|
|
4
3
|
import {ILogger} from "../logger.js";
|
|
5
4
|
import {FileEventType, IFileEvent} from "../watchFiles.js";
|
|
5
|
+
import {SafeIndexBatch} from "../io/safeIndexBatch.js";
|
|
6
6
|
|
|
7
7
|
export type IHandleFileEvent = (event: IFileEvent) => Promise<void>;
|
|
8
8
|
export type IHandleFileEvents = (events: IFileEvent[]) => Promise<void>;
|
|
9
9
|
|
|
10
|
-
export function HandleFileEvent({indexContent, removeContent, log}: {
|
|
10
|
+
export function HandleFileEvent({indexContent, removeContent, log, maxFileBytes}: {
|
|
11
11
|
indexContent: IIndexContent,
|
|
12
12
|
removeContent: IRemoveContent,
|
|
13
13
|
log: ILogger,
|
|
14
|
+
maxFileBytes: number,
|
|
14
15
|
}): IHandleFileEvent {
|
|
16
|
+
const safeIndexBatch = SafeIndexBatch({indexContent, log, maxFileBytes});
|
|
15
17
|
return async function handleFileEvent(event) {
|
|
16
18
|
if (event.type === FileEventType.index) {
|
|
17
|
-
|
|
18
|
-
await indexContent([{id: event.path, content: `${text}. ${event.path}`}]);
|
|
19
|
-
log(`index: ${event.path}`);
|
|
19
|
+
await safeIndexBatch([event.path]);
|
|
20
20
|
} else {
|
|
21
21
|
try { await removeContent([event.path]); } catch (e) { log(`remove failed: ${event.path} — ${(e as any)?.message ?? e}`); }
|
|
22
22
|
log(`remove: ${event.path}`);
|
|
@@ -24,42 +24,22 @@ export function HandleFileEvent({indexContent, removeContent, log}: {
|
|
|
24
24
|
};
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
export function HandleFileEvents({indexContent, removeContent, log}: {
|
|
27
|
+
export function HandleFileEvents({indexContent, removeContent, log, maxFileBytes}: {
|
|
28
28
|
indexContent: IIndexContent,
|
|
29
29
|
removeContent: IRemoveContent,
|
|
30
30
|
log: ILogger,
|
|
31
|
+
maxFileBytes: number,
|
|
31
32
|
}): IHandleFileEvents {
|
|
33
|
+
const safeIndexBatch = SafeIndexBatch({indexContent, log, maxFileBytes});
|
|
32
34
|
return async function handleFileEvents(events) {
|
|
33
|
-
const
|
|
34
|
-
const
|
|
35
|
+
const indexPaths = events.filter(e => e.type === FileEventType.index).map(e => e.path);
|
|
36
|
+
const removePaths = events.filter(e => e.type === FileEventType.remove).map(e => e.path);
|
|
35
37
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
log(`index: ${event.path}`);
|
|
39
|
-
try {
|
|
40
|
-
const text = await readFile(event.path, "utf8");
|
|
41
|
-
return {id: event.path, content: `${text}. ${event.path}`};
|
|
42
|
-
} catch (e) {
|
|
43
|
-
log(`index failed: ${event.path} — ${(e as any)?.message ?? e}`);
|
|
44
|
-
return undefined;
|
|
45
|
-
}
|
|
46
|
-
}))).filter((item): item is { id: string, content: string } => !!item);
|
|
47
|
-
|
|
48
|
-
if (indexItems.length > 0) {
|
|
49
|
-
try {
|
|
50
|
-
await indexContent(indexItems);
|
|
51
|
-
} catch (e) {
|
|
52
|
-
log(`index batch failed: ${(e as any)?.message ?? e}`);
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
if (removeEvents.length > 0) {
|
|
58
|
-
const removePaths = removeEvents.map((event) => {
|
|
59
|
-
log(`remove: ${event.path}`);
|
|
60
|
-
return event.path;
|
|
61
|
-
});
|
|
38
|
+
for (const p of indexPaths) log(`index: ${p}`);
|
|
39
|
+
if (indexPaths.length > 0) await safeIndexBatch(indexPaths);
|
|
62
40
|
|
|
41
|
+
if (removePaths.length > 0) {
|
|
42
|
+
for (const p of removePaths) log(`remove: ${p}`);
|
|
63
43
|
try {
|
|
64
44
|
await removeContent(removePaths);
|
|
65
45
|
} catch (e) {
|
|
package/componets/io/safeIndexBatch.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import {safeReadFile, SafeReadResultType} from "./safeReadFile.js";
|
|
2
|
+
import {IIndexContent} from "../../features/indexContent.js";
|
|
3
|
+
import {ILogger} from "../logger.js";
|
|
4
|
+
|
|
5
|
+
export type ISafeIndexBatch = (ids: string[]) => Promise<void>;
|
|
6
|
+
|
|
7
|
+
export function SafeIndexBatch({indexContent, log, maxFileBytes}: {
|
|
8
|
+
indexContent: IIndexContent,
|
|
9
|
+
log: ILogger,
|
|
10
|
+
maxFileBytes: number,
|
|
11
|
+
}): ISafeIndexBatch {
|
|
12
|
+
return async function safeIndexBatch(ids) {
|
|
13
|
+
const results = await Promise.all(ids.map(id => safeReadFile(id, maxFileBytes)));
|
|
14
|
+
const ok: {id: string, content: string}[] = [];
|
|
15
|
+
for (const r of results) {
|
|
16
|
+
if (r.type === SafeReadResultType.ok) {
|
|
17
|
+
ok.push({id: r.id, content: `${r.content}. ${r.id}`});
|
|
18
|
+
} else {
|
|
19
|
+
log(`skip ${r.id}: ${r.reason}`);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
if (ok.length > 0) {
|
|
23
|
+
try {
|
|
24
|
+
await indexContent(ok);
|
|
25
|
+
} catch (e: any) {
|
|
26
|
+
log(`index batch failed: ${e?.message ?? e}`);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
};
|
|
30
|
+
}
|
|
package/componets/io/safeReadFile.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import {stat, readFile} from "fs/promises";
|
|
2
|
+
import {IType} from "../IType.js";
|
|
3
|
+
|
|
4
|
+
export enum SafeReadResultType {
|
|
5
|
+
ok = "ok",
|
|
6
|
+
skip = "skip",
|
|
7
|
+
error = "error",
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export enum SafeReadSkipReason {
|
|
11
|
+
tooLarge = "tooLarge",
|
|
12
|
+
empty = "empty",
|
|
13
|
+
binary = "binary",
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export type ISafeReadResult =
|
|
17
|
+
| IType<{ type: SafeReadResultType.ok, id: string, content: string }>
|
|
18
|
+
| IType<{ type: SafeReadResultType.skip, id: string, reason: SafeReadSkipReason }>
|
|
19
|
+
| IType<{ type: SafeReadResultType.error, id: string, reason: string }>;
|
|
20
|
+
|
|
21
|
+
export async function safeReadFile(id: string, maxBytes: number): Promise<ISafeReadResult> {
|
|
22
|
+
try {
|
|
23
|
+
const s = await stat(id);
|
|
24
|
+
if (s.size > maxBytes) return {type: SafeReadResultType.skip, id, reason: SafeReadSkipReason.tooLarge};
|
|
25
|
+
const buf = await readFile(id);
|
|
26
|
+
const probe = buf.subarray(0, Math.min(10, buf.length));
|
|
27
|
+
for (let i = 0; i < probe.length; i++) {
|
|
28
|
+
if (probe[i] === 0) return {type: SafeReadResultType.skip, id, reason: SafeReadSkipReason.binary};
|
|
29
|
+
}
|
|
30
|
+
const content = buf.toString("utf8");
|
|
31
|
+
if (!content.trim()) return {type: SafeReadResultType.skip, id, reason: SafeReadSkipReason.empty};
|
|
32
|
+
return {type: SafeReadResultType.ok, id, content};
|
|
33
|
+
} catch (e: any) {
|
|
34
|
+
return {type: SafeReadResultType.error, id, reason: e?.code ?? e?.message ?? String(e)};
|
|
35
|
+
}
|
|
36
|
+
}
|
|
package/componets/locate/locateInFile.ts
CHANGED
|
@@ -11,9 +11,9 @@ import {filter} from "../../packages/streamx/src/filter.js";
|
|
|
11
11
|
import {map} from "../../packages/streamx/src/map.js";
|
|
12
12
|
import {flatMap} from "../../packages/streamx/src/flatMap.js";
|
|
13
13
|
import {tap} from "../../packages/streamx/src/tap.js";
|
|
14
|
-
import {scaleSync} from "../../packages/streamx/src/scaleSync.js";
|
|
15
14
|
import {run} from "../../packages/streamx/src/index.js";
|
|
16
15
|
import {DEFAULT_LOCATE_BATCH_SIZE} from "../config/DEFAULT_LOCATE_BATCH_SIZE";
|
|
16
|
+
import {scaleSync} from "../../packages/streamx/src/scaleSync";
|
|
17
17
|
|
|
18
18
|
export type ILocateInFile = (
|
|
19
19
|
query: string,
|
|
@@ -61,12 +61,17 @@ export function LocateInFile({
|
|
|
61
61
|
return false;
|
|
62
62
|
}
|
|
63
63
|
}))
|
|
64
|
-
.pipe(scaleSync(SCALE_FILE_READS, async (id: string) => {
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
.pipe(scaleSync(SCALE_FILE_READS, async (id: string): Promise<{ id: string, text: string } | null> => {
|
|
65
|
+
try {
|
|
66
|
+
const text = await readFile(id, "utf8");
|
|
67
|
+
return {id, text};
|
|
68
|
+
} catch {
|
|
69
|
+
return null;
|
|
70
|
+
}
|
|
67
71
|
}))
|
|
68
|
-
.pipe(
|
|
69
|
-
|
|
72
|
+
.pipe(filter(async (r: { id: string, text: string } | null) => r !== null))
|
|
73
|
+
.pipe(flatMap((r: { id: string, text: string } | null): IWindow[] => {
|
|
74
|
+
return windowsOf({text: r!.text, id: r!.id, windowLines});
|
|
70
75
|
}));
|
|
71
76
|
|
|
72
77
|
const withVectors = from<IWindow>(windows)
|
package/componets/walkFiles.ts
CHANGED
|
@@ -1,11 +1,16 @@
|
|
|
1
|
-
import {readdir, stat, readFile} from "fs/promises";
|
|
1
|
+
import {readdir, stat, readFile, realpath} from "fs/promises";
|
|
2
2
|
import {join, relative} from "path";
|
|
3
3
|
import ignore from "ignore";
|
|
4
4
|
import {ILogger} from "./logger.js";
|
|
5
5
|
|
|
6
6
|
export type IWalkFiles = (inputs: string[]) => AsyncIterable<string>;
|
|
7
7
|
|
|
8
|
-
export function WalkFiles({cwd, log, ignoreFiles = []
|
|
8
|
+
export function WalkFiles({cwd, log, ignoreFiles = [], followSymlinks = false}: {
|
|
9
|
+
cwd: string,
|
|
10
|
+
log: ILogger,
|
|
11
|
+
ignoreFiles?: string[],
|
|
12
|
+
followSymlinks?: boolean,
|
|
13
|
+
}): IWalkFiles {
|
|
9
14
|
|
|
10
15
|
async function tryReadGitignore(dir: string): Promise<string> {
|
|
11
16
|
try {
|
|
@@ -15,6 +20,8 @@ export function WalkFiles({cwd, log, ignoreFiles = []}: {cwd: string, log: ILogg
|
|
|
15
20
|
}
|
|
16
21
|
}
|
|
17
22
|
|
|
23
|
+
const visited = new Set<string>();
|
|
24
|
+
|
|
18
25
|
async function* walk(dir: string, parentRules: string[]): AsyncIterable<string> {
|
|
19
26
|
const localGitignore = await tryReadGitignore(dir);
|
|
20
27
|
const rules = localGitignore ? [...parentRules, localGitignore] : parentRules;
|
|
@@ -27,11 +34,35 @@ export function WalkFiles({cwd, log, ignoreFiles = []}: {cwd: string, log: ILogg
|
|
|
27
34
|
const abs = join(dir, entry.name);
|
|
28
35
|
const rel = relative(cwd, abs);
|
|
29
36
|
|
|
37
|
+
if (entry.isSymbolicLink()) {
|
|
38
|
+
if (!followSymlinks) {
|
|
39
|
+
log(`skip symlink ${rel}`);
|
|
40
|
+
continue;
|
|
41
|
+
}
|
|
42
|
+
try {
|
|
43
|
+
const real = await realpath(abs);
|
|
44
|
+
if (visited.has(real)) continue;
|
|
45
|
+
visited.add(real);
|
|
46
|
+
const s = await stat(real);
|
|
47
|
+
if (s.isDirectory()) {
|
|
48
|
+
if (entry.name.startsWith(".")) continue;
|
|
49
|
+
if (ig.ignores(rel + "/")) continue;
|
|
50
|
+
yield* walk(abs, rules);
|
|
51
|
+
} else if (s.isFile()) {
|
|
52
|
+
if (ig.ignores(rel)) continue;
|
|
53
|
+
yield rel;
|
|
54
|
+
}
|
|
55
|
+
} catch (e: any) {
|
|
56
|
+
log(`skip symlink ${rel}: ${e?.code ?? e?.message ?? e}`);
|
|
57
|
+
}
|
|
58
|
+
continue;
|
|
59
|
+
}
|
|
60
|
+
|
|
30
61
|
if (entry.isDirectory()) {
|
|
31
62
|
if (entry.name.startsWith(".")) continue;
|
|
32
63
|
if (ig.ignores(rel + "/")) continue;
|
|
33
64
|
yield* walk(abs, rules);
|
|
34
|
-
} else {
|
|
65
|
+
} else if (entry.isFile()) {
|
|
35
66
|
if (ig.ignores(rel)) continue;
|
|
36
67
|
yield rel;
|
|
37
68
|
}
|
package/features/indexContent.ts
CHANGED
|
@@ -2,22 +2,31 @@ import {IndexCommandType, IIndexApi} from "../componets/index/indexApi.js";
|
|
|
2
2
|
import {IExtractKeywords} from "../componets/keywords/extractKeywords.js";
|
|
3
3
|
import {ICleanUpKeywords} from "../componets/keywords/cleanUpKeywords.js";
|
|
4
4
|
import {IEmbed} from "../componets/llm/embed.js";
|
|
5
|
+
import {ILogger} from "../componets/logger.js";
|
|
5
6
|
|
|
6
7
|
export type IIndexContent = (items: Array<{id: string, content: string}>) => Promise<void>;
|
|
7
8
|
|
|
8
|
-
export function IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi}: {
|
|
9
|
+
export function IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi, log}: {
|
|
9
10
|
extractKeywords: IExtractKeywords,
|
|
10
11
|
cleanUpKeywords: ICleanUpKeywords,
|
|
11
12
|
embed: IEmbed,
|
|
12
13
|
indexApi: IIndexApi,
|
|
14
|
+
log: ILogger,
|
|
13
15
|
}): IIndexContent {
|
|
14
16
|
return async function indexContent(items) {
|
|
15
|
-
const indexItems = await Promise.all(items.map(async (item) => {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
const indexItems = (await Promise.all(items.map(async (item) => {
|
|
18
|
+
try {
|
|
19
|
+
const keywords = cleanUpKeywords(extractKeywords(item.content)).join(", ");
|
|
20
|
+
const vector = await embed(keywords);
|
|
21
|
+
return {id: item.id, vector, keywords};
|
|
22
|
+
} catch (e: any) {
|
|
23
|
+
log(`skip ${item.id}: keywords/embed failed — ${e?.code ?? e?.message ?? e}`);
|
|
24
|
+
return null;
|
|
25
|
+
}
|
|
26
|
+
}))).filter((x): x is NonNullable<typeof x> => !!x);
|
|
20
27
|
|
|
21
|
-
|
|
28
|
+
if (indexItems.length > 0) {
|
|
29
|
+
await indexApi({type: IndexCommandType.index, items: indexItems});
|
|
30
|
+
}
|
|
22
31
|
}
|
|
23
32
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xindex",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.6",
|
|
4
4
|
"description": "Local semantic code search — index codebase, search by meaning or keywords",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "xindex.ts",
|
|
@@ -15,11 +15,16 @@
|
|
|
15
15
|
"index": "tsx apps/run.index.ts",
|
|
16
16
|
"search": "tsx apps/run.search.ts",
|
|
17
17
|
"reset": "tsx apps/run.reset.ts",
|
|
18
|
+
"test": "npm run test.functional && npm run test.compilation",
|
|
18
19
|
"mcp": "tsx apps/run.mcp.ts",
|
|
19
20
|
"watch": "tsx apps/run.watch.ts",
|
|
21
|
+
"test.functional": "bash test/functional.sh",
|
|
20
22
|
"test.compilation": "npx -y tsc --ignoreConfig --noEmit --target ES2022 --module ESNext --moduleResolution bundler --esModuleInterop --skipLibCheck --strict false $(git ls-files '*.ts')"
|
|
21
23
|
},
|
|
22
24
|
"private": false,
|
|
25
|
+
"engines": {
|
|
26
|
+
"node": ">=22"
|
|
27
|
+
},
|
|
23
28
|
"keywords": [
|
|
24
29
|
"semantic-search",
|
|
25
30
|
"code-search",
|