xindex 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.xindex.json +2 -1
- package/CLAUDE.md +1 -0
- package/README.md +73 -0
- package/apps/indexApp.ts +9 -8
- package/apps/mcpApp.ts +8 -8
- package/apps/run.index.ts +2 -2
- package/apps/run.mcp.ts +6 -4
- package/apps/run.search.ts +1 -1
- package/apps/run.watch.ts +3 -3
- package/apps/searchApp.ts +5 -3
- package/apps/watchApp.ts +16 -8
- package/apps/watchFileEventsApp.ts +14 -4
- package/componets/buildComponents.ts +25 -9
- package/componets/config/DEFAULT_LOCATE_BATCH_SIZE.ts +1 -0
- package/componets/config/INDEXING_BATCH_SIZE.ts +1 -0
- package/componets/config/WATCH_FLUSH_MS.ts +1 -0
- package/componets/config/loadConfig.ts +10 -1
- package/componets/config/xindexConfig.ts +2 -0
- package/componets/ignore/loadIgnoreChain.ts +40 -0
- package/componets/index/contentIndexDriver.ts +7 -5
- package/componets/index/documentContentIndexDriver.ts +126 -0
- package/componets/index/documentIndex.ts +26 -0
- package/componets/index/formatSearchResults.ts +16 -2
- package/componets/index/handleFileEvent.ts +48 -3
- package/componets/index/indexApi.ts +39 -11
- package/componets/locate/bm25.ts +50 -0
- package/componets/locate/inMemoryIndex.ts +48 -0
- package/componets/locate/locateInFile.ts +148 -0
- package/componets/locate/windowsOf.ts +29 -0
- package/componets/watchFiles.ts +5 -16
- package/features/indexContent.ts +12 -5
- package/features/removeContent.ts +3 -3
- package/features/searchIndex.ts +22 -5
- package/package.json +15 -2
- package/packages/streamx/src/batchTimed.ts +1 -1
- package/packages/streamx/src/buffer.ts +1 -1
- package/packages/streamx/src/defer.ts +55 -0
- package/packages/streamx/src/interval.ts +1 -1
- package/packages/streamx/src/merge.ts +1 -1
- package/packages/streamx/src/nodeWritable.ts +1 -1
- package/packages/streamx/src/scale.ts +2 -2
- package/packages/streamx/src/writer.ts +1 -1
- package/.ai/research/.gitkeep +0 -0
- package/.ai/task/.gitkeep +0 -0
- package/.claude/settings.local.json +0 -73
- package/.claude/skills/make-hof/SKILL.md +0 -8
- package/.claude/skills/make-hof/playbook.md +0 -38
- package/.cursor/mcp.json +0 -8
- package/media/MEDIUM.md +0 -139
- package/media/SOCIAL.md +0 -102
- package/rnd/hf.ts +0 -14
- package/rnd/keywords-compromise.ts +0 -18
- package/rnd/keywords-pipeline.ts +0 -79
- package/rnd/keywords.ts +0 -38
- package/rnd/test-vectra-memory.ts +0 -63
- package/rnd/vectra-keywords.ts +0 -95
- package/rnd/vectra.ts +0 -50
package/.xindex.json
CHANGED
package/CLAUDE.md
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
- **Text diagrams** — ASCII flows, hierarchies, tables. Keep minimal.
|
|
10
10
|
- **research/search/ground** — search the Internet using DuckDuckGo MCP
|
|
11
11
|
- **plan dev / go / dev / implement** — start implementation → triggers Pre-implementation check
|
|
12
|
+
- **check types / test compilation** — run `yarn test.compilation` to validate TypeScript compilation without relying on `tsconfig.json`
|
|
12
13
|
- **recover** — find most recent `task.*.md` in `.ai/task/` (by date+mtime, exclude `*.log.md`/`*.report.md`), summarize state and next steps
|
|
13
14
|
- **pull details / expand / flesh out** — enrich task with full detail while preserving shape (see Detail expansion)
|
|
14
15
|
|
package/README.md
CHANGED
|
@@ -39,6 +39,73 @@ Drop this into `.mcp.json` at your project root:
|
|
|
39
39
|
|
|
40
40
|
Open the project in Claude Code — it picks up the xindex MCP server and can call `xindex_search`, `xindex_index`, and `xindex_reset` directly. Fewer hallucinations, fewer round-trips.
|
|
41
41
|
|
|
42
|
+
## Claude Code skills (`@xi`)
|
|
43
|
+
|
|
44
|
+
Two optional [Claude Code skills](https://docs.claude.com/en/docs/claude-code/skills) wrap the MCP tools so you don't have to think about them:
|
|
45
|
+
|
|
46
|
+
- **`ask-xi`** — read-only discovery. `@xi where is auth handled` drafts several focused queries, runs `xindex_search` for each, and returns ranked file paths with matched keywords. Use it as a cheap first step before grepping or asking a heavier model.
|
|
47
|
+
- **`xindex`** — index management (`xindex_index`, `xindex_reset`). Reset requires explicit confirmation every time.
|
|
48
|
+
|
|
49
|
+
Keeping them separate keeps `@xi` safe to fire casually while destructive ops stay behind the `xindex` skill.
|
|
50
|
+
|
|
51
|
+
### Install
|
|
52
|
+
|
|
53
|
+
Pick one — project-scoped or user-global:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Project (checked in, shared with the repo)
|
|
57
|
+
mkdir -p .claude/skills/ask-xi .claude/skills/xindex
|
|
58
|
+
|
|
59
|
+
# Or user-global (available in every project)
|
|
60
|
+
mkdir -p ~/.claude/skills/ask-xi ~/.claude/skills/xindex
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Then drop these two files in.
|
|
64
|
+
|
|
65
|
+
`ask-xi/SKILL.md`:
|
|
66
|
+
|
|
67
|
+
````md
|
|
68
|
+
---
|
|
69
|
+
name: ask-xi
|
|
70
|
+
description: Discovers relevant files via xindex semantic search — preps queries, auto-indexes on empty, returns file links with keywords. Triggered by @xi.
|
|
71
|
+
argument-hint: "[question]"
|
|
72
|
+
---
|
|
73
|
+
Surface-level codebase discovery via xindex. Tool: `xindex_search` (natural-language, meaning-based).
|
|
74
|
+
|
|
75
|
+
**Steps:**
|
|
76
|
+
1. Draft 5–10 focused queries from $ARGUMENTS (entry points, routing, config, integrations, tests, related patterns).
|
|
77
|
+
2. Run `xindex_search` for each.
|
|
78
|
+
3. If results are empty/sparse/stale → scoped-index the most relevant content-heavy root folders (one path per `xindex_index` call, e.g. `src`, `apps`, `features`, `componets`), then re-search. Prefer scoped over full-repo.
|
|
79
|
+
4. Refine with 2–3 narrower follow-ups.
|
|
80
|
+
5. Return file paths + brief keywords showing why each matched.
|
|
81
|
+
|
|
82
|
+
Output = file links + keywords, not analysis. For reset or full re-index, delegate to `/xindex` (owns safety rules).
|
|
83
|
+
````
|
|
84
|
+
|
|
85
|
+
`xindex/SKILL.md`:
|
|
86
|
+
|
|
87
|
+
````md
|
|
88
|
+
---
|
|
89
|
+
name: xindex
|
|
90
|
+
description: Manages xindex semantic search — index, search, reset via MCP tools. For research questions, use /ask-xi.
|
|
91
|
+
argument-hint: "[search query | index | reset]"
|
|
92
|
+
---
|
|
93
|
+
Full xindex tool management. For research, use `/ask-xi`. Install: `npm i -g xindex`.
|
|
94
|
+
|
|
95
|
+
**Tools:**
|
|
96
|
+
- `xindex_search` — find files by meaning (synonyms, semantics). Try before grepping blindly.
|
|
97
|
+
- `xindex_index` — index a path (recursive, respects .gitignore). **MUST** run one path per call.
|
|
98
|
+
- `xindex_reset` — destructive wipe+rebuild. **MUST** get explicit user confirmation every time; if ambiguous, don't run.
|
|
99
|
+
|
|
100
|
+
**Workflow:** stale/corrupt → confirm → `xindex_reset` → `xindex_index(["."])` → `xindex_search`. Incremental → one-path `xindex_index(["changed/path"])` calls.
|
|
101
|
+
|
|
102
|
+
**Scoped indexing (preferred):** index only task-relevant content-heavy folders, sequentially. Full-repo `xindex_index(["."])` only for cross-cutting discovery.
|
|
103
|
+
|
|
104
|
+
$ARGUMENTS
|
|
105
|
+
````
|
|
106
|
+
|
|
107
|
+
Both skills assume the `xindex` MCP server is registered (see the section above). Restart Claude Code after adding skills.
|
|
108
|
+
|
|
42
109
|
## Features
|
|
43
110
|
|
|
44
111
|
- **Local** — everything runs on your machine; embeddings cached on disk
|
|
@@ -166,6 +233,12 @@ yarn install # or npm install
|
|
|
166
233
|
npm link # exposes xindex-* binaries from your working copy
|
|
167
234
|
```
|
|
168
235
|
|
|
236
|
+
Check TypeScript compilation:
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
yarn test.compilation
|
|
240
|
+
```
|
|
241
|
+
|
|
169
242
|
## License
|
|
170
243
|
|
|
171
244
|
MIT
|
package/apps/indexApp.ts
CHANGED
|
@@ -1,30 +1,31 @@
|
|
|
1
1
|
import {readFile} from "fs/promises";
|
|
2
2
|
import {from} from "../packages/streamx/src/from.js";
|
|
3
|
+
import {batch} from "../packages/streamx/src/batch.js";
|
|
3
4
|
import {map} from "../packages/streamx/src/map.js";
|
|
4
5
|
import {tap} from "../packages/streamx/src/tap.js";
|
|
5
6
|
import {run} from "../packages/streamx/src/index.js";
|
|
6
7
|
import {IWalkFiles} from "../componets/walkFiles.js";
|
|
7
8
|
import {IIndexContent} from "../features/indexContent.js";
|
|
8
|
-
import {IRemoveContent} from "../features/removeContent.js";
|
|
9
9
|
import {ILogger} from "../componets/logger.js";
|
|
10
|
+
import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE";
|
|
10
11
|
|
|
11
12
|
export type IIndexApp = (inputs: string[]) => Promise<void>;
|
|
12
13
|
|
|
13
|
-
export function IndexApp({walkFiles, indexContent,
|
|
14
|
+
export function IndexApp({walkFiles, indexContent, log}: {
|
|
14
15
|
walkFiles: IWalkFiles,
|
|
15
16
|
indexContent: IIndexContent,
|
|
16
|
-
removeContent: IRemoveContent,
|
|
17
17
|
log: ILogger,
|
|
18
18
|
}): IIndexApp {
|
|
19
19
|
return async function indexApp(inputs) {
|
|
20
20
|
await run(
|
|
21
21
|
from(walkFiles(inputs))
|
|
22
22
|
.pipe(tap(id => log(`indexing: ${id}`)))
|
|
23
|
-
.pipe(
|
|
24
|
-
|
|
25
|
-
const
|
|
26
|
-
|
|
27
|
-
|
|
23
|
+
.pipe(batch(INDEXING_BATCH_SIZE))
|
|
24
|
+
.pipe(map<string[], string[]>(async (ids) => {
|
|
25
|
+
const texts = await Promise.all(ids.map(id => readFile(id, "utf8")));
|
|
26
|
+
const items = ids.map((id, i) => ({id, content: `${texts[i]}. ${id}`}));
|
|
27
|
+
await indexContent(items);
|
|
28
|
+
return ids;
|
|
28
29
|
}))
|
|
29
30
|
);
|
|
30
31
|
}
|
package/apps/mcpApp.ts
CHANGED
|
@@ -1,31 +1,31 @@
|
|
|
1
1
|
import {McpServer} from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
2
|
import {StdioServerTransport} from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
3
|
import {z} from "zod";
|
|
4
|
-
import {ISearchIndex} from "../features/searchIndex.js";
|
|
5
4
|
import {IIndexApp} from "./indexApp.js";
|
|
6
5
|
import {IGetIndexStats} from "../componets/index/getIndexStats.js";
|
|
7
6
|
import {IResetIndex} from "../features/resetIndex.js";
|
|
8
7
|
import {IWatchFiles} from "../componets/watchFiles.js";
|
|
9
|
-
import {
|
|
8
|
+
import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
|
|
10
9
|
import {ILogger} from "../componets/logger.js";
|
|
11
10
|
import {WatchFileEventsApp} from "./watchFileEventsApp.js";
|
|
12
11
|
import {IWatcherLock} from "../componets/index/watcherLock.js";
|
|
13
12
|
import {IXindexConfig} from "../componets/config/xindexConfig.js";
|
|
14
13
|
import {FormatSearchResults} from "../componets/index/formatSearchResults.js";
|
|
14
|
+
import {ISearchApp} from "./searchApp.js";
|
|
15
15
|
|
|
16
16
|
export type IMcpApp = () => Promise<void>;
|
|
17
17
|
|
|
18
18
|
export type IMcpWatch = {
|
|
19
19
|
watchFiles: IWatchFiles,
|
|
20
20
|
watchDir: string,
|
|
21
|
-
|
|
21
|
+
handleFileEvents: IHandleFileEvents,
|
|
22
22
|
watcherLock: IWatcherLock,
|
|
23
23
|
};
|
|
24
24
|
|
|
25
25
|
export function McpApp({
|
|
26
|
-
|
|
26
|
+
search, indexApp, getIndexStats, resetIndex, log, watch, config,
|
|
27
27
|
}: {
|
|
28
|
-
|
|
28
|
+
search: ISearchApp,
|
|
29
29
|
indexApp: IIndexApp,
|
|
30
30
|
getIndexStats: IGetIndexStats,
|
|
31
31
|
resetIndex: IResetIndex,
|
|
@@ -44,14 +44,14 @@ export function McpApp({
|
|
|
44
44
|
inputSchema: z.object({
|
|
45
45
|
query: z.string()
|
|
46
46
|
.describe("Natural language search query"),
|
|
47
|
-
limit: z.number().int().min(1).max(
|
|
48
|
-
.describe("Max results to return,
|
|
47
|
+
limit: z.number().int().min(1).max(50).default(7)
|
|
48
|
+
.describe("Max results to return, 7 by default, 50 max"),
|
|
49
49
|
}),
|
|
50
50
|
annotations: {readOnlyHint: true},
|
|
51
51
|
}, async ({query, limit}) => {
|
|
52
52
|
try {
|
|
53
53
|
const format = FormatSearchResults();
|
|
54
|
-
const results = await
|
|
54
|
+
const results = await search(query, limit);
|
|
55
55
|
const text = await format(query, results);
|
|
56
56
|
return {content: [{type: "text" as const, text}]};
|
|
57
57
|
} catch (e) {
|
package/apps/run.index.ts
CHANGED
|
@@ -7,9 +7,9 @@ import {AppId} from "../componets/appId.js";
|
|
|
7
7
|
const appId = AppId();
|
|
8
8
|
const cwd = process.cwd();
|
|
9
9
|
const log = BufferedLoggerToStdOut();
|
|
10
|
-
const {indexContent,
|
|
10
|
+
const {indexContent, getIndexStats, config} = await BuildComponents({log});
|
|
11
11
|
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
|
|
12
|
-
const indexApp = IndexApp({walkFiles, indexContent,
|
|
12
|
+
const indexApp = IndexApp({walkFiles, indexContent, log});
|
|
13
13
|
|
|
14
14
|
const inputs = process.argv.slice(2);
|
|
15
15
|
if (!inputs.length) inputs.push(".");
|
package/apps/run.mcp.ts
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import {BuildComponents} from "../componets/buildComponents.js";
|
|
2
|
-
import {
|
|
2
|
+
import {HandleFileEvents} from "../componets/index/handleFileEvent.js";
|
|
3
3
|
import {BufferedLoggerToStdErr} from "../componets/logger.js";
|
|
4
4
|
import {WalkFiles} from "../componets/walkFiles.js";
|
|
5
5
|
import {WatchFiles} from "../componets/watchFiles.js";
|
|
6
6
|
import {WatcherLock} from "../componets/index/watcherLock.js";
|
|
7
7
|
import {IndexApp} from "./indexApp.js";
|
|
8
8
|
import {McpApp} from "./mcpApp.js";
|
|
9
|
+
import {SearchApp} from "./searchApp.js";
|
|
9
10
|
import {join} from "path";
|
|
10
11
|
import {AppId} from "../componets/appId.js";
|
|
11
12
|
|
|
@@ -17,7 +18,8 @@ const cwd = process.cwd();
|
|
|
17
18
|
const log = BufferedLoggerToStdErr();
|
|
18
19
|
const {indexContent, removeContent, getIndexStats, searchContentIndex, resetIndex, config} = await BuildComponents({log});
|
|
19
20
|
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
|
|
20
|
-
const indexApp = IndexApp({walkFiles, indexContent,
|
|
21
|
+
const indexApp = IndexApp({walkFiles, indexContent, log});
|
|
22
|
+
const search = SearchApp({searchContentIndex});
|
|
21
23
|
|
|
22
24
|
const appId = AppId();
|
|
23
25
|
const watcherLock = WatcherLock({
|
|
@@ -29,7 +31,7 @@ const watcherLock = WatcherLock({
|
|
|
29
31
|
const watch = watchDisabled ? undefined : {
|
|
30
32
|
watchFiles: WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles}),
|
|
31
33
|
watchDir: watchDirArg ? watchDirArg.split("=")[1] : ".",
|
|
32
|
-
|
|
34
|
+
handleFileEvents: HandleFileEvents({indexContent, removeContent, log}),
|
|
33
35
|
watcherLock,
|
|
34
36
|
};
|
|
35
37
|
|
|
@@ -45,5 +47,5 @@ process.on("SIGINT", async () => {
|
|
|
45
47
|
});
|
|
46
48
|
|
|
47
49
|
log(`[${appId}] started`);
|
|
48
|
-
const mcpApp = McpApp({
|
|
50
|
+
const mcpApp = McpApp({search, indexApp, getIndexStats, resetIndex, log, watch, config});
|
|
49
51
|
await mcpApp();
|
package/apps/run.search.ts
CHANGED
|
@@ -4,7 +4,7 @@ import {SearchApp} from "./searchApp.js";
|
|
|
4
4
|
import {FormatSearchResults} from "../componets/index/formatSearchResults.js";
|
|
5
5
|
|
|
6
6
|
const log = BufferedLoggerToStdOut();
|
|
7
|
-
const {searchContentIndex
|
|
7
|
+
const {searchContentIndex} = await BuildComponents({log});
|
|
8
8
|
const search = SearchApp({searchContentIndex});
|
|
9
9
|
|
|
10
10
|
const query = process.argv.slice(2).join(" ");
|
package/apps/run.watch.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import {BuildComponents} from "../componets/buildComponents.js";
|
|
2
|
-
import {
|
|
2
|
+
import {HandleFileEvents} from "../componets/index/handleFileEvent.js";
|
|
3
3
|
import {BufferedLoggerToStdOut} from "../componets/logger.js";
|
|
4
4
|
import {WalkFiles} from "../componets/walkFiles.js";
|
|
5
5
|
import {WatchFiles} from "../componets/watchFiles.js";
|
|
@@ -13,7 +13,7 @@ const log = BufferedLoggerToStdOut();
|
|
|
13
13
|
const {indexContent, removeContent, getIndexStats, config} = await BuildComponents({log});
|
|
14
14
|
const walkFiles = WalkFiles({cwd, log, ignoreFiles: config.ignoreFiles});
|
|
15
15
|
const watchFiles = WatchFiles({cwd, log, ignoreFiles: config.ignoreFiles});
|
|
16
|
-
const
|
|
16
|
+
const handleFileEvents = HandleFileEvents({indexContent, removeContent, log});
|
|
17
17
|
|
|
18
18
|
const appId = AppId();
|
|
19
19
|
const watcherLock = WatcherLock({
|
|
@@ -22,7 +22,7 @@ const watcherLock = WatcherLock({
|
|
|
22
22
|
log,
|
|
23
23
|
});
|
|
24
24
|
|
|
25
|
-
const app = WatchApp({walkFiles, watchFiles,
|
|
25
|
+
const app = WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock});
|
|
26
26
|
|
|
27
27
|
const inputs = process.argv.slice(2);
|
|
28
28
|
if (!inputs.length) inputs.push(".");
|
package/apps/searchApp.ts
CHANGED
|
@@ -2,8 +2,10 @@ import {ISearchIndex, IIndexRecord} from "../features/searchIndex.js";
|
|
|
2
2
|
|
|
3
3
|
export type ISearchApp = (query: string, limit?: number) => Promise<IIndexRecord[]>;
|
|
4
4
|
|
|
5
|
-
export function SearchApp({searchContentIndex}: {
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
export function SearchApp({searchContentIndex}: {
|
|
6
|
+
searchContentIndex: ISearchIndex;
|
|
7
|
+
}): ISearchApp {
|
|
8
|
+
return async function search(query, limit = 7) {
|
|
9
|
+
return searchContentIndex(query, limit);
|
|
8
10
|
}
|
|
9
11
|
}
|
package/apps/watchApp.ts
CHANGED
|
@@ -1,23 +1,28 @@
|
|
|
1
1
|
import {from} from "../packages/streamx/src/from.js";
|
|
2
|
+
import {batch} from "../packages/streamx/src/batch.js";
|
|
2
3
|
import {map} from "../packages/streamx/src/map.js";
|
|
3
4
|
import {tap} from "../packages/streamx/src/tap.js";
|
|
4
5
|
import {run} from "../packages/streamx/src/index.js";
|
|
6
|
+
import {readFile} from "fs/promises";
|
|
5
7
|
import {IWalkFiles} from "../componets/walkFiles.js";
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
+
import {IWatchFiles} from "../componets/watchFiles.js";
|
|
9
|
+
import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
|
|
8
10
|
import {ILogger} from "../componets/logger.js";
|
|
9
11
|
import {IWatcherLock} from "../componets/index/watcherLock.js";
|
|
10
12
|
import {WatchFileEventsApp} from "./watchFileEventsApp.js";
|
|
13
|
+
import {IIndexContent} from "../features/indexContent.js";
|
|
14
|
+
import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE";
|
|
11
15
|
|
|
12
16
|
export type IWatchApp = {
|
|
13
17
|
run: (inputs: string[]) => Promise<void>;
|
|
14
18
|
stop: () => void;
|
|
15
19
|
};
|
|
16
20
|
|
|
17
|
-
export function WatchApp({walkFiles, watchFiles,
|
|
21
|
+
export function WatchApp({walkFiles, watchFiles, handleFileEvents, indexContent, log, watcherLock}: {
|
|
18
22
|
walkFiles: IWalkFiles,
|
|
19
23
|
watchFiles: IWatchFiles,
|
|
20
|
-
|
|
24
|
+
handleFileEvents: IHandleFileEvents,
|
|
25
|
+
indexContent: IIndexContent,
|
|
21
26
|
log: ILogger,
|
|
22
27
|
watcherLock: IWatcherLock,
|
|
23
28
|
}): IWatchApp {
|
|
@@ -27,9 +32,12 @@ export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLo
|
|
|
27
32
|
await run(
|
|
28
33
|
from(walkFiles(inputs))
|
|
29
34
|
.pipe(tap(id => log(`indexing: ${id}`)))
|
|
30
|
-
.pipe(
|
|
31
|
-
|
|
32
|
-
|
|
35
|
+
.pipe(batch(INDEXING_BATCH_SIZE))
|
|
36
|
+
.pipe(map<string[], string[]>(async (ids) => {
|
|
37
|
+
const texts = await Promise.all(ids.map(id => readFile(id, "utf8")));
|
|
38
|
+
const items = ids.map((id, i) => ({id, content: `${texts[i]}. ${id}`}));
|
|
39
|
+
await indexContent(items);
|
|
40
|
+
return ids;
|
|
33
41
|
}))
|
|
34
42
|
);
|
|
35
43
|
|
|
@@ -39,7 +47,7 @@ export function WatchApp({walkFiles, watchFiles, handleFileEvent, log, watcherLo
|
|
|
39
47
|
const startWatch = WatchFileEventsApp({
|
|
40
48
|
watchFiles,
|
|
41
49
|
watchDir: inputs[0] ?? ".",
|
|
42
|
-
|
|
50
|
+
handleFileEvents,
|
|
43
51
|
log,
|
|
44
52
|
watcherLock,
|
|
45
53
|
});
|
package/apps/watchFileEventsApp.ts
CHANGED
|
@@ -1,14 +1,20 @@
|
|
|
1
1
|
import {IWatchFiles} from "../componets/watchFiles.js";
|
|
2
|
-
import {
|
|
2
|
+
import {IHandleFileEvents} from "../componets/index/handleFileEvent.js";
|
|
3
3
|
import {ILogger} from "../componets/logger.js";
|
|
4
4
|
import {IWatcherLock} from "../componets/index/watcherLock.js";
|
|
5
|
+
import {from} from "../packages/streamx/src/from.js";
|
|
6
|
+
import {batchTimed} from "../packages/streamx/src/batchTimed.js";
|
|
7
|
+
import {map} from "../packages/streamx/src/map.js";
|
|
8
|
+
import {run} from "../packages/streamx/src/index.js";
|
|
9
|
+
import {INDEXING_BATCH_SIZE} from "../componets/config/INDEXING_BATCH_SIZE.js";
|
|
10
|
+
import {WATCH_FLUSH_MS} from "../componets/config/WATCH_FLUSH_MS.js";
|
|
5
11
|
|
|
6
12
|
export type IWatchFileEventsApp = () => void;
|
|
7
13
|
|
|
8
|
-
export function WatchFileEventsApp({watchFiles, watchDir,
|
|
14
|
+
export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvents, log, watcherLock}: {
|
|
9
15
|
watchFiles: IWatchFiles,
|
|
10
16
|
watchDir: string,
|
|
11
|
-
|
|
17
|
+
handleFileEvents: IHandleFileEvents,
|
|
12
18
|
log: ILogger,
|
|
13
19
|
watcherLock: IWatcherLock,
|
|
14
20
|
}): IWatchFileEventsApp {
|
|
@@ -21,7 +27,11 @@ export function WatchFileEventsApp({watchFiles, watchDir, handleFileEvent, log,
|
|
|
21
27
|
const watcher = watchFiles([watchDir]);
|
|
22
28
|
const events = (async () => {
|
|
23
29
|
try {
|
|
24
|
-
|
|
30
|
+
await run(
|
|
31
|
+
from(watcher.events)
|
|
32
|
+
.pipe(batchTimed(INDEXING_BATCH_SIZE, WATCH_FLUSH_MS))
|
|
33
|
+
.pipe(map(handleFileEvents))
|
|
34
|
+
);
|
|
25
35
|
} catch (e) {
|
|
26
36
|
log(`watch error: ${(e as any)?.message ?? e}`);
|
|
27
37
|
}
|
package/componets/buildComponents.ts
CHANGED
|
@@ -4,24 +4,40 @@ import {CleanUpKeywords} from "./keywords/cleanUpKeywords.js";
|
|
|
4
4
|
import {ContentIndexDriver} from "./index/contentIndexDriver.js";
|
|
5
5
|
import {LoadConfig} from "./config/loadConfig.js";
|
|
6
6
|
import {ILogger} from "./logger.js";
|
|
7
|
+
import {LocateInFile} from "./locate/locateInFile.js";
|
|
7
8
|
|
|
8
|
-
export async function BuildComponents({log}: {log: ILogger}) {
|
|
9
|
+
export async function BuildComponents({log}: { log: ILogger }) {
|
|
9
10
|
const loadConfig = LoadConfig({configPath: ".xindex.json", log});
|
|
10
11
|
const config = await loadConfig();
|
|
11
12
|
|
|
12
13
|
const embed = Embed({pooling: "mean", normalize: true});
|
|
13
14
|
const extractKeywords = ExtractKeywords();
|
|
14
|
-
const cleanUpKeywords = CleanUpKeywords({maxNgrams: 2, minLength:
|
|
15
|
+
const cleanUpKeywords = CleanUpKeywords({maxNgrams: 2, minLength: 1, ignoreKeywords: config.ignoreKeywords});
|
|
16
|
+
|
|
17
|
+
const locateInFile = LocateInFile({
|
|
18
|
+
embed,
|
|
19
|
+
extractKeywords,
|
|
20
|
+
cleanUpKeywords,
|
|
21
|
+
windowLines: config.maxLines,
|
|
22
|
+
maxFileBytes: config.maxFileBytes,
|
|
23
|
+
});
|
|
15
24
|
|
|
16
25
|
const DEFAULT_INDEX_PATH = ".xindex";
|
|
17
26
|
|
|
27
|
+
const SCORE_THRESHOLD = 0.01;
|
|
28
|
+
|
|
18
29
|
const {indexContent, removeContent, getIndexStats, searchContentIndex, resetIndex}
|
|
19
30
|
= await ContentIndexDriver({
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
31
|
+
path: DEFAULT_INDEX_PATH,
|
|
32
|
+
embed,
|
|
33
|
+
extractKeywords,
|
|
34
|
+
cleanUpKeywords,
|
|
35
|
+
locateInFile,
|
|
36
|
+
scoreThreshold: SCORE_THRESHOLD
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
return {
|
|
40
|
+
extractKeywords, cleanUpKeywords, indexContent, removeContent, getIndexStats,
|
|
41
|
+
searchContentIndex, resetIndex, locateInFile, config
|
|
42
|
+
};
|
|
27
43
|
}
|
package/componets/config/DEFAULT_LOCATE_BATCH_SIZE.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export const DEFAULT_LOCATE_BATCH_SIZE = 3;
|
package/componets/config/INDEXING_BATCH_SIZE.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export const INDEXING_BATCH_SIZE = 5;
|
package/componets/config/WATCH_FLUSH_MS.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export const WATCH_FLUSH_MS = 500;
|
package/componets/config/loadConfig.ts
CHANGED
|
@@ -2,14 +2,19 @@ import {readFile} from "fs/promises";
|
|
|
2
2
|
import {IXindexConfig} from "./xindexConfig.js";
|
|
3
3
|
import {ILogger} from "../logger.js";
|
|
4
4
|
|
|
5
|
+
const DEFAULT_MAX_LINES = 30;
|
|
6
|
+
const DEFAULT_MAX_FILE_BYTES = 5_000_000;
|
|
7
|
+
|
|
5
8
|
const DEFAULTS: IXindexConfig = {
|
|
6
9
|
ignoreKeywords: [],
|
|
7
10
|
ignoreFiles: [],
|
|
11
|
+
maxLines: DEFAULT_MAX_LINES,
|
|
12
|
+
maxFileBytes: DEFAULT_MAX_FILE_BYTES,
|
|
8
13
|
};
|
|
9
14
|
|
|
10
15
|
export type ILoadConfig = () => Promise<IXindexConfig>;
|
|
11
16
|
|
|
12
|
-
export function LoadConfig({configPath, log}: {configPath: string, log: ILogger}): ILoadConfig {
|
|
17
|
+
export function LoadConfig({configPath, log}: { configPath: string, log: ILogger }): ILoadConfig {
|
|
13
18
|
return async function loadConfig() {
|
|
14
19
|
let raw: string;
|
|
15
20
|
try {
|
|
@@ -29,9 +34,13 @@ export function LoadConfig({configPath, log}: {configPath: string, log: ILogger}
|
|
|
29
34
|
}
|
|
30
35
|
|
|
31
36
|
const toStrings = (v: unknown) => Array.isArray(v) ? v.filter((e): e is string => typeof e === "string") : [];
|
|
37
|
+
const toNum = (v: unknown, def: number): number => typeof v === "number" ? v : def;
|
|
38
|
+
|
|
32
39
|
const config: IXindexConfig = {
|
|
33
40
|
ignoreKeywords: toStrings(parsed.ignoreKeywords),
|
|
34
41
|
ignoreFiles: toStrings(parsed.ignoreFiles),
|
|
42
|
+
maxLines: toNum(parsed.maxLines, DEFAULT_MAX_LINES),
|
|
43
|
+
maxFileBytes: toNum(parsed.maxFileBytes, DEFAULT_MAX_FILE_BYTES),
|
|
35
44
|
};
|
|
36
45
|
|
|
37
46
|
for (const kw of config.ignoreKeywords) {
|
package/componets/config/xindexConfig.ts
CHANGED
package/componets/ignore/loadIgnoreChain.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import {readFile} from "fs/promises";
|
|
2
|
+
import {join, dirname, relative} from "path";
|
|
3
|
+
import ignore from "ignore";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Build an `ignore` instance that accumulates `.gitignore` rules from `cwd`
|
|
7
|
+
* down to the directory containing `relPath`.
|
|
8
|
+
*
|
|
9
|
+
* Mirrors the per-directory parent-chain logic in `walkFiles.ts`.
|
|
10
|
+
*
|
|
11
|
+
* @param cwd Absolute root (same as the `cwd` passed to WalkFiles/WatchFiles)
|
|
12
|
+
* @param relPath Path of the FS event, relative to `cwd`
|
|
13
|
+
* @param ignoreFiles Additional glob patterns from config (applied on top)
|
|
14
|
+
*/
|
|
15
|
+
export async function loadIgnoreChain(
|
|
16
|
+
cwd: string,
|
|
17
|
+
relPath: string,
|
|
18
|
+
ignoreFiles: string[] = [],
|
|
19
|
+
): Promise<ReturnType<typeof ignore>> {
|
|
20
|
+
// Segments from cwd down to (but not including) the file itself
|
|
21
|
+
const fileDir = dirname(relPath); // e.g. "pkg/sub" or "."
|
|
22
|
+
const segments = fileDir === "." ? [] : fileDir.split("/");
|
|
23
|
+
|
|
24
|
+
const ig = ignore();
|
|
25
|
+
ig.add(".*");
|
|
26
|
+
|
|
27
|
+
// Walk from root down: cwd, cwd/seg0, cwd/seg0/seg1, …
|
|
28
|
+
const dirs = [cwd, ...segments.map((_, i) => join(cwd, ...segments.slice(0, i + 1)))];
|
|
29
|
+
for (const dir of dirs) {
|
|
30
|
+
try {
|
|
31
|
+
const content = await readFile(join(dir, ".gitignore"), "utf8");
|
|
32
|
+
if (content) ig.add(content);
|
|
33
|
+
} catch {
|
|
34
|
+
// no .gitignore in this dir — fine
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
for (const pattern of ignoreFiles) ig.add(pattern);
|
|
39
|
+
return ig;
|
|
40
|
+
}
|
package/componets/index/contentIndexDriver.ts
CHANGED
|
@@ -8,6 +8,7 @@ import {RemoveContent, IRemoveContent} from "../../features/removeContent.js";
|
|
|
8
8
|
import {ResetIndex, IResetIndex} from "../../features/resetIndex.js";
|
|
9
9
|
import {VectraIndex} from "./vectraIndex.js";
|
|
10
10
|
import {IndexApi} from "./indexApi.js";
|
|
11
|
+
import {ILocateInFile} from "../locate/locateInFile.js";
|
|
11
12
|
|
|
12
13
|
export type IContentIndexDriver = Readonly<{
|
|
13
14
|
getIndexStats: IGetIndexStats,
|
|
@@ -18,21 +19,22 @@ export type IContentIndexDriver = Readonly<{
|
|
|
18
19
|
flush: () => Promise<void>,
|
|
19
20
|
}>;
|
|
20
21
|
|
|
21
|
-
export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, scoreThreshold}: {
|
|
22
|
+
export async function ContentIndexDriver({path, embed, extractKeywords, cleanUpKeywords, locateInFile, scoreThreshold}: {
|
|
22
23
|
path: string,
|
|
23
24
|
embed: IEmbed,
|
|
24
25
|
extractKeywords: IExtractKeywords,
|
|
25
26
|
cleanUpKeywords: ICleanUpKeywords,
|
|
26
|
-
|
|
27
|
+
locateInFile: ILocateInFile,
|
|
28
|
+
scoreThreshold: number,
|
|
27
29
|
}): Promise<IContentIndexDriver> {
|
|
28
30
|
const index = await VectraIndex(path + "/semantic");
|
|
29
|
-
const indexApi = IndexApi({index
|
|
31
|
+
const indexApi = IndexApi({index});
|
|
30
32
|
|
|
31
33
|
return {
|
|
32
34
|
getIndexStats: GetIndexStats({index}),
|
|
33
|
-
indexContent: IndexContent({extractKeywords, cleanUpKeywords, indexApi}),
|
|
35
|
+
indexContent: IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi}),
|
|
34
36
|
removeContent: RemoveContent({indexApi}),
|
|
35
|
-
searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, scoreThreshold}),
|
|
37
|
+
searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, locateInFile, scoreThreshold}),
|
|
36
38
|
resetIndex: ResetIndex({indexApi}),
|
|
37
39
|
flush: () => indexApi.flush(),
|
|
38
40
|
};
|