xindex 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/.xindex.json +2 -1
  2. package/CLAUDE.md +1 -0
  3. package/README.md +6 -0
  4. package/apps/indexApp.ts +9 -8
  5. package/apps/mcpApp.ts +6 -6
  6. package/apps/run.index.ts +2 -2
  7. package/apps/run.mcp.ts +6 -4
  8. package/apps/run.search.ts +1 -1
  9. package/apps/run.watch.ts +3 -3
  10. package/apps/searchApp.ts +4 -2
  11. package/apps/watchApp.ts +16 -8
  12. package/apps/watchFileEventsApp.ts +14 -4
  13. package/componets/buildComponents.ts +25 -9
  14. package/componets/config/DEFAULT_LOCATE_BATCH_SIZE.ts +1 -0
  15. package/componets/config/INDEXING_BATCH_SIZE.ts +1 -0
  16. package/componets/config/WATCH_FLUSH_MS.ts +1 -0
  17. package/componets/config/loadConfig.ts +10 -1
  18. package/componets/config/xindexConfig.ts +2 -0
  19. package/componets/ignore/loadIgnoreChain.ts +40 -0
  20. package/componets/index/contentIndexDriver.ts +7 -5
  21. package/componets/index/documentContentIndexDriver.ts +126 -0
  22. package/componets/index/documentIndex.ts +26 -0
  23. package/componets/index/formatSearchResults.ts +16 -2
  24. package/componets/index/handleFileEvent.ts +48 -3
  25. package/componets/index/indexApi.ts +39 -11
  26. package/componets/locate/bm25.ts +50 -0
  27. package/componets/locate/inMemoryIndex.ts +48 -0
  28. package/componets/locate/locateInFile.ts +148 -0
  29. package/componets/locate/windowsOf.ts +29 -0
  30. package/componets/watchFiles.ts +5 -16
  31. package/features/indexContent.ts +12 -5
  32. package/features/removeContent.ts +3 -3
  33. package/features/searchIndex.ts +22 -5
  34. package/package.json +15 -2
  35. package/packages/streamx/src/batchTimed.ts +1 -1
  36. package/packages/streamx/src/buffer.ts +1 -1
  37. package/packages/streamx/src/defer.ts +55 -0
  38. package/packages/streamx/src/interval.ts +1 -1
  39. package/packages/streamx/src/merge.ts +1 -1
  40. package/packages/streamx/src/nodeWritable.ts +1 -1
  41. package/packages/streamx/src/scale.ts +2 -2
  42. package/packages/streamx/src/writer.ts +1 -1
  43. package/.ai/research/.gitkeep +0 -0
  44. package/.ai/task/.gitkeep +0 -0
  45. package/.claude/settings.local.json +0 -73
  46. package/.claude/skills/make-hof/SKILL.md +0 -8
  47. package/.claude/skills/make-hof/playbook.md +0 -38
  48. package/.cursor/mcp.json +0 -8
  49. package/media/MEDIUM.md +0 -139
  50. package/media/SOCIAL.md +0 -102
@@ -9,8 +9,22 @@ export function FormatSearchResults(): IFormatSearchResults {
9
9
  const lines: string[] = [];
10
10
  for (let i = 0; i < results.length; i++) {
11
11
  const r = results[i];
12
- const kw = r.keywords ? `${r.keywords}` : "";
13
- lines.push(`${i + 1}. ${r.id}${kw}`);
12
+ const scoreStr = `[${r.score.toFixed(2)}]`;
13
+ if (r.snippet !== undefined && r.snippet !== "") {
14
+ const header =
15
+ typeof r.startLine === "number"
16
+ ? `${i + 1}. ${r.id}:L${r.startLine}-L${r.endLine ?? r.startLine} ${scoreStr}`
17
+ : `${i + 1}. ${r.id} ${scoreStr}`;
18
+ const snippetBlock = r.snippet
19
+ .split("\n")
20
+ .map((line) => ` ${line}`)
21
+ .join("\n");
22
+ lines.push(`${header}\n\n${snippetBlock}`);
23
+ } else if (r.keywords) {
24
+ lines.push(`${i + 1}. ${r.id} ${scoreStr} — ${r.keywords}`);
25
+ } else {
26
+ lines.push(`${i + 1}. ${r.id} ${scoreStr}`);
27
+ }
14
28
  }
15
29
 
16
30
  return `\n# Search: "${query}" — ${results.length} result(s)\n\n${lines.join(";\n\n")}\n`;
@@ -5,6 +5,7 @@ import {ILogger} from "../logger.js";
5
5
  import {FileEventType, IFileEvent} from "../watchFiles.js";
6
6
 
7
7
  export type IHandleFileEvent = (event: IFileEvent) => Promise<void>;
8
+ export type IHandleFileEvents = (events: IFileEvent[]) => Promise<void>;
8
9
 
9
10
  export function HandleFileEvent({indexContent, removeContent, log}: {
10
11
  indexContent: IIndexContent,
@@ -13,13 +14,57 @@ export function HandleFileEvent({indexContent, removeContent, log}: {
13
14
  }): IHandleFileEvent {
14
15
  return async function handleFileEvent(event) {
15
16
  if (event.type === FileEventType.index) {
16
- try { await removeContent(event.path); } catch (e) { log(`remove failed: ${event.path} — ${(e as any)?.message ?? e}`); }
17
17
  const text = await readFile(event.path, "utf8");
18
- await indexContent(event.path, `${text}. ${event.path}`);
18
+ await indexContent([{id: event.path, content: `${text}. ${event.path}`}]);
19
19
  log(`index: ${event.path}`);
20
20
  } else {
21
- try { await removeContent(event.path); } catch (e) { log(`remove failed: ${event.path} — ${(e as any)?.message ?? e}`); }
21
+ try { await removeContent([event.path]); } catch (e) { log(`remove failed: ${event.path} — ${(e as any)?.message ?? e}`); }
22
22
  log(`remove: ${event.path}`);
23
23
  }
24
24
  };
25
25
  }
26
+
27
+ export function HandleFileEvents({indexContent, removeContent, log}: {
28
+ indexContent: IIndexContent,
29
+ removeContent: IRemoveContent,
30
+ log: ILogger,
31
+ }): IHandleFileEvents {
32
+ return async function handleFileEvents(events) {
33
+ const indexEvents = events.filter((event) => event.type === FileEventType.index);
34
+ const removeEvents = events.filter((event) => event.type === FileEventType.remove);
35
+
36
+ if (indexEvents.length > 0) {
37
+ const indexItems = (await Promise.all(indexEvents.map(async (event) => {
38
+ log(`index: ${event.path}`);
39
+ try {
40
+ const text = await readFile(event.path, "utf8");
41
+ return {id: event.path, content: `${text}. ${event.path}`};
42
+ } catch (e) {
43
+ log(`index failed: ${event.path} — ${(e as any)?.message ?? e}`);
44
+ return undefined;
45
+ }
46
+ }))).filter((item): item is { id: string, content: string } => !!item);
47
+
48
+ if (indexItems.length > 0) {
49
+ try {
50
+ await indexContent(indexItems);
51
+ } catch (e) {
52
+ log(`index batch failed: ${(e as any)?.message ?? e}`);
53
+ }
54
+ }
55
+ }
56
+
57
+ if (removeEvents.length > 0) {
58
+ const removePaths = removeEvents.map((event) => {
59
+ log(`remove: ${event.path}`);
60
+ return event.path;
61
+ });
62
+
63
+ try {
64
+ await removeContent(removePaths);
65
+ } catch (e) {
66
+ log(`remove batch failed: ${(e as any)?.message ?? e}`);
67
+ }
68
+ }
69
+ };
70
+ }
@@ -2,7 +2,6 @@ import {LocalIndex} from "vectra";
2
2
  import {IType} from "../IType.js";
3
3
  import {ISerial, Serial} from "../../packages/fun/src/serial.js";
4
4
  import {caseNever} from "../../packages/fun/src/case-never.js";
5
- import {IEmbed} from "../llm/embed.js";
6
5
 
7
6
  export enum IndexCommandType {
8
7
  index = 'index',
@@ -11,26 +10,55 @@ export enum IndexCommandType {
11
10
  }
12
11
 
13
12
  export type IIndexCommand =
14
- | IType<{ type: IndexCommandType.index, id: string, content: string, keywords: string }>
15
- | IType<{ type: IndexCommandType.delete, id: string }>
13
+ | IType<{ type: IndexCommandType.index, items: Array<{id: string, vector: number[], keywords: string}> }>
14
+ | IType<{ type: IndexCommandType.delete, ids: string[] }>
16
15
  | IType<{ type: IndexCommandType.reset }>;
17
16
 
18
17
  export type IIndexApi = ISerial<IIndexCommand, void>;
19
18
 
20
- export function IndexApi({index, embed}: { index: LocalIndex, embed: IEmbed }): IIndexApi {
19
+ export function IndexApi({index}: { index: LocalIndex }): IIndexApi {
21
20
  return Serial<IIndexCommand, void>(async msg => {
22
21
  switch (msg.type) {
23
22
  case IndexCommandType.delete: {
24
- await index.deleteItem(msg.id);
23
+ await index.beginUpdate();
24
+ try {
25
+ for (const id of msg.ids) {
26
+ await index.deleteItem(id);
27
+ }
28
+ } finally {
29
+ await index.endUpdate();
30
+ }
25
31
  break;
26
32
  }
27
33
  case IndexCommandType.index: {
28
- const vector = await embed(msg.content);
29
- await index.upsertItem({
30
- id: msg.id,
31
- vector,
32
- metadata: {id: msg.id, keywords: msg.keywords},
33
- });
34
+ try {
35
+ await index.batchInsertItems(msg.items.map(item => ({
36
+ id: item.id,
37
+ vector: item.vector,
38
+ metadata: {id: item.id, keywords: item.keywords},
39
+ })));
40
+ } catch (error) {
41
+ const errorMessage = String((error as Error)?.message ?? error).toLowerCase();
42
+ const isDuplicateIdError =
43
+ errorMessage.includes("already exists")
44
+ || errorMessage.includes("duplicate");
45
+ if (!isDuplicateIdError) {
46
+ throw error;
47
+ }
48
+
49
+ await index.beginUpdate();
50
+ try {
51
+ for (const item of msg.items) {
52
+ await index.upsertItem({
53
+ id: item.id,
54
+ vector: item.vector,
55
+ metadata: {id: item.id, keywords: item.keywords},
56
+ });
57
+ }
58
+ } finally {
59
+ await index.endUpdate();
60
+ }
61
+ }
34
62
  break;
35
63
  }
36
64
  case IndexCommandType.reset: {
@@ -0,0 +1,50 @@
1
+ export type IBm25Doc = { id: string; tokens: string[] };
2
+
3
+ export type IBm25 = {
4
+ score(queryTokens: string[]): Array<{ id: string; score: number }>;
5
+ };
6
+
7
+ export function Bm25({docs, k1 = 1.5, b = 0.75}: {
8
+ docs: IBm25Doc[];
9
+ k1?: number;
10
+ b?: number;
11
+ }): IBm25 {
12
+ const N = docs.length;
13
+ const df = new Map<string, number>();
14
+ const tfs: Array<Map<string, number>> = [];
15
+ let totalLen = 0;
16
+
17
+ for (const d of docs) {
18
+ const tf = new Map<string, number>();
19
+ for (const t of d.tokens) tf.set(t, (tf.get(t) ?? 0) + 1);
20
+ tfs.push(tf);
21
+ totalLen += d.tokens.length;
22
+ for (const t of tf.keys()) df.set(t, (df.get(t) ?? 0) + 1);
23
+ }
24
+ const avgdl = totalLen / Math.max(1, N);
25
+
26
+ return {
27
+ score(queryTokens) {
28
+ const out: Array<{ id: string; score: number }> = [];
29
+ const uniq = [...new Set(queryTokens)];
30
+ for (let i = 0; i < docs.length; i++) {
31
+ const tf = tfs[i];
32
+ const dl = docs[i].tokens.length;
33
+ let s = 0;
34
+ for (const t of uniq) {
35
+ const f = tf.get(t);
36
+ if (!f) continue;
37
+ const n = df.get(t)!;
38
+ const idf = Math.log(((N - n + 0.5) / (n + 0.5)) + 1);
39
+ s += idf * (f * (k1 + 1)) / (f + k1 * (1 - b + b * dl / avgdl));
40
+ }
41
+ if (s > 0) out.push({id: docs[i].id, score: s});
42
+ }
43
+ return out.sort((a, b) => b.score - a.score);
44
+ },
45
+ };
46
+ }
47
+
48
+ export function tokenizeForBm25(text: string): string[] {
49
+ return text.toLowerCase().split(/[^a-z0-9]+/).filter(t => t.length >= 2);
50
+ }
@@ -0,0 +1,48 @@
1
+ import {LocalIndex, VirtualFileStorage} from "vectra";
2
+
3
+ export type ISnippetMeta = {
4
+ fileId: string;
5
+ startLine: number;
6
+ endLine: number;
7
+ snippet: string;
8
+ };
9
+
10
+ export type IInMemoryIndex = {
11
+ upsertItem(id: string, vector: number[], meta: ISnippetMeta): Promise<void>;
12
+ query(vector: number[], text: string, limit: number): Promise<Array<{score: number; meta: ISnippetMeta}>>;
13
+ dispose(): Promise<void>;
14
+ };
15
+
16
+ export function InMemoryIndex(_opts: {dimensions: number} = {dimensions: 0}): IInMemoryIndex {
17
+ const storage = new VirtualFileStorage();
18
+ const index = new LocalIndex("mem://idx", undefined, storage);
19
+ let created = false;
20
+
21
+ async function ensureCreated() {
22
+ if (created) return;
23
+ if (!(await index.isIndexCreated())) {
24
+ await index.createIndex();
25
+ }
26
+ created = true;
27
+ }
28
+
29
+ return {
30
+ async upsertItem(id, vector, meta) {
31
+ await ensureCreated();
32
+ await index.upsertItem({id, vector, metadata: meta as unknown as Record<string, any>});
33
+ },
34
+
35
+ async query(vector, text, limit) {
36
+ await ensureCreated();
37
+ const results = await index.queryItems(vector, text, limit);
38
+ return results.map(r => ({
39
+ score: r.score,
40
+ meta: r.item.metadata as unknown as ISnippetMeta,
41
+ }));
42
+ },
43
+
44
+ async dispose() {
45
+ // No-op: VirtualFileStorage is GC'd with the instance
46
+ },
47
+ };
48
+ }
@@ -0,0 +1,148 @@
1
+ import {readFile, stat} from "fs/promises";
2
+ import {IEmbed} from "../llm/embed.js";
3
+ import {IExtractKeywords} from "../keywords/extractKeywords.js";
4
+ import {ICleanUpKeywords} from "../keywords/cleanUpKeywords.js";
5
+ import {IIndexRecord} from "../../features/searchIndex.js";
6
+ import {IInMemoryIndex, InMemoryIndex} from "./inMemoryIndex.js";
7
+ import {IWindow, windowsOf} from "./windowsOf.js";
8
+ import {Bm25, IBm25Doc, tokenizeForBm25} from "./bm25.js";
9
+ import {from} from "../../packages/streamx/src/from.js";
10
+ import {filter} from "../../packages/streamx/src/filter.js";
11
+ import {map} from "../../packages/streamx/src/map.js";
12
+ import {flatMap} from "../../packages/streamx/src/flatMap.js";
13
+ import {tap} from "../../packages/streamx/src/tap.js";
14
+ import {scaleSync} from "../../packages/streamx/src/scaleSync.js";
15
+ import {run} from "../../packages/streamx/src/index.js";
16
+ import {DEFAULT_LOCATE_BATCH_SIZE} from "../config/DEFAULT_LOCATE_BATCH_SIZE";
17
+
18
+ export type ILocateInFile = (
19
+ query: string,
20
+ queryVector: number[],
21
+ candidates: IIndexRecord[],
22
+ limit: number,
23
+ ) => Promise<IIndexRecord[]>;
24
+
25
+ export function LocateInFile({
26
+ embed,
27
+ extractKeywords,
28
+ cleanUpKeywords,
29
+ windowLines = 15,
30
+ maxFileBytes = 10_000_000,
31
+ embedConcurrency = DEFAULT_LOCATE_BATCH_SIZE,
32
+ }: {
33
+ embed: IEmbed;
34
+ extractKeywords: IExtractKeywords;
35
+ cleanUpKeywords: ICleanUpKeywords;
36
+ windowLines?: number;
37
+ maxSnippetsPerFile?: number;
38
+ maxFileBytes?: number;
39
+ embedConcurrency?: number;
40
+ }): ILocateInFile {
41
+ type IWindowWithKeywords = IWindow & { keywords: string };
42
+ type IWindowWithVector = IWindowWithKeywords & { vector: number[] };
43
+
44
+ return async function locateInFile(query, queryVector, candidates, limit) {
45
+ const memIndex: IInMemoryIndex = InMemoryIndex({dimensions: queryVector.length});
46
+
47
+ try {
48
+ const ids = candidates.map(c => c.id);
49
+ const bm25Docs: IBm25Doc[] = [];
50
+ const metaById = new Map<string, { fileId: string; startLine: number; endLine: number; snippet: string }>();
51
+
52
+ const SCALE_FILE_READS = embedConcurrency;
53
+ const windows = from<string>(ids)
54
+ .pipe(filter(async (id: string) => {
55
+ try {
56
+ const s = await stat(id);
57
+ if (!s.isFile()) return false;
58
+ if (s.size > maxFileBytes) return false;
59
+ return true;
60
+ } catch {
61
+ return false;
62
+ }
63
+ }))
64
+ .pipe(scaleSync(SCALE_FILE_READS, async (id: string) => {
65
+ const text = await readFile(id, "utf8");
66
+ return {id, text};
67
+ }))
68
+ .pipe(flatMap(({id, text}: { id: string, text: string }): IWindow[] => {
69
+ return windowsOf({text, id, windowLines});
70
+ }));
71
+
72
+ const withVectors = from<IWindow>(windows)
73
+ .pipe(map<IWindow, IWindowWithKeywords>(w => {
74
+ const kw = cleanUpKeywords(extractKeywords(w.snippet)).join(", ");
75
+ return {...w, keywords: kw || w.snippet.slice(0, 200)};
76
+ }))
77
+ .pipe(scaleSync<IWindowWithKeywords, IWindowWithVector>(embedConcurrency, async (w) => {
78
+ const vector = await embed(w.keywords);
79
+ return {...w, vector};
80
+ }))
81
+ .pipe(tap(async (w: IWindowWithVector) => {
82
+ const id = `${w.fileId}:${w.startLine}:${w.endLine}`;
83
+ await memIndex.upsertItem(
84
+ id,
85
+ w.vector,
86
+ {fileId: w.fileId, startLine: w.startLine, endLine: w.endLine, snippet: w.snippet},
87
+ );
88
+ bm25Docs.push({id, tokens: tokenizeForBm25(w.snippet)});
89
+ metaById.set(id, {
90
+ fileId: w.fileId,
91
+ startLine: w.startLine,
92
+ endLine: w.endLine,
93
+ snippet: w.snippet
94
+ });
95
+ }));
96
+
97
+ await run(withVectors);
98
+
99
+ const poolSize = Math.max(limit * 8, 40);
100
+ const vecHits = await memIndex.query(queryVector, query, poolSize);
101
+
102
+ const bm25 = Bm25({docs: bm25Docs});
103
+ const bm25Hits = bm25.score(tokenizeForBm25(query)).slice(0, poolSize);
104
+
105
+ const cosById = new Map<string, number>();
106
+ vecHits.forEach(h => {
107
+ const id = `${h.meta.fileId}:${h.meta.startLine}:${h.meta.endLine}`;
108
+ cosById.set(id, h.score);
109
+ });
110
+ const bmById = new Map<string, number>();
111
+ bm25Hits.forEach(h => bmById.set(h.id, h.score));
112
+
113
+ const cosMax = Math.max(...vecHits.map(h => h.score), 1e-9);
114
+ const bmMax = Math.max(...bm25Hits.map(h => h.score), 1e-9);
115
+
116
+ const allIds = new Set<string>([...cosById.keys(), ...bmById.keys()]);
117
+ const ranked = [...allIds]
118
+ .map(id => {
119
+ const cosNorm = (cosById.get(id) ?? 0) / cosMax;
120
+ const bmNorm = (bmById.get(id) ?? 0) / bmMax;
121
+ return {id, score: Math.max(cosNorm, bmNorm), meta: metaById.get(id)!};
122
+ })
123
+ .filter(r => r.meta)
124
+ .sort((a, b) => b.score - a.score);
125
+
126
+ const kept: typeof ranked = [];
127
+ for (const h of ranked) {
128
+ const overlaps = kept.some(k =>
129
+ k.meta.fileId === h.meta.fileId &&
130
+ h.meta.startLine <= k.meta.endLine &&
131
+ h.meta.endLine >= k.meta.startLine
132
+ );
133
+ if (!overlaps) kept.push(h);
134
+ if (kept.length >= limit) break;
135
+ }
136
+
137
+ return kept.map(h => ({
138
+ score: h.score,
139
+ id: h.meta.fileId,
140
+ startLine: h.meta.startLine,
141
+ endLine: h.meta.endLine,
142
+ snippet: h.meta.snippet,
143
+ }));
144
+ } finally {
145
+ await memIndex.dispose();
146
+ }
147
+ };
148
+ }
@@ -0,0 +1,29 @@
1
+ export type IWindow = {
2
+ fileId: string;
3
+ startLine: number;
4
+ endLine: number;
5
+ snippet: string;
6
+ };
7
+
8
+ export function windowsOf({text, id, windowLines}: {
9
+ text: string;
10
+ id: string;
11
+ windowLines: number;
12
+ }): IWindow[] {
13
+ const lines = text.split("\n");
14
+ const windows: IWindow[] = [];
15
+ const scales = [Math.max(1, Math.floor(windowLines / 2)), windowLines];
16
+ for (const size of scales) {
17
+ const step = Math.max(1, Math.floor(size / 2));
18
+ for (let i = 0; i < lines.length; i += step) {
19
+ const slice = lines.slice(i, i + size);
20
+ windows.push({
21
+ fileId: id,
22
+ startLine: i + 1,
23
+ endLine: Math.min(i + size, lines.length),
24
+ snippet: slice.join("\n"),
25
+ });
26
+ }
27
+ }
28
+ return windows;
29
+ }
@@ -1,8 +1,8 @@
1
- import {readFile, stat, watch} from "fs/promises";
1
+ import {stat, watch} from "fs/promises";
2
2
  import {join, relative} from "path";
3
- import ignore from "ignore";
4
3
  import {ILogger} from "./logger.js";
5
4
  import {IType} from "./IType.js";
5
+ import {loadIgnoreChain} from "./ignore/loadIgnoreChain.js";
6
6
 
7
7
  export enum FileEventType {
8
8
  index = 'index',
@@ -19,18 +19,6 @@ export type IWatchFiles = (inputs: string[]) => IWatchFilesResult;
19
19
 
20
20
  export function WatchFiles({cwd, log, ignoreFiles = []}: { cwd: string, log: ILogger, ignoreFiles?: string[] }): IWatchFiles {
21
21
 
22
- async function loadGitignore(dir: string): Promise<ReturnType<typeof ignore>> {
23
- const ig = ignore();
24
- ig.add(".*");
25
- try {
26
- const content = await readFile(join(dir, ".gitignore"), "utf8");
27
- ig.add(content);
28
- } catch {
29
- }
30
- for (const pattern of ignoreFiles) ig.add(pattern);
31
- return ig;
32
- }
33
-
34
22
  return function watchFiles(inputs) {
35
23
  const pending = new Map<string, IFileEvent>();
36
24
  let notify: (() => void) | null = null;
@@ -38,7 +26,6 @@ export function WatchFiles({cwd, log, ignoreFiles = []}: { cwd: string, log: ILo
38
26
  const abortControllers: AbortController[] = [];
39
27
 
40
28
  async function startWatching(dir: string) {
41
- const ig = await loadGitignore(dir);
42
29
  const ac = new AbortController();
43
30
  abortControllers.push(ac);
44
31
 
@@ -48,9 +35,11 @@ export function WatchFiles({cwd, log, ignoreFiles = []}: { cwd: string, log: ILo
48
35
  if (!event.filename) continue;
49
36
 
50
37
  const rel = relative(cwd, join(dir, event.filename));
51
- if (ig.ignores(rel)) continue;
52
38
  if (rel.endsWith("~")) continue;
53
39
 
40
+ const ig = await loadIgnoreChain(cwd, rel, ignoreFiles);
41
+ if (ig.ignores(rel)) continue;
42
+
54
43
  try {
55
44
  await stat(join(dir, event.filename));
56
45
  pending.set(rel, {type: FileEventType.index, path: rel});
@@ -1,16 +1,23 @@
1
1
  import {IndexCommandType, IIndexApi} from "../componets/index/indexApi.js";
2
2
  import {IExtractKeywords} from "../componets/keywords/extractKeywords.js";
3
3
  import {ICleanUpKeywords} from "../componets/keywords/cleanUpKeywords.js";
4
+ import {IEmbed} from "../componets/llm/embed.js";
4
5
 
5
- export type IIndexContent = (id: string, content: string) => Promise<void>;
6
+ export type IIndexContent = (items: Array<{id: string, content: string}>) => Promise<void>;
6
7
 
7
- export function IndexContent({extractKeywords, cleanUpKeywords, indexApi}: {
8
+ export function IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi}: {
8
9
  extractKeywords: IExtractKeywords,
9
10
  cleanUpKeywords: ICleanUpKeywords,
11
+ embed: IEmbed,
10
12
  indexApi: IIndexApi,
11
13
  }): IIndexContent {
12
- return async function indexContent(id, content) {
13
- const keywords = cleanUpKeywords(extractKeywords(content)).join(", ");
14
- await indexApi({type: IndexCommandType.index, id, content: keywords, keywords});
14
+ return async function indexContent(items) {
15
+ const indexItems = await Promise.all(items.map(async (item) => {
16
+ const keywords = cleanUpKeywords(extractKeywords(item.content)).join(", ");
17
+ const vector = await embed(keywords);
18
+ return {id: item.id, vector, keywords};
19
+ }));
20
+
21
+ await indexApi({type: IndexCommandType.index, items: indexItems});
15
22
  }
16
23
  }
@@ -1,9 +1,9 @@
1
1
  import {IndexCommandType, IIndexApi} from "../componets/index/indexApi.js";
2
2
 
3
- export type IRemoveContent = (id: string) => Promise<void>;
3
+ export type IRemoveContent = (ids: string[]) => Promise<void>;
4
4
 
5
5
  export function RemoveContent({indexApi}: {indexApi: IIndexApi}): IRemoveContent {
6
- return async function removeContent(id) {
7
- await indexApi({type: IndexCommandType.delete, id});
6
+ return async function removeContent(ids) {
7
+ await indexApi({type: IndexCommandType.delete, ids});
8
8
  }
9
9
  }
@@ -2,26 +2,37 @@ import {LocalIndex} from "vectra";
2
2
  import {IEmbed} from "../componets/llm/embed.js";
3
3
  import {IExtractKeywords} from "../componets/keywords/extractKeywords.js";
4
4
  import {ICleanUpKeywords} from "../componets/keywords/cleanUpKeywords.js";
5
+ import {ILocateInFile} from "../componets/locate/locateInFile.js";
5
6
 
6
- export type IIndexRecord = { score: number; id: string; keywords: string };
7
+ export type IIndexRecord = {
8
+ score: number;
9
+ id: string;
10
+ keywords?: string;
11
+ startPos?: number;
12
+ endPos?: number;
13
+ snippet?: string;
14
+ startLine?: number;
15
+ endLine?: number;
16
+ };
7
17
 
8
18
  export type ISearchIndex = (query: string, limit: number) => Promise<IIndexRecord[]>;
9
19
 
10
- export function SearchIndex({extractKeywords, cleanUpKeywords, embed, index, scoreThreshold = 0.05}: {
20
+ export function SearchIndex({extractKeywords, cleanUpKeywords, embed, index, locateInFile, scoreThreshold = 0.05}: {
11
21
  extractKeywords: IExtractKeywords,
12
22
  cleanUpKeywords: ICleanUpKeywords,
13
23
  embed: IEmbed,
14
24
  index: LocalIndex,
15
- scoreThreshold?: number
25
+ locateInFile: ILocateInFile,
26
+ scoreThreshold: number
16
27
  }): ISearchIndex {
17
28
  return async function searchContentIndex(query, limit) {
18
29
  const keywords = cleanUpKeywords(extractKeywords(query));
19
30
  const searchText = keywords.length > 0 ? keywords.join(", ") : query;
20
31
  const vector = await embed(searchText);
21
32
 
22
- const results = await index.queryItems(vector, searchText, limit);
33
+ const results = await index.queryItems(vector, searchText, limit * 2);
23
34
 
24
- return results
35
+ const candidates = results
25
36
  .filter(r => r.score >= scoreThreshold)
26
37
  .sort((a, b) => b.score - a.score)
27
38
  .map(r => ({
@@ -29,5 +40,11 @@ export function SearchIndex({extractKeywords, cleanUpKeywords, embed, index, sco
29
40
  id: r.item.id,
30
41
  keywords: typeof r.item.metadata?.keywords === "string" ? r.item.metadata.keywords : "",
31
42
  }));
43
+
44
+ if (!locateInFile || candidates.length === 0) {
45
+ return candidates;
46
+ }
47
+
48
+ return locateInFile(query, vector, candidates, limit);
32
49
  }
33
50
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xindex",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "Local semantic code search — index codebase, search by meaning or keywords",
5
5
  "type": "module",
6
6
  "main": "xindex.ts",
@@ -16,9 +16,19 @@
16
16
  "search": "tsx apps/run.search.ts",
17
17
  "reset": "tsx apps/run.reset.ts",
18
18
  "mcp": "tsx apps/run.mcp.ts",
19
- "watch": "tsx apps/run.watch.ts"
19
+ "watch": "tsx apps/run.watch.ts",
20
+ "test.compilation": "npx -y tsc --ignoreConfig --noEmit --target ES2022 --module ESNext --moduleResolution bundler --esModuleInterop --skipLibCheck --strict false $(git ls-files '*.ts')"
20
21
  },
21
22
  "private": false,
23
+ "keywords": [
24
+ "semantic-search",
25
+ "code-search",
26
+ "mcp",
27
+ "claude-code",
28
+ "embeddings",
29
+ "local-first",
30
+ "developer-tools"
31
+ ],
22
32
  "dependencies": {
23
33
  "@huggingface/transformers": "^4.0.1",
24
34
  "@modelcontextprotocol/sdk": "^1.29.0",
@@ -28,5 +38,8 @@
28
38
  "tsx": "^4.21.0",
29
39
  "vectra": "^0.14.0",
30
40
  "zod": "^4.3.6"
41
+ },
42
+ "devDependencies": {
43
+ "typescript": "^6.0.3"
31
44
  }
32
45
  }
@@ -1,7 +1,7 @@
1
1
  import { StreamXMapper } from './index';
2
2
  import { IRead, read } from './reader';
3
3
  import { IWriter, Writer } from './writer';
4
- import { syncTick } from '@handy/fun/tick';
4
+ import { syncTick } from '../../fun/src/tick';
5
5
  import { clearTimeout } from 'timers';
6
6
 
7
7
  type Milliseconds = number;
@@ -1,7 +1,7 @@
1
1
  import { StreamX, StreamXMapper } from './index';
2
2
  import { IRead, read } from './reader';
3
3
  import { IWriter, Writer } from './writer';
4
- import { syncTick } from '@handy/fun/tick';
4
+ import { syncTick } from '../../fun/src/tick';
5
5
 
6
6
  export function buffer<Input>(size: number): StreamXMapper<Input, Input> {
7
7
  let outputBuffer: IWriter<Input>;