xindex 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +0 -4
- package/.xindex.json +2 -1
- package/apps/indexApp.ts +1 -1
- package/apps/mcpApp.ts +18 -4
- package/apps/run.mcp.ts +1 -1
- package/apps/run.search.ts +2 -2
- package/apps/searchApp.ts +18 -6
- package/apps/watchApp.ts +1 -1
- package/components/buildComponents.ts +2 -0
- package/components/config/loadConfig.ts +6 -0
- package/components/config/xindexConfig.ts +2 -0
- package/components/index/contentIndexDriver.ts +3 -2
- package/components/keywords/cleanUpKeywords.ts +23 -3
- package/components/locate/locateInFile.ts +61 -57
- package/components/locate/windowsOf.ts +3 -1
- package/features/searchIndex.ts +71 -9
- package/package.json +2 -2
- package/packages/fun/src/array-finder.ts +1 -1
- package/packages/fun/src/array-index.ts +1 -1
- package/packages/fun/src/asyncRequest.ts +1 -1
- package/packages/fun/src/concurrency.ts +5 -5
- package/packages/fun/src/counter.ts +1 -1
- package/packages/fun/src/flatten.ts +12 -6
- package/packages/fun/src/hash128.ts +2 -2
- package/packages/fun/src/hash256.ts +2 -2
- package/packages/fun/src/hub.ts +1 -1
- package/packages/fun/src/interval.ts +1 -1
- package/packages/fun/src/mailbox.ts +1 -1
- package/packages/fun/src/match-left-and-right-arrays.ts +1 -1
- package/packages/fun/src/memos.ts +1 -1
- package/packages/fun/src/pubsub.ts +2 -2
- package/packages/fun/src/tick.ts +1 -1
- package/packages/fun/src/time-behavior.ts +1 -1
- package/packages/fun/src/timedFallback.ts +1 -1
- package/packages/fun/src/value.ts +1 -1
- package/packages/fun/src/waitForCounter.ts +2 -2
- package/packages/streamx/src/batch.ts +2 -2
- package/packages/streamx/src/batchTimed.ts +5 -5
- package/packages/streamx/src/buffer.ts +4 -4
- package/packages/streamx/src/concatenate.ts +1 -1
- package/packages/streamx/src/filter.ts +2 -2
- package/packages/streamx/src/flat.ts +1 -1
- package/packages/streamx/src/flatMap.ts +3 -3
- package/packages/streamx/src/from.ts +1 -1
- package/packages/streamx/src/interval.ts +3 -3
- package/packages/streamx/src/loop.ts +2 -2
- package/packages/streamx/src/map.ts +2 -2
- package/packages/streamx/src/merge.ts +4 -4
- package/packages/streamx/src/nodeReadable.ts +1 -1
- package/packages/streamx/src/nodeTransform.ts +2 -2
- package/packages/streamx/src/nodeWritable.ts +3 -3
- package/packages/streamx/src/objectReader.ts +2 -2
- package/packages/streamx/src/reader.ts +1 -1
- package/packages/streamx/src/reduce.ts +2 -2
- package/packages/streamx/src/scale.ts +7 -7
- package/packages/streamx/src/scaleSync.ts +5 -5
- package/packages/streamx/src/sequence.ts +1 -1
- package/packages/streamx/src/tap.ts +3 -3
- package/packages/streamx/src/toArray.ts +1 -1
- package/packages/streamx/src/writer.ts +4 -4
- package/tsconfig.json +1 -1
- package/components/index/documentContentIndexDriver.ts +0 -127
package/.mcp.json
CHANGED
package/.xindex.json
CHANGED
package/apps/indexApp.ts
CHANGED
|
@@ -6,7 +6,7 @@ import {run} from "../packages/streamx/src/index.js";
|
|
|
6
6
|
import {IWalkFiles} from "../components/walkFiles.js";
|
|
7
7
|
import {IIndexContent} from "../features/indexContent.js";
|
|
8
8
|
import {ILogger} from "../components/logger.js";
|
|
9
|
-
import {INDEXING_BATCH_SIZE} from "../components/config/INDEXING_BATCH_SIZE";
|
|
9
|
+
import {INDEXING_BATCH_SIZE} from "../components/config/INDEXING_BATCH_SIZE.js";
|
|
10
10
|
import {SafeIndexBatch} from "../components/io/safeIndexBatch.js";
|
|
11
11
|
|
|
12
12
|
export type IIndexApp = (inputs: string[]) => Promise<void>;
|
package/apps/mcpApp.ts
CHANGED
|
@@ -50,14 +50,28 @@ export function McpApp({
|
|
|
50
50
|
inputSchema: z.object({
|
|
51
51
|
query: z.string()
|
|
52
52
|
.describe("Natural language search query"),
|
|
53
|
-
limit: z.number().int().min(1).max(50).default(
|
|
54
|
-
.describe(
|
|
53
|
+
limit: z.number().int().min(1).max(50).default(config.searchDefaultLimit)
|
|
54
|
+
.describe(`Max results to return default ${config.searchDefaultLimit}, max 50)`),
|
|
55
|
+
windowLines: z.number().int().min(1).default(config.maxLines).optional()
|
|
56
|
+
.describe("Optional line-window size for per-file snippet locating"),
|
|
57
|
+
includePaths: z.array(z.string()).optional()
|
|
58
|
+
.describe("Optional gitignore-style include patterns (e.g. 'src/**', '**/*.ts', '!src/vendor/**')"),
|
|
59
|
+
excludePaths: z.array(z.string()).optional()
|
|
60
|
+
.describe("Optional gitignore-style exclude patterns (e.g. 'node_modules/**', '**/*.test.ts')"),
|
|
61
|
+
scoreThreshold: z.number().min(0).max(1).optional()
|
|
62
|
+
.describe("Optional minimum semantic score threshold (0..1)"),
|
|
55
63
|
}),
|
|
56
64
|
annotations: {readOnlyHint: true},
|
|
57
|
-
}, async ({query, limit}) => {
|
|
65
|
+
}, async ({query, limit, scoreThreshold, windowLines, includePaths, excludePaths}) => {
|
|
58
66
|
try {
|
|
59
67
|
const format = FormatSearchResults();
|
|
60
|
-
const results = await search(query,
|
|
68
|
+
const results = await search(query, {
|
|
69
|
+
limit,
|
|
70
|
+
scoreThreshold,
|
|
71
|
+
windowLines,
|
|
72
|
+
includePaths,
|
|
73
|
+
excludePaths,
|
|
74
|
+
});
|
|
61
75
|
const text = await format(query, results);
|
|
62
76
|
return {content: [{type: "text" as const, text}]};
|
|
63
77
|
} catch (e) {
|
package/apps/run.mcp.ts
CHANGED
|
@@ -29,7 +29,7 @@ const indexApp = async (inputs: string[]) => {
|
|
|
29
29
|
await rawIndexApp(inputs);
|
|
30
30
|
await flush();
|
|
31
31
|
};
|
|
32
|
-
const search = SearchApp({searchContentIndex});
|
|
32
|
+
const search = SearchApp({searchIndex: searchContentIndex, searchDefaultLimit: config.searchDefaultLimit});
|
|
33
33
|
|
|
34
34
|
const appId = AppId();
|
|
35
35
|
const watcherLock = WatcherLock({
|
package/apps/run.search.ts
CHANGED
|
@@ -4,8 +4,8 @@ import {SearchApp} from "./searchApp.js";
|
|
|
4
4
|
import {FormatSearchResults} from "../components/index/formatSearchResults.js";
|
|
5
5
|
|
|
6
6
|
const log = BufferedLoggerToStdOut();
|
|
7
|
-
const {searchContentIndex} = await BuildComponents({log});
|
|
8
|
-
const search = SearchApp({searchContentIndex});
|
|
7
|
+
const {searchContentIndex, config} = await BuildComponents({log});
|
|
8
|
+
const search = SearchApp({searchIndex: searchContentIndex, searchDefaultLimit: config.searchDefaultLimit});
|
|
9
9
|
|
|
10
10
|
const query = process.argv.slice(2).join(" ");
|
|
11
11
|
if (!query) {
|
package/apps/searchApp.ts
CHANGED
|
@@ -1,11 +1,23 @@
|
|
|
1
|
-
import {ISearchIndex,
|
|
1
|
+
import {IIndexRecord, ISearchIndex, ISearchIndexOptions} from "../features/searchIndex.js";
|
|
2
2
|
|
|
3
|
-
export type ISearchApp = (query: string,
|
|
3
|
+
export type ISearchApp = (query: string, options?: Partial<ISearchIndexOptions>) => Promise<IIndexRecord[]>;
|
|
4
4
|
|
|
5
|
-
export function SearchApp({
|
|
6
|
-
|
|
5
|
+
export function SearchApp({searchIndex, searchDefaultLimit = 7}: {
|
|
6
|
+
searchIndex: ISearchIndex;
|
|
7
|
+
searchDefaultLimit?: number;
|
|
7
8
|
}): ISearchApp {
|
|
8
|
-
return async function search(query,
|
|
9
|
-
|
|
9
|
+
return async function search(query, options = {}) {
|
|
10
|
+
const normalizedOptions: ISearchIndexOptions = {
|
|
11
|
+
limit: options.limit ?? searchDefaultLimit,
|
|
12
|
+
prefetchMultiplier: options.prefetchMultiplier,
|
|
13
|
+
scoreThreshold: options.scoreThreshold,
|
|
14
|
+
locateInFiles: options.locateInFiles,
|
|
15
|
+
windowLines: options.windowLines,
|
|
16
|
+
useKeywordsSearch: options.useKeywordsSearch,
|
|
17
|
+
includePaths: options.includePaths ?? [],
|
|
18
|
+
excludePaths: options.excludePaths ?? [],
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
return searchIndex({query, options: normalizedOptions});
|
|
10
22
|
}
|
|
11
23
|
}
|
package/apps/watchApp.ts
CHANGED
|
@@ -10,7 +10,7 @@ import {ILogger} from "../components/logger.js";
|
|
|
10
10
|
import {IWatcherLock} from "../components/index/watcherLock.js";
|
|
11
11
|
import {WatchFileEventsApp} from "./watchFileEventsApp.js";
|
|
12
12
|
import {IIndexContent} from "../features/indexContent.js";
|
|
13
|
-
import {INDEXING_BATCH_SIZE} from "../components/config/INDEXING_BATCH_SIZE";
|
|
13
|
+
import {INDEXING_BATCH_SIZE} from "../components/config/INDEXING_BATCH_SIZE.js";
|
|
14
14
|
import {SafeIndexBatch} from "../components/io/safeIndexBatch.js";
|
|
15
15
|
|
|
16
16
|
export type IWatchApp = {
|
|
@@ -23,6 +23,7 @@ export async function BuildComponents({log, watchCoalesceMs = 0, indexingCoalesc
|
|
|
23
23
|
extractKeywords,
|
|
24
24
|
cleanUpKeywords,
|
|
25
25
|
windowLines: config.maxLines,
|
|
26
|
+
embedConcurrency: config.searchConcurrency,
|
|
26
27
|
maxFileBytes: config.maxFileBytes,
|
|
27
28
|
});
|
|
28
29
|
|
|
@@ -38,6 +39,7 @@ export async function BuildComponents({log, watchCoalesceMs = 0, indexingCoalesc
|
|
|
38
39
|
cleanUpKeywords,
|
|
39
40
|
locateInFile,
|
|
40
41
|
scoreThreshold: SCORE_THRESHOLD,
|
|
42
|
+
searchConcurrency: config.searchConcurrency,
|
|
41
43
|
log,
|
|
42
44
|
indexingWatchCoalesceMs: watchCoalesceMs,
|
|
43
45
|
indexingCoalesceMs,
|
|
@@ -3,6 +3,8 @@ import {IXindexConfig} from "./xindexConfig.js";
|
|
|
3
3
|
import {ILogger} from "../logger.js";
|
|
4
4
|
|
|
5
5
|
const DEFAULT_MAX_LINES = 30;
|
|
6
|
+
const DEFAULT_SEARCH_DEFAULT_LIMIT = 7;
|
|
7
|
+
const DEFAULT_SEARCH_CONCURRENCY = 4;
|
|
6
8
|
const DEFAULT_MAX_FILE_BYTES = 50_000;
|
|
7
9
|
const DEFAULT_FOLLOW_SYMLINKS = false;
|
|
8
10
|
const DEFAULT_IGNORE_FILES = ['.xindex', 'node_modules'];
|
|
@@ -11,6 +13,8 @@ const DEFAULTS: IXindexConfig = {
|
|
|
11
13
|
ignoreKeywords: [],
|
|
12
14
|
ignoreFiles: DEFAULT_IGNORE_FILES,
|
|
13
15
|
maxLines: DEFAULT_MAX_LINES,
|
|
16
|
+
searchDefaultLimit: DEFAULT_SEARCH_DEFAULT_LIMIT,
|
|
17
|
+
searchConcurrency: DEFAULT_SEARCH_CONCURRENCY,
|
|
14
18
|
maxFileBytes: DEFAULT_MAX_FILE_BYTES,
|
|
15
19
|
followSymlinks: DEFAULT_FOLLOW_SYMLINKS,
|
|
16
20
|
};
|
|
@@ -45,6 +49,8 @@ export function LoadConfig({configPath, log}: { configPath: string, log: ILogger
|
|
|
45
49
|
ignoreKeywords: toStrings(parsed.ignoreKeywords),
|
|
46
50
|
ignoreFiles: toStrings(parsed.ignoreFiles),
|
|
47
51
|
maxLines: toNum(parsed.maxLines, DEFAULT_MAX_LINES),
|
|
52
|
+
searchDefaultLimit: Math.max(1, toNum(parsed.searchDefaultLimit, DEFAULT_SEARCH_DEFAULT_LIMIT)),
|
|
53
|
+
searchConcurrency: Math.max(1, toNum(parsed.searchConcurrency, DEFAULT_SEARCH_CONCURRENCY)),
|
|
48
54
|
maxFileBytes: toNum(parsed.maxFileBytes, DEFAULT_MAX_FILE_BYTES),
|
|
49
55
|
followSymlinks: typeof parsed.followSymlinks === "boolean" ? parsed.followSymlinks : DEFAULT_FOLLOW_SYMLINKS,
|
|
50
56
|
};
|
|
@@ -23,7 +23,7 @@ export type IContentIndexDriver = Readonly<{
|
|
|
23
23
|
}>;
|
|
24
24
|
|
|
25
25
|
export async function ContentIndexDriver({
|
|
26
|
-
path, embed, extractKeywords, cleanUpKeywords, locateInFile, scoreThreshold, log,
|
|
26
|
+
path, embed, extractKeywords, cleanUpKeywords, locateInFile, scoreThreshold, searchConcurrency, log,
|
|
27
27
|
indexingWatchCoalesceMs = 0, indexingCoalesceMs = 0,
|
|
28
28
|
}: {
|
|
29
29
|
path: string,
|
|
@@ -32,6 +32,7 @@ export async function ContentIndexDriver({
|
|
|
32
32
|
cleanUpKeywords: ICleanUpKeywords,
|
|
33
33
|
locateInFile: ILocateInFile,
|
|
34
34
|
scoreThreshold: number,
|
|
35
|
+
searchConcurrency: number,
|
|
35
36
|
log: ILogger,
|
|
36
37
|
indexingWatchCoalesceMs?: number,
|
|
37
38
|
indexingCoalesceMs?: number,
|
|
@@ -62,7 +63,7 @@ export async function ContentIndexDriver({
|
|
|
62
63
|
indexContentWatch: IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi: watchCoalesce, log}),
|
|
63
64
|
indexContentBatch: IndexContent({extractKeywords, cleanUpKeywords, embed, indexApi: batchCoalesce, log}),
|
|
64
65
|
removeContent: RemoveContent({indexApi: drainingApi}),
|
|
65
|
-
searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, locateInFile, scoreThreshold}),
|
|
66
|
+
searchContentIndex: SearchIndex({extractKeywords, cleanUpKeywords, embed, index, locateInFile, scoreThreshold, searchConcurrency}),
|
|
66
67
|
resetIndex: ResetIndex({indexApi: drainingApi}),
|
|
67
68
|
flush: flushAll,
|
|
68
69
|
};
|
|
@@ -13,7 +13,16 @@ export function CleanUpKeywords({maxNgrams, minLength, ignoreKeywords = []}: {
|
|
|
13
13
|
const ignoreSet = new Set(ignoreKeywords.map(normalize));
|
|
14
14
|
|
|
15
15
|
return function cleanUpKeywords(keywords) {
|
|
16
|
-
const
|
|
16
|
+
const joined = keywords.join(" ");
|
|
17
|
+
const expanded = joined + " " + splitIdentifiers(joined);
|
|
18
|
+
const tokens = expanded.replace(/[\W_]+/gm, " ").trim().split(/\s+/);
|
|
19
|
+
const seenTokens = new Set<string>();
|
|
20
|
+
const cleaned = tokens.filter(t => {
|
|
21
|
+
const k = t.toLowerCase();
|
|
22
|
+
if (!k || seenTokens.has(k)) return false;
|
|
23
|
+
seenTokens.add(k);
|
|
24
|
+
return true;
|
|
25
|
+
}).join(" ");
|
|
17
26
|
|
|
18
27
|
const extracted: string[] = keyword_extractor.extract(cleaned, {
|
|
19
28
|
language: "english",
|
|
@@ -25,14 +34,25 @@ export function CleanUpKeywords({maxNgrams, minLength, ignoreKeywords = []}: {
|
|
|
25
34
|
|
|
26
35
|
const seen = new Set<string>();
|
|
27
36
|
|
|
28
|
-
|
|
37
|
+
const output = extracted
|
|
29
38
|
.map(normalize)
|
|
30
39
|
.filter((kw: string) => {
|
|
31
|
-
if (kw.length <= minLength || !/[a-z]/i.test(kw)) return false;
|
|
40
|
+
// if (kw.length <= minLength || !/[a-z]/i.test(kw)) return false;
|
|
41
|
+
if (kw.length <= minLength) return false;
|
|
32
42
|
if (ignoreSet.has(kw)) return false;
|
|
33
43
|
if (seen.has(kw)) return false;
|
|
34
44
|
seen.add(kw);
|
|
35
45
|
return true;
|
|
36
46
|
});
|
|
47
|
+
|
|
48
|
+
// console.log(`CleanUpKeywords: input=${JSON.stringify(keywords, null, 2)} output=${JSON.stringify(output, null, 2)}`);
|
|
49
|
+
return output;
|
|
37
50
|
}
|
|
38
51
|
}
|
|
52
|
+
|
|
53
|
+
function splitIdentifiers(text: string): string {
|
|
54
|
+
return text
|
|
55
|
+
.replace(/[_\-.]+/g, " ")
|
|
56
|
+
.replace(/([a-z0-9])([A-Z])/g, "$1 $2")
|
|
57
|
+
.replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2");
|
|
58
|
+
}
|
|
@@ -6,20 +6,20 @@ import {IIndexRecord} from "../../features/searchIndex.js";
|
|
|
6
6
|
import {IInMemoryIndex, InMemoryIndex} from "./inMemoryIndex.js";
|
|
7
7
|
import {IWindow, windowsOf} from "./windowsOf.js";
|
|
8
8
|
import {Bm25, IBm25Doc, tokenizeForBm25} from "./bm25.js";
|
|
9
|
+
import {DEFAULT_LOCATE_BATCH_SIZE} from "../config/DEFAULT_LOCATE_BATCH_SIZE.js";
|
|
9
10
|
import {from} from "../../packages/streamx/src/from.js";
|
|
10
11
|
import {filter} from "../../packages/streamx/src/filter.js";
|
|
11
12
|
import {map} from "../../packages/streamx/src/map.js";
|
|
12
13
|
import {flatMap} from "../../packages/streamx/src/flatMap.js";
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import {DEFAULT_LOCATE_BATCH_SIZE} from "../config/DEFAULT_LOCATE_BATCH_SIZE";
|
|
16
|
-
import {scaleSync} from "../../packages/streamx/src/scaleSync";
|
|
14
|
+
import {scaleSync} from "../../packages/streamx/src/scaleSync.js";
|
|
15
|
+
import {toArray} from "../../packages/streamx/src/toArray.js";
|
|
17
16
|
|
|
18
17
|
export type ILocateInFile = (
|
|
19
18
|
query: string,
|
|
20
19
|
queryVector: number[],
|
|
21
20
|
candidates: IIndexRecord[],
|
|
22
21
|
limit: number,
|
|
22
|
+
windowLinesOverride?: number,
|
|
23
23
|
) => Promise<IIndexRecord[]>;
|
|
24
24
|
|
|
25
25
|
export function LocateInFile({
|
|
@@ -41,67 +41,70 @@ export function LocateInFile({
|
|
|
41
41
|
type IWindowWithKeywords = IWindow & { keywords: string };
|
|
42
42
|
type IWindowWithVector = IWindowWithKeywords & { vector: number[] };
|
|
43
43
|
|
|
44
|
-
return async function locateInFile(query, queryVector, candidates, limit) {
|
|
44
|
+
return async function locateInFile(query, queryVector, candidates, limit, windowLinesOverride) {
|
|
45
|
+
const effectiveWindowLines = windowLinesOverride ?? windowLines;
|
|
46
|
+
const effectiveConcurrency = Math.max(1, embedConcurrency);
|
|
45
47
|
const memIndex: IInMemoryIndex = InMemoryIndex({dimensions: queryVector.length});
|
|
46
48
|
|
|
47
49
|
try {
|
|
48
|
-
const ids =
|
|
50
|
+
const ids: string[] = await toArray(
|
|
51
|
+
from<IIndexRecord>(candidates)
|
|
52
|
+
.pipe(scaleSync<IIndexRecord, string | null>(effectiveConcurrency, async (c) => {
|
|
53
|
+
try {
|
|
54
|
+
const s = await stat(c.id);
|
|
55
|
+
if (!s.isFile()) return null;
|
|
56
|
+
if (s.size > maxFileBytes) return null;
|
|
57
|
+
return c.id;
|
|
58
|
+
} catch {
|
|
59
|
+
return null;
|
|
60
|
+
}
|
|
61
|
+
}))
|
|
62
|
+
.pipe(filter((id: string | null) => id !== null))
|
|
63
|
+
.pipe(map<string | null, string>((id) => id as string)),
|
|
64
|
+
);
|
|
49
65
|
const bm25Docs: IBm25Doc[] = [];
|
|
50
66
|
const metaById = new Map<string, { fileId: string; startLine: number; endLine: number; snippet: string }>();
|
|
51
67
|
|
|
52
|
-
const
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
const
|
|
67
|
-
return {
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
return windowsOf({text: r!.text, id: r!.id, windowLines});
|
|
75
|
-
}));
|
|
68
|
+
const withVectors: IWindowWithVector[] = await toArray(
|
|
69
|
+
from<string>(ids)
|
|
70
|
+
.pipe(scaleSync<string, {id: string, text: string} | null>(effectiveConcurrency, async (id) => {
|
|
71
|
+
try {
|
|
72
|
+
const text = await readFile(id, "utf8");
|
|
73
|
+
return {id, text};
|
|
74
|
+
} catch {
|
|
75
|
+
return null;
|
|
76
|
+
}
|
|
77
|
+
}))
|
|
78
|
+
.pipe(filter((r: {id: string, text: string} | null) => r !== null))
|
|
79
|
+
.pipe(flatMap((r: {id: string, text: string} | null): IWindow[] =>
|
|
80
|
+
windowsOf({text: r!.text, id: r!.id, windowLines: effectiveWindowLines})))
|
|
81
|
+
.pipe(map<IWindow, IWindowWithKeywords>((w: IWindow) => {
|
|
82
|
+
const kw = cleanUpKeywords(extractKeywords(w.snippet)).join(", ");
|
|
83
|
+
return {...w, keywords: kw || w.snippet.slice(0, 200)};
|
|
84
|
+
}))
|
|
85
|
+
.pipe(scaleSync<IWindowWithKeywords, IWindowWithVector>(effectiveConcurrency, async (w) => {
|
|
86
|
+
const vector = await embed(w.keywords);
|
|
87
|
+
return {...w, vector};
|
|
88
|
+
})),
|
|
89
|
+
);
|
|
76
90
|
|
|
77
|
-
const
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
);
|
|
93
|
-
bm25Docs.push({id, tokens: tokenizeForBm25(w.snippet)});
|
|
94
|
-
metaById.set(id, {
|
|
95
|
-
fileId: w.fileId,
|
|
96
|
-
startLine: w.startLine,
|
|
97
|
-
endLine: w.endLine,
|
|
98
|
-
snippet: w.snippet
|
|
99
|
-
});
|
|
100
|
-
}));
|
|
101
|
-
|
|
102
|
-
await run(withVectors);
|
|
91
|
+
for (const w of withVectors) {
|
|
92
|
+
const id = `${w.fileId}:${w.startLine}:${w.endLine}`;
|
|
93
|
+
await memIndex.upsertItem(
|
|
94
|
+
id,
|
|
95
|
+
w.vector,
|
|
96
|
+
{fileId: w.fileId, startLine: w.startLine, endLine: w.endLine, snippet: w.snippet},
|
|
97
|
+
);
|
|
98
|
+
bm25Docs.push({id, tokens: tokenizeForBm25(w.snippet)});
|
|
99
|
+
metaById.set(id, {
|
|
100
|
+
fileId: w.fileId,
|
|
101
|
+
startLine: w.startLine,
|
|
102
|
+
endLine: w.endLine,
|
|
103
|
+
snippet: w.snippet,
|
|
104
|
+
});
|
|
105
|
+
}
|
|
103
106
|
|
|
104
|
-
const poolSize = Math.
|
|
107
|
+
const poolSize = Math.min(limit * 10, 100);
|
|
105
108
|
const vecHits = await memIndex.query(queryVector, query, poolSize);
|
|
106
109
|
|
|
107
110
|
const bm25 = Bm25({docs: bm25Docs});
|
|
@@ -151,3 +154,4 @@ export function LocateInFile({
|
|
|
151
154
|
}
|
|
152
155
|
};
|
|
153
156
|
}
|
|
157
|
+
|
|
@@ -12,7 +12,9 @@ export function windowsOf({text, id, windowLines}: {
|
|
|
12
12
|
}): IWindow[] {
|
|
13
13
|
const lines = text.split("\n");
|
|
14
14
|
const windows: IWindow[] = [];
|
|
15
|
-
const
|
|
15
|
+
const a12 = Math.max(1, Math.floor(windowLines / 2));
|
|
16
|
+
// const a13 = Math.max(1, Math.floor(windowLines / 3));
|
|
17
|
+
const scales = [a12, windowLines];
|
|
16
18
|
for (const size of scales) {
|
|
17
19
|
const step = Math.max(1, Math.floor(size / 2));
|
|
18
20
|
for (let i = 0; i < lines.length; i += step) {
|
package/features/searchIndex.ts
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import {LocalIndex} from "vectra";
|
|
2
|
+
import ignore from "ignore";
|
|
2
3
|
import {IEmbed} from "../components/llm/embed.js";
|
|
3
4
|
import {IExtractKeywords} from "../components/keywords/extractKeywords.js";
|
|
4
5
|
import {ICleanUpKeywords} from "../components/keywords/cleanUpKeywords.js";
|
|
5
6
|
import {ILocateInFile} from "../components/locate/locateInFile.js";
|
|
7
|
+
import {Concurrency} from "../packages/fun/src/concurrency.js";
|
|
6
8
|
|
|
7
9
|
export type IIndexRecord = {
|
|
8
10
|
score: number;
|
|
@@ -15,25 +17,66 @@ export type IIndexRecord = {
|
|
|
15
17
|
endLine?: number;
|
|
16
18
|
};
|
|
17
19
|
|
|
18
|
-
export type
|
|
20
|
+
export type ISearchIndexOptions = {
|
|
21
|
+
limit: number;
|
|
22
|
+
prefetchMultiplier?: number;
|
|
23
|
+
scoreThreshold?: number;
|
|
24
|
+
locateInFiles?: boolean;
|
|
25
|
+
windowLines?: number;
|
|
26
|
+
useKeywordsSearch?: boolean;
|
|
27
|
+
includePaths?: string[];
|
|
28
|
+
excludePaths?: string[];
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
export type ISearchIndexQuery = Readonly<{
|
|
32
|
+
query: string;
|
|
33
|
+
options: ISearchIndexOptions;
|
|
34
|
+
}>;
|
|
35
|
+
export type ISearchIndex = (query: ISearchIndexQuery) => Promise<IIndexRecord[]>;
|
|
19
36
|
|
|
20
|
-
export function SearchIndex({
|
|
37
|
+
export function SearchIndex({
|
|
38
|
+
extractKeywords,
|
|
39
|
+
cleanUpKeywords,
|
|
40
|
+
embed,
|
|
41
|
+
index,
|
|
42
|
+
locateInFile,
|
|
43
|
+
searchConcurrency = 3,
|
|
44
|
+
scoreThreshold = 0.05
|
|
45
|
+
}: {
|
|
21
46
|
extractKeywords: IExtractKeywords,
|
|
22
47
|
cleanUpKeywords: ICleanUpKeywords,
|
|
23
48
|
embed: IEmbed,
|
|
24
49
|
index: LocalIndex,
|
|
25
50
|
locateInFile: ILocateInFile,
|
|
26
51
|
scoreThreshold: number
|
|
52
|
+
searchConcurrency?: number
|
|
27
53
|
}): ISearchIndex {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
54
|
+
const worker: ISearchIndex = async function searchContentIndex({
|
|
55
|
+
query, options: {
|
|
56
|
+
limit,
|
|
57
|
+
prefetchMultiplier = 3,
|
|
58
|
+
scoreThreshold: threshold = scoreThreshold,
|
|
59
|
+
locateInFiles = true,
|
|
60
|
+
windowLines,
|
|
61
|
+
useKeywordsSearch = false,
|
|
62
|
+
includePaths = [],
|
|
63
|
+
excludePaths = [],
|
|
64
|
+
}
|
|
65
|
+
}: ISearchIndexQuery) {
|
|
66
|
+
const keywords = useKeywordsSearch ? cleanUpKeywords(extractKeywords(query)) : [];
|
|
67
|
+
const searchText = useKeywordsSearch && keywords.length > 0 ? keywords.join(", ") : query;
|
|
31
68
|
const vector = await embed(searchText);
|
|
69
|
+
const includeMatcher = createGitignoreMatcher(includePaths);
|
|
70
|
+
const excludeMatcher = createGitignoreMatcher(excludePaths);
|
|
32
71
|
|
|
33
|
-
const
|
|
72
|
+
const expectedMultiplier = limit * prefetchMultiplier;
|
|
73
|
+
const pathPatternBoost = Math.max(1, includePaths.length + excludePaths.length);
|
|
74
|
+
const results = await index.queryItems(vector, searchText, expectedMultiplier + pathPatternBoost);
|
|
34
75
|
|
|
35
76
|
const candidates = results
|
|
36
|
-
.filter(r => r.score >=
|
|
77
|
+
.filter(r => r.score >= threshold)
|
|
78
|
+
.filter(r => includePaths.length === 0 || includeMatcher(r.item.id))
|
|
79
|
+
.filter(r => excludePaths.length === 0 || !excludeMatcher(r.item.id))
|
|
37
80
|
.sort((a, b) => b.score - a.score)
|
|
38
81
|
.map(r => ({
|
|
39
82
|
score: r.score,
|
|
@@ -41,10 +84,29 @@ export function SearchIndex({extractKeywords, cleanUpKeywords, embed, index, loc
|
|
|
41
84
|
keywords: typeof r.item.metadata?.keywords === "string" ? r.item.metadata.keywords : "",
|
|
42
85
|
}));
|
|
43
86
|
|
|
44
|
-
if (!locateInFile || candidates.length === 0) {
|
|
87
|
+
if (!locateInFile || !locateInFiles || candidates.length === 0) {
|
|
45
88
|
return candidates;
|
|
46
89
|
}
|
|
47
90
|
|
|
48
|
-
return locateInFile(query, vector, candidates, limit);
|
|
91
|
+
return locateInFile(query, vector, candidates, limit, windowLines);
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
const control = Concurrency(searchConcurrency, worker);
|
|
95
|
+
|
|
96
|
+
return async (input) => {
|
|
97
|
+
const resolve = await control(input);
|
|
98
|
+
return await resolve();
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
function createGitignoreMatcher(patterns: string[]): (path: string) => boolean {
|
|
103
|
+
const ig = ignore();
|
|
104
|
+
for (const pattern of patterns) {
|
|
105
|
+
ig.add(pattern);
|
|
49
106
|
}
|
|
107
|
+
return (path: string) => ig.ignores(normalizePath(path));
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
function normalizePath(path: string): string {
|
|
111
|
+
return path.replaceAll("\\", "/").replace(/^\.\/+/, "");
|
|
50
112
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xindex",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.20",
|
|
4
4
|
"description": "Local semantic code search — index codebase, search by meaning or keywords",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "xindex.ts",
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
"mcp": "tsx apps/run.mcp.ts",
|
|
22
22
|
"watch": "tsx apps/run.watch.ts",
|
|
23
23
|
"test.functional": "bash test/functional.sh",
|
|
24
|
-
"test.compilation": "
|
|
24
|
+
"test.compilation": "tsc --ignoreConfig --noEmit --target ES2022 --module ESNext --moduleResolution bundler --esModuleInterop --skipLibCheck --strict false $(git ls-files '*.ts') && tsc -p tsconfig.json --noEmit",
|
|
25
25
|
"test.npx": "docker run --rm -it -w /tmp node:22 bash -c 'npm i -g xindex && xindex-index tsx-0 && xindex-search streamx map | grep \"await mapper\" && which xindex | grep bin/xindex' ",
|
|
26
26
|
"console": "docker run --rm -it -v \"$PWD:/app\" -w /app node:22 bash"
|
|
27
27
|
},
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { Counter, ICounter } from './counter';
|
|
2
|
-
import { Defer, IDefer } from './defer';
|
|
3
|
-
import { IValue, Value } from './value';
|
|
4
|
-
import { waitForCounter, waitForZeroCounter } from './waitForCounter';
|
|
1
|
+
import { Counter, ICounter } from './counter.js';
|
|
2
|
+
import { Defer, IDefer } from './defer.js';
|
|
3
|
+
import { IValue, Value } from './value.js';
|
|
4
|
+
import { waitForCounter, waitForZeroCounter } from './waitForCounter.js';
|
|
5
5
|
|
|
6
6
|
export type IStopKeepConcurrency = () => Promise<void>;
|
|
7
7
|
|
|
@@ -168,7 +168,7 @@ export function KeyedConcurrency<Input, Output = any>(
|
|
|
168
168
|
const keyedControl = Concurrency<Input>(workerConcurrency, worker);
|
|
169
169
|
registry[key] = keyedControl;
|
|
170
170
|
|
|
171
|
-
const unsubscribe = keyedControl.quantity.subscribe(value => {
|
|
171
|
+
const unsubscribe = keyedControl.quantity.subscribe((value: number) => {
|
|
172
172
|
if (value <= 0) {
|
|
173
173
|
unsubscribe();
|
|
174
174
|
|
|
@@ -1,11 +1,17 @@
|
|
|
1
|
-
import { isIterable } from './isIterable';
|
|
1
|
+
import { isIterable } from './isIterable.js';
|
|
2
2
|
|
|
3
3
|
export function flatten<T>(items: T | T[] | Iterable<T>): T[] {
|
|
4
|
-
if (Array.isArray(items)
|
|
5
|
-
return
|
|
4
|
+
if (Array.isArray(items)) {
|
|
5
|
+
return items.reduce<T[]>((acc, item) => {
|
|
6
6
|
return [...acc, ...flatten(item)];
|
|
7
|
-
}, []
|
|
8
|
-
} else {
|
|
9
|
-
return [items];
|
|
7
|
+
}, []);
|
|
10
8
|
}
|
|
9
|
+
|
|
10
|
+
if (isIterable(items)) {
|
|
11
|
+
return [...items].reduce<T[]>((acc, item) => {
|
|
12
|
+
return [...acc, ...flatten(item)];
|
|
13
|
+
}, []);
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
return [items];
|
|
11
17
|
}
|
package/packages/fun/src/hub.ts
CHANGED
package/packages/fun/src/tick.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { StreamX, StreamXMapper } from './index';
|
|
1
|
+
import { StreamX, StreamXMapper } from './index.js';
|
|
2
2
|
|
|
3
3
|
export function batch<Input>(size: number): StreamXMapper<Input, Input[]> {
|
|
4
4
|
let batched: Input[] = [];
|
|
5
|
-
return inputStream =>
|
|
5
|
+
return (inputStream: StreamX<Input>) =>
|
|
6
6
|
(async function* batchedStream(): StreamX<Input[]> {
|
|
7
7
|
for await (const record of inputStream) {
|
|
8
8
|
batched.push(record);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { StreamXMapper } from './index';
|
|
2
|
-
import { IRead, read } from './reader';
|
|
3
|
-
import { IWriter, Writer } from './writer';
|
|
4
|
-
import { syncTick } from '../../fun/src/tick';
|
|
1
|
+
import { StreamXMapper } from './index.js';
|
|
2
|
+
import { IRead, read } from './reader.js';
|
|
3
|
+
import { IWriter, Writer } from './writer.js';
|
|
4
|
+
import { syncTick } from '../../fun/src/tick.js';
|
|
5
5
|
import { clearTimeout } from 'timers';
|
|
6
6
|
|
|
7
7
|
type Milliseconds = number;
|
|
@@ -41,7 +41,7 @@ export function batchTimed<Input>(
|
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
return inputStream => {
|
|
44
|
+
return (inputStream: AsyncIterable<Input>) => {
|
|
45
45
|
return {
|
|
46
46
|
[Symbol.asyncIterator](): AsyncIterator<Input[]> {
|
|
47
47
|
return {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { StreamX, StreamXMapper } from './index';
|
|
2
|
-
import { IRead, read } from './reader';
|
|
3
|
-
import { IWriter, Writer } from './writer';
|
|
4
|
-
import { syncTick } from '../../fun/src/tick';
|
|
1
|
+
import { StreamX, StreamXMapper } from './index.js';
|
|
2
|
+
import { IRead, read } from './reader.js';
|
|
3
|
+
import { IWriter, Writer } from './writer.js';
|
|
4
|
+
import { syncTick } from '../../fun/src/tick.js';
|
|
5
5
|
|
|
6
6
|
export function buffer<Input>(size: number): StreamXMapper<Input, Input> {
|
|
7
7
|
let outputBuffer: IWriter<Input>;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { Promised, StreamX, StreamXMapper } from './index';
|
|
1
|
+
import { Promised, StreamX, StreamXMapper } from './index.js';
|
|
2
2
|
|
|
3
3
|
export function filter<Input>(
|
|
4
4
|
condition: (input: Input) => Promised<boolean | undefined | null>
|
|
5
5
|
): StreamXMapper<Input, Input> {
|
|
6
|
-
return inputStream =>
|
|
6
|
+
return (inputStream: StreamX<Input>) =>
|
|
7
7
|
(async function* filtered(): StreamX<Input> {
|
|
8
8
|
for await (const record of inputStream) {
|
|
9
9
|
if (await condition(record)) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { flat } from './flat';
|
|
2
|
-
import { map } from './map';
|
|
3
|
-
import { pipe, Promised, StreamXLike, StreamXMapper } from './index';
|
|
1
|
+
import { flat } from './flat.js';
|
|
2
|
+
import { map } from './map.js';
|
|
3
|
+
import { pipe, Promised, StreamXLike, StreamXMapper } from './index.js';
|
|
4
4
|
|
|
5
5
|
export function flatMap<Input, Output>(
|
|
6
6
|
mapper: (input: Input) => Promised<Output | StreamXLike<Output>>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { Defer } from '../../fun/src/defer';
|
|
2
|
-
import { reader } from './reader';
|
|
3
|
-
import { StreamX } from './index';
|
|
1
|
+
import { Defer } from '../../fun/src/defer.js';
|
|
2
|
+
import { reader } from './reader.js';
|
|
3
|
+
import { StreamX } from './index.js';
|
|
4
4
|
|
|
5
5
|
export type IInterval = StreamX<number> & {
|
|
6
6
|
stop: () => void;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Promised, StreamX } from './index';
|
|
2
|
-
import { reader } from './reader';
|
|
1
|
+
import { Promised, StreamX } from './index.js';
|
|
2
|
+
import { reader } from './reader.js';
|
|
3
3
|
|
|
4
4
|
export function loop(condition: () => Promised<boolean>): StreamX<true> {
|
|
5
5
|
return reader<true>(async () => {
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { Promised, StreamX, StreamXMapper } from './index';
|
|
1
|
+
import { Promised, StreamX, StreamXMapper } from './index.js';
|
|
2
2
|
|
|
3
3
|
export function map<Input, Output>(
|
|
4
4
|
mapper: (input: Input) => Promised<Output>
|
|
5
5
|
): StreamXMapper<Input, Output> {
|
|
6
|
-
return inputStream =>
|
|
6
|
+
return (inputStream: StreamX<Input>) =>
|
|
7
7
|
(async function* mappedStream(): StreamX<Output> {
|
|
8
8
|
for await (const record of inputStream) {
|
|
9
9
|
yield await mapper(record);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { StreamX } from './index';
|
|
2
|
-
import { IRead, read } from './reader';
|
|
3
|
-
import { IWriter, Writer } from './writer';
|
|
4
|
-
import { syncTick } from '../../fun/src/tick';
|
|
1
|
+
import { StreamX } from './index.js';
|
|
2
|
+
import { IRead, read } from './reader.js';
|
|
3
|
+
import { IWriter, Writer } from './writer.js';
|
|
4
|
+
import { syncTick } from '../../fun/src/tick.js';
|
|
5
5
|
|
|
6
6
|
export function merge<T1, T2, T3, T4>(
|
|
7
7
|
stream1: StreamX<T1>,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { of, StreamXMapper } from './index';
|
|
1
|
+
import { of, StreamX, StreamXMapper } from './index.js';
|
|
2
2
|
import { Readable, ReadableOptions, Transform } from 'stream';
|
|
3
3
|
|
|
4
4
|
export function nodeTransform<Input, Output>(
|
|
5
5
|
transform: Transform,
|
|
6
6
|
options: ReadableOptions = {}
|
|
7
7
|
): StreamXMapper<Input, Output> {
|
|
8
|
-
return inputStream => of(Readable.from(inputStream, options).pipe(transform));
|
|
8
|
+
return (inputStream: StreamX<Input>) => of(Readable.from(inputStream, options).pipe(transform));
|
|
9
9
|
}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import { StreamX, StreamXMapper } from './index';
|
|
1
|
+
import { StreamX, StreamXMapper } from './index.js';
|
|
2
2
|
import { Writable } from 'stream';
|
|
3
|
-
import { Defer } from '../../fun/src/defer';
|
|
3
|
+
import { Defer } from '../../fun/src/defer.js';
|
|
4
4
|
|
|
5
5
|
export function nodeWritable<Type>(
|
|
6
6
|
writable: Writable,
|
|
7
7
|
encoding: BufferEncoding = 'utf-8'
|
|
8
8
|
): StreamXMapper<Type, Type> {
|
|
9
|
-
return inputStream =>
|
|
9
|
+
return (inputStream: StreamX<Type>) =>
|
|
10
10
|
(async function* _nodeWritable(): StreamX<Type> {
|
|
11
11
|
const deferEnd = Defer<void>();
|
|
12
12
|
writable.on('error', error => deferEnd.reject(error));
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Promised, StreamX } from './index';
|
|
2
|
-
import { reader } from './reader';
|
|
1
|
+
import { Promised, StreamX } from './index.js';
|
|
2
|
+
import { reader } from './reader.js';
|
|
3
3
|
|
|
4
4
|
export function objectReader<T extends object | object[]>(
|
|
5
5
|
read: () => Promised<T | null | undefined | boolean | number>
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import { Promised, StreamX, StreamXMapper } from './index';
|
|
1
|
+
import { Promised, StreamX, StreamXMapper } from './index.js';
|
|
2
2
|
|
|
3
3
|
export function reduce<Input, Accumulator>(
|
|
4
4
|
reducer: (accumulator: Accumulator, input: Input) => Promised<Accumulator>,
|
|
5
5
|
initial: Accumulator
|
|
6
6
|
): StreamXMapper<Input, Accumulator> {
|
|
7
|
-
return inputStream =>
|
|
7
|
+
return (inputStream: StreamX<Input>) =>
|
|
8
8
|
(async function* reduced(): StreamX<Accumulator> {
|
|
9
9
|
let finalValue: Accumulator = initial;
|
|
10
10
|
for await (const input of inputStream) {
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { Promised, StreamXMapper } from './index';
|
|
2
|
-
import { IRead, read } from './reader';
|
|
3
|
-
import { IWriter, Writer } from './writer';
|
|
4
|
-
import { Concurrency, IPublishToConcurrency } from '../../fun/src/concurrency';
|
|
5
|
-
import { syncTick } from '../../fun/src/tick';
|
|
1
|
+
import { Promised, StreamX, StreamXMapper } from './index.js';
|
|
2
|
+
import { IRead, read } from './reader.js';
|
|
3
|
+
import { IWriter, Writer } from './writer.js';
|
|
4
|
+
import { Concurrency, IPublishToConcurrency } from '../../fun/src/concurrency.js';
|
|
5
|
+
import { syncTick } from '../../fun/src/tick.js';
|
|
6
6
|
|
|
7
7
|
export function scale<Input, Output>(
|
|
8
8
|
max: number,
|
|
@@ -24,7 +24,7 @@ export function scale<Input, Output>(
|
|
|
24
24
|
}
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
return inputStream => {
|
|
27
|
+
return (inputStream: StreamX<Input>) => {
|
|
28
28
|
return {
|
|
29
29
|
[Symbol.asyncIterator](): AsyncIterator<Output> {
|
|
30
30
|
return {
|
|
@@ -41,7 +41,7 @@ export function scale<Input, Output>(
|
|
|
41
41
|
outputBuffer = Writer<Output>();
|
|
42
42
|
|
|
43
43
|
if (!concurrencyControl) {
|
|
44
|
-
concurrencyControl = Concurrency<Input>(max, async input => {
|
|
44
|
+
concurrencyControl = Concurrency<Input>(max, async (input: Input) => {
|
|
45
45
|
try {
|
|
46
46
|
const output = await mapper(input);
|
|
47
47
|
await outputBuffer.write(output);
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import { pipe, Promised, StreamXMapper } from './index';
|
|
2
|
-
import { batch } from './batch';
|
|
3
|
-
import { map } from './map';
|
|
4
|
-
import { flat } from './flat';
|
|
1
|
+
import { pipe, Promised, StreamXMapper } from './index.js';
|
|
2
|
+
import { batch } from './batch.js';
|
|
3
|
+
import { map } from './map.js';
|
|
4
|
+
import { flat } from './flat.js';
|
|
5
5
|
|
|
6
6
|
export function scaleSync<Input, Output>(
|
|
7
7
|
size: number,
|
|
8
8
|
mapper: (input: Input) => Promised<Output>
|
|
9
9
|
): StreamXMapper<Input, Output> {
|
|
10
10
|
return pipe(batch<Input>(size))
|
|
11
|
-
.pipe(map(values => Promise.all(values.map(mapper))))
|
|
11
|
+
.pipe(map((values: Input[]) => Promise.all(values.map(mapper))))
|
|
12
12
|
.pipe(flat());
|
|
13
13
|
}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { map } from './map';
|
|
2
|
-
import { Promised, StreamXMapper } from './index';
|
|
1
|
+
import { map } from './map.js';
|
|
2
|
+
import { Promised, StreamXMapper } from './index.js';
|
|
3
3
|
|
|
4
4
|
export function tap<Input>(fn: (input: Input) => Promised<any>): StreamXMapper<Input, Input> {
|
|
5
|
-
return map<Input, Input>(async (input): Promise<Input> => {
|
|
5
|
+
return map<Input, Input>(async (input: Input): Promise<Input> => {
|
|
6
6
|
await fn(input);
|
|
7
7
|
return input;
|
|
8
8
|
});
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { Defer, IDefer } from './defer';
|
|
2
|
-
import { of, StreamX } from './index';
|
|
3
|
-
import { reader } from './reader';
|
|
4
|
-
import { flat } from './flat';
|
|
1
|
+
import { Defer, IDefer } from './defer.js';
|
|
2
|
+
import { of, StreamX } from './index.js';
|
|
3
|
+
import { reader } from './reader.js';
|
|
4
|
+
import { flat } from './flat.js';
|
|
5
5
|
|
|
6
6
|
export type IWriter<T> = {
|
|
7
7
|
stream: StreamX<T>;
|
package/tsconfig.json
CHANGED
|
@@ -1,127 +0,0 @@
|
|
|
1
|
-
import {rm} from "node:fs/promises";
|
|
2
|
-
import {extname} from "node:path";
|
|
3
|
-
import type {IContentIndexDriver} from "./contentIndexDriver.js";
|
|
4
|
-
import type {IGetIndexStats} from "./getIndexStats.js";
|
|
5
|
-
import type {IIndexContent} from "../../features/indexContent.js";
|
|
6
|
-
import type {IRemoveContent} from "../../features/removeContent.js";
|
|
7
|
-
import type {IResetIndex} from "../../features/resetIndex.js";
|
|
8
|
-
import type {IIndexRecord, ISearchIndex} from "../../features/searchIndex.js";
|
|
9
|
-
import {DocumentIndex} from "./documentIndex.js";
|
|
10
|
-
|
|
11
|
-
export async function DocumentContentIndexDriver({path, model}: {
|
|
12
|
-
path: string,
|
|
13
|
-
model?: string,
|
|
14
|
-
}): Promise<IContentIndexDriver> {
|
|
15
|
-
const docsPath = `${path}/docs`;
|
|
16
|
-
let index = await DocumentIndex({path: docsPath, model});
|
|
17
|
-
|
|
18
|
-
const indexContent: IIndexContent = async items => {
|
|
19
|
-
for (const item of items) {
|
|
20
|
-
await index.upsertDocument(item.id, item.content, docTypeForPath(item.id), {});
|
|
21
|
-
}
|
|
22
|
-
};
|
|
23
|
-
|
|
24
|
-
const removeContent: IRemoveContent = async ids => {
|
|
25
|
-
for (const id of ids) {
|
|
26
|
-
await index.deleteDocument(id);
|
|
27
|
-
}
|
|
28
|
-
};
|
|
29
|
-
|
|
30
|
-
const searchContentIndex: ISearchIndex = async (query, limit) => {
|
|
31
|
-
const results = await index.queryDocuments(query, {
|
|
32
|
-
maxDocuments: limit,
|
|
33
|
-
maxChunks: 5,
|
|
34
|
-
isBm25: true,
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
const out: IIndexRecord[] = [];
|
|
38
|
-
|
|
39
|
-
for (const r of results) {
|
|
40
|
-
const text = await r.loadText();
|
|
41
|
-
const lineStarts = computeLineStarts(text);
|
|
42
|
-
|
|
43
|
-
for (const c of r.chunks) {
|
|
44
|
-
const {startPos, endPos} = c.item.metadata;
|
|
45
|
-
const snippet = text.slice(startPos, endPos);
|
|
46
|
-
const startLine = lineOf(lineStarts, startPos);
|
|
47
|
-
const endLine = lineOf(lineStarts, endPos);
|
|
48
|
-
|
|
49
|
-
out.push({
|
|
50
|
-
id: r.uri,
|
|
51
|
-
score: c.score,
|
|
52
|
-
startPos,
|
|
53
|
-
endPos,
|
|
54
|
-
snippet,
|
|
55
|
-
startLine,
|
|
56
|
-
endLine,
|
|
57
|
-
});
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
return out;
|
|
62
|
-
};
|
|
63
|
-
|
|
64
|
-
const resetIndex: IResetIndex = async () => {
|
|
65
|
-
/** Clears persisted catalog + chunks on disk; recreates empty index metadata. */
|
|
66
|
-
await rm(docsPath, {recursive: true, force: true});
|
|
67
|
-
|
|
68
|
-
/*
|
|
69
|
-
* Re-bind `index`: after `rm`, the prior LocalDocumentIndex instance may retain stale
|
|
70
|
-
* in-memory caches; `DocumentIndex` constructs a fresh instance against the recreated folder.
|
|
71
|
-
*/
|
|
72
|
-
index = await DocumentIndex({path: docsPath, model});
|
|
73
|
-
};
|
|
74
|
-
|
|
75
|
-
const getIndexStats: IGetIndexStats = async () => {
|
|
76
|
-
const stats = await index.getCatalogStats();
|
|
77
|
-
|
|
78
|
-
return {indexedAmount: stats.documents ?? 0};
|
|
79
|
-
};
|
|
80
|
-
|
|
81
|
-
const flush = async () => {
|
|
82
|
-
/* LocalDocumentIndex persists updates without an explicit flush. */
|
|
83
|
-
};
|
|
84
|
-
|
|
85
|
-
return {
|
|
86
|
-
getIndexStats,
|
|
87
|
-
indexContentWatch: indexContent,
|
|
88
|
-
indexContentBatch: indexContent,
|
|
89
|
-
removeContent,
|
|
90
|
-
searchContentIndex,
|
|
91
|
-
resetIndex,
|
|
92
|
-
flush,
|
|
93
|
-
};
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
const DOC_TYPE_BY_EXT: Record<string, string> = {
|
|
97
|
-
".ts": "ts", ".tsx": "tsx", ".mts": "ts", ".cts": "ts",
|
|
98
|
-
".js": "js", ".jsx": "jsx", ".mjs": "js", ".cjs": "js",
|
|
99
|
-
".py": "python",
|
|
100
|
-
".go": "go",
|
|
101
|
-
".java": "java",
|
|
102
|
-
".cs": "csharp",
|
|
103
|
-
".cpp": "cpp", ".cc": "cpp", ".cxx": "cpp", ".hpp": "cpp", ".h": "cpp",
|
|
104
|
-
".md": "markdown",
|
|
105
|
-
};
|
|
106
|
-
|
|
107
|
-
function docTypeForPath(path: string): string {
|
|
108
|
-
return DOC_TYPE_BY_EXT[extname(path).toLowerCase()] ?? "text";
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
function computeLineStarts(text: string): number[] {
|
|
112
|
-
const starts = [0];
|
|
113
|
-
for (let i = 0; i < text.length; i++) {
|
|
114
|
-
if (text.charCodeAt(i) === 10) starts.push(i + 1);
|
|
115
|
-
}
|
|
116
|
-
return starts;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
function lineOf(lineStarts: number[], pos: number): number {
|
|
120
|
-
let lo = 0, hi = lineStarts.length - 1;
|
|
121
|
-
while (lo < hi) {
|
|
122
|
-
const mid = (lo + hi + 1) >>> 1;
|
|
123
|
-
if (lineStarts[mid] <= pos) lo = mid;
|
|
124
|
-
else hi = mid - 1;
|
|
125
|
-
}
|
|
126
|
-
return lo + 1;
|
|
127
|
-
}
|