@cue-dev/retrieval-core 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +27 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +64 -0
- package/dist/chunking.js +983 -0
- package/dist/index.d.ts +673 -0
- package/dist/index.js +6605 -0
- package/dist/indexing-ignore.d.ts +9 -0
- package/dist/indexing-ignore.js +151 -0
- package/dist/remote-sync.d.ts +193 -0
- package/dist/remote-sync.js +816 -0
- package/package.json +37 -0
- package/scripts/poc-node-parser-host.cjs +105 -0
- package/scripts/poc-parser-availability-benchmark.ts +338 -0
- package/src/chunking.ts +1187 -0
- package/src/index.ts +8338 -0
- package/src/indexing-ignore.ts +179 -0
- package/src/remote-sync.ts +1119 -0
- package/test/benchmark.thresholds.test.ts +815 -0
- package/test/chunking.config.test.ts +84 -0
- package/test/chunking.language-aware.test.ts +1248 -0
- package/test/chunking.parser-availability.poc.test.ts +86 -0
- package/test/claude-agent-provider.test.ts +209 -0
- package/test/embedding-context-prefix.test.ts +101 -0
- package/test/embedding-provider.test.ts +570 -0
- package/test/enhance-confidence.test.ts +752 -0
- package/test/index-prep.concurrency.regression.test.ts +142 -0
- package/test/integration.test.ts +508 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/mcp-search-quality.regression.test.ts +1358 -0
- package/test/remote-sync.integration.test.ts +350 -0
- package/test/smart-cutoff.config.test.ts +86 -0
- package/test/snippet-integrity.config.test.ts +59 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/** Names of the ignore files that supply indexing ignore patterns. */
export declare const INDEXING_IGNORE_FILENAMES: readonly [".contextignore", ".cueignore"];
|
|
2
|
+
/** Whether the path being tested refers to a file or to a directory. */
type IndexingPathKind = "file" | "dir";
|
|
3
|
+
/** Matcher returned by `loadIndexingIgnoreMatcher`. */
export interface IndexingIgnoreMatcher {
|
|
4
|
+
/** The ignore patterns the matcher was built from. */
patterns: string[];
|
|
5
|
+
/** Reports whether `path`, treated as the given `kind`, should be ignored. */
shouldIgnorePath(path: string, kind: IndexingPathKind): boolean;
|
|
6
|
+
}
|
|
7
|
+
/** Normalizes a path string and returns the normalized string. */
export declare function normalizeRepoRelativePath(path: string): string;
|
|
8
|
+
/** Resolves to an `IndexingIgnoreMatcher` for the project rooted at `projectRootPath`. */
export declare function loadIndexingIgnoreMatcher(projectRootPath: string): Promise<IndexingIgnoreMatcher>;
|
|
9
|
+
export {};
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
// Names of the ignore files that loadIndexingIgnoreMatcher reads from the project root.
export const INDEXING_IGNORE_FILENAMES = [".contextignore", ".cueignore"];
// Lower-cased copies of those names, used for case-insensitive basename lookups.
const INDEXING_CONTROL_FILENAMES = new Set(Array.from(INDEXING_IGNORE_FILENAMES, (filename) => filename.toLowerCase()));
|
|
5
|
+
/**
 * Returns `char` prefixed with a backslash when it contains a regular-expression
 * metacharacter, otherwise returns it unchanged.
 *
 * "*" is intentionally absent from the set: compileGlobPattern consumes it as a
 * wildcard before any character reaches this helper.
 */
function escapeRegexChar(char) {
    const needsEscape = /[\\^$+?.()|[\]{}]/.test(char);
    if (!needsEscape) {
        return char;
    }
    return "\\" + char;
}
|
|
8
|
+
/**
 * Translates a glob pattern into a RegExp anchored at both ends.
 *
 * Supported wildcards:
 *   - a double star followed by "/" becomes an optional run of leading directories,
 *   - a double star elsewhere matches any characters, "/" included,
 *   - a single star matches any run of characters within one path segment,
 *   - "?" matches exactly one character other than "/".
 * Every other character is matched literally (escaped via escapeRegexChar).
 */
function compileGlobPattern(pattern) {
    const pieces = [];
    let pos = 0;
    while (pos < pattern.length) {
        const ch = pattern[pos];
        if (ch === undefined) {
            pos += 1;
            continue;
        }
        if (ch !== "*") {
            pieces.push(ch === "?" ? "[^/]" : escapeRegexChar(ch));
            pos += 1;
            continue;
        }
        if (pattern[pos + 1] !== "*") {
            // Single star: stay inside the current path segment.
            pieces.push("[^/]*");
            pos += 1;
        }
        else if (pattern[pos + 2] === "/") {
            // Double star plus slash: the slash is consumed together with the stars.
            pieces.push("(?:.*/)?");
            pos += 3;
        }
        else {
            // Bare double star: may cross segment boundaries.
            pieces.push(".*");
            pos += 2;
        }
    }
    return new RegExp("^" + pieces.join("") + "$");
}
|
|
39
|
+
/**
 * Normalizes a path into canonical repo-relative form.
 *
 * Backslashes become forward slashes, runs of slashes collapse to one, every
 * leading "./" or "/" segment is removed, and trailing slashes are dropped.
 * Interior "." segments and any ".." segments are left untouched.
 *
 * @param path Path to normalize.
 * @returns The normalized path; an empty string when nothing remains.
 */
export function normalizeRepoRelativePath(path) {
    return path
        .replace(/\\/g, "/")
        // Collapse slash runs first so the prefix strip below only sees single separators.
        .replace(/\/+/g, "/")
        // Strip all leading "./" and "/" segments, not just the first one, so that
        // inputs such as "/./src" or "././src" normalize to "src" like "./src" does.
        .replace(/^(?:\.?\/)+/, "")
        .replace(/\/+$/, "");
}
|
|
48
|
+
/**
 * Returns the final segment of `path` after normalization, or an empty string
 * when the normalized path is empty.
 */
function basename(path) {
    const cleaned = normalizeRepoRelativePath(path);
    // lastIndexOf yields -1 when there is no slash, so the slice starts at 0 and
    // returns the whole string; an empty string slices to an empty string.
    return cleaned.slice(cleaned.lastIndexOf("/") + 1);
}
|
|
56
|
+
/**
 * Lists the directory prefixes of `path`, shallowest first.
 *
 * For kind "dir" the path itself is the last entry; for any other kind the final
 * segment is treated as a file name and excluded. An empty normalized path
 * yields an empty list.
 */
function listAncestorDirectories(path, kind) {
    const cleaned = normalizeRepoRelativePath(path);
    if (cleaned === "") {
        return [];
    }
    const parts = cleaned.split("/");
    const dirCount = kind === "dir" ? parts.length : Math.max(parts.length - 1, 0);
    return parts
        .slice(0, dirCount)
        .map((_segment, index) => parts.slice(0, index + 1).join("/"));
}
|
|
71
|
+
/**
 * Extracts ignore patterns from the text of an ignore file.
 *
 * Blank lines and lines starting with "#" are skipped. Each remaining line is
 * trimmed and normalized with normalizeRepoRelativePath (which also removes a
 * leading "/"); a trailing "/" on the trimmed line is re-appended so that
 * directory-only patterns stay recognizable.
 */
function parseIgnorePatterns(content) {
    const collected = [];
    content.split(/\r?\n/).forEach((rawLine) => {
        const entry = rawLine.trim();
        if (entry === "" || entry.startsWith("#")) {
            return;
        }
        // v1 is exclude-only; ignore negation directives for deterministic behavior.
        if (entry.startsWith("!")) {
            return;
        }
        const cleaned = normalizeRepoRelativePath(entry);
        if (cleaned === "") {
            return;
        }
        collected.push(entry.endsWith("/") ? cleaned + "/" : cleaned);
    });
    return collected;
}
|
|
91
|
+
/**
 * Compiles ignore patterns into rule objects.
 *
 * Each rule records whether the pattern was directory-only (ended with "/"),
 * whether the normalized pattern contains a "/", and the RegExp produced by
 * compileGlobPattern. Patterns that normalize to an empty string are dropped.
 */
function compileIgnoreRules(patterns) {
    const rules = [];
    for (const rawPattern of patterns) {
        const dirOnly = rawPattern.endsWith("/");
        const withoutMarker = dirOnly ? rawPattern.slice(0, -1) : rawPattern;
        const cleaned = normalizeRepoRelativePath(withoutMarker);
        if (cleaned.length > 0) {
            rules.push({
                directory_only: dirOnly,
                has_slash: cleaned.includes("/"),
                regex: compileGlobPattern(cleaned)
            });
        }
    }
    return rules;
}
|
|
108
|
+
/**
 * Tests one compiled rule against a path.
 *
 * A rule whose pattern contains "/" is matched against the full candidate;
 * otherwise only the candidate's final segment is matched. For directory-only
 * rules the candidates are the path's ancestor directories (including the path
 * itself when `kind` is "dir"); for all other rules the candidate is the path.
 */
function matchesRule(rule, path, kind) {
    const target = normalizeRepoRelativePath(path);
    if (target === "") {
        return false;
    }
    const subjectOf = (candidate) => (rule.has_slash ? candidate : basename(candidate));
    if (rule.directory_only) {
        return listAncestorDirectories(target, kind).some((dir) => rule.regex.test(subjectOf(dir)));
    }
    return rule.regex.test(subjectOf(target));
}
|
|
125
|
+
/**
 * Builds an ignore matcher from the ignore files located directly in the
 * project root.
 *
 * Every name in INDEXING_IGNORE_FILENAMES is read in order and its patterns are
 * appended to one list. A file that cannot be read or parsed contributes
 * nothing; the error is swallowed.
 *
 * The returned matcher always ignores files whose basename is one of the
 * ignore-file names (compared case-insensitively), then falls back to the
 * compiled rules. An empty normalized path is never ignored.
 */
export async function loadIndexingIgnoreMatcher(projectRootPath) {
    const absoluteRoot = resolve(projectRootPath);
    const patterns = [];
    for (const ignoreFilename of INDEXING_IGNORE_FILENAMES) {
        try {
            const ignoreFilePath = join(absoluteRoot, ignoreFilename);
            const text = await readFile(ignoreFilePath, "utf8");
            const parsed = parseIgnorePatterns(text);
            patterns.push(...parsed);
        }
        catch {
            // Best effort: any failure for this file (e.g. it does not exist) is skipped.
        }
    }
    const compiledRules = compileIgnoreRules(patterns);
    const shouldIgnorePath = (path, kind) => {
        const candidate = normalizeRepoRelativePath(path);
        if (candidate === "") {
            return false;
        }
        const isControlFile = kind === "file" && INDEXING_CONTROL_FILENAMES.has(basename(candidate).toLowerCase());
        return isControlFile || compiledRules.some((rule) => matchesRule(rule, candidate, kind));
    };
    return { patterns, shouldIgnorePath };
}
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/** Literal tag used as the `mode` field of a `RemoteSyncStateFile`. */
export declare const REMOTE_SYNC_STATE_MODE: "remote_delta_v1";
|
|
2
|
+
/**
 * Default maximum file size, 1,000,000 bytes.
 * NOTE(review): presumably the fallback for `RemoteSyncScanOptions.max_file_size_bytes` — confirm in the implementation.
 */
export declare const DEFAULT_REMOTE_SYNC_MAX_FILE_SIZE_BYTES = 1000000;
|
|
3
|
+
/**
 * A file to be uploaded: its path, its content as a string, and an optional language tag.
 * NOTE(review): `path` is presumably relative to the project root — confirm.
 */
export interface RemoteSyncUploadCandidate {
|
|
4
|
+
path: string;
|
|
5
|
+
content: string;
|
|
6
|
+
language?: string;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Metadata for one scanned file; the value type of the map returned by `collectProjectFileStats`.
 * NOTE(review): presumably `path` is project-relative, `full_path` absolute and `size` in bytes — confirm.
 */
export interface RemoteSyncProjectFileStat {
|
|
9
|
+
path: string;
|
|
10
|
+
full_path: string;
|
|
11
|
+
size: number;
|
|
12
|
+
/** Modification time; milliseconds per the field name. */
mtime_ms: number;
|
|
13
|
+
language?: string;
|
|
14
|
+
}
|
|
15
|
+
/**
 * Recorded state of a single file; the value type of `RemoteSyncStateFile.files`.
 * NOTE(review): the hash algorithm behind `content_hash` is not visible here.
 */
export interface RemoteSyncStateEntry {
|
|
16
|
+
content_hash: string;
|
|
17
|
+
size: number;
|
|
18
|
+
mtime_ms: number;
|
|
19
|
+
language?: string;
|
|
20
|
+
}
|
|
21
|
+
/**
 * Sync state as read by `readRemoteSyncState` and written by `writeRemoteSyncState`.
 */
export interface RemoteSyncStateFile {
|
|
22
|
+
/** Always the `REMOTE_SYNC_STATE_MODE` literal. */
mode: typeof REMOTE_SYNC_STATE_MODE;
|
|
23
|
+
workspace_id?: string;
|
|
24
|
+
last_index_version?: string;
|
|
25
|
+
/** Per-file entries. NOTE(review): keys are presumably file paths — confirm. */
files: Record<string, RemoteSyncStateEntry>;
|
|
26
|
+
/** NOTE(review): presumably an ISO-8601 timestamp — confirm. */
updated_at: string;
|
|
27
|
+
}
|
|
28
|
+
/** A set of changes: files to upsert and paths to delete. */
export interface RemoteSyncDeltaPayload {
|
|
29
|
+
upsert_files: RemoteSyncUploadCandidate[];
|
|
30
|
+
deleted_paths: string[];
|
|
31
|
+
}
|
|
32
|
+
/**
 * One batch of a delta; the element type returned by `splitRemoteSyncDeltaIntoBatches`.
 */
export interface RemoteSyncDeltaBatch {
|
|
33
|
+
upsert_files: RemoteSyncUploadCandidate[];
|
|
34
|
+
deleted_paths: string[];
|
|
35
|
+
/** Approximate size of the batch in bytes, per the field name. */
approx_bytes: number;
|
|
36
|
+
}
|
|
37
|
+
/**
 * Result of `buildRemoteSyncDeltaFromState`.
 * NOTE(review): presumably `upsert_state_entries` holds entries for the upserted files only
 * and `next_files` the complete next file map — confirm in the implementation.
 */
export interface BuildRemoteSyncDeltaResult {
|
|
38
|
+
delta: RemoteSyncDeltaPayload;
|
|
39
|
+
upsert_state_entries: Record<string, RemoteSyncStateEntry>;
|
|
40
|
+
next_files: Record<string, RemoteSyncStateEntry>;
|
|
41
|
+
}
|
|
42
|
+
/**
 * Optional filters accepted by the scanning functions: a file-size limit and
 * sets of excluded directories, files and file suffixes.
 * NOTE(review): whether the sets hold names or paths is not visible here.
 */
export interface RemoteSyncScanOptions {
|
|
43
|
+
max_file_size_bytes?: number;
|
|
44
|
+
excluded_dirs?: Set<string>;
|
|
45
|
+
excluded_files?: Set<string>;
|
|
46
|
+
excluded_file_suffixes?: Set<string>;
|
|
47
|
+
}
|
|
48
|
+
/** Error subclass that carries a numeric `status` and an optional `payload`. */
export declare class RemoteSyncHttpResponseError extends Error {
|
|
49
|
+
/** HTTP status code, per the class name. */
readonly status: number;
|
|
50
|
+
readonly payload?: unknown | undefined;
|
|
51
|
+
constructor(message: string, status: number, payload?: unknown | undefined);
|
|
52
|
+
}
|
|
53
|
+
/**
 * Input of `runRemoteDeltaSync`: project location, optional previous state,
 * size and retry settings, and the callbacks the sync invokes.
 */
export interface RunRemoteDeltaSyncInput {
|
|
54
|
+
project_root_path: string;
|
|
55
|
+
/** NOTE(review): presumably the directory to scan when it differs from `project_root_path` — confirm. */
scan_root_path?: string;
|
|
56
|
+
workspace_id?: string;
|
|
57
|
+
previous_state?: RemoteSyncStateFile;
|
|
58
|
+
force_full_upsert?: boolean;
|
|
59
|
+
max_body_bytes: number;
|
|
60
|
+
retries?: number;
|
|
61
|
+
initial_delay_ms?: number;
|
|
62
|
+
/** Predicate over a thrown value. NOTE(review): presumably overrides `isStaleBaseIndexError` — confirm. */
stale_base_error?: (error: unknown) => boolean;
|
|
63
|
+
persist_state?: (state: RemoteSyncStateFile) => Promise<void>;
|
|
64
|
+
/** Optional per-batch callback; may be synchronous or return a promise. */
on_batch_processed?: (event: {
|
|
65
|
+
batch_index: number;
|
|
66
|
+
batch_count: number;
|
|
67
|
+
approx_bytes: number;
|
|
68
|
+
upsert_files: number;
|
|
69
|
+
deleted_paths: number;
|
|
70
|
+
latency_ms: number;
|
|
71
|
+
}) => void | Promise<void>;
|
|
72
|
+
/** Required callback that sends one delta request and resolves to the ids the remote reported. */
push_delta: (request: {
|
|
73
|
+
workspace_id?: string;
|
|
74
|
+
project_root_path: string;
|
|
75
|
+
base_index_version?: string;
|
|
76
|
+
upsert_files: RemoteSyncUploadCandidate[];
|
|
77
|
+
deleted_paths: string[];
|
|
78
|
+
}) => Promise<{
|
|
79
|
+
workspace_id?: string;
|
|
80
|
+
index_version?: string;
|
|
81
|
+
}>;
|
|
82
|
+
}
|
|
83
|
+
/**
 * Result of `runRemoteDeltaSync`: the resulting state, a `changed` flag, the
 * ids reported by the remote, and counters for the applied delta and the run.
 */
export interface RunRemoteDeltaSyncResult {
|
|
84
|
+
state: RemoteSyncStateFile;
|
|
85
|
+
changed: boolean;
|
|
86
|
+
workspace_id?: string;
|
|
87
|
+
index_version?: string;
|
|
88
|
+
/** Counts (not lists) of upserted files and deleted paths. */
applied_delta: {
|
|
89
|
+
upsert_files: number;
|
|
90
|
+
deleted_paths: number;
|
|
91
|
+
};
|
|
92
|
+
stats: {
|
|
93
|
+
batches_total: number;
|
|
94
|
+
bytes_total: number;
|
|
95
|
+
latency_ms: number;
|
|
96
|
+
};
|
|
97
|
+
}
|
|
98
|
+
/**
 * Limits and protocol hints supplied to `runRemoteAdaptiveSync`.
 * NOTE(review): presumably advertised by the remote service; the valid values of
 * `sync_protocols` are not visible here.
 */
export interface RemoteSyncCapabilities {
|
|
99
|
+
max_body_bytes: number;
|
|
100
|
+
sync_protocols?: string[];
|
|
101
|
+
max_blob_bytes?: number;
|
|
102
|
+
max_blob_batch_bytes?: number;
|
|
103
|
+
max_commit_body_bytes?: number;
|
|
104
|
+
upload_concurrency_hint?: number;
|
|
105
|
+
}
|
|
106
|
+
/**
 * Input of `runRemoteAdaptiveSync`. Shares most fields with `RunRemoteDeltaSyncInput`,
 * takes `capabilities` instead of `max_body_bytes`, and adds the blob-upload and
 * commit callbacks.
 */
export interface RunRemoteAdaptiveSyncInput {
|
|
107
|
+
project_root_path: string;
|
|
108
|
+
scan_root_path?: string;
|
|
109
|
+
workspace_id?: string;
|
|
110
|
+
previous_state?: RemoteSyncStateFile;
|
|
111
|
+
force_full_upsert?: boolean;
|
|
112
|
+
capabilities: RemoteSyncCapabilities;
|
|
113
|
+
retries?: number;
|
|
114
|
+
initial_delay_ms?: number;
|
|
115
|
+
stale_base_error?: (error: unknown) => boolean;
|
|
116
|
+
persist_state?: (state: RemoteSyncStateFile) => Promise<void>;
|
|
117
|
+
/** Same callback type as `RunRemoteDeltaSyncInput.push_delta`. */
push_delta: RunRemoteDeltaSyncInput["push_delta"];
|
|
118
|
+
/** Uploads blobs and resolves to the accepted, already-present and rejected hashes. */
upload_blobs: (request: {
|
|
119
|
+
workspace_id?: string;
|
|
120
|
+
project_root_path: string;
|
|
121
|
+
blobs: Array<{
|
|
122
|
+
hash: string;
|
|
123
|
+
content: string;
|
|
124
|
+
size_bytes: number;
|
|
125
|
+
}>;
|
|
126
|
+
}) => Promise<{
|
|
127
|
+
accepted_hashes: string[];
|
|
128
|
+
already_present_hashes: string[];
|
|
129
|
+
rejected: Array<{
|
|
130
|
+
hash: string;
|
|
131
|
+
reason: string;
|
|
132
|
+
}>;
|
|
133
|
+
}>;
|
|
134
|
+
/** Commits upserts that reference blobs by `blob_hash` (no inline content) plus deleted paths. */
commit_v2: (request: {
|
|
135
|
+
workspace_id?: string;
|
|
136
|
+
project_root_path: string;
|
|
137
|
+
base_index_version?: string;
|
|
138
|
+
upsert_files: Array<{
|
|
139
|
+
path: string;
|
|
140
|
+
blob_hash: string;
|
|
141
|
+
language?: string;
|
|
142
|
+
generated?: boolean;
|
|
143
|
+
binary?: boolean;
|
|
144
|
+
updated_at?: string;
|
|
145
|
+
}>;
|
|
146
|
+
deleted_paths: string[];
|
|
147
|
+
}) => Promise<{
|
|
148
|
+
workspace_id?: string;
|
|
149
|
+
index_version?: string;
|
|
150
|
+
}>;
|
|
151
|
+
/** Optional callback receiving the previous and next concurrency and a "success" or "error" reason. */
on_upload_strategy_change?: (event: {
|
|
152
|
+
previous_concurrency: number;
|
|
153
|
+
next_concurrency: number;
|
|
154
|
+
reason: "success" | "error";
|
|
155
|
+
}) => void | Promise<void>;
|
|
156
|
+
}
|
|
157
|
+
/** Result of `runRemoteAdaptiveSync`: a `RunRemoteDeltaSyncResult` plus a `protocol` tag. */
export interface RunRemoteAdaptiveSyncResult extends RunRemoteDeltaSyncResult {
|
|
158
|
+
protocol: "delta_v1" | "blob_commit_v2";
|
|
159
|
+
}
|
|
160
|
+
/**
 * Resolves to a map of `RemoteSyncProjectFileStat` values for the project, subject to `options`.
 * NOTE(review): map keys are presumably each entry's `path` — confirm.
 */
export declare function collectProjectFileStats(project_root_path: string, options?: RemoteSyncScanOptions): Promise<Map<string, RemoteSyncProjectFileStat>>;
|
|
161
|
+
/** Resolves to the list of upload candidates for the project, subject to `options`. */
export declare function collectUploadCandidates(project_root_path: string, options?: RemoteSyncScanOptions): Promise<RemoteSyncUploadCandidate[]>;
|
|
162
|
+
/**
 * Resolves to a `BuildRemoteSyncDeltaResult` for the project, given an optional previous state.
 * NOTE(review): `force_full_upsert` presumably upserts every file regardless of `previous_state` — confirm.
 */
export declare function buildRemoteSyncDeltaFromState(input: {
|
|
163
|
+
project_root_path: string;
|
|
164
|
+
previous_state?: RemoteSyncStateFile;
|
|
165
|
+
force_full_upsert: boolean;
|
|
166
|
+
options?: RemoteSyncScanOptions;
|
|
167
|
+
}): Promise<BuildRemoteSyncDeltaResult>;
|
|
168
|
+
/**
 * Returns a size estimate for a delta request with the given fields.
 * NOTE(review): the unit is presumably bytes of the serialized request — confirm.
 */
export declare function estimateRemoteSyncDeltaRequestSize(input: {
|
|
169
|
+
project_root_path: string;
|
|
170
|
+
workspace_id?: string;
|
|
171
|
+
base_index_version?: string;
|
|
172
|
+
upsert_files: RemoteSyncUploadCandidate[];
|
|
173
|
+
deleted_paths: string[];
|
|
174
|
+
}): number;
|
|
175
|
+
/**
 * Splits `delta` into a list of `RemoteSyncDeltaBatch` items.
 * NOTE(review): `max_body_bytes` is presumably the size budget for each batch's request — confirm.
 */
export declare function splitRemoteSyncDeltaIntoBatches(input: {
|
|
176
|
+
project_root_path: string;
|
|
177
|
+
workspace_id?: string;
|
|
178
|
+
base_index_version?: string;
|
|
179
|
+
delta: RemoteSyncDeltaPayload;
|
|
180
|
+
max_body_bytes: number;
|
|
181
|
+
}): RemoteSyncDeltaBatch[];
|
|
182
|
+
/**
 * Resolves to the sync state stored at `path`, or `undefined`.
 * NOTE(review): which conditions yield `undefined` (missing or invalid file) is not visible here.
 */
export declare function readRemoteSyncState(path: string): Promise<RemoteSyncStateFile | undefined>;
|
|
183
|
+
/** Writes `state` to `path`, per the name; resolves with no value. */
export declare function writeRemoteSyncState(path: string, state: RemoteSyncStateFile): Promise<void>;
|
|
184
|
+
/**
 * Predicate over an unknown error value.
 * NOTE(review): per the name, identifies a stale base index version; the matching criteria are not visible here.
 */
export declare function isStaleBaseIndexError(error: unknown): boolean;
|
|
185
|
+
/**
 * Predicate over an unknown error value.
 * NOTE(review): per the name, identifies a remote without delta sync support; criteria not visible here.
 */
export declare function isDeltaUnsupportedError(error: unknown): boolean;
|
|
186
|
+
/**
 * Predicate over an unknown error value.
 * NOTE(review): per the name, identifies a remote without the blob-commit v2 protocol; criteria not visible here.
 */
export declare function isBlobCommitV2UnsupportedError(error: unknown): boolean;
|
|
187
|
+
/** Runs a sync and resolves to its result, including which `protocol` was used. */
export declare function runRemoteAdaptiveSync(input: RunRemoteAdaptiveSyncInput): Promise<RunRemoteAdaptiveSyncResult>;
|
|
188
|
+
/**
 * Runs `fn` and resolves to its result.
 * NOTE(review): per the name, failed calls are presumably retried up to `retries` times with a
 * delay starting at `initial_delay_ms` — the exact backoff policy is not visible here.
 */
export declare function retryWithBackoff<T>(input: {
|
|
189
|
+
fn: () => Promise<T>;
|
|
190
|
+
retries: number;
|
|
191
|
+
initial_delay_ms: number;
|
|
192
|
+
}): Promise<T>;
|
|
193
|
+
/** Runs a delta sync with the given input and resolves to a `RunRemoteDeltaSyncResult`. */
export declare function runRemoteDeltaSync(input: RunRemoteDeltaSyncInput): Promise<RunRemoteDeltaSyncResult>;
|