@rce-mcp/retrieval-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +19 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/chunking.d.ts +50 -0
- package/dist/chunking.js +520 -0
- package/dist/index.d.ts +390 -0
- package/dist/index.js +3417 -0
- package/dist/remote-sync.d.ts +116 -0
- package/dist/remote-sync.js +476 -0
- package/package.json +33 -0
- package/scripts/poc-node-parser-host.cjs +101 -0
- package/scripts/poc-parser-availability-benchmark.ts +290 -0
- package/src/chunking.ts +641 -0
- package/src/index.ts +4338 -0
- package/src/remote-sync.ts +651 -0
- package/test/benchmark.thresholds.test.ts +752 -0
- package/test/chunking.language-aware.test.ts +279 -0
- package/test/chunking.parser-availability.poc.test.ts +60 -0
- package/test/embedding-provider.test.ts +121 -0
- package/test/enhance-confidence.test.ts +357 -0
- package/test/integration.test.ts +324 -0
- package/test/local-sqlite.integration.test.ts +258 -0
- package/test/remote-sync.integration.test.ts +177 -0
- package/tsconfig.build.json +17 -0
- package/tsconfig.json +4 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
import type { EnhancePromptInput, EnhancePromptOutput, SearchContextInput, SearchContextOutput } from "@rce-mcp/contracts";
|
|
2
|
+
import { type CandidateScoreWeights, type IndexRepository, type QueryCache, type WorkspaceRecord } from "@rce-mcp/data-plane";
|
|
3
|
+
import { type Observability } from "@rce-mcp/observability";
|
|
4
|
+
import { type ChunkingStrategy } from "./chunking.js";
|
|
5
|
+
/**
 * Default settings for the OpenAI-compatible embedding provider.
 *
 * Only the literal values are visible in this declaration file; where each
 * default is applied is decided by the implementation.
 */

/** Default base URL of the OpenAI-compatible embedding endpoint. */
export declare const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BASE_URL = "https://router.tumuer.me/v1";
/** Default embedding model identifier. */
export declare const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
/** Default embedding vector length. */
export declare const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_DIMENSIONS = 2560;
/** Default request timeout, in milliseconds (per the `_MS` suffix). */
export declare const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS = 10000;
/** Default batch size (presumably texts per request — confirm in the implementation). */
export declare const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE = 64;
/** Default maximum number of retries. */
export declare const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES = 2;
|
|
11
|
+
/**
 * Numeric tuning values for path-based score biasing.
 *
 * Every member is a required `number`. The field names indicate boosts and
 * penalties keyed on path or intent categories, but how each value is combined
 * into a score is defined in the implementation, not in this declaration.
 */
export interface RetrievalPathBiasConfig {
    source_path_boost: number;
    low_priority_prefix_penalty: number;
    low_priority_substring_penalty: number;
    low_priority_asset_penalty: number;
    lockfile_penalty: number;
    doc_intent_docs_boost: number;
    doc_intent_markdown_boost: number;
    docs_without_doc_intent_penalty: number;
    non_doc_markdown_penalty: number;
    code_intent_docs_penalty: number;
    doc_intent_source_penalty: number;
    workspace_manifest_root_boost: number;
    workspace_manifest_nested_boost: number;
    ui_component_tsx_boost: number;
    ui_component_css_penalty: number;
    docs_archive_penalty: number;
    public_path_penalty: number;
    test_path_penalty: number;
    example_path_penalty: number;
    declaration_file_penalty: number;
    test_intent_test_boost: number;
    example_intent_example_boost: number;
    filename_token_match_boost: number;
    negation_avoid_docs_penalty: number;
    negation_avoid_tests_penalty: number;
    negation_avoid_examples_penalty: number;
    negation_avoid_archive_penalty: number;
    /** Presumably the lower clamp for the summed bias — confirm in the implementation. */
    min_total_bias: number;
    /** Presumably the upper clamp for the summed bias — confirm in the implementation. */
    max_total_bias: number;
}
|
|
42
|
+
/**
 * Numeric settings for the rerank stage.
 *
 * All members are required numbers; their exact use is defined by the
 * implementation rather than by this declaration.
 */
export interface RetrievalRerankConfig {
    low_information_penalty: number;
    max_chunks_per_path_default: number;
    max_chunks_per_path_file_lookup: number;
    same_directory_penalty: number;
    same_extension_penalty: number;
}
|
|
49
|
+
/**
 * Complete scoring configuration: candidate weights, path bias values and
 * rerank settings. All three sections are required.
 */
export interface RetrievalScoringConfig {
    /** Weights type imported from `@rce-mcp/data-plane`. */
    candidate_weights: CandidateScoreWeights;
    path_bias: RetrievalPathBiasConfig;
    rerank: RetrievalRerankConfig;
}
|
|
54
|
+
/**
 * Limits for the prompt enhancer. All members are required numbers.
 */
export interface RetrievalEnhancerConfig {
    max_expansion_hints: number;
    max_candidates_pre_rerank: number;
    /** Rerank timeout in milliseconds (per the `_ms` suffix). */
    rerank_timeout_ms: number;
}
|
|
59
|
+
/**
 * Chunking configuration.
 */
export interface RetrievalChunkingConfig {
    /** Primary chunking strategy; the type comes from `./chunking.js`. */
    strategy: ChunkingStrategy;
    /** Fallback strategy; `"sliding"` is the only value the type allows. */
    fallback_strategy: "sliding";
    /** Parse timeout in milliseconds (per the `_ms` suffix). */
    parse_timeout_ms: number;
    /** Language identifiers; the accepted values are not visible in this declaration. */
    enabled_languages: string[];
}
|
|
65
|
+
/**
 * Override shape for {@link RetrievalScoringConfig}: each section is optional,
 * and every field inside a supplied section is optional as well.
 */
export type RetrievalScoringConfigInput = Partial<{
    candidate_weights: Partial<CandidateScoreWeights>;
    path_bias: Partial<RetrievalPathBiasConfig>;
    rerank: Partial<RetrievalRerankConfig>;
}>;
|
|
70
|
+
/** Override shape for the enhancer configuration: every field is optional. */
export type RetrievalEnhancerConfigInput = Partial<RetrievalEnhancerConfig>;
|
|
71
|
+
/**
 * Override shape for the chunking configuration: every field is optional.
 */
export type RetrievalChunkingConfigInput = Partial<{
    strategy: ChunkingStrategy;
    fallback_strategy: "sliding";
    parse_timeout_ms: number;
    enabled_languages: string[];
}>;
|
|
77
|
+
/** Scoring configuration exported under the "baseline" name; values are defined in the implementation. */
export declare const BASELINE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig;
/** Scoring configuration exported under the "conservative" name; values are defined in the implementation. */
export declare const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG: RetrievalScoringConfig;
/** Default enhancer configuration. */
export declare const DEFAULT_RETRIEVAL_ENHANCER_CONFIG: RetrievalEnhancerConfig;
/** Default chunking configuration. */
export declare const DEFAULT_RETRIEVAL_CHUNKING_CONFIG: RetrievalChunkingConfig;
|
|
81
|
+
/**
 * Module-private table of built-in scoring profiles, keyed by profile id.
 * It is not exported; only its key type is exposed.
 */
declare const BUILTIN_RETRIEVAL_SCORING_PROFILES: {
    readonly baseline: RetrievalScoringConfig;
    readonly conservative: RetrievalScoringConfig;
};
/** Identifier of a built-in scoring profile: `"baseline"` or `"conservative"`. */
export type BuiltinRetrievalScoringProfileId = keyof typeof BUILTIN_RETRIEVAL_SCORING_PROFILES;
|
|
86
|
+
/**
 * Resolves a profile id to a built-in scoring profile.
 *
 * @param profile_id Arbitrary string or `undefined`. The return type is always
 *   a built-in id, so unknown or missing ids presumably map to some built-in
 *   profile — the fallback rule is not visible in this declaration.
 * @returns The resolved built-in profile id together with its configuration.
 */
export declare function resolveRetrievalScoringProfile(profile_id: string | undefined): {
    profile_id: BuiltinRetrievalScoringProfileId;
    config: RetrievalScoringConfig;
};
|
|
90
|
+
/**
 * Combines a complete scoring configuration with optional partial overrides.
 *
 * @param base Complete configuration to start from.
 * @param overrides Optional partial overrides.
 * @returns A complete scoring configuration. Merge precedence and whether
 *   `base` is mutated are not visible in this declaration.
 */
export declare function mergeRetrievalScoringConfig(base: RetrievalScoringConfig, overrides?: RetrievalScoringConfigInput): RetrievalScoringConfig;
|
|
91
|
+
/**
 * Combines a complete enhancer configuration with optional partial overrides.
 *
 * @param base Complete configuration to start from.
 * @param overrides Optional partial overrides.
 * @returns A complete enhancer configuration. Merge precedence and whether
 *   `base` is mutated are not visible in this declaration.
 */
export declare function mergeRetrievalEnhancerConfig(base: RetrievalEnhancerConfig, overrides?: RetrievalEnhancerConfigInput): RetrievalEnhancerConfig;
|
|
92
|
+
/**
 * Combines a complete chunking configuration with optional partial overrides.
 *
 * @param base Complete configuration to start from.
 * @param overrides Optional partial overrides.
 * @returns A complete chunking configuration. Merge precedence and whether
 *   `base` is mutated are not visible in this declaration.
 */
export declare function mergeRetrievalChunkingConfig(base: RetrievalChunkingConfig, overrides?: RetrievalChunkingConfigInput): RetrievalChunkingConfig;
|
|
93
|
+
/** Module-private tuple of the reason strings; the source of {@link RetrievalReason}. */
declare const REASON_STRINGS: readonly ["semantic match", "exact symbol match", "path and token overlap", "recently modified relevant module"];
/** Union of the four literal reason strings listed in `REASON_STRINGS`. */
export type RetrievalReason = (typeof REASON_STRINGS)[number];
|
|
95
|
+
/**
 * Error type carrying a machine-readable `code` alongside the message.
 */
export declare class RetrievalError extends Error {
    /** Failure category; one of the three literal codes. */
    readonly code: "INVALID_ARGUMENT" | "NOT_FOUND" | "UPSTREAM_FAILURE";
    /**
     * @param code Failure category.
     * @param message Human-readable description.
     */
    constructor(code: "INVALID_ARGUMENT" | "NOT_FOUND" | "UPSTREAM_FAILURE", message: string);
}
|
|
99
|
+
/**
 * A single file supplied for indexing.
 */
export interface RawFile {
    /** File path. */
    path: string;
    /** File content as a string. */
    content: string;
    /** Optional language identifier. */
    language?: string;
    /** Optional timestamp string; the expected format is not visible in this declaration. */
    updated_at?: string;
    /** Optional flag marking the file as generated. */
    generated?: boolean;
    /** Optional flag marking the file as binary. */
    binary?: boolean;
}
|
|
107
|
+
/**
 * Full-upload indexing input: a set of files for one tenant/workspace under a
 * given index version.
 */
export interface IndexUploadArtifact {
    tenant_id: string;
    workspace_id: string;
    index_version: string;
    files: RawFile[];
    /** Optional manifest reference. */
    manifest?: {
        object_key: string;
        checksum: string;
    };
}
|
|
117
|
+
/**
 * Delta indexing input: files to upsert and paths to delete for one
 * tenant/workspace under a given index version.
 */
export interface IndexDeltaArtifact {
    tenant_id: string;
    workspace_id: string;
    index_version: string;
    /** Optional version the delta is relative to; behavior when omitted is not visible here. */
    base_index_version?: string;
    upsert_files: RawFile[];
    deleted_paths: string[];
}
|
|
125
|
+
/**
 * Module-private index record.
 *
 * NOTE(review): despite the name, `status` also admits `"indexing"` and
 * `"failed"`, so a value of this type is not guaranteed to be ready.
 */
interface ReadyIndex {
    index_id: string;
    tenant_id: string;
    workspace_id: string;
    index_version: string;
    status: "indexing" | "ready" | "failed";
    created_at: string;
    updated_at: string;
}
|
|
134
|
+
/**
 * Module-private record describing a stored file.
 */
interface PersistedFile {
    file_id: string;
    repo_path: string;
    content_hash: string;
    language?: string;
}
|
|
140
|
+
/**
 * A warning attached to one path in an indexing report.
 */
export interface IndexingWarning {
    path: string;
    reason: string;
    /** Warning category; one of the two literal values. */
    category: "secret_exclusion" | "filter_exclusion";
}
|
|
145
|
+
/**
 * Result returned by the indexing operations.
 */
export interface IndexingReport {
    workspace_id: string;
    index_version: string;
    /** `"ready"` is the only value the type allows. */
    status: "ready";
    /** Per-outcome file counts. */
    counts: {
        added: number;
        modified: number;
        deleted: number;
        unchanged: number;
        skipped: number;
    };
    /** Skipped files with the reason for each. */
    skipped_files: Array<{
        path: string;
        reason: string;
    }>;
    warnings: IndexingWarning[];
}
|
|
162
|
+
/**
 * Optional settings accepted by the `RetrievalCore` constructor. Every member
 * is optional; defaults are chosen by the implementation.
 */
export interface RetrievalCoreOptions {
    /** Cache time-to-live in seconds (per the name). */
    cacheTtlSeconds?: number;
    embeddingProvider?: EmbeddingProvider;
    embeddingDescriptor?: EmbeddingDescriptor;
    observability?: Observability;
    /** Built-in profile id. */
    scoringProfile?: BuiltinRetrievalScoringProfileId;
    /**
     * Free-form profile id.
     * NOTE(review): overlaps with `scoringProfile`; which one wins when both
     * are set is not visible in this declaration — confirm in the implementation.
     */
    scoringProfileId?: string;
    scoringConfig?: RetrievalScoringConfigInput;
    enhancerConfig?: RetrievalEnhancerConfigInput;
    chunkingConfig?: RetrievalChunkingConfigInput;
    enhancerDecisionTraceEnabled?: boolean;
}
|
|
174
|
+
/**
 * Describes an embedding source: provider name, vector dimensions, and an
 * optional model and version.
 */
export interface EmbeddingDescriptor {
    provider: string;
    model?: string;
    dimensions: number;
    version?: string;
}
|
|
180
|
+
/** Purpose tag passed with an embedding request: `"index"` or `"query"`. */
export type EmbeddingPurpose = "index" | "query";
|
|
181
|
+
/**
 * Contract for anything that can turn texts into embedding vectors.
 */
export interface EmbeddingProvider {
    /**
     * Embeds a batch of texts.
     *
     * @param input.texts Texts to embed.
     * @param input.purpose Whether the embeddings are for indexing or querying.
     * @returns An array of numeric vectors (presumably one per input text, in
     *   order — confirm against the implementations).
     */
    embed(input: {
        texts: string[];
        purpose: EmbeddingPurpose;
    }): Promise<number[][]>;
    /** Optional self-description of the provider. */
    describe?(): EmbeddingDescriptor;
}
|
|
188
|
+
/**
 * Constructor options for `DeterministicEmbeddingProvider`. Every member is
 * optional.
 */
export interface DeterministicEmbeddingProviderOptions {
    dimensions?: number;
    model?: string;
    version?: string;
}
|
|
193
|
+
/**
 * Constructor options for `OpenAICompatibleEmbeddingProvider`.
 *
 * `base_url` and `api_key` are required; everything else is optional.
 */
export interface OpenAICompatibleEmbeddingProviderOptions {
    base_url: string;
    api_key: string;
    model?: string;
    dimensions?: number;
    /** Request timeout in milliseconds (per the `_ms` suffix). */
    timeout_ms?: number;
    batch_size?: number;
    max_retries?: number;
    observability?: Observability;
}
|
|
203
|
+
/**
 * An {@link EmbeddingProvider} implementation named "deterministic". How its
 * vectors are computed is not visible in this declaration.
 */
export declare class DeterministicEmbeddingProvider implements EmbeddingProvider {
    private readonly dimensions;
    private readonly model;
    private readonly version;
    /** @param options Optional settings; all fields may be omitted. */
    constructor(options?: DeterministicEmbeddingProviderOptions);
    /**
     * Embeds a batch of texts.
     *
     * @param input.texts Texts to embed.
     * @param input.purpose Whether the embeddings are for indexing or querying.
     * @returns An array of numeric vectors.
     */
    embed(input: {
        texts: string[];
        purpose: EmbeddingPurpose;
    }): Promise<number[][]>;
    /** Returns a descriptor for this provider. */
    describe(): EmbeddingDescriptor;
}
|
|
214
|
+
/**
 * An {@link EmbeddingProvider} implementation for OpenAI-compatible embedding
 * endpoints.
 *
 * The private member names indicate batching, retries and a timeout, but the
 * request protocol and retry policy are defined in the implementation and are
 * not visible in this declaration.
 */
export declare class OpenAICompatibleEmbeddingProvider implements EmbeddingProvider {
    private readonly baseUrl;
    private readonly endpoint;
    private readonly apiKey;
    private readonly model;
    private readonly dimensions;
    private readonly timeoutMs;
    private readonly batchSize;
    private readonly maxRetries;
    private readonly observability;
    /** @param options Provider settings; `base_url` and `api_key` are required. */
    constructor(options: OpenAICompatibleEmbeddingProviderOptions);
    /** Returns a descriptor for this provider. */
    describe(): EmbeddingDescriptor;
    /**
     * Embeds a batch of texts.
     *
     * @param input.texts Texts to embed.
     * @param input.purpose Whether the embeddings are for indexing or querying.
     * @returns An array of numeric vectors.
     */
    embed(input: {
        texts: string[];
        purpose: EmbeddingPurpose;
    }): Promise<number[][]>;
    private embedBatchWithRetries;
    private embedBatchOnce;
    private retryDelayMs;
    private toProviderFailure;
}
|
|
235
|
+
/**
 * `IndexRepository` implementation named "in-memory".
 *
 * The declaration exposes only method signatures; storage layout, ordering
 * guarantees and error behavior are defined in the implementation. Every
 * method is asynchronous.
 */
export declare class InMemoryIndexStore implements IndexRepository {
    private readonly workspaces;
    private readonly workspacesByPath;
    private readonly indexes;
    private readonly workspaceIndexes;
    private readonly filesByIndex;
    private readonly chunksByFile;
    private readonly indexMetadata;
    /** Takes no arguments and resolves to nothing. */
    migrate(): Promise<void>;
    /** Inserts or updates a workspace record. */
    upsertWorkspace(input: WorkspaceRecord): Promise<void>;
    /** Looks up a workspace by tenant and project root path; resolves to `undefined` when none matches. */
    resolveWorkspaceByProjectRoot(tenant_id: string, project_root_path: string): Promise<WorkspaceRecord | undefined>;
    /** Looks up a workspace by tenant and workspace id; resolves to `undefined` when none matches. */
    resolveWorkspaceByWorkspaceId(tenant_id: string, workspace_id: string): Promise<WorkspaceRecord | undefined>;
    /** Creates an index record for the given version; `status` is optional. */
    createIndexVersion(input: {
        tenant_id: string;
        workspace_id: string;
        index_version: string;
        status?: "indexing" | "ready" | "failed";
    }): Promise<ReadyIndex>;
    /** Sets the status of an existing index. */
    markIndexStatus(input: {
        tenant_id: string;
        workspace_id: string;
        index_id: string;
        status: "indexing" | "ready" | "failed";
    }): Promise<void>;
    /** Fetches the index for a specific version, if any. */
    getIndexByVersion(input: {
        tenant_id: string;
        workspace_id: string;
        index_version: string;
    }): Promise<ReadyIndex | undefined>;
    /** Resets the content of an index; exactly what is cleared is not visible in this declaration. */
    resetIndexContent(input: {
        tenant_id: string;
        index_id: string;
    }): Promise<void>;
    /** Fetches the latest ready index for a workspace, if any. */
    getLatestReadyIndex(input: {
        tenant_id: string;
        workspace_id: string;
    }): Promise<ReadyIndex | undefined>;
    /** Lists the files stored under an index. */
    getFilesByIndex(input: {
        tenant_id: string;
        index_id: string;
    }): Promise<PersistedFile[]>;
    /** Copies one file, identified by `repo_path`, from a source index to a target index. */
    copyFileFromIndex(input: {
        tenant_id: string;
        source_index_id: string;
        target_index_id: string;
        repo_path: string;
    }): Promise<void>;
    /** Inserts or updates a file record and resolves to its `file_id`. */
    upsertFile(input: {
        tenant_id: string;
        index_id: string;
        repo_path: string;
        content_hash: string;
        size_bytes: number;
        language?: string;
    }): Promise<{
        file_id: string;
    }>;
    /** Replaces the chunks stored for a file with the supplied list. */
    replaceFileChunks(input: {
        tenant_id: string;
        file_id: string;
        repo_path: string;
        chunks: Array<{
            start_line: number;
            end_line: number;
            snippet: string;
            embedding: number[];
            generated?: boolean;
            updated_at?: string;
        }>;
    }): Promise<void>;
    /** Declared with no parameters in this implementation. */
    saveManifest(): Promise<void>;
    /** Stores embedding and chunking metadata for an index. */
    saveIndexMetadata(input: {
        tenant_id: string;
        index_id: string;
        embedding_provider: string;
        embedding_model?: string;
        embedding_dimensions: number;
        embedding_version?: string;
        chunking_strategy: "language_aware" | "sliding";
        chunking_fallback_strategy: "sliding";
    }): Promise<void>;
    /** Fetches the stored metadata for an index, if any. */
    getIndexMetadata(input: {
        tenant_id: string;
        index_id: string;
    }): Promise<{
        embedding_provider: string;
        embedding_model?: string;
        embedding_dimensions: number;
        embedding_version?: string;
        chunking_strategy: "language_aware" | "sliding";
        chunking_fallback_strategy: "sliding";
        created_at: string;
    } | undefined>;
    /**
     * Lists the chunks of an index, optionally filtered by language, path
     * prefix or glob. Filter matching rules are not visible in this declaration.
     */
    listChunksByIndex(input: {
        tenant_id: string;
        index_id: string;
        filters?: {
            language?: string;
            path_prefix?: string;
            glob?: string;
        };
    }): Promise<Array<{
        chunk_id: string;
        file_id: string;
        path: string;
        start_line: number;
        end_line: number;
        snippet: string;
        language?: string;
        generated?: boolean;
        updated_at: string;
        embedding: number[];
    }>>;
}
|
|
349
|
+
/**
 * Main entry point of the package: indexes artifacts and serves context
 * search and prompt enhancement on top of an `IndexRepository` and a
 * `QueryCache`.
 *
 * Only signatures are visible here; the retrieval and ranking behavior is
 * defined in the implementation.
 */
export declare class RetrievalCore {
    private readonly store;
    private readonly cache;
    private readonly cacheTtlSeconds;
    private readonly embeddingProvider;
    private readonly embeddingDescriptor;
    private readonly observability;
    private readonly scoringConfig;
    private readonly scoringProfileId;
    private readonly scoringConfigChecksum;
    private readonly enhancerConfig;
    private readonly chunkingConfig;
    private readonly enhancerDecisionTraceEnabled;
    private cacheHits;
    private cacheMisses;
    /**
     * @param store Index repository.
     * @param cache Query cache.
     * @param options Optional settings; see {@link RetrievalCoreOptions}.
     */
    constructor(store: IndexRepository, cache: QueryCache, options?: RetrievalCoreOptions);
    /** Indexes a full upload artifact and resolves to a report. */
    indexArtifact(artifact: IndexUploadArtifact): Promise<IndexingReport>;
    /** Indexes a delta artifact (upserts plus deletions) and resolves to a report. */
    indexArtifactDelta(artifact: IndexDeltaArtifact): Promise<IndexingReport>;
    /** Looks up an index version; resolves to its id and status, or `undefined` when absent. */
    getIndexVersion(input: {
        tenant_id: string;
        workspace_id: string;
        index_version: string;
    }): Promise<{
        index_id: string;
        status: "indexing" | "ready" | "failed";
    } | undefined>;
    /** Runs a context search for a workspace; `workspace_id` is required. */
    searchContext(input: {
        trace_id: string;
        tenant_id: string;
        workspace_id: string;
        request: SearchContextInput;
    }): Promise<SearchContextOutput>;
    /** Enhances a prompt; unlike `searchContext`, `workspace_id` is optional here. */
    enhancePrompt(input: {
        trace_id: string;
        tenant_id: string;
        workspace_id?: string;
        request: EnhancePromptInput;
    }): Promise<EnhancePromptOutput>;
}
|
|
388
|
+
/**
 * Creates a `RetrievalCore` without any arguments. Which store, cache and
 * options it is wired with is not visible in this declaration.
 */
export declare function createDefaultRetrievalCore(): RetrievalCore;
|
|
389
|
+
/**
 * Seeds a workspace index from a full upload artifact using the given core.
 *
 * @param core Retrieval core to index into.
 * @param artifact Upload artifact to index.
 * @returns The resulting indexing report.
 */
export declare function seedWorkspaceIndex(core: RetrievalCore, artifact: IndexUploadArtifact): Promise<IndexingReport>;
|
|
390
|
+
export * from "./remote-sync.js";
|