@rce-mcp/retrieval-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,3417 @@
1
+ import { createHash, randomUUID } from "node:crypto";
2
+ import { buildQueryCacheKey } from "@rce-mcp/data-plane";
3
+ import { InMemoryQueryCache } from "@rce-mcp/data-plane";
4
+ import { getObservability } from "@rce-mcp/observability";
5
+ import { buildChunksForFile, getChunkingParserAvailabilitySnapshot } from "./chunking.js";
6
// --- Retrieval limits -------------------------------------------------------
// NOTE(review): MAX_FILE_SIZE_BYTES, MAX_TOP_K and MAX_CONTEXT_BUDGET_TOKENS are
// not referenced in the visible part of this file; their meaning is inferred
// from the names only — confirm against the callers further down.
const MAX_FILE_SIZE_BYTES = 1_000_000;
// The next three values are forwarded to the chunker by buildChunks() as
// max_chunks_per_file, target_chunk_tokens and chunk_overlap_tokens.
const MAX_CHUNKS_PER_FILE = 300;
const TARGET_CHUNK_TOKENS = 220;
const CHUNK_OVERLAP_TOKENS = 40;
const MAX_TOP_K = 20;
const MAX_CONTEXT_BUDGET_TOKENS = 12_000;

// --- OpenAI-compatible embedding provider defaults ---------------------------
// Model, dimensions, timeout, batch size and retry count are the fallbacks the
// OpenAICompatibleEmbeddingProvider constructor applies when an option is
// omitted. The base URL default is exported but not applied by that constructor.
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BASE_URL = "https://router.tumuer.me/v1";
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B";
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_DIMENSIONS = 2560;
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS = 10_000;
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE = 64;
export const DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES = 2;
18
/**
 * Default candidate scoring weights; copied into the baseline scoring profile.
 * validateScoringConfig() requires lexical_weight + vector_weight to equal 1
 * (within 0.001).
 */
const DEFAULT_CANDIDATE_SCORE_WEIGHTS = {
  // Blend between the two relevance signals.
  lexical_weight: 0.6,
  vector_weight: 0.4,
  // Flat adjustments (how they are applied is outside the visible code).
  path_match_boost: 0.2,
  recency_boost: 0.1,
  generated_penalty: 0.2
};
25
/**
 * Scoring profile registered under the id "baseline".
 *
 * - candidate_weights: a private copy of DEFAULT_CANDIDATE_SCORE_WEIGHTS.
 * - path_bias: the per-rule boosts/penalties that pathQualityBias() sums and
 *   then clamps to [min_total_bias, max_total_bias].
 * - rerank: rerank-stage knobs (their consumer is outside the visible code).
 */
export const BASELINE_RETRIEVAL_SCORING_CONFIG = {
  candidate_weights: { ...DEFAULT_CANDIDATE_SCORE_WEIGHTS },
  path_bias: {
    // Location-based adjustments.
    source_path_boost: 0.12,
    low_priority_prefix_penalty: 0.15,
    low_priority_substring_penalty: 0.15,
    low_priority_asset_penalty: 0.2,
    lockfile_penalty: 0.2,
    // Documentation vs. code intent.
    doc_intent_docs_boost: 0.18,
    doc_intent_markdown_boost: 0.08,
    docs_without_doc_intent_penalty: 0.2,
    non_doc_markdown_penalty: 0.15,
    code_intent_docs_penalty: 0.08,
    doc_intent_source_penalty: 0.08,
    // Cargo workspace manifests.
    workspace_manifest_root_boost: 0.26,
    workspace_manifest_nested_boost: 0.16,
    // UI component queries.
    ui_component_tsx_boost: 0.18,
    ui_component_css_penalty: 0.2,
    // Archives, public assets, tests, examples, declaration files.
    docs_archive_penalty: 0.15,
    public_path_penalty: 0.08,
    test_path_penalty: 0.35,
    example_path_penalty: 0.2,
    declaration_file_penalty: 0.12,
    test_intent_test_boost: 0.08,
    example_intent_example_boost: 0.06,
    filename_token_match_boost: 0.06,
    // Explicit "avoid X" phrasing in the query.
    negation_avoid_docs_penalty: 0.35,
    negation_avoid_tests_penalty: 0.35,
    negation_avoid_examples_penalty: 0.3,
    negation_avoid_archive_penalty: 0.35,
    // Clamp applied to the summed bias.
    min_total_bias: -0.45,
    max_total_bias: 0.35
  },
  rerank: {
    low_information_penalty: 0.15,
    max_chunks_per_path_default: 2,
    max_chunks_per_path_file_lookup: 1,
    same_directory_penalty: 0,
    same_extension_penalty: 0
  }
};
66
/**
 * Scoring profile registered under the id "conservative".
 *
 * Same shape as the baseline profile; path_bias entries are consumed by
 * pathQualityBias() and clamped to [min_total_bias, max_total_bias], which is a
 * narrower range here than in the baseline profile.
 */
export const CONSERVATIVE_RETRIEVAL_SCORING_CONFIG = {
  candidate_weights: {
    lexical_weight: 0.55,
    vector_weight: 0.45,
    path_match_boost: 0.1,
    recency_boost: 0.05,
    generated_penalty: 0.15
  },
  path_bias: {
    // Location-based adjustments.
    source_path_boost: 0.08,
    low_priority_prefix_penalty: 0.08,
    low_priority_substring_penalty: 0.08,
    low_priority_asset_penalty: 0.12,
    lockfile_penalty: 0.12,
    // Documentation vs. code intent.
    doc_intent_docs_boost: 0.1,
    doc_intent_markdown_boost: 0.05,
    docs_without_doc_intent_penalty: 0.08,
    non_doc_markdown_penalty: 0.08,
    code_intent_docs_penalty: 0.05,
    doc_intent_source_penalty: 0.05,
    // Cargo workspace manifests.
    workspace_manifest_root_boost: 0.14,
    workspace_manifest_nested_boost: 0.1,
    // UI component queries.
    ui_component_tsx_boost: 0.1,
    ui_component_css_penalty: 0.1,
    // Archives, public assets, tests, examples, declaration files.
    docs_archive_penalty: 0.1,
    public_path_penalty: 0.05,
    test_path_penalty: 0.12,
    example_path_penalty: 0.06,
    declaration_file_penalty: 0.08,
    test_intent_test_boost: 0.04,
    example_intent_example_boost: 0.04,
    filename_token_match_boost: 0.04,
    // Explicit "avoid X" phrasing in the query.
    negation_avoid_docs_penalty: 0.2,
    negation_avoid_tests_penalty: 0.2,
    negation_avoid_examples_penalty: 0.16,
    negation_avoid_archive_penalty: 0.2,
    // Clamp applied to the summed bias.
    min_total_bias: -0.25,
    max_total_bias: 0.2
  },
  rerank: {
    low_information_penalty: 0.1,
    max_chunks_per_path_default: 2,
    max_chunks_per_path_file_lookup: 1,
    same_directory_penalty: 0,
    same_extension_penalty: 0
  }
};
113
/**
 * Default enhancer settings. validateEnhancerConfig() requires each of the
 * three fields to be a positive integer.
 */
export const DEFAULT_RETRIEVAL_ENHANCER_CONFIG = {
  max_expansion_hints: 24,
  max_candidates_pre_rerank: 4,
  rerank_timeout_ms: 40
};
118
/**
 * Default chunking settings. validateChunkingConfig() accepts "language_aware"
 * or "sliding" for `strategy`, only "sliding" for `fallback_strategy`, a
 * positive integer `parse_timeout_ms`, and a non-empty `enabled_languages` list.
 */
export const DEFAULT_RETRIEVAL_CHUNKING_CONFIG = {
  strategy: "sliding",
  fallback_strategy: "sliding",
  parse_timeout_ms: 80,
  enabled_languages: [
    "typescript",
    "javascript",
    "python",
    "go"
  ]
};
124
/**
 * Registry of built-in scoring profiles, keyed by the lowercase profile id that
 * resolveRetrievalScoringProfile() looks up.
 */
const BUILTIN_RETRIEVAL_SCORING_PROFILES = {
  baseline: BASELINE_RETRIEVAL_SCORING_CONFIG,
  conservative: CONSERVATIVE_RETRIEVAL_SCORING_CONFIG
};
128
/**
 * Returns a new scoring config whose three sections are fresh shallow copies of
 * the input's sections, so mutating the result never touches `config`.
 *
 * @param {{candidate_weights: object, path_bias: object, rerank: object}} config
 * @returns {{candidate_weights: object, path_bias: object, rerank: object}}
 */
function deepCloneScoringConfig(config) {
  const { candidate_weights, path_bias, rerank } = config;
  const weightsCopy = { ...candidate_weights };
  const pathBiasCopy = { ...path_bias };
  const rerankCopy = { ...rerank };
  return {
    candidate_weights: weightsCopy,
    path_bias: pathBiasCopy,
    rerank: rerankCopy
  };
}
135
/**
 * Guard used by the scoring-config validator.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} label - Dotted field name included in the error message.
 * @returns {number} `value`, unchanged, when it is a finite number.
 * @throws {Error} When `value` is not a number, or is NaN/Infinity.
 */
function assertFiniteNumber(value, label) {
  // Number.isFinite never coerces, so it rejects non-number types as well.
  if (Number.isFinite(value)) {
    return value;
  }
  throw new Error(`invalid retrieval scoring config: ${label} must be a finite number`);
}
141
/**
 * Validates a complete scoring config and throws on the first violation.
 *
 * Checks, in order: the five candidate weights are finite; lexical + vector
 * weights sum to 1 (±0.001); every path_bias entry is finite; min_total_bias
 * does not exceed max_total_bias; rerank.low_information_penalty is finite; the
 * two per-path chunk limits are positive integers; the two diversity penalties
 * are finite and non-negative.
 *
 * @param {{candidate_weights: object, path_bias: object, rerank: object}} config
 * @returns {void}
 * @throws {Error} Describing the first invalid field.
 */
function validateScoringConfig(config) {
  const { candidate_weights: weights, path_bias: pathBias, rerank } = config;
  const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;

  const weightFields = [
    "lexical_weight",
    "vector_weight",
    "path_match_boost",
    "recency_boost",
    "generated_penalty"
  ];
  for (const field of weightFields) {
    assertFiniteNumber(weights[field], `candidate_weights.${field}`);
  }
  const blendTotal = weights.lexical_weight + weights.vector_weight;
  if (Math.abs(blendTotal - 1) > 0.001) {
    throw new Error("invalid retrieval scoring config: lexical_weight + vector_weight must equal 1");
  }

  Object.entries(pathBias).forEach(([key, value]) => {
    assertFiniteNumber(value, `path_bias.${key}`);
  });
  if (pathBias.min_total_bias > pathBias.max_total_bias) {
    throw new Error("invalid retrieval scoring config: path_bias.min_total_bias must be <= max_total_bias");
  }

  assertFiniteNumber(rerank.low_information_penalty, "rerank.low_information_penalty");
  if (!isPositiveInteger(rerank.max_chunks_per_path_default)) {
    throw new Error("invalid retrieval scoring config: rerank.max_chunks_per_path_default must be a positive integer");
  }
  if (!isPositiveInteger(rerank.max_chunks_per_path_file_lookup)) {
    throw new Error("invalid retrieval scoring config: rerank.max_chunks_per_path_file_lookup must be a positive integer");
  }
  // Both penalties are type-checked before either sign check runs.
  assertFiniteNumber(rerank.same_directory_penalty, "rerank.same_directory_penalty");
  assertFiniteNumber(rerank.same_extension_penalty, "rerank.same_extension_penalty");
  if (rerank.same_directory_penalty < 0) {
    throw new Error("invalid retrieval scoring config: rerank.same_directory_penalty must be >= 0");
  }
  if (rerank.same_extension_penalty < 0) {
    throw new Error("invalid retrieval scoring config: rerank.same_extension_penalty must be >= 0");
  }
}
176
/**
 * Resolves a built-in scoring profile by id.
 *
 * The id is trimmed and lowercased before lookup; `null`/`undefined` select
 * "baseline". The returned config is a fresh copy, so callers may mutate it.
 *
 * @param {string | null | undefined} profile_id - Profile id, e.g. "baseline".
 * @returns {{profile_id: string, config: object}} Normalized id and config copy.
 * @throws {Error} When the id does not name a built-in profile.
 */
export function resolveRetrievalScoringProfile(profile_id) {
  const normalized = (profile_id ?? "baseline").trim().toLowerCase();
  // Own-property check: the `in` operator would also accept names inherited
  // from Object.prototype ("constructor", "tostring", ...) and hand back an
  // empty config instead of rejecting the id.
  if (!Object.hasOwn(BUILTIN_RETRIEVAL_SCORING_PROFILES, normalized)) {
    throw new Error(`unknown retrieval scoring profile "${profile_id ?? ""}". Expected one of: ${Object.keys(BUILTIN_RETRIEVAL_SCORING_PROFILES).join(", ")}`);
  }
  return {
    profile_id: normalized,
    config: deepCloneScoringConfig(BUILTIN_RETRIEVAL_SCORING_PROFILES[normalized])
  };
}
186
/**
 * Layers partial overrides on top of a base scoring config and validates the
 * result. Neither `base` nor `overrides` is mutated.
 *
 * @param {object} base - Complete scoring config.
 * @param {object | null | undefined} overrides - Optional per-section partials
 *   (`candidate_weights`, `path_bias`, `rerank`); override keys win.
 * @returns {object} The merged, validated config.
 * @throws {Error} When the merged config fails validateScoringConfig().
 */
export function mergeRetrievalScoringConfig(base, overrides) {
  const merged = deepCloneScoringConfig(base);
  for (const section of ["candidate_weights", "path_bias", "rerank"]) {
    const patch = overrides?.[section];
    if (!patch) {
      continue;
    }
    merged[section] = { ...merged[section], ...patch };
  }
  validateScoringConfig(merged);
  return merged;
}
209
/**
 * Validates an enhancer config; each of the three fields must be a positive
 * integer. Fields are checked in declaration order and the first failure throws.
 *
 * @param {{max_expansion_hints: number, max_candidates_pre_rerank: number, rerank_timeout_ms: number}} config
 * @returns {void}
 * @throws {Error} Naming the first invalid field.
 */
function validateEnhancerConfig(config) {
  const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;
  if (!isPositiveInteger(config.max_expansion_hints)) {
    throw new Error("invalid retrieval enhancer config: max_expansion_hints must be a positive integer");
  }
  if (!isPositiveInteger(config.max_candidates_pre_rerank)) {
    throw new Error("invalid retrieval enhancer config: max_candidates_pre_rerank must be a positive integer");
  }
  if (!isPositiveInteger(config.rerank_timeout_ms)) {
    throw new Error("invalid retrieval enhancer config: rerank_timeout_ms must be a positive integer");
  }
}
220
/**
 * Returns `base` with `overrides` applied on top, after validation.
 *
 * @param {object} base - Complete enhancer config.
 * @param {object | null | undefined} overrides - Optional partial config.
 * @returns {object} A new, validated enhancer config.
 * @throws {Error} When the merged config fails validateEnhancerConfig().
 */
export function mergeRetrievalEnhancerConfig(base, overrides) {
  // Spreading null/undefined contributes no properties, so a missing
  // `overrides` yields a plain copy of `base`.
  const merged = { ...base, ...overrides };
  validateEnhancerConfig(merged);
  return merged;
}
228
/**
 * Normalizes a list of language names: trims, lowercases, drops blank entries
 * and removes duplicates while keeping first-seen order.
 *
 * @param {Iterable<string>} value - Language names (normally an array).
 * @returns {string[]} Normalized, de-duplicated names.
 * @throws {Error} When `value` is a bare string or not iterable, or when an
 *   entry is not a string.
 */
function normalizeChunkingLanguageList(value) {
  // A bare string is iterable, so "python" would otherwise be split into single
  // characters and silently accepted as six "languages".
  const isUsableList = value != null &&
    typeof value !== "string" &&
    typeof value[Symbol.iterator] === "function";
  if (!isUsableList) {
    throw new Error("invalid retrieval chunking config: enabled_languages must be a list of strings");
  }
  const deduped = new Set();
  for (const language of value) {
    // Fail with a config error rather than a TypeError from `.trim()`.
    if (typeof language !== "string") {
      throw new Error("invalid retrieval chunking config: enabled_languages must contain non-empty strings");
    }
    const normalized = language.trim().toLowerCase();
    if (normalized.length > 0) {
      deduped.add(normalized);
    }
  }
  return [...deduped];
}
239
/**
 * Validates a chunking config and throws on the first violation.
 *
 * @param {{strategy: string, fallback_strategy: string, parse_timeout_ms: number, enabled_languages: string[]}} config
 * @returns {void}
 * @throws {Error} When `strategy` is not "language_aware" or "sliding", when
 *   `fallback_strategy` is not "sliding", when `parse_timeout_ms` is not a
 *   positive integer, or when `enabled_languages` is not a non-empty array of
 *   non-blank strings.
 */
function validateChunkingConfig(config) {
  const { strategy, fallback_strategy, parse_timeout_ms, enabled_languages } = config;
  if (strategy !== "language_aware" && strategy !== "sliding") {
    throw new Error("invalid retrieval chunking config: strategy must be language_aware|sliding");
  }
  if (fallback_strategy !== "sliding") {
    throw new Error("invalid retrieval chunking config: fallback_strategy must be sliding");
  }
  const timeoutIsValid = Number.isInteger(parse_timeout_ms) && parse_timeout_ms > 0;
  if (!timeoutIsValid) {
    throw new Error("invalid retrieval chunking config: parse_timeout_ms must be a positive integer");
  }
  const hasLanguages = Array.isArray(enabled_languages) && enabled_languages.length > 0;
  if (!hasLanguages) {
    throw new Error("invalid retrieval chunking config: enabled_languages must include at least one language");
  }
  for (let index = 0; index < enabled_languages.length; index += 1) {
    const language = enabled_languages[index];
    const isNamed = typeof language === "string" && language.trim().length > 0;
    if (!isNamed) {
      throw new Error("invalid retrieval chunking config: enabled_languages must contain non-empty strings");
    }
  }
}
258
/**
 * Applies `overrides` to a base chunking config, normalizes the language list
 * and validates the result.
 *
 * @param {object} base - Complete chunking config.
 * @param {object | null | undefined} overrides - Optional partial config. When
 *   it provides `enabled_languages`, that list replaces the base list.
 * @returns {object} A new, validated chunking config.
 * @throws {Error} When the merged config fails validateChunkingConfig().
 */
export function mergeRetrievalChunkingConfig(base, overrides) {
  const merged = { ...base, ...overrides };
  const requestedLanguages = overrides?.enabled_languages ?? base.enabled_languages;
  merged.enabled_languages = normalizeChunkingLanguageList(requestedLanguages);
  validateChunkingConfig(merged);
  return merged;
}
267
/**
 * Serializes a value to a JSON-like string with object keys sorted, so two
 * objects that differ only in key insertion order serialize identically.
 *
 * Arrays keep their element order; non-object values go through JSON.stringify.
 *
 * NOTE(review): keys are ordered with String.prototype.localeCompare, so the
 * order follows the runtime's default locale collation rather than code-unit
 * order.
 *
 * @param {unknown} value
 * @returns {string}
 */
function stableSerialize(value) {
  if (Array.isArray(value)) {
    const items = value.map((entry) => stableSerialize(entry));
    return `[${items.join(",")}]`;
  }
  if (value === null || typeof value !== "object") {
    return JSON.stringify(value);
  }
  const members = Object.entries(value)
    .sort(([left], [right]) => left.localeCompare(right))
    .map(([key, inner]) => `${JSON.stringify(key)}:${stableSerialize(inner)}`);
  return `{${members.join(",")}}`;
}
277
/**
 * Short fingerprint of a scoring config: the first 12 hex characters of the
 * SHA-256 digest of its key-sorted serialization.
 *
 * @param {object} config
 * @returns {string} 12-character lowercase hex string.
 */
function scoringConfigChecksum(config) {
  const canonical = stableSerialize(config);
  const digest = sha256(canonical);
  return digest.slice(0, 12);
}
280
/**
 * The reason labels used for retrieval results. chooseReason() returns one of
 * these four strings. (No reader of this array is visible in this part of the
 * file.)
 */
const REASON_STRINGS = [
  "semantic match",
  "exact symbol match",
  "path and token overlap",
  "recently modified relevant module"
];
286
/**
 * Error type raised by the retrieval layer, carrying a machine-readable code
 * (for example "UPSTREAM_FAILURE") alongside the human-readable message.
 */
export class RetrievalError extends Error {
  /** Machine-readable error code supplied by the thrower. */
  code;
  /**
   * @param {string} code - Machine-readable error code.
   * @param {string} message - Human-readable description.
   */
  constructor(code, message) {
    super(message);
    // Report the real class name in stack traces and String(error) instead of
    // the inherited "Error". Defined as non-enumerable so the instance's
    // enumerable own properties (and therefore JSON output) stay unchanged.
    Object.defineProperty(this, "name", {
      value: "RetrievalError",
      enumerable: false,
      writable: true,
      configurable: true
    });
    this.code = code;
  }
}
293
/**
 * Internal error describing one failed embedding-provider request, tagged with
 * a failure reason (for example "timeout") and whether a retry is worthwhile.
 */
class EmbeddingProviderRequestError extends Error {
  /** Short failure category, e.g. "timeout". */
  reason;
  /** Whether the request may be retried. */
  retryable;
  /**
   * @param {string} reason - Failure category.
   * @param {boolean} retryable - True when the caller may retry.
   * @param {string} message - Human-readable description.
   */
  constructor(reason, retryable, message) {
    super(message);
    // Report the real class name instead of the inherited "Error"; kept
    // non-enumerable so the instance's enumerable own properties are unchanged.
    Object.defineProperty(this, "name", {
      value: "EmbeddingProviderRequestError",
      enumerable: false,
      writable: true,
      configurable: true
    });
    this.reason = reason;
    this.retryable = retryable;
  }
}
302
/**
 * Labelled regular expressions for spotting likely secrets in text.
 *
 * All patterns carry the `g` flag, so `.test()`/`.exec()` on these shared
 * instances is stateful via `lastIndex`; reset it or use String methods such
 * as `match`/`replace` when reusing them.
 */
const SECRET_PATTERNS = [
  { label: "aws_access_key", pattern: /AKIA[0-9A-Z]{16}/g },
  // PEM private-key headers. The algorithm word is optional so that PKCS#8
  // ("BEGIN PRIVATE KEY" / "BEGIN ENCRYPTED PRIVATE KEY") and DSA keys are
  // detected in addition to the RSA/EC/OPENSSH forms.
  { label: "private_key", pattern: /-----BEGIN (?:(?:RSA|EC|DSA|OPENSSH|ENCRYPTED) )?PRIVATE KEY-----/g },
  { label: "generic_secret_key", pattern: /(?:api|secret|access)[_-]?key\s*[:=]\s*["']?[A-Za-z0-9_\-]{16,}["']?/gi },
  { label: "bearer_token", pattern: /Bearer\s+[A-Za-z0-9._\-]{10,}/gi }
];
308
/**
 * Canonicalizes a repository-relative path: surrounding whitespace removed,
 * backslashes turned into forward slashes, runs of slashes collapsed, and a
 * leading "./" stripped.
 *
 * @param {string} path - Raw path.
 * @returns {string} Normalized path.
 */
function normalizePath(path) {
  // Trim first: with trimming done only at the end, a leading space kept the
  // "^./" prefix from matching, so " ./src/a.js" and "./src/a.js" normalized to
  // different strings. The trailing trim preserves the old result for inputs
  // such as "./ a".
  return path
    .trim()
    .replace(/\\/g, "/")
    .replace(/\/+/g, "/")
    .replace(/^\.\//, "")
    .trim();
}
311
/**
 * Applies Unicode NFKC normalization to a query and strips surrounding
 * whitespace.
 *
 * @param {string} query
 * @returns {string}
 */
function normalizeQuery(query) {
  const canonical = query.normalize("NFKC");
  return canonical.trim();
}
314
/**
 * Hex-encoded SHA-256 digest of `value`.
 *
 * @param {string | Buffer | Uint8Array} value - Data accepted by Hash#update.
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
317
/**
 * Heuristic English pluralization used to expand search tokens.
 *
 * @param {string} token - Lowercase token.
 * @returns {string | undefined} The plural form, or `undefined` when the token
 *   is not a simple word (a lowercase letter followed by at least two lowercase
 *   letters/digits) or already ends in "s".
 */
function pluralizeToken(token) {
  if (!/^[a-z][a-z0-9]{2,}$/.test(token)) {
    return undefined;
  }
  if (token.endsWith("s")) {
    return undefined;
  }
  // Only consonant + "y" becomes "ies" ("query" -> "queries"). Vowel + "y"
  // takes a plain "s" ("array" -> "arrays"), not the non-word "arraies".
  if (token.length > 3 && /[^aeiou]y$/.test(token)) {
    return `${token.slice(0, -1)}ies`;
  }
  return `${token}s`;
}
329
/**
 * Heuristic English singularization used to expand search tokens.
 *
 * @param {string} token - Lowercase token.
 * @returns {string | undefined} The singular form, or `undefined` when the
 *   token is not a simple word or does not look plural.
 */
function singularizeToken(token) {
  if (!/^[a-z][a-z0-9]{2,}$/.test(token)) {
    return undefined;
  }
  if (token.endsWith("ies") && token.length > 4) {
    return `${token.slice(0, -3)}y`;
  }
  // Strip "es" only after a sibilant stem ("boxes" -> "box", "classes" ->
  // "class"). Stripping it unconditionally turned "types" into "typ" and
  // "modules" into "modul", so plural queries never produced the singular
  // tokens ("type", "module", ...) that intent detection looks for.
  if (token.length > 4 && /(?:s|x|z|ch|sh)es$/.test(token)) {
    return token.slice(0, -2);
  }
  if (token.endsWith("s") && !token.endsWith("ss") && token.length > 3) {
    return token.slice(0, -1);
  }
  return undefined;
}
344
/**
 * Splits text into a de-duplicated list of lowercase search tokens.
 *
 * Each coarse token (a run of letters, digits, `_`, `.`, `/`, `-`) is emitted
 * whole, then split on `.`, `/`, `_`, `-` into parts, and each part is further
 * split at camelCase boundaries. Every emitted token is also expanded with its
 * heuristic singular and plural forms.
 *
 * @param {string} text - Arbitrary text (query, path, or snippet).
 * @returns {string[]} Unique tokens in first-seen order.
 */
function tokenize(text) {
  // Uppercase letters must be part of a token. With a lowercase-only class,
  // capitals acted as separators ("getUserName" -> "get", "ser", "ame"), which
  // also left the camelCase split below with nothing to do.
  const coarseTokens = text
    .split(/[^A-Za-z0-9_./-]+/)
    .map((token) => token.trim())
    .filter(Boolean);
  const expandedTokens = new Set();
  const addToken = (value) => {
    const normalized = value.trim().toLowerCase();
    if (!normalized) {
      return;
    }
    expandedTokens.add(normalized);
    const singular = singularizeToken(normalized);
    if (singular) {
      expandedTokens.add(singular);
    }
    const plural = pluralizeToken(normalized);
    if (plural) {
      expandedTokens.add(plural);
    }
  };
  for (const token of coarseTokens) {
    addToken(token);
    for (const part of token.split(/[./_-]+/).filter(Boolean)) {
      addToken(part);
      // Break "fooBar" / "foo2Bar" into "foo", "bar" before lowercasing.
      const camelSegments = part
        .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
        .split(/\s+/)
        .filter(Boolean);
      for (const segment of camelSegments) {
        addToken(segment);
      }
    }
  }
  return [...expandedTokens];
}
381
/**
 * Fraction of distinct query tokens that also occur among the haystack's
 * tokens.
 *
 * @param {string} query
 * @param {string} haystack
 * @returns {number} A value in [0, 1]; 0 when the query yields no tokens.
 */
function lexicalScore(query, haystack) {
  const queryTokens = new Set(tokenize(query));
  if (queryTokens.size === 0) {
    return 0;
  }
  const haystackTokens = new Set(tokenize(haystack));
  const matched = [...queryTokens].filter((token) => haystackTokens.has(token));
  return matched.length / queryTokens.size;
}
395
+ function cosineSimilarity(a, b) {
396
+ if (a.length === 0 || b.length === 0) {
397
+ return 0;
398
+ }
399
+ const max = Math.min(a.length, b.length);
400
+ let dot = 0;
401
+ let normA = 0;
402
+ let normB = 0;
403
+ for (let i = 0; i < max; i += 1) {
404
+ dot += (a[i] ?? 0) * (b[i] ?? 0);
405
+ normA += (a[i] ?? 0) * (a[i] ?? 0);
406
+ normB += (b[i] ?? 0) * (b[i] ?? 0);
407
+ }
408
+ if (normA === 0 || normB === 0) {
409
+ return 0;
410
+ }
411
+ return dot / (Math.sqrt(normA) * Math.sqrt(normB));
412
+ }
413
/**
 * Flags snippets that carry little content: fewer than 12 characters after
 * trimming, or, once all whitespace is removed, one character repeated
 * end to end (e.g. a divider line).
 *
 * @param {string} snippet
 * @returns {boolean}
 */
function looksLowInformation(snippet) {
  const body = snippet.trim();
  if (body.length >= 12) {
    const compact = body.replace(/\s+/g, "");
    return /^(.)\1+$/.test(compact);
  }
  return true;
}
421
/**
 * Picks the reason label for a result. Precedence: path match, then recency
 * boost, then lexical overlap above 0.3, otherwise the semantic fallback.
 *
 * @param {{path_match: boolean, recency_boosted: boolean, lexical: number}} input
 * @returns {string} One of the four reason labels.
 */
function chooseReason(input) {
  const { path_match, recency_boosted, lexical } = input;
  return path_match
    ? "exact symbol match"
    : recency_boosted
      ? "recently modified relevant module"
      : lexical > 0.3
        ? "path and token overlap"
        : "semantic match";
}
433
/** Query tokens that hasDocIntent() treats as a request for documentation. */
const DOC_INTENT_TOKENS = new Set([
  "adr", "architecture", "design",
  "doc", "docs", "documentation",
  "milestone", "readme",
  "spec", "specification"
]);
445
/** Query tokens that hasCodeIntent() treats as a request for source code. */
const CODE_INTENT_TOKENS = new Set([
  "api", "bug", "bugfix", "cli", "code", "command",
  "component", "error", "entrypoint", "feature", "fix", "function",
  "handler", "harden", "hook", "hooks", "implement", "implementation",
  "middleware", "module", "optimize", "pipeline", "react", "refactor",
  "root", "route", "routing", "source", "struct", "ts",
  "type", "typescript", "tsx", "ui"
]);
481
// Query-token vocabularies behind the remaining has*Intent() helpers.
/** Tokens signalling interest in a Cargo workspace manifest. */
const WORKSPACE_MANIFEST_TOKENS = new Set([
  "cargo", "members", "resolver", "workspace", "rust"
]);
/** Tokens signalling a UI-component query. */
const UI_COMPONENT_TOKENS = new Set([
  "component", "layout", "react", "tsx", "ui"
]);
/** Tokens signalling that the user is trying to locate a file. */
const FILE_LOOKUP_TOKENS = new Set([
  "entrypoint", "file", "locate", "path", "where", "which"
]);
/** Tokens signalling interest in tests. */
const TEST_INTENT_TOKENS = new Set([
  "assert", "coverage", "e2e", "integration", "spec", "test", "tests", "unit"
]);
/** Tokens signalling interest in examples. */
const EXAMPLE_INTENT_TOKENS = new Set([
  "demo", "example", "examples", "sample", "tutorial"
]);
/**
 * Directory prefixes that pathQualityBias() treats as source locations, matched
 * at the start of a path or after any "/".
 */
const SOURCE_PATH_PREFIXES = [
  "src/", "app/", "apps/", "crates/", "internal/", "lib/", "package/", "packages/"
];
487
// Path patterns that pathQualityBias() penalizes. All comparisons there are
// made against the lowercased path.
/** Matched with startsWith(). */
const LOW_PRIORITY_PATH_PREFIXES = [
  ".next/", ".nuxt/", ".svelte-kit/",
  "build/", "coverage/", "dist/",
  "node_modules/", "out/", "target/"
];
/** Matched with includes(). */
const LOW_PRIORITY_PATH_SUBSTRINGS = [
  "/docs/_archive/",
  "/node_modules/",
  "/target/"
];
/** Matched with endsWith(). */
const LOW_PRIORITY_ASSET_EXTENSIONS = [
  ".gif", ".ico", ".jpeg", ".jpg",
  ".pdf", ".png", ".svg", ".webp"
];
/** Lock-file names, matched with endsWith(). */
const LOW_PRIORITY_LOCK_FILES = [
  "bun.lock", "bun.lockb", "cargo.lock",
  "composer.lock", "gemfile.lock", "package-lock.json",
  "pipfile.lock", "pnpm-lock.yaml", "yarn.lock"
];
511
+ function hasNegationNearTarget(normalized, targetPattern) {
512
+ const negationPrefix = "(?:avoid|avoiding|exclude|excluding|without|skip|omit|ignore|deprioriti(?:s|z)e|not|no|leave\\s+out|keep\\s+[^\\n]{0,40}?\\s+out\\s+of)";
513
+ return (new RegExp(`\\b${negationPrefix}\\b[^\\n,.!?;]{0,48}\\b(?:${targetPattern})\\b`, "i").test(normalized) ||
514
+ new RegExp(`\\b(?:${targetPattern})\\b[^\\n,.!?;]{0,24}\\b(?:not|avoid|exclude|without|skip|omit)\\b`, "i").test(normalized));
515
+ }
516
/**
 * Scans a query for "avoid X" style phrasing for four path categories.
 *
 * @param {string} query - Raw query text; lowercased before matching.
 * @returns {{avoid_docs: boolean, avoid_tests: boolean, avoid_examples: boolean, avoid_archive: boolean}}
 */
function detectNegativePathPreferences(query) {
  const lowered = query.toLowerCase();
  const avoids = (targetPattern) => hasNegationNearTarget(lowered, targetPattern);
  const avoid_docs = avoids("docs?|documentation|readme|specs?|guides?|markdown");
  const avoid_tests = avoids("tests?|specs?|fixtures?|__tests__");
  const avoid_examples = avoids("examples?|samples?|demos?|tutorials?");
  const avoid_archive = avoids("archive|archived|_archive");
  return { avoid_docs, avoid_tests, avoid_examples, avoid_archive };
}
525
/**
 * @param {string[]} tokens - Query tokens.
 * @returns {boolean} True when any token is in DOC_INTENT_TOKENS.
 */
function hasDocIntent(tokens) {
  for (const token of tokens) {
    if (DOC_INTENT_TOKENS.has(token)) {
      return true;
    }
  }
  return false;
}
528
/**
 * @param {string[]} tokens - Query tokens.
 * @returns {boolean} True when any token is in CODE_INTENT_TOKENS.
 */
function hasCodeIntent(tokens) {
  for (const token of tokens) {
    if (CODE_INTENT_TOKENS.has(token)) {
      return true;
    }
  }
  return false;
}
531
/**
 * @param {string[]} tokens - Query tokens.
 * @returns {boolean} True when any token is in WORKSPACE_MANIFEST_TOKENS.
 */
function hasWorkspaceManifestIntent(tokens) {
  for (const token of tokens) {
    if (WORKSPACE_MANIFEST_TOKENS.has(token)) {
      return true;
    }
  }
  return false;
}
534
/**
 * @param {string[]} tokens - Query tokens.
 * @returns {boolean} True when any token is in UI_COMPONENT_TOKENS.
 */
function hasUiComponentIntent(tokens) {
  for (const token of tokens) {
    if (UI_COMPONENT_TOKENS.has(token)) {
      return true;
    }
  }
  return false;
}
537
/**
 * @param {string[]} tokens - Query tokens.
 * @returns {boolean} True when any token is in FILE_LOOKUP_TOKENS.
 */
function hasFileLookupIntent(tokens) {
  for (const token of tokens) {
    if (FILE_LOOKUP_TOKENS.has(token)) {
      return true;
    }
  }
  return false;
}
540
/**
 * @param {string[]} tokens - Query tokens.
 * @returns {boolean} True when any token is in TEST_INTENT_TOKENS.
 */
function hasTestIntent(tokens) {
  for (const token of tokens) {
    if (TEST_INTENT_TOKENS.has(token)) {
      return true;
    }
  }
  return false;
}
543
/**
 * @param {string[]} tokens - Query tokens.
 * @returns {boolean} True when any token is in EXAMPLE_INTENT_TOKENS.
 */
function hasExampleIntent(tokens) {
  for (const token of tokens) {
    if (EXAMPLE_INTENT_TOKENS.has(token)) {
      return true;
    }
  }
  return false;
}
546
/**
 * Computes an additive score adjustment for a candidate file path, based on
 * where the file lives and what the query appears to ask for.
 *
 * Every matching rule adds or subtracts one `config.path_bias` entry, in a
 * fixed order; each query token equal to the file name or its stem then adds
 * `filename_token_match_boost`; finally the sum is clamped to
 * [min_total_bias, max_total_bias].
 *
 * @param {string} path - Candidate path; compared case-insensitively.
 * @param {string[]} queryTokens - Tokens derived from the query.
 * @param {{path_bias: object}} config - Scoring config.
 * @param {string} [queryText] - Original query text for negation detection;
 *   when nullish, the tokens joined by spaces are used instead.
 * @returns {number} The clamped bias.
 */
function pathQualityBias(path, queryTokens, config, queryText) {
  const lowered = path.toLowerCase();

  // --- What the query is asking for --------------------------------------
  const wantsDocs = hasDocIntent(queryTokens);
  const wantsCode = hasCodeIntent(queryTokens);
  const docsPreferred = wantsDocs && !wantsCode;
  const avoid = detectNegativePathPreferences(queryText ?? queryTokens.join(" "));
  const wantsManifest = hasWorkspaceManifestIntent(queryTokens);
  const wantsUi = hasUiComponentIntent(queryTokens);
  const wantsTests = hasTestIntent(queryTokens);
  const wantsExamples = hasExampleIntent(queryTokens);
  const weights = config.path_bias;

  // --- What kind of path this is ------------------------------------------
  const startsWithAny = (prefixes) => prefixes.some((prefix) => lowered.startsWith(prefix));
  const includesAny = (needles) => needles.some((needle) => lowered.includes(needle));
  const endsWithAny = (suffixes) => suffixes.some((suffix) => lowered.endsWith(suffix));

  const isSourcePath = SOURCE_PATH_PREFIXES.some((prefix) => lowered.startsWith(prefix) || lowered.includes(`/${prefix}`));
  const isDocsPath = lowered.startsWith("docs/") || lowered.includes("/docs/");
  const isMarkdown = lowered.endsWith(".md");
  const isTestPath = startsWithAny(["test/", "tests/", "__tests__/"]) ||
    includesAny(["/test/", "/tests/", "/__tests__/"]) ||
    endsWithAny([".test.js", ".test.ts", ".spec.js", ".spec.ts"]);
  const isExamplePath = startsWithAny(["example/", "examples/"]) ||
    includesAny(["/example/", "/examples/"]);
  const isLibPath = lowered === "lib" || lowered.startsWith("lib/") || lowered.includes("/lib/");
  const isChangelogLikeDoc = /(?:^|\/)(?:history|changelog|changes|readme)\.md$/.test(lowered);
  const isPublicUnrequested = lowered.includes("/public/") &&
    !queryTokens.some((token) => token.includes("public"));

  // --- Ordered rule table: [applies, direction, path_bias key] ----------------
  // The order is significant: adjustments are summed sequentially in floating
  // point, so reordering could change the low bits of the result.
  const rules = [
    [isSourcePath, "+", "source_path_boost"],
    [wantsCode && isLibPath, "+", "source_path_boost"],
    [startsWithAny(LOW_PRIORITY_PATH_PREFIXES), "-", "low_priority_prefix_penalty"],
    [includesAny(LOW_PRIORITY_PATH_SUBSTRINGS), "-", "low_priority_substring_penalty"],
    [endsWithAny(LOW_PRIORITY_ASSET_EXTENSIONS), "-", "low_priority_asset_penalty"],
    [endsWithAny(LOW_PRIORITY_LOCK_FILES), "-", "lockfile_penalty"],
    [docsPreferred && isDocsPath, "+", "doc_intent_docs_boost"],
    [docsPreferred && isMarkdown, "+", "doc_intent_markdown_boost"],
    [!docsPreferred && isDocsPath, "-", "docs_without_doc_intent_penalty"],
    [!docsPreferred && isMarkdown, "-", "non_doc_markdown_penalty"],
    [wantsCode && isDocsPath, "-", "code_intent_docs_penalty"],
    [wantsCode && isMarkdown, "-", "non_doc_markdown_penalty"],
    [wantsCode && isChangelogLikeDoc, "-", "docs_without_doc_intent_penalty"],
    [docsPreferred && isSourcePath, "-", "doc_intent_source_penalty"],
    [wantsManifest && lowered === "cargo.toml", "+", "workspace_manifest_root_boost"],
    [wantsManifest && lowered.endsWith("/cargo.toml"), "+", "workspace_manifest_nested_boost"],
    [wantsUi && (lowered.endsWith(".tsx") || lowered.endsWith(".jsx")), "+", "ui_component_tsx_boost"],
    [wantsUi && lowered.endsWith(".css"), "-", "ui_component_css_penalty"],
    [lowered.startsWith("docs/_archive/"), "-", "docs_archive_penalty"],
    [isPublicUnrequested, "-", "public_path_penalty"],
    [!wantsTests && isTestPath, "-", "test_path_penalty"],
    [wantsTests && isTestPath, "+", "test_intent_test_boost"],
    [!wantsExamples && isExamplePath, "-", "example_path_penalty"],
    [wantsExamples && isExamplePath, "+", "example_intent_example_boost"],
    [wantsCode && lowered.endsWith(".d.ts"), "-", "declaration_file_penalty"],
    [avoid.avoid_docs && (isDocsPath || isMarkdown), "-", "negation_avoid_docs_penalty"],
    [avoid.avoid_tests && isTestPath, "-", "negation_avoid_tests_penalty"],
    [avoid.avoid_examples && isExamplePath, "-", "negation_avoid_examples_penalty"],
    [avoid.avoid_archive && lowered.includes("archive"), "-", "negation_avoid_archive_penalty"]
  ];

  let bias = 0;
  for (const [applies, direction, key] of rules) {
    if (!applies) {
      continue;
    }
    bias = direction === "+" ? bias + weights[key] : bias - weights[key];
  }

  // --- File-name hits: one boost per matching query token -------------------
  const filename = lowered.slice(lowered.lastIndexOf("/") + 1);
  const filenameStem = filename.replace(/\.[a-z0-9]+$/, "");
  for (const token of queryTokens) {
    if (token === filename || token === filenameStem) {
      bias += weights.filename_token_match_boost;
    }
  }

  const capped = Math.min(weights.max_total_bias, bias);
  return Math.max(weights.min_total_bias, capped);
}
673
/**
 * Chunks one file through buildChunksForFile() and decorates every chunk with
 * the file-level metadata and a content hash.
 *
 * @param {{path: string, content: string, language?: string, generated?: boolean, updated_at?: string}} file
 *   File to chunk; `path` is normalized before use.
 * @param {{strategy: string, fallback_strategy: string, parse_timeout_ms: number, enabled_languages: string[]}} chunkingConfig
 * @returns {object} `chunks` plus the chunker's diagnostics (`strategy`,
 *   `fallback_reason`, the three latency fields and `language`) passed through
 *   unchanged.
 */
function buildChunks(file, chunkingConfig) {
  const path = normalizePath(file.path);
  const result = buildChunksForFile({
    file: {
      path,
      content: file.content,
      language: file.language
    },
    config: {
      strategy: chunkingConfig.strategy,
      fallback_strategy: chunkingConfig.fallback_strategy,
      // Sizing limits are module constants, not caller-configurable.
      target_chunk_tokens: TARGET_CHUNK_TOKENS,
      chunk_overlap_tokens: CHUNK_OVERLAP_TOKENS,
      max_chunks_per_file: MAX_CHUNKS_PER_FILE,
      parse_timeout_ms: chunkingConfig.parse_timeout_ms,
      enabled_languages: chunkingConfig.enabled_languages
    },
    tokenize
  });
  const decorate = (chunk) => {
    const { start_line, end_line, snippet } = chunk;
    return {
      path,
      start_line,
      end_line,
      snippet,
      language: file.language,
      generated: file.generated,
      // Falls back to the current time when the file carries no timestamp.
      updated_at: file.updated_at ?? new Date().toISOString(),
      // The hash covers location and content, so it changes when either does.
      hash: sha256(`${path}:${start_line}:${end_line}:${snippet}`)
    };
  };
  return {
    chunks: result.chunks.map((chunk) => decorate(chunk)),
    strategy: result.strategy,
    fallback_reason: result.fallback_reason,
    parse_latency_ms: result.parse_latency_ms,
    language_aware_attempt_latency_ms: result.language_aware_attempt_latency_ms,
    fallback_path_latency_ms: result.fallback_path_latency_ms,
    language: result.language
  };
}
711
/**
 * Deterministic stand-in embedding derived from SHA-256.
 *
 * Each component is one digest byte divided by 255. When more components are
 * requested than one digest provides, further digests of `${input}:${salt}`
 * (salt = 1, 2, ...) are appended.
 *
 * @param {string} input - Text to embed.
 * @param {number} [dimensions=24] - Requested vector length; values below 1
 *   are raised to 1.
 * @returns {number[]} Components in [0, 1].
 */
function pseudoEmbedding(input, dimensions = 24) {
  const size = Math.max(1, dimensions);
  const hexCharsNeeded = size * 2;
  let hex = sha256(input);
  for (let salt = 1; hex.length < hexCharsNeeded; salt += 1) {
    hex += sha256(`${input}:${salt}`);
  }
  const vector = [];
  for (let index = 0; index < size; index += 1) {
    const start = index * 2;
    const byte = parseInt(hex.slice(start, start + 2), 16);
    vector.push(byte / 255);
  }
  return vector;
}
727
/**
 * @param {unknown} value
 * @returns {boolean} True when `value` is an array whose elements are all
 *   finite numbers (an empty array qualifies).
 */
function isFiniteNumberArray(value) {
  if (!Array.isArray(value)) {
    return false;
  }
  // Number.isFinite does not coerce, so non-number elements fail the check.
  return value.every((cell) => Number.isFinite(cell));
}
730
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => setTimeout(wake, ms));
}
735
/**
 * Embedding provider that needs no network: vectors come from
 * pseudoEmbedding(), so identical text always produces an identical vector.
 */
export class DeterministicEmbeddingProvider {
  dimensions;
  model;
  version;
  /**
   * @param {{dimensions?: number, model?: string, version?: string}} [options]
   *   Nullish fields fall back to 24, "pseudo-sha256" and "v1" respectively.
   */
  constructor(options = {}) {
    const { dimensions, model, version } = options;
    this.dimensions = dimensions ?? 24;
    this.model = model ?? "pseudo-sha256";
    this.version = version ?? "v1";
  }
  /**
   * @param {{texts: string[]}} input
   * @returns {Promise<number[][]>} One vector per input text, in order.
   */
  async embed(input) {
    const size = this.dimensions;
    return input.texts.map((text) => pseudoEmbedding(text, size));
  }
  /**
   * @returns {{provider: string, model: string, dimensions: number, version: string}}
   *   Identity of this provider.
   */
  describe() {
    const { model, dimensions, version } = this;
    return { provider: "deterministic", model, dimensions, version };
  }
}
756
+ export class OpenAICompatibleEmbeddingProvider {
757
+ baseUrl;
758
+ endpoint;
759
+ apiKey;
760
+ model;
761
+ dimensions;
762
+ timeoutMs;
763
+ batchSize;
764
+ maxRetries;
765
+ observability;
766
+ constructor(options) {
767
+ const baseUrl = options.base_url.trim().replace(/\/+$/, "");
768
+ if (baseUrl.length === 0) {
769
+ throw new Error("invalid openai-compatible embedding config: base_url must be non-empty");
770
+ }
771
+ const apiKey = options.api_key.trim();
772
+ if (apiKey.length === 0) {
773
+ throw new Error("invalid openai-compatible embedding config: api_key must be non-empty");
774
+ }
775
+ this.baseUrl = baseUrl;
776
+ this.endpoint = `${this.baseUrl}/embeddings`;
777
+ this.apiKey = apiKey;
778
+ this.model = options.model?.trim() || DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MODEL;
779
+ this.dimensions = options.dimensions ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_DIMENSIONS;
780
+ this.timeoutMs = options.timeout_ms ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_TIMEOUT_MS;
781
+ this.batchSize = options.batch_size ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_BATCH_SIZE;
782
+ this.maxRetries = options.max_retries ?? DEFAULT_OPENAI_COMPATIBLE_EMBEDDING_MAX_RETRIES;
783
+ this.observability = options.observability ?? getObservability("retrieval-core");
784
+ if (!Number.isInteger(this.dimensions) || this.dimensions <= 0) {
785
+ throw new Error("invalid openai-compatible embedding config: dimensions must be a positive integer");
786
+ }
787
+ if (!Number.isInteger(this.timeoutMs) || this.timeoutMs <= 0) {
788
+ throw new Error("invalid openai-compatible embedding config: timeout_ms must be a positive integer");
789
+ }
790
+ if (!Number.isInteger(this.batchSize) || this.batchSize <= 0) {
791
+ throw new Error("invalid openai-compatible embedding config: batch_size must be a positive integer");
792
+ }
793
+ if (!Number.isInteger(this.maxRetries) || this.maxRetries < 0) {
794
+ throw new Error("invalid openai-compatible embedding config: max_retries must be a non-negative integer");
795
+ }
796
+ }
797
+ describe() {
798
+ return {
799
+ provider: "openai_compatible",
800
+ model: this.model,
801
+ dimensions: this.dimensions
802
+ };
803
+ }
804
+ async embed(input) {
805
+ if (input.texts.length === 0) {
806
+ return [];
807
+ }
808
+ const output = [];
809
+ for (let offset = 0; offset < input.texts.length; offset += this.batchSize) {
810
+ const batch = input.texts.slice(offset, offset + this.batchSize);
811
+ const vectors = await this.embedBatchWithRetries(batch, input.purpose);
812
+ output.push(...vectors);
813
+ }
814
+ return output;
815
+ }
816
+ async embedBatchWithRetries(texts, purpose) {
817
+ const labels = {
818
+ provider: "openai_compatible",
819
+ model: this.model,
820
+ purpose
821
+ };
822
+ for (let attempt = 0; attempt <= this.maxRetries; attempt += 1) {
823
+ const startedAt = Date.now();
824
+ this.observability.metrics.increment("retrieval_embedding_provider_requests_total", 1, labels);
825
+ try {
826
+ return await this.embedBatchOnce(texts);
827
+ }
828
+ catch (error) {
829
+ const failure = this.toProviderFailure(error);
830
+ this.observability.metrics.increment("retrieval_embedding_provider_failures_total", 1, {
831
+ ...labels,
832
+ reason: failure.reason
833
+ });
834
+ const shouldRetry = failure.retryable && attempt < this.maxRetries;
835
+ this.observability.logger.warn("embedding provider request failed", {
836
+ provider: "openai_compatible",
837
+ model: this.model,
838
+ purpose,
839
+ reason: failure.reason,
840
+ retryable: failure.retryable,
841
+ retrying: shouldRetry,
842
+ attempt: attempt + 1,
843
+ max_attempts: this.maxRetries + 1
844
+ });
845
+ if (shouldRetry) {
846
+ await sleep(this.retryDelayMs(attempt));
847
+ continue;
848
+ }
849
+ throw new RetrievalError("UPSTREAM_FAILURE", `embedding provider request failed (${failure.reason}); ${failure.message}`);
850
+ }
851
+ finally {
852
+ this.observability.metrics.observe("retrieval_embedding_provider_latency_ms", Date.now() - startedAt, labels);
853
+ }
854
+ }
855
+ throw new RetrievalError("UPSTREAM_FAILURE", "embedding provider retries exhausted");
856
+ }
857
+ async embedBatchOnce(texts) {
858
+ const controller = new AbortController();
859
+ const timeoutId = setTimeout(() => {
860
+ controller.abort();
861
+ }, this.timeoutMs);
862
+ let response;
863
+ try {
864
+ response = await fetch(this.endpoint, {
865
+ method: "POST",
866
+ headers: {
867
+ authorization: `Bearer ${this.apiKey}`,
868
+ "content-type": "application/json"
869
+ },
870
+ body: JSON.stringify({
871
+ model: this.model,
872
+ input: texts,
873
+ encoding_format: "float"
874
+ }),
875
+ signal: controller.signal
876
+ });
877
+ }
878
+ catch (error) {
879
+ if (error && typeof error === "object" && "name" in error && error.name === "AbortError") {
880
+ throw new EmbeddingProviderRequestError("timeout", true, `request timed out after ${this.timeoutMs}ms`);
881
+ }
882
+ throw new EmbeddingProviderRequestError("network_error", true, error instanceof Error ? error.message : String(error));
883
+ }
884
+ finally {
885
+ clearTimeout(timeoutId);
886
+ }
887
+ if (!response.ok) {
888
+ const details = await safeResponseText(response);
889
+ if (response.status === 429) {
890
+ throw new EmbeddingProviderRequestError("rate_limited", true, `HTTP 429 ${details}`.trim());
891
+ }
892
+ if (response.status >= 500) {
893
+ throw new EmbeddingProviderRequestError("http_5xx", true, `HTTP ${response.status} ${details}`.trim());
894
+ }
895
+ if (response.status === 401 || response.status === 403) {
896
+ throw new EmbeddingProviderRequestError("auth_error", false, `HTTP ${response.status} ${details}`.trim());
897
+ }
898
+ if (response.status === 404) {
899
+ throw new EmbeddingProviderRequestError("endpoint_not_found", false, `HTTP 404 ${details}`.trim());
900
+ }
901
+ throw new EmbeddingProviderRequestError("http_4xx", false, `HTTP ${response.status} ${details}`.trim());
902
+ }
903
+ let payload;
904
+ try {
905
+ payload = await response.json();
906
+ }
907
+ catch {
908
+ throw new EmbeddingProviderRequestError("invalid_json", false, "provider returned non-JSON response");
909
+ }
910
+ if (!payload || typeof payload !== "object" || !("data" in payload)) {
911
+ throw new EmbeddingProviderRequestError("invalid_response", false, "provider response missing data field");
912
+ }
913
+ const rawData = payload.data;
914
+ if (!Array.isArray(rawData)) {
915
+ throw new EmbeddingProviderRequestError("invalid_response", false, "provider response data must be an array");
916
+ }
917
+ if (rawData.length !== texts.length) {
918
+ throw new EmbeddingProviderRequestError("count_mismatch", false, `provider returned ${rawData.length} embeddings for ${texts.length} inputs`);
919
+ }
920
+ const hasIndexField = rawData.every((entry) => entry && typeof entry === "object" && Number.isInteger(entry.index));
921
+ const ordered = hasIndexField
922
+ ? [...rawData].sort((a, b) => {
923
+ const left = a.index;
924
+ const right = b.index;
925
+ return left - right;
926
+ })
927
+ : rawData;
928
+ const vectors = [];
929
+ for (let i = 0; i < ordered.length; i += 1) {
930
+ const row = ordered[i];
931
+ if (!row || typeof row !== "object") {
932
+ throw new EmbeddingProviderRequestError("invalid_response", false, "embedding row must be an object");
933
+ }
934
+ if (hasIndexField && row.index !== i) {
935
+ throw new EmbeddingProviderRequestError("invalid_response", false, "embedding row indexes are not contiguous");
936
+ }
937
+ const embedding = row.embedding;
938
+ if (!isFiniteNumberArray(embedding)) {
939
+ throw new EmbeddingProviderRequestError("invalid_vector", false, "embedding must be a numeric array");
940
+ }
941
+ if (embedding.length !== this.dimensions) {
942
+ throw new EmbeddingProviderRequestError("dimension_mismatch", false, `expected ${this.dimensions} dimensions, received ${embedding.length}`);
943
+ }
944
+ vectors.push([...embedding]);
945
+ }
946
+ return vectors;
947
+ }
948
+ retryDelayMs(attempt) {
949
+ const base = 100 * (attempt + 1);
950
+ const jitter = Math.floor(Math.random() * 75);
951
+ return base + jitter;
952
+ }
953
+ toProviderFailure(error) {
954
+ if (error instanceof EmbeddingProviderRequestError) {
955
+ return error;
956
+ }
957
+ if (error instanceof RetrievalError) {
958
+ return new EmbeddingProviderRequestError("upstream_failure", false, error.message);
959
+ }
960
+ if (error instanceof Error) {
961
+ return new EmbeddingProviderRequestError("unknown_error", false, error.message);
962
+ }
963
+ return new EmbeddingProviderRequestError("unknown_error", false, String(error));
964
+ }
965
+ }
966
/**
 * Reads a response body for diagnostics without ever throwing.
 *
 * @param {{text: function(): Promise<string>}} response Response-like object.
 * @returns {Promise<string>} At most the first 512 characters of the body,
 *   or "" when the body cannot be read.
 */
async function safeResponseText(response) {
    let excerpt = "";
    try {
        excerpt = (await response.text()).slice(0, 512);
    }
    catch {
        // Best effort only: the body is used purely to enrich error messages.
        excerpt = "";
    }
    return excerpt;
}
975
/**
 * Builds an embedding descriptor from a provider's optional `describe()`.
 *
 * @param {{describe?: function(): object}} provider Embedding provider.
 * @returns {{provider: string, model?: *, dimensions: number, version?: *}}
 *   The provider's self-description (with `model`/`version` only when
 *   truthy), or `{ provider: "custom", dimensions: 24 }` when the provider
 *   has no `describe` method or it returns a falsy value.
 */
function resolveEmbeddingDescriptor(provider) {
    const info = provider.describe?.();
    if (!info) {
        return { provider: "custom", dimensions: 24 };
    }
    const descriptor = { provider: info.provider };
    if (info.model) {
        descriptor.model = info.model;
    }
    descriptor.dimensions = info.dimensions;
    if (info.version) {
        descriptor.version = info.version;
    }
    return descriptor;
}
990
/**
 * Validates and canonicalizes an embedding descriptor.
 *
 * The provider name is trimmed and lower-cased; `model` and `version` are
 * trimmed and only kept when the incoming value is truthy.
 *
 * @param {{provider: string, model?: string, dimensions: number, version?: string}} descriptor
 * @returns {{provider: string, model?: string, dimensions: number, version?: string}}
 * @throws {Error} When the provider is blank or `dimensions` is not a
 *   positive integer.
 */
function normalizeEmbeddingDescriptor(descriptor) {
    const { model, dimensions, version } = descriptor;
    const providerName = descriptor.provider.trim();
    if (providerName.length === 0) {
        throw new Error("invalid embedding descriptor: provider must be non-empty");
    }
    const validDimensions = Number.isInteger(dimensions) && dimensions > 0;
    if (!validDimensions) {
        throw new Error("invalid embedding descriptor: dimensions must be a positive integer");
    }
    const normalized = { provider: providerName.toLowerCase() };
    if (model) {
        normalized.model = model.trim();
    }
    normalized.dimensions = dimensions;
    if (version) {
        normalized.version = version.trim();
    }
    return normalized;
}
1005
/**
 * Classifies a prompt into a coarse task intent.
 *
 * Rules are tried in order and the first whose keyword pattern occurs
 * anywhere in the lower-cased prompt wins. Matching is substring based, so
 * e.g. "prefix" matches the "fix" keyword.
 *
 * @param {string} prompt User prompt.
 * @returns {"bugfix"|"feature"|"refactor"|"docs"|"tests"|"unknown"}
 */
function classifyIntent(prompt) {
    const lowered = prompt.toLowerCase();
    // Order matters: earlier rules take precedence over later ones.
    const orderedRules = [
        ["bugfix", /fix|bug|error|crash|regression/],
        ["feature", /add|implement|new|support/],
        ["refactor", /refactor|cleanup|restructure/],
        ["docs", /readme|docs|documentation/],
        ["tests", /test|coverage|assert/]
    ];
    const matched = orderedRules.find(([, pattern]) => pattern.test(lowered));
    return matched ? matched[0] : "unknown";
}
1024
/**
 * Guesses the natural language of the conversation.
 *
 * The sample examined is the prompt plus the most recent user message from
 * `history` (or the prompt again when there is none).
 *
 * @param {string} prompt Current prompt.
 * @param {Array<{role: string, content: string}>} history Prior messages.
 * @returns {"zh"|"es"|"en"} "zh" when the sample contains a character in
 *   U+3400-U+9FFF, "es" when it contains Spanish-specific characters or one
 *   of a few Spanish keywords, otherwise "en".
 */
function detectDominantLanguage(prompt, history) {
    const turns = [...history];
    let latestUserContent;
    for (let i = turns.length - 1; i >= 0; i -= 1) {
        if (turns[i].role === "user") {
            latestUserContent = turns[i].content;
            break;
        }
    }
    const sample = `${prompt}\n${latestUserContent ?? prompt}`.toLowerCase();
    const hasCjk = /[\u3400-\u9fff]/.test(sample);
    if (hasCjk) {
        return "zh";
    }
    const looksSpanish = /[áéíóúñ¿¡]/.test(sample) || /\b(implementar|arreglar|prueba|archivo|código)\b/.test(sample);
    return looksSpanish ? "es" : "en";
}
1035
/**
 * Framework-specific query expansion rules.
 *
 * Each rule carries an id, the query tokens and phrases that trigger it,
 * and the symbol and path hints appended to the retrieval query when it
 * fires. Rows are [id, token_triggers, phrase_triggers, symbol_hints,
 * path_hints].
 */
const ENHANCER_FRAMEWORK_HINT_RULES = [
    [
        "flask",
        ["flask", "jinja", "template", "templates", "render_template", "render", "templating"],
        ["render template", "render_template", "jinja loader", "before render template"],
        ["render_template", "render_template_string", "before_render_template", "DispatchingJinjaLoader", "register_error_handler"],
        ["src/flask/templating.py", "src/flask/views.py", "src/flask/signals.py", "src/flask/sansio/scaffold.py", "src/flask/sansio/app.py"]
    ],
    [
        "fastify",
        ["fastify", "hook", "hooks", "addhook", "seterrorhandler", "onrequest", "onerror", "plugin"],
        ["error handler", "seterrorhandler", "addhook", "request lifecycle"],
        ["addHook", "setErrorHandler", "onRequest", "onError", "preHandler", "setNotFoundHandler"],
        ["fastify.js", "lib/hooks.js", "lib/error-handler.js", "lib/route.js", "lib/handle-request.js"]
    ],
    [
        "node_web_runtime",
        ["express", "koa", "middleware", "router", "route", "handler", "request", "response", "pipeline"],
        ["request pipeline", "error handler", "middleware chain", "route handler"],
        ["middleware", "router", "handleRequest", "setErrorHandler", "onRequest", "response.send", "onerror", "app.onerror"],
        ["lib/application.js", "lib/request.js", "lib/response.js", "lib/context.js", "lib/router", "src/server.ts"]
    ],
    [
        "http_client_runtime",
        ["axios", "request", "adapter", "dispatch", "url", "fetch", "http", "client"],
        ["http adapter", "request dispatch", "url builder", "request pipeline"],
        ["dispatchRequest", "mergeConfig", "buildURL", "combineURLs", "adapter"],
        ["lib/core", "lib/helpers", "lib/adapters", "src/client", "src/http"]
    ]
].map(([id, token_triggers, phrase_triggers, symbol_hints, path_hints]) => ({
    id,
    token_triggers,
    phrase_triggers,
    symbol_hints,
    path_hints
}));
1105
/**
 * Lower-case words that are never treated as code symbols or used to build
 * path-form hints, even when they otherwise look like identifiers.
 */
const SYMBOL_STOPWORDS = new Set([
    "about", "after", "before", "behavior", "change",
    "compatibility", "enhance", "flow", "keep", "logic",
    "module", "preserve", "refactor", "request", "response",
    "should", "stable", "support", "while"
]);
1126
/**
 * Tokens that signal an implementation-oriented query. The same list, in
 * this order, is appended as retrieval hints for such queries.
 */
const ENHANCER_IMPLEMENTATION_INTENT_HINTS = "implementation source runtime module handler function class codepath src lib".split(" ");
1138
/** Query tokens that indicate the user is asking about documentation. */
const ENHANCER_DOCS_INTENT_TOKENS = new Set("adr architecture doc docs documentation readme spec specification".split(" "));
1148
/** Query tokens that indicate the user is asking about tests. */
const ENHANCER_TEST_INTENT_TOKENS = new Set("assert coverage integration spec test tests unit".split(" "));
1149
/** Query tokens that indicate a conceptual ("how/why") question. */
const ENHANCER_CONCEPTUAL_INTENT_TOKENS = new Set("architecture concept design explain guidance how overview reason tradeoff why".split(" "));
1161
/**
 * Classifies a query as "symbol-heavy", "impl-focused" or "conceptual".
 *
 * The text examined is the prompt plus up to four of the most recent
 * non-system history messages that are non-empty and differ from the
 * prompt. A query is symbol-heavy when it carries enough code symbols,
 * paths and back-quoted spans. Otherwise implementation signals are
 * weighed against docs/conceptual signals, with a +2 implementation bonus
 * when the user asked to avoid docs, tests or examples.
 *
 * @param {string} prompt Current prompt.
 * @param {Array<{role: string, content: string}>} history Prior messages.
 * @returns {"symbol-heavy"|"impl-focused"|"conceptual"}
 */
function classifyEnhancerQueryIntent(prompt, history) {
    const newestFirst = [...history]
        .reverse()
        .filter((message) => message.role !== "system")
        .map((message) => message.content.trim());
    const recentMessages = newestFirst
        .filter((content) => content.length > 0 && content !== prompt.trim())
        .slice(0, 4)
        .reverse();
    const combined = normalizeQuery([prompt, ...recentMessages].join("\n").trim());
    const tokens = tokenize(combined);
    const symbolCount = extractLikelyCodeSymbols(combined, 20).length;
    const pathCount = extractPathLikeSymbols(combined).length;
    const quotedCount = (combined.match(/`[^`]+`/g) ?? []).length;
    // Paths and back-quoted spans count double towards the symbol signal.
    const symbolSignal = symbolCount + pathCount * 2 + quotedCount * 2;
    const manySymbolsWithAnchor = symbolCount >= 4 && (pathCount > 0 || quotedCount > 0);
    if (symbolSignal >= 8 || manySymbolsWithAnchor) {
        return "symbol-heavy";
    }
    const countTokens = (predicate) => tokens.reduce((total, token) => (predicate(token) ? total + 1 : total), 0);
    let implSignal = countTokens((token) => CODE_INTENT_TOKENS.has(token) || ENHANCER_IMPLEMENTATION_INTENT_HINTS.includes(token));
    const conceptualSignal = countTokens((token) => ENHANCER_DOCS_INTENT_TOKENS.has(token) || ENHANCER_CONCEPTUAL_INTENT_TOKENS.has(token));
    const { avoid_docs, avoid_tests, avoid_examples } = detectNegativePathPreferences(combined);
    if (avoid_docs || avoid_tests || avoid_examples) {
        implSignal += 2;
    }
    return implSignal >= conceptualSignal + 1 ? "impl-focused" : "conceptual";
}
1197
/**
 * Derives candidate and hint limits for the enhancer from the query intent.
 *
 * Configured limits are capped per intent: symbol-heavy (12 hints, 3
 * candidates, 1 per directory), impl-focused (18, 4, 2) and everything
 * else (22, 4, 3). The candidate limit is never below 1. Strict
 * implementation-only filtering is requested only for symbol-heavy or
 * impl-focused queries where the user asked to avoid docs, tests or
 * examples.
 *
 * @param {{query_intent: string, enhancer_config: {max_expansion_hints: number, max_candidates_pre_rerank: number}, negative_preferences: object}} input
 * @returns {{max_expansion_hints: number, max_candidates_pre_rerank: number, max_candidates_per_directory_pre_rerank: number, strict_impl_only_filtering: *}}
 */
function resolveEnhancerIntentPolicy(input) {
    const intent = input.query_intent;
    const config = input.enhancer_config;
    const wantsStrictImpl = intent !== "conceptual" &&
        (input.negative_preferences.avoid_docs ||
            input.negative_preferences.avoid_tests ||
            input.negative_preferences.avoid_examples);
    // [hint cap, candidate cap, per-directory cap] for the focused intents.
    const focusedCaps = new Map([
        ["symbol-heavy", [12, 3, 1]],
        ["impl-focused", [18, 4, 2]]
    ]);
    const focused = focusedCaps.get(intent);
    const [hintCap, candidateCap, perDirectory] = focused ?? [22, 4, 3];
    return {
        max_expansion_hints: Math.min(config.max_expansion_hints, hintCap),
        max_candidates_pre_rerank: Math.max(1, Math.min(config.max_candidates_pre_rerank, candidateCap)),
        max_candidates_per_directory_pre_rerank: perDirectory,
        strict_impl_only_filtering: focused ? wantsStrictImpl : false
    };
}
1225
/**
 * Heuristically decides whether a word looks like a code identifier or path.
 *
 * The value must be 3-96 characters, consist only of identifier/path
 * characters, not be a stopword, and show at least one code-like trait:
 * an underscore, dot or slash, a camelCase boundary, a run of capitals, or
 * a common verb prefix (get, set, add, ...) followed by a capital or
 * underscore.
 *
 * @param {string} value Candidate word.
 * @returns {boolean}
 */
function looksLikeCodeSymbol(value) {
    const lengthOk = value.length >= 3 && value.length <= 96;
    if (!lengthOk || !/^[A-Za-z_][A-Za-z0-9_./-]*$/.test(value)) {
        return false;
    }
    if (SYMBOL_STOPWORDS.has(value.toLowerCase())) {
        return false;
    }
    const codeLikeTraits = [
        /[_./]/,
        /[a-z][A-Z]/,
        /[A-Z]{2,}/,
        /^(?:get|set|add|create|render|handle|parse|build|validate|register|process|update|remove|is|has)[A-Z_]/
    ];
    return codeLikeTraits.some((trait) => trait.test(value));
}
1243
/**
 * Finds file-path-like substrings: one or more "dir/" segments followed by
 * a file name with an extension.
 *
 * @param {string} text Text to scan.
 * @returns {string[]} Matches in order of appearance (duplicates kept).
 */
function extractPathLikeSymbols(text) {
    const pathPattern = /\b(?:[A-Za-z0-9_.-]+\/)+[A-Za-z0-9_.-]+\.[A-Za-z0-9]+\b/g;
    return Array.from(text.matchAll(pathPattern), (match) => (match[0] ?? "").trim()).filter((value) => value);
}
1253
/**
 * Extracts up to `limit` distinct code-like symbols from free text.
 *
 * Candidates are considered in priority order: words inside back-quoted
 * spans, path-like strings, names immediately followed by "(", and finally
 * any identifier-like word. Surrounding quotes, brackets and trailing
 * punctuation are stripped before a candidate is checked.
 *
 * @param {string} text Text to scan.
 * @param {number} [limit=12] Maximum number of symbols to return.
 * @returns {string[]} Accepted symbols in discovery order.
 */
function extractLikelyCodeSymbols(text, limit = 12) {
    const accepted = [];
    const known = new Set();
    // Lazily yields raw candidates in priority order.
    function* candidates() {
        for (const match of text.matchAll(/`([^`]+)`/g)) {
            yield* (match[1] ?? "").split(/[\s,;(){}[\]]+/g).filter(Boolean);
        }
        yield* extractPathLikeSymbols(text);
        for (const match of text.matchAll(/\b([A-Za-z_][A-Za-z0-9_]*)\s*\(/g)) {
            yield match[1] ?? "";
        }
        for (const match of text.matchAll(/\b[A-Za-z_][A-Za-z0-9_]*(?:[./-][A-Za-z_][A-Za-z0-9_]*)*\b/g)) {
            yield match[0] ?? "";
        }
    }
    for (const candidate of candidates()) {
        const cleaned = candidate.trim().replace(/^[`"'([{]+|[`"')\]}:;,.]+$/g, "");
        if (cleaned && !known.has(cleaned) && looksLikeCodeSymbol(cleaned)) {
            known.add(cleaned);
            accepted.push(cleaned);
        }
        if (accepted.length >= limit) {
            return accepted;
        }
    }
    return accepted;
}
1293
/**
 * Builds "left/right" path-shaped hints from adjacent word pairs.
 *
 * Words are lower-case identifiers of 2-49 characters with stopwords
 * removed. For each adjacent pair whose words are both at least 3
 * characters, three forms are proposed: the plain pair, the pair with both
 * words singularized, and the pair with the left word pluralized.
 *
 * @param {string} text Text to scan.
 * @param {number} [limit=12] Maximum number of hints to return.
 * @returns {string[]} Distinct hints in discovery order.
 */
function extractPathFormHints(text, limit = 12) {
    const candidates = text.toLowerCase().match(/\b[a-z][a-z0-9_]{1,48}\b/g) ?? [];
    const words = candidates.filter((word) => !SYMBOL_STOPWORDS.has(word));
    // A Set keeps insertion order and silently drops repeated forms.
    const collected = new Set();
    for (let i = 0; i + 1 < words.length; i += 1) {
        const left = words[i];
        const right = words[i + 1];
        if (left.length < 3 || right.length < 3) {
            continue;
        }
        const variants = [
            `${left}/${right}`,
            `${singularizeToken(left) ?? left}/${singularizeToken(right) ?? right}`,
            `${pluralizeToken(left) ?? left}/${right}`
        ];
        for (const variant of variants) {
            collected.add(variant);
            if (collected.size >= limit) {
                return [...collected];
            }
        }
    }
    return [...collected];
}
1324
/**
 * Expands a prompt (plus recent conversation) into an enriched retrieval query.
 *
 * The query text is the prompt joined with up to four recent non-system
 * history messages that are non-empty and differ from the prompt. Hints are
 * then collected, de-duplicated and capped at `maxExpansionHints`:
 *   1. code symbols and path-form hints extracted from the text (only the
 *      overall cap applies),
 *   2. keyword-triggered generic hints (capped by a per-intent generic budget),
 *   3. framework rule hints (capped by a framework budget and also counted
 *      against the generic budget).
 *
 * @param {string} prompt Current prompt.
 * @param {Array<{role: string, content: string}>} history Prior messages.
 * @param {{maxExpansionHints?: number, queryIntent?: string}} [options]
 *   Overrides for the hint cap and the already-classified query intent.
 * @returns {{query: string, expanded_hint_count: number, query_intent: string}}
 *   The multi-line query (text, `query_intent:` line, optional
 *   `symbol_candidates:` and `retrieval_hints:` lines), the number of hints
 *   added and the intent used.
 */
function buildEnhancerRetrievalQuery(prompt, history, options) {
    const maxExpansionHints = options?.maxExpansionHints ?? DEFAULT_RETRIEVAL_ENHANCER_CONFIG.max_expansion_hints;
    const queryIntent = options?.queryIntent ?? classifyEnhancerQueryIntent(prompt, history);
    const recentMessages = [...history]
        .reverse()
        .filter((message) => message.role !== "system")
        .map((message) => message.content.trim())
        .filter((content) => content.length > 0 && content !== prompt.trim())
        .slice(0, 4)
        .reverse();
    const combined = normalizeQuery([prompt, ...recentMessages].join("\n").trim());
    const lower = combined.toLowerCase();
    const queryTokens = tokenize(combined);
    const queryTokenSet = new Set(queryTokens);
    const docsIntent = queryTokens.some((token) => ENHANCER_DOCS_INTENT_TOKENS.has(token));
    const negativePreferences = detectNegativePathPreferences(combined);
    const testsIntent = queryTokens.some((token) => ENHANCER_TEST_INTENT_TOKENS.has(token));
    const implementationIntent = queryIntent !== "conceptual";
    const prefersRuntimeOnly = negativePreferences.avoid_docs || negativePreferences.avoid_tests || negativePreferences.avoid_examples;

    // Insertion-ordered, de-duplicated hint collection.
    const hints = new Set();
    /** Adds a hint if there is room and it is new; reports whether it was added. */
    const addHint = (value) => {
        if (hints.size >= maxExpansionHints) {
            return false;
        }
        const normalized = value.trim();
        if (!normalized || hints.has(normalized)) {
            return false;
        }
        hints.add(normalized);
        return true;
    };

    const extractedSymbols = extractLikelyCodeSymbols(combined, 12);
    for (const symbol of extractedSymbols) {
        addHint(symbol);
    }
    for (const pathForm of extractPathFormHints(combined, 12)) {
        addHint(pathForm);
    }

    // Symbol-heavy queries get the smallest budgets for non-extracted hints.
    let genericHintBudget = maxExpansionHints;
    let frameworkHintBudget;
    if (queryIntent === "symbol-heavy") {
        genericHintBudget = Math.max(2, Math.floor(maxExpansionHints / 3));
        frameworkHintBudget = Math.max(2, Math.floor(genericHintBudget / 2));
    }
    else if (queryIntent === "impl-focused") {
        genericHintBudget = Math.max(4, Math.floor(maxExpansionHints * 0.8));
        frameworkHintBudget = Math.max(3, Math.floor(genericHintBudget * 0.8));
    }
    else {
        frameworkHintBudget = Math.max(4, Math.floor(genericHintBudget * 0.85));
    }
    let genericHintCount = 0;
    let frameworkHintCount = 0;
    const addGenericHint = (value) => {
        if (genericHintCount >= genericHintBudget) {
            return false;
        }
        const added = addHint(value);
        if (added) {
            genericHintCount += 1;
        }
        return added;
    };
    const addFrameworkHint = (value) => {
        if (frameworkHintCount >= frameworkHintBudget) {
            return;
        }
        // Framework hints also consume the generic budget.
        if (addGenericHint(value)) {
            frameworkHintCount += 1;
        }
    };

    // Ordered hint groups; order decides which hints win when budgets run out.
    const hintGroups = [
        { applies: /\b(cli|subcommand|command|flag)\b/.test(lower), hints: ["main.rs", "clap", "subcommand", "command"] },
        { applies: /\b(react|tsx|ui|component|layout|panel)\b/.test(lower), hints: ["app.tsx", "component", "ui"] },
        { applies: queryIntent === "conceptual", hints: ["docs", "spec", "markdown"] },
        { applies: /\b(python|django|flask|jinja|pydantic)\b/.test(lower), hints: ["py", "src", "views.py", "handlers"] },
        { applies: /\b(node|javascript|typescript|ts|js|express|koa|fastify)\b/.test(lower), hints: ["src", "lib", "handler", "middleware"] },
        { applies: implementationIntent && (!docsIntent || prefersRuntimeOnly), hints: ENHANCER_IMPLEMENTATION_INTENT_HINTS },
        { applies: prefersRuntimeOnly, hints: ["runtime", "src", "lib", "implementation"] },
        { applies: testsIntent, hints: ["test", "assert"] }
    ];
    for (const group of hintGroups) {
        if (!group.applies) {
            continue;
        }
        for (const hint of group.hints) {
            addGenericHint(hint);
        }
    }

    for (const rule of ENHANCER_FRAMEWORK_HINT_RULES) {
        const triggered = rule.token_triggers.some((token) => queryTokenSet.has(token)) ||
            rule.phrase_triggers.some((phrase) => lower.includes(phrase));
        if (!triggered) {
            continue;
        }
        for (const symbol of rule.symbol_hints) {
            addFrameworkHint(symbol);
        }
        for (const pathHint of rule.path_hints) {
            addFrameworkHint(pathHint);
        }
    }

    const sections = [combined, `query_intent: ${queryIntent}`];
    if (extractedSymbols.length > 0) {
        sections.push(`symbol_candidates: ${extractedSymbols.join(" ")}`);
    }
    if (hints.size > 0) {
        sections.push(`retrieval_hints: ${[...hints].join(" ")}`);
    }
    return {
        query: sections.join("\n").trim(),
        expanded_hint_count: hints.size,
        query_intent: queryIntent
    };
}
1467
/** Warning text for low-confidence retrieval. */
const ENHANCER_LOW_CONFIDENCE_WARNING = "Low retrieval confidence; narrowed context refs and added clarification questions.";
/**
 * Tokens ignored when measuring how much of the query overlaps a result:
 * common function words plus docs/tests/examples vocabulary.
 */
const ENHANCER_CONFIDENCE_OVERLAP_STOPWORDS = new Set([
    "a", "about", "an", "and", "avoid", "behavior", "change",
    "doc", "docs", "documentation", "example", "examples",
    "for", "from", "in", "is", "not", "of", "on", "or",
    "readme", "spec", "tests", "test", "the", "to", "without", "with"
]);
1498
/**
 * Returns the directory part of a normalized path.
 *
 * @param {string} path File path.
 * @returns {string} Everything before the last "/", or "." when the path
 *   has no "/" or its only "/" is the first character.
 */
function parentDirectory(path) {
    const normalized = normalizePath(path);
    const lastSlash = normalized.lastIndexOf("/");
    return lastSlash > 0 ? normalized.slice(0, lastSlash) : ".";
}
1506
/**
 * Returns the lower-cased extension of a path's file name.
 *
 * @param {string} path File path.
 * @returns {string} The substring from the last "." of the file name
 *   (dot included), or "" when the file name has no dot.
 */
function fileExtension(path) {
    const normalized = normalizePath(path).toLowerCase();
    const segments = normalized.split("/");
    const filename = segments[segments.length - 1] ?? normalized;
    const lastDot = filename.lastIndexOf(".");
    return lastDot < 0 ? "" : filename.slice(lastDot);
}
1515
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values Numbers to average.
 * @returns {number} The mean, or 0 for an empty list.
 */
function average(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const value of values) {
        total += value;
    }
    return total / count;
}
1521
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * @param {number[]} values Numbers (not modified).
 * @param {number} p Fraction, e.g. 0.95; out-of-range ranks are clamped to
 *   the first/last element.
 * @returns {number} The selected element, or 0 for an empty list.
 */
function percentile(values, p) {
    if (values.length === 0) {
        return 0;
    }
    const ascending = Array.from(values).sort((x, y) => x - y);
    const lastIndex = ascending.length - 1;
    const rank = Math.ceil(ascending.length * p) - 1;
    const index = Math.min(lastIndex, Math.max(0, rank));
    return ascending[index] ?? 0;
}
1529
/**
 * Clamps a number into the closed interval [0, 1].
 *
 * @param {number} value Number to clamp.
 * @returns {number} `value` limited to [0, 1]. Inputs that are not
 *   comparable as numbers (NaN, undefined) yield 0 so that a single bad
 *   term cannot turn downstream score arithmetic into NaN.
 */
function clamp01(value) {
    const clamped = Math.max(0, Math.min(1, value));
    return Number.isNaN(clamped) ? 0 : clamped;
}
1532
/**
 * Tokenizes text and keeps only tokens useful for overlap scoring.
 *
 * @param {string} text Text to tokenize.
 * @returns {string[]} Tokens of at least 3 characters that are not in
 *   ENHANCER_CONFIDENCE_OVERLAP_STOPWORDS.
 */
function normalizeEnhancerOverlapTokens(text) {
    const isMeaningful = (token) => token.length >= 3 && !ENHANCER_CONFIDENCE_OVERLAP_STOPWORDS.has(token);
    const tokens = tokenize(text);
    return tokens.filter(isMeaningful);
}
1535
/**
 * Fraction of query tokens that also occur in a result.
 *
 * @param {string[]} queryTokens Tokens from the query (duplicates count).
 * @param {{path: string, reason: string, snippet: string}} result Search
 *   result whose path, reason and snippet are tokenized for comparison.
 * @returns {number} Matches divided by the number of query tokens; 0 when
 *   there are no query tokens.
 */
function overlapRatio(queryTokens, result) {
    const total = queryTokens.length;
    if (total === 0) {
        return 0;
    }
    const resultTokens = new Set(tokenize(`${result.path}\n${result.reason}\n${result.snippet}`));
    const shared = queryTokens.filter((token) => resultTokens.has(token)).length;
    return shared / total;
}
1548
/**
 * Measures how spread out results are across directories.
 *
 * @param {Array<{path: string}>} results Search results.
 * @returns {number} Distinct (case-insensitive) parent directories divided
 *   by the number of results; 0 for an empty list.
 */
function pathDiversity(results) {
    const total = results.length;
    if (total === 0) {
        return 0;
    }
    const directories = new Set();
    for (const result of results) {
        directories.add(parentDirectory(result.path).toLowerCase());
    }
    return directories.size / total;
}
1555
/**
 * Tells whether a path looks like documentation.
 *
 * @param {string} path File path.
 * @returns {boolean} True when the path is under a `docs/` directory or
 *   ends in `.md`, `.rst` or `.txt` (case-insensitive).
 */
function isDocsLikePath(path) {
    const lowered = normalizePath(path).toLowerCase();
    if (lowered.startsWith("docs/") || lowered.includes("/docs/")) {
        return true;
    }
    return [".md", ".rst", ".txt"].some((extension) => lowered.endsWith(extension));
}
1563
/**
 * Tells whether a path looks like a test file.
 *
 * A path qualifies when it lies under a `test/`, `tests/` or `__tests__/`
 * directory, or when its file name follows a common test naming
 * convention:
 *   - `*.test.*` / `*.spec.*` with a JS/TS extension (js, ts, jsx, tsx,
 *     mjs, cjs, mts, cts) or `.py`,
 *   - `*_test.py` / `*_test.go`,
 *   - `test_*.py`.
 * Matching is case-insensitive.
 *
 * @param {string} path File path.
 * @returns {boolean}
 */
function isTestLikePath(path) {
    const normalized = normalizePath(path).toLowerCase();
    const inTestDirectory = ["test/", "tests/", "__tests__/"].some((directory) => normalized.startsWith(directory) || normalized.includes(`/${directory}`));
    if (inTestDirectory) {
        return true;
    }
    const filename = normalized.split("/").pop() ?? normalized;
    return (/\.(?:test|spec)\.(?:[cm]?[jt]s|[jt]sx|py)$/.test(filename) ||
        /_test\.(?:py|go)$/.test(filename) ||
        /^test_.*\.py$/.test(filename));
}
1577
/**
 * Tells whether a path lies under an `example/` or `examples/` directory
 * (case-insensitive), at the root or anywhere deeper.
 *
 * @param {string} path File path.
 * @returns {boolean}
 */
function isExampleLikePath(path) {
    const lowered = normalizePath(path).toLowerCase();
    return ["example/", "examples/"].some((directory) => lowered.startsWith(directory) || lowered.includes(`/${directory}`));
}
1584
/**
 * Tells whether a path lies under an `archive/` or `_archive/` directory
 * (case-insensitive), at the root or anywhere deeper.
 *
 * @param {string} path File path.
 * @returns {boolean}
 */
function isArchiveLikePath(path) {
    const lowered = normalizePath(path).toLowerCase();
    return ["archive/", "_archive/"].some((directory) => lowered.includes(`/${directory}`) || lowered.startsWith(directory));
}
1591
/**
 * Tells whether a path falls into a category the user asked to avoid.
 *
 * @param {string} path File path.
 * @param {{avoid_archive?: *, avoid_docs?: *, avoid_tests?: *, avoid_examples?: *}} preferences
 *   Negative path preferences; only enabled categories are checked.
 * @returns {boolean} True when any enabled category matches the path.
 */
function shouldAvoidPathFromNegation(path, preferences) {
    // Pairs of [category enabled?, lazy classifier]; classifiers run only
    // for enabled categories, in this order.
    const rules = [
        [preferences.avoid_archive, () => isArchiveLikePath(path)],
        [preferences.avoid_docs, () => isDocsLikePath(path)],
        [preferences.avoid_tests, () => isTestLikePath(path)],
        [preferences.avoid_examples, () => isExampleLikePath(path)]
    ];
    return rules.some(([enabled, matches]) => Boolean(enabled) && Boolean(matches()));
}
1606
/**
 * Tells whether a path is a poor fit for the given task intent.
 *
 * Nothing is risky for "docs" intent. For "tests" intent, docs and example
 * paths are risky. For every other intent, docs, test and example paths
 * are all risky.
 *
 * @param {string} path File path.
 * @param {string} intent Task intent (e.g. "docs", "tests", "bugfix").
 * @returns {boolean}
 */
function isRiskyEnhancerPath(path, intent) {
    if (intent === "docs") {
        return false;
    }
    const docsOrExample = isDocsLikePath(path) || isExampleLikePath(path);
    return intent === "tests" ? docsOrExample : docsOrExample || isTestLikePath(path);
}
1615
/**
 * Filters results by intent and negative preferences, relaxing step by
 * step so that a non-empty input never yields an empty output.
 *
 * Tiers, first non-empty wins:
 *   1. results that are neither risky for the intent nor negated,
 *   2. (strict mode only) results that are not docs, tests, examples or
 *      archive paths,
 *   3. results that are merely not negated,
 *   4. the unfiltered results.
 *
 * @param {Array<{path: string}>} results Candidate results.
 * @param {{intent: string, negative_preferences: object, strict_impl_only_filtering?: *}} input
 * @returns {Array<{path: string}>} The selected tier.
 */
function applyEnhancerIntentPathFiltering(results, input) {
    if (results.length === 0) {
        return [];
    }
    const isNegated = (result) => shouldAvoidPathFromNegation(result.path, input.negative_preferences);
    const preferred = results.filter((result) => !isRiskyEnhancerPath(result.path, input.intent) && !isNegated(result));
    if (preferred.length > 0) {
        return preferred;
    }
    if (input.strict_impl_only_filtering) {
        const nonImplementationChecks = [isDocsLikePath, isTestLikePath, isExampleLikePath, isArchiveLikePath];
        const implOnly = results.filter((result) => !nonImplementationChecks.some((check) => check(result.path)));
        if (implOnly.length > 0) {
            return implOnly;
        }
    }
    const tolerated = results.filter((result) => !isNegated(result));
    return tolerated.length > 0 ? tolerated : results;
}
1635
/**
 * Comparator that orders search results best-first and deterministically.
 *
 * Order: higher score first (scores within 1e-9 count as equal), then path
 * via `localeCompare`, then ascending start line, then ascending end line.
 *
 * @param {{score: number, path: string, start_line: number, end_line: number}} a
 * @param {{score: number, path: string, start_line: number, end_line: number}} b
 * @returns {number} Negative when `a` sorts before `b`.
 */
function compareSearchResults(a, b) {
    const byScore = b.score - a.score;
    if (Math.abs(byScore) > 1e-9) {
        return byScore;
    }
    if (a.path !== b.path) {
        return a.path.localeCompare(b.path);
    }
    return a.start_line !== b.start_line
        ? a.start_line - b.start_line
        : a.end_line - b.end_line;
}
1648
/**
 * Keeps only the best-ranked result per file path.
 *
 * Paths are compared after normalization and lower-casing; "best" is
 * decided by `compareSearchResults`.
 *
 * @param {Array<object>} results Search results, possibly several per file.
 * @returns {Array<object>} One result per path, sorted best-first.
 */
function dedupeEnhancerCandidatesByPath(results) {
    const bestPerPath = new Map();
    for (const candidate of results) {
        const pathKey = normalizePath(candidate.path).toLowerCase();
        const incumbent = bestPerPath.get(pathKey);
        const candidateWins = !incumbent || compareSearchResults(candidate, incumbent) < 0;
        if (candidateWins) {
            bestPerPath.set(pathKey, candidate);
        }
    }
    return Array.from(bestPerPath.values()).sort(compareSearchResults);
}
1659
/**
 * Limits how many results may come from the same directory.
 *
 * Results are ranked best-first and kept until their (case-insensitive)
 * parent directory has reached `maxPerDirectory` entries.
 *
 * @param {Array<{path: string}>} results Search results (not modified).
 * @param {number} maxPerDirectory Per-directory cap; values <= 0 yield [].
 * @returns {Array<{path: string}>} Surviving results, best-first.
 */
function collapseEnhancerCandidatesByDirectory(results, maxPerDirectory) {
    if (maxPerDirectory <= 0) {
        return [];
    }
    const keptPerDirectory = new Map();
    return [...results].sort(compareSearchResults).filter((result) => {
        const directory = parentDirectory(result.path).toLowerCase();
        const kept = keptPerDirectory.get(directory) ?? 0;
        if (kept >= maxPerDirectory) {
            return false;
        }
        keptPerDirectory.set(directory, kept + 1);
        return true;
    });
}
1676
/**
 * Collects lower-cased "anchor" strings from text: likely code symbols
 * (up to 10), plus the first 10 path-like strings and their file names.
 * Only anchors of at least 4 characters are kept.
 *
 * @param {string} text Text to scan.
 * @returns {string[]} Distinct anchors in discovery order.
 */
function extractEnhancerAnchors(text) {
    const anchors = new Set();
    const keepIfLongEnough = (candidate) => {
        if (candidate && candidate.length >= 4) {
            anchors.add(candidate);
        }
    };
    for (const symbol of extractLikelyCodeSymbols(text, 10)) {
        keepIfLongEnough(symbol.trim().toLowerCase());
    }
    for (const rawPath of extractPathLikeSymbols(text).slice(0, 10)) {
        const loweredPath = normalizePath(rawPath).toLowerCase();
        keepIfLongEnough(loweredPath);
        // The bare file name is an anchor too.
        keepIfLongEnough(loweredPath.split("/").pop());
    }
    return Array.from(anchors);
}
1696
/**
 * Decides whether the top results are strongly anchored to the query.
 *
 * Looks at the best three results (one per path). The match is strong when
 * either:
 *   - one of them has reason "exact symbol match", the leader is ahead by
 *     at least 0.08 (or alone) and scores at least 0.55; or
 *   - at least two of them contain an anchor from the prompt/history in
 *     their path (or, for anchors of 5+ characters, their snippet) and the
 *     leader is ahead by at least 0.08 (or alone).
 *
 * @param {{prompt: string, history: Array<{content: string}>, results: Array<object>}} input
 * @returns {boolean}
 */
function hasStrongEnhancerAnchorMatch(input) {
    const leaders = dedupeEnhancerCandidatesByPath(input.results).slice(0, 3);
    if (leaders.length === 0) {
        return false;
    }
    const [best, second] = leaders;
    const topScore = best?.score ?? 0;
    const runnerUpScore = second?.score ?? Number.NEGATIVE_INFINITY;
    const strongScoreMargin = leaders.length === 1 || topScore - runnerUpScore >= 0.08;
    const hasExactSymbolMatch = leaders.some((result) => result.reason === "exact symbol match");
    if (hasExactSymbolMatch && strongScoreMargin && topScore >= 0.55) {
        return true;
    }
    const historyText = input.history.map((entry) => entry.content).join("\n");
    const anchors = extractEnhancerAnchors(`${input.prompt}\n${historyText}`);
    if (anchors.length === 0) {
        return false;
    }
    const anchoredLeaders = leaders.filter((result) => {
        const loweredPath = normalizePath(result.path).toLowerCase();
        const loweredSnippet = result.snippet.toLowerCase();
        // Short anchors are only trusted in paths; snippets need 5+ characters.
        return anchors.some((anchor) => loweredPath.includes(anchor) || (anchor.length >= 5 && loweredSnippet.includes(anchor)));
    });
    return anchoredLeaders.length >= 2 && strongScoreMargin;
}
1724
/**
 * Determines whether enhancer reranking can be skipped because the current
 * results already match the request's anchors strongly.
 *
 * Conceptual queries never bypass. Otherwise a strong anchor match is
 * required, after which the bypass is granted for symbol-heavy queries, for
 * requests with any negative preference set, when fewer than two distinct
 * paths remain, or when the leader beats the runner-up by at least 0.04.
 *
 * @param input Object with `query_intent`, `prompt`, `history`, `results`
 *   and `negative_preferences`.
 * @returns True when reranking should be bypassed.
 */
function shouldBypassEnhancerRerankForAnchors(input) {
    const intent = input.query_intent;
    if (intent === "conceptual") {
        return false;
    }
    const anchored = hasStrongEnhancerAnchorMatch({
        prompt: input.prompt,
        history: input.history,
        results: input.results
    });
    if (!anchored) {
        return false;
    }
    if (intent === "symbol-heavy") {
        return true;
    }
    const negatives = input.negative_preferences;
    const hasExclusionPreference = negatives.avoid_docs ||
        negatives.avoid_tests ||
        negatives.avoid_examples ||
        negatives.avoid_archive;
    if (hasExclusionPreference) {
        return true;
    }
    const leaders = dedupeEnhancerCandidatesByPath(input.results).slice(0, 2);
    if (leaders.length < 2) {
        return true;
    }
    const [first, second] = leaders;
    const margin = (first?.score ?? 0) - (second?.score ?? 0);
    return margin >= 0.04;
}
1752
/**
 * Computes confidence diagnostics for the top five enhancer results.
 *
 * Three strengths are combined: how far the leader's score stands out from
 * the rest (scaled by the interquartile range), how much the top three
 * results overlap with prompt/query tokens, and how close the path diversity
 * is to an intent-dependent target. The threshold is a quantile of those
 * same three strengths, chosen by query intent.
 *
 * @param input Object with `results`, `prompt`, `retrieval_query` and `query_intent`.
 * @returns Diagnostics: `score_spread`, `token_overlap`, `path_diversity`,
 *   `confidence_score`, `confidence_threshold`, `failed_signals`, `low_confidence`.
 */
function evaluateEnhancerConfidence(input) {
    const leaders = input.results.slice(0, 5);
    if (leaders.length === 0) {
        // Nothing to evaluate: report a fixed low-confidence verdict.
        return {
            score_spread: 0,
            token_overlap: 0,
            path_diversity: 0,
            confidence_score: 0,
            confidence_threshold: 0.5,
            failed_signals: ["empty_results"],
            low_confidence: true
        };
    }
    const scores = leaders.map(({ score }) => score);
    const bestScore = scores[0] ?? 0;
    const secondScore = scores[1] ?? bestScore;
    const scoreSpread = bestScore - average(scores);
    const lowerQuartile = percentile(scores, 0.25);
    const median = percentile(scores, 0.5);
    const upperQuartile = percentile(scores, 0.75);
    // Floor the IQR so the ratios below never divide by zero.
    const interquartileRange = Math.max(1e-6, upperQuartile - lowerQuartile);
    const normalizedGap = (gap) => clamp01(gap / (Math.abs(gap) + interquartileRange));
    const leadStrength = normalizedGap(bestScore - secondScore);
    const spreadStrength = normalizedGap(bestScore - median);
    const distributionStrength = average([leadStrength, spreadStrength]);
    const overlapTokens = normalizeEnhancerOverlapTokens(`${input.prompt}\n${input.retrieval_query}`);
    const topOverlap = average(leaders.slice(0, 3).map((candidate) => overlapRatio(overlapTokens, candidate)));
    const topPathDiversity = pathDiversity(leaders);
    const diversityTarget = input.query_intent === "conceptual" ? 0.6 : 0.45;
    const diversityStrength = clamp01(1 - Math.abs(topPathDiversity - diversityTarget) / Math.max(0.2, diversityTarget));
    const components = [distributionStrength, topOverlap, diversityStrength];
    const confidenceScore = average(components);
    let thresholdQuantile;
    switch (input.query_intent) {
        case "symbol-heavy":
            thresholdQuantile = 0.5;
            break;
        case "impl-focused":
            thresholdQuantile = 0.55;
            break;
        default:
            thresholdQuantile = 0.6;
    }
    const confidenceThreshold = percentile(components, thresholdQuantile);
    // Signals are reported in a fixed order: spread, overlap, diversity.
    const failedSignals = [
        ["score_spread", distributionStrength],
        ["token_overlap", topOverlap],
        ["path_diversity", diversityStrength]
    ]
        .filter(([, strength]) => strength < confidenceThreshold)
        .map(([signal]) => signal);
    const strongSymbolOrPathSignal = leaders.some((candidate) => candidate.reason === "exact symbol match") && topOverlap >= 0.16;
    return {
        score_spread: scoreSpread,
        token_overlap: topOverlap,
        path_diversity: topPathDiversity,
        confidence_score: confidenceScore,
        confidence_threshold: confidenceThreshold,
        failed_signals: failedSignals,
        // The 0.01 slack keeps near-threshold scores from being flagged.
        low_confidence: !strongSymbolOrPathSignal && confidenceScore + 0.01 < confidenceThreshold
    };
}
1807
/**
 * Returns a re-sorted copy of the results using a tiered comparison:
 *  1. paths excluded by the negative preferences sort last;
 *  2. paths flagged risky for the intent sort after safe ones;
 *  3. for non-conceptual queries, docs/test/example/archive paths sort later;
 *  4. higher anchor score first (exact symbol match and path hits count 2,
 *     snippet hits for anchors of five or more characters count 1);
 *  5. higher token overlap, then higher score (differences <= 1e-6 are ties);
 *  6. path, start line, end line as deterministic tie-breakers.
 *
 * @param input Object with `results`, `prompt`, `retrieval_query`, `intent`,
 *   `query_intent` and `negative_preferences`.
 * @returns A new sorted array; `input.results` is not mutated.
 */
function rankEnhancerResultsForConfidence(input) {
    const anchorSource = `${input.prompt}\n${input.retrieval_query}`;
    const overlapTokens = normalizeEnhancerOverlapTokens(anchorSource);
    const anchors = extractEnhancerAnchors(anchorSource);
    const flag = (condition) => (condition ? 1 : 0);
    const isNonRuntimePath = (path) => isDocsLikePath(path) || isTestLikePath(path) || isExampleLikePath(path) || isArchiveLikePath(path);
    const scoreAnchors = (candidate) => {
        const pathText = normalizePath(candidate.path).toLowerCase();
        const snippetText = candidate.snippet.toLowerCase();
        const base = candidate.reason === "exact symbol match" ? 2 : 0;
        return anchors.reduce((total, anchor) => {
            if (pathText.includes(anchor)) {
                return total + 2;
            }
            if (anchor.length >= 5 && snippetText.includes(anchor)) {
                return total + 1;
            }
            return total;
        }, base);
    };
    const compareCandidates = (left, right) => {
        const negationDelta = flag(shouldAvoidPathFromNegation(left.path, input.negative_preferences)) -
            flag(shouldAvoidPathFromNegation(right.path, input.negative_preferences));
        if (negationDelta !== 0) {
            return negationDelta;
        }
        const riskDelta = flag(isRiskyEnhancerPath(left.path, input.intent)) - flag(isRiskyEnhancerPath(right.path, input.intent));
        if (riskDelta !== 0) {
            return riskDelta;
        }
        if (input.query_intent !== "conceptual") {
            const runtimeDelta = flag(isNonRuntimePath(left.path)) - flag(isNonRuntimePath(right.path));
            if (runtimeDelta !== 0) {
                return runtimeDelta;
            }
        }
        const anchorDelta = scoreAnchors(right) - scoreAnchors(left);
        if (anchorDelta !== 0) {
            return anchorDelta;
        }
        const overlapDelta = overlapRatio(overlapTokens, right) - overlapRatio(overlapTokens, left);
        if (Math.abs(overlapDelta) > 1e-6) {
            return overlapDelta;
        }
        const scoreDelta = right.score - left.score;
        if (Math.abs(scoreDelta) > 1e-6) {
            return scoreDelta;
        }
        if (left.path !== right.path) {
            return left.path.localeCompare(right.path);
        }
        return left.start_line !== right.start_line
            ? left.start_line - right.start_line
            : left.end_line - right.end_line;
    };
    return Array.from(input.results).sort(compareCandidates);
}
1864
/**
 * Runs `input.fn` and settles with its outcome unless `input.timeout_ms`
 * elapses first, in which case the returned promise rejects with
 * `Error("timeout_after_<ms>ms")`.
 *
 * `fn` is invoked asynchronously, so a synchronous throw becomes a rejection.
 * The timer is always cleared once the race is decided. The underlying work
 * is not cancelled on timeout; its late outcome is simply ignored.
 *
 * @param input Object with `fn` (sync or async function) and `timeout_ms`.
 * @returns Promise of the value produced by `fn`.
 */
async function runWithTimeout(input) {
    let timer;
    const deadline = new Promise((_resolve, reject) => {
        timer = setTimeout(() => {
            reject(new Error(`timeout_after_${input.timeout_ms}ms`));
        }, input.timeout_ms);
    });
    const work = Promise.resolve().then(() => input.fn());
    try {
        return await Promise.race([work, deadline]);
    }
    finally {
        clearTimeout(timer);
    }
}
1894
/**
 * Deterministic fallback ordering: partitions the results into three groups
 * and concatenates them, keeping the incoming order inside each group.
 *
 *  - preferred: neither risky for the intent nor excluded by negation;
 *  - tolerated: risky for the intent but not excluded by negation;
 *  - avoided:   excluded by the negative preferences.
 *
 * The partition is done in a single pass, so each predicate runs at most
 * once per result and no array membership scans are needed.
 *
 * @param input Object with `results`, `intent` and `negative_preferences`.
 * @returns A new array: preferred, then tolerated, then avoided.
 */
function deterministicEnhancerFallbackRanking(input) {
    const preferred = [];
    const tolerated = [];
    const avoided = [];
    for (const result of input.results) {
        if (shouldAvoidPathFromNegation(result.path, input.negative_preferences)) {
            avoided.push(result);
        }
        else if (isRiskyEnhancerPath(result.path, input.intent)) {
            tolerated.push(result);
        }
        else {
            preferred.push(result);
        }
    }
    return [...preferred, ...tolerated, ...avoided];
}
1900
/**
 * Returns the clarifying question shown for a low-confidence result, in
 * Spanish ("es"), Chinese ("zh"), or English (any other language).
 *
 * @param input Object with `kind` ("symbol", "source_priority", or anything
 *   else for the generic scope question), `language`, and optional `symbol`.
 * @returns The localized question text.
 */
function localizeLowConfidenceQuestion(input) {
    const locale = input.language === "es" || input.language === "zh" ? input.language : "en";
    switch (input.kind) {
        case "symbol": {
            if (input.symbol) {
                const confirmSymbol = {
                    es: (symbol) => `¿Puedes confirmar si el cambio debe centrarse en el símbolo "${symbol}"?`,
                    zh: (symbol) => `请确认这次改动是否应优先围绕符号“${symbol}”展开?`,
                    en: (symbol) => `Can you confirm whether "${symbol}" is the primary symbol to change?`
                };
                return confirmSymbol[locale](input.symbol);
            }
            // No symbol known: ask which one to start from.
            const askForSymbol = {
                es: "¿Qué función, clase o archivo exacto debe modificarse primero?",
                zh: "请明确首先要修改的函数、类或文件路径。",
                en: "Which exact function, class, or file should be edited first?"
            };
            return askForSymbol[locale];
        }
        case "source_priority": {
            const askSourcePriority = {
                es: "¿Debemos priorizar archivos de implementación en src/lib y dejar docs/tests/examples fuera de alcance?",
                zh: "是否应优先修改 src/lib 下的实现代码,并排除 docs/tests/examples?",
                en: "Should we prioritize runtime implementation files (src/lib) and exclude docs/tests/examples from scope?"
            };
            return askSourcePriority[locale];
        }
        default: {
            const askScope = {
                es: "¿Cuál es el alcance mínimo y el comportamiento que no debe cambiar?",
                zh: "这次改动的最小范围是什么?哪些行为必须保持不变?",
                en: "What is the minimal scope, and which behavior must remain unchanged?"
            };
            return askScope[locale];
        }
    }
}
1933
/**
 * Keeps the longest prefix of `results` whose cumulative snippet token count
 * (per `tokenize`) stays within `MAX_CONTEXT_BUDGET_TOKENS`.
 *
 * The first result that pushes the running total over the budget, and
 * everything after it, is dropped.
 *
 * @param results Ordered search results with a `snippet` string.
 * @returns A new array containing the retained prefix.
 */
function trimToContextBudget(results) {
    const kept = [];
    let spentTokens = 0;
    for (const candidate of results) {
        spentTokens += tokenize(candidate.snippet).length;
        if (spentTokens > MAX_CONTEXT_BUDGET_TOKENS) {
            return kept;
        }
        kept.push(candidate);
    }
    return kept;
}
1945
/**
 * Renders the structured "enhanced prompt" text in Chinese ("zh"), Spanish
 * ("es"), or English (any other language value).
 *
 * The "likely files" section lists each ref as `- <path>:<start_line>`. When
 * there are no refs, a localized placeholder is used. The placeholder follows
 * the same language fall-through as the template itself, so an unrecognized
 * language gets the English placeholder rather than an empty line.
 *
 * @param input Object with `language`, `original_prompt`, `intent` and
 *   `refs` (array of `{ path, start_line }`).
 * @returns The formatted prompt as a newline-joined string.
 */
function formatEnhancedPrompt(input) {
    const emptyRefsByLanguage = {
        en: "- (no file context available)",
        es: "- (no hay contexto de archivos disponible)",
        zh: "- (暂无可用文件上下文)"
    };
    // Anything other than "zh"/"es" renders the English template below.
    const templateLanguage = input.language === "zh" || input.language === "es" ? input.language : "en";
    const likelyFiles = input.refs.length > 0
        ? input.refs.map((r) => `- ${r.path}:${r.start_line}`).join("\n")
        : emptyRefsByLanguage[templateLanguage];
    if (templateLanguage === "zh") {
        return [
            "目标",
            input.original_prompt,
            "",
            "当前状态",
            `- 识别意图: ${input.intent}`,
            "",
            "约束",
            "- 保持 v1 合约兼容和严格校验。",
            "",
            "可能涉及的文件",
            likelyFiles,
            "",
            "实现清单",
            "- 在改动前确认请求/响应合约。",
            "- 最小化改动并保持 tenant/workspace 隔离。",
            "",
            "边界情况",
            "- Workspace 没有可用索引。",
            "- 搜索过滤后结果为空。",
            "",
            "验证与测试",
            "- 运行 typecheck 和合约/工具测试。",
            "",
            "完成定义",
            "- 测试通过且行为符合 v1 规范。"
        ].join("\n");
    }
    if (templateLanguage === "es") {
        return [
            "Objetivo",
            input.original_prompt,
            "",
            "Estado actual",
            `- Intención clasificada: ${input.intent}`,
            "",
            "Restricciones",
            "- Mantener compatibilidad con contratos v1 y validación estricta.",
            "",
            "Archivos probables a editar",
            likelyFiles,
            "",
            "Checklist de implementación",
            "- Confirmar entradas/salidas del contrato antes de modificar lógica.",
            "- Aplicar cambios mínimos y mantener aislamiento por tenant/workspace.",
            "",
            "Casos límite",
            "- Workspace sin índice listo.",
            "- Filtros de búsqueda que no devuelven resultados.",
            "",
            "Validación y pruebas",
            "- Ejecutar typecheck y pruebas de contratos/herramientas.",
            "",
            "Definición de terminado",
            "- Los tests pasan y el comportamiento coincide con el spec."
        ].join("\n");
    }
    return [
        "Goal",
        input.original_prompt,
        "",
        "Current state",
        `- Classified intent: ${input.intent}`,
        "",
        "Constraints",
        "- Keep v1 contract compatibility and strict schema validation.",
        "",
        "Likely files to edit",
        likelyFiles,
        "",
        "Implementation checklist",
        "- Confirm request/response contract assumptions before code edits.",
        "- Apply smallest safe changes while preserving tenant/workspace isolation.",
        "",
        "Edge cases",
        "- Workspace has no ready index.",
        "- Search filters produce empty result sets.",
        "",
        "Validation and tests",
        "- Run typecheck and contract/tool tests.",
        "",
        "Definition of done",
        "- Tests pass and behavior matches the v1 spec."
    ].join("\n");
}
2038
/**
 * Runs every entry of `SECRET_PATTERNS` against `content` and returns the
 * labels of the patterns that matched, without duplicates.
 *
 * Each pattern's `lastIndex` is reset after testing so that stateful
 * (global/sticky) regexes start from the beginning on their next use.
 *
 * @param content Text to scan.
 * @returns Array of matched pattern labels in first-match order.
 */
function detectSecretMatches(content) {
    const labels = new Set();
    for (const entry of SECRET_PATTERNS) {
        const matcher = entry.pattern;
        const hit = matcher.test(content);
        if (hit) {
            labels.add(entry.label);
        }
        matcher.lastIndex = 0;
    }
    return Array.from(labels);
}
2048
/**
 * Index store that keeps workspaces, index versions, files, chunks and index
 * metadata in process memory using plain Maps. Nothing is persisted.
 *
 * Methods that take a `tenant_id` return an empty result (or do nothing)
 * when the referenced index belongs to a different tenant.
 */
export class InMemoryIndexStore {
    // workspace_id -> workspace record
    workspaces = new Map();
    // `${tenant_id}:${project_root_path}` -> workspace record
    workspacesByPath = new Map();
    // index_id -> index record
    indexes = new Map();
    // workspace_id -> index ids in creation order
    workspaceIndexes = new Map();
    // index_id -> file records
    filesByIndex = new Map();
    // file_id -> stored chunks
    chunksByFile = new Map();
    // index_id -> embedding/chunking metadata
    indexMetadata = new Map();
    /** No-op: there is no schema to migrate for the in-memory store. */
    async migrate() {
        return;
    }
    /** Registers (or replaces) a workspace under both its id and its tenant-scoped root path. */
    async upsertWorkspace(input) {
        this.workspaces.set(input.workspace_id, input);
        this.workspacesByPath.set(`${input.tenant_id}:${input.project_root_path}`, input);
    }
    /** Looks up a workspace by tenant and project root path; `undefined` when unknown. */
    async resolveWorkspaceByProjectRoot(tenant_id, project_root_path) {
        return this.workspacesByPath.get(`${tenant_id}:${project_root_path}`);
    }
    /** Looks up a workspace by id; `undefined` when unknown or owned by another tenant. */
    async resolveWorkspaceByWorkspaceId(tenant_id, workspace_id) {
        const workspace = this.workspaces.get(workspace_id);
        if (!workspace || workspace.tenant_id !== tenant_id) {
            return undefined;
        }
        return workspace;
    }
    /**
     * Creates a new index record with a generated `idx_` id and an empty file list.
     * The status defaults to "indexing" when `input.status` is not provided.
     * @returns The stored index record.
     */
    async createIndexVersion(input) {
        const indexId = `idx_${randomUUID()}`;
        const index = {
            index_id: indexId,
            workspace_id: input.workspace_id,
            tenant_id: input.tenant_id,
            index_version: input.index_version,
            status: input.status ?? "indexing",
            created_at: new Date().toISOString(),
            updated_at: new Date().toISOString()
        };
        this.indexes.set(indexId, index);
        const existing = this.workspaceIndexes.get(input.workspace_id) ?? [];
        existing.push(indexId);
        this.workspaceIndexes.set(input.workspace_id, existing);
        this.filesByIndex.set(indexId, []);
        return index;
    }
    /** Updates an index's status and `updated_at`; ignored when the index is unknown or tenant/workspace do not match. */
    async markIndexStatus(input) {
        const index = this.indexes.get(input.index_id);
        if (!index) {
            return;
        }
        if (index.tenant_id !== input.tenant_id || index.workspace_id !== input.workspace_id) {
            return;
        }
        index.status = input.status;
        index.updated_at = new Date().toISOString();
    }
    /** Returns the first index matching tenant, workspace and version, or `undefined`. */
    async getIndexByVersion(input) {
        for (const index of this.indexes.values()) {
            if (index.tenant_id === input.tenant_id &&
                index.workspace_id === input.workspace_id &&
                index.index_version === input.index_version) {
                return index;
            }
        }
        return undefined;
    }
    /** Drops all files (and their chunks) of an index owned by the tenant. */
    async resetIndexContent(input) {
        const index = this.indexes.get(input.index_id);
        if (!index || index.tenant_id !== input.tenant_id) {
            return;
        }
        const files = this.filesByIndex.get(input.index_id) ?? [];
        for (const file of files) {
            this.chunksByFile.delete(file.file_id);
        }
        this.filesByIndex.set(input.index_id, []);
    }
    /**
     * Returns the most recently created "ready" index of the workspace for
     * the tenant, or `undefined` when there is none.
     *
     * `created_at` has millisecond resolution, so two indexes created back to
     * back can share a timestamp. Ties are resolved in favour of the index
     * registered later, so the newest one is returned.
     */
    async getLatestReadyIndex(input) {
        const indexIds = this.workspaceIndexes.get(input.workspace_id) ?? [];
        let latest;
        let latestCreatedAt = Number.NEGATIVE_INFINITY;
        for (const id of indexIds) {
            const index = this.indexes.get(id);
            if (index === undefined ||
                index.tenant_id !== input.tenant_id ||
                index.workspace_id !== input.workspace_id ||
                index.status !== "ready") {
                continue;
            }
            const createdAt = new Date(index.created_at).getTime();
            // `>=` lets a later-registered index win on equal timestamps.
            if (latest === undefined || createdAt >= latestCreatedAt) {
                latest = index;
                latestCreatedAt = createdAt;
            }
        }
        return latest;
    }
    /** Returns a shallow copy of the file list of an index owned by the tenant, else `[]`. */
    async getFilesByIndex(input) {
        const index = this.indexes.get(input.index_id);
        if (!index || index.tenant_id !== input.tenant_id) {
            return [];
        }
        return [...(this.filesByIndex.get(input.index_id) ?? [])];
    }
    /**
     * Copies one file (by `repo_path`) and its chunks from a source index to
     * a target index, giving the copy a fresh `fil_` id and re-hashing chunks
     * against that new id.
     *
     * Does nothing when the file is not in the source index, or when either
     * index is unknown or not owned by `input.tenant_id`.
     */
    async copyFileFromIndex(input) {
        const sourceIndex = this.indexes.get(input.source_index_id);
        const targetIndex = this.indexes.get(input.target_index_id);
        if (!sourceIndex ||
            !targetIndex ||
            sourceIndex.tenant_id !== input.tenant_id ||
            targetIndex.tenant_id !== input.tenant_id) {
            // Never move content across tenants.
            return;
        }
        const sourceFiles = this.filesByIndex.get(input.source_index_id) ?? [];
        const sourceFile = sourceFiles.find((file) => file.repo_path === input.repo_path);
        if (!sourceFile) {
            return;
        }
        const targetFiles = this.filesByIndex.get(input.target_index_id) ?? [];
        const targetFile = {
            file_id: `fil_${randomUUID()}`,
            repo_path: sourceFile.repo_path,
            content_hash: sourceFile.content_hash,
            language: sourceFile.language
        };
        targetFiles.push(targetFile);
        this.filesByIndex.set(input.target_index_id, targetFiles);
        const sourceChunks = this.chunksByFile.get(sourceFile.file_id) ?? [];
        this.chunksByFile.set(targetFile.file_id, sourceChunks.map((chunk) => ({
            ...chunk,
            hash: sha256(`${targetFile.file_id}:${chunk.start_line}:${chunk.end_line}:${chunk.snippet}`)
        })));
    }
    /**
     * Inserts a file record into an index, or updates the hash and language
     * of the existing record with the same `repo_path`.
     * @returns `{ file_id }` of the inserted or updated record.
     */
    async upsertFile(input) {
        const files = this.filesByIndex.get(input.index_id) ?? [];
        const existing = files.find((file) => file.repo_path === input.repo_path);
        if (existing) {
            existing.content_hash = input.content_hash;
            existing.language = input.language;
            return { file_id: existing.file_id };
        }
        const file_id = `fil_${randomUUID()}`;
        files.push({
            file_id,
            repo_path: input.repo_path,
            content_hash: input.content_hash,
            language: input.language
        });
        this.filesByIndex.set(input.index_id, files);
        return { file_id };
    }
    /** Replaces all stored chunks of a file; embeddings are copied and a per-chunk hash is computed. */
    async replaceFileChunks(input) {
        this.chunksByFile.set(input.file_id, input.chunks.map((chunk) => ({
            path: input.repo_path,
            start_line: chunk.start_line,
            end_line: chunk.end_line,
            snippet: chunk.snippet,
            generated: chunk.generated,
            updated_at: chunk.updated_at ?? new Date().toISOString(),
            hash: sha256(`${input.file_id}:${chunk.start_line}:${chunk.end_line}:${chunk.snippet}`),
            embedding: [...chunk.embedding]
        })));
    }
    /** No-op: manifests are not kept by the in-memory store. */
    async saveManifest() {
        return;
    }
    /** Stores embedding/chunking metadata for an index, stamped with the current time. */
    async saveIndexMetadata(input) {
        this.indexMetadata.set(input.index_id, {
            tenant_id: input.tenant_id,
            embedding_provider: input.embedding_provider,
            ...(input.embedding_model ? { embedding_model: input.embedding_model } : {}),
            embedding_dimensions: input.embedding_dimensions,
            ...(input.embedding_version ? { embedding_version: input.embedding_version } : {}),
            chunking_strategy: input.chunking_strategy,
            chunking_fallback_strategy: input.chunking_fallback_strategy,
            created_at: new Date().toISOString()
        });
    }
    /** Returns stored metadata (without `tenant_id`) for an index owned by the tenant, else `undefined`. */
    async getIndexMetadata(input) {
        const metadata = this.indexMetadata.get(input.index_id);
        if (!metadata || metadata.tenant_id !== input.tenant_id) {
            return undefined;
        }
        return {
            embedding_provider: metadata.embedding_provider,
            ...(metadata.embedding_model ? { embedding_model: metadata.embedding_model } : {}),
            embedding_dimensions: metadata.embedding_dimensions,
            ...(metadata.embedding_version ? { embedding_version: metadata.embedding_version } : {}),
            chunking_strategy: metadata.chunking_strategy,
            chunking_fallback_strategy: metadata.chunking_fallback_strategy,
            created_at: metadata.created_at
        };
    }
    /**
     * Lists the chunks of an index owned by the tenant, optionally filtered
     * by `filters.language` (file level), `filters.path_prefix` and
     * `filters.glob` (chunk path level). Returns `[]` for unknown or foreign indexes.
     */
    async listChunksByIndex(input) {
        const index = this.indexes.get(input.index_id);
        if (!index || index.tenant_id !== input.tenant_id) {
            return [];
        }
        const files = this.filesByIndex.get(input.index_id) ?? [];
        const globRegex = input.filters?.glob ? compileGlob(input.filters.glob) : undefined;
        const output = [];
        for (const file of files) {
            if (input.filters?.language && file.language !== input.filters.language) {
                continue;
            }
            const chunks = this.chunksByFile.get(file.file_id) ?? [];
            for (const chunk of chunks) {
                if (input.filters?.path_prefix && !chunk.path.startsWith(input.filters.path_prefix)) {
                    continue;
                }
                if (globRegex && !globRegex.test(chunk.path)) {
                    continue;
                }
                output.push({
                    chunk_id: `chk_${sha256(`${file.file_id}:${chunk.hash}`)}`,
                    file_id: file.file_id,
                    path: chunk.path,
                    start_line: chunk.start_line,
                    end_line: chunk.end_line,
                    snippet: chunk.snippet,
                    ...(file.language ? { language: file.language } : {}),
                    ...(chunk.generated ? { generated: chunk.generated } : {}),
                    updated_at: chunk.updated_at,
                    // Fall back to a derived embedding when none was stored.
                    embedding: [...(chunk.embedding ?? pseudoEmbedding(chunk.snippet))]
                });
            }
        }
        return output;
    }
}
2257
/**
 * Translates a simple glob into an anchored regular expression.
 *
 * `*` matches any run of characters (including `/`), `?` matches exactly one
 * character, and every regex metacharacter is matched literally.
 *
 * @param glob Glob pattern string.
 * @returns RegExp that must match the whole input.
 */
function compileGlob(glob) {
    const REGEX_SPECIALS = ".+^${}()|[]\\";
    let body = "";
    for (const char of glob) {
        if (char === "*") {
            body += ".*";
        }
        else if (char === "?") {
            body += ".";
        }
        else if (REGEX_SPECIALS.includes(char)) {
            body += `\\${char}`;
        }
        else {
            body += char;
        }
    }
    return new RegExp(`^${body}$`);
}
2262
+ export class RetrievalCore {
2263
+ store;
2264
+ cache;
2265
+ cacheTtlSeconds;
2266
+ embeddingProvider;
2267
+ embeddingDescriptor;
2268
+ observability;
2269
+ scoringConfig;
2270
+ scoringProfileId;
2271
+ scoringConfigChecksum;
2272
+ enhancerConfig;
2273
+ chunkingConfig;
2274
+ enhancerDecisionTraceEnabled;
2275
+ cacheHits = 0;
2276
+ cacheMisses = 0;
2277
+ constructor(store, cache, options) {
2278
+ this.store = store;
2279
+ this.cache = cache;
2280
+ this.cacheTtlSeconds = options?.cacheTtlSeconds ?? 60;
2281
+ this.embeddingProvider = options?.embeddingProvider ?? new DeterministicEmbeddingProvider();
2282
+ this.embeddingDescriptor = normalizeEmbeddingDescriptor(options?.embeddingDescriptor ?? resolveEmbeddingDescriptor(this.embeddingProvider));
2283
+ this.observability = options?.observability ?? getObservability("retrieval-core");
2284
+ const baseProfile = resolveRetrievalScoringProfile(options?.scoringProfile);
2285
+ this.scoringConfig = mergeRetrievalScoringConfig(baseProfile.config, options?.scoringConfig);
2286
+ this.scoringProfileId = options?.scoringProfileId ?? baseProfile.profile_id;
2287
+ this.scoringConfigChecksum = scoringConfigChecksum(this.scoringConfig);
2288
+ this.enhancerConfig = mergeRetrievalEnhancerConfig(DEFAULT_RETRIEVAL_ENHANCER_CONFIG, options?.enhancerConfig);
2289
+ this.chunkingConfig = mergeRetrievalChunkingConfig(DEFAULT_RETRIEVAL_CHUNKING_CONFIG, options?.chunkingConfig);
2290
+ this.enhancerDecisionTraceEnabled = Boolean(options?.enhancerDecisionTraceEnabled);
2291
+ }
2292
+ async indexArtifact(artifact) {
2293
+ const existingIndex = await this.store.getIndexByVersion({
2294
+ tenant_id: artifact.tenant_id,
2295
+ workspace_id: artifact.workspace_id,
2296
+ index_version: artifact.index_version
2297
+ });
2298
+ if (existingIndex?.status === "ready") {
2299
+ return {
2300
+ workspace_id: artifact.workspace_id,
2301
+ index_version: artifact.index_version,
2302
+ status: "ready",
2303
+ counts: {
2304
+ added: 0,
2305
+ modified: 0,
2306
+ deleted: 0,
2307
+ unchanged: 0,
2308
+ skipped: 0
2309
+ },
2310
+ skipped_files: [],
2311
+ warnings: []
2312
+ };
2313
+ }
2314
+ const previousReadyIndex = await this.store.getLatestReadyIndex({
2315
+ tenant_id: artifact.tenant_id,
2316
+ workspace_id: artifact.workspace_id
2317
+ });
2318
+ const nextIndex = existingIndex
2319
+ ? existingIndex
2320
+ : await this.store.createIndexVersion({
2321
+ tenant_id: artifact.tenant_id,
2322
+ workspace_id: artifact.workspace_id,
2323
+ index_version: artifact.index_version,
2324
+ status: "indexing"
2325
+ });
2326
+ if (existingIndex) {
2327
+ await this.store.markIndexStatus({
2328
+ tenant_id: artifact.tenant_id,
2329
+ workspace_id: artifact.workspace_id,
2330
+ index_id: existingIndex.index_id,
2331
+ status: "indexing"
2332
+ });
2333
+ await this.store.resetIndexContent({
2334
+ tenant_id: artifact.tenant_id,
2335
+ index_id: existingIndex.index_id
2336
+ });
2337
+ }
2338
+ const normalizedFiles = artifact.files.map((file) => ({
2339
+ ...file,
2340
+ path: normalizePath(file.path)
2341
+ }));
2342
+ const skipped = [];
2343
+ const warnings = [];
2344
+ const candidateFiles = normalizedFiles.filter((file) => {
2345
+ const sizeBytes = Buffer.byteLength(file.content, "utf8");
2346
+ if (file.binary) {
2347
+ skipped.push({ path: file.path, reason: "binary file" });
2348
+ return false;
2349
+ }
2350
+ if (sizeBytes > MAX_FILE_SIZE_BYTES) {
2351
+ skipped.push({ path: file.path, reason: "max file size exceeded" });
2352
+ return false;
2353
+ }
2354
+ const secretMatches = detectSecretMatches(file.content);
2355
+ if (secretMatches.length > 0) {
2356
+ skipped.push({ path: file.path, reason: `secret scan matched (${secretMatches.join(", ")})` });
2357
+ warnings.push({
2358
+ path: file.path,
2359
+ reason: `excluded by default secret scanner: ${secretMatches.join(", ")}`,
2360
+ category: "secret_exclusion"
2361
+ });
2362
+ return false;
2363
+ }
2364
+ return true;
2365
+ });
2366
+ const previousHashes = new Map();
2367
+ if (previousReadyIndex) {
2368
+ const previousFiles = await this.store.getFilesByIndex({
2369
+ tenant_id: artifact.tenant_id,
2370
+ index_id: previousReadyIndex.index_id
2371
+ });
2372
+ for (const file of previousFiles) {
2373
+ previousHashes.set(file.repo_path, file.content_hash);
2374
+ }
2375
+ }
2376
+ const nextHashes = new Map();
2377
+ const changedFiles = [];
2378
+ let added = 0;
2379
+ let modified = 0;
2380
+ let unchanged = 0;
2381
+ for (const file of candidateFiles) {
2382
+ const hash = sha256(file.content);
2383
+ nextHashes.set(file.path, hash);
2384
+ const old = previousHashes.get(file.path);
2385
+ if (!old) {
2386
+ added += 1;
2387
+ changedFiles.push(file);
2388
+ }
2389
+ else if (old !== hash) {
2390
+ modified += 1;
2391
+ changedFiles.push(file);
2392
+ }
2393
+ else {
2394
+ unchanged += 1;
2395
+ }
2396
+ }
2397
+ let deleted = 0;
2398
+ for (const oldPath of previousHashes.keys()) {
2399
+ if (!nextHashes.has(oldPath)) {
2400
+ deleted += 1;
2401
+ }
2402
+ }
2403
+ try {
2404
+ await this.store.saveIndexMetadata?.({
2405
+ tenant_id: artifact.tenant_id,
2406
+ index_id: nextIndex.index_id,
2407
+ embedding_provider: this.embeddingDescriptor.provider,
2408
+ embedding_model: this.embeddingDescriptor.model,
2409
+ embedding_dimensions: this.embeddingDescriptor.dimensions,
2410
+ embedding_version: this.embeddingDescriptor.version,
2411
+ chunking_strategy: this.chunkingConfig.strategy,
2412
+ chunking_fallback_strategy: this.chunkingConfig.fallback_strategy
2413
+ });
2414
+ this.observability.logger.info("index embedding metadata persisted", {
2415
+ tenant_id: artifact.tenant_id,
2416
+ workspace_id: artifact.workspace_id,
2417
+ index_id: nextIndex.index_id,
2418
+ embedding_provider: this.embeddingDescriptor.provider,
2419
+ embedding_model: this.embeddingDescriptor.model ?? "unknown",
2420
+ embedding_dimensions: this.embeddingDescriptor.dimensions
2421
+ });
2422
+ if (previousReadyIndex) {
2423
+ for (const [path, hash] of nextHashes) {
2424
+ const old = previousHashes.get(path);
2425
+ if (old && old === hash) {
2426
+ await this.store.copyFileFromIndex({
2427
+ tenant_id: artifact.tenant_id,
2428
+ source_index_id: previousReadyIndex.index_id,
2429
+ target_index_id: nextIndex.index_id,
2430
+ repo_path: path
2431
+ });
2432
+ }
2433
+ }
2434
+ }
2435
+ if (this.chunkingConfig.strategy === "language_aware") {
2436
+ const parserAvailabilitySnapshot = getChunkingParserAvailabilitySnapshot({
2437
+ enabled_languages: this.chunkingConfig.enabled_languages
2438
+ });
2439
+ for (const availability of parserAvailabilitySnapshot) {
2440
+ this.observability.metrics.gauge("index_chunking_parser_availability", availability.status === "available" ? 1 : 0, {
2441
+ tenant_id: artifact.tenant_id,
2442
+ language: availability.language,
2443
+ status: availability.status
2444
+ });
2445
+ }
2446
+ this.observability.logger.info("chunking parser availability snapshot", {
2447
+ tenant_id: artifact.tenant_id,
2448
+ workspace_id: artifact.workspace_id,
2449
+ index_id: nextIndex.index_id,
2450
+ snapshot: parserAvailabilitySnapshot
2451
+ });
2452
+ }
2453
+ for (const file of changedFiles) {
2454
+ const chunkBuild = buildChunks(file, this.chunkingConfig);
2455
+ const chunks = chunkBuild.chunks;
2456
+ const chunkLanguage = chunkBuild.language ?? file.language ?? "unknown";
2457
+ this.observability.metrics.increment("index_chunking_strategy_total", 1, {
2458
+ tenant_id: artifact.tenant_id,
2459
+ strategy: chunkBuild.strategy,
2460
+ language: chunkLanguage,
2461
+ reason: chunkBuild.fallback_reason ?? "none"
2462
+ });
2463
+ if (chunkBuild.fallback_reason) {
2464
+ this.observability.metrics.increment("index_chunking_fallback_total", 1, {
2465
+ tenant_id: artifact.tenant_id,
2466
+ reason: chunkBuild.fallback_reason,
2467
+ language: chunkLanguage
2468
+ });
2469
+ }
2470
+ if (typeof chunkBuild.parse_latency_ms === "number") {
2471
+ this.observability.metrics.observe("index_chunk_parse_latency_ms", chunkBuild.parse_latency_ms, {
2472
+ tenant_id: artifact.tenant_id,
2473
+ language: chunkLanguage
2474
+ });
2475
+ }
2476
+ if (typeof chunkBuild.language_aware_attempt_latency_ms === "number") {
2477
+ this.observability.metrics.observe("index_chunk_language_aware_attempt_latency_ms", chunkBuild.language_aware_attempt_latency_ms, {
2478
+ tenant_id: artifact.tenant_id,
2479
+ language: chunkLanguage,
2480
+ outcome: chunkBuild.fallback_reason ? "fallback" : "success"
2481
+ });
2482
+ }
2483
+ if (typeof chunkBuild.fallback_path_latency_ms === "number" && chunkBuild.fallback_reason) {
2484
+ this.observability.metrics.observe("index_chunk_fallback_path_latency_ms", chunkBuild.fallback_path_latency_ms, {
2485
+ tenant_id: artifact.tenant_id,
2486
+ language: chunkLanguage,
2487
+ reason: chunkBuild.fallback_reason
2488
+ });
2489
+ }
2490
+ const estimatedEmbeddingTokens = chunks.reduce((sum, chunk) => sum + tokenize(chunk.snippet).length, 0);
2491
+ this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
2492
+ tenant_id: artifact.tenant_id
2493
+ });
2494
+ const embeddings = chunks.length === 0
2495
+ ? []
2496
+ : await this.embeddingProvider.embed({
2497
+ texts: chunks.map((chunk) => chunk.snippet),
2498
+ purpose: "index"
2499
+ });
2500
+ if (embeddings.length !== chunks.length) {
2501
+ throw new RetrievalError("UPSTREAM_FAILURE", `embedding provider returned ${embeddings.length} vectors for ${chunks.length} indexed chunks`);
2502
+ }
2503
+ const contentHash = sha256(file.content);
2504
+ const sizeBytes = Buffer.byteLength(file.content, "utf8");
2505
+ const chunkEmbeddings = chunks.map((chunk, index) => {
2506
+ const embedding = embeddings[index];
2507
+ if (!isFiniteNumberArray(embedding)) {
2508
+ throw new RetrievalError("UPSTREAM_FAILURE", `embedding provider returned a non-numeric vector for ${file.path}:${chunk.start_line}-${chunk.end_line}`);
2509
+ }
2510
+ if (embedding.length !== this.embeddingDescriptor.dimensions) {
2511
+ this.observability.metrics.increment("index_embedding_dimension_mismatch_total", 1, {
2512
+ tenant_id: artifact.tenant_id,
2513
+ expected_dimensions: this.embeddingDescriptor.dimensions,
2514
+ actual_dimensions: embedding.length
2515
+ });
2516
+ this.observability.logger.warn("embedding dimensions mismatch provider descriptor; failing index build", {
2517
+ tenant_id: artifact.tenant_id,
2518
+ workspace_id: artifact.workspace_id,
2519
+ path: file.path,
2520
+ expected_dimensions: this.embeddingDescriptor.dimensions,
2521
+ actual_dimensions: embedding.length
2522
+ });
2523
+ throw new RetrievalError("UPSTREAM_FAILURE", `embedding dimension mismatch for ${file.path}; expected ${this.embeddingDescriptor.dimensions}, received ${embedding.length}`);
2524
+ }
2525
+ return embedding;
2526
+ });
2527
+ const fileRow = await this.store.upsertFile({
2528
+ tenant_id: artifact.tenant_id,
2529
+ index_id: nextIndex.index_id,
2530
+ repo_path: file.path,
2531
+ content_hash: contentHash,
2532
+ size_bytes: sizeBytes,
2533
+ language: file.language,
2534
+ updated_at: file.updated_at ?? new Date().toISOString()
2535
+ });
2536
+ await this.store.replaceFileChunks({
2537
+ tenant_id: artifact.tenant_id,
2538
+ file_id: fileRow.file_id,
2539
+ repo_path: file.path,
2540
+ chunks: chunks.map((chunk, index) => ({
2541
+ start_line: chunk.start_line,
2542
+ end_line: chunk.end_line,
2543
+ snippet: chunk.snippet,
2544
+ embedding: chunkEmbeddings[index],
2545
+ generated: chunk.generated,
2546
+ updated_at: chunk.updated_at
2547
+ }))
2548
+ });
2549
+ }
2550
+ if (artifact.manifest) {
2551
+ await this.store.saveManifest({
2552
+ index_id: nextIndex.index_id,
2553
+ object_key: artifact.manifest.object_key,
2554
+ checksum: artifact.manifest.checksum
2555
+ });
2556
+ }
2557
+ await this.store.markIndexStatus({
2558
+ tenant_id: artifact.tenant_id,
2559
+ workspace_id: artifact.workspace_id,
2560
+ index_id: nextIndex.index_id,
2561
+ status: "ready"
2562
+ });
2563
+ await this.cache.invalidateWorkspace(artifact.workspace_id);
2564
+ return {
2565
+ workspace_id: artifact.workspace_id,
2566
+ index_version: artifact.index_version,
2567
+ status: "ready",
2568
+ counts: {
2569
+ added,
2570
+ modified,
2571
+ deleted,
2572
+ unchanged,
2573
+ skipped: skipped.length
2574
+ },
2575
+ skipped_files: skipped,
2576
+ warnings
2577
+ };
2578
+ }
2579
+ catch (error) {
2580
+ await this.store.markIndexStatus({
2581
+ tenant_id: artifact.tenant_id,
2582
+ workspace_id: artifact.workspace_id,
2583
+ index_id: nextIndex.index_id,
2584
+ status: "failed"
2585
+ });
2586
+ throw error;
2587
+ }
2588
+ }
2589
+ async indexArtifactDelta(artifact) {
2590
+ const existingIndex = await this.store.getIndexByVersion({
2591
+ tenant_id: artifact.tenant_id,
2592
+ workspace_id: artifact.workspace_id,
2593
+ index_version: artifact.index_version
2594
+ });
2595
+ if (existingIndex?.status === "ready") {
2596
+ return {
2597
+ workspace_id: artifact.workspace_id,
2598
+ index_version: artifact.index_version,
2599
+ status: "ready",
2600
+ counts: {
2601
+ added: 0,
2602
+ modified: 0,
2603
+ deleted: 0,
2604
+ unchanged: 0,
2605
+ skipped: 0
2606
+ },
2607
+ skipped_files: [],
2608
+ warnings: []
2609
+ };
2610
+ }
2611
+ const baseIndex = artifact.base_index_version
2612
+ ? await this.store.getIndexByVersion({
2613
+ tenant_id: artifact.tenant_id,
2614
+ workspace_id: artifact.workspace_id,
2615
+ index_version: artifact.base_index_version
2616
+ })
2617
+ : await this.store.getLatestReadyIndex({
2618
+ tenant_id: artifact.tenant_id,
2619
+ workspace_id: artifact.workspace_id
2620
+ });
2621
+ if (artifact.base_index_version && (!baseIndex || baseIndex.status !== "ready")) {
2622
+ throw new RetrievalError("INVALID_ARGUMENT", `base index version ${artifact.base_index_version} is not ready for workspace ${artifact.workspace_id}`);
2623
+ }
2624
+ const nextIndex = existingIndex
2625
+ ? existingIndex
2626
+ : await this.store.createIndexVersion({
2627
+ tenant_id: artifact.tenant_id,
2628
+ workspace_id: artifact.workspace_id,
2629
+ index_version: artifact.index_version,
2630
+ status: "indexing"
2631
+ });
2632
+ if (existingIndex) {
2633
+ await this.store.markIndexStatus({
2634
+ tenant_id: artifact.tenant_id,
2635
+ workspace_id: artifact.workspace_id,
2636
+ index_id: existingIndex.index_id,
2637
+ status: "indexing"
2638
+ });
2639
+ await this.store.resetIndexContent({
2640
+ tenant_id: artifact.tenant_id,
2641
+ index_id: existingIndex.index_id
2642
+ });
2643
+ }
2644
+ const dedupedUpsertFiles = new Map();
2645
+ for (const file of artifact.upsert_files) {
2646
+ const normalizedPath = normalizePath(file.path);
2647
+ dedupedUpsertFiles.set(normalizedPath, {
2648
+ ...file,
2649
+ path: normalizedPath
2650
+ });
2651
+ }
2652
+ const normalizedUpsertFiles = [...dedupedUpsertFiles.values()];
2653
+ const skipped = [];
2654
+ const warnings = [];
2655
+ const candidateFiles = normalizedUpsertFiles.filter((file) => {
2656
+ const sizeBytes = Buffer.byteLength(file.content, "utf8");
2657
+ if (file.binary) {
2658
+ skipped.push({ path: file.path, reason: "binary file" });
2659
+ return false;
2660
+ }
2661
+ if (sizeBytes > MAX_FILE_SIZE_BYTES) {
2662
+ skipped.push({ path: file.path, reason: "max file size exceeded" });
2663
+ return false;
2664
+ }
2665
+ const secretMatches = detectSecretMatches(file.content);
2666
+ if (secretMatches.length > 0) {
2667
+ skipped.push({ path: file.path, reason: `secret scan matched (${secretMatches.join(", ")})` });
2668
+ warnings.push({
2669
+ path: file.path,
2670
+ reason: `excluded by default secret scanner: ${secretMatches.join(", ")}`,
2671
+ category: "secret_exclusion"
2672
+ });
2673
+ return false;
2674
+ }
2675
+ return true;
2676
+ });
2677
+ const baseHashes = new Map();
2678
+ if (baseIndex?.status === "ready") {
2679
+ const baseFiles = await this.store.getFilesByIndex({
2680
+ tenant_id: artifact.tenant_id,
2681
+ index_id: baseIndex.index_id
2682
+ });
2683
+ for (const file of baseFiles) {
2684
+ baseHashes.set(file.repo_path, file.content_hash);
2685
+ }
2686
+ }
2687
+ const changedFiles = [];
2688
+ let added = 0;
2689
+ let modified = 0;
2690
+ for (const file of candidateFiles) {
2691
+ const hash = sha256(file.content);
2692
+ const old = baseHashes.get(file.path);
2693
+ if (!old) {
2694
+ added += 1;
2695
+ changedFiles.push(file);
2696
+ }
2697
+ else if (old !== hash) {
2698
+ modified += 1;
2699
+ changedFiles.push(file);
2700
+ }
2701
+ }
2702
+ const changedPaths = new Set(changedFiles.map((file) => file.path));
2703
+ const deletedPaths = new Set(artifact.deleted_paths
2704
+ .map((path) => normalizePath(path))
2705
+ .filter((path) => baseHashes.has(path) && !changedPaths.has(path)));
2706
+ const copyPaths = [];
2707
+ for (const path of baseHashes.keys()) {
2708
+ if (deletedPaths.has(path) || changedPaths.has(path)) {
2709
+ continue;
2710
+ }
2711
+ copyPaths.push(path);
2712
+ }
2713
+ const unchanged = copyPaths.length;
2714
+ const deleted = deletedPaths.size;
2715
+ try {
2716
+ await this.store.saveIndexMetadata?.({
2717
+ tenant_id: artifact.tenant_id,
2718
+ index_id: nextIndex.index_id,
2719
+ embedding_provider: this.embeddingDescriptor.provider,
2720
+ embedding_model: this.embeddingDescriptor.model,
2721
+ embedding_dimensions: this.embeddingDescriptor.dimensions,
2722
+ embedding_version: this.embeddingDescriptor.version,
2723
+ chunking_strategy: this.chunkingConfig.strategy,
2724
+ chunking_fallback_strategy: this.chunkingConfig.fallback_strategy
2725
+ });
2726
+ this.observability.logger.info("index embedding metadata persisted", {
2727
+ tenant_id: artifact.tenant_id,
2728
+ workspace_id: artifact.workspace_id,
2729
+ index_id: nextIndex.index_id,
2730
+ embedding_provider: this.embeddingDescriptor.provider,
2731
+ embedding_model: this.embeddingDescriptor.model ?? "unknown",
2732
+ embedding_dimensions: this.embeddingDescriptor.dimensions
2733
+ });
2734
+ if (baseIndex?.status === "ready") {
2735
+ for (const path of copyPaths) {
2736
+ await this.store.copyFileFromIndex({
2737
+ tenant_id: artifact.tenant_id,
2738
+ source_index_id: baseIndex.index_id,
2739
+ target_index_id: nextIndex.index_id,
2740
+ repo_path: path
2741
+ });
2742
+ }
2743
+ }
2744
+ if (this.chunkingConfig.strategy === "language_aware") {
2745
+ const parserAvailabilitySnapshot = getChunkingParserAvailabilitySnapshot({
2746
+ enabled_languages: this.chunkingConfig.enabled_languages
2747
+ });
2748
+ for (const availability of parserAvailabilitySnapshot) {
2749
+ this.observability.metrics.gauge("index_chunking_parser_availability", availability.status === "available" ? 1 : 0, {
2750
+ tenant_id: artifact.tenant_id,
2751
+ language: availability.language,
2752
+ status: availability.status
2753
+ });
2754
+ }
2755
+ this.observability.logger.info("chunking parser availability snapshot", {
2756
+ tenant_id: artifact.tenant_id,
2757
+ workspace_id: artifact.workspace_id,
2758
+ index_id: nextIndex.index_id,
2759
+ snapshot: parserAvailabilitySnapshot
2760
+ });
2761
+ }
2762
+ for (const file of changedFiles) {
2763
+ const chunkBuild = buildChunks(file, this.chunkingConfig);
2764
+ const chunks = chunkBuild.chunks;
2765
+ const chunkLanguage = chunkBuild.language ?? file.language ?? "unknown";
2766
+ this.observability.metrics.increment("index_chunking_strategy_total", 1, {
2767
+ tenant_id: artifact.tenant_id,
2768
+ strategy: chunkBuild.strategy,
2769
+ language: chunkLanguage,
2770
+ reason: chunkBuild.fallback_reason ?? "none"
2771
+ });
2772
+ if (chunkBuild.fallback_reason) {
2773
+ this.observability.metrics.increment("index_chunking_fallback_total", 1, {
2774
+ tenant_id: artifact.tenant_id,
2775
+ reason: chunkBuild.fallback_reason,
2776
+ language: chunkLanguage
2777
+ });
2778
+ }
2779
+ if (typeof chunkBuild.parse_latency_ms === "number") {
2780
+ this.observability.metrics.observe("index_chunk_parse_latency_ms", chunkBuild.parse_latency_ms, {
2781
+ tenant_id: artifact.tenant_id,
2782
+ language: chunkLanguage
2783
+ });
2784
+ }
2785
+ if (typeof chunkBuild.language_aware_attempt_latency_ms === "number") {
2786
+ this.observability.metrics.observe("index_chunk_language_aware_attempt_latency_ms", chunkBuild.language_aware_attempt_latency_ms, {
2787
+ tenant_id: artifact.tenant_id,
2788
+ language: chunkLanguage,
2789
+ outcome: chunkBuild.fallback_reason ? "fallback" : "success"
2790
+ });
2791
+ }
2792
+ if (typeof chunkBuild.fallback_path_latency_ms === "number" && chunkBuild.fallback_reason) {
2793
+ this.observability.metrics.observe("index_chunk_fallback_path_latency_ms", chunkBuild.fallback_path_latency_ms, {
2794
+ tenant_id: artifact.tenant_id,
2795
+ language: chunkLanguage,
2796
+ reason: chunkBuild.fallback_reason
2797
+ });
2798
+ }
2799
+ const estimatedEmbeddingTokens = chunks.reduce((sum, chunk) => sum + tokenize(chunk.snippet).length, 0);
2800
+ this.observability.metrics.increment("index_embedding_tokens_total", estimatedEmbeddingTokens, {
2801
+ tenant_id: artifact.tenant_id
2802
+ });
2803
+ const embeddings = chunks.length === 0
2804
+ ? []
2805
+ : await this.embeddingProvider.embed({
2806
+ texts: chunks.map((chunk) => chunk.snippet),
2807
+ purpose: "index"
2808
+ });
2809
+ if (embeddings.length !== chunks.length) {
2810
+ throw new RetrievalError("UPSTREAM_FAILURE", `embedding provider returned ${embeddings.length} vectors for ${chunks.length} indexed chunks`);
2811
+ }
2812
+ const contentHash = sha256(file.content);
2813
+ const sizeBytes = Buffer.byteLength(file.content, "utf8");
2814
+ const chunkEmbeddings = chunks.map((chunk, index) => {
2815
+ const embedding = embeddings[index];
2816
+ if (!isFiniteNumberArray(embedding)) {
2817
+ throw new RetrievalError("UPSTREAM_FAILURE", `embedding provider returned a non-numeric vector for ${file.path}:${chunk.start_line}-${chunk.end_line}`);
2818
+ }
2819
+ if (embedding.length !== this.embeddingDescriptor.dimensions) {
2820
+ this.observability.metrics.increment("index_embedding_dimension_mismatch_total", 1, {
2821
+ tenant_id: artifact.tenant_id,
2822
+ expected_dimensions: this.embeddingDescriptor.dimensions,
2823
+ actual_dimensions: embedding.length
2824
+ });
2825
+ this.observability.logger.warn("embedding dimensions mismatch provider descriptor; failing index build", {
2826
+ tenant_id: artifact.tenant_id,
2827
+ workspace_id: artifact.workspace_id,
2828
+ path: file.path,
2829
+ expected_dimensions: this.embeddingDescriptor.dimensions,
2830
+ actual_dimensions: embedding.length
2831
+ });
2832
+ throw new RetrievalError("UPSTREAM_FAILURE", `embedding dimension mismatch for ${file.path}; expected ${this.embeddingDescriptor.dimensions}, received ${embedding.length}`);
2833
+ }
2834
+ return embedding;
2835
+ });
2836
+ const fileRow = await this.store.upsertFile({
2837
+ tenant_id: artifact.tenant_id,
2838
+ index_id: nextIndex.index_id,
2839
+ repo_path: file.path,
2840
+ content_hash: contentHash,
2841
+ size_bytes: sizeBytes,
2842
+ language: file.language,
2843
+ updated_at: file.updated_at ?? new Date().toISOString()
2844
+ });
2845
+ await this.store.replaceFileChunks({
2846
+ tenant_id: artifact.tenant_id,
2847
+ file_id: fileRow.file_id,
2848
+ repo_path: file.path,
2849
+ chunks: chunks.map((chunk, index) => ({
2850
+ start_line: chunk.start_line,
2851
+ end_line: chunk.end_line,
2852
+ snippet: chunk.snippet,
2853
+ embedding: chunkEmbeddings[index],
2854
+ generated: chunk.generated,
2855
+ updated_at: chunk.updated_at
2856
+ }))
2857
+ });
2858
+ }
2859
+ await this.store.markIndexStatus({
2860
+ tenant_id: artifact.tenant_id,
2861
+ workspace_id: artifact.workspace_id,
2862
+ index_id: nextIndex.index_id,
2863
+ status: "ready"
2864
+ });
2865
+ await this.cache.invalidateWorkspace(artifact.workspace_id);
2866
+ return {
2867
+ workspace_id: artifact.workspace_id,
2868
+ index_version: artifact.index_version,
2869
+ status: "ready",
2870
+ counts: {
2871
+ added,
2872
+ modified,
2873
+ deleted,
2874
+ unchanged,
2875
+ skipped: skipped.length
2876
+ },
2877
+ skipped_files: skipped,
2878
+ warnings
2879
+ };
2880
+ }
2881
+ catch (error) {
2882
+ await this.store.markIndexStatus({
2883
+ tenant_id: artifact.tenant_id,
2884
+ workspace_id: artifact.workspace_id,
2885
+ index_id: nextIndex.index_id,
2886
+ status: "failed"
2887
+ });
2888
+ throw error;
2889
+ }
2890
+ }
2891
+ async getIndexVersion(input) {
2892
+ const existing = await this.store.getIndexByVersion({
2893
+ tenant_id: input.tenant_id,
2894
+ workspace_id: input.workspace_id,
2895
+ index_version: input.index_version
2896
+ });
2897
+ if (!existing) {
2898
+ return undefined;
2899
+ }
2900
+ return {
2901
+ index_id: existing.index_id,
2902
+ status: existing.status
2903
+ };
2904
+ }
2905
+ async searchContext(input) {
2906
+ const searchStartedAt = Date.now();
2907
+ const index = await this.store.getLatestReadyIndex({
2908
+ tenant_id: input.tenant_id,
2909
+ workspace_id: input.workspace_id
2910
+ });
2911
+ if (!index) {
2912
+ throw new RetrievalError("NOT_FOUND", "No ready index found for workspace. Run initial batch-upload push.");
2913
+ }
2914
+ const indexMetadata = await this.store.getIndexMetadata?.({
2915
+ tenant_id: input.tenant_id,
2916
+ index_id: index.index_id
2917
+ });
2918
+ const topK = Math.min(input.request.top_k ?? 8, MAX_TOP_K);
2919
+ const candidatePoolTopK = Math.min(MAX_TOP_K, Math.max(topK * 4, 12));
2920
+ const query = normalizeQuery(input.request.query);
2921
+ if (!indexMetadata) {
2922
+ this.observability.metrics.increment("retrieval_embedding_metadata_mismatch_total", 1, {
2923
+ reason: "metadata_missing"
2924
+ });
2925
+ this.observability.logger.warn("index metadata missing embedding configuration; reindex required", {
2926
+ tenant_id: input.tenant_id,
2927
+ workspace_id: input.workspace_id,
2928
+ index_id: index.index_id,
2929
+ reindex_required: true
2930
+ });
2931
+ throw new RetrievalError("INVALID_ARGUMENT", "index embedding metadata missing; reindex required with current embedding model and dimensions.");
2932
+ }
2933
+ const expectedProvider = this.embeddingDescriptor.provider;
2934
+ const expectedModel = this.embeddingDescriptor.model ?? "";
2935
+ const expectedDimensions = this.embeddingDescriptor.dimensions;
2936
+ const actualProvider = indexMetadata.embedding_provider;
2937
+ const actualModel = indexMetadata.embedding_model ?? "";
2938
+ const actualDimensions = indexMetadata.embedding_dimensions;
2939
+ let mismatchReason;
2940
+ if (actualProvider !== expectedProvider) {
2941
+ mismatchReason = "provider_mismatch";
2942
+ }
2943
+ else if (actualModel !== expectedModel) {
2944
+ mismatchReason = "model_mismatch";
2945
+ }
2946
+ else if (actualDimensions !== expectedDimensions) {
2947
+ mismatchReason = "dimension_mismatch";
2948
+ }
2949
+ if (mismatchReason) {
2950
+ this.observability.metrics.increment("retrieval_embedding_metadata_mismatch_total", 1, {
2951
+ reason: mismatchReason
2952
+ });
2953
+ this.observability.logger.warn("embedding compatibility mismatch detected; reindex required", {
2954
+ tenant_id: input.tenant_id,
2955
+ workspace_id: input.workspace_id,
2956
+ index_id: index.index_id,
2957
+ reason: mismatchReason,
2958
+ expected_provider: expectedProvider,
2959
+ expected_model: expectedModel || "unknown",
2960
+ expected_dimensions: expectedDimensions,
2961
+ actual_provider: actualProvider,
2962
+ actual_model: actualModel || "unknown",
2963
+ actual_dimensions: actualDimensions,
2964
+ reindex_required: true
2965
+ });
2966
+ throw new RetrievalError("INVALID_ARGUMENT", "embedding configuration mismatch; reindex required with current embedding model and dimensions.");
2967
+ }
2968
+ const queryEmbeddings = await this.embeddingProvider.embed({
2969
+ texts: [query],
2970
+ purpose: "query"
2971
+ });
2972
+ if (queryEmbeddings.length !== 1 || !isFiniteNumberArray(queryEmbeddings[0])) {
2973
+ throw new RetrievalError("UPSTREAM_FAILURE", "embedding provider returned an invalid query embedding response");
2974
+ }
2975
+ const queryEmbedding = queryEmbeddings[0];
2976
+ if (queryEmbedding.length !== this.embeddingDescriptor.dimensions) {
2977
+ throw new RetrievalError("UPSTREAM_FAILURE", `embedding provider returned query embedding dimensions ${queryEmbedding.length}; expected ${this.embeddingDescriptor.dimensions}`);
2978
+ }
2979
+ const queryTokens = tokenize(query);
2980
+ const cacheKey = buildQueryCacheKey({
2981
+ workspace_id: input.workspace_id,
2982
+ index_version: index.index_version,
2983
+ query,
2984
+ top_k: topK,
2985
+ filters: input.request.filters
2986
+ });
2987
+ const cached = await this.cache.get(cacheKey);
2988
+ if (cached) {
2989
+ this.cacheHits += 1;
2990
+ const ratio = this.cacheHits / Math.max(1, this.cacheHits + this.cacheMisses);
2991
+ this.observability.metrics.gauge("retrieval_cache_hit_ratio", ratio, {});
2992
+ return { ...cached, trace_id: input.trace_id };
2993
+ }
2994
+ this.cacheMisses += 1;
2995
+ const ratio = this.cacheHits / Math.max(1, this.cacheHits + this.cacheMisses);
2996
+ this.observability.metrics.gauge("retrieval_cache_hit_ratio", ratio, {});
2997
+ const candidates = await this.observability.tracing.withSpan("retrieval.candidate_generation", {
2998
+ trace_id: input.trace_id,
2999
+ tenant_id: input.tenant_id,
3000
+ workspace_id: input.workspace_id
3001
+ }, async () => {
3002
+ let ranked;
3003
+ if (this.store.rankChunksByIndex) {
3004
+ ranked = await this.store.rankChunksByIndex({
3005
+ tenant_id: input.tenant_id,
3006
+ index_id: index.index_id,
3007
+ query,
3008
+ query_embedding: queryEmbedding,
3009
+ query_tokens: queryTokens,
3010
+ top_k: candidatePoolTopK,
3011
+ candidate_weights: this.scoringConfig.candidate_weights,
3012
+ filters: input.request.filters
3013
+ });
3014
+ }
3015
+ const output = ranked && ranked.length > 0
3016
+ ? ranked
3017
+ .map((candidate) => {
3018
+ let score = candidate.score;
3019
+ score += pathQualityBias(candidate.path, queryTokens, this.scoringConfig, query);
3020
+ if (looksLowInformation(candidate.snippet)) {
3021
+ score -= this.scoringConfig.rerank.low_information_penalty;
3022
+ }
3023
+ const reason = chooseReason({
3024
+ lexical: candidate.lexical_score,
3025
+ path_match: candidate.path_match,
3026
+ recency_boosted: candidate.recency_boosted
3027
+ });
3028
+ return {
3029
+ path: candidate.path,
3030
+ start_line: candidate.start_line,
3031
+ end_line: candidate.end_line,
3032
+ snippet: candidate.snippet,
3033
+ score,
3034
+ reason
3035
+ };
3036
+ })
3037
+ .filter((candidate) => candidate.end_line >= candidate.start_line)
3038
+ .sort((a, b) => b.score - a.score)
3039
+ : (await this.store.listChunksByIndex({
3040
+ tenant_id: input.tenant_id,
3041
+ index_id: index.index_id,
3042
+ filters: input.request.filters
3043
+ }))
3044
+ .map((chunk) => {
3045
+ const haystack = `${chunk.path}\n${chunk.snippet}`;
3046
+ const l = lexicalScore(query, haystack);
3047
+ const v = cosineSimilarity(queryEmbedding, chunk.embedding);
3048
+ const pathMatch = queryTokens.some((token) => chunk.path.toLowerCase().includes(token));
3049
+ const recencyBoost = Date.now() - new Date(chunk.updated_at).getTime() < 14 * 24 * 3600 * 1000;
3050
+ const candidateWeights = this.scoringConfig.candidate_weights;
3051
+ let score = l * candidateWeights.lexical_weight + v * candidateWeights.vector_weight;
3052
+ if (pathMatch) {
3053
+ score += candidateWeights.path_match_boost;
3054
+ }
3055
+ if (recencyBoost) {
3056
+ score += candidateWeights.recency_boost;
3057
+ }
3058
+ if (chunk.generated) {
3059
+ score -= candidateWeights.generated_penalty;
3060
+ }
3061
+ score += pathQualityBias(chunk.path, queryTokens, this.scoringConfig, query);
3062
+ if (looksLowInformation(chunk.snippet)) {
3063
+ score -= this.scoringConfig.rerank.low_information_penalty;
3064
+ }
3065
+ const reason = chooseReason({ lexical: l, path_match: pathMatch, recency_boosted: recencyBoost });
3066
+ return {
3067
+ path: chunk.path,
3068
+ start_line: chunk.start_line,
3069
+ end_line: chunk.end_line,
3070
+ snippet: chunk.snippet,
3071
+ score,
3072
+ reason
3073
+ };
3074
+ })
3075
+ .filter((candidate) => candidate.end_line >= candidate.start_line)
3076
+ .sort((a, b) => b.score - a.score);
3077
+ this.observability.metrics.observe("retrieval_candidates_count", output.length, {
3078
+ channel: "hybrid",
3079
+ retrieval_profile_id: this.scoringProfileId
3080
+ });
3081
+ return output;
3082
+ });
3083
+ const deduped = await this.observability.tracing.withSpan("retrieval.rerank", { trace_id: input.trace_id }, async () => {
3084
+ const output = [];
3085
+ const seen = new Set();
3086
+ const pathCounts = new Map();
3087
+ const directoryCounts = new Map();
3088
+ const extensionCounts = new Map();
3089
+ const maxChunksPerPath = hasFileLookupIntent(queryTokens)
3090
+ ? this.scoringConfig.rerank.max_chunks_per_path_file_lookup
3091
+ : this.scoringConfig.rerank.max_chunks_per_path_default;
3092
+ const available = [...candidates];
3093
+ while (output.length < topK && available.length > 0) {
3094
+ let bestIndex = -1;
3095
+ let bestAdjustedScore = Number.NEGATIVE_INFINITY;
3096
+ let bestRawScore = Number.NEGATIVE_INFINITY;
3097
+ for (let i = 0; i < available.length; i += 1) {
3098
+ const candidate = available[i];
3099
+ if (!candidate) {
3100
+ continue;
3101
+ }
3102
+ const key = `${candidate.path}:${candidate.start_line}:${candidate.end_line}`;
3103
+ if (seen.has(key)) {
3104
+ continue;
3105
+ }
3106
+ const pathCount = pathCounts.get(candidate.path) ?? 0;
3107
+ if (pathCount >= maxChunksPerPath) {
3108
+ continue;
3109
+ }
3110
+ const directoryKey = parentDirectory(candidate.path).toLowerCase();
3111
+ const extensionKey = fileExtension(candidate.path);
3112
+ const adjustedScore = candidate.score -
3113
+ (directoryCounts.get(directoryKey) ?? 0) * this.scoringConfig.rerank.same_directory_penalty -
3114
+ (extensionCounts.get(extensionKey) ?? 0) * this.scoringConfig.rerank.same_extension_penalty;
3115
+ const currentBest = bestIndex >= 0 ? available[bestIndex] : undefined;
3116
+ const isBetter = adjustedScore > bestAdjustedScore + 1e-9 ||
3117
+ (Math.abs(adjustedScore - bestAdjustedScore) <= 1e-9 &&
3118
+ (candidate.score > bestRawScore + 1e-9 ||
3119
+ (Math.abs(candidate.score - bestRawScore) <= 1e-9 &&
3120
+ (!currentBest ||
3121
+ candidate.path.localeCompare(currentBest.path) < 0 ||
3122
+ (candidate.path === currentBest.path &&
3123
+ (candidate.start_line < currentBest.start_line ||
3124
+ (candidate.start_line === currentBest.start_line &&
3125
+ candidate.end_line < currentBest.end_line)))))));
3126
+ if (isBetter) {
3127
+ bestAdjustedScore = adjustedScore;
3128
+ bestRawScore = candidate.score;
3129
+ bestIndex = i;
3130
+ }
3131
+ }
3132
+ if (bestIndex < 0) {
3133
+ break;
3134
+ }
3135
+ const selected = available.splice(bestIndex, 1)[0];
3136
+ if (!selected) {
3137
+ break;
3138
+ }
3139
+ const selectedKey = `${selected.path}:${selected.start_line}:${selected.end_line}`;
3140
+ seen.add(selectedKey);
3141
+ pathCounts.set(selected.path, (pathCounts.get(selected.path) ?? 0) + 1);
3142
+ const selectedDirectory = parentDirectory(selected.path).toLowerCase();
3143
+ const selectedExtension = fileExtension(selected.path);
3144
+ directoryCounts.set(selectedDirectory, (directoryCounts.get(selectedDirectory) ?? 0) + 1);
3145
+ extensionCounts.set(selectedExtension, (extensionCounts.get(selectedExtension) ?? 0) + 1);
3146
+ output.push(selected);
3147
+ }
3148
+ return output;
3149
+ });
3150
+ const output = {
3151
+ trace_id: input.trace_id,
3152
+ results: deduped,
3153
+ search_metadata: {
3154
+ latency_ms: Date.now() - searchStartedAt,
3155
+ retrieval_mode: "hybrid",
3156
+ index_version: index.index_version
3157
+ }
3158
+ };
3159
+ this.observability.metrics.observe("retrieval_topk_hit_proxy", deduped.length > 0 ? 1 : 0, {
3160
+ retrieval_profile_id: this.scoringProfileId
3161
+ });
3162
+ this.observability.logger.info("search_context completed", {
3163
+ trace_id: input.trace_id,
3164
+ tenant_id: input.tenant_id,
3165
+ workspace_id: input.workspace_id,
3166
+ latency_ms: output.search_metadata.latency_ms,
3167
+ result_count: output.results.length,
3168
+ retrieval_profile_id: this.scoringProfileId,
3169
+ retrieval_profile_checksum: this.scoringConfigChecksum
3170
+ });
3171
+ await this.cache.set(cacheKey, output, this.cacheTtlSeconds);
3172
+ return output;
3173
+ }
3174
+ async enhancePrompt(input) {
3175
+ const startedAt = Date.now();
3176
+ const warnings = [];
3177
+ const questions = [];
3178
+ const addQuestion = (value) => {
3179
+ if (!questions.includes(value)) {
3180
+ questions.push(value);
3181
+ }
3182
+ };
3183
+ const intent = classifyIntent(input.request.prompt);
3184
+ const queryIntent = classifyEnhancerQueryIntent(input.request.prompt, input.request.conversation_history);
3185
+ const language = detectDominantLanguage(input.request.prompt, input.request.conversation_history);
3186
+ const negativePreferences = detectNegativePathPreferences(`${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`);
3187
+ const intentPolicy = resolveEnhancerIntentPolicy({
3188
+ query_intent: queryIntent,
3189
+ enhancer_config: this.enhancerConfig,
3190
+ negative_preferences: negativePreferences
3191
+ });
3192
+ const rerankTimeoutMs = queryIntent === "symbol-heavy"
3193
+ ? Math.max(20, Math.floor(this.enhancerConfig.rerank_timeout_ms * 0.75))
3194
+ : this.enhancerConfig.rerank_timeout_ms;
3195
+ let searchResults = [];
3196
+ let retrievalQuery = "";
3197
+ let lowConfidenceTriggered = false;
3198
+ let fallbackTriggered = false;
3199
+ let fallbackReason = null;
3200
+ let confidenceSignals;
3201
+ let queryExpansionMs = 0;
3202
+ let expandedHintCount = 0;
3203
+ let candidateCountPreRerank = 0;
3204
+ let candidateCountPostRerank = 0;
3205
+ let rerankMs = 0;
3206
+ let strongAnchorBypassUsed = false;
3207
+ let rerankUsed = false;
3208
+ if (!input.request.project_root_path || !input.workspace_id) {
3209
+ warnings.push("Workspace context missing. Running text-only enhancement mode.");
3210
+ fallbackTriggered = true;
3211
+ fallbackReason = "workspace_context_missing";
3212
+ }
3213
+ else {
3214
+ try {
3215
+ const expansionStartedAt = Date.now();
3216
+ const expandedQuery = buildEnhancerRetrievalQuery(input.request.prompt, input.request.conversation_history, {
3217
+ maxExpansionHints: intentPolicy.max_expansion_hints,
3218
+ queryIntent
3219
+ });
3220
+ queryExpansionMs = Date.now() - expansionStartedAt;
3221
+ retrievalQuery = expandedQuery.query;
3222
+ expandedHintCount = expandedQuery.expanded_hint_count;
3223
+ const retrieval = await this.searchContext({
3224
+ trace_id: input.trace_id,
3225
+ tenant_id: input.tenant_id,
3226
+ workspace_id: input.workspace_id,
3227
+ request: {
3228
+ project_root_path: input.request.project_root_path,
3229
+ query: retrievalQuery,
3230
+ top_k: MAX_TOP_K
3231
+ }
3232
+ });
3233
+ const budgetedResults = trimToContextBudget(retrieval.results);
3234
+ const dedupedByPath = dedupeEnhancerCandidatesByPath(budgetedResults);
3235
+ const collapsedByDirectory = collapseEnhancerCandidatesByDirectory(dedupedByPath, intentPolicy.max_candidates_per_directory_pre_rerank);
3236
+ const filteredCandidates = applyEnhancerIntentPathFiltering(collapsedByDirectory, {
3237
+ intent,
3238
+ negative_preferences: negativePreferences,
3239
+ strict_impl_only_filtering: intentPolicy.strict_impl_only_filtering
3240
+ });
3241
+ searchResults = filteredCandidates.slice(0, intentPolicy.max_candidates_pre_rerank);
3242
+ candidateCountPreRerank = searchResults.length;
3243
+ const shouldSkipRerankForStrongAnchors = shouldBypassEnhancerRerankForAnchors({
3244
+ query_intent: queryIntent,
3245
+ prompt: input.request.prompt,
3246
+ history: input.request.conversation_history,
3247
+ results: searchResults,
3248
+ negative_preferences: negativePreferences
3249
+ });
3250
+ strongAnchorBypassUsed = shouldSkipRerankForStrongAnchors;
3251
+ confidenceSignals = evaluateEnhancerConfidence({
3252
+ prompt: input.request.prompt,
3253
+ retrieval_query: retrievalQuery,
3254
+ query_intent: queryIntent,
3255
+ results: filteredCandidates.slice(0, Math.max(intentPolicy.max_candidates_pre_rerank * 2, 6))
3256
+ });
3257
+ if (!shouldSkipRerankForStrongAnchors && confidenceSignals.low_confidence) {
3258
+ lowConfidenceTriggered = true;
3259
+ rerankUsed = true;
3260
+ warnings.push(ENHANCER_LOW_CONFIDENCE_WARNING);
3261
+ const rerankStartedAt = Date.now();
3262
+ let reranked;
3263
+ try {
3264
+ reranked = await runWithTimeout({
3265
+ timeout_ms: rerankTimeoutMs,
3266
+ fn: () => rankEnhancerResultsForConfidence({
3267
+ results: searchResults,
3268
+ intent,
3269
+ query_intent: queryIntent,
3270
+ negative_preferences: negativePreferences,
3271
+ prompt: input.request.prompt,
3272
+ retrieval_query: retrievalQuery
3273
+ })
3274
+ });
3275
+ }
3276
+ catch {
3277
+ fallbackTriggered = true;
3278
+ fallbackReason = "rerank_timeout";
3279
+ warnings.push("Enhancer rerank timeout; applied deterministic fallback ranking.");
3280
+ reranked = deterministicEnhancerFallbackRanking({
3281
+ results: searchResults,
3282
+ intent,
3283
+ negative_preferences: negativePreferences
3284
+ });
3285
+ }
3286
+ finally {
3287
+ rerankMs = Date.now() - rerankStartedAt;
3288
+ }
3289
+ searchResults = dedupeEnhancerCandidatesByPath(applyEnhancerIntentPathFiltering(reranked, {
3290
+ intent,
3291
+ negative_preferences: negativePreferences,
3292
+ strict_impl_only_filtering: intentPolicy.strict_impl_only_filtering
3293
+ }));
3294
+ searchResults = collapseEnhancerCandidatesByDirectory(searchResults, intentPolicy.max_candidates_per_directory_pre_rerank).slice(0, intentPolicy.max_candidates_pre_rerank);
3295
+ const symbolCandidates = extractLikelyCodeSymbols(`${input.request.prompt}\n${input.request.conversation_history.map((entry) => entry.content).join("\n")}`, 3);
3296
+ if (confidenceSignals.failed_signals.includes("score_spread")) {
3297
+ addQuestion(localizeLowConfidenceQuestion({ language, kind: "scope" }));
3298
+ }
3299
+ if (confidenceSignals.failed_signals.includes("token_overlap")) {
3300
+ addQuestion(localizeLowConfidenceQuestion({ language, kind: "symbol", symbol: symbolCandidates[0] }));
3301
+ }
3302
+ if (confidenceSignals.failed_signals.includes("path_diversity")) {
3303
+ addQuestion(localizeLowConfidenceQuestion({ language, kind: "source_priority" }));
3304
+ }
3305
+ }
3306
+ else {
3307
+ searchResults = dedupeEnhancerCandidatesByPath(searchResults);
3308
+ searchResults = collapseEnhancerCandidatesByDirectory(searchResults, intentPolicy.max_candidates_per_directory_pre_rerank).slice(0, intentPolicy.max_candidates_pre_rerank);
3309
+ }
3310
+ candidateCountPostRerank = searchResults.length;
3311
+ }
3312
+ catch (error) {
3313
+ warnings.push("Context retrieval unavailable; enhancement generated with limited confidence.");
3314
+ fallbackTriggered = true;
3315
+ fallbackReason = "context_retrieval_unavailable";
3316
+ if (error instanceof RetrievalError && error.code === "NOT_FOUND") {
3317
+ warnings.push("No ready index found for workspace.");
3318
+ fallbackReason = "no_ready_index";
3319
+ }
3320
+ }
3321
+ }
3322
+ if (intent === "unknown") {
3323
+ addQuestion(language === "es"
3324
+ ? "¿Cuál es el resultado esperado exacto y el alcance del cambio?"
3325
+ : language === "zh"
3326
+ ? "这次变更的精确目标和范围是什么?"
3327
+ : "What exact outcome and scope should this change target?");
3328
+ }
3329
+ const contextRefs = searchResults.map((result) => ({
3330
+ path: result.path,
3331
+ start_line: result.start_line,
3332
+ end_line: result.end_line,
3333
+ reason: result.reason
3334
+ }));
3335
+ const enhancedPrompt = formatEnhancedPrompt({
3336
+ intent,
3337
+ language,
3338
+ original_prompt: input.request.prompt,
3339
+ refs: contextRefs
3340
+ });
3341
+ const output = {
3342
+ trace_id: input.trace_id,
3343
+ enhanced_prompt: enhancedPrompt,
3344
+ context_refs: contextRefs,
3345
+ warnings,
3346
+ questions
3347
+ };
3348
+ const latency_ms = Date.now() - startedAt;
3349
+ this.observability.metrics.observe("enhancer_latency_ms", latency_ms, {});
3350
+ this.observability.metrics.observe("enhancer_context_refs_count", output.context_refs.length, {});
3351
+ this.observability.metrics.observe("enhancer_questions_count", output.questions.length, {});
3352
+ this.observability.metrics.observe("enhancer_query_expansion_ms", queryExpansionMs, {
3353
+ retrieval_profile_id: this.scoringProfileId
3354
+ });
3355
+ this.observability.metrics.observe("enhancer_expanded_hint_count", expandedHintCount, {
3356
+ retrieval_profile_id: this.scoringProfileId
3357
+ });
3358
+ this.observability.metrics.observe("enhancer_candidate_count_pre_rerank", candidateCountPreRerank, {
3359
+ retrieval_profile_id: this.scoringProfileId
3360
+ });
3361
+ this.observability.metrics.observe("enhancer_candidate_count_post_rerank", candidateCountPostRerank, {
3362
+ retrieval_profile_id: this.scoringProfileId
3363
+ });
3364
+ this.observability.metrics.observe("enhancer_rerank_ms", rerankMs, {
3365
+ retrieval_profile_id: this.scoringProfileId
3366
+ });
3367
+ this.observability.metrics.observe("enhancer_low_confidence_triggered", lowConfidenceTriggered ? 1 : 0, {});
3368
+ this.observability.metrics.observe("enhancer_fallback_triggered", fallbackTriggered ? 1 : 0, {});
3369
+ if (lowConfidenceTriggered) {
3370
+ this.observability.metrics.increment("enhancer_low_confidence_total", 1, {
3371
+ retrieval_profile_id: this.scoringProfileId
3372
+ });
3373
+ }
3374
+ if (fallbackTriggered) {
3375
+ this.observability.metrics.increment("enhancer_fallback_total", 1, {
3376
+ retrieval_profile_id: this.scoringProfileId,
3377
+ reason: fallbackReason ?? "unknown"
3378
+ });
3379
+ }
3380
+ this.observability.logger.info("enhance_prompt completed", {
3381
+ trace_id: input.trace_id,
3382
+ tenant_id: input.tenant_id,
3383
+ workspace_id: input.workspace_id ?? "none",
3384
+ latency_ms,
3385
+ context_refs: output.context_refs.length,
3386
+ query_expansion_ms: queryExpansionMs,
3387
+ expanded_hint_count: expandedHintCount,
3388
+ candidate_count_pre_rerank: candidateCountPreRerank,
3389
+ candidate_count_post_rerank: candidateCountPostRerank,
3390
+ rerank_ms: rerankMs,
3391
+ low_confidence_triggered: lowConfidenceTriggered,
3392
+ fallback_triggered: fallbackTriggered,
3393
+ fallback_reason: fallbackReason,
3394
+ query_intent: queryIntent,
3395
+ confidence_score_spread: confidenceSignals?.score_spread ?? null,
3396
+ confidence_token_overlap: confidenceSignals?.token_overlap ?? null,
3397
+ confidence_path_diversity: confidenceSignals?.path_diversity ?? null,
3398
+ ...(this.enhancerDecisionTraceEnabled
3399
+ ? {
3400
+ intent_class: intent,
3401
+ strong_anchor_bypass_used: strongAnchorBypassUsed,
3402
+ rerank_used: rerankUsed,
3403
+ confidence_score: confidenceSignals?.confidence_score ?? null,
3404
+ confidence_threshold: confidenceSignals?.confidence_threshold ?? null
3405
+ }
3406
+ : {})
3407
+ });
3408
+ return output;
3409
+ }
3410
+ }
3411
/**
 * Creates a RetrievalCore with default backends.
 *
 * The core is constructed with a new InMemoryIndexStore as its first argument
 * and a new InMemoryQueryCache as its second argument; both are created fresh
 * on every call, so separate calls do not share these instances.
 *
 * @returns {RetrievalCore} The newly constructed core.
 */
export function createDefaultRetrievalCore() {
    // Construct the store before the cache, matching the argument evaluation order.
    const indexStore = new InMemoryIndexStore();
    const queryCache = new InMemoryQueryCache();
    return new RetrievalCore(indexStore, queryCache);
}
3414
/**
 * Seeds a retrieval core with an index artifact.
 *
 * Thin convenience wrapper: forwards `artifact` unchanged to
 * `core.indexArtifact` and resolves with whatever that call yields.
 * Because this function is `async`, a synchronous throw from
 * `core.indexArtifact` surfaces to the caller as a rejected promise.
 *
 * @param {{ indexArtifact: (artifact: unknown) => unknown }} core - Object exposing `indexArtifact`.
 * @param {unknown} artifact - Value handed to `core.indexArtifact` as-is.
 * @returns {Promise<unknown>} Resolves with the result of `core.indexArtifact(artifact)`.
 */
export async function seedWorkspaceIndex(core, artifact) {
    // Invoke as a method so `this` inside indexArtifact stays bound to `core`.
    return core.indexArtifact(artifact);
}
3417
+ export * from "./remote-sync.js";