sweet-search 2.5.2 → 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
package/core/cli.js
CHANGED
|
@@ -20,6 +20,9 @@ if (args[0] === 'init') {
|
|
|
20
20
|
} else if (args[0] === 'prewarm-vocab') {
|
|
21
21
|
const { handlePrewarmVocabCli } = await import('./vocabulary/index.js');
|
|
22
22
|
await handlePrewarmVocabCli(args.slice(1));
|
|
23
|
+
} else if (args[0] === 'reconcile' || args[0] === 'rebuild') {
|
|
24
|
+
const { handleIncrementalCli } = await import('./incremental-indexing/application/operator-cli.mjs');
|
|
25
|
+
await handleIncrementalCli(args[0], args.slice(1));
|
|
23
26
|
} else if (args[0] === 'read') {
|
|
24
27
|
// Filesystem-grounded reader; runs in JS (no native equivalent yet).
|
|
25
28
|
const { handleReadCli } = await import('./search/search-read.js');
|
|
@@ -28,6 +31,10 @@ if (args[0] === 'init') {
|
|
|
28
31
|
// Hybrid span-selection reader; runs in JS (depends on LI index + ranking).
|
|
29
32
|
const { handleReadSemanticCli } = await import('./search/search-read-semantic.js');
|
|
30
33
|
await handleReadSemanticCli(args.slice(1));
|
|
34
|
+
} else if (args[0] === 'trace') {
|
|
35
|
+
// Unified structural code context: callers, callees, and impact.
|
|
36
|
+
const { handleTraceCli } = await import('./search/search-trace.js');
|
|
37
|
+
await handleTraceCli(args.slice(1));
|
|
31
38
|
} else if (args[0] === 'index') {
|
|
32
39
|
// Indexing pipeline. Forwarded to index-codebase-v21.js::main(), which
|
|
33
40
|
// reads its own flags via process.argv. Setting argv here is required
|
|
@@ -39,9 +46,15 @@ if (args[0] === 'init') {
|
|
|
39
46
|
// `index` so existing flag combos (--full / --graph-only / --vectors-only /
|
|
40
47
|
// --files-from-stdin / --late-interaction-model=… / etc.) all work.
|
|
41
48
|
const indexerArgs = args.slice(1);
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
49
|
+
const hasAddHint = indexerArgs.includes('--add') || indexerArgs.some((arg) => arg.startsWith('--add='));
|
|
50
|
+
if (hasAddHint) {
|
|
51
|
+
const { handleIndexAddCli } = await import('./incremental-indexing/application/operator-cli.mjs');
|
|
52
|
+
await handleIndexAddCli(indexerArgs);
|
|
53
|
+
} else {
|
|
54
|
+
process.argv = [process.argv[0], 'index-codebase-v21.js', ...indexerArgs];
|
|
55
|
+
const { main: runIndexer } = await import('./indexing/index-codebase-v21.js');
|
|
56
|
+
await runIndexer();
|
|
57
|
+
}
|
|
45
58
|
} else if (args[0] === '--serve' || args[0] === '--stop') {
|
|
46
59
|
// Warm search server lifecycle is implemented in JS.
|
|
47
60
|
const { runCli } = await import('./search/index.js');
|
|
@@ -51,9 +64,16 @@ if (args[0] === 'init') {
|
|
|
51
64
|
|
|
52
65
|
Usage:
|
|
53
66
|
sweet-search <query> Search the indexed codebase
|
|
67
|
+
sweet-search trace <symbol> Structural context: callers, callees, impact
|
|
54
68
|
sweet-search read <file...> Filesystem-grounded read (1-20 files)
|
|
55
69
|
sweet-search read-semantic <f> <q> Return only file spans relevant to a query
|
|
56
70
|
sweet-search index [options] Build / update the codebase index
|
|
71
|
+
sweet-search index --add <path> Hint a file as dirty
|
|
72
|
+
sweet-search reconcile status Show incremental epoch and dirty status
|
|
73
|
+
sweet-search reconcile inspect <path> Explain why a file is dirty or clean
|
|
74
|
+
sweet-search reconcile pause|resume Pause or resume automatic reconcile work
|
|
75
|
+
sweet-search rebuild status Show incremental maintenance queue
|
|
76
|
+
sweet-search rebuild force <tier> Queue maintenance for a tier
|
|
57
77
|
sweet-search init [options] Set up runtime assets and models
|
|
58
78
|
sweet-search uninstall [opts] Remove local state created by init
|
|
59
79
|
sweet-search prewarm-vocab [file] Pre-warm vocabulary cache with terms
|
|
@@ -70,6 +90,7 @@ Indexing flags (sweet-search index ...):
|
|
|
70
90
|
--graph-only Build code graph only
|
|
71
91
|
--vectors-only Build vectors + HNSW only (skips code graph)
|
|
72
92
|
--files-from-stdin Read newline-delimited paths from stdin
|
|
93
|
+
--add <path> Queue a dirty-file hint without running the indexer
|
|
73
94
|
--late-interaction-model=ID Override the LI variant for this run
|
|
74
95
|
--no-late-interaction Skip LI index build
|
|
75
96
|
--quiet | --verbose Logging verbosity
|
|
@@ -12,6 +12,97 @@
|
|
|
12
12
|
// Default edge types to follow during expansion
|
|
13
13
|
const DEFAULT_EDGE_TYPES = new Set(['imports', 'extends', 'implements', 'uses', 'calls']);
|
|
14
14
|
|
|
15
|
+
// SQLite-variable-limit guard. Mirrors SAFE_IN_CLAUSE_BATCH in
// core/infrastructure/db-utils.js; inlined here so this module stays
// import-free (callers inject all dependencies). 2-hop expansion can in
// principle balloon to thousands of IDs when a seed entity has many
// outgoing edges; without this guard, an `IN(?,?,...)` over a >32k array
// crashes with "too many SQL variables". Fail fast at 999 with a clear
// message instead.
const _SAFE_IN_CLAUSE_BATCH = 999;

/**
 * Reject an `IN(?,?,...)` clause that would bind more than
 * `_SAFE_IN_CLAUSE_BATCH` parameters.
 *
 * @param {number} n - Number of parameters the statement is about to bind.
 * @param {string} label - Call-site label; becomes the prefix of the error message.
 * @throws {RangeError} When `n` is greater than `_SAFE_IN_CLAUSE_BATCH`.
 */
function _assertInClauseSize(n, label) {
  if (n > _SAFE_IN_CLAUSE_BATCH) {
    throw new RangeError(
      `${label}: IN(?,?,...) clause would bind ${n} parameters, exceeding ` +
      `SAFE_IN_CLAUSE_BATCH=${_SAFE_IN_CLAUSE_BATCH}. Chunk via ` +
      `chunkedIn() in core/infrastructure/db-utils.js or upstream-cap the input.`
    );
  }
}
|
|
32
|
+
|
|
33
|
+
// Per-connection cache of the schema probe performed by _visibilityInfo().
// Keyed weakly on the db handle so entries disappear with the handle.
const _VISIBILITY_CACHE = new WeakMap();

/**
 * Turn a table alias into a column prefix for SQL fragments.
 *
 * @param {string} [alias=''] - Alias with or without a single trailing dot.
 * @returns {string} `''` for an empty alias, otherwise `<alias>.`.
 */
function _sqlAliasPrefix(alias = '') {
  if (!alias) return '';
  const text = String(alias);
  const normalized = text.endsWith('.') ? text.slice(0, -1) : text;
  return normalized ? `${normalized}.` : '';
}

/**
 * Report whether the `entities` and `relationships` tables both carry the
 * `epoch_written` and `epoch_retired` columns, using `PRAGMA table_info`.
 *
 * A probe that completes is cached per db handle. A probe that throws is
 * reported as `false` for this call but is NOT cached: caching it would pin
 * the connection to the legacy filters below for the rest of its lifetime
 * after a single transient failure.
 *
 * @param {object} db - Handle exposing `prepare(sql).all()`.
 * @returns {{entities: boolean, relationships: boolean}}
 */
function _visibilityInfo(db) {
  const cached = _VISIBILITY_CACHE.get(db);
  if (cached) return cached;

  let probeFailed = false;
  const hasColumns = (table, columns) => {
    try {
      const names = new Set(db.prepare(`PRAGMA table_info(${table})`).all().map((c) => c.name));
      return columns.every((c) => names.has(c));
    } catch {
      probeFailed = true;
      return false;
    }
  };

  const info = {
    entities: hasColumns('entities', ['epoch_written', 'epoch_retired']),
    relationships: hasColumns('relationships', ['epoch_written', 'epoch_retired']),
  };
  if (!probeFailed) _VISIBILITY_CACHE.set(db, info);
  return info;
}

/**
 * Build the SQL predicate (and its bound parameters) that selects visible
 * rows of the `entities` table.
 *
 * - No epoch columns: `stale_since IS NULL`.
 * - Epoch columns and an integer `manifestEpoch`: rows written at or before
 *   the epoch, not retired at or before it, and either not stale or carrying
 *   an `epoch_retired` later than the epoch. Binds the epoch three times.
 * - Epoch columns, no integer epoch: neither stale nor retired.
 *
 * @param {object} db - Handle passed to `_visibilityInfo`.
 * @param {*} manifestEpoch - Epoch to pin to; ignored unless it is an integer.
 * @param {string} [alias=''] - Table alias used to qualify column names.
 * @param {{allowNullJoined?: boolean}} [options] - When `allowNullJoined` is
 *   truthy the predicate also accepts rows whose `id` is NULL.
 * @returns {{sql: string, params: number[]}}
 */
function _entityVisibility(db, manifestEpoch, alias = '', options = {}) {
  const prefix = _sqlAliasPrefix(alias);
  // Evaluate the schema/epoch condition once so the number of `?` in `sql`
  // always equals `params.length`.
  const hasEpochColumns = _visibilityInfo(db).entities;
  const pinned = hasEpochColumns && Number.isInteger(manifestEpoch);

  let sql;
  if (!hasEpochColumns) {
    sql = `${prefix}stale_since IS NULL`;
  } else if (pinned) {
    sql = `(${prefix}epoch_written IS NULL OR ${prefix}epoch_written <= ?)
      AND (${prefix}epoch_retired IS NULL OR ${prefix}epoch_retired > ?)
      AND (${prefix}stale_since IS NULL OR (${prefix}epoch_retired IS NOT NULL AND ${prefix}epoch_retired > ?))`;
  } else {
    sql = `${prefix}stale_since IS NULL AND ${prefix}epoch_retired IS NULL`;
  }

  return {
    sql: options.allowNullJoined ? `(${sql} OR ${prefix}id IS NULL)` : sql,
    params: pinned ? [manifestEpoch, manifestEpoch, manifestEpoch] : [],
  };
}

/**
 * Build the SQL predicate (and its bound parameters) that selects visible
 * rows of the `relationships` table.
 *
 * - No epoch columns: `1=1` (no filtering).
 * - Integer `manifestEpoch`: rows written at or before the epoch and not
 *   retired at or before it. Binds the epoch twice.
 * - Otherwise: rows that have not been retired.
 *
 * @param {object} db - Handle passed to `_visibilityInfo`.
 * @param {*} manifestEpoch - Epoch to pin to; ignored unless it is an integer.
 * @param {string} [alias=''] - Table alias used to qualify column names.
 * @returns {{sql: string, params: number[]}}
 */
function _relationshipVisibility(db, manifestEpoch, alias = '') {
  const prefix = _sqlAliasPrefix(alias);
  if (!_visibilityInfo(db).relationships) return { sql: '1=1', params: [] };
  if (Number.isInteger(manifestEpoch)) {
    return {
      sql: `(${prefix}epoch_written IS NULL OR ${prefix}epoch_written <= ?)
      AND (${prefix}epoch_retired IS NULL OR ${prefix}epoch_retired > ?)`,
      params: [manifestEpoch, manifestEpoch],
    };
  }
  return { sql: `${prefix}epoch_retired IS NULL`, params: [] };
}
|
|
92
|
+
|
|
93
|
+
// Per-stage profiling hooks. No-op unless `globalThis.__stageTimings` is set
// by scripts/profile-search-stages.mjs (same convention as search-hybrid.js
// and search-postprocess.js).

/**
 * Begin timing a stage.
 *
 * @returns {number|null} `performance.now()` when `globalThis.__stageTimings`
 *   is truthy, otherwise `null`.
 */
function __ptStart() {
  return globalThis.__stageTimings ? performance.now() : null;
}

/**
 * Finish timing a stage and append the elapsed milliseconds to
 * `globalThis.__stageTimings[stage]`, creating that array on first use.
 * Does nothing when `t0` is null/undefined or profiling is not enabled.
 *
 * @param {string} stage - Key under which the sample is stored.
 * @param {number|null} t0 - Value previously returned by `__ptStart()`.
 */
function __ptEnd(stage, t0) {
  if (t0 == null || !globalThis.__stageTimings) return;
  const ms = performance.now() - t0;
  const buf = globalThis.__stageTimings;
  (buf[stage] = buf[stage] || []).push(ms);
}
|
|
105
|
+
|
|
15
106
|
// --- Token Estimation Helpers ---
|
|
16
107
|
|
|
17
108
|
// Language-specific tokens-per-line averages (from CodeSearchNet analysis)
|
|
@@ -66,6 +157,7 @@ export function loadChunkTexts(codebaseDbOrRepo, ids) {
|
|
|
66
157
|
}
|
|
67
158
|
// Legacy raw-DB path (backward compat)
|
|
68
159
|
try {
|
|
160
|
+
_assertInClauseSize(ids.length, 'graph-expansion.getChunkTexts');
|
|
69
161
|
const ph = ids.map(() => '?').join(',');
|
|
70
162
|
const rows = codebaseDbOrRepo.prepare(
|
|
71
163
|
`SELECT id, text FROM vectors WHERE id IN (${ph})`
|
|
@@ -201,20 +293,52 @@ export function expandResults(db, results, options = {}) {
|
|
|
201
293
|
cosineSimilarity = null,
|
|
202
294
|
codebaseDb = null,
|
|
203
295
|
readFileLines = null,
|
|
296
|
+
format = null,
|
|
297
|
+
manifestEpoch = null,
|
|
204
298
|
} = options;
|
|
299
|
+
// F1 envelope cap (2026-05-07): drop graph-expanded entities whose line span
|
|
300
|
+
// exceeds maxEnvelopeLines. The taxonomy diagnosed mega-class envelopes
|
|
301
|
+
// (Flask App 951L, Scaffold 646L, uv do_lock 555L) as the #1 failure mode —
|
|
302
|
+
// these are pulled from the entity DB by graph expansion, not the chunker.
|
|
303
|
+
// Capped here so the seed chunks (30-60 lines each) keep the top spot.
|
|
304
|
+
//
|
|
305
|
+
// Format-gated to agent: GCSN NL queries don't carry format='agent' so are
|
|
306
|
+
// unaffected. Cap default 500 was selected by dev sweep over {Inf, 500, 300,
|
|
307
|
+
// 200, 150, 100}: cap=500 was the only value with zero regressions on
|
|
308
|
+
// FreshStack uv (lower caps regressed PASS counts). Yields +1 probe PASS
|
|
309
|
+
// (S5-Q9 Flask Scaffold class) and +1 FreshStack PARTIAL (UV-NL-2 do_lock).
|
|
310
|
+
// Held-out probes flat — no overfit signature, but also no held-out transfer
|
|
311
|
+
// since the failure mode (mega-class envelope) isn't present in held-out.
|
|
312
|
+
const maxEnvelopeLines = (() => {
|
|
313
|
+
const raw = process.env.SWEET_SEARCH_MAX_ENVELOPE_LINES;
|
|
314
|
+
if (raw != null && raw !== '') {
|
|
315
|
+
const n = Number.parseInt(raw, 10);
|
|
316
|
+
if (Number.isFinite(n) && n > 0) return n;
|
|
317
|
+
}
|
|
318
|
+
return options.maxEnvelopeLines ?? 500;
|
|
319
|
+
})();
|
|
320
|
+
const isAgentFormat = format === 'agent' || format === 'agent_full'
|
|
321
|
+
|| format === 'agent_full_xl' || format === 'agent_preview'
|
|
322
|
+
|| process.env.SWEET_SEARCH_FORCE_BM25F_BOOSTS === '1';
|
|
323
|
+
const envelopeCapEnabled = isAgentFormat && Number.isFinite(maxEnvelopeLines);
|
|
205
324
|
const clampedSemanticWeight = clampSemanticWeight(semanticWeight);
|
|
206
325
|
|
|
207
326
|
if (expandMode === 'none' || results.length === 0) return results;
|
|
208
327
|
|
|
209
328
|
// Collect entity IDs from results
|
|
210
|
-
const
|
|
329
|
+
const __t_seeds = __ptStart();
|
|
330
|
+
const seedIds = collectSeedIds(db, results, { manifestEpoch });
|
|
331
|
+
__ptEnd('expand:collectSeedIds', __t_seeds);
|
|
211
332
|
if (seedIds.size === 0) return results;
|
|
212
333
|
|
|
213
334
|
// 1-hop expansion: find neighbors via forward + reverse edges
|
|
214
|
-
const
|
|
335
|
+
const __t_hop1 = __ptStart();
|
|
336
|
+
const expanded = expandOneHop(db, seedIds, edgeTypes, { manifestEpoch });
|
|
337
|
+
__ptEnd('expand:expandOneHop', __t_hop1);
|
|
215
338
|
|
|
216
339
|
// 2-hop expansion (if requested)
|
|
217
340
|
if (expandMode === '2hop' && expanded.size > 0) {
|
|
341
|
+
const __t_hop2 = __ptStart();
|
|
218
342
|
if (adaptiveHop2) {
|
|
219
343
|
expandSecondHopAdaptive(db, seedIds, expanded, edgeTypes, {
|
|
220
344
|
maxHop2: maxExpanded,
|
|
@@ -223,6 +347,7 @@ export function expandResults(db, results, options = {}) {
|
|
|
223
347
|
hnswIndex,
|
|
224
348
|
semanticWeight: clampedSemanticWeight,
|
|
225
349
|
cosineSimilarity,
|
|
350
|
+
manifestEpoch,
|
|
226
351
|
});
|
|
227
352
|
} else {
|
|
228
353
|
expandSecondHop(db, seedIds, expanded, edgeTypes, {
|
|
@@ -230,15 +355,31 @@ export function expandResults(db, results, options = {}) {
|
|
|
230
355
|
hnswIndex,
|
|
231
356
|
semanticWeight: clampedSemanticWeight,
|
|
232
357
|
cosineSimilarity,
|
|
358
|
+
manifestEpoch,
|
|
233
359
|
});
|
|
234
360
|
}
|
|
361
|
+
__ptEnd(adaptiveHop2 ? 'expand:expandSecondHopAdaptive' : 'expand:expandSecondHop', __t_hop2);
|
|
235
362
|
}
|
|
236
363
|
|
|
237
364
|
if (expanded.size === 0) return results;
|
|
238
365
|
|
|
239
366
|
// Look up entity details for expanded IDs, respecting maxExpanded
|
|
240
367
|
const expandedIds = [...expanded.keys()].slice(0, maxExpanded);
|
|
241
|
-
const
|
|
368
|
+
const __t_lookup = __ptStart();
|
|
369
|
+
let expandedResults = lookupEntities(db, expandedIds, expanded, { manifestEpoch });
|
|
370
|
+
__ptEnd('expand:lookupEntities', __t_lookup);
|
|
371
|
+
|
|
372
|
+
// F1 envelope cap: drop expanded entities exceeding line cap (agent format only).
|
|
373
|
+
if (envelopeCapEnabled && expandedResults.length > 0) {
|
|
374
|
+
const beforeLen = expandedResults.length;
|
|
375
|
+
expandedResults = expandedResults.filter(er => {
|
|
376
|
+
const lines = (er.endLine - er.startLine) + 1;
|
|
377
|
+
return Number.isFinite(lines) && lines <= maxEnvelopeLines;
|
|
378
|
+
});
|
|
379
|
+
if (process.env.SWEET_SEARCH_DEBUG_ENVELOPE_CAP === '1' && expandedResults.length < beforeLen) {
|
|
380
|
+
console.warn(`[envelope-cap] dropped ${beforeLen - expandedResults.length}/${beforeLen} expanded entities (cap=${maxEnvelopeLines})`);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
242
383
|
|
|
243
384
|
// Score expanded results relative to original results
|
|
244
385
|
const maxOriginalScore = Math.max(...results.map(r => r.score || 0), 1);
|
|
@@ -249,18 +390,22 @@ export function expandResults(db, results, options = {}) {
|
|
|
249
390
|
}
|
|
250
391
|
|
|
251
392
|
// Rerank expanded results using composite scoring (file proximity + entity type + semantic)
|
|
393
|
+
const __t_rerank = __ptStart();
|
|
252
394
|
rerankExpanded(expandedResults, results, {
|
|
253
395
|
queryInt8,
|
|
254
396
|
hnswIndex,
|
|
255
397
|
semanticWeight: clampedSemanticWeight,
|
|
256
398
|
cosineSimilarity,
|
|
257
399
|
});
|
|
400
|
+
__ptEnd('expand:rerankExpanded', __t_rerank);
|
|
258
401
|
|
|
259
402
|
// Apply token budget
|
|
403
|
+
const __t_budget = __ptStart();
|
|
260
404
|
const { results: budgeted, stats: budgetStats } = applyTokenBudget(
|
|
261
405
|
[...results, ...expandedResults], tokenBudget,
|
|
262
406
|
{ expandedBudget, codebaseDb, readFileLines }
|
|
263
407
|
);
|
|
408
|
+
__ptEnd('expand:applyTokenBudget', __t_budget);
|
|
264
409
|
|
|
265
410
|
budgeted._budgetStats = budgetStats;
|
|
266
411
|
return budgeted;
|
|
@@ -274,7 +419,7 @@ export function expandResults(db, results, options = {}) {
|
|
|
274
419
|
* @param {Array} results
|
|
275
420
|
* @returns {Set<string>}
|
|
276
421
|
*/
|
|
277
|
-
function collectSeedIds(db, results) {
|
|
422
|
+
function collectSeedIds(db, results, options = {}) {
|
|
278
423
|
const seedIds = new Set();
|
|
279
424
|
const needsLineMatch = [];
|
|
280
425
|
|
|
@@ -295,13 +440,28 @@ function collectSeedIds(db, results) {
|
|
|
295
440
|
|
|
296
441
|
if (needsLineMatch.length === 0) return seedIds;
|
|
297
442
|
|
|
298
|
-
//
|
|
299
|
-
|
|
443
|
+
// Per-result indexed point query. Hybrid output is keyed on chunk-ids
|
|
444
|
+
// (path:start-end:n), so this fallback is the COMMON path for graph
|
|
445
|
+
// expansion, not a rare one. The original implementation did a full
|
|
446
|
+
// SELECT * FROM entities and then an O(N×M) JS-side scan to find the
|
|
447
|
+
// smallest overlapping entity per result — costing ~11ms p50 on
|
|
448
|
+
// production-sized indexes (10 results × 100k+ entities = 1M JS-side
|
|
449
|
+
// comparisons + materialization GC). Replaced with a single prepared
|
|
450
|
+
// statement that uses the (file_path, start_line, end_line) index for
|
|
451
|
+
// O(log N) lookup. Reuses the same prepared statement across all
|
|
452
|
+
// needsLineMatch results in one collectSeedIds call.
|
|
453
|
+
let findStmt;
|
|
300
454
|
try {
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
FROM entities
|
|
304
|
-
|
|
455
|
+
const entityVis = _entityVisibility(db, options.manifestEpoch);
|
|
456
|
+
findStmt = db.prepare(`
|
|
457
|
+
SELECT id FROM entities
|
|
458
|
+
WHERE file_path = ?
|
|
459
|
+
AND start_line <= ?
|
|
460
|
+
AND end_line >= ?
|
|
461
|
+
AND ${entityVis.sql}
|
|
462
|
+
ORDER BY (end_line - start_line) ASC
|
|
463
|
+
LIMIT 1
|
|
464
|
+
`);
|
|
305
465
|
} catch {
|
|
306
466
|
return seedIds;
|
|
307
467
|
}
|
|
@@ -331,23 +491,19 @@ function collectSeedIds(db, results) {
|
|
|
331
491
|
}
|
|
332
492
|
}
|
|
333
493
|
if (!filePath || lineStart == null) continue;
|
|
334
|
-
// If we still don't have an end line, treat the chunk as a single line.
|
|
335
494
|
if (lineEnd == null) lineEnd = lineStart;
|
|
336
495
|
|
|
337
|
-
//
|
|
338
|
-
//
|
|
339
|
-
//
|
|
340
|
-
//
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
const size = (e.end_line - e.start_line) + 1;
|
|
348
|
-
if (size < bestSize) { bestSize = size; bestId = e.id; }
|
|
496
|
+
// Smallest enclosing/overlapping entity wins (functions/methods over
|
|
497
|
+
// file-level containers). The SQL ORDER BY (end_line - start_line) ASC
|
|
498
|
+
// matches the JS `bestSize` selection in the prior implementation
|
|
499
|
+
// exactly: same overlap predicate, same tie-breaker.
|
|
500
|
+
try {
|
|
501
|
+
const row = findStmt.get(filePath, lineEnd, lineStart, ..._entityVisibility(db, options.manifestEpoch).params);
|
|
502
|
+
if (row?.id) seedIds.add(row.id);
|
|
503
|
+
} catch {
|
|
504
|
+
// Skip this result; preserves prior behavior of silently dropping
|
|
505
|
+
// entries the lookup couldn't match.
|
|
349
506
|
}
|
|
350
|
-
if (bestId) seedIds.add(bestId);
|
|
351
507
|
}
|
|
352
508
|
|
|
353
509
|
return seedIds;
|
|
@@ -361,18 +517,21 @@ function collectSeedIds(db, results) {
|
|
|
361
517
|
* @param {Set<string>} edgeTypes
|
|
362
518
|
* @returns {Map<string, {via: string, direction: string, score: number, hops?: number}>}
|
|
363
519
|
*/
|
|
364
|
-
export function expandOneHop(db, seedIds, edgeTypes) {
|
|
520
|
+
export function expandOneHop(db, seedIds, edgeTypes, options = {}) {
|
|
365
521
|
const expanded = new Map();
|
|
366
522
|
const seedArray = [...seedIds];
|
|
523
|
+
_assertInClauseSize(seedArray.length, 'graph-expansion.expandOneHop.seeds');
|
|
367
524
|
const placeholders = seedArray.map(() => '?').join(',');
|
|
368
525
|
|
|
369
526
|
// Forward edges: seed -> neighbor
|
|
370
527
|
let forwardRels;
|
|
371
528
|
try {
|
|
529
|
+
const relVis = _relationshipVisibility(db, options.manifestEpoch);
|
|
372
530
|
forwardRels = db.prepare(`
|
|
373
531
|
SELECT DISTINCT target_id, type FROM relationships
|
|
374
532
|
WHERE source_id IN (${placeholders}) AND target_id IS NOT NULL
|
|
375
|
-
|
|
533
|
+
AND ${relVis.sql}
|
|
534
|
+
`).all(...seedArray, ...relVis.params);
|
|
376
535
|
} catch {
|
|
377
536
|
forwardRels = [];
|
|
378
537
|
}
|
|
@@ -380,10 +539,12 @@ export function expandOneHop(db, seedIds, edgeTypes) {
|
|
|
380
539
|
// Reverse edges: neighbor -> seed
|
|
381
540
|
let reverseRels;
|
|
382
541
|
try {
|
|
542
|
+
const relVis = _relationshipVisibility(db, options.manifestEpoch);
|
|
383
543
|
reverseRels = db.prepare(`
|
|
384
544
|
SELECT DISTINCT source_id, type FROM relationships
|
|
385
545
|
WHERE target_id IN (${placeholders}) AND source_id IS NOT NULL
|
|
386
|
-
|
|
546
|
+
AND ${relVis.sql}
|
|
547
|
+
`).all(...seedArray, ...relVis.params);
|
|
387
548
|
} catch {
|
|
388
549
|
reverseRels = [];
|
|
389
550
|
}
|
|
@@ -427,15 +588,18 @@ export function expandSecondHop(db, seedIds, expanded, edgeTypes, options = {})
|
|
|
427
588
|
|
|
428
589
|
const hop1Ids = [...expanded.keys()];
|
|
429
590
|
if (hop1Ids.length === 0) return;
|
|
591
|
+
_assertInClauseSize(hop1Ids.length, 'graph-expansion.expand2Hop.forward');
|
|
430
592
|
|
|
431
593
|
const ph = hop1Ids.map(() => '?').join(',');
|
|
432
594
|
|
|
433
595
|
let hop2Forward;
|
|
434
596
|
try {
|
|
597
|
+
const relVis = _relationshipVisibility(db, options.manifestEpoch);
|
|
435
598
|
hop2Forward = db.prepare(`
|
|
436
599
|
SELECT source_id, target_id, type FROM relationships
|
|
437
600
|
WHERE source_id IN (${ph}) AND target_id IS NOT NULL
|
|
438
|
-
|
|
601
|
+
AND ${relVis.sql}
|
|
602
|
+
`).all(...hop1Ids, ...relVis.params);
|
|
439
603
|
} catch {
|
|
440
604
|
return;
|
|
441
605
|
}
|
|
@@ -520,6 +684,7 @@ export function expandSecondHopAdaptive(db, seedIds, hop1Expanded, edgeTypes, op
|
|
|
520
684
|
|
|
521
685
|
const hop1Ids = [...hop1Expanded.keys()];
|
|
522
686
|
if (hop1Ids.length === 0) return { added: 0, budgetUsed: 0, candidates: 0 };
|
|
687
|
+
_assertInClauseSize(hop1Ids.length, 'graph-expansion.expand2HopRanked.hop1');
|
|
523
688
|
|
|
524
689
|
const ph = hop1Ids.map(() => '?').join(',');
|
|
525
690
|
|
|
@@ -531,11 +696,13 @@ export function expandSecondHopAdaptive(db, seedIds, hop1Expanded, edgeTypes, op
|
|
|
531
696
|
const typeList = [...edgeTypes].map(t => `'${t}'`).join(',');
|
|
532
697
|
let degreeMap;
|
|
533
698
|
try {
|
|
699
|
+
const relVis = _relationshipVisibility(db, options.manifestEpoch);
|
|
534
700
|
const degRows = db.prepare(`
|
|
535
701
|
SELECT source_id, COUNT(*) as deg FROM relationships
|
|
536
702
|
WHERE source_id IN (${ph}) AND type IN (${typeList})
|
|
703
|
+
AND ${relVis.sql}
|
|
537
704
|
GROUP BY source_id
|
|
538
|
-
`).all(...hop1Ids);
|
|
705
|
+
`).all(...hop1Ids, ...relVis.params);
|
|
539
706
|
degreeMap = new Map(degRows.map(r => [r.source_id, r.deg]));
|
|
540
707
|
} catch {
|
|
541
708
|
degreeMap = new Map();
|
|
@@ -544,12 +711,15 @@ export function expandSecondHopAdaptive(db, seedIds, hop1Expanded, edgeTypes, op
|
|
|
544
711
|
// Query candidate 2-hop targets with source, weights, and line ranges
|
|
545
712
|
let rawCandidates;
|
|
546
713
|
try {
|
|
714
|
+
const entityVis = _entityVisibility(db, options.manifestEpoch, 'e');
|
|
715
|
+
const relVis = _relationshipVisibility(db, options.manifestEpoch, 'r');
|
|
547
716
|
rawCandidates = db.prepare(`
|
|
548
717
|
SELECT r.source_id, r.target_id, r.type, r.weight, e.file_path, e.start_line, e.end_line
|
|
549
718
|
FROM relationships r
|
|
550
|
-
JOIN entities e ON e.id = r.target_id AND
|
|
719
|
+
JOIN entities e ON e.id = r.target_id AND ${entityVis.sql}
|
|
551
720
|
WHERE r.source_id IN (${ph}) AND r.target_id IS NOT NULL
|
|
552
|
-
|
|
721
|
+
AND ${relVis.sql}
|
|
722
|
+
`).all(...entityVis.params, ...hop1Ids, ...relVis.params);
|
|
553
723
|
} catch {
|
|
554
724
|
return { added: 0, budgetUsed: 0, candidates: 0 };
|
|
555
725
|
}
|
|
@@ -667,16 +837,18 @@ export function expandSecondHopAdaptive(db, seedIds, hop1Expanded, edgeTypes, op
|
|
|
667
837
|
* @param {Map<string, Object>} expansionMeta
|
|
668
838
|
* @returns {Array}
|
|
669
839
|
*/
|
|
670
|
-
function lookupEntities(db, expandedIds, expansionMeta) {
|
|
840
|
+
function lookupEntities(db, expandedIds, expansionMeta, options = {}) {
|
|
671
841
|
if (expandedIds.length === 0) return [];
|
|
842
|
+
_assertInClauseSize(expandedIds.length, 'graph-expansion.lookupEntities');
|
|
672
843
|
|
|
673
844
|
const ph = expandedIds.map(() => '?').join(',');
|
|
674
845
|
let entities;
|
|
675
846
|
try {
|
|
847
|
+
const entityVis = _entityVisibility(db, options.manifestEpoch);
|
|
676
848
|
entities = db.prepare(`
|
|
677
849
|
SELECT id, file_path, type, name, signature, start_line, end_line
|
|
678
|
-
FROM entities WHERE id IN (${ph}) AND
|
|
679
|
-
`).all(...expandedIds);
|
|
850
|
+
FROM entities WHERE id IN (${ph}) AND ${entityVis.sql}
|
|
851
|
+
`).all(...expandedIds, ...entityVis.params);
|
|
680
852
|
} catch {
|
|
681
853
|
return [];
|
|
682
854
|
}
|
|
@@ -852,18 +1024,25 @@ export function applyTokenBudget(results, budget, options = {}) {
|
|
|
852
1024
|
* @param {string[]} entityIds
|
|
853
1025
|
* @returns {{ total: number, byType: Record<string, number> }}
|
|
854
1026
|
*/
|
|
855
|
-
export function getExpansionStats(db, entityIds) {
|
|
1027
|
+
export function getExpansionStats(db, entityIds, options = {}) {
|
|
856
1028
|
if (!entityIds || entityIds.length === 0) return { total: 0, byType: {} };
|
|
1029
|
+
// The query interpolates `${ph}` twice (source_id IN OR target_id IN) and the
|
|
1030
|
+
// `.all()` call binds entityIds twice in one prepared statement, so the
|
|
1031
|
+
// SQLite-parameter ceiling is reached at half the array length — guard the
|
|
1032
|
+
// actual bind count, not the array length.
|
|
1033
|
+
_assertInClauseSize(2 * entityIds.length, 'graph-expansion.getExpansionStats (double-bind OR)');
|
|
857
1034
|
|
|
858
1035
|
const ph = entityIds.map(() => '?').join(',');
|
|
859
1036
|
let rels;
|
|
860
1037
|
try {
|
|
1038
|
+
const relVis = _relationshipVisibility(db, options.manifestEpoch);
|
|
861
1039
|
rels = db.prepare(`
|
|
862
1040
|
SELECT type, COUNT(*) as count FROM relationships
|
|
863
1041
|
WHERE (source_id IN (${ph}) OR target_id IN (${ph}))
|
|
864
1042
|
AND source_id IS NOT NULL AND target_id IS NOT NULL
|
|
1043
|
+
AND ${relVis.sql}
|
|
865
1044
|
GROUP BY type
|
|
866
|
-
`).all(...entityIds, ...entityIds);
|
|
1045
|
+
`).all(...entityIds, ...entityIds, ...relVis.params);
|
|
867
1046
|
} catch {
|
|
868
1047
|
return { total: 0, byType: {} };
|
|
869
1048
|
}
|