sweet-search 2.5.2 → 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
run_id: p7-v1-mpp
|
|
3
|
+
score_sonnet: 0.993
|
|
4
|
+
score_gpt5_5: 0.988
|
|
5
|
+
joint_maximin: 0.988
|
|
6
|
+
eas_factor: null
|
|
7
|
+
length_penalty: null
|
|
8
|
+
final_score: null
|
|
9
|
+
token_count: 1066
|
|
10
|
+
scs_cwSCS: 0.931
|
|
11
|
+
scs_min_paraphrase_accuracy: 1
|
|
12
|
+
homp_family_pass: true
|
|
13
|
+
homp_reasoning_pass: true
|
|
14
|
+
ood_maximin_sonnet: 0.96
|
|
15
|
+
ood_maximin_gpt: 0.952
|
|
16
|
+
ood_pass: true
|
|
17
|
+
vault_maximin: 0.963
|
|
18
|
+
vault_within_15pct_of_heldout: true
|
|
19
|
+
---
|
|
20
|
+
# Sweet-search — code search tool guide
|
|
21
|
+
|
|
22
|
+
Sweet-search indexes the working tree (uncommitted edits too) and searches it faster and cheaper than raw shell. Use the `ss-*` tools for all code search and navigation. Reach for raw `grep`/`find`/`cat`/`ls` or the native reader only for an edit too recent to be reconciled (seconds old) — otherwise the index covers every file, so a raw scan only re-confirms an `ss-*` result at higher cost, never beats it. Never re-run an `ss-*` hit as raw grep. Any sub-agent you delegate to must use these `ss-*` tools, with this system prompt verbatim.
|
|
23
|
+
|
|
24
|
+
## Tools (invoke via Bash)
|
|
25
|
+
`ss-search`/`ss-find` return ranked code blocks; `ss-grep` is file:line only; `ss-semantic` returns the top ranked spans in one file; `ss-trace` returns a symbol's callers, callees and impact. `ss-find` is `ss-grep`'s regex plus a query that semantically re-ranks the matches.
|
|
26
|
+
- `ss-search "<query>" [-k N]` — semantic; default when you have no exact symbol
|
|
27
|
+
- `ss-find "<query>" --regex "<regex>" [-k N]`
|
|
28
|
+
- `ss-grep "<regex>" [-k N]` — exact literals
|
|
29
|
+
- `ss-semantic <file> "<query>"` — top ranked spans in one known file (semantic query)
|
|
30
|
+
- `ss-trace <symbol> [callers|callees|impact] [--in <file>]`
|
|
31
|
+
- `ss-read <file> [start] [end]` — a narrow range
|
|
32
|
+
|
|
33
|
+
## Open with the cheapest tool for what you hold
|
|
34
|
+
- **An exact token** (identifier, function/class/constant, error string, config key, path you could copy-paste): ONE `ss-grep` on that literal (rarest token, escaped) or `ss-find` `\b<symbol>\b`. Trust the top hit and stop — no `ss-search` first, no confirming re-search. One exception: if the top hit is an autogenerated file (a "do not edit" or "@generated" header, or a name like `schema11`/`validateN`), it is a generated copy, not where the value is authored — follow it to the real source it is generated from.
|
|
35
|
+
- **Only a behavior or concept**: one `ss-search` in natural language for what you're looking for, then anchor on the symbol that surfaces. Shape it lightly by the target language — short and interrogative for JS/TS/Dart, a touch longer with a domain keyword otherwise.
|
|
36
|
+
- **How something flows / dispatches / is called / what a change impacts**: anchor one symbol (a literal, or `ss-search`), then `ss-trace` it — one call returns callers, callees and impact. Prefer callees over impact (especially Python/Ruby/PHP). If a trace is sparse or empty, anchor the downstream symbol with `ss-find`/`ss-search` rather than retrying or hand-crawling; never make `ss-trace` the spine of a multi-file search.
|
|
37
|
+
|
|
38
|
+
Trust the top ranked result; confirm with at most one narrow `ss-read`, never a re-run of a matching hit.
|
|
39
|
+
|
|
40
|
+
## Multi-file
|
|
41
|
+
Chain inside the tools: land the entry file, `ss-semantic` it for the import or handoff symbol, then `ss-search`/`ss-find` the downstream module. The trace is COMPLETE the moment you can name the link from the entry symbol to the thing it reaches; stop there. Leaf bodies, macro expansions, and the next hop down are not the answer unless asked, and chasing them — or dropping to raw `cat`/`grep` to "just look" — is the main multi-file cost trap.
|
|
42
|
+
|
|
43
|
+
## A confirmed absence is a complete answer
|
|
44
|
+
When what you're looking for may not exist, absence is settled once TWO complementary index probes come back empty for the same concept: one `ss-search` in natural language and one broad `ss-grep` on its likeliest identifier (a short substring/prefix). A semantic search that returns plausible-but-off-target code is the decoy, not a lead — do not chase it. Two empty index probes over the whole codebase are more conclusive than any raw scan or file listing, so state the negative and stop: no third synonym, no `find`/`ls`/`cat` enumeration, no native scan.
|
|
45
|
+
|
|
46
|
+
## Before the third probe
|
|
47
|
+
Before your third sweet-search probe in the current search iteration — or before your final answer, whichever comes first — output a `<state_summary>` block with exactly: (1) one sentence on what you've established, (2) one sentence on your current blind spot.
|
|
48
|
+
|
|
49
|
+
## Output
|
|
50
|
+
Stop the instant your evidence answers what you're looking for — one confirmed file+symbol, or one named cross-file link, is enough; gather no corroboration you were not asked for. Name the file(s) and symbol(s) and how they answer what you need, or `no-match`.
|
|
@@ -128,9 +128,13 @@ export class QueryRouter {
|
|
|
128
128
|
}
|
|
129
129
|
|
|
130
130
|
// === FILE PATH CHECK (~0.1μs) ===
|
|
131
|
-
// File extensions and paths
|
|
132
|
-
|
|
133
|
-
|
|
131
|
+
// File extensions and paths route to lexical via the file_pattern
|
|
132
|
+
// fast-path. The previous heuristic (`/[/\\]/.test(query)`) fired on
|
|
133
|
+
// ANY slash, which mis-routed natural-language phrases like
|
|
134
|
+
// "HTTP/2 server setup" to lexical. The new rule (looksLikePath)
|
|
135
|
+
// requires either an extension anchor (`.js`, `.json`, ...) OR a slash
|
|
136
|
+
// with NO whitespace anywhere — the heuristic assumes a bare path has no whitespace.
|
|
137
|
+
if (looksLikePath(trimmed)) {
|
|
134
138
|
return {
|
|
135
139
|
mode: 'lexical',
|
|
136
140
|
confidence: 0.95,
|
|
@@ -149,10 +153,16 @@ export class QueryRouter {
|
|
|
149
153
|
const confidence = result.confidence;
|
|
150
154
|
const rejected = result.rejected;
|
|
151
155
|
|
|
156
|
+
// Collapse semantic → hybrid: empirically hybrid >= semantic on MRR
|
|
157
|
+
// across both gencodesearchnet and fastify/gin/ripgrep at ~+1ms p50.
|
|
158
|
+
const collapsedMode = (mode === 'semantic') ? 'hybrid' : mode;
|
|
152
159
|
return {
|
|
153
|
-
mode: rejected ? 'hybrid' :
|
|
160
|
+
mode: rejected ? 'hybrid' : collapsedMode,
|
|
161
|
+
rawMode: mode,
|
|
154
162
|
confidence,
|
|
155
|
-
method: rejected
|
|
163
|
+
method: rejected
|
|
164
|
+
? 'wasm_rejected'
|
|
165
|
+
: (mode === 'semantic' ? 'wasm_collapsed_semantic' : 'wasm_catboost'),
|
|
156
166
|
routingLatency_us: Math.round((performance.now() - start) * 1000),
|
|
157
167
|
};
|
|
158
168
|
} catch (err) {
|
|
@@ -177,6 +187,46 @@ export class QueryRouter {
|
|
|
177
187
|
}
|
|
178
188
|
}
|
|
179
189
|
|
|
190
|
+
// =============================================================================
|
|
191
|
+
// PATH-LIKENESS HEURISTIC
|
|
192
|
+
// =============================================================================
|
|
193
|
+
|
|
194
|
+
// Matches text that ENDS in a dot followed by one of the listed suffixes; the `i` flag makes the match case-insensitive.
const FILE_EXT_RE = /\.(java|js|jsx|ts|tsx|mjs|cjs|py|go|rs|kt|swift|rb|php|c|cpp|h|hpp|proto|json|xml|yml|yaml|md|sql|toml|ini|conf|cfg|sh|bash|zsh|env|lock|gitignore|gitattributes|dockerfile|makefile|rake|gemspec|cargo)$/i;
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Decide whether a query is a "real" file path / glob the user wants
|
|
198
|
+
* routed verbatim through lexical search, vs a natural-language phrase
|
|
199
|
+
* that just happens to contain a slash. The query is trimmed before testing.
|
|
200
|
+
*
|
|
201
|
+
* Path-likeness rules (model-agnostic), checked in this order:
|
|
202
|
+
* 1. extension-anchored (`*.test.js`, `package.json`, `README.md`)
|
|
203
|
+
* → looks like a path (regardless of slashes or whitespace; this test runs first).
|
|
204
|
+
* 2. contains `/` or `\` AND has NO whitespace anywhere
|
|
205
|
+
* → looks like a path. The heuristic assumes a bare path has no whitespace.
|
|
206
|
+
* 3. relative-path prefixes (`./`, `../`, `~/`) have no dedicated check:
|
|
207
|
+
* → they match via rule 2 (slash); a slash-less dot-name matches only via rule 1 (e.g. `.env`).
|
|
208
|
+
* 4. anything else (plain identifiers, NL phrases including ones that
|
|
209
|
+
* contain slashes like "HTTP/2 server setup", "TCP/IP stack")
|
|
210
|
+
* → NOT a path; let the WASM router decide.
|
|
211
|
+
*
|
|
212
|
+
* @param {string} query - Raw query text; a non-string or blank value yields false.
|
|
213
|
+
* @returns {boolean} true when the query matches rule 1 or rule 2, false otherwise.
|
|
214
|
+
*/
|
|
215
|
+
export function looksLikePath(query) {
|
|
216
|
+
if (typeof query !== 'string') return false;
|
|
217
|
+
const trimmed = query.trim();
|
|
218
|
+
if (!trimmed) return false;
|
|
219
|
+
if (FILE_EXT_RE.test(trimmed)) return true; // checked before whitespace, so a trailing known extension wins even in a multi-word query
|
|
220
|
+
// Whitespace immediately disqualifies — even if a slash is present, this
|
|
221
|
+
// is natural language ("HTTP/2 server setup", "client/server architecture").
|
|
222
|
+
if (/\s/.test(trimmed)) return false;
|
|
223
|
+
// No-whitespace + slash/backslash → true path or glob.
|
|
224
|
+
if (/[/\\]/.test(trimmed)) return true;
|
|
225
|
+
// No separate relative-prefix check: a slash-less dot-name such as `.env`
|
|
226
|
+
// already returned true via FILE_EXT_RE above; plain identifiers fall through.
|
|
227
|
+
return false;
|
|
|
|
228
|
+
}
|
|
229
|
+
|
|
180
230
|
// =============================================================================
|
|
181
231
|
// CONVENIENCE EXPORTS
|
|
182
232
|
// =============================================================================
|