sweet-search 2.5.11 → 2.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -65
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt-mcp.md +51 -0
- package/eval/agent-read-workflows/bin/_ss-argparse.mjs +148 -0
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +35 -19
- package/eval/agent-read-workflows/bin/ss-find +1 -1
- package/eval/agent-read-workflows/bin/ss-grep +1 -1
- package/mcp/server.js +18 -0
- package/package.json +9 -7
- package/scripts/init.js +157 -28
- package/scripts/inject-agent-instructions.js +50 -15
- package/scripts/install-mcp-server.js +122 -0
- package/scripts/uninstall.js +26 -1
package/README.md
CHANGED
|
@@ -4,9 +4,10 @@
|
|
|
4
4
|
|
|
5
5
|
### *Maybe grep isn't all you needβ¦* π¬
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
|
|
8
|
+
Every AI coding agent of today is stuck believing grep+Read is the way... ***sweet-search*** challenges the narrative π
|
|
9
|
+
|
|
10
|
+
A 100% local code-search engine for *Claude Code*, *Codex*, *Cursor* & friends with six blazing and purpose-built tools that hand your agent ranked, ready-to-use answers instead of walls of grep output. Up to 34% cheaper, 56% fewer tool calls, more useful answers, SOTA retrieval quality, zero API keys.
|
|
10
11
|
|
|
11
12
|
[](https://www.npmjs.com/package/sweet-search)
|
|
12
13
|
[](LICENSE)
|
|
@@ -18,29 +19,16 @@ and an evolved system prompt that teaches your agent to use it all β even on p
|
|
|
18
19
|
|
|
19
20
|
---
|
|
20
21
|
|
|
21
|
-
Your AI agent burns most of its tokens *looking* for code: grep, read, grep again, read more.
|
|
22
|
-
**sweet-search** replaces that loop with six purpose-built tools that return ranked, self-contained answers β
|
|
23
|
-
backed by a Rust/WASM engine, ColBERT-style late interaction, a code knowledge graph, and an index that
|
|
24
|
-
updates itself as you type.
|
|
25
|
-
|
|
26
|
-
<div align="center">
|
|
27
|
-
|
|
28
|
-
**10.2×** ripgrep's median grep speed · **2.9 ms** warm queries · **47×** faster reranking kernels · **0** API keys
|
|
29
|
-
|
|
30
|
-
<sub>measured in-repo β sources in [Benchmarks](#-benchmarks)</sub>
|
|
31
|
-
|
|
32
|
-
</div>
|
|
33
|
-
|
|
34
22
|
## β¨ Highlights
|
|
35
23
|
|
|
36
|
-
- **Hybrid retrieval** β BM25F lexical + dense semantic + structural graph signals, fused per query
|
|
37
|
-
- **Agent-native by design** β token-budgeted output tiers, an MCP server, and a GEPA-evolved system prompt installed into Claude Code, Codex, Gemini CLI, and Cursor with one command
|
|
38
|
-
- **Indexed grep, ~10Γ ripgrep** β a sparse n-gram prefilter skips the files that provably can't match
|
|
24
|
+
- **Hybrid retrieval** β one of the six tools uses BM25F lexical + dense semantic + structural graph signals, fused per query and reranked by late-interaction
|
|
25
|
+
- **Agent-native by design** β token-budgeted output tiers, an optional MCP server (and default zero-overhead CLI), and a GEPA-evolved system prompt installed into Claude Code, Codex, Gemini CLI, and Cursor with one command
|
|
26
|
+
- **Indexed grep, ~10× faster than ripgrep** — a sparse n-gram prefilter skips the files that provably can't match
|
|
39
27
|
- **ColBERT-style reranking, locally** β per-token MaxSim late interaction on hand-written SIMD kernels
|
|
40
|
-
- **
|
|
41
|
-
- **Never stale** β
|
|
42
|
-
- **
|
|
43
|
-
- **Local-first** β all models run on-device; nothing is sent anywhere, ever
|
|
28
|
+
- **GPU-accelerated indexing** β Apple Metal, CUDA, CoreML Neural Engine, or plain CPU via ORT; same engine, auto-selected
|
|
29
|
+
- **Never stale** β incremental indexing keeps the index aligned with your *working tree*, uncommitted edits included
|
|
30
|
+
- **No storage hassle** β indexed artifacts maximally optimized without any accuracy tradeoff; up to INT4 quantization
|
|
31
|
+
- **Local-first** β all models run on-device; nothing is sent anywhere, ever. CPU-inference supported for all models
|
|
44
32
|
|
|
45
33
|
## π Table of Contents
|
|
46
34
|
|
|
@@ -146,13 +134,13 @@ We measure sweet-search four ways β from how much it helps a real agent down t
|
|
|
146
134
|
<tr>
|
|
147
135
|
<td width="50%" valign="top">
|
|
148
136
|
|
|
149
|
-
π€
|
|
137
|
+
π€ **β [Code-retrieval](#bench-code-retrieval)** *(agent-in-the-loop)*<br>
|
|
150
138
|
<sub>Does it make a real coding agent **cheaper and more useful** when it searches your repo? Paired against each model's own grep-and-read loop.</sub>
|
|
151
139
|
|
|
152
140
|
</td>
|
|
153
141
|
<td width="50%" valign="top">
|
|
154
142
|
|
|
155
|
-
π§
|
|
143
|
+
π§ **β‘ [Task-completion](#bench-task-completion)** *(coming soon)*<br>
|
|
156
144
|
<sub>Does cheaper, denser context **compound** into a higher resolve-rate on multi-step engineering tasks? Harness in progress.</sub>
|
|
157
145
|
|
|
158
146
|
</td>
|
|
@@ -160,13 +148,13 @@ We measure sweet-search four ways β from how much it helps a real agent down t
|
|
|
160
148
|
<tr>
|
|
161
149
|
<td width="50%" valign="top">
|
|
162
150
|
|
|
163
|
-
π
|
|
151
|
+
π **β’ [Paper-type IR](#bench-paper-type)** *(academic)*<br>
|
|
164
152
|
<sub>The standard NL→code retrieval suites (GCSN, M2CRB, CoSQA…), full-corpus MRR@10.</sub>
|
|
165
153
|
|
|
166
154
|
</td>
|
|
167
155
|
<td width="50%" valign="top">
|
|
168
156
|
|
|
169
|
-
β‘
|
|
157
|
+
β‘ **β£ [Engine speed](#bench-engine-speed)**<br>
|
|
170
158
|
<sub>Raw systems numbers β grep throughput, query latency, rerank kernels, HNSW.</sub>
|
|
171
159
|
|
|
172
160
|
</td>
|
|
@@ -233,43 +221,52 @@ The win is **harness-adaptive**: where the native loop is disciplined (Claude Co
|
|
|
233
221
|
<a id="bench-paper-type"></a>
|
|
234
222
|
### π 3. Paper-type retrieval benchmarks β *academic NLβcode IR*
|
|
235
223
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
|
246
|
-
|
|
247
|
-
|
|
|
248
|
-
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
**
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
224
|
+
Every number below is the **`ss-search` pipeline end-to-end** β the same binary you install β run
|
|
225
|
+
against the **full benchmark corpus** (no 99-distractor shortcuts), **zero-shot** (we never
|
|
226
|
+
fine-tune on these tasks). Where a benchmark's queries are docstrings, we strip the docstring out of the
|
|
227
|
+
indexed code so the query can't trivially match itself β the standard retrieval protocol.
|
|
228
|
+
|
|
229
|
+
We're SOTA in June 2026 on 3/4 attempted benchmarks at HARDER settings (running on full pool) than most other attempts!
|
|
230
|
+
|
|
231
|
+
| π Benchmark | π What it tests | # Queries | π Pool | π― MRR@10 | π SOTA? |
|
|
232
|
+
|-----------|---------------|---------:|---------:|--------:|--------:|
|
|
233
|
+
| π **GenCodeSearchNet** | NL→code, 6 languages | 6,000 | full 6,000 | **86.6** | YES ✅ |
|
|
234
|
+
| π **CoSQA** | web queries → Python | 500 | full 6,267 | **65.5** | ✅ (zero-shot) |
|
|
235
|
+
| 🗺️ M2CRB | multilingual NL→code (ES/PT/DE/FR → Py/Java/JS) | 5,795 | full 5,795 | 54.0 | YES ✅ |
|
|
236
|
+
| 🛡️ AdvTest | adversarial, identifier-obfuscated Python | 19,210 | full 19,210 | 51.4 | NO ❌ |
|
|
237
|
+
|
|
238
|
+
<sub>SOTA = best result we can find in the published literature as of June 2026; cross-metric/protocol comparisons are spelled out per benchmark below.</sub>
|
|
239
|
+
|
|
240
|
+
#### π GenCodeSearchNet β `86.6` Β· π SOTA in June 2026
|
|
241
|
+
- **The BEST PUBLISHED number we can find, anywhere**
|
|
242
|
+
- The benchmark's own paper caps at **MRR ≤ 0.42** for fine-tuned baselines (≤ 0.10 cross-lingual); even zero-shot OpenAI Ada-2 reaches 0.79–0.94 — but **all of it against a tiny 99-distractor pool**.
|
|
243
|
+
- We score **0.866 against the entire 6,000-document corpus** β *a strictly harder setting* β and **zero-shot**. π₯
|
|
244
|
+
|
|
245
|
+
#### π CoSQA β `65.5` Β· π₯ Zero-shot SOTA in June 2026
|
|
246
|
+
- **Beats EVERY PUBLISHED zero-shot model**
|
|
247
|
+
- Canonical setup: 500 real web queries β the fixed **6,267-code database**, no fine-tuning.
|
|
248
|
+
- Clears the strongest zero-shot results out there β CodeSage-Large `47.5` Β· OpenAI text-embedding-3-large `55.4` Β· OASIS `55.8` β and goes **toe-to-toe with *fine-tuned* CodeBERT / GraphCodeBERT** (64.7 / 67.5). πͺ
|
|
249
|
+
- <sub>CoSQA has known label noise, so we read the absolute height with a pinch of salt.</sub>
|
|
250
|
+
|
|
251
|
+
#### πΊοΈ M2CRB β `54.0` Β· π SOTA in June 2026
|
|
252
|
+
- **the BEST PUBLISHED number we can find, anywhere** β and zero-shot
|
|
253
|
+
- πͺπΈ Spanish Β· π΅πΉ Portuguese Β· π©πͺ German Β· π«π· French β Python / Java / JavaScript.
|
|
254
|
+
- The paper's best β a CodeBERT **fine-tuned on the task** β reaches **52.7 auMRRc**, a metric that *averages over easier, smaller pools* (so `auMRRc β₯ full-pool MRR` for any model). Our **54.0 is full-pool MRR@10** over all 5,795 functions in one pool β a **strictly harder** measure, cleared with **no fine-tuning**. π₯
|
|
255
|
+
|
|
256
|
+
#### π‘οΈ AdvTest β `51.4` Β· π§ͺ **our honest worst case β and we publish it anyway**
|
|
257
|
+
- Adversarial obfuscation (`def Func(arg_0):`) deletes the lexical + graph signals our hybrid feeds on β yet we still **beat the classic fine-tuned baselines** (CodeBERT `27` Β· GraphCodeBERT `35` Β· UniXcoder `41`), and our stack *still lifts our own encoder ~3pp even here*.
|
|
258
|
+
- π **Full transparency:** we could **not** reproduce the often-cited `59.5` for the bare CodeRankEmbed encoder β the *reference FP32 model* scores **54.7** on our leak-free corpus, our shipped INT8 build **51.4**. The gap is stricter preprocessing + INT8 quantization, **not** the retrieval pipeline. We report exactly what we measured.
|
|
264
259
|
|
|
265
260
|
<details>
|
|
266
|
-
<summary><b>Methodology &
|
|
261
|
+
<summary><b>Methodology, protocol & honesty notes</b></summary>
|
|
267
262
|
|
|
268
|
-
- **Reproduction:** result artifacts live in `eval/results
|
|
269
|
-
- **
|
|
270
|
-
- **
|
|
271
|
-
- **
|
|
272
|
-
-
|
|
263
|
+
- **Reproduction:** result artifacts live in [`eval/results/`](eval/results/); rerun via `eval/run_all.js`. The canonical full-pool loaders are in `eval/download_data.py`.
|
|
264
|
+
- **Full corpus, not distractors.** Published baselines for GCSN- and CoSQA-style benchmarks typically rank the gold against 99 sampled distractors; every number here ranks against the benchmark's *full* corpus (6k–19k candidates) — strictly harder.
|
|
265
|
+
- **Zero-shot + docstring-stripped.** We never fine-tune on these tasks. For docstring-derived benchmarks (AdvTest, M2CRB) we strip the docstring from the indexed code — otherwise the NL query matches itself verbatim (a no-strip AdvTest run scores a meaningless 0.98). This is the standard protocol; it is also why our AdvTest is lower than naïve setups that leave the docstring in.
|
|
266
|
+
- **What we deliberately don't claim yet.** CoIR (official metric NDCG@10 over per-subtask corpora up to ~1M docs), CoSQA+ (multi-positive, MAP-primary), and CLARC (per-group pools) use protocols and metrics our single-pool MRR@10 harness doesn't currently match. Rather than publish apples-to-oranges numbers, we omit them; faithful per-subtask CoIR (NDCG@10) runs are queued.
|
|
267
|
+
- **M2CRB** — the paper's metric is *auMRRc* (area under the MRR-vs-pool-size curve; best published **52.7**, fine-tuned). Because that area averages over easier small pools, `auMRRc ≥ full-pool MRR` for any model — so our **54.0 full-pool MRR@10** (all 5,795 functions, zero-shot) clears their best on a strictly harder measure. No one publishes a plain full-corpus MRR@10 on M2CRB, so ours is the best available.
|
|
268
|
+
- **AdvTest honesty note.** We could not reproduce the commonly-cited 59.5 for the bare CodeRankEmbed encoder on our corpus: the reference FP32 model scores 54.7 on our leak-free, docstring-stripped, full-19,210 setup, and our shipped INT8 build 51.4. We report our measured numbers and the reference check rather than the leaderboard figure.
|
|
269
|
+
- **Honesty corner:** CrossCodeEval β cross-file *completion-context* retrieval, a different task than NL search β sits at 0.12. We don't optimize for it and report it anyway.
|
|
273
270
|
|
|
274
271
|
</details>
|
|
275
272
|
|
|
@@ -314,7 +311,8 @@ to be *consumed by an agent* β a useful answer, not a wall of matches to scrol
|
|
|
314
311
|
|
|
315
312
|
A hybrid search pipeline with late interaction reranking that returns actual code blocks.
|
|
316
313
|
|
|
317
|
-
|
|
314
|
+
Leading published-benchmark results β strongest we can find on GenCodeSearchNet, and above every published
|
|
315
|
+
zero-shot model on CoSQA. See [`benchmarks`](#-benchmarks).
|
|
318
316
|
|
|
319
317
|
```mermaid
|
|
320
318
|
flowchart TD
|
|
@@ -733,12 +731,13 @@ the three-stage retrieval it feeds at query time.
|
|
|
733
731
|
sweet-search meets your agent wherever it is β shell tools, MCP, or injected instructions:
|
|
734
732
|
|
|
735
733
|
```jsonc
|
|
736
|
-
// .
|
|
734
|
+
// .mcp.json (project root) β that's the whole integration
|
|
735
|
+
// or just run: sweet-search init --mcp
|
|
737
736
|
{
|
|
738
737
|
"mcpServers": {
|
|
739
738
|
"sweet-search": {
|
|
740
739
|
"command": "npx",
|
|
741
|
-
"args": ["sweet-search-mcp", "--project-root", "/absolute/path/to/your/repo"]
|
|
740
|
+
"args": ["-y", "sweet-search-mcp", "--project-root", "/absolute/path/to/your/repo"]
|
|
742
741
|
}
|
|
743
742
|
}
|
|
744
743
|
}
|
|
@@ -797,6 +796,16 @@ sweet-search stands on a lot of shoulders, and we'd rather name them than preten
|
|
|
797
796
|
|
|
798
797
|
<div align="center">
|
|
799
798
|
|
|
800
|
-
|
|
799
|
+
### Found it useful?
|
|
800
|
+
|
|
801
|
+
If sweet-search saves your agent's tokens, a β helps other agents' humans find it.
|
|
802
|
+
|
|
803
|
+
<a href="https://github.com/mrsladoje/sweet-search">
|
|
804
|
+
<img src="https://img.shields.io/badge/β%20Star%20sweet--search%20on%20GitHub-181717?style=for-the-badge&logo=github&logoColor=white" alt="Star sweet-search on GitHub" />
|
|
805
|
+
</a>
|
|
806
|
+
|
|
807
|
+
<br/><br/>
|
|
808
|
+
|
|
809
|
+
[](https://github.com/mrsladoje/sweet-search/stargazers)
|
|
801
810
|
|
|
802
811
|
</div>
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
---
|
|
2
|
+
variant: mcp
|
|
3
|
+
derived_from: p7-v1-mpp
|
|
4
|
+
source_prompt: core/prompt-optimization/data/p7-variant-restarts/p7-gen3-candidates/Mpp.md
|
|
5
|
+
benchmarked: false
|
|
6
|
+
note: >-
|
|
7
|
+
Hand-derived MCP-tool variant of the frozen M++ champion (p7-v1-mpp). The
|
|
8
|
+
STRATEGY core β routing by what-you-hold, trust-the-top-hit, sufficiency
|
|
9
|
+
stops, the two-probe absence rule, the <state_summary> gate, and the output
|
|
10
|
+
discipline β is preserved (semantics intact; the <state_summary> gate and
|
|
11
|
+
Output section are byte-identical, the rest carries only the tool-mechanics
|
|
12
|
+
rename). Only the tool-mechanics layer is remapped: the six ss-* CLI commands collapse onto the sweet-search MCP
|
|
13
|
+
tool surface (a UNIFIED `search` that subsumes ss-search/ss-find/ss-grep, plus
|
|
14
|
+
`trace` / `read-semantic` / `read` / `repo-map`). The anti-raw-scan discipline
|
|
15
|
+
is KEPT and retargeted from "ss-* vs raw grep" to "sweet-search tools vs native
|
|
16
|
+
Grep/Read/Bash", because an MCP host (e.g. Claude Code) still exposes native
|
|
17
|
+
file tools alongside the server.
|
|
18
|
+
NOT yet validated on held-out / OOD / vault against the MCP transport. Do NOT
|
|
19
|
+
treat its quality as equivalent to the CLI champion until a P7 eval is run
|
|
20
|
+
through MCP. Until then this ships only behind `init --mcp --no-cli`.
|
|
21
|
+
---
|
|
22
|
+
# Sweet-search β code search tool guide
|
|
23
|
+
|
|
24
|
+
Sweet-search indexes the working tree (uncommitted edits too) and searches it faster and cheaper than scanning files by hand. Use the sweet-search tools for all code search and navigation, in preference to native Grep/Read or shell `grep`/`find`/`cat`/`ls` — the index covers every file, so a manual scan only re-confirms a sweet-search result at higher cost, never beats it. Reach for a raw read only for an edit too recent to be reconciled (seconds old). Never re-run a sweet-search hit as a native Grep. Any sub-agent you delegate to must use these sweet-search tools, with this system prompt verbatim.
|
|
25
|
+
|
|
26
|
+
## Tools
|
|
27
|
+
The sweet-search MCP server exposes these (call each by the name your client lists it under):
|
|
28
|
+
- **search** β hybrid code search; returns ranked, self-contained code blocks. Your primary tool: it subsumes semantic search, exact-literal search, and semantic-reranked regex. Give it a natural-language query for a concept, or pass a `regex` (or lexical mode) for an exact token.
|
|
29
|
+
- **trace** β a symbol's callers, callees and impact in one call.
|
|
30
|
+
- **read-semantic** β the top ranked spans of one known file for a query.
|
|
31
|
+
- **read** β a narrow file range when you already know the file and lines.
|
|
32
|
+
- **repo-map** β a PageRank-scored symbol overview when you need orientation, not a specific answer.
|
|
33
|
+
|
|
34
|
+
## Open with the cheapest tool for what you hold
|
|
35
|
+
- **An exact token** (identifier, function/class/constant, error string, config key, path you could copy-paste): ONE `search` with that literal as a `regex` (rarest token, escaped). Trust the top hit and stop β no natural-language search first, no confirming re-search. One exception: if the top hit is an autogenerated file (a "do not edit" or "@generated" header, or a name like `schema11`/`validateN`), it is a generated copy, not where the value is authored β follow it to the real source it is generated from.
|
|
36
|
+
- **Only a behavior or concept**: one `search` in natural language for what you're looking for, then anchor on the symbol that surfaces. Shape it lightly by the target language β short and interrogative for JS/TS/Dart, a touch longer with a domain keyword otherwise.
|
|
37
|
+
- **How something flows / dispatches / is called / what a change impacts**: anchor one symbol (a literal, or a `search`), then `trace` it β one call returns callers, callees and impact. Prefer callees over impact (especially Python/Ruby/PHP). If a trace is sparse or empty, anchor the downstream symbol with `search` rather than retrying or hand-crawling; never make `trace` the spine of a multi-file search.
|
|
38
|
+
|
|
39
|
+
Trust the top ranked result; confirm with at most one narrow `read`, never a re-run of a matching hit.
|
|
40
|
+
|
|
41
|
+
## Multi-file
|
|
42
|
+
Chain inside the tools: land the entry file, `read-semantic` it for the import or handoff symbol, then `search` the downstream module. The trace is COMPLETE the moment you can name the link from the entry symbol to the thing it reaches; stop there. Leaf bodies, macro expansions, and the next hop down are not the answer unless asked, and chasing them β or dropping to a native Grep/Read to "just look" β is the main multi-file cost trap.
|
|
43
|
+
|
|
44
|
+
## A confirmed absence is a complete answer
|
|
45
|
+
When what you're looking for may not exist, absence is settled once TWO complementary `search` probes come back empty for the same concept: one in natural language and one as a broad `regex` on its likeliest identifier (a short substring/prefix). A search that returns plausible-but-off-target code is the decoy, not a lead β do not chase it. Two empty index probes over the whole codebase are more conclusive than any native scan or file listing, so state the negative and stop: no third synonym, no native `grep`/`ls`/`cat` enumeration.
|
|
46
|
+
|
|
47
|
+
## Before the third probe
|
|
48
|
+
Before your third sweet-search probe in the current search iteration — or before your final answer, whichever comes first — output a `<state_summary>` block with exactly: (1) one sentence on what you've established, (2) one sentence on your current blind spot.
|
|
49
|
+
|
|
50
|
+
## Output
|
|
51
|
+
Stop the instant your evidence answers what you're looking for β one confirmed file+symbol, or one named cross-file link, is enough; gather no corroboration you were not asked for. Name the file(s) and symbol(s) and how they answer what you need, or `no-match`.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// Pure argument-parsing helpers for the ss-* CLI wrappers.
|
|
2
|
+
//
|
|
3
|
+
// Extracted from _ss-helpers.mjs so they can be unit-tested without triggering
|
|
4
|
+
// the CLI's top-level IIFE (which runs on import). NOTHING here touches
|
|
5
|
+
// process.* or the filesystem β every function is a pure transform over an
|
|
6
|
+
// args array (some mutate the array in place, by design, and return a value).
|
|
7
|
+
|
|
8
|
+
// --- value-flag parsers (mutate `args`, returning the consumed value) --------
|
|
9
|
+
|
|
10
|
+
/**
 * Consume a `name value` pair from `args` and return the value.
 *
 * Only tokens that appear before a literal `--` are treated as options;
 * everything after `--` is positional and is left untouched. When the flag is
 * present but has no value before the end of the option region, the dangling
 * flag is removed and `fallback` is returned, so numeric callers never see
 * `undefined`/NaN.
 *
 * @param {Array} args - argument list; mutated in place (flag + value removed).
 * @param {string} name - exact flag token to look for, e.g. `--regex`.
 * @param {*} fallback - returned when the flag is absent or has no value.
 * @returns {*} the token that followed the flag, or `fallback`.
 */
export function parseFlag(args, name, fallback) {
  const sep = args.indexOf('--');
  const limit = sep === -1 ? args.length : sep; // end of the option region
  const i = args.indexOf(name);
  if (i === -1 || i >= limit) return fallback;
  if (i + 1 >= limit) {
    // Flag given without a value: drop the flag, never swallow `--`.
    args.splice(i, 1);
    return fallback;
  }
  const [, value] = args.splice(i, 2);
  return value;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Consume the first value-taking flag found among several aliases.
 *
 * Aliases are tried in the order given; the first one present in the option
 * region (the tokens before a literal `--`) wins and is removed together with
 * its value. A flag with no value before the end of the option region is
 * removed on its own and `fallback` is returned, so `+parseShortFlag(...)`
 * does not become NaN.
 *
 * @param {Array} args - argument list; mutated in place.
 * @param {string[]} names - alias tokens, e.g. `['-k', '--top']`.
 * @param {*} fallback - returned when no alias is present or it has no value.
 * @returns {*} the token that followed the matched alias, or `fallback`.
 */
export function parseShortFlag(args, names, fallback) {
  const sep = args.indexOf('--');
  const limit = sep === -1 ? args.length : sep; // end of the option region
  for (const n of names) {
    const i = args.indexOf(n);
    if (i === -1 || i >= limit) continue;
    if (i + 1 >= limit) {
      // Dangling flag: remove it without consuming `--` or a positional.
      args.splice(i, 1);
      return fallback;
    }
    const [, value] = args.splice(i, 2);
    return value;
  }
  return fallback;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Boolean (value-less) flag: remove every occurrence that appears before a
 * literal `--` and report whether any was present.
 *
 * Tokens after `--` are positional by convention, so a pattern that happens to
 * be spelled like a flag (e.g. `ss-grep -- -i`) is left in place.
 *
 * @param {Array} args - argument list; mutated in place.
 * @param {string[]} names - alias tokens, e.g. `['-i', '--ignore-case']`.
 * @returns {boolean} true when at least one alias was found and removed.
 */
export function parseBoolFlag(args, names) {
  let present = false;
  for (const n of names) {
    for (;;) {
      // Recomputed each pass: removing a token shifts the separator left.
      const sep = args.indexOf('--');
      const i = args.indexOf(n);
      if (i === -1 || (sep !== -1 && i > sep)) break;
      args.splice(i, 1);
      present = true;
    }
  }
  return present;
}
|
|
35
|
+
|
|
36
|
+
// --- pattern construction ----------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/**
 * Backslash-escape every regex metacharacter in `s` so the result matches the
 * input text literally.
 *
 * @param {*} s - value to escape; coerced with `String()`.
 * @returns {string} the escaped text.
 */
export function escapeRegex(s) {
  const text = String(s);
  const metachars = /[.*+?^${}()|[\]\\]/g;
  return text.replace(metachars, '\\$&');
}
|
|
41
|
+
|
|
42
|
+
/**
 * Translate the grep-family pattern flags into a single regex, so no engine
 * change is needed.
 *
 * Order matters: escape (literal) -> word-wrap -> case flag.
 *   - `fixedString` (`-F`) escapes the pattern so metacharacters are literal.
 *   - `wordBound` (`-w`) wraps it in `\b(?:...)\b`.
 *   - `ignoreCase` (`-i`) prepends the `(?i)` inline flag.
 *
 * `(?i)` is skipped only when the pattern already opens with an UNSCOPED flag
 * group that turns `i` on (e.g. `(?i)` or `(?si)`). A leading `(?-i)` turns
 * the flag off, and a scoped `(?i:...)` covers only its own group, so neither
 * counts as "already case-insensitive".
 *
 * @param {string} pattern - user pattern; falsy values are returned unchanged.
 * @param {{ignoreCase?: boolean, wordBound?: boolean, fixedString?: boolean}} [options]
 * @returns {string} the combined regex source (or the falsy input as-is).
 */
export function buildGrepPattern(pattern, { ignoreCase = false, wordBound = false, fixedString = false } = {}) {
  if (!pattern) return pattern;
  let p = fixedString ? escapeRegex(pattern) : pattern;
  if (wordBound) p = `\\b(?:${p})\\b`;
  // Flags before an optional `-` are enabled; flags after it are disabled.
  const alreadyInsensitive = /^\(\?[a-z]*i[a-z]*(?:-[a-z]*)?\)/.test(p);
  if (ignoreCase && !alreadyInsensitive) p = `(?i)${p}`;
  return p;
}
|
|
54
|
+
|
|
55
|
+
// --- inert flags (always true for ss-*, safe to accept as no-ops) ------------
// These never change which lines match: we always print file:line, always
// search the whole index, never colourise. Stripping them lets reflexive grep
// muscle-memory pass without a wasted call, UNLIKE semantic flags (-w/-F/-v/
// -C...), which we either implement or reject, never silently drop.
export const INERT_FLAGS = new Set([
  '-n', '--line-number', '-H', '--with-filename', '--no-filename',
  '-r', '-R', '--recursive', '--color', '--colour',
]);

/**
 * Remove inert grep flags from `args` in place.
 *
 * Only the option region (tokens before a literal `--`) is scanned: whatever
 * follows `--` is positional, so a pattern that is spelled like an inert flag
 * (`ss-grep -- -n`) is kept.
 *
 * @param {Array} args - argument list; mutated in place.
 * @returns {void}
 */
export function stripInertFlags(args) {
  const sep = args.indexOf('--');
  const limit = sep === -1 ? args.length : sep;
  // Walk backwards so splicing does not disturb indices still to be visited.
  for (let i = limit - 1; i >= 0; i--) {
    const a = args[i];
    if (typeof a === 'string' && (INERT_FLAGS.has(a) || /^--colou?r=/.test(a))) {
      args.splice(i, 1);
    }
  }
}
|
|
73
|
+
|
|
74
|
+
// --- normalisation: make agent-typed forms canonical before parsing ----------
// Short flags that consume a following value, and value-less boolean shorts.
// Used to split attached/bundled forms (-k5, -iw, -iwk5) the way getopt would,
// so they parse instead of being mistaken for an unknown flag or the pattern.
export const VALUE_SHORTS = new Set(['k']);
export const BOOL_SHORTS = new Set(['i', 'w', 'F']);

/**
 * Return a new argument list with attached and bundled forms expanded.
 *
 *   --name=value  ->  --name value
 *   -k5           ->  -k 5
 *   -iw / -iwk5   ->  -i -w / -i -w -k 5
 *
 * Tokens after a literal `--` and non-string tokens are copied through
 * untouched. A bundle containing an unknown character is left intact.
 *
 * `--color=WHEN` / `--colour=WHEN` are deliberately NOT split: the inert-flag
 * stripper removes the attached form as one token, whereas a split
 * `--color WHEN` would leave `WHEN` behind to be read as the pattern.
 *
 * @param {Array} args - raw argument list (not mutated).
 * @returns {Array} the normalised argument list.
 */
export function normalizeArgs(args) {
  const out = [];
  let positionalOnly = false;
  for (const tok of args) {
    if (positionalOnly || typeof tok !== 'string') { out.push(tok); continue; }
    if (tok === '--') { out.push(tok); positionalOnly = true; continue; }

    // Keep the inert colour flag attached to its value (see doc comment).
    if (/^--colou?r=/.test(tok)) { out.push(tok); continue; }

    // --name=value -> --name value
    let m = /^(--[A-Za-z][\w-]*)=(.*)$/.exec(tok);
    if (m) { out.push(m[1], m[2]); continue; }

    // attached short value or boolean bundle: -k5, -iw, -iwk5
    m = /^-([A-Za-z])(.+)$/.exec(tok);
    if (m) {
      const first = m[1];
      if (VALUE_SHORTS.has(first)) { out.push('-' + first, m[2]); continue; } // -k5 -> -k 5
      if (BOOL_SHORTS.has(first)) {
        const chars = tok.slice(1);
        const expanded = [];
        let i = 0;
        let ok = true;
        while (i < chars.length) {
          const ch = chars[i];
          if (BOOL_SHORTS.has(ch)) {
            expanded.push('-' + ch);
            i++;
          } else if (VALUE_SHORTS.has(ch)) {
            // A value short ends the bundle; the remainder is its value.
            const val = chars.slice(i + 1);
            expanded.push('-' + ch);
            if (val) expanded.push(val);
            i = chars.length;
          } else {
            // Unknown character: leave the token intact.
            ok = false;
            break;
          }
        }
        if (ok) { out.push(...expanded); continue; }
      }
    }
    out.push(tok);
  }
  return out;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Decide whether a token is shaped like a real CLI option rather than a
 * regex/query that merely begins with '-' (e.g. `-?\d+`, `-->`).
 *
 * Intentionally narrow: a single short letter, a pure-letter bundle, or a GNU
 * long flag. A bare `-` or `--` and any non-string are never options. Anything
 * containing regex metacharacters fails every shape, so a dash-leading pattern
 * is treated as positional without the caller having to pass `--`.
 *
 * @param {*} tok - candidate token.
 * @returns {boolean} true when the token has an option shape.
 */
export function looksLikeOption(tok) {
  if (typeof tok !== 'string') return false;
  if (tok === '-' || tok === '--') return false;
  const optionShapes = [
    /^-[A-Za-z]$/,          // -i
    /^-[A-Za-z]{2,}$/,      // -iw (pure-letter bundle)
    /^--[A-Za-z][\w-]*$/,   // --ignore-case
  ];
  return optionShapes.some((shape) => shape.test(tok));
}
|
|
130
|
+
|
|
131
|
+
/**
 * Resolve the positional pattern once the known flags have been consumed.
 *
 * A literal `--` ends option parsing: tokens before it form the option region,
 * tokens after it are positional. Without `--` the whole list plays both roles.
 * Any option-shaped token left in the option region is an unsupported flag; it
 * is reported instead of being dropped or mistaken for the pattern. The
 * function has no side effects, so the caller decides how to surface the error.
 *
 * @param {Array} args - remaining arguments (not mutated).
 * @returns {{pattern: *, unknownFlag: (string|null)}} the first positional
 *   token with `unknownFlag: null`, or `pattern: undefined` plus the offending
 *   flag.
 */
export function extractPositional(args) {
  const sep = args.indexOf('--');
  const hasSeparator = sep !== -1;
  const optionRegion = hasSeparator ? args.slice(0, sep) : args;
  const positionals = hasSeparator ? args.slice(sep + 1) : args;

  const unknownFlag = optionRegion.find(looksLikeOption);
  if (unknownFlag) return { pattern: undefined, unknownFlag };
  return { pattern: positionals[0], unknownFlag: null };
}
|
|
@@ -13,6 +13,10 @@ import path from 'node:path';
|
|
|
13
13
|
import { createHash } from 'node:crypto';
|
|
14
14
|
import { existsSync, readFileSync } from 'node:fs';
|
|
15
15
|
import { fileURLToPath } from 'node:url';
|
|
16
|
+
import {
|
|
17
|
+
parseFlag, parseShortFlag, parseBoolFlag,
|
|
18
|
+
buildGrepPattern, stripInertFlags, normalizeArgs, extractPositional,
|
|
19
|
+
} from './_ss-argparse.mjs';
|
|
16
20
|
|
|
17
21
|
// 8-char SHA1 prefix is enough for grouping identical queries across
|
|
18
22
|
// benchmark runs without bloating artifacts.
|
|
@@ -40,19 +44,18 @@ process.env.SWEET_SEARCH_PROJECT_ROOT = PROJECT_ROOT;
|
|
|
40
44
|
const subcommand = process.argv[2];
|
|
41
45
|
const rest = process.argv.slice(3);
|
|
42
46
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if (i !== -1) { const v = args[i + 1]; args.splice(i, 2); return v; }
|
|
47
|
+
// Pure arg-parsing helpers (parseFlag/parseShortFlag/parseBoolFlag/
|
|
48
|
+
// buildGrepPattern/stripInertFlags/normalizeArgs/extractPositional) live in
|
|
49
|
+
// ./_ss-argparse.mjs so they can be unit-tested without this file's top-level
|
|
50
|
+
// IIFE firing. resolvePositional wraps the side-effect-free extractPositional
|
|
51
|
+
// with the CLI's loud-error exit.
|
|
52
|
+
function resolvePositional(args, usage) {
|
|
53
|
+
const { pattern, unknownFlag } = extractPositional(args);
|
|
54
|
+
if (unknownFlag) {
|
|
55
|
+
process.stderr.write(`[ss] unrecognised option "${unknownFlag}"\n${usage}\n`);
|
|
56
|
+
process.exit(2);
|
|
54
57
|
}
|
|
55
|
-
return
|
|
58
|
+
return pattern;
|
|
56
59
|
}
|
|
57
60
|
|
|
58
61
|
async function getSweetSearch() {
|
|
@@ -79,11 +82,17 @@ async function ensureWarmServerReady({ timeoutMs = 60000, intervalMs = 500 } = {
|
|
|
79
82
|
|
|
80
83
|
// --- subcommands ----------------------------------------------------------
|
|
81
84
|
|
|
82
|
-
|
|
85
|
+
const GREP_USAGE = 'Usage: ss-grep <regex> [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [-k N]';
|
|
86
|
+
async function cmdGrep(rawArgs) {
|
|
87
|
+
const args = normalizeArgs(rawArgs);
|
|
88
|
+
const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
|
|
89
|
+
const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
|
|
90
|
+
const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
|
|
83
91
|
const k = +parseShortFlag(args, ['-k', '--top'], 20);
|
|
84
|
-
|
|
92
|
+
stripInertFlags(args);
|
|
93
|
+
const regex = buildGrepPattern(resolvePositional(args, GREP_USAGE), { ignoreCase, wordBound, fixedString });
|
|
85
94
|
if (!regex) {
|
|
86
|
-
process.stderr.write(
|
|
95
|
+
process.stderr.write(GREP_USAGE + '\n');
|
|
87
96
|
process.exit(2);
|
|
88
97
|
}
|
|
89
98
|
const s = await getSweetSearch();
|
|
@@ -109,27 +118,34 @@ async function cmdGrep(args) {
|
|
|
109
118
|
process.exit(0);
|
|
110
119
|
}
|
|
111
120
|
|
|
112
|
-
async function cmdFind(
|
|
121
|
+
async function cmdFind(rawArgs) {
|
|
122
|
+
const args = normalizeArgs(rawArgs);
|
|
113
123
|
// ColGrep pattern search with token-budgeted agent packaging β returns the
|
|
114
124
|
// FULL useful answer (ranked code blocks + confidence + sufficiency), the same
|
|
115
125
|
// agent packaging ss-search emits. ss-grep is the short/locator counterpart, so
|
|
116
126
|
// ss-find defaults to the full answer: it saves the follow-up read entirely.
|
|
117
127
|
// (Mirrors the agent-in-the-loop H2H adapter eval/agent-eval/tools/
|
|
118
128
|
// pattern-agent-tools.js, which calls search(...,{format:'agent'}).)
|
|
129
|
+
const FIND_USAGE = 'Usage: ss-find "<query>" --regex "<regex>" [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [--full|--xl] [-k N]';
|
|
119
130
|
let format = 'agent';
|
|
120
131
|
if (args.includes('--full')) { format = 'agent_full'; args.splice(args.indexOf('--full'), 1); }
|
|
121
132
|
if (args.includes('--xl')) { format = 'agent_full_xl'; args.splice(args.indexOf('--xl'), 1); }
|
|
133
|
+
const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
|
|
134
|
+
const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
|
|
135
|
+
const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
|
|
122
136
|
const k = +parseShortFlag(args, ['-k', '--top'], 6);
|
|
123
137
|
const regex = parseFlag(args, '--regex', '');
|
|
124
|
-
|
|
138
|
+
stripInertFlags(args);
|
|
139
|
+
const query = resolvePositional(args, FIND_USAGE);
|
|
125
140
|
if (!query) {
|
|
126
|
-
process.stderr.write(
|
|
141
|
+
process.stderr.write(FIND_USAGE + '\n');
|
|
127
142
|
process.exit(2);
|
|
128
143
|
}
|
|
129
144
|
// Budget-sweep experiment hook: lets the bench pin the response token budget
|
|
130
145
|
// per-process without changing the agent-visible tool surface.
|
|
131
146
|
const envFindBudget = Number(process.env.SS_SMOKE_FIND_BUDGET || '') || null;
|
|
132
|
-
|
|
147
|
+
// Pattern flags apply to the regex candidate generator; the NL query is untouched.
|
|
148
|
+
const effectiveRegex = buildGrepPattern(regex || '', { ignoreCase, wordBound, fixedString });
|
|
133
149
|
const s = await getSweetSearch();
|
|
134
150
|
if (!s.hasLateInteractionIndex) {
|
|
135
151
|
process.stderr.write(`[ss-find] no late-interaction index β falling back to ss-grep\n`);
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Use for behavioural / semantic questions where lexical alone won't pinpoint the
|
|
6
6
|
# chunk. (ss-grep is the short file:line locator.)
|
|
7
7
|
#
|
|
8
|
-
# Usage: ss-find "<query>" --regex "<regex>" [--full|--xl] [-k N]
|
|
8
|
+
# Usage: ss-find "<query>" --regex "<regex>" [-i|--ignore-case] [--full|--xl] [-k N]
|
|
9
9
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
|
10
10
|
TMPERR=$(mktemp)
|
|
11
11
|
node "$DIR/_ss-helpers.mjs" find "$@" 2>"$TMPERR"
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# ss-grep: indexed bare grep (gram-prefiltered) over the cwd's Sweet Search index.
|
|
3
3
|
# Compact agent-friendly output: file:line matchText
|
|
4
4
|
#
|
|
5
|
-
# Usage: ss-grep <regex> [-k N]
|
|
5
|
+
# Usage: ss-grep <regex> [-i|--ignore-case] [-k N]
|
|
6
6
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
|
7
7
|
TMPERR=$(mktemp)
|
|
8
8
|
node "$DIR/_ss-helpers.mjs" grep "$@" 2>"$TMPERR"
|
package/mcp/server.js
CHANGED
|
@@ -115,10 +115,28 @@ const vocabDeps = { coreDir };
|
|
|
115
115
|
// MCP Server
|
|
116
116
|
// ---------------------------------------------------------------------------
|
|
117
117
|
|
|
118
|
+
// MCP `instructions` — the agent-routing policy delivered to hosts that connect
|
|
119
|
+
// to this server WITHOUT a project file to inject into (Claude Desktop, a remote
|
|
120
|
+
// endpoint). This is the SECONDARY carrier; the primary is the MCP-variant prompt
|
|
121
|
+
// that `sweet-search init --mcp --no-cli` injects into CLAUDE.md/AGENTS.md (the
|
|
122
|
+
// high-salience slot). Best-effort: if the ship-file is missing the server still
|
|
123
|
+
// starts, just without instructions.
|
|
124
|
+
const MCP_INSTRUCTIONS = (() => {
|
|
125
|
+
try {
|
|
126
|
+
const p = path.join(__dirname, '..', 'core/prompt-optimization/data/p7-final/sweet-search-system-prompt-mcp.md');
|
|
127
|
+
const raw = readFileSync(p, 'utf8');
|
|
128
|
+
return raw.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n/, '').trim() || undefined;
|
|
129
|
+
} catch (err) {
|
|
130
|
+
if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] mcp instructions: ${err?.message || err}\n`);
|
|
131
|
+
return undefined;
|
|
132
|
+
}
|
|
133
|
+
})();
|
|
134
|
+
|
|
118
135
|
const server = new McpServer({
|
|
119
136
|
name: 'sweet-search',
|
|
120
137
|
version: PKG_VERSION,
|
|
121
138
|
}, {
|
|
139
|
+
...(MCP_INSTRUCTIONS ? { instructions: MCP_INSTRUCTIONS } : {}),
|
|
122
140
|
capabilities: {
|
|
123
141
|
tools: { listChanged: false },
|
|
124
142
|
resources: { subscribe: false, listChanged: false },
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sweet-search",
|
|
3
|
-
"version": "2.5.
|
|
3
|
+
"version": "2.5.13",
|
|
4
4
|
"description": "Sweet Search - SOTA Hybrid Code Search Engine with WASM CatBoost Query Router, Semantic/Lexical/Structural Search, and Multilingual Support",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "core/search/sweet-search.js",
|
|
@@ -61,6 +61,7 @@
|
|
|
61
61
|
"scripts/smoke-test.js",
|
|
62
62
|
"scripts/inject-agent-instructions.js",
|
|
63
63
|
"scripts/write-claude-rules.js",
|
|
64
|
+
"scripts/install-mcp-server.js",
|
|
64
65
|
"scripts/install-prompt-reminders.js",
|
|
65
66
|
"scripts/install-tool-enforcement.js",
|
|
66
67
|
"scripts/hooks/",
|
|
@@ -75,6 +76,7 @@
|
|
|
75
76
|
"eval/agent-read-workflows/bin/ss-trace",
|
|
76
77
|
"eval/agent-read-workflows/bin/ss-read",
|
|
77
78
|
"eval/agent-read-workflows/bin/_ss-helpers.mjs",
|
|
79
|
+
"eval/agent-read-workflows/bin/_ss-argparse.mjs",
|
|
78
80
|
"crates/wasm-router/pkg/",
|
|
79
81
|
"LICENSE",
|
|
80
82
|
"NOTICE"
|
|
@@ -163,12 +165,12 @@
|
|
|
163
165
|
},
|
|
164
166
|
"optionalDependencies": {
|
|
165
167
|
"usearch": "^2.21.4",
|
|
166
|
-
"@sweet-search/native-darwin-arm64": "2.5.
|
|
167
|
-
"@sweet-search/native-darwin-x64": "2.5.
|
|
168
|
-
"@sweet-search/native-linux-arm64-gnu": "2.5.
|
|
169
|
-
"@sweet-search/native-linux-arm64-gnu-cuda": "2.5.
|
|
170
|
-
"@sweet-search/native-linux-x64-gnu": "2.5.
|
|
171
|
-
"@sweet-search/native-linux-x64-gnu-cuda": "2.5.
|
|
168
|
+
"@sweet-search/native-darwin-arm64": "2.5.13",
|
|
169
|
+
"@sweet-search/native-darwin-x64": "2.5.13",
|
|
170
|
+
"@sweet-search/native-linux-arm64-gnu": "2.5.13",
|
|
171
|
+
"@sweet-search/native-linux-arm64-gnu-cuda": "2.5.13",
|
|
172
|
+
"@sweet-search/native-linux-x64-gnu": "2.5.13",
|
|
173
|
+
"@sweet-search/native-linux-x64-gnu-cuda": "2.5.13"
|
|
172
174
|
},
|
|
173
175
|
"engines": {
|
|
174
176
|
"node": ">=18.0.0"
|
package/scripts/init.js
CHANGED
|
@@ -38,9 +38,10 @@ import {
|
|
|
38
38
|
import { describeDedupConfig } from '../core/infrastructure/index.js';
|
|
39
39
|
import { verifyRuntime, getMaxsimTier, getRouterType } from './verify-runtime.js';
|
|
40
40
|
import { ALL_HARNESSES, injectAgentInstructions } from './inject-agent-instructions.js';
|
|
41
|
-
import { writeClaudeRules } from './write-claude-rules.js';
|
|
42
|
-
import {
|
|
43
|
-
import {
|
|
41
|
+
import { writeClaudeRules, removeClaudeRules } from './write-claude-rules.js';
|
|
42
|
+
import { installMcpServer } from './install-mcp-server.js';
|
|
43
|
+
import { installPromptReminderHook, removePromptReminderHook } from './install-prompt-reminders.js';
|
|
44
|
+
import { installToolEnforcement, removeToolEnforcement } from './install-tool-enforcement.js';
|
|
44
45
|
import { isNativeInferenceAvailable } from '../core/infrastructure/native-inference.js';
|
|
45
46
|
|
|
46
47
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
@@ -83,6 +84,15 @@ export function parseInitArgs(args) {
|
|
|
83
84
|
enforceTools: false, // P3: --enforce-tools (default OFF — opt-in strict mode)
|
|
84
85
|
codex: false, // --codex: wire the Codex CLI SessionStart hook
|
|
85
86
|
codexEnableGlobalHooks: false, // --codex-enable-global-hooks: also enable the flag in ~/.codex/config.toml
|
|
87
|
+
// Contact-surface flags (additive at install, exclusive at consumption):
|
|
88
|
+
// --mcp registers the sweet-search MCP server in the project .mcp.json
|
|
89
|
+
// (additive — the CLI stays). Harness-agnostic, root-level.
|
|
90
|
+
// --no-cli makes MCP the agent's *contact surface*: inject the MCP-tool
|
|
91
|
+
// prompt variant instead of the ss-* CLI one, and skip the
|
|
92
|
+
// CLI-surface-specific supplements (rules file, ss-* reminder).
|
|
93
|
+
// Indexing still runs through the CLI/engine. Requires --mcp.
|
|
94
|
+
mcp: false,
|
|
95
|
+
noCli: false,
|
|
86
96
|
};
|
|
87
97
|
|
|
88
98
|
for (let i = 0; i < args.length; i++) {
|
|
@@ -188,12 +198,40 @@ export function parseInitArgs(args) {
|
|
|
188
198
|
// P3: opt-in strict mode — denies native Grep + installs a Read
|
|
189
199
|
// hint hook. Opinionated and Claude-specific (per §4D).
|
|
190
200
|
result.enforceTools = true;
|
|
201
|
+
} else if (arg === '--mcp') {
|
|
202
|
+
// Register the sweet-search MCP server in the project root .mcp.json.
|
|
203
|
+
// Additive: the CLI surface stays. Independent of --no-claude.
|
|
204
|
+
result.mcp = true;
|
|
205
|
+
} else if (arg === '--no-cli') {
|
|
206
|
+
// Make MCP the agent's contact surface: inject the MCP-tool prompt
|
|
207
|
+
// variant and skip the CLI-surface supplements. Requires --mcp (the
|
|
208
|
+
// agent would otherwise have no way to reach sweet-search). Indexing
|
|
209
|
+
// still uses the CLI/engine.
|
|
210
|
+
result.noCli = true;
|
|
191
211
|
}
|
|
192
212
|
}
|
|
193
213
|
|
|
194
214
|
return result;
|
|
195
215
|
}
|
|
196
216
|
|
|
217
|
+
/**
|
|
218
|
+
* Cross-flag validation for init args. Currently the only rule: `--no-cli`
|
|
219
|
+
* (suppress the CLI contact surface) is meaningless without `--mcp` (the
|
|
220
|
+
* replacement contact surface). Returns `{ ok, error }`.
|
|
221
|
+
*/
|
|
222
|
+
export function validateInitArgs(parsed) {
|
|
223
|
+
if (parsed.noCli && !parsed.mcp) {
|
|
224
|
+
return {
|
|
225
|
+
ok: false,
|
|
226
|
+
error:
|
|
227
|
+
'--no-cli requires --mcp. Suppressing the CLI contact surface leaves the agent '
|
|
228
|
+
+ 'with no way to reach sweet-search unless the MCP server is registered. '
|
|
229
|
+
+ 'Re-run as `sweet-search init --mcp --no-cli`, or drop --no-cli.',
|
|
230
|
+
};
|
|
231
|
+
}
|
|
232
|
+
return { ok: true, error: null };
|
|
233
|
+
}
|
|
234
|
+
|
|
197
235
|
/**
|
|
198
236
|
* Resolve the active harness list. Default is `claude-code` only;
|
|
199
237
|
* `--agents` / `--gemini` / `--cursor` add to that set; `--no-claude`
|
|
@@ -713,7 +751,7 @@ function printReport(report) {
|
|
|
713
751
|
profile, maxsimTier, routerType, models, verification, runtimeDownloads,
|
|
714
752
|
capability, cascadeReport, dedupReport, prewarmHookReport, skillReport,
|
|
715
753
|
liChoices, agentInstructionsReport, claudeRulesReport,
|
|
716
|
-
promptReminderReport, toolEnforcementReport,
|
|
754
|
+
promptReminderReport, toolEnforcementReport, mcpServerReport,
|
|
717
755
|
} = report;
|
|
718
756
|
|
|
719
757
|
console.log('');
|
|
@@ -833,6 +871,9 @@ function printReport(report) {
|
|
|
833
871
|
if (toolEnforcementReport && toolEnforcementReport.status !== 'skipped') {
|
|
834
872
|
console.log(` Tool enforcement: ${toolEnforcementReport.status} (Grep deny + Read hint)`);
|
|
835
873
|
}
|
|
874
|
+
if (mcpServerReport && mcpServerReport.status) {
|
|
875
|
+
console.log(` MCP server (.mcp.json): ${mcpServerReport.status}${mcpServerReport.detail ? ` β ${mcpServerReport.detail}` : ''}`);
|
|
876
|
+
}
|
|
836
877
|
|
|
837
878
|
console.log(` Runtime downloads: ${runtimeDownloads}`);
|
|
838
879
|
|
|
@@ -1429,7 +1470,24 @@ Options:
|
|
|
1429
1470
|
native Read suggesting ss-read / ss-semantic.
|
|
1430
1471
|
Read is hinted, not blocked, because edit
|
|
1431
1472
|
workflows legitimately need Read. Always
|
|
1432
|
-
implied off when --no-claude is set.
|
|
1473
|
+
implied off when --no-claude or --no-cli is set.
|
|
1474
|
+
--mcp Register the sweet-search MCP server in the project
|
|
1475
|
+
root .mcp.json (an "npx -y sweet-search-mcp" entry
|
|
1476
|
+
under mcpServers.sweet-search). Additive and
|
|
1477
|
+
idempotent β the CLI surface stays, other servers
|
|
1478
|
+
and JSON keys are preserved. Root-level and
|
|
1479
|
+
harness-agnostic (independent of --no-claude). The
|
|
1480
|
+
MCP server is a thin adapter over the same engine
|
|
1481
|
+
the CLI wraps.
|
|
1482
|
+
--no-cli Make MCP the agent's CONTACT SURFACE: inject the
|
|
1483
|
+
MCP-tool prompt variant instead of the ss-* CLI
|
|
1484
|
+
one, and skip the CLI-surface supplements (the
|
|
1485
|
+
.claude/rules file, the ss-* prompt reminder, tool
|
|
1486
|
+
enforcement). Indexing still runs through the CLI/
|
|
1487
|
+
engine β this only changes how the agent searches.
|
|
1488
|
+
Requires --mcp. NOTE: the MCP prompt variant is
|
|
1489
|
+
hand-derived from the frozen CLI champion and is not
|
|
1490
|
+
yet benchmarked on the MCP transport.
|
|
1433
1491
|
--verbose, -v Enable verbose output
|
|
1434
1492
|
--help, -h Show this help
|
|
1435
1493
|
|
|
@@ -1448,9 +1506,11 @@ CoreML cascade (M3+ Apple Silicon only):
|
|
|
1448
1506
|
strategy.
|
|
1449
1507
|
|
|
1450
1508
|
Examples:
|
|
1451
|
-
sweet-search init # Full profile (default)
|
|
1509
|
+
sweet-search init # Full profile (default); CLI contact surface
|
|
1452
1510
|
sweet-search init --profile core # Core profile (no model downloads)
|
|
1453
1511
|
sweet-search init --force # Re-download all models
|
|
1512
|
+
sweet-search init --mcp # Also register the MCP server (CLI stays)
|
|
1513
|
+
sweet-search init --mcp --no-cli # MCP-only contact surface (MCP-variant prompt)
|
|
1454
1514
|
sweet-search init --build-coreml-cascade # Trace the cascade locally (dev only)
|
|
1455
1515
|
`);
|
|
1456
1516
|
}
|
|
@@ -1467,6 +1527,13 @@ export async function runInit(args) {
|
|
|
1467
1527
|
return;
|
|
1468
1528
|
}
|
|
1469
1529
|
|
|
1530
|
+
const validation = validateInitArgs(parsed);
|
|
1531
|
+
if (!validation.ok) {
|
|
1532
|
+
console.error(`sweet-search init: ${validation.error}`);
|
|
1533
|
+
process.exitCode = 1;
|
|
1534
|
+
return;
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1470
1537
|
// 0. Animated banner (best-effort; only on an interactive TTY, never in CI/pipes).
|
|
1471
1538
|
if (process.stdout.isTTY && !process.env.CI && !process.env.NO_BANNER && !process.env.SWEET_SEARCH_NO_BANNER) {
|
|
1472
1539
|
// query:false — init is interactive (readline); avoid any stdin contention with the terminal capability probe.
|
|
@@ -1918,6 +1985,30 @@ export async function runInit(args) {
|
|
|
1918
1985
|
// Idempotent marker block so re-init never duplicates content.
|
|
1919
1986
|
// `--no-agent-instructions` is the umbrella that skips the
|
|
1920
1987
|
// instruction-file injection layer entirely.
|
|
1988
|
+
//
|
|
1989
|
+
// 11.5 MCP server registration (`--mcp`). Writes the project-root `.mcp.json`
|
|
1990
|
+
// entry for `sweet-search-mcp`. Additive + idempotent + harness-agnostic
|
|
1991
|
+
// (root-level, independent of --no-claude). The MCP server is a thin
|
|
1992
|
+
// adapter over the same engine the CLI wraps — `--mcp` adds it, it never
|
|
1993
|
+
// replaces the CLI. `--no-cli` (below) only switches the agent's contact
|
|
1994
|
+
// surface to MCP; indexing keeps running through the CLI/engine.
|
|
1995
|
+
let mcpServerReport = null;
|
|
1996
|
+
if (parsed.mcp) {
|
|
1997
|
+
try {
|
|
1998
|
+
mcpServerReport = installMcpServer({ projectRoot });
|
|
1999
|
+
process.stderr.write(
|
|
2000
|
+
`[init] MCP server (.mcp.json): ${mcpServerReport.status}`
|
|
2001
|
+
+ (mcpServerReport.detail ? ` β ${mcpServerReport.detail}` : '') + '\n',
|
|
2002
|
+
);
|
|
2003
|
+
} catch (err) {
|
|
2004
|
+
process.stderr.write(`[init] Warning: MCP server registration failed: ${err.message}\n`);
|
|
2005
|
+
}
|
|
2006
|
+
}
|
|
2007
|
+
|
|
2008
|
+
// Contact-surface variant: --no-cli makes MCP the agent's surface, so we
|
|
2009
|
+
// inject the MCP-tool prompt variant instead of the ss-* CLI champion.
|
|
2010
|
+
const promptVariant = parsed.noCli ? 'mcp' : 'cli';
|
|
2011
|
+
|
|
1921
2012
|
let agentInstructionsReport = null;
|
|
1922
2013
|
let claudeRulesReport = null;
|
|
1923
2014
|
if (!parsed.skipAgentInstructions) {
|
|
@@ -1936,25 +2027,40 @@ export async function runInit(args) {
|
|
|
1936
2027
|
projectRoot,
|
|
1937
2028
|
harnesses: activeHarnesses,
|
|
1938
2029
|
useSymlinks: parsed.symlinkInstructionFiles,
|
|
2030
|
+
variant: promptVariant,
|
|
1939
2031
|
});
|
|
1940
2032
|
const summary = Object.entries(agentInstructionsReport.harnesses)
|
|
1941
2033
|
.map(([k, v]) => `${k}=${v}`).join(' ');
|
|
1942
2034
|
const canonical = agentInstructionsReport.canonical
|
|
1943
2035
|
? ` (canonical=${agentInstructionsReport.canonical})` : '';
|
|
1944
|
-
|
|
2036
|
+
const variantTag = promptVariant === 'mcp' ? ' [mcp variant]' : '';
|
|
2037
|
+
process.stderr.write(`[init] Agent instructions: ${summary || '(none)'}${canonical}${variantTag}\n`);
|
|
1945
2038
|
} catch (err) {
|
|
1946
2039
|
process.stderr.write(`[init] Warning: Agent-instruction injection failed: ${err.message}\n`);
|
|
1947
2040
|
}
|
|
1948
|
-
// Claude rules file is only useful when claude-code is enabled
|
|
1949
|
-
// sole load path is the
|
|
1950
|
-
//
|
|
2041
|
+
// Claude rules file is only useful when claude-code is enabled AND the
|
|
2042
|
+
// CLI is the contact surface β its sole load path is the
|
|
2043
|
+
// @.claude/rules/sweet-search.md import line that injectAgentInstructions
|
|
2044
|
+
// writes into CLAUDE.md (omitted in the --no-cli MCP variant), and its
|
|
2045
|
+
// body is written in ss-* CLI terms. Under --no-cli we TEAR DOWN any rules
|
|
2046
|
+
// file a prior CLI init wrote (idempotent: not-found when absent) so a
|
|
2047
|
+
// cli→mcp re-init never leaves a stale ss-* supplement contradicting the
|
|
2048
|
+
// injected MCP prompt.
|
|
1951
2049
|
if (activeHarnesses.includes('claude-code')) {
|
|
1952
2050
|
try {
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
2051
|
+
if (parsed.noCli) {
|
|
2052
|
+
const status = removeClaudeRules({ projectRoot });
|
|
2053
|
+
claudeRulesReport = { status };
|
|
2054
|
+
if (status === 'removed' || parsed.verbose) {
|
|
2055
|
+
process.stderr.write(`[init] Claude rules: ${status}${status === 'removed' ? ' (--no-cli β stale ss-* CLI supplement torn down)' : ' (--no-cli)'}\n`);
|
|
2056
|
+
}
|
|
2057
|
+
} else {
|
|
2058
|
+
const status = writeClaudeRules({ projectRoot });
|
|
2059
|
+
claudeRulesReport = { status };
|
|
2060
|
+
process.stderr.write(`[init] Claude rules: ${status}\n`);
|
|
2061
|
+
}
|
|
1956
2062
|
} catch (err) {
|
|
1957
|
-
process.stderr.write(`[init] Warning:
|
|
2063
|
+
process.stderr.write(`[init] Warning: Claude rules ${parsed.noCli ? 'teardown' : 'write'} failed: ${err.message}\n`);
|
|
1958
2064
|
}
|
|
1959
2065
|
}
|
|
1960
2066
|
}
|
|
@@ -1968,15 +2074,26 @@ export async function runInit(args) {
|
|
|
1968
2074
|
// `.claude/hooks/sweet-search-remind-tools.mjs` with a
|
|
1969
2075
|
// `hooks.UserPromptSubmit` entry in `.claude/settings.json` keyed by
|
|
1970
2076
|
// filename so re-init updates rather than duplicates.
|
|
2077
|
+
// Under --no-cli the reminder body (ss-* CLI Bash commands) contradicts
|
|
2078
|
+
// the injected MCP-variant prompt, so we TEAR DOWN any reminder hook a
|
|
2079
|
+
// prior CLI init installed (idempotent: not-found when absent) rather
|
|
2080
|
+
// than merely skipping the install. An MCP-variant reminder is a follow-up.
|
|
1971
2081
|
let promptReminderReport = null;
|
|
1972
2082
|
if (!parsed.noClaude) {
|
|
1973
|
-
|
|
1974
|
-
projectRoot
|
|
1975
|
-
|
|
1976
|
-
|
|
1977
|
-
|
|
1978
|
-
|
|
1979
|
-
|
|
2083
|
+
if (parsed.noCli) {
|
|
2084
|
+
promptReminderReport = removePromptReminderHook({ projectRoot });
|
|
2085
|
+
if (parsed.verbose || promptReminderReport.status === 'error') {
|
|
2086
|
+
process.stderr.write(`[init] Prompt reminder hook: ${promptReminderReport.status} (--no-cli) β ${promptReminderReport.detail}\n`);
|
|
2087
|
+
}
|
|
2088
|
+
} else {
|
|
2089
|
+
promptReminderReport = installPromptReminderHook({
|
|
2090
|
+
projectRoot,
|
|
2091
|
+
packageRoot: PACKAGE_ROOT,
|
|
2092
|
+
skipped: parsed.skipPromptReminders,
|
|
2093
|
+
});
|
|
2094
|
+
if (parsed.verbose || promptReminderReport.status === 'error') {
|
|
2095
|
+
process.stderr.write(`[init] Prompt reminder hook: ${promptReminderReport.status} β ${promptReminderReport.detail}\n`);
|
|
2096
|
+
}
|
|
1980
2097
|
}
|
|
1981
2098
|
}
|
|
1982
2099
|
|
|
@@ -1984,15 +2101,26 @@ export async function runInit(args) {
|
|
|
1984
2101
|
// `--enforce-tools`; universal `--no-claude` gate above. Adds
|
|
1985
2102
|
// `permissions.deny: ["Grep"]` and a PreToolUse hint hook for `Read`
|
|
1986
2103
|
// in `.claude/settings.json`. Strict + opinionated; off by default.
|
|
2104
|
+
// Under --no-cli the Read hint points at ss-read / ss-semantic (CLI
|
|
2105
|
+
// surface) and denying native Grep is moot when MCP `search` is the
|
|
2106
|
+
// contact surface — so we TEAR DOWN any enforcement a prior CLI init
|
|
2107
|
+
// wrote (idempotent: not-found when absent) instead of merely skipping.
|
|
1987
2108
|
let toolEnforcementReport = null;
|
|
1988
2109
|
if (!parsed.noClaude) {
|
|
1989
|
-
|
|
1990
|
-
projectRoot
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
2110
|
+
if (parsed.noCli) {
|
|
2111
|
+
toolEnforcementReport = removeToolEnforcement({ projectRoot });
|
|
2112
|
+
if (parsed.verbose || toolEnforcementReport.status === 'error') {
|
|
2113
|
+
process.stderr.write(`[init] Tool enforcement: ${toolEnforcementReport.status} (--no-cli)${toolEnforcementReport.detail ? ` β ${toolEnforcementReport.detail}` : ''}\n`);
|
|
2114
|
+
}
|
|
2115
|
+
} else {
|
|
2116
|
+
toolEnforcementReport = installToolEnforcement({
|
|
2117
|
+
projectRoot,
|
|
2118
|
+
packageRoot: PACKAGE_ROOT,
|
|
2119
|
+
skipped: !parsed.enforceTools,
|
|
2120
|
+
});
|
|
2121
|
+
if (parsed.verbose || toolEnforcementReport.status === 'error') {
|
|
2122
|
+
process.stderr.write(`[init] Tool enforcement: ${toolEnforcementReport.status} β ${toolEnforcementReport.detail}\n`);
|
|
2123
|
+
}
|
|
1996
2124
|
}
|
|
1997
2125
|
}
|
|
1998
2126
|
|
|
@@ -2014,6 +2142,7 @@ export async function runInit(args) {
|
|
|
2014
2142
|
claudeRulesReport,
|
|
2015
2143
|
promptReminderReport,
|
|
2016
2144
|
toolEnforcementReport,
|
|
2145
|
+
mcpServerReport,
|
|
2017
2146
|
});
|
|
2018
2147
|
}
|
|
2019
2148
|
|
|
@@ -63,20 +63,27 @@ function escapeRegex(s) {
|
|
|
63
63
|
|
|
64
64
|
const SHIP_FILE_REL = 'core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md';
|
|
65
65
|
|
|
66
|
+
// MCP-tool variant of the policy (init --mcp --no-cli). Same strategy core; the
|
|
67
|
+
// tool-mechanics layer is remapped from the ss-* CLI surface onto the
|
|
68
|
+
// sweet-search MCP tool surface. Read lazily — only the variant actually
|
|
69
|
+
// requested needs to exist, so importing this module never requires the MCP
|
|
70
|
+
// ship-file to be present.
|
|
71
|
+
const MCP_SHIP_FILE_REL = 'core/prompt-optimization/data/p7-final/sweet-search-system-prompt-mcp.md';
|
|
72
|
+
|
|
66
73
|
/** Strip a leading YAML front-matter block (`---\n … \n---\n`) if present. */
|
|
67
74
|
export function stripFrontMatter(text) {
|
|
68
75
|
return text.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n/, '');
|
|
69
76
|
}
|
|
70
77
|
|
|
71
|
-
function readShippedPolicy() {
|
|
78
|
+
function readShippedPolicy(rel = SHIP_FILE_REL, { label = 'M++' } = {}) {
|
|
72
79
|
const here = dirname(fileURLToPath(import.meta.url)); // <pkg>/scripts
|
|
73
|
-
const shipPath = join(here, '..',
|
|
80
|
+
const shipPath = join(here, '..', rel);
|
|
74
81
|
let raw;
|
|
75
82
|
try {
|
|
76
83
|
raw = readFileSync(shipPath, 'utf8');
|
|
77
84
|
} catch (err) {
|
|
78
85
|
throw new Error(
|
|
79
|
-
`inject-agent-instructions: cannot read the
|
|
86
|
+
`inject-agent-instructions: cannot read the ${label} ship-file at ${shipPath}. ` +
|
|
80
87
|
'It MUST be present (packaged via package.json "files"). Regenerate with ' +
|
|
81
88
|
'`node core/prompt-optimization/sweep/finalize-mpp.mjs`. ' +
|
|
82
89
|
`Cause: ${err.message}`,
|
|
@@ -84,13 +91,31 @@ function readShippedPolicy() {
|
|
|
84
91
|
}
|
|
85
92
|
const body = stripFrontMatter(raw).trimEnd();
|
|
86
93
|
if (!body) {
|
|
87
|
-
throw new Error(`inject-agent-instructions:
|
|
94
|
+
throw new Error(`inject-agent-instructions: ${label} ship-file at ${shipPath} has an empty body.`);
|
|
88
95
|
}
|
|
89
96
|
return body;
|
|
90
97
|
}
|
|
91
98
|
|
|
92
99
|
export const CANONICAL_POLICY_BODY = readShippedPolicy();
|
|
93
100
|
|
|
101
|
+
let _mcpPolicyBody = null;
|
|
102
|
+
/** Lazily read + cache the MCP-variant policy body. */
|
|
103
|
+
export function getMcpPolicyBody() {
|
|
104
|
+
if (_mcpPolicyBody == null) {
|
|
105
|
+
_mcpPolicyBody = readShippedPolicy(MCP_SHIP_FILE_REL, { label: 'M++ (MCP variant)' });
|
|
106
|
+
}
|
|
107
|
+
return _mcpPolicyBody;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* Resolve the policy body for a contact-surface variant.
|
|
112
|
+
* 'cli' (default) β the frozen ss-* CLI champion (CANONICAL_POLICY_BODY)
|
|
113
|
+
* 'mcp' β the MCP-tool variant (init --mcp --no-cli)
|
|
114
|
+
*/
|
|
115
|
+
export function getPolicyBody(variant = 'cli') {
|
|
116
|
+
return variant === 'mcp' ? getMcpPolicyBody() : CANONICAL_POLICY_BODY;
|
|
117
|
+
}
|
|
118
|
+
|
|
94
119
|
const CURSOR_FRONTMATTER = `---
|
|
95
120
|
description: Sweet Search tool-routing, stopping, and citation policy
|
|
96
121
|
alwaysApply: false
|
|
@@ -111,12 +136,12 @@ function wrapMarker(body) {
|
|
|
111
136
|
* full policy plus, for CLAUDE.md, an extra `@.claude/rules/sweet-search.md`
|
|
112
137
|
* import line so the Claude-specific shim is loaded.
|
|
113
138
|
*/
|
|
114
|
-
export function buildCanonicalBlock({ extraImports = [] } = {}) {
|
|
139
|
+
export function buildCanonicalBlock({ extraImports = [], policyBody = CANONICAL_POLICY_BODY } = {}) {
|
|
115
140
|
if (extraImports.length === 0) {
|
|
116
|
-
return wrapMarker(
|
|
141
|
+
return wrapMarker(policyBody);
|
|
117
142
|
}
|
|
118
143
|
const importLines = extraImports.map(t => `@${t}`).join('\n');
|
|
119
|
-
return wrapMarker(`${
|
|
144
|
+
return wrapMarker(`${policyBody}\n${importLines}\n`);
|
|
120
145
|
}
|
|
121
146
|
|
|
122
147
|
/**
|
|
@@ -133,8 +158,8 @@ export function buildImportBlock({ importTargets }) {
|
|
|
133
158
|
}
|
|
134
159
|
|
|
135
160
|
/** Body for the cursor .mdc (frontmatter + inlined canonical body). */
|
|
136
|
-
export function buildCursorFile() {
|
|
137
|
-
return CURSOR_FRONTMATTER + wrapMarker(
|
|
161
|
+
export function buildCursorFile(policyBody = CANONICAL_POLICY_BODY) {
|
|
162
|
+
return CURSOR_FRONTMATTER + wrapMarker(policyBody);
|
|
138
163
|
}
|
|
139
164
|
|
|
140
165
|
// βββ Marker injection βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -265,26 +290,36 @@ export function injectAgentInstructions({
|
|
|
265
290
|
projectRoot,
|
|
266
291
|
harnesses = ALL_HARNESSES,
|
|
267
292
|
useSymlinks = true,
|
|
293
|
+
variant = 'cli',
|
|
268
294
|
} = {}) {
|
|
269
295
|
if (!projectRoot) throw new TypeError('inject-agent-instructions: projectRoot is required');
|
|
270
296
|
const enabled = new Set(harnesses);
|
|
271
|
-
const report = { harnesses: {}, canonical: null };
|
|
297
|
+
const report = { harnesses: {}, canonical: null, variant };
|
|
272
298
|
|
|
273
299
|
if (enabled.size === 0) return report;
|
|
274
300
|
|
|
301
|
+
// Variant selects the policy body. The MCP variant retargets every ss-* CLI
|
|
302
|
+
// reference onto the sweet-search MCP tool surface; it also drops the Claude
|
|
303
|
+
// `@.claude/rules/sweet-search.md` import because that supplement is written
|
|
304
|
+
// in ss-* CLI terms and would contradict the MCP body (the CLI rules file is
|
|
305
|
+
// skipped under --no-cli in init too).
|
|
306
|
+
const policyBody = getPolicyBody(variant);
|
|
307
|
+
const claudeExtraImports = variant === 'mcp' ? [] : ['.claude/rules/sweet-search.md'];
|
|
308
|
+
|
|
275
309
|
// 1. Canonical file: CLAUDE.md when Claude Code is enabled, else AGENTS.md.
|
|
276
|
-
// Body is the full policy plus (Claude-only) the @.claude/rules import.
|
|
310
|
+
// Body is the full policy plus (Claude-only, CLI variant) the @.claude/rules import.
|
|
277
311
|
let canonicalFile;
|
|
278
312
|
let canonicalBlock;
|
|
279
313
|
if (enabled.has('claude-code')) {
|
|
280
314
|
canonicalFile = CLAUDE_FILE;
|
|
281
315
|
canonicalBlock = buildCanonicalBlock({
|
|
282
|
-
extraImports:
|
|
316
|
+
extraImports: claudeExtraImports,
|
|
317
|
+
policyBody,
|
|
283
318
|
});
|
|
284
319
|
report.canonical = 'claude-code';
|
|
285
320
|
} else if (enabled.has('agents') || enabled.has('gemini') || enabled.has('cursor')) {
|
|
286
321
|
canonicalFile = AGENTS_FILE;
|
|
287
|
-
canonicalBlock = buildCanonicalBlock();
|
|
322
|
+
canonicalBlock = buildCanonicalBlock({ policyBody });
|
|
288
323
|
report.canonical = 'agents'; // AGENTS.md is the multi-harness convention (Codex, OpenCode, …)
|
|
289
324
|
} else {
|
|
290
325
|
return report; // no canonical, nothing to write
|
|
@@ -343,12 +378,12 @@ export function injectAgentInstructions({
|
|
|
343
378
|
// and any user notes outside the markers.
|
|
344
379
|
report.harnesses.cursor = injectMarkerBlock({
|
|
345
380
|
filePath: cursorPath,
|
|
346
|
-
block: buildCanonicalBlock(),
|
|
381
|
+
block: buildCanonicalBlock({ policyBody }),
|
|
347
382
|
});
|
|
348
383
|
} else {
|
|
349
384
|
// Fresh file — write frontmatter + canonical body in marker block.
|
|
350
385
|
mkdirSync(dirname(cursorPath), { recursive: true });
|
|
351
|
-
writeFileSync(cursorPath, buildCursorFile());
|
|
386
|
+
writeFileSync(cursorPath, buildCursorFile(policyBody));
|
|
352
387
|
report.harnesses.cursor = 'created';
|
|
353
388
|
}
|
|
354
389
|
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Project-local MCP server registration for sweet-search.
|
|
3
|
+
*
|
|
4
|
+
* `sweet-search init --mcp` writes a `sweet-search` entry into the project's
|
|
5
|
+
* root `.mcp.json` — the project-scoped MCP config read by Claude Code and other
|
|
6
|
+
* MCP hosts (see docs/search/MCP_INTEGRATION.md §`.mcp.json`). Additive and
|
|
7
|
+
* idempotent: existing servers and any other top-level keys are preserved; only
|
|
8
|
+
* `mcpServers.sweet-search` is created/updated.
|
|
9
|
+
*
|
|
10
|
+
* Design notes:
|
|
11
|
+
* - This is ROOT-level and harness-agnostic. It is independent of `--no-claude`
|
|
12
|
+
* (which gates `.claude/*` writes only). `.mcp.json` lives at the repo root.
|
|
13
|
+
* - The MCP server is a thin adapter over the SAME engine the CLI wraps. `--mcp`
|
|
14
|
+
* ADDS it; it never replaces the CLI. `--no-cli` only swaps the agent's
|
|
15
|
+
* *contact surface* to MCP — indexing still runs through the CLI/engine.
|
|
16
|
+
* - We never clobber an unparseable user `.mcp.json`; we fail loudly instead.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync } from 'node:fs';
|
|
20
|
+
import { dirname, join } from 'node:path';
|
|
21
|
+
|
|
22
|
+
export const MCP_CONFIG_FILE = '.mcp.json';
|
|
23
|
+
export const MCP_SERVER_KEY = 'sweet-search';
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* The canonical server entry. Uses `npx -y sweet-search-mcp` (the published bin)
|
|
27
|
+
* so the registration keeps working after a global/local install without a
|
|
28
|
+
* hard-coded path, and pins the target repo via `--project-root`.
|
|
29
|
+
*/
|
|
30
|
+
export function buildServerEntry({ projectRoot }) {
|
|
31
|
+
return {
|
|
32
|
+
command: 'npx',
|
|
33
|
+
args: ['-y', 'sweet-search-mcp', '--project-root', projectRoot],
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function isPlainObject(v) {
|
|
38
|
+
return v !== null && typeof v === 'object' && !Array.isArray(v);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
 * Structural equality for JSON-shaped values (primitives, arrays, plain objects).
 *
 * Object keys are compared as a set, so two objects holding the same data in a
 * different key order are equal. Comparing `JSON.stringify` output would treat
 * such objects as different, because serialization follows insertion order.
 * Array element order is still significant.
 *
 * @param {unknown} a - First value.
 * @param {unknown} b - Second value.
 * @returns {boolean} `true` when both values have the same structure and content.
 */
function deepEqual(a, b) {
  if (a === b) return true;
  if (a === null || b === null || typeof a !== 'object' || typeof b !== 'object') return false;

  const aIsArray = Array.isArray(a);
  if (aIsArray !== Array.isArray(b)) return false;
  if (aIsArray) {
    return a.length === b.length && a.every((item, index) => deepEqual(item, b[index]));
  }

  const keysA = Object.keys(a);
  if (keysA.length !== Object.keys(b).length) return false;
  // Same key count plus every key of `a` owned by `b` means the key sets match.
  return keysA.every(
    (key) => Object.prototype.hasOwnProperty.call(b, key) && deepEqual(a[key], b[key]),
  );
}
|
|
44
|
+
|
|
45
|
+
/**
 * Install/update the sweet-search MCP server registration in
 * `<projectRoot>/<configFile>`.
 *
 * Idempotent: only `mcpServers.sweet-search` is created or rewritten; other
 * servers and other top-level keys are written back as parsed.
 *
 * @param {object} options
 * @param {string} options.projectRoot - Directory holding the config file; also
 *   passed to the server entry as `--project-root`.
 * @param {string} [options.configFile=MCP_CONFIG_FILE] - Config file name,
 *   joined onto `projectRoot`.
 * @returns {{ status: string, path: string, detail?: string }} where status is:
 *   'created'   — wrote a fresh config file
 *   'added'     — file existed, added our server entry
 *   'updated'   — our entry existed but differed; rewritten
 *   'unchanged' — our entry already matches; nothing written
 *   'error'     — existing file is unreadable, not valid JSON, not a JSON
 *                 object, or has a non-object `mcpServers` (left untouched;
 *                 `detail` explains why)
 * @throws {TypeError} When `projectRoot` is missing.
 */
export function installMcpServer({ projectRoot, configFile = MCP_CONFIG_FILE } = {}) {
  if (!projectRoot) throw new TypeError('install-mcp-server: projectRoot is required');
  const configPath = join(projectRoot, configFile);
  const entry = buildServerEntry({ projectRoot });

  let config = {};
  const existed = existsSync(configPath);
  if (existed) {
    let raw;
    try {
      raw = readFileSync(configPath, 'utf8');
    } catch (err) {
      return { status: 'error', path: configPath, detail: `cannot read ${configFile}: ${err.message}` };
    }
    try {
      config = JSON.parse(raw);
    } catch (err) {
      return { status: 'error', path: configPath, detail: `existing ${configFile} is not valid JSON: ${err.message}` };
    }
    if (!isPlainObject(config)) {
      return { status: 'error', path: configPath, detail: `existing ${configFile} is not a JSON object` };
    }
  }

  if (config.mcpServers === undefined || config.mcpServers === null) {
    config.mcpServers = {};
  } else if (!isPlainObject(config.mcpServers)) {
    // An array/string/number here is user content we do not understand.
    // Overwriting it with `{}` would silently destroy it, so fail loudly.
    return {
      status: 'error',
      path: configPath,
      detail: `existing ${configFile} has a "mcpServers" value that is not a JSON object`,
    };
  }

  const prev = config.mcpServers[MCP_SERVER_KEY];
  if (prev && deepEqual(prev, entry)) {
    return { status: 'unchanged', path: configPath };
  }
  const hadEntry = prev !== undefined;
  config.mcpServers[MCP_SERVER_KEY] = entry;

  // `configFile` may contain sub-directories; make sure the parent exists.
  mkdirSync(dirname(configPath), { recursive: true });
  writeFileSync(configPath, JSON.stringify(config, null, 2) + '\n');
  return { status: existed ? (hadEntry ? 'updated' : 'added') : 'created', path: configPath };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Undo `installMcpServer`: drop the `mcpServers.sweet-search` entry from
 * `<projectRoot>/<configFile>`.
 *
 * Other servers and other top-level keys are written back untouched. The file
 * itself is deleted only when nothing else remains in it (no other servers and
 * no top-level key besides `mcpServers`). A file that cannot be read or parsed,
 * or that holds no entry of ours, is never modified.
 *
 * @param {object} options
 * @param {string} options.projectRoot - Directory holding the config file.
 * @param {string} [options.configFile=MCP_CONFIG_FILE] - Config file name,
 *   joined onto `projectRoot`.
 * @param {boolean} [options.dryRun=false] - When true, report 'dry-run' instead
 *   of modifying anything if our entry is present.
 * @returns {'removed' | 'file-deleted' | 'not-found' | 'dry-run'}
 * @throws {TypeError} When `projectRoot` is missing.
 */
export function removeMcpServer({ projectRoot, configFile = MCP_CONFIG_FILE, dryRun = false } = {}) {
  if (!projectRoot) throw new TypeError('remove-mcp-server: projectRoot is required');

  const target = join(projectRoot, configFile);
  if (!existsSync(target)) return 'not-found';

  let parsed;
  try {
    const text = readFileSync(target, 'utf8');
    parsed = JSON.parse(text);
  } catch {
    return 'not-found'; // unparseable / not ours — never touch it
  }

  const servers = isPlainObject(parsed) ? parsed.mcpServers : undefined;
  if (!isPlainObject(servers) || !(MCP_SERVER_KEY in servers)) return 'not-found';
  if (dryRun) return 'dry-run';

  delete servers[MCP_SERVER_KEY];

  const serversRemain = Object.keys(servers).length > 0;
  const onlyServersKey = Object.keys(parsed).every((key) => key === 'mcpServers');
  if (serversRemain || !onlyServersKey) {
    // Something else still lives in the file: keep it and write the rest back.
    writeFileSync(target, JSON.stringify(parsed, null, 2) + '\n');
    return 'removed';
  }

  unlinkSync(target);
  return 'file-deleted';
}
|
package/scripts/uninstall.js
CHANGED
|
@@ -20,6 +20,7 @@ import { getCoremlCascadeRoot, getCoremlCascadeState } from '../core/infrastruct
|
|
|
20
20
|
import { PREWARM_HOOK_FILENAME } from './init.js';
|
|
21
21
|
import { removeAgentInstructions } from './inject-agent-instructions.js';
|
|
22
22
|
import { removeClaudeRules } from './write-claude-rules.js';
|
|
23
|
+
import { removeMcpServer } from './install-mcp-server.js';
|
|
23
24
|
import { removePromptReminderHook } from './install-prompt-reminders.js';
|
|
24
25
|
import { removeToolEnforcement } from './install-tool-enforcement.js';
|
|
25
26
|
import { projectSocketPath, projectPidFile } from '../core/search/server-identity.js';
|
|
@@ -711,11 +712,16 @@ export async function runUninstall(args) {
|
|
|
711
712
|
const codexHookPreview = removeCodexSessionStartHook(projectRoot, { dryRun: true });
|
|
712
713
|
const hasCodexHook = codexHookPreview.status === 'dry-run';
|
|
713
714
|
|
|
715
|
+
// MCP server registration (.mcp.json mcpServers.sweet-search), written by
|
|
716
|
+
// `init --mcp`.
|
|
717
|
+
const mcpServerPreview = removeMcpServer({ projectRoot, dryRun: true });
|
|
718
|
+
const hasMcpServer = mcpServerPreview === 'dry-run';
|
|
719
|
+
|
|
714
720
|
// Nothing to remove?
|
|
715
721
|
if (
|
|
716
722
|
removals.length === 0 && !hasHookEntry && !hasSkillEntry && !hasIndexMaintainerHook
|
|
717
723
|
&& !agentInstructionsTouched && !hasClaudeRules
|
|
718
|
-
&& !hasPromptReminder && !hasToolEnforcement && !hasCodexHook
|
|
724
|
+
&& !hasPromptReminder && !hasToolEnforcement && !hasCodexHook && !hasMcpServer
|
|
719
725
|
) {
|
|
720
726
|
console.log('Nothing to remove β Sweet Search is not initialized in this project.');
|
|
721
727
|
return;
|
|
@@ -758,6 +764,9 @@ export async function runUninstall(args) {
|
|
|
758
764
|
if (hasCodexHook) {
|
|
759
765
|
console.log(` Codex SessionStart hook (.codex/hooks.json)`);
|
|
760
766
|
}
|
|
767
|
+
if (hasMcpServer) {
|
|
768
|
+
console.log(` MCP server registration (.mcp.json β mcpServers.sweet-search)`);
|
|
769
|
+
}
|
|
761
770
|
console.log(` Total: ${formatBytes(totalBytes)}`);
|
|
762
771
|
if (parsed.keepModels) {
|
|
763
772
|
console.log(' Model cache: kept (--keep-models)');
|
|
@@ -783,6 +792,10 @@ export async function runUninstall(args) {
|
|
|
783
792
|
if (dryCodex.status === 'dry-run') {
|
|
784
793
|
console.log(` Would also remove: Codex SessionStart hook (.codex/hooks.json β ${dryCodex.detail})`);
|
|
785
794
|
}
|
|
795
|
+
const dryMcp = removeMcpServer({ projectRoot, dryRun: true });
|
|
796
|
+
if (dryMcp === 'dry-run') {
|
|
797
|
+
console.log(` Would also remove: MCP server registration (.mcp.json β mcpServers.sweet-search)`);
|
|
798
|
+
}
|
|
786
799
|
console.log('Dry run β nothing was removed.');
|
|
787
800
|
return;
|
|
788
801
|
}
|
|
@@ -949,6 +962,18 @@ export async function runUninstall(args) {
|
|
|
949
962
|
kept++;
|
|
950
963
|
}
|
|
951
964
|
|
|
965
|
+
// MCP server registration (.mcp.json mcpServers.sweet-search). Only our entry
|
|
966
|
+
// is removed; other servers and JSON keys are preserved.
|
|
967
|
+
const mcpServerResult = removeMcpServer({ projectRoot, dryRun: parsed.dryRun });
|
|
968
|
+
if (mcpServerResult === 'removed') {
|
|
969
|
+
console.log(` Removed: MCP server registration (.mcp.json β mcpServers.sweet-search)`);
|
|
970
|
+
removed++;
|
|
971
|
+
} else if (mcpServerResult === 'file-deleted') {
|
|
972
|
+
console.log(` Removed: .mcp.json (wholly sweet-search-managed)`);
|
|
973
|
+
removed++;
|
|
974
|
+
}
|
|
975
|
+
// 'not-found' / 'dry-run' are silent.
|
|
976
|
+
|
|
952
977
|
// Purge npm packages
|
|
953
978
|
if (parsed.purge) {
|
|
954
979
|
console.log('');
|