semble 0.3.2__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semble-0.3.2 → semble-0.3.3}/PKG-INFO +8 -3
- {semble-0.3.2 → semble-0.3.3}/README.md +7 -2
- {semble-0.3.2 → semble-0.3.3}/docs/installation.md +50 -2
- semble-0.3.3/src/semble/agents/pi.md +40 -0
- semble-0.3.3/src/semble/agents/reasonix.md +42 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/cache.py +18 -2
- {semble-0.3.2 → semble-0.3.3}/src/semble/cli.py +43 -2
- {semble-0.3.2 → semble-0.3.3}/src/semble/installer/agents.py +27 -11
- {semble-0.3.2 → semble-0.3.3}/src/semble/installer/config.py +2 -2
- {semble-0.3.2 → semble-0.3.3}/src/semble/installer/installer.py +8 -8
- {semble-0.3.2 → semble-0.3.3}/src/semble/version.py +1 -1
- {semble-0.3.2 → semble-0.3.3}/src/semble.egg-info/PKG-INFO +8 -3
- {semble-0.3.2 → semble-0.3.3}/src/semble.egg-info/SOURCES.txt +2 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_cache.py +17 -4
- {semble-0.3.2 → semble-0.3.3}/tests/test_cli.py +143 -3
- {semble-0.3.2 → semble-0.3.3}/tests/test_installer.py +17 -11
- {semble-0.3.2 → semble-0.3.3}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {semble-0.3.2 → semble-0.3.3}/.github/workflows/ci.yaml +0 -0
- {semble-0.3.2 → semble-0.3.3}/.github/workflows/release.yaml +0 -0
- {semble-0.3.2 → semble-0.3.3}/.gitignore +0 -0
- {semble-0.3.2 → semble-0.3.3}/.pre-commit-config.yaml +0 -0
- {semble-0.3.2 → semble-0.3.3}/CITATION.cff +0 -0
- {semble-0.3.2 → semble-0.3.3}/CONTRIBUTING.md +0 -0
- {semble-0.3.2 → semble-0.3.3}/LICENSE +0 -0
- {semble-0.3.2 → semble-0.3.3}/MANIFEST.in +0 -0
- {semble-0.3.2 → semble-0.3.3}/Makefile +0 -0
- {semble-0.3.2 → semble-0.3.3}/pyproject.toml +0 -0
- {semble-0.3.2 → semble-0.3.3}/setup.cfg +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/agents/claude.md +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/agents/copilot.md +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/agents/cursor.md +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/agents/gemini.md +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/agents/kiro.md +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/agents/opencode.md +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/chunking/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/chunking/chunking.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/chunking/core.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/index/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/index/create.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/index/dense.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/index/file_walker.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/index/files.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/index/index.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/index/sparse.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/index/types.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/installer/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/mcp.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/py.typed +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/ranking/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/ranking/boosting.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/ranking/penalties.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/ranking/weighting.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/search.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/stats.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/tokens.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/types.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble/utils.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble.egg-info/dependency_links.txt +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble.egg-info/entry_points.txt +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble.egg-info/requires.txt +0 -0
- {semble-0.3.2 → semble-0.3.3}/src/semble.egg-info/top_level.txt +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/conftest.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/index/test_dense.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/index/test_index.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_chunker.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_file_walker.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_files.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_git.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_mcp.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_ranking.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_search.py +0 -0
- {semble-0.3.2 → semble-0.3.3}/tests/test_stats.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: semble
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: Fast and Accurate Code Search for Agents
|
|
5
5
|
Author-email: Thomas van Dongen <thomasvdongen@proton.me>, Stéphan Tulkens <stephantul@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -98,7 +98,7 @@ Dynamic: license-file
|
|
|
98
98
|
|
|
99
99
|
</div>
|
|
100
100
|
|
|
101
|
-
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services.
|
|
101
|
+
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Use it as an MCP server, a CLI tool via AGENTS.md, or a dedicated sub-agent, and any coding agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
|
|
102
102
|
|
|
103
103
|
## Quickstart
|
|
104
104
|
|
|
@@ -210,7 +210,12 @@ semble savings --verbose # also show breakdown by call type
|
|
|
210
210
|
|
|
211
211
|
Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.
|
|
212
212
|
|
|
213
|
-
|
|
213
|
+
</details>
|
|
214
|
+
|
|
215
|
+
<details>
|
|
216
|
+
<summary>Storage</summary>
|
|
217
|
+
|
|
218
|
+
By default, your Semble savings statistics and any saved indexes are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location you can supply an environment variable `SEMBLE_CACHE_LOCATION` which should be the full path to the target cache location e.g. `~/my-folder/my-caches/semble`.
|
|
214
219
|
|
|
215
220
|
</details>
|
|
216
221
|
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
|
|
25
25
|
</div>
|
|
26
26
|
|
|
27
|
-
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services.
|
|
27
|
+
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Use it as an MCP server, a CLI tool via AGENTS.md, or a dedicated sub-agent, and any coding agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
|
|
28
28
|
|
|
29
29
|
## Quickstart
|
|
30
30
|
|
|
@@ -136,7 +136,12 @@ semble savings --verbose # also show breakdown by call type
|
|
|
136
136
|
|
|
137
137
|
Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.
|
|
138
138
|
|
|
139
|
-
|
|
139
|
+
</details>
|
|
140
|
+
|
|
141
|
+
<details>
|
|
142
|
+
<summary>Storage</summary>
|
|
143
|
+
|
|
144
|
+
By default, your Semble savings statistics and any saved indexes are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location you can supply an environment variable `SEMBLE_CACHE_LOCATION` which should be the full path to the target cache location e.g. `~/my-folder/my-caches/semble`.
|
|
140
145
|
|
|
141
146
|
</details>
|
|
142
147
|
|
|
@@ -21,7 +21,9 @@ To undo:
|
|
|
21
21
|
semble uninstall
|
|
22
22
|
```
|
|
23
23
|
|
|
24
|
-
Supported agents: Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Codex, VS Code, Windsurf, and Zed.
|
|
24
|
+
Supported agents: Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Codex, VS Code, Windsurf, Zed, Reasonix, and Pi.
|
|
25
|
+
|
|
26
|
+
> **Pi prerequisite:** Pi requires the MCP extension to be installed before semble can connect. Run `pi install npm:pi-mcp-extension` once, then `semble install`.
|
|
25
27
|
|
|
26
28
|
---
|
|
27
29
|
|
|
@@ -198,6 +200,48 @@ Add to `~/.config/zed/settings.json` (or `.zed/settings.json` in your project):
|
|
|
198
200
|
|
|
199
201
|
</details>
|
|
200
202
|
|
|
203
|
+
<details>
|
|
204
|
+
<summary>Reasonix</summary>
|
|
205
|
+
|
|
206
|
+
Add to `~/.reasonix/config.json` (the backwards-compatible MCP config path read by all Reasonix versions):
|
|
207
|
+
|
|
208
|
+
```json
|
|
209
|
+
{
|
|
210
|
+
"mcpServers": {
|
|
211
|
+
"semble": {
|
|
212
|
+
"command": "uvx",
|
|
213
|
+
"args": ["--from", "semble[mcp]", "semble"]
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
</details>
|
|
220
|
+
|
|
221
|
+
<details>
|
|
222
|
+
<summary>Pi</summary>
|
|
223
|
+
|
|
224
|
+
First install the Pi MCP extension (one-time prerequisite):
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
pi install npm:pi-mcp-extension
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Then add to `~/.pi/agent/mcp.json`:
|
|
231
|
+
|
|
232
|
+
```json
|
|
233
|
+
{
|
|
234
|
+
"mcpServers": {
|
|
235
|
+
"semble": {
|
|
236
|
+
"command": "uvx",
|
|
237
|
+
"args": ["--from", "semble[mcp]", "semble"]
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
</details>
|
|
244
|
+
|
|
201
245
|
By default the MCP server indexes only code files. To also index documentation, config, or everything, append `--content docs`, `--content config`, or `--content all` to the server command. For example, in Claude Code:
|
|
202
246
|
|
|
203
247
|
```bash
|
|
@@ -250,7 +294,9 @@ If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its plac
|
|
|
250
294
|
|
|
251
295
|
### Sub-agent
|
|
252
296
|
|
|
253
|
-
For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot), you can install a dedicated `semble-search` sub-agent. Copy the appropriate file from [`src/semble/agents/`](../src/semble/agents/) to your agent's agents directory:
|
|
297
|
+
For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Reasonix, Pi), you can install a dedicated `semble-search` sub-agent. Copy the appropriate file from [`src/semble/agents/`](../src/semble/agents/) to your agent's agents directory:
|
|
298
|
+
|
|
299
|
+
> **Pi prerequisite:** Pi sub-agents require the Pi agents extension. Run `pi install npm:pi-agents` once before installing.
|
|
254
300
|
|
|
255
301
|
| Agent | File | Destination |
|
|
256
302
|
|---|---|---|
|
|
@@ -260,3 +306,5 @@ For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, Op
|
|
|
260
306
|
| Kiro | `kiro.md` | `~/.kiro/agents/semble-search.md` |
|
|
261
307
|
| OpenCode | `opencode.md` | `~/.config/opencode/agents/semble-search.md` |
|
|
262
308
|
| GitHub Copilot | `copilot.md` | `~/.copilot/agents/semble-search.agent.md` |
|
|
309
|
+
| Reasonix | `reasonix.md` | `~/.reasonix/skills/semble-search.md` |
|
|
310
|
+
| Pi | `pi.md` | `~/.pi/agents/semble-search.md` |
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: semble-search
|
|
3
|
+
description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Bash/Read for any semantic or exploratory question.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
semble search "authentication flow" ./my-project
|
|
10
|
+
semble search "save_pretrained" ./my-project
|
|
11
|
+
semble search "save model to disk" ./my-project --top-k 10
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Results are cached automatically on first run and invalidated when files change.
|
|
15
|
+
|
|
16
|
+
Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
semble search "deployment guide" ./my-project --content docs
|
|
20
|
+
semble search "database host port" ./my-project --content config
|
|
21
|
+
semble search "authentication" ./my-project --content all
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
semble find-related src/auth.py 42 ./my-project
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
`path` defaults to the current directory when omitted; git URLs are accepted.
|
|
31
|
+
|
|
32
|
+
If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
|
|
33
|
+
|
|
34
|
+
### Workflow
|
|
35
|
+
|
|
36
|
+
1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
|
|
37
|
+
2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
|
|
38
|
+
3. Inspect full files only when the returned chunk does not give enough context.
|
|
39
|
+
4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
|
|
40
|
+
5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: semble-search
|
|
3
|
+
description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over bash/grep for any semantic or exploratory question.
|
|
4
|
+
runAs: subagent
|
|
5
|
+
allowed-tools: bash, read_file
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
semble search "authentication flow" ./my-project
|
|
12
|
+
semble search "save_pretrained" ./my-project
|
|
13
|
+
semble search "save model to disk" ./my-project --top-k 10
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Results are cached automatically on first run and invalidated when files change.
|
|
17
|
+
|
|
18
|
+
Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
semble search "deployment guide" ./my-project --content docs
|
|
22
|
+
semble search "database host port" ./my-project --content config
|
|
23
|
+
semble search "authentication" ./my-project --content all
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
semble find-related src/auth.py 42 ./my-project
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
`path` defaults to the current directory when omitted; git URLs are accepted.
|
|
33
|
+
|
|
34
|
+
If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
|
|
35
|
+
|
|
36
|
+
### Workflow
|
|
37
|
+
|
|
38
|
+
1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
|
|
39
|
+
2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
|
|
40
|
+
3. Inspect full files only when the returned chunk does not give enough context.
|
|
41
|
+
4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
|
|
42
|
+
5. Use bash/grep only when you need exhaustive literal matches or quick confirmation of an exact string.
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import json
|
|
3
|
+
import logging
|
|
3
4
|
import os
|
|
4
5
|
import shutil
|
|
5
6
|
import sys
|
|
@@ -13,6 +14,8 @@ from semble.index.types import PersistencePath
|
|
|
13
14
|
from semble.types import ContentType
|
|
14
15
|
from semble.utils import is_git_url, resolve_model_name
|
|
15
16
|
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
16
19
|
if TYPE_CHECKING:
|
|
17
20
|
from semble.index import SembleIndex
|
|
18
21
|
|
|
@@ -48,11 +51,24 @@ def _linux_cache_dir(name: str) -> Path:
|
|
|
48
51
|
return base / name
|
|
49
52
|
|
|
50
53
|
|
|
54
|
+
def _get_valid_user_cache_dir() -> Path | None:
|
|
55
|
+
"""Gets the user cache dir if it is set and is a valid path."""
|
|
56
|
+
user_cache_location = os.getenv("SEMBLE_CACHE_LOCATION")
|
|
57
|
+
if user_cache_location is None:
|
|
58
|
+
return None
|
|
59
|
+
user_cache_dir = Path(user_cache_location)
|
|
60
|
+
if not user_cache_dir.is_absolute():
|
|
61
|
+
logger.warning("SEMBLE_CACHE_LOCATION is not an absolute path: %s", user_cache_location)
|
|
62
|
+
return None
|
|
63
|
+
|
|
64
|
+
return user_cache_dir
|
|
65
|
+
|
|
66
|
+
|
|
51
67
|
def resolve_cache_folder() -> Path:
|
|
52
68
|
"""Resolves a cache folder, respects SEMBLE_CACHE_LOCATION (highest precedence), XDG_CACHE_HOME."""
|
|
53
69
|
name = "semble"
|
|
54
|
-
if
|
|
55
|
-
cache_dir =
|
|
70
|
+
if user_cache_dir := _get_valid_user_cache_dir():
|
|
71
|
+
cache_dir = user_cache_dir
|
|
56
72
|
elif sys.platform == "win32":
|
|
57
73
|
cache_dir = _windows_cache_dir(name)
|
|
58
74
|
elif sys.platform == "darwin":
|
|
@@ -1,19 +1,26 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import asyncio
|
|
3
3
|
import json
|
|
4
|
+
import re
|
|
4
5
|
import sys
|
|
5
6
|
import warnings
|
|
6
7
|
from importlib.util import find_spec
|
|
8
|
+
from shutil import rmtree
|
|
9
|
+
from typing import Literal
|
|
7
10
|
|
|
8
11
|
from model2vec.utils import get_package_extras
|
|
9
12
|
|
|
10
|
-
from semble.cache import find_index_from_cache_folder
|
|
13
|
+
from semble.cache import find_index_from_cache_folder, resolve_cache_folder
|
|
11
14
|
from semble.index import SembleIndex
|
|
15
|
+
from semble.index.types import PersistencePath
|
|
12
16
|
from semble.stats import format_savings_report
|
|
13
17
|
from semble.types import ContentType
|
|
14
18
|
from semble.utils import format_results, is_git_url, resolve_chunk
|
|
15
19
|
|
|
16
|
-
_CLI_DISPATCH_ARGS = frozenset({"search", "find-related", "install", "uninstall", "savings", "-h", "--help"})
|
|
20
|
+
_CLI_DISPATCH_ARGS = frozenset({"search", "find-related", "install", "uninstall", "savings", "-h", "--help", "clear"})
|
|
21
|
+
_CLEAR_CHOICE = Literal["all", "index", "savings"]
|
|
22
|
+
|
|
23
|
+
_SHA_256_REGEX = re.compile(r"^[a-f0-9]{64}$")
|
|
17
24
|
|
|
18
25
|
|
|
19
26
|
def _build_index(path: str, content: list[ContentType]) -> SembleIndex:
|
|
@@ -131,6 +138,35 @@ def _run_find_related(path: str, file_path: str, line: int, top_k: int, content:
|
|
|
131
138
|
_maybe_save_index(index, path)
|
|
132
139
|
|
|
133
140
|
|
|
141
|
+
def _run_clear(clear_type: _CLEAR_CHOICE) -> None:
|
|
142
|
+
"""Run the `clear` subcommand."""
|
|
143
|
+
cache_folder = resolve_cache_folder()
|
|
144
|
+
if clear_type == "index" or clear_type == "all":
|
|
145
|
+
indexes = []
|
|
146
|
+
for path in cache_folder.glob("*/index"):
|
|
147
|
+
if not _SHA_256_REGEX.match(path.parent.name):
|
|
148
|
+
continue
|
|
149
|
+
if PersistencePath.from_path(path).non_existing():
|
|
150
|
+
continue
|
|
151
|
+
indexes.append(path)
|
|
152
|
+
|
|
153
|
+
if not indexes:
|
|
154
|
+
print(f"No indexes found to clear in `{cache_folder}`")
|
|
155
|
+
else:
|
|
156
|
+
for path in indexes:
|
|
157
|
+
index_folder = path.parent
|
|
158
|
+
rmtree(index_folder)
|
|
159
|
+
print(f"Cleared index at `{index_folder}`")
|
|
160
|
+
|
|
161
|
+
if clear_type == "savings" or clear_type == "all":
|
|
162
|
+
path = cache_folder / "savings.jsonl"
|
|
163
|
+
if not path.exists():
|
|
164
|
+
print(f"No savings file found at `{path}`")
|
|
165
|
+
else:
|
|
166
|
+
path.unlink()
|
|
167
|
+
print(f"Cleared savings at `{path}`")
|
|
168
|
+
|
|
169
|
+
|
|
134
170
|
def _cli_main() -> None:
|
|
135
171
|
parser = argparse.ArgumentParser(prog="semble")
|
|
136
172
|
sub = parser.add_subparsers(dest="command")
|
|
@@ -141,6 +177,9 @@ def _cli_main() -> None:
|
|
|
141
177
|
search_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
|
|
142
178
|
_add_content_args(search_p)
|
|
143
179
|
|
|
180
|
+
clear_p = sub.add_parser("clear", help="Clear the index cache.")
|
|
181
|
+
clear_p.add_argument("type", choices=["all", "index", "savings"], help="Type of cache to clear.")
|
|
182
|
+
|
|
144
183
|
related_p = sub.add_parser("find-related", help="Find code similar to a specific location.")
|
|
145
184
|
related_p.add_argument("file_path", help="File path as shown in search results.")
|
|
146
185
|
related_p.add_argument("line", type=int, help="Line number (1-indexed).")
|
|
@@ -162,6 +201,8 @@ def _cli_main() -> None:
|
|
|
162
201
|
from semble.installer import run
|
|
163
202
|
|
|
164
203
|
run(args.command)
|
|
204
|
+
elif args.command == "clear":
|
|
205
|
+
_run_clear(args.type)
|
|
165
206
|
elif args.command == "search":
|
|
166
207
|
_run_search(args.path, args.query, args.top_k, _resolve_content(args.content, args.include_text_files))
|
|
167
208
|
elif args.command == "find-related":
|
|
@@ -5,13 +5,12 @@ import shutil
|
|
|
5
5
|
import sys
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import Callable, Literal
|
|
8
|
+
from typing import Literal
|
|
9
9
|
|
|
10
10
|
_HOME = Path.home()
|
|
11
11
|
|
|
12
12
|
Action = Literal["created", "updated", "unchanged", "not-found", "removed", "error", "skipped"]
|
|
13
13
|
Mode = Literal["install", "uninstall"]
|
|
14
|
-
PathResolver = Callable[[], Path]
|
|
15
14
|
|
|
16
15
|
SEMBLE_START = "<!-- SEMBLE_START -->"
|
|
17
16
|
SEMBLE_END = "<!-- SEMBLE_END -->"
|
|
@@ -39,7 +38,7 @@ _ZED_SERVER_CONFIG: dict[str, object] = { # Zed requires "source": "custom" for
|
|
|
39
38
|
"args": ["--from", "semble[mcp]", "semble"],
|
|
40
39
|
}
|
|
41
40
|
|
|
42
|
-
|
|
41
|
+
INSTRUCTIONS = f"""\
|
|
43
42
|
{SEMBLE_START}
|
|
44
43
|
## Semble Code Search
|
|
45
44
|
|
|
@@ -78,15 +77,11 @@ The index is built on first run and cached automatically. If `semble` is not on
|
|
|
78
77
|
class McpConfig:
|
|
79
78
|
"""MCP integration config for one agent."""
|
|
80
79
|
|
|
81
|
-
path: Path | PathResolver
|
|
80
|
+
path: Path
|
|
82
81
|
key: str
|
|
83
82
|
entry: dict[str, object]
|
|
84
83
|
format: Literal["json", "toml"] = "json"
|
|
85
84
|
|
|
86
|
-
def resolved_path(self) -> Path:
|
|
87
|
-
"""Return the resolved config path."""
|
|
88
|
-
return self.path() if callable(self.path) else self.path
|
|
89
|
-
|
|
90
85
|
|
|
91
86
|
@dataclass(frozen=True)
|
|
92
87
|
class WriteResult:
|
|
@@ -110,7 +105,7 @@ class AgentTarget:
|
|
|
110
105
|
|
|
111
106
|
def resolved_mcp_path(self) -> Path | None:
|
|
112
107
|
"""Return the resolved MCP config path, or None if MCP is unsupported."""
|
|
113
|
-
return self.mcp.resolved_path() if self.mcp else None
|
|
108
|
+
return self.mcp.path if self.mcp else None
|
|
114
109
|
|
|
115
110
|
|
|
116
111
|
def _opencode_mcp_path() -> Path:
|
|
@@ -175,7 +170,7 @@ AGENTS: list[AgentTarget] = [
|
|
|
175
170
|
display_name="Opencode",
|
|
176
171
|
binary="opencode",
|
|
177
172
|
config_dir=_HOME / ".config" / "opencode",
|
|
178
|
-
mcp=McpConfig(_opencode_mcp_path, "mcp", _OPENCODE_SERVER_CONFIG),
|
|
173
|
+
mcp=McpConfig(_opencode_mcp_path(), "mcp", _OPENCODE_SERVER_CONFIG),
|
|
179
174
|
instructions_path=_HOME / ".config" / "opencode" / "AGENTS.md",
|
|
180
175
|
subagent_path=_HOME / ".config" / "opencode" / "agents" / "semble-search.md",
|
|
181
176
|
),
|
|
@@ -201,7 +196,7 @@ AGENTS: list[AgentTarget] = [
|
|
|
201
196
|
display_name="VS Code",
|
|
202
197
|
binary="code",
|
|
203
198
|
config_dir=None,
|
|
204
|
-
mcp=McpConfig(_vscode_mcp_path, "servers", _STDIO_SERVER_CONFIG),
|
|
199
|
+
mcp=McpConfig(_vscode_mcp_path(), "servers", _STDIO_SERVER_CONFIG),
|
|
205
200
|
instructions_path=None,
|
|
206
201
|
),
|
|
207
202
|
AgentTarget(
|
|
@@ -220,6 +215,27 @@ AGENTS: list[AgentTarget] = [
|
|
|
220
215
|
mcp=McpConfig(_HOME / ".config" / "zed" / "settings.json", "context_servers", _ZED_SERVER_CONFIG),
|
|
221
216
|
instructions_path=None,
|
|
222
217
|
),
|
|
218
|
+
AgentTarget(
|
|
219
|
+
id="reasonix",
|
|
220
|
+
display_name="Reasonix",
|
|
221
|
+
binary="reasonix",
|
|
222
|
+
config_dir=_HOME / ".config" / "reasonix",
|
|
223
|
+
# ~/.reasonix/config.json is the legacy v0.x path still read by v1.x for backwards compat.
|
|
224
|
+
# The v1.x canonical config is ~/.config/reasonix/config.toml ([[plugins]]), but the JSON
|
|
225
|
+
# path requires no special TOML handling and works for new users who have never had v0.x.
|
|
226
|
+
mcp=McpConfig(_HOME / ".reasonix" / "config.json", "mcpServers", _BARE_STDIO_SERVER_CONFIG),
|
|
227
|
+
instructions_path=_HOME / ".config" / "reasonix" / "REASONIX.md",
|
|
228
|
+
subagent_path=_HOME / ".reasonix" / "skills" / "semble-search.md",
|
|
229
|
+
),
|
|
230
|
+
AgentTarget(
|
|
231
|
+
id="pi",
|
|
232
|
+
display_name="Pi",
|
|
233
|
+
binary="pi",
|
|
234
|
+
config_dir=_HOME / ".pi",
|
|
235
|
+
mcp=McpConfig(_HOME / ".pi" / "agent" / "mcp.json", "mcpServers", _BARE_STDIO_SERVER_CONFIG),
|
|
236
|
+
instructions_path=None,
|
|
237
|
+
subagent_path=_HOME / ".pi" / "agents" / "semble-search.md",
|
|
238
|
+
),
|
|
223
239
|
]
|
|
224
240
|
|
|
225
241
|
|
|
@@ -230,7 +230,7 @@ def _strip_toml_section(text: str, header: str) -> str:
|
|
|
230
230
|
return "".join(result)
|
|
231
231
|
|
|
232
232
|
|
|
233
|
-
def _merge_toml_block(path: Path) -> Action:
|
|
233
|
+
def merge_toml_block(path: Path) -> Action:
|
|
234
234
|
"""Add (or refresh) the semble [mcp_servers.semble] table in a Codex config.toml as text."""
|
|
235
235
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
236
236
|
existed = path.exists()
|
|
@@ -242,7 +242,7 @@ def _merge_toml_block(path: Path) -> Action:
|
|
|
242
242
|
return "created" if not existed else "updated"
|
|
243
243
|
|
|
244
244
|
|
|
245
|
-
def _remove_toml_block(path: Path) -> Action:
|
|
245
|
+
def remove_toml_block(path: Path) -> Action:
|
|
246
246
|
"""Remove the semble [mcp_servers.semble] table from a Codex config.toml, leaving the rest."""
|
|
247
247
|
if not path.exists():
|
|
248
248
|
return "not-found"
|
|
@@ -9,19 +9,19 @@ from typing import Callable, NoReturn, Sequence, TypeVar
|
|
|
9
9
|
import questionary
|
|
10
10
|
|
|
11
11
|
from semble.installer.agents import (
|
|
12
|
-
_INSTRUCTIONS,
|
|
13
12
|
AGENTS,
|
|
13
|
+
INSTRUCTIONS,
|
|
14
14
|
AgentTarget,
|
|
15
15
|
Mode,
|
|
16
16
|
WriteResult,
|
|
17
17
|
is_detected,
|
|
18
18
|
)
|
|
19
19
|
from semble.installer.config import (
|
|
20
|
-
_merge_toml_block,
|
|
21
|
-
_remove_toml_block,
|
|
22
20
|
merge_json_member,
|
|
21
|
+
merge_toml_block,
|
|
23
22
|
remove_json_member,
|
|
24
23
|
remove_marked,
|
|
24
|
+
remove_toml_block,
|
|
25
25
|
replace_or_append_marked,
|
|
26
26
|
)
|
|
27
27
|
|
|
@@ -51,14 +51,14 @@ class _Integration:
|
|
|
51
51
|
def merge_mcp(agent: AgentTarget) -> WriteResult:
|
|
52
52
|
"""Add the semble MCP entry to the agent's config."""
|
|
53
53
|
assert agent.mcp is not None
|
|
54
|
-
path = agent.mcp.resolved_path()
|
|
54
|
+
path = agent.mcp.path
|
|
55
55
|
return WriteResult(path, merge_json_member(path, agent.mcp.key, "semble", agent.mcp.entry))
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
def remove_mcp(agent: AgentTarget) -> WriteResult:
|
|
59
59
|
"""Remove the semble MCP entry from the agent's config."""
|
|
60
60
|
assert agent.mcp is not None
|
|
61
|
-
path = agent.mcp.resolved_path()
|
|
61
|
+
path = agent.mcp.path
|
|
62
62
|
return WriteResult(path, remove_json_member(path, agent.mcp.key, "semble"))
|
|
63
63
|
|
|
64
64
|
|
|
@@ -66,9 +66,9 @@ def _apply_mcp(agent: AgentTarget, mode: Mode) -> WriteResult | None:
|
|
|
66
66
|
"""Apply or remove the MCP server integration for one agent."""
|
|
67
67
|
if agent.mcp is None:
|
|
68
68
|
return None
|
|
69
|
-
path = agent.mcp.resolved_path()
|
|
69
|
+
path = agent.mcp.path
|
|
70
70
|
if agent.mcp.format == "toml":
|
|
71
|
-
return WriteResult(path, _merge_toml_block(path) if mode == "install" else _remove_toml_block(path))
|
|
71
|
+
return WriteResult(path, merge_toml_block(path) if mode == "install" else remove_toml_block(path))
|
|
72
72
|
return merge_mcp(agent) if mode == "install" else remove_mcp(agent)
|
|
73
73
|
|
|
74
74
|
|
|
@@ -77,7 +77,7 @@ def _apply_instructions(agent: AgentTarget, mode: Mode) -> WriteResult | None:
|
|
|
77
77
|
path = agent.instructions_path
|
|
78
78
|
if path is None:
|
|
79
79
|
return None
|
|
80
|
-
action = replace_or_append_marked(path, _INSTRUCTIONS) if mode == "install" else remove_marked(path)
|
|
80
|
+
action = replace_or_append_marked(path, INSTRUCTIONS) if mode == "install" else remove_marked(path)
|
|
81
81
|
return WriteResult(path, action)
|
|
82
82
|
|
|
83
83
|
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version_triple__ = (0, 3, 2)
|
|
1
|
+
__version_triple__ = (0, 3, 3)
|
|
2
2
|
__version__ = ".".join(map(str, __version_triple__))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: semble
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: Fast and Accurate Code Search for Agents
|
|
5
5
|
Author-email: Thomas van Dongen <thomasvdongen@proton.me>, Stéphan Tulkens <stephantul@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -98,7 +98,7 @@ Dynamic: license-file
|
|
|
98
98
|
|
|
99
99
|
</div>
|
|
100
100
|
|
|
101
|
-
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services.
|
|
101
|
+
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Use it as an MCP server, a CLI tool via AGENTS.md, or a dedicated sub-agent, and any coding agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
|
|
102
102
|
|
|
103
103
|
## Quickstart
|
|
104
104
|
|
|
@@ -210,7 +210,12 @@ semble savings --verbose # also show breakdown by call type
|
|
|
210
210
|
|
|
211
211
|
Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.
|
|
212
212
|
|
|
213
|
-
|
|
213
|
+
</details>
|
|
214
|
+
|
|
215
|
+
<details>
|
|
216
|
+
<summary>Storage</summary>
|
|
217
|
+
|
|
218
|
+
By default, your Semble savings statistics and any saved indexes are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location you can supply an environment variable `SEMBLE_CACHE_LOCATION` which should be the full path to the target cache location e.g. `~/my-folder/my-caches/semble`.
|
|
214
219
|
|
|
215
220
|
</details>
|
|
216
221
|
|
|
@@ -34,6 +34,8 @@ src/semble/agents/cursor.md
|
|
|
34
34
|
src/semble/agents/gemini.md
|
|
35
35
|
src/semble/agents/kiro.md
|
|
36
36
|
src/semble/agents/opencode.md
|
|
37
|
+
src/semble/agents/pi.md
|
|
38
|
+
src/semble/agents/reasonix.md
|
|
37
39
|
src/semble/chunking/__init__.py
|
|
38
40
|
src/semble/chunking/chunking.py
|
|
39
41
|
src/semble/chunking/core.py
|
|
@@ -8,6 +8,7 @@ from unittest.mock import MagicMock, patch
|
|
|
8
8
|
import pytest
|
|
9
9
|
|
|
10
10
|
from semble.cache import (
|
|
11
|
+
_get_valid_user_cache_dir,
|
|
11
12
|
_linux_cache_dir,
|
|
12
13
|
_windows_cache_dir,
|
|
13
14
|
clear_cache,
|
|
@@ -81,18 +82,30 @@ def test_save_index_to_cache(tmp_path: Path) -> None:
|
|
|
81
82
|
[
|
|
82
83
|
("win32", "semble.cache._windows_cache_dir", Path("/win")),
|
|
83
84
|
("linux", "semble.cache._linux_cache_dir", Path("/linux")),
|
|
85
|
+
("darwin", "semble.cache._macos_cache_dir", Path("/macos")),
|
|
84
86
|
],
|
|
85
87
|
)
|
|
86
88
|
def test_resolve_cache_folder(platform: str, mock_target: str, expected: Path) -> None:
|
|
87
89
|
"""resolve_cache_folder calls the correct platform helper."""
|
|
88
|
-
with
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
90
|
+
with (
|
|
91
|
+
patch.object(sys, "platform", platform),
|
|
92
|
+
patch.dict("os.environ", {}, clear=True),
|
|
93
|
+
patch(mock_target, return_value=expected) as mock_fn,
|
|
94
|
+
patch("pathlib.Path.mkdir"),
|
|
95
|
+
):
|
|
96
|
+
result = resolve_cache_folder()
|
|
92
97
|
mock_fn.assert_called_once_with("semble")
|
|
93
98
|
assert result == expected
|
|
94
99
|
|
|
95
100
|
|
|
101
|
+
def test_get_valid_user_cache_dir_relative_path() -> None:
|
|
102
|
+
"""_get_valid_user_cache_dir returns None when SEMBLE_CACHE_LOCATION is a relative path."""
|
|
103
|
+
with patch.dict("os.environ", {"SEMBLE_CACHE_LOCATION": "relative/path"}):
|
|
104
|
+
with patch("semble.cache.logger") as mock_logger:
|
|
105
|
+
assert _get_valid_user_cache_dir() is None
|
|
106
|
+
mock_logger.warning.assert_called_once()
|
|
107
|
+
|
|
108
|
+
|
|
96
109
|
def test_resolve_cache_folder_semble_cache_location(tmp_path: Path) -> None:
|
|
97
110
|
"""SEMBLE_CACHE_LOCATION takes precedence over all platform-specific helpers."""
|
|
98
111
|
custom = tmp_path / "custom_cache"
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import sys
|
|
2
|
+
import warnings
|
|
2
3
|
from importlib.resources import files
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from unittest.mock import MagicMock, patch
|
|
5
6
|
|
|
6
7
|
import pytest
|
|
7
8
|
|
|
8
|
-
from semble.cli import _cli_main, _maybe_save_index, main
|
|
9
|
+
from semble.cli import _cli_main, _maybe_save_index, _run_clear, main
|
|
9
10
|
from semble.types import ContentType, SearchResult
|
|
10
11
|
from tests.conftest import make_chunk
|
|
11
12
|
|
|
@@ -172,8 +173,6 @@ def test_include_text_files_cli_deprecated(
|
|
|
172
173
|
capsys: pytest.CaptureFixture[str],
|
|
173
174
|
) -> None:
|
|
174
175
|
"""--include-text-files on CLI raises DeprecationWarning."""
|
|
175
|
-
import warnings
|
|
176
|
-
|
|
177
176
|
chunk = make_chunk("def foo(): pass", "src/foo.py")
|
|
178
177
|
fake_index = MagicMock()
|
|
179
178
|
fake_index.search.return_value = [SearchResult(chunk=chunk, score=0.9)]
|
|
@@ -229,3 +228,144 @@ def test_agent_file_tools_are_bash_only() -> None:
|
|
|
229
228
|
tools = [t.strip() for t in tools_line.removeprefix("tools:").split(",")]
|
|
230
229
|
assert set(tools) == {"Bash", "Read"}, f"Unexpected tools in agent file: {tools}"
|
|
231
230
|
assert not any("mcp__" in t for t in tools)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _make_valid_index_dir(cache_folder: Path, sha: str = "a" * 64) -> Path:
|
|
234
|
+
"""Create a fake valid index directory with the expected structure."""
|
|
235
|
+
index_dir = cache_folder / sha / "index"
|
|
236
|
+
index_dir.mkdir(parents=True)
|
|
237
|
+
# Create the files that PersistencePath.non_existing checks
|
|
238
|
+
(index_dir / "chunks.json").write_text("[]")
|
|
239
|
+
(index_dir / "bm25_index").write_text("")
|
|
240
|
+
(index_dir / "semantic_index").write_text("")
|
|
241
|
+
(index_dir / "metadata.json").write_text("{}")
|
|
242
|
+
return index_dir
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@pytest.mark.parametrize(
|
|
246
|
+
("scenario", "expected_in_output"),
|
|
247
|
+
[
|
|
248
|
+
("valid", ["Cleared index", "a" * 64, "b" * 64]),
|
|
249
|
+
("empty", ["No indexes found"]),
|
|
250
|
+
("non_sha", ["No indexes found"]),
|
|
251
|
+
("incomplete", ["No indexes found"]),
|
|
252
|
+
],
|
|
253
|
+
)
|
|
254
|
+
def test_run_clear_index(
|
|
255
|
+
scenario: str, expected_in_output: list[str], tmp_path: Path, capsys: pytest.CaptureFixture[str]
|
|
256
|
+
) -> None:
|
|
257
|
+
"""_run_clear('index') finds valid indexes, and skips non-SHA/incomplete/empty dirs."""
|
|
258
|
+
if scenario == "valid":
|
|
259
|
+
_make_valid_index_dir(tmp_path, "a" * 64)
|
|
260
|
+
_make_valid_index_dir(tmp_path, "b" * 64)
|
|
261
|
+
elif scenario == "non_sha":
|
|
262
|
+
bad_dir = tmp_path / "not-a-sha" / "index"
|
|
263
|
+
bad_dir.mkdir(parents=True)
|
|
264
|
+
(bad_dir / "chunks.json").write_text("[]")
|
|
265
|
+
(bad_dir / "bm25_index").write_text("")
|
|
266
|
+
(bad_dir / "semantic_index").write_text("")
|
|
267
|
+
(bad_dir / "metadata.json").write_text("{}")
|
|
268
|
+
elif scenario == "incomplete":
|
|
269
|
+
index_dir = tmp_path / ("c" * 64) / "index"
|
|
270
|
+
index_dir.mkdir(parents=True)
|
|
271
|
+
|
|
272
|
+
with patch("semble.cli.resolve_cache_folder", return_value=tmp_path):
|
|
273
|
+
_run_clear("index")
|
|
274
|
+
|
|
275
|
+
out = capsys.readouterr().out
|
|
276
|
+
for fragment in expected_in_output:
|
|
277
|
+
assert fragment in out
|
|
278
|
+
|
|
279
|
+
if scenario == "valid":
|
|
280
|
+
assert not (tmp_path / ("a" * 64)).exists()
|
|
281
|
+
assert not (tmp_path / ("b" * 64)).exists()
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
@pytest.mark.parametrize(
|
|
285
|
+
("create_file", "expected"),
|
|
286
|
+
[
|
|
287
|
+
(True, "Cleared savings"),
|
|
288
|
+
(False, "No savings file found"),
|
|
289
|
+
],
|
|
290
|
+
)
|
|
291
|
+
def test_run_clear_savings(
|
|
292
|
+
create_file: bool, expected: str, tmp_path: Path, capsys: pytest.CaptureFixture[str]
|
|
293
|
+
) -> None:
|
|
294
|
+
"""_run_clear('savings') deletes the file when present, reports missing otherwise."""
|
|
295
|
+
savings_file = tmp_path / "savings.jsonl"
|
|
296
|
+
if create_file:
|
|
297
|
+
savings_file.write_text('{"tokens": 100}\n')
|
|
298
|
+
|
|
299
|
+
with patch("semble.cli.resolve_cache_folder", return_value=tmp_path):
|
|
300
|
+
_run_clear("savings")
|
|
301
|
+
|
|
302
|
+
if create_file:
|
|
303
|
+
assert not savings_file.exists()
|
|
304
|
+
out = capsys.readouterr().out
|
|
305
|
+
assert expected in out
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
@pytest.mark.parametrize(
|
|
309
|
+
("populate", "expected_fragments"),
|
|
310
|
+
[
|
|
311
|
+
(True, ["Cleared index", "d" * 64, "Cleared savings"]),
|
|
312
|
+
(False, ["No indexes found", "No savings file found"]),
|
|
313
|
+
],
|
|
314
|
+
)
|
|
315
|
+
def test_run_clear_all(
|
|
316
|
+
populate: bool, expected_fragments: list[str], tmp_path: Path, capsys: pytest.CaptureFixture[str]
|
|
317
|
+
) -> None:
|
|
318
|
+
"""_run_clear('all') handles both indexes and savings."""
|
|
319
|
+
if populate:
|
|
320
|
+
_make_valid_index_dir(tmp_path, "d" * 64)
|
|
321
|
+
(tmp_path / "savings.jsonl").write_text('{"tokens": 50}\n')
|
|
322
|
+
|
|
323
|
+
with patch("semble.cli.resolve_cache_folder", return_value=tmp_path):
|
|
324
|
+
_run_clear("all")
|
|
325
|
+
|
|
326
|
+
out = capsys.readouterr().out
|
|
327
|
+
for fragment in expected_fragments:
|
|
328
|
+
assert fragment in out
|
|
329
|
+
|
|
330
|
+
if populate:
|
|
331
|
+
assert not (tmp_path / ("d" * 64)).exists()
|
|
332
|
+
assert not (tmp_path / "savings.jsonl").exists()
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
@pytest.mark.parametrize(
|
|
336
|
+
("subcommand", "setup_index", "setup_savings", "expected_fragments"),
|
|
337
|
+
[
|
|
338
|
+
("index", True, False, ["Cleared index", "e" * 64]),
|
|
339
|
+
("savings", False, True, ["Cleared savings"]),
|
|
340
|
+
("all", True, True, ["Cleared index", "Cleared savings"]),
|
|
341
|
+
],
|
|
342
|
+
)
|
|
343
|
+
def test_cli_clear_command(
|
|
344
|
+
subcommand: str,
|
|
345
|
+
setup_index: bool,
|
|
346
|
+
setup_savings: bool,
|
|
347
|
+
expected_fragments: list[str],
|
|
348
|
+
tmp_path: Path,
|
|
349
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
350
|
+
capsys: pytest.CaptureFixture[str],
|
|
351
|
+
) -> None:
|
|
352
|
+
"""The `semble clear <subcommand>` CLI dispatches to _run_clear correctly."""
|
|
353
|
+
sha = "e" * 64
|
|
354
|
+
if setup_index:
|
|
355
|
+
_make_valid_index_dir(tmp_path, sha)
|
|
356
|
+
savings_file = tmp_path / "savings.jsonl"
|
|
357
|
+
if setup_savings:
|
|
358
|
+
savings_file.write_text('{"tokens": 200}\n')
|
|
359
|
+
|
|
360
|
+
monkeypatch.setattr(sys, "argv", ["semble", "clear", subcommand])
|
|
361
|
+
with patch("semble.cli.resolve_cache_folder", return_value=tmp_path):
|
|
362
|
+
_cli_main()
|
|
363
|
+
|
|
364
|
+
out = capsys.readouterr().out
|
|
365
|
+
for fragment in expected_fragments:
|
|
366
|
+
assert fragment in out
|
|
367
|
+
|
|
368
|
+
if setup_index:
|
|
369
|
+
assert not (tmp_path / sha).exists()
|
|
370
|
+
if setup_savings:
|
|
371
|
+
assert not savings_file.exists()
|
|
@@ -16,9 +16,9 @@ from semble.installer.agents import (
|
|
|
16
16
|
)
|
|
17
17
|
from semble.installer.config import (
|
|
18
18
|
_CODEX_MCP_HEADER,
|
|
19
|
-
|
|
20
|
-
_remove_toml_block,
|
|
19
|
+
merge_toml_block,
|
|
21
20
|
remove_marked,
|
|
21
|
+
remove_toml_block,
|
|
22
22
|
replace_or_append_marked,
|
|
23
23
|
)
|
|
24
24
|
from semble.installer.installer import (
|
|
@@ -125,7 +125,13 @@ def test_merge_mcp_errors(claude_agent, content):
|
|
|
125
125
|
|
|
126
126
|
@pytest.mark.parametrize(
|
|
127
127
|
("agent_id", "key"),
|
|
128
|
-
[
|
|
128
|
+
[
|
|
129
|
+
("zed", "context_servers"),
|
|
130
|
+
("windsurf", "mcpServers"),
|
|
131
|
+
("copilot", "mcpServers"),
|
|
132
|
+
("reasonix", "mcpServers"),
|
|
133
|
+
("pi", "mcpServers"),
|
|
134
|
+
],
|
|
129
135
|
)
|
|
130
136
|
def test_merge_mcp_writes_under_agent_key(tmp_path, agent_id, key):
|
|
131
137
|
"""merge_mcp writes the semble entry under each agent's own top-level MCP key."""
|
|
@@ -206,14 +212,14 @@ def test_codex_toml_merge_and_remove(tmp_path):
|
|
|
206
212
|
"""The Codex TOML helpers add/remove [mcp_servers.semble] while preserving other tables and keys."""
|
|
207
213
|
f = tmp_path / "config.toml"
|
|
208
214
|
f.write_text('model = "gpt-5"\n\n[mcp_servers.other]\ncommand = "x"\n')
|
|
209
|
-
assert
|
|
215
|
+
assert merge_toml_block(f) == "updated"
|
|
210
216
|
text = f.read_text()
|
|
211
217
|
assert _CODEX_MCP_HEADER in text
|
|
212
218
|
assert 'model = "gpt-5"' in text
|
|
213
219
|
assert "[mcp_servers.other]" in text
|
|
214
|
-
assert
|
|
220
|
+
assert merge_toml_block(f) == "unchanged" # idempotent
|
|
215
221
|
|
|
216
|
-
assert
|
|
222
|
+
assert remove_toml_block(f) == "removed"
|
|
217
223
|
text = f.read_text()
|
|
218
224
|
assert _CODEX_MCP_HEADER not in text
|
|
219
225
|
assert "[mcp_servers.other]" in text # only the semble table is removed
|
|
@@ -223,7 +229,7 @@ def test_codex_toml_merge_replaces_section_with_inline_comment(tmp_path):
|
|
|
223
229
|
"""_merge_toml_block replaces an existing semble table even when the header has a trailing comment."""
|
|
224
230
|
f = tmp_path / "config.toml"
|
|
225
231
|
f.write_text('[mcp_servers.semble] # added manually\ncommand = "old"\n')
|
|
226
|
-
assert
|
|
232
|
+
assert merge_toml_block(f) == "updated"
|
|
227
233
|
text = f.read_text()
|
|
228
234
|
assert text.count("[mcp_servers.semble]") == 1
|
|
229
235
|
|
|
@@ -237,14 +243,14 @@ def test_remove_toml_not_found(tmp_path, setup, expected):
|
|
|
237
243
|
f = tmp_path / "config.toml"
|
|
238
244
|
if setup is not None:
|
|
239
245
|
f.write_text(setup)
|
|
240
|
-
assert
|
|
246
|
+
assert remove_toml_block(f) == expected
|
|
241
247
|
|
|
242
248
|
|
|
243
249
|
def test_remove_toml_deletes_file_when_only_semble(tmp_path):
|
|
244
250
|
"""_remove_toml_block unlinks the file when removing semble leaves it empty."""
|
|
245
251
|
f = tmp_path / "config.toml"
|
|
246
|
-
|
|
247
|
-
|
|
252
|
+
merge_toml_block(f)
|
|
253
|
+
remove_toml_block(f)
|
|
248
254
|
assert not f.exists()
|
|
249
255
|
|
|
250
256
|
|
|
@@ -265,7 +271,7 @@ def test_remove_toml_strips_sub_tables(tmp_path, content):
|
|
|
265
271
|
"""_remove_toml_block removes sub-tables like [mcp_servers.semble.tools.search], before or after the main header."""
|
|
266
272
|
f = tmp_path / "config.toml"
|
|
267
273
|
f.write_text(content)
|
|
268
|
-
assert
|
|
274
|
+
assert remove_toml_block(f) == "removed"
|
|
269
275
|
text = f.read_text()
|
|
270
276
|
assert "[mcp_servers.semble]" not in text
|
|
271
277
|
assert "[mcp_servers.semble.tools.search]" not in text
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|