semble 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semble-0.3.0 → semble-0.3.2}/PKG-INFO +36 -329
- {semble-0.3.0 → semble-0.3.2}/README.md +34 -328
- semble-0.3.2/docs/installation.md +262 -0
- {semble-0.3.0 → semble-0.3.2}/pyproject.toml +3 -1
- {semble-0.3.0 → semble-0.3.2}/src/semble/cache.py +4 -2
- {semble-0.3.0 → semble-0.3.2}/src/semble/cli.py +9 -46
- {semble-0.3.0 → semble-0.3.2}/src/semble/index/index.py +2 -2
- semble-0.3.2/src/semble/installer/__init__.py +3 -0
- semble-0.3.2/src/semble/installer/agents.py +230 -0
- semble-0.3.2/src/semble/installer/config.py +257 -0
- semble-0.3.2/src/semble/installer/installer.py +207 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/version.py +1 -1
- {semble-0.3.0 → semble-0.3.2}/src/semble.egg-info/PKG-INFO +36 -329
- {semble-0.3.0 → semble-0.3.2}/src/semble.egg-info/SOURCES.txt +6 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble.egg-info/requires.txt +1 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_cache.py +9 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_cli.py +1 -48
- semble-0.3.2/tests/test_installer.py +453 -0
- {semble-0.3.0 → semble-0.3.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {semble-0.3.0 → semble-0.3.2}/.github/workflows/ci.yaml +0 -0
- {semble-0.3.0 → semble-0.3.2}/.github/workflows/release.yaml +0 -0
- {semble-0.3.0 → semble-0.3.2}/.gitignore +0 -0
- {semble-0.3.0 → semble-0.3.2}/.pre-commit-config.yaml +0 -0
- {semble-0.3.0 → semble-0.3.2}/CITATION.cff +0 -0
- {semble-0.3.0 → semble-0.3.2}/CONTRIBUTING.md +0 -0
- {semble-0.3.0 → semble-0.3.2}/LICENSE +0 -0
- {semble-0.3.0 → semble-0.3.2}/MANIFEST.in +0 -0
- {semble-0.3.0 → semble-0.3.2}/Makefile +0 -0
- {semble-0.3.0 → semble-0.3.2}/setup.cfg +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/__init__.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/agents/claude.md +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/agents/copilot.md +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/agents/cursor.md +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/agents/gemini.md +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/agents/kiro.md +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/agents/opencode.md +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/chunking/__init__.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/chunking/chunking.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/chunking/core.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/index/__init__.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/index/create.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/index/dense.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/index/file_walker.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/index/files.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/index/sparse.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/index/types.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/mcp.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/py.typed +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/ranking/__init__.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/ranking/boosting.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/ranking/penalties.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/ranking/weighting.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/search.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/stats.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/tokens.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/types.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble/utils.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble.egg-info/dependency_links.txt +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble.egg-info/entry_points.txt +0 -0
- {semble-0.3.0 → semble-0.3.2}/src/semble.egg-info/top_level.txt +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/__init__.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/conftest.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/index/test_dense.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/index/test_index.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_chunker.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_file_walker.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_files.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_git.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_mcp.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_ranking.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_search.py +0 -0
- {semble-0.3.0 → semble-0.3.2}/tests/test_stats.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: semble
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Fast and Accurate Code Search for Agents
|
|
5
5
|
Author-email: Thomas van Dongen <thomasvdongen@proton.me>, Stéphan Tulkens <stephantul@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -52,6 +52,7 @@ Requires-Dist: pathspec>=0.12
|
|
|
52
52
|
Requires-Dist: tree-sitter<0.26,>=0.25
|
|
53
53
|
Requires-Dist: tree-sitter-language-pack!=1.6.3,<1.8.0,>=1.0
|
|
54
54
|
Requires-Dist: orjson
|
|
55
|
+
Requires-Dist: questionary<3.0,>=2.0
|
|
55
56
|
Provides-Extra: mcp
|
|
56
57
|
Requires-Dist: mcp<2.0,>=1.0; extra == "mcp"
|
|
57
58
|
Requires-Dist: watchfiles>=0.21; extra == "mcp"
|
|
@@ -71,7 +72,6 @@ Requires-Dist: pydoclint>=0.5.3; extra == "dev"
|
|
|
71
72
|
Requires-Dist: pre-commit>=3.0; extra == "dev"
|
|
72
73
|
Dynamic: license-file
|
|
73
74
|
|
|
74
|
-
|
|
75
75
|
<h2 align="center">
|
|
76
76
|
<img width="30%" alt="semble logo" src="https://raw.githubusercontent.com/MinishLab/semble/main/assets/images/semble_logo.png"><br/>
|
|
77
77
|
Fast and Accurate Code Search for Agents<br/>
|
|
@@ -90,11 +90,12 @@ Dynamic: license-file
|
|
|
90
90
|
</h2>
|
|
91
91
|
|
|
92
92
|
[Quickstart](#quickstart) •
|
|
93
|
-
[MCP Server](#mcp-server) •
|
|
94
|
-
[AGENTS.md](#agentsmd) •
|
|
95
93
|
[CLI](#cli) •
|
|
94
|
+
[MCP Server](#mcp-server) •
|
|
95
|
+
[Installation](docs/installation.md) •
|
|
96
96
|
[Benchmarks](#benchmarks)
|
|
97
97
|
|
|
98
|
+
|
|
98
99
|
</div>
|
|
99
100
|
|
|
100
101
|
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Run it as an [MCP server](#mcp-server) or call it from the shell via [AGENTS.md](#agentsmd), and any agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
|
|
@@ -103,93 +104,29 @@ Semble is a code search library built for agents. It returns the exact code snip
|
|
|
103
104
|
|
|
104
105
|
Your agent queries Semble in natural language (e.g. `"How is authentication handled?"`) and gets back only the relevant code snippets, without grepping or reading full files.
|
|
105
106
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
- **[MCP server](#mcp-server)**: an MCP server for your agent.
|
|
109
|
-
- **[AGENTS.md](#agentsmd)**: an AGENTS.md snippet with instructions for calling Semble via the CLI.
|
|
110
|
-
- **[Sub-agent](#sub-agent-setup)**: a dedicated `semble-search` sub-agent for harnesses that support it.
|
|
111
|
-
|
|
112
|
-
### MCP
|
|
113
|
-
|
|
114
|
-
Expose Semble as a native tool via MCP so your agent can call it directly. Add it to Claude Code (requires [uv](https://docs.astral.sh/uv/getting-started/installation/)):
|
|
107
|
+
The fastest way to get started is the interactive installer. Install [uv](https://docs.astral.sh/uv/getting-started/installation/), then run:
|
|
115
108
|
|
|
116
109
|
```bash
|
|
117
|
-
|
|
110
|
+
uv tool install semble
|
|
111
|
+
semble install
|
|
118
112
|
```
|
|
119
113
|
|
|
120
|
-
|
|
114
|
+
`semble install` detects installed coding agents such as Claude Code, Codex, and OpenCode, and then lets you choose which integrations to enable:
|
|
121
115
|
|
|
122
|
-
|
|
116
|
+
- **MCP server**: lets the agent call Semble directly as a tool.
|
|
117
|
+
- **Instructions**: adds CLI usage guidance to AGENTS.md / CLAUDE.md.
|
|
118
|
+
- **Sub-agent**: installs a dedicated `semble-search` sub-agent.
|
|
123
119
|
|
|
124
|
-
|
|
120
|
+
To undo the setup, run `semble uninstall`.
|
|
125
121
|
|
|
126
|
-
|
|
127
|
-
uv tool install semble # Install with uv (recommended)
|
|
128
|
-
pip install semble # Or with pip
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
<details>
|
|
132
|
-
<summary>AGENTS.md / CLAUDE.md snippet</summary>
|
|
133
|
-
|
|
134
|
-
```markdown
|
|
135
|
-
## Code Search
|
|
136
|
-
|
|
137
|
-
Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
|
|
138
|
-
|
|
139
|
-
```bash
|
|
140
|
-
semble search "authentication flow" ./my-project
|
|
141
|
-
semble search "save_pretrained" ./my-project
|
|
142
|
-
semble search "save model to disk" ./my-project --top-k 10
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
The index is built on first run (and cached for subsequent runs) and invalidated automatically when files change.
|
|
146
|
-
|
|
147
|
-
Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
|
|
148
|
-
|
|
149
|
-
```bash
|
|
150
|
-
semble search "deployment guide" ./my-project --content docs
|
|
151
|
-
semble search "database host port" ./my-project --content config
|
|
152
|
-
semble search "authentication" ./my-project --content all
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
|
|
156
|
-
|
|
157
|
-
```bash
|
|
158
|
-
semble find-related src/auth.py 42 ./my-project
|
|
159
|
-
```
|
|
160
|
-
|
|
161
|
-
`path` defaults to the current directory when omitted; git URLs are accepted.
|
|
162
|
-
|
|
163
|
-
If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
|
|
164
|
-
|
|
165
|
-
### Workflow
|
|
166
|
-
|
|
167
|
-
1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
|
|
168
|
-
2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
|
|
169
|
-
3. Inspect full files only when the returned chunk does not give enough context.
|
|
170
|
-
4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
|
|
171
|
-
5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
</details>
|
|
175
|
-
|
|
176
|
-
### Sub-agent
|
|
177
|
-
|
|
178
|
-
For harnesses that support sub-agents, install a dedicated `semble-search` sub-agent so search runs in its own context (requires the CLI):
|
|
179
|
-
|
|
180
|
-
```bash
|
|
181
|
-
semble init # Claude Code → .claude/agents/semble-search.md
|
|
182
|
-
```
|
|
183
|
-
|
|
184
|
-
See [Sub-agent setup](#sub-agent-setup) below for other harnesses (Cursor, Codex, OpenCode, etc.).
|
|
122
|
+
For manual setup instructions (MCP config per agent, AGENTS.md snippet, sub-agent files), see the [installation docs](docs/installation.md).
|
|
185
123
|
|
|
186
124
|
<details>
|
|
187
125
|
<summary>Updating Semble</summary>
|
|
188
126
|
|
|
189
127
|
```bash
|
|
190
|
-
uv tool upgrade semble
|
|
191
|
-
uv cache clean semble
|
|
192
|
-
pip install --upgrade semble # with pip
|
|
128
|
+
uv tool upgrade semble # upgrade
|
|
129
|
+
uv cache clean semble # for MCP users (restart your MCP client after)
|
|
193
130
|
```
|
|
194
131
|
|
|
195
132
|
</details>
|
|
@@ -203,257 +140,9 @@ pip install --upgrade semble # with pip
|
|
|
203
140
|
- **MCP server**: works with Claude Code, Cursor, Codex, OpenCode, VS Code, and any other MCP-compatible agent.
|
|
204
141
|
- **Local and remote**: pass a local path or a git URL.
|
|
205
142
|
|
|
206
|
-
## MCP Server
|
|
207
|
-
|
|
208
|
-
Semble can run as an MCP server so agents can search any codebase directly. Repos are cloned and indexed on demand, and indexes are cached for the lifetime of the session. Local paths are watched for file changes and re-indexed automatically.
|
|
209
|
-
|
|
210
|
-
### Setup
|
|
211
|
-
|
|
212
|
-
> Requires [uv](https://docs.astral.sh/uv/getting-started/installation/) to be installed.
|
|
213
|
-
|
|
214
|
-
<details>
|
|
215
|
-
<summary>Claude Code</summary>
|
|
216
|
-
|
|
217
|
-
```bash
|
|
218
|
-
claude mcp add semble -s user -- uvx --from "semble[mcp]" semble
|
|
219
|
-
```
|
|
220
|
-
|
|
221
|
-
</details>
|
|
222
|
-
|
|
223
|
-
<details>
|
|
224
|
-
<summary>Cursor</summary>
|
|
225
|
-
|
|
226
|
-
Add to `~/.cursor/mcp.json` (or `.cursor/mcp.json` in your project):
|
|
227
|
-
|
|
228
|
-
```json
|
|
229
|
-
{
|
|
230
|
-
"mcpServers": {
|
|
231
|
-
"semble": {
|
|
232
|
-
"command": "uvx",
|
|
233
|
-
"args": ["--from", "semble[mcp]", "semble"]
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
```
|
|
238
|
-
|
|
239
|
-
</details>
|
|
240
|
-
|
|
241
|
-
<details>
|
|
242
|
-
<summary>Codex</summary>
|
|
243
|
-
|
|
244
|
-
Add to `~/.codex/config.toml`:
|
|
245
|
-
|
|
246
|
-
```toml
|
|
247
|
-
[mcp_servers.semble]
|
|
248
|
-
command = "uvx"
|
|
249
|
-
args = ["--from", "semble[mcp]", "semble"]
|
|
250
|
-
```
|
|
251
|
-
|
|
252
|
-
</details>
|
|
253
|
-
|
|
254
|
-
<details>
|
|
255
|
-
<summary>OpenCode</summary>
|
|
256
|
-
|
|
257
|
-
Add to `~/.opencode/config.json`:
|
|
258
|
-
|
|
259
|
-
```json
|
|
260
|
-
{
|
|
261
|
-
"mcp": {
|
|
262
|
-
"semble": {
|
|
263
|
-
"type": "local",
|
|
264
|
-
"command": ["uvx", "--from", "semble[mcp]", "semble"]
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
```
|
|
269
|
-
|
|
270
|
-
</details>
|
|
271
|
-
|
|
272
|
-
<details>
|
|
273
|
-
<summary>VS Code</summary>
|
|
274
|
-
|
|
275
|
-
Add to `.vscode/mcp.json` in your project (or your user profile's `mcp.json`):
|
|
276
|
-
|
|
277
|
-
```json
|
|
278
|
-
{
|
|
279
|
-
"servers": {
|
|
280
|
-
"semble": {
|
|
281
|
-
"command": "uvx",
|
|
282
|
-
"args": ["--from", "semble[mcp]", "semble"]
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
```
|
|
287
|
-
|
|
288
|
-
</details>
|
|
289
|
-
|
|
290
|
-
<details>
|
|
291
|
-
<summary>GitHub Copilot CLI</summary>
|
|
292
|
-
|
|
293
|
-
Add to `~/.copilot/mcp-config.json`:
|
|
294
|
-
|
|
295
|
-
```json
|
|
296
|
-
{
|
|
297
|
-
"mcpServers": {
|
|
298
|
-
"semble": {
|
|
299
|
-
"command": "uvx",
|
|
300
|
-
"args": ["--from", "semble[mcp]", "semble"]
|
|
301
|
-
}
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
```
|
|
305
|
-
|
|
306
|
-
</details>
|
|
307
|
-
|
|
308
|
-
<details>
|
|
309
|
-
<summary>Windsurf</summary>
|
|
310
|
-
|
|
311
|
-
Add to `~/.codeium/windsurf/mcp_config.json`:
|
|
312
|
-
|
|
313
|
-
```json
|
|
314
|
-
{
|
|
315
|
-
"mcpServers": {
|
|
316
|
-
"semble": {
|
|
317
|
-
"command": "uvx",
|
|
318
|
-
"args": ["--from", "semble[mcp]", "semble"]
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
```
|
|
323
|
-
|
|
324
|
-
</details>
|
|
325
|
-
|
|
326
|
-
<details>
|
|
327
|
-
<summary>Gemini CLI</summary>
|
|
328
|
-
|
|
329
|
-
Add to `~/.gemini/settings.json`:
|
|
330
|
-
|
|
331
|
-
```json
|
|
332
|
-
{
|
|
333
|
-
"mcpServers": {
|
|
334
|
-
"semble": {
|
|
335
|
-
"command": "uvx",
|
|
336
|
-
"args": ["--from", "semble[mcp]", "semble"]
|
|
337
|
-
}
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
```
|
|
341
|
-
|
|
342
|
-
</details>
|
|
343
|
-
|
|
344
|
-
<details>
|
|
345
|
-
<summary>Kiro</summary>
|
|
346
|
-
|
|
347
|
-
Add to `~/.kiro/settings/mcp.json` (or `.kiro/settings/mcp.json` in your project):
|
|
348
|
-
|
|
349
|
-
```json
|
|
350
|
-
{
|
|
351
|
-
"mcpServers": {
|
|
352
|
-
"semble": {
|
|
353
|
-
"command": "uvx",
|
|
354
|
-
"args": ["--from", "semble[mcp]", "semble"]
|
|
355
|
-
}
|
|
356
|
-
}
|
|
357
|
-
}
|
|
358
|
-
```
|
|
359
|
-
|
|
360
|
-
</details>
|
|
361
|
-
|
|
362
|
-
<details>
|
|
363
|
-
<summary>Zed</summary>
|
|
364
|
-
|
|
365
|
-
Add to `~/.config/zed/settings.json` (or `.zed/settings.json` in your project):
|
|
366
|
-
|
|
367
|
-
```json
|
|
368
|
-
{
|
|
369
|
-
"context_servers": {
|
|
370
|
-
"semble": {
|
|
371
|
-
"command": "uvx",
|
|
372
|
-
"args": ["--from", "semble[mcp]", "semble"]
|
|
373
|
-
}
|
|
374
|
-
}
|
|
375
|
-
}
|
|
376
|
-
```
|
|
377
|
-
|
|
378
|
-
</details>
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
### Tools
|
|
382
|
-
|
|
383
|
-
| Tool | Description |
|
|
384
|
-
|------|-------------|
|
|
385
|
-
| `search` | Search a codebase with a natural-language or code query. Pass `repo` as a local directory path or an https:// git URL. |
|
|
386
|
-
| `find_related` | Given a file path and line number, return chunks semantically similar to the code at that location. |
|
|
387
|
-
|
|
388
|
-
By default the MCP server indexes only code files. To also index documentation, config, or everything, append `--content docs`, `--content config`, or `--content all` to the server command, or a combination, e.g. `--content code docs`. For example, in Claude Code: `claude mcp add semble -s user -- uvx --from "semble[mcp]" semble --content all`.
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
<a id="bash-agentsmd"></a>
|
|
392
|
-
|
|
393
|
-
## Bash / AGENTS.md
|
|
394
|
-
|
|
395
|
-
An alternative to MCP is to invoke Semble via Bash. Sub-agents cannot call MCP tools directly, so this is the only option for sub-agent support; it can also be used alongside MCP for the top-level agent.
|
|
396
|
-
|
|
397
|
-
To add Bash support, append the following to your `AGENTS.md`, `CLAUDE.md`, `GEMINI.md`, or equivalent:
|
|
398
|
-
|
|
399
|
-
```markdown
|
|
400
|
-
## Code Search
|
|
401
|
-
|
|
402
|
-
Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
|
|
403
|
-
|
|
404
|
-
```bash
|
|
405
|
-
semble search "authentication flow" ./my-project
|
|
406
|
-
semble search "save_pretrained" ./my-project
|
|
407
|
-
semble search "save model to disk" ./my-project --top-k 10
|
|
408
|
-
```
|
|
409
|
-
|
|
410
|
-
The index is built on first run (and cached for subsequent runs) and invalidated automatically when files change.
|
|
411
|
-
|
|
412
|
-
Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
|
|
413
|
-
|
|
414
|
-
```bash
|
|
415
|
-
semble search "deployment guide" ./my-project --content docs
|
|
416
|
-
semble search "database host port" ./my-project --content config
|
|
417
|
-
semble search "authentication" ./my-project --content all
|
|
418
|
-
```
|
|
419
|
-
|
|
420
|
-
Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
|
|
421
|
-
|
|
422
|
-
```bash
|
|
423
|
-
semble find-related src/auth.py 42 ./my-project
|
|
424
|
-
```
|
|
425
|
-
|
|
426
|
-
`path` defaults to the current directory when omitted; git URLs are accepted.
|
|
427
|
-
|
|
428
|
-
If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
|
|
429
|
-
|
|
430
|
-
### Workflow
|
|
431
|
-
|
|
432
|
-
1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
|
|
433
|
-
2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
|
|
434
|
-
3. Inspect full files only when the returned chunk does not give enough context.
|
|
435
|
-
4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
|
|
436
|
-
5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
|
|
437
|
-
```
|
|
438
|
-
|
|
439
|
-
### Sub-agent setup
|
|
440
|
-
|
|
441
|
-
Claude Code, Gemini CLI, Cursor, OpenCode, GitHub Copilot CLI, and Kiro all support a dedicated semble search sub-agent. Run `semble init` once in your project root:
|
|
442
|
-
|
|
443
|
-
```bash
|
|
444
|
-
semble init # Claude Code → .claude/agents/semble-search.md
|
|
445
|
-
semble init --agent gemini # Gemini CLI → .gemini/agents/semble-search.md
|
|
446
|
-
semble init --agent cursor # Cursor → .cursor/agents/semble-search.md
|
|
447
|
-
semble init --agent opencode # OpenCode → .opencode/agents/semble-search.md
|
|
448
|
-
semble init --agent copilot # Copilot CLI → .github/agents/semble-search.md
|
|
449
|
-
semble init --agent kiro # Kiro → .kiro/agents/semble-search.md
|
|
450
|
-
```
|
|
451
|
-
|
|
452
|
-
If semble is not on `$PATH`, prefix the command with `uvx --from "semble[mcp]"`.
|
|
453
|
-
|
|
454
143
|
## CLI
|
|
455
144
|
|
|
456
|
-
Semble also ships as a standalone CLI. This is useful in scripts or anywhere you want search results without an MCP session.
|
|
145
|
+
Semble also ships as a standalone CLI. This is useful in scripts or anywhere you want search results without an MCP session. Indexes are built and cached on first run, and invalidated automatically when files change.
|
|
457
146
|
|
|
458
147
|
```bash
|
|
459
148
|
# Search a local repo (index is built and cached automatically)
|
|
@@ -521,7 +210,7 @@ semble savings --verbose # also show breakdown by call type
|
|
|
521
210
|
|
|
522
211
|
Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.
|
|
523
212
|
|
|
524
|
-
|
|
213
|
+
By default, stats are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location, set the `SEMBLE_CACHE_LOCATION` environment variable to the full path of the target cache directory, e.g. `d:\caches\storemysemblecachehere`.
|
|
525
214
|
|
|
526
215
|
</details>
|
|
527
216
|
|
|
@@ -564,6 +253,18 @@ result.chunk.content # "def save_pretrained(self, path: PathLike, ..."
|
|
|
564
253
|
|
|
565
254
|
</details>
|
|
566
255
|
|
|
256
|
+
## MCP Server
|
|
257
|
+
|
|
258
|
+
Semble runs as an MCP server so agents can search any codebase directly as a native tool call. Repos are indexed on demand and cached; local paths are re-indexed automatically on file changes.
|
|
259
|
+
|
|
260
|
+
| Tool | Description |
|
|
261
|
+
|------|-------------|
|
|
262
|
+
| `search` | Search a codebase with a natural-language or code query. Pass `repo` as a local path or an https:// git URL. |
|
|
263
|
+
| `find_related` | Given a file path and line number, return chunks semantically similar to the code at that location. |
|
|
264
|
+
|
|
265
|
+
For per-agent setup instructions, see the [installation docs](docs/installation.md#mcp-server).
|
|
266
|
+
|
|
267
|
+
|
|
567
268
|
## Benchmarks
|
|
568
269
|
|
|
569
270
|
We benchmark quality and speed across ~1,250 queries over 63 repositories in 19 languages (left), and token efficiency against grep+read at equivalent recall levels (right).
|
|
@@ -596,6 +297,12 @@ After fusing, results are reranked with a set of code-aware signals:
|
|
|
596
297
|
|
|
597
298
|
Because the embedding model is static with no transformer forward pass at query time, all of this runs in milliseconds on CPU.
|
|
598
299
|
|
|
300
|
+
Indexes are cached to disk automatically on the first search. On subsequent runs, Semble walks the file tree and compares modification times; if any file was added, removed, or changed, the index is fully rebuilt. In MCP mode, a file watcher detects changes and triggers a rebuild automatically so the index is always current within the same session.
|
|
301
|
+
|
|
302
|
+
## Acknowledgements
|
|
303
|
+
|
|
304
|
+
Thanks to [Greptile](https://greptile.com) for providing free access to their AI code review platform.
|
|
305
|
+
|
|
599
306
|
## License
|
|
600
307
|
|
|
601
308
|
MIT
|