semble 0.3.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {semble-0.3.2 → semble-0.3.4}/PKG-INFO +31 -11
- {semble-0.3.2 → semble-0.3.4}/README.md +30 -10
- {semble-0.3.2 → semble-0.3.4}/docs/installation.md +94 -2
- semble-0.3.4/src/semble/agents/commandcode.md +41 -0
- semble-0.3.4/src/semble/agents/gemini.md +43 -0
- semble-0.3.4/src/semble/agents/pi.md +40 -0
- semble-0.3.4/src/semble/agents/reasonix.md +42 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/cache.py +18 -2
- {semble-0.3.2 → semble-0.3.4}/src/semble/cli.py +45 -5
- {semble-0.3.2 → semble-0.3.4}/src/semble/installer/agents.py +45 -11
- {semble-0.3.2 → semble-0.3.4}/src/semble/installer/config.py +2 -2
- {semble-0.3.2 → semble-0.3.4}/src/semble/installer/installer.py +9 -9
- semble-0.3.4/src/semble/stats.py +234 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/version.py +1 -1
- {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/PKG-INFO +31 -11
- {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/SOURCES.txt +4 -0
- {semble-0.3.2 → semble-0.3.4}/tests/test_cache.py +17 -4
- {semble-0.3.2 → semble-0.3.4}/tests/test_cli.py +143 -3
- {semble-0.3.2 → semble-0.3.4}/tests/test_installer.py +19 -11
- {semble-0.3.2 → semble-0.3.4}/tests/test_stats.py +19 -39
- semble-0.3.2/src/semble/stats.py +0 -148
- {semble-0.3.2 → semble-0.3.4}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {semble-0.3.2 → semble-0.3.4}/.github/workflows/ci.yaml +0 -0
- {semble-0.3.2 → semble-0.3.4}/.github/workflows/release.yaml +0 -0
- {semble-0.3.2 → semble-0.3.4}/.gitignore +0 -0
- {semble-0.3.2 → semble-0.3.4}/.pre-commit-config.yaml +0 -0
- {semble-0.3.2 → semble-0.3.4}/CITATION.cff +0 -0
- {semble-0.3.2 → semble-0.3.4}/CONTRIBUTING.md +0 -0
- {semble-0.3.2 → semble-0.3.4}/LICENSE +0 -0
- {semble-0.3.2 → semble-0.3.4}/MANIFEST.in +0 -0
- {semble-0.3.2 → semble-0.3.4}/Makefile +0 -0
- {semble-0.3.2 → semble-0.3.4}/pyproject.toml +0 -0
- {semble-0.3.2 → semble-0.3.4}/setup.cfg +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/__init__.py +0 -0
- /semble-0.3.2/src/semble/agents/gemini.md → /semble-0.3.4/src/semble/agents/antigravity.md +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/agents/claude.md +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/agents/copilot.md +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/agents/cursor.md +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/agents/kiro.md +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/agents/opencode.md +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/chunking/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/chunking/chunking.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/chunking/core.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/index/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/index/create.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/index/dense.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/index/file_walker.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/index/files.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/index/index.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/index/sparse.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/index/types.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/installer/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/mcp.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/py.typed +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/ranking/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/ranking/boosting.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/ranking/penalties.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/ranking/weighting.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/search.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/tokens.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/types.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble/utils.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/dependency_links.txt +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/entry_points.txt +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/requires.txt +0 -0
- {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/top_level.txt +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/__init__.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/conftest.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/index/test_dense.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/index/test_index.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/test_chunker.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/test_file_walker.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/test_files.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/test_git.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/test_mcp.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/test_ranking.py +0 -0
- {semble-0.3.2 → semble-0.3.4}/tests/test_search.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: semble
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: Fast and Accurate Code Search for Agents
|
|
5
5
|
Author-email: Thomas van Dongen <thomasvdongen@proton.me>, Stéphan Tulkens <stephantul@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -98,7 +98,7 @@ Dynamic: license-file
|
|
|
98
98
|
|
|
99
99
|
</div>
|
|
100
100
|
|
|
101
|
-
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services.
|
|
101
|
+
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Use it as an MCP server, a CLI tool via AGENTS.md, or a dedicated sub-agent, and any coding agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
|
|
102
102
|
|
|
103
103
|
## Quickstart
|
|
104
104
|
|
|
@@ -194,23 +194,43 @@ Semble also always skips a set of well-known non-source directories regardless o
|
|
|
194
194
|
`semble savings` shows how many tokens semble has saved across all your searches:
|
|
195
195
|
|
|
196
196
|
```bash
|
|
197
|
-
semble savings
|
|
198
|
-
semble savings --verbose # also show breakdown by call type
|
|
197
|
+
semble savings
|
|
199
198
|
```
|
|
200
199
|
|
|
201
200
|
```
|
|
202
201
|
Semble Token Savings
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
202
|
+
════════════════════════════════════════════════════════════════════════
|
|
203
|
+
|
|
204
|
+
Total saved: ~714.2M tokens (94%)
|
|
205
|
+
Total calls: 14.3k
|
|
206
|
+
Efficiency: ███████████████████████░ 94%
|
|
207
|
+
|
|
208
|
+
By Period
|
|
209
|
+
────────────────────────────────────────────────────────────────────────
|
|
210
|
+
Period Calls Saved Ratio
|
|
211
|
+
────────────────────────────────────────────────────────────────────────
|
|
212
|
+
Today 198 ~1.4M tokens ███████████████████████░ 95%
|
|
213
|
+
Last 7 days 13.1k ~707.2M tokens ███████████████████████░ 94%
|
|
214
|
+
All time 14.3k ~714.2M tokens ███████████████████████░ 94%
|
|
215
|
+
|
|
216
|
+
By Call Type
|
|
217
|
+
────────────────────────────────────────────────────────────────────────
|
|
218
|
+
# Call type Calls Share
|
|
219
|
+
────────────────────────────────────────────────────────────────────────
|
|
220
|
+
1. search 14.1k ████████████████ 99%
|
|
221
|
+
2. find_related 205 █░░░░░░░░░░░░░░░ 1%
|
|
222
|
+
════════════════════════════════════════════════════════════════════════
|
|
209
223
|
```
|
|
210
224
|
|
|
225
|
+
|
|
211
226
|
Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.
|
|
212
227
|
|
|
213
|
-
|
|
228
|
+
</details>
|
|
229
|
+
|
|
230
|
+
<details>
|
|
231
|
+
<summary>Storage</summary>
|
|
232
|
+
|
|
233
|
+
By default, your Semble savings statistics and any saved indexes are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location you can supply an environment variable `SEMBLE_CACHE_LOCATION` which should be the full path to the target cache location e.g. `~/my-folder/my-caches/semble`.
|
|
214
234
|
|
|
215
235
|
</details>
|
|
216
236
|
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
|
|
25
25
|
</div>
|
|
26
26
|
|
|
27
|
-
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services.
|
|
27
|
+
Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Use it as an MCP server, a CLI tool via AGENTS.md, or a dedicated sub-agent, and any coding agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
|
|
28
28
|
|
|
29
29
|
## Quickstart
|
|
30
30
|
|
|
@@ -120,23 +120,43 @@ Semble also always skips a set of well-known non-source directories regardless o
|
|
|
120
120
|
`semble savings` shows how many tokens semble has saved across all your searches:
|
|
121
121
|
|
|
122
122
|
```bash
|
|
123
|
-
semble savings
|
|
124
|
-
semble savings --verbose # also show breakdown by call type
|
|
123
|
+
semble savings
|
|
125
124
|
```
|
|
126
125
|
|
|
127
126
|
```
|
|
128
127
|
Semble Token Savings
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
128
|
+
════════════════════════════════════════════════════════════════════════
|
|
129
|
+
|
|
130
|
+
Total saved: ~714.2M tokens (94%)
|
|
131
|
+
Total calls: 14.3k
|
|
132
|
+
Efficiency: ███████████████████████░ 94%
|
|
133
|
+
|
|
134
|
+
By Period
|
|
135
|
+
────────────────────────────────────────────────────────────────────────
|
|
136
|
+
Period Calls Saved Ratio
|
|
137
|
+
────────────────────────────────────────────────────────────────────────
|
|
138
|
+
Today 198 ~1.4M tokens ███████████████████████░ 95%
|
|
139
|
+
Last 7 days 13.1k ~707.2M tokens ███████████████████████░ 94%
|
|
140
|
+
All time 14.3k ~714.2M tokens ███████████████████████░ 94%
|
|
141
|
+
|
|
142
|
+
By Call Type
|
|
143
|
+
────────────────────────────────────────────────────────────────────────
|
|
144
|
+
# Call type Calls Share
|
|
145
|
+
────────────────────────────────────────────────────────────────────────
|
|
146
|
+
1. search 14.1k ████████████████ 99%
|
|
147
|
+
2. find_related 205 █░░░░░░░░░░░░░░░ 1%
|
|
148
|
+
════════════════════════════════════════════════════════════════════════
|
|
135
149
|
```
|
|
136
150
|
|
|
151
|
+
|
|
137
152
|
Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.
|
|
138
153
|
|
|
139
|
-
|
|
154
|
+
</details>
|
|
155
|
+
|
|
156
|
+
<details>
|
|
157
|
+
<summary>Storage</summary>
|
|
158
|
+
|
|
159
|
+
By default, your Semble savings statistics and any saved indexes are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location you can supply an environment variable `SEMBLE_CACHE_LOCATION` which should be the full path to the target cache location e.g. `~/my-folder/my-caches/semble`.
|
|
140
160
|
|
|
141
161
|
</details>
|
|
142
162
|
|
|
@@ -21,7 +21,9 @@ To undo:
|
|
|
21
21
|
semble uninstall
|
|
22
22
|
```
|
|
23
23
|
|
|
24
|
-
Supported agents: Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Codex, VS Code, Windsurf, and Zed.
|
|
24
|
+
Supported agents: Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Codex, VS Code, Windsurf, Zed, Reasonix, Pi, Command Code, and Antigravity.
|
|
25
|
+
|
|
26
|
+
> **Pi prerequisite:** Pi requires the MCP extension to be installed before semble can connect. Run `pi install npm:pi-mcp-extension` once, then `semble install`.
|
|
25
27
|
|
|
26
28
|
---
|
|
27
29
|
|
|
@@ -198,6 +200,90 @@ Add to `~/.config/zed/settings.json` (or `.zed/settings.json` in your project):
|
|
|
198
200
|
|
|
199
201
|
</details>
|
|
200
202
|
|
|
203
|
+
<details>
|
|
204
|
+
<summary>Reasonix</summary>
|
|
205
|
+
|
|
206
|
+
Add to `~/.reasonix/config.json` (the backwards-compatible MCP config path read by all Reasonix versions):
|
|
207
|
+
|
|
208
|
+
```json
|
|
209
|
+
{
|
|
210
|
+
"mcpServers": {
|
|
211
|
+
"semble": {
|
|
212
|
+
"command": "uvx",
|
|
213
|
+
"args": ["--from", "semble[mcp]", "semble"]
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
</details>
|
|
220
|
+
|
|
221
|
+
<details>
|
|
222
|
+
<summary>Pi</summary>
|
|
223
|
+
|
|
224
|
+
First install the Pi MCP extension (one-time prerequisite):
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
pi install npm:pi-mcp-extension
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Then add to `~/.pi/agent/mcp.json`:
|
|
231
|
+
|
|
232
|
+
```json
|
|
233
|
+
{
|
|
234
|
+
"mcpServers": {
|
|
235
|
+
"semble": {
|
|
236
|
+
"command": "uvx",
|
|
237
|
+
"args": ["--from", "semble[mcp]", "semble"]
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
</details>
|
|
244
|
+
|
|
245
|
+
<details>
|
|
246
|
+
<summary>Antigravity</summary>
|
|
247
|
+
|
|
248
|
+
Add to `~/.gemini/config/mcp_config.json`:
|
|
249
|
+
|
|
250
|
+
```json
|
|
251
|
+
{
|
|
252
|
+
"mcpServers": {
|
|
253
|
+
"semble": {
|
|
254
|
+
"command": "uvx",
|
|
255
|
+
"args": ["--from", "semble[mcp]", "semble"]
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
</details>
|
|
262
|
+
|
|
263
|
+
<details>
|
|
264
|
+
<summary>Command Code</summary>
|
|
265
|
+
|
|
266
|
+
Add to `~/.commandcode/mcp.json`:
|
|
267
|
+
|
|
268
|
+
```json
|
|
269
|
+
{
|
|
270
|
+
"mcpServers": {
|
|
271
|
+
"semble": {
|
|
272
|
+
"command": "uvx",
|
|
273
|
+
"args": ["--from", "semble[mcp]", "semble"]
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
Or use the CLI:
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
cmd mcp add --scope user semble -- uvx --from "semble[mcp]" semble
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
</details>
|
|
286
|
+
|
|
201
287
|
By default the MCP server indexes only code files. To also index documentation, config, or everything, append `--content docs`, `--content config`, or `--content all` to the server command. For example, in Claude Code:
|
|
202
288
|
|
|
203
289
|
```bash
|
|
@@ -250,7 +336,9 @@ If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its plac
|
|
|
250
336
|
|
|
251
337
|
### Sub-agent
|
|
252
338
|
|
|
253
|
-
For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot), you can install a dedicated `semble-search` sub-agent. Copy the appropriate file from [`src/semble/agents/`](../src/semble/agents/) to your agent's agents directory:
|
|
339
|
+
For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Reasonix, Pi, Command Code, Antigravity), you can install a dedicated `semble-search` sub-agent. Copy the appropriate file from [`src/semble/agents/`](../src/semble/agents/) to your agent's agents directory:
|
|
340
|
+
|
|
341
|
+
> **Pi prerequisite:** Pi sub-agents require the Pi agents extension. Run `pi install npm:pi-agents` once before installing.
|
|
254
342
|
|
|
255
343
|
| Agent | File | Destination |
|
|
256
344
|
|---|---|---|
|
|
@@ -260,3 +348,7 @@ For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, Op
|
|
|
260
348
|
| Kiro | `kiro.md` | `~/.kiro/agents/semble-search.md` |
|
|
261
349
|
| OpenCode | `opencode.md` | `~/.config/opencode/agents/semble-search.md` |
|
|
262
350
|
| GitHub Copilot | `copilot.md` | `~/.copilot/agents/semble-search.agent.md` |
|
|
351
|
+
| Reasonix | `reasonix.md` | `~/.reasonix/skills/semble-search.md` |
|
|
352
|
+
| Pi | `pi.md` | `~/.pi/agents/semble-search.md` |
|
|
353
|
+
| Command Code | `commandcode.md` | `~/.commandcode/agents/semble-search.md` |
|
|
354
|
+
| Antigravity | `antigravity.md` | `~/.gemini/config/skills/semble-search/SKILL.md` |
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: semble-search
|
|
3
|
+
description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Bash/Read for any semantic or exploratory question.
|
|
4
|
+
tools: bash, read_file
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
semble search "authentication flow" ./my-project
|
|
11
|
+
semble search "save_pretrained" ./my-project
|
|
12
|
+
semble search "save model to disk" ./my-project --top-k 10
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Results are cached automatically on first run and invalidated when files change.
|
|
16
|
+
|
|
17
|
+
Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
semble search "deployment guide" ./my-project --content docs
|
|
21
|
+
semble search "database host port" ./my-project --content config
|
|
22
|
+
semble search "authentication" ./my-project --content all
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
semble find-related src/auth.py 42 ./my-project
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
`path` defaults to the current directory when omitted; git URLs are accepted.
|
|
32
|
+
|
|
33
|
+
If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
|
|
34
|
+
|
|
35
|
+
### Workflow
|
|
36
|
+
|
|
37
|
+
1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
|
|
38
|
+
2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
|
|
39
|
+
3. Inspect full files only when the returned chunk does not give enough context.
|
|
40
|
+
4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
|
|
41
|
+
5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: semble-search
|
|
3
|
+
description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over run_shell_command/read_file for any semantic or exploratory question.
|
|
4
|
+
tools:
|
|
5
|
+
- run_shell_command
|
|
6
|
+
- read_file
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
semble search "authentication flow" ./my-project
|
|
13
|
+
semble search "save_pretrained" ./my-project
|
|
14
|
+
semble search "save model to disk" ./my-project --top-k 10
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Results are cached automatically on first run and invalidated when files change.
|
|
18
|
+
|
|
19
|
+
Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
semble search "deployment guide" ./my-project --content docs
|
|
23
|
+
semble search "database host port" ./my-project --content config
|
|
24
|
+
semble search "authentication" ./my-project --content all
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
semble find-related src/auth.py 42 ./my-project
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
`path` defaults to the current directory when omitted; git URLs are accepted.
|
|
34
|
+
|
|
35
|
+
If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
|
|
36
|
+
|
|
37
|
+
### Workflow
|
|
38
|
+
|
|
39
|
+
1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
|
|
40
|
+
2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
|
|
41
|
+
3. Inspect full files only when the returned chunk does not give enough context.
|
|
42
|
+
4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
|
|
43
|
+
5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: semble-search
|
|
3
|
+
description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Bash/Read for any semantic or exploratory question.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
semble search "authentication flow" ./my-project
|
|
10
|
+
semble search "save_pretrained" ./my-project
|
|
11
|
+
semble search "save model to disk" ./my-project --top-k 10
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Results are cached automatically on first run and invalidated when files change.
|
|
15
|
+
|
|
16
|
+
Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
semble search "deployment guide" ./my-project --content docs
|
|
20
|
+
semble search "database host port" ./my-project --content config
|
|
21
|
+
semble search "authentication" ./my-project --content all
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
semble find-related src/auth.py 42 ./my-project
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
`path` defaults to the current directory when omitted; git URLs are accepted.
|
|
31
|
+
|
|
32
|
+
If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
|
|
33
|
+
|
|
34
|
+
### Workflow
|
|
35
|
+
|
|
36
|
+
1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
|
|
37
|
+
2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
|
|
38
|
+
3. Inspect full files only when the returned chunk does not give enough context.
|
|
39
|
+
4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
|
|
40
|
+
5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: semble-search
|
|
3
|
+
description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over bash/grep for any semantic or exploratory question.
|
|
4
|
+
runAs: subagent
|
|
5
|
+
allowed-tools: bash, read_file
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
semble search "authentication flow" ./my-project
|
|
12
|
+
semble search "save_pretrained" ./my-project
|
|
13
|
+
semble search "save model to disk" ./my-project --top-k 10
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Results are cached automatically on first run and invalidated when files change.
|
|
17
|
+
|
|
18
|
+
Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
semble search "deployment guide" ./my-project --content docs
|
|
22
|
+
semble search "database host port" ./my-project --content config
|
|
23
|
+
semble search "authentication" ./my-project --content all
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
semble find-related src/auth.py 42 ./my-project
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
`path` defaults to the current directory when omitted; git URLs are accepted.
|
|
33
|
+
|
|
34
|
+
If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
|
|
35
|
+
|
|
36
|
+
### Workflow
|
|
37
|
+
|
|
38
|
+
1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
|
|
39
|
+
2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
|
|
40
|
+
3. Inspect full files only when the returned chunk does not give enough context.
|
|
41
|
+
4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
|
|
42
|
+
5. Use bash/grep only when you need exhaustive literal matches or quick confirmation of an exact string.
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import json
|
|
3
|
+
import logging
|
|
3
4
|
import os
|
|
4
5
|
import shutil
|
|
5
6
|
import sys
|
|
@@ -13,6 +14,8 @@ from semble.index.types import PersistencePath
|
|
|
13
14
|
from semble.types import ContentType
|
|
14
15
|
from semble.utils import is_git_url, resolve_model_name
|
|
15
16
|
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
16
19
|
if TYPE_CHECKING:
|
|
17
20
|
from semble.index import SembleIndex
|
|
18
21
|
|
|
@@ -48,11 +51,24 @@ def _linux_cache_dir(name: str) -> Path:
|
|
|
48
51
|
return base / name
|
|
49
52
|
|
|
50
53
|
|
|
54
|
+
def _get_valid_user_cache_dir() -> Path | None:
|
|
55
|
+
"""Gets the user cache dir if it is set and is a valid path."""
|
|
56
|
+
user_cache_location = os.getenv("SEMBLE_CACHE_LOCATION")
|
|
57
|
+
if user_cache_location is None:
|
|
58
|
+
return None
|
|
59
|
+
user_cache_dir = Path(user_cache_location)
|
|
60
|
+
if not user_cache_dir.is_absolute():
|
|
61
|
+
logger.warning("SEMBLE_CACHE_LOCATION is not an absolute path: %s", user_cache_location)
|
|
62
|
+
return None
|
|
63
|
+
|
|
64
|
+
return user_cache_dir
|
|
65
|
+
|
|
66
|
+
|
|
51
67
|
def resolve_cache_folder() -> Path:
|
|
52
68
|
"""Resolves a cache folder, respects SEMBLE_CACHE_LOCATION (highest precedence), XDG_CACHE_HOME."""
|
|
53
69
|
name = "semble"
|
|
54
|
-
if
|
|
55
|
-
cache_dir =
|
|
70
|
+
if user_cache_dir := _get_valid_user_cache_dir():
|
|
71
|
+
cache_dir = user_cache_dir
|
|
56
72
|
elif sys.platform == "win32":
|
|
57
73
|
cache_dir = _windows_cache_dir(name)
|
|
58
74
|
elif sys.platform == "darwin":
|
|
@@ -1,19 +1,26 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import asyncio
|
|
3
3
|
import json
|
|
4
|
+
import re
|
|
4
5
|
import sys
|
|
5
6
|
import warnings
|
|
6
7
|
from importlib.util import find_spec
|
|
8
|
+
from shutil import rmtree
|
|
9
|
+
from typing import Literal
|
|
7
10
|
|
|
8
11
|
from model2vec.utils import get_package_extras
|
|
9
12
|
|
|
10
|
-
from semble.cache import find_index_from_cache_folder
|
|
13
|
+
from semble.cache import find_index_from_cache_folder, resolve_cache_folder
|
|
11
14
|
from semble.index import SembleIndex
|
|
15
|
+
from semble.index.types import PersistencePath
|
|
12
16
|
from semble.stats import format_savings_report
|
|
13
17
|
from semble.types import ContentType
|
|
14
18
|
from semble.utils import format_results, is_git_url, resolve_chunk
|
|
15
19
|
|
|
16
|
-
_CLI_DISPATCH_ARGS = frozenset({"search", "find-related", "install", "uninstall", "savings", "-h", "--help"})
|
|
20
|
+
_CLI_DISPATCH_ARGS = frozenset({"search", "find-related", "install", "uninstall", "savings", "-h", "--help", "clear"})
|
|
21
|
+
_CLEAR_CHOICE = Literal["all", "index", "savings"]
|
|
22
|
+
|
|
23
|
+
_SHA_256_REGEX = re.compile(r"^[a-f0-9]{64}$")
|
|
17
24
|
|
|
18
25
|
|
|
19
26
|
def _build_index(path: str, content: list[ContentType]) -> SembleIndex:
|
|
@@ -131,6 +138,35 @@ def _run_find_related(path: str, file_path: str, line: int, top_k: int, content:
|
|
|
131
138
|
_maybe_save_index(index, path)
|
|
132
139
|
|
|
133
140
|
|
|
141
|
+
def _run_clear(clear_type: _CLEAR_CHOICE) -> None:
|
|
142
|
+
"""Run the `clear` subcommand."""
|
|
143
|
+
cache_folder = resolve_cache_folder()
|
|
144
|
+
if clear_type == "index" or clear_type == "all":
|
|
145
|
+
indexes = []
|
|
146
|
+
for path in cache_folder.glob("*/index"):
|
|
147
|
+
if not _SHA_256_REGEX.match(path.parent.name):
|
|
148
|
+
continue
|
|
149
|
+
if PersistencePath.from_path(path).non_existing():
|
|
150
|
+
continue
|
|
151
|
+
indexes.append(path)
|
|
152
|
+
|
|
153
|
+
if not indexes:
|
|
154
|
+
print(f"No indexes found to clear in `{cache_folder}`")
|
|
155
|
+
else:
|
|
156
|
+
for path in indexes:
|
|
157
|
+
index_folder = path.parent
|
|
158
|
+
rmtree(index_folder)
|
|
159
|
+
print(f"Cleared index at `{index_folder}`")
|
|
160
|
+
|
|
161
|
+
if clear_type == "savings" or clear_type == "all":
|
|
162
|
+
path = cache_folder / "savings.jsonl"
|
|
163
|
+
if not path.exists():
|
|
164
|
+
print(f"No savings file found at `{path}`")
|
|
165
|
+
else:
|
|
166
|
+
path.unlink()
|
|
167
|
+
print(f"Cleared savings at `{path}`")
|
|
168
|
+
|
|
169
|
+
|
|
134
170
|
def _cli_main() -> None:
|
|
135
171
|
parser = argparse.ArgumentParser(prog="semble")
|
|
136
172
|
sub = parser.add_subparsers(dest="command")
|
|
@@ -141,6 +177,9 @@ def _cli_main() -> None:
|
|
|
141
177
|
search_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
|
|
142
178
|
_add_content_args(search_p)
|
|
143
179
|
|
|
180
|
+
clear_p = sub.add_parser("clear", help="Clear the index cache.")
|
|
181
|
+
clear_p.add_argument("type", choices=["all", "index", "savings"], help="Type of cache to clear.")
|
|
182
|
+
|
|
144
183
|
related_p = sub.add_parser("find-related", help="Find code similar to a specific location.")
|
|
145
184
|
related_p.add_argument("file_path", help="File path as shown in search results.")
|
|
146
185
|
related_p.add_argument("line", type=int, help="Line number (1-indexed).")
|
|
@@ -148,8 +187,7 @@ def _cli_main() -> None:
|
|
|
148
187
|
related_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
|
|
149
188
|
_add_content_args(related_p)
|
|
150
189
|
|
|
151
|
-
|
|
152
|
-
savings_p.add_argument("--verbose", action="store_true", help="Also show usage breakdown by call type.")
|
|
190
|
+
sub.add_parser("savings", help="Show token savings and usage stats.")
|
|
153
191
|
|
|
154
192
|
sub.add_parser("install", help="Interactively configure semble across coding agents.")
|
|
155
193
|
sub.add_parser("uninstall", help="Interactively remove semble configuration from coding agents.")
|
|
@@ -157,11 +195,13 @@ def _cli_main() -> None:
|
|
|
157
195
|
args = parser.parse_args()
|
|
158
196
|
|
|
159
197
|
if args.command == "savings":
|
|
160
|
-
print(format_savings_report(
|
|
198
|
+
print(format_savings_report())
|
|
161
199
|
elif args.command in ("install", "uninstall"):
|
|
162
200
|
from semble.installer import run
|
|
163
201
|
|
|
164
202
|
run(args.command)
|
|
203
|
+
elif args.command == "clear":
|
|
204
|
+
_run_clear(args.type)
|
|
165
205
|
elif args.command == "search":
|
|
166
206
|
_run_search(args.path, args.query, args.top_k, _resolve_content(args.content, args.include_text_files))
|
|
167
207
|
elif args.command == "find-related":
|