semble 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. {semble-0.3.2 → semble-0.3.4}/PKG-INFO +31 -11
  2. {semble-0.3.2 → semble-0.3.4}/README.md +30 -10
  3. {semble-0.3.2 → semble-0.3.4}/docs/installation.md +94 -2
  4. semble-0.3.4/src/semble/agents/commandcode.md +41 -0
  5. semble-0.3.4/src/semble/agents/gemini.md +43 -0
  6. semble-0.3.4/src/semble/agents/pi.md +40 -0
  7. semble-0.3.4/src/semble/agents/reasonix.md +42 -0
  8. {semble-0.3.2 → semble-0.3.4}/src/semble/cache.py +18 -2
  9. {semble-0.3.2 → semble-0.3.4}/src/semble/cli.py +45 -5
  10. {semble-0.3.2 → semble-0.3.4}/src/semble/installer/agents.py +45 -11
  11. {semble-0.3.2 → semble-0.3.4}/src/semble/installer/config.py +2 -2
  12. {semble-0.3.2 → semble-0.3.4}/src/semble/installer/installer.py +9 -9
  13. semble-0.3.4/src/semble/stats.py +234 -0
  14. {semble-0.3.2 → semble-0.3.4}/src/semble/version.py +1 -1
  15. {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/PKG-INFO +31 -11
  16. {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/SOURCES.txt +4 -0
  17. {semble-0.3.2 → semble-0.3.4}/tests/test_cache.py +17 -4
  18. {semble-0.3.2 → semble-0.3.4}/tests/test_cli.py +143 -3
  19. {semble-0.3.2 → semble-0.3.4}/tests/test_installer.py +19 -11
  20. {semble-0.3.2 → semble-0.3.4}/tests/test_stats.py +19 -39
  21. semble-0.3.2/src/semble/stats.py +0 -148
  22. {semble-0.3.2 → semble-0.3.4}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  23. {semble-0.3.2 → semble-0.3.4}/.github/workflows/ci.yaml +0 -0
  24. {semble-0.3.2 → semble-0.3.4}/.github/workflows/release.yaml +0 -0
  25. {semble-0.3.2 → semble-0.3.4}/.gitignore +0 -0
  26. {semble-0.3.2 → semble-0.3.4}/.pre-commit-config.yaml +0 -0
  27. {semble-0.3.2 → semble-0.3.4}/CITATION.cff +0 -0
  28. {semble-0.3.2 → semble-0.3.4}/CONTRIBUTING.md +0 -0
  29. {semble-0.3.2 → semble-0.3.4}/LICENSE +0 -0
  30. {semble-0.3.2 → semble-0.3.4}/MANIFEST.in +0 -0
  31. {semble-0.3.2 → semble-0.3.4}/Makefile +0 -0
  32. {semble-0.3.2 → semble-0.3.4}/pyproject.toml +0 -0
  33. {semble-0.3.2 → semble-0.3.4}/setup.cfg +0 -0
  34. {semble-0.3.2 → semble-0.3.4}/src/semble/__init__.py +0 -0
  35. /semble-0.3.2/src/semble/agents/gemini.md → /semble-0.3.4/src/semble/agents/antigravity.md +0 -0
  36. {semble-0.3.2 → semble-0.3.4}/src/semble/agents/claude.md +0 -0
  37. {semble-0.3.2 → semble-0.3.4}/src/semble/agents/copilot.md +0 -0
  38. {semble-0.3.2 → semble-0.3.4}/src/semble/agents/cursor.md +0 -0
  39. {semble-0.3.2 → semble-0.3.4}/src/semble/agents/kiro.md +0 -0
  40. {semble-0.3.2 → semble-0.3.4}/src/semble/agents/opencode.md +0 -0
  41. {semble-0.3.2 → semble-0.3.4}/src/semble/chunking/__init__.py +0 -0
  42. {semble-0.3.2 → semble-0.3.4}/src/semble/chunking/chunking.py +0 -0
  43. {semble-0.3.2 → semble-0.3.4}/src/semble/chunking/core.py +0 -0
  44. {semble-0.3.2 → semble-0.3.4}/src/semble/index/__init__.py +0 -0
  45. {semble-0.3.2 → semble-0.3.4}/src/semble/index/create.py +0 -0
  46. {semble-0.3.2 → semble-0.3.4}/src/semble/index/dense.py +0 -0
  47. {semble-0.3.2 → semble-0.3.4}/src/semble/index/file_walker.py +0 -0
  48. {semble-0.3.2 → semble-0.3.4}/src/semble/index/files.py +0 -0
  49. {semble-0.3.2 → semble-0.3.4}/src/semble/index/index.py +0 -0
  50. {semble-0.3.2 → semble-0.3.4}/src/semble/index/sparse.py +0 -0
  51. {semble-0.3.2 → semble-0.3.4}/src/semble/index/types.py +0 -0
  52. {semble-0.3.2 → semble-0.3.4}/src/semble/installer/__init__.py +0 -0
  53. {semble-0.3.2 → semble-0.3.4}/src/semble/mcp.py +0 -0
  54. {semble-0.3.2 → semble-0.3.4}/src/semble/py.typed +0 -0
  55. {semble-0.3.2 → semble-0.3.4}/src/semble/ranking/__init__.py +0 -0
  56. {semble-0.3.2 → semble-0.3.4}/src/semble/ranking/boosting.py +0 -0
  57. {semble-0.3.2 → semble-0.3.4}/src/semble/ranking/penalties.py +0 -0
  58. {semble-0.3.2 → semble-0.3.4}/src/semble/ranking/weighting.py +0 -0
  59. {semble-0.3.2 → semble-0.3.4}/src/semble/search.py +0 -0
  60. {semble-0.3.2 → semble-0.3.4}/src/semble/tokens.py +0 -0
  61. {semble-0.3.2 → semble-0.3.4}/src/semble/types.py +0 -0
  62. {semble-0.3.2 → semble-0.3.4}/src/semble/utils.py +0 -0
  63. {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/dependency_links.txt +0 -0
  64. {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/entry_points.txt +0 -0
  65. {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/requires.txt +0 -0
  66. {semble-0.3.2 → semble-0.3.4}/src/semble.egg-info/top_level.txt +0 -0
  67. {semble-0.3.2 → semble-0.3.4}/tests/__init__.py +0 -0
  68. {semble-0.3.2 → semble-0.3.4}/tests/conftest.py +0 -0
  69. {semble-0.3.2 → semble-0.3.4}/tests/index/test_dense.py +0 -0
  70. {semble-0.3.2 → semble-0.3.4}/tests/index/test_index.py +0 -0
  71. {semble-0.3.2 → semble-0.3.4}/tests/test_chunker.py +0 -0
  72. {semble-0.3.2 → semble-0.3.4}/tests/test_file_walker.py +0 -0
  73. {semble-0.3.2 → semble-0.3.4}/tests/test_files.py +0 -0
  74. {semble-0.3.2 → semble-0.3.4}/tests/test_git.py +0 -0
  75. {semble-0.3.2 → semble-0.3.4}/tests/test_mcp.py +0 -0
  76. {semble-0.3.2 → semble-0.3.4}/tests/test_ranking.py +0 -0
  77. {semble-0.3.2 → semble-0.3.4}/tests/test_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: semble
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Fast and Accurate Code Search for Agents
5
5
  Author-email: Thomas van Dongen <thomasvdongen@proton.me>, Stéphan Tulkens <stephantul@gmail.com>
6
6
  License: MIT License
@@ -98,7 +98,7 @@ Dynamic: license-file
98
98
 
99
99
  </div>
100
100
 
101
- Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Run it as an [MCP server](#mcp-server) or call it from the shell via [AGENTS.md](#agentsmd) and any agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
101
+ Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Use it as an MCP server, a CLI tool via AGENTS.md, or a dedicated sub-agent, and any coding agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
102
102
 
103
103
  ## Quickstart
104
104
 
@@ -194,23 +194,43 @@ Semble also always skips a set of well-known non-source directories regardless o
194
194
  `semble savings` shows how many tokens semble has saved across all your searches:
195
195
 
196
196
  ```bash
197
- semble savings # summary by period
198
- semble savings --verbose # also show breakdown by call type
197
+ semble savings
199
198
  ```
200
199
 
201
200
  ```
202
201
  Semble Token Savings
203
- ════════════════════════════════════════════════════════════════
204
- Period Calls Savings
205
- ────────────────────────────────────────────────────────────────
206
- Today 42 [███████████████░] ~58.4k tokens (95%)
207
- Last 7 days 287 [██████████████░░] ~312.4k tokens (90%)
208
- All time 1.4k [██████████████░░] ~1.2M tokens (89%)
202
+ ════════════════════════════════════════════════════════════════════════
203
+
204
+ Total saved: ~714.2M tokens (94%)
205
+ Total calls: 14.3k
206
+ Efficiency: ███████████████████████░ 94%
207
+
208
+ By Period
209
+ ────────────────────────────────────────────────────────────────────────
210
+ Period Calls Saved Ratio
211
+ ────────────────────────────────────────────────────────────────────────
212
+ Today 198 ~1.4M tokens ███████████████████████░ 95%
213
+ Last 7 days 13.1k ~707.2M tokens ███████████████████████░ 94%
214
+ All time 14.3k ~714.2M tokens ███████████████████████░ 94%
215
+
216
+ By Call Type
217
+ ────────────────────────────────────────────────────────────────────────
218
+ # Call type Calls Share
219
+ ────────────────────────────────────────────────────────────────────────
220
+ 1. search 14.1k ████████████████ 99%
221
+ 2. find_related 205 █░░░░░░░░░░░░░░░ 1%
222
+ ════════════════════════════════════════════════════════════════════════
209
223
  ```
210
224
 
225
+
211
226
  Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.
212
227
 
213
- By default, stats are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location you can supply an environment variable `SEMBLE_CACHE_LOCATION` which should be the full path to the target cache location e.g. 'd:\caches\storemysemblecachehere'.
228
+ </details>
229
+
230
+ <details>
231
+ <summary>Storage</summary>
232
+
233
+ By default, your Semble savings statistics and any saved indexes are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location, set the environment variable `SEMBLE_CACHE_LOCATION` to the absolute path of the target cache folder, e.g. `/home/me/caches/semble`. A relative path (including an unexpanded `~`) is ignored with a warning, and the default location is used instead.
214
234
 
215
235
  </details>
216
236
 
@@ -24,7 +24,7 @@
24
24
 
25
25
  </div>
26
26
 
27
- Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Run it as an [MCP server](#mcp-server) or call it from the shell via [AGENTS.md](#agentsmd) and any agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
27
+ Semble is a code search library built for agents. It returns the exact code snippets they need instantly, using ~98% fewer tokens than grep+read. Indexing and searching a full codebase end-to-end takes under a second, with ~200x faster indexing and ~10x faster queries than a code-specialized transformer, at 99% of its retrieval quality (see [benchmarks](#benchmarks)). Everything runs on CPU with no API keys, GPU, or external services. Use it as an MCP server, a CLI tool via AGENTS.md, or a dedicated sub-agent, and any coding agent (Claude Code, Cursor, Codex, OpenCode, etc.) gets instant access to any repo.
28
28
 
29
29
  ## Quickstart
30
30
 
@@ -120,23 +120,43 @@ Semble also always skips a set of well-known non-source directories regardless o
120
120
  `semble savings` shows how many tokens semble has saved across all your searches:
121
121
 
122
122
  ```bash
123
- semble savings # summary by period
124
- semble savings --verbose # also show breakdown by call type
123
+ semble savings
125
124
  ```
126
125
 
127
126
  ```
128
127
  Semble Token Savings
129
- ════════════════════════════════════════════════════════════════
130
- Period Calls Savings
131
- ────────────────────────────────────────────────────────────────
132
- Today 42 [███████████████░] ~58.4k tokens (95%)
133
- Last 7 days 287 [██████████████░░] ~312.4k tokens (90%)
134
- All time 1.4k [██████████████░░] ~1.2M tokens (89%)
128
+ ════════════════════════════════════════════════════════════════════════
129
+
130
+ Total saved: ~714.2M tokens (94%)
131
+ Total calls: 14.3k
132
+ Efficiency: ███████████████████████░ 94%
133
+
134
+ By Period
135
+ ────────────────────────────────────────────────────────────────────────
136
+ Period Calls Saved Ratio
137
+ ────────────────────────────────────────────────────────────────────────
138
+ Today 198 ~1.4M tokens ███████████████████████░ 95%
139
+ Last 7 days 13.1k ~707.2M tokens ███████████████████████░ 94%
140
+ All time 14.3k ~714.2M tokens ███████████████████████░ 94%
141
+
142
+ By Call Type
143
+ ────────────────────────────────────────────────────────────────────────
144
+ # Call type Calls Share
145
+ ────────────────────────────────────────────────────────────────────────
146
+ 1. search 14.1k ████████████████ 99%
147
+ 2. find_related 205 █░░░░░░░░░░░░░░░ 1%
148
+ ════════════════════════════════════════════════════════════════════════
135
149
  ```
136
150
 
151
+
137
152
  Savings are calculated as follows: for each call, semble records the total character count of the unique files containing returned chunks and the character count of the snippets returned. Estimated tokens saved is `(file chars − snippet chars) / 4` (4 chars per token). This is a conservative estimate: the baseline is reading matched files in full, which is how coding agents often explore unfamiliar code.
138
153
 
139
- By default, stats are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location you can supply an environment variable `SEMBLE_CACHE_LOCATION` which should be the full path to the target cache location e.g. 'd:\caches\storemysemblecachehere'.
154
+ </details>
155
+
156
+ <details>
157
+ <summary>Storage</summary>
158
+
159
+ By default, your Semble savings statistics and any saved indexes are stored in the OS cache folder (`~/Library/Caches/semble/` on macOS, `~/.cache/semble/` on Linux, `%LOCALAPPDATA%\semble\Cache\` on Windows). To override this location, set the environment variable `SEMBLE_CACHE_LOCATION` to the absolute path of the target cache folder, e.g. `/home/me/caches/semble`. A relative path (including an unexpanded `~`) is ignored with a warning, and the default location is used instead.
140
160
 
141
161
  </details>
142
162
 
@@ -21,7 +21,9 @@ To undo:
21
21
  semble uninstall
22
22
  ```
23
23
 
24
- Supported agents: Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Codex, VS Code, Windsurf, and Zed.
24
+ Supported agents: Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Codex, VS Code, Windsurf, Zed, Reasonix, Pi, Command Code, and Antigravity.
25
+
26
+ > **Pi prerequisite:** Pi requires the MCP extension to be installed before semble can connect. Run `pi install npm:pi-mcp-extension` once, then `semble install`.
25
27
 
26
28
  ---
27
29
 
@@ -198,6 +200,90 @@ Add to `~/.config/zed/settings.json` (or `.zed/settings.json` in your project):
198
200
 
199
201
  </details>
200
202
 
203
+ <details>
204
+ <summary>Reasonix</summary>
205
+
206
+ Add to `~/.reasonix/config.json` (the backwards-compatible MCP config path read by all Reasonix versions):
207
+
208
+ ```json
209
+ {
210
+ "mcpServers": {
211
+ "semble": {
212
+ "command": "uvx",
213
+ "args": ["--from", "semble[mcp]", "semble"]
214
+ }
215
+ }
216
+ }
217
+ ```
218
+
219
+ </details>
220
+
221
+ <details>
222
+ <summary>Pi</summary>
223
+
224
+ First install the Pi MCP extension (one-time prerequisite):
225
+
226
+ ```bash
227
+ pi install npm:pi-mcp-extension
228
+ ```
229
+
230
+ Then add to `~/.pi/agent/mcp.json`:
231
+
232
+ ```json
233
+ {
234
+ "mcpServers": {
235
+ "semble": {
236
+ "command": "uvx",
237
+ "args": ["--from", "semble[mcp]", "semble"]
238
+ }
239
+ }
240
+ }
241
+ ```
242
+
243
+ </details>
244
+
245
+ <details>
246
+ <summary>Antigravity</summary>
247
+
248
+ Add to `~/.gemini/config/mcp_config.json`:
249
+
250
+ ```json
251
+ {
252
+ "mcpServers": {
253
+ "semble": {
254
+ "command": "uvx",
255
+ "args": ["--from", "semble[mcp]", "semble"]
256
+ }
257
+ }
258
+ }
259
+ ```
260
+
261
+ </details>
262
+
263
+ <details>
264
+ <summary>Command Code</summary>
265
+
266
+ Add to `~/.commandcode/mcp.json`:
267
+
268
+ ```json
269
+ {
270
+ "mcpServers": {
271
+ "semble": {
272
+ "command": "uvx",
273
+ "args": ["--from", "semble[mcp]", "semble"]
274
+ }
275
+ }
276
+ }
277
+ ```
278
+
279
+ Or use the CLI:
280
+
281
+ ```bash
282
+ cmd mcp add --scope user semble -- uvx --from "semble[mcp]" semble
283
+ ```
284
+
285
+ </details>
286
+
201
287
  By default the MCP server indexes only code files. To also index documentation, config, or everything, append `--content docs`, `--content config`, or `--content all` to the server command. For example, in Claude Code:
202
288
 
203
289
  ```bash
@@ -250,7 +336,9 @@ If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its plac
250
336
 
251
337
  ### Sub-agent
252
338
 
253
- For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot), you can install a dedicated `semble-search` sub-agent. Copy the appropriate file from [`src/semble/agents/`](../src/semble/agents/) to your agent's agents directory:
339
+ For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, OpenCode, GitHub Copilot, Reasonix, Pi, Command Code, Antigravity), you can install a dedicated `semble-search` sub-agent. Copy the appropriate file from [`src/semble/agents/`](../src/semble/agents/) to the destination listed for your agent in the table below:
340
+
341
+ > **Pi prerequisite:** Pi sub-agents require the Pi agents extension. Run `pi install npm:pi-agents` once before installing.
254
342
 
255
343
  | Agent | File | Destination |
256
344
  |---|---|---|
@@ -260,3 +348,7 @@ For harnesses that support sub-agents (Claude Code, Cursor, Gemini CLI, Kiro, Op
260
348
  | Kiro | `kiro.md` | `~/.kiro/agents/semble-search.md` |
261
349
  | OpenCode | `opencode.md` | `~/.config/opencode/agents/semble-search.md` |
262
350
  | GitHub Copilot | `copilot.md` | `~/.copilot/agents/semble-search.agent.md` |
351
+ | Reasonix | `reasonix.md` | `~/.reasonix/skills/semble-search.md` |
352
+ | Pi | `pi.md` | `~/.pi/agents/semble-search.md` |
353
+ | Command Code | `commandcode.md` | `~/.commandcode/agents/semble-search.md` |
354
+ | Antigravity | `antigravity.md` | `~/.gemini/config/skills/semble-search/SKILL.md` |
@@ -0,0 +1,41 @@
1
+ ---
2
+ name: semble-search
3
+ description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over bash/read_file for any semantic or exploratory question.
4
+ tools: bash, read_file
5
+ ---
6
+
7
+ Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
8
+
9
+ ```bash
10
+ semble search "authentication flow" ./my-project
11
+ semble search "save_pretrained" ./my-project
12
+ semble search "save model to disk" ./my-project --top-k 10
13
+ ```
14
+
15
+ Results are cached automatically on first run and invalidated when files change.
16
+
17
+ Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
18
+
19
+ ```bash
20
+ semble search "deployment guide" ./my-project --content docs
21
+ semble search "database host port" ./my-project --content config
22
+ semble search "authentication" ./my-project --content all
23
+ ```
24
+
25
+ Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
26
+
27
+ ```bash
28
+ semble find-related src/auth.py 42 ./my-project
29
+ ```
30
+
31
+ `path` defaults to the current directory when omitted; git URLs are accepted.
32
+
33
+ If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
34
+
35
+ ### Workflow
36
+
37
+ 1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
38
+ 2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
39
+ 3. Inspect full files only when the returned chunk does not give enough context.
40
+ 4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
41
+ 5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
@@ -0,0 +1,43 @@
1
+ ---
2
+ name: semble-search
3
+ description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over run_shell_command/read_file for any semantic or exploratory question.
4
+ tools:
5
+ - run_shell_command
6
+ - read_file
7
+ ---
8
+
9
+ Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
10
+
11
+ ```bash
12
+ semble search "authentication flow" ./my-project
13
+ semble search "save_pretrained" ./my-project
14
+ semble search "save model to disk" ./my-project --top-k 10
15
+ ```
16
+
17
+ Results are cached automatically on first run and invalidated when files change.
18
+
19
+ Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
20
+
21
+ ```bash
22
+ semble search "deployment guide" ./my-project --content docs
23
+ semble search "database host port" ./my-project --content config
24
+ semble search "authentication" ./my-project --content all
25
+ ```
26
+
27
+ Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
28
+
29
+ ```bash
30
+ semble find-related src/auth.py 42 ./my-project
31
+ ```
32
+
33
+ `path` defaults to the current directory when omitted; git URLs are accepted.
34
+
35
+ If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
36
+
37
+ ### Workflow
38
+
39
+ 1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
40
+ 2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
41
+ 3. Inspect full files only when the returned chunk does not give enough context.
42
+ 4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
43
+ 5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
@@ -0,0 +1,40 @@
1
+ ---
2
+ name: semble-search
3
+ description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over Bash/Read for any semantic or exploratory question.
4
+ ---
5
+
6
+ Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
7
+
8
+ ```bash
9
+ semble search "authentication flow" ./my-project
10
+ semble search "save_pretrained" ./my-project
11
+ semble search "save model to disk" ./my-project --top-k 10
12
+ ```
13
+
14
+ Results are cached automatically on first run and invalidated when files change.
15
+
16
+ Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
17
+
18
+ ```bash
19
+ semble search "deployment guide" ./my-project --content docs
20
+ semble search "database host port" ./my-project --content config
21
+ semble search "authentication" ./my-project --content all
22
+ ```
23
+
24
+ Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
25
+
26
+ ```bash
27
+ semble find-related src/auth.py 42 ./my-project
28
+ ```
29
+
30
+ `path` defaults to the current directory when omitted; git URLs are accepted.
31
+
32
+ If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
33
+
34
+ ### Workflow
35
+
36
+ 1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
37
+ 2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
38
+ 3. Inspect full files only when the returned chunk does not give enough context.
39
+ 4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
40
+ 5. Use grep only when you need exhaustive literal matches or quick confirmation of an exact string.
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: semble-search
3
+ description: Code search agent for exploring any codebase. Use for finding code by intent, locating implementations, understanding how something works, or discovering related code. Prefer over bash/grep for any semantic or exploratory question.
4
+ runAs: subagent
5
+ allowed-tools: bash, read_file
6
+ ---
7
+
8
+ Use `semble search` to find code by describing what it does or naming a symbol/identifier, instead of grep:
9
+
10
+ ```bash
11
+ semble search "authentication flow" ./my-project
12
+ semble search "save_pretrained" ./my-project
13
+ semble search "save model to disk" ./my-project --top-k 10
14
+ ```
15
+
16
+ Results are cached automatically on first run and invalidated when files change.
17
+
18
+ Use `--content docs` to search documentation and prose, `--content config` for config files (yaml, toml, etc.), or `--content all` to search code, docs, and config:
19
+
20
+ ```bash
21
+ semble search "deployment guide" ./my-project --content docs
22
+ semble search "database host port" ./my-project --content config
23
+ semble search "authentication" ./my-project --content all
24
+ ```
25
+
26
+ Use `semble find-related` to discover code similar to a known location (pass `file_path` and `line` from a prior search result):
27
+
28
+ ```bash
29
+ semble find-related src/auth.py 42 ./my-project
30
+ ```
31
+
32
+ `path` defaults to the current directory when omitted; git URLs are accepted.
33
+
34
+ If `semble` is not on `$PATH`, use `uvx --from "semble[mcp]" semble` in its place.
35
+
36
+ ### Workflow
37
+
38
+ 1. Start with `semble search` to find relevant chunks. The index is built and cached automatically.
39
+ 2. Use `--content docs` for documentation, `--content config` for config files, or `--content all` for everything.
40
+ 3. Inspect full files only when the returned chunk does not give enough context.
41
+ 4. Optionally use `semble find-related` with a promising result's `file_path` and `line` to discover related implementations.
42
+ 5. Use bash/grep only when you need exhaustive literal matches or quick confirmation of an exact string.
@@ -1,5 +1,6 @@
1
1
  import hashlib
2
2
  import json
3
+ import logging
3
4
  import os
4
5
  import shutil
5
6
  import sys
@@ -13,6 +14,8 @@ from semble.index.types import PersistencePath
13
14
  from semble.types import ContentType
14
15
  from semble.utils import is_git_url, resolve_model_name
15
16
 
17
+ logger = logging.getLogger(__name__)
18
+
16
19
  if TYPE_CHECKING:
17
20
  from semble.index import SembleIndex
18
21
 
@@ -48,11 +51,24 @@ def _linux_cache_dir(name: str) -> Path:
48
51
  return base / name
49
52
 
50
53
 
54
+ def _get_valid_user_cache_dir() -> Path | None:
55
+ """Gets the user cache dir if it is set and is a valid path."""
56
+ user_cache_location = os.getenv("SEMBLE_CACHE_LOCATION")
57
+ if user_cache_location is None:
58
+ return None
59
+ user_cache_dir = Path(user_cache_location)
60
+ if not user_cache_dir.is_absolute():
61
+ logger.warning("SEMBLE_CACHE_LOCATION is not an absolute path: %s", user_cache_location)
62
+ return None
63
+
64
+ return user_cache_dir
65
+
66
+
51
67
  def resolve_cache_folder() -> Path:
52
68
  """Resolves a cache folder, respects SEMBLE_CACHE_LOCATION (highest precedence), XDG_CACHE_HOME."""
53
69
  name = "semble"
54
- if semble_cache_location := os.getenv("SEMBLE_CACHE_LOCATION"):
55
- cache_dir = Path(semble_cache_location)
70
+ if user_cache_dir := _get_valid_user_cache_dir():
71
+ cache_dir = user_cache_dir
56
72
  elif sys.platform == "win32":
57
73
  cache_dir = _windows_cache_dir(name)
58
74
  elif sys.platform == "darwin":
@@ -1,19 +1,26 @@
1
1
  import argparse
2
2
  import asyncio
3
3
  import json
4
+ import re
4
5
  import sys
5
6
  import warnings
6
7
  from importlib.util import find_spec
8
+ from shutil import rmtree
9
+ from typing import Literal
7
10
 
8
11
  from model2vec.utils import get_package_extras
9
12
 
10
- from semble.cache import find_index_from_cache_folder
13
+ from semble.cache import find_index_from_cache_folder, resolve_cache_folder
11
14
  from semble.index import SembleIndex
15
+ from semble.index.types import PersistencePath
12
16
  from semble.stats import format_savings_report
13
17
  from semble.types import ContentType
14
18
  from semble.utils import format_results, is_git_url, resolve_chunk
15
19
 
16
- _CLI_DISPATCH_ARGS = frozenset({"search", "find-related", "install", "uninstall", "savings", "-h", "--help"})
20
+ _CLI_DISPATCH_ARGS = frozenset({"search", "find-related", "install", "uninstall", "savings", "-h", "--help", "clear"})
21
+ _CLEAR_CHOICE = Literal["all", "index", "savings"]
22
+
23
+ _SHA_256_REGEX = re.compile(r"^[a-f0-9]{64}$")
17
24
 
18
25
 
19
26
  def _build_index(path: str, content: list[ContentType]) -> SembleIndex:
@@ -131,6 +138,35 @@ def _run_find_related(path: str, file_path: str, line: int, top_k: int, content:
131
138
  _maybe_save_index(index, path)
132
139
 
133
140
 
141
+ def _run_clear(clear_type: _CLEAR_CHOICE) -> None:
142
+ """Run the `clear` subcommand."""
143
+ cache_folder = resolve_cache_folder()
144
+ if clear_type == "index" or clear_type == "all":
145
+ indexes = []
146
+ for path in cache_folder.glob("*/index"):
147
+ if not _SHA_256_REGEX.match(path.parent.name):
148
+ continue
149
+ if PersistencePath.from_path(path).non_existing():
150
+ continue
151
+ indexes.append(path)
152
+
153
+ if not indexes:
154
+ print(f"No indexes found to clear in `{cache_folder}`")
155
+ else:
156
+ for path in indexes:
157
+ index_folder = path.parent
158
+ rmtree(index_folder)
159
+ print(f"Cleared index at `{index_folder}`")
160
+
161
+ if clear_type == "savings" or clear_type == "all":
162
+ path = cache_folder / "savings.jsonl"
163
+ if not path.exists():
164
+ print(f"No savings file found at `{path}`")
165
+ else:
166
+ path.unlink()
167
+ print(f"Cleared savings at `{path}`")
168
+
169
+
134
170
  def _cli_main() -> None:
135
171
  parser = argparse.ArgumentParser(prog="semble")
136
172
  sub = parser.add_subparsers(dest="command")
@@ -141,6 +177,9 @@ def _cli_main() -> None:
141
177
  search_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
142
178
  _add_content_args(search_p)
143
179
 
180
+ clear_p = sub.add_parser("clear", help="Clear the index cache.")
181
+ clear_p.add_argument("type", choices=["all", "index", "savings"], help="Type of cache to clear.")
182
+
144
183
  related_p = sub.add_parser("find-related", help="Find code similar to a specific location.")
145
184
  related_p.add_argument("file_path", help="File path as shown in search results.")
146
185
  related_p.add_argument("line", type=int, help="Line number (1-indexed).")
@@ -148,8 +187,7 @@ def _cli_main() -> None:
148
187
  related_p.add_argument("-k", "--top-k", type=int, default=5, help="Number of results (default: 5).")
149
188
  _add_content_args(related_p)
150
189
 
151
- savings_p = sub.add_parser("savings", help="Show token savings and usage stats.")
152
- savings_p.add_argument("--verbose", action="store_true", help="Also show usage breakdown by call type.")
190
+ sub.add_parser("savings", help="Show token savings and usage stats.")
153
191
 
154
192
  sub.add_parser("install", help="Interactively configure semble across coding agents.")
155
193
  sub.add_parser("uninstall", help="Interactively remove semble configuration from coding agents.")
@@ -157,11 +195,13 @@ def _cli_main() -> None:
157
195
  args = parser.parse_args()
158
196
 
159
197
  if args.command == "savings":
160
- print(format_savings_report(verbose=args.verbose))
198
+ print(format_savings_report())
161
199
  elif args.command in ("install", "uninstall"):
162
200
  from semble.installer import run
163
201
 
164
202
  run(args.command)
203
+ elif args.command == "clear":
204
+ _run_clear(args.type)
165
205
  elif args.command == "search":
166
206
  _run_search(args.path, args.query, args.top_k, _resolve_content(args.content, args.include_text_files))
167
207
  elif args.command == "find-related":