codetool-explore 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. codetool_explore-0.5.0/.gitignore +19 -0
  2. codetool_explore-0.5.0/LICENSE +21 -0
  3. codetool_explore-0.5.0/PKG-INFO +240 -0
  4. codetool_explore-0.5.0/README.md +210 -0
  5. codetool_explore-0.5.0/benchmarks/benchmark_output_lengths.py +489 -0
  6. codetool_explore-0.5.0/benchmarks/benchmark_search.py +919 -0
  7. codetool_explore-0.5.0/hatch_build.py +151 -0
  8. codetool_explore-0.5.0/pyproject.toml +95 -0
  9. codetool_explore-0.5.0/rust/Cargo.lock +267 -0
  10. codetool_explore-0.5.0/rust/Cargo.toml +12 -0
  11. codetool_explore-0.5.0/rust/src/app.rs +53 -0
  12. codetool_explore-0.5.0/rust/src/case.rs +20 -0
  13. codetool_explore-0.5.0/rust/src/config.rs +223 -0
  14. codetool_explore-0.5.0/rust/src/constants.rs +30 -0
  15. codetool_explore-0.5.0/rust/src/file_search.rs +70 -0
  16. codetool_explore-0.5.0/rust/src/ignore_rules.rs +168 -0
  17. codetool_explore-0.5.0/rust/src/literal.rs +220 -0
  18. codetool_explore-0.5.0/rust/src/main.rs +30 -0
  19. codetool_explore-0.5.0/rust/src/matcher.rs +70 -0
  20. codetool_explore-0.5.0/rust/src/models.rs +50 -0
  21. codetool_explore-0.5.0/rust/src/output.rs +268 -0
  22. codetool_explore-0.5.0/rust/src/path_utils.rs +125 -0
  23. codetool_explore-0.5.0/rust/src/ranking.rs +533 -0
  24. codetool_explore-0.5.0/rust/src/regex_search.rs +150 -0
  25. codetool_explore-0.5.0/rust/src/search.rs +96 -0
  26. codetool_explore-0.5.0/rust/src/text.rs +164 -0
  27. codetool_explore-0.5.0/rust/src/walker.rs +871 -0
  28. codetool_explore-0.5.0/scripts/package_rust_binary.py +200 -0
  29. codetool_explore-0.5.0/scripts/update_readme_benchmarks.py +362 -0
  30. codetool_explore-0.5.0/src/codetool_explore/__init__.py +35 -0
  31. codetool_explore-0.5.0/src/codetool_explore/api.py +266 -0
  32. codetool_explore-0.5.0/src/codetool_explore/cli.py +188 -0
  33. codetool_explore-0.5.0/src/codetool_explore/compression.py +150 -0
  34. codetool_explore-0.5.0/src/codetool_explore/cursor.py +71 -0
  35. codetool_explore-0.5.0/src/codetool_explore/errors.py +23 -0
  36. codetool_explore-0.5.0/src/codetool_explore/explorer.py +497 -0
  37. codetool_explore-0.5.0/src/codetool_explore/ignore.py +222 -0
  38. codetool_explore-0.5.0/src/codetool_explore/py.typed +0 -0
  39. codetool_explore-0.5.0/src/codetool_explore/python_backend/__init__.py +154 -0
  40. codetool_explore-0.5.0/src/codetool_explore/python_backend/case.py +19 -0
  41. codetool_explore-0.5.0/src/codetool_explore/python_backend/config.py +35 -0
  42. codetool_explore-0.5.0/src/codetool_explore/python_backend/constants.py +39 -0
  43. codetool_explore-0.5.0/src/codetool_explore/python_backend/file_search.py +51 -0
  44. codetool_explore-0.5.0/src/codetool_explore/python_backend/ignore_rules.py +40 -0
  45. codetool_explore-0.5.0/src/codetool_explore/python_backend/literal.py +79 -0
  46. codetool_explore-0.5.0/src/codetool_explore/python_backend/matcher.py +79 -0
  47. codetool_explore-0.5.0/src/codetool_explore/python_backend/models.py +49 -0
  48. codetool_explore-0.5.0/src/codetool_explore/python_backend/output.py +82 -0
  49. codetool_explore-0.5.0/src/codetool_explore/python_backend/regex_search.py +63 -0
  50. codetool_explore-0.5.0/src/codetool_explore/python_backend/search.py +327 -0
  51. codetool_explore-0.5.0/src/codetool_explore/python_backend/text.py +39 -0
  52. codetool_explore-0.5.0/src/codetool_explore/python_backend/walker.py +119 -0
  53. codetool_explore-0.5.0/src/codetool_explore/ranking.py +384 -0
  54. codetool_explore-0.5.0/src/codetool_explore/roots.py +148 -0
  55. codetool_explore-0.5.0/src/codetool_explore/rust_backend.py +308 -0
  56. codetool_explore-0.5.0/src/codetool_explore/text_output.py +475 -0
  57. codetool_explore-0.5.0/tests/test___init__.py +25 -0
  58. codetool_explore-0.5.0/tests/test_api.py +823 -0
  59. codetool_explore-0.5.0/tests/test_cli.py +256 -0
  60. codetool_explore-0.5.0/tests/test_cursor.py +44 -0
  61. codetool_explore-0.5.0/tests/test_hatch_build.py +36 -0
  62. codetool_explore-0.5.0/tests/test_ignore.py +47 -0
  63. codetool_explore-0.5.0/tests/test_packaged_binary.py +112 -0
  64. codetool_explore-0.5.0/tests/test_python_backend.py +354 -0
  65. codetool_explore-0.5.0/tests/test_ranking.py +118 -0
  66. codetool_explore-0.5.0/tests/test_rust_backend.py +236 -0
  67. codetool_explore-0.5.0/tests/test_rust_cli.py +474 -0
  68. codetool_explore-0.5.0/uv.lock +368 -0
@@ -0,0 +1,19 @@
1
+ .venv/
2
+ __pycache__/
3
+ .pytest_cache/
4
+ *.py[cod]
5
+ rust/target/
6
+ benchmark-corpus/
7
+ benchmarks/corpus/
8
+ reports/*
9
+ !reports/research_lab_benchmark.json
10
+ !reports/research_lab_result.html
11
+ !reports/rtk_vs_codetool_output_length_analysis.html
12
+ !reports/rtk_vs_codetool_output_lengths.json
13
+ !reports/search_benchmark.json
14
+ !reports/search_benchmark_analysis.html
15
+ !reports/search_benchmark_rtk_compression.json
16
+ !reports/search_compression_analysis.html
17
+ research/
18
+ .tmp*/
19
+ .ruff_cache/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 drod
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,240 @@
1
+ Metadata-Version: 2.4
2
+ Name: codetool-explore
3
+ Version: 0.5.0
4
+ Summary: Fast, dependency-free workspace search, read, and list exploration for coding-agent tools with Rust backend
5
+ Project-URL: Homepage, https://github.com/pbi-agent/codetool-explore
6
+ Project-URL: Repository, https://github.com/pbi-agent/codetool-explore
7
+ Project-URL: Issues, https://github.com/pbi-agent/codetool-explore/issues
8
+ Project-URL: Changelog, https://github.com/pbi-agent/codetool-explore/releases
9
+ Author-email: drod <naceur.bs@gmail.com>
10
+ Maintainer-email: drod <naceur.bs@gmail.com>
11
+ License-Expression: MIT
12
+ License-File: LICENSE
13
+ Keywords: agent,code-search,developer-tools,explore,file-search,filesystem,rust,search,text-search
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Operating System :: Microsoft :: Windows
19
+ Classifier: Operating System :: POSIX :: Linux
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3 :: Only
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Rust
24
+ Classifier: Topic :: Software Development
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Topic :: Text Processing :: Indexing
27
+ Classifier: Typing :: Typed
28
+ Requires-Python: >=3.12
29
+ Description-Content-Type: text/markdown
30
+
31
+ # codetool-explore
32
+
33
+ `codetool-explore` is a workspace exploration library built for coding-agent harnesses: fast content search, fast filename/path discovery, read-only file viewing, compact structured results, and predictable token usage.
34
+
35
+ - **Agent-first API**: one public `explore()` call with `target="content"`, `"path"`, `"content_or_path"`, `"read"`, or `"list"`.
36
+ - **Performance-oriented**: dependency-free Python fallback plus optional Rust CLI acceleration for literal and regex content/path search.
37
+ - **Token-compressed output**: compact result keys by default for search, tree-compressed text by default for list, plain text by default for read, `result_format="text"` for raw RTK-style text, and `result_format="full"` for the uncompressed backend shape.
38
+
39
+ ```python
40
+ from codetool_explore import explore
41
+
42
+ content = explore("UserService", root=".", mode="files")
43
+ paths = explore("service", root=".", target="path", glob="*.py")
44
+ mixed = explore("UserService", root=".", target="content_or_path")
45
+ scoped = explore("search_workspace", root=["src", "webapp", "tests"], regex=False)
46
+ snippet = explore("README.md", root=".", target="read", start_line=20, limit=40)
47
+ listing = explore("src", root=".", target="list", limit=100)
48
+ ```
49
+
50
+ Patterns are regexes by default, so alternation works without extra flags:
51
+
52
+ ```python
53
+ explore("Maximum number of results|Text or regex pattern", root="tests")
54
+ ```
55
+
56
+ Pass `regex=False` for exact literal matching.
57
+
58
+ For maximum token compression, request raw text:
59
+
60
+ ```python
61
+ print(explore("UserService", root=".", regex=False, result_format="text"))
62
+ ```
63
+
64
+ Raw text omits backend/totals metadata, groups repeated path prefixes in a small
65
+ tree, crops long snippets/context aggressively, and prints `No Match` for empty
66
+ results. It includes a compact pagination header only when another page exists:
67
+
68
+ ```text
69
+ -- more: cursor=50
70
+ src/
71
+ a.py
72
+ ```
73
+
74
+ Raw mode grammar:
75
+
76
+ - `mode="files"`: matching filenames only.
77
+ - `mode="count"`: `path xN`, where `N` is the per-file count.
78
+ - `mode="snippets"`: `path:line:text` without context, or tree-grouped files
79
+ where `line:text` marks a match and other indented text is surrounding context.
80
+ With `target="content_or_path"`, path-only matches are returned as filename rows.
81
+
82
+ ## API
83
+
84
+ ```python
85
+ explore(
86
+ pattern,
87
+ root=".", # path, file, or non-empty list/tuple of paths
88
+ target="content", # "content", "path", "content_or_path", "read", or "list"
89
+ regex=True, # set False for literal search
90
+ path_scope="path", # "path" or "basename" for path matching
91
+ glob=None,
92
+ exclude=None,
93
+ case="smart",
94
+ mode="files", # "files", "snippets", or "count"
95
+ context_lines=0,
96
+ limit=50,
97
+ cursor=None,
98
+ start_line=1, # first line for target="read"
99
+ backend="auto", # "auto", "python", "rust"/"native"
100
+ result_format=None, # default compressed for search, text for read, tree text for list
101
+ )
102
+ ```
103
+
104
+ `target="content"` searches file contents. `target="path"` searches relative
105
+ file paths without opening file contents. `target="content_or_path"` returns
106
+ files matching either target and marks each row with its match kind.
107
+ `mode="snippets"` supports `target="content"` and `target="content_or_path"`;
108
+ path-only rows under `target="content_or_path"` are returned without
109
+ line/snippet fields.
110
+
111
+ `target="read"` treats `pattern` as one known file path, resolves relative paths
112
+ under a single `root`, and returns plain text with no line-number prefixes.
113
+ Use `start_line` and `limit` to cap the returned line range; if more lines
114
+ remain, text output starts with `-- more: cursor=N`. CSV files are read as
115
+ ordinary text. Binary-looking, missing, unreadable, or directory paths fail with
116
+ controlled `ExploreError` subclasses.
117
+
118
+ `target="list"` treats `pattern` as one file/directory path and returns one
119
+ directory level. Text output uses the same compact tree display as raw search
120
+ output when that saves tokens. Directories end with `/`; a file path is returned
121
+ as one entry. It honors `glob`, `exclude`, ignore files, `limit`, and `cursor`.
122
+ Read/list use the pure-Python stdlib implementation even when `backend="auto"`
123
+ or `"rust"` is requested.
124
+
125
+ `backend="auto"` uses the Rust helper when present, then falls back to pure Python. Regex searches use Rust when supported by its regex engine and fall back to Python for compatibility, including Python `re.finditer` counts for patterns that can match empty spans.
126
+
127
+ `root` accepts `str | os.PathLike | Sequence[str | os.PathLike]`. It may be a
128
+ workspace directory, a single file, or a non-empty list/tuple of directories and
129
+ files:
130
+
131
+ ```python
132
+ explore("search_workspace", root=["src", "webapp", "tests"], regex=False)
133
+ ```
134
+
135
+ When calling through JSON/tool schemas, pass multi-root values as a JSON array,
136
+ for example `"root": ["src", "webapp", "tests"]`. For resilience with coding
137
+ agents, a space-delimited string such as `"root": "src webapp tests"` is also
138
+ treated as multiple roots when that exact path does not exist and every split
139
+ token is an existing file or directory. Existing paths with spaces still take
140
+ priority; quote individual spaced paths if combining them in one string.
141
+
142
+ File roots search only that file and report paths relative to the file's parent
143
+ directory. Multi-root searches report paths relative to the roots' common base,
144
+ so sibling roots keep prefixes such as `src/...` and `tests/...`; this also lets
145
+ `exclude=["src/generated/**"]` target one root.
146
+
147
+ Controlled failures raise `ExploreError` subclasses:
148
+
149
+ - `ExploreArgumentError` for invalid arguments.
150
+ - `ExplorePatternError` for invalid/unsupported patterns.
151
+ - `ExploreRootError` for missing or unsearchable roots.
152
+ - `ExploreBackendError` for backend runtime failures.
153
+
154
+ ## CLI
155
+
156
+ ```bash
157
+ codetool-explore "UserService" . --literal --format text
158
+ codetool-explore "service" . --target path --literal
159
+ codetool-explore "User(Service|Repository)" --root src --mode snippets --raw
160
+ codetool-explore "search_workspace" --root src --root webapp --root tests --literal
161
+ codetool-explore --read README.md --start-line 20 --limit 40
162
+ codetool-explore --list src --glob "*.py"
163
+ ```
164
+
165
+ The CLI defaults to compact JSON for search, plain text for `--read`, and
166
+ tree-compressed text for `--list`.
167
+ Use `--format text` or `--raw` for raw search text; searches with no matches print
168
+ `No Match`. Repeat `--root` for multiple search roots; read/list accept a single
169
+ root only. A single quoted space-delimited `--root` is accepted as a compatibility
170
+ fallback when it can be split into existing roots.
171
+
172
+ ## Install
173
+
174
+ ```bash
175
+ uv pip install codetool-explore
176
+ ```
177
+
178
+ Wheels can include a platform-specific Rust helper. Without it, the package still works through the Python stdlib backend.
179
+
180
+ ## Benchmarks
181
+
182
+ Reproduce and refresh the generated README data:
183
+
184
+ ```bash
185
+ cargo build --release --manifest-path rust/Cargo.toml
186
+ uv run python benchmarks/benchmark_search.py \
187
+ --output reports/search_benchmark.json \
188
+ --update-readme
189
+ uv run python benchmarks/benchmark_output_lengths.py \
190
+ --output reports/rtk_vs_codetool_output_lengths.json
191
+ uv run python scripts/update_readme_benchmarks.py \
192
+ --performance reports/search_benchmark.json \
193
+ --tokens reports/rtk_vs_codetool_output_lengths.json
194
+ ```
195
+
196
+ <!-- benchmark-results:start -->
197
+
198
+ <!-- Generated by scripts/update_readme_benchmarks.py; do not edit by hand. -->
199
+
200
+ ### Execution performance
201
+
202
+ Mean of median wall-clock timings across 5 corpora × 7 scenarios, 5 measured rounds after 1 warmup.
203
+
204
+ | Tool | Mean median time | Chart |
205
+ | --- | ---: | --- |
206
+ | `codetool-explore` | 127.0 ms | ███████████░░░░░░░ |
207
+ | `rg` | 138.2 ms | ████████████░░░░░░ |
208
+ | `rtk` | 199.7 ms | ██████████████████ |
209
+
210
+ `codetool-explore` is the fastest tool in this run.
211
+
212
+ Source: `reports/search_benchmark.json`.
213
+
214
+ ### Token compression
215
+
216
+ Token counts use `tiktoken` when available. The table compares output across 7 RTK-corpus scenarios.
217
+
218
+ | Output | Tokens | Bytes | Chart |
219
+ | --- | ---: | ---: | --- |
220
+ | `explore(..., result_format="text")` | 11,008 | 34.3 KB | ██░░░░░░░░░░░░░░░░ |
221
+ | `rtk grep` stdout | 19,646 | 60.1 KB | ███░░░░░░░░░░░░░░░ |
222
+ | default `explore(...)` | 38,393 | 125.3 KB | █████░░░░░░░░░░░░░ |
223
+ | `explore(..., result_format="full")` | 39,027 | 134.7 KB | █████░░░░░░░░░░░░░ |
224
+ | `rg` stdout | 129,775 | 402.4 KB | ██████████████████ |
225
+
226
+ Default structured output is 7.03% smaller by byte size than the full structured shape. Raw text omits backend/totals metadata, includes only a cursor hint when truncated, and prints `No Match` for empty pages. Raw text is 0.56× the `rtk grep` token count in this run.
227
+
228
+ Source: `reports/rtk_vs_codetool_output_lengths.json`.
229
+
230
+ <!-- benchmark-results:end -->
231
+
232
+ ## Development
233
+
234
+ ```bash
235
+ uv run pytest
236
+ uv run python scripts/package_rust_binary.py
237
+ uv build --wheel
238
+ ```
239
+
240
+ Release wheels are built in CI with the staged Rust helper for each target platform.
@@ -0,0 +1,210 @@
1
+ # codetool-explore
2
+
3
+ `codetool-explore` is a workspace exploration library built for coding-agent harnesses: fast content search, fast filename/path discovery, read-only file viewing, compact structured results, and predictable token usage.
4
+
5
+ - **Agent-first API**: one public `explore()` call with `target="content"`, `"path"`, `"content_or_path"`, `"read"`, or `"list"`.
6
+ - **Performance-oriented**: dependency-free Python fallback plus optional Rust CLI acceleration for literal and regex content/path search.
7
+ - **Token-compressed output**: compact result keys by default for search, tree-compressed text by default for list, plain text by default for read, `result_format="text"` for raw RTK-style text, and `result_format="full"` for the uncompressed backend shape.
8
+
9
+ ```python
10
+ from codetool_explore import explore
11
+
12
+ content = explore("UserService", root=".", mode="files")
13
+ paths = explore("service", root=".", target="path", glob="*.py")
14
+ mixed = explore("UserService", root=".", target="content_or_path")
15
+ scoped = explore("search_workspace", root=["src", "webapp", "tests"], regex=False)
16
+ snippet = explore("README.md", root=".", target="read", start_line=20, limit=40)
17
+ listing = explore("src", root=".", target="list", limit=100)
18
+ ```
19
+
20
+ Patterns are regexes by default, so alternation works without extra flags:
21
+
22
+ ```python
23
+ explore("Maximum number of results|Text or regex pattern", root="tests")
24
+ ```
25
+
26
+ Pass `regex=False` for exact literal matching.
27
+
28
+ For maximum token compression, request raw text:
29
+
30
+ ```python
31
+ print(explore("UserService", root=".", regex=False, result_format="text"))
32
+ ```
33
+
34
+ Raw text omits backend/totals metadata, groups repeated path prefixes in a small
35
+ tree, crops long snippets/context aggressively, and prints `No Match` for empty
36
+ results. It includes a compact pagination header only when another page exists:
37
+
38
+ ```text
39
+ -- more: cursor=50
40
+ src/
41
+ a.py
42
+ ```
43
+
44
+ Raw mode grammar:
45
+
46
+ - `mode="files"`: matching filenames only.
47
+ - `mode="count"`: `path xN`, where `N` is the per-file count.
48
+ - `mode="snippets"`: `path:line:text` without context, or tree-grouped files
49
+ where `line:text` marks a match and other indented text is surrounding context.
50
+ With `target="content_or_path"`, path-only matches are returned as filename rows.
51
+
52
+ ## API
53
+
54
+ ```python
55
+ explore(
56
+ pattern,
57
+ root=".", # path, file, or non-empty list/tuple of paths
58
+ target="content", # "content", "path", "content_or_path", "read", or "list"
59
+ regex=True, # set False for literal search
60
+ path_scope="path", # "path" or "basename" for path matching
61
+ glob=None,
62
+ exclude=None,
63
+ case="smart",
64
+ mode="files", # "files", "snippets", or "count"
65
+ context_lines=0,
66
+ limit=50,
67
+ cursor=None,
68
+ start_line=1, # first line for target="read"
69
+ backend="auto", # "auto", "python", "rust"/"native"
70
+ result_format=None, # default compressed for search, text for read, tree text for list
71
+ )
72
+ ```
73
+
74
+ `target="content"` searches file contents. `target="path"` searches relative
75
+ file paths without opening file contents. `target="content_or_path"` returns
76
+ files matching either target and marks each row with its match kind.
77
+ `mode="snippets"` supports `target="content"` and `target="content_or_path"`;
78
+ path-only rows under `target="content_or_path"` are returned without
79
+ line/snippet fields.
80
+
81
+ `target="read"` treats `pattern` as one known file path, resolves relative paths
82
+ under a single `root`, and returns plain text with no line-number prefixes.
83
+ Use `start_line` and `limit` to cap the returned line range; if more lines
84
+ remain, text output starts with `-- more: cursor=N`. CSV files are read as
85
+ ordinary text. Binary-looking, missing, unreadable, or directory paths fail with
86
+ controlled `ExploreError` subclasses.
87
+
88
+ `target="list"` treats `pattern` as one file/directory path and returns one
89
+ directory level. Text output uses the same compact tree display as raw search
90
+ output when that saves tokens. Directories end with `/`; a file path is returned
91
+ as one entry. It honors `glob`, `exclude`, ignore files, `limit`, and `cursor`.
92
+ Read/list use the pure-Python stdlib implementation even when `backend="auto"`
93
+ or `"rust"` is requested.
94
+
95
+ `backend="auto"` uses the Rust helper when present, then falls back to pure Python. Regex searches use Rust when supported by its regex engine and fall back to Python for compatibility, including Python `re.finditer` counts for patterns that can match empty spans.
96
+
97
+ `root` accepts `str | os.PathLike | Sequence[str | os.PathLike]`. It may be a
98
+ workspace directory, a single file, or a non-empty list/tuple of directories and
99
+ files:
100
+
101
+ ```python
102
+ explore("search_workspace", root=["src", "webapp", "tests"], regex=False)
103
+ ```
104
+
105
+ When calling through JSON/tool schemas, pass multi-root values as a JSON array,
106
+ for example `"root": ["src", "webapp", "tests"]`. For resilience with coding
107
+ agents, a space-delimited string such as `"root": "src webapp tests"` is also
108
+ treated as multiple roots when that exact path does not exist and every split
109
+ token is an existing file or directory. Existing paths with spaces still take
110
+ priority; quote individual spaced paths if combining them in one string.
111
+
112
+ File roots search only that file and report paths relative to the file's parent
113
+ directory. Multi-root searches report paths relative to the roots' common base,
114
+ so sibling roots keep prefixes such as `src/...` and `tests/...`; this also lets
115
+ `exclude=["src/generated/**"]` target one root.
116
+
117
+ Controlled failures raise `ExploreError` subclasses:
118
+
119
+ - `ExploreArgumentError` for invalid arguments.
120
+ - `ExplorePatternError` for invalid/unsupported patterns.
121
+ - `ExploreRootError` for missing or unsearchable roots.
122
+ - `ExploreBackendError` for backend runtime failures.
123
+
124
+ ## CLI
125
+
126
+ ```bash
127
+ codetool-explore "UserService" . --literal --format text
128
+ codetool-explore "service" . --target path --literal
129
+ codetool-explore "User(Service|Repository)" --root src --mode snippets --raw
130
+ codetool-explore "search_workspace" --root src --root webapp --root tests --literal
131
+ codetool-explore --read README.md --start-line 20 --limit 40
132
+ codetool-explore --list src --glob "*.py"
133
+ ```
134
+
135
+ The CLI defaults to compact JSON for search, plain text for `--read`, and
136
+ tree-compressed text for `--list`.
137
+ Use `--format text` or `--raw` for raw search text; searches with no matches print
138
+ `No Match`. Repeat `--root` for multiple search roots; read/list accept a single
139
+ root only. A single quoted space-delimited `--root` is accepted as a compatibility
140
+ fallback when it can be split into existing roots.
141
+
142
+ ## Install
143
+
144
+ ```bash
145
+ uv pip install codetool-explore
146
+ ```
147
+
148
+ Wheels can include a platform-specific Rust helper. Without it, the package still works through the Python stdlib backend.
149
+
150
+ ## Benchmarks
151
+
152
+ Reproduce and refresh the generated README data:
153
+
154
+ ```bash
155
+ cargo build --release --manifest-path rust/Cargo.toml
156
+ uv run python benchmarks/benchmark_search.py \
157
+ --output reports/search_benchmark.json \
158
+ --update-readme
159
+ uv run python benchmarks/benchmark_output_lengths.py \
160
+ --output reports/rtk_vs_codetool_output_lengths.json
161
+ uv run python scripts/update_readme_benchmarks.py \
162
+ --performance reports/search_benchmark.json \
163
+ --tokens reports/rtk_vs_codetool_output_lengths.json
164
+ ```
165
+
166
+ <!-- benchmark-results:start -->
167
+
168
+ <!-- Generated by scripts/update_readme_benchmarks.py; do not edit by hand. -->
169
+
170
+ ### Execution performance
171
+
172
+ Mean of median wall-clock timings across 5 corpora × 7 scenarios, 5 measured rounds after 1 warmup.
173
+
174
+ | Tool | Mean median time | Chart |
175
+ | --- | ---: | --- |
176
+ | `codetool-explore` | 127.0 ms | ███████████░░░░░░░ |
177
+ | `rg` | 138.2 ms | ████████████░░░░░░ |
178
+ | `rtk` | 199.7 ms | ██████████████████ |
179
+
180
+ `codetool-explore` is the fastest tool in this run.
181
+
182
+ Source: `reports/search_benchmark.json`.
183
+
184
+ ### Token compression
185
+
186
+ Token counts use `tiktoken` when available. The table compares output across 7 RTK-corpus scenarios.
187
+
188
+ | Output | Tokens | Bytes | Chart |
189
+ | --- | ---: | ---: | --- |
190
+ | `explore(..., result_format="text")` | 11,008 | 34.3 KB | ██░░░░░░░░░░░░░░░░ |
191
+ | `rtk grep` stdout | 19,646 | 60.1 KB | ███░░░░░░░░░░░░░░░ |
192
+ | default `explore(...)` | 38,393 | 125.3 KB | █████░░░░░░░░░░░░░ |
193
+ | `explore(..., result_format="full")` | 39,027 | 134.7 KB | █████░░░░░░░░░░░░░ |
194
+ | `rg` stdout | 129,775 | 402.4 KB | ██████████████████ |
195
+
196
+ Default structured output is 7.03% smaller by byte size than the full structured shape. Raw text omits backend/totals metadata, includes only a cursor hint when truncated, and prints `No Match` for empty pages. Raw text is 0.56× the `rtk grep` token count in this run.
197
+
198
+ Source: `reports/rtk_vs_codetool_output_lengths.json`.
199
+
200
+ <!-- benchmark-results:end -->
201
+
202
+ ## Development
203
+
204
+ ```bash
205
+ uv run pytest
206
+ uv run python scripts/package_rust_binary.py
207
+ uv build --wheel
208
+ ```
209
+
210
+ Release wheels are built in CI with the staged Rust helper for each target platform.