codetool-explore 0.5.0__py3-none-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. codetool_explore/__init__.py +35 -0
  2. codetool_explore/_bin/codetool-explore-rust-windows-arm64.exe +0 -0
  3. codetool_explore/api.py +266 -0
  4. codetool_explore/cli.py +188 -0
  5. codetool_explore/compression.py +150 -0
  6. codetool_explore/cursor.py +71 -0
  7. codetool_explore/errors.py +23 -0
  8. codetool_explore/explorer.py +497 -0
  9. codetool_explore/ignore.py +222 -0
  10. codetool_explore/py.typed +0 -0
  11. codetool_explore/python_backend/__init__.py +154 -0
  12. codetool_explore/python_backend/case.py +19 -0
  13. codetool_explore/python_backend/config.py +35 -0
  14. codetool_explore/python_backend/constants.py +39 -0
  15. codetool_explore/python_backend/file_search.py +51 -0
  16. codetool_explore/python_backend/ignore_rules.py +40 -0
  17. codetool_explore/python_backend/literal.py +79 -0
  18. codetool_explore/python_backend/matcher.py +79 -0
  19. codetool_explore/python_backend/models.py +49 -0
  20. codetool_explore/python_backend/output.py +82 -0
  21. codetool_explore/python_backend/regex_search.py +63 -0
  22. codetool_explore/python_backend/search.py +327 -0
  23. codetool_explore/python_backend/text.py +39 -0
  24. codetool_explore/python_backend/walker.py +119 -0
  25. codetool_explore/ranking.py +384 -0
  26. codetool_explore/roots.py +148 -0
  27. codetool_explore/rust_backend.py +308 -0
  28. codetool_explore/text_output.py +475 -0
  29. codetool_explore-0.5.0.dist-info/METADATA +240 -0
  30. codetool_explore-0.5.0.dist-info/RECORD +33 -0
  31. codetool_explore-0.5.0.dist-info/WHEEL +4 -0
  32. codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
  33. codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,475 @@
1
+ """Token-compact plain-text rendering for search and exploration results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from collections import defaultdict
7
+ from collections.abc import Iterable
8
+ from dataclasses import dataclass
9
+ from typing import Any
10
+
11
# Text returned by format_text_result when the result carries no match dicts.
NO_MATCH = "No Match"
# Default character budget used when cropping snippet text.
MAX_TEXT_CHARS = 80
# Character budget applied to non-match context lines in _context_entries.
MAX_CONTEXT_CHARS = 48
# Below this many matches, snippet rendering always compares flat and tree output.
SNIPPET_TREE_ONLY_MIN_MATCHES = 20
15
+
16
+
17
+ @dataclass(frozen=True)
18
+ class _Row:
19
+ path: str
20
+ suffix: str
21
+ match: dict[str, object]
22
+
23
+
24
def format_text_result(result: dict[str, object]) -> str:
    """Render a result payload as compact, rg/RTK-inspired plain text.

    Backend and totals metadata are intentionally left out. Only the current
    page of matches is emitted; repeated path prefixes are factored into a
    tree when that rendering scores lower than the flat one. A one-line
    pagination hint is added only when ``next_cursor`` is set.

    ``target`` values ``"read"`` and ``"list"`` are delegated to their own
    renderers; anything else is treated as a match listing whose layout is
    chosen by ``mode`` (``"snippets"``, ``"count"``, otherwise plain files).
    """
    target = str(result.get("target", "content"))
    if target == "read":
        return _format_read_text(result)
    if target == "list":
        return _format_list_text(result)

    # Non-dict entries are silently dropped.
    matches = [entry for entry in result.get("matches", []) if isinstance(entry, dict)]
    if not matches:
        return NO_MATCH

    mode = str(result.get("mode", "files"))
    if mode == "snippets":
        candidates = _snippet_candidates(
            matches,
            pattern=str(result.get("pattern", "")),
            regex=bool(result.get("regex", False)),
            ignore_case=str(result.get("effective_case", "")) == "insensitive",
        )
    else:
        build_row = _count_row if mode == "count" else _files_row
        rows = [build_row(match) for match in matches]
        candidates = [_format_flat(rows), _format_tree(rows)]

    # min() keeps the first candidate on ties, so flat wins when scores match.
    return _with_pagination_header(min(candidates, key=_score), result)
61
+
62
+
63
+ def _with_pagination_header(text: str, result: dict[str, object]) -> str:
64
+ next_cursor = result.get("next_cursor")
65
+ if next_cursor in (None, ""):
66
+ return text
67
+ if not text:
68
+ return f"-- more: cursor={next_cursor}"
69
+ return f"-- more: cursor={next_cursor}\n{text}"
70
+
71
+
72
+ def _format_read_text(result: dict[str, object]) -> str:
73
+ markers: list[str] = []
74
+ next_cursor = result.get("next_cursor")
75
+ if next_cursor not in (None, ""):
76
+ markers.append(f"-- more: cursor={next_cursor}")
77
+ if result.get("content_truncated"):
78
+ markers.append("-- content omitted: output cap")
79
+
80
+ text = str(result.get("text", ""))
81
+ if text or int(result.get("returned", 0) or 0) > 0:
82
+ markers.append(text)
83
+ return "\n".join(markers)
84
+
85
+
86
def _format_list_text(result: dict[str, object]) -> str:
    """Render a ``list`` result using whichever of flat/tree scores lower.

    Non-dict items in ``entries`` are ignored. The pagination hint is added
    when ``next_cursor`` is set.
    """
    rows = [
        _list_row(entry)
        for entry in result.get("entries", [])
        if isinstance(entry, dict)
    ]
    # Flat is listed first so it wins when both renderings score the same.
    best = min([_format_flat(rows), _format_tree(rows)], key=_score)
    return _with_pagination_header(best, result)
94
+
95
+
96
+ def _score(text: str) -> tuple[int, int]:
97
+ """Cheap runtime proxy for token count without adding tokenizer deps.
98
+
99
+ Byte length correlated better than a regex token-ish count across the
100
+ benchmark corpus and avoids rescanning large outputs with a costly regex.
101
+ """
102
+
103
+ return (len(text.encode("utf-8")), len(text))
104
+
105
+
106
def _clean_text(
    value: object,
    *,
    pattern: str = "",
    regex: bool = False,
    ignore_case: bool = False,
    max_chars: int = MAX_TEXT_CHARS,
) -> str:
    """Flatten ``value`` to one trimmed line and crop it to ``max_chars``.

    Carriage returns and newlines each become a single space before the
    surrounding whitespace is stripped. Cropping is delegated to
    ``_crop_text``, which receives the search parameters unchanged.
    """
    single_line = str(value).replace("\r", " ").replace("\n", " ")
    return _crop_text(
        single_line.strip(),
        pattern=pattern,
        regex=regex,
        ignore_case=ignore_case,
        max_chars=max_chars,
    )
122
+
123
+
124
def _crop_text(
    text: str,
    *,
    pattern: str = "",
    regex: bool = False,
    ignore_case: bool = False,
    max_chars: int = MAX_TEXT_CHARS,
) -> str:
    """Shorten ``text`` to roughly ``max_chars``, keeping the match in view.

    Text that already fits is returned unchanged. When the pattern is found,
    a ``max_chars`` window starting about a third of the budget before the
    match is taken, and "…" is added on each side that was cut. Without a
    match the head of the text is kept and a trailing "…" is appended.
    """
    total = len(text)
    if total <= max_chars:
        return text
    if max_chars <= 1:
        # No room for an ellipsis; hard-cut instead.
        return text[:max_chars]

    hit = _match_index(text, pattern=pattern, regex=regex, ignore_case=ignore_case)
    if hit < 0:
        return text[: max_chars - 1].rstrip() + "…"

    start = max(0, hit - max_chars // 3)
    end = min(total, start + max_chars)
    if end == total:
        # Window ran off the end: slide it back to use the full budget.
        start = max(0, end - max_chars)

    window = text[start:end].strip()
    leading = "…" if start > 0 else ""
    trailing = "…" if end < total else ""
    return f"{leading}{window}{trailing}"
154
+
155
+
156
+ def _match_index(
157
+ text: str, *, pattern: str = "", regex: bool = False, ignore_case: bool = False
158
+ ) -> int:
159
+ if not pattern:
160
+ return -1
161
+ if regex:
162
+ try:
163
+ flags = re.IGNORECASE if ignore_case else 0
164
+ match = re.search(pattern, text, flags)
165
+ except re.error:
166
+ match = None
167
+ if match is not None:
168
+ return match.start()
169
+ haystack = text.lower() if ignore_case else text
170
+ needle = pattern.lower() if ignore_case else pattern
171
+ return haystack.find(needle)
172
+
173
+
174
+ def _path(value: object) -> str:
175
+ path = str(value or "")
176
+ while path.startswith("./"):
177
+ path = path[2:]
178
+ return path.replace("\\", "/") or "."
179
+
180
+
181
+ def _int(value: object, default: int = 0) -> int:
182
+ try:
183
+ return int(value) # type: ignore[arg-type]
184
+ except (TypeError, ValueError):
185
+ return default
186
+
187
+
188
def _files_row(match: dict[str, object]) -> _Row:
    """Build a suffix-less row from a match's normalised ``path``."""
    normalized = _path(match.get("path"))
    return _Row(normalized, "", match)
190
+
191
+
192
def _count_row(match: dict[str, object]) -> _Row:
    """Build a row whose suffix is `` x<count>`` (count defaults to 0)."""
    normalized = _path(match.get("path"))
    count = _int(match.get("count"), 0)
    return _Row(normalized, f" x{count}", match)
194
+
195
+
196
def _list_row(entry: dict[str, object]) -> _Row:
    """Build a listing row; entries with ``kind == "dir"`` get a ``/`` suffix.

    For directories, trailing slashes are removed from the path itself (an
    emptied path becomes ``"."``) so the suffix supplies the only slash.
    """
    normalized = _path(entry.get("path"))
    if entry.get("kind") != "dir":
        return _Row(normalized, "", entry)
    return _Row(normalized.rstrip("/") or ".", "/", entry)
201
+
202
+
203
+ def _format_flat(rows: Iterable[_Row]) -> str:
204
+ return "\n".join(f"{row.path}{row.suffix}" for row in rows)
205
+
206
+
207
+ class _TreeNode:
208
+ def __init__(self) -> None:
209
+ self.children: dict[str, _TreeNode] = {}
210
+ self.rows: list[_Row] = []
211
+ self.first_index = 1_000_000_000
212
+
213
+
214
def _format_tree(rows: Iterable[_Row]) -> str:
    """Render rows as an indented tree with shared path prefixes factored out.

    Each row's path is split on ``/`` and inserted into a prefix tree. Every
    node on the way records the smallest row index that passed through it,
    which the renderer uses to keep siblings in first-seen order.
    """
    root = _TreeNode()
    for position, row in enumerate(rows):
        cursor = root
        cursor.first_index = min(cursor.first_index, position)
        for segment in row.path.split("/"):
            cursor = cursor.children.setdefault(segment, _TreeNode())
            cursor.first_index = min(cursor.first_index, position)
        cursor.rows.append(row)

    rendered: list[str] = []
    _render_tree(root, rendered, depth=0)
    return "\n".join(rendered)
226
+
227
+
228
+ def _ordered_children(node: _TreeNode) -> list[tuple[str, _TreeNode]]:
229
+ return sorted(node.children.items(), key=lambda item: item[1].first_index)
230
+
231
+
232
def _render_tree(node: _TreeNode, lines: list[str], *, depth: int) -> None:
    """Append the rendering of ``node``'s subtree to ``lines``.

    A child that holds rows and has no children of its own is a leaf: one
    ``<name><suffix>`` line is emitted per row. Any other child is printed as
    ``<name>/`` and its subtree follows one indent level deeper.
    """
    prefix = " " * depth
    for name, child in _ordered_children(node):
        if not child.rows or child.children:
            lines.append(f"{prefix}{name}/")
            _render_tree(child, lines, depth=depth + 1)
            continue
        lines.extend(f"{prefix}{name}{row.suffix}" for row in child.rows)
241
+
242
+
243
def _format_snippets_flat(
    matches: list[dict[str, object]],
    *,
    pattern: str,
    regex: bool,
    ignore_case: bool,
) -> str:
    """Render snippet matches without path factoring.

    A match with a non-empty ``context`` list becomes a path line followed by
    its indented context entries. A match with neither ``snippet`` nor
    ``line`` becomes a bare path line. Everything else is rendered as
    ``path:line:snippet``.
    """
    output: list[str] = []
    for match in matches:
        path = _path(match.get("path"))
        context = match.get("context")

        if isinstance(context, list) and context:
            entries = _context_entries(
                [match],
                pattern=pattern,
                regex=regex,
                ignore_case=ignore_case,
            )
            output.append(path)
            output.extend(f" {entry}" for entry in entries)
            continue

        if "snippet" not in match and "line" not in match:
            output.append(path)
            continue

        line_number = _int(match.get("line"), 0)
        snippet = _clean_text(
            match.get("snippet", ""),
            pattern=pattern,
            regex=regex,
            ignore_case=ignore_case,
        )
        output.append(f"{path}:{line_number}:{snippet}")
    return "\n".join(output)
276
+
277
+
278
def _snippet_candidates(
    matches: list[dict[str, object]],
    *,
    pattern: str,
    regex: bool,
    ignore_case: bool,
) -> list[str]:
    """Return the snippet renderings to compare: ``[flat, tree]`` or ``[tree]``.

    The flat rendering is skipped when ``_snippet_tree_likely_wins`` predicts
    that the tree would be chosen anyway.
    """
    render_options = {
        "pattern": pattern,
        "regex": regex,
        "ignore_case": ignore_case,
    }
    candidates: list[str] = []
    if not _snippet_tree_likely_wins(matches):
        candidates.append(_format_snippets_flat(matches, **render_options))
    candidates.append(_format_snippets_tree(matches, **render_options))
    return candidates
308
+
309
+
310
def _snippet_tree_likely_wins(matches: list[dict[str, object]]) -> bool:
    """Return true when flat snippet rendering would mostly duplicate paths.

    Only evaluated for at least ``SNIPPET_TREE_ONLY_MIN_MATCHES`` matches.
    Compares the bytes spent on paths in a flat rendering (every path plus a
    separator, once per match) with the bytes of a tree built from the
    distinct paths alone.
    """
    if len(matches) < SNIPPET_TREE_ONLY_MIN_MATCHES:
        return False

    paths = [_path(match.get("path")) for match in matches]
    if len(paths) <= 1:
        return False

    # dict.fromkeys de-duplicates while keeping first-seen order.
    distinct_paths = list(dict.fromkeys(paths))
    flat_cost = sum(len(path.encode("utf-8")) + 1 for path in paths)
    tree_text = _format_tree(_Row(path, "", {}) for path in distinct_paths)
    return len(tree_text.encode("utf-8")) <= flat_cost
329
+
330
+
331
def _format_snippets_tree(
    matches: list[dict[str, object]],
    *,
    pattern: str,
    regex: bool,
    ignore_case: bool,
) -> str:
    """Render snippet matches as a path tree with entries under each file.

    Matches are grouped by normalised path in first-seen order. Each group
    becomes one row, stored under the ``"snippet_matches"`` key, and is
    inserted into a prefix tree that ``_render_snippet_tree`` then prints.
    """
    # Plain dicts keep insertion order, which is the first-seen path order.
    grouped: dict[str, list[dict[str, object]]] = {}
    for match in matches:
        grouped.setdefault(_path(match.get("path")), []).append(match)

    root = _TreeNode()
    for position, (path, group) in enumerate(grouped.items()):
        row = _Row(path, "", {"snippet_matches": group})
        cursor = root
        cursor.first_index = min(cursor.first_index, position)
        for segment in row.path.split("/"):
            cursor = cursor.children.setdefault(segment, _TreeNode())
            cursor.first_index = min(cursor.first_index, position)
        cursor.rows.append(row)

    rendered: list[str] = []
    _render_snippet_tree(
        root,
        rendered,
        depth=0,
        pattern=pattern,
        regex=regex,
        ignore_case=ignore_case,
    )
    return "\n".join(rendered)
366
+
367
+
368
def _render_snippet_tree(
    node: _TreeNode,
    lines: list[str],
    *,
    depth: int,
    pattern: str,
    regex: bool,
    ignore_case: bool,
) -> None:
    """Append the snippet-tree rendering of ``node``'s subtree to ``lines``.

    Leaf children (rows, no children) print their name followed by the merged
    context entries of the row's ``"snippet_matches"`` list, one indent level
    deeper. Other children print as ``<name>/`` and recurse.
    """
    prefix = " " * depth
    for name, child in _ordered_children(node):
        if not child.rows or child.children:
            lines.append(f"{prefix}{name}/")
            _render_snippet_tree(
                child,
                lines,
                depth=depth + 1,
                pattern=pattern,
                regex=regex,
                ignore_case=ignore_case,
            )
            continue

        for row in child.rows:
            lines.append(f"{prefix}{name}")
            grouped = row.match.get("snippet_matches", [])
            if not isinstance(grouped, list):
                continue
            entries = _context_entries(
                grouped,
                pattern=pattern,
                regex=regex,
                ignore_case=ignore_case,
            )
            lines.extend(f"{prefix} {entry}" for entry in entries)
401
+
402
+
403
def _context_entries(
    matches: list[Any],
    *,
    pattern: str = "",
    regex: bool = False,
    ignore_case: bool = False,
) -> list[str]:
    """Merge snippet rows and context into compact per-line entries.

    Match lines use ``line:text``; surrounding context is emitted as plain
    text. Entries come out in ascending line order, and a line shared by
    adjacent snippets is emitted once. A line that is a match for any snippet
    is rendered as a match; empty context lines are dropped.
    """
    # line number -> (rendered text, whether the line is a match line)
    merged: dict[int, tuple[str, bool]] = {}

    for item in matches:
        if not isinstance(item, dict):
            continue
        match_line = _int(item.get("line"), 0)
        context = item.get("context")
        context_rows = context if isinstance(context, list) else []

        for ctx in context_rows:
            if not isinstance(ctx, dict):
                continue
            line = _int(ctx.get("line"), 0)
            if line <= 0:
                continue
            if line == match_line:
                # The snippet text replaces whatever was recorded for this line.
                merged[line] = (
                    _clean_text(
                        item.get("snippet", ""),
                        pattern=pattern,
                        regex=regex,
                        ignore_case=ignore_case,
                        max_chars=MAX_TEXT_CHARS,
                    ),
                    True,
                )
            else:
                context_text = _clean_text(
                    ctx.get("text", ""),
                    pattern="",
                    regex=False,
                    ignore_case=False,
                    max_chars=MAX_CONTEXT_CHARS,
                )
                # An earlier record for this line (match or context) is kept.
                merged.setdefault(line, (context_text, False))

        if match_line > 0:
            # Always record the match line, even when context did not list it.
            merged[match_line] = (
                _clean_text(
                    item.get("snippet", ""),
                    pattern=pattern,
                    regex=regex,
                    ignore_case=ignore_case,
                ),
                True,
            )

    entries: list[str] = []
    for line in sorted(merged):
        text, is_match = merged[line]
        if is_match:
            entries.append(f"{line}:{text}")
        elif text != "":
            entries.append(text)
    return entries