agentgrep 0.1.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentgrep/__init__.py +3151 -0
- agentgrep/__main__.py +7 -0
- agentgrep/mcp.py +619 -0
- agentgrep-0.1.0a0.dist-info/METADATA +25 -0
- agentgrep-0.1.0a0.dist-info/RECORD +8 -0
- agentgrep-0.1.0a0.dist-info/WHEEL +4 -0
- agentgrep-0.1.0a0.dist-info/entry_points.txt +3 -0
- agentgrep-0.1.0a0.dist-info/licenses/LICENSE +21 -0
agentgrep/__init__.py
ADDED
|
@@ -0,0 +1,3151 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.14"
|
|
4
|
+
# dependencies = ["pydantic>=2.11.3", "textual>=3.2.0"]
|
|
5
|
+
# ///
|
|
6
|
+
"""Search local AI agent prompts and history without mutating agent stores.
|
|
7
|
+
|
|
8
|
+
The tool discovers known read-only stores under ``~/.codex``, ``~/.claude``,
|
|
9
|
+
``~/.cursor``, and Cursor's official IDE storage locations, then normalizes
|
|
10
|
+
results through named adapters.
|
|
11
|
+
|
|
12
|
+
Examples
|
|
13
|
+
--------
|
|
14
|
+
List prompts containing both ``serenity`` and ``bliss``:
|
|
15
|
+
|
|
16
|
+
>>> query = SearchQuery(
|
|
17
|
+
... terms=("serenity", "bliss"),
|
|
18
|
+
... search_type="prompts",
|
|
19
|
+
... any_term=False,
|
|
20
|
+
... regex=False,
|
|
21
|
+
... case_sensitive=False,
|
|
22
|
+
... agents=("codex",),
|
|
23
|
+
... limit=None,
|
|
24
|
+
... )
|
|
25
|
+
>>> matches_text("A serenity prompt with bliss inside.", query)
|
|
26
|
+
True
|
|
27
|
+
>>> matches_text("Only serenity appears here.", query)
|
|
28
|
+
False
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
import argparse
|
|
34
|
+
import contextlib
|
|
35
|
+
import dataclasses
|
|
36
|
+
import importlib
|
|
37
|
+
import itertools
|
|
38
|
+
import json
|
|
39
|
+
import os
|
|
40
|
+
import pathlib
|
|
41
|
+
import re
|
|
42
|
+
import select
|
|
43
|
+
import shutil
|
|
44
|
+
import signal
|
|
45
|
+
import sqlite3
|
|
46
|
+
import subprocess
|
|
47
|
+
import sys
|
|
48
|
+
import textwrap
|
|
49
|
+
import threading
|
|
50
|
+
import time
|
|
51
|
+
import typing as t
|
|
52
|
+
|
|
53
|
+
if t.TYPE_CHECKING:
|
|
54
|
+
import collections.abc as cabc
|
|
55
|
+
|
|
56
|
+
PrivatePathBase = pathlib.Path
|
|
57
|
+
else:
|
|
58
|
+
PrivatePathBase = type(pathlib.Path())
|
|
59
|
+
|
|
60
|
+
AgentName = t.Literal["codex", "claude", "cursor"]
|
|
61
|
+
OutputMode = t.Literal["text", "json", "ndjson", "ui"]
|
|
62
|
+
ProgressMode = t.Literal["auto", "always", "never"]
|
|
63
|
+
PathKind = t.Literal["history_file", "session_file", "sqlite_db"]
|
|
64
|
+
SearchType = t.Literal["prompts", "history", "all"]
|
|
65
|
+
SourceKind = t.Literal["json", "jsonl", "sqlite"]
|
|
66
|
+
ColorMode = t.Literal["auto", "always", "never"]
|
|
67
|
+
type JSONScalar = str | int | float | bool | None
|
|
68
|
+
type JSONValue = JSONScalar | list[JSONValue] | dict[str, JSONValue]
|
|
69
|
+
type SummaryRow = tuple[object, object, object, object, object, object, object, object]
|
|
70
|
+
type KeyValueRow = tuple[object, object]
|
|
71
|
+
|
|
72
|
+
AGENT_CHOICES: tuple[AgentName, ...] = ("codex", "claude", "cursor")
|
|
73
|
+
JSON_FILE_SUFFIXES: frozenset[str] = frozenset({".json", ".jsonl"})
|
|
74
|
+
SCHEMA_VERSION: str = "agentgrep.v1"
|
|
75
|
+
USER_ROLES: frozenset[str] = frozenset({"human", "user"})
|
|
76
|
+
CURSOR_STATE_TOKENS: tuple[str, ...] = ("chat", "composer", "prompt", "history")
|
|
77
|
+
OFFICIAL_CURSOR_STATE_PATHS: tuple[pathlib.Path, ...] = (
|
|
78
|
+
pathlib.Path("~/.config/Cursor/User/globalStorage/state.vscdb").expanduser(),
|
|
79
|
+
pathlib.Path(
|
|
80
|
+
"~/Library/Application Support/Cursor/User/globalStorage/state.vscdb",
|
|
81
|
+
).expanduser(),
|
|
82
|
+
pathlib.Path("~/AppData/Roaming/Cursor/User/globalStorage/state.vscdb").expanduser(),
|
|
83
|
+
)
|
|
84
|
+
EnvelopeFactory = t.Callable[[str, dict[str, object], list[dict[str, object]]], dict[str, object]]
|
|
85
|
+
|
|
86
|
+
OPTIONS_EXPECTING_VALUE: frozenset[str] = frozenset(
|
|
87
|
+
{
|
|
88
|
+
"--agent",
|
|
89
|
+
"--type",
|
|
90
|
+
"--limit",
|
|
91
|
+
"--color",
|
|
92
|
+
"--progress",
|
|
93
|
+
},
|
|
94
|
+
)
|
|
95
|
+
OPTIONS_FLAG_ONLY: frozenset[str] = frozenset(
|
|
96
|
+
{
|
|
97
|
+
"-h",
|
|
98
|
+
"--help",
|
|
99
|
+
"--any",
|
|
100
|
+
"--regex",
|
|
101
|
+
"--case-sensitive",
|
|
102
|
+
"--json",
|
|
103
|
+
"--ndjson",
|
|
104
|
+
"--ui",
|
|
105
|
+
},
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def build_description(
    intro: str,
    example_blocks: cabc.Sequence[tuple[str | None, cabc.Sequence[str]]],
) -> str:
    """Compose help text from an intro paragraph and example listings.

    ``intro`` is dedented and stripped; an empty result is dropped. Each
    ``(heading, commands)`` pair becomes one section titled ``examples:``
    (``heading`` is ``None``) or ``<heading> examples:``, followed by one
    indented line per command. Pairs without commands are skipped. Sections
    are separated by a blank line.
    """
    cleaned_intro = textwrap.dedent(intro).strip()
    chunks: list[str] = [cleaned_intro] if cleaned_intro else []

    for label, entries in example_blocks:
        if entries:
            header = f"{label} examples:" if label is not None else "examples:"
            body = [f" {entry}" for entry in entries]
            chunks.append("\n".join([header, *body]))

    return "\n\n".join(chunks)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
CLI_DESCRIPTION = build_description(
|
|
131
|
+
"""
|
|
132
|
+
Read-only search across Codex, Claude, and Cursor local stores.
|
|
133
|
+
|
|
134
|
+
``search`` is the default subcommand. ``agentgrep bliss`` is
|
|
135
|
+
equivalent to ``agentgrep search bliss``.
|
|
136
|
+
""",
|
|
137
|
+
(
|
|
138
|
+
(
|
|
139
|
+
"quick",
|
|
140
|
+
(
|
|
141
|
+
"agentgrep bliss",
|
|
142
|
+
"agentgrep serene bliss --agent codex",
|
|
143
|
+
),
|
|
144
|
+
),
|
|
145
|
+
(
|
|
146
|
+
"search",
|
|
147
|
+
(
|
|
148
|
+
"agentgrep search bliss",
|
|
149
|
+
"agentgrep search serene bliss --agent codex",
|
|
150
|
+
"agentgrep search prompt history --type history --ndjson",
|
|
151
|
+
"agentgrep search design --ui",
|
|
152
|
+
),
|
|
153
|
+
),
|
|
154
|
+
(
|
|
155
|
+
"find",
|
|
156
|
+
(
|
|
157
|
+
"agentgrep find codex",
|
|
158
|
+
"agentgrep find sessions --agent codex",
|
|
159
|
+
"agentgrep find cursor --json",
|
|
160
|
+
),
|
|
161
|
+
),
|
|
162
|
+
),
|
|
163
|
+
)
|
|
164
|
+
SEARCH_DESCRIPTION = build_description(
|
|
165
|
+
"""
|
|
166
|
+
Search normalized prompts or history across supported agent stores.
|
|
167
|
+
""",
|
|
168
|
+
(
|
|
169
|
+
(
|
|
170
|
+
None,
|
|
171
|
+
(
|
|
172
|
+
"agentgrep search bliss",
|
|
173
|
+
"agentgrep search serene bliss --agent codex",
|
|
174
|
+
"agentgrep search prompt history --type history --ndjson",
|
|
175
|
+
"agentgrep search serenity --json",
|
|
176
|
+
"agentgrep search design --ui",
|
|
177
|
+
),
|
|
178
|
+
),
|
|
179
|
+
),
|
|
180
|
+
)
|
|
181
|
+
FIND_DESCRIPTION = build_description(
|
|
182
|
+
"""
|
|
183
|
+
Find known prompt, history, and store paths without parsing message text.
|
|
184
|
+
""",
|
|
185
|
+
(
|
|
186
|
+
(
|
|
187
|
+
None,
|
|
188
|
+
(
|
|
189
|
+
"agentgrep find codex",
|
|
190
|
+
"agentgrep find sessions --agent codex",
|
|
191
|
+
"agentgrep find cursor --json",
|
|
192
|
+
),
|
|
193
|
+
),
|
|
194
|
+
),
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class PrivatePath(PrivatePathBase):
    """Path whose ``str()`` and ``repr()`` show the home directory as ``~``."""

    def __new__(cls, *args: t.Any, **kwargs: t.Any) -> PrivatePath:
        """Build the path by delegating construction to the base class."""
        return super().__new__(cls, *args, **kwargs)

    @classmethod
    def _collapse_home(cls, value: str) -> str:
        """Return ``value`` with a leading home-directory prefix replaced by ``~``.

        Values already starting with ``~`` and values outside the home
        directory are returned unchanged.
        """
        if value.startswith("~"):
            return value

        home = str(pathlib.Path.home())
        if value == home:
            return "~"

        # Only collapse when the home prefix is followed by a path separator,
        # so a sibling such as ``/home/username2`` is never shortened.
        prefixes = tuple(home + sep for sep in (os.sep, os.altsep) if sep)
        if value.startswith(prefixes):
            return "~" + value[len(home) :]
        return value

    def __str__(self) -> str:
        """Return the path text with the home directory collapsed."""
        raw = pathlib.Path.__str__(self)
        return self._collapse_home(raw)

    def __repr__(self) -> str:
        """Return ``ClassName('<collapsed path>')``."""
        return f"{self.__class__.__name__}({str(self)!r})"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def format_display_path(path: pathlib.Path | str, *, directory: bool = False) -> str:
    """Return ``path`` as privacy-safe display text.

    The path is rendered through ``PrivatePath``. When ``directory`` is true
    the result is guaranteed to end with ``/``.
    """
    shown = str(PrivatePath(path))
    if not directory or shown.endswith("/"):
        return shown
    return f"{shown}/"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class SearchRecordPayload(t.TypedDict):
    """JSON payload for search records."""

    # Same field names as the ``SearchRecord`` dataclass, plus
    # ``schema_version``; ``path`` is typed as ``str`` here.
    schema_version: str
    kind: t.Literal["prompt", "history"]
    agent: AgentName
    store: str
    adapter_id: str
    path: str
    text: str
    title: str | None
    role: str | None
    timestamp: str | None
    model: str | None
    session_id: str | None
    conversation_id: str | None
    metadata: dict[str, object]
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class FindRecordPayload(t.TypedDict):
    """JSON payload for find records."""

    # Same field names as the ``FindRecord`` dataclass, plus
    # ``schema_version``; ``path`` is typed as ``str`` here.
    schema_version: str
    kind: t.Literal["find"]
    agent: AgentName
    store: str
    adapter_id: str
    path: str
    path_kind: PathKind
    metadata: dict[str, object]
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class SourceHandlePayload(t.TypedDict):
    """JSON payload for discovered sources."""

    # Same field names as the ``SourceHandle`` dataclass, plus
    # ``schema_version``; ``path`` and ``search_root`` are typed as ``str``.
    schema_version: str
    agent: AgentName
    store: str
    adapter_id: str
    path: str
    path_kind: PathKind
    source_kind: SourceKind
    search_root: str | None
    mtime_ns: int
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class EnvelopePayload(t.TypedDict):
    """JSON payload for top-level envelopes."""

    schema_version: str
    command: str
    # ``query`` and ``results`` are loosely typed dictionaries.
    query: dict[str, object]
    results: list[dict[str, object]]
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class PydanticTypeAdapter(t.Protocol):
    """Minimal TypeAdapter surface used by ``agentgrep``."""

    # Structural type only: both methods are stubs with ``...`` bodies.

    def validate_python(self, value: object, /) -> object:
        """Validate a Python object."""
        ...

    def dump_python(self, value: object, /, *, mode: str = "python") -> object:
        """Dump a Python object."""
        ...
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class PydanticTypeAdapterFactory(t.Protocol):
    """Factory for creating TypeAdapters."""

    # Callable protocol: takes one positional-only type object and returns
    # something satisfying ``PydanticTypeAdapter``.
    def __call__(self, value_type: object, /) -> PydanticTypeAdapter:
        """Create a TypeAdapter."""
        ...
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
class PydanticModule(t.Protocol):
    """Minimal Pydantic module surface used at runtime."""

    # The only attribute this protocol requires.
    TypeAdapter: PydanticTypeAdapterFactory
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class HelpTheme(t.Protocol):
    """Minimal argparse help theme surface."""

    # Every attribute is a string; ``AnsiHelpTheme`` declares the same
    # attribute names.
    heading: str
    reset: str
    label: str
    long_option: str
    short_option: str
    prog: str
    action: str
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class AnsiHelpTheme(t.NamedTuple):
    """ANSI escape sequences used to color help example sections."""

    heading: str
    reset: str
    label: str
    long_option: str
    short_option: str
    prog: str
    action: str

    @classmethod
    def default(cls) -> AnsiHelpTheme:
        """Return the built-in theme."""
        # Long and short options share the same green.
        option_green = "\x1b[32m"
        palette = {
            "heading": "\x1b[1;36m",
            "reset": "\x1b[0m",
            "label": "\x1b[33m",
            "long_option": option_green,
            "short_option": option_green,
            "prog": "\x1b[1;35m",
            "action": "\x1b[36m",
        }
        return cls(**palette)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
@dataclasses.dataclass(frozen=True, slots=True)
|
|
362
|
+
class AnsiColors:
|
|
363
|
+
"""Semantic ANSI colors for terminal status output."""
|
|
364
|
+
|
|
365
|
+
enabled: bool
|
|
366
|
+
|
|
367
|
+
SUCCESS: t.ClassVar[str] = "\x1b[32m"
|
|
368
|
+
WARNING: t.ClassVar[str] = "\x1b[33m"
|
|
369
|
+
ERROR: t.ClassVar[str] = "\x1b[31m"
|
|
370
|
+
INFO: t.ClassVar[str] = "\x1b[36m"
|
|
371
|
+
HEADING: t.ClassVar[str] = "\x1b[1;36m"
|
|
372
|
+
HIGHLIGHT: t.ClassVar[str] = "\x1b[35m"
|
|
373
|
+
MUTED: t.ClassVar[str] = "\x1b[34m"
|
|
374
|
+
WHITE: t.ClassVar[str] = "\x1b[37m"
|
|
375
|
+
RESET: t.ClassVar[str] = "\x1b[0m"
|
|
376
|
+
|
|
377
|
+
@classmethod
|
|
378
|
+
def for_stream(cls, color_mode: ColorMode, stream: t.TextIO) -> AnsiColors:
|
|
379
|
+
"""Build semantic colors for ``stream`` and ``color_mode``."""
|
|
380
|
+
return cls(enabled=should_enable_color(color_mode, stream))
|
|
381
|
+
|
|
382
|
+
def colorize(self, text: str, color: str) -> str:
|
|
383
|
+
"""Apply ``color`` to ``text`` when colors are enabled."""
|
|
384
|
+
if not self.enabled:
|
|
385
|
+
return text
|
|
386
|
+
return f"{color}{text}{self.RESET}"
|
|
387
|
+
|
|
388
|
+
def success(self, text: str) -> str:
|
|
389
|
+
"""Format text as success."""
|
|
390
|
+
return self.colorize(text, self.SUCCESS)
|
|
391
|
+
|
|
392
|
+
def warning(self, text: str) -> str:
|
|
393
|
+
"""Format text as warning."""
|
|
394
|
+
return self.colorize(text, self.WARNING)
|
|
395
|
+
|
|
396
|
+
def error(self, text: str) -> str:
|
|
397
|
+
"""Format text as error."""
|
|
398
|
+
return self.colorize(text, self.ERROR)
|
|
399
|
+
|
|
400
|
+
def info(self, text: str) -> str:
|
|
401
|
+
"""Format text as informational."""
|
|
402
|
+
return self.colorize(text, self.INFO)
|
|
403
|
+
|
|
404
|
+
def heading(self, text: str) -> str:
|
|
405
|
+
"""Format text as a status heading."""
|
|
406
|
+
return self.colorize(text, self.HEADING)
|
|
407
|
+
|
|
408
|
+
def highlight(self, text: str) -> str:
|
|
409
|
+
"""Format text as highlighted."""
|
|
410
|
+
return self.colorize(text, self.HIGHLIGHT)
|
|
411
|
+
|
|
412
|
+
def muted(self, text: str) -> str:
|
|
413
|
+
"""Format text as muted."""
|
|
414
|
+
return self.colorize(text, self.MUTED)
|
|
415
|
+
|
|
416
|
+
def white(self, text: str) -> str:
|
|
417
|
+
"""Format text as plain white."""
|
|
418
|
+
return self.colorize(text, self.WHITE)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def should_enable_color(color_mode: ColorMode, stream: t.TextIO) -> bool:
    """Decide whether output written to ``stream`` should be colored.

    Precedence: a non-empty ``NO_COLOR`` or ``color_mode == "never"``
    disables color; then ``color_mode == "always"`` or a non-empty
    ``FORCE_COLOR`` enables it; otherwise the result follows
    ``stream.isatty()`` (false when the stream has no ``isatty``).
    """
    env = os.environ
    if env.get("NO_COLOR") or color_mode == "never":
        return False
    if color_mode == "always" or env.get("FORCE_COLOR"):
        return True
    tty_probe = getattr(stream, "isatty", lambda: False)
    return bool(tty_probe())
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def should_enable_help_color(color_mode: ColorMode) -> bool:
    """Return whether help output should use colors.

    Delegates to ``should_enable_color`` with ``sys.stdout`` as the stream.
    """
    return should_enable_color(color_mode, sys.stdout)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def create_themed_formatter(color_mode: ColorMode) -> type[AgentGrepHelpFormatter]:
    """Return a formatter subclass with its theme fixed at creation time.

    The theme is ``AnsiHelpTheme.default()`` when help colors are enabled
    for ``color_mode`` and ``None`` otherwise; every instance of the
    returned class stores it in ``_theme`` after base initialization.
    """
    bound_theme = None
    if should_enable_help_color(color_mode):
        bound_theme = AnsiHelpTheme.default()

    class ThemedAgentGrepHelpFormatter(AgentGrepHelpFormatter):
        """AgentGrepHelpFormatter carrying the theme chosen above."""

        _theme: object | None

        def __init__(
            self,
            prog: str,
            indent_increment: int = 2,
            max_help_position: int = 24,
            width: int | None = None,
            *,
            color: bool = True,
        ) -> None:
            super().__init__(
                prog,
                indent_increment=indent_increment,
                max_help_position=max_help_position,
                width=width,
                color=color,
            )
            # Assigned after the base initializer so this value is the one
            # the instance ends up with.
            self._theme = bound_theme

    return ThemedAgentGrepHelpFormatter
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
class AgentGrepHelpFormatter(argparse.RawDescriptionHelpFormatter):
|
|
470
|
+
"""Extend help output with syntax-colored example sections."""
|
|
471
|
+
|
|
472
|
+
_theme: object | None = None
|
|
473
|
+
|
|
474
|
+
@t.override
|
|
475
|
+
def _fill_text(self, text: str, width: int, indent: str) -> str:
|
|
476
|
+
"""Colorize ``examples:`` blocks when a theme is available."""
|
|
477
|
+
theme = t.cast("HelpTheme | None", getattr(self, "_theme", None))
|
|
478
|
+
if not text or theme is None:
|
|
479
|
+
return super()._fill_text(text, width, indent)
|
|
480
|
+
|
|
481
|
+
lines = text.splitlines(keepends=True)
|
|
482
|
+
formatted_lines: list[str] = []
|
|
483
|
+
in_examples_block = False
|
|
484
|
+
expect_value = False
|
|
485
|
+
|
|
486
|
+
for line in lines:
|
|
487
|
+
if line.strip() == "":
|
|
488
|
+
in_examples_block = False
|
|
489
|
+
expect_value = False
|
|
490
|
+
formatted_lines.append(f"{indent}{line}")
|
|
491
|
+
continue
|
|
492
|
+
|
|
493
|
+
has_newline = line.endswith("\n")
|
|
494
|
+
stripped_line = line.rstrip("\n")
|
|
495
|
+
leading_length = len(stripped_line) - len(stripped_line.lstrip(" "))
|
|
496
|
+
leading = stripped_line[:leading_length]
|
|
497
|
+
content = stripped_line[leading_length:]
|
|
498
|
+
content_lower = content.lower()
|
|
499
|
+
is_section_heading = (
|
|
500
|
+
content_lower.endswith("examples:") and content_lower != "examples:"
|
|
501
|
+
)
|
|
502
|
+
|
|
503
|
+
if is_section_heading or content_lower == "examples:":
|
|
504
|
+
formatted_content = f"{theme.heading}{content}{theme.reset}"
|
|
505
|
+
in_examples_block = True
|
|
506
|
+
expect_value = False
|
|
507
|
+
elif in_examples_block:
|
|
508
|
+
colored = self._colorize_example_line(
|
|
509
|
+
content,
|
|
510
|
+
theme=theme,
|
|
511
|
+
expect_value=expect_value,
|
|
512
|
+
)
|
|
513
|
+
expect_value = colored.expect_value
|
|
514
|
+
formatted_content = colored.text
|
|
515
|
+
else:
|
|
516
|
+
formatted_content = stripped_line
|
|
517
|
+
|
|
518
|
+
newline = "\n" if has_newline else ""
|
|
519
|
+
formatted_lines.append(f"{indent}{leading}{formatted_content}{newline}")
|
|
520
|
+
|
|
521
|
+
return "".join(formatted_lines)
|
|
522
|
+
|
|
523
|
+
class _ColorizedLine(t.NamedTuple):
|
|
524
|
+
"""Result of colorizing one example line."""
|
|
525
|
+
|
|
526
|
+
text: str
|
|
527
|
+
expect_value: bool
|
|
528
|
+
|
|
529
|
+
def _colorize_example_line(
|
|
530
|
+
self,
|
|
531
|
+
content: str,
|
|
532
|
+
*,
|
|
533
|
+
theme: HelpTheme,
|
|
534
|
+
expect_value: bool,
|
|
535
|
+
) -> _ColorizedLine:
|
|
536
|
+
"""Colorize program, subcommand, options, and option values."""
|
|
537
|
+
parts: list[str] = []
|
|
538
|
+
expecting_value = expect_value
|
|
539
|
+
first_token = True
|
|
540
|
+
colored_subcommand = False
|
|
541
|
+
|
|
542
|
+
for match in re.finditer(r"\s+|\S+", content):
|
|
543
|
+
token = match.group()
|
|
544
|
+
if token.isspace():
|
|
545
|
+
parts.append(token)
|
|
546
|
+
continue
|
|
547
|
+
|
|
548
|
+
if expecting_value:
|
|
549
|
+
color = theme.label
|
|
550
|
+
expecting_value = False
|
|
551
|
+
elif token.startswith("--"):
|
|
552
|
+
color = theme.long_option
|
|
553
|
+
expecting_value = (
|
|
554
|
+
token not in OPTIONS_FLAG_ONLY and token in OPTIONS_EXPECTING_VALUE
|
|
555
|
+
)
|
|
556
|
+
elif token.startswith("-"):
|
|
557
|
+
color = theme.short_option
|
|
558
|
+
expecting_value = (
|
|
559
|
+
token not in OPTIONS_FLAG_ONLY and token in OPTIONS_EXPECTING_VALUE
|
|
560
|
+
)
|
|
561
|
+
elif first_token:
|
|
562
|
+
color = theme.prog
|
|
563
|
+
elif not colored_subcommand:
|
|
564
|
+
color = theme.action
|
|
565
|
+
colored_subcommand = True
|
|
566
|
+
else:
|
|
567
|
+
color = None
|
|
568
|
+
|
|
569
|
+
first_token = False
|
|
570
|
+
if color is None:
|
|
571
|
+
parts.append(token)
|
|
572
|
+
else:
|
|
573
|
+
parts.append(f"{color}{token}{theme.reset}")
|
|
574
|
+
|
|
575
|
+
return self._ColorizedLine("".join(parts), expecting_value)
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
class TextualContainersModule(t.Protocol):
    """Minimal Textual containers module surface."""

    # Both attributes are typed as callables returning context managers.
    Horizontal: cabc.Callable[..., t.ContextManager[object]]
    Vertical: cabc.Callable[..., t.ContextManager[object]]
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
class TextualAppModule(t.Protocol):
    """Minimal Textual app module surface."""

    # Typed as a class object, not an instance.
    App: type[object]
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
class DataTableLike(t.Protocol):
    """Minimal DataTable surface used by the TUI."""

    # Structural type only: the methods below are stubs with ``...`` bodies.
    cursor_type: str

    def add_columns(self, *labels: str) -> None:
        """Add columns."""
        ...

    def clear(self) -> None:
        """Clear rows."""
        ...

    def add_row(self, *values: str, key: str | None = None) -> None:
        """Add one row."""
        ...
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
class StaticLike(t.Protocol):
    """Minimal Static widget surface used by the TUI."""

    # Structural type only: ``update`` is a stub taking the new text.
    def update(self, content: str) -> None:
        """Update widget contents."""
        ...
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
class QueryAppLike(t.Protocol):
    """Minimal Textual app query surface used by the TUI."""

    # Selector, expected type and result are all typed as plain ``object``.
    def query_one(self, selector: object, expect_type: object | None = None) -> object:
        """Look up one widget."""
        ...
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
class RunnableAppLike(t.Protocol):
    """Minimal runnable app surface."""

    # Structural type only: ``run`` takes no arguments and returns ``None``.
    def run(self) -> None:
        """Run the application."""
        ...
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
class TextualWidgetsModule(t.Protocol):
    """Minimal Textual widgets module surface."""

    # ``Footer`` and ``Header`` are typed as zero-argument callables; the
    # other three accept arbitrary arguments.
    DataTable: cabc.Callable[..., object]
    Footer: cabc.Callable[[], object]
    Header: cabc.Callable[[], object]
    Input: cabc.Callable[..., object]
    Static: cabc.Callable[..., object]
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
@dataclasses.dataclass(slots=True)
class BackendSelection:
    """Selected optional subprocess backends."""

    # Each field is optional (``str | None``).
    find_tool: str | None
    grep_tool: str | None
    json_tool: str | None
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
@dataclasses.dataclass(slots=True)
class SearchArgs:
    """Typed arguments for ``agentgrep search``."""

    # The first seven field names also appear on ``SearchQuery``.
    terms: tuple[str, ...]
    agents: tuple[AgentName, ...]
    search_type: SearchType
    any_term: bool
    regex: bool
    case_sensitive: bool
    limit: int | None
    # Presentation settings that ``SearchQuery`` does not declare.
    output_mode: OutputMode
    color_mode: ColorMode
    progress_mode: ProgressMode
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
@dataclasses.dataclass(slots=True)
class FindArgs:
    """Typed arguments for ``agentgrep find``."""

    # ``pattern`` and ``limit`` are optional.
    pattern: str | None
    agents: tuple[AgentName, ...]
    limit: int | None
    output_mode: OutputMode
    color_mode: ColorMode
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
@dataclasses.dataclass(slots=True)
class SearchQuery:
    """Compiled search configuration."""

    # All fields are required; the module docstring shows a full
    # keyword-argument construction example.
    terms: tuple[str, ...]
    search_type: SearchType
    any_term: bool
    regex: bool
    case_sensitive: bool
    agents: tuple[AgentName, ...]
    limit: int | None
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
@dataclasses.dataclass(slots=True)
class SourceHandle:
    """A discovered, parseable source file or SQLite database."""

    agent: AgentName
    store: str
    adapter_id: str
    path: pathlib.Path
    path_kind: PathKind
    source_kind: SourceKind
    # Optional, unlike ``path``.
    search_root: pathlib.Path | None
    mtime_ns: int
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
@dataclasses.dataclass(slots=True)
class SearchRecord:
    """Normalized prompt/history record."""

    # Required fields.
    kind: t.Literal["prompt", "history"]
    agent: AgentName
    store: str
    adapter_id: str
    path: pathlib.Path
    text: str
    # Optional fields defaulting to ``None``.
    title: str | None = None
    role: str | None = None
    timestamp: str | None = None
    model: str | None = None
    session_id: str | None = None
    conversation_id: str | None = None
    # ``default_factory`` gives each instance its own dict.
    metadata: dict[str, object] = dataclasses.field(default_factory=dict)
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
@dataclasses.dataclass(slots=True)
class FindRecord:
    """Normalized discovery record for ``agentgrep find``."""

    # ``kind`` only admits the literal ``"find"``.
    kind: t.Literal["find"]
    agent: AgentName
    store: str
    adapter_id: str
    path: pathlib.Path
    path_kind: PathKind
    # ``default_factory`` gives each instance its own dict.
    metadata: dict[str, object] = dataclasses.field(default_factory=dict)
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
@dataclasses.dataclass(slots=True)
class MessageCandidate:
    """Intermediate parsed message representation."""

    # ``role`` and ``text`` are required; ``role`` may be ``None``.
    role: str | None
    text: str
    # Optional fields defaulting to ``None``.
    title: str | None = None
    timestamp: str | None = None
    model: str | None = None
    session_id: str | None = None
    conversation_id: str | None = None
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
class SearchControl:
    """Cooperative stop signal for an active search.

    The state is a single ``threading.Event``: once set it stays set.
    """

    def __init__(self) -> None:
        self._answer_now = threading.Event()

    def request_answer_now(self) -> None:
        """Ask the search to return whatever it has collected so far."""
        self._answer_now.set()

    def answer_now_requested(self) -> bool:
        """Report whether an early, partial answer has been requested."""
        requested = self._answer_now.is_set()
        return requested
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
class AnswerNowInputListener:
    """Listen for a blank Enter keypress and request a partial answer.

    A daemon thread reads lines from ``stream`` (``sys.stdin`` by default).
    A whitespace-only line calls ``control.request_answer_now()``; end of
    input ends the listener without requesting anything.
    """

    def __init__(
        self,
        control: SearchControl,
        *,
        stream: t.TextIO | None = None,
        poll_interval: float = 0.1,
    ) -> None:
        self._control = control
        self._stream = stream if stream is not None else sys.stdin
        self._poll_interval = poll_interval
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

    def start(self) -> None:
        """Start listening for a blank line; no-op if already running."""
        if self._thread is not None and self._thread.is_alive():
            return
        self._stop_event.clear()
        self._thread = threading.Thread(
            target=self._run,
            daemon=True,
            name="agentgrep-answer-now-input",
        )
        self._thread.start()

    def stop(self) -> None:
        """Stop listening when possible.

        The join is bounded, so a thread stuck in a blocking read is left
        behind rather than waited for.
        """
        self._stop_event.set()
        thread = self._thread
        self._thread = None
        if thread is not None:
            thread.join(timeout=0.2)

    def _run(self) -> None:
        """Read lines until stopped, answered, or input ends."""
        selectable = self._stream_is_selectable()
        while not self._stop_event.is_set() and not self._control.answer_now_requested():
            line = self._read_line(selectable)
            if line is None:
                # Nothing to read yet; re-check the stop conditions.
                continue
            if line == "":
                # End of input, or the stream became unreadable.
                return
            if line.strip() == "":
                self._control.request_answer_now()
                return
            if not selectable:
                # Without polling each read blocks, so give up after the
                # first non-blank line.
                return

    def _read_line(self, selectable: bool) -> str | None:
        """Return one line, ``""`` when reading fails, or ``None`` if idle."""
        if selectable:
            try:
                readable, _, _ = select.select([self._stream], [], [], self._poll_interval)
            except (OSError, TypeError, ValueError):
                # select() failed without waiting. Back off for one poll
                # interval (or until stop() is called) so the caller's loop
                # does not spin at full CPU on a persistently broken stream.
                self._stop_event.wait(self._poll_interval)
                return None
            if not readable:
                return None
        try:
            return self._stream.readline()
        except (OSError, ValueError):
            return ""

    def _stream_is_selectable(self) -> bool:
        """Return whether ``select.select`` accepts the stream."""
        try:
            _ = self._stream.fileno()
            readable, _, _ = select.select([self._stream], [], [], 0)
        except (AttributeError, OSError, TypeError, ValueError):
            return False
        return isinstance(readable, list)
|
|
836
|
+
|
|
837
|
+
|
|
838
|
+
class SearchProgress(t.Protocol):
    """Progress reporter used by search internals."""

    # Structural type only: every method is a stub with a ``...`` body.
    # ``NoopSearchProgress`` defines the same method names and signatures.

    def start(self, query: SearchQuery) -> None:
        """Mark search start."""
        ...

    def sources_discovered(self, count: int) -> None:
        """Report discovered source count."""
        ...

    def prefilter_started(self, root: pathlib.Path) -> None:
        """Report root prefilter start."""
        ...

    def sources_planned(self, planned: int, total: int) -> None:
        """Report selected source count."""
        ...

    def source_started(self, index: int, total: int, source: SourceHandle) -> None:
        """Report source scan start."""
        ...

    def source_finished(
        self,
        index: int,
        total: int,
        source: SourceHandle,
        records: int,
        matches: int,
    ) -> None:
        """Report source scan completion."""
        ...

    def result_added(self, count: int) -> None:
        """Report deduped result count."""
        ...

    def finish(self, result_count: int) -> None:
        """Report search completion."""
        ...

    def answer_now(self, result_count: int) -> None:
        """Report early search completion with partial results."""
        ...

    def interrupt(self) -> None:
        """Report interrupted search."""
        ...

    def close(self) -> None:
        """Release any progress resources."""
        ...
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
class NoopSearchProgress:
    """Silent search progress reporter."""

    # Every method body is just its docstring, so each call returns ``None``
    # without doing anything. Method names and signatures match the
    # ``SearchProgress`` protocol.

    def start(self, query: SearchQuery) -> None:
        """Ignore search start."""

    def sources_discovered(self, count: int) -> None:
        """Ignore discovered source count."""

    def prefilter_started(self, root: pathlib.Path) -> None:
        """Ignore root prefilter start."""

    def sources_planned(self, planned: int, total: int) -> None:
        """Ignore selected source count."""

    def source_started(self, index: int, total: int, source: SourceHandle) -> None:
        """Ignore source scan start."""

    def source_finished(
        self,
        index: int,
        total: int,
        source: SourceHandle,
        records: int,
        matches: int,
    ) -> None:
        """Ignore source scan completion."""

    def result_added(self, count: int) -> None:
        """Ignore deduped result count."""

    def finish(self, result_count: int) -> None:
        """Ignore search completion."""

    def answer_now(self, result_count: int) -> None:
        """Ignore early search completion."""

    def interrupt(self) -> None:
        """Ignore interrupted search."""

    def close(self) -> None:
        """Nothing to release."""
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
class ConsoleSearchProgress:
    """Show search progress to a person watching the output stream.

    When the stream is treated as a TTY, a daemon thread repaints one spinner
    line in place. Otherwise a start line, periodic heartbeat lines, and a
    closing line are written. Every public method returns immediately when
    the reporter was created with ``enabled=False``.
    """

    _SPINNER_FRAMES: t.ClassVar[str] = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"

    def __init__(
        self,
        *,
        enabled: bool,
        stream: t.TextIO | None = None,
        tty: bool | None = None,
        color_mode: ColorMode = "auto",
        refresh_interval: float = 0.1,
        heartbeat_interval: float = 10.0,
        answer_now_hint: bool = False,
    ) -> None:
        """Store configuration and reset all progress state.

        ``stream`` defaults to ``sys.stderr``; ``tty`` defaults to whatever
        the stream's ``isatty()`` reports.
        """
        target = sys.stderr if stream is None else stream
        if tty is None:
            # Streams that lack ``isatty`` are treated as non-interactive.
            tty = bool(getattr(target, "isatty", lambda: False)())

        # Fixed configuration.
        self._enabled = enabled
        self._stream = target
        self._tty = tty
        self._colors = AnsiColors.for_stream(color_mode, target)
        self._refresh_interval = refresh_interval
        self._heartbeat_interval = heartbeat_interval
        self._answer_now_hint = answer_now_hint

        # Renderer-thread plumbing.
        self._lock = threading.Lock()
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None

        # Mutable progress state.
        self._started_at: float | None = None
        self._last_heartbeat_at: float | None = None
        self._last_line_len = 0
        self._query_label = "search"
        self._phase = "starting"
        self._detail: str | None = None
        self._current: int | None = None
        self._total: int | None = None
        self._matches = 0
        self._finished = False

    def start(self, query: SearchQuery) -> None:
        """Reset state for ``query`` and begin rendering."""
        if not self._enabled:
            return
        if query.terms:
            label = " ".join(query.terms)
        else:
            label = "all records"
        started = time.monotonic()
        with self._lock:
            self._query_label = label
            self._phase = "discovering"
            self._detail = None
            self._current = None
            self._total = None
            self._matches = 0
            self._started_at = started
            self._last_heartbeat_at = started
            self._finished = False
        if not self._tty:
            self._emit_line(self._start_line(label))
            return
        self._ensure_tty_thread()

    def sources_discovered(self, count: int) -> None:
        """Record how many sources discovery produced."""
        self.set_status("discovered", total=count, detail=f"{count} sources")

    def prefilter_started(self, root: pathlib.Path) -> None:
        """Record that prefiltering of ``root`` began."""
        shown_root = format_display_path(root, directory=True)
        self.set_status("prefiltering", detail=shown_root)

    def sources_planned(self, planned: int, total: int) -> None:
        """Record how many of ``total`` sources will be scanned."""
        self.set_status("planning", current=planned, total=total, detail="candidate sources")

    def source_started(self, index: int, total: int, source: SourceHandle) -> None:
        """Record that scanning of ``source`` began."""
        self.set_status("scanning", current=index, total=total, detail=source.path.name)

    def source_finished(
        self,
        index: int,
        total: int,
        source: SourceHandle,
        records: int,
        matches: int,
    ) -> None:
        """Record that ``source`` was scanned, with its record and match counts."""
        outcome = f"{records} records, {format_match_count(matches)} in {source.path.name}"
        self.set_status("scanning", current=index, total=total, detail=outcome)

    def result_added(self, count: int) -> None:
        """Store the deduplicated result count and emit a heartbeat if one is due."""
        if not self._enabled:
            return
        with self._lock:
            self._matches = count
        self._emit_heartbeat_if_due()

    def set_status(
        self,
        phase: str,
        *,
        current: int | None = None,
        total: int | None = None,
        detail: str | None = None,
    ) -> None:
        """Replace the phase, counters, and detail, then emit a heartbeat if due."""
        if not self._enabled:
            return
        with self._lock:
            self._phase = phase
            self._current = current
            self._total = total
            self._detail = detail
        self._emit_heartbeat_if_due()

    def finish(self, result_count: int) -> None:
        """Mark the search complete and stop rendering.

        In TTY mode the spinner line is erased; otherwise a completion line
        is written.
        """
        if not self._enabled:
            return
        with self._lock:
            self._matches = result_count
            self._phase = "complete"
            self._finished = True
        if not self._tty:
            self._emit_line(self._finish_line(result_count, self._elapsed_seconds()))
            return
        self._stop_tty_thread()
        self._clear_tty_line()

    def answer_now(self, result_count: int) -> None:
        """Mark the search as ended early and write a partial-answer line."""
        if not self._enabled:
            return
        with self._lock:
            self._matches = result_count
            self._phase = "answering now"
            self._finished = True
        message = self._answer_now_line(result_count)
        if not self._tty:
            self._emit_line(message)
            return
        self._stop_tty_thread()
        self._write_tty_line(message)

    def close(self) -> None:
        """Stop the TTY renderer, if any, and erase its line."""
        if not (self._enabled and self._tty):
            return
        self._stop_tty_thread()
        self._clear_tty_line()

    def interrupt(self) -> None:
        """Stop rendering and leave the current summary on its own line."""
        if not self._enabled:
            return
        if not self._tty:
            self._emit_line(self._summary())
            return
        self._stop_tty_thread()
        self._write_tty_summary_line()

    def _ensure_tty_thread(self) -> None:
        """Start the spinner thread unless one is already alive."""
        running = self._thread
        if running is not None and running.is_alive():
            return
        self._stop_event.clear()
        worker = threading.Thread(
            target=self._tty_loop,
            daemon=True,
            name="agentgrep-search-progress",
        )
        self._thread = worker
        worker.start()

    def _stop_tty_thread(self) -> None:
        """Ask the spinner thread to stop and wait up to one second for it."""
        self._stop_event.set()
        worker, self._thread = self._thread, None
        if worker is not None:
            worker.join(timeout=1.0)

    def _tty_loop(self) -> None:
        """Repaint the spinner line until the stop event is set."""
        for frame in itertools.cycle(self._SPINNER_FRAMES):
            if self._stop_event.is_set():
                break
            self._render_tty(frame)
            self._stop_event.wait(self._refresh_interval)

    def _render_tty(self, frame: str) -> None:
        """Overwrite the current terminal line with ``frame`` and the summary."""
        # Build the text first: ``_summary`` takes the lock internally.
        rendered = f"{self._colors.info(frame)} {self._summary()}"
        with self._lock:
            try:
                self._stream.write("\r\033[2K" + rendered)
                self._stream.flush()
                self._last_line_len = len(rendered)
            except (OSError, ValueError):
                pass

    def _clear_tty_line(self) -> None:
        """Erase the spinner line if one has been written."""
        with self._lock:
            if self._last_line_len == 0:
                return
            try:
                self._stream.write("\r\033[2K")
                self._stream.flush()
            except (OSError, ValueError):
                pass
            self._last_line_len = 0

    def _write_tty_summary_line(self) -> None:
        """Replace the spinner line with the current summary."""
        self._write_tty_line(self._summary())

    def _write_tty_line(self, line: str) -> None:
        """Replace the spinner line with ``line`` followed by a newline."""
        with self._lock:
            try:
                self._stream.write("\r\033[2K" + line + "\n")
                self._stream.flush()
            except (OSError, ValueError):
                pass
            self._last_line_len = 0

    def _emit_heartbeat_if_due(self) -> None:
        """Write a heartbeat line once the heartbeat interval has elapsed.

        Does nothing in TTY mode, when disabled, or before ``start``.
        """
        if self._tty or not self._enabled:
            return
        with self._lock:
            previous = self._last_heartbeat_at
            label = self._query_label
        if previous is None:
            return
        now = time.monotonic()
        if now - previous < self._heartbeat_interval:
            return
        self._emit_line(self._heartbeat_line(label, self._elapsed_seconds()))
        with self._lock:
            self._last_heartbeat_at = now

    def _emit_line(self, line: str) -> None:
        """Write ``line`` plus a newline, ignoring stream write failures."""
        try:
            self._stream.write(line + "\n")
            self._stream.flush()
        except (OSError, ValueError):
            pass

    def _summary(self) -> str:
        """Return the one-line summary joined with `` | `` separators."""
        elapsed = self._elapsed_seconds()
        segments = [
            self._start_line(self._query_label),
            self._status_text(),
            self._colors.warning(format_match_count(self._matches)),
            self._colors.muted(f"{elapsed:.1f}s"),
        ]
        if self._answer_now_hint:
            segments.append(self._colors.white("[Press enter, answer now]"))
        return " | ".join(segments)

    def _start_line(self, label: str) -> str:
        """Return the ``Searching <label>`` text."""
        heading = self._colors.heading("Searching")
        return f"{heading} {self._colors.highlight(label)}"

    def _heartbeat_line(self, label: str, elapsed: float) -> str:
        """Return the ``still searching`` line for non-TTY output."""
        dots = self._colors.muted("...")
        prefix = f"{dots} {self._colors.heading('still searching')}"
        elapsed_text = self._colors.muted(f"{elapsed:.0f}s elapsed")
        return f"{prefix} {self._colors.highlight(label)}: {self._status_text()} ({elapsed_text})"

    def _finish_line(self, result_count: int, elapsed: float) -> str:
        """Return the completion line for non-TTY output."""
        title = self._colors.success("Search complete:")
        tally = self._colors.warning(format_match_count(result_count))
        duration = self._colors.muted(f"{elapsed:.1f}s elapsed")
        return f"{title} {tally} ({duration})"

    def _answer_now_line(self, result_count: int) -> str:
        """Return the partial-answer line."""
        title = self._colors.success("Answering now:")
        tally = self._colors.warning(format_match_count(result_count))
        return f"{title} {tally}"

    def _status_text(self) -> str:
        """Return the phase with its counter if set, else with its detail if set."""
        with self._lock:
            phase, current, total, detail = (
                self._phase,
                self._current,
                self._total,
                self._detail,
            )
        if current is not None and total is not None:
            # The counter takes precedence; detail is not shown alongside it.
            counter = self._colors.warning(f"{current}/{total}")
            return f"{self._colors.heading(phase)} {counter} {self._colors.muted('sources')}"
        if detail:
            return f"{self._colors.heading(phase)} {self._colors.muted(detail)}"
        return self._colors.heading(phase)

    def _elapsed_seconds(self) -> float:
        """Return seconds since ``start``, or ``0.0`` if not started."""
        with self._lock:
            origin = self._started_at
        return 0.0 if origin is None else time.monotonic() - origin
|
|
1245
|
+
|
|
1246
|
+
|
|
1247
|
+
def format_match_count(count: int) -> str:
    """Format ``count`` as ``"<count> match"`` or ``"<count> matches"``.

    The singular form is used only when ``count`` equals 1.
    """
    noun = "matches" if count != 1 else "match"
    return " ".join((str(count), noun))
|
|
1251
|
+
|
|
1252
|
+
|
|
1253
|
+
def noop_search_progress() -> SearchProgress:
    """Build a progress reporter that discards every event."""
    silent: SearchProgress = NoopSearchProgress()
    return silent
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
def select_backends() -> BackendSelection:
    """Look up optional helper executables on ``PATH``.

    For each role the first available candidate wins; a role whose
    candidates are all missing is recorded as ``None``.
    """
    finder = which_first(("fd", "fdfind"))
    grepper = which_first(("rg", "ag"))
    json_filter = which_first(("jq", "jaq"))
    return BackendSelection(
        find_tool=finder,
        grep_tool=grepper,
        json_tool=json_filter,
    )
|
|
1265
|
+
|
|
1266
|
+
|
|
1267
|
+
def which_first(names: tuple[str, ...]) -> str | None:
    """Resolve ``names`` in order and return the first path found on ``PATH``.

    Returns ``None`` when none of the names resolves.
    """
    return next(
        (located for name in names if (located := shutil.which(name)) is not None),
        None,
    )
|
|
1274
|
+
|
|
1275
|
+
|
|
1276
|
+
def run_readonly_command(
    command: list[str],
    *,
    control: SearchControl | None = None,
) -> subprocess.CompletedProcess[str]:
    """Run ``command`` without a shell and capture its text output.

    Parameters
    ----------
    command
        Argument vector passed to the child process as-is.
    control
        Optional search control. When given, the child is polled every 50 ms
        and ``control.answer_now_requested()`` is consulted on each timeout.
        Once it returns true the child is terminated, then killed if it has
        not exited within 200 ms, and whatever output was captured is returned.

    Returns
    -------
    subprocess.CompletedProcess
        Return code plus captured ``stdout`` and ``stderr``. A non-zero return
        code is reported here, never raised.
    """
    if control is None:
        return subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
        )
    # The context manager closes the pipes and reaps the child on every exit path.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    ) as process:
        try:
            while True:
                try:
                    stdout, stderr = process.communicate(timeout=0.05)
                except subprocess.TimeoutExpired:
                    if not control.answer_now_requested():
                        continue
                    process.terminate()
                    try:
                        stdout, stderr = process.communicate(timeout=0.2)
                    except subprocess.TimeoutExpired:
                        # The child ignored SIGTERM; force it down.
                        process.kill()
                        stdout, stderr = process.communicate()
                break
        except BaseException:
            # Mirror ``subprocess.run``: never leave the child running when an
            # exception (including KeyboardInterrupt) escapes the polling loop.
            process.kill()
            raise
    return subprocess.CompletedProcess(command, process.returncode, stdout, stderr)
|
|
1314
|
+
|
|
1315
|
+
|
|
1316
|
+
@dataclasses.dataclass(slots=True)
|
|
1317
|
+
class ParserBundle:
|
|
1318
|
+
"""CLI parsers used for root and subcommand help."""
|
|
1319
|
+
|
|
1320
|
+
parser: argparse.ArgumentParser
|
|
1321
|
+
search_parser: argparse.ArgumentParser
|
|
1322
|
+
find_parser: argparse.ArgumentParser
|
|
1323
|
+
|
|
1324
|
+
|
|
1325
|
+
def normalize_color_mode(argv: cabc.Sequence[str] | None) -> ColorMode:
    """Scan ``argv`` for the first valid ``--color`` setting.

    Both ``--color VALUE`` and ``--color=VALUE`` are recognised. Values other
    than ``auto``, ``always`` or ``never`` are skipped. ``"auto"`` is returned
    when no valid setting is found. ``sys.argv[1:]`` is scanned when ``argv``
    is ``None``.
    """
    arguments = sys.argv[1:] if argv is None else argv
    accepted = {"auto", "always", "never"}
    for position, token in enumerate(arguments):
        if token == "--color":
            following = position + 1
            candidate = arguments[following] if following < len(arguments) else None
        elif token.startswith("--color="):
            candidate = token.partition("=")[2]
        else:
            continue
        if candidate in accepted:
            return t.cast("ColorMode", candidate)
    return "auto"
|
|
1339
|
+
|
|
1340
|
+
|
|
1341
|
+
# Subcommand names that suppress default-subcommand injection.
SUBCOMMANDS: frozenset[str] = frozenset({"search", "find"})


def inject_default_subcommand(
    argv: cabc.Sequence[str] | None,
) -> cabc.Sequence[str] | None:
    """Insert ``search`` into ``argv`` when the user gave no subcommand.

    Leading ``--color VALUE`` / ``--color=VALUE`` options are skipped. If the
    next token is a help flag or a known subcommand, ``argv`` is returned
    untouched. Otherwise a new list with ``search`` placed before that token
    is returned, so ``agentgrep bliss`` parses like ``agentgrep search bliss``.
    An ``argv`` with nothing left after the skipped options is also returned
    untouched.

    Examples
    --------
    >>> inject_default_subcommand(["bliss"])
    ['search', 'bliss']
    >>> inject_default_subcommand(["search", "bliss"])
    ['search', 'bliss']
    >>> inject_default_subcommand(["find", "codex"])
    ['find', 'codex']
    >>> inject_default_subcommand(["--color", "never", "bliss"])
    ['--color', 'never', 'search', 'bliss']
    >>> inject_default_subcommand(["--help"])
    ['--help']
    >>> inject_default_subcommand([])
    []
    """
    tokens = list(sys.argv[1:] if argv is None else argv)
    total = len(tokens)
    cursor = 0
    while cursor < total:
        current = tokens[cursor]
        if current in ("-h", "--help") or current in SUBCOMMANDS:
            return argv
        if current == "--color" and cursor + 1 < total:
            # Skip the option together with its separate value.
            cursor += 2
        elif current.startswith("--color="):
            cursor += 1
        else:
            return [*tokens[:cursor], "search", *tokens[cursor:]]
    return argv
|
|
1387
|
+
|
|
1388
|
+
|
|
1389
|
+
@contextlib.contextmanager
def configured_color_environment(color_mode: ColorMode) -> cabc.Iterator[None]:
    """Set ``FORCE_COLOR`` for the duration of the block when colour is forced.

    ``FORCE_COLOR`` is set to ``"1"`` only when ``color_mode`` is ``"always"``
    and ``NO_COLOR`` is unset or empty. On exit the variable is restored to
    its previous value, or removed if it was absent.
    """
    previous = os.environ.get("FORCE_COLOR")
    try:
        wants_forced = color_mode == "always" and not os.environ.get("NO_COLOR")
        if wants_forced:
            os.environ["FORCE_COLOR"] = "1"
        yield
    finally:
        if previous is not None:
            os.environ["FORCE_COLOR"] = previous
        else:
            _ = os.environ.pop("FORCE_COLOR", None)
|
|
1402
|
+
|
|
1403
|
+
|
|
1404
|
+
def create_parser(
    color_mode: ColorMode,
) -> ParserBundle:
    """Build the ``agentgrep`` root parser with its ``search`` and ``find`` subparsers.

    All three parsers share one themed formatter class and enable argparse
    colour unless ``color_mode`` is ``"never"``.
    """
    formatter_class = create_themed_formatter(color_mode)
    use_color = color_mode != "never"

    root = argparse.ArgumentParser(
        prog="agentgrep",
        description=CLI_DESCRIPTION,
        formatter_class=formatter_class,
        color=use_color,
    )
    _ = root.add_argument(
        "--color",
        choices=["auto", "always", "never"],
        default="auto",
        help="when to use colors: auto (default), always, or never",
    )
    commands = root.add_subparsers(dest="command")

    # --- search -----------------------------------------------------------
    search = commands.add_parser(
        "search",
        help="Search normalized prompts or history",
        description=SEARCH_DESCRIPTION,
        formatter_class=formatter_class,
        color=use_color,
    )
    add_common_agent_options(search)
    _ = search.add_argument("terms", nargs="*", help="Keywords or regex patterns")
    _ = search.add_argument(
        "--type",
        choices=["prompts", "history", "all"],
        default="prompts",
        dest="search_type",
        help="Record type to search (default: prompts)",
    )
    boolean_flags = (
        ("--any", "Match any term instead of requiring all terms"),
        ("--regex", "Treat terms as regular expressions"),
        ("--case-sensitive", "Perform case-sensitive matching"),
    )
    for flag, description in boolean_flags:
        _ = search.add_argument(flag, action="store_true", help=description)
    _ = search.add_argument(
        "--limit",
        type=int,
        metavar="N",
        help="Limit the number of results",
    )
    _ = search.add_argument(
        "--progress",
        choices=["auto", "always", "never"],
        default="auto",
        help="Show search progress on stderr",
    )
    add_output_mode_options(search, allow_ui=True)

    # --- find -------------------------------------------------------------
    find = commands.add_parser(
        "find",
        help="Find known prompt/history stores and session files",
        description=FIND_DESCRIPTION,
        formatter_class=formatter_class,
        color=use_color,
    )
    add_common_agent_options(find)
    _ = find.add_argument(
        "pattern",
        nargs="?",
        help="Optional substring to match against discovered paths",
    )
    _ = find.add_argument(
        "--limit",
        type=int,
        metavar="N",
        help="Limit the number of results",
    )
    add_output_mode_options(find, allow_ui=False)

    return ParserBundle(parser=root, search_parser=search, find_parser=find)
|
|
1489
|
+
|
|
1490
|
+
|
|
1491
|
+
def parse_args(
    argv: cabc.Sequence[str] | None = None,
) -> SearchArgs | FindArgs | None:
    """Turn command-line arguments into ``SearchArgs`` or ``FindArgs``.

    Returns ``None`` after printing help when no subcommand is selected, when
    ``search`` has no terms, or when ``find`` has no pattern. A ``--limit``
    below 1 is reported through the root parser's ``error``.
    """
    color_mode = normalize_color_mode(argv)
    argv = inject_default_subcommand(argv)

    def show_help(target: argparse.ArgumentParser) -> None:
        # Help is rendered inside the colour environment so argparse honours it.
        with configured_color_environment(color_mode):
            target.print_help()

    with configured_color_environment(color_mode):
        bundle = create_parser(color_mode)
        namespace = bundle.parser.parse_args(argv)

    selected = t.cast("str | None", getattr(namespace, "command", None))
    if selected is None:
        show_help(bundle.parser)
        return None

    agents = parse_agents(t.cast("list[str]", namespace.agent))
    output_mode = parse_output_mode(namespace)
    limit = t.cast("int | None", namespace.limit)
    if limit is not None and limit < 1:
        with configured_color_environment(color_mode):
            bundle.parser.error("--limit must be greater than 0")

    command = t.cast("str", namespace.command)
    if command != "search":
        # Any other command reaching this point is handled as ``find``.
        pattern = t.cast("str | None", namespace.pattern)
        if not pattern:
            show_help(bundle.find_parser)
            return None
        return FindArgs(
            pattern=pattern,
            agents=agents,
            limit=limit,
            output_mode=output_mode,
            color_mode=color_mode,
        )

    terms = tuple(t.cast("list[str]", namespace.terms))
    if not terms:
        show_help(bundle.search_parser)
        return None
    return SearchArgs(
        terms=terms,
        agents=agents,
        search_type=t.cast("SearchType", namespace.search_type),
        any_term=t.cast("bool", namespace.any),
        regex=t.cast("bool", namespace.regex),
        case_sensitive=t.cast("bool", namespace.case_sensitive),
        limit=limit,
        output_mode=output_mode,
        color_mode=color_mode,
        progress_mode=t.cast("ProgressMode", namespace.progress),
    )
|
|
1542
|
+
|
|
1543
|
+
|
|
1544
|
+
def add_common_agent_options(parser: argparse.ArgumentParser) -> None:
    """Register the repeatable ``--agent`` option on ``parser``.

    Accepted values are every entry of ``AGENT_CHOICES`` plus ``"all"``.
    """
    accepted = [*AGENT_CHOICES, "all"]
    _ = parser.add_argument(
        "--agent",
        action="append",
        choices=accepted,
        default=[],
        help="Limit results to a specific agent; repeatable",
    )
|
|
1553
|
+
|
|
1554
|
+
|
|
1555
|
+
def add_output_mode_options(
    parser: argparse.ArgumentParser,
    *,
    allow_ui: bool,
) -> None:
    """Register ``--json``, ``--ndjson`` and optionally ``--ui`` on ``parser``.

    The flags share one mutually exclusive group. ``--ui`` is added only when
    ``allow_ui`` is true.
    """
    flags = [
        ("--json", "Emit one JSON document"),
        ("--ndjson", "Emit one JSON object per line"),
    ]
    if allow_ui:
        flags.append(("--ui", "Launch a read-only UI"))
    exclusive = parser.add_mutually_exclusive_group()
    for flag, description in flags:
        _ = exclusive.add_argument(flag, action="store_true", help=description)
|
|
1566
|
+
|
|
1567
|
+
|
|
1568
|
+
def parse_agents(values: list[str]) -> tuple[AgentName, ...]:
    """Normalize ``--agent`` selections into a tuple of agent names.

    Parameters
    ----------
    values
        Raw ``--agent`` values in command-line order; may be empty.

    Returns
    -------
    tuple
        ``AGENT_CHOICES`` when nothing was selected or ``"all"`` appears.
        Otherwise the selected agents in first-seen order with repeats
        removed, so passing the same agent twice does not make its stores
        get discovered twice.
    """
    if not values or "all" in values:
        return AGENT_CHOICES
    # ``dict.fromkeys`` drops repeats while keeping first-seen order.
    return tuple(t.cast("AgentName", value) for value in dict.fromkeys(values))
|
|
1574
|
+
|
|
1575
|
+
|
|
1576
|
+
def parse_output_mode(namespace: argparse.Namespace) -> OutputMode:
    """Map the output flags on ``namespace`` to an output mode name.

    Flags are checked in the order ``json``, ``ndjson``, ``ui``; a missing
    attribute counts as unset. ``"text"`` is returned when none is set.
    """
    for mode in ("json", "ndjson", "ui"):
        if getattr(namespace, mode, False):
            return t.cast("OutputMode", mode)
    return "text"
|
|
1585
|
+
|
|
1586
|
+
|
|
1587
|
+
def make_search_query(args: SearchArgs) -> SearchQuery:
    """Build a ``SearchQuery`` from the matching fields of ``args``.

    Only the query-related fields are copied: terms, agents, search type,
    the matching flags, and the limit.
    """
    query = SearchQuery(
        terms=args.terms,
        agents=args.agents,
        search_type=args.search_type,
        any_term=args.any_term,
        regex=args.regex,
        case_sensitive=args.case_sensitive,
        limit=args.limit,
    )
    return query
|
|
1598
|
+
|
|
1599
|
+
|
|
1600
|
+
def discover_sources(
    home: pathlib.Path,
    agents: tuple[AgentName, ...],
    backends: BackendSelection,
) -> list[SourceHandle]:
    """Collect source handles for each requested agent under ``home``.

    Agents other than ``codex``, ``claude`` and ``cursor`` contribute nothing.
    The result is ordered by agent, then store, then path string.
    """
    discoverers = {
        "codex": discover_codex_sources,
        "claude": discover_claude_sources,
        "cursor": discover_cursor_sources,
    }
    found: list[SourceHandle] = []
    for agent in agents:
        discover = discoverers.get(agent)
        if discover is not None:
            found.extend(discover(home, backends))
    return sorted(found, key=lambda handle: (handle.agent, handle.store, str(handle.path)))
|
|
1616
|
+
|
|
1617
|
+
|
|
1618
|
+
def file_mtime_ns(path: pathlib.Path) -> int:
    """Return the modification time of ``path`` in nanoseconds.

    Returns ``0`` when ``stat`` fails with ``OSError`` (for example, the file
    does not exist).
    """
    with contextlib.suppress(OSError):
        return path.stat().st_mtime_ns
    return 0
|
|
1624
|
+
|
|
1625
|
+
|
|
1626
|
+
def discover_codex_sources(
    home: pathlib.Path,
    backends: BackendSelection,
) -> list[SourceHandle]:
    """Find Codex history files and session logs under ``home / ".codex"``.

    History handles (``history.json`` / ``history.jsonl``) come first,
    followed by one handle per ``*.jsonl`` file found under ``sessions``.
    Returns an empty list when the ``.codex`` directory is missing.
    """
    codex_root = home / ".codex"
    if not codex_root.exists():
        return []

    history_candidates = [codex_root / name for name in ("history.json", "history.jsonl")]
    handles = [
        SourceHandle(
            agent="codex",
            store="codex.history",
            adapter_id="codex.history_json.v1",
            path=history_path,
            path_kind="history_file",
            source_kind="jsonl" if history_path.suffix == ".jsonl" else "json",
            search_root=None,
            mtime_ns=file_mtime_ns(history_path),
        )
        for history_path in history_candidates
        if history_path.is_file()
    ]

    sessions_root = codex_root / "sessions"
    for session_path in list_files_matching(sessions_root, "*.jsonl", backends.find_tool):
        handles.append(
            SourceHandle(
                agent="codex",
                store="codex.sessions",
                adapter_id="codex.sessions_jsonl.v1",
                path=session_path,
                path_kind="session_file",
                source_kind="jsonl",
                search_root=sessions_root,
                mtime_ns=file_mtime_ns(session_path),
            ),
        )
    return handles
|
|
1667
|
+
|
|
1668
|
+
|
|
1669
|
+
def discover_claude_sources(
    home: pathlib.Path,
    backends: BackendSelection,
) -> list[SourceHandle]:
    """Find Claude session logs under ``home / ".claude" / "projects"``.

    One handle is produced per ``*.jsonl`` file found beneath that directory.
    Returns an empty list when the directory is missing.
    """
    projects_root = home / ".claude" / "projects"
    handles: list[SourceHandle] = []
    if not projects_root.exists():
        return handles
    for session_path in list_files_matching(projects_root, "*.jsonl", backends.find_tool):
        handles.append(
            SourceHandle(
                agent="claude",
                store="claude.projects",
                adapter_id="claude.projects_jsonl.v1",
                path=session_path,
                path_kind="session_file",
                source_kind="jsonl",
                search_root=projects_root,
                mtime_ns=file_mtime_ns(session_path),
            ),
        )
    return handles
|
|
1690
|
+
|
|
1691
|
+
|
|
1692
|
+
def discover_cursor_sources(
    home: pathlib.Path,
    backends: BackendSelection,
) -> list[SourceHandle]:
    """Find Cursor SQLite databases under ``home`` and the official state paths.

    Handles are produced in this order: the AI-tracking database, each
    existing entry of ``OFFICIAL_CURSOR_STATE_PATHS`` (modern adapter), then
    every ``state.vscdb`` under ``home / ".cursor"`` not already listed
    (legacy adapter).
    """

    def sqlite_handle(store: str, adapter_id: str, db_path: pathlib.Path) -> SourceHandle:
        # Every Cursor source is a standalone SQLite file with no search root.
        return SourceHandle(
            agent="cursor",
            store=store,
            adapter_id=adapter_id,
            path=db_path,
            path_kind="sqlite_db",
            source_kind="sqlite",
            search_root=None,
            mtime_ns=file_mtime_ns(db_path),
        )

    handles: list[SourceHandle] = []

    tracking_db = home / ".cursor" / "ai-tracking" / "ai-code-tracking.db"
    if tracking_db.is_file():
        handles.append(
            sqlite_handle("cursor.ai_tracking", "cursor.ai_tracking_sqlite.v1", tracking_db),
        )

    official: set[pathlib.Path] = set()
    for state_path in OFFICIAL_CURSOR_STATE_PATHS:
        if not state_path.is_file():
            continue
        official.add(state_path)
        handles.append(
            sqlite_handle("cursor.state", "cursor.state_vscdb_modern.v1", state_path),
        )

    cursor_root = home / ".cursor"
    handles.extend(
        sqlite_handle("cursor.state", "cursor.state_vscdb_legacy.v1", legacy_path)
        for legacy_path in list_files_matching(cursor_root, "state.vscdb", backends.find_tool)
        if legacy_path not in official
    )
    return handles
|
|
1746
|
+
|
|
1747
|
+
|
|
1748
|
+
def list_files_matching(
    root: pathlib.Path,
    glob_pattern: str,
    fd_program: str | None,
) -> list[pathlib.Path]:
    """List files under ``root`` that match a glob, in sorted order.

    Parameters
    ----------
    root
        Directory to search recursively. A missing root yields ``[]``.
    glob_pattern
        Glob applied to file names (``fd --glob`` / ``Path.rglob``).
    fd_program
        Path to an ``fd`` executable, or ``None`` to use ``Path.rglob``.

    Returns
    -------
    list[pathlib.Path]
        Matching regular files, sorted so both backends give the same order.
    """
    if not root.exists():
        return []
    if fd_program is not None:
        # ``--hidden`` and ``--no-ignore`` make fd see the same files the
        # ``rglob`` fallback sees; by default fd skips dotfiles and ignored
        # paths, which would silently hide stores from discovery.
        command = [
            fd_program,
            "-a",
            "-t",
            "f",
            "--hidden",
            "--no-ignore",
            "--glob",
            glob_pattern,
            str(root),
        ]
        completed = run_readonly_command(command)
        if completed.returncode == 0:
            # fd emits results in traversal order, which is not stable.
            return sorted(
                pathlib.Path(line) for line in completed.stdout.splitlines() if line.strip()
            )
    # fd missing or failed: fall back to a pure-Python walk.
    return sorted(path for path in root.rglob(glob_pattern) if path.is_file())
|
|
1762
|
+
|
|
1763
|
+
|
|
1764
|
+
def search_sources(
    query: SearchQuery,
    sources: list[SourceHandle],
    backends: BackendSelection,
    *,
    progress: SearchProgress | None = None,
    control: SearchControl | None = None,
) -> list[SearchRecord]:
    """Plan, parse, and filter the given sources for one query.

    Returns an empty list when an early answer is requested during planning;
    otherwise returns whatever ``collect_search_records`` gathered. The
    progress reporter is told either ``answer_now`` or ``finish`` with the
    final record count.
    """
    reporter = noop_search_progress() if progress is None else progress
    controller = SearchControl() if control is None else control

    candidates = plan_search_sources(
        query,
        sources,
        backends,
        progress=reporter,
        control=controller,
    )
    if controller.answer_now_requested():
        reporter.answer_now(0)
        return []

    reporter.sources_planned(len(candidates), len(sources))
    found = collect_search_records(
        query,
        candidates,
        progress=reporter,
        control=controller,
    )
    if controller.answer_now_requested():
        reporter.answer_now(len(found))
        return found
    reporter.finish(len(found))
    return found
|
|
1797
|
+
|
|
1798
|
+
|
|
1799
|
+
def run_search_query(
    home: pathlib.Path,
    query: SearchQuery,
    *,
    backends: BackendSelection | None = None,
    progress: SearchProgress | None = None,
    control: SearchControl | None = None,
) -> list[SearchRecord]:
    """Discover sources under ``home`` and search them for ``query``.

    The progress reporter is started before discovery. On
    ``KeyboardInterrupt`` it is told ``interrupt()`` and the exception is
    re-raised; on every other exit path (success or error) it is closed.
    """
    chosen_backends = select_backends() if backends is None else backends
    reporter = noop_search_progress() if progress is None else progress
    controller = SearchControl() if control is None else control

    reporter.start(query)
    keyboard_interrupted = False
    try:
        discovered = discover_sources(home, query.agents, chosen_backends)
        reporter.sources_discovered(len(discovered))
        return search_sources(
            query,
            discovered,
            chosen_backends,
            progress=reporter,
            control=controller,
        )
    except KeyboardInterrupt:
        # Remember the interrupt so the ``finally`` clause skips ``close()``.
        keyboard_interrupted = True
        reporter.interrupt()
        raise
    finally:
        if not keyboard_interrupted:
            reporter.close()
|
|
1830
|
+
|
|
1831
|
+
|
|
1832
|
+
def plan_search_sources(
    query: SearchQuery,
    sources: list[SourceHandle],
    backends: BackendSelection,
    *,
    progress: SearchProgress | None = None,
    control: SearchControl | None = None,
) -> list[SourceHandle]:
    """Choose which sources are worth parsing for ``query``.

    With no search terms the input list is returned untouched. Otherwise
    rooted sources are prefiltered with the grep tool (when available),
    root-less sources are checked individually, and the survivors are sorted
    with ``source_order_key``.
    """
    reporter = noop_search_progress() if progress is None else progress
    controller = SearchControl() if control is None else control
    if not query.terms:
        return sources

    candidates = list(sources)
    grep_tool = backends.grep_tool
    if grep_tool is not None:
        candidates = prefilter_sources_by_root(
            query,
            candidates,
            grep_tool,
            progress=reporter,
            control=controller,
        )

    kept: list[SourceHandle] = []
    for source in candidates:
        # Once an early answer is requested, no further source is admitted.
        if controller.answer_now_requested():
            continue
        if source.search_root is not None or direct_source_matches(
            source,
            query,
            backends,
            controller,
        ):
            kept.append(source)
    return sorted(kept, key=source_order_key)
|
|
1866
|
+
|
|
1867
|
+
|
|
1868
|
+
def source_order_key(source: SourceHandle) -> tuple[int, str]:
    """Build a sort key that puts the most recently modified source first.

    Ties on modification time are broken by the string form of the path.
    """
    newest_first = -source.mtime_ns
    return newest_first, str(source.path)
|
|
1871
|
+
|
|
1872
|
+
|
|
1873
|
+
def prefilter_sources_by_root(
    query: SearchQuery,
    sources: list[SourceHandle],
    grep_program: str,
    *,
    progress: SearchProgress | None = None,
    control: SearchControl | None = None,
) -> list[SourceHandle]:
    """Drop rooted sources whose file was not matched by a whole-root grep.

    Each distinct ``search_root`` is grepped once and the result cached.
    Sources without a root always pass through. A ``None`` grep result keeps
    every source under that root. Processing stops as soon as an early answer
    is requested.
    """
    reporter = noop_search_progress() if progress is None else progress
    controller = SearchControl() if control is None else control
    hits_by_root: dict[pathlib.Path, set[pathlib.Path] | None] = {}
    kept: list[SourceHandle] = []

    for source in sources:
        if controller.answer_now_requested():
            break
        root = source.search_root
        if root is None:
            kept.append(source)
            continue

        if root not in hits_by_root:
            reporter.prefilter_started(root)
            hits_by_root[root] = grep_root_paths(
                root,
                query,
                grep_program,
                control=controller,
            )
            if controller.answer_now_requested():
                break

        hits = hits_by_root[root]
        if hits is not None and source.path not in hits:
            continue
        kept.append(source)
    return kept
|
|
1909
|
+
|
|
1910
|
+
|
|
1911
|
+
def grep_root_paths(
    search_root: pathlib.Path,
    query: SearchQuery,
    grep_program: str,
    *,
    control: SearchControl | None = None,
) -> set[pathlib.Path] | None:
    """Grep ``search_root`` once per term and combine the matched file paths.

    Returns the union of per-term matches when ``query.any_term`` is set and
    the intersection otherwise. Returns an empty set when an early answer is
    requested or there are no terms, and ``None`` when a grep command exits
    with a code other than 0 or 1.
    """
    controller = SearchControl() if control is None else control
    per_term: list[set[pathlib.Path]] = []

    for term in query.terms:
        if controller.answer_now_requested():
            return set()
        completed = run_readonly_command(
            build_grep_command(
                grep_program,
                term,
                search_root,
                regex=query.regex,
                case_sensitive=query.case_sensitive,
            ),
            control=controller,
        )
        if controller.answer_now_requested():
            return set()
        if completed.returncode not in {0, 1}:
            return None
        per_term.append(
            {pathlib.Path(line) for line in completed.stdout.splitlines() if line.strip()},
        )

    if not per_term:
        return set()
    if query.any_term:
        return set().union(*per_term)
    return set.intersection(*per_term)
|
|
1952
|
+
|
|
1953
|
+
|
|
1954
|
+
def direct_source_matches(
    source: SourceHandle,
    query: SearchQuery,
    backends: BackendSelection,
    control: SearchControl | None = None,
) -> bool:
    """Decide whether a root-less source is worth parsing.

    SQLite sources always qualify. Other sources are checked with the grep
    tool, then (for JSON suffixes) with the JSON tool, and finally by reading
    the file as text. Returns ``False`` whenever an early answer is requested.
    """
    controller = SearchControl() if control is None else control
    if controller.answer_now_requested():
        return False
    if source.source_kind == "sqlite":
        return True

    grep_tool = backends.grep_tool
    if grep_tool is not None:
        verdict = grep_file_matches(
            source.path,
            query,
            grep_tool,
            control=controller,
        )
        if controller.answer_now_requested():
            return False
        if verdict is not None:
            return verdict

    json_tool = backends.json_tool
    if source.path.suffix in JSON_FILE_SUFFIXES and json_tool is not None:
        flattened = flatten_json_strings_with_tool(
            source.path,
            json_tool,
            control=controller,
        )
        if controller.answer_now_requested():
            return False
        if extracted_available := flattened is not None:
            return matches_text(flattened, query)

    # No external tool produced a verdict: match against the raw file text.
    return matches_text(read_text_file(source.path), query)
|
|
1988
|
+
|
|
1989
|
+
|
|
1990
|
+
def collect_search_records(
    query: SearchQuery,
    sources: list[SourceHandle],
    *,
    progress: SearchProgress | None = None,
    control: SearchControl | None = None,
) -> list[SearchRecord]:
    """Parse each candidate source and gather de-duplicated matching records.

    Sources are processed in the given order until an early answer is
    requested or ``query.limit`` unique records have been collected. The
    result is sorted by ``search_record_sort_key`` in descending order.
    """
    reporter = noop_search_progress() if progress is None else progress
    controller = SearchControl() if control is None else control
    unique: dict[tuple[str, str, str, str, str], SearchRecord] = {}

    def should_stop() -> bool:
        # Stop on an explicit request or once the result limit is reached.
        return controller.answer_now_requested() or (
            query.limit is not None and len(unique) >= query.limit
        )

    source_count = len(sources)
    for position, source in enumerate(sources, start=1):
        if should_stop():
            break
        reporter.source_started(position, source_count, source)

        scanned = 0
        hits: list[SearchRecord] = []
        for record in iter_source_records(source):
            if controller.answer_now_requested():
                break
            scanned += 1
            if matches_record(record, query):
                hits.append(record)
        reporter.source_finished(position, source_count, source, scanned, len(hits))

        hits.sort(key=search_record_sort_key, reverse=True)
        for hit in hits:
            key = record_dedupe_key(hit)
            if key not in unique:
                unique[key] = hit
                reporter.result_added(len(unique))
            if should_stop():
                break

    return sorted(unique.values(), key=search_record_sort_key, reverse=True)
|
|
2032
|
+
|
|
2033
|
+
|
|
2034
|
+
def find_sources(
    pattern: str | None,
    sources: list[SourceHandle],
    limit: int | None,
) -> list[FindRecord]:
    """Turn discovered sources into ``find`` records, optionally filtered.

    When ``pattern`` is given, a record is kept only if the case-folded
    pattern is a substring of its agent, store, adapter id, path, and path
    kind joined by spaces. Collection stops once ``limit`` records are kept.
    """
    needle = None if pattern is None else pattern.casefold()
    kept: list[FindRecord] = []
    for source in sources:
        record = FindRecord(
            kind="find",
            agent=source.agent,
            store=source.store,
            adapter_id=source.adapter_id,
            path=source.path,
            path_kind=source.path_kind,
            metadata={"source_kind": source.source_kind},
        )
        if needle is not None:
            searchable_fields = [
                record.agent,
                record.store,
                record.adapter_id,
                str(record.path),
                record.path_kind,
            ]
            if needle not in " ".join(searchable_fields).casefold():
                continue
        kept.append(record)
        if limit is not None and len(kept) >= limit:
            break
    return kept
|
|
2068
|
+
|
|
2069
|
+
|
|
2070
|
+
def run_find_query(
    home: pathlib.Path,
    agents: tuple[AgentName, ...],
    *,
    pattern: str | None,
    limit: int | None,
    backends: BackendSelection | None = None,
) -> list[FindRecord]:
    """Discover sources for ``agents`` under ``home`` and list them as ``find`` records.

    Backends are auto-selected when none are supplied; ``pattern`` and
    ``limit`` are forwarded unchanged to ``find_sources``.
    """
    if backends is None:
        backends = select_backends()
    return find_sources(pattern, discover_sources(home, agents, backends), limit)
|
|
2082
|
+
|
|
2083
|
+
|
|
2084
|
+
def iter_source_records(
|
|
2085
|
+
source: SourceHandle,
|
|
2086
|
+
) -> cabc.Iterator[SearchRecord]:
|
|
2087
|
+
"""Dispatch to the adapter parser for one source."""
|
|
2088
|
+
if source.adapter_id == "codex.sessions_jsonl.v1":
|
|
2089
|
+
yield from parse_codex_session_file(source)
|
|
2090
|
+
return
|
|
2091
|
+
if source.adapter_id == "codex.history_json.v1":
|
|
2092
|
+
yield from parse_codex_history_file(source)
|
|
2093
|
+
return
|
|
2094
|
+
if source.adapter_id == "claude.projects_jsonl.v1":
|
|
2095
|
+
yield from parse_claude_project_file(source)
|
|
2096
|
+
return
|
|
2097
|
+
if source.adapter_id == "cursor.ai_tracking_sqlite.v1":
|
|
2098
|
+
yield from parse_cursor_ai_tracking_db(source)
|
|
2099
|
+
return
|
|
2100
|
+
if source.adapter_id in {"cursor.state_vscdb_modern.v1", "cursor.state_vscdb_legacy.v1"}:
|
|
2101
|
+
yield from parse_cursor_state_db(source)
|
|
2102
|
+
|
|
2103
|
+
|
|
2104
|
+
def parse_codex_session_file(
    source: SourceHandle,
) -> cabc.Iterator[SearchRecord]:
    """Yield one record per message-like ``response_item`` in a Codex session file.

    The session id starts as the file stem and the model as ``None``; both
    are updated from ``session_meta`` events as they are encountered and then
    applied to the response items that follow.
    """
    session_id = source.path.stem
    session_model: str | None = None
    for event in iter_jsonl(source.path):
        if not isinstance(event, dict):
            continue
        payload = event.get("payload")
        if not isinstance(payload, dict):
            # Both handled event types require a mapping payload.
            continue
        event_type = str(event.get("type", ""))

        if event_type == "session_meta":
            session_id = as_optional_str(payload.get("id")) or session_id
            for field in ("model", "model_name", "model_provider"):
                declared = as_optional_str(payload.get(field))
                if declared:
                    session_model = declared
                    break
        elif event_type == "response_item":
            candidate = candidate_from_mapping(
                t.cast("dict[str, object]", payload),
                timestamp=as_optional_str(event.get("timestamp")),
                model=session_model,
                session_id=session_id,
                conversation_id=session_id,
            )
            if candidate is not None:
                yield build_search_record(source, candidate)
|
|
2136
|
+
|
|
2137
|
+
|
|
2138
|
+
def parse_codex_history_file(
    source: SourceHandle,
) -> cabc.Iterator[SearchRecord]:
    """Yield a ``history`` record for every entry that carries a ``command``.

    A source of kind ``"json"`` is read as one JSON document (only a
    top-level list is used); any other kind is read as JSONL.
    """
    entries: list[JSONValue]
    if source.source_kind != "json":
        entries = list(iter_jsonl(source.path))
    else:
        document = read_json_file(source.path)
        entries = document if isinstance(document, list) else []

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        command_text = as_optional_str(entry.get("command"))
        if command_text:
            yield SearchRecord(
                kind="history",
                agent=source.agent,
                store=source.store,
                adapter_id=source.adapter_id,
                path=source.path,
                text=command_text,
                title="Codex command history",
                role="user",
                timestamp=as_optional_str(entry.get("timestamp")),
            )
|
|
2166
|
+
|
|
2167
|
+
|
|
2168
|
+
def parse_claude_project_file(
    source: SourceHandle,
) -> cabc.Iterator[SearchRecord]:
    """Yield unique message records found anywhere in a Claude project JSONL file.

    The file stem serves as the fallback conversation id. Candidates with the
    same role, text, timestamp, and conversation id are emitted only once.
    """
    fallback_id = source.path.stem
    emitted: set[tuple[str | None, str, str | None, str | None]] = set()
    for event in iter_jsonl(source.path):
        candidates = iter_message_candidates(event, fallback_conversation_id=fallback_id)
        for candidate in candidates:
            fingerprint = (
                candidate.role,
                candidate.text,
                candidate.timestamp,
                candidate.conversation_id,
            )
            if fingerprint not in emitted:
                emitted.add(fingerprint)
                yield build_search_record(source, candidate)
|
|
2189
|
+
|
|
2190
|
+
|
|
2191
|
+
def parse_cursor_ai_tracking_db(
    source: SourceHandle,
) -> cabc.Iterator[SearchRecord]:
    """Parse Cursor AI tracking summaries.

    Each row of ``conversation_summaries`` becomes one ``history`` record
    whose text joins the title, TL;DR, overview, and flattened bullets with
    blank lines. Rows with no text at all are skipped.

    Any ``sqlite3.DatabaseError`` ends the iteration quietly, including a
    failure to open the database, so one unreadable store cannot abort a
    whole search.
    """
    try:
        connection = open_readonly_sqlite(source.path)
    except sqlite3.DatabaseError:
        # The file may be unreadable, or may have vanished since discovery.
        return
    try:
        for row in iter_conversation_summaries(connection):
            (
                conversation_id,
                title,
                tldr,
                overview,
                bullets,
                model,
                mode,
                updated_at,
            ) = row
            text_parts = [
                part
                for part in (
                    as_optional_str(title),
                    as_optional_str(tldr),
                    as_optional_str(overview),
                    flatten_summary_bullets(bullets),
                )
                if part
            ]
            if not text_parts:
                continue
            yield SearchRecord(
                kind="history",
                agent=source.agent,
                store=source.store,
                adapter_id=source.adapter_id,
                path=source.path,
                text="\n\n".join(text_parts),
                title=as_optional_str(title),
                role="assistant",
                timestamp=as_optional_str(updated_at),
                model=as_optional_str(model),
                conversation_id=as_optional_str(conversation_id),
                metadata={"mode": as_optional_str(mode) or ""},
            )
    except sqlite3.DatabaseError:
        # Missing table, locked or corrupt database: treat as "no records".
        return
    finally:
        connection.close()
|
|
2238
|
+
|
|
2239
|
+
|
|
2240
|
+
def parse_cursor_state_db(
    source: SourceHandle,
) -> cabc.Iterator[SearchRecord]:
    """Parse Cursor ``state.vscdb`` tables with generic JSON extraction.

    Only the ``ItemTable`` and ``cursorDiskKV`` tables are read, and only
    rows whose case-folded key contains one of ``CURSOR_STATE_TOKENS``. Each
    such value is decoded, parsed as embedded JSON, and walked for message
    candidates; the row key is the fallback title and conversation id.
    Candidates with the same role, text, timestamp, and conversation id are
    emitted once per database.

    Any ``sqlite3.DatabaseError`` ends the iteration quietly, including a
    failure to open the database, so one unreadable store cannot abort a
    whole search.
    """
    try:
        connection = open_readonly_sqlite(source.path)
    except sqlite3.DatabaseError:
        # The file may be unreadable, or may have vanished since discovery.
        return
    try:
        tables = sqlite_table_names(connection)
        candidate_tables = [name for name in ("ItemTable", "cursorDiskKV") if name in tables]
        seen: set[tuple[str | None, str, str | None, str | None]] = set()
        for table in candidate_tables:
            for key, raw_value in iter_key_value_rows(connection, table):
                lowered_key = key.casefold()
                if not any(token in lowered_key for token in CURSOR_STATE_TOKENS):
                    continue
                decoded = decode_sqlite_value(raw_value)
                if decoded is None:
                    continue
                parsed = parse_embedded_json(decoded)
                if parsed is None:
                    continue
                for candidate in iter_message_candidates(
                    parsed,
                    fallback_title=key,
                    fallback_conversation_id=key,
                ):
                    entry_key = (
                        candidate.role,
                        candidate.text,
                        candidate.timestamp,
                        candidate.conversation_id,
                    )
                    if entry_key in seen:
                        continue
                    seen.add(entry_key)
                    yield build_search_record(source, candidate)
    except sqlite3.DatabaseError:
        # Locked or corrupt database: treat as "no further records".
        return
    finally:
        connection.close()
|
|
2279
|
+
|
|
2280
|
+
|
|
2281
|
+
def open_readonly_sqlite(path: pathlib.Path) -> sqlite3.Connection:
    """Open a SQLite database with a read-only URI.

    The path is made absolute and percent-encoded with ``Path.as_uri`` before
    ``?mode=ro`` is appended. Interpolating the raw path would let a ``#``,
    ``?`` or ``%`` in a file or directory name corrupt the URI, which could
    point at a different file or silently drop the read-only mode.

    Raises
    ------
    sqlite3.OperationalError
        If the database cannot be opened.
    """
    return sqlite3.connect(f"{path.absolute().as_uri()}?mode=ro", uri=True)
|
|
2284
|
+
|
|
2285
|
+
|
|
2286
|
+
def sqlite_table_names(connection: sqlite3.Connection) -> set[str]:
    """Collect the names of all tables listed in ``sqlite_master``.

    Only string names are kept.
    """
    listing = t.cast(
        "cabc.Iterable[tuple[object]]",
        connection.execute("SELECT name FROM sqlite_master WHERE type = 'table'"),
    )
    return {row[0] for row in listing if isinstance(row[0], str)}
|
|
2298
|
+
|
|
2299
|
+
|
|
2300
|
+
def iter_key_value_rows(
    connection: sqlite3.Connection,
    table: str,
) -> cabc.Iterator[tuple[str, object]]:
    """Yield ``(key, value)`` pairs from one of the two known key/value tables.

    Nothing is yielded for any table other than ``ItemTable`` or
    ``cursorDiskKV``, or when the table lacks a ``key`` or ``value`` column.
    Rows whose key is not a string are skipped.
    """
    # Fixed statements per allowed table; the name is never taken from data.
    statements = {
        "ItemTable": "SELECT key, value FROM ItemTable",
        "cursorDiskKV": "SELECT key, value FROM cursorDiskKV",
    }
    statement = statements.get(table)
    if statement is None:
        return

    table_info = t.cast(
        "cabc.Iterable[tuple[object, ...]]",
        connection.execute(f"PRAGMA table_info({table})"),
    )
    column_names = {str(description[1]) for description in table_info}
    if not {"key", "value"} <= column_names:
        return

    for key, value in t.cast("cabc.Iterable[KeyValueRow]", connection.execute(statement)):
        if isinstance(key, str):
            yield key, value
|
|
2321
|
+
|
|
2322
|
+
|
|
2323
|
+
def iter_conversation_summaries(
    connection: sqlite3.Connection,
) -> cabc.Iterator[SummaryRow]:
    """Stream every row of the ``conversation_summaries`` table.

    Each row holds, in order: conversation id, title, TL;DR, overview,
    summary bullets, model, mode, and update time.
    """
    query = """
        SELECT
            conversationId,
            title,
            tldr,
            overview,
            summaryBullets,
            model,
            mode,
            updatedAt
        FROM conversation_summaries
    """
    cursor = connection.execute(query)
    yield from t.cast("cabc.Iterable[SummaryRow]", cursor)
|
|
2341
|
+
|
|
2342
|
+
|
|
2343
|
+
def build_grep_command(
    grep_program: str,
    term: str,
    target: pathlib.Path,
    *,
    regex: bool,
    case_sensitive: bool,
) -> list[str]:
    """Build a read-only grep command for one term and target.

    Parameters
    ----------
    grep_program
        Name or path of the search executable (``rg`` or ``ag``).
    term
        Pattern to search for; treated literally unless ``regex`` is true.
    target
        File or directory to search.
    regex
        When false, add the program's fixed-string flag.
    case_sensitive
        When false, add ``-i``.

    Returns
    -------
    list[str]
        Argument vector that lists matching files (``-l``).
    """
    command = [grep_program]
    if not case_sensitive:
        command.append("-i")
    command.append("-l")
    if not regex:
        # Compare the executable's stem so ``rg.exe`` and full paths are
        # recognized too; anything else gets the ``ag`` literal flag.
        is_ripgrep = pathlib.PurePath(grep_program).stem.casefold() == "rg"
        command.append("-F" if is_ripgrep else "-Q")
    # ``--`` ends option parsing so a term such as ``-v`` or ``--files`` is
    # searched for instead of being interpreted as a flag.
    command.extend(("--", term, str(target)))
    return command
|
|
2359
|
+
|
|
2360
|
+
|
|
2361
|
+
def flatten_json_strings_with_tool(
    path: pathlib.Path,
    program: str,
    *,
    control: SearchControl | None = None,
) -> str | None:
    """Run ``program -r ".. | strings"`` on ``path`` and return its output.

    Returns ``None`` when the command exits with a non-zero status.
    """
    completed = run_readonly_command(
        [program, "-r", ".. | strings", str(path)],
        control=control,
    )
    return completed.stdout if completed.returncode == 0 else None
|
|
2373
|
+
|
|
2374
|
+
|
|
2375
|
+
def grep_file_matches(
    path: pathlib.Path,
    query: SearchQuery,
    program: str,
    *,
    control: SearchControl | None = None,
) -> bool | None:
    """Use ``rg`` or ``ag`` as a read-only prefilter for one file.

    Runs one grep command per query term.

    Returns
    -------
    bool | None
        ``True``/``False`` when the tool gave a verdict (any term matched
        under ``query.any_term``, otherwise all terms matched); ``False``
        when an early answer was requested; ``None`` when a command exited
        with a code other than 0 (match) or 1 (no match), so the caller can
        fall back to another matching strategy instead of dropping the file.
    """
    active_control = SearchControl() if control is None else control
    matchers: list[bool] = []
    for term in query.terms:
        if active_control.answer_now_requested():
            break
        completed = run_readonly_command(
            build_grep_command(
                program,
                term,
                path,
                regex=query.regex,
                case_sensitive=query.case_sensitive,
            ),
            control=active_control,
        )
        if active_control.answer_now_requested():
            # An interrupted command may exit with any status; ignore it.
            return False
        if completed.returncode not in {0, 1}:
            # Tool error (bad pattern, unreadable file, ...): no verdict.
            return None
        matchers.append(completed.returncode == 0)
    if active_control.answer_now_requested():
        return False
    return any(matchers) if query.any_term else all(matchers)
|
|
2402
|
+
|
|
2403
|
+
|
|
2404
|
+
def read_text_file(path: pathlib.Path) -> str:
    """Return the file's UTF-8 text, or ``""`` if it cannot be read.

    Undecodable bytes are replaced rather than raising.
    """
    with contextlib.suppress(OSError):
        return path.read_text(encoding="utf-8", errors="replace")
    return ""
|
|
2410
|
+
|
|
2411
|
+
|
|
2412
|
+
def read_json_file(path: pathlib.Path) -> JSONValue | None:
    """Read a JSON file.

    Returns
    -------
    JSONValue | None
        The decoded document, or ``None`` when the file cannot be read, is
        not valid UTF-8, or is not valid JSON.
    """
    try:
        parsed = t.cast("object", json.loads(path.read_text(encoding="utf-8")))
    except (OSError, ValueError):
        # ``ValueError`` covers both ``json.JSONDecodeError`` and the
        # ``UnicodeDecodeError`` raised by ``read_text`` on invalid bytes.
        return None
    if isinstance(parsed, (dict, list, str, int, float, bool)) or parsed is None:
        return t.cast("JSONValue", parsed)
    return None
|
|
2421
|
+
|
|
2422
|
+
|
|
2423
|
+
def iter_jsonl(path: pathlib.Path) -> cabc.Iterator[JSONValue]:
    """Yield decoded JSON values from a JSONL file.

    Blank lines and lines that are not valid JSON are skipped. Undecodable
    bytes are replaced rather than raising, so one corrupt line cannot abort
    the whole file. If the file cannot be opened or read, iteration simply
    ends.
    """
    try:
        with path.open(encoding="utf-8", errors="replace") as handle:
            for line in handle:
                stripped = line.strip()
                if not stripped:
                    continue
                try:
                    parsed = t.cast("object", json.loads(stripped))
                except json.JSONDecodeError:
                    continue
                if isinstance(parsed, (dict, list, str, int, float, bool)) or parsed is None:
                    yield t.cast("JSONValue", parsed)
    except OSError:
        return
|
|
2439
|
+
|
|
2440
|
+
|
|
2441
|
+
def candidate_from_mapping(
    mapping: dict[str, object],
    *,
    timestamp: str | None,
    model: str | None,
    session_id: str | None,
    conversation_id: str | None,
) -> MessageCandidate | None:
    """Build a message candidate from a mapping that has both a role and text.

    Returns ``None`` when either is missing. Each keyword argument takes
    precedence over the corresponding value extracted from ``mapping``, which
    is used only when the argument is falsy.
    """
    role = extract_role(mapping)
    text = extract_message_text(mapping)
    if role is not None and text:
        return MessageCandidate(
            role=role,
            text=text,
            title=extract_title(mapping),
            timestamp=timestamp or extract_timestamp(mapping),
            model=model or extract_model(mapping),
            session_id=session_id or extract_session_id(mapping),
            conversation_id=conversation_id or extract_conversation_id(mapping),
        )
    return None
|
|
2463
|
+
|
|
2464
|
+
|
|
2465
|
+
def iter_message_candidates(
    value: JSONValue | None,
    *,
    fallback_title: str | None = None,
    fallback_conversation_id: str | None = None,
) -> cabc.Iterator[MessageCandidate]:
    """Walk a JSON value depth-first and yield every message-like mapping.

    A mapping counts as a message when it has both a role and non-empty
    text. It is yielded before its nested values are visited. The fallbacks
    fill in a missing title or conversation id at every depth.
    """
    children: cabc.Iterable[object]
    if isinstance(value, dict):
        mapping = t.cast("dict[str, object]", value)
        role = extract_role(mapping)
        text = extract_message_text(mapping)
        if role is not None and text:
            yield MessageCandidate(
                role=role,
                text=text,
                title=extract_title(mapping) or fallback_title,
                timestamp=extract_timestamp(mapping),
                model=extract_model(mapping),
                session_id=extract_session_id(mapping),
                conversation_id=extract_conversation_id(mapping) or fallback_conversation_id,
            )
        children = mapping.values()
    elif isinstance(value, list):
        children = value
    else:
        # Scalars and ``None`` cannot contain messages.
        return

    for child in children:
        yield from iter_message_candidates(
            t.cast("JSONValue | None", child),
            fallback_title=fallback_title,
            fallback_conversation_id=fallback_conversation_id,
        )
|
|
2499
|
+
|
|
2500
|
+
|
|
2501
|
+
def extract_role(mapping: dict[str, object]) -> str | None:
    """Find the speaker role under ``role``, ``sender``, ``author`` or ``speaker``.

    A non-blank string value is returned stripped. A mapping value
    contributes its own ``role`` or, failing that, its ``name``. Keys are
    tried in the order listed; ``None`` is returned when none yields a role.
    """
    for field in ("role", "sender", "author", "speaker"):
        raw = mapping.get(field)
        if isinstance(raw, str):
            cleaned = raw.strip()
            if cleaned:
                return cleaned
        elif isinstance(raw, dict):
            inner = t.cast("dict[str, object]", raw)
            resolved = as_optional_str(inner.get("role")) or as_optional_str(inner.get("name"))
            if resolved is not None:
                return resolved
    return None
|
|
2515
|
+
|
|
2516
|
+
|
|
2517
|
+
def extract_message_text(mapping: dict[str, object]) -> str | None:
    """Return the flattened text of the first content-like field that has any.

    Fields are tried in this order: ``content``, ``text``, ``message``,
    ``body``, ``prompt``, ``value``, ``parts``.
    """
    content_fields = ("content", "text", "message", "body", "prompt", "value", "parts")
    for field in content_fields:
        if field not in mapping:
            continue
        text = flatten_content_value(t.cast("JSONValue | None", mapping[field]))
        if text:
            return text
    return None
|
|
2525
|
+
|
|
2526
|
+
|
|
2527
|
+
def flatten_content_value(value: JSONValue | None) -> str | None:
    """Join every text fragment of a content payload with newlines.

    Returns ``None`` when the payload contains no non-blank text.
    """
    fragments = [fragment for fragment in iter_text_fragments(value) if fragment.strip()]
    joined = "\n".join(fragments).strip()
    return joined or None
|
|
2533
|
+
|
|
2534
|
+
|
|
2535
|
+
def iter_text_fragments(
|
|
2536
|
+
value: JSONValue | None,
|
|
2537
|
+
) -> cabc.Iterator[str]:
|
|
2538
|
+
"""Yield text fragments from a nested content payload."""
|
|
2539
|
+
if isinstance(value, str):
|
|
2540
|
+
stripped = value.strip()
|
|
2541
|
+
if stripped:
|
|
2542
|
+
yield stripped
|
|
2543
|
+
return
|
|
2544
|
+
if isinstance(value, list):
|
|
2545
|
+
for item in value:
|
|
2546
|
+
yield from iter_text_fragments(item)
|
|
2547
|
+
return
|
|
2548
|
+
if isinstance(value, dict):
|
|
2549
|
+
mapping = t.cast("dict[str, object]", value)
|
|
2550
|
+
for key in ("text", "content", "message", "body", "prompt", "value", "parts"):
|
|
2551
|
+
if key in mapping:
|
|
2552
|
+
yield from iter_text_fragments(t.cast("JSONValue | None", mapping[key]))
|
|
2553
|
+
|
|
2554
|
+
|
|
2555
|
+
def extract_title(mapping: dict[str, object]) -> str | None:
    """Return the first usable title-like string, or ``None`` if there is none.

    The keys ``title``, ``name`` and ``topic`` are tried in that order.
    """
    candidates = (as_optional_str(mapping.get(field)) for field in ("title", "name", "topic"))
    return next((found for found in candidates if found is not None), None)
|
|
2562
|
+
|
|
2563
|
+
|
|
2564
|
+
def extract_timestamp(mapping: dict[str, object]) -> str | None:
    """Return the first usable timestamp-like string, or ``None``.

    The keys ``timestamp``, ``updatedAt``, ``createdAt`` and ``ts`` are tried
    in that order; only string values are considered.
    """
    candidates = (
        as_optional_str(mapping.get(field))
        for field in ("timestamp", "updatedAt", "createdAt", "ts")
    )
    return next((found for found in candidates if found is not None), None)
|
|
2571
|
+
|
|
2572
|
+
|
|
2573
|
+
def extract_model(mapping: dict[str, object]) -> str | None:
    """Return the first usable model name, or ``None``.

    The keys ``model``, ``modelName`` and ``model_name`` are tried in order.
    """
    candidates = (
        as_optional_str(mapping.get(field)) for field in ("model", "modelName", "model_name")
    )
    return next((found for found in candidates if found is not None), None)
|
|
2580
|
+
|
|
2581
|
+
|
|
2582
|
+
def extract_session_id(mapping: dict[str, object]) -> str | None:
    """Return the first usable session identifier, or ``None``.

    The keys ``session_id``, ``sessionId`` and ``id`` are tried in order.
    """
    candidates = (
        as_optional_str(mapping.get(field)) for field in ("session_id", "sessionId", "id")
    )
    return next((found for found in candidates if found is not None), None)
|
|
2589
|
+
|
|
2590
|
+
|
|
2591
|
+
def extract_conversation_id(mapping: dict[str, object]) -> str | None:
    """Return the first usable conversation identifier, or ``None``.

    The keys ``conversation_id``, ``conversationId`` and ``threadId`` are
    tried in order.
    """
    candidates = (
        as_optional_str(mapping.get(field))
        for field in ("conversation_id", "conversationId", "threadId")
    )
    return next((found for found in candidates if found is not None), None)
|
|
2598
|
+
|
|
2599
|
+
|
|
2600
|
+
def flatten_summary_bullets(value: object) -> str | None:
    """Render Cursor summary bullets as ``- item`` lines.

    Bytes-like input is decoded and processed again as text. A string holding
    a JSON list is rendered one bullet per non-blank string item; any other
    string (or a list without usable items) falls back to the stripped input.
    Values of any other type, including ``None``, produce ``None``.
    """
    if isinstance(value, (bytes, bytearray)):
        return flatten_summary_bullets(decode_sqlite_value(value))
    if not isinstance(value, str):
        return None

    plain = value.strip() or None
    decoded = parse_embedded_json(value)
    if not isinstance(decoded, list):
        return plain

    entries = [entry for entry in decoded if isinstance(entry, str) and entry.strip()]
    if not entries:
        return plain
    return "\n".join(f"- {entry}" for entry in entries)
|
|
2614
|
+
|
|
2615
|
+
|
|
2616
|
+
def decode_sqlite_value(value: object) -> str | None:
    """Coerce a SQLite column value to text.

    Strings pass through unchanged, ``bytes``/``bytearray`` are decoded as
    UTF-8 with undecodable bytes replaced, and anything else yields ``None``.
    """
    if isinstance(value, (bytes, bytearray)):
        return bytes(value).decode("utf-8", errors="replace")
    return value if isinstance(value, str) else None
|
|
2623
|
+
|
|
2624
|
+
|
|
2625
|
+
def parse_embedded_json(text: str) -> JSONValue | None:
    """Parse a JSON-encoded string, returning ``None`` when unavailable.

    Only text whose first non-whitespace character opens a JSON array or
    object is attempted; everything else is treated as plain text.

    Parameters
    ----------
    text : str
        Candidate text that may hold an embedded JSON document.

    Returns
    -------
    JSONValue | None
        The decoded value, or ``None`` when ``text`` is blank, does not start
        with ``[`` or ``{``, is malformed, or is nested too deeply to decode.
    """
    stripped = text.strip()
    if not stripped.startswith(("[", "{")):
        return None
    try:
        parsed = t.cast("object", json.loads(stripped))
    except (json.JSONDecodeError, RecursionError):
        # ``json.loads`` raises RecursionError (not JSONDecodeError) on
        # pathologically nested input; treat it like any other undecodable
        # payload instead of aborting the caller.
        return None
    if isinstance(parsed, (dict, list, str, int, float, bool)) or parsed is None:
        return t.cast("JSONValue", parsed)
    return None
|
|
2637
|
+
|
|
2638
|
+
|
|
2639
|
+
def build_search_record(source: SourceHandle, candidate: MessageCandidate) -> SearchRecord:
    """Combine a source handle and a parsed candidate into a ``SearchRecord``.

    The record is classified as ``"prompt"`` when the case-folded role is one
    of ``USER_ROLES`` and as ``"history"`` otherwise (including a missing
    role). The role stored on the record keeps its original casing.
    """
    normalized_role = None if candidate.role is None else candidate.role.casefold()
    record_kind: t.Literal["prompt", "history"] = "history"
    if normalized_role in USER_ROLES:
        record_kind = "prompt"
    return SearchRecord(
        kind=record_kind,
        agent=source.agent,
        store=source.store,
        adapter_id=source.adapter_id,
        path=source.path,
        text=candidate.text,
        title=candidate.title,
        role=candidate.role,
        timestamp=candidate.timestamp,
        model=candidate.model,
        session_id=candidate.session_id,
        conversation_id=candidate.conversation_id,
    )
|
|
2657
|
+
|
|
2658
|
+
|
|
2659
|
+
def matches_record(record: SearchRecord, query: SearchQuery) -> bool:
    """Decide whether ``record`` belongs in the results for ``query``.

    A ``"prompts"`` or ``"history"`` search first requires the matching record
    kind; any other search type skips that check. The record's searchable
    text must then satisfy the query terms.
    """
    required_kind = {"prompts": "prompt", "history": "history"}.get(query.search_type)
    if required_kind is not None and record.kind != required_kind:
        return False
    return matches_text(build_search_haystack(record), query)
|
|
2666
|
+
|
|
2667
|
+
|
|
2668
|
+
def build_search_haystack(record: SearchRecord) -> str:
    """Concatenate the searchable fields of ``record`` into one string.

    Title, text, model, role and path are joined with newlines in that order;
    empty or missing fields are left out.
    """
    fields = (
        record.title,
        record.text,
        record.model,
        record.role,
        str(record.path),
    )
    return "\n".join(field for field in fields if field)
|
|
2678
|
+
|
|
2679
|
+
|
|
2680
|
+
def matches_text(text: str, query: SearchQuery) -> bool:
    """Report whether ``text`` satisfies the terms of ``query``.

    A query without terms matches everything. Terms are applied either as
    regular expressions or as plain substrings, honouring
    ``query.case_sensitive``, and are combined with "any" or "all" semantics
    according to ``query.any_term``.
    """
    if not query.terms:
        return True

    # Every term is evaluated eagerly, so an invalid pattern anywhere in the
    # query raises regardless of how the earlier terms fared.
    if query.regex:
        flags = re.IGNORECASE if not query.case_sensitive else 0
        hits = [re.search(pattern, text, flags) is not None for pattern in query.terms]
    elif query.case_sensitive:
        hits = [term in text for term in query.terms]
    else:
        folded_text = text.casefold()
        folded_terms = tuple(term.casefold() for term in query.terms)
        hits = [term in folded_text for term in folded_terms]

    combine = any if query.any_term else all
    return combine(hits)
|
|
2694
|
+
|
|
2695
|
+
|
|
2696
|
+
def search_record_sort_key(record: SearchRecord) -> tuple[str, str, str]:
    """Build the ordering key for a record: timestamp, agent, then path.

    A missing timestamp sorts as the empty string.
    """
    stamp = record.timestamp or ""
    location = str(record.path)
    return stamp, record.agent, location
|
|
2699
|
+
|
|
2700
|
+
|
|
2701
|
+
def record_dedupe_key(record: SearchRecord) -> tuple[str, str, str, str, str]:
    """Build the key used to collapse duplicate records within one session.

    The session component is the session id when set, else the conversation
    id, else the record's path rendered as a string.
    """
    scope = record.session_id or record.conversation_id or str(record.path)
    return record.kind, record.agent, record.store, scope, record.text
|
|
2711
|
+
|
|
2712
|
+
|
|
2713
|
+
def as_optional_str(value: object) -> str | None:
    """Return ``value`` stripped when it is a non-blank string, else ``None``."""
    if not isinstance(value, str):
        return None
    return value.strip() or None
|
|
2719
|
+
|
|
2720
|
+
|
|
2721
|
+
def maybe_use_pydantic() -> tuple[
    t.Callable[[SearchRecord], dict[str, object]],
    t.Callable[[FindRecord], dict[str, object]],
    EnvelopeFactory,
]:
    """Build the Pydantic-validated serializers.

    Pydantic is imported lazily, so an ``ImportError`` propagates to the
    caller when the package is not installed. Each returned callable validates
    the plain payload through a ``TypeAdapter`` and dumps it in JSON mode.
    """
    pydantic_module = t.cast(
        "PydanticModule",
        t.cast("object", importlib.import_module("pydantic")),
    )
    search_adapter = pydantic_module.TypeAdapter(SearchRecordPayload)
    find_adapter = pydantic_module.TypeAdapter(FindRecordPayload)
    envelope_adapter = pydantic_module.TypeAdapter(EnvelopePayload)

    def pydantic_search(record: SearchRecord) -> dict[str, object]:
        validated = search_adapter.validate_python(serialize_search_record(record))
        return t.cast("dict[str, object]", search_adapter.dump_python(validated, mode="json"))

    def pydantic_find(record: FindRecord) -> dict[str, object]:
        validated = find_adapter.validate_python(serialize_find_record(record))
        return t.cast("dict[str, object]", find_adapter.dump_python(validated, mode="json"))

    def pydantic_envelope(
        command: str,
        query_data: dict[str, object],
        results: list[dict[str, object]],
    ) -> dict[str, object]:
        raw_envelope = build_envelope(command, query_data, results)
        validated = envelope_adapter.validate_python(raw_envelope)
        return t.cast("dict[str, object]", envelope_adapter.dump_python(validated, mode="json"))

    return pydantic_search, pydantic_find, pydantic_envelope
|
|
2757
|
+
|
|
2758
|
+
|
|
2759
|
+
def maybe_build_pydantic() -> tuple[
    t.Callable[[SearchRecord], dict[str, object]],
    t.Callable[[FindRecord], dict[str, object]],
    EnvelopeFactory,
]:
    """Return the Pydantic serializers, or plain equivalents without Pydantic.

    The fallbacks return the unvalidated payload mappings directly.
    """
    try:
        return maybe_use_pydantic()
    except ImportError:
        # Pydantic is optional: fall through to the plain serializers below.
        pass

    def plain_search(record: SearchRecord) -> dict[str, object]:
        return t.cast("dict[str, object]", serialize_search_record(record))

    def plain_find(record: FindRecord) -> dict[str, object]:
        return t.cast("dict[str, object]", serialize_find_record(record))

    def plain_envelope(
        command: str,
        query_data: dict[str, object],
        results: list[dict[str, object]],
    ) -> dict[str, object]:
        return t.cast("dict[str, object]", build_envelope(command, query_data, results))

    return plain_search, plain_find, plain_envelope
|
|
2776
|
+
|
|
2777
|
+
|
|
2778
|
+
def serialize_search_record(record: SearchRecord) -> SearchRecordPayload:
    """Convert a search record into its JSON-compatible payload mapping.

    The path is rendered through ``format_display_path``; every other field
    is copied across unchanged alongside the schema version.
    """
    payload: SearchRecordPayload = {
        "schema_version": SCHEMA_VERSION,
        "kind": record.kind,
        "agent": record.agent,
        "store": record.store,
        "adapter_id": record.adapter_id,
        "path": format_display_path(record.path),
        "text": record.text,
        "title": record.title,
        "role": record.role,
        "timestamp": record.timestamp,
        "model": record.model,
        "session_id": record.session_id,
        "conversation_id": record.conversation_id,
        "metadata": record.metadata,
    }
    return payload
|
|
2796
|
+
|
|
2797
|
+
|
|
2798
|
+
def serialize_find_record(record: FindRecord) -> FindRecordPayload:
    """Convert a find record into its JSON-compatible payload mapping.

    The path is rendered through ``format_display_path``; the remaining
    fields are copied across unchanged alongside the schema version.
    """
    payload: FindRecordPayload = {
        "schema_version": SCHEMA_VERSION,
        "kind": record.kind,
        "agent": record.agent,
        "store": record.store,
        "adapter_id": record.adapter_id,
        "path": format_display_path(record.path),
        "path_kind": record.path_kind,
        "metadata": record.metadata,
    }
    return payload
|
|
2810
|
+
|
|
2811
|
+
|
|
2812
|
+
def serialize_source_handle(source: SourceHandle) -> SourceHandlePayload:
    """Convert a source handle into its JSON-compatible payload mapping.

    ``search_root`` stays ``None`` when unset and is otherwise rendered as a
    directory display path.
    """
    payload: SourceHandlePayload = {
        "schema_version": SCHEMA_VERSION,
        "agent": source.agent,
        "store": source.store,
        "adapter_id": source.adapter_id,
        "path": format_display_path(source.path),
        "path_kind": source.path_kind,
        "source_kind": source.source_kind,
        "search_root": (
            format_display_path(source.search_root, directory=True)
            if source.search_root is not None
            else None
        ),
        "mtime_ns": source.mtime_ns,
    }
    return payload
|
|
2829
|
+
|
|
2830
|
+
|
|
2831
|
+
def build_envelope(
    command: str,
    query_data: dict[str, object],
    results: list[dict[str, object]],
) -> EnvelopePayload:
    """Wrap serialized results in the top-level JSON envelope.

    The envelope carries the schema version, the command name, the echoed
    query parameters and the result list, in that key order.
    """
    envelope: EnvelopePayload = {
        "schema_version": SCHEMA_VERSION,
        "command": command,
        "query": query_data,
        "results": results,
    }
    return envelope
|
|
2843
|
+
|
|
2844
|
+
|
|
2845
|
+
def print_search_results(records: list[SearchRecord], args: SearchArgs) -> None:
    """Write search results to stdout in the format selected by ``args``.

    ``json`` prints a single indented envelope, ``ndjson`` prints one compact
    object per record, and every other mode prints a numbered human-readable
    listing.
    """
    serialize_search, _, serialize_envelope = maybe_build_pydantic()
    query_data: dict[str, object] = {
        "terms": list(args.terms),
        "agents": list(args.agents),
        "type": args.search_type,
        "any": args.any_term,
        "regex": args.regex,
        "case_sensitive": args.case_sensitive,
        "limit": args.limit,
    }

    if args.output_mode == "json":
        serialized = [serialize_search(record) for record in records]
        envelope = serialize_envelope("search", query_data, serialized)
        print(json.dumps(envelope, ensure_ascii=False, indent=2))
    elif args.output_mode == "ndjson":
        for record in records:
            print(json.dumps(serialize_search(record), ensure_ascii=False))
    else:
        for position, record in enumerate(records, start=1):
            facts = (record.timestamp, record.model, format_display_path(record.path))
            lines = [
                f"[{position}] {record.agent} {record.kind} {record.store}",
                " | ".join(fact for fact in facts if fact),
            ]
            if record.title:
                lines.append(record.title)
            # Blank separator lines before and after the record body.
            lines.extend(("", record.text, ""))
            print("\n".join(lines))
|
|
2879
|
+
|
|
2880
|
+
|
|
2881
|
+
def search_progress_enabled(args: SearchArgs) -> bool:
    """Decide whether progress reporting is on for this invocation.

    ``always`` forces progress on; ``auto`` enables it only for the ``text``
    and ``ui`` output modes; any other progress mode disables it.
    """
    mode = args.progress_mode
    if mode == "always":
        return True
    return mode == "auto" and args.output_mode in {"text", "ui"}
|
|
2885
|
+
|
|
2886
|
+
|
|
2887
|
+
def should_enable_answer_now(
    args: SearchArgs,
    *,
    stdin: t.TextIO | None = None,
    stderr: t.TextIO | None = None,
) -> bool:
    """Decide whether pressing Enter may request a partial answer.

    This requires text output with progress enabled, and both the input and
    error streams must report themselves as terminals. ``stdin`` and
    ``stderr`` default to the process-wide streams; a stream without an
    ``isatty`` method counts as not being a terminal.
    """
    if args.output_mode != "text" or not search_progress_enabled(args):
        return False
    streams = (
        sys.stdin if stdin is None else stdin,
        sys.stderr if stderr is None else stderr,
    )
    return all(bool(getattr(stream, "isatty", lambda: False)()) for stream in streams)
|
|
2902
|
+
|
|
2903
|
+
|
|
2904
|
+
def build_search_progress(args: SearchArgs, *, answer_now_hint: bool = False) -> SearchProgress:
    """Create the progress reporter for a search.

    Returns a console reporter when progress is enabled for ``args`` and the
    no-op reporter otherwise.
    """
    if search_progress_enabled(args):
        return ConsoleSearchProgress(
            enabled=True,
            color_mode=args.color_mode,
            answer_now_hint=answer_now_hint,
        )
    return noop_search_progress()
|
|
2914
|
+
|
|
2915
|
+
|
|
2916
|
+
def print_find_results(records: list[FindRecord], args: FindArgs) -> None:
    """Write find results to stdout in the format selected by ``args``.

    ``json`` prints a single indented envelope, ``ndjson`` prints one compact
    object per record, and every other mode prints a short human-readable
    listing.
    """
    _, serialize_find, serialize_envelope = maybe_build_pydantic()
    query_data: dict[str, object] = {
        "pattern": args.pattern,
        "agents": list(args.agents),
        "limit": args.limit,
    }

    if args.output_mode == "json":
        serialized = [serialize_find(record) for record in records]
        envelope = serialize_envelope("find", query_data, serialized)
        print(json.dumps(envelope, ensure_ascii=False, indent=2))
    elif args.output_mode == "ndjson":
        for record in records:
            print(json.dumps(serialize_find(record), ensure_ascii=False))
    else:
        for record in records:
            summary = f"{record.agent} {record.path_kind} {record.store}"
            # The trailing empty entry produces the blank line between records.
            print("\n".join((summary, format_display_path(record.path), "")))
|
|
2940
|
+
|
|
2941
|
+
|
|
2942
|
+
def run_ui(records: list[SearchRecord]) -> None:
    """Open an interactive, read-only Textual browser over ``records``.

    Textual is imported lazily so the rest of the CLI works without it. The
    app shows a keyword filter, a results table and a detail pane.

    Raises
    ------
    RuntimeError
        If any of the required Textual modules cannot be imported.
    """

    def load(module_name: str) -> object:
        # Import by name so Textual stays an optional, lazily loaded dependency.
        return t.cast("object", importlib.import_module(module_name))

    try:
        textual_app = t.cast("TextualAppModule", load("textual.app"))
        textual_containers = t.cast("TextualContainersModule", load("textual.containers"))
        textual_widgets = t.cast("TextualWidgetsModule", load("textual.widgets"))
    except ImportError as error:
        msg = "Textual is required for --ui. Run with `uv run py/agentgrep.py ... --ui`."
        raise RuntimeError(msg) from error

    app_type = textual_app.App
    horizontal = textual_containers.Horizontal
    vertical = textual_containers.Vertical
    data_table_type = textual_widgets.DataTable
    footer = textual_widgets.Footer
    header = textual_widgets.Header
    input_widget = textual_widgets.Input
    static_type = textual_widgets.Static

    class AgentGrepApp(app_type):  # ty: ignore[unsupported-base]
        """Read-only explorer for normalized search records."""

        CSS: t.ClassVar[str] = """
        Screen {
            layout: vertical;
        }
        #body {
            height: 1fr;
        }
        #detail {
            border: round $accent;
            padding: 1 2;
            overflow-y: auto;
        }
        DataTable {
            height: 1fr;
        }
        """
        BINDINGS: t.ClassVar[list[tuple[str, str, str]]] = [("q", "quit", "Quit")]
        # Every record handed to the app, and the subset passing the filter.
        all_records: list[SearchRecord]
        filtered_records: list[SearchRecord]

        def __init__(self, initial_records: list[SearchRecord]) -> None:
            super().__init__()
            self.all_records = initial_records
            self.filtered_records = initial_records

        def compose(self) -> cabc.Iterator[object]:
            """Yield the widget tree: header, filter box, table + detail, footer."""
            yield header()
            yield input_widget(placeholder="Filter by keyword", id="filter")
            with horizontal(id="body"):
                yield data_table_type(id="results")
                with vertical():
                    yield static_type("Select a result to inspect full text.", id="detail")
            yield footer()

        def on_mount(self) -> None:
            """Configure the table columns and fill in the initial rows."""
            query_app = t.cast("QueryAppLike", t.cast("object", self))
            results_table = t.cast("DataTableLike", query_app.query_one(data_table_type))
            results_table.cursor_type = "row"
            results_table.add_columns("Agent", "Kind", "Timestamp", "Title", "Path")
            self.refresh_table()

        def on_input_changed(self, event: object) -> None:
            """Re-filter the records by case-folded substring on every edit."""
            needle = str(getattr(event, "value", "")).strip().casefold()
            if needle:
                self.filtered_records = [
                    record
                    for record in self.all_records
                    if needle in build_search_haystack(record).casefold()
                ]
            else:
                self.filtered_records = self.all_records
            self.refresh_table()

        def refresh_table(self) -> None:
            """Rebuild the table rows and show the first result (or a notice)."""
            query_app = t.cast("QueryAppLike", t.cast("object", self))
            results_table = t.cast("DataTableLike", query_app.query_one(data_table_type))
            results_table.clear()
            for record in self.filtered_records:
                results_table.add_row(
                    record.agent,
                    record.kind,
                    record.timestamp or "",
                    record.title or "",
                    format_display_path(record.path),
                    key=str(id(record)),
                )
            if not self.filtered_records:
                detail_pane = t.cast("StaticLike", query_app.query_one("#detail", static_type))
                detail_pane.update("No results.")
                return
            self.show_detail(self.filtered_records[0])

        def on_data_table_row_highlighted(self, event: object) -> None:
            """Show the record under the cursor when the row index is valid."""
            cursor_row = int(getattr(event, "cursor_row", -1))
            if 0 <= cursor_row < len(self.filtered_records):
                self.show_detail(self.filtered_records[cursor_row])

        def show_detail(self, record: SearchRecord) -> None:
            """Render the metadata and full text of ``record`` in the detail pane."""
            body = "\n".join(
                (
                    f"Agent: {record.agent}",
                    f"Kind: {record.kind}",
                    f"Store: {record.store}",
                    f"Adapter: {record.adapter_id}",
                    f"Timestamp: {record.timestamp or 'unknown'}",
                    f"Model: {record.model or 'unknown'}",
                    f"Path: {format_display_path(record.path)}",
                    "",
                    record.text,
                ),
            )
            query_app = t.cast("QueryAppLike", t.cast("object", self))
            detail_pane = t.cast("StaticLike", query_app.query_one("#detail", static_type))
            detail_pane.update(body)

    runnable = t.cast("RunnableAppLike", t.cast("object", AgentGrepApp(records)))
    runnable.run()
|
|
3069
|
+
|
|
3070
|
+
|
|
3071
|
+
def run_search_command(args: SearchArgs) -> int:
    """Execute ``agentgrep search`` and return the process exit status.

    Returns ``0`` when the UI was launched or at least one record matched,
    and ``1`` when nothing matched.

    Raises
    ------
    SystemExit
        If no search terms were supplied outside of ``ui`` output mode.
    """
    if args.output_mode != "ui" and not args.terms:
        msg = "search requires at least one term unless --ui is used"
        raise SystemExit(msg)

    query = make_search_query(args)
    answer_now_enabled = should_enable_answer_now(args)
    control = SearchControl()
    listener = None
    if answer_now_enabled:
        listener = AnswerNowInputListener(control)
    progress = build_search_progress(args, answer_now_hint=answer_now_enabled)

    if listener is not None:
        listener.start()
    try:
        records = run_search_query(
            pathlib.Path.home(),
            query,
            progress=progress,
            control=control,
        )
    finally:
        # Always stop the listener, even when the search raises.
        if listener is not None:
            listener.stop()

    if args.output_mode == "ui":
        run_ui(records)
        return 0

    print_search_results(records, args)
    if not records and args.output_mode == "text":
        print("No matches found.", file=sys.stderr)
    return 0 if records else 1
|
|
3102
|
+
|
|
3103
|
+
|
|
3104
|
+
def run_find_command(args: FindArgs) -> int:
    """Execute ``agentgrep find`` and return the process exit status.

    Returns ``0`` when at least one source was found and ``1`` otherwise; in
    text mode an empty result also prints a notice to stderr.
    """
    home = pathlib.Path.home()
    records = run_find_query(home, args.agents, pattern=args.pattern, limit=args.limit)
    print_find_results(records, args)
    if not records and args.output_mode == "text":
        print("No matching sources found.", file=sys.stderr)
    return 0 if records else 1
|
|
3118
|
+
|
|
3119
|
+
|
|
3120
|
+
def _exit_on_sigint() -> t.NoReturn:
    """Terminate the process the way an unhandled Ctrl-C would.

    On Windows this simply exits with status 130. Elsewhere the SIGINT
    disposition is set to ignore, then to the default action, and the signal
    is re-raised against the current process.
    """
    if sys.platform != "win32":
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.raise_signal(signal.SIGINT)
    # Reached directly on Windows, or if the re-raised signal did not end
    # the process.
    raise SystemExit(130)
|
|
3128
|
+
|
|
3129
|
+
|
|
3130
|
+
def _write_interrupt_notice() -> None:
|
|
3131
|
+
with contextlib.suppress(OSError, ValueError):
|
|
3132
|
+
sys.stderr.write("Interrupted by user.\n")
|
|
3133
|
+
sys.stderr.flush()
|
|
3134
|
+
|
|
3135
|
+
|
|
3136
|
+
def main(argv: cabc.Sequence[str] | None = None) -> int:
    """Parse ``argv``, dispatch to the selected subcommand, and return its status.

    Returns ``0`` when argument parsing yields nothing to run. A
    ``KeyboardInterrupt`` anywhere in parsing or execution writes a short
    notice and terminates the process with Ctrl-C semantics.
    """
    try:
        parsed = parse_args(argv)
        if parsed is None:
            exit_code = 0
        elif isinstance(parsed, SearchArgs):
            exit_code = run_search_command(parsed)
        else:
            exit_code = run_find_command(parsed)
    except KeyboardInterrupt:
        _write_interrupt_notice()
        _exit_on_sigint()
    return exit_code
|
|
3148
|
+
|
|
3149
|
+
|
|
3150
|
+
# Script entry point: use the CLI's return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|