agentgrep 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentgrep/__init__.py ADDED
@@ -0,0 +1,3151 @@
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.14"
4
+ # dependencies = ["pydantic>=2.11.3", "textual>=3.2.0"]
5
+ # ///
6
+ """Search local AI agent prompts and history without mutating agent stores.
7
+
8
+ The tool discovers known read-only stores under ``~/.codex``, ``~/.claude``,
9
+ ``~/.cursor``, and Cursor's official IDE storage locations, then normalizes
10
+ results through named adapters.
11
+
12
+ Examples
13
+ --------
14
+ List prompts containing both ``serenity`` and ``bliss``:
15
+
16
+ >>> query = SearchQuery(
17
+ ... terms=("serenity", "bliss"),
18
+ ... search_type="prompts",
19
+ ... any_term=False,
20
+ ... regex=False,
21
+ ... case_sensitive=False,
22
+ ... agents=("codex",),
23
+ ... limit=None,
24
+ ... )
25
+ >>> matches_text("A serenity prompt with bliss inside.", query)
26
+ True
27
+ >>> matches_text("Only serenity appears here.", query)
28
+ False
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import argparse
34
+ import contextlib
35
+ import dataclasses
36
+ import importlib
37
+ import itertools
38
+ import json
39
+ import os
40
+ import pathlib
41
+ import re
42
+ import select
43
+ import shutil
44
+ import signal
45
+ import sqlite3
46
+ import subprocess
47
+ import sys
48
+ import textwrap
49
+ import threading
50
+ import time
51
+ import typing as t
52
+
53
+ if t.TYPE_CHECKING:
54
+ import collections.abc as cabc
55
+
56
+ PrivatePathBase = pathlib.Path
57
+ else:
58
+ PrivatePathBase = type(pathlib.Path())
59
+
60
+ AgentName = t.Literal["codex", "claude", "cursor"]
61
+ OutputMode = t.Literal["text", "json", "ndjson", "ui"]
62
+ ProgressMode = t.Literal["auto", "always", "never"]
63
+ PathKind = t.Literal["history_file", "session_file", "sqlite_db"]
64
+ SearchType = t.Literal["prompts", "history", "all"]
65
+ SourceKind = t.Literal["json", "jsonl", "sqlite"]
66
+ ColorMode = t.Literal["auto", "always", "never"]
67
+ type JSONScalar = str | int | float | bool | None
68
+ type JSONValue = JSONScalar | list[JSONValue] | dict[str, JSONValue]
69
+ type SummaryRow = tuple[object, object, object, object, object, object, object, object]
70
+ type KeyValueRow = tuple[object, object]
71
+
72
+ AGENT_CHOICES: tuple[AgentName, ...] = ("codex", "claude", "cursor")
73
+ JSON_FILE_SUFFIXES: frozenset[str] = frozenset({".json", ".jsonl"})
74
+ SCHEMA_VERSION: str = "agentgrep.v1"
75
+ USER_ROLES: frozenset[str] = frozenset({"human", "user"})
76
+ CURSOR_STATE_TOKENS: tuple[str, ...] = ("chat", "composer", "prompt", "history")
77
+ OFFICIAL_CURSOR_STATE_PATHS: tuple[pathlib.Path, ...] = (
78
+ pathlib.Path("~/.config/Cursor/User/globalStorage/state.vscdb").expanduser(),
79
+ pathlib.Path(
80
+ "~/Library/Application Support/Cursor/User/globalStorage/state.vscdb",
81
+ ).expanduser(),
82
+ pathlib.Path("~/AppData/Roaming/Cursor/User/globalStorage/state.vscdb").expanduser(),
83
+ )
84
+ EnvelopeFactory = t.Callable[[str, dict[str, object], list[dict[str, object]]], dict[str, object]]
85
+
86
+ OPTIONS_EXPECTING_VALUE: frozenset[str] = frozenset(
87
+ {
88
+ "--agent",
89
+ "--type",
90
+ "--limit",
91
+ "--color",
92
+ "--progress",
93
+ },
94
+ )
95
+ OPTIONS_FLAG_ONLY: frozenset[str] = frozenset(
96
+ {
97
+ "-h",
98
+ "--help",
99
+ "--any",
100
+ "--regex",
101
+ "--case-sensitive",
102
+ "--json",
103
+ "--ndjson",
104
+ "--ui",
105
+ },
106
+ )
107
+
108
+
109
+ def build_description(
110
+ intro: str,
111
+ example_blocks: cabc.Sequence[tuple[str | None, cabc.Sequence[str]]],
112
+ ) -> str:
113
+ """Assemble help text with example sections."""
114
+ sections: list[str] = []
115
+ intro_text = textwrap.dedent(intro).strip()
116
+ if intro_text:
117
+ sections.append(intro_text)
118
+
119
+ for heading, commands in example_blocks:
120
+ if not commands:
121
+ continue
122
+ title = "examples:" if heading is None else f"{heading} examples:"
123
+ lines = [title]
124
+ lines.extend(f" {command}" for command in commands)
125
+ sections.append("\n".join(lines))
126
+
127
+ return "\n\n".join(sections)
128
+
129
+
130
+ CLI_DESCRIPTION = build_description(
131
+ """
132
+ Read-only search across Codex, Claude, and Cursor local stores.
133
+
134
+ ``search`` is the default subcommand. ``agentgrep bliss`` is
135
+ equivalent to ``agentgrep search bliss``.
136
+ """,
137
+ (
138
+ (
139
+ "quick",
140
+ (
141
+ "agentgrep bliss",
142
+ "agentgrep serene bliss --agent codex",
143
+ ),
144
+ ),
145
+ (
146
+ "search",
147
+ (
148
+ "agentgrep search bliss",
149
+ "agentgrep search serene bliss --agent codex",
150
+ "agentgrep search prompt history --type history --ndjson",
151
+ "agentgrep search design --ui",
152
+ ),
153
+ ),
154
+ (
155
+ "find",
156
+ (
157
+ "agentgrep find codex",
158
+ "agentgrep find sessions --agent codex",
159
+ "agentgrep find cursor --json",
160
+ ),
161
+ ),
162
+ ),
163
+ )
164
+ SEARCH_DESCRIPTION = build_description(
165
+ """
166
+ Search normalized prompts or history across supported agent stores.
167
+ """,
168
+ (
169
+ (
170
+ None,
171
+ (
172
+ "agentgrep search bliss",
173
+ "agentgrep search serene bliss --agent codex",
174
+ "agentgrep search prompt history --type history --ndjson",
175
+ "agentgrep search serenity --json",
176
+ "agentgrep search design --ui",
177
+ ),
178
+ ),
179
+ ),
180
+ )
181
+ FIND_DESCRIPTION = build_description(
182
+ """
183
+ Find known prompt, history, and store paths without parsing message text.
184
+ """,
185
+ (
186
+ (
187
+ None,
188
+ (
189
+ "agentgrep find codex",
190
+ "agentgrep find sessions --agent codex",
191
+ "agentgrep find cursor --json",
192
+ ),
193
+ ),
194
+ ),
195
+ )
196
+
197
+
198
+ class PrivatePath(PrivatePathBase):
199
+ """Path subclass that hides the user's home directory in textual output."""
200
+
201
+ def __new__(cls, *args: t.Any, **kwargs: t.Any) -> PrivatePath:
202
+ """Create a privacy-aware path."""
203
+ return super().__new__(cls, *args, **kwargs)
204
+
205
+ @classmethod
206
+ def _collapse_home(cls, value: str) -> str:
207
+ """Collapse the user's home directory to ``~`` when ``value`` is inside it."""
208
+ if value.startswith("~"):
209
+ return value
210
+
211
+ home = str(pathlib.Path.home())
212
+ if value == home:
213
+ return "~"
214
+
215
+ separators = {os.sep}
216
+ if os.altsep:
217
+ separators.add(os.altsep)
218
+
219
+ for separator in separators:
220
+ home_with_separator = home + separator
221
+ if value.startswith(home_with_separator):
222
+ return "~" + value[len(home) :]
223
+
224
+ return value
225
+
226
+ def __str__(self) -> str:
227
+ """Return string output with the home directory collapsed."""
228
+ return self._collapse_home(pathlib.Path.__str__(self))
229
+
230
+ def __repr__(self) -> str:
231
+ """Return repr output with the home directory collapsed."""
232
+ return f"{self.__class__.__name__}({str(self)!r})"
233
+
234
+
235
+ def format_display_path(path: pathlib.Path | str, *, directory: bool = False) -> str:
236
+ """Return a privacy-safe display path."""
237
+ display = str(PrivatePath(path))
238
+ if directory and not display.endswith("/"):
239
+ return f"{display.rstrip('/')}/"
240
+ return display
241
+
242
+
243
+ class SearchRecordPayload(t.TypedDict):
244
+ """JSON payload for search records."""
245
+
246
+ schema_version: str
247
+ kind: t.Literal["prompt", "history"]
248
+ agent: AgentName
249
+ store: str
250
+ adapter_id: str
251
+ path: str
252
+ text: str
253
+ title: str | None
254
+ role: str | None
255
+ timestamp: str | None
256
+ model: str | None
257
+ session_id: str | None
258
+ conversation_id: str | None
259
+ metadata: dict[str, object]
260
+
261
+
262
+ class FindRecordPayload(t.TypedDict):
263
+ """JSON payload for find records."""
264
+
265
+ schema_version: str
266
+ kind: t.Literal["find"]
267
+ agent: AgentName
268
+ store: str
269
+ adapter_id: str
270
+ path: str
271
+ path_kind: PathKind
272
+ metadata: dict[str, object]
273
+
274
+
275
+ class SourceHandlePayload(t.TypedDict):
276
+ """JSON payload for discovered sources."""
277
+
278
+ schema_version: str
279
+ agent: AgentName
280
+ store: str
281
+ adapter_id: str
282
+ path: str
283
+ path_kind: PathKind
284
+ source_kind: SourceKind
285
+ search_root: str | None
286
+ mtime_ns: int
287
+
288
+
289
+ class EnvelopePayload(t.TypedDict):
290
+ """JSON payload for top-level envelopes."""
291
+
292
+ schema_version: str
293
+ command: str
294
+ query: dict[str, object]
295
+ results: list[dict[str, object]]
296
+
297
+
298
+ class PydanticTypeAdapter(t.Protocol):
299
+ """Minimal TypeAdapter surface used by ``agentgrep``."""
300
+
301
+ def validate_python(self, value: object, /) -> object:
302
+ """Validate a Python object."""
303
+ ...
304
+
305
+ def dump_python(self, value: object, /, *, mode: str = "python") -> object:
306
+ """Dump a Python object."""
307
+ ...
308
+
309
+
310
+ class PydanticTypeAdapterFactory(t.Protocol):
311
+ """Factory for creating TypeAdapters."""
312
+
313
+ def __call__(self, value_type: object, /) -> PydanticTypeAdapter:
314
+ """Create a TypeAdapter."""
315
+ ...
316
+
317
+
318
+ class PydanticModule(t.Protocol):
319
+ """Minimal Pydantic module surface used at runtime."""
320
+
321
+ TypeAdapter: PydanticTypeAdapterFactory
322
+
323
+
324
+ class HelpTheme(t.Protocol):
325
+ """Minimal argparse help theme surface."""
326
+
327
+ heading: str
328
+ reset: str
329
+ label: str
330
+ long_option: str
331
+ short_option: str
332
+ prog: str
333
+ action: str
334
+
335
+
336
+ class AnsiHelpTheme(t.NamedTuple):
337
+ """ANSI theme values for syntax-colored help examples."""
338
+
339
+ heading: str
340
+ reset: str
341
+ label: str
342
+ long_option: str
343
+ short_option: str
344
+ prog: str
345
+ action: str
346
+
347
+ @classmethod
348
+ def default(cls) -> AnsiHelpTheme:
349
+ """Return the default help theme."""
350
+ return cls(
351
+ heading="\x1b[1;36m",
352
+ reset="\x1b[0m",
353
+ label="\x1b[33m",
354
+ long_option="\x1b[32m",
355
+ short_option="\x1b[32m",
356
+ prog="\x1b[1;35m",
357
+ action="\x1b[36m",
358
+ )
359
+
360
+
361
+ @dataclasses.dataclass(frozen=True, slots=True)
362
+ class AnsiColors:
363
+ """Semantic ANSI colors for terminal status output."""
364
+
365
+ enabled: bool
366
+
367
+ SUCCESS: t.ClassVar[str] = "\x1b[32m"
368
+ WARNING: t.ClassVar[str] = "\x1b[33m"
369
+ ERROR: t.ClassVar[str] = "\x1b[31m"
370
+ INFO: t.ClassVar[str] = "\x1b[36m"
371
+ HEADING: t.ClassVar[str] = "\x1b[1;36m"
372
+ HIGHLIGHT: t.ClassVar[str] = "\x1b[35m"
373
+ MUTED: t.ClassVar[str] = "\x1b[34m"
374
+ WHITE: t.ClassVar[str] = "\x1b[37m"
375
+ RESET: t.ClassVar[str] = "\x1b[0m"
376
+
377
+ @classmethod
378
+ def for_stream(cls, color_mode: ColorMode, stream: t.TextIO) -> AnsiColors:
379
+ """Build semantic colors for ``stream`` and ``color_mode``."""
380
+ return cls(enabled=should_enable_color(color_mode, stream))
381
+
382
+ def colorize(self, text: str, color: str) -> str:
383
+ """Apply ``color`` to ``text`` when colors are enabled."""
384
+ if not self.enabled:
385
+ return text
386
+ return f"{color}{text}{self.RESET}"
387
+
388
+ def success(self, text: str) -> str:
389
+ """Format text as success."""
390
+ return self.colorize(text, self.SUCCESS)
391
+
392
+ def warning(self, text: str) -> str:
393
+ """Format text as warning."""
394
+ return self.colorize(text, self.WARNING)
395
+
396
+ def error(self, text: str) -> str:
397
+ """Format text as error."""
398
+ return self.colorize(text, self.ERROR)
399
+
400
+ def info(self, text: str) -> str:
401
+ """Format text as informational."""
402
+ return self.colorize(text, self.INFO)
403
+
404
+ def heading(self, text: str) -> str:
405
+ """Format text as a status heading."""
406
+ return self.colorize(text, self.HEADING)
407
+
408
+ def highlight(self, text: str) -> str:
409
+ """Format text as highlighted."""
410
+ return self.colorize(text, self.HIGHLIGHT)
411
+
412
+ def muted(self, text: str) -> str:
413
+ """Format text as muted."""
414
+ return self.colorize(text, self.MUTED)
415
+
416
+ def white(self, text: str) -> str:
417
+ """Format text as plain white."""
418
+ return self.colorize(text, self.WHITE)
419
+
420
+
421
+ def should_enable_color(color_mode: ColorMode, stream: t.TextIO) -> bool:
422
+ """Return whether output written to ``stream`` should use colors."""
423
+ if os.environ.get("NO_COLOR"):
424
+ return False
425
+ if color_mode == "never":
426
+ return False
427
+ if color_mode == "always":
428
+ return True
429
+ if os.environ.get("FORCE_COLOR"):
430
+ return True
431
+ return bool(getattr(stream, "isatty", lambda: False)())
432
+
433
+
434
+ def should_enable_help_color(color_mode: ColorMode) -> bool:
435
+ """Return whether help output should use colors."""
436
+ return should_enable_color(color_mode, sys.stdout)
437
+
438
+
439
+ def create_themed_formatter(color_mode: ColorMode) -> type[AgentGrepHelpFormatter]:
440
+ """Create a formatter class with a bound theme."""
441
+ theme = AnsiHelpTheme.default() if should_enable_help_color(color_mode) else None
442
+
443
+ class ThemedAgentGrepHelpFormatter(AgentGrepHelpFormatter):
444
+ """AgentGrepHelpFormatter with a configured theme."""
445
+
446
+ _theme: object | None
447
+
448
+ def __init__(
449
+ self,
450
+ prog: str,
451
+ indent_increment: int = 2,
452
+ max_help_position: int = 24,
453
+ width: int | None = None,
454
+ *,
455
+ color: bool = True,
456
+ ) -> None:
457
+ super().__init__(
458
+ prog,
459
+ indent_increment=indent_increment,
460
+ max_help_position=max_help_position,
461
+ width=width,
462
+ color=color,
463
+ )
464
+ self._theme = theme
465
+
466
+ return ThemedAgentGrepHelpFormatter
467
+
468
+
469
+ class AgentGrepHelpFormatter(argparse.RawDescriptionHelpFormatter):
470
+ """Extend help output with syntax-colored example sections."""
471
+
472
+ _theme: object | None = None
473
+
474
+ @t.override
475
+ def _fill_text(self, text: str, width: int, indent: str) -> str:
476
+ """Colorize ``examples:`` blocks when a theme is available."""
477
+ theme = t.cast("HelpTheme | None", getattr(self, "_theme", None))
478
+ if not text or theme is None:
479
+ return super()._fill_text(text, width, indent)
480
+
481
+ lines = text.splitlines(keepends=True)
482
+ formatted_lines: list[str] = []
483
+ in_examples_block = False
484
+ expect_value = False
485
+
486
+ for line in lines:
487
+ if line.strip() == "":
488
+ in_examples_block = False
489
+ expect_value = False
490
+ formatted_lines.append(f"{indent}{line}")
491
+ continue
492
+
493
+ has_newline = line.endswith("\n")
494
+ stripped_line = line.rstrip("\n")
495
+ leading_length = len(stripped_line) - len(stripped_line.lstrip(" "))
496
+ leading = stripped_line[:leading_length]
497
+ content = stripped_line[leading_length:]
498
+ content_lower = content.lower()
499
+ is_section_heading = (
500
+ content_lower.endswith("examples:") and content_lower != "examples:"
501
+ )
502
+
503
+ if is_section_heading or content_lower == "examples:":
504
+ formatted_content = f"{theme.heading}{content}{theme.reset}"
505
+ in_examples_block = True
506
+ expect_value = False
507
+ elif in_examples_block:
508
+ colored = self._colorize_example_line(
509
+ content,
510
+ theme=theme,
511
+ expect_value=expect_value,
512
+ )
513
+ expect_value = colored.expect_value
514
+ formatted_content = colored.text
515
+ else:
516
+ formatted_content = stripped_line
517
+
518
+ newline = "\n" if has_newline else ""
519
+ formatted_lines.append(f"{indent}{leading}{formatted_content}{newline}")
520
+
521
+ return "".join(formatted_lines)
522
+
523
+ class _ColorizedLine(t.NamedTuple):
524
+ """Result of colorizing one example line."""
525
+
526
+ text: str
527
+ expect_value: bool
528
+
529
+ def _colorize_example_line(
530
+ self,
531
+ content: str,
532
+ *,
533
+ theme: HelpTheme,
534
+ expect_value: bool,
535
+ ) -> _ColorizedLine:
536
+ """Colorize program, subcommand, options, and option values."""
537
+ parts: list[str] = []
538
+ expecting_value = expect_value
539
+ first_token = True
540
+ colored_subcommand = False
541
+
542
+ for match in re.finditer(r"\s+|\S+", content):
543
+ token = match.group()
544
+ if token.isspace():
545
+ parts.append(token)
546
+ continue
547
+
548
+ if expecting_value:
549
+ color = theme.label
550
+ expecting_value = False
551
+ elif token.startswith("--"):
552
+ color = theme.long_option
553
+ expecting_value = (
554
+ token not in OPTIONS_FLAG_ONLY and token in OPTIONS_EXPECTING_VALUE
555
+ )
556
+ elif token.startswith("-"):
557
+ color = theme.short_option
558
+ expecting_value = (
559
+ token not in OPTIONS_FLAG_ONLY and token in OPTIONS_EXPECTING_VALUE
560
+ )
561
+ elif first_token:
562
+ color = theme.prog
563
+ elif not colored_subcommand:
564
+ color = theme.action
565
+ colored_subcommand = True
566
+ else:
567
+ color = None
568
+
569
+ first_token = False
570
+ if color is None:
571
+ parts.append(token)
572
+ else:
573
+ parts.append(f"{color}{token}{theme.reset}")
574
+
575
+ return self._ColorizedLine("".join(parts), expecting_value)
576
+
577
+
578
+ class TextualContainersModule(t.Protocol):
579
+ """Minimal Textual containers module surface."""
580
+
581
+ Horizontal: cabc.Callable[..., t.ContextManager[object]]
582
+ Vertical: cabc.Callable[..., t.ContextManager[object]]
583
+
584
+
585
+ class TextualAppModule(t.Protocol):
586
+ """Minimal Textual app module surface."""
587
+
588
+ App: type[object]
589
+
590
+
591
+ class DataTableLike(t.Protocol):
592
+ """Minimal DataTable surface used by the TUI."""
593
+
594
+ cursor_type: str
595
+
596
+ def add_columns(self, *labels: str) -> None:
597
+ """Add columns."""
598
+ ...
599
+
600
+ def clear(self) -> None:
601
+ """Clear rows."""
602
+ ...
603
+
604
+ def add_row(self, *values: str, key: str | None = None) -> None:
605
+ """Add one row."""
606
+ ...
607
+
608
+
609
+ class StaticLike(t.Protocol):
610
+ """Minimal Static widget surface used by the TUI."""
611
+
612
+ def update(self, content: str) -> None:
613
+ """Update widget contents."""
614
+ ...
615
+
616
+
617
+ class QueryAppLike(t.Protocol):
618
+ """Minimal Textual app query surface used by the TUI."""
619
+
620
+ def query_one(self, selector: object, expect_type: object | None = None) -> object:
621
+ """Look up one widget."""
622
+ ...
623
+
624
+
625
+ class RunnableAppLike(t.Protocol):
626
+ """Minimal runnable app surface."""
627
+
628
+ def run(self) -> None:
629
+ """Run the application."""
630
+ ...
631
+
632
+
633
+ class TextualWidgetsModule(t.Protocol):
634
+ """Minimal Textual widgets module surface."""
635
+
636
+ DataTable: cabc.Callable[..., object]
637
+ Footer: cabc.Callable[[], object]
638
+ Header: cabc.Callable[[], object]
639
+ Input: cabc.Callable[..., object]
640
+ Static: cabc.Callable[..., object]
641
+
642
+
643
+ @dataclasses.dataclass(slots=True)
644
+ class BackendSelection:
645
+ """Selected optional subprocess backends."""
646
+
647
+ find_tool: str | None
648
+ grep_tool: str | None
649
+ json_tool: str | None
650
+
651
+
652
+ @dataclasses.dataclass(slots=True)
653
+ class SearchArgs:
654
+ """Typed arguments for ``agentgrep search``."""
655
+
656
+ terms: tuple[str, ...]
657
+ agents: tuple[AgentName, ...]
658
+ search_type: SearchType
659
+ any_term: bool
660
+ regex: bool
661
+ case_sensitive: bool
662
+ limit: int | None
663
+ output_mode: OutputMode
664
+ color_mode: ColorMode
665
+ progress_mode: ProgressMode
666
+
667
+
668
+ @dataclasses.dataclass(slots=True)
669
+ class FindArgs:
670
+ """Typed arguments for ``agentgrep find``."""
671
+
672
+ pattern: str | None
673
+ agents: tuple[AgentName, ...]
674
+ limit: int | None
675
+ output_mode: OutputMode
676
+ color_mode: ColorMode
677
+
678
+
679
+ @dataclasses.dataclass(slots=True)
680
+ class SearchQuery:
681
+ """Compiled search configuration."""
682
+
683
+ terms: tuple[str, ...]
684
+ search_type: SearchType
685
+ any_term: bool
686
+ regex: bool
687
+ case_sensitive: bool
688
+ agents: tuple[AgentName, ...]
689
+ limit: int | None
690
+
691
+
692
+ @dataclasses.dataclass(slots=True)
693
+ class SourceHandle:
694
+ """A discovered, parseable source file or SQLite database."""
695
+
696
+ agent: AgentName
697
+ store: str
698
+ adapter_id: str
699
+ path: pathlib.Path
700
+ path_kind: PathKind
701
+ source_kind: SourceKind
702
+ search_root: pathlib.Path | None
703
+ mtime_ns: int
704
+
705
+
706
+ @dataclasses.dataclass(slots=True)
707
+ class SearchRecord:
708
+ """Normalized prompt/history record."""
709
+
710
+ kind: t.Literal["prompt", "history"]
711
+ agent: AgentName
712
+ store: str
713
+ adapter_id: str
714
+ path: pathlib.Path
715
+ text: str
716
+ title: str | None = None
717
+ role: str | None = None
718
+ timestamp: str | None = None
719
+ model: str | None = None
720
+ session_id: str | None = None
721
+ conversation_id: str | None = None
722
+ metadata: dict[str, object] = dataclasses.field(default_factory=dict)
723
+
724
+
725
+ @dataclasses.dataclass(slots=True)
726
+ class FindRecord:
727
+ """Normalized discovery record for ``agentgrep find``."""
728
+
729
+ kind: t.Literal["find"]
730
+ agent: AgentName
731
+ store: str
732
+ adapter_id: str
733
+ path: pathlib.Path
734
+ path_kind: PathKind
735
+ metadata: dict[str, object] = dataclasses.field(default_factory=dict)
736
+
737
+
738
+ @dataclasses.dataclass(slots=True)
739
+ class MessageCandidate:
740
+ """Intermediate parsed message representation."""
741
+
742
+ role: str | None
743
+ text: str
744
+ title: str | None = None
745
+ timestamp: str | None = None
746
+ model: str | None = None
747
+ session_id: str | None = None
748
+ conversation_id: str | None = None
749
+
750
+
751
+ class SearchControl:
752
+ """Thread-safe cooperative controls for an active search."""
753
+
754
+ def __init__(self) -> None:
755
+ self._answer_now = threading.Event()
756
+
757
+ def request_answer_now(self) -> None:
758
+ """Request that search return the results collected so far."""
759
+ self._answer_now.set()
760
+
761
+ def answer_now_requested(self) -> bool:
762
+ """Return whether search should stop and answer with partial results."""
763
+ return self._answer_now.is_set()
764
+
765
+
766
+ class AnswerNowInputListener:
767
+ """Listen for a blank Enter keypress and request a partial answer."""
768
+
769
+ def __init__(
770
+ self,
771
+ control: SearchControl,
772
+ *,
773
+ stream: t.TextIO | None = None,
774
+ poll_interval: float = 0.1,
775
+ ) -> None:
776
+ self._control = control
777
+ self._stream = stream if stream is not None else sys.stdin
778
+ self._poll_interval = poll_interval
779
+ self._stop_event = threading.Event()
780
+ self._thread: threading.Thread | None = None
781
+
782
+ def start(self) -> None:
783
+ """Start listening for a blank line on stdin."""
784
+ if self._thread is not None and self._thread.is_alive():
785
+ return
786
+ self._stop_event.clear()
787
+ self._thread = threading.Thread(
788
+ target=self._run,
789
+ daemon=True,
790
+ name="agentgrep-answer-now-input",
791
+ )
792
+ self._thread.start()
793
+
794
+ def stop(self) -> None:
795
+ """Stop listening when possible."""
796
+ self._stop_event.set()
797
+ thread = self._thread
798
+ self._thread = None
799
+ if thread is not None:
800
+ thread.join(timeout=0.2)
801
+
802
+ def _run(self) -> None:
803
+ selectable = self._stream_is_selectable()
804
+ while not self._stop_event.is_set() and not self._control.answer_now_requested():
805
+ line = self._read_line(selectable)
806
+ if line is None:
807
+ continue
808
+ if line == "":
809
+ return
810
+ if line.strip() == "":
811
+ self._control.request_answer_now()
812
+ return
813
+ if not selectable:
814
+ return
815
+
816
+ def _read_line(self, selectable: bool) -> str | None:
817
+ if selectable:
818
+ try:
819
+ readable, _, _ = select.select([self._stream], [], [], self._poll_interval)
820
+ except OSError, TypeError, ValueError:
821
+ return None
822
+ if not readable:
823
+ return None
824
+ try:
825
+ return self._stream.readline()
826
+ except OSError, ValueError:
827
+ return ""
828
+
829
+ def _stream_is_selectable(self) -> bool:
830
+ try:
831
+ _ = self._stream.fileno()
832
+ readable, _, _ = select.select([self._stream], [], [], 0)
833
+ except AttributeError, OSError, TypeError, ValueError:
834
+ return False
835
+ return isinstance(readable, list)
836
+
837
+
838
+ class SearchProgress(t.Protocol):
839
+ """Progress reporter used by search internals."""
840
+
841
+ def start(self, query: SearchQuery) -> None:
842
+ """Mark search start."""
843
+ ...
844
+
845
+ def sources_discovered(self, count: int) -> None:
846
+ """Report discovered source count."""
847
+ ...
848
+
849
+ def prefilter_started(self, root: pathlib.Path) -> None:
850
+ """Report root prefilter start."""
851
+ ...
852
+
853
+ def sources_planned(self, planned: int, total: int) -> None:
854
+ """Report selected source count."""
855
+ ...
856
+
857
+ def source_started(self, index: int, total: int, source: SourceHandle) -> None:
858
+ """Report source scan start."""
859
+ ...
860
+
861
+ def source_finished(
862
+ self,
863
+ index: int,
864
+ total: int,
865
+ source: SourceHandle,
866
+ records: int,
867
+ matches: int,
868
+ ) -> None:
869
+ """Report source scan completion."""
870
+ ...
871
+
872
+ def result_added(self, count: int) -> None:
873
+ """Report deduped result count."""
874
+ ...
875
+
876
+ def finish(self, result_count: int) -> None:
877
+ """Report search completion."""
878
+ ...
879
+
880
+ def answer_now(self, result_count: int) -> None:
881
+ """Report early search completion with partial results."""
882
+ ...
883
+
884
+ def interrupt(self) -> None:
885
+ """Report interrupted search."""
886
+ ...
887
+
888
+ def close(self) -> None:
889
+ """Release any progress resources."""
890
+ ...
891
+
892
+
893
+ class NoopSearchProgress:
894
+ """Silent search progress reporter."""
895
+
896
+ def start(self, query: SearchQuery) -> None:
897
+ """Ignore search start."""
898
+
899
+ def sources_discovered(self, count: int) -> None:
900
+ """Ignore discovered source count."""
901
+
902
+ def prefilter_started(self, root: pathlib.Path) -> None:
903
+ """Ignore root prefilter start."""
904
+
905
+ def sources_planned(self, planned: int, total: int) -> None:
906
+ """Ignore selected source count."""
907
+
908
+ def source_started(self, index: int, total: int, source: SourceHandle) -> None:
909
+ """Ignore source scan start."""
910
+
911
+ def source_finished(
912
+ self,
913
+ index: int,
914
+ total: int,
915
+ source: SourceHandle,
916
+ records: int,
917
+ matches: int,
918
+ ) -> None:
919
+ """Ignore source scan completion."""
920
+
921
+ def result_added(self, count: int) -> None:
922
+ """Ignore deduped result count."""
923
+
924
+ def finish(self, result_count: int) -> None:
925
+ """Ignore search completion."""
926
+
927
+ def answer_now(self, result_count: int) -> None:
928
+ """Ignore early search completion."""
929
+
930
+ def interrupt(self) -> None:
931
+ """Ignore interrupted search."""
932
+
933
+ def close(self) -> None:
934
+ """Nothing to release."""
935
+
936
+
937
+ class ConsoleSearchProgress:
938
+ """Human progress reporter for potentially long searches."""
939
+
940
+ _SPINNER_FRAMES: t.ClassVar[str] = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"
941
+
942
+ def __init__(
943
+ self,
944
+ *,
945
+ enabled: bool,
946
+ stream: t.TextIO | None = None,
947
+ tty: bool | None = None,
948
+ color_mode: ColorMode = "auto",
949
+ refresh_interval: float = 0.1,
950
+ heartbeat_interval: float = 10.0,
951
+ answer_now_hint: bool = False,
952
+ ) -> None:
953
+ self._enabled = enabled
954
+ self._stream = stream if stream is not None else sys.stderr
955
+ self._tty = (
956
+ tty
957
+ if tty is not None
958
+ else bool(
959
+ getattr(self._stream, "isatty", lambda: False)(),
960
+ )
961
+ )
962
+ self._colors = AnsiColors.for_stream(color_mode, self._stream)
963
+ self._refresh_interval = refresh_interval
964
+ self._heartbeat_interval = heartbeat_interval
965
+ self._answer_now_hint = answer_now_hint
966
+ self._lock = threading.Lock()
967
+ self._stop_event = threading.Event()
968
+ self._thread: threading.Thread | None = None
969
+ self._started_at: float | None = None
970
+ self._last_heartbeat_at: float | None = None
971
+ self._last_line_len = 0
972
+ self._query_label = "search"
973
+ self._phase = "starting"
974
+ self._detail: str | None = None
975
+ self._current: int | None = None
976
+ self._total: int | None = None
977
+ self._matches = 0
978
+ self._finished = False
979
+
980
+ def start(self, query: SearchQuery) -> None:
981
+ """Begin progress reporting for ``query``."""
982
+ if not self._enabled:
983
+ return
984
+ label = " ".join(query.terms) if query.terms else "all records"
985
+ now = time.monotonic()
986
+ with self._lock:
987
+ self._query_label = label
988
+ self._phase = "discovering"
989
+ self._detail = None
990
+ self._current = None
991
+ self._total = None
992
+ self._matches = 0
993
+ self._started_at = now
994
+ self._last_heartbeat_at = now
995
+ self._finished = False
996
+ if self._tty:
997
+ self._ensure_tty_thread()
998
+ else:
999
+ self._emit_line(self._start_line(label))
1000
+
1001
+ def sources_discovered(self, count: int) -> None:
1002
+ """Report discovered source count."""
1003
+ self.set_status("discovered", total=count, detail=f"{count} sources")
1004
+
1005
+ def prefilter_started(self, root: pathlib.Path) -> None:
1006
+ """Report root prefilter start."""
1007
+ self.set_status("prefiltering", detail=format_display_path(root, directory=True))
1008
+
1009
+ def sources_planned(self, planned: int, total: int) -> None:
1010
+ """Report selected source count."""
1011
+ self.set_status("planning", current=planned, total=total, detail="candidate sources")
1012
+
1013
+ def source_started(self, index: int, total: int, source: SourceHandle) -> None:
1014
+ """Report source scan start."""
1015
+ self.set_status("scanning", current=index, total=total, detail=source.path.name)
1016
+
1017
+ def source_finished(
1018
+ self,
1019
+ index: int,
1020
+ total: int,
1021
+ source: SourceHandle,
1022
+ records: int,
1023
+ matches: int,
1024
+ ) -> None:
1025
+ """Report source scan completion."""
1026
+ self.set_status(
1027
+ "scanning",
1028
+ current=index,
1029
+ total=total,
1030
+ detail=f"{records} records, {format_match_count(matches)} in {source.path.name}",
1031
+ )
1032
+
1033
+ def result_added(self, count: int) -> None:
1034
+ """Report deduped result count."""
1035
+ if not self._enabled:
1036
+ return
1037
+ with self._lock:
1038
+ self._matches = count
1039
+ self._emit_heartbeat_if_due()
1040
+
1041
+ def set_status(
1042
+ self,
1043
+ phase: str,
1044
+ *,
1045
+ current: int | None = None,
1046
+ total: int | None = None,
1047
+ detail: str | None = None,
1048
+ ) -> None:
1049
+ """Update the current progress status."""
1050
+ if not self._enabled:
1051
+ return
1052
+ with self._lock:
1053
+ self._phase = phase
1054
+ self._current = current
1055
+ self._total = total
1056
+ self._detail = detail
1057
+ self._emit_heartbeat_if_due()
1058
+
1059
+ def finish(self, result_count: int) -> None:
1060
+ """Finish progress reporting."""
1061
+ if not self._enabled:
1062
+ return
1063
+ with self._lock:
1064
+ self._matches = result_count
1065
+ self._phase = "complete"
1066
+ self._finished = True
1067
+ if self._tty:
1068
+ self._stop_tty_thread()
1069
+ self._clear_tty_line()
1070
+ return
1071
+ elapsed = self._elapsed_seconds()
1072
+ self._emit_line(
1073
+ self._finish_line(result_count, elapsed),
1074
+ )
1075
+
1076
+ def answer_now(self, result_count: int) -> None:
1077
+ """Finish progress reporting with a partial-answer status."""
1078
+ if not self._enabled:
1079
+ return
1080
+ with self._lock:
1081
+ self._matches = result_count
1082
+ self._phase = "answering now"
1083
+ self._finished = True
1084
+ line = self._answer_now_line(result_count)
1085
+ if self._tty:
1086
+ self._stop_tty_thread()
1087
+ self._write_tty_line(line)
1088
+ return
1089
+ self._emit_line(line)
1090
+
1091
+ def close(self) -> None:
1092
+ """Stop any active progress renderer."""
1093
+ if not self._enabled:
1094
+ return
1095
+ if self._tty:
1096
+ self._stop_tty_thread()
1097
+ self._clear_tty_line()
1098
+
1099
+ def interrupt(self) -> None:
1100
+ """Stop progress rendering while preserving the current status."""
1101
+ if not self._enabled:
1102
+ return
1103
+ if self._tty:
1104
+ self._stop_tty_thread()
1105
+ self._write_tty_summary_line()
1106
+ return
1107
+ self._emit_line(self._summary())
1108
+
1109
+ def _ensure_tty_thread(self) -> None:
1110
+ if self._thread is not None and self._thread.is_alive():
1111
+ return
1112
+ self._stop_event.clear()
1113
+ self._thread = threading.Thread(
1114
+ target=self._tty_loop,
1115
+ daemon=True,
1116
+ name="agentgrep-search-progress",
1117
+ )
1118
+ self._thread.start()
1119
+
1120
+ def _stop_tty_thread(self) -> None:
1121
+ self._stop_event.set()
1122
+ thread = self._thread
1123
+ self._thread = None
1124
+ if thread is not None:
1125
+ thread.join(timeout=1.0)
1126
+
1127
+ def _tty_loop(self) -> None:
1128
+ frames = itertools.cycle(self._SPINNER_FRAMES)
1129
+ while not self._stop_event.is_set():
1130
+ self._render_tty(next(frames))
1131
+ self._stop_event.wait(self._refresh_interval)
1132
+
1133
+ def _render_tty(self, frame: str) -> None:
1134
+ summary = self._summary()
1135
+ line = f"{self._colors.info(frame)} {summary}"
1136
+ with self._lock:
1137
+ try:
1138
+ self._stream.write("\r\033[2K" + line)
1139
+ self._stream.flush()
1140
+ self._last_line_len = len(line)
1141
+ except OSError, ValueError:
1142
+ pass
1143
+
1144
+ def _clear_tty_line(self) -> None:
1145
+ with self._lock:
1146
+ if self._last_line_len == 0:
1147
+ return
1148
+ try:
1149
+ self._stream.write("\r\033[2K")
1150
+ self._stream.flush()
1151
+ except OSError, ValueError:
1152
+ pass
1153
+ self._last_line_len = 0
1154
+
1155
+ def _write_tty_summary_line(self) -> None:
1156
+ line = self._summary()
1157
+ self._write_tty_line(line)
1158
+
1159
+ def _write_tty_line(self, line: str) -> None:
1160
+ with self._lock:
1161
+ try:
1162
+ self._stream.write("\r\033[2K" + line + "\n")
1163
+ self._stream.flush()
1164
+ except OSError, ValueError:
1165
+ pass
1166
+ self._last_line_len = 0
1167
+
1168
+ def _emit_heartbeat_if_due(self) -> None:
1169
+ if not self._enabled or self._tty:
1170
+ return
1171
+ with self._lock:
1172
+ last = self._last_heartbeat_at
1173
+ label = self._query_label
1174
+ if last is None:
1175
+ return
1176
+ now = time.monotonic()
1177
+ if now - last < self._heartbeat_interval:
1178
+ return
1179
+ elapsed = self._elapsed_seconds()
1180
+ self._emit_line(
1181
+ self._heartbeat_line(label, elapsed),
1182
+ )
1183
+ with self._lock:
1184
+ self._last_heartbeat_at = now
1185
+
1186
+ def _emit_line(self, line: str) -> None:
1187
+ try:
1188
+ self._stream.write(line + "\n")
1189
+ self._stream.flush()
1190
+ except OSError, ValueError:
1191
+ pass
1192
+
1193
+ def _summary(self) -> str:
1194
+ elapsed = self._elapsed_seconds()
1195
+ parts = [
1196
+ self._start_line(self._query_label),
1197
+ self._status_text(),
1198
+ self._colors.warning(format_match_count(self._matches)),
1199
+ self._colors.muted(f"{elapsed:.1f}s"),
1200
+ ]
1201
+ if self._answer_now_hint:
1202
+ parts.append(self._colors.white("[Press enter, answer now]"))
1203
+ return " | ".join(parts)
1204
+
1205
+ def _start_line(self, label: str) -> str:
1206
+ return f"{self._colors.heading('Searching')} {self._colors.highlight(label)}"
1207
+
1208
+ def _heartbeat_line(self, label: str, elapsed: float) -> str:
1209
+ prefix = f"{self._colors.muted('...')} {self._colors.heading('still searching')}"
1210
+ elapsed_text = self._colors.muted(f"{elapsed:.0f}s elapsed")
1211
+ return f"{prefix} {self._colors.highlight(label)}: {self._status_text()} ({elapsed_text})"
1212
+
1213
+ def _finish_line(self, result_count: int, elapsed: float) -> str:
1214
+ return (
1215
+ f"{self._colors.success('Search complete:')} "
1216
+ f"{self._colors.warning(format_match_count(result_count))} "
1217
+ f"({self._colors.muted(f'{elapsed:.1f}s elapsed')})"
1218
+ )
1219
+
1220
+ def _answer_now_line(self, result_count: int) -> str:
1221
+ return (
1222
+ f"{self._colors.success('Answering now:')} "
1223
+ f"{self._colors.warning(format_match_count(result_count))}"
1224
+ )
1225
+
1226
+ def _status_text(self) -> str:
1227
+ with self._lock:
1228
+ phase = self._phase
1229
+ current = self._current
1230
+ total = self._total
1231
+ detail = self._detail
1232
+ if current is not None and total is not None:
1233
+ count = self._colors.warning(f"{current}/{total}")
1234
+ return f"{self._colors.heading(phase)} {count} {self._colors.muted('sources')}"
1235
+ if detail:
1236
+ return f"{self._colors.heading(phase)} {self._colors.muted(detail)}"
1237
+ return self._colors.heading(phase)
1238
+
1239
+ def _elapsed_seconds(self) -> float:
1240
+ with self._lock:
1241
+ started = self._started_at
1242
+ if started is None:
1243
+ return 0.0
1244
+ return time.monotonic() - started
1245
+
1246
+
1247
+ def format_match_count(count: int) -> str:
1248
+ """Return a human-readable match count."""
1249
+ suffix = "match" if count == 1 else "matches"
1250
+ return f"{count} {suffix}"
1251
+
1252
+
1253
+ def noop_search_progress() -> SearchProgress:
1254
+ """Return a silent search progress reporter."""
1255
+ return NoopSearchProgress()
1256
+
1257
+
1258
+ def select_backends() -> BackendSelection:
1259
+ """Return the best available subprocess helpers."""
1260
+ return BackendSelection(
1261
+ find_tool=which_first(("fd", "fdfind")),
1262
+ grep_tool=which_first(("rg", "ag")),
1263
+ json_tool=which_first(("jq", "jaq")),
1264
+ )
1265
+
1266
+
1267
+ def which_first(names: tuple[str, ...]) -> str | None:
1268
+ """Return the first executable available on ``PATH``."""
1269
+ for name in names:
1270
+ found = shutil.which(name)
1271
+ if found is not None:
1272
+ return found
1273
+ return None
1274
+
1275
+
1276
+ def run_readonly_command(
1277
+ command: list[str],
1278
+ *,
1279
+ control: SearchControl | None = None,
1280
+ ) -> subprocess.CompletedProcess[str]:
1281
+ """Run a command without a shell and capture text output."""
1282
+ if control is None:
1283
+ return subprocess.run(
1284
+ command,
1285
+ capture_output=True,
1286
+ text=True,
1287
+ check=False,
1288
+ )
1289
+ process = subprocess.Popen(
1290
+ command,
1291
+ stdout=subprocess.PIPE,
1292
+ stderr=subprocess.PIPE,
1293
+ text=True,
1294
+ )
1295
+ while True:
1296
+ try:
1297
+ stdout, stderr = process.communicate(timeout=0.05)
1298
+ except subprocess.TimeoutExpired:
1299
+ if control.answer_now_requested():
1300
+ process.terminate()
1301
+ try:
1302
+ stdout, stderr = process.communicate(timeout=0.2)
1303
+ except subprocess.TimeoutExpired:
1304
+ process.kill()
1305
+ stdout, stderr = process.communicate()
1306
+ return subprocess.CompletedProcess(
1307
+ command,
1308
+ process.returncode,
1309
+ stdout,
1310
+ stderr,
1311
+ )
1312
+ continue
1313
+ return subprocess.CompletedProcess(command, process.returncode, stdout, stderr)
1314
+
1315
+
1316
+ @dataclasses.dataclass(slots=True)
1317
+ class ParserBundle:
1318
+ """CLI parsers used for root and subcommand help."""
1319
+
1320
+ parser: argparse.ArgumentParser
1321
+ search_parser: argparse.ArgumentParser
1322
+ find_parser: argparse.ArgumentParser
1323
+
1324
+
1325
+ def normalize_color_mode(argv: cabc.Sequence[str] | None) -> ColorMode:
1326
+ """Return the requested CLI color mode."""
1327
+ if argv is None:
1328
+ argv = sys.argv[1:]
1329
+ for index, argument in enumerate(argv):
1330
+ if argument == "--color" and index + 1 < len(argv):
1331
+ value = argv[index + 1]
1332
+ if value in {"auto", "always", "never"}:
1333
+ return t.cast("ColorMode", value)
1334
+ if argument.startswith("--color="):
1335
+ value = argument.partition("=")[2]
1336
+ if value in {"auto", "always", "never"}:
1337
+ return t.cast("ColorMode", value)
1338
+ return "auto"
1339
+
1340
+
1341
+ SUBCOMMANDS: frozenset[str] = frozenset({"search", "find"})
1342
+
1343
+
1344
+ def inject_default_subcommand(
1345
+ argv: cabc.Sequence[str] | None,
1346
+ ) -> cabc.Sequence[str] | None:
1347
+ """Prepend ``search`` to ``argv`` when no subcommand is supplied.
1348
+
1349
+ Walks ``argv`` skipping the global ``--color`` option and any help flag.
1350
+ If the first remaining token is not a known subcommand, inserts
1351
+ ``search`` at that position so ``agentgrep bliss`` parses identically
1352
+ to ``agentgrep search bliss``. Returns the input unchanged when no
1353
+ injection is needed.
1354
+
1355
+ Examples
1356
+ --------
1357
+ >>> inject_default_subcommand(["bliss"])
1358
+ ['search', 'bliss']
1359
+ >>> inject_default_subcommand(["search", "bliss"])
1360
+ ['search', 'bliss']
1361
+ >>> inject_default_subcommand(["find", "codex"])
1362
+ ['find', 'codex']
1363
+ >>> inject_default_subcommand(["--color", "never", "bliss"])
1364
+ ['--color', 'never', 'search', 'bliss']
1365
+ >>> inject_default_subcommand(["--help"])
1366
+ ['--help']
1367
+ >>> inject_default_subcommand([])
1368
+ []
1369
+ """
1370
+ effective = list(sys.argv[1:]) if argv is None else list(argv)
1371
+ index = 0
1372
+ while index < len(effective):
1373
+ token = effective[index]
1374
+ if token in {"-h", "--help"}:
1375
+ return argv
1376
+ if token == "--color" and index + 1 < len(effective):
1377
+ index += 2
1378
+ continue
1379
+ if token.startswith("--color="):
1380
+ index += 1
1381
+ continue
1382
+ if token in SUBCOMMANDS:
1383
+ return argv
1384
+ effective.insert(index, "search")
1385
+ return effective
1386
+ return argv
1387
+
1388
+
1389
+ @contextlib.contextmanager
1390
+ def configured_color_environment(color_mode: ColorMode) -> cabc.Iterator[None]:
1391
+ """Temporarily configure env vars for argparse help color handling."""
1392
+ force_color = os.environ.get("FORCE_COLOR")
1393
+ try:
1394
+ if color_mode == "always" and not os.environ.get("NO_COLOR"):
1395
+ os.environ["FORCE_COLOR"] = "1"
1396
+ yield
1397
+ finally:
1398
+ if force_color is None:
1399
+ _ = os.environ.pop("FORCE_COLOR", None)
1400
+ else:
1401
+ os.environ["FORCE_COLOR"] = force_color
1402
+
1403
+
1404
+ def create_parser(
1405
+ color_mode: ColorMode,
1406
+ ) -> ParserBundle:
1407
+ """Create the root parser and subparsers."""
1408
+ formatter_class = create_themed_formatter(color_mode)
1409
+ parser = argparse.ArgumentParser(
1410
+ prog="agentgrep",
1411
+ description=CLI_DESCRIPTION,
1412
+ formatter_class=formatter_class,
1413
+ color=color_mode != "never",
1414
+ )
1415
+ _ = parser.add_argument(
1416
+ "--color",
1417
+ choices=["auto", "always", "never"],
1418
+ default="auto",
1419
+ help="when to use colors: auto (default), always, or never",
1420
+ )
1421
+ subparsers = parser.add_subparsers(dest="command")
1422
+
1423
+ search_parser = subparsers.add_parser(
1424
+ "search",
1425
+ help="Search normalized prompts or history",
1426
+ description=SEARCH_DESCRIPTION,
1427
+ formatter_class=formatter_class,
1428
+ color=color_mode != "never",
1429
+ )
1430
+ add_common_agent_options(search_parser)
1431
+ _ = search_parser.add_argument("terms", nargs="*", help="Keywords or regex patterns")
1432
+ _ = search_parser.add_argument(
1433
+ "--type",
1434
+ choices=["prompts", "history", "all"],
1435
+ default="prompts",
1436
+ dest="search_type",
1437
+ help="Record type to search (default: prompts)",
1438
+ )
1439
+ _ = search_parser.add_argument(
1440
+ "--any",
1441
+ action="store_true",
1442
+ help="Match any term instead of requiring all terms",
1443
+ )
1444
+ _ = search_parser.add_argument(
1445
+ "--regex",
1446
+ action="store_true",
1447
+ help="Treat terms as regular expressions",
1448
+ )
1449
+ _ = search_parser.add_argument(
1450
+ "--case-sensitive",
1451
+ action="store_true",
1452
+ help="Perform case-sensitive matching",
1453
+ )
1454
+ _ = search_parser.add_argument(
1455
+ "--limit",
1456
+ type=int,
1457
+ metavar="N",
1458
+ help="Limit the number of results",
1459
+ )
1460
+ _ = search_parser.add_argument(
1461
+ "--progress",
1462
+ choices=["auto", "always", "never"],
1463
+ default="auto",
1464
+ help="Show search progress on stderr",
1465
+ )
1466
+ add_output_mode_options(search_parser, allow_ui=True)
1467
+
1468
+ find_parser = subparsers.add_parser(
1469
+ "find",
1470
+ help="Find known prompt/history stores and session files",
1471
+ description=FIND_DESCRIPTION,
1472
+ formatter_class=formatter_class,
1473
+ color=color_mode != "never",
1474
+ )
1475
+ add_common_agent_options(find_parser)
1476
+ _ = find_parser.add_argument(
1477
+ "pattern",
1478
+ nargs="?",
1479
+ help="Optional substring to match against discovered paths",
1480
+ )
1481
+ _ = find_parser.add_argument(
1482
+ "--limit",
1483
+ type=int,
1484
+ metavar="N",
1485
+ help="Limit the number of results",
1486
+ )
1487
+ add_output_mode_options(find_parser, allow_ui=False)
1488
+ return ParserBundle(parser=parser, search_parser=search_parser, find_parser=find_parser)
1489
+
1490
+
1491
+ def parse_args(
1492
+ argv: cabc.Sequence[str] | None = None,
1493
+ ) -> SearchArgs | FindArgs | None:
1494
+ """Parse CLI arguments into typed dataclasses."""
1495
+ color_mode = normalize_color_mode(argv)
1496
+ argv = inject_default_subcommand(argv)
1497
+ with configured_color_environment(color_mode):
1498
+ bundle = create_parser(color_mode)
1499
+ namespace = bundle.parser.parse_args(argv)
1500
+ if t.cast("str | None", getattr(namespace, "command", None)) is None:
1501
+ with configured_color_environment(color_mode):
1502
+ bundle.parser.print_help()
1503
+ return None
1504
+ agents = parse_agents(t.cast("list[str]", namespace.agent))
1505
+ output_mode = parse_output_mode(namespace)
1506
+ limit = t.cast("int | None", namespace.limit)
1507
+ if limit is not None and limit < 1:
1508
+ with configured_color_environment(color_mode):
1509
+ bundle.parser.error("--limit must be greater than 0")
1510
+
1511
+ command = t.cast("str", namespace.command)
1512
+ if command == "search":
1513
+ terms = tuple(t.cast("list[str]", namespace.terms))
1514
+ if not terms:
1515
+ with configured_color_environment(color_mode):
1516
+ bundle.search_parser.print_help()
1517
+ return None
1518
+ return SearchArgs(
1519
+ terms=terms,
1520
+ agents=agents,
1521
+ search_type=t.cast("SearchType", namespace.search_type),
1522
+ any_term=t.cast("bool", namespace.any),
1523
+ regex=t.cast("bool", namespace.regex),
1524
+ case_sensitive=t.cast("bool", namespace.case_sensitive),
1525
+ limit=limit,
1526
+ output_mode=output_mode,
1527
+ color_mode=color_mode,
1528
+ progress_mode=t.cast("ProgressMode", namespace.progress),
1529
+ )
1530
+ pattern = t.cast("str | None", namespace.pattern)
1531
+ if not pattern:
1532
+ with configured_color_environment(color_mode):
1533
+ bundle.find_parser.print_help()
1534
+ return None
1535
+ return FindArgs(
1536
+ pattern=pattern,
1537
+ agents=agents,
1538
+ limit=limit,
1539
+ output_mode=output_mode,
1540
+ color_mode=color_mode,
1541
+ )
1542
+
1543
+
1544
+ def add_common_agent_options(parser: argparse.ArgumentParser) -> None:
1545
+ """Attach shared agent selection flags."""
1546
+ _ = parser.add_argument(
1547
+ "--agent",
1548
+ action="append",
1549
+ choices=[*AGENT_CHOICES, "all"],
1550
+ default=[],
1551
+ help="Limit results to a specific agent; repeatable",
1552
+ )
1553
+
1554
+
1555
+ def add_output_mode_options(
1556
+ parser: argparse.ArgumentParser,
1557
+ *,
1558
+ allow_ui: bool,
1559
+ ) -> None:
1560
+ """Attach mutually exclusive output mode flags."""
1561
+ group = parser.add_mutually_exclusive_group()
1562
+ _ = group.add_argument("--json", action="store_true", help="Emit one JSON document")
1563
+ _ = group.add_argument("--ndjson", action="store_true", help="Emit one JSON object per line")
1564
+ if allow_ui:
1565
+ _ = group.add_argument("--ui", action="store_true", help="Launch a read-only UI")
1566
+
1567
+
1568
+ def parse_agents(values: list[str]) -> tuple[AgentName, ...]:
1569
+ """Normalize ``--agent`` selections."""
1570
+ if not values or "all" in values:
1571
+ return AGENT_CHOICES
1572
+ ordered = tuple(t.cast("AgentName", value) for value in values if value != "all")
1573
+ return ordered or AGENT_CHOICES
1574
+
1575
+
1576
+ def parse_output_mode(namespace: argparse.Namespace) -> OutputMode:
1577
+ """Return the selected output mode."""
1578
+ if getattr(namespace, "json", False):
1579
+ return "json"
1580
+ if getattr(namespace, "ndjson", False):
1581
+ return "ndjson"
1582
+ if getattr(namespace, "ui", False):
1583
+ return "ui"
1584
+ return "text"
1585
+
1586
+
1587
+ def make_search_query(args: SearchArgs) -> SearchQuery:
1588
+ """Convert parsed search arguments into a query object."""
1589
+ return SearchQuery(
1590
+ terms=args.terms,
1591
+ search_type=args.search_type,
1592
+ any_term=args.any_term,
1593
+ regex=args.regex,
1594
+ case_sensitive=args.case_sensitive,
1595
+ agents=args.agents,
1596
+ limit=args.limit,
1597
+ )
1598
+
1599
+
1600
+ def discover_sources(
1601
+ home: pathlib.Path,
1602
+ agents: tuple[AgentName, ...],
1603
+ backends: BackendSelection,
1604
+ ) -> list[SourceHandle]:
1605
+ """Discover all known parseable sources for the selected agents."""
1606
+ discovered: list[SourceHandle] = []
1607
+ for agent in agents:
1608
+ if agent == "codex":
1609
+ discovered.extend(discover_codex_sources(home, backends))
1610
+ elif agent == "claude":
1611
+ discovered.extend(discover_claude_sources(home, backends))
1612
+ elif agent == "cursor":
1613
+ discovered.extend(discover_cursor_sources(home, backends))
1614
+ discovered.sort(key=lambda item: (item.agent, item.store, str(item.path)))
1615
+ return discovered
1616
+
1617
+
1618
+ def file_mtime_ns(path: pathlib.Path) -> int:
1619
+ """Return a cached modification time for a path."""
1620
+ try:
1621
+ return path.stat().st_mtime_ns
1622
+ except OSError:
1623
+ return 0
1624
+
1625
+
1626
+ def discover_codex_sources(
1627
+ home: pathlib.Path,
1628
+ backends: BackendSelection,
1629
+ ) -> list[SourceHandle]:
1630
+ """Discover Codex sessions and command history."""
1631
+ root = home / ".codex"
1632
+ sources: list[SourceHandle] = []
1633
+ if not root.exists():
1634
+ return sources
1635
+
1636
+ for name in ("history.json", "history.jsonl"):
1637
+ path = root / name
1638
+ if path.is_file():
1639
+ sources.append(
1640
+ SourceHandle(
1641
+ agent="codex",
1642
+ store="codex.history",
1643
+ adapter_id="codex.history_json.v1",
1644
+ path=path,
1645
+ path_kind="history_file",
1646
+ source_kind="jsonl" if path.suffix == ".jsonl" else "json",
1647
+ search_root=None,
1648
+ mtime_ns=file_mtime_ns(path),
1649
+ ),
1650
+ )
1651
+
1652
+ sessions_root = root / "sessions"
1653
+ sources.extend(
1654
+ SourceHandle(
1655
+ agent="codex",
1656
+ store="codex.sessions",
1657
+ adapter_id="codex.sessions_jsonl.v1",
1658
+ path=path,
1659
+ path_kind="session_file",
1660
+ source_kind="jsonl",
1661
+ search_root=sessions_root,
1662
+ mtime_ns=file_mtime_ns(path),
1663
+ )
1664
+ for path in list_files_matching(sessions_root, "*.jsonl", backends.find_tool)
1665
+ )
1666
+ return sources
1667
+
1668
+
1669
+ def discover_claude_sources(
1670
+ home: pathlib.Path,
1671
+ backends: BackendSelection,
1672
+ ) -> list[SourceHandle]:
1673
+ """Discover Claude Code project session files."""
1674
+ root = home / ".claude" / "projects"
1675
+ if not root.exists():
1676
+ return []
1677
+ return [
1678
+ SourceHandle(
1679
+ agent="claude",
1680
+ store="claude.projects",
1681
+ adapter_id="claude.projects_jsonl.v1",
1682
+ path=path,
1683
+ path_kind="session_file",
1684
+ source_kind="jsonl",
1685
+ search_root=root,
1686
+ mtime_ns=file_mtime_ns(path),
1687
+ )
1688
+ for path in list_files_matching(root, "*.jsonl", backends.find_tool)
1689
+ ]
1690
+
1691
+
1692
+ def discover_cursor_sources(
1693
+ home: pathlib.Path,
1694
+ backends: BackendSelection,
1695
+ ) -> list[SourceHandle]:
1696
+ """Discover Cursor databases from both home-local and official roots."""
1697
+ sources: list[SourceHandle] = []
1698
+ tracking_db = home / ".cursor" / "ai-tracking" / "ai-code-tracking.db"
1699
+ if tracking_db.is_file():
1700
+ sources.append(
1701
+ SourceHandle(
1702
+ agent="cursor",
1703
+ store="cursor.ai_tracking",
1704
+ adapter_id="cursor.ai_tracking_sqlite.v1",
1705
+ path=tracking_db,
1706
+ path_kind="sqlite_db",
1707
+ source_kind="sqlite",
1708
+ search_root=None,
1709
+ mtime_ns=file_mtime_ns(tracking_db),
1710
+ ),
1711
+ )
1712
+
1713
+ seen_paths: set[pathlib.Path] = set()
1714
+ for path in OFFICIAL_CURSOR_STATE_PATHS:
1715
+ if path.is_file():
1716
+ seen_paths.add(path)
1717
+ sources.append(
1718
+ SourceHandle(
1719
+ agent="cursor",
1720
+ store="cursor.state",
1721
+ adapter_id="cursor.state_vscdb_modern.v1",
1722
+ path=path,
1723
+ path_kind="sqlite_db",
1724
+ source_kind="sqlite",
1725
+ search_root=None,
1726
+ mtime_ns=file_mtime_ns(path),
1727
+ ),
1728
+ )
1729
+ cursor_root = home / ".cursor"
1730
+ for path in list_files_matching(cursor_root, "state.vscdb", backends.find_tool):
1731
+ if path in seen_paths:
1732
+ continue
1733
+ sources.append(
1734
+ SourceHandle(
1735
+ agent="cursor",
1736
+ store="cursor.state",
1737
+ adapter_id="cursor.state_vscdb_legacy.v1",
1738
+ path=path,
1739
+ path_kind="sqlite_db",
1740
+ source_kind="sqlite",
1741
+ search_root=None,
1742
+ mtime_ns=file_mtime_ns(path),
1743
+ ),
1744
+ )
1745
+ return sources
1746
+
1747
+
1748
+ def list_files_matching(
1749
+ root: pathlib.Path,
1750
+ glob_pattern: str,
1751
+ fd_program: str | None,
1752
+ ) -> list[pathlib.Path]:
1753
+ """List files under ``root`` that match a glob."""
1754
+ if not root.exists():
1755
+ return []
1756
+ if fd_program is not None:
1757
+ command = [fd_program, "-a", "-t", "f", "--glob", glob_pattern, str(root)]
1758
+ completed = run_readonly_command(command)
1759
+ if completed.returncode == 0:
1760
+ return [pathlib.Path(line) for line in completed.stdout.splitlines() if line.strip()]
1761
+ return sorted(path for path in root.rglob(glob_pattern) if path.is_file())
1762
+
1763
+
1764
+ def search_sources(
1765
+ query: SearchQuery,
1766
+ sources: list[SourceHandle],
1767
+ backends: BackendSelection,
1768
+ *,
1769
+ progress: SearchProgress | None = None,
1770
+ control: SearchControl | None = None,
1771
+ ) -> list[SearchRecord]:
1772
+ """Parse and filter search results across all selected sources."""
1773
+ active_progress = noop_search_progress() if progress is None else progress
1774
+ active_control = SearchControl() if control is None else control
1775
+ planned_sources = plan_search_sources(
1776
+ query,
1777
+ sources,
1778
+ backends,
1779
+ progress=active_progress,
1780
+ control=active_control,
1781
+ )
1782
+ if active_control.answer_now_requested():
1783
+ active_progress.answer_now(0)
1784
+ return []
1785
+ active_progress.sources_planned(len(planned_sources), len(sources))
1786
+ records = collect_search_records(
1787
+ query,
1788
+ planned_sources,
1789
+ progress=active_progress,
1790
+ control=active_control,
1791
+ )
1792
+ if active_control.answer_now_requested():
1793
+ active_progress.answer_now(len(records))
1794
+ else:
1795
+ active_progress.finish(len(records))
1796
+ return records
1797
+
1798
+
1799
+ def run_search_query(
1800
+ home: pathlib.Path,
1801
+ query: SearchQuery,
1802
+ *,
1803
+ backends: BackendSelection | None = None,
1804
+ progress: SearchProgress | None = None,
1805
+ control: SearchControl | None = None,
1806
+ ) -> list[SearchRecord]:
1807
+ """Discover sources and run a normalized search query."""
1808
+ active_backends = select_backends() if backends is None else backends
1809
+ active_progress = noop_search_progress() if progress is None else progress
1810
+ active_control = SearchControl() if control is None else control
1811
+ active_progress.start(query)
1812
+ interrupted = False
1813
+ try:
1814
+ sources = discover_sources(home, query.agents, active_backends)
1815
+ active_progress.sources_discovered(len(sources))
1816
+ return search_sources(
1817
+ query,
1818
+ sources,
1819
+ active_backends,
1820
+ progress=active_progress,
1821
+ control=active_control,
1822
+ )
1823
+ except KeyboardInterrupt:
1824
+ interrupted = True
1825
+ active_progress.interrupt()
1826
+ raise
1827
+ finally:
1828
+ if not interrupted:
1829
+ active_progress.close()
1830
+
1831
+
1832
+ def plan_search_sources(
1833
+ query: SearchQuery,
1834
+ sources: list[SourceHandle],
1835
+ backends: BackendSelection,
1836
+ *,
1837
+ progress: SearchProgress | None = None,
1838
+ control: SearchControl | None = None,
1839
+ ) -> list[SourceHandle]:
1840
+ """Return the candidate sources to parse for a search query."""
1841
+ active_progress = noop_search_progress() if progress is None else progress
1842
+ active_control = SearchControl() if control is None else control
1843
+ if not query.terms:
1844
+ return sources
1845
+
1846
+ planned_sources = list(sources)
1847
+ if backends.grep_tool is not None:
1848
+ planned_sources = prefilter_sources_by_root(
1849
+ query,
1850
+ planned_sources,
1851
+ backends.grep_tool,
1852
+ progress=active_progress,
1853
+ control=active_control,
1854
+ )
1855
+ ordered_sources = [
1856
+ source
1857
+ for source in planned_sources
1858
+ if not active_control.answer_now_requested()
1859
+ and (
1860
+ source.search_root is not None
1861
+ or direct_source_matches(source, query, backends, active_control)
1862
+ )
1863
+ ]
1864
+ ordered_sources.sort(key=source_order_key)
1865
+ return ordered_sources
1866
+
1867
+
1868
+ def source_order_key(source: SourceHandle) -> tuple[int, str]:
1869
+ """Return a newest-first search order key for sources."""
1870
+ return (-source.mtime_ns, str(source.path))
1871
+
1872
+
1873
+ def prefilter_sources_by_root(
1874
+ query: SearchQuery,
1875
+ sources: list[SourceHandle],
1876
+ grep_program: str,
1877
+ *,
1878
+ progress: SearchProgress | None = None,
1879
+ control: SearchControl | None = None,
1880
+ ) -> list[SourceHandle]:
1881
+ """Prefilter file-backed sources by searching each root once."""
1882
+ active_progress = noop_search_progress() if progress is None else progress
1883
+ active_control = SearchControl() if control is None else control
1884
+ matched_paths_by_root: dict[pathlib.Path, set[pathlib.Path] | None] = {}
1885
+ filtered_sources: list[SourceHandle] = []
1886
+ for source in sources:
1887
+ if active_control.answer_now_requested():
1888
+ break
1889
+ search_root = source.search_root
1890
+ if search_root is None:
1891
+ filtered_sources.append(source)
1892
+ continue
1893
+
1894
+ if search_root not in matched_paths_by_root:
1895
+ active_progress.prefilter_started(search_root)
1896
+ matched_paths_by_root[search_root] = grep_root_paths(
1897
+ search_root,
1898
+ query,
1899
+ grep_program,
1900
+ control=active_control,
1901
+ )
1902
+ if active_control.answer_now_requested():
1903
+ break
1904
+
1905
+ matched_paths = matched_paths_by_root[search_root]
1906
+ if matched_paths is None or source.path in matched_paths:
1907
+ filtered_sources.append(source)
1908
+ return filtered_sources
1909
+
1910
+
1911
+ def grep_root_paths(
1912
+ search_root: pathlib.Path,
1913
+ query: SearchQuery,
1914
+ grep_program: str,
1915
+ *,
1916
+ control: SearchControl | None = None,
1917
+ ) -> set[pathlib.Path] | None:
1918
+ """Return file paths matched by a whole-root grep."""
1919
+ active_control = SearchControl() if control is None else control
1920
+ matched_sets: list[set[pathlib.Path]] = []
1921
+ for term in query.terms:
1922
+ if active_control.answer_now_requested():
1923
+ return set()
1924
+ command = build_grep_command(
1925
+ grep_program,
1926
+ term,
1927
+ search_root,
1928
+ regex=query.regex,
1929
+ case_sensitive=query.case_sensitive,
1930
+ )
1931
+ completed = run_readonly_command(command, control=active_control)
1932
+ if active_control.answer_now_requested():
1933
+ return set()
1934
+ if completed.returncode not in {0, 1}:
1935
+ return None
1936
+ matched_sets.append(
1937
+ {pathlib.Path(line) for line in completed.stdout.splitlines() if line.strip()},
1938
+ )
1939
+
1940
+ if not matched_sets:
1941
+ return set()
1942
+ if query.any_term:
1943
+ merged: set[pathlib.Path] = set()
1944
+ for matched in matched_sets:
1945
+ merged.update(matched)
1946
+ return merged
1947
+
1948
+ intersection = matched_sets[0].copy()
1949
+ for matched in matched_sets[1:]:
1950
+ intersection.intersection_update(matched)
1951
+ return intersection
1952
+
1953
+
1954
+ def direct_source_matches(
1955
+ source: SourceHandle,
1956
+ query: SearchQuery,
1957
+ backends: BackendSelection,
1958
+ control: SearchControl | None = None,
1959
+ ) -> bool:
1960
+ """Return whether a direct source should be parsed."""
1961
+ active_control = SearchControl() if control is None else control
1962
+ if active_control.answer_now_requested():
1963
+ return False
1964
+ if source.source_kind == "sqlite":
1965
+ return True
1966
+ if backends.grep_tool is not None:
1967
+ grep_match = grep_file_matches(
1968
+ source.path,
1969
+ query,
1970
+ backends.grep_tool,
1971
+ control=active_control,
1972
+ )
1973
+ if active_control.answer_now_requested():
1974
+ return False
1975
+ if grep_match is not None:
1976
+ return grep_match
1977
+ if source.path.suffix in JSON_FILE_SUFFIXES and backends.json_tool is not None:
1978
+ extracted = flatten_json_strings_with_tool(
1979
+ source.path,
1980
+ backends.json_tool,
1981
+ control=active_control,
1982
+ )
1983
+ if active_control.answer_now_requested():
1984
+ return False
1985
+ if extracted is not None:
1986
+ return matches_text(extracted, query)
1987
+ return matches_text(read_text_file(source.path), query)
1988
+
1989
+
1990
+ def collect_search_records(
1991
+ query: SearchQuery,
1992
+ sources: list[SourceHandle],
1993
+ *,
1994
+ progress: SearchProgress | None = None,
1995
+ control: SearchControl | None = None,
1996
+ ) -> list[SearchRecord]:
1997
+ """Parse candidate sources and collect matching records."""
1998
+ active_progress = noop_search_progress() if progress is None else progress
1999
+ active_control = SearchControl() if control is None else control
2000
+ deduped: dict[tuple[str, str, str, str, str], SearchRecord] = {}
2001
+ total = len(sources)
2002
+ for index, source in enumerate(sources, start=1):
2003
+ if active_control.answer_now_requested() or (
2004
+ query.limit is not None and len(deduped) >= query.limit
2005
+ ):
2006
+ break
2007
+ active_progress.source_started(index, total, source)
2008
+ records_seen = 0
2009
+ matches_seen = 0
2010
+ matching_records: list[SearchRecord] = []
2011
+ for record in iter_source_records(source):
2012
+ if active_control.answer_now_requested():
2013
+ break
2014
+ records_seen += 1
2015
+ if matches_record(record, query):
2016
+ matches_seen += 1
2017
+ matching_records.append(record)
2018
+ active_progress.source_finished(index, total, source, records_seen, matches_seen)
2019
+ matching_records.sort(key=search_record_sort_key, reverse=True)
2020
+ for record in matching_records:
2021
+ dedupe_key = record_dedupe_key(record)
2022
+ if dedupe_key not in deduped:
2023
+ deduped[dedupe_key] = record
2024
+ active_progress.result_added(len(deduped))
2025
+ if active_control.answer_now_requested() or (
2026
+ query.limit is not None and len(deduped) >= query.limit
2027
+ ):
2028
+ break
2029
+ results = list(deduped.values())
2030
+ results.sort(key=search_record_sort_key, reverse=True)
2031
+ return results
2032
+
2033
+
2034
+ def find_sources(
2035
+ pattern: str | None,
2036
+ sources: list[SourceHandle],
2037
+ limit: int | None,
2038
+ ) -> list[FindRecord]:
2039
+ """Build filtered ``find`` results from discovered sources."""
2040
+ query = pattern.casefold() if pattern is not None else None
2041
+ results: list[FindRecord] = []
2042
+ for source in sources:
2043
+ record = FindRecord(
2044
+ kind="find",
2045
+ agent=source.agent,
2046
+ store=source.store,
2047
+ adapter_id=source.adapter_id,
2048
+ path=source.path,
2049
+ path_kind=source.path_kind,
2050
+ metadata={"source_kind": source.source_kind},
2051
+ )
2052
+ if query is not None:
2053
+ haystack = " ".join(
2054
+ (
2055
+ record.agent,
2056
+ record.store,
2057
+ record.adapter_id,
2058
+ str(record.path),
2059
+ record.path_kind,
2060
+ ),
2061
+ ).casefold()
2062
+ if query not in haystack:
2063
+ continue
2064
+ results.append(record)
2065
+ if limit is not None and len(results) >= limit:
2066
+ break
2067
+ return results
2068
+
2069
+
2070
+ def run_find_query(
2071
+ home: pathlib.Path,
2072
+ agents: tuple[AgentName, ...],
2073
+ *,
2074
+ pattern: str | None,
2075
+ limit: int | None,
2076
+ backends: BackendSelection | None = None,
2077
+ ) -> list[FindRecord]:
2078
+ """Discover sources and build normalized ``find`` results."""
2079
+ active_backends = select_backends() if backends is None else backends
2080
+ sources = discover_sources(home, agents, active_backends)
2081
+ return find_sources(pattern, sources, limit)
2082
+
2083
+
2084
+ def iter_source_records(
2085
+ source: SourceHandle,
2086
+ ) -> cabc.Iterator[SearchRecord]:
2087
+ """Dispatch to the adapter parser for one source."""
2088
+ if source.adapter_id == "codex.sessions_jsonl.v1":
2089
+ yield from parse_codex_session_file(source)
2090
+ return
2091
+ if source.adapter_id == "codex.history_json.v1":
2092
+ yield from parse_codex_history_file(source)
2093
+ return
2094
+ if source.adapter_id == "claude.projects_jsonl.v1":
2095
+ yield from parse_claude_project_file(source)
2096
+ return
2097
+ if source.adapter_id == "cursor.ai_tracking_sqlite.v1":
2098
+ yield from parse_cursor_ai_tracking_db(source)
2099
+ return
2100
+ if source.adapter_id in {"cursor.state_vscdb_modern.v1", "cursor.state_vscdb_legacy.v1"}:
2101
+ yield from parse_cursor_state_db(source)
2102
+
2103
+
2104
+ def parse_codex_session_file(
2105
+ source: SourceHandle,
2106
+ ) -> cabc.Iterator[SearchRecord]:
2107
+ """Parse Codex session JSONL files."""
2108
+ session_id = source.path.stem
2109
+ session_model: str | None = None
2110
+ for event in iter_jsonl(source.path):
2111
+ if not isinstance(event, dict):
2112
+ continue
2113
+ event_type = str(event.get("type", ""))
2114
+ payload = event.get("payload")
2115
+ if event_type == "session_meta" and isinstance(payload, dict):
2116
+ session_id = as_optional_str(payload.get("id")) or session_id
2117
+ session_model = (
2118
+ as_optional_str(payload.get("model"))
2119
+ or as_optional_str(payload.get("model_name"))
2120
+ or as_optional_str(payload.get("model_provider"))
2121
+ or session_model
2122
+ )
2123
+ continue
2124
+ if event_type != "response_item" or not isinstance(payload, dict):
2125
+ continue
2126
+ candidate = candidate_from_mapping(
2127
+ t.cast("dict[str, object]", payload),
2128
+ timestamp=as_optional_str(event.get("timestamp")),
2129
+ model=session_model,
2130
+ session_id=session_id,
2131
+ conversation_id=session_id,
2132
+ )
2133
+ if candidate is None:
2134
+ continue
2135
+ yield build_search_record(source, candidate)
2136
+
2137
+
2138
+ def parse_codex_history_file(
2139
+ source: SourceHandle,
2140
+ ) -> cabc.Iterator[SearchRecord]:
2141
+ """Parse Codex command history files."""
2142
+ entries: list[JSONValue]
2143
+ if source.source_kind == "json":
2144
+ payload = read_json_file(source.path)
2145
+ entries = payload if isinstance(payload, list) else []
2146
+ else:
2147
+ entries = list(iter_jsonl(source.path))
2148
+
2149
+ for entry in entries:
2150
+ if not isinstance(entry, dict):
2151
+ continue
2152
+ command = as_optional_str(entry.get("command"))
2153
+ if not command:
2154
+ continue
2155
+ yield SearchRecord(
2156
+ kind="history",
2157
+ agent=source.agent,
2158
+ store=source.store,
2159
+ adapter_id=source.adapter_id,
2160
+ path=source.path,
2161
+ text=command,
2162
+ title="Codex command history",
2163
+ role="user",
2164
+ timestamp=as_optional_str(entry.get("timestamp")),
2165
+ )
2166
+
2167
+
2168
+ def parse_claude_project_file(
2169
+ source: SourceHandle,
2170
+ ) -> cabc.Iterator[SearchRecord]:
2171
+ """Parse Claude Code project JSONL files using lightweight heuristics."""
2172
+ conversation_id = source.path.stem
2173
+ seen: set[tuple[str | None, str, str | None, str | None]] = set()
2174
+ for event in iter_jsonl(source.path):
2175
+ for candidate in iter_message_candidates(
2176
+ event,
2177
+ fallback_conversation_id=conversation_id,
2178
+ ):
2179
+ key = (
2180
+ candidate.role,
2181
+ candidate.text,
2182
+ candidate.timestamp,
2183
+ candidate.conversation_id,
2184
+ )
2185
+ if key in seen:
2186
+ continue
2187
+ seen.add(key)
2188
+ yield build_search_record(source, candidate)
2189
+
2190
+
2191
+ def parse_cursor_ai_tracking_db(
2192
+ source: SourceHandle,
2193
+ ) -> cabc.Iterator[SearchRecord]:
2194
+ """Parse Cursor AI tracking summaries."""
2195
+ connection = open_readonly_sqlite(source.path)
2196
+ try:
2197
+ for row in iter_conversation_summaries(connection):
2198
+ (
2199
+ conversation_id,
2200
+ title,
2201
+ tldr,
2202
+ overview,
2203
+ bullets,
2204
+ model,
2205
+ mode,
2206
+ updated_at,
2207
+ ) = row
2208
+ text_parts = [
2209
+ part
2210
+ for part in (
2211
+ as_optional_str(title),
2212
+ as_optional_str(tldr),
2213
+ as_optional_str(overview),
2214
+ flatten_summary_bullets(bullets),
2215
+ )
2216
+ if part
2217
+ ]
2218
+ if not text_parts:
2219
+ continue
2220
+ yield SearchRecord(
2221
+ kind="history",
2222
+ agent=source.agent,
2223
+ store=source.store,
2224
+ adapter_id=source.adapter_id,
2225
+ path=source.path,
2226
+ text="\n\n".join(text_parts),
2227
+ title=as_optional_str(title),
2228
+ role="assistant",
2229
+ timestamp=as_optional_str(updated_at),
2230
+ model=as_optional_str(model),
2231
+ conversation_id=as_optional_str(conversation_id),
2232
+ metadata={"mode": as_optional_str(mode) or ""},
2233
+ )
2234
+ except sqlite3.DatabaseError:
2235
+ return
2236
+ finally:
2237
+ connection.close()
2238
+
2239
+
2240
+ def parse_cursor_state_db(
2241
+ source: SourceHandle,
2242
+ ) -> cabc.Iterator[SearchRecord]:
2243
+ """Parse Cursor ``state.vscdb`` tables with generic JSON extraction."""
2244
+ connection = open_readonly_sqlite(source.path)
2245
+ try:
2246
+ tables = sqlite_table_names(connection)
2247
+ candidate_tables = [name for name in ("ItemTable", "cursorDiskKV") if name in tables]
2248
+ seen: set[tuple[str | None, str, str | None, str | None]] = set()
2249
+ for table in candidate_tables:
2250
+ for key, raw_value in iter_key_value_rows(connection, table):
2251
+ lowered_key = key.casefold()
2252
+ if not any(token in lowered_key for token in CURSOR_STATE_TOKENS):
2253
+ continue
2254
+ decoded = decode_sqlite_value(raw_value)
2255
+ if decoded is None:
2256
+ continue
2257
+ parsed = parse_embedded_json(decoded)
2258
+ if parsed is None:
2259
+ continue
2260
+ for candidate in iter_message_candidates(
2261
+ parsed,
2262
+ fallback_title=key,
2263
+ fallback_conversation_id=key,
2264
+ ):
2265
+ entry_key = (
2266
+ candidate.role,
2267
+ candidate.text,
2268
+ candidate.timestamp,
2269
+ candidate.conversation_id,
2270
+ )
2271
+ if entry_key in seen:
2272
+ continue
2273
+ seen.add(entry_key)
2274
+ yield build_search_record(source, candidate)
2275
+ except sqlite3.DatabaseError:
2276
+ return
2277
+ finally:
2278
+ connection.close()
2279
+
2280
+
2281
+ def open_readonly_sqlite(path: pathlib.Path) -> sqlite3.Connection:
2282
+ """Open a SQLite database with a read-only URI."""
2283
+ return sqlite3.connect(f"file:{path}?mode=ro", uri=True)
2284
+
2285
+
2286
+ def sqlite_table_names(connection: sqlite3.Connection) -> set[str]:
2287
+ """Return the table names from a SQLite connection."""
2288
+ rows = t.cast(
2289
+ "cabc.Iterable[tuple[object]]",
2290
+ connection.execute("SELECT name FROM sqlite_master WHERE type = 'table'"),
2291
+ )
2292
+ names: set[str] = set()
2293
+ for row in rows:
2294
+ name = row[0]
2295
+ if isinstance(name, str):
2296
+ names.add(name)
2297
+ return names
2298
+
2299
+
2300
+ def iter_key_value_rows(
2301
+ connection: sqlite3.Connection,
2302
+ table: str,
2303
+ ) -> cabc.Iterator[tuple[str, object]]:
2304
+ """Yield likely key/value rows from a SQLite table."""
2305
+ if table not in {"ItemTable", "cursorDiskKV"}:
2306
+ return
2307
+ info = t.cast(
2308
+ "cabc.Iterable[tuple[object, ...]]",
2309
+ connection.execute(f"PRAGMA table_info({table})"),
2310
+ )
2311
+ columns = [str(row[1]) for row in info]
2312
+ if "key" not in columns or "value" not in columns:
2313
+ return
2314
+ query = "SELECT key, value FROM ItemTable"
2315
+ if table == "cursorDiskKV":
2316
+ query = "SELECT key, value FROM cursorDiskKV"
2317
+ rows = t.cast("cabc.Iterable[KeyValueRow]", connection.execute(query))
2318
+ for key, value in rows:
2319
+ if isinstance(key, str):
2320
+ yield key, value
2321
+
2322
+
2323
+ def iter_conversation_summaries(
2324
+ connection: sqlite3.Connection,
2325
+ ) -> cabc.Iterator[SummaryRow]:
2326
+ """Yield typed rows from Cursor AI tracking summaries."""
2327
+ query = """
2328
+ SELECT
2329
+ conversationId,
2330
+ title,
2331
+ tldr,
2332
+ overview,
2333
+ summaryBullets,
2334
+ model,
2335
+ mode,
2336
+ updatedAt
2337
+ FROM conversation_summaries
2338
+ """
2339
+ rows = t.cast("cabc.Iterable[SummaryRow]", connection.execute(query))
2340
+ yield from rows
2341
+
2342
+
2343
+ def build_grep_command(
2344
+ grep_program: str,
2345
+ term: str,
2346
+ target: pathlib.Path,
2347
+ *,
2348
+ regex: bool,
2349
+ case_sensitive: bool,
2350
+ ) -> list[str]:
2351
+ """Build a read-only grep command for one term and target."""
2352
+ command = [grep_program, "-l", term, str(target)]
2353
+ if not regex:
2354
+ fixed_flag = "-F" if grep_program.endswith("rg") else "-Q"
2355
+ command.insert(2, fixed_flag)
2356
+ if not case_sensitive:
2357
+ command.insert(1, "-i")
2358
+ return command
2359
+
2360
+
2361
+ def flatten_json_strings_with_tool(
2362
+ path: pathlib.Path,
2363
+ program: str,
2364
+ *,
2365
+ control: SearchControl | None = None,
2366
+ ) -> str | None:
2367
+ """Return flattened JSON strings using ``jq`` or ``jaq``."""
2368
+ command = [program, "-r", ".. | strings", str(path)]
2369
+ completed = run_readonly_command(command, control=control)
2370
+ if completed.returncode != 0:
2371
+ return None
2372
+ return completed.stdout
2373
+
2374
+
2375
+ def grep_file_matches(
2376
+ path: pathlib.Path,
2377
+ query: SearchQuery,
2378
+ program: str,
2379
+ *,
2380
+ control: SearchControl | None = None,
2381
+ ) -> bool | None:
2382
+ """Use ``rg`` or ``ag`` as a read-only prefilter."""
2383
+ active_control = SearchControl() if control is None else control
2384
+ matchers = [
2385
+ run_readonly_command(
2386
+ build_grep_command(
2387
+ program,
2388
+ term,
2389
+ path,
2390
+ regex=query.regex,
2391
+ case_sensitive=query.case_sensitive,
2392
+ ),
2393
+ control=active_control,
2394
+ ).returncode
2395
+ == 0
2396
+ for term in query.terms
2397
+ if not active_control.answer_now_requested()
2398
+ ]
2399
+ if active_control.answer_now_requested():
2400
+ return False
2401
+ return any(matchers) if query.any_term else all(matchers)
2402
+
2403
+
2404
+ def read_text_file(path: pathlib.Path) -> str:
2405
+ """Read a text file with replacement for decode errors."""
2406
+ try:
2407
+ return path.read_text(encoding="utf-8", errors="replace")
2408
+ except OSError:
2409
+ return ""
2410
+
2411
+
2412
+ def read_json_file(path: pathlib.Path) -> JSONValue | None:
2413
+ """Read a JSON file."""
2414
+ try:
2415
+ parsed = t.cast("object", json.loads(path.read_text(encoding="utf-8")))
2416
+ except OSError, json.JSONDecodeError:
2417
+ return None
2418
+ if isinstance(parsed, (dict, list, str, int, float, bool)) or parsed is None:
2419
+ return t.cast("JSONValue", parsed)
2420
+ return None
2421
+
2422
+
2423
+ def iter_jsonl(path: pathlib.Path) -> cabc.Iterator[JSONValue]:
2424
+ """Yield decoded JSON objects from a JSONL file."""
2425
+ try:
2426
+ with path.open(encoding="utf-8") as handle:
2427
+ for line in handle:
2428
+ stripped = line.strip()
2429
+ if not stripped:
2430
+ continue
2431
+ try:
2432
+ parsed = t.cast("object", json.loads(stripped))
2433
+ except json.JSONDecodeError:
2434
+ continue
2435
+ if isinstance(parsed, (dict, list, str, int, float, bool)) or parsed is None:
2436
+ yield t.cast("JSONValue", parsed)
2437
+ except OSError:
2438
+ return
2439
+
2440
+
2441
+ def candidate_from_mapping(
2442
+ mapping: dict[str, object],
2443
+ *,
2444
+ timestamp: str | None,
2445
+ model: str | None,
2446
+ session_id: str | None,
2447
+ conversation_id: str | None,
2448
+ ) -> MessageCandidate | None:
2449
+ """Extract one message candidate from a known message-like mapping."""
2450
+ role = extract_role(mapping)
2451
+ text = extract_message_text(mapping)
2452
+ if role is None or not text:
2453
+ return None
2454
+ return MessageCandidate(
2455
+ role=role,
2456
+ text=text,
2457
+ title=extract_title(mapping),
2458
+ timestamp=timestamp or extract_timestamp(mapping),
2459
+ model=model or extract_model(mapping),
2460
+ session_id=session_id or extract_session_id(mapping),
2461
+ conversation_id=conversation_id or extract_conversation_id(mapping),
2462
+ )
2463
+
2464
+
2465
+ def iter_message_candidates(
2466
+ value: JSONValue | None,
2467
+ *,
2468
+ fallback_title: str | None = None,
2469
+ fallback_conversation_id: str | None = None,
2470
+ ) -> cabc.Iterator[MessageCandidate]:
2471
+ """Recursively walk a JSON value and yield message candidates."""
2472
+ if isinstance(value, dict):
2473
+ mapping = t.cast("dict[str, object]", value)
2474
+ role = extract_role(mapping)
2475
+ text = extract_message_text(mapping)
2476
+ if role is not None and text:
2477
+ yield MessageCandidate(
2478
+ role=role,
2479
+ text=text,
2480
+ title=extract_title(mapping) or fallback_title,
2481
+ timestamp=extract_timestamp(mapping),
2482
+ model=extract_model(mapping),
2483
+ session_id=extract_session_id(mapping),
2484
+ conversation_id=extract_conversation_id(mapping) or fallback_conversation_id,
2485
+ )
2486
+ for nested in mapping.values():
2487
+ yield from iter_message_candidates(
2488
+ t.cast("JSONValue | None", nested),
2489
+ fallback_title=fallback_title,
2490
+ fallback_conversation_id=fallback_conversation_id,
2491
+ )
2492
+ elif isinstance(value, list):
2493
+ for item in value:
2494
+ yield from iter_message_candidates(
2495
+ item,
2496
+ fallback_title=fallback_title,
2497
+ fallback_conversation_id=fallback_conversation_id,
2498
+ )
2499
+
2500
+
2501
+ def extract_role(mapping: dict[str, object]) -> str | None:
2502
+ """Extract a normalized role from a mapping."""
2503
+ for key in ("role", "sender", "author", "speaker"):
2504
+ value = mapping.get(key)
2505
+ if isinstance(value, str) and value.strip():
2506
+ return value.strip()
2507
+ if isinstance(value, dict):
2508
+ nested_mapping = t.cast("dict[str, object]", value)
2509
+ nested = as_optional_str(nested_mapping.get("role")) or as_optional_str(
2510
+ nested_mapping.get("name"),
2511
+ )
2512
+ if nested is not None:
2513
+ return nested
2514
+ return None
2515
+
2516
+
2517
+ def extract_message_text(mapping: dict[str, object]) -> str | None:
2518
+ """Extract message text from common content fields."""
2519
+ for key in ("content", "text", "message", "body", "prompt", "value", "parts"):
2520
+ if key in mapping:
2521
+ flattened = flatten_content_value(t.cast("JSONValue | None", mapping[key]))
2522
+ if flattened:
2523
+ return flattened
2524
+ return None
2525
+
2526
+
2527
+ def flatten_content_value(value: JSONValue | None) -> str | None:
2528
+ """Flatten a message content payload into text."""
2529
+ parts = list(iter_text_fragments(value))
2530
+ if not parts:
2531
+ return None
2532
+ return "\n".join(part for part in parts if part.strip()).strip() or None
2533
+
2534
+
2535
+ def iter_text_fragments(
2536
+ value: JSONValue | None,
2537
+ ) -> cabc.Iterator[str]:
2538
+ """Yield text fragments from a nested content payload."""
2539
+ if isinstance(value, str):
2540
+ stripped = value.strip()
2541
+ if stripped:
2542
+ yield stripped
2543
+ return
2544
+ if isinstance(value, list):
2545
+ for item in value:
2546
+ yield from iter_text_fragments(item)
2547
+ return
2548
+ if isinstance(value, dict):
2549
+ mapping = t.cast("dict[str, object]", value)
2550
+ for key in ("text", "content", "message", "body", "prompt", "value", "parts"):
2551
+ if key in mapping:
2552
+ yield from iter_text_fragments(t.cast("JSONValue | None", mapping[key]))
2553
+
2554
+
2555
+ def extract_title(mapping: dict[str, object]) -> str | None:
2556
+ """Extract a title-like field."""
2557
+ for key in ("title", "name", "topic"):
2558
+ title = as_optional_str(mapping.get(key))
2559
+ if title is not None:
2560
+ return title
2561
+ return None
2562
+
2563
+
2564
+ def extract_timestamp(mapping: dict[str, object]) -> str | None:
2565
+ """Extract a timestamp-like field."""
2566
+ for key in ("timestamp", "updatedAt", "createdAt", "ts"):
2567
+ timestamp = as_optional_str(mapping.get(key))
2568
+ if timestamp is not None:
2569
+ return timestamp
2570
+ return None
2571
+
2572
+
2573
+ def extract_model(mapping: dict[str, object]) -> str | None:
2574
+ """Extract a model name."""
2575
+ for key in ("model", "modelName", "model_name"):
2576
+ model = as_optional_str(mapping.get(key))
2577
+ if model is not None:
2578
+ return model
2579
+ return None
2580
+
2581
+
2582
+ def extract_session_id(mapping: dict[str, object]) -> str | None:
2583
+ """Extract a session identifier."""
2584
+ for key in ("session_id", "sessionId", "id"):
2585
+ value = as_optional_str(mapping.get(key))
2586
+ if value is not None:
2587
+ return value
2588
+ return None
2589
+
2590
+
2591
+ def extract_conversation_id(mapping: dict[str, object]) -> str | None:
2592
+ """Extract a conversation identifier."""
2593
+ for key in ("conversation_id", "conversationId", "threadId"):
2594
+ value = as_optional_str(mapping.get(key))
2595
+ if value is not None:
2596
+ return value
2597
+ return None
2598
+
2599
+
2600
+ def flatten_summary_bullets(value: object) -> str | None:
2601
+ """Flatten Cursor summary bullets."""
2602
+ if value is None:
2603
+ return None
2604
+ if isinstance(value, str):
2605
+ parsed = parse_embedded_json(value)
2606
+ if isinstance(parsed, list):
2607
+ bullets = [item for item in parsed if isinstance(item, str) and item.strip()]
2608
+ return "\n".join(f"- {item}" for item in bullets) if bullets else value.strip() or None
2609
+ return value.strip() or None
2610
+ if isinstance(value, (bytes, bytearray)):
2611
+ decoded = decode_sqlite_value(value)
2612
+ return flatten_summary_bullets(decoded)
2613
+ return None
2614
+
2615
+
2616
+ def decode_sqlite_value(value: object) -> str | None:
2617
+ """Decode a SQLite value into UTF-8 text if possible."""
2618
+ if isinstance(value, str):
2619
+ return value
2620
+ if isinstance(value, (bytes, bytearray)):
2621
+ return bytes(value).decode("utf-8", errors="replace")
2622
+ return None
2623
+
2624
+
2625
+ def parse_embedded_json(text: str) -> JSONValue | None:
2626
+ """Parse a JSON-encoded string, returning ``None`` when unavailable."""
2627
+ stripped = text.strip()
2628
+ if not stripped or stripped[0] not in "[{":
2629
+ return None
2630
+ try:
2631
+ parsed = t.cast("object", json.loads(stripped))
2632
+ except json.JSONDecodeError:
2633
+ return None
2634
+ if isinstance(parsed, (dict, list, str, int, float, bool)) or parsed is None:
2635
+ return t.cast("JSONValue", parsed)
2636
+ return None
2637
+
2638
+
2639
+ def build_search_record(source: SourceHandle, candidate: MessageCandidate) -> SearchRecord:
2640
+ """Convert a parsed candidate into a normalized search record."""
2641
+ role = candidate.role.casefold() if candidate.role is not None else None
2642
+ kind: t.Literal["prompt", "history"] = "prompt" if role in USER_ROLES else "history"
2643
+ return SearchRecord(
2644
+ kind=kind,
2645
+ agent=source.agent,
2646
+ store=source.store,
2647
+ adapter_id=source.adapter_id,
2648
+ path=source.path,
2649
+ text=candidate.text,
2650
+ title=candidate.title,
2651
+ role=candidate.role,
2652
+ timestamp=candidate.timestamp,
2653
+ model=candidate.model,
2654
+ session_id=candidate.session_id,
2655
+ conversation_id=candidate.conversation_id,
2656
+ )
2657
+
2658
+
2659
+ def matches_record(record: SearchRecord, query: SearchQuery) -> bool:
2660
+ """Return whether a normalized record should be included."""
2661
+ if query.search_type == "prompts" and record.kind != "prompt":
2662
+ return False
2663
+ if query.search_type == "history" and record.kind != "history":
2664
+ return False
2665
+ return matches_text(build_search_haystack(record), query)
2666
+
2667
+
2668
+ def build_search_haystack(record: SearchRecord) -> str:
2669
+ """Build a searchable text surface for a record."""
2670
+ parts = [
2671
+ record.title or "",
2672
+ record.text,
2673
+ record.model or "",
2674
+ record.role or "",
2675
+ str(record.path),
2676
+ ]
2677
+ return "\n".join(part for part in parts if part)
2678
+
2679
+
2680
+ def matches_text(text: str, query: SearchQuery) -> bool:
2681
+ """Return whether ``text`` matches the query."""
2682
+ if not query.terms:
2683
+ return True
2684
+ if query.regex:
2685
+ flags = 0 if query.case_sensitive else re.IGNORECASE
2686
+ results = [re.search(term, text, flags) is not None for term in query.terms]
2687
+ else:
2688
+ haystack = text if query.case_sensitive else text.casefold()
2689
+ needles = (
2690
+ query.terms if query.case_sensitive else tuple(term.casefold() for term in query.terms)
2691
+ )
2692
+ results = [needle in haystack for needle in needles]
2693
+ return any(results) if query.any_term else all(results)
2694
+
2695
+
2696
+ def search_record_sort_key(record: SearchRecord) -> tuple[str, str, str]:
2697
+ """Return a stable sort key."""
2698
+ return (record.timestamp or "", record.agent, str(record.path))
2699
+
2700
+
2701
+ def record_dedupe_key(record: SearchRecord) -> tuple[str, str, str, str, str]:
2702
+ """Return the per-session dedupe key for a search record."""
2703
+ session_identity = record.session_id or record.conversation_id or str(record.path)
2704
+ return (
2705
+ record.kind,
2706
+ record.agent,
2707
+ record.store,
2708
+ session_identity,
2709
+ record.text,
2710
+ )
2711
+
2712
+
2713
+ def as_optional_str(value: object) -> str | None:
2714
+ """Return a stripped string when possible."""
2715
+ if isinstance(value, str):
2716
+ stripped = value.strip()
2717
+ return stripped or None
2718
+ return None
2719
+
2720
+
2721
+ def maybe_use_pydantic() -> tuple[
2722
+ t.Callable[[SearchRecord], dict[str, object]],
2723
+ t.Callable[[FindRecord], dict[str, object]],
2724
+ EnvelopeFactory,
2725
+ ]:
2726
+ """Return serializers backed by Pydantic when available."""
2727
+ pydantic_module = t.cast(
2728
+ "PydanticModule",
2729
+ t.cast("object", importlib.import_module("pydantic")),
2730
+ )
2731
+ search_adapter = pydantic_module.TypeAdapter(SearchRecordPayload)
2732
+ find_adapter = pydantic_module.TypeAdapter(FindRecordPayload)
2733
+ envelope_adapter = pydantic_module.TypeAdapter(EnvelopePayload)
2734
+
2735
+ def pydantic_search(record: SearchRecord) -> dict[str, object]:
2736
+ payload = search_adapter.validate_python(serialize_search_record(record))
2737
+ dumped = search_adapter.dump_python(payload, mode="json")
2738
+ return t.cast("dict[str, object]", dumped)
2739
+
2740
+ def pydantic_find(record: FindRecord) -> dict[str, object]:
2741
+ payload = find_adapter.validate_python(serialize_find_record(record))
2742
+ dumped = find_adapter.dump_python(payload, mode="json")
2743
+ return t.cast("dict[str, object]", dumped)
2744
+
2745
+ def pydantic_envelope(
2746
+ command: str,
2747
+ query_data: dict[str, object],
2748
+ results: list[dict[str, object]],
2749
+ ) -> dict[str, object]:
2750
+ payload = envelope_adapter.validate_python(
2751
+ build_envelope(command, query_data, results),
2752
+ )
2753
+ dumped = envelope_adapter.dump_python(payload, mode="json")
2754
+ return t.cast("dict[str, object]", dumped)
2755
+
2756
+ return pydantic_search, pydantic_find, pydantic_envelope
2757
+
2758
+
2759
+ def maybe_build_pydantic() -> tuple[
2760
+ t.Callable[[SearchRecord], dict[str, object]],
2761
+ t.Callable[[FindRecord], dict[str, object]],
2762
+ EnvelopeFactory,
2763
+ ]:
2764
+ """Return Pydantic serializers or plain fallbacks."""
2765
+ try:
2766
+ return maybe_use_pydantic()
2767
+ except ImportError:
2768
+ return (
2769
+ lambda record: t.cast("dict[str, object]", serialize_search_record(record)),
2770
+ lambda record: t.cast("dict[str, object]", serialize_find_record(record)),
2771
+ lambda command, query_data, results: t.cast(
2772
+ "dict[str, object]",
2773
+ build_envelope(command, query_data, results),
2774
+ ),
2775
+ )
2776
+
2777
+
2778
+ def serialize_search_record(record: SearchRecord) -> SearchRecordPayload:
2779
+ """Serialize a search record to a JSON-compatible mapping."""
2780
+ return {
2781
+ "schema_version": SCHEMA_VERSION,
2782
+ "kind": record.kind,
2783
+ "agent": record.agent,
2784
+ "store": record.store,
2785
+ "adapter_id": record.adapter_id,
2786
+ "path": format_display_path(record.path),
2787
+ "text": record.text,
2788
+ "title": record.title,
2789
+ "role": record.role,
2790
+ "timestamp": record.timestamp,
2791
+ "model": record.model,
2792
+ "session_id": record.session_id,
2793
+ "conversation_id": record.conversation_id,
2794
+ "metadata": record.metadata,
2795
+ }
2796
+
2797
+
2798
+ def serialize_find_record(record: FindRecord) -> FindRecordPayload:
2799
+ """Serialize a find record to a JSON-compatible mapping."""
2800
+ return {
2801
+ "schema_version": SCHEMA_VERSION,
2802
+ "kind": record.kind,
2803
+ "agent": record.agent,
2804
+ "store": record.store,
2805
+ "adapter_id": record.adapter_id,
2806
+ "path": format_display_path(record.path),
2807
+ "path_kind": record.path_kind,
2808
+ "metadata": record.metadata,
2809
+ }
2810
+
2811
+
2812
+ def serialize_source_handle(source: SourceHandle) -> SourceHandlePayload:
2813
+ """Serialize a source handle to a JSON-compatible mapping."""
2814
+ return {
2815
+ "schema_version": SCHEMA_VERSION,
2816
+ "agent": source.agent,
2817
+ "store": source.store,
2818
+ "adapter_id": source.adapter_id,
2819
+ "path": format_display_path(source.path),
2820
+ "path_kind": source.path_kind,
2821
+ "source_kind": source.source_kind,
2822
+ "search_root": (
2823
+ None
2824
+ if source.search_root is None
2825
+ else format_display_path(source.search_root, directory=True)
2826
+ ),
2827
+ "mtime_ns": source.mtime_ns,
2828
+ }
2829
+
2830
+
2831
+ def build_envelope(
2832
+ command: str,
2833
+ query_data: dict[str, object],
2834
+ results: list[dict[str, object]],
2835
+ ) -> EnvelopePayload:
2836
+ """Build a JSON envelope."""
2837
+ return {
2838
+ "schema_version": SCHEMA_VERSION,
2839
+ "command": command,
2840
+ "query": query_data,
2841
+ "results": results,
2842
+ }
2843
+
2844
+
2845
+ def print_search_results(records: list[SearchRecord], args: SearchArgs) -> None:
2846
+ """Emit search results in the requested format."""
2847
+ serialize_search, _, serialize_envelope = maybe_build_pydantic()
2848
+ query_data: dict[str, object] = {
2849
+ "terms": list(args.terms),
2850
+ "agents": list(args.agents),
2851
+ "type": args.search_type,
2852
+ "any": args.any_term,
2853
+ "regex": args.regex,
2854
+ "case_sensitive": args.case_sensitive,
2855
+ "limit": args.limit,
2856
+ }
2857
+ if args.output_mode == "json":
2858
+ payload = serialize_envelope(
2859
+ "search",
2860
+ query_data,
2861
+ [serialize_search(record) for record in records],
2862
+ )
2863
+ print(json.dumps(payload, ensure_ascii=False, indent=2))
2864
+ return
2865
+ if args.output_mode == "ndjson":
2866
+ for record in records:
2867
+ print(json.dumps(serialize_search(record), ensure_ascii=False))
2868
+ return
2869
+ for index, record in enumerate(records, start=1):
2870
+ heading = f"[{index}] {record.agent} {record.kind} {record.store}"
2871
+ details = [record.timestamp, record.model, format_display_path(record.path)]
2872
+ print(heading)
2873
+ print(" | ".join(detail for detail in details if detail))
2874
+ if record.title:
2875
+ print(record.title)
2876
+ print()
2877
+ print(record.text)
2878
+ print()
2879
+
2880
+
2881
+ def search_progress_enabled(args: SearchArgs) -> bool:
2882
+ """Return whether search progress should be shown for ``args``."""
2883
+ human_output = args.output_mode in {"text", "ui"}
2884
+ return args.progress_mode == "always" or (args.progress_mode == "auto" and human_output)
2885
+
2886
+
2887
+ def should_enable_answer_now(
2888
+ args: SearchArgs,
2889
+ *,
2890
+ stdin: t.TextIO | None = None,
2891
+ stderr: t.TextIO | None = None,
2892
+ ) -> bool:
2893
+ """Return whether Enter should request a partial answer for this search."""
2894
+ input_stream = stdin if stdin is not None else sys.stdin
2895
+ error_stream = stderr if stderr is not None else sys.stderr
2896
+ return (
2897
+ args.output_mode == "text"
2898
+ and search_progress_enabled(args)
2899
+ and bool(getattr(input_stream, "isatty", lambda: False)())
2900
+ and bool(getattr(error_stream, "isatty", lambda: False)())
2901
+ )
2902
+
2903
+
2904
+ def build_search_progress(args: SearchArgs, *, answer_now_hint: bool = False) -> SearchProgress:
2905
+ """Build the progress reporter for a search invocation."""
2906
+ enabled = search_progress_enabled(args)
2907
+ if not enabled:
2908
+ return noop_search_progress()
2909
+ return ConsoleSearchProgress(
2910
+ enabled=True,
2911
+ color_mode=args.color_mode,
2912
+ answer_now_hint=answer_now_hint,
2913
+ )
2914
+
2915
+
2916
+ def print_find_results(records: list[FindRecord], args: FindArgs) -> None:
2917
+ """Emit find results in the requested format."""
2918
+ _, serialize_find, serialize_envelope = maybe_build_pydantic()
2919
+ query_data: dict[str, object] = {
2920
+ "pattern": args.pattern,
2921
+ "agents": list(args.agents),
2922
+ "limit": args.limit,
2923
+ }
2924
+ if args.output_mode == "json":
2925
+ payload = serialize_envelope(
2926
+ "find",
2927
+ query_data,
2928
+ [serialize_find(record) for record in records],
2929
+ )
2930
+ print(json.dumps(payload, ensure_ascii=False, indent=2))
2931
+ return
2932
+ if args.output_mode == "ndjson":
2933
+ for record in records:
2934
+ print(json.dumps(serialize_find(record), ensure_ascii=False))
2935
+ return
2936
+ for record in records:
2937
+ print(f"{record.agent} {record.path_kind} {record.store}")
2938
+ print(format_display_path(record.path))
2939
+ print()
2940
+
2941
+
2942
+ def run_ui(records: list[SearchRecord]) -> None:
2943
+ """Launch a small read-only Textual explorer."""
2944
+ try:
2945
+ textual_app = t.cast(
2946
+ "TextualAppModule",
2947
+ t.cast("object", importlib.import_module("textual.app")),
2948
+ )
2949
+ textual_containers = t.cast(
2950
+ "TextualContainersModule",
2951
+ t.cast("object", importlib.import_module("textual.containers")),
2952
+ )
2953
+ textual_widgets = t.cast(
2954
+ "TextualWidgetsModule",
2955
+ t.cast("object", importlib.import_module("textual.widgets")),
2956
+ )
2957
+ except ImportError as error:
2958
+ msg = "Textual is required for --ui. Run with `uv run py/agentgrep.py ... --ui`."
2959
+ raise RuntimeError(msg) from error
2960
+
2961
+ app_type = textual_app.App
2962
+ horizontal = textual_containers.Horizontal
2963
+ vertical = textual_containers.Vertical
2964
+ data_table_type = textual_widgets.DataTable
2965
+ footer = textual_widgets.Footer
2966
+ header = textual_widgets.Header
2967
+ input_widget = textual_widgets.Input
2968
+ static_type = textual_widgets.Static
2969
+
2970
+ class AgentGrepApp(app_type): # ty: ignore[unsupported-base]
2971
+ """Read-only explorer for normalized search records."""
2972
+
2973
+ CSS: t.ClassVar[str] = """
2974
+ Screen {
2975
+ layout: vertical;
2976
+ }
2977
+ #body {
2978
+ height: 1fr;
2979
+ }
2980
+ #detail {
2981
+ border: round $accent;
2982
+ padding: 1 2;
2983
+ overflow-y: auto;
2984
+ }
2985
+ DataTable {
2986
+ height: 1fr;
2987
+ }
2988
+ """
2989
+ BINDINGS: t.ClassVar[list[tuple[str, str, str]]] = [("q", "quit", "Quit")]
2990
+ all_records: list[SearchRecord]
2991
+ filtered_records: list[SearchRecord]
2992
+
2993
+ def __init__(self, initial_records: list[SearchRecord]) -> None:
2994
+ super().__init__()
2995
+ self.all_records = initial_records
2996
+ self.filtered_records = initial_records
2997
+
2998
+ def compose(self) -> cabc.Iterator[object]:
2999
+ yield header()
3000
+ yield input_widget(placeholder="Filter by keyword", id="filter")
3001
+ with horizontal(id="body"):
3002
+ yield data_table_type(id="results")
3003
+ with vertical():
3004
+ yield static_type("Select a result to inspect full text.", id="detail")
3005
+ yield footer()
3006
+
3007
+ def on_mount(self) -> None:
3008
+ app = t.cast("QueryAppLike", t.cast("object", self))
3009
+ table = t.cast("DataTableLike", app.query_one(data_table_type))
3010
+ table.cursor_type = "row"
3011
+ table.add_columns("Agent", "Kind", "Timestamp", "Title", "Path")
3012
+ self.refresh_table()
3013
+
3014
+ def on_input_changed(self, event: object) -> None:
3015
+ value = str(getattr(event, "value", "")).strip().casefold()
3016
+ self.filtered_records = (
3017
+ self.all_records
3018
+ if not value
3019
+ else [
3020
+ record
3021
+ for record in self.all_records
3022
+ if value in build_search_haystack(record).casefold()
3023
+ ]
3024
+ )
3025
+ self.refresh_table()
3026
+
3027
+ def refresh_table(self) -> None:
3028
+ app = t.cast("QueryAppLike", t.cast("object", self))
3029
+ table = t.cast("DataTableLike", app.query_one(data_table_type))
3030
+ table.clear()
3031
+ for record in self.filtered_records:
3032
+ table.add_row(
3033
+ record.agent,
3034
+ record.kind,
3035
+ record.timestamp or "",
3036
+ record.title or "",
3037
+ format_display_path(record.path),
3038
+ key=str(id(record)),
3039
+ )
3040
+ if self.filtered_records:
3041
+ self.show_detail(self.filtered_records[0])
3042
+ else:
3043
+ detail = t.cast("StaticLike", app.query_one("#detail", static_type))
3044
+ detail.update("No results.")
3045
+
3046
+ def on_data_table_row_highlighted(self, event: object) -> None:
3047
+ row_index = int(getattr(event, "cursor_row", -1))
3048
+ if 0 <= row_index < len(self.filtered_records):
3049
+ self.show_detail(self.filtered_records[row_index])
3050
+
3051
+ def show_detail(self, record: SearchRecord) -> None:
3052
+ details = [
3053
+ f"Agent: {record.agent}",
3054
+ f"Kind: {record.kind}",
3055
+ f"Store: {record.store}",
3056
+ f"Adapter: {record.adapter_id}",
3057
+ f"Timestamp: {record.timestamp or 'unknown'}",
3058
+ f"Model: {record.model or 'unknown'}",
3059
+ f"Path: {format_display_path(record.path)}",
3060
+ "",
3061
+ record.text,
3062
+ ]
3063
+ app = t.cast("QueryAppLike", t.cast("object", self))
3064
+ detail = t.cast("StaticLike", app.query_one("#detail", static_type))
3065
+ detail.update("\n".join(details))
3066
+
3067
+ app = t.cast("RunnableAppLike", t.cast("object", AgentGrepApp(records)))
3068
+ app.run()
3069
+
3070
+
3071
+ def run_search_command(args: SearchArgs) -> int:
3072
+ """Execute ``agentgrep search``."""
3073
+ if not args.terms and args.output_mode != "ui":
3074
+ msg = "search requires at least one term unless --ui is used"
3075
+ raise SystemExit(msg)
3076
+ query = make_search_query(args)
3077
+ answer_now_enabled = should_enable_answer_now(args)
3078
+ control = SearchControl()
3079
+ listener = AnswerNowInputListener(control) if answer_now_enabled else None
3080
+ progress = build_search_progress(args, answer_now_hint=answer_now_enabled)
3081
+ if listener is not None:
3082
+ listener.start()
3083
+ try:
3084
+ records = run_search_query(
3085
+ pathlib.Path.home(),
3086
+ query,
3087
+ progress=progress,
3088
+ control=control,
3089
+ )
3090
+ finally:
3091
+ if listener is not None:
3092
+ listener.stop()
3093
+ if args.output_mode == "ui":
3094
+ run_ui(records)
3095
+ return 0
3096
+ print_search_results(records, args)
3097
+ if records:
3098
+ return 0
3099
+ if args.output_mode == "text":
3100
+ print("No matches found.", file=sys.stderr)
3101
+ return 1
3102
+
3103
+
3104
+ def run_find_command(args: FindArgs) -> int:
3105
+ """Execute ``agentgrep find``."""
3106
+ records = run_find_query(
3107
+ pathlib.Path.home(),
3108
+ args.agents,
3109
+ pattern=args.pattern,
3110
+ limit=args.limit,
3111
+ )
3112
+ print_find_results(records, args)
3113
+ if records:
3114
+ return 0
3115
+ if args.output_mode == "text":
3116
+ print("No matching sources found.", file=sys.stderr)
3117
+ return 1
3118
+
3119
+
3120
+ def _exit_on_sigint() -> t.NoReturn:
3121
+ """Terminate with Ctrl-C signal semantics where the platform supports them."""
3122
+ if sys.platform == "win32":
3123
+ raise SystemExit(130)
3124
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
3125
+ signal.signal(signal.SIGINT, signal.SIG_DFL)
3126
+ signal.raise_signal(signal.SIGINT)
3127
+ raise SystemExit(130) # pragma: no cover
3128
+
3129
+
3130
+ def _write_interrupt_notice() -> None:
3131
+ with contextlib.suppress(OSError, ValueError):
3132
+ sys.stderr.write("Interrupted by user.\n")
3133
+ sys.stderr.flush()
3134
+
3135
+
3136
+ def main(argv: cabc.Sequence[str] | None = None) -> int:
3137
+ """Run the CLI."""
3138
+ try:
3139
+ parsed = parse_args(argv)
3140
+ if parsed is None:
3141
+ return 0
3142
+ if isinstance(parsed, SearchArgs):
3143
+ return run_search_command(parsed)
3144
+ return run_find_command(parsed)
3145
+ except KeyboardInterrupt:
3146
+ _write_interrupt_notice()
3147
+ _exit_on_sigint()
3148
+
3149
+
3150
+ if __name__ == "__main__":
3151
+ raise SystemExit(main())