rlmgrep 0.1.18__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlmgrep/__init__.py +1 -1
- rlmgrep/cli.py +153 -8
- rlmgrep/ingest.py +30 -6
- {rlmgrep-0.1.18.dist-info → rlmgrep-0.1.24.dist-info}/METADATA +3 -2
- rlmgrep-0.1.24.dist-info/RECORD +14 -0
- rlmgrep-0.1.18.dist-info/RECORD +0 -14
- {rlmgrep-0.1.18.dist-info → rlmgrep-0.1.24.dist-info}/WHEEL +0 -0
- {rlmgrep-0.1.18.dist-info → rlmgrep-0.1.24.dist-info}/entry_points.txt +0 -0
- {rlmgrep-0.1.18.dist-info → rlmgrep-0.1.24.dist-info}/top_level.txt +0 -0
rlmgrep/__init__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
__all__ = ["__version__"]
|
|
2
|
-
__version__ = "0.1.18"
|
|
2
|
+
__version__ = "0.1.24"
|
rlmgrep/cli.py
CHANGED
|
@@ -1,13 +1,26 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import argparse
|
|
4
|
+
import logging
|
|
4
5
|
import os
|
|
5
|
-
import sys
|
|
6
6
|
import shutil
|
|
7
7
|
import subprocess
|
|
8
|
+
import sys
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
|
|
10
11
|
import dspy
|
|
12
|
+
from rich import box
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
from rich.panel import Panel
|
|
15
|
+
from rich.progress import (
|
|
16
|
+
BarColumn,
|
|
17
|
+
Progress,
|
|
18
|
+
SpinnerColumn,
|
|
19
|
+
TaskProgressColumn,
|
|
20
|
+
TextColumn,
|
|
21
|
+
TimeElapsedColumn,
|
|
22
|
+
)
|
|
23
|
+
from rich.text import Text
|
|
11
24
|
from . import __version__
|
|
12
25
|
from .config import ensure_default_config, load_config
|
|
13
26
|
from .file_map import build_file_map
|
|
@@ -26,6 +39,80 @@ def _warn(msg: str) -> None:
|
|
|
26
39
|
print(f"rlmgrep: {msg}", file=sys.stderr)
|
|
27
40
|
|
|
28
41
|
|
|
42
|
+
def _console() -> Console:
|
|
43
|
+
use_color = sys.stderr.isatty() and not os.getenv("NO_COLOR")
|
|
44
|
+
return Console(stderr=True, force_terminal=use_color, color_system="auto")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class _RLMIterationHandler(logging.Handler):
|
|
48
|
+
def __init__(self, console: Console) -> None:
|
|
49
|
+
super().__init__(level=logging.INFO)
|
|
50
|
+
self._console = console
|
|
51
|
+
self._title: str | None = None
|
|
52
|
+
self._lines: list[str] = []
|
|
53
|
+
|
|
54
|
+
def emit(self, record: logging.LogRecord) -> None:
|
|
55
|
+
msg = record.getMessage()
|
|
56
|
+
if "RLM iteration" in msg:
|
|
57
|
+
self.flush_panel()
|
|
58
|
+
self._title = "RLM iteration"
|
|
59
|
+
self._lines = [msg]
|
|
60
|
+
return
|
|
61
|
+
if self._title is None:
|
|
62
|
+
self._title = "RLM output"
|
|
63
|
+
self._lines.append(msg)
|
|
64
|
+
|
|
65
|
+
def flush_panel(self) -> None:
|
|
66
|
+
if self._title is None:
|
|
67
|
+
return
|
|
68
|
+
body = "\n".join(self._lines).strip() or " "
|
|
69
|
+
self._console.print(
|
|
70
|
+
Panel(
|
|
71
|
+
Text(body),
|
|
72
|
+
title=self._title,
|
|
73
|
+
border_style="blue",
|
|
74
|
+
box=box.ROUNDED,
|
|
75
|
+
)
|
|
76
|
+
)
|
|
77
|
+
self._title = None
|
|
78
|
+
self._lines = []
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _setup_verbose_logging(console: Console) -> _RLMIterationHandler:
|
|
82
|
+
logger = logging.getLogger("dspy.predict.rlm")
|
|
83
|
+
handler = _RLMIterationHandler(console)
|
|
84
|
+
logger.addHandler(handler)
|
|
85
|
+
logger.setLevel(logging.INFO)
|
|
86
|
+
logger.propagate = False
|
|
87
|
+
return handler
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _print_answer(console: Console, answer: str) -> None:
|
|
91
|
+
text = Text(answer.strip())
|
|
92
|
+
panel = Panel(
|
|
93
|
+
text,
|
|
94
|
+
title="Answer",
|
|
95
|
+
border_style="cyan",
|
|
96
|
+
box=box.ROUNDED,
|
|
97
|
+
)
|
|
98
|
+
console.print(panel)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _print_matches(console: Console, lines: list[str], use_color: bool) -> None:
|
|
102
|
+
body = "\n".join(lines).strip()
|
|
103
|
+
if not body:
|
|
104
|
+
body = "No matches"
|
|
105
|
+
text = Text.from_ansi(body) if use_color else Text(body)
|
|
106
|
+
console.print(
|
|
107
|
+
Panel(
|
|
108
|
+
text,
|
|
109
|
+
title="Matches",
|
|
110
|
+
border_style="cyan",
|
|
111
|
+
box=box.ROUNDED,
|
|
112
|
+
)
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
|
|
29
116
|
def _confirm_over_limit(count: int, threshold: int) -> bool:
|
|
30
117
|
prompt = (
|
|
31
118
|
f"rlmgrep: {count} files to load (over {threshold}). Continue? [y/N] "
|
|
@@ -450,6 +537,9 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
450
537
|
for w in config_warnings:
|
|
451
538
|
_warn(w)
|
|
452
539
|
|
|
540
|
+
console = _console()
|
|
541
|
+
progress = None
|
|
542
|
+
|
|
453
543
|
# Resolve input corpus.
|
|
454
544
|
globs = _split_list(args.globs)
|
|
455
545
|
type_names = _split_list(args.types)
|
|
@@ -511,7 +601,29 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
511
601
|
extra_ignores.extend(_global_ignore_paths(ignore_root))
|
|
512
602
|
ignore_spec = build_ignore_spec(ignore_root, extra_paths=extra_ignores)
|
|
513
603
|
|
|
514
|
-
|
|
604
|
+
scan_task = None
|
|
605
|
+
load_task = None
|
|
606
|
+
scan_count = 0
|
|
607
|
+
if sys.stderr.isatty():
|
|
608
|
+
progress = Progress(
|
|
609
|
+
SpinnerColumn(),
|
|
610
|
+
TextColumn("{task.description}"),
|
|
611
|
+
BarColumn(),
|
|
612
|
+
TaskProgressColumn(),
|
|
613
|
+
TimeElapsedColumn(),
|
|
614
|
+
console=console,
|
|
615
|
+
transient=False,
|
|
616
|
+
)
|
|
617
|
+
progress.start()
|
|
618
|
+
scan_task = progress.add_task("Scanning files", total=None)
|
|
619
|
+
|
|
620
|
+
def _scan_update(count: int) -> None:
|
|
621
|
+
nonlocal scan_count
|
|
622
|
+
scan_count = count
|
|
623
|
+
if progress is not None and scan_task is not None:
|
|
624
|
+
progress.update(scan_task, completed=count)
|
|
625
|
+
|
|
626
|
+
candidates, scanned = collect_candidates(
|
|
515
627
|
input_paths,
|
|
516
628
|
cwd=cwd,
|
|
517
629
|
recursive=args.recursive,
|
|
@@ -520,12 +632,21 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
520
632
|
include_hidden=args.hidden,
|
|
521
633
|
ignore_spec=ignore_spec,
|
|
522
634
|
ignore_root=ignore_root,
|
|
635
|
+
scan_progress=_scan_update if progress is not None else None,
|
|
523
636
|
)
|
|
637
|
+
if progress is not None and scan_task is not None:
|
|
638
|
+
progress.update(
|
|
639
|
+
scan_task,
|
|
640
|
+
total=scanned or scan_count,
|
|
641
|
+
completed=scanned or scan_count,
|
|
642
|
+
)
|
|
524
643
|
candidate_count = len(candidates)
|
|
525
644
|
if hard_max is not None and candidate_count > hard_max:
|
|
526
645
|
_warn(
|
|
527
646
|
f"{candidate_count} files to load (over {hard_max}); aborting"
|
|
528
647
|
)
|
|
648
|
+
if progress is not None:
|
|
649
|
+
progress.stop()
|
|
529
650
|
return 2
|
|
530
651
|
if (
|
|
531
652
|
warn_threshold is not None
|
|
@@ -533,7 +654,19 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
533
654
|
and not args.yes
|
|
534
655
|
):
|
|
535
656
|
if not _confirm_over_limit(candidate_count, warn_threshold):
|
|
657
|
+
if progress is not None:
|
|
658
|
+
progress.stop()
|
|
536
659
|
return 2
|
|
660
|
+
if progress is not None:
|
|
661
|
+
load_task = progress.add_task(
|
|
662
|
+
"Loading files",
|
|
663
|
+
total=candidate_count,
|
|
664
|
+
completed=0,
|
|
665
|
+
)
|
|
666
|
+
|
|
667
|
+
def _load_update(done: int, total: int) -> None:
|
|
668
|
+
if progress is not None and load_task is not None:
|
|
669
|
+
progress.update(load_task, completed=done, total=total)
|
|
537
670
|
|
|
538
671
|
files, warnings = load_files(
|
|
539
672
|
candidates,
|
|
@@ -543,7 +676,10 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
543
676
|
enable_audio=md_enable_audio,
|
|
544
677
|
audio_transcriber=audio_transcriber,
|
|
545
678
|
binary_as_text=args.binary_as_text,
|
|
679
|
+
progress=_load_update if progress is not None else None,
|
|
546
680
|
)
|
|
681
|
+
if progress is not None:
|
|
682
|
+
progress.stop()
|
|
547
683
|
|
|
548
684
|
for w in warnings:
|
|
549
685
|
_warn(w)
|
|
@@ -620,6 +756,10 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
620
756
|
|
|
621
757
|
directory = {k: v.text for k, v in files.items()}
|
|
622
758
|
|
|
759
|
+
verbose_handler = None
|
|
760
|
+
if args.verbose:
|
|
761
|
+
verbose_handler = _setup_verbose_logging(console)
|
|
762
|
+
|
|
623
763
|
try:
|
|
624
764
|
proposed, answer = run_rlm(
|
|
625
765
|
directory=directory,
|
|
@@ -634,6 +774,9 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
634
774
|
except Exception as exc: # pragma: no cover - defensive
|
|
635
775
|
_warn(f"RLM failure: {exc}")
|
|
636
776
|
return 2
|
|
777
|
+
finally:
|
|
778
|
+
if verbose_handler is not None:
|
|
779
|
+
verbose_handler.flush_panel()
|
|
637
780
|
|
|
638
781
|
verified, dropped = verify_matches(proposed, files)
|
|
639
782
|
if dropped:
|
|
@@ -660,13 +803,15 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
660
803
|
heading=True,
|
|
661
804
|
)
|
|
662
805
|
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
print("--")
|
|
806
|
+
stdout_console = Console(force_terminal=use_color, color_system="auto")
|
|
807
|
+
if args.answer and answer:
|
|
808
|
+
_print_answer(console, answer)
|
|
667
809
|
|
|
668
|
-
|
|
669
|
-
|
|
810
|
+
if use_color and sys.stdout.isatty():
|
|
811
|
+
_print_matches(stdout_console, output_lines, use_color=use_color)
|
|
812
|
+
else:
|
|
813
|
+
for line in output_lines:
|
|
814
|
+
print(line)
|
|
670
815
|
|
|
671
816
|
total_matches = sum(len(lines) for lines in verified.values())
|
|
672
817
|
if total_matches > 0:
|
rlmgrep/ingest.py
CHANGED
|
@@ -147,20 +147,33 @@ def _load_file(
|
|
|
147
147
|
return None, None, str(exc)
|
|
148
148
|
|
|
149
149
|
|
|
150
|
-
def collect_files(
|
|
150
|
+
def collect_files(
|
|
151
|
+
paths: Iterable[str],
|
|
152
|
+
recursive: bool = True,
|
|
153
|
+
progress: Callable[[int], None] | None = None,
|
|
154
|
+
) -> list[Path]:
|
|
151
155
|
files: list[Path] = []
|
|
156
|
+
scanned = 0
|
|
152
157
|
for raw in paths:
|
|
153
158
|
p = Path(raw)
|
|
154
159
|
if not p.exists():
|
|
155
160
|
continue
|
|
156
161
|
if p.is_dir():
|
|
157
162
|
if recursive:
|
|
158
|
-
|
|
163
|
+
for fp in p.rglob("*"):
|
|
164
|
+
if fp.is_file():
|
|
165
|
+
files.append(fp)
|
|
166
|
+
scanned += 1
|
|
167
|
+
if progress is not None:
|
|
168
|
+
progress(scanned)
|
|
159
169
|
else:
|
|
160
170
|
# No recursion: ignore directories.
|
|
161
171
|
continue
|
|
162
172
|
elif p.is_file():
|
|
163
173
|
files.append(p)
|
|
174
|
+
scanned += 1
|
|
175
|
+
if progress is not None:
|
|
176
|
+
progress(scanned)
|
|
164
177
|
return files
|
|
165
178
|
|
|
166
179
|
|
|
@@ -344,8 +357,10 @@ def collect_candidates(
|
|
|
344
357
|
include_hidden: bool = False,
|
|
345
358
|
ignore_spec: "pathspec.PathSpec | None" = None,
|
|
346
359
|
ignore_root: Path | None = None,
|
|
347
|
-
|
|
348
|
-
|
|
360
|
+
scan_progress: Callable[[int], None] | None = None,
|
|
361
|
+
) -> tuple[list[Path], int]:
|
|
362
|
+
files = collect_files(paths, recursive=recursive, progress=scan_progress)
|
|
363
|
+
scanned = len(files)
|
|
349
364
|
explicit_files: set[Path] = set()
|
|
350
365
|
ignore_root_resolved: Path | None = None
|
|
351
366
|
if ignore_root is not None:
|
|
@@ -384,7 +399,7 @@ def collect_candidates(
|
|
|
384
399
|
continue
|
|
385
400
|
|
|
386
401
|
candidates.append(fp)
|
|
387
|
-
return candidates
|
|
402
|
+
return candidates, scanned
|
|
388
403
|
|
|
389
404
|
|
|
390
405
|
def load_files(
|
|
@@ -395,13 +410,16 @@ def load_files(
|
|
|
395
410
|
enable_audio: bool = False,
|
|
396
411
|
audio_transcriber: Callable[[Path], str] | None = None,
|
|
397
412
|
binary_as_text: bool = False,
|
|
413
|
+
progress: Callable[[int, int], None] | None = None,
|
|
398
414
|
) -> tuple[dict[str, FileRecord], list[str]]:
|
|
399
415
|
records: dict[str, FileRecord] = {}
|
|
400
416
|
warnings: list[str] = []
|
|
401
417
|
image_convert_count = 0
|
|
402
418
|
audio_convert_count = 0
|
|
403
419
|
|
|
404
|
-
|
|
420
|
+
candidate_list = list(candidates)
|
|
421
|
+
total = len(candidate_list)
|
|
422
|
+
for idx, fp in enumerate(candidate_list, start=1):
|
|
405
423
|
try:
|
|
406
424
|
key = fp.relative_to(cwd).as_posix()
|
|
407
425
|
except ValueError:
|
|
@@ -432,15 +450,21 @@ def load_files(
|
|
|
432
450
|
}
|
|
433
451
|
if err not in silent_errors and "No converter attempted a conversion" not in err:
|
|
434
452
|
warnings.append(f"skip {fp}: {err}")
|
|
453
|
+
if progress is not None:
|
|
454
|
+
progress(idx, total)
|
|
435
455
|
continue
|
|
436
456
|
if text is None:
|
|
437
457
|
warnings.append(f"skip {fp}: unreadable")
|
|
458
|
+
if progress is not None:
|
|
459
|
+
progress(idx, total)
|
|
438
460
|
continue
|
|
439
461
|
|
|
440
462
|
lines = text.split("\n")
|
|
441
463
|
if page_map is not None and len(page_map) != len(lines):
|
|
442
464
|
page_map = None
|
|
443
465
|
records[key] = FileRecord(path=key, text=text, lines=lines, page_map=page_map)
|
|
466
|
+
if progress is not None:
|
|
467
|
+
progress(idx, total)
|
|
444
468
|
|
|
445
469
|
if image_convert_count > 5:
|
|
446
470
|
warnings.append(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlmgrep
|
|
3
|
-
Version: 0.1.18
|
|
3
|
+
Version: 0.1.24
|
|
4
4
|
Summary: Grep-shaped CLI search powered by DSPy RLM
|
|
5
5
|
Author: rlmgrep
|
|
6
6
|
License: MIT
|
|
@@ -10,6 +10,7 @@ Requires-Dist: dspy>=3.1.1
|
|
|
10
10
|
Requires-Dist: markitdown[all]>=0.1.4
|
|
11
11
|
Requires-Dist: pathspec>=0.12.1
|
|
12
12
|
Requires-Dist: pypdf>=4.0.0
|
|
13
|
+
Requires-Dist: rich>=13.7.0
|
|
13
14
|
|
|
14
15
|
# rlmgrep
|
|
15
16
|
|
|
@@ -18,7 +19,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
|
|
|
18
19
|
## Quickstart
|
|
19
20
|
|
|
20
21
|
```sh
|
|
21
|
-
uv tool install rlmgrep
|
|
22
|
+
uv tool install --python 3.11 rlmgrep
|
|
22
23
|
# or from GitHub:
|
|
23
24
|
# uv tool install git+https://github.com/halfprice06/rlmgrep.git
|
|
24
25
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
rlmgrep/__init__.py,sha256=AQHt4tNjQ9ZT8Ifjp-2cgmKUa_wBZ0HPWKSqVqYqMmg,49
|
|
2
|
+
rlmgrep/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
|
|
3
|
+
rlmgrep/cli.py,sha256=_WMxPi44qLaA-iJ_TqQzD8mVvwEQigQRNgyMaEYMhVc,27760
|
|
4
|
+
rlmgrep/config.py,sha256=u1iz-nI8dj-dZETbpIki3RQefHJEyi5oE5zE4_IR8kg,2399
|
|
5
|
+
rlmgrep/file_map.py,sha256=x2Ri1wzK8_87GUorsAV01K_nYLZcv30yIquDeTCcdEw,876
|
|
6
|
+
rlmgrep/ingest.py,sha256=906JUwWRC0XDoYRXs4-XdV3fay8mQc324l0suQLyS-k,13738
|
|
7
|
+
rlmgrep/interpreter.py,sha256=s_nMRxLlAU9C0JmUzUBW5NbVbuH67doVWF54K54STlA,2478
|
|
8
|
+
rlmgrep/render.py,sha256=mCTT6yuKNv7HJ46LzOyLkCbyBedCWSNd7UeubyLXcyM,3356
|
|
9
|
+
rlmgrep/rlm.py,sha256=i3rCTp8OABByF60Un5gO7265gaW4spwU0OFKIz4surg,5750
|
|
10
|
+
rlmgrep-0.1.24.dist-info/METADATA,sha256=tp03IXFCBKnSJjPGn_vNmqXxRgor7FBF0z33pyfXPq8,8011
|
|
11
|
+
rlmgrep-0.1.24.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
12
|
+
rlmgrep-0.1.24.dist-info/entry_points.txt,sha256=UV6QkEbkwBO1JJ53mm84_n35tVyOczPvOQ14ga7vrCI,45
|
|
13
|
+
rlmgrep-0.1.24.dist-info/top_level.txt,sha256=gTujSRsO58c80eN7aRH2cfe51FHxx8LJ1w1Y2YlHti0,8
|
|
14
|
+
rlmgrep-0.1.24.dist-info/RECORD,,
|
rlmgrep-0.1.18.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
rlmgrep/__init__.py,sha256=4P4PJ704cude_tDknqGG0LoqrFcJS7Bpzjp_q0uTPNg,49
|
|
2
|
-
rlmgrep/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
|
|
3
|
-
rlmgrep/cli.py,sha256=DbA8WDqkUrWYV5lItA_mlYB9v0H9ZOPm8JjZLIX1Y7E,23291
|
|
4
|
-
rlmgrep/config.py,sha256=u1iz-nI8dj-dZETbpIki3RQefHJEyi5oE5zE4_IR8kg,2399
|
|
5
|
-
rlmgrep/file_map.py,sha256=x2Ri1wzK8_87GUorsAV01K_nYLZcv30yIquDeTCcdEw,876
|
|
6
|
-
rlmgrep/ingest.py,sha256=3qPJ-FZfWpxwTJBSj_EPWNDCdDDgNgZIGyCTXyXOZfk,12891
|
|
7
|
-
rlmgrep/interpreter.py,sha256=s_nMRxLlAU9C0JmUzUBW5NbVbuH67doVWF54K54STlA,2478
|
|
8
|
-
rlmgrep/render.py,sha256=mCTT6yuKNv7HJ46LzOyLkCbyBedCWSNd7UeubyLXcyM,3356
|
|
9
|
-
rlmgrep/rlm.py,sha256=i3rCTp8OABByF60Un5gO7265gaW4spwU0OFKIz4surg,5750
|
|
10
|
-
rlmgrep-0.1.18.dist-info/METADATA,sha256=6doosFWzRkxGxbKynYyUMMmF6ih0rs8BWGdi0BMeCFs,7969
|
|
11
|
-
rlmgrep-0.1.18.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
12
|
-
rlmgrep-0.1.18.dist-info/entry_points.txt,sha256=UV6QkEbkwBO1JJ53mm84_n35tVyOczPvOQ14ga7vrCI,45
|
|
13
|
-
rlmgrep-0.1.18.dist-info/top_level.txt,sha256=gTujSRsO58c80eN7aRH2cfe51FHxx8LJ1w1Y2YlHti0,8
|
|
14
|
-
rlmgrep-0.1.18.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|