rlmgrep-0.1.3-py3-none-any.whl → rlmgrep-0.1.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlmgrep/__init__.py +1 -1
- rlmgrep/cli.py +53 -15
- rlmgrep/config.py +1 -1
- rlmgrep/render.py +35 -17
- rlmgrep/rlm.py +19 -1
- {rlmgrep-0.1.3.dist-info → rlmgrep-0.1.5.dist-info}/METADATA +9 -11
- rlmgrep-0.1.5.dist-info/RECORD +14 -0
- rlmgrep-0.1.3.dist-info/RECORD +0 -14
- {rlmgrep-0.1.3.dist-info → rlmgrep-0.1.5.dist-info}/WHEEL +0 -0
- {rlmgrep-0.1.3.dist-info → rlmgrep-0.1.5.dist-info}/entry_points.txt +0 -0
- {rlmgrep-0.1.3.dist-info → rlmgrep-0.1.5.dist-info}/top_level.txt +0 -0
rlmgrep/__init__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
__all__ = ["__version__"]
|
|
2
|
-
__version__ = "0.1.3"
|
|
2
|
+
__version__ = "0.1.5"
|
rlmgrep/cli.py
CHANGED
|
@@ -72,11 +72,10 @@ def _parse_args(argv: list[str]) -> argparse.Namespace:
|
|
|
72
72
|
parser.add_argument("pattern", nargs="?", help="Query string (interpreted by RLM)")
|
|
73
73
|
parser.add_argument("paths", nargs="*", help="Files or directories")
|
|
74
74
|
|
|
75
|
-
parser.add_argument("-n", dest="line_numbers", action="store_true", help="Show line numbers")
|
|
76
|
-
parser.add_argument("-H", dest="with_filename", action="store_true", help="Always show filenames")
|
|
75
|
+
parser.add_argument("-n", dest="line_numbers", action="store_true", help="Show line numbers (default)")
|
|
77
76
|
parser.add_argument("-r", dest="recursive", action="store_true", help="Recursive (directories are searched recursively by default)")
|
|
78
77
|
parser.add_argument("--no-recursive", dest="recursive", action="store_false", help="Do not recurse directories")
|
|
79
|
-
parser.set_defaults(recursive=True)
|
|
78
|
+
parser.set_defaults(recursive=True, line_numbers=True)
|
|
80
79
|
|
|
81
80
|
parser.add_argument("-C", dest="context", type=int, default=0, help="Context lines before/after")
|
|
82
81
|
parser.add_argument("-A", dest="after", type=int, default=None, help="Context lines after")
|
|
@@ -239,6 +238,33 @@ def _build_markitdown(config: dict, warnings: list[str]):
|
|
|
239
238
|
kwargs["base_url"] = api_base
|
|
240
239
|
return OpenAI(**kwargs)
|
|
241
240
|
|
|
241
|
+
class _LiteLLMClient:
|
|
242
|
+
def __init__(self, api_key: str | None, api_base: str | None):
|
|
243
|
+
try:
|
|
244
|
+
import litellm # type: ignore
|
|
245
|
+
except Exception as exc:
|
|
246
|
+
raise RuntimeError("litellm not available") from exc
|
|
247
|
+
self._litellm = litellm
|
|
248
|
+
self._api_key = api_key
|
|
249
|
+
self._api_base = api_base
|
|
250
|
+
self.chat = self._Chat(self)
|
|
251
|
+
|
|
252
|
+
class _Chat:
|
|
253
|
+
def __init__(self, parent):
|
|
254
|
+
self.completions = parent._Completions(parent)
|
|
255
|
+
|
|
256
|
+
class _Completions:
|
|
257
|
+
def __init__(self, parent):
|
|
258
|
+
self._parent = parent
|
|
259
|
+
|
|
260
|
+
def create(self, model: str, messages):
|
|
261
|
+
kwargs = {"model": model, "messages": messages}
|
|
262
|
+
if self._parent._api_key:
|
|
263
|
+
kwargs["api_key"] = self._parent._api_key
|
|
264
|
+
if self._parent._api_base:
|
|
265
|
+
kwargs["api_base"] = self._parent._api_base
|
|
266
|
+
return self._parent._litellm.completion(**kwargs)
|
|
267
|
+
|
|
242
268
|
llm_client = None
|
|
243
269
|
if enable_images:
|
|
244
270
|
if not llm_model:
|
|
@@ -246,18 +272,27 @@ def _build_markitdown(config: dict, warnings: list[str]):
|
|
|
246
272
|
"markitdown_enable_images set but markitdown_image_llm_model missing; skipping images"
|
|
247
273
|
)
|
|
248
274
|
enable_images = False
|
|
249
|
-
elif llm_provider != "openai":
|
|
250
|
-
warnings.append(
|
|
251
|
-
f"markitdown image LLM provider '{llm_provider}' not supported; skipping images"
|
|
252
|
-
)
|
|
253
|
-
enable_images = False
|
|
254
275
|
else:
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
276
|
+
if llm_provider == "openai":
|
|
277
|
+
llm_client = _openai_client(
|
|
278
|
+
llm_api_key,
|
|
279
|
+
llm_api_base,
|
|
280
|
+
"openai package missing; skipping image conversion",
|
|
281
|
+
)
|
|
282
|
+
if llm_client is None:
|
|
283
|
+
enable_images = False
|
|
284
|
+
elif llm_provider in {"gemini", "anthropic"}:
|
|
285
|
+
try:
|
|
286
|
+
llm_client = _LiteLLMClient(llm_api_key, llm_api_base)
|
|
287
|
+
except RuntimeError:
|
|
288
|
+
warnings.append(
|
|
289
|
+
"litellm not available; skipping image conversion"
|
|
290
|
+
)
|
|
291
|
+
enable_images = False
|
|
292
|
+
else:
|
|
293
|
+
warnings.append(
|
|
294
|
+
f"markitdown image LLM provider '{llm_provider}' not supported; skipping images"
|
|
295
|
+
)
|
|
261
296
|
enable_images = False
|
|
262
297
|
|
|
263
298
|
md_kwargs: dict[str, object] = {"enable_plugins": False}
|
|
@@ -513,13 +548,16 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
513
548
|
before = args.before if args.before is not None else args.context
|
|
514
549
|
after = args.after if args.after is not None else args.context
|
|
515
550
|
|
|
551
|
+
use_color = sys.stdout.isatty() and not os.getenv("NO_COLOR")
|
|
552
|
+
|
|
516
553
|
output_lines = render_matches(
|
|
517
554
|
files=files,
|
|
518
555
|
matches=verified,
|
|
519
556
|
show_line_numbers=args.line_numbers,
|
|
520
|
-
show_filename=args.with_filename,
|
|
521
557
|
before=before,
|
|
522
558
|
after=after,
|
|
559
|
+
use_color=use_color,
|
|
560
|
+
heading=True,
|
|
523
561
|
)
|
|
524
562
|
|
|
525
563
|
if args.answer:
|
rlmgrep/config.py
CHANGED
|
@@ -25,7 +25,7 @@ DEFAULT_CONFIG_TEXT = "\n".join(
|
|
|
25
25
|
"# markitdown_image_llm_api_base = \"\"",
|
|
26
26
|
"# markitdown_image_llm_prompt = \"\"",
|
|
27
27
|
"# markitdown_enable_audio = false",
|
|
28
|
-
"# markitdown_audio_model = \"gpt-4o-mini-transcribe\"",
|
|
28
|
+
"# markitdown_audio_model = \"gpt-4o-mini-transcribe-2025-12-15\"",
|
|
29
29
|
"# markitdown_audio_provider = \"openai\"",
|
|
30
30
|
"# markitdown_audio_api_key = \"\"",
|
|
31
31
|
"# markitdown_audio_api_base = \"\"",
|
rlmgrep/render.py
CHANGED
|
@@ -2,23 +2,37 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from .ingest import FileRecord
|
|
4
4
|
|
|
5
|
+
COLOR_RESET = "\x1b[0m"
|
|
6
|
+
COLOR_PATH = "\x1b[35m"
|
|
7
|
+
COLOR_LINE_NO = "\x1b[32m"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _colorize(text: str, color: str, use_color: bool) -> str:
|
|
11
|
+
if not use_color:
|
|
12
|
+
return text
|
|
13
|
+
return f"{color}{text}{COLOR_RESET}"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _format_heading(path: str, use_color: bool) -> str:
|
|
17
|
+
if not path.startswith((".", "/")):
|
|
18
|
+
path = f"./{path}"
|
|
19
|
+
return _colorize(path, COLOR_PATH, use_color)
|
|
20
|
+
|
|
5
21
|
|
|
6
22
|
def _format_line(
|
|
7
|
-
path: str,
|
|
8
23
|
line_no: int,
|
|
9
24
|
text: str,
|
|
10
25
|
is_match: bool,
|
|
11
|
-
show_filename: bool,
|
|
12
26
|
show_line_numbers: bool,
|
|
27
|
+
use_color: bool,
|
|
28
|
+
heading: bool,
|
|
13
29
|
) -> str:
|
|
14
30
|
delim = ":" if is_match else "-"
|
|
15
|
-
if
|
|
16
|
-
return
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
return f"{line_no}{delim}{text}"
|
|
21
|
-
return text
|
|
31
|
+
if not show_line_numbers:
|
|
32
|
+
return text
|
|
33
|
+
prefix = _colorize(str(line_no), COLOR_LINE_NO, use_color)
|
|
34
|
+
sep = "\t" if heading else ""
|
|
35
|
+
return f"{prefix}{delim}{sep}{text}"
|
|
22
36
|
|
|
23
37
|
|
|
24
38
|
def _merge_ranges(ranges: list[tuple[int, int]]) -> list[tuple[int, int]]:
|
|
@@ -39,18 +53,22 @@ def render_matches(
|
|
|
39
53
|
files: dict[str, FileRecord],
|
|
40
54
|
matches: dict[str, list[int]],
|
|
41
55
|
show_line_numbers: bool,
|
|
42
|
-
show_filename: bool,
|
|
43
56
|
before: int,
|
|
44
57
|
after: int,
|
|
58
|
+
use_color: bool = False,
|
|
59
|
+
heading: bool = True,
|
|
45
60
|
) -> list[str]:
|
|
46
61
|
output: list[str] = []
|
|
47
|
-
multiple_files = len(files) > 1
|
|
48
|
-
show_filename = show_filename or multiple_files
|
|
49
62
|
|
|
50
|
-
|
|
63
|
+
paths = sorted(matches.keys())
|
|
64
|
+
for idx, path in enumerate(paths):
|
|
51
65
|
record = files.get(path)
|
|
52
66
|
if record is None:
|
|
53
67
|
continue
|
|
68
|
+
if heading:
|
|
69
|
+
if idx > 0:
|
|
70
|
+
output.append("")
|
|
71
|
+
output.append(_format_heading(path, use_color))
|
|
54
72
|
lines = record.lines
|
|
55
73
|
page_map = record.page_map
|
|
56
74
|
n_lines = len(lines)
|
|
@@ -65,12 +83,12 @@ def render_matches(
|
|
|
65
83
|
text = f"{text}\tpage={page_map[line_no - 1]}"
|
|
66
84
|
output.append(
|
|
67
85
|
_format_line(
|
|
68
|
-
path,
|
|
69
86
|
line_no,
|
|
70
87
|
text,
|
|
71
88
|
True,
|
|
72
|
-
show_filename,
|
|
73
89
|
show_line_numbers,
|
|
90
|
+
use_color,
|
|
91
|
+
heading,
|
|
74
92
|
)
|
|
75
93
|
)
|
|
76
94
|
continue
|
|
@@ -90,12 +108,12 @@ def render_matches(
|
|
|
90
108
|
is_match = line_no in match_set
|
|
91
109
|
output.append(
|
|
92
110
|
_format_line(
|
|
93
|
-
path,
|
|
94
111
|
line_no,
|
|
95
112
|
text,
|
|
96
113
|
is_match,
|
|
97
|
-
show_filename,
|
|
98
114
|
show_line_numbers,
|
|
115
|
+
use_color,
|
|
116
|
+
heading,
|
|
99
117
|
)
|
|
100
118
|
)
|
|
101
119
|
if idx < len(merged) - 1:
|
rlmgrep/rlm.py
CHANGED
|
@@ -24,6 +24,10 @@ class RLMGrepSignature(dspy.Signature):
|
|
|
24
24
|
find relevant lines. Return all relevant matches you can find, avoid duplicates,
|
|
25
25
|
and only use exact paths from the directory keys.
|
|
26
26
|
Always read the ASCII file map first to orient yourself to the available paths.
|
|
27
|
+
Do not wrap code in backticks; only raw Python.
|
|
28
|
+
Do not import pandas or numpy; use built-ins only.
|
|
29
|
+
|
|
30
|
+
Files like "photo.jpg.md" or "audio.mp3.md" are LLM descriptions/transcriptions of images/audio that were originally in the directory but have been converted to md to make them searchable by you.
|
|
27
31
|
"""
|
|
28
32
|
|
|
29
33
|
directory: dict = dspy.InputField(
|
|
@@ -52,7 +56,21 @@ class RLMGrepSignature(dspy.Signature):
|
|
|
52
56
|
|
|
53
57
|
class RLMGrepAnswerSignature(dspy.Signature):
|
|
54
58
|
"""
|
|
55
|
-
|
|
59
|
+
You are the search engine for rlmgrep, a grep-shaped CLI for coding agents.
|
|
60
|
+
Inputs include a directory mapping of files (path -> full text), an ASCII file
|
|
61
|
+
map, and a user query string. Your output must be grep-printable matches as
|
|
62
|
+
(path, line) pairs that point to real lines in the provided texts.
|
|
63
|
+
The query may be natural language or a short pattern; interpret it freely to
|
|
64
|
+
find relevant lines. Return all relevant matches you can find, avoid duplicates,
|
|
65
|
+
and only use exact paths from the directory keys.
|
|
66
|
+
Always read the ASCII file map first to orient yourself to the available paths.
|
|
67
|
+
Do not wrap code in backticks; only raw Python.
|
|
68
|
+
Do not import pandas or numpy; use built-ins only.
|
|
69
|
+
|
|
70
|
+
In this mode you are also responsible for generating a narrative answer to the query based on the provided files.
|
|
71
|
+
|
|
72
|
+
Files like "photo.jpg.md" or "audio.mp3.md" are LLM descriptions/transcriptions of images/audio that were originally in the directory but have been converted to md to make them searchable by you.
|
|
73
|
+
|
|
56
74
|
"""
|
|
57
75
|
|
|
58
76
|
directory: dict = dspy.InputField(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlmgrep
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Grep-shaped CLI search powered by DSPy RLM
|
|
5
5
|
Author: rlmgrep
|
|
6
6
|
License: MIT
|
|
@@ -61,8 +61,7 @@ rlmgrep [options] "query" [paths...]
|
|
|
61
61
|
|
|
62
62
|
Common options:
|
|
63
63
|
|
|
64
|
-
- `-n` show line numbers
|
|
65
|
-
- `-H` always show filenames
|
|
64
|
+
- `-n` show line numbers (default)
|
|
66
65
|
- `-C N` context lines before/after (grep-style)
|
|
67
66
|
- `-A N` context lines after
|
|
68
67
|
- `-B N` context lines before
|
|
@@ -109,10 +108,9 @@ rg -l "token" . | rlmgrep --stdin-files --answer "what does this token control?"
|
|
|
109
108
|
## Output contract (stable for agents)
|
|
110
109
|
|
|
111
110
|
- Matches are written to stdout; warnings go to stderr.
|
|
112
|
-
- Output uses
|
|
113
|
-
-
|
|
114
|
-
- `
|
|
115
|
-
- If `-H` or `-n` are omitted, their parts are omitted.
|
|
111
|
+
- Output uses rg-style headings by default:
|
|
112
|
+
- A file header line like `./path/to/file`
|
|
113
|
+
- Then `line:\ttext` for matches, `line-\ttext` for context lines
|
|
116
114
|
- Line numbers are 1-based.
|
|
117
115
|
- When context ranges are disjoint, a `--` line separates groups.
|
|
118
116
|
- Exit codes:
|
|
@@ -120,7 +118,7 @@ rg -l "token" . | rlmgrep --stdin-files --answer "what does this token control?"
|
|
|
120
118
|
- `1` = no matches
|
|
121
119
|
- `2` = usage/config/error
|
|
122
120
|
|
|
123
|
-
Agent tip: use `-n -H` and no context for parse-friendly output, then key off exit codes.
|
|
121
|
+
Agent tip: use `-n` and no context for parse-friendly output, then key off exit codes.
|
|
124
122
|
|
|
125
123
|
## Regex-style queries (best effort)
|
|
126
124
|
|
|
@@ -160,7 +158,7 @@ file_hard_max = 1000
|
|
|
160
158
|
# markitdown_image_llm_api_base = ""
|
|
161
159
|
# markitdown_image_llm_prompt = ""
|
|
162
160
|
# markitdown_enable_audio = false
|
|
163
|
-
# markitdown_audio_model = "gpt-4o-mini-transcribe"
|
|
161
|
+
# markitdown_audio_model = "gpt-4o-mini-transcribe-2025-12-15"
|
|
164
162
|
# markitdown_audio_provider = "openai"
|
|
165
163
|
# markitdown_audio_api_key = ""
|
|
166
164
|
# markitdown_audio_api_base = ""
|
|
@@ -180,7 +178,7 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
180
178
|
## Non-text files (PDF, images, audio)
|
|
181
179
|
|
|
182
180
|
- PDF files are parsed with `pypdf`. Each page gets a marker line `===== Page N =====`, and output lines include a `page=N` suffix.
|
|
183
|
-
- Images and audio are converted via `markitdown` when enabled in config.
|
|
181
|
+
- Images and audio are converted via `markitdown` when enabled in config. Image conversion supports `openai`, `anthropic`, and `gemini` providers; audio conversion currently supports `openai` only.
|
|
184
182
|
- Converted image/audio text is cached in sidecar files named `<original>.<ext>.md` next to the original file and reused on subsequent runs.
|
|
185
183
|
- Use `-a/--text` to force binary files to be read as text (UTF-8 with replacement).
|
|
186
184
|
|
|
@@ -188,7 +186,7 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
188
186
|
|
|
189
187
|
- Prefer narrow corpora (globs/types) to reduce token usage.
|
|
190
188
|
- Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
|
|
191
|
-
- For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
|
|
189
|
+
- For reproducible parsing, use `-n` and avoid context (`-C/-A/-B`).
|
|
192
190
|
|
|
193
191
|
## Development
|
|
194
192
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
rlmgrep/__init__.py,sha256=nkO5AhSYqNqN7gEHOfOJe6qbc5OMva0ggSpwLNV5QR8,48
|
|
2
|
+
rlmgrep/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
|
|
3
|
+
rlmgrep/cli.py,sha256=qCN6Nvepb-HvpbvCSp-NAyThuZbML6k89ndzAqlg_-I,20246
|
|
4
|
+
rlmgrep/config.py,sha256=u1iz-nI8dj-dZETbpIki3RQefHJEyi5oE5zE4_IR8kg,2399
|
|
5
|
+
rlmgrep/file_map.py,sha256=x2Ri1wzK8_87GUorsAV01K_nYLZcv30yIquDeTCcdEw,876
|
|
6
|
+
rlmgrep/ingest.py,sha256=uCz2el9B-RIT9umFo-gFEdAsmWPP1IJOArFFQY0D_1A,9127
|
|
7
|
+
rlmgrep/interpreter.py,sha256=s_nMRxLlAU9C0JmUzUBW5NbVbuH67doVWF54K54STlA,2478
|
|
8
|
+
rlmgrep/render.py,sha256=OYZy7BuJJe-KsDhEGAz6JA5RGd65ZInPWf9wLDJE0ag,3554
|
|
9
|
+
rlmgrep/rlm.py,sha256=i3rCTp8OABByF60Un5gO7265gaW4spwU0OFKIz4surg,5750
|
|
10
|
+
rlmgrep-0.1.5.dist-info/METADATA,sha256=aY7Fy6DgS7lmtxFzmt79ry2PebKB43744ncjlT9mZHM,6584
|
|
11
|
+
rlmgrep-0.1.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
12
|
+
rlmgrep-0.1.5.dist-info/entry_points.txt,sha256=UV6QkEbkwBO1JJ53mm84_n35tVyOczPvOQ14ga7vrCI,45
|
|
13
|
+
rlmgrep-0.1.5.dist-info/top_level.txt,sha256=gTujSRsO58c80eN7aRH2cfe51FHxx8LJ1w1Y2YlHti0,8
|
|
14
|
+
rlmgrep-0.1.5.dist-info/RECORD,,
|
rlmgrep-0.1.3.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
rlmgrep/__init__.py,sha256=cVZBCfo6mJZvGFsStEhk2sSrk77IfDImcTVxgYIhNmY,48
|
|
2
|
-
rlmgrep/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
|
|
3
|
-
rlmgrep/cli.py,sha256=wR9zJAzkp8jl42zMHL19r4oCxGKfN6K72-JzmQlUS74,18768
|
|
4
|
-
rlmgrep/config.py,sha256=A6VLuuXSgQ1vM207CP0G92Mg3et93dGSmkkLQ0IOfwk,2388
|
|
5
|
-
rlmgrep/file_map.py,sha256=x2Ri1wzK8_87GUorsAV01K_nYLZcv30yIquDeTCcdEw,876
|
|
6
|
-
rlmgrep/ingest.py,sha256=uCz2el9B-RIT9umFo-gFEdAsmWPP1IJOArFFQY0D_1A,9127
|
|
7
|
-
rlmgrep/interpreter.py,sha256=s_nMRxLlAU9C0JmUzUBW5NbVbuH67doVWF54K54STlA,2478
|
|
8
|
-
rlmgrep/render.py,sha256=w6KOfont2M7pQz_EEngTFMY5xJEE11N_ko8P9x5FdH8,3097
|
|
9
|
-
rlmgrep/rlm.py,sha256=LZfkyWxjvtf8dwo5JxetKvvpBYeGKhajwHEVpCb2eo4,4474
|
|
10
|
-
rlmgrep-0.1.3.dist-info/METADATA,sha256=RuGjNIucLiFErCBf4KnH4An7lhgUE5vLIT3WwtmCBEY,6615
|
|
11
|
-
rlmgrep-0.1.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
12
|
-
rlmgrep-0.1.3.dist-info/entry_points.txt,sha256=UV6QkEbkwBO1JJ53mm84_n35tVyOczPvOQ14ga7vrCI,45
|
|
13
|
-
rlmgrep-0.1.3.dist-info/top_level.txt,sha256=gTujSRsO58c80eN7aRH2cfe51FHxx8LJ1w1Y2YlHti0,8
|
|
14
|
-
rlmgrep-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|