rlmgrep 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/PKG-INFO +5 -7
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/README.md +4 -6
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/pyproject.toml +1 -1
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/__init__.py +1 -1
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/cli.py +47 -13
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/config.py +1 -1
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/render.py +5 -5
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep.egg-info/PKG-INFO +5 -7
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/__main__.py +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/file_map.py +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/ingest.py +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/interpreter.py +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep/rlm.py +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep.egg-info/SOURCES.txt +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep.egg-info/dependency_links.txt +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep.egg-info/entry_points.txt +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep.egg-info/requires.txt +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/rlmgrep.egg-info/top_level.txt +0 -0
- {rlmgrep-0.1.4 → rlmgrep-0.1.5}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlmgrep
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Grep-shaped CLI search powered by DSPy RLM
|
|
5
5
|
Author: rlmgrep
|
|
6
6
|
License: MIT
|
|
@@ -62,8 +62,6 @@ rlmgrep [options] "query" [paths...]
|
|
|
62
62
|
Common options:
|
|
63
63
|
|
|
64
64
|
- `-n` show line numbers (default)
|
|
65
|
-
- `--no-line-number` hide line numbers
|
|
66
|
-
- `-H` always show filenames (default)
|
|
67
65
|
- `-C N` context lines before/after (grep-style)
|
|
68
66
|
- `-A N` context lines after
|
|
69
67
|
- `-B N` context lines before
|
|
@@ -120,7 +118,7 @@ rg -l "token" . | rlmgrep --stdin-files --answer "what does this token control?"
|
|
|
120
118
|
- `1` = no matches
|
|
121
119
|
- `2` = usage/config/error
|
|
122
120
|
|
|
123
|
-
Agent tip: use `-n
|
|
121
|
+
Agent tip: use `-n` and no context for parse-friendly output, then key off exit codes.
|
|
124
122
|
|
|
125
123
|
## Regex-style queries (best effort)
|
|
126
124
|
|
|
@@ -160,7 +158,7 @@ file_hard_max = 1000
|
|
|
160
158
|
# markitdown_image_llm_api_base = ""
|
|
161
159
|
# markitdown_image_llm_prompt = ""
|
|
162
160
|
# markitdown_enable_audio = false
|
|
163
|
-
# markitdown_audio_model = "gpt-4o-mini-transcribe"
|
|
161
|
+
# markitdown_audio_model = "gpt-4o-mini-transcribe-2025-12-15"
|
|
164
162
|
# markitdown_audio_provider = "openai"
|
|
165
163
|
# markitdown_audio_api_key = ""
|
|
166
164
|
# markitdown_audio_api_base = ""
|
|
@@ -180,7 +178,7 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
180
178
|
## Non-text files (PDF, images, audio)
|
|
181
179
|
|
|
182
180
|
- PDF files are parsed with `pypdf`. Each page gets a marker line `===== Page N =====`, and output lines include a `page=N` suffix.
|
|
183
|
-
- Images and audio are converted via `markitdown` when enabled in config.
|
|
181
|
+
- Images and audio are converted via `markitdown` when enabled in config. Image conversion supports `openai`, `anthropic`, and `gemini` providers; audio conversion currently supports `openai` only.
|
|
184
182
|
- Converted image/audio text is cached in sidecar files named `<original>.<ext>.md` next to the original file and reused on subsequent runs.
|
|
185
183
|
- Use `-a/--text` to force binary files to be read as text (UTF-8 with replacement).
|
|
186
184
|
|
|
@@ -188,7 +186,7 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
188
186
|
|
|
189
187
|
- Prefer narrow corpora (globs/types) to reduce token usage.
|
|
190
188
|
- Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
|
|
191
|
-
- For reproducible parsing, use `-n
|
|
189
|
+
- For reproducible parsing, use `-n` and avoid context (`-C/-A/-B`).
|
|
192
190
|
|
|
193
191
|
## Development
|
|
194
192
|
|
|
@@ -50,8 +50,6 @@ rlmgrep [options] "query" [paths...]
|
|
|
50
50
|
Common options:
|
|
51
51
|
|
|
52
52
|
- `-n` show line numbers (default)
|
|
53
|
-
- `--no-line-number` hide line numbers
|
|
54
|
-
- `-H` always show filenames (default)
|
|
55
53
|
- `-C N` context lines before/after (grep-style)
|
|
56
54
|
- `-A N` context lines after
|
|
57
55
|
- `-B N` context lines before
|
|
@@ -108,7 +106,7 @@ rg -l "token" . | rlmgrep --stdin-files --answer "what does this token control?"
|
|
|
108
106
|
- `1` = no matches
|
|
109
107
|
- `2` = usage/config/error
|
|
110
108
|
|
|
111
|
-
Agent tip: use `-n
|
|
109
|
+
Agent tip: use `-n` and no context for parse-friendly output, then key off exit codes.
|
|
112
110
|
|
|
113
111
|
## Regex-style queries (best effort)
|
|
114
112
|
|
|
@@ -148,7 +146,7 @@ file_hard_max = 1000
|
|
|
148
146
|
# markitdown_image_llm_api_base = ""
|
|
149
147
|
# markitdown_image_llm_prompt = ""
|
|
150
148
|
# markitdown_enable_audio = false
|
|
151
|
-
# markitdown_audio_model = "gpt-4o-mini-transcribe"
|
|
149
|
+
# markitdown_audio_model = "gpt-4o-mini-transcribe-2025-12-15"
|
|
152
150
|
# markitdown_audio_provider = "openai"
|
|
153
151
|
# markitdown_audio_api_key = ""
|
|
154
152
|
# markitdown_audio_api_base = ""
|
|
@@ -168,7 +166,7 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
168
166
|
## Non-text files (PDF, images, audio)
|
|
169
167
|
|
|
170
168
|
- PDF files are parsed with `pypdf`. Each page gets a marker line `===== Page N =====`, and output lines include a `page=N` suffix.
|
|
171
|
-
- Images and audio are converted via `markitdown` when enabled in config.
|
|
169
|
+
- Images and audio are converted via `markitdown` when enabled in config. Image conversion supports `openai`, `anthropic`, and `gemini` providers; audio conversion currently supports `openai` only.
|
|
172
170
|
- Converted image/audio text is cached in sidecar files named `<original>.<ext>.md` next to the original file and reused on subsequent runs.
|
|
173
171
|
- Use `-a/--text` to force binary files to be read as text (UTF-8 with replacement).
|
|
174
172
|
|
|
@@ -176,7 +174,7 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
176
174
|
|
|
177
175
|
- Prefer narrow corpora (globs/types) to reduce token usage.
|
|
178
176
|
- Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
|
|
179
|
-
- For reproducible parsing, use `-n
|
|
177
|
+
- For reproducible parsing, use `-n` and avoid context (`-C/-A/-B`).
|
|
180
178
|
|
|
181
179
|
## Development
|
|
182
180
|
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
__all__ = ["__version__"]
|
|
2
|
-
__version__ = "0.1.4"
|
|
2
|
+
__version__ = "0.1.5"
|
|
@@ -73,8 +73,6 @@ def _parse_args(argv: list[str]) -> argparse.Namespace:
|
|
|
73
73
|
parser.add_argument("paths", nargs="*", help="Files or directories")
|
|
74
74
|
|
|
75
75
|
parser.add_argument("-n", dest="line_numbers", action="store_true", help="Show line numbers (default)")
|
|
76
|
-
parser.add_argument("--no-line-number", dest="line_numbers", action="store_false", help="Hide line numbers")
|
|
77
|
-
parser.add_argument("-H", dest="with_filename", action="store_true", help="Always show filenames (default)")
|
|
78
76
|
parser.add_argument("-r", dest="recursive", action="store_true", help="Recursive (directories are searched recursively by default)")
|
|
79
77
|
parser.add_argument("--no-recursive", dest="recursive", action="store_false", help="Do not recurse directories")
|
|
80
78
|
parser.set_defaults(recursive=True, line_numbers=True)
|
|
@@ -240,6 +238,33 @@ def _build_markitdown(config: dict, warnings: list[str]):
|
|
|
240
238
|
kwargs["base_url"] = api_base
|
|
241
239
|
return OpenAI(**kwargs)
|
|
242
240
|
|
|
241
|
+
class _LiteLLMClient:
|
|
242
|
+
def __init__(self, api_key: str | None, api_base: str | None):
|
|
243
|
+
try:
|
|
244
|
+
import litellm # type: ignore
|
|
245
|
+
except Exception as exc:
|
|
246
|
+
raise RuntimeError("litellm not available") from exc
|
|
247
|
+
self._litellm = litellm
|
|
248
|
+
self._api_key = api_key
|
|
249
|
+
self._api_base = api_base
|
|
250
|
+
self.chat = self._Chat(self)
|
|
251
|
+
|
|
252
|
+
class _Chat:
|
|
253
|
+
def __init__(self, parent):
|
|
254
|
+
self.completions = parent._Completions(parent)
|
|
255
|
+
|
|
256
|
+
class _Completions:
|
|
257
|
+
def __init__(self, parent):
|
|
258
|
+
self._parent = parent
|
|
259
|
+
|
|
260
|
+
def create(self, model: str, messages):
|
|
261
|
+
kwargs = {"model": model, "messages": messages}
|
|
262
|
+
if self._parent._api_key:
|
|
263
|
+
kwargs["api_key"] = self._parent._api_key
|
|
264
|
+
if self._parent._api_base:
|
|
265
|
+
kwargs["api_base"] = self._parent._api_base
|
|
266
|
+
return self._parent._litellm.completion(**kwargs)
|
|
267
|
+
|
|
243
268
|
llm_client = None
|
|
244
269
|
if enable_images:
|
|
245
270
|
if not llm_model:
|
|
@@ -247,18 +272,27 @@ def _build_markitdown(config: dict, warnings: list[str]):
|
|
|
247
272
|
"markitdown_enable_images set but markitdown_image_llm_model missing; skipping images"
|
|
248
273
|
)
|
|
249
274
|
enable_images = False
|
|
250
|
-
elif llm_provider != "openai":
|
|
251
|
-
warnings.append(
|
|
252
|
-
f"markitdown image LLM provider '{llm_provider}' not supported; skipping images"
|
|
253
|
-
)
|
|
254
|
-
enable_images = False
|
|
255
275
|
else:
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
276
|
+
if llm_provider == "openai":
|
|
277
|
+
llm_client = _openai_client(
|
|
278
|
+
llm_api_key,
|
|
279
|
+
llm_api_base,
|
|
280
|
+
"openai package missing; skipping image conversion",
|
|
281
|
+
)
|
|
282
|
+
if llm_client is None:
|
|
283
|
+
enable_images = False
|
|
284
|
+
elif llm_provider in {"gemini", "anthropic"}:
|
|
285
|
+
try:
|
|
286
|
+
llm_client = _LiteLLMClient(llm_api_key, llm_api_base)
|
|
287
|
+
except RuntimeError:
|
|
288
|
+
warnings.append(
|
|
289
|
+
"litellm not available; skipping image conversion"
|
|
290
|
+
)
|
|
291
|
+
enable_images = False
|
|
292
|
+
else:
|
|
293
|
+
warnings.append(
|
|
294
|
+
f"markitdown image LLM provider '{llm_provider}' not supported; skipping images"
|
|
295
|
+
)
|
|
262
296
|
enable_images = False
|
|
263
297
|
|
|
264
298
|
md_kwargs: dict[str, object] = {"enable_plugins": False}
|
|
@@ -25,7 +25,7 @@ DEFAULT_CONFIG_TEXT = "\n".join(
|
|
|
25
25
|
"# markitdown_image_llm_api_base = \"\"",
|
|
26
26
|
"# markitdown_image_llm_prompt = \"\"",
|
|
27
27
|
"# markitdown_enable_audio = false",
|
|
28
|
-
"# markitdown_audio_model = \"gpt-4o-mini-transcribe\"",
|
|
28
|
+
"# markitdown_audio_model = \"gpt-4o-mini-transcribe-2025-12-15\"",
|
|
29
29
|
"# markitdown_audio_provider = \"openai\"",
|
|
30
30
|
"# markitdown_audio_api_key = \"\"",
|
|
31
31
|
"# markitdown_audio_api_base = \"\"",
|
|
@@ -28,11 +28,11 @@ def _format_line(
|
|
|
28
28
|
heading: bool,
|
|
29
29
|
) -> str:
|
|
30
30
|
delim = ":" if is_match else "-"
|
|
31
|
-
if show_line_numbers:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
return text
|
|
31
|
+
if not show_line_numbers:
|
|
32
|
+
return text
|
|
33
|
+
prefix = _colorize(str(line_no), COLOR_LINE_NO, use_color)
|
|
34
|
+
sep = "\t" if heading else ""
|
|
35
|
+
return f"{prefix}{delim}{sep}{text}"
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def _merge_ranges(ranges: list[tuple[int, int]]) -> list[tuple[int, int]]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlmgrep
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Grep-shaped CLI search powered by DSPy RLM
|
|
5
5
|
Author: rlmgrep
|
|
6
6
|
License: MIT
|
|
@@ -62,8 +62,6 @@ rlmgrep [options] "query" [paths...]
|
|
|
62
62
|
Common options:
|
|
63
63
|
|
|
64
64
|
- `-n` show line numbers (default)
|
|
65
|
-
- `--no-line-number` hide line numbers
|
|
66
|
-
- `-H` always show filenames (default)
|
|
67
65
|
- `-C N` context lines before/after (grep-style)
|
|
68
66
|
- `-A N` context lines after
|
|
69
67
|
- `-B N` context lines before
|
|
@@ -120,7 +118,7 @@ rg -l "token" . | rlmgrep --stdin-files --answer "what does this token control?"
|
|
|
120
118
|
- `1` = no matches
|
|
121
119
|
- `2` = usage/config/error
|
|
122
120
|
|
|
123
|
-
Agent tip: use `-n
|
|
121
|
+
Agent tip: use `-n` and no context for parse-friendly output, then key off exit codes.
|
|
124
122
|
|
|
125
123
|
## Regex-style queries (best effort)
|
|
126
124
|
|
|
@@ -160,7 +158,7 @@ file_hard_max = 1000
|
|
|
160
158
|
# markitdown_image_llm_api_base = ""
|
|
161
159
|
# markitdown_image_llm_prompt = ""
|
|
162
160
|
# markitdown_enable_audio = false
|
|
163
|
-
# markitdown_audio_model = "gpt-4o-mini-transcribe"
|
|
161
|
+
# markitdown_audio_model = "gpt-4o-mini-transcribe-2025-12-15"
|
|
164
162
|
# markitdown_audio_provider = "openai"
|
|
165
163
|
# markitdown_audio_api_key = ""
|
|
166
164
|
# markitdown_audio_api_base = ""
|
|
@@ -180,7 +178,7 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
180
178
|
## Non-text files (PDF, images, audio)
|
|
181
179
|
|
|
182
180
|
- PDF files are parsed with `pypdf`. Each page gets a marker line `===== Page N =====`, and output lines include a `page=N` suffix.
|
|
183
|
-
- Images and audio are converted via `markitdown` when enabled in config.
|
|
181
|
+
- Images and audio are converted via `markitdown` when enabled in config. Image conversion supports `openai`, `anthropic`, and `gemini` providers; audio conversion currently supports `openai` only.
|
|
184
182
|
- Converted image/audio text is cached in sidecar files named `<original>.<ext>.md` next to the original file and reused on subsequent runs.
|
|
185
183
|
- Use `-a/--text` to force binary files to be read as text (UTF-8 with replacement).
|
|
186
184
|
|
|
@@ -188,7 +186,7 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
188
186
|
|
|
189
187
|
- Prefer narrow corpora (globs/types) to reduce token usage.
|
|
190
188
|
- Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
|
|
191
|
-
- For reproducible parsing, use `-n
|
|
189
|
+
- For reproducible parsing, use `-n` and avoid context (`-C/-A/-B`).
|
|
192
190
|
|
|
193
191
|
## Development
|
|
194
192
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|