rlmgrep 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/PKG-INFO +7 -5
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/README.md +6 -4
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/pyproject.toml +1 -1
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/cli.py +47 -2
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/config.py +2 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/ingest.py +24 -10
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep.egg-info/PKG-INFO +7 -5
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/__init__.py +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/__main__.py +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/file_map.py +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/interpreter.py +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/render.py +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/rlm.py +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep.egg-info/SOURCES.txt +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep.egg-info/dependency_links.txt +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep.egg-info/entry_points.txt +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep.egg-info/requires.txt +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep.egg-info/top_level.txt +0 -0
- {rlmgrep-0.1.0 → rlmgrep-0.1.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlmgrep
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Grep-shaped CLI search powered by DSPy RLM
|
|
5
5
|
Author: rlmgrep
|
|
6
6
|
License: MIT
|
|
@@ -17,7 +17,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
|
|
|
17
17
|
## Quickstart
|
|
18
18
|
|
|
19
19
|
```sh
|
|
20
|
-
uv tool install --python 3.11
|
|
20
|
+
uv tool install --python 3.11 rlmgrep
|
|
21
21
|
# or from GitHub:
|
|
22
22
|
# uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
|
|
23
23
|
|
|
@@ -71,6 +71,7 @@ Common options:
|
|
|
71
71
|
- `--type T` include file types (repeatable, comma-separated)
|
|
72
72
|
- `--no-recursive` do not recurse directories
|
|
73
73
|
- `-a`, `--text` treat binary files as text
|
|
74
|
+
- `-y`, `--yes` skip file count confirmation
|
|
74
75
|
- `--model`, `--sub-model` override model names
|
|
75
76
|
- `--api-key`, `--api-base`, `--model-type` override provider settings
|
|
76
77
|
- `--max-iterations`, `--max-llm-calls` cap RLM search effort
|
|
@@ -99,6 +100,7 @@ cat README.md | rlmgrep "install"
|
|
|
99
100
|
- `-g/--glob` matches path globs against normalized paths (forward slashes).
|
|
100
101
|
- Paths are printed relative to the current working directory when possible.
|
|
101
102
|
- If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
|
|
103
|
+
- rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip), and aborts when more than 1000 files would be loaded.
|
|
102
104
|
|
|
103
105
|
## Output contract (stable for agents)
|
|
104
106
|
|
|
@@ -133,6 +135,8 @@ temperature = 1.0
|
|
|
133
135
|
max_tokens = 64000
|
|
134
136
|
max_iterations = 10
|
|
135
137
|
max_llm_calls = 20
|
|
138
|
+
file_warn_threshold = 200
|
|
139
|
+
file_hard_max = 1000
|
|
136
140
|
# markitdown_enable_images = false
|
|
137
141
|
# markitdown_image_llm_model = "gpt-5-mini"
|
|
138
142
|
# markitdown_image_llm_provider = "openai"
|
|
@@ -168,10 +172,8 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
168
172
|
|
|
169
173
|
- Prefer narrow corpora (globs/types) to reduce token usage.
|
|
170
174
|
- Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
|
|
171
|
-
- Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
|
|
172
175
|
- For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
|
|
173
|
-
|
|
174
|
-
|
|
176
|
+
|
|
175
177
|
## Development
|
|
176
178
|
|
|
177
179
|
- Install locally: `pip install -e .` or `uv tool install .`
|
|
@@ -5,7 +5,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
|
|
|
5
5
|
## Quickstart
|
|
6
6
|
|
|
7
7
|
```sh
|
|
8
|
-
uv tool install --python 3.11
|
|
8
|
+
uv tool install --python 3.11 rlmgrep
|
|
9
9
|
# or from GitHub:
|
|
10
10
|
# uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
|
|
11
11
|
|
|
@@ -59,6 +59,7 @@ Common options:
|
|
|
59
59
|
- `--type T` include file types (repeatable, comma-separated)
|
|
60
60
|
- `--no-recursive` do not recurse directories
|
|
61
61
|
- `-a`, `--text` treat binary files as text
|
|
62
|
+
- `-y`, `--yes` skip file count confirmation
|
|
62
63
|
- `--model`, `--sub-model` override model names
|
|
63
64
|
- `--api-key`, `--api-base`, `--model-type` override provider settings
|
|
64
65
|
- `--max-iterations`, `--max-llm-calls` cap RLM search effort
|
|
@@ -87,6 +88,7 @@ cat README.md | rlmgrep "install"
|
|
|
87
88
|
- `-g/--glob` matches path globs against normalized paths (forward slashes).
|
|
88
89
|
- Paths are printed relative to the current working directory when possible.
|
|
89
90
|
- If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
|
|
91
|
+
- rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip), and aborts when more than 1000 files would be loaded.
|
|
90
92
|
|
|
91
93
|
## Output contract (stable for agents)
|
|
92
94
|
|
|
@@ -121,6 +123,8 @@ temperature = 1.0
|
|
|
121
123
|
max_tokens = 64000
|
|
122
124
|
max_iterations = 10
|
|
123
125
|
max_llm_calls = 20
|
|
126
|
+
file_warn_threshold = 200
|
|
127
|
+
file_hard_max = 1000
|
|
124
128
|
# markitdown_enable_images = false
|
|
125
129
|
# markitdown_image_llm_model = "gpt-5-mini"
|
|
126
130
|
# markitdown_image_llm_provider = "openai"
|
|
@@ -156,10 +160,8 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
156
160
|
|
|
157
161
|
- Prefer narrow corpora (globs/types) to reduce token usage.
|
|
158
162
|
- Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
|
|
159
|
-
- Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
|
|
160
163
|
- For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
|
|
161
|
-
|
|
162
|
-
|
|
164
|
+
|
|
163
165
|
## Development
|
|
164
166
|
|
|
165
167
|
- Install locally: `pip install -e .` or `uv tool install .`
|
|
@@ -8,7 +8,7 @@ from pathlib import Path
|
|
|
8
8
|
import dspy
|
|
9
9
|
from .config import ensure_default_config, load_config
|
|
10
10
|
from .file_map import build_file_map
|
|
11
|
-
from .ingest import FileRecord, load_files, resolve_type_exts
|
|
11
|
+
from .ingest import FileRecord, collect_candidates, load_files, resolve_type_exts
|
|
12
12
|
from .rlm import Match, build_lm, run_rlm
|
|
13
13
|
from .render import render_matches
|
|
14
14
|
|
|
@@ -17,6 +17,23 @@ def _warn(msg: str) -> None:
|
|
|
17
17
|
print(f"rlmgrep: {msg}", file=sys.stderr)
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
def _confirm_over_limit(count: int, threshold: int) -> bool:
|
|
21
|
+
prompt = (
|
|
22
|
+
f"rlmgrep: {count} files to load (over {threshold}). Continue? [y/N] "
|
|
23
|
+
)
|
|
24
|
+
try:
|
|
25
|
+
with open("/dev/tty", "r+") as tty:
|
|
26
|
+
print(prompt, file=tty, end="", flush=True)
|
|
27
|
+
response = tty.readline()
|
|
28
|
+
except Exception:
|
|
29
|
+
if not sys.stdin.isatty():
|
|
30
|
+
_warn("refusing to prompt for confirmation; use --yes to proceed")
|
|
31
|
+
return False
|
|
32
|
+
print(prompt, file=sys.stderr, end="", flush=True)
|
|
33
|
+
response = sys.stdin.readline()
|
|
34
|
+
return response.strip().lower() in {"y", "yes"}
|
|
35
|
+
|
|
36
|
+
|
|
20
37
|
def verify_matches(
|
|
21
38
|
matches: list[Match],
|
|
22
39
|
files: dict[str, FileRecord],
|
|
@@ -65,6 +82,7 @@ def _parse_args(argv: list[str]) -> argparse.Namespace:
|
|
|
65
82
|
parser.add_argument("-m", dest="max_count", type=int, default=None, help="Max matching lines per file")
|
|
66
83
|
parser.add_argument("-a", "--text", dest="binary_as_text", action="store_true", help="Search binary files as text")
|
|
67
84
|
parser.add_argument("--answer", action="store_true", help="Print a narrative answer before grep output")
|
|
85
|
+
parser.add_argument("-y", "--yes", action="store_true", help="Skip file count confirmation")
|
|
68
86
|
|
|
69
87
|
parser.add_argument("-g", "--glob", dest="globs", action="append", default=[], help="Include files matching glob (may repeat)")
|
|
70
88
|
parser.add_argument("--type", dest="types", action="append", default=[], help="Include file types (py, js, md, etc.). May repeat")
|
|
@@ -328,12 +346,39 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
328
346
|
}
|
|
329
347
|
warnings: list[str] = []
|
|
330
348
|
else:
|
|
331
|
-
|
|
349
|
+
warn_threshold = _parse_num(
|
|
350
|
+
_pick(None, config, "file_warn_threshold", 200), int
|
|
351
|
+
)
|
|
352
|
+
hard_max = _parse_num(_pick(None, config, "file_hard_max", 1000), int)
|
|
353
|
+
if warn_threshold is not None and warn_threshold <= 0:
|
|
354
|
+
warn_threshold = None
|
|
355
|
+
if hard_max is not None and hard_max <= 0:
|
|
356
|
+
hard_max = None
|
|
357
|
+
|
|
358
|
+
candidates = collect_candidates(
|
|
332
359
|
args.paths,
|
|
333
360
|
cwd=cwd,
|
|
334
361
|
recursive=args.recursive,
|
|
335
362
|
include_globs=globs,
|
|
336
363
|
type_exts=type_exts,
|
|
364
|
+
)
|
|
365
|
+
candidate_count = len(candidates)
|
|
366
|
+
if hard_max is not None and candidate_count > hard_max:
|
|
367
|
+
_warn(
|
|
368
|
+
f"{candidate_count} files to load (over {hard_max}); aborting"
|
|
369
|
+
)
|
|
370
|
+
return 2
|
|
371
|
+
if (
|
|
372
|
+
warn_threshold is not None
|
|
373
|
+
and candidate_count > warn_threshold
|
|
374
|
+
and not args.yes
|
|
375
|
+
):
|
|
376
|
+
if not _confirm_over_limit(candidate_count, warn_threshold):
|
|
377
|
+
return 2
|
|
378
|
+
|
|
379
|
+
files, warnings = load_files(
|
|
380
|
+
candidates,
|
|
381
|
+
cwd=cwd,
|
|
337
382
|
markitdown=markitdown,
|
|
338
383
|
enable_images=md_enable_images,
|
|
339
384
|
enable_audio=md_enable_audio,
|
|
@@ -19,6 +19,8 @@ DEFAULT_CONFIG_TEXT = "\n".join(
|
|
|
19
19
|
"max_tokens = 64000",
|
|
20
20
|
"max_iterations = 10",
|
|
21
21
|
"max_llm_calls = 20",
|
|
22
|
+
"file_warn_threshold = 200",
|
|
23
|
+
"file_hard_max = 1000",
|
|
22
24
|
"# markitdown_enable_images = false",
|
|
23
25
|
"# markitdown_image_llm_model = \"gpt-5-mini\"",
|
|
24
26
|
"# markitdown_image_llm_provider = \"openai\"",
|
|
@@ -237,12 +237,34 @@ def _matches_globs(path: str, globs: list[str]) -> bool:
|
|
|
237
237
|
return False
|
|
238
238
|
|
|
239
239
|
|
|
240
|
-
def load_files(
|
|
240
|
+
def collect_candidates(
|
|
241
241
|
paths: Iterable[str],
|
|
242
242
|
cwd: Path,
|
|
243
243
|
recursive: bool = True,
|
|
244
244
|
include_globs: list[str] | None = None,
|
|
245
245
|
type_exts: set[str] | None = None,
|
|
246
|
+
) -> list[Path]:
|
|
247
|
+
files = collect_files(paths, recursive=recursive)
|
|
248
|
+
candidates: list[Path] = []
|
|
249
|
+
for fp in files:
|
|
250
|
+
try:
|
|
251
|
+
key = fp.relative_to(cwd).as_posix()
|
|
252
|
+
except ValueError:
|
|
253
|
+
key = fp.as_posix()
|
|
254
|
+
|
|
255
|
+
if include_globs and not _matches_globs(key, include_globs):
|
|
256
|
+
continue
|
|
257
|
+
|
|
258
|
+
if type_exts and fp.suffix.lower() not in type_exts:
|
|
259
|
+
continue
|
|
260
|
+
|
|
261
|
+
candidates.append(fp)
|
|
262
|
+
return candidates
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def load_files(
|
|
266
|
+
candidates: Iterable[Path],
|
|
267
|
+
cwd: Path,
|
|
246
268
|
markitdown: Any | None = None,
|
|
247
269
|
enable_images: bool = False,
|
|
248
270
|
enable_audio: bool = False,
|
|
@@ -254,20 +276,12 @@ def load_files(
|
|
|
254
276
|
image_convert_count = 0
|
|
255
277
|
audio_convert_count = 0
|
|
256
278
|
|
|
257
|
-
|
|
258
|
-
for fp in files:
|
|
279
|
+
for fp in candidates:
|
|
259
280
|
try:
|
|
260
281
|
key = fp.relative_to(cwd).as_posix()
|
|
261
282
|
except ValueError:
|
|
262
283
|
key = fp.as_posix()
|
|
263
284
|
|
|
264
|
-
if include_globs and not _matches_globs(key, include_globs):
|
|
265
|
-
continue
|
|
266
|
-
|
|
267
|
-
if type_exts:
|
|
268
|
-
if fp.suffix.lower() not in type_exts:
|
|
269
|
-
continue
|
|
270
|
-
|
|
271
285
|
suffix = fp.suffix.lower()
|
|
272
286
|
if markitdown is not None and not binary_as_text:
|
|
273
287
|
if enable_images and suffix in IMAGE_EXTS:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlmgrep
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Grep-shaped CLI search powered by DSPy RLM
|
|
5
5
|
Author: rlmgrep
|
|
6
6
|
License: MIT
|
|
@@ -17,7 +17,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
|
|
|
17
17
|
## Quickstart
|
|
18
18
|
|
|
19
19
|
```sh
|
|
20
|
-
uv tool install --python 3.11
|
|
20
|
+
uv tool install --python 3.11 rlmgrep
|
|
21
21
|
# or from GitHub:
|
|
22
22
|
# uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
|
|
23
23
|
|
|
@@ -71,6 +71,7 @@ Common options:
|
|
|
71
71
|
- `--type T` include file types (repeatable, comma-separated)
|
|
72
72
|
- `--no-recursive` do not recurse directories
|
|
73
73
|
- `-a`, `--text` treat binary files as text
|
|
74
|
+
- `-y`, `--yes` skip file count confirmation
|
|
74
75
|
- `--model`, `--sub-model` override model names
|
|
75
76
|
- `--api-key`, `--api-base`, `--model-type` override provider settings
|
|
76
77
|
- `--max-iterations`, `--max-llm-calls` cap RLM search effort
|
|
@@ -99,6 +100,7 @@ cat README.md | rlmgrep "install"
|
|
|
99
100
|
- `-g/--glob` matches path globs against normalized paths (forward slashes).
|
|
100
101
|
- Paths are printed relative to the current working directory when possible.
|
|
101
102
|
- If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
|
|
103
|
+
- rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip), and aborts when more than 1000 files would be loaded.
|
|
102
104
|
|
|
103
105
|
## Output contract (stable for agents)
|
|
104
106
|
|
|
@@ -133,6 +135,8 @@ temperature = 1.0
|
|
|
133
135
|
max_tokens = 64000
|
|
134
136
|
max_iterations = 10
|
|
135
137
|
max_llm_calls = 20
|
|
138
|
+
file_warn_threshold = 200
|
|
139
|
+
file_hard_max = 1000
|
|
136
140
|
# markitdown_enable_images = false
|
|
137
141
|
# markitdown_image_llm_model = "gpt-5-mini"
|
|
138
142
|
# markitdown_image_llm_provider = "openai"
|
|
@@ -168,10 +172,8 @@ If more than one provider key is set and the model does not make the provider ob
|
|
|
168
172
|
|
|
169
173
|
- Prefer narrow corpora (globs/types) to reduce token usage.
|
|
170
174
|
- Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
|
|
171
|
-
- Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
|
|
172
175
|
- For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
|
|
173
|
-
|
|
174
|
-
|
|
176
|
+
|
|
175
177
|
## Development
|
|
176
178
|
|
|
177
179
|
- Install locally: `pip install -e .` or `uv tool install .`
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|