rlmgrep 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlmgrep
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Grep-shaped CLI search powered by DSPy RLM
5
5
  Author: rlmgrep
6
6
  License: MIT
@@ -17,7 +17,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
17
17
  ## Quickstart
18
18
 
19
19
  ```sh
20
- uv tool install --python 3.11 .
20
+ uv tool install --python 3.11 rlmgrep
21
21
  # or from GitHub:
22
22
  # uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
23
23
 
@@ -71,6 +71,7 @@ Common options:
71
71
  - `--type T` include file types (repeatable, comma-separated)
72
72
  - `--no-recursive` do not recurse directories
73
73
  - `-a`, `--text` treat binary files as text
74
+ - `-y`, `--yes` skip file count confirmation
74
75
  - `--model`, `--sub-model` override model names
75
76
  - `--api-key`, `--api-base`, `--model-type` override provider settings
76
77
  - `--max-iterations`, `--max-llm-calls` cap RLM search effort
@@ -99,6 +100,7 @@ cat README.md | rlmgrep "install"
99
100
  - `-g/--glob` matches path globs against normalized paths (forward slashes).
100
101
  - Paths are printed relative to the current working directory when possible.
101
102
  - If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
103
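+ - rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip the prompt), and aborts when more than 1000 files would be loaded, even with `--yes`; declining the prompt or hitting the hard limit exits with code 2. Both limits are configurable via `file_warn_threshold` and `file_hard_max`; a value of 0 or less disables that check.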
102
104
 
103
105
  ## Output contract (stable for agents)
104
106
 
@@ -133,6 +135,8 @@ temperature = 1.0
133
135
  max_tokens = 64000
134
136
  max_iterations = 10
135
137
  max_llm_calls = 20
138
+ file_warn_threshold = 200
139
+ file_hard_max = 1000
136
140
  # markitdown_enable_images = false
137
141
  # markitdown_image_llm_model = "gpt-5-mini"
138
142
  # markitdown_image_llm_provider = "openai"
@@ -168,10 +172,8 @@ If more than one provider key is set and the model does not make the provider ob
168
172
 
169
173
  - Prefer narrow corpora (globs/types) to reduce token usage.
170
174
  - Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
171
- - Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
172
175
  - For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
173
- - RLM results are verified against real file lines; invalid or duplicate matches are dropped and reported.
174
-
176
+
175
177
  ## Development
176
178
 
177
179
  - Install locally: `pip install -e .` or `uv tool install .`
@@ -5,7 +5,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
5
5
  ## Quickstart
6
6
 
7
7
  ```sh
8
- uv tool install --python 3.11 .
8
+ uv tool install --python 3.11 rlmgrep
9
9
  # or from GitHub:
10
10
  # uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
11
11
 
@@ -59,6 +59,7 @@ Common options:
59
59
  - `--type T` include file types (repeatable, comma-separated)
60
60
  - `--no-recursive` do not recurse directories
61
61
  - `-a`, `--text` treat binary files as text
62
+ - `-y`, `--yes` skip file count confirmation
62
63
  - `--model`, `--sub-model` override model names
63
64
  - `--api-key`, `--api-base`, `--model-type` override provider settings
64
65
  - `--max-iterations`, `--max-llm-calls` cap RLM search effort
@@ -87,6 +88,7 @@ cat README.md | rlmgrep "install"
87
88
  - `-g/--glob` matches path globs against normalized paths (forward slashes).
88
89
  - Paths are printed relative to the current working directory when possible.
89
90
  - If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
91
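+ - rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip the prompt), and aborts when more than 1000 files would be loaded, even with `--yes`; declining the prompt or hitting the hard limit exits with code 2. Both limits are configurable via `file_warn_threshold` and `file_hard_max`; a value of 0 or less disables that check.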
90
92
 
91
93
  ## Output contract (stable for agents)
92
94
 
@@ -121,6 +123,8 @@ temperature = 1.0
121
123
  max_tokens = 64000
122
124
  max_iterations = 10
123
125
  max_llm_calls = 20
126
+ file_warn_threshold = 200
127
+ file_hard_max = 1000
124
128
  # markitdown_enable_images = false
125
129
  # markitdown_image_llm_model = "gpt-5-mini"
126
130
  # markitdown_image_llm_provider = "openai"
@@ -156,10 +160,8 @@ If more than one provider key is set and the model does not make the provider ob
156
160
 
157
161
  - Prefer narrow corpora (globs/types) to reduce token usage.
158
162
  - Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
159
- - Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
160
163
  - For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
161
- - RLM results are verified against real file lines; invalid or duplicate matches are dropped and reported.
162
-
164
+
163
165
  ## Development
164
166
 
165
167
  - Install locally: `pip install -e .` or `uv tool install .`
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "rlmgrep"
3
- version = "0.1.0"
3
+ version = "0.1.1"
4
4
  description = "Grep-shaped CLI search powered by DSPy RLM"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -8,7 +8,7 @@ from pathlib import Path
8
8
  import dspy
9
9
  from .config import ensure_default_config, load_config
10
10
  from .file_map import build_file_map
11
- from .ingest import FileRecord, load_files, resolve_type_exts
11
+ from .ingest import FileRecord, collect_candidates, load_files, resolve_type_exts
12
12
  from .rlm import Match, build_lm, run_rlm
13
13
  from .render import render_matches
14
14
 
@@ -17,6 +17,23 @@ def _warn(msg: str) -> None:
17
17
  print(f"rlmgrep: {msg}", file=sys.stderr)
18
18
 
19
19
 
20
+ def _confirm_over_limit(count: int, threshold: int) -> bool:
21
+ prompt = (
22
+ f"rlmgrep: {count} files to load (over {threshold}). Continue? [y/N] "
23
+ )
24
+ try:
25
+ with open("/dev/tty", "r+") as tty:
26
+ print(prompt, file=tty, end="", flush=True)
27
+ response = tty.readline()
28
+ except Exception:
29
+ if not sys.stdin.isatty():
30
+ _warn("refusing to prompt for confirmation; use --yes to proceed")
31
+ return False
32
+ print(prompt, file=sys.stderr, end="", flush=True)
33
+ response = sys.stdin.readline()
34
+ return response.strip().lower() in {"y", "yes"}
35
+
36
+
20
37
  def verify_matches(
21
38
  matches: list[Match],
22
39
  files: dict[str, FileRecord],
@@ -65,6 +82,7 @@ def _parse_args(argv: list[str]) -> argparse.Namespace:
65
82
  parser.add_argument("-m", dest="max_count", type=int, default=None, help="Max matching lines per file")
66
83
  parser.add_argument("-a", "--text", dest="binary_as_text", action="store_true", help="Search binary files as text")
67
84
  parser.add_argument("--answer", action="store_true", help="Print a narrative answer before grep output")
85
+ parser.add_argument("-y", "--yes", action="store_true", help="Skip file count confirmation")
68
86
 
69
87
  parser.add_argument("-g", "--glob", dest="globs", action="append", default=[], help="Include files matching glob (may repeat)")
70
88
  parser.add_argument("--type", dest="types", action="append", default=[], help="Include file types (py, js, md, etc.). May repeat")
@@ -328,12 +346,39 @@ def main(argv: list[str] | None = None) -> int:
328
346
  }
329
347
  warnings: list[str] = []
330
348
  else:
331
- files, warnings = load_files(
349
+ warn_threshold = _parse_num(
350
+ _pick(None, config, "file_warn_threshold", 200), int
351
+ )
352
+ hard_max = _parse_num(_pick(None, config, "file_hard_max", 1000), int)
353
+ if warn_threshold is not None and warn_threshold <= 0:
354
+ warn_threshold = None
355
+ if hard_max is not None and hard_max <= 0:
356
+ hard_max = None
357
+
358
+ candidates = collect_candidates(
332
359
  args.paths,
333
360
  cwd=cwd,
334
361
  recursive=args.recursive,
335
362
  include_globs=globs,
336
363
  type_exts=type_exts,
364
+ )
365
+ candidate_count = len(candidates)
366
+ if hard_max is not None and candidate_count > hard_max:
367
+ _warn(
368
+ f"{candidate_count} files to load (over {hard_max}); aborting"
369
+ )
370
+ return 2
371
+ if (
372
+ warn_threshold is not None
373
+ and candidate_count > warn_threshold
374
+ and not args.yes
375
+ ):
376
+ if not _confirm_over_limit(candidate_count, warn_threshold):
377
+ return 2
378
+
379
+ files, warnings = load_files(
380
+ candidates,
381
+ cwd=cwd,
337
382
  markitdown=markitdown,
338
383
  enable_images=md_enable_images,
339
384
  enable_audio=md_enable_audio,
@@ -19,6 +19,8 @@ DEFAULT_CONFIG_TEXT = "\n".join(
19
19
  "max_tokens = 64000",
20
20
  "max_iterations = 10",
21
21
  "max_llm_calls = 20",
22
+ "file_warn_threshold = 200",
23
+ "file_hard_max = 1000",
22
24
  "# markitdown_enable_images = false",
23
25
  "# markitdown_image_llm_model = \"gpt-5-mini\"",
24
26
  "# markitdown_image_llm_provider = \"openai\"",
@@ -237,12 +237,34 @@ def _matches_globs(path: str, globs: list[str]) -> bool:
237
237
  return False
238
238
 
239
239
 
240
- def load_files(
240
+ def collect_candidates(
241
241
  paths: Iterable[str],
242
242
  cwd: Path,
243
243
  recursive: bool = True,
244
244
  include_globs: list[str] | None = None,
245
245
  type_exts: set[str] | None = None,
246
+ ) -> list[Path]:
247
+ files = collect_files(paths, recursive=recursive)
248
+ candidates: list[Path] = []
249
+ for fp in files:
250
+ try:
251
+ key = fp.relative_to(cwd).as_posix()
252
+ except ValueError:
253
+ key = fp.as_posix()
254
+
255
+ if include_globs and not _matches_globs(key, include_globs):
256
+ continue
257
+
258
+ if type_exts and fp.suffix.lower() not in type_exts:
259
+ continue
260
+
261
+ candidates.append(fp)
262
+ return candidates
263
+
264
+
265
+ def load_files(
266
+ candidates: Iterable[Path],
267
+ cwd: Path,
246
268
  markitdown: Any | None = None,
247
269
  enable_images: bool = False,
248
270
  enable_audio: bool = False,
@@ -254,20 +276,12 @@ def load_files(
254
276
  image_convert_count = 0
255
277
  audio_convert_count = 0
256
278
 
257
- files = collect_files(paths, recursive=recursive)
258
- for fp in files:
279
+ for fp in candidates:
259
280
  try:
260
281
  key = fp.relative_to(cwd).as_posix()
261
282
  except ValueError:
262
283
  key = fp.as_posix()
263
284
 
264
- if include_globs and not _matches_globs(key, include_globs):
265
- continue
266
-
267
- if type_exts:
268
- if fp.suffix.lower() not in type_exts:
269
- continue
270
-
271
285
  suffix = fp.suffix.lower()
272
286
  if markitdown is not None and not binary_as_text:
273
287
  if enable_images and suffix in IMAGE_EXTS:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlmgrep
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Grep-shaped CLI search powered by DSPy RLM
5
5
  Author: rlmgrep
6
6
  License: MIT
@@ -17,7 +17,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
17
17
  ## Quickstart
18
18
 
19
19
  ```sh
20
- uv tool install --python 3.11 .
20
+ uv tool install --python 3.11 rlmgrep
21
21
  # or from GitHub:
22
22
  # uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
23
23
 
@@ -71,6 +71,7 @@ Common options:
71
71
  - `--type T` include file types (repeatable, comma-separated)
72
72
  - `--no-recursive` do not recurse directories
73
73
  - `-a`, `--text` treat binary files as text
74
+ - `-y`, `--yes` skip file count confirmation
74
75
  - `--model`, `--sub-model` override model names
75
76
  - `--api-key`, `--api-base`, `--model-type` override provider settings
76
77
  - `--max-iterations`, `--max-llm-calls` cap RLM search effort
@@ -99,6 +100,7 @@ cat README.md | rlmgrep "install"
99
100
  - `-g/--glob` matches path globs against normalized paths (forward slashes).
100
101
  - Paths are printed relative to the current working directory when possible.
101
102
  - If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
103
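+ - rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip the prompt), and aborts when more than 1000 files would be loaded, even with `--yes`; declining the prompt or hitting the hard limit exits with code 2. Both limits are configurable via `file_warn_threshold` and `file_hard_max`; a value of 0 or less disables that check.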
102
104
 
103
105
  ## Output contract (stable for agents)
104
106
 
@@ -133,6 +135,8 @@ temperature = 1.0
133
135
  max_tokens = 64000
134
136
  max_iterations = 10
135
137
  max_llm_calls = 20
138
+ file_warn_threshold = 200
139
+ file_hard_max = 1000
136
140
  # markitdown_enable_images = false
137
141
  # markitdown_image_llm_model = "gpt-5-mini"
138
142
  # markitdown_image_llm_provider = "openai"
@@ -168,10 +172,8 @@ If more than one provider key is set and the model does not make the provider ob
168
172
 
169
173
  - Prefer narrow corpora (globs/types) to reduce token usage.
170
174
  - Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
171
- - Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
172
175
  - For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
173
- - RLM results are verified against real file lines; invalid or duplicate matches are dropped and reported.
174
-
176
+
175
177
  ## Development
176
178
 
177
179
  - Install locally: `pip install -e .` or `uv tool install .`
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes