PyPI - rlmgrep - Versions diffs - 0.1.0__tar.gz → 0.1.1__tar.gz - Mend

rlmgrep 0.1.0 (tar.gz) → 0.1.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

{rlmgrep-0.1.0 → rlmgrep-0.1.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rlmgrep
-Version: 0.1.0
+Version: 0.1.1
 Summary: Grep-shaped CLI search powered by DSPy RLM
 Author: rlmgrep
 License: MIT
@@ -17,7 +17,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
 ## Quickstart
 ```sh
-uv tool install --python 3.11 .
+uv tool install --python 3.11 rlmgrep
 # or from GitHub:
 # uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
@@ -71,6 +71,7 @@ Common options:
 - `--type T` include file types (repeatable, comma-separated)
 - `--no-recursive` do not recurse directories
 - `-a`, `--text` treat binary files as text
+- `-y`, `--yes` skip file count confirmation
 - `--model`, `--sub-model` override model names
 - `--api-key`, `--api-base`, `--model-type` override provider settings
 - `--max-iterations`, `--max-llm-calls` cap RLM search effort
@@ -99,6 +100,7 @@ cat README.md | rlmgrep "install"
 - `-g/--glob` matches path globs against normalized paths (forward slashes).
 - Paths are printed relative to the current working directory when possible.
 - If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
+- rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip), and aborts when more than 1000 files would be loaded.
 ## Output contract (stable for agents)
@@ -133,6 +135,8 @@ temperature = 1.0
 max_tokens = 64000
 max_iterations = 10
 max_llm_calls = 20
+file_warn_threshold = 200
+file_hard_max = 1000
 # markitdown_enable_images = false
 # markitdown_image_llm_model = "gpt-5-mini"
 # markitdown_image_llm_provider = "openai"
@@ -168,10 +172,8 @@ If more than one provider key is set and the model does not make the provider ob
 - Prefer narrow corpora (globs/types) to reduce token usage.
 - Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
-- Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
 - For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
-- RLM results are verified against real file lines; invalid or duplicate matches are dropped and reported.
 ## Development
 - Install locally: `pip install -e .` or `uv tool install .`

{rlmgrep-0.1.0 → rlmgrep-0.1.1}/README.md RENAMED Viewed

@@ -5,7 +5,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
 ## Quickstart
 ```sh
-uv tool install --python 3.11 .
+uv tool install --python 3.11 rlmgrep
 # or from GitHub:
 # uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
@@ -59,6 +59,7 @@ Common options:
 - `--type T` include file types (repeatable, comma-separated)
 - `--no-recursive` do not recurse directories
 - `-a`, `--text` treat binary files as text
+- `-y`, `--yes` skip file count confirmation
 - `--model`, `--sub-model` override model names
 - `--api-key`, `--api-base`, `--model-type` override provider settings
 - `--max-iterations`, `--max-llm-calls` cap RLM search effort
@@ -87,6 +88,7 @@ cat README.md | rlmgrep "install"
 - `-g/--glob` matches path globs against normalized paths (forward slashes).
 - Paths are printed relative to the current working directory when possible.
 - If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
+- rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip), and aborts when more than 1000 files would be loaded.
 ## Output contract (stable for agents)
@@ -121,6 +123,8 @@ temperature = 1.0
 max_tokens = 64000
 max_iterations = 10
 max_llm_calls = 20
+file_warn_threshold = 200
+file_hard_max = 1000
 # markitdown_enable_images = false
 # markitdown_image_llm_model = "gpt-5-mini"
 # markitdown_image_llm_provider = "openai"
@@ -156,10 +160,8 @@ If more than one provider key is set and the model does not make the provider ob
 - Prefer narrow corpora (globs/types) to reduce token usage.
 - Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
-- Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
 - For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
-- RLM results are verified against real file lines; invalid or duplicate matches are dropped and reported.
 ## Development
 - Install locally: `pip install -e .` or `uv tool install .`

{rlmgrep-0.1.0 → rlmgrep-0.1.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "rlmgrep"
-version = "0.1.0"
+version = "0.1.1"
 description = "Grep-shaped CLI search powered by DSPy RLM"
 readme = "README.md"
 requires-python = ">=3.11"

{rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/cli.py RENAMED Viewed

@@ -8,7 +8,7 @@ from pathlib import Path
 import dspy
 from .config import ensure_default_config, load_config
 from .file_map import build_file_map
-from .ingest import FileRecord, load_files, resolve_type_exts
+from .ingest import FileRecord, collect_candidates, load_files, resolve_type_exts
 from .rlm import Match, build_lm, run_rlm
 from .render import render_matches
@@ -17,6 +17,23 @@ def _warn(msg: str) -> None:
     print(f"rlmgrep: {msg}", file=sys.stderr)
+def _confirm_over_limit(count: int, threshold: int) -> bool:
+    prompt = (
+        f"rlmgrep: {count} files to load (over {threshold}). Continue? [y/N] "
+    )
+    try:
+        with open("/dev/tty", "r+") as tty:
+            print(prompt, file=tty, end="", flush=True)
+            response = tty.readline()
+    except Exception:
+        if not sys.stdin.isatty():
+            _warn("refusing to prompt for confirmation; use --yes to proceed")
+            return False
+        print(prompt, file=sys.stderr, end="", flush=True)
+        response = sys.stdin.readline()
+    return response.strip().lower() in {"y", "yes"}
 def verify_matches(
     matches: list[Match],
     files: dict[str, FileRecord],
@@ -65,6 +82,7 @@ def _parse_args(argv: list[str]) -> argparse.Namespace:
     parser.add_argument("-m", dest="max_count", type=int, default=None, help="Max matching lines per file")
     parser.add_argument("-a", "--text", dest="binary_as_text", action="store_true", help="Search binary files as text")
     parser.add_argument("--answer", action="store_true", help="Print a narrative answer before grep output")
+    parser.add_argument("-y", "--yes", action="store_true", help="Skip file count confirmation")
     parser.add_argument("-g", "--glob", dest="globs", action="append", default=[], help="Include files matching glob (may repeat)")
     parser.add_argument("--type", dest="types", action="append", default=[], help="Include file types (py, js, md, etc.). May repeat")
@@ -328,12 +346,39 @@ def main(argv: list[str] | None = None) -> int:
         }
         warnings: list[str] = []
     else:
-        files, warnings = load_files(
+        warn_threshold = _parse_num(
+            _pick(None, config, "file_warn_threshold", 200), int
+        )
+        hard_max = _parse_num(_pick(None, config, "file_hard_max", 1000), int)
+        if warn_threshold is not None and warn_threshold <= 0:
+            warn_threshold = None
+        if hard_max is not None and hard_max <= 0:
+            hard_max = None
+        candidates = collect_candidates(
             args.paths,
             cwd=cwd,
             recursive=args.recursive,
             include_globs=globs,
             type_exts=type_exts,
+        )
+        candidate_count = len(candidates)
+        if hard_max is not None and candidate_count > hard_max:
+            _warn(
+                f"{candidate_count} files to load (over {hard_max}); aborting"
+            )
+            return 2
+        if (
+            warn_threshold is not None
+            and candidate_count > warn_threshold
+            and not args.yes
+        ):
+            if not _confirm_over_limit(candidate_count, warn_threshold):
+                return 2
+        files, warnings = load_files(
+            candidates,
+            cwd=cwd,
             markitdown=markitdown,
             enable_images=md_enable_images,
             enable_audio=md_enable_audio,

{rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/config.py RENAMED Viewed

@@ -19,6 +19,8 @@ DEFAULT_CONFIG_TEXT = "\n".join(
         "max_tokens = 64000",
         "max_iterations = 10",
         "max_llm_calls = 20",
+        "file_warn_threshold = 200",
+        "file_hard_max = 1000",
         "# markitdown_enable_images = false",
         "# markitdown_image_llm_model = \"gpt-5-mini\"",
         "# markitdown_image_llm_provider = \"openai\"",

{rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep/ingest.py RENAMED Viewed

@@ -237,12 +237,34 @@ def _matches_globs(path: str, globs: list[str]) -> bool:
     return False
-def load_files(
+def collect_candidates(
     paths: Iterable[str],
     cwd: Path,
     recursive: bool = True,
     include_globs: list[str] | None = None,
     type_exts: set[str] | None = None,
+) -> list[Path]:
+    files = collect_files(paths, recursive=recursive)
+    candidates: list[Path] = []
+    for fp in files:
+        try:
+            key = fp.relative_to(cwd).as_posix()
+        except ValueError:
+            key = fp.as_posix()
+        if include_globs and not _matches_globs(key, include_globs):
+            continue
+        if type_exts and fp.suffix.lower() not in type_exts:
+            continue
+        candidates.append(fp)
+    return candidates
+def load_files(
+    candidates: Iterable[Path],
+    cwd: Path,
     markitdown: Any | None = None,
     enable_images: bool = False,
     enable_audio: bool = False,
@@ -254,20 +276,12 @@ def load_files(
     image_convert_count = 0
     audio_convert_count = 0
-    files = collect_files(paths, recursive=recursive)
-    for fp in files:
+    for fp in candidates:
         try:
             key = fp.relative_to(cwd).as_posix()
         except ValueError:
             key = fp.as_posix()
-        if include_globs and not _matches_globs(key, include_globs):
-            continue
-        if type_exts:
-            if fp.suffix.lower() not in type_exts:
-                continue
         suffix = fp.suffix.lower()
         if markitdown is not None and not binary_as_text:
             if enable_images and suffix in IMAGE_EXTS:

{rlmgrep-0.1.0 → rlmgrep-0.1.1}/rlmgrep.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rlmgrep
-Version: 0.1.0
+Version: 0.1.1
 Summary: Grep-shaped CLI search powered by DSPy RLM
 Author: rlmgrep
 License: MIT
@@ -17,7 +17,7 @@ Grep-shaped search powered by DSPy RLM. It accepts a natural-language query, sca
 ## Quickstart
 ```sh
-uv tool install --python 3.11 .
+uv tool install --python 3.11 rlmgrep
 # or from GitHub:
 # uv tool install --python 3.11 git+https://github.com/halfprice06/rlmgrep.git
@@ -71,6 +71,7 @@ Common options:
 - `--type T` include file types (repeatable, comma-separated)
 - `--no-recursive` do not recurse directories
 - `-a`, `--text` treat binary files as text
+- `-y`, `--yes` skip file count confirmation
 - `--model`, `--sub-model` override model names
 - `--api-key`, `--api-base`, `--model-type` override provider settings
 - `--max-iterations`, `--max-llm-calls` cap RLM search effort
@@ -99,6 +100,7 @@ cat README.md | rlmgrep "install"
 - `-g/--glob` matches path globs against normalized paths (forward slashes).
 - Paths are printed relative to the current working directory when possible.
 - If no paths are provided, rlmgrep reads from stdin and uses the synthetic path `<stdin>`; if stdin is empty, it exits with code 2.
+- rlmgrep asks for confirmation when more than 200 files would be loaded (use `-y/--yes` to skip), and aborts when more than 1000 files would be loaded.
 ## Output contract (stable for agents)
@@ -133,6 +135,8 @@ temperature = 1.0
 max_tokens = 64000
 max_iterations = 10
 max_llm_calls = 20
+file_warn_threshold = 200
+file_hard_max = 1000
 # markitdown_enable_images = false
 # markitdown_image_llm_model = "gpt-5-mini"
 # markitdown_image_llm_provider = "openai"
@@ -168,10 +172,8 @@ If more than one provider key is set and the model does not make the provider ob
 - Prefer narrow corpora (globs/types) to reduce token usage.
 - Use `--max-llm-calls` to cap costs; combine with small `--max-iterations` for safety.
-- Always read stderr for warnings (skipped files, config issues, ambiguous API keys).
 - For reproducible parsing, use `-n -H` and avoid context (`-C/-A/-B`).
-- RLM results are verified against real file lines; invalid or duplicate matches are dropped and reported.
 ## Development
 - Install locally: `pip install -e .` or `uv tool install .`