rlmgrep 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlmgrep/__init__.py +1 -1
- rlmgrep/cli.py +25 -1
- rlmgrep/ingest.py +90 -1
- {rlmgrep-0.1.10.dist-info → rlmgrep-0.1.12.dist-info}/METADATA +18 -4
- rlmgrep-0.1.12.dist-info/RECORD +14 -0
- rlmgrep-0.1.10.dist-info/RECORD +0 -14
- {rlmgrep-0.1.10.dist-info → rlmgrep-0.1.12.dist-info}/WHEEL +0 -0
- {rlmgrep-0.1.10.dist-info → rlmgrep-0.1.12.dist-info}/entry_points.txt +0 -0
- {rlmgrep-0.1.10.dist-info → rlmgrep-0.1.12.dist-info}/top_level.txt +0 -0
rlmgrep/__init__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
__all__ = ["__version__"]
|
|
2
|
-
__version__ = "0.1.10"
|
|
2
|
+
__version__ = "0.1.12"
|
rlmgrep/cli.py
CHANGED
|
@@ -9,7 +9,13 @@ import dspy
|
|
|
9
9
|
from . import __version__
|
|
10
10
|
from .config import ensure_default_config, load_config
|
|
11
11
|
from .file_map import build_file_map
|
|
12
|
-
from .ingest import
|
|
12
|
+
from .ingest import (
|
|
13
|
+
FileRecord,
|
|
14
|
+
build_gitignore_spec,
|
|
15
|
+
collect_candidates,
|
|
16
|
+
load_files,
|
|
17
|
+
resolve_type_exts,
|
|
18
|
+
)
|
|
13
19
|
from .rlm import Match, build_lm, run_rlm
|
|
14
20
|
from .render import render_matches
|
|
15
21
|
|
|
@@ -81,6 +87,8 @@ def _parse_args(argv: list[str]) -> argparse.Namespace:
|
|
|
81
87
|
parser.add_argument("-B", dest="before", type=int, default=None, help="Context lines before")
|
|
82
88
|
parser.add_argument("-m", dest="max_count", type=int, default=None, help="Max matching lines per file")
|
|
83
89
|
parser.add_argument("-a", "--text", dest="binary_as_text", action="store_true", help="Search binary files as text")
|
|
90
|
+
parser.add_argument("--hidden", action="store_true", help="Include hidden files and directories")
|
|
91
|
+
parser.add_argument("--no-ignore", dest="no_ignore", action="store_true", help="Do not respect .gitignore")
|
|
84
92
|
parser.add_argument("--answer", action="store_true", help="Print a narrative answer before grep output")
|
|
85
93
|
parser.add_argument("-y", "--yes", action="store_true", help="Skip file count confirmation")
|
|
86
94
|
parser.add_argument(
|
|
@@ -139,6 +147,13 @@ def _pick(cli_value, config: dict, key: str, default=None):
|
|
|
139
147
|
return default
|
|
140
148
|
|
|
141
149
|
|
|
150
|
+
def _find_git_root(start: Path) -> Path | None:
    """Return the closest directory at or above *start* that holds a ``.git`` entry.

    Args:
        start: Directory from which the upward search begins. It is checked
            first, followed by each of its parents in order.

    Returns:
        The first directory containing ``.git``, or ``None`` when neither
        *start* nor any of its ancestors has one.
    """
    for candidate in (start, *start.parents):
        # ``.git`` is a directory in a regular clone but a plain file
        # ("gitdir: ...") in linked worktrees and submodules, so test for
        # existence rather than for a directory.
        if (candidate / ".git").exists():
            return candidate
    return None
|
|
155
|
+
|
|
156
|
+
|
|
142
157
|
def _env_value(name: str) -> str | None:
|
|
143
158
|
val = os.getenv(name)
|
|
144
159
|
if val is None:
|
|
@@ -424,12 +439,21 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
424
439
|
if hard_max is not None and hard_max <= 0:
|
|
425
440
|
hard_max = None
|
|
426
441
|
|
|
442
|
+
ignore_spec = None
|
|
443
|
+
ignore_root = None
|
|
444
|
+
if not args.no_ignore:
|
|
445
|
+
ignore_root = _find_git_root(cwd) or cwd
|
|
446
|
+
ignore_spec = build_gitignore_spec(ignore_root)
|
|
447
|
+
|
|
427
448
|
candidates = collect_candidates(
|
|
428
449
|
input_paths,
|
|
429
450
|
cwd=cwd,
|
|
430
451
|
recursive=args.recursive,
|
|
431
452
|
include_globs=globs,
|
|
432
453
|
type_exts=type_exts,
|
|
454
|
+
include_hidden=args.hidden,
|
|
455
|
+
ignore_spec=ignore_spec,
|
|
456
|
+
ignore_root=ignore_root,
|
|
433
457
|
)
|
|
434
458
|
candidate_count = len(candidates)
|
|
435
459
|
if hard_max is not None and candidate_count > hard_max:
|
rlmgrep/ingest.py
CHANGED
|
@@ -2,8 +2,14 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from fnmatch import fnmatch
|
|
5
|
+
import os
|
|
5
6
|
from pathlib import Path, PurePosixPath
|
|
6
|
-
from typing import
|
|
7
|
+
from typing import Any, Callable, Iterable
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
import pathspec
|
|
11
|
+
except Exception: # pragma: no cover - optional at import time
|
|
12
|
+
pathspec = None
|
|
7
13
|
|
|
8
14
|
from pypdf import PdfReader
|
|
9
15
|
|
|
@@ -161,6 +167,64 @@ def collect_files(paths: Iterable[str], recursive: bool = True) -> list[Path]:
|
|
|
161
167
|
return files
|
|
162
168
|
|
|
163
169
|
|
|
170
|
+
def build_gitignore_spec(root: Path) -> "pathspec.PathSpec | None":
    """Compile every ``.gitignore`` found under *root* into one path spec.

    The tree below *root* is walked (``.git`` directories are pruned), each
    ``.gitignore`` is read, and its patterns are rewritten so that they can be
    matched against paths expressed relative to *root*. Files are processed
    shallowest first so that patterns from deeper directories come later in
    the combined list.

    Args:
        root: Directory whose ``.gitignore`` files should be collected. It is
            resolved before walking.

    Returns:
        A ``pathspec.PathSpec`` built with the ``"gitwildmatch"`` factory, or
        ``None`` when ``pathspec`` could not be imported, no ``.gitignore``
        exists, or no usable pattern was found.
    """
    if pathspec is None:
        return None
    root = root.resolve()

    # Each entry is (depth below root, posix directory relative to root, file).
    found: list[tuple[int, str, Path]] = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Removing the entry in place stops os.walk from descending into it.
        if ".git" in dirnames:
            dirnames.remove(".git")
        if ".gitignore" not in filenames:
            continue
        directory = Path(dirpath)
        try:
            parts = directory.relative_to(root).parts
        except ValueError:
            parts = ()
        # Joining the parts yields "" for the root itself; ``as_posix()`` would
        # yield ".", which used to turn every root pattern into "./pattern".
        found.append((len(parts), "/".join(parts), directory / ".gitignore"))

    if not found:
        return None
    found.sort(key=lambda entry: entry[:2])

    patterns: list[str] = []
    for _depth, rel_dir, gitignore in found:
        try:
            text = gitignore.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Best effort: an unreadable .gitignore is skipped, not fatal.
            continue
        for line in text.splitlines():
            rebased = _rebase_gitignore_line(line, rel_dir)
            if rebased is not None:
                patterns.append(rebased)

    if not patterns:
        return None
    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)


def _rebase_gitignore_line(line: str, rel_dir: str) -> str | None:
    """Rewrite one ``.gitignore`` line so it applies relative to the walk root.

    Args:
        line: A single line of a ``.gitignore`` file, without its newline.
        rel_dir: Posix path of the directory holding that file, relative to
            the walk root; ``""`` for the root itself.

    Returns:
        The pattern to hand to ``pathspec``, or ``None`` for blank lines,
        comments, and patterns that are empty once ``!`` and ``/`` are removed.
    """
    if not line.strip() or line.startswith("#"):
        return None
    negated = line.startswith("!")
    body = line[1:] if negated else line
    if not body.strip().strip("/"):
        return None
    if not rel_dir:
        # Root-level lines already mean the right thing relative to the root;
        # passing them through keeps leading "/" anchors and "\#" / "\!" escapes.
        return line

    # Directory names are literal text, so neutralise glob metacharacters.
    escaped_dir = "".join("\\" + ch if ch in "\\*?[" else ch for ch in rel_dir)
    if "/" in body.rstrip().rstrip("/"):
        # A slash at the start or in the middle anchors the pattern to the
        # directory that holds the .gitignore.
        rebased = f"/{escaped_dir}/{body.removeprefix('/')}"
    else:
        # A slash-less pattern matches at any depth below that directory.
        rebased = f"/{escaped_dir}/**/{body}"
    return f"!{rebased}" if negated else rebased
|
|
226
|
+
|
|
227
|
+
|
|
164
228
|
TYPE_EXTS = {
|
|
165
229
|
"bash": {".bash"},
|
|
166
230
|
"c": {".c", ".h"},
|
|
@@ -237,21 +301,46 @@ def _matches_globs(path: str, globs: list[str]) -> bool:
|
|
|
237
301
|
return False
|
|
238
302
|
|
|
239
303
|
|
|
304
|
+
def _is_hidden_path(path: Path) -> bool:
    """Report whether any component of *path* is a dot-prefixed (hidden) name.

    Every component is inspected, not only the final one, so a file inside a
    hidden directory counts as hidden. The special components ``.`` and ``..``
    are navigation markers rather than hidden names and are ignored, so a path
    such as ``../src/app.py`` is not treated as hidden.

    Args:
        path: Path whose components are examined; it is not resolved.

    Returns:
        ``True`` when at least one component starts with ``.`` and is neither
        ``.`` nor ``..``; otherwise ``False``.
    """
    return any(
        part.startswith(".") and part not in {".", ".."}
        for part in path.parts
    )
|
|
306
|
+
|
|
307
|
+
|
|
240
308
|
def collect_candidates(
|
|
241
309
|
paths: Iterable[str],
|
|
242
310
|
cwd: Path,
|
|
243
311
|
recursive: bool = True,
|
|
244
312
|
include_globs: list[str] | None = None,
|
|
245
313
|
type_exts: set[str] | None = None,
|
|
314
|
+
include_hidden: bool = False,
|
|
315
|
+
ignore_spec: "pathspec.PathSpec | None" = None,
|
|
316
|
+
ignore_root: Path | None = None,
|
|
246
317
|
) -> list[Path]:
|
|
247
318
|
files = collect_files(paths, recursive=recursive)
|
|
319
|
+
explicit_files: set[Path] = set()
|
|
320
|
+
for raw in paths:
|
|
321
|
+
p = Path(raw)
|
|
322
|
+
if p.exists() and p.is_file():
|
|
323
|
+
explicit_files.add(p.resolve())
|
|
248
324
|
candidates: list[Path] = []
|
|
249
325
|
for fp in files:
|
|
326
|
+
fp_resolved = fp.resolve()
|
|
327
|
+
is_explicit = fp_resolved in explicit_files
|
|
328
|
+
if not include_hidden and not is_explicit and _is_hidden_path(fp):
|
|
329
|
+
continue
|
|
330
|
+
|
|
250
331
|
try:
|
|
251
332
|
key = fp.relative_to(cwd).as_posix()
|
|
252
333
|
except ValueError:
|
|
253
334
|
key = fp.as_posix()
|
|
254
335
|
|
|
336
|
+
if ignore_spec is not None and ignore_root is not None and not is_explicit:
|
|
337
|
+
try:
|
|
338
|
+
rel = fp.relative_to(ignore_root).as_posix()
|
|
339
|
+
except ValueError:
|
|
340
|
+
rel = None
|
|
341
|
+
if rel and ignore_spec.match_file(rel):
|
|
342
|
+
continue
|
|
343
|
+
|
|
255
344
|
if include_globs and not _matches_globs(key, include_globs):
|
|
256
345
|
continue
|
|
257
346
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlmgrep
|
|
3
|
-
Version: 0.1.10
|
|
3
|
+
Version: 0.1.12
|
|
4
4
|
Summary: Grep-shaped CLI search powered by DSPy RLM
|
|
5
5
|
Author: rlmgrep
|
|
6
6
|
License: MIT
|
|
@@ -8,6 +8,7 @@ Requires-Python: >=3.11
|
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
Requires-Dist: dspy>=3.1.1
|
|
10
10
|
Requires-Dist: markitdown[all]>=0.1.4
|
|
11
|
+
Requires-Dist: pathspec>=0.12.1
|
|
11
12
|
Requires-Dist: pypdf>=4.0.0
|
|
12
13
|
|
|
13
14
|
# rlmgrep
|
|
@@ -22,10 +23,20 @@ uv tool install rlmgrep
|
|
|
22
23
|
# uv tool install git+https://github.com/halfprice06/rlmgrep.git
|
|
23
24
|
|
|
24
25
|
export OPENAI_API_KEY=... # or set keys in ~/.rlmgrep
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
```sh
|
|
25
29
|
rlmgrep --answer "What does this repo do and where are the entry points?" .
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+

|
|
33
|
+
|
|
34
|
+
```sh
|
|
26
35
|
rlmgrep -C 2 "Where is retry/backoff configured and what are the defaults?" .
|
|
27
36
|
```
|
|
28
37
|
|
|
38
|
+

|
|
39
|
+
|
|
29
40
|
## Requirements
|
|
30
41
|
|
|
31
42
|
- Python 3.11+
|
|
@@ -39,8 +50,8 @@ One of rlmgrep’s most useful features is that it can “grep” **PDFs and Off
|
|
|
39
50
|
How it works:
|
|
40
51
|
- **PDFs** are parsed with `pypdf`. Each page gets a marker line like `===== Page N =====`, and output lines include a `page=N` suffix. Line numbers refer to the extracted text (not PDF coordinates).
|
|
41
52
|
- **Office & binary docs** (`.docx`, `.pptx`, `.xlsx`, `.html`, `.zip`, etc.) are converted to Markdown via **MarkItDown**. This happens during ingestion, so rlmgrep can search them like any other text file.
|
|
42
|
-
- **Images** can be described by a vision model through MarkItDown (OpenAI/Anthropic/Gemini).
|
|
43
|
-
- **Audio** transcription is supported through OpenAI when enabled.
|
|
53
|
+
- **Images** can be described by a vision model through MarkItDown (OpenAI/Anthropic/Gemini) and then searched like text; enable and configure this in `config.toml`.
|
|
54
|
+
- **Audio** transcription is supported through OpenAI when enabled; configure it in `config.toml`.
|
|
44
55
|
|
|
45
56
|
Sidecar caching:
|
|
46
57
|
- For images/audio, converted text is cached next to the original file as `<original>.<ext>.md` and reused on later runs.
|
|
@@ -48,7 +59,7 @@ Sidecar caching:
|
|
|
48
59
|
|
|
49
60
|
## Install Deno
|
|
50
61
|
|
|
51
|
-
DSPy requires the Deno runtime. Install it with the official scripts:
|
|
62
|
+
DSPy's default implementation of RLM requires the Deno runtime. Install it with the official scripts:
|
|
52
63
|
|
|
53
64
|
macOS/Linux:
|
|
54
65
|
|
|
@@ -83,6 +94,8 @@ Common options:
|
|
|
83
94
|
- `-m N` max matching lines per file
|
|
84
95
|
- `-g GLOB` include files matching glob (repeatable, comma-separated)
|
|
85
96
|
- `--type T` include file types (repeatable, comma-separated)
|
|
97
|
+
- `--hidden` include hidden files and directories
|
|
98
|
+
- `--no-ignore` do not respect `.gitignore`
|
|
86
99
|
- `--no-recursive` do not recurse directories
|
|
87
100
|
- `-a`, `--text` treat binary files as text
|
|
88
101
|
- `-y`, `--yes` skip file count confirmation
|
|
@@ -115,6 +128,7 @@ rg -l "token" . | rlmgrep --files-from-stdin --answer "What does this token cont
|
|
|
115
128
|
## Input selection
|
|
116
129
|
|
|
117
130
|
- Directories are searched recursively by default. Use `--no-recursive` to stop recursion.
|
|
131
|
+
- By default, hidden files and directories are skipped and `.gitignore` rules are respected. Use `--hidden` to include hidden paths and `--no-ignore` to stop applying `.gitignore` rules.
|
|
118
132
|
- `--type` uses built-in type mappings (e.g., `py`, `js`, `md`); unknown values are treated as file extensions.
|
|
119
133
|
- `-g/--glob` matches path globs against normalized paths (forward slashes).
|
|
120
134
|
- Paths are printed relative to the current working directory when possible.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
rlmgrep/__init__.py,sha256=Oi10B9QTmGI-1i704NHr63Oy1A8W2kzfQCYB6asuv0w,49
|
|
2
|
+
rlmgrep/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
|
|
3
|
+
rlmgrep/cli.py,sha256=sUtcvf-3U1fpUdFYmEE9j75xSgBAx9Uv3XY1lobHURk,21150
|
|
4
|
+
rlmgrep/config.py,sha256=u1iz-nI8dj-dZETbpIki3RQefHJEyi5oE5zE4_IR8kg,2399
|
|
5
|
+
rlmgrep/file_map.py,sha256=x2Ri1wzK8_87GUorsAV01K_nYLZcv30yIquDeTCcdEw,876
|
|
6
|
+
rlmgrep/ingest.py,sha256=St-MaKUBeAUWwEKtjfNej8Wt9aVhg9-VYEm_GDOV7uU,11909
|
|
7
|
+
rlmgrep/interpreter.py,sha256=s_nMRxLlAU9C0JmUzUBW5NbVbuH67doVWF54K54STlA,2478
|
|
8
|
+
rlmgrep/render.py,sha256=mCTT6yuKNv7HJ46LzOyLkCbyBedCWSNd7UeubyLXcyM,3356
|
|
9
|
+
rlmgrep/rlm.py,sha256=i3rCTp8OABByF60Un5gO7265gaW4spwU0OFKIz4surg,5750
|
|
10
|
+
rlmgrep-0.1.12.dist-info/METADATA,sha256=UsbAYXqsGnSUfqdOypf4D4LTbgCaEuwHTCwp5MNMAxU,7849
|
|
11
|
+
rlmgrep-0.1.12.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
12
|
+
rlmgrep-0.1.12.dist-info/entry_points.txt,sha256=UV6QkEbkwBO1JJ53mm84_n35tVyOczPvOQ14ga7vrCI,45
|
|
13
|
+
rlmgrep-0.1.12.dist-info/top_level.txt,sha256=gTujSRsO58c80eN7aRH2cfe51FHxx8LJ1w1Y2YlHti0,8
|
|
14
|
+
rlmgrep-0.1.12.dist-info/RECORD,,
|
rlmgrep-0.1.10.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
rlmgrep/__init__.py,sha256=EFWL7tqAdmDdGhDfl3jtag-Ds4vHOCNDFdvMC2yYodU,49
|
|
2
|
-
rlmgrep/__main__.py,sha256=MHKZ_ae3fSLGTLUUMOx15fWdeOnJSHhq-zslRP5F5Lc,79
|
|
3
|
-
rlmgrep/cli.py,sha256=Jn7knAQq3Bnb578QK33RxDZ102yFrVSbNFjUKBGkb1o,20417
|
|
4
|
-
rlmgrep/config.py,sha256=u1iz-nI8dj-dZETbpIki3RQefHJEyi5oE5zE4_IR8kg,2399
|
|
5
|
-
rlmgrep/file_map.py,sha256=x2Ri1wzK8_87GUorsAV01K_nYLZcv30yIquDeTCcdEw,876
|
|
6
|
-
rlmgrep/ingest.py,sha256=uCz2el9B-RIT9umFo-gFEdAsmWPP1IJOArFFQY0D_1A,9127
|
|
7
|
-
rlmgrep/interpreter.py,sha256=s_nMRxLlAU9C0JmUzUBW5NbVbuH67doVWF54K54STlA,2478
|
|
8
|
-
rlmgrep/render.py,sha256=mCTT6yuKNv7HJ46LzOyLkCbyBedCWSNd7UeubyLXcyM,3356
|
|
9
|
-
rlmgrep/rlm.py,sha256=i3rCTp8OABByF60Un5gO7265gaW4spwU0OFKIz4surg,5750
|
|
10
|
-
rlmgrep-0.1.10.dist-info/METADATA,sha256=u4EfxtRfTmzvuu4jD5lBsXZQIHjElMZs9iZe79d4Peg,7349
|
|
11
|
-
rlmgrep-0.1.10.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
12
|
-
rlmgrep-0.1.10.dist-info/entry_points.txt,sha256=UV6QkEbkwBO1JJ53mm84_n35tVyOczPvOQ14ga7vrCI,45
|
|
13
|
-
rlmgrep-0.1.10.dist-info/top_level.txt,sha256=gTujSRsO58c80eN7aRH2cfe51FHxx8LJ1w1Y2YlHti0,8
|
|
14
|
-
rlmgrep-0.1.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|