rlmgrep 0.1.11__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlmgrep
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: Grep-shaped CLI search powered by DSPy RLM
5
5
  Author: rlmgrep
6
6
  License: MIT
@@ -8,6 +8,7 @@ Requires-Python: >=3.11
8
8
  Description-Content-Type: text/markdown
9
9
  Requires-Dist: dspy>=3.1.1
10
10
  Requires-Dist: markitdown[all]>=0.1.4
11
+ Requires-Dist: pathspec>=0.12.1
11
12
  Requires-Dist: pypdf>=4.0.0
12
13
 
13
14
  # rlmgrep
@@ -93,6 +94,8 @@ Common options:
93
94
  - `-m N` max matching lines per file
94
95
  - `-g GLOB` include files matching glob (repeatable, comma-separated)
95
96
  - `--type T` include file types (repeatable, comma-separated)
97
+ - `--hidden` include hidden files and directories
98
+ - `--no-ignore` do not respect `.gitignore`
96
99
  - `--no-recursive` do not recurse directories
97
100
  - `-a`, `--text` treat binary files as text
98
101
  - `-y`, `--yes` skip file count confirmation
@@ -125,6 +128,7 @@ rg -l "token" . | rlmgrep --files-from-stdin --answer "What does this token cont
125
128
  ## Input selection
126
129
 
127
130
  - Directories are searched recursively by default. Use `--no-recursive` to stop recursion.
131
+ - Hidden files and directories are skipped and `.gitignore` rules are respected by default. Use `--hidden` to include hidden paths and `--no-ignore` to include files matched by `.gitignore`.
128
132
  - `--type` uses built-in type mappings (e.g., `py`, `js`, `md`); unknown values are treated as file extensions.
129
133
  - `-g/--glob` matches path globs against normalized paths (forward slashes).
130
134
  - Paths are printed relative to the current working directory when possible.
@@ -81,6 +81,8 @@ Common options:
81
81
  - `-m N` max matching lines per file
82
82
  - `-g GLOB` include files matching glob (repeatable, comma-separated)
83
83
  - `--type T` include file types (repeatable, comma-separated)
84
+ - `--hidden` include hidden files and directories
85
+ - `--no-ignore` do not respect `.gitignore`
84
86
  - `--no-recursive` do not recurse directories
85
87
  - `-a`, `--text` treat binary files as text
86
88
  - `-y`, `--yes` skip file count confirmation
@@ -113,6 +115,7 @@ rg -l "token" . | rlmgrep --files-from-stdin --answer "What does this token cont
113
115
  ## Input selection
114
116
 
115
117
  - Directories are searched recursively by default. Use `--no-recursive` to stop recursion.
118
+ - Hidden files and directories are skipped and `.gitignore` rules are respected by default. Use `--hidden` to include hidden paths and `--no-ignore` to include files matched by `.gitignore`.
116
119
  - `--type` uses built-in type mappings (e.g., `py`, `js`, `md`); unknown values are treated as file extensions.
117
120
  - `-g/--glob` matches path globs against normalized paths (forward slashes).
118
121
  - Paths are printed relative to the current working directory when possible.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "rlmgrep"
3
- version = "0.1.11"
3
+ version = "0.1.12"
4
4
  description = "Grep-shaped CLI search powered by DSPy RLM"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -9,6 +9,7 @@ license = { text = "MIT" }
9
9
  dependencies = [
10
10
  "dspy>=3.1.1",
11
11
  "markitdown[all]>=0.1.4",
12
+ "pathspec>=0.12.1",
12
13
  "pypdf>=4.0.0",
13
14
  ]
14
15
 
@@ -1,2 +1,2 @@
1
1
  __all__ = ["__version__"]
2
- __version__ = "0.1.11"
2
+ __version__ = "0.1.12"
@@ -9,7 +9,13 @@ import dspy
9
9
  from . import __version__
10
10
  from .config import ensure_default_config, load_config
11
11
  from .file_map import build_file_map
12
- from .ingest import FileRecord, collect_candidates, load_files, resolve_type_exts
12
+ from .ingest import (
13
+ FileRecord,
14
+ build_gitignore_spec,
15
+ collect_candidates,
16
+ load_files,
17
+ resolve_type_exts,
18
+ )
13
19
  from .rlm import Match, build_lm, run_rlm
14
20
  from .render import render_matches
15
21
 
@@ -81,6 +87,8 @@ def _parse_args(argv: list[str]) -> argparse.Namespace:
81
87
  parser.add_argument("-B", dest="before", type=int, default=None, help="Context lines before")
82
88
  parser.add_argument("-m", dest="max_count", type=int, default=None, help="Max matching lines per file")
83
89
  parser.add_argument("-a", "--text", dest="binary_as_text", action="store_true", help="Search binary files as text")
90
+ parser.add_argument("--hidden", action="store_true", help="Include hidden files and directories")
91
+ parser.add_argument("--no-ignore", dest="no_ignore", action="store_true", help="Do not respect .gitignore")
84
92
  parser.add_argument("--answer", action="store_true", help="Print a narrative answer before grep output")
85
93
  parser.add_argument("-y", "--yes", action="store_true", help="Skip file count confirmation")
86
94
  parser.add_argument(
@@ -139,6 +147,13 @@ def _pick(cli_value, config: dict, key: str, default=None):
139
147
  return default
140
148
 
141
149
 
150
+ def _find_git_root(start: Path) -> Path | None:
151
+ for p in [start, *start.parents]:
152
+ if (p / ".git").is_dir():
153
+ return p
154
+ return None
155
+
156
+
142
157
  def _env_value(name: str) -> str | None:
143
158
  val = os.getenv(name)
144
159
  if val is None:
@@ -424,12 +439,21 @@ def main(argv: list[str] | None = None) -> int:
424
439
  if hard_max is not None and hard_max <= 0:
425
440
  hard_max = None
426
441
 
442
+ ignore_spec = None
443
+ ignore_root = None
444
+ if not args.no_ignore:
445
+ ignore_root = _find_git_root(cwd) or cwd
446
+ ignore_spec = build_gitignore_spec(ignore_root)
447
+
427
448
  candidates = collect_candidates(
428
449
  input_paths,
429
450
  cwd=cwd,
430
451
  recursive=args.recursive,
431
452
  include_globs=globs,
432
453
  type_exts=type_exts,
454
+ include_hidden=args.hidden,
455
+ ignore_spec=ignore_spec,
456
+ ignore_root=ignore_root,
433
457
  )
434
458
  candidate_count = len(candidates)
435
459
  if hard_max is not None and candidate_count > hard_max:
@@ -2,8 +2,14 @@ from __future__ import annotations
2
2
 
3
3
  from dataclasses import dataclass
4
4
  from fnmatch import fnmatch
5
+ import os
5
6
  from pathlib import Path, PurePosixPath
6
- from typing import Iterable, Any, Callable
7
+ from typing import Any, Callable, Iterable
8
+
9
+ try:
10
+ import pathspec
11
+ except Exception: # pragma: no cover - optional at import time
12
+ pathspec = None
7
13
 
8
14
  from pypdf import PdfReader
9
15
 
@@ -161,6 +167,64 @@ def collect_files(paths: Iterable[str], recursive: bool = True) -> list[Path]:
161
167
  return files
162
168
 
163
169
 
170
def _scope_gitignore_pattern(line: str, rel_dir: str) -> str | None:
    """Rewrite one ``.gitignore`` line so it applies relative to the spec root.

    Args:
        line: A non-blank, non-comment pattern line.
        rel_dir: POSIX path of the directory holding the ``.gitignore``,
            relative to the spec root; ``""`` for the root itself.

    Returns:
        The rewritten pattern, or ``None`` when the line carries no pattern
        (for example a bare ``/`` or a bare ``!``).
    """
    negated = line.startswith("!")
    body = line[1:] if negated else line
    if not body.strip("/"):
        return None
    if not rel_dir:
        # Root-level lines are already expressed relative to the spec root;
        # pass them through untouched so a leading "/" keeps its anchoring
        # and "\#" / "\!" escapes keep their literal meaning.
        return line
    if "/" in body.rstrip("/"):
        # A slash at the start or in the middle anchors the pattern to the
        # directory that contains the .gitignore file.
        scoped = f"{rel_dir}/{body.lstrip('/')}"
    else:
        # A slash-free pattern (optionally with a trailing "/") applies at
        # any depth below the directory that contains the .gitignore file.
        scoped = f"{rel_dir}/**/{body}"
    return f"!{scoped}" if negated else scoped


def build_gitignore_spec(root: Path) -> "pathspec.PathSpec | None":
    """Combine every ``.gitignore`` under *root* into a single path spec.

    The tree below *root* is walked (never descending into ``.git``), and the
    patterns of each ``.gitignore`` found are rewritten to be relative to
    *root*. Files are processed shallowest-first so that patterns from deeper
    directories come later in the combined list.

    Args:
        root: Directory the resulting patterns are relative to.

    Returns:
        A ``pathspec.PathSpec`` built with the ``"gitwildmatch"`` factory, or
        ``None`` when ``pathspec`` is unavailable, no ``.gitignore`` exists,
        or no usable pattern was found.
    """
    if pathspec is None:
        return None
    root = root.resolve()

    found: list[tuple[tuple[str, ...], Path]] = []
    for dirpath, dirnames, filenames in os.walk(root):
        if ".git" in dirnames:
            # Prune in place so os.walk does not descend into git metadata.
            dirnames.remove(".git")
        if ".gitignore" in filenames:
            directory = Path(dirpath)
            found.append((directory.relative_to(root).parts, directory / ".gitignore"))

    if not found:
        return None

    # os.walk order is filesystem dependent; sort for a deterministic result
    # with parent directories ahead of their children.
    found.sort(key=lambda item: (len(item[0]), "/".join(item[0])))

    patterns: list[str] = []
    for rel_parts, gitignore in found:
        try:
            text = gitignore.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Best effort: an unreadable .gitignore contributes no patterns.
            continue
        # Joining the parts yields "" for the root directory; as_posix()
        # would yield ".", which must not be used as a pattern prefix.
        rel_dir = "/".join(rel_parts)
        for raw in text.splitlines():
            # Trailing whitespace is insignificant unless it is escaped.
            line = raw if raw.endswith("\\ ") else raw.rstrip()
            if not line or line.startswith("#"):
                continue
            scoped = _scope_gitignore_pattern(line, rel_dir)
            if scoped is not None:
                patterns.append(scoped)

    if not patterns:
        return None
    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)
226
+
227
+
164
228
  TYPE_EXTS = {
165
229
  "bash": {".bash"},
166
230
  "c": {".c", ".h"},
@@ -237,21 +301,46 @@ def _matches_globs(path: str, globs: list[str]) -> bool:
237
301
  return False
238
302
 
239
303
 
304
+ def _is_hidden_path(path: Path) -> bool:
305
+ return any(part.startswith(".") for part in path.parts if part)
306
+
307
+
240
308
  def collect_candidates(
241
309
  paths: Iterable[str],
242
310
  cwd: Path,
243
311
  recursive: bool = True,
244
312
  include_globs: list[str] | None = None,
245
313
  type_exts: set[str] | None = None,
314
+ include_hidden: bool = False,
315
+ ignore_spec: "pathspec.PathSpec | None" = None,
316
+ ignore_root: Path | None = None,
246
317
  ) -> list[Path]:
247
318
  files = collect_files(paths, recursive=recursive)
319
+ explicit_files: set[Path] = set()
320
+ for raw in paths:
321
+ p = Path(raw)
322
+ if p.exists() and p.is_file():
323
+ explicit_files.add(p.resolve())
248
324
  candidates: list[Path] = []
249
325
  for fp in files:
326
+ fp_resolved = fp.resolve()
327
+ is_explicit = fp_resolved in explicit_files
328
+ if not include_hidden and not is_explicit and _is_hidden_path(fp):
329
+ continue
330
+
250
331
  try:
251
332
  key = fp.relative_to(cwd).as_posix()
252
333
  except ValueError:
253
334
  key = fp.as_posix()
254
335
 
336
+ if ignore_spec is not None and ignore_root is not None and not is_explicit:
337
+ try:
338
+ rel = fp.relative_to(ignore_root).as_posix()
339
+ except ValueError:
340
+ rel = None
341
+ if rel and ignore_spec.match_file(rel):
342
+ continue
343
+
255
344
  if include_globs and not _matches_globs(key, include_globs):
256
345
  continue
257
346
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlmgrep
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: Grep-shaped CLI search powered by DSPy RLM
5
5
  Author: rlmgrep
6
6
  License: MIT
@@ -8,6 +8,7 @@ Requires-Python: >=3.11
8
8
  Description-Content-Type: text/markdown
9
9
  Requires-Dist: dspy>=3.1.1
10
10
  Requires-Dist: markitdown[all]>=0.1.4
11
+ Requires-Dist: pathspec>=0.12.1
11
12
  Requires-Dist: pypdf>=4.0.0
12
13
 
13
14
  # rlmgrep
@@ -93,6 +94,8 @@ Common options:
93
94
  - `-m N` max matching lines per file
94
95
  - `-g GLOB` include files matching glob (repeatable, comma-separated)
95
96
  - `--type T` include file types (repeatable, comma-separated)
97
+ - `--hidden` include hidden files and directories
98
+ - `--no-ignore` do not respect `.gitignore`
96
99
  - `--no-recursive` do not recurse directories
97
100
  - `-a`, `--text` treat binary files as text
98
101
  - `-y`, `--yes` skip file count confirmation
@@ -125,6 +128,7 @@ rg -l "token" . | rlmgrep --files-from-stdin --answer "What does this token cont
125
128
  ## Input selection
126
129
 
127
130
  - Directories are searched recursively by default. Use `--no-recursive` to stop recursion.
131
+ - Hidden files and directories are skipped and `.gitignore` rules are respected by default. Use `--hidden` to include hidden paths and `--no-ignore` to include files matched by `.gitignore`.
128
132
  - `--type` uses built-in type mappings (e.g., `py`, `js`, `md`); unknown values are treated as file extensions.
129
133
  - `-g/--glob` matches path globs against normalized paths (forward slashes).
130
134
  - Paths are printed relative to the current working directory when possible.
@@ -1,3 +1,4 @@
1
1
  dspy>=3.1.1
2
2
  markitdown[all]>=0.1.4
3
+ pathspec>=0.12.1
3
4
  pypdf>=4.0.0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes