java-codebase-rag 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +2813 -0
- brownfield_events.py +58 -0
- build_ast_graph.py +3081 -0
- chunk_heuristics.py +62 -0
- graph_enrich.py +1681 -0
- index_common.py +10 -0
- java_codebase_rag/__init__.py +1 -0
- java_codebase_rag/cli.py +761 -0
- java_codebase_rag/cli_progress.py +52 -0
- java_codebase_rag/config.py +327 -0
- java_codebase_rag/pipeline.py +189 -0
- java_codebase_rag-0.1.0.dist-info/METADATA +818 -0
- java_codebase_rag-0.1.0.dist-info/RECORD +27 -0
- java_codebase_rag-0.1.0.dist-info/WHEEL +5 -0
- java_codebase_rag-0.1.0.dist-info/entry_points.txt +3 -0
- java_codebase_rag-0.1.0.dist-info/licenses/LICENSE +21 -0
- java_codebase_rag-0.1.0.dist-info/top_level.txt +17 -0
- java_index_flow_lancedb.py +398 -0
- java_index_v1_common.py +33 -0
- java_ontology.py +446 -0
- kuzu_queries.py +1989 -0
- mcp_hints.py +748 -0
- mcp_v2.py +1957 -0
- path_filtering.py +472 -0
- pr_analysis.py +534 -0
- search_lancedb.py +1075 -0
- server.py +578 -0
path_filtering.py
ADDED
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
"""Layered path ignore rules for Java indexing and graph enrichment (B5).
|
|
2
|
+
|
|
3
|
+
Resolution order (later overrides earlier; innermost nested wins among peers):
|
|
4
|
+
|
|
5
|
+
1. ``builtin_default`` — legacy ``COMMON_EXCLUDED_PATH_PATTERNS`` (gitignore-style).
|
|
6
|
+
2. ``project_root`` — ``<project>/.java-codebase-rag/ignore``.
|
|
7
|
+
3. ``nested`` — each ``<dir>/.java-codebase-rag/ignore`` along the path from project root
|
|
8
|
+
to the file's parent (outer dirs first, inner dirs last).
|
|
9
|
+
4. ``gitignore`` — each ``.gitignore`` from project root down to the file's parent
|
|
10
|
+
(when ``use_gitignore`` is true), using :class:`pathspec.GitIgnoreSpec`.
|
|
11
|
+
|
|
12
|
+
Paths outside ``project_root`` are never ignored by this object.
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import fnmatch
|
|
17
|
+
import os
|
|
18
|
+
import warnings
|
|
19
|
+
from collections.abc import Iterator, Sequence
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import overload
|
|
23
|
+
|
|
24
|
+
from pathspec import GitIgnoreSpec
|
|
25
|
+
|
|
26
|
+
# Built-in exclusions for LocalFile sources: VCS metadata, IDE and dependency
# trees, compiled classes and test sources. Only production Java is indexed for
# now, which keeps the semantic index clean; pruning also avoids EMFILE under
# default ulimits when the engine traverses in parallel.
#
# Build-output directory names (``out``, ``build``, ``target``) are deliberately
# NOT listed here. They are legal Java package names as well (for example
# ``com.example.out.api``), and the unconditional ``**/out/**`` pattern that
# previously lived in this list false-matched such packages and silently
# dropped real source files. Those directories are pruned only when they sit
# next to a build-tool indicator (``pom.xml``, ``build.gradle``,
# ``build.gradle.kts``, ``settings.gradle``, ``settings.gradle.kts``) — see
# ``_is_build_output_dir`` and ``BUILD_DIR_NAMES``. To skip an arbitrary nested
# directory, add a ``.java-codebase-rag/ignore`` entry at the project or
# subtree root instead.
COMMON_EXCLUDED_PATH_PATTERNS: list[str] = [
    # Hidden files and tool directories.
    "**/.*",
    "**/.git/**",
    "**/.idea/**",
    "**/.venv/**",
    "**/node_modules/**",
    # Compiled output.
    "**/*.class",
    # Test trees.
    "**/src/test/java/**",
    "**/src/test/resources/**",
]

# Candidate build-output directory names. A name from this tuple is pruned only
# if its parent directory contains one of ``BUILD_TOOL_INDICATORS``.
BUILD_DIR_NAMES: tuple[str, ...] = ("target", "build", "out")

# Marker files identifying a directory as a JVM build module. A
# ``BUILD_DIR_NAMES`` entry sitting next to one of these is treated as build
# output and pruned from the walk.
BUILD_TOOL_INDICATORS: tuple[str, ...] = (
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    "settings.gradle",
    "settings.gradle.kts",
)

# Directory names pruned no matter what sits beside them (universal nuisance
# dirs; never a legal package name in practice).
UNCONDITIONAL_PRUNE_DIRS: frozenset[str] = frozenset(
    (".git", ".idea", ".venv", "node_modules")
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _is_build_output_dir(parent_dir: str, dirname: str) -> bool:
    """Decide whether ``<parent_dir>/<dirname>`` should be treated as JVM build output.

    Only names listed in :data:`BUILD_DIR_NAMES` are candidates, and a candidate
    counts as build output solely when ``parent_dir`` also holds an entry named
    like one of :data:`BUILD_TOOL_INDICATORS` (Maven/Gradle marker files).
    Without such a marker the directory is an ordinary one, so sources in
    packages such as ``com.example.out.api`` are still walked.

    Returns ``False`` when ``parent_dir`` cannot be listed.
    """
    if dirname in BUILD_DIR_NAMES:
        try:
            with os.scandir(parent_dir) as entries:
                present = {item.name for item in entries}
        except OSError:
            # Unreadable parent: do not prune on a guess.
            return False
        return not present.isdisjoint(BUILD_TOOL_INDICATORS)
    return False
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def compile_excluded_glob_patterns(
    patterns: Sequence[str] | tuple[str, ...],
) -> list[str]:
    """Return the exclude patterns as a new list (same as the ast-graph ``index`` compile step)."""
    return [*patterns]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def is_relative_path_excluded(rel_posix: str, exclude_globs: list[str]) -> bool:
    """Report whether a project-relative path is hit by any exclude glob.

    Each glob is tried with :func:`fnmatch.fnmatch` against the path itself and
    against the path prefixed with ``**/``, so that ``**/<something>`` globs
    also cover entries located directly at the project root.
    """
    floating = f"**/{rel_posix}"
    return any(
        fnmatch.fnmatch(rel_posix, glob) or fnmatch.fnmatch(floating, glob)
        for glob in exclude_globs
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(frozen=True)
class IgnoreLayer:
    """Immutable record of one ignore configuration rooted at ``root``.

    Patterns held by ``spec`` are interpreted relative to ``root``.
    """

    # Directory the patterns are anchored at.
    root: Path
    # Compiled gitignore-style matcher for this layer.
    spec: GitIgnoreSpec
    # Layer label, e.g. ``"builtin_default"``, ``"project_root"``, ``"nested"``
    # or ``"gitignore"``.
    source: str
    # File the deciding rule came from; ``None`` when no file is involved.
    ignore_file: Path | None = None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _read_ignore_lines(path: Path) -> list[str]:
|
|
120
|
+
try:
|
|
121
|
+
text = path.read_text(encoding="utf-8", errors="replace")
|
|
122
|
+
except OSError:
|
|
123
|
+
return []
|
|
124
|
+
return text.splitlines()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _line_has_negation(lines: Sequence[str]) -> bool:
|
|
128
|
+
for raw in lines:
|
|
129
|
+
s = raw.strip()
|
|
130
|
+
if not s or s.startswith("#"):
|
|
131
|
+
continue
|
|
132
|
+
if s.startswith("\\!"):
|
|
133
|
+
continue
|
|
134
|
+
if s.startswith("!"):
|
|
135
|
+
return True
|
|
136
|
+
return False
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _scan_negation_any_bundle_ignore(project_root: Path) -> bool:
|
|
140
|
+
"""Return True if any ``.java-codebase-rag/ignore`` contains a negation (``!``) line.
|
|
141
|
+
|
|
142
|
+
Runs one ``rglob`` at :class:`LayeredIgnore` construction. Fine for typical
|
|
143
|
+
repos; very large monorepos pay a full-tree walk on every new ``LayeredIgnore``
|
|
144
|
+
instance (same for :func:`_scan_negation_any_gitignore`).
|
|
145
|
+
"""
|
|
146
|
+
root = project_root.resolve()
|
|
147
|
+
try:
|
|
148
|
+
for p in root.rglob(".java-codebase-rag"):
|
|
149
|
+
if not p.is_dir():
|
|
150
|
+
continue
|
|
151
|
+
ign = p / "ignore"
|
|
152
|
+
if ign.is_file() and _line_has_negation(_read_ignore_lines(ign)):
|
|
153
|
+
return True
|
|
154
|
+
except OSError:
|
|
155
|
+
return False
|
|
156
|
+
return False
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _scan_negation_any_gitignore(project_root: Path) -> bool:
|
|
160
|
+
"""See :func:`_scan_negation_any_bundle_ignore` (also uses ``rglob``)."""
|
|
161
|
+
root = project_root.resolve()
|
|
162
|
+
try:
|
|
163
|
+
for p in root.rglob(".gitignore"):
|
|
164
|
+
if p.is_file() and _line_has_negation(_read_ignore_lines(p)):
|
|
165
|
+
return True
|
|
166
|
+
except OSError:
|
|
167
|
+
return False
|
|
168
|
+
return False
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _prefix_line_to_project(
|
|
172
|
+
prefix_posix: str,
|
|
173
|
+
raw_line: str,
|
|
174
|
+
) -> str | None:
|
|
175
|
+
"""Map a gitignore line from a subdirectory anchor to project-root-relative."""
|
|
176
|
+
line = raw_line.strip()
|
|
177
|
+
if not line or line.startswith("#"):
|
|
178
|
+
return None
|
|
179
|
+
neg = line.startswith("!")
|
|
180
|
+
body = line[1:] if neg else line
|
|
181
|
+
if body.startswith("\\#") or body.startswith("\\!"):
|
|
182
|
+
body = body[1:]
|
|
183
|
+
anchored = body.startswith("/")
|
|
184
|
+
if anchored:
|
|
185
|
+
body = body[1:]
|
|
186
|
+
if prefix_posix:
|
|
187
|
+
mapped = f"{prefix_posix}/{body}" if body else prefix_posix
|
|
188
|
+
else:
|
|
189
|
+
mapped = body
|
|
190
|
+
return f"!{mapped}" if neg else mapped
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _mega_build_for_rel(
|
|
194
|
+
self_root: Path,
|
|
195
|
+
rel_project: str,
|
|
196
|
+
*,
|
|
197
|
+
use_gitignore: bool,
|
|
198
|
+
builtin_lines: list[str],
|
|
199
|
+
project_ignore_path: Path,
|
|
200
|
+
project_lines: list[str] | None,
|
|
201
|
+
) -> tuple[list[str], list[tuple[str, Path | None, int, str]]]:
|
|
202
|
+
"""Mega gitignore lines (project-relative) + (source, file, line_no, pattern_text)."""
|
|
203
|
+
mega: list[str] = []
|
|
204
|
+
meta: list[tuple[str, Path | None, int, str]] = []
|
|
205
|
+
|
|
206
|
+
def extend_builtin() -> None:
|
|
207
|
+
for i, raw in enumerate(builtin_lines, start=1):
|
|
208
|
+
s = raw.strip()
|
|
209
|
+
if not s or s.startswith("#"):
|
|
210
|
+
continue
|
|
211
|
+
mega.append(raw.rstrip("\n"))
|
|
212
|
+
meta.append(("builtin_default", None, i, s))
|
|
213
|
+
|
|
214
|
+
def extend_file(source: str, path: Path, lines: Sequence[str]) -> None:
|
|
215
|
+
for lineno, raw in enumerate(lines, start=1):
|
|
216
|
+
s = raw.strip()
|
|
217
|
+
if not s or s.startswith("#"):
|
|
218
|
+
continue
|
|
219
|
+
mega.append(raw.rstrip("\n"))
|
|
220
|
+
meta.append((source, path, lineno, s))
|
|
221
|
+
|
|
222
|
+
extend_builtin()
|
|
223
|
+
if project_lines is not None:
|
|
224
|
+
extend_file("project_root", project_ignore_path, project_lines)
|
|
225
|
+
|
|
226
|
+
parts = Path(rel_project).parts
|
|
227
|
+
dir_parts = parts[:-1] if len(parts) > 1 else ()
|
|
228
|
+
for i in range(1, len(dir_parts) + 1):
|
|
229
|
+
anchor = self_root.joinpath(*dir_parts[:i])
|
|
230
|
+
nested_path = anchor / ".java-codebase-rag" / "ignore"
|
|
231
|
+
if not nested_path.is_file():
|
|
232
|
+
continue
|
|
233
|
+
prefix = anchor.relative_to(self_root).as_posix()
|
|
234
|
+
nlines = _read_ignore_lines(nested_path)
|
|
235
|
+
for lineno, raw in enumerate(nlines, start=1):
|
|
236
|
+
mapped = _prefix_line_to_project(prefix, raw)
|
|
237
|
+
if mapped is None:
|
|
238
|
+
continue
|
|
239
|
+
mega.append(mapped)
|
|
240
|
+
meta.append(("nested", nested_path, lineno, raw.strip()))
|
|
241
|
+
|
|
242
|
+
if use_gitignore:
|
|
243
|
+
for i in range(len(dir_parts) + 1):
|
|
244
|
+
anchor = self_root if i == 0 else self_root.joinpath(*dir_parts[:i])
|
|
245
|
+
git_path = anchor / ".gitignore"
|
|
246
|
+
if not git_path.is_file():
|
|
247
|
+
continue
|
|
248
|
+
prefix = anchor.relative_to(self_root).as_posix() if i > 0 else ""
|
|
249
|
+
glines = _read_ignore_lines(git_path)
|
|
250
|
+
for lineno, raw in enumerate(glines, start=1):
|
|
251
|
+
mapped = _prefix_line_to_project(prefix, raw)
|
|
252
|
+
if mapped is None:
|
|
253
|
+
continue
|
|
254
|
+
mega.append(mapped)
|
|
255
|
+
meta.append(("gitignore", git_path, lineno, raw.strip()))
|
|
256
|
+
|
|
257
|
+
return mega, meta
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _winning_row(
|
|
261
|
+
rel: str,
|
|
262
|
+
mega: list[str],
|
|
263
|
+
meta: list[tuple[str, Path | None, int, str]],
|
|
264
|
+
) -> tuple[str, Path | None, int, str]:
|
|
265
|
+
"""The last rule line that changes the cumulative match result (git semantics)."""
|
|
266
|
+
if not mega:
|
|
267
|
+
return "builtin_default", None, 1, ""
|
|
268
|
+
state = False
|
|
269
|
+
last_idx = 0
|
|
270
|
+
for i in range(len(mega)):
|
|
271
|
+
cur = GitIgnoreSpec.from_lines(mega[: i + 1]).match_file(rel)
|
|
272
|
+
if cur != state:
|
|
273
|
+
last_idx = i
|
|
274
|
+
state = cur
|
|
275
|
+
return meta[last_idx]
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class LayeredIgnore:
    """Layered ignore evaluation bound to one project root."""

    def __init__(
        self,
        project_root: Path | str,
        *,
        use_gitignore: bool = True,
        builtin_patterns: Sequence[str] | None = None,
    ) -> None:
        """Capture the root, the builtin patterns and the project-level ignore file.

        Args:
            project_root: Project directory; ``~`` is expanded and the path resolved.
            use_gitignore: Whether ``.gitignore`` files take part in the evaluation.
            builtin_patterns: Replacement for ``COMMON_EXCLUDED_PATH_PATTERNS``;
                ``None`` selects the defaults.
        """
        self.project_root = Path(project_root).expanduser().resolve()
        self.use_gitignore = use_gitignore

        if builtin_patterns is None:
            builtin_patterns = COMMON_EXCLUDED_PATH_PATTERNS
        self._builtin_lines = list(builtin_patterns)

        self._project_ignore_path = self.project_root / ".java-codebase-rag" / "ignore"
        self._project_lines: list[str] | None = (
            _read_ignore_lines(self._project_ignore_path)
            if self._project_ignore_path.is_file()
            else None
        )

        # Any negation rule in the tree means directory-level pruning could
        # hide a path that a ``!`` rule would bring back.
        negation_found = _scan_negation_any_bundle_ignore(self.project_root)
        if not negation_found and use_gitignore:
            negation_found = _scan_negation_any_gitignore(self.project_root)
        self._permissive_coco_walk = negation_found

    def cocoindex_excluded_patterns(self) -> list[str]:
        """Patterns for CocoIndex ``PatternFilePathMatcher.excluded_patterns``.

        When no negation rule exists anywhere under the project, a copy of the
        builtin patterns is returned (the pre-B5 behaviour). Otherwise the list
        is empty and callers must filter every path with :meth:`is_ignored`,
        because a negation could un-ignore paths below a pruned directory.
        """
        return [] if self._permissive_coco_walk else list(self._builtin_lines)

    def _rel_project(self, path: Path) -> str | None:
        """POSIX path of ``path`` relative to the project root, or ``None`` if outside."""
        try:
            inside = path.resolve().relative_to(self.project_root)
        except ValueError:
            return None
        return inside.as_posix()

    def _path_for_display(self, path: Path | None) -> str:
        """Shortest sensible rendering: project-relative, else cwd-relative, else as given."""
        if path is None:
            return ""
        try:
            return path.resolve().relative_to(self.project_root).as_posix()
        except ValueError:
            pass
        try:
            return path.resolve().relative_to(Path.cwd()).as_posix()
        except ValueError:
            return path.as_posix()

    def _mega(self, rel_project: str) -> tuple[list[str], GitIgnoreSpec, list[tuple[str, Path | None, int, str]]]:
        """Build the combined rule lines, their compiled spec and the provenance rows."""
        rule_lines, provenance = _mega_build_for_rel(
            self.project_root,
            rel_project,
            use_gitignore=self.use_gitignore,
            builtin_lines=self._builtin_lines,
            project_ignore_path=self._project_ignore_path,
            project_lines=self._project_lines,
        )
        compiled = GitIgnoreSpec.from_lines(rule_lines)
        return rule_lines, compiled, provenance

    def is_ignored(self, path: Path) -> tuple[bool, IgnoreLayer | None]:
        """Return the ignore verdict for ``path`` plus the layer that decided it.

        Paths outside the project root, and paths no rule matches, yield
        ``(False, None)``.
        """
        rel = self._rel_project(path)
        if rel is None:
            return False, None
        mega, spec, meta = self._mega(rel)
        if not mega or not spec.match_file(rel):
            return False, None
        source, ignore_file, _line_no, _pattern = _winning_row(rel, mega, meta)
        layer = IgnoreLayer(
            root=self.project_root,
            spec=spec,
            source=source,
            ignore_file=ignore_file,
        )
        return True, layer

    def diagnose(self, path: Path) -> str:
        """Render :meth:`diagnose_dict` as a human-readable, multi-line string."""
        info = self.diagnose_dict(path)
        report = [
            f"ignored={info.get('ignored')}",
            f"layer={info.get('layer')!r}",
            f"matching_pattern={info.get('matching_pattern')!r}",
            str(info.get("explanation", "")),
        ]
        return "\n".join(report)

    def diagnose_dict(self, path: Path) -> dict[str, object]:
        """Structured diagnose payload for MCP ``diagnose_ignore``.

        Keys: ``ignored``, ``layer``, ``matching_pattern``, ``explanation``.
        """

        def not_ignored(explanation: str) -> dict[str, object]:
            return {
                "ignored": False,
                "layer": None,
                "matching_pattern": None,
                "explanation": explanation,
            }

        rel = self._rel_project(path)
        if rel is None:
            return not_ignored(
                f"Path {self._path_for_display(path)!r} is outside the configured "
                "project root — not ignored."
            )

        mega, spec, meta = self._mega(rel)
        if not mega or not spec.match_file(rel):
            return not_ignored(f"Path {rel!r} is not ignored by any configured layer.")

        src, fp, ln, pat = _winning_row(rel, mega, meta)
        if fp is None:
            expl = f"Excluded by builtin default ({src}) at builtin line {ln}: {pat!r}"
        else:
            expl = (
                f"Excluded by {self._path_for_display(fp)} ({src}) at line {ln}: {pat!r}"
            )
        return {
            "ignored": True,
            "layer": src,
            "matching_pattern": pat,
            "explanation": expl,
        }
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
@overload
def iter_java_source_files(root: Path, exclude_globs: list[str]) -> Iterator[Path]: ...


@overload
def iter_java_source_files(root: Path, *, ignore: LayeredIgnore) -> Iterator[Path]: ...


def iter_java_source_files(
    root: Path,
    exclude_globs: list[str] | None = None,
    *,
    ignore: LayeredIgnore | None = None,
) -> Iterator[Path]:
    """Walk ``root`` for ``*.java``, honouring prunes and layered ignore rules.

    Argument validation, the deprecation warning and construction of the
    ignore context all happen when this function is called, not on the first
    ``next()`` of the returned iterator. A misuse therefore fails at the call
    site, and ``stacklevel=2`` points at the caller.

    Args:
        root: Directory to walk.
        exclude_globs: Deprecated. Patterns used as the builtin layer of a
            fresh :class:`LayeredIgnore` with ``.gitignore`` handling disabled.
        ignore: Pre-built ignore context. When both this and ``exclude_globs``
            are omitted, ``LayeredIgnore(root)`` is used.

    Returns:
        A lazy iterator over the ``*.java`` files that are not ignored.

    Raises:
        TypeError: If both ``exclude_globs`` and ``ignore`` are supplied.
    """
    if exclude_globs is not None and ignore is not None:
        raise TypeError("pass either exclude_globs or ignore=, not both")
    if exclude_globs is not None:
        warnings.warn(
            "iter_java_source_files(root, exclude_globs) is deprecated; "
            "use iter_java_source_files(root, ignore=LayeredIgnore(root, ...)).",
            DeprecationWarning,
            stacklevel=2,
        )
        ignore_ctx = LayeredIgnore(root, builtin_patterns=exclude_globs, use_gitignore=False)
    elif ignore is not None:
        ignore_ctx = ignore
    else:
        ignore_ctx = LayeredIgnore(root)
    return _walk_java_sources(root.resolve(), ignore_ctx)


def _walk_java_sources(root: Path, ignore_ctx: LayeredIgnore) -> Iterator[Path]:
    """Yield non-ignored ``*.java`` files below the already-resolved ``root``."""
    for dirpath, dirnames, filenames in os.walk(root):
        # Universal nuisance dirs (VCS, IDE, deps) are pruned unconditionally.
        # Build-output dirs (``out`` / ``build`` / ``target``) are pruned only
        # when they sit alongside a build-tool indicator file — otherwise names
        # like ``out`` belong to a Java package (e.g. ``com.example.out.api``)
        # and must be walked. See ``_is_build_output_dir``.
        # Assigning in place is what makes ``os.walk`` skip the removed dirs.
        dirnames[:] = [
            d
            for d in dirnames
            if d not in UNCONDITIONAL_PRUNE_DIRS
            and not _is_build_output_dir(dirpath, d)
        ]
        for fn in filenames:
            if not fn.endswith(".java"):
                continue
            p = Path(dirpath) / fn
            ign, _ = ignore_ctx.is_ignored(p)
            if ign:
                continue
            yield p
|