agent-wiki-cli 0.3.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_wiki_cli-0.3.28.dist-info/METADATA +425 -0
- agent_wiki_cli-0.3.28.dist-info/RECORD +47 -0
- agent_wiki_cli-0.3.28.dist-info/WHEEL +5 -0
- agent_wiki_cli-0.3.28.dist-info/entry_points.txt +2 -0
- agent_wiki_cli-0.3.28.dist-info/licenses/LICENSE +21 -0
- agent_wiki_cli-0.3.28.dist-info/top_level.txt +1 -0
- llm_wiki_cli/__init__.py +7 -0
- llm_wiki_cli/cli.py +231 -0
- llm_wiki_cli/commands/__init__.py +1 -0
- llm_wiki_cli/commands/bootstrap_cmd.py +1072 -0
- llm_wiki_cli/commands/bump_cmd.py +55 -0
- llm_wiki_cli/commands/context_cmd.py +427 -0
- llm_wiki_cli/commands/extract_cmd.py +745 -0
- llm_wiki_cli/commands/generate_prompt_cmd.py +89 -0
- llm_wiki_cli/commands/hook_cmd.py +161 -0
- llm_wiki_cli/commands/init_cmd.py +92 -0
- llm_wiki_cli/commands/lint_cmd.py +294 -0
- llm_wiki_cli/commands/migrate_cmd.py +892 -0
- llm_wiki_cli/commands/release_cmd.py +163 -0
- llm_wiki_cli/commands/status_cmd.py +70 -0
- llm_wiki_cli/commands/sync_cmd.py +521 -0
- llm_wiki_cli/commands/trigger_cmd.py +205 -0
- llm_wiki_cli/commands/uninstall_cmd.py +221 -0
- llm_wiki_cli/commands/upgrade_cmd.py +196 -0
- llm_wiki_cli/config.py +318 -0
- llm_wiki_cli/extractors/__init__.py +46 -0
- llm_wiki_cli/extractors/common.py +90 -0
- llm_wiki_cli/extractors/go_extractor.py +143 -0
- llm_wiki_cli/extractors/go_scripts/go.mod +3 -0
- llm_wiki_cli/extractors/go_scripts/main.go +668 -0
- llm_wiki_cli/extractors/python_extractor.py +346 -0
- llm_wiki_cli/extractors/rust_extractor.py +143 -0
- llm_wiki_cli/extractors/rust_scripts/Cargo.lock +110 -0
- llm_wiki_cli/extractors/rust_scripts/Cargo.toml +11 -0
- llm_wiki_cli/extractors/rust_scripts/src/main.rs +803 -0
- llm_wiki_cli/extractors/ts_extractor.py +206 -0
- llm_wiki_cli/extractors/ts_scripts/extract.js +485 -0
- llm_wiki_cli/extractors/ts_scripts/package.json +10 -0
- llm_wiki_cli/services/__init__.py +0 -0
- llm_wiki_cli/services/circuit_breaker.py +79 -0
- llm_wiki_cli/services/io.py +47 -0
- llm_wiki_cli/services/lockfile.py +60 -0
- llm_wiki_cli/services/packages.py +173 -0
- llm_wiki_cli/services/paths.py +31 -0
- llm_wiki_cli/services/schema.py +214 -0
- llm_wiki_cli/services/secure_file.py +22 -0
- llm_wiki_cli/services/versioning.py +193 -0
llm_wiki_cli/config.py
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""Shared constants and utilities for agent-wiki-cli."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from fnmatch import fnmatch
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
DEFAULT_WIKI_DIR = "docs/llm_wiki"

# Directory names that are pruned from every source-file scan.  The set lists
# both conventional environment folder names (``.venv``) and environment
# internals (``site-packages``) on purpose, so that a virtualenv which has been
# renamed is still kept out of the wiki.
EXCLUDED_DIRS: set[str] = {
    # Version control and general-purpose caches
    ".cache",
    ".direnv",
    ".git",
    # Python environments and packaging artefacts
    ".eggs",
    ".env",
    ".venv",
    ".virtualenv",
    "__pycache__",
    "__pypackages__",
    "env",
    "site-packages",
    "venv",
    "virtualenv",
    # Python tooling caches and reports
    ".mypy_cache",
    ".nox",
    ".pyre",
    ".pytest_cache",
    ".ruff_cache",
    ".tox",
    "htmlcov",
    # JavaScript / TypeScript tooling
    ".next",
    ".npm",
    ".nuxt",
    ".parcel-cache",
    ".pnpm-store",
    ".svelte-kit",
    ".vite",
    ".yarn",
    "bower_components",
    "jspm_packages",
    "node_modules",
    # Generic build and report output
    "build",
    "coverage",
    "dist",
    "out",
    "target",
}
|
|
53
|
+
|
|
54
|
+
AGENT_CHOICES = ["claude", "cursor", "copilot", "aider", "opencode", "generic"]

# Agents that ship a real command-line executable.
# Key: agent name.  Value: the executable to invoke for that agent.
CLI_AGENTS: dict[str, str] = {
    "claude": "claude",
    "aider": "aider",
    "opencode": "opencode",
}

# Agents that only live inside an IDE and therefore cannot run headlessly.
IDE_AGENTS: set[str] = {"cursor", "copilot", "generic"}

# Filename globs used for Docker file discovery.
DOCKERFILE_PATTERNS: list[str] = ["Dockerfile", "Dockerfile.*", "*.dockerfile"]

# Filename globs for Compose files: both the ``docker-compose`` and the plain
# ``compose`` stem, both YAML extensions, with and without an infix such as
# ``.prod``.
COMPOSE_PATTERNS: list[str] = [
    "docker-compose.yml",
    "docker-compose.*.yml",
    "docker-compose.yaml",
    "docker-compose.*.yaml",
    "compose.yml",
    "compose.*.yml",
    "compose.yaml",
    "compose.*.yaml",
]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class PathValidationError(ValueError):
    """Raised when a user-provided path escapes the project root."""


def validate_path(path: str, label: str = "path") -> Path:
    """Resolve *path* against the working directory and confine it there.

    Parameters
    ----------
    path:
        User-supplied path; relative paths are interpreted relative to the
        current working directory, which is treated as the repository root.
    label:
        Human-readable name of the argument, used only in the error message.

    Returns
    -------
    Path
        The fully resolved path (symlinks and ``..`` segments collapsed).

    Raises
    ------
    PathValidationError
        If the resolved path is not located inside the resolved working
        directory.  The underlying ``ValueError`` from ``relative_to`` is
        attached as ``__cause__``.
    """
    # Resolve the root once so both sides of the comparison use the same
    # canonical form (e.g. when the cwd itself is reached through a symlink).
    cwd = Path.cwd().resolve()
    resolved = (cwd / path).resolve()
    try:
        resolved.relative_to(cwd)
    except ValueError as exc:
        raise PathValidationError(
            f"Error: {label} '{path}' resolves to '{resolved}', "
            f"which is outside the project root '{cwd}'."
        ) from exc
    return resolved
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Language name -> extractor entry point, written as "module.path:ClassName".
# Register additional language extractors by adding an entry here.
EXTRACTOR_REGISTRY: dict[str, str] = {
    "python": "llm_wiki_cli.extractors.python_extractor:PythonExtractor",
    "typescript": "llm_wiki_cli.extractors.ts_extractor:TypeScriptExtractor",
    "go": "llm_wiki_cli.extractors.go_extractor:GoExtractor",
    "rust": "llm_wiki_cli.extractors.rust_extractor:RustExtractor",
}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_agent_config_path(wiki_dir: "str | Path") -> Path:
    """Return where the local-only agent config file lives.

    When the current working directory contains a ``.git`` directory the file
    is ``.git/.llm-wiki-agent``: it is never committed, so every developer on
    a shared repository can pick their own agent without affecting teammates.

    Otherwise (no ``.git`` directory in the working directory, e.g. bare CI
    environments or tests that never ran ``git init``) the file is
    ``<wiki_dir>/.llm-wiki-agent``.

    The returned path is relative unless *wiki_dir* is absolute; nothing is
    created on disk.
    """
    git_dir = Path(".git")
    parent = git_dir if git_dir.is_dir() else Path(wiki_dir)
    return parent / ".llm-wiki-agent"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# Values used for a fresh installation and for any key that is missing from a
# persisted config file.
_DEFAULT_CONFIG: dict[str, object] = {"agent": "generic", "quality_hints": True}
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@dataclass(frozen=True)
class _GitignoreRule:
    # One parsed pattern line of a .gitignore file.

    # Directory of the owning .gitignore relative to the scan root, as a
    # slash-separated path without leading/trailing "/" ("" for the root file).
    base: str
    # Pattern text with the leading "!" and "/" markers and the trailing "/"
    # already removed, and backslashes converted to forward slashes.
    pattern: str
    # True when the line started with "!" (a match un-ignores the path).
    negated: bool
    # True when the line ended with "/" (pattern names a directory).
    directory_only: bool
    # True when the line (after an optional "!") started with "/".
    anchored: bool
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class GitIgnoreMatcher:
    """Ordered gitignore matcher for repository scans.

    The matcher holds an already-parsed, ordered rule list so that the same
    ``.gitignore`` files are not re-read for every source file.  The rules
    carry what the extractors need: negation, root-anchored patterns, a base
    directory for nested ``.gitignore`` files, directory-only rules, and
    common ``**`` patterns.
    """

    def __init__(self, rules: "list[_GitignoreRule]"):
        # Order matters: later rules take precedence over earlier ones.
        self._rules = rules

    def is_ignored(self, rel_path: str) -> bool:
        """Return True when *rel_path* (relative to the scan root) is ignored.

        Backslashes are treated as path separators and surrounding slashes
        are dropped before matching.  The last rule that matches decides:
        a plain rule ignores the path, a negated rule un-ignores it.
        """
        normalized = rel_path.replace("\\", "/").strip("/")
        # "Last match wins" is the same as "first match when scanning from
        # the end", which lets us stop at the first hit.
        for rule in reversed(self._rules):
            if _rule_matches(normalized, rule):
                return not rule.negated
        return False
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _parse_gitignore_file(gitignore_path: Path, base: str = "") -> "list[_GitignoreRule]":
    """Parse one ``.gitignore`` file into an ordered list of rules.

    Parameters
    ----------
    gitignore_path:
        File to read.  A missing file yields an empty list.
    base:
        Directory that owns the file, relative to the scan root (``""`` for
        the root); stored on every rule with surrounding slashes removed.

    Returns
    -------
    list[_GitignoreRule]
        Rules in file order.  Blank lines, whitespace-only lines and lines
        starting with ``#`` are skipped.  A leading ``!`` marks the rule as
        negated, a following leading ``/`` as anchored, a trailing ``/`` as
        directory-only; those markers are removed from the stored pattern.

    Notes
    -----
    * Trailing spaces are dropped, as git does, unless the line ends with a
      backslash-escaped space; such lines are left untouched.
    * A UTF-8 byte-order mark at the start of the file is skipped and
      undecodable bytes are replaced instead of raising, so one odd byte
      cannot abort a repository scan.
    * I/O errors are swallowed on purpose (best effort): whatever was parsed
      before the error is returned.
    """
    rules: list[_GitignoreRule] = []
    if not gitignore_path.exists():
        return rules

    owner = base.strip("/")
    try:
        with open(gitignore_path, "r", encoding="utf-8-sig", errors="replace") as f:
            for raw in f:
                line = raw.rstrip("\n\r")
                trimmed = line.rstrip(" ")
                # Keep the line as-is when the trailing space is escaped.
                if not trimmed.endswith("\\"):
                    line = trimmed
                if not line or line.startswith("#"):
                    continue
                negated = line.startswith("!")
                if negated:
                    line = line[1:]
                anchored = line.startswith("/")
                if anchored:
                    line = line[1:]
                directory_only = line.endswith("/")
                line = line.rstrip("/")
                if line:
                    rules.append(_GitignoreRule(
                        base=owner,
                        pattern=line.replace("\\", "/"),
                        negated=negated,
                        directory_only=directory_only,
                        anchored=anchored,
                    ))
    except OSError:
        # Best effort: an unreadable .gitignore must not break the scan.
        pass
    return rules
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _double_star_variants(pattern: str) -> "list[str]":
    """Return *pattern* plus every spelling with non-final ``**`` segments dropped.

    gitignore lets ``**/`` stand for zero or more directories, whereas
    ``fnmatch`` needs the surrounding slashes to be present.  Matching each
    returned variant with ``fnmatch`` covers both the "zero directories" and
    the "one or more directories" reading.
    """
    segments = pattern.split("/")
    last = len(segments) - 1
    variants: list[list[str]] = [[]]
    for index, segment in enumerate(segments):
        kept = [prefix + [segment] for prefix in variants]
        # The size cap keeps a pathological pattern from exploding 2**k ways.
        if segment == "**" and index < last and len(variants) < 64:
            variants = variants + kept
        else:
            variants = kept
    return ["/".join(parts) for parts in variants]


def _match_gitignore_pattern(rel_path: str, pattern: str, *, directory_only: bool = False) -> bool:
    """Check whether a relative file path matches an un-anchored gitignore pattern.

    Parameters
    ----------
    rel_path:
        Path relative to the directory that owns the pattern.  Backslashes
        are treated as separators; surrounding slashes are ignored.
    pattern:
        Pattern text without ``!`` / trailing ``/`` markers.
    directory_only:
        True when the pattern line ended with ``/``.

    Returns
    -------
    bool
        ``False`` for an empty path or pattern.  Otherwise:

        * pattern without ``/``: any path component may match (for
          ``directory_only`` the final component, assumed to be the file
          name, is excluded);
        * ``directory_only`` pattern with ``/``, or a pattern ending in
          ``/**``: the path must be the named directory or lie below it;
        * any other pattern with ``/``: the whole path, or a parent of it,
          must match.

        ``**/`` may stand for zero directories, so ``**/x`` matches ``x`` at
        the top level and ``a/**/b`` matches ``a/b``.

    Globbing is delegated to ``fnmatch``, whose ``*`` also crosses ``/``.
    """
    rel_path = rel_path.replace("\\", "/").strip("/")
    pattern = pattern.replace("\\", "/").strip("/")

    if not rel_path or not pattern:
        return False

    if "/" not in pattern:
        parts = rel_path.split("/")
        if directory_only:
            parts = parts[:-1]
        return any(fnmatch(part, pattern) for part in parts)

    names_directory = directory_only
    if not directory_only and pattern.endswith("/**"):
        # "dir/**" means "everything below dir": match on the directory part.
        pattern = pattern[:-3].rstrip("/")
        names_directory = True

    for variant in _double_star_variants(pattern):
        if names_directory:
            if rel_path == variant or rel_path.startswith(variant + "/"):
                return True
        elif fnmatch(rel_path, variant):
            return True
        if fnmatch(rel_path, variant + "/**"):
            return True
    return False
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _rule_matches(rel_path: str, rule: "_GitignoreRule") -> bool:
    """Return True when *rel_path* is matched by *rule*.

    *rel_path* is relative to the scan root and already slash-normalised.
    A rule only applies to paths strictly below its ``base`` directory; the
    part of the path after the base is what the pattern is matched against.

    Anchored rules are matched from the start of that remainder:

    * directory-only: the remainder is the directory or lies below it;
    * pattern without ``/``: the first component of the remainder must
      match, so ``/build`` also covers files inside a top-level ``build``
      directory (gitignore excludes the contents of an ignored directory);
    * pattern with ``/``: the remainder, or a parent of it, must match.

    Un-anchored rules are delegated to ``_match_gitignore_pattern``.
    """
    base = rule.base
    if base:
        prefix = base + "/"
        # Also rejects rel_path == base: a rule never matches its own folder.
        if not rel_path.startswith(prefix):
            return False
        candidate = rel_path[len(prefix):]
    else:
        candidate = rel_path
    if not candidate:
        return False

    if not rule.anchored:
        return _match_gitignore_pattern(
            candidate,
            rule.pattern,
            directory_only=rule.directory_only,
        )

    pattern = rule.pattern
    if rule.directory_only:
        return (
            candidate == pattern
            or candidate.startswith(pattern + "/")
            or fnmatch(candidate, pattern + "/**")
        )
    if "/" not in pattern:
        # Compare against the top-level entry only; nested entries with the
        # same name stay unaffected because the rule is anchored.
        return fnmatch(candidate.split("/", 1)[0], pattern)
    return fnmatch(candidate, pattern) or fnmatch(candidate, pattern + "/**")
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def build_gitignore_matcher(root: "str | Path") -> GitIgnoreMatcher:
    """Parse the root and all nested ``.gitignore`` files once for a source scan.

    Parameters
    ----------
    root:
        Directory to scan.  A missing directory yields a matcher without
        rules.

    Returns
    -------
    GitIgnoreMatcher
        Matcher whose rules are ordered parent-before-child, so that a nested
        ``.gitignore`` takes precedence over the files above it (the matcher
        lets the last matching rule win).

    Directories listed in ``EXCLUDED_DIRS`` are pruned from the walk: their
    ``.gitignore`` files are not read and their contents are never visited.
    """
    import os

    root_path = Path(root)
    rules: list[_GitignoreRule] = []
    if not root_path.exists():
        return GitIgnoreMatcher(rules)

    # A top-down walk always yields a directory before its children, which a
    # plain lexicographic sort of the paths does not guarantee (".github"
    # sorts before ".gitignore").
    for dirpath, dirnames, filenames in os.walk(root_path):
        # In-place assignment prunes the walk; sorting keeps it deterministic.
        dirnames[:] = sorted(name for name in dirnames if name not in EXCLUDED_DIRS)
        if ".gitignore" not in filenames:
            continue
        current = Path(dirpath)
        try:
            rel_parent = current.relative_to(root_path)
        except ValueError:
            continue
        base = "" if rel_parent == Path(".") else rel_parent.as_posix()
        rules.extend(_parse_gitignore_file(current / ".gitignore", base))
    return GitIgnoreMatcher(rules)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def is_ignored_by_gitignore(rel_path: str, gitignore_path: Path = Path(".gitignore")) -> bool:
    """Tell whether *rel_path* is ignored by a single ``.gitignore`` file.

    The file is parsed on every call as a root-level file (empty base); the
    default is ``.gitignore`` in the current working directory.  A missing
    file ignores nothing.
    """
    rules = _parse_gitignore_file(gitignore_path, "")
    return GitIgnoreMatcher(rules).is_ignored(rel_path)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def read_config(wiki_dir: "str | Path") -> dict:
    """Read the persisted llm-wiki config as a dict.

    The file location comes from ``get_agent_config_path(wiki_dir)``.

    * Missing or empty (whitespace-only) file: a copy of the defaults.
    * Content not starting with ``{``: the legacy format, a bare agent name;
      returned as the defaults with ``"agent"`` set to that name.
    * Content starting with ``{``: parsed as JSON; keys missing from the
      file are filled in from the defaults.  Unparseable JSON is treated as
      a corrupted file and yields the defaults.

    The returned dict is always a fresh object; the module-level defaults
    are never handed out directly.
    """
    config_path = get_agent_config_path(wiki_dir)
    if not config_path.exists():
        return dict(_DEFAULT_CONFIG)

    raw = config_path.read_text(encoding="utf-8").strip()

    # An empty file carries no agent name; do not report agent == "".
    if not raw:
        return dict(_DEFAULT_CONFIG)

    # Backward compat: bare string = old format (just the agent name)
    if not raw.startswith("{"):
        return {**_DEFAULT_CONFIG, "agent": raw}

    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        # Corrupted file — treat as defaults
        return dict(_DEFAULT_CONFIG)

    # Fill in any missing keys from defaults
    for key, default in _DEFAULT_CONFIG.items():
        data.setdefault(key, default)
    return data
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def write_config(wiki_dir: "str | Path", data: dict) -> None:
    """Persist *data* as the llm-wiki config.

    The target is the file returned by ``get_agent_config_path(wiki_dir)``;
    missing parent directories are created.  The dict is written as
    2-space-indented JSON followed by a newline, encoded as UTF-8, replacing
    any previous content.
    """
    target = get_agent_config_path(wiki_dir)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, indent=2) + "\n"
    target.write_text(payload, encoding="utf-8")
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Extractor plugin architecture for agent-wiki-cli."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Protocol
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ExtractorProtocol(Protocol):
|
|
9
|
+
"""Protocol that all language extractors must implement.
|
|
10
|
+
|
|
11
|
+
An extractor is responsible for scanning source files of a particular
|
|
12
|
+
language, parsing their structure (classes, functions, imports, etc.)
|
|
13
|
+
and returning a uniform inventory dict.
|
|
14
|
+
|
|
15
|
+
Each value in the returned inventory dict **must** include a
|
|
16
|
+
``"language"`` key identifying which language produced the entry.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def extract(
|
|
20
|
+
self,
|
|
21
|
+
src_dir: str,
|
|
22
|
+
only_files: list[str] | None = None,
|
|
23
|
+
deep: bool = False,
|
|
24
|
+
) -> dict:
|
|
25
|
+
"""Scan *src_dir* and return an inventory dict mapping filepath → file_entry.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
src_dir:
|
|
30
|
+
Root directory to scan.
|
|
31
|
+
only_files:
|
|
32
|
+
Optional list of paths (relative to *src_dir*) to restrict
|
|
33
|
+
extraction to. When ``None``, all files of the supported
|
|
34
|
+
language found under *src_dir* are scanned.
|
|
35
|
+
deep:
|
|
36
|
+
When ``True``, include enriched data (docstrings, attributes,
|
|
37
|
+
method details, imports). When ``False``, return a slim
|
|
38
|
+
name-only summary suitable for quick index generation.
|
|
39
|
+
|
|
40
|
+
Returns
|
|
41
|
+
-------
|
|
42
|
+
dict
|
|
43
|
+
``{filepath: file_entry}`` where each ``file_entry`` contains
|
|
44
|
+
at minimum ``"classes"``, ``"functions"``, and ``"language"``.
|
|
45
|
+
"""
|
|
46
|
+
...
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Shared helpers for source-file extractor discovery and filtering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable
|
|
7
|
+
|
|
8
|
+
from ..config import EXCLUDED_DIRS, GitIgnoreMatcher, build_gitignore_matcher
|
|
9
|
+
|
|
10
|
+
# Language name -> file suffixes (including the dot) that belong to it.
LANGUAGE_EXTENSIONS: dict[str, tuple[str, ...]] = {
    "python": (".py",),
    "typescript": (".ts", ".tsx"),
    "go": (".go",),
    "rust": (".rs",),
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def should_skip_source_path(path: Path, src_path: Path, matcher: "GitIgnoreMatcher | None" = None) -> bool:
    """Decide whether *path* is excluded from source extraction.

    *path* must be located under *src_path* (``relative_to`` raises
    ``ValueError`` otherwise).  A path is skipped when any component of its
    relative path is listed in ``EXCLUDED_DIRS`` or when the optional
    *matcher* reports the relative path as ignored.
    """
    relative = path.relative_to(src_path)
    if any(part in EXCLUDED_DIRS for part in relative.parts):
        return True
    return bool(matcher and matcher.is_ignored(relative.as_posix()))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def discover_source_files(
    src_dir: str,
    extensions: Iterable[str],
    *,
    only_files: list[str] | None = None,
    language: str | None = None,
    matcher: "GitIgnoreMatcher | None" = None,
) -> list[str]:
    """List the source files below *src_dir* that are worth extracting.

    Parameters
    ----------
    src_dir:
        Root directory of the scan.
    extensions:
        File suffixes (with leading dot) to accept.
    only_files:
        Candidate paths relative to *src_dir*.  When ``None`` the tree is
        searched recursively for every suffix in *extensions*.
    language:
        Enables language-specific exclusions: ``"typescript"`` drops
        ``*.d.ts`` files and ``"go"`` drops ``*_test.go`` files.
    matcher:
        Pre-built gitignore matcher; one is built for *src_dir* when omitted.

    Returns
    -------
    list[str]
        Sorted, de-duplicated paths relative to the resolved *src_dir*, using
        forward slashes.  Candidates that cannot be resolved, resolve to a
        location outside *src_dir*, are not regular files, have a different
        suffix, sit in an excluded directory or are gitignored are left out.
        An empty result lets the language wrappers skip their toolchain when
        there is nothing useful to scan.
    """
    src_path = Path(src_dir).resolve()
    matcher = matcher or build_gitignore_matcher(src_path)
    extensions = tuple(extensions)

    if only_files is None:
        candidates = [hit for ext in extensions for hit in src_path.rglob(f"*{ext}")]
    else:
        candidates = [src_path / name for name in only_files]

    # File-name suffix that marks generated/duplicate files for a language.
    skipped_suffix = {"typescript": ".d.ts", "go": "_test.go"}.get(language)

    found: set[str] = set()
    for candidate in candidates:
        try:
            resolved = candidate.resolve()
            rel = resolved.relative_to(src_path)
        except (OSError, ValueError):
            continue
        if not resolved.is_file() or resolved.suffix not in extensions:
            continue
        if should_skip_source_path(resolved, src_path, matcher):
            continue
        if skipped_suffix and resolved.name.endswith(skipped_suffix):
            continue
        found.add(rel.as_posix())
    return sorted(found)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def filter_bundled_inventory(inventory: dict, scripts_dir: Path) -> dict:
    """Drop the extractor's own implementation files from a subprocess inventory.

    An entry is removed when its key, either as given (with backslashes
    turned into forward slashes) or after resolving it to an absolute path,
    lies below the resolved *scripts_dir*.  Relative keys are resolved
    against the current working directory.

    Returns a new dict; the surviving keys are the slash-normalised
    originals and the values are passed through untouched.
    """
    bundled_prefix = f"{scripts_dir.resolve().as_posix()}/"
    kept: dict = {}
    for raw_path, entry in inventory.items():
        posix_path = raw_path.replace("\\", "/")
        try:
            absolute = Path(raw_path).resolve().as_posix()
        except OSError:
            # Fall back to the textual form when the path cannot be resolved.
            absolute = posix_path
        is_bundled = any(p.startswith(bundled_prefix) for p in (posix_path, absolute))
        if not is_bundled:
            kept[posix_path] = entry
    return kept
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Go AST extractor for agent-wiki-cli.
|
|
2
|
+
|
|
3
|
+
Implements :class:`~llm_wiki_cli.extractors.ExtractorProtocol` by delegating
|
|
4
|
+
to a bundled Go script (``go_scripts/main.go``) that uses ``go/ast`` and
|
|
5
|
+
``go/parser`` for Go AST traversal.
|
|
6
|
+
|
|
7
|
+
Requirements
|
|
8
|
+
------------
|
|
9
|
+
* Go toolchain (``go``) on PATH.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import shutil
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
from .common import discover_source_files, filter_bundled_inventory
|
|
21
|
+
|
|
22
|
+
# Directory holding the bundled Go helper program, next to this module.
_GO_SCRIPTS_DIR = Path(__file__).with_name("go_scripts")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GoExtractor:
|
|
26
|
+
"""Extractor for Go source files using a ``go run`` subprocess.
|
|
27
|
+
|
|
28
|
+
Implements :class:`~llm_wiki_cli.extractors.ExtractorProtocol`.
|
|
29
|
+
|
|
30
|
+
Each returned file entry includes ``"language": "go"``.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
last_error: str | None = None
|
|
34
|
+
|
|
35
|
+
def extract(
|
|
36
|
+
self,
|
|
37
|
+
src_dir: str,
|
|
38
|
+
only_files: list[str] | None = None,
|
|
39
|
+
deep: bool = False,
|
|
40
|
+
) -> dict:
|
|
41
|
+
"""Scan *src_dir* for Go files and return an inventory dict.
|
|
42
|
+
|
|
43
|
+
Parameters
|
|
44
|
+
----------
|
|
45
|
+
src_dir:
|
|
46
|
+
Root directory to scan.
|
|
47
|
+
only_files:
|
|
48
|
+
Optional list of paths (relative to *src_dir*) to restrict
|
|
49
|
+
extraction to. When ``None``, all ``.go`` files found under
|
|
50
|
+
*src_dir* are scanned (excluding ``_test.go``, ``vendor/``, etc.).
|
|
51
|
+
deep:
|
|
52
|
+
When ``True``, include enriched data (doc comments, struct fields,
|
|
53
|
+
method details, imports). When ``False``, return a slim format.
|
|
54
|
+
|
|
55
|
+
Returns
|
|
56
|
+
-------
|
|
57
|
+
dict
|
|
58
|
+
``{filepath: file_entry}`` where each ``file_entry`` contains at
|
|
59
|
+
minimum ``"classes"``, ``"functions"``, and ``"language"``.
|
|
60
|
+
"""
|
|
61
|
+
self.last_error = None
|
|
62
|
+
source_files = discover_source_files(
|
|
63
|
+
src_dir, (".go",), only_files=only_files, language="go",
|
|
64
|
+
)
|
|
65
|
+
if not source_files:
|
|
66
|
+
return {}
|
|
67
|
+
|
|
68
|
+
if not shutil.which("go"):
|
|
69
|
+
self.last_error = "go not found. Install Go (https://go.dev/dl/) to enable Go extraction."
|
|
70
|
+
print(f"llm-wiki Go extractor: {self.last_error}", file=sys.stderr)
|
|
71
|
+
return {}
|
|
72
|
+
|
|
73
|
+
cmd = [
|
|
74
|
+
"go", "run", ".",
|
|
75
|
+
"--src-dir", str(Path(src_dir).resolve()),
|
|
76
|
+
]
|
|
77
|
+
cmd += ["--only-files", ",".join(source_files)]
|
|
78
|
+
if deep:
|
|
79
|
+
cmd.append("--deep")
|
|
80
|
+
|
|
81
|
+
try:
|
|
82
|
+
result = subprocess.run(
|
|
83
|
+
cmd,
|
|
84
|
+
capture_output=True,
|
|
85
|
+
text=True,
|
|
86
|
+
check=True,
|
|
87
|
+
timeout=120,
|
|
88
|
+
cwd=str(_GO_SCRIPTS_DIR),
|
|
89
|
+
)
|
|
90
|
+
except subprocess.CalledProcessError as exc:
|
|
91
|
+
self.last_error = "extraction failed"
|
|
92
|
+
print(
|
|
93
|
+
f"llm-wiki Go extractor: extraction failed.\n{exc.stderr}",
|
|
94
|
+
file=sys.stderr,
|
|
95
|
+
)
|
|
96
|
+
return {}
|
|
97
|
+
except subprocess.TimeoutExpired:
|
|
98
|
+
self.last_error = "extraction timed out after 120 s"
|
|
99
|
+
print(
|
|
100
|
+
"llm-wiki Go extractor: extraction timed out after 120 s.",
|
|
101
|
+
file=sys.stderr,
|
|
102
|
+
)
|
|
103
|
+
return {}
|
|
104
|
+
except FileNotFoundError:
|
|
105
|
+
self.last_error = "go executable not found"
|
|
106
|
+
print(
|
|
107
|
+
"llm-wiki Go extractor: go executable not found.",
|
|
108
|
+
file=sys.stderr,
|
|
109
|
+
)
|
|
110
|
+
return {}
|
|
111
|
+
|
|
112
|
+
# Forward any warnings the Go script wrote to stderr.
|
|
113
|
+
if result.stderr.strip():
|
|
114
|
+
sys.stderr.write(result.stderr)
|
|
115
|
+
|
|
116
|
+
if not result.stdout.strip():
|
|
117
|
+
return {}
|
|
118
|
+
|
|
119
|
+
try:
|
|
120
|
+
inventory: dict = json.loads(result.stdout)
|
|
121
|
+
except json.JSONDecodeError as exc:
|
|
122
|
+
self.last_error = "malformed JSON output"
|
|
123
|
+
print(
|
|
124
|
+
f"llm-wiki Go extractor: malformed JSON output — {exc}",
|
|
125
|
+
file=sys.stderr,
|
|
126
|
+
)
|
|
127
|
+
return {}
|
|
128
|
+
|
|
129
|
+
for entry in inventory.values():
|
|
130
|
+
entry["language"] = "go"
|
|
131
|
+
|
|
132
|
+
inventory = filter_bundled_inventory(inventory, _GO_SCRIPTS_DIR)
|
|
133
|
+
|
|
134
|
+
src_root = Path(src_dir).resolve()
|
|
135
|
+
normalized_inventory: dict = {}
|
|
136
|
+
for fp, data in inventory.items():
|
|
137
|
+
try:
|
|
138
|
+
rel = Path(fp).resolve().relative_to(src_root).as_posix()
|
|
139
|
+
except ValueError:
|
|
140
|
+
rel = fp.replace("\\", "/")
|
|
141
|
+
normalized_inventory[rel] = data
|
|
142
|
+
|
|
143
|
+
return normalized_inventory
|