agent-wiki-cli 0.3.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_wiki_cli-0.3.28.dist-info/METADATA +425 -0
- agent_wiki_cli-0.3.28.dist-info/RECORD +47 -0
- agent_wiki_cli-0.3.28.dist-info/WHEEL +5 -0
- agent_wiki_cli-0.3.28.dist-info/entry_points.txt +2 -0
- agent_wiki_cli-0.3.28.dist-info/licenses/LICENSE +21 -0
- agent_wiki_cli-0.3.28.dist-info/top_level.txt +1 -0
- llm_wiki_cli/__init__.py +7 -0
- llm_wiki_cli/cli.py +231 -0
- llm_wiki_cli/commands/__init__.py +1 -0
- llm_wiki_cli/commands/bootstrap_cmd.py +1072 -0
- llm_wiki_cli/commands/bump_cmd.py +55 -0
- llm_wiki_cli/commands/context_cmd.py +427 -0
- llm_wiki_cli/commands/extract_cmd.py +745 -0
- llm_wiki_cli/commands/generate_prompt_cmd.py +89 -0
- llm_wiki_cli/commands/hook_cmd.py +161 -0
- llm_wiki_cli/commands/init_cmd.py +92 -0
- llm_wiki_cli/commands/lint_cmd.py +294 -0
- llm_wiki_cli/commands/migrate_cmd.py +892 -0
- llm_wiki_cli/commands/release_cmd.py +163 -0
- llm_wiki_cli/commands/status_cmd.py +70 -0
- llm_wiki_cli/commands/sync_cmd.py +521 -0
- llm_wiki_cli/commands/trigger_cmd.py +205 -0
- llm_wiki_cli/commands/uninstall_cmd.py +221 -0
- llm_wiki_cli/commands/upgrade_cmd.py +196 -0
- llm_wiki_cli/config.py +318 -0
- llm_wiki_cli/extractors/__init__.py +46 -0
- llm_wiki_cli/extractors/common.py +90 -0
- llm_wiki_cli/extractors/go_extractor.py +143 -0
- llm_wiki_cli/extractors/go_scripts/go.mod +3 -0
- llm_wiki_cli/extractors/go_scripts/main.go +668 -0
- llm_wiki_cli/extractors/python_extractor.py +346 -0
- llm_wiki_cli/extractors/rust_extractor.py +143 -0
- llm_wiki_cli/extractors/rust_scripts/Cargo.lock +110 -0
- llm_wiki_cli/extractors/rust_scripts/Cargo.toml +11 -0
- llm_wiki_cli/extractors/rust_scripts/src/main.rs +803 -0
- llm_wiki_cli/extractors/ts_extractor.py +206 -0
- llm_wiki_cli/extractors/ts_scripts/extract.js +485 -0
- llm_wiki_cli/extractors/ts_scripts/package.json +10 -0
- llm_wiki_cli/services/__init__.py +0 -0
- llm_wiki_cli/services/circuit_breaker.py +79 -0
- llm_wiki_cli/services/io.py +47 -0
- llm_wiki_cli/services/lockfile.py +60 -0
- llm_wiki_cli/services/packages.py +173 -0
- llm_wiki_cli/services/paths.py +31 -0
- llm_wiki_cli/services/schema.py +214 -0
- llm_wiki_cli/services/secure_file.py +22 -0
- llm_wiki_cli/services/versioning.py +193 -0
|
@@ -0,0 +1,745 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import importlib
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from functools import lru_cache
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from ..config import COMPOSE_PATTERNS, DOCKERFILE_PATTERNS, EXCLUDED_DIRS, EXTRACTOR_REGISTRY, validate_path
|
|
14
|
+
from ..extractors.common import LANGUAGE_EXTENSIONS, discover_source_files
|
|
15
|
+
from ..services.packages import discover_packages, stamp_inventory_packages
|
|
16
|
+
|
|
17
|
+
# Re-export ComponentVisitor so existing callers that import it from here
|
|
18
|
+
# continue to work without modification.
|
|
19
|
+
from ..extractors.python_extractor import ComponentVisitor # noqa: F401
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ── Extractor loader ─────────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@lru_cache(maxsize=None)
|
|
26
|
+
def _load_extractor(entry_point: str):
|
|
27
|
+
"""Instantiate an extractor from a ``"module.path:ClassName"`` string."""
|
|
28
|
+
module_path, class_name = entry_point.rsplit(":", 1)
|
|
29
|
+
module = importlib.import_module(module_path)
|
|
30
|
+
return getattr(module, class_name)()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class ExtractorStatus:
    """Immutable record of how one language's extractor run ended."""

    # Language key the status belongs to.
    language: str
    # Outcome marker: "ok", "skipped" or "failed".
    state: str
    # Count of source files associated with this language for the run.
    files_found: int
    # Optional detail text; empty when there is nothing to report.
    message: str = ""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class InventoryResult:
    """Merged inventory together with the per-language extractor statuses."""

    # Extracted data keyed by file path.
    inventory: dict
    # Status record for each language, keyed by language name.
    statuses: dict[str, ExtractorStatus]

    @property
    def failed(self) -> list[ExtractorStatus]:
        """Statuses whose ``state`` equals ``"failed"``, in ``statuses`` order."""
        failures = []
        for status in self.statuses.values():
            if status.state == "failed":
                failures.append(status)
        return failures
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def print_inventory_failures(result: InventoryResult, *, file=None) -> None:
    """Emit one ``Error: ...`` line for every failed extractor in *result*.

    Lines are written to *file* when it is truthy, otherwise to
    ``sys.stderr``. The status message, when non-empty, is appended after
    a colon.
    """
    target = file or sys.stderr
    for failure in result.failed:
        line = f"Error: {failure.language} extraction failed"
        if failure.message:
            line += f": {failure.message}"
        print(line, file=target)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ── Backward-compatible public API ───────────────────────────────────
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_inventory_result(src_dir, deep=False, only_files=None, include_empty=False) -> InventoryResult:
    """Run every registered extractor over *src_dir* and merge the output.

    For each ``language -> entry point`` pair in :data:`EXTRACTOR_REGISTRY`
    the matching source files are discovered first. A language with no
    files is recorded as ``"skipped"`` and its extractor is never loaded.
    Otherwise the extractor is invoked; a raised exception or a truthy
    ``last_error`` attribute afterwards marks the language ``"failed"`` and
    its output is not merged.

    Args:
        src_dir: Root directory to scan.
        deep: Forwarded to each extractor; selects the enriched format
            (docstrings, attributes, methods, imports) instead of the slim one.
        only_files: Optional list of relative paths to restrict the scan to.
        include_empty: Forwarded to the Python extractor only; include files
            even when they have no extractable components.

    Returns:
        An :class:`InventoryResult` whose inventory entries have been passed
        through ``stamp_inventory_packages`` (package ownership stamping).
    """
    merged: dict = {}
    outcomes: dict[str, ExtractorStatus] = {}

    for language, entry_point in EXTRACTOR_REGISTRY.items():
        matched = discover_source_files(
            src_dir,
            LANGUAGE_EXTENSIONS.get(language, ()),
            only_files=only_files,
            language=language,
        )
        if not matched:
            outcomes[language] = ExtractorStatus(language, "skipped", 0)
            continue

        extractor = _load_extractor(entry_point)
        # Extractor instances are cached, so clear any error left over from
        # an earlier invocation before running again.
        if hasattr(extractor, "last_error"):
            extractor.last_error = None

        call_args = dict(src_dir=src_dir, only_files=only_files, deep=deep)
        if language == "python":
            call_args["include_empty"] = include_empty

        try:
            extracted = extractor.extract(**call_args)
        except Exception as exc:
            outcomes[language] = ExtractorStatus(language, "failed", len(matched), str(exc))
            continue

        failure = getattr(extractor, "last_error", None)
        if failure:
            outcomes[language] = ExtractorStatus(language, "failed", len(matched), str(failure))
            continue

        merged.update(extracted)
        outcomes[language] = ExtractorStatus(language, "ok", len(matched))

    # Stamp package ownership onto every merged entry.
    owning_packages = discover_packages(src_dir)
    stamp_inventory_packages(merged, owning_packages)

    return InventoryResult(inventory=merged, statuses=outcomes)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def get_inventory(src_dir, deep=False, only_files=None, include_empty=False):
    """Return just the inventory dict produced by :func:`get_inventory_result`.

    Kept for callers that predate :class:`InventoryResult`; extractor
    statuses are discarded.
    """
    result = get_inventory_result(
        src_dir,
        deep=deep,
        only_files=only_files,
        include_empty=include_empty,
    )
    return result.inventory
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def ensure_complete_inventory(result: InventoryResult) -> bool:
    """Tell whether *result* contains no failed extractor statuses."""
    failures = result.failed
    return False if failures else True
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def infer_language_from_path(filepath: str) -> str | None:
    """Map *filepath*'s suffix to a language name, or ``None`` if unknown.

    The first language in ``LANGUAGE_EXTENSIONS`` whose extension collection
    contains the suffix wins.
    """
    extension = Path(filepath).suffix
    return next(
        (lang for lang, known in LANGUAGE_EXTENSIONS.items() if extension in known),
        None,
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def languages_with_source(src_dir: str, only_files: list[str] | None = None) -> set[str]:
    """Collect the languages for which source discovery finds at least one file.

    Every entry of ``LANGUAGE_EXTENSIONS`` is probed with
    ``discover_source_files``; a truthy result includes the language.
    """
    return {
        language
        for language, extensions in LANGUAGE_EXTENSIONS.items()
        if discover_source_files(src_dir, extensions, only_files=only_files, language=language)
    }
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _inventory_or_exit(src_dir: str, *, deep: bool = False, only_files=None, include_empty: bool = False) -> dict:
    """Return the inventory for *src_dir*, exiting with status 1 on failure.

    When any extractor failed, the failures are printed via
    :func:`print_inventory_failures` before ``sys.exit(1)`` is called.
    """
    outcome = get_inventory_result(
        src_dir,
        deep=deep,
        only_files=only_files,
        include_empty=include_empty,
    )
    if outcome.failed:
        print_inventory_failures(outcome)
        sys.exit(1)
    return outcome.inventory
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _git_changed_files(src_dir: str) -> list[str] | None:
|
|
152
|
+
"""Return list of files changed in the last commit, relative to *src_dir*.
|
|
153
|
+
|
|
154
|
+
Returns None if git is unavailable or there are no commits.
|
|
155
|
+
"""
|
|
156
|
+
try:
|
|
157
|
+
result = subprocess.run(
|
|
158
|
+
["git", "diff", "--name-only", "HEAD~1..HEAD"],
|
|
159
|
+
capture_output=True, text=True, check=True, timeout=15,
|
|
160
|
+
cwd=src_dir,
|
|
161
|
+
)
|
|
162
|
+
return [line for line in result.stdout.splitlines() if line.strip()]
|
|
163
|
+
except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _summarize_inventory(inventory: dict) -> dict:
|
|
168
|
+
"""Produce a compact one-line-per-symbol summary from a shallow inventory."""
|
|
169
|
+
summary: dict[str, dict] = {}
|
|
170
|
+
for fp, data in inventory.items():
|
|
171
|
+
entry: dict[str, list] = {}
|
|
172
|
+
cls_names = [c["name"] for c in data.get("classes", [])]
|
|
173
|
+
fn_names = [f["name"] for f in data.get("functions", [])]
|
|
174
|
+
if cls_names:
|
|
175
|
+
entry["classes"] = cls_names
|
|
176
|
+
if fn_names:
|
|
177
|
+
entry["functions"] = fn_names
|
|
178
|
+
if entry:
|
|
179
|
+
summary[fp] = entry
|
|
180
|
+
return summary
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def run(args):
    """Entry point of the extract command.

    Reads its options from *args* via ``getattr`` with defaults
    (``src_dir``, ``changed``, ``summary``, ``deep``, ``paths``,
    ``package``, ``include_empty``), builds the inventory, and prints it as
    JSON on stdout. Progress and diagnostics go to stderr.

    Exit codes: 2 when ``--changed`` and ``--paths`` are combined, 1 when an
    extractor fails or the package filter matches nothing.
    """
    src_dir: str = getattr(args, "src_dir", ".")
    validate_path(src_dir, "--src-dir")

    use_changed: bool = getattr(args, "changed", False)
    want_summary: bool = getattr(args, "summary", False)
    deep: bool = getattr(args, "deep", False)
    explicit_paths: list[str] | None = getattr(args, "paths", None)
    package_filter: str | None = getattr(args, "package", None)
    include_empty: bool = getattr(args, "include_empty", False)

    if use_changed and explicit_paths:
        print("Error: --changed and --paths are mutually exclusive.", file=sys.stderr)
        sys.exit(2)

    only_files = None
    if use_changed:
        only_files = _git_changed_files(src_dir)
        if only_files is None:
            # Leave only_files as None so the extractors scan everything.
            print("Warning: Could not get changed files from git. Falling back to full scan.", file=sys.stderr)
        elif only_files:
            print(f"Extracting {len(only_files)} changed file(s)...", file=sys.stderr)
        else:
            print("No files changed in the last commit.", file=sys.stderr)
            return
    elif explicit_paths:
        only_files = explicit_paths
        print(f"Extracting {len(only_files)} specified path(s)...", file=sys.stderr)
    else:
        print(f"Extracting inventory from {src_dir}...", file=sys.stderr)

    outcome = get_inventory_result(
        src_dir,
        deep=deep,
        only_files=only_files,
        include_empty=include_empty,
    )
    if outcome.failed:
        print_inventory_failures(outcome)
        sys.exit(1)
    inventory = outcome.inventory

    if package_filter:
        selected = {}
        for path, details in inventory.items():
            if details.get("package") == package_filter:
                selected[path] = details
        inventory = selected
        if not inventory:
            print(f"No files found for package '{package_filter}'.", file=sys.stderr)
            sys.exit(1)

    if want_summary:
        inventory = _summarize_inventory(inventory)

    docker_inv = get_docker_inventory(src_dir)

    payload: dict = {"inventory": inventory}
    if docker_inv:
        payload["docker"] = docker_inv

    print(json.dumps(payload, indent=2))
    print(f"Extracted {len(inventory)} files with tracked components.", file=sys.stderr)
    docker_note = (
        f"Docker inventory: {len(docker_inv)} file(s)."
        if docker_inv
        else "No Docker/Compose files found."
    )
    print(docker_note, file=sys.stderr)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# ── Call-graph extraction for workflow detection ──────────────────────
|
|
248
|
+
|
|
249
|
+
def _module_name(filepath: str) -> str:
|
|
250
|
+
return Path(filepath).stem
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def get_call_graph(inventory: dict) -> dict:
|
|
254
|
+
"""Build cross-module call chains from a deep inventory.
|
|
255
|
+
|
|
256
|
+
Detects functions that import and reference symbols from 3+ other
|
|
257
|
+
project-internal modules — these are workflow candidates.
|
|
258
|
+
|
|
259
|
+
Returns a dict of workflow_name -> {entry, chain, modules_touched}.
|
|
260
|
+
"""
|
|
261
|
+
# Map of known module stems from inventory
|
|
262
|
+
known_modules = {_module_name(fp) for fp in inventory}
|
|
263
|
+
# Map of symbol name -> defining module stem
|
|
264
|
+
symbol_to_module: dict[str, str] = {}
|
|
265
|
+
for fp, data in inventory.items():
|
|
266
|
+
mod = _module_name(fp)
|
|
267
|
+
for cls in data.get("classes", []):
|
|
268
|
+
symbol_to_module[cls["name"]] = mod
|
|
269
|
+
for fn in data.get("functions", []):
|
|
270
|
+
symbol_to_module[fn["name"]] = mod
|
|
271
|
+
|
|
272
|
+
workflows: dict[str, dict] = {}
|
|
273
|
+
|
|
274
|
+
# Determine which paths are test files — skip them for workflow detection
|
|
275
|
+
_TEST_STEMS = {"conftest"}
|
|
276
|
+
_TEST_DIRS = {"tests", "test", "__tests__"}
|
|
277
|
+
|
|
278
|
+
for fp, data in inventory.items():
|
|
279
|
+
fp_path = Path(fp)
|
|
280
|
+
# Skip test files: file stem starts with 'test_' or lives under a tests dir
|
|
281
|
+
if fp_path.stem.startswith("test_") or fp_path.stem in _TEST_STEMS:
|
|
282
|
+
continue
|
|
283
|
+
if _TEST_DIRS & set(fp_path.parts):
|
|
284
|
+
continue
|
|
285
|
+
|
|
286
|
+
mod = _module_name(fp)
|
|
287
|
+
imports = data.get("imports", [])
|
|
288
|
+
|
|
289
|
+
# Resolve which internal modules this file imports from
|
|
290
|
+
imported_symbols: dict[str, str] = {} # symbol_name -> source_module
|
|
291
|
+
for imp in imports:
|
|
292
|
+
# Check if the imported name maps to a known symbol
|
|
293
|
+
name = imp["name"]
|
|
294
|
+
if name in symbol_to_module and symbol_to_module[name] != mod:
|
|
295
|
+
imported_symbols[name] = symbol_to_module[name]
|
|
296
|
+
# Also check if the import's module path contains a known module
|
|
297
|
+
imp_mod = imp.get("module", "")
|
|
298
|
+
imp_mod_stem = imp_mod.rsplit(".", 1)[-1] if imp_mod else ""
|
|
299
|
+
if imp_mod_stem in known_modules and imp_mod_stem != mod:
|
|
300
|
+
imported_symbols[name] = imp_mod_stem
|
|
301
|
+
|
|
302
|
+
if not imported_symbols:
|
|
303
|
+
continue
|
|
304
|
+
|
|
305
|
+
# For each function in this module, find which imported symbols it references
|
|
306
|
+
all_functions = list(data.get("functions", []))
|
|
307
|
+
for cls in data.get("classes", []):
|
|
308
|
+
for method in cls.get("methods", []):
|
|
309
|
+
all_functions.append(method)
|
|
310
|
+
|
|
311
|
+
for fn in all_functions:
|
|
312
|
+
touched_modules: set[str] = set()
|
|
313
|
+
chain: list[str] = []
|
|
314
|
+
|
|
315
|
+
# Check params, return types, decorators for references to imported symbols
|
|
316
|
+
for sym_name, src_mod in imported_symbols.items():
|
|
317
|
+
referenced = False
|
|
318
|
+
for p in fn.get("params", []):
|
|
319
|
+
if sym_name in p.get("type", ""):
|
|
320
|
+
referenced = True
|
|
321
|
+
if sym_name in fn.get("return_type", ""):
|
|
322
|
+
referenced = True
|
|
323
|
+
for dec in fn.get("decorators", []):
|
|
324
|
+
if sym_name in dec:
|
|
325
|
+
referenced = True
|
|
326
|
+
# Check docstring for symbol mentions
|
|
327
|
+
if sym_name in fn.get("docstring", ""):
|
|
328
|
+
referenced = True
|
|
329
|
+
|
|
330
|
+
if referenced:
|
|
331
|
+
touched_modules.add(src_mod)
|
|
332
|
+
chain.append(f"{src_mod}.{sym_name}")
|
|
333
|
+
|
|
334
|
+
# Workflow threshold: function touches 3+ other internal modules
|
|
335
|
+
if len(touched_modules) >= 3:
|
|
336
|
+
fn_name = fn["name"]
|
|
337
|
+
# Clean up workflow name
|
|
338
|
+
wf_name = fn_name.lstrip("_")
|
|
339
|
+
if wf_name == "run":
|
|
340
|
+
wf_name = f"{mod}_flow"
|
|
341
|
+
|
|
342
|
+
workflows[wf_name] = {
|
|
343
|
+
"entry": f"{mod}.{fn_name}",
|
|
344
|
+
"entry_module": mod,
|
|
345
|
+
"chain": chain,
|
|
346
|
+
"modules_touched": sorted(touched_modules | {mod}),
|
|
347
|
+
"docstring": fn.get("docstring", ""),
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
return workflows
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# ── Docker / Compose extraction ──────────────────────────────────────
|
|
354
|
+
|
|
355
|
+
def _parse_dockerfile(text: str) -> dict:
|
|
356
|
+
"""Parse a Dockerfile into a structured dict (line-based, no external deps)."""
|
|
357
|
+
stages: list[dict] = []
|
|
358
|
+
current_stage: str | None = None
|
|
359
|
+
ports: list[str] = []
|
|
360
|
+
env_vars: list[dict] = []
|
|
361
|
+
volumes: list[str] = []
|
|
362
|
+
copies: list[dict] = []
|
|
363
|
+
build_args: list[dict] = []
|
|
364
|
+
labels: dict[str, str] = {}
|
|
365
|
+
entrypoint: str = ""
|
|
366
|
+
cmd: str = ""
|
|
367
|
+
workdir: str = ""
|
|
368
|
+
healthcheck: str = ""
|
|
369
|
+
|
|
370
|
+
# Join continuation lines (trailing backslash)
|
|
371
|
+
logical_lines: list[str] = []
|
|
372
|
+
buf = ""
|
|
373
|
+
for raw in text.splitlines():
|
|
374
|
+
stripped = raw.rstrip()
|
|
375
|
+
if stripped.endswith("\\"):
|
|
376
|
+
buf += stripped[:-1] + " "
|
|
377
|
+
else:
|
|
378
|
+
buf += stripped
|
|
379
|
+
logical_lines.append(buf)
|
|
380
|
+
buf = ""
|
|
381
|
+
if buf:
|
|
382
|
+
logical_lines.append(buf)
|
|
383
|
+
|
|
384
|
+
for line in logical_lines:
|
|
385
|
+
trimmed = line.strip()
|
|
386
|
+
if not trimmed or trimmed.startswith("#"):
|
|
387
|
+
continue
|
|
388
|
+
|
|
389
|
+
upper = trimmed.split()[0].upper() if trimmed.split() else ""
|
|
390
|
+
|
|
391
|
+
if upper == "FROM":
|
|
392
|
+
parts = trimmed.split()
|
|
393
|
+
image = parts[1] if len(parts) >= 2 else "unknown"
|
|
394
|
+
alias = ""
|
|
395
|
+
if len(parts) >= 4 and parts[2].upper() == "AS":
|
|
396
|
+
alias = parts[3]
|
|
397
|
+
stage = {"image": image, "alias": alias}
|
|
398
|
+
stages.append(stage)
|
|
399
|
+
current_stage = alias or image
|
|
400
|
+
|
|
401
|
+
elif upper == "EXPOSE":
|
|
402
|
+
for token in trimmed.split()[1:]:
|
|
403
|
+
ports.append(token)
|
|
404
|
+
|
|
405
|
+
elif upper == "ENV":
|
|
406
|
+
rest = trimmed[4:].strip()
|
|
407
|
+
if "=" in rest:
|
|
408
|
+
for pair in re.findall(r'(\w+)=("(?:[^"\\]|\\.)*"|\S+)', rest):
|
|
409
|
+
env_vars.append({"name": pair[0], "default": pair[1].strip('"')})
|
|
410
|
+
else:
|
|
411
|
+
parts = rest.split(None, 1)
|
|
412
|
+
if len(parts) == 2:
|
|
413
|
+
env_vars.append({"name": parts[0], "default": parts[1]})
|
|
414
|
+
elif parts:
|
|
415
|
+
env_vars.append({"name": parts[0], "default": ""})
|
|
416
|
+
|
|
417
|
+
elif upper == "VOLUME":
|
|
418
|
+
rest = trimmed[7:].strip()
|
|
419
|
+
if rest.startswith("["):
|
|
420
|
+
for v in re.findall(r'"([^"]+)"', rest):
|
|
421
|
+
volumes.append(v)
|
|
422
|
+
else:
|
|
423
|
+
volumes.extend(rest.split())
|
|
424
|
+
|
|
425
|
+
elif upper in ("COPY", "ADD"):
|
|
426
|
+
parts = trimmed.split()
|
|
427
|
+
flags = [p for p in parts[1:] if p.startswith("--")]
|
|
428
|
+
non_flag = [p for p in parts[1:] if not p.startswith("--")]
|
|
429
|
+
src = " ".join(non_flag[:-1]) if len(non_flag) >= 2 else ""
|
|
430
|
+
dest = non_flag[-1] if non_flag else ""
|
|
431
|
+
from_stage = ""
|
|
432
|
+
for f in flags:
|
|
433
|
+
if f.startswith("--from="):
|
|
434
|
+
from_stage = f.split("=", 1)[1]
|
|
435
|
+
copies.append({"src": src, "dest": dest, "from_stage": from_stage, "instruction": upper})
|
|
436
|
+
|
|
437
|
+
elif upper == "WORKDIR":
|
|
438
|
+
workdir = trimmed.split(None, 1)[1] if len(trimmed.split()) > 1 else ""
|
|
439
|
+
|
|
440
|
+
elif upper == "ARG":
|
|
441
|
+
rest = trimmed[4:].strip()
|
|
442
|
+
if "=" in rest:
|
|
443
|
+
name, default = rest.split("=", 1)
|
|
444
|
+
build_args.append({"name": name.strip(), "default": default.strip()})
|
|
445
|
+
else:
|
|
446
|
+
build_args.append({"name": rest, "default": ""})
|
|
447
|
+
|
|
448
|
+
elif upper == "LABEL":
|
|
449
|
+
for pair in re.findall(r'(\S+)=("(?:[^"\\]|\\.)*"|\S+)', trimmed[6:]):
|
|
450
|
+
labels[pair[0]] = pair[1].strip('"')
|
|
451
|
+
|
|
452
|
+
elif upper == "ENTRYPOINT":
|
|
453
|
+
entrypoint = trimmed.split(None, 1)[1] if len(trimmed.split()) > 1 else ""
|
|
454
|
+
|
|
455
|
+
elif upper == "CMD":
|
|
456
|
+
cmd = trimmed.split(None, 1)[1] if len(trimmed.split()) > 1 else ""
|
|
457
|
+
|
|
458
|
+
elif upper == "HEALTHCHECK":
|
|
459
|
+
rest = trimmed.split(None, 1)[1] if len(trimmed.split()) > 1 else ""
|
|
460
|
+
if rest.upper() != "NONE":
|
|
461
|
+
healthcheck = rest
|
|
462
|
+
|
|
463
|
+
return {
|
|
464
|
+
"type": "dockerfile",
|
|
465
|
+
"stages": stages,
|
|
466
|
+
"ports": ports,
|
|
467
|
+
"env_vars": env_vars,
|
|
468
|
+
"volumes": volumes,
|
|
469
|
+
"copies": copies,
|
|
470
|
+
"build_args": build_args,
|
|
471
|
+
"labels": labels,
|
|
472
|
+
"entrypoint": entrypoint,
|
|
473
|
+
"cmd": cmd,
|
|
474
|
+
"workdir": workdir,
|
|
475
|
+
"healthcheck": healthcheck,
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def _parse_inline_yaml_list(value: str) -> list[str] | None:
|
|
480
|
+
"""Parse an inline YAML list like ``["CMD", "curl", "-f", "http://..."]``.
|
|
481
|
+
|
|
482
|
+
Returns a list of strings if the value is an inline list, otherwise None.
|
|
483
|
+
"""
|
|
484
|
+
value = value.strip()
|
|
485
|
+
if value.startswith("[") and value.endswith("]"):
|
|
486
|
+
inner = value[1:-1]
|
|
487
|
+
items: list[str] = []
|
|
488
|
+
for item in re.split(r",\s*", inner):
|
|
489
|
+
item = item.strip().strip('"').strip("'")
|
|
490
|
+
if item:
|
|
491
|
+
items.append(item)
|
|
492
|
+
return items
|
|
493
|
+
return None
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def _parse_compose(text: str) -> dict:
|
|
497
|
+
"""Parse a docker-compose YAML file using line-based parsing (no PyYAML).
|
|
498
|
+
|
|
499
|
+
Handles the most common patterns: top-level keys (services, networks,
|
|
500
|
+
volumes) and nested mappings under each service (environment, build,
|
|
501
|
+
deploy, healthcheck, depends_on) at arbitrary depth. Complex YAML
|
|
502
|
+
features (anchors, merge keys, multi-line block scalars) are best-effort.
|
|
503
|
+
"""
|
|
504
|
+
services: dict[str, dict] = {}
|
|
505
|
+
networks: list[str] = []
|
|
506
|
+
named_volumes: list[str] = []
|
|
507
|
+
|
|
508
|
+
current_top: str = "" # "services" | "networks" | "volumes" | ""
|
|
509
|
+
current_service: str = ""
|
|
510
|
+
# Stack of keys at each nesting depth (relative to service, depth 0 = indent 4)
|
|
511
|
+
key_stack: list[str] = []
|
|
512
|
+
|
|
513
|
+
def _strip_yaml_quotes(value: str) -> str:
|
|
514
|
+
"""Remove surrounding YAML quotes from a value."""
|
|
515
|
+
if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
|
|
516
|
+
return value[1:-1]
|
|
517
|
+
return value
|
|
518
|
+
|
|
519
|
+
def _navigate(path: list[str], create: bool = False):
|
|
520
|
+
"""Navigate to the parent for path, returning (parent_dict, final_key).
|
|
521
|
+
|
|
522
|
+
When *create* is True, intermediate dicts are created. If an
|
|
523
|
+
intermediate value is an empty list it is promoted to a dict (the
|
|
524
|
+
initial ``[]`` was a provisional guess — now we know it's a mapping).
|
|
525
|
+
"""
|
|
526
|
+
if not current_service or not path:
|
|
527
|
+
return None, None
|
|
528
|
+
target = services[current_service]
|
|
529
|
+
for part in path[:-1]:
|
|
530
|
+
if part not in target:
|
|
531
|
+
if create:
|
|
532
|
+
target[part] = {}
|
|
533
|
+
else:
|
|
534
|
+
return None, None
|
|
535
|
+
child = target[part]
|
|
536
|
+
# Promote empty list to dict — we guessed list, but it's a mapping
|
|
537
|
+
if isinstance(child, list) and not child:
|
|
538
|
+
target[part] = {}
|
|
539
|
+
child = target[part]
|
|
540
|
+
if not isinstance(child, dict):
|
|
541
|
+
return None, None
|
|
542
|
+
target = child
|
|
543
|
+
return target, path[-1]
|
|
544
|
+
|
|
545
|
+
for raw_line in text.splitlines():
|
|
546
|
+
stripped = raw_line.strip()
|
|
547
|
+
if not stripped or stripped.startswith("#"):
|
|
548
|
+
continue
|
|
549
|
+
|
|
550
|
+
indent = len(raw_line) - len(raw_line.lstrip())
|
|
551
|
+
|
|
552
|
+
# ── top-level key (indent 0) ──
|
|
553
|
+
if indent == 0 and ":" in stripped:
|
|
554
|
+
key = stripped.split(":")[0].strip()
|
|
555
|
+
current_top = key
|
|
556
|
+
current_service = ""
|
|
557
|
+
key_stack = []
|
|
558
|
+
continue
|
|
559
|
+
|
|
560
|
+
# ── under "services" ──
|
|
561
|
+
if current_top == "services":
|
|
562
|
+
# service name (indent 2)
|
|
563
|
+
if indent == 2 and ":" in stripped and not stripped.startswith("-"):
|
|
564
|
+
current_service = stripped.split(":")[0].strip()
|
|
565
|
+
services.setdefault(current_service, {})
|
|
566
|
+
key_stack = []
|
|
567
|
+
continue
|
|
568
|
+
|
|
569
|
+
if not current_service:
|
|
570
|
+
continue
|
|
571
|
+
|
|
572
|
+
# depth relative to service body (indent 4 → depth 0)
|
|
573
|
+
depth = (indent - 4) // 2
|
|
574
|
+
if depth < 0:
|
|
575
|
+
continue
|
|
576
|
+
|
|
577
|
+
# Trim key_stack to current depth
|
|
578
|
+
key_stack = key_stack[:depth]
|
|
579
|
+
|
|
580
|
+
# ── list item (- ...) ──
|
|
581
|
+
if stripped.startswith("- "):
|
|
582
|
+
item_value = _strip_yaml_quotes(stripped[2:].strip())
|
|
583
|
+
if key_stack:
|
|
584
|
+
parent, final_key = _navigate(key_stack)
|
|
585
|
+
if parent is not None and final_key is not None:
|
|
586
|
+
existing = parent.get(final_key)
|
|
587
|
+
if isinstance(existing, list):
|
|
588
|
+
existing.append(item_value)
|
|
589
|
+
continue
|
|
590
|
+
|
|
591
|
+
# ── key:value or key: (mapping start) ──
|
|
592
|
+
if ":" in stripped:
|
|
593
|
+
key, _, value = stripped.partition(":")
|
|
594
|
+
key = key.strip()
|
|
595
|
+
value = value.strip()
|
|
596
|
+
|
|
597
|
+
key_stack = key_stack[:depth] + [key]
|
|
598
|
+
path = list(key_stack)
|
|
599
|
+
|
|
600
|
+
parent, final_key = _navigate(path, create=True)
|
|
601
|
+
if parent is None or final_key is None:
|
|
602
|
+
continue
|
|
603
|
+
|
|
604
|
+
if value:
|
|
605
|
+
# Check for inline YAML list: [item1, item2, ...]
|
|
606
|
+
inline = _parse_inline_yaml_list(value)
|
|
607
|
+
if inline is not None:
|
|
608
|
+
parent[final_key] = inline
|
|
609
|
+
else:
|
|
610
|
+
parent[final_key] = _strip_yaml_quotes(value)
|
|
611
|
+
else:
|
|
612
|
+
# Start of a sub-block — initialise as empty list.
|
|
613
|
+
# If nested key:value lines follow, _navigate will
|
|
614
|
+
# promote it to a dict automatically.
|
|
615
|
+
if final_key not in parent:
|
|
616
|
+
parent[final_key] = []
|
|
617
|
+
continue
|
|
618
|
+
|
|
619
|
+
# ── under "networks" — collect names at indent 2 ──
|
|
620
|
+
if current_top == "networks":
|
|
621
|
+
if indent == 2 and ":" in stripped:
|
|
622
|
+
networks.append(stripped.split(":")[0].strip())
|
|
623
|
+
continue
|
|
624
|
+
|
|
625
|
+
# ── under "volumes" — collect names at indent 2 ──
|
|
626
|
+
if current_top == "volumes":
|
|
627
|
+
if indent == 2 and ":" in stripped:
|
|
628
|
+
named_volumes.append(stripped.split(":")[0].strip())
|
|
629
|
+
continue
|
|
630
|
+
|
|
631
|
+
return {
|
|
632
|
+
"type": "compose",
|
|
633
|
+
"services": services,
|
|
634
|
+
"networks": networks,
|
|
635
|
+
"volumes": named_volumes,
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
def _looks_like_compose(text: str) -> bool:
|
|
640
|
+
"""Return True if the file content appears to be a docker-compose file.
|
|
641
|
+
|
|
642
|
+
Checks for a ``services:`` top-level key at indent 0 AND at least one
|
|
643
|
+
service containing a compose-specific key (``image``, ``build``,
|
|
644
|
+
``ports``, ``depends_on``, ``container_name``, ``environment``,
|
|
645
|
+
``volumes``, ``command``, ``healthcheck``). This avoids false positives
|
|
646
|
+
from non-compose YAML files that happen to have a ``services:`` key.
|
|
647
|
+
"""
|
|
648
|
+
_COMPOSE_SERVICE_KEYS = {
|
|
649
|
+
"image:", "build:", "ports:", "depends_on:", "container_name:",
|
|
650
|
+
"environment:", "volumes:", "command:", "healthcheck:", "restart:",
|
|
651
|
+
"networks:", "deploy:", "profiles:",
|
|
652
|
+
}
|
|
653
|
+
has_services = False
|
|
654
|
+
in_services = False
|
|
655
|
+
for line in text.splitlines():
|
|
656
|
+
stripped = line.strip()
|
|
657
|
+
if not stripped or stripped.startswith("#"):
|
|
658
|
+
continue
|
|
659
|
+
if line.startswith("services:") or line.startswith("services :"):
|
|
660
|
+
has_services = True
|
|
661
|
+
in_services = True
|
|
662
|
+
continue
|
|
663
|
+
# Another top-level key ends the services block
|
|
664
|
+
if in_services and not line[0].isspace():
|
|
665
|
+
in_services = False
|
|
666
|
+
if in_services:
|
|
667
|
+
for ck in _COMPOSE_SERVICE_KEYS:
|
|
668
|
+
if ck in stripped:
|
|
669
|
+
return True
|
|
670
|
+
return False
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def get_docker_inventory(src_dir: str) -> dict:
    """Find and parse Dockerfiles and Compose files below *src_dir*.

    Three recursive passes run in order, and a path recorded by an earlier
    pass is never re-parsed by a later one:

    1. Files matching ``DOCKERFILE_PATTERNS`` (documentation-like suffixes
       such as ``.md`` or ``.json`` are excluded) are parsed as Dockerfiles.
    2. Files matching ``COMPOSE_PATTERNS`` are parsed as Compose files.
    3. Any remaining ``*.yml`` / ``*.yaml`` file whose content passes
       :func:`_looks_like_compose` is parsed as a Compose file. This picks
       up non-standard names used in split-compose layouts.

    Paths with a component in ``EXCLUDED_DIRS`` or reported as ignored by
    the gitignore matcher are skipped.

    Returns:
        Mapping of relative path (forward slashes) to parsed data.
    """
    from ..config import build_gitignore_matcher

    root = Path(src_dir)
    found: dict[str, dict] = {}
    ignore = build_gitignore_matcher(root)

    def _rel(path: Path) -> str:
        """Relative path of *path* under the root, with forward slashes."""
        relative = path.relative_to(root)
        return str(relative).replace(os.sep, "/")

    def _should_skip(path: Path) -> bool:
        """True when *path* is in an excluded directory or is git-ignored."""
        relative = path.relative_to(root)
        if not EXCLUDED_DIRS.isdisjoint(relative.parts):
            return True
        normalised = str(relative).replace("\\", "/")
        return True if ignore.is_ignored(normalised) else False

    # Suffixes that should never be treated as Dockerfiles.
    doc_suffixes = {".md", ".txt", ".rst", ".html", ".json"}

    # Pass 1: Dockerfiles by name.
    for pattern in DOCKERFILE_PATTERNS:
        for candidate in root.rglob(pattern):
            if candidate.suffix.lower() in doc_suffixes:
                continue
            if not candidate.is_file() or _should_skip(candidate):
                continue
            key = _rel(candidate)
            if key in found:
                continue
            found[key] = _parse_dockerfile(candidate.read_text(errors="replace"))

    # Pass 2: Compose files by name.
    for pattern in COMPOSE_PATTERNS:
        for candidate in root.rglob(pattern):
            if not candidate.is_file() or _should_skip(candidate):
                continue
            key = _rel(candidate)
            if key in found:
                continue
            found[key] = _parse_compose(candidate.read_text(errors="replace"))

    # Pass 3: Compose files by content (YAML files only).
    for glob_pattern in ("*.yml", "*.yaml"):
        for candidate in root.rglob(glob_pattern):
            if not candidate.is_file():
                continue
            if _should_skip(candidate):
                continue
            key = _rel(candidate)
            if key in found:
                continue
            content = candidate.read_text(errors="replace")
            if _looks_like_compose(content):
                found[key] = _parse_compose(content)

    return found
|