agent-wiki-cli 0.3.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. agent_wiki_cli-0.3.28.dist-info/METADATA +425 -0
  2. agent_wiki_cli-0.3.28.dist-info/RECORD +47 -0
  3. agent_wiki_cli-0.3.28.dist-info/WHEEL +5 -0
  4. agent_wiki_cli-0.3.28.dist-info/entry_points.txt +2 -0
  5. agent_wiki_cli-0.3.28.dist-info/licenses/LICENSE +21 -0
  6. agent_wiki_cli-0.3.28.dist-info/top_level.txt +1 -0
  7. llm_wiki_cli/__init__.py +7 -0
  8. llm_wiki_cli/cli.py +231 -0
  9. llm_wiki_cli/commands/__init__.py +1 -0
  10. llm_wiki_cli/commands/bootstrap_cmd.py +1072 -0
  11. llm_wiki_cli/commands/bump_cmd.py +55 -0
  12. llm_wiki_cli/commands/context_cmd.py +427 -0
  13. llm_wiki_cli/commands/extract_cmd.py +745 -0
  14. llm_wiki_cli/commands/generate_prompt_cmd.py +89 -0
  15. llm_wiki_cli/commands/hook_cmd.py +161 -0
  16. llm_wiki_cli/commands/init_cmd.py +92 -0
  17. llm_wiki_cli/commands/lint_cmd.py +294 -0
  18. llm_wiki_cli/commands/migrate_cmd.py +892 -0
  19. llm_wiki_cli/commands/release_cmd.py +163 -0
  20. llm_wiki_cli/commands/status_cmd.py +70 -0
  21. llm_wiki_cli/commands/sync_cmd.py +521 -0
  22. llm_wiki_cli/commands/trigger_cmd.py +205 -0
  23. llm_wiki_cli/commands/uninstall_cmd.py +221 -0
  24. llm_wiki_cli/commands/upgrade_cmd.py +196 -0
  25. llm_wiki_cli/config.py +318 -0
  26. llm_wiki_cli/extractors/__init__.py +46 -0
  27. llm_wiki_cli/extractors/common.py +90 -0
  28. llm_wiki_cli/extractors/go_extractor.py +143 -0
  29. llm_wiki_cli/extractors/go_scripts/go.mod +3 -0
  30. llm_wiki_cli/extractors/go_scripts/main.go +668 -0
  31. llm_wiki_cli/extractors/python_extractor.py +346 -0
  32. llm_wiki_cli/extractors/rust_extractor.py +143 -0
  33. llm_wiki_cli/extractors/rust_scripts/Cargo.lock +110 -0
  34. llm_wiki_cli/extractors/rust_scripts/Cargo.toml +11 -0
  35. llm_wiki_cli/extractors/rust_scripts/src/main.rs +803 -0
  36. llm_wiki_cli/extractors/ts_extractor.py +206 -0
  37. llm_wiki_cli/extractors/ts_scripts/extract.js +485 -0
  38. llm_wiki_cli/extractors/ts_scripts/package.json +10 -0
  39. llm_wiki_cli/services/__init__.py +0 -0
  40. llm_wiki_cli/services/circuit_breaker.py +79 -0
  41. llm_wiki_cli/services/io.py +47 -0
  42. llm_wiki_cli/services/lockfile.py +60 -0
  43. llm_wiki_cli/services/packages.py +173 -0
  44. llm_wiki_cli/services/paths.py +31 -0
  45. llm_wiki_cli/services/schema.py +214 -0
  46. llm_wiki_cli/services/secure_file.py +22 -0
  47. llm_wiki_cli/services/versioning.py +193 -0
@@ -0,0 +1,745 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import json
5
+ import os
6
+ import re
7
+ import subprocess
8
+ import sys
9
+ from dataclasses import dataclass
10
+ from functools import lru_cache
11
+ from pathlib import Path
12
+
13
+ from ..config import COMPOSE_PATTERNS, DOCKERFILE_PATTERNS, EXCLUDED_DIRS, EXTRACTOR_REGISTRY, validate_path
14
+ from ..extractors.common import LANGUAGE_EXTENSIONS, discover_source_files
15
+ from ..services.packages import discover_packages, stamp_inventory_packages
16
+
17
+ # Re-export ComponentVisitor so existing callers that import it from here
18
+ # continue to work without modification.
19
+ from ..extractors.python_extractor import ComponentVisitor # noqa: F401
20
+
21
+
22
+ # ── Extractor loader ─────────────────────────────────────────────────
23
+
24
+
25
+ @lru_cache(maxsize=None)
26
+ def _load_extractor(entry_point: str):
27
+ """Instantiate an extractor from a ``"module.path:ClassName"`` string."""
28
+ module_path, class_name = entry_point.rsplit(":", 1)
29
+ module = importlib.import_module(module_path)
30
+ return getattr(module, class_name)()
31
+
32
+
33
@dataclass(frozen=True)
class ExtractorStatus:
    """Immutable outcome record for one language's extractor run."""

    # Language key this status belongs to.
    language: str
    # One of: ok | skipped | failed
    state: str
    # Number of source files discovered for the language.
    files_found: int
    # Optional detail text (e.g. the error text of a failed run).
    message: str = ""
39
+
40
+
41
@dataclass(frozen=True)
class InventoryResult:
    """Merged inventory together with the per-language extractor statuses."""

    # Inventory entries keyed by file path.
    inventory: dict
    # Extractor outcome keyed by language.
    statuses: dict[str, ExtractorStatus]

    @property
    def failed(self) -> list[ExtractorStatus]:
        """Return the statuses whose ``state`` is ``"failed"``."""
        failures = []
        for status in self.statuses.values():
            if status.state == "failed":
                failures.append(status)
        return failures
49
+
50
+
51
def print_inventory_failures(result: InventoryResult, *, file=None) -> None:
    """Write one ``Error:`` line per failed extractor in *result*.

    Output goes to *file* when it is truthy, otherwise to ``sys.stderr``.
    A non-empty status message is appended after a colon.
    """
    out = file if file else sys.stderr
    for failure in result.failed:
        suffix = ""
        if failure.message:
            suffix = f": {failure.message}"
        print(f"Error: {failure.language} extraction failed{suffix}", file=out)
57
+
58
+
59
+ # ── Backward-compatible public API ───────────────────────────────────
60
+
61
+
62
def get_inventory_result(src_dir, deep=False, only_files=None, include_empty=False) -> InventoryResult:
    """Run every registered extractor over *src_dir* and merge the results.

    For each ``language -> entry point`` pair in :data:`EXTRACTOR_REGISTRY`:

    * source files are discovered first; when none match, the language is
      recorded as ``"skipped"`` and its extractor is never loaded;
    * otherwise the extractor is loaded and called with ``src_dir``,
      ``only_files`` and ``deep`` (``include_empty`` is forwarded only for
      the ``"python"`` language);
    * an exception, or a truthy ``last_error`` attribute left on the
      extractor, records the language as ``"failed"`` and discards its
      output; otherwise the output is merged into the inventory dict and
      the language is recorded as ``"ok"``.

    ``deep=True`` asks for enriched data (docstrings, attributes, methods,
    imports); ``deep=False`` asks for the slim format.  ``only_files``
    restricts the scan to the given relative paths.

    Finally the merged inventory is handed to
    :func:`stamp_inventory_packages` together with the result of
    :func:`discover_packages` so entries carry their package ownership.
    """
    merged: dict = {}
    outcomes: dict[str, ExtractorStatus] = {}

    for language, entry_point in EXTRACTOR_REGISTRY.items():
        matched = discover_source_files(
            src_dir,
            LANGUAGE_EXTENSIONS.get(language, ()),
            only_files=only_files,
            language=language,
        )
        if not matched:
            outcomes[language] = ExtractorStatus(language, "skipped", 0)
            continue
        file_count = len(matched)

        extractor = _load_extractor(entry_point)
        # Extractor instances are cached, so clear any error left behind by
        # an earlier invocation before running again.
        if hasattr(extractor, "last_error"):
            extractor.last_error = None

        call_args = {"src_dir": src_dir, "only_files": only_files, "deep": deep}
        if language == "python":
            call_args["include_empty"] = include_empty

        try:
            extracted = extractor.extract(**call_args)
        except Exception as exc:
            outcomes[language] = ExtractorStatus(language, "failed", file_count, str(exc))
            continue

        reported = getattr(extractor, "last_error", None)
        if reported:
            outcomes[language] = ExtractorStatus(language, "failed", file_count, str(reported))
            continue

        merged.update(extracted)
        outcomes[language] = ExtractorStatus(language, "ok", file_count)

    # Stamp package ownership onto the merged entries.
    stamp_inventory_packages(merged, discover_packages(src_dir))

    return InventoryResult(inventory=merged, statuses=outcomes)
113
+
114
+
115
def get_inventory(src_dir, deep=False, only_files=None, include_empty=False):
    """Return just the inventory dict produced by :func:`get_inventory_result`.

    Kept for callers that predate :class:`InventoryResult`; extractor
    statuses (including failures) are dropped.
    """
    full_result = get_inventory_result(
        src_dir,
        deep=deep,
        only_files=only_files,
        include_empty=include_empty,
    )
    return full_result.inventory
120
+
121
+
122
def ensure_complete_inventory(result: InventoryResult) -> bool:
    """Tell whether *result* is free of failed extractor statuses."""
    failures = result.failed
    return not failures
125
+
126
+
127
def infer_language_from_path(filepath: str) -> str | None:
    """Map *filepath*'s suffix to a language key of ``LANGUAGE_EXTENSIONS``.

    Returns the first language whose extension collection contains the
    suffix, or ``None`` when no language claims it.
    """
    extension = Path(filepath).suffix
    return next(
        (lang for lang, exts in LANGUAGE_EXTENSIONS.items() if extension in exts),
        None,
    )
133
+
134
+
135
def languages_with_source(src_dir: str, only_files: list[str] | None = None) -> set[str]:
    """Return the languages for which source discovery finds at least one file.

    Each entry of ``LANGUAGE_EXTENSIONS`` is probed with
    :func:`discover_source_files`; *only_files* is passed through unchanged.
    """
    return {
        language
        for language, extensions in LANGUAGE_EXTENSIONS.items()
        if discover_source_files(src_dir, extensions, only_files=only_files, language=language)
    }
141
+
142
+
143
def _inventory_or_exit(src_dir: str, *, deep: bool = False, only_files=None, include_empty: bool = False) -> dict:
    """Return the inventory dict, or report failures and exit with status 1.

    When any extractor failed, the failures are printed via
    :func:`print_inventory_failures` and the process exits through
    ``sys.exit(1)``.
    """
    outcome = get_inventory_result(
        src_dir,
        deep=deep,
        only_files=only_files,
        include_empty=include_empty,
    )
    if not outcome.failed:
        return outcome.inventory
    print_inventory_failures(outcome)
    sys.exit(1)
149
+
150
+
151
+ def _git_changed_files(src_dir: str) -> list[str] | None:
152
+ """Return list of files changed in the last commit, relative to *src_dir*.
153
+
154
+ Returns None if git is unavailable or there are no commits.
155
+ """
156
+ try:
157
+ result = subprocess.run(
158
+ ["git", "diff", "--name-only", "HEAD~1..HEAD"],
159
+ capture_output=True, text=True, check=True, timeout=15,
160
+ cwd=src_dir,
161
+ )
162
+ return [line for line in result.stdout.splitlines() if line.strip()]
163
+ except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
164
+ return None
165
+
166
+
167
+ def _summarize_inventory(inventory: dict) -> dict:
168
+ """Produce a compact one-line-per-symbol summary from a shallow inventory."""
169
+ summary: dict[str, dict] = {}
170
+ for fp, data in inventory.items():
171
+ entry: dict[str, list] = {}
172
+ cls_names = [c["name"] for c in data.get("classes", [])]
173
+ fn_names = [f["name"] for f in data.get("functions", [])]
174
+ if cls_names:
175
+ entry["classes"] = cls_names
176
+ if fn_names:
177
+ entry["functions"] = fn_names
178
+ if entry:
179
+ summary[fp] = entry
180
+ return summary
181
+
182
+
183
def run(args):
    """CLI entry point: extract the inventory and print it as JSON on stdout.

    Options are read from *args* with ``getattr`` defaults (``src_dir``,
    ``changed``, ``summary``, ``deep``, ``paths``, ``package``,
    ``include_empty``).  Progress and diagnostics go to stderr; only the
    JSON document goes to stdout.

    Exit behaviour:
        * ``--changed`` together with ``--paths`` exits with status 2;
        * an extractor failure, or a package filter that matches nothing,
          exits with status 1;
        * ``--changed`` with no changed files returns without output.
    """

    def _note(message: str) -> None:
        print(message, file=sys.stderr)

    src_dir: str = getattr(args, "src_dir", ".")
    validate_path(src_dir, "--src-dir")
    use_git_diff: bool = getattr(args, "changed", False)
    want_summary: bool = getattr(args, "summary", False)
    deep: bool = getattr(args, "deep", False)
    explicit_paths: list[str] | None = getattr(args, "paths", None)
    package_filter: str | None = getattr(args, "package", None)
    include_empty: bool = getattr(args, "include_empty", False)

    if use_git_diff and explicit_paths:
        _note("Error: --changed and --paths are mutually exclusive.")
        sys.exit(2)

    only_files = None
    if use_git_diff:
        only_files = _git_changed_files(src_dir)
        if only_files is None:
            # only_files stays None, which means "scan everything".
            _note("Warning: Could not get changed files from git. Falling back to full scan.")
        elif only_files:
            _note(f"Extracting {len(only_files)} changed file(s)...")
        else:
            _note("No files changed in the last commit.")
            return
    elif explicit_paths:
        only_files = explicit_paths
        _note(f"Extracting {len(only_files)} specified path(s)...")
    else:
        _note(f"Extracting inventory from {src_dir}...")

    # Prints the failures and exits with status 1 if any extractor failed.
    inventory = _inventory_or_exit(
        src_dir, deep=deep, only_files=only_files, include_empty=include_empty,
    )

    if package_filter:
        inventory = {
            path: entry
            for path, entry in inventory.items()
            if entry.get("package") == package_filter
        }
        if not inventory:
            _note(f"No files found for package '{package_filter}'.")
            sys.exit(1)

    if want_summary:
        inventory = _summarize_inventory(inventory)

    docker_inv = get_docker_inventory(src_dir)

    output: dict = {"inventory": inventory}
    if docker_inv:
        output["docker"] = docker_inv

    print(json.dumps(output, indent=2))
    _note(f"Extracted {len(inventory)} files with tracked components.")
    if docker_inv:
        _note(f"Docker inventory: {len(docker_inv)} file(s).")
    else:
        _note("No Docker/Compose files found.")
245
+
246
+
247
+ # ── Call-graph extraction for workflow detection ──────────────────────
248
+
249
+ def _module_name(filepath: str) -> str:
250
+ return Path(filepath).stem
251
+
252
+
253
def get_call_graph(inventory: dict) -> dict:
    """Build cross-module workflow candidates from a deep inventory.

    A function (or method) becomes a workflow candidate when its parameter
    types, return type, decorators or docstring mention imported symbols
    that come from 3 or more *other* modules of the inventory.  Modules are
    identified by file stem only.  Files whose stem starts with ``test_``
    or is ``conftest``, and files located under a ``tests`` / ``test`` /
    ``__tests__`` directory, are ignored as entry points.

    Metadata fields that are present but ``None`` (for example a missing
    docstring or return annotation) are treated as empty instead of
    raising ``TypeError``.

    Returns:
        ``workflow_name -> {entry, entry_module, chain, modules_touched,
        docstring}``.  The workflow name is the function name without
        leading underscores, except that ``run`` becomes ``<module>_flow``.
        A later function with the same workflow name replaces an earlier
        one.
    """
    # Stems of every file in the inventory.
    known_modules = {_module_name(fp) for fp in inventory}
    # Symbol name -> stem of the module defining it (last definition wins).
    symbol_to_module: dict[str, str] = {}
    for fp, data in inventory.items():
        mod = _module_name(fp)
        for cls in data.get("classes", []):
            symbol_to_module[cls["name"]] = mod
        for fn in data.get("functions", []):
            symbol_to_module[fn["name"]] = mod

    test_stems = {"conftest"}
    test_dirs = {"tests", "test", "__tests__"}

    def _mentions(fn: dict, sym_name: str) -> bool:
        """Tell whether any signature/doc field of *fn* contains *sym_name*."""
        fields = [param.get("type") or "" for param in fn.get("params") or []]
        fields.append(fn.get("return_type") or "")
        fields.extend(dec or "" for dec in fn.get("decorators") or [])
        fields.append(fn.get("docstring") or "")
        return any(sym_name in field for field in fields)

    workflows: dict[str, dict] = {}

    for fp, data in inventory.items():
        fp_path = Path(fp)
        # Test files are never workflow entry points.
        if fp_path.stem.startswith("test_") or fp_path.stem in test_stems:
            continue
        if test_dirs & set(fp_path.parts):
            continue

        mod = _module_name(fp)

        # Imported name -> internal module it comes from.
        imported_symbols: dict[str, str] = {}
        for imp in data.get("imports", []):
            name = imp["name"]
            # The imported name is a symbol defined in another known module.
            if name in symbol_to_module and symbol_to_module[name] != mod:
                imported_symbols[name] = symbol_to_module[name]
            # The import's module path ends in a known module stem; this
            # takes precedence over the symbol-based guess above.
            imp_mod = imp.get("module", "")
            imp_mod_stem = imp_mod.rsplit(".", 1)[-1] if imp_mod else ""
            if imp_mod_stem in known_modules and imp_mod_stem != mod:
                imported_symbols[name] = imp_mod_stem

        if not imported_symbols:
            continue

        # Top-level functions plus every method of every class.
        all_functions = list(data.get("functions", []))
        for cls in data.get("classes", []):
            all_functions.extend(cls.get("methods", []))

        for fn in all_functions:
            touched_modules: set[str] = set()
            chain: list[str] = []
            for sym_name, src_mod in imported_symbols.items():
                if _mentions(fn, sym_name):
                    touched_modules.add(src_mod)
                    chain.append(f"{src_mod}.{sym_name}")

            # Workflow threshold: function touches 3+ other internal modules.
            if len(touched_modules) < 3:
                continue

            fn_name = fn["name"]
            wf_name = fn_name.lstrip("_")
            if wf_name == "run":
                wf_name = f"{mod}_flow"

            workflows[wf_name] = {
                "entry": f"{mod}.{fn_name}",
                "entry_module": mod,
                "chain": chain,
                "modules_touched": sorted(touched_modules | {mod}),
                "docstring": fn.get("docstring", ""),
            }

    return workflows
351
+
352
+
353
+ # ── Docker / Compose extraction ──────────────────────────────────────
354
+
355
+ def _parse_dockerfile(text: str) -> dict:
356
+ """Parse a Dockerfile into a structured dict (line-based, no external deps)."""
357
+ stages: list[dict] = []
358
+ current_stage: str | None = None
359
+ ports: list[str] = []
360
+ env_vars: list[dict] = []
361
+ volumes: list[str] = []
362
+ copies: list[dict] = []
363
+ build_args: list[dict] = []
364
+ labels: dict[str, str] = {}
365
+ entrypoint: str = ""
366
+ cmd: str = ""
367
+ workdir: str = ""
368
+ healthcheck: str = ""
369
+
370
+ # Join continuation lines (trailing backslash)
371
+ logical_lines: list[str] = []
372
+ buf = ""
373
+ for raw in text.splitlines():
374
+ stripped = raw.rstrip()
375
+ if stripped.endswith("\\"):
376
+ buf += stripped[:-1] + " "
377
+ else:
378
+ buf += stripped
379
+ logical_lines.append(buf)
380
+ buf = ""
381
+ if buf:
382
+ logical_lines.append(buf)
383
+
384
+ for line in logical_lines:
385
+ trimmed = line.strip()
386
+ if not trimmed or trimmed.startswith("#"):
387
+ continue
388
+
389
+ upper = trimmed.split()[0].upper() if trimmed.split() else ""
390
+
391
+ if upper == "FROM":
392
+ parts = trimmed.split()
393
+ image = parts[1] if len(parts) >= 2 else "unknown"
394
+ alias = ""
395
+ if len(parts) >= 4 and parts[2].upper() == "AS":
396
+ alias = parts[3]
397
+ stage = {"image": image, "alias": alias}
398
+ stages.append(stage)
399
+ current_stage = alias or image
400
+
401
+ elif upper == "EXPOSE":
402
+ for token in trimmed.split()[1:]:
403
+ ports.append(token)
404
+
405
+ elif upper == "ENV":
406
+ rest = trimmed[4:].strip()
407
+ if "=" in rest:
408
+ for pair in re.findall(r'(\w+)=("(?:[^"\\]|\\.)*"|\S+)', rest):
409
+ env_vars.append({"name": pair[0], "default": pair[1].strip('"')})
410
+ else:
411
+ parts = rest.split(None, 1)
412
+ if len(parts) == 2:
413
+ env_vars.append({"name": parts[0], "default": parts[1]})
414
+ elif parts:
415
+ env_vars.append({"name": parts[0], "default": ""})
416
+
417
+ elif upper == "VOLUME":
418
+ rest = trimmed[7:].strip()
419
+ if rest.startswith("["):
420
+ for v in re.findall(r'"([^"]+)"', rest):
421
+ volumes.append(v)
422
+ else:
423
+ volumes.extend(rest.split())
424
+
425
+ elif upper in ("COPY", "ADD"):
426
+ parts = trimmed.split()
427
+ flags = [p for p in parts[1:] if p.startswith("--")]
428
+ non_flag = [p for p in parts[1:] if not p.startswith("--")]
429
+ src = " ".join(non_flag[:-1]) if len(non_flag) >= 2 else ""
430
+ dest = non_flag[-1] if non_flag else ""
431
+ from_stage = ""
432
+ for f in flags:
433
+ if f.startswith("--from="):
434
+ from_stage = f.split("=", 1)[1]
435
+ copies.append({"src": src, "dest": dest, "from_stage": from_stage, "instruction": upper})
436
+
437
+ elif upper == "WORKDIR":
438
+ workdir = trimmed.split(None, 1)[1] if len(trimmed.split()) > 1 else ""
439
+
440
+ elif upper == "ARG":
441
+ rest = trimmed[4:].strip()
442
+ if "=" in rest:
443
+ name, default = rest.split("=", 1)
444
+ build_args.append({"name": name.strip(), "default": default.strip()})
445
+ else:
446
+ build_args.append({"name": rest, "default": ""})
447
+
448
+ elif upper == "LABEL":
449
+ for pair in re.findall(r'(\S+)=("(?:[^"\\]|\\.)*"|\S+)', trimmed[6:]):
450
+ labels[pair[0]] = pair[1].strip('"')
451
+
452
+ elif upper == "ENTRYPOINT":
453
+ entrypoint = trimmed.split(None, 1)[1] if len(trimmed.split()) > 1 else ""
454
+
455
+ elif upper == "CMD":
456
+ cmd = trimmed.split(None, 1)[1] if len(trimmed.split()) > 1 else ""
457
+
458
+ elif upper == "HEALTHCHECK":
459
+ rest = trimmed.split(None, 1)[1] if len(trimmed.split()) > 1 else ""
460
+ if rest.upper() != "NONE":
461
+ healthcheck = rest
462
+
463
+ return {
464
+ "type": "dockerfile",
465
+ "stages": stages,
466
+ "ports": ports,
467
+ "env_vars": env_vars,
468
+ "volumes": volumes,
469
+ "copies": copies,
470
+ "build_args": build_args,
471
+ "labels": labels,
472
+ "entrypoint": entrypoint,
473
+ "cmd": cmd,
474
+ "workdir": workdir,
475
+ "healthcheck": healthcheck,
476
+ }
477
+
478
+
479
+ def _parse_inline_yaml_list(value: str) -> list[str] | None:
480
+ """Parse an inline YAML list like ``["CMD", "curl", "-f", "http://..."]``.
481
+
482
+ Returns a list of strings if the value is an inline list, otherwise None.
483
+ """
484
+ value = value.strip()
485
+ if value.startswith("[") and value.endswith("]"):
486
+ inner = value[1:-1]
487
+ items: list[str] = []
488
+ for item in re.split(r",\s*", inner):
489
+ item = item.strip().strip('"').strip("'")
490
+ if item:
491
+ items.append(item)
492
+ return items
493
+ return None
494
+
495
+
496
+ def _parse_compose(text: str) -> dict:
497
+ """Parse a docker-compose YAML file using line-based parsing (no PyYAML).
498
+
499
+ Handles the most common patterns: top-level keys (services, networks,
500
+ volumes) and nested mappings under each service (environment, build,
501
+ deploy, healthcheck, depends_on) at arbitrary depth. Complex YAML
502
+ features (anchors, merge keys, multi-line block scalars) are best-effort.
503
+ """
504
+ services: dict[str, dict] = {}
505
+ networks: list[str] = []
506
+ named_volumes: list[str] = []
507
+
508
+ current_top: str = "" # "services" | "networks" | "volumes" | ""
509
+ current_service: str = ""
510
+ # Stack of keys at each nesting depth (relative to service, depth 0 = indent 4)
511
+ key_stack: list[str] = []
512
+
513
+ def _strip_yaml_quotes(value: str) -> str:
514
+ """Remove surrounding YAML quotes from a value."""
515
+ if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
516
+ return value[1:-1]
517
+ return value
518
+
519
+ def _navigate(path: list[str], create: bool = False):
520
+ """Navigate to the parent for path, returning (parent_dict, final_key).
521
+
522
+ When *create* is True, intermediate dicts are created. If an
523
+ intermediate value is an empty list it is promoted to a dict (the
524
+ initial ``[]`` was a provisional guess — now we know it's a mapping).
525
+ """
526
+ if not current_service or not path:
527
+ return None, None
528
+ target = services[current_service]
529
+ for part in path[:-1]:
530
+ if part not in target:
531
+ if create:
532
+ target[part] = {}
533
+ else:
534
+ return None, None
535
+ child = target[part]
536
+ # Promote empty list to dict — we guessed list, but it's a mapping
537
+ if isinstance(child, list) and not child:
538
+ target[part] = {}
539
+ child = target[part]
540
+ if not isinstance(child, dict):
541
+ return None, None
542
+ target = child
543
+ return target, path[-1]
544
+
545
+ for raw_line in text.splitlines():
546
+ stripped = raw_line.strip()
547
+ if not stripped or stripped.startswith("#"):
548
+ continue
549
+
550
+ indent = len(raw_line) - len(raw_line.lstrip())
551
+
552
+ # ── top-level key (indent 0) ──
553
+ if indent == 0 and ":" in stripped:
554
+ key = stripped.split(":")[0].strip()
555
+ current_top = key
556
+ current_service = ""
557
+ key_stack = []
558
+ continue
559
+
560
+ # ── under "services" ──
561
+ if current_top == "services":
562
+ # service name (indent 2)
563
+ if indent == 2 and ":" in stripped and not stripped.startswith("-"):
564
+ current_service = stripped.split(":")[0].strip()
565
+ services.setdefault(current_service, {})
566
+ key_stack = []
567
+ continue
568
+
569
+ if not current_service:
570
+ continue
571
+
572
+ # depth relative to service body (indent 4 → depth 0)
573
+ depth = (indent - 4) // 2
574
+ if depth < 0:
575
+ continue
576
+
577
+ # Trim key_stack to current depth
578
+ key_stack = key_stack[:depth]
579
+
580
+ # ── list item (- ...) ──
581
+ if stripped.startswith("- "):
582
+ item_value = _strip_yaml_quotes(stripped[2:].strip())
583
+ if key_stack:
584
+ parent, final_key = _navigate(key_stack)
585
+ if parent is not None and final_key is not None:
586
+ existing = parent.get(final_key)
587
+ if isinstance(existing, list):
588
+ existing.append(item_value)
589
+ continue
590
+
591
+ # ── key:value or key: (mapping start) ──
592
+ if ":" in stripped:
593
+ key, _, value = stripped.partition(":")
594
+ key = key.strip()
595
+ value = value.strip()
596
+
597
+ key_stack = key_stack[:depth] + [key]
598
+ path = list(key_stack)
599
+
600
+ parent, final_key = _navigate(path, create=True)
601
+ if parent is None or final_key is None:
602
+ continue
603
+
604
+ if value:
605
+ # Check for inline YAML list: [item1, item2, ...]
606
+ inline = _parse_inline_yaml_list(value)
607
+ if inline is not None:
608
+ parent[final_key] = inline
609
+ else:
610
+ parent[final_key] = _strip_yaml_quotes(value)
611
+ else:
612
+ # Start of a sub-block — initialise as empty list.
613
+ # If nested key:value lines follow, _navigate will
614
+ # promote it to a dict automatically.
615
+ if final_key not in parent:
616
+ parent[final_key] = []
617
+ continue
618
+
619
+ # ── under "networks" — collect names at indent 2 ──
620
+ if current_top == "networks":
621
+ if indent == 2 and ":" in stripped:
622
+ networks.append(stripped.split(":")[0].strip())
623
+ continue
624
+
625
+ # ── under "volumes" — collect names at indent 2 ──
626
+ if current_top == "volumes":
627
+ if indent == 2 and ":" in stripped:
628
+ named_volumes.append(stripped.split(":")[0].strip())
629
+ continue
630
+
631
+ return {
632
+ "type": "compose",
633
+ "services": services,
634
+ "networks": networks,
635
+ "volumes": named_volumes,
636
+ }
637
+
638
+
639
+ def _looks_like_compose(text: str) -> bool:
640
+ """Return True if the file content appears to be a docker-compose file.
641
+
642
+ Checks for a ``services:`` top-level key at indent 0 AND at least one
643
+ service containing a compose-specific key (``image``, ``build``,
644
+ ``ports``, ``depends_on``, ``container_name``, ``environment``,
645
+ ``volumes``, ``command``, ``healthcheck``). This avoids false positives
646
+ from non-compose YAML files that happen to have a ``services:`` key.
647
+ """
648
+ _COMPOSE_SERVICE_KEYS = {
649
+ "image:", "build:", "ports:", "depends_on:", "container_name:",
650
+ "environment:", "volumes:", "command:", "healthcheck:", "restart:",
651
+ "networks:", "deploy:", "profiles:",
652
+ }
653
+ has_services = False
654
+ in_services = False
655
+ for line in text.splitlines():
656
+ stripped = line.strip()
657
+ if not stripped or stripped.startswith("#"):
658
+ continue
659
+ if line.startswith("services:") or line.startswith("services :"):
660
+ has_services = True
661
+ in_services = True
662
+ continue
663
+ # Another top-level key ends the services block
664
+ if in_services and not line[0].isspace():
665
+ in_services = False
666
+ if in_services:
667
+ for ck in _COMPOSE_SERVICE_KEYS:
668
+ if ck in stripped:
669
+ return True
670
+ return False
671
+
672
+
673
def get_docker_inventory(src_dir: str) -> dict:
    """Find and parse Dockerfiles and Compose files below *src_dir*.

    Three recursive passes are made, and a file recorded by an earlier pass
    is never parsed again:

    1. every ``DOCKERFILE_PATTERNS`` glob, skipping matches whose suffix is
       a documentation/data suffix (``.md``, ``.txt``, ``.rst``, ``.html``,
       ``.json``), parsed with :func:`_parse_dockerfile`;
    2. every ``COMPOSE_PATTERNS`` glob, parsed with :func:`_parse_compose`;
    3. every remaining ``*.yml`` / ``*.yaml`` file whose content passes
       :func:`_looks_like_compose`, which catches non-standard names such
       as ``infra.yml`` in split-compose layouts.

    Paths with a component in ``EXCLUDED_DIRS`` and paths the gitignore
    matcher reports as ignored are skipped.

    Returns:
        ``relative path -> parsed data``; keys always use forward slashes.
    """
    from ..config import build_gitignore_matcher

    root = Path(src_dir)
    found: dict[str, dict] = {}
    ignore_rules = build_gitignore_matcher(root)

    # Suffixes that should never be treated as Dockerfiles.
    doc_suffixes = {".md", ".txt", ".rst", ".html", ".json"}

    def _key_for(path: Path) -> str:
        """Forward-slash path of *path* relative to the scan root."""
        return str(path.relative_to(root)).replace(os.sep, "/")

    def _is_excluded(path: Path) -> bool:
        """True for paths under an excluded directory or ignored by git."""
        relative = path.relative_to(root)
        if not EXCLUDED_DIRS.isdisjoint(relative.parts):
            return True
        return bool(ignore_rules.is_ignored(str(relative).replace("\\", "/")))

    # Pass 1: Dockerfiles by name.
    for pattern in DOCKERFILE_PATTERNS:
        for candidate in root.rglob(pattern):
            if candidate.suffix.lower() in doc_suffixes:
                continue
            if not candidate.is_file() or _is_excluded(candidate):
                continue
            key = _key_for(candidate)
            if key not in found:
                found[key] = _parse_dockerfile(candidate.read_text(errors="replace"))

    # Pass 2: Compose files by name.
    for pattern in COMPOSE_PATTERNS:
        for candidate in root.rglob(pattern):
            if not candidate.is_file() or _is_excluded(candidate):
                continue
            key = _key_for(candidate)
            if key not in found:
                found[key] = _parse_compose(candidate.read_text(errors="replace"))

    # Pass 3: Compose files by content (YAML files only).
    for yaml_glob in ("*.yml", "*.yaml"):
        for candidate in root.rglob(yaml_glob):
            if not candidate.is_file():
                continue
            if _is_excluded(candidate):
                continue
            key = _key_for(candidate)
            if key in found:
                continue
            body = candidate.read_text(errors="replace")
            if _looks_like_compose(body):
                found[key] = _parse_compose(body)

    return found