agent-governance 1.0.4__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1049 @@
+ #!/usr/bin/env python3
+
+ from __future__ import annotations
+
+ import json
+ import os
+ import re
+ import shlex
+ import tempfile
+ from dataclasses import dataclass
+ from datetime import datetime, timezone
+ from importlib import metadata
+ from pathlib import Path
+ from typing import Any
+
+ import yaml
+
+ from agent_governance import PACKAGE_NAME
+
+ try:
+     import tomllib  # type: ignore
+ except ModuleNotFoundError:  # pragma: no cover
+     import tomli as tomllib  # type: ignore
+
+
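+ # Directories that, when present, mark an area of the repository as risky to
+ # touch, and the gates suggested for edits under each matching path prefix.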
+ RISK_PATH_CANDIDATES = [
+     "data",
+     "schemas",
+     "migrations",
+     "infra",
+     "terraform",
+     "docker",
+     "k8s",
+     ".github/workflows",
+     "scripts",
+     "config",
+     "db",
+ ]
+
+ ROUTING_TRIGGERS = {
+     "schemas/": "schema_guard",
+     "migrations/": "migration_guard",
+     "infra/": "infra_guard",
+     "terraform/": "infra_guard",
+     "k8s/": "infra_guard",
+     "docker/": "container_guard",
+     "db/": "data_guard",
+     "data/": "data_guard",
+     ".github/workflows/": "ci_guard",
+ }
+
+
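+ # Evidence model: a Signal pins a snippet to a file location; a Fact records
+ # a typed conclusion together with the source location that supports it.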
+ @dataclass
+ class Signal:
+     path: str
+     line_start: int
+     line_end: int
+     snippet: str
+     note: str
+
+
+ @dataclass
+ class Fact:
+     fact_type: str
+     value: Any
+     source: dict[str, Any]
+
+
+ class ParseError(Exception):
+     def __init__(self, path: Path, message: str) -> None:
+         super().__init__(message)
+         self.path = path
+         self.message = message
+
+
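+ # File readers tolerate missing or binary files unless required=True: a NUL
+ # byte is treated as "binary", and text is decoded as UTF-8 with replacement.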
+ def read_text_file(path: Path, required: bool = False) -> str | None:
+     try:
+         data = path.read_bytes()
+     except OSError as exc:
+         if required:
+             raise ParseError(path, str(exc)) from exc
+         return None
+     if b"\x00" in data:
+         if required:
+             raise ParseError(path, "binary file not supported")
+         return None
+     return data.decode("utf-8", errors="replace")
+
+
+ def read_lines(path: Path, required: bool = False) -> list[str]:
+     text = read_text_file(path, required=required)
+     if text is None:
+         return []
+     return text.splitlines()
+
+
+ def find_line_number(lines: list[str], predicate: re.Pattern[str]) -> int | None:
+     for idx, line in enumerate(lines, start=1):
+         if predicate.search(line):
+             return idx
+     return None
+
+
+ def make_signal(
+     root: Path,
+     path: Path,
+     line_start: int,
+     line_end: int,
+     note: str,
+     lines: list[str] | None = None,
+ ) -> Signal:
+     lines = lines if lines is not None else read_lines(path)
+     snippet_lines = lines[line_start - 1 : line_end] if lines else []
+     snippet = "\n".join(snippet_lines).strip()
+     return Signal(
+         path=str(path.relative_to(root)),
+         line_start=line_start,
+         line_end=line_end,
+         snippet=snippet,
+         note=note,
+     )
+
+
+ def make_signal_path_only(root: Path, path: Path, note: str) -> Signal:
+     return Signal(
+         path=str(path.relative_to(root)),
+         line_start=1,
+         line_end=1,
+         snippet="",
+         note=note,
+     )
+
+
+ def add_fact(facts: list[Fact], fact_type: str, value: Any, signal: Signal) -> None:
+     facts.append(
+         Fact(
+             fact_type=fact_type,
+             value=value,
+             source={
+                 "path": signal.path,
+                 "line_start": signal.line_start,
+                 "line_end": signal.line_end,
+             },
+         )
+     )
+
+
+ def find_first_match(root: Path, filename: str) -> Path | None:
+     direct = root / filename
+     if direct.exists():
+         return direct
+     matches = sorted(p for p in root.rglob(filename) if p.is_file())
+     return matches[0] if matches else None
+
+
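+ # Each language detector returns (detected, build_tools, verify_commands);
+ # verify command entries carry a "source" Signal so facts can cite evidence.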
+ def detect_python(
+     root: Path, signals: list[Signal], facts: list[Fact]
+ ) -> tuple[bool, list[str], list[dict[str, Any]]]:
+     detected = False
+     build_tools: list[str] = []
+     verify_commands: list[dict[str, Any]] = []
+
+     pyproject = find_first_match(root, "pyproject.toml")
+     if pyproject:
+         detected = True
+         text = read_text_file(pyproject, required=True)
+         lines = read_lines(pyproject, required=True)
+         try:
+             data = tomllib.loads(text or "")
+         except tomllib.TOMLDecodeError as exc:
+             raise ParseError(pyproject, str(exc)) from exc
+         tool = data.get("tool", {}) if isinstance(data, dict) else {}
+         if "poetry" in tool:
+             line = find_line_number(lines, re.compile(r"^\s*\[tool\.poetry\]")) or 1
+             signal = make_signal(
+                 root, pyproject, line, line, "pyproject tool.poetry", lines=lines
+             )
+             signals.append(signal)
+             add_fact(facts, "detected_build_tool", "poetry", signal)
+             build_tools.append("poetry")
+             verify_commands.append(
+                 {
+                     "cwd": ".",
+                     "command": ["poetry", "run", "pytest", "-q"],
+                     "reason": "pyproject tool.poetry",
+                     "source": signal,
+                 }
+             )
+         if "uv" in tool:
+             line = find_line_number(lines, re.compile(r"^\s*\[tool\.uv\]")) or 1
+             signal = make_signal(
+                 root, pyproject, line, line, "pyproject tool.uv", lines=lines
+             )
+             signals.append(signal)
+             add_fact(facts, "detected_build_tool", "uv", signal)
+             build_tools.append("uv")
+             verify_commands.append(
+                 {
+                     "cwd": ".",
+                     "command": ["uv", "run", "pytest", "-q"],
+                     "reason": "pyproject tool.uv",
+                     "source": signal,
+                 }
+             )
+         for section in ["ruff", "pytest", "mypy", "pyright"]:
+             if section in tool:
+                 pattern = re.compile(rf"^\s*\[tool\.{section}(\.|\])")
+                 line = find_line_number(lines, pattern) or 1
+                 signal = make_signal(
+                     root,
+                     pyproject,
+                     line,
+                     line,
+                     f"pyproject tool.{section}",
+                     lines=lines,
+                 )
+                 signals.append(signal)
+                 add_fact(facts, "detected_python_tool", section, signal)
+         if not build_tools:
+             line = find_line_number(lines, re.compile(r"^\s*\[project\]")) or 1
+             signal = make_signal(
+                 root, pyproject, line, line, "pyproject project", lines=lines
+             )
+             signals.append(signal)
+             add_fact(facts, "detected_build_tool", "pip", signal)
+             build_tools.append("pip")
+             verify_commands.append(
+                 {
+                     "cwd": ".",
+                     "command": ["python", "-m", "pytest", "-q"],
+                     "reason": "pyproject project",
+                     "source": signal,
+                 }
+             )
+     else:
+         for name in ["requirements.txt", "setup.cfg", "setup.py"]:
+             path = find_first_match(root, name)
+             if path:
+                 detected = True
+                 signal = make_signal(root, path, 1, 1, f"{name} present")
+                 signals.append(signal)
+                 add_fact(facts, "detected_build_tool", "pip", signal)
+                 build_tools.append("pip")
+                 verify_commands.append(
+                     {
+                         "cwd": ".",
+                         "command": ["python", "-m", "pytest", "-q"],
+                         "reason": f"{name} present",
+                         "source": signal,
+                     }
+                 )
+                 break
+
+     return detected, build_tools, verify_commands
+
+
+ def _find_json_key_line(lines: list[str], key: str) -> int:
+     pattern = re.compile(rf'"{re.escape(key)}"\s*:')
+     return find_line_number(lines, pattern) or 1
+
+
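+ # Node detection keys off package.json; the "packageManager" field selects
+ # the runner (pnpm/yarn/npm), defaulting to npm when it is absent.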
+ def detect_node(
+     root: Path, signals: list[Signal], facts: list[Fact]
+ ) -> tuple[bool, list[str], list[dict[str, Any]]]:
+     pkg = find_first_match(root, "package.json")
+     if not pkg:
+         return False, [], []
+
+     detected = True
+     text = read_text_file(pkg, required=True)
+     lines = read_lines(pkg, required=True)
+     try:
+         data = json.loads(text or "")
+     except json.JSONDecodeError as exc:
+         raise ParseError(pkg, str(exc)) from exc
+
+     build_tools: list[str] = []
+     verify_commands: list[dict[str, Any]] = []
+
+     package_manager = data.get("packageManager", "")
+     runner = "npm"
+     if isinstance(package_manager, str):
+         if package_manager.startswith("pnpm"):
+             runner = "pnpm"
+             build_tools.append("pnpm")
+         elif package_manager.startswith("yarn"):
+             runner = "yarn"
+             build_tools.append("yarn")
+         elif package_manager.startswith("npm"):
+             runner = "npm"
+             build_tools.append("npm")
+     if not build_tools:
+         build_tools.append("npm")
+
+     signal_line = _find_json_key_line(lines, "scripts")
+     signal = make_signal(
+         root, pkg, signal_line, signal_line, "package.json scripts", lines=lines
+     )
+     signals.append(signal)
+     add_fact(facts, "detected_build_tool", build_tools[-1], signal)
+
+     scripts = data.get("scripts", {}) if isinstance(data, dict) else {}
+     if isinstance(scripts, dict):
+         script_map = [
+             ("test", "test"),
+             ("lint", "lint"),
+             ("typecheck", "typecheck"),
+         ]
+         for script_key, kind in script_map:
+             if script_key in scripts:
+                 line = _find_json_key_line(lines, script_key)
+                 script_signal = make_signal(
+                     root,
+                     pkg,
+                     line,
+                     line,
+                     f"package.json script {script_key}",
+                     lines=lines,
+                 )
+                 signals.append(script_signal)
+                 add_fact(facts, "detected_node_script", script_key, script_signal)
+                 if runner in ["pnpm", "yarn"]:
+                     cmd = [runner, "-s", script_key]
+                 elif script_key == "test":
+                     cmd = ["npm", "test"]
+                 else:
+                     cmd = ["npm", "run", script_key]
+                 verify_commands.append(
+                     {
+                         "cwd": ".",
+                         "command": cmd,
+                         "reason": f"package.json script {script_key}",
+                         "source": script_signal,
+                     }
+                 )
+
+     return detected, build_tools, verify_commands
+
+
+ def detect_rust(
+     root: Path, signals: list[Signal], facts: list[Fact]
+ ) -> tuple[bool, list[str], list[dict[str, Any]]]:
+     cargo = find_first_match(root, "Cargo.toml")
+     if not cargo:
+         return False, [], []
+
+     lines = read_lines(cargo, required=True)
+     line = find_line_number(lines, re.compile(r"^\s*\[package\]")) or 1
+     signal = make_signal(root, cargo, line, line, "Cargo.toml package", lines=lines)
+     signals.append(signal)
+     add_fact(facts, "detected_build_tool", "cargo", signal)
+
+     return (
+         True,
+         ["cargo"],
+         [
+             {
+                 "cwd": ".",
+                 "command": ["cargo", "test"],
+                 "reason": "Cargo.toml package",
+                 "source": signal,
+             }
+         ],
+     )
+
+
+ def detect_go(
+     root: Path, signals: list[Signal], facts: list[Fact]
+ ) -> tuple[bool, list[str], list[dict[str, Any]]]:
+     go_mod = find_first_match(root, "go.mod")
+     if not go_mod:
+         return False, [], []
+
+     signal = make_signal(root, go_mod, 1, 1, "go.mod present")
+     signals.append(signal)
+     add_fact(facts, "detected_build_tool", "go", signal)
+
+     return (
+         True,
+         ["go"],
+         [
+             {
+                 "cwd": ".",
+                 "command": ["go", "test", "./..."],
+                 "reason": "go.mod present",
+                 "source": signal,
+             }
+         ],
+     )
+
+
+ def detect_ci(
+     root: Path, signals: list[Signal], facts: list[Fact]
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+     workflows_dir = root / ".github" / "workflows"
+     if not workflows_dir.exists():
+         return [], []
+
+     commands: list[dict[str, Any]] = []
+     ci_gates: list[dict[str, Any]] = []
+
+     for workflow in sorted(workflows_dir.glob("*.y*ml")):
+         lines = read_lines(workflow)
+         run_entries = extract_run_commands(lines)
+         for cmd, line_no in run_entries:
+             label = "ci run step"
+             signal = make_signal(root, workflow, line_no, line_no, label, lines=lines)
+             signals.append(signal)
+
+             if not is_repo_local_ci_command(cmd):
+                 continue
+
+             command_array = shlex.split(cmd)
+             commands.append(
+                 {
+                     "cwd": ".",
+                     "command": command_array,
+                     "reason": f"ci workflow {workflow.name}",
+                     "source": signal,
+                 }
+             )
+
+             kind = classify_ci_command(cmd)
+             if kind:
+                 ci_gates.append(
+                     {
+                         "kind": kind,
+                         "cwd": ".",
+                         "command": command_array,
+                     }
+                 )
+                 add_fact(
+                     facts, "ci_gate", {"kind": kind, "command": command_array}, signal
+                 )
+
+     return commands, ci_gates
+
+
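+ # Workflow parsing is deliberately line-based rather than full YAML: inline
+ # "run:" values are taken verbatim, and for a "run: |" block scalar only the
+ # first non-empty command line is kept, with its 1-based line number.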
+ def extract_run_commands(lines: list[str]) -> list[tuple[str, int]]:
+     results: list[tuple[str, int]] = []
+     idx = 0
+     while idx < len(lines):
+         line = lines[idx]
+         if re.match(r"^\s*run:\s*\|\s*$", line):
+             base_indent = len(line) - len(line.lstrip())
+             idx += 1
+             block_lines = []
+             while idx < len(lines):
+                 next_line = lines[idx]
+                 indent = len(next_line) - len(next_line.lstrip())
+                 if indent <= base_indent:
+                     break
+                 stripped = next_line.strip()
+                 if stripped:
+                     block_lines.append((stripped, idx + 1))
+                 idx += 1
+             if block_lines:
+                 results.append(block_lines[0])
+             continue
+         match = re.match(r"^\s*run:\s*(.+)$", line)
+         if match:
+             results.append((match.group(1).strip(), idx + 1))
+         idx += 1
+     return results
+
+
+ def is_repo_local_ci_command(command: str) -> bool:
+     markers = [
+         "pytest",
+         "ruff",
+         "mypy",
+         "pyright",
+         "cargo test",
+         "go test",
+         "npm",
+         "pnpm",
+         "yarn",
+     ]
+     return any(marker in command for marker in markers)
+
+
+ def classify_ci_command(command: str) -> str | None:
+     lowered = command.lower()
+     if "pytest" in lowered or "cargo test" in lowered or "go test" in lowered:
+         return "test"
+     if "npm" in lowered or "pnpm" in lowered or "yarn" in lowered:
+         if "test" in lowered:
+             return "test"
+     if "lint" in lowered or "ruff" in lowered:
+         return "lint"
+     if "mypy" in lowered or "pyright" in lowered or "typecheck" in lowered:
+         return "typecheck"
+     return None
+
+
+ def detect_risk_paths(
+     root: Path, signals: list[Signal], facts: list[Fact]
+ ) -> list[str]:
+     risk_paths: list[str] = []
+     for candidate in RISK_PATH_CANDIDATES:
+         path = root / candidate
+         if not path.exists():
+             continue
+         evidence = find_evidence_file(path)
+         if evidence is None:
+             continue
+         signal = make_signal_path_only(root, evidence, f"risk path {candidate}")
+         signals.append(signal)
+         value = f"{candidate}/"
+         risk_paths.append(value)
+         add_fact(facts, "risk_path", value, signal)
+     return risk_paths
+
+
+ def find_evidence_file(path: Path) -> Path | None:
+     if path.is_file():
+         return path
+     files = sorted(p for p in path.rglob("*") if p.is_file())
+     return files[0] if files else None
+
+
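+ # Commands are deduplicated by their exact argv; merge_overlay concatenates
+ # the list-valued keys and lets the overlay override every other key.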
+ def dedupe_commands(commands: list[dict[str, Any]]) -> list[dict[str, Any]]:
+     seen: set[tuple[str, ...]] = set()
+     deduped: list[dict[str, Any]] = []
+     for command in commands:
+         key = tuple(command["command"])
+         if key in seen:
+             continue
+         seen.add(key)
+         deduped.append(command)
+     return deduped
+
+
+ def normalize_verify_commands(commands: list[dict[str, Any]]) -> list[dict[str, Any]]:
+     normalized = []
+     for command in commands:
+         normalized.append({"cwd": command["cwd"], "command": command["command"]})
+     return normalized
+
+
+ def merge_overlay(base: dict[str, Any], overlay: dict[str, Any]) -> dict[str, Any]:
+     merged = dict(base)
+     for key in ["verify_commands", "risk_paths"]:
+         base_list = merged.get(key, [])
+         overlay_list = overlay.get(key, [])
+         merged[key] = base_list + overlay_list
+     for key in overlay:
+         if key not in ["verify_commands", "risk_paths"]:
+             merged[key] = overlay[key]
+     return merged
+
+
+ def render_report(
+     summary: dict[str, Any],
+     signals: list[Signal],
+     facts: list[Fact],
+     write: bool,
+     out_dir: Path,
+     agents_status: dict[str, Any] | None = None,
+ ) -> str:
+     lines: list[str] = []
+     lines.append("# Repo Init Report")
+     lines.append("")
+     lines.append("## Summary")
+     lines.append("- repo_root: .")
+     lines.append(f"- detected_languages: {', '.join(summary['languages']) or 'none'}")
+     lines.append(
+         f"- detected_build_tools: {', '.join(summary['build_tools']) or 'none'}"
+     )
+     lines.append(f"- verify_commands: {len(summary['verify_commands'])}")
+     lines.append("")
+     lines.append("## Detected signals")
+     if not signals:
+         lines.append("- none")
+     else:
+         for signal in sorted(signals, key=lambda s: (s.path, s.line_start, s.note)):
+             location = f"{signal.path}:{signal.line_start}-{signal.line_end}"
+             snippet = signal.snippet.replace("\n", " ")
+             lines.append(f"- {location} | {signal.note} | {snippet}")
+     lines.append("")
+     lines.append("## Evidence")
+     if not facts:
+         lines.append("- none")
+     else:
+         for fact in sorted(
+             facts,
+             key=lambda f: (
+                 f.source["path"],
+                 f.source["line_start"],
+                 f.fact_type,
+                 str(f.value),
+             ),
+         ):
+             location = (
+                 f"{fact.source['path']}:{fact.source['line_start']}-"
+                 f"{fact.source['line_end']}"
+             )
+             lines.append(f"- {fact.fact_type}: {fact.value} | {location}")
+     lines.append("")
+     lines.append("## Proposed verify pipeline")
+     if not summary["verify_commands"]:
+         lines.append("- none")
+     else:
+         for item in summary["verify_commands"]:
+             cmd = " ".join(item["command"])
+             lines.append(f"- {cmd} (from {item['reason']})")
+     lines.append("")
+     lines.append("## Proposed risk paths")
+     if not summary["risk_paths"]:
+         lines.append("- none")
+     else:
+         for risk in summary["risk_paths"]:
+             lines.append(f"- {risk}")
+     lines.append("")
+     lines.append("## AGENTS.md status")
+     if not agents_status:
+         lines.append("- unknown")
+     else:
+         lines.append(f"- status: {agents_status['status']}")
+         if agents_status.get("details"):
+             for detail in agents_status["details"]:
+                 lines.append(f"- {detail}")
+         if agents_status.get("status") in {"missing", "incompatible"}:
+             lines.append("- action: run agentctl bootstrap to create/update policy block")
+     lines.append("")
+     if not write:
+         lines.append("## Planned file writes")
+         lines.append(f"- {out_dir / 'AGENTS.repo.overlay.yaml'}")
+         lines.append(f"- {out_dir / 'init_report.md'}")
+         lines.append(f"- {out_dir / 'init_facts.json'}")
+         lines.append("")
+     return "\n".join(lines) + "\n"
+
+
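+ # Error reports avoid leaking absolute paths: the repo root is rendered as
+ # ".." and the system temp directory as "<tmp>".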
+ def _display_path(root: Path, path: Path) -> str:
+     root = root.resolve()
+     path = path.resolve()
+     try:
+         return path.relative_to(root).as_posix()
+     except ValueError:
+         temp_root = Path(tempfile.gettempdir()).resolve()
+         try:
+             rel = path.relative_to(temp_root)
+             return f"<tmp>/{rel.as_posix()}"
+         except ValueError:
+             return path.name
+
+
+ def _normalize_error_message(root: Path, message: str) -> str:
+     root_str = str(root.resolve())
+     temp_root = str(Path(tempfile.gettempdir()).resolve())
+     normalized = message.replace(root_str, "..")
+     normalized = normalized.replace(temp_root, "<tmp>")
+     return normalized
+
+
+ def render_error_report(root: Path, path: Path, message: str) -> str:
+     lines: list[str] = []
+     lines.append("# Repo Init Report")
+     lines.append("")
+     lines.append("## Summary")
+     lines.append("- status: failed")
+     lines.append("")
+     lines.append("## Errors")
+     display_path = _display_path(root, path)
+     normalized_message = _normalize_error_message(root, message)
+     lines.append(f"- {display_path}: {normalized_message}")
+     lines.append("")
+     return "\n".join(lines) + "\n"
+
+
+ def write_outputs(
+     out_dir: Path,
+     overlay: dict[str, Any],
+     report: str,
+     facts: list[Fact],
+     init_tool_version: str,
+     generated_at_utc: str,
+ ) -> None:
+     out_dir.mkdir(parents=True, exist_ok=True)
+     overlay_path = out_dir / "AGENTS.repo.overlay.yaml"
+     report_path = out_dir / "init_report.md"
+     facts_path = out_dir / "init_facts.json"
+
+     overlay_path.write_text(yaml.safe_dump(overlay, sort_keys=False))
+     report_path.write_text(report)
+     facts_payload = {
+         "init_tool_version": init_tool_version,
+         "generated_at_utc": generated_at_utc,
+         "facts": [
+             {
+                 "fact_type": fact.fact_type,
+                 "value": fact.value,
+                 "source": fact.source,
+             }
+             for fact in facts
+         ],
+     }
+     facts_path.write_text(json.dumps(facts_payload, indent=2, sort_keys=True))
+
+
+ def update_gitignore(root: Path, out_dir: Path) -> None:
+     gitignore = root / ".gitignore"
+     if not gitignore.exists():
+         return
+     line = f"{out_dir.as_posix()}/"
+     content_text = read_text_file(gitignore)
+     if content_text is None:
+         return
+     content = content_text.splitlines()
+     if line in content:
+         return
+     content.append(line)
+     gitignore.write_text("\n".join(content) + "\n")
+
+
+ def build_overlay(
+     root: Path,
+ ) -> tuple[dict[str, Any], list[Signal], list[Fact], list[dict[str, Any]]]:
+     signals: list[Signal] = []
+     facts: list[Fact] = []
+
+     detected_languages: set[str] = set()
+     detected_build_tools: list[str] = []
+     verify_commands: list[dict[str, Any]] = []
+
+     ci_commands, ci_gates = detect_ci(root, signals, facts)
+     if ci_commands:
+         verify_commands.extend(ci_commands)
+
+     python_detected, python_tools, python_commands = detect_python(root, signals, facts)
+     if python_detected:
+         detected_languages.add("python")
+         detected_build_tools.extend(python_tools)
+         verify_commands.extend(python_commands)
+
+     node_detected, node_tools, node_commands = detect_node(root, signals, facts)
+     if node_detected:
+         detected_languages.add("node")
+         detected_build_tools.extend(node_tools)
+         verify_commands.extend(node_commands)
+
+     rust_detected, rust_tools, rust_commands = detect_rust(root, signals, facts)
+     if rust_detected:
+         detected_languages.add("rust")
+         detected_build_tools.extend(rust_tools)
+         verify_commands.extend(rust_commands)
+
+     go_detected, go_tools, go_commands = detect_go(root, signals, facts)
+     if go_detected:
+         detected_languages.add("go")
+         detected_build_tools.extend(go_tools)
+         verify_commands.extend(go_commands)
+
+     risk_paths = detect_risk_paths(root, signals, facts)
+
+     sorted_build_tools = sorted(set(detected_build_tools))
+     sorted_languages = sorted(detected_languages)
+
+     verify_commands = dedupe_commands(verify_commands)
+
+     routing_triggers = []
+     for risk_path in sorted(risk_paths):
+         gate = ROUTING_TRIGGERS.get(risk_path)
+         if gate:
+             routing_triggers.append({"path_glob": risk_path + "*", "gate": gate})
+
+     verify_details = verify_commands
+     overlay = {
+         "repo_root": ".",
+         "detected_languages": sorted_languages,
+         "detected_build_tools": sorted_build_tools,
+         "verify_commands": normalize_verify_commands(verify_commands),
+         "risk_paths": sorted(risk_paths),
+         "ci_gates": ci_gates,
+         "suggested_routing_triggers": routing_triggers,
+     }
+
+     for command in verify_commands:
+         if command.get("source"):
+             add_fact(
+                 facts,
+                 "verify_command",
+                 {"cwd": command["cwd"], "command": command["command"]},
+                 command["source"],
+             )
+
+     return overlay, signals, facts, verify_details
+
+
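+ # Orchestrates detection and output. Exit codes: 0 on success, 2 when an
+ # input fails to parse, 3 when outputs already exist (and force is not set)
+ # or cannot be written. The report is always printed to stdout.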
+ def run_init(
+     root: Path,
+     write: bool = False,
+     out_dir: str = ".agents/generated",
+     force: bool = False,
+     print_agents_template: bool = False,
+     strict: bool = False,
+ ) -> int:
+     try:
+         agents_status, agents_template = check_agents_md(
+             root, signals=None, facts=None, strict=strict
+         )
+     except ParseError as exc:
+         report = render_error_report(root, exc.path, exc.message)
+         print(report, end="")
+         if print_agents_template:
+             print(check_agents_md(root, None, None, strict=False)[1], end="")
+         return 2
+     try:
+         overlay, signals, facts, verify_details = build_overlay(root)
+     except ParseError as exc:
+         report = render_error_report(root, exc.path, exc.message)
+         print(report, end="")
+         if agents_status["status"] == "missing" and print_agents_template:
+             print(agents_template, end="")
+         return 2
+     try:
+         agents_status, _agents_template = check_agents_md(
+             root, signals=signals, facts=facts, strict=strict
+         )
+     except ParseError as exc:
+         report = render_error_report(root, exc.path, exc.message)
+         print(report, end="")
+         return 2
+     out_path = root / out_dir
+     out_dir_display = Path(out_dir)
+     init_tool_version = _resolve_init_tool_version()
+     generated_at_utc = _generated_timestamp()
+
+     summary = {
+         "languages": overlay["detected_languages"],
+         "build_tools": overlay["detected_build_tools"],
+         "verify_commands": verify_details,
+         "risk_paths": overlay["risk_paths"],
+     }
+     report = render_report(
+         summary, signals, facts, write, out_dir_display, agents_status
+     )
+     ordered_overlay = {
+         "repo_root": overlay["repo_root"],
+         "init_tool_version": init_tool_version,
+         "generated_at_utc": generated_at_utc,
+         "detected_languages": overlay["detected_languages"],
+         "detected_build_tools": overlay["detected_build_tools"],
+         "verify_commands": overlay["verify_commands"],
+         "risk_paths": overlay["risk_paths"],
+         "ci_gates": overlay["ci_gates"],
+         "suggested_routing_triggers": overlay["suggested_routing_triggers"],
+     }
+
+     if write:
+         outputs = [
+             out_path / "AGENTS.repo.overlay.yaml",
+             out_path / "init_report.md",
+             out_path / "init_facts.json",
+         ]
+         if not force:
+             existing = [str(path) for path in outputs if path.exists()]
+             if existing:
+                 report = render_error_report(
+                     root,
+                     out_path,
+                     f"outputs exist: {', '.join(existing)}",
+                 )
+                 print(report, end="")
+                 return 3
+         try:
+             write_outputs(
+                 out_path,
+                 ordered_overlay,
+                 report,
+                 facts,
+                 init_tool_version,
+                 generated_at_utc,
+             )
+             update_gitignore(root, out_dir_display)
+         except OSError as exc:
+             report = render_error_report(root, out_path, f"write failed: {exc}")
+             print(report, end="")
+             return 3
+
+     print(report, end="")
+     if agents_status["status"] == "missing" and print_agents_template:
+         print(agents_template, end="")
+     return 0
+
+
+ def _generated_timestamp() -> str:
+     epoch = os.environ.get("SOURCE_DATE_EPOCH")
+     if epoch:
+         return datetime.fromtimestamp(int(epoch), tz=timezone.utc).isoformat()
+     return datetime.now(timezone.utc).isoformat()
+
+
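+ # AGENTS.md may carry its policy either inside a single yaml-fenced markdown
+ # block or as a bare block starting at "policy_schema_version:"; more than
+ # one candidate block in either form is rejected as ambiguous.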
+ def parse_agents_policy(path: Path) -> tuple[dict[str, Any], str | None]:
+     content_text = read_text_file(path, required=True)
+     content = content_text.splitlines() if content_text else []
+     fence_indices = [idx for idx, line in enumerate(content) if line.strip() == "```yaml"]
+     if fence_indices:
+         if len(fence_indices) > 1:
+             raise ParseError(path, "multiple policy blocks found")
+         fence_start = fence_indices[0]
+         fence_end = None
+         for idx in range(fence_start + 1, len(content)):
+             if content[idx].strip() == "```":
+                 fence_end = idx
+                 break
+         if fence_end is None:
+             raise ParseError(path, "unterminated policy block")
+         for idx, line in enumerate(content):
+             if fence_start <= idx <= fence_end:
+                 continue
+             if line.lstrip().startswith("policy_schema_version:"):
+                 raise ParseError(path, "multiple policy blocks found")
+         start = fence_start + 1
+         end = fence_end - 1
+     else:
+         starts = [
+             idx
+             for idx, line in enumerate(content)
+             if line.lstrip().startswith("policy_schema_version:")
+         ]
+         if not starts:
+             raise ParseError(path, "missing policy block")
+         if len(starts) > 1:
+             raise ParseError(path, "multiple policy blocks found")
+         start = starts[0]
+         start_indent = len(content[start]) - len(content[start].lstrip())
+         end = start
+         for idx in range(start + 1, len(content)):
+             line = content[idx]
+             if not line.strip():
+                 break
+             if re.match(r"^#{1,6}\s", line.lstrip()):
+                 break
+             line_indent = len(line) - len(line.lstrip())
+             if line_indent < start_indent:
+                 break
+             end = idx
+     block = "\n".join(content[start : end + 1])
+     try:
+         data = yaml.safe_load(block)
+     except yaml.YAMLError as exc:
+         raise ParseError(path, str(exc)) from exc
+     if not isinstance(data, dict):
+         raise ParseError(path, "policy block must be a YAML mapping")
+     return data, f"{path.name}:{start + 1}-{end + 1}"
+
+
+ def check_agents_md(
+     root: Path,
+     signals: list[Signal] | None,
+     facts: list[Fact] | None,
+     strict: bool,
+ ) -> tuple[dict[str, Any], str]:
+     path = root / "AGENTS.md"
+     template = (
+         "policy_schema_version: 1\n"
+         "min_tool_version: 1.0.4\n"
+         "allowed_roles:\n"
+         " - triage\n\n"
+         "## agent init behavior\n"
+         "- init is evidence-only (no LLM)\n"
+         "- ignore: .venv/, node_modules/, .git/\n"
+         "- prefer python3 over python when present\n\n"
+         "## Notes\n"
+         "- Add human context here.\n"
+     )
+     if not path.exists():
+         return {"status": "missing", "details": ["AGENTS.md not found"]}, template
+
+     lines = read_lines(path, required=True)
+     details: list[str] = []
+     status = "ok"
+     try:
+         policy, block_range = parse_agents_policy(path)
+     except ParseError as exc:
+         if strict:
+             raise
+         status = "incompatible"
+         details.append(f"{exc.path}: {exc.message}")
+         return {"status": status, "details": details}, template
+
+     version = policy.get("policy_schema_version")
+     if not isinstance(version, int):
+         if strict:
+             raise ParseError(path, "policy_schema_version must be an integer")
+         status = "incompatible"
+         details.append("policy_schema_version must be an integer")
+     elif version != 1:
+         if strict:
+             raise ParseError(path, f"unsupported policy_schema_version: {version}")
+         status = "incompatible"
+         details.append(f"unsupported policy_schema_version: {version}")
+     else:
+         details.append(f"policy_schema_version: {version}")
+
+     required_sections = ["## agent init behavior"]
+     for section in required_sections:
+         line = find_line_number(lines, re.compile(re.escape(section))) or 0
+         if not line:
+             status = "incompatible"
+             details.append(f"missing section: {section}")
+         else:
+             details.append(f"{section} at {path.name}:{line}-{line}")
+
+     if signals is not None and block_range:
+         block_start = block_range.split(":")[1].split("-")[0]
+         line_no = int(block_start)
+         signal = make_signal(root, path, line_no, line_no, "AGENTS.md policy block")
+         signals.append(signal)
+         if facts is not None:
+             add_fact(facts, "agents_md_policy", {"range": block_range}, signal)
+
+     return {"status": status, "details": details}, template
+
+
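+ # Version resolution prefers the nearest pyproject.toml above this file (a
+ # source checkout); only when no parent has one does it fall back to the
+ # installed distribution metadata for PACKAGE_NAME.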
+ def _resolve_init_tool_version() -> str:
+     for parent in Path(__file__).resolve().parents:
+         candidate = parent / "pyproject.toml"
+         if not candidate.exists():
+             continue
+         text = read_text_file(candidate)
+         if text is None:
+             return "unknown"
+         try:
+             data = tomllib.loads(text)
+         except tomllib.TOMLDecodeError:
+             return "unknown"
+         if not isinstance(data, dict):
+             return "unknown"
+         project = data.get("project", {})
+         if isinstance(project, dict):
+             version = project.get("version")
+             if isinstance(version, str) and version.strip():
+                 return version.strip()
+         tool = data.get("tool", {})
+         if isinstance(tool, dict):
+             poetry = tool.get("poetry", {})
+             if isinstance(poetry, dict):
+                 version = poetry.get("version")
+                 if isinstance(version, str) and version.strip():
+                     return version.strip()
+         return "unknown"
+
+     try:
+         return metadata.version(PACKAGE_NAME)
+     except metadata.PackageNotFoundError:
+         return "unknown"