jekyll-theme-zer0 1.22.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1046 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ triage.py — Issue Autopilot classifier / planner (deterministic engine).
4
+
5
+ Reads every OPEN GitHub issue for the configured repo, classifies each one into a
6
+ `disposition` (per .issues/config.yml, first-match-wins), groups the resolvable /
7
+ closable ones into PR-sized `batches`, and emits a stable machine contract
8
+ (.issues/plan.json) plus human-readable worklist + report Markdown.
9
+
10
+ Subcommands:
11
+ plan fetch open issues, classify, group, write all artifacts
12
+ status print a concise dashboard (reads plan.json if present, else recomputes)
13
+
14
+ Artifacts written by `plan` (unless --dry-run):
15
+ .issues/index.json raw snapshot of open issues (transient)
16
+ .issues/plan.json the machine contract (transient)
17
+ .issues/worklists/<YYYY-MM-DD>.md readable worklist for the autopilot loop
18
+ .issues/reports/<YYYY-MM-DD>.md short health report
19
+
20
+ ---------------------------------------------------------------------------
21
+ READ / PLAN ONLY. This script NEVER mutates GitHub. It does not close,
22
+ comment, label, or open PRs. It only reads issues (via the `gh` CLI, the
23
+ repo's house pattern) and writes plan files to .issues/. Closing /
24
+ commenting / PR-opening is performed elsewhere by the autopilot agents and
25
+ workflows, which ACT on this plan but never re-decide policy.
26
+
27
+ HARD GUARDRAIL (enforced in code below, not a config knob): a HUMAN-authored
28
+ issue can NEVER receive a *heuristic close* disposition (stale bot-noise, etc.).
29
+ Any such match is downgraded to `needs-human`. Only bot/automation-authored
30
+ noise is ever `eligible_autoclose`.
31
+
32
+ A human issue is closed by the autopilot through ONE evidence-based path only:
33
+ the verify-and-close lane. Each non-protected, non-epic human issue is flagged
34
+ `verify_candidate`; the issue-verifier (LLM, read-only) checks whether its fix
35
+ is ALREADY on `main`; and scripts/issues/verify_close.py closes it ONLY when the
36
+ verdict is resolved=true (high confidence + evidence) AND `main`'s full CI/CD
37
+ gate suite is green. This engine only flags candidacy — it never closes.
38
+
39
+ Untrusted-input quarantine: issue title/body text is treated strictly as DATA.
40
+ It is regex-matched and copied into reports, but NEVER eval'd / exec'd / shelled.
41
+ ---------------------------------------------------------------------------
42
+
43
+ Author: IT-Journey Team | Part of the Issue Autopilot foundation.
44
+ """
45
+ from __future__ import annotations
46
+
47
+ import argparse
48
+ import json
49
+ import re
50
+ import subprocess
51
+ import sys
52
+ from datetime import datetime, timezone
53
+ from pathlib import Path
54
+ from typing import Any, Optional
55
+
56
+ try:
57
+ import yaml
58
+ except ImportError: # pragma: no cover - environment guard
59
+ print("ERROR: PyYAML required. pip install pyyaml", file=sys.stderr)
60
+ sys.exit(1)
61
+
62
+ # --------------------------------------------------------------------------- #
63
+ # Paths
64
+ # --------------------------------------------------------------------------- #
65
# Directory holding this script (symlinks resolved).
SCRIPT_DIR = Path(__file__).resolve().parent
# Two levels above the script directory; treated as the repository root.
REPO_ROOT = SCRIPT_DIR.parent.parent
# All autopilot inputs/outputs live under <repo>/.issues/.
ISSUES_DIR = REPO_ROOT / ".issues"
CONFIG_PATH = ISSUES_DIR / "config.yml"

# Bot detection heuristics layered on top of gh's author.is_bot flag.
# NOTE: despite the name, is_bot_author() tests this marker with `in`
# (substring containment), not with str.endswith().
BOT_LOGIN_SUFFIX = "[bot]"
# Login compared for exact equality in is_bot_author().
BOT_LOGIN_EXACT = "app/github-actions"

# How many open issues to ask gh for (passed to `gh issue list --limit`).
GH_FETCH_LIMIT = 200

# Comma-separated field list passed to `gh issue list --json`.
GH_JSON_FIELDS = (
    "number,title,body,labels,author,createdAt,updatedAt,comments,milestone"
)
80
+
81
+
82
+ # --------------------------------------------------------------------------- #
83
+ # Small utilities
84
+ # --------------------------------------------------------------------------- #
85
def warn(msg: str) -> None:
    """Emit a GitHub-Actions-style warning that is harmless in a plain shell.

    The message is written to stderr as a ``::warning::`` workflow command.
    Workflow commands are single-line, so ``%``, carriage returns and newlines
    are percent-encoded; otherwise a multi-line message (e.g. captured ``gh``
    stderr) would be cut off at the first line break.

    Args:
        msg: Human-readable warning text; may span several lines.
    """
    # Encode "%" first so the escapes added below are not themselves re-encoded.
    escaped = (
        str(msg)
        .replace("%", "%25")
        .replace("\r", "%0D")
        .replace("\n", "%0A")
    )
    print(f"::warning::{escaped}", file=sys.stderr)
88
+
89
+
90
def now_iso() -> str:
    """Return the current UTC instant as ISO-8601, truncated to whole seconds."""
    utc_now = datetime.now(tz=timezone.utc)
    # timespec="seconds" drops the fractional part, keeping the key stable.
    return utc_now.isoformat(timespec="seconds")
93
+
94
+
95
def today_str() -> str:
    """Return today's UTC calendar date as YYYY-MM-DD (used in artifact filenames)."""
    utc_today = datetime.now(tz=timezone.utc).date()
    return utc_today.isoformat()
98
+
99
+
100
def load_config(path: Path = CONFIG_PATH) -> dict[str, Any]:
    """Read and parse the YAML config at ``path``.

    An empty document is treated as an empty mapping. A missing file or a YAML
    syntax error is reported on stderr and then re-raised unchanged.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        yaml.YAMLError: the file is not valid YAML.
        ValueError: the document root is not a mapping.
    """
    try:
        with path.open("r", encoding="utf-8") as handle:
            parsed = yaml.safe_load(handle)
    except FileNotFoundError:
        print(f"ERROR: config not found at {path}", file=sys.stderr)
        raise
    except yaml.YAMLError as exc:
        print(f"ERROR: could not parse {path}: {exc}", file=sys.stderr)
        raise

    # Any falsy root (empty file, null, empty list) collapses to {}.
    config = parsed if parsed else {}
    if isinstance(config, dict):
        return config
    raise ValueError(f"config root must be a mapping, got {type(config).__name__}")
114
+
115
+
116
+ # --------------------------------------------------------------------------- #
117
+ # Fetch
118
+ # --------------------------------------------------------------------------- #
119
def fetch_issues_via_gh(repo: str) -> list[dict[str, Any]]:
    """Fetch open issues for ``repo`` with the ``gh`` CLI.

    Runs ``gh issue list`` (argument list, no shell) asking for at most
    ``GH_FETCH_LIMIT`` open issues with the ``GH_JSON_FIELDS`` fields.

    Args:
        repo: Repository in ``owner/name`` form, passed to ``--repo``.

    Returns:
        The decoded JSON list of issue objects.

    Raises:
        RuntimeError: gh exited non-zero, or its output was not a JSON list.
        OSError: the ``gh`` executable could not be started.
    """
    cmd = [
        "gh",
        "issue",
        "list",
        "--repo",
        repo,
        "--state",
        "open",
        "--limit",
        str(GH_FETCH_LIMIT),
        "--json",
        GH_JSON_FIELDS,
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        raise RuntimeError(
            f"gh exited {proc.returncode}: {proc.stderr.strip() or '(no stderr)'}"
        )
    try:
        issues = json.loads(proc.stdout or "[]")
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"could not parse gh JSON output: {exc}") from exc
    if not isinstance(issues, list):
        raise RuntimeError("gh returned non-list JSON for issue list")
    # gh stops at --limit without saying so; a full page means the backlog may
    # be larger than what we saw, so surface that instead of implying coverage.
    if len(issues) >= GH_FETCH_LIMIT:
        warn(
            f"gh returned {len(issues)} issue(s), the fetch limit "
            f"({GH_FETCH_LIMIT}); additional open issues may not have been fetched"
        )
    return issues
146
+
147
+
148
def load_issues_from_file(path: Path) -> list[dict[str, Any]]:
    """Load a previously-saved index.json (gh fallback). Returns [] on trouble.

    "Trouble" covers a missing file, any other read failure (permissions,
    path is a directory, ...), undecodable bytes, invalid JSON, and a JSON
    root that is not a list. Each case emits a warning and yields ``[]`` so
    the caller can still produce a (possibly empty) plan.

    Args:
        path: Location of the JSON snapshot to read.

    Returns:
        The decoded list of issue objects, or ``[]`` if it could not be read.
    """
    try:
        with path.open("r", encoding="utf-8") as fh:
            data = json.load(fh)
    except FileNotFoundError:
        warn(f"--from-file {path} not found; proceeding with zero issues")
        return []
    except OSError as exc:
        # PermissionError, IsADirectoryError, etc. must not escape: callers
        # rely on this function degrading to an empty list.
        warn(f"--from-file {path} could not be read ({exc}); proceeding with zero issues")
        return []
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        warn(f"--from-file {path} is not valid JSON ({exc}); proceeding with zero issues")
        return []
    if not isinstance(data, list):
        warn(f"--from-file {path} did not contain a JSON list; proceeding with zero issues")
        return []
    return data
163
+
164
+
165
def acquire_issues(
    repo: str, from_file: Optional[str]
) -> tuple[list[dict[str, Any]], str]:
    """
    Obtain the issue list together with a label describing where it came from.

    Sources, in priority order:
      1. ``from_file`` when given (explicit offline mode) -> ``"file:<path>"``.
      2. The gh CLI -> ``"gh"``.
      3. If gh fails, the cached .issues/index.json -> ``"fallback:<path>"``.
      4. Otherwise an empty list -> ``"empty"``.

    gh failures are downgraded to warnings so callers can still emit a
    partial plan.
    """
    if from_file:
        offline = load_issues_from_file(Path(from_file))
        return offline, f"file:{from_file}"

    try:
        live = fetch_issues_via_gh(repo)
    except (RuntimeError, FileNotFoundError, OSError) as exc:
        warn(f"gh unavailable or errored ({exc}); attempting .issues/index.json fallback")
    else:
        return live, "gh"

    # Only reached when gh failed: try the snapshot from an earlier run.
    cached = ISSUES_DIR / "index.json"
    if not cached.exists():
        warn("no cached index.json fallback found; proceeding with zero issues")
        return [], "empty"
    return load_issues_from_file(cached), f"fallback:{cached}"
191
+
192
+
193
+ # --------------------------------------------------------------------------- #
194
+ # Normalization helpers (issue field extraction — all treated as DATA)
195
+ # --------------------------------------------------------------------------- #
196
def label_names(issue: dict[str, Any]) -> list[str]:
    """Collect label names from an issue; entries may be dicts or bare strings.

    Dict entries contribute their ``"name"`` value; anything that does not
    end up as a string is ignored.
    """
    names: list[str] = []
    for entry in issue.get("labels") or []:
        value = entry.get("name") if isinstance(entry, dict) else entry
        if isinstance(value, str):
            names.append(value)
    return names
207
+
208
+
209
def author_login(issue: dict[str, Any]) -> str:
    """Return the issue author's login as a string, or "" when unavailable."""
    author = issue.get("author")
    if not author:
        author = {}
    if not isinstance(author, dict):
        return ""
    login = author.get("login")
    return str(login) if login else ""
214
+
215
+
216
def is_bot_author(issue: dict[str, Any]) -> bool:
    """
    Decide whether an issue was opened by a bot.

    True when gh reports ``author.is_bot`` as exactly ``True``, when the login
    contains the ``BOT_LOGIN_SUFFIX`` marker, or when the login equals
    ``BOT_LOGIN_EXACT``.
    """
    author = issue.get("author") or {}
    flagged_by_gh = isinstance(author, dict) and author.get("is_bot") is True
    if flagged_by_gh:
        return True
    login = author_login(issue)
    return BOT_LOGIN_SUFFIX in login or login == BOT_LOGIN_EXACT
230
+
231
+
232
def infer_area(issue: dict[str, Any], collections: list[str]) -> Optional[str]:
    """
    Infer the collection/area for a content issue.

    Labels are checked first (``collection:quests`` or ``collection/quests``,
    also tolerating a leading underscore and a naive singular such as
    ``quest``), then the title is searched for the collection name as a whole
    word. All comparisons are case-insensitive.

    Args:
        issue: Raw issue object; only its labels and title are read.
        collections: Configured collection names. Non-string or empty entries
            are ignored.

    Returns:
        The matching entry from ``collections`` (as configured), or None.
    """
    # Pre-normalize each collection once: (configured name, lowercase, singular).
    known: list[tuple[str, str, str]] = []
    for coll in collections or []:
        if not isinstance(coll, str) or not coll:
            continue
        low = coll.lower()
        # Drop exactly ONE trailing "s". str.rstrip("s") would strip every
        # trailing "s" ("press" -> "pre") and produce false matches.
        singular = low[:-1] if len(low) > 1 and low.endswith("s") else low
        known.append((coll, low, singular))

    for lbl in label_names(issue):
        low_lbl = lbl.lower()
        for prefix in ("collection:", "collection/"):
            if not low_lbl.startswith(prefix):
                continue
            # Tolerate `_quests` / `quests` / `quest`.
            candidate = low_lbl[len(prefix):].strip().lstrip("_")
            for coll, low, singular in known:
                if candidate in (low, singular):
                    return coll

    title_low = str(issue.get("title") or "").lower()
    for coll, low, _singular in known:
        # Word-ish match on the collection name in the title; both sides are
        # lowercased so mixed-case configured names still match.
        if re.search(rf"\b{re.escape(low)}\b", title_low):
            return coll
    return None
257
+
258
+
259
+ # --------------------------------------------------------------------------- #
260
+ # Classification
261
+ # --------------------------------------------------------------------------- #
262
def _wanted_labels(raw: Any) -> set[str]:
    """Normalize an ``any_label`` / ``all_label`` config value to a lowercase set.

    A bare string is treated as ONE label. Iterating it would yield single
    characters and make ``any_label: bug`` match labels "b", "u" or "g".
    Falsy values produce an empty set.
    """
    if not raw:
        return set()
    items = [raw] if isinstance(raw, str) else raw
    return {str(item).lower() for item in items}


def match_disposition(
    issue: dict[str, Any], match: dict[str, Any]
) -> bool:
    """
    Evaluate a single disposition `match` object as an AND over present keys.

    Omitted keys are unconstrained. Title/body are regex-matched as DATA only.
    The optional `any_of` key holds a list of sub-match dicts and passes if ANY
    of them matches (an OR group), AND-ed with the other keys present here.

    Recognized keys: ``any_of``, ``author_is_bot``, ``any_label``,
    ``all_label``, ``title_regex``, ``body_regex``. Label comparisons and
    regex searches are case-insensitive. An invalid regex emits a warning and
    makes the match fail.

    Args:
        issue: Raw issue object.
        match: The ``match`` mapping of one disposition.

    Returns:
        True when every present constraint is satisfied; False otherwise,
        including when ``match`` is not a dict.
    """
    if not isinstance(match, dict):
        return False

    label_set = {lbl.lower() for lbl in label_names(issue)}

    if "any_of" in match:
        # Non-dict entries are ignored; an empty group can never pass.
        subs = [s for s in (match.get("any_of") or []) if isinstance(s, dict)]
        if not any(match_disposition(issue, sub) for sub in subs):
            return False

    if "author_is_bot" in match:
        if bool(is_bot_author(issue)) != bool(match["author_is_bot"]):
            return False

    if "any_label" in match:
        if not (label_set & _wanted_labels(match["any_label"])):
            return False

    if "all_label" in match:
        if not _wanted_labels(match["all_label"]).issubset(label_set):
            return False

    if "title_regex" in match and match["title_regex"]:
        title = str(issue.get("title") or "")
        try:
            if not re.search(str(match["title_regex"]), title, re.IGNORECASE):
                return False
        except re.error as exc:
            warn(f"invalid title_regex {match['title_regex']!r}: {exc}")
            return False

    if "body_regex" in match and match["body_regex"]:
        body = issue.get("body")
        body = "" if body is None else str(body)
        try:
            if not re.search(str(match["body_regex"]), body, re.IGNORECASE):
                return False
        except re.error as exc:
            warn(f"invalid body_regex {match['body_regex']!r}: {exc}")
            return False

    return True
316
+
317
+
318
def classify_issue(
    issue: dict[str, Any], config: dict[str, Any]
) -> dict[str, Any]:
    """
    Classify one issue and return its per-issue plan record.

    Dispositions from ``config["dispositions"]`` are tried in order and the
    first whose ``match`` passes wins; otherwise ``default_disposition`` (or a
    built-in needs-human default) applies. The hard human-authored close
    guardrail is then applied unconditionally.

    Args:
        issue: Raw issue object.
        config: Parsed config mapping.

    Returns:
        A record with keys ``number``, ``title``, ``author_login``, ``is_bot``,
        ``labels``, ``disposition_id``, ``action``, ``route_to``, ``note``,
        ``area``, ``eligible_autoclose``, ``verify_candidate``, ``created_at``,
        ``updated_at`` and, when the guardrail fired, ``downgrade_reason``.
    """

    def _is_close(act: str, disp: str) -> bool:
        # A disposition counts as a heuristic close via its action OR its id.
        return act.startswith("recommend-close") or disp.startswith("close-")

    dispositions = config.get("dispositions") or []
    default = config.get("default_disposition") or {
        "id": "needs-human",
        "action": "route-human",
    }
    collections = (config.get("areas") or {}).get("collections") or []

    is_bot = is_bot_author(issue)
    labels = label_names(issue)
    login = author_login(issue)

    chosen: dict[str, Any] = {}
    for disp in dispositions:
        if not isinstance(disp, dict):
            continue
        if match_disposition(issue, disp.get("match") or {}):
            chosen = disp
            break
    if not chosen:
        chosen = default

    disposition_id = str(chosen.get("id") or "needs-human")
    action = str(chosen.get("action") or "route-human")
    route_to = chosen.get("route_to")
    note = chosen.get("note")
    downgrade_reason: Optional[str] = None

    # HARD GUARDRAIL: a HEURISTIC close disposition (stale bot-noise, etc.) on a
    # HUMAN-authored issue is downgraded to needs-human, unconditionally and in
    # code (never a config knob). This blocks closing a human issue just because
    # it *looks* stale — it does NOT block the evidence-based verify-and-close
    # path below, which closes a human issue only with a verifier verdict AND a
    # green CI/CD gate.
    is_close = _is_close(action, disposition_id)
    if is_close and not is_bot:
        downgrade_reason = "human-authored; close requires a human"
        disposition_id = str(default.get("id") or "needs-human")
        action = str(default.get("action") or "route-human")
        route_to = default.get("route_to")
        note = default.get("note")
        if _is_close(action, disposition_id):
            # The configured default is itself a close disposition. Honoring it
            # would leave a human issue in a close batch, so the guardrail
            # falls back to the hard-coded values: config must never be able
            # to switch it off.
            disposition_id = "needs-human"
            action = "route-human"
            route_to = None
            note = None
        is_close = False

    # Auto-close eligibility (HEURISTIC path): only a close action AND a bot
    # author qualifies. Human issues are never closed on heuristics.
    eligible_autoclose = bool(is_close and is_bot)

    # verify-and-close candidacy (EVIDENCE path, orthogonal to the heuristic one):
    # a human-authored, non-protected, non-epic OPEN issue may be handed to the
    # issue-verifier to check whether its fix is ALREADY on `main`. This flag
    # NEVER closes anything by itself; the actual close decision is made
    # elsewhere, outside this script.
    # NOTE(review): "protected" is detected only via the literal id
    # "backlog-managed"; a differently named disposition with action "skip"
    # is still flagged here. Confirm whether that is intended.
    verify_candidate = bool(
        not is_bot
        and disposition_id != "backlog-managed"
        and action != "decompose"
    )

    area = infer_area(issue, collections)

    record: dict[str, Any] = {
        "number": issue.get("number"),
        "title": str(issue.get("title") or ""),
        "author_login": login,
        "is_bot": is_bot,
        "labels": labels,
        "disposition_id": disposition_id,
        "action": action,
        "route_to": route_to,
        "note": note,
        "area": area,
        "eligible_autoclose": eligible_autoclose,
        "verify_candidate": verify_candidate,
        "created_at": issue.get("createdAt"),
        "updated_at": issue.get("updatedAt"),
    }
    if downgrade_reason:
        record["downgrade_reason"] = downgrade_reason
    return record
405
+
406
+
407
+ # --------------------------------------------------------------------------- #
408
+ # Batching
409
+ # --------------------------------------------------------------------------- #
410
+ def _branch_for(action: str, disposition_id: str, area: Optional[str],
411
+ numbers: list[int]) -> str:
412
+ """Compute a suggested branch name from the action + area + first issue."""
413
+ first = numbers[0] if numbers else 0
414
+ if action.startswith("recommend-close") or disposition_id.startswith("close-"):
415
+ return f"chore/issue-autopilot-{disposition_id}"
416
+ if action == "resolve-content":
417
+ area_part = area or "general"
418
+ return f"content/issue-{area_part}-{first}"
419
+ if action == "resolve-code":
420
+ return f"fix/issue-{first}"
421
+ if action == "decompose":
422
+ return f"chore/issue-autopilot-decompose-{first}"
423
+ return f"chore/issue-autopilot-{disposition_id}"
424
+
425
+
426
+ def _suggested_labels(config: dict[str, Any], action: str,
427
+ disposition_id: str) -> list[str]:
428
+ """Pick the autopilot label set appropriate for the batch action."""
429
+ labels_cfg = config.get("labels") or {}
430
+ out: list[str] = []
431
+ triaged = labels_cfg.get("triaged")
432
+ if triaged:
433
+ out.append(str(triaged))
434
+ if action.startswith("recommend-close") or disposition_id.startswith("close-"):
435
+ stale = labels_cfg.get("stale")
436
+ if stale:
437
+ out.append(str(stale))
438
+ elif action in ("resolve-content", "resolve-code"):
439
+ pr = labels_cfg.get("pr")
440
+ if pr:
441
+ out.append(str(pr))
442
+ elif action == "decompose":
443
+ epic = labels_cfg.get("epic")
444
+ if epic:
445
+ out.append(str(epic))
446
+ elif action == "route-human":
447
+ nh = labels_cfg.get("needs_human")
448
+ if nh:
449
+ out.append(str(nh))
450
+ return out
451
+
452
+
453
def build_batches(
    records: list[dict[str, Any]], config: dict[str, Any]
) -> list[dict[str, Any]]:
    """
    Turn classified issue records into PR-sized batches.

    Grouping rules:
      - action "skip"        -> one "skipped" batch (recorded, never acted on).
      - close dispositions   -> one batch per disposition id.
      - resolve-content      -> one batch per area (collection).
      - resolve-code         -> one batch per issue.
      - decompose (epic)     -> one batch per issue.
      - everything else      -> one informational "needs-human" batch.

    Output order: close batches (sorted by id), decompose, resolve-content
    (sorted by area, area-less last), resolve-code, needs-human, skipped.

    ``limits.max_resolve_batches_per_run`` (default 3; 0 disables the cap) is
    applied to the resolve batches: extras are kept but flagged
    ``deferred=True`` with a ``deferred_reason`` and a warning.
    """

    def _entry(batch_id, disp_id, act, area_name, issue_nums, heading,
               branch, tags, remark) -> dict[str, Any]:
        # Single place that fixes the key order of every batch dict.
        return {
            "id": batch_id,
            "disposition_id": disp_id,
            "action": act,
            "area": area_name,
            "issue_numbers": issue_nums,
            "title": heading,
            "suggested_branch": branch,
            "suggested_labels": tags,
            "note": remark,
            "deferred": False,
        }

    cap = int(
        (config.get("limits") or {}).get("max_resolve_batches_per_run", 3) or 0
    )

    # ---- bucket the records by what will happen to them ------------------- #
    closing: dict[str, list[dict[str, Any]]] = {}
    by_area: dict[Optional[str], list[dict[str, Any]]] = {}
    code_items: list[dict[str, Any]] = []
    epic_items: list[dict[str, Any]] = []
    human_items: list[dict[str, Any]] = []
    protected_items: list[dict[str, Any]] = []

    for item in records:
        act = item["action"]
        disp_id = item["disposition_id"]
        if act == "skip":
            protected_items.append(item)
        elif act.startswith("recommend-close") or disp_id.startswith("close-"):
            closing.setdefault(disp_id, []).append(item)
        elif act == "resolve-content":
            by_area.setdefault(item.get("area"), []).append(item)
        elif act == "resolve-code":
            code_items.append(item)
        elif act == "decompose":
            epic_items.append(item)
        else:  # route-human / needs-human / anything else
            human_items.append(item)

    planned: list[dict[str, Any]] = []

    # ---- close: one batch per disposition (triage, no PR) ----------------- #
    for disp_id in sorted(closing):
        members = closing[disp_id]
        lead = members[0]
        nums = [m["number"] for m in members]
        planned.append(_entry(
            disp_id,
            disp_id,
            lead["action"],
            None,
            nums,
            f"Recommend close: {disp_id} ({len(nums)} issue(s))",
            _branch_for(lead["action"], disp_id, None, nums),
            _suggested_labels(config, lead["action"], disp_id),
            lead.get("note"),
        ))

    # ---- decompose: one batch per epic issue (triage, no PR) -------------- #
    for item in epic_items:
        disp_id = item["disposition_id"]
        num = item["number"]
        act = item["action"]
        planned.append(_entry(
            f"{disp_id}-{num}",
            disp_id,
            act,
            item.get("area"),
            [num],
            f"Decompose epic #{num}: {item['title'][:60]}",
            _branch_for(act, disp_id, item.get("area"), [num]),
            _suggested_labels(config, act, disp_id),
            item.get("note"),
        ))

    # ---- resolve: gather content + code batches, then apply the cap ------- #
    resolving: list[dict[str, Any]] = []

    # Named areas alphabetically; the area-less (None) group sorts last.
    for area_name in sorted(by_area, key=lambda a: (a is None, a or "")):
        members = by_area[area_name]
        lead_disp = members[0]["disposition_id"]
        nums = [m["number"] for m in members]
        area_id = area_name or "general"
        resolving.append(_entry(
            f"content-{area_id}",
            lead_disp,
            "resolve-content",
            area_name,
            nums,
            f"Resolve content ({area_id}): {len(nums)} issue(s)",
            _branch_for("resolve-content", lead_disp, area_name, nums),
            _suggested_labels(config, "resolve-content", lead_disp),
            members[0].get("note"),
        ))

    for item in code_items:
        num = item["number"]
        disp_id = item["disposition_id"]
        resolving.append(_entry(
            f"code-{num}",
            disp_id,
            "resolve-code",
            item.get("area"),
            [num],
            f"Resolve code #{num}: {item['title'][:60]}",
            _branch_for("resolve-code", disp_id, item.get("area"), [num]),
            _suggested_labels(config, "resolve-code", disp_id),
            item.get("note"),
        ))

    # Over-cap batches stay in the plan, flagged deferred (never dropped).
    for position, batch in enumerate(resolving):
        if not cap or position < cap:
            continue
        batch["deferred"] = True
        batch["deferred_reason"] = (
            f"exceeds limits.max_resolve_batches_per_run={cap} "
            f"(resolve batch #{position + 1} of {len(resolving)})"
        )
        warn(
            f"deferred resolve batch {batch['id']} "
            f"({batch['deferred_reason']})"
        )
    planned.extend(resolving)

    # ---- needs-human: one informational batch (no PR) --------------------- #
    if human_items:
        fallback = config.get("default_disposition") or {}
        fallback_id = str(fallback.get("id") or "needs-human")
        nums = [m["number"] for m in human_items]
        planned.append(_entry(
            "needs-human",
            fallback_id,
            str(fallback.get("action") or "route-human"),
            None,
            nums,
            f"Needs human ({len(nums)} issue(s))",
            None,
            _suggested_labels(config, "route-human", fallback_id),
            "Informational. No PR — these require human judgment.",
        ))

    # ---- skipped: protected issues, recorded but never acted on ----------- #
    if protected_items:
        nums = [m["number"] for m in protected_items]
        planned.append(_entry(
            "skipped",
            protected_items[0]["disposition_id"],
            "skip",
            None,
            nums,
            f"Left alone — protected ({len(nums)} issue(s))",
            None,
            [],
            "Protected (e.g. backlog-managed). Recorded but never acted on.",
        ))

    return planned
623
+
624
+
625
+ # --------------------------------------------------------------------------- #
626
+ # Counts + plan assembly
627
+ # --------------------------------------------------------------------------- #
628
def compute_counts(records: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarize classified records.

    Returns a dict with per-disposition tallies (sorted by disposition id),
    the total, bot vs. human author counts, and how many records are
    auto-close eligible or verify-and-close candidates.
    """
    tally: dict[str, int] = {}
    bots = 0
    closable = 0
    candidates = 0
    for rec in records:
        key = rec["disposition_id"]
        tally[key] = tally.get(key, 0) + 1
        bots += 1 if rec["is_bot"] else 0
        closable += 1 if rec["eligible_autoclose"] else 0
        # verify_candidate is optional on a record; absent counts as False.
        candidates += 1 if rec.get("verify_candidate") else 0

    total = len(records)
    return {
        "by_disposition": {key: tally[key] for key in sorted(tally)},
        "total": total,
        "bot": bots,
        "human": total - bots,
        "eligible_autoclose": closable,
        "verify_candidates": candidates,
    }
651
+
652
+
653
def build_plan(
    issues: list[dict[str, Any]], config: dict[str, Any], repo: str
) -> dict[str, Any]:
    """Classify, batch and assemble the plan dict (the machine contract).

    When ``limits.max_issues_per_run`` is set and exceeded, only the first
    that-many issues are classified; the remainder is counted in
    ``deferred_unclassified_count`` and reported via a warning.
    """
    cap = int((config.get("limits") or {}).get("max_issues_per_run", 0) or 0)

    # Boundedness: issues past the cap are NOT classified this run; they are
    # only counted so the reports never imply full coverage.
    overflow = len(issues) - cap if cap and len(issues) > cap else 0
    considered = issues[:cap] if overflow else issues
    if overflow:
        warn(
            f"{overflow} issue(s) beyond limits.max_issues_per_run="
            f"{cap} were not classified this run"
        )

    records = [classify_issue(entry, config) for entry in considered]
    batches = build_batches(records, config)
    counts = compute_counts(records)

    plan: dict[str, Any] = {
        "generated": now_iso(),
        "repo": repo,
        "counts": counts,
        "batches": batches,
        "issues": records,
        "deferred_unclassified_count": overflow,
        "total_open_seen": len(issues),
    }
    return plan
684
+
685
+
686
+ # --------------------------------------------------------------------------- #
687
+ # Artifact writers
688
+ # --------------------------------------------------------------------------- #
689
def write_json(path: Path, data: Any) -> None:
    """Write ``data`` to ``path`` as pretty-printed UTF-8 JSON, atomically.

    The payload is serialized BEFORE the target is touched and is written to a
    sibling temp file that is then moved into place. A serialization or write
    failure therefore never truncates an existing artifact (notably the
    index.json used as the offline fallback).

    Args:
        path: Destination file; parent directories are created as needed.
        data: JSON-serializable value.

    Raises:
        TypeError / ValueError: ``data`` is not JSON-serializable.
        OSError: the file could not be written or moved into place.
    """
    payload = json.dumps(data, indent=2, ensure_ascii=False) + "\n"
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_name(f".{path.name}.tmp")
    try:
        tmp.write_text(payload, encoding="utf-8")
        # Path.replace overwrites the destination in a single rename step.
        tmp.replace(path)
    except OSError:
        # Best-effort cleanup of the partial temp file; surface the original error.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
694
+
695
+
696
+ def _md_code(text: str) -> str:
697
+ """
698
+ Render arbitrary text as a backtick-safe inline-code span so it reads as
699
+ opaque DATA in the worklist, never as Markdown structure or instructions.
700
+ The fence is chosen longer than the longest backtick run inside the text, and
701
+ padded with spaces so a leading/trailing backtick can't merge with the fence.
702
+ """
703
+ text = re.sub(r"\s+", " ", text or "").strip()
704
+ if not text:
705
+ return "``"
706
+ longest = max((len(m) for m in re.findall(r"`+", text)), default=0)
707
+ fence = "`" * (longest + 1)
708
+ return f"{fence} {text} {fence}"
709
+
710
+
711
def _md_issue_line(rec: dict[str, Any]) -> str:
    """
    Format one issue as a worklist bullet.

    The title is attacker-controlled, so it is rendered through ``_md_code``
    as an inline-code span (DATA). A ``downgrade_reason``, when present, is
    appended in italics.
    """
    reason = rec.get("downgrade_reason")
    suffix = f" _(downgraded: {reason})_" if reason else ""
    quoted_title = _md_code(rec.get("title") or "")
    return f" - `#{rec['number']}` — {quoted_title}{suffix}"
721
+
722
+
723
def render_worklist_md(plan: dict[str, Any]) -> str:
    """Render the plan as the human/agent-readable worklist Markdown.

    Sections, in order: header + totals, dispositions table, actionable
    batches, needs-human list, protected ("left alone") list when non-empty,
    and a skipped/deferred note that always carries the coverage disclaimer.

    Batch headings embed ``batch["title"]``, which for decompose and
    resolve-code batches contains (a prefix of) the attacker-controlled issue
    title. It is therefore emitted through ``_md_code`` as an inline-code
    span, like issue titles in the bullet lists, so it cannot be read as
    Markdown structure or instructions.

    Args:
        plan: Plan dict providing ``generated``, ``repo``, ``counts``,
            ``batches``, ``issues`` and optionally ``total_open_seen`` and
            ``deferred_unclassified_count``.

    Returns:
        The Markdown document as a single string.
    """
    counts = plan["counts"]
    lines: list[str] = []
    lines.append("# Issue Autopilot — worklist")
    lines.append("")
    lines.append(f"_Generated {plan['generated']} for `{plan['repo']}`._")
    lines.append("")
    lines.append(
        f"Open issues seen: **{plan.get('total_open_seen', counts['total'])}** "
        f"· classified: **{counts['total']}** "
        f"· bot: **{counts['bot']}** · human: **{counts['human']}** "
        f"· auto-close eligible: **{counts['eligible_autoclose']}**"
    )
    lines.append("")

    # Dispositions summary table.
    lines.append("## Dispositions")
    lines.append("")
    lines.append("| disposition | count | action |")
    lines.append("| --- | ---: | --- |")
    # The first record seen for a disposition decides the action shown.
    action_by_disp: dict[str, str] = {}
    for rec in plan["issues"]:
        action_by_disp.setdefault(rec["disposition_id"], rec["action"])
    for disp, count in counts["by_disposition"].items():
        lines.append(f"| `{disp}` | {count} | {action_by_disp.get(disp, '?')} |")
    lines.append("")

    # Batches.
    lines.append("## Batches")
    lines.append("")
    # The informational batches get their own sections further down.
    active_batches = [b for b in plan["batches"]
                      if b["id"] not in ("needs-human", "skipped")]
    if not active_batches:
        lines.append("_No actionable batches this run._")
        lines.append("")
    issues_by_number = {r["number"]: r for r in plan["issues"]}
    for batch in active_batches:
        flag = " (DEFERRED)" if batch.get("deferred") else ""
        # The title may contain untrusted issue-title text: render it as DATA.
        safe_title = _md_code(str(batch["title"]))
        lines.append(f"### `{batch['id']}`{flag} — {safe_title}")
        lines.append("")
        lines.append(f"- **Action:** `{batch['action']}`")
        if batch.get("area"):
            lines.append(f"- **Area:** `{batch['area']}`")
        if batch.get("suggested_branch"):
            lines.append(f"- **Suggested branch:** `{batch['suggested_branch']}`")
        if batch.get("suggested_labels"):
            labs = ", ".join(f"`{x}`" for x in batch["suggested_labels"])
            lines.append(f"- **Suggested labels:** {labs}")
        if batch.get("deferred") and batch.get("deferred_reason"):
            lines.append(f"- **Deferred:** {batch['deferred_reason']}")
        if batch.get("note"):
            # Notes are flattened to one line so they stay inside the bullet.
            note = re.sub(r"\s+", " ", str(batch["note"])).strip()
            lines.append(f"- **Note:** {note}")
        lines.append("- **Issues:**")
        for n in batch["issue_numbers"]:
            rec = issues_by_number.get(n)
            if rec:
                lines.append(_md_issue_line(rec))
            else:
                lines.append(f" - `#{n}`")
        lines.append("")

    # Needs human.
    lines.append("## Needs human")
    lines.append("")
    human_batch = next((b for b in plan["batches"] if b["id"] == "needs-human"), None)
    if human_batch and human_batch["issue_numbers"]:
        for n in human_batch["issue_numbers"]:
            rec = issues_by_number.get(n)
            if rec:
                lines.append(_md_issue_line(rec))
        lines.append("")
    else:
        lines.append("_None this run._")
        lines.append("")

    # Left alone — protected (e.g. backlog-managed). The autopilot never acts here.
    skipped_batch = next((b for b in plan["batches"] if b["id"] == "skipped"), None)
    if skipped_batch and skipped_batch["issue_numbers"]:
        lines.append("## Left alone (protected)")
        lines.append("")
        note = re.sub(r"\s+", " ", str(skipped_batch.get("note") or "")).strip()
        if note:
            lines.append(f"_{note}_")
            lines.append("")
        for n in skipped_batch["issue_numbers"]:
            rec = issues_by_number.get(n)
            if rec:
                lines.append(_md_issue_line(rec))
        lines.append("")

    # Boundedness note — never imply full coverage.
    lines.append("## Skipped / deferred")
    lines.append("")
    deferred_batches = [b for b in plan["batches"] if b.get("deferred")]
    unclassified = plan.get("deferred_unclassified_count", 0)
    if not deferred_batches and not unclassified:
        lines.append(
            "_Nothing deferred this run. This worklist reflects only the open "
            "issues seen at generation time — it is not a guarantee of full "
            "backlog coverage._"
        )
    else:
        if unclassified:
            lines.append(
                f"- **{unclassified}** open issue(s) beyond "
                f"`limits.max_issues_per_run` were not classified this run."
            )
        for b in deferred_batches:
            lines.append(
                f"- Resolve batch `{b['id']}` deferred: "
                f"{b.get('deferred_reason', 'cap reached')}"
            )
        lines.append("")
        lines.append(
            "_This worklist reflects only the open issues seen at generation "
            "time and is bounded by the configured per-run limits — it is not a "
            "guarantee of full backlog coverage._"
        )
    lines.append("")
    return "\n".join(lines)
844
+
845
+
846
def render_report_md(plan: dict[str, Any], issues_raw: list[dict[str, Any]]) -> str:
    """Render the short Markdown health report for one triage run.

    Args:
        plan: The plan mapping. This function reads its ``counts``,
            ``generated``, ``repo`` and ``issues`` entries, plus the optional
            ``total_open_seen`` entry.
        issues_raw: The raw open-issue snapshot; only used to find the oldest
            issue by ``createdAt``.

    Returns:
        The report as a single newline-joined string ending in a newline.
    """
    counts = plan["counts"]

    report: list[str] = [
        "# Issue Autopilot — health report",
        "",
        f"_Generated {plan['generated']} for `{plan['repo']}`._",
        "",
        f"- Open issues seen: **{plan.get('total_open_seen', counts['total'])}**",
        f"- Classified this run: **{counts['total']}**",
        f"- Bot-authored: **{counts['bot']}** · Human-authored: **{counts['human']}**",
        f"- Auto-close eligible (bot + close): **{counts['eligible_autoclose']}**",
    ]

    # Tally needs-human from the per-issue action rather than from the
    # disposition table, because the disposition id may differ.
    routed_to_human = [rec for rec in plan["issues"] if rec["action"] == "route-human"]
    report.append(f"- Need human attention: **{len(routed_to_human)}**")

    # Oldest open issue (by createdAt), taken from the raw snapshot.
    oldest = _oldest_issue(issues_raw)
    if oldest:
        number = oldest.get("number")
        created = oldest.get("createdAt") or "?"
        title = _md_code(str(oldest.get("title") or ""))
        report.append(f"- Oldest open issue: `#{number}` ({created}) — {title}")
    report.append("")

    report += [
        "## Disposition breakdown",
        "",
        "| disposition | count |",
        "| --- | ---: |",
    ]
    report.extend(
        f"| `{name}` | {total} |" for name, total in counts["by_disposition"].items()
    )
    report.append("")
    return "\n".join(report)
880
+
881
+
882
+ def _oldest_issue(issues_raw: list[dict[str, Any]]) -> Optional[dict[str, Any]]:
883
+ dated = [i for i in issues_raw if i.get("createdAt")]
884
+ if not dated:
885
+ return None
886
+ return min(dated, key=lambda i: str(i.get("createdAt")))
887
+
888
+
889
+ # --------------------------------------------------------------------------- #
890
+ # Subcommand: plan
891
+ # --------------------------------------------------------------------------- #
892
def cmd_plan(args: argparse.Namespace) -> int:
    """Run the ``plan`` subcommand: fetch, classify, and emit plan artifacts.

    Args:
        args: Parsed CLI arguments; reads ``repo``, ``from_file``,
            ``output_dir`` and ``dry_run``.

    Returns:
        0 on success (including a dry run), 1 when the config cannot be loaded
        or no repo is available from the arguments or the config.
    """
    try:
        config = load_config()
    except (FileNotFoundError, ValueError, yaml.YAMLError):
        return 1

    repo = args.repo or str(config.get("repo") or "")
    if not repo:
        print("ERROR: no repo specified and config has no `repo` key", file=sys.stderr)
        return 1

    issues, source = acquire_issues(repo, args.from_file)
    print(f"Fetched {len(issues)} open issue(s) [source={source}]", file=sys.stderr)

    plan = build_plan(issues, config, repo)

    output_cfg = config.get("output") or {}
    # Artifacts are rooted at the repo root (script-relative) rather than the
    # cwd, so they always land in <repo>/.issues/ and dispatch.py (which reads
    # script-relative) agrees, wherever the command was invoked from.
    base = REPO_ROOT if not args.output_dir else Path(args.output_dir)

    def _artifact(key: str, default: str) -> Path:
        # Resolve one configured output location beneath the base directory.
        return base / str(output_cfg.get(key, default))

    index_path = _artifact("index", ".issues/index.json")
    plan_path = _artifact("plan", ".issues/plan.json")
    worklist_dir = _artifact("worklist_dir", ".issues/worklists")
    report_dir = _artifact("report_dir", ".issues/reports")

    worklist_md = render_worklist_md(plan)
    report_md = render_report_md(plan, issues)

    if args.dry_run:
        print("--- plan.json (dry-run, not written) ---")
        print(json.dumps(plan, indent=2, ensure_ascii=False))
        print("\n--- worklist (dry-run, not written) ---")
        print(worklist_md)
        return 0

    # Persist the raw snapshot only when it was freshly fetched: a --from-file
    # source must not be clobbered with itself, but gh/fallback/empty results
    # are always written.
    freshly_fetched = source.startswith(("gh", "fallback")) or source == "empty"
    if freshly_fetched:
        write_json(index_path, issues)
    write_json(plan_path, plan)

    for directory in (worklist_dir, report_dir):
        directory.mkdir(parents=True, exist_ok=True)

    day = today_str()
    worklist_file = worklist_dir / f"{day}.md"
    report_file = report_dir / f"{day}.md"
    worklist_file.write_text(worklist_md, encoding="utf-8")
    report_file.write_text(report_md, encoding="utf-8")

    for written in (plan_path, worklist_file, report_file):
        print(f"Wrote {written}")
    return 0
944
+
945
+
946
+ # --------------------------------------------------------------------------- #
947
+ # Subcommand: status
948
+ # --------------------------------------------------------------------------- #
949
def cmd_status(args: argparse.Namespace) -> int:
    """Run the ``status`` subcommand: print a concise dashboard.

    The dashboard is built from the existing plan.json when that file exists,
    parses as JSON and has the sections this function reads (``counts``,
    ``issues``, ``batches``). Otherwise the plan is recomputed in memory from
    freshly acquired issues; nothing is written to disk.

    Args:
        args: Parsed CLI arguments; reads ``repo``, ``from_file`` and
            ``output_dir``.

    Returns:
        0 after printing the dashboard, 1 when the config cannot be loaded.
    """
    try:
        config = load_config()
    except (FileNotFoundError, ValueError, yaml.YAMLError):
        return 1

    output_cfg = config.get("output") or {}
    base = Path(args.output_dir) if args.output_dir else REPO_ROOT
    plan_path = base / str(output_cfg.get("plan", ".issues/plan.json"))

    plan: Optional[dict[str, Any]] = None
    if plan_path.exists():
        try:
            with plan_path.open("r", encoding="utf-8") as fh:
                loaded = json.load(fh)
        except json.JSONDecodeError as exc:
            warn(f"existing plan.json is unparseable ({exc}); recomputing")
        else:
            # Valid JSON is not necessarily a usable plan (e.g. `null`, a list,
            # or a truncated object). Fall back to recomputing instead of
            # crashing with TypeError/KeyError further down.
            required = ("counts", "issues", "batches")
            if isinstance(loaded, dict) and all(key in loaded for key in required):
                plan = loaded
            else:
                warn("existing plan.json is not a usable plan; recomputing")

    if plan is None:
        repo = args.repo or str(config.get("repo") or "")
        issues, source = acquire_issues(repo, args.from_file)
        print(f"(recomputed from {source})", file=sys.stderr)
        plan = build_plan(issues, config, repo)

    counts = plan["counts"]
    print("Issue Autopilot — status")
    print(f"  repo:                {plan.get('repo')}")
    print(f"  generated:           {plan.get('generated')}")
    print(f"  total:               {counts['total']} (bot={counts['bot']} human={counts['human']})")
    print(f"  auto-close eligible: {counts['eligible_autoclose']}")
    print(f"  verify candidates:   {counts.get('verify_candidates', 0)}")
    need_human = sum(1 for r in plan["issues"] if r["action"] == "route-human")
    print(f"  need human:          {need_human}")
    print("  by disposition:")
    for disp, count in counts["by_disposition"].items():
        print(f"    {disp:<16} {count}")
    # Neither the needs-human batch nor the protected "skipped" batch is
    # something the autopilot acts on, so both are excluded from the
    # actionable count.
    non_actionable = {"needs-human", "skipped"}
    active = [b for b in plan["batches"] if b["id"] not in non_actionable]
    deferred = [b for b in active if b.get("deferred")]
    print(f"  batches:             {len(active)} actionable ({len(deferred)} deferred)")
    return 0
990
+
991
+
992
+ # --------------------------------------------------------------------------- #
993
+ # CLI
994
+ # --------------------------------------------------------------------------- #
995
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with the ``plan`` and ``status`` subcommands.

    Each subparser stores its handler (``cmd_plan`` / ``cmd_status``) under the
    ``func`` default so the caller can dispatch with ``args.func(args)``.

    Returns:
        The configured ``argparse.ArgumentParser``; a subcommand is required.
    """
    parser = argparse.ArgumentParser(
        prog="triage.py",
        description=(
            "Issue Autopilot classifier/planner. READ + PLAN ONLY — never "
            "mutates GitHub. Classifies open issues and emits plan artifacts."
        ),
    )
    sub = parser.add_subparsers(dest="command", required=True)

    p_plan = sub.add_parser(
        "plan", help="fetch open issues, classify, group, write artifacts"
    )
    p_plan.add_argument("--repo", default=None, help="owner/name (default: config.repo)")
    p_plan.add_argument(
        "--from-file", default=None,
        help="read a saved index.json instead of calling gh (offline fallback)",
    )
    p_plan.add_argument(
        "--dry-run", action="store_true",
        help="compute and print the plan but write nothing",
    )
    # The handlers fall back to the repo root, not the cwd, when --output-dir
    # is omitted; the help text must say so.
    p_plan.add_argument(
        "--output-dir", default=None,
        help="base dir for artifacts (default: the repository root)",
    )
    p_plan.set_defaults(func=cmd_plan)

    p_status = sub.add_parser(
        "status", help="print a dashboard (reads latest plan.json, else recomputes)"
    )
    p_status.add_argument("--repo", default=None, help="owner/name (default: config.repo)")
    p_status.add_argument(
        "--from-file", default=None,
        help="read a saved index.json instead of calling gh (offline fallback)",
    )
    p_status.add_argument(
        "--output-dir", default=None,
        help="base dir to look for plan.json (default: the repository root)",
    )
    p_status.set_defaults(func=cmd_status)
    return parser
1037
+
1038
+
1039
def main(argv: Optional[list[str]] = None) -> int:
    """Parse ``argv`` and dispatch to the selected subcommand handler.

    Args:
        argv: Argument list passed to ``parse_args``; ``None`` is forwarded
            unchanged.

    Returns:
        The handler's result converted to ``int``.
    """
    args = build_parser().parse_args(argv)
    handler = args.func
    return int(handler(args))
1043
+
1044
+
1045
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())