@deftai/directive-content 0.55.2 → 0.56.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. package/.githooks/pre-commit +143 -0
  2. package/.githooks/pre-push +121 -0
  3. package/QUICK-START.md +2 -2
  4. package/Taskfile.yml +934 -0
  5. package/UPGRADING.md +47 -1
  6. package/events/README.md +3 -3
  7. package/package.json +5 -4
  8. package/scripts/_agents_md.py +494 -0
  9. package/scripts/_cache_fetch.py +635 -0
  10. package/scripts/_cache_quota.py +529 -0
  11. package/scripts/_cache_refresh.py +163 -0
  12. package/scripts/_cache_validate.py +209 -0
  13. package/scripts/_content_root.py +42 -0
  14. package/scripts/_doctor_state.py +277 -0
  15. package/scripts/_event_detect.py +305 -0
  16. package/scripts/_events.py +514 -0
  17. package/scripts/_lifecycle_hygiene.py +568 -0
  18. package/scripts/_pathspec.py +91 -0
  19. package/scripts/_policy_show_cli.py +266 -0
  20. package/scripts/_precutover.py +92 -0
  21. package/scripts/_project_context.py +224 -0
  22. package/scripts/_project_definition_io.py +164 -0
  23. package/scripts/_relocate_snapshot.py +209 -0
  24. package/scripts/_relocate_states.py +343 -0
  25. package/scripts/_resolve_preflight_path.py +152 -0
  26. package/scripts/_safe_subprocess.py +167 -0
  27. package/scripts/_session_start_hook.py +205 -0
  28. package/scripts/_sor_gate_diff.py +365 -0
  29. package/scripts/_stdio_utf8.py +59 -0
  30. package/scripts/_triage_bootstrap_gitignore.py +904 -0
  31. package/scripts/_triage_classify_cli.py +122 -0
  32. package/scripts/_triage_queue_cli.py +625 -0
  33. package/scripts/_triage_scope_cli.py +343 -0
  34. package/scripts/_triage_scope_drift_cli.py +121 -0
  35. package/scripts/_triage_scope_ignores.py +286 -0
  36. package/scripts/_triage_scope_milestone.py +432 -0
  37. package/scripts/_triage_scope_mutations.py +337 -0
  38. package/scripts/_triage_scope_renderers.py +207 -0
  39. package/scripts/_triage_smoketest_stages.py +674 -0
  40. package/scripts/_triage_subscribe_cli.py +140 -0
  41. package/scripts/_triage_welcome_cli.py +421 -0
  42. package/scripts/_vbrief_build.py +239 -0
  43. package/scripts/_vbrief_fidelity.py +479 -0
  44. package/scripts/_vbrief_legacy.py +589 -0
  45. package/scripts/_vbrief_reconciliation.py +883 -0
  46. package/scripts/_vbrief_routing.py +277 -0
  47. package/scripts/_vbrief_safety.py +778 -0
  48. package/scripts/_vbrief_sources.py +312 -0
  49. package/scripts/_vbrief_speckit.py +262 -0
  50. package/scripts/_vbrief_story_quality.py +353 -0
  51. package/scripts/_vbrief_validation.py +299 -0
  52. package/scripts/build_dist.py +412 -0
  53. package/scripts/cache.py +1078 -0
  54. package/scripts/cache_scanner.py +745 -0
  55. package/scripts/candidates_log.py +432 -0
  56. package/scripts/capacity_backfill.py +680 -0
  57. package/scripts/capacity_show.py +653 -0
  58. package/scripts/ci_local.py +689 -0
  59. package/scripts/code_structure_validate.py +765 -0
  60. package/scripts/codebase_default_extractor.py +495 -0
  61. package/scripts/codebase_map.py +304 -0
  62. package/scripts/codebase_map_fresh.py +104 -0
  63. package/scripts/codebase_projection_registry.py +94 -0
  64. package/scripts/codebase_provider.py +582 -0
  65. package/scripts/doctor.py +2257 -0
  66. package/scripts/framework_commands.py +505 -0
  67. package/scripts/gh_rest.py +882 -0
  68. package/scripts/github_auth_modes.py +437 -0
  69. package/scripts/github_body.py +292 -0
  70. package/scripts/ip_risk.py +531 -0
  71. package/scripts/issue_emit.py +670 -0
  72. package/scripts/issue_ingest.py +1064 -0
  73. package/scripts/migrate_preflight.py +418 -0
  74. package/scripts/migrate_vbrief.py +2677 -0
  75. package/scripts/monitor_pr.py +401 -0
  76. package/scripts/pack_migrate_lessons.py +336 -0
  77. package/scripts/pack_migrate_patterns.py +254 -0
  78. package/scripts/pack_migrate_rules.py +350 -0
  79. package/scripts/pack_migrate_skills.py +423 -0
  80. package/scripts/pack_migrate_strategies.py +311 -0
  81. package/scripts/pack_migrate_swarm_spec.py +250 -0
  82. package/scripts/pack_render.py +434 -0
  83. package/scripts/packs_slice.py +712 -0
  84. package/scripts/platform_capabilities.py +336 -0
  85. package/scripts/policy.py +2826 -0
  86. package/scripts/policy_set.py +324 -0
  87. package/scripts/pr_check_closing_keywords.py +524 -0
  88. package/scripts/pr_check_protected_issues.py +267 -0
  89. package/scripts/pr_merge_readiness.py +1004 -0
  90. package/scripts/pr_wait_mergeable.py +669 -0
  91. package/scripts/prd_render.py +159 -0
  92. package/scripts/preflight_architecture_sor.py +974 -0
  93. package/scripts/preflight_branch.py +289 -0
  94. package/scripts/preflight_cache.py +974 -0
  95. package/scripts/preflight_gh.py +721 -0
  96. package/scripts/preflight_implementation.py +272 -0
  97. package/scripts/preflight_story_start.py +838 -0
  98. package/scripts/preflight_wip_cap.py +149 -0
  99. package/scripts/probe_session.py +545 -0
  100. package/scripts/project_render.py +293 -0
  101. package/scripts/quarantine_ext.py +237 -0
  102. package/scripts/reconcile_issues.py +1442 -0
  103. package/scripts/refresh-path.ps1 +107 -0
  104. package/scripts/release.py +2030 -0
  105. package/scripts/release_e2e.py +1011 -0
  106. package/scripts/release_publish.py +486 -0
  107. package/scripts/release_rollback.py +980 -0
  108. package/scripts/relocate.py +1034 -0
  109. package/scripts/resolve_changelog_unreleased.py +667 -0
  110. package/scripts/resolve_version.py +490 -0
  111. package/scripts/resume_conditions.py +706 -0
  112. package/scripts/ritual_sentinel.py +609 -0
  113. package/scripts/roadmap_render.py +635 -0
  114. package/scripts/rule_ownership_lint.py +325 -0
  115. package/scripts/scm.py +591 -0
  116. package/scripts/scope_audit_log.py +387 -0
  117. package/scripts/scope_decompose.py +654 -0
  118. package/scripts/scope_demote.py +509 -0
  119. package/scripts/scope_lifecycle.py +1126 -0
  120. package/scripts/scope_undo.py +772 -0
  121. package/scripts/session_start.py +406 -0
  122. package/scripts/setup_ghx.py +339 -0
  123. package/scripts/setup_windows.ps1 +220 -0
  124. package/scripts/slice_audit.py +585 -0
  125. package/scripts/slice_record.py +530 -0
  126. package/scripts/slice_record_existing.py +692 -0
  127. package/scripts/slug_normalize.py +178 -0
  128. package/scripts/spec_render.py +477 -0
  129. package/scripts/spec_validate.py +238 -0
  130. package/scripts/subagent_monitor.py +658 -0
  131. package/scripts/swarm_complete_cohort.py +644 -0
  132. package/scripts/swarm_launch.py +1206 -0
  133. package/scripts/swarm_readiness.py +554 -0
  134. package/scripts/swarm_verify_review_clean.py +438 -0
  135. package/scripts/swarm_worktrees.py +497 -0
  136. package/scripts/toolchain-check.py +52 -0
  137. package/scripts/triage_actions.py +871 -0
  138. package/scripts/triage_bootstrap.py +1153 -0
  139. package/scripts/triage_bulk.py +630 -0
  140. package/scripts/triage_classify.py +932 -0
  141. package/scripts/triage_help.py +1685 -0
  142. package/scripts/triage_queue.py +1944 -0
  143. package/scripts/triage_reconcile.py +581 -0
  144. package/scripts/triage_refresh.py +643 -0
  145. package/scripts/triage_scope.py +999 -0
  146. package/scripts/triage_scope_drift.py +575 -0
  147. package/scripts/triage_smoketest.py +396 -0
  148. package/scripts/triage_subscribe.py +399 -0
  149. package/scripts/triage_summary.py +1011 -0
  150. package/scripts/triage_welcome.py +1178 -0
  151. package/scripts/ts_check_lane.py +86 -0
  152. package/scripts/validate-links.py +64 -0
  153. package/scripts/validate_strategy_output.py +212 -0
  154. package/scripts/vbrief_activate.py +228 -0
  155. package/scripts/vbrief_migrate_conformance.py +368 -0
  156. package/scripts/vbrief_reconcile_graph.py +306 -0
  157. package/scripts/vbrief_reconcile_labels.py +460 -0
  158. package/scripts/vbrief_reconcile_umbrellas.py +741 -0
  159. package/scripts/vbrief_validate.py +1195 -0
  160. package/scripts/verify-stubs.py +61 -0
  161. package/scripts/verify_capacity.py +160 -0
  162. package/scripts/verify_encoding.py +699 -0
  163. package/scripts/verify_hooks_installed.py +206 -0
  164. package/scripts/verify_investigation.py +360 -0
  165. package/scripts/verify_judgment_gates.py +827 -0
  166. package/scripts/verify_no_task_runtime.py +171 -0
  167. package/scripts/verify_scm_boundary.py +509 -0
  168. package/scripts/verify_session_ritual.py +389 -0
  169. package/scripts/verify_tools.py +426 -0
  170. package/scripts/verify_vbrief_conformance.py +478 -0
  171. package/tasks/architecture.yml +13 -0
  172. package/tasks/cache.yml +69 -0
  173. package/tasks/capacity.yml +38 -0
  174. package/tasks/change.yml +46 -0
  175. package/tasks/changelog.yml +24 -0
  176. package/tasks/ci.yml +49 -0
  177. package/tasks/codebase.yml +47 -0
  178. package/tasks/commit.yml +30 -0
  179. package/tasks/core.yml +126 -0
  180. package/tasks/deployments.yml +54 -0
  181. package/tasks/framework.yml +74 -0
  182. package/tasks/install.yml +60 -0
  183. package/tasks/issue.yml +50 -0
  184. package/tasks/migrate.yml +73 -0
  185. package/tasks/packs.yml +92 -0
  186. package/tasks/policy.yml +75 -0
  187. package/tasks/pr.yml +89 -0
  188. package/tasks/prd.yml +39 -0
  189. package/tasks/project.yml +27 -0
  190. package/tasks/reconcile.yml +32 -0
  191. package/tasks/relocate.yml +56 -0
  192. package/tasks/roadmap.yml +28 -0
  193. package/tasks/scm.yml +126 -0
  194. package/tasks/scope-undo.yml +36 -0
  195. package/tasks/scope.yml +141 -0
  196. package/tasks/session.yml +19 -0
  197. package/tasks/setup.yml +37 -0
  198. package/tasks/slice.yml +69 -0
  199. package/tasks/spec.yml +41 -0
  200. package/tasks/swarm.yml +85 -0
  201. package/tasks/toolchain.yml +13 -0
  202. package/tasks/triage-actions.yml +94 -0
  203. package/tasks/triage-bootstrap.yml +43 -0
  204. package/tasks/triage-bulk.yml +75 -0
  205. package/tasks/triage-classify.yml +30 -0
  206. package/tasks/triage-queue.yml +50 -0
  207. package/tasks/triage-reconcile.yml +29 -0
  208. package/tasks/triage-scope-drift.yml +29 -0
  209. package/tasks/triage-scope.yml +31 -0
  210. package/tasks/triage-smoketest.yml +33 -0
  211. package/tasks/triage-subscribe.yml +36 -0
  212. package/tasks/triage-summary.yml +29 -0
  213. package/tasks/triage-welcome.yml +32 -0
  214. package/tasks/ts.yml +328 -0
  215. package/tasks/vbrief.yml +206 -0
  216. package/tasks/verify.yml +292 -0
  217. package/templates/agents-entry.md +1 -1
@@ -0,0 +1,680 @@
1
+ #!/usr/bin/env python3
2
+ """capacity_backfill.py -- one-time capacity-bucket classifier for completed vBRIEFs (#1606).
3
+
4
+ The capacity engine (``scripts/capacity_show.py``, #1419 Slice 4) counts a
5
+ completed vBRIEF toward a bucket only when it carries an explicit
6
+ ``plan.metadata.capacityBucket`` (and a ``plan.metadata.completedAt`` inside
7
+ the trailing window). ``task scope:complete`` stamps both fields going
8
+ FORWARD -- but only from ``defaultBucket`` -- so a pre-adoption tree
9
+ (directive itself included) has completed work that is *classification
10
+ cold-start*: the history exists, but every completion is unclassified, so the
11
+ ``minSampleSize`` guard pins capacity in advisory mode forever.
12
+
13
+ This module is the deferred ``task capacity:backfill`` migration the #1419 RFC
14
+ ("Brownfield Backfill") specified: a one-time, dry-run-default, git-reversible
15
+ pass that derives the two missing facts onto ``completed/`` vBRIEFs:
16
+
17
+ * ``plan.metadata.completedAt`` -- the git landing time of the file (the most
18
+ recent commit that touched it), when not already present. Deterministic,
19
+ zero human input.
20
+ * ``plan.metadata.capacityBucket`` (+ ``plan.metadata.capacityBucketSource``)
21
+ -- inferred from the vBRIEF's origin-issue labels (its ``x-vbrief/github-issue``
22
+ reference) matched against the declared ``capacityAllocation.buckets[].match.labels``
23
+ predicates. A label match yields ``source="match"`` (high confidence); no
24
+ match (or no cached issue / no issue reference) falls to ``defaultBucket``
25
+ with ``source="default"`` and is surfaced in the low-confidence batch for
26
+ human review.
27
+
28
+ Guarantees:
29
+
30
+ * **Dry-run by default.** Writes only with ``--apply``.
31
+ * **Idempotent.** An explicit existing ``capacityBucket`` / ``completedAt`` is
32
+ preserved; a re-run is a no-op for already-stamped files.
33
+ * **Never mutates ``cost``** -- historical cost actuals are not backfillable
34
+ (no telemetry exists for past runs); ``cost`` accrues forward only.
35
+ * **Offline by default.** Reads cached issue labels from ``.deft-cache/github-issue/``;
36
+ no ``gh`` / network calls unless the opt-in REST label fetch is enabled. Git is the only subprocess (landing time).
37
+
38
+ Exit codes (three-state, mirrors ``scripts/triage_reconcile.py``):
39
+
40
+ * ``0`` -- backfill completed (or was a no-op on a re-run / dry-run).
41
+ * ``1`` -- a runtime step failed (e.g. a write raised).
42
+ * ``2`` -- config error: ``--project-root`` missing, or
43
+ ``plan.policy.capacityAllocation`` is not configured (nothing to classify
44
+ against).
45
+
46
+ Refs: #1606 (this tool), #1419 (parent RFC -- Brownfield Backfill), #1511
47
+ (flip gates advisory -> enforce; backfill is its prerequisite).
48
+ """
49
+
50
+ from __future__ import annotations
51
+
52
+ import argparse
53
+ import json
54
+ import os
55
+ import sys
56
+ from dataclasses import dataclass, field
57
+ from datetime import UTC, datetime
58
+ from pathlib import Path
59
+ from typing import Any
60
+
61
+ # Make sibling ``scripts`` modules importable when invoked as
62
+ # ``python scripts/capacity_backfill.py`` from the project root.
63
+ sys.path.insert(0, str(Path(__file__).resolve().parent))
64
+
65
+ from _safe_subprocess import run_text # noqa: E402
66
+ from _stdio_utf8 import reconfigure_stdio # noqa: E402
67
+ from policy import ( # noqa: E402
68
+ load_project_definition,
69
+ resolve_capacity_allocation,
70
+ )
71
+
72
+ reconfigure_stdio()
73
+
74
#: Lifecycle folder the backfill operates on (the backward / completed view).
#: ``backfill`` scans ``<project_root>/vbrief/<COMPLETED_FOLDER>/*.vbrief.json``.
COMPLETED_FOLDER: str = "completed"

#: Default location of the github-issue label cache (offline label source).
#: Path segments are joined onto the project root by ``cached_issue_labels``
#: when no explicit ``cache_dir`` is supplied.
CACHE_RELPATH: tuple[str, ...] = (".deft-cache", "github-issue")

#: ``capacityBucketSource`` values this tool records.
#: (``backfill`` additionally uses the literal ``"preserved"`` in memory for
#: items whose existing bucket is kept; that value is never written to disk.)
SOURCE_MATCH: str = "match"  # a bucket match.labels predicate matched
SOURCE_DEFAULT: str = "default"  # no match -> defaultBucket (low confidence)
83
+
84
+
85
+ # ---------------------------------------------------------------------------
86
+ # Data model
87
+ # ---------------------------------------------------------------------------
88
+
89
+
90
@dataclass(frozen=True)
class BucketMatcher:
    """A bucket id paired with its ``match.labels.any-of`` label set.

    Instances are immutable (``frozen=True``). ``classify_bucket`` tests
    ``labels`` for intersection with an issue's labels, so a matcher with an
    empty label set never wins a label match.
    """

    # Bucket ``id`` from the capacityAllocation block (stripped by the loader).
    bucket_id: str
    # Labels taken from the bucket's ``match.labels.any-of`` list.
    labels: frozenset[str]
96
+
97
+
98
@dataclass(frozen=True)
class BackfillItem:
    """One completed vBRIEF's resolved backfill facts.

    Built once per scanned file by ``backfill`` and appended to
    ``BackfillResult.items``; items that fell to the default bucket are also
    listed in ``BackfillResult.low_confidence``.
    """

    # Path relative to the ``vbrief`` folder, e.g. ``completed/<name>``.
    rel_path: str
    # Origin issue number parsed from the vBRIEF references, if any.
    issue_number: int | None
    # Bucket that applies to this file (existing one, matched, or default).
    bucket: str
    source: str  # SOURCE_MATCH | SOURCE_DEFAULT, or "preserved" for an existing bucket
    set_bucket: bool  # capacityBucket was absent and will be / was stamped
    set_completed_at: bool  # completedAt was absent and will be / was stamped
108
+
109
+
110
@dataclass
class BackfillResult:
    """Aggregate outcome of one :func:`backfill` run.

    Carries the per-run counters, the per-file items, and the exit status
    (``exit_code`` plus an optional ``error`` message) that the CLI reports.
    """

    project_root: Path
    dry_run: bool
    scanned: int = 0
    stamped_bucket: int = 0
    stamped_completed_at: int = 0
    already_classified: int = 0
    matched: int = 0
    defaulted: int = 0
    fetched: int = 0
    skipped_out_of_window: int = 0
    skipped_unreadable: int = 0
    window_only: bool = False
    window_days: int = 0
    items: list[BackfillItem] = field(default_factory=list)
    low_confidence: list[BackfillItem] = field(default_factory=list)
    error: str | None = None
    exit_code: int = 0

    def summary(self) -> str:
        """Build the multi-line, human-readable recap shown to the operator.

        Optional sections (REST fetches, window skips, unreadable files, the
        error line, the low-confidence batch and the dry-run hint) appear only
        when their counter / flag is set.
        """
        succeeded = self.exit_code == 0
        if self.dry_run:
            verb = "would stamp"
        else:
            verb = "stamped"
        mark = "✓" if succeeded else "✗"

        headline = (
            f" {mark} scanned {self.scanned} completed vBRIEF(s); "
            f"{verb} capacityBucket on {self.stamped_bucket} "
            f"(matched {self.matched}, defaulted {self.defaulted}); "
            f"{verb} completedAt on {self.stamped_completed_at}; "
            f"{self.already_classified} already classified"
        )
        report = ["", "Capacity backfill recap:", headline]

        if self.fetched:
            report.append(
                f" fetched labels for {self.fetched} uncached issue(s) via REST"
            )
        if self.window_only:
            report.append(
                f" window-only: skipped {self.skipped_out_of_window} "
                f"completion(s) outside the trailing {self.window_days}d window"
            )
        if self.skipped_unreadable:
            report.append(
                f" skipped {self.skipped_unreadable} unreadable/malformed "
                "completed vBRIEF file(s) (not counted in scanned)"
            )
        if self.error:
            report.append(f" error: {self.error}")

        if self.low_confidence:
            report.extend(
                [
                    "",
                    f" Low-confidence batch ({len(self.low_confidence)}) -- "
                    "no label match, fell to defaultBucket; review + re-bucket as needed:",
                ]
            )
            for entry in self.low_confidence:
                if entry.issue_number:
                    issue = f"#{entry.issue_number}"
                else:
                    issue = "(no issue ref)"
                report.append(f" {issue} -> {entry.bucket} [{entry.rel_path}]")

        if self.dry_run and succeeded:
            report.extend(
                ["", " Dry-run -- re-run with --apply to write these changes."]
            )
        return "\n".join(report)
173
+
174
+
175
+ # ---------------------------------------------------------------------------
176
+ # Bucket-matcher resolution (reads the RAW match.labels the policy resolver drops)
177
+ # ---------------------------------------------------------------------------
178
+
179
+
180
def load_bucket_matchers(project_root: Path) -> tuple[list[BucketMatcher], str]:
    """Read the raw bucket label predicates and default bucket for a project.

    Walks ``plan.policy.capacityAllocation`` in the loaded PROJECT-DEFINITION
    and builds one :class:`BucketMatcher` per bucket entry that has a
    non-blank string ``id``. Declaration order is kept, because the first
    bucket whose label set intersects wins during classification.

    Returns:
        ``(matchers, default_bucket)``. ``([], "")`` when the definition or
        the ``capacityAllocation`` block is missing / not a mapping;
        ``default_bucket`` is ``""`` when ``defaultBucket`` is not a string.
    """
    definition, _load_error = load_project_definition(project_root)

    # Descend plan -> policy -> capacityAllocation, tolerating any level
    # that is missing or not a mapping.
    section: Any = definition
    for key in ("plan", "policy", "capacityAllocation"):
        section = section.get(key) if isinstance(section, dict) else None
    if not isinstance(section, dict):
        return [], ""

    declared = section.get("buckets")
    entries = declared if isinstance(declared, list) else []
    resolved: list[BucketMatcher] = [
        BucketMatcher(
            bucket_id=entry["id"].strip(),
            labels=frozenset(_match_labels(entry.get("match"))),
        )
        for entry in entries
        if isinstance(entry, dict)
        and isinstance(entry.get("id"), str)
        and entry["id"].strip()
    ]

    fallback = section.get("defaultBucket")
    return resolved, (fallback if isinstance(fallback, str) else "")
212
+
213
+
214
def _match_labels(match: Any) -> set[str]:
    """Return the non-empty string entries of ``match["labels"]["any-of"]``.

    Any structural mismatch (``match`` or ``labels`` not a mapping, ``any-of``
    not a list) yields an empty set instead of raising.
    """
    node = match.get("labels") if isinstance(match, dict) else None
    node = node.get("any-of") if isinstance(node, dict) else None
    if not isinstance(node, list):
        return set()
    return {entry for entry in node if isinstance(entry, str) and entry}
225
+
226
+
227
def classify_bucket(
    issue_labels: set[str], matchers: list[BucketMatcher], default_bucket: str
) -> tuple[str, str]:
    """Pick the bucket for an issue's labels and say how it was chosen.

    Matchers are tried in the order given; the first one sharing at least one
    label with *issue_labels* is returned with ``SOURCE_MATCH``. When none
    shares a label the result is ``(default_bucket, SOURCE_DEFAULT)``.
    """
    winner = next(
        (candidate for candidate in matchers if candidate.labels & issue_labels),
        None,
    )
    if winner is None:
        return default_bucket, SOURCE_DEFAULT
    return winner.bucket_id, SOURCE_MATCH
239
+
240
+
241
+ # ---------------------------------------------------------------------------
242
+ # vBRIEF + cache + git helpers
243
+ # ---------------------------------------------------------------------------
244
+
245
+
246
+ def _parse_iso(value: Any) -> datetime | None:
247
+ """Parse an ISO-8601 timestamp (``...Z`` or offset form) to aware UTC."""
248
+ if not isinstance(value, str) or not value.strip():
249
+ return None
250
+ text = value.strip()
251
+ if text.endswith("Z"):
252
+ text = text[:-1] + "+00:00"
253
+ try:
254
+ parsed = datetime.fromisoformat(text)
255
+ except ValueError:
256
+ return None
257
+ if parsed.tzinfo is None:
258
+ parsed = parsed.replace(tzinfo=UTC)
259
+ return parsed.astimezone(UTC)
260
+
261
+
262
+ def _to_iso_z(dt: datetime) -> str:
263
+ """Render an aware datetime as the canonical ``...Z`` form used on disk."""
264
+ return dt.astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
265
+
266
+
267
def extract_issue_ref(plan: dict[str, Any]) -> tuple[str | None, int | None]:
    """Pull ``(repo, issue_number)`` from a vBRIEF plan's ``x-vbrief/github-issue`` ref.

    Scans ``plan["references"]`` in order and returns the first entry of type
    ``x-vbrief/github-issue`` whose ``uri`` ends in
    ``<owner>/<repo>/issues/<number>`` (a trailing slash and the URL scheme
    are ignored).

    Args:
        plan: The vBRIEF ``plan`` mapping.

    Returns:
        ``("owner/repo", number)`` for the first usable reference, otherwise
        ``(None, None)`` -- including when ``references`` is not a list or no
        URI has the expected shape.
    """
    refs = plan.get("references")
    if not isinstance(refs, list):
        return None, None
    for ref in refs:
        if not isinstance(ref, dict) or ref.get("type") != "x-vbrief/github-issue":
            continue
        uri = ref.get("uri")
        if not isinstance(uri, str):
            continue
        cleaned = uri.strip().rstrip("/")
        # Drop the scheme, then keep only the non-empty path segments.
        parts = [p for p in cleaned.split("://", 1)[-1].split("/") if p]
        if len(parts) < 4 or parts[-2] != "issues":
            continue
        number = parts[-1]
        # ``str.isdigit`` alone also accepts characters such as "²" that
        # ``int()`` rejects with ValueError; require ASCII digits so a
        # malformed URI is skipped instead of aborting the whole run.
        if number.isascii() and number.isdigit():
            return f"{parts[-4]}/{parts[-3]}", int(number)
    return None, None
283
+
284
+
285
def cached_issue_labels(
    project_root: Path, repo: str, issue_number: int, *, cache_dir: Path | None = None
) -> set[str] | None:
    """Look up an issue's labels in the on-disk cache without any network use.

    Reads ``<cache>/<repo>/<issue_number>/raw.json`` where ``<cache>`` is
    *cache_dir* when given, else the default cache folder under
    *project_root*. Label entries may be plain strings or mappings with a
    ``name`` key; empty names and other entry types are ignored.

    Returns:
        None when the cache file is absent or cannot be read / parsed (no
        label match can be attempted); otherwise the label set, which is
        empty when the cached payload has no ``labels`` list.
    """
    cache_root = cache_dir or project_root.joinpath(*CACHE_RELPATH)
    snapshot = cache_root / repo / str(issue_number) / "raw.json"
    if not snapshot.is_file():
        return None
    try:
        payload = json.loads(snapshot.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return None

    raw_labels = payload.get("labels") if isinstance(payload, dict) else None
    if not isinstance(raw_labels, list):
        return set()

    # Normalise both label shapes to a candidate name, then keep the
    # non-empty strings.
    candidates = (
        entry.get("name") if isinstance(entry, dict) else entry
        for entry in raw_labels
    )
    return {name for name in candidates if isinstance(name, str) and name}
313
+
314
+
315
def git_landing_time(repo_rel_path: str, project_root: Path) -> str | None:
    """Report when *repo_rel_path* was last committed, as a ``...Z`` timestamp.

    Runs ``git log -1 --format=%cI -- <path>`` with *project_root* as the
    working directory; the committer date of that most recent commit serves
    as a deterministic stand-in for when the vBRIEF landed in ``completed/``.
    *repo_rel_path* MUST be relative to the git repository root (for example
    ``vbrief/completed/<name>``), not to the lifecycle folder.

    Returns:
        The UTC timestamp string, or None when the git call raises
        ``OSError`` / ``ValueError``, exits non-zero, or prints nothing
        parseable (e.g. the file is untracked).
    """
    command = ["git", "log", "-1", "--format=%cI", "--", repo_rel_path]
    try:
        completed = run_text(command, cwd=str(project_root))
    except (OSError, ValueError):
        return None
    if completed.returncode != 0:
        return None
    landed = _parse_iso(completed.stdout.strip())
    if landed is None:
        return None
    return _to_iso_z(landed)
335
+
336
+
337
def fetch_issue_labels(repo: str, issue_number: int) -> set[str] | None:
    """Fetch an issue's label set via the REST shim (closed-issue-safe), or None.

    Routes through ``scripts/gh_rest.rest_issue_view`` (REST, never GraphQL --
    respects the #954 bucket-hygiene rule and the #1145 scm-boundary). Imported
    lazily so the offline default path has no ``gh`` dependency and the unit
    tests need no network.

    Args:
        repo: ``owner/name`` repository slug.
        issue_number: Issue number within *repo*.

    Returns:
        None on any failure -- the import or the call raised, or the response
        is not a mapping -- so the caller falls back to the default bucket and
        does not count the issue as fetched. Otherwise the set of label names,
        which is empty when the response carries no ``labels`` list.
    """
    try:
        from gh_rest import rest_issue_view  # noqa: PLC0415 -- lazy, opt-in only

        issue = rest_issue_view(repo, issue_number)
    except Exception:  # noqa: BLE001 -- any fetch failure degrades to default
        return None
    if not isinstance(issue, dict):
        # A non-mapping response is a failed fetch, not "an issue without
        # labels": returning a set here would be counted as a successful
        # fetch by the caller.
        return None
    labels = issue.get("labels")
    if not isinstance(labels, list):
        return set()
    out: set[str] = set()
    for label in labels:
        # Labels arrive either as bare strings or as objects with a ``name``.
        if isinstance(label, str) and label:
            out.add(label)
        elif isinstance(label, dict):
            name = label.get("name")
            if isinstance(name, str) and name:
                out.add(name)
    return out
364
+
365
+
366
+ # ---------------------------------------------------------------------------
367
+ # Core backfill logic
368
+ # ---------------------------------------------------------------------------
369
+
370
+
371
def backfill(
    project_root: Path,
    *,
    cache_dir: Path | None = None,
    dry_run: bool = True,
    window_only: bool = False,
    fetch: bool = False,
    now: datetime | None = None,
) -> BackfillResult:
    """Backfill ``capacityBucket`` / ``completedAt`` on completed vBRIEFs.

    Idempotent: explicit existing values are preserved. ``cost`` is never
    touched. When *window_only* is set, completions whose effective
    ``completedAt`` falls outside the trailing ``capacityAllocation.window``
    are skipped (the activation-critical subset is exactly the in-window one).
    When *fetch* is set, origin-issue labels missing from the local cache are
    pulled via the REST shim (the one-time online opt-in for brownfield history
    whose closed issues are not in the open-issue-scoped triage cache).

    Args:
        project_root: Project root containing ``vbrief/completed/``.
        cache_dir: Override for the issue-label cache directory.
        dry_run: When true (default) nothing is written; counters report what
            would be stamped.
        window_only: Skip completions outside the trailing window.
        fetch: Allow a REST lookup for issues missing from the cache.
        now: Reference time for the window check; defaults to the current
            UTC time.

    Returns:
        A :class:`BackfillResult`. ``exit_code`` is 2 when
        ``capacityAllocation`` is not configured or has no ``defaultBucket``,
        1 when a write raised ``OSError`` (the run stops at that file), and 0
        otherwise -- including when ``vbrief/completed/`` does not exist.
    """
    now_dt = now or datetime.now(UTC)
    allocation = resolve_capacity_allocation(project_root)
    result = BackfillResult(
        project_root=project_root,
        dry_run=dry_run,
        window_only=window_only,
        window_days=allocation.window_days,
    )
    if not allocation.configured:
        result.error = (
            "plan.policy.capacityAllocation is not configured -- configure "
            "buckets before backfilling (see #1419 / task capacity:show)"
        )
        result.exit_code = 2
        return result

    matchers, default_bucket = load_bucket_matchers(project_root)
    if not default_bucket:
        # resolve_capacity_allocation validated the block, but a missing
        # defaultBucket means unmatched work has nowhere to go -- fail loud.
        result.error = (
            "capacityAllocation.defaultBucket is required for backfill "
            "(unmatched completions must have a fallback bucket)"
        )
        result.exit_code = 2
        return result

    completed_dir = project_root / "vbrief" / COMPLETED_FOLDER
    if not completed_dir.is_dir():
        return result

    for path in sorted(completed_dir.glob("*.vbrief.json")):
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Corrupted / non-UTF-8 / malformed-JSON files are skipped, but the
            # skip is now counted so the summary's ``scanned`` figure is not
            # silently lower than the actual file count (#1606 review).
            result.skipped_unreadable += 1
            continue
        plan = data.get("plan") if isinstance(data, dict) else None
        if not isinstance(plan, dict):
            # Valid JSON without a ``plan`` object is just as unusable as an
            # unparseable file; count it too so the skip is visible in the
            # recap instead of silently shrinking ``scanned``.
            result.skipped_unreadable += 1
            continue
        result.scanned += 1
        rel_path = f"{COMPLETED_FOLDER}/{path.name}"
        git_rel_path = f"vbrief/{rel_path}"

        metadata = plan.get("metadata")
        if not isinstance(metadata, dict):
            # Work on a fresh mapping; it is attached to the plan only if a
            # write actually happens.
            metadata = {}

        existing_bucket = metadata.get("capacityBucket")
        has_bucket = isinstance(existing_bucket, str) and bool(existing_bucket.strip())
        existing_completed_at = metadata.get("completedAt")
        has_completed_at = (
            isinstance(existing_completed_at, str) and bool(existing_completed_at.strip())
        )

        # Resolve the effective completedAt (existing, else git landing time)
        # so the window filter and the stamp share one value.
        effective_completed_at = existing_completed_at if has_completed_at else None
        git_completed_at: str | None = None
        if not has_completed_at:
            git_completed_at = git_landing_time(git_rel_path, project_root)
            effective_completed_at = git_completed_at

        if window_only and not _in_window(effective_completed_at, allocation.window_days, now_dt):
            result.skipped_out_of_window += 1
            continue

        repo, issue_number = extract_issue_ref(plan)
        if has_bucket:
            result.already_classified += 1
            bucket = existing_bucket.strip()
            source = "preserved"
        else:
            labels: set[str] | None = None
            if repo and issue_number is not None:
                labels = cached_issue_labels(
                    project_root, repo, issue_number, cache_dir=cache_dir
                )
                if labels is None and fetch:
                    labels = fetch_issue_labels(repo, issue_number)
                    if labels is not None:
                        result.fetched += 1
            # No labels at all (no ref, cache miss, failed fetch) classifies
            # like an unlabeled issue: it falls to the default bucket.
            bucket, source = classify_bucket(labels or set(), matchers, default_bucket)

        set_bucket = not has_bucket
        set_completed_at = not has_completed_at and git_completed_at is not None

        item = BackfillItem(
            rel_path=rel_path,
            issue_number=issue_number,
            bucket=bucket,
            source=source,
            set_bucket=set_bucket,
            set_completed_at=set_completed_at,
        )
        result.items.append(item)

        # Write FIRST (apply mode), then tally -- so an OSError mid-run leaves
        # the summary counting only what actually reached disk, not the failed
        # item (#1606 review). Dry-run performs no write and falls straight to
        # the tally so it reports what it WOULD stamp.
        if not dry_run and (set_bucket or set_completed_at):
            try:
                _write_metadata(
                    path,
                    data,
                    plan,
                    metadata,
                    bucket=bucket if set_bucket else None,
                    source=source if set_bucket else None,
                    completed_at=git_completed_at if set_completed_at else None,
                )
            except OSError as exc:
                result.error = f"{type(exc).__name__}: {exc} ({rel_path})"
                result.exit_code = 1
                return result

        if set_bucket:
            result.stamped_bucket += 1
            if source == SOURCE_MATCH:
                result.matched += 1
            else:
                result.defaulted += 1
                result.low_confidence.append(item)
        if set_completed_at:
            result.stamped_completed_at += 1

    return result
521
+
522
+
523
def _in_window(completed_at: str | None, window_days: int, now: datetime) -> bool:
    """Tell whether *completed_at* lies within the trailing window ending at *now*.

    False when the timestamp does not parse, lies in the future relative to
    *now*, or is more than *window_days* days old; both window edges count as
    inside.
    """
    moment = _parse_iso(completed_at)
    if moment is None:
        return False
    elapsed_days = (now - moment).total_seconds() / 86400.0
    if elapsed_days < 0:
        return False
    return elapsed_days <= window_days
530
+
531
+
532
def _write_metadata(
    path: Path,
    data: dict[str, Any],
    plan: dict[str, Any],
    metadata: dict[str, Any],
    *,
    bucket: str | None,
    source: str | None,
    completed_at: str | None,
) -> None:
    """Record the resolved fields on *metadata* and serialize *data* to *path*.

    *metadata* is attached as ``plan["metadata"]`` only when *plan* does not
    already hold a dict under that key. Each of *completed_at*, *bucket* and
    *source* is stored (as ``completedAt``, ``capacityBucket`` and
    ``capacityBucketSource`` respectively) only when it is not ``None``.
    No ``cost`` key is read or written here.

    The file is written as UTF-8 JSON with a 2-space indent,
    ``ensure_ascii=False`` and a trailing newline; the original author notes
    this is meant to mirror the write style of ``scripts/scope_lifecycle.py``.

    NOTE(review): presumably *plan* is nested inside *data* and *metadata* is
    the dict already stored on *plan* when one exists -- verify against the
    caller, since only *data* is serialized.
    """
    current = plan.get("metadata")
    if not isinstance(current, dict):
        plan["metadata"] = metadata

    # Order is significant: it fixes the insertion order of newly added keys
    # and therefore their order in the serialized file.
    stamps = (
        ("completedAt", completed_at),
        ("capacityBucket", bucket),
        ("capacityBucketSource", source),
    )
    for key, value in stamps:
        if value is not None:
            metadata[key] = value

    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    path.write_text(serialized + "\n", encoding="utf-8")
559
+
560
+
561
+ # ---------------------------------------------------------------------------
562
+ # CLI
563
+ # ---------------------------------------------------------------------------
564
+
565
+
566
def _emit_json(result: BackfillResult) -> str:
    """Render *result* as a single-line JSON document with sorted keys.

    The payload carries ``project_root`` (stringified), the run flags and
    counters under their attribute names, ``exit_code``, ``error``, and a
    ``low_confidence`` list holding ``issue_number``, ``bucket`` and
    ``rel_path`` for each entry of ``result.low_confidence``.
    """
    # Attributes copied verbatim onto the payload under the same key.
    passthrough = (
        "dry_run",
        "scanned",
        "stamped_bucket",
        "stamped_completed_at",
        "already_classified",
        "matched",
        "defaulted",
        "fetched",
        "skipped_out_of_window",
        "skipped_unreadable",
        "window_only",
        "window_days",
        "exit_code",
        "error",
    )
    payload = {name: getattr(result, name) for name in passthrough}
    payload["project_root"] = str(result.project_root)
    payload["low_confidence"] = [
        {
            "issue_number": entry.issue_number,
            "bucket": entry.bucket,
            "rel_path": entry.rel_path,
        }
        for entry in result.low_confidence
    ]
    # sort_keys makes the output independent of construction order.
    return json.dumps(payload, sort_keys=True)
589
+
590
+
591
def _build_parser() -> argparse.ArgumentParser:
    """Construct the command-line parser for ``capacity_backfill.py``.

    Options registered, in order: ``--project-root`` (defaulting to the
    ``DEFT_PROJECT_ROOT`` environment variable, else ``"."``), ``--apply``,
    ``--window-only``, ``--fetch``, ``--cache-dir`` and ``--json`` (stored as
    ``emit_json``). All boolean switches default to ``False``.
    """
    cli = argparse.ArgumentParser(
        prog="capacity_backfill.py",
        description=(
            "One-time capacity-bucket classifier for completed vBRIEFs (#1606). "
            "Stamps plan.metadata.capacityBucket (inferred from origin-issue "
            "labels via the configured bucket match rules) and completedAt "
            "(git landing time) onto completed/ vBRIEFs that lack them. "
            "Dry-run by default; idempotent; never touches cost."
        ),
    )

    # (flag, add_argument keyword arguments); registration order drives the
    # order in which options appear in --help.
    option_table = (
        (
            "--project-root",
            {
                "default": os.environ.get("DEFT_PROJECT_ROOT", "."),
                "help": "Path to the project root (default: $DEFT_PROJECT_ROOT or cwd).",
            },
        ),
        (
            "--apply",
            {
                "action": "store_true",
                "help": "Write the changes. Without this flag the tool is a dry-run.",
            },
        ),
        (
            "--window-only",
            {
                "action": "store_true",
                "dest": "window_only",
                "help": (
                    "Only backfill completions whose completedAt falls within the "
                    "trailing capacityAllocation.window -- the activation-critical "
                    "subset capacity:show actually counts."
                ),
            },
        ),
        (
            "--fetch",
            {
                "action": "store_true",
                "help": (
                    "Pull origin-issue labels via the REST shim for issues missing from "
                    "the local cache (a one-time online opt-in for brownfield history; "
                    "closed issues are not in the open-issue-scoped triage cache). "
                    "Without this flag the tool is fully offline."
                ),
            },
        ),
        (
            "--cache-dir",
            {
                "default": None,
                "help": (
                    "Override the github-issue label cache directory "
                    "(default: <project-root>/.deft-cache/github-issue)."
                ),
            },
        ),
        (
            "--json",
            {
                "action": "store_true",
                "dest": "emit_json",
                "help": "Emit a structured JSON payload instead of the human recap.",
            },
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli
647
+
648
+
649
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse *argv*, run the backfill, report it.

    Returns ``2`` (after printing a diagnostic to stderr) when the resolved
    ``--project-root`` does not exist or is not a directory. Otherwise runs
    ``backfill`` -- as a dry-run unless ``--apply`` was given -- and returns
    its ``exit_code``. With ``--json`` the JSON payload goes to stdout; without
    it the human summary goes to stderr when ``exit_code`` is truthy and to
    stdout otherwise.
    """
    options = _build_parser().parse_args(argv)

    project_root = Path(options.project_root).resolve()
    if not (project_root.exists() and project_root.is_dir()):
        complaint = (
            f"❌ capacity:backfill: --project-root {project_root} does not exist "
            "or is not a directory."
        )
        print(complaint, file=sys.stderr)
        return 2

    # An empty or absent --cache-dir leaves the choice of directory to backfill().
    cache_dir = None
    if options.cache_dir:
        cache_dir = Path(options.cache_dir).resolve()

    result = backfill(
        project_root,
        cache_dir=cache_dir,
        dry_run=not options.apply,
        window_only=options.window_only,
        fetch=options.fetch,
    )

    if options.emit_json:
        print(_emit_json(result))
    else:
        recap = result.summary()
        # A failing run reports on stderr so stdout stays clean.
        stream = sys.stderr if result.exit_code else sys.stdout
        print(recap, file=stream)

    return result.exit_code
677
+
678
+
679
if __name__ == "__main__":
    # Run the CLI and surface its return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)