@deftai/directive-content 0.59.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.githooks/pre-push +10 -9
  2. package/Taskfile.yml +48 -58
  3. package/UPGRADING.md +1 -1
  4. package/docs/assets/directive-lifecycle-diagram.png +0 -0
  5. package/docs/directive-lifecycle.md +73 -0
  6. package/docs/getting-started.md +5 -1
  7. package/package.json +3 -3
  8. package/packs/skills/skills-pack-0.1.json +22 -22
  9. package/scm/github.md +20 -2
  10. package/tasks/change.yml +16 -31
  11. package/tasks/ci.yml +8 -0
  12. package/tasks/commit.yml +12 -19
  13. package/tasks/core.yml +10 -0
  14. package/tasks/engine.yml +42 -0
  15. package/tasks/framework.yml +3 -0
  16. package/tasks/install.yml +20 -19
  17. package/tasks/migrate.yml +26 -15
  18. package/tasks/project.yml +16 -0
  19. package/tasks/toolchain.yml +15 -5
  20. package/tasks/vbrief.yml +4 -3
  21. package/tasks/verify.yml +12 -14
  22. package/scripts/_agents_md.py +0 -494
  23. package/scripts/_cache_fetch.py +0 -635
  24. package/scripts/_cache_quota.py +0 -529
  25. package/scripts/_cache_refresh.py +0 -163
  26. package/scripts/_cache_validate.py +0 -209
  27. package/scripts/_content_root.py +0 -42
  28. package/scripts/_doctor_state.py +0 -277
  29. package/scripts/_event_detect.py +0 -305
  30. package/scripts/_events.py +0 -514
  31. package/scripts/_lifecycle_hygiene.py +0 -568
  32. package/scripts/_pathspec.py +0 -91
  33. package/scripts/_policy_show_cli.py +0 -266
  34. package/scripts/_precutover.py +0 -92
  35. package/scripts/_project_context.py +0 -224
  36. package/scripts/_project_definition_io.py +0 -164
  37. package/scripts/_relocate_snapshot.py +0 -209
  38. package/scripts/_relocate_states.py +0 -343
  39. package/scripts/_resolve_preflight_path.py +0 -152
  40. package/scripts/_safe_subprocess.py +0 -167
  41. package/scripts/_session_start_hook.py +0 -205
  42. package/scripts/_sor_gate_diff.py +0 -365
  43. package/scripts/_stdio_utf8.py +0 -59
  44. package/scripts/_triage_bootstrap_gitignore.py +0 -904
  45. package/scripts/_triage_classify_cli.py +0 -122
  46. package/scripts/_triage_queue_cli.py +0 -625
  47. package/scripts/_triage_scope_cli.py +0 -343
  48. package/scripts/_triage_scope_drift_cli.py +0 -121
  49. package/scripts/_triage_scope_ignores.py +0 -286
  50. package/scripts/_triage_scope_milestone.py +0 -432
  51. package/scripts/_triage_scope_mutations.py +0 -337
  52. package/scripts/_triage_scope_renderers.py +0 -207
  53. package/scripts/_triage_smoketest_stages.py +0 -674
  54. package/scripts/_triage_subscribe_cli.py +0 -140
  55. package/scripts/_triage_welcome_cli.py +0 -421
  56. package/scripts/_vbrief_build.py +0 -239
  57. package/scripts/_vbrief_fidelity.py +0 -479
  58. package/scripts/_vbrief_legacy.py +0 -589
  59. package/scripts/_vbrief_reconciliation.py +0 -883
  60. package/scripts/_vbrief_routing.py +0 -277
  61. package/scripts/_vbrief_safety.py +0 -778
  62. package/scripts/_vbrief_sources.py +0 -312
  63. package/scripts/_vbrief_speckit.py +0 -262
  64. package/scripts/_vbrief_story_quality.py +0 -353
  65. package/scripts/_vbrief_validation.py +0 -299
  66. package/scripts/build_dist.py +0 -412
  67. package/scripts/cache.py +0 -1078
  68. package/scripts/cache_scanner.py +0 -745
  69. package/scripts/candidates_log.py +0 -432
  70. package/scripts/capacity_backfill.py +0 -680
  71. package/scripts/capacity_show.py +0 -653
  72. package/scripts/ci_local.py +0 -689
  73. package/scripts/code_structure_validate.py +0 -765
  74. package/scripts/codebase_default_extractor.py +0 -495
  75. package/scripts/codebase_map.py +0 -304
  76. package/scripts/codebase_map_fresh.py +0 -104
  77. package/scripts/codebase_projection_registry.py +0 -94
  78. package/scripts/codebase_provider.py +0 -582
  79. package/scripts/doctor.py +0 -2552
  80. package/scripts/framework_commands.py +0 -505
  81. package/scripts/gh_rest.py +0 -882
  82. package/scripts/github_auth_modes.py +0 -437
  83. package/scripts/github_body.py +0 -292
  84. package/scripts/ip_risk.py +0 -531
  85. package/scripts/issue_emit.py +0 -670
  86. package/scripts/issue_ingest.py +0 -1064
  87. package/scripts/migrate_preflight.py +0 -418
  88. package/scripts/migrate_vbrief.py +0 -2677
  89. package/scripts/monitor_pr.py +0 -401
  90. package/scripts/pack_migrate_lessons.py +0 -336
  91. package/scripts/pack_migrate_patterns.py +0 -254
  92. package/scripts/pack_migrate_rules.py +0 -350
  93. package/scripts/pack_migrate_skills.py +0 -423
  94. package/scripts/pack_migrate_strategies.py +0 -311
  95. package/scripts/pack_migrate_swarm_spec.py +0 -250
  96. package/scripts/pack_render.py +0 -434
  97. package/scripts/packs_slice.py +0 -712
  98. package/scripts/platform_capabilities.py +0 -336
  99. package/scripts/policy.py +0 -2826
  100. package/scripts/policy_set.py +0 -324
  101. package/scripts/pr_check_closing_keywords.py +0 -524
  102. package/scripts/pr_check_protected_issues.py +0 -267
  103. package/scripts/pr_merge_readiness.py +0 -1004
  104. package/scripts/pr_wait_mergeable.py +0 -669
  105. package/scripts/prd_render.py +0 -159
  106. package/scripts/preflight_architecture_sor.py +0 -974
  107. package/scripts/preflight_branch.py +0 -289
  108. package/scripts/preflight_cache.py +0 -974
  109. package/scripts/preflight_gh.py +0 -721
  110. package/scripts/preflight_implementation.py +0 -272
  111. package/scripts/preflight_story_start.py +0 -838
  112. package/scripts/preflight_wip_cap.py +0 -149
  113. package/scripts/probe_session.py +0 -545
  114. package/scripts/project_render.py +0 -293
  115. package/scripts/quarantine_ext.py +0 -237
  116. package/scripts/reconcile_issues.py +0 -1442
  117. package/scripts/refresh-path.ps1 +0 -107
  118. package/scripts/release.py +0 -2030
  119. package/scripts/release_e2e.py +0 -1011
  120. package/scripts/release_publish.py +0 -486
  121. package/scripts/release_rollback.py +0 -980
  122. package/scripts/relocate.py +0 -1034
  123. package/scripts/resolve_changelog_unreleased.py +0 -667
  124. package/scripts/resolve_version.py +0 -490
  125. package/scripts/resume_conditions.py +0 -706
  126. package/scripts/ritual_sentinel.py +0 -609
  127. package/scripts/roadmap_render.py +0 -635
  128. package/scripts/rule_ownership_lint.py +0 -325
  129. package/scripts/scm.py +0 -591
  130. package/scripts/scope_audit_log.py +0 -387
  131. package/scripts/scope_decompose.py +0 -654
  132. package/scripts/scope_demote.py +0 -509
  133. package/scripts/scope_lifecycle.py +0 -1126
  134. package/scripts/scope_undo.py +0 -772
  135. package/scripts/session_start.py +0 -406
  136. package/scripts/setup_ghx.py +0 -339
  137. package/scripts/setup_windows.ps1 +0 -220
  138. package/scripts/slice_audit.py +0 -585
  139. package/scripts/slice_record.py +0 -530
  140. package/scripts/slice_record_existing.py +0 -692
  141. package/scripts/slug_normalize.py +0 -178
  142. package/scripts/spec_render.py +0 -477
  143. package/scripts/spec_validate.py +0 -238
  144. package/scripts/subagent_monitor.py +0 -658
  145. package/scripts/swarm_complete_cohort.py +0 -644
  146. package/scripts/swarm_launch.py +0 -1206
  147. package/scripts/swarm_readiness.py +0 -554
  148. package/scripts/swarm_verify_review_clean.py +0 -438
  149. package/scripts/swarm_worktrees.py +0 -497
  150. package/scripts/toolchain-check.py +0 -52
  151. package/scripts/triage_actions.py +0 -871
  152. package/scripts/triage_bootstrap.py +0 -1153
  153. package/scripts/triage_bulk.py +0 -630
  154. package/scripts/triage_classify.py +0 -932
  155. package/scripts/triage_help.py +0 -1685
  156. package/scripts/triage_queue.py +0 -1944
  157. package/scripts/triage_reconcile.py +0 -581
  158. package/scripts/triage_refresh.py +0 -643
  159. package/scripts/triage_scope.py +0 -999
  160. package/scripts/triage_scope_drift.py +0 -575
  161. package/scripts/triage_smoketest.py +0 -396
  162. package/scripts/triage_subscribe.py +0 -399
  163. package/scripts/triage_summary.py +0 -1011
  164. package/scripts/triage_welcome.py +0 -1178
  165. package/scripts/ts_check_lane.py +0 -86
  166. package/scripts/validate-links.py +0 -64
  167. package/scripts/validate_strategy_output.py +0 -212
  168. package/scripts/vbrief_activate.py +0 -228
  169. package/scripts/vbrief_migrate_conformance.py +0 -368
  170. package/scripts/vbrief_reconcile_graph.py +0 -306
  171. package/scripts/vbrief_reconcile_labels.py +0 -460
  172. package/scripts/vbrief_reconcile_umbrellas.py +0 -741
  173. package/scripts/vbrief_validate.py +0 -1144
  174. package/scripts/verify-stubs.py +0 -61
  175. package/scripts/verify_capacity.py +0 -160
  176. package/scripts/verify_encoding.py +0 -699
  177. package/scripts/verify_hooks_installed.py +0 -206
  178. package/scripts/verify_investigation.py +0 -360
  179. package/scripts/verify_judgment_gates.py +0 -827
  180. package/scripts/verify_no_task_runtime.py +0 -171
  181. package/scripts/verify_scm_boundary.py +0 -509
  182. package/scripts/verify_session_ritual.py +0 -389
  183. package/scripts/verify_tools.py +0 -426
  184. package/scripts/verify_vbrief_conformance.py +0 -478
@@ -1,635 +0,0 @@
1
- #!/usr/bin/env python3
2
- """_cache_fetch.py -- cache:fetch-all orchestrator (#883 Story 2 + #1239 REST migration).
3
-
4
- Drives the per-repo bootstrap mirror that writes one
5
- ``.deft-cache/github-issue/<owner>/<repo>/<N>/`` entry per upstream
6
- issue. Lives in a separate module from :mod:`cache` to keep the parent
7
- under the 1000-line MUST limit from ``coding/coding.md``.
8
-
9
- #1239 / Writer-side REST migration
10
- ----------------------------------
11
- Pre-#1239 the orchestrator drained the GraphQL bucket via ``task
12
- scm:issue:list`` + ``task scm:issue:view`` (one round trip per issue,
13
- ~1.27s/issue on the 2026-05-19 dogfood). The 396-issue cohort burned
14
- ~8.5 minutes and ~400 GraphQL points while the REST ``core`` bucket
15
- sat idle. This module now drives the enumeration through the paginated
16
- REST endpoint :func:`gh_rest.rest_issue_list_paginated` (a 396-issue
17
- cohort fans out to 4 round trips at ``per_page=100``) and consumes the
18
- full REST issue payload directly -- no per-issue follow-up fetch is
19
- needed because ``GET /repos/.../issues`` returns ``title`` / ``body`` /
20
- ``state`` / ``labels`` / ``updated_at`` inline.
21
-
22
- Cached payloads now carry the canonical lowercase ``"state": "open"``
23
- (REST shape) -- this is the writer-side fix that #1236's reader-side
24
- defensive lowercase compare also addresses for any pre-migration cache
25
- still on disk.
26
-
27
- Test seams
28
- ----------
29
- - :data:`_paginated_lister` -- callable matching ``rest_issue_list_paginated``.
30
- Tests rebind it to deterministic fakes via ``monkeypatch.setattr``.
31
- - :data:`_sleep` -- ``time.sleep``. Tests rebind for hermetic per-issue
32
- delay coverage.
33
- - :data:`_run_subprocess` -- legacy alias preserved for tests still
34
- pinning the GraphQL flow. New paths route through the REST seam.
35
- """
36
-
37
- from __future__ import annotations
38
-
39
- import json
40
- import re
41
- import subprocess
42
- import sys
43
- import time
44
- from collections.abc import Callable
45
- from dataclasses import dataclass, field
46
- from pathlib import Path
47
- from typing import Any
48
-
49
- # Make sibling ``scripts`` modules importable when this script is
50
- # executed via ``python scripts/_cache_fetch.py`` from a Taskfile
51
- # dispatch.
52
- sys.path.insert(0, str(Path(__file__).resolve().parent))
53
-
54
- from gh_rest import ( # noqa: E402 -- intentional sys.path tweak
55
- GhRestError,
56
- InvalidRepoError,
57
- rest_issue_list_paginated,
58
- rest_issue_view,
59
- )
60
-
61
- # ---------------------------------------------------------------------------
62
- # Test seams (module-level callables; monkeypatched by tests)
63
- # ---------------------------------------------------------------------------
64
-
65
#: Paginated REST issue lister used for cohort enumeration. Tests rebind
#: it to a deterministic fake via
#: ``monkeypatch.setattr(_cache_fetch, "_paginated_lister", fake)``.
#: Callers in this module invoke it as
#: ``_paginated_lister(repo, state=..., limit=..., **filters)``.
_paginated_lister: Callable[..., list[dict[str, Any]]] = rest_issue_list_paginated

#: Single-issue REST reader used by the #1476 state-refresh path to
#: resolve the live state of a cached-open entry that vanished from the
#: default open-only enumeration. Tests rebind to a deterministic fake
#: via ``monkeypatch.setattr(_cache_fetch, "_single_issue_fetcher", fake)``.
#: It is looked up at call time, so a rebind takes effect immediately.
_single_issue_fetcher: Callable[[str, int], dict[str, Any]] = rest_issue_view

#: Sleep callable; tests rebind to a no-op so the per-issue delay loop
#: and the rate-limit back-off do not burn wall-clock.
_sleep: Callable[[float], None] = time.sleep

#: Progress writer; tests rebind to capture lines without stderr I/O.
#: NOTE: this binds the ``write`` method of whatever ``sys.stderr`` is at
#: import time; replacing ``sys.stderr`` later does not re-point it.
_progress_writer: Callable[[str], None] = sys.stderr.write

#: Progress flusher; tests may rebind alongside ``_progress_writer`` when the
#: writer is not stderr-backed.
_progress_flusher: Callable[[], None] = sys.stderr.flush

#: Legacy subprocess seam preserved for back-compat with tests that
#: pinned the pre-#1239 GraphQL flow. Unused on the REST path.
_run_subprocess: Callable[..., Any] = subprocess.run

#: Compiled rate-limit detector. Matches the canonical 429 surfaces
#: emitted by gh / ghx in stderr; retained for the REST flow because
#: the REST core bucket can also throttle (5,000/hr/user).
#: The ``API rate limit exceeded`` alternative is subsumed by the shorter
#: ``rate limit exceeded`` one; it is kept for readability only.
_RATE_LIMIT_RE: re.Pattern[str] = re.compile(
    r"(?:HTTP\s*429|API rate limit exceeded|rate limit exceeded)", re.IGNORECASE
)
#: Extracts the integer number of seconds from a ``Retry-After:`` header
#: line embedded in the diagnostic text (case-insensitive).
_RETRY_AFTER_RE: re.Pattern[str] = re.compile(r"Retry-After:\s*(\d+)", re.IGNORECASE)

#: Fallback Retry-After interval (seconds) when the 429 stderr text omits
#: the header. 60s mirrors GitHub's documented per-token recovery cadence.
DEFAULT_RETRY_AFTER_FALLBACK_S: int = 60

#: Emit in-loop progress every N processed issues on large cohorts so
#: ``task triage:bootstrap`` step 1 does not look hung (#1562). Cohorts
#: smaller than N emit no progress lines at all.
PROGRESS_EVERY_N: int = 50
105
-
106
-
107
class CacheFetchError(RuntimeError):
    """Fatal failure of the fetch-all orchestration.

    Raised when the issue cohort cannot be enumerated at all (subprocess
    or parse failure); per-issue problems are reported, not raised.
    """
109
-
110
-
111
- # ---------------------------------------------------------------------------
112
- # Rate-limit detection (REST core bucket recovery)
113
- # ---------------------------------------------------------------------------
114
-
115
-
116
def detect_rate_limit(stderr: str) -> tuple[bool, int]:
    """Classify diagnostic text as a rate-limit response.

    Args:
        stderr: Text to scan (subprocess stderr or an error message).
            Empty / falsy input is treated as "not rate-limited".

    Returns:
        ``(is_rate_limited, retry_after_seconds)``. The second element is
        the value parsed from a ``Retry-After:`` header when one is
        present and parseable, otherwise
        :data:`DEFAULT_RETRY_AFTER_FALLBACK_S` (also returned alongside
        ``False``).
    """
    fallback = DEFAULT_RETRY_AFTER_FALLBACK_S
    limited = bool(stderr) and _RATE_LIMIT_RE.search(stderr) is not None
    if not limited:
        return False, fallback

    header = _RETRY_AFTER_RE.search(stderr)
    if header is None:
        return True, fallback
    try:
        seconds = int(header.group(1))
    except ValueError:
        # The digits could not be converted; fall back rather than fail.
        seconds = fallback
    return True, seconds
131
-
132
-
133
- # ---------------------------------------------------------------------------
134
- # REST normalisation
135
- # ---------------------------------------------------------------------------
136
-
137
-
138
def _normalise_rest_issue(raw: dict[str, Any]) -> dict[str, Any]:
    """Copy a REST issue payload, forcing a string ``state`` to lowercase.

    The REST payload is otherwise passed through untouched. Lowercasing
    is purely defensive: it keeps the cached ``state`` canonical even if
    an upstream tool ever emitted a capitalised value (the #1236
    reader-side regression).

    Args:
        raw: Issue payload as returned by the REST lister / reader.

    Returns:
        A new shallow copy of ``raw``; the input is never mutated. A
        missing or non-string ``state`` is left exactly as found.
    """
    issue = dict(raw)
    current = issue.get("state")
    if not isinstance(current, str):
        return issue
    issue["state"] = current.lower()
    return issue
158
-
159
-
160
- # ---------------------------------------------------------------------------
161
- # Result aggregator
162
- # ---------------------------------------------------------------------------
163
-
164
-
165
@dataclass
class FetchAllReport:
    """Per-issue outcome counters produced by :func:`run_fetch_all`.

    Naming (#1247)
    --------------
    The counters used to be called ``succeeded`` / ``failed`` /
    ``skipped``. A recap such as ``succeeded=1 failed=0 skipped=396``
    read as "396 items were dropped", although ``skipped`` only meant
    "already fresh in the cache, no re-fetch needed". The canonical
    names are therefore:

    * ``issues_written`` -- cache writes that landed (fetch + put);
    * ``already_fresh``  -- entries still inside their TTL window;
    * ``issues_failed``  -- cache writes that errored.

    The old names survive as read/write property aliases so existing
    callers and tests keep working until they migrate. :meth:`to_json`
    emits both key sets; :meth:`summary_line` renders the canonical
    one-line recap.
    """

    #: Cache writes that landed on disk (``succeeded`` pre-#1247).
    issues_written: int = 0
    #: Cache writes that raised (``failed`` pre-#1247).
    issues_failed: int = 0
    #: Entries left alone because their cached copy was still fresh
    #: (``skipped`` pre-#1247, the name that caused the confusion).
    already_fresh: int = 0
    #: One ``{"key": ..., "reason": ...}`` record per failed issue.
    failures: list[dict[str, str]] = field(default_factory=list)

    # ----- Legacy aliases (#1247) -----
    #
    # ``report.succeeded`` / ``report.failed`` / ``report.skipped`` are
    # still read (and assigned) by external callers; each alias simply
    # forwards to its canonical attribute. New code SHOULD use the
    # canonical names.

    @property
    def succeeded(self) -> int:
        """Backward-compatible view of :attr:`issues_written`."""
        return self.issues_written

    @succeeded.setter
    def succeeded(self, value: int) -> None:
        self.issues_written = value

    @property
    def failed(self) -> int:
        """Backward-compatible view of :attr:`issues_failed`."""
        return self.issues_failed

    @failed.setter
    def failed(self, value: int) -> None:
        self.issues_failed = value

    @property
    def skipped(self) -> int:
        """Backward-compatible view of :attr:`already_fresh`."""
        return self.already_fresh

    @skipped.setter
    def skipped(self, value: int) -> None:
        self.already_fresh = value

    def to_json(self) -> str:
        """Serialise the report as a key-sorted JSON object.

        Both the canonical (#1247) keys and their legacy duplicates are
        emitted so consumers that still assert on ``succeeded`` /
        ``failed`` / ``skipped`` keep passing during the rename rollout.
        """
        written = self.issues_written
        fresh = self.already_fresh
        errored = self.issues_failed
        payload = {
            # Canonical surface.
            "issues_written": written,
            "already_fresh": fresh,
            "issues_failed": errored,
            # Legacy duplicates, kept for back-compat.
            "succeeded": written,
            "failed": errored,
            "skipped": fresh,
            "failures": self.failures,
        }
        return json.dumps(payload, ensure_ascii=False, sort_keys=True)

    def summary_line(self, *, source: str, repo: str) -> str:
        """Render the single-line, greppable recap (#1247).

        Example output::

            cache:fetch-all source=github-issue repo=owner/name issues_written=1 already_fresh=396 issues_failed=0

        Args:
            source: Cache source label, echoed verbatim.
            repo: ``owner/repo`` slug, echoed verbatim.
        """
        segments = [
            f"cache:fetch-all source={source} repo={repo}",
            f"issues_written={self.issues_written}",
            f"already_fresh={self.already_fresh}",
            f"issues_failed={self.issues_failed}",
        ]
        return " ".join(segments)
291
-
292
-
293
- # ---------------------------------------------------------------------------
294
- # Orchestrator
295
- # ---------------------------------------------------------------------------
296
-
297
-
298
def run_fetch_all(
    *,
    repo: str,
    is_fresh: Callable[[Path], bool],
    entry_dir_for: Callable[[str], Path],
    do_put: Callable[[str, dict[str, Any]], None],
    batch_size: int,
    delay_ms: int,
    state: str,
    limit: int,
    labels: tuple[str, ...] = (),
    author: str | None = None,
) -> FetchAllReport:
    """Drive the cache:fetch-all loop via paginated REST.

    The cohort is enumerated once through :func:`_list_issues_rest`; each
    returned issue is then handed to :func:`_cache_one_issue`, which
    either counts it as already fresh, writes it through ``do_put``, or
    records a per-issue failure.

    Args:
        repo: Validated ``owner/repo`` slug.
        is_fresh: Callable ``meta_path -> bool`` that returns True when
            the on-disk meta.json is fresh per its TTL. Caller-supplied
            so this module does not import the cache layer's validator
            directly.
        entry_dir_for: Callable ``key -> Path`` that maps a cache key to
            the entry directory path.
        do_put: Callable ``(key, raw) -> None`` that persists the issue.
            Any exception it raises is captured as a per-issue failure.
        batch_size: Checkpoint cadence: after every ``batch_size``-th
            processed issue one additional ``delay_ms`` pause is taken.
            Validated > 0 by the caller.
        delay_ms: Pause (ms) taken after every processed issue; values
            <= 0 disable sleeping. Validated >= 0 by the caller.
        state: Forwarded to the REST enumeration
            (``open``/``closed``/``all``).
        limit: Forwarded to the REST enumeration.
        labels: Optional label filter (#1033) forwarded to the REST
            enumeration. Empty tuple (default) applies no label filter.
        author: Optional issue-creator login (#1055) forwarded to the
            REST enumeration. ``None`` (default) applies no author
            filter. Composes with ``labels`` via AND.

    Returns:
        :class:`FetchAllReport` carrying the ``issues_written`` /
        ``already_fresh`` / ``issues_failed`` counters and a structured
        ``failures`` list.

    Raises:
        CacheFetchError: When the REST enumeration itself fails (the
            cohort cannot be listed). Malformed entries and ``do_put``
            failures are captured on the report, not raised.
    """
    issues = _list_issues_rest(
        repo, state=state, limit=limit, labels=labels, author=author
    )
    report = FetchAllReport()
    total = len(issues)
    # Small cohorts finish quickly; only large ones get progress lines.
    show_progress = total >= PROGRESS_EVERY_N
    if show_progress:
        _emit_fetch_progress(
            repo=repo,
            phase="enumerated",
            processed=0,
            total=total,
            report=report,
        )

    for processed, issue in enumerate(issues, start=1):
        _cache_one_issue(
            issue,
            repo=repo,
            is_fresh=is_fresh,
            entry_dir_for=entry_dir_for,
            do_put=do_put,
            report=report,
        )

        if show_progress and (
            processed % PROGRESS_EVERY_N == 0 or processed == total
        ):
            _emit_fetch_progress(
                repo=repo,
                phase="writing",
                processed=processed,
                total=total,
                report=report,
            )

        # Optional explicit pacing via ``--delay-ms``; production default
        # is 0 (#1562) so normal REST-batched bootstrap does not sleep
        # locally. Rate-limit recovery sleeps only on 429 retry paths.
        _maybe_sleep(delay_ms)
        # NOTE(review): a batch checkpoint adds a second pause on top of
        # the per-issue one above, so every ``batch_size``-th issue waits
        # 2 x delay_ms. Cadence preserved as-is -- confirm it is intended.
        if processed % batch_size == 0:
            _maybe_sleep(delay_ms)

    return report


def _cache_one_issue(
    issue: Any,
    *,
    repo: str,
    is_fresh: Callable[[Path], bool],
    entry_dir_for: Callable[[str], Path],
    do_put: Callable[[str, dict[str, Any]], None],
    report: FetchAllReport,
) -> None:
    """Account for one enumerated issue on ``report``; never raises for bad data."""
    if not isinstance(issue, dict):
        # A malformed element must not abort the whole cohort: record it
        # like any other per-issue failure.
        report.issues_failed += 1
        report.failures.append(
            {
                "key": f"{repo}/?",
                "reason": (
                    "invalid issue payload: expected object, got "
                    f"{type(issue).__name__}"
                ),
            }
        )
        return

    raw = _normalise_rest_issue(issue)
    number = raw.get("number")
    # ``bool`` is an ``int`` subclass; reject it explicitly so ``True``
    # cannot become the cache key ``<repo>/True``.
    if isinstance(number, bool) or not isinstance(number, int) or number <= 0:
        report.issues_failed += 1
        report.failures.append(
            {"key": f"{repo}/?", "reason": f"invalid 'number' field: {number!r}"}
        )
        return

    key = f"{repo}/{number}"
    if is_fresh(entry_dir_for(key) / "meta.json"):
        report.already_fresh += 1
        return

    try:
        do_put(key, raw)
    except Exception as exc:  # noqa: BLE001 -- caller's CacheError variants
        report.issues_failed += 1
        report.failures.append({"key": key, "reason": str(exc)})
    else:
        report.issues_written += 1
405
-
406
-
407
def _list_issues_rest(
    repo: str,
    *,
    state: str,
    limit: int,
    labels: tuple[str, ...] = (),
    author: str | None = None,
) -> list[dict[str, Any]]:
    """Enumerate issues via the paginated REST lister, retrying once on 429.

    REST's ``core`` bucket has a 5000/hr/user budget -- much larger than
    GraphQL's, but still throttleable on hot swarm sessions. When the
    first attempt fails with a rate-limit signature we sleep for the
    reported Retry-After (or the fallback constant) and try exactly once
    more before surfacing the failure.

    Rate-limit detection scans BOTH the exception message and, when the
    exception carries one, its ``stderr`` text, so a 429 marker that only
    appears in the captured stderr is not missed.

    ``labels`` (#1033) and ``author`` (#1055) are forwarded to the
    paginated lister only when a filter is actually set, so the no-filter
    call signature stays identical to the pre-#1033/#1055 behaviour (the
    #1476 refresh-closed reconciliation path reuses this helper with no
    filters).

    Args:
        repo: ``owner/repo`` slug.
        state: Issue state filter forwarded to the lister.
        limit: Maximum number of issues forwarded to the lister.
        labels: Optional label filter; empty means "no filter".
        author: Optional creator filter; ``None`` means "no filter".

    Returns:
        The list of raw REST issue payloads returned by the lister.

    Raises:
        CacheFetchError: On an invalid repo slug, on a non-rate-limit
            REST failure, or when the single retry also fails.
    """
    filter_kwargs: dict[str, Any] = {}
    if labels:
        filter_kwargs["labels"] = labels
    if author is not None:
        filter_kwargs["author"] = author
    try:
        return _paginated_lister(repo, state=state, limit=limit, **filter_kwargs)
    except InvalidRepoError as exc:
        raise CacheFetchError(f"invalid --repo {repo!r} for REST list enumeration: {exc}") from exc
    except GhRestError as exc:
        # Inspect every textual surface the error offers: the message may
        # be a short summary while the 429 / Retry-After detail lives in
        # the captured stderr (or vice versa).
        diagnostic = "\n".join(
            part
            for part in (str(exc), getattr(exc, "stderr", None))
            if isinstance(part, str) and part
        )
        is_429, retry_after = detect_rate_limit(diagnostic)
        if not is_429:
            raise CacheFetchError(
                f"rest_issue_list_paginated failed for repo={repo}: {exc}"
            ) from exc
        sys.stderr.write(
            f"cache:fetch-all rate-limited on enumeration ({repo}); sleeping "
            f"{retry_after}s before retry\n"
        )
        _sleep(retry_after)
        try:
            return _paginated_lister(repo, state=state, limit=limit, **filter_kwargs)
        except GhRestError as exc2:
            raise CacheFetchError(
                f"rest_issue_list_paginated failed twice for repo={repo}: {exc2}"
            ) from exc2
455
-
456
-
457
def _maybe_sleep(delay_ms: int) -> None:
    """Pause for ``delay_ms`` milliseconds via the ``_sleep`` seam; no-op unless positive."""
    if delay_ms > 0:
        seconds = delay_ms / 1000.0
        _sleep(seconds)
460
-
461
-
462
def _emit_fetch_progress(
    *,
    repo: str,
    phase: str,
    processed: int,
    total: int,
    report: FetchAllReport,
) -> None:
    """Emit one best-effort progress line for a long cache:fetch-all run (#1562).

    Args:
        repo: ``owner/repo`` slug echoed in the line.
        phase: ``"enumerated"`` renders the post-listing banner; any
            other value renders the running-counters line.
        processed: Number of issues handled so far (counters line only).
        total: Size of the enumerated cohort.
        report: Live report whose counters are rendered.
    """
    prefix = f"cache:fetch-all progress repo={repo} "
    if phase == "enumerated":
        detail = f"enumerated={total} issues; writing cache entries...\n"
    else:
        detail = (
            f"processed={processed}/{total} "
            f"issues_written={report.issues_written} "
            f"already_fresh={report.already_fresh} "
            f"issues_failed={report.issues_failed}\n"
        )
    try:
        _progress_writer(prefix + detail)
        _progress_flusher()
    except (OSError, ValueError):
        # A closed or unavailable stderr/log sink must never interrupt
        # the cache writes; drop the line silently.
        pass
491
-
492
-
493
- # ---------------------------------------------------------------------------
494
- # State-refresh path (#1476) -- reconcile cached-open entries that closed
495
- # upstream against the default open-only enumeration.
496
- # ---------------------------------------------------------------------------
497
-
498
-
499
@dataclass
class StateRefreshReport:
    """Outcome counters produced by :func:`run_state_refresh` (#1476).

    ``cache:fetch-all`` enumerates ``state=open`` by default, so an issue
    that closes upstream silently drops out of the enumeration and its
    cached ``raw.json`` is never rewritten -- it keeps surfacing as
    actionable ``triage:queue`` work until the cache TTL expires (the
    #1322 shape). The refresh revisits every cached-open entry that is
    missing from the open enumeration and rewrites it to its live state;
    this report tallies what happened.
    """

    #: Cached-open entries absent from the open enumeration and
    #: therefore looked up individually.
    revisited: int = 0
    #: Revisited entries found closed upstream and rewritten on disk.
    closed_rewritten: int = 0
    #: Revisited entries not confirmed closed (still open, or state
    #: unreadable) -- left untouched.
    still_open: int = 0
    #: Revisited entries whose single-issue fetch or rewrite raised.
    refresh_failed: int = 0
    #: One ``{"key": ..., "reason": ...}`` record per failed entry.
    failures: list[dict[str, str]] = field(default_factory=list)

    def to_json(self) -> str:
        """Serialise the counters and failures as a key-sorted JSON object."""
        payload = {
            "revisited": self.revisited,
            "closed_rewritten": self.closed_rewritten,
            "still_open": self.still_open,
            "refresh_failed": self.refresh_failed,
            "failures": self.failures,
        }
        return json.dumps(payload, ensure_ascii=False, sort_keys=True)

    def summary_line(self, *, source: str, repo: str) -> str:
        """Render the single-line ``cache:refresh-closed`` recap."""
        segments = [
            f"cache:refresh-closed source={source} repo={repo}",
            f"revisited={self.revisited}",
            f"closed_rewritten={self.closed_rewritten}",
            f"still_open={self.still_open}",
            f"refresh_failed={self.refresh_failed}",
        ]
        return " ".join(segments)
546
-
547
-
548
def list_open_issue_numbers(
    repo: str, *, state: str = "open", limit: int = 1000
) -> set[int]:
    """Collect the issue numbers reported by the REST enumeration of ``repo``.

    Delegates to :func:`_list_issues_rest` (sharing its 429-retry path
    and the ``_paginated_lister`` test seam) and keeps only the positive
    integer ``number`` of each dict-shaped entry; anything else is
    ignored. The #1476 state-refresh path in :mod:`cache` uses the result
    to learn which cached entries are still open upstream.

    Args:
        repo: ``owner/repo`` slug.
        state: Issue state filter forwarded to the enumeration.
        limit: Maximum number of issues forwarded to the enumeration.

    Returns:
        The set of issue numbers found.
    """
    candidates = (
        entry.get("number")
        for entry in _list_issues_rest(repo, state=state, limit=limit)
        if isinstance(entry, dict)
    )
    return {value for value in candidates if isinstance(value, int) and value > 0}
564
-
565
-
566
def run_state_refresh(
    *,
    repo: str,
    open_numbers: set[int],
    cached_open: list[tuple[int, dict[str, Any]]],
    do_put: Callable[[str, dict[str, Any]], None],
    fetch_single: Callable[[str, int], dict[str, Any]] | None = None,
    delay_ms: int = 0,
) -> StateRefreshReport:
    """Rewrite cached-open entries that have closed upstream (#1476).

    Entries whose number is still in ``open_numbers`` are skipped without
    any network call. Every other entry is "revisited": its live payload
    is fetched and, only when the live ``state`` is ``closed``
    (case-insensitive), the normalised payload is written back through
    ``do_put``. A live state that is open, missing, or unreadable leaves
    the cache untouched so a genuinely-open issue is never dropped.

    Args:
        repo: Validated ``owner/repo`` slug.
        open_numbers: Issue numbers currently returned by the upstream
            open-only enumeration (e.g. from
            :func:`list_open_issue_numbers`).
        cached_open: ``(number, raw)`` pairs for on-disk entries whose
            ``raw.json`` says ``state=open``; only the number is used
            here. Supplied by the caller.
        do_put: Callable ``(key, raw) -> None`` that rewrites the cache
            entry. Exceptions are recorded, not propagated.
        fetch_single: Callable ``(repo, n) -> dict`` returning the live
            single-issue payload. ``None`` selects the module seam
            :data:`_single_issue_fetcher`. Exceptions are recorded, not
            propagated.
        delay_ms: Pause (ms) taken after each revisited entry.

    Returns:
        :class:`StateRefreshReport` with revisit / rewrite / failure
        counts and a structured failures list.
    """
    fetcher = fetch_single if fetch_single is not None else _single_issue_fetcher
    report = StateRefreshReport()

    for number, _cached in cached_open:
        if number in open_numbers:
            # Present in the open enumeration: nothing to reconcile.
            continue

        report.revisited += 1
        key = f"{repo}/{number}"
        try:
            live = fetcher(repo, number)
        except Exception as exc:  # noqa: BLE001 -- any fetch failure is recorded
            report.refresh_failed += 1
            report.failures.append({"key": key, "reason": f"fetch failed: {exc}"})
        else:
            state_value = live.get("state") if isinstance(live, dict) else None
            confirmed_closed = (
                isinstance(state_value, str) and state_value.lower() == "closed"
            )
            if not confirmed_closed:
                # Open or unparseable: keep the cached entry as-is.
                report.still_open += 1
            else:
                try:
                    do_put(key, _normalise_rest_issue(live))
                    report.closed_rewritten += 1
                except Exception as exc:  # noqa: BLE001 -- any rewrite failure recorded
                    report.refresh_failed += 1
                    report.failures.append(
                        {"key": key, "reason": f"rewrite failed: {exc}"}
                    )
        # Exactly one pause per revisited entry, whatever its outcome.
        _maybe_sleep(delay_ms)

    return report