@deftai/directive-content 0.58.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. package/.githooks/pre-push +10 -9
  2. package/Taskfile.yml +57 -67
  3. package/UPGRADING.md +1 -1
  4. package/docs/assets/directive-lifecycle-diagram.png +0 -0
  5. package/docs/directive-lifecycle.md +73 -0
  6. package/docs/getting-started.md +5 -1
  7. package/package.json +3 -3
  8. package/packs/rules/rules-pack-0.1.json +3 -3
  9. package/packs/skills/skills-pack-0.1.json +22 -22
  10. package/scm/github.md +20 -2
  11. package/tasks/change.yml +16 -31
  12. package/tasks/ci.yml +8 -0
  13. package/tasks/commit.yml +12 -19
  14. package/tasks/core.yml +10 -0
  15. package/tasks/engine.yml +42 -0
  16. package/tasks/framework.yml +3 -0
  17. package/tasks/install.yml +20 -19
  18. package/tasks/migrate.yml +26 -15
  19. package/tasks/project.yml +16 -0
  20. package/tasks/relocate.yml +18 -48
  21. package/tasks/toolchain.yml +15 -5
  22. package/tasks/vbrief.yml +4 -3
  23. package/tasks/verify.yml +12 -14
  24. package/templates/agents-entry.md +1 -2
  25. package/scripts/_agents_md.py +0 -494
  26. package/scripts/_cache_fetch.py +0 -635
  27. package/scripts/_cache_quota.py +0 -529
  28. package/scripts/_cache_refresh.py +0 -163
  29. package/scripts/_cache_validate.py +0 -209
  30. package/scripts/_content_root.py +0 -42
  31. package/scripts/_doctor_state.py +0 -277
  32. package/scripts/_event_detect.py +0 -305
  33. package/scripts/_events.py +0 -514
  34. package/scripts/_lifecycle_hygiene.py +0 -568
  35. package/scripts/_pathspec.py +0 -91
  36. package/scripts/_policy_show_cli.py +0 -266
  37. package/scripts/_precutover.py +0 -92
  38. package/scripts/_project_context.py +0 -224
  39. package/scripts/_project_definition_io.py +0 -164
  40. package/scripts/_relocate_snapshot.py +0 -209
  41. package/scripts/_relocate_states.py +0 -343
  42. package/scripts/_resolve_preflight_path.py +0 -152
  43. package/scripts/_safe_subprocess.py +0 -167
  44. package/scripts/_session_start_hook.py +0 -205
  45. package/scripts/_sor_gate_diff.py +0 -365
  46. package/scripts/_stdio_utf8.py +0 -59
  47. package/scripts/_triage_bootstrap_gitignore.py +0 -904
  48. package/scripts/_triage_classify_cli.py +0 -122
  49. package/scripts/_triage_queue_cli.py +0 -625
  50. package/scripts/_triage_scope_cli.py +0 -343
  51. package/scripts/_triage_scope_drift_cli.py +0 -121
  52. package/scripts/_triage_scope_ignores.py +0 -286
  53. package/scripts/_triage_scope_milestone.py +0 -432
  54. package/scripts/_triage_scope_mutations.py +0 -337
  55. package/scripts/_triage_scope_renderers.py +0 -207
  56. package/scripts/_triage_smoketest_stages.py +0 -674
  57. package/scripts/_triage_subscribe_cli.py +0 -140
  58. package/scripts/_triage_welcome_cli.py +0 -421
  59. package/scripts/_vbrief_build.py +0 -239
  60. package/scripts/_vbrief_fidelity.py +0 -479
  61. package/scripts/_vbrief_legacy.py +0 -589
  62. package/scripts/_vbrief_reconciliation.py +0 -883
  63. package/scripts/_vbrief_routing.py +0 -277
  64. package/scripts/_vbrief_safety.py +0 -778
  65. package/scripts/_vbrief_sources.py +0 -312
  66. package/scripts/_vbrief_speckit.py +0 -262
  67. package/scripts/_vbrief_story_quality.py +0 -353
  68. package/scripts/_vbrief_validation.py +0 -299
  69. package/scripts/build_dist.py +0 -412
  70. package/scripts/cache.py +0 -1078
  71. package/scripts/cache_scanner.py +0 -745
  72. package/scripts/candidates_log.py +0 -432
  73. package/scripts/capacity_backfill.py +0 -680
  74. package/scripts/capacity_show.py +0 -653
  75. package/scripts/ci_local.py +0 -689
  76. package/scripts/code_structure_validate.py +0 -765
  77. package/scripts/codebase_default_extractor.py +0 -495
  78. package/scripts/codebase_map.py +0 -304
  79. package/scripts/codebase_map_fresh.py +0 -104
  80. package/scripts/codebase_projection_registry.py +0 -94
  81. package/scripts/codebase_provider.py +0 -582
  82. package/scripts/doctor.py +0 -2551
  83. package/scripts/framework_commands.py +0 -505
  84. package/scripts/gh_rest.py +0 -882
  85. package/scripts/github_auth_modes.py +0 -437
  86. package/scripts/github_body.py +0 -292
  87. package/scripts/ip_risk.py +0 -531
  88. package/scripts/issue_emit.py +0 -670
  89. package/scripts/issue_ingest.py +0 -1064
  90. package/scripts/migrate_preflight.py +0 -418
  91. package/scripts/migrate_vbrief.py +0 -2677
  92. package/scripts/monitor_pr.py +0 -401
  93. package/scripts/pack_migrate_lessons.py +0 -336
  94. package/scripts/pack_migrate_patterns.py +0 -254
  95. package/scripts/pack_migrate_rules.py +0 -350
  96. package/scripts/pack_migrate_skills.py +0 -423
  97. package/scripts/pack_migrate_strategies.py +0 -311
  98. package/scripts/pack_migrate_swarm_spec.py +0 -250
  99. package/scripts/pack_render.py +0 -434
  100. package/scripts/packs_slice.py +0 -712
  101. package/scripts/platform_capabilities.py +0 -336
  102. package/scripts/policy.py +0 -2826
  103. package/scripts/policy_set.py +0 -324
  104. package/scripts/pr_check_closing_keywords.py +0 -524
  105. package/scripts/pr_check_protected_issues.py +0 -267
  106. package/scripts/pr_merge_readiness.py +0 -1004
  107. package/scripts/pr_wait_mergeable.py +0 -669
  108. package/scripts/prd_render.py +0 -159
  109. package/scripts/preflight_architecture_sor.py +0 -974
  110. package/scripts/preflight_branch.py +0 -289
  111. package/scripts/preflight_cache.py +0 -974
  112. package/scripts/preflight_gh.py +0 -721
  113. package/scripts/preflight_implementation.py +0 -272
  114. package/scripts/preflight_story_start.py +0 -838
  115. package/scripts/preflight_wip_cap.py +0 -149
  116. package/scripts/probe_session.py +0 -545
  117. package/scripts/project_render.py +0 -293
  118. package/scripts/quarantine_ext.py +0 -237
  119. package/scripts/reconcile_issues.py +0 -1442
  120. package/scripts/refresh-path.ps1 +0 -107
  121. package/scripts/release.py +0 -2030
  122. package/scripts/release_e2e.py +0 -1011
  123. package/scripts/release_publish.py +0 -486
  124. package/scripts/release_rollback.py +0 -980
  125. package/scripts/relocate.py +0 -1034
  126. package/scripts/resolve_changelog_unreleased.py +0 -667
  127. package/scripts/resolve_version.py +0 -490
  128. package/scripts/resume_conditions.py +0 -706
  129. package/scripts/ritual_sentinel.py +0 -609
  130. package/scripts/roadmap_render.py +0 -635
  131. package/scripts/rule_ownership_lint.py +0 -325
  132. package/scripts/scm.py +0 -591
  133. package/scripts/scope_audit_log.py +0 -387
  134. package/scripts/scope_decompose.py +0 -654
  135. package/scripts/scope_demote.py +0 -509
  136. package/scripts/scope_lifecycle.py +0 -1126
  137. package/scripts/scope_undo.py +0 -772
  138. package/scripts/session_start.py +0 -406
  139. package/scripts/setup_ghx.py +0 -339
  140. package/scripts/setup_windows.ps1 +0 -220
  141. package/scripts/slice_audit.py +0 -585
  142. package/scripts/slice_record.py +0 -530
  143. package/scripts/slice_record_existing.py +0 -692
  144. package/scripts/slug_normalize.py +0 -178
  145. package/scripts/spec_render.py +0 -477
  146. package/scripts/spec_validate.py +0 -238
  147. package/scripts/subagent_monitor.py +0 -658
  148. package/scripts/swarm_complete_cohort.py +0 -644
  149. package/scripts/swarm_launch.py +0 -1206
  150. package/scripts/swarm_readiness.py +0 -554
  151. package/scripts/swarm_verify_review_clean.py +0 -438
  152. package/scripts/swarm_worktrees.py +0 -497
  153. package/scripts/toolchain-check.py +0 -52
  154. package/scripts/triage_actions.py +0 -871
  155. package/scripts/triage_bootstrap.py +0 -1153
  156. package/scripts/triage_bulk.py +0 -630
  157. package/scripts/triage_classify.py +0 -932
  158. package/scripts/triage_help.py +0 -1685
  159. package/scripts/triage_queue.py +0 -1944
  160. package/scripts/triage_reconcile.py +0 -581
  161. package/scripts/triage_refresh.py +0 -643
  162. package/scripts/triage_scope.py +0 -999
  163. package/scripts/triage_scope_drift.py +0 -575
  164. package/scripts/triage_smoketest.py +0 -396
  165. package/scripts/triage_subscribe.py +0 -399
  166. package/scripts/triage_summary.py +0 -1011
  167. package/scripts/triage_welcome.py +0 -1178
  168. package/scripts/ts_check_lane.py +0 -86
  169. package/scripts/validate-links.py +0 -64
  170. package/scripts/validate_strategy_output.py +0 -212
  171. package/scripts/vbrief_activate.py +0 -228
  172. package/scripts/vbrief_migrate_conformance.py +0 -368
  173. package/scripts/vbrief_reconcile_graph.py +0 -306
  174. package/scripts/vbrief_reconcile_labels.py +0 -460
  175. package/scripts/vbrief_reconcile_umbrellas.py +0 -741
  176. package/scripts/vbrief_validate.py +0 -1144
  177. package/scripts/verify-stubs.py +0 -61
  178. package/scripts/verify_capacity.py +0 -160
  179. package/scripts/verify_encoding.py +0 -699
  180. package/scripts/verify_hooks_installed.py +0 -206
  181. package/scripts/verify_investigation.py +0 -360
  182. package/scripts/verify_judgment_gates.py +0 -827
  183. package/scripts/verify_no_task_runtime.py +0 -171
  184. package/scripts/verify_scm_boundary.py +0 -509
  185. package/scripts/verify_session_ritual.py +0 -389
  186. package/scripts/verify_tools.py +0 -426
  187. package/scripts/verify_vbrief_conformance.py +0 -478
@@ -1,635 +0,0 @@
1
- #!/usr/bin/env python3
2
- """_cache_fetch.py -- cache:fetch-all orchestrator (#883 Story 2 + #1239 REST migration).
3
-
4
- Drives the per-repo bootstrap mirror that writes one
5
- ``.deft-cache/github-issue/<owner>/<repo>/<N>/`` entry per upstream
6
- issue. Lives in a separate module from :mod:`cache` to keep the parent
7
- under the 1000-line MUST limit from ``coding/coding.md``.
8
-
9
- #1239 / Writer-side REST migration
10
- ----------------------------------
11
- Pre-#1239 the orchestrator drained the GraphQL bucket via ``task
12
- scm:issue:list`` + ``task scm:issue:view`` (one round trip per issue,
13
- ~1.27s/issue on the 2026-05-19 dogfood). The 396-issue cohort burned
14
- ~8.5 minutes and ~400 GraphQL points while the REST ``core`` bucket
15
- sat idle. This module now drives the enumeration through the paginated
16
- REST endpoint :func:`gh_rest.rest_issue_list_paginated` (a 396-issue
17
- cohort fans out to 4 round trips at ``per_page=100``) and consumes the
18
- full REST issue payload directly -- no per-issue follow-up fetch is
19
- needed because ``GET /repos/.../issues`` returns ``title`` / ``body`` /
20
- ``state`` / ``labels`` / ``updated_at`` inline.
21
-
22
- Cached payloads now carry the canonical lowercase ``"state": "open"``
23
- (REST shape) -- this is the writer-side fix that #1236's reader-side
24
- defensive lowercase compare also addresses for any pre-migration cache
25
- still on disk.
26
-
27
- Test seams
28
- ----------
29
- - :data:`_paginated_lister` -- callable matching ``rest_issue_list_paginated``.
30
- Tests rebind it to deterministic fakes via ``monkeypatch.setattr``.
31
- - :data:`_sleep` -- ``time.sleep``. Tests rebind for hermetic per-issue
32
- delay coverage.
33
- - :data:`_run_subprocess` -- legacy alias preserved for tests still
34
- pinning the GraphQL flow. New paths route through the REST seam.
35
- """
36
-
37
- from __future__ import annotations
38
-
39
- import json
40
- import re
41
- import subprocess
42
- import sys
43
- import time
44
- from collections.abc import Callable
45
- from dataclasses import dataclass, field
46
- from pathlib import Path
47
- from typing import Any
48
-
49
- # Make sibling ``scripts`` modules importable when this script is
50
- # executed via ``python scripts/_cache_fetch.py`` from a Taskfile
51
- # dispatch.
52
- sys.path.insert(0, str(Path(__file__).resolve().parent))
53
-
54
- from gh_rest import ( # noqa: E402 -- intentional sys.path tweak
55
- GhRestError,
56
- InvalidRepoError,
57
- rest_issue_list_paginated,
58
- rest_issue_view,
59
- )
60
-
# ---------------------------------------------------------------------------
# Module-level seams and tunables
#
# Every callable below is looked up on the module at call time, so tests can
# swap it with ``monkeypatch.setattr(_cache_fetch, "<name>", fake)``.
# ---------------------------------------------------------------------------

#: Issue enumerator for the paginated REST listing; defaults to
#: ``gh_rest.rest_issue_list_paginated``.
_paginated_lister: Callable[..., list[dict[str, Any]]] = rest_issue_list_paginated

#: Single-issue REST reader. The #1476 state-refresh path calls it to learn
#: the live state of a cached-open entry that is missing from the open-only
#: enumeration; defaults to ``gh_rest.rest_issue_view``.
_single_issue_fetcher: Callable[[str, int], dict[str, Any]] = rest_issue_view

#: Sleep primitive used for pacing and rate-limit back-off. Tests replace it
#: with a no-op so no wall-clock time is spent.
_sleep: Callable[[float], None] = time.sleep

#: Sink for progress lines (stderr by default); tests rebind it to capture
#: the emitted text.
_progress_writer: Callable[[str], None] = sys.stderr.write

#: Flush companion of ``_progress_writer``; rebind it together with the
#: writer when the writer is not backed by stderr.
_progress_flusher: Callable[[], None] = sys.stderr.flush

#: Subprocess seam retained only for tests that still pin the pre-#1239
#: GraphQL flow. Nothing on the REST path calls it.
_run_subprocess: Callable[..., Any] = subprocess.run

#: Matches the rate-limit / HTTP 429 wording that gh / ghx print to stderr.
#: Kept for the REST flow because the REST core bucket (5,000/hr/user) can
#: throttle as well.
_RATE_LIMIT_RE: re.Pattern[str] = re.compile(
    r"(?:HTTP\s*429|API rate limit exceeded|rate limit exceeded)", re.IGNORECASE
)
#: Captures the integer number of seconds from a ``Retry-After:`` header line.
_RETRY_AFTER_RE: re.Pattern[str] = re.compile(r"Retry-After:\s*(\d+)", re.IGNORECASE)

#: Back-off (seconds) applied when a rate-limit message carries no
#: Retry-After value. 60s mirrors GitHub's documented per-token recovery
#: cadence.
DEFAULT_RETRY_AFTER_FALLBACK_S: int = 60

#: Progress cadence: on large cohorts one progress line is emitted every N
#: processed issues so ``task triage:bootstrap`` step 1 does not look hung
#: (#1562).
PROGRESS_EVERY_N: int = 50
105
-
106
-
class CacheFetchError(RuntimeError):
    """Fetch-all orchestration could not proceed.

    Raised by this module when the REST issue enumeration fails (invalid
    repo slug, or a REST error that is not recoverable by a single retry).
    """
109
-
110
-
111
- # ---------------------------------------------------------------------------
112
- # Rate-limit detection (REST core bucket recovery)
113
- # ---------------------------------------------------------------------------
114
-
115
-
def detect_rate_limit(stderr: str) -> tuple[bool, int]:
    """Classify error text as rate-limited and extract the back-off delay.

    Args:
        stderr: Error text to inspect (may be empty).

    Returns:
        ``(is_rate_limited, retry_after_seconds)``. The second element is
        the integer parsed from a ``Retry-After:`` header when one is
        present in a rate-limited message, and
        ``DEFAULT_RETRY_AFTER_FALLBACK_S`` in every other case (including
        the not-rate-limited case).
    """
    throttled = bool(stderr) and _RATE_LIMIT_RE.search(stderr) is not None
    if not throttled:
        return False, DEFAULT_RETRY_AFTER_FALLBACK_S

    wait_s = DEFAULT_RETRY_AFTER_FALLBACK_S
    header = _RETRY_AFTER_RE.search(stderr)
    if header is not None:
        try:
            wait_s = int(header.group(1))
        except ValueError:
            # Unparseable digit run: keep the fallback interval.
            wait_s = DEFAULT_RETRY_AFTER_FALLBACK_S
    return True, wait_s
131
-
132
-
133
- # ---------------------------------------------------------------------------
134
- # REST normalisation
135
- # ---------------------------------------------------------------------------
136
-
137
-
138
- def _normalise_rest_issue(raw: dict[str, Any]) -> dict[str, Any]:
139
- """Return a defensive copy of the REST issue payload with canonical fields.
140
-
141
- REST already emits the field shapes downstream consumers want
142
- (``state`` lowercase, ``updated_at`` snake_case, ``labels`` as list
143
- of objects). We only:
144
-
145
- * Ensure ``state`` is lowercase (defensive -- the REST API is
146
- lowercase by contract, but a future gh / ghx version that
147
- capitalised the value would otherwise re-introduce the #1236
148
- reader-side regression).
149
-
150
- The dict is shallow-copied so callers can mutate further without
151
- aliasing the underlying ``gh api`` response.
152
- """
153
- out = dict(raw)
154
- state = out.get("state")
155
- if isinstance(state, str):
156
- out["state"] = state.lower()
157
- return out
158
-
159
-
160
- # ---------------------------------------------------------------------------
161
- # Result aggregator
162
- # ---------------------------------------------------------------------------
163
-
164
-
@dataclass
class FetchAllReport:
    """Per-issue outcome counters produced by :func:`run_fetch_all`.

    Naming (#1247)
    --------------
    The counters are named after what happened to each issue:

    * ``issues_written`` -- a cache write landed on disk (fetch + put).
    * ``already_fresh``  -- the on-disk entry was still fresh, so nothing
      was written.
    * ``issues_failed``  -- the entry could not be written (or the issue
      payload had no usable ``number``).

    The earlier names ``succeeded`` / ``skipped`` / ``failed`` were easy to
    misread (``skipped=396`` looked like dropped work), so they survive only
    as read-write property aliases for callers that have not migrated.

    :meth:`to_json` emits the canonical keys and duplicates them under the
    legacy keys; :meth:`summary_line` renders the single-line recap.
    """

    #: Cache writes that landed (legacy name: ``succeeded``).
    issues_written: int = 0
    #: Cache writes that errored (legacy name: ``failed``).
    issues_failed: int = 0
    #: Entries left untouched because the cached copy was still within its
    #: TTL window (legacy name: ``skipped``).
    already_fresh: int = 0
    #: One ``{"key": ..., "reason": ...}`` record per failed issue.
    failures: list[dict[str, str]] = field(default_factory=list)

    # ----- Legacy aliases (#1247) -----
    # Plain read-write views over the canonical counters so that code still
    # using ``report.succeeded`` / ``report.failed`` / ``report.skipped``
    # keeps working. New code should use the canonical attribute names.

    @property
    def succeeded(self) -> int:
        """Read/write alias of :attr:`issues_written`."""
        return self.issues_written

    @succeeded.setter
    def succeeded(self, value: int) -> None:
        self.issues_written = value

    @property
    def failed(self) -> int:
        """Read/write alias of :attr:`issues_failed`."""
        return self.issues_failed

    @failed.setter
    def failed(self, value: int) -> None:
        self.issues_failed = value

    @property
    def skipped(self) -> int:
        """Read/write alias of :attr:`already_fresh`."""
        return self.already_fresh

    @skipped.setter
    def skipped(self, value: int) -> None:
        self.already_fresh = value

    def to_json(self) -> str:
        """Return the report as a JSON object string with sorted keys.

        The object carries the canonical counters, the same values again
        under the legacy ``succeeded`` / ``failed`` / ``skipped`` keys
        (kept for consumers that still assert on them), and the
        ``failures`` list.
        """
        canonical = {
            "issues_written": self.issues_written,
            "already_fresh": self.already_fresh,
            "issues_failed": self.issues_failed,
        }
        legacy = {
            "succeeded": canonical["issues_written"],
            "failed": canonical["issues_failed"],
            "skipped": canonical["already_fresh"],
        }
        payload = {**canonical, **legacy, "failures": self.failures}
        return json.dumps(payload, ensure_ascii=False, sort_keys=True)

    def summary_line(self, *, source: str, repo: str) -> str:
        """Return the one-line, grep-friendly recap (#1247).

        Shape::

            cache:fetch-all source=<source> repo=<repo> issues_written=<n> already_fresh=<n> issues_failed=<n>

        Args:
            source: Value rendered after ``source=``.
            repo: Value rendered after ``repo=``.
        """
        counters = (
            ("issues_written", self.issues_written),
            ("already_fresh", self.already_fresh),
            ("issues_failed", self.issues_failed),
        )
        rendered = " ".join(f"{name}={value}" for name, value in counters)
        return f"cache:fetch-all source={source} repo={repo} {rendered}"
291
-
292
-
293
- # ---------------------------------------------------------------------------
294
- # Orchestrator
295
- # ---------------------------------------------------------------------------
296
-
297
-
def run_fetch_all(
    *,
    repo: str,
    is_fresh: Callable[[Path], bool],
    entry_dir_for: Callable[[str], Path],
    do_put: Callable[[str, dict[str, Any]], None],
    batch_size: int,
    delay_ms: int,
    state: str,
    limit: int,
    labels: tuple[str, ...] = (),
    author: str | None = None,
) -> FetchAllReport:
    """Enumerate a repo's issues over REST and write each stale one to the cache.

    Args:
        repo: Validated ``owner/repo`` slug.
        is_fresh: ``meta_path -> bool``; True means the entry's ``meta.json``
            is still fresh and the issue needs no write. Injected so this
            module does not import the cache layer's validator.
        entry_dir_for: ``key -> Path`` mapping a cache key to its entry
            directory.
        do_put: ``(key, raw) -> None`` persisting one issue; raises on
            failure.
        batch_size: Checkpoint cadence: after every ``batch_size``-th
            processed issue one additional ``delay_ms`` pause is taken on
            top of the per-issue pause. Must be > 0 (validated by the
            caller).
        delay_ms: Pause after every processed issue, in milliseconds.
            Must be >= 0 (validated by the caller); 0 disables pacing.
        state: Issue state filter forwarded to the REST enumeration
            (``open`` / ``closed`` / ``all``).
        limit: Maximum issue count forwarded to the REST enumeration.
        labels: Optional label filter (#1033); empty means no filter.
        author: Optional issue-creator filter (#1055); ``None`` means no
            filter. Combined with ``labels`` via AND.

    Returns:
        :class:`FetchAllReport` holding the written / already-fresh /
        failed counts and one failure record per failed issue.

    Raises:
        CacheFetchError: The enumeration itself failed. Failures of
            individual ``do_put`` calls are recorded on the report instead
            of being raised.
    """
    issues = _list_issues_rest(
        repo, state=state, limit=limit, labels=labels, author=author
    )
    report = FetchAllReport()
    total = len(issues)
    if total >= PROGRESS_EVERY_N:
        # Announce the cohort size up front so a long run is visibly alive.
        _emit_fetch_progress(
            repo=repo, phase="enumerated", processed=0, total=total, report=report
        )

    for processed, issue in enumerate(issues, start=1):
        raw = _normalise_rest_issue(issue)
        number = raw.get("number")
        if isinstance(number, int) and number > 0:
            key = f"{repo}/{number}"
            meta_path = entry_dir_for(key) / "meta.json"
            if is_fresh(meta_path):
                report.already_fresh += 1
            else:
                try:
                    do_put(key, raw)
                except Exception as exc:  # noqa: BLE001 -- caller's CacheError variants
                    report.issues_failed += 1
                    report.failures.append({"key": key, "reason": str(exc)})
                else:
                    report.issues_written += 1
        else:
            # No usable issue number: nothing can be keyed, record and move on.
            report.issues_failed += 1
            report.failures.append(
                {"key": f"{repo}/?", "reason": f"invalid 'number' field: {number!r}"}
            )

        # Large cohorts only: one line every PROGRESS_EVERY_N issues plus a
        # final line when the last issue has been processed.
        if total >= PROGRESS_EVERY_N and (
            processed % PROGRESS_EVERY_N == 0 or processed == total
        ):
            _emit_fetch_progress(
                repo=repo,
                phase="writing",
                processed=processed,
                total=total,
                report=report,
            )

        # Explicit pacing via ``--delay-ms``. The production default is 0
        # (#1562), so a normal bootstrap never sleeps here.
        _maybe_sleep(delay_ms)
        at_checkpoint = processed % batch_size == 0
        if at_checkpoint:
            _maybe_sleep(delay_ms)

    return report
405
-
406
-
def _rate_limit_text(exc: BaseException) -> str:
    """Join every text surface of *exc* that may carry a rate-limit marker.

    Both the exception message and its optional ``stderr`` attribute are
    included, so a 429 marker or Retry-After value present in only one of
    them is still seen by :func:`detect_rate_limit`.
    """
    parts = [str(exc)]
    stderr = getattr(exc, "stderr", None)
    if isinstance(stderr, bytes):
        stderr = stderr.decode("utf-8", errors="replace")
    if isinstance(stderr, str):
        parts.append(stderr)
    return "\n".join(part for part in parts if part)


def _list_issues_rest(
    repo: str,
    *,
    state: str,
    limit: int,
    labels: tuple[str, ...] = (),
    author: str | None = None,
) -> list[dict[str, Any]]:
    """Enumerate issues through ``_paginated_lister`` with one retry on a 429.

    When the lister raises ``GhRestError`` and the error text looks like a
    rate-limit response, this sleeps for the reported Retry-After (or the
    fallback constant) and tries exactly once more.

    Args:
        repo: ``owner/repo`` slug.
        state: Issue state filter forwarded to the lister.
        limit: Maximum issue count forwarded to the lister.
        labels: Optional label filter (#1033).
        author: Optional issue-creator filter (#1055).

    ``labels`` / ``author`` are passed to the lister only when set, so the
    no-filter call keeps the exact pre-#1033/#1055 signature (the #1476
    refresh path calls this helper without filters).

    Returns:
        The list of issue payloads returned by the lister.

    Raises:
        CacheFetchError: Invalid repo slug, a non-rate-limit REST failure,
            or a second failure after the rate-limit retry.
    """
    filter_kwargs: dict[str, Any] = {}
    if labels:
        filter_kwargs["labels"] = labels
    if author is not None:
        filter_kwargs["author"] = author

    try:
        return _paginated_lister(repo, state=state, limit=limit, **filter_kwargs)
    except InvalidRepoError as exc:
        raise CacheFetchError(f"invalid --repo {repo!r} for REST list enumeration: {exc}") from exc
    except GhRestError as exc:
        # Inspect the message AND stderr together: previously stderr was
        # consulted only when the message was empty, so a 429 reported
        # solely on stderr was treated as a hard failure.
        is_429, retry_after = detect_rate_limit(_rate_limit_text(exc))
        if not is_429:
            raise CacheFetchError(
                f"rest_issue_list_paginated failed for repo={repo}: {exc}"
            ) from exc

    # Rate-limited: back off once, then retry a single time.
    sys.stderr.write(
        f"cache:fetch-all rate-limited on enumeration ({repo}); sleeping "
        f"{retry_after}s before retry\n"
    )
    _sleep(retry_after)
    try:
        return _paginated_lister(repo, state=state, limit=limit, **filter_kwargs)
    except GhRestError as exc2:
        raise CacheFetchError(
            f"rest_issue_list_paginated failed twice for repo={repo}: {exc2}"
        ) from exc2
455
-
456
-
def _maybe_sleep(delay_ms: int) -> None:
    """Pause for ``delay_ms`` milliseconds via ``_sleep``; no-op unless positive."""
    if not delay_ms > 0:
        return
    seconds = delay_ms / 1000.0
    _sleep(seconds)
460
-
461
-
def _emit_fetch_progress(
    *,
    repo: str,
    phase: str,
    processed: int,
    total: int,
    report: FetchAllReport,
) -> None:
    """Emit one progress line for a long ``cache:fetch-all`` run (#1562).

    Args:
        repo: ``owner/repo`` slug shown in the line.
        phase: ``"enumerated"`` prints the cohort size; any other value
            prints the running counters.
        processed: Number of issues handled so far (counter line only).
        total: Size of the enumerated cohort.
        report: Source of the running counters (counter line only).

    The line goes through ``_progress_writer`` / ``_progress_flusher``.
    ``OSError`` and ``ValueError`` from either are swallowed on purpose:
    progress output is best-effort and must not interrupt cache writes.
    """
    prefix = f"cache:fetch-all progress repo={repo} "
    if phase == "enumerated":
        detail = f"enumerated={total} issues; writing cache entries...\n"
    else:
        detail = (
            f"processed={processed}/{total} "
            f"issues_written={report.issues_written} "
            f"already_fresh={report.already_fresh} "
            f"issues_failed={report.issues_failed}\n"
        )
    try:
        _progress_writer(prefix + detail)
        _progress_flusher()
    except (OSError, ValueError):
        # A closed or unavailable stderr/log sink is not a fetch failure.
        pass
491
-
492
-
493
- # ---------------------------------------------------------------------------
494
- # State-refresh path (#1476) -- reconcile cached-open entries that closed
495
- # upstream against the default open-only enumeration.
496
- # ---------------------------------------------------------------------------
497
-
498
-
@dataclass
class StateRefreshReport:
    """Outcome counters produced by :func:`run_state_refresh` (#1476).

    Background: the default ``cache:fetch-all`` enumeration is
    ``state=open``. An issue that closes upstream drops out of that listing,
    so its cached ``raw.json`` is never rewritten and it keeps surfacing as
    actionable ``triage:queue`` work until the 7-day cache TTL expires (the
    #1322 shape). The state refresh revisits each cached-open entry that is
    missing from the open listing and rewrites it to its live state; this
    report records what happened.
    """

    #: Cached-open entries absent from the open enumeration, and therefore
    #: looked up individually.
    revisited: int = 0
    #: Revisited entries found closed upstream and rewritten on disk.
    closed_rewritten: int = 0
    #: Revisited entries that turned out not to be closed (e.g. a transient
    #: drop from the enumeration) and were left untouched.
    still_open: int = 0
    #: Revisited entries whose single-issue fetch or cache rewrite raised.
    refresh_failed: int = 0
    #: One ``{"key": ..., "reason": ...}`` record per failed entry.
    failures: list[dict[str, str]] = field(default_factory=list)

    def to_json(self) -> str:
        """Return all counters plus ``failures`` as a JSON object string with sorted keys."""
        names = ("revisited", "closed_rewritten", "still_open", "refresh_failed", "failures")
        payload = {name: getattr(self, name) for name in names}
        return json.dumps(payload, ensure_ascii=False, sort_keys=True)

    def summary_line(self, *, source: str, repo: str) -> str:
        """Return the one-line ``cache:refresh-closed`` recap for ``source`` / ``repo``."""
        counters = (
            ("revisited", self.revisited),
            ("closed_rewritten", self.closed_rewritten),
            ("still_open", self.still_open),
            ("refresh_failed", self.refresh_failed),
        )
        rendered = " ".join(f"{name}={value}" for name, value in counters)
        return f"cache:refresh-closed source={source} repo={repo} {rendered}"
546
-
547
-
def list_open_issue_numbers(
    repo: str, *, state: str = "open", limit: int = 1000
) -> set[int]:
    """Return the issue numbers present in the REST enumeration for ``repo``.

    Delegates to :func:`_list_issues_rest` (so the 429 retry and the
    ``_paginated_lister`` seam apply) and keeps only positive integer
    ``number`` values; non-dict items and items without a usable number are
    ignored. The #1476 state-refresh path uses the result to tell which
    cached entries are still open upstream.

    Args:
        repo: ``owner/repo`` slug.
        state: Issue state filter forwarded to the enumeration.
        limit: Maximum issue count forwarded to the enumeration.
    """
    listing = _list_issues_rest(repo, state=state, limit=limit)
    candidates = (item.get("number") for item in listing if isinstance(item, dict))
    return {value for value in candidates if isinstance(value, int) and value > 0}
564
-
565
-
def run_state_refresh(
    *,
    repo: str,
    open_numbers: set[int],
    cached_open: list[tuple[int, dict[str, Any]]],
    do_put: Callable[[str, dict[str, Any]], None],
    fetch_single: Callable[[str, int], dict[str, Any]] | None = None,
    delay_ms: int = 0,
) -> StateRefreshReport:
    """Rewrite cached-open entries that have closed upstream.

    Entries whose number is in ``open_numbers`` are skipped without any
    fetch. Every other entry is "revisited": its live payload is fetched,
    and when the live ``state`` is ``closed`` (case-insensitive) the entry
    is rewritten through ``do_put`` so the next ``triage:queue`` walk
    excludes it. Any other live state -- including a missing or non-string
    one -- leaves the cache untouched rather than risk dropping a
    genuinely-open issue.

    Args:
        repo: Validated ``owner/repo`` slug.
        open_numbers: Issue numbers currently in the upstream open-only
            enumeration (e.g. from :func:`list_open_issue_numbers`).
        cached_open: ``(number, raw)`` pairs for cache entries whose
            ``raw.json`` says ``state=open``; only the number is used here.
        do_put: ``(key, raw) -> None`` rewriting a cache entry; raises on
            failure.
        fetch_single: ``(repo, n) -> dict`` returning the live single-issue
            payload. ``None`` selects the module seam
            ``_single_issue_fetcher``.
        delay_ms: Pause after each revisited entry, in milliseconds.

    Returns:
        :class:`StateRefreshReport` with the revisit / rewrite / still-open
        / failure counts and one failure record per errored entry. Fetch
        and rewrite exceptions are recorded, not raised.
    """
    fetcher = _single_issue_fetcher if fetch_single is None else fetch_single
    report = StateRefreshReport()
    # Lazily yields only the entries that vanished from the open listing.
    vanished = (number for number, _ in cached_open if number not in open_numbers)

    for number in vanished:
        report.revisited += 1
        key = f"{repo}/{number}"
        try:
            live = fetcher(repo, number)
        except Exception as exc:  # noqa: BLE001 -- any fetch failure is recorded
            report.refresh_failed += 1
            report.failures.append({"key": key, "reason": f"fetch failed: {exc}"})
        else:
            state_value = live.get("state") if isinstance(live, dict) else None
            confirmed_closed = (
                isinstance(state_value, str) and state_value.lower() == "closed"
            )
            if not confirmed_closed:
                report.still_open += 1
            else:
                try:
                    do_put(key, _normalise_rest_issue(live))
                except Exception as exc:  # noqa: BLE001 -- any rewrite failure recorded
                    report.refresh_failed += 1
                    report.failures.append(
                        {"key": key, "reason": f"rewrite failed: {exc}"}
                    )
                else:
                    report.closed_rewritten += 1
        # One pacing pause per revisited entry, whatever its outcome.
        _maybe_sleep(delay_ms)

    return report