@deftai/directive-content 0.55.2 → 0.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. package/.githooks/pre-commit +143 -0
  2. package/.githooks/pre-push +121 -0
  3. package/QUICK-START.md +2 -2
  4. package/Taskfile.yml +934 -0
  5. package/UPGRADING.md +47 -1
  6. package/events/README.md +3 -3
  7. package/package.json +5 -4
  8. package/scripts/_agents_md.py +494 -0
  9. package/scripts/_cache_fetch.py +635 -0
  10. package/scripts/_cache_quota.py +529 -0
  11. package/scripts/_cache_refresh.py +163 -0
  12. package/scripts/_cache_validate.py +209 -0
  13. package/scripts/_content_root.py +42 -0
  14. package/scripts/_doctor_state.py +277 -0
  15. package/scripts/_event_detect.py +305 -0
  16. package/scripts/_events.py +514 -0
  17. package/scripts/_lifecycle_hygiene.py +568 -0
  18. package/scripts/_pathspec.py +91 -0
  19. package/scripts/_policy_show_cli.py +266 -0
  20. package/scripts/_precutover.py +92 -0
  21. package/scripts/_project_context.py +224 -0
  22. package/scripts/_project_definition_io.py +164 -0
  23. package/scripts/_relocate_snapshot.py +209 -0
  24. package/scripts/_relocate_states.py +343 -0
  25. package/scripts/_resolve_preflight_path.py +152 -0
  26. package/scripts/_safe_subprocess.py +167 -0
  27. package/scripts/_session_start_hook.py +205 -0
  28. package/scripts/_sor_gate_diff.py +365 -0
  29. package/scripts/_stdio_utf8.py +59 -0
  30. package/scripts/_triage_bootstrap_gitignore.py +904 -0
  31. package/scripts/_triage_classify_cli.py +122 -0
  32. package/scripts/_triage_queue_cli.py +625 -0
  33. package/scripts/_triage_scope_cli.py +343 -0
  34. package/scripts/_triage_scope_drift_cli.py +121 -0
  35. package/scripts/_triage_scope_ignores.py +286 -0
  36. package/scripts/_triage_scope_milestone.py +432 -0
  37. package/scripts/_triage_scope_mutations.py +337 -0
  38. package/scripts/_triage_scope_renderers.py +207 -0
  39. package/scripts/_triage_smoketest_stages.py +674 -0
  40. package/scripts/_triage_subscribe_cli.py +140 -0
  41. package/scripts/_triage_welcome_cli.py +421 -0
  42. package/scripts/_vbrief_build.py +239 -0
  43. package/scripts/_vbrief_fidelity.py +479 -0
  44. package/scripts/_vbrief_legacy.py +589 -0
  45. package/scripts/_vbrief_reconciliation.py +883 -0
  46. package/scripts/_vbrief_routing.py +277 -0
  47. package/scripts/_vbrief_safety.py +778 -0
  48. package/scripts/_vbrief_sources.py +312 -0
  49. package/scripts/_vbrief_speckit.py +262 -0
  50. package/scripts/_vbrief_story_quality.py +353 -0
  51. package/scripts/_vbrief_validation.py +299 -0
  52. package/scripts/build_dist.py +412 -0
  53. package/scripts/cache.py +1078 -0
  54. package/scripts/cache_scanner.py +745 -0
  55. package/scripts/candidates_log.py +432 -0
  56. package/scripts/capacity_backfill.py +680 -0
  57. package/scripts/capacity_show.py +653 -0
  58. package/scripts/ci_local.py +689 -0
  59. package/scripts/code_structure_validate.py +765 -0
  60. package/scripts/codebase_default_extractor.py +495 -0
  61. package/scripts/codebase_map.py +304 -0
  62. package/scripts/codebase_map_fresh.py +104 -0
  63. package/scripts/codebase_projection_registry.py +94 -0
  64. package/scripts/codebase_provider.py +582 -0
  65. package/scripts/doctor.py +2257 -0
  66. package/scripts/framework_commands.py +505 -0
  67. package/scripts/gh_rest.py +882 -0
  68. package/scripts/github_auth_modes.py +437 -0
  69. package/scripts/github_body.py +292 -0
  70. package/scripts/ip_risk.py +531 -0
  71. package/scripts/issue_emit.py +670 -0
  72. package/scripts/issue_ingest.py +1064 -0
  73. package/scripts/migrate_preflight.py +418 -0
  74. package/scripts/migrate_vbrief.py +2677 -0
  75. package/scripts/monitor_pr.py +401 -0
  76. package/scripts/pack_migrate_lessons.py +336 -0
  77. package/scripts/pack_migrate_patterns.py +254 -0
  78. package/scripts/pack_migrate_rules.py +350 -0
  79. package/scripts/pack_migrate_skills.py +423 -0
  80. package/scripts/pack_migrate_strategies.py +311 -0
  81. package/scripts/pack_migrate_swarm_spec.py +250 -0
  82. package/scripts/pack_render.py +434 -0
  83. package/scripts/packs_slice.py +712 -0
  84. package/scripts/platform_capabilities.py +336 -0
  85. package/scripts/policy.py +2826 -0
  86. package/scripts/policy_set.py +324 -0
  87. package/scripts/pr_check_closing_keywords.py +524 -0
  88. package/scripts/pr_check_protected_issues.py +267 -0
  89. package/scripts/pr_merge_readiness.py +1004 -0
  90. package/scripts/pr_wait_mergeable.py +669 -0
  91. package/scripts/prd_render.py +159 -0
  92. package/scripts/preflight_architecture_sor.py +974 -0
  93. package/scripts/preflight_branch.py +289 -0
  94. package/scripts/preflight_cache.py +974 -0
  95. package/scripts/preflight_gh.py +721 -0
  96. package/scripts/preflight_implementation.py +272 -0
  97. package/scripts/preflight_story_start.py +838 -0
  98. package/scripts/preflight_wip_cap.py +149 -0
  99. package/scripts/probe_session.py +545 -0
  100. package/scripts/project_render.py +293 -0
  101. package/scripts/quarantine_ext.py +237 -0
  102. package/scripts/reconcile_issues.py +1442 -0
  103. package/scripts/refresh-path.ps1 +107 -0
  104. package/scripts/release.py +2030 -0
  105. package/scripts/release_e2e.py +1011 -0
  106. package/scripts/release_publish.py +486 -0
  107. package/scripts/release_rollback.py +980 -0
  108. package/scripts/relocate.py +1034 -0
  109. package/scripts/resolve_changelog_unreleased.py +667 -0
  110. package/scripts/resolve_version.py +490 -0
  111. package/scripts/resume_conditions.py +706 -0
  112. package/scripts/ritual_sentinel.py +609 -0
  113. package/scripts/roadmap_render.py +635 -0
  114. package/scripts/rule_ownership_lint.py +325 -0
  115. package/scripts/scm.py +591 -0
  116. package/scripts/scope_audit_log.py +387 -0
  117. package/scripts/scope_decompose.py +654 -0
  118. package/scripts/scope_demote.py +509 -0
  119. package/scripts/scope_lifecycle.py +1126 -0
  120. package/scripts/scope_undo.py +772 -0
  121. package/scripts/session_start.py +406 -0
  122. package/scripts/setup_ghx.py +339 -0
  123. package/scripts/setup_windows.ps1 +220 -0
  124. package/scripts/slice_audit.py +585 -0
  125. package/scripts/slice_record.py +530 -0
  126. package/scripts/slice_record_existing.py +692 -0
  127. package/scripts/slug_normalize.py +178 -0
  128. package/scripts/spec_render.py +477 -0
  129. package/scripts/spec_validate.py +238 -0
  130. package/scripts/subagent_monitor.py +658 -0
  131. package/scripts/swarm_complete_cohort.py +644 -0
  132. package/scripts/swarm_launch.py +1206 -0
  133. package/scripts/swarm_readiness.py +554 -0
  134. package/scripts/swarm_verify_review_clean.py +438 -0
  135. package/scripts/swarm_worktrees.py +497 -0
  136. package/scripts/toolchain-check.py +52 -0
  137. package/scripts/triage_actions.py +871 -0
  138. package/scripts/triage_bootstrap.py +1153 -0
  139. package/scripts/triage_bulk.py +630 -0
  140. package/scripts/triage_classify.py +932 -0
  141. package/scripts/triage_help.py +1685 -0
  142. package/scripts/triage_queue.py +1944 -0
  143. package/scripts/triage_reconcile.py +581 -0
  144. package/scripts/triage_refresh.py +643 -0
  145. package/scripts/triage_scope.py +999 -0
  146. package/scripts/triage_scope_drift.py +575 -0
  147. package/scripts/triage_smoketest.py +396 -0
  148. package/scripts/triage_subscribe.py +399 -0
  149. package/scripts/triage_summary.py +1011 -0
  150. package/scripts/triage_welcome.py +1178 -0
  151. package/scripts/ts_check_lane.py +86 -0
  152. package/scripts/validate-links.py +64 -0
  153. package/scripts/validate_strategy_output.py +212 -0
  154. package/scripts/vbrief_activate.py +228 -0
  155. package/scripts/vbrief_migrate_conformance.py +368 -0
  156. package/scripts/vbrief_reconcile_graph.py +306 -0
  157. package/scripts/vbrief_reconcile_labels.py +460 -0
  158. package/scripts/vbrief_reconcile_umbrellas.py +741 -0
  159. package/scripts/vbrief_validate.py +1195 -0
  160. package/scripts/verify-stubs.py +61 -0
  161. package/scripts/verify_capacity.py +160 -0
  162. package/scripts/verify_encoding.py +699 -0
  163. package/scripts/verify_hooks_installed.py +206 -0
  164. package/scripts/verify_investigation.py +360 -0
  165. package/scripts/verify_judgment_gates.py +827 -0
  166. package/scripts/verify_no_task_runtime.py +171 -0
  167. package/scripts/verify_scm_boundary.py +509 -0
  168. package/scripts/verify_session_ritual.py +389 -0
  169. package/scripts/verify_tools.py +426 -0
  170. package/scripts/verify_vbrief_conformance.py +478 -0
  171. package/tasks/architecture.yml +13 -0
  172. package/tasks/cache.yml +69 -0
  173. package/tasks/capacity.yml +38 -0
  174. package/tasks/change.yml +46 -0
  175. package/tasks/changelog.yml +24 -0
  176. package/tasks/ci.yml +49 -0
  177. package/tasks/codebase.yml +47 -0
  178. package/tasks/commit.yml +30 -0
  179. package/tasks/core.yml +126 -0
  180. package/tasks/deployments.yml +54 -0
  181. package/tasks/framework.yml +74 -0
  182. package/tasks/install.yml +60 -0
  183. package/tasks/issue.yml +50 -0
  184. package/tasks/migrate.yml +73 -0
  185. package/tasks/packs.yml +92 -0
  186. package/tasks/policy.yml +75 -0
  187. package/tasks/pr.yml +89 -0
  188. package/tasks/prd.yml +39 -0
  189. package/tasks/project.yml +27 -0
  190. package/tasks/reconcile.yml +32 -0
  191. package/tasks/relocate.yml +56 -0
  192. package/tasks/roadmap.yml +28 -0
  193. package/tasks/scm.yml +126 -0
  194. package/tasks/scope-undo.yml +36 -0
  195. package/tasks/scope.yml +141 -0
  196. package/tasks/session.yml +19 -0
  197. package/tasks/setup.yml +37 -0
  198. package/tasks/slice.yml +69 -0
  199. package/tasks/spec.yml +41 -0
  200. package/tasks/swarm.yml +85 -0
  201. package/tasks/toolchain.yml +13 -0
  202. package/tasks/triage-actions.yml +94 -0
  203. package/tasks/triage-bootstrap.yml +43 -0
  204. package/tasks/triage-bulk.yml +75 -0
  205. package/tasks/triage-classify.yml +30 -0
  206. package/tasks/triage-queue.yml +50 -0
  207. package/tasks/triage-reconcile.yml +29 -0
  208. package/tasks/triage-scope-drift.yml +29 -0
  209. package/tasks/triage-scope.yml +31 -0
  210. package/tasks/triage-smoketest.yml +33 -0
  211. package/tasks/triage-subscribe.yml +36 -0
  212. package/tasks/triage-summary.yml +29 -0
  213. package/tasks/triage-welcome.yml +32 -0
  214. package/tasks/ts.yml +328 -0
  215. package/tasks/vbrief.yml +206 -0
  216. package/tasks/verify.yml +292 -0
  217. package/templates/agents-entry.md +1 -1
@@ -0,0 +1,529 @@
1
+ #!/usr/bin/env python3
2
+ """_cache_quota.py -- size cap, entry cap, LRU eviction for the cache (#947).
3
+
4
+ Extracted from :mod:`cache` to keep the parent module under the deft
5
+ 1000-line MUST limit (mirrors the existing ``_cache_fetch`` /
6
+ ``_cache_validate`` split). The module owns:
7
+
8
+ - Cap resolution from env vars (``DEFT_CACHE_MAX_BYTES``,
9
+ ``DEFT_CACHE_MAX_ENTRIES``) with sensible defaults baked in
10
+ (100 MB / 10,000 entries; sized from the v0.26.0 smoke evidence
11
+ documented in ``docs/smoke-2026-05-07-v0.26.0-rerun.md`` --
12
+ 320 entries = 3.03 MB, ~10 KB/entry average).
13
+ - Usage scanning across the cache root: enumerate every entry's
14
+ ``meta.json``, sum ``size_bytes`` for the byte total, count entries,
15
+ and read ``meta.json`` ``mtime`` for the LRU timestamp.
16
+ - LRU eviction: pick the oldest entry by ``(mtime, path)`` (path tie-break
17
+ for filesystems with 1s mtime granularity), remove the directory,
18
+ return the freed bytes + record so the caller can append a
19
+ ``cache:evict`` audit row.
20
+ - :class:`CacheCapBreachedError`: raised when caps cannot be honored
21
+ even after eviction (e.g. the new entry alone exceeds the byte cap,
22
+ or every entry on disk is the just-written one). The cache CLI maps
23
+ this to exit-code 3 so callers can distinguish "schema invalid"
24
+ (exit 2) from "honoring the cap is impossible" (exit 3).
25
+
26
+ LRU signal: the ``meta.json`` mtime is touched (single ``os.utime``
27
+ syscall) on each ``cache:get`` hit. A v0.26.0 cache tree's existing
28
+ entries already have a valid mtime (the original write timestamp),
29
+ so this is backward-compatible without migration. The schema-bump
30
+ alternative (add a ``last_accessed_at`` field) was rejected because
31
+ it would (a) force a write-on-read of meta.json including schema
32
+ re-validation, (b) require coordinated edits to the FROZEN
33
+ ``vbrief/schemas/cache-meta.schema.json`` plus the in-module
34
+ validator, and (c) impose a migration burden on pre-existing
35
+ cache trees. See the #947 vBRIEF ``DesignChoice`` narrative.
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import contextlib
41
+ import json
42
+ import os
43
+ import shutil
44
+ from collections.abc import Callable, Iterable
45
+ from dataclasses import dataclass
46
+ from pathlib import Path
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Defaults + env-var contract
50
+ # ---------------------------------------------------------------------------
51
+
52
+ #: 100 MB. Defensible at the smoke-evidenced ~10 KB/entry average;
53
+ #: a 50,000-issue mono-repo would consume ~500 MB without a cap.
54
+ DEFAULT_MAX_BYTES: int = 100 * 1024 * 1024
55
+
56
+ #: 10,000 entries. Equivalent to the byte cap at the smoke-evidenced
57
+ #: per-entry average, so either threshold may trip first depending
58
+ #: on the actual mix of small vs large issues in the working set.
59
+ DEFAULT_MAX_ENTRIES: int = 10_000
60
+
61
+ #: A cap value of 0 (or a non-numeric env value) disables that cap.
62
+ ENV_MAX_BYTES: str = "DEFT_CACHE_MAX_BYTES"
63
+ ENV_MAX_ENTRIES: str = "DEFT_CACHE_MAX_ENTRIES"
64
+
65
+ #: Sentinel meaning "the cap is disabled" -- evictor never trips for it.
66
+ CAP_DISABLED: int = 0
67
+
68
+
69
class CacheCapBreachedError(RuntimeError):
    """Signal that the cache caps cannot be satisfied, even after eviction.

    Every keyword argument is kept as a same-named attribute and is also
    rendered into the exception message, so whoever reports the failure
    can show the configured caps, the usage at the time, and the size of
    the entry that was being added.
    """

    def __init__(
        self,
        *,
        reason: str,
        max_bytes: int,
        max_entries: int,
        current_bytes: int,
        current_entries: int,
        incoming_bytes: int,
    ) -> None:
        # Structured fields first, so they are available to handlers that
        # prefer attributes over parsing the message text.
        self.reason = reason
        self.max_bytes = max_bytes
        self.max_entries = max_entries
        self.current_bytes = current_bytes
        self.current_entries = current_entries
        self.incoming_bytes = incoming_bytes
        message_parts = (
            f"cache cap breached ({reason}): ",
            f"max_bytes={max_bytes} max_entries={max_entries} ",
            f"current_bytes={current_bytes} current_entries={current_entries} ",
            f"incoming_bytes={incoming_bytes}",
        )
        super().__init__("".join(message_parts))
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Cap resolution
103
+ # ---------------------------------------------------------------------------
104
+
105
+
106
@dataclass(frozen=True)
class CacheCaps:
    """Immutable pair of cap thresholds used for one cache operation.

    A cap counts as enforced only when its value is strictly positive;
    zero (and any negative value) means that cap is not enforced.
    """

    # Upper bound on the summed entry sizes, in bytes.
    max_bytes: int
    # Upper bound on the number of entries.
    max_entries: int

    @property
    def bytes_enforced(self) -> bool:
        """Whether the byte cap is active."""
        return 0 < self.max_bytes

    @property
    def entries_enforced(self) -> bool:
        """Whether the entry-count cap is active."""
        return 0 < self.max_entries

    @property
    def any_enforced(self) -> bool:
        """Whether at least one of the two caps is active."""
        return any((self.bytes_enforced, self.entries_enforced))
130
+
131
+
132
def _parse_int_env(name: str, default: int) -> int:
    """Read an integer cap from the environment variable ``name``.

    Returns ``default`` when the variable is unset or set to the empty
    string. Returns ``CAP_DISABLED`` when the value is not a valid
    integer or is negative, so a typo cannot masquerade as an enforced
    cap. Otherwise returns the parsed non-negative integer.
    """
    text = os.environ.get(name)
    if not text:
        # Unset and set-but-empty are both treated as "no override".
        return default
    try:
        parsed = int(text)
    except ValueError:
        return CAP_DISABLED
    if parsed < 0:
        return CAP_DISABLED
    return parsed
148
+
149
+
150
def resolve_caps(
    *,
    max_bytes: int | None = None,
    max_entries: int | None = None,
) -> CacheCaps:
    """Build the :class:`CacheCaps` that apply to the current operation.

    For each cap the first available value wins:

    1. the explicit keyword argument, when it is not ``None``;
    2. the matching environment variable (``DEFT_CACHE_MAX_BYTES`` /
       ``DEFT_CACHE_MAX_ENTRIES``), parsed by :func:`_parse_int_env`;
    3. the module default (:data:`DEFAULT_MAX_BYTES` /
       :data:`DEFAULT_MAX_ENTRIES`).

    A negative result from any of these is replaced by
    :data:`CAP_DISABLED`.
    """
    byte_cap = (
        _parse_int_env(ENV_MAX_BYTES, DEFAULT_MAX_BYTES)
        if max_bytes is None
        else max_bytes
    )
    entry_cap = (
        _parse_int_env(ENV_MAX_ENTRIES, DEFAULT_MAX_ENTRIES)
        if max_entries is None
        else max_entries
    )
    return CacheCaps(
        max_bytes=CAP_DISABLED if byte_cap < 0 else byte_cap,
        max_entries=CAP_DISABLED if entry_cap < 0 else entry_cap,
    )
174
+
175
+
176
+ # ---------------------------------------------------------------------------
177
+ # Usage scan
178
+ # ---------------------------------------------------------------------------
179
+
180
+
181
@dataclass(frozen=True)
class EntryUsage:
    """Snapshot of a single on-disk cache entry, as produced by the usage scan.

    The scan fills ``size_bytes``, ``source`` and ``key`` from the
    entry's ``meta.json``. When that file is missing or cannot be
    parsed the entry is still reported, with ``size_bytes=0`` and
    ``meta_present=False``, so that eviction can still pick it up.
    """

    # Directory that holds the entry's meta.json.
    entry_dir: Path
    # Source name recorded for the entry.
    source: str
    # Key recorded for the entry.
    key: str
    # Size recorded in meta.json; 0 when unknown.
    size_bytes: int
    # Timestamp used for LRU ordering (the scan uses the meta.json mtime).
    last_accessed: float
    # False when meta.json could not be read or parsed.
    meta_present: bool
198
+
199
+
200
@dataclass(frozen=True)
class UsageReport:
    """Totals plus per-entry detail captured by one usage scan."""

    # Sum of the size_bytes of every listed entry.
    total_bytes: int
    # Number of listed entries.
    total_entries: int
    # The individual entries that make up the totals.
    entries: tuple[EntryUsage, ...]
207
+
208
+
209
def _read_meta_size(meta_path: Path) -> tuple[int, str, str, bool]:
    """Read ``size_bytes`` plus ``(source, key)`` from a ``meta.json``.

    Returns ``(size_bytes, source, key, meta_present)``.

    ``meta_present`` is ``False`` -- and the other fields are zero / empty
    -- when the file cannot be read, is not valid UTF-8, is not valid
    JSON, or does not contain a JSON object. That lets a corrupt entry
    sort as evictable without polluting the byte total.

    When the file does hold a JSON object, each field is validated
    individually: a ``size_bytes`` that is not a non-negative integer
    becomes ``0``; a ``source`` / ``key`` that is not a string becomes
    ``""``.
    """
    try:
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised by read_text() on invalid UTF-8, so a
        # byte-corrupt meta.json is reported as corrupt instead of
        # aborting the whole scan.
        return 0, "", "", False
    if not isinstance(meta, dict):
        return 0, "", "", False
    size = meta.get("size_bytes")
    src = meta.get("source")
    key = meta.get("key")
    # bool is a subclass of int; a JSON true/false is not a byte count.
    if isinstance(size, bool) or not isinstance(size, int) or size < 0:
        size = 0
    if not isinstance(src, str):
        src = ""
    if not isinstance(key, str):
        key = ""
    return size, src, key, True
232
+
233
+
234
def scan_usage(
    cache_root: Path,
    *,
    sources: Iterable[str],
) -> UsageReport:
    """Collect one :class:`EntryUsage` per ``meta.json`` under the cache root.

    Only the ``cache_root/<source>`` subtrees named in ``sources`` are
    searched, so files that sit directly in the cache root are never
    counted. A missing cache root, or a missing source subtree,
    contributes nothing.

    For each entry the source and key come from ``meta.json`` when
    present; otherwise the subtree name and the entry directory's path
    relative to the subtree (with ``/`` separators) are used. The LRU
    timestamp is the ``meta.json`` mtime, or ``0.0`` if it cannot be
    stat'ed.
    """
    if not cache_root.exists():
        return UsageReport(total_bytes=0, total_entries=0, entries=())
    found: list[EntryUsage] = []
    for source_name in sources:
        subtree = cache_root / source_name
        if not subtree.exists():
            continue
        # Materialise the walk before touching entries: callers may
        # evict while the results are being consumed.
        for meta_file in list(subtree.rglob("meta.json")):
            size, recorded_source, recorded_key, present = _read_meta_size(meta_file)
            try:
                stamp = meta_file.stat().st_mtime
            except OSError:
                stamp = 0.0
            holder = meta_file.parent
            found.append(
                EntryUsage(
                    entry_dir=holder,
                    source=recorded_source or source_name,
                    key=recorded_key or holder.relative_to(subtree).as_posix(),
                    size_bytes=size,
                    last_accessed=stamp,
                    meta_present=present,
                )
            )
    return UsageReport(
        total_bytes=sum(item.size_bytes for item in found),
        total_entries=len(found),
        entries=tuple(found),
    )
279
+
280
+
281
+ # ---------------------------------------------------------------------------
282
+ # LRU eviction primitives
283
+ # ---------------------------------------------------------------------------
284
+
285
+
286
def lru_order(usage: UsageReport) -> tuple[EntryUsage, ...]:
    """Return the report's entries sorted least-recently-used first.

    Entries are ordered by ``last_accessed``; entries with an equal
    timestamp are ordered by the string form of ``entry_dir``, which
    keeps the result deterministic when several entries share the same
    timestamp.
    """

    def _age_then_path(entry: EntryUsage) -> tuple[float, str]:
        return entry.last_accessed, str(entry.entry_dir)

    ranked = list(usage.entries)
    ranked.sort(key=_age_then_path)
    return tuple(ranked)
295
+
296
+
297
def cap_breached(
    usage: UsageReport,
    caps: CacheCaps,
    *,
    incoming_bytes: int = 0,
    incoming_entries: int = 0,
) -> bool:
    """Tell whether ``usage`` plus the planned delta exceeds an enforced cap.

    ``incoming_bytes`` and ``incoming_entries`` are added to the current
    totals before comparing. A cap is breached only when the projected
    total is strictly greater than the cap; a cap that is not enforced is
    never breached.
    """
    if caps.bytes_enforced and usage.total_bytes + incoming_bytes > caps.max_bytes:
        return True
    if not caps.entries_enforced:
        return False
    return bool(usage.total_entries + incoming_entries > caps.max_entries)
317
+
318
+
319
def evict_lru(
    cache_root: Path,
    *,
    sources: Iterable[str],
    caps: CacheCaps,
    incoming_bytes: int = 0,
    incoming_entries: int = 0,
    protect_keys: Iterable[tuple[str, str]] = (),
    on_evict: EvictCallback | None = None,
) -> list[EntryUsage]:
    """Remove least-recently-used entries until the planned delta fits.

    The cache is scanned once. Candidates are then visited oldest-first
    while running totals are kept in memory, so the cache root is not
    re-scanned after each removal.

    Args:
        cache_root: Cache root path.
        sources: Source subtrees to consider (passed to
            :func:`scan_usage`).
        caps: Resolved cap thresholds.
        incoming_bytes: Byte delta the caller intends to add afterwards;
            may be negative.
        incoming_entries: Entry-count delta the caller intends to add.
        protect_keys: ``(source, key)`` pairs that are never evicted.
        on_evict: Optional callback, called once per victim *before* its
            directory is removed, with ``(victim, reason, caps)``.
            ``reason`` is ``"size_cap"``, ``"entry_cap"`` or
            ``"size_cap+entry_cap"`` according to which cap is exceeded
            at that moment.

    Returns:
        The evicted entries in eviction order (oldest first). The list is
        empty when no cap is enforced, when nothing is breached, or when
        every entry is protected -- in the last case the caller decides
        what to do next.
    """
    if not caps.any_enforced:
        return []
    shielded = {(source, key) for source, key in protect_keys}
    snapshot = scan_usage(cache_root, sources=sources)
    needs_room = cap_breached(
        snapshot,
        caps,
        incoming_bytes=incoming_bytes,
        incoming_entries=incoming_entries,
    )
    if not needs_room:
        return []
    candidates = [
        entry
        for entry in lru_order(snapshot)
        if (entry.source, entry.key) not in shielded
    ]
    removed: list[EntryUsage] = []
    bytes_left = snapshot.total_bytes
    entries_left = snapshot.total_entries
    # An empty candidate list simply skips the loop and returns [].
    for victim in candidates:
        over_bytes = (
            caps.bytes_enforced and bytes_left + incoming_bytes > caps.max_bytes
        )
        over_entries = (
            caps.entries_enforced
            and entries_left + incoming_entries > caps.max_entries
        )
        if not over_bytes and not over_entries:
            break
        labelled = (("size_cap", over_bytes), ("entry_cap", over_entries))
        reason = "+".join(label for label, hit in labelled if hit) or "unknown"
        if on_evict is not None:
            on_evict(victim, reason, caps)
        try:
            shutil.rmtree(victim.entry_dir)
        except FileNotFoundError:
            # Someone else removed it concurrently -- still counts as gone.
            pass
        removed.append(victim)
        bytes_left -= victim.size_bytes
        entries_left -= 1
    return removed
402
+
403
+
404
+ #: Type alias for the on_evict callback used by :func:`evict_lru` and
405
+ #: :func:`enforce_caps`. Invoked once per evicted entry BEFORE the
406
+ #: directory is removed. Signature is ``(victim, reason, caps)`` where
407
+ #: ``reason`` is the already-narrowed breach descriptor reflecting which
408
+ #: cap was actually exceeded at the moment of eviction (P1 fix from the
409
+ #: iter-1 review: the previous ``(victim, caps, incoming_bytes)`` shape
410
+ #: forced the audit callback to recompute reason without enough context
411
+ #: and ended up tagging every record ``size_cap+entry_cap`` under the
412
+ #: defaults).
413
+ EvictCallback = Callable[[EntryUsage, str, CacheCaps], None]
414
+
415
+
416
+ # ---------------------------------------------------------------------------
417
+ # High-level enforce_caps entry point
418
+ # ---------------------------------------------------------------------------
419
+
420
+
421
@dataclass(frozen=True)
class EnforceResult:
    """What a single :func:`enforce_caps` call did and where it left the cache.

    ``evicted`` holds the entries removed during enforcement; it is empty
    when nothing had to be (or could be) removed. ``final_usage`` is the
    usage scan taken after eviction. ``would_breach`` is True when the
    planned delta still exceeds a cap after eviction; how that is
    reported is up to the caller.
    """

    # Entries removed during enforcement, in eviction order.
    evicted: tuple[EntryUsage, ...]
    # Usage scan taken after eviction finished.
    final_usage: UsageReport
    # True when the caps are still exceeded after eviction.
    would_breach: bool
435
+
436
+
437
def predict_eviction_set(
    cache_root: Path,
    *,
    sources: Iterable[str],
    caps: CacheCaps,
) -> tuple[EntryUsage, ...]:
    """Work out which entries LRU eviction would remove, without removing any.

    Scans the cache once, then walks the entries oldest-first and adds
    each one to the result until the projected totals are within every
    enforced cap. Returns an empty tuple when no cap is enforced or the
    cache is already within its caps. Nothing on disk is modified.
    """
    if not caps.any_enforced:
        return ()
    snapshot = scan_usage(cache_root, sources=sources)
    if not cap_breached(snapshot, caps):
        return ()
    planned: list[EntryUsage] = []
    bytes_left = snapshot.total_bytes
    entries_left = snapshot.total_entries
    for candidate in lru_order(snapshot):
        bytes_ok = not caps.bytes_enforced or bytes_left <= caps.max_bytes
        entries_ok = not caps.entries_enforced or entries_left <= caps.max_entries
        if bytes_ok and entries_ok:
            break
        planned.append(candidate)
        bytes_left -= candidate.size_bytes
        entries_left -= 1
    return tuple(planned)
469
+
470
+
471
def enforce_caps(
    cache_root: Path,
    *,
    sources: Iterable[str],
    caps: CacheCaps | None = None,
    incoming_bytes: int = 0,
    incoming_entries: int = 0,
    protect_keys: Iterable[tuple[str, str]] = (),
    on_evict: EvictCallback | None = None,
) -> EnforceResult:
    """Evict LRU entries until the cap fits the incoming delta, then re-check.

    Wraps :func:`evict_lru` with a fresh usage scan and a final
    cap-breach check so callers can tell "evicted cleanly" apart from
    "evicted but still breached".

    Args:
        cache_root: Cache root path.
        sources: Source subtrees to consider. Any iterable is accepted,
            including a one-shot iterator.
        caps: Cap thresholds to enforce; when ``None`` they are resolved
            via :func:`resolve_caps`.
        incoming_bytes: Byte delta the caller intends to add afterwards.
        incoming_entries: Entry-count delta the caller intends to add.
        protect_keys: ``(source, key)`` pairs that must not be evicted.
        on_evict: Optional per-victim callback, forwarded to
            :func:`evict_lru`.

    Returns:
        An :class:`EnforceResult` with the evicted entries, the usage
        after eviction, and whether the caps are still breached.
    """
    # ``sources`` is walked twice (once by evict_lru, once by the final
    # scan). Materialise it so a generator argument is not exhausted by
    # the first pass, which would make the final scan see an empty cache.
    source_names = tuple(sources)
    resolved = caps if caps is not None else resolve_caps()
    evicted = evict_lru(
        cache_root,
        sources=source_names,
        caps=resolved,
        incoming_bytes=incoming_bytes,
        incoming_entries=incoming_entries,
        protect_keys=protect_keys,
        on_evict=on_evict,
    )
    final_usage = scan_usage(cache_root, sources=source_names)
    breached = cap_breached(
        final_usage,
        resolved,
        incoming_bytes=incoming_bytes,
        incoming_entries=incoming_entries,
    )
    return EnforceResult(
        evicted=tuple(evicted),
        final_usage=final_usage,
        would_breach=breached,
    )
508
+
509
+
510
+ __all__ = [
511
+ "CAP_DISABLED",
512
+ "CacheCapBreachedError",
513
+ "CacheCaps",
514
+ "DEFAULT_MAX_BYTES",
515
+ "DEFAULT_MAX_ENTRIES",
516
+ "ENV_MAX_BYTES",
517
+ "ENV_MAX_ENTRIES",
518
+ "EnforceResult",
519
+ "EntryUsage",
520
+ "EvictCallback",
521
+ "UsageReport",
522
+ "cap_breached",
523
+ "enforce_caps",
524
+ "evict_lru",
525
+ "lru_order",
526
+ "predict_eviction_set",
527
+ "resolve_caps",
528
+ "scan_usage",
529
+ ]
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env python3
2
+ """_cache_refresh.py -- cache:refresh-closed reconciliation (#1476).
3
+
4
+ Split out of :mod:`cache` so the parent stays under the deft 1000-line
5
+ MUST limit (mirrors the existing ``_cache_fetch`` / ``_cache_quota`` /
6
+ ``_cache_validate`` split).
7
+
8
+ Why this module exists
9
+ ----------------------
10
+ The default ``cache:fetch-all`` enumeration is ``state=open``. Once an
11
+ upstream GitHub issue closes it drops out of that enumeration, so its
12
+ cached ``raw.json`` is never rewritten and keeps saying ``state=open``
13
+ for the full 7-day cache TTL. ``triage:queue`` then keeps ranking the
14
+ closed issue as actionable untriaged work -- the #1322 shape recorded
15
+ in #1476.
16
+
17
+ This module reconciles that gap. :func:`cache_refresh_closed`:
18
+
19
+ 1. Scans on-disk cache entries whose ``raw.json`` says ``state=open``.
20
+ 2. Enumerates the current open issue numbers (the authoritative set).
21
+ 3. For each cached-open entry NOT in the open enumeration, fetches its
22
+ live single-issue state and, when closed, rewrites the entry via
23
+ ``cache.cache_put`` so the next queue walk excludes it.
24
+
25
+ The single-issue fetch + rewrite loop lives in
26
+ :func:`_cache_fetch.run_state_refresh`; the open enumeration in
27
+ :func:`_cache_fetch.list_open_issue_numbers`. This module owns the
28
+ on-disk scan and the ``cache_put`` binding.
29
+
30
+ Import-cycle note
31
+ -----------------
32
+ ``cache`` imports ``cache_refresh_closed`` from here at module load, so
33
+ this module MUST NOT import ``cache`` at the top level. The single
34
+ ``import cache`` lives inside :func:`cache_refresh_closed`, by which
35
+ time ``cache`` is fully initialised.
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import contextlib
41
+ import json
42
+ import sys
43
+ from pathlib import Path
44
+ from typing import Any
45
+
46
+ # Make sibling ``scripts`` modules importable when invoked via
47
+ # ``python scripts/cache.py`` from a Taskfile dispatch.
48
+ sys.path.insert(0, str(Path(__file__).resolve().parent))
49
+
50
+ from _cache_fetch import ( # noqa: E402 -- intentional sys.path tweak
51
+ StateRefreshReport,
52
+ list_open_issue_numbers,
53
+ run_state_refresh,
54
+ )
55
+
56
+ #: Default cache source. v1 ships ``github-issue`` only (mirrors
57
+ #: ``cache.ALLOWED_SOURCES``).
58
+ _DEFAULT_SOURCE = "github-issue"
59
+
60
+
61
+ def scan_cached_open_entries(
62
+ repo: str,
63
+ *,
64
+ source: str,
65
+ cache_root: Path,
66
+ ) -> list[tuple[int, dict[str, Any]]]:
67
+ """Return ``(number, raw)`` for on-disk cache entries that say ``state=open``.
68
+
69
+ Walks ``<cache_root>/<source>/<owner>/<name>/<N>/raw.json`` and yields
70
+ the parsed payloads whose normalised ``state`` is ``open`` -- the
71
+ candidate set :func:`cache_refresh_closed` revisits against the live
72
+ open enumeration. The lowercase compare mirrors the #1236 reader-side
73
+ normalisation so a pre-#1239 cache carrying ``"state": "OPEN"`` is
74
+ still considered.
75
+ """
76
+ if "/" not in repo:
77
+ return []
78
+ owner, name = repo.split("/", 1)
79
+ base = cache_root / source / owner / name
80
+ if not base.is_dir():
81
+ return []
82
+ out: list[tuple[int, dict[str, Any]]] = []
83
+ for entry in sorted(base.iterdir()):
84
+ if not entry.is_dir() or not entry.name.isdigit():
85
+ continue
86
+ raw_path = entry / "raw.json"
87
+ if not raw_path.is_file():
88
+ continue
89
+ try:
90
+ raw = json.loads(raw_path.read_text(encoding="utf-8"))
91
+ except (OSError, json.JSONDecodeError):
92
+ continue
93
+ if not isinstance(raw, dict):
94
+ continue
95
+ number = raw.get("number")
96
+ if not isinstance(number, int):
97
+ with contextlib.suppress(ValueError, TypeError):
98
+ number = int(entry.name)
99
+ if not isinstance(number, int):
100
+ continue
101
+ state_raw = raw.get("state") or "open"
102
+ state = state_raw.lower() if isinstance(state_raw, str) else "open"
103
+ if state != "open":
104
+ continue
105
+ out.append((int(number), raw))
106
+ return out
107
+
108
+
109
+ def cache_refresh_closed(
110
+ *,
111
+ source: str,
112
+ repo: str,
113
+ ttl_seconds: int | None = None,
114
+ delay_ms: int | None = None,
115
+ limit: int = 1000,
116
+ cache_root: Path | None = None,
117
+ ) -> StateRefreshReport:
118
+ """Rewrite cached-open entries that closed upstream to ``state=closed`` (#1476).
119
+
120
+ See the module docstring for the three-step reconciliation. Returns a
121
+ :class:`_cache_fetch.StateRefreshReport`. When no cached-open entries
122
+ exist the open enumeration is skipped entirely (an empty report is
123
+ returned without any network call).
124
+
125
+ Raises:
126
+ cache.CacheError: On an unsupported source, a malformed repo, or a
127
+ negative ``delay_ms`` -- so CLI / Taskfile callers exit non-zero
128
+ via the same error class as the rest of the cache surface.
129
+ """
130
+ # Deferred import breaks the cache <-> _cache_refresh cycle (see the
131
+ # module docstring). ``cache`` is fully initialised by call time.
132
+ import cache
133
+
134
+ if source != _DEFAULT_SOURCE:
135
+ raise cache.CacheError(
136
+ f"cache:refresh-closed source={source!r} not supported in v1 "
137
+ "(supports: github-issue only; other sources deferred to v2)"
138
+ )
139
+ if not cache._REPO_RE.match(repo):
140
+ raise cache.CacheError(
141
+ f"invalid --repo {repo!r}: expected 'owner/repo' "
142
+ "(alphanumerics, '.', '_', '-' only)"
143
+ )
144
+ effective_delay = delay_ms if delay_ms is not None else cache.DEFAULT_DELAY_MS
145
+ if effective_delay < 0:
146
+ raise cache.CacheError(f"--delay-ms must be >= 0 (got {effective_delay!r})")
147
+
148
+ root = cache_root if cache_root is not None else cache.DEFAULT_CACHE_ROOT
149
+ cached_open = scan_cached_open_entries(repo, source=source, cache_root=Path(root))
150
+ if not cached_open:
151
+ return StateRefreshReport()
152
+ open_numbers = list_open_issue_numbers(repo, state="open", limit=limit)
153
+
154
+ def _do_put(key: str, raw: dict[str, Any]) -> None:
155
+ cache.cache_put(source, key, raw, ttl_seconds=ttl_seconds, cache_root=cache_root)
156
+
157
+ return run_state_refresh(
158
+ repo=repo,
159
+ open_numbers=open_numbers,
160
+ cached_open=cached_open,
161
+ do_put=_do_put,
162
+ delay_ms=effective_delay,
163
+ )