@deftai/directive-content 0.58.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. package/.githooks/pre-push +10 -9
  2. package/Taskfile.yml +57 -67
  3. package/UPGRADING.md +1 -1
  4. package/docs/assets/directive-lifecycle-diagram.png +0 -0
  5. package/docs/directive-lifecycle.md +73 -0
  6. package/docs/getting-started.md +5 -1
  7. package/package.json +3 -3
  8. package/packs/rules/rules-pack-0.1.json +3 -3
  9. package/packs/skills/skills-pack-0.1.json +22 -22
  10. package/scm/github.md +20 -2
  11. package/tasks/change.yml +16 -31
  12. package/tasks/ci.yml +8 -0
  13. package/tasks/commit.yml +12 -19
  14. package/tasks/core.yml +10 -0
  15. package/tasks/engine.yml +42 -0
  16. package/tasks/framework.yml +3 -0
  17. package/tasks/install.yml +20 -19
  18. package/tasks/migrate.yml +26 -15
  19. package/tasks/project.yml +16 -0
  20. package/tasks/relocate.yml +18 -48
  21. package/tasks/toolchain.yml +15 -5
  22. package/tasks/vbrief.yml +4 -3
  23. package/tasks/verify.yml +12 -14
  24. package/templates/agents-entry.md +1 -2
  25. package/scripts/_agents_md.py +0 -494
  26. package/scripts/_cache_fetch.py +0 -635
  27. package/scripts/_cache_quota.py +0 -529
  28. package/scripts/_cache_refresh.py +0 -163
  29. package/scripts/_cache_validate.py +0 -209
  30. package/scripts/_content_root.py +0 -42
  31. package/scripts/_doctor_state.py +0 -277
  32. package/scripts/_event_detect.py +0 -305
  33. package/scripts/_events.py +0 -514
  34. package/scripts/_lifecycle_hygiene.py +0 -568
  35. package/scripts/_pathspec.py +0 -91
  36. package/scripts/_policy_show_cli.py +0 -266
  37. package/scripts/_precutover.py +0 -92
  38. package/scripts/_project_context.py +0 -224
  39. package/scripts/_project_definition_io.py +0 -164
  40. package/scripts/_relocate_snapshot.py +0 -209
  41. package/scripts/_relocate_states.py +0 -343
  42. package/scripts/_resolve_preflight_path.py +0 -152
  43. package/scripts/_safe_subprocess.py +0 -167
  44. package/scripts/_session_start_hook.py +0 -205
  45. package/scripts/_sor_gate_diff.py +0 -365
  46. package/scripts/_stdio_utf8.py +0 -59
  47. package/scripts/_triage_bootstrap_gitignore.py +0 -904
  48. package/scripts/_triage_classify_cli.py +0 -122
  49. package/scripts/_triage_queue_cli.py +0 -625
  50. package/scripts/_triage_scope_cli.py +0 -343
  51. package/scripts/_triage_scope_drift_cli.py +0 -121
  52. package/scripts/_triage_scope_ignores.py +0 -286
  53. package/scripts/_triage_scope_milestone.py +0 -432
  54. package/scripts/_triage_scope_mutations.py +0 -337
  55. package/scripts/_triage_scope_renderers.py +0 -207
  56. package/scripts/_triage_smoketest_stages.py +0 -674
  57. package/scripts/_triage_subscribe_cli.py +0 -140
  58. package/scripts/_triage_welcome_cli.py +0 -421
  59. package/scripts/_vbrief_build.py +0 -239
  60. package/scripts/_vbrief_fidelity.py +0 -479
  61. package/scripts/_vbrief_legacy.py +0 -589
  62. package/scripts/_vbrief_reconciliation.py +0 -883
  63. package/scripts/_vbrief_routing.py +0 -277
  64. package/scripts/_vbrief_safety.py +0 -778
  65. package/scripts/_vbrief_sources.py +0 -312
  66. package/scripts/_vbrief_speckit.py +0 -262
  67. package/scripts/_vbrief_story_quality.py +0 -353
  68. package/scripts/_vbrief_validation.py +0 -299
  69. package/scripts/build_dist.py +0 -412
  70. package/scripts/cache.py +0 -1078
  71. package/scripts/cache_scanner.py +0 -745
  72. package/scripts/candidates_log.py +0 -432
  73. package/scripts/capacity_backfill.py +0 -680
  74. package/scripts/capacity_show.py +0 -653
  75. package/scripts/ci_local.py +0 -689
  76. package/scripts/code_structure_validate.py +0 -765
  77. package/scripts/codebase_default_extractor.py +0 -495
  78. package/scripts/codebase_map.py +0 -304
  79. package/scripts/codebase_map_fresh.py +0 -104
  80. package/scripts/codebase_projection_registry.py +0 -94
  81. package/scripts/codebase_provider.py +0 -582
  82. package/scripts/doctor.py +0 -2551
  83. package/scripts/framework_commands.py +0 -505
  84. package/scripts/gh_rest.py +0 -882
  85. package/scripts/github_auth_modes.py +0 -437
  86. package/scripts/github_body.py +0 -292
  87. package/scripts/ip_risk.py +0 -531
  88. package/scripts/issue_emit.py +0 -670
  89. package/scripts/issue_ingest.py +0 -1064
  90. package/scripts/migrate_preflight.py +0 -418
  91. package/scripts/migrate_vbrief.py +0 -2677
  92. package/scripts/monitor_pr.py +0 -401
  93. package/scripts/pack_migrate_lessons.py +0 -336
  94. package/scripts/pack_migrate_patterns.py +0 -254
  95. package/scripts/pack_migrate_rules.py +0 -350
  96. package/scripts/pack_migrate_skills.py +0 -423
  97. package/scripts/pack_migrate_strategies.py +0 -311
  98. package/scripts/pack_migrate_swarm_spec.py +0 -250
  99. package/scripts/pack_render.py +0 -434
  100. package/scripts/packs_slice.py +0 -712
  101. package/scripts/platform_capabilities.py +0 -336
  102. package/scripts/policy.py +0 -2826
  103. package/scripts/policy_set.py +0 -324
  104. package/scripts/pr_check_closing_keywords.py +0 -524
  105. package/scripts/pr_check_protected_issues.py +0 -267
  106. package/scripts/pr_merge_readiness.py +0 -1004
  107. package/scripts/pr_wait_mergeable.py +0 -669
  108. package/scripts/prd_render.py +0 -159
  109. package/scripts/preflight_architecture_sor.py +0 -974
  110. package/scripts/preflight_branch.py +0 -289
  111. package/scripts/preflight_cache.py +0 -974
  112. package/scripts/preflight_gh.py +0 -721
  113. package/scripts/preflight_implementation.py +0 -272
  114. package/scripts/preflight_story_start.py +0 -838
  115. package/scripts/preflight_wip_cap.py +0 -149
  116. package/scripts/probe_session.py +0 -545
  117. package/scripts/project_render.py +0 -293
  118. package/scripts/quarantine_ext.py +0 -237
  119. package/scripts/reconcile_issues.py +0 -1442
  120. package/scripts/refresh-path.ps1 +0 -107
  121. package/scripts/release.py +0 -2030
  122. package/scripts/release_e2e.py +0 -1011
  123. package/scripts/release_publish.py +0 -486
  124. package/scripts/release_rollback.py +0 -980
  125. package/scripts/relocate.py +0 -1034
  126. package/scripts/resolve_changelog_unreleased.py +0 -667
  127. package/scripts/resolve_version.py +0 -490
  128. package/scripts/resume_conditions.py +0 -706
  129. package/scripts/ritual_sentinel.py +0 -609
  130. package/scripts/roadmap_render.py +0 -635
  131. package/scripts/rule_ownership_lint.py +0 -325
  132. package/scripts/scm.py +0 -591
  133. package/scripts/scope_audit_log.py +0 -387
  134. package/scripts/scope_decompose.py +0 -654
  135. package/scripts/scope_demote.py +0 -509
  136. package/scripts/scope_lifecycle.py +0 -1126
  137. package/scripts/scope_undo.py +0 -772
  138. package/scripts/session_start.py +0 -406
  139. package/scripts/setup_ghx.py +0 -339
  140. package/scripts/setup_windows.ps1 +0 -220
  141. package/scripts/slice_audit.py +0 -585
  142. package/scripts/slice_record.py +0 -530
  143. package/scripts/slice_record_existing.py +0 -692
  144. package/scripts/slug_normalize.py +0 -178
  145. package/scripts/spec_render.py +0 -477
  146. package/scripts/spec_validate.py +0 -238
  147. package/scripts/subagent_monitor.py +0 -658
  148. package/scripts/swarm_complete_cohort.py +0 -644
  149. package/scripts/swarm_launch.py +0 -1206
  150. package/scripts/swarm_readiness.py +0 -554
  151. package/scripts/swarm_verify_review_clean.py +0 -438
  152. package/scripts/swarm_worktrees.py +0 -497
  153. package/scripts/toolchain-check.py +0 -52
  154. package/scripts/triage_actions.py +0 -871
  155. package/scripts/triage_bootstrap.py +0 -1153
  156. package/scripts/triage_bulk.py +0 -630
  157. package/scripts/triage_classify.py +0 -932
  158. package/scripts/triage_help.py +0 -1685
  159. package/scripts/triage_queue.py +0 -1944
  160. package/scripts/triage_reconcile.py +0 -581
  161. package/scripts/triage_refresh.py +0 -643
  162. package/scripts/triage_scope.py +0 -999
  163. package/scripts/triage_scope_drift.py +0 -575
  164. package/scripts/triage_smoketest.py +0 -396
  165. package/scripts/triage_subscribe.py +0 -399
  166. package/scripts/triage_summary.py +0 -1011
  167. package/scripts/triage_welcome.py +0 -1178
  168. package/scripts/ts_check_lane.py +0 -86
  169. package/scripts/validate-links.py +0 -64
  170. package/scripts/validate_strategy_output.py +0 -212
  171. package/scripts/vbrief_activate.py +0 -228
  172. package/scripts/vbrief_migrate_conformance.py +0 -368
  173. package/scripts/vbrief_reconcile_graph.py +0 -306
  174. package/scripts/vbrief_reconcile_labels.py +0 -460
  175. package/scripts/vbrief_reconcile_umbrellas.py +0 -741
  176. package/scripts/vbrief_validate.py +0 -1144
  177. package/scripts/verify-stubs.py +0 -61
  178. package/scripts/verify_capacity.py +0 -160
  179. package/scripts/verify_encoding.py +0 -699
  180. package/scripts/verify_hooks_installed.py +0 -206
  181. package/scripts/verify_investigation.py +0 -360
  182. package/scripts/verify_judgment_gates.py +0 -827
  183. package/scripts/verify_no_task_runtime.py +0 -171
  184. package/scripts/verify_scm_boundary.py +0 -509
  185. package/scripts/verify_session_ritual.py +0 -389
  186. package/scripts/verify_tools.py +0 -426
  187. package/scripts/verify_vbrief_conformance.py +0 -478
@@ -1,529 +0,0 @@
1
- #!/usr/bin/env python3
2
- """_cache_quota.py -- size cap, entry cap, LRU eviction for the cache (#947).
3
-
4
- Extracted from :mod:`cache` to keep the parent module under the deft
5
- 1000-line MUST limit (mirrors the existing ``_cache_fetch`` /
6
- ``_cache_validate`` split). The module owns:
7
-
8
- - Cap resolution from env vars (``DEFT_CACHE_MAX_BYTES``,
9
- ``DEFT_CACHE_MAX_ENTRIES``) with sensible defaults baked in
10
- (100 MB / 10,000 entries; sized from the v0.26.0 smoke evidence
11
- documented in ``docs/smoke-2026-05-07-v0.26.0-rerun.md`` --
12
- 320 entries = 3.03 MB, ~10 KB/entry average).
13
- - Usage scanning across the cache root: enumerate every entry's
14
- ``meta.json``, sum ``size_bytes`` for the byte total, count entries,
15
- and read ``meta.json`` ``mtime`` for the LRU timestamp.
16
- - LRU eviction: pick the oldest entry by ``(mtime, path)`` (path tie-break
17
- for filesystems with 1s mtime granularity), remove the directory,
18
- return the freed bytes + record so the caller can append a
19
- ``cache:evict`` audit row.
20
- - :class:`CacheCapBreachedError`: raised when caps cannot be honored
21
- even after eviction (e.g. the new entry alone exceeds the byte cap,
22
- or every entry on disk is the just-written one). The cache CLI maps
23
- this to exit-code 3 so callers can distinguish "schema invalid"
24
- (exit 2) from "honoring the cap is impossible" (exit 3).
25
-
26
- LRU signal: the ``meta.json`` mtime is touched (single ``os.utime``
27
- syscall) on each ``cache:get`` hit. A v0.26.0 cache tree's existing
28
- entries already have a valid mtime (the original write timestamp),
29
- so this is backward-compatible without migration. The schema-bump
30
- alternative (add a ``last_accessed_at`` field) was rejected because
31
- it would (a) force a write-on-read of meta.json including schema
32
- re-validation, (b) require coordinated edits to the FROZEN
33
- ``vbrief/schemas/cache-meta.schema.json`` plus the in-module
34
- validator, and (c) impose a migration burden on pre-existing
35
- cache trees. See the #947 vBRIEF ``DesignChoice`` narrative.
36
- """
37
-
38
- from __future__ import annotations
39
-
40
- import contextlib
41
- import json
42
- import os
43
- import shutil
44
- from collections.abc import Callable, Iterable
45
- from dataclasses import dataclass
46
- from pathlib import Path
47
-
48
- # ---------------------------------------------------------------------------
49
- # Defaults + env-var contract
50
- # ---------------------------------------------------------------------------
51
-
52
#: Default byte cap: 100 MB. Defensible at the smoke-evidenced ~10 KB/entry
#: average; a 50,000-issue mono-repo would consume ~500 MB without a cap.
DEFAULT_MAX_BYTES: int = 100 * 1024 * 1024

#: Default entry cap: 10,000 entries. Equivalent to the byte cap at the
#: smoke-evidenced per-entry average, so either threshold may trip first
#: depending on the actual mix of small vs large issues in the working set.
DEFAULT_MAX_ENTRIES: int = 10_000

#: Names of the env vars consulted when no explicit cap is passed. A value
#: of 0, a negative value, or a non-numeric value disables that cap; an
#: unset or empty variable falls back to the module default.
ENV_MAX_BYTES: str = "DEFT_CACHE_MAX_BYTES"
ENV_MAX_ENTRIES: str = "DEFT_CACHE_MAX_ENTRIES"

#: Sentinel meaning "the cap is disabled" -- evictor never trips for it.
CAP_DISABLED: int = 0
67
-
68
-
69
class CacheCapBreachedError(RuntimeError):
    """Signal that the cache caps cannot be satisfied, even by evicting.

    Every keyword passed to the constructor is kept as an attribute of the
    same name and is also rendered into the exception message, so a caller
    can either inspect the numbers programmatically or simply print the
    error to show why the write was refused.
    """

    def __init__(
        self,
        *,
        reason: str,
        max_bytes: int,
        max_entries: int,
        current_bytes: int,
        current_entries: int,
        incoming_bytes: int,
    ) -> None:
        # Build the human-readable message piecewise: limits, usage, delta.
        limits = f"max_bytes={max_bytes} max_entries={max_entries} "
        usage = f"current_bytes={current_bytes} current_entries={current_entries} "
        message = (
            f"cache cap breached ({reason}): "
            + limits
            + usage
            + f"incoming_bytes={incoming_bytes}"
        )
        super().__init__(message)

        # Structured copies of the same facts for programmatic callers.
        self.reason = reason
        self.max_bytes = max_bytes
        self.max_entries = max_entries
        self.current_bytes = current_bytes
        self.current_entries = current_entries
        self.incoming_bytes = incoming_bytes
99
-
100
-
101
- # ---------------------------------------------------------------------------
102
- # Cap resolution
103
- # ---------------------------------------------------------------------------
104
-
105
-
106
@dataclass(frozen=True)
class CacheCaps:
    """Immutable pair of cap thresholds applied to a single operation.

    A threshold of 0 (or any non-positive value) means that cap is not
    enforced; the ``*_enforced`` properties expose that test so callers do
    not have to compare against the sentinel themselves.
    """

    # Upper bound on the summed ``size_bytes`` of all entries; 0 disables.
    max_bytes: int
    # Upper bound on the number of entries; 0 disables.
    max_entries: int

    @property
    def bytes_enforced(self) -> bool:
        """True when the byte cap is active (strictly positive)."""
        return self.max_bytes > 0

    @property
    def entries_enforced(self) -> bool:
        """True when the entry-count cap is active (strictly positive)."""
        return self.max_entries > 0

    @property
    def any_enforced(self) -> bool:
        """True when at least one of the two caps is active."""
        return any((self.bytes_enforced, self.entries_enforced))
130
-
131
-
132
def _parse_int_env(name: str, default: int) -> int:
    """Read an integer cap from the environment variable ``name``.

    * Unset or empty variable -> ``default``.
    * Value that ``int()`` rejects -> ``CAP_DISABLED``, so a typo never
      masquerades as an enforced cap.
    * Negative value -> ``CAP_DISABLED``.
    * Anything else -> the parsed integer.
    """
    raw = os.environ.get(name)
    if not raw:
        # Covers both "variable absent" (None) and "set to the empty string".
        return default
    try:
        parsed = int(raw)
    except ValueError:
        return CAP_DISABLED
    if parsed < 0:
        return CAP_DISABLED
    return parsed
148
-
149
-
150
def resolve_caps(
    *,
    max_bytes: int | None = None,
    max_entries: int | None = None,
) -> CacheCaps:
    """Work out which caps apply, preferring the most explicit source.

    For each cap independently the value comes from, in order:

    1. the keyword argument, when it is not ``None``;
    2. the matching environment variable (``DEFT_CACHE_MAX_BYTES`` /
       ``DEFT_CACHE_MAX_ENTRIES``);
    3. the module default (:data:`DEFAULT_MAX_BYTES` /
       :data:`DEFAULT_MAX_ENTRIES`).

    Negative results are normalised to :data:`CAP_DISABLED`.
    """
    byte_cap = (
        _parse_int_env(ENV_MAX_BYTES, DEFAULT_MAX_BYTES)
        if max_bytes is None
        else max_bytes
    )
    entry_cap = (
        _parse_int_env(ENV_MAX_ENTRIES, DEFAULT_MAX_ENTRIES)
        if max_entries is None
        else max_entries
    )
    # The env parser already clamps negatives; this guard covers explicit
    # keyword arguments, which bypass that parser.
    return CacheCaps(
        max_bytes=CAP_DISABLED if byte_cap < 0 else byte_cap,
        max_entries=CAP_DISABLED if entry_cap < 0 else entry_cap,
    )
174
-
175
-
176
- # ---------------------------------------------------------------------------
177
- # Usage scan
178
- # ---------------------------------------------------------------------------
179
-
180
-
181
@dataclass(frozen=True)
class EntryUsage:
    """One on-disk cache entry seen by the usage scan.

    ``size_bytes`` is read from the entry's ``meta.json``. If the
    meta.json is unreadable or unparseable the entry is still listed,
    with ``size_bytes=0`` and ``meta_present=False``, so it stays an
    eviction candidate without inflating the byte total. Such entries
    are NOT prioritised: :func:`lru_order` sorts purely by
    ``(last_accessed, entry_dir)``.
    """

    # Directory that contains the entry's meta.json.
    entry_dir: Path
    # ``source`` from meta.json, or the scanned source subtree name when absent.
    source: str
    # ``key`` from meta.json, or the entry's "/"-joined path relative to the
    # source subtree when absent.
    key: str
    # ``size_bytes`` from meta.json; 0 when missing, negative, or not an int.
    size_bytes: int
    # mtime of meta.json (the LRU signal); 0.0 when it could not be stat'ed.
    last_accessed: float
    # False when meta.json could not be read or parsed as a JSON object.
    meta_present: bool
198
-
199
-
200
@dataclass(frozen=True)
class UsageReport:
    """Aggregate usage at the time of the scan."""

    # Sum of ``size_bytes`` over ``entries``.
    total_bytes: int
    # Number of items in ``entries``.
    total_entries: int
    # One record per meta.json found, in scan order (not LRU order).
    entries: tuple[EntryUsage, ...]
207
-
208
-
209
def _read_meta_size(meta_path: Path) -> tuple[int, str, str, bool]:
    """Read ``size_bytes`` plus ``(source, key)`` from a ``meta.json``.

    Args:
        meta_path: Path to the entry's ``meta.json``.

    Returns:
        ``(size_bytes, source, key, meta_present)``. When the file cannot
        be read, is not valid UTF-8, is not valid JSON, or does not hold a
        JSON object, the result is ``(0, "", "", False)`` so the corrupt
        entry remains evictable without polluting the byte total. Inside a
        readable object, any individual field of the wrong type is replaced
        by its zero value (``0`` / ``""``) and ``meta_present`` stays True.
    """
    try:
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised by read_text() on non-UTF-8 bytes; the
        # latter previously escaped and aborted the whole usage scan.
        return 0, "", "", False
    if not isinstance(meta, dict):
        return 0, "", "", False
    size = meta.get("size_bytes")
    src = meta.get("source")
    key = meta.get("key")
    # bool is a subclass of int; a JSON ``true`` must not count as 1 byte.
    if isinstance(size, bool) or not isinstance(size, int) or size < 0:
        size = 0
    if not isinstance(src, str):
        src = ""
    if not isinstance(key, str):
        key = ""
    return size, src, key, True
232
-
233
-
234
def scan_usage(
    cache_root: Path,
    *,
    sources: Iterable[str],
) -> UsageReport:
    """Enumerate every cache entry under the given source subtrees.

    Only ``<cache_root>/<source>`` directories named in ``sources`` are
    searched (recursively, for files called ``meta.json``), so files that
    sit directly at the cache root are never counted. A missing cache root
    yields an empty report.
    """
    if not cache_root.exists():
        return UsageReport(total_bytes=0, total_entries=0, entries=())

    found: list[EntryUsage] = []
    for source_name in sources:
        source_dir = cache_root / source_name
        if not source_dir.exists():
            continue
        # Materialise the glob first so that directories removed while we
        # are walking cannot disturb the iteration.
        meta_files = list(source_dir.rglob("meta.json"))
        for meta_file in meta_files:
            size, recorded_source, recorded_key, parsed_ok = _read_meta_size(
                meta_file
            )
            try:
                touched = meta_file.stat().st_mtime
            except OSError:
                # File vanished or is unreadable: treat it as oldest.
                touched = 0.0
            entry_dir = meta_file.parent
            if recorded_key:
                key = recorded_key
            else:
                # Fall back to the on-disk location, always "/"-separated.
                key = str(entry_dir.relative_to(source_dir)).replace(os.sep, "/")
            found.append(
                EntryUsage(
                    entry_dir=entry_dir,
                    source=recorded_source or source_name,
                    key=key,
                    size_bytes=size,
                    last_accessed=touched,
                    meta_present=parsed_ok,
                )
            )

    return UsageReport(
        total_bytes=sum(item.size_bytes for item in found),
        total_entries=len(found),
        entries=tuple(found),
    )
279
-
280
-
281
- # ---------------------------------------------------------------------------
282
- # LRU eviction primitives
283
- # ---------------------------------------------------------------------------
284
-
285
-
286
def lru_order(usage: UsageReport) -> tuple[EntryUsage, ...]:
    """Sort the scanned entries from least- to most-recently accessed.

    Entries are ranked by ``last_accessed``; ties are broken by the string
    form of ``entry_dir`` so that the order is deterministic even when
    several entries share the same timestamp.
    """

    def _age_then_path(entry):
        return entry.last_accessed, str(entry.entry_dir)

    ranked = list(usage.entries)
    ranked.sort(key=_age_then_path)
    return tuple(ranked)
295
-
296
-
297
def cap_breached(
    usage: UsageReport,
    caps: CacheCaps,
    *,
    incoming_bytes: int = 0,
    incoming_entries: int = 0,
) -> bool:
    """Tell whether current usage plus a planned delta exceeds any cap.

    ``incoming_bytes`` and ``incoming_entries`` are net deltas the caller
    intends to add on top of ``usage``; both default to 0, which turns the
    call into a plain "is the cache over cap right now?" check. A cap is
    only considered when it is enforced, and the comparison is strict
    (landing exactly on the cap is not a breach).
    """
    projected_bytes = usage.total_bytes + incoming_bytes
    if caps.bytes_enforced and projected_bytes > caps.max_bytes:
        return True
    projected_entries = usage.total_entries + incoming_entries
    over_entries = caps.entries_enforced and projected_entries > caps.max_entries
    return bool(over_entries)
317
-
318
-
319
def evict_lru(
    cache_root: Path,
    *,
    sources: Iterable[str],
    caps: CacheCaps,
    incoming_bytes: int = 0,
    incoming_entries: int = 0,
    protect_keys: Iterable[tuple[str, str]] = (),
    on_evict: EvictCallback | None = None,
) -> list[EntryUsage]:
    """Remove least-recently-used entries until the planned delta fits.

    The cache is scanned once; afterwards running totals are adjusted in
    memory after each removal instead of re-scanning the disk.

    Args:
        cache_root: Cache root path.
        sources: Source subtree names to scan under ``cache_root``.
        caps: Resolved cap thresholds.
        incoming_bytes: Net bytes the caller plans to add after eviction
            (may be negative for a shrinking re-put).
        incoming_entries: Net entry-count delta the caller plans to add.
        protect_keys: ``(source, key)`` pairs that must never be evicted.
        on_evict: Optional hook called as ``on_evict(victim, reason, caps)``
            just before each directory is deleted. ``reason`` is
            ``"size_cap"``, ``"entry_cap"`` or ``"size_cap+entry_cap"``,
            reflecting which cap is exceeded at that moment.

    Returns:
        The evicted entries in eviction order (oldest first). Empty when no
        cap is enforced, nothing is breached, or every entry is protected.
    """
    if not caps.any_enforced:
        return []

    shielded = set()
    for protected_source, protected_key in protect_keys:
        shielded.add((protected_source, protected_key))

    snapshot = scan_usage(cache_root, sources=sources)
    already_fits = not cap_breached(
        snapshot,
        caps,
        incoming_bytes=incoming_bytes,
        incoming_entries=incoming_entries,
    )
    if already_fits:
        return []

    candidates = [
        entry
        for entry in lru_order(snapshot)
        if (entry.source, entry.key) not in shielded
    ]

    removed: list[EntryUsage] = []
    bytes_left = snapshot.total_bytes
    entries_left = snapshot.total_entries
    for victim in candidates:
        over_bytes = (
            caps.bytes_enforced and bytes_left + incoming_bytes > caps.max_bytes
        )
        over_entries = (
            caps.entries_enforced
            and entries_left + incoming_entries > caps.max_entries
        )
        if not over_bytes and not over_entries:
            # Enough has been freed; keep the remaining entries.
            break

        if over_bytes and over_entries:
            reason = "size_cap+entry_cap"
        elif over_bytes:
            reason = "size_cap"
        else:
            reason = "entry_cap"

        if on_evict is not None:
            on_evict(victim, reason, caps)
        try:
            shutil.rmtree(victim.entry_dir)
        except FileNotFoundError:
            # Someone else removed it first; still count it as evicted.
            pass
        removed.append(victim)
        bytes_left -= victim.size_bytes
        entries_left -= 1
    return removed
402
-
403
-
404
#: Type alias for the on_evict callback used by :func:`evict_lru` and
#: :func:`enforce_caps`. Invoked once per evicted entry BEFORE the
#: directory is removed. Signature is ``(victim, reason, caps)`` where
#: ``reason`` is the already-narrowed breach descriptor reflecting which
#: cap was actually exceeded at the moment of eviction (P1 fix from the
#: iter-1 review: the previous ``(victim, caps, incoming_bytes)`` shape
#: forced the audit callback to recompute reason without enough context
#: and ended up tagging every record ``size_cap+entry_cap`` under the
#: defaults).
#:
#: NOTE: :func:`evict_lru` names this alias in its signature even though the
#: alias is defined after it; that is safe only because the module uses
#: ``from __future__ import annotations``, which keeps annotations lazy.
EvictCallback = Callable[[EntryUsage, str, CacheCaps], None]
414
-
415
-
416
- # ---------------------------------------------------------------------------
417
- # High-level enforce_caps entry point
418
- # ---------------------------------------------------------------------------
419
-
420
-
421
@dataclass(frozen=True)
class EnforceResult:
    """Outcome of an enforce_caps() call.

    ``evicted`` is the tuple of entries removed during enforcement. Empty
    when the cache was already under cap or no caps were enforced.
    ``final_usage`` is a fresh usage scan taken after eviction.
    ``would_breach`` is True iff the caps are still exceeded (including
    the incoming delta) after eviction -- the caller (cache:put) raises
    :class:`CacheCapBreachedError` in that case; prune-to-cap surfaces a
    structured warning.
    """

    # Entries removed, in eviction order.
    evicted: tuple[EntryUsage, ...]
    # Usage re-scanned from disk after eviction finished.
    final_usage: UsageReport
    # True when final_usage plus the incoming delta still exceeds a cap.
    would_breach: bool
435
-
436
-
437
def predict_eviction_set(
    cache_root: Path,
    *,
    sources: Iterable[str],
    caps: CacheCaps,
) -> tuple[EntryUsage, ...]:
    """Dry-run: list what LRU eviction would remove, touching nothing.

    Entries are taken in LRU order and added to the plan until the
    projected byte and entry totals both fit under the enforced caps. No
    incoming delta and no protected keys are considered. Returns an empty
    tuple when no cap is enforced or the cache is already within its caps.
    """
    if not caps.any_enforced:
        return ()
    snapshot = scan_usage(cache_root, sources=sources)
    if not cap_breached(snapshot, caps):
        return ()

    planned: list[EntryUsage] = []
    bytes_left = snapshot.total_bytes
    entries_left = snapshot.total_entries
    for candidate in lru_order(snapshot):
        over_bytes = caps.bytes_enforced and bytes_left > caps.max_bytes
        over_entries = caps.entries_enforced and entries_left > caps.max_entries
        if not over_bytes and not over_entries:
            break
        planned.append(candidate)
        bytes_left -= candidate.size_bytes
        entries_left -= 1
    return tuple(planned)
469
-
470
-
471
def enforce_caps(
    cache_root: Path,
    *,
    sources: Iterable[str],
    caps: CacheCaps | None = None,
    incoming_bytes: int = 0,
    incoming_entries: int = 0,
    protect_keys: Iterable[tuple[str, str]] = (),
    on_evict: EvictCallback | None = None,
) -> EnforceResult:
    """Run LRU eviction, then report whether the caps are finally met.

    This is :func:`evict_lru` followed by a fresh disk scan and one more
    cap check, so the caller can tell "evicted and now fits" apart from
    "evicted everything allowed and still over". When ``caps`` is ``None``
    the caps are obtained from :func:`resolve_caps`.
    """
    active_caps = resolve_caps() if caps is None else caps
    removed = evict_lru(
        cache_root,
        sources=sources,
        caps=active_caps,
        incoming_bytes=incoming_bytes,
        incoming_entries=incoming_entries,
        protect_keys=protect_keys,
        on_evict=on_evict,
    )
    # Re-scan rather than trusting running totals: this is the figure
    # reported back to the caller.
    usage_after = scan_usage(cache_root, sources=sources)
    still_over = cap_breached(
        usage_after,
        active_caps,
        incoming_bytes=incoming_bytes,
        incoming_entries=incoming_entries,
    )
    return EnforceResult(
        evicted=tuple(removed),
        final_usage=usage_after,
        would_breach=still_over,
    )
508
-
509
-
510
# Public names exported by ``from _cache_quota import *``. The
# underscore-prefixed helpers (``_parse_int_env``, ``_read_meta_size``)
# are not listed.
__all__ = [
    "CAP_DISABLED",
    "CacheCapBreachedError",
    "CacheCaps",
    "DEFAULT_MAX_BYTES",
    "DEFAULT_MAX_ENTRIES",
    "ENV_MAX_BYTES",
    "ENV_MAX_ENTRIES",
    "EnforceResult",
    "EntryUsage",
    "EvictCallback",
    "UsageReport",
    "cap_breached",
    "enforce_caps",
    "evict_lru",
    "lru_order",
    "predict_eviction_set",
    "resolve_caps",
    "scan_usage",
]
@@ -1,163 +0,0 @@
1
- #!/usr/bin/env python3
2
- """_cache_refresh.py -- cache:refresh-closed reconciliation (#1476).
3
-
4
- Split out of :mod:`cache` so the parent stays under the deft 1000-line
5
- MUST limit (mirrors the existing ``_cache_fetch`` / ``_cache_quota`` /
6
- ``_cache_validate`` split).
7
-
8
- Why this module exists
9
- ----------------------
10
- The default ``cache:fetch-all`` enumeration is ``state=open``. Once an
11
- upstream GitHub issue closes it drops out of that enumeration, so its
12
- cached ``raw.json`` is never rewritten and keeps saying ``state=open``
13
- for the full 7-day cache TTL. ``triage:queue`` then keeps ranking the
14
- closed issue as actionable untriaged work -- the #1322 shape recorded
15
- in #1476.
16
-
17
- This module reconciles that gap. :func:`cache_refresh_closed`:
18
-
19
- 1. Scans on-disk cache entries whose ``raw.json`` says ``state=open``.
20
- 2. Enumerates the current open issue numbers (the authoritative set).
21
- 3. For each cached-open entry NOT in the open enumeration, fetches its
22
- live single-issue state and, when closed, rewrites the entry via
23
- ``cache.cache_put`` so the next queue walk excludes it.
24
-
25
- The single-issue fetch + rewrite loop lives in
26
- :func:`_cache_fetch.run_state_refresh`; the open enumeration in
27
- :func:`_cache_fetch.list_open_issue_numbers`. This module owns the
28
- on-disk scan and the ``cache_put`` binding.
29
-
30
- Import-cycle note
31
- -----------------
32
- ``cache`` imports ``cache_refresh_closed`` from here at module load, so
33
- this module MUST NOT import ``cache`` at the top level. The single
34
- ``import cache`` lives inside :func:`cache_refresh_closed`, by which
35
- time ``cache`` is fully initialised.
36
- """
37
-
38
- from __future__ import annotations
39
-
40
- import contextlib
41
- import json
42
- import sys
43
- from pathlib import Path
44
- from typing import Any
45
-
46
- # Make sibling ``scripts`` modules importable when invoked via
47
- # ``python scripts/cache.py`` from a Taskfile dispatch.
48
- sys.path.insert(0, str(Path(__file__).resolve().parent))
49
-
50
- from _cache_fetch import ( # noqa: E402 -- intentional sys.path tweak
51
- StateRefreshReport,
52
- list_open_issue_numbers,
53
- run_state_refresh,
54
- )
55
-
56
- #: Default cache source. v1 ships ``github-issue`` only (mirrors
57
- #: ``cache.ALLOWED_SOURCES``).
58
- _DEFAULT_SOURCE = "github-issue"
59
-
60
-
61
def scan_cached_open_entries(
    repo: str,
    *,
    source: str,
    cache_root: Path,
) -> list[tuple[int, dict[str, Any]]]:
    """Return ``(number, raw)`` for on-disk cache entries that say ``state=open``.

    Walks ``<cache_root>/<source>/<owner>/<name>/<N>/raw.json`` (only
    directories whose name is all digits) and collects the parsed payloads
    whose ``state`` is ``open``, compared case-insensitively. A missing,
    empty, or non-string ``state`` is treated as ``open``.

    Args:
        repo: ``owner/name`` slug; anything without a ``/`` yields ``[]``.
        source: Cache source subtree name.
        cache_root: Root directory of the cache.

    Returns:
        A list of ``(issue_number, raw_payload)`` tuples in sorted
        directory-path order. The number comes from the payload's integer
        ``number`` field, falling back to the directory name. Entries whose
        ``raw.json`` is unreadable, not UTF-8, not JSON, or not a JSON
        object are skipped rather than raising.
    """
    if "/" not in repo:
        return []
    owner, name = repo.split("/", 1)
    base = cache_root / source / owner / name
    if not base.is_dir():
        return []
    out: list[tuple[int, dict[str, Any]]] = []
    for entry in sorted(base.iterdir()):
        if not entry.is_dir() or not entry.name.isdigit():
            continue
        raw_path = entry / "raw.json"
        if not raw_path.is_file():
            continue
        try:
            raw = json.loads(raw_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and the
            # UnicodeDecodeError that read_text() raises on non-UTF-8
            # bytes; one corrupt entry must not abort the whole scan.
            continue
        if not isinstance(raw, dict):
            continue
        number = raw.get("number")
        # bool is an int subclass: a JSON ``true`` is not an issue number.
        if isinstance(number, bool) or not isinstance(number, int):
            try:
                # str.isdigit() admits characters int() rejects (e.g. "²").
                number = int(entry.name)
            except (ValueError, TypeError):
                continue
        state_raw = raw.get("state") or "open"
        state = state_raw.lower() if isinstance(state_raw, str) else "open"
        if state != "open":
            continue
        out.append((number, raw))
    return out
107
-
108
-
109
def cache_refresh_closed(
    *,
    source: str,
    repo: str,
    ttl_seconds: int | None = None,
    delay_ms: int | None = None,
    limit: int = 1000,
    cache_root: Path | None = None,
) -> StateRefreshReport:
    """Reconcile cached-open entries against the live open-issue list (#1476).

    Steps, in order:

    1. Validate ``source``, ``repo`` and the effective delay.
    2. Collect the on-disk entries that still say ``state=open``. If there
       are none, return an empty :class:`StateRefreshReport` immediately,
       before the open enumeration is requested.
    3. Enumerate the currently open issue numbers (at most ``limit``).
    4. Hand both sets to :func:`_cache_fetch.run_state_refresh`, together
       with a writer that stores payloads through ``cache.cache_put``.

    Raises:
        cache.CacheError: If ``source`` is not ``github-issue``, ``repo``
            does not match ``cache._REPO_RE``, or the effective delay is
            negative.
    """
    # Imported here, not at module top, to avoid the circular import
    # between ``cache`` and this module.
    import cache

    if source != _DEFAULT_SOURCE:
        raise cache.CacheError(
            f"cache:refresh-closed source={source!r} not supported in v1 "
            "(supports: github-issue only; other sources deferred to v2)"
        )
    if not cache._REPO_RE.match(repo):
        raise cache.CacheError(
            f"invalid --repo {repo!r}: expected 'owner/repo' "
            "(alphanumerics, '.', '_', '-' only)"
        )
    pause_ms = cache.DEFAULT_DELAY_MS if delay_ms is None else delay_ms
    if pause_ms < 0:
        raise cache.CacheError(f"--delay-ms must be >= 0 (got {pause_ms!r})")

    scan_root = cache.DEFAULT_CACHE_ROOT if cache_root is None else cache_root
    stale_candidates = scan_cached_open_entries(
        repo,
        source=source,
        cache_root=Path(scan_root),
    )
    if not stale_candidates:
        return StateRefreshReport()

    live_open = list_open_issue_numbers(repo, state="open", limit=limit)

    def _rewrite_entry(key: str, raw: dict[str, Any]) -> None:
        # Pass the caller's original ``cache_root`` (possibly None) through
        # unchanged, exactly as it was given to this function.
        cache.cache_put(
            source,
            key,
            raw,
            ttl_seconds=ttl_seconds,
            cache_root=cache_root,
        )

    return run_state_refresh(
        repo=repo,
        open_numbers=live_open,
        cached_open=stale_candidates,
        do_put=_rewrite_entry,
        delay_ms=pause_ms,
    )