@deftai/directive-content 0.55.1 → 0.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. package/.githooks/pre-commit +143 -0
  2. package/.githooks/pre-push +121 -0
  3. package/QUICK-START.md +13 -3
  4. package/Taskfile.yml +934 -0
  5. package/UPGRADING.md +82 -11
  6. package/events/README.md +3 -3
  7. package/package.json +5 -4
  8. package/packs/skills/skills-pack-0.1.json +22 -22
  9. package/scripts/_agents_md.py +494 -0
  10. package/scripts/_cache_fetch.py +635 -0
  11. package/scripts/_cache_quota.py +529 -0
  12. package/scripts/_cache_refresh.py +163 -0
  13. package/scripts/_cache_validate.py +209 -0
  14. package/scripts/_content_root.py +42 -0
  15. package/scripts/_doctor_state.py +277 -0
  16. package/scripts/_event_detect.py +305 -0
  17. package/scripts/_events.py +514 -0
  18. package/scripts/_lifecycle_hygiene.py +568 -0
  19. package/scripts/_pathspec.py +91 -0
  20. package/scripts/_policy_show_cli.py +266 -0
  21. package/scripts/_precutover.py +92 -0
  22. package/scripts/_project_context.py +224 -0
  23. package/scripts/_project_definition_io.py +164 -0
  24. package/scripts/_relocate_snapshot.py +209 -0
  25. package/scripts/_relocate_states.py +343 -0
  26. package/scripts/_resolve_preflight_path.py +152 -0
  27. package/scripts/_safe_subprocess.py +167 -0
  28. package/scripts/_session_start_hook.py +205 -0
  29. package/scripts/_sor_gate_diff.py +365 -0
  30. package/scripts/_stdio_utf8.py +59 -0
  31. package/scripts/_triage_bootstrap_gitignore.py +904 -0
  32. package/scripts/_triage_classify_cli.py +122 -0
  33. package/scripts/_triage_queue_cli.py +625 -0
  34. package/scripts/_triage_scope_cli.py +343 -0
  35. package/scripts/_triage_scope_drift_cli.py +121 -0
  36. package/scripts/_triage_scope_ignores.py +286 -0
  37. package/scripts/_triage_scope_milestone.py +432 -0
  38. package/scripts/_triage_scope_mutations.py +337 -0
  39. package/scripts/_triage_scope_renderers.py +207 -0
  40. package/scripts/_triage_smoketest_stages.py +674 -0
  41. package/scripts/_triage_subscribe_cli.py +140 -0
  42. package/scripts/_triage_welcome_cli.py +421 -0
  43. package/scripts/_vbrief_build.py +239 -0
  44. package/scripts/_vbrief_fidelity.py +479 -0
  45. package/scripts/_vbrief_legacy.py +589 -0
  46. package/scripts/_vbrief_reconciliation.py +883 -0
  47. package/scripts/_vbrief_routing.py +277 -0
  48. package/scripts/_vbrief_safety.py +778 -0
  49. package/scripts/_vbrief_sources.py +312 -0
  50. package/scripts/_vbrief_speckit.py +262 -0
  51. package/scripts/_vbrief_story_quality.py +353 -0
  52. package/scripts/_vbrief_validation.py +299 -0
  53. package/scripts/build_dist.py +412 -0
  54. package/scripts/cache.py +1078 -0
  55. package/scripts/cache_scanner.py +745 -0
  56. package/scripts/candidates_log.py +432 -0
  57. package/scripts/capacity_backfill.py +680 -0
  58. package/scripts/capacity_show.py +653 -0
  59. package/scripts/ci_local.py +689 -0
  60. package/scripts/code_structure_validate.py +765 -0
  61. package/scripts/codebase_default_extractor.py +495 -0
  62. package/scripts/codebase_map.py +304 -0
  63. package/scripts/codebase_map_fresh.py +104 -0
  64. package/scripts/codebase_projection_registry.py +94 -0
  65. package/scripts/codebase_provider.py +582 -0
  66. package/scripts/doctor.py +2257 -0
  67. package/scripts/framework_commands.py +505 -0
  68. package/scripts/gh_rest.py +882 -0
  69. package/scripts/github_auth_modes.py +437 -0
  70. package/scripts/github_body.py +292 -0
  71. package/scripts/ip_risk.py +531 -0
  72. package/scripts/issue_emit.py +670 -0
  73. package/scripts/issue_ingest.py +1064 -0
  74. package/scripts/migrate_preflight.py +418 -0
  75. package/scripts/migrate_vbrief.py +2677 -0
  76. package/scripts/monitor_pr.py +401 -0
  77. package/scripts/pack_migrate_lessons.py +336 -0
  78. package/scripts/pack_migrate_patterns.py +254 -0
  79. package/scripts/pack_migrate_rules.py +350 -0
  80. package/scripts/pack_migrate_skills.py +423 -0
  81. package/scripts/pack_migrate_strategies.py +311 -0
  82. package/scripts/pack_migrate_swarm_spec.py +250 -0
  83. package/scripts/pack_render.py +434 -0
  84. package/scripts/packs_slice.py +712 -0
  85. package/scripts/platform_capabilities.py +336 -0
  86. package/scripts/policy.py +2826 -0
  87. package/scripts/policy_set.py +324 -0
  88. package/scripts/pr_check_closing_keywords.py +524 -0
  89. package/scripts/pr_check_protected_issues.py +267 -0
  90. package/scripts/pr_merge_readiness.py +1004 -0
  91. package/scripts/pr_wait_mergeable.py +669 -0
  92. package/scripts/prd_render.py +159 -0
  93. package/scripts/preflight_architecture_sor.py +974 -0
  94. package/scripts/preflight_branch.py +289 -0
  95. package/scripts/preflight_cache.py +974 -0
  96. package/scripts/preflight_gh.py +721 -0
  97. package/scripts/preflight_implementation.py +272 -0
  98. package/scripts/preflight_story_start.py +838 -0
  99. package/scripts/preflight_wip_cap.py +149 -0
  100. package/scripts/probe_session.py +545 -0
  101. package/scripts/project_render.py +293 -0
  102. package/scripts/quarantine_ext.py +237 -0
  103. package/scripts/reconcile_issues.py +1442 -0
  104. package/scripts/refresh-path.ps1 +107 -0
  105. package/scripts/release.py +2030 -0
  106. package/scripts/release_e2e.py +1011 -0
  107. package/scripts/release_publish.py +486 -0
  108. package/scripts/release_rollback.py +980 -0
  109. package/scripts/relocate.py +1034 -0
  110. package/scripts/resolve_changelog_unreleased.py +667 -0
  111. package/scripts/resolve_version.py +490 -0
  112. package/scripts/resume_conditions.py +706 -0
  113. package/scripts/ritual_sentinel.py +609 -0
  114. package/scripts/roadmap_render.py +635 -0
  115. package/scripts/rule_ownership_lint.py +325 -0
  116. package/scripts/scm.py +591 -0
  117. package/scripts/scope_audit_log.py +387 -0
  118. package/scripts/scope_decompose.py +654 -0
  119. package/scripts/scope_demote.py +509 -0
  120. package/scripts/scope_lifecycle.py +1126 -0
  121. package/scripts/scope_undo.py +772 -0
  122. package/scripts/session_start.py +406 -0
  123. package/scripts/setup_ghx.py +339 -0
  124. package/scripts/setup_windows.ps1 +220 -0
  125. package/scripts/slice_audit.py +585 -0
  126. package/scripts/slice_record.py +530 -0
  127. package/scripts/slice_record_existing.py +692 -0
  128. package/scripts/slug_normalize.py +178 -0
  129. package/scripts/spec_render.py +477 -0
  130. package/scripts/spec_validate.py +238 -0
  131. package/scripts/subagent_monitor.py +658 -0
  132. package/scripts/swarm_complete_cohort.py +644 -0
  133. package/scripts/swarm_launch.py +1206 -0
  134. package/scripts/swarm_readiness.py +554 -0
  135. package/scripts/swarm_verify_review_clean.py +438 -0
  136. package/scripts/swarm_worktrees.py +497 -0
  137. package/scripts/toolchain-check.py +52 -0
  138. package/scripts/triage_actions.py +871 -0
  139. package/scripts/triage_bootstrap.py +1153 -0
  140. package/scripts/triage_bulk.py +630 -0
  141. package/scripts/triage_classify.py +932 -0
  142. package/scripts/triage_help.py +1685 -0
  143. package/scripts/triage_queue.py +1944 -0
  144. package/scripts/triage_reconcile.py +581 -0
  145. package/scripts/triage_refresh.py +643 -0
  146. package/scripts/triage_scope.py +999 -0
  147. package/scripts/triage_scope_drift.py +575 -0
  148. package/scripts/triage_smoketest.py +396 -0
  149. package/scripts/triage_subscribe.py +399 -0
  150. package/scripts/triage_summary.py +1011 -0
  151. package/scripts/triage_welcome.py +1178 -0
  152. package/scripts/ts_check_lane.py +86 -0
  153. package/scripts/validate-links.py +64 -0
  154. package/scripts/validate_strategy_output.py +212 -0
  155. package/scripts/vbrief_activate.py +228 -0
  156. package/scripts/vbrief_migrate_conformance.py +368 -0
  157. package/scripts/vbrief_reconcile_graph.py +306 -0
  158. package/scripts/vbrief_reconcile_labels.py +460 -0
  159. package/scripts/vbrief_reconcile_umbrellas.py +741 -0
  160. package/scripts/vbrief_validate.py +1195 -0
  161. package/scripts/verify-stubs.py +61 -0
  162. package/scripts/verify_capacity.py +160 -0
  163. package/scripts/verify_encoding.py +699 -0
  164. package/scripts/verify_hooks_installed.py +206 -0
  165. package/scripts/verify_investigation.py +360 -0
  166. package/scripts/verify_judgment_gates.py +827 -0
  167. package/scripts/verify_no_task_runtime.py +171 -0
  168. package/scripts/verify_scm_boundary.py +509 -0
  169. package/scripts/verify_session_ritual.py +389 -0
  170. package/scripts/verify_tools.py +426 -0
  171. package/scripts/verify_vbrief_conformance.py +478 -0
  172. package/skills/deft-directive-swarm/SKILL.md +7 -26
  173. package/skills/deft-directive-sync/SKILL.md +1 -1
  174. package/tasks/architecture.yml +13 -0
  175. package/tasks/cache.yml +69 -0
  176. package/tasks/capacity.yml +38 -0
  177. package/tasks/change.yml +46 -0
  178. package/tasks/changelog.yml +24 -0
  179. package/tasks/ci.yml +49 -0
  180. package/tasks/codebase.yml +47 -0
  181. package/tasks/commit.yml +30 -0
  182. package/tasks/core.yml +126 -0
  183. package/tasks/deployments.yml +54 -0
  184. package/tasks/framework.yml +74 -0
  185. package/tasks/install.yml +60 -0
  186. package/tasks/issue.yml +50 -0
  187. package/tasks/migrate.yml +73 -0
  188. package/tasks/packs.yml +92 -0
  189. package/tasks/policy.yml +75 -0
  190. package/tasks/pr.yml +89 -0
  191. package/tasks/prd.yml +39 -0
  192. package/tasks/project.yml +27 -0
  193. package/tasks/reconcile.yml +32 -0
  194. package/tasks/relocate.yml +56 -0
  195. package/tasks/roadmap.yml +28 -0
  196. package/tasks/scm.yml +126 -0
  197. package/tasks/scope-undo.yml +36 -0
  198. package/tasks/scope.yml +141 -0
  199. package/tasks/session.yml +19 -0
  200. package/tasks/setup.yml +37 -0
  201. package/tasks/slice.yml +69 -0
  202. package/tasks/spec.yml +41 -0
  203. package/tasks/swarm.yml +85 -0
  204. package/tasks/toolchain.yml +13 -0
  205. package/tasks/triage-actions.yml +94 -0
  206. package/tasks/triage-bootstrap.yml +43 -0
  207. package/tasks/triage-bulk.yml +75 -0
  208. package/tasks/triage-classify.yml +30 -0
  209. package/tasks/triage-queue.yml +50 -0
  210. package/tasks/triage-reconcile.yml +29 -0
  211. package/tasks/triage-scope-drift.yml +29 -0
  212. package/tasks/triage-scope.yml +31 -0
  213. package/tasks/triage-smoketest.yml +33 -0
  214. package/tasks/triage-subscribe.yml +36 -0
  215. package/tasks/triage-summary.yml +29 -0
  216. package/tasks/triage-welcome.yml +32 -0
  217. package/tasks/ts.yml +328 -0
  218. package/tasks/vbrief.yml +206 -0
  219. package/tasks/verify.yml +292 -0
  220. package/templates/agents-entry.md +2 -2
@@ -0,0 +1,1078 @@
1
+ #!/usr/bin/env python3
2
+ r"""cache.py -- unified content cache for the deft framework (#883 Story 2).
3
+
4
+ Public surface (5 commands)
5
+ ---------------------------
6
+
7
+ python scripts/cache.py put <source> <key> --raw-file PATH [--ttl-seconds N]
8
+ python scripts/cache.py get <source> <key> [--allow-stale | --no-stale]
9
+ python scripts/cache.py invalidate <source> <key> [--reason TEXT]
10
+ python scripts/cache.py fetch-all --source github-issue --repo OWNER/NAME [...]
11
+ python scripts/cache.py prune [--older-than-days 30] [--source ...] [--dry-run]
12
+
13
+ Storage: ``.deft-cache/<source>/<key>/{raw.json, content.md, meta.json}``
14
+ plus a global ``quarantine-audit.jsonl`` audit log.
15
+
16
+ Scanner integration: every ``cache_put`` runs ``cache_scanner.scan``;
17
+ ``credentials`` -> hard-fail (no content.md written, exit 2);
18
+ ``injection-heading`` -> fence-and-pass; ``invisible-unicode`` -> strip-and-pass.
19
+ One audit record per put / invalidate / evict regardless of scan outcome.
20
+
21
+ Quota (#947): pre-write LRU eviction enforces ``DEFT_CACHE_MAX_BYTES`` /
22
+ ``DEFT_CACHE_MAX_ENTRIES`` (defaults 100 MB / 10,000); breach -> exit 3.
23
+
24
+ Rate limit + idempotency owned by :mod:`_cache_fetch`; schema validation
25
+ by :mod:`_cache_validate`; quota by :mod:`_cache_quota`; the #1476
26
+ refresh-closed reconciliation by :mod:`_cache_refresh`. Each cache concern
27
+ lives in its own module per the deft file-size discipline.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import argparse
33
+ import contextlib
34
+ import json
35
+ import os
36
+ import re
37
+ import shutil
38
+ import sys
39
+ import tempfile
40
+ from dataclasses import dataclass
41
+ from datetime import UTC, datetime, timedelta
42
+ from pathlib import Path
43
+ from typing import Any
44
+
45
+ # Make ``scripts`` importable when this file is invoked via
46
+ # ``python scripts/cache.py`` from a Taskfile dispatch.
47
+ sys.path.insert(0, str(Path(__file__).resolve().parent))
48
+
49
+ from _cache_fetch import ( # noqa: E402 -- intentional sys.path tweak
50
+ CacheFetchError,
51
+ FetchAllReport,
52
+ StateRefreshReport,
53
+ run_fetch_all,
54
+ )
55
+ from _cache_quota import ( # noqa: E402
56
+ CacheCapBreachedError,
57
+ CacheCaps,
58
+ EnforceResult,
59
+ EntryUsage,
60
+ enforce_caps as _enforce_caps,
61
+ predict_eviction_set,
62
+ resolve_caps,
63
+ scan_usage,
64
+ )
65
+
66
+ # #1476 refresh-closed path; lazily imports ``cache`` at call time so this
67
+ # top-level import does not create a cycle.
68
+ from _cache_refresh import cache_refresh_closed # noqa: E402
69
+ from _cache_validate import ( # noqa: E402
70
+ CacheValidationError,
71
+ validate_meta as _validate_meta_against_sources,
72
+ )
73
+ from cache_scanner import SCANNER_VERSION, ScanResult, scan # noqa: E402
74
+
75
+ # Reconfigure stdout / stderr to UTF-8 so the cache layer's status lines
76
+ # render under Windows cp1252 default (#814).
77
for _stream in (sys.stdout, sys.stderr):
    # Not every stream object exposes ``reconfigure`` (e.g. a replaced or
    # wrapped stream), so probe for it before calling.
    if hasattr(_stream, "reconfigure"):
        # Best effort: any failure to switch the encoding is ignored on
        # purpose so importing this module never fails because of it.
        with contextlib.suppress(Exception):
            _stream.reconfigure(encoding="utf-8", errors="replace")  # type: ignore[union-attr]
81
+
82
+ # Re-export the scanner version so callers / tests can verify the cache
83
+ # module advertises the same SemVer the scanner module persists.
84
+ __all__ = [
85
+ "ALLOWED_SOURCES",
86
+ "CacheCapBreachedError",
87
+ "CacheCaps",
88
+ "CacheError",
89
+ "CacheNotFoundError",
90
+ "CacheValidationError",
91
+ "DEFAULT_BATCH_SIZE",
92
+ "DEFAULT_DELAY_MS",
93
+ "DEFAULT_PRUNE_OLDER_THAN_DAYS",
94
+ "EnforceResult",
95
+ "EntryUsage",
96
+ "FetchAllReport",
97
+ "GetResult",
98
+ "PutResult",
99
+ "SCANNER_VERSION",
100
+ "SOURCE_TTL_SECONDS",
101
+ "StateRefreshReport",
102
+ "audit_path",
103
+ "cache_fetch_all",
104
+ "cache_get",
105
+ "cache_invalidate",
106
+ "cache_prune",
107
+ "cache_prune_to_cap",
108
+ "cache_put",
109
+ "cache_refresh_closed",
110
+ "entry_dir",
111
+ "main",
112
+ "resolve_caps",
113
+ "scan_usage",
114
+ "validate_meta",
115
+ ]
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # Constants
119
+ # ---------------------------------------------------------------------------
120
+
121
# Cache tree root used whenever a caller passes ``cache_root=None``. The path
# is relative, so it resolves against the process's current working directory.
DEFAULT_CACHE_ROOT: Path = Path(".deft-cache")
# File name of the JSON-lines audit log stored directly under the cache root.
AUDIT_LOG_NAME: str = "quarantine-audit.jsonl"

#: Hard-coded TTLs per source type (v1 ships github-issue only).
SOURCE_TTL_SECONDS: dict[str, int] = {"github-issue": 7 * 24 * 60 * 60}  # 7 days
# The set of accepted sources is derived from the TTL table so the two cannot drift.
ALLOWED_SOURCES: tuple[str, ...] = tuple(SOURCE_TTL_SECONDS.keys())

#: github-issue key shape: owner/repo/N (alphanumerics, '.', '_', '-' only).
# Each owner/repo segment must start with an alphanumeric character, which
# rules out '.', '..' and dash-leading segments.
# NOTE(review): ``\d+`` also accepts ``0`` and leading zeros (``07``), although
# the error message in ``_validate_key`` says "N positive integer" -- confirm
# whether that is intended.
_GH_KEY_RE: re.Pattern[str] = re.compile(
    r"^([A-Za-z0-9][A-Za-z0-9._-]*)/([A-Za-z0-9][A-Za-z0-9._-]*)/(\d+)$"
)
# ``owner/repo`` shape checked by ``cache_fetch_all`` for its ``repo`` argument.
_REPO_RE: re.Pattern[str] = re.compile(
    r"^([A-Za-z0-9][A-Za-z0-9._-]*)/([A-Za-z0-9][A-Za-z0-9._-]*)$"
)

# Default ``batch_size`` for ``cache_fetch_all``.
DEFAULT_BATCH_SIZE: int = 10
#: REST-paginated fetch-all (#1239) no longer shells out per issue; the
#: old 500 ms default burned minutes on hundred-issue cohorts (#1562).
#: Explicit ``--delay-ms`` still paces local writes when operators need it.
DEFAULT_DELAY_MS: int = 0
# Default ``older_than_days`` for ``cache_prune``.
DEFAULT_PRUNE_OLDER_THAN_DAYS: int = 30
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # Errors
146
+ # ---------------------------------------------------------------------------
147
+
148
+
149
class CacheError(RuntimeError):
    """Generic cache-layer failure (subprocess, parse, IO).

    Raised in this module for invalid keys, unknown sources, bad TTLs,
    malformed raw payloads and invalid ``fetch-all`` arguments.
    """
151
+
152
+
153
class CacheNotFoundError(KeyError):
    """Cache miss for the requested (source, key).

    ``cache_get`` also raises this when the entry exists but is past its
    ``expires_at`` and the caller passed ``allow_stale=False``.

    NOTE(review): this subclasses ``KeyError``, whose ``str()`` is the repr
    of its argument, so a printed message comes out wrapped in quotes --
    confirm that callers print ``exc.args[0]`` where that matters.
    """
155
+
156
+
157
+ # ---------------------------------------------------------------------------
158
+ # Time helpers
159
+ # ---------------------------------------------------------------------------
160
+
161
+
162
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(UTC)
164
+
165
+
166
+ def _utc_iso(dt: datetime | None = None) -> str:
167
+ return (dt or _utc_now()).astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
168
+
169
+
170
+ def _parse_iso(stamp: str) -> datetime:
171
+ text = stamp.strip()
172
+ if text.endswith("Z"):
173
+ text = text[:-1] + "+00:00"
174
+ return datetime.fromisoformat(text)
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # Schema validation (delegates to _cache_validate)
179
+ # ---------------------------------------------------------------------------
180
+
181
+
182
def validate_meta(meta: dict[str, Any]) -> None:
    """Validate ``meta`` against cache-meta.schema.json. Raises :class:`CacheValidationError`.

    Thin wrapper: the check itself lives in ``_cache_validate.validate_meta``;
    this module only supplies its own ``ALLOWED_SOURCES`` as the second argument.
    """
    _validate_meta_against_sources(meta, ALLOWED_SOURCES)
185
+
186
+
187
+ # ---------------------------------------------------------------------------
188
+ # Path layout
189
+ # ---------------------------------------------------------------------------
190
+
191
+
192
def _validate_key(source: str, key: str) -> None:
    """Check that ``key`` has the shape required by ``source``.

    Only ``github-issue`` is recognised; its keys must match ``_GH_KEY_RE``
    (``<owner>/<repo>/<N>``).

    Raises:
        CacheError: for an unknown source or a key that does not match.
    """
    if source != "github-issue":
        raise CacheError(f"unknown source {source!r}: v1 supports {sorted(ALLOWED_SOURCES)!r}")
    if _GH_KEY_RE.match(key):
        return
    raise CacheError(
        f"invalid github-issue key {key!r}: expected '<owner>/<repo>/<N>' "
        "(alphanumerics, '.', '_', '-' only; N positive integer)"
    )
201
+
202
+
203
def entry_dir(source: str, key: str, *, cache_root: Path | None = None) -> Path:
    """Return ``<cache_root>/<source>/<key>/``.

    Each ``/``-separated part of ``key`` becomes its own directory level.
    ``DEFAULT_CACHE_ROOT`` is used when ``cache_root`` is ``None``. Nothing
    is created on disk.

    Raises:
        CacheError: if ``source`` is not allowed or ``key`` is malformed.
    """
    if source not in ALLOWED_SOURCES:
        raise CacheError(f"unknown source {source!r}: v1 supports {sorted(ALLOWED_SOURCES)!r}")
    _validate_key(source, key)
    base = DEFAULT_CACHE_ROOT if cache_root is None else cache_root
    return Path(base).joinpath(source, *key.split("/"))
210
+
211
+
212
def audit_path(*, cache_root: Path | None = None) -> Path:
    """Return the audit-log path ``<cache_root>/<AUDIT_LOG_NAME>``.

    ``DEFAULT_CACHE_ROOT`` is used when ``cache_root`` is ``None``.
    """
    base = DEFAULT_CACHE_ROOT if cache_root is None else cache_root
    return Path(base).joinpath(AUDIT_LOG_NAME)
215
+
216
+
217
+ # ---------------------------------------------------------------------------
218
+ # Atomic write + audit append
219
+ # ---------------------------------------------------------------------------
220
+
221
+
222
+ def _atomic_write_text(path: Path, text: str) -> None:
223
+ """Write ``text`` to ``path`` via tempfile + ``os.replace``."""
224
+ path.parent.mkdir(parents=True, exist_ok=True)
225
+ fd, tmp_name = tempfile.mkstemp(prefix=path.name + ".", suffix=".tmp", dir=str(path.parent))
226
+ tmp = Path(tmp_name)
227
+ try:
228
+ with os.fdopen(fd, "w", encoding="utf-8", newline="") as fh:
229
+ fh.write(text)
230
+ os.replace(tmp, path)
231
+ except BaseException:
232
+ with contextlib.suppress(FileNotFoundError):
233
+ tmp.unlink()
234
+ raise
235
+
236
+
237
def _append_audit(record: dict[str, Any], *, cache_root: Path | None = None) -> None:
    """Append ``record`` as one JSON line to quarantine-audit.jsonl.

    The log location comes from ``audit_path``; its parent directory is
    created on demand. Keys are sorted and non-ASCII text is kept verbatim.
    """
    log_file = audit_path(cache_root=cache_root)
    log_file.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n"
    with log_file.open("a", encoding="utf-8", newline="") as sink:
        sink.write(payload)
244
+
245
+
246
+ # ---------------------------------------------------------------------------
247
+ # Source-specific content rendering
248
+ # ---------------------------------------------------------------------------
249
+
250
+
251
+ def _render_content(source: str, raw: dict[str, Any]) -> str:
252
+ """Render the source-specific markdown body that the scanner consumes.
253
+
254
+ For ``github-issue``: ``# #<N>: <title>\\n\\n<body>``. The title line
255
+ is included so a hostile title becomes a suspicious heading and is
256
+ wrapped in quarantined fences by the scanner (mirrors the
257
+ Greptile-fixed contract in scripts/triage_cache.py::_render_issue_md).
258
+ """
259
+ if source == "github-issue":
260
+ number = raw.get("number")
261
+ title = raw.get("title") or ""
262
+ body = raw.get("body") or ""
263
+ if not isinstance(number, int):
264
+ raise CacheError(
265
+ f"invalid github-issue raw payload: 'number' must be int "
266
+ f"(got {type(number).__name__})"
267
+ )
268
+ return f"# #{number}: {title}\n\n{body}"
269
+ raise CacheError(f"unknown source {source!r}: v1 supports {sorted(ALLOWED_SOURCES)!r}")
270
+
271
+
272
+ # ---------------------------------------------------------------------------
273
+ # Cache primitives
274
+ # ---------------------------------------------------------------------------
275
+
276
+
277
@dataclass
class PutResult:
    """Outcome of one :func:`cache_put` call."""

    # Source and key exactly as passed to ``cache_put``.
    source: str
    key: str
    # Directory of the entry, as computed by ``entry_dir()``.
    entry_dir: Path
    # The meta document that was validated and written to ``meta.json``.
    meta: dict[str, Any]
    # Scanner verdict for the rendered content.
    scan_result: ScanResult
    # True only when the scan passed and ``content.md`` was written.
    content_written: bool
285
+
286
+
287
@dataclass
class GetResult:
    """Outcome of one :func:`cache_get` call."""

    # Source and key exactly as passed to ``cache_get``.
    source: str
    key: str
    # Directory of the entry, as computed by ``entry_dir()``.
    entry_dir: Path
    # Parsed ``meta.json``; ``cache_get`` overwrites ``meta["stale"]`` in
    # memory with the staleness it computed at read time.
    meta: dict[str, Any]
    # Path to ``content.md``, or ``None`` when that file does not exist.
    content_path: Path | None
    # True when the current time is past the entry's ``expires_at``.
    stale: bool
295
+
296
+
297
def cache_put(
    source: str,
    key: str,
    raw: dict[str, Any],
    *,
    ttl_seconds: int | None = None,
    cache_root: Path | None = None,
    fetched_at: datetime | None = None,
    caps: CacheCaps | None = None,
) -> PutResult:
    """Write a cache entry. Always writes raw.json + meta.json; conditionally writes content.md.

    Pre-write quota enforcement (#947): projects the new total against
    the resolved caps, evicts LRU entries until the put fits, and raises
    :class:`CacheCapBreachedError` if eviction can't free enough (CLI exit-3).

    The payload is serialised and rendered *before* any eviction or disk
    write, so a malformed ``raw`` fails fast without evicting other entries
    and without leaving a ``raw.json`` that has no matching ``meta.json``.

    Args:
        source: cache source; must be one of ``ALLOWED_SOURCES``.
        key: entry key in the shape required by ``source``.
        raw: JSON-serialisable payload stored verbatim as ``raw.json``.
        ttl_seconds: lifetime override; defaults to ``SOURCE_TTL_SECONDS[source]``.
        cache_root: cache tree root; defaults to ``DEFAULT_CACHE_ROOT``.
        fetched_at: fetch timestamp; defaults to the current UTC time.
        caps: quota caps; when ``None`` they are resolved by the quota module.

    Returns:
        A :class:`PutResult` describing what was written.

    Raises:
        CacheError: invalid key / source / TTL, or a malformed payload.
        CacheCapBreachedError: the put would not fit even after eviction.
        CacheValidationError: the generated meta fails schema validation.
    """
    _validate_key(source, key)
    fetched = fetched_at or _utc_now()
    ttl = ttl_seconds if ttl_seconds is not None else SOURCE_TTL_SECONDS[source]
    # bool is a subclass of int; reject it so ``True`` is never stored as a TTL.
    if isinstance(ttl, bool) or not isinstance(ttl, int) or ttl < 0:
        raise CacheError(f"ttl_seconds must be a non-negative int (got {ttl!r})")
    expires = fetched + timedelta(seconds=ttl)

    edir = entry_dir(source, key, cache_root=cache_root)

    # Project raw.json size pre-write (UTF-8 JSON has no platform variance).
    raw_text = json.dumps(raw, indent=2, sort_keys=True, ensure_ascii=False)
    raw_size = len(raw_text.encode("utf-8"))

    # Validate and render the payload before touching the cache tree, so a
    # rejected payload has no side effects (no eviction, no partial entry).
    rendered = _render_content(source, raw)

    # Re-put: charge delta only (may be negative when shrinking; cap_breached
    # handles the arithmetic correctly). Protect the existing entry from
    # self-eviction. Flooring to 0 here was a P1 finding -- a shrinking re-put
    # against a tight cap was being rejected as a cap-breach even though the
    # smaller payload would bring the cache *under* the cap.
    existing_size = _existing_entry_size(edir)
    is_new_entry = existing_size is None
    incoming_delta = raw_size if is_new_entry else raw_size - existing_size
    incoming_entries = 1 if is_new_entry else 0

    cache_root_path = cache_root if cache_root is not None else DEFAULT_CACHE_ROOT
    enforce_result = _enforce_caps(
        cache_root_path,
        sources=ALLOWED_SOURCES,
        caps=caps,
        incoming_bytes=incoming_delta,
        incoming_entries=incoming_entries,
        protect_keys=[(source, key)],
        on_evict=_make_evict_audit_callback(
            cache_root=cache_root, trigger="cache:put"
        ),
    )
    if enforce_result.would_breach:
        # Work out which cap(s) are still exceeded, for the error's reason.
        resolved = caps if caps is not None else resolve_caps()
        reason_parts: list[str] = []
        if (
            resolved.bytes_enforced
            and enforce_result.final_usage.total_bytes + incoming_delta > resolved.max_bytes
        ):
            reason_parts.append("size_cap")
        if (
            resolved.entries_enforced
            and enforce_result.final_usage.total_entries + incoming_entries
            > resolved.max_entries
        ):
            reason_parts.append("entry_cap")
        raise CacheCapBreachedError(
            reason="+".join(reason_parts) or "unknown",
            max_bytes=resolved.max_bytes,
            max_entries=resolved.max_entries,
            current_bytes=enforce_result.final_usage.total_bytes,
            current_entries=enforce_result.final_usage.total_entries,
            incoming_bytes=incoming_delta,
        )

    edir.mkdir(parents=True, exist_ok=True)
    raw_path = edir / "raw.json"
    _atomic_write_text(raw_path, raw_text)
    raw_size = raw_path.stat().st_size  # authoritative for meta.json::size_bytes

    scan_result = scan(rendered, scanned_at=_utc_iso(fetched))

    content_path = edir / "content.md"
    content_written = False
    if scan_result.passed:
        _atomic_write_text(content_path, scan_result.transformed_content)
        content_written = True
    else:
        # On hard-fail, remove any prior content.md so cache:get does not
        # return safe-but-stale content for an entry whose latest fetch
        # contained credentials.
        with contextlib.suppress(FileNotFoundError):
            content_path.unlink()

    meta = _build_meta(
        source=source,
        key=key,
        fetched_at=fetched,
        ttl_seconds=ttl,
        expires_at=expires,
        scan_result=scan_result,
        size_bytes=raw_size,
    )
    validate_meta(meta)
    _atomic_write_text(
        edir / "meta.json",
        json.dumps(meta, indent=2, sort_keys=True, ensure_ascii=False),
    )

    # One audit record per put, regardless of the scan outcome.
    _append_audit(
        {
            "event": "cache:put",
            "source": source,
            "key": key,
            "timestamp": _utc_iso(),
            "scan_passed": scan_result.passed,
            "scanner_version": scan_result.scanner_version,
            "flags": [
                {
                    "category": f.category,
                    "severity": f.severity,
                    "detail": f.detail,
                    "match_count": f.match_count,
                }
                for f in scan_result.flags
            ],
            "content_written": content_written,
        },
        cache_root=cache_root,
    )

    return PutResult(
        source=source,
        key=key,
        entry_dir=edir,
        meta=meta,
        scan_result=scan_result,
        content_written=content_written,
    )
436
+
437
+
438
def _build_meta(
    *,
    source: str,
    key: str,
    fetched_at: datetime,
    ttl_seconds: int,
    expires_at: datetime,
    scan_result: ScanResult,
    size_bytes: int,
) -> dict[str, Any]:
    """Assemble the ``meta.json`` document for one cache entry.

    Both timestamps are serialised with ``_utc_iso``. The scanner verdict
    is flattened into plain dicts. ``stale`` is always written as ``False``.
    """
    flag_records: list[dict[str, Any]] = []
    for flag in scan_result.flags:
        flag_records.append(
            {
                "category": flag.category,
                "severity": flag.severity,
                "detail": flag.detail,
                "match_count": flag.match_count,
            }
        )

    fetched_stamp = _utc_iso(fetched_at)
    expires_stamp = _utc_iso(expires_at)
    scan_section: dict[str, Any] = {
        "passed": scan_result.passed,
        "scanned_at": scan_result.scanned_at,
        "scanner_version": scan_result.scanner_version,
        "flags": flag_records,
    }

    return {
        "source": source,
        "key": key,
        "fetched_at": fetched_stamp,
        "ttl_seconds": ttl_seconds,
        "expires_at": expires_stamp,
        "scan_result": scan_section,
        "size_bytes": size_bytes,
        "stale": False,
    }
471
+
472
+
473
def cache_get(
    source: str,
    key: str,
    *,
    cache_root: Path | None = None,
    allow_stale: bool = True,
) -> GetResult:
    """Read a cache entry. Raises :class:`CacheNotFoundError` on miss / stale-blocked.

    Args:
        source: cache source; must be one of ``ALLOWED_SOURCES``.
        key: entry key in the shape required by ``source``.
        cache_root: cache tree root; defaults to ``DEFAULT_CACHE_ROOT``.
        allow_stale: when ``False``, an entry past its ``expires_at`` is
            reported as a miss instead of being returned.

    Returns:
        A :class:`GetResult`; ``content_path`` is ``None`` when the entry
        has no ``content.md``.

    Raises:
        CacheError: invalid source or key.
        CacheNotFoundError: no ``meta.json``, or stale with ``allow_stale=False``.
        CacheValidationError: ``meta.json`` is not decodable JSON, fails
            schema validation, or carries an ``expires_at`` that cannot be
            parsed or compared against the current UTC time.
    """
    edir = entry_dir(source, key, cache_root=cache_root)
    meta_path = edir / "meta.json"
    meta_display = f"{source}/{key}/meta.json"
    if not meta_path.exists():
        raise CacheNotFoundError(
            f"cache miss for source={source!r} key={key!r} "
            f"(expected meta.json at {meta_display})"
        )
    try:
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError: the file is not UTF-8 at all. Report it the
        # same way as malformed JSON instead of leaking a codec error.
        raise CacheValidationError(
            f"meta.json at {meta_path} is not valid JSON: {exc}"
        ) from exc
    validate_meta(meta)

    try:
        expires = _parse_iso(meta["expires_at"])
        is_stale = _utc_now() > expires
    except (ValueError, TypeError) as exc:
        # ValueError: not an ISO-8601 stamp. TypeError: an offset-less
        # (naive) stamp cannot be compared with the aware current time.
        raise CacheValidationError(
            f"meta.json at {meta_path} has an unusable expires_at "
            f"{meta['expires_at']!r}: {exc}"
        ) from exc
    if is_stale and not allow_stale:
        raise CacheNotFoundError(
            f"cache entry stale for source={source!r} key={key!r}; "
            f"expires_at={meta['expires_at']} (pass --allow-stale to override)"
        )

    # Mirror the computed staleness onto the in-memory meta dict so callers
    # that inspect GetResult.meta["stale"] see the runtime truth (the on-disk
    # meta.json is always written with stale=False because staleness is a
    # read-time concept; without this the field is misleading on cache hits
    # against TTL-expired entries). #883 Story 2 P2 cleanup.
    meta["stale"] = is_stale

    # LRU signal (#947): touch meta.json mtime so future eviction passes
    # see this entry as recently-accessed. Single os.utime syscall; no
    # rewrite, no schema validation, no extra disk I/O. Failures are
    # swallowed so a read-only cache tree still serves cache hits.
    _touch_mtime(meta_path)

    content_path = edir / "content.md"
    return GetResult(
        source=source,
        key=key,
        entry_dir=edir,
        meta=meta,
        content_path=content_path if content_path.exists() else None,
        stale=is_stale,
    )
527
+
528
+
529
def cache_invalidate(
    source: str,
    key: str,
    *,
    reason: str | None = None,
    cache_root: Path | None = None,
) -> bool:
    """Delete the entry directory and append an invalidate audit record. Idempotent.

    The audit record is appended whether or not the entry existed; a
    missing ``reason`` is recorded as an empty string.

    Returns:
        ``True`` if the entry directory existed and was removed, else ``False``.

    Raises:
        CacheError: invalid source or key.
    """
    _validate_key(source, key)
    target = entry_dir(source, key, cache_root=cache_root)

    removed = target.exists()
    if removed:
        shutil.rmtree(target)

    audit_record: dict[str, Any] = {
        "event": "cache:invalidate",
        "source": source,
        "key": key,
        "timestamp": _utc_iso(),
        "reason": reason or "",
        "existed": removed,
    }
    _append_audit(audit_record, cache_root=cache_root)
    return removed
554
+
555
+
556
+ # ---------------------------------------------------------------------------
557
+ # Idempotency check (for fetch-all)
558
+ # ---------------------------------------------------------------------------
559
+
560
+
561
+ def _is_fresh(meta_path: Path) -> bool:
562
+ """Return True iff meta_path exists, parses, and expires_at is in the future."""
563
+ if not meta_path.exists():
564
+ return False
565
+ try:
566
+ meta = json.loads(meta_path.read_text(encoding="utf-8"))
567
+ validate_meta(meta)
568
+ except (json.JSONDecodeError, CacheValidationError):
569
+ return False
570
+ try:
571
+ expires = _parse_iso(meta["expires_at"])
572
+ except (ValueError, KeyError):
573
+ return False
574
+ return _utc_now() <= expires
575
+
576
+
577
+ # ---------------------------------------------------------------------------
578
+ # fetch-all (delegates loop body to _cache_fetch.run_fetch_all)
579
+ # ---------------------------------------------------------------------------
580
+
581
+
582
def cache_fetch_all(
    *,
    source: str,
    repo: str,
    batch_size: int = DEFAULT_BATCH_SIZE,
    delay_ms: int = DEFAULT_DELAY_MS,
    ttl_seconds: int | None = None,
    state: str = "open",
    limit: int = 1000,
    labels: tuple[str, ...] = (),
    author: str | None = None,
    cache_root: Path | None = None,
) -> FetchAllReport:
    """Populate the cache for issues in ``repo``. See :mod:`_cache_fetch`.

    ``labels`` (#1033) and ``author`` (#1055) narrow the REST issue
    enumeration so that only part of the backlog is ingested. With both
    left at their defaults (no labels, no author) nothing is filtered;
    when both are given they combine with AND semantics.

    Arguments are validated here; the fetch loop itself is delegated to
    ``run_fetch_all``, which receives callbacks bound to this module's
    ``entry_dir`` / ``cache_put`` and the requested ``cache_root``.

    Raises:
        CacheError: unsupported ``source``, malformed ``repo``,
            ``batch_size < 1`` or ``delay_ms < 0``.
    """
    if source != "github-issue":
        raise CacheError(
            f"cache:fetch-all source={source!r} not supported in v1 "
            "(supports: github-issue only; other sources deferred to v2)"
        )
    if _REPO_RE.match(repo) is None:
        raise CacheError(
            f"invalid --repo {repo!r}: expected 'owner/repo' "
            "(alphanumerics, '.', '_', '-' only)"
        )
    if batch_size < 1:
        raise CacheError(f"--batch-size must be >= 1 (got {batch_size!r})")
    if delay_ms < 0:
        raise CacheError(f"--delay-ms must be >= 0 (got {delay_ms!r})")

    def _locate_entry(key: str) -> Path:
        # Entry directory for ``key`` under the requested cache root.
        return entry_dir(source, key, cache_root=cache_root)

    def _store_entry(key: str, raw: dict[str, Any]) -> None:
        # Store one fetched issue; the PutResult is deliberately dropped.
        cache_put(source, key, raw, ttl_seconds=ttl_seconds, cache_root=cache_root)

    report = run_fetch_all(
        repo=repo,
        is_fresh=_is_fresh,
        entry_dir_for=_locate_entry,
        do_put=_store_entry,
        batch_size=batch_size,
        delay_ms=delay_ms,
        state=state,
        limit=limit,
        labels=labels,
        author=author,
    )
    return report
636
+
637
+
638
+ # refresh-closed (#1476): ``cache_refresh_closed`` is re-exported from
639
+ # :mod:`_cache_refresh` (imported above).
640
+
641
+
642
+ # ---------------------------------------------------------------------------
643
+ # prune
644
+ # ---------------------------------------------------------------------------
645
+
646
+
647
def cache_prune(
    *,
    older_than_days: int = DEFAULT_PRUNE_OLDER_THAN_DAYS,
    source: str | None = None,
    dry_run: bool = False,
    cache_root: Path | None = None,
) -> list[Path]:
    """Remove entries whose ``expires_at`` is older than ``older_than_days``.

    Args:
        older_than_days: Entries whose ``expires_at`` lies more than this many
            days in the past are pruned. Must be >= 0.
        source: Restrict pruning to one source; a falsy value means every
            source in ``ALLOWED_SOURCES``.
        dry_run: When true, nothing is deleted and no audit record is
            appended; the would-be-removed directories are still returned.
        cache_root: Cache root to operate on; ``DEFAULT_CACHE_ROOT`` if None.

    Returns:
        The entry directories that were removed (or would be, under
        ``dry_run``). Empty when the cache root does not exist.

    Raises:
        CacheError: ``older_than_days`` is negative.
    """
    if older_than_days < 0:
        raise CacheError(f"--older-than-days must be >= 0 (got {older_than_days!r})")
    root = cache_root if cache_root is not None else DEFAULT_CACHE_ROOT
    if not root.exists():
        return []

    cutoff = _utc_now() - timedelta(days=older_than_days)
    removed: list[Path] = []
    sources = [source] if source else list(ALLOWED_SOURCES)
    for src in sources:
        src_root = Path(root) / src
        if not src_root.exists():
            continue
        # Materialize the iterator before mutating the tree: shutil.rmtree()
        # below removes entry directories while rglob() lazily walks them on
        # POSIX, raising FileNotFoundError on the next scandir() (#883).
        # list(...) snapshots the matches up-front so deletions are safe.
        for meta_path in list(src_root.rglob("meta.json")):
            edir = meta_path.parent
            try:
                meta = json.loads(meta_path.read_text(encoding="utf-8"))
                expires = _parse_iso(meta["expires_at"])
            except FileNotFoundError:
                # The snapshot can name a meta.json that is already gone
                # (e.g. it sat below an entry directory removed earlier in
                # this loop). There is nothing left to prune for it.
                continue
            except (
                json.JSONDecodeError,
                KeyError,
                ValueError,
                TypeError,
                AttributeError,
            ):
                # Corrupt entries are pruned -- they can't be served by
                # cache:get anyway, and leaving them masks the next
                # re-populate behind a stale meta.json shadow.
                # TypeError covers non-object JSON (``[]``, ``null``, a bare
                # string) where ``meta["expires_at"]`` cannot be subscripted;
                # TypeError/AttributeError also cover a non-string
                # ``expires_at`` handed to ``_parse_iso``.
                expires = cutoff - timedelta(days=1)
                meta = {}
            if expires >= cutoff:
                continue
            if not dry_run:
                shutil.rmtree(edir)
                _append_audit(
                    {
                        "event": "cache:prune-entry",
                        "source": src,
                        "key": _meta_key_or_relpath(meta_path, src_root),
                        "timestamp": _utc_iso(),
                        "expires_at": (
                            meta.get("expires_at", "unknown")
                            if isinstance(meta, dict)
                            else "unknown"
                        ),
                    },
                    cache_root=cache_root,
                )
            removed.append(edir)
    return removed
704
+
705
+
706
+ def _meta_key_or_relpath(meta_path: Path, src_root: Path) -> str:
707
+ try:
708
+ return str(meta_path.parent.relative_to(src_root)).replace(os.sep, "/")
709
+ except ValueError:
710
+ return str(meta_path.parent)
711
+
712
+
713
+ # ---------------------------------------------------------------------------
714
+ # Quota helpers (#947) -- size cap, entry cap, LRU eviction integration
715
+ # ---------------------------------------------------------------------------
716
+
717
+
718
+ def _existing_entry_size(edir: Path) -> int | None:
719
+ """Return ``meta.json::size_bytes`` for an existing entry, or ``None`` if absent.
720
+
721
+ Used by :func:`cache_put` to compute the byte delta on a re-put so
722
+ cap projection does not double-count the replaced entry. Corrupt /
723
+ parse-failed meta.json returns 0 (treat re-put as adding full size).
724
+ """
725
+ meta_path = edir / "meta.json"
726
+ if not meta_path.exists():
727
+ return None
728
+ try:
729
+ meta = json.loads(meta_path.read_text(encoding="utf-8"))
730
+ except (OSError, json.JSONDecodeError):
731
+ return 0
732
+ size = meta.get("size_bytes") if isinstance(meta, dict) else None
733
+ if not isinstance(size, int) or size < 0:
734
+ return 0
735
+ return size
736
+
737
+
738
def _make_evict_audit_callback(
    *,
    cache_root: Path | None,
    trigger: str,
) -> Any:
    """Return an ``on_evict`` callback that logs one ``cache:evict`` record.

    Each eviction produces exactly one audit record, so operators can grep
    for ``"event":"cache:evict"`` to find out why an entry disappeared.

    The ``reason`` written to the record is the value handed to the callback
    by the evictor, not something derived from the configured caps here. Per
    the iter-1 review (P1), deriving it from the caps alone tagged every
    record ``size_cap+entry_cap`` under the defaults; passing the precomputed
    breach descriptor through lets a grep for ``"reason":"entry_cap"`` match
    only entry-cap-driven evictions.

    ``trigger`` and ``cache_root`` are captured by the closure and copied
    into / used for every record.
    """

    def _on_evict(victim: EntryUsage, reason: str, _caps: CacheCaps) -> None:
        # A non-positive last_accessed carries no usable timestamp.
        if victim.last_accessed > 0:
            accessed_at = datetime.fromtimestamp(victim.last_accessed, tz=UTC)
            last_accessed_iso = accessed_at.strftime("%Y-%m-%dT%H:%M:%SZ")
        else:
            last_accessed_iso = "unknown"
        record = {
            "event": "cache:evict",
            "source": victim.source,
            "key": victim.key,
            "timestamp": _utc_iso(),
            "reason": reason,
            "trigger": trigger,
            "freed_bytes": victim.size_bytes,
            "last_accessed_at": last_accessed_iso,
        }
        _append_audit(record, cache_root=cache_root)

    return _on_evict
779
+
780
+
781
+ def _touch_mtime(path: Path) -> None:
782
+ """Update ``path``'s mtime to now (LRU signal). Single ``os.utime`` syscall.
783
+
784
+ Failures are swallowed: a read-only meta.json on a locked-down filesystem
785
+ still serves cache hits. Stale mtime degrades gracefully -- old mtime just
786
+ makes the entry a stronger eviction candidate next round.
787
+ """
788
+ with contextlib.suppress(OSError):
789
+ os.utime(path, None)
790
+
791
+
792
def cache_prune_to_cap(
    *,
    cache_root: Path | None = None,
    caps: CacheCaps | None = None,
    dry_run: bool = False,
) -> list[EntryUsage]:
    """Evict least-recently-used entries until the cache fits the caps.

    Args:
        cache_root: Cache root to operate on; ``DEFAULT_CACHE_ROOT`` if None.
        caps: Caps to enforce; resolved via ``resolve_caps()`` if None.
        dry_run: Only compute the eviction set. Nothing is removed and no
            audit records are written.

    Returns:
        The evicted (or, under ``dry_run``, would-be-evicted) entries. An
        empty list when no cap is enforced; a repeat call against a tree that
        is already under cap is expected to return ``[]`` as well.
    """
    root = DEFAULT_CACHE_ROOT if cache_root is None else cache_root
    effective_caps = resolve_caps() if caps is None else caps
    if not effective_caps.any_enforced:
        return []

    if dry_run:
        would_evict = predict_eviction_set(
            root, sources=ALLOWED_SOURCES, caps=effective_caps
        )
        return list(would_evict)

    # Real run: every eviction is audited with this command as its trigger.
    audit_callback = _make_evict_audit_callback(
        cache_root=cache_root, trigger="cache:prune-to-cap"
    )
    outcome = _enforce_caps(
        root,
        sources=ALLOWED_SOURCES,
        caps=effective_caps,
        on_evict=audit_callback,
    )
    return list(outcome.evicted)
821
+
822
+
823
+ # ---------------------------------------------------------------------------
824
+ # CLI
825
+ # ---------------------------------------------------------------------------
826
+
827
+
828
def _build_parser() -> argparse.ArgumentParser:
    """Construct the ``cache`` argument parser with all of its sub-commands.

    Sub-commands: ``put``, ``get``, ``invalidate``, ``fetch-all`` and
    ``prune``. The chosen sub-command name is stored in ``args.cmd`` and one
    is always required.
    """

    def _add_entry_positionals(command: argparse.ArgumentParser) -> None:
        # put / get / invalidate all address a single (source, key) entry.
        command.add_argument("source", choices=list(ALLOWED_SOURCES))
        command.add_argument("key")

    parser = argparse.ArgumentParser(
        prog="cache",
        description="Unified content cache + quarantine layer (#883 Story 2).",
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    # --- put ---------------------------------------------------------------
    put_cmd = commands.add_parser(
        "put", help="Cache a (source, key) entry from a raw JSON file."
    )
    _add_entry_positionals(put_cmd)
    put_cmd.add_argument(
        "--raw-file", required=True, help="Path to the upstream JSON payload."
    )
    put_cmd.add_argument(
        "--ttl-seconds", type=int, default=None, help="Override the source TTL."
    )

    # --- get ---------------------------------------------------------------
    get_cmd = commands.add_parser(
        "get", help="Print the cache entry's content.md path + meta.json."
    )
    _add_entry_positionals(get_cmd)
    stale_policy = get_cmd.add_mutually_exclusive_group()
    stale_policy.add_argument(
        "--allow-stale", action="store_true", help="Default. Stale entries returned."
    )
    stale_policy.add_argument(
        "--no-stale", action="store_true", help="Stale entries treated as miss."
    )

    # --- invalidate --------------------------------------------------------
    invalidate_cmd = commands.add_parser(
        "invalidate", help="Delete an entry directory + append audit."
    )
    _add_entry_positionals(invalidate_cmd)
    invalidate_cmd.add_argument("--reason", default=None, help="Audit-log reason text.")

    # --- fetch-all ---------------------------------------------------------
    label_help = (
        "Scope ingestion to issues carrying the given label(s) (#1033). "
        "Repeatable and comma-separated (--label a,b --label c). "
        "Composes with --author via AND."
    )
    author_help = (
        "Scope ingestion to issues created by LOGIN (#1055). Maps to "
        "the REST 'creator' param. Composes with --label via AND."
    )
    refresh_closed_help = (
        "After populating, revisit cached-open entries that are no "
        "longer in the open enumeration and rewrite any that closed "
        "upstream to state=closed (#1476). Adds one single-issue REST "
        "read per closed-upstream candidate."
    )
    fetch_cmd = commands.add_parser("fetch-all", help="Bulk-populate the cache for a repo.")
    fetch_cmd.add_argument("--source", required=True, choices=["github-issue"])
    fetch_cmd.add_argument("--repo", required=True, help="owner/repo slug.")
    fetch_cmd.add_argument("--batch-size", type=int, default=DEFAULT_BATCH_SIZE)
    fetch_cmd.add_argument("--delay-ms", type=int, default=DEFAULT_DELAY_MS)
    fetch_cmd.add_argument("--ttl-seconds", type=int, default=None)
    fetch_cmd.add_argument("--state", default="open")
    fetch_cmd.add_argument("--limit", type=int, default=1000)
    fetch_cmd.add_argument(
        "--label",
        action="append",
        default=None,
        dest="labels",
        metavar="NAME[,NAME...]",
        help=label_help,
    )
    fetch_cmd.add_argument(
        "--author",
        default=None,
        metavar="LOGIN",
        help=author_help,
    )
    fetch_cmd.add_argument(
        "--refresh-closed",
        action="store_true",
        help=refresh_closed_help,
    )

    # --- prune -------------------------------------------------------------
    to_cap_help = (
        "LRU-evict entries until the cache is under the configured "
        "size + entry caps (DEFT_CACHE_MAX_BYTES, DEFT_CACHE_MAX_ENTRIES). "
        "Mutually exclusive with --older-than-days semantics; ignores "
        "the threshold and uses LRU recency instead."
    )
    prune_cmd = commands.add_parser("prune", help="Drop entries older than the threshold.")
    prune_cmd.add_argument(
        "--older-than-days", type=int, default=DEFAULT_PRUNE_OLDER_THAN_DAYS
    )
    prune_cmd.add_argument("--source", default=None, choices=list(ALLOWED_SOURCES))
    prune_cmd.add_argument("--dry-run", action="store_true")
    prune_cmd.add_argument("--to-cap", action="store_true", help=to_cap_help)

    return parser
909
+
910
+
911
def main(argv: list[str] | None = None) -> int:
    """Run the ``cache`` CLI and return its process exit code.

    Exit codes produced here:

    * the handler's own return value on success;
    * argparse's exit code when argument parsing exits (``2`` if that code
      is not an integer);
    * ``3`` -- ``CacheCapBreachedError``: the cap is still breached after
      eviction (#947), kept distinct so orchestrators can branch on it;
    * ``1`` -- ``CacheError`` / ``CacheFetchError``;
    * ``2`` -- ``CacheValidationError`` (schema failure).

    Per-command exit codes are documented in the module docstring.
    """
    try:
        args = _build_parser().parse_args(argv)
    except SystemExit as exit_request:
        # argparse exits on --help and on usage errors; report that as a
        # return value instead of terminating the interpreter.
        code = exit_request.code
        if isinstance(code, int):
            return int(code)
        return 2

    handler = _DISPATCH[args.cmd]
    try:
        return handler(args)
    except CacheCapBreachedError as exc:
        print(f"cache: cap breached: {exc}", file=sys.stderr)
        return 3
    except (CacheError, CacheFetchError) as exc:
        # CacheFetchError is a sibling of CacheError (it extends RuntimeError
        # directly to avoid a circular import in _cache_fetch) and can escape
        # from the REST list-enumeration phase; catching it here yields a
        # clean ``cache: error: ...`` line instead of a raw traceback.
        print(f"cache: error: {exc}", file=sys.stderr)
        return 1
    except CacheValidationError as exc:
        print(f"cache: schema error: {exc}", file=sys.stderr)
        return 2
938
+
939
+
940
def _cmd_put(args: argparse.Namespace) -> int:
    """Handle ``cache put``: load ``--raw-file`` and store it via ``cache_put``.

    Prints a one-line ``cache:put ...`` summary to stdout.

    Returns:
        ``0`` when the content scan passed, ``2`` when it did not.

    Raises:
        CacheError: ``--raw-file`` is missing, cannot be read (directory,
            permission problem, non-UTF-8 bytes), is not valid JSON, or its
            top-level value is not a JSON object.
    """
    raw_path = Path(args.raw_file)
    if not raw_path.exists():
        raise CacheError(f"--raw-file not found: {raw_path}")
    # An existing path can still be unreadable; surface that as a CacheError
    # so the CLI prints a clean ``cache: error: ...`` line, not a traceback.
    try:
        text = raw_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        raise CacheError(f"--raw-file could not be read: {exc}") from exc
    try:
        raw = json.loads(text)
    except json.JSONDecodeError as exc:
        raise CacheError(f"--raw-file is not valid JSON: {exc}") from exc
    if not isinstance(raw, dict):
        raise CacheError(f"--raw-file must be a JSON object (got {type(raw).__name__})")
    result = cache_put(args.source, args.key, raw, ttl_seconds=args.ttl_seconds)
    sys.stdout.write(
        f"cache:put source={result.source} key={result.key} "
        f"scan_passed={result.scan_result.passed} "
        f"flags={[f.category for f in result.scan_result.flags]} "
        f"content_written={result.content_written} dir={result.entry_dir}\n"
    )
    return 0 if result.scan_result.passed else 2
958
+
959
+
960
def _cmd_get(args: argparse.Namespace) -> int:
    """Handle ``cache get``: print the entry as a JSON document on stdout.

    Stale entries are allowed unless ``--no-stale`` was given. A miss is
    reported on stderr.

    Returns:
        ``0`` on a hit, ``1`` when ``cache_get`` raises
        ``CacheNotFoundError``.
    """
    try:
        hit = cache_get(args.source, args.key, allow_stale=not args.no_stale)
    except CacheNotFoundError as exc:
        print(f"cache:get miss: {exc}", file=sys.stderr)
        return 1

    document = {
        "source": hit.source,
        "key": hit.key,
        "entry_dir": str(hit.entry_dir),
        # A falsy content_path is emitted as JSON null.
        "content_path": str(hit.content_path) if hit.content_path else None,
        "stale": hit.stale,
        "meta": hit.meta,
    }
    rendered = json.dumps(document, indent=2, ensure_ascii=False)
    sys.stdout.write(rendered + "\n")
    return 0
977
+
978
+
979
def _cmd_invalidate(args: argparse.Namespace) -> int:
    """Handle ``cache invalidate``: drop one entry and report whether it existed.

    Always returns ``0``; the ``existed=`` field of the stdout line tells the
    caller whether anything was actually removed.
    """
    source, key = args.source, args.key
    existed = cache_invalidate(source, key, reason=args.reason)
    summary = f"cache:invalidate source={source} key={key} existed={existed}\n"
    sys.stdout.write(summary)
    return 0
985
+
986
+
987
+ def _normalise_label_filter(raw: list[str] | None) -> tuple[str, ...]:
988
+ """Flatten repeated + comma-separated ``--label`` values into a tuple.
989
+
990
+ ``argparse(action="append")`` yields a list with one entry per flag
991
+ occurrence; each entry may itself be comma-separated. This mirrors
992
+ the gh CLI multi-label convention and the scm.py ``--rest issue
993
+ list`` label parsing so the two surfaces stay consistent (#1033).
994
+ """
995
+ if not raw:
996
+ return ()
997
+ return tuple(
998
+ item.strip()
999
+ for value in raw
1000
+ for item in value.split(",")
1001
+ if item.strip()
1002
+ )
1003
+
1004
+
1005
def _cmd_fetch_all(args: argparse.Namespace) -> int:
    """Handle ``cache fetch-all``: bulk-populate, then optionally reconcile.

    The fetch report is written to stdout as one JSON line. With
    ``--refresh-closed`` (#1476) a second pass runs ``cache_refresh_closed``
    and writes its report as a further JSON line, so a closed-upstream issue
    whose cached entry is still TTL-fresh is rewritten to state=closed and
    stops surfacing in triage:queue.

    Returns:
        ``0`` when neither phase reported failures, otherwise ``1``.
    """
    label_filter = _normalise_label_filter(getattr(args, "labels", None))
    report = cache_fetch_all(
        source=args.source,
        repo=args.repo,
        batch_size=args.batch_size,
        delay_ms=args.delay_ms,
        ttl_seconds=args.ttl_seconds,
        state=args.state,
        limit=args.limit,
        labels=label_filter,
        author=args.author,
    )
    sys.stdout.write(report.to_json() + "\n")
    exit_code = 1 if report.failed != 0 else 0

    # The reconciliation pass is opt-in.
    if not getattr(args, "refresh_closed", False):
        return exit_code

    refresh = cache_refresh_closed(
        source=args.source,
        repo=args.repo,
        ttl_seconds=args.ttl_seconds,
        delay_ms=args.delay_ms,
        limit=args.limit,
    )
    sys.stdout.write(refresh.to_json() + "\n")
    return 1 if refresh.refresh_failed else exit_code
1035
+
1036
+
1037
def _cmd_prune(args: argparse.Namespace) -> int:
    """Handle ``cache prune`` in either age-threshold or ``--to-cap`` mode.

    With ``--to-cap`` the LRU eviction path runs and ``--older-than-days`` /
    ``--source`` are not consulted; otherwise entries are pruned by age.
    Either way a JSON summary is written to stdout and ``0`` is returned.
    """
    if not args.to_cap:
        removed = cache_prune(
            older_than_days=args.older_than_days,
            source=args.source,
            dry_run=args.dry_run,
        )
        summary = {
            "older_than_days": args.older_than_days,
            "source": args.source or "all",
            "dry_run": args.dry_run,
            "removed_count": len(removed),
            "removed_paths": [str(path) for path in removed],
        }
    else:
        evicted = cache_prune_to_cap(dry_run=args.dry_run)
        # Caps are resolved again only to echo them in the summary.
        caps = resolve_caps()
        summary = {
            "mode": "to-cap",
            "max_bytes": caps.max_bytes,
            "max_entries": caps.max_entries,
            "dry_run": args.dry_run,
            "evicted_count": len(evicted),
            "evicted_keys": [f"{entry.source}/{entry.key}" for entry in evicted],
            "freed_bytes": sum(entry.size_bytes for entry in evicted),
        }
    sys.stdout.write(json.dumps(summary, indent=2, ensure_ascii=False) + "\n")
    return 0
1066
+
1067
+
1068
# Sub-command name (as stored in ``args.cmd``) -> handler. Every handler
# takes the parsed ``argparse.Namespace`` and returns the exit code.
_DISPATCH = {
    "put": _cmd_put,
    "get": _cmd_get,
    "invalidate": _cmd_invalidate,
    "fetch-all": _cmd_fetch_all,
    "prune": _cmd_prune,
}


# Script entry point: exit the process with whatever code main() returns.
if __name__ == "__main__":
    sys.exit(main())