@deftai/directive-content 0.59.0 → 0.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.githooks/pre-push +10 -9
- package/Taskfile.yml +48 -58
- package/UPGRADING.md +1 -1
- package/docs/assets/directive-lifecycle-diagram.png +0 -0
- package/docs/directive-lifecycle.md +73 -0
- package/docs/getting-started.md +5 -1
- package/package.json +3 -3
- package/packs/skills/skills-pack-0.1.json +22 -22
- package/scm/github.md +20 -2
- package/tasks/change.yml +16 -31
- package/tasks/ci.yml +8 -0
- package/tasks/commit.yml +12 -19
- package/tasks/core.yml +10 -0
- package/tasks/engine.yml +42 -0
- package/tasks/framework.yml +3 -0
- package/tasks/install.yml +20 -19
- package/tasks/migrate.yml +26 -15
- package/tasks/project.yml +16 -0
- package/tasks/toolchain.yml +15 -5
- package/tasks/vbrief.yml +4 -3
- package/tasks/verify.yml +12 -14
- package/scripts/_agents_md.py +0 -494
- package/scripts/_cache_fetch.py +0 -635
- package/scripts/_cache_quota.py +0 -529
- package/scripts/_cache_refresh.py +0 -163
- package/scripts/_cache_validate.py +0 -209
- package/scripts/_content_root.py +0 -42
- package/scripts/_doctor_state.py +0 -277
- package/scripts/_event_detect.py +0 -305
- package/scripts/_events.py +0 -514
- package/scripts/_lifecycle_hygiene.py +0 -568
- package/scripts/_pathspec.py +0 -91
- package/scripts/_policy_show_cli.py +0 -266
- package/scripts/_precutover.py +0 -92
- package/scripts/_project_context.py +0 -224
- package/scripts/_project_definition_io.py +0 -164
- package/scripts/_relocate_snapshot.py +0 -209
- package/scripts/_relocate_states.py +0 -343
- package/scripts/_resolve_preflight_path.py +0 -152
- package/scripts/_safe_subprocess.py +0 -167
- package/scripts/_session_start_hook.py +0 -205
- package/scripts/_sor_gate_diff.py +0 -365
- package/scripts/_stdio_utf8.py +0 -59
- package/scripts/_triage_bootstrap_gitignore.py +0 -904
- package/scripts/_triage_classify_cli.py +0 -122
- package/scripts/_triage_queue_cli.py +0 -625
- package/scripts/_triage_scope_cli.py +0 -343
- package/scripts/_triage_scope_drift_cli.py +0 -121
- package/scripts/_triage_scope_ignores.py +0 -286
- package/scripts/_triage_scope_milestone.py +0 -432
- package/scripts/_triage_scope_mutations.py +0 -337
- package/scripts/_triage_scope_renderers.py +0 -207
- package/scripts/_triage_smoketest_stages.py +0 -674
- package/scripts/_triage_subscribe_cli.py +0 -140
- package/scripts/_triage_welcome_cli.py +0 -421
- package/scripts/_vbrief_build.py +0 -239
- package/scripts/_vbrief_fidelity.py +0 -479
- package/scripts/_vbrief_legacy.py +0 -589
- package/scripts/_vbrief_reconciliation.py +0 -883
- package/scripts/_vbrief_routing.py +0 -277
- package/scripts/_vbrief_safety.py +0 -778
- package/scripts/_vbrief_sources.py +0 -312
- package/scripts/_vbrief_speckit.py +0 -262
- package/scripts/_vbrief_story_quality.py +0 -353
- package/scripts/_vbrief_validation.py +0 -299
- package/scripts/build_dist.py +0 -412
- package/scripts/cache.py +0 -1078
- package/scripts/cache_scanner.py +0 -745
- package/scripts/candidates_log.py +0 -432
- package/scripts/capacity_backfill.py +0 -680
- package/scripts/capacity_show.py +0 -653
- package/scripts/ci_local.py +0 -689
- package/scripts/code_structure_validate.py +0 -765
- package/scripts/codebase_default_extractor.py +0 -495
- package/scripts/codebase_map.py +0 -304
- package/scripts/codebase_map_fresh.py +0 -104
- package/scripts/codebase_projection_registry.py +0 -94
- package/scripts/codebase_provider.py +0 -582
- package/scripts/doctor.py +0 -2552
- package/scripts/framework_commands.py +0 -505
- package/scripts/gh_rest.py +0 -882
- package/scripts/github_auth_modes.py +0 -437
- package/scripts/github_body.py +0 -292
- package/scripts/ip_risk.py +0 -531
- package/scripts/issue_emit.py +0 -670
- package/scripts/issue_ingest.py +0 -1064
- package/scripts/migrate_preflight.py +0 -418
- package/scripts/migrate_vbrief.py +0 -2677
- package/scripts/monitor_pr.py +0 -401
- package/scripts/pack_migrate_lessons.py +0 -336
- package/scripts/pack_migrate_patterns.py +0 -254
- package/scripts/pack_migrate_rules.py +0 -350
- package/scripts/pack_migrate_skills.py +0 -423
- package/scripts/pack_migrate_strategies.py +0 -311
- package/scripts/pack_migrate_swarm_spec.py +0 -250
- package/scripts/pack_render.py +0 -434
- package/scripts/packs_slice.py +0 -712
- package/scripts/platform_capabilities.py +0 -336
- package/scripts/policy.py +0 -2826
- package/scripts/policy_set.py +0 -324
- package/scripts/pr_check_closing_keywords.py +0 -524
- package/scripts/pr_check_protected_issues.py +0 -267
- package/scripts/pr_merge_readiness.py +0 -1004
- package/scripts/pr_wait_mergeable.py +0 -669
- package/scripts/prd_render.py +0 -159
- package/scripts/preflight_architecture_sor.py +0 -974
- package/scripts/preflight_branch.py +0 -289
- package/scripts/preflight_cache.py +0 -974
- package/scripts/preflight_gh.py +0 -721
- package/scripts/preflight_implementation.py +0 -272
- package/scripts/preflight_story_start.py +0 -838
- package/scripts/preflight_wip_cap.py +0 -149
- package/scripts/probe_session.py +0 -545
- package/scripts/project_render.py +0 -293
- package/scripts/quarantine_ext.py +0 -237
- package/scripts/reconcile_issues.py +0 -1442
- package/scripts/refresh-path.ps1 +0 -107
- package/scripts/release.py +0 -2030
- package/scripts/release_e2e.py +0 -1011
- package/scripts/release_publish.py +0 -486
- package/scripts/release_rollback.py +0 -980
- package/scripts/relocate.py +0 -1034
- package/scripts/resolve_changelog_unreleased.py +0 -667
- package/scripts/resolve_version.py +0 -490
- package/scripts/resume_conditions.py +0 -706
- package/scripts/ritual_sentinel.py +0 -609
- package/scripts/roadmap_render.py +0 -635
- package/scripts/rule_ownership_lint.py +0 -325
- package/scripts/scm.py +0 -591
- package/scripts/scope_audit_log.py +0 -387
- package/scripts/scope_decompose.py +0 -654
- package/scripts/scope_demote.py +0 -509
- package/scripts/scope_lifecycle.py +0 -1126
- package/scripts/scope_undo.py +0 -772
- package/scripts/session_start.py +0 -406
- package/scripts/setup_ghx.py +0 -339
- package/scripts/setup_windows.ps1 +0 -220
- package/scripts/slice_audit.py +0 -585
- package/scripts/slice_record.py +0 -530
- package/scripts/slice_record_existing.py +0 -692
- package/scripts/slug_normalize.py +0 -178
- package/scripts/spec_render.py +0 -477
- package/scripts/spec_validate.py +0 -238
- package/scripts/subagent_monitor.py +0 -658
- package/scripts/swarm_complete_cohort.py +0 -644
- package/scripts/swarm_launch.py +0 -1206
- package/scripts/swarm_readiness.py +0 -554
- package/scripts/swarm_verify_review_clean.py +0 -438
- package/scripts/swarm_worktrees.py +0 -497
- package/scripts/toolchain-check.py +0 -52
- package/scripts/triage_actions.py +0 -871
- package/scripts/triage_bootstrap.py +0 -1153
- package/scripts/triage_bulk.py +0 -630
- package/scripts/triage_classify.py +0 -932
- package/scripts/triage_help.py +0 -1685
- package/scripts/triage_queue.py +0 -1944
- package/scripts/triage_reconcile.py +0 -581
- package/scripts/triage_refresh.py +0 -643
- package/scripts/triage_scope.py +0 -999
- package/scripts/triage_scope_drift.py +0 -575
- package/scripts/triage_smoketest.py +0 -396
- package/scripts/triage_subscribe.py +0 -399
- package/scripts/triage_summary.py +0 -1011
- package/scripts/triage_welcome.py +0 -1178
- package/scripts/ts_check_lane.py +0 -86
- package/scripts/validate-links.py +0 -64
- package/scripts/validate_strategy_output.py +0 -212
- package/scripts/vbrief_activate.py +0 -228
- package/scripts/vbrief_migrate_conformance.py +0 -368
- package/scripts/vbrief_reconcile_graph.py +0 -306
- package/scripts/vbrief_reconcile_labels.py +0 -460
- package/scripts/vbrief_reconcile_umbrellas.py +0 -741
- package/scripts/vbrief_validate.py +0 -1144
- package/scripts/verify-stubs.py +0 -61
- package/scripts/verify_capacity.py +0 -160
- package/scripts/verify_encoding.py +0 -699
- package/scripts/verify_hooks_installed.py +0 -206
- package/scripts/verify_investigation.py +0 -360
- package/scripts/verify_judgment_gates.py +0 -827
- package/scripts/verify_no_task_runtime.py +0 -171
- package/scripts/verify_scm_boundary.py +0 -509
- package/scripts/verify_session_ritual.py +0 -389
- package/scripts/verify_tools.py +0 -426
- package/scripts/verify_vbrief_conformance.py +0 -478
package/scripts/cache.py
DELETED
|
@@ -1,1078 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
r"""cache.py -- unified content cache for the deft framework (#883 Story 2).
|
|
3
|
-
|
|
4
|
-
Public surface (5 commands)
|
|
5
|
-
---------------------------
|
|
6
|
-
|
|
7
|
-
python scripts/cache.py put <source> <key> --raw-file PATH [--ttl-seconds N]
|
|
8
|
-
python scripts/cache.py get <source> <key> [--allow-stale | --no-stale]
|
|
9
|
-
python scripts/cache.py invalidate <source> <key> [--reason TEXT]
|
|
10
|
-
python scripts/cache.py fetch-all --source github-issue --repo OWNER/NAME [...]
|
|
11
|
-
python scripts/cache.py prune [--older-than-days 30] [--source ...] [--dry-run]
|
|
12
|
-
|
|
13
|
-
Storage: ``.deft-cache/<source>/<key>/{raw.json, content.md, meta.json}``
|
|
14
|
-
plus a global ``quarantine-audit.jsonl`` audit log.
|
|
15
|
-
|
|
16
|
-
Scanner integration: every ``cache_put`` runs ``cache_scanner.scan``;
|
|
17
|
-
``credentials`` -> hard-fail (no content.md written, exit 2);
|
|
18
|
-
``injection-heading`` -> fence-and-pass; ``invisible-unicode`` -> strip-and-pass.
|
|
19
|
-
One audit record per put / invalidate / evict regardless of scan outcome.
|
|
20
|
-
|
|
21
|
-
Quota (#947): pre-write LRU eviction enforces ``DEFT_CACHE_MAX_BYTES`` /
|
|
22
|
-
``DEFT_CACHE_MAX_ENTRIES`` (defaults 100 MB / 10,000); breach -> exit 3.
|
|
23
|
-
|
|
24
|
-
Rate limit + idempotency owned by :mod:`_cache_fetch`; schema validation
|
|
25
|
-
by :mod:`_cache_validate`; quota by :mod:`_cache_quota`; the #1476
|
|
26
|
-
refresh-closed reconciliation by :mod:`_cache_refresh`. Each cache concern
|
|
27
|
-
lives in its own module per the deft file-size discipline.
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
from __future__ import annotations
|
|
31
|
-
|
|
32
|
-
import argparse
|
|
33
|
-
import contextlib
|
|
34
|
-
import json
|
|
35
|
-
import os
|
|
36
|
-
import re
|
|
37
|
-
import shutil
|
|
38
|
-
import sys
|
|
39
|
-
import tempfile
|
|
40
|
-
from dataclasses import dataclass
|
|
41
|
-
from datetime import UTC, datetime, timedelta
|
|
42
|
-
from pathlib import Path
|
|
43
|
-
from typing import Any
|
|
44
|
-
|
|
45
|
-
# Make ``scripts`` importable when this file is invoked via
|
|
46
|
-
# ``python scripts/cache.py`` from a Taskfile dispatch.
|
|
47
|
-
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
48
|
-
|
|
49
|
-
from _cache_fetch import ( # noqa: E402 -- intentional sys.path tweak
|
|
50
|
-
CacheFetchError,
|
|
51
|
-
FetchAllReport,
|
|
52
|
-
StateRefreshReport,
|
|
53
|
-
run_fetch_all,
|
|
54
|
-
)
|
|
55
|
-
from _cache_quota import ( # noqa: E402
|
|
56
|
-
CacheCapBreachedError,
|
|
57
|
-
CacheCaps,
|
|
58
|
-
EnforceResult,
|
|
59
|
-
EntryUsage,
|
|
60
|
-
enforce_caps as _enforce_caps,
|
|
61
|
-
predict_eviction_set,
|
|
62
|
-
resolve_caps,
|
|
63
|
-
scan_usage,
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
# #1476 refresh-closed path; lazily imports ``cache`` at call time so this
|
|
67
|
-
# top-level import does not create a cycle.
|
|
68
|
-
from _cache_refresh import cache_refresh_closed # noqa: E402
|
|
69
|
-
from _cache_validate import ( # noqa: E402
|
|
70
|
-
CacheValidationError,
|
|
71
|
-
validate_meta as _validate_meta_against_sources,
|
|
72
|
-
)
|
|
73
|
-
from cache_scanner import SCANNER_VERSION, ScanResult, scan # noqa: E402
|
|
74
|
-
|
|
75
|
-
# Reconfigure stdout / stderr to UTF-8 so the cache layer's status lines
|
|
76
|
-
# render under Windows cp1252 default (#814).
|
|
77
|
-
# Best-effort switch of both standard streams to UTF-8; streams that lack
# ``reconfigure`` are skipped and any failure while reconfiguring is ignored.
for _stream in (sys.stdout, sys.stderr):
    if not hasattr(_stream, "reconfigure"):
        continue
    try:
        _stream.reconfigure(encoding="utf-8", errors="replace")  # type: ignore[union-attr]
    except Exception:  # noqa: BLE001 -- deliberate best-effort, mirrors suppress(Exception)
        pass
|
|
81
|
-
|
|
82
|
-
# Re-export the scanner version so callers / tests can verify the cache
|
|
83
|
-
# module advertises the same SemVer the scanner module persists.
|
|
84
|
-
__all__ = [
|
|
85
|
-
"ALLOWED_SOURCES",
|
|
86
|
-
"CacheCapBreachedError",
|
|
87
|
-
"CacheCaps",
|
|
88
|
-
"CacheError",
|
|
89
|
-
"CacheNotFoundError",
|
|
90
|
-
"CacheValidationError",
|
|
91
|
-
"DEFAULT_BATCH_SIZE",
|
|
92
|
-
"DEFAULT_DELAY_MS",
|
|
93
|
-
"DEFAULT_PRUNE_OLDER_THAN_DAYS",
|
|
94
|
-
"EnforceResult",
|
|
95
|
-
"EntryUsage",
|
|
96
|
-
"FetchAllReport",
|
|
97
|
-
"GetResult",
|
|
98
|
-
"PutResult",
|
|
99
|
-
"SCANNER_VERSION",
|
|
100
|
-
"SOURCE_TTL_SECONDS",
|
|
101
|
-
"StateRefreshReport",
|
|
102
|
-
"audit_path",
|
|
103
|
-
"cache_fetch_all",
|
|
104
|
-
"cache_get",
|
|
105
|
-
"cache_invalidate",
|
|
106
|
-
"cache_prune",
|
|
107
|
-
"cache_prune_to_cap",
|
|
108
|
-
"cache_put",
|
|
109
|
-
"cache_refresh_closed",
|
|
110
|
-
"entry_dir",
|
|
111
|
-
"main",
|
|
112
|
-
"resolve_caps",
|
|
113
|
-
"scan_usage",
|
|
114
|
-
"validate_meta",
|
|
115
|
-
]
|
|
116
|
-
|
|
117
|
-
# ---------------------------------------------------------------------------
|
|
118
|
-
# Constants
|
|
119
|
-
# ---------------------------------------------------------------------------
|
|
120
|
-
|
|
121
|
-
DEFAULT_CACHE_ROOT: Path = Path(".deft-cache")
|
|
122
|
-
AUDIT_LOG_NAME: str = "quarantine-audit.jsonl"
|
|
123
|
-
|
|
124
|
-
#: Hard-coded TTLs per source type (v1 ships github-issue only).
|
|
125
|
-
SOURCE_TTL_SECONDS: dict[str, int] = {"github-issue": 7 * 24 * 60 * 60}
|
|
126
|
-
ALLOWED_SOURCES: tuple[str, ...] = tuple(SOURCE_TTL_SECONDS.keys())
|
|
127
|
-
|
|
128
|
-
#: github-issue key shape: owner/repo/N (alphanumerics, '.', '_', '-' only).
|
|
129
|
-
_GH_KEY_RE: re.Pattern[str] = re.compile(
|
|
130
|
-
r"^([A-Za-z0-9][A-Za-z0-9._-]*)/([A-Za-z0-9][A-Za-z0-9._-]*)/(\d+)$"
|
|
131
|
-
)
|
|
132
|
-
_REPO_RE: re.Pattern[str] = re.compile(
|
|
133
|
-
r"^([A-Za-z0-9][A-Za-z0-9._-]*)/([A-Za-z0-9][A-Za-z0-9._-]*)$"
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
DEFAULT_BATCH_SIZE: int = 10
|
|
137
|
-
#: REST-paginated fetch-all (#1239) no longer shells out per issue; the
|
|
138
|
-
#: old 500 ms default burned minutes on hundred-issue cohorts (#1562).
|
|
139
|
-
#: Explicit ``--delay-ms`` still paces local writes when operators need it.
|
|
140
|
-
DEFAULT_DELAY_MS: int = 0
|
|
141
|
-
DEFAULT_PRUNE_OLDER_THAN_DAYS: int = 30
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
# ---------------------------------------------------------------------------
|
|
145
|
-
# Errors
|
|
146
|
-
# ---------------------------------------------------------------------------
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
class CacheError(RuntimeError):
    """Raised for general cache-layer failures (subprocess, parse, IO)."""
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
class CacheNotFoundError(KeyError):
    """Raised when the requested (source, key) pair has no usable cache entry."""
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
# ---------------------------------------------------------------------------
|
|
158
|
-
# Time helpers
|
|
159
|
-
# ---------------------------------------------------------------------------
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
def _utc_now() -> datetime:
|
|
163
|
-
return datetime.now(UTC)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def _utc_iso(dt: datetime | None = None) -> str:
|
|
167
|
-
return (dt or _utc_now()).astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def _parse_iso(stamp: str) -> datetime:
|
|
171
|
-
text = stamp.strip()
|
|
172
|
-
if text.endswith("Z"):
|
|
173
|
-
text = text[:-1] + "+00:00"
|
|
174
|
-
return datetime.fromisoformat(text)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
# ---------------------------------------------------------------------------
|
|
178
|
-
# Schema validation (delegates to _cache_validate)
|
|
179
|
-
# ---------------------------------------------------------------------------
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
def validate_meta(meta: dict[str, Any]) -> None:
    """Check ``meta`` against cache-meta.schema.json.

    Thin wrapper that hands ``meta`` and this module's ``ALLOWED_SOURCES`` to
    the validator imported from ``_cache_validate``.

    Raises:
        CacheValidationError: if ``meta`` does not validate.
    """
    permitted_sources = ALLOWED_SOURCES
    _validate_meta_against_sources(meta, permitted_sources)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
# ---------------------------------------------------------------------------
|
|
188
|
-
# Path layout
|
|
189
|
-
# ---------------------------------------------------------------------------
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
def _validate_key(source: str, key: str) -> None:
    """Validate that ``key`` has the shape required by ``source``.

    Only ``github-issue`` is supported; its keys must look like
    ``<owner>/<repo>/<N>``.

    Raises:
        CacheError: if ``source`` is unknown or ``key`` does not have the
            expected shape.
    """
    if source != "github-issue":
        raise CacheError(f"unknown source {source!r}: v1 supports {sorted(ALLOWED_SOURCES)!r}")
    # fullmatch, not match: the pattern ends in ``$``, which also matches just
    # before a trailing newline, so ``match`` would accept "owner/repo/1\n"
    # and the newline would end up in the on-disk directory name.
    if _GH_KEY_RE.fullmatch(key) is None:
        raise CacheError(
            f"invalid github-issue key {key!r}: expected '<owner>/<repo>/<N>' "
            "(alphanumerics, '.', '_', '-' only; N positive integer)"
        )
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
def entry_dir(source: str, key: str, *, cache_root: Path | None = None) -> Path:
    """Compute the directory ``<cache_root>/<source>/<key>/`` for one cache entry.

    Each ``/``-separated part of ``key`` becomes its own path component. The
    module default cache root is used when ``cache_root`` is ``None``.

    Raises:
        CacheError: if ``source`` is not an allowed source or ``key`` is invalid.
    """
    if source not in ALLOWED_SOURCES:
        raise CacheError(f"unknown source {source!r}: v1 supports {sorted(ALLOWED_SOURCES)!r}")
    _validate_key(source, key)
    base = DEFAULT_CACHE_ROOT if cache_root is None else cache_root
    key_parts = key.split("/")
    return Path(base, source, *key_parts)
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
def audit_path(*, cache_root: Path | None = None) -> Path:
    """Return the audit-log path directly under ``cache_root``.

    The module default cache root is used when ``cache_root`` is ``None``.
    """
    base = DEFAULT_CACHE_ROOT if cache_root is None else cache_root
    return Path(base).joinpath(AUDIT_LOG_NAME)
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
# ---------------------------------------------------------------------------
|
|
218
|
-
# Atomic write + audit append
|
|
219
|
-
# ---------------------------------------------------------------------------
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
def _atomic_write_text(path: Path, text: str) -> None:
|
|
223
|
-
"""Write ``text`` to ``path`` via tempfile + ``os.replace``."""
|
|
224
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
225
|
-
fd, tmp_name = tempfile.mkstemp(prefix=path.name + ".", suffix=".tmp", dir=str(path.parent))
|
|
226
|
-
tmp = Path(tmp_name)
|
|
227
|
-
try:
|
|
228
|
-
with os.fdopen(fd, "w", encoding="utf-8", newline="") as fh:
|
|
229
|
-
fh.write(text)
|
|
230
|
-
os.replace(tmp, path)
|
|
231
|
-
except BaseException:
|
|
232
|
-
with contextlib.suppress(FileNotFoundError):
|
|
233
|
-
tmp.unlink()
|
|
234
|
-
raise
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
def _append_audit(record: dict[str, Any], *, cache_root: Path | None = None) -> None:
    """Append ``record`` to the audit log as a single JSON line.

    The record is serialised with sorted keys and non-ASCII characters left
    unescaped. The log's parent directory is created when missing.
    """
    log_file = audit_path(cache_root=cache_root)
    log_file.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(record, ensure_ascii=False, sort_keys=True)
    with log_file.open("a", encoding="utf-8", newline="") as sink:
        sink.write(f"{payload}\n")
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
# ---------------------------------------------------------------------------
|
|
247
|
-
# Source-specific content rendering
|
|
248
|
-
# ---------------------------------------------------------------------------
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
def _render_content(source: str, raw: dict[str, Any]) -> str:
|
|
252
|
-
"""Render the source-specific markdown body that the scanner consumes.
|
|
253
|
-
|
|
254
|
-
For ``github-issue``: ``# #<N>: <title>\\n\\n<body>``. The title line
|
|
255
|
-
is included so a hostile title becomes a suspicious heading and is
|
|
256
|
-
wrapped in quarantined fences by the scanner (mirrors the
|
|
257
|
-
Greptile-fixed contract in scripts/triage_cache.py::_render_issue_md).
|
|
258
|
-
"""
|
|
259
|
-
if source == "github-issue":
|
|
260
|
-
number = raw.get("number")
|
|
261
|
-
title = raw.get("title") or ""
|
|
262
|
-
body = raw.get("body") or ""
|
|
263
|
-
if not isinstance(number, int):
|
|
264
|
-
raise CacheError(
|
|
265
|
-
f"invalid github-issue raw payload: 'number' must be int "
|
|
266
|
-
f"(got {type(number).__name__})"
|
|
267
|
-
)
|
|
268
|
-
return f"# #{number}: {title}\n\n{body}"
|
|
269
|
-
raise CacheError(f"unknown source {source!r}: v1 supports {sorted(ALLOWED_SOURCES)!r}")
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
# ---------------------------------------------------------------------------
|
|
273
|
-
# Cache primitives
|
|
274
|
-
# ---------------------------------------------------------------------------
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
@dataclass
class PutResult:
    """What :func:`cache_put` reports back after writing an entry."""

    # Source type the entry was stored under.
    source: str
    # Key of the entry within that source.
    key: str
    # Directory holding the entry's files.
    entry_dir: Path
    # The metadata dict that was written to meta.json.
    meta: dict[str, Any]
    # Scanner verdict for the rendered content.
    scan_result: ScanResult
    # True when content.md was written (i.e. the scan passed).
    content_written: bool
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
@dataclass
class GetResult:
    """What :func:`cache_get` reports back for a cache hit."""

    # Source type the entry was read from.
    source: str
    # Key of the entry within that source.
    key: str
    # Directory holding the entry's files.
    entry_dir: Path
    # Parsed meta.json, with "stale" updated to the read-time value.
    meta: dict[str, Any]
    # Path to content.md, or None when that file does not exist.
    content_path: Path | None
    # True when the entry's expires_at is already in the past.
    stale: bool
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
def cache_put(
    source: str,
    key: str,
    raw: dict[str, Any],
    *,
    ttl_seconds: int | None = None,
    cache_root: Path | None = None,
    fetched_at: datetime | None = None,
    caps: CacheCaps | None = None,
) -> PutResult:
    """Write a cache entry. Always writes raw.json + meta.json; conditionally writes content.md.

    The payload is serialised and rendered *before* any side effect, so an
    invalid ``raw`` payload is rejected without evicting other entries or
    leaving a raw.json behind that has no matching meta.json.

    Pre-write quota enforcement (#947): projects the new total against
    the resolved caps, evicts LRU entries until the put fits, and raises
    :class:`CacheCapBreachedError` if eviction can't free enough (CLI exit-3).

    Args:
        source: Source type; must be one of ``ALLOWED_SOURCES``.
        key: Entry key, validated for ``source``.
        raw: JSON-serialisable payload stored verbatim as raw.json.
        ttl_seconds: Entry lifetime; defaults to the per-source TTL.
        cache_root: Cache root directory; defaults to ``DEFAULT_CACHE_ROOT``.
        fetched_at: Fetch time recorded in the metadata; defaults to now.
        caps: Quota caps; resolved via ``resolve_caps()`` when ``None``.

    Returns:
        A :class:`PutResult` describing the written entry and scan outcome.

    Raises:
        CacheError: on an invalid key, TTL, or raw payload.
        CacheCapBreachedError: if the entry cannot fit under the caps.
        CacheValidationError: if the generated metadata fails validation.
    """
    _validate_key(source, key)
    fetched = fetched_at or _utc_now()
    ttl = ttl_seconds if ttl_seconds is not None else SOURCE_TTL_SECONDS[source]
    if not isinstance(ttl, int) or ttl < 0:
        raise CacheError(f"ttl_seconds must be a non-negative int (got {ttl!r})")
    expires = fetched + timedelta(seconds=ttl)

    edir = entry_dir(source, key, cache_root=cache_root)

    # Project raw.json size pre-write (UTF-8 JSON has no platform variance).
    raw_text = json.dumps(raw, indent=2, sort_keys=True, ensure_ascii=False)
    raw_size = len(raw_text.encode("utf-8"))

    # Render up front: this is where the payload shape is validated, and it
    # must fail before eviction and disk writes rather than after them.
    rendered = _render_content(source, raw)

    # Re-put: charge delta only (may be negative when shrinking; cap_breached
    # handles the arithmetic correctly). Protect the existing entry from
    # self-eviction. Flooring to 0 here was a P1 finding -- a shrinking re-put
    # against a tight cap was being rejected as a cap-breach even though the
    # smaller payload would bring the cache *under* the cap.
    existing_size = _existing_entry_size(edir)
    is_new_entry = existing_size is None
    incoming_delta = raw_size if is_new_entry else raw_size - existing_size
    incoming_entries = 1 if is_new_entry else 0

    cache_root_path = cache_root if cache_root is not None else DEFAULT_CACHE_ROOT
    enforce_result = _enforce_caps(
        cache_root_path,
        sources=ALLOWED_SOURCES,
        caps=caps,
        incoming_bytes=incoming_delta,
        incoming_entries=incoming_entries,
        protect_keys=[(source, key)],
        on_evict=_make_evict_audit_callback(
            cache_root=cache_root, trigger="cache:put"
        ),
    )
    if enforce_result.would_breach:
        # Work out which cap(s) the put would still exceed after eviction so
        # the error names the actual cause.
        resolved = caps if caps is not None else resolve_caps()
        reason_parts: list[str] = []
        if (
            resolved.bytes_enforced
            and enforce_result.final_usage.total_bytes + incoming_delta > resolved.max_bytes
        ):
            reason_parts.append("size_cap")
        if (
            resolved.entries_enforced
            and enforce_result.final_usage.total_entries + incoming_entries
            > resolved.max_entries
        ):
            reason_parts.append("entry_cap")
        raise CacheCapBreachedError(
            reason="+".join(reason_parts) or "unknown",
            max_bytes=resolved.max_bytes,
            max_entries=resolved.max_entries,
            current_bytes=enforce_result.final_usage.total_bytes,
            current_entries=enforce_result.final_usage.total_entries,
            incoming_bytes=incoming_delta,
        )

    edir.mkdir(parents=True, exist_ok=True)
    raw_path = edir / "raw.json"
    _atomic_write_text(raw_path, raw_text)
    raw_size = raw_path.stat().st_size  # authoritative for meta.json::size_bytes

    scan_result = scan(rendered, scanned_at=_utc_iso(fetched))

    content_path = edir / "content.md"
    content_written = False
    if scan_result.passed:
        _atomic_write_text(content_path, scan_result.transformed_content)
        content_written = True
    else:
        # On hard-fail, remove any prior content.md so cache:get does not
        # return safe-but-stale content for an entry whose latest fetch
        # contained credentials.
        with contextlib.suppress(FileNotFoundError):
            content_path.unlink()

    meta = _build_meta(
        source=source,
        key=key,
        fetched_at=fetched,
        ttl_seconds=ttl,
        expires_at=expires,
        scan_result=scan_result,
        size_bytes=raw_size,
    )
    validate_meta(meta)
    _atomic_write_text(
        edir / "meta.json",
        json.dumps(meta, indent=2, sort_keys=True, ensure_ascii=False),
    )

    _append_audit(
        {
            "event": "cache:put",
            "source": source,
            "key": key,
            "timestamp": _utc_iso(),
            "scan_passed": scan_result.passed,
            "scanner_version": scan_result.scanner_version,
            "flags": [
                {
                    "category": f.category,
                    "severity": f.severity,
                    "detail": f.detail,
                    "match_count": f.match_count,
                }
                for f in scan_result.flags
            ],
            "content_written": content_written,
        },
        cache_root=cache_root,
    )

    return PutResult(
        source=source,
        key=key,
        entry_dir=edir,
        meta=meta,
        scan_result=scan_result,
        content_written=content_written,
    )
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
def _build_meta(
    *,
    source: str,
    key: str,
    fetched_at: datetime,
    ttl_seconds: int,
    expires_at: datetime,
    scan_result: ScanResult,
    size_bytes: int,
) -> dict[str, Any]:
    """Assemble the dict that is persisted as an entry's meta.json.

    Timestamps are formatted with :func:`_utc_iso`; each scanner flag is
    flattened to a plain dict. ``stale`` is always written as ``False``.
    """
    flag_records = [
        {
            "category": flag.category,
            "severity": flag.severity,
            "detail": flag.detail,
            "match_count": flag.match_count,
        }
        for flag in scan_result.flags
    ]
    fetched_stamp = _utc_iso(fetched_at)
    expiry_stamp = _utc_iso(expires_at)
    scan_section = {
        "passed": scan_result.passed,
        "scanned_at": scan_result.scanned_at,
        "scanner_version": scan_result.scanner_version,
        "flags": flag_records,
    }
    return {
        "source": source,
        "key": key,
        "fetched_at": fetched_stamp,
        "ttl_seconds": ttl_seconds,
        "expires_at": expiry_stamp,
        "scan_result": scan_section,
        "size_bytes": size_bytes,
        "stale": False,
    }
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
def cache_get(
    source: str,
    key: str,
    *,
    cache_root: Path | None = None,
    allow_stale: bool = True,
) -> GetResult:
    """Look up a cache entry and return its metadata and content location.

    Raises:
        CacheNotFoundError: if meta.json is missing, or the entry has expired
            and ``allow_stale`` is false.
        CacheValidationError: if meta.json is not valid JSON or fails
            metadata validation.
    """
    location = entry_dir(source, key, cache_root=cache_root)
    meta_file = location / "meta.json"
    if not meta_file.exists():
        shown = f"{source}/{key}/meta.json"
        raise CacheNotFoundError(
            f"cache miss for source={source!r} key={key!r} "
            f"(expected meta.json at {shown})"
        )

    meta_text = meta_file.read_text(encoding="utf-8")
    try:
        meta = json.loads(meta_text)
    except json.JSONDecodeError as exc:
        raise CacheValidationError(
            f"meta.json at {meta_file} is not valid JSON: {exc}"
        ) from exc
    validate_meta(meta)

    expiry = _parse_iso(meta["expires_at"])
    expired = _utc_now() > expiry
    if expired and not allow_stale:
        raise CacheNotFoundError(
            f"cache entry stale for source={source!r} key={key!r}; "
            f"expires_at={meta['expires_at']} (pass --allow-stale to override)"
        )

    # meta.json is always written with stale=False; staleness is decided at
    # read time, so reflect the computed value on the in-memory dict.
    meta["stale"] = expired

    # Refresh the meta.json mtime so eviction treats this entry as recently
    # used (#947).
    _touch_mtime(meta_file)

    content_file = location / "content.md"
    available_content = content_file if content_file.exists() else None
    return GetResult(
        source=source,
        key=key,
        entry_dir=location,
        meta=meta,
        content_path=available_content,
        stale=expired,
    )
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
def cache_invalidate(
    source: str,
    key: str,
    *,
    reason: str | None = None,
    cache_root: Path | None = None,
) -> bool:
    """Remove an entry's directory and record the invalidation in the audit log.

    Safe to call for an entry that does not exist: nothing is deleted, but an
    audit record is still appended.

    Returns:
        True when the entry directory existed before the call, else False.

    Raises:
        CacheError: if ``source`` or ``key`` is invalid.
    """
    _validate_key(source, key)
    target = entry_dir(source, key, cache_root=cache_root)
    was_present = target.exists()
    if was_present:
        shutil.rmtree(target)

    audit_record = {
        "event": "cache:invalidate",
        "source": source,
        "key": key,
        "timestamp": _utc_iso(),
        "reason": reason or "",
        "existed": was_present,
    }
    _append_audit(audit_record, cache_root=cache_root)
    return was_present
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
# ---------------------------------------------------------------------------
|
|
557
|
-
# Idempotency check (for fetch-all)
|
|
558
|
-
# ---------------------------------------------------------------------------
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
def _is_fresh(meta_path: Path) -> bool:
    """Return True iff ``meta_path`` is readable, valid, and not yet expired.

    Any failure to read, decode, parse, or validate the file is reported as
    "not fresh" (``False``) so the caller re-fetches instead of crashing.
    """
    try:
        # Read directly instead of checking exists() first: a missing file
        # surfaces as FileNotFoundError (an OSError), which closes the
        # check-then-read race. ValueError covers both json.JSONDecodeError
        # and the UnicodeDecodeError raised for non-UTF-8 bytes.
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
        validate_meta(meta)
    except (OSError, ValueError, CacheValidationError):
        return False
    try:
        expires = _parse_iso(meta["expires_at"])
    except (ValueError, KeyError, TypeError):
        # TypeError: meta is not subscriptable by str, or expires_at is not
        # a value _parse_iso can handle.
        return False
    return _utc_now() <= expires
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
# ---------------------------------------------------------------------------
|
|
578
|
-
# fetch-all (delegates loop body to _cache_fetch.run_fetch_all)
|
|
579
|
-
# ---------------------------------------------------------------------------
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
def cache_fetch_all(
    *,
    source: str,
    repo: str,
    batch_size: int = DEFAULT_BATCH_SIZE,
    delay_ms: int = DEFAULT_DELAY_MS,
    ttl_seconds: int | None = None,
    state: str = "open",
    limit: int = 1000,
    labels: tuple[str, ...] = (),
    author: str | None = None,
    cache_root: Path | None = None,
) -> FetchAllReport:
    """Bulk-populate the cache with the issues of ``repo``.

    Arguments are validated here; the enumeration / put loop itself lives in
    ``run_fetch_all``.

    ``labels`` (#1033) and ``author`` (#1055) narrow the issue enumeration.
    Left at their defaults (no labels, no author) nothing is filtered; given
    together they combine with AND semantics.

    Raises:
        CacheError: unsupported ``source``, malformed ``repo`` slug,
            ``batch_size`` below 1, or negative ``delay_ms``.
    """
    if source != "github-issue":
        raise CacheError(
            f"cache:fetch-all source={source!r} not supported in v1 "
            "(supports: github-issue only; other sources deferred to v2)"
        )
    if _REPO_RE.match(repo) is None:
        raise CacheError(
            f"invalid --repo {repo!r}: expected 'owner/repo' "
            "(alphanumerics, '.', '_', '-' only)"
        )
    if batch_size < 1:
        raise CacheError(f"--batch-size must be >= 1 (got {batch_size!r})")
    if delay_ms < 0:
        raise CacheError(f"--delay-ms must be >= 0 (got {delay_ms!r})")

    # The two closures bind ``source`` / ``cache_root`` / ``ttl_seconds`` so
    # the fetch loop only has to deal in keys and raw payloads.
    def _locate(key: str) -> Path:
        return entry_dir(source, key, cache_root=cache_root)

    def _store(key: str, raw: dict[str, Any]) -> None:
        cache_put(source, key, raw, ttl_seconds=ttl_seconds, cache_root=cache_root)

    return run_fetch_all(
        repo=repo,
        is_fresh=_is_fresh,
        entry_dir_for=_locate,
        do_put=_store,
        batch_size=batch_size,
        delay_ms=delay_ms,
        state=state,
        limit=limit,
        labels=labels,
        author=author,
    )
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
# refresh-closed (#1476): ``cache_refresh_closed`` is re-exported from
|
|
639
|
-
# :mod:`_cache_refresh` (imported above).
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
# ---------------------------------------------------------------------------
|
|
643
|
-
# prune
|
|
644
|
-
# ---------------------------------------------------------------------------
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
def cache_prune(
    *,
    older_than_days: int = DEFAULT_PRUNE_OLDER_THAN_DAYS,
    source: str | None = None,
    dry_run: bool = False,
    cache_root: Path | None = None,
) -> list[Path]:
    """Remove entries whose ``expires_at`` is older than ``older_than_days``.

    Args:
        older_than_days: Entries whose ``expires_at`` lies before
            ``now - older_than_days`` are pruned. Must be >= 0.
        source: Restrict pruning to one source; ``None`` walks every source
            in ``ALLOWED_SOURCES``.
        dry_run: Report what would be pruned without deleting anything.
        cache_root: Cache tree to operate on; defaults to
            ``DEFAULT_CACHE_ROOT``.

    Returns:
        The entry directories that were (or, under ``dry_run``, would be)
        removed.

    Raises:
        CacheError: ``older_than_days`` is negative.
    """
    if older_than_days < 0:
        raise CacheError(f"--older-than-days must be >= 0 (got {older_than_days!r})")
    root = cache_root if cache_root is not None else DEFAULT_CACHE_ROOT
    if not root.exists():
        return []

    cutoff = _utc_now() - timedelta(days=older_than_days)
    removed: list[Path] = []
    sources = [source] if source else list(ALLOWED_SOURCES)
    for src in sources:
        src_root = Path(root) / src
        if not src_root.exists():
            continue
        # Materialize the iterator before mutating the tree: shutil.rmtree()
        # below removes entry directories while rglob() lazily walks them on
        # POSIX, raising FileNotFoundError on the next scandir() (#883).
        # list(...) snapshots the matches up-front so deletions are safe.
        for meta_path in list(src_root.rglob("meta.json")):
            edir = meta_path.parent
            try:
                meta = json.loads(meta_path.read_text(encoding="utf-8"))
                expires = _parse_iso(meta["expires_at"])
            except (json.JSONDecodeError, KeyError, ValueError, TypeError):
                # Corrupt entries are pruned -- they can't be served by
                # cache:get anyway, and leaving them masks the next
                # re-populate behind a stale meta.json shadow.
                # TypeError covers a meta.json whose top-level JSON value is
                # not an object (subscripting a list/str/number by a string
                # key raises TypeError rather than KeyError).
                expires = cutoff - timedelta(days=1)
                meta = {}
            if expires >= cutoff:
                continue
            if not dry_run:
                shutil.rmtree(edir)
                _append_audit(
                    {
                        "event": "cache:prune-entry",
                        "source": src,
                        "key": _meta_key_or_relpath(meta_path, src_root),
                        "timestamp": _utc_iso(),
                        # ``meta`` is always a dict here: non-object JSON is
                        # routed through the corrupt-entry branch above.
                        "expires_at": meta.get("expires_at", "unknown"),
                    },
                    cache_root=cache_root,
                )
            removed.append(edir)
    return removed
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
def _meta_key_or_relpath(meta_path: Path, src_root: Path) -> str:
|
|
707
|
-
try:
|
|
708
|
-
return str(meta_path.parent.relative_to(src_root)).replace(os.sep, "/")
|
|
709
|
-
except ValueError:
|
|
710
|
-
return str(meta_path.parent)
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
# ---------------------------------------------------------------------------
|
|
714
|
-
# Quota helpers (#947) -- size cap, entry cap, LRU eviction integration
|
|
715
|
-
# ---------------------------------------------------------------------------
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
def _existing_entry_size(edir: Path) -> int | None:
|
|
719
|
-
"""Return ``meta.json::size_bytes`` for an existing entry, or ``None`` if absent.
|
|
720
|
-
|
|
721
|
-
Used by :func:`cache_put` to compute the byte delta on a re-put so
|
|
722
|
-
cap projection does not double-count the replaced entry. Corrupt /
|
|
723
|
-
parse-failed meta.json returns 0 (treat re-put as adding full size).
|
|
724
|
-
"""
|
|
725
|
-
meta_path = edir / "meta.json"
|
|
726
|
-
if not meta_path.exists():
|
|
727
|
-
return None
|
|
728
|
-
try:
|
|
729
|
-
meta = json.loads(meta_path.read_text(encoding="utf-8"))
|
|
730
|
-
except (OSError, json.JSONDecodeError):
|
|
731
|
-
return 0
|
|
732
|
-
size = meta.get("size_bytes") if isinstance(meta, dict) else None
|
|
733
|
-
if not isinstance(size, int) or size < 0:
|
|
734
|
-
return 0
|
|
735
|
-
return size
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
def _make_evict_audit_callback(
    *,
    cache_root: Path | None,
    trigger: str,
) -> Any:
    """Create an ``on_evict`` hook that logs one ``cache:evict`` audit record.

    Each call of the returned hook appends a single record, so an operator
    can grep the audit log for ``"event":"cache:evict"`` to learn why an
    entry disappeared. The ``reason`` written to the record is exactly the
    value handed to the hook -- it is not recomputed from the configured
    caps -- so filtering on e.g. ``"reason":"entry_cap"`` matches only the
    evictions reported with that reason. ``trigger`` is stamped on every
    record produced by the hook.
    """

    def _on_evict(victim: EntryUsage, reason: str, _caps: CacheCaps) -> None:
        # A non-positive timestamp is reported as "unknown" rather than
        # being formatted as a date.
        if victim.last_accessed > 0:
            accessed_at = datetime.fromtimestamp(
                victim.last_accessed, tz=UTC
            ).strftime("%Y-%m-%dT%H:%M:%SZ")
        else:
            accessed_at = "unknown"
        record = {
            "event": "cache:evict",
            "source": victim.source,
            "key": victim.key,
            "timestamp": _utc_iso(),
            "reason": reason,
            "trigger": trigger,
            "freed_bytes": victim.size_bytes,
            "last_accessed_at": accessed_at,
        }
        _append_audit(record, cache_root=cache_root)

    return _on_evict
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
def _touch_mtime(path: Path) -> None:
|
|
782
|
-
"""Update ``path``'s mtime to now (LRU signal). Single ``os.utime`` syscall.
|
|
783
|
-
|
|
784
|
-
Failures are swallowed: a read-only meta.json on a locked-down filesystem
|
|
785
|
-
still serves cache hits. Stale mtime degrades gracefully -- old mtime just
|
|
786
|
-
makes the entry a stronger eviction candidate next round.
|
|
787
|
-
"""
|
|
788
|
-
with contextlib.suppress(OSError):
|
|
789
|
-
os.utime(path, None)
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
def cache_prune_to_cap(
    *,
    cache_root: Path | None = None,
    caps: CacheCaps | None = None,
    dry_run: bool = False,
) -> list[EntryUsage]:
    """Evict least-recently-used entries until the cache fits its caps.

    ``caps`` defaults to ``resolve_caps()`` and ``cache_root`` to
    ``DEFAULT_CACHE_ROOT``. When no cap is enforced the result is ``[]``.
    With ``dry_run=True`` the predicted eviction set is returned and the
    eviction / audit path is not invoked; otherwise the entries actually
    evicted are returned, each one logged through the ``cache:evict`` audit
    callback with trigger ``cache:prune-to-cap``.
    """
    root = DEFAULT_CACHE_ROOT if cache_root is None else cache_root
    resolved = resolve_caps() if caps is None else caps
    if not resolved.any_enforced:
        return []
    if dry_run:
        predicted = predict_eviction_set(root, sources=ALLOWED_SOURCES, caps=resolved)
        return list(predicted)
    audit_hook = _make_evict_audit_callback(
        cache_root=cache_root, trigger="cache:prune-to-cap"
    )
    outcome = _enforce_caps(
        root,
        sources=ALLOWED_SOURCES,
        caps=resolved,
        on_evict=audit_hook,
    )
    return list(outcome.evicted)
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
# ---------------------------------------------------------------------------
|
|
824
|
-
# CLI
|
|
825
|
-
# ---------------------------------------------------------------------------
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the ``cache`` CLI parser and its five subcommands.

    Subcommands: ``put``, ``get``, ``invalidate``, ``fetch-all`` and
    ``prune``. The chosen subcommand name is stored on the namespace as
    ``cmd`` and one is always required.
    """
    root_parser = argparse.ArgumentParser(
        prog="cache",
        description="Unified content cache + quarantine layer (#883 Story 2).",
    )
    commands = root_parser.add_subparsers(dest="cmd", required=True)
    source_choices = list(ALLOWED_SOURCES)

    # -- put ---------------------------------------------------------------
    put_cmd = commands.add_parser(
        "put", help="Cache a (source, key) entry from a raw JSON file."
    )
    put_cmd.add_argument("source", choices=source_choices)
    put_cmd.add_argument("key")
    put_cmd.add_argument(
        "--raw-file", required=True, help="Path to the upstream JSON payload."
    )
    put_cmd.add_argument(
        "--ttl-seconds", type=int, default=None, help="Override the source TTL."
    )

    # -- get ---------------------------------------------------------------
    get_cmd = commands.add_parser(
        "get", help="Print the cache entry's content.md path + meta.json."
    )
    get_cmd.add_argument("source", choices=source_choices)
    get_cmd.add_argument("key")
    stale_group = get_cmd.add_mutually_exclusive_group()
    stale_group.add_argument(
        "--allow-stale", action="store_true", help="Default. Stale entries returned."
    )
    stale_group.add_argument(
        "--no-stale", action="store_true", help="Stale entries treated as miss."
    )

    # -- invalidate --------------------------------------------------------
    invalidate_cmd = commands.add_parser(
        "invalidate", help="Delete an entry directory + append audit."
    )
    invalidate_cmd.add_argument("source", choices=source_choices)
    invalidate_cmd.add_argument("key")
    invalidate_cmd.add_argument("--reason", default=None, help="Audit-log reason text.")

    # -- fetch-all ---------------------------------------------------------
    label_help = (
        "Scope ingestion to issues carrying the given label(s) (#1033). "
        "Repeatable and comma-separated (--label a,b --label c). "
        "Composes with --author via AND."
    )
    author_help = (
        "Scope ingestion to issues created by LOGIN (#1055). Maps to "
        "the REST 'creator' param. Composes with --label via AND."
    )
    refresh_closed_help = (
        "After populating, revisit cached-open entries that are no "
        "longer in the open enumeration and rewrite any that closed "
        "upstream to state=closed (#1476). Adds one single-issue REST "
        "read per closed-upstream candidate."
    )
    fetch_cmd = commands.add_parser(
        "fetch-all", help="Bulk-populate the cache for a repo."
    )
    fetch_cmd.add_argument("--source", required=True, choices=["github-issue"])
    fetch_cmd.add_argument("--repo", required=True, help="owner/repo slug.")
    fetch_cmd.add_argument("--batch-size", type=int, default=DEFAULT_BATCH_SIZE)
    fetch_cmd.add_argument("--delay-ms", type=int, default=DEFAULT_DELAY_MS)
    fetch_cmd.add_argument("--ttl-seconds", type=int, default=None)
    fetch_cmd.add_argument("--state", default="open")
    fetch_cmd.add_argument("--limit", type=int, default=1000)
    fetch_cmd.add_argument(
        "--label",
        action="append",
        default=None,
        dest="labels",
        metavar="NAME[,NAME...]",
        help=label_help,
    )
    fetch_cmd.add_argument(
        "--author",
        default=None,
        metavar="LOGIN",
        help=author_help,
    )
    fetch_cmd.add_argument(
        "--refresh-closed",
        action="store_true",
        help=refresh_closed_help,
    )

    # -- prune -------------------------------------------------------------
    to_cap_help = (
        "LRU-evict entries until the cache is under the configured "
        "size + entry caps (DEFT_CACHE_MAX_BYTES, DEFT_CACHE_MAX_ENTRIES). "
        "Mutually exclusive with --older-than-days semantics; ignores "
        "the threshold and uses LRU recency instead."
    )
    prune_cmd = commands.add_parser(
        "prune", help="Drop entries older than the threshold."
    )
    prune_cmd.add_argument(
        "--older-than-days", type=int, default=DEFAULT_PRUNE_OLDER_THAN_DAYS
    )
    prune_cmd.add_argument("--source", default=None, choices=source_choices)
    prune_cmd.add_argument("--dry-run", action="store_true")
    prune_cmd.add_argument("--to-cap", action="store_true", help=to_cap_help)

    return root_parser
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
def main(argv: list[str] | None = None) -> int:
    """Run the ``cache`` CLI and return its process exit code.

    Exit codes produced here:
        * argparse exits (usage errors, ``--help``) -> argparse's own integer
          code, or 2 when that code is not an integer;
        * ``CacheCapBreachedError`` -> 3;
        * ``CacheError`` / ``CacheFetchError`` -> 1;
        * ``CacheValidationError`` -> 2;
        * otherwise whatever the subcommand handler returns.
    """
    parser = _build_parser()
    try:
        args = parser.parse_args(argv)
    except SystemExit as exit_request:
        # argparse signals usage errors / --help via SystemExit; translate it
        # into a return value instead of letting it terminate the caller.
        code = exit_request.code
        if isinstance(code, int):
            return int(code)
        return 2

    try:
        handler = _DISPATCH[args.cmd]
        return handler(args)
    except CacheCapBreachedError as exc:
        # Cap breached even after eviction (#947). Distinct exit-3 so
        # operators / orchestrators can branch on "impossible to honor
        # the cap" vs the schema (exit 2) and generic (exit 1) failures.
        print(f"cache: cap breached: {exc}", file=sys.stderr)
        return 3
    except (CacheError, CacheFetchError) as exc:
        # CacheFetchError is handled alongside CacheError so a failure in the
        # fetch layer ends in a clean ``cache: error: ...`` line rather than
        # a raw traceback.
        print(f"cache: error: {exc}", file=sys.stderr)
        return 1
    except CacheValidationError as exc:
        print(f"cache: schema error: {exc}", file=sys.stderr)
        return 2
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
def _cmd_put(args: argparse.Namespace) -> int:
    """Handle ``cache put``: load ``--raw-file`` and store it as an entry.

    Prints a one-line summary of the put to stdout. Returns 0 when the
    content scan passed and 2 when it did not.

    Raises:
        CacheError: the raw file is missing, is not valid JSON, or does not
            contain a JSON object.
    """
    raw_path = Path(args.raw_file)
    if not raw_path.exists():
        raise CacheError(f"--raw-file not found: {raw_path}")
    try:
        raw = json.loads(raw_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        raise CacheError(f"--raw-file is not valid JSON: {exc}") from exc
    if not isinstance(raw, dict):
        raise CacheError(f"--raw-file must be a JSON object (got {type(raw).__name__})")

    outcome = cache_put(args.source, args.key, raw, ttl_seconds=args.ttl_seconds)
    scan = outcome.scan_result
    flag_categories = [flag.category for flag in scan.flags]
    summary = (
        f"cache:put source={outcome.source} key={outcome.key} "
        f"scan_passed={scan.passed} "
        f"flags={flag_categories} "
        f"content_written={outcome.content_written} dir={outcome.entry_dir}\n"
    )
    sys.stdout.write(summary)
    if scan.passed:
        return 0
    return 2
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
def _cmd_get(args: argparse.Namespace) -> int:
    """Handle ``cache get``: print the entry as a JSON document.

    Stale entries are accepted unless ``--no-stale`` was given. Returns 0 on
    a hit; on a miss a message goes to stderr and 1 is returned.
    """
    try:
        hit = cache_get(args.source, args.key, allow_stale=not args.no_stale)
    except CacheNotFoundError as exc:
        print(f"cache:get miss: {exc}", file=sys.stderr)
        return 1

    content = hit.content_path
    payload = {
        "source": hit.source,
        "key": hit.key,
        "entry_dir": str(hit.entry_dir),
        # An entry without a content file is reported as null.
        "content_path": None if not content else str(content),
        "stale": hit.stale,
        "meta": hit.meta,
    }
    rendered = json.dumps(payload, indent=2, ensure_ascii=False)
    sys.stdout.write(rendered + "\n")
    return 0
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
def _cmd_invalidate(args: argparse.Namespace) -> int:
    """Handle ``cache invalidate``: drop one entry and report whether it existed.

    Always returns 0; invalidating an absent entry is not an error.
    """
    source, key = args.source, args.key
    existed = cache_invalidate(source, key, reason=args.reason)
    line = f"cache:invalidate source={source} key={key} existed={existed}\n"
    sys.stdout.write(line)
    return 0
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
def _normalise_label_filter(raw: list[str] | None) -> tuple[str, ...]:
|
|
988
|
-
"""Flatten repeated + comma-separated ``--label`` values into a tuple.
|
|
989
|
-
|
|
990
|
-
``argparse(action="append")`` yields a list with one entry per flag
|
|
991
|
-
occurrence; each entry may itself be comma-separated. This mirrors
|
|
992
|
-
the gh CLI multi-label convention and the scm.py ``--rest issue
|
|
993
|
-
list`` label parsing so the two surfaces stay consistent (#1033).
|
|
994
|
-
"""
|
|
995
|
-
if not raw:
|
|
996
|
-
return ()
|
|
997
|
-
return tuple(
|
|
998
|
-
item.strip()
|
|
999
|
-
for value in raw
|
|
1000
|
-
for item in value.split(",")
|
|
1001
|
-
if item.strip()
|
|
1002
|
-
)
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
def _cmd_fetch_all(args: argparse.Namespace) -> int:
    """Handle ``cache fetch-all``: bulk-populate, then optionally reconcile.

    The fetch report is written to stdout as JSON. With ``--refresh-closed``
    a second pass runs ``cache_refresh_closed`` and writes its report too.
    Returns 1 when the fetch report counts any failure or the refresh report
    flags ``refresh_failed``; 0 otherwise.
    """
    label_filter = _normalise_label_filter(getattr(args, "labels", None))
    fetch_report = cache_fetch_all(
        source=args.source,
        repo=args.repo,
        batch_size=args.batch_size,
        delay_ms=args.delay_ms,
        ttl_seconds=args.ttl_seconds,
        state=args.state,
        limit=args.limit,
        labels=label_filter,
        author=args.author,
    )
    sys.stdout.write(fetch_report.to_json() + "\n")
    if fetch_report.failed == 0:
        exit_code = 0
    else:
        exit_code = 1

    if not getattr(args, "refresh_closed", False):
        return exit_code

    # #1476: opt-in state reconciliation so a closed-upstream issue whose
    # cached entry is still TTL-fresh is rewritten to state=closed and
    # stops surfacing in triage:queue.
    refresh_report = cache_refresh_closed(
        source=args.source,
        repo=args.repo,
        ttl_seconds=args.ttl_seconds,
        delay_ms=args.delay_ms,
        limit=args.limit,
    )
    sys.stdout.write(refresh_report.to_json() + "\n")
    if refresh_report.refresh_failed:
        exit_code = 1
    return exit_code
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
def _cmd_prune(args: argparse.Namespace) -> int:
    """Handle ``cache prune`` in either of its two modes.

    With ``--to-cap`` entries are evicted by LRU recency via
    ``cache_prune_to_cap`` and ``--older-than-days`` / ``--source`` are not
    consulted; otherwise entries are pruned by age via ``cache_prune``.
    Either way a JSON summary is written to stdout and 0 is returned.
    """
    if args.to_cap:
        evicted = cache_prune_to_cap(dry_run=args.dry_run)
        caps = resolve_caps()
        summary = {
            "mode": "to-cap",
            "max_bytes": caps.max_bytes,
            "max_entries": caps.max_entries,
            "dry_run": args.dry_run,
            "evicted_count": len(evicted),
            "evicted_keys": [f"{entry.source}/{entry.key}" for entry in evicted],
            "freed_bytes": sum(entry.size_bytes for entry in evicted),
        }
    else:
        removed = cache_prune(
            older_than_days=args.older_than_days,
            source=args.source,
            dry_run=args.dry_run,
        )
        summary = {
            "older_than_days": args.older_than_days,
            "source": args.source or "all",
            "dry_run": args.dry_run,
            "removed_count": len(removed),
            "removed_paths": [str(path) for path in removed],
        }
    sys.stdout.write(json.dumps(summary, indent=2, ensure_ascii=False) + "\n")
    return 0
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
# Maps each CLI subcommand name (the ``cmd`` dest populated by
# ``_build_parser``) to the handler that ``main`` calls with the parsed
# namespace. Every handler returns the process exit code.
_DISPATCH = {
    "put": _cmd_put,
    "get": _cmd_get,
    "invalidate": _cmd_invalidate,
    "fetch-all": _cmd_fetch_all,
    "prune": _cmd_prune,
}
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
if __name__ == "__main__":
    # Script entry point: propagate main()'s return value as the exit status.
    sys.exit(main())
|