agentblaster 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentblaster/__init__.py +3 -0
- agentblaster/adapters.py +1435 -0
- agentblaster/agent_profiles.py +420 -0
- agentblaster/audit.py +27 -0
- agentblaster/benchmark_kit.py +356 -0
- agentblaster/bundle.py +692 -0
- agentblaster/campaign.py +1031 -0
- agentblaster/campaign_preflight.py +647 -0
- agentblaster/capabilities.py +270 -0
- agentblaster/claim_readiness.py +3948 -0
- agentblaster/cleanup.py +226 -0
- agentblaster/cli.py +4202 -0
- agentblaster/compare.py +423 -0
- agentblaster/config.py +62 -0
- agentblaster/constants.py +8 -0
- agentblaster/contract_check.py +919 -0
- agentblaster/costs.py +74 -0
- agentblaster/dashboard.py +5974 -0
- agentblaster/engine_advisory.py +1045 -0
- agentblaster/engine_onboarding.py +224 -0
- agentblaster/engine_targets.py +545 -0
- agentblaster/environment.py +284 -0
- agentblaster/errors.py +21 -0
- agentblaster/evidence.py +188 -0
- agentblaster/evidence_index.py +1865 -0
- agentblaster/experiment.py +200 -0
- agentblaster/exports.py +158 -0
- agentblaster/failures.py +70 -0
- agentblaster/fixtures.py +775 -0
- agentblaster/harness.py +1254 -0
- agentblaster/implementation_status.py +719 -0
- agentblaster/integrity.py +161 -0
- agentblaster/launch_recipes.py +295 -0
- agentblaster/lcp.py +107 -0
- agentblaster/matrix.py +101 -0
- agentblaster/matrix_gate.py +565 -0
- agentblaster/matrix_pressure.py +187 -0
- agentblaster/matrix_saturation.py +601 -0
- agentblaster/mcp.py +187 -0
- agentblaster/metric_coverage.py +552 -0
- agentblaster/mock_provider.py +485 -0
- agentblaster/model_catalog.py +153 -0
- agentblaster/models.py +531 -0
- agentblaster/observability.py +110 -0
- agentblaster/planning.py +199 -0
- agentblaster/policy.py +635 -0
- agentblaster/presets.py +219 -0
- agentblaster/prompt_footprint.py +245 -0
- agentblaster/protocol_repair.py +431 -0
- agentblaster/provider_audit.py +210 -0
- agentblaster/publication_brief.py +893 -0
- agentblaster/quality.py +1142 -0
- agentblaster/rate_limits.py +74 -0
- agentblaster/readiness.py +241 -0
- agentblaster/redaction.py +58 -0
- agentblaster/redaction_scan.py +247 -0
- agentblaster/release.py +440 -0
- agentblaster/release_qualification.py +2248 -0
- agentblaster/remote_onboarding.py +308 -0
- agentblaster/reports.py +2245 -0
- agentblaster/runner.py +1677 -0
- agentblaster/schema_registry.py +1151 -0
- agentblaster/secrets.py +274 -0
- agentblaster/security_posture.py +492 -0
- agentblaster/skills.py +67 -0
- agentblaster/stress_matrix.py +113 -0
- agentblaster/suite_audit.py +259 -0
- agentblaster/suite_calibration.py +171 -0
- agentblaster/suites.py +805 -0
- agentblaster/telemetry.py +947 -0
- agentblaster/telemetry_audit.py +300 -0
- agentblaster/toolsim.py +193 -0
- agentblaster/workflow_readiness.py +570 -0
- agentblaster/workflow_surfaces.py +292 -0
- agentblaster-0.1.0.dist-info/METADATA +250 -0
- agentblaster-0.1.0.dist-info/RECORD +78 -0
- agentblaster-0.1.0.dist-info/WHEEL +4 -0
- agentblaster-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from threading import Lock
|
|
5
|
+
from typing import Any, Callable
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def request_interval_seconds(rate_limits: dict[str, Any]) -> float | None:
    """Return the minimum spacing between requests in seconds, or ``None``.

    Two limits are read from *rate_limits*: a per-second limit
    (``requests_per_second`` or ``rps``) and a per-minute limit
    (``requests_per_minute``, ``rpm`` or ``max_requests_per_minute``).  Within
    each group the first key whose value is not ``None`` is used.  Every
    positive limit is turned into an interval and the longest, i.e. the most
    restrictive, interval is returned.  ``None`` means that no usable limit
    was found.
    """
    per_second = _number(
        _first_present(
            rate_limits.get("requests_per_second"),
            rate_limits.get("rps"),
        )
    )
    per_minute = _number(
        _first_present(
            rate_limits.get("requests_per_minute"),
            rate_limits.get("rpm"),
            rate_limits.get("max_requests_per_minute"),
        )
    )
    # (window length in seconds, allowed requests within that window)
    candidates = [
        window / limit
        for window, limit in ((1, per_second), (60, per_minute))
        if limit and limit > 0
    ]
    if not candidates:
        return None
    return max(candidates)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def rate_limit_max_concurrency(rate_limits: dict[str, Any]) -> int | None:
    """Return the configured concurrency cap as a positive int, or ``None``.

    The cap is read from ``max_concurrency`` (preferred) or ``concurrency``.

    Returns:
        ``None`` when the value is missing, not numeric, not finite
        (``inf`` / ``nan``) or not positive; otherwise the value truncated to
        an integer, but never less than 1.
    """
    import math  # function-scope import: the module does not import math at file level

    value = _number(_first_present(rate_limits.get("max_concurrency"), rate_limits.get("concurrency")))
    # int() raises OverflowError for infinities and ValueError for NaN, and
    # float() accepts the strings "inf" / "nan", so reject them up front.
    if value is None or not math.isfinite(value) or value <= 0:
        return None
    # A positive fraction such as 0.5 would truncate to 0, which is not a
    # usable cap; clamp to the tightest valid limit instead.
    return max(int(value), 1)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class RateLimitPacer:
    """Thread-safe request pacer for provider-level request rate limits.

    Every :meth:`wait` call reserves the next free time slot while holding a
    lock and then sleeps, outside the lock, until that slot starts.
    Consecutive slots are ``interval_seconds`` apart.
    """

    def __init__(
        self,
        rate_limits: dict[str, Any],
        *,
        sleep_fn: Callable[[float], None] = time.sleep,
        monotonic_fn: Callable[[], float] = time.monotonic,
    ) -> None:
        """Derive the pacing interval from *rate_limits*.

        *sleep_fn* and *monotonic_fn* are injectable replacements for
        ``time.sleep`` and ``time.monotonic``.
        """
        # A ``None`` interval turns ``wait`` into a no-op.
        self.interval_seconds = request_interval_seconds(rate_limits)
        self._sleep = sleep_fn
        self._monotonic = monotonic_fn
        self._lock = Lock()
        self._next_allowed_at = 0.0

    def wait(self) -> float:
        """Sleep until this caller's slot; return the delay in milliseconds.

        The returned value is rounded to three decimals and is ``0.0`` when
        no interval is configured or the slot is already free.
        """
        if self.interval_seconds is None:
            return 0.0
        with self._lock:
            current = self._monotonic()
            # The slot starts at the later of "now" and the previous reservation.
            slot = self._next_allowed_at if self._next_allowed_at > current else current
            delay = slot - current
            self._next_allowed_at = slot + self.interval_seconds
        # Sleep after releasing the lock so other threads can reserve slots.
        if delay > 0:
            self._sleep(delay)
        return round(delay * 1000, 3)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _number(value: Any) -> float | None:
|
|
62
|
+
if value is None:
|
|
63
|
+
return None
|
|
64
|
+
try:
|
|
65
|
+
return float(value)
|
|
66
|
+
except (TypeError, ValueError):
|
|
67
|
+
return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _first_present(*values):
|
|
71
|
+
for value in values:
|
|
72
|
+
if value is not None:
|
|
73
|
+
return value
|
|
74
|
+
return None
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from agentblaster.capabilities import check_suite_compatibility
|
|
8
|
+
from agentblaster.contract_check import provider_contract_plan
|
|
9
|
+
from agentblaster.metric_coverage import metric_coverage_for_provider
|
|
10
|
+
from agentblaster.models import ProviderConfig, SuiteDefinition
|
|
11
|
+
from agentblaster.policy import SecurityPolicy
|
|
12
|
+
from agentblaster.provider_audit import audit_providers
|
|
13
|
+
|
|
14
|
+
# Schema identifier emitted under "schema_version" in the readiness dossier.
READINESS_SCHEMA_VERSION = "agentblaster.benchmark-readiness.v1"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def build_readiness_dossier(
    *,
    provider: ProviderConfig,
    suite: SuiteDefinition,
    policy: SecurityPolicy,
    model: str | None = None,
    strict_unknown: bool = False,
) -> dict[str, Any]:
    """Build a no-network benchmark readiness dossier for one provider and suite.

    The dossier combines the provider audit, the suite capability report, the
    planned contract checks and the metric coverage into a single dict.
    ``ready`` is true only when no blocking finding was collected.

    Args:
        provider: Provider configuration to assess.
        suite: Suite whose requirements are checked against the provider.
        policy: Security policy passed to the provider audit.
        model: Model name; when empty, ``provider.default_model`` is reported,
            and ``"<required>"`` when that is empty too.
        strict_unknown: Forwarded to the suite compatibility check and echoed
            in the result.

    Returns:
        A dict with ``schema_version``, a ``summary`` of counts/flags, the
        blocking findings and warnings, and the underlying reports.
    """
    provider_audit = audit_providers([provider], policy).model_dump(mode="json")
    capability_report = check_suite_compatibility(provider, suite, strict_unknown=strict_unknown).model_dump(mode="json")
    contract_plan = provider_contract_plan(provider, model=model)
    metric_coverage = metric_coverage_for_provider(provider)
    provider_auth_posture = _provider_auth_posture(provider_audit)
    secret_backend_posture = _compact_secret_backend_posture(provider_audit.get("secret_backend_posture"))
    blocking_findings = _blocking_findings(provider_audit, capability_report, contract_plan)
    # Normalise to a dict, as _warnings does, so a missing or null
    # "capability_evidence" entry cannot break the counts below.
    raw_evidence = contract_plan.get("capability_evidence")
    contract_capability_evidence = raw_evidence if isinstance(raw_evidence, dict) else {}
    warnings = _warnings(provider_audit, capability_report, metric_coverage, contract_plan)

    def _evidence_count(bucket: str) -> int:
        """Number of entries in one evidence bucket; 0 unless it is a list."""
        entries = contract_capability_evidence.get(bucket)
        return len(entries) if isinstance(entries, list) else 0

    return {
        "schema_version": READINESS_SCHEMA_VERSION,
        "provider": provider.name,
        "suite": suite.name,
        "model": model or provider.default_model or "<required>",
        "ready": not blocking_findings,
        "strict_unknown": strict_unknown,
        "summary": {
            "policy_ok": provider_audit["policy_ok"] == provider_audit["total_providers"] and provider_audit["errors"] == 0,
            "suite_compatible": capability_report["compatible"],
            "contract_checks_planned": contract_plan["summary"]["planned"],
            "contract_capabilities_directly_checked": _evidence_count("directly_checked"),
            "contract_capabilities_proxy_checked": _evidence_count("proxy_checked"),
            "contract_capabilities_not_covered": _evidence_count("not_covered"),
            "metric_coverage_score": metric_coverage["summary"]["coverage_score"],
            "provider_auth_writable_backends": sum(1 for item in provider_auth_posture if item["api_key_ref_writable_backend"]),
            "provider_auth_plaintext_fallbacks": sum(1 for item in provider_auth_posture if item["api_key_ref_plaintext_fallback"]),
            "provider_auth_prewrite_policy_guards_recommended": sum(
                1 for item in provider_auth_posture if item["prewrite_policy_guard_recommended"]
            ),
            "provider_auth_keyring_required": sum(
                1 for entry in provider_audit["providers"] if entry.get("keyring_backend_required")
            ),
            "keyring_dependency_available": secret_backend_posture.get("keyring_dependency_available"),
            "blocking_findings": len(blocking_findings),
            "warnings": len(warnings),
        },
        "blocking_findings": blocking_findings,
        "warnings": warnings,
        "provider_auth_posture": provider_auth_posture,
        "secret_backend_posture": secret_backend_posture,
        "provider_audit": provider_audit,
        "suite_capabilities": capability_report,
        "contract_plan": contract_plan,
        "contract_capability_evidence": contract_capability_evidence,
        "metric_coverage": metric_coverage,
        "security_notes": [
            "Readiness dossier is static and does not contact endpoints, resolve secrets, or read raw traces.",
            "Contract checks are planned only; use providers contract-check --execute explicitly for live checks.",
            "Remote execution remains governed by policy and explicit allow-remote flags on execution commands.",
        ],
    }
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def write_readiness_json(report: dict[str, Any], output: Path) -> Path:
    """Write *report* to *output* as key-sorted, 2-space-indented JSON.

    Missing parent directories are created first.  The file is UTF-8 encoded
    and ends with a newline.  Returns *output*.
    """
    target_dir = output.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(report, indent=2, sort_keys=True)
    output.write_text(f"{rendered}\n", encoding="utf-8")
    return output
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def format_readiness_report(report: dict[str, Any]) -> str:
    """Render a readiness dossier as a newline-terminated plain-text report.

    Sections appear in a fixed order: header fields, ``summary``,
    ``blocking_findings``, ``warnings``, ``provider_auth_posture`` and
    ``secret_backend_posture``.  An empty section is rendered as ``- none``.
    """

    def _finding_rows(items: Any) -> list[str]:
        """One ``- source:code message`` row per finding, or ``- none``."""
        if not items:
            return ["- none"]
        return [f"- {item['source']}:{item['code']} {item['message']}" for item in items]

    rendered = [
        "AgentBlaster benchmark readiness dossier",
        f"provider: {report['provider']}",
        f"suite: {report['suite']}",
        f"model: {report['model']}",
        f"ready: {str(report['ready']).lower()}",
        f"strict_unknown: {str(report['strict_unknown']).lower()}",
        "summary:",
    ]
    rendered.extend(f"- {label}: {content}" for label, content in report["summary"].items())

    rendered.append("blocking_findings:")
    rendered.extend(_finding_rows(report["blocking_findings"]))
    rendered.append("warnings:")
    rendered.extend(_finding_rows(report["warnings"]))

    rendered.append("provider_auth_posture:")
    postures = report.get("provider_auth_posture", [])
    if postures:
        rendered.extend(
            f"- {item['provider']}: "
            f"secret={item['api_key_ref_kind'] or 'none'} "
            f"configured={_bool_text(item['api_key_ref_configured'])} "
            f"writable={_bool_text(item['api_key_ref_writable_backend'])} "
            f"plaintext={_bool_text(item['api_key_ref_plaintext_fallback'])} "
            f"prewrite_policy_guard_recommended={_bool_text(item['prewrite_policy_guard_recommended'])}"
            for item in postures
        )
    else:
        rendered.append("- none")

    # Anything other than a dict is treated as "no backend information".
    backend = report.get("secret_backend_posture")
    if not isinstance(backend, dict):
        backend = {}
    rendered.append("secret_backend_posture:")
    if backend:
        keyring_optional = _bool_text(bool(backend.get("keyring_optional")))
        keyring_available = _bool_text(bool(backend.get("keyring_dependency_available")))
        recommended = ",".join(backend.get("recommended_enterprise_backends", []) or [])
        rendered.append(f"- keyring_optional: {keyring_optional}")
        rendered.append(f"- keyring_dependency_available: {keyring_available}")
        rendered.append(f"- recommended_enterprise_backends: {recommended}")
    else:
        rendered.append("- none")
    return "\n".join(rendered) + "\n"
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _provider_auth_posture(provider_audit: dict[str, Any]) -> list[dict[str, Any]]:
|
|
149
|
+
posture: list[dict[str, Any]] = []
|
|
150
|
+
for entry in provider_audit["providers"]:
|
|
151
|
+
posture.append(
|
|
152
|
+
{
|
|
153
|
+
"provider": entry["name"],
|
|
154
|
+
"api_key_ref_kind": entry["api_key_ref_kind"],
|
|
155
|
+
"api_key_ref_configured": entry["api_key_ref_configured"],
|
|
156
|
+
"api_key_ref_writable_backend": entry["api_key_ref_writable_backend"],
|
|
157
|
+
"api_key_ref_plaintext_fallback": entry["api_key_ref_plaintext_fallback"],
|
|
158
|
+
"prewrite_policy_guard_recommended": entry["prewrite_policy_guard_recommended"],
|
|
159
|
+
}
|
|
160
|
+
)
|
|
161
|
+
return posture
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _compact_secret_backend_posture(value: Any) -> dict[str, Any]:
|
|
165
|
+
if not isinstance(value, dict):
|
|
166
|
+
return {}
|
|
167
|
+
supported = value.get("supported_secret_ref_kinds") if isinstance(value.get("supported_secret_ref_kinds"), list) else []
|
|
168
|
+
recommended = (
|
|
169
|
+
value.get("recommended_enterprise_backends") if isinstance(value.get("recommended_enterprise_backends"), list) else []
|
|
170
|
+
)
|
|
171
|
+
return {
|
|
172
|
+
"env_reference_portable": bool(value.get("env_reference_portable")),
|
|
173
|
+
"keyring_optional": bool(value.get("keyring_optional")),
|
|
174
|
+
"keyring_dependency_available": bool(value.get("keyring_dependency_available")),
|
|
175
|
+
"dotenv_plaintext_fallback_supported": bool(value.get("dotenv_plaintext_fallback_supported")),
|
|
176
|
+
"dotenv_plaintext_fallback_enterprise_default": bool(value.get("dotenv_plaintext_fallback_enterprise_default")),
|
|
177
|
+
"supported_secret_ref_kinds": [str(item) for item in supported[:6]],
|
|
178
|
+
"recommended_enterprise_backends": [str(item) for item in recommended[:6]],
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _bool_text(value: bool) -> str:
|
|
183
|
+
return str(value).lower()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _blocking_findings(
|
|
187
|
+
provider_audit: dict[str, Any],
|
|
188
|
+
capability_report: dict[str, Any],
|
|
189
|
+
contract_plan: dict[str, Any],
|
|
190
|
+
) -> list[dict[str, str]]:
|
|
191
|
+
findings: list[dict[str, str]] = []
|
|
192
|
+
for entry in provider_audit["providers"]:
|
|
193
|
+
for finding in entry["findings"]:
|
|
194
|
+
if finding["severity"] == "error":
|
|
195
|
+
findings.append({"source": "provider_audit", "code": finding["code"], "message": finding["message"]})
|
|
196
|
+
for finding in capability_report["missing"]:
|
|
197
|
+
findings.append({"source": "suite_capabilities", "code": f"missing_{finding['key']}", "message": finding["message"]})
|
|
198
|
+
if capability_report["strict_unknown"]:
|
|
199
|
+
for finding in capability_report["unknown"]:
|
|
200
|
+
findings.append({"source": "suite_capabilities", "code": f"unknown_{finding['key']}", "message": finding["message"]})
|
|
201
|
+
if contract_plan["model"] == "<required>":
|
|
202
|
+
findings.append({"source": "contract_plan", "code": "model_required", "message": "model is required before executing contract or benchmark checks"})
|
|
203
|
+
return findings
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _warnings(
|
|
207
|
+
provider_audit: dict[str, Any],
|
|
208
|
+
capability_report: dict[str, Any],
|
|
209
|
+
metric_coverage: dict[str, Any],
|
|
210
|
+
contract_plan: dict[str, Any],
|
|
211
|
+
) -> list[dict[str, str]]:
|
|
212
|
+
warnings: list[dict[str, str]] = []
|
|
213
|
+
for entry in provider_audit["providers"]:
|
|
214
|
+
for finding in entry["findings"]:
|
|
215
|
+
if finding["severity"] == "warning":
|
|
216
|
+
warnings.append({"source": "provider_audit", "code": finding["code"], "message": finding["message"]})
|
|
217
|
+
if not capability_report["strict_unknown"]:
|
|
218
|
+
for finding in capability_report["unknown"]:
|
|
219
|
+
warnings.append({"source": "suite_capabilities", "code": f"unknown_{finding['key']}", "message": finding["message"]})
|
|
220
|
+
capability_evidence = contract_plan.get("capability_evidence") if isinstance(contract_plan.get("capability_evidence"), dict) else {}
|
|
221
|
+
not_covered = capability_evidence.get("not_covered") if isinstance(capability_evidence.get("not_covered"), list) else []
|
|
222
|
+
for item in not_covered:
|
|
223
|
+
if isinstance(item, dict) and item.get("capability"):
|
|
224
|
+
capability = str(item["capability"])
|
|
225
|
+
warnings.append(
|
|
226
|
+
{
|
|
227
|
+
"source": "contract_capability_evidence",
|
|
228
|
+
"code": f"not_covered_{capability}",
|
|
229
|
+
"message": f"contract-check planning does not prove capability: {capability}",
|
|
230
|
+
}
|
|
231
|
+
)
|
|
232
|
+
unavailable = [field["field"] for field in metric_coverage["fields"] if field["status"] == "unavailable"]
|
|
233
|
+
if unavailable:
|
|
234
|
+
warnings.append(
|
|
235
|
+
{
|
|
236
|
+
"source": "metric_coverage",
|
|
237
|
+
"code": "unavailable_metrics",
|
|
238
|
+
"message": "unavailable normalized metrics: " + ", ".join(sorted(unavailable)[:12]),
|
|
239
|
+
}
|
|
240
|
+
)
|
|
241
|
+
return warnings
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections.abc import Mapping, MutableMapping, Sequence
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
# Header / key names (compared lower-cased) whose value is always replaced.
SECRET_HEADER_NAMES = {
    "authorization",
    "x-api-key",
    "api-key",
    "openai-api-key",
    "anthropic-api-key",
}

# Token shapes that are masked wherever they occur inside a string.
SECRET_PATTERNS = [
    re.compile(r"sk-ant-[A-Za-z0-9_\-]{16,}"),
    re.compile(r"sk-(?!ant-)[A-Za-z0-9_\-]{16,}"),
    re.compile(r"gh[opusr]_[A-Za-z0-9_]{16,}"),
    re.compile(r"Bearer\s+[A-Za-z0-9._\-]{16,}", re.IGNORECASE),
]


def _is_secret_header(key: Any) -> bool:
    """Return True when *key* is a string naming a secret-bearing header.

    Non-string keys (ints, tuples, ...) can occur in nested payloads; they are
    never header names, and calling ``.lower()`` on them would raise.
    """
    return isinstance(key, str) and key.lower() in SECRET_HEADER_NAMES


def redact_text(value: str) -> str:
    """Return *value* with every ``SECRET_PATTERNS`` match replaced by ``[REDACTED]``."""
    redacted = value
    for pattern in SECRET_PATTERNS:
        redacted = pattern.sub("[REDACTED]", redacted)
    return redacted


def redact_mapping_headers(headers: Mapping[str, Any]) -> dict[str, Any]:
    """Return a redacted ``dict`` copy of *headers*.

    Values stored under a secret header name (case-insensitive) become
    ``[REDACTED]``; every other value is passed through :func:`redact_value`.
    Keys are kept as they are, including non-string keys.
    """
    return {
        key: "[REDACTED]" if _is_secret_header(key) else redact_value(item)
        for key, item in headers.items()
    }


def redact_value(value: Any) -> Any:
    """Recursively redact secrets inside *value*.

    Strings are scanned with :func:`redact_text`, mappings are handled by
    :func:`redact_mapping_headers`, and other sequences (except ``bytes`` and
    ``bytearray``) are returned as a new ``list`` of redacted items.  Any
    other value is returned unchanged.
    """
    if isinstance(value, str):
        return redact_text(value)
    if isinstance(value, Mapping):
        return redact_mapping_headers(value)
    if isinstance(value, Sequence) and not isinstance(value, (bytes, bytearray, str)):
        return [redact_value(item) for item in value]
    return value


def redact_in_place(value: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
    """Redact the top-level entries of *value* in place and return it.

    Nested containers are replaced by redacted copies rather than mutated.
    """
    # Snapshot the keys: values are reassigned while iterating.
    for key in list(value.keys()):
        if _is_secret_header(key):
            value[key] = "[REDACTED]"
        else:
            value[key] = redact_value(value[key])
    return value
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from zipfile import BadZipFile, ZipFile
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
9
|
+
|
|
10
|
+
from agentblaster.errors import ConfigError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Named detectors applied to file contents and zip entry names.  Each entry is
# (finding name, compiled pattern); only the name is ever reported.
SCAN_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    (label, re.compile(expression, flags))
    for label, expression, flags in (
        # credential-shaped tokens
        ("anthropic_api_key", r"sk-ant-[A-Za-z0-9_\-]{16,}", 0),
        ("openai_api_key", r"sk-(?!ant-)[A-Za-z0-9_\-]{16,}", 0),
        ("github_token", r"gh[opusr]_[A-Za-z0-9_]{16,}", 0),
        ("bearer_token", r"Bearer\s+[A-Za-z0-9._\-]{16,}", re.IGNORECASE),
        ("aws_access_key_id", r"AKIA[0-9A-Z]{16}", 0),
        # machine-local filesystem paths
        ("macos_user_path", r"/Users/[A-Za-z0-9._\-]+(?:/[^\s\"']*)?", 0),
        ("macos_volume_path", r"/Volumes/[A-Za-z0-9._\-]+(?:/[^\s\"']*)?", 0),
        ("private_local_path", r"/private/[A-Za-z0-9._\-/]+", 0),
        ("windows_user_path", r"[A-Za-z]:[\\/]+Users[\\/]+[A-Za-z0-9._\-]+(?:[\\/]+[^\s\"']*)?", 0),
    )
]
|
|
24
|
+
# Lower-cased file suffixes whose contents are scanned as text.
TEXT_SUFFIXES = {
    ".csv", ".html", ".json", ".jsonl", ".log", ".md",
    ".svg", ".txt", ".xml", ".yaml", ".yml",
}
# Suffixes handled as zip archives: their members are scanned individually.
ZIP_SUFFIXES = {".zip"}
# Default value of the report's "schema_version" field.
REDACTION_SCAN_SCHEMA_VERSION = "agentblaster.redaction-scan.v1"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RedactionScanFinding(BaseModel):
    """Secret-like pattern finding without the matched secret value."""

    # Reject unexpected fields so malformed findings fail loudly.
    model_config = ConfigDict(extra="forbid")

    # Report-safe path of the scanned file or archive.
    path: str
    # Label of the zip member; None when the finding is in a plain file.
    entry: str | None = None
    # 1-based line number of the match; None for findings about an entry name.
    line: int | None = None
    # Name of the detector that fired (never the matched text).
    pattern: str
    # Human-readable description of the finding.
    message: str
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class RedactionScanReport(BaseModel):
    """Redaction scan report for release/publishing gates."""

    # Reject unexpected fields so malformed reports fail loudly.
    model_config = ConfigDict(extra="forbid")

    # Identifier of the report format.
    schema_version: str = REDACTION_SCAN_SCHEMA_VERSION
    # True when the scan produced no findings.
    ok: bool
    # Number of input paths handed to the scan.
    total_paths: int = Field(ge=0)
    # Files and archive members whose text was scanned.
    scanned_items: int = Field(ge=0)
    # Files and archive members that were not scanned.
    skipped_items: int = Field(ge=0)
    # Individual detector hits, without the matched values.
    findings: list[RedactionScanFinding] = Field(default_factory=list)
    # Free-text caveats about what the scan does and does not cover.
    security_notes: list[str] = Field(default_factory=list)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def scan_paths(paths: list[Path], *, max_bytes: int = 2_000_000) -> RedactionScanReport:
    """Scan files, directories and zip archives for secret-like patterns.

    Directories are walked recursively.  ``.zip`` files are scanned member by
    member.  Other files are scanned only when they look textual, are at most
    *max_bytes* long and decode as UTF-8; everything else is counted as
    skipped.

    Args:
        paths: Files or directories to scan; must not be empty.
        max_bytes: Size limit per file or archive member.

    Returns:
        A report whose ``ok`` flag is true when there are no findings.

    Raises:
        ConfigError: If *paths* is empty, a path does not exist, or a file or
            archive cannot be read.
    """
    if not paths:
        raise ConfigError("redaction scan requires at least one path")
    findings: list[RedactionScanFinding] = []
    scanned = 0
    skipped = 0
    for input_path in paths:
        if not input_path.exists():
            raise ConfigError(f"redaction scan path does not exist: {input_path}")
        for path in _iter_files(input_path):
            if path.suffix.lower() in ZIP_SUFFIXES:
                zip_scanned, zip_skipped = _scan_zip(path, findings, max_bytes=max_bytes)
                scanned += zip_scanned
                skipped += zip_skipped
                continue
            if not _looks_textual(path):
                skipped += 1
                continue
            try:
                # Check the on-disk size first so that oversized files are
                # skipped without loading them into memory.
                if path.stat().st_size > max_bytes:
                    skipped += 1
                    continue
                data = path.read_bytes()
            except OSError as exc:
                raise ConfigError(f"unable to read scan path {path}: {exc}") from exc
            # Re-check: the file may have grown between stat() and the read.
            if len(data) > max_bytes:
                skipped += 1
                continue
            text = _decode_text(data)
            if text is None:
                skipped += 1
                continue
            scanned += 1
            findings.extend(_scan_text(text, path=_safe_report_path(path), entry=None))
    return RedactionScanReport(
        ok=not findings,
        total_paths=len(paths),
        scanned_items=scanned,
        skipped_items=skipped,
        findings=findings,
        security_notes=[
            "Redaction scan reports pattern names and locations only; matched secret values are never included.",
            "This is a deterministic regex gate for common secret formats, not a complete DLP system.",
        ],
    )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def format_redaction_scan_report(report: RedactionScanReport) -> str:
    """Render *report* as newline-terminated plain text.

    Six header lines are followed by one line per finding in the form
    ``pattern location message``, where the location is ``path``,
    optionally extended by ``!entry`` and ``:line``.
    """

    def _describe(finding: RedactionScanFinding) -> str:
        where = finding.path
        if finding.entry is not None:
            where = f"{where}!{finding.entry}"
        if finding.line is not None:
            where = f"{where}:{finding.line}"
        return f"{finding.pattern} {where} {finding.message}"

    header = [
        f"schema_version: {report.schema_version}",
        f"ok: {str(report.ok).lower()}",
        f"total_paths: {report.total_paths}",
        f"scanned_items: {report.scanned_items}",
        f"skipped_items: {report.skipped_items}",
        f"findings: {len(report.findings)}",
    ]
    body = [_describe(finding) for finding in report.findings]
    return "\n".join(header + body) + "\n"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def redaction_scan_json(report: RedactionScanReport) -> str:
    """Serialise *report* as key-sorted, 2-space-indented JSON plus a newline."""
    payload = report.model_dump(mode="json")
    rendered = json.dumps(payload, indent=2, sort_keys=True)
    return f"{rendered}\n"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _iter_files(path: Path):
|
|
133
|
+
if path.is_file():
|
|
134
|
+
yield path
|
|
135
|
+
return
|
|
136
|
+
for child in sorted(item for item in path.rglob("*") if item.is_file()):
|
|
137
|
+
yield child
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _looks_textual(path: Path) -> bool:
    """Return True for a known text suffix or for a name without any suffix."""
    suffix = path.suffix
    return suffix.lower() in TEXT_SUFFIXES or suffix == ""
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _scan_zip(path: Path, findings: list[RedactionScanFinding], *, max_bytes: int) -> tuple[int, int]:
    """Scan the members of the zip archive at *path*, appending to *findings*.

    Every member name is checked first.  A member's content is scanned only
    when the member looks textual (same rule as for files on disk, so
    suffix-less members such as ``.env`` or ``LICENSE`` are included), its
    declared size is at most *max_bytes* and it decodes as UTF-8.  Members
    are visited in name order.

    Returns:
        ``(scanned, skipped)`` member counts.

    Raises:
        ConfigError: If the archive or one of its members cannot be read.
    """
    scanned = 0
    skipped = 0
    report_path = _safe_report_path(path)
    try:
        with ZipFile(path) as archive:
            for info in sorted(archive.infolist(), key=lambda item: item.filename):
                if info.is_dir():
                    continue
                entry_report_label = _zip_entry_report_label(info.filename)
                findings.extend(_scan_zip_entry_name(info.filename, path=report_path))
                if not _looks_textual(Path(info.filename)):
                    skipped += 1
                    continue
                if info.file_size > max_bytes:
                    skipped += 1
                    continue
                text = _decode_text(archive.read(info))
                if text is None:
                    skipped += 1
                    continue
                scanned += 1
                findings.extend(_scan_text(text, path=report_path, entry=entry_report_label))
    except (OSError, BadZipFile, RuntimeError, NotImplementedError) as exc:
        # ZipFile.read raises RuntimeError for encrypted members and
        # NotImplementedError for unsupported compression methods; surface
        # them like any other unreadable archive.
        raise ConfigError(f"unable to scan zip artifact {path}: {exc}") from exc
    return scanned, skipped
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _scan_zip_entry_name(entry: str, *, path: str) -> list[RedactionScanFinding]:
    """Check a zip member *name* for unsafe paths and secret-like patterns.

    A ``zip_unsafe_entry_path`` finding is produced for absolute names,
    drive-letter names and names containing a ``..`` segment.  One further
    finding is produced per scan pattern that matches the raw name.  All
    findings carry a sanitised entry label instead of the raw name.
    """
    posix_name = entry.replace("\\", "/")
    segments = [segment for segment in posix_name.split("/") if segment not in {"", "."}]
    label = _safe_zip_entry_label(entry)

    def _finding(pattern_name: str, text: str) -> RedactionScanFinding:
        return RedactionScanFinding(path=path, entry=label, line=None, pattern=pattern_name, message=text)

    results: list[RedactionScanFinding] = []
    escapes_root = posix_name.startswith("/") or re.match(r"^[A-Za-z]:/", posix_name) or ".." in segments
    if escapes_root:
        results.append(
            _finding(
                "zip_unsafe_entry_path",
                "unsafe zip entry path detected; archive member name suppressed from extracted paths",
            )
        )
    results.extend(
        _finding(
            pattern_name,
            "secret-like or local-path pattern detected in zip entry name; matched value suppressed",
        )
        for pattern_name, regex in SCAN_PATTERNS
        if regex.search(entry)
    )
    return results
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _zip_entry_report_label(entry: str) -> str:
    """Return the label under which a zip member is reported.

    The raw name is returned unless it is an unsafe path (absolute,
    drive-letter or containing ``..``) or matches a scan pattern; in those
    cases the sanitised label is returned instead.
    """
    posix_name = entry.replace("\\", "/")
    if posix_name.startswith("/") or re.match(r"^[A-Za-z]:/", posix_name):
        return _safe_zip_entry_label(entry)
    segments = [segment for segment in posix_name.split("/") if segment not in {"", "."}]
    if ".." in segments:
        return _safe_zip_entry_label(entry)
    for _, regex in SCAN_PATTERNS:
        if regex.search(entry):
            return _safe_zip_entry_label(entry)
    return entry
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _safe_zip_entry_label(entry: str) -> str:
    """Return the final name segment of *entry*, or a placeholder.

    Backslashes are treated as separators and empty, ``.`` and ``..``
    segments are dropped.  ``<redacted-entry>`` is returned when no segment
    remains or when the final segment itself matches a scan pattern.
    """
    placeholder = "<redacted-entry>"
    segments = [
        segment
        for segment in entry.replace("\\", "/").split("/")
        if segment not in {"", ".", ".."}
    ]
    if not segments:
        return placeholder
    basename = segments[-1]
    for _, regex in SCAN_PATTERNS:
        if regex.search(basename):
            return placeholder
    return basename
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _decode_text(data: bytes) -> str | None:
|
|
221
|
+
try:
|
|
222
|
+
return data.decode("utf-8")
|
|
223
|
+
except UnicodeDecodeError:
|
|
224
|
+
return None
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _safe_report_path(path: Path) -> str:
|
|
228
|
+
if path.is_absolute():
|
|
229
|
+
return path.name
|
|
230
|
+
return path.as_posix()
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _scan_text(text: str, *, path: str, entry: str | None) -> list[RedactionScanFinding]:
    """Return one finding per (line, pattern) pair that matches in *text*.

    Lines are numbered from 1.  Findings are ordered by line and, within a
    line, by pattern order; the matched text itself is never recorded.
    """
    return [
        RedactionScanFinding(
            path=path,
            entry=entry,
            line=line_number,
            pattern=pattern_name,
            message="secret-like pattern detected; matched value suppressed",
        )
        for line_number, content in enumerate(text.splitlines(), start=1)
        for pattern_name, regex in SCAN_PATTERNS
        if regex.search(content)
    ]
|