agentblaster 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. agentblaster/__init__.py +3 -0
  2. agentblaster/adapters.py +1435 -0
  3. agentblaster/agent_profiles.py +420 -0
  4. agentblaster/audit.py +27 -0
  5. agentblaster/benchmark_kit.py +356 -0
  6. agentblaster/bundle.py +692 -0
  7. agentblaster/campaign.py +1031 -0
  8. agentblaster/campaign_preflight.py +647 -0
  9. agentblaster/capabilities.py +270 -0
  10. agentblaster/claim_readiness.py +3948 -0
  11. agentblaster/cleanup.py +226 -0
  12. agentblaster/cli.py +4202 -0
  13. agentblaster/compare.py +423 -0
  14. agentblaster/config.py +62 -0
  15. agentblaster/constants.py +8 -0
  16. agentblaster/contract_check.py +919 -0
  17. agentblaster/costs.py +74 -0
  18. agentblaster/dashboard.py +5974 -0
  19. agentblaster/engine_advisory.py +1045 -0
  20. agentblaster/engine_onboarding.py +224 -0
  21. agentblaster/engine_targets.py +545 -0
  22. agentblaster/environment.py +284 -0
  23. agentblaster/errors.py +21 -0
  24. agentblaster/evidence.py +188 -0
  25. agentblaster/evidence_index.py +1865 -0
  26. agentblaster/experiment.py +200 -0
  27. agentblaster/exports.py +158 -0
  28. agentblaster/failures.py +70 -0
  29. agentblaster/fixtures.py +775 -0
  30. agentblaster/harness.py +1254 -0
  31. agentblaster/implementation_status.py +719 -0
  32. agentblaster/integrity.py +161 -0
  33. agentblaster/launch_recipes.py +295 -0
  34. agentblaster/lcp.py +107 -0
  35. agentblaster/matrix.py +101 -0
  36. agentblaster/matrix_gate.py +565 -0
  37. agentblaster/matrix_pressure.py +187 -0
  38. agentblaster/matrix_saturation.py +601 -0
  39. agentblaster/mcp.py +187 -0
  40. agentblaster/metric_coverage.py +552 -0
  41. agentblaster/mock_provider.py +485 -0
  42. agentblaster/model_catalog.py +153 -0
  43. agentblaster/models.py +531 -0
  44. agentblaster/observability.py +110 -0
  45. agentblaster/planning.py +199 -0
  46. agentblaster/policy.py +635 -0
  47. agentblaster/presets.py +219 -0
  48. agentblaster/prompt_footprint.py +245 -0
  49. agentblaster/protocol_repair.py +431 -0
  50. agentblaster/provider_audit.py +210 -0
  51. agentblaster/publication_brief.py +893 -0
  52. agentblaster/quality.py +1142 -0
  53. agentblaster/rate_limits.py +74 -0
  54. agentblaster/readiness.py +241 -0
  55. agentblaster/redaction.py +58 -0
  56. agentblaster/redaction_scan.py +247 -0
  57. agentblaster/release.py +440 -0
  58. agentblaster/release_qualification.py +2248 -0
  59. agentblaster/remote_onboarding.py +308 -0
  60. agentblaster/reports.py +2245 -0
  61. agentblaster/runner.py +1677 -0
  62. agentblaster/schema_registry.py +1151 -0
  63. agentblaster/secrets.py +274 -0
  64. agentblaster/security_posture.py +492 -0
  65. agentblaster/skills.py +67 -0
  66. agentblaster/stress_matrix.py +113 -0
  67. agentblaster/suite_audit.py +259 -0
  68. agentblaster/suite_calibration.py +171 -0
  69. agentblaster/suites.py +805 -0
  70. agentblaster/telemetry.py +947 -0
  71. agentblaster/telemetry_audit.py +300 -0
  72. agentblaster/toolsim.py +193 -0
  73. agentblaster/workflow_readiness.py +570 -0
  74. agentblaster/workflow_surfaces.py +292 -0
  75. agentblaster-0.1.0.dist-info/METADATA +250 -0
  76. agentblaster-0.1.0.dist-info/RECORD +78 -0
  77. agentblaster-0.1.0.dist-info/WHEEL +4 -0
  78. agentblaster-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,74 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from threading import Lock
5
+ from typing import Any, Callable
6
+
7
+
8
def request_interval_seconds(rate_limits: dict[str, Any]) -> float | None:
    """Return the minimum spacing, in seconds, between two requests.

    Reads a per-second rate (``requests_per_second`` or ``rps``) and a
    per-minute rate (``requests_per_minute``, ``rpm`` or
    ``max_requests_per_minute``); within each group the first key whose value
    is not ``None`` wins.  Every positive rate is converted to an interval and
    the largest (most restrictive) interval is returned.

    Returns ``None`` when no positive, numeric rate is configured.
    """
    per_second = _number(
        _first_present(*(rate_limits.get(key) for key in ("requests_per_second", "rps")))
    )
    per_minute = _number(
        _first_present(
            *(
                rate_limits.get(key)
                for key in ("requests_per_minute", "rpm", "max_requests_per_minute")
            )
        )
    )
    # Pair each rate with the length of its window so one expression covers both units.
    candidates = [
        window_seconds / rate
        for window_seconds, rate in ((1, per_second), (60, per_minute))
        if rate and rate > 0
    ]
    if not candidates:
        return None
    return max(candidates)
23
+
24
+
25
def rate_limit_max_concurrency(rate_limits: dict[str, Any]) -> int | None:
    """Return the configured concurrency cap, or ``None`` when there is no usable cap.

    The value is read from ``max_concurrency`` (preferred) or ``concurrency``
    and truncated to an integer.

    ``None`` is returned when the value is missing, non-numeric, zero or
    negative, not finite (NaN / infinity), or truncates to zero (a value
    between 0 and 1), because none of these describe a usable worker count.
    """
    value = _number(_first_present(rate_limits.get("max_concurrency"), rate_limits.get("concurrency")))
    if value is None or value <= 0:
        return None
    try:
        limit = int(value)
    except (OverflowError, ValueError):
        # int() raises ValueError for NaN and OverflowError for infinity;
        # treat both like any other unusable configuration value.
        return None
    # A fractional value such as 0.5 truncates to 0, which is not a valid cap.
    return limit if limit >= 1 else None
30
+
31
+
32
class RateLimitPacer:
    """Space out request starts to honour a provider's request-rate limit.

    Each call to :meth:`wait` reserves the next free start slot while holding a
    lock, then sleeps outside the lock until that slot arrives, so concurrent
    callers are handed consecutive slots ``interval_seconds`` apart.
    """

    def __init__(
        self,
        rate_limits: dict[str, Any],
        *,
        sleep_fn: Callable[[float], None] = time.sleep,
        monotonic_fn: Callable[[], float] = time.monotonic,
    ) -> None:
        """Derive the pacing interval from *rate_limits*.

        *sleep_fn* and *monotonic_fn* default to ``time.sleep`` and
        ``time.monotonic`` and can be replaced by the caller.
        """
        # None means no request-rate limit is configured; wait() is then a no-op.
        self.interval_seconds = request_interval_seconds(rate_limits)
        self._lock = Lock()
        self._sleep = sleep_fn
        self._monotonic = monotonic_fn
        # Clock value before which the next request must not start.
        self._next_allowed_at = 0.0

    def wait(self) -> float:
        """Block until this caller's slot and return the delay in milliseconds.

        Returns ``0.0`` when no interval is configured; otherwise the time
        slept, rounded to three decimal places.
        """
        interval = self.interval_seconds
        if interval is None:
            return 0.0
        with self._lock:
            current = self._monotonic()
            earliest = self._next_allowed_at
            delay = max(earliest - current, 0.0)
            # Reserve the slot before releasing the lock so the next caller queues behind it.
            self._next_allowed_at = max(current, earliest) + interval
        if delay > 0:
            self._sleep(delay)
        return round(delay * 1000, 3)
59
+
60
+
61
+ def _number(value: Any) -> float | None:
62
+ if value is None:
63
+ return None
64
+ try:
65
+ return float(value)
66
+ except (TypeError, ValueError):
67
+ return None
68
+
69
+
70
+ def _first_present(*values):
71
+ for value in values:
72
+ if value is not None:
73
+ return value
74
+ return None
@@ -0,0 +1,241 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from agentblaster.capabilities import check_suite_compatibility
8
+ from agentblaster.contract_check import provider_contract_plan
9
+ from agentblaster.metric_coverage import metric_coverage_for_provider
10
+ from agentblaster.models import ProviderConfig, SuiteDefinition
11
+ from agentblaster.policy import SecurityPolicy
12
+ from agentblaster.provider_audit import audit_providers
13
+
14
# Schema identifier stamped into every readiness dossier as "schema_version".
READINESS_SCHEMA_VERSION = "agentblaster.benchmark-readiness.v1"
15
+
16
+
17
def build_readiness_dossier(
    *,
    provider: ProviderConfig,
    suite: SuiteDefinition,
    policy: SecurityPolicy,
    model: str | None = None,
    strict_unknown: bool = False,
) -> dict[str, Any]:
    """Build a no-network benchmark readiness dossier for one provider and suite.

    Combines the provider audit, the suite capability check, the contract-check
    plan and the metric coverage report into one JSON-friendly dict.

    Args:
        provider: Provider configuration to assess.
        suite: Suite whose capability requirements are checked against the provider.
        policy: Security policy passed to the provider audit.
        model: Model name; falls back to ``provider.default_model`` and then to
            the ``"<required>"`` placeholder.
        strict_unknown: Forwarded to the capability check; when the resulting
            report is strict, unknown capabilities are blocking instead of warnings.

    Returns:
        The dossier.  ``ready`` is true only when there are no blocking findings.
    """
    provider_audit = audit_providers([provider], policy).model_dump(mode="json")
    capability_report = check_suite_compatibility(provider, suite, strict_unknown=strict_unknown).model_dump(mode="json")
    contract_plan = provider_contract_plan(provider, model=model)
    metric_coverage = metric_coverage_for_provider(provider)
    provider_auth_posture = _provider_auth_posture(provider_audit)
    secret_backend_posture = _compact_secret_backend_posture(provider_audit.get("secret_backend_posture"))
    blocking_findings = _blocking_findings(provider_audit, capability_report, contract_plan)
    # Guard the type the same way _warnings does: a plan whose
    # "capability_evidence" is missing or not a dict must not crash the summary.
    raw_evidence = contract_plan.get("capability_evidence")
    contract_capability_evidence = raw_evidence if isinstance(raw_evidence, dict) else {}
    warnings = _warnings(provider_audit, capability_report, metric_coverage, contract_plan)

    def _evidence_count(bucket: str) -> int:
        """Count entries in one evidence bucket; anything that is not a list counts as zero."""
        entries = contract_capability_evidence.get(bucket)
        return len(entries) if isinstance(entries, list) else 0

    return {
        "schema_version": READINESS_SCHEMA_VERSION,
        "provider": provider.name,
        "suite": suite.name,
        "model": model or provider.default_model or "<required>",
        "ready": not blocking_findings,
        "strict_unknown": strict_unknown,
        "summary": {
            "policy_ok": provider_audit["policy_ok"] == provider_audit["total_providers"] and provider_audit["errors"] == 0,
            "suite_compatible": capability_report["compatible"],
            "contract_checks_planned": contract_plan["summary"]["planned"],
            "contract_capabilities_directly_checked": _evidence_count("directly_checked"),
            "contract_capabilities_proxy_checked": _evidence_count("proxy_checked"),
            "contract_capabilities_not_covered": _evidence_count("not_covered"),
            "metric_coverage_score": metric_coverage["summary"]["coverage_score"],
            "provider_auth_writable_backends": sum(1 for item in provider_auth_posture if item["api_key_ref_writable_backend"]),
            "provider_auth_plaintext_fallbacks": sum(1 for item in provider_auth_posture if item["api_key_ref_plaintext_fallback"]),
            "provider_auth_prewrite_policy_guards_recommended": sum(
                1 for item in provider_auth_posture if item["prewrite_policy_guard_recommended"]
            ),
            "provider_auth_keyring_required": sum(
                1 for entry in provider_audit["providers"] if entry.get("keyring_backend_required")
            ),
            "keyring_dependency_available": secret_backend_posture.get("keyring_dependency_available"),
            "blocking_findings": len(blocking_findings),
            "warnings": len(warnings),
        },
        "blocking_findings": blocking_findings,
        "warnings": warnings,
        "provider_auth_posture": provider_auth_posture,
        "secret_backend_posture": secret_backend_posture,
        "provider_audit": provider_audit,
        "suite_capabilities": capability_report,
        "contract_plan": contract_plan,
        "contract_capability_evidence": contract_capability_evidence,
        "metric_coverage": metric_coverage,
        "security_notes": [
            "Readiness dossier is static and does not contact endpoints, resolve secrets, or read raw traces.",
            "Contract checks are planned only; use providers contract-check --execute explicitly for live checks.",
            "Remote execution remains governed by policy and explicit allow-remote flags on execution commands.",
        ],
    }
89
+
90
+
91
def write_readiness_json(report: dict[str, Any], output: Path) -> Path:
    """Write *report* to *output* as indented, key-sorted JSON and return *output*.

    Missing parent directories are created.  The file is UTF-8 encoded and
    ends with a newline.
    """
    target_dir = output.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2, sort_keys=True)
    output.write_text(f"{serialized}\n", encoding="utf-8")
    return output
95
+
96
+
97
def format_readiness_report(report: dict[str, Any]) -> str:
    """Render a readiness dossier as newline-terminated plain text.

    Sections appear in a fixed order: header fields, ``summary``,
    ``blocking_findings``, ``warnings``, ``provider_auth_posture`` and
    ``secret_backend_posture``.  An empty section is rendered as ``- none``.
    """
    out = [
        "AgentBlaster benchmark readiness dossier",
        f"provider: {report['provider']}",
        f"suite: {report['suite']}",
        f"model: {report['model']}",
        f"ready: {str(report['ready']).lower()}",
        f"strict_unknown: {str(report['strict_unknown']).lower()}",
        "summary:",
    ]
    out.extend(f"- {name}: {value}" for name, value in report["summary"].items())

    # Blocking findings and warnings share one line format.
    for heading in ("blocking_findings", "warnings"):
        out.append(f"{heading}:")
        if report[heading]:
            out.extend(
                f"- {item['source']}:{item['code']} {item['message']}" for item in report[heading]
            )
        else:
            out.append("- none")

    out.append("provider_auth_posture:")
    postures = report.get("provider_auth_posture", [])
    if postures:
        for posture in postures:
            fields = [
                f"- {posture['provider']}:",
                f"secret={posture['api_key_ref_kind'] or 'none'}",
                f"configured={_bool_text(posture['api_key_ref_configured'])}",
                f"writable={_bool_text(posture['api_key_ref_writable_backend'])}",
                f"plaintext={_bool_text(posture['api_key_ref_plaintext_fallback'])}",
                f"prewrite_policy_guard_recommended={_bool_text(posture['prewrite_policy_guard_recommended'])}",
            ]
            out.append(" ".join(fields))
    else:
        out.append("- none")

    backend = report.get("secret_backend_posture")
    if not isinstance(backend, dict):
        backend = {}
    out.append("secret_backend_posture:")
    if not backend:
        out.append("- none")
    else:
        keyring_optional = _bool_text(bool(backend.get("keyring_optional")))
        out.append(f"- keyring_optional: {keyring_optional}")
        keyring_available = _bool_text(bool(backend.get("keyring_dependency_available")))
        out.append(f"- keyring_dependency_available: {keyring_available}")
        recommended = ",".join(backend.get("recommended_enterprise_backends", []) or [])
        out.append(f"- recommended_enterprise_backends: {recommended}")
    return "\n".join(out) + "\n"
146
+
147
+
148
+ def _provider_auth_posture(provider_audit: dict[str, Any]) -> list[dict[str, Any]]:
149
+ posture: list[dict[str, Any]] = []
150
+ for entry in provider_audit["providers"]:
151
+ posture.append(
152
+ {
153
+ "provider": entry["name"],
154
+ "api_key_ref_kind": entry["api_key_ref_kind"],
155
+ "api_key_ref_configured": entry["api_key_ref_configured"],
156
+ "api_key_ref_writable_backend": entry["api_key_ref_writable_backend"],
157
+ "api_key_ref_plaintext_fallback": entry["api_key_ref_plaintext_fallback"],
158
+ "prewrite_policy_guard_recommended": entry["prewrite_policy_guard_recommended"],
159
+ }
160
+ )
161
+ return posture
162
+
163
+
164
+ def _compact_secret_backend_posture(value: Any) -> dict[str, Any]:
165
+ if not isinstance(value, dict):
166
+ return {}
167
+ supported = value.get("supported_secret_ref_kinds") if isinstance(value.get("supported_secret_ref_kinds"), list) else []
168
+ recommended = (
169
+ value.get("recommended_enterprise_backends") if isinstance(value.get("recommended_enterprise_backends"), list) else []
170
+ )
171
+ return {
172
+ "env_reference_portable": bool(value.get("env_reference_portable")),
173
+ "keyring_optional": bool(value.get("keyring_optional")),
174
+ "keyring_dependency_available": bool(value.get("keyring_dependency_available")),
175
+ "dotenv_plaintext_fallback_supported": bool(value.get("dotenv_plaintext_fallback_supported")),
176
+ "dotenv_plaintext_fallback_enterprise_default": bool(value.get("dotenv_plaintext_fallback_enterprise_default")),
177
+ "supported_secret_ref_kinds": [str(item) for item in supported[:6]],
178
+ "recommended_enterprise_backends": [str(item) for item in recommended[:6]],
179
+ }
180
+
181
+
182
+ def _bool_text(value: bool) -> str:
183
+ return str(value).lower()
184
+
185
+
186
+ def _blocking_findings(
187
+ provider_audit: dict[str, Any],
188
+ capability_report: dict[str, Any],
189
+ contract_plan: dict[str, Any],
190
+ ) -> list[dict[str, str]]:
191
+ findings: list[dict[str, str]] = []
192
+ for entry in provider_audit["providers"]:
193
+ for finding in entry["findings"]:
194
+ if finding["severity"] == "error":
195
+ findings.append({"source": "provider_audit", "code": finding["code"], "message": finding["message"]})
196
+ for finding in capability_report["missing"]:
197
+ findings.append({"source": "suite_capabilities", "code": f"missing_{finding['key']}", "message": finding["message"]})
198
+ if capability_report["strict_unknown"]:
199
+ for finding in capability_report["unknown"]:
200
+ findings.append({"source": "suite_capabilities", "code": f"unknown_{finding['key']}", "message": finding["message"]})
201
+ if contract_plan["model"] == "<required>":
202
+ findings.append({"source": "contract_plan", "code": "model_required", "message": "model is required before executing contract or benchmark checks"})
203
+ return findings
204
+
205
+
206
+ def _warnings(
207
+ provider_audit: dict[str, Any],
208
+ capability_report: dict[str, Any],
209
+ metric_coverage: dict[str, Any],
210
+ contract_plan: dict[str, Any],
211
+ ) -> list[dict[str, str]]:
212
+ warnings: list[dict[str, str]] = []
213
+ for entry in provider_audit["providers"]:
214
+ for finding in entry["findings"]:
215
+ if finding["severity"] == "warning":
216
+ warnings.append({"source": "provider_audit", "code": finding["code"], "message": finding["message"]})
217
+ if not capability_report["strict_unknown"]:
218
+ for finding in capability_report["unknown"]:
219
+ warnings.append({"source": "suite_capabilities", "code": f"unknown_{finding['key']}", "message": finding["message"]})
220
+ capability_evidence = contract_plan.get("capability_evidence") if isinstance(contract_plan.get("capability_evidence"), dict) else {}
221
+ not_covered = capability_evidence.get("not_covered") if isinstance(capability_evidence.get("not_covered"), list) else []
222
+ for item in not_covered:
223
+ if isinstance(item, dict) and item.get("capability"):
224
+ capability = str(item["capability"])
225
+ warnings.append(
226
+ {
227
+ "source": "contract_capability_evidence",
228
+ "code": f"not_covered_{capability}",
229
+ "message": f"contract-check planning does not prove capability: {capability}",
230
+ }
231
+ )
232
+ unavailable = [field["field"] for field in metric_coverage["fields"] if field["status"] == "unavailable"]
233
+ if unavailable:
234
+ warnings.append(
235
+ {
236
+ "source": "metric_coverage",
237
+ "code": "unavailable_metrics",
238
+ "message": "unavailable normalized metrics: " + ", ".join(sorted(unavailable)[:12]),
239
+ }
240
+ )
241
+ return warnings
@@ -0,0 +1,58 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections.abc import Mapping, MutableMapping, Sequence
5
+ from typing import Any
6
+
7
# Header names (compared lower-cased) whose values are always replaced wholesale.
SECRET_HEADER_NAMES = {
    "authorization",
    "x-api-key",
    "api-key",
    "openai-api-key",
    "anthropic-api-key",
}

# Secret-looking substrings replaced wherever they occur in free text.
SECRET_PATTERNS = [
    # "sk-ant-" prefixed keys.
    re.compile(r"sk-ant-[A-Za-z0-9_\-]{16,}"),
    # Other "sk-" prefixed keys; the lookahead leaves "sk-ant-" to the pattern above.
    re.compile(r"sk-(?!ant-)[A-Za-z0-9_\-]{16,}"),
    # Tokens prefixed gho_/ghp_/ghu_/ghs_/ghr_.
    re.compile(r"gh[opusr]_[A-Za-z0-9_]{16,}"),
    # "Bearer <token>" credentials, any letter case.
    re.compile(r"Bearer\s+[A-Za-z0-9._\-]{16,}", re.IGNORECASE),
    # "AKIA"-prefixed key ids.  The redaction scanner flags this format, so the
    # redactor must remove it too or redacted output would still fail the scan.
    re.compile(r"AKIA[0-9A-Z]{16}"),
]
21
+
22
+
23
def redact_text(value: str) -> str:
    """Return *value* with every ``SECRET_PATTERNS`` match replaced by ``[REDACTED]``.

    The patterns are applied one after another in list order, each to the
    output of the previous one.
    """
    scrubbed = value
    for secret_re in SECRET_PATTERNS:
        scrubbed = re.sub(secret_re, "[REDACTED]", scrubbed)
    return scrubbed
28
+
29
+
30
def redact_mapping_headers(headers: Mapping[str, Any]) -> dict[str, Any]:
    """Return a redacted copy of *headers* as a plain ``dict``.

    Values stored under a key named in ``SECRET_HEADER_NAMES`` (compared
    case-insensitively) are replaced by ``"[REDACTED]"``.  Every other value is
    passed through ``redact_value``, which scrubs strings and recurses into
    nested containers.  The input mapping is not modified.

    Keys that are not strings cannot be header names; their values are
    redacted like any other value instead of raising ``AttributeError`` on
    ``key.lower()``.  ``redact_value`` forwards arbitrary nested mappings
    here, so such keys can occur.
    """
    redacted: dict[str, Any] = {}
    for key, value in headers.items():
        if isinstance(key, str) and key.lower() in SECRET_HEADER_NAMES:
            redacted[key] = "[REDACTED]"
        else:
            # redact_value handles plain strings as well as containers.
            redacted[key] = redact_value(value)
    return redacted
40
+
41
+
42
def redact_value(value: Any) -> Any:
    """Recursively redact secrets inside *value*.

    Strings are scrubbed with ``redact_text``, mappings with
    ``redact_mapping_headers``, and any other sequence except ``bytes`` /
    ``bytearray`` is rebuilt as a ``list`` of redacted items.  Everything else
    is returned unchanged.
    """
    if isinstance(value, str):
        return redact_text(value)
    if isinstance(value, Mapping):
        return redact_mapping_headers(value)
    # Byte strings are sequences too, but are passed through untouched.
    if isinstance(value, (bytes, bytearray)) or not isinstance(value, Sequence):
        return value
    return list(map(redact_value, value))
50
+
51
+
52
def redact_in_place(value: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
    """Redact *value* by overwriting its entries, and return the same object.

    Entries whose key is named in ``SECRET_HEADER_NAMES`` (compared
    case-insensitively) become ``"[REDACTED]"``; every other entry is replaced
    by ``redact_value`` of its current value.  Only the top-level mapping is
    mutated; nested containers are replaced by redacted copies.

    Keys that are not strings are never treated as secret header names; their
    values are still redacted instead of raising ``AttributeError`` on
    ``key.lower()``.
    """
    # Snapshot the keys so the loop never iterates a live view while assigning.
    for key in list(value.keys()):
        if isinstance(key, str) and key.lower() in SECRET_HEADER_NAMES:
            value[key] = "[REDACTED]"
        else:
            value[key] = redact_value(value[key])
    return value
@@ -0,0 +1,247 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from pathlib import Path
6
+ from zipfile import BadZipFile, ZipFile
7
+
8
+ from pydantic import BaseModel, ConfigDict, Field
9
+
10
+ from agentblaster.errors import ConfigError
11
+
12
+
13
# (finding name, compiled pattern) pairs; every pattern is tried against each
# scanned line and each zip entry name, and findings report the name only.
SCAN_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    # --- credential formats ---
    ("anthropic_api_key", re.compile(r"sk-ant-[A-Za-z0-9_\-]{16,}")),
    ("openai_api_key", re.compile(r"sk-(?!ant-)[A-Za-z0-9_\-]{16,}")),
    ("github_token", re.compile(r"gh[opusr]_[A-Za-z0-9_]{16,}")),
    ("bearer_token", re.compile(r"Bearer\s+[A-Za-z0-9._\-]{16,}", re.IGNORECASE)),
    ("aws_access_key_id", re.compile(r"AKIA[0-9A-Z]{16}")),
    # --- local filesystem paths ---
    ("macos_user_path", re.compile(r"/Users/[A-Za-z0-9._\-]+(?:/[^\s\"']*)?")),
    ("macos_volume_path", re.compile(r"/Volumes/[A-Za-z0-9._\-]+(?:/[^\s\"']*)?")),
    ("private_local_path", re.compile(r"/private/[A-Za-z0-9._\-/]+")),
    ("windows_user_path", re.compile(r"[A-Za-z]:[\\/]+Users[\\/]+[A-Za-z0-9._\-]+(?:[\\/]+[^\s\"']*)?")),
]

# Lower-cased file suffixes whose content is scanned as text.
TEXT_SUFFIXES = {
    ".csv",
    ".html",
    ".json",
    ".jsonl",
    ".log",
    ".md",
    ".svg",
    ".txt",
    ".yaml",
    ".yml",
    ".xml",
}

# Lower-cased file suffixes that are opened as zip archives and scanned member by member.
ZIP_SUFFIXES = {".zip"}

# Schema identifier used as the default "schema_version" of a scan report.
REDACTION_SCAN_SCHEMA_VERSION = "agentblaster.redaction-scan.v1"
39
+
40
+
41
class RedactionScanFinding(BaseModel):
    """A single scan hit: where a pattern matched and which pattern it was.

    The model has no field for the matched text, so a finding cannot carry
    the secret-like value itself.  Unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    # Reported path of the scanned file or zip archive.
    path: str
    # Label of the archive member for zip findings; None for plain files.
    entry: str | None = Field(default=None)
    # 1-based line number of the match; None when the hit is not tied to a line.
    line: int | None = Field(default=None)
    # Name of the matching pattern.
    pattern: str
    # Human-readable description of the finding.
    message: str
51
+
52
+
53
class RedactionScanReport(BaseModel):
    """Result of a redaction scan, intended for release/publishing gates.

    Unknown fields are rejected and the three counters must be non-negative.
    """

    model_config = ConfigDict(extra="forbid")

    # Schema identifier of this report format.
    schema_version: str = Field(default=REDACTION_SCAN_SCHEMA_VERSION)
    # Overall verdict of the scan.
    ok: bool
    # Number of input paths handed to the scan.
    total_paths: int = Field(ge=0)
    # Files and zip members whose text was scanned.
    scanned_items: int = Field(ge=0)
    # Files and zip members that were not scanned.
    skipped_items: int = Field(ge=0)
    # Individual pattern hits.
    findings: list[RedactionScanFinding] = Field(default_factory=list)
    # Free-text notes describing what the scan does and does not cover.
    security_notes: list[str] = Field(default_factory=list)
65
+
66
+
67
def scan_paths(paths: list[Path], *, max_bytes: int = 2_000_000) -> RedactionScanReport:
    """Scan files, directories and zip archives for secret-like patterns.

    Directories are walked recursively.  ``.zip`` files are scanned member by
    member; other files are scanned when their suffix looks textual.  Files
    that are not textual, exceed *max_bytes*, or are not valid UTF-8 count as
    skipped.

    Args:
        paths: Files or directories to scan; must not be empty.
        max_bytes: Largest file (or zip member) size that is still scanned.

    Returns:
        A report whose ``ok`` flag is true only when nothing was found.

    Raises:
        ConfigError: If *paths* is empty, a path does not exist, or a file or
            archive cannot be read.
    """
    if not paths:
        raise ConfigError("redaction scan requires at least one path")
    findings: list[RedactionScanFinding] = []
    scanned = 0
    skipped = 0
    for input_path in paths:
        if not input_path.exists():
            raise ConfigError(f"redaction scan path does not exist: {input_path}")
        for path in _iter_files(input_path):
            if path.suffix.lower() in ZIP_SUFFIXES:
                zip_scanned, zip_skipped = _scan_zip(path, findings, max_bytes=max_bytes)
                scanned += zip_scanned
                skipped += zip_skipped
                continue
            if not _looks_textual(path):
                skipped += 1
                continue
            try:
                # Check the on-disk size first so an oversized file is skipped
                # without being loaded into memory.
                if path.stat().st_size > max_bytes:
                    skipped += 1
                    continue
                data = path.read_bytes()
            except OSError as exc:
                raise ConfigError(f"unable to read scan path {path}: {exc}") from exc
            # Re-check after reading: the file may have grown since stat(), and
            # some special files report a size smaller than what a read returns.
            if len(data) > max_bytes:
                skipped += 1
                continue
            text = _decode_text(data)
            if text is None:
                skipped += 1
                continue
            scanned += 1
            findings.extend(_scan_text(text, path=_safe_report_path(path), entry=None))
    return RedactionScanReport(
        ok=not findings,
        total_paths=len(paths),
        scanned_items=scanned,
        skipped_items=skipped,
        findings=findings,
        security_notes=[
            "Redaction scan reports pattern names and locations only; matched secret values are never included.",
            "This is a deterministic regex gate for common secret formats, not a complete DLP system.",
        ],
    )
109
+
110
+
111
def format_redaction_scan_report(report: RedactionScanReport) -> str:
    """Render *report* as newline-terminated plain text.

    Six ``key: value`` header lines are followed by one line per finding in
    the form ``<pattern> <path>[!<entry>][:<line>] <message>``.
    """

    def _location(finding) -> str:
        # "archive!member" for zip findings, plus ":line" when a line is known.
        where = finding.path if finding.entry is None else f"{finding.path}!{finding.entry}"
        return where if finding.line is None else f"{where}:{finding.line}"

    header = [
        f"schema_version: {report.schema_version}",
        f"ok: {str(report.ok).lower()}",
        f"total_paths: {report.total_paths}",
        f"scanned_items: {report.scanned_items}",
        f"skipped_items: {report.skipped_items}",
        f"findings: {len(report.findings)}",
    ]
    body = [
        f"{finding.pattern} {_location(finding)} {finding.message}" for finding in report.findings
    ]
    return "\n".join(header + body) + "\n"
126
+
127
+
128
def redaction_scan_json(report: RedactionScanReport) -> str:
    """Serialise *report* as indented, key-sorted JSON ending with a newline."""
    payload = report.model_dump(mode="json")
    return f"{json.dumps(payload, indent=2, sort_keys=True)}\n"
130
+
131
+
132
+ def _iter_files(path: Path):
133
+ if path.is_file():
134
+ yield path
135
+ return
136
+ for child in sorted(item for item in path.rglob("*") if item.is_file()):
137
+ yield child
138
+
139
+
140
def _looks_textual(path: Path) -> bool:
    """Return whether *path* has a suffix listed in ``TEXT_SUFFIXES`` or no suffix at all."""
    extension = path.suffix
    return extension.lower() in TEXT_SUFFIXES or extension == ""
142
+
143
+
144
def _scan_zip(path: Path, findings: list[RedactionScanFinding], *, max_bytes: int) -> tuple[int, int]:
    """Scan the members of the zip archive at *path*, appending hits to *findings*.

    Members are visited in filename order.  Every member name is checked for
    unsafe paths and secret-like patterns.  Member content is scanned when the
    name looks textual (same rule as for files on disk, so extensionless
    members are included), the declared size does not exceed *max_bytes*, and
    the bytes decode as UTF-8; otherwise the member counts as skipped.

    Returns:
        ``(scanned, skipped)`` member counts.

    Raises:
        ConfigError: If the archive cannot be opened or a member cannot be
            read, including encrypted members and unsupported compression
            methods.
    """
    scanned = 0
    skipped = 0
    report_path = _safe_report_path(path)
    try:
        with ZipFile(path) as archive:
            for info in sorted(archive.infolist(), key=lambda item: item.filename):
                if info.is_dir():
                    continue
                findings.extend(_scan_zip_entry_name(info.filename, path=report_path))
                # Use the same textual test as for on-disk files so extensionless
                # members (for example ".env") are not silently skipped.
                if not _looks_textual(Path(info.filename)) or info.file_size > max_bytes:
                    skipped += 1
                    continue
                try:
                    payload = archive.read(info)
                except RuntimeError as exc:
                    # zipfile raises RuntimeError for encrypted members and
                    # NotImplementedError (a subclass) for unsupported compression.
                    raise ConfigError(f"unable to scan zip artifact {path}: {exc}") from exc
                text = _decode_text(payload)
                if text is None:
                    skipped += 1
                    continue
                scanned += 1
                findings.extend(
                    _scan_text(text, path=report_path, entry=_zip_entry_report_label(info.filename))
                )
    except (OSError, BadZipFile) as exc:
        raise ConfigError(f"unable to scan zip artifact {path}: {exc}") from exc
    return scanned, skipped
170
+
171
+
172
def _scan_zip_entry_name(entry: str, *, path: str) -> list[RedactionScanFinding]:
    """Check a zip member *name* (not its content) and return the findings.

    A ``zip_unsafe_entry_path`` finding is produced for absolute names,
    drive-letter names and names containing a ``..`` segment (backslashes are
    treated as separators).  One further finding is produced for every scan
    pattern that matches the raw name.  Findings carry a sanitised entry
    label rather than the raw name.
    """
    safe_label = _safe_zip_entry_label(entry)
    forward = entry.replace("\\", "/")
    segments = [segment for segment in forward.split("/") if segment not in {"", "."}]

    def _finding(pattern_name: str, message: str) -> RedactionScanFinding:
        return RedactionScanFinding(
            path=path,
            entry=safe_label,
            line=None,
            pattern=pattern_name,
            message=message,
        )

    results: list[RedactionScanFinding] = []
    escapes_archive = forward.startswith("/") or re.match(r"^[A-Za-z]:/", forward) or ".." in segments
    if escapes_archive:
        results.append(
            _finding(
                "zip_unsafe_entry_path",
                "unsafe zip entry path detected; archive member name suppressed from extracted paths",
            )
        )
    results.extend(
        _finding(
            name,
            "secret-like or local-path pattern detected in zip entry name; matched value suppressed",
        )
        for name, pattern in SCAN_PATTERNS
        if pattern.search(entry)
    )
    return results
199
+
200
+
201
def _zip_entry_report_label(entry: str) -> str:
    """Return the label under which a zip member appears in findings.

    The raw name is returned unless it is unsafe (absolute, drive-letter, or
    containing a ``..`` segment) or matches a scan pattern; in those cases the
    sanitised label from ``_safe_zip_entry_label`` is returned instead.
    """
    forward = entry.replace("\\", "/")
    if forward.startswith("/") or re.match(r"^[A-Za-z]:/", forward):
        return _safe_zip_entry_label(entry)
    if ".." in forward.split("/"):
        return _safe_zip_entry_label(entry)
    for _, pattern in SCAN_PATTERNS:
        if pattern.search(entry):
            return _safe_zip_entry_label(entry)
    return entry
209
+
210
+
211
def _safe_zip_entry_label(entry: str) -> str:
    """Return a report-safe label for a zip member name.

    The label is the last path segment, ignoring empty, ``.`` and ``..``
    segments and treating backslashes as separators.  ``"<redacted-entry>"``
    is returned when no segment remains or the segment matches a scan pattern.
    """
    placeholder = "<redacted-entry>"
    segments = [
        segment
        for segment in entry.replace("\\", "/").split("/")
        if segment not in {"", ".", ".."}
    ]
    if not segments:
        return placeholder
    basename = segments[-1]
    for _, pattern in SCAN_PATTERNS:
        if pattern.search(basename):
            return placeholder
    return basename
218
+
219
+
220
+ def _decode_text(data: bytes) -> str | None:
221
+ try:
222
+ return data.decode("utf-8")
223
+ except UnicodeDecodeError:
224
+ return None
225
+
226
+
227
+ def _safe_report_path(path: Path) -> str:
228
+ if path.is_absolute():
229
+ return path.name
230
+ return path.as_posix()
231
+
232
+
233
+ def _scan_text(text: str, *, path: str, entry: str | None) -> list[RedactionScanFinding]:
234
+ findings: list[RedactionScanFinding] = []
235
+ for line_number, line in enumerate(text.splitlines(), start=1):
236
+ for name, pattern in SCAN_PATTERNS:
237
+ if pattern.search(line):
238
+ findings.append(
239
+ RedactionScanFinding(
240
+ path=path,
241
+ entry=entry,
242
+ line=line_number,
243
+ pattern=name,
244
+ message="secret-like pattern detected; matched value suppressed",
245
+ )
246
+ )
247
+ return findings