sectum-ai 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sectum_ai-0.1.1/.gitignore +45 -0
- sectum_ai-0.1.1/PKG-INFO +71 -0
- sectum_ai-0.1.1/README.md +37 -0
- sectum_ai-0.1.1/pyproject.toml +65 -0
- sectum_ai-0.1.1/src/sectum_ai/baseline.py +330 -0
- sectum_ai-0.1.1/src/sectum_ai/cli/__init__.py +1 -0
- sectum_ai-0.1.1/src/sectum_ai/cli/app.py +1879 -0
- sectum_ai-0.1.1/src/sectum_ai/cli/py.typed +0 -0
- sectum_ai-0.1.1/src/sectum_ai/config.py +1053 -0
- sectum_ai-0.1.1/src/sectum_ai/crypto.py +102 -0
- sectum_ai-0.1.1/src/sectum_ai/embeddings.py +216 -0
- sectum_ai-0.1.1/src/sectum_ai/jobs.py +67 -0
- sectum_ai-0.1.1/src/sectum_ai/runner.py +359 -0
- sectum_ai-0.1.1/src/sectum_ai/substrate/__init__.py +10 -0
- sectum_ai-0.1.1/src/sectum_ai/substrate/build.py +73 -0
- sectum_ai-0.1.1/src/sectum_ai/substrate/corpus.py +251 -0
- sectum_ai-0.1.1/src/sectum_ai/substrate/manifest.py +17 -0
- sectum_ai-0.1.1/src/sectum_ai/substrate/markers.py +135 -0
- sectum_ai-0.1.1/src/sectum_ai/substrate/rng.py +36 -0
- sectum_ai-0.1.1/src/sectum_ai/substrate/scenario.py +35 -0
- sectum_ai-0.1.1/src/sectum_ai/substrate/tenants.py +28 -0
- sectum_ai-0.1.1/src/sectum_ai/suites.py +80 -0
- sectum_ai-0.1.1/src/sectum_ai/sweep.py +107 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
.eggs/
|
|
7
|
+
|
|
8
|
+
# Builds / distributions
|
|
9
|
+
build/
|
|
10
|
+
dist/
|
|
11
|
+
*.whl
|
|
12
|
+
|
|
13
|
+
# mkdocs build output
|
|
14
|
+
site/
|
|
15
|
+
|
|
16
|
+
# uv / virtual environments
|
|
17
|
+
.venv/
|
|
18
|
+
venv/
|
|
19
|
+
|
|
20
|
+
# Tooling caches
|
|
21
|
+
.mypy_cache/
|
|
22
|
+
.ruff_cache/
|
|
23
|
+
.pytest_cache/
|
|
24
|
+
.coverage
|
|
25
|
+
.coverage.*
|
|
26
|
+
coverage.xml
|
|
27
|
+
htmlcov/
|
|
28
|
+
|
|
29
|
+
# Editors / OS
|
|
30
|
+
.idea/
|
|
31
|
+
.vscode/
|
|
32
|
+
*.swp
|
|
33
|
+
.DS_Store
|
|
34
|
+
|
|
35
|
+
# Example run artifacts (generated by examples/*/run.sh, incl. the
|
|
36
|
+
# out-residual/ workdir from the docs/samples regeneration recipe)
|
|
37
|
+
examples/*/out/
|
|
38
|
+
examples/*/out-residual/
|
|
39
|
+
|
|
40
|
+
# Sectum CLI default workdir (generated by seed/probe/report; not source)
|
|
41
|
+
.sectum-ai/
|
|
42
|
+
examples/*/.sectum-ai/
|
|
43
|
+
|
|
44
|
+
# Project-local engineering spec (not shared)
|
|
45
|
+
CLAUDE.md
|
sectum_ai-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sectum-ai
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Sectum AI - multi-tenant AI verification: core substrate runner and the sectum-ai CLI.
|
|
5
|
+
Project-URL: Homepage, https://sectum.ai
|
|
6
|
+
Project-URL: Documentation, https://docs.sectum.ai
|
|
7
|
+
Project-URL: Repository, https://github.com/sectum-ai/sectum-ai
|
|
8
|
+
Project-URL: Changelog, https://github.com/sectum-ai/sectum-ai/blob/main/CHANGELOG.md
|
|
9
|
+
Author: Sectum AI
|
|
10
|
+
License-Expression: Apache-2.0
|
|
11
|
+
Keywords: ai-security,llm,multi-tenant,rag,verification
|
|
12
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Security
|
|
17
|
+
Requires-Python: >=3.12
|
|
18
|
+
Requires-Dist: pydantic>=2.9
|
|
19
|
+
Requires-Dist: pyyaml>=6
|
|
20
|
+
Requires-Dist: sectum-ai-adapters
|
|
21
|
+
Requires-Dist: sectum-ai-evidence
|
|
22
|
+
Requires-Dist: sectum-ai-probes
|
|
23
|
+
Requires-Dist: sectum-ai-spec
|
|
24
|
+
Requires-Dist: typer>=0.12
|
|
25
|
+
Provides-Extra: encryption
|
|
26
|
+
Requires-Dist: cryptography>=43; extra == 'encryption'
|
|
27
|
+
Provides-Extra: openai
|
|
28
|
+
Requires-Dist: openai>=1.40; extra == 'openai'
|
|
29
|
+
Provides-Extra: sentence-transformers
|
|
30
|
+
Requires-Dist: sentence-transformers>=3; extra == 'sentence-transformers'
|
|
31
|
+
Provides-Extra: weasyprint
|
|
32
|
+
Requires-Dist: sectum-ai-evidence[weasyprint]; extra == 'weasyprint'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# sectum-ai
|
|
36
|
+
|
|
37
|
+
**Multi-tenant AI verification.** This is the core distribution of [Sectum AI](https://github.com/sectum-ai/sectum-ai):
|
|
38
|
+
the marker-substrate runner and the `sectum-ai` command-line interface.
|
|
39
|
+
|
|
40
|
+
Sectum AI provisions synthetic tenants on an AI stack, seeds them with
|
|
41
|
+
cryptographic canary markers, runs benign and adversarial probes from each
|
|
42
|
+
tenant's session, and detects cross-tenant data leakage across every surface —
|
|
43
|
+
producing tamper-evident, control-mapped evidence that an auditor accepts.
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
```sh
|
|
48
|
+
pip install sectum-ai
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
This pulls the full family: `sectum-ai-spec` (data models), `sectum-ai-probes`
|
|
52
|
+
(the Class 1–11 attack catalog + leak-detection pipeline), `sectum-ai-adapters`
|
|
53
|
+
(connectors for vector stores, caches, observability, RAG, agents, and MCP), and
|
|
54
|
+
`sectum-ai-evidence` (the tamper-evident evidence chain + `sectum-ai verify`).
|
|
55
|
+
|
|
56
|
+
## Quickstart
|
|
57
|
+
|
|
58
|
+
```sh
|
|
59
|
+
sectum-ai seed # provision synthetic tenants + plant canary markers
|
|
60
|
+
sectum-ai probe # run the attack catalog from each tenant's session
|
|
61
|
+
sectum-ai report # assemble a signed, control-mapped evidence pack (JSON + PDF)
|
|
62
|
+
sectum-ai verify .sectum-ai/evidence.json # independently re-verify the pack
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Links
|
|
66
|
+
|
|
67
|
+
- Documentation: <https://docs.sectum.ai>
|
|
68
|
+
- Source, full README, and attack catalog: <https://github.com/sectum-ai/sectum-ai>
|
|
69
|
+
|
|
70
|
+
Apache-2.0. The marker substrate, attack catalog, adapters, evidence chain, and
|
|
71
|
+
the independent `sectum-ai verify` are fully open source.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# sectum-ai
|
|
2
|
+
|
|
3
|
+
**Multi-tenant AI verification.** This is the core distribution of [Sectum AI](https://github.com/sectum-ai/sectum-ai):
|
|
4
|
+
the marker-substrate runner and the `sectum-ai` command-line interface.
|
|
5
|
+
|
|
6
|
+
Sectum AI provisions synthetic tenants on an AI stack, seeds them with
|
|
7
|
+
cryptographic canary markers, runs benign and adversarial probes from each
|
|
8
|
+
tenant's session, and detects cross-tenant data leakage across every surface —
|
|
9
|
+
producing tamper-evident, control-mapped evidence that an auditor accepts.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```sh
|
|
14
|
+
pip install sectum-ai
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
This pulls the full family: `sectum-ai-spec` (data models), `sectum-ai-probes`
|
|
18
|
+
(the Class 1–11 attack catalog + leak-detection pipeline), `sectum-ai-adapters`
|
|
19
|
+
(connectors for vector stores, caches, observability, RAG, agents, and MCP), and
|
|
20
|
+
`sectum-ai-evidence` (the tamper-evident evidence chain + `sectum-ai verify`).
|
|
21
|
+
|
|
22
|
+
## Quickstart
|
|
23
|
+
|
|
24
|
+
```sh
|
|
25
|
+
sectum-ai seed # provision synthetic tenants + plant canary markers
|
|
26
|
+
sectum-ai probe # run the attack catalog from each tenant's session
|
|
27
|
+
sectum-ai report # assemble a signed, control-mapped evidence pack (JSON + PDF)
|
|
28
|
+
sectum-ai verify .sectum-ai/evidence.json # independently re-verify the pack
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Links
|
|
32
|
+
|
|
33
|
+
- Documentation: <https://docs.sectum.ai>
|
|
34
|
+
- Source, full README, and attack catalog: <https://github.com/sectum-ai/sectum-ai>
|
|
35
|
+
|
|
36
|
+
Apache-2.0. The marker substrate, attack catalog, adapters, evidence chain, and
|
|
37
|
+
the independent `sectum-ai verify` are fully open source.
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sectum-ai"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Sectum AI - multi-tenant AI verification: core substrate runner and the sectum-ai CLI."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
authors = [{ name = "Sectum AI" }]
|
|
13
|
+
keywords = ["ai-security", "multi-tenant", "rag", "llm", "verification"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Topic :: Security",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"typer>=0.12",
|
|
23
|
+
"pyyaml>=6",
|
|
24
|
+
# config.py and cli/app.py import pydantic directly; declare it rather than
|
|
25
|
+
# relying on the transitive dependency via sectum-ai-spec (§13, mirrors adapters).
|
|
26
|
+
"pydantic>=2.9",
|
|
27
|
+
"sectum-ai-spec",
|
|
28
|
+
"sectum-ai-adapters",
|
|
29
|
+
"sectum-ai-probes",
|
|
30
|
+
"sectum-ai-evidence",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
# At-rest encryption of the seeded substrate (and its ground-truth manifest).
|
|
35
|
+
# Optional: the default unencrypted path needs no third-party dependency.
|
|
36
|
+
encryption = ["cryptography>=43"]
|
|
37
|
+
# weasyprint audit-pack PDF engine (sectum-ai report --pdf-engine weasyprint).
|
|
38
|
+
# Optional: the default reportlab renderer is pure Python with no system
|
|
39
|
+
# libraries; weasyprint adds the pango/cairo system libraries.
|
|
40
|
+
weasyprint = ["sectum-ai-evidence[weasyprint]"]
|
|
41
|
+
# Real embedding providers for the Class 2 per-model RPR sweep (embedding_models:
|
|
42
|
+
# ["st:all-mpnet-base-v2", ...]). Optional: the default hashing/recall sweep is
|
|
43
|
+
# pure-Python. sentence-transformers runs locally (BYOC-safe); openai is a
|
|
44
|
+
# hosted call. See packages/core/src/sectum_ai/embeddings.py.
|
|
45
|
+
sentence-transformers = ["sentence-transformers>=3"]
|
|
46
|
+
openai = ["openai>=1.40"]
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
sectum-ai = "sectum_ai.cli.app:app"
|
|
50
|
+
|
|
51
|
+
[project.urls]
|
|
52
|
+
Homepage = "https://sectum.ai"
|
|
53
|
+
Documentation = "https://docs.sectum.ai"
|
|
54
|
+
Repository = "https://github.com/sectum-ai/sectum-ai"
|
|
55
|
+
Changelog = "https://github.com/sectum-ai/sectum-ai/blob/main/CHANGELOG.md"
|
|
56
|
+
|
|
57
|
+
[tool.uv.sources]
|
|
58
|
+
sectum-ai-spec = { workspace = true }
|
|
59
|
+
sectum-ai-adapters = { workspace = true }
|
|
60
|
+
sectum-ai-probes = { workspace = true }
|
|
61
|
+
sectum-ai-evidence = { workspace = true }
|
|
62
|
+
|
|
63
|
+
[tool.hatch.build.targets.wheel]
|
|
64
|
+
only-include = ["src/sectum_ai"]
|
|
65
|
+
sources = ["src"]
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""Regression baselines: save a run's metrics and compare later runs to it.
|
|
2
|
+
|
|
3
|
+
A baseline is a saved snapshot of a run's headline metrics. Comparing a later
|
|
4
|
+
run to the baseline flags regressions - a metric that moved in the worse
|
|
5
|
+
(higher-leakage) direction, for example a higher Retrieval-Pivot Rate or more
|
|
6
|
+
confirmed findings after an embedding-model or prompt change (the engineering
|
|
7
|
+
spec, sections 10 and 14).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from collections.abc import Callable, Mapping, Sequence
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from sectum_ai.spec import Finding, FindingStatus, RunMetrics, RunResult, Severity
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class MetricDelta:
    """A single headline metric as seen in the baseline run and in a later run."""

    name: str
    baseline: float
    current: float
    # Informational deltas are shown for visibility only and can never be a
    # regression. The motivating case is an erasure *caveat* (a backend with no
    # per-tenant erasure API, Class 11 hiding place #8): that is a coverage
    # limitation rather than an isolation failure. Erasure *residue* is a real
    # failure, is not informational, and does regress.
    informational: bool = False

    @property
    def regressed(self) -> bool:
        """Report whether the metric got worse (moved toward more leakage).

        An informational metric always yields ``False``. Otherwise the later
        value must exceed the baseline by more than a small absolute tolerance,
        so that float noise from a JSON round trip is not mistaken for a
        regression; genuine leakage changes are far larger than that tolerance.
        """
        # Absolute slack absorbed before an increase counts as a regression.
        tolerance = 1e-9
        if not self.informational:
            return self.current > self.baseline + tolerance
        return False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class BaselineComparison:
    """Result of checking one run's metrics against a saved baseline."""

    deltas: tuple[MetricDelta, ...]

    @property
    def regressed(self) -> bool:
        """Report whether at least one of the compared metrics regressed."""
        for delta in self.deltas:
            if delta.regressed:
                return True
        return False
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _dict_deltas(
    label: str,
    baseline: Mapping[str, float],
    current: Mapping[str, float],
    *,
    informational: bool = False,
) -> list[MetricDelta]:
    """Build one MetricDelta for every key found in either mapping.

    Keys are visited in sorted order and each delta is named ``label[key]``.
    A key that is missing from one side is read as ``0.0`` on that side.
    """
    all_keys = set(baseline) | set(current)
    rows: list[MetricDelta] = []
    for key in sorted(all_keys):
        before = float(baseline.get(key, 0.0))
        after = float(current.get(key, 0.0))
        rows.append(
            MetricDelta(
                name=f"{label}[{key}]",
                baseline=before,
                current=after,
                informational=informational,
            )
        )
    return rows
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def compare_metrics(baseline: RunMetrics, current: RunMetrics) -> BaselineComparison:
    """Compare a later run's metrics with a baseline and flag whatever worsened.

    Every metric is oriented so that a higher value means more leakage, hence
    any increase is a regression. The confirmed-finding count and the headline
    rates (Retrieval-Pivot Rate, poisoning bleed delta, inversion
    reconstruction rate, extraction efficiency) are compared directly. The
    per-model Retrieval-Pivot Rate, per-probe finding counts, per-surface
    erasure residue, and per-pair side-channel effect sizes are compared key by
    key. A headline rate that is unset (or otherwise falsy), and a key that is
    missing on one side, are both treated as ``0.0``.

    The keyed comparisons exist because an aggregate can mask a regression:
    swapping one embedding model may spike only that model's RPR (the canonical
    Phase-5 check, the engineering spec section 14) while the overall rate
    stays put, and one probe may begin leaking just as another stops, leaving
    the confirmed total unchanged.

    Per-surface erasure *caveats* are included as informational deltas that can
    never regress: a caveat is a backend coverage limitation (Class 11 hiding
    place #8), not an isolation failure the way residue is.
    """
    # Headline scalars. Class 3/6/10 rates follow the same "higher is worse"
    # rule as the retrieval-pivot rate, so they regress on any increase.
    headline = (
        MetricDelta(
            name="confirmed_findings",
            baseline=float(baseline.confirmed_findings),
            current=float(current.confirmed_findings),
        ),
        MetricDelta(
            name="retrieval_pivot_rate",
            baseline=baseline.retrieval_pivot_rate or 0.0,
            current=current.retrieval_pivot_rate or 0.0,
        ),
        MetricDelta(
            name="poisoning_bleed_delta",
            baseline=baseline.poisoning_bleed_delta or 0.0,
            current=current.poisoning_bleed_delta or 0.0,
        ),
        MetricDelta(
            name="inversion_reconstruction_rate",
            baseline=baseline.inversion_reconstruction_rate or 0.0,
            current=current.inversion_reconstruction_rate or 0.0,
        ),
        MetricDelta(
            name="extraction_efficiency",
            baseline=baseline.extraction_efficiency or 0.0,
            current=current.extraction_efficiency or 0.0,
        ),
    )

    # Keyed metrics, each expanded to one delta per key.
    per_model = _dict_deltas(
        "retrieval_pivot_rate_by_model",
        baseline.retrieval_pivot_rate_by_model,
        current.retrieval_pivot_rate_by_model,
    )
    # Finding counts are converted to float before the keyed comparison.
    baseline_counts = {key: float(value) for key, value in baseline.per_probe_findings.items()}
    current_counts = {key: float(value) for key, value in current.per_probe_findings.items()}
    per_probe = _dict_deltas("per_probe_findings", baseline_counts, current_counts)
    residue = _dict_deltas("erasure_residue", baseline.erasure_residue, current.erasure_residue)
    side_channel = _dict_deltas(
        "side_channel_effect_sizes",
        baseline.side_channel_effect_sizes,
        current.side_channel_effect_sizes,
    )
    # Caveats are reported but flagged informational so they never gate.
    caveats = _dict_deltas(
        "erasure_caveats",
        baseline.erasure_caveats,
        current.erasure_caveats,
        informational=True,
    )

    return BaselineComparison(
        deltas=(*headline, *per_model, *per_probe, *residue, *side_channel, *caveats)
    )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Ordinal rank of each severity, lowest (INFO = 0) to highest (CRITICAL = 4);
# used to tell whether a severity rose or fell between two runs.
_SEVERITY_RANK: dict[Severity, int] = {
    severity: rank
    for rank, severity in enumerate(
        (
            Severity.INFO,
            Severity.LOW,
            Severity.MEDIUM,
            Severity.HIGH,
            Severity.CRITICAL,
        )
    )
}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@dataclass(frozen=True)
class FindingChange:
    """One finding, matched by ``finding_id`` across two runs, that changed in place.

    ``previous`` holds the copy from the earlier run and ``current`` the copy
    from the later run. The change is a different status (for example
    unverified -> confirmed) or a different severity (for example
    low -> critical) on what is, by id, the same leak.
    """

    previous: Finding
    current: Finding

    @property
    def status_changed(self) -> bool:
        """Report whether the two copies carry different statuses."""
        before, after = self.previous.status, self.current.status
        return before is not after

    @property
    def severity_changed(self) -> bool:
        """Report whether the two copies carry different severities."""
        before, after = self.previous.severity, self.current.severity
        return before is not after

    @property
    def severity_escalated(self) -> bool:
        """Report whether the severity went up (a worse posture) rather than down."""
        rank_after = _SEVERITY_RANK[self.current.severity]
        rank_before = _SEVERITY_RANK[self.previous.severity]
        return rank_after > rank_before
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
@dataclass(frozen=True)
class FindingDiff:
    """Per-finding delta between two runs, matched on the stable ``finding_id``.

    ``appeared`` holds findings present only in the later run (a new leak),
    ``resolved`` holds findings present only in the earlier run (a fixed leak),
    and ``persisting`` holds findings present in both, as their later copy.
    Every list keeps the deterministic finding order of the run it came from.
    """

    appeared: tuple[Finding, ...]
    resolved: tuple[Finding, ...]
    persisting: tuple[Finding, ...]
    # The regression signal: findings confirmed in the later run whose id was
    # not already confirmed in the earlier run. This is wider than "confirmed
    # and new by id" because it also covers a finding that kept its id but was
    # upgraded unverified -> confirmed. An unverified candidate is never listed
    # (the false-positive control, the engineering spec section 6.4), so on its
    # own it cannot turn a diff into a regression.
    newly_confirmed: tuple[Finding, ...]
    # Findings matched by id in both runs whose status or severity differs,
    # kept for visibility. Only the ``severity_escalations`` subset gates a
    # regression.
    changed: tuple[FindingChange, ...]

    @property
    def severity_escalations(self) -> tuple[FindingChange, ...]:
        """Return the changes that are confirmed in both runs and rose in severity.

        When an already-confirmed cross-tenant leak becomes more severe (for
        example low -> critical) the isolation posture got worse, so it gates
        as a regression. Insisting on confirmed-in-both keeps this set disjoint
        from ``newly_confirmed`` (which handles unverified -> confirmed) and
        outside the false-positive control.
        """
        escalations: list[FindingChange] = []
        for change in self.changed:
            if not change.severity_escalated:
                continue
            if change.previous.status is not FindingStatus.CONFIRMED:
                continue
            if change.current.status is not FindingStatus.CONFIRMED:
                continue
            escalations.append(change)
        return tuple(escalations)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@dataclass(frozen=True)
class RunDiff:
    """Complete comparison of two runs: the metric deltas and the finding diff."""

    metrics: BaselineComparison
    findings: FindingDiff

    @property
    def regressed(self) -> bool:
        """Report whether the later run is worse than the earlier one.

        Three things count as a regression: a worsened metric (the baseline
        rule), a newly confirmed finding, or an in-place severity escalation of
        a finding confirmed in both runs. The two finding checks cover what the
        metric counts cannot see. A confirmed leak that is new -- under a fresh
        id, or through an in-place unverified -> confirmed upgrade -- may leave
        ``confirmed_findings`` unchanged if another confirmed leak resolves in
        the same run. A known leak that becomes more severe (low -> critical)
        does not move the counts at all.
        """
        if self.metrics.regressed:
            return True
        if self.findings.newly_confirmed:
            return True
        return bool(self.findings.severity_escalations)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def diff_findings(earlier: Sequence[Finding], later: Sequence[Finding]) -> FindingDiff:
    """Diff two finding sequences by ``finding_id`` into the diff buckets.

    Each side is first de-duplicated by ``finding_id`` with the first
    occurrence winning, and every bucket is then derived from that one
    de-duplicated view, so all buckets agree on which copy of a repeated id is
    used:

    * ``appeared`` / ``resolved`` / ``persisting`` partition the ids into
      later-only, earlier-only, and present-in-both (as the later copy).
    * ``newly_confirmed`` is every later finding that is confirmed and whose
      earlier copy is absent or not confirmed (a fresh id, or an in-place
      upgrade).
    * ``changed`` is every persisting finding whose status or severity differs
      between its earlier and later copy.

    Each bucket keeps the order of the run it is drawn from. Runs are
    de-duplicated upstream; the de-duplication here only guards a hand-built
    input, and for input without repeated ids the result does not depend on it.

    Args:
        earlier: Findings of the reference run.
        later: Findings of the run under scrutiny.

    Returns:
        The populated :class:`FindingDiff`.
    """

    def _first_by_id(findings: Sequence[Finding]) -> dict[str, Finding]:
        # Insertion order of the dict is the order of first occurrence.
        unique: dict[str, Finding] = {}
        for finding in findings:
            unique.setdefault(finding.finding_id, finding)
        return unique

    def _pick(
        unique: Mapping[str, Finding], keep: Callable[[str], bool]
    ) -> tuple[Finding, ...]:
        # Keep the findings whose id passes ``keep``, in their existing order.
        return tuple(finding for fid, finding in unique.items() if keep(fid))

    earlier_by_id = _first_by_id(earlier)
    later_by_id = _first_by_id(later)

    # Derived from the de-duplicated view so that a later, shadowed duplicate
    # cannot mark an id as "already confirmed" while ``changed`` compares
    # against the first copy.
    earlier_confirmed_ids = {
        fid
        for fid, finding in earlier_by_id.items()
        if finding.status is FindingStatus.CONFIRMED
    }

    newly_confirmed = _pick(
        later_by_id,
        lambda fid: (
            later_by_id[fid].status is FindingStatus.CONFIRMED
            and fid not in earlier_confirmed_ids
        ),
    )

    changed: list[FindingChange] = []
    for fid, finding in later_by_id.items():
        previous = earlier_by_id.get(fid)
        if previous is None:
            continue
        if previous.status is not finding.status or previous.severity is not finding.severity:
            changed.append(FindingChange(previous=previous, current=finding))

    return FindingDiff(
        appeared=_pick(later_by_id, lambda fid: fid not in earlier_by_id),
        resolved=_pick(earlier_by_id, lambda fid: fid not in later_by_id),
        persisting=_pick(later_by_id, lambda fid: fid in earlier_by_id),
        newly_confirmed=newly_confirmed,
        changed=tuple(changed),
    )
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def diff_runs(earlier: RunResult, later: RunResult) -> RunDiff:
    """Compare two runs, producing metric deltas and a finding-level diff.

    ``earlier`` is the reference run (an older run or a pre-change baseline)
    and ``later`` is the run being judged; the argument order mirrors
    :func:`compare_metrics`. Metrics go through :func:`compare_metrics` and
    findings through :func:`diff_findings`.
    """
    metric_comparison = compare_metrics(earlier.metrics, later.metrics)
    finding_diff = diff_findings(earlier.findings, later.findings)
    return RunDiff(metrics=metric_comparison, findings=finding_diff)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Sectum AI command-line interface."""
|