sectum-ai 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ .eggs/
7
+
8
+ # Builds / distributions
9
+ build/
10
+ dist/
11
+ *.whl
12
+
13
+ # mkdocs build output
14
+ site/
15
+
16
+ # uv / virtual environments
17
+ .venv/
18
+ venv/
19
+
20
+ # Tooling caches
21
+ .mypy_cache/
22
+ .ruff_cache/
23
+ .pytest_cache/
24
+ .coverage
25
+ .coverage.*
26
+ coverage.xml
27
+ htmlcov/
28
+
29
+ # Editors / OS
30
+ .idea/
31
+ .vscode/
32
+ *.swp
33
+ .DS_Store
34
+
35
+ # Example run artifacts (generated by examples/*/run.sh, incl. the
36
+ # out-residual/ workdir from the docs/samples regeneration recipe)
37
+ examples/*/out/
38
+ examples/*/out-residual/
39
+
40
+ # Sectum CLI default workdir (generated by seed/probe/report; not source)
41
+ .sectum-ai/
42
+ examples/*/.sectum-ai/
43
+
44
+ # Project-local engineering spec (not shared)
45
+ CLAUDE.md
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.4
2
+ Name: sectum-ai
3
+ Version: 0.1.1
4
+ Summary: Sectum AI - multi-tenant AI verification: core substrate runner and the sectum-ai CLI.
5
+ Project-URL: Homepage, https://sectum.ai
6
+ Project-URL: Documentation, https://docs.sectum.ai
7
+ Project-URL: Repository, https://github.com/sectum-ai/sectum-ai
8
+ Project-URL: Changelog, https://github.com/sectum-ai/sectum-ai/blob/main/CHANGELOG.md
9
+ Author: Sectum AI
10
+ License-Expression: Apache-2.0
11
+ Keywords: ai-security,llm,multi-tenant,rag,verification
12
+ Classifier: Development Status :: 2 - Pre-Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Security
17
+ Requires-Python: >=3.12
18
+ Requires-Dist: pydantic>=2.9
19
+ Requires-Dist: pyyaml>=6
20
+ Requires-Dist: sectum-ai-adapters
21
+ Requires-Dist: sectum-ai-evidence
22
+ Requires-Dist: sectum-ai-probes
23
+ Requires-Dist: sectum-ai-spec
24
+ Requires-Dist: typer>=0.12
25
+ Provides-Extra: encryption
26
+ Requires-Dist: cryptography>=43; extra == 'encryption'
27
+ Provides-Extra: openai
28
+ Requires-Dist: openai>=1.40; extra == 'openai'
29
+ Provides-Extra: sentence-transformers
30
+ Requires-Dist: sentence-transformers>=3; extra == 'sentence-transformers'
31
+ Provides-Extra: weasyprint
32
+ Requires-Dist: sectum-ai-evidence[weasyprint]; extra == 'weasyprint'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # sectum-ai
36
+
37
+ **Multi-tenant AI verification.** This is the core distribution of [Sectum AI](https://github.com/sectum-ai/sectum-ai):
38
+ the marker-substrate runner and the `sectum-ai` command-line interface.
39
+
40
+ Sectum AI provisions synthetic tenants on an AI stack, seeds them with
41
+ cryptographic canary markers, runs benign and adversarial probes from each
42
+ tenant's session, and detects cross-tenant data leakage across every surface —
43
+ producing tamper-evident, control-mapped evidence that an auditor accepts.
44
+
45
+ ## Install
46
+
47
+ ```sh
48
+ pip install sectum-ai
49
+ ```
50
+
51
+ This pulls the full family: `sectum-ai-spec` (data models), `sectum-ai-probes`
52
+ (the Class 1–11 attack catalog + leak-detection pipeline), `sectum-ai-adapters`
53
+ (connectors for vector stores, caches, observability, RAG, agents, and MCP), and
54
+ `sectum-ai-evidence` (the tamper-evident evidence chain + `sectum-ai verify`).
55
+
56
+ ## Quickstart
57
+
58
+ ```sh
59
+ sectum-ai seed # provision synthetic tenants + plant canary markers
60
+ sectum-ai probe # run the attack catalog from each tenant's session
61
+ sectum-ai report # assemble a signed, control-mapped evidence pack (JSON + PDF)
62
+ sectum-ai verify .sectum-ai/evidence.json # independently re-verify the pack
63
+ ```
64
+
65
+ ## Links
66
+
67
+ - Documentation: <https://docs.sectum.ai>
68
+ - Source, full README, and attack catalog: <https://github.com/sectum-ai/sectum-ai>
69
+
70
+ Apache-2.0. The marker substrate, attack catalog, adapters, evidence chain, and
71
+ the independent `sectum-ai verify` are fully open source.
@@ -0,0 +1,37 @@
1
+ # sectum-ai
2
+
3
+ **Multi-tenant AI verification.** This is the core distribution of [Sectum AI](https://github.com/sectum-ai/sectum-ai):
4
+ the marker-substrate runner and the `sectum-ai` command-line interface.
5
+
6
+ Sectum AI provisions synthetic tenants on an AI stack, seeds them with
7
+ cryptographic canary markers, runs benign and adversarial probes from each
8
+ tenant's session, and detects cross-tenant data leakage across every surface —
9
+ producing tamper-evident, control-mapped evidence that an auditor accepts.
10
+
11
+ ## Install
12
+
13
+ ```sh
14
+ pip install sectum-ai
15
+ ```
16
+
17
+ This pulls the full family: `sectum-ai-spec` (data models), `sectum-ai-probes`
18
+ (the Class 1–11 attack catalog + leak-detection pipeline), `sectum-ai-adapters`
19
+ (connectors for vector stores, caches, observability, RAG, agents, and MCP), and
20
+ `sectum-ai-evidence` (the tamper-evident evidence chain + `sectum-ai verify`).
21
+
22
+ ## Quickstart
23
+
24
+ ```sh
25
+ sectum-ai seed # provision synthetic tenants + plant canary markers
26
+ sectum-ai probe # run the attack catalog from each tenant's session
27
+ sectum-ai report # assemble a signed, control-mapped evidence pack (JSON + PDF)
28
+ sectum-ai verify .sectum-ai/evidence.json # independently re-verify the pack
29
+ ```
30
+
31
+ ## Links
32
+
33
+ - Documentation: <https://docs.sectum.ai>
34
+ - Source, full README, and attack catalog: <https://github.com/sectum-ai/sectum-ai>
35
+
36
+ Apache-2.0. The marker substrate, attack catalog, adapters, evidence chain, and
37
+ the independent `sectum-ai verify` are fully open source.
@@ -0,0 +1,65 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "sectum-ai"
7
+ version = "0.1.1"
8
+ description = "Sectum AI - multi-tenant AI verification: core substrate runner and the sectum-ai CLI."
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = "Apache-2.0"
12
+ authors = [{ name = "Sectum AI" }]
13
+ keywords = ["ai-security", "multi-tenant", "rag", "llm", "verification"]
14
+ classifiers = [
15
+ "Development Status :: 2 - Pre-Alpha",
16
+ "Intended Audience :: Developers",
17
+ "Operating System :: OS Independent",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Topic :: Security",
20
+ ]
21
+ dependencies = [
22
+ "typer>=0.12",
23
+ "pyyaml>=6",
24
+ # config.py and cli/app.py import pydantic directly; declare it rather than
25
+ # relying on the transitive dependency via sectum-ai-spec (§13, mirrors adapters).
26
+ "pydantic>=2.9",
27
+ "sectum-ai-spec",
28
+ "sectum-ai-adapters",
29
+ "sectum-ai-probes",
30
+ "sectum-ai-evidence",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ # At-rest encryption of the seeded substrate (and its ground-truth manifest).
35
+ # Optional: the default unencrypted path needs no third-party dependency.
36
+ encryption = ["cryptography>=43"]
37
+ # weasyprint audit-pack PDF engine (sectum-ai report --pdf-engine weasyprint).
38
+ # Optional: the default reportlab renderer is pure Python with no system
39
+ # libraries; weasyprint adds the pango/cairo system libraries.
40
+ weasyprint = ["sectum-ai-evidence[weasyprint]"]
41
+ # Real embedding providers for the Class 2 per-model RPR sweep (embedding_models:
42
+ # ["st:all-mpnet-base-v2", ...]). Optional: the default hashing/recall sweep is
43
+ # pure-Python. sentence-transformers runs locally (BYOC-safe); openai is a
44
+ # hosted call. See packages/core/src/sectum_ai/embeddings.py.
45
+ sentence-transformers = ["sentence-transformers>=3"]
46
+ openai = ["openai>=1.40"]
47
+
48
+ [project.scripts]
49
+ sectum-ai = "sectum_ai.cli.app:app"
50
+
51
+ [project.urls]
52
+ Homepage = "https://sectum.ai"
53
+ Documentation = "https://docs.sectum.ai"
54
+ Repository = "https://github.com/sectum-ai/sectum-ai"
55
+ Changelog = "https://github.com/sectum-ai/sectum-ai/blob/main/CHANGELOG.md"
56
+
57
+ [tool.uv.sources]
58
+ sectum-ai-spec = { workspace = true }
59
+ sectum-ai-adapters = { workspace = true }
60
+ sectum-ai-probes = { workspace = true }
61
+ sectum-ai-evidence = { workspace = true }
62
+
63
+ [tool.hatch.build.targets.wheel]
64
+ only-include = ["src/sectum_ai"]
65
+ sources = ["src"]
@@ -0,0 +1,330 @@
1
+ """Regression baselines: save a run's metrics and compare later runs to it.
2
+
3
+ A baseline is a saved snapshot of a run's headline metrics. Comparing a later
4
+ run to the baseline flags regressions - a metric that moved in the worse
5
+ (higher-leakage) direction, for example a higher Retrieval-Pivot Rate or more
6
+ confirmed findings after an embedding-model or prompt change (the engineering
7
+ spec, sections 10 and 14).
8
+ """
9
+
10
+ from collections.abc import Callable, Mapping, Sequence
11
+ from dataclasses import dataclass
12
+
13
+ from sectum_ai.spec import Finding, FindingStatus, RunMetrics, RunResult, Severity
14
+
15
+
16
+ @dataclass(frozen=True)
17
+ class MetricDelta:
18
+ """One headline metric compared between a baseline run and a later run."""
19
+
20
+ name: str
21
+ baseline: float
22
+ current: float
23
+ # An informational metric is reported for visibility but never counts as a
24
+ # regression: an erasure *caveat* (a backend with no per-tenant erasure API,
25
+ # Class 11 hiding place #8) is a coverage limitation, not an isolation
26
+ # failure. It is kept distinct from erasure *residue*, which is a real
27
+ # failure and does regress.
28
+ informational: bool = False
29
+
30
+ @property
31
+ def regressed(self) -> bool:
32
+ """True when the metric moved in the worse, higher-leakage direction.
33
+
34
+ Always ``False`` for an informational metric. Compared with a small
35
+ tolerance so floating-point round-trip noise (a metric serialized to
36
+ JSON and back) never reads as a regression; real leakage changes are far
37
+ larger than the epsilon.
38
+ """
39
+ if self.informational:
40
+ return False
41
+ return self.current > self.baseline + 1e-9
42
+
43
+
44
+ @dataclass(frozen=True)
45
+ class BaselineComparison:
46
+ """The outcome of comparing a run's metrics against a saved baseline."""
47
+
48
+ deltas: tuple[MetricDelta, ...]
49
+
50
+ @property
51
+ def regressed(self) -> bool:
52
+ """True when any compared metric regressed."""
53
+ return any(delta.regressed for delta in self.deltas)
54
+
55
+
56
+ def _dict_deltas(
57
+ label: str,
58
+ baseline: Mapping[str, float],
59
+ current: Mapping[str, float],
60
+ *,
61
+ informational: bool = False,
62
+ ) -> list[MetricDelta]:
63
+ """One MetricDelta per key of either mapping, key-sorted; a key absent on one side is 0.0."""
64
+ return [
65
+ MetricDelta(
66
+ name=f"{label}[{key}]",
67
+ baseline=float(baseline.get(key, 0.0)),
68
+ current=float(current.get(key, 0.0)),
69
+ informational=informational,
70
+ )
71
+ for key in sorted(set(baseline) | set(current))
72
+ ]
73
+
74
+
75
+ def compare_metrics(baseline: RunMetrics, current: RunMetrics) -> BaselineComparison:
76
+ """Compare a later run's metrics to a baseline; flag every metric that worsened.
77
+
78
+ Higher means more leakage for every metric, so an increase is a regression.
79
+ Confirmed findings and the four headline rates (retrieval-pivot, poisoning-bleed
80
+ delta, inversion-reconstruction, extraction efficiency) are compared directly;
81
+ the per-model Retrieval-Pivot Rate, per-probe finding counts, per-surface erasure
82
+ residue, and per-pair side-channel effect sizes are compared key by key. A rate
83
+ that was not measured, or a key absent on one side, counts as ``0.0``.
84
+
85
+ The per-model RPR and per-probe counts matter because an aggregate can hide a
86
+ regression: swapping one embedding model can spike that model's RPR (the
87
+ canonical Phase-5 check, the engineering spec section 14) while the overall
88
+ rate is unchanged, and one probe can start leaking as another stops with no
89
+ change to the total confirmed count.
90
+
91
+ Per-surface erasure *caveats* are also reported, but as informational deltas
92
+ that never count as a regression: a caveat is a coverage limitation of the
93
+ backend (Class 11 hiding place #8), not an isolation failure like residue.
94
+ """
95
+ deltas: list[MetricDelta] = [
96
+ MetricDelta(
97
+ name="confirmed_findings",
98
+ baseline=float(baseline.confirmed_findings),
99
+ current=float(current.confirmed_findings),
100
+ ),
101
+ MetricDelta(
102
+ name="retrieval_pivot_rate",
103
+ baseline=baseline.retrieval_pivot_rate or 0.0,
104
+ current=current.retrieval_pivot_rate or 0.0,
105
+ ),
106
+ # Class 3/6/10 headline rates: higher means more cross-tenant leakage, so
107
+ # an increase regresses exactly like the retrieval-pivot rate above.
108
+ MetricDelta(
109
+ name="poisoning_bleed_delta",
110
+ baseline=baseline.poisoning_bleed_delta or 0.0,
111
+ current=current.poisoning_bleed_delta or 0.0,
112
+ ),
113
+ MetricDelta(
114
+ name="inversion_reconstruction_rate",
115
+ baseline=baseline.inversion_reconstruction_rate or 0.0,
116
+ current=current.inversion_reconstruction_rate or 0.0,
117
+ ),
118
+ MetricDelta(
119
+ name="extraction_efficiency",
120
+ baseline=baseline.extraction_efficiency or 0.0,
121
+ current=current.extraction_efficiency or 0.0,
122
+ ),
123
+ ]
124
+ deltas.extend(
125
+ _dict_deltas(
126
+ "retrieval_pivot_rate_by_model",
127
+ baseline.retrieval_pivot_rate_by_model,
128
+ current.retrieval_pivot_rate_by_model,
129
+ )
130
+ )
131
+ deltas.extend(
132
+ _dict_deltas(
133
+ "per_probe_findings",
134
+ {key: float(value) for key, value in baseline.per_probe_findings.items()},
135
+ {key: float(value) for key, value in current.per_probe_findings.items()},
136
+ )
137
+ )
138
+ deltas.extend(
139
+ _dict_deltas("erasure_residue", baseline.erasure_residue, current.erasure_residue)
140
+ )
141
+ deltas.extend(
142
+ _dict_deltas(
143
+ "side_channel_effect_sizes",
144
+ baseline.side_channel_effect_sizes,
145
+ current.side_channel_effect_sizes,
146
+ )
147
+ )
148
+ deltas.extend(
149
+ _dict_deltas(
150
+ "erasure_caveats",
151
+ baseline.erasure_caveats,
152
+ current.erasure_caveats,
153
+ informational=True,
154
+ )
155
+ )
156
+ return BaselineComparison(deltas=tuple(deltas))
157
+
158
+
159
+ _SEVERITY_RANK: dict[Severity, int] = {
160
+ Severity.INFO: 0,
161
+ Severity.LOW: 1,
162
+ Severity.MEDIUM: 2,
163
+ Severity.HIGH: 3,
164
+ Severity.CRITICAL: 4,
165
+ }
166
+
167
+
168
+ @dataclass(frozen=True)
169
+ class FindingChange:
170
+ """A finding present in both runs (same ``finding_id``) that changed in place.
171
+
172
+ ``previous`` is its earlier-run copy and ``current`` its later-run copy. A
173
+ change is a difference in status (e.g. unverified -> confirmed) or severity
174
+ (e.g. low -> critical) for what is, by id, the same leak.
175
+ """
176
+
177
+ previous: Finding
178
+ current: Finding
179
+
180
+ @property
181
+ def status_changed(self) -> bool:
182
+ """True when the finding's status differs between the runs."""
183
+ return self.previous.status is not self.current.status
184
+
185
+ @property
186
+ def severity_changed(self) -> bool:
187
+ """True when the finding's severity differs between the runs."""
188
+ return self.previous.severity is not self.current.severity
189
+
190
+ @property
191
+ def severity_escalated(self) -> bool:
192
+ """True when the severity rose (a worse posture), not fell."""
193
+ return _SEVERITY_RANK[self.current.severity] > _SEVERITY_RANK[self.previous.severity]
194
+
195
+
196
+ @dataclass(frozen=True)
197
+ class FindingDiff:
198
+ """Finding-level delta between two runs, keyed by stable ``finding_id``.
199
+
200
+ ``appeared`` are findings in the later run but not the earlier one (a new
201
+ leak); ``resolved`` are in the earlier run but gone from the later one (a
202
+ fixed leak); ``persisting`` are in both (the later copy). Each list follows
203
+ its source run's own deterministic finding order.
204
+ """
205
+
206
+ appeared: tuple[Finding, ...]
207
+ resolved: tuple[Finding, ...]
208
+ persisting: tuple[Finding, ...]
209
+ # Findings confirmed in the later run whose id was not already confirmed in
210
+ # the earlier run -- the regression signal. Broader than "confirmed and
211
+ # newly appeared by id": it also catches a finding that persisted by id but
212
+ # was upgraded unverified -> confirmed between the runs. An unverified
213
+ # candidate never appears here (the false-positive control, the engineering
214
+ # spec section 6.4), so it cannot flip a diff to a regression on its own.
215
+ newly_confirmed: tuple[Finding, ...]
216
+ # Findings present in both runs whose status or severity changed in place
217
+ # (matched by id), for visibility. The subset that gates a regression is
218
+ # ``severity_escalations``.
219
+ changed: tuple[FindingChange, ...]
220
+
221
+ @property
222
+ def severity_escalations(self) -> tuple[FindingChange, ...]:
223
+ """Persisting findings, confirmed in both runs, whose severity rose.
224
+
225
+ A leak that was already a confirmed cross-tenant finding becoming more
226
+ severe (e.g. low -> critical) is a worse isolation posture between the
227
+ runs, so it gates as a regression. Requiring confirmed-in-both keeps this
228
+ disjoint from ``newly_confirmed`` (which covers unverified -> confirmed)
229
+ and clear of the false-positive control.
230
+ """
231
+ return tuple(
232
+ change
233
+ for change in self.changed
234
+ if change.severity_escalated
235
+ and change.previous.status is FindingStatus.CONFIRMED
236
+ and change.current.status is FindingStatus.CONFIRMED
237
+ )
238
+
239
+
240
+ @dataclass(frozen=True)
241
+ class RunDiff:
242
+ """A full comparison of two runs: metric deltas plus the finding-level diff."""
243
+
244
+ metrics: BaselineComparison
245
+ findings: FindingDiff
246
+
247
+ @property
248
+ def regressed(self) -> bool:
249
+ """True when the later run is worse than the earlier one.
250
+
251
+ A regression is any worsened metric (the baseline rule), a newly
252
+ confirmed finding, *or* an in-place severity escalation of a finding
253
+ confirmed in both runs. The finding checks catch what the metric counts
254
+ miss: a confirmed leak that is new -- by a fresh id, or by an in-place
255
+ unverified -> confirmed upgrade -- can leave ``confirmed_findings``
256
+ unchanged when another confirmed leak resolves in the same run; and a
257
+ known leak growing more severe (low -> critical) is a worse posture the
258
+ counts do not see at all.
259
+ """
260
+ return (
261
+ self.metrics.regressed
262
+ or bool(self.findings.newly_confirmed)
263
+ or bool(self.findings.severity_escalations)
264
+ )
265
+
266
+
267
+ def diff_findings(earlier: Sequence[Finding], later: Sequence[Finding]) -> FindingDiff:
268
+ """Diff two finding sequences by ``finding_id`` into the diff buckets.
269
+
270
+ ``appeared``/``resolved``/``persisting`` partition by ``finding_id``;
271
+ ``newly_confirmed`` is every finding confirmed in ``later`` whose id was not
272
+ already confirmed in ``earlier`` (a fresh id, or an in-place upgrade);
273
+ ``changed`` is every persisting finding whose status or severity differs
274
+ between the runs. Each side is de-duplicated by ``finding_id`` (first
275
+ occurrence wins) so a repeated id never lists a finding twice. Runs are
276
+ de-duplicated upstream; this only guards a hand-built input.
277
+ """
278
+ earlier_ids = {finding.finding_id for finding in earlier}
279
+ later_ids = {finding.finding_id for finding in later}
280
+ earlier_confirmed_ids = {
281
+ finding.finding_id for finding in earlier if finding.status is FindingStatus.CONFIRMED
282
+ }
283
+ earlier_by_id: dict[str, Finding] = {}
284
+ for finding in earlier:
285
+ earlier_by_id.setdefault(finding.finding_id, finding)
286
+
287
+ def _select(findings: Sequence[Finding], keep: Callable[[str], bool]) -> tuple[Finding, ...]:
288
+ seen: set[str] = set()
289
+ chosen: list[Finding] = []
290
+ for finding in findings:
291
+ if finding.finding_id in seen or not keep(finding.finding_id):
292
+ continue
293
+ seen.add(finding.finding_id)
294
+ chosen.append(finding)
295
+ return tuple(chosen)
296
+
297
+ newly_confirmed = _select(
298
+ [finding for finding in later if finding.status is FindingStatus.CONFIRMED],
299
+ lambda fid: fid not in earlier_confirmed_ids,
300
+ )
301
+ changed: list[FindingChange] = []
302
+ changed_seen: set[str] = set()
303
+ for finding in later:
304
+ fid = finding.finding_id
305
+ if fid in changed_seen or fid not in earlier_by_id:
306
+ continue
307
+ changed_seen.add(fid)
308
+ previous = earlier_by_id[fid]
309
+ if previous.status is not finding.status or previous.severity is not finding.severity:
310
+ changed.append(FindingChange(previous=previous, current=finding))
311
+ return FindingDiff(
312
+ appeared=_select(later, lambda fid: fid not in earlier_ids),
313
+ resolved=_select(earlier, lambda fid: fid not in later_ids),
314
+ persisting=_select(later, lambda fid: fid in earlier_ids),
315
+ newly_confirmed=newly_confirmed,
316
+ changed=tuple(changed),
317
+ )
318
+
319
+
320
+ def diff_runs(earlier: RunResult, later: RunResult) -> RunDiff:
321
+ """Compare two runs: metric deltas (:func:`compare_metrics`) and a finding diff.
322
+
323
+ ``earlier`` is the reference (an older run or a pre-change baseline) and
324
+ ``later`` is the run under scrutiny, matching the argument order of
325
+ :func:`compare_metrics`.
326
+ """
327
+ return RunDiff(
328
+ metrics=compare_metrics(earlier.metrics, later.metrics),
329
+ findings=diff_findings(earlier.findings, later.findings),
330
+ )
@@ -0,0 +1 @@
1
+ """Sectum AI command-line interface."""