policystrata 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- policystrata-0.1.0/CHANGELOG.md +21 -0
- policystrata-0.1.0/CITATION.cff +10 -0
- policystrata-0.1.0/CONTRIBUTING.md +31 -0
- policystrata-0.1.0/EVAL_CARD.md +98 -0
- policystrata-0.1.0/LICENSE +21 -0
- policystrata-0.1.0/MANIFEST.in +15 -0
- policystrata-0.1.0/PKG-INFO +324 -0
- policystrata-0.1.0/README.md +286 -0
- policystrata-0.1.0/SECURITY.md +24 -0
- policystrata-0.1.0/action.yml +70 -0
- policystrata-0.1.0/docker-compose.yml +14 -0
- policystrata-0.1.0/docs/benchmark-reference.md +187 -0
- policystrata-0.1.0/docs/distribution-roadmap.md +50 -0
- policystrata-0.1.0/docs/evidence.md +164 -0
- policystrata-0.1.0/docs/external-suite-protocol.md +75 -0
- policystrata-0.1.0/docs/failure-taxonomy.md +43 -0
- policystrata-0.1.0/docs/github-action.md +64 -0
- policystrata-0.1.0/docs/incident-reconstruction-template.md +59 -0
- policystrata-0.1.0/docs/methodology.md +222 -0
- policystrata-0.1.0/docs/open-source-commercial-strategy.md +88 -0
- policystrata-0.1.0/docs/scanner.md +158 -0
- policystrata-0.1.0/docs/trace-interop.md +44 -0
- policystrata-0.1.0/examples/integrations/dbt_semantic/finance_saas/semantic_models.yml +57 -0
- policystrata-0.1.0/examples/postgres_dbt/policystrata.yaml +28 -0
- policystrata-0.1.0/examples/postgres_dbt/policystrata_clean.yaml +10 -0
- policystrata-0.1.0/examples/postgres_dbt/policystrata_real_db_clean.yaml +32 -0
- policystrata-0.1.0/examples/postgres_dbt/semantic_models.yml +65 -0
- policystrata-0.1.0/examples/postgres_dbt/traces.jsonl +2 -0
- policystrata-0.1.0/examples/postgres_dbt/traces_clean.jsonl +1 -0
- policystrata-0.1.0/examples/postgres_dbt/traces_real_db_clean.jsonl +1 -0
- policystrata-0.1.0/pyproject.toml +91 -0
- policystrata-0.1.0/scripts/postgres-rls-evidence.py +51 -0
- policystrata-0.1.0/scripts/reproduce-evidence.sh +28 -0
- policystrata-0.1.0/setup.cfg +4 -0
- policystrata-0.1.0/src/policystrata/__init__.py +3 -0
- policystrata-0.1.0/src/policystrata/__main__.py +4 -0
- policystrata-0.1.0/src/policystrata/artifact_report.py +150 -0
- policystrata-0.1.0/src/policystrata/baselines.py +76 -0
- policystrata-0.1.0/src/policystrata/cli.py +229 -0
- policystrata-0.1.0/src/policystrata/compiler.py +179 -0
- policystrata-0.1.0/src/policystrata/database.py +84 -0
- policystrata-0.1.0/src/policystrata/demo.py +66 -0
- policystrata-0.1.0/src/policystrata/detection.py +115 -0
- policystrata-0.1.0/src/policystrata/domain.py +214 -0
- policystrata-0.1.0/src/policystrata/domains/finance_saas/policy.yaml +122 -0
- policystrata-0.1.0/src/policystrata/domains/finance_saas/schema.sql +83 -0
- policystrata-0.1.0/src/policystrata/domains/finance_saas/seed.sql +31 -0
- policystrata-0.1.0/src/policystrata/domains/finance_saas/surfaces.yaml +80 -0
- policystrata-0.1.0/src/policystrata/domains/finance_saas/tasks/seeded.yaml +130 -0
- policystrata-0.1.0/src/policystrata/domains/support_saas/policy.yaml +125 -0
- policystrata-0.1.0/src/policystrata/domains/support_saas/schema.sql +122 -0
- policystrata-0.1.0/src/policystrata/domains/support_saas/seed.sql +46 -0
- policystrata-0.1.0/src/policystrata/domains/support_saas/surfaces.yaml +80 -0
- policystrata-0.1.0/src/policystrata/domains/support_saas/tasks/seeded.yaml +142 -0
- policystrata-0.1.0/src/policystrata/evidence.py +149 -0
- policystrata-0.1.0/src/policystrata/exports.py +101 -0
- policystrata-0.1.0/src/policystrata/generator.py +222 -0
- policystrata-0.1.0/src/policystrata/integrations/__init__.py +1 -0
- policystrata-0.1.0/src/policystrata/integrations/dbt_semantic.py +169 -0
- policystrata-0.1.0/src/policystrata/minimize.py +125 -0
- policystrata-0.1.0/src/policystrata/models.py +227 -0
- policystrata-0.1.0/src/policystrata/mutations.py +117 -0
- policystrata-0.1.0/src/policystrata/policy.py +85 -0
- policystrata-0.1.0/src/policystrata/py.typed +1 -0
- policystrata-0.1.0/src/policystrata/runner.py +403 -0
- policystrata-0.1.0/src/policystrata/scan_models.py +203 -0
- policystrata-0.1.0/src/policystrata/scanner.py +1274 -0
- policystrata-0.1.0/src/policystrata/summary.py +81 -0
- policystrata-0.1.0/src/policystrata/trace_import.py +284 -0
- policystrata-0.1.0/src/policystrata.egg-info/PKG-INFO +324 -0
- policystrata-0.1.0/src/policystrata.egg-info/SOURCES.txt +87 -0
- policystrata-0.1.0/src/policystrata.egg-info/dependency_links.txt +1 -0
- policystrata-0.1.0/src/policystrata.egg-info/entry_points.txt +2 -0
- policystrata-0.1.0/src/policystrata.egg-info/requires.txt +10 -0
- policystrata-0.1.0/src/policystrata.egg-info/top_level.txt +1 -0
- policystrata-0.1.0/tests/test_cli.py +261 -0
- policystrata-0.1.0/tests/test_compiler.py +72 -0
- policystrata-0.1.0/tests/test_database_assets.py +25 -0
- policystrata-0.1.0/tests/test_detection.py +69 -0
- policystrata-0.1.0/tests/test_domain.py +142 -0
- policystrata-0.1.0/tests/test_evidence.py +55 -0
- policystrata-0.1.0/tests/test_integrations.py +15 -0
- policystrata-0.1.0/tests/test_intentional_asymmetry.py +45 -0
- policystrata-0.1.0/tests/test_minimize.py +45 -0
- policystrata-0.1.0/tests/test_policy.py +83 -0
- policystrata-0.1.0/tests/test_postgres_integration.py +98 -0
- policystrata-0.1.0/tests/test_runner.py +239 -0
- policystrata-0.1.0/tests/test_scanner.py +295 -0
- policystrata-0.1.0/uv.lock +740 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [Unreleased]
|
|
4
|
+
|
|
5
|
+
- No changes yet.
|
|
6
|
+
|
|
7
|
+
## [0.1.0] - 2026-06-25
|
|
8
|
+
|
|
9
|
+
- Initial public research artifact.
|
|
10
|
+
- Deterministic `support_saas` and `finance_saas` benchmark domains.
|
|
11
|
+
- Seeded and generated mutation suites for cross-layer policy drift.
|
|
12
|
+
- Traces, summaries, baselines, evidence tables, minimized witnesses, scanner fixtures, and Docker
|
|
13
|
+
PostgreSQL evidence support.
|
|
14
|
+
- Public release files, CI, GitHub Action wrapper, and source distribution manifest coverage.
|
|
15
|
+
- Eval-card governance, scanner regression-case labels, database state assertions, and
|
|
16
|
+
Inspect/BenchFlow export adapters.
|
|
17
|
+
- Suite provenance, evidence-level, and detector-freeze metadata for future blinded or externally
|
|
18
|
+
authored suites.
|
|
19
|
+
- `defense_in_depth_stack` baseline and scanner `evidence_exercised` reporting for clean scans.
|
|
20
|
+
- Artifact usability report command for reviewer-facing run, witness, latency, and fixture metrics.
|
|
21
|
+
- arXiv-ready paper source and same-day submission notes under `paper/arxiv`.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use PolicyStrata in research, please cite it."
|
|
3
|
+
title: "PolicyStrata"
|
|
4
|
+
type: software
|
|
5
|
+
version: "0.1.0"
|
|
6
|
+
date-released: "2026-06-25"
|
|
7
|
+
license: MIT
|
|
8
|
+
repository-code: "https://github.com/raintree-technology/policystrata"
|
|
9
|
+
authors:
|
|
10
|
+
- name: "Raintree Technology"
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
PolicyStrata is a deterministic research artifact. Keep changes reproducible, scoped, and explicit
|
|
4
|
+
about what the evidence does and does not prove.
|
|
5
|
+
|
|
6
|
+
```bash
|
|
7
|
+
uv sync --extra dev
|
|
8
|
+
uv run pytest
|
|
9
|
+
uv run ruff check .
|
|
10
|
+
uv run mypy src
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Optional PostgreSQL tests:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
docker compose up -d postgres
|
|
17
|
+
POLICYSTRATA_RUN_DB_TESTS=1 uv run pytest tests/test_postgres_integration.py
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
- Keep the policy oracle independent from the SQL compiler path.
|
|
21
|
+
- Treat constrained generation as a reliability layer, not an authorization boundary.
|
|
22
|
+
- Preserve JSON/YAML trace stability. Add fields compatibly.
|
|
23
|
+
- Keep the built-in `support_saas` domain deterministic and seed-driven.
|
|
24
|
+
- Use adapters for external frameworks. Do not couple core execution to them.
|
|
25
|
+
- Do not require an LLM API key, hosted service, or host `psql` for deterministic tests.
|
|
26
|
+
|
|
27
|
+
When evidence behavior changes, regenerate the tables:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
scripts/reproduce-evidence.sh
|
|
31
|
+
```
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# PolicyStrata Eval Card
|
|
2
|
+
|
|
3
|
+
PolicyStrata is a deterministic policy-regression environment for governed LLM data-agent stacks.
|
|
4
|
+
It is not an authorization boundary, a generic LLM leaderboard, or a claim of production incident
|
|
5
|
+
recall.
|
|
6
|
+
|
|
7
|
+
## Scope
|
|
8
|
+
|
|
9
|
+
PolicyStrata evaluates whether authorization, semantic, database-containment, and release
|
|
10
|
+
obligations survive translation across policy-bearing surfaces:
|
|
11
|
+
|
|
12
|
+
- model-visible manifests;
|
|
13
|
+
- grammars and semantic IR;
|
|
14
|
+
- validators;
|
|
15
|
+
- SQL compilers;
|
|
16
|
+
- database controls;
|
|
17
|
+
- output-release checks.
|
|
18
|
+
|
|
19
|
+
The core artifact uses deterministic semantic plans and traces. It does not require an LLM API key.
|
|
20
|
+
|
|
21
|
+
## Current Suites
|
|
22
|
+
|
|
23
|
+
| Suite | Provenance | Boundary |
|
|
24
|
+
| --- | --- | --- |
|
|
25
|
+
| `support_saas` seeded | public hand-authored fixture | regression coverage, not recall |
|
|
26
|
+
| `support_saas` generated | deterministic operator-generated cases | generated from the same public taxonomy |
|
|
27
|
+
| `support_saas` generated_alt_seed | secondary deterministic generated suite | reproducibility evidence, not blinded held-out evidence |
|
|
28
|
+
| `finance_saas` seeded | second synthetic built-in domain | reduces single-domain risk, still synthetic |
|
|
29
|
+
|
|
30
|
+
The current canonical evidence reports 620/620 killed non-equivalent mutants across these suites.
|
|
31
|
+
That score covers only the implemented deterministic operators and fixtures. It does not mean
|
|
32
|
+
PolicyStrata detects all real-world policy drift.
|
|
33
|
+
|
|
34
|
+
Run metadata records each suite's evidence level, provenance, and detector-freeze status. Future
|
|
35
|
+
externally authored, detector-frozen, or incident-reconstruction suites should be reported
|
|
36
|
+
separately from this 620-mutant public deterministic score.
|
|
37
|
+
|
|
38
|
+
## Scanner Evidence Levels
|
|
39
|
+
|
|
40
|
+
Scanner findings carry evidence levels:
|
|
41
|
+
|
|
42
|
+
- `deterministic_fixture`: built-in or explicitly configured fixtures.
|
|
43
|
+
- `property_generated`: generated SQL/IR mutants over configured inputs.
|
|
44
|
+
- `imported_trace`: imported production or representative traces.
|
|
45
|
+
- `real_db`: PostgreSQL fixture or RLS observations through Python adapters.
|
|
46
|
+
- `blinded_suite`: externally authored or detector-frozen suites when provided.
|
|
47
|
+
|
|
48
|
+
These levels describe what was exercised. They are not confidence intervals for unknown production
|
|
49
|
+
faults.
|
|
50
|
+
|
|
51
|
+
## Regression Gate Semantics
|
|
52
|
+
|
|
53
|
+
PolicyStrata scanner traces and state assertions may be labeled:
|
|
54
|
+
|
|
55
|
+
- `fail_to_pass`: known drift evidence should now be caught or contained.
|
|
56
|
+
- `pass_to_pass`: legitimate behavior should stay clean.
|
|
57
|
+
- `contain_to_contain`: a risky request should remain contained by a later layer.
|
|
58
|
+
- `deny_to_deny`: a forbidden request should remain denied.
|
|
59
|
+
- `allow_to_allow`: an authorized request should remain usable.
|
|
60
|
+
- `unclassified`: legacy or unlabeled imported evidence.
|
|
61
|
+
|
|
62
|
+
Release gates should not rely only on failing examples. A useful gate includes both
|
|
63
|
+
`fail_to_pass` evidence and `pass_to_pass`/`allow_to_allow` maintenance evidence so fixes do not
|
|
64
|
+
create over-restriction regressions.
|
|
65
|
+
|
|
66
|
+
## Real Database Boundary
|
|
67
|
+
|
|
68
|
+
Deterministic benchmark runs simulate database effects. The scanner can optionally prepare a
|
|
69
|
+
Docker/PostgreSQL fixture, execute read-only imported SQL beside canonical compiler SQL, run RLS
|
|
70
|
+
checks, and evaluate state assertions over result rows. Host `psql` is not required.
|
|
71
|
+
|
|
72
|
+
The current real-DB fixture is a smoke test for containment and SQL behavior. It is not an
|
|
73
|
+
end-to-end dbt/warehouse execution harness and should not be represented as one.
|
|
74
|
+
|
|
75
|
+
## Benchmark Integrity
|
|
76
|
+
|
|
77
|
+
Current limitations:
|
|
78
|
+
|
|
79
|
+
- no blinded externally authored held-out suite is shipped;
|
|
80
|
+
- no verified real incident reconstructions are shipped;
|
|
81
|
+
- synthetic domains may miss organization-specific policy nuance;
|
|
82
|
+
- generated mutants share the public operator taxonomy;
|
|
83
|
+
- baseline comparators are simple observability controls, not independent production test suites;
|
|
84
|
+
- bounded witness reduction is not full delta debugging or source-code root-cause localization.
|
|
85
|
+
|
|
86
|
+
External validation should follow `docs/external-suite-protocol.md` and, for real incidents,
|
|
87
|
+
`docs/incident-reconstruction-template.md`.
|
|
88
|
+
|
|
89
|
+
## Model-In-The-Loop Use
|
|
90
|
+
|
|
91
|
+
Model-mediated experiments are a reachability layer on top of deterministic conformance. They
|
|
92
|
+
should report reliability separately from capability:
|
|
93
|
+
|
|
94
|
+
- `reachability@k`: at least one of `k` attempts reached a witness.
|
|
95
|
+
- `policy_pass^k`: all `k` independent attempts respected the policy.
|
|
96
|
+
- `release_safe^k`: all `k` independent attempts avoided unsafe release.
|
|
97
|
+
|
|
98
|
+
Do not mix these with deterministic mutant kill rate.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Raintree Technology
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include action.yml
|
|
3
|
+
include EVAL_CARD.md
|
|
4
|
+
include CHANGELOG.md
|
|
5
|
+
include CONTRIBUTING.md
|
|
6
|
+
include SECURITY.md
|
|
7
|
+
include CITATION.cff
|
|
8
|
+
include LICENSE
|
|
9
|
+
include docker-compose.yml
|
|
10
|
+
include uv.lock
|
|
11
|
+
recursive-include docs *.md
|
|
12
|
+
recursive-include examples *.jsonl *.yaml *.yml
|
|
13
|
+
recursive-include scripts *.py *.sh
|
|
14
|
+
recursive-include tests *.py
|
|
15
|
+
recursive-include src/policystrata/domains *.sql *.yaml
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: policystrata
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cross-layer policy regression testing for LLM data-agent stacks
|
|
5
|
+
Author-email: Raintree Technology <support@raintree.technology>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/raintree-technology/policystrata
|
|
8
|
+
Project-URL: Repository, https://github.com/raintree-technology/policystrata
|
|
9
|
+
Project-URL: Documentation, https://github.com/raintree-technology/policystrata#readme
|
|
10
|
+
Project-URL: Paper, https://raintree.technology/papers
|
|
11
|
+
Project-URL: Changelog, https://github.com/raintree-technology/policystrata/blob/main/CHANGELOG.md
|
|
12
|
+
Project-URL: Issues, https://github.com/raintree-technology/policystrata/issues
|
|
13
|
+
Keywords: llm,text-to-sql,data-agents,policy-testing
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Environment :: Console
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: Intended Audience :: Science/Research
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Testing
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: pydantic>=2.0
|
|
29
|
+
Requires-Dist: pyyaml>=6.0
|
|
30
|
+
Requires-Dist: psycopg[binary]>=3.2
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
33
|
+
Requires-Dist: hypothesis>=6.0; extra == "dev"
|
|
34
|
+
Requires-Dist: ruff>=0.8.0; extra == "dev"
|
|
35
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
36
|
+
Requires-Dist: types-pyyaml>=6.0.0; extra == "dev"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# PolicyStrata
|
|
40
|
+
|
|
41
|
+
PolicyStrata is a deterministic regression-testing framework for cross-layer policy drift in LLM
|
|
42
|
+
data-agent stacks.
|
|
43
|
+
|
|
44
|
+
It generates principals, requests, semantic plans, database states, lowered queries, and release
|
|
45
|
+
decisions; compares each layer against a canonical reference policy; and minimizes failures into
|
|
46
|
+
small reproducible witnesses.
|
|
47
|
+
|
|
48
|
+
Use it when you are building text-to-SQL agents, BI copilots, internal analytics agents, warehouse
|
|
49
|
+
chat systems, or governed enterprise LLM tools and need to know whether prompts, manifests,
|
|
50
|
+
semantic plans, validators, SQL compilers, database controls, and output filters still agree about
|
|
51
|
+
policy.
|
|
52
|
+
|
|
53
|
+
PolicyStrata is not an authorization boundary, and it is not another generic text-to-SQL benchmark.
|
|
54
|
+
It is a reproducible research artifact and regression gate for finding reachable disagreements
|
|
55
|
+
between layers.
|
|
56
|
+
|
|
57
|
+
## Quick Start
|
|
58
|
+
|
|
59
|
+
From PyPI:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uvx policystrata demo
|
|
63
|
+
pipx run policystrata demo  # alternative to the uvx command above; run either one
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
From a source checkout:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
uv sync --extra dev
|
|
70
|
+
uv run policystrata demo
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The demo runs the built-in `support_saas` fixture, writes traces and minimized witnesses to
|
|
74
|
+
`runs/demo`, and prints the drift classes it found. Use `--out` to choose another output directory:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
uv run policystrata demo --out runs/my-demo
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
No LLM API key is required for deterministic tests, benchmark runs, or the built-in demo.
|
|
81
|
+
|
|
82
|
+
## Install
|
|
83
|
+
|
|
84
|
+
PolicyStrata is a CLI-first Python package. The public package provides the `policystrata` console
|
|
85
|
+
script and importable Python modules.
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
python -m pip install policystrata
|
|
89
|
+
policystrata demo
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
For one-off CLI use without managing an environment:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
uvx policystrata demo
|
|
96
|
+
pipx run policystrata demo  # alternative to the uvx command above; run either one
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Repository examples under `examples/`, Docker Compose fixtures, and evidence scripts are available
|
|
100
|
+
from a GitHub checkout or source distribution. The wheel installs the runtime package and built-in
|
|
101
|
+
domain fixtures used by `policystrata demo`, `run`, `init-domain`, and `scan`.
|
|
102
|
+
|
|
103
|
+
## Use As A Template
|
|
104
|
+
|
|
105
|
+
Click **Use this template** on GitHub, then start with the deterministic fixtures:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
uv sync --extra dev
|
|
109
|
+
uv run policystrata run --domain support_saas --suite seeded --out runs/example
|
|
110
|
+
uv run policystrata summarize runs/example
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
To copy a built-in domain fixture into your tree:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
uv run policystrata init-domain support_saas --out examples/my-policystrata-domain
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Keep custom integrations as adapters. The policy oracle should stay independent from SQL compiler
|
|
120
|
+
behavior, external eval frameworks, and model-provider behavior.
|
|
121
|
+
|
|
122
|
+
## What It Tests
|
|
123
|
+
|
|
124
|
+
The core failure class is cross-layer policy drift:
|
|
125
|
+
|
|
126
|
+
```text
|
|
127
|
+
Canonical policy:
|
|
128
|
+
Analysts may view tenant-scoped aggregate ticket counts, but not customer-level PII.
|
|
129
|
+
|
|
130
|
+
Model-visible manifest or grammar:
|
|
131
|
+
Accidentally exposes customer_email as a dimension.
|
|
132
|
+
|
|
133
|
+
SQL compiler:
|
|
134
|
+
Accidentally drops the tenant predicate while lowering an authorized aggregate.
|
|
135
|
+
|
|
136
|
+
Output layer:
|
|
137
|
+
Releases the result because the final answer looks like a summary.
|
|
138
|
+
|
|
139
|
+
PolicyStrata result:
|
|
140
|
+
A minimized witness localizes the violated layer and failed obligation.
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
PolicyStrata does not assume every layer should behave identically. Each surface has a declared
|
|
144
|
+
responsibility:
|
|
145
|
+
|
|
146
|
+
- `manifest`: expose model-visible capabilities without stale or forbidden options.
|
|
147
|
+
- `grammar`: parse the declared intent space and preserve untrusted intent for validation.
|
|
148
|
+
- `validator`: authorize semantic queries and bind principal, tenant, time, and budget obligations.
|
|
149
|
+
- `compiler`: lower authorized semantic IR into SQL while preserving metric, tenant, time, and row
|
|
150
|
+
obligations.
|
|
151
|
+
- `database`: contain row access with RLS and other database-side controls.
|
|
152
|
+
- `release`: withhold contained or unauthorized results.
|
|
153
|
+
|
|
154
|
+
See [docs/failure-taxonomy.md](docs/failure-taxonomy.md) for how witness classes map to concrete
|
|
155
|
+
policy-drift failures.
|
|
156
|
+
|
|
157
|
+
## Run Benchmarks
|
|
158
|
+
|
|
159
|
+
PolicyStrata ships with deterministic `support_saas` and `finance_saas` benchmarks, generated
|
|
160
|
+
mutation suites, minimized witnesses, JSONL traces, baseline comparisons, and evidence tables.
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
uv run policystrata run --domain support_saas --suite seeded --out runs/example
|
|
164
|
+
uv run policystrata run \
|
|
165
|
+
--domain support_saas \
|
|
166
|
+
--suite generated \
|
|
167
|
+
--count 500 \
|
|
168
|
+
--seed 1729 \
|
|
169
|
+
--out runs/generated
|
|
170
|
+
uv run policystrata run --domain finance_saas --suite seeded --out runs/finance
|
|
171
|
+
uv run policystrata baselines runs/example
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
The default `run` command writes:
|
|
175
|
+
|
|
176
|
+
```text
|
|
177
|
+
runs/<id>/traces.jsonl
|
|
178
|
+
runs/<id>/summary.json
|
|
179
|
+
runs/<id>/metadata.json
|
|
180
|
+
runs/<id>/witnesses/*.json
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
`metadata.json` records the mutation operator set, suite provenance, evidence level, and
|
|
184
|
+
detector-freeze status. Static suite YAML can declare `suite_metadata` so externally authored,
|
|
185
|
+
detector-frozen, or incident-reconstruction cases stay separate from public/generated benchmark
|
|
186
|
+
scores.
|
|
187
|
+
|
|
188
|
+
Regenerate paper-style evidence tables with:
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
scripts/reproduce-evidence.sh
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Generate reviewer-facing artifact metrics for a run:
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
uv run policystrata artifact-report runs/repro/seeded
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Current benchmark details are in [docs/evidence.md](docs/evidence.md), with methodology and claim
|
|
201
|
+
boundaries in [docs/methodology.md](docs/methodology.md) and [EVAL_CARD.md](EVAL_CARD.md).
|
|
202
|
+
|
|
203
|
+
## Run The Scanner
|
|
204
|
+
|
|
205
|
+
`policystrata scan` is the production-oriented path. It treats PolicyStrata as a scanner and
|
|
206
|
+
release gate, not as the authorization boundary.
|
|
207
|
+
|
|
208
|
+
Clean smoke test:
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
uv run policystrata scan --config examples/postgres_dbt/policystrata_clean.yaml --out runs/scan-clean
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
Intentional gate-failure fixture:
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
uv run policystrata scan --config examples/postgres_dbt/policystrata.yaml --out runs/scan
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
That fixture should exit `1` because it contains imported traces with known authorization,
|
|
221
|
+
unsafe-release, and tenant-scope findings.
|
|
222
|
+
|
|
223
|
+
Scanner outputs include:
|
|
224
|
+
|
|
225
|
+
```text
|
|
226
|
+
runs/scan-clean/scan.json
|
|
227
|
+
runs/scan-clean/findings.jsonl
|
|
228
|
+
runs/scan-clean/summary.json
|
|
229
|
+
runs/scan-clean/report.md
|
|
230
|
+
runs/scan-clean/witnesses/*.json
|
|
231
|
+
runs/scan-clean/scan.sarif # when sarif: true
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
For a scanner run that also executes imported SQL beside canonical compiler SQL against the
|
|
235
|
+
Docker/PostgreSQL fixture:
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
docker compose up -d postgres
|
|
239
|
+
uv run policystrata scan --config examples/postgres_dbt/policystrata_real_db_clean.yaml --out runs/scan-real-db-clean
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Postgres access goes through Python/`psycopg`; host `psql` is not required. See
|
|
243
|
+
[docs/scanner.md](docs/scanner.md) for scanner configuration, gate behavior, state assertions, and
|
|
244
|
+
real-database fixture details.
|
|
245
|
+
|
|
246
|
+
## GitHub Action
|
|
247
|
+
|
|
248
|
+
Use the first-party action to run `policystrata scan` as a pull-request or release gate:
|
|
249
|
+
|
|
250
|
+
```yaml
|
|
251
|
+
name: PolicyStrata
|
|
252
|
+
|
|
253
|
+
on:
|
|
254
|
+
pull_request:
|
|
255
|
+
push:
|
|
256
|
+
branches: [main]
|
|
257
|
+
|
|
258
|
+
jobs:
|
|
259
|
+
scan:
|
|
260
|
+
runs-on: ubuntu-latest
|
|
261
|
+
steps:
|
|
262
|
+
- uses: actions/checkout@v4
|
|
263
|
+
|
|
264
|
+
- uses: raintree-technology/policystrata@v0.1.0
|
|
265
|
+
with:
|
|
266
|
+
config: policystrata.yaml
|
|
267
|
+
out: runs/policystrata
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
See [docs/github-action.md](docs/github-action.md) for inputs, artifact upload, and database
|
|
271
|
+
fixture guidance.
|
|
272
|
+
|
|
273
|
+
## Integrations And Exports
|
|
274
|
+
|
|
275
|
+
PolicyStrata keeps core execution independent from external eval frameworks. Adapter exports are
|
|
276
|
+
available for downstream systems:
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
uv run policystrata export runs/example --format inspect --out runs/example/inspect.jsonl
|
|
280
|
+
uv run policystrata export runs/example --format benchflow --out runs/example/benchflow.json
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
The repo also includes a small dbt Semantic Layer adapter and fixture:
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
uv run policystrata check-integration dbt-semantic \
|
|
287
|
+
--domain finance_saas \
|
|
288
|
+
--path examples/integrations/dbt_semantic/finance_saas/semantic_models.yml
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
See [docs/trace-interop.md](docs/trace-interop.md) for adapter field mappings.
|
|
292
|
+
|
|
293
|
+
## Reference Docs
|
|
294
|
+
|
|
295
|
+
- [docs/benchmark-reference.md](docs/benchmark-reference.md): domains, generated mutants,
|
|
296
|
+
baselines, and witness shape.
|
|
297
|
+
- [docs/scanner.md](docs/scanner.md): scanner inputs, gates, state assertions, and PostgreSQL
|
|
298
|
+
fixture use.
|
|
299
|
+
- [docs/github-action.md](docs/github-action.md): CI wrapper for `policystrata scan`.
|
|
300
|
+
- [docs/distribution-roadmap.md](docs/distribution-roadmap.md): CLI, GitHub Action, SDK, MCP, and
|
|
301
|
+
GitHub CLI extension sequence.
|
|
302
|
+
- [docs/evidence.md](docs/evidence.md): current evidence snapshot and reproduction commands.
|
|
303
|
+
- [docs/methodology.md](docs/methodology.md): claims, limitations, mutant definitions, and witness
|
|
304
|
+
minimization.
|
|
305
|
+
- [EVAL_CARD.md](EVAL_CARD.md): benchmark provenance, evidence levels, and eval boundaries.
|
|
306
|
+
- [docs/open-source-commercial-strategy.md](docs/open-source-commercial-strategy.md): packaging and
|
|
307
|
+
product boundary.
|
|
308
|
+
|
|
309
|
+
## Development
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
uv run pytest
|
|
313
|
+
uv run ruff check .
|
|
314
|
+
uv run mypy src
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
The built-in `support_saas` domain is deterministic and seed-driven. Preserve JSON/YAML trace
|
|
318
|
+
stability when extending artifacts; add fields compatibly.
|
|
319
|
+
|
|
320
|
+
## Status
|
|
321
|
+
|
|
322
|
+
PolicyStrata is an early research artifact. It is useful for reproducing the paper's core failure
|
|
323
|
+
model and for building regression gates around real stacks. It does not prove recall on unknown
|
|
324
|
+
production incidents, and it should not be represented as a production security scanner by itself.
|