policystrata 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. policystrata/__init__.py +3 -0
  2. policystrata/__main__.py +4 -0
  3. policystrata/artifact_report.py +150 -0
  4. policystrata/baselines.py +76 -0
  5. policystrata/cli.py +229 -0
  6. policystrata/compiler.py +179 -0
  7. policystrata/database.py +84 -0
  8. policystrata/demo.py +66 -0
  9. policystrata/detection.py +115 -0
  10. policystrata/domain.py +214 -0
  11. policystrata/domains/finance_saas/policy.yaml +122 -0
  12. policystrata/domains/finance_saas/schema.sql +83 -0
  13. policystrata/domains/finance_saas/seed.sql +31 -0
  14. policystrata/domains/finance_saas/surfaces.yaml +80 -0
  15. policystrata/domains/finance_saas/tasks/seeded.yaml +130 -0
  16. policystrata/domains/support_saas/policy.yaml +125 -0
  17. policystrata/domains/support_saas/schema.sql +122 -0
  18. policystrata/domains/support_saas/seed.sql +46 -0
  19. policystrata/domains/support_saas/surfaces.yaml +80 -0
  20. policystrata/domains/support_saas/tasks/seeded.yaml +142 -0
  21. policystrata/evidence.py +149 -0
  22. policystrata/exports.py +101 -0
  23. policystrata/generator.py +222 -0
  24. policystrata/integrations/__init__.py +1 -0
  25. policystrata/integrations/dbt_semantic.py +169 -0
  26. policystrata/minimize.py +125 -0
  27. policystrata/models.py +227 -0
  28. policystrata/mutations.py +117 -0
  29. policystrata/policy.py +85 -0
  30. policystrata/py.typed +1 -0
  31. policystrata/runner.py +403 -0
  32. policystrata/scan_models.py +203 -0
  33. policystrata/scanner.py +1274 -0
  34. policystrata/summary.py +81 -0
  35. policystrata/trace_import.py +284 -0
  36. policystrata-0.1.0.dist-info/METADATA +324 -0
  37. policystrata-0.1.0.dist-info/RECORD +41 -0
  38. policystrata-0.1.0.dist-info/WHEEL +5 -0
  39. policystrata-0.1.0.dist-info/entry_points.txt +2 -0
  40. policystrata-0.1.0.dist-info/licenses/LICENSE +21 -0
  41. policystrata-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,3 @@
1
+ """PolicyStrata research artifact."""
2
+
3
# Version string exposed by the package.
__version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ from policystrata.cli import main
2
+
3
# When executed as a script, run the CLI and exit with the value main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections.abc import Iterable
5
+ from pathlib import Path
6
+ from statistics import median
7
+ from typing import Any
8
+
9
+ try:
10
+ from importlib.resources.abc import Traversable
11
+ except ImportError: # Python 3.10 exposes Traversable from importlib.abc.
12
+ from importlib.abc import Traversable
13
+
14
+ from policystrata.domain import BUILTIN_DOMAINS, domain_root
15
+ from policystrata.evidence import load_run_metadata, markdown_table, run_artifact_path
16
+ from policystrata.models import Trace, WitnessClass
17
+ from policystrata.summary import load_traces
18
+
19
+
20
def build_artifact_report(run_dir: Path, domain_path: Path | None = None) -> dict[str, Any]:
    """Collect reproducibility and usability metrics for one run directory.

    Args:
        run_dir: Directory the traces, run metadata and witnesses are loaded from.
        domain_path: Optional on-disk domain fixture tree, forwarded to
            ``domain_fixture_stats``.

    Returns:
        A flat mapping of metric name to value.
    """
    traces = load_traces(run_dir)
    meta = load_run_metadata(run_dir)
    domain_name = str(meta.get("domain", "support_saas"))
    sizes = witness_byte_sizes(run_dir, traces)
    run_tree = local_tree_stats(run_dir)
    fixtures = domain_fixture_stats(domain_name, domain_path)

    non_clean = [trace for trace in traces if trace.witness_class != WitnessClass.CLEAN]
    median_bytes = int(median(sizes)) if sizes else 0
    total_cost = sum(int(trace.cost.get("estimated", 0)) for trace in traces)

    report: dict[str, Any] = {
        "run_dir": str(run_dir),
        "domain": domain_name,
        "suite": meta.get("suite"),
        "evidence_level": meta.get("evidence_level", "deterministic_fixture"),
        "suite_provenance": meta.get("suite_provenance", "hand_authored"),
        "detector_frozen": bool(meta.get("detector_frozen", False)),
        "traces": len(traces),
        "non_clean_traces": len(non_clean),
        "minimized_witnesses": len(sizes),
        "median_witness_bytes": median_bytes,
        "run_artifact_files": run_tree["files"],
        "run_artifact_bytes": run_tree["bytes"],
        "domain_fixture_files": fixtures["files"],
        "domain_fixture_bytes": fixtures["bytes"],
        "domain_fixture_lines": fixtures["lines"],
        "avg_latency_ms": round(average_latency(traces), 4),
        "p95_latency_ms": round(percentile_latency(traces, 0.95), 4),
        "estimated_cost": total_cost,
        "requires_llm_api_key": False,
    }
    return report
49
+
50
+
51
def render_artifact_report(run_dir: Path, domain_path: Path | None = None) -> str:
    """Render the artifact report for *run_dir* as a Markdown document.

    Args:
        run_dir: Run directory passed to ``build_artifact_report``.
        domain_path: Optional domain fixture tree passed to ``build_artifact_report``.

    Returns:
        A Markdown string: a title, a two-column Metric/Value table and a
        trailing newline.
    """
    report = build_artifact_report(run_dir, domain_path)

    def yes_no(flag: Any) -> str:
        return "yes" if flag else "no"

    rows = [
        ["Run", report["run_dir"]],
        ["Domain", report["domain"]],
        ["Suite", str(report["suite"])],
        ["Evidence level", report["evidence_level"]],
        ["Suite provenance", report["suite_provenance"]],
        ["Detector frozen", yes_no(report["detector_frozen"])],
        ["Traces", str(report["traces"])],
        ["Non-clean traces", str(report["non_clean_traces"])],
        ["Minimized witnesses", str(report["minimized_witnesses"])],
        ["Median witness bytes", str(report["median_witness_bytes"])],
        ["Average trace latency ms", str(report["avg_latency_ms"])],
        ["P95 trace latency ms", str(report["p95_latency_ms"])],
        ["Estimated policy cost", str(report["estimated_cost"])],
        ["Run artifact files", str(report["run_artifact_files"])],
        ["Run artifact bytes", str(report["run_artifact_bytes"])],
        ["Domain fixture files", str(report["domain_fixture_files"])],
        ["Domain fixture bytes", str(report["domain_fixture_bytes"])],
        ["Domain fixture lines", str(report["domain_fixture_lines"])],
        # Derived from the report so the Markdown and JSON views cannot disagree.
        ["Requires LLM API key", yes_no(report["requires_llm_api_key"])],
    ]
    return "# PolicyStrata Artifact Report\n\n" + markdown_table(["Metric", "Value"], rows) + "\n"
75
+
76
+
77
def witness_byte_sizes(run_dir: Path, traces: Iterable[Trace]) -> list[int]:
    """Return the byte length of each witness file referenced by *traces*.

    Traces whose ``witness_path`` is falsy are skipped. The remaining paths
    are resolved with ``run_artifact_path`` and read in full.
    """
    return [
        len(run_artifact_path(run_dir, trace.witness_path).read_bytes())
        for trace in traces
        if trace.witness_path
    ]
83
+
84
+
85
def local_tree_stats(root: Path) -> dict[str, int]:
    """Summarise the regular files found recursively under *root*.

    Returns:
        A dict with ``files`` (file count), ``bytes`` (sum of ``st_size``) and
        ``lines`` (``count_lines`` total over the files ``text_files`` selects,
        each decoded as UTF-8).
    """
    regular_files = [entry for entry in root.rglob("*") if entry.is_file()]
    total_bytes = sum(entry.stat().st_size for entry in regular_files)
    total_lines = sum(
        count_lines(entry.read_text(encoding="utf-8")) for entry in text_files(regular_files)
    )
    return {"files": len(regular_files), "bytes": total_bytes, "lines": total_lines}
92
+
93
+
94
def domain_fixture_stats(domain: str, domain_path: Path | None) -> dict[str, int]:
    """Return file, byte and line counts for a domain's fixture tree.

    An explicit *domain_path* takes precedence. Otherwise a built-in domain is
    measured through ``domain_root``, and any other domain yields all zeros.
    """
    if domain_path is not None:
        stats = local_tree_stats(domain_path)
    elif domain in BUILTIN_DOMAINS:
        stats = traversable_tree_stats(domain_root(domain))
    else:
        stats = {"files": 0, "bytes": 0, "lines": 0}
    return stats
100
+
101
+
102
def traversable_tree_stats(root: Traversable) -> dict[str, int]:
    """Summarise the files beneath a ``Traversable`` root.

    Returns:
        A dict with ``files`` (file count), ``bytes`` (total length of every
        file's content) and ``lines`` (``count_lines`` total over the files
        ``text_traversables`` selects, each decoded as UTF-8).
    """
    members = list(traversable_files(root))
    byte_total = sum(len(member.read_bytes()) for member in members)
    line_total = sum(
        count_lines(member.read_text(encoding="utf-8")) for member in text_traversables(members)
    )
    return {"files": len(members), "bytes": byte_total, "lines": line_total}
109
+
110
+
111
def traversable_files(root: Traversable) -> Iterable[Traversable]:
    """Yield every file beneath *root*, descending into directories as they are met.

    Entries that are neither a directory nor a file are ignored.
    """
    for entry in root.iterdir():
        if entry.is_dir():
            yield from traversable_files(entry)
            continue
        if entry.is_file():
            yield entry
117
+
118
+
119
def text_files(paths: Iterable[Path]) -> Iterable[Path]:
    """Yield the members of *paths* whose suffix is one of the line-counted text types."""
    countable_suffixes = {".json", ".jsonl", ".md", ".sql", ".yaml", ".yml"}
    yield from (candidate for candidate in paths if candidate.suffix in countable_suffixes)
123
+
124
+
125
def text_traversables(paths: Iterable[Traversable]) -> Iterable[Traversable]:
    """Yield the entries of *paths* whose ``name`` ends in ``.sql``, ``.yaml`` or ``.yml``."""
    countable_suffixes = (".sql", ".yaml", ".yml")
    for entry in paths:
        suffix = Path(entry.name).suffix
        if suffix in countable_suffixes:
            yield entry
129
+
130
+
131
def count_lines(text: str) -> int:
    """Count the lines of *text* that contain at least one non-whitespace character."""
    non_blank = [row for row in text.splitlines() if row.strip()]
    return len(non_blank)
133
+
134
+
135
def average_latency(traces: list[Trace]) -> float:
    """Return the arithmetic mean of ``latency_ms`` over *traces*, or 0.0 when empty."""
    if not traces:
        return 0.0
    total_ms = sum(trace.latency_ms for trace in traces)
    return total_ms / len(traces)
139
+
140
+
141
def percentile_latency(traces: list[Trace], percentile: float) -> float:
    """Return the ``latency_ms`` value at the given percentile rank.

    The rank is ``round(percentile * (n - 1))`` clamped into ``[0, n - 1]`` and
    used as an index into the ascending latencies, with no interpolation.
    Returns 0.0 when *traces* is empty.
    """
    if not traces:
        return 0.0
    ordered = sorted(trace.latency_ms for trace in traces)
    last = len(ordered) - 1
    rank = int(round(percentile * last))
    rank = max(0, rank)
    rank = min(last, rank)
    return ordered[rank]
147
+
148
+
149
def artifact_report_json(run_dir: Path, domain_path: Path | None = None) -> str:
    """Serialise the artifact report as indented, key-sorted JSON with a trailing newline."""
    report = build_artifact_report(run_dir, domain_path)
    serialized = json.dumps(report, indent=2, sort_keys=True)
    return f"{serialized}\n"
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+ from pathlib import Path
5
+
6
+ from policystrata.models import Trace, WitnessClass
7
+ from policystrata.summary import load_traces
8
+
9
# Signature shared by the baseline detectors registered in BASELINES:
# takes one trace and returns whether that baseline flags it.
BaselinePredicate = Callable[[Trace], bool]
10
+
11
+
12
def final_answer_only(trace: Trace) -> bool:
    """Return whether the trace has a semantic difference and its release decision is allowed."""
    differs = trace.semantic_difference
    return differs and trace.release_decision.allowed
14
+
15
+
16
def sql_snapshot(trace: Trace) -> bool:
    """Return True for compiler-localized traces, except the ``cost_estimate_ignores_expansion`` mutation."""
    undetected_mutations = {"cost_estimate_ignores_expansion"}
    return trace.localized_surface == "compiler" and trace.mutation not in undetected_mutations
20
+
21
+
22
def validator_only(trace: Trace) -> bool:
    """Return True when the canonical decision denies and the trace is not localized to the validator."""
    if trace.canonical_decision.allowed:
        return False
    return trace.localized_surface != "validator"
24
+
25
+
26
def db_rls_only(trace: Trace) -> bool:
    """Return True when the database contained the trace or blocked a database-localized one.

    True if ``containment_layer`` is ``"database"``, or if ``localized_surface``
    is ``"database"`` and ``db_result["blocked_by_database"]`` is truthy.
    """
    if trace.containment_layer == "database":
        return True
    if trace.localized_surface != "database":
        return False
    return bool(trace.db_result.get("blocked_by_database"))
30
+
31
+
32
def random_data_generation(trace: Trace) -> bool:
    """Return the trace's ``semantic_difference`` flag unchanged."""
    flagged = trace.semantic_difference
    return flagged
34
+
35
+
36
def naive_surface_equality(trace: Trace) -> bool:
    """Return True when any surface decision's ``allowed`` differs from the canonical decision's."""
    expected = trace.canonical_decision.allowed
    for decision in trace.surface_decisions.values():
        if decision.allowed != expected:
            return True
    return False
39
+
40
+
41
def defense_in_depth_stack(trace: Trace) -> bool:
    """Combine four baselines: flag the trace if any of them does.

    The baselines are tried in the order validator, SQL snapshot, database RLS,
    final answer, stopping at the first that flags the trace.
    """
    flagged = validator_only(trace) or sql_snapshot(trace)
    flagged = flagged or db_rls_only(trace)
    return flagged or final_answer_only(trace)
48
+
49
+
50
# Registry of baseline name -> predicate. evaluate_baselines iterates this
# mapping and reports one result entry per key.
BASELINES: dict[str, BaselinePredicate] = {
    "final_answer_only": final_answer_only,
    "sql_snapshot": sql_snapshot,
    "validator_only": validator_only,
    "db_rls_only": db_rls_only,
    "random_data_generation": random_data_generation,
    "naive_surface_equality": naive_surface_equality,
    "defense_in_depth_stack": defense_in_depth_stack,
}
59
+
60
+
61
def evaluate_baselines(traces: list[Trace]) -> dict[str, dict[str, int | float]]:
    """Score every registered baseline against the non-clean traces.

    Args:
        traces: Traces to evaluate. Only those whose ``witness_class`` is not
            ``WitnessClass.CLEAN`` count as failures.

    Returns:
        For each name in ``BASELINES``, a dict with ``caught``,
        ``total_failures``, ``missed`` and ``catch_rate``. ``catch_rate`` is
        0.0 when there are no failures.
    """
    # Select the failing traces once rather than re-filtering all traces for
    # every baseline. This also keeps the result correct for one-shot iterables.
    failures = [trace for trace in traces if trace.witness_class != WitnessClass.CLEAN]
    total_failures = len(failures)

    results: dict[str, dict[str, int | float]] = {}
    for name, predicate in BASELINES.items():
        caught = sum(1 for trace in failures if predicate(trace))
        results[name] = {
            "caught": caught,
            "total_failures": total_failures,
            "missed": total_failures - caught,
            "catch_rate": caught / total_failures if total_failures else 0.0,
        }
    return results
73
+
74
+
75
def evaluate_baseline_run(run_dir: Path) -> dict[str, dict[str, int | float]]:
    """Load the traces stored in *run_dir* and evaluate every baseline on them."""
    traces = load_traces(run_dir)
    return evaluate_baselines(traces)
policystrata/cli.py ADDED
@@ -0,0 +1,229 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from pydantic import ValidationError
8
+
9
+ from policystrata.artifact_report import artifact_report_json, render_artifact_report
10
+ from policystrata.baselines import evaluate_baseline_run
11
+ from policystrata.demo import run_demo
12
+ from policystrata.domain import BUILTIN_DOMAIN, BUILTIN_DOMAINS, copy_domain
13
+ from policystrata.evidence import parse_run_args, render_evidence_tables
14
+ from policystrata.exports import export_run
15
+ from policystrata.generator import MAX_GENERATED_COUNT
16
+ from policystrata.integrations.dbt_semantic import compare_dbt_semantic_model
17
+ from policystrata.minimize import minimize_witness_file
18
+ from policystrata.runner import run_suite
19
+ from policystrata.scan_models import GateOutcome
20
+ from policystrata.scanner import run_scan
21
+ from policystrata.summary import summarize_run
22
+
23
+
24
def generated_count_arg(value: str) -> int:
    """Parse and range-check a count given on the command line.

    Used as the argparse ``type=`` converter for ``--count``.

    Raises:
        argparse.ArgumentTypeError: If *value* is not an integer or falls
            outside ``1..MAX_GENERATED_COUNT``.
    """
    try:
        parsed = int(value)
    except ValueError as exc:
        raise argparse.ArgumentTypeError("count must be an integer") from exc
    if not 1 <= parsed <= MAX_GENERATED_COUNT:
        raise argparse.ArgumentTypeError(f"count must be between 1 and {MAX_GENERATED_COUNT}")
    return parsed
32
+
33
+
34
def build_parser() -> argparse.ArgumentParser:
    """Build the ``policystrata`` argument parser.

    The chosen sub-command is stored in ``args.command``, and one is required.
    """
    root = argparse.ArgumentParser(
        prog="policystrata",
        description="Cross-layer policy regression testing.",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # init-domain
    sub = commands.add_parser(
        "init-domain",
        help="Copy a built-in domain fixture into the current tree.",
    )
    sub.add_argument("domain", choices=BUILTIN_DOMAINS)
    sub.add_argument("--out", type=Path, default=Path("."))

    # demo
    sub = commands.add_parser("demo", help="Run a 30-second built-in policy drift demo.")
    sub.add_argument("--out", type=Path, default=Path("runs/demo"))

    # run
    sub = commands.add_parser("run", help="Run a deterministic benchmark suite.")
    sub.add_argument("--domain", default=BUILTIN_DOMAIN)
    sub.add_argument("--suite", default="seeded")
    sub.add_argument("--out", type=Path, required=True)
    sub.add_argument("--domain-path", type=Path, default=None)
    sub.add_argument(
        "--count",
        type=generated_count_arg,
        default=None,
        help="Task count for generated suites.",
    )
    sub.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Seed for generated suites.",
    )

    # minimize
    sub = commands.add_parser("minimize", help="Minimize a trace or witness JSON file.")
    sub.add_argument("--witness", type=Path, required=True)

    # summarize
    sub = commands.add_parser("summarize", help="Summarize a run directory.")
    sub.add_argument("run_dir", type=Path)

    # baselines
    sub = commands.add_parser("baselines", help="Evaluate baseline strategies for a run.")
    sub.add_argument("run_dir", type=Path)

    # export
    sub = commands.add_parser("export", help="Export a run through an external eval adapter.")
    sub.add_argument("run_dir", type=Path)
    sub.add_argument("--format", choices=["inspect", "benchflow"], required=True)
    sub.add_argument("--out", type=Path, required=True)

    # evidence
    sub = commands.add_parser("evidence", help="Render Markdown evidence tables.")
    sub.add_argument("runs", nargs="+", help="Run directories, optionally named as suite=path.")
    sub.add_argument("--out", type=Path, default=None)

    # artifact-report
    sub = commands.add_parser(
        "artifact-report",
        help="Render reviewer-facing reproducibility and usability metrics for a run.",
    )
    sub.add_argument("run_dir", type=Path)
    sub.add_argument("--domain-path", type=Path, default=None)
    sub.add_argument("--format", choices=["markdown", "json"], default="markdown")
    sub.add_argument("--out", type=Path, default=None)

    # check-integration
    sub = commands.add_parser(
        "check-integration",
        help="Check a small external semantic-layer fixture against a PolicyStrata domain.",
    )
    sub.add_argument("kind", choices=["dbt-semantic"])
    sub.add_argument("--domain", default=BUILTIN_DOMAIN, choices=BUILTIN_DOMAINS)
    sub.add_argument("--path", type=Path, required=True)
    sub.add_argument("--domain-path", type=Path, default=None)

    # scan
    sub = commands.add_parser(
        "scan",
        help="Run a production policy-drift scan over configured adapters and traces.",
    )
    sub.add_argument("--config", type=Path, default=Path("policystrata.yaml"))
    sub.add_argument("--out", type=Path, default=None)

    return root
113
+
114
+
115
def main(argv: list[str] | None = None) -> int:
    """Parse *argv*, run the selected command and return its exit status.

    Validation errors, missing files, ``ValueError`` and the ``TypeError``
    messages accepted by ``is_user_type_error`` are reported through
    ``parser.error``. Any other ``TypeError`` propagates unchanged.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    try:
        return run_command(args)
    except ValidationError as exc:
        message = format_validation_error(exc)
    except (FileNotFoundError, ValueError) as exc:
        message = str(exc)
    except TypeError as exc:
        if not is_user_type_error(exc):
            raise
        message = str(exc)

    parser.error(message)
    # Fallback status in case parser.error() returns instead of exiting.
    return 2
130
+
131
+
132
def _write_or_print(text: str, out: Path | None) -> None:
    """Write *text* to *out* and print a JSON pointer to it, or print *text* when *out* is None."""
    if out is None:
        print(text, end="")
        return
    out.write_text(text, encoding="utf-8")
    print(json.dumps({"out": str(out)}, sort_keys=True))


def run_command(args: argparse.Namespace) -> int:
    """Dispatch the parsed sub-command and return the process exit status.

    Args:
        args: Namespace produced by the parser from ``build_parser``.

    Returns:
        0 on success. For ``scan``, 1 when the gate outcome is
        ``GateOutcome.FAIL`` and 0 otherwise.

    Raises:
        ValueError: If ``args.command`` matches none of the handled commands.
    """
    if args.command == "init-domain":
        target = copy_domain(args.domain, args.out)
        print(target)
        return 0

    if args.command == "demo":
        print(run_demo(args.out), end="")
        return 0

    if args.command == "run":
        traces = run_suite(args.domain, args.suite, args.out, args.domain_path, args.count, args.seed)
        print(json.dumps({"traces": len(traces), "out": str(args.out)}, sort_keys=True))
        return 0

    if args.command == "minimize":
        print(json.dumps(minimize_witness_file(args.witness), indent=2, sort_keys=True))
        return 0

    if args.command == "summarize":
        print(summarize_run(args.run_dir).model_dump_json(indent=2))
        return 0

    if args.command == "baselines":
        print(json.dumps(evaluate_baseline_run(args.run_dir), indent=2, sort_keys=True))
        return 0

    if args.command == "export":
        print(json.dumps(export_run(args.run_dir, args.format, args.out), sort_keys=True))
        return 0

    if args.command == "evidence":
        _write_or_print(render_evidence_tables(parse_run_args(args.runs)), args.out)
        return 0

    if args.command == "artifact-report":
        output = (
            artifact_report_json(args.run_dir, args.domain_path)
            if args.format == "json"
            else render_artifact_report(args.run_dir, args.domain_path)
        )
        _write_or_print(output, args.out)
        return 0

    if args.command == "check-integration" and args.kind == "dbt-semantic":
        print(
            json.dumps(
                compare_dbt_semantic_model(args.domain, args.path, args.domain_path),
                indent=2,
                sort_keys=True,
            )
        )
        return 0

    if args.command == "scan":
        result = run_scan(args.config, args.out)
        print(
            json.dumps(
                {
                    "gate": result.gate.outcome.value,
                    "findings": result.summary.total_findings,
                    "out": result.output_dir,
                },
                sort_keys=True,
            )
        )
        # A failing gate becomes a non-zero exit status.
        return 1 if result.gate.outcome == GateOutcome.FAIL else 0

    raise ValueError(f"unknown command: {args.command}")
210
+
211
+
212
def format_validation_error(exc: ValidationError) -> str:
    """Condense a validation error to ``"<location>: <message>"`` for its first reported error.

    The location is the first error's ``loc`` parts joined with dots, or
    ``"input"`` when empty. Falls back to ``str(exc)`` when no errors are listed.
    """
    details = exc.errors()
    if details:
        head = details[0]
        location = ".".join(map(str, head.get("loc", ())))
        return f"{location or 'input'}: {head.get('msg', 'validation failed')}"
    return str(exc)
219
+
220
+
221
def is_user_type_error(exc: TypeError) -> bool:
    """Return True when the error message starts with one of the recognised user-input prefixes."""
    user_facing_prefixes = (
        "generated count must",
        "matrix count must",
        "suite name must",
    )
    text = str(exc)
    return any(text.startswith(prefix) for prefix in user_facing_prefixes)
@@ -0,0 +1,179 @@
1
+ from __future__ import annotations
2
+
3
+ from policystrata.models import CompileResult, Policy, Principal, SemanticQuery
4
+ from policystrata.policy import PolicyOracle
5
+
6
# Fallback mapping of dimension name -> SQL expression. dimension_sql consults
# it only when the policy does not configure the dimension itself.
DIMENSION_SQL = {
    "region": "accounts.region",
    "plan": "subscriptions.plan",
    "month": "date_trunc('month', invoices.invoice_date)",
    "severity": "support_tickets.severity",
    "customer_email": "accounts.customer_email",
    "tenant_id": "accounts.tenant_id",
}
14
+
15
# Mutations for which compile_query reports includes_tenant_predicate=False.
TENANT_SCOPE_MUTATIONS = {
    "compiler_drops_tenant_predicate",
    "compiler_uses_old_tenant_key",
    "compiler_swaps_tenant_account_id",
}
20
+
21
+
22
def compile_query(
    policy: Policy,
    principal: Principal,
    query: SemanticQuery,
    mutation: str = "none",
    domain: str = "support_saas",
) -> CompileResult:
    """Compile a semantic query into a SQL string plus descriptive metadata.

    Args:
        policy: Supplies metric definitions and configured dimensions.
        principal: Its ``tenant_ids`` feed the tenant-scoping predicate.
        query: Metric, dimensions, time range and limit to compile.
        mutation: Name of the drift to apply to the output. Names not handled
            by the helper functions leave the output unchanged.
        domain: Selects the join path and the tenant and time columns.

    Returns:
        A ``CompileResult`` holding the SQL text, estimated cost, tenant
        predicate flag, metric expression, join grain and time semantics.
    """
    oracle = PolicyOracle(policy)
    metric = policy.metrics.get(oracle.resolve_metric(query.metric))
    if metric is None:
        # Unknown metric: fall back to a row count with no owning table.
        base_expression, metric_table = "count(*)", ""
    else:
        base_expression, metric_table = metric.expression, metric.table
    metric_expression = metric_expression_for_mutation(base_expression, mutation, domain)

    dimension_columns = [dimension_sql(policy, name) for name in query.dimensions]
    projections = [f"{metric_expression} as value"]
    for name, column in zip(query.dimensions, dimension_columns, strict=True):
        projections.append(f"{column} as {name}")

    from_clause = " ".join(join_path_for_mutation(domain, mutation))

    conditions = tenant_predicates(domain, principal, mutation)
    conditions += time_predicates(query.time_range, time_column(domain, metric_table), mutation)

    where_sql = f" where {' and '.join(conditions)}" if conditions else ""
    group_sql = f" group by {', '.join(dimension_columns)}" if dimension_columns else ""
    sql = f"select {', '.join(projections)} {from_clause}{where_sql}{group_sql} limit {query.limit}"

    return CompileResult(
        sql=sql,
        estimated_cost=estimated_cost_for_query(oracle, query, mutation),
        includes_tenant_predicate=mutation not in TENANT_SCOPE_MUTATIONS,
        metric_expression=metric_expression,
        join_grain=join_grain_for_mutation(mutation),
        time_semantics=time_semantics_for_query(query, mutation),
    )
63
+
64
+
65
def estimated_cost_for_query(oracle: PolicyOracle, query: SemanticQuery, mutation: str) -> int:
    """Return the oracle's cost estimate, or 1 under the ``cost_estimate_ignores_expansion`` mutation."""
    return 1 if mutation == "cost_estimate_ignores_expansion" else oracle.estimate_cost(query)
69
+
70
+
71
def join_grain_for_mutation(mutation: str) -> str:
    """Return the join grain label reported for *mutation*, defaulting to ``"account"``."""
    grain_by_mutation = {
        "fanout_join_drift": "ticket_event",
        "compiler_inner_join_drops_rows": "inner_join_required",
    }
    return grain_by_mutation.get(mutation, "account")
77
+
78
+
79
def time_semantics_for_query(query: SemanticQuery, mutation: str) -> str:
    """Return ``"fiscal"`` for a fiscal-month query unless the fiscal/calendar mismatch mutation applies.

    Every other combination returns ``"calendar"``.
    """
    if query.time_range != "last_fiscal_month":
        return "calendar"
    return "calendar" if mutation == "fiscal_calendar_mismatch" else "fiscal"
83
+
84
+
85
def metric_expression_for_mutation(metric_expression: str, mutation: str, domain: str) -> str:
    """Apply the metric-level effect of *mutation* to *metric_expression*.

    ``fanout_join_drift`` doubles the expression, ``compiler_removes_distinct``
    strips ``distinct``, and ``gross_net_metric_drift`` substitutes the
    domain's gross-amount sum. Any other mutation returns the input unchanged.
    """
    if mutation == "fanout_join_drift":
        return f"({metric_expression}) * 2"
    if mutation == "compiler_removes_distinct":
        without_count_distinct = metric_expression.replace("count(distinct ", "count(")
        return without_count_distinct.replace("distinct ", "")
    if mutation == "gross_net_metric_drift":
        return drift_metric_expression(domain)
    return metric_expression
93
+
94
+
95
def dimension_sql(policy: Policy, dimension: str) -> str:
    """Resolve a dimension name to its SQL expression.

    Uses the policy's configured column when present, then the
    ``DIMENSION_SQL`` fallback, and finally the dimension name itself.
    """
    override = policy.dimensions.get(dimension)
    if override is None:
        return DIMENSION_SQL.get(dimension, dimension)
    return override.column
100
+
101
+
102
def drift_metric_expression(domain: str) -> str:
    """Return the gross-amount sum used for the gross/net drift: transactions for finance, invoices otherwise."""
    table = "transactions" if domain == "finance_saas" else "invoices"
    return f"sum({table}.gross_amount_cents)"
106
+
107
+
108
def join_path_for_mutation(domain: str, mutation: str) -> list[str]:
    """Return the domain's join clauses, with left joins turned into plain joins for the inner-join mutation."""
    base_joins = join_path(domain)
    if mutation != "compiler_inner_join_drops_rows":
        return base_joins
    return [clause.replace("left join", "join") for clause in base_joins]
113
+
114
+
115
def join_path(domain: str) -> list[str]:
    """Return the FROM and LEFT JOIN clauses for *domain* as a fresh list.

    ``finance_saas`` starts from ``households``. Every other domain uses the
    ``accounts``-rooted path.
    """
    if domain != "finance_saas":
        return [
            "from accounts",
            "left join subscriptions on subscriptions.account_id = accounts.id",
            "left join invoices on invoices.subscription_id = subscriptions.id",
            "left join support_tickets on support_tickets.account_id = accounts.id",
        ]
    return [
        "from households",
        "left join advisors on advisors.id = households.advisor_id",
        "left join accounts on accounts.household_id = households.id",
        "left join transactions on transactions.account_id = accounts.id",
        "left join balances on balances.account_id = accounts.id",
    ]
130
+
131
+
132
def tenant_predicates(domain: str, principal: Principal, mutation: str) -> list[str]:
    """Build the tenant-scoping WHERE predicate for a compiled query.

    Args:
        domain: Selects the tenant column, via ``tenant_filter_column``.
        principal: Its ``tenant_ids`` become the members of the ``in (...)`` list.
        mutation: May change or remove the filter column.

    Returns:
        A one-element list holding ``"<column> in ('t1', 't2', ...)"``, or an
        empty list when ``tenant_filter_column`` yields no column.
    """
    column = tenant_filter_column(domain, mutation)
    if column is None:
        return []
    # Double embedded single quotes so each id stays one well-formed SQL
    # string literal, even if an id contains a quote character.
    literals = ", ".join(
        "'" + str(tenant).replace("'", "''") + "'" for tenant in principal.tenant_ids
    )
    # NOTE(review): an empty tenant_ids renders `in ()`, which some SQL dialects
    # reject. Confirm callers never pass a principal without tenants.
    return [f"{column} in ({literals})"]
138
+
139
+
140
def tenant_filter_column(domain: str, mutation: str) -> str | None:
    """Return the column the tenant filter applies to, or None when the mutation drops the filter.

    The swap mutation always yields ``accounts.id``, the old-key mutation
    yields the domain's legacy tenant column, and everything else yields the
    domain's regular tenant column.
    """
    if mutation == "compiler_swaps_tenant_account_id":
        return "accounts.id"
    if mutation == "compiler_uses_old_tenant_key":
        return legacy_tenant_column(domain)
    return None if mutation == "compiler_drops_tenant_predicate" else tenant_column(domain)
148
+
149
+
150
def time_predicates(time_range: str, date_column: str, mutation: str) -> list[str]:
    """Return the lower and upper date predicates for *time_range* on *date_column*.

    Under ``fiscal_calendar_mismatch`` a fiscal-month request gets the
    calendar-month window instead. Unrecognised ranges produce no predicates.
    """
    window = time_range
    if time_range == "last_fiscal_month" and mutation == "fiscal_calendar_mismatch":
        window = "last_month"

    # Fixed half-open [start, end) date windows for each supported range.
    bounds = {
        "last_month": ("2026-05-01", "2026-06-01"),
        "last_fiscal_month": ("2026-04-27", "2026-05-25"),
        "quarter_to_date": ("2026-04-01", "2026-06-24"),
    }.get(window)
    if bounds is None:
        return []
    start, end = bounds
    return [f"{date_column} >= date '{start}'", f"{date_column} < date '{end}'"]
160
+
161
+
162
def tenant_column(domain: str) -> str:
    """Return the tenant-scoping column: ``households.firm_id`` for finance, ``accounts.tenant_id`` otherwise."""
    return "households.firm_id" if domain == "finance_saas" else "accounts.tenant_id"
166
+
167
+
168
def legacy_tenant_column(domain: str) -> str:
    """Return the legacy tenant column: ``households.legacy_firm_id`` for finance, ``accounts.legacy_tenant_id`` otherwise."""
    return "households.legacy_firm_id" if domain == "finance_saas" else "accounts.legacy_tenant_id"
172
+
173
+
174
def time_column(domain: str, table: str) -> str:
    """Return the date column that time predicates filter on.

    For finance this is the balance date when the metric's table is
    ``balances`` and the transaction date otherwise. Every other domain uses
    the invoice date.
    """
    if domain != "finance_saas":
        return "invoices.invoice_date"
    return "balances.balance_date" if table == "balances" else "transactions.transaction_date"