agentic-data-contracts 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/CHANGELOG.md +36 -0
  2. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/PKG-INFO +58 -7
  3. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/README.md +57 -6
  4. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/docs/architecture.md +61 -19
  5. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/pyproject.toml +1 -1
  6. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/__init__.py +2 -0
  7. agentic_data_contracts-0.5.0/src/agentic_data_contracts/adapters/__init__.py +15 -0
  8. agentic_data_contracts-0.5.0/src/agentic_data_contracts/adapters/_normalizer.py +20 -0
  9. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/adapters/base.py +12 -0
  10. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/core/schema.py +39 -2
  11. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/tools/factory.py +44 -5
  12. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/tools/middleware.py +8 -2
  13. agentic_data_contracts-0.5.0/src/agentic_data_contracts/validation/__init__.py +36 -0
  14. agentic_data_contracts-0.5.0/src/agentic_data_contracts/validation/checkers.py +272 -0
  15. agentic_data_contracts-0.5.0/src/agentic_data_contracts/validation/validator.py +268 -0
  16. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/fixtures/valid_contract.yml +4 -1
  17. agentic_data_contracts-0.5.0/tests/test_core/test_schema.py +210 -0
  18. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_tools/test_factory.py +137 -0
  19. agentic_data_contracts-0.5.0/tests/test_validation/test_checkers.py +220 -0
  20. agentic_data_contracts-0.5.0/tests/test_validation/test_result_checks.py +171 -0
  21. agentic_data_contracts-0.5.0/tests/test_validation/test_sql_normalizer.py +204 -0
  22. agentic_data_contracts-0.5.0/tests/test_validation/test_validator.py +321 -0
  23. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/uv.lock +1 -1
  24. agentic_data_contracts-0.3.0/src/agentic_data_contracts/validation/__init__.py +0 -3
  25. agentic_data_contracts-0.3.0/src/agentic_data_contracts/validation/checkers.py +0 -157
  26. agentic_data_contracts-0.3.0/src/agentic_data_contracts/validation/validator.py +0 -150
  27. agentic_data_contracts-0.3.0/tests/test_core/test_schema.py +0 -68
  28. agentic_data_contracts-0.3.0/tests/test_validation/__init__.py +0 -0
  29. agentic_data_contracts-0.3.0/tests/test_validation/test_checkers.py +0 -154
  30. agentic_data_contracts-0.3.0/tests/test_validation/test_validator.py +0 -154
  31. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/.github/dependabot.yml +0 -0
  32. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/.github/workflows/ci.yml +0 -0
  33. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/.gitignore +0 -0
  34. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/.pre-commit-config.yaml +0 -0
  35. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/.python-version +0 -0
  36. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/CLAUDE.md +0 -0
  37. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/LICENSE +0 -0
  38. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/examples/revenue_agent/agent.py +0 -0
  39. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/examples/revenue_agent/contract.yml +0 -0
  40. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/examples/revenue_agent/semantic.yml +0 -0
  41. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/examples/revenue_agent/setup_db.py +0 -0
  42. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/adapters/duckdb.py +0 -0
  43. {agentic_data_contracts-0.3.0/src/agentic_data_contracts/adapters → agentic_data_contracts-0.5.0/src/agentic_data_contracts/bridge}/__init__.py +0 -0
  44. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/bridge/compiler.py +0 -0
  45. {agentic_data_contracts-0.3.0/src/agentic_data_contracts/bridge → agentic_data_contracts-0.5.0/src/agentic_data_contracts/core}/__init__.py +0 -0
  46. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/core/contract.py +0 -0
  47. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/core/prompt.py +0 -0
  48. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/core/session.py +0 -0
  49. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/py.typed +0 -0
  50. {agentic_data_contracts-0.3.0/src/agentic_data_contracts/core → agentic_data_contracts-0.5.0/src/agentic_data_contracts/semantic}/__init__.py +0 -0
  51. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/semantic/base.py +0 -0
  52. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/semantic/cube.py +0 -0
  53. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/semantic/dbt.py +0 -0
  54. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/semantic/yaml_source.py +0 -0
  55. {agentic_data_contracts-0.3.0/src/agentic_data_contracts/semantic → agentic_data_contracts-0.5.0/src/agentic_data_contracts/tools}/__init__.py +0 -0
  56. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/tools/sdk.py +0 -0
  57. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/src/agentic_data_contracts/validation/explain.py +0 -0
  58. {agentic_data_contracts-0.3.0/src/agentic_data_contracts/tools → agentic_data_contracts-0.5.0/tests}/__init__.py +0 -0
  59. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/conftest.py +0 -0
  60. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/fixtures/minimal_contract.yml +0 -0
  61. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/fixtures/sample_cube_schema.yml +0 -0
  62. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/fixtures/sample_dbt_manifest.json +0 -0
  63. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/fixtures/semantic_source.yml +0 -0
  64. {agentic_data_contracts-0.3.0/tests → agentic_data_contracts-0.5.0/tests/test_adapters}/__init__.py +0 -0
  65. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_adapters/test_duckdb.py +0 -0
  66. {agentic_data_contracts-0.3.0/tests/test_adapters → agentic_data_contracts-0.5.0/tests/test_bridge}/__init__.py +0 -0
  67. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_bridge/test_compiler.py +0 -0
  68. {agentic_data_contracts-0.3.0/tests/test_bridge → agentic_data_contracts-0.5.0/tests/test_core}/__init__.py +0 -0
  69. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_core/test_contract.py +0 -0
  70. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_core/test_load_semantic_source.py +0 -0
  71. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_core/test_prompt_renderers.py +0 -0
  72. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_core/test_scalability.py +0 -0
  73. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_core/test_sdk_config.py +0 -0
  74. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_core/test_session.py +0 -0
  75. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_core/test_system_prompt_metrics.py +0 -0
  76. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_core/test_wildcard_tables.py +0 -0
  77. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_public_api.py +0 -0
  78. {agentic_data_contracts-0.3.0/tests/test_core → agentic_data_contracts-0.5.0/tests/test_semantic}/__init__.py +0 -0
  79. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_semantic/test_cube.py +0 -0
  80. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_semantic/test_dbt.py +0 -0
  81. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_semantic/test_relationships.py +0 -0
  82. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_semantic/test_search.py +0 -0
  83. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_semantic/test_yaml_source.py +0 -0
  84. {agentic_data_contracts-0.3.0/tests/test_semantic → agentic_data_contracts-0.5.0/tests/test_tools}/__init__.py +0 -0
  85. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_tools/test_auto_load.py +0 -0
  86. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_tools/test_middleware.py +0 -0
  87. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_tools/test_pagination.py +0 -0
  88. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_tools/test_sdk.py +0 -0
  89. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_tools/test_semantic_tools.py +0 -0
  90. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_tools/test_wildcard_tools.py +0 -0
  91. {agentic_data_contracts-0.3.0/tests/test_tools → agentic_data_contracts-0.5.0/tests/test_validation}/__init__.py +0 -0
  92. {agentic_data_contracts-0.3.0 → agentic_data_contracts-0.5.0}/tests/test_validation/test_explain.py +0 -0
@@ -2,6 +2,42 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.5.0] - 2026-04-04
6
+
7
+ ### Added
8
+
9
+ - **`SqlNormalizer` protocol**: Optional pre-processing hook for adapters serving non-standard SQL dialects (e.g., Denodo VQL, Teradata). Adapters implement `normalize_sql(sql) -> str` to rewrite proprietary syntax into a form sqlglot can parse, while the original SQL is preserved for `execute()` and `explain()`.
10
+ - **Auto-detection in factory and middleware**: When an adapter implements both `DatabaseAdapter` and `SqlNormalizer`, the factory and middleware automatically wire normalization into the `Validator` — no API changes needed.
11
+ - **Normalization in `validate_results()`**: Table-scoped result checks now also benefit from SQL normalization, ensuring scoped checks fire correctly for non-standard dialects.
12
+ - **Adapter package exports**: `adapters/__init__.py` now re-exports `Column`, `DatabaseAdapter`, `QueryResult`, `SqlNormalizer`, and `TableSchema`.
13
+ - **Root export**: `SqlNormalizer` is available via `from agentic_data_contracts import SqlNormalizer`.
14
+
15
+ ## [0.4.0] - 2026-03-31
16
+
17
+ ### Added
18
+
19
+ - **Unified rule engine**: Rules now support `query_check` (pre-execution) and `result_check` (post-execution) blocks, replacing the old `filter_column` shorthand. All rules live in one `rules` list; the engine determines execution phase automatically.
20
+ - **Table scoping**: Every rule can be scoped to a specific table (`table: "schema.table"`) or apply globally (omitted or `"*"`). Pre-execution and post-execution rules both support scoping.
21
+ - **5 built-in query checks**: `required_filter`, `no_select_star`, `blocked_columns`, `require_limit`, `max_joins` — all declarative in YAML, no Python needed.
22
+ - **5 built-in result checks**: `min_value`/`max_value` (numeric column bounds), `not_null`, `min_rows`/`max_rows` — validated against actual query output post-execution.
23
+ - **Advisory rules**: Rules with neither `query_check` nor `result_check` appear in the system prompt as guidance but don't enforce anything.
24
+ - **Session cost enforcement**: `run_query` now records estimated cost from EXPLAIN and enforces cumulative `cost_limit_usd` across the session.
25
+ - **`validate_results()` on Validator**: New method for post-execution result validation, used transparently inside `run_query`.
26
+ - **`validate_query` result check notes**: Output now lists pending result checks that will run at execution time.
27
+ - **New checker classes**: `BlockedColumnsChecker`, `RequireLimitChecker`, `MaxJoinsChecker`, `ResultCheckRunner` — all exported from `validation` module.
28
+
29
+ ### Changed
30
+
31
+ - **Checker protocol**: All checkers now use `check_ast(ast)` instead of `check_sql(sql)`. SQL is parsed once by the Validator and the AST is passed to all checkers.
32
+ - **`extract_tables()` utility**: Extracted from `TableAllowlistChecker` into a standalone function for shared use by the Validator's table scoping logic.
33
+ - **`ValidationResult`**: Gains `estimated_cost_usd: float | None` field for session cost passthrough from EXPLAIN.
34
+ - **Three-phase validation**: Validator now runs query checks (Phase 1) → EXPLAIN (Phase 2) → result checks (Phase 3), up from the previous two-phase pipeline.
35
+
36
+ ### Removed
37
+
38
+ - **`SemanticRule.filter_column`**: Replaced by `query_check: { required_filter: <column> }`. No backward compatibility — the old field is removed entirely.
39
+ - **Heuristic filter detection**: The regex-based `_extract_filter_column()` method that guessed filter columns from rule descriptions is gone. Filters are now explicit in `query_check`.
40
+
5
41
  ## [0.3.0] - 2026-03-30
6
42
 
7
43
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentic-data-contracts
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: YAML-first data contract governance for AI agents
5
5
  Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
6
6
  Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
@@ -125,10 +125,13 @@ semantic:
125
125
  - name: tenant_isolation
126
126
  description: "All queries must filter by tenant_id"
127
127
  enforcement: block
128
- filter_column: tenant_id
128
+ query_check:
129
+ required_filter: tenant_id
129
130
  - name: no_select_star
130
131
  description: "Must specify explicit columns"
131
132
  enforcement: block
133
+ query_check:
134
+ no_select_star: true
132
135
 
133
136
  resources:
134
137
  cost_limit_usd: 5.00
@@ -230,11 +233,59 @@ Rules are enforced at three levels:
230
233
  - **`warn`** — query proceeds but a warning is included in the response
231
234
  - **`log`** — violation is recorded but not surfaced to the agent
232
235
 
233
- Built-in checkers enforce:
234
- - **Table allowlist** — only tables listed in `allowed_tables` may be queried
235
- - **Operation blocklist** — `forbidden_operations` (DELETE, DROP, etc.) are rejected
236
- - **Required filters** — rules with `filter_column` require a matching WHERE clause
237
- - **No SELECT \*** — queries must name explicit columns
236
+ A rule may carry either a `query_check` (pre-execution) or a `result_check` (post-execution) block, but not both. Rules with neither are advisory — they appear in the system prompt but don't enforce anything. Every rule can be scoped to a specific table or applied globally.
237
+
238
+ **Built-in query checks** (pre-execution, validated against SQL AST):
239
+
240
+ | Check | Description |
241
+ |-------|-------------|
242
+ | `required_filter` | Require a column in WHERE clause (e.g., `tenant_id`) |
243
+ | `no_select_star` | Forbid `SELECT *` — require explicit columns |
244
+ | `blocked_columns` | Forbid specific columns in SELECT (e.g., PII) |
245
+ | `require_limit` | Require a LIMIT clause |
246
+ | `max_joins` | Cap the number of JOINs |
247
+
248
+ **Built-in result checks** (post-execution, validated against query output):
249
+
250
+ | Check | Description |
251
+ |-------|-------------|
252
+ | `min_value` / `max_value` | Numeric bounds on a column's values |
253
+ | `not_null` | Column must not contain nulls |
254
+ | `min_rows` / `max_rows` | Row count bounds on the result set |
255
+
256
+ Example with table scoping and both check types:
257
+
258
+ ```yaml
259
+ rules:
260
+ - name: tenant_isolation
261
+ description: "Orders must filter by tenant_id"
262
+ enforcement: block
263
+ table: "analytics.orders" # only applies to this table
264
+ query_check:
265
+ required_filter: tenant_id
266
+
267
+ - name: hide_pii
268
+ description: "Do not select PII columns from customers"
269
+ enforcement: block
270
+ table: "analytics.customers"
271
+ query_check:
272
+ blocked_columns: [ssn, email, phone]
273
+
274
+ - name: wau_sanity
275
+ description: "WAU should not exceed world population"
276
+ enforcement: warn
277
+ table: "analytics.user_metrics"
278
+ result_check:
279
+ column: wau
280
+ max_value: 8_000_000_000
281
+
282
+ - name: no_negative_revenue
283
+ description: "Revenue must not be negative"
284
+ enforcement: block
285
+ result_check:
286
+ column: revenue
287
+ min_value: 0
288
+ ```
238
289
 
239
290
  ## Semantic Sources
240
291
 
@@ -72,10 +72,13 @@ semantic:
72
72
  - name: tenant_isolation
73
73
  description: "All queries must filter by tenant_id"
74
74
  enforcement: block
75
- filter_column: tenant_id
75
+ query_check:
76
+ required_filter: tenant_id
76
77
  - name: no_select_star
77
78
  description: "Must specify explicit columns"
78
79
  enforcement: block
80
+ query_check:
81
+ no_select_star: true
79
82
 
80
83
  resources:
81
84
  cost_limit_usd: 5.00
@@ -177,11 +180,59 @@ Rules are enforced at three levels:
177
180
  - **`warn`** — query proceeds but a warning is included in the response
178
181
  - **`log`** — violation is recorded but not surfaced to the agent
179
182
 
180
- Built-in checkers enforce:
181
- - **Table allowlist** — only tables listed in `allowed_tables` may be queried
182
- - **Operation blocklist** — `forbidden_operations` (DELETE, DROP, etc.) are rejected
183
- - **Required filters** — rules with `filter_column` require a matching WHERE clause
184
- - **No SELECT \*** — queries must name explicit columns
183
+ A rule may carry either a `query_check` (pre-execution) or a `result_check` (post-execution) block, but not both. Rules with neither are advisory — they appear in the system prompt but don't enforce anything. Every rule can be scoped to a specific table or applied globally.
184
+
185
+ **Built-in query checks** (pre-execution, validated against SQL AST):
186
+
187
+ | Check | Description |
188
+ |-------|-------------|
189
+ | `required_filter` | Require a column in WHERE clause (e.g., `tenant_id`) |
190
+ | `no_select_star` | Forbid `SELECT *` — require explicit columns |
191
+ | `blocked_columns` | Forbid specific columns in SELECT (e.g., PII) |
192
+ | `require_limit` | Require a LIMIT clause |
193
+ | `max_joins` | Cap the number of JOINs |
194
+
195
+ **Built-in result checks** (post-execution, validated against query output):
196
+
197
+ | Check | Description |
198
+ |-------|-------------|
199
+ | `min_value` / `max_value` | Numeric bounds on a column's values |
200
+ | `not_null` | Column must not contain nulls |
201
+ | `min_rows` / `max_rows` | Row count bounds on the result set |
202
+
203
+ Example with table scoping and both check types:
204
+
205
+ ```yaml
206
+ rules:
207
+ - name: tenant_isolation
208
+ description: "Orders must filter by tenant_id"
209
+ enforcement: block
210
+ table: "analytics.orders" # only applies to this table
211
+ query_check:
212
+ required_filter: tenant_id
213
+
214
+ - name: hide_pii
215
+ description: "Do not select PII columns from customers"
216
+ enforcement: block
217
+ table: "analytics.customers"
218
+ query_check:
219
+ blocked_columns: [ssn, email, phone]
220
+
221
+ - name: wau_sanity
222
+ description: "WAU should not exceed world population"
223
+ enforcement: warn
224
+ table: "analytics.user_metrics"
225
+ result_check:
226
+ column: wau
227
+ max_value: 8_000_000_000
228
+
229
+ - name: no_negative_revenue
230
+ description: "Revenue must not be negative"
231
+ enforcement: block
232
+ result_check:
233
+ column: revenue
234
+ min_value: 0
235
+ ```
185
236
 
186
237
  ## Semantic Sources
187
238
 
@@ -1,7 +1,7 @@
1
1
  # Agentic Data Contracts — Architecture
2
2
 
3
- **Date:** 2026-03-28
4
- **Status:** Implemented (v0.2.2)
3
+ **Date:** 2026-04-04
4
+ **Status:** Implemented (v0.5.0)
5
5
  **Author:** Qing Ye + Claude
6
6
 
7
7
  ## Problem Statement
@@ -106,19 +106,24 @@ semantic:
106
106
  engagement: [active_customers, churn_rate]
107
107
 
108
108
  # Governance rules (per-rule enforcement)
109
+ # Each rule has a query_check (pre-execution) or result_check (post-execution)
110
+ # Rules with neither are advisory (shown in prompt only)
109
111
  rules:
110
112
  - name: tenant_isolation
111
113
  description: "All queries must include a WHERE tenant_id = filter"
112
114
  enforcement: block # block | warn | log
113
- filter_column: tenant_id # explicit column for required filter
115
+ query_check:
116
+ required_filter: tenant_id
114
117
 
115
118
  - name: use_approved_metrics
116
119
  description: "Revenue calculations must use the semantic layer definition"
117
- enforcement: warn
120
+ enforcement: warn # advisory — no check block
118
121
 
119
122
  - name: no_select_star
120
123
  description: "Queries must specify explicit columns, no SELECT *"
121
124
  enforcement: block
125
+ query_check:
126
+ no_select_star: true
122
127
 
123
128
  # Resource governance
124
129
  resources:
@@ -180,31 +185,39 @@ When `ai-agent-contracts` IS installed, enforcement is delegated to the formal f
180
185
 
181
186
  ## Validation Layer
182
187
 
183
- Two-layer validation architecture. Dependencies: `sqlglot`.
188
+ Three-phase validation architecture. Dependencies: `sqlglot`.
184
189
 
185
- ### Layer 1: Static Validation (always available)
190
+ ### Phase 1: Query Checks (pre-execution, always available)
186
191
 
187
192
  ```python
188
193
  class Checker(Protocol):
189
- def check(self, parsed_sql: Expression, contract: DataContract) -> CheckResult: ...
194
+ def check_ast(self, ast: Expression, *args) -> CheckResult: ...
190
195
  ```
191
196
 
192
- **Built-in checkers:**
197
+ SQL is parsed once into a sqlglot AST. The Validator passes the AST to all applicable checkers, respecting table scoping.
198
+
199
+ **Structural checkers** (from top-level config):
193
200
 
194
201
  | Checker | What it validates |
195
202
  |---|---|
196
203
  | `TableAllowlistChecker` | All referenced tables are in `allowed_tables` |
197
204
  | `OperationBlocklistChecker` | No forbidden SQL operations (DELETE, DROP, etc.) |
198
- | `RequiredFilterChecker` | Required WHERE clauses present (e.g., `tenant_id`) |
199
- | `NoSelectStarChecker` | No `SELECT *` statements |
205
+
206
+ **Rule-based query checkers** (from `query_check` blocks):
207
+
208
+ | Check | Checker | What it validates |
209
+ |---|---|---|
210
+ | `required_filter` | `RequiredFilterChecker` | Required WHERE clauses present |
211
+ | `no_select_star` | `NoSelectStarChecker` | No `SELECT *` statements |
212
+ | `blocked_columns` | `BlockedColumnsChecker` | Forbidden columns not in SELECT |
213
+ | `require_limit` | `RequireLimitChecker` | LIMIT clause present |
214
+ | `max_joins` | `MaxJoinsChecker` | JOIN count within limit |
200
215
 
201
216
  `CheckResult` contains: `passed: bool`, `severity: block | warn | log`, `message: str`.
202
217
 
203
218
  The validator runs all applicable checkers and aggregates results — any `block` result stops execution, `warn` results are surfaced to the agent, `log` results are recorded silently.
204
219
 
205
- Rules that cannot be statically checked (e.g., "use semantic layer definition for revenue") become:
206
- - An instruction injected into the agent's context via `to_system_prompt()`
207
- - A post-hoc `SuccessCriterion` for evaluation by LLM judge or human review
220
+ Rules that cannot be statically checked (e.g., "use semantic layer definition for revenue") become advisory rules — they appear in the system prompt but don't enforce anything. They can also serve as a `SuccessCriterion` for post-hoc evaluation.
208
221
 
209
222
  ### Phase 2: EXPLAIN Dry-Run (optional, requires database adapter)
210
223
 
@@ -226,16 +239,35 @@ class ExplainAdapter(Protocol):
226
239
  | Postgres | `EXPLAIN` (no ANALYZE) | Row estimates |
227
240
  | DuckDB | `EXPLAIN` | Row estimates |
228
241
 
242
+ ### Phase 3: Result Checks (post-execution, from `result_check` blocks)
243
+
244
+ After a query executes successfully, `run_query` calls `validator.validate_results()` to check the actual output against `result_check` rules.
245
+
246
+ **Built-in result checks:**
247
+
248
+ | Check | What it validates |
249
+ |---|---|
250
+ | `min_value` / `max_value` | Numeric column values within bounds |
251
+ | `not_null` | Column contains no null values |
252
+ | `min_rows` / `max_rows` | Result set row count within bounds |
253
+
254
+ If a result check with `enforcement: block` fails, the query data is **discarded** — the agent sees only the violation message (with actual violating values for debugging). If `enforcement: warn`, the data is returned with warnings prepended.
255
+
229
256
  ### Validation Flow
230
257
 
231
258
  ```
232
259
  SQL string
233
- → sqlglot.parse(sql, dialect=contract.dialect)
234
- Layer 1: run all checkers
260
+ → sqlglot.parse(sql, dialect=contract.dialect) — parse once
261
+ Phase 1: structural checkers + rule-based query_check checkers (table-scoped)
235
262
  → any block? → return ValidationResult(blocked=True, reasons=[...])
236
- Layer 2 available? → explain adapter
263
+ Phase 2 available? → explain adapter
237
264
  → cost/rows exceed limits? → return ValidationResult(blocked=True, reasons=[...])
238
- return ValidationResult(blocked=False, warnings=[...])
265
+ record estimated cost in session
266
+ → execute query
267
+ → Phase 3: result_check rules against actual output (table-scoped)
268
+ → any block? → discard data, return violation
269
+ → any warn? → prepend warnings to response
270
+ → return results
239
271
  ```
240
272
 
241
273
  ## Tools Layer (Claude Agent SDK Integration)
@@ -361,8 +393,17 @@ class DatabaseAdapter(Protocol):
361
393
  def describe_table(self, schema: str, table: str) -> TableSchema: ...
362
394
  @property
363
395
  def dialect(self) -> str: ... # "bigquery", "snowflake", "postgres", "duckdb"
396
+
397
+ class SqlNormalizer(Protocol):
398
+ def normalize_sql(self, sql: str) -> str: ...
364
399
  ```
365
400
 
401
+ ### SQL Normalization for Non-Standard Dialects
402
+
403
+ Adapters for databases with proprietary SQL extensions (Denodo VQL, Teradata, ClickHouse) can implement `SqlNormalizer` alongside `DatabaseAdapter`. The `Validator` calls `normalize_sql()` before `sqlglot.parse_one()` to rewrite non-standard syntax into a form sqlglot can parse. The original SQL is preserved for `execute()` and `explain()`.
404
+
405
+ Detection is automatic: `create_tools()` and `contract_middleware()` check `isinstance(adapter, SqlNormalizer)` and wire it into the `Validator` if present. Standard-dialect adapters are unaffected.
406
+
366
407
  **`describe_table` maps to native commands:**
367
408
 
368
409
  | Database | Command | What you get |
@@ -443,7 +484,7 @@ agentic-data-contracts/
443
484
  │ ├── validation/
444
485
  │ │ ├── __init__.py
445
486
  │ │ ├── validator.py # Orchestrates checkers, aggregates results
446
- │ │ ├── checkers.py # Built-in checkers (4 checkers)
487
+ │ │ ├── checkers.py # Built-in checkers (7 query checkers + ResultCheckRunner)
447
488
  │ │ └── explain.py # EXPLAIN adapter orchestration
448
489
  │ ├── tools/
449
490
  │ │ ├── __init__.py
@@ -457,7 +498,8 @@ agentic-data-contracts/
457
498
  │ │ └── yaml_source.py # YamlSource
458
499
  │ ├── adapters/
459
500
  │ │ ├── __init__.py
460
- │ │ ├── base.py # DatabaseAdapter protocol
501
+ │ │ ├── _normalizer.py # SqlNormalizer protocol (avoids circular import)
502
+ │ │ ├── base.py # DatabaseAdapter protocol + SqlNormalizer re-export
461
503
  │ │ ├── bigquery.py # BigQuery adapter
462
504
  │ │ ├── snowflake.py # Snowflake adapter
463
505
  │ │ ├── postgres.py # Postgres adapter
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "agentic-data-contracts"
3
- version = "0.3.0"
3
+ version = "0.5.0"
4
4
  description = "YAML-first data contract governance for AI agents"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -1,5 +1,6 @@
1
1
  """Agentic Data Contracts — YAML-first data contract governance for AI agents."""
2
2
 
3
+ from agentic_data_contracts.adapters.base import SqlNormalizer
3
4
  from agentic_data_contracts.core.contract import DataContract
4
5
  from agentic_data_contracts.core.prompt import ClaudePromptRenderer, PromptRenderer
5
6
  from agentic_data_contracts.tools.factory import create_tools
@@ -10,6 +11,7 @@ __all__ = [
10
11
  "ClaudePromptRenderer",
11
12
  "DataContract",
12
13
  "PromptRenderer",
14
+ "SqlNormalizer",
13
15
  "contract_middleware",
14
16
  "create_sdk_mcp_server",
15
17
  "create_tools",
@@ -0,0 +1,15 @@
1
+ from agentic_data_contracts.adapters.base import (
2
+ Column,
3
+ DatabaseAdapter,
4
+ QueryResult,
5
+ SqlNormalizer,
6
+ TableSchema,
7
+ )
8
+
9
+ __all__ = [
10
+ "Column",
11
+ "DatabaseAdapter",
12
+ "QueryResult",
13
+ "SqlNormalizer",
14
+ "TableSchema",
15
+ ]
@@ -0,0 +1,20 @@
1
+ """SqlNormalizer protocol — standalone module to avoid circular imports."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Protocol, runtime_checkable
6
+
7
+
8
+ @runtime_checkable
9
+ class SqlNormalizer(Protocol):
10
+ """Rewrite database-specific SQL into a form sqlglot can parse.
11
+
12
+ Called by the Validator before AST parsing. Adapters for non-standard
13
+ dialects implement this alongside DatabaseAdapter. Standard-dialect
14
+ adapters do not need to implement this — the Validator treats its
15
+ absence as a no-op.
16
+
17
+ The original (un-normalized) SQL is still passed to execute() and explain().
18
+ """
19
+
20
+ def normalize_sql(self, sql: str) -> str: ...
@@ -41,3 +41,15 @@ class DatabaseAdapter(Protocol):
41
41
 
42
42
  @property
43
43
  def dialect(self) -> str: ...
44
+
45
+
46
+ # Re-export SqlNormalizer so consumers can import from adapters.base
47
+ from agentic_data_contracts.adapters._normalizer import SqlNormalizer # noqa: E402
48
+
49
+ __all__ = [
50
+ "Column",
51
+ "DatabaseAdapter",
52
+ "QueryResult",
53
+ "SqlNormalizer",
54
+ "TableSchema",
55
+ ]
@@ -3,8 +3,9 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from enum import StrEnum
6
+ from typing import Self
6
7
 
7
- from pydantic import BaseModel, Field
8
+ from pydantic import BaseModel, Field, field_validator, model_validator
8
9
 
9
10
 
10
11
  class Enforcement(StrEnum):
@@ -25,11 +26,47 @@ class AllowedTable(BaseModel):
25
26
  model_config = {"populate_by_name": True}
26
27
 
27
28
 
29
+ class QueryCheck(BaseModel):
30
+ required_filter: str | None = None
31
+ no_select_star: bool | None = None
32
+ blocked_columns: list[str] | None = None
33
+ require_limit: bool | None = None
34
+ max_joins: int | None = None
35
+
36
+
37
+ class ResultCheck(BaseModel):
38
+ column: str | None = None
39
+ min_value: float | None = None
40
+ max_value: float | None = None
41
+ not_null: bool | None = None
42
+ min_rows: int | None = None
43
+ max_rows: int | None = None
44
+
45
+
28
46
  class SemanticRule(BaseModel):
47
+ model_config = {"extra": "forbid"}
48
+
29
49
  name: str
30
50
  description: str
31
51
  enforcement: Enforcement
32
- filter_column: str | None = None # explicit column for required filter rules
52
+ table: str | None = None
53
+ query_check: QueryCheck | None = None
54
+ result_check: ResultCheck | None = None
55
+
56
+ @field_validator("table")
57
+ @classmethod
58
+ def table_must_be_qualified(cls, v: str | None) -> str | None:
59
+ if v is not None and v != "*" and "." not in v:
60
+ raise ValueError(
61
+ f"table must be fully qualified as 'schema.table', got '{v}'"
62
+ )
63
+ return v
64
+
65
+ @model_validator(mode="after")
66
+ def at_most_one_check(self) -> Self:
67
+ if self.query_check is not None and self.result_check is not None:
68
+ raise ValueError("Rule must not have both query_check and result_check")
69
+ return self
33
70
 
34
71
 
35
72
  class SemanticConfig(BaseModel):
@@ -6,7 +6,7 @@ import json
6
6
  from dataclasses import dataclass
7
7
  from typing import Any
8
8
 
9
- from agentic_data_contracts.adapters.base import DatabaseAdapter
9
+ from agentic_data_contracts.adapters.base import DatabaseAdapter, SqlNormalizer
10
10
  from agentic_data_contracts.core.contract import DataContract
11
11
  from agentic_data_contracts.core.session import ContractSession, LimitExceededError
12
12
  from agentic_data_contracts.semantic.base import SemanticSource
@@ -46,7 +46,13 @@ def create_tools(
46
46
  contract.resolve_tables(adapter)
47
47
 
48
48
  dialect = adapter.dialect if adapter else None
49
- validator = Validator(contract, dialect=dialect, explain_adapter=adapter)
49
+ sql_normalizer = adapter if isinstance(adapter, SqlNormalizer) else None
50
+ validator = Validator(
51
+ contract,
52
+ dialect=dialect,
53
+ explain_adapter=adapter,
54
+ sql_normalizer=sql_normalizer,
55
+ )
50
56
 
51
57
  # ── Tool 1: list_schemas ──────────────────────────────────────────────────
52
58
  async def list_schemas(args: dict[str, Any]) -> dict[str, Any]:
@@ -220,9 +226,16 @@ def create_tools(
220
226
  if result.warnings:
221
227
  msg += "\nWarnings:\n" + "\n".join(f"- {w}" for w in result.warnings)
222
228
  else:
223
- msg = "VALID — Query passed all checks."
229
+ msg = "VALID — Query passed all pre-execution checks."
224
230
  if result.warnings:
225
231
  msg += "\nWarnings:\n" + "\n".join(f"- {w}" for w in result.warnings)
232
+ # Note pending result checks
233
+ pending = validator.pending_result_check_names()
234
+ if pending:
235
+ msg += (
236
+ f"\nNote: {len(pending)} result check(s) will run after execution: "
237
+ + ", ".join(pending)
238
+ )
226
239
  return _text_response(msg)
227
240
 
228
241
  # ── Tool 8: query_cost_estimate ───────────────────────────────────────────
@@ -253,7 +266,7 @@ def create_tools(
253
266
  except LimitExceededError as e:
254
267
  return _text_response(f"BLOCKED — Session limit exceeded: {e}")
255
268
 
256
- # Validate the query
269
+ # Phase 1 + 2: query checks + EXPLAIN
257
270
  vresult = validator.validate(sql)
258
271
  if vresult.blocked:
259
272
  session.record_retry()
@@ -262,6 +275,13 @@ def create_tools(
262
275
  )
263
276
  return _text_response(msg)
264
277
 
278
+ # Record estimated cost from EXPLAIN — charged before execution because
279
+ # the cost budget tracks database resource consumption, not successful
280
+ # operations. Even if result checks later block the output, the database
281
+ # work was performed.
282
+ if vresult.estimated_cost_usd is not None:
283
+ session.record_cost(vresult.estimated_cost_usd)
284
+
265
285
  if adapter is None:
266
286
  return _text_response(
267
287
  "No database adapter configured — cannot execute query."
@@ -273,13 +293,32 @@ def create_tools(
273
293
  session.record_retry()
274
294
  return _text_response(f"BLOCKED — Query execution failed: {e}")
275
295
 
296
+ # Phase 3: result checks
297
+ rresult = validator.validate_results(
298
+ sql, qresult.columns, [tuple(r) for r in qresult.rows]
299
+ )
300
+ if rresult.blocked:
301
+ session.record_retry()
302
+ msg = "BLOCKED — Result check violations:\n" + "\n".join(
303
+ f"- {r}" for r in rresult.reasons
304
+ )
305
+ return _text_response(msg)
306
+
276
307
  rows = [dict(zip(qresult.columns, row)) for row in qresult.rows]
277
308
  data = {
278
309
  "columns": qresult.columns,
279
310
  "rows": rows,
280
311
  "row_count": qresult.row_count,
281
312
  }
282
- return _text_response(json.dumps(data, default=str))
313
+ response_text = json.dumps(data, default=str)
314
+
315
+ # Prepend warnings from both query checks and result checks
316
+ all_warnings = vresult.warnings + rresult.warnings
317
+ if all_warnings:
318
+ warning_text = "WARNINGS:\n" + "\n".join(f"- {w}" for w in all_warnings)
319
+ response_text = warning_text + "\n\n" + response_text
320
+
321
+ return _text_response(response_text)
283
322
 
284
323
  # ── Tool 10: get_contract_info ────────────────────────────────────────────
285
324
  async def get_contract_info(args: dict[str, Any]) -> dict[str, Any]:
@@ -6,7 +6,7 @@ import functools
6
6
  from collections.abc import Callable
7
7
  from typing import Any
8
8
 
9
- from agentic_data_contracts.adapters.base import DatabaseAdapter
9
+ from agentic_data_contracts.adapters.base import DatabaseAdapter, SqlNormalizer
10
10
  from agentic_data_contracts.core.contract import DataContract
11
11
  from agentic_data_contracts.core.session import ContractSession, LimitExceededError
12
12
  from agentic_data_contracts.validation.validator import Validator
@@ -22,7 +22,13 @@ def contract_middleware(
22
22
  session = ContractSession(contract)
23
23
 
24
24
  dialect = adapter.dialect if adapter else None
25
- validator = Validator(contract, dialect=dialect, explain_adapter=adapter)
25
+ sql_normalizer = adapter if isinstance(adapter, SqlNormalizer) else None
26
+ validator = Validator(
27
+ contract,
28
+ dialect=dialect,
29
+ explain_adapter=adapter,
30
+ sql_normalizer=sql_normalizer,
31
+ )
26
32
 
27
33
  def decorator(fn):
28
34
  @functools.wraps(fn)
@@ -0,0 +1,36 @@
1
+ from agentic_data_contracts.validation.checkers import (
2
+ BlockedColumnsChecker,
3
+ CheckResult,
4
+ MaxJoinsChecker,
5
+ NoSelectStarChecker,
6
+ OperationBlocklistChecker,
7
+ RequiredFilterChecker,
8
+ RequireLimitChecker,
9
+ ResultCheckRunner,
10
+ TableAllowlistChecker,
11
+ extract_tables,
12
+ )
13
+ from agentic_data_contracts.validation.explain import ExplainAdapter, ExplainResult
14
+ from agentic_data_contracts.validation.validator import (
15
+ Checker,
16
+ ValidationResult,
17
+ Validator,
18
+ )
19
+
20
+ __all__ = [
21
+ "BlockedColumnsChecker",
22
+ "CheckResult",
23
+ "Checker",
24
+ "ExplainAdapter",
25
+ "ExplainResult",
26
+ "MaxJoinsChecker",
27
+ "NoSelectStarChecker",
28
+ "OperationBlocklistChecker",
29
+ "RequiredFilterChecker",
30
+ "RequireLimitChecker",
31
+ "ResultCheckRunner",
32
+ "TableAllowlistChecker",
33
+ "ValidationResult",
34
+ "Validator",
35
+ "extract_tables",
36
+ ]