agentic-data-contracts 0.2.6__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/CHANGELOG.md +40 -0
  2. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/PKG-INFO +108 -7
  3. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/README.md +107 -6
  4. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/docs/architecture.md +50 -18
  5. agentic_data_contracts-0.4.0/docs/superpowers/plans/2026-03-31-unified-rule-engine.md +2156 -0
  6. agentic_data_contracts-0.4.0/docs/superpowers/specs/2026-03-31-unified-rule-engine-design.md +377 -0
  7. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/pyproject.toml +1 -1
  8. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/__init__.py +4 -1
  9. agentic_data_contracts-0.4.0/src/agentic_data_contracts/core/contract.py +148 -0
  10. agentic_data_contracts-0.4.0/src/agentic_data_contracts/core/prompt.py +249 -0
  11. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/core/schema.py +39 -2
  12. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/tools/factory.py +36 -3
  13. agentic_data_contracts-0.4.0/src/agentic_data_contracts/validation/__init__.py +36 -0
  14. agentic_data_contracts-0.4.0/src/agentic_data_contracts/validation/checkers.py +272 -0
  15. agentic_data_contracts-0.4.0/src/agentic_data_contracts/validation/validator.py +255 -0
  16. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/fixtures/valid_contract.yml +4 -1
  17. agentic_data_contracts-0.4.0/tests/test_core/test_prompt_renderers.py +332 -0
  18. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_core/test_scalability.py +7 -7
  19. agentic_data_contracts-0.4.0/tests/test_core/test_schema.py +210 -0
  20. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_core/test_system_prompt_metrics.py +4 -4
  21. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_public_api.py +4 -0
  22. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_semantic/test_relationships.py +1 -1
  23. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_tools/test_factory.py +137 -0
  24. agentic_data_contracts-0.4.0/tests/test_validation/test_checkers.py +220 -0
  25. agentic_data_contracts-0.4.0/tests/test_validation/test_result_checks.py +171 -0
  26. agentic_data_contracts-0.4.0/tests/test_validation/test_validator.py +321 -0
  27. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/uv.lock +1 -1
  28. agentic_data_contracts-0.2.6/src/agentic_data_contracts/core/contract.py +0 -279
  29. agentic_data_contracts-0.2.6/src/agentic_data_contracts/validation/__init__.py +0 -3
  30. agentic_data_contracts-0.2.6/src/agentic_data_contracts/validation/checkers.py +0 -157
  31. agentic_data_contracts-0.2.6/src/agentic_data_contracts/validation/validator.py +0 -150
  32. agentic_data_contracts-0.2.6/tests/test_core/test_schema.py +0 -68
  33. agentic_data_contracts-0.2.6/tests/test_validation/test_checkers.py +0 -154
  34. agentic_data_contracts-0.2.6/tests/test_validation/test_validator.py +0 -154
  35. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/.github/dependabot.yml +0 -0
  36. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/.github/workflows/ci.yml +0 -0
  37. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/.gitignore +0 -0
  38. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/.pre-commit-config.yaml +0 -0
  39. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/.python-version +0 -0
  40. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/CLAUDE.md +0 -0
  41. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/LICENSE +0 -0
  42. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/examples/revenue_agent/agent.py +0 -0
  43. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/examples/revenue_agent/contract.yml +0 -0
  44. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/examples/revenue_agent/semantic.yml +0 -0
  45. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/examples/revenue_agent/setup_db.py +0 -0
  46. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/adapters/__init__.py +0 -0
  47. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/adapters/base.py +0 -0
  48. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/adapters/duckdb.py +0 -0
  49. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/bridge/__init__.py +0 -0
  50. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/bridge/compiler.py +0 -0
  51. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/core/__init__.py +0 -0
  52. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/core/session.py +0 -0
  53. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/py.typed +0 -0
  54. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/semantic/__init__.py +0 -0
  55. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/semantic/base.py +0 -0
  56. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/semantic/cube.py +0 -0
  57. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/semantic/dbt.py +0 -0
  58. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/semantic/yaml_source.py +0 -0
  59. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/tools/__init__.py +0 -0
  60. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/tools/middleware.py +0 -0
  61. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/tools/sdk.py +0 -0
  62. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/src/agentic_data_contracts/validation/explain.py +0 -0
  63. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/__init__.py +0 -0
  64. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/conftest.py +0 -0
  65. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/fixtures/minimal_contract.yml +0 -0
  66. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/fixtures/sample_cube_schema.yml +0 -0
  67. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/fixtures/sample_dbt_manifest.json +0 -0
  68. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/fixtures/semantic_source.yml +0 -0
  69. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_adapters/__init__.py +0 -0
  70. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_adapters/test_duckdb.py +0 -0
  71. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_bridge/__init__.py +0 -0
  72. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_bridge/test_compiler.py +0 -0
  73. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_core/__init__.py +0 -0
  74. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_core/test_contract.py +0 -0
  75. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_core/test_load_semantic_source.py +0 -0
  76. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_core/test_sdk_config.py +0 -0
  77. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_core/test_session.py +0 -0
  78. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_core/test_wildcard_tables.py +0 -0
  79. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_semantic/__init__.py +0 -0
  80. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_semantic/test_cube.py +0 -0
  81. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_semantic/test_dbt.py +0 -0
  82. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_semantic/test_search.py +0 -0
  83. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_semantic/test_yaml_source.py +0 -0
  84. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_tools/__init__.py +0 -0
  85. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_tools/test_auto_load.py +0 -0
  86. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_tools/test_middleware.py +0 -0
  87. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_tools/test_pagination.py +0 -0
  88. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_tools/test_sdk.py +0 -0
  89. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_tools/test_semantic_tools.py +0 -0
  90. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_tools/test_wildcard_tools.py +0 -0
  91. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_validation/__init__.py +0 -0
  92. {agentic_data_contracts-0.2.6 → agentic_data_contracts-0.4.0}/tests/test_validation/test_explain.py +0 -0
@@ -2,6 +2,46 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.4.0] - 2026-03-31
6
+
7
+ ### Added
8
+
9
+ - **Unified rule engine**: Rules now support `query_check` (pre-execution) and `result_check` (post-execution) blocks, replacing the old `filter_column` shorthand. All rules live in one `rules` list; the engine determines execution phase automatically.
10
+ - **Table scoping**: Every rule can be scoped to a specific table (`table: "schema.table"`) or apply globally (omitted or `"*"`). Pre-execution and post-execution rules both support scoping.
11
+ - **5 built-in query checks**: `required_filter`, `no_select_star`, `blocked_columns`, `require_limit`, `max_joins` — all declarative in YAML, no Python needed.
12
+ - **6 built-in result checks**: `min_value`/`max_value` (numeric column bounds), `not_null`, `min_rows`/`max_rows` — validated against actual query output post-execution.
13
+ - **Advisory rules**: Rules with neither `query_check` nor `result_check` appear in the system prompt as guidance but don't enforce anything.
14
+ - **Session cost enforcement**: `run_query` now records estimated cost from EXPLAIN and enforces cumulative `cost_limit_usd` across the session.
15
+ - **`validate_results()` on Validator**: New method for post-execution result validation, used transparently inside `run_query`.
16
+ - **`validate_query` result check notes**: Output now lists pending result checks that will run at execution time.
17
+ - **New checker classes**: `BlockedColumnsChecker`, `RequireLimitChecker`, `MaxJoinsChecker`, `ResultCheckRunner` — all exported from `validation` module.
18
+
19
+ ### Changed
20
+
21
+ - **Checker protocol**: All checkers now use `check_ast(ast)` instead of `check_sql(sql)`. SQL is parsed once by the Validator and the AST is passed to all checkers.
22
+ - **`extract_tables()` utility**: Extracted from `TableAllowlistChecker` into a standalone function for shared use by the Validator's table scoping logic.
23
+ - **`ValidationResult`**: Gains `estimated_cost_usd: float | None` field for session cost passthrough from EXPLAIN.
24
+ - **Three-phase validation**: Validator now runs query checks (Phase 1) → EXPLAIN (Phase 2) → result checks (Phase 3), up from the previous two-phase pipeline.
25
+
26
+ ### Removed
27
+
28
+ - **`SemanticRule.filter_column`**: Replaced by `query_check: { required_filter: <column> }`. No backward compatibility — the old field is removed entirely.
29
+ - **Heuristic filter detection**: The regex-based `_extract_filter_column()` method that guessed filter columns from rule descriptions is gone. Filters are now explicit in `query_check`.
30
+
31
+ ## [0.3.0] - 2026-03-30
32
+
33
+ ### Added
34
+
35
+ - **`PromptRenderer` protocol**: New `@runtime_checkable` protocol for custom system prompt formatting. Users can implement `render(contract, semantic_source) -> str` to control how contracts are presented to their model of choice.
36
+ - **`ClaudePromptRenderer`**: Built-in XML-structured renderer optimized for Claude models (Sonnet 4.6+). Uses XML tags for structural boundaries, places constraints at the end for better instruction-following, and merges resource/temporal limits into a single section.
37
+ - **Custom renderer support**: `to_system_prompt(renderer=MyRenderer())` delegates entirely to a user-provided renderer.
38
+ - **Top-level exports**: `from agentic_data_contracts import PromptRenderer, ClaudePromptRenderer`
39
+
40
+ ### Changed
41
+
42
+ - **Default system prompt format**: `to_system_prompt()` now generates XML output (was Markdown). Pass a custom renderer if you need a different format.
43
+ - **`contract.py` simplified**: `to_system_prompt()` is now a thin delegate (~7 lines). All prompt-building logic moved to `core/prompt.py`.
44
+
5
45
  ## [0.2.6] - 2026-03-29
6
46
 
7
47
  ### Changed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentic-data-contracts
3
- Version: 0.2.6
3
+ Version: 0.4.0
4
4
  Summary: YAML-first data contract governance for AI agents
5
5
  Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
6
6
  Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
@@ -125,10 +125,13 @@ semantic:
125
125
  - name: tenant_isolation
126
126
  description: "All queries must filter by tenant_id"
127
127
  enforcement: block
128
- filter_column: tenant_id
128
+ query_check:
129
+ required_filter: tenant_id
129
130
  - name: no_select_star
130
131
  description: "Must specify explicit columns"
131
132
  enforcement: block
133
+ query_check:
134
+ no_select_star: true
132
135
 
133
136
  resources:
134
137
  cost_limit_usd: 5.00
@@ -230,11 +233,59 @@ Rules are enforced at three levels:
230
233
  - **`warn`** — query proceeds but a warning is included in the response
231
234
  - **`log`** — violation is recorded but not surfaced to the agent
232
235
 
233
- Built-in checkers enforce:
234
- - **Table allowlist** — only tables listed in `allowed_tables` may be queried
235
- - **Operation blocklist** — `forbidden_operations` (DELETE, DROP, etc.) are rejected
236
- - **Required filters** — rules with `filter_column` require a matching WHERE clause
237
- - **No SELECT \*** — queries must name explicit columns
236
+ A rule may carry a `query_check` (pre-execution) or a `result_check` (post-execution) block. Rules with neither are advisory — they appear in the system prompt but don't enforce anything. Every rule can be scoped to a specific table or applied globally.
237
+
238
+ **Built-in query checks** (pre-execution, validated against SQL AST):
239
+
240
+ | Check | Description |
241
+ |-------|-------------|
242
+ | `required_filter` | Require a column in WHERE clause (e.g., `tenant_id`) |
243
+ | `no_select_star` | Forbid `SELECT *` — require explicit columns |
244
+ | `blocked_columns` | Forbid specific columns in SELECT (e.g., PII) |
245
+ | `require_limit` | Require a LIMIT clause |
246
+ | `max_joins` | Cap the number of JOINs |
247
+
248
+ **Built-in result checks** (post-execution, validated against query output):
249
+
250
+ | Check | Description |
251
+ |-------|-------------|
252
+ | `min_value` / `max_value` | Numeric bounds on a column's values |
253
+ | `not_null` | Column must not contain nulls |
254
+ | `min_rows` / `max_rows` | Row count bounds on the result set |
255
+
256
+ Example with table scoping and both check types:
257
+
258
+ ```yaml
259
+ rules:
260
+ - name: tenant_isolation
261
+ description: "Orders must filter by tenant_id"
262
+ enforcement: block
263
+ table: "analytics.orders" # only applies to this table
264
+ query_check:
265
+ required_filter: tenant_id
266
+
267
+ - name: hide_pii
268
+ description: "Do not select PII columns from customers"
269
+ enforcement: block
270
+ table: "analytics.customers"
271
+ query_check:
272
+ blocked_columns: [ssn, email, phone]
273
+
274
+ - name: wau_sanity
275
+ description: "WAU should not exceed world population"
276
+ enforcement: warn
277
+ table: "analytics.user_metrics"
278
+ result_check:
279
+ column: wau
280
+ max_value: 8_000_000_000
281
+
282
+ - name: no_negative_revenue
283
+ description: "Revenue must not be negative"
284
+ enforcement: block
285
+ result_check:
286
+ column: revenue
287
+ min_value: 0
288
+ ```
238
289
 
239
290
  ## Semantic Sources
240
291
 
@@ -294,6 +345,29 @@ relationships:
294
345
 
295
346
  The agent sees these in its system prompt and uses them to write correct JOINs instead of guessing from column names.
296
347
 
348
+ ## Custom Prompt Rendering
349
+
350
+ The system prompt is generated by a `PromptRenderer`. The default `ClaudePromptRenderer` produces XML-structured output optimized for Claude models:
351
+
352
+ ```python
353
+ dc = DataContract.from_yaml("contract.yml")
354
+ print(dc.to_system_prompt()) # XML output, optimized for Claude
355
+ ```
356
+
357
+ For other models (GPT-4, Gemini, Llama), implement the `PromptRenderer` protocol:
358
+
359
+ ```python
360
+ from agentic_data_contracts import PromptRenderer, DataContract
361
+
362
+ class MarkdownRenderer:
363
+ def render(self, contract, semantic_source=None):
364
+ tables = "\n".join(f"- {t}" for t in contract.allowed_table_names())
365
+ return f"## {contract.name}\n\nAllowed tables:\n{tables}"
366
+
367
+ dc = DataContract.from_yaml("contract.yml")
368
+ print(dc.to_system_prompt(renderer=MarkdownRenderer()))
369
+ ```
370
+
297
371
  ## Scalable Metric Discovery
298
372
 
299
373
  For large data lakes with hundreds of KPIs, group metrics by domain and let the agent discover them efficiently:
@@ -348,6 +422,33 @@ resources:
348
422
  | `agent-sdk` | `claude-agent-sdk` | Claude Agent SDK integration |
349
423
  | `agent-contracts` | `ai-agent-contracts>=0.2.0` | ai-agent-contracts bridge |
350
424
 
425
+ ## Optional: Formal Governance with ai-agent-contracts
426
+
427
+ The library works standalone with lightweight enforcement. Install [`ai-agent-contracts`](https://pypi.org/project/ai-agent-contracts/) to upgrade to the formal governance framework:
428
+
429
+ ```bash
430
+ pip install "agentic-data-contracts[agent-contracts]"
431
+ ```
432
+
433
+ ```python
434
+ from agentic_data_contracts.bridge.compiler import compile_to_contract
435
+
436
+ contract = compile_to_contract(dc) # YAML → formal 7-tuple Contract
437
+ ```
438
+
439
+ **What you get with the bridge:**
440
+
441
+ | Concern | Standalone | With ai-agent-contracts |
442
+ |---|---|---|
443
+ | Resource tracking | Manual counters | Formal `ResourceConstraints` with auto-enforcement |
444
+ | Rule violations | Exception + retry | `TerminationCondition` with contract state machine |
445
+ | Success evaluation | Log-based | Weighted `SuccessCriterion` scoring, LLM judge support |
446
+ | Contract lifecycle | None | `DRAFTED → ACTIVE → FULFILLED / VIOLATED / TERMINATED` |
447
+ | Framework support | Claude Agent SDK | + LiteLLM, LangChain, LangGraph, Google ADK |
448
+ | Multi-agent | Single agent | Coordination patterns (sequential, parallel, hierarchical) |
449
+
450
+ **When to use it:** formal audit trails, success scoring, multi-agent coordination, or integration with non-Claude agent frameworks.
451
+
351
452
  ## Example
352
453
 
353
454
  See [`examples/revenue_agent/`](examples/revenue_agent/) for a complete working example with a DuckDB database, YAML semantic source, and Claude Agent SDK integration.
@@ -72,10 +72,13 @@ semantic:
72
72
  - name: tenant_isolation
73
73
  description: "All queries must filter by tenant_id"
74
74
  enforcement: block
75
- filter_column: tenant_id
75
+ query_check:
76
+ required_filter: tenant_id
76
77
  - name: no_select_star
77
78
  description: "Must specify explicit columns"
78
79
  enforcement: block
80
+ query_check:
81
+ no_select_star: true
79
82
 
80
83
  resources:
81
84
  cost_limit_usd: 5.00
@@ -177,11 +180,59 @@ Rules are enforced at three levels:
177
180
  - **`warn`** — query proceeds but a warning is included in the response
178
181
  - **`log`** — violation is recorded but not surfaced to the agent
179
182
 
180
- Built-in checkers enforce:
181
- - **Table allowlist** — only tables listed in `allowed_tables` may be queried
182
- - **Operation blocklist** — `forbidden_operations` (DELETE, DROP, etc.) are rejected
183
- - **Required filters** — rules with `filter_column` require a matching WHERE clause
184
- - **No SELECT \*** — queries must name explicit columns
183
+ A rule may carry a `query_check` (pre-execution) or a `result_check` (post-execution) block. Rules with neither are advisory — they appear in the system prompt but don't enforce anything. Every rule can be scoped to a specific table or applied globally.
184
+
185
+ **Built-in query checks** (pre-execution, validated against SQL AST):
186
+
187
+ | Check | Description |
188
+ |-------|-------------|
189
+ | `required_filter` | Require a column in WHERE clause (e.g., `tenant_id`) |
190
+ | `no_select_star` | Forbid `SELECT *` — require explicit columns |
191
+ | `blocked_columns` | Forbid specific columns in SELECT (e.g., PII) |
192
+ | `require_limit` | Require a LIMIT clause |
193
+ | `max_joins` | Cap the number of JOINs |
194
+
195
+ **Built-in result checks** (post-execution, validated against query output):
196
+
197
+ | Check | Description |
198
+ |-------|-------------|
199
+ | `min_value` / `max_value` | Numeric bounds on a column's values |
200
+ | `not_null` | Column must not contain nulls |
201
+ | `min_rows` / `max_rows` | Row count bounds on the result set |
202
+
203
+ Example with table scoping and both check types:
204
+
205
+ ```yaml
206
+ rules:
207
+ - name: tenant_isolation
208
+ description: "Orders must filter by tenant_id"
209
+ enforcement: block
210
+ table: "analytics.orders" # only applies to this table
211
+ query_check:
212
+ required_filter: tenant_id
213
+
214
+ - name: hide_pii
215
+ description: "Do not select PII columns from customers"
216
+ enforcement: block
217
+ table: "analytics.customers"
218
+ query_check:
219
+ blocked_columns: [ssn, email, phone]
220
+
221
+ - name: wau_sanity
222
+ description: "WAU should not exceed world population"
223
+ enforcement: warn
224
+ table: "analytics.user_metrics"
225
+ result_check:
226
+ column: wau
227
+ max_value: 8_000_000_000
228
+
229
+ - name: no_negative_revenue
230
+ description: "Revenue must not be negative"
231
+ enforcement: block
232
+ result_check:
233
+ column: revenue
234
+ min_value: 0
235
+ ```
185
236
 
186
237
  ## Semantic Sources
187
238
 
@@ -241,6 +292,29 @@ relationships:
241
292
 
242
293
  The agent sees these in its system prompt and uses them to write correct JOINs instead of guessing from column names.
243
294
 
295
+ ## Custom Prompt Rendering
296
+
297
+ The system prompt is generated by a `PromptRenderer`. The default `ClaudePromptRenderer` produces XML-structured output optimized for Claude models:
298
+
299
+ ```python
300
+ dc = DataContract.from_yaml("contract.yml")
301
+ print(dc.to_system_prompt()) # XML output, optimized for Claude
302
+ ```
303
+
304
+ For other models (GPT-4, Gemini, Llama), implement the `PromptRenderer` protocol:
305
+
306
+ ```python
307
+ from agentic_data_contracts import PromptRenderer, DataContract
308
+
309
+ class MarkdownRenderer:
310
+ def render(self, contract, semantic_source=None):
311
+ tables = "\n".join(f"- {t}" for t in contract.allowed_table_names())
312
+ return f"## {contract.name}\n\nAllowed tables:\n{tables}"
313
+
314
+ dc = DataContract.from_yaml("contract.yml")
315
+ print(dc.to_system_prompt(renderer=MarkdownRenderer()))
316
+ ```
317
+
244
318
  ## Scalable Metric Discovery
245
319
 
246
320
  For large data lakes with hundreds of KPIs, group metrics by domain and let the agent discover them efficiently:
@@ -295,6 +369,33 @@ resources:
295
369
  | `agent-sdk` | `claude-agent-sdk` | Claude Agent SDK integration |
296
370
  | `agent-contracts` | `ai-agent-contracts>=0.2.0` | ai-agent-contracts bridge |
297
371
 
372
+ ## Optional: Formal Governance with ai-agent-contracts
373
+
374
+ The library works standalone with lightweight enforcement. Install [`ai-agent-contracts`](https://pypi.org/project/ai-agent-contracts/) to upgrade to the formal governance framework:
375
+
376
+ ```bash
377
+ pip install "agentic-data-contracts[agent-contracts]"
378
+ ```
379
+
380
+ ```python
381
+ from agentic_data_contracts.bridge.compiler import compile_to_contract
382
+
383
+ contract = compile_to_contract(dc) # YAML → formal 7-tuple Contract
384
+ ```
385
+
386
+ **What you get with the bridge:**
387
+
388
+ | Concern | Standalone | With ai-agent-contracts |
389
+ |---|---|---|
390
+ | Resource tracking | Manual counters | Formal `ResourceConstraints` with auto-enforcement |
391
+ | Rule violations | Exception + retry | `TerminationCondition` with contract state machine |
392
+ | Success evaluation | Log-based | Weighted `SuccessCriterion` scoring, LLM judge support |
393
+ | Contract lifecycle | None | `DRAFTED → ACTIVE → FULFILLED / VIOLATED / TERMINATED` |
394
+ | Framework support | Claude Agent SDK | + LiteLLM, LangChain, LangGraph, Google ADK |
395
+ | Multi-agent | Single agent | Coordination patterns (sequential, parallel, hierarchical) |
396
+
397
+ **When to use it:** formal audit trails, success scoring, multi-agent coordination, or integration with non-Claude agent frameworks.
398
+
298
399
  ## Example
299
400
 
300
401
  See [`examples/revenue_agent/`](examples/revenue_agent/) for a complete working example with a DuckDB database, YAML semantic source, and Claude Agent SDK integration.
@@ -1,7 +1,7 @@
1
1
  # Agentic Data Contracts — Architecture
2
2
 
3
- **Date:** 2026-03-28
4
- **Status:** Implemented (v0.2.2)
3
+ **Date:** 2026-03-31
4
+ **Status:** Implemented (v0.4.0)
5
5
  **Author:** Qing Ye + Claude
6
6
 
7
7
  ## Problem Statement
@@ -106,19 +106,24 @@ semantic:
106
106
  engagement: [active_customers, churn_rate]
107
107
 
108
108
  # Governance rules (per-rule enforcement)
109
+ # A rule may have a query_check (pre-execution) or a result_check (post-execution) block
110
+ # Rules with neither are advisory (shown in prompt only)
109
111
  rules:
110
112
  - name: tenant_isolation
111
113
  description: "All queries must include a WHERE tenant_id = filter"
112
114
  enforcement: block # block | warn | log
113
- filter_column: tenant_id # explicit column for required filter
115
+ query_check:
116
+ required_filter: tenant_id
114
117
 
115
118
  - name: use_approved_metrics
116
119
  description: "Revenue calculations must use the semantic layer definition"
117
- enforcement: warn
120
+ enforcement: warn # advisory — no check block
118
121
 
119
122
  - name: no_select_star
120
123
  description: "Queries must specify explicit columns, no SELECT *"
121
124
  enforcement: block
125
+ query_check:
126
+ no_select_star: true
122
127
 
123
128
  # Resource governance
124
129
  resources:
@@ -180,31 +185,39 @@ When `ai-agent-contracts` IS installed, enforcement is delegated to the formal f
180
185
 
181
186
  ## Validation Layer
182
187
 
183
- Two-layer validation architecture. Dependencies: `sqlglot`.
188
+ Three-phase validation architecture. Dependencies: `sqlglot`.
184
189
 
185
- ### Layer 1: Static Validation (always available)
190
+ ### Phase 1: Query Checks (pre-execution, always available)
186
191
 
187
192
  ```python
188
193
  class Checker(Protocol):
189
- def check(self, parsed_sql: Expression, contract: DataContract) -> CheckResult: ...
194
+ def check_ast(self, ast: Expression, *args) -> CheckResult: ...
190
195
  ```
191
196
 
192
- **Built-in checkers:**
197
+ SQL is parsed once into a sqlglot AST. The Validator passes the AST to all applicable checkers, respecting table scoping.
198
+
199
+ **Structural checkers** (from top-level config):
193
200
 
194
201
  | Checker | What it validates |
195
202
  |---|---|
196
203
  | `TableAllowlistChecker` | All referenced tables are in `allowed_tables` |
197
204
  | `OperationBlocklistChecker` | No forbidden SQL operations (DELETE, DROP, etc.) |
198
- | `RequiredFilterChecker` | Required WHERE clauses present (e.g., `tenant_id`) |
199
- | `NoSelectStarChecker` | No `SELECT *` statements |
205
+
206
+ **Rule-based query checkers** (from `query_check` blocks):
207
+
208
+ | Check | Checker | What it validates |
209
+ |---|---|---|
210
+ | `required_filter` | `RequiredFilterChecker` | Required WHERE clauses present |
211
+ | `no_select_star` | `NoSelectStarChecker` | No `SELECT *` statements |
212
+ | `blocked_columns` | `BlockedColumnsChecker` | Forbidden columns not in SELECT |
213
+ | `require_limit` | `RequireLimitChecker` | LIMIT clause present |
214
+ | `max_joins` | `MaxJoinsChecker` | JOIN count within limit |
200
215
 
201
216
  `CheckResult` contains: `passed: bool`, `severity: block | warn | log`, `message: str`.
202
217
 
203
218
  The validator runs all applicable checkers and aggregates results — any `block` result stops execution, `warn` results are surfaced to the agent, `log` results are recorded silently.
204
219
 
205
- Rules that cannot be statically checked (e.g., "use semantic layer definition for revenue") become:
206
- - An instruction injected into the agent's context via `to_system_prompt()`
207
- - A post-hoc `SuccessCriterion` for evaluation by LLM judge or human review
220
+ Rules that cannot be statically checked (e.g., "use semantic layer definition for revenue") become advisory rules — they appear in the system prompt but don't enforce anything. They can also be used as a `SuccessCriterion` for post-hoc evaluation.
208
221
 
209
222
  ### Phase 2: EXPLAIN Dry-Run (optional, requires database adapter)
210
223
 
@@ -226,16 +239,35 @@ class ExplainAdapter(Protocol):
226
239
  | Postgres | `EXPLAIN` (no ANALYZE) | Row estimates |
227
240
  | DuckDB | `EXPLAIN` | Row estimates |
228
241
 
242
+ ### Phase 3: Result Checks (post-execution, from `result_check` blocks)
243
+
244
+ After a query executes successfully, `run_query` calls `validator.validate_results()` to check the actual output against `result_check` rules.
245
+
246
+ **Built-in result checks:**
247
+
248
+ | Check | What it validates |
249
+ |---|---|
250
+ | `min_value` / `max_value` | Numeric column values within bounds |
251
+ | `not_null` | Column contains no null values |
252
+ | `min_rows` / `max_rows` | Result set row count within bounds |
253
+
254
+ If a result check with `enforcement: block` fails, the query data is **discarded** — the agent sees only the violation message (with actual violating values for debugging). If `enforcement: warn`, the data is returned with warnings prepended.
255
+
229
256
  ### Validation Flow
230
257
 
231
258
  ```
232
259
  SQL string
233
- → sqlglot.parse(sql, dialect=contract.dialect)
234
- Layer 1: run all checkers
260
+ → sqlglot.parse(sql, dialect=contract.dialect) — parse once
261
+ Phase 1: structural checkers + rule-based query_check checkers (table-scoped)
235
262
  → any block? → return ValidationResult(blocked=True, reasons=[...])
236
- Layer 2 available? → explain adapter
263
+ Phase 2 available? → explain adapter
237
264
  → cost/rows exceed limits? → return ValidationResult(blocked=True, reasons=[...])
238
- return ValidationResult(blocked=False, warnings=[...])
265
+ record estimated cost in session
266
+ → execute query
267
+ → Phase 3: result_check rules against actual output (table-scoped)
268
+ → any block? → discard data, return violation
269
+ → any warn? → prepend warnings to response
270
+ → return results
239
271
  ```
240
272
 
241
273
  ## Tools Layer (Claude Agent SDK Integration)
@@ -443,7 +475,7 @@ agentic-data-contracts/
443
475
  │ ├── validation/
444
476
  │ │ ├── __init__.py
445
477
  │ │ ├── validator.py # Orchestrates checkers, aggregates results
446
- │ │ ├── checkers.py # Built-in checkers (4 checkers)
478
+ │ │ ├── checkers.py # Built-in checkers (7 query checkers + ResultCheckRunner)
447
479
  │ │ └── explain.py # EXPLAIN adapter orchestration
448
480
  │ ├── tools/
449
481
  │ │ ├── __init__.py