agentic-data-contracts 0.9.0__tar.gz → 0.9.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/CHANGELOG.md +19 -0
  2. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/CLAUDE.md +1 -1
  3. agentic_data_contracts-0.9.0/README.md → agentic_data_contracts-0.9.2/PKG-INFO +131 -38
  4. agentic_data_contracts-0.9.0/PKG-INFO → agentic_data_contracts-0.9.2/README.md +78 -91
  5. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/docs/architecture.md +4 -2
  6. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/agent.py +12 -0
  7. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/contract.yml +14 -1
  8. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/pyproject.toml +11 -9
  9. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/schema.py +2 -0
  10. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/session.py +13 -1
  11. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/tools/factory.py +11 -5
  12. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/tools/sdk.py +1 -1
  13. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_domain_model.py +19 -0
  14. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_session.py +59 -0
  15. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_factory.py +6 -1
  16. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_semantic_tools.py +45 -0
  17. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/uv.lock +30 -30
  18. agentic_data_contracts-0.9.0/docs/superpowers/plans/2026-04-13-domain-redesign.md +0 -1276
  19. agentic_data_contracts-0.9.0/docs/superpowers/specs/2026-04-13-domain-redesign-design.md +0 -176
  20. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.github/dependabot.yml +0 -0
  21. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.github/workflows/ci.yml +0 -0
  22. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.gitignore +0 -0
  23. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.pre-commit-config.yaml +0 -0
  24. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.python-version +0 -0
  25. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/LICENSE +0 -0
  26. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/semantic.yml +0 -0
  27. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/setup_db.py +0 -0
  28. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/__init__.py +0 -0
  29. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/adapters/__init__.py +0 -0
  30. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/adapters/_normalizer.py +0 -0
  31. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/adapters/base.py +0 -0
  32. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/adapters/duckdb.py +0 -0
  33. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/bridge/__init__.py +0 -0
  34. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/bridge/compiler.py +0 -0
  35. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/__init__.py +0 -0
  36. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/contract.py +0 -0
  37. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/prompt.py +0 -0
  38. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/py.typed +0 -0
  39. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/__init__.py +0 -0
  40. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/base.py +0 -0
  41. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/cube.py +0 -0
  42. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/dbt.py +0 -0
  43. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/yaml_source.py +0 -0
  44. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/tools/__init__.py +0 -0
  45. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/tools/middleware.py +0 -0
  46. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/validation/__init__.py +0 -0
  47. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/validation/checkers.py +0 -0
  48. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/validation/explain.py +0 -0
  49. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/validation/validator.py +0 -0
  50. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/__init__.py +0 -0
  51. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/conftest.py +0 -0
  52. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/minimal_contract.yml +0 -0
  53. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/relationships_checker.yml +0 -0
  54. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/sample_cube_schema.yml +0 -0
  55. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/sample_dbt_manifest.json +0 -0
  56. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/semantic_source.yml +0 -0
  57. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/valid_contract.yml +0 -0
  58. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_adapters/__init__.py +0 -0
  59. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_adapters/test_duckdb.py +0 -0
  60. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_bridge/__init__.py +0 -0
  61. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_bridge/test_compiler.py +0 -0
  62. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/__init__.py +0 -0
  63. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_contract.py +0 -0
  64. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_load_semantic_source.py +0 -0
  65. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_prompt_renderers.py +0 -0
  66. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_scalability.py +0 -0
  67. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_schema.py +0 -0
  68. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_sdk_config.py +0 -0
  69. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_system_prompt_metrics.py +0 -0
  70. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_wildcard_tables.py +0 -0
  71. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_public_api.py +0 -0
  72. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/__init__.py +0 -0
  73. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_cube.py +0 -0
  74. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_dbt.py +0 -0
  75. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_relationships.py +0 -0
  76. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_search.py +0 -0
  77. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_yaml_source.py +0 -0
  78. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/__init__.py +0 -0
  79. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_auto_load.py +0 -0
  80. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_middleware.py +0 -0
  81. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_pagination.py +0 -0
  82. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_relationship_tools.py +0 -0
  83. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_sdk.py +0 -0
  84. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_wildcard_tools.py +0 -0
  85. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/__init__.py +0 -0
  86. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_checkers.py +0 -0
  87. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_explain.py +0 -0
  88. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_relationship_checker.py +0 -0
  89. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_result_checks.py +0 -0
  90. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_sql_normalizer.py +0 -0
  91. {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_validator.py +0 -0
@@ -2,6 +2,25 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.9.2] - 2026-04-15
6
+
7
+ ### Fixed
8
+
9
+ - **Lazy session timer**: `ContractSession` no longer starts its wall-clock timer at construction. The timer now starts on the first `check_limits()` call, so idle time before the user's first interaction does not count against `temporal.max_duration_seconds`. This fixes premature "session expired" errors in long-lived agent setups (Chainlit, Webex bots) where the session object is created well before the first user message. (#16)
10
+
11
+ ### Added
12
+
13
+ - **`ContractSession.reset_timer()`**: New method that resets the duration timer so it restarts on the next `check_limits()` call. Useful for frameworks with their own idle-timeout mechanisms that want to restart the clock on user activity.
14
+
15
+ ## [0.9.1] - 2026-04-13
16
+
17
+ ### Added
18
+
19
+ - **Schema `description` field**: Optional description on `AllowedTable` entries, surfaced via `list_schemas` to help agents understand what each schema contains and when to use it.
20
+ - **Schema `preferred` flag**: Optional boolean on `AllowedTable` (default `false`), surfaced via `list_schemas` to signal which schema the agent should prefer when similar tables exist across schemas.
21
+ - **Example improvements**: Revenue agent example updated with `lookup_domain` and `lookup_metric` demo steps, schema description/preferred in contract, and fixed pre-existing missing `query_check` blocks on `tenant_isolation` and `no_select_star` rules.
22
+ - **Domain-driven README**: README reframed around the domain-driven approach — agents understand business domains before writing SQL.
23
+
5
24
  ## [0.9.0] - 2026-04-13
6
25
 
7
26
  ### Added
@@ -18,7 +18,7 @@
18
18
  src/agentic_data_contracts/
19
19
  ├── core/ # YAML loading, Pydantic models, lightweight enforcement
20
20
  ├── validation/ # sqlglot checkers, Validator (Layer 1 + 2), EXPLAIN protocol
21
- ├── tools/ # 11-tool factory + middleware for Claude Agent SDK
21
+ ├── tools/ # 12-tool factory + middleware for Claude Agent SDK
22
22
  ├── semantic/ # dbt/Cube/YAML source integrations
23
23
  ├── adapters/ # DatabaseAdapter protocol + DuckDB implementation
24
24
  └── bridge/ # Optional ai-agent-contracts compilation
@@ -1,3 +1,56 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentic-data-contracts
3
+ Version: 0.9.2
4
+ Summary: YAML-first, domain-driven data governance for AI agents
5
+ Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
6
+ Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
7
+ Project-URL: Issues, https://github.com/flyersworder/agentic-data-contracts/issues
8
+ Project-URL: Documentation, https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md
9
+ Author-email: Qing <qingye779@gmail.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: ai-agents,analytics,claude,data-contracts,data-governance,dbt,domain-driven,llm,semantic-layer,sql-validation
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.12
23
+ Requires-Dist: pydantic>=2.0
24
+ Requires-Dist: pyyaml>=6.0.3
25
+ Requires-Dist: sqlglot>=23.0
26
+ Requires-Dist: thefuzz>=0.22.1
27
+ Provides-Extra: agent-contracts
28
+ Requires-Dist: ai-agent-contracts>=0.3.0; extra == 'agent-contracts'
29
+ Provides-Extra: agent-sdk
30
+ Requires-Dist: claude-agent-sdk>=0.1.58; extra == 'agent-sdk'
31
+ Provides-Extra: all
32
+ Requires-Dist: ai-agent-contracts>=0.3.0; extra == 'all'
33
+ Requires-Dist: claude-agent-sdk>=0.1.58; extra == 'all'
34
+ Requires-Dist: duckdb; extra == 'all'
35
+ Requires-Dist: google-cloud-bigquery; extra == 'all'
36
+ Requires-Dist: psycopg2-binary; extra == 'all'
37
+ Requires-Dist: snowflake-connector-python; extra == 'all'
38
+ Provides-Extra: bigquery
39
+ Requires-Dist: google-cloud-bigquery; extra == 'bigquery'
40
+ Provides-Extra: dev
41
+ Requires-Dist: duckdb; extra == 'dev'
42
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == 'dev'
43
+ Requires-Dist: pytest-cov>=7.1.0; extra == 'dev'
44
+ Requires-Dist: pytest>=9.0.3; extra == 'dev'
45
+ Requires-Dist: ruff>=0.15.10; extra == 'dev'
46
+ Provides-Extra: duckdb
47
+ Requires-Dist: duckdb; extra == 'duckdb'
48
+ Provides-Extra: postgres
49
+ Requires-Dist: psycopg2-binary; extra == 'postgres'
50
+ Provides-Extra: snowflake
51
+ Requires-Dist: snowflake-connector-python; extra == 'snowflake'
52
+ Description-Content-Type: text/markdown
53
+
1
54
  # agentic-data-contracts
2
55
 
3
56
  [![PyPI version](https://img.shields.io/pypi/v/agentic-data-contracts.svg)](https://pypi.org/project/agentic-data-contracts/)
@@ -5,16 +58,29 @@
5
58
  [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://www.python.org/downloads/)
6
59
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
60
 
8
- **Stop your AI agents from running wild on your data.**
61
+ **YAML-first, domain-driven data governance for AI agents.**
9
62
 
10
- `agentic-data-contracts` lets data engineers define governance contracts in YAML — what tables an agent may query, which operations are forbidden, what resource limits apply — and enforces them automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
63
+ `agentic-data-contracts` takes a domain-driven approach to AI agent governance: instead of letting agents figure out your data landscape by trial and error, you teach them your business domains, metrics, and rules upfront — in YAML. The agent starts by understanding *what* a business domain means, then discovers *which* metrics to use, then builds queries that comply with your governance rules. All enforced automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
11
64
 
12
- **Why?** AI agents querying databases face two problems: **resource runaway** (unbounded compute, endless retries, cost overruns) and **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions). This library addresses both with a single YAML contract.
65
+ **Why domain-driven?** AI agents querying databases face three problems: **resource runaway** (unbounded compute, endless retries, cost overruns), **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions), and **lack of business context** (the agent doesn't know what "revenue" means in *your* company). This library addresses all three with a single YAML contract that combines governance rules with business domain knowledge.
13
66
 
14
67
  **Works with:** [Claude Agent SDK](https://github.com/anthropics/claude-agent-sdk-python) (primary target), or any Python agent framework. Optionally integrates with [ai-agent-contracts](https://pypi.org/project/ai-agent-contracts/) for formal resource governance.
15
68
 
16
69
  ## How It Works
17
70
 
71
+ The agent follows a domain-driven workflow — understanding business context before writing SQL:
72
+
73
+ ```
74
+ 1. Agent receives: "How is revenue trending?"
75
+ 2. lookup_domain("revenue") → "Revenue is recognized at fulfillment, not booking"
76
+ 3. lookup_metric("total_revenue") → SUM(amount) FILTER (WHERE status = 'completed')
77
+ 4. Agent writes SQL using the metric definition
78
+ 5. validate_query(sql) → VALID (passes all contract rules)
79
+ 6. run_query(sql) → results returned
80
+ ```
81
+
82
+ Governance rules are enforced automatically at query time:
83
+
18
84
  ```
19
85
  Agent: "SELECT * FROM analytics.orders"
20
86
  -> BLOCKED (no SELECT * — specify explicit columns)
@@ -24,12 +90,9 @@ Agent: "SELECT order_id, amount FROM analytics.orders"
24
90
 
25
91
  Agent: "SELECT order_id, amount FROM analytics.orders WHERE tenant_id = 'acme'"
26
92
  -> PASSED + WARN (consider using semantic revenue definition)
27
-
28
- Agent: "DELETE FROM analytics.orders WHERE id = 1"
29
- -> BLOCKED (forbidden operation: DELETE)
30
93
  ```
31
94
 
32
- The contract defines the rules. The library enforces them — before the query ever reaches the database.
95
+ The contract defines the domains, metrics, and rules. The library enforces them — before the query ever reaches the database.
33
96
 
34
97
  ## Installation
35
98
 
@@ -64,10 +127,19 @@ semantic:
64
127
  path: "./semantic.yml"
65
128
  allowed_tables:
66
129
  - schema: analytics
130
+ description: "Curated analytics tables — prefer for reporting"
131
+ preferred: true
67
132
  tables: ["*"] # all tables in schema (discovered from database)
68
133
  - schema: marketing
69
134
  tables: [campaigns] # or list specific tables
70
135
  forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
136
+ domains:
137
+ - name: revenue
138
+ summary: "Financial metrics from completed orders"
139
+ description: >
140
+ Revenue is recognized at fulfillment, not at booking.
141
+ Excludes refunds and chargebacks unless stated.
142
+ metrics: [total_revenue]
71
143
  rules:
72
144
  - name: tenant_isolation
73
145
  description: "All queries must filter by tenant_id"
@@ -161,7 +233,7 @@ asyncio.run(demo())
161
233
 
162
234
  | Tool | Description |
163
235
  |------|-------------|
164
- | `list_schemas` | List all allowed database schemas from the contract |
236
+ | `list_schemas` | List allowed schemas with descriptions and preferred flags |
165
237
  | `list_tables` | List allowed tables, optionally filtered by schema |
166
238
  | `describe_table` | Get full column details for an allowed table |
167
239
  | `preview_table` | Preview sample rows from an allowed table |
@@ -174,6 +246,57 @@ asyncio.run(demo())
174
246
  | `run_query` | Validate and execute a SQL query, returning results |
175
247
  | `get_contract_info` | Get the full contract: rules, limits, domains, and session status |
176
248
 
249
+ ## Domain-Driven Agent Workflow
250
+
251
+ The core design principle: **agents should understand the business domain before writing SQL.** Instead of dumping table schemas and hoping for the best, the contract teaches the agent your business vocabulary through progressive disclosure:
252
+
253
+ ```
254
+ 1. Domain context → "What does 'revenue' mean here?"
255
+ 2. Metric definitions → "How is 'total_revenue' calculated?"
256
+ 3. Query execution → "Run the validated SQL"
257
+ ```
258
+
259
+ ### Defining domains
260
+
261
+ Each domain carries a description that teaches the agent your business rules — things the SQL alone can't express:
262
+
263
+ ```yaml
264
+ semantic:
265
+ domains:
266
+ - name: acquisition
267
+ summary: "Customer acquisition costs and conversion metrics"
268
+ description: >
269
+ Acquisition metrics track the cost and efficiency of
270
+ acquiring new customers across all channels.
271
+ CAC is calculated using fully-loaded cost, not just ad spend.
272
+ metrics: [CAC, CPA, CPL, click_through_rate]
273
+ - name: retention
274
+ summary: "Customer retention, churn, and lifetime value"
275
+ description: >
276
+ Retention metrics measure how well we keep customers.
277
+ Churn is measured on a 30-day rolling window.
278
+ A customer is "active" if they had at least one qualifying
279
+ action in the window.
280
+ metrics: [churn_rate, LTV, retention_30d]
281
+ ```
282
+
283
+ ### How the agent uses domains
284
+
285
+ The system prompt gives the agent a compact domain index. When a user asks a domain-specific question, the agent explores progressively:
286
+
287
+ ```
288
+ lookup_domain("acquisition") → business context + metric descriptions
289
+ lookup_metric("CAC") → SQL expression, source table, filters
290
+ lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candidates
291
+ list_metrics(domain="retention") → all metrics in the retention domain
292
+ ```
293
+
294
+ This means the agent knows that "revenue is recognized at fulfillment, not at booking" *before* it writes a single line of SQL — reducing hallucinated metrics and incorrect calculations.
295
+
296
+ ### Why progressive disclosure works
297
+
298
+ This pattern — compact index in the prompt, detailed context on demand — is the same philosophy validated by agent skill systems, MCP tool servers, and RAG architectures. Instead of overloading the agent's context window with everything upfront, you give it just enough to know *where to look*, then let it pull details when needed. The result is better token efficiency, more focused reasoning, and fewer hallucinations from context overload.
299
+
177
300
  ## Contract Rules
178
301
 
179
302
  Rules are enforced at three levels:
@@ -342,36 +465,6 @@ dc = DataContract.from_yaml("contract.yml")
342
465
  print(dc.to_system_prompt(renderer=MarkdownRenderer()))
343
466
  ```
344
467
 
345
- ## Business Domains
346
-
347
- Domains provide business context that helps agents understand *what* they're being asked about before getting into the mechanics of *how* to calculate it:
348
-
349
- ```yaml
350
- semantic:
351
- domains:
352
- - name: acquisition
353
- summary: "Customer acquisition costs and conversion metrics"
354
- description: >
355
- Acquisition metrics track the cost and efficiency of
356
- acquiring new customers across all channels.
357
- metrics: [CAC, CPA, CPL, click_through_rate]
358
- - name: retention
359
- summary: "Customer retention, churn, and lifetime value"
360
- description: >
361
- Retention metrics measure how well we keep customers.
362
- Churn is measured on a 30-day rolling window.
363
- metrics: [churn_rate, LTV, retention_30d]
364
- ```
365
-
366
- The system prompt shows a compact domain index. The agent uses `lookup_domain` for business context, then `lookup_metric` for SQL definitions:
367
-
368
- ```
369
- lookup_domain("acquisition") → full description + metrics with descriptions
370
- lookup_metric("CAC") → exact match, SQL definition
371
- lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candidates
372
- list_metrics(domain="retention") → only retention metrics
373
- ```
374
-
375
468
  ## Scaling to Large Organizations
376
469
 
377
470
  Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.
@@ -1,56 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: agentic-data-contracts
3
- Version: 0.9.0
4
- Summary: YAML-first data contract governance for AI agents
5
- Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
6
- Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
7
- Project-URL: Issues, https://github.com/flyersworder/agentic-data-contracts/issues
8
- Project-URL: Documentation, https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md
9
- Author-email: Qing <qingye779@gmail.com>
10
- License: MIT
11
- License-File: LICENSE
12
- Keywords: ai-agents,analytics,claude,data-contracts,data-governance,dbt,llm,sql-validation
13
- Classifier: Development Status :: 4 - Beta
14
- Classifier: Intended Audience :: Developers
15
- Classifier: Intended Audience :: Science/Research
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.12
18
- Classifier: Programming Language :: Python :: 3.13
19
- Classifier: Topic :: Database
20
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
- Requires-Python: >=3.12
23
- Requires-Dist: pydantic>=2.0
24
- Requires-Dist: pyyaml>=6.0
25
- Requires-Dist: sqlglot>=23.0
26
- Requires-Dist: thefuzz>=0.22.1
27
- Provides-Extra: agent-contracts
28
- Requires-Dist: ai-agent-contracts>=0.2.0; extra == 'agent-contracts'
29
- Provides-Extra: agent-sdk
30
- Requires-Dist: claude-agent-sdk>=0.1.52; extra == 'agent-sdk'
31
- Provides-Extra: all
32
- Requires-Dist: ai-agent-contracts>=0.2.0; extra == 'all'
33
- Requires-Dist: claude-agent-sdk>=0.1.52; extra == 'all'
34
- Requires-Dist: duckdb; extra == 'all'
35
- Requires-Dist: google-cloud-bigquery; extra == 'all'
36
- Requires-Dist: psycopg2-binary; extra == 'all'
37
- Requires-Dist: snowflake-connector-python; extra == 'all'
38
- Provides-Extra: bigquery
39
- Requires-Dist: google-cloud-bigquery; extra == 'bigquery'
40
- Provides-Extra: dev
41
- Requires-Dist: duckdb; extra == 'dev'
42
- Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
43
- Requires-Dist: pytest-cov>=6.0; extra == 'dev'
44
- Requires-Dist: pytest>=8.0; extra == 'dev'
45
- Requires-Dist: ruff>=0.8.0; extra == 'dev'
46
- Provides-Extra: duckdb
47
- Requires-Dist: duckdb; extra == 'duckdb'
48
- Provides-Extra: postgres
49
- Requires-Dist: psycopg2-binary; extra == 'postgres'
50
- Provides-Extra: snowflake
51
- Requires-Dist: snowflake-connector-python; extra == 'snowflake'
52
- Description-Content-Type: text/markdown
53
-
54
1
  # agentic-data-contracts
55
2
 
56
3
  [![PyPI version](https://img.shields.io/pypi/v/agentic-data-contracts.svg)](https://pypi.org/project/agentic-data-contracts/)
@@ -58,16 +5,29 @@ Description-Content-Type: text/markdown
58
5
  [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://www.python.org/downloads/)
59
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
60
7
 
61
- **Stop your AI agents from running wild on your data.**
8
+ **YAML-first, domain-driven data governance for AI agents.**
62
9
 
63
- `agentic-data-contracts` lets data engineers define governance contracts in YAML — what tables an agent may query, which operations are forbidden, what resource limits apply — and enforces them automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
10
+ `agentic-data-contracts` takes a domain-driven approach to AI agent governance: instead of letting agents figure out your data landscape by trial and error, you teach them your business domains, metrics, and rules upfront — in YAML. The agent starts by understanding *what* a business domain means, then discovers *which* metrics to use, then builds queries that comply with your governance rules. All enforced automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
64
11
 
65
- **Why?** AI agents querying databases face two problems: **resource runaway** (unbounded compute, endless retries, cost overruns) and **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions). This library addresses both with a single YAML contract.
12
+ **Why domain-driven?** AI agents querying databases face three problems: **resource runaway** (unbounded compute, endless retries, cost overruns), **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions), and **lack of business context** (the agent doesn't know what "revenue" means in *your* company). This library addresses all three with a single YAML contract that combines governance rules with business domain knowledge.
66
13
 
67
14
  **Works with:** [Claude Agent SDK](https://github.com/anthropics/claude-agent-sdk-python) (primary target), or any Python agent framework. Optionally integrates with [ai-agent-contracts](https://pypi.org/project/ai-agent-contracts/) for formal resource governance.
68
15
 
69
16
  ## How It Works
70
17
 
18
+ The agent follows a domain-driven workflow — understanding business context before writing SQL:
19
+
20
+ ```
21
+ 1. Agent receives: "How is revenue trending?"
22
+ 2. lookup_domain("revenue") → "Revenue is recognized at fulfillment, not booking"
23
+ 3. lookup_metric("total_revenue") → SUM(amount) FILTER (WHERE status = 'completed')
24
+ 4. Agent writes SQL using the metric definition
25
+ 5. validate_query(sql) → VALID (passes all contract rules)
26
+ 6. run_query(sql) → results returned
27
+ ```
28
+
29
+ Governance rules are enforced automatically at query time:
30
+
71
31
  ```
72
32
  Agent: "SELECT * FROM analytics.orders"
73
33
  -> BLOCKED (no SELECT * — specify explicit columns)
@@ -77,12 +37,9 @@ Agent: "SELECT order_id, amount FROM analytics.orders"
77
37
 
78
38
  Agent: "SELECT order_id, amount FROM analytics.orders WHERE tenant_id = 'acme'"
79
39
  -> PASSED + WARN (consider using semantic revenue definition)
80
-
81
- Agent: "DELETE FROM analytics.orders WHERE id = 1"
82
- -> BLOCKED (forbidden operation: DELETE)
83
40
  ```
84
41
 
85
- The contract defines the rules. The library enforces them — before the query ever reaches the database.
42
+ The contract defines the domains, metrics, and rules. The library enforces them — before the query ever reaches the database.
86
43
 
87
44
  ## Installation
88
45
 
@@ -117,10 +74,19 @@ semantic:
117
74
  path: "./semantic.yml"
118
75
  allowed_tables:
119
76
  - schema: analytics
77
+ description: "Curated analytics tables — prefer for reporting"
78
+ preferred: true
120
79
  tables: ["*"] # all tables in schema (discovered from database)
121
80
  - schema: marketing
122
81
  tables: [campaigns] # or list specific tables
123
82
  forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
83
+ domains:
84
+ - name: revenue
85
+ summary: "Financial metrics from completed orders"
86
+ description: >
87
+ Revenue is recognized at fulfillment, not at booking.
88
+ Excludes refunds and chargebacks unless stated.
89
+ metrics: [total_revenue]
124
90
  rules:
125
91
  - name: tenant_isolation
126
92
  description: "All queries must filter by tenant_id"
@@ -214,7 +180,7 @@ asyncio.run(demo())
214
180
 
215
181
  | Tool | Description |
216
182
  |------|-------------|
217
- | `list_schemas` | List all allowed database schemas from the contract |
183
+ | `list_schemas` | List allowed schemas with descriptions and preferred flags |
218
184
  | `list_tables` | List allowed tables, optionally filtered by schema |
219
185
  | `describe_table` | Get full column details for an allowed table |
220
186
  | `preview_table` | Preview sample rows from an allowed table |
@@ -227,6 +193,57 @@ asyncio.run(demo())
227
193
  | `run_query` | Validate and execute a SQL query, returning results |
228
194
  | `get_contract_info` | Get the full contract: rules, limits, domains, and session status |
229
195
 
196
+ ## Domain-Driven Agent Workflow
197
+
198
+ The core design principle: **agents should understand the business domain before writing SQL.** Instead of dumping table schemas and hoping for the best, the contract teaches the agent your business vocabulary through progressive disclosure:
199
+
200
+ ```
201
+ 1. Domain context → "What does 'revenue' mean here?"
202
+ 2. Metric definitions → "How is 'total_revenue' calculated?"
203
+ 3. Query execution → "Run the validated SQL"
204
+ ```
205
+
206
+ ### Defining domains
207
+
208
+ Each domain carries a description that teaches the agent your business rules — things the SQL alone can't express:
209
+
210
+ ```yaml
211
+ semantic:
212
+ domains:
213
+ - name: acquisition
214
+ summary: "Customer acquisition costs and conversion metrics"
215
+ description: >
216
+ Acquisition metrics track the cost and efficiency of
217
+ acquiring new customers across all channels.
218
+ CAC is calculated using fully-loaded cost, not just ad spend.
219
+ metrics: [CAC, CPA, CPL, click_through_rate]
220
+ - name: retention
221
+ summary: "Customer retention, churn, and lifetime value"
222
+ description: >
223
+ Retention metrics measure how well we keep customers.
224
+ Churn is measured on a 30-day rolling window.
225
+ A customer is "active" if they had at least one qualifying
226
+ action in the window.
227
+ metrics: [churn_rate, LTV, retention_30d]
228
+ ```
229
+
230
+ ### How the agent uses domains
231
+
232
+ The system prompt gives the agent a compact domain index. When a user asks a domain-specific question, the agent explores progressively:
233
+
234
+ ```
235
+ lookup_domain("acquisition") → business context + metric descriptions
236
+ lookup_metric("CAC") → SQL expression, source table, filters
237
+ lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candidates
238
+ list_metrics(domain="retention") → all metrics in the retention domain
239
+ ```
240
+
241
+ This means the agent learns business rules such as "revenue is recognized at fulfillment, not at booking" *before* it writes a single line of SQL — reducing hallucinated metrics and incorrect calculations.
242
+
243
+ ### Why progressive disclosure works
244
+
245
+ This pattern — compact index in the prompt, detailed context on demand — is the same philosophy validated by agent skill systems, MCP tool servers, and RAG architectures. Instead of overloading the agent's context window with everything upfront, you give it just enough to know *where to look*, then let it pull details when needed. The result is better token efficiency, more focused reasoning, and fewer hallucinations from context overload.
246
+
230
247
  ## Contract Rules
231
248
 
232
249
  Rules are enforced at three levels:
@@ -395,36 +412,6 @@ dc = DataContract.from_yaml("contract.yml")
395
412
  print(dc.to_system_prompt(renderer=MarkdownRenderer()))
396
413
  ```
397
414
 
398
- ## Business Domains
399
-
400
- Domains provide business context that helps agents understand *what* they're being asked about before getting into the mechanics of *how* to calculate it:
401
-
402
- ```yaml
403
- semantic:
404
- domains:
405
- - name: acquisition
406
- summary: "Customer acquisition costs and conversion metrics"
407
- description: >
408
- Acquisition metrics track the cost and efficiency of
409
- acquiring new customers across all channels.
410
- metrics: [CAC, CPA, CPL, click_through_rate]
411
- - name: retention
412
- summary: "Customer retention, churn, and lifetime value"
413
- description: >
414
- Retention metrics measure how well we keep customers.
415
- Churn is measured on a 30-day rolling window.
416
- metrics: [churn_rate, LTV, retention_30d]
417
- ```
418
-
419
- The system prompt shows a compact domain index. The agent uses `lookup_domain` for business context, then `lookup_metric` for SQL definitions:
420
-
421
- ```
422
- lookup_domain("acquisition") → full description + metrics with descriptions
423
- lookup_metric("CAC") → exact match, SQL definition
424
- lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candidates
425
- list_metrics(domain="retention") → only retention metrics
426
- ```
427
-
428
415
  ## Scaling to Large Organizations
429
416
 
430
417
  Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.
@@ -1,7 +1,7 @@
1
1
  # Agentic Data Contracts — Architecture
2
2
 
3
3
  **Date:** 2026-04-13
4
- **Status:** Implemented (v0.9.0)
4
+ **Status:** Implemented (v0.9.2)
5
5
  **Author:** Qing Ye + Claude
6
6
 
7
7
  ## Problem Statement
@@ -93,6 +93,8 @@ semantic:
93
93
  # What the agent is allowed to access
94
94
  allowed_tables:
95
95
  - schema: analytics
96
+ description: "Curated analytics tables — prefer for reporting"
97
+ preferred: true # agent should prefer this schema
96
98
  tables: [orders, customers, subscriptions]
97
99
  - schema: raw
98
100
  tables: [] # empty = nothing from this schema
@@ -186,7 +188,7 @@ When `ai-agent-contracts` is NOT installed, `ContractSession` provides self-cont
186
188
 
187
189
  - **Retry count** — incremented on each failed query attempt, checked against `max_retries`
188
190
  - **Token usage** — tracked via callback, checked against `token_budget`
189
- - **Wall-clock duration** — start time recorded, checked against `max_duration_seconds`
191
+ - **Wall-clock duration** — the timer starts lazily on the first `check_limits()` call (not at construction) and is checked against `max_duration_seconds`. It can be reset via `reset_timer()` for frameworks that manage their own idle timeouts.
190
192
  - **Cost estimate** — if EXPLAIN adapter returns cost info, checked against `cost_limit_usd`
191
193
 
192
194
  These are simple counters/timers with guard checks before each tool call. No formal state machine.
@@ -86,6 +86,18 @@ async def _run_demo(tools: list, prompt: str) -> None:
86
86
  print("\n=== Available Tables ===")
87
87
  print(result["content"][0]["text"])
88
88
 
89
+ # Domain discovery: understand the business context before querying
90
+ tool = next(t for t in tools if t.name == "lookup_domain")
91
+ result = await tool.callable({"name": "revenue"})
92
+ print("\n=== Lookup Domain (revenue) ===")
93
+ print(result["content"][0]["text"])
94
+
95
+ # Metric lookup: get the SQL definition for a specific metric
96
+ tool = next(t for t in tools if t.name == "lookup_metric")
97
+ result = await tool.callable({"metric_name": "total_revenue"})
98
+ print("\n=== Lookup Metric (total_revenue) ===")
99
+ print(result["content"][0]["text"])
100
+
89
101
  tool = next(t for t in tools if t.name == "validate_query")
90
102
  sql = (
91
103
  "SELECT c.region, SUM(o.amount) as revenue "
@@ -7,20 +7,33 @@ semantic:
7
7
  path: "./semantic.yml"
8
8
  allowed_tables:
9
9
  - schema: analytics
10
+ description: "Curated analytics tables — prefer these for all reporting queries"
11
+ preferred: true
10
12
  tables: [orders, customers, subscriptions]
11
13
  forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
12
14
  domains:
13
- revenue: [total_revenue, revenue_by_region]
15
+ - name: revenue
16
+ summary: "Financial metrics from completed orders"
17
+ description: >
18
+ Revenue metrics track recognized revenue from completed orders.
19
+ Revenue is recognized at fulfillment, not at booking.
20
+ Excludes refunds, chargebacks, and pending orders unless stated.
21
+ metrics: [total_revenue, revenue_by_region]
22
+ tables: [analytics.orders, analytics.customers]
14
23
  rules:
15
24
  - name: tenant_isolation
16
25
  description: "All queries must filter by tenant_id"
17
26
  enforcement: block
27
+ query_check:
28
+ required_filter: tenant_id
18
29
  - name: use_semantic_revenue
19
30
  description: "Revenue calculations must use the metric definitions"
20
31
  enforcement: warn
21
32
  - name: no_select_star
22
33
  description: "Must specify explicit columns"
23
34
  enforcement: block
35
+ query_check:
36
+ no_select_star: true
24
37
 
25
38
  resources:
26
39
  cost_limit_usd: 5.00
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "agentic-data-contracts"
3
- version = "0.9.0"
4
- description = "YAML-first data contract governance for AI agents"
3
+ version = "0.9.2"
4
+ description = "YAML-first, domain-driven data governance for AI agents"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
7
7
  authors = [{ name = "Qing", email = "qingye779@gmail.com" }]
@@ -10,9 +10,11 @@ keywords = [
10
10
  "ai-agents",
11
11
  "data-contracts",
12
12
  "data-governance",
13
+ "domain-driven",
13
14
  "sql-validation",
14
15
  "llm",
15
16
  "claude",
17
+ "semantic-layer",
16
18
  "analytics",
17
19
  "dbt",
18
20
  ]
@@ -30,7 +32,7 @@ classifiers = [
30
32
  dependencies = [
31
33
  "sqlglot>=23.0",
32
34
  "pydantic>=2.0",
33
- "pyyaml>=6.0",
35
+ "pyyaml>=6.0.3",
34
36
  "thefuzz>=0.22.1",
35
37
  ]
36
38
 
@@ -41,17 +43,17 @@ Issues = "https://github.com/flyersworder/agentic-data-contracts/issues"
41
43
  Documentation = "https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md"
42
44
 
43
45
  [project.optional-dependencies]
44
- agent-sdk = ["claude-agent-sdk>=0.1.52"]
45
- agent-contracts = ["ai-agent-contracts>=0.2.0"]
46
+ agent-sdk = ["claude-agent-sdk>=0.1.58"]
47
+ agent-contracts = ["ai-agent-contracts>=0.3.0"]
46
48
  bigquery = ["google-cloud-bigquery"]
47
49
  snowflake = ["snowflake-connector-python"]
48
50
  postgres = ["psycopg2-binary"]
49
51
  duckdb = ["duckdb"]
50
52
  dev = [
51
- "pytest>=8.0",
52
- "pytest-asyncio>=0.23",
53
- "pytest-cov>=6.0",
54
- "ruff>=0.8.0",
53
+ "pytest>=9.0.3",
54
+ "pytest-asyncio>=1.3.0",
55
+ "pytest-cov>=7.1.0",
56
+ "ruff>=0.15.10",
55
57
  "duckdb",
56
58
  ]
57
59
  all = [
@@ -22,6 +22,8 @@ class SemanticSource(BaseModel):
22
22
  class AllowedTable(BaseModel):
23
23
  schema_: str = Field(alias="schema")
24
24
  tables: list[str] = Field(default_factory=list)
25
+ description: str | None = None
26
+ preferred: bool = False
25
27
 
26
28
  model_config = {"populate_by_name": True}
27
29