agentic-data-contracts 0.9.0__tar.gz → 0.9.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/CHANGELOG.md +19 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/CLAUDE.md +1 -1
- agentic_data_contracts-0.9.0/README.md → agentic_data_contracts-0.9.2/PKG-INFO +131 -38
- agentic_data_contracts-0.9.0/PKG-INFO → agentic_data_contracts-0.9.2/README.md +78 -91
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/docs/architecture.md +4 -2
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/agent.py +12 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/contract.yml +14 -1
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/pyproject.toml +11 -9
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/schema.py +2 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/session.py +13 -1
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/tools/factory.py +11 -5
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/tools/sdk.py +1 -1
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_domain_model.py +19 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_session.py +59 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_factory.py +6 -1
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_semantic_tools.py +45 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/uv.lock +30 -30
- agentic_data_contracts-0.9.0/docs/superpowers/plans/2026-04-13-domain-redesign.md +0 -1276
- agentic_data_contracts-0.9.0/docs/superpowers/specs/2026-04-13-domain-redesign-design.md +0 -176
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.github/dependabot.yml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.github/workflows/ci.yml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.gitignore +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.pre-commit-config.yaml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/.python-version +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/LICENSE +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/semantic.yml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/setup_db.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/adapters/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/adapters/_normalizer.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/adapters/base.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/adapters/duckdb.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/bridge/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/bridge/compiler.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/contract.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/prompt.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/py.typed +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/base.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/cube.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/dbt.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/semantic/yaml_source.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/tools/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/tools/middleware.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/validation/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/validation/checkers.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/validation/explain.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/validation/validator.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/conftest.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/minimal_contract.yml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/relationships_checker.yml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/sample_cube_schema.yml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/sample_dbt_manifest.json +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/semantic_source.yml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/fixtures/valid_contract.yml +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_adapters/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_adapters/test_duckdb.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_bridge/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_bridge/test_compiler.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_contract.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_load_semantic_source.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_prompt_renderers.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_scalability.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_schema.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_sdk_config.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_system_prompt_metrics.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_core/test_wildcard_tables.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_public_api.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_cube.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_dbt.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_relationships.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_search.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_semantic/test_yaml_source.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_auto_load.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_middleware.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_pagination.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_relationship_tools.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_sdk.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_tools/test_wildcard_tools.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/__init__.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_checkers.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_explain.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_relationship_checker.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_result_checks.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_sql_normalizer.py +0 -0
- {agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/tests/test_validation/test_validator.py +0 -0
|
@@ -2,6 +2,25 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.9.2] - 2026-04-15
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- **Lazy session timer**: `ContractSession` no longer starts its wall-clock timer at construction. The timer now starts on the first `check_limits()` call, so idle time before the user's first interaction does not count against `temporal.max_duration_seconds`. This fixes premature "session expired" errors in long-lived agent setups (Chainlit, Webex bots) where the session object is created well before the first user message. (#16)
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **`ContractSession.reset_timer()`**: New method that resets the duration timer so it restarts on the next `check_limits()` call. Useful for frameworks with their own idle-timeout mechanisms that want to restart the clock on user activity.
|
|
14
|
+
|
|
15
|
+
## [0.9.1] - 2026-04-13
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
|
|
19
|
+
- **Schema `description` field**: Optional description on `AllowedTable` entries, surfaced via `list_schemas` to help agents understand what each schema contains and when to use it.
|
|
20
|
+
- **Schema `preferred` flag**: Optional boolean on `AllowedTable` (default `false`), surfaced via `list_schemas` to signal which schema the agent should prefer when similar tables exist across schemas.
|
|
21
|
+
- **Example improvements**: Revenue agent example updated with `lookup_domain` and `lookup_metric` demo steps, schema description/preferred in contract, and fixed pre-existing missing `query_check` blocks on `tenant_isolation` and `no_select_star` rules.
|
|
22
|
+
- **Domain-driven README**: README reframed around the domain-driven approach — agents understand business domains before writing SQL.
|
|
23
|
+
|
|
5
24
|
## [0.9.0] - 2026-04-13
|
|
6
25
|
|
|
7
26
|
### Added
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
src/agentic_data_contracts/
|
|
19
19
|
├── core/ # YAML loading, Pydantic models, lightweight enforcement
|
|
20
20
|
├── validation/ # sqlglot checkers, Validator (Layer 1 + 2), EXPLAIN protocol
|
|
21
|
-
├── tools/ #
|
|
21
|
+
├── tools/ # 12-tool factory + middleware for Claude Agent SDK
|
|
22
22
|
├── semantic/ # dbt/Cube/YAML source integrations
|
|
23
23
|
├── adapters/ # DatabaseAdapter protocol + DuckDB implementation
|
|
24
24
|
└── bridge/ # Optional ai-agent-contracts compilation
|
|
@@ -1,3 +1,56 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentic-data-contracts
|
|
3
|
+
Version: 0.9.2
|
|
4
|
+
Summary: YAML-first, domain-driven data governance for AI agents
|
|
5
|
+
Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
|
|
6
|
+
Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
|
|
7
|
+
Project-URL: Issues, https://github.com/flyersworder/agentic-data-contracts/issues
|
|
8
|
+
Project-URL: Documentation, https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md
|
|
9
|
+
Author-email: Qing <qingye779@gmail.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: ai-agents,analytics,claude,data-contracts,data-governance,dbt,domain-driven,llm,semantic-layer,sql-validation
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: >=3.12
|
|
23
|
+
Requires-Dist: pydantic>=2.0
|
|
24
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
25
|
+
Requires-Dist: sqlglot>=23.0
|
|
26
|
+
Requires-Dist: thefuzz>=0.22.1
|
|
27
|
+
Provides-Extra: agent-contracts
|
|
28
|
+
Requires-Dist: ai-agent-contracts>=0.3.0; extra == 'agent-contracts'
|
|
29
|
+
Provides-Extra: agent-sdk
|
|
30
|
+
Requires-Dist: claude-agent-sdk>=0.1.58; extra == 'agent-sdk'
|
|
31
|
+
Provides-Extra: all
|
|
32
|
+
Requires-Dist: ai-agent-contracts>=0.3.0; extra == 'all'
|
|
33
|
+
Requires-Dist: claude-agent-sdk>=0.1.58; extra == 'all'
|
|
34
|
+
Requires-Dist: duckdb; extra == 'all'
|
|
35
|
+
Requires-Dist: google-cloud-bigquery; extra == 'all'
|
|
36
|
+
Requires-Dist: psycopg2-binary; extra == 'all'
|
|
37
|
+
Requires-Dist: snowflake-connector-python; extra == 'all'
|
|
38
|
+
Provides-Extra: bigquery
|
|
39
|
+
Requires-Dist: google-cloud-bigquery; extra == 'bigquery'
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: duckdb; extra == 'dev'
|
|
42
|
+
Requires-Dist: pytest-asyncio>=1.3.0; extra == 'dev'
|
|
43
|
+
Requires-Dist: pytest-cov>=7.1.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: pytest>=9.0.3; extra == 'dev'
|
|
45
|
+
Requires-Dist: ruff>=0.15.10; extra == 'dev'
|
|
46
|
+
Provides-Extra: duckdb
|
|
47
|
+
Requires-Dist: duckdb; extra == 'duckdb'
|
|
48
|
+
Provides-Extra: postgres
|
|
49
|
+
Requires-Dist: psycopg2-binary; extra == 'postgres'
|
|
50
|
+
Provides-Extra: snowflake
|
|
51
|
+
Requires-Dist: snowflake-connector-python; extra == 'snowflake'
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
1
54
|
# agentic-data-contracts
|
|
2
55
|
|
|
3
56
|
[PyPI](https://pypi.org/project/agentic-data-contracts/)
|
|
@@ -5,16 +58,29 @@
|
|
|
5
58
|
[Python](https://www.python.org/downloads/)
|
|
6
59
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
7
60
|
|
|
8
|
-
**
|
|
61
|
+
**YAML-first, domain-driven data governance for AI agents.**
|
|
9
62
|
|
|
10
|
-
`agentic-data-contracts`
|
|
63
|
+
`agentic-data-contracts` takes a domain-driven approach to AI agent governance: instead of letting agents figure out your data landscape by trial and error, you teach them your business domains, metrics, and rules upfront — in YAML. The agent starts by understanding *what* a business domain means, then discovers *which* metrics to use, then builds queries that comply with your governance rules. All enforced automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
|
|
11
64
|
|
|
12
|
-
**Why?** AI agents querying databases face
|
|
65
|
+
**Why domain-driven?** AI agents querying databases face three problems: **resource runaway** (unbounded compute, endless retries, cost overruns), **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions), and **lack of business context** (the agent doesn't know what "revenue" means in *your* company). This library addresses all three with a single YAML contract that combines governance rules with business domain knowledge.
|
|
13
66
|
|
|
14
67
|
**Works with:** [Claude Agent SDK](https://github.com/anthropics/claude-agent-sdk-python) (primary target), or any Python agent framework. Optionally integrates with [ai-agent-contracts](https://pypi.org/project/ai-agent-contracts/) for formal resource governance.
|
|
15
68
|
|
|
16
69
|
## How It Works
|
|
17
70
|
|
|
71
|
+
The agent follows a domain-driven workflow — understanding business context before writing SQL:
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
1. Agent receives: "How is revenue trending?"
|
|
75
|
+
2. lookup_domain("revenue") → "Revenue is recognized at fulfillment, not at booking"
|
|
76
|
+
3. lookup_metric("total_revenue") → SUM(amount) FILTER (WHERE status = 'completed')
|
|
77
|
+
4. Agent writes SQL using the metric definition
|
|
78
|
+
5. validate_query(sql) → VALID (passes all contract rules)
|
|
79
|
+
6. run_query(sql) → results returned
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Governance rules are enforced automatically at query time:
|
|
83
|
+
|
|
18
84
|
```
|
|
19
85
|
Agent: "SELECT * FROM analytics.orders"
|
|
20
86
|
-> BLOCKED (no SELECT * — specify explicit columns)
|
|
@@ -24,12 +90,9 @@ Agent: "SELECT order_id, amount FROM analytics.orders"
|
|
|
24
90
|
|
|
25
91
|
Agent: "SELECT order_id, amount FROM analytics.orders WHERE tenant_id = 'acme'"
|
|
26
92
|
-> PASSED + WARN (consider using semantic revenue definition)
|
|
27
|
-
|
|
28
|
-
Agent: "DELETE FROM analytics.orders WHERE id = 1"
|
|
29
|
-
-> BLOCKED (forbidden operation: DELETE)
|
|
30
93
|
```
|
|
31
94
|
|
|
32
|
-
The contract defines the rules. The library enforces them — before the query ever reaches the database.
|
|
95
|
+
The contract defines the domains, metrics, and rules. The library enforces them — before the query ever reaches the database.
|
|
33
96
|
|
|
34
97
|
## Installation
|
|
35
98
|
|
|
@@ -64,10 +127,19 @@ semantic:
|
|
|
64
127
|
path: "./semantic.yml"
|
|
65
128
|
allowed_tables:
|
|
66
129
|
- schema: analytics
|
|
130
|
+
description: "Curated analytics tables — prefer for reporting"
|
|
131
|
+
preferred: true
|
|
67
132
|
tables: ["*"] # all tables in schema (discovered from database)
|
|
68
133
|
- schema: marketing
|
|
69
134
|
tables: [campaigns] # or list specific tables
|
|
70
135
|
forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
|
|
136
|
+
domains:
|
|
137
|
+
- name: revenue
|
|
138
|
+
summary: "Financial metrics from completed orders"
|
|
139
|
+
description: >
|
|
140
|
+
Revenue is recognized at fulfillment, not at booking.
|
|
141
|
+
Excludes refunds and chargebacks unless stated.
|
|
142
|
+
metrics: [total_revenue]
|
|
71
143
|
rules:
|
|
72
144
|
- name: tenant_isolation
|
|
73
145
|
description: "All queries must filter by tenant_id"
|
|
@@ -161,7 +233,7 @@ asyncio.run(demo())
|
|
|
161
233
|
|
|
162
234
|
| Tool | Description |
|
|
163
235
|
|------|-------------|
|
|
164
|
-
| `list_schemas` | List
|
|
236
|
+
| `list_schemas` | List allowed schemas with descriptions and preferred flags |
|
|
165
237
|
| `list_tables` | List allowed tables, optionally filtered by schema |
|
|
166
238
|
| `describe_table` | Get full column details for an allowed table |
|
|
167
239
|
| `preview_table` | Preview sample rows from an allowed table |
|
|
@@ -174,6 +246,57 @@ asyncio.run(demo())
|
|
|
174
246
|
| `run_query` | Validate and execute a SQL query, returning results |
|
|
175
247
|
| `get_contract_info` | Get the full contract: rules, limits, domains, and session status |
|
|
176
248
|
|
|
249
|
+
## Domain-Driven Agent Workflow
|
|
250
|
+
|
|
251
|
+
The core design principle: **agents should understand the business domain before writing SQL.** Instead of dumping table schemas and hoping for the best, the contract teaches the agent your business vocabulary through progressive disclosure:
|
|
252
|
+
|
|
253
|
+
```
|
|
254
|
+
1. Domain context → "What does 'revenue' mean here?"
|
|
255
|
+
2. Metric definitions → "How is 'total_revenue' calculated?"
|
|
256
|
+
3. Query execution → "Run the validated SQL"
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Defining domains
|
|
260
|
+
|
|
261
|
+
Each domain carries a description that teaches the agent your business rules — things the SQL alone can't express:
|
|
262
|
+
|
|
263
|
+
```yaml
|
|
264
|
+
semantic:
|
|
265
|
+
domains:
|
|
266
|
+
- name: acquisition
|
|
267
|
+
summary: "Customer acquisition costs and conversion metrics"
|
|
268
|
+
description: >
|
|
269
|
+
Acquisition metrics track the cost and efficiency of
|
|
270
|
+
acquiring new customers across all channels.
|
|
271
|
+
CAC is calculated using fully-loaded cost, not just ad spend.
|
|
272
|
+
metrics: [CAC, CPA, CPL, click_through_rate]
|
|
273
|
+
- name: retention
|
|
274
|
+
summary: "Customer retention, churn, and lifetime value"
|
|
275
|
+
description: >
|
|
276
|
+
Retention metrics measure how well we keep customers.
|
|
277
|
+
Churn is measured on a 30-day rolling window.
|
|
278
|
+
A customer is "active" if they had at least one qualifying
|
|
279
|
+
action in the window.
|
|
280
|
+
metrics: [churn_rate, LTV, retention_30d]
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### How the agent uses domains
|
|
284
|
+
|
|
285
|
+
The system prompt gives the agent a compact domain index. When a user asks a domain-specific question, the agent explores progressively:
|
|
286
|
+
|
|
287
|
+
```
|
|
288
|
+
lookup_domain("acquisition") → business context + metric descriptions
|
|
289
|
+
lookup_metric("CAC") → SQL expression, source table, filters
|
|
290
|
+
lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candidates
|
|
291
|
+
list_metrics(domain="retention") → all metrics in the retention domain
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
This means the agent knows that "revenue is recognized at fulfillment, not at booking" *before* it writes a single line of SQL — reducing hallucinated metrics and incorrect calculations.
|
|
295
|
+
|
|
296
|
+
### Why progressive disclosure works
|
|
297
|
+
|
|
298
|
+
This pattern — compact index in the prompt, detailed context on demand — is the same philosophy validated by agent skill systems, MCP tool servers, and RAG architectures. Instead of overloading the agent's context window with everything upfront, you give it just enough to know *where to look*, then let it pull details when needed. The result is better token efficiency, more focused reasoning, and fewer hallucinations from context overload.
|
|
299
|
+
|
|
177
300
|
## Contract Rules
|
|
178
301
|
|
|
179
302
|
Rules are enforced at three levels:
|
|
@@ -342,36 +465,6 @@ dc = DataContract.from_yaml("contract.yml")
|
|
|
342
465
|
print(dc.to_system_prompt(renderer=MarkdownRenderer()))
|
|
343
466
|
```
|
|
344
467
|
|
|
345
|
-
## Business Domains
|
|
346
|
-
|
|
347
|
-
Domains provide business context that helps agents understand *what* they're being asked about before getting into the mechanics of *how* to calculate it:
|
|
348
|
-
|
|
349
|
-
```yaml
|
|
350
|
-
semantic:
|
|
351
|
-
domains:
|
|
352
|
-
- name: acquisition
|
|
353
|
-
summary: "Customer acquisition costs and conversion metrics"
|
|
354
|
-
description: >
|
|
355
|
-
Acquisition metrics track the cost and efficiency of
|
|
356
|
-
acquiring new customers across all channels.
|
|
357
|
-
metrics: [CAC, CPA, CPL, click_through_rate]
|
|
358
|
-
- name: retention
|
|
359
|
-
summary: "Customer retention, churn, and lifetime value"
|
|
360
|
-
description: >
|
|
361
|
-
Retention metrics measure how well we keep customers.
|
|
362
|
-
Churn is measured on a 30-day rolling window.
|
|
363
|
-
metrics: [churn_rate, LTV, retention_30d]
|
|
364
|
-
```
|
|
365
|
-
|
|
366
|
-
The system prompt shows a compact domain index. The agent uses `lookup_domain` for business context, then `lookup_metric` for SQL definitions:
|
|
367
|
-
|
|
368
|
-
```
|
|
369
|
-
lookup_domain("acquisition") → full description + metrics with descriptions
|
|
370
|
-
lookup_metric("CAC") → exact match, SQL definition
|
|
371
|
-
lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candidates
|
|
372
|
-
list_metrics(domain="retention") → only retention metrics
|
|
373
|
-
```
|
|
374
|
-
|
|
375
468
|
## Scaling to Large Organizations
|
|
376
469
|
|
|
377
470
|
Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.
|
|
@@ -1,56 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: agentic-data-contracts
|
|
3
|
-
Version: 0.9.0
|
|
4
|
-
Summary: YAML-first data contract governance for AI agents
|
|
5
|
-
Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
|
|
6
|
-
Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
|
|
7
|
-
Project-URL: Issues, https://github.com/flyersworder/agentic-data-contracts/issues
|
|
8
|
-
Project-URL: Documentation, https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md
|
|
9
|
-
Author-email: Qing <qingye779@gmail.com>
|
|
10
|
-
License: MIT
|
|
11
|
-
License-File: LICENSE
|
|
12
|
-
Keywords: ai-agents,analytics,claude,data-contracts,data-governance,dbt,llm,sql-validation
|
|
13
|
-
Classifier: Development Status :: 4 - Beta
|
|
14
|
-
Classifier: Intended Audience :: Developers
|
|
15
|
-
Classifier: Intended Audience :: Science/Research
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
-
Classifier: Topic :: Database
|
|
20
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
-
Requires-Python: >=3.12
|
|
23
|
-
Requires-Dist: pydantic>=2.0
|
|
24
|
-
Requires-Dist: pyyaml>=6.0
|
|
25
|
-
Requires-Dist: sqlglot>=23.0
|
|
26
|
-
Requires-Dist: thefuzz>=0.22.1
|
|
27
|
-
Provides-Extra: agent-contracts
|
|
28
|
-
Requires-Dist: ai-agent-contracts>=0.2.0; extra == 'agent-contracts'
|
|
29
|
-
Provides-Extra: agent-sdk
|
|
30
|
-
Requires-Dist: claude-agent-sdk>=0.1.52; extra == 'agent-sdk'
|
|
31
|
-
Provides-Extra: all
|
|
32
|
-
Requires-Dist: ai-agent-contracts>=0.2.0; extra == 'all'
|
|
33
|
-
Requires-Dist: claude-agent-sdk>=0.1.52; extra == 'all'
|
|
34
|
-
Requires-Dist: duckdb; extra == 'all'
|
|
35
|
-
Requires-Dist: google-cloud-bigquery; extra == 'all'
|
|
36
|
-
Requires-Dist: psycopg2-binary; extra == 'all'
|
|
37
|
-
Requires-Dist: snowflake-connector-python; extra == 'all'
|
|
38
|
-
Provides-Extra: bigquery
|
|
39
|
-
Requires-Dist: google-cloud-bigquery; extra == 'bigquery'
|
|
40
|
-
Provides-Extra: dev
|
|
41
|
-
Requires-Dist: duckdb; extra == 'dev'
|
|
42
|
-
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
43
|
-
Requires-Dist: pytest-cov>=6.0; extra == 'dev'
|
|
44
|
-
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
45
|
-
Requires-Dist: ruff>=0.8.0; extra == 'dev'
|
|
46
|
-
Provides-Extra: duckdb
|
|
47
|
-
Requires-Dist: duckdb; extra == 'duckdb'
|
|
48
|
-
Provides-Extra: postgres
|
|
49
|
-
Requires-Dist: psycopg2-binary; extra == 'postgres'
|
|
50
|
-
Provides-Extra: snowflake
|
|
51
|
-
Requires-Dist: snowflake-connector-python; extra == 'snowflake'
|
|
52
|
-
Description-Content-Type: text/markdown
|
|
53
|
-
|
|
54
1
|
# agentic-data-contracts
|
|
55
2
|
|
|
56
3
|
[PyPI](https://pypi.org/project/agentic-data-contracts/)
|
|
@@ -58,16 +5,29 @@ Description-Content-Type: text/markdown
|
|
|
58
5
|
[Python](https://www.python.org/downloads/)
|
|
59
6
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
60
7
|
|
|
61
|
-
**
|
|
8
|
+
**YAML-first, domain-driven data governance for AI agents.**
|
|
62
9
|
|
|
63
|
-
`agentic-data-contracts`
|
|
10
|
+
`agentic-data-contracts` takes a domain-driven approach to AI agent governance: instead of letting agents figure out your data landscape by trial and error, you teach them your business domains, metrics, and rules upfront — in YAML. The agent starts by understanding *what* a business domain means, then discovers *which* metrics to use, then builds queries that comply with your governance rules. All enforced automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
|
|
64
11
|
|
|
65
|
-
**Why?** AI agents querying databases face
|
|
12
|
+
**Why domain-driven?** AI agents querying databases face three problems: **resource runaway** (unbounded compute, endless retries, cost overruns), **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions), and **lack of business context** (the agent doesn't know what "revenue" means in *your* company). This library addresses all three with a single YAML contract that combines governance rules with business domain knowledge.
|
|
66
13
|
|
|
67
14
|
**Works with:** [Claude Agent SDK](https://github.com/anthropics/claude-agent-sdk-python) (primary target), or any Python agent framework. Optionally integrates with [ai-agent-contracts](https://pypi.org/project/ai-agent-contracts/) for formal resource governance.
|
|
68
15
|
|
|
69
16
|
## How It Works
|
|
70
17
|
|
|
18
|
+
The agent follows a domain-driven workflow — understanding business context before writing SQL:
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
1. Agent receives: "How is revenue trending?"
|
|
22
|
+
2. lookup_domain("revenue") → "Revenue is recognized at fulfillment, not at booking"
|
|
23
|
+
3. lookup_metric("total_revenue") → SUM(amount) FILTER (WHERE status = 'completed')
|
|
24
|
+
4. Agent writes SQL using the metric definition
|
|
25
|
+
5. validate_query(sql) → VALID (passes all contract rules)
|
|
26
|
+
6. run_query(sql) → results returned
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Governance rules are enforced automatically at query time:
|
|
30
|
+
|
|
71
31
|
```
|
|
72
32
|
Agent: "SELECT * FROM analytics.orders"
|
|
73
33
|
-> BLOCKED (no SELECT * — specify explicit columns)
|
|
@@ -77,12 +37,9 @@ Agent: "SELECT order_id, amount FROM analytics.orders"
|
|
|
77
37
|
|
|
78
38
|
Agent: "SELECT order_id, amount FROM analytics.orders WHERE tenant_id = 'acme'"
|
|
79
39
|
-> PASSED + WARN (consider using semantic revenue definition)
|
|
80
|
-
|
|
81
|
-
Agent: "DELETE FROM analytics.orders WHERE id = 1"
|
|
82
|
-
-> BLOCKED (forbidden operation: DELETE)
|
|
83
40
|
```
|
|
84
41
|
|
|
85
|
-
The contract defines the rules. The library enforces them — before the query ever reaches the database.
|
|
42
|
+
The contract defines the domains, metrics, and rules. The library enforces them — before the query ever reaches the database.
|
|
86
43
|
|
|
87
44
|
## Installation
|
|
88
45
|
|
|
@@ -117,10 +74,19 @@ semantic:
|
|
|
117
74
|
path: "./semantic.yml"
|
|
118
75
|
allowed_tables:
|
|
119
76
|
- schema: analytics
|
|
77
|
+
description: "Curated analytics tables — prefer for reporting"
|
|
78
|
+
preferred: true
|
|
120
79
|
tables: ["*"] # all tables in schema (discovered from database)
|
|
121
80
|
- schema: marketing
|
|
122
81
|
tables: [campaigns] # or list specific tables
|
|
123
82
|
forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
|
|
83
|
+
domains:
|
|
84
|
+
- name: revenue
|
|
85
|
+
summary: "Financial metrics from completed orders"
|
|
86
|
+
description: >
|
|
87
|
+
Revenue is recognized at fulfillment, not at booking.
|
|
88
|
+
Excludes refunds and chargebacks unless stated.
|
|
89
|
+
metrics: [total_revenue]
|
|
124
90
|
rules:
|
|
125
91
|
- name: tenant_isolation
|
|
126
92
|
description: "All queries must filter by tenant_id"
|
|
@@ -214,7 +180,7 @@ asyncio.run(demo())
|
|
|
214
180
|
|
|
215
181
|
| Tool | Description |
|
|
216
182
|
|------|-------------|
|
|
217
|
-
| `list_schemas` | List
|
|
183
|
+
| `list_schemas` | List allowed schemas with descriptions and preferred flags |
|
|
218
184
|
| `list_tables` | List allowed tables, optionally filtered by schema |
|
|
219
185
|
| `describe_table` | Get full column details for an allowed table |
|
|
220
186
|
| `preview_table` | Preview sample rows from an allowed table |
|
|
@@ -227,6 +193,57 @@ asyncio.run(demo())
|
|
|
227
193
|
| `run_query` | Validate and execute a SQL query, returning results |
|
|
228
194
|
| `get_contract_info` | Get the full contract: rules, limits, domains, and session status |
|
|
229
195
|
|
|
196
|
+
## Domain-Driven Agent Workflow
|
|
197
|
+
|
|
198
|
+
The core design principle: **agents should understand the business domain before writing SQL.** Instead of dumping table schemas and hoping for the best, the contract teaches the agent your business vocabulary through progressive disclosure:
|
|
199
|
+
|
|
200
|
+
```
|
|
201
|
+
1. Domain context → "What does 'revenue' mean here?"
|
|
202
|
+
2. Metric definitions → "How is 'total_revenue' calculated?"
|
|
203
|
+
3. Query execution → "Run the validated SQL"
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### Defining domains
|
|
207
|
+
|
|
208
|
+
Each domain carries a description that teaches the agent your business rules — things the SQL alone can't express:
|
|
209
|
+
|
|
210
|
+
```yaml
|
|
211
|
+
semantic:
|
|
212
|
+
domains:
|
|
213
|
+
- name: acquisition
|
|
214
|
+
summary: "Customer acquisition costs and conversion metrics"
|
|
215
|
+
description: >
|
|
216
|
+
Acquisition metrics track the cost and efficiency of
|
|
217
|
+
acquiring new customers across all channels.
|
|
218
|
+
CAC is calculated using fully-loaded cost, not just ad spend.
|
|
219
|
+
metrics: [CAC, CPA, CPL, click_through_rate]
|
|
220
|
+
- name: retention
|
|
221
|
+
summary: "Customer retention, churn, and lifetime value"
|
|
222
|
+
description: >
|
|
223
|
+
Retention metrics measure how well we keep customers.
|
|
224
|
+
Churn is measured on a 30-day rolling window.
|
|
225
|
+
A customer is "active" if they had at least one qualifying
|
|
226
|
+
action in the window.
|
|
227
|
+
metrics: [churn_rate, LTV, retention_30d]
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### How the agent uses domains
|
|
231
|
+
|
|
232
|
+
The system prompt gives the agent a compact domain index. When a user asks a domain-specific question, the agent explores progressively:
|
|
233
|
+
|
|
234
|
+
```
|
|
235
|
+
lookup_domain("acquisition") → business context + metric descriptions
|
|
236
|
+
lookup_metric("CAC") → SQL expression, source table, filters
|
|
237
|
+
lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candidates
|
|
238
|
+
list_metrics(domain="retention") → all metrics in the retention domain
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
This means the agent knows that "revenue is recognized at fulfillment, not at booking" *before* it writes a single line of SQL — reducing hallucinated metrics and incorrect calculations.
|
|
242
|
+
|
|
243
|
+
### Why progressive disclosure works
|
|
244
|
+
|
|
245
|
+
This pattern — compact index in the prompt, detailed context on demand — is the same philosophy validated by agent skill systems, MCP tool servers, and RAG architectures. Instead of overloading the agent's context window with everything upfront, you give it just enough to know *where to look*, then let it pull details when needed. The result is better token efficiency, more focused reasoning, and fewer hallucinations from context overload.
|
|
246
|
+
|
|
230
247
|
## Contract Rules
|
|
231
248
|
|
|
232
249
|
Rules are enforced at three levels:
|
|
@@ -395,36 +412,6 @@ dc = DataContract.from_yaml("contract.yml")
|
|
|
395
412
|
print(dc.to_system_prompt(renderer=MarkdownRenderer()))
|
|
396
413
|
```
|
|
397
414
|
|
|
398
|
-
## Business Domains
|
|
399
|
-
|
|
400
|
-
Domains provide business context that helps agents understand *what* they're being asked about before getting into the mechanics of *how* to calculate it:
|
|
401
|
-
|
|
402
|
-
```yaml
|
|
403
|
-
semantic:
|
|
404
|
-
domains:
|
|
405
|
-
- name: acquisition
|
|
406
|
-
summary: "Customer acquisition costs and conversion metrics"
|
|
407
|
-
description: >
|
|
408
|
-
Acquisition metrics track the cost and efficiency of
|
|
409
|
-
acquiring new customers across all channels.
|
|
410
|
-
metrics: [CAC, CPA, CPL, click_through_rate]
|
|
411
|
-
- name: retention
|
|
412
|
-
summary: "Customer retention, churn, and lifetime value"
|
|
413
|
-
description: >
|
|
414
|
-
Retention metrics measure how well we keep customers.
|
|
415
|
-
Churn is measured on a 30-day rolling window.
|
|
416
|
-
metrics: [churn_rate, LTV, retention_30d]
|
|
417
|
-
```
|
|
418
|
-
|
|
419
|
-
The system prompt shows a compact domain index. The agent uses `lookup_domain` for business context, then `lookup_metric` for SQL definitions:
|
|
420
|
-
|
|
421
|
-
```
|
|
422
|
-
lookup_domain("acquisition") → full description + metrics with descriptions
|
|
423
|
-
lookup_metric("CAC") → exact match, SQL definition
|
|
424
|
-
lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candidates
|
|
425
|
-
list_metrics(domain="retention") → only retention metrics
|
|
426
|
-
```
|
|
427
|
-
|
|
428
415
|
## Scaling to Large Organizations
|
|
429
416
|
|
|
430
417
|
Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Agentic Data Contracts — Architecture
|
|
2
2
|
|
|
3
3
|
**Date:** 2026-04-13
|
|
4
|
-
**Status:** Implemented (v0.9.
|
|
4
|
+
**Status:** Implemented (v0.9.2)
|
|
5
5
|
**Author:** Qing Ye + Claude
|
|
6
6
|
|
|
7
7
|
## Problem Statement
|
|
@@ -93,6 +93,8 @@ semantic:
|
|
|
93
93
|
# What the agent is allowed to access
|
|
94
94
|
allowed_tables:
|
|
95
95
|
- schema: analytics
|
|
96
|
+
description: "Curated analytics tables — prefer for reporting"
|
|
97
|
+
preferred: true # agent should prefer this schema
|
|
96
98
|
tables: [orders, customers, subscriptions]
|
|
97
99
|
- schema: raw
|
|
98
100
|
tables: [] # empty = nothing from this schema
|
|
@@ -186,7 +188,7 @@ When `ai-agent-contracts` is NOT installed, `ContractSession` provides self-cont
|
|
|
186
188
|
|
|
187
189
|
- **Retry count** — incremented on each failed query attempt, checked against `max_retries`
|
|
188
190
|
- **Token usage** — tracked via callback, checked against `token_budget`
|
|
189
|
-
- **Wall-clock duration** — start
|
|
191
|
+
- **Wall-clock duration** — the timer starts lazily on the first `check_limits()` call (not at construction) and is checked against `max_duration_seconds`. It can be reset via `reset_timer()` for frameworks that manage their own idle timeouts.
|
|
190
192
|
- **Cost estimate** — if EXPLAIN adapter returns cost info, checked against `cost_limit_usd`
|
|
191
193
|
|
|
192
194
|
These are simple counters/timers with guard checks before each tool call; there is no formal state machine.
|
{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/agent.py
RENAMED
|
@@ -86,6 +86,18 @@ async def _run_demo(tools: list, prompt: str) -> None:
|
|
|
86
86
|
print("\n=== Available Tables ===")
|
|
87
87
|
print(result["content"][0]["text"])
|
|
88
88
|
|
|
89
|
+
# Domain discovery: understand the business context before querying
|
|
90
|
+
tool = next(t for t in tools if t.name == "lookup_domain")
|
|
91
|
+
result = await tool.callable({"name": "revenue"})
|
|
92
|
+
print("\n=== Lookup Domain (revenue) ===")
|
|
93
|
+
print(result["content"][0]["text"])
|
|
94
|
+
|
|
95
|
+
# Metric lookup: get the SQL definition for a specific metric
|
|
96
|
+
tool = next(t for t in tools if t.name == "lookup_metric")
|
|
97
|
+
result = await tool.callable({"metric_name": "total_revenue"})
|
|
98
|
+
print("\n=== Lookup Metric (total_revenue) ===")
|
|
99
|
+
print(result["content"][0]["text"])
|
|
100
|
+
|
|
89
101
|
tool = next(t for t in tools if t.name == "validate_query")
|
|
90
102
|
sql = (
|
|
91
103
|
"SELECT c.region, SUM(o.amount) as revenue "
|
{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/contract.yml
RENAMED
|
@@ -7,20 +7,33 @@ semantic:
|
|
|
7
7
|
path: "./semantic.yml"
|
|
8
8
|
allowed_tables:
|
|
9
9
|
- schema: analytics
|
|
10
|
+
description: "Curated analytics tables — prefer these for all reporting queries"
|
|
11
|
+
preferred: true
|
|
10
12
|
tables: [orders, customers, subscriptions]
|
|
11
13
|
forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
|
|
12
14
|
domains:
|
|
13
|
-
|
|
15
|
+
- name: revenue
|
|
16
|
+
summary: "Financial metrics from completed orders"
|
|
17
|
+
description: >
|
|
18
|
+
Revenue metrics track recognized revenue from completed orders.
|
|
19
|
+
Revenue is recognized at fulfillment, not at booking.
|
|
20
|
+
Excludes refunds, chargebacks, and pending orders unless stated.
|
|
21
|
+
metrics: [total_revenue, revenue_by_region]
|
|
22
|
+
tables: [analytics.orders, analytics.customers]
|
|
14
23
|
rules:
|
|
15
24
|
- name: tenant_isolation
|
|
16
25
|
description: "All queries must filter by tenant_id"
|
|
17
26
|
enforcement: block
|
|
27
|
+
query_check:
|
|
28
|
+
required_filter: tenant_id
|
|
18
29
|
- name: use_semantic_revenue
|
|
19
30
|
description: "Revenue calculations must use the metric definitions"
|
|
20
31
|
enforcement: warn
|
|
21
32
|
- name: no_select_star
|
|
22
33
|
description: "Must specify explicit columns"
|
|
23
34
|
enforcement: block
|
|
35
|
+
query_check:
|
|
36
|
+
no_select_star: true
|
|
24
37
|
|
|
25
38
|
resources:
|
|
26
39
|
cost_limit_usd: 5.00
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "agentic-data-contracts"
|
|
3
|
-
version = "0.9.
|
|
4
|
-
description = "YAML-first data
|
|
3
|
+
version = "0.9.2"
|
|
4
|
+
description = "YAML-first, domain-driven data governance for AI agents"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
7
7
|
authors = [{ name = "Qing", email = "qingye779@gmail.com" }]
|
|
@@ -10,9 +10,11 @@ keywords = [
|
|
|
10
10
|
"ai-agents",
|
|
11
11
|
"data-contracts",
|
|
12
12
|
"data-governance",
|
|
13
|
+
"domain-driven",
|
|
13
14
|
"sql-validation",
|
|
14
15
|
"llm",
|
|
15
16
|
"claude",
|
|
17
|
+
"semantic-layer",
|
|
16
18
|
"analytics",
|
|
17
19
|
"dbt",
|
|
18
20
|
]
|
|
@@ -30,7 +32,7 @@ classifiers = [
|
|
|
30
32
|
dependencies = [
|
|
31
33
|
"sqlglot>=23.0",
|
|
32
34
|
"pydantic>=2.0",
|
|
33
|
-
"pyyaml>=6.0",
|
|
35
|
+
"pyyaml>=6.0.3",
|
|
34
36
|
"thefuzz>=0.22.1",
|
|
35
37
|
]
|
|
36
38
|
|
|
@@ -41,17 +43,17 @@ Issues = "https://github.com/flyersworder/agentic-data-contracts/issues"
|
|
|
41
43
|
Documentation = "https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md"
|
|
42
44
|
|
|
43
45
|
[project.optional-dependencies]
|
|
44
|
-
agent-sdk = ["claude-agent-sdk>=0.1.
|
|
45
|
-
agent-contracts = ["ai-agent-contracts>=0.
|
|
46
|
+
agent-sdk = ["claude-agent-sdk>=0.1.58"]
|
|
47
|
+
agent-contracts = ["ai-agent-contracts>=0.3.0"]
|
|
46
48
|
bigquery = ["google-cloud-bigquery"]
|
|
47
49
|
snowflake = ["snowflake-connector-python"]
|
|
48
50
|
postgres = ["psycopg2-binary"]
|
|
49
51
|
duckdb = ["duckdb"]
|
|
50
52
|
dev = [
|
|
51
|
-
"pytest>=
|
|
52
|
-
"pytest-asyncio>=0
|
|
53
|
-
"pytest-cov>=
|
|
54
|
-
"ruff>=0.
|
|
53
|
+
"pytest>=9.0.3",
|
|
54
|
+
"pytest-asyncio>=1.3.0",
|
|
55
|
+
"pytest-cov>=7.1.0",
|
|
56
|
+
"ruff>=0.15.10",
|
|
55
57
|
"duckdb",
|
|
56
58
|
]
|
|
57
59
|
all = [
|
|
@@ -22,6 +22,8 @@ class SemanticSource(BaseModel):
|
|
|
22
22
|
class AllowedTable(BaseModel):
|
|
23
23
|
schema_: str = Field(alias="schema")
|
|
24
24
|
tables: list[str] = Field(default_factory=list)
|
|
25
|
+
description: str | None = None
|
|
26
|
+
preferred: bool = False
|
|
25
27
|
|
|
26
28
|
model_config = {"populate_by_name": True}
|
|
27
29
|
|