agentic-data-contracts 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. agentic_data_contracts-0.1.0/.github/dependabot.yml +15 -0
  2. agentic_data_contracts-0.1.0/.github/workflows/ci.yml +58 -0
  3. agentic_data_contracts-0.1.0/.gitignore +10 -0
  4. agentic_data_contracts-0.1.0/.pre-commit-config.yaml +16 -0
  5. agentic_data_contracts-0.1.0/.python-version +1 -0
  6. agentic_data_contracts-0.1.0/CHANGELOG.md +16 -0
  7. agentic_data_contracts-0.1.0/CLAUDE.md +52 -0
  8. agentic_data_contracts-0.1.0/LICENSE +21 -0
  9. agentic_data_contracts-0.1.0/PKG-INFO +282 -0
  10. agentic_data_contracts-0.1.0/README.md +230 -0
  11. agentic_data_contracts-0.1.0/docs/architecture.md +586 -0
  12. agentic_data_contracts-0.1.0/examples/revenue_agent/agent.py +119 -0
  13. agentic_data_contracts-0.1.0/examples/revenue_agent/contract.yml +29 -0
  14. agentic_data_contracts-0.1.0/examples/revenue_agent/semantic.yml +51 -0
  15. agentic_data_contracts-0.1.0/examples/revenue_agent/setup_db.py +55 -0
  16. agentic_data_contracts-0.1.0/pyproject.toml +82 -0
  17. agentic_data_contracts-0.1.0/src/agentic_data_contracts/__init__.py +7 -0
  18. agentic_data_contracts-0.1.0/src/agentic_data_contracts/adapters/__init__.py +0 -0
  19. agentic_data_contracts-0.1.0/src/agentic_data_contracts/adapters/base.py +42 -0
  20. agentic_data_contracts-0.1.0/src/agentic_data_contracts/adapters/duckdb.py +80 -0
  21. agentic_data_contracts-0.1.0/src/agentic_data_contracts/bridge/__init__.py +0 -0
  22. agentic_data_contracts-0.1.0/src/agentic_data_contracts/bridge/compiler.py +90 -0
  23. agentic_data_contracts-0.1.0/src/agentic_data_contracts/core/__init__.py +0 -0
  24. agentic_data_contracts-0.1.0/src/agentic_data_contracts/core/contract.py +122 -0
  25. agentic_data_contracts-0.1.0/src/agentic_data_contracts/core/schema.py +65 -0
  26. agentic_data_contracts-0.1.0/src/agentic_data_contracts/core/session.py +84 -0
  27. agentic_data_contracts-0.1.0/src/agentic_data_contracts/py.typed +0 -0
  28. agentic_data_contracts-0.1.0/src/agentic_data_contracts/semantic/__init__.py +0 -0
  29. agentic_data_contracts-0.1.0/src/agentic_data_contracts/semantic/base.py +24 -0
  30. agentic_data_contracts-0.1.0/src/agentic_data_contracts/semantic/cube.py +55 -0
  31. agentic_data_contracts-0.1.0/src/agentic_data_contracts/semantic/dbt.py +78 -0
  32. agentic_data_contracts-0.1.0/src/agentic_data_contracts/semantic/yaml_source.py +52 -0
  33. agentic_data_contracts-0.1.0/src/agentic_data_contracts/tools/__init__.py +0 -0
  34. agentic_data_contracts-0.1.0/src/agentic_data_contracts/tools/factory.py +396 -0
  35. agentic_data_contracts-0.1.0/src/agentic_data_contracts/tools/middleware.py +61 -0
  36. agentic_data_contracts-0.1.0/src/agentic_data_contracts/validation/__init__.py +3 -0
  37. agentic_data_contracts-0.1.0/src/agentic_data_contracts/validation/checkers.py +157 -0
  38. agentic_data_contracts-0.1.0/src/agentic_data_contracts/validation/explain.py +19 -0
  39. agentic_data_contracts-0.1.0/src/agentic_data_contracts/validation/validator.py +150 -0
  40. agentic_data_contracts-0.1.0/tests/__init__.py +0 -0
  41. agentic_data_contracts-0.1.0/tests/conftest.py +10 -0
  42. agentic_data_contracts-0.1.0/tests/fixtures/minimal_contract.yml +9 -0
  43. agentic_data_contracts-0.1.0/tests/fixtures/sample_cube_schema.yml +25 -0
  44. agentic_data_contracts-0.1.0/tests/fixtures/sample_dbt_manifest.json +52 -0
  45. agentic_data_contracts-0.1.0/tests/fixtures/semantic_source.yml +31 -0
  46. agentic_data_contracts-0.1.0/tests/fixtures/valid_contract.yml +42 -0
  47. agentic_data_contracts-0.1.0/tests/test_adapters/__init__.py +0 -0
  48. agentic_data_contracts-0.1.0/tests/test_adapters/test_duckdb.py +78 -0
  49. agentic_data_contracts-0.1.0/tests/test_bridge/__init__.py +0 -0
  50. agentic_data_contracts-0.1.0/tests/test_bridge/test_compiler.py +79 -0
  51. agentic_data_contracts-0.1.0/tests/test_core/__init__.py +0 -0
  52. agentic_data_contracts-0.1.0/tests/test_core/test_contract.py +67 -0
  53. agentic_data_contracts-0.1.0/tests/test_core/test_schema.py +68 -0
  54. agentic_data_contracts-0.1.0/tests/test_core/test_session.py +87 -0
  55. agentic_data_contracts-0.1.0/tests/test_public_api.py +70 -0
  56. agentic_data_contracts-0.1.0/tests/test_semantic/__init__.py +0 -0
  57. agentic_data_contracts-0.1.0/tests/test_semantic/test_cube.py +44 -0
  58. agentic_data_contracts-0.1.0/tests/test_semantic/test_dbt.py +45 -0
  59. agentic_data_contracts-0.1.0/tests/test_semantic/test_yaml_source.py +50 -0
  60. agentic_data_contracts-0.1.0/tests/test_tools/__init__.py +0 -0
  61. agentic_data_contracts-0.1.0/tests/test_tools/test_factory.py +293 -0
  62. agentic_data_contracts-0.1.0/tests/test_tools/test_middleware.py +93 -0
  63. agentic_data_contracts-0.1.0/tests/test_validation/__init__.py +0 -0
  64. agentic_data_contracts-0.1.0/tests/test_validation/test_checkers.py +154 -0
  65. agentic_data_contracts-0.1.0/tests/test_validation/test_explain.py +40 -0
  66. agentic_data_contracts-0.1.0/tests/test_validation/test_validator.py +154 -0
  67. agentic_data_contracts-0.1.0/uv.lock +2544 -0
@@ -0,0 +1,15 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ day: "monday"
8
+ open-pull-requests-limit: 10
9
+ labels:
10
+ - "dependencies"
11
+ groups:
12
+ minor-and-patch:
13
+ update-types:
14
+ - "minor"
15
+ - "patch"
@@ -0,0 +1,58 @@
1
+ name: CI and Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ release:
8
+ types: [created]
9
+ workflow_dispatch:
10
+ inputs:
11
+ run_publish:
12
+ description: 'Set to true to run the publish job'
13
+ required: false
14
+ default: 'false'
15
+
16
+ jobs:
17
+ lint:
18
+ runs-on: ubuntu-latest
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - uses: astral-sh/setup-uv@v3
22
+ with:
23
+ enable-cache: true
24
+ cache-dependency-glob: "pyproject.toml"
25
+ - run: uv python install 3.13
26
+ - run: uv sync --all-extras --dev
27
+ - run: uv run ruff check src/ tests/
28
+ - run: uv run ruff format --check src/ tests/
29
+
30
+ test:
31
+ runs-on: ubuntu-latest
32
+ strategy:
33
+ matrix:
34
+ python-version: ["3.12", "3.13"]
35
+ steps:
36
+ - uses: actions/checkout@v4
37
+ - uses: astral-sh/setup-uv@v3
38
+ with:
39
+ enable-cache: true
40
+ cache-dependency-glob: "pyproject.toml"
41
+ - run: uv python install ${{ matrix.python-version }}
42
+ - run: uv sync --all-extras --dev
43
+ - run: uv run pytest --cov=agentic_data_contracts --cov-report=term-missing
44
+
45
+ publish:
46
+ needs: [lint, test]
47
+ if: github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && github.event.inputs.run_publish == 'true')
48
+ runs-on: ubuntu-latest
49
+ permissions:
50
+ id-token: write
51
+ steps:
52
+ - uses: actions/checkout@v4
53
+ - uses: astral-sh/setup-uv@v3
54
+ with:
55
+ enable-cache: true
56
+ - run: uv python install 3.13
57
+ - run: uv build --no-sources
58
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .eggs/
7
+ *.egg
8
+ .venv/
9
+ *.duckdb
10
+ *.duckdb.wal
@@ -0,0 +1,16 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.15.8
4
+ hooks:
5
+ - id: ruff-check
6
+ args: [--fix]
7
+ - id: ruff-format
8
+
9
+ - repo: local
10
+ hooks:
11
+ - id: ty-check
12
+ name: ty check
13
+ entry: ty check
14
+ language: system
15
+ types: [python]
16
+ pass_filenames: false
@@ -0,0 +1 @@
1
+ 3.14
@@ -0,0 +1,16 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [0.1.0] - 2026-03-27
6
+
7
+ ### Added
8
+
9
+ - **Core layer**: YAML-first data contract schema with Pydantic validation, `DataContract` class with YAML loading and system prompt generation, `ContractSession` for lightweight resource enforcement (retries, tokens, cost, duration)
10
+ - **Validation layer**: Four built-in SQL checkers via sqlglot (table allowlist, operation blocklist, required filters, no SELECT *), `Validator` orchestrator with two-layer pipeline (static checkers + optional EXPLAIN dry-run for cost/row enforcement)
11
+ - **Tools layer**: `create_tools()` factory producing 10 agent tools (list_schemas, list_tables, describe_table, preview_table, list_metrics, lookup_metric, validate_query, query_cost_estimate, run_query, get_contract_info), `contract_middleware` decorator for wrapping existing tools
12
+ - **Semantic layer**: `SemanticSource` protocol with three implementations — `YamlSource`, `DbtSource` (manifest.json), `CubeSource` (Cube schema YAML)
13
+ - **Database adapters**: `DatabaseAdapter` protocol with `DuckDB` implementation (execute, explain with row estimate parsing, describe_table)
14
+ - **Bridge layer**: Optional `ai-agent-contracts` integration via `compile_to_contract()` mapping YAML contracts to the formal 7-tuple Contract model
15
+ - **Example**: Revenue analysis agent with DuckDB, YAML semantic source, and Claude Agent SDK fallback demo mode
16
+ - **Developer tooling**: uv for dependency management, prek pre-commit hooks (ruff + ty), 124 tests
@@ -0,0 +1,52 @@
1
+ # CLAUDE.md
2
+
3
+ ## Project Overview
4
+
5
+ `agentic-data-contracts` is a Python library for YAML-first data contract governance for AI agents. It lets data engineers define which tables an agent may query, which operations are forbidden, and which resource limits apply — then enforces those rules automatically at query time.
6
+
7
+ ## Tech Stack
8
+
9
+ - Python 3.12+, uv for dependency management
10
+ - Pydantic 2 for schema validation, sqlglot for SQL parsing
11
+ - pytest + pytest-asyncio for testing, DuckDB for integration tests
12
+ - ruff for linting/formatting, ty for type checking
13
+ - prek for pre-commit hooks
14
+
15
+ ## Project Structure
16
+
17
+ ```
18
+ src/agentic_data_contracts/
19
+ ├── core/ # YAML loading, Pydantic models, lightweight enforcement
20
+ ├── validation/ # sqlglot checkers, Validator (Layer 1 + 2), EXPLAIN protocol
21
+ ├── tools/ # 10-tool factory + middleware for Claude Agent SDK
22
+ ├── semantic/ # dbt/Cube/YAML source integrations
23
+ ├── adapters/ # DatabaseAdapter protocol + DuckDB implementation
24
+ └── bridge/ # Optional ai-agent-contracts compilation
25
+ ```
26
+
27
+ ## Common Commands
28
+
29
+ ```bash
30
+ uv sync --all-extras # Install all dependencies
31
+ uv run pytest -v # Run all tests
32
+ uv run pytest tests/test_core # Run specific test suite
33
+ uv run ruff check src/ tests/ # Lint
34
+ uv run ruff format src/ tests/ # Format
35
+ ty check # Type check
36
+ prek run --all-files # Run pre-commit hooks
37
+ ```
38
+
39
+ ## Key Design Decisions
40
+
41
+ - **Optional `ai-agent-contracts` dependency**: Library works standalone with lightweight enforcement; when `ai-agent-contracts` is installed, the bridge layer compiles the YAML contract to the formal 7-tuple Contract model
42
+ - **Protocol-based extensibility**: `DatabaseAdapter`, `SemanticSource`, `ExplainAdapter`, and `Checker` are all `@runtime_checkable` protocols
43
+ - **Two-layer validation**: Layer 1 (sqlglot static analysis) always runs; Layer 2 (EXPLAIN dry-run) runs when a database adapter is available
44
+ - **Tools are plain async functions**: Compatible with Claude Agent SDK via `create_sdk_mcp_server()` but framework-agnostic
45
+
46
+ ## Conventions
47
+
48
+ - Follow TDD: write tests first, then implement
49
+ - Each layer is independently testable with its own test suite under `tests/test_<layer>/`
50
+ - YAML fixtures live in `tests/fixtures/`
51
+ - Use `uv run` to execute anything Python-related
52
+ - Pre-commit hooks (ruff + ty) run automatically on commit via prek
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Qing Ye
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,282 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentic-data-contracts
3
+ Version: 0.1.0
4
+ Summary: YAML-first data contract governance for AI agents
5
+ Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
6
+ Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
7
+ Project-URL: Issues, https://github.com/flyersworder/agentic-data-contracts/issues
8
+ Project-URL: Documentation, https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md
9
+ Author-email: Qing <qingye779@gmail.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: ai-agents,analytics,claude,data-contracts,data-governance,dbt,llm,sql-validation
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.12
23
+ Requires-Dist: pydantic>=2.0
24
+ Requires-Dist: pyyaml>=6.0
25
+ Requires-Dist: sqlglot>=23.0
26
+ Provides-Extra: agent-contracts
27
+ Requires-Dist: ai-agent-contracts>=0.2.0; extra == 'agent-contracts'
28
+ Provides-Extra: agent-sdk
29
+ Requires-Dist: claude-agent-sdk; extra == 'agent-sdk'
30
+ Provides-Extra: all
31
+ Requires-Dist: ai-agent-contracts>=0.2.0; extra == 'all'
32
+ Requires-Dist: claude-agent-sdk; extra == 'all'
33
+ Requires-Dist: duckdb; extra == 'all'
34
+ Requires-Dist: google-cloud-bigquery; extra == 'all'
35
+ Requires-Dist: psycopg2-binary; extra == 'all'
36
+ Requires-Dist: snowflake-connector-python; extra == 'all'
37
+ Provides-Extra: bigquery
38
+ Requires-Dist: google-cloud-bigquery; extra == 'bigquery'
39
+ Provides-Extra: dev
40
+ Requires-Dist: duckdb; extra == 'dev'
41
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
42
+ Requires-Dist: pytest-cov>=6.0; extra == 'dev'
43
+ Requires-Dist: pytest>=8.0; extra == 'dev'
44
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
45
+ Provides-Extra: duckdb
46
+ Requires-Dist: duckdb; extra == 'duckdb'
47
+ Provides-Extra: postgres
48
+ Requires-Dist: psycopg2-binary; extra == 'postgres'
49
+ Provides-Extra: snowflake
50
+ Requires-Dist: snowflake-connector-python; extra == 'snowflake'
51
+ Description-Content-Type: text/markdown
52
+
53
+ # agentic-data-contracts
54
+
55
+ YAML-first data contract governance for AI agents. Define which tables an agent may query, which operations are forbidden, and which resource limits apply — then enforce those rules automatically at query time.
56
+
57
+ ## Installation
58
+
59
+ ```bash
60
+ uv add agentic-data-contracts
61
+ # or
62
+ pip install agentic-data-contracts
63
+ ```
64
+
65
+ With optional extras (database adapters and the Claude Agent SDK integration):
66
+
67
+ ```bash
68
+ uv add "agentic-data-contracts[duckdb]" # DuckDB
69
+ uv add "agentic-data-contracts[bigquery]" # BigQuery
70
+ uv add "agentic-data-contracts[snowflake]" # Snowflake
71
+ uv add "agentic-data-contracts[postgres]" # PostgreSQL
72
+ uv add "agentic-data-contracts[agent-sdk]" # Claude Agent SDK integration
73
+ ```
74
+
75
+ ## Quick Start
76
+
77
+ ### 1. Write a YAML contract
78
+
79
+ ```yaml
80
+ # contract.yml
81
+ version: "1.0"
82
+ name: revenue-analysis
83
+
84
+ semantic:
85
+ allowed_tables:
86
+ - schema: analytics
87
+ tables: [orders, customers, subscriptions]
88
+ forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
89
+ rules:
90
+ - name: tenant_isolation
91
+ description: "All queries must filter by tenant_id"
92
+ enforcement: block
93
+ filter_column: tenant_id
94
+ - name: no_select_star
95
+ description: "Must specify explicit columns"
96
+ enforcement: block
97
+
98
+ resources:
99
+ cost_limit_usd: 5.00
100
+ max_retries: 3
101
+ token_budget: 50000
102
+
103
+ temporal:
104
+ max_duration_seconds: 300
105
+ ```
106
+
107
+ ### 2. Load the contract and create tools
108
+
109
+ ```python
110
+ from agentic_data_contracts import DataContract, create_tools
111
+ from agentic_data_contracts.adapters.duckdb import DuckDBAdapter
112
+ from agentic_data_contracts.semantic.yaml_source import YamlSource
113
+
114
+ dc = DataContract.from_yaml("contract.yml")
115
+ adapter = DuckDBAdapter("analytics.duckdb")
116
+ semantic = YamlSource("semantic.yml")
117
+
118
+ tools = create_tools(dc, adapter=adapter, semantic_source=semantic)
119
+ ```
120
+
121
+ ### 3. Use with the Claude Agent SDK
122
+
123
+ ```python
124
+ import asyncio
125
+ from claude_agent_sdk import (
126
+ ClaudeAgentOptions,
127
+ AssistantMessage,
128
+ TextBlock,
129
+ create_sdk_mcp_server,
130
+ query,
131
+ )
132
+
133
+ server = create_sdk_mcp_server(name="data-contracts", version="1.0.0", tools=tools)
134
+
135
+ options = ClaudeAgentOptions(
136
+ model="claude-sonnet-4-6",
137
+ system_prompt=f"You are a revenue analytics assistant.\n\n{dc.to_system_prompt()}",
138
+ mcp_servers={"dc": server},
139
+ allowed_tools=[f"mcp__dc__{t.name}" for t in tools],
140
+ )
141
+
142
+ async def run(prompt: str) -> None:
143
+ async for message in query(prompt=prompt, options=options):
144
+ if isinstance(message, AssistantMessage):
145
+ for block in message.content:
146
+ if isinstance(block, TextBlock):
147
+ print(block.text)
148
+
149
+ asyncio.run(run("What was total revenue by region in Q1 2025?"))
150
+ ```
151
+
152
+ ### 4. Or use the tools directly (no SDK required)
153
+
154
+ ```python
155
+ import asyncio
156
+
157
+ async def demo() -> None:
158
+ # Validate a query without executing
159
+ validate = next(t for t in tools if t.name == "validate_query")
160
+ result = await validate.callable(
161
+ {"sql": "SELECT id, amount FROM analytics.orders WHERE tenant_id = 'acme'"}
162
+ )
163
+ print(result["content"][0]["text"])
164
+ # VALID — Query passed all checks.
165
+
166
+ # Blocked query
167
+ result = await validate.callable({"sql": "SELECT * FROM analytics.orders"})
168
+ print(result["content"][0]["text"])
169
+ # BLOCKED — Violations:
170
+ # - SELECT * is not allowed — specify explicit columns
171
+
172
+ asyncio.run(demo())
173
+ ```
174
+
175
+ ## The 10 Tools
176
+
177
+ | Tool | Description |
178
+ |------|-------------|
179
+ | `list_schemas` | List all allowed database schemas from the contract |
180
+ | `list_tables` | List allowed tables, optionally filtered by schema |
181
+ | `describe_table` | Get full column details for an allowed table |
182
+ | `preview_table` | Preview sample rows from an allowed table |
183
+ | `list_metrics` | List all metric definitions from the semantic source |
184
+ | `lookup_metric` | Get the full definition of a specific metric |
185
+ | `validate_query` | Validate a SQL query against contract rules without executing |
186
+ | `query_cost_estimate` | Estimate cost and row count via EXPLAIN |
187
+ | `run_query` | Validate and execute a SQL query, returning results |
188
+ | `get_contract_info` | Get the full contract: rules, limits, and session status |
189
+
190
+ ## Contract Rules
191
+
192
+ Each rule declares one of three enforcement levels:
193
+
194
+ - **`block`** — query is rejected and an error is returned to the agent
195
+ - **`warn`** — query proceeds but a warning is included in the response
196
+ - **`log`** — violation is recorded but not surfaced to the agent
197
+
198
+ Built-in checkers enforce:
199
+ - **Table allowlist** — only tables listed in `allowed_tables` may be queried
200
+ - **Operation blocklist** — `forbidden_operations` (DELETE, DROP, etc.) are rejected
201
+ - **Required filters** — rules with `filter_column` require the query's WHERE clause to filter on that column
202
+ - **No SELECT \*** — queries must name explicit columns
203
+
204
+ ## Semantic Sources
205
+
206
+ A semantic source provides metric and table schema metadata to the agent.
207
+
208
+ **YAML** (built-in):
209
+ ```yaml
210
+ # semantic.yml
211
+ metrics:
212
+ - name: total_revenue
213
+ description: "Total revenue from completed orders"
214
+ sql_expression: "SUM(amount) FILTER (WHERE status = 'completed')"
215
+ source_model: analytics.orders
216
+
217
+ tables:
218
+ - schema: analytics
219
+ table: orders
220
+ columns:
221
+ - name: id
222
+ type: INTEGER
223
+ - name: amount
224
+ type: DECIMAL
225
+ - name: tenant_id
226
+ type: VARCHAR
227
+ ```
228
+
229
+ **dbt** — point to a `manifest.json`:
230
+ ```yaml
231
+ semantic:
232
+ source:
233
+ type: dbt
234
+ path: "./dbt/manifest.json"
235
+ ```
236
+
237
+ **Cube** — point to a Cube schema file:
238
+ ```yaml
239
+ semantic:
240
+ source:
241
+ type: cube
242
+ path: "./cube/schema.yml"
243
+ ```
244
+
245
+ ## Resource Limits
246
+
247
+ ```yaml
248
+ resources:
249
+ cost_limit_usd: 5.00 # max estimated query cost
250
+ max_retries: 3 # max blocked queries per session
251
+ token_budget: 50000 # max tokens consumed
252
+ max_query_time_seconds: 30 # max wall-clock query time
253
+ max_rows_scanned: 1000000 # max rows scanned, as estimated by EXPLAIN
254
+ ```
255
+
256
+ ## Optional Dependencies
257
+
258
+ | Extra | Package | Purpose |
259
+ |-------|---------|---------|
260
+ | `duckdb` | `duckdb` | DuckDB adapter |
261
+ | `bigquery` | `google-cloud-bigquery` | BigQuery adapter |
262
+ | `snowflake` | `snowflake-connector-python` | Snowflake adapter |
263
+ | `postgres` | `psycopg2-binary` | PostgreSQL adapter |
264
+ | `agent-sdk` | `claude-agent-sdk` | Claude Agent SDK integration |
265
+ | `agent-contracts` | `ai-agent-contracts>=0.2.0` | ai-agent-contracts bridge |
266
+
267
+ ## Example
268
+
269
+ See [`examples/revenue_agent/`](examples/revenue_agent/) for a complete working example with a DuckDB database, YAML semantic source, and Claude Agent SDK integration.
270
+
271
+ ```bash
272
+ uv run python examples/revenue_agent/setup_db.py
273
+ uv run python examples/revenue_agent/agent.py "What was Q1 revenue by region?"
274
+ ```
275
+
276
+ ## Architecture
277
+
278
+ See [`docs/architecture.md`](docs/architecture.md) for the full design spec covering the layered architecture, YAML schema, validation pipeline, tool design, semantic sources, database adapters, and the optional `ai-agent-contracts` bridge.
279
+
280
+ ## License
281
+
282
+ MIT