agentic-data-contracts 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/CHANGELOG.md +17 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/PKG-INFO +30 -1
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/README.md +29 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/examples/revenue_agent/contract.yml +4 -1
- agentic_data_contracts-0.2.6/examples/revenue_agent/semantic.yml +22 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/pyproject.toml +1 -1
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/core/contract.py +61 -12
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/base.py +8 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/cube.py +8 -1
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/dbt.py +8 -1
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/yaml_source.py +16 -1
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/tools/factory.py +29 -6
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/semantic_source.yml +21 -0
- agentic_data_contracts-0.2.6/tests/test_core/test_scalability.py +144 -0
- agentic_data_contracts-0.2.6/tests/test_semantic/test_relationships.py +83 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_yaml_source.py +1 -1
- agentic_data_contracts-0.2.6/tests/test_tools/test_pagination.py +80 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/uv.lock +1 -1
- agentic_data_contracts-0.2.4/examples/revenue_agent/semantic.yml +0 -51
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/.github/dependabot.yml +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/.github/workflows/ci.yml +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/.gitignore +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/.pre-commit-config.yaml +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/.python-version +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/CLAUDE.md +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/LICENSE +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/docs/architecture.md +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/examples/revenue_agent/agent.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/examples/revenue_agent/setup_db.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/adapters/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/adapters/base.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/adapters/duckdb.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/bridge/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/bridge/compiler.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/core/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/core/schema.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/core/session.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/py.typed +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/tools/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/tools/middleware.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/tools/sdk.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/validation/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/validation/checkers.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/validation/explain.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/validation/validator.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/conftest.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/minimal_contract.yml +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/sample_cube_schema.yml +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/sample_dbt_manifest.json +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/valid_contract.yml +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_adapters/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_adapters/test_duckdb.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_bridge/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_bridge/test_compiler.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_contract.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_load_semantic_source.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_schema.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_sdk_config.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_session.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_system_prompt_metrics.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_wildcard_tables.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_public_api.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_cube.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_dbt.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_search.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_auto_load.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_factory.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_middleware.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_sdk.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_semantic_tools.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_wildcard_tools.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_validation/__init__.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_validation/test_checkers.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_validation/test_explain.py +0 -0
- {agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_validation/test_validator.py +0 -0
|
@@ -2,6 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.6] - 2026-03-29
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- **Compact system prompt at scale**: When metrics exceed 20, the system prompt shows domain names with counts (e.g., "acquisition (45)") instead of listing every metric. Reduces prompt from ~6K to ~100 tokens for large metric sets.
|
|
10
|
+
- **Paginated `list_tables`**: Added `limit` (default 50) and `offset` parameters for handling schemas with many tables. Response includes `total` count and `next_offset` for pagination.
|
|
11
|
+
- **Cached wildcard resolution**: `resolve_tables()` is now idempotent — subsequent calls are no-ops, avoiding redundant database queries.
|
|
12
|
+
|
|
13
|
+
## [0.2.5] - 2026-03-29
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- **Table relationship metadata**: `Relationship` dataclass and `get_relationships()` on `SemanticSource` protocol for declaring join paths between tables (from/to column + relationship type)
|
|
18
|
+
- **Relationships in system prompt**: `to_system_prompt()` includes join paths so the agent knows how to combine tables correctly
|
|
19
|
+
- **YamlSource relationships**: Parsed from `relationships` section in semantic YAML files
|
|
20
|
+
- DbtSource and CubeSource return empty relationships (ready for future parsing of native join metadata)
|
|
21
|
+
|
|
5
22
|
## [0.2.4] - 2026-03-29
|
|
6
23
|
|
|
7
24
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentic-data-contracts
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: YAML-first data contract governance for AI agents
|
|
5
5
|
Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
|
|
6
6
|
Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
|
|
@@ -277,6 +277,23 @@ semantic:
|
|
|
277
277
|
path: "./cube/schema.yml"
|
|
278
278
|
```
|
|
279
279
|
|
|
280
|
+
## Table Relationships
|
|
281
|
+
|
|
282
|
+
Define join paths so the agent knows how to combine tables correctly:
|
|
283
|
+
|
|
284
|
+
```yaml
|
|
285
|
+
# semantic.yml
|
|
286
|
+
relationships:
|
|
287
|
+
- from: analytics.orders.customer_id
|
|
288
|
+
to: analytics.customers.id
|
|
289
|
+
type: many_to_one
|
|
290
|
+
- from: analytics.orders.product_id
|
|
291
|
+
to: analytics.products.id
|
|
292
|
+
type: many_to_one
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
The agent sees these in its system prompt and uses them to write correct JOINs instead of guessing from column names.
|
|
296
|
+
|
|
280
297
|
## Scalable Metric Discovery
|
|
281
298
|
|
|
282
299
|
For large data lakes with hundreds of KPIs, group metrics by domain and let the agent discover them efficiently:
|
|
@@ -297,6 +314,18 @@ lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candi
|
|
|
297
314
|
list_metrics(domain="retention") → only retention metrics
|
|
298
315
|
```
|
|
299
316
|
|
|
317
|
+
## Scaling to Large Organizations
|
|
318
|
+
|
|
319
|
+
Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.
|
|
320
|
+
|
|
321
|
+
| Concern | How it scales |
|
|
322
|
+
|---|---|
|
|
323
|
+
| **System prompt size** | >20 metrics: auto-switches to compact domain counts (`acquisition (45)`) instead of listing every metric |
|
|
324
|
+
| **Table discovery** | `list_tables` is paginated (default 50, with offset). Use `schema` filter for targeted browsing |
|
|
325
|
+
| **Wildcard schemas** | `tables: ["*"]` discovers tables from the database. Resolution is cached — no repeated queries |
|
|
326
|
+
| **Metric lookup** | Fuzzy search via `thefuzz` (C++ backed) — sub-millisecond even with 1000+ metrics |
|
|
327
|
+
| **SQL validation** | Set-based allowlist check — O(1) per table reference regardless of allowlist size |
|
|
328
|
+
|
|
300
329
|
## Resource Limits
|
|
301
330
|
|
|
302
331
|
```yaml
|
|
@@ -224,6 +224,23 @@ semantic:
|
|
|
224
224
|
path: "./cube/schema.yml"
|
|
225
225
|
```
|
|
226
226
|
|
|
227
|
+
## Table Relationships
|
|
228
|
+
|
|
229
|
+
Define join paths so the agent knows how to combine tables correctly:
|
|
230
|
+
|
|
231
|
+
```yaml
|
|
232
|
+
# semantic.yml
|
|
233
|
+
relationships:
|
|
234
|
+
- from: analytics.orders.customer_id
|
|
235
|
+
to: analytics.customers.id
|
|
236
|
+
type: many_to_one
|
|
237
|
+
- from: analytics.orders.product_id
|
|
238
|
+
to: analytics.products.id
|
|
239
|
+
type: many_to_one
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
The agent sees these in its system prompt and uses them to write correct JOINs instead of guessing from column names.
|
|
243
|
+
|
|
227
244
|
## Scalable Metric Discovery
|
|
228
245
|
|
|
229
246
|
For large data lakes with hundreds of KPIs, group metrics by domain and let the agent discover them efficiently:
|
|
@@ -244,6 +261,18 @@ lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candi
|
|
|
244
261
|
list_metrics(domain="retention") → only retention metrics
|
|
245
262
|
```
|
|
246
263
|
|
|
264
|
+
## Scaling to Large Organizations
|
|
265
|
+
|
|
266
|
+
Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.
|
|
267
|
+
|
|
268
|
+
| Concern | How it scales |
|
|
269
|
+
|---|---|
|
|
270
|
+
| **System prompt size** | >20 metrics: auto-switches to compact domain counts (`acquisition (45)`) instead of listing every metric |
|
|
271
|
+
| **Table discovery** | `list_tables` is paginated (default 50, with offset). Use `schema` filter for targeted browsing |
|
|
272
|
+
| **Wildcard schemas** | `tables: ["*"]` discovers tables from the database. Resolution is cached — no repeated queries |
|
|
273
|
+
| **Metric lookup** | Fuzzy search via `thefuzz` (C++ backed) — sub-millisecond even with 1000+ metrics |
|
|
274
|
+
| **SQL validation** | Set-based allowlist check — O(1) per table reference regardless of allowlist size |
|
|
275
|
+
|
|
247
276
|
## Resource Limits
|
|
248
277
|
|
|
249
278
|
```yaml
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/examples/revenue_agent/contract.yml
RENAMED
|
@@ -9,12 +9,15 @@ semantic:
|
|
|
9
9
|
- schema: analytics
|
|
10
10
|
tables: [orders, customers, subscriptions]
|
|
11
11
|
forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
|
|
12
|
+
domains:
|
|
13
|
+
revenue: [total_revenue, revenue_by_region]
|
|
12
14
|
rules:
|
|
13
15
|
- name: tenant_isolation
|
|
14
16
|
description: "All queries must filter by tenant_id"
|
|
15
17
|
enforcement: block
|
|
18
|
+
filter_column: tenant_id
|
|
16
19
|
- name: use_semantic_revenue
|
|
17
|
-
description: "Revenue calculations must use the
|
|
20
|
+
description: "Revenue calculations must use the metric definitions"
|
|
18
21
|
enforcement: warn
|
|
19
22
|
- name: no_select_star
|
|
20
23
|
description: "Must specify explicit columns"
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Semantic source — define only what the database can't tell the agent.
|
|
2
|
+
# Table columns are discovered at runtime via the describe_table tool.
|
|
3
|
+
|
|
4
|
+
metrics:
|
|
5
|
+
- name: total_revenue
|
|
6
|
+
description: "Total revenue from completed orders"
|
|
7
|
+
sql_expression: "SUM(amount) FILTER (WHERE status = 'completed')"
|
|
8
|
+
source_model: analytics.orders
|
|
9
|
+
filters:
|
|
10
|
+
- "status = 'completed'"
|
|
11
|
+
|
|
12
|
+
- name: revenue_by_region
|
|
13
|
+
description: "Revenue broken down by customer region"
|
|
14
|
+
sql_expression: "SUM(o.amount) GROUP BY c.region"
|
|
15
|
+
source_model: analytics.orders
|
|
16
|
+
filters:
|
|
17
|
+
- "o.status = 'completed'"
|
|
18
|
+
|
|
19
|
+
relationships:
|
|
20
|
+
- from: analytics.orders.customer_id
|
|
21
|
+
to: analytics.customers.id
|
|
22
|
+
type: many_to_one
|
|
@@ -23,6 +23,7 @@ class DataContract:
|
|
|
23
23
|
|
|
24
24
|
def __init__(self, schema: DataContractSchema) -> None:
|
|
25
25
|
self.schema = schema
|
|
26
|
+
self._tables_resolved: bool = False
|
|
26
27
|
|
|
27
28
|
@property
|
|
28
29
|
def name(self) -> str:
|
|
@@ -43,16 +44,18 @@ class DataContract:
|
|
|
43
44
|
"""Check if any schema uses wildcard ('*') for tables."""
|
|
44
45
|
return any("*" in entry.tables for entry in self.schema.semantic.allowed_tables)
|
|
45
46
|
|
|
46
|
-
def resolve_tables(self, adapter: DatabaseAdapter) -> None:
|
|
47
|
+
def resolve_tables(self, adapter: DatabaseAdapter, *, force: bool = False) -> None:
|
|
47
48
|
"""Expand wildcard tables using the database adapter.
|
|
48
49
|
|
|
49
50
|
Replaces ["*"] entries with actual table names from the database.
|
|
50
|
-
|
|
51
|
-
on the schema object.
|
|
51
|
+
Results are cached — subsequent calls are no-ops unless force=True.
|
|
52
52
|
"""
|
|
53
|
+
if self._tables_resolved and not force:
|
|
54
|
+
return
|
|
53
55
|
for entry in self.schema.semantic.allowed_tables:
|
|
54
56
|
if "*" in entry.tables:
|
|
55
57
|
entry.tables = adapter.list_tables(entry.schema_)
|
|
58
|
+
self._tables_resolved = True
|
|
56
59
|
|
|
57
60
|
def allowed_table_names(self) -> list[str]:
|
|
58
61
|
names: list[str] = []
|
|
@@ -171,6 +174,17 @@ class DataContract:
|
|
|
171
174
|
)
|
|
172
175
|
sections.append(line)
|
|
173
176
|
|
|
177
|
+
# Table relationships
|
|
178
|
+
if semantic_source is not None:
|
|
179
|
+
rels = semantic_source.get_relationships()
|
|
180
|
+
if rels:
|
|
181
|
+
sections.append(
|
|
182
|
+
"\n### Table Relationships\n"
|
|
183
|
+
"Use these join paths when combining tables:"
|
|
184
|
+
)
|
|
185
|
+
for r in rels:
|
|
186
|
+
sections.append(f"- {r.from_} \u2192 {r.to} ({r.type})")
|
|
187
|
+
|
|
174
188
|
# Resource limits
|
|
175
189
|
res = self.schema.resources
|
|
176
190
|
if res:
|
|
@@ -193,6 +207,10 @@ class DataContract:
|
|
|
193
207
|
|
|
194
208
|
return "\n".join(sections)
|
|
195
209
|
|
|
210
|
+
# Max metrics to list individually in system prompt before switching
|
|
211
|
+
# to compact domain-only summaries.
|
|
212
|
+
METRIC_DETAIL_THRESHOLD = 20
|
|
213
|
+
|
|
196
214
|
def _build_metrics_section(
|
|
197
215
|
self, semantic_source: SemanticSource | None
|
|
198
216
|
) -> str | None:
|
|
@@ -205,11 +223,27 @@ class DataContract:
|
|
|
205
223
|
|
|
206
224
|
domains = self.schema.semantic.domains
|
|
207
225
|
lines: list[str] = []
|
|
208
|
-
|
|
209
|
-
"\n### Available Metrics (use lookup_metric for full SQL definitions)"
|
|
210
|
-
)
|
|
226
|
+
compact = len(metrics) > self.METRIC_DETAIL_THRESHOLD
|
|
211
227
|
|
|
212
|
-
if domains:
|
|
228
|
+
if compact and domains:
|
|
229
|
+
# Large metric set with domains — show counts only
|
|
230
|
+
lines.append("\n### Available Metrics")
|
|
231
|
+
metric_names = {m.name for m in metrics}
|
|
232
|
+
domain_parts = []
|
|
233
|
+
for domain, names in domains.items():
|
|
234
|
+
count = sum(1 for n in names if n in metric_names)
|
|
235
|
+
if count:
|
|
236
|
+
domain_parts.append(f"{domain} ({count})")
|
|
237
|
+
lines.append(f"Domains: {', '.join(domain_parts)}")
|
|
238
|
+
lines.append(
|
|
239
|
+
'\nUse list_metrics(domain="...") to browse,'
|
|
240
|
+
' lookup_metric("...") to get SQL definitions.'
|
|
241
|
+
)
|
|
242
|
+
elif domains:
|
|
243
|
+
# Small metric set with domains — list with descriptions
|
|
244
|
+
lines.append(
|
|
245
|
+
"\n### Available Metrics (use lookup_metric for full SQL definitions)"
|
|
246
|
+
)
|
|
213
247
|
metric_map = {m.name: m for m in metrics}
|
|
214
248
|
for domain, names in domains.items():
|
|
215
249
|
entries = []
|
|
@@ -219,12 +253,27 @@ class DataContract:
|
|
|
219
253
|
entries.append(f"{m.name} \u2014 {m.description}")
|
|
220
254
|
if entries:
|
|
221
255
|
lines.append(f"**{domain}:** {', '.join(entries)}")
|
|
256
|
+
lines.append(
|
|
257
|
+
"\nUse the lookup_metric tool to get the SQL definition"
|
|
258
|
+
" before computing any KPI."
|
|
259
|
+
)
|
|
260
|
+
elif compact:
|
|
261
|
+
# Large metric set without domains — just show count
|
|
262
|
+
lines.append("\n### Available Metrics")
|
|
263
|
+
lines.append(f"{len(metrics)} metrics available.")
|
|
264
|
+
lines.append(
|
|
265
|
+
"\nUse list_metrics() to browse,"
|
|
266
|
+
' lookup_metric("...") to get SQL definitions.'
|
|
267
|
+
)
|
|
222
268
|
else:
|
|
269
|
+
# Small metric set without domains — list all
|
|
270
|
+
lines.append(
|
|
271
|
+
"\n### Available Metrics (use lookup_metric for full SQL definitions)"
|
|
272
|
+
)
|
|
223
273
|
for m in metrics:
|
|
224
274
|
lines.append(f"- {m.name} \u2014 {m.description}")
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
)
|
|
275
|
+
lines.append(
|
|
276
|
+
"\nUse the lookup_metric tool to get the SQL definition"
|
|
277
|
+
" before computing any KPI."
|
|
278
|
+
)
|
|
230
279
|
return "\n".join(lines)
|
|
@@ -20,12 +20,20 @@ class MetricDefinition:
|
|
|
20
20
|
filters: list[str] = field(default_factory=list)
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
@dataclass
|
|
24
|
+
class Relationship:
|
|
25
|
+
from_: str # "schema.table.column"
|
|
26
|
+
to: str # "schema.table.column"
|
|
27
|
+
type: str = "many_to_one" # many_to_one | one_to_one | many_to_many
|
|
28
|
+
|
|
29
|
+
|
|
23
30
|
@runtime_checkable
|
|
24
31
|
class SemanticSource(Protocol):
|
|
25
32
|
def get_metrics(self) -> list[MetricDefinition]: ...
|
|
26
33
|
def get_metric(self, name: str) -> MetricDefinition | None: ...
|
|
27
34
|
def get_table_schema(self, schema: str, table: str) -> TableSchema | None: ...
|
|
28
35
|
def search_metrics(self, query: str) -> list[MetricDefinition]: ...
|
|
36
|
+
def get_relationships(self) -> list[Relationship]: ...
|
|
29
37
|
|
|
30
38
|
|
|
31
39
|
def fuzzy_search_metrics(
|
|
@@ -7,7 +7,11 @@ from pathlib import Path
|
|
|
7
7
|
import yaml
|
|
8
8
|
|
|
9
9
|
from agentic_data_contracts.adapters.base import Column, TableSchema
|
|
10
|
-
from agentic_data_contracts.semantic.base import MetricDefinition, fuzzy_search_metrics
|
|
10
|
+
from agentic_data_contracts.semantic.base import (
|
|
11
|
+
MetricDefinition,
|
|
12
|
+
Relationship,
|
|
13
|
+
fuzzy_search_metrics,
|
|
14
|
+
)
|
|
11
15
|
|
|
12
16
|
|
|
13
17
|
class CubeSource:
|
|
@@ -54,5 +58,8 @@ class CubeSource:
|
|
|
54
58
|
def search_metrics(self, query: str) -> list[MetricDefinition]:
|
|
55
59
|
return fuzzy_search_metrics(self._metrics, self.get_metric, query)
|
|
56
60
|
|
|
61
|
+
def get_relationships(self) -> list[Relationship]:
|
|
62
|
+
return [] # TODO: parse from Cube joins config
|
|
63
|
+
|
|
57
64
|
def get_table_schema(self, schema: str, table: str) -> TableSchema | None:
|
|
58
65
|
return self._tables.get(f"{schema}.{table}")
|
|
@@ -7,7 +7,11 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
9
|
from agentic_data_contracts.adapters.base import Column, TableSchema
|
|
10
|
-
from agentic_data_contracts.semantic.base import MetricDefinition, fuzzy_search_metrics
|
|
10
|
+
from agentic_data_contracts.semantic.base import (
|
|
11
|
+
MetricDefinition,
|
|
12
|
+
Relationship,
|
|
13
|
+
fuzzy_search_metrics,
|
|
14
|
+
)
|
|
11
15
|
|
|
12
16
|
|
|
13
17
|
class DbtSource:
|
|
@@ -77,5 +81,8 @@ class DbtSource:
|
|
|
77
81
|
def search_metrics(self, query: str) -> list[MetricDefinition]:
|
|
78
82
|
return fuzzy_search_metrics(self._metrics, self.get_metric, query)
|
|
79
83
|
|
|
84
|
+
def get_relationships(self) -> list[Relationship]:
|
|
85
|
+
return [] # TODO: parse from dbt manifest relationships/refs
|
|
86
|
+
|
|
80
87
|
def get_table_schema(self, schema: str, table: str) -> TableSchema | None:
|
|
81
88
|
return self._tables.get(f"{schema}.{table}")
|
|
@@ -7,7 +7,11 @@ from pathlib import Path
|
|
|
7
7
|
import yaml
|
|
8
8
|
|
|
9
9
|
from agentic_data_contracts.adapters.base import Column, TableSchema
|
|
10
|
-
from agentic_data_contracts.semantic.base import MetricDefinition, fuzzy_search_metrics
|
|
10
|
+
from agentic_data_contracts.semantic.base import (
|
|
11
|
+
MetricDefinition,
|
|
12
|
+
Relationship,
|
|
13
|
+
fuzzy_search_metrics,
|
|
14
|
+
)
|
|
11
15
|
|
|
12
16
|
|
|
13
17
|
class YamlSource:
|
|
@@ -38,6 +42,14 @@ class YamlSource:
|
|
|
38
42
|
for c in t.get("columns", [])
|
|
39
43
|
]
|
|
40
44
|
)
|
|
45
|
+
self._relationships = [
|
|
46
|
+
Relationship(
|
|
47
|
+
from_=r["from"],
|
|
48
|
+
to=r["to"],
|
|
49
|
+
type=r.get("type", "many_to_one"),
|
|
50
|
+
)
|
|
51
|
+
for r in raw.get("relationships", [])
|
|
52
|
+
]
|
|
41
53
|
|
|
42
54
|
def get_metrics(self) -> list[MetricDefinition]:
|
|
43
55
|
return list(self._metrics)
|
|
@@ -51,5 +63,8 @@ class YamlSource:
|
|
|
51
63
|
def search_metrics(self, query: str) -> list[MetricDefinition]:
|
|
52
64
|
return fuzzy_search_metrics(self._metrics, self.get_metric, query)
|
|
53
65
|
|
|
66
|
+
def get_relationships(self) -> list[Relationship]:
|
|
67
|
+
return list(self._relationships)
|
|
68
|
+
|
|
54
69
|
def get_table_schema(self, schema: str, table: str) -> TableSchema | None:
|
|
55
70
|
return self._tables.get(f"{schema}.{table}")
|
|
@@ -60,14 +60,23 @@ def create_tools(
|
|
|
60
60
|
# ── Tool 2: list_tables ───────────────────────────────────────────────────
|
|
61
61
|
async def list_tables(args: dict[str, Any]) -> dict[str, Any]:
|
|
62
62
|
schema_filter = args.get("schema")
|
|
63
|
-
|
|
63
|
+
try:
|
|
64
|
+
limit = max(1, int(args.get("limit", 50)))
|
|
65
|
+
except (ValueError, TypeError):
|
|
66
|
+
limit = 50
|
|
67
|
+
try:
|
|
68
|
+
offset = max(0, int(args.get("offset", 0)))
|
|
69
|
+
except (ValueError, TypeError):
|
|
70
|
+
offset = 0
|
|
71
|
+
all_tables: list[dict[str, Any]] = []
|
|
64
72
|
for entry in contract.schema.semantic.allowed_tables:
|
|
65
73
|
if schema_filter and entry.schema_ != schema_filter:
|
|
66
74
|
continue
|
|
67
75
|
if "*" in entry.tables:
|
|
68
76
|
return _text_response(
|
|
69
77
|
f"Schema '{entry.schema_}' uses wildcard tables"
|
|
70
|
-
" but no database adapter is available
|
|
78
|
+
" but no database adapter is available"
|
|
79
|
+
" to resolve them."
|
|
71
80
|
)
|
|
72
81
|
for table in entry.tables:
|
|
73
82
|
info: dict[str, Any] = {
|
|
@@ -78,8 +87,13 @@ def create_tools(
|
|
|
78
87
|
ts = semantic_source.get_table_schema(entry.schema_, table)
|
|
79
88
|
if ts is not None:
|
|
80
89
|
info["columns"] = [c.name for c in ts.columns]
|
|
81
|
-
|
|
82
|
-
|
|
90
|
+
all_tables.append(info)
|
|
91
|
+
total = len(all_tables)
|
|
92
|
+
page = all_tables[offset : offset + limit]
|
|
93
|
+
result: dict[str, Any] = {"tables": page, "total": total}
|
|
94
|
+
if offset + limit < total:
|
|
95
|
+
result["next_offset"] = offset + limit
|
|
96
|
+
return _text_response(json.dumps(result))
|
|
83
97
|
|
|
84
98
|
# ── Tool 3: describe_table ────────────────────────────────────────────────
|
|
85
99
|
async def describe_table(args: dict[str, Any]) -> dict[str, Any]:
|
|
@@ -321,7 +335,8 @@ def create_tools(
|
|
|
321
335
|
name="list_tables",
|
|
322
336
|
description=(
|
|
323
337
|
"List allowed tables, optionally filtered by schema. "
|
|
324
|
-
"Includes column names when semantic source is available."
|
|
338
|
+
"Includes column names when semantic source is available. "
|
|
339
|
+
"Paginated \u2014 use limit/offset for large schemas."
|
|
325
340
|
),
|
|
326
341
|
input_schema={
|
|
327
342
|
"type": "object",
|
|
@@ -329,7 +344,15 @@ def create_tools(
|
|
|
329
344
|
"schema": {
|
|
330
345
|
"type": "string",
|
|
331
346
|
"description": "Optional schema name to filter by",
|
|
332
|
-
}
|
|
347
|
+
},
|
|
348
|
+
"limit": {
|
|
349
|
+
"type": "integer",
|
|
350
|
+
"description": "Max tables to return (default 50)",
|
|
351
|
+
},
|
|
352
|
+
"offset": {
|
|
353
|
+
"type": "integer",
|
|
354
|
+
"description": "Skip first N tables (default 0)",
|
|
355
|
+
},
|
|
333
356
|
},
|
|
334
357
|
"required": [],
|
|
335
358
|
},
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/semantic_source.yml
RENAMED
|
@@ -29,3 +29,24 @@ tables:
|
|
|
29
29
|
- name: status
|
|
30
30
|
type: VARCHAR
|
|
31
31
|
description: "Order status: pending, completed, cancelled"
|
|
32
|
+
- name: customer_id
|
|
33
|
+
type: INTEGER
|
|
34
|
+
description: "FK to customers"
|
|
35
|
+
|
|
36
|
+
- schema: analytics
|
|
37
|
+
table: customers
|
|
38
|
+
columns:
|
|
39
|
+
- name: id
|
|
40
|
+
type: INTEGER
|
|
41
|
+
description: "Primary key"
|
|
42
|
+
- name: name
|
|
43
|
+
type: VARCHAR
|
|
44
|
+
description: "Customer name"
|
|
45
|
+
- name: region
|
|
46
|
+
type: VARCHAR
|
|
47
|
+
description: "Geographic region"
|
|
48
|
+
|
|
49
|
+
relationships:
|
|
50
|
+
- from: analytics.orders.customer_id
|
|
51
|
+
to: analytics.customers.id
|
|
52
|
+
type: many_to_one
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Tests for scalability improvements: compact prompt, pagination, caching."""
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock
|
|
4
|
+
|
|
5
|
+
from agentic_data_contracts.adapters.base import DatabaseAdapter
|
|
6
|
+
from agentic_data_contracts.core.contract import DataContract
|
|
7
|
+
from agentic_data_contracts.core.schema import (
|
|
8
|
+
AllowedTable,
|
|
9
|
+
DataContractSchema,
|
|
10
|
+
SemanticConfig,
|
|
11
|
+
)
|
|
12
|
+
from agentic_data_contracts.semantic.base import MetricDefinition, Relationship
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FakeSemanticSource:
|
|
16
|
+
"""Fake source with configurable metric count."""
|
|
17
|
+
|
|
18
|
+
def __init__(self, count: int) -> None:
|
|
19
|
+
self._metrics = [
|
|
20
|
+
MetricDefinition(
|
|
21
|
+
name=f"metric_{i}",
|
|
22
|
+
description=f"Description for metric {i}",
|
|
23
|
+
sql_expression=f"SUM(col_{i})",
|
|
24
|
+
)
|
|
25
|
+
for i in range(count)
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
def get_metrics(self) -> list[MetricDefinition]:
|
|
29
|
+
return list(self._metrics)
|
|
30
|
+
|
|
31
|
+
def get_metric(self, name: str) -> MetricDefinition | None:
|
|
32
|
+
for m in self._metrics:
|
|
33
|
+
if m.name == name:
|
|
34
|
+
return m
|
|
35
|
+
return None
|
|
36
|
+
|
|
37
|
+
def get_table_schema(self, schema: str, table: str): # noqa: ANN201
|
|
38
|
+
return None
|
|
39
|
+
|
|
40
|
+
def search_metrics(self, query: str) -> list[MetricDefinition]:
|
|
41
|
+
return []
|
|
42
|
+
|
|
43
|
+
def get_relationships(self) -> list[Relationship]:
|
|
44
|
+
return []
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _make_contract_with_domains(
|
|
48
|
+
metric_names: list[str],
|
|
49
|
+
) -> DataContract:
|
|
50
|
+
domains = {
|
|
51
|
+
"domain_a": metric_names[: len(metric_names) // 2],
|
|
52
|
+
"domain_b": metric_names[len(metric_names) // 2 :],
|
|
53
|
+
}
|
|
54
|
+
schema = DataContractSchema(
|
|
55
|
+
name="test",
|
|
56
|
+
semantic=SemanticConfig(
|
|
57
|
+
allowed_tables=[
|
|
58
|
+
AllowedTable.model_validate({"schema": "public", "tables": ["t"]}),
|
|
59
|
+
],
|
|
60
|
+
domains=domains,
|
|
61
|
+
),
|
|
62
|
+
)
|
|
63
|
+
return DataContract(schema)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TestCompactMetricPrompt:
    """Checks how ``to_system_prompt`` renders metrics for small and large sets."""

    @staticmethod
    def _contract_without_domains() -> DataContract:
        """Return a one-table contract that declares no metric domains."""
        table_rule = AllowedTable.model_validate({"schema": "public", "tables": ["t"]})
        return DataContract(
            DataContractSchema(
                name="test",
                semantic=SemanticConfig(allowed_tables=[table_rule]),
            )
        )

    def test_small_set_lists_all_metrics(self) -> None:
        """Five metrics are described one by one in the prompt."""
        contract = _make_contract_with_domains([f"metric_{i}" for i in range(5)])
        prompt = contract.to_system_prompt(semantic_source=FakeSemanticSource(5))
        # Each metric appears as "<name> — <description>".
        for expected in ("metric_0 \u2014", "metric_4 \u2014"):
            assert expected in prompt

    def test_large_set_shows_domain_counts(self) -> None:
        """Thirty metrics with domains collapse into per-domain counts."""
        contract = _make_contract_with_domains([f"metric_{i}" for i in range(30)])
        prompt = contract.to_system_prompt(semantic_source=FakeSemanticSource(30))
        # Individual metric entries must be absent ...
        assert "metric_0 \u2014" not in prompt
        # ... replaced by domain counts and a pointer to the list_metrics tool.
        for expected in ("domain_a (15)", "domain_b (15)", "list_metrics"):
            assert expected in prompt

    def test_large_set_no_domains_shows_count(self) -> None:
        """Thirty metrics without domains collapse into a single total."""
        contract = self._contract_without_domains()
        prompt = contract.to_system_prompt(semantic_source=FakeSemanticSource(30))
        assert "30 metrics available" in prompt
        assert "metric_0 \u2014" not in prompt

    def test_threshold_boundary(self) -> None:
        """Twenty metrics are still listed individually; twenty-one are not."""
        contract = self._contract_without_domains()

        # Exactly at the threshold: individual listing.
        at_threshold = contract.to_system_prompt(
            semantic_source=FakeSemanticSource(20)
        )
        assert "metric_0 \u2014" in at_threshold

        # One above the threshold: compact mode.
        above_threshold = contract.to_system_prompt(
            semantic_source=FakeSemanticSource(21)
        )
        assert "metric_0 \u2014" not in above_threshold
        assert "21 metrics available" in above_threshold
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class TestWildcardCaching:
    """Checks that wildcard table resolution queries the adapter only once."""

    def test_resolve_tables_caches(self) -> None:
        """A repeated ``resolve_tables`` call must not list tables again."""
        wildcard = AllowedTable.model_validate({"schema": "s", "tables": ["*"]})
        semantic = SemanticConfig(allowed_tables=[wildcard])
        contract = DataContract(DataContractSchema(name="test", semantic=semantic))

        adapter = MagicMock(spec=DatabaseAdapter)
        adapter.list_tables.return_value = ["t1", "t2"]

        # First resolution expands the wildcard through the adapter.
        contract.resolve_tables(adapter)
        assert "s.t1" in contract.allowed_table_names()
        assert adapter.list_tables.call_count == 1

        # Second resolution should be a no-op: the call count stays at one.
        contract.resolve_tables(adapter)
        assert adapter.list_tables.call_count == 1
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Tests for table relationship metadata."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from agentic_data_contracts.core.contract import DataContract
|
|
6
|
+
from agentic_data_contracts.core.schema import (
|
|
7
|
+
AllowedTable,
|
|
8
|
+
DataContractSchema,
|
|
9
|
+
SemanticConfig,
|
|
10
|
+
)
|
|
11
|
+
from agentic_data_contracts.semantic.cube import CubeSource
|
|
12
|
+
from agentic_data_contracts.semantic.dbt import DbtSource
|
|
13
|
+
from agentic_data_contracts.semantic.yaml_source import YamlSource
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_yaml_source_loads_relationships(fixtures_dir: Path) -> None:
    """The YAML fixture exposes exactly one orders-to-customers relationship."""
    yaml_source = YamlSource(fixtures_dir / "semantic_source.yml")
    relationships = yaml_source.get_relationships()
    assert len(relationships) == 1
    only = relationships[0]
    assert only.from_ == "analytics.orders.customer_id"
    assert only.to == "analytics.customers.id"
    assert only.type == "many_to_one"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_yaml_source_no_relationships(tmp_path: Path) -> None:
    """A YAML file containing only ``metrics: []`` yields no relationships."""
    yaml_file = tmp_path / "empty.yml"
    yaml_file.write_text("metrics: []")
    assert YamlSource(yaml_file).get_relationships() == []
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_dbt_source_returns_empty_relationships(
    fixtures_dir: Path,
) -> None:
    """The dbt source built from the sample manifest reports no relationships."""
    manifest = fixtures_dir / "sample_dbt_manifest.json"
    assert DbtSource(manifest).get_relationships() == []
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_cube_source_returns_empty_relationships(
    fixtures_dir: Path,
) -> None:
    """The Cube source built from the sample schema reports no relationships."""
    cube_schema = fixtures_dir / "sample_cube_schema.yml"
    assert CubeSource(cube_schema).get_relationships() == []
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_system_prompt_includes_relationships(
    fixtures_dir: Path,
) -> None:
    """The system prompt mentions the relationship declared in the YAML fixture."""
    table_rule = AllowedTable.model_validate(
        {"schema": "analytics", "tables": ["orders", "customers"]}
    )
    contract = DataContract(
        DataContractSchema(
            name="test",
            semantic=SemanticConfig(allowed_tables=[table_rule]),
        )
    )
    yaml_source = YamlSource(fixtures_dir / "semantic_source.yml")

    prompt = contract.to_system_prompt(semantic_source=yaml_source)

    # Section heading, both endpoints and the cardinality must all be rendered.
    for expected in (
        "Table Relationships",
        "analytics.orders.customer_id",
        "analytics.customers.id",
        "many_to_one",
    ):
        assert expected in prompt
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_system_prompt_no_relationships_when_empty(
    fixtures_dir: Path,
) -> None:
    """With the sample dbt source, the prompt has no relationships section."""
    table_rule = AllowedTable.model_validate(
        {"schema": "analytics", "tables": ["orders"]}
    )
    contract = DataContract(
        DataContractSchema(
            name="test",
            semantic=SemanticConfig(allowed_tables=[table_rule]),
        )
    )
    dbt_source = DbtSource(fixtures_dir / "sample_dbt_manifest.json")

    prompt = contract.to_system_prompt(semantic_source=dbt_source)

    assert "Table Relationships" not in prompt
|
|
@@ -39,7 +39,7 @@ def test_get_metric_not_found(source: YamlSource) -> None:
|
|
|
39
39
|
def test_get_table_schema(source: YamlSource) -> None:
|
|
40
40
|
schema = source.get_table_schema("analytics", "orders")
|
|
41
41
|
assert schema is not None
|
|
42
|
-
assert len(schema.columns) ==
|
|
42
|
+
assert len(schema.columns) == 5
|
|
43
43
|
col_names = [c.name for c in schema.columns]
|
|
44
44
|
assert "id" in col_names
|
|
45
45
|
assert "amount" in col_names
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Tests for list_tables pagination."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from agentic_data_contracts.core.contract import DataContract
|
|
8
|
+
from agentic_data_contracts.core.schema import (
|
|
9
|
+
AllowedTable,
|
|
10
|
+
DataContractSchema,
|
|
11
|
+
SemanticConfig,
|
|
12
|
+
)
|
|
13
|
+
from agentic_data_contracts.tools.factory import create_tools
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture
def large_contract() -> DataContract:
    """Provide a contract allowing 60 ``analytics`` tables to test pagination."""
    table_rule = AllowedTable.model_validate(
        {"schema": "analytics", "tables": [f"table_{i}" for i in range(60)]}
    )
    semantic = SemanticConfig(allowed_tables=[table_rule])
    return DataContract(DataContractSchema(name="test", semantic=semantic))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.asyncio
async def test_list_tables_default_limit(
    large_contract: DataContract,
) -> None:
    """Called without arguments, ``list_tables`` returns the first 50 of 60 tables."""
    list_tables = next(
        tool for tool in create_tools(large_contract) if tool.name == "list_tables"
    )
    response = await list_tables.callable({})
    payload = json.loads(response["content"][0]["text"])
    assert len(payload["tables"]) == 50  # default limit
    assert payload["total"] == 60
    assert payload["next_offset"] == 50
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@pytest.mark.asyncio
async def test_list_tables_custom_limit(
    large_contract: DataContract,
) -> None:
    """An explicit ``limit`` of 10 returns 10 tables and points at offset 10."""
    list_tables = next(
        tool for tool in create_tools(large_contract) if tool.name == "list_tables"
    )
    response = await list_tables.callable({"limit": 10})
    payload = json.loads(response["content"][0]["text"])
    assert len(payload["tables"]) == 10
    assert payload["total"] == 60
    assert payload["next_offset"] == 10
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.asyncio
async def test_list_tables_with_offset(
    large_contract: DataContract,
) -> None:
    """Offset 50 with limit 10 returns the final 10 tables and no next offset."""
    list_tables = next(
        tool for tool in create_tools(large_contract) if tool.name == "list_tables"
    )
    response = await list_tables.callable({"limit": 10, "offset": 50})
    payload = json.loads(response["content"][0]["text"])
    assert len(payload["tables"]) == 10
    assert payload["total"] == 60
    assert "next_offset" not in payload  # last page
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.mark.asyncio
async def test_list_tables_small_set_no_next(
    fixtures_dir,
) -> None:
    """The minimal contract lists one table in total and offers no next offset."""
    contract = DataContract.from_yaml(fixtures_dir / "minimal_contract.yml")
    list_tables = next(
        tool for tool in create_tools(contract) if tool.name == "list_tables"
    )
    response = await list_tables.callable({})
    payload = json.loads(response["content"][0]["text"])
    assert payload["total"] == 1
    assert "next_offset" not in payload
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
metrics:
|
|
2
|
-
- name: total_revenue
|
|
3
|
-
description: "Total revenue from completed orders"
|
|
4
|
-
sql_expression: "SUM(amount) FILTER (WHERE status = 'completed')"
|
|
5
|
-
source_model: analytics.orders
|
|
6
|
-
filters:
|
|
7
|
-
- "status = 'completed'"
|
|
8
|
-
- name: revenue_by_region
|
|
9
|
-
description: "Revenue broken down by customer region"
|
|
10
|
-
sql_expression: "SUM(o.amount) GROUP BY c.region"
|
|
11
|
-
source_model: analytics.orders
|
|
12
|
-
filters:
|
|
13
|
-
- "o.status = 'completed'"
|
|
14
|
-
|
|
15
|
-
tables:
|
|
16
|
-
- schema: analytics
|
|
17
|
-
table: orders
|
|
18
|
-
columns:
|
|
19
|
-
- name: id
|
|
20
|
-
type: INTEGER
|
|
21
|
-
description: "Order ID"
|
|
22
|
-
- name: customer_id
|
|
23
|
-
type: INTEGER
|
|
24
|
-
description: "FK to customers"
|
|
25
|
-
- name: amount
|
|
26
|
-
type: DECIMAL
|
|
27
|
-
description: "Order total in USD"
|
|
28
|
-
- name: status
|
|
29
|
-
type: VARCHAR
|
|
30
|
-
description: "pending, completed, cancelled"
|
|
31
|
-
- name: tenant_id
|
|
32
|
-
type: VARCHAR
|
|
33
|
-
description: "Tenant identifier"
|
|
34
|
-
- name: created_at
|
|
35
|
-
type: DATE
|
|
36
|
-
description: "Order date"
|
|
37
|
-
- schema: analytics
|
|
38
|
-
table: customers
|
|
39
|
-
columns:
|
|
40
|
-
- name: id
|
|
41
|
-
type: INTEGER
|
|
42
|
-
description: "Customer ID"
|
|
43
|
-
- name: name
|
|
44
|
-
type: VARCHAR
|
|
45
|
-
description: "Customer name"
|
|
46
|
-
- name: region
|
|
47
|
-
type: VARCHAR
|
|
48
|
-
description: "Geographic region"
|
|
49
|
-
- name: tenant_id
|
|
50
|
-
type: VARCHAR
|
|
51
|
-
description: "Tenant identifier"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/examples/revenue_agent/agent.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/examples/revenue_agent/setup_db.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/minimal_contract.yml
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/sample_cube_schema.yml
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/fixtures/valid_contract.yml
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_adapters/__init__.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_adapters/test_duckdb.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_bridge/test_compiler.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_contract.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_schema.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_sdk_config.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_core/test_session.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/__init__.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_cube.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_dbt.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_search.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_auto_load.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_factory.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_tools/test_middleware.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_validation/__init__.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_validation/test_checkers.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.4 → agentic_data_contracts-0.2.6}/tests/test_validation/test_explain.py
RENAMED
|
File without changes
|
|
File without changes
|