agentic-data-contracts 0.2.5__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/CHANGELOG.md +8 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/PKG-INFO +13 -1
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/README.md +12 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/examples/revenue_agent/contract.yml +4 -1
- agentic_data_contracts-0.2.6/examples/revenue_agent/semantic.yml +22 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/pyproject.toml +1 -1
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/core/contract.py +50 -12
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/tools/factory.py +29 -6
- agentic_data_contracts-0.2.6/tests/test_core/test_scalability.py +144 -0
- agentic_data_contracts-0.2.6/tests/test_tools/test_pagination.py +80 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/uv.lock +1 -1
- agentic_data_contracts-0.2.5/examples/revenue_agent/semantic.yml +0 -51
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/.github/dependabot.yml +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/.github/workflows/ci.yml +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/.gitignore +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/.pre-commit-config.yaml +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/.python-version +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/CLAUDE.md +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/LICENSE +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/docs/architecture.md +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/examples/revenue_agent/agent.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/examples/revenue_agent/setup_db.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/adapters/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/adapters/base.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/adapters/duckdb.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/bridge/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/bridge/compiler.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/core/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/core/schema.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/core/session.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/py.typed +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/base.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/cube.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/dbt.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/semantic/yaml_source.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/tools/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/tools/middleware.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/tools/sdk.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/validation/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/validation/checkers.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/validation/explain.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/validation/validator.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/conftest.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/minimal_contract.yml +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/sample_cube_schema.yml +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/sample_dbt_manifest.json +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/semantic_source.yml +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/valid_contract.yml +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_adapters/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_adapters/test_duckdb.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_bridge/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_bridge/test_compiler.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_contract.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_load_semantic_source.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_schema.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_sdk_config.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_session.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_system_prompt_metrics.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_wildcard_tables.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_public_api.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_cube.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_dbt.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_relationships.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_search.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_yaml_source.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_auto_load.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_factory.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_middleware.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_sdk.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_semantic_tools.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_wildcard_tools.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_validation/__init__.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_validation/test_checkers.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_validation/test_explain.py +0 -0
- {agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_validation/test_validator.py +0 -0
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.6] - 2026-03-29
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- **Compact system prompt at scale**: When metrics exceed 20, the system prompt shows domain names with counts (e.g., "acquisition (45)") instead of listing every metric. Reduces prompt from ~6K to ~100 tokens for large metric sets.
|
|
10
|
+
- **Paginated `list_tables`**: Added `limit` (default 50) and `offset` parameters for handling schemas with many tables. Response includes `total` count and `next_offset` for pagination.
|
|
11
|
+
- **Cached wildcard resolution**: `resolve_tables()` is now idempotent — subsequent calls are no-ops, avoiding redundant database queries.
|
|
12
|
+
|
|
5
13
|
## [0.2.5] - 2026-03-29
|
|
6
14
|
|
|
7
15
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentic-data-contracts
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: YAML-first data contract governance for AI agents
|
|
5
5
|
Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
|
|
6
6
|
Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
|
|
@@ -314,6 +314,18 @@ lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candi
|
|
|
314
314
|
list_metrics(domain="retention") → only retention metrics
|
|
315
315
|
```
|
|
316
316
|
|
|
317
|
+
## Scaling to Large Organizations
|
|
318
|
+
|
|
319
|
+
Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.
|
|
320
|
+
|
|
321
|
+
| Concern | How it scales |
|
|
322
|
+
|---|---|
|
|
323
|
+
| **System prompt size** | >20 metrics: auto-switches to compact domain counts (`acquisition (45)`) instead of listing every metric |
|
|
324
|
+
| **Table discovery** | `list_tables` is paginated (default 50, with offset). Use `schema` filter for targeted browsing |
|
|
325
|
+
| **Wildcard schemas** | `tables: ["*"]` discovers tables from the database. Resolution is cached — no repeated queries |
|
|
326
|
+
| **Metric lookup** | Fuzzy search via `thefuzz` (C++ backed) — sub-millisecond even with 1000+ metrics |
|
|
327
|
+
| **SQL validation** | Set-based allowlist check — O(1) per table reference regardless of allowlist size |
|
|
328
|
+
|
|
317
329
|
## Resource Limits
|
|
318
330
|
|
|
319
331
|
```yaml
|
|
@@ -261,6 +261,18 @@ lookup_metric("acquisition cost") → fuzzy match, returns [CAC, CPA] as candi
|
|
|
261
261
|
list_metrics(domain="retention") → only retention metrics
|
|
262
262
|
```
|
|
263
263
|
|
|
264
|
+
## Scaling to Large Organizations
|
|
265
|
+
|
|
266
|
+
Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.
|
|
267
|
+
|
|
268
|
+
| Concern | How it scales |
|
|
269
|
+
|---|---|
|
|
270
|
+
| **System prompt size** | >20 metrics: auto-switches to compact domain counts (`acquisition (45)`) instead of listing every metric |
|
|
271
|
+
| **Table discovery** | `list_tables` is paginated (default 50, with offset). Use `schema` filter for targeted browsing |
|
|
272
|
+
| **Wildcard schemas** | `tables: ["*"]` discovers tables from the database. Resolution is cached — no repeated queries |
|
|
273
|
+
| **Metric lookup** | Fuzzy search via `thefuzz` (C++ backed) — sub-millisecond even with 1000+ metrics |
|
|
274
|
+
| **SQL validation** | Set-based allowlist check — O(1) per table reference regardless of allowlist size |
|
|
275
|
+
|
|
264
276
|
## Resource Limits
|
|
265
277
|
|
|
266
278
|
```yaml
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/examples/revenue_agent/contract.yml
RENAMED
|
@@ -9,12 +9,15 @@ semantic:
|
|
|
9
9
|
- schema: analytics
|
|
10
10
|
tables: [orders, customers, subscriptions]
|
|
11
11
|
forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
|
|
12
|
+
domains:
|
|
13
|
+
revenue: [total_revenue, revenue_by_region]
|
|
12
14
|
rules:
|
|
13
15
|
- name: tenant_isolation
|
|
14
16
|
description: "All queries must filter by tenant_id"
|
|
15
17
|
enforcement: block
|
|
18
|
+
filter_column: tenant_id
|
|
16
19
|
- name: use_semantic_revenue
|
|
17
|
-
description: "Revenue calculations must use the
|
|
20
|
+
description: "Revenue calculations must use the metric definitions"
|
|
18
21
|
enforcement: warn
|
|
19
22
|
- name: no_select_star
|
|
20
23
|
description: "Must specify explicit columns"
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Semantic source — define only what the database can't tell the agent.
|
|
2
|
+
# Table columns are discovered at runtime via the describe_table tool.
|
|
3
|
+
|
|
4
|
+
metrics:
|
|
5
|
+
- name: total_revenue
|
|
6
|
+
description: "Total revenue from completed orders"
|
|
7
|
+
sql_expression: "SUM(amount) FILTER (WHERE status = 'completed')"
|
|
8
|
+
source_model: analytics.orders
|
|
9
|
+
filters:
|
|
10
|
+
- "status = 'completed'"
|
|
11
|
+
|
|
12
|
+
- name: revenue_by_region
|
|
13
|
+
description: "Revenue broken down by customer region"
|
|
14
|
+
sql_expression: "SUM(o.amount) GROUP BY c.region"
|
|
15
|
+
source_model: analytics.orders
|
|
16
|
+
filters:
|
|
17
|
+
- "o.status = 'completed'"
|
|
18
|
+
|
|
19
|
+
relationships:
|
|
20
|
+
- from: analytics.orders.customer_id
|
|
21
|
+
to: analytics.customers.id
|
|
22
|
+
type: many_to_one
|
|
@@ -23,6 +23,7 @@ class DataContract:
|
|
|
23
23
|
|
|
24
24
|
def __init__(self, schema: DataContractSchema) -> None:
|
|
25
25
|
self.schema = schema
|
|
26
|
+
self._tables_resolved: bool = False
|
|
26
27
|
|
|
27
28
|
@property
|
|
28
29
|
def name(self) -> str:
|
|
@@ -43,16 +44,18 @@ class DataContract:
|
|
|
43
44
|
"""Check if any schema uses wildcard ('*') for tables."""
|
|
44
45
|
return any("*" in entry.tables for entry in self.schema.semantic.allowed_tables)
|
|
45
46
|
|
|
46
|
-
def resolve_tables(self, adapter: DatabaseAdapter) -> None:
|
|
47
|
+
def resolve_tables(self, adapter: DatabaseAdapter, *, force: bool = False) -> None:
|
|
47
48
|
"""Expand wildcard tables using the database adapter.
|
|
48
49
|
|
|
49
50
|
Replaces ["*"] entries with actual table names from the database.
|
|
50
|
-
|
|
51
|
-
on the schema object.
|
|
51
|
+
Results are cached — subsequent calls are no-ops unless force=True.
|
|
52
52
|
"""
|
|
53
|
+
if self._tables_resolved and not force:
|
|
54
|
+
return
|
|
53
55
|
for entry in self.schema.semantic.allowed_tables:
|
|
54
56
|
if "*" in entry.tables:
|
|
55
57
|
entry.tables = adapter.list_tables(entry.schema_)
|
|
58
|
+
self._tables_resolved = True
|
|
56
59
|
|
|
57
60
|
def allowed_table_names(self) -> list[str]:
|
|
58
61
|
names: list[str] = []
|
|
@@ -204,6 +207,10 @@ class DataContract:
|
|
|
204
207
|
|
|
205
208
|
return "\n".join(sections)
|
|
206
209
|
|
|
210
|
+
# Max metrics to list individually in system prompt before switching
|
|
211
|
+
# to compact domain-only summaries.
|
|
212
|
+
METRIC_DETAIL_THRESHOLD = 20
|
|
213
|
+
|
|
207
214
|
def _build_metrics_section(
|
|
208
215
|
self, semantic_source: SemanticSource | None
|
|
209
216
|
) -> str | None:
|
|
@@ -216,11 +223,27 @@ class DataContract:
|
|
|
216
223
|
|
|
217
224
|
domains = self.schema.semantic.domains
|
|
218
225
|
lines: list[str] = []
|
|
219
|
-
|
|
220
|
-
"\n### Available Metrics (use lookup_metric for full SQL definitions)"
|
|
221
|
-
)
|
|
226
|
+
compact = len(metrics) > self.METRIC_DETAIL_THRESHOLD
|
|
222
227
|
|
|
223
|
-
if domains:
|
|
228
|
+
if compact and domains:
|
|
229
|
+
# Large metric set with domains — show counts only
|
|
230
|
+
lines.append("\n### Available Metrics")
|
|
231
|
+
metric_names = {m.name for m in metrics}
|
|
232
|
+
domain_parts = []
|
|
233
|
+
for domain, names in domains.items():
|
|
234
|
+
count = sum(1 for n in names if n in metric_names)
|
|
235
|
+
if count:
|
|
236
|
+
domain_parts.append(f"{domain} ({count})")
|
|
237
|
+
lines.append(f"Domains: {', '.join(domain_parts)}")
|
|
238
|
+
lines.append(
|
|
239
|
+
'\nUse list_metrics(domain="...") to browse,'
|
|
240
|
+
' lookup_metric("...") to get SQL definitions.'
|
|
241
|
+
)
|
|
242
|
+
elif domains:
|
|
243
|
+
# Small metric set with domains — list with descriptions
|
|
244
|
+
lines.append(
|
|
245
|
+
"\n### Available Metrics (use lookup_metric for full SQL definitions)"
|
|
246
|
+
)
|
|
224
247
|
metric_map = {m.name: m for m in metrics}
|
|
225
248
|
for domain, names in domains.items():
|
|
226
249
|
entries = []
|
|
@@ -230,12 +253,27 @@ class DataContract:
|
|
|
230
253
|
entries.append(f"{m.name} \u2014 {m.description}")
|
|
231
254
|
if entries:
|
|
232
255
|
lines.append(f"**{domain}:** {', '.join(entries)}")
|
|
256
|
+
lines.append(
|
|
257
|
+
"\nUse the lookup_metric tool to get the SQL definition"
|
|
258
|
+
" before computing any KPI."
|
|
259
|
+
)
|
|
260
|
+
elif compact:
|
|
261
|
+
# Large metric set without domains — just show count
|
|
262
|
+
lines.append("\n### Available Metrics")
|
|
263
|
+
lines.append(f"{len(metrics)} metrics available.")
|
|
264
|
+
lines.append(
|
|
265
|
+
"\nUse list_metrics() to browse,"
|
|
266
|
+
' lookup_metric("...") to get SQL definitions.'
|
|
267
|
+
)
|
|
233
268
|
else:
|
|
269
|
+
# Small metric set without domains — list all
|
|
270
|
+
lines.append(
|
|
271
|
+
"\n### Available Metrics (use lookup_metric for full SQL definitions)"
|
|
272
|
+
)
|
|
234
273
|
for m in metrics:
|
|
235
274
|
lines.append(f"- {m.name} \u2014 {m.description}")
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
)
|
|
275
|
+
lines.append(
|
|
276
|
+
"\nUse the lookup_metric tool to get the SQL definition"
|
|
277
|
+
" before computing any KPI."
|
|
278
|
+
)
|
|
241
279
|
return "\n".join(lines)
|
|
@@ -60,14 +60,23 @@ def create_tools(
|
|
|
60
60
|
# ── Tool 2: list_tables ───────────────────────────────────────────────────
|
|
61
61
|
async def list_tables(args: dict[str, Any]) -> dict[str, Any]:
|
|
62
62
|
schema_filter = args.get("schema")
|
|
63
|
-
|
|
63
|
+
try:
|
|
64
|
+
limit = max(1, int(args.get("limit", 50)))
|
|
65
|
+
except (ValueError, TypeError):
|
|
66
|
+
limit = 50
|
|
67
|
+
try:
|
|
68
|
+
offset = max(0, int(args.get("offset", 0)))
|
|
69
|
+
except (ValueError, TypeError):
|
|
70
|
+
offset = 0
|
|
71
|
+
all_tables: list[dict[str, Any]] = []
|
|
64
72
|
for entry in contract.schema.semantic.allowed_tables:
|
|
65
73
|
if schema_filter and entry.schema_ != schema_filter:
|
|
66
74
|
continue
|
|
67
75
|
if "*" in entry.tables:
|
|
68
76
|
return _text_response(
|
|
69
77
|
f"Schema '{entry.schema_}' uses wildcard tables"
|
|
70
|
-
" but no database adapter is available
|
|
78
|
+
" but no database adapter is available"
|
|
79
|
+
" to resolve them."
|
|
71
80
|
)
|
|
72
81
|
for table in entry.tables:
|
|
73
82
|
info: dict[str, Any] = {
|
|
@@ -78,8 +87,13 @@ def create_tools(
|
|
|
78
87
|
ts = semantic_source.get_table_schema(entry.schema_, table)
|
|
79
88
|
if ts is not None:
|
|
80
89
|
info["columns"] = [c.name for c in ts.columns]
|
|
81
|
-
|
|
82
|
-
|
|
90
|
+
all_tables.append(info)
|
|
91
|
+
total = len(all_tables)
|
|
92
|
+
page = all_tables[offset : offset + limit]
|
|
93
|
+
result: dict[str, Any] = {"tables": page, "total": total}
|
|
94
|
+
if offset + limit < total:
|
|
95
|
+
result["next_offset"] = offset + limit
|
|
96
|
+
return _text_response(json.dumps(result))
|
|
83
97
|
|
|
84
98
|
# ── Tool 3: describe_table ────────────────────────────────────────────────
|
|
85
99
|
async def describe_table(args: dict[str, Any]) -> dict[str, Any]:
|
|
@@ -321,7 +335,8 @@ def create_tools(
|
|
|
321
335
|
name="list_tables",
|
|
322
336
|
description=(
|
|
323
337
|
"List allowed tables, optionally filtered by schema. "
|
|
324
|
-
"Includes column names when semantic source is available."
|
|
338
|
+
"Includes column names when semantic source is available. "
|
|
339
|
+
"Paginated \u2014 use limit/offset for large schemas."
|
|
325
340
|
),
|
|
326
341
|
input_schema={
|
|
327
342
|
"type": "object",
|
|
@@ -329,7 +344,15 @@ def create_tools(
|
|
|
329
344
|
"schema": {
|
|
330
345
|
"type": "string",
|
|
331
346
|
"description": "Optional schema name to filter by",
|
|
332
|
-
}
|
|
347
|
+
},
|
|
348
|
+
"limit": {
|
|
349
|
+
"type": "integer",
|
|
350
|
+
"description": "Max tables to return (default 50)",
|
|
351
|
+
},
|
|
352
|
+
"offset": {
|
|
353
|
+
"type": "integer",
|
|
354
|
+
"description": "Skip first N tables (default 0)",
|
|
355
|
+
},
|
|
333
356
|
},
|
|
334
357
|
"required": [],
|
|
335
358
|
},
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Tests for scalability improvements: compact prompt, pagination, caching."""
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock
|
|
4
|
+
|
|
5
|
+
from agentic_data_contracts.adapters.base import DatabaseAdapter
|
|
6
|
+
from agentic_data_contracts.core.contract import DataContract
|
|
7
|
+
from agentic_data_contracts.core.schema import (
|
|
8
|
+
AllowedTable,
|
|
9
|
+
DataContractSchema,
|
|
10
|
+
SemanticConfig,
|
|
11
|
+
)
|
|
12
|
+
from agentic_data_contracts.semantic.base import MetricDefinition, Relationship
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FakeSemanticSource:
|
|
16
|
+
"""Fake source with configurable metric count."""
|
|
17
|
+
|
|
18
|
+
def __init__(self, count: int) -> None:
|
|
19
|
+
self._metrics = [
|
|
20
|
+
MetricDefinition(
|
|
21
|
+
name=f"metric_{i}",
|
|
22
|
+
description=f"Description for metric {i}",
|
|
23
|
+
sql_expression=f"SUM(col_{i})",
|
|
24
|
+
)
|
|
25
|
+
for i in range(count)
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
def get_metrics(self) -> list[MetricDefinition]:
|
|
29
|
+
return list(self._metrics)
|
|
30
|
+
|
|
31
|
+
def get_metric(self, name: str) -> MetricDefinition | None:
|
|
32
|
+
for m in self._metrics:
|
|
33
|
+
if m.name == name:
|
|
34
|
+
return m
|
|
35
|
+
return None
|
|
36
|
+
|
|
37
|
+
def get_table_schema(self, schema: str, table: str): # noqa: ANN201
|
|
38
|
+
return None
|
|
39
|
+
|
|
40
|
+
def search_metrics(self, query: str) -> list[MetricDefinition]:
|
|
41
|
+
return []
|
|
42
|
+
|
|
43
|
+
def get_relationships(self) -> list[Relationship]:
|
|
44
|
+
return []
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _make_contract_with_domains(
|
|
48
|
+
metric_names: list[str],
|
|
49
|
+
) -> DataContract:
|
|
50
|
+
domains = {
|
|
51
|
+
"domain_a": metric_names[: len(metric_names) // 2],
|
|
52
|
+
"domain_b": metric_names[len(metric_names) // 2 :],
|
|
53
|
+
}
|
|
54
|
+
schema = DataContractSchema(
|
|
55
|
+
name="test",
|
|
56
|
+
semantic=SemanticConfig(
|
|
57
|
+
allowed_tables=[
|
|
58
|
+
AllowedTable.model_validate({"schema": "public", "tables": ["t"]}),
|
|
59
|
+
],
|
|
60
|
+
domains=domains,
|
|
61
|
+
),
|
|
62
|
+
)
|
|
63
|
+
return DataContract(schema)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TestCompactMetricPrompt:
|
|
67
|
+
def test_small_set_lists_all_metrics(self) -> None:
|
|
68
|
+
source = FakeSemanticSource(5)
|
|
69
|
+
dc = _make_contract_with_domains([f"metric_{i}" for i in range(5)])
|
|
70
|
+
prompt = dc.to_system_prompt(semantic_source=source)
|
|
71
|
+
# Should list individual metric descriptions
|
|
72
|
+
assert "metric_0 \u2014" in prompt
|
|
73
|
+
assert "metric_4 \u2014" in prompt
|
|
74
|
+
|
|
75
|
+
def test_large_set_shows_domain_counts(self) -> None:
|
|
76
|
+
source = FakeSemanticSource(30)
|
|
77
|
+
dc = _make_contract_with_domains([f"metric_{i}" for i in range(30)])
|
|
78
|
+
prompt = dc.to_system_prompt(semantic_source=source)
|
|
79
|
+
# Should NOT list individual metrics
|
|
80
|
+
assert "metric_0 \u2014" not in prompt
|
|
81
|
+
# Should show domain counts
|
|
82
|
+
assert "domain_a (15)" in prompt
|
|
83
|
+
assert "domain_b (15)" in prompt
|
|
84
|
+
assert "list_metrics" in prompt
|
|
85
|
+
|
|
86
|
+
def test_large_set_no_domains_shows_count(self) -> None:
|
|
87
|
+
source = FakeSemanticSource(30)
|
|
88
|
+
schema = DataContractSchema(
|
|
89
|
+
name="test",
|
|
90
|
+
semantic=SemanticConfig(
|
|
91
|
+
allowed_tables=[
|
|
92
|
+
AllowedTable.model_validate({"schema": "public", "tables": ["t"]}),
|
|
93
|
+
],
|
|
94
|
+
),
|
|
95
|
+
)
|
|
96
|
+
dc = DataContract(schema)
|
|
97
|
+
prompt = dc.to_system_prompt(semantic_source=source)
|
|
98
|
+
assert "30 metrics available" in prompt
|
|
99
|
+
assert "metric_0 \u2014" not in prompt
|
|
100
|
+
|
|
101
|
+
def test_threshold_boundary(self) -> None:
|
|
102
|
+
# Exactly at threshold — should still list individually
|
|
103
|
+
source = FakeSemanticSource(20)
|
|
104
|
+
schema = DataContractSchema(
|
|
105
|
+
name="test",
|
|
106
|
+
semantic=SemanticConfig(
|
|
107
|
+
allowed_tables=[
|
|
108
|
+
AllowedTable.model_validate({"schema": "public", "tables": ["t"]}),
|
|
109
|
+
],
|
|
110
|
+
),
|
|
111
|
+
)
|
|
112
|
+
dc = DataContract(schema)
|
|
113
|
+
prompt = dc.to_system_prompt(semantic_source=source)
|
|
114
|
+
assert "metric_0 \u2014" in prompt
|
|
115
|
+
|
|
116
|
+
# One above threshold — compact mode
|
|
117
|
+
source = FakeSemanticSource(21)
|
|
118
|
+
prompt = dc.to_system_prompt(semantic_source=source)
|
|
119
|
+
assert "metric_0 \u2014" not in prompt
|
|
120
|
+
assert "21 metrics available" in prompt
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class TestWildcardCaching:
|
|
124
|
+
def test_resolve_tables_caches(self) -> None:
|
|
125
|
+
dc = DataContract(
|
|
126
|
+
DataContractSchema(
|
|
127
|
+
name="test",
|
|
128
|
+
semantic=SemanticConfig(
|
|
129
|
+
allowed_tables=[
|
|
130
|
+
AllowedTable.model_validate({"schema": "s", "tables": ["*"]}),
|
|
131
|
+
],
|
|
132
|
+
),
|
|
133
|
+
)
|
|
134
|
+
)
|
|
135
|
+
mock_adapter = MagicMock(spec=DatabaseAdapter)
|
|
136
|
+
mock_adapter.list_tables.return_value = ["t1", "t2"]
|
|
137
|
+
|
|
138
|
+
dc.resolve_tables(mock_adapter)
|
|
139
|
+
assert "s.t1" in dc.allowed_table_names()
|
|
140
|
+
assert mock_adapter.list_tables.call_count == 1
|
|
141
|
+
|
|
142
|
+
# Second call should be a no-op
|
|
143
|
+
dc.resolve_tables(mock_adapter)
|
|
144
|
+
assert mock_adapter.list_tables.call_count == 1
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Tests for list_tables pagination."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from agentic_data_contracts.core.contract import DataContract
|
|
8
|
+
from agentic_data_contracts.core.schema import (
|
|
9
|
+
AllowedTable,
|
|
10
|
+
DataContractSchema,
|
|
11
|
+
SemanticConfig,
|
|
12
|
+
)
|
|
13
|
+
from agentic_data_contracts.tools.factory import create_tools
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture
|
|
17
|
+
def large_contract() -> DataContract:
|
|
18
|
+
"""Contract with many tables to test pagination."""
|
|
19
|
+
tables = [f"table_{i}" for i in range(60)]
|
|
20
|
+
schema = DataContractSchema(
|
|
21
|
+
name="test",
|
|
22
|
+
semantic=SemanticConfig(
|
|
23
|
+
allowed_tables=[
|
|
24
|
+
AllowedTable.model_validate({"schema": "analytics", "tables": tables}),
|
|
25
|
+
],
|
|
26
|
+
),
|
|
27
|
+
)
|
|
28
|
+
return DataContract(schema)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.asyncio
|
|
32
|
+
async def test_list_tables_default_limit(
|
|
33
|
+
large_contract: DataContract,
|
|
34
|
+
) -> None:
|
|
35
|
+
tools = create_tools(large_contract)
|
|
36
|
+
tool = next(t for t in tools if t.name == "list_tables")
|
|
37
|
+
result = await tool.callable({})
|
|
38
|
+
data = json.loads(result["content"][0]["text"])
|
|
39
|
+
assert len(data["tables"]) == 50 # default limit
|
|
40
|
+
assert data["total"] == 60
|
|
41
|
+
assert data["next_offset"] == 50
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@pytest.mark.asyncio
|
|
45
|
+
async def test_list_tables_custom_limit(
|
|
46
|
+
large_contract: DataContract,
|
|
47
|
+
) -> None:
|
|
48
|
+
tools = create_tools(large_contract)
|
|
49
|
+
tool = next(t for t in tools if t.name == "list_tables")
|
|
50
|
+
result = await tool.callable({"limit": 10})
|
|
51
|
+
data = json.loads(result["content"][0]["text"])
|
|
52
|
+
assert len(data["tables"]) == 10
|
|
53
|
+
assert data["total"] == 60
|
|
54
|
+
assert data["next_offset"] == 10
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.asyncio
|
|
58
|
+
async def test_list_tables_with_offset(
|
|
59
|
+
large_contract: DataContract,
|
|
60
|
+
) -> None:
|
|
61
|
+
tools = create_tools(large_contract)
|
|
62
|
+
tool = next(t for t in tools if t.name == "list_tables")
|
|
63
|
+
result = await tool.callable({"limit": 10, "offset": 50})
|
|
64
|
+
data = json.loads(result["content"][0]["text"])
|
|
65
|
+
assert len(data["tables"]) == 10
|
|
66
|
+
assert data["total"] == 60
|
|
67
|
+
assert "next_offset" not in data # last page
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.mark.asyncio
|
|
71
|
+
async def test_list_tables_small_set_no_next(
|
|
72
|
+
fixtures_dir,
|
|
73
|
+
) -> None:
|
|
74
|
+
dc = DataContract.from_yaml(fixtures_dir / "minimal_contract.yml")
|
|
75
|
+
tools = create_tools(dc)
|
|
76
|
+
tool = next(t for t in tools if t.name == "list_tables")
|
|
77
|
+
result = await tool.callable({})
|
|
78
|
+
data = json.loads(result["content"][0]["text"])
|
|
79
|
+
assert data["total"] == 1
|
|
80
|
+
assert "next_offset" not in data
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
metrics:
|
|
2
|
-
- name: total_revenue
|
|
3
|
-
description: "Total revenue from completed orders"
|
|
4
|
-
sql_expression: "SUM(amount) FILTER (WHERE status = 'completed')"
|
|
5
|
-
source_model: analytics.orders
|
|
6
|
-
filters:
|
|
7
|
-
- "status = 'completed'"
|
|
8
|
-
- name: revenue_by_region
|
|
9
|
-
description: "Revenue broken down by customer region"
|
|
10
|
-
sql_expression: "SUM(o.amount) GROUP BY c.region"
|
|
11
|
-
source_model: analytics.orders
|
|
12
|
-
filters:
|
|
13
|
-
- "o.status = 'completed'"
|
|
14
|
-
|
|
15
|
-
tables:
|
|
16
|
-
- schema: analytics
|
|
17
|
-
table: orders
|
|
18
|
-
columns:
|
|
19
|
-
- name: id
|
|
20
|
-
type: INTEGER
|
|
21
|
-
description: "Order ID"
|
|
22
|
-
- name: customer_id
|
|
23
|
-
type: INTEGER
|
|
24
|
-
description: "FK to customers"
|
|
25
|
-
- name: amount
|
|
26
|
-
type: DECIMAL
|
|
27
|
-
description: "Order total in USD"
|
|
28
|
-
- name: status
|
|
29
|
-
type: VARCHAR
|
|
30
|
-
description: "pending, completed, cancelled"
|
|
31
|
-
- name: tenant_id
|
|
32
|
-
type: VARCHAR
|
|
33
|
-
description: "Tenant identifier"
|
|
34
|
-
- name: created_at
|
|
35
|
-
type: DATE
|
|
36
|
-
description: "Order date"
|
|
37
|
-
- schema: analytics
|
|
38
|
-
table: customers
|
|
39
|
-
columns:
|
|
40
|
-
- name: id
|
|
41
|
-
type: INTEGER
|
|
42
|
-
description: "Customer ID"
|
|
43
|
-
- name: name
|
|
44
|
-
type: VARCHAR
|
|
45
|
-
description: "Customer name"
|
|
46
|
-
- name: region
|
|
47
|
-
type: VARCHAR
|
|
48
|
-
description: "Geographic region"
|
|
49
|
-
- name: tenant_id
|
|
50
|
-
type: VARCHAR
|
|
51
|
-
description: "Tenant identifier"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/examples/revenue_agent/agent.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/examples/revenue_agent/setup_db.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/src/agentic_data_contracts/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/minimal_contract.yml
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/sample_cube_schema.yml
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/semantic_source.yml
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/fixtures/valid_contract.yml
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_adapters/__init__.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_adapters/test_duckdb.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_bridge/test_compiler.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_contract.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_schema.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_sdk_config.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_core/test_session.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/__init__.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_cube.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_dbt.py
RENAMED
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_semantic/test_search.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_auto_load.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_factory.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_tools/test_middleware.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_validation/__init__.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_validation/test_checkers.py
RENAMED
|
File without changes
|
{agentic_data_contracts-0.2.5 → agentic_data_contracts-0.2.6}/tests/test_validation/test_explain.py
RENAMED
|
File without changes
|
|
File without changes
|