PyPI - agentic-data-contracts - Versions diffs - 0.9.0__tar.gz → 0.9.2__tar.gz - Mend

agentic-data-contracts 0.9.0.tar.gz → 0.9.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/CHANGELOG.md RENAMED Viewed

@@ -2,6 +2,25 @@
 All notable changes to this project will be documented in this file.
+## [0.9.2] - 2026-04-15
+### Fixed
+- **Lazy session timer**: `ContractSession` no longer starts its wall-clock timer at construction. The timer now starts on the first `check_limits()` call, so idle time before the user's first interaction does not count against `temporal.max_duration_seconds`. This fixes premature "session expired" errors in long-lived agent setups (Chainlit, Webex bots) where the session object is created well before the first user message. (#16)
+### Added
+- **`ContractSession.reset_timer()`**: New method that resets the duration timer so it restarts on the next `check_limits()` call. Useful for frameworks with their own idle-timeout mechanisms that want to restart the clock on user activity.
+## [0.9.1] - 2026-04-13
+### Added
+- **Schema `description` field**: Optional description on `AllowedTable` entries, surfaced via `list_schemas` to help agents understand what each schema contains and when to use it.
+- **Schema `preferred` flag**: Optional boolean on `AllowedTable` (default `false`), surfaced via `list_schemas` to signal which schema the agent should prefer when similar tables exist across schemas.
+- **Example improvements**: Revenue agent example updated with `lookup_domain` and `lookup_metric` demo steps, schema description/preferred in contract, and fixed pre-existing missing `query_check` blocks on `tenant_isolation` and `no_select_star` rules.
+- **Domain-driven README**: README reframed around the domain-driven approach — agents understand business domains before writing SQL.
 ## [0.9.0] - 2026-04-13
 ### Added

{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/CLAUDE.md RENAMED Viewed

@@ -18,7 +18,7 @@
 src/agentic_data_contracts/
 ├── core/          # YAML loading, Pydantic models, lightweight enforcement
 ├── validation/    # sqlglot checkers, Validator (Layer 1 + 2), EXPLAIN protocol
-├── tools/         # 11-tool factory + middleware for Claude Agent SDK
+├── tools/         # 12-tool factory + middleware for Claude Agent SDK
 ├── semantic/      # dbt/Cube/YAML source integrations
 ├── adapters/      # DatabaseAdapter protocol + DuckDB implementation
 └── bridge/        # Optional ai-agent-contracts compilation

agentic_data_contracts-0.9.0/README.md → agentic_data_contracts-0.9.2/PKG-INFO RENAMED Viewed

@@ -1,3 +1,56 @@
+Metadata-Version: 2.4
+Name: agentic-data-contracts
+Version: 0.9.2
+Summary: YAML-first, domain-driven data governance for AI agents
+Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
+Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
+Project-URL: Issues, https://github.com/flyersworder/agentic-data-contracts/issues
+Project-URL: Documentation, https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md
+Author-email: Qing <qingye779@gmail.com>
+License: MIT
+License-File: LICENSE
+Keywords: ai-agents,analytics,claude,data-contracts,data-governance,dbt,domain-driven,llm,semantic-layer,sql-validation
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Database
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.12
+Requires-Dist: pydantic>=2.0
+Requires-Dist: pyyaml>=6.0.3
+Requires-Dist: sqlglot>=23.0
+Requires-Dist: thefuzz>=0.22.1
+Provides-Extra: agent-contracts
+Requires-Dist: ai-agent-contracts>=0.3.0; extra == 'agent-contracts'
+Provides-Extra: agent-sdk
+Requires-Dist: claude-agent-sdk>=0.1.58; extra == 'agent-sdk'
+Provides-Extra: all
+Requires-Dist: ai-agent-contracts>=0.3.0; extra == 'all'
+Requires-Dist: claude-agent-sdk>=0.1.58; extra == 'all'
+Requires-Dist: duckdb; extra == 'all'
+Requires-Dist: google-cloud-bigquery; extra == 'all'
+Requires-Dist: psycopg2-binary; extra == 'all'
+Requires-Dist: snowflake-connector-python; extra == 'all'
+Provides-Extra: bigquery
+Requires-Dist: google-cloud-bigquery; extra == 'bigquery'
+Provides-Extra: dev
+Requires-Dist: duckdb; extra == 'dev'
+Requires-Dist: pytest-asyncio>=1.3.0; extra == 'dev'
+Requires-Dist: pytest-cov>=7.1.0; extra == 'dev'
+Requires-Dist: pytest>=9.0.3; extra == 'dev'
+Requires-Dist: ruff>=0.15.10; extra == 'dev'
+Provides-Extra: duckdb
+Requires-Dist: duckdb; extra == 'duckdb'
+Provides-Extra: postgres
+Requires-Dist: psycopg2-binary; extra == 'postgres'
+Provides-Extra: snowflake
+Requires-Dist: snowflake-connector-python; extra == 'snowflake'
+Description-Content-Type: text/markdown
 # agentic-data-contracts
 [![PyPI version](https://img.shields.io/pypi/v/agentic-data-contracts.svg)](https://pypi.org/project/agentic-data-contracts/)
@@ -5,16 +58,29 @@
 [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://www.python.org/downloads/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-**Stop your AI agents from running wild on your data.**
+**YAML-first, domain-driven data governance for AI agents.**
-`agentic-data-contracts` lets data engineers define governance contracts in YAML — what tables an agent may query, which operations are forbidden, what resource limits apply — and enforces them automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
+`agentic-data-contracts` takes a domain-driven approach to AI agent governance: instead of letting agents figure out your data landscape by trial and error, you teach them your business domains, metrics, and rules upfront — in YAML. The agent starts by understanding *what* a business domain means, then discovers *which* metrics to use, then builds queries that comply with your governance rules. All enforced automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
-**Why?** AI agents querying databases face two problems: **resource runaway** (unbounded compute, endless retries, cost overruns) and **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions). This library addresses both with a single YAML contract.
+**Why domain-driven?** AI agents querying databases face three problems: **resource runaway** (unbounded compute, endless retries, cost overruns), **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions), and **lack of business context** (the agent doesn't know what "revenue" means in *your* company). This library addresses all three with a single YAML contract that combines governance rules with business domain knowledge.
 **Works with:** [Claude Agent SDK](https://github.com/anthropics/claude-agent-sdk-python) (primary target), or any Python agent framework. Optionally integrates with [ai-agent-contracts](https://pypi.org/project/ai-agent-contracts/) for formal resource governance.
 ## How It Works
+The agent follows a domain-driven workflow — understanding business context before writing SQL:
+```
+1. Agent receives: "How is revenue trending?"
+2. lookup_domain("revenue")     → "Revenue is recognized at fulfillment, not booking"
+3. lookup_metric("total_revenue") → SUM(amount) FILTER (WHERE status = 'completed')
+4. Agent writes SQL using the metric definition
+5. validate_query(sql)          → VALID (passes all contract rules)
+6. run_query(sql)               → results returned
+```
+Governance rules are enforced automatically at query time:
 ```
 Agent: "SELECT * FROM analytics.orders"
   -> BLOCKED (no SELECT * — specify explicit columns)
@@ -24,12 +90,9 @@ Agent: "SELECT order_id, amount FROM analytics.orders"
 Agent: "SELECT order_id, amount FROM analytics.orders WHERE tenant_id = 'acme'"
   -> PASSED + WARN (consider using semantic revenue definition)
-Agent: "DELETE FROM analytics.orders WHERE id = 1"
-  -> BLOCKED (forbidden operation: DELETE)
 ```
-The contract defines the rules. The library enforces them — before the query ever reaches the database.
+The contract defines the domains, metrics, and rules. The library enforces them — before the query ever reaches the database.
 ## Installation
@@ -64,10 +127,19 @@ semantic:
     path: "./semantic.yml"
   allowed_tables:
     - schema: analytics
+      description: "Curated analytics tables — prefer for reporting"
+      preferred: true
       tables: ["*"]          # all tables in schema (discovered from database)
     - schema: marketing
       tables: [campaigns]    # or list specific tables
   forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
+  domains:
+    - name: revenue
+      summary: "Financial metrics from completed orders"
+      description: >
+        Revenue is recognized at fulfillment, not at booking.
+        Excludes refunds and chargebacks unless stated.
+      metrics: [total_revenue]
   rules:
     - name: tenant_isolation
       description: "All queries must filter by tenant_id"
@@ -161,7 +233,7 @@ asyncio.run(demo())
 | Tool | Description |
 |------|-------------|
-| `list_schemas` | List all allowed database schemas from the contract |
+| `list_schemas` | List allowed schemas with descriptions and preferred flags |
 | `list_tables` | List allowed tables, optionally filtered by schema |
 | `describe_table` | Get full column details for an allowed table |
 | `preview_table` | Preview sample rows from an allowed table |
@@ -174,6 +246,57 @@ asyncio.run(demo())
 | `run_query` | Validate and execute a SQL query, returning results |
 | `get_contract_info` | Get the full contract: rules, limits, domains, and session status |
+## Domain-Driven Agent Workflow
+The core design principle: **agents should understand the business domain before writing SQL.** Instead of dumping table schemas and hoping for the best, the contract teaches the agent your business vocabulary through progressive disclosure:
+```
+1. Domain context     →  "What does 'revenue' mean here?"
+2. Metric definitions →  "How is 'total_revenue' calculated?"
+3. Query execution    →  "Run the validated SQL"
+```
+### Defining domains
+Each domain carries a description that teaches the agent your business rules — things the SQL alone can't express:
+```yaml
+semantic:
+  domains:
+    - name: acquisition
+      summary: "Customer acquisition costs and conversion metrics"
+      description: >
+        Acquisition metrics track the cost and efficiency of
+        acquiring new customers across all channels.
+        CAC is calculated using fully-loaded cost, not just ad spend.
+      metrics: [CAC, CPA, CPL, click_through_rate]
+    - name: retention
+      summary: "Customer retention, churn, and lifetime value"
+      description: >
+        Retention metrics measure how well we keep customers.
+        Churn is measured on a 30-day rolling window.
+        A customer is "active" if they had at least one qualifying
+        action in the window.
+      metrics: [churn_rate, LTV, retention_30d]
+```
+### How the agent uses domains
+The system prompt gives the agent a compact domain index. When a user asks a domain-specific question, the agent explores progressively:
+```
+lookup_domain("acquisition")        → business context + metric descriptions
+lookup_metric("CAC")                → SQL expression, source table, filters
+lookup_metric("acquisition cost")   → fuzzy match, returns [CAC, CPA] as candidates
+list_metrics(domain="retention")    → all metrics in the retention domain
+```
+This means the agent knows that "revenue is recognized at fulfillment, not at booking" *before* it writes a single line of SQL — reducing hallucinated metrics and incorrect calculations.
+### Why progressive disclosure works
+This pattern — compact index in the prompt, detailed context on demand — is the same philosophy validated by agent skill systems, MCP tool servers, and RAG architectures. Instead of overloading the agent's context window with everything upfront, you give it just enough to know *where to look*, then let it pull details when needed. The result is better token efficiency, more focused reasoning, and fewer hallucinations from context overload.
 ## Contract Rules
 Rules are enforced at three levels:
@@ -342,36 +465,6 @@ dc = DataContract.from_yaml("contract.yml")
 print(dc.to_system_prompt(renderer=MarkdownRenderer()))
 ```
-## Business Domains
-Domains provide business context that helps agents understand *what* they're being asked about before getting into the mechanics of *how* to calculate it:
-```yaml
-semantic:
-  domains:
-    - name: acquisition
-      summary: "Customer acquisition costs and conversion metrics"
-      description: >
-        Acquisition metrics track the cost and efficiency of
-        acquiring new customers across all channels.
-      metrics: [CAC, CPA, CPL, click_through_rate]
-    - name: retention
-      summary: "Customer retention, churn, and lifetime value"
-      description: >
-        Retention metrics measure how well we keep customers.
-        Churn is measured on a 30-day rolling window.
-      metrics: [churn_rate, LTV, retention_30d]
-```
-The system prompt shows a compact domain index. The agent uses `lookup_domain` for business context, then `lookup_metric` for SQL definitions:
-```
-lookup_domain("acquisition")        → full description + metrics with descriptions
-lookup_metric("CAC")                → exact match, SQL definition
-lookup_metric("acquisition cost")   → fuzzy match, returns [CAC, CPA] as candidates
-list_metrics(domain="retention")    → only retention metrics
-```
 ## Scaling to Large Organizations
 Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.

agentic_data_contracts-0.9.0/PKG-INFO → agentic_data_contracts-0.9.2/README.md RENAMED Viewed

@@ -1,56 +1,3 @@
-Metadata-Version: 2.4
-Name: agentic-data-contracts
-Version: 0.9.0
-Summary: YAML-first data contract governance for AI agents
-Project-URL: Homepage, https://github.com/flyersworder/agentic-data-contracts
-Project-URL: Repository, https://github.com/flyersworder/agentic-data-contracts
-Project-URL: Issues, https://github.com/flyersworder/agentic-data-contracts/issues
-Project-URL: Documentation, https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md
-Author-email: Qing <qingye779@gmail.com>
-License: MIT
-License-File: LICENSE
-Keywords: ai-agents,analytics,claude,data-contracts,data-governance,dbt,llm,sql-validation
-Classifier: Development Status :: 4 - Beta
-Classifier: Intended Audience :: Developers
-Classifier: Intended Audience :: Science/Research
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Classifier: Topic :: Database
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.12
-Requires-Dist: pydantic>=2.0
-Requires-Dist: pyyaml>=6.0
-Requires-Dist: sqlglot>=23.0
-Requires-Dist: thefuzz>=0.22.1
-Provides-Extra: agent-contracts
-Requires-Dist: ai-agent-contracts>=0.2.0; extra == 'agent-contracts'
-Provides-Extra: agent-sdk
-Requires-Dist: claude-agent-sdk>=0.1.52; extra == 'agent-sdk'
-Provides-Extra: all
-Requires-Dist: ai-agent-contracts>=0.2.0; extra == 'all'
-Requires-Dist: claude-agent-sdk>=0.1.52; extra == 'all'
-Requires-Dist: duckdb; extra == 'all'
-Requires-Dist: google-cloud-bigquery; extra == 'all'
-Requires-Dist: psycopg2-binary; extra == 'all'
-Requires-Dist: snowflake-connector-python; extra == 'all'
-Provides-Extra: bigquery
-Requires-Dist: google-cloud-bigquery; extra == 'bigquery'
-Provides-Extra: dev
-Requires-Dist: duckdb; extra == 'dev'
-Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
-Requires-Dist: pytest-cov>=6.0; extra == 'dev'
-Requires-Dist: pytest>=8.0; extra == 'dev'
-Requires-Dist: ruff>=0.8.0; extra == 'dev'
-Provides-Extra: duckdb
-Requires-Dist: duckdb; extra == 'duckdb'
-Provides-Extra: postgres
-Requires-Dist: psycopg2-binary; extra == 'postgres'
-Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python; extra == 'snowflake'
-Description-Content-Type: text/markdown
 # agentic-data-contracts
 [![PyPI version](https://img.shields.io/pypi/v/agentic-data-contracts.svg)](https://pypi.org/project/agentic-data-contracts/)
@@ -58,16 +5,29 @@ Description-Content-Type: text/markdown
 [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://www.python.org/downloads/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-**Stop your AI agents from running wild on your data.**
+**YAML-first, domain-driven data governance for AI agents.**
-`agentic-data-contracts` lets data engineers define governance contracts in YAML — what tables an agent may query, which operations are forbidden, what resource limits apply — and enforces them automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
+`agentic-data-contracts` takes a domain-driven approach to AI agent governance: instead of letting agents figure out your data landscape by trial and error, you teach them your business domains, metrics, and rules upfront — in YAML. The agent starts by understanding *what* a business domain means, then discovers *which* metrics to use, then builds queries that comply with your governance rules. All enforced automatically at query time via SQL validation powered by [sqlglot](https://github.com/tobymao/sqlglot).
-**Why?** AI agents querying databases face two problems: **resource runaway** (unbounded compute, endless retries, cost overruns) and **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions). This library addresses both with a single YAML contract.
+**Why domain-driven?** AI agents querying databases face three problems: **resource runaway** (unbounded compute, endless retries, cost overruns), **semantic inconsistency** (wrong tables, missing filters, ad-hoc metric definitions), and **lack of business context** (the agent doesn't know what "revenue" means in *your* company). This library addresses all three with a single YAML contract that combines governance rules with business domain knowledge.
 **Works with:** [Claude Agent SDK](https://github.com/anthropics/claude-agent-sdk-python) (primary target), or any Python agent framework. Optionally integrates with [ai-agent-contracts](https://pypi.org/project/ai-agent-contracts/) for formal resource governance.
 ## How It Works
+The agent follows a domain-driven workflow — understanding business context before writing SQL:
+```
+1. Agent receives: "How is revenue trending?"
+2. lookup_domain("revenue")     → "Revenue is recognized at fulfillment, not booking"
+3. lookup_metric("total_revenue") → SUM(amount) FILTER (WHERE status = 'completed')
+4. Agent writes SQL using the metric definition
+5. validate_query(sql)          → VALID (passes all contract rules)
+6. run_query(sql)               → results returned
+```
+Governance rules are enforced automatically at query time:
 ```
 Agent: "SELECT * FROM analytics.orders"
   -> BLOCKED (no SELECT * — specify explicit columns)
@@ -77,12 +37,9 @@ Agent: "SELECT order_id, amount FROM analytics.orders"
 Agent: "SELECT order_id, amount FROM analytics.orders WHERE tenant_id = 'acme'"
   -> PASSED + WARN (consider using semantic revenue definition)
-Agent: "DELETE FROM analytics.orders WHERE id = 1"
-  -> BLOCKED (forbidden operation: DELETE)
 ```
-The contract defines the rules. The library enforces them — before the query ever reaches the database.
+The contract defines the domains, metrics, and rules. The library enforces them — before the query ever reaches the database.
 ## Installation
@@ -117,10 +74,19 @@ semantic:
     path: "./semantic.yml"
   allowed_tables:
     - schema: analytics
+      description: "Curated analytics tables — prefer for reporting"
+      preferred: true
       tables: ["*"]          # all tables in schema (discovered from database)
     - schema: marketing
       tables: [campaigns]    # or list specific tables
   forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
+  domains:
+    - name: revenue
+      summary: "Financial metrics from completed orders"
+      description: >
+        Revenue is recognized at fulfillment, not at booking.
+        Excludes refunds and chargebacks unless stated.
+      metrics: [total_revenue]
   rules:
     - name: tenant_isolation
       description: "All queries must filter by tenant_id"
@@ -214,7 +180,7 @@ asyncio.run(demo())
 | Tool | Description |
 |------|-------------|
-| `list_schemas` | List all allowed database schemas from the contract |
+| `list_schemas` | List allowed schemas with descriptions and preferred flags |
 | `list_tables` | List allowed tables, optionally filtered by schema |
 | `describe_table` | Get full column details for an allowed table |
 | `preview_table` | Preview sample rows from an allowed table |
@@ -227,6 +193,57 @@ asyncio.run(demo())
 | `run_query` | Validate and execute a SQL query, returning results |
 | `get_contract_info` | Get the full contract: rules, limits, domains, and session status |
+## Domain-Driven Agent Workflow
+The core design principle: **agents should understand the business domain before writing SQL.** Instead of dumping table schemas and hoping for the best, the contract teaches the agent your business vocabulary through progressive disclosure:
+```
+1. Domain context     →  "What does 'revenue' mean here?"
+2. Metric definitions →  "How is 'total_revenue' calculated?"
+3. Query execution    →  "Run the validated SQL"
+```
+### Defining domains
+Each domain carries a description that teaches the agent your business rules — things the SQL alone can't express:
+```yaml
+semantic:
+  domains:
+    - name: acquisition
+      summary: "Customer acquisition costs and conversion metrics"
+      description: >
+        Acquisition metrics track the cost and efficiency of
+        acquiring new customers across all channels.
+        CAC is calculated using fully-loaded cost, not just ad spend.
+      metrics: [CAC, CPA, CPL, click_through_rate]
+    - name: retention
+      summary: "Customer retention, churn, and lifetime value"
+      description: >
+        Retention metrics measure how well we keep customers.
+        Churn is measured on a 30-day rolling window.
+        A customer is "active" if they had at least one qualifying
+        action in the window.
+      metrics: [churn_rate, LTV, retention_30d]
+```
+### How the agent uses domains
+The system prompt gives the agent a compact domain index. When a user asks a domain-specific question, the agent explores progressively:
+```
+lookup_domain("acquisition")        → business context + metric descriptions
+lookup_metric("CAC")                → SQL expression, source table, filters
+lookup_metric("acquisition cost")   → fuzzy match, returns [CAC, CPA] as candidates
+list_metrics(domain="retention")    → all metrics in the retention domain
+```
+This means the agent knows that "revenue is recognized at fulfillment, not at booking" *before* it writes a single line of SQL — reducing hallucinated metrics and incorrect calculations.
+### Why progressive disclosure works
+This pattern — compact index in the prompt, detailed context on demand — is the same philosophy validated by agent skill systems, MCP tool servers, and RAG architectures. Instead of overloading the agent's context window with everything upfront, you give it just enough to know *where to look*, then let it pull details when needed. The result is better token efficiency, more focused reasoning, and fewer hallucinations from context overload.
 ## Contract Rules
 Rules are enforced at three levels:
@@ -395,36 +412,6 @@ dc = DataContract.from_yaml("contract.yml")
 print(dc.to_system_prompt(renderer=MarkdownRenderer()))
 ```
-## Business Domains
-Domains provide business context that helps agents understand *what* they're being asked about before getting into the mechanics of *how* to calculate it:
-```yaml
-semantic:
-  domains:
-    - name: acquisition
-      summary: "Customer acquisition costs and conversion metrics"
-      description: >
-        Acquisition metrics track the cost and efficiency of
-        acquiring new customers across all channels.
-      metrics: [CAC, CPA, CPL, click_through_rate]
-    - name: retention
-      summary: "Customer retention, churn, and lifetime value"
-      description: >
-        Retention metrics measure how well we keep customers.
-        Churn is measured on a 30-day rolling window.
-      metrics: [churn_rate, LTV, retention_30d]
-```
-The system prompt shows a compact domain index. The agent uses `lookup_domain` for business context, then `lookup_metric` for SQL definitions:
-```
-lookup_domain("acquisition")        → full description + metrics with descriptions
-lookup_metric("CAC")                → exact match, SQL definition
-lookup_metric("acquisition cost")   → fuzzy match, returns [CAC, CPA] as candidates
-list_metrics(domain="retention")    → only retention metrics
-```
 ## Scaling to Large Organizations
 Tested for 200+ tables, 300+ metrics, 50+ relationships across multiple schemas.

{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/docs/architecture.md RENAMED Viewed

@@ -1,7 +1,7 @@
 # Agentic Data Contracts — Architecture
 **Date:** 2026-04-13
-**Status:** Implemented (v0.9.0)
+**Status:** Implemented (v0.9.2)
 **Author:** Qing Ye + Claude
 ## Problem Statement
@@ -93,6 +93,8 @@ semantic:
   # What the agent is allowed to access
   allowed_tables:
     - schema: analytics
+      description: "Curated analytics tables — prefer for reporting"
+      preferred: true                  # agent should prefer this schema
       tables: [orders, customers, subscriptions]
     - schema: raw
       tables: []                       # empty = nothing from this schema
@@ -186,7 +188,7 @@ When `ai-agent-contracts` is NOT installed, `ContractSession` provides self-cont
 - **Retry count** — incremented on each failed query attempt, checked against `max_retries`
 - **Token usage** — tracked via callback, checked against `token_budget`
-- **Wall-clock duration** — start time recorded, checked against `max_duration_seconds`
+- **Wall-clock duration** — lazy start on first `check_limits()` call (not at construction), checked against `max_duration_seconds`. Can be reset via `reset_timer()` for frameworks that manage their own idle timeouts.
 - **Cost estimate** — if EXPLAIN adapter returns cost info, checked against `cost_limit_usd`
 These are simple counters/timers with guard checks before each tool call. No formal state machine.

{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/agent.py RENAMED Viewed

@@ -86,6 +86,18 @@ async def _run_demo(tools: list, prompt: str) -> None:
     print("\n=== Available Tables ===")
     print(result["content"][0]["text"])
+    # Domain discovery: understand the business context before querying
+    tool = next(t for t in tools if t.name == "lookup_domain")
+    result = await tool.callable({"name": "revenue"})
+    print("\n=== Lookup Domain (revenue) ===")
+    print(result["content"][0]["text"])
+    # Metric lookup: get the SQL definition for a specific metric
+    tool = next(t for t in tools if t.name == "lookup_metric")
+    result = await tool.callable({"metric_name": "total_revenue"})
+    print("\n=== Lookup Metric (total_revenue) ===")
+    print(result["content"][0]["text"])
     tool = next(t for t in tools if t.name == "validate_query")
     sql = (
         "SELECT c.region, SUM(o.amount) as revenue "

{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/examples/revenue_agent/contract.yml RENAMED Viewed

@@ -7,20 +7,33 @@ semantic:
     path: "./semantic.yml"
   allowed_tables:
     - schema: analytics
+      description: "Curated analytics tables — prefer these for all reporting queries"
+      preferred: true
       tables: [orders, customers, subscriptions]
   forbidden_operations: [DELETE, DROP, TRUNCATE, UPDATE, INSERT]
   domains:
-    revenue: [total_revenue, revenue_by_region]
+    - name: revenue
+      summary: "Financial metrics from completed orders"
+      description: >
+        Revenue metrics track recognized revenue from completed orders.
+        Revenue is recognized at fulfillment, not at booking.
+        Excludes refunds, chargebacks, and pending orders unless stated.
+      metrics: [total_revenue, revenue_by_region]
+      tables: [analytics.orders, analytics.customers]
   rules:
     - name: tenant_isolation
       description: "All queries must filter by tenant_id"
       enforcement: block
+      query_check:
+        required_filter: tenant_id
     - name: use_semantic_revenue
       description: "Revenue calculations must use the metric definitions"
       enforcement: warn
     - name: no_select_star
       description: "Must specify explicit columns"
       enforcement: block
+      query_check:
+        no_select_star: true
 resources:
   cost_limit_usd: 5.00

{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "agentic-data-contracts"
-version = "0.9.0"
-description = "YAML-first data contract governance for AI agents"
+version = "0.9.2"
+description = "YAML-first, domain-driven data governance for AI agents"
 readme = "README.md"
 requires-python = ">=3.12"
 authors = [{ name = "Qing", email = "qingye779@gmail.com" }]
@@ -10,9 +10,11 @@ keywords = [
     "ai-agents",
     "data-contracts",
     "data-governance",
+    "domain-driven",
     "sql-validation",
     "llm",
     "claude",
+    "semantic-layer",
     "analytics",
     "dbt",
 ]
@@ -30,7 +32,7 @@ classifiers = [
 dependencies = [
     "sqlglot>=23.0",
     "pydantic>=2.0",
-    "pyyaml>=6.0",
+    "pyyaml>=6.0.3",
     "thefuzz>=0.22.1",
 ]
@@ -41,17 +43,17 @@ Issues = "https://github.com/flyersworder/agentic-data-contracts/issues"
 Documentation = "https://github.com/flyersworder/agentic-data-contracts/blob/main/docs/architecture.md"
 [project.optional-dependencies]
-agent-sdk = ["claude-agent-sdk>=0.1.52"]
-agent-contracts = ["ai-agent-contracts>=0.2.0"]
+agent-sdk = ["claude-agent-sdk>=0.1.58"]
+agent-contracts = ["ai-agent-contracts>=0.3.0"]
 bigquery = ["google-cloud-bigquery"]
 snowflake = ["snowflake-connector-python"]
 postgres = ["psycopg2-binary"]
 duckdb = ["duckdb"]
 dev = [
-    "pytest>=8.0",
-    "pytest-asyncio>=0.23",
-    "pytest-cov>=6.0",
-    "ruff>=0.8.0",
+    "pytest>=9.0.3",
+    "pytest-asyncio>=1.3.0",
+    "pytest-cov>=7.1.0",
+    "ruff>=0.15.10",
     "duckdb",
 ]
 all = [

{agentic_data_contracts-0.9.0 → agentic_data_contracts-0.9.2}/src/agentic_data_contracts/core/schema.py RENAMED Viewed

@@ -22,6 +22,8 @@ class SemanticSource(BaseModel):
 class AllowedTable(BaseModel):
     schema_: str = Field(alias="schema")
     tables: list[str] = Field(default_factory=list)
+    description: str | None = None
+    preferred: bool = False
     model_config = {"populate_by_name": True}

agentic-data-contracts 0.9.0__tar.gz → 0.9.2__tar.gz

agentic-data-contracts 0.9.0.tar.gz → 0.9.2.tar.gz