remdb 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/__init__.py +10 -1
- rem/agentic/context.py +13 -2
- rem/agentic/context_builder.py +45 -34
- rem/agentic/providers/pydantic_ai.py +302 -110
- rem/api/mcp_router/resources.py +223 -0
- rem/api/mcp_router/tools.py +76 -10
- rem/api/routers/auth.py +113 -10
- rem/api/routers/chat/child_streaming.py +22 -8
- rem/api/routers/chat/completions.py +3 -3
- rem/api/routers/chat/sse_events.py +3 -3
- rem/api/routers/chat/streaming.py +40 -45
- rem/api/routers/chat/streaming_utils.py +5 -7
- rem/api/routers/feedback.py +2 -2
- rem/api/routers/query.py +5 -14
- rem/cli/commands/ask.py +144 -33
- rem/cli/commands/experiments.py +1 -1
- rem/cli/commands/process.py +9 -1
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/session.py +117 -0
- rem/cli/main.py +2 -0
- rem/models/core/experiment.py +1 -1
- rem/models/entities/session.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +1 -1
- rem/schemas/agents/test_orchestrator.yaml +42 -0
- rem/schemas/agents/test_structured_output.yaml +52 -0
- rem/services/content/providers.py +151 -49
- rem/services/postgres/repository.py +1 -0
- rem/services/rem/README.md +4 -3
- rem/services/rem/parser.py +7 -10
- rem/services/rem/service.py +47 -0
- rem/services/session/compression.py +7 -3
- rem/services/session/pydantic_messages.py +25 -7
- rem/services/session/reload.py +2 -1
- rem/settings.py +64 -7
- rem/sql/migrations/004_cache_system.sql +3 -1
- rem/utils/schema_loader.py +135 -103
- {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/METADATA +6 -5
- {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/RECORD +40 -37
- {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
- {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
|
@@ -35,66 +35,41 @@ Unique Design:
|
|
|
35
35
|
- Tools and resources loaded from MCP servers via schema config
|
|
36
36
|
- Stripped descriptions to avoid LLM schema bloat
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
model initialization overhead. At scale (100+ requests/sec), this becomes expensive.
|
|
38
|
+
Caching Implementation:
|
|
39
|
+
Agent instance caching is now implemented to reduce latency from repeated
|
|
40
|
+
agent creation. See the _agent_cache module-level variables and helpers.
|
|
42
41
|
|
|
43
|
-
|
|
42
|
+
Cache Features:
|
|
43
|
+
- Oldest-entry eviction (by creation time) once max size (50) is reached
|
|
44
|
+
- 5-minute TTL for cache entries
|
|
45
|
+
- Coroutine-safe via asyncio.Lock (asyncio.Lock is not safe across OS threads)
|
|
46
|
+
- Cache key: hash(schema) + model + user_id
|
|
44
47
|
|
|
48
|
+
Usage:
|
|
49
|
+
# Normal usage (cache enabled by default)
|
|
50
|
+
agent = await create_agent(context, agent_schema_override=schema)
|
|
51
|
+
|
|
52
|
+
# Bypass cache for testing
|
|
53
|
+
agent = await create_agent(context, use_cache=False)
|
|
54
|
+
|
|
55
|
+
# Clear cache
|
|
56
|
+
await clear_agent_cache() # Clear all
|
|
57
|
+
await clear_agent_cache("siggy") # Clear specific schema
|
|
58
|
+
|
|
59
|
+
# Monitor cache
|
|
60
|
+
stats = get_agent_cache_stats()
|
|
61
|
+
|
|
62
|
+
Future Improvements:
|
|
45
63
|
1. Schema Cache (see rem/utils/schema_loader.py TODO):
|
|
46
64
|
- Filesystem schemas: LRU cache, no TTL (immutable)
|
|
47
65
|
- Database schemas: TTL cache (5-15 min)
|
|
48
66
|
- Reduces disk I/O and DB queries
|
|
49
67
|
|
|
50
|
-
2. Model Instance Cache
|
|
51
|
-
- Cache Pydantic AI Model() instances
|
|
52
|
-
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
* Reuse tokenizer instances
|
|
56
|
-
* Faster model initialization
|
|
57
|
-
* Lower memory footprint
|
|
58
|
-
- Implementation:
|
|
59
|
-
```python
|
|
60
|
-
_model_cache: dict[tuple[str, str], Model] = {}
|
|
61
|
-
|
|
62
|
-
def get_or_create_model(model_name: str) -> Model:
|
|
63
|
-
cache_key = _parse_model_name(model_name) # ("anthropic", "claude-3-5-sonnet")
|
|
64
|
-
if cache_key not in _model_cache:
|
|
65
|
-
_model_cache[cache_key] = Model(model_name)
|
|
66
|
-
return _model_cache[cache_key]
|
|
67
|
-
```
|
|
68
|
-
- Considerations:
|
|
69
|
-
* Max cache size (LRU eviction, e.g., 20 models)
|
|
70
|
-
* Thread safety (asyncio.Lock for cache access)
|
|
71
|
-
* Model warmup on server startup for hot paths
|
|
72
|
-
* Clear cache on model config changes
|
|
73
|
-
|
|
74
|
-
3. Agent Instance Caching (Advanced):
|
|
75
|
-
- Cache complete Agent instances (model + schema + tools)
|
|
76
|
-
- Key: (schema_name, model_name) → Agent instance
|
|
77
|
-
- Benefits:
|
|
78
|
-
* Skip schema parsing and model creation entirely
|
|
79
|
-
* Fastest possible agent.run() latency
|
|
80
|
-
- Challenges:
|
|
81
|
-
* Agent state management (stateless required)
|
|
82
|
-
* Tool/resource updates (cache invalidation)
|
|
83
|
-
* Memory usage (agents are heavier than models)
|
|
84
|
-
- Recommendation: Start with Model cache, add Agent cache if profiling shows benefit
|
|
85
|
-
|
|
86
|
-
Profiling Targets (measure before optimizing):
|
|
87
|
-
- schema_loader.load_agent_schema() calls per request
|
|
88
|
-
- create_agent() execution time (model init overhead)
|
|
89
|
-
- Model() instance creation time by provider
|
|
90
|
-
- Agent.run() total latency breakdown
|
|
91
|
-
|
|
92
|
-
Related Files:
|
|
93
|
-
- rem/utils/schema_loader.py (schema caching TODO)
|
|
94
|
-
- rem/agentic/providers/pydantic_ai.py:339 (create_agent - this file)
|
|
95
|
-
- rem/services/schema_repository.py (database schema loading)
|
|
96
|
-
|
|
97
|
-
Priority: HIGH (blocks production scaling beyond 50 req/sec)
|
|
68
|
+
2. Model Instance Cache:
|
|
69
|
+
- Cache Pydantic AI Model() instances separately
|
|
70
|
+
- Would allow sharing models across different agent schemas
|
|
71
|
+
|
|
72
|
+
Priority: MEDIUM (agent cache handles the critical path)
|
|
98
73
|
|
|
99
74
|
4. Response Format Control (structured_output enhancement):
|
|
100
75
|
- Current: structured_output is bool (True=strict schema, False=free-form text)
|
|
@@ -147,6 +122,10 @@ Example Agent Schema:
|
|
|
147
122
|
}
|
|
148
123
|
"""
|
|
149
124
|
|
|
125
|
+
import asyncio
|
|
126
|
+
import hashlib
|
|
127
|
+
import json
|
|
128
|
+
import time
|
|
150
129
|
from typing import Any
|
|
151
130
|
|
|
152
131
|
from loguru import logger
|
|
@@ -169,6 +148,120 @@ from ..context import AgentContext
|
|
|
169
148
|
from ...settings import settings
|
|
170
149
|
|
|
171
150
|
|
|
151
|
+
# =============================================================================
|
|
152
|
+
# AGENT INSTANCE CACHE
|
|
153
|
+
# =============================================================================
|
|
154
|
+
# Caches AgentRuntime instances to avoid repeated MCP tool loading and agent
|
|
155
|
+
# creation overhead. Cache key is based on schema content hash + model name + user ID.
|
|
156
|
+
#
|
|
157
|
+
# Design:
|
|
158
|
+
# - Oldest-first eviction (by creation time) when max size is reached
|
|
159
|
+
# - Optional TTL for cache entries
|
|
160
|
+
# - Coroutine-safe via asyncio.Lock (not safe across OS threads)
|
|
161
|
+
# - Cache can be cleared manually or on schema updates
|
|
162
|
+
# =============================================================================
|
|
163
|
+
|
|
164
|
+
_agent_cache: dict[str, tuple["AgentRuntime", float]] = {} # key -> (agent, created_at)
|
|
165
|
+
_agent_cache_lock = asyncio.Lock()
|
|
166
|
+
_AGENT_CACHE_MAX_SIZE = 50 # Max cached agents
|
|
167
|
+
_AGENT_CACHE_TTL_SECONDS = 300 # 5 minutes TTL (0 = no TTL)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _compute_cache_key(
|
|
171
|
+
agent_schema: dict[str, Any] | None,
|
|
172
|
+
model: str,
|
|
173
|
+
user_id: str | None,
|
|
174
|
+
) -> str:
|
|
175
|
+
"""
|
|
176
|
+
Compute cache key for an agent configuration.
|
|
177
|
+
|
|
178
|
+
Key components:
|
|
179
|
+
- Schema content hash (captures prompt + tools + output schema)
|
|
180
|
+
- Model name
|
|
181
|
+
- User ID (tools may be user-scoped)
|
|
182
|
+
"""
|
|
183
|
+
# Hash the schema content for stable key
|
|
184
|
+
if agent_schema:
|
|
185
|
+
# Sort keys for deterministic hashing
|
|
186
|
+
schema_str = json.dumps(agent_schema, sort_keys=True)
|
|
187
|
+
schema_hash = hashlib.md5(schema_str.encode()).hexdigest()[:12]
|
|
188
|
+
else:
|
|
189
|
+
schema_hash = "no-schema"
|
|
190
|
+
|
|
191
|
+
user_part = user_id[:8] if user_id else "no-user"
|
|
192
|
+
return f"{schema_hash}:{model}:{user_part}"
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
async def _get_cached_agent(cache_key: str) -> "AgentRuntime | None":
    """
    Look up a cached agent, discarding the entry if its TTL has elapsed.

    Args:
        cache_key: Key as produced by _compute_cache_key

    Returns:
        The cached AgentRuntime, or None on a miss or an expired entry.
    """
    async with _agent_cache_lock:
        entry = _agent_cache.get(cache_key)
        if entry is None:
            return None

        runtime, stored_at = entry

        # A TTL of 0 (or less) disables expiry entirely.
        if _AGENT_CACHE_TTL_SECONDS > 0:
            age = time.time() - stored_at
            if age > _AGENT_CACHE_TTL_SECONDS:
                del _agent_cache[cache_key]
                logger.debug(f"Agent cache expired: {cache_key} (age={age:.1f}s)")
                return None

        logger.debug(f"Agent cache hit: {cache_key}")
        return runtime
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
async def _cache_agent(cache_key: str, agent: "AgentRuntime") -> None:
    """
    Add an agent to the cache, evicting the oldest entries when full.

    Eviction removes the entry with the smallest stored timestamp, i.e. the
    one cached longest ago. Lookups do not refresh that timestamp, so this is
    oldest-first eviction rather than true LRU.

    Args:
        cache_key: Key as produced by _compute_cache_key
        agent: AgentRuntime instance to store
    """
    async with _agent_cache_lock:
        # Replacing an existing key does not grow the cache, so it must not
        # push out an unrelated entry (e.g. when two concurrent misses for
        # the same key both build and store an agent).
        if cache_key not in _agent_cache:
            # The emptiness guard keeps min() from raising ValueError if the
            # max size is ever configured as zero or negative.
            while _agent_cache and len(_agent_cache) >= _AGENT_CACHE_MAX_SIZE:
                oldest_key = min(_agent_cache, key=lambda k: _agent_cache[k][1])
                del _agent_cache[oldest_key]
                logger.debug(f"Agent cache evicted: {oldest_key}")

        _agent_cache[cache_key] = (agent, time.time())
        logger.debug(f"Agent cached: {cache_key} (total={len(_agent_cache)})")
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
async def clear_agent_cache(schema_name: str | None = None) -> int:
    """
    Remove entries from the agent cache.

    Args:
        schema_name: If None, the whole cache is emptied. Otherwise every
            entry whose cache key contains this text as a substring is
            removed.

    Returns:
        Number of entries removed.

    NOTE(review): keys built by _compute_cache_key look like
    "<schema_hash>:<model>:<user_part>", so the substring test runs against
    a content hash, the model name and the user part - not against a
    human-readable schema name. Confirm callers pass a value that can
    actually appear in a key.
    """
    async with _agent_cache_lock:
        if schema_name is not None:
            # Collect first: the dict must not change size while iterating.
            matching = [key for key in _agent_cache if schema_name in key]
            for key in matching:
                del _agent_cache[key]
            logger.info(f"Agent cache cleared for '{schema_name}': {len(matching)} entries")
            return len(matching)

        count = len(_agent_cache)
        _agent_cache.clear()
        logger.info(f"Agent cache cleared: {count} entries")
        return count
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def get_agent_cache_stats() -> dict[str, Any]:
    """
    Report a snapshot of the agent cache for monitoring.

    Returns:
        Dict with the current entry count ("size"), the configured limits
        ("max_size", "ttl_seconds") and a list copy of the cache keys
        ("keys"). The cache lock is not taken here.
    """
    stats: dict[str, Any] = dict(
        size=len(_agent_cache),
        max_size=_AGENT_CACHE_MAX_SIZE,
        ttl_seconds=_AGENT_CACHE_TTL_SECONDS,
        keys=list(_agent_cache),
    )
    return stats
|
|
263
|
+
|
|
264
|
+
|
|
172
265
|
class AgentRuntime:
|
|
173
266
|
"""
|
|
174
267
|
Agent runtime configuration bundle with delegation pattern.
|
|
@@ -349,6 +442,68 @@ def _prepare_schema_for_qwen(schema: dict[str, Any]) -> dict[str, Any]:
|
|
|
349
442
|
return schema_copy
|
|
350
443
|
|
|
351
444
|
|
|
445
|
+
def _schema_type_label(raw_type: Any) -> str:
    """Return a printable label for a JSON Schema ``type`` value."""
    # JSON Schema allows ``type`` to be a list of type names.
    if isinstance(raw_type, (list, tuple)):
        return " | ".join(str(t) for t in raw_type)
    return str(raw_type)


def _render_schema_recursive(schema: dict[str, Any], indent: int = 0) -> list[str]:
    """
    Recursively render a JSON schema as YAML-like text with exact field names.

    This ensures the LLM sees the actual field names (e.g., 'title', 'description')
    for nested objects, not just high-level descriptions.

    Only schemas whose type is "object" produce output; any other schema
    yields an empty list. Malformed or unusual sub-schemas (boolean schemas,
    non-dict ``items``, ``properties: null``, list-valued ``type``) are
    rendered leniently instead of raising.

    Args:
        schema: JSON Schema dict (can be nested object, array, or primitive)
        indent: Current indentation level

    Returns:
        List of lines representing the schema
    """
    lines: list[str] = []
    if not isinstance(schema, dict) or schema.get("type", "any") != "object":
        return lines

    prefix = " " * indent
    props = schema.get("properties") or {}
    required = schema.get("required") or []

    for field_name, field_def in props.items():
        if not isinstance(field_def, dict):
            # Boolean schemas (true/false) carry no type or description.
            field_def = {}

        field_type = _schema_type_label(field_def.get("type", "any"))
        field_desc = field_def.get("description", "")
        req_marker = " (required)" if field_name in required else ""

        if field_type == "object":
            lines.append(f"{prefix}{field_name}:{req_marker}")
            if field_desc:
                lines.append(f"{prefix} # {field_desc}")
            # Recurse into nested object
            lines.extend(_render_schema_recursive(field_def, indent + 1))
        elif field_type == "array":
            items = field_def.get("items", {})
            if not isinstance(items, dict):
                # ``items`` may be a boolean or a list of schemas.
                items = {}
            items_type = _schema_type_label(items.get("type", "any"))
            lines.append(f"{prefix}{field_name}: [{items_type}]{req_marker}")
            if field_desc:
                lines.append(f"{prefix} # {field_desc}")
            # If array items are objects, show their structure
            if items_type == "object":
                lines.append(f"{prefix} # Each item has:")
                lines.extend(_render_schema_recursive(items, indent + 2))
        else:
            # Primitive type, optionally restricted to an enum
            enum_vals = field_def.get("enum")
            if enum_vals:
                type_str = f"{field_type} (one of: {', '.join(str(v) for v in enum_vals)})"
            else:
                type_str = field_type
            lines.append(f"{prefix}{field_name}: {type_str}{req_marker}")
            if field_desc:
                lines.append(f"{prefix} # {field_desc}")

    return lines
|
|
505
|
+
|
|
506
|
+
|
|
352
507
|
def _convert_properties_to_prompt(properties: dict[str, Any]) -> str:
|
|
353
508
|
"""
|
|
354
509
|
Convert schema properties to prompt guidance text.
|
|
@@ -357,56 +512,71 @@ def _convert_properties_to_prompt(properties: dict[str, Any]) -> str:
|
|
|
357
512
|
definition into natural language guidance that informs the agent
|
|
358
513
|
about the expected response structure without forcing JSON output.
|
|
359
514
|
|
|
515
|
+
CRITICAL: This function now recursively renders nested schemas so the LLM
|
|
516
|
+
can see exact field names (e.g., 'title' vs 'name' in treatment options).
|
|
517
|
+
|
|
360
518
|
Args:
|
|
361
519
|
properties: JSON Schema properties dict
|
|
362
520
|
|
|
363
521
|
Returns:
|
|
364
522
|
Prompt text describing the expected response elements
|
|
365
|
-
|
|
366
|
-
Example:
|
|
367
|
-
properties = {
|
|
368
|
-
"answer": {"type": "string", "description": "The answer"},
|
|
369
|
-
"confidence": {"type": "number", "description": "Confidence 0-1"}
|
|
370
|
-
}
|
|
371
|
-
# Returns:
|
|
372
|
-
# "## Response Structure\n\nYour response should include:\n- **answer**: The answer\n..."
|
|
373
523
|
"""
|
|
374
524
|
if not properties:
|
|
375
525
|
return ""
|
|
376
526
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
for
|
|
380
|
-
field_type = field_def.get("type", "any")
|
|
381
|
-
description = field_def.get("description", "")
|
|
382
|
-
|
|
383
|
-
# Format based on type
|
|
384
|
-
if field_type == "array":
|
|
385
|
-
type_hint = "list"
|
|
386
|
-
elif field_type == "number":
|
|
387
|
-
type_hint = "number"
|
|
388
|
-
# Include min/max if specified
|
|
389
|
-
if "minimum" in field_def or "maximum" in field_def:
|
|
390
|
-
min_val = field_def.get("minimum", "")
|
|
391
|
-
max_val = field_def.get("maximum", "")
|
|
392
|
-
if min_val != "" and max_val != "":
|
|
393
|
-
type_hint = f"number ({min_val}-{max_val})"
|
|
394
|
-
elif field_type == "boolean":
|
|
395
|
-
type_hint = "yes/no"
|
|
396
|
-
else:
|
|
397
|
-
type_hint = field_type
|
|
527
|
+
# Separate answer (output) from other fields (internal tracking)
|
|
528
|
+
answer_field = properties.get("answer")
|
|
529
|
+
internal_fields = {k: v for k, v in properties.items() if k != "answer"}
|
|
398
530
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
531
|
+
lines = ["## Internal Thinking Structure (DO NOT output these labels)"]
|
|
532
|
+
lines.append("")
|
|
533
|
+
lines.append("Use this structure to organize your thinking, but ONLY output the answer content:")
|
|
534
|
+
lines.append("")
|
|
535
|
+
|
|
536
|
+
# If there's an answer field, emphasize it's the ONLY output
|
|
537
|
+
if answer_field:
|
|
538
|
+
answer_desc = answer_field.get("description", "Your response")
|
|
539
|
+
lines.append(f"**OUTPUT (what the user sees):** {answer_desc}")
|
|
540
|
+
lines.append("")
|
|
541
|
+
|
|
542
|
+
# Document internal fields with FULL recursive schema
|
|
543
|
+
if internal_fields:
|
|
544
|
+
lines.append("**INTERNAL (for your tracking only - do NOT include in output):**")
|
|
545
|
+
lines.append("")
|
|
546
|
+
lines.append("Schema (use these EXACT field names):")
|
|
547
|
+
lines.append("```yaml")
|
|
548
|
+
|
|
549
|
+
# Render each internal field recursively
|
|
550
|
+
for field_name, field_def in internal_fields.items():
|
|
551
|
+
field_type = field_def.get("type", "any")
|
|
552
|
+
field_desc = field_def.get("description", "")
|
|
553
|
+
|
|
554
|
+
if field_type == "object":
|
|
555
|
+
lines.append(f"{field_name}:")
|
|
556
|
+
if field_desc:
|
|
557
|
+
lines.append(f" # {field_desc}")
|
|
558
|
+
nested_lines = _render_schema_recursive(field_def, indent=1)
|
|
559
|
+
lines.extend(nested_lines)
|
|
560
|
+
elif field_type == "array":
|
|
561
|
+
items = field_def.get("items", {})
|
|
562
|
+
items_type = items.get("type", "any")
|
|
563
|
+
lines.append(f"{field_name}: [{items_type}]")
|
|
564
|
+
if field_desc:
|
|
565
|
+
lines.append(f" # {field_desc}")
|
|
566
|
+
if items_type == "object":
|
|
567
|
+
lines.append(f" # Each item has:")
|
|
568
|
+
nested_lines = _render_schema_recursive(items, indent=2)
|
|
569
|
+
lines.extend(nested_lines)
|
|
570
|
+
else:
|
|
571
|
+
lines.append(f"{field_name}: {field_type}")
|
|
572
|
+
if field_desc:
|
|
573
|
+
lines.append(f" # {field_desc}")
|
|
405
574
|
|
|
406
|
-
lines.append(
|
|
575
|
+
lines.append("```")
|
|
407
576
|
|
|
408
577
|
lines.append("")
|
|
409
|
-
lines.append("
|
|
578
|
+
lines.append("⚠️ CRITICAL: Your response must be ONLY the conversational answer text.")
|
|
579
|
+
lines.append("Do NOT output field names like 'answer:' or 'diverge_output:' - just the response itself.")
|
|
410
580
|
|
|
411
581
|
return "\n".join(lines)
|
|
412
582
|
|
|
@@ -509,6 +679,7 @@ async def create_agent(
|
|
|
509
679
|
model_override: KnownModelName | Model | None = None,
|
|
510
680
|
result_type: type[BaseModel] | None = None,
|
|
511
681
|
strip_model_description: bool = True,
|
|
682
|
+
use_cache: bool = True,
|
|
512
683
|
) -> AgentRuntime:
|
|
513
684
|
"""
|
|
514
685
|
Create agent from context with dynamic schema loading.
|
|
@@ -532,6 +703,7 @@ async def create_agent(
|
|
|
532
703
|
model_override: Optional explicit model (bypasses context.default_model)
|
|
533
704
|
result_type: Optional Pydantic model for structured output
|
|
534
705
|
strip_model_description: If True, removes model docstring from LLM schema
|
|
706
|
+
use_cache: If True, use agent instance cache (default: True)
|
|
535
707
|
|
|
536
708
|
Returns:
|
|
537
709
|
Configured Pydantic.AI Agent with MCP tools
|
|
@@ -555,6 +727,9 @@ async def create_agent(
|
|
|
555
727
|
agent_schema_override=schema,
|
|
556
728
|
result_type=Output
|
|
557
729
|
)
|
|
730
|
+
|
|
731
|
+
# Bypass cache for testing
|
|
732
|
+
agent = await create_agent(context, use_cache=False)
|
|
558
733
|
"""
|
|
559
734
|
# Initialize OTEL instrumentation if enabled (idempotent)
|
|
560
735
|
if settings.otel.enabled:
|
|
@@ -576,6 +751,17 @@ async def create_agent(
|
|
|
576
751
|
default_model = context.default_model if context else settings.llm.default_model
|
|
577
752
|
model = get_valid_model_or_default(model_override, default_model)
|
|
578
753
|
|
|
754
|
+
# Check cache first (if enabled and no custom result_type)
|
|
755
|
+
# Note: Custom result_type bypasses cache since it changes the agent's output schema
|
|
756
|
+
user_id = context.user_id if context else None
|
|
757
|
+
if use_cache and result_type is None:
|
|
758
|
+
cache_key = _compute_cache_key(agent_schema, str(model), user_id)
|
|
759
|
+
cached_agent = await _get_cached_agent(cache_key)
|
|
760
|
+
if cached_agent is not None:
|
|
761
|
+
return cached_agent
|
|
762
|
+
else:
|
|
763
|
+
cache_key = None
|
|
764
|
+
|
|
579
765
|
# Extract schema fields using typed helpers
|
|
580
766
|
from ..schema import get_system_prompt, get_metadata
|
|
581
767
|
|
|
@@ -664,26 +850,26 @@ async def create_agent(
|
|
|
664
850
|
|
|
665
851
|
set_agent_resource_attributes(agent_schema=agent_schema)
|
|
666
852
|
|
|
667
|
-
# Extract schema metadata for search_rem tool description suffix
|
|
668
|
-
# This allows entity schemas to add context-specific notes to the search_rem tool
|
|
669
|
-
search_rem_suffix = None
|
|
670
|
-
if metadata:
|
|
671
|
-
# Check for default_search_table in metadata (set by entity schemas)
|
|
672
|
-
extra = agent_schema.get("json_schema_extra", {}) if agent_schema else {}
|
|
673
|
-
default_table = extra.get("default_search_table")
|
|
674
|
-
has_embeddings = extra.get("has_embeddings", False)
|
|
675
|
-
|
|
676
|
-
if default_table:
|
|
677
|
-
# Build description suffix for search_rem
|
|
678
|
-
search_rem_suffix = f"\n\nFor this schema, use `search_rem` to query `{default_table}`. "
|
|
679
|
-
if has_embeddings:
|
|
680
|
-
search_rem_suffix += f"SEARCH works well on {default_table} (has embeddings). "
|
|
681
|
-
search_rem_suffix += f"Example: `SEARCH \"your query\" FROM {default_table} LIMIT 10`"
|
|
682
|
-
|
|
683
853
|
# Add tools from MCP server (in-process, no subprocess)
|
|
684
854
|
# Track loaded MCP servers for resource resolution
|
|
685
855
|
loaded_mcp_server = None
|
|
686
856
|
|
|
857
|
+
# Build map of tool_name → schema description from agent schema tools section
|
|
858
|
+
# This allows agent-specific tool guidance to override/augment MCP tool descriptions
|
|
859
|
+
schema_tool_descriptions: dict[str, str] = {}
|
|
860
|
+
tool_configs = metadata.tools if metadata and hasattr(metadata, 'tools') else []
|
|
861
|
+
for tool_config in tool_configs:
|
|
862
|
+
if hasattr(tool_config, 'name'):
|
|
863
|
+
t_name = tool_config.name
|
|
864
|
+
t_desc = tool_config.description or ""
|
|
865
|
+
else:
|
|
866
|
+
t_name = tool_config.get("name", "")
|
|
867
|
+
t_desc = tool_config.get("description", "")
|
|
868
|
+
# Skip resource URIs (handled separately below)
|
|
869
|
+
if t_name and "://" not in t_name and t_desc:
|
|
870
|
+
schema_tool_descriptions[t_name] = t_desc
|
|
871
|
+
logger.debug(f"Schema tool description for '{t_name}': {len(t_desc)} chars")
|
|
872
|
+
|
|
687
873
|
for server_config in mcp_server_configs:
|
|
688
874
|
server_type = server_config.get("type")
|
|
689
875
|
server_id = server_config.get("id", "mcp-server")
|
|
@@ -708,8 +894,8 @@ async def create_agent(
|
|
|
708
894
|
mcp_tools_dict = await mcp_server.get_tools()
|
|
709
895
|
|
|
710
896
|
for tool_name, tool_func in mcp_tools_dict.items():
|
|
711
|
-
#
|
|
712
|
-
tool_suffix =
|
|
897
|
+
# Get schema description suffix if agent schema defines one for this tool
|
|
898
|
+
tool_suffix = schema_tool_descriptions.get(tool_name)
|
|
713
899
|
|
|
714
900
|
wrapped_tool = create_mcp_tool_wrapper(
|
|
715
901
|
tool_name,
|
|
@@ -718,7 +904,7 @@ async def create_agent(
|
|
|
718
904
|
description_suffix=tool_suffix,
|
|
719
905
|
)
|
|
720
906
|
tools.append(wrapped_tool)
|
|
721
|
-
logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema
|
|
907
|
+
logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema desc)" if tool_suffix else ""))
|
|
722
908
|
|
|
723
909
|
logger.info(f"Loaded {len(mcp_tools_dict)} tools from MCP server: {server_id} (in-process)")
|
|
724
910
|
|
|
@@ -830,8 +1016,14 @@ async def create_agent(
|
|
|
830
1016
|
# from ..otel import set_agent_context_attributes
|
|
831
1017
|
# set_agent_context_attributes(context)
|
|
832
1018
|
|
|
833
|
-
|
|
1019
|
+
agent_runtime = AgentRuntime(
|
|
834
1020
|
agent=agent,
|
|
835
1021
|
temperature=temperature,
|
|
836
1022
|
max_iterations=max_iterations,
|
|
837
1023
|
)
|
|
1024
|
+
|
|
1025
|
+
# Cache the agent if caching is enabled
|
|
1026
|
+
if cache_key is not None:
|
|
1027
|
+
await _cache_agent(cache_key, agent_runtime)
|
|
1028
|
+
|
|
1029
|
+
return agent_runtime
|