remdb 0.3.180__py3-none-any.whl → 0.3.258__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. rem/agentic/README.md +36 -2
  2. rem/agentic/__init__.py +10 -1
  3. rem/agentic/context.py +185 -1
  4. rem/agentic/context_builder.py +56 -35
  5. rem/agentic/mcp/tool_wrapper.py +2 -2
  6. rem/agentic/providers/pydantic_ai.py +303 -111
  7. rem/agentic/schema.py +2 -2
  8. rem/api/main.py +1 -1
  9. rem/api/mcp_router/resources.py +223 -0
  10. rem/api/mcp_router/server.py +4 -0
  11. rem/api/mcp_router/tools.py +608 -166
  12. rem/api/routers/admin.py +30 -4
  13. rem/api/routers/auth.py +219 -20
  14. rem/api/routers/chat/child_streaming.py +393 -0
  15. rem/api/routers/chat/completions.py +77 -40
  16. rem/api/routers/chat/sse_events.py +7 -3
  17. rem/api/routers/chat/streaming.py +381 -291
  18. rem/api/routers/chat/streaming_utils.py +325 -0
  19. rem/api/routers/common.py +18 -0
  20. rem/api/routers/dev.py +7 -1
  21. rem/api/routers/feedback.py +11 -3
  22. rem/api/routers/messages.py +176 -38
  23. rem/api/routers/models.py +9 -1
  24. rem/api/routers/query.py +17 -15
  25. rem/api/routers/shared_sessions.py +16 -0
  26. rem/auth/jwt.py +19 -4
  27. rem/auth/middleware.py +42 -28
  28. rem/cli/README.md +62 -0
  29. rem/cli/commands/ask.py +205 -114
  30. rem/cli/commands/db.py +55 -31
  31. rem/cli/commands/experiments.py +1 -1
  32. rem/cli/commands/process.py +179 -43
  33. rem/cli/commands/query.py +109 -0
  34. rem/cli/commands/session.py +117 -0
  35. rem/cli/main.py +2 -0
  36. rem/models/core/experiment.py +1 -1
  37. rem/models/entities/ontology.py +18 -20
  38. rem/models/entities/session.py +1 -0
  39. rem/schemas/agents/core/agent-builder.yaml +1 -1
  40. rem/schemas/agents/rem.yaml +1 -1
  41. rem/schemas/agents/test_orchestrator.yaml +42 -0
  42. rem/schemas/agents/test_structured_output.yaml +52 -0
  43. rem/services/content/providers.py +151 -49
  44. rem/services/content/service.py +18 -5
  45. rem/services/embeddings/worker.py +26 -12
  46. rem/services/postgres/__init__.py +28 -3
  47. rem/services/postgres/diff_service.py +57 -5
  48. rem/services/postgres/programmable_diff_service.py +635 -0
  49. rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
  50. rem/services/postgres/register_type.py +11 -10
  51. rem/services/postgres/repository.py +39 -28
  52. rem/services/postgres/schema_generator.py +5 -5
  53. rem/services/postgres/sql_builder.py +6 -5
  54. rem/services/rem/README.md +4 -3
  55. rem/services/rem/parser.py +7 -10
  56. rem/services/rem/service.py +47 -0
  57. rem/services/session/__init__.py +8 -1
  58. rem/services/session/compression.py +47 -5
  59. rem/services/session/pydantic_messages.py +310 -0
  60. rem/services/session/reload.py +2 -1
  61. rem/settings.py +92 -7
  62. rem/sql/migrations/001_install.sql +125 -7
  63. rem/sql/migrations/002_install_models.sql +159 -149
  64. rem/sql/migrations/004_cache_system.sql +10 -276
  65. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  66. rem/utils/schema_loader.py +180 -120
  67. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/METADATA +7 -6
  68. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/RECORD +70 -61
  69. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
  70. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
@@ -35,66 +35,41 @@ Unique Design:
35
35
  - Tools and resources loaded from MCP servers via schema config
36
36
  - Stripped descriptions to avoid LLM schema bloat
37
37
 
38
- TODO:
39
- Model Cache Implementation (Critical for Production Scale)
40
- Current bottleneck: Every agent.run() call creates a new Agent instance with
41
- model initialization overhead. At scale (100+ requests/sec), this becomes expensive.
38
+ Caching Implementation:
39
+ Agent instance caching is now implemented to reduce latency from repeated
40
+ agent creation. See the _agent_cache module-level variables and helpers.
42
41
 
43
- Need two-tier caching strategy:
42
+ Cache Features:
43
+ - LRU eviction when max size (50) exceeded
44
+ - 5-minute TTL for cache entries
45
+ - Thread-safe via asyncio.Lock
46
+ - Cache key: hash(schema) + model + user_id
44
47
 
48
+ Usage:
49
+ # Normal usage (cache enabled by default)
50
+ agent = await create_agent(context, agent_schema_override=schema)
51
+
52
+ # Bypass cache for testing
53
+ agent = await create_agent(context, use_cache=False)
54
+
55
+ # Clear cache
56
+ await clear_agent_cache() # Clear all
57
+ await clear_agent_cache("siggy") # Clear specific schema
58
+
59
+ # Monitor cache
60
+ stats = get_agent_cache_stats()
61
+
62
+ Future Improvements:
45
63
  1. Schema Cache (see rem/utils/schema_loader.py TODO):
46
64
  - Filesystem schemas: LRU cache, no TTL (immutable)
47
65
  - Database schemas: TTL cache (5-15 min)
48
66
  - Reduces disk I/O and DB queries
49
67
 
50
- 2. Model Instance Cache (THIS TODO):
51
- - Cache Pydantic AI Model() instances (connection pools, tokenizers)
52
- - Key: (provider, model_name) Model instance
53
- - Benefits:
54
- * Reuse HTTP connection pools (httpx.AsyncClient)
55
- * Reuse tokenizer instances
56
- * Faster model initialization
57
- * Lower memory footprint
58
- - Implementation:
59
- ```python
60
- _model_cache: dict[tuple[str, str], Model] = {}
61
-
62
- def get_or_create_model(model_name: str) -> Model:
63
- cache_key = _parse_model_name(model_name) # ("anthropic", "claude-3-5-sonnet")
64
- if cache_key not in _model_cache:
65
- _model_cache[cache_key] = Model(model_name)
66
- return _model_cache[cache_key]
67
- ```
68
- - Considerations:
69
- * Max cache size (LRU eviction, e.g., 20 models)
70
- * Thread safety (asyncio.Lock for cache access)
71
- * Model warmup on server startup for hot paths
72
- * Clear cache on model config changes
73
-
74
- 3. Agent Instance Caching (Advanced):
75
- - Cache complete Agent instances (model + schema + tools)
76
- - Key: (schema_name, model_name) → Agent instance
77
- - Benefits:
78
- * Skip schema parsing and model creation entirely
79
- * Fastest possible agent.run() latency
80
- - Challenges:
81
- * Agent state management (stateless required)
82
- * Tool/resource updates (cache invalidation)
83
- * Memory usage (agents are heavier than models)
84
- - Recommendation: Start with Model cache, add Agent cache if profiling shows benefit
85
-
86
- Profiling Targets (measure before optimizing):
87
- - schema_loader.load_agent_schema() calls per request
88
- - create_agent() execution time (model init overhead)
89
- - Model() instance creation time by provider
90
- - Agent.run() total latency breakdown
91
-
92
- Related Files:
93
- - rem/utils/schema_loader.py (schema caching TODO)
94
- - rem/agentic/providers/pydantic_ai.py:339 (create_agent - this file)
95
- - rem/services/schema_repository.py (database schema loading)
96
-
97
- Priority: HIGH (blocks production scaling beyond 50 req/sec)
68
+ 2. Model Instance Cache:
69
+ - Cache Pydantic AI Model() instances separately
70
+ - Would allow sharing models across different agent schemas
71
+
72
+ Priority: MEDIUM (agent cache handles the critical path)
98
73
 
99
74
  4. Response Format Control (structured_output enhancement):
100
75
  - Current: structured_output is bool (True=strict schema, False=free-form text)
@@ -147,6 +122,10 @@ Example Agent Schema:
147
122
  }
148
123
  """
149
124
 
125
+ import asyncio
126
+ import hashlib
127
+ import json
128
+ import time
150
129
  from typing import Any
151
130
 
152
131
  from loguru import logger
@@ -169,6 +148,120 @@ from ..context import AgentContext
169
148
  from ...settings import settings
170
149
 
171
150
 
151
+ # =============================================================================
152
+ # AGENT INSTANCE CACHE
153
+ # =============================================================================
154
+ # Caches AgentRuntime instances to avoid repeated MCP tool loading and agent
155
+ # creation overhead. Cache key is based on schema content hash + model name.
156
+ #
157
+ # Design:
158
+ # - LRU-style eviction when max size exceeded
159
+ # - Optional TTL for cache entries
160
+ # - Thread-safe via asyncio.Lock
161
+ # - Cache can be cleared manually or on schema updates
162
+ # =============================================================================
163
+
164
+ _agent_cache: dict[str, tuple["AgentRuntime", float]] = {} # key -> (agent, created_at)
165
+ _agent_cache_lock = asyncio.Lock()
166
+ _AGENT_CACHE_MAX_SIZE = 50 # Max cached agents
167
+ _AGENT_CACHE_TTL_SECONDS = 300 # 5 minutes TTL (0 = no TTL)
168
+
169
+
170
+ def _compute_cache_key(
171
+ agent_schema: dict[str, Any] | None,
172
+ model: str,
173
+ user_id: str | None,
174
+ ) -> str:
175
+ """
176
+ Compute cache key for an agent configuration.
177
+
178
+ Key components:
179
+ - Schema content hash (captures prompt + tools + output schema)
180
+ - Model name
181
+ - User ID (tools may be user-scoped)
182
+ """
183
+ # Hash the schema content for stable key
184
+ if agent_schema:
185
+ # Sort keys for deterministic hashing
186
+ schema_str = json.dumps(agent_schema, sort_keys=True)
187
+ schema_hash = hashlib.md5(schema_str.encode()).hexdigest()[:12]
188
+ else:
189
+ schema_hash = "no-schema"
190
+
191
+ user_part = user_id[:8] if user_id else "no-user"
192
+ return f"{schema_hash}:{model}:{user_part}"
193
+
194
+
195
+ async def _get_cached_agent(cache_key: str) -> "AgentRuntime | None":
196
+ """Get agent from cache if exists and not expired."""
197
+ async with _agent_cache_lock:
198
+ if cache_key in _agent_cache:
199
+ agent, created_at = _agent_cache[cache_key]
200
+
201
+ # Check TTL
202
+ if _AGENT_CACHE_TTL_SECONDS > 0:
203
+ age = time.time() - created_at
204
+ if age > _AGENT_CACHE_TTL_SECONDS:
205
+ del _agent_cache[cache_key]
206
+ logger.debug(f"Agent cache expired: {cache_key} (age={age:.1f}s)")
207
+ return None
208
+
209
+ logger.debug(f"Agent cache hit: {cache_key}")
210
+ return agent
211
+
212
+ return None
213
+
214
+
215
+ async def _cache_agent(cache_key: str, agent: "AgentRuntime") -> None:
216
+ """Add agent to cache with LRU eviction."""
217
+ async with _agent_cache_lock:
218
+ # Evict oldest entries if at capacity
219
+ while len(_agent_cache) >= _AGENT_CACHE_MAX_SIZE:
220
+ # Find oldest entry
221
+ oldest_key = min(_agent_cache.keys(), key=lambda k: _agent_cache[k][1])
222
+ del _agent_cache[oldest_key]
223
+ logger.debug(f"Agent cache evicted: {oldest_key}")
224
+
225
+ _agent_cache[cache_key] = (agent, time.time())
226
+ logger.debug(f"Agent cached: {cache_key} (total={len(_agent_cache)})")
227
+
228
+
229
+ async def clear_agent_cache(schema_name: str | None = None) -> int:
230
+ """
231
+ Clear agent cache entries.
232
+
233
+ Args:
234
+ schema_name: If provided, only clear entries for this schema.
235
+ If None, clear entire cache.
236
+
237
+ Returns:
238
+ Number of entries cleared.
239
+ """
240
+ async with _agent_cache_lock:
241
+ if schema_name is None:
242
+ count = len(_agent_cache)
243
+ _agent_cache.clear()
244
+ logger.info(f"Agent cache cleared: {count} entries")
245
+ return count
246
+ else:
247
+ # Clear entries matching schema name (in the hash)
248
+ keys_to_remove = [k for k in _agent_cache if schema_name in k]
249
+ for k in keys_to_remove:
250
+ del _agent_cache[k]
251
+ logger.info(f"Agent cache cleared for '{schema_name}': {len(keys_to_remove)} entries")
252
+ return len(keys_to_remove)
253
+
254
+
255
+ def get_agent_cache_stats() -> dict[str, Any]:
256
+ """Get cache statistics for monitoring."""
257
+ return {
258
+ "size": len(_agent_cache),
259
+ "max_size": _AGENT_CACHE_MAX_SIZE,
260
+ "ttl_seconds": _AGENT_CACHE_TTL_SECONDS,
261
+ "keys": list(_agent_cache.keys()),
262
+ }
263
+
264
+
172
265
  class AgentRuntime:
173
266
  """
174
267
  Agent runtime configuration bundle with delegation pattern.
@@ -349,6 +442,68 @@ def _prepare_schema_for_qwen(schema: dict[str, Any]) -> dict[str, Any]:
349
442
  return schema_copy
350
443
 
351
444
 
445
+ def _render_schema_recursive(schema: dict[str, Any], indent: int = 0) -> list[str]:
446
+ """
447
+ Recursively render a JSON schema as YAML-like text with exact field names.
448
+
449
+ This ensures the LLM sees the actual field names (e.g., 'title', 'description')
450
+ for nested objects, not just high-level descriptions.
451
+
452
+ Args:
453
+ schema: JSON Schema dict (can be nested object, array, or primitive)
454
+ indent: Current indentation level
455
+
456
+ Returns:
457
+ List of lines representing the schema
458
+ """
459
+ lines = []
460
+ prefix = " " * indent
461
+
462
+ schema_type = schema.get("type", "any")
463
+
464
+ if schema_type == "object":
465
+ props = schema.get("properties", {})
466
+ required = schema.get("required", [])
467
+
468
+ for field_name, field_def in props.items():
469
+ field_type = field_def.get("type", "any")
470
+ field_desc = field_def.get("description", "")
471
+ is_required = field_name in required
472
+
473
+ # Format field header
474
+ req_marker = " (required)" if is_required else ""
475
+ if field_type == "object":
476
+ lines.append(f"{prefix}{field_name}:{req_marker}")
477
+ if field_desc:
478
+ lines.append(f"{prefix} # {field_desc}")
479
+ # Recurse into nested object
480
+ nested_lines = _render_schema_recursive(field_def, indent + 1)
481
+ lines.extend(nested_lines)
482
+ elif field_type == "array":
483
+ items = field_def.get("items", {})
484
+ items_type = items.get("type", "any")
485
+ lines.append(f"{prefix}{field_name}: [{items_type}]{req_marker}")
486
+ if field_desc:
487
+ lines.append(f"{prefix} # {field_desc}")
488
+ # If array items are objects, show their structure
489
+ if items_type == "object":
490
+ lines.append(f"{prefix} # Each item has:")
491
+ nested_lines = _render_schema_recursive(items, indent + 2)
492
+ lines.extend(nested_lines)
493
+ else:
494
+ # Primitive type
495
+ enum_vals = field_def.get("enum")
496
+ if enum_vals:
497
+ type_str = f"{field_type} (one of: {', '.join(str(v) for v in enum_vals)})"
498
+ else:
499
+ type_str = field_type
500
+ lines.append(f"{prefix}{field_name}: {type_str}{req_marker}")
501
+ if field_desc:
502
+ lines.append(f"{prefix} # {field_desc}")
503
+
504
+ return lines
505
+
506
+
352
507
  def _convert_properties_to_prompt(properties: dict[str, Any]) -> str:
353
508
  """
354
509
  Convert schema properties to prompt guidance text.
@@ -357,56 +512,71 @@ def _convert_properties_to_prompt(properties: dict[str, Any]) -> str:
357
512
  definition into natural language guidance that informs the agent
358
513
  about the expected response structure without forcing JSON output.
359
514
 
515
+ CRITICAL: This function now recursively renders nested schemas so the LLM
516
+ can see exact field names (e.g., 'title' vs 'name' in treatment options).
517
+
360
518
  Args:
361
519
  properties: JSON Schema properties dict
362
520
 
363
521
  Returns:
364
522
  Prompt text describing the expected response elements
365
-
366
- Example:
367
- properties = {
368
- "answer": {"type": "string", "description": "The answer"},
369
- "confidence": {"type": "number", "description": "Confidence 0-1"}
370
- }
371
- # Returns:
372
- # "## Response Structure\n\nYour response should include:\n- **answer**: The answer\n..."
373
523
  """
374
524
  if not properties:
375
525
  return ""
376
526
 
377
- lines = ["## Response Guidelines", "", "Your response should address the following elements:"]
378
-
379
- for field_name, field_def in properties.items():
380
- field_type = field_def.get("type", "any")
381
- description = field_def.get("description", "")
382
-
383
- # Format based on type
384
- if field_type == "array":
385
- type_hint = "list"
386
- elif field_type == "number":
387
- type_hint = "number"
388
- # Include min/max if specified
389
- if "minimum" in field_def or "maximum" in field_def:
390
- min_val = field_def.get("minimum", "")
391
- max_val = field_def.get("maximum", "")
392
- if min_val != "" and max_val != "":
393
- type_hint = f"number ({min_val}-{max_val})"
394
- elif field_type == "boolean":
395
- type_hint = "yes/no"
396
- else:
397
- type_hint = field_type
527
+ # Separate answer (output) from other fields (internal tracking)
528
+ answer_field = properties.get("answer")
529
+ internal_fields = {k: v for k, v in properties.items() if k != "answer"}
398
530
 
399
- # Build field description
400
- field_line = f"- **{field_name}**"
401
- if type_hint and type_hint != "string":
402
- field_line += f" ({type_hint})"
403
- if description:
404
- field_line += f": {description}"
531
+ lines = ["## Internal Thinking Structure (DO NOT output these labels)"]
532
+ lines.append("")
533
+ lines.append("Use this structure to organize your thinking, but ONLY output the answer content:")
534
+ lines.append("")
535
+
536
+ # If there's an answer field, emphasize it's the ONLY output
537
+ if answer_field:
538
+ answer_desc = answer_field.get("description", "Your response")
539
+ lines.append(f"**OUTPUT (what the user sees):** {answer_desc}")
540
+ lines.append("")
541
+
542
+ # Document internal fields with FULL recursive schema
543
+ if internal_fields:
544
+ lines.append("**INTERNAL (for your tracking only - do NOT include in output):**")
545
+ lines.append("")
546
+ lines.append("Schema (use these EXACT field names):")
547
+ lines.append("```yaml")
548
+
549
+ # Render each internal field recursively
550
+ for field_name, field_def in internal_fields.items():
551
+ field_type = field_def.get("type", "any")
552
+ field_desc = field_def.get("description", "")
553
+
554
+ if field_type == "object":
555
+ lines.append(f"{field_name}:")
556
+ if field_desc:
557
+ lines.append(f" # {field_desc}")
558
+ nested_lines = _render_schema_recursive(field_def, indent=1)
559
+ lines.extend(nested_lines)
560
+ elif field_type == "array":
561
+ items = field_def.get("items", {})
562
+ items_type = items.get("type", "any")
563
+ lines.append(f"{field_name}: [{items_type}]")
564
+ if field_desc:
565
+ lines.append(f" # {field_desc}")
566
+ if items_type == "object":
567
+ lines.append(f" # Each item has:")
568
+ nested_lines = _render_schema_recursive(items, indent=2)
569
+ lines.extend(nested_lines)
570
+ else:
571
+ lines.append(f"{field_name}: {field_type}")
572
+ if field_desc:
573
+ lines.append(f" # {field_desc}")
405
574
 
406
- lines.append(field_line)
575
+ lines.append("```")
407
576
 
408
577
  lines.append("")
409
- lines.append("Respond naturally in prose, addressing these elements where relevant.")
578
+ lines.append("⚠️ CRITICAL: Your response must be ONLY the conversational answer text.")
579
+ lines.append("Do NOT output field names like 'answer:' or 'diverge_output:' - just the response itself.")
410
580
 
411
581
  return "\n".join(lines)
412
582
 
@@ -509,6 +679,7 @@ async def create_agent(
509
679
  model_override: KnownModelName | Model | None = None,
510
680
  result_type: type[BaseModel] | None = None,
511
681
  strip_model_description: bool = True,
682
+ use_cache: bool = True,
512
683
  ) -> AgentRuntime:
513
684
  """
514
685
  Create agent from context with dynamic schema loading.
@@ -532,6 +703,7 @@ async def create_agent(
532
703
  model_override: Optional explicit model (bypasses context.default_model)
533
704
  result_type: Optional Pydantic model for structured output
534
705
  strip_model_description: If True, removes model docstring from LLM schema
706
+ use_cache: If True, use agent instance cache (default: True)
535
707
 
536
708
  Returns:
537
709
  Configured Pydantic.AI Agent with MCP tools
@@ -555,6 +727,9 @@ async def create_agent(
555
727
  agent_schema_override=schema,
556
728
  result_type=Output
557
729
  )
730
+
731
+ # Bypass cache for testing
732
+ agent = await create_agent(context, use_cache=False)
558
733
  """
559
734
  # Initialize OTEL instrumentation if enabled (idempotent)
560
735
  if settings.otel.enabled:
@@ -576,6 +751,17 @@ async def create_agent(
576
751
  default_model = context.default_model if context else settings.llm.default_model
577
752
  model = get_valid_model_or_default(model_override, default_model)
578
753
 
754
+ # Check cache first (if enabled and no custom result_type)
755
+ # Note: Custom result_type bypasses cache since it changes the agent's output schema
756
+ user_id = context.user_id if context else None
757
+ if use_cache and result_type is None:
758
+ cache_key = _compute_cache_key(agent_schema, str(model), user_id)
759
+ cached_agent = await _get_cached_agent(cache_key)
760
+ if cached_agent is not None:
761
+ return cached_agent
762
+ else:
763
+ cache_key = None
764
+
579
765
  # Extract schema fields using typed helpers
580
766
  from ..schema import get_system_prompt, get_metadata
581
767
 
@@ -664,26 +850,26 @@ async def create_agent(
664
850
 
665
851
  set_agent_resource_attributes(agent_schema=agent_schema)
666
852
 
667
- # Extract schema metadata for search_rem tool description suffix
668
- # This allows entity schemas to add context-specific notes to the search_rem tool
669
- search_rem_suffix = None
670
- if metadata:
671
- # Check for default_search_table in metadata (set by entity schemas)
672
- extra = agent_schema.get("json_schema_extra", {}) if agent_schema else {}
673
- default_table = extra.get("default_search_table")
674
- has_embeddings = extra.get("has_embeddings", False)
675
-
676
- if default_table:
677
- # Build description suffix for search_rem
678
- search_rem_suffix = f"\n\nFor this schema, use `search_rem` to query `{default_table}`. "
679
- if has_embeddings:
680
- search_rem_suffix += f"SEARCH works well on {default_table} (has embeddings). "
681
- search_rem_suffix += f"Example: `SEARCH \"your query\" FROM {default_table} LIMIT 10`"
682
-
683
853
  # Add tools from MCP server (in-process, no subprocess)
684
854
  # Track loaded MCP servers for resource resolution
685
855
  loaded_mcp_server = None
686
856
 
857
+ # Build map of tool_name → schema description from agent schema tools section
858
+ # This allows agent-specific tool guidance to override/augment MCP tool descriptions
859
+ schema_tool_descriptions: dict[str, str] = {}
860
+ tool_configs = metadata.tools if metadata and hasattr(metadata, 'tools') else []
861
+ for tool_config in tool_configs:
862
+ if hasattr(tool_config, 'name'):
863
+ t_name = tool_config.name
864
+ t_desc = tool_config.description or ""
865
+ else:
866
+ t_name = tool_config.get("name", "")
867
+ t_desc = tool_config.get("description", "")
868
+ # Skip resource URIs (handled separately below)
869
+ if t_name and "://" not in t_name and t_desc:
870
+ schema_tool_descriptions[t_name] = t_desc
871
+ logger.debug(f"Schema tool description for '{t_name}': {len(t_desc)} chars")
872
+
687
873
  for server_config in mcp_server_configs:
688
874
  server_type = server_config.get("type")
689
875
  server_id = server_config.get("id", "mcp-server")
@@ -708,8 +894,8 @@ async def create_agent(
708
894
  mcp_tools_dict = await mcp_server.get_tools()
709
895
 
710
896
  for tool_name, tool_func in mcp_tools_dict.items():
711
- # Add description suffix to search_rem tool if schema specifies a default table
712
- tool_suffix = search_rem_suffix if tool_name == "search_rem" else None
897
+ # Get schema description suffix if agent schema defines one for this tool
898
+ tool_suffix = schema_tool_descriptions.get(tool_name)
713
899
 
714
900
  wrapped_tool = create_mcp_tool_wrapper(
715
901
  tool_name,
@@ -718,7 +904,7 @@ async def create_agent(
718
904
  description_suffix=tool_suffix,
719
905
  )
720
906
  tools.append(wrapped_tool)
721
- logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema suffix)" if tool_suffix else ""))
907
+ logger.debug(f"Loaded MCP tool: {tool_name}" + (" (with schema desc)" if tool_suffix else ""))
722
908
 
723
909
  logger.info(f"Loaded {len(mcp_tools_dict)} tools from MCP server: {server_id} (in-process)")
724
910
 
@@ -732,7 +918,7 @@ async def create_agent(
732
918
  # the artificial MCP distinction between tools and resources
733
919
  #
734
920
  # Supports both concrete and template URIs:
735
- # - Concrete: "rem://schemas" -> no-param tool
921
+ # - Concrete: "rem://agents" -> no-param tool
736
922
  # - Template: "patient-profile://field/{field_key}" -> tool with field_key param
737
923
  from ..mcp.tool_wrapper import create_resource_tool
738
924
 
@@ -830,8 +1016,14 @@ async def create_agent(
830
1016
  # from ..otel import set_agent_context_attributes
831
1017
  # set_agent_context_attributes(context)
832
1018
 
833
- return AgentRuntime(
1019
+ agent_runtime = AgentRuntime(
834
1020
  agent=agent,
835
1021
  temperature=temperature,
836
1022
  max_iterations=max_iterations,
837
1023
  )
1024
+
1025
+ # Cache the agent if caching is enabled
1026
+ if cache_key is not None:
1027
+ await _cache_agent(cache_key, agent_runtime)
1028
+
1029
+ return agent_runtime
rem/agentic/schema.py CHANGED
@@ -79,7 +79,7 @@ class MCPResourceReference(BaseModel):
79
79
 
80
80
  Example (exact URI):
81
81
  {
82
- "uri": "rem://schemas",
82
+ "uri": "rem://agents",
83
83
  "name": "Agent Schemas",
84
84
  "description": "List all available agent schemas"
85
85
  }
@@ -96,7 +96,7 @@ class MCPResourceReference(BaseModel):
96
96
  default=None,
97
97
  description=(
98
98
  "Exact resource URI or URI with query parameters. "
99
- "Examples: 'rem://schemas', 'rem://resources?category=drug.*'"
99
+ "Examples: 'rem://agents', 'rem://resources?category=drug.*'"
100
100
  )
101
101
  )
102
102
 
rem/api/main.py CHANGED
@@ -322,7 +322,7 @@ def create_app() -> FastAPI:
322
322
 
323
323
  app.add_middleware(
324
324
  AuthMiddleware,
325
- protected_paths=["/api/v1"],
325
+ protected_paths=["/api/v1", "/api/admin"],
326
326
  excluded_paths=["/api/auth", "/api/dev", "/api/v1/mcp/auth", "/api/v1/slack"],
327
327
  # Allow anonymous when auth is disabled, otherwise use setting
328
328
  allow_anonymous=(not settings.auth.enabled) or settings.auth.allow_anonymous,