remdb 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,236 @@
1
+ """
2
+ Pydantic Model Helper Utilities.
3
+
4
+ Utilities for working with REM Pydantic models following our conventions:
5
+
6
+ Business Key (entity_key) Detection:
7
+ 1. Field with json_schema_extra={"entity_key": True}
8
+ 2. Common business key fields: name, uri, key, label, title
9
+ 3. Fallback to "id" (unique by UUID only)
10
+
11
+ Embedding Field Detection:
12
+ 1. Field with json_schema_extra={"embed": True}
13
+ 2. Common content fields: content, description, summary, etc.
14
+ 3. Explicit disable with json_schema_extra={"embed": False}
15
+
16
+ Table Name Inference:
17
+ 1. model_config.json_schema_extra.table_name
18
+ 2. CamelCase → snake_case + pluralization
19
+ """
20
+
21
+ from typing import Any, Type
22
+
23
+ from loguru import logger
24
+ from pydantic import BaseModel
25
+
26
+
27
def get_entity_key_field(model: Type[BaseModel]) -> str:
    """
    Resolve which field of a model acts as its business key for KV lookups.

    Resolution order:
    1. The first field whose json_schema_extra dict has "entity_key" set to True
    2. The first of "name", "uri", "key", "label", "title" declared on the model
    3. "id" as a last resort (a warning is logged in that case)

    Args:
        model: Pydantic model class

    Returns:
        Name of the field to use as entity_key

    Example:
        >>> from rem.models.entities import Resource
        >>> get_entity_key_field(Resource)
        'name'
    """
    declared_fields = model.model_fields

    # Pass 1: an explicit marker on any field wins over naming conventions.
    for name, info in declared_fields.items():
        extra = getattr(info, "json_schema_extra", None)
        if not extra or not isinstance(extra, dict):
            continue
        if extra.get("entity_key") is True:
            logger.debug(f"Using explicit entity_key field: {name}")
            return name

    # Pass 2: conventional names, tried in priority order.
    conventional = next(
        (
            option
            for option in ("name", "uri", "key", "label", "title")
            if option in declared_fields
        ),
        None,
    )
    if conventional is not None:
        logger.debug(f"Using conventional entity_key field: {conventional}")
        return conventional

    # Pass 3: nothing matched, so only the UUID identifies the entity.
    logger.warning(
        f"No business key found for {model.__name__}, using 'id' (UUID only)"
    )
    return "id"
68
+
69
+
70
def get_table_name(model: Type[BaseModel]) -> str:
    """
    Get table name for a Pydantic model.

    Follows REM conventions:
    1. model_config.json_schema_extra.table_name (explicit)
    2. CamelCase → snake_case + pluralization

    Pluralization of an inferred name:
    - a name already ending in "s" is returned unchanged
    - consonant + "y" becomes "ies" (category -> categories)
    - vowel + "y" just gets "s" (survey -> surveys)
    - anything else gets "s" (resource -> resources)

    Args:
        model: Pydantic model class

    Returns:
        Table name

    Example:
        >>> from rem.models.entities import Resource
        >>> get_table_name(Resource)
        'resources'
    """
    import re

    # An explicit table_name in the model config always wins.
    model_config = getattr(model, "model_config", None)
    if isinstance(model_config, dict):
        json_extra = model_config.get("json_schema_extra", {})
        # json_schema_extra may be something other than a dict; only a dict can carry table_name.
        if isinstance(json_extra, dict) and "table_name" in json_extra:
            return json_extra["table_name"]

    # Infer from the class name: CamelCase -> snake_case.
    name = model.__name__
    name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name).lower()

    # Pluralize.
    if name.endswith("s"):
        return name
    # Only consonant + "y" takes "ies"; "survey" must not become "surveies".
    if name.endswith("y") and name[-2:-1] not in {"a", "e", "i", "o", "u"}:
        return name[:-1] + "ies"
    return name + "s"
114
+
115
+
116
def get_embeddable_fields(model: Type[BaseModel]) -> list[str]:
    """
    List the fields of a model that should have embeddings generated.

    Decision per field, in this order:
    1. json_schema_extra={"embed": True}  → always included
    2. json_schema_extra={"embed": False} → always excluded
    3. Field name (case-insensitive) is a common content name → included
    4. Anything else → excluded

    Args:
        model: Pydantic model class

    Returns:
        Field names to embed, in the model's field declaration order

    Example:
        >>> from rem.models.entities import Resource
        >>> fields = get_embeddable_fields(Resource)
        >>> "content" in fields
        True
    """
    # Names that are embedded when the field carries no explicit "embed" flag.
    content_like_names = frozenset(
        ("content", "description", "summary", "text", "body", "message", "notes")
    )

    selected: list[str] = []
    for name, info in model.model_fields.items():
        extra = getattr(info, "json_schema_extra", None)
        # Only a non-empty dict can carry an explicit flag; anything else means "unset".
        flag = extra.get("embed") if (extra and isinstance(extra, dict)) else None

        if flag is True:
            selected.append(name)
        elif flag is False:
            # Explicit opt-out beats the name-based default.
            pass
        elif name.lower() in content_like_names:
            selected.append(name)

    return selected
168
+
169
+
170
def should_skip_field(field_name: str) -> bool:
    """
    Tell whether a field is a system field to leave out during SQL generation.

    The skipped names are the ones added separately from the model's own fields:
    - id (added as PRIMARY KEY)
    - tenant_id (added for multi-tenancy)
    - user_id (added for ownership)
    - created_at, updated_at, deleted_at (added as system timestamps)
    - graph_edges, metadata (added as JSONB system fields)
    - tags, column (CoreModel fields)

    The comparison is an exact, case-sensitive match.

    Args:
        field_name: Name of the field

    Returns:
        True if field should be skipped

    Example:
        >>> should_skip_field("id")
        True
        >>> should_skip_field("name")
        False
    """
    reserved_names = frozenset(
        (
            "id",
            "tenant_id",
            "user_id",
            "created_at",
            "updated_at",
            "deleted_at",
            "graph_edges",
            "metadata",
            "tags",
            "column",
        )
    )
    return field_name in reserved_names
208
+
209
+
210
def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
    """
    Collect the REM-specific metadata of a Pydantic model into one dict.

    Args:
        model: Pydantic model class

    Returns:
        Dict with:
        - model_name: Original model class name
        - table_name: Database table name (from get_table_name)
        - entity_key_field: Business key field name (from get_entity_key_field)
        - embeddable_fields: List of fields to embed (from get_embeddable_fields)

    Example:
        >>> from rem.models.entities import Resource
        >>> meta = get_model_metadata(Resource)
        >>> meta["table_name"]
        'resources'
        >>> meta["entity_key_field"]
        'name'
        >>> "content" in meta["embeddable_fields"]
        True
    """
    metadata: dict[str, Any] = {"model_name": model.__name__}
    metadata["table_name"] = get_table_name(model)
    metadata["entity_key_field"] = get_entity_key_field(model)
    metadata["embeddable_fields"] = get_embeddable_fields(model)
    return metadata
@@ -0,0 +1,336 @@
1
+ """
2
+ Centralized schema loading utility for agent schemas.
3
+
4
+ This module provides a single, consistent implementation for loading
5
+ agent schemas from YAML files across the entire codebase (API, CLI, agent factory).
6
+
7
+ Design Pattern:
8
+ - Search standard locations: schemas/agents/, schemas/evaluators/, schemas/
9
+ - Support short names: "contract-analyzer" → "schemas/agents/contract-analyzer.yaml"
10
+ - Support relative/absolute paths
11
+ - Consistent error messages and logging
12
+
13
+ Usage:
14
+ # From API
15
+ schema = load_agent_schema("rem")
16
+
17
+ # From CLI with custom path
18
+ schema = load_agent_schema("./my-agent.yaml")
19
+
20
+ # From agent factory
21
+ schema = load_agent_schema("contract-analyzer")
22
+
23
+ Schema Caching Status:
24
+
25
+ ✅ IMPLEMENTED: Filesystem Schema Caching (2025-11-22)
26
+ - Schemas loaded from package resources cached indefinitely in _fs_schema_cache
27
+ - No TTL needed (immutable, versioned with code)
28
+ - Lazy-loaded on first access
29
+ - Custom paths not cached (may change during development)
30
+
31
+ TODO: Database Schema Caching (Future)
32
+ - Schemas loaded from schemas table (SchemaRepository)
33
+ - Will require TTL for cache invalidation (5-15 minutes)
34
+ - May change at runtime via admin updates
35
+ - Cache key: (schema_name, version) → (schema_dict, timestamp)
36
+ - Implementation ready in _db_schema_cache and _db_schema_ttl
37
+
38
+ Benefits Achieved:
39
+ - ✅ Eliminated disk I/O for repeated schema loads
40
+ - ✅ Faster agent creation (critical for API latency)
41
+ - 🔲 Database query reduction (pending DB schema implementation)
42
+
43
+ Future Enhancement (when database schemas are implemented):
44
+ import time
45
+
46
+ _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
47
+ _db_schema_ttl: int = 300 # 5 minutes
48
+
49
+ async def load_agent_schema_from_db(name: str, version: str | None = None):
50
+ cache_key = (name, version or "latest")
51
+ if cache_key in _db_schema_cache:
52
+ schema, timestamp = _db_schema_cache[cache_key]
53
+ if time.time() - timestamp < _db_schema_ttl:
54
+ return schema
55
+ # Load from DB and cache with TTL
56
+ from rem.services.repositories import schema_repository
57
+ schema = await schema_repository.get_by_name(name, version)
58
+ _db_schema_cache[cache_key] = (schema, time.time())
59
+ return schema
60
+
61
+ Related:
62
+ - rem/src/rem/agentic/providers/pydantic_ai.py (create_agent factory)
63
+ - rem/src/rem/services/repositories/schema_repository.py (database schemas)
64
+ """
65
+
66
+ import importlib.resources
67
+ from pathlib import Path
68
+ from typing import Any, cast
69
+
70
+ import yaml
71
+ from loguru import logger
72
+
73
+
74
+ # Standard search paths for agent schemas (in priority order)
75
+ SCHEMA_SEARCH_PATHS = [
76
+ "schemas/agents/{name}.yaml", # Top-level agents (e.g., rem.yaml)
77
+ "schemas/agents/core/{name}.yaml", # Core system agents
78
+ "schemas/agents/examples/{name}.yaml", # Example agents
79
+ "schemas/evaluators/{name}.yaml",
80
+ "schemas/{name}.yaml",
81
+ ]
82
+
83
+ # In-memory cache for filesystem schemas (no TTL - immutable)
84
+ _fs_schema_cache: dict[str, dict[str, Any]] = {}
85
+
86
+ # Future: Database schema cache (with TTL - mutable)
87
+ # Will be used when loading schemas from database (SchemaRepository)
88
+ # _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
89
+ # _db_schema_ttl: int = 300 # 5 minutes in seconds
90
+
91
+
92
def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any] | None:
    """
    Load schema from database using LOOKUP query.

    This function is synchronous but drives async database operations through
    asyncio.run(). It therefore cannot do the lookup when an event loop is
    already running in the calling thread; in that case it logs a warning and
    returns None without touching the database.

    Args:
        schema_name: Schema name to lookup
        user_id: User ID for data scoping

    Returns:
        Schema spec (dict) if found, None otherwise. None is also returned when
        the PostgreSQL service is unavailable or when connecting/querying fails;
        those failures are logged at debug level rather than raised.
    """
    import asyncio

    # asyncio.run() cannot be used from inside a running loop, so bail out early.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread - safe to use asyncio.run() below.
        pass
    else:
        logger.warning(
            "Database schema lookup called from async context. "
            "This may cause issues. Consider using async version of load_agent_schema."
        )
        return None

    async def _async_lookup() -> dict[str, Any] | None:
        """Async helper to query database."""
        from rem.services.postgres import get_postgres_service
        from rem.models.entities import Schema  # kept from the original; not referenced below

        db = get_postgres_service()
        if not db:
            logger.debug("PostgreSQL service not available for schema lookup")
            return None

        try:
            await db.connect()

            # Use REM LOOKUP query to find schema.
            # NOTE(review): schema_name is interpolated into the query text unescaped;
            # confirm callers never pass untrusted names, or quote it per the REM dialect.
            query = f"LOOKUP '{schema_name}' FROM schemas"
            logger.debug(f"Executing: {query} (user_id={user_id})")

            result = await db.execute_rem_query(
                query=query,
                user_id=user_id,
            )

            if result and isinstance(result, dict):
                # Extract spec field (JSON Schema) from the returned entity.
                spec = result.get("spec")
                if spec and isinstance(spec, dict):
                    logger.debug(f"Found schema in database: {schema_name}")
                    return spec

            logger.debug(f"Schema not found in database: {schema_name}")
            return None

        except Exception as e:
            logger.debug(f"Database schema lookup error: {e}")
            return None
        finally:
            # A failing disconnect must not discard a schema that was already found.
            try:
                await db.disconnect()
            except Exception as e:
                logger.debug(f"Database disconnect failed after schema lookup: {e}")

    # Run async lookup in new event loop
    return asyncio.run(_async_lookup())
166
+
167
+
168
def _read_schema_file(schema_file: Path) -> dict[str, Any]:
    """Parse one YAML schema file and return its top-level mapping.

    Raises:
        ValueError: If the document is empty or its top level is not a mapping
        yaml.YAMLError: If the file is not valid YAML
    """
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    with open(schema_file, "r", encoding="utf-8") as f:
        schema = yaml.safe_load(f)
    if not isinstance(schema, dict):
        raise ValueError(
            f"Schema file {schema_file} must contain a YAML mapping at the top level, "
            f"got {type(schema).__name__}"
        )
    logger.debug(f"Loaded schema with keys: {list(schema.keys())}")
    return schema


def load_agent_schema(
    schema_name_or_path: str,
    use_cache: bool = True,
    user_id: str | None = None,
    enable_db_fallback: bool = True,
) -> dict[str, Any]:
    """
    Load agent schema from YAML file with unified search logic and caching.

    Schemas found in package resources are cached in memory with no expiry.
    A cached schema is returned by reference, so callers should not mutate it.

    Handles path resolution automatically:
    - "rem" → searches schemas/agents/rem.yaml (top-level)
    - "moment-builder" → searches schemas/agents/core/moment-builder.yaml
    - "contract-analyzer" → searches schemas/agents/examples/contract-analyzer.yaml
    - "core/moment-builder" → searches schemas/agents/core/moment-builder.yaml
    - "/absolute/path.yaml" → loads directly
    - "relative/path.yaml" → loads relative to cwd

    Search Order:
    1. Check cache (if use_cache=True, the argument is not a custom path, and it is cached)
    2. Exact path if it is an existing file (absolute or relative); directories are ignored
    3. Package resources: schemas/agents/{name}.yaml (top-level)
    4. Package resources: schemas/agents/core/{name}.yaml
    5. Package resources: schemas/agents/examples/{name}.yaml
    6. Package resources: schemas/evaluators/{name}.yaml
    7. Package resources: schemas/{name}.yaml
    8. Database LOOKUP: schemas table (if enable_db_fallback=True and user_id provided)

    Args:
        schema_name_or_path: Schema name or file path
            Examples: "rem-query-agent", "contract-analyzer", "./my-schema.yaml"
        use_cache: If True, uses in-memory cache for filesystem schemas
        user_id: User ID for database schema lookup (required for DB fallback)
        enable_db_fallback: If True, falls back to database LOOKUP when file not found

    Returns:
        Agent schema as dictionary

    Raises:
        FileNotFoundError: If schema not found in any search location (filesystem + database)
        yaml.YAMLError: If the file at the exact path is invalid YAML
        ValueError: If the file at the exact path does not contain a YAML mapping

        Errors while reading a package-resource candidate are logged at debug
        level and that candidate is skipped.

    Examples:
        >>> # Load by short name (cached after first load)
        >>> schema = load_agent_schema("contract-analyzer")
        >>>
        >>> # Load from custom path (not cached - custom paths may change)
        >>> schema = load_agent_schema("./my-agent.yaml")
        >>>
        >>> # Load evaluator schema (cached)
        >>> schema = load_agent_schema("rem-lookup-correctness")
        >>>
        >>> # Load custom user schema from database
        >>> schema = load_agent_schema("my-custom-agent", user_id="user-123")
    """
    name_or_path = str(schema_name_or_path)

    # Normalize the name for cache key: drop known directory segments and the YAML suffix.
    cache_key = name_or_path
    for segment in ('agents/', 'schemas/', 'evaluators/', 'core/', 'examples/'):
        cache_key = cache_key.replace(segment, '')
    if cache_key.endswith(('.yaml', '.yml')):
        cache_key = cache_key.rsplit('.', 1)[0]

    # A directory that merely shares the schema's name (e.g. a "rem/" folder in cwd)
    # is not a schema file, so test for a regular file rather than mere existence.
    path = Path(schema_name_or_path)
    is_exact_file = path.is_file()
    is_custom_path = is_exact_file or '/' in name_or_path or '\\' in name_or_path

    # Check cache first (only for package resources, not custom paths)
    if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # 1. Try exact path first (absolute or relative to cwd)
    if is_exact_file:
        logger.debug(f"Loading schema from exact path: {path}")
        # Don't cache custom paths (they may change)
        return _read_schema_file(path)

    # 2. Normalize name for package resource search
    base_name = cache_key

    # 3. Try package resources with standard search paths
    for search_pattern in SCHEMA_SEARCH_PATHS:
        search_path = search_pattern.format(name=base_name)

        try:
            # Use importlib.resources to find schema in installed package
            schema_ref = importlib.resources.files("rem") / search_path
            schema_path = Path(str(schema_ref))

            if schema_path.is_file():
                logger.debug(f"Loading schema from package: {search_path}")
                schema = _read_schema_file(schema_path)

                # Cache filesystem schemas (treated as immutable, cached indefinitely)
                if use_cache:
                    _fs_schema_cache[cache_key] = schema
                    logger.debug(f"Cached schema: {cache_key}")

                return schema
        except Exception as e:
            # Best effort: an unreadable candidate must not stop the search.
            logger.debug(f"Could not load from {search_path}: {e}")
            continue

    # 4. Try database LOOKUP fallback (if enabled and user_id provided)
    if enable_db_fallback and user_id:
        try:
            logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id})")
            db_schema = _load_schema_from_database(base_name, user_id)
            if db_schema:
                logger.info(f"✅ Loaded schema from database: {base_name} (user_id={user_id})")
                return db_schema
        except Exception as e:
            logger.debug(f"Database schema lookup failed: {e}")
            # Fall through to error below

    # 5. Schema not found in any location
    searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]
    db_search_note = ""
    if enable_db_fallback:
        if user_id:
            db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id='{user_id}' (no match)"
        else:
            db_search_note = "\n - Database: (skipped - no user_id provided)"

    raise FileNotFoundError(
        f"Schema not found: {schema_name_or_path}\n"
        f"Searched locations:\n"
        f" - Exact path: {path}\n"
        f" - Package resources: {', '.join(searched_paths)}"
        f"{db_search_note}"
    )
303
+
304
+
305
def validate_agent_schema(schema: dict[str, Any]) -> bool:
    """
    Check that an agent schema has the expected top-level structure.

    Checks performed, in order:
    - the schema is a dict
    - 'type' equals 'object'
    - 'description' is present (system prompt)
    - 'properties' is present (output schema); its absence only logs a warning

    Args:
        schema: Agent schema dict

    Returns:
        True if valid

    Raises:
        ValueError: If the schema is not a dict, has the wrong 'type', or lacks 'description'
    """
    if not isinstance(schema, dict):
        raise ValueError(f"Schema must be a dict, got {type(schema)}")

    declared_type = schema.get('type')
    if declared_type != 'object':
        raise ValueError(f"Schema type must be 'object', got {declared_type}")

    has_description = 'description' in schema
    if not has_description:
        raise ValueError("Schema must have 'description' field (system prompt)")

    # A missing output schema is tolerated: the agent simply has no structured output.
    has_properties = 'properties' in schema
    if not has_properties:
        logger.warning("Schema missing 'properties' field - agent will have no structured output")

    logger.debug("Schema validation passed")
    return True