remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,649 @@
1
+ """
2
+ Centralized schema loading utility for agent schemas.
3
+
4
+ This module provides a single, consistent implementation for loading
5
+ agent schemas from YAML files across the entire codebase (API, CLI, agent factory).
6
+
7
+ Design Pattern:
8
+ - Search standard locations: schemas/agents/, schemas/evaluators/, schemas/
9
+ - Support short names: "contract-analyzer" → "schemas/agents/examples/contract-analyzer.yaml"
10
+ - Support relative/absolute paths
11
+ - Consistent error messages and logging
12
+
13
+ Usage:
14
+ # From API
15
+ schema = load_agent_schema("rem")
16
+
17
+ # From CLI with custom path
18
+ schema = load_agent_schema("./my-agent.yaml")
19
+
20
+ # From agent factory
21
+ schema = load_agent_schema("contract-analyzer")
22
+
23
+ TODO: Git FS Integration
24
+ The schema loader currently uses importlib.resources for package schemas
25
+ and direct filesystem access for custom paths. The FS abstraction layer
26
+ (rem.services.fs.FS) could be used to abstract storage backends:
27
+
28
+ - Local filesystem (current)
29
+ - Git repositories (GitService)
30
+ - S3 (via FS provider)
31
+
32
+ This would enable loading schemas from versioned Git repos or S3 buckets
33
+ without changing the API. The FS provider pattern already exists and just
34
+ needs integration testing with the schema loader.
35
+
36
+ Example future usage:
37
+ # Load from Git at specific version
38
+ schema = load_agent_schema("git://rem/schemas/agents/rem.yaml?ref=v1.0.0")
39
+
40
+ # Load from S3
41
+ schema = load_agent_schema("s3://rem-schemas/agents/cv-parser.yaml")
42
+
43
+ Schema Caching Status:
44
+
45
+ ✅ IMPLEMENTED: Filesystem Schema Caching (2025-11-22)
46
+ - Schemas loaded from package resources cached indefinitely in _fs_schema_cache
47
+ - No TTL needed (immutable, versioned with code)
48
+ - Lazy-loaded on first access
49
+ - Custom paths not cached (may change during development)
50
+
51
+ TODO: Database Schema Caching (Future)
52
+ - Schemas loaded from schemas table (SchemaRepository)
53
+ - Will require TTL for cache invalidation (5-15 minutes)
54
+ - May change at runtime via admin updates
55
+ - Cache key: (schema_name, version) → (schema_dict, timestamp)
56
+ - Not implemented yet: _db_schema_cache and _db_schema_ttl exist only as commented-out placeholders (see the sketch under "Future Enhancement")
57
+
58
+ Benefits Achieved:
59
+ - ✅ Eliminated disk I/O for repeated schema loads
60
+ - ✅ Faster agent creation (critical for API latency)
61
+ - 🔲 Database query reduction (pending DB schema implementation)
62
+
63
+ Future Enhancement (when database schemas are implemented):
64
+ import time
65
+
66
+ _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
67
+ _db_schema_ttl: int = 300 # 5 minutes
68
+
69
+ async def load_agent_schema_from_db(name: str, version: str | None = None):
70
+ cache_key = (name, version or "latest")
71
+ if cache_key in _db_schema_cache:
72
+ schema, timestamp = _db_schema_cache[cache_key]
73
+ if time.time() - timestamp < _db_schema_ttl:
74
+ return schema
75
+ # Load from DB and cache with TTL
76
+ from rem.services.repositories import schema_repository
77
+ schema = await schema_repository.get_by_name(name, version)
78
+ _db_schema_cache[cache_key] = (schema, time.time())
79
+ return schema
80
+
81
+ Related:
82
+ - rem/src/rem/agentic/providers/pydantic_ai.py (create_agent factory)
83
+ - rem/src/rem/services/repositories/schema_repository.py (database schemas)
84
+ """
85
+
86
+ import importlib.resources
87
+ from pathlib import Path
88
+ from typing import Any, cast
89
+
90
+ import yaml
91
+ from loguru import logger
92
+
93
+
94
# Standard search paths for agent/evaluator schemas (in priority order).
# Each entry is a template relative to the installed ``rem`` package root;
# ``{name}`` is filled in with the normalized (lowercased) schema name.
SCHEMA_SEARCH_PATHS = [
    "schemas/agents/{name}.yaml",  # Top-level agents (e.g., rem.yaml)
    "schemas/agents/core/{name}.yaml",  # Core system agents
    "schemas/agents/examples/{name}.yaml",  # Example agents
    "schemas/evaluators/{name}.yaml",  # Nested evaluators (e.g., hello-world/default)
    "schemas/evaluators/rem/{name}.yaml",  # REM evaluators (e.g., lookup-correctness)
    "schemas/{name}.yaml",  # Generic schemas
]

# In-memory cache for filesystem schemas (no TTL - immutable).
# Keyed by the normalized schema name; values are the parsed YAML documents.
_fs_schema_cache: dict[str, dict[str, Any]] = {}

# Future: Database schema cache (with TTL - mutable)
# Will be used when loading schemas from database (SchemaRepository)
# _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
# _db_schema_ttl: int = 300 # 5 minutes in seconds
111
+
112
+
113
def _load_schema_from_database(schema_name: str, user_id: str | None) -> dict[str, Any] | None:
    """
    Load a schema spec from the ``schemas`` table using a LOOKUP query.

    This function is synchronous but drives async database operations, so it
    can be called from load_agent_schema() (which is sync) both from plain
    synchronous code and from code that is already running inside an event loop.

    Args:
        schema_name: Schema name to look up (matched case-insensitively)
        user_id: User ID for data scoping. When falsy, only public schemas
            (user_id = 'system' or NULL) are searched.

    Returns:
        Schema spec (dict) if found, None otherwise. Errors raised while
        connecting or querying are logged at debug level and reported as None.

    Raises:
        concurrent.futures.TimeoutError: If called from inside a running event
            loop and the lookup does not finish within 10 seconds.
    """
    import asyncio
    import concurrent.futures

    async def _async_lookup() -> dict[str, Any] | None:
        """Connect, run the lookup query, and always disconnect afterwards."""
        from rem.services.postgres import get_postgres_service

        db = get_postgres_service()
        if not db:
            logger.debug("PostgreSQL service not available for schema lookup")
            return None

        try:
            await db.connect()

            logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")

            # NOTE(review): LIMIT 1 without ORDER BY means that when both a
            # user-specific and a public row share a name, which one wins is
            # up to the database - confirm whether user rows should take priority.
            if user_id:
                # Public schemas plus the caller's own schemas
                query = """
                    SELECT spec FROM schemas
                    WHERE LOWER(name) = LOWER($1)
                    AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
                    LIMIT 1
                """
                row = await db.fetchrow(query, schema_name, user_id)
            else:
                # No user_id - only search public schemas
                query = """
                    SELECT spec FROM schemas
                    WHERE LOWER(name) = LOWER($1)
                    AND (user_id = 'system' OR user_id IS NULL)
                    LIMIT 1
                """
                row = await db.fetchrow(query, schema_name)

            if row:
                spec = row.get("spec")
                if spec and isinstance(spec, dict):
                    logger.debug(f"Found schema in database: {schema_name}")
                    return spec

            logger.debug(f"Schema not found in database: {schema_name}")
            return None

        except Exception as e:
            # Best effort: the caller falls back to filesystem schemas.
            logger.debug(f"Database schema lookup error: {e}")
            return None
        finally:
            await db.disconnect()

    # Only the loop probe is guarded: a RuntimeError raised by the lookup
    # itself must not be mistaken for "no running loop".
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # Not in async context - safe to use asyncio.run()
        return asyncio.run(_async_lookup())

    # Already inside an event loop: asyncio.run() is not allowed on this
    # thread, so run the lookup on a worker thread with its own loop. The
    # calling thread still blocks while waiting for the result.
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    try:
        future = pool.submit(lambda: asyncio.run(_async_lookup()))
        return future.result(timeout=10)
    finally:
        # wait=False so a timeout is reported immediately instead of blocking
        # until the worker thread eventually finishes.
        pool.shutdown(wait=False)
243
+
244
+
245
def _normalize_schema_name(schema_name_or_path: str) -> str:
    """Reduce a schema name or path to the lowercase key used for cache and lookups."""
    key = str(schema_name_or_path)
    # Fragments are removed wherever they occur (not only as a prefix), in this order.
    for fragment in ('agents/', 'schemas/', 'evaluators/', 'core/', 'examples/'):
        key = key.replace(fragment, '')
    key = key.lower()
    if key.endswith(('.yaml', '.yml')):
        key = key.rsplit('.', 1)[0]
    return key


def _parse_schema_yaml(stream: Any, source: Any) -> dict[str, Any]:
    """Parse an open YAML stream and require the document to be a mapping."""
    schema = yaml.safe_load(stream)
    if not isinstance(schema, dict):
        # Empty files parse to None and lists parse to list; neither is a schema.
        raise ValueError(
            f"Schema file {source} must contain a YAML mapping, got {type(schema).__name__}"
        )
    logger.debug(f"Loaded schema with keys: {list(schema.keys())}")
    return schema


def _read_schema_file(path: Path) -> dict[str, Any]:
    """Read and parse a schema file from disk as UTF-8, independent of the locale."""
    with open(path, "r", encoding="utf-8") as f:
        return _parse_schema_yaml(f, path)


def _local_schema_dirs() -> list[str]:
    """Return directories to search: auto-detected local folders first, then registered paths."""
    from ..registry import get_schema_paths

    # Copy so the list returned by the registry is never modified here.
    registered = list(get_schema_paths())

    # Auto-detect local folders if they exist (convention over configuration).
    # Priority follows the listed order: ./agents, then ./schemas, then ./evaluators.
    detected: list[str] = []
    for folder in ("./agents", "./schemas", "./evaluators"):
        candidate = Path(folder)
        if not candidate.is_dir():
            continue
        resolved = str(candidate.resolve())
        if resolved not in registered and resolved not in detected:
            detected.append(resolved)
            logger.debug(f"Auto-detected schema directory: {folder}")

    return detected + registered


def _load_schema_from_custom_dirs(base_name: str, search_dirs: list[str]) -> dict[str, Any] | None:
    """Return the first schema file matching base_name inside search_dirs, or None."""
    patterns = (
        f"{base_name}.yaml",
        f"{base_name}.yml",
        f"agents/{base_name}.yaml",
        f"evaluators/{base_name}.yaml",
    )
    for directory in search_dirs:
        for pattern in patterns:
            candidate = Path(directory) / pattern
            if candidate.is_file():
                logger.debug(f"Loading schema from custom path: {candidate}")
                # Don't cache custom paths (they may change during development)
                return _read_schema_file(candidate)
    return None


def _load_schema_from_package(base_name: str, cache_key: str, use_cache: bool) -> dict[str, Any] | None:
    """Search the packaged schemas (SCHEMA_SEARCH_PATHS) and optionally cache the hit."""
    for search_pattern in SCHEMA_SEARCH_PATHS:
        search_path = search_pattern.format(name=base_name)
        try:
            # Read through the resource object itself so packages that are not
            # unpacked on disk (e.g. zip imports) work as well.
            resource = importlib.resources.files("rem") / search_path
            if not resource.is_file():
                continue
            logger.debug(f"Loading schema from package: {search_path}")
            with resource.open("r", encoding="utf-8") as f:
                schema = _parse_schema_yaml(f, search_path)
        except Exception as e:
            # An unreadable or invalid packaged schema is skipped, not fatal.
            logger.debug(f"Could not load from {search_path}: {e}")
            continue

        # Cache filesystem schemas (immutable, safe to cache indefinitely)
        if use_cache:
            _fs_schema_cache[cache_key] = schema
            logger.debug(f"Cached schema: {cache_key}")
        return schema
    return None


async def _fetch_schema_spec_async(base_name: str, user_id: str | None, db: Any = None) -> dict[str, Any] | None:
    """Query the schemas table for base_name; connect/disconnect only if no db was supplied."""
    from rem.services.postgres import get_postgres_service

    should_disconnect = False
    if db is None:
        db = get_postgres_service()
        if db:
            await db.connect()
            should_disconnect = True

    if not db:
        return None

    try:
        if user_id:
            query = """
                SELECT spec FROM schemas
                WHERE LOWER(name) = LOWER($1)
                AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
                LIMIT 1
            """
            row = await db.fetchrow(query, base_name, user_id)
        else:
            # No user_id - only search public schemas
            query = """
                SELECT spec FROM schemas
                WHERE LOWER(name) = LOWER($1)
                AND (user_id = 'system' OR user_id IS NULL)
                LIMIT 1
            """
            row = await db.fetchrow(query, base_name)

        if row:
            spec = row.get("spec")
            if spec and isinstance(spec, dict):
                return spec
        return None
    finally:
        # A connection handed in by the caller stays open.
        if should_disconnect:
            await db.disconnect()


def load_agent_schema(
    schema_name_or_path: str,
    use_cache: bool = True,
    user_id: str | None = None,
    enable_db_fallback: bool = True,
) -> dict[str, Any]:
    """
    Load agent schema with database-first priority for hot-reloading support.

    Schema names are case-invariant - "Rem", "rem", "REM" all resolve to the same schema.

    **IMPORTANT**: Database is checked FIRST (before filesystem) to enable hot-reloading
    of schema updates without redeploying the application. This allows operators to
    update schemas via `rem process ingest` and have changes take effect immediately.

    Handles path resolution automatically:
    - "rem" → searches database, then schemas/agents/rem.yaml
    - "moment-builder" → searches database, then schemas/agents/core/moment-builder.yaml
    - "/absolute/path.yaml" → loads directly from filesystem (exact paths skip database)
    - "relative/path.yaml" → loads relative to cwd (exact paths skip database)

    Search Order:
    1. Exact path if it is an existing file (absolute or relative) - skips database
    2. Database LOOKUP: schemas table (if enable_db_fallback=True and the name
       contains no path separator) - PREFERRED for hot-reload, never cached
    3. Filesystem cache (if use_cache=True and the name contains no path separator)
    4. Local ./agents, ./schemas, ./evaluators folders (in that order), then
       custom paths from rem.register_schema_path() and SCHEMA__PATHS env var
    5. Package resources: schemas/agents/{name}.yaml (top-level)
    6. Package resources: schemas/agents/core/{name}.yaml
    7. Package resources: schemas/agents/examples/{name}.yaml
    8. Package resources: schemas/evaluators/{name}.yaml
    9. Package resources: schemas/evaluators/rem/{name}.yaml
    10. Package resources: schemas/{name}.yaml

    Args:
        schema_name_or_path: Schema name or file path (case-invariant for names)
            Examples: "rem-query-agent", "Contract-Analyzer", "./my-schema.yaml"
        use_cache: If True, uses in-memory cache for packaged filesystem schemas.
            Cached schemas are returned by reference; callers must not mutate them.
        user_id: User ID for database schema lookup
        enable_db_fallback: If True, checks database FIRST for schema (default: True)

    Returns:
        Agent schema as dictionary

    Raises:
        FileNotFoundError: If schema not found in any search location (database + filesystem)
        yaml.YAMLError: If an exact-path or custom-directory schema file is invalid YAML
        ValueError: If an exact-path or custom-directory schema file does not
            contain a YAML mapping (e.g. the file is empty)

    Examples:
        >>> # Load by short name - checks database first for hot-reload support
        >>> schema = load_agent_schema("Contract-Analyzer")  # case invariant
        >>>
        >>> # Load from custom path (skips database - exact paths always use filesystem)
        >>> schema = load_agent_schema("./my-agent.yaml")
        >>>
        >>> # Load evaluator schema
        >>> schema = load_agent_schema("lookup-correctness")
    """
    # Normalize the name for cache key (lowercase for case-invariant lookups)
    cache_key = _normalize_schema_name(schema_name_or_path)
    path = Path(schema_name_or_path)

    # 1. Try exact path first (absolute or relative to cwd) - must be a file, not directory
    # Exact paths skip database lookup and are never cached (they may change)
    if path.is_file():
        logger.debug(f"Loading schema from exact path: {path}")
        return _read_schema_file(path)

    # Anything that looks like a path is an explicit file reference, so the
    # database and the cache are skipped for it.
    raw_name = str(schema_name_or_path)
    is_custom_path = '/' in raw_name or '\\' in raw_name
    base_name = cache_key

    # 2. Try database FIRST (if enabled) - enables hot-reload without redeploy
    # Database schemas are NOT cached to ensure hot-reload works immediately
    db_was_searched = enable_db_fallback and not is_custom_path
    if db_was_searched:
        try:
            logger.debug(f"Checking database for schema: {base_name} (user_id={user_id or 'public'})")
            db_schema = _load_schema_from_database(base_name, user_id)
            if db_schema:
                logger.info(f"✅ Loaded schema from database: {base_name}")
                return db_schema
        except Exception as e:
            logger.debug(f"Database schema lookup failed: {e}")
            # Fall through to filesystem search

    # 3. Check filesystem cache (only for package resources, not custom paths)
    if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # 4. Try custom schema paths (auto-detected + registry + SCHEMA__PATHS env var)
    custom_paths = _local_schema_dirs()
    schema = _load_schema_from_custom_dirs(base_name, custom_paths)
    if schema is not None:
        return schema

    # 5. Try package resources with standard search paths
    schema = _load_schema_from_package(base_name, cache_key, use_cache)
    if schema is not None:
        return schema

    # 6. Schema not found in any location
    searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]

    custom_paths_note = ""
    if custom_paths:
        custom_paths_note = f"\n - Custom paths: {', '.join(custom_paths)}"

    # Only mention the database when it was actually consulted.
    db_search_note = ""
    if db_was_searched:
        if user_id:
            db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('{user_id}', 'system', NULL) (no match)"
        else:
            db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('system', NULL) (no match)"

    raise FileNotFoundError(
        f"Schema not found: {schema_name_or_path}\n"
        f"Searched locations:\n"
        f" - Exact path: {path}"
        f"{custom_paths_note}\n"
        f" - Package resources: {', '.join(searched_paths)}"
        f"{db_search_note}"
    )


async def load_agent_schema_async(
    schema_name_or_path: str,
    user_id: str | None = None,
    db=None,
    enable_db_fallback: bool = True,
) -> dict[str, Any]:
    """
    Async version of load_agent_schema with database-first priority.

    Schema names are case-invariant - "MyAgent", "myagent", "MYAGENT" all resolve to the same schema.

    **IMPORTANT**: Database is checked FIRST (before filesystem) to enable hot-reloading
    of schema updates without redeploying the application. A failing database
    lookup is logged at debug level and the filesystem search continues, the
    same as in load_agent_schema().

    The filesystem search (cache, local/custom directories, package resources)
    uses the same locations and priority as load_agent_schema(); packaged
    schemas are always cached.

    Args:
        schema_name_or_path: Schema name or file path (case-invariant for names)
        user_id: User ID for database schema lookup
        db: Optional existing PostgresService connection (if None, will create one
            and disconnect it afterwards)
        enable_db_fallback: If True, checks database FIRST for schema (default: True)

    Returns:
        Agent schema as dictionary

    Raises:
        FileNotFoundError: If schema not found
        yaml.YAMLError: If an exact-path or custom-directory schema file is invalid YAML
        ValueError: If an exact-path or custom-directory schema file does not
            contain a YAML mapping (e.g. the file is empty)
    """
    path = Path(schema_name_or_path)

    # Normalize the name for cache key (lowercase for case-invariant lookups)
    cache_key = _normalize_schema_name(schema_name_or_path)

    # 1. Try exact path first (skips database - explicit file reference)
    if path.is_file():
        logger.debug(f"Loading schema from exact path: {path}")
        return _read_schema_file(path)

    raw_name = str(schema_name_or_path)
    is_custom_path = '/' in raw_name or '\\' in raw_name
    base_name = cache_key

    # 2. Try database FIRST (if enabled) - enables hot-reload without redeploy
    if enable_db_fallback and not is_custom_path:
        try:
            db_schema = await _fetch_schema_spec_async(base_name, user_id, db)
        except Exception as e:
            logger.debug(f"Database schema lookup failed: {e}")
            db_schema = None
        if db_schema:
            logger.info(f"✅ Loaded schema from database: {base_name}")
            return db_schema

    # 3. Check filesystem cache
    if not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # 4. Try custom schema paths (auto-detected + registry + SCHEMA__PATHS env var)
    schema = _load_schema_from_custom_dirs(base_name, _local_schema_dirs())
    if schema is not None:
        return schema

    # 5. Try package resources
    schema = _load_schema_from_package(base_name, cache_key, True)
    if schema is not None:
        return schema

    # Not found
    raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
546
+
547
+
548
def validate_agent_schema(schema: dict[str, Any]) -> bool:
    """
    Check that an agent schema has the minimum expected structure.

    The checks run in this order:
    - the schema itself is a dict
    - 'type' equals 'object'
    - 'description' (the system prompt) is present
    - 'properties' (the output schema) is present; a missing key only
      produces a warning, it does not fail validation

    Args:
        schema: Agent schema dict

    Returns:
        True if valid

    Raises:
        ValueError: If schema is invalid
    """
    if isinstance(schema, dict):
        declared_type = schema.get('type')
    else:
        raise ValueError(f"Schema must be a dict, got {type(schema)}")

    if declared_type != 'object':
        raise ValueError(f"Schema type must be 'object', got {declared_type}")

    has_system_prompt = 'description' in schema
    if not has_system_prompt:
        raise ValueError("Schema must have 'description' field (system prompt)")

    # Structured output is optional, so its absence is reported but tolerated.
    has_output_schema = 'properties' in schema
    if not has_output_schema:
        logger.warning("Schema missing 'properties' field - agent will have no structured output")

    logger.debug("Schema validation passed")
    return True
580
+
581
+
582
def get_evaluator_schema_path(evaluator_name: str) -> Path | None:
    """
    Find the file path to an evaluator schema.

    Searches these locations for the evaluator schema YAML file, in order:
    - Local project folders ./evaluators, ./schemas, ./agents (in that order)
    - Custom schema paths from registry
    - Package resources: schemas/evaluators/{name}.yaml and
      schemas/evaluators/rem/{name}.yaml

    Inside each local/custom directory the candidates are {name}.yaml,
    {name}.yml and evaluators/{name}.yaml.

    Args:
        evaluator_name: Name of the evaluator (e.g., "mental-health-classifier").
            Matched in lowercase; a trailing ".yaml" / ".yml" is ignored.

    Returns:
        Path to the evaluator schema file, or None if not found

    Example:
        >>> path = get_evaluator_schema_path("mental-health-classifier")
        >>> if path:
        ...     print(f"Found evaluator at: {path}")
    """
    from ..registry import get_schema_paths

    # Strip only a trailing extension; text elsewhere in the name is kept.
    base_name = evaluator_name.lower().removesuffix('.yaml').removesuffix('.yml')

    # 1. Try custom schema paths (auto-detected + from registry)
    # Copy so the list returned by the registry is never modified here.
    registered = list(get_schema_paths())

    # Auto-detect local folders. Priority follows the listed order so that
    # ./evaluators wins over a same-named agent schema in ./agents.
    detected: list[str] = []
    for auto_folder in ("./evaluators", "./schemas", "./agents"):
        auto_path = Path(auto_folder)
        if not auto_path.is_dir():
            continue
        resolved = str(auto_path.resolve())
        if resolved not in registered and resolved not in detected:
            detected.append(resolved)

    patterns = (
        f"{base_name}.yaml",
        f"{base_name}.yml",
        f"evaluators/{base_name}.yaml",
    )
    for custom_dir in detected + registered:
        # Try various patterns within each custom directory
        for pattern in patterns:
            custom_path = Path(custom_dir) / pattern
            if custom_path.is_file():
                logger.debug(f"Found evaluator schema: {custom_path}")
                return custom_path

    # 2. Try package resources
    evaluator_search_paths = [
        f"schemas/evaluators/{base_name}.yaml",
        f"schemas/evaluators/rem/{base_name}.yaml",
    ]

    for search_path in evaluator_search_paths:
        try:
            schema_ref = importlib.resources.files("rem") / search_path
            # A concrete filesystem path is returned to the caller, so the
            # resource is converted to a Path rather than read in place.
            schema_path = Path(str(schema_ref))

            if schema_path.is_file():
                logger.debug(f"Found evaluator schema in package: {schema_path}")
                return schema_path
        except Exception as e:
            logger.debug(f"Could not check {search_path}: {e}")
            continue

    logger.warning(f"Evaluator schema not found: {evaluator_name}")
    return None