remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,236 @@
1
+ """
2
+ Pydantic Model Helper Utilities.
3
+
4
+ Utilities for working with REM Pydantic models following our conventions:
5
+
6
+ Business Key (entity_key) Detection:
7
+ 1. Field with json_schema_extra={"entity_key": True}
8
+ 2. Common business key fields: name, uri, key, label, title
9
+ 3. Fallback to "id" (unique by UUID only)
10
+
11
+ Embedding Field Detection:
12
+ 1. Field with json_schema_extra={"embed": True}
13
+ 2. Common content fields: content, description, summary, etc.
14
+ 3. Explicit disable with json_schema_extra={"embed": False}
15
+
16
+ Table Name Inference:
17
+ 1. model_config.json_schema_extra.table_name
18
+ 2. CamelCase → snake_case + pluralization
19
+ """
20
+
21
+ from typing import Any, Type
22
+
23
+ from loguru import logger
24
+ from pydantic import BaseModel
25
+
26
+
27
def get_entity_key_field(model: Type[BaseModel]) -> str:
    """
    Pick the field that serves as the business key (entity_key) of a model.

    Resolution order:
    1. The first field whose json_schema_extra is a dict containing
       {"entity_key": True} (the value must be exactly ``True``).
    2. The first of "name", "uri", "key", "label", "title" that is declared
       on the model, tried in that order.
    3. "id" as a last resort; a warning is logged in that case.

    Args:
        model: Pydantic model class (its ``model_fields`` mapping is inspected)

    Returns:
        Field name to use as entity_key
    """
    declared_fields = model.model_fields

    # Pass 1: an explicit marker always wins over naming conventions.
    for field_name, field_info in declared_fields.items():
        extra = getattr(field_info, "json_schema_extra", None)
        if not isinstance(extra, dict) or extra.get("entity_key") is not True:
            continue
        logger.debug(f"Using explicit entity_key field: {field_name}")
        return field_name

    # Pass 2: conventional names, in priority order.
    conventional = next(
        (
            candidate
            for candidate in ("name", "uri", "key", "label", "title")
            if candidate in declared_fields
        ),
        None,
    )
    if conventional is not None:
        logger.debug(f"Using conventional entity_key field: {conventional}")
        return conventional

    # Pass 3: nothing matched, so fall back to the primary key.
    logger.warning(
        f"No business key found for {model.__name__}, using 'id' (UUID only)"
    )
    return "id"
68
+
69
+
70
def get_table_name(model: Type[BaseModel]) -> str:
    """
    Get table name for a Pydantic model.

    Resolution order:
    1. model_config["json_schema_extra"]["table_name"] when model_config and
       json_schema_extra are both dicts (returned as-is).
    2. The class name converted from CamelCase to snake_case, then pluralized.

    Pluralization rules applied to the snake_case name:
    - already ends in "s"            -> unchanged
    - consonant + "y"                -> "ies"  (ontology -> ontologies)
    - vowel + "y"                    -> "s"    (api_key -> api_keys)
    - ends in "x", "ch" or "sh"      -> "es"   (inbox -> inboxes)
    - anything else                  -> "s"    (resource -> resources)

    Set an explicit table_name to pin a name that these heuristics get wrong.

    Args:
        model: Pydantic model class

    Returns:
        Table name

    Example:
        >>> class ApiKey: ...
        >>> get_table_name(ApiKey)
        'api_keys'
    """
    import re

    # An explicit table_name bypasses all inference.
    model_config = getattr(model, "model_config", None)
    if isinstance(model_config, dict):
        json_extra = model_config.get("json_schema_extra", {})
        if isinstance(json_extra, dict) and "table_name" in json_extra:
            return json_extra["table_name"]

    # CamelCase -> snake_case in two passes: first split before a capitalized
    # word, then split a lowercase/digit followed by a capital.
    name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", model.__name__)
    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name).lower()

    # Pluralize.
    if name.endswith("s"):
        return name
    if name.endswith("y"):
        # Only consonant + "y" takes "ies"; "key" must become "keys", not "keies".
        if len(name) > 1 and name[-2] in "aeiou":
            return name + "s"
        return name[:-1] + "ies"
    if name.endswith(("x", "ch", "sh")):
        return name + "es"
    return name + "s"
114
+
115
+
116
def get_embeddable_fields(model: Type[BaseModel]) -> list[str]:
    """
    List the fields of a model that should have embeddings generated.

    Decision per field, in order:
    1. json_schema_extra is a non-empty dict with "embed" exactly True  -> included
    2. json_schema_extra is a non-empty dict with "embed" exactly False -> excluded
    3. the lower-cased field name is one of the default content names
       (content, description, summary, text, body, message, notes)      -> included
    4. otherwise                                                         -> excluded

    Args:
        model: Pydantic model class (its ``model_fields`` mapping is inspected)

    Returns:
        Field names to embed, in the order they appear in ``model_fields``
    """
    default_content_names = {
        "content",
        "description",
        "summary",
        "text",
        "body",
        "message",
        "notes",
    }

    def _wants_embedding(field_name, field_info) -> bool:
        # An explicit boolean flag overrides the name-based default; any other
        # value for "embed" (or no flag at all) falls through to the default.
        extra = getattr(field_info, "json_schema_extra", None)
        if extra and isinstance(extra, dict):
            flag = extra.get("embed")
            if flag is True:
                return True
            if flag is False:
                return False
        return field_name.lower() in default_content_names

    return [
        field_name
        for field_name, field_info in model.model_fields.items()
        if _wants_embedding(field_name, field_info)
    ]
168
+
169
+
170
def should_skip_field(field_name: str) -> bool:
    """
    Tell whether a field name belongs to the fixed set of system fields.

    The system fields are: id, tenant_id, user_id, created_at, updated_at,
    deleted_at, graph_edges, metadata, tags and column. The match is exact
    and case-sensitive.

    NOTE(review): the original notes say these are skipped during SQL
    generation because they are added separately; that generator is not
    visible from this function.

    Args:
        field_name: Name of the field

    Returns:
        True if field should be skipped

    Example:
        >>> should_skip_field("id")
        True
        >>> should_skip_field("name")
        False
    """
    return field_name in frozenset(
        (
            "id",
            "tenant_id",
            "user_id",
            "created_at",
            "updated_at",
            "deleted_at",
            "graph_edges",
            "metadata",
            "tags",
            "column",
        )
    )
208
+
209
+
210
def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
    """
    Collect the REM-specific metadata of a Pydantic model in one dict.

    Args:
        model: Pydantic model class

    Returns:
        Dict with, in this key order:
        - model_name: ``model.__name__``
        - table_name: result of ``get_table_name(model)``
        - entity_key_field: result of ``get_entity_key_field(model)``
        - embeddable_fields: result of ``get_embeddable_fields(model)``
    """
    # Values are computed one by one, in the same order as the keys, so any
    # log output of the helpers appears in a stable sequence.
    model_name = model.__name__
    table_name = get_table_name(model)
    entity_key_field = get_entity_key_field(model)
    embeddable_fields = get_embeddable_fields(model)

    return dict(
        model_name=model_name,
        table_name=table_name,
        entity_key_field=entity_key_field,
        embeddable_fields=embeddable_fields,
    )
@@ -0,0 +1,229 @@
1
+ """
2
+ Centralized schema loading utility for agent schemas.
3
+
4
+ This module provides a single, consistent implementation for loading
5
+ agent schemas from YAML files across the entire codebase (API, CLI, agent factory).
6
+
7
+ Design Pattern:
8
+ - Search standard locations: schemas/agents/, schemas/evaluators/, schemas/
9
+ - Support short names: "contract-analyzer" → "schemas/agents/contract-analyzer.yaml"
10
+ - Support relative/absolute paths
11
+ - Consistent error messages and logging
12
+
13
+ Usage:
14
+ # From API
15
+ schema = load_agent_schema("rem")
16
+
17
+ # From CLI with custom path
18
+ schema = load_agent_schema("./my-agent.yaml")
19
+
20
+ # From agent factory
21
+ schema = load_agent_schema("contract-analyzer")
22
+
23
+ Schema Caching Status:
24
+
25
+ ✅ IMPLEMENTED: Filesystem Schema Caching (2025-11-22)
26
+ - Schemas loaded from package resources cached indefinitely in _fs_schema_cache
27
+ - No TTL needed (immutable, versioned with code)
28
+ - Lazy-loaded on first access
29
+ - Custom paths not cached (may change during development)
30
+
31
+ TODO: Database Schema Caching (Future)
32
+ - Schemas loaded from schemas table (SchemaRepository)
33
+ - Will require TTL for cache invalidation (5-15 minutes)
34
+ - May change at runtime via admin updates
35
+ - Cache key: (schema_name, version) → (schema_dict, timestamp)
36
+ - Implementation ready in _db_schema_cache and _db_schema_ttl
37
+
38
+ Benefits Achieved:
39
+ - ✅ Eliminated disk I/O for repeated schema loads
40
+ - ✅ Faster agent creation (critical for API latency)
41
+ - 🔲 Database query reduction (pending DB schema implementation)
42
+
43
+ Future Enhancement (when database schemas are implemented):
44
+ import time
45
+
46
+ _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
47
+ _db_schema_ttl: int = 300 # 5 minutes
48
+
49
+ async def load_agent_schema_from_db(name: str, version: str | None = None):
50
+ cache_key = (name, version or "latest")
51
+ if cache_key in _db_schema_cache:
52
+ schema, timestamp = _db_schema_cache[cache_key]
53
+ if time.time() - timestamp < _db_schema_ttl:
54
+ return schema
55
+ # Load from DB and cache with TTL
56
+ from rem.services.repositories import schema_repository
57
+ schema = await schema_repository.get_by_name(name, version)
58
+ _db_schema_cache[cache_key] = (schema, time.time())
59
+ return schema
60
+
61
+ Related:
62
+ - rem/src/rem/agentic/providers/pydantic_ai.py (create_agent factory)
63
+ - rem/src/rem/services/repositories/schema_repository.py (database schemas)
64
+ """
65
+
66
+ import importlib.resources
67
+ from pathlib import Path
68
+ from typing import Any, cast
69
+
70
+ import yaml
71
+ from loguru import logger
72
+
73
+
74
# Standard search paths for agent schemas (in priority order).
# Each entry is a package-relative template; load_agent_schema fills in
# {name} with str.format and probes the entries from first to last.
SCHEMA_SEARCH_PATHS = [
    "schemas/agents/{name}.yaml",  # Top-level agents (e.g., rem.yaml)
    "schemas/agents/core/{name}.yaml",  # Core system agents
    "schemas/agents/examples/{name}.yaml",  # Example agents
    "schemas/evaluators/{name}.yaml",
    "schemas/{name}.yaml",
]

# In-memory cache for filesystem schemas (no TTL - immutable).
# Keyed by the normalized schema name computed in load_agent_schema; entries
# are only added for schemas found via SCHEMA_SEARCH_PATHS.
_fs_schema_cache: dict[str, dict[str, Any]] = {}

# Future: Database schema cache (with TTL - mutable)
# Will be used when loading schemas from database (SchemaRepository)
# _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
# _db_schema_ttl: int = 300  # 5 minutes in seconds
90
+
91
+
92
def _require_schema_mapping(schema: Any, source: str) -> dict[str, Any]:
    """Return ``schema`` unchanged when it is a dict, otherwise raise ValueError."""
    if not isinstance(schema, dict):
        raise ValueError(
            f"Schema loaded from {source} must be a YAML mapping, "
            f"got {type(schema).__name__}"
        )
    return schema


def load_agent_schema(schema_name_or_path: str, use_cache: bool = True) -> dict[str, Any]:
    """
    Load agent schema from YAML file with unified search logic and caching.

    Name normalization: every occurrence of "agents/", "schemas/",
    "evaluators/", "core/" and "examples/" is removed from the argument and a
    trailing ".yaml"/".yml" is dropped. The result is both the cache key and
    the {name} substituted into SCHEMA_SEARCH_PATHS, e.g.
    - "rem"                  -> name "rem"
    - "core/moment-builder"  -> name "moment-builder"
    - "rem/faithfulness"     -> name "rem/faithfulness"

    Search Order:
    1. Cache, when use_cache is True, the argument is not an existing file
       and contains no path separator, and the name is already cached
    2. The argument itself, when it names an existing regular file
       (absolute or relative to cwd); such schemas are never cached
    3. Package resources of "rem", trying SCHEMA_SEARCH_PATHS in order;
       a hit is stored in the cache when use_cache is True

    A directory that happens to share the schema's name (e.g. ./rem) is not
    treated as an exact path; the package search is used instead.

    Args:
        schema_name_or_path: Schema name or file path
            Examples: "rem-query-agent", "contract-analyzer", "./my-schema.yaml"
        use_cache: If True, uses in-memory cache for filesystem schemas

    Returns:
        Agent schema as dictionary. Cached schemas are returned by reference,
        so callers should not mutate the result.

    Raises:
        FileNotFoundError: If schema not found in any search location
        yaml.YAMLError: If the schema file that was found is invalid YAML
        ValueError: If the schema file that was found is empty or its
            top-level YAML value is not a mapping

    Examples:
        >>> # Load by short name (cached after first load)
        >>> schema = load_agent_schema("contract-analyzer")
        >>>
        >>> # Load from custom path (not cached - custom paths may change)
        >>> schema = load_agent_schema("./my-agent.yaml")
    """
    raw_name = str(schema_name_or_path)

    # Normalize the name for the cache key / package search.
    cache_key = raw_name
    for fragment in ("agents/", "schemas/", "evaluators/", "core/", "examples/"):
        cache_key = cache_key.replace(fragment, "")
    if cache_key.endswith((".yaml", ".yml")):
        cache_key = cache_key.rsplit(".", 1)[0]

    # Only a regular file counts as an exact path: a directory with the same
    # name as a schema must not shadow the packaged schema (opening it would
    # raise IsADirectoryError).
    path = Path(raw_name)
    path_is_file = path.is_file()
    is_custom_path = path_is_file or "/" in raw_name or "\\" in raw_name

    # Check cache first (only for package resources, not custom paths)
    if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # 1. Try exact path first (absolute or relative to cwd)
    if path_is_file:
        logger.debug(f"Loading schema from exact path: {path}")
        # Explicit UTF-8 so non-ASCII schema text loads the same on every platform.
        with open(path, "r", encoding="utf-8") as f:
            schema = _require_schema_mapping(yaml.safe_load(f), str(path))
        logger.debug(f"Loaded schema with keys: {list(schema.keys())}")
        # Don't cache custom paths (they may change)
        return schema

    # 2. Try package resources with standard search paths
    base_name = cache_key
    for search_pattern in SCHEMA_SEARCH_PATHS:
        search_path = search_pattern.format(name=base_name)

        # Only locating/reading the resource is best-effort; parse errors of a
        # file that was actually found must surface to the caller.
        try:
            schema_ref = importlib.resources.files("rem") / search_path
            if not schema_ref.is_file():
                continue
            schema_text = schema_ref.read_text(encoding="utf-8")
        except (ImportError, OSError, ValueError) as e:
            logger.debug(f"Could not load from {search_path}: {e}")
            continue

        logger.debug(f"Loading schema from package: {search_path}")
        schema = _require_schema_mapping(yaml.safe_load(schema_text), search_path)
        logger.debug(f"Loaded schema with keys: {list(schema.keys())}")

        # Cache filesystem schemas (immutable, safe to cache indefinitely)
        if use_cache:
            _fs_schema_cache[cache_key] = schema
            logger.debug(f"Cached schema: {cache_key}")

        return schema

    # 3. Schema not found in any location
    searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]
    raise FileNotFoundError(
        f"Schema not found: {schema_name_or_path}\n"
        f"Searched locations:\n"
        f"  - Exact path: {path}\n"
        f"  - Package resources: {', '.join(searched_paths)}"
    )
196
+
197
+
198
def validate_agent_schema(schema: dict[str, Any]) -> bool:
    """
    Run basic structural checks on an agent schema.

    Checks, in order:
    - the schema is a dict
    - its 'type' entry equals 'object'
    - it has a 'description' key (system prompt)
    - a missing 'properties' key (output schema) only logs a warning

    Args:
        schema: Agent schema dict

    Returns:
        True if valid (the function never returns False)

    Raises:
        ValueError: If one of the first three checks fails
    """
    if not isinstance(schema, dict):
        raise ValueError(f"Schema must be a dict, got {type(schema)}")

    declared_type = schema.get('type')
    if declared_type != 'object':
        raise ValueError(f"Schema type must be 'object', got {declared_type}")

    has_description = 'description' in schema
    if not has_description:
        raise ValueError("Schema must have 'description' field (system prompt)")

    # Missing output schema is tolerated; it is only reported.
    has_properties = 'properties' in schema
    if not has_properties:
        logger.warning("Schema missing 'properties' field - agent will have no structured output")

    logger.debug("Schema validation passed")
    return True