remdb 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic Model Helper Utilities.
|
|
3
|
+
|
|
4
|
+
Utilities for working with REM Pydantic models following our conventions:
|
|
5
|
+
|
|
6
|
+
Business Key (entity_key) Detection:
|
|
7
|
+
1. Field with json_schema_extra={"entity_key": True}
|
|
8
|
+
2. Common business key fields: name, uri, key, label, title
|
|
9
|
+
3. Fallback to "id" (unique by UUID only)
|
|
10
|
+
|
|
11
|
+
Embedding Field Detection:
|
|
12
|
+
1. Field with json_schema_extra={"embed": True}
|
|
13
|
+
2. Common content fields: content, description, summary, etc.
|
|
14
|
+
3. Explicit disable with json_schema_extra={"embed": False}
|
|
15
|
+
|
|
16
|
+
Table Name Inference:
|
|
17
|
+
1. model_config.json_schema_extra.table_name
|
|
18
|
+
2. CamelCase → snake_case + pluralization
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import Any, Type
|
|
22
|
+
|
|
23
|
+
from loguru import logger
|
|
24
|
+
from pydantic import BaseModel
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_entity_key_field(model: Type[BaseModel]) -> str:
    """
    Resolve which field of a model acts as its business key for KV lookups.

    Resolution order:
    1. The first field whose json_schema_extra dict has ``"entity_key": True``
    2. The first of these conventional names present on the model, in this
       order: "name", "uri", "key", "label", "title"
    3. "id" as a last resort (a warning is logged in this case)

    Args:
        model: Pydantic model class

    Returns:
        Name of the field to use as entity_key

    Example:
        >>> from rem.models.entities import Resource
        >>> get_entity_key_field(Resource)
        'name'
    """
    declared_fields = model.model_fields

    # Step 1: an explicit marker beats every naming convention.
    for field_name, field_info in declared_fields.items():
        extra = getattr(field_info, "json_schema_extra", None)
        if not isinstance(extra, dict):
            # Missing or callable json_schema_extra carries no marker.
            continue
        if extra.get("entity_key") is True:
            logger.debug(f"Using explicit entity_key field: {field_name}")
            return field_name

    # Step 2: fall back to the conventional names, highest priority first.
    conventional_names = ("name", "uri", "key", "label", "title")
    candidate = next(
        (option for option in conventional_names if option in declared_fields),
        None,
    )
    if candidate is not None:
        logger.debug(f"Using conventional entity_key field: {candidate}")
        return candidate

    # Step 3: nothing matched, so only the UUID identifies the entity.
    logger.warning(
        f"No business key found for {model.__name__}, using 'id' (UUID only)"
    )
    return "id"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_table_name(model: Type[BaseModel]) -> str:
    """
    Get table name for a Pydantic model.

    Follows REM conventions:
    1. model_config.json_schema_extra.table_name (explicit)
    2. CamelCase → snake_case + pluralization

    Pluralization rules, applied to the snake_case name:
    - names already ending in "s" are returned unchanged
    - consonant + "y" becomes "ies" (category -> categories)
    - vowel + "y" only gains "s" (survey -> surveys)
    - anything else gains "s" (resource -> resources)

    Args:
        model: Pydantic model class

    Returns:
        Table name

    Example:
        >>> from rem.models.entities import Resource
        >>> get_table_name(Resource)
        'resources'
    """
    import re

    # An explicit table_name in model_config always wins over inference.
    model_config = getattr(model, "model_config", None)
    if isinstance(model_config, dict):
        json_extra = model_config.get("json_schema_extra", {})
        if isinstance(json_extra, dict) and "table_name" in json_extra:
            return json_extra["table_name"]

    # Infer from class name: CamelCase -> snake_case.
    # The first pass splits before a capitalized word ("OntologyConfig" ->
    # "Ontology_Config"); the second splits a lowercase/digit followed by a
    # capital, which handles acronym boundaries the first pass leaves alone.
    name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", model.__name__)
    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name).lower()

    # Pluralize
    if name.endswith("s"):
        return name

    # Only a consonant before the trailing "y" takes the "ies" form; a vowel
    # before it ("survey", "key", "day") takes a plain "s". The slice yields
    # "" for a one-character name, which keeps the "ies" form for that case.
    if name.endswith("y") and name[-2:-1] not in ("a", "e", "i", "o", "u"):
        return name[:-1] + "ies"  # category -> categories

    return name + "s"  # resource -> resources, survey -> surveys
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def get_embeddable_fields(model: Type[BaseModel]) -> list[str]:
    """
    List the fields of a model that should have embeddings generated.

    Decision per field, in order:
    1. json_schema_extra={"embed": True}  → included
    2. json_schema_extra={"embed": False} → excluded
    3. Field name (case-insensitive) is a common content name
       (content, description, summary, text, body, message, notes) → included
    4. Anything else → excluded

    Args:
        model: Pydantic model class

    Returns:
        Field names to embed, in the model's field declaration order

    Example:
        >>> from rem.models.entities import Resource
        >>> fields = get_embeddable_fields(Resource)
        >>> "content" in fields
        True
    """
    content_like_names = frozenset(
        ("content", "description", "summary", "text", "body", "message", "notes")
    )

    def _wants_embedding(name: str, info: Any) -> bool:
        # Only a dict-valued json_schema_extra can carry an explicit flag.
        extra = getattr(info, "json_schema_extra", None)
        flag = extra.get("embed") if isinstance(extra, dict) else None
        if flag is True:
            return True
        if flag is False:
            return False
        # No explicit True/False: decide by naming convention.
        return name.lower() in content_like_names

    return [
        name
        for name, info in model.model_fields.items()
        if _wants_embedding(name, info)
    ]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def should_skip_field(field_name: str) -> bool:
    """
    Tell whether a field is a system field to leave out of SQL generation.

    The following names are treated as system fields (added separately):
    - id (PRIMARY KEY)
    - tenant_id (multi-tenancy), user_id (ownership)
    - created_at, updated_at, deleted_at (system timestamps)
    - graph_edges, metadata (JSONB system fields)
    - tags, column (CoreModel fields)

    The comparison is an exact, case-sensitive match.

    Args:
        field_name: Name of the field

    Returns:
        True if field should be skipped

    Example:
        >>> should_skip_field("id")
        True
        >>> should_skip_field("name")
        False
    """
    identity_fields = {"id", "tenant_id", "user_id"}
    timestamp_fields = {"created_at", "updated_at", "deleted_at"}
    jsonb_and_core_fields = {"graph_edges", "metadata", "tags", "column"}

    reserved = identity_fields | timestamp_fields | jsonb_and_core_fields
    return field_name in reserved
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
    """
    Collect the REM-specific metadata of a Pydantic model into one dict.

    Args:
        model: Pydantic model class

    Returns:
        Dict with:
        - model_name: Original model class name
        - table_name: Database table name (see get_table_name)
        - entity_key_field: Business key field name (see get_entity_key_field)
        - embeddable_fields: List of fields to embed (see get_embeddable_fields)

    Example:
        >>> from rem.models.entities import Resource
        >>> meta = get_model_metadata(Resource)
        >>> meta["table_name"]
        'resources'
        >>> meta["entity_key_field"]
        'name'
        >>> "content" in meta["embeddable_fields"]
        True
    """
    # Evaluate in the same order as the keys appear in the result.
    class_name = model.__name__
    table = get_table_name(model)
    key_field = get_entity_key_field(model)
    embed_fields = get_embeddable_fields(model)

    return dict(
        model_name=class_name,
        table_name=table,
        entity_key_field=key_field,
        embeddable_fields=embed_fields,
    )
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized schema loading utility for agent schemas.
|
|
3
|
+
|
|
4
|
+
This module provides a single, consistent implementation for loading
|
|
5
|
+
agent schemas from YAML files across the entire codebase (API, CLI, agent factory).
|
|
6
|
+
|
|
7
|
+
Design Pattern:
|
|
8
|
+
- Search standard locations: schemas/agents/, schemas/evaluators/, schemas/
|
|
9
|
+
- Support short names: "contract-analyzer" → "schemas/agents/contract-analyzer.yaml"
|
|
10
|
+
- Support relative/absolute paths
|
|
11
|
+
- Consistent error messages and logging
|
|
12
|
+
i
|
|
13
|
+
Usage:
|
|
14
|
+
# From API
|
|
15
|
+
schema = load_agent_schema("rem")
|
|
16
|
+
|
|
17
|
+
# From CLI with custom path
|
|
18
|
+
schema = load_agent_schema("./my-agent.yaml")
|
|
19
|
+
|
|
20
|
+
# From agent factory
|
|
21
|
+
schema = load_agent_schema("contract-analyzer")
|
|
22
|
+
|
|
23
|
+
Schema Caching Status:
|
|
24
|
+
|
|
25
|
+
✅ IMPLEMENTED: Filesystem Schema Caching (2025-11-22)
|
|
26
|
+
- Schemas loaded from package resources cached indefinitely in _fs_schema_cache
|
|
27
|
+
- No TTL needed (immutable, versioned with code)
|
|
28
|
+
- Lazy-loaded on first access
|
|
29
|
+
- Custom paths not cached (may change during development)
|
|
30
|
+
|
|
31
|
+
TODO: Database Schema Caching (Future)
|
|
32
|
+
- Schemas loaded from schemas table (SchemaRepository)
|
|
33
|
+
- Will require TTL for cache invalidation (5-15 minutes)
|
|
34
|
+
- May change at runtime via admin updates
|
|
35
|
+
- Cache key: (schema_name, version) → (schema_dict, timestamp)
|
|
36
|
+
- Implementation ready in _db_schema_cache and _db_schema_ttl
|
|
37
|
+
|
|
38
|
+
Benefits Achieved:
|
|
39
|
+
- ✅ Eliminated disk I/O for repeated schema loads
|
|
40
|
+
- ✅ Faster agent creation (critical for API latency)
|
|
41
|
+
- 🔲 Database query reduction (pending DB schema implementation)
|
|
42
|
+
|
|
43
|
+
Future Enhancement (when database schemas are implemented):
|
|
44
|
+
import time
|
|
45
|
+
|
|
46
|
+
_db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
|
|
47
|
+
_db_schema_ttl: int = 300 # 5 minutes
|
|
48
|
+
|
|
49
|
+
async def load_agent_schema_from_db(name: str, version: str | None = None):
|
|
50
|
+
cache_key = (name, version or "latest")
|
|
51
|
+
if cache_key in _db_schema_cache:
|
|
52
|
+
schema, timestamp = _db_schema_cache[cache_key]
|
|
53
|
+
if time.time() - timestamp < _db_schema_ttl:
|
|
54
|
+
return schema
|
|
55
|
+
# Load from DB and cache with TTL
|
|
56
|
+
from rem.services.repositories import schema_repository
|
|
57
|
+
schema = await schema_repository.get_by_name(name, version)
|
|
58
|
+
_db_schema_cache[cache_key] = (schema, time.time())
|
|
59
|
+
return schema
|
|
60
|
+
|
|
61
|
+
Related:
|
|
62
|
+
- rem/src/rem/agentic/providers/pydantic_ai.py (create_agent factory)
|
|
63
|
+
- rem/src/rem/services/repositories/schema_repository.py (database schemas)
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
import importlib.resources
|
|
67
|
+
from pathlib import Path
|
|
68
|
+
from typing import Any, cast
|
|
69
|
+
|
|
70
|
+
import yaml
|
|
71
|
+
from loguru import logger
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Standard search paths for agent schemas (in priority order)
|
|
75
|
+
SCHEMA_SEARCH_PATHS = [
    "schemas/agents/{name}.yaml",  # Top-level agents (e.g., rem.yaml)
    "schemas/agents/core/{name}.yaml",  # Core system agents
    "schemas/agents/examples/{name}.yaml",  # Example agents
    "schemas/evaluators/{name}.yaml",  # Evaluator schemas
    "schemas/{name}.yaml",  # Fallback: directly under schemas/
]

# In-memory cache for filesystem schemas (no TTL - immutable)
# Keys are the normalized schema names computed by load_agent_schema;
# values are the parsed YAML dicts.
_fs_schema_cache: dict[str, dict[str, Any]] = {}
|
|
85
|
+
|
|
86
|
+
# Future: Database schema cache (with TTL - mutable)
|
|
87
|
+
# Will be used when loading schemas from database (SchemaRepository)
|
|
88
|
+
# _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
|
|
89
|
+
# _db_schema_ttl: int = 300 # 5 minutes in seconds
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _read_schema_yaml(stream: Any, origin: object) -> dict[str, Any]:
    """Parse an open YAML stream and ensure the document is a mapping."""
    schema = yaml.safe_load(stream)
    if not isinstance(schema, dict):
        # An empty file parses to None and a top-level list parses to a list;
        # neither is a usable agent schema.
        raise ValueError(
            f"Schema at {origin} must be a YAML mapping, got {type(schema).__name__}"
        )
    logger.debug(f"Loaded schema with keys: {list(schema.keys())}")
    return schema


def load_agent_schema(schema_name_or_path: str, use_cache: bool = True) -> dict[str, Any]:
    """
    Load agent schema from YAML file with unified search logic and caching.

    Filesystem schemas are cached indefinitely (immutable, versioned with code).
    Database schemas (future) will be cached with TTL for invalidation.

    Handles path resolution automatically:
    - "rem" → searches schemas/agents/rem.yaml (top-level)
    - "moment-builder" → searches schemas/agents/core/moment-builder.yaml
    - "contract-analyzer" → searches schemas/agents/examples/contract-analyzer.yaml
    - "core/moment-builder" → searches schemas/agents/core/moment-builder.yaml
    - "/absolute/path.yaml" → loads directly
    - "relative/path.yaml" → loads relative to cwd

    Search Order:
    1. Check cache (if use_cache=True and schema found in FS cache)
    2. Exact path if it is an existing file (absolute or relative)
    3. Package resources: schemas/agents/{name}.yaml (top-level)
    4. Package resources: schemas/agents/core/{name}.yaml
    5. Package resources: schemas/agents/examples/{name}.yaml
    6. Package resources: schemas/evaluators/{name}.yaml
    7. Package resources: schemas/{name}.yaml

    Args:
        schema_name_or_path: Schema name or file path
            Examples: "rem-query-agent", "contract-analyzer", "./my-schema.yaml"
        use_cache: If True, uses in-memory cache for filesystem schemas

    Returns:
        Agent schema as dictionary. When served from the cache this is the
        cached dict itself, so callers should not mutate it in place.

    Raises:
        FileNotFoundError: If schema not found in any search location
        yaml.YAMLError: If schema file is invalid YAML
        ValueError: If the schema file does not contain a YAML mapping

    Examples:
        >>> # Load by short name (cached after first load)
        >>> schema = load_agent_schema("contract-analyzer")
        >>>
        >>> # Load from custom path (not cached - custom paths may change)
        >>> schema = load_agent_schema("./my-agent.yaml")
        >>>
        >>> # Load evaluator schema (cached)
        >>> schema = load_agent_schema("rem-lookup-correctness")
    """
    raw_name = str(schema_name_or_path)

    # Normalize the name for cache key: drop the known directory segments and
    # the YAML extension so "core/moment-builder.yaml" and "moment-builder"
    # share one key.
    cache_key = raw_name
    for segment in ("agents/", "schemas/", "evaluators/", "core/", "examples/"):
        cache_key = cache_key.replace(segment, "")
    if cache_key.endswith((".yaml", ".yml")):
        cache_key = cache_key.rsplit(".", 1)[0]

    # Only a regular file counts as an exact path. A directory that happens to
    # share the schema's short name (e.g. a "rem/" folder in the cwd) must not
    # shadow the packaged schema or be passed to open().
    path = Path(schema_name_or_path)
    is_exact_file = path.is_file()
    is_custom_path = is_exact_file or "/" in raw_name or "\\" in raw_name

    # Check cache first (only for package resources, not custom paths)
    if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # 1. Try exact path first (absolute or relative to cwd)
    if is_exact_file:
        logger.debug(f"Loading schema from exact path: {path}")
        with open(path, "r", encoding="utf-8") as f:
            # Don't cache custom paths (they may change)
            return _read_schema_yaml(f, path)

    # 2. Normalized name doubles as the base name for package resource search
    base_name = cache_key

    # 3. Try package resources with standard search paths
    for search_pattern in SCHEMA_SEARCH_PATHS:
        search_path = search_pattern.format(name=base_name)

        # Only the lookup is best-effort. Parse errors below propagate so a
        # broken schema file is not misreported as "not found".
        try:
            schema_ref = importlib.resources.files("rem") / search_path
            found = schema_ref.is_file()
        except (ImportError, OSError, TypeError, ValueError) as e:
            logger.debug(f"Could not load from {search_path}: {e}")
            continue

        if not found:
            continue

        logger.debug(f"Loading schema from package: {search_path}")
        # Read through the resource object itself rather than converting it to
        # a filesystem path.
        with schema_ref.open("r", encoding="utf-8") as f:
            schema = _read_schema_yaml(f, search_path)

        # Cache filesystem schemas (immutable, safe to cache indefinitely)
        if use_cache:
            _fs_schema_cache[cache_key] = schema
            logger.debug(f"Cached schema: {cache_key}")

        return schema

    # 4. Schema not found in any location
    searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]
    raise FileNotFoundError(
        f"Schema not found: {schema_name_or_path}\n"
        f"Searched locations:\n"
        f"  - Exact path: {path}\n"
        f"  - Package resources: {', '.join(searched_paths)}"
    )
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def validate_agent_schema(schema: dict[str, Any]) -> bool:
    """
    Run basic structural checks on an agent schema.

    Hard requirements (violations raise):
    - the schema is a dict
    - 'type' equals 'object'
    - a 'description' key is present (system prompt)

    Soft requirement (violation only logs a warning):
    - a 'properties' key is present (output schema)

    Args:
        schema: Agent schema dict

    Returns:
        True if valid

    Raises:
        ValueError: If schema is invalid
    """
    if not isinstance(schema, dict):
        raise ValueError(f"Schema must be a dict, got {type(schema)}")

    type_is_object = schema.get('type') == 'object'
    if not type_is_object:
        raise ValueError(f"Schema type must be 'object', got {schema.get('type')}")

    has_system_prompt = 'description' in schema
    if not has_system_prompt:
        raise ValueError("Schema must have 'description' field (system prompt)")

    # A schema without properties is still accepted, just flagged.
    has_output_schema = 'properties' in schema
    if not has_output_schema:
        logger.warning("Schema missing 'properties' field - agent will have no structured output")

    logger.debug("Schema validation passed")
    return True
|