remdb 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +801 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.7.dist-info/METADATA +1473 -0
- remdb-0.3.7.dist-info/RECORD +187 -0
- remdb-0.3.7.dist-info/WHEEL +4 -0
- remdb-0.3.7.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic Model Helper Utilities.
|
|
3
|
+
|
|
4
|
+
Utilities for working with REM Pydantic models following our conventions:
|
|
5
|
+
|
|
6
|
+
Business Key (entity_key) Detection:
|
|
7
|
+
1. Field with json_schema_extra={"entity_key": True}
|
|
8
|
+
2. Common business key fields: name, uri, key, label
|
|
9
|
+
3. Fallback to "id" (unique by UUID only)
|
|
10
|
+
|
|
11
|
+
Embedding Field Detection:
|
|
12
|
+
1. Field with json_schema_extra={"embed": True}
|
|
13
|
+
2. Common content fields: content, description, summary, etc.
|
|
14
|
+
3. Explicit disable with json_schema_extra={"embed": False}
|
|
15
|
+
|
|
16
|
+
Table Name Inference:
|
|
17
|
+
1. model_config.json_schema_extra.table_name
|
|
18
|
+
2. CamelCase → snake_case + pluralization
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import Any, Type
|
|
22
|
+
|
|
23
|
+
from loguru import logger
|
|
24
|
+
from pydantic import BaseModel
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_entity_key_field(model: Type[BaseModel]) -> str:
    """
    Pick the field that serves as the business key for KV store lookups.

    Resolution order:
    1. The first field whose json_schema_extra is a dict with
       ``"entity_key": True`` (in field declaration order)
    2. The first of these conventional names that the model declares:
       "name", "uri", "key", "label", "title"
    3. "id" as a last resort (a warning is logged)

    Args:
        model: Pydantic model class

    Returns:
        Name of the field to use as entity_key

    Example:
        >>> from rem.models.entities import Resource
        >>> get_entity_key_field(Resource)
        'name'
    """
    declared = model.model_fields

    # Pass 1: an explicit marker on any field wins over naming conventions.
    for attr, info in declared.items():
        extra = getattr(info, "json_schema_extra", None)
        if not extra or not isinstance(extra, dict):
            continue
        if extra.get("entity_key") is True:
            logger.debug(f"Using explicit entity_key field: {attr}")
            return attr

    # Pass 2: conventional business-key names, in priority order.
    conventional = next(
        (
            candidate
            for candidate in ("name", "uri", "key", "label", "title")
            if candidate in declared
        ),
        None,
    )
    if conventional is not None:
        logger.debug(f"Using conventional entity_key field: {conventional}")
        return conventional

    # Pass 3: nothing matched, so the entity is only unique by its UUID.
    logger.warning(
        f"No business key found for {model.__name__}, using 'id' (UUID only)"
    )
    return "id"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_table_name(model: Type[BaseModel]) -> str:
    """
    Get table name for a Pydantic model.

    Follows REM conventions:
    1. model_config["json_schema_extra"]["table_name"] (explicit override)
    2. CamelCase → snake_case + simple English pluralization

    Pluralization rules (deliberately minimal):
    - names already ending in "s" are left unchanged
    - consonant + "y" → "ies"   (category -> categories)
    - vowel + "y"     → "ys"    (api_key -> api_keys, day -> days)
    - anything else   → + "s"   (resource -> resources)

    For irregular nouns, set ``table_name`` explicitly in the model config.

    Args:
        model: Pydantic model class

    Returns:
        Table name

    Example:
        >>> from rem.models.entities import Resource
        >>> get_table_name(Resource)
        'resources'
    """
    import re

    # 1. Explicit table_name in the model config takes precedence.
    model_config = getattr(model, "model_config", None)
    if isinstance(model_config, dict):
        json_extra = model_config.get("json_schema_extra", {})
        # json_schema_extra may also be a callable; only dicts can carry table_name.
        if isinstance(json_extra, dict) and "table_name" in json_extra:
            return json_extra["table_name"]

    # 2. Infer from the class name: CamelCase -> snake_case.
    name = model.__name__
    name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
    name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name).lower()

    # 3. Pluralize.
    if name.endswith("s"):
        return name

    # Only consonant + "y" becomes "ies"; vowel + "y" just takes an "s"
    # (previously "api_key" became "api_keies").
    if name.endswith("y") and len(name) > 1 and name[-2] not in "aeiou":
        return name[:-1] + "ies"  # category -> categories

    return name + "s"  # resource -> resources, day -> days
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def get_embeddable_fields(model: Type[BaseModel]) -> list[str]:
    """
    List the model fields for which embeddings should be generated.

    Decision per field, in this order:
    1. json_schema_extra={"embed": True}  → included
    2. json_schema_extra={"embed": False} → excluded
    3. Field name (case-insensitive) is one of the common content names
       (content, description, summary, text, body, message, notes) → included
    4. Anything else → excluded

    Args:
        model: Pydantic model class

    Returns:
        Field names to embed, in field declaration order

    Example:
        >>> from rem.models.entities import Resource
        >>> fields = get_embeddable_fields(Resource)
        >>> "content" in fields
        True
    """
    content_like = frozenset(
        ("content", "description", "summary", "text", "body", "message", "notes")
    )

    def _wants_embedding(attr: str, info: Any) -> bool:
        """Apply the explicit flag if present, else fall back to the name check."""
        extra = getattr(info, "json_schema_extra", None)
        if extra and isinstance(extra, dict):
            flag = extra.get("embed")
            # Only the literal booleans count as an explicit decision.
            if flag is True or flag is False:
                return flag
        return attr.lower() in content_like

    return [
        attr
        for attr, info in model.model_fields.items()
        if _wants_embedding(attr, info)
    ]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def should_skip_field(field_name: str) -> bool:
    """
    Tell whether a field is a system field to be skipped during SQL generation.

    The following names are treated as system fields (matched exactly,
    case-sensitive):
    - id
    - tenant_id, user_id
    - created_at, updated_at, deleted_at
    - graph_edges, metadata
    - tags, column

    Args:
        field_name: Name of the field

    Returns:
        True if the field should be skipped

    Example:
        >>> should_skip_field("id")
        True
        >>> should_skip_field("name")
        False
    """
    handled_elsewhere = frozenset(
        (
            "id",
            "tenant_id",
            "user_id",
            "created_at",
            "updated_at",
            "deleted_at",
            "graph_edges",
            "metadata",
            "tags",
            "column",
        )
    )
    is_system_field = field_name in handled_elsewhere
    return is_system_field
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
    """
    Collect the REM-specific metadata of a Pydantic model in one dict.

    Returns:
        Dict with the keys:
        - model_name: the model class name (``model.__name__``)
        - table_name: result of get_table_name(model)
        - entity_key_field: result of get_entity_key_field(model)
        - embeddable_fields: result of get_embeddable_fields(model)

    Example:
        >>> from rem.models.entities import Resource
        >>> meta = get_model_metadata(Resource)
        >>> meta["table_name"]
        'resources'
        >>> meta["entity_key_field"]
        'name'
        >>> "content" in meta["embeddable_fields"]
        True
    """
    metadata: dict[str, Any] = {"model_name": model.__name__}
    # Helpers are called in the same order as the keys appear in the result.
    metadata["table_name"] = get_table_name(model)
    metadata["entity_key_field"] = get_entity_key_field(model)
    metadata["embeddable_fields"] = get_embeddable_fields(model)
    return metadata
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized schema loading utility for agent schemas.
|
|
3
|
+
|
|
4
|
+
This module provides a single, consistent implementation for loading
|
|
5
|
+
agent schemas from YAML files across the entire codebase (API, CLI, agent factory).
|
|
6
|
+
|
|
7
|
+
Design Pattern:
|
|
8
|
+
- Search standard locations: schemas/agents/, schemas/evaluators/, schemas/
|
|
9
|
+
- Support short names: "contract-analyzer" → "schemas/agents/contract-analyzer.yaml"
|
|
10
|
+
- Support relative/absolute paths
|
|
11
|
+
- Consistent error messages and logging
|
|
12
|
+
i
|
|
13
|
+
Usage:
|
|
14
|
+
# From API
|
|
15
|
+
schema = load_agent_schema("rem")
|
|
16
|
+
|
|
17
|
+
# From CLI with custom path
|
|
18
|
+
schema = load_agent_schema("./my-agent.yaml")
|
|
19
|
+
|
|
20
|
+
# From agent factory
|
|
21
|
+
schema = load_agent_schema("contract-analyzer")
|
|
22
|
+
|
|
23
|
+
Schema Caching Status:
|
|
24
|
+
|
|
25
|
+
✅ IMPLEMENTED: Filesystem Schema Caching (2025-11-22)
|
|
26
|
+
- Schemas loaded from package resources cached indefinitely in _fs_schema_cache
|
|
27
|
+
- No TTL needed (immutable, versioned with code)
|
|
28
|
+
- Lazy-loaded on first access
|
|
29
|
+
- Custom paths not cached (may change during development)
|
|
30
|
+
|
|
31
|
+
TODO: Database Schema Caching (Future)
|
|
32
|
+
- Schemas loaded from schemas table (SchemaRepository)
|
|
33
|
+
- Will require TTL for cache invalidation (5-15 minutes)
|
|
34
|
+
- May change at runtime via admin updates
|
|
35
|
+
- Cache key: (schema_name, version) → (schema_dict, timestamp)
|
|
36
|
+
- Implementation ready in _db_schema_cache and _db_schema_ttl
|
|
37
|
+
|
|
38
|
+
Benefits Achieved:
|
|
39
|
+
- ✅ Eliminated disk I/O for repeated schema loads
|
|
40
|
+
- ✅ Faster agent creation (critical for API latency)
|
|
41
|
+
- 🔲 Database query reduction (pending DB schema implementation)
|
|
42
|
+
|
|
43
|
+
Future Enhancement (when database schemas are implemented):
|
|
44
|
+
import time
|
|
45
|
+
|
|
46
|
+
_db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
|
|
47
|
+
_db_schema_ttl: int = 300 # 5 minutes
|
|
48
|
+
|
|
49
|
+
async def load_agent_schema_from_db(name: str, version: str | None = None):
|
|
50
|
+
cache_key = (name, version or "latest")
|
|
51
|
+
if cache_key in _db_schema_cache:
|
|
52
|
+
schema, timestamp = _db_schema_cache[cache_key]
|
|
53
|
+
if time.time() - timestamp < _db_schema_ttl:
|
|
54
|
+
return schema
|
|
55
|
+
# Load from DB and cache with TTL
|
|
56
|
+
from rem.services.repositories import schema_repository
|
|
57
|
+
schema = await schema_repository.get_by_name(name, version)
|
|
58
|
+
_db_schema_cache[cache_key] = (schema, time.time())
|
|
59
|
+
return schema
|
|
60
|
+
|
|
61
|
+
Related:
|
|
62
|
+
- rem/src/rem/agentic/providers/pydantic_ai.py (create_agent factory)
|
|
63
|
+
- rem/src/rem/services/repositories/schema_repository.py (database schemas)
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
import importlib.resources
|
|
67
|
+
from pathlib import Path
|
|
68
|
+
from typing import Any, cast
|
|
69
|
+
|
|
70
|
+
import yaml
|
|
71
|
+
from loguru import logger
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Package-relative locations probed for a schema, highest priority first.
# "{name}" is filled in with the normalized schema name via str.format().
SCHEMA_SEARCH_PATHS = [
    # Top-level agents (e.g., rem.yaml)
    "schemas/agents/{name}.yaml",
    # Core system agents
    "schemas/agents/core/{name}.yaml",
    # Example agents
    "schemas/agents/examples/{name}.yaml",
    # Evaluators
    "schemas/evaluators/{name}.yaml",
    # Anything placed directly under schemas/
    "schemas/{name}.yaml",
]

# Process-wide cache of schemas loaded from package resources, keyed by the
# normalized schema name. Entries never expire (no TTL).
_fs_schema_cache: dict[str, dict[str, Any]] = {}
|
|
85
|
+
|
|
86
|
+
# Future: Database schema cache (with TTL - mutable)
|
|
87
|
+
# Will be used when loading schemas from database (SchemaRepository)
|
|
88
|
+
# _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
|
|
89
|
+
# _db_schema_ttl: int = 300 # 5 minutes in seconds
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any] | None:
    """
    Load schema from database using LOOKUP query.

    This function is synchronous but drives async database operations with
    asyncio.run(). It is designed to be called from load_agent_schema(),
    which is sync.

    If an event loop is already running in the current thread, asyncio.run()
    cannot be used; in that case a warning is logged and None is returned
    without touching the database.

    Args:
        schema_name: Schema name to lookup
        user_id: User ID for data scoping

    Returns:
        The ``spec`` dict of the matching schema row if found, None otherwise.
        None is also returned when the PostgreSQL service is unavailable or
        when connecting/querying raises (the error is logged at debug level).

    Note:
        Errors raised while connecting or querying are swallowed. Exceptions
        from the project imports, from get_postgres_service(), or from
        db.disconnect() are not caught here and propagate to the caller.
    """
    import asyncio

    # asyncio.run() refuses to start inside a running loop, so detect that
    # case up front. get_running_loop() raises RuntimeError when no loop runs.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # Not in async context - safe to use asyncio.run()
        pass
    else:
        logger.warning(
            "Database schema lookup called from async context. "
            "This may cause issues. Consider using async version of load_agent_schema."
        )
        return None

    async def _async_lookup() -> dict[str, Any] | None:
        """Async helper to query database."""
        from rem.services.postgres import get_postgres_service
        # NOTE(review): Schema is not referenced below; kept in case the import
        # is relied on for side effects - confirm and remove if not.
        from rem.models.entities import Schema  # noqa: F401

        db = get_postgres_service()
        if not db:
            logger.debug("PostgreSQL service not available for schema lookup")
            return None

        try:
            await db.connect()

            # Use REM LOOKUP query to find schema.
            # NOTE(review): schema_name is interpolated into the query text
            # unescaped; a name containing a single quote will break the
            # query. Confirm how the REM dialect expects quotes to be escaped.
            query = f"LOOKUP '{schema_name}' FROM schemas"
            logger.debug(f"Executing: {query} (user_id={user_id})")

            result = await db.execute_rem_query(
                query=query,
                user_id=user_id,
            )

            if result and isinstance(result, dict):
                # Extract spec field (JSON Schema) from the returned entity
                spec = result.get("spec")
                if spec and isinstance(spec, dict):
                    logger.debug(f"Found schema in database: {schema_name}")
                    return spec

            logger.debug(f"Schema not found in database: {schema_name}")
            return None

        except Exception as e:
            # Best-effort lookup: a failing database must not break schema loading.
            logger.debug(f"Database schema lookup error: {e}")
            return None
        finally:
            await db.disconnect()

    # Run async lookup in new event loop
    return asyncio.run(_async_lookup())
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def load_agent_schema(
    schema_name_or_path: str,
    use_cache: bool = True,
    user_id: str | None = None,
    enable_db_fallback: bool = True,
) -> dict[str, Any]:
    """
    Load agent schema from YAML file with unified search logic and caching.

    Schemas found in package resources are cached in memory with no expiry.
    Schemas loaded from an exact file path or from the database are not cached.

    Handles path resolution automatically:
    - "rem" → searches schemas/agents/rem.yaml (top-level)
    - "moment-builder" → searches schemas/agents/core/moment-builder.yaml
    - "contract-analyzer" → searches schemas/agents/examples/contract-analyzer.yaml
    - "core/moment-builder" → normalized to "moment-builder", then searched as above
    - "/absolute/path.yaml" → loads directly
    - "relative/path.yaml" → loads relative to cwd

    Search Order:
    1. In-memory cache (only if use_cache=True and the argument is a plain
       name: no path separator and not an existing file)
    2. Exact path if it is an existing file (absolute or relative to cwd)
    3. Package resources: schemas/agents/{name}.yaml (top-level)
    4. Package resources: schemas/agents/core/{name}.yaml
    5. Package resources: schemas/agents/examples/{name}.yaml
    6. Package resources: schemas/evaluators/{name}.yaml
    7. Package resources: schemas/{name}.yaml
    8. Database LOOKUP: schemas table (if enable_db_fallback=True and user_id provided)

    Args:
        schema_name_or_path: Schema name or file path
            Examples: "rem-query-agent", "contract-analyzer", "./my-schema.yaml"
        use_cache: If True, uses in-memory cache for filesystem schemas
        user_id: User ID for database schema lookup (required for DB fallback)
        enable_db_fallback: If True, falls back to database LOOKUP when file not found

    Returns:
        Agent schema as dictionary. Cached schemas are returned by reference,
        so callers should not mutate the result in place.

    Raises:
        FileNotFoundError: If schema not found in any search location (filesystem + database)
        yaml.YAMLError: If the file at an exact path is invalid YAML. Errors
            while reading a package resource are logged and the search
            continues with the next location.

    Examples:
        >>> # Load by short name (cached after first load)
        >>> schema = load_agent_schema("contract-analyzer")
        >>>
        >>> # Load from custom path (not cached - custom paths may change)
        >>> schema = load_agent_schema("./my-agent.yaml")
        >>>
        >>> # Load custom user schema from database
        >>> schema = load_agent_schema("my-custom-agent", user_id="user-123")
    """
    raw_name = str(schema_name_or_path)

    # Normalize the name for cache key: strip known directory fragments and
    # the YAML extension so "core/foo.yaml" and "foo" share one key.
    cache_key = raw_name
    for fragment in ('agents/', 'schemas/', 'evaluators/', 'core/', 'examples/'):
        cache_key = cache_key.replace(fragment, '')
    if cache_key.endswith(('.yaml', '.yml')):
        cache_key = cache_key.rsplit('.', 1)[0]

    # Check cache first (only for package resources, not custom paths).
    # is_file() rather than exists(): a *directory* that happens to share the
    # schema's name (e.g. a "rem/" folder in cwd when loading "rem") must not
    # be mistaken for a schema file.
    path = Path(schema_name_or_path)
    is_exact_file = path.is_file()
    is_custom_path = is_exact_file or '/' in raw_name or '\\' in raw_name

    if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # 1. Try exact path first (absolute or relative to cwd)
    if is_exact_file:
        logger.debug(f"Loading schema from exact path: {path}")
        # Explicit UTF-8 so loading does not depend on the platform default encoding.
        with open(path, "r", encoding="utf-8") as f:
            schema = yaml.safe_load(f)
        logger.debug(f"Loaded schema with keys: {list(schema.keys())}")
        # Don't cache custom paths (they may change)
        return cast(dict[str, Any], schema)

    # 2. Normalize name for package resource search
    base_name = cache_key

    # 3. Try package resources with standard search paths
    for search_pattern in SCHEMA_SEARCH_PATHS:
        search_path = search_pattern.format(name=base_name)

        try:
            # Use importlib.resources to find schema in installed package
            schema_ref = importlib.resources.files("rem") / search_path
            schema_path = Path(str(schema_ref))

            if schema_path.is_file():
                logger.debug(f"Loading schema from package: {search_path}")
                with open(schema_path, "r", encoding="utf-8") as f:
                    schema = yaml.safe_load(f)
                logger.debug(f"Loaded schema with keys: {list(schema.keys())}")

                # Cache package schemas under the normalized name
                if use_cache:
                    _fs_schema_cache[cache_key] = schema
                    logger.debug(f"Cached schema: {cache_key}")

                return cast(dict[str, Any], schema)
        except Exception as e:
            # Best-effort: a broken candidate must not stop the search.
            logger.debug(f"Could not load from {search_path}: {e}")
            continue

    # 4. Try database LOOKUP fallback (if enabled and user_id provided)
    if enable_db_fallback and user_id:
        try:
            logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id})")
            db_schema = _load_schema_from_database(base_name, user_id)
            if db_schema:
                logger.info(f"✅ Loaded schema from database: {base_name} (user_id={user_id})")
                return db_schema
        except Exception as e:
            logger.debug(f"Database schema lookup failed: {e}")
            # Fall through to error below

    # 5. Schema not found in any location
    searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]
    db_search_note = ""
    if enable_db_fallback:
        if user_id:
            db_search_note = f"\n  - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id='{user_id}' (no match)"
        else:
            db_search_note = "\n  - Database: (skipped - no user_id provided)"

    raise FileNotFoundError(
        f"Schema not found: {schema_name_or_path}\n"
        f"Searched locations:\n"
        f"  - Exact path: {path}\n"
        f"  - Package resources: {', '.join(searched_paths)}"
        f"{db_search_note}"
    )
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def validate_agent_schema(schema: dict[str, Any]) -> bool:
    """
    Run basic structural checks on an agent schema.

    Checks, in order:
    - schema is a dict
    - 'type' equals 'object'
    - 'description' key is present (system prompt)
    - 'properties' key is present; its absence only logs a warning

    Args:
        schema: Agent schema dict

    Returns:
        True if valid (the function never returns False)

    Raises:
        ValueError: If any of the first three checks fails
    """
    # Determine the first failing hard requirement, if any.
    if not isinstance(schema, dict):
        problem = f"Schema must be a dict, got {type(schema)}"
    elif (declared_type := schema.get('type')) != 'object':
        problem = f"Schema type must be 'object', got {declared_type}"
    elif 'description' not in schema:
        problem = "Schema must have 'description' field (system prompt)"
    else:
        problem = None

    if problem is not None:
        raise ValueError(problem)

    # A missing output schema is allowed, but worth flagging.
    if 'properties' not in schema:
        logger.warning("Schema missing 'properties' field - agent will have no structured output")

    logger.debug("Schema validation passed")
    return True
|