remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,649 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized schema loading utility for agent schemas.
|
|
3
|
+
|
|
4
|
+
This module provides a single, consistent implementation for loading
|
|
5
|
+
agent schemas from YAML files across the entire codebase (API, CLI, agent factory).
|
|
6
|
+
|
|
7
|
+
Design Pattern:
|
|
8
|
+
- Search standard locations: schemas/agents/, schemas/evaluators/, schemas/
|
|
9
|
+
- Support short names: "contract-analyzer" → "schemas/agents/contract-analyzer.yaml"
|
|
10
|
+
- Support relative/absolute paths
|
|
11
|
+
- Consistent error messages and logging
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
# From API
|
|
15
|
+
schema = load_agent_schema("rem")
|
|
16
|
+
|
|
17
|
+
# From CLI with custom path
|
|
18
|
+
schema = load_agent_schema("./my-agent.yaml")
|
|
19
|
+
|
|
20
|
+
# From agent factory
|
|
21
|
+
schema = load_agent_schema("contract-analyzer")
|
|
22
|
+
|
|
23
|
+
TODO: Git FS Integration
|
|
24
|
+
The schema loader currently uses importlib.resources for package schemas
|
|
25
|
+
and direct filesystem access for custom paths. The FS abstraction layer
|
|
26
|
+
(rem.services.fs.FS) could be used to abstract storage backends:
|
|
27
|
+
|
|
28
|
+
- Local filesystem (current)
|
|
29
|
+
- Git repositories (GitService)
|
|
30
|
+
- S3 (via FS provider)
|
|
31
|
+
|
|
32
|
+
This would enable loading schemas from versioned Git repos or S3 buckets
|
|
33
|
+
without changing the API. The FS provider pattern already exists and just
|
|
34
|
+
needs integration testing with the schema loader.
|
|
35
|
+
|
|
36
|
+
Example future usage:
|
|
37
|
+
# Load from Git at specific version
|
|
38
|
+
schema = load_agent_schema("git://rem/schemas/agents/rem.yaml?ref=v1.0.0")
|
|
39
|
+
|
|
40
|
+
# Load from S3
|
|
41
|
+
schema = load_agent_schema("s3://rem-schemas/agents/cv-parser.yaml")
|
|
42
|
+
|
|
43
|
+
Schema Caching Status:
|
|
44
|
+
|
|
45
|
+
✅ IMPLEMENTED: Filesystem Schema Caching (2025-11-22)
|
|
46
|
+
- Schemas loaded from package resources cached indefinitely in _fs_schema_cache
|
|
47
|
+
- No TTL needed (immutable, versioned with code)
|
|
48
|
+
- Lazy-loaded on first access
|
|
49
|
+
- Custom paths not cached (may change during development)
|
|
50
|
+
|
|
51
|
+
TODO: Database Schema Caching (Future)
|
|
52
|
+
- Schemas loaded from schemas table (SchemaRepository)
|
|
53
|
+
- Will require TTL for cache invalidation (5-15 minutes)
|
|
54
|
+
- May change at runtime via admin updates
|
|
55
|
+
- Cache key: (schema_name, version) → (schema_dict, timestamp)
|
|
56
|
+
- Planned storage: _db_schema_cache and _db_schema_ttl (not enabled yet; see the sketch under "Future Enhancement")
|
|
57
|
+
|
|
58
|
+
Benefits Achieved:
|
|
59
|
+
- ✅ Eliminated disk I/O for repeated schema loads
|
|
60
|
+
- ✅ Faster agent creation (critical for API latency)
|
|
61
|
+
- 🔲 Database query reduction (pending DB schema implementation)
|
|
62
|
+
|
|
63
|
+
Future Enhancement (when database schemas are implemented):
|
|
64
|
+
import time
|
|
65
|
+
|
|
66
|
+
_db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
|
|
67
|
+
_db_schema_ttl: int = 300 # 5 minutes
|
|
68
|
+
|
|
69
|
+
async def load_agent_schema_from_db(name: str, version: str | None = None):
|
|
70
|
+
cache_key = (name, version or "latest")
|
|
71
|
+
if cache_key in _db_schema_cache:
|
|
72
|
+
schema, timestamp = _db_schema_cache[cache_key]
|
|
73
|
+
if time.time() - timestamp < _db_schema_ttl:
|
|
74
|
+
return schema
|
|
75
|
+
# Load from DB and cache with TTL
|
|
76
|
+
from rem.services.repositories import schema_repository
|
|
77
|
+
schema = await schema_repository.get_by_name(name, version)
|
|
78
|
+
_db_schema_cache[cache_key] = (schema, time.time())
|
|
79
|
+
return schema
|
|
80
|
+
|
|
81
|
+
Related:
|
|
82
|
+
- rem/src/rem/agentic/providers/pydantic_ai.py (create_agent factory)
|
|
83
|
+
- rem/src/rem/services/repositories/schema_repository.py (database schemas)
|
|
84
|
+
"""
|
|
85
|
+
|
|
86
|
+
import importlib.resources
|
|
87
|
+
from pathlib import Path
|
|
88
|
+
from typing import Any, cast
|
|
89
|
+
|
|
90
|
+
import yaml
|
|
91
|
+
from loguru import logger
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Package-relative locations probed for a schema name, highest priority first.
# Each entry is a template; "{name}" is replaced with the normalized schema name.
SCHEMA_SEARCH_PATHS = [
    f"{directory}/{{name}}.yaml"
    for directory in (
        "schemas/agents",  # Top-level agents (e.g., rem.yaml)
        "schemas/agents/core",  # Core system agents
        "schemas/agents/examples",  # Example agents
        "schemas/evaluators",  # Nested evaluators (e.g., hello-world/default)
        "schemas/evaluators/rem",  # REM evaluators (e.g., lookup-correctness)
        "schemas",  # Generic schemas
    )
]

# Process-wide cache of schemas read from package resources, keyed by the
# normalized schema name. Entries never expire (no TTL).
_fs_schema_cache: dict[str, dict[str, Any]] = {}

# Placeholder for a future database schema cache (mutable data, so it would
# need a TTL). Intended for schemas loaded from the database (SchemaRepository).
# _db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
# _db_schema_ttl: int = 300  # 5 minutes in seconds
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _load_schema_from_database(schema_name: str, user_id: str | None) -> dict[str, Any] | None:
    """
    Load a schema spec from the ``schemas`` table by case-insensitive name.

    This function is synchronous but drives an async database lookup, so it
    can be called from load_agent_schema() which is sync. When no event loop
    is running the lookup is executed with ``asyncio.run()``. When a loop is
    already running in the calling thread, ``asyncio.run()`` cannot be used
    there, so the lookup runs in a fresh event loop on a worker thread and
    this function waits for it. NOTE: the calling thread (and therefore the
    running event loop) is still blocked while waiting, for at most 10 seconds.

    Args:
        schema_name: Schema name to lookup (matched with LOWER(name))
        user_id: User ID for data scoping. When falsy, only public rows
            (user_id = 'system' or NULL) are considered.

    Returns:
        Schema spec (dict) if a matching row with a dict ``spec`` is found,
        None otherwise. Errors raised while connecting or querying are logged
        at debug level and also reported as None.

    Raises:
        TimeoutError: If called from a running event loop and the lookup does
            not finish within 10 seconds (concurrent.futures.TimeoutError on
            older Python versions).
        ImportError: If rem.services.postgres cannot be imported.
    """
    import asyncio

    async def _async_lookup() -> dict[str, Any] | None:
        """Async helper to query database."""
        from rem.services.postgres import get_postgres_service

        db = get_postgres_service()
        if not db:
            logger.debug("PostgreSQL service not available for schema lookup")
            return None

        try:
            await db.connect()

            # Query for public schemas (user_id IS NULL) and optionally user-specific
            if user_id:
                query = """
                    SELECT spec FROM schemas
                    WHERE LOWER(name) = LOWER($1)
                    AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
                    LIMIT 1
                """
                params: tuple[str, ...] = (schema_name, user_id)
            else:
                # No user_id - only search public schemas
                query = """
                    SELECT spec FROM schemas
                    WHERE LOWER(name) = LOWER($1)
                    AND (user_id = 'system' OR user_id IS NULL)
                    LIMIT 1
                """
                params = (schema_name,)

            # Log before running the query so the message reflects what is about to happen
            logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
            row = await db.fetchrow(query, *params)

            if row:
                spec = row.get("spec")
                # Only a dict spec is accepted; any other value is treated as "not found"
                if spec and isinstance(spec, dict):
                    logger.debug(f"Found schema in database: {schema_name}")
                    return spec

            logger.debug(f"Schema not found in database: {schema_name}")
            return None

        except Exception as e:
            # Best-effort lookup: the caller falls back to the filesystem on None
            logger.debug(f"Database schema lookup error: {e}")
            return None
        finally:
            await db.disconnect()

    # Only the loop probe is guarded: a RuntimeError raised by the lookup itself
    # must not be mistaken for "no running loop".
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # Not in async context - safe to use asyncio.run()
        return asyncio.run(_async_lookup())

    # We're in an async context - run the lookup in its own loop on a worker thread
    import concurrent.futures

    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    try:
        # The coroutine is created inside the worker so it is never left un-awaited
        future = pool.submit(lambda: asyncio.run(_async_lookup()))
        return future.result(timeout=10)
    finally:
        # wait=False so the timeout above is effective; a `with` block would
        # join the worker thread on exit and block until the lookup finished.
        pool.shutdown(wait=False)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def load_agent_schema(
    schema_name_or_path: str,
    use_cache: bool = True,
    user_id: str | None = None,
    enable_db_fallback: bool = True,
) -> dict[str, Any]:
    """
    Load agent schema with database-first priority for hot-reloading support.

    Schema names are case-invariant - "Rem", "rem", "REM" all resolve to the same schema.

    **IMPORTANT**: Database is checked FIRST (before filesystem) to enable hot-reloading
    of schema updates without redeploying the application. This allows operators to
    update schemas via `rem process ingest` and have changes take effect immediately.

    Handles path resolution automatically:
    - "rem" → searches database, then schemas/agents/rem.yaml
    - "moment-builder" → searches database, then schemas/agents/core/moment-builder.yaml
    - "/absolute/path.yaml" → loads directly from filesystem (exact paths skip database)
    - "relative/path.yaml" → loads relative to cwd (exact paths skip database)

    Search Order:
    1. Exact path if it exists as a file (absolute or relative) - skips database
    2. Database LOOKUP: schemas table (if enable_db_fallback=True and the name
       contains no path separator) - PREFERRED for hot-reload
    3. Check cache (if use_cache=True and schema found in FS cache)
    4. Custom paths from rem.register_schema_path() and SCHEMA__PATHS env var,
       plus auto-detected ./agents, ./schemas, ./evaluators directories
    5. Package resources: schemas/agents/{name}.yaml (top-level)
    6. Package resources: schemas/agents/core/{name}.yaml
    7. Package resources: schemas/agents/examples/{name}.yaml
    8. Package resources: schemas/evaluators/{name}.yaml
    9. Package resources: schemas/evaluators/rem/{name}.yaml
    10. Package resources: schemas/{name}.yaml

    Args:
        schema_name_or_path: Schema name or file path (case-invariant for names)
            Examples: "rem-query-agent", "Contract-Analyzer", "./my-schema.yaml"
        use_cache: If True, uses in-memory cache for filesystem schemas
        user_id: User ID for database schema lookup
        enable_db_fallback: If True, checks database FIRST for schema (default: True)

    Returns:
        Agent schema as dictionary. Schemas served from the cache are the
        cached object itself, not a copy.

    Raises:
        FileNotFoundError: If schema not found in any search location (database + filesystem)
        yaml.YAMLError: If an exact-path or custom-path schema file is invalid YAML
            or its top level is not a mapping (e.g. an empty file)

    Examples:
        >>> # Load by short name - checks database first for hot-reload support
        >>> schema = load_agent_schema("Contract-Analyzer")  # case invariant
        >>>
        >>> # Load from custom path (skips database - exact paths always use filesystem)
        >>> schema = load_agent_schema("./my-agent.yaml")
        >>>
        >>> # Load evaluator schema
        >>> schema = load_agent_schema("lookup-correctness")
    """
    name_text = str(schema_name_or_path)

    # Normalize the name for cache key (lowercase for case-invariant lookups).
    # NOTE: these are plain substring removals, applied before lowercasing; the
    # async loader builds its key the same way and shares _fs_schema_cache, so
    # the normalization is kept identical here.
    cache_key = (
        name_text
        .replace('agents/', '')
        .replace('schemas/', '')
        .replace('evaluators/', '')
        .replace('core/', '')
        .replace('examples/', '')
        .lower()
    )
    if cache_key.endswith(('.yaml', '.yml')):
        cache_key = cache_key.rsplit('.', 1)[0]

    path = Path(schema_name_or_path)

    # 1. Try exact path first (absolute or relative to cwd) - must be a file, not directory
    # Exact paths skip database lookup (explicit file reference)
    if path.is_file():
        logger.debug(f"Loading schema from exact path: {path}")
        # Don't cache custom paths (they may change)
        return _read_schema_file(path)

    # Past this point the name is not an existing file; anything containing a
    # path separator is still treated as path-like and skips database and cache.
    is_custom_path = '/' in name_text or '\\' in name_text

    # 2. Normalize name for lookups (lowercase)
    base_name = cache_key

    # 3. Try database FIRST (if enabled) - enables hot-reload without redeploy
    # Database schemas are NOT cached to ensure hot-reload works immediately
    if enable_db_fallback and not is_custom_path:
        try:
            logger.debug(f"Checking database for schema: {base_name} (user_id={user_id or 'public'})")
            db_schema = _load_schema_from_database(base_name, user_id)
            if db_schema:
                logger.info(f"✅ Loaded schema from database: {base_name}")
                return db_schema
        except Exception as e:
            # Deliberately best-effort: any database failure falls through to
            # the filesystem search below.
            logger.debug(f"Database schema lookup failed: {e}")

    # 4. Check filesystem cache (only for package resources, not custom paths)
    if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # 5. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
    from ..registry import get_schema_paths

    # Copy before inserting auto-detected folders so the list owned by the
    # registry is never mutated (assumes get_schema_paths() returns an iterable
    # of path strings - TODO confirm against rem.registry).
    custom_paths = list(get_schema_paths())

    # Auto-detect local folders if they exist (convention over configuration)
    for auto_folder in ("./agents", "./schemas", "./evaluators"):
        auto_path = Path(auto_folder)
        if auto_path.is_dir():
            resolved = str(auto_path.resolve())
            if resolved not in custom_paths:
                custom_paths.insert(0, resolved)
                logger.debug(f"Auto-detected schema directory: {auto_folder}")

    # Try various patterns within each custom directory
    candidate_patterns = (
        f"{base_name}.yaml",
        f"{base_name}.yml",
        f"agents/{base_name}.yaml",
        f"evaluators/{base_name}.yaml",
    )
    for custom_dir in custom_paths:
        for pattern in candidate_patterns:
            custom_path = Path(custom_dir) / pattern
            # is_file() so that a directory with a matching name is skipped
            if custom_path.is_file():
                logger.debug(f"Loading schema from custom path: {custom_path}")
                # Don't cache custom paths (they may change during development)
                return _read_schema_file(custom_path)

    # 6. Try package resources with standard search paths
    for search_pattern in SCHEMA_SEARCH_PATHS:
        search_path = search_pattern.format(name=base_name)

        try:
            # Use importlib.resources to find schema in installed package
            schema_ref = importlib.resources.files("rem") / search_path
            schema_path = Path(str(schema_ref))

            if schema_path.is_file():
                logger.debug(f"Loading schema from package: {search_path}")
                schema = _read_schema_file(schema_path)

                # Cache filesystem schemas (immutable, safe to cache indefinitely)
                if use_cache:
                    _fs_schema_cache[cache_key] = schema
                    logger.debug(f"Cached schema: {cache_key}")

                return schema
        except Exception as e:
            # An unreadable or malformed packaged schema is skipped so that
            # lower-priority locations still get a chance.
            logger.debug(f"Could not load from {search_path}: {e}")
            continue

    # 7. Schema not found in any location
    searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]

    custom_paths_note = ""
    if custom_paths:
        custom_paths_note = f"\n  - Custom paths: {', '.join(custom_paths)}"

    db_search_note = ""
    if enable_db_fallback:
        if user_id:
            db_search_note = f"\n  - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('{user_id}', 'system', NULL) (no match)"
        else:
            db_search_note = f"\n  - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('system', NULL) (no match)"

    raise FileNotFoundError(
        f"Schema not found: {schema_name_or_path}\n"
        f"Searched locations:\n"
        f"  - Exact path: {path}"
        f"{custom_paths_note}\n"
        f"  - Package resources: {', '.join(searched_paths)}"
        f"{db_search_note}"
    )


def _read_schema_file(schema_file: Path) -> dict[str, Any]:
    """
    Parse a YAML schema file and return its top-level mapping.

    The file is read as UTF-8 regardless of the platform default encoding.

    Raises:
        yaml.YAMLError: If the file is not valid YAML, or its top level is not
            a mapping (an empty file parses to None).
    """
    with open(schema_file, "r", encoding="utf-8") as f:
        schema = yaml.safe_load(f)

    if not isinstance(schema, dict):
        raise yaml.YAMLError(
            f"Schema file {schema_file} must contain a YAML mapping at the top level, "
            f"got {type(schema).__name__}"
        )

    logger.debug(f"Loaded schema with keys: {list(schema.keys())}")
    return schema
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
async def load_agent_schema_async(
|
|
421
|
+
schema_name_or_path: str,
|
|
422
|
+
user_id: str | None = None,
|
|
423
|
+
db=None,
|
|
424
|
+
enable_db_fallback: bool = True,
|
|
425
|
+
) -> dict[str, Any]:
|
|
426
|
+
"""
|
|
427
|
+
Async version of load_agent_schema with database-first priority.
|
|
428
|
+
|
|
429
|
+
Schema names are case-invariant - "MyAgent", "myagent", "MYAGENT" all resolve to the same schema.
|
|
430
|
+
|
|
431
|
+
**IMPORTANT**: Database is checked FIRST (before filesystem) to enable hot-reloading
|
|
432
|
+
of schema updates without redeploying the application.
|
|
433
|
+
|
|
434
|
+
Args:
|
|
435
|
+
schema_name_or_path: Schema name or file path (case-invariant for names)
|
|
436
|
+
user_id: User ID for database schema lookup
|
|
437
|
+
db: Optional existing PostgresService connection (if None, will create one)
|
|
438
|
+
enable_db_fallback: If True, checks database FIRST for schema (default: True)
|
|
439
|
+
|
|
440
|
+
Returns:
|
|
441
|
+
Agent schema as dictionary
|
|
442
|
+
|
|
443
|
+
Raises:
|
|
444
|
+
FileNotFoundError: If schema not found
|
|
445
|
+
"""
|
|
446
|
+
path = Path(schema_name_or_path)
|
|
447
|
+
|
|
448
|
+
# Normalize the name for cache key (lowercase for case-invariant lookups)
|
|
449
|
+
cache_key = str(schema_name_or_path).replace('agents/', '').replace('schemas/', '').replace('evaluators/', '').replace('core/', '').replace('examples/', '').lower()
|
|
450
|
+
if cache_key.endswith('.yaml') or cache_key.endswith('.yml'):
|
|
451
|
+
cache_key = cache_key.rsplit('.', 1)[0]
|
|
452
|
+
|
|
453
|
+
is_custom_path = (path.exists() and path.is_file()) or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
|
|
454
|
+
|
|
455
|
+
# 1. Try exact path first (skips database - explicit file reference)
|
|
456
|
+
if path.exists() and path.is_file():
|
|
457
|
+
logger.debug(f"Loading schema from exact path: {path}")
|
|
458
|
+
with open(path, "r") as f:
|
|
459
|
+
schema = yaml.safe_load(f)
|
|
460
|
+
return cast(dict[str, Any], schema)
|
|
461
|
+
|
|
462
|
+
base_name = cache_key
|
|
463
|
+
|
|
464
|
+
# 2. Try database FIRST (if enabled) - enables hot-reload without redeploy
|
|
465
|
+
if enable_db_fallback and not is_custom_path:
|
|
466
|
+
from rem.services.postgres import get_postgres_service
|
|
467
|
+
|
|
468
|
+
should_disconnect = False
|
|
469
|
+
if db is None:
|
|
470
|
+
db = get_postgres_service()
|
|
471
|
+
if db:
|
|
472
|
+
await db.connect()
|
|
473
|
+
should_disconnect = True
|
|
474
|
+
|
|
475
|
+
if db:
|
|
476
|
+
try:
|
|
477
|
+
if user_id:
|
|
478
|
+
query = """
|
|
479
|
+
SELECT spec FROM schemas
|
|
480
|
+
WHERE LOWER(name) = LOWER($1)
|
|
481
|
+
AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
|
|
482
|
+
LIMIT 1
|
|
483
|
+
"""
|
|
484
|
+
row = await db.fetchrow(query, base_name, user_id)
|
|
485
|
+
else:
|
|
486
|
+
# No user_id - only search public schemas
|
|
487
|
+
query = """
|
|
488
|
+
SELECT spec FROM schemas
|
|
489
|
+
WHERE LOWER(name) = LOWER($1)
|
|
490
|
+
AND (user_id = 'system' OR user_id IS NULL)
|
|
491
|
+
LIMIT 1
|
|
492
|
+
"""
|
|
493
|
+
row = await db.fetchrow(query, base_name)
|
|
494
|
+
if row:
|
|
495
|
+
spec = row.get("spec")
|
|
496
|
+
if spec and isinstance(spec, dict):
|
|
497
|
+
logger.info(f"✅ Loaded schema from database: {base_name}")
|
|
498
|
+
return spec
|
|
499
|
+
finally:
|
|
500
|
+
if should_disconnect:
|
|
501
|
+
await db.disconnect()
|
|
502
|
+
|
|
503
|
+
# 3. Check filesystem cache
|
|
504
|
+
if not is_custom_path and cache_key in _fs_schema_cache:
|
|
505
|
+
logger.debug(f"Loading schema from cache: {cache_key}")
|
|
506
|
+
return _fs_schema_cache[cache_key]
|
|
507
|
+
|
|
508
|
+
# 4. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
|
|
509
|
+
from ..registry import get_schema_paths
|
|
510
|
+
custom_paths = get_schema_paths()
|
|
511
|
+
|
|
512
|
+
# Auto-detect local folders if they exist (convention over configuration)
|
|
513
|
+
auto_detect_folders = ["./agents", "./schemas", "./evaluators"]
|
|
514
|
+
for auto_folder in auto_detect_folders:
|
|
515
|
+
auto_path = Path(auto_folder)
|
|
516
|
+
if auto_path.exists() and auto_path.is_dir():
|
|
517
|
+
resolved = str(auto_path.resolve())
|
|
518
|
+
if resolved not in custom_paths:
|
|
519
|
+
custom_paths.insert(0, resolved)
|
|
520
|
+
logger.debug(f"Auto-detected schema directory: {auto_folder}")
|
|
521
|
+
|
|
522
|
+
for custom_dir in custom_paths:
|
|
523
|
+
for pattern in [f"{base_name}.yaml", f"{base_name}.yml", f"agents/{base_name}.yaml"]:
|
|
524
|
+
custom_path = Path(custom_dir) / pattern
|
|
525
|
+
if custom_path.exists():
|
|
526
|
+
with open(custom_path, "r") as f:
|
|
527
|
+
schema = yaml.safe_load(f)
|
|
528
|
+
return cast(dict[str, Any], schema)
|
|
529
|
+
|
|
530
|
+
# 5. Try package resources
|
|
531
|
+
for search_pattern in SCHEMA_SEARCH_PATHS:
|
|
532
|
+
search_path = search_pattern.format(name=base_name)
|
|
533
|
+
try:
|
|
534
|
+
schema_ref = importlib.resources.files("rem") / search_path
|
|
535
|
+
schema_path = Path(str(schema_ref))
|
|
536
|
+
if schema_path.exists():
|
|
537
|
+
with open(schema_path, "r") as f:
|
|
538
|
+
schema = yaml.safe_load(f)
|
|
539
|
+
_fs_schema_cache[cache_key] = schema
|
|
540
|
+
return cast(dict[str, Any], schema)
|
|
541
|
+
except Exception:
|
|
542
|
+
continue
|
|
543
|
+
|
|
544
|
+
# Not found
|
|
545
|
+
raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def validate_agent_schema(schema: dict[str, Any]) -> bool:
    """
    Check that an agent schema has the expected top-level structure.

    Checks are applied in this order:
    - The schema itself must be a dict
    - 'type' must equal 'object'
    - 'description' (system prompt) must be present
    - 'properties' (output schema) is optional; a warning is logged if absent

    Args:
        schema: Agent schema dict

    Returns:
        True when every mandatory check passes

    Raises:
        ValueError: If the schema is not a dict, its 'type' is not 'object',
            or it has no 'description' key
    """
    if not isinstance(schema, dict):
        raise ValueError(f"Schema must be a dict, got {type(schema)}")

    declared_type = schema.get('type')
    if declared_type != 'object':
        raise ValueError(f"Schema type must be 'object', got {declared_type}")

    has_description = 'description' in schema
    if not has_description:
        raise ValueError("Schema must have 'description' field (system prompt)")

    # A missing 'properties' key is tolerated: it is reported, not rejected.
    has_properties = 'properties' in schema
    if not has_properties:
        logger.warning("Schema missing 'properties' field - agent will have no structured output")

    logger.debug("Schema validation passed")
    return True
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def get_evaluator_schema_path(evaluator_name: str) -> Path | None:
    """
    Find the file path to an evaluator schema.

    Search order (first match wins):
    1. Auto-detected local folders that exist, in priority order:
       ./evaluators, ./schemas, ./agents
    2. Custom schema paths returned by the registry (get_schema_paths)
    3. Package resources: schemas/evaluators/{name}.yaml, then
       schemas/evaluators/rem/{name}.yaml

    Inside every directory from steps 1 and 2 the candidates tried are
    {name}.yaml, {name}.yml and evaluators/{name}.yaml.

    The name is lowercased and a trailing .yaml/.yml extension is dropped
    before searching.

    Args:
        evaluator_name: Name of the evaluator (e.g., "mental-health-classifier")

    Returns:
        Path to the evaluator schema file, or None if not found

    Example:
        >>> path = get_evaluator_schema_path("mental-health-classifier")
        >>> if path:
        ...     print(f"Found evaluator at: {path}")
    """
    from ..registry import get_schema_paths

    # Strip only a trailing extension; a blanket str.replace would also eat
    # ".yaml"/".yml" occurring in the middle of a name.
    base_name = evaluator_name.lower()
    for extension in ('.yaml', '.yml'):
        if base_name.endswith(extension):
            base_name = base_name[: -len(extension)]
            break

    # 1. Try custom schema paths (from registry + auto-detected)
    # Work on a copy so the list handed out by the registry is never mutated.
    registered_paths = list(get_schema_paths())

    # Auto-detect local folders. They are collected in declaration order and
    # placed ahead of the registered paths, so ./evaluators has the highest
    # priority (repeated insert(0, ...) would reverse that order).
    auto_detected: list[str] = []
    for auto_folder in ("./evaluators", "./schemas", "./agents"):
        auto_path = Path(auto_folder)
        if not auto_path.is_dir():
            continue
        resolved = str(auto_path.resolve())
        # A folder that is already registered keeps its registered position.
        if resolved not in registered_paths and resolved not in auto_detected:
            auto_detected.append(resolved)

    candidate_names = (
        f"{base_name}.yaml",
        f"{base_name}.yml",
        f"evaluators/{base_name}.yaml",
    )
    for custom_dir in auto_detected + registered_paths:
        # Try various patterns within each custom directory
        for pattern in candidate_names:
            custom_path = Path(custom_dir) / pattern
            # is_file() rather than exists(): a directory that happens to be
            # called "<name>.yaml" is not a schema.
            if custom_path.is_file():
                logger.debug(f"Found evaluator schema: {custom_path}")
                return custom_path

    # 2. Try package resources
    evaluator_search_paths = (
        f"schemas/evaluators/{base_name}.yaml",
        f"schemas/evaluators/rem/{base_name}.yaml",
    )
    for search_path in evaluator_search_paths:
        try:
            schema_ref = importlib.resources.files("rem") / search_path
            schema_path = Path(str(schema_ref))
            if schema_path.is_file():
                logger.debug(f"Found evaluator schema in package: {schema_path}")
                return schema_path
        except Exception as e:
            # Best-effort lookup: a resource that cannot be inspected is
            # reported at debug level and the next location is tried.
            logger.debug(f"Could not check {search_path}: {e}")
            continue

    logger.warning(f"Evaluator schema not found: {evaluator_name}")
    return None
|