remdb 0.3.14__py3-none-any.whl → 0.3.133__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -27
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +112 -17
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +302 -109
- rem/agentic/providers/pydantic_ai.py +215 -26
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +215 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +132 -40
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +70 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +1 -3
- rem/cli/commands/db.py +386 -143
- rem/cli/commands/experiments.py +418 -27
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +27 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +54 -0
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/registry.py +10 -4
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/service.py +92 -20
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +154 -14
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/settings.py +324 -23
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +220 -22
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/METADATA +335 -226
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/RECORD +86 -66
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1051
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
rem/utils/model_helpers.py
CHANGED
|
@@ -16,8 +16,12 @@ Embedding Field Detection:
|
|
|
16
16
|
Table Name Inference:
|
|
17
17
|
1. model_config.json_schema_extra.table_name
|
|
18
18
|
2. CamelCase → snake_case + pluralization
|
|
19
|
+
|
|
20
|
+
Model Resolution:
|
|
21
|
+
- model_from_arbitrary_casing: Resolve model class from flexible input casing
|
|
19
22
|
"""
|
|
20
23
|
|
|
24
|
+
import re
|
|
21
25
|
from typing import Any, Type
|
|
22
26
|
|
|
23
27
|
from loguru import logger
|
|
@@ -94,7 +98,9 @@ def get_table_name(model: Type[BaseModel]) -> str:
|
|
|
94
98
|
if isinstance(model_config, dict):
|
|
95
99
|
json_extra = model_config.get("json_schema_extra", {})
|
|
96
100
|
if isinstance(json_extra, dict) and "table_name" in json_extra:
|
|
97
|
-
|
|
101
|
+
table_name = json_extra["table_name"]
|
|
102
|
+
if isinstance(table_name, str):
|
|
103
|
+
return table_name
|
|
98
104
|
|
|
99
105
|
# Infer from class name
|
|
100
106
|
name = model.__name__
|
|
@@ -234,3 +240,152 @@ def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
|
|
|
234
240
|
"entity_key_field": get_entity_key_field(model),
|
|
235
241
|
"embeddable_fields": get_embeddable_fields(model),
|
|
236
242
|
}
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def normalize_to_title_case(name: str) -> str:
    """
    Convert a name written in any common casing into TitleCase (PascalCase).

    Recognised input shapes:
    - kebab-case: domain-resource → DomainResource
    - snake_case: domain_resource → DomainResource
    - lowercase: domainresource → Domainresource (treated as one word)
    - TitleCase: DomainResource → DomainResource (returned untouched)
    - Mixed: Domain-Resource, DOMAIN_RESOURCE → DomainResource

    Args:
        name: The name to normalize, in any of the casings above

    Returns:
        The name with hyphens/underscores removed and every delimited
        word capitalized (first letter upper, remainder lower)

    Example:
        >>> normalize_to_title_case("domain-resource")
        'DomainResource'
        >>> normalize_to_title_case("domain_resources")
        'DomainResources'
        >>> normalize_to_title_case("DomainResource")
        'DomainResource'
    """
    # A name counts as "already TitleCase" when it begins with an uppercase
    # letter, carries no delimiter, and is not entirely uppercase.
    already_title_cased = (
        bool(name)
        and name[0].isupper()
        and not any(delimiter in name for delimiter in "-_")
        and any(ch.islower() for ch in name)
    )
    if already_title_cased:
        return name

    # Break the name apart at hyphens/underscores; empty chunks (from leading,
    # trailing or doubled delimiters) are dropped.
    pieces = []
    for chunk in re.split(r'[-_]', name):
        if chunk:
            # str.capitalize upper-cases the first letter and lower-cases the rest
            pieces.append(chunk.capitalize())

    return "".join(pieces)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def model_from_arbitrary_casing(
    name: str,
    registry: dict[str, Type[BaseModel]] | None = None,
) -> Type[BaseModel]:
    """
    Resolve a model class from arbitrary casing input.

    REM entity models use strict TitleCase (PascalCase) naming. This function
    allows flexible input formats while maintaining consistency:

    Input formats supported:
    - kebab-case: domain-resource, domain-resources
    - snake_case: domain_resource, domain_resources
    - lowercase: resource, domainresource
    - TitleCase: Resource, DomainResource

    Lookup order:
    1. Exact match on the TitleCase-normalized name
    2. Normalized name with a trailing 's' removed
    3. Normalized name with a trailing 's' appended
    4. The same three candidates compared case-insensitively. This is what
       lets single-word multi-part inputs such as "domainresource" (which
       normalizes to "Domainresource") still resolve to DomainResource.

    Args:
        name: Model name in any supported casing format
        registry: Optional dict mapping TitleCase names to model classes.
            If not provided, uses rem.models.entities module.

    Returns:
        The resolved Pydantic model class

    Raises:
        ValueError: If no model matches the normalized name

    Example:
        >>> model = model_from_arbitrary_casing("domain-resources")
        >>> model.__name__
        'DomainResource'
        >>> model = model_from_arbitrary_casing("Resource")
        >>> model.__name__
        'Resource'
    """
    # Build default registry from entities module if not provided
    if registry is None:
        # Imported lazily, inside the function, rather than at module import time
        from rem.models.entities import (
            DomainResource,
            Feedback,
            File,
            ImageResource,
            Message,
            Moment,
            Ontology,
            OntologyConfig,
            Resource,
            Schema,
            Session,
            User,
        )

        registry = {
            "Resource": Resource,
            "Resources": Resource,  # Plural alias
            "DomainResource": DomainResource,
            "DomainResources": DomainResource,  # Plural alias
            "ImageResource": ImageResource,
            "ImageResources": ImageResource,
            "File": File,
            "Files": File,
            "Message": Message,
            "Messages": Message,
            "Moment": Moment,
            "Moments": Moment,
            "Session": Session,
            "Sessions": Session,
            "Feedback": Feedback,
            "User": User,
            "Users": User,
            "Schema": Schema,
            "Schemas": Schema,
            "Ontology": Ontology,
            "Ontologies": Ontology,
            "OntologyConfig": OntologyConfig,
            "OntologyConfigs": OntologyConfig,
        }

    # Normalize input to TitleCase
    normalized = normalize_to_title_case(name)

    # Look up in registry
    if normalized in registry:
        logger.debug(f"Resolved model '{name}' → {registry[normalized].__name__}")
        return registry[normalized]

    # Try without trailing 's' (singular form)
    if normalized.endswith("s") and normalized[:-1] in registry:
        logger.debug(f"Resolved model '{name}' → {registry[normalized[:-1]].__name__} (singular)")
        return registry[normalized[:-1]]

    # Try with trailing 's' (plural form)
    plural = normalized + "s"
    if plural in registry:
        logger.debug(f"Resolved model '{name}' → {registry[plural].__name__} (plural)")
        return registry[plural]

    # Case-insensitive fallback. Word boundaries cannot be recovered from an
    # undelimited lowercase/camelCase input, so compare lowercased keys instead.
    # setdefault keeps the first registry entry if two keys differ only by case.
    folded: dict[str, Type[BaseModel]] = {}
    for key, model in registry.items():
        folded.setdefault(key.lower(), model)

    lowered = normalized.lower()
    candidates = [lowered]
    if lowered.endswith("s"):
        candidates.append(lowered[:-1])
    candidates.append(lowered + "s")

    for candidate in candidates:
        if candidate in folded:
            logger.debug(
                f"Resolved model '{name}' → {folded[candidate].__name__} (case-insensitive)"
            )
            return folded[candidate]

    available = sorted(set(m.__name__ for m in registry.values()))
    raise ValueError(
        f"Unknown model: '{name}' (normalized: '{normalized}'). "
        f"Available models: {', '.join(available)}"
    )
|
rem/utils/schema_loader.py
CHANGED
|
@@ -146,7 +146,6 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
|
|
|
146
146
|
async def _async_lookup():
|
|
147
147
|
"""Async helper to query database."""
|
|
148
148
|
from rem.services.postgres import get_postgres_service
|
|
149
|
-
from rem.models.entities import Schema
|
|
150
149
|
|
|
151
150
|
db = get_postgres_service()
|
|
152
151
|
if not db:
|
|
@@ -156,19 +155,20 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
|
|
|
156
155
|
try:
|
|
157
156
|
await db.connect()
|
|
158
157
|
|
|
159
|
-
#
|
|
160
|
-
|
|
161
|
-
|
|
158
|
+
# Query schemas table directly by name
|
|
159
|
+
# Note: Schema name lookup is case-insensitive for user convenience
|
|
160
|
+
query = """
|
|
161
|
+
SELECT spec FROM schemas
|
|
162
|
+
WHERE LOWER(name) = LOWER($1)
|
|
163
|
+
AND (user_id = $2 OR user_id = 'system')
|
|
164
|
+
LIMIT 1
|
|
165
|
+
"""
|
|
166
|
+
logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id}")
|
|
162
167
|
|
|
163
|
-
|
|
164
|
-
query=query,
|
|
165
|
-
user_id=user_id,
|
|
166
|
-
)
|
|
168
|
+
row = await db.fetchrow(query, schema_name, user_id)
|
|
167
169
|
|
|
168
|
-
if
|
|
169
|
-
|
|
170
|
-
# Extract spec field (JSON Schema)
|
|
171
|
-
spec = result.get("spec")
|
|
170
|
+
if row:
|
|
171
|
+
spec = row.get("spec")
|
|
172
172
|
if spec and isinstance(spec, dict):
|
|
173
173
|
logger.debug(f"Found schema in database: {schema_name}")
|
|
174
174
|
return spec
|
|
@@ -195,6 +195,8 @@ def load_agent_schema(
|
|
|
195
195
|
"""
|
|
196
196
|
Load agent schema from YAML file with unified search logic and caching.
|
|
197
197
|
|
|
198
|
+
Schema names are case-invariant - "Rem", "rem", "REM" all resolve to the same schema.
|
|
199
|
+
|
|
198
200
|
Filesystem schemas are cached indefinitely (immutable, versioned with code).
|
|
199
201
|
Database schemas (future) will be cached with TTL for invalidation.
|
|
200
202
|
|
|
@@ -218,8 +220,8 @@ def load_agent_schema(
|
|
|
218
220
|
9. Database LOOKUP: schemas table (if enable_db_fallback=True and user_id provided)
|
|
219
221
|
|
|
220
222
|
Args:
|
|
221
|
-
schema_name_or_path: Schema name or file path
|
|
222
|
-
Examples: "rem-query-agent", "
|
|
223
|
+
schema_name_or_path: Schema name or file path (case-invariant for names)
|
|
224
|
+
Examples: "rem-query-agent", "Contract-Analyzer", "./my-schema.yaml"
|
|
223
225
|
use_cache: If True, uses in-memory cache for filesystem schemas
|
|
224
226
|
user_id: User ID for database schema lookup (required for DB fallback)
|
|
225
227
|
enable_db_fallback: If True, falls back to database LOOKUP when file not found
|
|
@@ -232,8 +234,8 @@ def load_agent_schema(
|
|
|
232
234
|
yaml.YAMLError: If schema file is invalid YAML
|
|
233
235
|
|
|
234
236
|
Examples:
|
|
235
|
-
>>> # Load by short name (cached after first load)
|
|
236
|
-
>>> schema = load_agent_schema("contract-analyzer"
|
|
237
|
+
>>> # Load by short name (cached after first load) - case invariant
|
|
238
|
+
>>> schema = load_agent_schema("Contract-Analyzer") # same as "contract-analyzer"
|
|
237
239
|
>>>
|
|
238
240
|
>>> # Load from custom path (not cached - custom paths may change)
|
|
239
241
|
>>> schema = load_agent_schema("./my-agent.yaml")
|
|
@@ -241,11 +243,11 @@ def load_agent_schema(
|
|
|
241
243
|
>>> # Load evaluator schema (cached)
|
|
242
244
|
>>> schema = load_agent_schema("rem-lookup-correctness")
|
|
243
245
|
>>>
|
|
244
|
-
>>> # Load custom user schema from database
|
|
245
|
-
>>> schema = load_agent_schema("
|
|
246
|
+
>>> # Load custom user schema from database (case invariant)
|
|
247
|
+
>>> schema = load_agent_schema("My-Agent", user_id="user-123") # same as "my-agent"
|
|
246
248
|
"""
|
|
247
|
-
# Normalize the name for cache key
|
|
248
|
-
cache_key = str(schema_name_or_path).replace('agents/', '').replace('schemas/', '').replace('evaluators/', '').replace('core/', '').replace('examples/', '')
|
|
249
|
+
# Normalize the name for cache key (lowercase for case-invariant lookups)
|
|
250
|
+
cache_key = str(schema_name_or_path).replace('agents/', '').replace('schemas/', '').replace('evaluators/', '').replace('core/', '').replace('examples/', '').lower()
|
|
249
251
|
if cache_key.endswith('.yaml') or cache_key.endswith('.yml'):
|
|
250
252
|
cache_key = cache_key.rsplit('.', 1)[0]
|
|
251
253
|
|
|
@@ -266,13 +268,23 @@ def load_agent_schema(
|
|
|
266
268
|
# Don't cache custom paths (they may change)
|
|
267
269
|
return cast(dict[str, Any], schema)
|
|
268
270
|
|
|
269
|
-
# 2. Normalize name for package resource search
|
|
271
|
+
# 2. Normalize name for package resource search (lowercase)
|
|
270
272
|
base_name = cache_key
|
|
271
273
|
|
|
272
|
-
# 3. Try custom schema paths (from registry + SCHEMA__PATHS env var)
|
|
274
|
+
# 3. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
|
|
273
275
|
from ..registry import get_schema_paths
|
|
274
276
|
|
|
275
277
|
custom_paths = get_schema_paths()
|
|
278
|
+
|
|
279
|
+
# Auto-detect local folders if they exist (convention over configuration)
|
|
280
|
+
auto_detect_folders = ["./agents", "./schemas", "./evaluators"]
|
|
281
|
+
for auto_folder in auto_detect_folders:
|
|
282
|
+
auto_path = Path(auto_folder)
|
|
283
|
+
if auto_path.exists() and auto_path.is_dir():
|
|
284
|
+
resolved = str(auto_path.resolve())
|
|
285
|
+
if resolved not in custom_paths:
|
|
286
|
+
custom_paths.insert(0, resolved)
|
|
287
|
+
logger.debug(f"Auto-detected schema directory: {auto_folder}")
|
|
276
288
|
for custom_dir in custom_paths:
|
|
277
289
|
# Try various patterns within each custom directory
|
|
278
290
|
for pattern in [
|
|
@@ -351,6 +363,122 @@ def load_agent_schema(
|
|
|
351
363
|
)
|
|
352
364
|
|
|
353
365
|
|
|
366
|
+
async def load_agent_schema_async(
    schema_name_or_path: str,
    user_id: str | None = None,
    db=None,
) -> dict[str, Any]:
    """
    Async version of load_agent_schema for use in async contexts.

    Schema names are case-invariant - "MyAgent", "myagent", "MYAGENT" all resolve to the same schema.

    This version accepts an existing database connection to avoid creating new connections.

    Search order:
    1. In-memory filesystem cache (skipped for custom paths)
    2. Exact path, if it exists on disk (never cached)
    3. Custom schema directories from get_schema_paths(), plus any of
       ./agents, ./schemas, ./evaluators that exist (never cached)
    4. Package resources listed in SCHEMA_SEARCH_PATHS (cached on hit)
    5. Database `schemas` table (only when user_id is given)

    Args:
        schema_name_or_path: Schema name or file path (case-invariant for names)
        user_id: User ID for database schema lookup
        db: Optional existing PostgresService connection (if None, will create one)

    Returns:
        Agent schema as dictionary

    Raises:
        FileNotFoundError: If schema not found
    """
    # First try filesystem search (sync operations are fine)
    path = Path(schema_name_or_path)

    # Normalize the name for cache key (lowercase for case-invariant lookups)
    cache_key = str(schema_name_or_path)
    for prefix in ('agents/', 'schemas/', 'evaluators/', 'core/', 'examples/'):
        cache_key = cache_key.replace(prefix, '')
    cache_key = cache_key.lower()
    if cache_key.endswith('.yaml') or cache_key.endswith('.yml'):
        cache_key = cache_key.rsplit('.', 1)[0]

    is_custom_path = path.exists() or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)

    # Check cache
    if not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # Try exact path
    if path.exists():
        logger.debug(f"Loading schema from exact path: {path}")
        # Explicit UTF-8 so decoding does not depend on the platform default
        with open(path, "r", encoding="utf-8") as f:
            schema = yaml.safe_load(f)
        return cast(dict[str, Any], schema)

    base_name = cache_key

    # Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
    from ..registry import get_schema_paths

    # Work on a copy: the auto-detected directories depend on the current
    # working directory and should not be written back into the caller's list.
    custom_paths = list(get_schema_paths())

    # Auto-detect local folders if they exist (convention over configuration)
    auto_detect_folders = ["./agents", "./schemas", "./evaluators"]
    for auto_folder in auto_detect_folders:
        auto_path = Path(auto_folder)
        if auto_path.exists() and auto_path.is_dir():
            resolved = str(auto_path.resolve())
            if resolved not in custom_paths:
                custom_paths.insert(0, resolved)
                logger.debug(f"Auto-detected schema directory: {auto_folder}")

    for custom_dir in custom_paths:
        for pattern in [f"{base_name}.yaml", f"{base_name}.yml", f"agents/{base_name}.yaml"]:
            custom_path = Path(custom_dir) / pattern
            if custom_path.exists():
                with open(custom_path, "r", encoding="utf-8") as f:
                    schema = yaml.safe_load(f)
                return cast(dict[str, Any], schema)

    # Try package resources
    for search_pattern in SCHEMA_SEARCH_PATHS:
        search_path = search_pattern.format(name=base_name)
        try:
            schema_ref = importlib.resources.files("rem") / search_path
            schema_path = Path(str(schema_ref))
            if schema_path.exists():
                with open(schema_path, "r", encoding="utf-8") as f:
                    schema = yaml.safe_load(f)
                _fs_schema_cache[cache_key] = schema
                return cast(dict[str, Any], schema)
        except Exception as e:
            # Best-effort search: keep trying the remaining locations, but
            # leave a trace so unreadable/invalid files can be diagnosed.
            logger.debug(f"Could not check {search_path}: {e}")
            continue

    # Try database lookup
    if user_id:
        from rem.services.postgres import get_postgres_service

        # Only disconnect connections opened here; a caller-supplied db is left open
        should_disconnect = False
        if db is None:
            db = get_postgres_service()
            if db:
                await db.connect()
                should_disconnect = True

        if db:
            try:
                query = """
                    SELECT spec FROM schemas
                    WHERE LOWER(name) = LOWER($1)
                    AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
                    LIMIT 1
                """
                row = await db.fetchrow(query, base_name, user_id)
                if row:
                    spec = row.get("spec")
                    if spec and isinstance(spec, dict):
                        logger.info(f"✅ Loaded schema from database: {base_name}")
                        return spec
            finally:
                if should_disconnect:
                    await db.disconnect()

    # Not found
    raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
|
|
480
|
+
|
|
481
|
+
|
|
354
482
|
def validate_agent_schema(schema: dict[str, Any]) -> bool:
|
|
355
483
|
"""
|
|
356
484
|
Validate agent schema structure.
|
|
@@ -383,3 +511,73 @@ def validate_agent_schema(schema: dict[str, Any]) -> bool:
|
|
|
383
511
|
|
|
384
512
|
logger.debug("Schema validation passed")
|
|
385
513
|
return True
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def get_evaluator_schema_path(evaluator_name: str) -> Path | None:
    """
    Find the file path to an evaluator schema.

    Searches standard locations for the evaluator schema YAML file:
    - ./evaluators/{name}.yaml (local project)
    - Custom schema paths from registry
    - Package resources: schemas/evaluators/{name}.yaml

    The name is lowercased and a trailing ".yaml"/".yml" extension is removed
    before searching.

    Args:
        evaluator_name: Name of the evaluator (e.g., "mental-health-classifier")

    Returns:
        Path to the evaluator schema file, or None if not found

    Example:
        >>> path = get_evaluator_schema_path("mental-health-classifier")
        >>> if path:
        ...     print(f"Found evaluator at: {path}")
    """
    from ..registry import get_schema_paths

    # Strip the extension only when it is a suffix; a plain str.replace would
    # also remove ".yaml"/".yml" occurring in the middle of a name.
    base_name = evaluator_name.lower()
    for extension in ('.yaml', '.yml'):
        if base_name.endswith(extension):
            base_name = base_name[: -len(extension)]
            break

    # 1. Try custom schema paths (from registry + auto-detected)
    # Work on a copy: the auto-detected directories depend on the current
    # working directory and should not be written back into the caller's list.
    custom_paths = list(get_schema_paths())

    # Auto-detect local folders
    auto_detect_folders = ["./evaluators", "./schemas", "./agents"]
    for auto_folder in auto_detect_folders:
        auto_path = Path(auto_folder)
        if auto_path.exists() and auto_path.is_dir():
            resolved = str(auto_path.resolve())
            if resolved not in custom_paths:
                custom_paths.insert(0, resolved)

    for custom_dir in custom_paths:
        # Try various patterns within each custom directory
        for pattern in [
            f"{base_name}.yaml",
            f"{base_name}.yml",
            f"evaluators/{base_name}.yaml",
        ]:
            custom_path = Path(custom_dir) / pattern
            if custom_path.exists():
                logger.debug(f"Found evaluator schema: {custom_path}")
                return custom_path

    # 2. Try package resources
    evaluator_search_paths = [
        f"schemas/evaluators/{base_name}.yaml",
        f"schemas/evaluators/rem/{base_name}.yaml",
    ]

    for search_path in evaluator_search_paths:
        try:
            schema_ref = importlib.resources.files("rem") / search_path
            schema_path = Path(str(schema_ref))

            if schema_path.exists():
                logger.debug(f"Found evaluator schema in package: {schema_path}")
                return schema_path
        except Exception as e:
            # Best-effort: a failure on one location must not abort the search
            logger.debug(f"Could not check {search_path}: {e}")
            continue

    logger.warning(f"Evaluator schema not found: {evaluator_name}")
    return None
|
rem/utils/sql_paths.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Utilities for resolving SQL file paths.
|
|
2
|
+
|
|
3
|
+
Handles package SQL directory resolution and user migrations.
|
|
4
|
+
|
|
5
|
+
Convention for user migrations:
|
|
6
|
+
Place custom SQL files in `./sql/migrations/` relative to your project root.
|
|
7
|
+
Files should be numbered (e.g., `100_custom_table.sql`) to control execution order.
|
|
8
|
+
Package migrations (001-099) run first, then user migrations (100+).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Optional
|
|
13
|
+
import importlib.resources
|
|
14
|
+
|
|
15
|
+
# Convention: Default location for user-maintained migrations
|
|
16
|
+
USER_SQL_DIR_CONVENTION = "sql"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_package_sql_dir() -> Path:
    """Get the SQL directory from the installed rem package.

    Three locations are tried in order, returning the first that exists:
    1. ``sql`` inside the ``rem`` package, via ``importlib.resources``
    2. ``sql`` next to ``rem.__file__``
    3. ``src/rem/sql`` relative to the current working directory
       (development checkout)

    Returns:
        Path to the package's sql directory

    Raises:
        FileNotFoundError: If the SQL directory cannot be found
    """
    try:
        # Use importlib.resources for Python 3.9+
        sql_ref = importlib.resources.files("rem") / "sql"
        package_sql = Path(str(sql_ref))
        if package_sql.exists():
            return package_sql
    except (ImportError, AttributeError, TypeError):
        # ImportError covers ModuleNotFoundError, which files() raises when
        # `rem` is not importable; without it the fallbacks below (and the
        # documented FileNotFoundError) would never be reached.
        pass

    # Fallback: use __file__ to find package location
    try:
        import rem
        package_sql = Path(rem.__file__).parent / "sql"
        if package_sql.exists():
            return package_sql
    except (ImportError, AttributeError):
        pass

    # Development fallback: check relative to cwd
    dev_sql = Path("src/rem/sql")
    if dev_sql.exists():
        return dev_sql

    raise FileNotFoundError(
        "Could not locate rem SQL directory. "
        "Ensure remdb is properly installed or run from the source directory."
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_package_migrations_dir() -> Path:
    """Return the ``migrations`` subdirectory of the package SQL directory.

    The path is built from ``get_package_sql_dir()``; its existence is not
    checked here.

    Returns:
        Path to the package's migrations directory
    """
    sql_root = get_package_sql_dir()
    return sql_root / "migrations"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_user_sql_dir() -> Optional[Path]:
    """Locate the user's conventional SQL directory, if present.

    The candidate is ``USER_SQL_DIR_CONVENTION`` resolved against the current
    working directory (i.e. `./sql/`).

    Returns:
        The directory path when it exists and is a directory, otherwise None
    """
    candidate = Path.cwd() / USER_SQL_DIR_CONVENTION
    if not (candidate.exists() and candidate.is_dir()):
        return None
    return candidate
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def list_package_migrations() -> List[Path]:
    """Collect the numbered ``*.sql`` migrations shipped with the package.

    Returns:
        Migration file paths in sorted order; an empty list when the
        migrations directory is missing or the package SQL directory
        cannot be located
    """
    try:
        migrations_dir = get_package_migrations_dir()
        if not migrations_dir.exists():
            return []
        # Only files whose name starts with a digit count as migrations
        numbered = [
            entry
            for entry in migrations_dir.glob("*.sql")
            if entry.name[0].isdigit()
        ]
        numbered.sort()
        return numbered
    except FileNotFoundError:
        return []
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def list_user_migrations() -> List[Path]:
    """Collect the numbered ``*.sql`` files under the user's sql/migrations directory.

    Returns:
        Migration file paths in sorted order; an empty list when there is no
        user SQL directory or it has no ``migrations`` subdirectory
    """
    sql_root = get_user_sql_dir()
    if not sql_root:
        return []

    migrations_dir = sql_root / "migrations"
    if not migrations_dir.exists():
        return []

    # Only files whose name starts with a digit count as migrations
    numbered = [
        entry
        for entry in migrations_dir.glob("*.sql")
        if entry.name[0].isdigit()
    ]
    numbered.sort()
    return numbered
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def list_all_migrations() -> List[Path]:
    """Merge package and user migrations into one list ordered by filename.

    Sources, in precedence order:
    1. Package migrations directory
    2. User directory (./sql/migrations/) if it exists

    When two files share a filename, the first one seen wins, so a package
    migration takes precedence over a user migration of the same name.

    Ordering is a plain string sort on the filename, so use numbered prefixes:
    - 001-099: Reserved for package migrations
    - 100+: Recommended for user migrations

    Returns:
        Sorted list of all migration file paths (by filename)
    """
    # dict preserves insertion order; setdefault keeps the first path per name
    first_by_name: dict[str, Path] = {}
    for migration in [*list_package_migrations(), *list_user_migrations()]:
        first_by_name.setdefault(migration.name, migration)

    return sorted(first_by_name.values(), key=lambda p: p.name)
|
rem/utils/sql_types.py
CHANGED
|
@@ -16,6 +16,7 @@ Best Practices:
|
|
|
16
16
|
- UUID for identifiers in Union types
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
+
import types
|
|
19
20
|
from datetime import date, datetime, time
|
|
20
21
|
from typing import Any, Union, get_args, get_origin
|
|
21
22
|
from uuid import UUID
|
|
@@ -78,8 +79,9 @@ def get_sql_type(field_info: FieldInfo, field_name: str) -> str:
|
|
|
78
79
|
return "TEXT"
|
|
79
80
|
|
|
80
81
|
# Handle Union types (including Optional[T] which is Union[T, None])
|
|
82
|
+
# Also handles Python 3.10+ `X | None` syntax which uses types.UnionType
|
|
81
83
|
origin = get_origin(annotation)
|
|
82
|
-
if origin is Union:
|
|
84
|
+
if origin is Union or isinstance(annotation, types.UnionType):
|
|
83
85
|
args = get_args(annotation)
|
|
84
86
|
# Filter out NoneType
|
|
85
87
|
non_none_args = [arg for arg in args if arg is not type(None)]
|
rem/workers/__init__.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Background workers for processing tasks."""
|
|
2
2
|
|
|
3
|
+
from .db_listener import DBListener
|
|
3
4
|
from .sqs_file_processor import SQSFileProcessor
|
|
5
|
+
from .unlogged_maintainer import UnloggedMaintainer
|
|
4
6
|
|
|
5
|
-
__all__ = ["SQSFileProcessor"]
|
|
7
|
+
__all__ = ["DBListener", "SQSFileProcessor", "UnloggedMaintainer"]
|