remdb 0.3.14__py3-none-any.whl → 0.3.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +16 -2
  4. rem/agentic/agents/sse_simulator.py +502 -0
  5. rem/agentic/context.py +51 -27
  6. rem/agentic/llm_provider_models.py +301 -0
  7. rem/agentic/mcp/tool_wrapper.py +112 -17
  8. rem/agentic/otel/setup.py +93 -4
  9. rem/agentic/providers/phoenix.py +302 -109
  10. rem/agentic/providers/pydantic_ai.py +215 -26
  11. rem/agentic/schema.py +361 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +215 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +132 -40
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +26 -5
  18. rem/api/mcp_router/tools.py +465 -7
  19. rem/api/routers/admin.py +494 -0
  20. rem/api/routers/auth.py +70 -0
  21. rem/api/routers/chat/completions.py +402 -20
  22. rem/api/routers/chat/models.py +88 -10
  23. rem/api/routers/chat/otel_utils.py +33 -0
  24. rem/api/routers/chat/sse_events.py +542 -0
  25. rem/api/routers/chat/streaming.py +642 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +268 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +360 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +237 -64
  34. rem/cli/commands/cluster.py +1808 -0
  35. rem/cli/commands/configure.py +1 -3
  36. rem/cli/commands/db.py +386 -143
  37. rem/cli/commands/experiments.py +418 -27
  38. rem/cli/commands/process.py +14 -8
  39. rem/cli/commands/schema.py +97 -50
  40. rem/cli/main.py +27 -6
  41. rem/config.py +10 -3
  42. rem/models/core/core_model.py +7 -1
  43. rem/models/core/experiment.py +54 -0
  44. rem/models/core/rem_query.py +5 -2
  45. rem/models/entities/__init__.py +21 -0
  46. rem/models/entities/domain_resource.py +38 -0
  47. rem/models/entities/feedback.py +123 -0
  48. rem/models/entities/message.py +30 -1
  49. rem/models/entities/session.py +83 -0
  50. rem/models/entities/shared_session.py +180 -0
  51. rem/registry.py +10 -4
  52. rem/schemas/agents/rem.yaml +7 -3
  53. rem/services/content/service.py +92 -20
  54. rem/services/embeddings/api.py +4 -4
  55. rem/services/embeddings/worker.py +16 -16
  56. rem/services/phoenix/client.py +154 -14
  57. rem/services/postgres/README.md +159 -15
  58. rem/services/postgres/__init__.py +2 -1
  59. rem/services/postgres/diff_service.py +531 -0
  60. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  61. rem/services/postgres/repository.py +132 -0
  62. rem/services/postgres/schema_generator.py +205 -4
  63. rem/services/postgres/service.py +6 -6
  64. rem/services/rem/parser.py +44 -9
  65. rem/services/rem/service.py +36 -2
  66. rem/services/session/compression.py +24 -1
  67. rem/services/session/reload.py +1 -1
  68. rem/settings.py +324 -23
  69. rem/sql/background_indexes.sql +21 -16
  70. rem/sql/migrations/001_install.sql +387 -54
  71. rem/sql/migrations/002_install_models.sql +2320 -393
  72. rem/sql/migrations/003_optional_extensions.sql +326 -0
  73. rem/sql/migrations/004_cache_system.sql +548 -0
  74. rem/utils/__init__.py +18 -0
  75. rem/utils/date_utils.py +2 -2
  76. rem/utils/model_helpers.py +156 -1
  77. rem/utils/schema_loader.py +220 -22
  78. rem/utils/sql_paths.py +146 -0
  79. rem/utils/sql_types.py +3 -1
  80. rem/workers/__init__.py +3 -1
  81. rem/workers/db_listener.py +579 -0
  82. rem/workers/unlogged_maintainer.py +463 -0
  83. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/METADATA +335 -226
  84. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/RECORD +86 -66
  85. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
  86. rem/sql/002_install_models.sql +0 -1068
  87. rem/sql/install_models.sql +0 -1051
  88. rem/sql/migrations/003_seed_default_user.sql +0 -48
  89. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
@@ -16,8 +16,12 @@ Embedding Field Detection:
16
16
  Table Name Inference:
17
17
  1. model_config.json_schema_extra.table_name
18
18
  2. CamelCase → snake_case + pluralization
19
+
20
+ Model Resolution:
21
+ - model_from_arbitrary_casing: Resolve model class from flexible input casing
19
22
  """
20
23
 
24
+ import re
21
25
  from typing import Any, Type
22
26
 
23
27
  from loguru import logger
@@ -94,7 +98,9 @@ def get_table_name(model: Type[BaseModel]) -> str:
94
98
  if isinstance(model_config, dict):
95
99
  json_extra = model_config.get("json_schema_extra", {})
96
100
  if isinstance(json_extra, dict) and "table_name" in json_extra:
97
- return json_extra["table_name"]
101
+ table_name = json_extra["table_name"]
102
+ if isinstance(table_name, str):
103
+ return table_name
98
104
 
99
105
  # Infer from class name
100
106
  name = model.__name__
@@ -234,3 +240,152 @@ def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
234
240
  "entity_key_field": get_entity_key_field(model),
235
241
  "embeddable_fields": get_embeddable_fields(model),
236
242
  }
243
+
244
+
245
def _capitalize_word(part: str) -> str:
    """Return one delimiter-free word with an upper-case first letter.

    A word that already mixes upper- and lower-case letters (``domainResource``,
    ``DomainResource``) is assumed to encode its own word boundaries, so only
    its first character is upper-cased. A single-case word (``domain``,
    ``DOMAIN``) is capitalized and the rest lower-cased.
    """
    has_lower = any(c.islower() for c in part)
    has_upper = any(c.isupper() for c in part)
    if has_lower and has_upper:
        return part[0].upper() + part[1:]
    return part.capitalize()


def normalize_to_title_case(name: str) -> str:
    """
    Normalize arbitrary casing to TitleCase (PascalCase).

    Handles various input formats:
    - kebab-case: domain-resource → DomainResource
    - snake_case: domain_resource → DomainResource
    - camelCase: domainResource → DomainResource
    - lowercase: domainresource → Domainresource (single word)
    - TitleCase: DomainResource → DomainResource (passthrough)
    - Mixed: Domain-Resource, DOMAIN_RESOURCE → DomainResource

    Leading and trailing whitespace is ignored. An empty (or delimiter-only)
    input yields an empty string.

    Args:
        name: Input name in any casing format

    Returns:
        TitleCase (PascalCase) version of the name

    Example:
        >>> normalize_to_title_case("domain-resource")
        'DomainResource'
        >>> normalize_to_title_case("domain_resources")
        'DomainResources'
        >>> normalize_to_title_case("domainResource")
        'DomainResource'
        >>> normalize_to_title_case("DomainResource")
        'DomainResource'
    """
    # Split on the supported delimiters (hyphen, underscore); empty fragments
    # produced by leading/trailing/doubled delimiters are dropped.
    parts = re.split(r'[-_]', name.strip())
    return "".join(_capitalize_word(part) for part in parts if part)
288
+
289
+
290
def model_from_arbitrary_casing(
    name: str,
    registry: dict[str, Type[BaseModel]] | None = None,
) -> Type[BaseModel]:
    """
    Resolve a model class from arbitrary casing input.

    REM entity models use strict TitleCase (PascalCase) naming. This function
    allows flexible input formats while maintaining consistency:

    Input formats supported:
    - kebab-case: domain-resource, domain-resources
    - snake_case: domain_resource, domain_resources
    - lowercase: resource, domainresource
    - TitleCase: Resource, DomainResource

    Resolution order:
    1. Exact match of the normalized name, then its singular form (trailing
       "s" removed), then its plural form (trailing "s" added).
    2. The same three candidates again, compared case-insensitively. This is
       what lets a delimiter-free lowercase name such as "domainresource"
       (which normalizes to "Domainresource") resolve to DomainResource.

    Args:
        name: Model name in any supported casing format
        registry: Optional dict mapping TitleCase names to model classes.
            If not provided, uses rem.models.entities module.

    Returns:
        The resolved Pydantic model class

    Raises:
        ValueError: If no model matches the normalized name

    Example:
        >>> model = model_from_arbitrary_casing("domain-resources")
        >>> model.__name__
        'DomainResource'
        >>> model = model_from_arbitrary_casing("Resource")
        >>> model.__name__
        'Resource'
    """
    # Build default registry from entities module if not provided
    if registry is None:
        from rem.models.entities import (
            DomainResource,
            Feedback,
            File,
            ImageResource,
            Message,
            Moment,
            Ontology,
            OntologyConfig,
            Resource,
            Schema,
            Session,
            User,
        )

        registry = {
            "Resource": Resource,
            "Resources": Resource,  # Plural alias
            "DomainResource": DomainResource,
            "DomainResources": DomainResource,  # Plural alias
            "ImageResource": ImageResource,
            "ImageResources": ImageResource,
            "File": File,
            "Files": File,
            "Message": Message,
            "Messages": Message,
            "Moment": Moment,
            "Moments": Moment,
            "Session": Session,
            "Sessions": Session,
            "Feedback": Feedback,
            "User": User,
            "Users": User,
            "Schema": Schema,
            "Schemas": Schema,
            "Ontology": Ontology,
            "Ontologies": Ontology,
            "OntologyConfig": OntologyConfig,
            "OntologyConfigs": OntologyConfig,
        }

    # Normalize input to TitleCase
    normalized = normalize_to_title_case(name)

    # Candidate keys in priority order, paired with the suffix used in the
    # debug log so it is clear which fallback produced the match.
    candidates = [(normalized, "")]
    if normalized.endswith("s"):
        candidates.append((normalized[:-1], " (singular)"))
    candidates.append((normalized + "s", " (plural)"))

    # Pass 1: exact (case-sensitive) lookup
    for key, note in candidates:
        if key in registry:
            logger.debug(f"Resolved model '{name}' → {registry[key].__name__}{note}")
            return registry[key]

    # Pass 2: case-insensitive lookup. Word boundaries cannot be recovered
    # from input like "domainresource", so compare lower-cased keys instead.
    # setdefault keeps the first registry entry if two keys differ only by case.
    folded: dict[str, Type[BaseModel]] = {}
    for key, model in registry.items():
        folded.setdefault(key.lower(), model)

    for key, note in candidates:
        model = folded.get(key.lower())
        if model is not None:
            logger.debug(
                f"Resolved model '{name}' → {model.__name__}{note} (case-insensitive)"
            )
            return model

    available = sorted({m.__name__ for m in registry.values()})
    raise ValueError(
        f"Unknown model: '{name}' (normalized: '{normalized}'). "
        f"Available models: {', '.join(available)}"
    )
@@ -146,7 +146,6 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
146
146
  async def _async_lookup():
147
147
  """Async helper to query database."""
148
148
  from rem.services.postgres import get_postgres_service
149
- from rem.models.entities import Schema
150
149
 
151
150
  db = get_postgres_service()
152
151
  if not db:
@@ -156,19 +155,20 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
156
155
  try:
157
156
  await db.connect()
158
157
 
159
- # Use REM LOOKUP query to find schema
160
- query = f"LOOKUP '{schema_name}' FROM schemas"
161
- logger.debug(f"Executing: {query} (user_id={user_id})")
158
+ # Query schemas table directly by name
159
+ # Note: Schema name lookup is case-insensitive for user convenience
160
+ query = """
161
+ SELECT spec FROM schemas
162
+ WHERE LOWER(name) = LOWER($1)
163
+ AND (user_id = $2 OR user_id = 'system')
164
+ LIMIT 1
165
+ """
166
+ logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id}")
162
167
 
163
- result = await db.execute_rem_query(
164
- query=query,
165
- user_id=user_id,
166
- )
168
+ row = await db.fetchrow(query, schema_name, user_id)
167
169
 
168
- if result and isinstance(result, dict):
169
- # LOOKUP returns single entity or None
170
- # Extract spec field (JSON Schema)
171
- spec = result.get("spec")
170
+ if row:
171
+ spec = row.get("spec")
172
172
  if spec and isinstance(spec, dict):
173
173
  logger.debug(f"Found schema in database: {schema_name}")
174
174
  return spec
@@ -195,6 +195,8 @@ def load_agent_schema(
195
195
  """
196
196
  Load agent schema from YAML file with unified search logic and caching.
197
197
 
198
+ Schema names are case-invariant - "Rem", "rem", "REM" all resolve to the same schema.
199
+
198
200
  Filesystem schemas are cached indefinitely (immutable, versioned with code).
199
201
  Database schemas (future) will be cached with TTL for invalidation.
200
202
 
@@ -218,8 +220,8 @@ def load_agent_schema(
218
220
  9. Database LOOKUP: schemas table (if enable_db_fallback=True and user_id provided)
219
221
 
220
222
  Args:
221
- schema_name_or_path: Schema name or file path
222
- Examples: "rem-query-agent", "contract-analyzer", "./my-schema.yaml"
223
+ schema_name_or_path: Schema name or file path (case-invariant for names)
224
+ Examples: "rem-query-agent", "Contract-Analyzer", "./my-schema.yaml"
223
225
  use_cache: If True, uses in-memory cache for filesystem schemas
224
226
  user_id: User ID for database schema lookup (required for DB fallback)
225
227
  enable_db_fallback: If True, falls back to database LOOKUP when file not found
@@ -232,8 +234,8 @@ def load_agent_schema(
232
234
  yaml.YAMLError: If schema file is invalid YAML
233
235
 
234
236
  Examples:
235
- >>> # Load by short name (cached after first load)
236
- >>> schema = load_agent_schema("contract-analyzer")
237
+ >>> # Load by short name (cached after first load) - case invariant
238
+ >>> schema = load_agent_schema("Contract-Analyzer") # same as "contract-analyzer"
237
239
  >>>
238
240
  >>> # Load from custom path (not cached - custom paths may change)
239
241
  >>> schema = load_agent_schema("./my-agent.yaml")
@@ -241,11 +243,11 @@ def load_agent_schema(
241
243
  >>> # Load evaluator schema (cached)
242
244
  >>> schema = load_agent_schema("rem-lookup-correctness")
243
245
  >>>
244
- >>> # Load custom user schema from database
245
- >>> schema = load_agent_schema("my-custom-agent", user_id="user-123")
246
+ >>> # Load custom user schema from database (case invariant)
247
+ >>> schema = load_agent_schema("My-Agent", user_id="user-123") # same as "my-agent"
246
248
  """
247
- # Normalize the name for cache key
248
- cache_key = str(schema_name_or_path).replace('agents/', '').replace('schemas/', '').replace('evaluators/', '').replace('core/', '').replace('examples/', '')
249
+ # Normalize the name for cache key (lowercase for case-invariant lookups)
250
+ cache_key = str(schema_name_or_path).replace('agents/', '').replace('schemas/', '').replace('evaluators/', '').replace('core/', '').replace('examples/', '').lower()
249
251
  if cache_key.endswith('.yaml') or cache_key.endswith('.yml'):
250
252
  cache_key = cache_key.rsplit('.', 1)[0]
251
253
 
@@ -266,13 +268,23 @@ def load_agent_schema(
266
268
  # Don't cache custom paths (they may change)
267
269
  return cast(dict[str, Any], schema)
268
270
 
269
- # 2. Normalize name for package resource search
271
+ # 2. Normalize name for package resource search (lowercase)
270
272
  base_name = cache_key
271
273
 
272
- # 3. Try custom schema paths (from registry + SCHEMA__PATHS env var)
274
+ # 3. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
273
275
  from ..registry import get_schema_paths
274
276
 
275
277
  custom_paths = get_schema_paths()
278
+
279
+ # Auto-detect local folders if they exist (convention over configuration)
280
+ auto_detect_folders = ["./agents", "./schemas", "./evaluators"]
281
+ for auto_folder in auto_detect_folders:
282
+ auto_path = Path(auto_folder)
283
+ if auto_path.exists() and auto_path.is_dir():
284
+ resolved = str(auto_path.resolve())
285
+ if resolved not in custom_paths:
286
+ custom_paths.insert(0, resolved)
287
+ logger.debug(f"Auto-detected schema directory: {auto_folder}")
276
288
  for custom_dir in custom_paths:
277
289
  # Try various patterns within each custom directory
278
290
  for pattern in [
@@ -351,6 +363,122 @@ def load_agent_schema(
351
363
  )
352
364
 
353
365
 
366
async def load_agent_schema_async(
    schema_name_or_path: str,
    user_id: str | None = None,
    db=None,
) -> dict[str, Any]:
    """
    Async version of load_agent_schema for use in async contexts.

    Schema names are case-insensitive - "MyAgent", "myagent", "MYAGENT" all
    resolve to the same schema.

    This version accepts an existing database connection to avoid creating new connections.

    Search order:
    1. In-memory filesystem cache (names only, never explicit paths)
    2. Exact file path (not cached - custom paths may change)
    3. Custom schema directories (registry paths plus auto-detected
       ./agents, ./schemas, ./evaluators) - not cached
    4. Package resources (cached)
    5. Database ``schemas`` table (only when ``user_id`` is provided)

    Args:
        schema_name_or_path: Schema name or file path (case-insensitive for names)
        user_id: User ID for database schema lookup
        db: Optional existing PostgresService connection (if None, will create one)

    Returns:
        Agent schema as dictionary

    Raises:
        FileNotFoundError: If schema not found
    """

    def _read_yaml(file_path: Path) -> dict[str, Any]:
        # YAML is read as UTF-8 explicitly instead of relying on the
        # platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            return cast(dict[str, Any], yaml.safe_load(f))

    raw_name = str(schema_name_or_path)
    path = Path(raw_name)

    # Normalize the name for cache key (lowercase for case-insensitive lookups)
    cache_key = raw_name
    for prefix in ('agents/', 'schemas/', 'evaluators/', 'core/', 'examples/'):
        cache_key = cache_key.replace(prefix, '')
    cache_key = cache_key.lower()
    if cache_key.endswith(('.yaml', '.yml')):
        cache_key = cache_key.rsplit('.', 1)[0]

    # Only a regular file counts as an explicit path. A directory that merely
    # shares the schema's name (e.g. ./rem or ./agents in the working
    # directory) must not hijack the lookup and crash on open().
    path_is_file = path.is_file()
    is_custom_path = path_is_file or '/' in raw_name or '\\' in raw_name

    # Check cache
    if not is_custom_path and cache_key in _fs_schema_cache:
        logger.debug(f"Loading schema from cache: {cache_key}")
        return _fs_schema_cache[cache_key]

    # Try exact path
    if path_is_file:
        logger.debug(f"Loading schema from exact path: {path}")
        return _read_yaml(path)

    base_name = cache_key

    # Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
    from ..registry import get_schema_paths

    # Copy so that inserting auto-detected folders never mutates a list the
    # registry may be sharing between calls.
    custom_paths = list(get_schema_paths())

    # Auto-detect local folders if they exist (convention over configuration)
    for auto_folder in ("./agents", "./schemas", "./evaluators"):
        auto_path = Path(auto_folder)
        if auto_path.is_dir():
            resolved = str(auto_path.resolve())
            if resolved not in custom_paths:
                custom_paths.insert(0, resolved)
                logger.debug(f"Auto-detected schema directory: {auto_folder}")

    for custom_dir in custom_paths:
        for pattern in (f"{base_name}.yaml", f"{base_name}.yml", f"agents/{base_name}.yaml"):
            custom_path = Path(custom_dir) / pattern
            if custom_path.is_file():
                return _read_yaml(custom_path)

    # Try package resources
    for search_pattern in SCHEMA_SEARCH_PATHS:
        search_path = search_pattern.format(name=base_name)
        try:
            schema_ref = importlib.resources.files("rem") / search_path
            schema_path = Path(str(schema_ref))
            if schema_path.is_file():
                schema = _read_yaml(schema_path)
                _fs_schema_cache[cache_key] = schema
                return schema
        except Exception as e:
            # Best-effort: one unreadable location must not abort the search,
            # but leave a trace so a broken schema file can be diagnosed.
            logger.debug(f"Could not load schema from {search_path}: {e}")
            continue

    # Try database lookup
    if user_id:
        should_disconnect = False
        if db is None:
            from rem.services.postgres import get_postgres_service

            db = get_postgres_service()
            if db:
                await db.connect()
                # Only close connections this function opened itself.
                should_disconnect = True

        if db:
            try:
                # ORDER BY makes the choice deterministic when both a
                # user-owned and a system/shared schema share the name:
                # the caller's own schema wins.
                query = """
                    SELECT spec FROM schemas
                    WHERE LOWER(name) = LOWER($1)
                    AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
                    ORDER BY CASE WHEN user_id = $2 THEN 0 ELSE 1 END
                    LIMIT 1
                """
                row = await db.fetchrow(query, base_name, user_id)
                if row:
                    spec = row.get("spec")
                    if spec and isinstance(spec, dict):
                        logger.info(f"✅ Loaded schema from database: {base_name}")
                        return spec
            finally:
                if should_disconnect:
                    await db.disconnect()

    # Not found
    raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
480
+
481
+
354
482
  def validate_agent_schema(schema: dict[str, Any]) -> bool:
355
483
  """
356
484
  Validate agent schema structure.
@@ -383,3 +511,73 @@ def validate_agent_schema(schema: dict[str, Any]) -> bool:
383
511
 
384
512
  logger.debug("Schema validation passed")
385
513
  return True
514
+
515
+
516
def get_evaluator_schema_path(evaluator_name: str) -> Path | None:
    """
    Find the file path to an evaluator schema.

    Searches standard locations for the evaluator schema YAML file:
    - ./evaluators/{name}.yaml (local project)
    - Custom schema paths from registry
    - Package resources: schemas/evaluators/{name}.yaml

    The name is lower-cased and a trailing ".yaml"/".yml" extension is removed
    before searching.

    Args:
        evaluator_name: Name of the evaluator (e.g., "mental-health-classifier")

    Returns:
        Path to the evaluator schema file, or None if not found

    Example:
        >>> path = get_evaluator_schema_path("mental-health-classifier")
        >>> if path:
        ...     print(f"Found evaluator at: {path}")
    """
    from ..registry import get_schema_paths

    # Strip the extension only when it is a real suffix; a plain replace()
    # would also eat ".yaml"/".yml" appearing in the middle of a name.
    base_name = evaluator_name.lower()
    for suffix in ('.yaml', '.yml'):
        if base_name.endswith(suffix):
            base_name = base_name[: -len(suffix)]
            break

    # 1. Try custom schema paths (from registry + auto-detected)
    # Copy so that inserting auto-detected folders never mutates a list the
    # registry may be sharing between calls.
    custom_paths = list(get_schema_paths())

    # Auto-detect local folders
    for auto_folder in ("./evaluators", "./schemas", "./agents"):
        auto_path = Path(auto_folder)
        if auto_path.is_dir():
            resolved = str(auto_path.resolve())
            if resolved not in custom_paths:
                custom_paths.insert(0, resolved)

    for custom_dir in custom_paths:
        # Try various patterns within each custom directory
        for pattern in (
            f"{base_name}.yaml",
            f"{base_name}.yml",
            f"evaluators/{base_name}.yaml",
        ):
            custom_path = Path(custom_dir) / pattern
            if custom_path.exists():
                logger.debug(f"Found evaluator schema: {custom_path}")
                return custom_path

    # 2. Try package resources
    evaluator_search_paths = [
        f"schemas/evaluators/{base_name}.yaml",
        f"schemas/evaluators/rem/{base_name}.yaml",
    ]

    for search_path in evaluator_search_paths:
        try:
            schema_ref = importlib.resources.files("rem") / search_path
            schema_path = Path(str(schema_ref))

            if schema_path.exists():
                logger.debug(f"Found evaluator schema in package: {schema_path}")
                return schema_path
        except Exception as e:
            # Best-effort probe: log and move on to the next location.
            logger.debug(f"Could not check {search_path}: {e}")
            continue

    logger.warning(f"Evaluator schema not found: {evaluator_name}")
    return None
rem/utils/sql_paths.py ADDED
@@ -0,0 +1,146 @@
1
+ """Utilities for resolving SQL file paths.
2
+
3
+ Handles package SQL directory resolution and user migrations.
4
+
5
+ Convention for user migrations:
6
+ Place custom SQL files in `./sql/migrations/` relative to your project root.
7
+ Files should be numbered (e.g., `100_custom_table.sql`) to control execution order.
8
+ Package migrations (001-099) run first, then user migrations (100+).
9
+ """
10
+
11
+ from pathlib import Path
12
+ from typing import List, Optional
13
+ import importlib.resources
14
+
15
+ # Convention: Default location for user-maintained migrations
16
+ USER_SQL_DIR_CONVENTION = "sql"
17
+
18
+
19
def get_package_sql_dir() -> Path:
    """Get the SQL directory from the installed rem package.

    Three locations are tried in order:
    1. ``importlib.resources.files("rem") / "sql"``
    2. ``<directory of rem.__file__>/sql``
    3. ``src/rem/sql`` relative to the current working directory
       (development checkout)

    Returns:
        Path to the package's sql directory

    Raises:
        FileNotFoundError: If the SQL directory cannot be found
    """
    try:
        # Use importlib.resources for Python 3.9+
        sql_ref = importlib.resources.files("rem") / "sql"
        package_sql = Path(str(sql_ref))
        if package_sql.exists():
            return package_sql
    except (ImportError, AttributeError, TypeError):
        # ImportError covers ModuleNotFoundError, which files() raises when
        # "rem" is not importable; without it the fallbacks below would
        # never be reached.
        pass

    # Fallback: use __file__ to find package location
    try:
        import rem

        # rem.__file__ is None for namespace packages -> Path(None) raises
        # TypeError, which is treated as "not found here".
        package_sql = Path(rem.__file__).parent / "sql"
        if package_sql.exists():
            return package_sql
    except (ImportError, AttributeError, TypeError):
        pass

    # Development fallback: check relative to cwd
    dev_sql = Path("src/rem/sql")
    if dev_sql.exists():
        return dev_sql

    raise FileNotFoundError(
        "Could not locate rem SQL directory. "
        "Ensure remdb is properly installed or run from the source directory."
    )
55
+
56
+
57
def get_package_migrations_dir() -> Path:
    """Return the ``migrations`` subdirectory of the package SQL directory.

    The path is built from ``get_package_sql_dir()``; it is not checked for
    existence here.

    Returns:
        Path to the package's migrations directory
    """
    sql_root = get_package_sql_dir()
    return sql_root / "migrations"
64
+
65
+
66
def get_user_sql_dir() -> Optional[Path]:
    """Return the conventional user SQL directory, if present.

    The candidate is ``USER_SQL_DIR_CONVENTION`` resolved against the current
    working directory; this is the conventional home for user-maintained
    migrations.

    Returns:
        The directory path when it exists and is a directory, otherwise None
    """
    candidate = Path.cwd() / USER_SQL_DIR_CONVENTION
    if not (candidate.exists() and candidate.is_dir()):
        return None
    return candidate
79
+
80
+
81
def list_package_migrations() -> List[Path]:
    """Collect the numbered ``*.sql`` migrations shipped with the package.

    Only files whose name starts with a digit are included. If the package
    SQL directory cannot be located, or the migrations directory does not
    exist, the result is empty.

    Returns:
        Sorted list of migration file paths
    """
    try:
        package_dir = get_package_migrations_dir()
        if not package_dir.exists():
            return []
        numbered = [
            sql_file
            for sql_file in package_dir.glob("*.sql")
            if sql_file.name[0].isdigit()  # Only numbered migrations
        ]
        numbered.sort()
        return numbered
    except FileNotFoundError:
        return []
98
+
99
+
100
def list_user_migrations() -> List[Path]:
    """Collect the numbered ``*.sql`` files in the user's sql/migrations directory.

    Only files whose name starts with a digit are included. The result is
    empty when there is no user SQL directory or no ``migrations`` folder
    inside it.

    Returns:
        Sorted list of user migration file paths
    """
    sql_root = get_user_sql_dir()
    if not sql_root:
        return []

    user_dir = sql_root / "migrations"
    if not user_dir.exists():
        return []

    numbered = [
        sql_file
        for sql_file in user_dir.glob("*.sql")
        if sql_file.name[0].isdigit()  # Only numbered migrations
    ]
    numbered.sort()
    return numbered
115
+
116
+
117
def list_all_migrations() -> List[Path]:
    """Merge package and user migrations into one list ordered by filename.

    Sources, in precedence order:
    1. Package migrations directory
    2. User directory (./sql/migrations/) if it exists

    When the same filename appears in both, the package file is kept and the
    user file is dropped. Ordering is a plain string sort on the filename, so
    use numbered prefixes to control order:
    - 001-099: Reserved for package migrations
    - 100+: Recommended for user migrations

    Returns:
        Sorted list of all migration file paths (by filename)
    """
    # Package entries come first so setdefault() lets them win name clashes.
    unique_by_name: dict[str, Path] = {}
    for migration in [*list_package_migrations(), *list_user_migrations()]:
        unique_by_name.setdefault(migration.name, migration)

    return sorted(unique_by_name.values(), key=lambda p: p.name)
rem/utils/sql_types.py CHANGED
@@ -16,6 +16,7 @@ Best Practices:
16
16
  - UUID for identifiers in Union types
17
17
  """
18
18
 
19
+ import types
19
20
  from datetime import date, datetime, time
20
21
  from typing import Any, Union, get_args, get_origin
21
22
  from uuid import UUID
@@ -78,8 +79,9 @@ def get_sql_type(field_info: FieldInfo, field_name: str) -> str:
78
79
  return "TEXT"
79
80
 
80
81
  # Handle Union types (including Optional[T] which is Union[T, None])
82
+ # Also handles Python 3.10+ `X | None` syntax which uses types.UnionType
81
83
  origin = get_origin(annotation)
82
- if origin is Union:
84
+ if origin is Union or isinstance(annotation, types.UnionType):
83
85
  args = get_args(annotation)
84
86
  # Filter out NoneType
85
87
  non_none_args = [arg for arg in args if arg is not type(None)]
rem/workers/__init__.py CHANGED
@@ -1,5 +1,7 @@
1
1
  """Background workers for processing tasks."""
2
2
 
3
+ from .db_listener import DBListener
3
4
  from .sqs_file_processor import SQSFileProcessor
5
+ from .unlogged_maintainer import UnloggedMaintainer
4
6
 
5
- __all__ = ["SQSFileProcessor"]
7
+ __all__ = ["DBListener", "SQSFileProcessor", "UnloggedMaintainer"]