remdb 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (55)
  1. rem/agentic/context.py +28 -24
  2. rem/agentic/mcp/tool_wrapper.py +29 -3
  3. rem/agentic/otel/setup.py +92 -4
  4. rem/agentic/providers/pydantic_ai.py +88 -18
  5. rem/agentic/schema.py +358 -21
  6. rem/agentic/tools/rem_tools.py +3 -3
  7. rem/api/main.py +85 -16
  8. rem/api/mcp_router/resources.py +1 -1
  9. rem/api/mcp_router/server.py +18 -4
  10. rem/api/mcp_router/tools.py +383 -16
  11. rem/api/routers/admin.py +218 -1
  12. rem/api/routers/chat/completions.py +30 -3
  13. rem/api/routers/chat/streaming.py +143 -3
  14. rem/api/routers/feedback.py +12 -319
  15. rem/api/routers/query.py +360 -0
  16. rem/api/routers/shared_sessions.py +13 -13
  17. rem/cli/commands/README.md +237 -64
  18. rem/cli/commands/cluster.py +1300 -0
  19. rem/cli/commands/configure.py +1 -3
  20. rem/cli/commands/db.py +354 -143
  21. rem/cli/commands/process.py +14 -8
  22. rem/cli/commands/schema.py +92 -45
  23. rem/cli/main.py +27 -6
  24. rem/models/core/rem_query.py +5 -2
  25. rem/models/entities/shared_session.py +2 -28
  26. rem/registry.py +10 -4
  27. rem/services/content/service.py +30 -8
  28. rem/services/embeddings/api.py +4 -4
  29. rem/services/embeddings/worker.py +16 -16
  30. rem/services/postgres/README.md +151 -26
  31. rem/services/postgres/__init__.py +2 -1
  32. rem/services/postgres/diff_service.py +531 -0
  33. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  34. rem/services/postgres/schema_generator.py +205 -4
  35. rem/services/postgres/service.py +6 -6
  36. rem/services/rem/parser.py +44 -9
  37. rem/services/rem/service.py +36 -2
  38. rem/services/session/reload.py +1 -1
  39. rem/settings.py +56 -7
  40. rem/sql/background_indexes.sql +19 -24
  41. rem/sql/migrations/001_install.sql +252 -69
  42. rem/sql/migrations/002_install_models.sql +2171 -593
  43. rem/sql/migrations/003_optional_extensions.sql +326 -0
  44. rem/sql/migrations/004_cache_system.sql +548 -0
  45. rem/utils/__init__.py +18 -0
  46. rem/utils/date_utils.py +2 -2
  47. rem/utils/schema_loader.py +17 -13
  48. rem/utils/sql_paths.py +146 -0
  49. rem/workers/__init__.py +2 -1
  50. rem/workers/unlogged_maintainer.py +463 -0
  51. {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/METADATA +149 -76
  52. {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/RECORD +54 -48
  53. rem/sql/migrations/003_seed_default_user.sql +0 -48
  54. {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/WHEEL +0 -0
  55. {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
@@ -12,6 +12,7 @@ Output includes:
12
12
  - KV_STORE triggers
13
13
  - Indexes (foreground and background)
14
14
  - Migrations
15
+ - Schema table entries (for agent-like table access)
15
16
 
16
17
  Usage:
17
18
  from rem.services.postgres.schema_generator import SchemaGenerator
@@ -30,14 +31,192 @@ Usage:
30
31
 
31
32
  import importlib.util
32
33
  import inspect
34
+ import json
35
+ import uuid
33
36
  from pathlib import Path
34
- from typing import Type
37
+ from typing import Any, Type
35
38
 
36
39
  from loguru import logger
37
40
  from pydantic import BaseModel
38
41
 
39
42
  from ...settings import settings
40
- from .register_type import register_type
43
+ from ...utils.sql_paths import get_package_sql_dir
44
+ from .register_type import register_type, should_embed_field
45
+
# Fixed namespace for deriving deterministic UUIDs from model names.
# Hashing the same name under this namespace with UUID5 always yields the
# same UUID.
REM_SCHEMA_NAMESPACE = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")  # DNS namespace


def generate_model_uuid(fully_qualified_name: str) -> uuid.UUID:
    """
    Derive a stable UUID for a model from its fully qualified name.

    The name is hashed with UUID5 (SHA-1) under REM_SCHEMA_NAMESPACE, so the
    result is reproducible: equal names always map to equal UUIDs.

    Args:
        fully_qualified_name: Full module path, e.g., "rem.models.entities.Resource"

    Returns:
        Deterministic UUID for this model
    """
    model_uuid = uuid.uuid5(REM_SCHEMA_NAMESPACE, fully_qualified_name)
    return model_uuid
65
+
66
+
def extract_model_schema_metadata(
    model: Type[BaseModel],
    table_name: str,
    entity_key_field: str,
    include_search_tool: bool = True,
) -> dict[str, Any]:
    """
    Extract schema metadata from a Pydantic model for schemas table.

    Builds three things from the model: a deterministic id (UUID5 of the
    fully qualified name), a ``spec`` dict (JSON-schema properties plus table
    metadata under ``json_schema_extra``), and a markdown ``content`` document
    describing the table and its fields.

    Args:
        model: Pydantic model class
        table_name: Database table name
        entity_key_field: Field used as entity key in kv_store
        include_search_tool: If True, add search_rem tool for querying this
            table. When False, the spec lists no tools and the generated
            documentation omits the "Search Capabilities" section, so the
            document no longer advertises a tool the spec does not provide.

    Returns:
        Dict with schema metadata ready for schemas table insert
    """
    fqn = f"{model.__module__}.{model.__name__}"

    # Deterministic id: the same fully qualified name always maps to the same UUID.
    schema_id = generate_model_uuid(fqn)

    json_schema = model.model_json_schema()

    # Fields selected for embedding by should_embed_field.
    embedding_fields = [
        field_name
        for field_name, field_info in model.model_fields.items()
        if should_embed_field(field_name, field_info)
    ]
    has_embeddings = bool(embedding_fields)

    # Description, with a note about search capability when the tool is attached.
    base_description = model.__doc__ or f"Schema for {model.__name__}"
    search_note = (
        f"\n\nThis agent can search the `{table_name}` table using the `search_rem` tool. "
        "Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, "
        "SEARCH for semantic similarity, or SQL for complex queries."
    ) if include_search_tool else ""

    # Note: default_search_table is used by create_agent to append a description
    # suffix to the search_rem tool when loading it dynamically
    spec = {
        "type": "object",
        "description": base_description + search_note,
        "properties": json_schema.get("properties", {}),
        "required": json_schema.get("required", []),
        "json_schema_extra": {
            "table_name": table_name,
            "entity_key_field": entity_key_field,
            "embedding_fields": embedding_fields,
            "fully_qualified_name": fqn,
            "tools": ["search_rem"] if include_search_tool else [],
            "default_search_table": table_name,
            "has_embeddings": has_embeddings,
        },
    }

    # Build the markdown documentation as a list of lines and join once,
    # rather than growing a string with repeated "+=".
    doc_lines = [
        f"# {model.__name__}",
        "",
        base_description,
        "",
        "## Overview",
        "",
        f"The `{model.__name__}` entity is stored in the `{table_name}` table. Each record is uniquely",
        f"identified by its `{entity_key_field}` field for lookups and graph traversal.",
        "",
    ]

    # Only advertise search_rem when the tool is actually attached to the spec.
    if include_search_tool:
        search_targets = ", ".join(embedding_fields) if embedding_fields else "content"
        doc_lines += [
            "## Search Capabilities",
            "",
            "This schema includes the `search_rem` tool which supports:",
            f'- **LOOKUP**: O(1) exact match by {entity_key_field} (e.g., `LOOKUP "entity-name"`)',
            '- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)',
            f'- **SEARCH**: Semantic vector search on {search_targets} (e.g., `SEARCH "concept" FROM {table_name} LIMIT 10`)',
            f"- **SQL**: Complex queries (e.g., `SELECT * FROM {table_name} WHERE ...`)",
            "",
        ]

    embedding_cell = (
        ", ".join(f"`{name}`" for name in embedding_fields) if embedding_fields else "None"
    )
    tools_cell = "`search_rem`" if include_search_tool else "None"
    doc_lines += [
        "## Table Info",
        "",
        "| Property | Value |",
        "|----------|-------|",
        f"| Table | `{table_name}` |",
        f"| Entity Key | `{entity_key_field}` |",
        f"| Embedding Fields | {embedding_cell} |",
        f"| Tools | {tools_cell} |",
        "",
        "## Fields",
        "",
    ]

    # One subsection per model field: type, required/optional, optional description.
    for field_name, field_info in model.model_fields.items():
        field_type = str(field_info.annotation) if field_info.annotation else "Any"
        required = "Required" if field_info.is_required() else "Optional"
        doc_lines += [
            f"### `{field_name}`",
            f"- **Type**: `{field_type}`",
            f"- **{required}**",
        ]
        if field_info.description:
            doc_lines.append(f"- {field_info.description}")
        doc_lines.append("")

    # The trailing newline keeps the blank line that ends the last section.
    content = "\n".join(doc_lines) + "\n"

    return {
        "id": str(schema_id),
        "name": model.__name__,
        "table_name": table_name,
        "entity_key_field": entity_key_field,
        "embedding_fields": embedding_fields,
        "fqn": fqn,
        "spec": spec,
        "content": content,
        "category": "entity",
    }
181
+
182
+
def _sql_literal(value: Any) -> str:
    """Render *value* as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + str(value).replace("'", "''") + "'"


def generate_schema_upsert_sql(schema_metadata: dict[str, Any]) -> str:
    """
    Generate SQL UPSERT statement for schemas table.

    Uses ON CONFLICT DO UPDATE for idempotency.

    Every interpolated value (id, name, content, spec, metadata) is emitted
    through the same quote-doubling helper, and the metadata JSON is produced
    with ``json.dumps`` rather than string interpolation, so quotes or
    backslashes in model or field names cannot break the statement.

    Args:
        schema_metadata: Dict from extract_model_schema_metadata(). Reads the
            keys "id", "name", "table_name", "entity_key_field",
            "embedding_fields", "fqn", "spec" and "content".

    Returns:
        SQL INSERT ... ON CONFLICT statement

    Raises:
        KeyError: If a required key is missing from schema_metadata.
    """
    name = schema_metadata["name"]
    table_name = schema_metadata["table_name"]

    spec_json = json.dumps(schema_metadata["spec"])
    # Key order matches the column metadata layout used previously.
    metadata_json = json.dumps(
        {
            "table_name": table_name,
            "entity_key_field": schema_metadata["entity_key_field"],
            "embedding_fields": schema_metadata["embedding_fields"],
            "fqn": schema_metadata["fqn"],
        }
    )

    # Collapse whitespace so a stray newline cannot terminate the "--" comment
    # early and leak the rest of the label into executable SQL.
    label = " ".join(f"{name} ({table_name})".split())

    statement_lines = [
        f"-- Schema entry for {label}",
        "INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)",
        "VALUES (",
        f"    {_sql_literal(schema_metadata['id'])}::uuid,",
        "    'system',",
        f"    {_sql_literal(name)},",
        f"    {_sql_literal(schema_metadata['content'])},",
        f"    {_sql_literal(spec_json)}::jsonb,",
        "    'entity',",
        f"    {_sql_literal(metadata_json)}::jsonb",
        ")",
        "ON CONFLICT (id) DO UPDATE SET",
        "    name = EXCLUDED.name,",
        "    content = EXCLUDED.content,",
        "    spec = EXCLUDED.spec,",
        "    category = EXCLUDED.category,",
        "    metadata = EXCLUDED.metadata,",
        "    updated_at = CURRENT_TIMESTAMP;",
    ]
    return "\n".join(statement_lines).strip()
41
220
 
42
221
 
43
222
  class SchemaGenerator:
@@ -56,9 +235,9 @@ class SchemaGenerator:
56
235
  Initialize schema generator.
57
236
 
58
237
  Args:
59
- output_dir: Optional directory for output files (defaults to settings.sql_dir)
238
+ output_dir: Optional directory for output files (defaults to package sql dir)
60
239
  """
61
- self.output_dir = output_dir or Path(settings.sql_dir)
240
+ self.output_dir = output_dir or get_package_sql_dir()
62
241
  self.schemas: dict[str, dict] = {}
63
242
 
64
243
  def discover_models(self, directory: str | Path) -> dict[str, Type[BaseModel]]:
@@ -234,6 +413,14 @@ class SchemaGenerator:
234
413
  create_kv_trigger=True,
235
414
  )
236
415
 
416
+ # Extract schema metadata for schemas table entry
417
+ schema_metadata = extract_model_schema_metadata(
418
+ model=model,
419
+ table_name=table_name,
420
+ entity_key_field=entity_key_field,
421
+ )
422
+ schema["schema_metadata"] = schema_metadata
423
+
237
424
  self.schemas[table_name] = schema
238
425
  return schema
239
426
 
@@ -343,6 +530,7 @@ class SchemaGenerator:
343
530
  "-- 2. Embeddings tables (embeddings_<table>)",
344
531
  "-- 3. KV_STORE triggers for cache maintenance",
345
532
  "-- 4. Indexes (foreground only, background indexes separate)",
533
+ "-- 5. Schema table entries (for agent-like table access)",
346
534
  "",
347
535
  "-- ============================================================================",
348
536
  "-- PREREQUISITES CHECK",
@@ -388,6 +576,19 @@ class SchemaGenerator:
388
576
  sql_parts.append(schema["sql"]["kv_trigger"])
389
577
  sql_parts.append("")
390
578
 
579
+ # Add schema table entries (every entity table is also an "agent")
580
+ sql_parts.append("-- ============================================================================")
581
+ sql_parts.append("-- SCHEMA TABLE ENTRIES")
582
+ sql_parts.append("-- Every entity table gets a schemas entry for agent-like access")
583
+ sql_parts.append("-- ============================================================================")
584
+ sql_parts.append("")
585
+
586
+ for table_name, schema in self.schemas.items():
587
+ if "schema_metadata" in schema:
588
+ schema_upsert = generate_schema_upsert_sql(schema["schema_metadata"])
589
+ sql_parts.append(schema_upsert)
590
+ sql_parts.append("")
591
+
391
592
  # Add migration record
392
593
  sql_parts.append("-- ============================================================================")
393
594
  sql_parts.append("-- RECORD MIGRATION")
@@ -190,19 +190,19 @@ class PostgresService:
190
190
 
191
191
  async def connect(self) -> None:
192
192
  """Establish database connection pool."""
193
- logger.info(f"Connecting to PostgreSQL with pool size {self.pool_size}")
193
+ logger.debug(f"Connecting to PostgreSQL with pool size {self.pool_size}")
194
194
  self.pool = await asyncpg.create_pool(
195
195
  self.connection_string,
196
196
  min_size=1,
197
197
  max_size=self.pool_size,
198
198
  init=self._init_connection, # Configure JSONB codec on each connection
199
199
  )
200
- logger.info("PostgreSQL connection pool established")
200
+ logger.debug("PostgreSQL connection pool established")
201
201
 
202
202
  # Start embedding worker if available
203
203
  if self.embedding_worker and hasattr(self.embedding_worker, "start"):
204
204
  await self.embedding_worker.start()
205
- logger.info("Embedding worker started")
205
+ logger.debug("Embedding worker started")
206
206
 
207
207
  async def disconnect(self) -> None:
208
208
  """Close database connection pool."""
@@ -211,10 +211,10 @@ class PostgresService:
211
211
  # The worker will be stopped explicitly when the application shuts down
212
212
 
213
213
  if self.pool:
214
- logger.info("Closing PostgreSQL connection pool")
214
+ logger.debug("Closing PostgreSQL connection pool")
215
215
  await self.pool.close()
216
216
  self.pool = None
217
- logger.info("PostgreSQL connection pool closed")
217
+ logger.debug("PostgreSQL connection pool closed")
218
218
 
219
219
  async def execute(
220
220
  self,
@@ -631,7 +631,7 @@ class PostgresService:
631
631
  table_name: str,
632
632
  embedding: list[float],
633
633
  limit: int = 10,
634
- min_similarity: float = 0.7,
634
+ min_similarity: float = 0.3,
635
635
  tenant_id: Optional[str] = None,
636
636
  ) -> list[dict[str, Any]]:
637
637
  """
@@ -50,9 +50,36 @@ class RemQueryParser:
50
50
  params: Dict[str, Any] = {}
51
51
  positional_args: List[str] = []
52
52
 
53
- # Process remaining tokens
54
- for token in tokens[1:]:
55
- if "=" in token:
53
+ # For SQL queries, preserve the raw query (keywords like LIMIT are SQL keywords)
54
+ if query_type == QueryType.SQL:
55
+ # Everything after "SQL" is the raw SQL query
56
+ raw_sql = query_string[3:].strip() # Skip "SQL" prefix
57
+ params["raw_query"] = raw_sql
58
+ return query_type, params
59
+
60
+ # Process remaining tokens, handling REM keywords
61
+ i = 1
62
+ while i < len(tokens):
63
+ token = tokens[i]
64
+ token_upper = token.upper()
65
+
66
+ # Handle REM keywords that take a value
67
+ if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH"):
68
+ if i + 1 < len(tokens):
69
+ keyword_map = {
70
+ "LIMIT": "limit",
71
+ "DEPTH": "max_depth",
72
+ "THRESHOLD": "threshold",
73
+ "TYPE": "edge_types",
74
+ "FROM": "initial_query",
75
+ "WITH": "initial_query",
76
+ }
77
+ key = keyword_map[token_upper]
78
+ value = tokens[i + 1]
79
+ params[key] = self._convert_value(key, value)
80
+ i += 2
81
+ continue
82
+ elif "=" in token:
56
83
  # It's a keyword argument
57
84
  key, value = token.split("=", 1)
58
85
  # Handle parameter aliases
@@ -61,6 +88,7 @@ class RemQueryParser:
61
88
  else:
62
89
  # It's a positional argument part
63
90
  positional_args.append(token)
91
+ i += 1
64
92
 
65
93
  # Map positional arguments to specific fields based on QueryType
66
94
  self._map_positional_args(query_type, positional_args, params)
@@ -133,13 +161,20 @@ class RemQueryParser:
133
161
  params["query_text"] = combined_value
134
162
 
135
163
  elif query_type == QueryType.SEARCH:
136
- params["query_text"] = combined_value
164
+ # SEARCH expects: SEARCH <table> <query_text> [LIMIT n]
165
+ # First positional arg is table name, rest is query text
166
+ if len(positional_args) >= 2:
167
+ params["table_name"] = positional_args[0]
168
+ params["query_text"] = " ".join(positional_args[1:])
169
+ elif len(positional_args) == 1:
170
+ # Could be table name or query text - assume query text if no table
171
+ params["query_text"] = positional_args[0]
172
+ # If no positional args, params stays empty
137
173
 
138
174
  elif query_type == QueryType.TRAVERSE:
139
175
  params["initial_query"] = combined_value
140
176
 
141
- # SQL typically requires named arguments (table=...), but if we supported
142
- # SQL SELECT * FROM ..., we might handle it differently.
143
- # For now, RemService expects table=...
144
- # If there are positional args for SQL, we might ignore or raise,
145
- # but current service doesn't use them.
177
+ elif query_type == QueryType.SQL:
178
+ # SQL with positional args means "SQL SELECT * FROM ..." form
179
+ # Treat the combined positional args as the raw SQL query
180
+ params["raw_query"] = combined_value
@@ -13,6 +13,31 @@ Design:
13
13
  - All queries pushed down to Postgres for performance
14
14
  - Model schema inspection for validation only
15
15
  - Exceptions for missing fields/embeddings
16
+
17
+ TODO: Staged Plan Execution
18
+ - Implement execute_staged_plan() method for multi-stage query execution
19
+ - Each stage can be:
20
+ 1. Static query (query field): Execute REM dialect directly
21
+ 2. Dynamic query (intent field): LLM interprets intent + previous results to build query
22
+ - Flow for dynamic stages:
23
+ 1. Gather results from depends_on stages (from previous_results or current execution)
24
+ 2. Pass intent + previous results to LLM (like ask_rem but with context)
25
+ 3. LLM generates REM query based on what it learned from previous stages
26
+ 4. Execute generated query
27
+ 5. Store results in stage_results for client to use in continuation
28
+ - Multi-turn continuation:
29
+ - Client passes previous_results back from response's stage_results
30
+ - Client sets resume_from_stage to skip already-executed stages
31
+ - Server uses previous_results as context for depends_on lookups
32
+ - Use cases:
33
+ - LOOKUP "Sarah" → intent: "find her team members" (LLM sees Sarah's graph_edges, builds TRAVERSE)
34
+ - SEARCH "API docs" → intent: "get authors" (LLM extracts author refs, builds LOOKUP)
35
+ - Complex graph exploration with LLM-driven navigation
36
+ - API: POST /api/v1/query with:
37
+ - mode="staged-plan"
38
+ - plan=[{stage, query|intent, name, depends_on}]
39
+ - previous_results=[{stage, name, query_executed, results, count}] (for continuation)
40
+ - resume_from_stage=N (to skip completed stages)
16
41
  """
17
42
 
18
43
  from typing import Any
@@ -309,17 +334,26 @@ class RemService:
309
334
  )
310
335
 
311
336
  # Execute vector search via rem_search() PostgreSQL function
337
+ min_sim = params.min_similarity if params.min_similarity is not None else 0.3
338
+ limit = params.limit or 10
312
339
  query_params = get_search_params(
313
340
  query_embedding,
314
341
  table_name,
315
342
  field_name,
316
343
  tenant_id,
317
344
  provider,
318
- params.min_similarity or 0.7,
319
- params.limit or 10,
345
+ min_sim,
346
+ limit,
320
347
  tenant_id, # Use tenant_id (query.user_id) as user_id
321
348
  )
349
+ logger.debug(
350
+ f"SEARCH params: table={table_name}, field={field_name}, "
351
+ f"tenant_id={tenant_id}, provider={provider}, "
352
+ f"min_similarity={min_sim}, limit={limit}, "
353
+ f"embedding_dims={len(query_embedding)}"
354
+ )
322
355
  results = await self.db.execute(SEARCH_QUERY, query_params)
356
+ logger.debug(f"SEARCH results: {len(results)} rows")
323
357
 
324
358
  return {
325
359
  "query_type": "SEARCH",
@@ -65,7 +65,7 @@ async def reload_session(
65
65
  session_id=session_id, user_id=user_id, decompress=decompress_messages
66
66
  )
67
67
 
68
- logger.info(
68
+ logger.debug(
69
69
  f"Reloaded {len(messages)} messages for session {session_id} "
70
70
  f"(decompressed={decompress_messages})"
71
71
  )
rem/settings.py CHANGED
@@ -58,7 +58,7 @@ Example .env file:
58
58
 
59
59
  import os
60
60
  import hashlib
61
- from pydantic import Field, field_validator, FieldValidationInfo
61
+ from pydantic import Field, field_validator, ValidationInfo
62
62
  from pydantic_settings import BaseSettings, SettingsConfigDict
63
63
  from loguru import logger
64
64
 
@@ -414,7 +414,7 @@ class AuthSettings(BaseSettings):
414
414
 
415
415
  @field_validator("session_secret", mode="before")
416
416
  @classmethod
417
- def generate_dev_secret(cls, v: str | None, info: FieldValidationInfo) -> str:
417
+ def generate_dev_secret(cls, v: str | None, info: ValidationInfo) -> str:
418
418
  # Only generate if not already set and not in production
419
419
  if not v and info.data.get("environment") != "production":
420
420
  # Deterministic secret for development
@@ -1004,6 +1004,59 @@ class APISettings(BaseSettings):
1004
1004
  )
1005
1005
 
1006
1006
 
class ModelsSettings(BaseSettings):
    """
    Settings that let downstream applications register their own models.

    A downstream app lists the Python modules that hold its custom models.
    Those modules are meant to be imported (and their models thereby
    registered) before schema generation, so that `rem db schema generate`
    can discover models decorated with `@rem.register_model`.

    Environment variables:
        MODELS__IMPORT_MODULES - Semicolon-separated list of Python modules to import
            Example: "models;myapp.entities;myapp.custom_models"

    Example:
        # In downstream app's .env
        MODELS__IMPORT_MODULES=models

        # In downstream app's models/__init__.py
        import rem
        from rem.models.core import CoreModel

        @rem.register_model
        class MyCustomEntity(CoreModel):
            name: str

        # Then run schema generation
        rem db schema generate  # Includes MyCustomEntity
    """

    model_config = SettingsConfigDict(
        env_prefix="MODELS__",
        extra="ignore",
    )

    # Raw semicolon-separated value; use module_list for the parsed form.
    import_modules: str = Field(
        default="",
        description=(
            "Semicolon-separated list of Python modules to import for model registration. "
            "These modules are imported before schema generation to ensure custom models "
            "decorated with @rem.register_model are discovered. "
            "Example: 'models;myapp.entities'"
        ),
    )

    @property
    def module_list(self) -> list[str]:
        """Return the configured module names, trimmed, with blank entries dropped."""
        raw_value = self.import_modules or ""
        trimmed = (piece.strip() for piece in raw_value.split(";"))
        return [piece for piece in trimmed if piece]
1058
+
1059
+
1007
1060
  class SchemaSettings(BaseSettings):
1008
1061
  """
1009
1062
  Schema search path settings for agent and evaluator schemas.
@@ -1281,16 +1334,12 @@ class Settings(BaseSettings):
1281
1334
  description="Root path for reverse proxy (e.g., /rem for ALB routing)",
1282
1335
  )
1283
1336
 
1284
- sql_dir: str = Field(
1285
- default="src/rem/sql",
1286
- description="Directory for SQL files and migrations",
1287
- )
1288
-
1289
1337
  # Nested settings groups
1290
1338
  api: APISettings = Field(default_factory=APISettings)
1291
1339
  chat: ChatSettings = Field(default_factory=ChatSettings)
1292
1340
  llm: LLMSettings = Field(default_factory=LLMSettings)
1293
1341
  mcp: MCPSettings = Field(default_factory=MCPSettings)
1342
+ models: ModelsSettings = Field(default_factory=ModelsSettings)
1294
1343
  otel: OTELSettings = Field(default_factory=OTELSettings)
1295
1344
  phoenix: PhoenixSettings = Field(default_factory=PhoenixSettings)
1296
1345
  auth: AuthSettings = Field(default_factory=AuthSettings)
@@ -1,9 +1,9 @@
1
1
  -- Background index creation
2
2
  -- Run AFTER initial data load to avoid blocking writes
3
3
 
4
- -- HNSW vector index for embeddings_users
5
- CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_users_vector_hnsw
6
- ON embeddings_users
4
+ -- HNSW vector index for embeddings_files
5
+ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_files_vector_hnsw
6
+ ON embeddings_files
7
7
  USING hnsw (embedding vector_cosine_ops);
8
8
 
9
9
  -- HNSW vector index for embeddings_image_resources
@@ -11,29 +11,14 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_image_resources_vector_hn
11
11
  ON embeddings_image_resources
12
12
  USING hnsw (embedding vector_cosine_ops);
13
13
 
14
- -- HNSW vector index for embeddings_moments
15
- CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
16
- ON embeddings_moments
17
- USING hnsw (embedding vector_cosine_ops);
18
-
19
- -- HNSW vector index for embeddings_sessions
20
- CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_sessions_vector_hnsw
21
- ON embeddings_sessions
22
- USING hnsw (embedding vector_cosine_ops);
23
-
24
- -- HNSW vector index for embeddings_resources
25
- CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
26
- ON embeddings_resources
27
- USING hnsw (embedding vector_cosine_ops);
28
-
29
14
  -- HNSW vector index for embeddings_messages
30
15
  CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_messages_vector_hnsw
31
16
  ON embeddings_messages
32
17
  USING hnsw (embedding vector_cosine_ops);
33
18
 
34
- -- HNSW vector index for embeddings_files
35
- CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_files_vector_hnsw
36
- ON embeddings_files
19
+ -- HNSW vector index for embeddings_moments
20
+ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
21
+ ON embeddings_moments
37
22
  USING hnsw (embedding vector_cosine_ops);
38
23
 
39
24
  -- HNSW vector index for embeddings_ontology_configs
@@ -41,12 +26,22 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_ontology_configs_vector_h
41
26
  ON embeddings_ontology_configs
42
27
  USING hnsw (embedding vector_cosine_ops);
43
28
 
44
- -- HNSW vector index for embeddings_domain_resources
45
- CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_domain_resources_vector_hnsw
46
- ON embeddings_domain_resources
29
+ -- HNSW vector index for embeddings_resources
30
+ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
31
+ ON embeddings_resources
47
32
  USING hnsw (embedding vector_cosine_ops);
48
33
 
49
34
  -- HNSW vector index for embeddings_schemas
50
35
  CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_schemas_vector_hnsw
51
36
  ON embeddings_schemas
52
37
  USING hnsw (embedding vector_cosine_ops);
38
+
39
+ -- HNSW vector index for embeddings_sessions
40
+ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_sessions_vector_hnsw
41
+ ON embeddings_sessions
42
+ USING hnsw (embedding vector_cosine_ops);
43
+
44
+ -- HNSW vector index for embeddings_users
45
+ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_users_vector_hnsw
46
+ ON embeddings_users
47
+ USING hnsw (embedding vector_cosine_ops);