remdb 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/context.py +28 -24
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/pydantic_ai.py +88 -18
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/main.py +85 -16
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +18 -4
- rem/api/mcp_router/tools.py +383 -16
- rem/api/routers/admin.py +218 -1
- rem/api/routers/chat/completions.py +30 -3
- rem/api/routers/chat/streaming.py +143 -3
- rem/api/routers/feedback.py +12 -319
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +13 -13
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1300 -0
- rem/cli/commands/configure.py +1 -3
- rem/cli/commands/db.py +354 -143
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +92 -45
- rem/cli/main.py +27 -6
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/shared_session.py +2 -28
- rem/registry.py +10 -4
- rem/services/content/service.py +30 -8
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/postgres/README.md +151 -26
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/reload.py +1 -1
- rem/settings.py +56 -7
- rem/sql/background_indexes.sql +19 -24
- rem/sql/migrations/001_install.sql +252 -69
- rem/sql/migrations/002_install_models.sql +2171 -593
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/schema_loader.py +17 -13
- rem/utils/sql_paths.py +146 -0
- rem/workers/__init__.py +2 -1
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/METADATA +149 -76
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/RECORD +54 -48
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/WHEEL +0 -0
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
|
@@ -12,6 +12,7 @@ Output includes:
|
|
|
12
12
|
- KV_STORE triggers
|
|
13
13
|
- Indexes (foreground and background)
|
|
14
14
|
- Migrations
|
|
15
|
+
- Schema table entries (for agent-like table access)
|
|
15
16
|
|
|
16
17
|
Usage:
|
|
17
18
|
from rem.services.postgres.schema_generator import SchemaGenerator
|
|
@@ -30,14 +31,192 @@ Usage:
|
|
|
30
31
|
|
|
31
32
|
import importlib.util
|
|
32
33
|
import inspect
|
|
34
|
+
import json
|
|
35
|
+
import uuid
|
|
33
36
|
from pathlib import Path
|
|
34
|
-
from typing import Type
|
|
37
|
+
from typing import Any, Type
|
|
35
38
|
|
|
36
39
|
from loguru import logger
|
|
37
40
|
from pydantic import BaseModel
|
|
38
41
|
|
|
39
42
|
from ...settings import settings
|
|
40
|
-
from .
|
|
43
|
+
from ...utils.sql_paths import get_package_sql_dir
|
|
44
|
+
from .register_type import register_type, should_embed_field
|
|
45
|
+
|
|
46
|
+
# Namespace UUID for generating deterministic UUIDs from model names
|
|
47
|
+
# Using UUID5 with this namespace ensures same model always gets same UUID
|
|
48
|
+
REM_SCHEMA_NAMESPACE = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8") # DNS namespace
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def generate_model_uuid(fully_qualified_name: str) -> uuid.UUID:
|
|
52
|
+
"""
|
|
53
|
+
Generate deterministic UUID from fully qualified model name.
|
|
54
|
+
|
|
55
|
+
Uses UUID5 (SHA-1 hash) with REM namespace for reproducibility.
|
|
56
|
+
Same fully qualified name always produces same UUID.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
fully_qualified_name: Full module path, e.g., "rem.models.entities.Resource"
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Deterministic UUID for this model
|
|
63
|
+
"""
|
|
64
|
+
return uuid.uuid5(REM_SCHEMA_NAMESPACE, fully_qualified_name)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def extract_model_schema_metadata(
|
|
68
|
+
model: Type[BaseModel],
|
|
69
|
+
table_name: str,
|
|
70
|
+
entity_key_field: str,
|
|
71
|
+
include_search_tool: bool = True,
|
|
72
|
+
) -> dict[str, Any]:
|
|
73
|
+
"""
|
|
74
|
+
Extract schema metadata from a Pydantic model for schemas table.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
model: Pydantic model class
|
|
78
|
+
table_name: Database table name
|
|
79
|
+
entity_key_field: Field used as entity key in kv_store
|
|
80
|
+
include_search_tool: If True, add search_rem tool for querying this table
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
Dict with schema metadata ready for schemas table insert
|
|
84
|
+
"""
|
|
85
|
+
# Get fully qualified name
|
|
86
|
+
fqn = f"{model.__module__}.{model.__name__}"
|
|
87
|
+
|
|
88
|
+
# Generate deterministic UUID
|
|
89
|
+
schema_id = generate_model_uuid(fqn)
|
|
90
|
+
|
|
91
|
+
# Get JSON schema from Pydantic
|
|
92
|
+
json_schema = model.model_json_schema()
|
|
93
|
+
|
|
94
|
+
# Find embedding fields
|
|
95
|
+
embedding_fields = []
|
|
96
|
+
for field_name, field_info in model.model_fields.items():
|
|
97
|
+
if should_embed_field(field_name, field_info):
|
|
98
|
+
embedding_fields.append(field_name)
|
|
99
|
+
|
|
100
|
+
# Build description with search capability note
|
|
101
|
+
base_description = model.__doc__ or f"Schema for {model.__name__}"
|
|
102
|
+
search_note = (
|
|
103
|
+
f"\n\nThis agent can search the `{table_name}` table using the `search_rem` tool. "
|
|
104
|
+
f"Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, "
|
|
105
|
+
f"SEARCH for semantic similarity, or SQL for complex queries."
|
|
106
|
+
) if include_search_tool else ""
|
|
107
|
+
|
|
108
|
+
# Build spec with table metadata and tools
|
|
109
|
+
# Note: default_search_table is used by create_agent to append a description
|
|
110
|
+
# suffix to the search_rem tool when loading it dynamically
|
|
111
|
+
has_embeddings = bool(embedding_fields)
|
|
112
|
+
|
|
113
|
+
spec = {
|
|
114
|
+
"type": "object",
|
|
115
|
+
"description": base_description + search_note,
|
|
116
|
+
"properties": json_schema.get("properties", {}),
|
|
117
|
+
"required": json_schema.get("required", []),
|
|
118
|
+
"json_schema_extra": {
|
|
119
|
+
"table_name": table_name,
|
|
120
|
+
"entity_key_field": entity_key_field,
|
|
121
|
+
"embedding_fields": embedding_fields,
|
|
122
|
+
"fully_qualified_name": fqn,
|
|
123
|
+
"tools": ["search_rem"] if include_search_tool else [],
|
|
124
|
+
"default_search_table": table_name,
|
|
125
|
+
"has_embeddings": has_embeddings,
|
|
126
|
+
},
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
# Build content (documentation)
|
|
130
|
+
content = f"""# {model.__name__}
|
|
131
|
+
|
|
132
|
+
{base_description}
|
|
133
|
+
|
|
134
|
+
## Overview
|
|
135
|
+
|
|
136
|
+
The `{model.__name__}` entity is stored in the `{table_name}` table. Each record is uniquely
|
|
137
|
+
identified by its `{entity_key_field}` field for lookups and graph traversal.
|
|
138
|
+
|
|
139
|
+
## Search Capabilities
|
|
140
|
+
|
|
141
|
+
This schema includes the `search_rem` tool which supports:
|
|
142
|
+
- **LOOKUP**: O(1) exact match by {entity_key_field} (e.g., `LOOKUP "entity-name"`)
|
|
143
|
+
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
|
|
144
|
+
- **SEARCH**: Semantic vector search on {', '.join(embedding_fields) if embedding_fields else 'content'} (e.g., `SEARCH "concept" FROM {table_name} LIMIT 10`)
|
|
145
|
+
- **SQL**: Complex queries (e.g., `SELECT * FROM {table_name} WHERE ...`)
|
|
146
|
+
|
|
147
|
+
## Table Info
|
|
148
|
+
|
|
149
|
+
| Property | Value |
|
|
150
|
+
|----------|-------|
|
|
151
|
+
| Table | `{table_name}` |
|
|
152
|
+
| Entity Key | `{entity_key_field}` |
|
|
153
|
+
| Embedding Fields | {', '.join(f'`{f}`' for f in embedding_fields) if embedding_fields else 'None'} |
|
|
154
|
+
| Tools | {', '.join(['`search_rem`'] if include_search_tool else ['None'])} |
|
|
155
|
+
|
|
156
|
+
## Fields
|
|
157
|
+
|
|
158
|
+
"""
|
|
159
|
+
for field_name, field_info in model.model_fields.items():
|
|
160
|
+
field_type = str(field_info.annotation) if field_info.annotation else "Any"
|
|
161
|
+
field_desc = field_info.description or ""
|
|
162
|
+
required = "Required" if field_info.is_required() else "Optional"
|
|
163
|
+
content += f"### `{field_name}`\n"
|
|
164
|
+
content += f"- **Type**: `{field_type}`\n"
|
|
165
|
+
content += f"- **{required}**\n"
|
|
166
|
+
if field_desc:
|
|
167
|
+
content += f"- {field_desc}\n"
|
|
168
|
+
content += "\n"
|
|
169
|
+
|
|
170
|
+
return {
|
|
171
|
+
"id": str(schema_id),
|
|
172
|
+
"name": model.__name__,
|
|
173
|
+
"table_name": table_name,
|
|
174
|
+
"entity_key_field": entity_key_field,
|
|
175
|
+
"embedding_fields": embedding_fields,
|
|
176
|
+
"fqn": fqn,
|
|
177
|
+
"spec": spec,
|
|
178
|
+
"content": content,
|
|
179
|
+
"category": "entity",
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def generate_schema_upsert_sql(schema_metadata: dict[str, Any]) -> str:
|
|
184
|
+
"""
|
|
185
|
+
Generate SQL UPSERT statement for schemas table.
|
|
186
|
+
|
|
187
|
+
Uses ON CONFLICT DO UPDATE for idempotency.
|
|
188
|
+
|
|
189
|
+
Args:
|
|
190
|
+
schema_metadata: Dict from extract_model_schema_metadata()
|
|
191
|
+
|
|
192
|
+
Returns:
|
|
193
|
+
SQL INSERT ... ON CONFLICT statement
|
|
194
|
+
"""
|
|
195
|
+
# Escape single quotes in content and spec
|
|
196
|
+
content_escaped = schema_metadata["content"].replace("'", "''")
|
|
197
|
+
spec_json = json.dumps(schema_metadata["spec"]).replace("'", "''")
|
|
198
|
+
|
|
199
|
+
sql = f"""
|
|
200
|
+
-- Schema entry for {schema_metadata['name']} ({schema_metadata['table_name']})
|
|
201
|
+
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
|
|
202
|
+
VALUES (
|
|
203
|
+
'{schema_metadata['id']}'::uuid,
|
|
204
|
+
'system',
|
|
205
|
+
'{schema_metadata['name']}',
|
|
206
|
+
'{content_escaped}',
|
|
207
|
+
'{spec_json}'::jsonb,
|
|
208
|
+
'entity',
|
|
209
|
+
'{{"table_name": "{schema_metadata['table_name']}", "entity_key_field": "{schema_metadata['entity_key_field']}", "embedding_fields": {json.dumps(schema_metadata['embedding_fields'])}, "fqn": "{schema_metadata['fqn']}"}}'::jsonb
|
|
210
|
+
)
|
|
211
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
212
|
+
name = EXCLUDED.name,
|
|
213
|
+
content = EXCLUDED.content,
|
|
214
|
+
spec = EXCLUDED.spec,
|
|
215
|
+
category = EXCLUDED.category,
|
|
216
|
+
metadata = EXCLUDED.metadata,
|
|
217
|
+
updated_at = CURRENT_TIMESTAMP;
|
|
218
|
+
"""
|
|
219
|
+
return sql.strip()
|
|
41
220
|
|
|
42
221
|
|
|
43
222
|
class SchemaGenerator:
|
|
@@ -56,9 +235,9 @@ class SchemaGenerator:
|
|
|
56
235
|
Initialize schema generator.
|
|
57
236
|
|
|
58
237
|
Args:
|
|
59
|
-
output_dir: Optional directory for output files (defaults to
|
|
238
|
+
output_dir: Optional directory for output files (defaults to package sql dir)
|
|
60
239
|
"""
|
|
61
|
-
self.output_dir = output_dir or
|
|
240
|
+
self.output_dir = output_dir or get_package_sql_dir()
|
|
62
241
|
self.schemas: dict[str, dict] = {}
|
|
63
242
|
|
|
64
243
|
def discover_models(self, directory: str | Path) -> dict[str, Type[BaseModel]]:
|
|
@@ -234,6 +413,14 @@ class SchemaGenerator:
|
|
|
234
413
|
create_kv_trigger=True,
|
|
235
414
|
)
|
|
236
415
|
|
|
416
|
+
# Extract schema metadata for schemas table entry
|
|
417
|
+
schema_metadata = extract_model_schema_metadata(
|
|
418
|
+
model=model,
|
|
419
|
+
table_name=table_name,
|
|
420
|
+
entity_key_field=entity_key_field,
|
|
421
|
+
)
|
|
422
|
+
schema["schema_metadata"] = schema_metadata
|
|
423
|
+
|
|
237
424
|
self.schemas[table_name] = schema
|
|
238
425
|
return schema
|
|
239
426
|
|
|
@@ -343,6 +530,7 @@ class SchemaGenerator:
|
|
|
343
530
|
"-- 2. Embeddings tables (embeddings_<table>)",
|
|
344
531
|
"-- 3. KV_STORE triggers for cache maintenance",
|
|
345
532
|
"-- 4. Indexes (foreground only, background indexes separate)",
|
|
533
|
+
"-- 5. Schema table entries (for agent-like table access)",
|
|
346
534
|
"",
|
|
347
535
|
"-- ============================================================================",
|
|
348
536
|
"-- PREREQUISITES CHECK",
|
|
@@ -388,6 +576,19 @@ class SchemaGenerator:
|
|
|
388
576
|
sql_parts.append(schema["sql"]["kv_trigger"])
|
|
389
577
|
sql_parts.append("")
|
|
390
578
|
|
|
579
|
+
# Add schema table entries (every entity table is also an "agent")
|
|
580
|
+
sql_parts.append("-- ============================================================================")
|
|
581
|
+
sql_parts.append("-- SCHEMA TABLE ENTRIES")
|
|
582
|
+
sql_parts.append("-- Every entity table gets a schemas entry for agent-like access")
|
|
583
|
+
sql_parts.append("-- ============================================================================")
|
|
584
|
+
sql_parts.append("")
|
|
585
|
+
|
|
586
|
+
for table_name, schema in self.schemas.items():
|
|
587
|
+
if "schema_metadata" in schema:
|
|
588
|
+
schema_upsert = generate_schema_upsert_sql(schema["schema_metadata"])
|
|
589
|
+
sql_parts.append(schema_upsert)
|
|
590
|
+
sql_parts.append("")
|
|
591
|
+
|
|
391
592
|
# Add migration record
|
|
392
593
|
sql_parts.append("-- ============================================================================")
|
|
393
594
|
sql_parts.append("-- RECORD MIGRATION")
|
rem/services/postgres/service.py
CHANGED
|
@@ -190,19 +190,19 @@ class PostgresService:
|
|
|
190
190
|
|
|
191
191
|
async def connect(self) -> None:
|
|
192
192
|
"""Establish database connection pool."""
|
|
193
|
-
logger.
|
|
193
|
+
logger.debug(f"Connecting to PostgreSQL with pool size {self.pool_size}")
|
|
194
194
|
self.pool = await asyncpg.create_pool(
|
|
195
195
|
self.connection_string,
|
|
196
196
|
min_size=1,
|
|
197
197
|
max_size=self.pool_size,
|
|
198
198
|
init=self._init_connection, # Configure JSONB codec on each connection
|
|
199
199
|
)
|
|
200
|
-
logger.
|
|
200
|
+
logger.debug("PostgreSQL connection pool established")
|
|
201
201
|
|
|
202
202
|
# Start embedding worker if available
|
|
203
203
|
if self.embedding_worker and hasattr(self.embedding_worker, "start"):
|
|
204
204
|
await self.embedding_worker.start()
|
|
205
|
-
logger.
|
|
205
|
+
logger.debug("Embedding worker started")
|
|
206
206
|
|
|
207
207
|
async def disconnect(self) -> None:
|
|
208
208
|
"""Close database connection pool."""
|
|
@@ -211,10 +211,10 @@ class PostgresService:
|
|
|
211
211
|
# The worker will be stopped explicitly when the application shuts down
|
|
212
212
|
|
|
213
213
|
if self.pool:
|
|
214
|
-
logger.
|
|
214
|
+
logger.debug("Closing PostgreSQL connection pool")
|
|
215
215
|
await self.pool.close()
|
|
216
216
|
self.pool = None
|
|
217
|
-
logger.
|
|
217
|
+
logger.debug("PostgreSQL connection pool closed")
|
|
218
218
|
|
|
219
219
|
async def execute(
|
|
220
220
|
self,
|
|
@@ -631,7 +631,7 @@ class PostgresService:
|
|
|
631
631
|
table_name: str,
|
|
632
632
|
embedding: list[float],
|
|
633
633
|
limit: int = 10,
|
|
634
|
-
min_similarity: float = 0.
|
|
634
|
+
min_similarity: float = 0.3,
|
|
635
635
|
tenant_id: Optional[str] = None,
|
|
636
636
|
) -> list[dict[str, Any]]:
|
|
637
637
|
"""
|
rem/services/rem/parser.py
CHANGED
|
@@ -50,9 +50,36 @@ class RemQueryParser:
|
|
|
50
50
|
params: Dict[str, Any] = {}
|
|
51
51
|
positional_args: List[str] = []
|
|
52
52
|
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
|
|
53
|
+
# For SQL queries, preserve the raw query (keywords like LIMIT are SQL keywords)
|
|
54
|
+
if query_type == QueryType.SQL:
|
|
55
|
+
# Everything after "SQL" is the raw SQL query
|
|
56
|
+
raw_sql = query_string[3:].strip() # Skip "SQL" prefix
|
|
57
|
+
params["raw_query"] = raw_sql
|
|
58
|
+
return query_type, params
|
|
59
|
+
|
|
60
|
+
# Process remaining tokens, handling REM keywords
|
|
61
|
+
i = 1
|
|
62
|
+
while i < len(tokens):
|
|
63
|
+
token = tokens[i]
|
|
64
|
+
token_upper = token.upper()
|
|
65
|
+
|
|
66
|
+
# Handle REM keywords that take a value
|
|
67
|
+
if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH"):
|
|
68
|
+
if i + 1 < len(tokens):
|
|
69
|
+
keyword_map = {
|
|
70
|
+
"LIMIT": "limit",
|
|
71
|
+
"DEPTH": "max_depth",
|
|
72
|
+
"THRESHOLD": "threshold",
|
|
73
|
+
"TYPE": "edge_types",
|
|
74
|
+
"FROM": "initial_query",
|
|
75
|
+
"WITH": "initial_query",
|
|
76
|
+
}
|
|
77
|
+
key = keyword_map[token_upper]
|
|
78
|
+
value = tokens[i + 1]
|
|
79
|
+
params[key] = self._convert_value(key, value)
|
|
80
|
+
i += 2
|
|
81
|
+
continue
|
|
82
|
+
elif "=" in token:
|
|
56
83
|
# It's a keyword argument
|
|
57
84
|
key, value = token.split("=", 1)
|
|
58
85
|
# Handle parameter aliases
|
|
@@ -61,6 +88,7 @@ class RemQueryParser:
|
|
|
61
88
|
else:
|
|
62
89
|
# It's a positional argument part
|
|
63
90
|
positional_args.append(token)
|
|
91
|
+
i += 1
|
|
64
92
|
|
|
65
93
|
# Map positional arguments to specific fields based on QueryType
|
|
66
94
|
self._map_positional_args(query_type, positional_args, params)
|
|
@@ -133,13 +161,20 @@ class RemQueryParser:
|
|
|
133
161
|
params["query_text"] = combined_value
|
|
134
162
|
|
|
135
163
|
elif query_type == QueryType.SEARCH:
|
|
136
|
-
|
|
164
|
+
# SEARCH expects: SEARCH <table> <query_text> [LIMIT n]
|
|
165
|
+
# First positional arg is table name, rest is query text
|
|
166
|
+
if len(positional_args) >= 2:
|
|
167
|
+
params["table_name"] = positional_args[0]
|
|
168
|
+
params["query_text"] = " ".join(positional_args[1:])
|
|
169
|
+
elif len(positional_args) == 1:
|
|
170
|
+
# Could be table name or query text - assume query text if no table
|
|
171
|
+
params["query_text"] = positional_args[0]
|
|
172
|
+
# If no positional args, params stays empty
|
|
137
173
|
|
|
138
174
|
elif query_type == QueryType.TRAVERSE:
|
|
139
175
|
params["initial_query"] = combined_value
|
|
140
176
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
# but current service doesn't use them.
|
|
177
|
+
elif query_type == QueryType.SQL:
|
|
178
|
+
# SQL with positional args means "SQL SELECT * FROM ..." form
|
|
179
|
+
# Treat the combined positional args as the raw SQL query
|
|
180
|
+
params["raw_query"] = combined_value
|
rem/services/rem/service.py
CHANGED
|
@@ -13,6 +13,31 @@ Design:
|
|
|
13
13
|
- All queries pushed down to Postgres for performance
|
|
14
14
|
- Model schema inspection for validation only
|
|
15
15
|
- Exceptions for missing fields/embeddings
|
|
16
|
+
|
|
17
|
+
TODO: Staged Plan Execution
|
|
18
|
+
- Implement execute_staged_plan() method for multi-stage query execution
|
|
19
|
+
- Each stage can be:
|
|
20
|
+
1. Static query (query field): Execute REM dialect directly
|
|
21
|
+
2. Dynamic query (intent field): LLM interprets intent + previous results to build query
|
|
22
|
+
- Flow for dynamic stages:
|
|
23
|
+
1. Gather results from depends_on stages (from previous_results or current execution)
|
|
24
|
+
2. Pass intent + previous results to LLM (like ask_rem but with context)
|
|
25
|
+
3. LLM generates REM query based on what it learned from previous stages
|
|
26
|
+
4. Execute generated query
|
|
27
|
+
5. Store results in stage_results for client to use in continuation
|
|
28
|
+
- Multi-turn continuation:
|
|
29
|
+
- Client passes previous_results back from response's stage_results
|
|
30
|
+
- Client sets resume_from_stage to skip already-executed stages
|
|
31
|
+
- Server uses previous_results as context for depends_on lookups
|
|
32
|
+
- Use cases:
|
|
33
|
+
- LOOKUP "Sarah" → intent: "find her team members" (LLM sees Sarah's graph_edges, builds TRAVERSE)
|
|
34
|
+
- SEARCH "API docs" → intent: "get authors" (LLM extracts author refs, builds LOOKUP)
|
|
35
|
+
- Complex graph exploration with LLM-driven navigation
|
|
36
|
+
- API: POST /api/v1/query with:
|
|
37
|
+
- mode="staged-plan"
|
|
38
|
+
- plan=[{stage, query|intent, name, depends_on}]
|
|
39
|
+
- previous_results=[{stage, name, query_executed, results, count}] (for continuation)
|
|
40
|
+
- resume_from_stage=N (to skip completed stages)
|
|
16
41
|
"""
|
|
17
42
|
|
|
18
43
|
from typing import Any
|
|
@@ -309,17 +334,26 @@ class RemService:
|
|
|
309
334
|
)
|
|
310
335
|
|
|
311
336
|
# Execute vector search via rem_search() PostgreSQL function
|
|
337
|
+
min_sim = params.min_similarity if params.min_similarity is not None else 0.3
|
|
338
|
+
limit = params.limit or 10
|
|
312
339
|
query_params = get_search_params(
|
|
313
340
|
query_embedding,
|
|
314
341
|
table_name,
|
|
315
342
|
field_name,
|
|
316
343
|
tenant_id,
|
|
317
344
|
provider,
|
|
318
|
-
|
|
319
|
-
|
|
345
|
+
min_sim,
|
|
346
|
+
limit,
|
|
320
347
|
tenant_id, # Use tenant_id (query.user_id) as user_id
|
|
321
348
|
)
|
|
349
|
+
logger.debug(
|
|
350
|
+
f"SEARCH params: table={table_name}, field={field_name}, "
|
|
351
|
+
f"tenant_id={tenant_id}, provider={provider}, "
|
|
352
|
+
f"min_similarity={min_sim}, limit={limit}, "
|
|
353
|
+
f"embedding_dims={len(query_embedding)}"
|
|
354
|
+
)
|
|
322
355
|
results = await self.db.execute(SEARCH_QUERY, query_params)
|
|
356
|
+
logger.debug(f"SEARCH results: {len(results)} rows")
|
|
323
357
|
|
|
324
358
|
return {
|
|
325
359
|
"query_type": "SEARCH",
|
rem/services/session/reload.py
CHANGED
rem/settings.py
CHANGED
|
@@ -58,7 +58,7 @@ Example .env file:
|
|
|
58
58
|
|
|
59
59
|
import os
|
|
60
60
|
import hashlib
|
|
61
|
-
from pydantic import Field, field_validator,
|
|
61
|
+
from pydantic import Field, field_validator, ValidationInfo
|
|
62
62
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
63
63
|
from loguru import logger
|
|
64
64
|
|
|
@@ -414,7 +414,7 @@ class AuthSettings(BaseSettings):
|
|
|
414
414
|
|
|
415
415
|
@field_validator("session_secret", mode="before")
|
|
416
416
|
@classmethod
|
|
417
|
-
def generate_dev_secret(cls, v: str | None, info:
|
|
417
|
+
def generate_dev_secret(cls, v: str | None, info: ValidationInfo) -> str:
|
|
418
418
|
# Only generate if not already set and not in production
|
|
419
419
|
if not v and info.data.get("environment") != "production":
|
|
420
420
|
# Deterministic secret for development
|
|
@@ -1004,6 +1004,59 @@ class APISettings(BaseSettings):
|
|
|
1004
1004
|
)
|
|
1005
1005
|
|
|
1006
1006
|
|
|
1007
|
+
class ModelsSettings(BaseSettings):
|
|
1008
|
+
"""
|
|
1009
|
+
Custom model registration settings for downstream applications.
|
|
1010
|
+
|
|
1011
|
+
Allows downstream apps to specify Python modules containing custom models
|
|
1012
|
+
that should be imported (and thus registered) before schema generation.
|
|
1013
|
+
|
|
1014
|
+
This enables `rem db schema generate` to discover models registered with
|
|
1015
|
+
`@rem.register_model` in downstream applications.
|
|
1016
|
+
|
|
1017
|
+
Environment variables:
|
|
1018
|
+
MODELS__IMPORT_MODULES - Semicolon-separated list of Python modules to import
|
|
1019
|
+
Example: "models;myapp.entities;myapp.custom_models"
|
|
1020
|
+
|
|
1021
|
+
Example:
|
|
1022
|
+
# In downstream app's .env
|
|
1023
|
+
MODELS__IMPORT_MODULES=models
|
|
1024
|
+
|
|
1025
|
+
# In downstream app's models/__init__.py
|
|
1026
|
+
import rem
|
|
1027
|
+
from rem.models.core import CoreModel
|
|
1028
|
+
|
|
1029
|
+
@rem.register_model
|
|
1030
|
+
class MyCustomEntity(CoreModel):
|
|
1031
|
+
name: str
|
|
1032
|
+
|
|
1033
|
+
# Then run schema generation
|
|
1034
|
+
rem db schema generate # Includes MyCustomEntity
|
|
1035
|
+
"""
|
|
1036
|
+
|
|
1037
|
+
model_config = SettingsConfigDict(
|
|
1038
|
+
env_prefix="MODELS__",
|
|
1039
|
+
extra="ignore",
|
|
1040
|
+
)
|
|
1041
|
+
|
|
1042
|
+
import_modules: str = Field(
|
|
1043
|
+
default="",
|
|
1044
|
+
description=(
|
|
1045
|
+
"Semicolon-separated list of Python modules to import for model registration. "
|
|
1046
|
+
"These modules are imported before schema generation to ensure custom models "
|
|
1047
|
+
"decorated with @rem.register_model are discovered. "
|
|
1048
|
+
"Example: 'models;myapp.entities'"
|
|
1049
|
+
),
|
|
1050
|
+
)
|
|
1051
|
+
|
|
1052
|
+
@property
|
|
1053
|
+
def module_list(self) -> list[str]:
|
|
1054
|
+
"""Get modules as a list, filtering empty strings."""
|
|
1055
|
+
if not self.import_modules:
|
|
1056
|
+
return []
|
|
1057
|
+
return [m.strip() for m in self.import_modules.split(";") if m.strip()]
|
|
1058
|
+
|
|
1059
|
+
|
|
1007
1060
|
class SchemaSettings(BaseSettings):
|
|
1008
1061
|
"""
|
|
1009
1062
|
Schema search path settings for agent and evaluator schemas.
|
|
@@ -1281,16 +1334,12 @@ class Settings(BaseSettings):
|
|
|
1281
1334
|
description="Root path for reverse proxy (e.g., /rem for ALB routing)",
|
|
1282
1335
|
)
|
|
1283
1336
|
|
|
1284
|
-
sql_dir: str = Field(
|
|
1285
|
-
default="src/rem/sql",
|
|
1286
|
-
description="Directory for SQL files and migrations",
|
|
1287
|
-
)
|
|
1288
|
-
|
|
1289
1337
|
# Nested settings groups
|
|
1290
1338
|
api: APISettings = Field(default_factory=APISettings)
|
|
1291
1339
|
chat: ChatSettings = Field(default_factory=ChatSettings)
|
|
1292
1340
|
llm: LLMSettings = Field(default_factory=LLMSettings)
|
|
1293
1341
|
mcp: MCPSettings = Field(default_factory=MCPSettings)
|
|
1342
|
+
models: ModelsSettings = Field(default_factory=ModelsSettings)
|
|
1294
1343
|
otel: OTELSettings = Field(default_factory=OTELSettings)
|
|
1295
1344
|
phoenix: PhoenixSettings = Field(default_factory=PhoenixSettings)
|
|
1296
1345
|
auth: AuthSettings = Field(default_factory=AuthSettings)
|
rem/sql/background_indexes.sql
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
-- Background index creation
|
|
2
2
|
-- Run AFTER initial data load to avoid blocking writes
|
|
3
3
|
|
|
4
|
-
-- HNSW vector index for
|
|
5
|
-
CREATE INDEX CONCURRENTLY IF NOT EXISTS
|
|
6
|
-
ON
|
|
4
|
+
-- HNSW vector index for embeddings_files
|
|
5
|
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_files_vector_hnsw
|
|
6
|
+
ON embeddings_files
|
|
7
7
|
USING hnsw (embedding vector_cosine_ops);
|
|
8
8
|
|
|
9
9
|
-- HNSW vector index for embeddings_image_resources
|
|
@@ -11,29 +11,14 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_image_resources_vector_hn
|
|
|
11
11
|
ON embeddings_image_resources
|
|
12
12
|
USING hnsw (embedding vector_cosine_ops);
|
|
13
13
|
|
|
14
|
-
-- HNSW vector index for embeddings_moments
|
|
15
|
-
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
|
|
16
|
-
ON embeddings_moments
|
|
17
|
-
USING hnsw (embedding vector_cosine_ops);
|
|
18
|
-
|
|
19
|
-
-- HNSW vector index for embeddings_sessions
|
|
20
|
-
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_sessions_vector_hnsw
|
|
21
|
-
ON embeddings_sessions
|
|
22
|
-
USING hnsw (embedding vector_cosine_ops);
|
|
23
|
-
|
|
24
|
-
-- HNSW vector index for embeddings_resources
|
|
25
|
-
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
|
|
26
|
-
ON embeddings_resources
|
|
27
|
-
USING hnsw (embedding vector_cosine_ops);
|
|
28
|
-
|
|
29
14
|
-- HNSW vector index for embeddings_messages
|
|
30
15
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_messages_vector_hnsw
|
|
31
16
|
ON embeddings_messages
|
|
32
17
|
USING hnsw (embedding vector_cosine_ops);
|
|
33
18
|
|
|
34
|
-
-- HNSW vector index for
|
|
35
|
-
CREATE INDEX CONCURRENTLY IF NOT EXISTS
|
|
36
|
-
ON
|
|
19
|
+
-- HNSW vector index for embeddings_moments
|
|
20
|
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
|
|
21
|
+
ON embeddings_moments
|
|
37
22
|
USING hnsw (embedding vector_cosine_ops);
|
|
38
23
|
|
|
39
24
|
-- HNSW vector index for embeddings_ontology_configs
|
|
@@ -41,12 +26,22 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_ontology_configs_vector_h
|
|
|
41
26
|
ON embeddings_ontology_configs
|
|
42
27
|
USING hnsw (embedding vector_cosine_ops);
|
|
43
28
|
|
|
44
|
-
-- HNSW vector index for
|
|
45
|
-
CREATE INDEX CONCURRENTLY IF NOT EXISTS
|
|
46
|
-
ON
|
|
29
|
+
-- HNSW vector index for embeddings_resources
|
|
30
|
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
|
|
31
|
+
ON embeddings_resources
|
|
47
32
|
USING hnsw (embedding vector_cosine_ops);
|
|
48
33
|
|
|
49
34
|
-- HNSW vector index for embeddings_schemas
|
|
50
35
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_schemas_vector_hnsw
|
|
51
36
|
ON embeddings_schemas
|
|
52
37
|
USING hnsw (embedding vector_cosine_ops);
|
|
38
|
+
|
|
39
|
+
-- HNSW vector index for embeddings_sessions
|
|
40
|
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_sessions_vector_hnsw
|
|
41
|
+
ON embeddings_sessions
|
|
42
|
+
USING hnsw (embedding vector_cosine_ops);
|
|
43
|
+
|
|
44
|
+
-- HNSW vector index for embeddings_users
|
|
45
|
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_users_vector_hnsw
|
|
46
|
+
ON embeddings_users
|
|
47
|
+
USING hnsw (embedding vector_cosine_ops);
|