remdb 0.3.180__py3-none-any.whl → 0.3.258__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +36 -2
- rem/agentic/__init__.py +10 -1
- rem/agentic/context.py +185 -1
- rem/agentic/context_builder.py +56 -35
- rem/agentic/mcp/tool_wrapper.py +2 -2
- rem/agentic/providers/pydantic_ai.py +303 -111
- rem/agentic/schema.py +2 -2
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +223 -0
- rem/api/mcp_router/server.py +4 -0
- rem/api/mcp_router/tools.py +608 -166
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +219 -20
- rem/api/routers/chat/child_streaming.py +393 -0
- rem/api/routers/chat/completions.py +77 -40
- rem/api/routers/chat/sse_events.py +7 -3
- rem/api/routers/chat/streaming.py +381 -291
- rem/api/routers/chat/streaming_utils.py +325 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +11 -3
- rem/api/routers/messages.py +176 -38
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +17 -15
- rem/api/routers/shared_sessions.py +16 -0
- rem/auth/jwt.py +19 -4
- rem/auth/middleware.py +42 -28
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +205 -114
- rem/cli/commands/db.py +55 -31
- rem/cli/commands/experiments.py +1 -1
- rem/cli/commands/process.py +179 -43
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/session.py +117 -0
- rem/cli/main.py +2 -0
- rem/models/core/experiment.py +1 -1
- rem/models/entities/ontology.py +18 -20
- rem/models/entities/session.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +1 -1
- rem/schemas/agents/rem.yaml +1 -1
- rem/schemas/agents/test_orchestrator.yaml +42 -0
- rem/schemas/agents/test_structured_output.yaml +52 -0
- rem/services/content/providers.py +151 -49
- rem/services/content/service.py +18 -5
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +11 -10
- rem/services/postgres/repository.py +39 -28
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/rem/README.md +4 -3
- rem/services/rem/parser.py +7 -10
- rem/services/rem/service.py +47 -0
- rem/services/session/__init__.py +8 -1
- rem/services/session/compression.py +47 -5
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +2 -1
- rem/settings.py +92 -7
- rem/sql/migrations/001_install.sql +125 -7
- rem/sql/migrations/002_install_models.sql +159 -149
- rem/sql/migrations/004_cache_system.sql +10 -276
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/schema_loader.py +180 -120
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/METADATA +7 -6
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/RECORD +70 -61
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
- {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
|
@@ -94,14 +94,14 @@ def generate_table_schema(
|
|
|
94
94
|
# Always add id as primary key
|
|
95
95
|
columns.append("id UUID PRIMARY KEY DEFAULT uuid_generate_v4()")
|
|
96
96
|
|
|
97
|
-
# Add tenant_id if tenant scoped
|
|
97
|
+
# Add tenant_id if tenant scoped (nullable - NULL means public/shared)
|
|
98
98
|
if tenant_scoped:
|
|
99
|
-
columns.append("tenant_id VARCHAR(100) NOT NULL")
|
|
100
|
-
indexes.append(f"CREATE INDEX idx_{table_name}_tenant ON {table_name} (tenant_id);")
|
|
99
|
+
columns.append("tenant_id VARCHAR(100)")
|
|
100
|
+
indexes.append(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_tenant ON {table_name} (tenant_id);")
|
|
101
101
|
|
|
102
102
|
# Add user_id (owner field)
|
|
103
103
|
columns.append("user_id VARCHAR(256)")
|
|
104
|
-
indexes.append(f"CREATE INDEX idx_{table_name}_user ON {table_name} (user_id);")
|
|
104
|
+
indexes.append(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_user ON {table_name} (user_id);")
|
|
105
105
|
|
|
106
106
|
# Process Pydantic fields (skip system fields)
|
|
107
107
|
for field_name, field_info in model.model_fields.items():
|
|
@@ -125,19 +125,19 @@ def generate_table_schema(
|
|
|
125
125
|
# Add graph_edges JSONB field
|
|
126
126
|
columns.append("graph_edges JSONB DEFAULT '[]'::jsonb")
|
|
127
127
|
indexes.append(
|
|
128
|
-
f"CREATE INDEX idx_{table_name}_graph_edges ON {table_name} USING GIN (graph_edges);"
|
|
128
|
+
f"CREATE INDEX IF NOT EXISTS idx_{table_name}_graph_edges ON {table_name} USING GIN (graph_edges);"
|
|
129
129
|
)
|
|
130
130
|
|
|
131
131
|
# Add metadata JSONB field
|
|
132
132
|
columns.append("metadata JSONB DEFAULT '{}'::jsonb")
|
|
133
133
|
indexes.append(
|
|
134
|
-
f"CREATE INDEX idx_{table_name}_metadata ON {table_name} USING GIN (metadata);"
|
|
134
|
+
f"CREATE INDEX IF NOT EXISTS idx_{table_name}_metadata ON {table_name} USING GIN (metadata);"
|
|
135
135
|
)
|
|
136
136
|
|
|
137
137
|
# Add tags field (TEXT[] for list[str])
|
|
138
138
|
columns.append("tags TEXT[] DEFAULT ARRAY[]::TEXT[]")
|
|
139
139
|
indexes.append(
|
|
140
|
-
f"CREATE INDEX idx_{table_name}_tags ON {table_name} USING GIN (tags);"
|
|
140
|
+
f"CREATE INDEX IF NOT EXISTS idx_{table_name}_tags ON {table_name} USING GIN (tags);"
|
|
141
141
|
)
|
|
142
142
|
|
|
143
143
|
# Generate CREATE TABLE statement
|
|
@@ -202,10 +202,10 @@ CREATE TABLE IF NOT EXISTS {embeddings_table} (
|
|
|
202
202
|
);
|
|
203
203
|
|
|
204
204
|
-- Index for entity lookup (get all embeddings for entity)
|
|
205
|
-
CREATE INDEX idx_{embeddings_table}_entity ON {embeddings_table} (entity_id);
|
|
205
|
+
CREATE INDEX IF NOT EXISTS idx_{embeddings_table}_entity ON {embeddings_table} (entity_id);
|
|
206
206
|
|
|
207
207
|
-- Index for field + provider lookup
|
|
208
|
-
CREATE INDEX idx_{embeddings_table}_field_provider ON {embeddings_table} (field_name, provider);
|
|
208
|
+
CREATE INDEX IF NOT EXISTS idx_{embeddings_table}_field_provider ON {embeddings_table} (field_name, provider);
|
|
209
209
|
|
|
210
210
|
-- HNSW index for vector similarity search (created in background)
|
|
211
211
|
-- Note: This will be created by background thread after data load
|
|
@@ -258,6 +258,7 @@ BEGIN
|
|
|
258
258
|
RETURN OLD;
|
|
259
259
|
ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
|
|
260
260
|
-- Upsert to KV_STORE (O(1) lookup by entity_key)
|
|
261
|
+
-- tenant_id can be NULL (meaning public/shared data)
|
|
261
262
|
INSERT INTO kv_store (
|
|
262
263
|
entity_key,
|
|
263
264
|
entity_type,
|
|
@@ -277,7 +278,7 @@ BEGIN
|
|
|
277
278
|
COALESCE(NEW.graph_edges, '[]'::jsonb),
|
|
278
279
|
CURRENT_TIMESTAMP
|
|
279
280
|
)
|
|
280
|
-
ON CONFLICT (tenant_id, entity_key)
|
|
281
|
+
ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
|
|
281
282
|
DO UPDATE SET
|
|
282
283
|
entity_id = EXCLUDED.entity_id,
|
|
283
284
|
user_id = EXCLUDED.user_id,
|
|
@@ -33,15 +33,15 @@ if TYPE_CHECKING:
|
|
|
33
33
|
|
|
34
34
|
def get_postgres_service() -> "PostgresService | None":
|
|
35
35
|
"""
|
|
36
|
-
Get PostgresService
|
|
36
|
+
Get PostgresService singleton from parent module.
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
Uses late import to avoid circular import issues.
|
|
39
|
+
Previously had a separate _postgres_instance here which caused
|
|
40
|
+
"pool not connected" errors due to duplicate connection pools.
|
|
39
41
|
"""
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
from .service import PostgresService
|
|
44
|
-
return PostgresService()
|
|
42
|
+
# Late import to avoid circular import (repository.py imported by __init__.py)
|
|
43
|
+
from rem.services.postgres import get_postgres_service as _get_singleton
|
|
44
|
+
return _get_singleton()
|
|
45
45
|
|
|
46
46
|
T = TypeVar("T", bound=BaseModel)
|
|
47
47
|
|
|
@@ -74,7 +74,7 @@ class Repository(Generic[T]):
|
|
|
74
74
|
self,
|
|
75
75
|
records: T | list[T],
|
|
76
76
|
embeddable_fields: list[str] | None = None,
|
|
77
|
-
generate_embeddings: bool = False,
|
|
77
|
+
generate_embeddings: bool = True,
|
|
78
78
|
) -> T | list[T]:
|
|
79
79
|
"""
|
|
80
80
|
Upsert single record or list of records (create or update on ID conflict).
|
|
@@ -84,8 +84,9 @@ class Repository(Generic[T]):
|
|
|
84
84
|
|
|
85
85
|
Args:
|
|
86
86
|
records: Single model instance or list of model instances
|
|
87
|
-
embeddable_fields: Optional list of fields to generate embeddings for
|
|
88
|
-
|
|
87
|
+
embeddable_fields: Optional list of fields to generate embeddings for.
|
|
88
|
+
If None, auto-detects 'content' field if present.
|
|
89
|
+
generate_embeddings: Whether to queue embedding generation tasks (default: True)
|
|
89
90
|
|
|
90
91
|
Returns:
|
|
91
92
|
Single record or list of records with generated IDs (matches input type)
|
|
@@ -118,25 +119,35 @@ class Repository(Generic[T]):
|
|
|
118
119
|
record.id = row["id"] # type: ignore[attr-defined]
|
|
119
120
|
|
|
120
121
|
# Queue embedding generation if requested and worker is available
|
|
121
|
-
if generate_embeddings and
|
|
122
|
+
if generate_embeddings and self.db.embedding_worker:
|
|
122
123
|
from rem.services.embeddings import EmbeddingTask
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
124
|
+
from .register_type import should_embed_field
|
|
125
|
+
|
|
126
|
+
# Auto-detect embeddable fields if not specified
|
|
127
|
+
if embeddable_fields is None:
|
|
128
|
+
embeddable_fields = [
|
|
129
|
+
field_name
|
|
130
|
+
for field_name, field_info in self.model_class.model_fields.items()
|
|
131
|
+
if should_embed_field(field_name, field_info)
|
|
132
|
+
]
|
|
133
|
+
|
|
134
|
+
if embeddable_fields:
|
|
135
|
+
for record in records_list:
|
|
136
|
+
for field_name in embeddable_fields:
|
|
137
|
+
content = getattr(record, field_name, None)
|
|
138
|
+
if content and isinstance(content, str):
|
|
139
|
+
task = EmbeddingTask(
|
|
140
|
+
task_id=f"{record.id}-{field_name}", # type: ignore[attr-defined]
|
|
141
|
+
entity_id=str(record.id), # type: ignore[attr-defined]
|
|
142
|
+
table_name=self.table_name,
|
|
143
|
+
field_name=field_name,
|
|
144
|
+
content=content,
|
|
145
|
+
provider="openai", # Default provider
|
|
146
|
+
model="text-embedding-3-small", # Default model
|
|
147
|
+
)
|
|
148
|
+
await self.db.embedding_worker.queue_task(task)
|
|
149
|
+
|
|
150
|
+
logger.debug(f"Queued {len(records_list) * len(embeddable_fields)} embedding tasks")
|
|
140
151
|
|
|
141
152
|
# Return single item or list to match input type
|
|
142
153
|
return records_list[0] if is_single else records_list
|
|
@@ -351,10 +351,10 @@ class SchemaGenerator:
|
|
|
351
351
|
|
|
352
352
|
Priority:
|
|
353
353
|
1. Field with json_schema_extra={\"entity_key\": True}
|
|
354
|
-
2. Field named \"name\"
|
|
354
|
+
2. Field named \"name\" (human-readable identifier)
|
|
355
355
|
3. Field named \"key\"
|
|
356
|
-
4. Field named \"
|
|
357
|
-
5.
|
|
356
|
+
4. Field named \"uri\"
|
|
357
|
+
5. Field named \"id\" (fallback)
|
|
358
358
|
|
|
359
359
|
Args:
|
|
360
360
|
model: Pydantic model class
|
|
@@ -369,9 +369,9 @@ class SchemaGenerator:
|
|
|
369
369
|
if json_extra.get("entity_key"):
|
|
370
370
|
return field_name
|
|
371
371
|
|
|
372
|
-
# Check for key fields in priority order:
|
|
372
|
+
# Check for key fields in priority order: name -> key -> uri -> id
|
|
373
373
|
# (matching sql_builder.get_entity_key convention)
|
|
374
|
-
for candidate in ["
|
|
374
|
+
for candidate in ["name", "key", "uri", "id"]:
|
|
375
375
|
if candidate in model.model_fields:
|
|
376
376
|
return candidate
|
|
377
377
|
|
|
@@ -35,10 +35,11 @@ def get_natural_key(model: BaseModel) -> str | None:
|
|
|
35
35
|
|
|
36
36
|
def get_entity_key(model: BaseModel) -> str:
|
|
37
37
|
"""
|
|
38
|
-
Get entity key for KV store following precedence:
|
|
38
|
+
Get entity key for KV store following precedence: name -> key -> uri -> id.
|
|
39
39
|
|
|
40
|
-
For KV store lookups, we prefer
|
|
41
|
-
then
|
|
40
|
+
For KV store lookups, we prefer human-readable identifiers first (name/key),
|
|
41
|
+
then URIs, with id as the fallback. This allows users to lookup entities
|
|
42
|
+
by their natural names like "panic-disorder" instead of UUIDs.
|
|
42
43
|
|
|
43
44
|
Args:
|
|
44
45
|
model: Pydantic model instance
|
|
@@ -46,13 +47,13 @@ def get_entity_key(model: BaseModel) -> str:
|
|
|
46
47
|
Returns:
|
|
47
48
|
Entity key string (guaranteed to exist)
|
|
48
49
|
"""
|
|
49
|
-
for field in ["
|
|
50
|
+
for field in ["name", "key", "uri", "id"]:
|
|
50
51
|
if hasattr(model, field):
|
|
51
52
|
value = getattr(model, field)
|
|
52
53
|
if value:
|
|
53
54
|
return str(value)
|
|
54
55
|
# Should never reach here since id always exists in CoreModel
|
|
55
|
-
raise ValueError(f"Model {type(model)} has no
|
|
56
|
+
raise ValueError(f"Model {type(model)} has no name, key, uri, or id field")
|
|
56
57
|
|
|
57
58
|
|
|
58
59
|
def generate_deterministic_id(user_id: str | None, entity_key: str) -> uuid.UUID:
|
rem/services/rem/README.md
CHANGED
|
@@ -40,15 +40,16 @@ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
|
|
|
40
40
|
available : Stage 1+
|
|
41
41
|
example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
|
|
42
42
|
|
|
43
|
-
SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
|
|
43
|
+
SearchQuery ::= SEARCH <text:string> [IN|TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
|
|
44
44
|
text : Semantic query text
|
|
45
|
-
table : Target table (default: "resources")
|
|
45
|
+
table : Target table (default: "resources"). Use IN or TABLE keyword.
|
|
46
46
|
clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
|
|
47
47
|
limit : Max results (default: 10)
|
|
48
48
|
performance : Indexed (pgvector)
|
|
49
49
|
available : Stage 3+
|
|
50
50
|
examples :
|
|
51
|
-
- SEARCH "database migration"
|
|
51
|
+
- SEARCH "database migration" IN resources LIMIT 10
|
|
52
|
+
- SEARCH "parcel delivery" IN ontologies
|
|
52
53
|
- SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
|
|
53
54
|
- SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
|
|
54
55
|
- SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
|
rem/services/rem/parser.py
CHANGED
|
@@ -64,7 +64,7 @@ class RemQueryParser:
|
|
|
64
64
|
token_upper = token.upper()
|
|
65
65
|
|
|
66
66
|
# Handle REM keywords that take a value
|
|
67
|
-
if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH"):
|
|
67
|
+
if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH", "TABLE", "IN", "WHERE"):
|
|
68
68
|
if i + 1 < len(tokens):
|
|
69
69
|
keyword_map = {
|
|
70
70
|
"LIMIT": "limit",
|
|
@@ -73,6 +73,9 @@ class RemQueryParser:
|
|
|
73
73
|
"TYPE": "edge_types",
|
|
74
74
|
"FROM": "initial_query",
|
|
75
75
|
"WITH": "initial_query",
|
|
76
|
+
"TABLE": "table_name",
|
|
77
|
+
"IN": "table_name", # IN is alias for TABLE
|
|
78
|
+
"WHERE": "where_clause",
|
|
76
79
|
}
|
|
77
80
|
key = keyword_map[token_upper]
|
|
78
81
|
value = tokens[i + 1]
|
|
@@ -161,15 +164,9 @@ class RemQueryParser:
|
|
|
161
164
|
params["query_text"] = combined_value
|
|
162
165
|
|
|
163
166
|
elif query_type == QueryType.SEARCH:
|
|
164
|
-
# SEARCH expects: SEARCH <table> <text>
|
|
165
|
-
#
|
|
166
|
-
|
|
167
|
-
params["table_name"] = positional_args[0]
|
|
168
|
-
params["query_text"] = " ".join(positional_args[1:])
|
|
169
|
-
elif len(positional_args) == 1:
|
|
170
|
-
# Could be table name or query text - assume query text if no table
|
|
171
|
-
params["query_text"] = positional_args[0]
|
|
172
|
-
# If no positional args, params stays empty
|
|
167
|
+
# SEARCH expects: SEARCH <text> [TABLE <table>] [WHERE <clause>] [LIMIT n]
|
|
168
|
+
# All positional args are query_text, TABLE/WHERE/LIMIT are handled as keywords
|
|
169
|
+
params["query_text"] = combined_value
|
|
173
170
|
|
|
174
171
|
elif query_type == QueryType.TRAVERSE:
|
|
175
172
|
params["initial_query"] = combined_value
|
rem/services/rem/service.py
CHANGED
|
@@ -478,6 +478,53 @@ class RemService:
|
|
|
478
478
|
parser = RemQueryParser()
|
|
479
479
|
return parser.parse(query_string)
|
|
480
480
|
|
|
481
|
+
async def execute_query_string(
|
|
482
|
+
self, query_string: str, user_id: str | None = None
|
|
483
|
+
) -> dict[str, Any]:
|
|
484
|
+
"""
|
|
485
|
+
Execute a REM dialect query string directly.
|
|
486
|
+
|
|
487
|
+
This is the unified entry point for executing REM queries from both
|
|
488
|
+
the CLI and API. It handles parsing the query string, creating the
|
|
489
|
+
RemQuery model, and executing it.
|
|
490
|
+
|
|
491
|
+
Args:
|
|
492
|
+
query_string: REM dialect query (e.g., 'LOOKUP "Sarah Chen"',
|
|
493
|
+
'SEARCH resources "API design"', 'SELECT * FROM users')
|
|
494
|
+
user_id: Optional user ID for query isolation
|
|
495
|
+
|
|
496
|
+
Returns:
|
|
497
|
+
Dict with query results and metadata:
|
|
498
|
+
- query_type: The type of query executed
|
|
499
|
+
- results: List of result rows
|
|
500
|
+
- count: Number of results
|
|
501
|
+
- Additional fields depending on query type
|
|
502
|
+
|
|
503
|
+
Raises:
|
|
504
|
+
ValueError: If the query string is invalid
|
|
505
|
+
QueryExecutionError: If query execution fails
|
|
506
|
+
|
|
507
|
+
Example:
|
|
508
|
+
>>> result = await rem_service.execute_query_string(
|
|
509
|
+
... 'LOOKUP "Sarah Chen"',
|
|
510
|
+
... user_id="user-123"
|
|
511
|
+
... )
|
|
512
|
+
>>> print(result["count"])
|
|
513
|
+
1
|
|
514
|
+
"""
|
|
515
|
+
# Parse the query string into type and parameters
|
|
516
|
+
query_type, parameters = self._parse_query_string(query_string)
|
|
517
|
+
|
|
518
|
+
# Create and validate the RemQuery model
|
|
519
|
+
rem_query = RemQuery.model_validate({
|
|
520
|
+
"query_type": query_type,
|
|
521
|
+
"parameters": parameters,
|
|
522
|
+
"user_id": user_id,
|
|
523
|
+
})
|
|
524
|
+
|
|
525
|
+
# Execute and return results
|
|
526
|
+
return await self.execute_query(rem_query)
|
|
527
|
+
|
|
481
528
|
async def ask_rem(
|
|
482
529
|
self, natural_query: str, tenant_id: str, llm_model: str | None = None, plan_mode: bool = False
|
|
483
530
|
) -> dict[str, Any]:
|
rem/services/session/__init__.py
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
"""Session management services for conversation persistence and compression."""
|
|
2
2
|
|
|
3
3
|
from .compression import MessageCompressor, SessionMessageStore
|
|
4
|
+
from .pydantic_messages import audit_session_history, session_to_pydantic_messages
|
|
4
5
|
from .reload import reload_session
|
|
5
6
|
|
|
6
|
-
__all__ = [
|
|
7
|
+
__all__ = [
|
|
8
|
+
"MessageCompressor",
|
|
9
|
+
"SessionMessageStore",
|
|
10
|
+
"audit_session_history",
|
|
11
|
+
"reload_session",
|
|
12
|
+
"session_to_pydantic_messages",
|
|
13
|
+
]
|
|
@@ -65,7 +65,7 @@ def truncate_key(key: str, max_length: int = MAX_ENTITY_KEY_LENGTH) -> str:
|
|
|
65
65
|
logger.warning(f"Truncated key from {len(key)} to {len(truncated)} chars: {key[:50]}...")
|
|
66
66
|
return truncated
|
|
67
67
|
|
|
68
|
-
from rem.models.entities import Message
|
|
68
|
+
from rem.models.entities import Message, Session
|
|
69
69
|
from rem.services.postgres import PostgresService, Repository
|
|
70
70
|
from rem.settings import settings
|
|
71
71
|
|
|
@@ -96,7 +96,7 @@ class MessageCompressor:
|
|
|
96
96
|
Returns:
|
|
97
97
|
Compressed message dict
|
|
98
98
|
"""
|
|
99
|
-
content = message.get("content", "")
|
|
99
|
+
content = message.get("content") or ""
|
|
100
100
|
|
|
101
101
|
# Don't compress short messages or system messages
|
|
102
102
|
if (
|
|
@@ -177,6 +177,39 @@ class SessionMessageStore:
|
|
|
177
177
|
self.user_id = user_id
|
|
178
178
|
self.compressor = compressor or MessageCompressor()
|
|
179
179
|
self.repo = Repository(Message)
|
|
180
|
+
self._session_repo = Repository(Session, table_name="sessions")
|
|
181
|
+
|
|
182
|
+
async def _ensure_session_exists(
|
|
183
|
+
self,
|
|
184
|
+
session_id: str,
|
|
185
|
+
user_id: str | None = None,
|
|
186
|
+
) -> None:
|
|
187
|
+
"""
|
|
188
|
+
Ensure session exists, creating it if necessary.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
session_id: Session UUID from X-Session-Id header
|
|
192
|
+
user_id: Optional user identifier
|
|
193
|
+
"""
|
|
194
|
+
try:
|
|
195
|
+
# Check if session already exists by UUID
|
|
196
|
+
existing = await self._session_repo.get_by_id(session_id)
|
|
197
|
+
if existing:
|
|
198
|
+
return # Session already exists
|
|
199
|
+
|
|
200
|
+
# Create new session with the provided UUID as id
|
|
201
|
+
session = Session(
|
|
202
|
+
id=session_id, # Use the provided UUID as session id
|
|
203
|
+
name=session_id, # Default name to UUID, can be updated later
|
|
204
|
+
user_id=user_id or self.user_id,
|
|
205
|
+
tenant_id=self.user_id, # tenant_id set to user_id for scoping
|
|
206
|
+
)
|
|
207
|
+
await self._session_repo.upsert(session)
|
|
208
|
+
logger.info(f"Created session {session_id} for user {user_id or self.user_id}")
|
|
209
|
+
|
|
210
|
+
except Exception as e:
|
|
211
|
+
# Log but don't fail - session creation is best-effort
|
|
212
|
+
logger.warning(f"Failed to ensure session exists: {e}")
|
|
180
213
|
|
|
181
214
|
async def store_message(
|
|
182
215
|
self,
|
|
@@ -209,7 +242,7 @@ class SessionMessageStore:
|
|
|
209
242
|
# Use pre-generated id from message dict if available (for frontend feedback)
|
|
210
243
|
msg = Message(
|
|
211
244
|
id=message.get("id"), # Use pre-generated ID if provided
|
|
212
|
-
content=message.get("content", ""),
|
|
245
|
+
content=message.get("content") or "",
|
|
213
246
|
message_type=message.get("role", "assistant"),
|
|
214
247
|
session_id=session_id,
|
|
215
248
|
tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
|
|
@@ -283,8 +316,10 @@ class SessionMessageStore:
|
|
|
283
316
|
"""
|
|
284
317
|
Store all session messages and return compressed versions.
|
|
285
318
|
|
|
319
|
+
Ensures session exists before storing messages.
|
|
320
|
+
|
|
286
321
|
Args:
|
|
287
|
-
session_id: Session identifier
|
|
322
|
+
session_id: Session UUID
|
|
288
323
|
messages: List of messages to store
|
|
289
324
|
user_id: Optional user identifier
|
|
290
325
|
compress: Whether to compress messages (default: True)
|
|
@@ -296,10 +331,13 @@ class SessionMessageStore:
|
|
|
296
331
|
logger.debug("Postgres disabled, returning messages uncompressed")
|
|
297
332
|
return messages
|
|
298
333
|
|
|
334
|
+
# Ensure session exists before storing messages
|
|
335
|
+
await self._ensure_session_exists(session_id, user_id)
|
|
336
|
+
|
|
299
337
|
compressed_messages = []
|
|
300
338
|
|
|
301
339
|
for idx, message in enumerate(messages):
|
|
302
|
-
content = message.get("content", "")
|
|
340
|
+
content = message.get("content") or ""
|
|
303
341
|
|
|
304
342
|
# Only store and compress long assistant responses
|
|
305
343
|
if (
|
|
@@ -330,6 +368,8 @@ class SessionMessageStore:
|
|
|
330
368
|
}
|
|
331
369
|
|
|
332
370
|
# For tool messages, include tool call details in metadata
|
|
371
|
+
# Note: tool_arguments is stored only when provided (parent tool calls)
|
|
372
|
+
# For child tool calls (e.g., register_metadata), args are in content as JSON
|
|
333
373
|
if message.get("role") == "tool":
|
|
334
374
|
if message.get("tool_call_id"):
|
|
335
375
|
msg_metadata["tool_call_id"] = message.get("tool_call_id")
|
|
@@ -398,6 +438,8 @@ class SessionMessageStore:
|
|
|
398
438
|
}
|
|
399
439
|
|
|
400
440
|
# For tool messages, reconstruct tool call metadata
|
|
441
|
+
# Note: tool_arguments may be in metadata (parent calls) or parsed from
|
|
442
|
+
# content (child calls like register_metadata) by pydantic_messages.py
|
|
401
443
|
if role == "tool" and msg.metadata:
|
|
402
444
|
if msg.metadata.get("tool_call_id"):
|
|
403
445
|
msg_dict["tool_call_id"] = msg.metadata["tool_call_id"]
|