remdb-0.3.180-py3-none-any.whl → remdb-0.3.258-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. rem/agentic/README.md +36 -2
  2. rem/agentic/__init__.py +10 -1
  3. rem/agentic/context.py +185 -1
  4. rem/agentic/context_builder.py +56 -35
  5. rem/agentic/mcp/tool_wrapper.py +2 -2
  6. rem/agentic/providers/pydantic_ai.py +303 -111
  7. rem/agentic/schema.py +2 -2
  8. rem/api/main.py +1 -1
  9. rem/api/mcp_router/resources.py +223 -0
  10. rem/api/mcp_router/server.py +4 -0
  11. rem/api/mcp_router/tools.py +608 -166
  12. rem/api/routers/admin.py +30 -4
  13. rem/api/routers/auth.py +219 -20
  14. rem/api/routers/chat/child_streaming.py +393 -0
  15. rem/api/routers/chat/completions.py +77 -40
  16. rem/api/routers/chat/sse_events.py +7 -3
  17. rem/api/routers/chat/streaming.py +381 -291
  18. rem/api/routers/chat/streaming_utils.py +325 -0
  19. rem/api/routers/common.py +18 -0
  20. rem/api/routers/dev.py +7 -1
  21. rem/api/routers/feedback.py +11 -3
  22. rem/api/routers/messages.py +176 -38
  23. rem/api/routers/models.py +9 -1
  24. rem/api/routers/query.py +17 -15
  25. rem/api/routers/shared_sessions.py +16 -0
  26. rem/auth/jwt.py +19 -4
  27. rem/auth/middleware.py +42 -28
  28. rem/cli/README.md +62 -0
  29. rem/cli/commands/ask.py +205 -114
  30. rem/cli/commands/db.py +55 -31
  31. rem/cli/commands/experiments.py +1 -1
  32. rem/cli/commands/process.py +179 -43
  33. rem/cli/commands/query.py +109 -0
  34. rem/cli/commands/session.py +117 -0
  35. rem/cli/main.py +2 -0
  36. rem/models/core/experiment.py +1 -1
  37. rem/models/entities/ontology.py +18 -20
  38. rem/models/entities/session.py +1 -0
  39. rem/schemas/agents/core/agent-builder.yaml +1 -1
  40. rem/schemas/agents/rem.yaml +1 -1
  41. rem/schemas/agents/test_orchestrator.yaml +42 -0
  42. rem/schemas/agents/test_structured_output.yaml +52 -0
  43. rem/services/content/providers.py +151 -49
  44. rem/services/content/service.py +18 -5
  45. rem/services/embeddings/worker.py +26 -12
  46. rem/services/postgres/__init__.py +28 -3
  47. rem/services/postgres/diff_service.py +57 -5
  48. rem/services/postgres/programmable_diff_service.py +635 -0
  49. rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
  50. rem/services/postgres/register_type.py +11 -10
  51. rem/services/postgres/repository.py +39 -28
  52. rem/services/postgres/schema_generator.py +5 -5
  53. rem/services/postgres/sql_builder.py +6 -5
  54. rem/services/rem/README.md +4 -3
  55. rem/services/rem/parser.py +7 -10
  56. rem/services/rem/service.py +47 -0
  57. rem/services/session/__init__.py +8 -1
  58. rem/services/session/compression.py +47 -5
  59. rem/services/session/pydantic_messages.py +310 -0
  60. rem/services/session/reload.py +2 -1
  61. rem/settings.py +92 -7
  62. rem/sql/migrations/001_install.sql +125 -7
  63. rem/sql/migrations/002_install_models.sql +159 -149
  64. rem/sql/migrations/004_cache_system.sql +10 -276
  65. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  66. rem/utils/schema_loader.py +180 -120
  67. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/METADATA +7 -6
  68. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/RECORD +70 -61
  69. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
  70. {remdb-0.3.180.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
@@ -94,14 +94,14 @@ def generate_table_schema(
     # Always add id as primary key
     columns.append("id UUID PRIMARY KEY DEFAULT uuid_generate_v4()")
 
-    # Add tenant_id if tenant scoped
+    # Add tenant_id if tenant scoped (nullable - NULL means public/shared)
     if tenant_scoped:
-        columns.append("tenant_id VARCHAR(100) NOT NULL")
-        indexes.append(f"CREATE INDEX idx_{table_name}_tenant ON {table_name} (tenant_id);")
+        columns.append("tenant_id VARCHAR(100)")
+        indexes.append(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_tenant ON {table_name} (tenant_id);")
 
     # Add user_id (owner field)
     columns.append("user_id VARCHAR(256)")
-    indexes.append(f"CREATE INDEX idx_{table_name}_user ON {table_name} (user_id);")
+    indexes.append(f"CREATE INDEX IF NOT EXISTS idx_{table_name}_user ON {table_name} (user_id);")
 
     # Process Pydantic fields (skip system fields)
     for field_name, field_info in model.model_fields.items():
@@ -125,19 +125,19 @@ def generate_table_schema(
     # Add graph_edges JSONB field
     columns.append("graph_edges JSONB DEFAULT '[]'::jsonb")
     indexes.append(
-        f"CREATE INDEX idx_{table_name}_graph_edges ON {table_name} USING GIN (graph_edges);"
+        f"CREATE INDEX IF NOT EXISTS idx_{table_name}_graph_edges ON {table_name} USING GIN (graph_edges);"
     )
 
     # Add metadata JSONB field
     columns.append("metadata JSONB DEFAULT '{}'::jsonb")
     indexes.append(
-        f"CREATE INDEX idx_{table_name}_metadata ON {table_name} USING GIN (metadata);"
+        f"CREATE INDEX IF NOT EXISTS idx_{table_name}_metadata ON {table_name} USING GIN (metadata);"
     )
 
     # Add tags field (TEXT[] for list[str])
     columns.append("tags TEXT[] DEFAULT ARRAY[]::TEXT[]")
     indexes.append(
-        f"CREATE INDEX idx_{table_name}_tags ON {table_name} USING GIN (tags);"
+        f"CREATE INDEX IF NOT EXISTS idx_{table_name}_tags ON {table_name} USING GIN (tags);"
     )
 
     # Generate CREATE TABLE statement
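Taken together, every index the generator emits is now guarded by IF NOT EXISTS, so re-running a migration against an existing database is idempotent, and tenant_id is nullable so a NULL value can mark public/shared rows. A hedged sketch of the DDL this would produce for a hypothetical tenant-scoped "articles" table, assembled from the fragments above (the CREATE TABLE wrapper and column order are assumptions):

# Hypothetical output sketch; "articles" and the exact ordering are assumptions.
EXPECTED_DDL = """
CREATE TABLE IF NOT EXISTS articles (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    tenant_id VARCHAR(100),  -- nullable: NULL means public/shared
    user_id VARCHAR(256),
    graph_edges JSONB DEFAULT '[]'::jsonb,
    metadata JSONB DEFAULT '{}'::jsonb,
    tags TEXT[] DEFAULT ARRAY[]::TEXT[]
);
CREATE INDEX IF NOT EXISTS idx_articles_tenant ON articles (tenant_id);
CREATE INDEX IF NOT EXISTS idx_articles_user ON articles (user_id);
CREATE INDEX IF NOT EXISTS idx_articles_graph_edges ON articles USING GIN (graph_edges);
CREATE INDEX IF NOT EXISTS idx_articles_metadata ON articles USING GIN (metadata);
CREATE INDEX IF NOT EXISTS idx_articles_tags ON articles USING GIN (tags);
"""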
@@ -202,10 +202,10 @@ CREATE TABLE IF NOT EXISTS {embeddings_table} (
 );
 
 -- Index for entity lookup (get all embeddings for entity)
-CREATE INDEX idx_{embeddings_table}_entity ON {embeddings_table} (entity_id);
+CREATE INDEX IF NOT EXISTS idx_{embeddings_table}_entity ON {embeddings_table} (entity_id);
 
 -- Index for field + provider lookup
-CREATE INDEX idx_{embeddings_table}_field_provider ON {embeddings_table} (field_name, provider);
+CREATE INDEX IF NOT EXISTS idx_{embeddings_table}_field_provider ON {embeddings_table} (field_name, provider);
 
 -- HNSW index for vector similarity search (created in background)
 -- Note: This will be created by background thread after data load
@@ -258,6 +258,7 @@ BEGIN
         RETURN OLD;
     ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
         -- Upsert to KV_STORE (O(1) lookup by entity_key)
+        -- tenant_id can be NULL (meaning public/shared data)
         INSERT INTO kv_store (
             entity_key,
             entity_type,
@@ -277,7 +278,7 @@ BEGIN
             COALESCE(NEW.graph_edges, '[]'::jsonb),
             CURRENT_TIMESTAMP
         )
-        ON CONFLICT (tenant_id, entity_key)
+        ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
         DO UPDATE SET
             entity_id = EXCLUDED.entity_id,
             user_id = EXCLUDED.user_id,
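One caveat worth noting: PostgreSQL only accepts an expression conflict target such as COALESCE(tenant_id, '') when a unique index over that exact expression exists. A minimal sketch of the supporting index this trigger presumably relies on (the index name is an assumption; the real definition would live in the migration files):

# Hypothetical supporting index for the expression conflict target above;
# the name idx_kv_store_tenant_entity is an assumption, not from the package.
KV_UNIQUE_INDEX_SQL = """
CREATE UNIQUE INDEX IF NOT EXISTS idx_kv_store_tenant_entity
    ON kv_store (COALESCE(tenant_id, ''), entity_key);
"""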
@@ -33,15 +33,15 @@ if TYPE_CHECKING:
 
 def get_postgres_service() -> "PostgresService | None":
     """
-    Get PostgresService instance with connection string from settings.
+    Get PostgresService singleton from parent module.
 
-    Returns None if Postgres is disabled.
+    Uses late import to avoid circular import issues.
+    Previously had a separate _postgres_instance here which caused
+    "pool not connected" errors due to duplicate connection pools.
     """
-    if not settings.postgres.enabled:
-        return None
-
-    from .service import PostgresService
-    return PostgresService()
+    # Late import to avoid circular import (repository.py imported by __init__.py)
+    from rem.services.postgres import get_postgres_service as _get_singleton
+    return _get_singleton()
 
 T = TypeVar("T", bound=BaseModel)
 
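The delegation above implies a single module-level instance in rem/services/postgres/__init__.py (which this diff grows by +28 -3). A minimal sketch of what that accessor presumably looks like; the variable name and exact shape are assumptions, only the delegation is shown by the diff:

# Hypothetical sketch of the singleton accessor in rem/services/postgres/__init__.py:
from rem.settings import settings
from .service import PostgresService

_postgres_service: "PostgresService | None" = None  # one shared pool per process

def get_postgres_service() -> "PostgresService | None":
    global _postgres_service
    if not settings.postgres.enabled:
        return None
    if _postgres_service is None:
        _postgres_service = PostgresService()  # constructed once, pool reused
    return _postgres_service

Constructing the service in exactly one place is what removes the duplicate pools behind the "pool not connected" errors the docstring mentions.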
@@ -74,7 +74,7 @@ class Repository(Generic[T]):
         self,
         records: T | list[T],
         embeddable_fields: list[str] | None = None,
-        generate_embeddings: bool = False,
+        generate_embeddings: bool = True,
     ) -> T | list[T]:
         """
         Upsert single record or list of records (create or update on ID conflict).
@@ -84,8 +84,9 @@
 
         Args:
             records: Single model instance or list of model instances
-            embeddable_fields: Optional list of fields to generate embeddings for
-            generate_embeddings: Whether to queue embedding generation tasks
+            embeddable_fields: Optional list of fields to generate embeddings for.
+                If None, auto-detects 'content' field if present.
+            generate_embeddings: Whether to queue embedding generation tasks (default: True)
 
         Returns:
             Single record or list of records with generated IDs (matches input type)
@@ -118,25 +119,35 @@
             record.id = row["id"]  # type: ignore[attr-defined]
 
         # Queue embedding generation if requested and worker is available
-        if generate_embeddings and embeddable_fields and self.db.embedding_worker:
+        if generate_embeddings and self.db.embedding_worker:
             from rem.services.embeddings import EmbeddingTask
-
-            for record in records_list:
-                for field_name in embeddable_fields:
-                    content = getattr(record, field_name, None)
-                    if content and isinstance(content, str):
-                        task = EmbeddingTask(
-                            task_id=f"{record.id}-{field_name}",  # type: ignore[attr-defined]
-                            entity_id=str(record.id),  # type: ignore[attr-defined]
-                            table_name=self.table_name,
-                            field_name=field_name,
-                            content=content,
-                            provider="openai",  # Default provider
-                            model="text-embedding-3-small",  # Default model
-                        )
-                        await self.db.embedding_worker.queue_task(task)
-
-            logger.debug(f"Queued {len(records_list) * len(embeddable_fields)} embedding tasks")
+            from .register_type import should_embed_field
+
+            # Auto-detect embeddable fields if not specified
+            if embeddable_fields is None:
+                embeddable_fields = [
+                    field_name
+                    for field_name, field_info in self.model_class.model_fields.items()
+                    if should_embed_field(field_name, field_info)
+                ]
+
+            if embeddable_fields:
+                for record in records_list:
+                    for field_name in embeddable_fields:
+                        content = getattr(record, field_name, None)
+                        if content and isinstance(content, str):
+                            task = EmbeddingTask(
+                                task_id=f"{record.id}-{field_name}",  # type: ignore[attr-defined]
+                                entity_id=str(record.id),  # type: ignore[attr-defined]
+                                table_name=self.table_name,
+                                field_name=field_name,
+                                content=content,
+                                provider="openai",  # Default provider
+                                model="text-embedding-3-small",  # Default model
+                            )
+                            await self.db.embedding_worker.queue_task(task)
+
+                logger.debug(f"Queued {len(records_list) * len(embeddable_fields)} embedding tasks")
 
         # Return single item or list to match input type
         return records_list[0] if is_single else records_list
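With the default flipped to generate_embeddings=True and field auto-detection in place, a plain upsert now queues embedding tasks whenever the worker is running. A hedged usage sketch (the Article model is illustrative, not from the package; the Repository constructor shape matches its uses elsewhere in this diff):

from pydantic import BaseModel

class Article(BaseModel):  # hypothetical model; `content` is the typical embeddable field
    id: str | None = None
    content: str = ""

async def save_notes() -> None:
    repo = Repository(Article)
    article = Article(content="Postgres tuning notes")
    await repo.upsert(article)                             # embeddings queued by default now
    await repo.upsert(article, generate_embeddings=False)  # restores the old opt-in behavior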
@@ -351,10 +351,10 @@ class SchemaGenerator:
 
     Priority:
     1. Field with json_schema_extra={"entity_key": True}
-    2. Field named "name"
+    2. Field named "name" (human-readable identifier)
     3. Field named "key"
-    4. Field named "label"
-    5. First string field
+    4. Field named "uri"
+    5. Field named "id" (fallback)
 
     Args:
         model: Pydantic model class
@@ -369,9 +369,9 @@
             if json_extra.get("entity_key"):
                 return field_name
 
-        # Check for key fields in priority order: id -> uri -> key -> name
+        # Check for key fields in priority order: name -> key -> uri -> id
         # (matching sql_builder.get_entity_key convention)
-        for candidate in ["id", "uri", "key", "name"]:
+        for candidate in ["name", "key", "uri", "id"]:
             if candidate in model.model_fields:
                 return candidate
 
@@ -35,10 +35,11 @@ def get_natural_key(model: BaseModel) -> str | None:
 
 def get_entity_key(model: BaseModel) -> str:
     """
-    Get entity key for KV store following precedence: id -> uri -> key -> name.
+    Get entity key for KV store following precedence: name -> key -> uri -> id.
 
-    For KV store lookups, we prefer globally unique identifiers first (id),
-    then natural keys (uri/key/name). Always returns a value (id as fallback).
+    For KV store lookups, we prefer human-readable identifiers first (name/key),
+    then URIs, with id as the fallback. This allows users to lookup entities
+    by their natural names like "panic-disorder" instead of UUIDs.
 
     Args:
         model: Pydantic model instance
@@ -46,13 +47,13 @@ def get_entity_key(model: BaseModel) -> str:
 
     Returns:
         Entity key string (guaranteed to exist)
     """
-    for field in ["id", "uri", "key", "name"]:
+    for field in ["name", "key", "uri", "id"]:
         if hasattr(model, field):
             value = getattr(model, field)
             if value:
                 return str(value)
     # Should never reach here since id always exists in CoreModel
-    raise ValueError(f"Model {type(model)} has no id, uri, key, or name field")
+    raise ValueError(f"Model {type(model)} has no name, key, uri, or id field")
 
 
 def generate_deterministic_id(user_id: str | None, entity_key: str) -> uuid.UUID:
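The practical effect of the reordering, as a small sketch (Disorder is a hypothetical model for illustration; CoreModel always provides id, so the fallback still fires for models with none of the natural keys):

from pydantic import BaseModel

class Disorder(BaseModel):  # hypothetical model, not from the package
    id: str = "00000000-0000-0000-0000-000000000000"
    name: str = "panic-disorder"

# Old precedence (id first) keyed the KV row by the UUID; the new
# precedence (name first) keys it by the human-readable slug:
assert get_entity_key(Disorder()) == "panic-disorder"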
@@ -40,15 +40,16 @@ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
    available   : Stage 1+
    example     : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
 
-SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
+SearchQuery ::= SEARCH <text:string> [IN|TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
    text        : Semantic query text
-    table       : Target table (default: "resources")
+    table       : Target table (default: "resources"). Use IN or TABLE keyword.
    clause      : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
    limit       : Max results (default: 10)
    performance : Indexed (pgvector)
    available   : Stage 3+
    examples    :
-        - SEARCH "database migration" TABLE resources LIMIT 10
+        - SEARCH "database migration" IN resources LIMIT 10
+        - SEARCH "parcel delivery" IN ontologies
        - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
        - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
        - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
@@ -64,7 +64,7 @@ class RemQueryParser:
         token_upper = token.upper()
 
         # Handle REM keywords that take a value
-        if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH"):
+        if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH", "TABLE", "IN", "WHERE"):
             if i + 1 < len(tokens):
                 keyword_map = {
                     "LIMIT": "limit",
@@ -73,6 +73,9 @@
                     "TYPE": "edge_types",
                     "FROM": "initial_query",
                     "WITH": "initial_query",
+                    "TABLE": "table_name",
+                    "IN": "table_name",  # IN is alias for TABLE
+                    "WHERE": "where_clause",
                 }
                 key = keyword_map[token_upper]
                 value = tokens[i + 1]
@@ -161,15 +164,9 @@
             params["query_text"] = combined_value
 
         elif query_type == QueryType.SEARCH:
-            # SEARCH expects: SEARCH <table> <query_text> [LIMIT n]
-            # First positional arg is table name, rest is query text
-            if len(positional_args) >= 2:
-                params["table_name"] = positional_args[0]
-                params["query_text"] = " ".join(positional_args[1:])
-            elif len(positional_args) == 1:
-                # Could be table name or query text - assume query text if no table
-                params["query_text"] = positional_args[0]
-            # If no positional args, params stays empty
+            # SEARCH expects: SEARCH <text> [TABLE <table>] [WHERE <clause>] [LIMIT n]
+            # All positional args are query_text, TABLE/WHERE/LIMIT are handled as keywords
+            params["query_text"] = combined_value
 
         elif query_type == QueryType.TRAVERSE:
             params["initial_query"] = combined_value
@@ -478,6 +478,53 @@ class RemService:
         parser = RemQueryParser()
         return parser.parse(query_string)
 
+    async def execute_query_string(
+        self, query_string: str, user_id: str | None = None
+    ) -> dict[str, Any]:
+        """
+        Execute a REM dialect query string directly.
+
+        This is the unified entry point for executing REM queries from both
+        the CLI and API. It handles parsing the query string, creating the
+        RemQuery model, and executing it.
+
+        Args:
+            query_string: REM dialect query (e.g., 'LOOKUP "Sarah Chen"',
+                'SEARCH "API design" IN resources', 'SELECT * FROM users')
+            user_id: Optional user ID for query isolation
+
+        Returns:
+            Dict with query results and metadata:
+            - query_type: The type of query executed
+            - results: List of result rows
+            - count: Number of results
+            - Additional fields depending on query type
+
+        Raises:
+            ValueError: If the query string is invalid
+            QueryExecutionError: If query execution fails
+
+        Example:
+            >>> result = await rem_service.execute_query_string(
+            ...     'LOOKUP "Sarah Chen"',
+            ...     user_id="user-123"
+            ... )
+            >>> print(result["count"])
+            1
+        """
+        # Parse the query string into type and parameters
+        query_type, parameters = self._parse_query_string(query_string)
+
+        # Create and validate the RemQuery model
+        rem_query = RemQuery.model_validate({
+            "query_type": query_type,
+            "parameters": parameters,
+            "user_id": user_id,
+        })
+
+        # Execute and return results
+        return await self.execute_query(rem_query)
+
     async def ask_rem(
         self, natural_query: str, tenant_id: str, llm_model: str | None = None, plan_mode: bool = False
     ) -> dict[str, Any]:
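With this entry point, the CLI's query command and the API's query router can share one code path. An illustrative sketch of a caller; the wiring below is an assumption, not the package's actual handler code:

# Hypothetical caller delegating to the unified entry point:
async def run_query(rem_service: RemService, query: str, user_id: str | None = None) -> dict:
    result = await rem_service.execute_query_string(query, user_id=user_id)
    return {"count": result["count"], "results": result["results"]}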
@@ -1,6 +1,13 @@
 """Session management services for conversation persistence and compression."""
 
 from .compression import MessageCompressor, SessionMessageStore
+from .pydantic_messages import audit_session_history, session_to_pydantic_messages
 from .reload import reload_session
 
-__all__ = ["MessageCompressor", "SessionMessageStore", "reload_session"]
+__all__ = [
+    "MessageCompressor",
+    "SessionMessageStore",
+    "audit_session_history",
+    "reload_session",
+    "session_to_pydantic_messages",
+]
@@ -65,7 +65,7 @@ def truncate_key(key: str, max_length: int = MAX_ENTITY_KEY_LENGTH) -> str:
         logger.warning(f"Truncated key from {len(key)} to {len(truncated)} chars: {key[:50]}...")
         return truncated
 
-from rem.models.entities import Message
+from rem.models.entities import Message, Session
 from rem.services.postgres import PostgresService, Repository
 from rem.settings import settings
 
@@ -96,7 +96,7 @@ class MessageCompressor:
 
         Returns:
             Compressed message dict
         """
-        content = message.get("content", "")
+        content = message.get("content") or ""
 
         # Don't compress short messages or system messages
         if (
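The switch away from a .get() default matters because chat messages often carry an explicit content of None (assistant tool-call messages, for example), and dict.get only falls back when the key is absent entirely:

msg = {"role": "assistant", "content": None}
msg.get("content", "")    # -> None: the key exists, so the default is ignored
msg.get("content") or ""  # -> "": None is coerced to an empty string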
@@ -177,6 +177,39 @@ class SessionMessageStore:
         self.user_id = user_id
         self.compressor = compressor or MessageCompressor()
         self.repo = Repository(Message)
+        self._session_repo = Repository(Session, table_name="sessions")
+
+    async def _ensure_session_exists(
+        self,
+        session_id: str,
+        user_id: str | None = None,
+    ) -> None:
+        """
+        Ensure session exists, creating it if necessary.
+
+        Args:
+            session_id: Session UUID from X-Session-Id header
+            user_id: Optional user identifier
+        """
+        try:
+            # Check if session already exists by UUID
+            existing = await self._session_repo.get_by_id(session_id)
+            if existing:
+                return  # Session already exists
+
+            # Create new session with the provided UUID as id
+            session = Session(
+                id=session_id,  # Use the provided UUID as session id
+                name=session_id,  # Default name to UUID, can be updated later
+                user_id=user_id or self.user_id,
+                tenant_id=self.user_id,  # tenant_id set to user_id for scoping
+            )
+            await self._session_repo.upsert(session)
+            logger.info(f"Created session {session_id} for user {user_id or self.user_id}")
+
+        except Exception as e:
+            # Log but don't fail - session creation is best-effort
+            logger.warning(f"Failed to ensure session exists: {e}")
 
     async def store_message(
         self,
@@ -209,7 +242,7 @@
         # Use pre-generated id from message dict if available (for frontend feedback)
         msg = Message(
             id=message.get("id"),  # Use pre-generated ID if provided
-            content=message.get("content", ""),
+            content=message.get("content") or "",
             message_type=message.get("role", "assistant"),
             session_id=session_id,
             tenant_id=self.user_id,  # Set tenant_id to user_id (application scoped to user)
@@ -283,8 +316,10 @@
         """
         Store all session messages and return compressed versions.
 
+        Ensures session exists before storing messages.
+
         Args:
-            session_id: Session identifier
+            session_id: Session UUID
             messages: List of messages to store
             user_id: Optional user identifier
             compress: Whether to compress messages (default: True)
@@ -296,10 +331,13 @@
             logger.debug("Postgres disabled, returning messages uncompressed")
             return messages
 
+        # Ensure session exists before storing messages
+        await self._ensure_session_exists(session_id, user_id)
+
         compressed_messages = []
 
         for idx, message in enumerate(messages):
-            content = message.get("content", "")
+            content = message.get("content") or ""
 
             # Only store and compress long assistant responses
             if (
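A hedged usage sketch of the new write path; the bulk-store method's name and the constructor shape are inferred from the hunks above, not confirmed by this diff:

# Hypothetical caller; method and argument names are assumptions.
async def persist(session_id: str, messages: list[dict]) -> list[dict]:
    store = SessionMessageStore(user_id="user-123")
    # The sessions row is created on first write, so stored messages never
    # reference a session row that does not exist yet.
    return await store.store_session_messages(session_id, messages)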
@@ -330,6 +368,8 @@
         }
 
         # For tool messages, include tool call details in metadata
+        # Note: tool_arguments is stored only when provided (parent tool calls)
+        # For child tool calls (e.g., register_metadata), args are in content as JSON
         if message.get("role") == "tool":
             if message.get("tool_call_id"):
                 msg_metadata["tool_call_id"] = message.get("tool_call_id")
@@ -398,6 +438,8 @@
         }
 
         # For tool messages, reconstruct tool call metadata
+        # Note: tool_arguments may be in metadata (parent calls) or parsed from
+        # content (child calls like register_metadata) by pydantic_messages.py
         if role == "tool" and msg.metadata:
             if msg.metadata.get("tool_call_id"):
                 msg_dict["tool_call_id"] = msg.metadata["tool_call_id"]