remdb 0.3.200__py3-none-any.whl → 0.3.226__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

@@ -1,12 +1,13 @@
1
1
  """Session management services for conversation persistence and compression."""
2
2
 
3
3
  from .compression import MessageCompressor, SessionMessageStore
4
- from .pydantic_messages import session_to_pydantic_messages
4
+ from .pydantic_messages import audit_session_history, session_to_pydantic_messages
5
5
  from .reload import reload_session
6
6
 
7
7
  __all__ = [
8
8
  "MessageCompressor",
9
9
  "SessionMessageStore",
10
+ "audit_session_history",
10
11
  "reload_session",
11
12
  "session_to_pydantic_messages",
12
13
  ]
@@ -65,7 +65,7 @@ def truncate_key(key: str, max_length: int = MAX_ENTITY_KEY_LENGTH) -> str:
65
65
  logger.warning(f"Truncated key from {len(key)} to {len(truncated)} chars: {key[:50]}...")
66
66
  return truncated
67
67
 
68
- from rem.models.entities import Message
68
+ from rem.models.entities import Message, Session
69
69
  from rem.services.postgres import PostgresService, Repository
70
70
  from rem.settings import settings
71
71
 
@@ -177,6 +177,39 @@ class SessionMessageStore:
177
177
  self.user_id = user_id
178
178
  self.compressor = compressor or MessageCompressor()
179
179
  self.repo = Repository(Message)
180
+ self._session_repo = Repository(Session, table_name="sessions")
181
+
182
+ async def _ensure_session_exists(
183
+ self,
184
+ session_id: str,
185
+ user_id: str | None = None,
186
+ ) -> None:
187
+ """
188
+ Ensure session exists, creating it if necessary.
189
+
190
+ Args:
191
+ session_id: Session UUID from X-Session-Id header
192
+ user_id: Optional user identifier
193
+ """
194
+ try:
195
+ # Check if session already exists by UUID
196
+ existing = await self._session_repo.get_by_id(session_id)
197
+ if existing:
198
+ return # Session already exists
199
+
200
+ # Create new session with the provided UUID as id
201
+ session = Session(
202
+ id=session_id, # Use the provided UUID as session id
203
+ name=session_id, # Default name to UUID, can be updated later
204
+ user_id=user_id or self.user_id,
205
+ tenant_id=self.user_id, # tenant_id set to user_id for scoping
206
+ )
207
+ await self._session_repo.upsert(session)
208
+ logger.info(f"Created session {session_id} for user {user_id or self.user_id}")
209
+
210
+ except Exception as e:
211
+ # Log but don't fail - session creation is best-effort
212
+ logger.warning(f"Failed to ensure session exists: {e}")
180
213
 
181
214
  async def store_message(
182
215
  self,
@@ -283,8 +316,10 @@ class SessionMessageStore:
283
316
  """
284
317
  Store all session messages and return compressed versions.
285
318
 
319
+ Ensures session exists before storing messages.
320
+
286
321
  Args:
287
- session_id: Session identifier
322
+ session_id: Session UUID
288
323
  messages: List of messages to store
289
324
  user_id: Optional user identifier
290
325
  compress: Whether to compress messages (default: True)
@@ -296,6 +331,9 @@ class SessionMessageStore:
296
331
  logger.debug("Postgres disabled, returning messages uncompressed")
297
332
  return messages
298
333
 
334
+ # Ensure session exists before storing messages
335
+ await self._ensure_session_exists(session_id, user_id)
336
+
299
337
  compressed_messages = []
300
338
 
301
339
  for idx, message in enumerate(messages):
@@ -208,3 +208,69 @@ def session_to_pydantic_messages(
208
208
 
209
209
  logger.debug(f"Converted {len(session_history)} stored messages to {len(messages)} pydantic-ai messages")
210
210
  return messages
211
+
212
+
213
+ def audit_session_history(
214
+ session_id: str,
215
+ agent_name: str,
216
+ prompt: str,
217
+ raw_session_history: list[dict[str, Any]],
218
+ pydantic_messages_count: int,
219
+ ) -> None:
220
+ """
221
+ Dump session history to a YAML file for debugging.
222
+
223
+ Only runs when DEBUG__AUDIT_SESSION=true. Writes to DEBUG__AUDIT_DIR (default /tmp).
224
+ Appends to the same file for a session, so all agent invocations are in one place.
225
+
226
+ Args:
227
+ session_id: The session identifier
228
+ agent_name: Name of the agent being invoked
229
+ prompt: The prompt being sent to the agent
230
+ raw_session_history: The raw session messages from the database
231
+ pydantic_messages_count: Count of converted pydantic-ai messages
232
+ """
233
+ from ...settings import settings
234
+
235
+ if not settings.debug.audit_session:
236
+ return
237
+
238
+ try:
239
+ import yaml
240
+ from pathlib import Path
241
+ from ...utils.date_utils import utc_now, to_iso
242
+
243
+ audit_dir = Path(settings.debug.audit_dir)
244
+ audit_dir.mkdir(parents=True, exist_ok=True)
245
+ audit_file = audit_dir / f"{session_id}.yaml"
246
+
247
+ # Create entry for this agent invocation
248
+ entry = {
249
+ "timestamp": to_iso(utc_now()),
250
+ "agent_name": agent_name,
251
+ "prompt": prompt,
252
+ "raw_history_count": len(raw_session_history),
253
+ "pydantic_messages_count": pydantic_messages_count,
254
+ "raw_session_history": raw_session_history,
255
+ }
256
+
257
+ # Load existing data or create new
258
+ existing_data: dict[str, Any] = {"session_id": session_id, "invocations": []}
259
+ if audit_file.exists():
260
+ with open(audit_file) as f:
261
+ loaded = yaml.safe_load(f)
262
+ if loaded:
263
+ # Ensure session_id is always present (backfill if missing)
264
+ existing_data = {
265
+ "session_id": loaded.get("session_id", session_id),
266
+ "invocations": loaded.get("invocations", []),
267
+ }
268
+
269
+ # Append this invocation
270
+ existing_data["invocations"].append(entry)
271
+
272
+ with open(audit_file, "w") as f:
273
+ yaml.dump(existing_data, f, default_flow_style=False, allow_unicode=True)
274
+ logger.info(f"DEBUG: Session audit updated: {audit_file}")
275
+ except Exception as e:
276
+ logger.warning(f"DEBUG: Failed to dump session audit: {e}")
rem/settings.py CHANGED
@@ -1651,6 +1651,33 @@ class EmailSettings(BaseSettings):
1651
1651
  return kwargs
1652
1652
 
1653
1653
 
1654
+ class DebugSettings(BaseSettings):
1655
+ """
1656
+ Debug settings for development and troubleshooting.
1657
+
1658
+ Environment variables:
1659
+ DEBUG__AUDIT_SESSION - Dump session history to /tmp/{session_id}.yaml
1660
+ DEBUG__AUDIT_DIR - Directory for session audit files (default: /tmp)
1661
+ """
1662
+
1663
+ model_config = SettingsConfigDict(
1664
+ env_prefix="DEBUG__",
1665
+ env_file=".env",
1666
+ env_file_encoding="utf-8",
1667
+ extra="ignore",
1668
+ )
1669
+
1670
+ audit_session: bool = Field(
1671
+ default=False,
1672
+ description="When true, dump full session history to audit files for debugging",
1673
+ )
1674
+
1675
+ audit_dir: str = Field(
1676
+ default="/tmp",
1677
+ description="Directory for session audit files",
1678
+ )
1679
+
1680
+
1654
1681
  class TestSettings(BaseSettings):
1655
1682
  """
1656
1683
  Test environment settings.
@@ -1767,6 +1794,7 @@ class Settings(BaseSettings):
1767
1794
  schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
1768
1795
  email: EmailSettings = Field(default_factory=EmailSettings)
1769
1796
  test: TestSettings = Field(default_factory=TestSettings)
1797
+ debug: DebugSettings = Field(default_factory=DebugSettings)
1770
1798
 
1771
1799
 
1772
1800
  # Auto-load .env file from current directory if it exists
@@ -822,6 +822,7 @@ COMMENT ON FUNCTION fn_get_shared_messages IS
822
822
  -- Function to list sessions with user details (name, email) for admin views
823
823
 
824
824
  -- List sessions with user info, CTE pagination
825
+ -- Note: messages.session_id stores the session UUID (sessions.id)
825
826
  CREATE OR REPLACE FUNCTION fn_list_sessions_with_user(
826
827
  p_user_id VARCHAR(256) DEFAULT NULL, -- Filter by user_id (NULL = all users, admin only)
827
828
  p_user_name VARCHAR(256) DEFAULT NULL, -- Filter by user name (partial match, admin only)
@@ -847,7 +848,15 @@ RETURNS TABLE(
847
848
  ) AS $$
848
849
  BEGIN
849
850
  RETURN QUERY
850
- WITH filtered_sessions AS (
851
+ WITH session_msg_counts AS (
852
+ -- Count messages per session (joining on session UUID)
853
+ SELECT
854
+ m.session_id,
855
+ COUNT(*)::INTEGER as actual_message_count
856
+ FROM messages m
857
+ GROUP BY m.session_id
858
+ ),
859
+ filtered_sessions AS (
851
860
  SELECT
852
861
  s.id,
853
862
  s.name,
@@ -856,13 +865,14 @@ BEGIN
856
865
  s.user_id,
857
866
  COALESCE(u.name, s.user_id)::VARCHAR(256) AS user_name,
858
867
  u.email::VARCHAR(256) AS user_email,
859
- s.message_count,
868
+ COALESCE(mc.actual_message_count, 0) AS message_count,
860
869
  s.total_tokens,
861
870
  s.created_at,
862
871
  s.updated_at,
863
872
  s.metadata
864
873
  FROM sessions s
865
874
  LEFT JOIN users u ON u.id::text = s.user_id
875
+ LEFT JOIN session_msg_counts mc ON mc.session_id = s.id::text
866
876
  WHERE s.deleted_at IS NULL
867
877
  AND (p_user_id IS NULL OR s.user_id = p_user_id)
868
878
  AND (p_user_name IS NULL OR u.name ILIKE '%' || p_user_name || '%')
@@ -895,7 +905,7 @@ END;
895
905
  $$ LANGUAGE plpgsql STABLE;
896
906
 
897
907
  COMMENT ON FUNCTION fn_list_sessions_with_user IS
898
- 'List sessions with user details (name, email). Supports filtering by user_id, user_name, user_email, and mode.';
908
+ 'List sessions with user details and computed message counts. Joins messages on session UUID (sessions.id). Supports filtering by user_id, user_name, user_email, and mode.';
899
909
 
900
910
  -- ============================================================================
901
911
  -- RECORD INSTALLATION
@@ -1,7 +1,7 @@
1
1
  -- REM Model Schema (install_models.sql)
2
2
  -- Generated from Pydantic models
3
3
  -- Source: model registry
4
- -- Generated at: 2025-12-15T09:58:08.880060
4
+ -- Generated at: 2025-12-22T17:34:54.187339
5
5
  --
6
6
  -- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
7
7
  --
@@ -2088,32 +2088,30 @@ Domain-specific knowledge - either agent-extracted or direct-loaded.
2088
2088
  tags=["cv", "engineering"]
2089
2089
  )
2090
2090
 
2091
- # Direct-loaded: Medical knowledge base from git
2092
- disorder_ontology = Ontology(
2093
- name="panic-disorder",
2094
- uri="git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md",
2095
- content="# Panic Disorder\n\nPanic disorder is characterized by...",
2091
+ # Direct-loaded: Knowledge base from git
2092
+ api_docs = Ontology(
2093
+ name="rest-api-guide",
2094
+ uri="git://example-org/docs/api/rest-api-guide.md",
2095
+ content="# REST API Guide\n\nThis guide covers RESTful API design...",
2096
2096
  extracted_data={
2097
- "type": "disorder",
2098
- "category": "anxiety",
2099
- "icd10": "F41.0",
2100
- "dsm5_criteria": ["A", "B", "C", "D"],
2097
+ "type": "documentation",
2098
+ "category": "api",
2099
+ "version": "2.0",
2101
2100
  },
2102
- tags=["disorder", "anxiety", "dsm5"]
2101
+ tags=["api", "rest", "documentation"]
2103
2102
  )
2104
2103
 
2105
- # Direct-loaded: Clinical procedure from git
2106
- scid_node = Ontology(
2107
- name="scid-5-f1",
2108
- uri="git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md",
2109
- content="# scid-5-f1: Panic Attack Screening\n\n...",
2104
+ # Direct-loaded: Technical spec from git
2105
+ config_spec = Ontology(
2106
+ name="config-schema",
2107
+ uri="git://example-org/docs/specs/config-schema.md",
2108
+ content="# Configuration Schema\n\nThis document defines...",
2110
2109
  extracted_data={
2111
- "type": "procedure",
2112
- "module": "F",
2113
- "section": "Panic Disorder",
2114
- "dsm5_criterion": "Panic Attack Specifier",
2110
+ "type": "specification",
2111
+ "format": "yaml",
2112
+ "version": "1.0",
2115
2113
  },
2116
- tags=["scid-5", "procedure", "anxiety"]
2114
+ tags=["config", "schema", "specification"]
2117
2115
  )
2118
2116
 
2119
2117
 
@@ -2227,7 +2225,7 @@ This schema includes the `search_rem` tool which supports:
2227
2225
  - **Optional**
2228
2226
 
2229
2227
  ',
2230
- '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Medical knowledge base from git\n disorder_ontology = Ontology(\n name=\"panic-disorder\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/disorders/anxiety/panic-disorder.md\",\n content=\"# Panic Disorder\\n\\nPanic disorder is characterized by...\",\n extracted_data={\n \"type\": \"disorder\",\n \"category\": \"anxiety\",\n \"icd10\": \"F41.0\",\n \"dsm5_criteria\": [\"A\", \"B\", \"C\", \"D\"],\n 
},\n tags=[\"disorder\", \"anxiety\", \"dsm5\"]\n )\n\n # Direct-loaded: Clinical procedure from git\n scid_node = Ontology(\n name=\"scid-5-f1\",\n uri=\"git://bwolfson-siggie/Siggy-MVP/ontology/procedures/scid-5/module-f/scid-5-f1.md\",\n content=\"# scid-5-f1: Panic Attack Screening\\n\\n...\",\n extracted_data={\n \"type\": \"procedure\",\n \"module\": \"F\",\n \"section\": \"Panic Disorder\",\n \"dsm5_criterion\": \"Panic Attack Specifier\",\n },\n tags=[\"scid-5\", \"procedure\", \"anxiety\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2228
+ '{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Knowledge base from git\n api_docs = Ontology(\n name=\"rest-api-guide\",\n uri=\"git://example-org/docs/api/rest-api-guide.md\",\n content=\"# REST API Guide\\n\\nThis guide covers RESTful API design...\",\n extracted_data={\n \"type\": \"documentation\",\n \"category\": \"api\",\n \"version\": \"2.0\",\n },\n tags=[\"api\", \"rest\", \"documentation\"]\n )\n\n # Direct-loaded: Technical spec from git\n 
config_spec = Ontology(\n name=\"config-schema\",\n uri=\"git://example-org/docs/specs/config-schema.md\",\n content=\"# Configuration Schema\\n\\nThis document defines...\",\n extracted_data={\n \"type\": \"specification\",\n \"format\": \"yaml\",\n \"version\": \"1.0\",\n },\n tags=[\"config\", \"schema\", \"specification\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
2231
2229
  'entity',
2232
2230
  '{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
2233
2231
  )
@@ -0,0 +1,45 @@
1
+ -- Migration: Update messages.session_id from session name to session UUID
2
+ -- This fixes the bug where messages were stored with session.name instead of session.id
3
+ --
4
+ -- Run this migration AFTER deploying the code fixes in remdb 0.3.204+
5
+ -- The code now correctly stores session.id (UUID), but existing data needs migration.
6
+
7
+ BEGIN;
8
+
9
+ -- First, count how many messages need to be updated
10
+ DO $$
11
+ DECLARE
12
+ count_to_migrate INTEGER;
13
+ BEGIN
14
+ SELECT COUNT(*) INTO count_to_migrate
15
+ FROM messages m
16
+ JOIN sessions s ON m.session_id = s.name
17
+ WHERE m.session_id != s.id::text;
18
+
19
+ RAISE NOTICE 'Messages needing migration: %', count_to_migrate;
20
+ END $$;
21
+
22
+ -- Update messages.session_id from session name to session UUID
23
+ UPDATE messages m
24
+ SET session_id = s.id::text
25
+ FROM sessions s
26
+ WHERE m.session_id = s.name
27
+ AND m.session_id != s.id::text;
28
+
29
+ -- NOTE(review): this separate DO block cannot observe the UPDATE above — GET DIAGNOSTICS ROW_COUNT
+ -- only reflects the last SQL command executed inside the same plpgsql block, so this reports 0.
+ -- Move the UPDATE into this block (or re-count with a SELECT) to report the real number updated.
30
+ DO $$
31
+ DECLARE
32
+ updated_count INTEGER;
33
+ BEGIN
34
+ GET DIAGNOSTICS updated_count = ROW_COUNT;
35
+ RAISE NOTICE 'Messages updated: %', updated_count;
36
+ END $$;
37
+
38
+ COMMIT;
39
+
40
+ -- Verify the fix - all messages should now join by UUID
41
+ SELECT
42
+ 'Messages matching sessions by UUID' as status,
43
+ COUNT(*) as count
44
+ FROM messages m
45
+ JOIN sessions s ON m.session_id = s.id::text;
@@ -147,15 +147,25 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
147
147
  try:
148
148
  await db.connect()
149
149
 
150
- query = """
151
- SELECT spec FROM schemas
152
- WHERE LOWER(name) = LOWER($1)
153
- AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
154
- LIMIT 1
155
- """
156
- logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id}")
157
-
158
- row = await db.fetchrow(query, schema_name, user_id)
150
+ # Query for public schemas (user_id IS NULL) and optionally user-specific
151
+ if user_id:
152
+ query = """
153
+ SELECT spec FROM schemas
154
+ WHERE LOWER(name) = LOWER($1)
155
+ AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
156
+ LIMIT 1
157
+ """
158
+ row = await db.fetchrow(query, schema_name, user_id)
159
+ else:
160
+ # No user_id - only search public schemas
161
+ query = """
162
+ SELECT spec FROM schemas
163
+ WHERE LOWER(name) = LOWER($1)
164
+ AND (user_id = 'system' OR user_id IS NULL)
165
+ LIMIT 1
166
+ """
167
+ row = await db.fetchrow(query, schema_name)
168
+ logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
159
169
 
160
170
  if row:
161
171
  spec = row.get("spec")
@@ -193,17 +203,25 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
193
203
  try:
194
204
  await db.connect()
195
205
 
196
- # Query schemas table directly by name
197
- # Note: Schema name lookup is case-insensitive for user convenience
198
- query = """
199
- SELECT spec FROM schemas
200
- WHERE LOWER(name) = LOWER($1)
201
- AND (user_id = $2 OR user_id = 'system')
202
- LIMIT 1
203
- """
204
- logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id}")
205
-
206
- row = await db.fetchrow(query, schema_name, user_id)
206
+ # Query for public schemas (user_id IS NULL) and optionally user-specific
207
+ if user_id:
208
+ query = """
209
+ SELECT spec FROM schemas
210
+ WHERE LOWER(name) = LOWER($1)
211
+ AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
212
+ LIMIT 1
213
+ """
214
+ row = await db.fetchrow(query, schema_name, user_id)
215
+ else:
216
+ # No user_id - only search public schemas
217
+ query = """
218
+ SELECT spec FROM schemas
219
+ WHERE LOWER(name) = LOWER($1)
220
+ AND (user_id = 'system' OR user_id IS NULL)
221
+ LIMIT 1
222
+ """
223
+ row = await db.fetchrow(query, schema_name)
224
+ logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
207
225
 
208
226
  if row:
209
227
  spec = row.get("spec")
@@ -365,13 +383,14 @@ def load_agent_schema(
365
383
  logger.debug(f"Could not load from {search_path}: {e}")
366
384
  continue
367
385
 
368
- # 5. Try database LOOKUP fallback (if enabled and user_id provided)
369
- if enable_db_fallback and user_id:
386
+ # 5. Try database LOOKUP fallback (if enabled)
387
+ # Always search for public schemas (user_id IS NULL), plus user-specific if user_id provided
388
+ if enable_db_fallback:
370
389
  try:
371
- logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id})")
390
+ logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id or 'public'})")
372
391
  db_schema = _load_schema_from_database(base_name, user_id)
373
392
  if db_schema:
374
- logger.info(f"✅ Loaded schema from database: {base_name} (user_id={user_id})")
393
+ logger.info(f"✅ Loaded schema from database: {base_name}")
375
394
  return db_schema
376
395
  except Exception as e:
377
396
  logger.debug(f"Database schema lookup failed: {e}")
@@ -387,9 +406,9 @@ def load_agent_schema(
387
406
  db_search_note = ""
388
407
  if enable_db_fallback:
389
408
  if user_id:
390
- db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id='{user_id}' (no match)"
409
+ db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('{user_id}', 'system', NULL) (no match)"
391
410
  else:
392
- db_search_note = "\n - Database: (skipped - no user_id provided)"
411
+ db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('system', NULL) (no match)"
393
412
 
394
413
  raise FileNotFoundError(
395
414
  f"Schema not found: {schema_name_or_path}\n"
@@ -484,19 +503,19 @@ async def load_agent_schema_async(
484
503
  except Exception:
485
504
  continue
486
505
 
487
- # Try database lookup
488
- if user_id:
489
- from rem.services.postgres import get_postgres_service
490
-
491
- should_disconnect = False
492
- if db is None:
493
- db = get_postgres_service()
494
- if db:
495
- await db.connect()
496
- should_disconnect = True
506
+ # Try database lookup - always search public schemas, plus user-specific if user_id provided
507
+ from rem.services.postgres import get_postgres_service
497
508
 
509
+ should_disconnect = False
510
+ if db is None:
511
+ db = get_postgres_service()
498
512
  if db:
499
- try:
513
+ await db.connect()
514
+ should_disconnect = True
515
+
516
+ if db:
517
+ try:
518
+ if user_id:
500
519
  query = """
501
520
  SELECT spec FROM schemas
502
521
  WHERE LOWER(name) = LOWER($1)
@@ -504,14 +523,23 @@ async def load_agent_schema_async(
504
523
  LIMIT 1
505
524
  """
506
525
  row = await db.fetchrow(query, base_name, user_id)
507
- if row:
508
- spec = row.get("spec")
509
- if spec and isinstance(spec, dict):
510
- logger.info(f"✅ Loaded schema from database: {base_name}")
511
- return spec
512
- finally:
513
- if should_disconnect:
514
- await db.disconnect()
526
+ else:
527
+ # No user_id - only search public schemas
528
+ query = """
529
+ SELECT spec FROM schemas
530
+ WHERE LOWER(name) = LOWER($1)
531
+ AND (user_id = 'system' OR user_id IS NULL)
532
+ LIMIT 1
533
+ """
534
+ row = await db.fetchrow(query, base_name)
535
+ if row:
536
+ spec = row.get("spec")
537
+ if spec and isinstance(spec, dict):
538
+ logger.info(f"✅ Loaded schema from database: {base_name}")
539
+ return spec
540
+ finally:
541
+ if should_disconnect:
542
+ await db.disconnect()
515
543
 
516
544
  # Not found
517
545
  raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: remdb
3
- Version: 0.3.200
3
+ Version: 0.3.226
4
4
  Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
5
5
  Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
6
6
  Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md