remdb 0.3.200__py3-none-any.whl → 0.3.226__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/README.md +262 -2
- rem/agentic/context.py +73 -1
- rem/agentic/mcp/tool_wrapper.py +2 -2
- rem/agentic/providers/pydantic_ai.py +1 -1
- rem/agentic/schema.py +2 -2
- rem/api/mcp_router/tools.py +154 -18
- rem/api/routers/admin.py +30 -4
- rem/api/routers/auth.py +106 -10
- rem/api/routers/chat/completions.py +24 -29
- rem/api/routers/chat/sse_events.py +5 -1
- rem/api/routers/chat/streaming.py +163 -2
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +7 -1
- rem/api/routers/feedback.py +9 -1
- rem/api/routers/messages.py +80 -15
- rem/api/routers/models.py +9 -1
- rem/api/routers/query.py +12 -1
- rem/api/routers/shared_sessions.py +16 -0
- rem/auth/jwt.py +19 -4
- rem/cli/commands/ask.py +61 -81
- rem/cli/commands/process.py +3 -3
- rem/models/entities/ontology.py +18 -20
- rem/schemas/agents/rem.yaml +1 -1
- rem/services/postgres/repository.py +14 -4
- rem/services/session/__init__.py +2 -1
- rem/services/session/compression.py +40 -2
- rem/services/session/pydantic_messages.py +66 -0
- rem/settings.py +28 -0
- rem/sql/migrations/001_install.sql +13 -3
- rem/sql/migrations/002_install_models.sql +20 -22
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/schema_loader.py +73 -45
- {remdb-0.3.200.dist-info → remdb-0.3.226.dist-info}/METADATA +1 -1
- {remdb-0.3.200.dist-info → remdb-0.3.226.dist-info}/RECORD +36 -34
- {remdb-0.3.200.dist-info → remdb-0.3.226.dist-info}/WHEEL +0 -0
- {remdb-0.3.200.dist-info → remdb-0.3.226.dist-info}/entry_points.txt +0 -0
rem/services/session/__init__.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
"""Session management services for conversation persistence and compression."""
|
|
2
2
|
|
|
3
3
|
from .compression import MessageCompressor, SessionMessageStore
|
|
4
|
-
from .pydantic_messages import session_to_pydantic_messages
|
|
4
|
+
from .pydantic_messages import audit_session_history, session_to_pydantic_messages
|
|
5
5
|
from .reload import reload_session
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
8
|
"MessageCompressor",
|
|
9
9
|
"SessionMessageStore",
|
|
10
|
+
"audit_session_history",
|
|
10
11
|
"reload_session",
|
|
11
12
|
"session_to_pydantic_messages",
|
|
12
13
|
]
|
|
@@ -65,7 +65,7 @@ def truncate_key(key: str, max_length: int = MAX_ENTITY_KEY_LENGTH) -> str:
|
|
|
65
65
|
logger.warning(f"Truncated key from {len(key)} to {len(truncated)} chars: {key[:50]}...")
|
|
66
66
|
return truncated
|
|
67
67
|
|
|
68
|
-
from rem.models.entities import Message
|
|
68
|
+
from rem.models.entities import Message, Session
|
|
69
69
|
from rem.services.postgres import PostgresService, Repository
|
|
70
70
|
from rem.settings import settings
|
|
71
71
|
|
|
@@ -177,6 +177,39 @@ class SessionMessageStore:
|
|
|
177
177
|
self.user_id = user_id
|
|
178
178
|
self.compressor = compressor or MessageCompressor()
|
|
179
179
|
self.repo = Repository(Message)
|
|
180
|
+
self._session_repo = Repository(Session, table_name="sessions")
|
|
181
|
+
|
|
182
|
+
async def _ensure_session_exists(
    self,
    session_id: str,
    user_id: str | None = None,
) -> None:
    """
    Create the session row for ``session_id`` unless one is already stored.

    This is best-effort: any exception raised while looking up or creating
    the session is logged as a warning and swallowed, so the caller is
    never interrupted by a session bookkeeping failure.

    Args:
        session_id: Session UUID; used as both the id and the initial name
            of a newly created session.
        user_id: Optional owner of the session; falls back to the store's
            own ``user_id`` when not given.
    """
    try:
        owner = user_id or self.user_id

        if await self._session_repo.get_by_id(session_id):
            # A row with this id is already present - nothing to create.
            return

        # The name defaults to the UUID and can be changed later.
        # NOTE(review): tenant_id is always the store's user_id, even when an
        # explicit user_id argument is supplied - confirm this is intended.
        await self._session_repo.upsert(
            Session(
                id=session_id,
                name=session_id,
                user_id=owner,
                tenant_id=self.user_id,
            )
        )
        logger.info(f"Created session {session_id} for user {owner}")
    except Exception as exc:
        # Deliberately non-fatal: log and carry on.
        logger.warning(f"Failed to ensure session exists: {exc}")
|
|
180
213
|
|
|
181
214
|
async def store_message(
|
|
182
215
|
self,
|
|
@@ -283,8 +316,10 @@ class SessionMessageStore:
|
|
|
283
316
|
"""
|
|
284
317
|
Store all session messages and return compressed versions.
|
|
285
318
|
|
|
319
|
+
Ensures session exists before storing messages.
|
|
320
|
+
|
|
286
321
|
Args:
|
|
287
|
-
session_id: Session
|
|
322
|
+
session_id: Session UUID
|
|
288
323
|
messages: List of messages to store
|
|
289
324
|
user_id: Optional user identifier
|
|
290
325
|
compress: Whether to compress messages (default: True)
|
|
@@ -296,6 +331,9 @@ class SessionMessageStore:
|
|
|
296
331
|
logger.debug("Postgres disabled, returning messages uncompressed")
|
|
297
332
|
return messages
|
|
298
333
|
|
|
334
|
+
# Ensure session exists before storing messages
|
|
335
|
+
await self._ensure_session_exists(session_id, user_id)
|
|
336
|
+
|
|
299
337
|
compressed_messages = []
|
|
300
338
|
|
|
301
339
|
for idx, message in enumerate(messages):
|
|
@@ -208,3 +208,69 @@ def session_to_pydantic_messages(
|
|
|
208
208
|
|
|
209
209
|
logger.debug(f"Converted {len(session_history)} stored messages to {len(messages)} pydantic-ai messages")
|
|
210
210
|
return messages
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def audit_session_history(
    session_id: str,
    agent_name: str,
    prompt: str,
    raw_session_history: list[dict[str, Any]],
    pydantic_messages_count: int,
) -> None:
    """
    Append one agent invocation to a per-session YAML audit file.

    Does nothing unless ``settings.debug.audit_session`` is true. The file
    lives in ``settings.debug.audit_dir`` and is named after the session, so
    every invocation for a session accumulates in one place. Failures are
    logged as warnings and never propagated - auditing is a debugging aid
    and must not break the request.

    Args:
        session_id: The session identifier (also used for the file name,
            after being reduced to filesystem-safe characters).
        agent_name: Name of the agent being invoked.
        prompt: The prompt being sent to the agent.
        raw_session_history: The raw session messages to record.
        pydantic_messages_count: Count of converted pydantic-ai messages.
    """
    from ...settings import settings

    if not settings.debug.audit_session:
        return

    try:
        import re
        import yaml
        from pathlib import Path
        from ...utils.date_utils import utc_now, to_iso

        audit_dir = Path(settings.debug.audit_dir)
        audit_dir.mkdir(parents=True, exist_ok=True)

        # The session id may originate from a request header, so never let it
        # steer the path: keep only safe characters and drop leading dots so
        # values like "../../x" cannot escape audit_dir.
        safe_name = re.sub(r"[^A-Za-z0-9_.-]", "_", str(session_id)).lstrip(".") or "session"
        audit_file = audit_dir / f"{safe_name}.yaml"

        # Record for this agent invocation
        entry = {
            "timestamp": to_iso(utc_now()),
            "agent_name": agent_name,
            "prompt": prompt,
            "raw_history_count": len(raw_session_history),
            "pydantic_messages_count": pydantic_messages_count,
            "raw_session_history": raw_session_history,
        }

        # Start from the existing file content when it is usable
        existing_data: dict[str, Any] = {"session_id": session_id, "invocations": []}
        if audit_file.exists():
            # Explicit UTF-8: the file is written with allow_unicode=True, so
            # relying on the locale default could fail on non-ASCII content.
            with open(audit_file, encoding="utf-8") as f:
                loaded = yaml.safe_load(f)
            if isinstance(loaded, dict):
                # Backfill session_id if missing; tolerate a null invocations key
                existing_data = {
                    "session_id": loaded.get("session_id", session_id),
                    "invocations": loaded.get("invocations") or [],
                }

        existing_data["invocations"].append(entry)

        with open(audit_file, "w", encoding="utf-8") as f:
            yaml.dump(existing_data, f, default_flow_style=False, allow_unicode=True)
        logger.info(f"DEBUG: Session audit updated: {audit_file}")
    except Exception as e:
        logger.warning(f"DEBUG: Failed to dump session audit: {e}")
|
rem/settings.py
CHANGED
|
@@ -1651,6 +1651,33 @@ class EmailSettings(BaseSettings):
|
|
|
1651
1651
|
return kwargs
|
|
1652
1652
|
|
|
1653
1653
|
|
|
1654
|
+
class DebugSettings(BaseSettings):
    """
    Developer-facing switches for troubleshooting.

    Environment variables:
        DEBUG__AUDIT_SESSION - Dump session history to {audit_dir}/{session_id}.yaml
        DEBUG__AUDIT_DIR     - Directory for session audit files (default: /tmp)
    """

    model_config = SettingsConfigDict(
        extra="ignore",
        env_file=".env",
        env_file_encoding="utf-8",
        env_prefix="DEBUG__",
    )

    # Auditing is opt-in; it stays off unless explicitly enabled.
    audit_session: bool = Field(default=False, description="When true, dump full session history to audit files for debugging")

    # Target directory for the per-session audit files.
    audit_dir: str = Field(default="/tmp", description="Directory for session audit files")
|
|
1679
|
+
|
|
1680
|
+
|
|
1654
1681
|
class TestSettings(BaseSettings):
|
|
1655
1682
|
"""
|
|
1656
1683
|
Test environment settings.
|
|
@@ -1767,6 +1794,7 @@ class Settings(BaseSettings):
|
|
|
1767
1794
|
schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
|
|
1768
1795
|
email: EmailSettings = Field(default_factory=EmailSettings)
|
|
1769
1796
|
test: TestSettings = Field(default_factory=TestSettings)
|
|
1797
|
+
debug: DebugSettings = Field(default_factory=DebugSettings)
|
|
1770
1798
|
|
|
1771
1799
|
|
|
1772
1800
|
# Auto-load .env file from current directory if it exists
|
|
@@ -822,6 +822,7 @@ COMMENT ON FUNCTION fn_get_shared_messages IS
|
|
|
822
822
|
-- Function to list sessions with user details (name, email) for admin views
|
|
823
823
|
|
|
824
824
|
-- List sessions with user info, CTE pagination
|
|
825
|
+
-- Note: messages.session_id stores the session UUID (sessions.id)
|
|
825
826
|
CREATE OR REPLACE FUNCTION fn_list_sessions_with_user(
|
|
826
827
|
p_user_id VARCHAR(256) DEFAULT NULL, -- Filter by user_id (NULL = all users, admin only)
|
|
827
828
|
p_user_name VARCHAR(256) DEFAULT NULL, -- Filter by user name (partial match, admin only)
|
|
@@ -847,7 +848,15 @@ RETURNS TABLE(
|
|
|
847
848
|
) AS $$
|
|
848
849
|
BEGIN
|
|
849
850
|
RETURN QUERY
|
|
850
|
-
WITH
|
|
851
|
+
WITH session_msg_counts AS (
|
|
852
|
+
-- Count messages per session (joining on session UUID)
|
|
853
|
+
SELECT
|
|
854
|
+
m.session_id,
|
|
855
|
+
COUNT(*)::INTEGER as actual_message_count
|
|
856
|
+
FROM messages m
|
|
857
|
+
GROUP BY m.session_id
|
|
858
|
+
),
|
|
859
|
+
filtered_sessions AS (
|
|
851
860
|
SELECT
|
|
852
861
|
s.id,
|
|
853
862
|
s.name,
|
|
@@ -856,13 +865,14 @@ BEGIN
|
|
|
856
865
|
s.user_id,
|
|
857
866
|
COALESCE(u.name, s.user_id)::VARCHAR(256) AS user_name,
|
|
858
867
|
u.email::VARCHAR(256) AS user_email,
|
|
859
|
-
|
|
868
|
+
COALESCE(mc.actual_message_count, 0) AS message_count,
|
|
860
869
|
s.total_tokens,
|
|
861
870
|
s.created_at,
|
|
862
871
|
s.updated_at,
|
|
863
872
|
s.metadata
|
|
864
873
|
FROM sessions s
|
|
865
874
|
LEFT JOIN users u ON u.id::text = s.user_id
|
|
875
|
+
LEFT JOIN session_msg_counts mc ON mc.session_id = s.id::text
|
|
866
876
|
WHERE s.deleted_at IS NULL
|
|
867
877
|
AND (p_user_id IS NULL OR s.user_id = p_user_id)
|
|
868
878
|
AND (p_user_name IS NULL OR u.name ILIKE '%' || p_user_name || '%')
|
|
@@ -895,7 +905,7 @@ END;
|
|
|
895
905
|
$$ LANGUAGE plpgsql STABLE;
|
|
896
906
|
|
|
897
907
|
COMMENT ON FUNCTION fn_list_sessions_with_user IS
|
|
898
|
-
'List sessions with user details
|
|
908
|
+
'List sessions with user details and computed message counts. Joins messages on session UUID (sessions.id).';
|
|
899
909
|
|
|
900
910
|
-- ============================================================================
|
|
901
911
|
-- RECORD INSTALLATION
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
-- REM Model Schema (install_models.sql)
|
|
2
2
|
-- Generated from Pydantic models
|
|
3
3
|
-- Source: model registry
|
|
4
|
-
-- Generated at: 2025-12-
|
|
4
|
+
-- Generated at: 2025-12-22T17:34:54.187339
|
|
5
5
|
--
|
|
6
6
|
-- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate
|
|
7
7
|
--
|
|
@@ -2088,32 +2088,30 @@ Domain-specific knowledge - either agent-extracted or direct-loaded.
|
|
|
2088
2088
|
tags=["cv", "engineering"]
|
|
2089
2089
|
)
|
|
2090
2090
|
|
|
2091
|
-
# Direct-loaded:
|
|
2092
|
-
|
|
2093
|
-
name="
|
|
2094
|
-
uri="git://
|
|
2095
|
-
content="#
|
|
2091
|
+
# Direct-loaded: Knowledge base from git
|
|
2092
|
+
api_docs = Ontology(
|
|
2093
|
+
name="rest-api-guide",
|
|
2094
|
+
uri="git://example-org/docs/api/rest-api-guide.md",
|
|
2095
|
+
content="# REST API Guide\n\nThis guide covers RESTful API design...",
|
|
2096
2096
|
extracted_data={
|
|
2097
|
-
"type": "
|
|
2098
|
-
"category": "
|
|
2099
|
-
"
|
|
2100
|
-
"dsm5_criteria": ["A", "B", "C", "D"],
|
|
2097
|
+
"type": "documentation",
|
|
2098
|
+
"category": "api",
|
|
2099
|
+
"version": "2.0",
|
|
2101
2100
|
},
|
|
2102
|
-
tags=["
|
|
2101
|
+
tags=["api", "rest", "documentation"]
|
|
2103
2102
|
)
|
|
2104
2103
|
|
|
2105
|
-
# Direct-loaded:
|
|
2106
|
-
|
|
2107
|
-
name="
|
|
2108
|
-
uri="git://
|
|
2109
|
-
content="#
|
|
2104
|
+
# Direct-loaded: Technical spec from git
|
|
2105
|
+
config_spec = Ontology(
|
|
2106
|
+
name="config-schema",
|
|
2107
|
+
uri="git://example-org/docs/specs/config-schema.md",
|
|
2108
|
+
content="# Configuration Schema\n\nThis document defines...",
|
|
2110
2109
|
extracted_data={
|
|
2111
|
-
"type": "
|
|
2112
|
-
"
|
|
2113
|
-
"
|
|
2114
|
-
"dsm5_criterion": "Panic Attack Specifier",
|
|
2110
|
+
"type": "specification",
|
|
2111
|
+
"format": "yaml",
|
|
2112
|
+
"version": "1.0",
|
|
2115
2113
|
},
|
|
2116
|
-
tags=["
|
|
2114
|
+
tags=["config", "schema", "specification"]
|
|
2117
2115
|
)
|
|
2118
2116
|
|
|
2119
2117
|
|
|
@@ -2227,7 +2225,7 @@ This schema includes the `search_rem` tool which supports:
|
|
|
2227
2225
|
- **Optional**
|
|
2228
2226
|
|
|
2229
2227
|
',
|
|
2230
|
-
'{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded:
|
|
2228
|
+
'{"type": "object", "description": "Domain-specific knowledge - either agent-extracted or direct-loaded.\n\n Attributes:\n name: Human-readable label for this ontology instance\n uri: External source reference (git://, s3://, https://) for direct-loaded ontologies\n file_id: Foreign key to File entity (optional - only for agent-extracted)\n agent_schema_id: Schema that performed extraction (optional - only for agent-extracted)\n provider_name: LLM provider used for extraction (optional)\n model_name: Specific model used (optional)\n extracted_data: Structured data - either extracted by agent or parsed from source\n confidence_score: Optional confidence score from extraction (0.0-1.0)\n extraction_timestamp: When extraction was performed\n content: Text used for generating embedding\n\n Inherited from CoreModel:\n id: UUID or string identifier\n created_at: Entity creation timestamp\n updated_at: Last update timestamp\n deleted_at: Soft deletion timestamp\n tenant_id: Multi-tenancy isolation\n user_id: Ownership\n graph_edges: Relationships to other entities\n metadata: Flexible metadata storage\n tags: Classification tags\n\n Example Usage:\n # Agent-extracted: CV parsing\n cv_ontology = Ontology(\n name=\"john-doe-cv-2024\",\n file_id=\"file-uuid-123\",\n agent_schema_id=\"cv-parser-v1\",\n provider_name=\"anthropic\",\n model_name=\"claude-sonnet-4-5-20250929\",\n extracted_data={\n \"candidate_name\": \"John Doe\",\n \"skills\": [\"Python\", \"PostgreSQL\", \"Kubernetes\"],\n },\n confidence_score=0.95,\n tags=[\"cv\", \"engineering\"]\n )\n\n # Direct-loaded: Knowledge base from git\n api_docs = Ontology(\n name=\"rest-api-guide\",\n uri=\"git://example-org/docs/api/rest-api-guide.md\",\n content=\"# REST API Guide\\n\\nThis guide covers RESTful API design...\",\n extracted_data={\n \"type\": \"documentation\",\n \"category\": \"api\",\n \"version\": \"2.0\",\n },\n tags=[\"api\", \"rest\", \"documentation\"]\n )\n\n # Direct-loaded: Technical spec from git\n 
config_spec = Ontology(\n name=\"config-schema\",\n uri=\"git://example-org/docs/specs/config-schema.md\",\n content=\"# Configuration Schema\\n\\nThis document defines...\",\n extracted_data={\n \"type\": \"specification\",\n \"format\": \"yaml\",\n \"version\": \"1.0\",\n },\n tags=[\"config\", \"schema\", \"specification\"]\n )\n \n\nThis agent can search the `ontologies` table using the `search_rem` tool. Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, SEARCH for semantic similarity, or SQL for complex queries.", "properties": {"id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "description": "Unique identifier (UUID or string, generated per model type). Generated automatically if not provided.", "title": "Id"}, "created_at": {"description": "Entity creation timestamp", "format": "date-time", "title": "Created At", "type": "string"}, "updated_at": {"description": "Last update timestamp", "format": "date-time", "title": "Updated At", "type": "string"}, "deleted_at": {"anyOf": [{"format": "date-time", "type": "string"}, {"type": "null"}], "default": null, "description": "Soft deletion timestamp", "title": "Deleted At"}, "tenant_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Tenant identifier for multi-tenancy isolation", "title": "Tenant Id"}, "user_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Owner user identifier (tenant-scoped). This is a VARCHAR(256), not a UUID, to allow flexibility for external identity providers. Typically generated as a hash of the user''s email address. 
In future, other strong unique claims (e.g., OAuth sub, verified phone) could also be used for generation.", "title": "User Id"}, "graph_edges": {"description": "Knowledge graph edges stored as InlineEdge dicts", "items": {"additionalProperties": true, "type": "object"}, "title": "Graph Edges", "type": "array"}, "metadata": {"additionalProperties": true, "description": "Flexible metadata storage", "title": "Metadata", "type": "object"}, "tags": {"description": "Entity tags", "items": {"type": "string"}, "title": "Tags", "type": "array"}, "name": {"title": "Name", "type": "string"}, "uri": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Uri"}, "file_id": {"anyOf": [{"format": "uuid", "type": "string"}, {"type": "string"}, {"type": "null"}], "default": null, "title": "File Id"}, "agent_schema_id": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Agent Schema Id"}, "provider_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Provider Name"}, "model_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Name"}, "extracted_data": {"anyOf": [{"additionalProperties": true, "type": "object"}, {"type": "null"}], "default": null, "title": "Extracted Data"}, "confidence_score": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Confidence Score"}, "extraction_timestamp": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Extraction Timestamp"}, "content": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Content"}}, "required": ["name"], "json_schema_extra": {"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fully_qualified_name": "rem.models.entities.ontology.Ontology", "tools": ["search_rem"], "default_search_table": "ontologies", "has_embeddings": true}}'::jsonb,
|
|
2231
2229
|
'entity',
|
|
2232
2230
|
'{"table_name": "ontologies", "entity_key_field": "name", "embedding_fields": ["content"], "fqn": "rem.models.entities.ontology.Ontology"}'::jsonb
|
|
2233
2231
|
)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
-- Migration: Update messages.session_id from session name to session UUID
-- This fixes the bug where messages were stored with session.name instead of session.id
--
-- Run this migration AFTER deploying the code fixes in remdb 0.3.204+
-- The code now correctly stores session.id (UUID), but existing data needs migration.

BEGIN;

-- Count, update and report inside ONE PL/pgSQL block.
-- GET DIAGNOSTICS ... ROW_COUNT only reflects the most recent SQL command
-- executed within the same block, so it must follow the UPDATE directly;
-- in a separate DO block it would not see the UPDATE's row count.
DO $$
DECLARE
    count_to_migrate INTEGER;
    updated_count INTEGER;
BEGIN
    -- First, count how many messages need to be updated
    SELECT COUNT(*) INTO count_to_migrate
    FROM messages m
    JOIN sessions s ON m.session_id = s.name
    WHERE m.session_id != s.id::text;

    RAISE NOTICE 'Messages needing migration: %', count_to_migrate;

    -- Update messages.session_id from session name to session UUID
    -- NOTE(review): assumes sessions.name is unique; if several sessions share
    -- a name, the session chosen for a message is unspecified - verify first.
    UPDATE messages m
    SET session_id = s.id::text
    FROM sessions s
    WHERE m.session_id = s.name
      AND m.session_id != s.id::text;

    -- Report how many were updated
    GET DIAGNOSTICS updated_count = ROW_COUNT;
    RAISE NOTICE 'Messages updated: %', updated_count;
END $$;

COMMIT;

-- Verify the fix - all messages should now join by UUID
SELECT
    'Messages matching sessions by UUID' as status,
    COUNT(*) as count
FROM messages m
JOIN sessions s ON m.session_id = s.id::text;
|
rem/utils/schema_loader.py
CHANGED
|
@@ -147,15 +147,25 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
|
|
|
147
147
|
try:
|
|
148
148
|
await db.connect()
|
|
149
149
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
150
|
+
# Query for public schemas (user_id IS NULL) and optionally user-specific
|
|
151
|
+
if user_id:
|
|
152
|
+
query = """
|
|
153
|
+
SELECT spec FROM schemas
|
|
154
|
+
WHERE LOWER(name) = LOWER($1)
|
|
155
|
+
AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
|
|
156
|
+
LIMIT 1
|
|
157
|
+
"""
|
|
158
|
+
row = await db.fetchrow(query, schema_name, user_id)
|
|
159
|
+
else:
|
|
160
|
+
# No user_id - only search public schemas
|
|
161
|
+
query = """
|
|
162
|
+
SELECT spec FROM schemas
|
|
163
|
+
WHERE LOWER(name) = LOWER($1)
|
|
164
|
+
AND (user_id = 'system' OR user_id IS NULL)
|
|
165
|
+
LIMIT 1
|
|
166
|
+
"""
|
|
167
|
+
row = await db.fetchrow(query, schema_name)
|
|
168
|
+
logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
|
|
159
169
|
|
|
160
170
|
if row:
|
|
161
171
|
spec = row.get("spec")
|
|
@@ -193,17 +203,25 @@ def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any]
|
|
|
193
203
|
try:
|
|
194
204
|
await db.connect()
|
|
195
205
|
|
|
196
|
-
# Query schemas
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
206
|
+
# Query for public schemas (user_id IS NULL) and optionally user-specific
|
|
207
|
+
if user_id:
|
|
208
|
+
query = """
|
|
209
|
+
SELECT spec FROM schemas
|
|
210
|
+
WHERE LOWER(name) = LOWER($1)
|
|
211
|
+
AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
|
|
212
|
+
LIMIT 1
|
|
213
|
+
"""
|
|
214
|
+
row = await db.fetchrow(query, schema_name, user_id)
|
|
215
|
+
else:
|
|
216
|
+
# No user_id - only search public schemas
|
|
217
|
+
query = """
|
|
218
|
+
SELECT spec FROM schemas
|
|
219
|
+
WHERE LOWER(name) = LOWER($1)
|
|
220
|
+
AND (user_id = 'system' OR user_id IS NULL)
|
|
221
|
+
LIMIT 1
|
|
222
|
+
"""
|
|
223
|
+
row = await db.fetchrow(query, schema_name)
|
|
224
|
+
logger.debug(f"Executing schema lookup: name={schema_name}, user_id={user_id or 'public'}")
|
|
207
225
|
|
|
208
226
|
if row:
|
|
209
227
|
spec = row.get("spec")
|
|
@@ -365,13 +383,14 @@ def load_agent_schema(
|
|
|
365
383
|
logger.debug(f"Could not load from {search_path}: {e}")
|
|
366
384
|
continue
|
|
367
385
|
|
|
368
|
-
# 5. Try database LOOKUP fallback (if enabled
|
|
369
|
-
|
|
386
|
+
# 5. Try database LOOKUP fallback (if enabled)
|
|
387
|
+
# Always search for public schemas (user_id IS NULL), plus user-specific if user_id provided
|
|
388
|
+
if enable_db_fallback:
|
|
370
389
|
try:
|
|
371
|
-
logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id})")
|
|
390
|
+
logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id or 'public'})")
|
|
372
391
|
db_schema = _load_schema_from_database(base_name, user_id)
|
|
373
392
|
if db_schema:
|
|
374
|
-
logger.info(f"✅ Loaded schema from database: {base_name}
|
|
393
|
+
logger.info(f"✅ Loaded schema from database: {base_name}")
|
|
375
394
|
return db_schema
|
|
376
395
|
except Exception as e:
|
|
377
396
|
logger.debug(f"Database schema lookup failed: {e}")
|
|
@@ -387,9 +406,9 @@ def load_agent_schema(
|
|
|
387
406
|
db_search_note = ""
|
|
388
407
|
if enable_db_fallback:
|
|
389
408
|
if user_id:
|
|
390
|
-
db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id
|
|
409
|
+
db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('{user_id}', 'system', NULL) (no match)"
|
|
391
410
|
else:
|
|
392
|
-
db_search_note = "\n - Database:
|
|
411
|
+
db_search_note = f"\n - Database: LOOKUP '{base_name}' FROM schemas WHERE user_id IN ('system', NULL) (no match)"
|
|
393
412
|
|
|
394
413
|
raise FileNotFoundError(
|
|
395
414
|
f"Schema not found: {schema_name_or_path}\n"
|
|
@@ -484,19 +503,19 @@ async def load_agent_schema_async(
|
|
|
484
503
|
except Exception:
|
|
485
504
|
continue
|
|
486
505
|
|
|
487
|
-
# Try database lookup
|
|
488
|
-
|
|
489
|
-
from rem.services.postgres import get_postgres_service
|
|
490
|
-
|
|
491
|
-
should_disconnect = False
|
|
492
|
-
if db is None:
|
|
493
|
-
db = get_postgres_service()
|
|
494
|
-
if db:
|
|
495
|
-
await db.connect()
|
|
496
|
-
should_disconnect = True
|
|
506
|
+
# Try database lookup - always search public schemas, plus user-specific if user_id provided
|
|
507
|
+
from rem.services.postgres import get_postgres_service
|
|
497
508
|
|
|
509
|
+
should_disconnect = False
|
|
510
|
+
if db is None:
|
|
511
|
+
db = get_postgres_service()
|
|
498
512
|
if db:
|
|
499
|
-
|
|
513
|
+
await db.connect()
|
|
514
|
+
should_disconnect = True
|
|
515
|
+
|
|
516
|
+
if db:
|
|
517
|
+
try:
|
|
518
|
+
if user_id:
|
|
500
519
|
query = """
|
|
501
520
|
SELECT spec FROM schemas
|
|
502
521
|
WHERE LOWER(name) = LOWER($1)
|
|
@@ -504,14 +523,23 @@ async def load_agent_schema_async(
|
|
|
504
523
|
LIMIT 1
|
|
505
524
|
"""
|
|
506
525
|
row = await db.fetchrow(query, base_name, user_id)
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
526
|
+
else:
|
|
527
|
+
# No user_id - only search public schemas
|
|
528
|
+
query = """
|
|
529
|
+
SELECT spec FROM schemas
|
|
530
|
+
WHERE LOWER(name) = LOWER($1)
|
|
531
|
+
AND (user_id = 'system' OR user_id IS NULL)
|
|
532
|
+
LIMIT 1
|
|
533
|
+
"""
|
|
534
|
+
row = await db.fetchrow(query, base_name)
|
|
535
|
+
if row:
|
|
536
|
+
spec = row.get("spec")
|
|
537
|
+
if spec and isinstance(spec, dict):
|
|
538
|
+
logger.info(f"✅ Loaded schema from database: {base_name}")
|
|
539
|
+
return spec
|
|
540
|
+
finally:
|
|
541
|
+
if should_disconnect:
|
|
542
|
+
await db.disconnect()
|
|
515
543
|
|
|
516
544
|
# Not found
|
|
517
545
|
raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: remdb
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.226
|
|
4
4
|
Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
|
|
5
5
|
Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
|
|
6
6
|
Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
|