remdb 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/__init__.py +10 -1
- rem/agentic/context.py +13 -2
- rem/agentic/context_builder.py +45 -34
- rem/agentic/providers/pydantic_ai.py +302 -110
- rem/api/mcp_router/resources.py +223 -0
- rem/api/mcp_router/tools.py +76 -10
- rem/api/routers/auth.py +113 -10
- rem/api/routers/chat/child_streaming.py +22 -8
- rem/api/routers/chat/completions.py +3 -3
- rem/api/routers/chat/sse_events.py +3 -3
- rem/api/routers/chat/streaming.py +40 -45
- rem/api/routers/chat/streaming_utils.py +5 -7
- rem/api/routers/feedback.py +2 -2
- rem/api/routers/query.py +5 -14
- rem/cli/commands/ask.py +144 -33
- rem/cli/commands/experiments.py +1 -1
- rem/cli/commands/process.py +9 -1
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/session.py +117 -0
- rem/cli/main.py +2 -0
- rem/models/core/experiment.py +1 -1
- rem/models/entities/session.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +1 -1
- rem/schemas/agents/test_orchestrator.yaml +42 -0
- rem/schemas/agents/test_structured_output.yaml +52 -0
- rem/services/content/providers.py +151 -49
- rem/services/postgres/repository.py +1 -0
- rem/services/rem/README.md +4 -3
- rem/services/rem/parser.py +7 -10
- rem/services/rem/service.py +47 -0
- rem/services/session/compression.py +7 -3
- rem/services/session/pydantic_messages.py +25 -7
- rem/services/session/reload.py +2 -1
- rem/settings.py +64 -7
- rem/sql/migrations/004_cache_system.sql +3 -1
- rem/utils/schema_loader.py +135 -103
- {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/METADATA +6 -5
- {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/RECORD +40 -37
- {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
- {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
|
@@ -96,7 +96,7 @@ class MessageCompressor:
|
|
|
96
96
|
Returns:
|
|
97
97
|
Compressed message dict
|
|
98
98
|
"""
|
|
99
|
-
content = message.get("content"
|
|
99
|
+
content = message.get("content") or ""
|
|
100
100
|
|
|
101
101
|
# Don't compress short messages or system messages
|
|
102
102
|
if (
|
|
@@ -242,7 +242,7 @@ class SessionMessageStore:
|
|
|
242
242
|
# Use pre-generated id from message dict if available (for frontend feedback)
|
|
243
243
|
msg = Message(
|
|
244
244
|
id=message.get("id"), # Use pre-generated ID if provided
|
|
245
|
-
content=message.get("content"
|
|
245
|
+
content=message.get("content") or "",
|
|
246
246
|
message_type=message.get("role", "assistant"),
|
|
247
247
|
session_id=session_id,
|
|
248
248
|
tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
|
|
@@ -337,7 +337,7 @@ class SessionMessageStore:
|
|
|
337
337
|
compressed_messages = []
|
|
338
338
|
|
|
339
339
|
for idx, message in enumerate(messages):
|
|
340
|
-
content = message.get("content"
|
|
340
|
+
content = message.get("content") or ""
|
|
341
341
|
|
|
342
342
|
# Only store and compress long assistant responses
|
|
343
343
|
if (
|
|
@@ -368,6 +368,8 @@ class SessionMessageStore:
|
|
|
368
368
|
}
|
|
369
369
|
|
|
370
370
|
# For tool messages, include tool call details in metadata
|
|
371
|
+
# Note: tool_arguments is stored only when provided (parent tool calls)
|
|
372
|
+
# For child tool calls (e.g., register_metadata), args are in content as JSON
|
|
371
373
|
if message.get("role") == "tool":
|
|
372
374
|
if message.get("tool_call_id"):
|
|
373
375
|
msg_metadata["tool_call_id"] = message.get("tool_call_id")
|
|
@@ -436,6 +438,8 @@ class SessionMessageStore:
|
|
|
436
438
|
}
|
|
437
439
|
|
|
438
440
|
# For tool messages, reconstruct tool call metadata
|
|
441
|
+
# Note: tool_arguments may be in metadata (parent calls) or parsed from
|
|
442
|
+
# content (child calls like register_metadata) by pydantic_messages.py
|
|
439
443
|
if role == "tool" and msg.metadata:
|
|
440
444
|
if msg.metadata.get("tool_call_id"):
|
|
441
445
|
msg_dict["tool_call_id"] = msg.metadata["tool_call_id"]
|
|
@@ -5,12 +5,16 @@ storage format into pydantic-ai's native ModelRequest/ModelResponse types.
|
|
|
5
5
|
|
|
6
6
|
Key insight: When we store tool results, we only store the result (ToolReturnPart).
|
|
7
7
|
But LLM APIs require matching ToolCallPart for each ToolReturnPart. So we synthesize
|
|
8
|
-
the ToolCallPart from stored metadata (tool_name, tool_call_id
|
|
8
|
+
the ToolCallPart from stored metadata (tool_name, tool_call_id) and arguments.
|
|
9
|
+
|
|
10
|
+
Tool arguments can come from two places:
|
|
11
|
+
- Parent tool calls (ask_agent): tool_arguments stored in metadata (content = result)
|
|
12
|
+
- Child tool calls (register_metadata): arguments parsed from content (content = args as JSON)
|
|
9
13
|
|
|
10
14
|
Storage format (our simplified format):
|
|
11
15
|
{"role": "user", "content": "..."}
|
|
12
16
|
{"role": "assistant", "content": "..."}
|
|
13
|
-
{"role": "tool", "content": "{...}", "tool_name": "...", "tool_call_id": "...", "tool_arguments": {...}}
|
|
17
|
+
{"role": "tool", "content": "{...}", "tool_name": "...", "tool_call_id": "...", "tool_arguments": {...}} # optional
|
|
14
18
|
|
|
15
19
|
Pydantic-ai format (what the LLM expects):
|
|
16
20
|
ModelRequest(parts=[UserPromptPart(content="...")])
|
|
@@ -102,7 +106,7 @@ def session_to_pydantic_messages(
|
|
|
102
106
|
while i < len(session_history):
|
|
103
107
|
msg = session_history[i]
|
|
104
108
|
role = msg.get("role", "")
|
|
105
|
-
content = msg.get("content"
|
|
109
|
+
content = msg.get("content") or ""
|
|
106
110
|
|
|
107
111
|
if role == "user":
|
|
108
112
|
# User messages become ModelRequest with UserPromptPart
|
|
@@ -120,8 +124,15 @@ def session_to_pydantic_messages(
|
|
|
120
124
|
tool_msg = session_history[j]
|
|
121
125
|
tool_name = tool_msg.get("tool_name", "unknown_tool")
|
|
122
126
|
tool_call_id = tool_msg.get("tool_call_id", f"call_{j}")
|
|
123
|
-
|
|
124
|
-
|
|
127
|
+
tool_content = tool_msg.get("content") or "{}"
|
|
128
|
+
|
|
129
|
+
# tool_arguments: prefer explicit field, fallback to parsing content
|
|
130
|
+
tool_arguments = tool_msg.get("tool_arguments")
|
|
131
|
+
if tool_arguments is None and isinstance(tool_content, str) and tool_content:
|
|
132
|
+
try:
|
|
133
|
+
tool_arguments = json.loads(tool_content)
|
|
134
|
+
except json.JSONDecodeError:
|
|
135
|
+
tool_arguments = {}
|
|
125
136
|
|
|
126
137
|
# Parse tool content if it's a JSON string
|
|
127
138
|
if isinstance(tool_content, str):
|
|
@@ -179,8 +190,15 @@ def session_to_pydantic_messages(
|
|
|
179
190
|
# Orphan tool message (no preceding assistant) - synthesize both parts
|
|
180
191
|
tool_name = msg.get("tool_name", "unknown_tool")
|
|
181
192
|
tool_call_id = msg.get("tool_call_id", f"call_{i}")
|
|
182
|
-
|
|
183
|
-
|
|
193
|
+
tool_content = msg.get("content") or "{}"
|
|
194
|
+
|
|
195
|
+
# tool_arguments: prefer explicit field, fallback to parsing content
|
|
196
|
+
tool_arguments = msg.get("tool_arguments")
|
|
197
|
+
if tool_arguments is None and isinstance(tool_content, str) and tool_content:
|
|
198
|
+
try:
|
|
199
|
+
tool_arguments = json.loads(tool_content)
|
|
200
|
+
except json.JSONDecodeError:
|
|
201
|
+
tool_arguments = {}
|
|
184
202
|
|
|
185
203
|
# Parse tool content
|
|
186
204
|
if isinstance(tool_content, str):
|
rem/services/session/reload.py
CHANGED
|
@@ -12,7 +12,8 @@ Design Pattern:
|
|
|
12
12
|
|
|
13
13
|
Message Types on Reload:
|
|
14
14
|
- user: Returned as-is
|
|
15
|
-
- tool: Returned
|
|
15
|
+
- tool: Returned with metadata (tool_call_id, tool_name). tool_arguments may be in
|
|
16
|
+
metadata (parent calls) or parsed from content (child calls) by pydantic_messages.py
|
|
16
17
|
- assistant: Compressed on load if long (>400 chars), with REM LOOKUP for recovery
|
|
17
18
|
"""
|
|
18
19
|
|
rem/settings.py
CHANGED
|
@@ -424,6 +424,49 @@ class AuthSettings(BaseSettings):
|
|
|
424
424
|
google: GoogleOAuthSettings = Field(default_factory=GoogleOAuthSettings)
|
|
425
425
|
microsoft: MicrosoftOAuthSettings = Field(default_factory=MicrosoftOAuthSettings)
|
|
426
426
|
|
|
427
|
+
# Pre-approved login codes (bypass email verification)
|
|
428
|
+
# Format: comma-separated codes with prefix A=admin, B=normal user
|
|
429
|
+
# Example: "A12345,A67890,B11111,B22222"
|
|
430
|
+
preapproved_codes: str = Field(
|
|
431
|
+
default="",
|
|
432
|
+
description=(
|
|
433
|
+
"Comma-separated list of pre-approved login codes. "
|
|
434
|
+
"Prefix A = admin user, B = normal user. "
|
|
435
|
+
"Example: 'A12345,A67890,B11111'. "
|
|
436
|
+
"Users can login with these codes without email verification."
|
|
437
|
+
),
|
|
438
|
+
)
|
|
439
|
+
|
|
440
|
+
def check_preapproved_code(self, code: str) -> dict | None:
|
|
441
|
+
"""
|
|
442
|
+
Check if a code is in the pre-approved list.
|
|
443
|
+
|
|
444
|
+
Args:
|
|
445
|
+
code: The code to check (including prefix)
|
|
446
|
+
|
|
447
|
+
Returns:
|
|
448
|
+
Dict with 'role' key if valid, None if not found.
|
|
449
|
+
- A prefix -> role='admin'
|
|
450
|
+
- B prefix -> role='user'
|
|
451
|
+
"""
|
|
452
|
+
if not self.preapproved_codes:
|
|
453
|
+
return None
|
|
454
|
+
|
|
455
|
+
codes = [c.strip().upper() for c in self.preapproved_codes.split(",") if c.strip()]
|
|
456
|
+
code_upper = code.strip().upper()
|
|
457
|
+
|
|
458
|
+
if code_upper not in codes:
|
|
459
|
+
return None
|
|
460
|
+
|
|
461
|
+
# Parse prefix to determine role
|
|
462
|
+
if code_upper.startswith("A"):
|
|
463
|
+
return {"role": "admin", "code": code_upper}
|
|
464
|
+
elif code_upper.startswith("B"):
|
|
465
|
+
return {"role": "user", "code": code_upper}
|
|
466
|
+
else:
|
|
467
|
+
# Unknown prefix, treat as user
|
|
468
|
+
return {"role": "user", "code": code_upper}
|
|
469
|
+
|
|
427
470
|
@field_validator("session_secret", mode="before")
|
|
428
471
|
@classmethod
|
|
429
472
|
def generate_dev_secret(cls, v: str | None, info: ValidationInfo) -> str:
|
|
@@ -722,7 +765,7 @@ class DataLakeSettings(BaseSettings):
|
|
|
722
765
|
│ └── cpt/ # CPT codes
|
|
723
766
|
└── calibration/ # Agent calibration
|
|
724
767
|
├── experiments/ # Experiment configs + results
|
|
725
|
-
│ └── {agent}/{task}/ # e.g.,
|
|
768
|
+
│ └── {agent}/{task}/ # e.g., rem/risk-assessment
|
|
726
769
|
└── datasets/ # Shared evaluation datasets
|
|
727
770
|
|
|
728
771
|
Experiment Storage:
|
|
@@ -1598,7 +1641,7 @@ class EmailSettings(BaseSettings):
|
|
|
1598
1641
|
"Existing users can always login regardless of domain. "
|
|
1599
1642
|
"New users must have an email from a trusted domain. "
|
|
1600
1643
|
"Empty string means all domains are allowed. "
|
|
1601
|
-
"Example: '
|
|
1644
|
+
"Example: 'mycompany.com,example.com'"
|
|
1602
1645
|
),
|
|
1603
1646
|
)
|
|
1604
1647
|
|
|
@@ -1797,14 +1840,28 @@ class Settings(BaseSettings):
|
|
|
1797
1840
|
debug: DebugSettings = Field(default_factory=DebugSettings)
|
|
1798
1841
|
|
|
1799
1842
|
|
|
1800
|
-
# Auto-load .env file from current directory
|
|
1801
|
-
# This happens BEFORE config file loading, so .env takes precedence
|
|
1843
|
+
# Auto-load .env file from current directory or parent directories
|
|
1844
|
+
# This happens BEFORE config file loading, so .env takes precedence over shell env vars
|
|
1802
1845
|
from pathlib import Path
|
|
1803
1846
|
from dotenv import load_dotenv
|
|
1804
1847
|
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1848
|
+
|
|
1849
|
+
def _find_dotenv() -> Path | None:
|
|
1850
|
+
"""Search for .env in current dir and up to 3 parent directories."""
|
|
1851
|
+
current = Path.cwd()
|
|
1852
|
+
for _ in range(4): # Current + 3 parents
|
|
1853
|
+
env_path = current / ".env"
|
|
1854
|
+
if env_path.exists():
|
|
1855
|
+
return env_path
|
|
1856
|
+
if current.parent == current: # Reached root
|
|
1857
|
+
break
|
|
1858
|
+
current = current.parent
|
|
1859
|
+
return None
|
|
1860
|
+
|
|
1861
|
+
|
|
1862
|
+
_dotenv_path = _find_dotenv()
|
|
1863
|
+
if _dotenv_path:
|
|
1864
|
+
load_dotenv(_dotenv_path, override=True) # .env takes precedence over shell env vars
|
|
1808
1865
|
logger.debug(f"Loaded environment from {_dotenv_path.resolve()}")
|
|
1809
1866
|
|
|
1810
1867
|
# Load configuration from ~/.rem/config.yaml before initializing settings
|
|
@@ -64,9 +64,11 @@ CREATE OR REPLACE FUNCTION rem_kv_store_empty(p_user_id TEXT)
|
|
|
64
64
|
RETURNS BOOLEAN AS $$
|
|
65
65
|
BEGIN
|
|
66
66
|
-- Quick existence check - very fast with index
|
|
67
|
+
-- Check for user-specific OR public (NULL user_id) entries
|
|
68
|
+
-- This ensures self-healing triggers correctly for public ontologies
|
|
67
69
|
RETURN NOT EXISTS (
|
|
68
70
|
SELECT 1 FROM kv_store
|
|
69
|
-
WHERE user_id = p_user_id
|
|
71
|
+
WHERE user_id = p_user_id OR user_id IS NULL
|
|
70
72
|
LIMIT 1
|
|
71
73
|
);
|
|
72
74
|
END;
|
rem/utils/schema_loader.py
CHANGED
|
@@ -84,6 +84,7 @@ Schema Caching Status:
|
|
|
84
84
|
"""
|
|
85
85
|
|
|
86
86
|
import importlib.resources
|
|
87
|
+
import time
|
|
87
88
|
from pathlib import Path
|
|
88
89
|
from typing import Any, cast
|
|
89
90
|
|
|
@@ -104,10 +105,32 @@ SCHEMA_SEARCH_PATHS = [
|
|
|
104
105
|
# In-memory cache for filesystem schemas (no TTL - immutable)
|
|
105
106
|
_fs_schema_cache: dict[str, dict[str, Any]] = {}
|
|
106
107
|
|
|
107
|
-
#
|
|
108
|
-
#
|
|
109
|
-
|
|
110
|
-
|
|
108
|
+
# Database schema cache (with TTL - mutable, supports hot-reload)
|
|
109
|
+
# Cache key: (schema_name, user_id or "public") → (schema_dict, timestamp)
|
|
110
|
+
_db_schema_cache: dict[tuple[str, str], tuple[dict[str, Any], float]] = {}
|
|
111
|
+
_db_schema_ttl: int = 300 # 5 minutes in seconds
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _get_cached_db_schema(schema_name: str, user_id: str | None) -> dict[str, Any] | None:
    """Return the cached database schema, or None if it is missing or stale.

    The cache key is the lower-cased schema name paired with ``user_id``
    (or ``"public"`` when no user id is given). An entry older than
    ``_db_schema_ttl`` seconds is evicted and treated as a miss.
    """
    key = (schema_name.lower(), user_id or "public")

    entry = _db_schema_cache.get(key)
    if entry is None:
        return None

    cached, stored_at = entry
    if time.time() - stored_at >= _db_schema_ttl:
        # Stale entry: drop it so the caller reloads from the database.
        del _db_schema_cache[key]
        logger.debug(f"Schema cache expired: {schema_name}")
        return None

    logger.debug(f"Schema cache hit: {schema_name} (age: {time.time() - stored_at:.0f}s)")
    return cached
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _cache_db_schema(schema_name: str, user_id: str | None, schema: dict[str, Any]) -> None:
    """Store ``schema`` in the database schema cache, stamped with the current time.

    The entry is keyed by the lower-cased schema name and ``user_id``
    (``"public"`` when no user id is given), replacing any existing entry.
    """
    owner = user_id or "public"
    key = (schema_name.lower(), owner)
    entry = (schema, time.time())
    _db_schema_cache[key] = entry
    logger.debug(f"Schema cached: {schema_name} (TTL: {_db_schema_ttl}s)")
|
|
111
134
|
|
|
112
135
|
|
|
113
136
|
def _load_schema_from_database(schema_name: str, user_id: str) -> dict[str, Any] | None:
|
|
@@ -249,73 +272,65 @@ def load_agent_schema(
|
|
|
249
272
|
enable_db_fallback: bool = True,
|
|
250
273
|
) -> dict[str, Any]:
|
|
251
274
|
"""
|
|
252
|
-
Load agent schema
|
|
275
|
+
Load agent schema with database-first priority for hot-reloading support.
|
|
253
276
|
|
|
254
277
|
Schema names are case-invariant - "Rem", "rem", "REM" all resolve to the same schema.
|
|
255
278
|
|
|
256
|
-
|
|
257
|
-
|
|
279
|
+
**IMPORTANT**: Database is checked FIRST (before filesystem) to enable hot-reloading
|
|
280
|
+
of schema updates without redeploying the application. This allows operators to
|
|
281
|
+
update schemas via `rem process ingest` and have changes take effect immediately.
|
|
258
282
|
|
|
259
283
|
Handles path resolution automatically:
|
|
260
|
-
- "rem" → searches schemas/agents/rem.yaml
|
|
261
|
-
- "moment-builder" → searches schemas/agents/core/moment-builder.yaml
|
|
262
|
-
- "
|
|
263
|
-
- "
|
|
264
|
-
- "/absolute/path.yaml" → loads directly
|
|
265
|
-
- "relative/path.yaml" → loads relative to cwd
|
|
284
|
+
- "rem" → searches database, then schemas/agents/rem.yaml
|
|
285
|
+
- "moment-builder" → searches database, then schemas/agents/core/moment-builder.yaml
|
|
286
|
+
- "/absolute/path.yaml" → loads directly from filesystem (exact paths skip database)
|
|
287
|
+
- "relative/path.yaml" → loads relative to cwd (exact paths skip database)
|
|
266
288
|
|
|
267
289
|
Search Order:
|
|
268
|
-
1.
|
|
269
|
-
2.
|
|
270
|
-
3.
|
|
271
|
-
4.
|
|
272
|
-
5. Package resources: schemas/agents/
|
|
273
|
-
6. Package resources: schemas/agents/
|
|
274
|
-
7. Package resources: schemas/
|
|
275
|
-
8. Package resources: schemas/{name}.yaml
|
|
276
|
-
9.
|
|
290
|
+
1. Exact path if it exists (absolute or relative) - skips database
|
|
291
|
+
2. Database LOOKUP: schemas table (if enable_db_fallback=True) - PREFERRED for hot-reload
|
|
292
|
+
3. Check cache (if use_cache=True and schema found in FS cache)
|
|
293
|
+
4. Custom paths from rem.register_schema_path() and SCHEMA__PATHS env var
|
|
294
|
+
5. Package resources: schemas/agents/{name}.yaml (top-level)
|
|
295
|
+
6. Package resources: schemas/agents/core/{name}.yaml
|
|
296
|
+
7. Package resources: schemas/agents/examples/{name}.yaml
|
|
297
|
+
8. Package resources: schemas/evaluators/{name}.yaml
|
|
298
|
+
9. Package resources: schemas/{name}.yaml
|
|
277
299
|
|
|
278
300
|
Args:
|
|
279
301
|
schema_name_or_path: Schema name or file path (case-invariant for names)
|
|
280
302
|
Examples: "rem-query-agent", "Contract-Analyzer", "./my-schema.yaml"
|
|
281
303
|
use_cache: If True, uses in-memory cache for filesystem schemas
|
|
282
|
-
user_id: User ID for database schema lookup
|
|
283
|
-
enable_db_fallback: If True,
|
|
304
|
+
user_id: User ID for database schema lookup
|
|
305
|
+
enable_db_fallback: If True, checks database FIRST for schema (default: True)
|
|
284
306
|
|
|
285
307
|
Returns:
|
|
286
308
|
Agent schema as dictionary
|
|
287
309
|
|
|
288
310
|
Raises:
|
|
289
|
-
FileNotFoundError: If schema not found in any search location (
|
|
311
|
+
FileNotFoundError: If schema not found in any search location (database + filesystem)
|
|
290
312
|
yaml.YAMLError: If schema file is invalid YAML
|
|
291
313
|
|
|
292
314
|
Examples:
|
|
293
|
-
>>> # Load by short name
|
|
294
|
-
>>> schema = load_agent_schema("Contract-Analyzer") #
|
|
315
|
+
>>> # Load by short name - checks database first for hot-reload support
|
|
316
|
+
>>> schema = load_agent_schema("Contract-Analyzer") # case invariant
|
|
295
317
|
>>>
|
|
296
|
-
>>> # Load from custom path (
|
|
318
|
+
>>> # Load from custom path (skips database - exact paths always use filesystem)
|
|
297
319
|
>>> schema = load_agent_schema("./my-agent.yaml")
|
|
298
320
|
>>>
|
|
299
|
-
>>> # Load evaluator schema
|
|
321
|
+
>>> # Load evaluator schema
|
|
300
322
|
>>> schema = load_agent_schema("rem-lookup-correctness")
|
|
301
|
-
>>>
|
|
302
|
-
>>> # Load custom user schema from database (case invariant)
|
|
303
|
-
>>> schema = load_agent_schema("My-Agent", user_id="user-123") # same as "my-agent"
|
|
304
323
|
"""
|
|
305
324
|
# Normalize the name for cache key (lowercase for case-invariant lookups)
|
|
306
325
|
cache_key = str(schema_name_or_path).replace('agents/', '').replace('schemas/', '').replace('evaluators/', '').replace('core/', '').replace('examples/', '').lower()
|
|
307
326
|
if cache_key.endswith('.yaml') or cache_key.endswith('.yml'):
|
|
308
327
|
cache_key = cache_key.rsplit('.', 1)[0]
|
|
309
328
|
|
|
310
|
-
# Check cache first (only for package resources, not custom paths)
|
|
311
329
|
path = Path(schema_name_or_path)
|
|
312
330
|
is_custom_path = (path.exists() and path.is_file()) or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
|
|
313
331
|
|
|
314
|
-
if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
|
|
315
|
-
logger.debug(f"Loading schema from cache: {cache_key}")
|
|
316
|
-
return _fs_schema_cache[cache_key]
|
|
317
|
-
|
|
318
332
|
# 1. Try exact path first (absolute or relative to cwd) - must be a file, not directory
|
|
333
|
+
# Exact paths skip database lookup (explicit file reference)
|
|
319
334
|
if path.exists() and path.is_file():
|
|
320
335
|
logger.debug(f"Loading schema from exact path: {path}")
|
|
321
336
|
with open(path, "r") as f:
|
|
@@ -324,10 +339,28 @@ def load_agent_schema(
|
|
|
324
339
|
# Don't cache custom paths (they may change)
|
|
325
340
|
return cast(dict[str, Any], schema)
|
|
326
341
|
|
|
327
|
-
# 2. Normalize name for
|
|
342
|
+
# 2. Normalize name for lookups (lowercase)
|
|
328
343
|
base_name = cache_key
|
|
329
344
|
|
|
330
|
-
# 3. Try
|
|
345
|
+
# 3. Try database FIRST (if enabled) - enables hot-reload without redeploy
|
|
346
|
+
# Database schemas are NOT cached to ensure hot-reload works immediately
|
|
347
|
+
if enable_db_fallback and not is_custom_path:
|
|
348
|
+
try:
|
|
349
|
+
logger.debug(f"Checking database for schema: {base_name} (user_id={user_id or 'public'})")
|
|
350
|
+
db_schema = _load_schema_from_database(base_name, user_id)
|
|
351
|
+
if db_schema:
|
|
352
|
+
logger.info(f"✅ Loaded schema from database: {base_name}")
|
|
353
|
+
return db_schema
|
|
354
|
+
except Exception as e:
|
|
355
|
+
logger.debug(f"Database schema lookup failed: {e}")
|
|
356
|
+
# Fall through to filesystem search
|
|
357
|
+
|
|
358
|
+
# 4. Check filesystem cache (only for package resources, not custom paths)
|
|
359
|
+
if use_cache and not is_custom_path and cache_key in _fs_schema_cache:
|
|
360
|
+
logger.debug(f"Loading schema from cache: {cache_key}")
|
|
361
|
+
return _fs_schema_cache[cache_key]
|
|
362
|
+
|
|
363
|
+
# 5. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
|
|
331
364
|
from ..registry import get_schema_paths
|
|
332
365
|
|
|
333
366
|
custom_paths = get_schema_paths()
|
|
@@ -358,7 +391,7 @@ def load_agent_schema(
|
|
|
358
391
|
# Don't cache custom paths (they may change during development)
|
|
359
392
|
return cast(dict[str, Any], schema)
|
|
360
393
|
|
|
361
|
-
#
|
|
394
|
+
# 6. Try package resources with standard search paths
|
|
362
395
|
for search_pattern in SCHEMA_SEARCH_PATHS:
|
|
363
396
|
search_path = search_pattern.format(name=base_name)
|
|
364
397
|
|
|
@@ -383,20 +416,7 @@ def load_agent_schema(
|
|
|
383
416
|
logger.debug(f"Could not load from {search_path}: {e}")
|
|
384
417
|
continue
|
|
385
418
|
|
|
386
|
-
#
|
|
387
|
-
# Always search for public schemas (user_id IS NULL), plus user-specific if user_id provided
|
|
388
|
-
if enable_db_fallback:
|
|
389
|
-
try:
|
|
390
|
-
logger.debug(f"Attempting database LOOKUP for schema: {base_name} (user_id={user_id or 'public'})")
|
|
391
|
-
db_schema = _load_schema_from_database(base_name, user_id)
|
|
392
|
-
if db_schema:
|
|
393
|
-
logger.info(f"✅ Loaded schema from database: {base_name}")
|
|
394
|
-
return db_schema
|
|
395
|
-
except Exception as e:
|
|
396
|
-
logger.debug(f"Database schema lookup failed: {e}")
|
|
397
|
-
# Fall through to error below
|
|
398
|
-
|
|
399
|
-
# 6. Schema not found in any location
|
|
419
|
+
# 7. Schema not found in any location
|
|
400
420
|
searched_paths = [pattern.format(name=base_name) for pattern in SCHEMA_SEARCH_PATHS]
|
|
401
421
|
|
|
402
422
|
custom_paths_note = ""
|
|
@@ -424,18 +444,21 @@ async def load_agent_schema_async(
|
|
|
424
444
|
schema_name_or_path: str,
|
|
425
445
|
user_id: str | None = None,
|
|
426
446
|
db=None,
|
|
447
|
+
enable_db_fallback: bool = True,
|
|
427
448
|
) -> dict[str, Any]:
|
|
428
449
|
"""
|
|
429
|
-
Async version of load_agent_schema
|
|
450
|
+
Async version of load_agent_schema with database-first priority.
|
|
430
451
|
|
|
431
452
|
Schema names are case-invariant - "MyAgent", "myagent", "MYAGENT" all resolve to the same schema.
|
|
432
453
|
|
|
433
|
-
|
|
454
|
+
**IMPORTANT**: Database is checked FIRST (before filesystem) to enable hot-reloading
|
|
455
|
+
of schema updates without redeploying the application.
|
|
434
456
|
|
|
435
457
|
Args:
|
|
436
458
|
schema_name_or_path: Schema name or file path (case-invariant for names)
|
|
437
459
|
user_id: User ID for database schema lookup
|
|
438
460
|
db: Optional existing PostgresService connection (if None, will create one)
|
|
461
|
+
enable_db_fallback: If True, checks database FIRST for schema (default: True)
|
|
439
462
|
|
|
440
463
|
Returns:
|
|
441
464
|
Agent schema as dictionary
|
|
@@ -443,7 +466,6 @@ async def load_agent_schema_async(
|
|
|
443
466
|
Raises:
|
|
444
467
|
FileNotFoundError: If schema not found
|
|
445
468
|
"""
|
|
446
|
-
# First try filesystem search (sync operations are fine)
|
|
447
469
|
path = Path(schema_name_or_path)
|
|
448
470
|
|
|
449
471
|
# Normalize the name for cache key (lowercase for case-invariant lookups)
|
|
@@ -453,12 +475,7 @@ async def load_agent_schema_async(
|
|
|
453
475
|
|
|
454
476
|
is_custom_path = (path.exists() and path.is_file()) or '/' in str(schema_name_or_path) or '\\' in str(schema_name_or_path)
|
|
455
477
|
|
|
456
|
-
#
|
|
457
|
-
if not is_custom_path and cache_key in _fs_schema_cache:
|
|
458
|
-
logger.debug(f"Loading schema from cache: {cache_key}")
|
|
459
|
-
return _fs_schema_cache[cache_key]
|
|
460
|
-
|
|
461
|
-
# Try exact path (must be a file, not directory)
|
|
478
|
+
# 1. Try exact path first (skips database - explicit file reference)
|
|
462
479
|
if path.exists() and path.is_file():
|
|
463
480
|
logger.debug(f"Loading schema from exact path: {path}")
|
|
464
481
|
with open(path, "r") as f:
|
|
@@ -467,7 +484,60 @@ async def load_agent_schema_async(
|
|
|
467
484
|
|
|
468
485
|
base_name = cache_key
|
|
469
486
|
|
|
470
|
-
# Try
|
|
487
|
+
# 2. Try database FIRST (if enabled) - enables hot-reload without redeploy
|
|
488
|
+
if enable_db_fallback and not is_custom_path:
|
|
489
|
+
# Check DB schema cache first (TTL-based)
|
|
490
|
+
cached_schema = _get_cached_db_schema(base_name, user_id)
|
|
491
|
+
if cached_schema is not None:
|
|
492
|
+
logger.info(f"✅ Loaded schema from cache: {base_name}")
|
|
493
|
+
return cached_schema
|
|
494
|
+
|
|
495
|
+
# Cache miss - query database
|
|
496
|
+
from rem.services.postgres import get_postgres_service
|
|
497
|
+
|
|
498
|
+
should_disconnect = False
|
|
499
|
+
if db is None:
|
|
500
|
+
db = get_postgres_service()
|
|
501
|
+
if db:
|
|
502
|
+
await db.connect()
|
|
503
|
+
should_disconnect = True
|
|
504
|
+
|
|
505
|
+
if db:
|
|
506
|
+
try:
|
|
507
|
+
if user_id:
|
|
508
|
+
query = """
|
|
509
|
+
SELECT spec FROM schemas
|
|
510
|
+
WHERE LOWER(name) = LOWER($1)
|
|
511
|
+
AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
|
|
512
|
+
LIMIT 1
|
|
513
|
+
"""
|
|
514
|
+
row = await db.fetchrow(query, base_name, user_id)
|
|
515
|
+
else:
|
|
516
|
+
# No user_id - only search public schemas
|
|
517
|
+
query = """
|
|
518
|
+
SELECT spec FROM schemas
|
|
519
|
+
WHERE LOWER(name) = LOWER($1)
|
|
520
|
+
AND (user_id = 'system' OR user_id IS NULL)
|
|
521
|
+
LIMIT 1
|
|
522
|
+
"""
|
|
523
|
+
row = await db.fetchrow(query, base_name)
|
|
524
|
+
if row:
|
|
525
|
+
spec = row.get("spec")
|
|
526
|
+
if spec and isinstance(spec, dict):
|
|
527
|
+
# Cache the schema for future requests
|
|
528
|
+
_cache_db_schema(base_name, user_id, spec)
|
|
529
|
+
logger.info(f"✅ Loaded schema from database: {base_name}")
|
|
530
|
+
return spec
|
|
531
|
+
finally:
|
|
532
|
+
if should_disconnect:
|
|
533
|
+
await db.disconnect()
|
|
534
|
+
|
|
535
|
+
# 3. Check filesystem cache
|
|
536
|
+
if not is_custom_path and cache_key in _fs_schema_cache:
|
|
537
|
+
logger.debug(f"Loading schema from cache: {cache_key}")
|
|
538
|
+
return _fs_schema_cache[cache_key]
|
|
539
|
+
|
|
540
|
+
# 4. Try custom schema paths (from registry + SCHEMA__PATHS env var + auto-detected)
|
|
471
541
|
from ..registry import get_schema_paths
|
|
472
542
|
custom_paths = get_schema_paths()
|
|
473
543
|
|
|
@@ -489,7 +559,7 @@ async def load_agent_schema_async(
|
|
|
489
559
|
schema = yaml.safe_load(f)
|
|
490
560
|
return cast(dict[str, Any], schema)
|
|
491
561
|
|
|
492
|
-
# Try package resources
|
|
562
|
+
# 5. Try package resources
|
|
493
563
|
for search_pattern in SCHEMA_SEARCH_PATHS:
|
|
494
564
|
search_path = search_pattern.format(name=base_name)
|
|
495
565
|
try:
|
|
@@ -503,44 +573,6 @@ async def load_agent_schema_async(
|
|
|
503
573
|
except Exception:
|
|
504
574
|
continue
|
|
505
575
|
|
|
506
|
-
# Try database lookup - always search public schemas, plus user-specific if user_id provided
|
|
507
|
-
from rem.services.postgres import get_postgres_service
|
|
508
|
-
|
|
509
|
-
should_disconnect = False
|
|
510
|
-
if db is None:
|
|
511
|
-
db = get_postgres_service()
|
|
512
|
-
if db:
|
|
513
|
-
await db.connect()
|
|
514
|
-
should_disconnect = True
|
|
515
|
-
|
|
516
|
-
if db:
|
|
517
|
-
try:
|
|
518
|
-
if user_id:
|
|
519
|
-
query = """
|
|
520
|
-
SELECT spec FROM schemas
|
|
521
|
-
WHERE LOWER(name) = LOWER($1)
|
|
522
|
-
AND (user_id = $2 OR user_id = 'system' OR user_id IS NULL)
|
|
523
|
-
LIMIT 1
|
|
524
|
-
"""
|
|
525
|
-
row = await db.fetchrow(query, base_name, user_id)
|
|
526
|
-
else:
|
|
527
|
-
# No user_id - only search public schemas
|
|
528
|
-
query = """
|
|
529
|
-
SELECT spec FROM schemas
|
|
530
|
-
WHERE LOWER(name) = LOWER($1)
|
|
531
|
-
AND (user_id = 'system' OR user_id IS NULL)
|
|
532
|
-
LIMIT 1
|
|
533
|
-
"""
|
|
534
|
-
row = await db.fetchrow(query, base_name)
|
|
535
|
-
if row:
|
|
536
|
-
spec = row.get("spec")
|
|
537
|
-
if spec and isinstance(spec, dict):
|
|
538
|
-
logger.info(f"✅ Loaded schema from database: {base_name}")
|
|
539
|
-
return spec
|
|
540
|
-
finally:
|
|
541
|
-
if should_disconnect:
|
|
542
|
-
await db.disconnect()
|
|
543
|
-
|
|
544
576
|
# Not found
|
|
545
577
|
raise FileNotFoundError(f"Schema not found: {schema_name_or_path}")
|
|
546
578
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: remdb
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.258
|
|
4
4
|
Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
|
|
5
5
|
Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
|
|
6
6
|
Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
|
|
@@ -28,7 +28,7 @@ Requires-Dist: gitpython>=3.1.45
|
|
|
28
28
|
Requires-Dist: hypercorn>=0.17.0
|
|
29
29
|
Requires-Dist: itsdangerous>=2.0.0
|
|
30
30
|
Requires-Dist: json-schema-to-pydantic>=0.2.0
|
|
31
|
-
Requires-Dist: kreuzberg
|
|
31
|
+
Requires-Dist: kreuzberg>=4.0.5
|
|
32
32
|
Requires-Dist: loguru>=0.7.0
|
|
33
33
|
Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.0
|
|
34
34
|
Requires-Dist: opentelemetry-api>=1.28.0
|
|
@@ -1300,15 +1300,16 @@ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
|
|
|
1300
1300
|
available : Stage 1+
|
|
1301
1301
|
example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
|
|
1302
1302
|
|
|
1303
|
-
SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
|
|
1303
|
+
SearchQuery ::= SEARCH <text:string> [IN|TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
|
|
1304
1304
|
text : Semantic query text
|
|
1305
|
-
table : Target table (default: "resources")
|
|
1305
|
+
table : Target table (default: "resources"). Use IN or TABLE keyword.
|
|
1306
1306
|
clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
|
|
1307
1307
|
limit : Max results (default: 10)
|
|
1308
1308
|
performance : Indexed (pgvector)
|
|
1309
1309
|
available : Stage 3+
|
|
1310
1310
|
examples :
|
|
1311
|
-
- SEARCH "database migration"
|
|
1311
|
+
- SEARCH "database migration" IN resources LIMIT 10
|
|
1312
|
+
- SEARCH "parcel delivery" IN ontologies
|
|
1312
1313
|
- SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
|
|
1313
1314
|
- SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
|
|
1314
1315
|
- SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
|