remdb 0.3.7__py3-none-any.whl → 0.3.133__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -25
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +112 -17
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +314 -132
- rem/agentic/providers/pydantic_ai.py +215 -26
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +5 -6
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +469 -74
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +54 -0
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +92 -133
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +302 -28
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +399 -29
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +282 -35
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/METADATA +460 -303
- {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/RECORD +105 -74
- {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
rem/services/rem/parser.py
CHANGED
|
@@ -50,9 +50,36 @@ class RemQueryParser:
|
|
|
50
50
|
params: Dict[str, Any] = {}
|
|
51
51
|
positional_args: List[str] = []
|
|
52
52
|
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
|
|
53
|
+
# For SQL queries, preserve the raw query (keywords like LIMIT are SQL keywords)
|
|
54
|
+
if query_type == QueryType.SQL:
|
|
55
|
+
# Everything after "SQL" is the raw SQL query
|
|
56
|
+
raw_sql = query_string[3:].strip() # Skip "SQL" prefix
|
|
57
|
+
params["raw_query"] = raw_sql
|
|
58
|
+
return query_type, params
|
|
59
|
+
|
|
60
|
+
# Process remaining tokens, handling REM keywords
|
|
61
|
+
i = 1
|
|
62
|
+
while i < len(tokens):
|
|
63
|
+
token = tokens[i]
|
|
64
|
+
token_upper = token.upper()
|
|
65
|
+
|
|
66
|
+
# Handle REM keywords that take a value
|
|
67
|
+
if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH"):
|
|
68
|
+
if i + 1 < len(tokens):
|
|
69
|
+
keyword_map = {
|
|
70
|
+
"LIMIT": "limit",
|
|
71
|
+
"DEPTH": "max_depth",
|
|
72
|
+
"THRESHOLD": "threshold",
|
|
73
|
+
"TYPE": "edge_types",
|
|
74
|
+
"FROM": "initial_query",
|
|
75
|
+
"WITH": "initial_query",
|
|
76
|
+
}
|
|
77
|
+
key = keyword_map[token_upper]
|
|
78
|
+
value = tokens[i + 1]
|
|
79
|
+
params[key] = self._convert_value(key, value)
|
|
80
|
+
i += 2
|
|
81
|
+
continue
|
|
82
|
+
elif "=" in token:
|
|
56
83
|
# It's a keyword argument
|
|
57
84
|
key, value = token.split("=", 1)
|
|
58
85
|
# Handle parameter aliases
|
|
@@ -61,6 +88,7 @@ class RemQueryParser:
|
|
|
61
88
|
else:
|
|
62
89
|
# It's a positional argument part
|
|
63
90
|
positional_args.append(token)
|
|
91
|
+
i += 1
|
|
64
92
|
|
|
65
93
|
# Map positional arguments to specific fields based on QueryType
|
|
66
94
|
self._map_positional_args(query_type, positional_args, params)
|
|
@@ -133,13 +161,20 @@ class RemQueryParser:
|
|
|
133
161
|
params["query_text"] = combined_value
|
|
134
162
|
|
|
135
163
|
elif query_type == QueryType.SEARCH:
|
|
136
|
-
|
|
164
|
+
# SEARCH expects: SEARCH <table> <query_text> [LIMIT n]
|
|
165
|
+
# First positional arg is table name, rest is query text
|
|
166
|
+
if len(positional_args) >= 2:
|
|
167
|
+
params["table_name"] = positional_args[0]
|
|
168
|
+
params["query_text"] = " ".join(positional_args[1:])
|
|
169
|
+
elif len(positional_args) == 1:
|
|
170
|
+
# Could be table name or query text - assume query text if no table
|
|
171
|
+
params["query_text"] = positional_args[0]
|
|
172
|
+
# If no positional args, params stays empty
|
|
137
173
|
|
|
138
174
|
elif query_type == QueryType.TRAVERSE:
|
|
139
175
|
params["initial_query"] = combined_value
|
|
140
176
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
# but current service doesn't use them.
|
|
177
|
+
elif query_type == QueryType.SQL:
|
|
178
|
+
# SQL with positional args means "SQL SELECT * FROM ..." form
|
|
179
|
+
# Treat the combined positional args as the raw SQL query
|
|
180
|
+
params["raw_query"] = combined_value
|
rem/services/rem/service.py
CHANGED
|
@@ -13,6 +13,31 @@ Design:
|
|
|
13
13
|
- All queries pushed down to Postgres for performance
|
|
14
14
|
- Model schema inspection for validation only
|
|
15
15
|
- Exceptions for missing fields/embeddings
|
|
16
|
+
|
|
17
|
+
TODO: Staged Plan Execution
|
|
18
|
+
- Implement execute_staged_plan() method for multi-stage query execution
|
|
19
|
+
- Each stage can be:
|
|
20
|
+
1. Static query (query field): Execute REM dialect directly
|
|
21
|
+
2. Dynamic query (intent field): LLM interprets intent + previous results to build query
|
|
22
|
+
- Flow for dynamic stages:
|
|
23
|
+
1. Gather results from depends_on stages (from previous_results or current execution)
|
|
24
|
+
2. Pass intent + previous results to LLM (like ask_rem but with context)
|
|
25
|
+
3. LLM generates REM query based on what it learned from previous stages
|
|
26
|
+
4. Execute generated query
|
|
27
|
+
5. Store results in stage_results for client to use in continuation
|
|
28
|
+
- Multi-turn continuation:
|
|
29
|
+
- Client passes previous_results back from response's stage_results
|
|
30
|
+
- Client sets resume_from_stage to skip already-executed stages
|
|
31
|
+
- Server uses previous_results as context for depends_on lookups
|
|
32
|
+
- Use cases:
|
|
33
|
+
- LOOKUP "Sarah" → intent: "find her team members" (LLM sees Sarah's graph_edges, builds TRAVERSE)
|
|
34
|
+
- SEARCH "API docs" → intent: "get authors" (LLM extracts author refs, builds LOOKUP)
|
|
35
|
+
- Complex graph exploration with LLM-driven navigation
|
|
36
|
+
- API: POST /api/v1/query with:
|
|
37
|
+
- mode="staged-plan"
|
|
38
|
+
- plan=[{stage, query|intent, name, depends_on}]
|
|
39
|
+
- previous_results=[{stage, name, query_executed, results, count}] (for continuation)
|
|
40
|
+
- resume_from_stage=N (to skip completed stages)
|
|
16
41
|
"""
|
|
17
42
|
|
|
18
43
|
from typing import Any
|
|
@@ -309,17 +334,26 @@ class RemService:
|
|
|
309
334
|
)
|
|
310
335
|
|
|
311
336
|
# Execute vector search via rem_search() PostgreSQL function
|
|
337
|
+
min_sim = params.min_similarity if params.min_similarity is not None else 0.3
|
|
338
|
+
limit = params.limit or 10
|
|
312
339
|
query_params = get_search_params(
|
|
313
340
|
query_embedding,
|
|
314
341
|
table_name,
|
|
315
342
|
field_name,
|
|
316
343
|
tenant_id,
|
|
317
344
|
provider,
|
|
318
|
-
|
|
319
|
-
|
|
345
|
+
min_sim,
|
|
346
|
+
limit,
|
|
320
347
|
tenant_id, # Use tenant_id (query.user_id) as user_id
|
|
321
348
|
)
|
|
349
|
+
logger.debug(
|
|
350
|
+
f"SEARCH params: table={table_name}, field={field_name}, "
|
|
351
|
+
f"tenant_id={tenant_id}, provider={provider}, "
|
|
352
|
+
f"min_similarity={min_sim}, limit={limit}, "
|
|
353
|
+
f"embedding_dims={len(query_embedding)}"
|
|
354
|
+
)
|
|
322
355
|
results = await self.db.execute(SEARCH_QUERY, query_params)
|
|
356
|
+
logger.debug(f"SEARCH results: {len(results)} rows")
|
|
323
357
|
|
|
324
358
|
return {
|
|
325
359
|
"query_type": "SEARCH",
|
rem/services/session/compression.py
CHANGED
|
@@ -14,6 +14,21 @@ from typing import Any
|
|
|
14
14
|
|
|
15
15
|
from loguru import logger
|
|
16
16
|
|
|
17
|
+
# Max length for entity keys (kv_store.entity_key is varchar(255))
MAX_ENTITY_KEY_LENGTH = 255


def truncate_key(key: str, max_length: int = MAX_ENTITY_KEY_LENGTH) -> str:
    """Shorten ``key`` so that it never exceeds ``max_length`` characters.

    Keys that already fit are returned unchanged. Longer keys keep their
    leading characters and get a ``-`` plus the first 8 hex characters of the
    key's MD5 digest appended, so that distinct long keys sharing a prefix
    still map to distinct truncated keys. The digest is used for uniqueness
    only, not for security.

    Args:
        key: The key to shorten.
        max_length: Maximum allowed length of the returned key.

    Returns:
        ``key`` itself when it fits, otherwise a string of at most
        ``max_length`` characters.
    """
    if len(key) <= max_length:
        return key

    # Imported locally: hashlib is only needed on the (rare) truncation path.
    import hashlib

    hash_length = 8
    digest = hashlib.md5(key.encode()).hexdigest()[:hash_length]

    # Room left for the original prefix once "-" and the digest are reserved.
    prefix_length = max_length - (hash_length + 1)
    if prefix_length >= 0:
        truncated = key[:prefix_length] + "-" + digest
    else:
        # The limit is too small for "<prefix>-<hash>". A negative slice bound
        # here would keep almost the whole key and overshoot the limit, so
        # fall back to as much of the digest as fits.
        truncated = digest[: max(max_length, 0)]

    logger.warning(f"Truncated key from {len(key)} to {len(truncated)} chars: {key[:50]}...")
    return truncated
|
|
31
|
+
|
|
17
32
|
from rem.models.entities import Message
|
|
18
33
|
from rem.services.postgres import PostgresService, Repository
|
|
19
34
|
from rem.settings import settings
|
|
@@ -151,15 +166,20 @@ class SessionMessageStore:
|
|
|
151
166
|
return f"msg-{message_index}"
|
|
152
167
|
|
|
153
168
|
# Create entity key for REM LOOKUP: session-{session_id}-msg-{index}
|
|
154
|
-
entity_key
|
|
169
|
+
# Truncate to avoid exceeding kv_store.entity_key varchar(255) limit
|
|
170
|
+
entity_key = truncate_key(f"session-{session_id}-msg-{message_index}")
|
|
155
171
|
|
|
156
172
|
# Create Message entity for assistant response
|
|
173
|
+
# Use pre-generated id from message dict if available (for frontend feedback)
|
|
157
174
|
msg = Message(
|
|
175
|
+
id=message.get("id"), # Use pre-generated ID if provided
|
|
158
176
|
content=message.get("content", ""),
|
|
159
177
|
message_type=message.get("role", "assistant"),
|
|
160
178
|
session_id=session_id,
|
|
161
179
|
tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
|
|
162
180
|
user_id=user_id or self.user_id,
|
|
181
|
+
trace_id=message.get("trace_id"),
|
|
182
|
+
span_id=message.get("span_id"),
|
|
163
183
|
metadata={
|
|
164
184
|
"message_index": message_index,
|
|
165
185
|
"entity_key": entity_key, # Store entity key for LOOKUP
|
|
@@ -268,11 +288,14 @@ class SessionMessageStore:
|
|
|
268
288
|
# Short assistant messages, user messages, and system messages stored as-is
|
|
269
289
|
# Store ALL messages in database for full audit trail
|
|
270
290
|
msg = Message(
|
|
291
|
+
id=message.get("id"), # Use pre-generated ID if provided
|
|
271
292
|
content=content,
|
|
272
293
|
message_type=message.get("role", "user"),
|
|
273
294
|
session_id=session_id,
|
|
274
295
|
tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
|
|
275
296
|
user_id=user_id or self.user_id,
|
|
297
|
+
trace_id=message.get("trace_id"),
|
|
298
|
+
span_id=message.get("span_id"),
|
|
276
299
|
metadata={
|
|
277
300
|
"message_index": idx,
|
|
278
301
|
"timestamp": message.get("timestamp"),
|
rem/services/session/reload.py
CHANGED
|
rem/services/user_service.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""
|
|
2
|
+
User Service - User account management.
|
|
3
|
+
|
|
4
|
+
Handles user creation, profile updates, and session linking.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from loguru import logger
|
|
11
|
+
|
|
12
|
+
from ..models.entities.user import User, UserTier
|
|
13
|
+
from .postgres.repository import Repository
|
|
14
|
+
from .postgres.service import PostgresService
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class UserService:
    """
    Service for managing user accounts and sessions.

    Reads and writes ``User`` records through a ``Repository`` bound to the
    ``users`` table of the given database service.
    """

    # Display name given to accounts that have no real name yet. It is the
    # only stored name that get_or_create_user() is allowed to overwrite.
    PLACEHOLDER_NAME = "New User"

    def __init__(self, db: PostgresService):
        """Keep the database handle and build the ``users`` repository on it."""
        self.db = db
        self.repo = Repository(User, "users", db=db)

    async def get_or_create_user(
        self,
        email: str,
        tenant_id: str = "default",
        name: str = PLACEHOLDER_NAME,
        avatar_url: Optional[str] = None,
    ) -> User:
        """
        Get existing user by email or create a new one.

        An existing user is refreshed and saved only when something actually
        changes: a placeholder name is replaced by a real one, or a new
        avatar URL is supplied. Otherwise no write is issued.

        Args:
            email: Address used to look the user up; also stored as
                ``user_id`` when a new account is created.
            tenant_id: Tenant assigned to a newly created user. It is not
                part of the lookup filter.
            name: Display name. Replaces the stored name only while that
                name is still the placeholder.
            avatar_url: Optional avatar URL, kept under
                ``metadata["avatar_url"]``.

        Returns:
            The existing (possibly updated) user, or the newly created one.
        """
        matches = await self.repo.find(filters={"email": email}, limit=1)

        if matches:
            user = matches[0]
            changed = False

            # Only replace the placeholder, and only with a real name. Passing
            # the placeholder itself (the default) must not trigger a write.
            if (
                name
                and name != self.PLACEHOLDER_NAME
                and user.name == self.PLACEHOLDER_NAME
            ):
                user.name = name
                changed = True

            # Store avatar in metadata if provided
            if avatar_url:
                user.metadata = user.metadata or {}
                if user.metadata.get("avatar_url") != avatar_url:
                    user.metadata["avatar_url"] = avatar_url
                    changed = True

            if changed:
                user.updated_at = datetime.utcnow()
                await self.repo.upsert(user)

            return user

        # Create new user. One timestamp is shared so that created_at and
        # updated_at are identical on a fresh record.
        now = datetime.utcnow()
        user = User(
            tenant_id=tenant_id,
            # NOTE(review): the email doubles as user_id for now; the model
            # also has a separate 'id' UUID. Confirm whether user_id should
            # be an external ID/email or a UUID.
            user_id=email,
            name=name,
            email=email,
            tier=UserTier.FREE,
            created_at=now,
            updated_at=now,
            metadata={"avatar_url": avatar_url} if avatar_url else {},
        )
        await self.repo.upsert(user)
        logger.info(f"Created new user: {email}")
        return user

    async def link_anonymous_session(self, user: User, anon_id: str) -> None:
        """
        Link an anonymous session ID to a user account.

        This allows merging history from the anonymous session into the user's profile.
        Does nothing when ``anon_id`` is empty or already linked.

        Args:
            user: Account that receives the anonymous ID.
            anon_id: Anonymous session identifier to append to
                ``user.anonymous_ids``.
        """
        # Nothing to link, or already linked: skip the write.
        if not anon_id or anon_id in user.anonymous_ids:
            return

        user.anonymous_ids.append(anon_id)
        user.updated_at = datetime.utcnow()

        await self.repo.upsert(user)
        logger.info(f"Linked anonymous session {anon_id} to user {user.email}")

        # TODO: Migrate/Merge actual data (rate limit counts, history) if needed.
        # For now, we just link the IDs so future queries can include data from this anon_id.
|