remdb 0.3.0__py3-none-any.whl → 0.3.127__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (106)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +502 -0
  6. rem/agentic/context.py +51 -25
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/mcp/tool_wrapper.py +29 -3
  9. rem/agentic/otel/setup.py +93 -4
  10. rem/agentic/providers/phoenix.py +32 -43
  11. rem/agentic/providers/pydantic_ai.py +168 -24
  12. rem/agentic/schema.py +358 -21
  13. rem/agentic/tools/rem_tools.py +3 -3
  14. rem/api/README.md +238 -1
  15. rem/api/deps.py +255 -0
  16. rem/api/main.py +154 -37
  17. rem/api/mcp_router/resources.py +1 -1
  18. rem/api/mcp_router/server.py +26 -5
  19. rem/api/mcp_router/tools.py +465 -7
  20. rem/api/middleware/tracking.py +172 -0
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +124 -0
  23. rem/api/routers/chat/completions.py +402 -20
  24. rem/api/routers/chat/models.py +88 -10
  25. rem/api/routers/chat/otel_utils.py +33 -0
  26. rem/api/routers/chat/sse_events.py +542 -0
  27. rem/api/routers/chat/streaming.py +642 -45
  28. rem/api/routers/dev.py +81 -0
  29. rem/api/routers/feedback.py +268 -0
  30. rem/api/routers/messages.py +473 -0
  31. rem/api/routers/models.py +78 -0
  32. rem/api/routers/query.py +360 -0
  33. rem/api/routers/shared_sessions.py +406 -0
  34. rem/auth/middleware.py +126 -27
  35. rem/cli/commands/README.md +237 -64
  36. rem/cli/commands/ask.py +13 -10
  37. rem/cli/commands/cluster.py +1808 -0
  38. rem/cli/commands/configure.py +5 -6
  39. rem/cli/commands/db.py +396 -139
  40. rem/cli/commands/experiments.py +293 -73
  41. rem/cli/commands/process.py +22 -15
  42. rem/cli/commands/scaffold.py +47 -0
  43. rem/cli/commands/schema.py +97 -50
  44. rem/cli/main.py +29 -6
  45. rem/config.py +10 -3
  46. rem/models/core/core_model.py +7 -1
  47. rem/models/core/rem_query.py +5 -2
  48. rem/models/entities/__init__.py +21 -0
  49. rem/models/entities/domain_resource.py +38 -0
  50. rem/models/entities/feedback.py +123 -0
  51. rem/models/entities/message.py +30 -1
  52. rem/models/entities/session.py +83 -0
  53. rem/models/entities/shared_session.py +180 -0
  54. rem/models/entities/user.py +10 -3
  55. rem/registry.py +373 -0
  56. rem/schemas/agents/rem.yaml +7 -3
  57. rem/services/content/providers.py +94 -140
  58. rem/services/content/service.py +92 -20
  59. rem/services/dreaming/affinity_service.py +2 -16
  60. rem/services/dreaming/moment_service.py +2 -15
  61. rem/services/embeddings/api.py +24 -17
  62. rem/services/embeddings/worker.py +16 -16
  63. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  64. rem/services/phoenix/client.py +302 -28
  65. rem/services/postgres/README.md +159 -15
  66. rem/services/postgres/__init__.py +2 -1
  67. rem/services/postgres/diff_service.py +531 -0
  68. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  69. rem/services/postgres/repository.py +132 -0
  70. rem/services/postgres/schema_generator.py +291 -9
  71. rem/services/postgres/service.py +6 -6
  72. rem/services/rate_limit.py +113 -0
  73. rem/services/rem/README.md +14 -0
  74. rem/services/rem/parser.py +44 -9
  75. rem/services/rem/service.py +36 -2
  76. rem/services/session/compression.py +24 -1
  77. rem/services/session/reload.py +1 -1
  78. rem/services/user_service.py +98 -0
  79. rem/settings.py +313 -29
  80. rem/sql/background_indexes.sql +21 -16
  81. rem/sql/migrations/001_install.sql +387 -54
  82. rem/sql/migrations/002_install_models.sql +2320 -393
  83. rem/sql/migrations/003_optional_extensions.sql +326 -0
  84. rem/sql/migrations/004_cache_system.sql +548 -0
  85. rem/utils/__init__.py +18 -0
  86. rem/utils/constants.py +97 -0
  87. rem/utils/date_utils.py +228 -0
  88. rem/utils/embeddings.py +17 -4
  89. rem/utils/files.py +167 -0
  90. rem/utils/mime_types.py +158 -0
  91. rem/utils/model_helpers.py +156 -1
  92. rem/utils/schema_loader.py +282 -35
  93. rem/utils/sql_paths.py +146 -0
  94. rem/utils/sql_types.py +3 -1
  95. rem/utils/vision.py +9 -14
  96. rem/workers/README.md +14 -14
  97. rem/workers/__init__.py +3 -1
  98. rem/workers/db_listener.py +579 -0
  99. rem/workers/db_maintainer.py +74 -0
  100. rem/workers/unlogged_maintainer.py +463 -0
  101. {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/METADATA +464 -289
  102. {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/RECORD +104 -73
  103. {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/WHEEL +1 -1
  104. rem/sql/002_install_models.sql +0 -1068
  105. rem/sql/install_models.sql +0 -1038
  106. {remdb-0.3.0.dist-info → remdb-0.3.127.dist-info}/entry_points.txt +0 -0
@@ -50,9 +50,36 @@ class RemQueryParser:
50
50
  params: Dict[str, Any] = {}
51
51
  positional_args: List[str] = []
52
52
 
53
- # Process remaining tokens
54
- for token in tokens[1:]:
55
- if "=" in token:
53
+ # For SQL queries, preserve the raw query (keywords like LIMIT are SQL keywords)
54
+ if query_type == QueryType.SQL:
55
+ # Everything after "SQL" is the raw SQL query
56
+ raw_sql = query_string[3:].strip() # Skip "SQL" prefix
57
+ params["raw_query"] = raw_sql
58
+ return query_type, params
59
+
60
+ # Process remaining tokens, handling REM keywords
61
+ i = 1
62
+ while i < len(tokens):
63
+ token = tokens[i]
64
+ token_upper = token.upper()
65
+
66
+ # Handle REM keywords that take a value
67
+ if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH"):
68
+ if i + 1 < len(tokens):
69
+ keyword_map = {
70
+ "LIMIT": "limit",
71
+ "DEPTH": "max_depth",
72
+ "THRESHOLD": "threshold",
73
+ "TYPE": "edge_types",
74
+ "FROM": "initial_query",
75
+ "WITH": "initial_query",
76
+ }
77
+ key = keyword_map[token_upper]
78
+ value = tokens[i + 1]
79
+ params[key] = self._convert_value(key, value)
80
+ i += 2
81
+ continue
82
+ elif "=" in token:
56
83
  # It's a keyword argument
57
84
  key, value = token.split("=", 1)
58
85
  # Handle parameter aliases
@@ -61,6 +88,7 @@ class RemQueryParser:
61
88
  else:
62
89
  # It's a positional argument part
63
90
  positional_args.append(token)
91
+ i += 1
64
92
 
65
93
  # Map positional arguments to specific fields based on QueryType
66
94
  self._map_positional_args(query_type, positional_args, params)
@@ -133,13 +161,20 @@ class RemQueryParser:
133
161
  params["query_text"] = combined_value
134
162
 
135
163
  elif query_type == QueryType.SEARCH:
136
- params["query_text"] = combined_value
164
+ # SEARCH expects: SEARCH <table> <query_text> [LIMIT n]
165
+ # First positional arg is table name, rest is query text
166
+ if len(positional_args) >= 2:
167
+ params["table_name"] = positional_args[0]
168
+ params["query_text"] = " ".join(positional_args[1:])
169
+ elif len(positional_args) == 1:
170
+ # Could be table name or query text - assume query text if no table
171
+ params["query_text"] = positional_args[0]
172
+ # If no positional args, params stays empty
137
173
 
138
174
  elif query_type == QueryType.TRAVERSE:
139
175
  params["initial_query"] = combined_value
140
176
 
141
- # SQL typically requires named arguments (table=...), but if we supported
142
- # SQL SELECT * FROM ..., we might handle it differently.
143
- # For now, RemService expects table=...
144
- # If there are positional args for SQL, we might ignore or raise,
145
- # but current service doesn't use them.
177
+ elif query_type == QueryType.SQL:
178
+ # SQL with positional args means "SQL SELECT * FROM ..." form
179
+ # Treat the combined positional args as the raw SQL query
180
+ params["raw_query"] = combined_value
@@ -13,6 +13,31 @@ Design:
13
13
  - All queries pushed down to Postgres for performance
14
14
  - Model schema inspection for validation only
15
15
  - Exceptions for missing fields/embeddings
16
+
17
+ TODO: Staged Plan Execution
18
+ - Implement execute_staged_plan() method for multi-stage query execution
19
+ - Each stage can be:
20
+ 1. Static query (query field): Execute REM dialect directly
21
+ 2. Dynamic query (intent field): LLM interprets intent + previous results to build query
22
+ - Flow for dynamic stages:
23
+ 1. Gather results from depends_on stages (from previous_results or current execution)
24
+ 2. Pass intent + previous results to LLM (like ask_rem but with context)
25
+ 3. LLM generates REM query based on what it learned from previous stages
26
+ 4. Execute generated query
27
+ 5. Store results in stage_results for client to use in continuation
28
+ - Multi-turn continuation:
29
+ - Client passes previous_results back from response's stage_results
30
+ - Client sets resume_from_stage to skip already-executed stages
31
+ - Server uses previous_results as context for depends_on lookups
32
+ - Use cases:
33
+ - LOOKUP "Sarah" → intent: "find her team members" (LLM sees Sarah's graph_edges, builds TRAVERSE)
34
+ - SEARCH "API docs" → intent: "get authors" (LLM extracts author refs, builds LOOKUP)
35
+ - Complex graph exploration with LLM-driven navigation
36
+ - API: POST /api/v1/query with:
37
+ - mode="staged-plan"
38
+ - plan=[{stage, query|intent, name, depends_on}]
39
+ - previous_results=[{stage, name, query_executed, results, count}] (for continuation)
40
+ - resume_from_stage=N (to skip completed stages)
16
41
  """
17
42
 
18
43
  from typing import Any
@@ -309,17 +334,26 @@ class RemService:
309
334
  )
310
335
 
311
336
  # Execute vector search via rem_search() PostgreSQL function
337
+ min_sim = params.min_similarity if params.min_similarity is not None else 0.3
338
+ limit = params.limit or 10
312
339
  query_params = get_search_params(
313
340
  query_embedding,
314
341
  table_name,
315
342
  field_name,
316
343
  tenant_id,
317
344
  provider,
318
- params.min_similarity or 0.7,
319
- params.limit or 10,
345
+ min_sim,
346
+ limit,
320
347
  tenant_id, # Use tenant_id (query.user_id) as user_id
321
348
  )
349
+ logger.debug(
350
+ f"SEARCH params: table={table_name}, field={field_name}, "
351
+ f"tenant_id={tenant_id}, provider={provider}, "
352
+ f"min_similarity={min_sim}, limit={limit}, "
353
+ f"embedding_dims={len(query_embedding)}"
354
+ )
322
355
  results = await self.db.execute(SEARCH_QUERY, query_params)
356
+ logger.debug(f"SEARCH results: {len(results)} rows")
323
357
 
324
358
  return {
325
359
  "query_type": "SEARCH",
@@ -14,6 +14,21 @@ from typing import Any
14
14
 
15
15
  from loguru import logger
16
16
 
17
+ # Max length for entity keys (kv_store.entity_key is varchar(255))
18
+ MAX_ENTITY_KEY_LENGTH = 255
19
+
20
+
21
+ def truncate_key(key: str, max_length: int = MAX_ENTITY_KEY_LENGTH) -> str:
22
+ """Truncate a key to max length, preserving useful suffix if possible."""
23
+ if len(key) <= max_length:
24
+ return key
25
+ # Keep first part and add hash suffix for uniqueness
26
+ import hashlib
27
+ hash_suffix = hashlib.md5(key.encode()).hexdigest()[:8]
28
+ truncated = key[:max_length - 9] + "-" + hash_suffix
29
+ logger.warning(f"Truncated key from {len(key)} to {len(truncated)} chars: {key[:50]}...")
30
+ return truncated
31
+
17
32
  from rem.models.entities import Message
18
33
  from rem.services.postgres import PostgresService, Repository
19
34
  from rem.settings import settings
@@ -151,15 +166,20 @@ class SessionMessageStore:
151
166
  return f"msg-{message_index}"
152
167
 
153
168
  # Create entity key for REM LOOKUP: session-{session_id}-msg-{index}
154
- entity_key = f"session-{session_id}-msg-{message_index}"
169
+ # Truncate to avoid exceeding kv_store.entity_key varchar(255) limit
170
+ entity_key = truncate_key(f"session-{session_id}-msg-{message_index}")
155
171
 
156
172
  # Create Message entity for assistant response
173
+ # Use pre-generated id from message dict if available (for frontend feedback)
157
174
  msg = Message(
175
+ id=message.get("id"), # Use pre-generated ID if provided
158
176
  content=message.get("content", ""),
159
177
  message_type=message.get("role", "assistant"),
160
178
  session_id=session_id,
161
179
  tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
162
180
  user_id=user_id or self.user_id,
181
+ trace_id=message.get("trace_id"),
182
+ span_id=message.get("span_id"),
163
183
  metadata={
164
184
  "message_index": message_index,
165
185
  "entity_key": entity_key, # Store entity key for LOOKUP
@@ -268,11 +288,14 @@ class SessionMessageStore:
268
288
  # Short assistant messages, user messages, and system messages stored as-is
269
289
  # Store ALL messages in database for full audit trail
270
290
  msg = Message(
291
+ id=message.get("id"), # Use pre-generated ID if provided
271
292
  content=content,
272
293
  message_type=message.get("role", "user"),
273
294
  session_id=session_id,
274
295
  tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
275
296
  user_id=user_id or self.user_id,
297
+ trace_id=message.get("trace_id"),
298
+ span_id=message.get("span_id"),
276
299
  metadata={
277
300
  "message_index": idx,
278
301
  "timestamp": message.get("timestamp"),
@@ -65,7 +65,7 @@ async def reload_session(
65
65
  session_id=session_id, user_id=user_id, decompress=decompress_messages
66
66
  )
67
67
 
68
- logger.info(
68
+ logger.debug(
69
69
  f"Reloaded {len(messages)} messages for session {session_id} "
70
70
  f"(decompressed={decompress_messages})"
71
71
  )
@@ -0,0 +1,98 @@
1
+ """
2
+ User Service - User account management.
3
+
4
+ Handles user creation, profile updates, and session linking.
5
+ """
6
+
7
+ from datetime import datetime
8
+ from typing import Optional
9
+
10
+ from loguru import logger
11
+
12
+ from ..models.entities.user import User, UserTier
13
+ from .postgres.repository import Repository
14
+ from .postgres.service import PostgresService
15
+
16
+
17
+ class UserService:
18
+ """
19
+ Service for managing user accounts and sessions.
20
+ """
21
+
22
+ def __init__(self, db: PostgresService):
23
+ self.db = db
24
+ self.repo = Repository(User, "users", db=db)
25
+
26
+ async def get_or_create_user(
27
+ self,
28
+ email: str,
29
+ tenant_id: str = "default",
30
+ name: str = "New User",
31
+ avatar_url: Optional[str] = None,
32
+ ) -> User:
33
+ """
34
+ Get existing user by email or create a new one.
35
+ """
36
+ users = await self.repo.find(filters={"email": email}, limit=1)
37
+
38
+ if users:
39
+ user = users[0]
40
+ # Update profile if needed (e.g., name/avatar from OAuth)
41
+ updated = False
42
+ if name and user.name == "New User": # Only update if placeholder
43
+ user.name = name
44
+ updated = True
45
+
46
+ # Store avatar in metadata if provided
47
+ if avatar_url:
48
+ user.metadata = user.metadata or {}
49
+ if user.metadata.get("avatar_url") != avatar_url:
50
+ user.metadata["avatar_url"] = avatar_url
51
+ updated = True
52
+
53
+ if updated:
54
+ user.updated_at = datetime.utcnow()
55
+ await self.repo.upsert(user)
56
+
57
+ return user
58
+
59
+ # Create new user
60
+ user = User(
61
+ tenant_id=tenant_id,
62
+ user_id=email, # Use email as user_id for now? Or UUID?
63
+ # The User model has 'user_id' field but also 'id' UUID.
64
+ # Usually user_id is the external ID or email.
65
+ name=name,
66
+ email=email,
67
+ tier=UserTier.FREE,
68
+ created_at=datetime.utcnow(),
69
+ updated_at=datetime.utcnow(),
70
+ metadata={"avatar_url": avatar_url} if avatar_url else {},
71
+ )
72
+ await self.repo.upsert(user)
73
+ logger.info(f"Created new user: {email}")
74
+ return user
75
+
76
+ async def link_anonymous_session(self, user: User, anon_id: str) -> None:
77
+ """
78
+ Link an anonymous session ID to a user account.
79
+
80
+ This allows merging history from the anonymous session into the user's profile.
81
+ """
82
+ if not anon_id:
83
+ return
84
+
85
+ # Check if already linked
86
+ if anon_id in user.anonymous_ids:
87
+ return
88
+
89
+ # Add to list
90
+ user.anonymous_ids.append(anon_id)
91
+ user.updated_at = datetime.utcnow()
92
+
93
+ # Save
94
+ await self.repo.upsert(user)
95
+ logger.info(f"Linked anonymous session {anon_id} to user {user.email}")
96
+
97
+ # TODO: Migrate/Merge actual data (rate limit counts, history) if needed.
98
+ # For now, we just link the IDs so future queries can include data from this anon_id.