remdb 0.3.146__py3-none-any.whl → 0.3.181__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (57):
  1. rem/agentic/agents/__init__.py +16 -0
  2. rem/agentic/agents/agent_manager.py +311 -0
  3. rem/agentic/context.py +81 -3
  4. rem/agentic/context_builder.py +36 -9
  5. rem/agentic/mcp/tool_wrapper.py +43 -14
  6. rem/agentic/providers/pydantic_ai.py +76 -34
  7. rem/agentic/schema.py +4 -3
  8. rem/agentic/tools/rem_tools.py +11 -0
  9. rem/api/deps.py +3 -5
  10. rem/api/main.py +22 -3
  11. rem/api/mcp_router/resources.py +75 -14
  12. rem/api/mcp_router/server.py +28 -23
  13. rem/api/mcp_router/tools.py +177 -2
  14. rem/api/middleware/tracking.py +5 -5
  15. rem/api/routers/auth.py +352 -6
  16. rem/api/routers/chat/completions.py +5 -3
  17. rem/api/routers/chat/streaming.py +95 -22
  18. rem/api/routers/messages.py +24 -15
  19. rem/auth/__init__.py +13 -3
  20. rem/auth/jwt.py +352 -0
  21. rem/auth/middleware.py +70 -30
  22. rem/auth/providers/__init__.py +4 -1
  23. rem/auth/providers/email.py +215 -0
  24. rem/cli/commands/ask.py +1 -1
  25. rem/cli/commands/db.py +118 -54
  26. rem/models/entities/__init__.py +4 -0
  27. rem/models/entities/ontology.py +93 -101
  28. rem/models/entities/subscriber.py +175 -0
  29. rem/models/entities/user.py +1 -0
  30. rem/schemas/agents/core/agent-builder.yaml +235 -0
  31. rem/services/__init__.py +3 -1
  32. rem/services/content/service.py +4 -3
  33. rem/services/email/__init__.py +10 -0
  34. rem/services/email/service.py +522 -0
  35. rem/services/email/templates.py +360 -0
  36. rem/services/embeddings/worker.py +26 -12
  37. rem/services/postgres/README.md +38 -0
  38. rem/services/postgres/diff_service.py +19 -3
  39. rem/services/postgres/pydantic_to_sqlalchemy.py +37 -2
  40. rem/services/postgres/register_type.py +1 -1
  41. rem/services/postgres/repository.py +37 -25
  42. rem/services/postgres/schema_generator.py +5 -5
  43. rem/services/postgres/sql_builder.py +6 -5
  44. rem/services/session/compression.py +113 -50
  45. rem/services/session/reload.py +14 -7
  46. rem/services/user_service.py +41 -9
  47. rem/settings.py +182 -1
  48. rem/sql/background_indexes.sql +5 -0
  49. rem/sql/migrations/001_install.sql +33 -4
  50. rem/sql/migrations/002_install_models.sql +204 -186
  51. rem/sql/migrations/005_schema_update.sql +145 -0
  52. rem/utils/model_helpers.py +101 -0
  53. rem/utils/schema_loader.py +45 -7
  54. {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/METADATA +1 -1
  55. {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/RECORD +57 -48
  56. {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/WHEEL +0 -0
  57. {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/entry_points.txt +0 -0
@@ -97,7 +97,7 @@ Context Building Flow:
97
97
  - Long messages include REM LOOKUP hints: "... [REM LOOKUP session-{id}-msg-{index}] ..."
98
98
  - Agent can retrieve full content on-demand using REM LOOKUP
99
99
  3. User profile provided as REM LOOKUP hint (on-demand by default)
100
- - Agent receives: "User ID: {user_id}. To load user profile: Use REM LOOKUP users/{user_id}"
100
+ - Agent receives: "User: {email}. To load user profile: Use REM LOOKUP \"{email}\""
101
101
  - Agent decides whether to load profile based on query
102
102
  4. If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
103
103
  5. Combines: system context + compressed session history + new messages
@@ -330,8 +330,8 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
330
330
  - Useful for A/B testing, model comparison, and feedback collection
331
331
  """
332
332
  # Load agent schema: use header value from context or default
333
- # Extract AgentContext first to get schema name
334
- temp_context = AgentContext.from_headers(dict(request.headers))
333
+ # Extract AgentContext from request (gets user_id from JWT token)
334
+ temp_context = AgentContext.from_request(request)
335
335
  schema_name = temp_context.agent_schema_uri or DEFAULT_AGENT_SCHEMA
336
336
 
337
337
  # Resolve model: use body.model if provided, otherwise settings default
@@ -350,6 +350,7 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
350
350
  context, messages = await ContextBuilder.build_from_headers(
351
351
  headers=dict(request.headers),
352
352
  new_messages=new_messages,
353
+ user_id=temp_context.user_id, # From JWT token (source of truth)
353
354
  )
354
355
 
355
356
  # Ensure session exists with metadata and eval mode if applicable
@@ -509,6 +510,7 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
509
510
  context, messages = await ContextBuilder.build_from_headers(
510
511
  headers=dict(request.headers),
511
512
  new_messages=new_messages,
513
+ user_id=temp_context.user_id, # From JWT token (source of truth)
512
514
  )
513
515
 
514
516
  logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
@@ -76,6 +76,9 @@ async def stream_openai_response(
76
76
  agent_schema: str | None = None,
77
77
  # Mutable container to capture trace context (deterministic, not AI-dependent)
78
78
  trace_context_out: dict | None = None,
79
+ # Mutable container to capture tool calls for persistence
80
+ # Format: list of {"tool_name": str, "tool_id": str, "arguments": dict, "result": any}
81
+ tool_calls_out: list | None = None,
79
82
  ) -> AsyncGenerator[str, None]:
80
83
  """
81
84
  Stream Pydantic AI agent responses with rich SSE events.
@@ -146,6 +149,9 @@ async def stream_openai_response(
146
149
  pending_tool_completions: list[tuple[str, str]] = []
147
150
  # Track if metadata was registered via register_metadata tool
148
151
  metadata_registered = False
152
+ # Track pending tool calls with full data for persistence
153
+ # Maps tool_id -> {"tool_name": str, "tool_id": str, "arguments": dict}
154
+ pending_tool_data: dict[str, dict] = {}
149
155
 
150
156
  try:
151
157
  # Emit initial progress event
@@ -299,6 +305,13 @@ async def stream_openai_response(
299
305
  arguments=args_dict
300
306
  ))
301
307
 
308
+ # Track tool call data for persistence (especially register_metadata)
309
+ pending_tool_data[tool_id] = {
310
+ "tool_name": tool_name,
311
+ "tool_id": tool_id,
312
+ "arguments": args_dict,
313
+ }
314
+
302
315
  # Update progress
303
316
  current_step = 2
304
317
  total_steps = 4 # Added tool execution step
@@ -421,6 +434,15 @@ async def stream_openai_response(
421
434
  hidden=False,
422
435
  ))
423
436
 
437
+ # Capture tool call with result for persistence
438
+ # Special handling for register_metadata - always capture full data
439
+ if tool_calls_out is not None and tool_id in pending_tool_data:
440
+ tool_data = pending_tool_data[tool_id]
441
+ tool_data["result"] = result_content
442
+ tool_data["is_metadata"] = is_metadata_event
443
+ tool_calls_out.append(tool_data)
444
+ del pending_tool_data[tool_id]
445
+
424
446
  if not is_metadata_event:
425
447
  # Normal tool completion - emit ToolCallEvent
426
448
  result_str = str(result_content)
@@ -728,6 +750,9 @@ async def stream_openai_response_with_save(
728
750
  # Accumulate content during streaming
729
751
  accumulated_content = []
730
752
 
753
+ # Capture tool calls for persistence (especially register_metadata)
754
+ tool_calls: list = []
755
+
731
756
  async for chunk in stream_openai_response(
732
757
  agent=agent,
733
758
  prompt=prompt,
@@ -737,6 +762,7 @@ async def stream_openai_response_with_save(
737
762
  session_id=session_id,
738
763
  message_id=message_id,
739
764
  trace_context_out=trace_context, # Pass container to capture trace IDs
765
+ tool_calls_out=tool_calls, # Capture tool calls for persistence
740
766
  ):
741
767
  yield chunk
742
768
 
@@ -755,28 +781,75 @@ async def stream_openai_response_with_save(
755
781
  except (json.JSONDecodeError, KeyError, IndexError):
756
782
  pass # Skip non-JSON or malformed chunks
757
783
 
758
- # After streaming completes, save the assistant response
759
- if settings.postgres.enabled and session_id and accumulated_content:
760
- full_content = "".join(accumulated_content)
784
+ # After streaming completes, save tool calls and assistant response
785
+ # Note: All messages stored UNCOMPRESSED. Compression happens on reload.
786
+ if settings.postgres.enabled and session_id:
761
787
  # Get captured trace context from container (deterministically captured inside agent execution)
762
788
  captured_trace_id = trace_context.get("trace_id")
763
789
  captured_span_id = trace_context.get("span_id")
764
- assistant_message = {
765
- "id": message_id, # Use pre-generated ID for consistency with metadata event
766
- "role": "assistant",
767
- "content": full_content,
768
- "timestamp": to_iso(utc_now()),
769
- "trace_id": captured_trace_id,
770
- "span_id": captured_span_id,
771
- }
772
- try:
773
- store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
774
- await store.store_session_messages(
775
- session_id=session_id,
776
- messages=[assistant_message],
777
- user_id=user_id,
778
- compress=True, # Compress long assistant responses
779
- )
780
- logger.debug(f"Saved assistant response {message_id} to session {session_id} ({len(full_content)} chars)")
781
- except Exception as e:
782
- logger.error(f"Failed to save assistant response: {e}", exc_info=True)
790
+ timestamp = to_iso(utc_now())
791
+
792
+ messages_to_store = []
793
+
794
+ # First, store tool call messages (message_type: "tool")
795
+ for tool_call in tool_calls:
796
+ tool_message = {
797
+ "role": "tool",
798
+ "content": json.dumps(tool_call.get("result", {}), default=str),
799
+ "timestamp": timestamp,
800
+ "trace_id": captured_trace_id,
801
+ "span_id": captured_span_id,
802
+ # Store tool call details in a way that can be reconstructed
803
+ "tool_call_id": tool_call.get("tool_id"),
804
+ "tool_name": tool_call.get("tool_name"),
805
+ "tool_arguments": tool_call.get("arguments"),
806
+ }
807
+ messages_to_store.append(tool_message)
808
+
809
+ # Then store assistant text response (if any)
810
+ if accumulated_content:
811
+ full_content = "".join(accumulated_content)
812
+ assistant_message = {
813
+ "id": message_id, # Use pre-generated ID for consistency with metadata event
814
+ "role": "assistant",
815
+ "content": full_content,
816
+ "timestamp": timestamp,
817
+ "trace_id": captured_trace_id,
818
+ "span_id": captured_span_id,
819
+ }
820
+ messages_to_store.append(assistant_message)
821
+
822
+ if messages_to_store:
823
+ try:
824
+ store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
825
+ await store.store_session_messages(
826
+ session_id=session_id,
827
+ messages=messages_to_store,
828
+ user_id=user_id,
829
+ compress=False, # Store uncompressed; compression happens on reload
830
+ )
831
+ logger.debug(
832
+ f"Saved {len(tool_calls)} tool calls and "
833
+ f"{'assistant response' if accumulated_content else 'no text'} "
834
+ f"to session {session_id}"
835
+ )
836
+ except Exception as e:
837
+ logger.error(f"Failed to save session messages: {e}", exc_info=True)
838
+
839
+ # Update session description with session_name (non-blocking, after all yields)
840
+ for tool_call in tool_calls:
841
+ if tool_call.get("tool_name") == "register_metadata" and tool_call.get("is_metadata"):
842
+ session_name = tool_call.get("arguments", {}).get("session_name")
843
+ if session_name:
844
+ try:
845
+ from ....models.entities import Session
846
+ from ....services.postgres import Repository
847
+ repo = Repository(Session, table_name="sessions")
848
+ session = await repo.get_by_id(session_id)
849
+ if session and session.description != session_name:
850
+ session.description = session_name
851
+ await repo.update(session)
852
+ logger.debug(f"Updated session {session_id} description to '{session_name}'")
853
+ except Exception as e:
854
+ logger.warning(f"Failed to update session description: {e}")
855
+ break
@@ -134,7 +134,6 @@ async def list_messages(
134
134
  ),
135
135
  limit: int = Query(default=50, ge=1, le=100, description="Max results to return"),
136
136
  offset: int = Query(default=0, ge=0, description="Offset for pagination"),
137
- x_tenant_id: str = Header(alias="X-Tenant-Id", default="default"),
138
137
  ) -> MessageListResponse:
139
138
  """
140
139
  List messages with optional filters.
@@ -158,15 +157,18 @@ async def list_messages(
158
157
 
159
158
  repo = Repository(Message, table_name="messages")
160
159
 
160
+ # Get current user for logging
161
+ current_user = get_current_user(request)
162
+ jwt_user_id = current_user.get("id") if current_user else None
163
+
161
164
  # If mine=true, force filter to current user's ID from JWT
162
165
  effective_user_id = user_id
163
166
  if mine:
164
- current_user = get_current_user(request)
165
167
  if current_user:
166
168
  effective_user_id = current_user.get("id")
167
169
 
168
170
  # Build user-scoped filters (admin can see all, regular users see only their own)
169
- filters = await get_user_filter(request, x_user_id=effective_user_id, x_tenant_id=x_tenant_id)
171
+ filters = await get_user_filter(request, x_user_id=effective_user_id)
170
172
 
171
173
  # Apply optional filters
172
174
  if session_id:
@@ -174,6 +176,13 @@ async def list_messages(
174
176
  if message_type:
175
177
  filters["message_type"] = message_type
176
178
 
179
+ # Log the query parameters for debugging
180
+ logger.debug(
181
+ f"[messages] Query: session_id={session_id} | "
182
+ f"jwt_user_id={jwt_user_id} | "
183
+ f"filters={filters}"
184
+ )
185
+
177
186
  # For date filtering, we need custom SQL (not supported by basic Repository)
178
187
  # For now, fetch all matching base filters and filter in Python
179
188
  # TODO: Extend Repository to support date range filters
@@ -206,6 +215,12 @@ async def list_messages(
206
215
  # Get total count for pagination info
207
216
  total = await repo.count(filters)
208
217
 
218
+ # Log result count
219
+ logger.debug(
220
+ f"[messages] Result: returned={len(messages)} | total={total} | "
221
+ f"session_id={session_id}"
222
+ )
223
+
209
224
  return MessageListResponse(data=messages, total=total, has_more=has_more)
210
225
 
211
226
 
@@ -213,7 +228,6 @@ async def list_messages(
213
228
  async def get_message(
214
229
  request: Request,
215
230
  message_id: str,
216
- x_tenant_id: str = Header(alias="X-Tenant-Id", default="default"),
217
231
  ) -> Message:
218
232
  """
219
233
  Get a specific message by ID.
@@ -236,7 +250,7 @@ async def get_message(
236
250
  raise HTTPException(status_code=503, detail="Database not enabled")
237
251
 
238
252
  repo = Repository(Message, table_name="messages")
239
- message = await repo.get_by_id(message_id, x_tenant_id)
253
+ message = await repo.get_by_id(message_id)
240
254
 
241
255
  if not message:
242
256
  raise HTTPException(status_code=404, detail=f"Message '{message_id}' not found")
@@ -263,7 +277,6 @@ async def list_sessions(
263
277
  mode: SessionMode | None = Query(default=None, description="Filter by session mode"),
264
278
  page: int = Query(default=1, ge=1, description="Page number (1-indexed)"),
265
279
  page_size: int = Query(default=50, ge=1, le=100, description="Number of results per page"),
266
- x_tenant_id: str = Header(alias="X-Tenant-Id", default="default"),
267
280
  ) -> SessionsQueryResponse:
268
281
  """
269
282
  List sessions with optional filters and page-based pagination.
@@ -288,7 +301,7 @@ async def list_sessions(
288
301
  repo = Repository(Session, table_name="sessions")
289
302
 
290
303
  # Build user-scoped filters (admin can see all, regular users see only their own)
291
- filters = await get_user_filter(request, x_user_id=user_id, x_tenant_id=x_tenant_id)
304
+ filters = await get_user_filter(request, x_user_id=user_id)
292
305
  if mode:
293
306
  filters["mode"] = mode.value
294
307
 
@@ -319,7 +332,6 @@ async def create_session(
319
332
  request_body: SessionCreateRequest,
320
333
  user: dict = Depends(require_admin),
321
334
  x_user_id: str = Header(alias="X-User-Id", default="default"),
322
- x_tenant_id: str = Header(alias="X-Tenant-Id", default="default"),
323
335
  ) -> Session:
324
336
  """
325
337
  Create a new session.
@@ -334,7 +346,6 @@ async def create_session(
334
346
 
335
347
  Headers:
336
348
  - X-User-Id: User identifier (owner of the session)
337
- - X-Tenant-Id: Tenant identifier
338
349
 
339
350
  Returns:
340
351
  Created session object
@@ -354,7 +365,7 @@ async def create_session(
354
365
  prompt=request_body.prompt,
355
366
  agent_schema_uri=request_body.agent_schema_uri,
356
367
  user_id=effective_user_id,
357
- tenant_id=x_tenant_id,
368
+ tenant_id="default", # tenant_id not used for filtering, set to default
358
369
  )
359
370
 
360
371
  repo = Repository(Session, table_name="sessions")
@@ -372,7 +383,6 @@ async def create_session(
372
383
  async def get_session(
373
384
  request: Request,
374
385
  session_id: str,
375
- x_tenant_id: str = Header(alias="X-Tenant-Id", default="default"),
376
386
  ) -> Session:
377
387
  """
378
388
  Get a specific session by ID.
@@ -395,11 +405,11 @@ async def get_session(
395
405
  raise HTTPException(status_code=503, detail="Database not enabled")
396
406
 
397
407
  repo = Repository(Session, table_name="sessions")
398
- session = await repo.get_by_id(session_id, x_tenant_id)
408
+ session = await repo.get_by_id(session_id)
399
409
 
400
410
  if not session:
401
411
  # Try finding by name
402
- sessions = await repo.find({"name": session_id, "tenant_id": x_tenant_id}, limit=1)
412
+ sessions = await repo.find({"name": session_id}, limit=1)
403
413
  if sessions:
404
414
  session = sessions[0]
405
415
  else:
@@ -420,7 +430,6 @@ async def update_session(
420
430
  request: Request,
421
431
  session_id: str,
422
432
  request_body: SessionUpdateRequest,
423
- x_tenant_id: str = Header(alias="X-Tenant-Id", default="default"),
424
433
  ) -> Session:
425
434
  """
426
435
  Update an existing session.
@@ -450,7 +459,7 @@ async def update_session(
450
459
  raise HTTPException(status_code=503, detail="Database not enabled")
451
460
 
452
461
  repo = Repository(Session, table_name="sessions")
453
- session = await repo.get_by_id(session_id, x_tenant_id)
462
+ session = await repo.get_by_id(session_id)
454
463
 
455
464
  if not session:
456
465
  raise HTTPException(status_code=404, detail=f"Session '{session_id}' not found")
rem/auth/__init__.py CHANGED
@@ -1,26 +1,36 @@
1
1
  """
2
2
  REM Authentication Module.
3
3
 
4
- OAuth 2.1 compliant authentication with support for:
4
+ Authentication with support for:
5
+ - Email passwordless login (verification codes)
5
6
  - Google OAuth
6
7
  - Microsoft Entra ID (Azure AD) OIDC
7
8
  - Custom OIDC providers
8
9
 
9
10
  Design Pattern:
10
11
  - Provider-agnostic base classes
11
- - PKCE (Proof Key for Code Exchange) for all flows
12
+ - PKCE (Proof Key for Code Exchange) for OAuth flows
12
13
  - State parameter for CSRF protection
13
14
  - Nonce for ID token replay protection
14
15
  - Token validation with JWKS
15
- - Clean separation: providers/ for OAuth logic, middleware.py for FastAPI integration
16
+ - Clean separation: providers/ for auth logic, middleware.py for FastAPI integration
17
+
18
+ Email Auth Flow:
19
+ 1. POST /api/auth/email/send-code with {email}
20
+ 2. User receives code via email
21
+ 3. POST /api/auth/email/verify with {email, code}
22
+ 4. Session created, user authenticated
16
23
  """
17
24
 
18
25
  from .providers.base import OAuthProvider
26
+ from .providers.email import EmailAuthProvider, EmailAuthResult
19
27
  from .providers.google import GoogleOAuthProvider
20
28
  from .providers.microsoft import MicrosoftOAuthProvider
21
29
 
22
30
  __all__ = [
23
31
  "OAuthProvider",
32
+ "EmailAuthProvider",
33
+ "EmailAuthResult",
24
34
  "GoogleOAuthProvider",
25
35
  "MicrosoftOAuthProvider",
26
36
  ]