remdb 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (74)
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +51 -27
  3. rem/agentic/mcp/tool_wrapper.py +155 -18
  4. rem/agentic/otel/setup.py +93 -4
  5. rem/agentic/providers/phoenix.py +371 -108
  6. rem/agentic/providers/pydantic_ai.py +195 -46
  7. rem/agentic/schema.py +361 -21
  8. rem/agentic/tools/rem_tools.py +3 -3
  9. rem/api/main.py +85 -16
  10. rem/api/mcp_router/resources.py +1 -1
  11. rem/api/mcp_router/server.py +18 -4
  12. rem/api/mcp_router/tools.py +394 -16
  13. rem/api/routers/admin.py +218 -1
  14. rem/api/routers/chat/completions.py +280 -7
  15. rem/api/routers/chat/models.py +81 -7
  16. rem/api/routers/chat/otel_utils.py +33 -0
  17. rem/api/routers/chat/sse_events.py +17 -1
  18. rem/api/routers/chat/streaming.py +177 -3
  19. rem/api/routers/feedback.py +142 -329
  20. rem/api/routers/query.py +360 -0
  21. rem/api/routers/shared_sessions.py +13 -13
  22. rem/cli/commands/README.md +237 -64
  23. rem/cli/commands/cluster.py +1808 -0
  24. rem/cli/commands/configure.py +4 -7
  25. rem/cli/commands/db.py +354 -143
  26. rem/cli/commands/experiments.py +436 -30
  27. rem/cli/commands/process.py +14 -8
  28. rem/cli/commands/schema.py +92 -45
  29. rem/cli/commands/session.py +336 -0
  30. rem/cli/dreaming.py +2 -2
  31. rem/cli/main.py +29 -6
  32. rem/config.py +8 -1
  33. rem/models/core/experiment.py +54 -0
  34. rem/models/core/rem_query.py +5 -2
  35. rem/models/entities/ontology.py +1 -1
  36. rem/models/entities/ontology_config.py +1 -1
  37. rem/models/entities/shared_session.py +2 -28
  38. rem/registry.py +10 -4
  39. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  40. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  41. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  42. rem/services/content/service.py +30 -8
  43. rem/services/embeddings/api.py +4 -4
  44. rem/services/embeddings/worker.py +16 -16
  45. rem/services/phoenix/client.py +59 -18
  46. rem/services/postgres/README.md +151 -26
  47. rem/services/postgres/__init__.py +2 -1
  48. rem/services/postgres/diff_service.py +531 -0
  49. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  50. rem/services/postgres/schema_generator.py +205 -4
  51. rem/services/postgres/service.py +6 -6
  52. rem/services/rem/parser.py +44 -9
  53. rem/services/rem/service.py +36 -2
  54. rem/services/session/compression.py +7 -0
  55. rem/services/session/reload.py +1 -1
  56. rem/settings.py +288 -16
  57. rem/sql/background_indexes.sql +19 -24
  58. rem/sql/migrations/001_install.sql +252 -69
  59. rem/sql/migrations/002_install_models.sql +2197 -619
  60. rem/sql/migrations/003_optional_extensions.sql +326 -0
  61. rem/sql/migrations/004_cache_system.sql +548 -0
  62. rem/utils/__init__.py +18 -0
  63. rem/utils/date_utils.py +2 -2
  64. rem/utils/schema_loader.py +110 -15
  65. rem/utils/sql_paths.py +146 -0
  66. rem/utils/vision.py +1 -1
  67. rem/workers/__init__.py +3 -1
  68. rem/workers/db_listener.py +579 -0
  69. rem/workers/unlogged_maintainer.py +463 -0
  70. {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/METADATA +300 -215
  71. {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/RECORD +73 -64
  72. rem/sql/migrations/003_seed_default_user.sql +0 -48
  73. {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/WHEEL +0 -0
  74. {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/entry_points.txt +0 -0
--- a/rem/api/routers/chat/models.py
+++ b/rem/api/routers/chat/models.py
@@ -1,14 +1,38 @@
 """
 OpenAI-compatible API models for chat completions.
 
-Design Pattern
+Design Pattern:
 - Full OpenAI compatibility for drop-in replacement
 - Support for streaming (SSE) and non-streaming modes
 - Response format control (text vs json_object)
-- Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Agent-Schema, etc.)
+- Headers map to AgentContext for session/context control
+- Body fields for OpenAI-compatible parameters + metadata
+
+Headers (context control):
+    X-User-Id → context.user_id (user identifier)
+    X-Tenant-Id → context.tenant_id (multi-tenancy, default: "default")
+    X-Session-Id → context.session_id (conversation continuity)
+    X-Agent-Schema → context.agent_schema_uri (which agent to use, default: "rem")
+    X-Model-Name → context.default_model (model override)
+    X-Chat-Is-Audio → triggers audio transcription ("true"/"false")
+    X-Is-Eval → context.is_eval (marks session as evaluation, sets mode=EVALUATION)
+
+Body Fields (OpenAI-compatible + extensions):
+    model → LLM model (e.g., "openai:gpt-4.1", "anthropic:claude-sonnet-4-5-20250929")
+    messages → Chat conversation history
+    temperature → Sampling temperature (0-2)
+    max_tokens → Max tokens (deprecated, use max_completion_tokens)
+    max_completion_tokens → Max tokens to generate
+    stream → Enable SSE streaming
+    metadata → Key-value pairs merged with session metadata (for evals/experiments)
+    store → Whether to store for distillation/evaluation
+    seed → Deterministic sampling seed
+    top_p → Nucleus sampling probability
+    reasoning_effort → low/medium/high for o-series models
+    service_tier → auto/flex/priority/default
 """
 
-from typing import Literal
+from typing import Any, Literal
 
 from pydantic import BaseModel, Field
 
@@ -46,10 +70,17 @@ class ChatCompletionRequest(BaseModel):
     Compatible with OpenAI's /v1/chat/completions endpoint.
 
     Headers Map to AgentContext:
-    - X-User-Id → context.user_id
-    - X-Tenant-Id → context.tenant_id
-    - X-Session-Id → context.session_id
-    - X-Agent-Schema → context.agent_schema_uri
+        X-User-Id → context.user_id
+        X-Tenant-Id → context.tenant_id (default: "default")
+        X-Session-Id → context.session_id
+        X-Agent-Schema → context.agent_schema_uri (default: "rem")
+        X-Model-Name → context.default_model
+        X-Chat-Is-Audio → triggers audio transcription
+        X-Is-Eval → context.is_eval (sets session mode=EVALUATION)
+
+    Body Fields for Metadata/Evals:
+        metadata → Key-value pairs merged with session metadata
+        store → Whether to store for distillation/evaluation
 
     Note: Model is specified in body.model (standard OpenAI field), not headers.
     """
@@ -73,6 +104,49 @@ class ChatCompletionRequest(BaseModel):
         default=None,
         description="Response format. Set type='json_object' to enable JSON mode.",
     )
+    # Additional OpenAI-compatible fields
+    metadata: dict[str, str] | None = Field(
+        default=None,
+        description="Key-value pairs attached to the request (max 16 keys, 64/512 char limits). "
+        "Merged with session metadata for persistence.",
+    )
+    store: bool | None = Field(
+        default=None,
+        description="Whether to store for distillation/evaluation purposes.",
+    )
+    max_completion_tokens: int | None = Field(
+        default=None,
+        ge=1,
+        description="Max tokens to generate (replaces deprecated max_tokens).",
+    )
+    seed: int | None = Field(
+        default=None,
+        description="Seed for deterministic sampling (best effort).",
+    )
+    top_p: float | None = Field(
+        default=None,
+        ge=0,
+        le=1,
+        description="Nucleus sampling probability. Use temperature OR top_p, not both.",
+    )
+    logprobs: bool | None = Field(
+        default=None,
+        description="Whether to return log probabilities for output tokens.",
+    )
+    top_logprobs: int | None = Field(
+        default=None,
+        ge=0,
+        le=20,
+        description="Number of most likely tokens to return at each position (requires logprobs=true).",
+    )
+    reasoning_effort: Literal["low", "medium", "high"] | None = Field(
+        default=None,
+        description="Reasoning effort for o-series models (low/medium/high).",
+    )
+    service_tier: Literal["auto", "flex", "priority", "default"] | None = Field(
+        default=None,
+        description="Service tier for processing (flex is 50% cheaper but slower).",
+    )
 
 
 # Response models
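Together, the header and body additions let a single request pin user, session, and agent context while tagging the exchange for later evaluation. A minimal client sketch, assuming a locally served API (the base URL and header values are illustrative; the header names and body fields are the ones documented above):

```python
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",  # assumed local deployment
    headers={
        "X-User-Id": "user-123",        # -> context.user_id
        "X-Session-Id": "sess-abc",     # -> context.session_id (continuity)
        "X-Agent-Schema": "rem",        # -> context.agent_schema_uri
        "X-Is-Eval": "true",            # marks the session as an evaluation run
    },
    json={
        "model": "openai:gpt-4.1",
        "messages": [{"role": "user", "content": "Summarize my last session."}],
        "max_completion_tokens": 256,   # replaces deprecated max_tokens
        "seed": 42,                     # best-effort determinism
        "metadata": {"experiment": "baseline-v1"},  # merged with session metadata
        "store": True,                  # keep for distillation/evaluation
        "stream": False,
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```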
--- /dev/null
+++ b/rem/api/routers/chat/otel_utils.py
@@ -0,0 +1,33 @@
+"""OTEL utilities for chat routers."""
+
+from loguru import logger
+
+
+def get_tracer():
+    """Get the OpenTelemetry tracer for chat completions."""
+    try:
+        from opentelemetry import trace
+        return trace.get_tracer("rem.chat.completions")
+    except Exception:
+        return None
+
+
+def get_current_trace_context() -> tuple[str | None, str | None]:
+    """Get trace_id and span_id from current OTEL context.
+
+    Returns:
+        Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
+    """
+    try:
+        from opentelemetry import trace
+
+        span = trace.get_current_span()
+        ctx = span.get_span_context()
+        if ctx.is_valid:
+            trace_id = format(ctx.trace_id, '032x')
+            span_id = format(ctx.span_id, '016x')
+            return trace_id, span_id
+    except Exception as e:
+        logger.debug(f"Could not get trace context: {e}")
+
+    return None, None
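Both helpers fail soft: if opentelemetry is not importable or no valid span is active, they return None values instead of raising, so call sites need no guards. A usage sketch (the span name is illustrative, and a configured SDK TracerProvider is assumed for the IDs to be valid):

```python
from rem.api.routers.chat.otel_utils import get_current_trace_context, get_tracer

tracer = get_tracer()
if tracer is not None:
    # With a configured SDK, the context inside the span is valid
    # and both IDs come back as lowercase hex strings
    with tracer.start_as_current_span("chat.completion"):
        trace_id, span_id = get_current_trace_context()
        print(f"correlate in Phoenix: trace_id={trace_id} span_id={span_id}")
else:
    # opentelemetry not installed: helpers degrade to (None, None)
    print(get_current_trace_context())
```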
--- a/rem/api/routers/chat/sse_events.py
+++ b/rem/api/routers/chat/sse_events.py
@@ -321,7 +321,13 @@ class MetadataEvent(BaseModel):
     # Agent info
     agent_schema: str | None = Field(
         default=None,
-        description="Name of the agent schema used for this response (e.g., 'rem', 'Siggy')"
+        description="Name of the agent schema used for this response (e.g., 'rem', 'query-assistant')"
+    )
+
+    # Session info
+    session_name: str | None = Field(
+        default=None,
+        description="Short 1-3 phrase name for the session topic (e.g., 'Prescription Drug Questions', 'AWS Setup Help')"
     )
 
     # Quality indicators
@@ -350,6 +356,16 @@ class MetadataEvent(BaseModel):
         description="Token count for this response"
     )
 
+    # Trace context for observability (deterministic, captured from OTEL)
+    trace_id: str | None = Field(
+        default=None,
+        description="OTEL trace ID for correlating with Phoenix/observability systems"
+    )
+    span_id: str | None = Field(
+        default=None,
+        description="OTEL span ID for correlating with Phoenix/observability systems"
+    )
+
     # System flags
     flags: list[str] | None = Field(
         default=None,
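For consumers, the net effect is that a metadata SSE event can now name the session and carry trace correlation IDs alongside the existing quality fields. An illustrative event body (all values invented; the field names are the MetadataEvent fields shown in this diff):

```python
# Illustrative MetadataEvent payload as a dict; values are invented examples
metadata_event = {
    "agent_schema": "rem",
    "session_name": "AWS Setup Help",  # short 1-3 phrase session topic
    "confidence": 0.92,
    "token_count": 512,
    "trace_id": "4bf92f3577b34da6a3ce929d0e0e4736",  # 32-hex OTEL trace ID
    "span_id": "00f067aa0ba902b7",                   # 16-hex OTEL span ID
    "flags": None,
}
```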
--- a/rem/api/routers/chat/streaming.py
+++ b/rem/api/routers/chat/streaming.py
@@ -47,6 +47,7 @@ from pydantic_ai.messages import (
     ToolCallPart,
 )
 
+from .otel_utils import get_current_trace_context, get_tracer
 from .models import (
     ChatCompletionMessageDelta,
     ChatCompletionStreamChoice,
@@ -71,6 +72,10 @@ async def stream_openai_response(
     message_id: str | None = None,
     in_reply_to: str | None = None,
     session_id: str | None = None,
+    # Agent info for metadata
+    agent_schema: str | None = None,
+    # Mutable container to capture trace context (deterministic, not AI-dependent)
+    trace_context_out: dict | None = None,
 ) -> AsyncGenerator[str, None]:
     """
     Stream Pydantic AI agent responses with rich SSE events.
@@ -154,6 +159,14 @@ async def stream_openai_response(
 
     # Use agent.iter() to get complete execution with tool calls
     async with agent.iter(prompt) as agent_run:
+        # Capture trace context IMMEDIATELY inside agent execution
+        # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
+        # NOT dependent on any AI-generated content
+        captured_trace_id, captured_span_id = get_current_trace_context()
+        if trace_context_out is not None:
+            trace_context_out["trace_id"] = captured_trace_id
+            trace_context_out["span_id"] = captured_span_id
+
         async for node in agent_run:
             # Check if this is a model request node (includes tool calls)
             if Agent.is_model_request_node(node):
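An async generator cannot return a value to its caller, so the new trace_context_out argument works as an out-parameter: the caller passes a dict, the generator mutates it as soon as the OTEL context is known, and the caller reads it after (or during) iteration. A stripped-down, self-contained sketch of the pattern (names are illustrative):

```python
import asyncio
from collections.abc import AsyncGenerator


async def stream(out: dict | None = None) -> AsyncGenerator[str, None]:
    # Side-channel: write into the caller-supplied container before yielding
    if out is not None:
        out["trace_id"] = "4bf92f3577b34da6a3ce929d0e0e4736"  # illustrative value
    yield "chunk-1"
    yield "chunk-2"


async def main() -> None:
    ctx: dict = {}
    async for chunk in stream(out=ctx):
        pass  # forward chunks to the client here
    print(ctx["trace_id"])  # populated even though the generator never "returns"


asyncio.run(main())
```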
@@ -258,8 +271,6 @@ async def stream_openai_response(
         # Queue for completion matching (FIFO)
         pending_tool_completions.append((tool_name, tool_id))
 
-        logger.info(f"🔧 {tool_name}")
-
         # Emit tool_call SSE event (started)
         # Try to get arguments as dict
         args_dict = None
@@ -269,6 +280,18 @@ async def stream_openai_response(
         elif isinstance(event.part.args, dict):
             args_dict = event.part.args
 
+        # Log tool call with key parameters
+        if args_dict and tool_name == "search_rem":
+            query_type = args_dict.get("query_type", "?")
+            limit = args_dict.get("limit", 20)
+            table = args_dict.get("table", "")
+            query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
+            if query_text and len(query_text) > 50:
+                query_text = query_text[:50] + "..."
+            logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
+        else:
+            logger.info(f"🔧 {tool_name}")
+
         yield format_sse_event(ToolCallEvent(
             tool_name=tool_name,
             tool_id=tool_id,
@@ -354,21 +377,47 @@ async def stream_openai_response(
         registered_sources = result_content.get("sources")
         registered_references = result_content.get("references")
         registered_flags = result_content.get("flags")
+        # Session naming
+        registered_session_name = result_content.get("session_name")
+        # Risk assessment fields
+        registered_risk_level = result_content.get("risk_level")
+        registered_risk_score = result_content.get("risk_score")
+        registered_risk_reasoning = result_content.get("risk_reasoning")
+        registered_recommended_action = result_content.get("recommended_action")
+        # Extra fields
+        registered_extra = result_content.get("extra")
 
         logger.info(
             f"📊 Metadata registered: confidence={registered_confidence}, "
-            f"sources={registered_sources}"
+            f"session_name={registered_session_name}, "
+            f"risk_level={registered_risk_level}, sources={registered_sources}"
         )
 
+        # Build extra dict with risk fields and any custom extras
+        extra_data = {}
+        if registered_risk_level is not None:
+            extra_data["risk_level"] = registered_risk_level
+        if registered_risk_score is not None:
+            extra_data["risk_score"] = registered_risk_score
+        if registered_risk_reasoning is not None:
+            extra_data["risk_reasoning"] = registered_risk_reasoning
+        if registered_recommended_action is not None:
+            extra_data["recommended_action"] = registered_recommended_action
+        if registered_extra:
+            extra_data.update(registered_extra)
+
         # Emit metadata event immediately
         yield format_sse_event(MetadataEvent(
             message_id=message_id,
             in_reply_to=in_reply_to,
             session_id=session_id,
+            agent_schema=agent_schema,
+            session_name=registered_session_name,
             confidence=registered_confidence,
             sources=registered_sources,
             model_version=model,
             flags=registered_flags,
+            extra=extra_data if extra_data else None,
             hidden=False,
         ))
 
@@ -377,6 +426,31 @@ async def stream_openai_response(
         result_str = str(result_content)
         result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
 
+        # Log result count for search_rem
+        if tool_name == "search_rem" and isinstance(result_content, dict):
+            results = result_content.get("results", {})
+            # Handle nested result structure: results may be a dict with 'results' list and 'count'
+            if isinstance(results, dict):
+                count = results.get("count", len(results.get("results", [])))
+                query_type = results.get("query_type", "?")
+                query_text = results.get("query_text", results.get("key", ""))
+                table = results.get("table_name", "")
+            elif isinstance(results, list):
+                count = len(results)
+                query_type = "?"
+                query_text = ""
+                table = ""
+            else:
+                count = "?"
+                query_type = "?"
+                query_text = ""
+                table = ""
+            status = result_content.get("status", "unknown")
+            # Truncate query text for logging
+            if query_text and len(str(query_text)) > 40:
+                query_text = str(query_text)[:40] + "..."
+            logger.info(f" ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
+
         yield format_sse_event(ToolCallEvent(
             tool_name=tool_name,
             tool_id=tool_id,
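The branching above exists because search_rem can return its hits either as a bare list or wrapped in a dict that carries its own count and query echo. Illustrative shapes (the key names are the ones read by the code; values are invented):

```python
# Dict form: metadata plus the actual hits nested under "results"
nested = {
    "status": "success",
    "results": {
        "count": 2,
        "query_type": "semantic",
        "query_text": "drug interactions",
        "table_name": "documents",
        "results": [{"id": 1}, {"id": 2}],
    },
}

# List form: just the hits, so count falls back to len() and the rest to "?"
flat = {"status": "success", "results": [{"id": 1}, {"id": 2}]}
```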
@@ -464,10 +538,14 @@ async def stream_openai_response(
         message_id=message_id,
         in_reply_to=in_reply_to,
         session_id=session_id,
+        agent_schema=agent_schema,
         confidence=1.0,  # Default to 100% confidence
         model_version=model,
         latency_ms=latency_ms,
         token_count=token_count,
+        # Include deterministic trace context captured from OTEL
+        trace_id=captured_trace_id,
+        span_id=captured_span_id,
     ))
 
     # Mark all progress complete
@@ -606,3 +684,99 @@ async def stream_minimal_simulator(
     # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
     async for sse_string in stream_minimal_demo(content=content, delay_ms=delay_ms):
         yield sse_string
+
+
+async def stream_openai_response_with_save(
+    agent: Agent,
+    prompt: str,
+    model: str,
+    request_id: str | None = None,
+    agent_schema: str | None = None,
+    session_id: str | None = None,
+    user_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Wrapper around stream_openai_response that saves the assistant response after streaming.
+
+    This accumulates all text content during streaming and saves it to the database
+    after the stream completes.
+
+    Args:
+        agent: Pydantic AI agent instance
+        prompt: User prompt
+        model: Model name
+        request_id: Optional request ID
+        agent_schema: Agent schema name
+        session_id: Session ID for message storage
+        user_id: User ID for message storage
+
+    Yields:
+        SSE-formatted strings
+    """
+    from ....utils.date_utils import utc_now, to_iso
+    from ....services.session import SessionMessageStore
+    from ....settings import settings
+
+    # Pre-generate message_id so it can be sent in metadata event
+    # This allows frontend to use it for feedback before DB persistence
+    message_id = str(uuid.uuid4())
+
+    # Mutable container for capturing trace context from inside agent execution
+    # This is deterministic - captured from OTEL instrumentation, not AI-generated
+    trace_context: dict = {}
+
+    # Accumulate content during streaming
+    accumulated_content = []
+
+    async for chunk in stream_openai_response(
+        agent=agent,
+        prompt=prompt,
+        model=model,
+        request_id=request_id,
+        agent_schema=agent_schema,
+        session_id=session_id,
+        message_id=message_id,
+        trace_context_out=trace_context,  # Pass container to capture trace IDs
+    ):
+        yield chunk
+
+        # Extract text content from OpenAI-format chunks
+        # Format: data: {"choices": [{"delta": {"content": "..."}}]}
+        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
+            try:
+                data_str = chunk[6:].strip()  # Remove "data: " prefix
+                if data_str:
+                    data = json.loads(data_str)
+                    if "choices" in data and data["choices"]:
+                        delta = data["choices"][0].get("delta", {})
+                        content = delta.get("content")
+                        if content:
+                            accumulated_content.append(content)
+            except (json.JSONDecodeError, KeyError, IndexError):
+                pass  # Skip non-JSON or malformed chunks
+
+    # After streaming completes, save the assistant response
+    if settings.postgres.enabled and session_id and accumulated_content:
+        full_content = "".join(accumulated_content)
+        # Get captured trace context from container (deterministically captured inside agent execution)
+        captured_trace_id = trace_context.get("trace_id")
+        captured_span_id = trace_context.get("span_id")
+        assistant_message = {
+            "id": message_id,  # Use pre-generated ID for consistency with metadata event
+            "role": "assistant",
+            "content": full_content,
+            "timestamp": to_iso(utc_now()),
+            "trace_id": captured_trace_id,
+            "span_id": captured_span_id,
+        }
+        try:
+            store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
+            await store.store_session_messages(
+                session_id=session_id,
+                messages=[assistant_message],
+                user_id=user_id,
+                compress=True,  # Compress long assistant responses
+            )
+            logger.debug(f"Saved assistant response {message_id} to session {session_id} ({len(full_content)} chars)")
+        except Exception as e:
+            logger.error(f"Failed to save assistant response: {e}", exc_info=True)