remdb 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of remdb has been flagged as possibly problematic.
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +51 -27
- rem/agentic/mcp/tool_wrapper.py +155 -18
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +195 -46
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/main.py +85 -16
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +18 -4
- rem/api/mcp_router/tools.py +394 -16
- rem/api/routers/admin.py +218 -1
- rem/api/routers/chat/completions.py +280 -7
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +177 -3
- rem/api/routers/feedback.py +142 -329
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +13 -13
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +354 -143
- rem/cli/commands/experiments.py +436 -30
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +92 -45
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +29 -6
- rem/config.py +8 -1
- rem/models/core/experiment.py +54 -0
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/shared_session.py +2 -28
- rem/registry.py +10 -4
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/content/service.py +30 -8
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/README.md +151 -26
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +7 -0
- rem/services/session/reload.py +1 -1
- rem/settings.py +288 -16
- rem/sql/background_indexes.sql +19 -24
- rem/sql/migrations/001_install.sql +252 -69
- rem/sql/migrations/002_install_models.sql +2197 -619
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/schema_loader.py +110 -15
- rem/utils/sql_paths.py +146 -0
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/METADATA +300 -215
- {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/RECORD +73 -64
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/WHEEL +0 -0
- {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/models.py
CHANGED

```diff
@@ -1,14 +1,38 @@
 """
 OpenAI-compatible API models for chat completions.
 
-Design Pattern
+Design Pattern:
 - Full OpenAI compatibility for drop-in replacement
 - Support for streaming (SSE) and non-streaming modes
 - Response format control (text vs json_object)
-- Headers map to AgentContext
+- Headers map to AgentContext for session/context control
+- Body fields for OpenAI-compatible parameters + metadata
+
+Headers (context control):
+    X-User-Id → context.user_id (user identifier)
+    X-Tenant-Id → context.tenant_id (multi-tenancy, default: "default")
+    X-Session-Id → context.session_id (conversation continuity)
+    X-Agent-Schema → context.agent_schema_uri (which agent to use, default: "rem")
+    X-Model-Name → context.default_model (model override)
+    X-Chat-Is-Audio → triggers audio transcription ("true"/"false")
+    X-Is-Eval → context.is_eval (marks session as evaluation, sets mode=EVALUATION)
+
+Body Fields (OpenAI-compatible + extensions):
+    model → LLM model (e.g., "openai:gpt-4.1", "anthropic:claude-sonnet-4-5-20250929")
+    messages → Chat conversation history
+    temperature → Sampling temperature (0-2)
+    max_tokens → Max tokens (deprecated, use max_completion_tokens)
+    max_completion_tokens → Max tokens to generate
+    stream → Enable SSE streaming
+    metadata → Key-value pairs merged with session metadata (for evals/experiments)
+    store → Whether to store for distillation/evaluation
+    seed → Deterministic sampling seed
+    top_p → Nucleus sampling probability
+    reasoning_effort → low/medium/high for o-series models
+    service_tier → auto/flex/priority/default
 """
 
-from typing import Literal
+from typing import Any, Literal
 
 from pydantic import BaseModel, Field
 
@@ -46,10 +70,17 @@ class ChatCompletionRequest(BaseModel):
     Compatible with OpenAI's /v1/chat/completions endpoint.
 
     Headers Map to AgentContext:
-
-
-
-
+        X-User-Id → context.user_id
+        X-Tenant-Id → context.tenant_id (default: "default")
+        X-Session-Id → context.session_id
+        X-Agent-Schema → context.agent_schema_uri (default: "rem")
+        X-Model-Name → context.default_model
+        X-Chat-Is-Audio → triggers audio transcription
+        X-Is-Eval → context.is_eval (sets session mode=EVALUATION)
+
+    Body Fields for Metadata/Evals:
+        metadata → Key-value pairs merged with session metadata
+        store → Whether to store for distillation/evaluation
 
     Note: Model is specified in body.model (standard OpenAI field), not headers.
     """
@@ -73,6 +104,49 @@ class ChatCompletionRequest(BaseModel):
         default=None,
         description="Response format. Set type='json_object' to enable JSON mode.",
     )
+    # Additional OpenAI-compatible fields
+    metadata: dict[str, str] | None = Field(
+        default=None,
+        description="Key-value pairs attached to the request (max 16 keys, 64/512 char limits). "
+        "Merged with session metadata for persistence.",
+    )
+    store: bool | None = Field(
+        default=None,
+        description="Whether to store for distillation/evaluation purposes.",
+    )
+    max_completion_tokens: int | None = Field(
+        default=None,
+        ge=1,
+        description="Max tokens to generate (replaces deprecated max_tokens).",
+    )
+    seed: int | None = Field(
+        default=None,
+        description="Seed for deterministic sampling (best effort).",
+    )
+    top_p: float | None = Field(
+        default=None,
+        ge=0,
+        le=1,
+        description="Nucleus sampling probability. Use temperature OR top_p, not both.",
+    )
+    logprobs: bool | None = Field(
+        default=None,
+        description="Whether to return log probabilities for output tokens.",
+    )
+    top_logprobs: int | None = Field(
+        default=None,
+        ge=0,
+        le=20,
+        description="Number of most likely tokens to return at each position (requires logprobs=true).",
+    )
+    reasoning_effort: Literal["low", "medium", "high"] | None = Field(
+        default=None,
+        description="Reasoning effort for o-series models (low/medium/high).",
+    )
+    service_tier: Literal["auto", "flex", "priority", "default"] | None = Field(
+        default=None,
+        description="Service tier for processing (flex is 50% cheaper but slower).",
+    )
 
 
 # Response models
```
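The docstring above fully specifies the request surface, so a round trip is easy to sketch. The snippet below is illustrative only: the base URL and the use of httpx are assumptions, while the endpoint, headers, and body fields come from the docstring.

```python
# Hypothetical client call against the OpenAI-compatible endpoint described above.
# Base URL and httpx are assumptions; header/field semantics follow the docstring.
import httpx

response = httpx.post(
    "http://localhost:8000/v1/chat/completions",
    headers={
        "X-User-Id": "user-123",
        "X-Session-Id": "sess-abc",   # conversation continuity
        "X-Agent-Schema": "rem",      # which agent schema to run
        "X-Is-Eval": "true",          # mark the session as an evaluation run
    },
    json={
        "model": "openai:gpt-4.1",
        "messages": [{"role": "user", "content": "Summarize my last session."}],
        "stream": False,
        "metadata": {"experiment": "prompt-v2"},  # merged with session metadata
        "store": True,                            # keep for distillation/evaluation
        "seed": 42,
        "max_completion_tokens": 512,
    },
)
print(response.json()["choices"][0]["message"]["content"])
```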
rem/api/routers/chat/otel_utils.py
ADDED

```diff
@@ -0,0 +1,33 @@
+"""OTEL utilities for chat routers."""
+
+from loguru import logger
+
+
+def get_tracer():
+    """Get the OpenTelemetry tracer for chat completions."""
+    try:
+        from opentelemetry import trace
+        return trace.get_tracer("rem.chat.completions")
+    except Exception:
+        return None
+
+
+def get_current_trace_context() -> tuple[str | None, str | None]:
+    """Get trace_id and span_id from current OTEL context.
+
+    Returns:
+        Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
+    """
+    try:
+        from opentelemetry import trace
+
+        span = trace.get_current_span()
+        ctx = span.get_span_context()
+        if ctx.is_valid:
+            trace_id = format(ctx.trace_id, '032x')
+            span_id = format(ctx.span_id, '016x')
+            return trace_id, span_id
+    except Exception as e:
+        logger.debug(f"Could not get trace context: {e}")
+
+    return None, None
```
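A minimal sketch of how the helper behaves, assuming the opentelemetry-sdk package is installed and a real TracerProvider is configured; outside a recording span the span context is invalid and the helper falls back to (None, None):

```python
# Sketch only: exercises get_current_trace_context() under an active span.
# Assumes opentelemetry-sdk is installed; the helper import path matches the new module.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

from rem.api.routers.chat.otel_utils import get_current_trace_context

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer("demo")

with tracer.start_as_current_span("chat-request"):
    trace_id, span_id = get_current_trace_context()
    print(trace_id, span_id)  # 32-hex trace ID, 16-hex span ID

# No active span here, so the helper degrades gracefully:
assert get_current_trace_context() == (None, None)
```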
rem/api/routers/chat/sse_events.py
CHANGED

```diff
@@ -321,7 +321,13 @@ class MetadataEvent(BaseModel):
     # Agent info
     agent_schema: str | None = Field(
         default=None,
-        description="Name of the agent schema used for this response (e.g., 'rem', '
+        description="Name of the agent schema used for this response (e.g., 'rem', 'query-assistant')"
+    )
+
+    # Session info
+    session_name: str | None = Field(
+        default=None,
+        description="Short 1-3 phrase name for the session topic (e.g., 'Prescription Drug Questions', 'AWS Setup Help')"
     )
 
     # Quality indicators
@@ -350,6 +356,16 @@ class MetadataEvent(BaseModel):
         description="Token count for this response"
     )
 
+    # Trace context for observability (deterministic, captured from OTEL)
+    trace_id: str | None = Field(
+        default=None,
+        description="OTEL trace ID for correlating with Phoenix/observability systems"
+    )
+    span_id: str | None = Field(
+        default=None,
+        description="OTEL span ID for correlating with Phoenix/observability systems"
+    )
+
    # System flags
    flags: list[str] | None = Field(
        default=None,
```
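For illustration, a client could lift the new trace fields off a metadata event and hand them to an observability UI. The wire envelope below is an assumption: the diff defines the model fields, not the serialized frame.

```python
import json

def handle_metadata_frame(payload: str) -> None:
    """Pull trace correlation fields from a metadata SSE frame (assumed envelope)."""
    event = json.loads(payload)
    trace_id = event.get("trace_id")          # 32-hex OTEL trace ID, or None
    span_id = event.get("span_id")            # 16-hex OTEL span ID, or None
    session_name = event.get("session_name")  # short session topic label
    if trace_id:
        # Hand off to Phoenix or any OTEL backend keyed by trace ID.
        print(f"session {session_name!r} → trace {trace_id} span {span_id}")
```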
rem/api/routers/chat/streaming.py
CHANGED

```diff
@@ -47,6 +47,7 @@ from pydantic_ai.messages import (
     ToolCallPart,
 )
 
+from .otel_utils import get_current_trace_context, get_tracer
 from .models import (
     ChatCompletionMessageDelta,
     ChatCompletionStreamChoice,
@@ -71,6 +72,10 @@ async def stream_openai_response(
     message_id: str | None = None,
     in_reply_to: str | None = None,
     session_id: str | None = None,
+    # Agent info for metadata
+    agent_schema: str | None = None,
+    # Mutable container to capture trace context (deterministic, not AI-dependent)
+    trace_context_out: dict | None = None,
 ) -> AsyncGenerator[str, None]:
     """
     Stream Pydantic AI agent responses with rich SSE events.
```
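trace_context_out is a mutable out-parameter: the generator only yields SSE strings, so a dict supplied by the caller is the side channel for values discovered mid-stream. A stripped-down sketch of the pattern (generic names, not package code):

```python
import asyncio
from collections.abc import AsyncGenerator

async def produce(out: dict | None = None) -> AsyncGenerator[str, None]:
    # The generator's yield type stays str; discovered values travel
    # through the caller-supplied dict instead.
    if out is not None:
        out["trace_id"] = "0af7651916cd43dd8448eb211c80319c"
    yield "data: chunk\n\n"

async def consume() -> None:
    captured: dict = {}
    async for chunk in produce(out=captured):
        pass  # forward chunk to the HTTP response
    print(captured["trace_id"])  # available once the producer has run

asyncio.run(consume())
```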
```diff
@@ -154,6 +159,14 @@ async def stream_openai_response(
 
     # Use agent.iter() to get complete execution with tool calls
     async with agent.iter(prompt) as agent_run:
+        # Capture trace context IMMEDIATELY inside agent execution
+        # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
+        # NOT dependent on any AI-generated content
+        captured_trace_id, captured_span_id = get_current_trace_context()
+        if trace_context_out is not None:
+            trace_context_out["trace_id"] = captured_trace_id
+            trace_context_out["span_id"] = captured_span_id
+
         async for node in agent_run:
             # Check if this is a model request node (includes tool calls)
             if Agent.is_model_request_node(node):
@@ -258,8 +271,6 @@ async def stream_openai_response(
         # Queue for completion matching (FIFO)
         pending_tool_completions.append((tool_name, tool_id))
 
-        logger.info(f"🔧 {tool_name}")
-
         # Emit tool_call SSE event (started)
         # Try to get arguments as dict
         args_dict = None
@@ -269,6 +280,18 @@ async def stream_openai_response(
         elif isinstance(event.part.args, dict):
             args_dict = event.part.args
 
+        # Log tool call with key parameters
+        if args_dict and tool_name == "search_rem":
+            query_type = args_dict.get("query_type", "?")
+            limit = args_dict.get("limit", 20)
+            table = args_dict.get("table", "")
+            query_text = args_dict.get("query_text", args_dict.get("entity_key", ""))
+            if query_text and len(query_text) > 50:
+                query_text = query_text[:50] + "..."
+            logger.info(f"🔧 {tool_name} {query_type.upper()} '{query_text}' table={table} limit={limit}")
+        else:
+            logger.info(f"🔧 {tool_name}")
+
         yield format_sse_event(ToolCallEvent(
             tool_name=tool_name,
             tool_id=tool_id,
@@ -354,21 +377,47 @@ async def stream_openai_response(
         registered_sources = result_content.get("sources")
         registered_references = result_content.get("references")
         registered_flags = result_content.get("flags")
+        # Session naming
+        registered_session_name = result_content.get("session_name")
+        # Risk assessment fields
+        registered_risk_level = result_content.get("risk_level")
+        registered_risk_score = result_content.get("risk_score")
+        registered_risk_reasoning = result_content.get("risk_reasoning")
+        registered_recommended_action = result_content.get("recommended_action")
+        # Extra fields
+        registered_extra = result_content.get("extra")
 
         logger.info(
             f"📊 Metadata registered: confidence={registered_confidence}, "
-            f"
+            f"session_name={registered_session_name}, "
+            f"risk_level={registered_risk_level}, sources={registered_sources}"
         )
 
+        # Build extra dict with risk fields and any custom extras
+        extra_data = {}
+        if registered_risk_level is not None:
+            extra_data["risk_level"] = registered_risk_level
+        if registered_risk_score is not None:
+            extra_data["risk_score"] = registered_risk_score
+        if registered_risk_reasoning is not None:
+            extra_data["risk_reasoning"] = registered_risk_reasoning
+        if registered_recommended_action is not None:
+            extra_data["recommended_action"] = registered_recommended_action
+        if registered_extra:
+            extra_data.update(registered_extra)
+
         # Emit metadata event immediately
         yield format_sse_event(MetadataEvent(
             message_id=message_id,
             in_reply_to=in_reply_to,
             session_id=session_id,
+            agent_schema=agent_schema,
+            session_name=registered_session_name,
             confidence=registered_confidence,
             sources=registered_sources,
             model_version=model,
             flags=registered_flags,
+            extra=extra_data if extra_data else None,
             hidden=False,
         ))
 
@@ -377,6 +426,31 @@ async def stream_openai_response(
         result_str = str(result_content)
         result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
 
+        # Log result count for search_rem
+        if tool_name == "search_rem" and isinstance(result_content, dict):
+            results = result_content.get("results", {})
+            # Handle nested result structure: results may be a dict with 'results' list and 'count'
+            if isinstance(results, dict):
+                count = results.get("count", len(results.get("results", [])))
+                query_type = results.get("query_type", "?")
+                query_text = results.get("query_text", results.get("key", ""))
+                table = results.get("table_name", "")
+            elif isinstance(results, list):
+                count = len(results)
+                query_type = "?"
+                query_text = ""
+                table = ""
+            else:
+                count = "?"
+                query_type = "?"
+                query_text = ""
+                table = ""
+            status = result_content.get("status", "unknown")
+            # Truncate query text for logging
+            if query_text and len(str(query_text)) > 40:
+                query_text = str(query_text)[:40] + "..."
+            logger.info(f"  ↳ {tool_name} {query_type} '{query_text}' table={table} → {count} results")
+
         yield format_sse_event(ToolCallEvent(
             tool_name=tool_name,
             tool_id=tool_id,
@@ -464,10 +538,14 @@ async def stream_openai_response(
             message_id=message_id,
             in_reply_to=in_reply_to,
             session_id=session_id,
+            agent_schema=agent_schema,
             confidence=1.0,  # Default to 100% confidence
             model_version=model,
             latency_ms=latency_ms,
             token_count=token_count,
+            # Include deterministic trace context captured from OTEL
+            trace_id=captured_trace_id,
+            span_id=captured_span_id,
         ))
 
         # Mark all progress complete
```
```diff
@@ -606,3 +684,99 @@ async def stream_minimal_simulator(
     # Simulator now yields SSE-formatted strings directly (OpenAI-compatible)
     async for sse_string in stream_minimal_demo(content=content, delay_ms=delay_ms):
         yield sse_string
+
+
+async def stream_openai_response_with_save(
+    agent: Agent,
+    prompt: str,
+    model: str,
+    request_id: str | None = None,
+    agent_schema: str | None = None,
+    session_id: str | None = None,
+    user_id: str | None = None,
+) -> AsyncGenerator[str, None]:
+    """
+    Wrapper around stream_openai_response that saves the assistant response after streaming.
+
+    This accumulates all text content during streaming and saves it to the database
+    after the stream completes.
+
+    Args:
+        agent: Pydantic AI agent instance
+        prompt: User prompt
+        model: Model name
+        request_id: Optional request ID
+        agent_schema: Agent schema name
+        session_id: Session ID for message storage
+        user_id: User ID for message storage
+
+    Yields:
+        SSE-formatted strings
+    """
+    from ....utils.date_utils import utc_now, to_iso
+    from ....services.session import SessionMessageStore
+    from ....settings import settings
+
+    # Pre-generate message_id so it can be sent in metadata event
+    # This allows frontend to use it for feedback before DB persistence
+    message_id = str(uuid.uuid4())
+
+    # Mutable container for capturing trace context from inside agent execution
+    # This is deterministic - captured from OTEL instrumentation, not AI-generated
+    trace_context: dict = {}
+
+    # Accumulate content during streaming
+    accumulated_content = []
+
+    async for chunk in stream_openai_response(
+        agent=agent,
+        prompt=prompt,
+        model=model,
+        request_id=request_id,
+        agent_schema=agent_schema,
+        session_id=session_id,
+        message_id=message_id,
+        trace_context_out=trace_context,  # Pass container to capture trace IDs
+    ):
+        yield chunk
+
+        # Extract text content from OpenAI-format chunks
+        # Format: data: {"choices": [{"delta": {"content": "..."}}]}
+        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
+            try:
+                data_str = chunk[6:].strip()  # Remove "data: " prefix
+                if data_str:
+                    data = json.loads(data_str)
+                    if "choices" in data and data["choices"]:
+                        delta = data["choices"][0].get("delta", {})
+                        content = delta.get("content")
+                        if content:
+                            accumulated_content.append(content)
+            except (json.JSONDecodeError, KeyError, IndexError):
+                pass  # Skip non-JSON or malformed chunks
+
+    # After streaming completes, save the assistant response
+    if settings.postgres.enabled and session_id and accumulated_content:
+        full_content = "".join(accumulated_content)
+        # Get captured trace context from container (deterministically captured inside agent execution)
+        captured_trace_id = trace_context.get("trace_id")
+        captured_span_id = trace_context.get("span_id")
+        assistant_message = {
+            "id": message_id,  # Use pre-generated ID for consistency with metadata event
+            "role": "assistant",
+            "content": full_content,
+            "timestamp": to_iso(utc_now()),
+            "trace_id": captured_trace_id,
+            "span_id": captured_span_id,
+        }
+        try:
+            store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
+            await store.store_session_messages(
+                session_id=session_id,
+                messages=[assistant_message],
+                user_id=user_id,
+                compress=True,  # Compress long assistant responses
+            )
+            logger.debug(f"Saved assistant response {message_id} to session {session_id} ({len(full_content)} chars)")
+        except Exception as e:
+            logger.error(f"Failed to save assistant response: {e}", exc_info=True)
```