remdb 0.3.14__py3-none-any.whl → 0.3.157__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +32 -2
- rem/agentic/agents/agent_manager.py +310 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -27
- rem/agentic/context_builder.py +5 -3
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +155 -18
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +280 -57
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +215 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +132 -40
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +28 -5
- rem/api/mcp_router/tools.py +555 -7
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +278 -4
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +697 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/__init__.py +13 -3
- rem/auth/middleware.py +186 -22
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +386 -143
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +97 -50
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +58 -14
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +25 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/registry.py +10 -4
- rem/schemas/agents/core/agent-builder.yaml +134 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +92 -19
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +459 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +154 -14
- rem/services/postgres/README.md +197 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +547 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +137 -51
- rem/services/session/reload.py +15 -8
- rem/settings.py +515 -27
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2304 -377
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/files.py +157 -1
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +220 -22
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1051
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/completions.py
CHANGED

@@ -1,13 +1,94 @@
 """
 OpenAI-compatible chat completions router for REM.
 
-
-
+Quick Start (Local Development)
+===============================
+
+NOTE: Local dev uses LOCAL databases (Postgres via Docker Compose on port 5050).
+Do NOT port-forward databases. Only port-forward observability services.
+
+IMPORTANT: Session IDs MUST be UUIDs. Non-UUID session IDs will cause message
+storage issues and feedback will not work correctly.
+
+1. Port Forwarding (REQUIRED for trace capture and Phoenix sync):
+
+    # Terminal 1: OTEL Collector (HTTP) - sends traces to Phoenix
+    kubectl port-forward -n observability svc/otel-collector-collector 4318:4318
+
+    # Terminal 2: Phoenix UI - view traces at http://localhost:6006
+    kubectl port-forward -n siggy svc/phoenix 6006:6006
+
+2. Get Phoenix API Key (REQUIRED for feedback->Phoenix sync):
+
+    export PHOENIX_API_KEY=$(kubectl get secret -n siggy rem-phoenix-api-key \\
+        -o jsonpath='{.data.PHOENIX_API_KEY}' | base64 -d)
+
+3. Start API with OTEL and Phoenix enabled:
+
+    cd /path/to/remstack/rem
+    source .venv/bin/activate
+    OTEL__ENABLED=true \\
+    PHOENIX__ENABLED=true \\
+    PHOENIX_API_KEY="$PHOENIX_API_KEY" \\
+    uvicorn rem.api.main:app --host 0.0.0.0 --port 8000 --app-dir src
+
+4. Test Chat Request (session_id MUST be a UUID):
+
+    SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())")
+    curl -s -N -X POST http://localhost:8000/api/v1/chat/completions \\
+        -H 'Content-Type: application/json' \\
+        -H "X-Session-Id: $SESSION_ID" \\
+        -H 'X-Agent-Schema: rem' \\
+        -d '{"messages": [{"role": "user", "content": "Hello"}], "stream": true}'
+
+    # Note: Use 'rem' agent schema (default) for real LLM responses.
+    # The 'simulator' agent is for testing SSE events without LLM calls.
+
+5. Submit Feedback on Response:
+
+    The metadata SSE event contains message_id and trace_id for feedback:
+        event: metadata
+        data: {"message_id": "728882f8-...", "trace_id": "e53c701c...", ...}
+
+    Use session_id (UUID you generated) and message_id to submit feedback:
+
+    curl -X POST http://localhost:8000/api/v1/messages/feedback \\
+        -H 'Content-Type: application/json' \\
+        -H 'X-Tenant-Id: default' \\
+        -d '{
+            "session_id": "<your-uuid-session-id>",
+            "message_id": "<message-id-from-metadata>",
+            "rating": 1,
+            "categories": ["helpful"],
+            "comment": "Good response"
+        }'
+
+    Expected response (201 = synced to Phoenix):
+        {"phoenix_synced": true, "trace_id": "e53c701c...", "span_id": "6432d497..."}
+
+OTEL Architecture
+=================
+
+    REM API --[OTLP/HTTP]--> OTEL Collector --[relay]--> Phoenix
+    (port 4318)              (k8s: observability)       (k8s: siggy)
+
+Environment Variables:
+    OTEL__ENABLED=true        Enable OTEL tracing (required for trace capture)
+    PHOENIX__ENABLED=true     Enable Phoenix integration (required for feedback sync)
+    PHOENIX_API_KEY=<jwt>     Phoenix API key (required for feedback->Phoenix sync)
+    OTEL__COLLECTOR_ENDPOINT  Default: http://localhost:4318
+    OTEL__PROTOCOL            Default: http (use port 4318, not gRPC 4317)
+
+Design Pattern
+==============
+
+- Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema, X-Is-Eval)
 - ContextBuilder centralizes message construction with user profile + session history
 - Body.model is the LLM model for Pydantic AI
 - X-Agent-Schema header specifies which agent schema to use (defaults to 'rem')
 - Support for streaming (SSE) and non-streaming modes
 - Response format control (text vs json_object)
+- OpenAI-compatible body fields: metadata, store, reasoning_effort, etc.
 
 Context Building Flow:
 1. ContextBuilder.build_from_headers() extracts user_id, session_id from headers
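
For reference, quick-start steps 4-5 above can also be driven from Python instead of curl. A minimal sketch, assuming httpx is installed, the API is running on localhost:8000, and the metadata SSE event carries message_id as documented in the docstring:

    import json
    import uuid

    import httpx

    session_id = str(uuid.uuid4())  # session IDs MUST be UUIDs
    headers = {"X-Session-Id": session_id, "X-Agent-Schema": "rem"}
    body = {"messages": [{"role": "user", "content": "Hello"}], "stream": True}

    message_id = None
    with httpx.Client(base_url="http://localhost:8000") as client:
        # Stream SSE lines; the metadata event carries message_id/trace_id.
        with client.stream("POST", "/api/v1/chat/completions", headers=headers, json=body) as resp:
            for line in resp.iter_lines():
                if line.startswith("data: "):
                    try:
                        payload = json.loads(line[len("data: "):])
                    except json.JSONDecodeError:
                        continue  # e.g. a terminal "data: [DONE]" sentinel
                    message_id = payload.get("message_id", message_id)

        # Step 5: submit feedback on the assistant message just received.
        if message_id:
            fb = client.post(
                "/api/v1/messages/feedback",
                headers={"X-Tenant-Id": "default"},
                json={
                    "session_id": session_id,
                    "message_id": message_id,
                    "rating": 1,
                    "categories": ["helpful"],
                    "comment": "Good response",
                },
            )
            print(fb.status_code, fb.json())  # 201 => synced to Phoenix
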
@@ -25,9 +106,10 @@ Context Building Flow:
 Headers Mapping
     X-User-Id → AgentContext.user_id
     X-Tenant-Id → AgentContext.tenant_id
-    X-Session-Id → AgentContext.session_id
+    X-Session-Id → AgentContext.session_id (use UUID for new sessions)
     X-Model-Name → AgentContext.default_model (overrides body.model)
     X-Agent-Schema → AgentContext.agent_schema_uri (defaults to 'rem')
+    X-Is-Eval → AgentContext.is_eval (sets session mode to EVALUATION)
 
 Default Agent:
     If X-Agent-Schema header is not provided, the system loads 'rem' schema,
@@ -42,6 +124,7 @@ Example Request:
     POST /api/v1/chat/completions
     X-Tenant-Id: acme-corp
     X-User-Id: user123
+    X-Session-Id: a1b2c3d4-e5f6-7890-abcd-ef1234567890  # UUID
     X-Agent-Schema: rem  # Optional, this is the default
 
     {
@@ -67,10 +150,12 @@ from loguru import logger
 from ....agentic.context import AgentContext
 from ....agentic.context_builder import ContextBuilder
 from ....agentic.providers.pydantic_ai import create_agent
+from ....models.entities.session import Session, SessionMode
 from ....services.audio.transcriber import AudioTranscriber
+from ....services.postgres.repository import Repository
 from ....services.session import SessionMessageStore, reload_session
 from ....settings import settings
-from ....utils.schema_loader import load_agent_schema
+from ....utils.schema_loader import load_agent_schema, load_agent_schema_async
 from .json_utils import extract_json_resilient
 from .models import (
     ChatCompletionChoice,
@@ -79,14 +164,113 @@ from .models import (
     ChatCompletionUsage,
     ChatMessage,
 )
-from .streaming import stream_openai_response
+from .streaming import stream_openai_response, stream_openai_response_with_save, stream_simulator_response
 
-router = APIRouter(prefix="/v1", tags=["chat"])
+router = APIRouter(prefix="/api/v1", tags=["chat"])
 
 # Default agent schema file
 DEFAULT_AGENT_SCHEMA = "rem"
 
 
+def get_current_trace_context() -> tuple[str | None, str | None]:
+    """Get trace_id and span_id from current OTEL context.
+
+    Returns:
+        Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
+    """
+    try:
+        from opentelemetry import trace
+        span = trace.get_current_span()
+        if span and span.get_span_context().is_valid:
+            ctx = span.get_span_context()
+            trace_id = format(ctx.trace_id, '032x')
+            span_id = format(ctx.span_id, '016x')
+            return trace_id, span_id
+    except Exception:
+        pass
+    return None, None
+
+
+def get_tracer():
+    """Get the OpenTelemetry tracer for chat completions."""
+    try:
+        from opentelemetry import trace
+        return trace.get_tracer("rem.chat.completions")
+    except Exception:
+        return None
+
+
+async def ensure_session_with_metadata(
+    session_id: str,
+    user_id: str | None,
+    tenant_id: str,
+    is_eval: bool,
+    request_metadata: dict[str, str] | None,
+    agent_schema: str | None = None,
+) -> None:
+    """
+    Ensure session exists and update with metadata/mode.
+
+    If X-Is-Eval header is true, sets session mode to EVALUATION.
+    Merges request metadata with existing session metadata.
+
+    Args:
+        session_id: Session identifier (maps to Session.name)
+        user_id: User identifier
+        tenant_id: Tenant identifier
+        is_eval: Whether this is an evaluation session
+        request_metadata: Metadata from request body to merge
+        agent_schema: Optional agent schema being used
+    """
+    if not settings.postgres.enabled:
+        return
+
+    try:
+        repo = Repository(Session, table_name="sessions")
+
+        # Try to load existing session by name (session_id is the name field)
+        existing_list = await repo.find(
+            filters={"name": session_id, "tenant_id": tenant_id},
+            limit=1,
+        )
+        existing = existing_list[0] if existing_list else None
+
+        if existing:
+            # Merge metadata if provided
+            merged_metadata = existing.metadata or {}
+            if request_metadata:
+                merged_metadata.update(request_metadata)
+
+            # Update session if eval flag or new metadata
+            needs_update = False
+            if is_eval and existing.mode != SessionMode.EVALUATION:
+                existing.mode = SessionMode.EVALUATION
+                needs_update = True
+            if request_metadata:
+                existing.metadata = merged_metadata
+                needs_update = True
+
+            if needs_update:
+                await repo.upsert(existing)
+                logger.debug(f"Updated session {session_id} (eval={is_eval}, metadata keys={list(merged_metadata.keys())})")
+        else:
+            # Create new session
+            session = Session(
+                name=session_id,
+                mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
+                user_id=user_id,
+                tenant_id=tenant_id,
+                agent_schema_uri=agent_schema,
+                metadata=request_metadata or {},
+            )
+            await repo.upsert(session)
+            logger.info(f"Created session {session_id} (eval={is_eval})")
+
+    except Exception as e:
+        # Non-critical - log but don't fail the request
+        logger.error(f"Failed to ensure session metadata: {e}", exc_info=True)
+
+
 @router.post("/chat/completions", response_model=None)
 async def chat_completions(body: ChatCompletionRequest, request: Request):
     """
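
The hex formatting used by get_current_trace_context() above is what ties stored messages back to Phoenix: OTEL exposes trace/span IDs as integers, and format(..., '032x') / format(..., '016x') render them as the zero-padded lowercase hex strings that W3C traceparent headers and Phoenix expect. A standalone sketch with illustrative ID values:

    # Illustrative integer IDs (real ones come from span.get_span_context()).
    trace_id_int = 0xE53C701C000000000000000000000001
    span_id_int = 0x6432D49700000001

    trace_id = format(trace_id_int, "032x")  # 32 hex chars, zero-padded
    span_id = format(span_id_int, "016x")    # 16 hex chars, zero-padded

    assert len(trace_id) == 32 and len(span_id) == 16
    print(trace_id, span_id)  # e53c701c000000000000000000000001 6432d49700000001
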
@@ -102,6 +286,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
     | X-Tenant-Id | Tenant identifier (multi-tenancy) | AgentContext.tenant_id | "default" |
     | X-Session-Id | Session/conversation identifier | AgentContext.session_id | None |
     | X-Agent-Schema | Agent schema name | AgentContext.agent_schema_uri | "rem" |
+    | X-Is-Eval | Mark as evaluation session | AgentContext.is_eval | false |
+
+    Additional OpenAI-compatible Body Fields:
+    - metadata: Key-value pairs merged with session metadata (max 16 keys)
+    - store: Whether to store for distillation/evaluation
+    - max_completion_tokens: Max tokens to generate (replaces max_tokens)
+    - seed: Seed for deterministic sampling
+    - top_p: Nucleus sampling probability
+    - logprobs: Return log probabilities
+    - reasoning_effort: low/medium/high for o-series models
+    - service_tier: auto/flex/priority/default
 
     Example Models:
     - anthropic:claude-sonnet-4-5-20250929 (Claude 4.5 Sonnet)
@@ -127,15 +322,137 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
     - If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
     - New messages saved to database with compression for session continuity
     - When Postgres is disabled, session management is skipped
+
+    Evaluation Sessions:
+    - Set X-Is-Eval: true header to mark session as evaluation
+    - Session mode will be set to EVALUATION
+    - Request metadata is merged with session metadata
+    - Useful for A/B testing, model comparison, and feedback collection
     """
     # Load agent schema: use header value from context or default
     # Extract AgentContext first to get schema name
     temp_context = AgentContext.from_headers(dict(request.headers))
     schema_name = temp_context.agent_schema_uri or DEFAULT_AGENT_SCHEMA
 
+    # Resolve model: use body.model if provided, otherwise settings default
+    if body.model is None:
+        body.model = settings.llm.default_model
+        logger.debug(f"No model specified, using default: {body.model}")
+
+    # Special handling for simulator schema - no LLM, just generates demo SSE events
+    # Check BEFORE loading schema since simulator doesn't need a schema file
+    # Still builds full context and saves messages like a real agent
+    if schema_name == "simulator":
+        logger.info("Using SSE simulator (no LLM)")
+
+        # Build context just like real agents (loads session history, user context)
+        new_messages = [msg.model_dump() for msg in body.messages]
+        context, messages = await ContextBuilder.build_from_headers(
+            headers=dict(request.headers),
+            new_messages=new_messages,
+        )
+
+        # Ensure session exists with metadata and eval mode if applicable
+        if context.session_id:
+            await ensure_session_with_metadata(
+                session_id=context.session_id,
+                user_id=context.user_id,
+                tenant_id=context.tenant_id,
+                is_eval=context.is_eval,
+                request_metadata=body.metadata,
+                agent_schema="simulator",
+            )
+
+        # Get the last user message as prompt
+        prompt = body.messages[-1].content if body.messages else "demo"
+        request_id = f"sim-{uuid.uuid4().hex[:24]}"
+
+        # Generate message IDs upfront for correlation
+        user_message_id = str(uuid.uuid4())
+        assistant_message_id = str(uuid.uuid4())
+
+        # Simulated assistant response content (for persistence)
+        simulated_content = (
+            f"[SSE Simulator Response]\n\n"
+            f"This is a simulated response demonstrating all SSE event types:\n"
+            f"- reasoning events (model thinking)\n"
+            f"- text_delta events (streamed content)\n"
+            f"- progress events (multi-step operations)\n"
+            f"- tool_call events (function invocations)\n"
+            f"- action_request events (UI solicitation)\n"
+            f"- metadata events (confidence, sources, message IDs)\n\n"
+            f"Original prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}"
+        )
+
+        # Save messages to database (if session_id and postgres enabled)
+        if settings.postgres.enabled and context.session_id:
+            user_message = {
+                "id": user_message_id,
+                "role": "user",
+                "content": prompt,
+                "timestamp": datetime.utcnow().isoformat(),
+            }
+            assistant_message = {
+                "id": assistant_message_id,
+                "role": "assistant",
+                "content": simulated_content,
+                "timestamp": datetime.utcnow().isoformat(),
+            }
+
+            try:
+                store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
+                await store.store_session_messages(
+                    session_id=context.session_id,
+                    messages=[user_message, assistant_message],
+                    user_id=context.user_id,
+                    compress=True,
+                )
+                logger.info(f"Saved simulator conversation to session {context.session_id}")
+            except Exception as e:
+                # Log error but don't fail the request - session storage is non-critical
+                logger.error(f"Failed to save session messages: {e}", exc_info=True)
+
+        if body.stream:
+            return StreamingResponse(
+                stream_simulator_response(
+                    prompt=prompt,
+                    model="simulator-v1.0.0",
+                    # Pass message correlation IDs
+                    message_id=assistant_message_id,
+                    in_reply_to=user_message_id,
+                    session_id=context.session_id,
+                ),
+                media_type="text/event-stream",
+                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+            )
+        else:
+            # Non-streaming simulator returns simple JSON
+            return ChatCompletionResponse(
+                id=request_id,
+                created=int(time.time()),
+                model="simulator-v1.0.0",
+                choices=[
+                    ChatCompletionChoice(
+                        index=0,
+                        message=ChatMessage(
+                            role="assistant",
+                            content=simulated_content,
+                        ),
+                        finish_reason="stop",
+                    )
+                ],
+                usage=ChatCompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0),
+            )
+
     # Load schema using centralized utility
+    # Enable database fallback to load dynamic agents stored in schemas table
+    # Use async version since we're in an async context (FastAPI endpoint)
+    user_id = temp_context.user_id or settings.test.effective_user_id
     try:
-        agent_schema =
+        agent_schema = await load_agent_schema_async(
+            schema_name,
+            user_id=user_id,
+        )
     except FileNotFoundError:
         # Fallback to default if specified schema not found
         logger.warning(f"Schema '{schema_name}' not found, falling back to '{DEFAULT_AGENT_SCHEMA}'")
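
A minimal sketch of exercising the simulator branch above from Python: X-Agent-Schema: simulator skips the LLM and streams demo SSE events while still persisting the conversation. Assumes httpx and a server on localhost:8000:

    import uuid

    import httpx

    headers = {"X-Session-Id": str(uuid.uuid4()), "X-Agent-Schema": "simulator"}
    body = {"messages": [{"role": "user", "content": "demo"}], "stream": True}

    with httpx.stream(
        "POST", "http://localhost:8000/api/v1/chat/completions", headers=headers, json=body
    ) as resp:
        for line in resp.iter_lines():
            if line:
                print(line)  # event:/data: pairs: reasoning, text_delta, tool_call, ...
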
@@ -151,7 +468,7 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
             detail=f"Agent schema '{schema_name}' not found and default schema unavailable",
         )
 
-    logger.
+    logger.debug(f"Using agent schema: {schema_name}, model: {body.model}")
 
     # Check for audio input
     is_audio = request.headers.get("x-chat-is-audio", "").lower() == "true"
@@ -196,6 +513,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
 
     logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
 
+    # Ensure session exists with metadata and eval mode if applicable
+    if context.session_id:
+        await ensure_session_with_metadata(
+            session_id=context.session_id,
+            user_id=context.user_id,
+            tenant_id=context.tenant_id,
+            is_eval=context.is_eval,
+            request_metadata=body.metadata,
+            agent_schema=schema_name,
+        )
+
     # Create agent with schema and model override
     agent = await create_agent(
         context=context,
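
A sketch of the evaluation-session flow that the ensure_session_with_metadata() call above implements: X-Is-Eval: true flips the session to EVALUATION mode, and body metadata is merged into the session's metadata. The experiment/variant keys below are illustrative:

    import uuid

    import httpx

    resp = httpx.post(
        "http://localhost:8000/api/v1/chat/completions",
        headers={"X-Session-Id": str(uuid.uuid4()), "X-Is-Eval": "true"},
        json={
            "messages": [{"role": "user", "content": "Compare model outputs"}],
            "metadata": {"experiment": "ab-test-1", "variant": "b"},  # max 16 keys
        },
        timeout=60.0,
    )
    print(resp.status_code)
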
@@ -212,14 +540,60 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
 
     # Streaming mode
     if body.stream:
+        # Save user message before streaming starts
+        if settings.postgres.enabled and context.session_id:
+            user_message = {
+                "role": "user",
+                "content": body.messages[-1].content if body.messages else "",
+                "timestamp": datetime.utcnow().isoformat(),
+            }
+            try:
+                store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
+                await store.store_session_messages(
+                    session_id=context.session_id,
+                    messages=[user_message],
+                    user_id=context.user_id,
+                    compress=False,  # User messages are typically short
+                )
+                logger.debug(f"Saved user message to session {context.session_id}")
+            except Exception as e:
+                logger.error(f"Failed to save user message: {e}", exc_info=True)
+
         return StreamingResponse(
-
+            stream_openai_response_with_save(
+                agent=agent,
+                prompt=prompt,
+                model=body.model,
+                request_id=request_id,
+                agent_schema=schema_name,
+                session_id=context.session_id,
+                user_id=context.user_id,
+            ),
             media_type="text/event-stream",
             headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
         )
 
     # Non-streaming mode
-
+    # Create a parent span to capture trace context for message storage
+    trace_id, span_id = None, None
+    tracer = get_tracer()
+
+    if tracer:
+        with tracer.start_as_current_span(
+            "chat_completion",
+            attributes={
+                "session.id": context.session_id or "",
+                "user.id": context.user_id or "",
+                "model": body.model,
+                "agent.schema": context.agent_schema_uri or DEFAULT_AGENT_SCHEMA,
+            }
+        ) as span:
+            # Capture trace context from the span we just created
+            trace_id, span_id = get_current_trace_context()
+            result = await agent.run(prompt)
+    else:
+        # No tracer available, run without tracing
+        result = await agent.run(prompt)
 
     # Determine content format based on response_format request
     if body.response_format and body.response_format.type == "json_object":
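
The non-streaming branch above wraps agent.run() in a parent span so the captured trace_id/span_id can be stored alongside the messages. A standalone sketch of that pattern; with only the opentelemetry API installed this yields a no-op span with zeroed IDs, while a configured SDK TracerProvider produces real ones:

    from opentelemetry import trace

    tracer = trace.get_tracer("rem.chat.completions")

    with tracer.start_as_current_span("chat_completion", attributes={"model": "demo"}) as span:
        ctx = span.get_span_context()
        trace_id = format(ctx.trace_id, "032x")
        span_id = format(ctx.span_id, "016x")
        # ... the actual work (agent.run(prompt)) executes here, inside the span ...

    print(trace_id, span_id)
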
@@ -242,25 +616,33 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
             "role": "user",
             "content": body.messages[-1].content if body.messages else "",
             "timestamp": datetime.utcnow().isoformat(),
+            "trace_id": trace_id,
+            "span_id": span_id,
         }
 
         assistant_message = {
             "role": "assistant",
             "content": content,
             "timestamp": datetime.utcnow().isoformat(),
+            "trace_id": trace_id,
+            "span_id": span_id,
         }
 
-
-
-
-
-
-
-
-
-
+        try:
+            # Store messages with compression
+            store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
+
+            await store.store_session_messages(
+                session_id=context.session_id,
+                messages=[user_message, assistant_message],
+                user_id=context.user_id,
+                compress=True,
+            )
 
-
+            logger.info(f"Saved conversation to session {context.session_id}")
+        except Exception as e:
+            # Log error but don't fail the request - session storage is non-critical
+            logger.error(f"Failed to save session messages: {e}", exc_info=True)
 
     return ChatCompletionResponse(
         id=request_id,
rem/api/routers/chat/models.py
CHANGED
@@ -1,17 +1,43 @@
 """
 OpenAI-compatible API models for chat completions.
 
-Design Pattern
+Design Pattern:
 - Full OpenAI compatibility for drop-in replacement
 - Support for streaming (SSE) and non-streaming modes
 - Response format control (text vs json_object)
-- Headers map to AgentContext
+- Headers map to AgentContext for session/context control
+- Body fields for OpenAI-compatible parameters + metadata
+
+Headers (context control):
+    X-User-Id → context.user_id (user identifier)
+    X-Tenant-Id → context.tenant_id (multi-tenancy, default: "default")
+    X-Session-Id → context.session_id (conversation continuity)
+    X-Agent-Schema → context.agent_schema_uri (which agent to use, default: "rem")
+    X-Model-Name → context.default_model (model override)
+    X-Chat-Is-Audio → triggers audio transcription ("true"/"false")
+    X-Is-Eval → context.is_eval (marks session as evaluation, sets mode=EVALUATION)
+
+Body Fields (OpenAI-compatible + extensions):
+    model → LLM model (e.g., "openai:gpt-4.1", "anthropic:claude-sonnet-4-5-20250929")
+    messages → Chat conversation history
+    temperature → Sampling temperature (0-2)
+    max_tokens → Max tokens (deprecated, use max_completion_tokens)
+    max_completion_tokens → Max tokens to generate
+    stream → Enable SSE streaming
+    metadata → Key-value pairs merged with session metadata (for evals/experiments)
+    store → Whether to store for distillation/evaluation
+    seed → Deterministic sampling seed
+    top_p → Nucleus sampling probability
+    reasoning_effort → low/medium/high for o-series models
+    service_tier → auto/flex/priority/default
 """
 
-from typing import Literal
+from typing import Any, Literal
 
 from pydantic import BaseModel, Field
 
+from rem.settings import settings
+
 
 # Request models
 class ChatMessage(BaseModel):
@@ -44,17 +70,26 @@ class ChatCompletionRequest(BaseModel):
     Compatible with OpenAI's /v1/chat/completions endpoint.
 
     Headers Map to AgentContext:
-
-
-
-
+        X-User-Id → context.user_id
+        X-Tenant-Id → context.tenant_id (default: "default")
+        X-Session-Id → context.session_id
+        X-Agent-Schema → context.agent_schema_uri (default: "rem")
+        X-Model-Name → context.default_model
+        X-Chat-Is-Audio → triggers audio transcription
+        X-Is-Eval → context.is_eval (sets session mode=EVALUATION)
+
+    Body Fields for Metadata/Evals:
+        metadata → Key-value pairs merged with session metadata
+        store → Whether to store for distillation/evaluation
 
     Note: Model is specified in body.model (standard OpenAI field), not headers.
     """
 
-
-
-
+    # TODO: default should come from settings.llm.default_model at request time
+    # Using None and resolving in endpoint to avoid import-time settings evaluation
+    model: str | None = Field(
+        default=None,
+        description="Model to use. Defaults to LLM__DEFAULT_MODEL from settings.",
     )
     messages: list[ChatMessage] = Field(description="Chat conversation history")
     temperature: float | None = Field(default=None, ge=0, le=2)
@@ -69,6 +104,49 @@ class ChatCompletionRequest(BaseModel):
         default=None,
         description="Response format. Set type='json_object' to enable JSON mode.",
     )
+    # Additional OpenAI-compatible fields
+    metadata: dict[str, str] | None = Field(
+        default=None,
+        description="Key-value pairs attached to the request (max 16 keys, 64/512 char limits). "
+        "Merged with session metadata for persistence.",
+    )
+    store: bool | None = Field(
+        default=None,
+        description="Whether to store for distillation/evaluation purposes.",
+    )
+    max_completion_tokens: int | None = Field(
+        default=None,
+        ge=1,
+        description="Max tokens to generate (replaces deprecated max_tokens).",
+    )
+    seed: int | None = Field(
+        default=None,
+        description="Seed for deterministic sampling (best effort).",
+    )
+    top_p: float | None = Field(
+        default=None,
+        ge=0,
+        le=1,
+        description="Nucleus sampling probability. Use temperature OR top_p, not both.",
+    )
+    logprobs: bool | None = Field(
+        default=None,
+        description="Whether to return log probabilities for output tokens.",
+    )
+    top_logprobs: int | None = Field(
+        default=None,
+        ge=0,
+        le=20,
+        description="Number of most likely tokens to return at each position (requires logprobs=true).",
+    )
+    reasoning_effort: Literal["low", "medium", "high"] | None = Field(
+        default=None,
+        description="Reasoning effort for o-series models (low/medium/high).",
+    )
+    service_tier: Literal["auto", "flex", "priority", "default"] | None = Field(
+        default=None,
+        description="Service tier for processing (flex is 50% cheaper but slower).",
+    )
 
 
 # Response models