remdb 0.3.103__py3-none-any.whl → 0.3.141__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +51 -27
- rem/agentic/mcp/tool_wrapper.py +155 -18
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +195 -46
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/main.py +85 -16
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +18 -4
- rem/api/mcp_router/tools.py +394 -16
- rem/api/routers/admin.py +218 -1
- rem/api/routers/chat/completions.py +280 -7
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +177 -3
- rem/api/routers/feedback.py +142 -329
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +13 -13
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +354 -143
- rem/cli/commands/experiments.py +436 -30
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +92 -45
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +29 -6
- rem/config.py +8 -1
- rem/models/core/experiment.py +54 -0
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/shared_session.py +2 -28
- rem/registry.py +10 -4
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/content/service.py +30 -8
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/README.md +151 -26
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +7 -0
- rem/services/session/reload.py +1 -1
- rem/settings.py +288 -16
- rem/sql/background_indexes.sql +19 -24
- rem/sql/migrations/001_install.sql +252 -69
- rem/sql/migrations/002_install_models.sql +2197 -619
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/schema_loader.py +110 -15
- rem/utils/sql_paths.py +146 -0
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/METADATA +300 -215
- {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/RECORD +73 -64
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/WHEEL +0 -0
- {remdb-0.3.103.dist-info → remdb-0.3.141.dist-info}/entry_points.txt +0 -0
rem/api/routers/admin.py
CHANGED
|
@@ -9,6 +9,9 @@ Endpoints:
|
|
|
9
9
|
GET /api/admin/messages - List all messages across users (admin only)
|
|
10
10
|
GET /api/admin/stats - System statistics (admin only)
|
|
11
11
|
|
|
12
|
+
Internal Endpoints (hidden from Swagger, secret-protected):
|
|
13
|
+
POST /api/admin/internal/rebuild-kv - Trigger kv_store rebuild (called by pg_net)
|
|
14
|
+
|
|
12
15
|
All endpoints except the internal ones require:
|
|
13
16
|
1. Authentication (valid session)
|
|
14
17
|
2. Admin role in user's roles list
|
|
@@ -17,11 +20,14 @@ Design Pattern:
|
|
|
17
20
|
- Uses require_admin dependency for role enforcement
|
|
18
21
|
- Cross-tenant queries (no user_id filtering)
|
|
19
22
|
- Audit logging for admin actions
|
|
23
|
+
- Internal endpoints use X-Internal-Secret header for authentication
|
|
20
24
|
"""
|
|
21
25
|
|
|
26
|
+
import asyncio
|
|
27
|
+
import threading
|
|
22
28
|
from typing import Literal
|
|
23
29
|
|
|
24
|
-
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
30
|
+
from fastapi import APIRouter, Depends, Header, HTTPException, Query, BackgroundTasks
|
|
25
31
|
from loguru import logger
|
|
26
32
|
from pydantic import BaseModel
|
|
27
33
|
|
|
@@ -32,6 +38,12 @@ from ...settings import settings
|
|
|
32
38
|
|
|
33
39
|
router = APIRouter(prefix="/api/admin", tags=["admin"])
|
|
34
40
|
|
|
41
|
+
# =============================================================================
|
|
42
|
+
# Internal Router (hidden from Swagger)
|
|
43
|
+
# =============================================================================
|
|
44
|
+
|
|
45
|
+
internal_router = APIRouter(prefix="/internal", include_in_schema=False)
|
|
46
|
+
|
|
35
47
|
|
|
36
48
|
# =============================================================================
|
|
37
49
|
# Response Models
|
|
@@ -275,3 +287,208 @@ async def get_system_stats(
|
|
|
275
287
|
active_sessions_24h=0, # TODO: implement
|
|
276
288
|
messages_24h=0, # TODO: implement
|
|
277
289
|
)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# =============================================================================
|
|
293
|
+
# Internal Endpoints (hidden from Swagger, secret-protected)
|
|
294
|
+
# =============================================================================
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
class RebuildKVRequest(BaseModel):
    """Payload accepted by the internal kv_store rebuild trigger endpoint."""

    # Optional user identifier; it is logged and forwarded in the SQS message.
    user_id: str | None = None
    # Label for whatever initiated the rebuild; "api" when the caller omits it.
    triggered_by: str = "api"
    # Optional caller-supplied timestamp string, forwarded unchanged.
    timestamp: str | None = None
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
class RebuildKVResponse(BaseModel):
    """Result returned to the caller of the kv_store rebuild trigger."""

    # Outcome of the trigger: one of "submitted", "started" or "skipped".
    status: Literal["submitted", "started", "skipped"]
    # Human-readable description of what happened.
    message: str
    # How the rebuild was dispatched: "sqs" or "thread" (None when not set).
    job_method: str | None = None
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
async def _get_internal_secret() -> str | None:
    """
    Look up the internal API secret through the Postgres service.

    The value comes from the SQL function ``rem_get_cache_api_secret()``.

    Returns:
        The secret, or None when no Postgres service is available or the
        query fails (the failure is logged as a warning).
    """
    from ...services.postgres import get_postgres_service

    service = get_postgres_service()
    if service:
        try:
            await service.connect()
            return await service.fetchval("SELECT rem_get_cache_api_secret()")
        except Exception as exc:
            # Best effort: callers treat None as "secret not configured".
            logger.warning(f"Could not get internal API secret: {exc}")
        finally:
            # Runs on both the success and the failure path.
            await service.disconnect()

    return None
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
async def _validate_internal_secret(x_internal_secret: str | None = Header(None)):
    """
    FastAPI dependency that validates the X-Internal-Secret header.

    The supplied value is checked against the secret returned by
    ``_get_internal_secret()`` with a constant-time comparison, so response
    timing does not reveal how much of a guessed secret was correct.

    Args:
        x_internal_secret: Value of the X-Internal-Secret header, or None
            when the header is absent.

    Returns:
        True when the header matches the stored secret.

    Raises:
        HTTPException: 401 if the header is missing or does not match,
            503 if no secret could be retrieved.
    """
    import hmac

    if not x_internal_secret:
        logger.warning("Internal endpoint called without X-Internal-Secret header")
        raise HTTPException(status_code=401, detail="Missing X-Internal-Secret header")

    expected_secret = await _get_internal_secret()
    if not expected_secret:
        logger.error("Could not retrieve internal secret from database")
        raise HTTPException(status_code=503, detail="Internal secret not configured")

    # Compare as bytes: hmac.compare_digest only accepts ASCII-only str
    # arguments, and a header value is not guaranteed to be ASCII.
    supplied = x_internal_secret.encode("utf-8")
    expected = str(expected_secret).encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        logger.warning("Internal endpoint called with invalid secret")
        raise HTTPException(status_code=401, detail="Invalid X-Internal-Secret")

    return True
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _run_rebuild_in_thread():
    """
    Start the kv_store rebuild on a daemon thread and return immediately.

    Used by the rebuild endpoint when the SQS submission did not succeed.
    The thread drives the async rebuild on its own private event loop.
    """

    def _thread_main():
        """Thread entry point: run the async rebuild to completion."""
        import asyncio
        from ...workers.unlogged_maintainer import UnloggedMaintainer

        async def _rebuild():
            worker = UnloggedMaintainer()
            if not worker.db:
                logger.error("Database not configured, cannot rebuild")
                return
            try:
                await worker.db.connect()
                await worker.rebuild_with_lock()
            except Exception as exc:
                logger.error(f"Background rebuild failed: {exc}")
            finally:
                await worker.db.disconnect()

        # This thread has no event loop of its own yet, so create one.
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)
        try:
            event_loop.run_until_complete(_rebuild())
        finally:
            event_loop.close()

    worker_thread = threading.Thread(
        target=_thread_main,
        name="kv-rebuild-worker",
        daemon=True,
    )
    worker_thread.start()
    logger.info(f"Started background rebuild thread: {worker_thread.name}")
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _submit_sqs_rebuild_job_sync(request: RebuildKVRequest) -> bool:
    """
    Submit a rebuild job to the configured SQS queue (synchronous).

    Every failure mode returns False so the caller can fall back to another
    rebuild mechanism. That includes the case where boto3/botocore are not
    installed: the imports are deferred until a queue URL is known to be
    configured and are guarded, so a missing package no longer raises out of
    this function.

    Args:
        request: Rebuild request whose fields are copied into the message body.

    Returns:
        True if the message was sent, False if SQS is not configured, the AWS
        SDK is unavailable, or sending failed.
    """
    import json

    if not settings.sqs.queue_url:
        logger.debug("SQS queue URL not configured, cannot submit SQS job")
        return False

    try:
        import boto3
        from botocore.exceptions import ClientError
    except ImportError as e:
        logger.debug(f"boto3 not available, cannot submit SQS job: {e}")
        return False

    try:
        sqs = boto3.client("sqs", region_name=settings.sqs.region)

        message_body = {
            "action": "rebuild_kv_store",
            "user_id": request.user_id,
            "triggered_by": request.triggered_by,
            "timestamp": request.timestamp,
        }

        response = sqs.send_message(
            QueueUrl=settings.sqs.queue_url,
            MessageBody=json.dumps(message_body),
            MessageAttributes={
                "action": {"DataType": "String", "StringValue": "rebuild_kv_store"},
            },
        )

        message_id = response.get("MessageId")
        logger.info(f"Submitted rebuild job to SQS: {message_id}")
        return True

    except ClientError as e:
        logger.warning(f"Failed to submit SQS job: {e}")
        return False
    except Exception as e:
        # Anything else (credentials, network, serialization) also means
        # SQS is unusable for this request.
        logger.warning(f"SQS submission error: {e}")
        return False
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
async def _submit_sqs_rebuild_job(request: RebuildKVRequest) -> bool:
    """
    Async wrapper around ``_submit_sqs_rebuild_job_sync``.

    The synchronous submission runs via ``asyncio.to_thread`` so the boto3
    call does not block the event loop.

    Returns:
        Whatever the synchronous helper returns for this request.
    """
    import asyncio

    submitted = await asyncio.to_thread(_submit_sqs_rebuild_job_sync, request)
    return submitted
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
@internal_router.post("/rebuild-kv", response_model=RebuildKVResponse)
async def trigger_kv_rebuild(
    request: RebuildKVRequest,
    _: bool = Depends(_validate_internal_secret),
) -> RebuildKVResponse:
    """
    Kick off a kv_store rebuild (internal endpoint, excluded from the schema).

    Access is gated by the ``_validate_internal_secret`` dependency, which
    checks the X-Internal-Secret header.

    Dispatch order:
    1. Try to enqueue the job on SQS.
    2. If that does not succeed, start the rebuild on a background thread
       in this process.

    The response is returned without waiting for the rebuild to finish.
    """
    requester = request.triggered_by
    logger.info(f"Rebuild kv_store requested by {requester} (user_id={request.user_id})")

    queued = await _submit_sqs_rebuild_job(request)
    if not queued:
        # SQS did not accept the job: run the rebuild locally instead.
        _run_rebuild_in_thread()
        return RebuildKVResponse(
            status="started",
            message="Rebuild started in background thread (SQS unavailable)",
            job_method="thread",
        )

    return RebuildKVResponse(
        status="submitted",
        message="Rebuild job submitted to SQS queue",
        job_method="sqs",
    )
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
# Include internal router in main router
|
|
494
|
+
router.include_router(internal_router)
|
|
@@ -1,13 +1,94 @@
|
|
|
1
1
|
"""
|
|
2
2
|
OpenAI-compatible chat completions router for REM.
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
Quick Start (Local Development)
|
|
5
|
+
===============================
|
|
6
|
+
|
|
7
|
+
NOTE: Local dev uses LOCAL databases (Postgres via Docker Compose on port 5050).
|
|
8
|
+
Do NOT port-forward databases. Only port-forward observability services.
|
|
9
|
+
|
|
10
|
+
IMPORTANT: Session IDs MUST be UUIDs. Non-UUID session IDs will cause message
|
|
11
|
+
storage issues and feedback will not work correctly.
|
|
12
|
+
|
|
13
|
+
1. Port Forwarding (REQUIRED for trace capture and Phoenix sync):
|
|
14
|
+
|
|
15
|
+
# Terminal 1: OTEL Collector (HTTP) - sends traces to Phoenix
|
|
16
|
+
kubectl port-forward -n observability svc/otel-collector-collector 4318:4318
|
|
17
|
+
|
|
18
|
+
# Terminal 2: Phoenix UI - view traces at http://localhost:6006
|
|
19
|
+
kubectl port-forward -n siggy svc/phoenix 6006:6006
|
|
20
|
+
|
|
21
|
+
2. Get Phoenix API Key (REQUIRED for feedback->Phoenix sync):
|
|
22
|
+
|
|
23
|
+
export PHOENIX_API_KEY=$(kubectl get secret -n siggy rem-phoenix-api-key \\
|
|
24
|
+
-o jsonpath='{.data.PHOENIX_API_KEY}' | base64 -d)
|
|
25
|
+
|
|
26
|
+
3. Start API with OTEL and Phoenix enabled:
|
|
27
|
+
|
|
28
|
+
cd /path/to/remstack/rem
|
|
29
|
+
source .venv/bin/activate
|
|
30
|
+
OTEL__ENABLED=true \\
|
|
31
|
+
PHOENIX__ENABLED=true \\
|
|
32
|
+
PHOENIX_API_KEY="$PHOENIX_API_KEY" \\
|
|
33
|
+
uvicorn rem.api.main:app --host 0.0.0.0 --port 8000 --app-dir src
|
|
34
|
+
|
|
35
|
+
4. Test Chat Request (session_id MUST be a UUID):
|
|
36
|
+
|
|
37
|
+
SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())")
|
|
38
|
+
curl -s -N -X POST http://localhost:8000/api/v1/chat/completions \\
|
|
39
|
+
-H 'Content-Type: application/json' \\
|
|
40
|
+
-H "X-Session-Id: $SESSION_ID" \\
|
|
41
|
+
-H 'X-Agent-Schema: rem' \\
|
|
42
|
+
-d '{"messages": [{"role": "user", "content": "Hello"}], "stream": true}'
|
|
43
|
+
|
|
44
|
+
# Note: Use 'rem' agent schema (default) for real LLM responses.
|
|
45
|
+
# The 'simulator' agent is for testing SSE events without LLM calls.
|
|
46
|
+
|
|
47
|
+
5. Submit Feedback on Response:
|
|
48
|
+
|
|
49
|
+
The metadata SSE event contains message_id and trace_id for feedback:
|
|
50
|
+
event: metadata
|
|
51
|
+
data: {"message_id": "728882f8-...", "trace_id": "e53c701c...", ...}
|
|
52
|
+
|
|
53
|
+
Use session_id (UUID you generated) and message_id to submit feedback:
|
|
54
|
+
|
|
55
|
+
curl -X POST http://localhost:8000/api/v1/messages/feedback \\
|
|
56
|
+
-H 'Content-Type: application/json' \\
|
|
57
|
+
-H 'X-Tenant-Id: default' \\
|
|
58
|
+
-d '{
|
|
59
|
+
"session_id": "<your-uuid-session-id>",
|
|
60
|
+
"message_id": "<message-id-from-metadata>",
|
|
61
|
+
"rating": 1,
|
|
62
|
+
"categories": ["helpful"],
|
|
63
|
+
"comment": "Good response"
|
|
64
|
+
}'
|
|
65
|
+
|
|
66
|
+
Expected response (201 = synced to Phoenix):
|
|
67
|
+
{"phoenix_synced": true, "trace_id": "e53c701c...", "span_id": "6432d497..."}
|
|
68
|
+
|
|
69
|
+
OTEL Architecture
|
|
70
|
+
=================
|
|
71
|
+
|
|
72
|
+
REM API --[OTLP/HTTP]--> OTEL Collector --[relay]--> Phoenix
|
|
73
|
+
(port 4318) (k8s: observability) (k8s: siggy)
|
|
74
|
+
|
|
75
|
+
Environment Variables:
|
|
76
|
+
OTEL__ENABLED=true Enable OTEL tracing (required for trace capture)
|
|
77
|
+
PHOENIX__ENABLED=true Enable Phoenix integration (required for feedback sync)
|
|
78
|
+
PHOENIX_API_KEY=<jwt> Phoenix API key (required for feedback->Phoenix sync)
|
|
79
|
+
OTEL__COLLECTOR_ENDPOINT Default: http://localhost:4318
|
|
80
|
+
OTEL__PROTOCOL Default: http (use port 4318, not gRPC 4317)
|
|
81
|
+
|
|
82
|
+
Design Pattern
|
|
83
|
+
==============
|
|
84
|
+
|
|
85
|
+
- Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema, X-Is-Eval)
|
|
6
86
|
- ContextBuilder centralizes message construction with user profile + session history
|
|
7
87
|
- Body.model is the LLM model for Pydantic AI
|
|
8
88
|
- X-Agent-Schema header specifies which agent schema to use (defaults to 'rem')
|
|
9
89
|
- Support for streaming (SSE) and non-streaming modes
|
|
10
90
|
- Response format control (text vs json_object)
|
|
91
|
+
- OpenAI-compatible body fields: metadata, store, reasoning_effort, etc.
|
|
11
92
|
|
|
12
93
|
Context Building Flow:
|
|
13
94
|
1. ContextBuilder.build_from_headers() extracts user_id, session_id from headers
|
|
@@ -25,9 +106,10 @@ Context Building Flow:
|
|
|
25
106
|
Headers Mapping
|
|
26
107
|
X-User-Id → AgentContext.user_id
|
|
27
108
|
X-Tenant-Id → AgentContext.tenant_id
|
|
28
|
-
X-Session-Id → AgentContext.session_id
|
|
109
|
+
X-Session-Id → AgentContext.session_id (use UUID for new sessions)
|
|
29
110
|
X-Model-Name → AgentContext.default_model (overrides body.model)
|
|
30
111
|
X-Agent-Schema → AgentContext.agent_schema_uri (defaults to 'rem')
|
|
112
|
+
X-Is-Eval → AgentContext.is_eval (sets session mode to EVALUATION)
|
|
31
113
|
|
|
32
114
|
Default Agent:
|
|
33
115
|
If X-Agent-Schema header is not provided, the system loads 'rem' schema,
|
|
@@ -42,6 +124,7 @@ Example Request:
|
|
|
42
124
|
POST /api/v1/chat/completions
|
|
43
125
|
X-Tenant-Id: acme-corp
|
|
44
126
|
X-User-Id: user123
|
|
127
|
+
X-Session-Id: a1b2c3d4-e5f6-7890-abcd-ef1234567890 # UUID
|
|
45
128
|
X-Agent-Schema: rem # Optional, this is the default
|
|
46
129
|
|
|
47
130
|
{
|
|
@@ -67,7 +150,9 @@ from loguru import logger
|
|
|
67
150
|
from ....agentic.context import AgentContext
|
|
68
151
|
from ....agentic.context_builder import ContextBuilder
|
|
69
152
|
from ....agentic.providers.pydantic_ai import create_agent
|
|
153
|
+
from ....models.entities.session import Session, SessionMode
|
|
70
154
|
from ....services.audio.transcriber import AudioTranscriber
|
|
155
|
+
from ....services.postgres.repository import Repository
|
|
71
156
|
from ....services.session import SessionMessageStore, reload_session
|
|
72
157
|
from ....settings import settings
|
|
73
158
|
from ....utils.schema_loader import load_agent_schema, load_agent_schema_async
|
|
@@ -79,7 +164,7 @@ from .models import (
|
|
|
79
164
|
ChatCompletionUsage,
|
|
80
165
|
ChatMessage,
|
|
81
166
|
)
|
|
82
|
-
from .streaming import stream_openai_response, stream_simulator_response
|
|
167
|
+
from .streaming import stream_openai_response, stream_openai_response_with_save, stream_simulator_response
|
|
83
168
|
|
|
84
169
|
router = APIRouter(prefix="/api/v1", tags=["chat"])
|
|
85
170
|
|
|
@@ -87,6 +172,105 @@ router = APIRouter(prefix="/api/v1", tags=["chat"])
|
|
|
87
172
|
DEFAULT_AGENT_SCHEMA = "rem"
|
|
88
173
|
|
|
89
174
|
|
|
175
|
+
def get_current_trace_context() -> tuple[str | None, str | None]:
|
|
176
|
+
"""Get trace_id and span_id from current OTEL context.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
|
|
180
|
+
"""
|
|
181
|
+
try:
|
|
182
|
+
from opentelemetry import trace
|
|
183
|
+
span = trace.get_current_span()
|
|
184
|
+
if span and span.get_span_context().is_valid:
|
|
185
|
+
ctx = span.get_span_context()
|
|
186
|
+
trace_id = format(ctx.trace_id, '032x')
|
|
187
|
+
span_id = format(ctx.span_id, '016x')
|
|
188
|
+
return trace_id, span_id
|
|
189
|
+
except Exception:
|
|
190
|
+
pass
|
|
191
|
+
return None, None
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def get_tracer():
    """Return the OpenTelemetry tracer named "rem.chat.completions".

    Returns None when the tracer cannot be obtained, for example because
    opentelemetry cannot be imported.
    """
    try:
        from opentelemetry import trace as otel_trace

        tracer = otel_trace.get_tracer("rem.chat.completions")
    except Exception:
        tracer = None
    return tracer
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
async def ensure_session_with_metadata(
    session_id: str,
    user_id: str | None,
    tenant_id: str,
    is_eval: bool,
    request_metadata: dict[str, str] | None,
    agent_schema: str | None = None,
) -> None:
    """
    Ensure a session row exists and carries the request's mode and metadata.

    Does nothing when Postgres is disabled. An existing session (looked up by
    name and tenant) is switched to EVALUATION mode when ``is_eval`` is set
    and has ``request_metadata`` merged into its metadata; it is written back
    only if one of those changed. Otherwise a new session is created.

    Any failure is logged with its traceback and swallowed, because session
    bookkeeping must not fail the chat request.

    Args:
        session_id: Session identifier (stored in Session.name)
        user_id: User identifier
        tenant_id: Tenant identifier
        is_eval: Whether this is an evaluation session
        request_metadata: Metadata from the request body to merge
        agent_schema: Optional agent schema being used
    """
    if not settings.postgres.enabled:
        return

    try:
        repo = Repository(Session, table_name="sessions")

        # Try to load existing session by name (session_id is the name field)
        existing_list = await repo.find(
            filters={"name": session_id, "tenant_id": tenant_id},
            limit=1,
        )
        existing = existing_list[0] if existing_list else None

        if existing:
            # Merge metadata if provided
            merged_metadata = existing.metadata or {}
            if request_metadata:
                merged_metadata.update(request_metadata)

            # Update session if eval flag or new metadata
            needs_update = False
            if is_eval and existing.mode != SessionMode.EVALUATION:
                existing.mode = SessionMode.EVALUATION
                needs_update = True
            if request_metadata:
                existing.metadata = merged_metadata
                needs_update = True

            if needs_update:
                await repo.upsert(existing)
                logger.debug(f"Updated session {session_id} (eval={is_eval}, metadata keys={list(merged_metadata.keys())})")
        else:
            # Create new session
            session = Session(
                name=session_id,
                mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
                user_id=user_id,
                tenant_id=tenant_id,
                agent_schema_uri=agent_schema,
                metadata=request_metadata or {},
            )
            await repo.upsert(session)
            logger.info(f"Created session {session_id} (eval={is_eval})")

    except Exception as e:
        # Non-critical - log but don't fail the request.
        # loguru ignores the stdlib-style exc_info=True, and passing any
        # keyword makes it run str.format on the message, which can itself
        # raise when the exception text contains braces. opt(exception=True)
        # attaches the traceback, and the error is passed as a format
        # argument so its text is never parsed as a template.
        logger.opt(exception=True).error("Failed to ensure session metadata: {}", e)
|
|
272
|
+
|
|
273
|
+
|
|
90
274
|
@router.post("/chat/completions", response_model=None)
|
|
91
275
|
async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
92
276
|
"""
|
|
@@ -102,6 +286,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
102
286
|
| X-Tenant-Id | Tenant identifier (multi-tenancy) | AgentContext.tenant_id | "default" |
|
|
103
287
|
| X-Session-Id | Session/conversation identifier | AgentContext.session_id | None |
|
|
104
288
|
| X-Agent-Schema | Agent schema name | AgentContext.agent_schema_uri | "rem" |
|
|
289
|
+
| X-Is-Eval | Mark as evaluation session | AgentContext.is_eval | false |
|
|
290
|
+
|
|
291
|
+
Additional OpenAI-compatible Body Fields:
|
|
292
|
+
- metadata: Key-value pairs merged with session metadata (max 16 keys)
|
|
293
|
+
- store: Whether to store for distillation/evaluation
|
|
294
|
+
- max_completion_tokens: Max tokens to generate (replaces max_tokens)
|
|
295
|
+
- seed: Seed for deterministic sampling
|
|
296
|
+
- top_p: Nucleus sampling probability
|
|
297
|
+
- logprobs: Return log probabilities
|
|
298
|
+
- reasoning_effort: low/medium/high for o-series models
|
|
299
|
+
- service_tier: auto/flex/priority/default
|
|
105
300
|
|
|
106
301
|
Example Models:
|
|
107
302
|
- anthropic:claude-sonnet-4-5-20250929 (Claude 4.5 Sonnet)
|
|
@@ -127,6 +322,12 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
127
322
|
- If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
|
|
128
323
|
- New messages saved to database with compression for session continuity
|
|
129
324
|
- When Postgres is disabled, session management is skipped
|
|
325
|
+
|
|
326
|
+
Evaluation Sessions:
|
|
327
|
+
- Set X-Is-Eval: true header to mark session as evaluation
|
|
328
|
+
- Session mode will be set to EVALUATION
|
|
329
|
+
- Request metadata is merged with session metadata
|
|
330
|
+
- Useful for A/B testing, model comparison, and feedback collection
|
|
130
331
|
"""
|
|
131
332
|
# Load agent schema: use header value from context or default
|
|
132
333
|
# Extract AgentContext first to get schema name
|
|
@@ -151,6 +352,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
151
352
|
new_messages=new_messages,
|
|
152
353
|
)
|
|
153
354
|
|
|
355
|
+
# Ensure session exists with metadata and eval mode if applicable
|
|
356
|
+
if context.session_id:
|
|
357
|
+
await ensure_session_with_metadata(
|
|
358
|
+
session_id=context.session_id,
|
|
359
|
+
user_id=context.user_id,
|
|
360
|
+
tenant_id=context.tenant_id,
|
|
361
|
+
is_eval=context.is_eval,
|
|
362
|
+
request_metadata=body.metadata,
|
|
363
|
+
agent_schema="simulator",
|
|
364
|
+
)
|
|
365
|
+
|
|
154
366
|
# Get the last user message as prompt
|
|
155
367
|
prompt = body.messages[-1].content if body.messages else "demo"
|
|
156
368
|
request_id = f"sim-{uuid.uuid4().hex[:24]}"
|
|
@@ -256,7 +468,7 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
256
468
|
detail=f"Agent schema '{schema_name}' not found and default schema unavailable",
|
|
257
469
|
)
|
|
258
470
|
|
|
259
|
-
logger.
|
|
471
|
+
logger.debug(f"Using agent schema: {schema_name}, model: {body.model}")
|
|
260
472
|
|
|
261
473
|
# Check for audio input
|
|
262
474
|
is_audio = request.headers.get("x-chat-is-audio", "").lower() == "true"
|
|
@@ -301,6 +513,17 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
301
513
|
|
|
302
514
|
logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
|
|
303
515
|
|
|
516
|
+
# Ensure session exists with metadata and eval mode if applicable
|
|
517
|
+
if context.session_id:
|
|
518
|
+
await ensure_session_with_metadata(
|
|
519
|
+
session_id=context.session_id,
|
|
520
|
+
user_id=context.user_id,
|
|
521
|
+
tenant_id=context.tenant_id,
|
|
522
|
+
is_eval=context.is_eval,
|
|
523
|
+
request_metadata=body.metadata,
|
|
524
|
+
agent_schema=schema_name,
|
|
525
|
+
)
|
|
526
|
+
|
|
304
527
|
# Create agent with schema and model override
|
|
305
528
|
agent = await create_agent(
|
|
306
529
|
context=context,
|
|
@@ -317,14 +540,60 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
317
540
|
|
|
318
541
|
# Streaming mode
|
|
319
542
|
if body.stream:
|
|
543
|
+
# Save user message before streaming starts
|
|
544
|
+
if settings.postgres.enabled and context.session_id:
|
|
545
|
+
user_message = {
|
|
546
|
+
"role": "user",
|
|
547
|
+
"content": body.messages[-1].content if body.messages else "",
|
|
548
|
+
"timestamp": datetime.utcnow().isoformat(),
|
|
549
|
+
}
|
|
550
|
+
try:
|
|
551
|
+
store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
|
|
552
|
+
await store.store_session_messages(
|
|
553
|
+
session_id=context.session_id,
|
|
554
|
+
messages=[user_message],
|
|
555
|
+
user_id=context.user_id,
|
|
556
|
+
compress=False, # User messages are typically short
|
|
557
|
+
)
|
|
558
|
+
logger.debug(f"Saved user message to session {context.session_id}")
|
|
559
|
+
except Exception as e:
|
|
560
|
+
logger.error(f"Failed to save user message: {e}", exc_info=True)
|
|
561
|
+
|
|
320
562
|
return StreamingResponse(
|
|
321
|
-
|
|
563
|
+
stream_openai_response_with_save(
|
|
564
|
+
agent=agent,
|
|
565
|
+
prompt=prompt,
|
|
566
|
+
model=body.model,
|
|
567
|
+
request_id=request_id,
|
|
568
|
+
agent_schema=schema_name,
|
|
569
|
+
session_id=context.session_id,
|
|
570
|
+
user_id=context.user_id,
|
|
571
|
+
),
|
|
322
572
|
media_type="text/event-stream",
|
|
323
573
|
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
|
|
324
574
|
)
|
|
325
575
|
|
|
326
576
|
# Non-streaming mode
|
|
327
|
-
|
|
577
|
+
# Create a parent span to capture trace context for message storage
|
|
578
|
+
trace_id, span_id = None, None
|
|
579
|
+
tracer = get_tracer()
|
|
580
|
+
|
|
581
|
+
if tracer:
|
|
582
|
+
with tracer.start_as_current_span(
|
|
583
|
+
"chat_completion",
|
|
584
|
+
attributes={
|
|
585
|
+
"session.id": context.session_id or "",
|
|
586
|
+
"user.id": context.user_id or "",
|
|
587
|
+
"model": body.model,
|
|
588
|
+
"agent.schema": context.agent_schema_uri or DEFAULT_AGENT_SCHEMA,
|
|
589
|
+
}
|
|
590
|
+
) as span:
|
|
591
|
+
# Capture trace context from the span we just created
|
|
592
|
+
trace_id, span_id = get_current_trace_context()
|
|
593
|
+
result = await agent.run(prompt)
|
|
594
|
+
else:
|
|
595
|
+
# No tracer available, run without tracing
|
|
596
|
+
result = await agent.run(prompt)
|
|
328
597
|
|
|
329
598
|
# Determine content format based on response_format request
|
|
330
599
|
if body.response_format and body.response_format.type == "json_object":
|
|
@@ -347,12 +616,16 @@ async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
|
347
616
|
"role": "user",
|
|
348
617
|
"content": body.messages[-1].content if body.messages else "",
|
|
349
618
|
"timestamp": datetime.utcnow().isoformat(),
|
|
619
|
+
"trace_id": trace_id,
|
|
620
|
+
"span_id": span_id,
|
|
350
621
|
}
|
|
351
622
|
|
|
352
623
|
assistant_message = {
|
|
353
624
|
"role": "assistant",
|
|
354
625
|
"content": content,
|
|
355
626
|
"timestamp": datetime.utcnow().isoformat(),
|
|
627
|
+
"trace_id": trace_id,
|
|
628
|
+
"span_id": span_id,
|
|
356
629
|
}
|
|
357
630
|
|
|
358
631
|
try:
|