remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +23 -3
- rem/agentic/mcp/tool_wrapper.py +126 -15
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +122 -43
- rem/agentic/schema.py +4 -1
- rem/api/mcp_router/tools.py +13 -2
- rem/api/routers/chat/completions.py +250 -4
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +35 -1
- rem/api/routers/feedback.py +134 -14
- rem/auth/middleware.py +66 -1
- rem/cli/commands/cluster.py +590 -82
- rem/cli/commands/configure.py +3 -4
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/config.py +8 -1
- rem/models/core/experiment.py +58 -14
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/pydantic_to_sqlalchemy.py +9 -12
- rem/services/session/compression.py +7 -0
- rem/settings.py +260 -17
- rem/sql/migrations/002_install_models.sql +91 -91
- rem/sql/migrations/004_cache_system.sql +1 -1
- rem/utils/README.md +45 -0
- rem/utils/files.py +157 -1
- rem/utils/schema_loader.py +94 -3
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +2 -1
- rem/workers/db_listener.py +579 -0
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/METADATA +161 -147
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/RECORD +44 -41
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/WHEEL +0 -0
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/entry_points.txt +0 -0
rem/api/routers/chat/models.py
CHANGED
|
@@ -1,14 +1,38 @@
|
|
|
1
1
|
"""
|
|
2
2
|
OpenAI-compatible API models for chat completions.
|
|
3
3
|
|
|
4
|
-
Design Pattern
|
|
4
|
+
Design Pattern:
|
|
5
5
|
- Full OpenAI compatibility for drop-in replacement
|
|
6
6
|
- Support for streaming (SSE) and non-streaming modes
|
|
7
7
|
- Response format control (text vs json_object)
|
|
8
|
-
- Headers map to AgentContext
|
|
8
|
+
- Headers map to AgentContext for session/context control
|
|
9
|
+
- Body fields for OpenAI-compatible parameters + metadata
|
|
10
|
+
|
|
11
|
+
Headers (context control):
|
|
12
|
+
X-User-Id → context.user_id (user identifier)
|
|
13
|
+
X-Tenant-Id → context.tenant_id (multi-tenancy, default: "default")
|
|
14
|
+
X-Session-Id → context.session_id (conversation continuity)
|
|
15
|
+
X-Agent-Schema → context.agent_schema_uri (which agent to use, default: "rem")
|
|
16
|
+
X-Model-Name → context.default_model (model override)
|
|
17
|
+
X-Chat-Is-Audio → triggers audio transcription ("true"/"false")
|
|
18
|
+
X-Is-Eval → context.is_eval (marks session as evaluation, sets mode=EVALUATION)
|
|
19
|
+
|
|
20
|
+
Body Fields (OpenAI-compatible + extensions):
|
|
21
|
+
model → LLM model (e.g., "openai:gpt-4.1", "anthropic:claude-sonnet-4-5-20250929")
|
|
22
|
+
messages → Chat conversation history
|
|
23
|
+
temperature → Sampling temperature (0-2)
|
|
24
|
+
max_tokens → Max tokens (deprecated, use max_completion_tokens)
|
|
25
|
+
max_completion_tokens → Max tokens to generate
|
|
26
|
+
stream → Enable SSE streaming
|
|
27
|
+
metadata → Key-value pairs merged with session metadata (for evals/experiments)
|
|
28
|
+
store → Whether to store for distillation/evaluation
|
|
29
|
+
seed → Deterministic sampling seed
|
|
30
|
+
top_p → Nucleus sampling probability
|
|
31
|
+
reasoning_effort → low/medium/high for o-series models
|
|
32
|
+
service_tier → auto/flex/priority/default
|
|
9
33
|
"""
|
|
10
34
|
|
|
11
|
-
from typing import Literal
|
|
35
|
+
from typing import Any, Literal
|
|
12
36
|
|
|
13
37
|
from pydantic import BaseModel, Field
|
|
14
38
|
|
|
@@ -46,10 +70,17 @@ class ChatCompletionRequest(BaseModel):
|
|
|
46
70
|
Compatible with OpenAI's /v1/chat/completions endpoint.
|
|
47
71
|
|
|
48
72
|
Headers Map to AgentContext:
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
73
|
+
X-User-Id → context.user_id
|
|
74
|
+
X-Tenant-Id → context.tenant_id (default: "default")
|
|
75
|
+
X-Session-Id → context.session_id
|
|
76
|
+
X-Agent-Schema → context.agent_schema_uri (default: "rem")
|
|
77
|
+
X-Model-Name → context.default_model
|
|
78
|
+
X-Chat-Is-Audio → triggers audio transcription
|
|
79
|
+
X-Is-Eval → context.is_eval (sets session mode=EVALUATION)
|
|
80
|
+
|
|
81
|
+
Body Fields for Metadata/Evals:
|
|
82
|
+
metadata → Key-value pairs merged with session metadata
|
|
83
|
+
store → Whether to store for distillation/evaluation
|
|
53
84
|
|
|
54
85
|
Note: Model is specified in body.model (standard OpenAI field), not headers.
|
|
55
86
|
"""
|
|
@@ -73,6 +104,49 @@ class ChatCompletionRequest(BaseModel):
|
|
|
73
104
|
default=None,
|
|
74
105
|
description="Response format. Set type='json_object' to enable JSON mode.",
|
|
75
106
|
)
|
|
107
|
+
# Additional OpenAI-compatible fields
|
|
108
|
+
metadata: dict[str, str] | None = Field(
|
|
109
|
+
default=None,
|
|
110
|
+
description="Key-value pairs attached to the request (max 16 keys, 64/512 char limits). "
|
|
111
|
+
"Merged with session metadata for persistence.",
|
|
112
|
+
)
|
|
113
|
+
store: bool | None = Field(
|
|
114
|
+
default=None,
|
|
115
|
+
description="Whether to store for distillation/evaluation purposes.",
|
|
116
|
+
)
|
|
117
|
+
max_completion_tokens: int | None = Field(
|
|
118
|
+
default=None,
|
|
119
|
+
ge=1,
|
|
120
|
+
description="Max tokens to generate (replaces deprecated max_tokens).",
|
|
121
|
+
)
|
|
122
|
+
seed: int | None = Field(
|
|
123
|
+
default=None,
|
|
124
|
+
description="Seed for deterministic sampling (best effort).",
|
|
125
|
+
)
|
|
126
|
+
top_p: float | None = Field(
|
|
127
|
+
default=None,
|
|
128
|
+
ge=0,
|
|
129
|
+
le=1,
|
|
130
|
+
description="Nucleus sampling probability. Use temperature OR top_p, not both.",
|
|
131
|
+
)
|
|
132
|
+
logprobs: bool | None = Field(
|
|
133
|
+
default=None,
|
|
134
|
+
description="Whether to return log probabilities for output tokens.",
|
|
135
|
+
)
|
|
136
|
+
top_logprobs: int | None = Field(
|
|
137
|
+
default=None,
|
|
138
|
+
ge=0,
|
|
139
|
+
le=20,
|
|
140
|
+
description="Number of most likely tokens to return at each position (requires logprobs=true).",
|
|
141
|
+
)
|
|
142
|
+
reasoning_effort: Literal["low", "medium", "high"] | None = Field(
|
|
143
|
+
default=None,
|
|
144
|
+
description="Reasoning effort for o-series models (low/medium/high).",
|
|
145
|
+
)
|
|
146
|
+
service_tier: Literal["auto", "flex", "priority", "default"] | None = Field(
|
|
147
|
+
default=None,
|
|
148
|
+
description="Service tier for processing (flex is 50% cheaper but slower).",
|
|
149
|
+
)
|
|
76
150
|
|
|
77
151
|
|
|
78
152
|
# Response models
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""OTEL utilities for chat routers."""
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_tracer():
    """Return the OpenTelemetry tracer used for chat completions.

    Returns:
        The tracer registered under the name "rem.chat.completions", or
        None if importing OpenTelemetry or obtaining the tracer raises.
    """
    try:
        from opentelemetry import trace as otel_trace

        tracer = otel_trace.get_tracer("rem.chat.completions")
    except Exception:
        # Best effort: tracing is optional, so any failure here simply
        # means "no tracer" rather than an error for the caller.
        tracer = None
    return tracer
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_current_trace_context() -> tuple[str | None, str | None]:
    """Read the trace and span identifiers of the current OTEL span.

    Returns:
        A (trace_id, span_id) pair of zero-padded lowercase hex strings
        (32 and 16 characters respectively) when the current span context
        is valid; (None, None) when it is not valid or when anything in
        the lookup raises.
    """
    try:
        from opentelemetry import trace as otel_trace

        span_context = otel_trace.get_current_span().get_span_context()
        if not span_context.is_valid:
            return None, None
        # Same rendering as format(value, '032x') / format(value, '016x').
        return f"{span_context.trace_id:032x}", f"{span_context.span_id:016x}"
    except Exception as e:
        # Tracing is optional; record the reason at debug level and fall
        # back to "no trace context".
        logger.debug(f"Could not get trace context: {e}")
        return None, None
|
|
@@ -321,7 +321,13 @@ class MetadataEvent(BaseModel):
|
|
|
321
321
|
# Agent info
|
|
322
322
|
agent_schema: str | None = Field(
|
|
323
323
|
default=None,
|
|
324
|
-
description="Name of the agent schema used for this response (e.g., 'rem', '
|
|
324
|
+
description="Name of the agent schema used for this response (e.g., 'rem', 'query-assistant')"
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
# Session info
|
|
328
|
+
session_name: str | None = Field(
|
|
329
|
+
default=None,
|
|
330
|
+
description="Short 1-3 phrase name for the session topic (e.g., 'Prescription Drug Questions', 'AWS Setup Help')"
|
|
325
331
|
)
|
|
326
332
|
|
|
327
333
|
# Quality indicators
|
|
@@ -350,6 +356,16 @@ class MetadataEvent(BaseModel):
|
|
|
350
356
|
description="Token count for this response"
|
|
351
357
|
)
|
|
352
358
|
|
|
359
|
+
# Trace context for observability (deterministic, captured from OTEL)
|
|
360
|
+
trace_id: str | None = Field(
|
|
361
|
+
default=None,
|
|
362
|
+
description="OTEL trace ID for correlating with Phoenix/observability systems"
|
|
363
|
+
)
|
|
364
|
+
span_id: str | None = Field(
|
|
365
|
+
default=None,
|
|
366
|
+
description="OTEL span ID for correlating with Phoenix/observability systems"
|
|
367
|
+
)
|
|
368
|
+
|
|
353
369
|
# System flags
|
|
354
370
|
flags: list[str] | None = Field(
|
|
355
371
|
default=None,
|
|
@@ -47,6 +47,7 @@ from pydantic_ai.messages import (
|
|
|
47
47
|
ToolCallPart,
|
|
48
48
|
)
|
|
49
49
|
|
|
50
|
+
from .otel_utils import get_current_trace_context, get_tracer
|
|
50
51
|
from .models import (
|
|
51
52
|
ChatCompletionMessageDelta,
|
|
52
53
|
ChatCompletionStreamChoice,
|
|
@@ -73,6 +74,8 @@ async def stream_openai_response(
|
|
|
73
74
|
session_id: str | None = None,
|
|
74
75
|
# Agent info for metadata
|
|
75
76
|
agent_schema: str | None = None,
|
|
77
|
+
# Mutable container to capture trace context (deterministic, not AI-dependent)
|
|
78
|
+
trace_context_out: dict | None = None,
|
|
76
79
|
) -> AsyncGenerator[str, None]:
|
|
77
80
|
"""
|
|
78
81
|
Stream Pydantic AI agent responses with rich SSE events.
|
|
@@ -156,6 +159,14 @@ async def stream_openai_response(
|
|
|
156
159
|
|
|
157
160
|
# Use agent.iter() to get complete execution with tool calls
|
|
158
161
|
async with agent.iter(prompt) as agent_run:
|
|
162
|
+
# Capture trace context IMMEDIATELY inside agent execution
|
|
163
|
+
# This is deterministic - it's the OTEL context from Pydantic AI instrumentation
|
|
164
|
+
# NOT dependent on any AI-generated content
|
|
165
|
+
captured_trace_id, captured_span_id = get_current_trace_context()
|
|
166
|
+
if trace_context_out is not None:
|
|
167
|
+
trace_context_out["trace_id"] = captured_trace_id
|
|
168
|
+
trace_context_out["span_id"] = captured_span_id
|
|
169
|
+
|
|
159
170
|
async for node in agent_run:
|
|
160
171
|
# Check if this is a model request node (includes tool calls)
|
|
161
172
|
if Agent.is_model_request_node(node):
|
|
@@ -366,6 +377,8 @@ async def stream_openai_response(
|
|
|
366
377
|
registered_sources = result_content.get("sources")
|
|
367
378
|
registered_references = result_content.get("references")
|
|
368
379
|
registered_flags = result_content.get("flags")
|
|
380
|
+
# Session naming
|
|
381
|
+
registered_session_name = result_content.get("session_name")
|
|
369
382
|
# Risk assessment fields
|
|
370
383
|
registered_risk_level = result_content.get("risk_level")
|
|
371
384
|
registered_risk_score = result_content.get("risk_score")
|
|
@@ -376,6 +389,7 @@ async def stream_openai_response(
|
|
|
376
389
|
|
|
377
390
|
logger.info(
|
|
378
391
|
f"📊 Metadata registered: confidence={registered_confidence}, "
|
|
392
|
+
f"session_name={registered_session_name}, "
|
|
379
393
|
f"risk_level={registered_risk_level}, sources={registered_sources}"
|
|
380
394
|
)
|
|
381
395
|
|
|
@@ -398,6 +412,7 @@ async def stream_openai_response(
|
|
|
398
412
|
in_reply_to=in_reply_to,
|
|
399
413
|
session_id=session_id,
|
|
400
414
|
agent_schema=agent_schema,
|
|
415
|
+
session_name=registered_session_name,
|
|
401
416
|
confidence=registered_confidence,
|
|
402
417
|
sources=registered_sources,
|
|
403
418
|
model_version=model,
|
|
@@ -528,6 +543,9 @@ async def stream_openai_response(
|
|
|
528
543
|
model_version=model,
|
|
529
544
|
latency_ms=latency_ms,
|
|
530
545
|
token_count=token_count,
|
|
546
|
+
# Include deterministic trace context captured from OTEL
|
|
547
|
+
trace_id=captured_trace_id,
|
|
548
|
+
span_id=captured_span_id,
|
|
531
549
|
))
|
|
532
550
|
|
|
533
551
|
# Mark all progress complete
|
|
@@ -699,6 +717,14 @@ async def stream_openai_response_with_save(
|
|
|
699
717
|
from ....services.session import SessionMessageStore
|
|
700
718
|
from ....settings import settings
|
|
701
719
|
|
|
720
|
+
# Pre-generate message_id so it can be sent in metadata event
|
|
721
|
+
# This allows frontend to use it for feedback before DB persistence
|
|
722
|
+
message_id = str(uuid.uuid4())
|
|
723
|
+
|
|
724
|
+
# Mutable container for capturing trace context from inside agent execution
|
|
725
|
+
# This is deterministic - captured from OTEL instrumentation, not AI-generated
|
|
726
|
+
trace_context: dict = {}
|
|
727
|
+
|
|
702
728
|
# Accumulate content during streaming
|
|
703
729
|
accumulated_content = []
|
|
704
730
|
|
|
@@ -709,6 +735,8 @@ async def stream_openai_response_with_save(
|
|
|
709
735
|
request_id=request_id,
|
|
710
736
|
agent_schema=agent_schema,
|
|
711
737
|
session_id=session_id,
|
|
738
|
+
message_id=message_id,
|
|
739
|
+
trace_context_out=trace_context, # Pass container to capture trace IDs
|
|
712
740
|
):
|
|
713
741
|
yield chunk
|
|
714
742
|
|
|
@@ -730,10 +758,16 @@ async def stream_openai_response_with_save(
|
|
|
730
758
|
# After streaming completes, save the assistant response
|
|
731
759
|
if settings.postgres.enabled and session_id and accumulated_content:
|
|
732
760
|
full_content = "".join(accumulated_content)
|
|
761
|
+
# Get captured trace context from container (deterministically captured inside agent execution)
|
|
762
|
+
captured_trace_id = trace_context.get("trace_id")
|
|
763
|
+
captured_span_id = trace_context.get("span_id")
|
|
733
764
|
assistant_message = {
|
|
765
|
+
"id": message_id, # Use pre-generated ID for consistency with metadata event
|
|
734
766
|
"role": "assistant",
|
|
735
767
|
"content": full_content,
|
|
736
768
|
"timestamp": to_iso(utc_now()),
|
|
769
|
+
"trace_id": captured_trace_id,
|
|
770
|
+
"span_id": captured_span_id,
|
|
737
771
|
}
|
|
738
772
|
try:
|
|
739
773
|
store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
|
|
@@ -743,6 +777,6 @@ async def stream_openai_response_with_save(
|
|
|
743
777
|
user_id=user_id,
|
|
744
778
|
compress=True, # Compress long assistant responses
|
|
745
779
|
)
|
|
746
|
-
logger.debug(f"Saved assistant response to session {session_id} ({len(full_content)} chars)")
|
|
780
|
+
logger.debug(f"Saved assistant response {message_id} to session {session_id} ({len(full_content)} chars)")
|
|
747
781
|
except Exception as e:
|
|
748
782
|
logger.error(f"Failed to save assistant response: {e}", exc_info=True)
|
rem/api/routers/feedback.py
CHANGED
|
@@ -7,16 +7,64 @@ Endpoints:
|
|
|
7
7
|
POST /api/v1/messages/feedback - Submit feedback on a message
|
|
8
8
|
|
|
9
9
|
Trace Integration:
|
|
10
|
-
- Feedback
|
|
11
|
-
- Phoenix sync attaches feedback as span annotations
|
|
10
|
+
- Feedback auto-resolves trace_id/span_id from the message in the database
|
|
11
|
+
- Phoenix sync attaches feedback as span annotations when trace info is available
|
|
12
|
+
|
|
13
|
+
HTTP Status Codes:
|
|
14
|
+
- 201: Feedback saved AND synced to Phoenix as annotation (phoenix_synced=true)
|
|
15
|
+
- 200: Feedback accepted and saved to DB, but NOT synced to Phoenix
|
|
16
|
+
(missing trace_id/span_id, Phoenix disabled, or sync failed)
|
|
17
|
+
|
|
18
|
+
IMPORTANT - Testing Requirements:
|
|
19
|
+
╔════════════════════════════════════════════════════════════════════════════════════════════════════╗
|
|
20
|
+
║ 1. Use 'rem' agent (NOT 'simulator') - only real agents capture traces ║
|
|
21
|
+
║ 2. Session IDs MUST be UUIDs - use python3 -c "import uuid; print(uuid.uuid4())" ║
|
|
22
|
+
║ 3. Port-forward OTEL collector: kubectl port-forward -n observability ║
|
|
23
|
+
║ svc/otel-collector-collector 4318:4318 ║
|
|
24
|
+
║ 4. Port-forward Phoenix: kubectl port-forward -n siggy svc/phoenix 6006:6006 ║
|
|
25
|
+
║ 5. Set environment variables when starting the API: ║
|
|
26
|
+
║ OTEL__ENABLED=true PHOENIX__ENABLED=true PHOENIX_API_KEY=<jwt> uvicorn ... ║
|
|
27
|
+
║ 6. Get PHOENIX_API_KEY: ║
|
|
28
|
+
║ kubectl get secret -n siggy rem-phoenix-api-key -o jsonpath='{.data.PHOENIX_API_KEY}' ║
|
|
29
|
+
║ | base64 -d ║
|
|
30
|
+
╚════════════════════════════════════════════════════════════════════════════════════════════════════╝
|
|
31
|
+
|
|
32
|
+
Usage:
|
|
33
|
+
# 1. Send a chat message with X-Session-Id header (MUST be UUID!)
|
|
34
|
+
SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())")
|
|
35
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \\
|
|
36
|
+
-H "Content-Type: application/json" \\
|
|
37
|
+
-H "X-Session-Id: $SESSION_ID" \\
|
|
38
|
+
-H "X-Agent-Schema: rem" \\
|
|
39
|
+
-d '{"messages": [{"role": "user", "content": "hello"}], "stream": true}'
|
|
40
|
+
|
|
41
|
+
# 2. Extract message_id from the 'metadata' SSE event:
|
|
42
|
+
# event: metadata
|
|
43
|
+
# data: {"message_id": "728882f8-...", "trace_id": "e53c701c...", ...}
|
|
44
|
+
|
|
45
|
+
# 3. Submit feedback referencing that message (trace_id auto-resolved from DB)
|
|
46
|
+
curl -X POST http://localhost:8000/api/v1/messages/feedback \\
|
|
47
|
+
-H "Content-Type: application/json" \\
|
|
48
|
+
-H "X-Tenant-Id: default" \\
|
|
49
|
+
-d '{
|
|
50
|
+
"session_id": "'$SESSION_ID'",
|
|
51
|
+
"message_id": "<message-id-from-metadata>",
|
|
52
|
+
"rating": 1,
|
|
53
|
+
"categories": ["helpful"],
|
|
54
|
+
"comment": "Great response!"
|
|
55
|
+
}'
|
|
56
|
+
|
|
57
|
+
# 4. Check response:
|
|
58
|
+
# - 201 + phoenix_synced=true = annotation synced to Phoenix (check Phoenix UI at :6006)
|
|
59
|
+
# - 200 + phoenix_synced=false = feedback saved but not synced (missing trace info)
|
|
12
60
|
"""
|
|
13
61
|
|
|
14
|
-
from fastapi import APIRouter, Header, HTTPException, Request
|
|
62
|
+
from fastapi import APIRouter, Header, HTTPException, Request, Response
|
|
15
63
|
from loguru import logger
|
|
16
64
|
from pydantic import BaseModel, Field
|
|
17
65
|
|
|
18
66
|
from ..deps import get_user_id_from_request
|
|
19
|
-
from ...models.entities import Feedback
|
|
67
|
+
from ...models.entities import Feedback
|
|
20
68
|
from ...services.postgres import Repository
|
|
21
69
|
from ...settings import settings
|
|
22
70
|
|
|
@@ -73,9 +121,10 @@ class FeedbackResponse(BaseModel):
|
|
|
73
121
|
# =============================================================================
|
|
74
122
|
|
|
75
123
|
|
|
76
|
-
@router.post("/messages/feedback", response_model=FeedbackResponse
|
|
124
|
+
@router.post("/messages/feedback", response_model=FeedbackResponse)
|
|
77
125
|
async def submit_feedback(
|
|
78
126
|
request: Request,
|
|
127
|
+
response: Response,
|
|
79
128
|
request_body: FeedbackCreateRequest,
|
|
80
129
|
x_tenant_id: str = Header(alias="X-Tenant-Id", default="default"),
|
|
81
130
|
) -> FeedbackResponse:
|
|
@@ -89,8 +138,12 @@ async def submit_feedback(
|
|
|
89
138
|
- Provided explicitly in the request
|
|
90
139
|
- Auto-resolved from the message if message_id is provided
|
|
91
140
|
|
|
141
|
+
HTTP Status Codes:
|
|
142
|
+
- 201: Feedback saved AND synced to Phoenix (phoenix_synced=true)
|
|
143
|
+
- 200: Feedback accepted but NOT synced (missing trace info, disabled, or failed)
|
|
144
|
+
|
|
92
145
|
Returns:
|
|
93
|
-
Created feedback object
|
|
146
|
+
Created feedback object with phoenix_synced indicating sync status
|
|
94
147
|
"""
|
|
95
148
|
if not settings.postgres.enabled:
|
|
96
149
|
raise HTTPException(status_code=503, detail="Database not enabled")
|
|
@@ -102,11 +155,44 @@ async def submit_feedback(
|
|
|
102
155
|
span_id = request_body.span_id
|
|
103
156
|
|
|
104
157
|
if request_body.message_id and (not trace_id or not span_id):
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
158
|
+
# Look up message by ID to get trace context
|
|
159
|
+
# Note: Messages are stored with tenant_id=user_id (not x_tenant_id header)
|
|
160
|
+
# so we query by ID only - UUIDs are globally unique
|
|
161
|
+
from ...services.postgres import PostgresService
|
|
162
|
+
import uuid
|
|
163
|
+
|
|
164
|
+
logger.info(f"Looking up trace context for message_id={request_body.message_id}")
|
|
165
|
+
|
|
166
|
+
# Convert message_id string to UUID for database query
|
|
167
|
+
try:
|
|
168
|
+
message_uuid = uuid.UUID(request_body.message_id)
|
|
169
|
+
except ValueError as e:
|
|
170
|
+
logger.warning(f"Invalid message_id format '{request_body.message_id}': {e}")
|
|
171
|
+
message_uuid = None
|
|
172
|
+
|
|
173
|
+
if message_uuid:
|
|
174
|
+
db = PostgresService()
|
|
175
|
+
# Ensure connection (same pattern as Repository)
|
|
176
|
+
if not db.pool:
|
|
177
|
+
await db.connect()
|
|
178
|
+
|
|
179
|
+
if db.pool:
|
|
180
|
+
query = """
|
|
181
|
+
SELECT trace_id, span_id FROM messages
|
|
182
|
+
WHERE id = $1 AND deleted_at IS NULL
|
|
183
|
+
LIMIT 1
|
|
184
|
+
"""
|
|
185
|
+
async with db.pool.acquire() as conn:
|
|
186
|
+
row = await conn.fetchrow(query, message_uuid)
|
|
187
|
+
logger.info(f"Database query result for message {request_body.message_id}: row={row}")
|
|
188
|
+
if row:
|
|
189
|
+
trace_id = trace_id or row["trace_id"]
|
|
190
|
+
span_id = span_id or row["span_id"]
|
|
191
|
+
logger.info(f"Found trace context for message {request_body.message_id}: trace_id={trace_id}, span_id={span_id}")
|
|
192
|
+
else:
|
|
193
|
+
logger.warning(f"No message found in database with id={request_body.message_id}")
|
|
194
|
+
else:
|
|
195
|
+
logger.warning(f"Database pool not available for message lookup after connect attempt")
|
|
110
196
|
|
|
111
197
|
feedback = Feedback(
|
|
112
198
|
session_id=request_body.session_id,
|
|
@@ -130,9 +216,43 @@ async def submit_feedback(
|
|
|
130
216
|
f"message={request_body.message_id}, rating={request_body.rating}"
|
|
131
217
|
)
|
|
132
218
|
|
|
133
|
-
#
|
|
134
|
-
|
|
135
|
-
|
|
219
|
+
# Sync to Phoenix if trace_id/span_id available and Phoenix is enabled
|
|
220
|
+
phoenix_synced = False
|
|
221
|
+
phoenix_annotation_id = None
|
|
222
|
+
|
|
223
|
+
if trace_id and span_id and settings.phoenix.enabled:
|
|
224
|
+
try:
|
|
225
|
+
from ...services.phoenix import PhoenixClient
|
|
226
|
+
|
|
227
|
+
phoenix_client = PhoenixClient()
|
|
228
|
+
phoenix_annotation_id = phoenix_client.sync_user_feedback(
|
|
229
|
+
span_id=span_id,
|
|
230
|
+
rating=request_body.rating,
|
|
231
|
+
categories=request_body.categories,
|
|
232
|
+
comment=request_body.comment,
|
|
233
|
+
feedback_id=str(result.id),
|
|
234
|
+
trace_id=trace_id,
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
if phoenix_annotation_id:
|
|
238
|
+
phoenix_synced = True
|
|
239
|
+
# Update the feedback record with sync status
|
|
240
|
+
result.phoenix_synced = True
|
|
241
|
+
result.phoenix_annotation_id = phoenix_annotation_id
|
|
242
|
+
await repo.upsert(result)
|
|
243
|
+
logger.info(f"Feedback synced to Phoenix: annotation_id={phoenix_annotation_id}")
|
|
244
|
+
else:
|
|
245
|
+
logger.warning(f"Phoenix sync returned no annotation ID for feedback {result.id}")
|
|
246
|
+
|
|
247
|
+
except Exception as e:
|
|
248
|
+
logger.error(f"Failed to sync feedback to Phoenix: {e}")
|
|
249
|
+
# Don't fail the request if Phoenix sync fails
|
|
250
|
+
elif trace_id and span_id:
|
|
251
|
+
logger.debug(f"Feedback has trace info but Phoenix disabled: trace={trace_id}, span={span_id}")
|
|
252
|
+
|
|
253
|
+
# Set HTTP status code based on Phoenix sync result
|
|
254
|
+
# 201 = synced to Phoenix, 200 = accepted but not synced
|
|
255
|
+
response.status_code = 201 if phoenix_synced else 200
|
|
136
256
|
|
|
137
257
|
return FeedbackResponse(
|
|
138
258
|
id=str(result.id),
|
rem/auth/middleware.py
CHANGED
|
@@ -6,6 +6,7 @@ Supports anonymous access with rate limiting when allow_anonymous=True.
|
|
|
6
6
|
MCP endpoints are always protected unless explicitly disabled.
|
|
7
7
|
|
|
8
8
|
Design Pattern:
|
|
9
|
+
- Check X-API-Key header first (if API key auth enabled)
|
|
9
10
|
- Check session for user on protected paths
|
|
10
11
|
- Check Bearer token for dev token (non-production only)
|
|
11
12
|
- MCP paths always require authentication (protected service)
|
|
@@ -20,6 +21,12 @@ Access Modes (configured in settings.auth):
|
|
|
20
21
|
- mcp_requires_auth=true (default): MCP always requires login regardless of allow_anonymous
|
|
21
22
|
- mcp_requires_auth=false: MCP follows normal allow_anonymous rules (dev only)
|
|
22
23
|
|
|
24
|
+
API Key Authentication (configured in settings.api):
|
|
25
|
+
- api_key_enabled=true: Require X-API-Key header for protected endpoints
|
|
26
|
+
- api_key: The secret key to validate against
|
|
27
|
+
- Provides simple programmatic access without OAuth flow
|
|
28
|
+
- X-API-Key header takes precedence over session auth
|
|
29
|
+
|
|
23
30
|
Dev Token Support (non-production only):
|
|
24
31
|
- GET /api/auth/dev/token returns a Bearer token for test-user
|
|
25
32
|
- Include as: Authorization: Bearer dev_<signature>
|
|
@@ -82,6 +89,39 @@ class AuthMiddleware(BaseHTTPMiddleware):
|
|
|
82
89
|
self.mcp_requires_auth = mcp_requires_auth
|
|
83
90
|
self.mcp_path = mcp_path
|
|
84
91
|
|
|
92
|
+
def _check_api_key(self, request: Request) -> dict | None:
    """
    Authenticate a request via its X-API-Key header.

    Args:
        request: Incoming request whose headers are inspected.

    Returns:
        A synthetic API-key user dict when API key auth is enabled and the
        supplied key matches settings.api.api_key. None when API key auth
        is disabled, the header is missing or empty, no key is configured,
        or the supplied key does not match.
    """
    # Function-scope import so this method does not depend on a
    # module-level import being present.
    import hmac

    # Only check if API key auth is enabled
    if not settings.api.api_key_enabled:
        return None

    # Check for X-API-Key header
    api_key = request.headers.get("x-api-key")
    if not api_key:
        return None

    # Validate against configured API key.
    # NOTE(review): assumes settings.api.api_key is a plain str (the
    # previous code compared it to the header string with ==) - confirm.
    expected_key = settings.api.api_key
    # Compare in constant time so response timing does not reveal how much
    # of a guessed key is correct. Both sides are encoded to bytes because
    # compare_digest rejects str values containing non-ASCII characters.
    if expected_key and hmac.compare_digest(
        api_key.encode("utf-8"), expected_key.encode("utf-8")
    ):
        logger.debug("X-API-Key authenticated")
        return {
            "id": "api-key-user",
            "email": "api@rem.local",
            "name": "API Key User",
            "provider": "api-key",
            "tenant_id": "default",
            "tier": "pro",  # API key users get full access
            "roles": ["user"],
        }

    # Invalid API key
    logger.warning("Invalid X-API-Key provided")
    return None
|
|
124
|
+
|
|
85
125
|
def _check_dev_token(self, request: Request) -> dict | None:
|
|
86
126
|
"""
|
|
87
127
|
Check for valid dev token in Authorization header (non-production only).
|
|
@@ -105,7 +145,7 @@ class AuthMiddleware(BaseHTTPMiddleware):
|
|
|
105
145
|
# Verify dev token
|
|
106
146
|
from ..api.routers.dev import verify_dev_token
|
|
107
147
|
if verify_dev_token(token):
|
|
108
|
-
logger.debug(
|
|
148
|
+
logger.debug("Dev token authenticated as test-user")
|
|
109
149
|
return {
|
|
110
150
|
"id": "test-user",
|
|
111
151
|
"email": "test@rem.local",
|
|
@@ -142,6 +182,31 @@ class AuthMiddleware(BaseHTTPMiddleware):
|
|
|
142
182
|
if not is_protected or is_excluded:
|
|
143
183
|
return await call_next(request)
|
|
144
184
|
|
|
185
|
+
# Check for X-API-Key header first (if enabled)
|
|
186
|
+
api_key_user = self._check_api_key(request)
|
|
187
|
+
if api_key_user:
|
|
188
|
+
request.state.user = api_key_user
|
|
189
|
+
request.state.is_anonymous = False
|
|
190
|
+
return await call_next(request)
|
|
191
|
+
|
|
192
|
+
# If API key auth is enabled but no valid key provided, reject immediately
|
|
193
|
+
if settings.api.api_key_enabled:
|
|
194
|
+
# Check if X-API-Key header was provided but invalid
|
|
195
|
+
if request.headers.get("x-api-key"):
|
|
196
|
+
logger.warning(f"Invalid X-API-Key for: {path}")
|
|
197
|
+
return JSONResponse(
|
|
198
|
+
status_code=401,
|
|
199
|
+
content={"detail": "Invalid API key"},
|
|
200
|
+
headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
|
|
201
|
+
)
|
|
202
|
+
# No API key provided when required
|
|
203
|
+
logger.debug(f"Missing X-API-Key for: {path}")
|
|
204
|
+
return JSONResponse(
|
|
205
|
+
status_code=401,
|
|
206
|
+
content={"detail": "API key required. Include X-API-Key header."},
|
|
207
|
+
headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
|
|
208
|
+
)
|
|
209
|
+
|
|
145
210
|
# Check for dev token (non-production only)
|
|
146
211
|
dev_user = self._check_dev_token(request)
|
|
147
212
|
if dev_user:
|