remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic; consult the registry's advisory page for details.

Files changed (44)
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +23 -3
  3. rem/agentic/mcp/tool_wrapper.py +126 -15
  4. rem/agentic/otel/setup.py +1 -0
  5. rem/agentic/providers/phoenix.py +371 -108
  6. rem/agentic/providers/pydantic_ai.py +122 -43
  7. rem/agentic/schema.py +4 -1
  8. rem/api/mcp_router/tools.py +13 -2
  9. rem/api/routers/chat/completions.py +250 -4
  10. rem/api/routers/chat/models.py +81 -7
  11. rem/api/routers/chat/otel_utils.py +33 -0
  12. rem/api/routers/chat/sse_events.py +17 -1
  13. rem/api/routers/chat/streaming.py +35 -1
  14. rem/api/routers/feedback.py +134 -14
  15. rem/auth/middleware.py +66 -1
  16. rem/cli/commands/cluster.py +590 -82
  17. rem/cli/commands/configure.py +3 -4
  18. rem/cli/commands/experiments.py +468 -76
  19. rem/cli/commands/session.py +336 -0
  20. rem/cli/dreaming.py +2 -2
  21. rem/cli/main.py +2 -0
  22. rem/config.py +8 -1
  23. rem/models/core/experiment.py +58 -14
  24. rem/models/entities/ontology.py +1 -1
  25. rem/models/entities/ontology_config.py +1 -1
  26. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  27. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  28. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  29. rem/services/phoenix/client.py +59 -18
  30. rem/services/postgres/pydantic_to_sqlalchemy.py +9 -12
  31. rem/services/session/compression.py +7 -0
  32. rem/settings.py +260 -17
  33. rem/sql/migrations/002_install_models.sql +91 -91
  34. rem/sql/migrations/004_cache_system.sql +1 -1
  35. rem/utils/README.md +45 -0
  36. rem/utils/files.py +157 -1
  37. rem/utils/schema_loader.py +94 -3
  38. rem/utils/vision.py +1 -1
  39. rem/workers/__init__.py +2 -1
  40. rem/workers/db_listener.py +579 -0
  41. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/METADATA +161 -147
  42. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/RECORD +44 -41
  43. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/WHEEL +0 -0
  44. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/entry_points.txt +0 -0
@@ -1,14 +1,38 @@
1
1
  """
2
2
  OpenAI-compatible API models for chat completions.
3
3
 
4
- Design Pattern
4
+ Design Pattern:
5
5
  - Full OpenAI compatibility for drop-in replacement
6
6
  - Support for streaming (SSE) and non-streaming modes
7
7
  - Response format control (text vs json_object)
8
- - Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Agent-Schema, etc.)
8
+ - Headers map to AgentContext for session/context control
9
+ - Body fields for OpenAI-compatible parameters + metadata
10
+
11
+ Headers (context control):
12
+ X-User-Id → context.user_id (user identifier)
13
+ X-Tenant-Id → context.tenant_id (multi-tenancy, default: "default")
14
+ X-Session-Id → context.session_id (conversation continuity)
15
+ X-Agent-Schema → context.agent_schema_uri (which agent to use, default: "rem")
16
+ X-Model-Name → context.default_model (model override)
17
+ X-Chat-Is-Audio → triggers audio transcription ("true"/"false")
18
+ X-Is-Eval → context.is_eval (marks session as evaluation, sets mode=EVALUATION)
19
+
20
+ Body Fields (OpenAI-compatible + extensions):
21
+ model → LLM model (e.g., "openai:gpt-4.1", "anthropic:claude-sonnet-4-5-20250929")
22
+ messages → Chat conversation history
23
+ temperature → Sampling temperature (0-2)
24
+ max_tokens → Max tokens (deprecated, use max_completion_tokens)
25
+ max_completion_tokens → Max tokens to generate
26
+ stream → Enable SSE streaming
27
+ metadata → Key-value pairs merged with session metadata (for evals/experiments)
28
+ store → Whether to store for distillation/evaluation
29
+ seed → Deterministic sampling seed
30
+ top_p → Nucleus sampling probability
31
+ reasoning_effort → low/medium/high for o-series models
32
+ service_tier → auto/flex/priority/default
9
33
  """
10
34
 
11
- from typing import Literal
35
+ from typing import Any, Literal
12
36
 
13
37
  from pydantic import BaseModel, Field
14
38
 
@@ -46,10 +70,17 @@ class ChatCompletionRequest(BaseModel):
46
70
  Compatible with OpenAI's /v1/chat/completions endpoint.
47
71
 
48
72
  Headers Map to AgentContext:
49
- - X-User-Id → context.user_id
50
- - X-Tenant-Id → context.tenant_id
51
- - X-Session-Id → context.session_id
52
- - X-Agent-Schema → context.agent_schema_uri
73
+ X-User-Id → context.user_id
74
+ X-Tenant-Id → context.tenant_id (default: "default")
75
+ X-Session-Id → context.session_id
76
+ X-Agent-Schema → context.agent_schema_uri (default: "rem")
77
+ X-Model-Name → context.default_model
78
+ X-Chat-Is-Audio → triggers audio transcription
79
+ X-Is-Eval → context.is_eval (sets session mode=EVALUATION)
80
+
81
+ Body Fields for Metadata/Evals:
82
+ metadata → Key-value pairs merged with session metadata
83
+ store → Whether to store for distillation/evaluation
53
84
 
54
85
  Note: Model is specified in body.model (standard OpenAI field), not headers.
55
86
  """
@@ -73,6 +104,49 @@ class ChatCompletionRequest(BaseModel):
73
104
  default=None,
74
105
  description="Response format. Set type='json_object' to enable JSON mode.",
75
106
  )
107
+ # Additional OpenAI-compatible fields
108
+ metadata: dict[str, str] | None = Field(
109
+ default=None,
110
+ description="Key-value pairs attached to the request (max 16 keys, 64/512 char limits). "
111
+ "Merged with session metadata for persistence.",
112
+ )
113
+ store: bool | None = Field(
114
+ default=None,
115
+ description="Whether to store for distillation/evaluation purposes.",
116
+ )
117
+ max_completion_tokens: int | None = Field(
118
+ default=None,
119
+ ge=1,
120
+ description="Max tokens to generate (replaces deprecated max_tokens).",
121
+ )
122
+ seed: int | None = Field(
123
+ default=None,
124
+ description="Seed for deterministic sampling (best effort).",
125
+ )
126
+ top_p: float | None = Field(
127
+ default=None,
128
+ ge=0,
129
+ le=1,
130
+ description="Nucleus sampling probability. Use temperature OR top_p, not both.",
131
+ )
132
+ logprobs: bool | None = Field(
133
+ default=None,
134
+ description="Whether to return log probabilities for output tokens.",
135
+ )
136
+ top_logprobs: int | None = Field(
137
+ default=None,
138
+ ge=0,
139
+ le=20,
140
+ description="Number of most likely tokens to return at each position (requires logprobs=true).",
141
+ )
142
+ reasoning_effort: Literal["low", "medium", "high"] | None = Field(
143
+ default=None,
144
+ description="Reasoning effort for o-series models (low/medium/high).",
145
+ )
146
+ service_tier: Literal["auto", "flex", "priority", "default"] | None = Field(
147
+ default=None,
148
+ description="Service tier for processing (flex is 50% cheaper but slower).",
149
+ )
76
150
 
77
151
 
78
152
  # Response models
@@ -0,0 +1,33 @@
1
+ """OTEL utilities for chat routers."""
2
+
3
+ from loguru import logger
4
+
5
+
6
+ def get_tracer():
7
+ """Get the OpenTelemetry tracer for chat completions."""
8
+ try:
9
+ from opentelemetry import trace
10
+ return trace.get_tracer("rem.chat.completions")
11
+ except Exception:
12
+ return None
13
+
14
+
15
+ def get_current_trace_context() -> tuple[str | None, str | None]:
16
+ """Get trace_id and span_id from current OTEL context.
17
+
18
+ Returns:
19
+ Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
20
+ """
21
+ try:
22
+ from opentelemetry import trace
23
+
24
+ span = trace.get_current_span()
25
+ ctx = span.get_span_context()
26
+ if ctx.is_valid:
27
+ trace_id = format(ctx.trace_id, '032x')
28
+ span_id = format(ctx.span_id, '016x')
29
+ return trace_id, span_id
30
+ except Exception as e:
31
+ logger.debug(f"Could not get trace context: {e}")
32
+
33
+ return None, None
@@ -321,7 +321,13 @@ class MetadataEvent(BaseModel):
321
321
  # Agent info
322
322
  agent_schema: str | None = Field(
323
323
  default=None,
324
- description="Name of the agent schema used for this response (e.g., 'rem', 'Siggy')"
324
+ description="Name of the agent schema used for this response (e.g., 'rem', 'query-assistant')"
325
+ )
326
+
327
+ # Session info
328
+ session_name: str | None = Field(
329
+ default=None,
330
+ description="Short 1-3 phrase name for the session topic (e.g., 'Prescription Drug Questions', 'AWS Setup Help')"
325
331
  )
326
332
 
327
333
  # Quality indicators
@@ -350,6 +356,16 @@ class MetadataEvent(BaseModel):
350
356
  description="Token count for this response"
351
357
  )
352
358
 
359
+ # Trace context for observability (deterministic, captured from OTEL)
360
+ trace_id: str | None = Field(
361
+ default=None,
362
+ description="OTEL trace ID for correlating with Phoenix/observability systems"
363
+ )
364
+ span_id: str | None = Field(
365
+ default=None,
366
+ description="OTEL span ID for correlating with Phoenix/observability systems"
367
+ )
368
+
353
369
  # System flags
354
370
  flags: list[str] | None = Field(
355
371
  default=None,
@@ -47,6 +47,7 @@ from pydantic_ai.messages import (
47
47
  ToolCallPart,
48
48
  )
49
49
 
50
+ from .otel_utils import get_current_trace_context, get_tracer
50
51
  from .models import (
51
52
  ChatCompletionMessageDelta,
52
53
  ChatCompletionStreamChoice,
@@ -73,6 +74,8 @@ async def stream_openai_response(
73
74
  session_id: str | None = None,
74
75
  # Agent info for metadata
75
76
  agent_schema: str | None = None,
77
+ # Mutable container to capture trace context (deterministic, not AI-dependent)
78
+ trace_context_out: dict | None = None,
76
79
  ) -> AsyncGenerator[str, None]:
77
80
  """
78
81
  Stream Pydantic AI agent responses with rich SSE events.
@@ -156,6 +159,14 @@ async def stream_openai_response(
156
159
 
157
160
  # Use agent.iter() to get complete execution with tool calls
158
161
  async with agent.iter(prompt) as agent_run:
162
+ # Capture trace context IMMEDIATELY inside agent execution
163
+ # This is deterministic - it's the OTEL context from Pydantic AI instrumentation
164
+ # NOT dependent on any AI-generated content
165
+ captured_trace_id, captured_span_id = get_current_trace_context()
166
+ if trace_context_out is not None:
167
+ trace_context_out["trace_id"] = captured_trace_id
168
+ trace_context_out["span_id"] = captured_span_id
169
+
159
170
  async for node in agent_run:
160
171
  # Check if this is a model request node (includes tool calls)
161
172
  if Agent.is_model_request_node(node):
@@ -366,6 +377,8 @@ async def stream_openai_response(
366
377
  registered_sources = result_content.get("sources")
367
378
  registered_references = result_content.get("references")
368
379
  registered_flags = result_content.get("flags")
380
+ # Session naming
381
+ registered_session_name = result_content.get("session_name")
369
382
  # Risk assessment fields
370
383
  registered_risk_level = result_content.get("risk_level")
371
384
  registered_risk_score = result_content.get("risk_score")
@@ -376,6 +389,7 @@ async def stream_openai_response(
376
389
 
377
390
  logger.info(
378
391
  f"📊 Metadata registered: confidence={registered_confidence}, "
392
+ f"session_name={registered_session_name}, "
379
393
  f"risk_level={registered_risk_level}, sources={registered_sources}"
380
394
  )
381
395
 
@@ -398,6 +412,7 @@ async def stream_openai_response(
398
412
  in_reply_to=in_reply_to,
399
413
  session_id=session_id,
400
414
  agent_schema=agent_schema,
415
+ session_name=registered_session_name,
401
416
  confidence=registered_confidence,
402
417
  sources=registered_sources,
403
418
  model_version=model,
@@ -528,6 +543,9 @@ async def stream_openai_response(
528
543
  model_version=model,
529
544
  latency_ms=latency_ms,
530
545
  token_count=token_count,
546
+ # Include deterministic trace context captured from OTEL
547
+ trace_id=captured_trace_id,
548
+ span_id=captured_span_id,
531
549
  ))
532
550
 
533
551
  # Mark all progress complete
@@ -699,6 +717,14 @@ async def stream_openai_response_with_save(
699
717
  from ....services.session import SessionMessageStore
700
718
  from ....settings import settings
701
719
 
720
+ # Pre-generate message_id so it can be sent in metadata event
721
+ # This allows frontend to use it for feedback before DB persistence
722
+ message_id = str(uuid.uuid4())
723
+
724
+ # Mutable container for capturing trace context from inside agent execution
725
+ # This is deterministic - captured from OTEL instrumentation, not AI-generated
726
+ trace_context: dict = {}
727
+
702
728
  # Accumulate content during streaming
703
729
  accumulated_content = []
704
730
 
@@ -709,6 +735,8 @@ async def stream_openai_response_with_save(
709
735
  request_id=request_id,
710
736
  agent_schema=agent_schema,
711
737
  session_id=session_id,
738
+ message_id=message_id,
739
+ trace_context_out=trace_context, # Pass container to capture trace IDs
712
740
  ):
713
741
  yield chunk
714
742
 
@@ -730,10 +758,16 @@ async def stream_openai_response_with_save(
730
758
  # After streaming completes, save the assistant response
731
759
  if settings.postgres.enabled and session_id and accumulated_content:
732
760
  full_content = "".join(accumulated_content)
761
+ # Get captured trace context from container (deterministically captured inside agent execution)
762
+ captured_trace_id = trace_context.get("trace_id")
763
+ captured_span_id = trace_context.get("span_id")
733
764
  assistant_message = {
765
+ "id": message_id, # Use pre-generated ID for consistency with metadata event
734
766
  "role": "assistant",
735
767
  "content": full_content,
736
768
  "timestamp": to_iso(utc_now()),
769
+ "trace_id": captured_trace_id,
770
+ "span_id": captured_span_id,
737
771
  }
738
772
  try:
739
773
  store = SessionMessageStore(user_id=user_id or settings.test.effective_user_id)
@@ -743,6 +777,6 @@ async def stream_openai_response_with_save(
743
777
  user_id=user_id,
744
778
  compress=True, # Compress long assistant responses
745
779
  )
746
- logger.debug(f"Saved assistant response to session {session_id} ({len(full_content)} chars)")
780
+ logger.debug(f"Saved assistant response {message_id} to session {session_id} ({len(full_content)} chars)")
747
781
  except Exception as e:
748
782
  logger.error(f"Failed to save assistant response: {e}", exc_info=True)
@@ -7,16 +7,64 @@ Endpoints:
7
7
  POST /api/v1/messages/feedback - Submit feedback on a message
8
8
 
9
9
  Trace Integration:
10
- - Feedback can reference trace_id/span_id for OTEL integration
11
- - Phoenix sync attaches feedback as span annotations (async)
10
+ - Feedback auto-resolves trace_id/span_id from the message in the database
11
+ - Phoenix sync attaches feedback as span annotations when trace info is available
12
+
13
+ HTTP Status Codes:
14
+ - 201: Feedback saved AND synced to Phoenix as annotation (phoenix_synced=true)
15
+ - 200: Feedback accepted and saved to DB, but NOT synced to Phoenix
16
+ (missing trace_id/span_id, Phoenix disabled, or sync failed)
17
+
18
+ IMPORTANT - Testing Requirements:
19
+ ╔════════════════════════════════════════════════════════════════════════════════════════════════════╗
20
+ ║ 1. Use 'rem' agent (NOT 'simulator') - only real agents capture traces ║
21
+ ║ 2. Session IDs MUST be UUIDs - use python3 -c "import uuid; print(uuid.uuid4())" ║
22
+ ║ 3. Port-forward OTEL collector: kubectl port-forward -n observability ║
23
+ ║ svc/otel-collector-collector 4318:4318 ║
24
+ ║ 4. Port-forward Phoenix: kubectl port-forward -n siggy svc/phoenix 6006:6006 ║
25
+ ║ 5. Set environment variables when starting the API: ║
26
+ ║ OTEL__ENABLED=true PHOENIX__ENABLED=true PHOENIX_API_KEY=<jwt> uvicorn ... ║
27
+ ║ 6. Get PHOENIX_API_KEY: ║
28
+ ║ kubectl get secret -n siggy rem-phoenix-api-key -o jsonpath='{.data.PHOENIX_API_KEY}' ║
29
+ ║ | base64 -d ║
30
+ ╚════════════════════════════════════════════════════════════════════════════════════════════════════╝
31
+
32
+ Usage:
33
+ # 1. Send a chat message with X-Session-Id header (MUST be UUID!)
34
+ SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())")
35
+ curl -X POST http://localhost:8000/api/v1/chat/completions \\
36
+ -H "Content-Type: application/json" \\
37
+ -H "X-Session-Id: $SESSION_ID" \\
38
+ -H "X-Agent-Schema: rem" \\
39
+ -d '{"messages": [{"role": "user", "content": "hello"}], "stream": true}'
40
+
41
+ # 2. Extract message_id from the 'metadata' SSE event:
42
+ # event: metadata
43
+ # data: {"message_id": "728882f8-...", "trace_id": "e53c701c...", ...}
44
+
45
+ # 3. Submit feedback referencing that message (trace_id auto-resolved from DB)
46
+ curl -X POST http://localhost:8000/api/v1/messages/feedback \\
47
+ -H "Content-Type: application/json" \\
48
+ -H "X-Tenant-Id: default" \\
49
+ -d '{
50
+ "session_id": "'$SESSION_ID'",
51
+ "message_id": "<message-id-from-metadata>",
52
+ "rating": 1,
53
+ "categories": ["helpful"],
54
+ "comment": "Great response!"
55
+ }'
56
+
57
+ # 4. Check response:
58
+ # - 201 + phoenix_synced=true = annotation synced to Phoenix (check Phoenix UI at :6006)
59
+ # - 200 + phoenix_synced=false = feedback saved but not synced (missing trace info)
12
60
  """
13
61
 
14
- from fastapi import APIRouter, Header, HTTPException, Request
62
+ from fastapi import APIRouter, Header, HTTPException, Request, Response
15
63
  from loguru import logger
16
64
  from pydantic import BaseModel, Field
17
65
 
18
66
  from ..deps import get_user_id_from_request
19
- from ...models.entities import Feedback, Message
67
+ from ...models.entities import Feedback
20
68
  from ...services.postgres import Repository
21
69
  from ...settings import settings
22
70
 
@@ -73,9 +121,10 @@ class FeedbackResponse(BaseModel):
73
121
  # =============================================================================
74
122
 
75
123
 
76
- @router.post("/messages/feedback", response_model=FeedbackResponse, status_code=201)
124
+ @router.post("/messages/feedback", response_model=FeedbackResponse)
77
125
  async def submit_feedback(
78
126
  request: Request,
127
+ response: Response,
79
128
  request_body: FeedbackCreateRequest,
80
129
  x_tenant_id: str = Header(alias="X-Tenant-Id", default="default"),
81
130
  ) -> FeedbackResponse:
@@ -89,8 +138,12 @@ async def submit_feedback(
89
138
  - Provided explicitly in the request
90
139
  - Auto-resolved from the message if message_id is provided
91
140
 
141
+ HTTP Status Codes:
142
+ - 201: Feedback saved AND synced to Phoenix (phoenix_synced=true)
143
+ - 200: Feedback accepted but NOT synced (missing trace info, disabled, or failed)
144
+
92
145
  Returns:
93
- Created feedback object
146
+ Created feedback object with phoenix_synced indicating sync status
94
147
  """
95
148
  if not settings.postgres.enabled:
96
149
  raise HTTPException(status_code=503, detail="Database not enabled")
@@ -102,11 +155,44 @@ async def submit_feedback(
102
155
  span_id = request_body.span_id
103
156
 
104
157
  if request_body.message_id and (not trace_id or not span_id):
105
- message_repo = Repository(Message, table_name="messages")
106
- message = await message_repo.get_by_id(request_body.message_id, x_tenant_id)
107
- if message:
108
- trace_id = trace_id or message.trace_id
109
- span_id = span_id or message.span_id
158
+ # Look up message by ID to get trace context
159
+ # Note: Messages are stored with tenant_id=user_id (not x_tenant_id header)
160
+ # so we query by ID only - UUIDs are globally unique
161
+ from ...services.postgres import PostgresService
162
+ import uuid
163
+
164
+ logger.info(f"Looking up trace context for message_id={request_body.message_id}")
165
+
166
+ # Convert message_id string to UUID for database query
167
+ try:
168
+ message_uuid = uuid.UUID(request_body.message_id)
169
+ except ValueError as e:
170
+ logger.warning(f"Invalid message_id format '{request_body.message_id}': {e}")
171
+ message_uuid = None
172
+
173
+ if message_uuid:
174
+ db = PostgresService()
175
+ # Ensure connection (same pattern as Repository)
176
+ if not db.pool:
177
+ await db.connect()
178
+
179
+ if db.pool:
180
+ query = """
181
+ SELECT trace_id, span_id FROM messages
182
+ WHERE id = $1 AND deleted_at IS NULL
183
+ LIMIT 1
184
+ """
185
+ async with db.pool.acquire() as conn:
186
+ row = await conn.fetchrow(query, message_uuid)
187
+ logger.info(f"Database query result for message {request_body.message_id}: row={row}")
188
+ if row:
189
+ trace_id = trace_id or row["trace_id"]
190
+ span_id = span_id or row["span_id"]
191
+ logger.info(f"Found trace context for message {request_body.message_id}: trace_id={trace_id}, span_id={span_id}")
192
+ else:
193
+ logger.warning(f"No message found in database with id={request_body.message_id}")
194
+ else:
195
+ logger.warning(f"Database pool not available for message lookup after connect attempt")
110
196
 
111
197
  feedback = Feedback(
112
198
  session_id=request_body.session_id,
@@ -130,9 +216,43 @@ async def submit_feedback(
130
216
  f"message={request_body.message_id}, rating={request_body.rating}"
131
217
  )
132
218
 
133
- # TODO: Async sync to Phoenix if trace_id/span_id available
134
- if trace_id and span_id:
135
- logger.debug(f"Feedback has trace info: trace={trace_id}, span={span_id}")
219
+ # Sync to Phoenix if trace_id/span_id available and Phoenix is enabled
220
+ phoenix_synced = False
221
+ phoenix_annotation_id = None
222
+
223
+ if trace_id and span_id and settings.phoenix.enabled:
224
+ try:
225
+ from ...services.phoenix import PhoenixClient
226
+
227
+ phoenix_client = PhoenixClient()
228
+ phoenix_annotation_id = phoenix_client.sync_user_feedback(
229
+ span_id=span_id,
230
+ rating=request_body.rating,
231
+ categories=request_body.categories,
232
+ comment=request_body.comment,
233
+ feedback_id=str(result.id),
234
+ trace_id=trace_id,
235
+ )
236
+
237
+ if phoenix_annotation_id:
238
+ phoenix_synced = True
239
+ # Update the feedback record with sync status
240
+ result.phoenix_synced = True
241
+ result.phoenix_annotation_id = phoenix_annotation_id
242
+ await repo.upsert(result)
243
+ logger.info(f"Feedback synced to Phoenix: annotation_id={phoenix_annotation_id}")
244
+ else:
245
+ logger.warning(f"Phoenix sync returned no annotation ID for feedback {result.id}")
246
+
247
+ except Exception as e:
248
+ logger.error(f"Failed to sync feedback to Phoenix: {e}")
249
+ # Don't fail the request if Phoenix sync fails
250
+ elif trace_id and span_id:
251
+ logger.debug(f"Feedback has trace info but Phoenix disabled: trace={trace_id}, span={span_id}")
252
+
253
+ # Set HTTP status code based on Phoenix sync result
254
+ # 201 = synced to Phoenix, 200 = accepted but not synced
255
+ response.status_code = 201 if phoenix_synced else 200
136
256
 
137
257
  return FeedbackResponse(
138
258
  id=str(result.id),
rem/auth/middleware.py CHANGED
@@ -6,6 +6,7 @@ Supports anonymous access with rate limiting when allow_anonymous=True.
6
6
  MCP endpoints are always protected unless explicitly disabled.
7
7
 
8
8
  Design Pattern:
9
+ - Check X-API-Key header first (if API key auth enabled)
9
10
  - Check session for user on protected paths
10
11
  - Check Bearer token for dev token (non-production only)
11
12
  - MCP paths always require authentication (protected service)
@@ -20,6 +21,12 @@ Access Modes (configured in settings.auth):
20
21
  - mcp_requires_auth=true (default): MCP always requires login regardless of allow_anonymous
21
22
  - mcp_requires_auth=false: MCP follows normal allow_anonymous rules (dev only)
22
23
 
24
+ API Key Authentication (configured in settings.api):
25
+ - api_key_enabled=true: Require X-API-Key header for protected endpoints
26
+ - api_key: The secret key to validate against
27
+ - Provides simple programmatic access without OAuth flow
28
+ - X-API-Key header takes precedence over session auth
29
+
23
30
  Dev Token Support (non-production only):
24
31
  - GET /api/auth/dev/token returns a Bearer token for test-user
25
32
  - Include as: Authorization: Bearer dev_<signature>
@@ -82,6 +89,39 @@ class AuthMiddleware(BaseHTTPMiddleware):
82
89
  self.mcp_requires_auth = mcp_requires_auth
83
90
  self.mcp_path = mcp_path
84
91
 
92
+ def _check_api_key(self, request: Request) -> dict | None:
93
+ """
94
+ Check for valid X-API-Key header.
95
+
96
+ Returns:
97
+ API key user dict if valid, None otherwise
98
+ """
99
+ # Only check if API key auth is enabled
100
+ if not settings.api.api_key_enabled:
101
+ return None
102
+
103
+ # Check for X-API-Key header
104
+ api_key = request.headers.get("x-api-key")
105
+ if not api_key:
106
+ return None
107
+
108
+ # Validate against configured API key
109
+ if settings.api.api_key and api_key == settings.api.api_key:
110
+ logger.debug("X-API-Key authenticated")
111
+ return {
112
+ "id": "api-key-user",
113
+ "email": "api@rem.local",
114
+ "name": "API Key User",
115
+ "provider": "api-key",
116
+ "tenant_id": "default",
117
+ "tier": "pro", # API key users get full access
118
+ "roles": ["user"],
119
+ }
120
+
121
+ # Invalid API key
122
+ logger.warning("Invalid X-API-Key provided")
123
+ return None
124
+
85
125
  def _check_dev_token(self, request: Request) -> dict | None:
86
126
  """
87
127
  Check for valid dev token in Authorization header (non-production only).
@@ -105,7 +145,7 @@ class AuthMiddleware(BaseHTTPMiddleware):
105
145
  # Verify dev token
106
146
  from ..api.routers.dev import verify_dev_token
107
147
  if verify_dev_token(token):
108
- logger.debug(f"Dev token authenticated as test-user")
148
+ logger.debug("Dev token authenticated as test-user")
109
149
  return {
110
150
  "id": "test-user",
111
151
  "email": "test@rem.local",
@@ -142,6 +182,31 @@ class AuthMiddleware(BaseHTTPMiddleware):
142
182
  if not is_protected or is_excluded:
143
183
  return await call_next(request)
144
184
 
185
+ # Check for X-API-Key header first (if enabled)
186
+ api_key_user = self._check_api_key(request)
187
+ if api_key_user:
188
+ request.state.user = api_key_user
189
+ request.state.is_anonymous = False
190
+ return await call_next(request)
191
+
192
+ # If API key auth is enabled but no valid key provided, reject immediately
193
+ if settings.api.api_key_enabled:
194
+ # Check if X-API-Key header was provided but invalid
195
+ if request.headers.get("x-api-key"):
196
+ logger.warning(f"Invalid X-API-Key for: {path}")
197
+ return JSONResponse(
198
+ status_code=401,
199
+ content={"detail": "Invalid API key"},
200
+ headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
201
+ )
202
+ # No API key provided when required
203
+ logger.debug(f"Missing X-API-Key for: {path}")
204
+ return JSONResponse(
205
+ status_code=401,
206
+ content={"detail": "API key required. Include X-API-Key header."},
207
+ headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
208
+ )
209
+
145
210
  # Check for dev token (non-production only)
146
211
  dev_user = self._check_dev_token(request)
147
212
  if dev_user: