remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,702 @@
1
+ """
2
+ OpenAI-compatible chat completions router for REM.
3
+
4
+ Quick Start (Local Development)
5
+ ===============================
6
+
7
+ NOTE: Local dev uses LOCAL databases (Postgres via Docker Compose on port 5050).
8
+ Do NOT port-forward databases. Only port-forward observability services.
9
+
10
+ IMPORTANT: Session IDs MUST be UUIDs. Non-UUID session IDs will cause message
11
+ storage issues and feedback will not work correctly.
12
+
13
+ 1. Port Forwarding (REQUIRED for trace capture and Phoenix sync):
14
+
15
+ # Terminal 1: OTEL Collector (HTTP) - sends traces to Phoenix
16
+ kubectl port-forward -n observability svc/otel-collector-collector 4318:4318
17
+
18
+ # Terminal 2: Phoenix UI - view traces at http://localhost:6006
19
+ kubectl port-forward -n siggy svc/phoenix 6006:6006
20
+
21
+ 2. Get Phoenix API Key (REQUIRED for feedback->Phoenix sync):
22
+
23
+ export PHOENIX_API_KEY=$(kubectl get secret -n siggy rem-phoenix-api-key \\
24
+ -o jsonpath='{.data.PHOENIX_API_KEY}' | base64 -d)
25
+
26
+ 3. Start API with OTEL and Phoenix enabled:
27
+
28
+ cd /path/to/remstack/rem
29
+ source .venv/bin/activate
30
+ OTEL__ENABLED=true \\
31
+ PHOENIX__ENABLED=true \\
32
+ PHOENIX_API_KEY="$PHOENIX_API_KEY" \\
33
+ uvicorn rem.api.main:app --host 0.0.0.0 --port 8000 --app-dir src
34
+
35
+ 4. Test Chat Request (session_id MUST be a UUID):
36
+
37
+ SESSION_ID=$(python3 -c "import uuid; print(uuid.uuid4())")
38
+ curl -s -N -X POST http://localhost:8000/api/v1/chat/completions \\
39
+ -H 'Content-Type: application/json' \\
40
+ -H "X-Session-Id: $SESSION_ID" \\
41
+ -H 'X-Agent-Schema: rem' \\
42
+ -d '{"messages": [{"role": "user", "content": "Hello"}], "stream": true}'
43
+
44
+ # Note: Use 'rem' agent schema (default) for real LLM responses.
45
+ # The 'simulator' agent is for testing SSE events without LLM calls.
46
+
47
+ 5. Submit Feedback on Response:
48
+
49
+ The metadata SSE event contains message_id and trace_id for feedback:
50
+ event: metadata
51
+ data: {"message_id": "728882f8-...", "trace_id": "e53c701c...", ...}
52
+
53
+ Use session_id (UUID you generated) and message_id to submit feedback:
54
+
55
+ curl -X POST http://localhost:8000/api/v1/messages/feedback \\
56
+ -H 'Content-Type: application/json' \\
57
+ -H 'X-Tenant-Id: default' \\
58
+ -d '{
59
+ "session_id": "<your-uuid-session-id>",
60
+ "message_id": "<message-id-from-metadata>",
61
+ "rating": 1,
62
+ "categories": ["helpful"],
63
+ "comment": "Good response"
64
+ }'
65
+
66
+ Expected response (201 = synced to Phoenix):
67
+ {"phoenix_synced": true, "trace_id": "e53c701c...", "span_id": "6432d497..."}
68
+
69
+ OTEL Architecture
70
+ =================
71
+
72
+ REM API --[OTLP/HTTP]--> OTEL Collector --[relay]--> Phoenix
73
+ (port 4318) (k8s: observability) (k8s: siggy)
74
+
75
+ Environment Variables:
76
+ OTEL__ENABLED=true Enable OTEL tracing (required for trace capture)
77
+ PHOENIX__ENABLED=true Enable Phoenix integration (required for feedback sync)
78
+ PHOENIX_API_KEY=<jwt> Phoenix API key (required for feedback->Phoenix sync)
79
+ OTEL__COLLECTOR_ENDPOINT Default: http://localhost:4318
80
+ OTEL__PROTOCOL Default: http (use port 4318, not gRPC 4317)
81
+
82
+ Design Pattern
83
+ ==============
84
+
85
+ - Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema, X-Is-Eval)
86
+ - ContextBuilder centralizes message construction with user profile + session history
87
+ - Body.model is the LLM model for Pydantic AI
88
+ - X-Agent-Schema header specifies which agent schema to use (defaults to 'rem')
89
+ - Support for streaming (SSE) and non-streaming modes
90
+ - Response format control (text vs json_object)
91
+ - OpenAI-compatible body fields: metadata, store, reasoning_effort, etc.
92
+
93
+ Context Building Flow:
94
+ 1. ContextBuilder.build_from_headers() extracts user_id, session_id from headers
95
+ 2. Session history ALWAYS loaded with compression (if session_id provided)
96
+ - Uses SessionMessageStore with compression to keep context efficient
97
+ - Long messages include REM LOOKUP hints: "... [REM LOOKUP session-{id}-msg-{index}] ..."
98
+ - Agent can retrieve full content on-demand using REM LOOKUP
99
+ 3. User profile provided as REM LOOKUP hint (on-demand by default)
100
+ - Agent receives: "User: {email}. To load user profile: Use REM LOOKUP \"{email}\""
101
+ - Agent decides whether to load profile based on query
102
+ 4. If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
103
+ 5. Combines: system context + compressed session history + new messages
104
+ 6. Agent receives complete message list ready for execution
105
+
106
+ Headers Mapping
107
+ X-User-Id → AgentContext.user_id
108
+ X-Tenant-Id → AgentContext.tenant_id
109
+ X-Session-Id → AgentContext.session_id (use UUID for new sessions)
110
+ X-Model-Name → AgentContext.default_model (overrides body.model)
111
+ X-Agent-Schema → AgentContext.agent_schema_uri (defaults to 'rem')
112
+ X-Is-Eval → AgentContext.is_eval (sets session mode to EVALUATION)
113
+
114
+ Default Agent:
115
+ If X-Agent-Schema header is not provided, the system loads 'rem' schema,
116
+ which is the REM expert assistant with comprehensive knowledge about:
117
+ - REM architecture and concepts
118
+ - Entity types and graph traversal
119
+ - REM queries (LOOKUP, FUZZY, TRAVERSE)
120
+ - Agent development with Pydantic AI
121
+ - Cloud infrastructure (EKS, Karpenter, CloudNativePG)
122
+
123
+ Example Request:
124
+ POST /api/v1/chat/completions
125
+ X-Tenant-Id: acme-corp
126
+ X-User-Id: user123
127
+ X-Session-Id: a1b2c3d4-e5f6-7890-abcd-ef1234567890 # UUID
128
+ X-Agent-Schema: rem # Optional, this is the default
129
+
130
+ {
131
+ "model": "openai:gpt-4o-mini",
132
+ "messages": [
133
+ {"role": "user", "content": "How do I create a new REM entity?"}
134
+ ],
135
+ "stream": true
136
+ }
137
+ """
138
+
139
+ import base64
140
+ import tempfile
141
+ import time
142
+ import uuid
143
+ from datetime import datetime
144
+ from pathlib import Path
145
+
146
+ from fastapi import APIRouter, Request
147
+ from fastapi.responses import StreamingResponse
148
+ from loguru import logger
149
+
150
+ from ....agentic.context import AgentContext
151
+ from ....agentic.context_builder import ContextBuilder
152
+ from ....agentic.providers.pydantic_ai import create_agent
153
+ from ....models.entities.session import Session, SessionMode
154
+ from ....services.audio.transcriber import AudioTranscriber
155
+ from ....services.postgres.repository import Repository
156
+ from ....services.session import SessionMessageStore, reload_session
157
+ from ....settings import settings
158
+ from ....utils.schema_loader import load_agent_schema, load_agent_schema_async
159
+ from .json_utils import extract_json_resilient
160
+ from .models import (
161
+ ChatCompletionChoice,
162
+ ChatCompletionRequest,
163
+ ChatCompletionResponse,
164
+ ChatCompletionUsage,
165
+ ChatMessage,
166
+ )
167
+ from .streaming import stream_openai_response, stream_openai_response_with_save, stream_simulator_response, save_user_message
168
+
169
# Router exposing the OpenAI-compatible chat endpoints under /api/v1.
router = APIRouter(prefix="/api/v1", tags=["chat"])

# Default agent schema file
# Used when the X-Agent-Schema header is absent; resolves to the bundled
# 'rem' expert-assistant schema (see module docstring, "Default Agent").
DEFAULT_AGENT_SCHEMA = "rem"
173
+
174
+
175
+ def get_current_trace_context() -> tuple[str | None, str | None]:
176
+ """Get trace_id and span_id from current OTEL context.
177
+
178
+ Returns:
179
+ Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
180
+ """
181
+ try:
182
+ from opentelemetry import trace
183
+ span = trace.get_current_span()
184
+ if span and span.get_span_context().is_valid:
185
+ ctx = span.get_span_context()
186
+ trace_id = format(ctx.trace_id, '032x')
187
+ span_id = format(ctx.span_id, '016x')
188
+ return trace_id, span_id
189
+ except Exception:
190
+ pass
191
+ return None, None
192
+
193
+
194
def get_tracer():
    """Return the OpenTelemetry tracer for chat completions.

    Returns None when the opentelemetry package is unavailable or the
    tracer cannot be obtained, so callers can treat tracing as optional.
    """
    try:
        from opentelemetry import trace as otel_trace

        tracer = otel_trace.get_tracer("rem.chat.completions")
    except Exception:
        return None
    return tracer
201
+
202
+
203
async def ensure_session_with_metadata(
    session_id: str,
    user_id: str | None,
    tenant_id: str,
    is_eval: bool,
    request_metadata: dict[str, str] | None,
    agent_schema: str | None = None,
) -> None:
    """
    Ensure a session row exists and carries the request's metadata/mode.

    Creates the session (keyed by the provided UUID) when missing; otherwise
    merges request metadata into the existing record and, when the X-Is-Eval
    header was set, switches the session mode to EVALUATION. Any failure is
    logged and swallowed — session bookkeeping must never fail the request.

    Args:
        session_id: Session UUID from X-Session-Id header
        user_id: User identifier
        tenant_id: Tenant identifier
        is_eval: Whether this is an evaluation session
        request_metadata: Metadata from request body to merge
        agent_schema: Optional agent schema being used
    """
    if not settings.postgres.enabled:
        return

    try:
        repo = Repository(Session, table_name="sessions")

        # Sessions are keyed by the UUID supplied in X-Session-Id.
        existing = await repo.get_by_id(session_id)

        if existing is None:
            # First message of this session: create the row, defaulting the
            # display name to the UUID (may be replaced by an LLM-generated
            # name later).
            new_session = Session(
                id=session_id,
                name=session_id,
                mode=SessionMode.EVALUATION if is_eval else SessionMode.NORMAL,
                user_id=user_id,
                tenant_id=tenant_id,
                agent_schema_uri=agent_schema,
                metadata=request_metadata or {},
            )
            await repo.upsert(new_session)
            logger.info(f"Created session {session_id} (eval={is_eval})")
            return

        # Existing session: merge body metadata over stored metadata.
        merged_metadata = existing.metadata or {}
        if request_metadata:
            merged_metadata.update(request_metadata)

        # Write back only when something actually changed.
        needs_update = False
        if is_eval and existing.mode != SessionMode.EVALUATION:
            existing.mode = SessionMode.EVALUATION
            needs_update = True
        if request_metadata:
            existing.metadata = merged_metadata
            needs_update = True

        if needs_update:
            await repo.upsert(existing)
            logger.debug(f"Updated session {session_id} (eval={is_eval}, metadata keys={list(merged_metadata.keys())})")

    except Exception as e:
        # Non-critical - log but don't fail the request
        logger.error(f"Failed to ensure session metadata: {e}", exc_info=True)
269
+
270
+
271
+ @router.post("/chat/completions", response_model=None)
272
+ async def chat_completions(body: ChatCompletionRequest, request: Request):
273
+ """
274
+ OpenAI-compatible chat completions with REM agent support.
275
+
276
+ The 'model' field in the request body is the LLM model used by Pydantic AI.
277
+ The X-Agent-Schema header specifies which agent schema to use (defaults to 'rem').
278
+
279
+ Supported Headers:
280
+ | Header | Description | Maps To | Default |
281
+ |---------------------|--------------------------------------|--------------------------------|---------------|
282
+ | X-User-Id | User identifier | AgentContext.user_id | None |
283
+ | X-Tenant-Id | Tenant identifier (multi-tenancy) | AgentContext.tenant_id | "default" |
284
+ | X-Session-Id | Session/conversation identifier | AgentContext.session_id | None |
285
+ | X-Agent-Schema | Agent schema name | AgentContext.agent_schema_uri | "rem" |
286
+ | X-Is-Eval | Mark as evaluation session | AgentContext.is_eval | false |
287
+
288
+ Additional OpenAI-compatible Body Fields:
289
+ - metadata: Key-value pairs merged with session metadata (max 16 keys)
290
+ - store: Whether to store for distillation/evaluation
291
+ - max_completion_tokens: Max tokens to generate (replaces max_tokens)
292
+ - seed: Seed for deterministic sampling
293
+ - top_p: Nucleus sampling probability
294
+ - logprobs: Return log probabilities
295
+ - reasoning_effort: low/medium/high for o-series models
296
+ - service_tier: auto/flex/priority/default
297
+
298
+ Example Models:
299
+ - anthropic:claude-sonnet-4-5-20250929 (Claude 4.5 Sonnet)
300
+ - anthropic:claude-3-7-sonnet-20250219 (Claude 3.7 Sonnet)
301
+ - anthropic:claude-3-5-haiku-20241022 (Claude 3.5 Haiku)
302
+ - openai:gpt-4.1-turbo
303
+ - openai:gpt-4o
304
+ - openai:gpt-4o-mini
305
+
306
+ Response Formats:
307
+ - text (default): Plain text response
308
+ - json_object: Best-effort JSON extraction from agent output
309
+
310
+ Default Agent (rem):
311
+ - Expert assistant for REM system
312
+ - Comprehensive knowledge of REM architecture, concepts, and implementation
313
+ - Structured output with answer, confidence, and references
314
+
315
+ Session Management:
316
+ - Session history ALWAYS loaded with compression when X-Session-Id provided
317
+ - Uses SessionMessageStore with REM LOOKUP hints for long messages
318
+ - User profile provided as REM LOOKUP hint (on-demand by default)
319
+ - If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
320
+ - New messages saved to database with compression for session continuity
321
+ - When Postgres is disabled, session management is skipped
322
+
323
+ Evaluation Sessions:
324
+ - Set X-Is-Eval: true header to mark session as evaluation
325
+ - Session mode will be set to EVALUATION
326
+ - Request metadata is merged with session metadata
327
+ - Useful for A/B testing, model comparison, and feedback collection
328
+ """
329
+ # Load agent schema: use header value from context or default
330
+ # Extract AgentContext from request (gets user_id from JWT token)
331
+ temp_context = AgentContext.from_request(request)
332
+ schema_name = temp_context.agent_schema_uri or DEFAULT_AGENT_SCHEMA
333
+
334
+ # Resolve model: use body.model if provided, otherwise settings default
335
+ if body.model is None:
336
+ body.model = settings.llm.default_model
337
+ logger.debug(f"No model specified, using default: {body.model}")
338
+
339
+ # Special handling for simulator schema - no LLM, just generates demo SSE events
340
+ # Check BEFORE loading schema since simulator doesn't need a schema file
341
+ # Still builds full context and saves messages like a real agent
342
+ if schema_name == "simulator":
343
+ logger.info("Using SSE simulator (no LLM)")
344
+
345
+ # Build context just like real agents (loads session history, user context)
346
+ new_messages = [msg.model_dump() for msg in body.messages]
347
+ context, messages = await ContextBuilder.build_from_headers(
348
+ headers=dict(request.headers),
349
+ new_messages=new_messages,
350
+ user_id=temp_context.user_id, # From JWT token (source of truth)
351
+ )
352
+
353
+ # Ensure session exists with metadata and eval mode if applicable
354
+ if context.session_id:
355
+ await ensure_session_with_metadata(
356
+ session_id=context.session_id,
357
+ user_id=context.user_id,
358
+ tenant_id=context.tenant_id,
359
+ is_eval=context.is_eval,
360
+ request_metadata=body.metadata,
361
+ agent_schema="simulator",
362
+ )
363
+
364
+ # Get the last user message as prompt
365
+ prompt = body.messages[-1].content if body.messages else "demo"
366
+ request_id = f"sim-{uuid.uuid4().hex[:24]}"
367
+
368
+ # Generate message IDs upfront for correlation
369
+ user_message_id = str(uuid.uuid4())
370
+ assistant_message_id = str(uuid.uuid4())
371
+
372
+ # Simulated assistant response content (for persistence)
373
+ simulated_content = (
374
+ f"[SSE Simulator Response]\n\n"
375
+ f"This is a simulated response demonstrating all SSE event types:\n"
376
+ f"- reasoning events (model thinking)\n"
377
+ f"- text_delta events (streamed content)\n"
378
+ f"- progress events (multi-step operations)\n"
379
+ f"- tool_call events (function invocations)\n"
380
+ f"- action_request events (UI solicitation)\n"
381
+ f"- metadata events (confidence, sources, message IDs)\n\n"
382
+ f"Original prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}"
383
+ )
384
+
385
+ # Save messages to database (if session_id and postgres enabled)
386
+ if settings.postgres.enabled and context.session_id:
387
+ user_message = {
388
+ "id": user_message_id,
389
+ "role": "user",
390
+ "content": prompt,
391
+ "timestamp": datetime.utcnow().isoformat(),
392
+ }
393
+ assistant_message = {
394
+ "id": assistant_message_id,
395
+ "role": "assistant",
396
+ "content": simulated_content,
397
+ "timestamp": datetime.utcnow().isoformat(),
398
+ }
399
+
400
+ try:
401
+ store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
402
+ await store.store_session_messages(
403
+ session_id=context.session_id,
404
+ messages=[user_message, assistant_message],
405
+ user_id=context.user_id,
406
+ compress=True,
407
+ )
408
+ logger.info(f"Saved simulator conversation to session {context.session_id}")
409
+ except Exception as e:
410
+ # Log error but don't fail the request - session storage is non-critical
411
+ logger.error(f"Failed to save session messages: {e}", exc_info=True)
412
+
413
+ if body.stream:
414
+ return StreamingResponse(
415
+ stream_simulator_response(
416
+ prompt=prompt,
417
+ model="simulator-v1.0.0",
418
+ # Pass message correlation IDs
419
+ message_id=assistant_message_id,
420
+ in_reply_to=user_message_id,
421
+ session_id=context.session_id,
422
+ ),
423
+ media_type="text/event-stream",
424
+ headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
425
+ )
426
+ else:
427
+ # Non-streaming simulator returns simple JSON
428
+ return ChatCompletionResponse(
429
+ id=request_id,
430
+ created=int(time.time()),
431
+ model="simulator-v1.0.0",
432
+ choices=[
433
+ ChatCompletionChoice(
434
+ index=0,
435
+ message=ChatMessage(
436
+ role="assistant",
437
+ content=simulated_content,
438
+ ),
439
+ finish_reason="stop",
440
+ )
441
+ ],
442
+ usage=ChatCompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0),
443
+ )
444
+
445
+ # Load schema using centralized utility
446
+ # Enable database fallback to load dynamic agents stored in schemas table
447
+ # Use async version since we're in an async context (FastAPI endpoint)
448
+ user_id = temp_context.user_id or settings.test.effective_user_id
449
+ try:
450
+ agent_schema = await load_agent_schema_async(
451
+ schema_name,
452
+ user_id=user_id,
453
+ )
454
+ except FileNotFoundError:
455
+ # Fallback to default if specified schema not found
456
+ logger.warning(f"Schema '{schema_name}' not found, falling back to '{DEFAULT_AGENT_SCHEMA}'")
457
+ schema_name = DEFAULT_AGENT_SCHEMA
458
+ try:
459
+ agent_schema = load_agent_schema(schema_name)
460
+ except FileNotFoundError:
461
+ # No schema available at all
462
+ from fastapi import HTTPException
463
+
464
+ raise HTTPException(
465
+ status_code=500,
466
+ detail=f"Agent schema '{schema_name}' not found and default schema unavailable",
467
+ )
468
+
469
+ logger.debug(f"Using agent schema: {schema_name}, model: {body.model}")
470
+
471
+ # Check for audio input
472
+ is_audio = request.headers.get("x-chat-is-audio", "").lower() == "true"
473
+
474
+ # Process messages (transcribe audio if needed)
475
+ new_messages = [msg.model_dump() for msg in body.messages]
476
+
477
+ if is_audio and new_messages and new_messages[0]["role"] == "user":
478
+ # First user message should be base64-encoded audio
479
+ try:
480
+ audio_b64 = new_messages[0]["content"]
481
+ audio_bytes = base64.b64decode(audio_b64)
482
+
483
+ # Write to temp file for transcription
484
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
485
+ tmp_file.write(audio_bytes)
486
+ tmp_path = tmp_file.name
487
+
488
+ # Transcribe audio
489
+ transcriber = AudioTranscriber()
490
+ result = transcriber.transcribe_file(tmp_path)
491
+
492
+ # Replace audio content with transcribed text
493
+ new_messages[0]["content"] = result.text
494
+ logger.info(f"Transcribed audio: {len(result.text)} characters")
495
+
496
+ # Clean up temp file
497
+ Path(tmp_path).unlink()
498
+
499
+ except Exception as e:
500
+ logger.error(f"Failed to transcribe audio: {e}")
501
+ # Fall through with original content (will likely fail at agent)
502
+
503
+ # Use ContextBuilder to construct context and basic messages
504
+ # Note: We load session history separately for proper pydantic-ai message_history
505
+ context, messages = await ContextBuilder.build_from_headers(
506
+ headers=dict(request.headers),
507
+ new_messages=new_messages,
508
+ user_id=temp_context.user_id, # From JWT token (source of truth)
509
+ )
510
+
511
+ # Load raw session history for proper pydantic-ai message_history format
512
+ # This enables proper tool call/return pairing for LLM API compatibility
513
+ from ....services.session import SessionMessageStore, session_to_pydantic_messages, audit_session_history
514
+ from ....agentic.schema import get_system_prompt
515
+
516
+ pydantic_message_history = None
517
+ if context.session_id and settings.postgres.enabled:
518
+ try:
519
+ store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
520
+ raw_session_history = await store.load_session_messages(
521
+ session_id=context.session_id,
522
+ user_id=context.user_id,
523
+ compress_on_load=False, # Don't compress - we need full data for reconstruction
524
+ )
525
+ if raw_session_history:
526
+ # CRITICAL: Extract and pass the agent's system prompt
527
+ # pydantic-ai only auto-adds system prompts when message_history is empty
528
+ # When we pass message_history, we must include the system prompt ourselves
529
+ agent_system_prompt = get_system_prompt(agent_schema) if agent_schema else None
530
+ pydantic_message_history = session_to_pydantic_messages(
531
+ raw_session_history,
532
+ system_prompt=agent_system_prompt,
533
+ )
534
+ logger.debug(f"Converted {len(raw_session_history)} session messages to {len(pydantic_message_history)} pydantic-ai messages (with system prompt)")
535
+
536
+ # Audit session history if enabled (for debugging)
537
+ audit_session_history(
538
+ session_id=context.session_id,
539
+ agent_name=schema_name or "default",
540
+ prompt=body.messages[-1].content if body.messages else "",
541
+ raw_session_history=raw_session_history,
542
+ pydantic_messages_count=len(pydantic_message_history),
543
+ )
544
+ except Exception as e:
545
+ logger.warning(f"Failed to load session history for message_history: {e}")
546
+ # Fall back to old behavior (concatenated prompt)
547
+
548
+ logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
549
+
550
+ # Ensure session exists with metadata and eval mode if applicable
551
+ if context.session_id:
552
+ await ensure_session_with_metadata(
553
+ session_id=context.session_id,
554
+ user_id=context.user_id,
555
+ tenant_id=context.tenant_id,
556
+ is_eval=context.is_eval,
557
+ request_metadata=body.metadata,
558
+ agent_schema=schema_name,
559
+ )
560
+
561
+ # Create agent with schema and model override
562
+ agent = await create_agent(
563
+ context=context,
564
+ agent_schema_override=agent_schema,
565
+ model_override=body.model, # type: ignore[arg-type]
566
+ )
567
+
568
+ # Build the prompt for the agent
569
+ # If we have proper message_history, use just the latest user message as prompt
570
+ # Otherwise, fall back to concatenating all messages (legacy behavior)
571
+ if pydantic_message_history:
572
+ # Use the latest user message as the prompt, with history passed separately
573
+ user_prompt = body.messages[-1].content if body.messages else ""
574
+ prompt = user_prompt
575
+ logger.debug(f"Using message_history with {len(pydantic_message_history)} messages")
576
+ else:
577
+ # Legacy: Combine all messages into single prompt for agent
578
+ prompt = "\n".join(msg.content for msg in messages)
579
+
580
+ # Generate OpenAI-compatible request ID
581
+ request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
582
+
583
+ # Streaming mode
584
+ if body.stream:
585
+ # Save user message before streaming starts (using shared utility)
586
+ if context.session_id:
587
+ await save_user_message(
588
+ session_id=context.session_id,
589
+ user_id=context.user_id,
590
+ content=body.messages[-1].content if body.messages else "",
591
+ )
592
+
593
+ return StreamingResponse(
594
+ stream_openai_response_with_save(
595
+ agent=agent,
596
+ prompt=prompt,
597
+ model=body.model,
598
+ request_id=request_id,
599
+ agent_schema=schema_name,
600
+ session_id=context.session_id,
601
+ user_id=context.user_id,
602
+ agent_context=context, # Pass context for multi-agent support
603
+ message_history=pydantic_message_history, # Native pydantic-ai message history
604
+ ),
605
+ media_type="text/event-stream",
606
+ headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
607
+ )
608
+
609
+ # Non-streaming mode
610
+ # Create a parent span to capture trace context for message storage
611
+ trace_id, span_id = None, None
612
+ tracer = get_tracer()
613
+
614
+ if tracer:
615
+ with tracer.start_as_current_span(
616
+ "chat_completion",
617
+ attributes={
618
+ "session.id": context.session_id or "",
619
+ "user.id": context.user_id or "",
620
+ "model": body.model,
621
+ "agent.schema": context.agent_schema_uri or DEFAULT_AGENT_SCHEMA,
622
+ }
623
+ ) as span:
624
+ # Capture trace context from the span we just created
625
+ trace_id, span_id = get_current_trace_context()
626
+ if pydantic_message_history:
627
+ result = await agent.run(prompt, message_history=pydantic_message_history)
628
+ else:
629
+ result = await agent.run(prompt)
630
+ else:
631
+ # No tracer available, run without tracing
632
+ if pydantic_message_history:
633
+ result = await agent.run(prompt, message_history=pydantic_message_history)
634
+ else:
635
+ result = await agent.run(prompt)
636
+
637
+ # Determine content format based on response_format request
638
+ if body.response_format and body.response_format.type == "json_object":
639
+ # JSON mode: Best-effort extraction of JSON from agent output
640
+ content = extract_json_resilient(result.output) # type: ignore[attr-defined]
641
+ else:
642
+ # Text mode: Return as string (handle structured output)
643
+ from rem.agentic.serialization import serialize_agent_result_json
644
+ content = serialize_agent_result_json(result.output) # type: ignore[attr-defined]
645
+
646
+ # Get usage from result if available
647
+ usage = result.usage() if hasattr(result, "usage") else None
648
+ prompt_tokens = usage.input_tokens if usage else 0
649
+ completion_tokens = usage.output_tokens if usage else 0
650
+
651
+ # Save conversation messages to database (if session_id and postgres enabled)
652
+ if settings.postgres.enabled and context.session_id:
653
+ # Extract just the new user message (last message from body)
654
+ user_message = {
655
+ "role": "user",
656
+ "content": body.messages[-1].content if body.messages else "",
657
+ "timestamp": datetime.utcnow().isoformat(),
658
+ "trace_id": trace_id,
659
+ "span_id": span_id,
660
+ }
661
+
662
+ assistant_message = {
663
+ "role": "assistant",
664
+ "content": content,
665
+ "timestamp": datetime.utcnow().isoformat(),
666
+ "trace_id": trace_id,
667
+ "span_id": span_id,
668
+ }
669
+
670
+ try:
671
+ # Store messages with compression
672
+ store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
673
+
674
+ await store.store_session_messages(
675
+ session_id=context.session_id,
676
+ messages=[user_message, assistant_message],
677
+ user_id=context.user_id,
678
+ compress=True,
679
+ )
680
+
681
+ logger.info(f"Saved conversation to session {context.session_id}")
682
+ except Exception as e:
683
+ # Log error but don't fail the request - session storage is non-critical
684
+ logger.error(f"Failed to save session messages: {e}", exc_info=True)
685
+
686
+ return ChatCompletionResponse(
687
+ id=request_id,
688
+ created=int(time.time()),
689
+ model=body.model, # Echo back the requested model
690
+ choices=[
691
+ ChatCompletionChoice(
692
+ index=0,
693
+ message=ChatMessage(role="assistant", content=content),
694
+ finish_reason="stop",
695
+ )
696
+ ],
697
+ usage=ChatCompletionUsage(
698
+ prompt_tokens=prompt_tokens,
699
+ completion_tokens=completion_tokens,
700
+ total_tokens=prompt_tokens + completion_tokens,
701
+ ),
702
+ )