remdb 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.0.dist-info/METADATA +1455 -0
  185. remdb-0.3.0.dist-info/RECORD +187 -0
  186. remdb-0.3.0.dist-info/WHEEL +4 -0
  187. remdb-0.3.0.dist-info/entry_points.txt +2 -0
rem/api/README.md ADDED
@@ -0,0 +1,420 @@
1
+ # REM API
2
+
3
+ FastAPI server for the REM (Resources Entities Moments) system, providing OpenAI-compatible chat completions, an MCP server, and RESTful endpoints.
4
+
5
+ ## Running the API
6
+
7
+ ### CLI Command
8
+
9
+ ```bash
10
+ # Development mode (with auto-reload)
11
+ rem serve
12
+
13
+ # Production mode
14
+ rem serve --host 0.0.0.0 --port 8000 --workers 4
15
+ ```
16
+
17
+ ### CLI Options
18
+
19
+ ```bash
20
+ rem serve --help
21
+
22
+ Options:
23
+ --host TEXT Host to bind to (default: 0.0.0.0)
24
+ --port INTEGER Port to listen on (default: 8000)
25
+ --reload Enable auto-reload for development (default: true)
26
+ --workers INTEGER Number of worker processes (default: 1)
27
+ --log-level TEXT Logging level: debug, info, warning, error (default: info)
28
+ ```
29
+
30
+ ### Direct Python
31
+
32
+ ```python
33
+ import uvicorn
34
+ # With reload=True, uvicorn needs the app as an import string, not an app object
35
+
36
+ uvicorn.run("rem.api.main:app", host="0.0.0.0", port=8000, reload=True)
37
+ ```
38
+
39
+ ### Environment Variables
40
+
41
+ ```bash
42
+ # API Server
43
+ API__HOST=0.0.0.0
44
+ API__PORT=8000
45
+ API__RELOAD=true
46
+ API__WORKERS=1
47
+ API__LOG_LEVEL=info
48
+
49
+ # Chat Settings
50
+ CHAT__AUTO_INJECT_USER_CONTEXT=false # Default: false (use REM LOOKUP hints)
51
+
52
+ # LLM
53
+ LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
54
+ LLM__DEFAULT_TEMPERATURE=0.5
55
+ LLM__ANTHROPIC_API_KEY=sk-ant-...
56
+ LLM__OPENAI_API_KEY=sk-...
57
+
58
+ # PostgreSQL (required for session history)
59
+ POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5432/rem
60
+ POSTGRES__ENABLED=true
61
+
62
+ # OpenTelemetry (optional)
63
+ OTEL__ENABLED=false
64
+ OTEL__SERVICE_NAME=rem-api
65
+ OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
66
+ ```
67
+
68
+ ## Endpoints
69
+
70
+ ### Chat Completions
71
+
72
+ **POST /api/v1/chat/completions** - OpenAI-compatible chat completions
73
+
74
+ Features:
75
+ - Streaming and non-streaming modes
76
+ - Session history with compression
77
+ - User profile integration via dreaming worker
78
+ - Multiple agent schemas
79
+ - Model override support
80
+
81
+ ### MCP Server
82
+
83
+ **Mounted at /api/v1/mcp** - FastMCP server for Model Context Protocol
84
+
85
+ Tools:
86
+ - `ask_rem`: Query REM system using natural language
87
+ - `parse_and_ingest_file`: Ingest files into REM
88
+ - Additional MCP tools for REM operations
89
+
90
+ ### Health Check
91
+
92
+ **GET /health** - Health check endpoint
93
+
94
+ ## Content Headers
95
+
96
+ REM API uses custom headers to provide context, identify users, and manage sessions.
97
+
98
+ ### Header Reference
99
+
100
+ | Header Name | Description | Example Value | Required |
101
+ |-------------|-------------|---------------|----------|
102
+ | `X-User-Id` | User identifier (email, UUID, or username) | `sarah@example.com`, `user-123` | No |
103
+ | `X-Tenant-Id` | Tenant identifier for multi-tenancy | `acme-corp`, `tenant-123` | No |
104
+ | `X-Session-Id` | Session identifier for conversation continuity (must be UUID) | `550e8400-e29b-41d4-a716-446655440000` | No |
105
+ | `X-Agent-Schema` | Agent schema name to use | `rem`, `query-agent` | No |
106
+ | `X-Chat-Is-Audio` | Indicates audio input in chat completions | `true`, `false` | No |
107
+ | `Authorization` | Bearer token for API authentication | `Bearer jwt_token_here` | Yes* |
108
+
109
+ *Required for authenticated endpoints. Not required for public endpoints.
110
+
111
+ ## Session Management
112
+
113
+ REM chat API is designed for multi-turn conversations where each request contains a single message.
114
+
115
+ ### How Sessions Work
116
+
117
+ 1. **First Message**: Client sends message without `X-Session-Id`
118
+ - Server processes message
119
+ - Returns response
120
+ - Client generates a session ID (a valid UUID) and sends it as `X-Session-Id` on subsequent messages
121
+
122
+ 2. **Subsequent Messages**: Client sends message with `X-Session-Id`
123
+ - Server loads compressed session history from database
124
+ - Combines history with new message
125
+ - Agent receives full conversation context
126
+ - New messages saved to database with compression
127
+
128
+ 3. **Compression**: Long assistant responses are compressed
129
+ - Short messages (<400 chars): Stored and loaded as-is
130
+ - Long messages (>400 chars): Compressed with REM LOOKUP hints
131
+ - Example: `"Start of response... [Message truncated - REM LOOKUP session-123-msg-1 to recover full content] ...end of response"`
132
+ - Agent can retrieve full content on-demand using REM LOOKUP
133
+
134
+ ### Benefits of Compression
135
+
136
+ - Prevents context window bloat
137
+ - Maintains conversation continuity
138
+ - Agent decides what to retrieve
139
+ - More efficient for long conversations
140
+
141
+ ## User Profiles and Dreaming
142
+
143
+ The dreaming worker runs periodically to build user models:
144
+
145
+ 1. Analyzes user's resources, sessions, and moments
146
+ 2. Generates profile with current projects, expertise, interests
147
+ 3. Stores profile in User entity (`metadata.profile` and model fields)
148
+
149
+ ### User Profile in Chat
150
+
151
+ **On-Demand (Default):**
152
+ - Agent receives hint: `"User ID: sarah@example.com. To load user profile: Use REM LOOKUP users/sarah@example.com"`
153
+ - Agent decides whether to load based on query
154
+ - More efficient for queries that don't need personalization
155
+
156
+ **Auto-Inject (Optional):**
157
+ - Set environment variable: `CHAT__AUTO_INJECT_USER_CONTEXT=true`
158
+ - User profile automatically loaded and injected into system message
159
+ - Simpler for basic chatbots that always need context
160
+
161
+ ## Usage Examples
162
+
163
+ **Note on Authentication**: By default, authentication is disabled (`AUTH__ENABLED=false`) for local development and testing. The examples below work without an `Authorization` header. If authentication is enabled in your environment, add: `-H "Authorization: Bearer your_jwt_token"` to cURL requests or `"Authorization": "Bearer your_jwt_token"` to Python headers.
164
+
165
+ ### cURL: Simple Chat
166
+
167
+ ```bash
168
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
169
+ -H "Content-Type: application/json" \
170
+ -H "X-User-Id: sarah@example.com" \
171
+ -d '{
172
+ "model": "anthropic:claude-sonnet-4-5-20250929",
173
+ "messages": [
174
+ {"role": "user", "content": "What is REM?"}
175
+ ]
176
+ }'
177
+ ```
178
+
179
+ ### cURL: Streaming Chat
180
+
181
+ ```bash
182
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
183
+ -H "Content-Type: application/json" \
184
+ -H "X-User-Id: sarah@example.com" \
185
+ -d '{
186
+ "model": "anthropic:claude-sonnet-4-5-20250929",
187
+ "messages": [
188
+ {"role": "user", "content": "Explain REM architecture"}
189
+ ],
190
+ "stream": true
191
+ }'
192
+ ```
193
+
194
+ ### cURL: Multi-Turn Conversation
195
+
196
+ ```bash
197
+ # First message
198
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
199
+ -H "Content-Type: application/json" \
200
+ -H "X-User-Id: sarah@example.com" \
201
+ -H "X-Session-Id: 550e8400-e29b-41d4-a716-446655440000" \
202
+ -d '{
203
+ "model": "openai:gpt-4o",
204
+ "messages": [
205
+ {"role": "user", "content": "What are moments in REM?"}
206
+ ]
207
+ }'
208
+
209
+ # Second message (session history loaded automatically)
210
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
211
+ -H "Content-Type: application/json" \
212
+ -H "X-User-Id: sarah@example.com" \
213
+ -H "X-Session-Id: 550e8400-e29b-41d4-a716-446655440000" \
214
+ -d '{
215
+ "model": "openai:gpt-4o",
216
+ "messages": [
217
+ {"role": "user", "content": "How are they created?"}
218
+ ]
219
+ }'
220
+ ```
221
+
222
+ ### Python: Multi-Turn Conversation
223
+
224
+ ```python
225
+ import requests
226
+ import uuid
227
+
228
+ url = "http://localhost:8000/api/v1/chat/completions"
229
+ session_id = str(uuid.uuid4()) # Must be a valid UUID
230
+
231
+ def send_message(content):
232
+ headers = {
233
+ "Content-Type": "application/json",
234
+ "X-User-Id": "sarah@example.com",
235
+ "X-Session-Id": session_id
236
+ }
237
+ data = {
238
+ "model": "openai:gpt-4o",
239
+ "messages": [
240
+ {"role": "user", "content": content}
241
+ ]
242
+ }
243
+
244
+ response = requests.post(url, headers=headers, json=data)
245
+ return response.json()["choices"][0]["message"]["content"]
246
+
247
+ # First turn
248
+ response1 = send_message("What are moments in REM?")
249
+ print(f"Assistant: {response1}\n")
250
+
251
+ # Second turn (session history loaded automatically)
252
+ response2 = send_message("How are they created?")
253
+ print(f"Assistant: {response2}\n")
254
+
255
+ # Third turn
256
+ response3 = send_message("Can you give an example?")
257
+ print(f"Assistant: {response3}\n")
258
+ ```
259
+
260
+ ### Python: Streaming Chat
261
+
262
+ ```python
263
+ import requests
264
+ import json
265
+
266
+ url = "http://localhost:8000/api/v1/chat/completions"
267
+ headers = {
268
+ "Content-Type": "application/json",
269
+ "X-User-Id": "sarah@example.com"
270
+ }
271
+ data = {
272
+ "model": "anthropic:claude-sonnet-4-5-20250929",
273
+ "messages": [
274
+ {"role": "user", "content": "Explain REM architecture"}
275
+ ],
276
+ "stream": True
277
+ }
278
+
279
+ response = requests.post(url, headers=headers, json=data, stream=True)
280
+
281
+ for line in response.iter_lines():
282
+ if line:
283
+ line_str = line.decode('utf-8')
284
+ if line_str.startswith('data: '):
285
+ data_str = line_str[6:] # Remove 'data: ' prefix
286
+ if data_str != '[DONE]':
287
+ chunk = json.loads(data_str)
288
+ delta = chunk["choices"][0]["delta"]
289
+ if "content" in delta:
290
+ print(delta["content"], end="", flush=True)
291
+ ```
292
+
293
+ ### Python: Audio Input (Voice Chat)
294
+
295
+ ```python
296
+ import requests
297
+ import base64
298
+
299
+ # Read audio file and encode to base64
300
+ with open("recording.wav", "rb") as audio_file:
301
+ audio_b64 = base64.b64encode(audio_file.read()).decode('utf-8')
302
+
303
+ url = "http://localhost:8000/api/v1/chat/completions"
304
+ headers = {
305
+ "Content-Type": "application/json",
306
+ "X-User-Id": "sarah@example.com",
307
+ "X-Chat-Is-Audio": "true" # Trigger audio transcription
308
+ }
309
+ data = {
310
+ "model": "anthropic:claude-sonnet-4-5-20250929",
311
+ "messages": [
312
+ {"role": "user", "content": audio_b64} # Base64-encoded WAV audio
313
+ ]
314
+ }
315
+
316
+ response = requests.post(url, headers=headers, json=data)
317
+ print(response.json()["choices"][0]["message"]["content"])
318
+
319
+ # Audio is transcribed to text using OpenAI Whisper
320
+ # Then processed as normal text chat
321
+ ```
322
+
323
+ ## Response Format
324
+
325
+ ### Non-Streaming Response
326
+
327
+ ```json
328
+ {
329
+ "id": "chatcmpl-abc123def456",
330
+ "created": 1732292400,
331
+ "model": "anthropic:claude-sonnet-4-5-20250929",
332
+ "choices": [
333
+ {
334
+ "index": 0,
335
+ "message": {
336
+ "role": "assistant",
337
+ "content": "REM (Resources Entities Moments) is a bio-inspired memory architecture..."
338
+ },
339
+ "finish_reason": "stop"
340
+ }
341
+ ],
342
+ "usage": {
343
+ "prompt_tokens": 150,
344
+ "completion_tokens": 200,
345
+ "total_tokens": 350
346
+ }
347
+ }
348
+ ```
349
+
350
+ ### Streaming Response (SSE Format)
351
+
352
+ ```
353
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{"role":"assistant","content":""},"index":0}]}
354
+
355
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":"REM"},"index":0}]}
356
+
357
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":" (Resources"},"index":0}]}
358
+
359
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":" Entities"},"index":0}]}
360
+
361
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{},"finish_reason":"stop","index":0}]}
362
+
363
+ data: [DONE]
364
+ ```
365
+
366
+ ## Architecture
367
+
368
+ ### Middleware Ordering
369
+
370
+ Middleware runs in reverse order of addition:
371
+ 1. CORS (added last, runs first) - adds headers to all responses
372
+ 2. Auth middleware - validates authentication
373
+ 3. Logging middleware - logs requests/responses
374
+ 4. Sessions middleware (added first, runs last)
375
+
376
+ ### Stateless MCP Mounting
377
+
378
+ - FastMCP with `stateless_http=True` for Kubernetes compatibility
379
+ - Prevents stale session errors across pod restarts
380
+ - Mount at `/api/v1/mcp` for consistency
381
+ - Path rewrite middleware for trailing slash handling
382
+ - `redirect_slashes=False` prevents auth header stripping
383
+
384
+ ### Context Building Flow
385
+
386
+ 1. ContextBuilder extracts user_id, session_id from headers
387
+ 2. Session history ALWAYS loaded with compression (if session_id provided)
388
+ 3. User profile provided as REM LOOKUP hint (on-demand by default)
389
+ 4. If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded
390
+ 5. Combines: system context + compressed session history + new messages
391
+ 6. Agent receives complete message list ready for execution
392
+
393
+ ## Error Responses
394
+
395
+ ### 500 - Agent Schema Not Found
396
+
397
+ ```json
398
+ {
399
+ "detail": "Agent schema 'invalid-schema' not found and default schema unavailable"
400
+ }
401
+ ```
402
+
403
+ **Solution**: Use valid schema name or ensure default schema exists in `schemas/agents/rem.yaml`
404
+
405
+ ## Best Practices
406
+
407
+ 1. **Use Session IDs**: Always provide `X-Session-Id` for multi-turn conversations
408
+ 2. **Generate Stable Session IDs**: Generate one UUID per conversation and reuse it for every turn (`X-Session-Id` must be a valid UUID)
409
+ 3. **Tenant Scoping**: Provide `X-Tenant-Id` for multi-tenant deployments
410
+ 4. **Model Selection**: Choose appropriate model for task complexity
411
+ 5. **Streaming**: Use streaming for long-running responses
412
+ 6. **User Context**: Enable auto-inject only if always needed, otherwise use on-demand
413
+
414
+ ## Related Documentation
415
+
416
+ - [Chat Router](routers/chat/completions.py) - Chat completions implementation
417
+ - [MCP Router](mcp_router/server.py) - MCP server implementation
418
+ - [Agent Schemas](../schemas/agents/) - Available agent schemas
419
+ - [Session Compression](../services/session/compression.py) - Compression implementation
420
+ - [Context Builder](../agentic/context_builder.py) - Context construction logic