remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
rem/api/README.md ADDED
@@ -0,0 +1,657 @@
1
+ # REM API
2
+
3
+ FastAPI server for REM (Resources Entities Moments) system with OpenAI-compatible chat completions, MCP server, and RESTful endpoints.
4
+
5
+ ## Running the API
6
+
7
+ ### CLI Command
8
+
9
+ ```bash
10
+ # Development mode (with auto-reload)
11
+ rem serve
12
+
13
+ # Production mode
14
+ rem serve --host 0.0.0.0 --port 8000 --workers 4
15
+ ```
16
+
17
+ ### CLI Options
18
+
19
+ ```bash
20
+ rem serve --help
21
+
22
+ Options:
23
+ --host TEXT Host to bind to (default: 0.0.0.0)
24
+ --port INTEGER Port to listen on (default: 8000)
25
+ --reload Enable auto-reload for development (default: true)
26
+ --workers INTEGER Number of worker processes (default: 1)
27
+ --log-level TEXT Logging level: debug, info, warning, error (default: info)
28
+ ```
29
+
30
+ ### Direct Python
31
+
32
+ ```python
33
+ import uvicorn
34
+ # Pass the app as an import string ("module:attribute"); uvicorn only supports reload=True in that form
35
+
36
+ uvicorn.run("rem.api.main:app", host="0.0.0.0", port=8000, reload=True)
37
+ ```
38
+
39
+ ### Environment Variables
40
+
41
+ ```bash
42
+ # API Server
43
+ API__HOST=0.0.0.0
44
+ API__PORT=8000
45
+ API__RELOAD=true
46
+ API__WORKERS=1
47
+ API__LOG_LEVEL=info
48
+
49
+ # Chat Settings
50
+ CHAT__AUTO_INJECT_USER_CONTEXT=false # Default: false (use REM LOOKUP hints)
51
+
52
+ # LLM
53
+ LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
54
+ LLM__DEFAULT_TEMPERATURE=0.5
55
+ LLM__ANTHROPIC_API_KEY=sk-ant-...
56
+ LLM__OPENAI_API_KEY=sk-...
57
+
58
+ # PostgreSQL (required for session history)
59
+ POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5432/rem
60
+ POSTGRES__ENABLED=true
61
+
62
+ # OpenTelemetry (optional)
63
+ OTEL__ENABLED=false
64
+ OTEL__SERVICE_NAME=rem-api
65
+ OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
66
+ ```
67
+
68
+ ## Endpoints
69
+
70
+ ### Chat Completions
71
+
72
+ **POST /api/v1/chat/completions** - OpenAI-compatible chat completions
73
+
74
+ Features:
75
+ - Streaming and non-streaming modes
76
+ - Session history with compression
77
+ - User profile integration via dreaming worker
78
+ - Multiple agent schemas
79
+ - Model override support
80
+
81
+ ### MCP Server
82
+
83
+ **Mounted at /api/v1/mcp** - FastMCP server for Model Context Protocol
84
+
85
+ Tools:
86
+ - `ask_rem`: Query REM system using natural language
87
+ - `parse_and_ingest_file`: Ingest files into REM
88
+ - Additional MCP tools for REM operations
89
+
90
+ ### Health Check
91
+
92
+ **GET /health** - Health check endpoint
93
+
94
+ ## Content Headers
95
+
96
+ REM API uses custom headers to provide context, identify users, and manage sessions.
97
+
98
+ ### Header Reference
99
+
100
+ | Header Name | Description | Example Value | Required |
101
+ |-------------|-------------|---------------|----------|
102
+ | `X-User-Id` | User identifier (email, UUID, or username) | `sarah@example.com`, `user-123` | No |
103
+ | `X-Tenant-Id` | Tenant identifier for multi-tenancy | `acme-corp`, `tenant-123` | No |
104
+ | `X-Session-Id` | Session identifier for conversation continuity (must be UUID) | `550e8400-e29b-41d4-a716-446655440000` | No |
105
+ | `X-Agent-Schema` | Agent schema name to use | `rem`, `query-agent` | No |
106
+ | `X-Chat-Is-Audio` | Indicates audio input in chat completions | `true`, `false` | No |
107
+ | `Authorization` | Bearer token for API authentication | `Bearer jwt_token_here` | Yes* |
108
+
109
+ *Required for authenticated endpoints. Not required for public endpoints.
110
+
111
+ ## Session Management
112
+
113
+ REM chat API is designed for multi-turn conversations where each request contains a single message.
114
+
115
+ ### How Sessions Work
116
+
117
+ 1. **First Message**: Client sends message without `X-Session-Id`
118
+ - Server processes message
119
+ - Returns response
120
+ - Client generates session ID for subsequent messages
121
+
122
+ 2. **Subsequent Messages**: Client sends message with `X-Session-Id`
123
+ - Server loads compressed session history from database
124
+ - Combines history with new message
125
+ - Agent receives full conversation context
126
+ - New messages saved to database with compression
127
+
128
+ 3. **Compression**: Long assistant responses are compressed
129
+ - Short messages (<400 chars): Stored and loaded as-is
130
+ - Long messages (>400 chars): Compressed with REM LOOKUP hints
131
+ - Example: `"Start of response... [Message truncated - REM LOOKUP session-123-msg-1 to recover full content] ...end of response"`
132
+ - Agent can retrieve full content on-demand using REM LOOKUP
133
+
134
+ ### Benefits of Compression
135
+
136
+ - Prevents context window bloat
137
+ - Maintains conversation continuity
138
+ - Agent decides what to retrieve
139
+ - More efficient for long conversations
140
+
141
+ ## User Profiles and Dreaming
142
+
143
+ The dreaming worker runs periodically to build user models:
144
+
145
+ 1. Analyzes user's resources, sessions, and moments
146
+ 2. Generates profile with current projects, expertise, interests
147
+ 3. Stores profile in User entity (`metadata.profile` and model fields)
148
+
149
+ ### User Profile in Chat
150
+
151
+ **On-Demand (Default):**
152
+ - Agent receives hint: `"User ID: sarah@example.com. To load user profile: Use REM LOOKUP users/sarah@example.com"`
153
+ - Agent decides whether to load based on query
154
+ - More efficient for queries that don't need personalization
155
+
156
+ **Auto-Inject (Optional):**
157
+ - Set environment variable: `CHAT__AUTO_INJECT_USER_CONTEXT=true`
158
+ - User profile automatically loaded and injected into system message
159
+ - Simpler for basic chatbots that always need context
160
+
161
+ ## Authentication
162
+
163
+ ### Production Authentication
164
+
165
+ When `AUTH__ENABLED=true`, users authenticate via OAuth (Google or Microsoft). The OAuth flow:
166
+
167
+ 1. User visits `/api/auth/google/login` or `/api/auth/microsoft/login`
168
+ 2. User authenticates with provider
169
+ 3. Callback stores user in session cookie
170
+ 4. Subsequent requests use session cookie
171
+
172
+ ### Development Token (Non-Production Only)
173
+
174
+ For local development and testing, you can use a dev token instead of OAuth. This endpoint is available at `/api/dev/token` whenever `ENVIRONMENT != "production"`, regardless of whether auth is enabled.
175
+
176
+ **Get Token:**
177
+ ```bash
178
+ curl http://localhost:8000/api/dev/token
179
+ ```
180
+
181
+ **Response:**
182
+ ```json
183
+ {
184
+ "token": "dev_89737a19376332bfd9a4a06db8b79fd1",
185
+ "type": "Bearer",
186
+ "user": {
187
+ "id": "test-user",
188
+ "email": "test@rem.local",
189
+ "name": "Test User"
190
+ },
191
+ "usage": "curl -H \"Authorization: Bearer dev_...\" http://localhost:8000/api/v1/...",
192
+ "warning": "This token is for development/testing only and will not work in production."
193
+ }
194
+ ```
195
+
196
+ **Use Token:**
197
+ ```bash
198
+ # Get the token
199
+ TOKEN=$(curl -s http://localhost:8000/api/dev/token | jq -r .token)
200
+
201
+ # Use it in requests
202
+ curl -H "Authorization: Bearer $TOKEN" \
203
+ -H "X-Tenant-Id: default" \
204
+ http://localhost:8000/api/v1/shared-with-me
205
+ ```
206
+
207
+ **Security Notes:**
208
+ - Only available when `ENVIRONMENT != "production"`
209
+ - Token is HMAC-signed using session secret
210
+ - Authenticates as `test-user` with `pro` tier and `admin` role
211
+ - Token is deterministic per environment (same secret = same token)
212
+
213
+ ### Anonymous Access
214
+
215
+ When `AUTH__ALLOW_ANONYMOUS=true` (default in development):
216
+ - Requests without authentication are allowed
217
+ - Anonymous users get rate-limited access
218
+ - MCP endpoints still require auth unless `AUTH__MCP_REQUIRES_AUTH=false`
219
+
220
+ ## Usage Examples
221
+
222
+ **Note on Authentication**: By default, authentication is disabled (`AUTH__ENABLED=false`) for local development and testing. The examples below work without an `Authorization` header. If authentication is enabled, use either:
223
+ - **Dev token**: `-H "Authorization: Bearer $(curl -s http://localhost:8000/api/dev/token | jq -r .token)"`
224
+ - **Session cookie**: Login via OAuth first, then use cookies
225
+
226
+ ### cURL: Simple Chat
227
+
228
+ ```bash
229
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
230
+ -H "Content-Type: application/json" \
231
+ -H "X-User-Id: sarah@example.com" \
232
+ -d '{
233
+ "model": "anthropic:claude-sonnet-4-5-20250929",
234
+ "messages": [
235
+ {"role": "user", "content": "What is REM?"}
236
+ ]
237
+ }'
238
+ ```
239
+
240
+ ### cURL: Streaming Chat
241
+
242
+ ```bash
243
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
244
+ -H "Content-Type: application/json" \
245
+ -H "X-User-Id: sarah@example.com" \
246
+ -d '{
247
+ "model": "anthropic:claude-sonnet-4-5-20250929",
248
+ "messages": [
249
+ {"role": "user", "content": "Explain REM architecture"}
250
+ ],
251
+ "stream": true
252
+ }'
253
+ ```
254
+
255
+ ### cURL: Multi-Turn Conversation
256
+
257
+ ```bash
258
+ # First message
259
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
260
+ -H "Content-Type: application/json" \
261
+ -H "X-User-Id: sarah@example.com" \
262
+ -H "X-Session-Id: 550e8400-e29b-41d4-a716-446655440000" \
263
+ -d '{
264
+ "model": "openai:gpt-4o",
265
+ "messages": [
266
+ {"role": "user", "content": "What are moments in REM?"}
267
+ ]
268
+ }'
269
+
270
+ # Second message (session history loaded automatically)
271
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
272
+ -H "Content-Type: application/json" \
273
+ -H "X-User-Id: sarah@example.com" \
274
+ -H "X-Session-Id: 550e8400-e29b-41d4-a716-446655440000" \
275
+ -d '{
276
+ "model": "openai:gpt-4o",
277
+ "messages": [
278
+ {"role": "user", "content": "How are they created?"}
279
+ ]
280
+ }'
281
+ ```
282
+
283
+ ### Python: Multi-Turn Conversation
284
+
285
+ ```python
286
+ import requests
287
+ import uuid
288
+
289
+ url = "http://localhost:8000/api/v1/chat/completions"
290
+ session_id = str(uuid.uuid4()) # Must be a valid UUID
291
+
292
+ def send_message(content):
293
+ headers = {
294
+ "Content-Type": "application/json",
295
+ "X-User-Id": "sarah@example.com",
296
+ "X-Session-Id": session_id
297
+ }
298
+ data = {
299
+ "model": "openai:gpt-4o",
300
+ "messages": [
301
+ {"role": "user", "content": content}
302
+ ]
303
+ }
304
+
305
+ response = requests.post(url, headers=headers, json=data)
306
+ return response.json()["choices"][0]["message"]["content"]
307
+
308
+ # First turn
309
+ response1 = send_message("What are moments in REM?")
310
+ print(f"Assistant: {response1}\n")
311
+
312
+ # Second turn (session history loaded automatically)
313
+ response2 = send_message("How are they created?")
314
+ print(f"Assistant: {response2}\n")
315
+
316
+ # Third turn
317
+ response3 = send_message("Can you give an example?")
318
+ print(f"Assistant: {response3}\n")
319
+ ```
320
+
321
+ ### Python: Streaming Chat
322
+
323
+ ```python
324
+ import requests
325
+ import json
326
+
327
+ url = "http://localhost:8000/api/v1/chat/completions"
328
+ headers = {
329
+ "Content-Type": "application/json",
330
+ "X-User-Id": "sarah@example.com"
331
+ }
332
+ data = {
333
+ "model": "anthropic:claude-sonnet-4-5-20250929",
334
+ "messages": [
335
+ {"role": "user", "content": "Explain REM architecture"}
336
+ ],
337
+ "stream": True
338
+ }
339
+
340
+ response = requests.post(url, headers=headers, json=data, stream=True)
341
+
342
+ for line in response.iter_lines():
343
+ if line:
344
+ line_str = line.decode('utf-8')
345
+ if line_str.startswith('data: '):
346
+ data_str = line_str[6:] # Remove 'data: ' prefix
347
+ if data_str != '[DONE]':
348
+ chunk = json.loads(data_str)
349
+ delta = chunk["choices"][0]["delta"]
350
+ if "content" in delta:
351
+ print(delta["content"], end="", flush=True)
352
+ ```
353
+
354
+ ### Python: Audio Input (Voice Chat)
355
+
356
+ ```python
357
+ import requests
358
+ import base64
359
+
360
+ # Read audio file and encode to base64
361
+ with open("recording.wav", "rb") as audio_file:
362
+ audio_b64 = base64.b64encode(audio_file.read()).decode('utf-8')
363
+
364
+ url = "http://localhost:8000/api/v1/chat/completions"
365
+ headers = {
366
+ "Content-Type": "application/json",
367
+ "X-User-Id": "sarah@example.com",
368
+ "X-Chat-Is-Audio": "true" # Trigger audio transcription
369
+ }
370
+ data = {
371
+ "model": "anthropic:claude-sonnet-4-5-20250929",
372
+ "messages": [
373
+ {"role": "user", "content": audio_b64} # Base64-encoded WAV audio
374
+ ]
375
+ }
376
+
377
+ response = requests.post(url, headers=headers, json=data)
378
+ print(response.json()["choices"][0]["message"]["content"])
379
+
380
+ # Audio is transcribed to text using OpenAI Whisper
381
+ # Then processed as normal text chat
382
+ ```
383
+
384
+ ## Response Format
385
+
386
+ ### Non-Streaming Response
387
+
388
+ ```json
389
+ {
390
+ "id": "chatcmpl-abc123def456",
391
+ "created": 1732292400,
392
+ "model": "anthropic:claude-sonnet-4-5-20250929",
393
+ "choices": [
394
+ {
395
+ "index": 0,
396
+ "message": {
397
+ "role": "assistant",
398
+ "content": "REM (Resources Entities Moments) is a bio-inspired memory architecture..."
399
+ },
400
+ "finish_reason": "stop"
401
+ }
402
+ ],
403
+ "usage": {
404
+ "prompt_tokens": 150,
405
+ "completion_tokens": 200,
406
+ "total_tokens": 350
407
+ }
408
+ }
409
+ ```
410
+
411
+ ### Streaming Response (SSE Format)
412
+
413
+ ```
414
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{"role":"assistant","content":""},"index":0}]}
415
+
416
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":"REM"},"index":0}]}
417
+
418
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":" (Resources"},"index":0}]}
419
+
420
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":" Entities"},"index":0}]}
421
+
422
+ data: {"id":"chatcmpl-abc123","choices":[{"delta":{},"finish_reason":"stop","index":0}]}
423
+
424
+ data: [DONE]
425
+ ```
426
+
427
+ ## Extended SSE Event Protocol
428
+
429
+ REM uses OpenAI-compatible format for text content streaming, plus custom named SSE events for rich UI interactions.
430
+
431
+ ### Event Types
432
+
433
+ | Event Type | Format | Purpose | UI Display |
434
+ |------------|--------|---------|------------|
435
+ | (text content) | `data:` (OpenAI format) | Content chunks | Main response area |
436
+ | `reasoning` | `event:` | Model thinking | Collapsible "thinking" section |
437
+ | `progress` | `event:` | Step indicators | Progress bar/stepper |
438
+ | `tool_call` | `event:` | Tool invocations | Tool status panel |
439
+ | `action_request` | `event:` | User input solicitation | Buttons, forms, modals |
440
+ | `metadata` | `event:` | System info | Hidden or badge display |
441
+ | `error` | `event:` | Error notification | Error toast/alert |
442
+ | `done` | `event:` | Stream completion | Cleanup signal |
443
+
444
+ ### Event Format
445
+
446
+ **Text content (OpenAI-compatible `data:` format):**
447
+ ```
448
+ data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1732748123,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello "},"finish_reason":null}]}
449
+
450
+ data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1732748123,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"world!"},"finish_reason":null}]}
451
+
452
+ data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1732748123,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
453
+
454
+ data: [DONE]
455
+ ```
456
+
457
+ **Named events (use `event:` prefix):**
458
+ ```
459
+ event: reasoning
460
+ data: {"type": "reasoning", "content": "Analyzing the request...", "step": 1}
461
+
462
+ event: progress
463
+ data: {"type": "progress", "step": 1, "total_steps": 3, "label": "Searching", "status": "in_progress"}
464
+
465
+ event: tool_call
466
+ data: {"type": "tool_call", "tool_name": "search_rem", "status": "started", "arguments": {"query": "..."}}
467
+
468
+ event: action_request
469
+ data: {"type": "action_request", "card": {"id": "feedback-1", "prompt": "Was this helpful?", "actions": [...]}}
470
+
471
+ event: metadata
472
+ data: {"type": "metadata", "confidence": 0.95, "sources": ["doc1.md"], "hidden": false}
473
+
474
+ event: done
475
+ data: {"type": "done", "reason": "stop"}
476
+ ```
477
+
478
+ ### Action Request Cards (Adaptive Cards-inspired)
479
+
480
+ Action requests solicit user input using a schema inspired by [Microsoft Adaptive Cards](https://adaptivecards.io/):
481
+
482
+ ```json
483
+ {
484
+ "type": "action_request",
485
+ "card": {
486
+ "id": "confirm-delete-123",
487
+ "prompt": "Are you sure you want to delete this item?",
488
+ "display_style": "modal",
489
+ "actions": [
490
+ {
491
+ "type": "Action.Submit",
492
+ "id": "confirm",
493
+ "title": "Delete",
494
+ "style": "destructive",
495
+ "data": {"action": "delete", "item_id": "123"}
496
+ },
497
+ {
498
+ "type": "Action.Submit",
499
+ "id": "cancel",
500
+ "title": "Cancel",
501
+ "style": "secondary",
502
+ "data": {"action": "cancel"}
503
+ }
504
+ ],
505
+ "inputs": [
506
+ {
507
+ "type": "Input.Text",
508
+ "id": "reason",
509
+ "label": "Reason (optional)",
510
+ "placeholder": "Why are you deleting this?"
511
+ }
512
+ ],
513
+ "timeout_ms": 30000
514
+ }
515
+ }
516
+ ```
517
+
518
+ **Action Types:**
519
+ - `Action.Submit` - Send data to server
520
+ - `Action.OpenUrl` - Navigate to URL
521
+ - `Action.ShowCard` - Reveal nested content
522
+
523
+ **Input Types:**
524
+ - `Input.Text` - Text field (single or multiline)
525
+ - `Input.ChoiceSet` - Dropdown/radio selection
526
+ - `Input.Toggle` - Checkbox/toggle
527
+
528
+ ### SSE Simulator Endpoint
529
+
530
+ For frontend development and testing, use the simulator which generates all event types without LLM costs:
531
+
532
+ ```bash
533
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
534
+ -H "Content-Type: application/json" \
535
+ -H "X-Agent-Schema: simulator" \
536
+ -d '{"messages": [{"role": "user", "content": "demo"}], "stream": true}'
537
+ ```
538
+
539
+ The simulator produces a scripted sequence demonstrating:
540
+ 1. Reasoning events (4 steps)
541
+ 2. Progress indicators
542
+ 3. Simulated tool calls
543
+ 4. Rich markdown content
544
+ 5. Metadata with confidence
545
+ 6. Action request for feedback
546
+
547
+ See `rem/agentic/agents/sse_simulator.py` for implementation details.
548
+
549
+ ### Frontend Integration
550
+
551
+ ```typescript
552
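+ // Parse SSE events in React/TypeScript (illustrative: the browser EventSource API only issues GET requests, so for this POST endpoint read the stream with fetch() or an SSE client that supports POST and register the same handlers)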
553
+ const eventSource = new EventSource('/api/v1/chat/completions');
554
+
555
+ eventSource.onmessage = (e) => {
556
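+ // Default handler for unnamed `data:` events (OpenAI-format content chunks; the final `[DONE]` sentinel is not JSON)
557
+ const content = e.data === '[DONE]' ? undefined : JSON.parse(e.data).choices?.[0]?.delta?.content;
558
+ if (content) {
559
+ appendContent(content);
560
+ }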
561
+ };
562
+
563
+ eventSource.addEventListener('reasoning', (e) => {
564
+ const event = JSON.parse(e.data);
565
+ appendReasoning(event.content);
566
+ });
567
+
568
+ eventSource.addEventListener('action_request', (e) => {
569
+ const event = JSON.parse(e.data);
570
+ showActionCard(event.card);
571
+ });
572
+
573
+ eventSource.addEventListener('done', () => {
574
+ eventSource.close();
575
+ });
576
+ ```
577
+
578
+ ## Architecture
579
+
580
+ ### Middleware Ordering
581
+
582
+ Middleware runs in reverse order of addition:
583
+ 1. CORS (added last, runs first) - adds headers to all responses
584
+ 2. Auth middleware - validates authentication
585
+ 3. Logging middleware - logs requests/responses
586
+ 4. Sessions middleware (added first, runs last)
587
+
588
+ ### Stateless MCP Mounting
589
+
590
+ - FastMCP with `stateless_http=True` for Kubernetes compatibility
591
+ - Prevents stale session errors across pod restarts
592
+ - Mount at `/api/v1/mcp` for consistency
593
+ - Path rewrite middleware for trailing slash handling
594
+ - `redirect_slashes=False` prevents auth header stripping
595
+
596
+ ### Context Building Flow
597
+
598
+ 1. ContextBuilder extracts user_id, session_id from headers
599
+ 2. Session history ALWAYS loaded with compression (if session_id provided)
600
+ 3. User profile provided as REM LOOKUP hint (on-demand by default)
601
+ 4. If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded
602
+ 5. Combines: system context + compressed session history + new messages
603
+ 6. Agent receives complete message list ready for execution
604
+
605
+ ## Error Responses
606
+
607
+ ### 429 - Rate Limit Exceeded
608
+
609
+ When a user exceeds their rate limit (based on their tier), the API returns a 429 status code with a structured error body. The frontend should intercept this error to prompt the user to sign in or upgrade.
610
+
611
+ ```json
612
+ {
613
+ "error": {
614
+ "code": "rate_limit_exceeded",
615
+ "message": "You have exceeded your rate limit. Please sign in or upgrade to continue.",
616
+ "details": {
617
+ "limit": 50,
618
+ "tier": "anonymous",
619
+ "retry_after": 60
620
+ }
621
+ }
622
+ }
623
+ ```
624
+
625
+ **Handling Strategy:**
626
+ 1. **Intercept 429s:** API client should listen for `status === 429`.
627
+ 2. **Check Code:** If `error.code === 'rate_limit_exceeded'` AND `error.details.tier === 'anonymous'`, trigger "Login / Sign Up" flow.
628
+ 3. **Authenticated Users:** If `tier !== 'anonymous'`, prompt to upgrade plan.
629
+
630
+ ### 500 - Agent Schema Not Found
631
+
632
+ ```json
633
+ {
634
+ "detail": "Agent schema 'invalid-schema' not found and default schema unavailable"
635
+ }
636
+ ```
637
+
638
+ **Solution**: Use valid schema name or ensure default schema exists in `schemas/agents/rem.yaml`
639
+
640
+ ## Best Practices
641
+
642
+ 1. **Use Session IDs**: Always provide `X-Session-Id` for multi-turn conversations
643
+ 2. **Generate Stable Session IDs**: Generate a UUID once per conversation and reuse it for every turn (`X-Session-Id` must be a valid UUID)
644
+ 3. **Tenant Scoping**: Provide `X-Tenant-Id` for multi-tenant deployments
645
+ 4. **Model Selection**: Choose appropriate model for task complexity
646
+ 5. **Streaming**: Use streaming for long-running responses
647
+ 6. **User Context**: Enable auto-inject only if always needed, otherwise use on-demand
648
+
649
+ ## Related Documentation
650
+
651
+ - [Chat Router](routers/chat/completions.py) - Chat completions implementation
652
+ - [SSE Events](routers/chat/sse_events.py) - SSE event type definitions
653
+ - [SSE Simulator](../agentic/agents/sse_simulator.py) - Event simulator for testing
654
+ - [MCP Router](mcp_router/server.py) - MCP server implementation
655
+ - [Agent Schemas](../schemas/agents/) - Available agent schemas
656
+ - [Session Compression](../services/session/compression.py) - Compression implementation
657
+ - [Context Builder](../agentic/context_builder.py) - Context construction logic