remdb 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +801 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.7.dist-info/METADATA +1473 -0
- remdb-0.3.7.dist-info/RECORD +187 -0
- remdb-0.3.7.dist-info/WHEEL +4 -0
- remdb-0.3.7.dist-info/entry_points.txt +2 -0
rem/api/README.md
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
# REM API
|
|
2
|
+
|
|
3
|
+
FastAPI server for REM (Resources Entities Moments) system with OpenAI-compatible chat completions, MCP server, and RESTful endpoints.
|
|
4
|
+
|
|
5
|
+
## Running the API
|
|
6
|
+
|
|
7
|
+
### CLI Command
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Development mode (with auto-reload)
|
|
11
|
+
rem serve
|
|
12
|
+
|
|
13
|
+
# Production mode
|
|
14
|
+
rem serve --host 0.0.0.0 --port 8000 --workers 4
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### CLI Options
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
rem serve --help
|
|
21
|
+
|
|
22
|
+
Options:
|
|
23
|
+
--host TEXT Host to bind to (default: 0.0.0.0)
|
|
24
|
+
--port INTEGER Port to listen on (default: 8000)
|
|
25
|
+
--reload Enable auto-reload for development (default: true)
|
|
26
|
+
--workers INTEGER Number of worker processes (default: 1)
|
|
27
|
+
--log-level TEXT Logging level: debug, info, warning, error (default: info)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Direct Python
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import uvicorn
|
|
34
|
+
from rem.api.main import app
|
|
35
|
+
|
|
36
|
+
uvicorn.run("rem.api.main:app", host="0.0.0.0", port=8000, reload=True)  # reload=True requires an import string, not the app object
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Environment Variables
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# API Server
|
|
43
|
+
API__HOST=0.0.0.0
|
|
44
|
+
API__PORT=8000
|
|
45
|
+
API__RELOAD=true
|
|
46
|
+
API__WORKERS=1
|
|
47
|
+
API__LOG_LEVEL=info
|
|
48
|
+
|
|
49
|
+
# Chat Settings
|
|
50
|
+
CHAT__AUTO_INJECT_USER_CONTEXT=false # Default: false (use REM LOOKUP hints)
|
|
51
|
+
|
|
52
|
+
# LLM
|
|
53
|
+
LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
|
|
54
|
+
LLM__DEFAULT_TEMPERATURE=0.5
|
|
55
|
+
LLM__ANTHROPIC_API_KEY=sk-ant-...
|
|
56
|
+
LLM__OPENAI_API_KEY=sk-...
|
|
57
|
+
|
|
58
|
+
# PostgreSQL (required for session history)
|
|
59
|
+
POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5432/rem
|
|
60
|
+
POSTGRES__ENABLED=true
|
|
61
|
+
|
|
62
|
+
# OpenTelemetry (optional)
|
|
63
|
+
OTEL__ENABLED=false
|
|
64
|
+
OTEL__SERVICE_NAME=rem-api
|
|
65
|
+
OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Endpoints
|
|
69
|
+
|
|
70
|
+
### Chat Completions
|
|
71
|
+
|
|
72
|
+
**POST /api/v1/chat/completions** - OpenAI-compatible chat completions
|
|
73
|
+
|
|
74
|
+
Features:
|
|
75
|
+
- Streaming and non-streaming modes
|
|
76
|
+
- Session history with compression
|
|
77
|
+
- User profile integration via dreaming worker
|
|
78
|
+
- Multiple agent schemas
|
|
79
|
+
- Model override support
|
|
80
|
+
|
|
81
|
+
### MCP Server
|
|
82
|
+
|
|
83
|
+
**Mounted at /api/v1/mcp** - FastMCP server for Model Context Protocol
|
|
84
|
+
|
|
85
|
+
Tools:
|
|
86
|
+
- `ask_rem`: Query REM system using natural language
|
|
87
|
+
- `parse_and_ingest_file`: Ingest files into REM
|
|
88
|
+
- Additional MCP tools for REM operations
|
|
89
|
+
|
|
90
|
+
### Health Check
|
|
91
|
+
|
|
92
|
+
**GET /health** - Health check endpoint
|
|
93
|
+
|
|
94
|
+
## Context Headers
|
|
95
|
+
|
|
96
|
+
REM API uses custom headers to provide context, identify users, and manage sessions.
|
|
97
|
+
|
|
98
|
+
### Header Reference
|
|
99
|
+
|
|
100
|
+
| Header Name | Description | Example Value | Required |
|
|
101
|
+
|-------------|-------------|---------------|----------|
|
|
102
|
+
| `X-User-Id` | User identifier (email, UUID, or username) | `sarah@example.com`, `user-123` | No |
|
|
103
|
+
| `X-Tenant-Id` | Tenant identifier for multi-tenancy | `acme-corp`, `tenant-123` | No |
|
|
104
|
+
| `X-Session-Id` | Session identifier for conversation continuity (must be UUID) | `550e8400-e29b-41d4-a716-446655440000` | No |
|
|
105
|
+
| `X-Agent-Schema` | Agent schema name to use | `rem`, `query-agent` | No |
|
|
106
|
+
| `X-Chat-Is-Audio` | Indicates audio input in chat completions | `true`, `false` | No |
|
|
107
|
+
| `Authorization` | Bearer token for API authentication | `Bearer jwt_token_here` | Yes* |
|
|
108
|
+
|
|
109
|
+
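*Required for authenticated endpoints when authentication is enabled (`AUTH__ENABLED=true`). Not required for public endpoints, or when authentication is disabled (the default, `AUTH__ENABLED=false`).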
|
|
110
|
+
|
|
111
|
+
## Session Management
|
|
112
|
+
|
|
113
|
+
REM chat API is designed for multi-turn conversations where each request contains a single message.
|
|
114
|
+
|
|
115
|
+
### How Sessions Work
|
|
116
|
+
|
|
117
|
+
1. **First Message**: Client sends message without `X-Session-Id`
|
|
118
|
+
- Server processes message
|
|
119
|
+
- Returns response
|
|
120
|
+
- Client generates session ID for subsequent messages
|
|
121
|
+
|
|
122
|
+
2. **Subsequent Messages**: Client sends message with `X-Session-Id`
|
|
123
|
+
- Server loads compressed session history from database
|
|
124
|
+
- Combines history with new message
|
|
125
|
+
- Agent receives full conversation context
|
|
126
|
+
- New messages saved to database with compression
|
|
127
|
+
|
|
128
|
+
3. **Compression**: Long assistant responses are compressed
|
|
129
|
+
- Short messages (<400 chars): Stored and loaded as-is
|
|
130
|
+
- Long messages (>400 chars): Compressed with REM LOOKUP hints
|
|
131
|
+
- Example: `"Start of response... [Message truncated - REM LOOKUP session-123-msg-1 to recover full content] ...end of response"`
|
|
132
|
+
- Agent can retrieve full content on-demand using REM LOOKUP
|
|
133
|
+
|
|
134
|
+
### Benefits of Compression
|
|
135
|
+
|
|
136
|
+
- Prevents context window bloat
|
|
137
|
+
- Maintains conversation continuity
|
|
138
|
+
- Agent decides what to retrieve
|
|
139
|
+
- More efficient for long conversations
|
|
140
|
+
|
|
141
|
+
## User Profiles and Dreaming
|
|
142
|
+
|
|
143
|
+
The dreaming worker runs periodically to build user models:
|
|
144
|
+
|
|
145
|
+
1. Analyzes user's resources, sessions, and moments
|
|
146
|
+
2. Generates profile with current projects, expertise, interests
|
|
147
|
+
3. Stores profile in User entity (`metadata.profile` and model fields)
|
|
148
|
+
|
|
149
|
+
### User Profile in Chat
|
|
150
|
+
|
|
151
|
+
**On-Demand (Default):**
|
|
152
|
+
- Agent receives hint: `"User ID: sarah@example.com. To load user profile: Use REM LOOKUP users/sarah@example.com"`
|
|
153
|
+
- Agent decides whether to load based on query
|
|
154
|
+
- More efficient for queries that don't need personalization
|
|
155
|
+
|
|
156
|
+
**Auto-Inject (Optional):**
|
|
157
|
+
- Set environment variable: `CHAT__AUTO_INJECT_USER_CONTEXT=true`
|
|
158
|
+
- User profile automatically loaded and injected into system message
|
|
159
|
+
- Simpler for basic chatbots that always need context
|
|
160
|
+
|
|
161
|
+
## Usage Examples
|
|
162
|
+
|
|
163
|
+
**Note on Authentication**: By default, authentication is disabled (`AUTH__ENABLED=false`) for local development and testing. The examples below work without an `Authorization` header. If authentication is enabled in your environment, add: `-H "Authorization: Bearer your_jwt_token"` to cURL requests or `"Authorization": "Bearer your_jwt_token"` to Python headers.
|
|
164
|
+
|
|
165
|
+
### cURL: Simple Chat
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
169
|
+
-H "Content-Type: application/json" \
|
|
170
|
+
-H "X-User-Id: sarah@example.com" \
|
|
171
|
+
-d '{
|
|
172
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
173
|
+
"messages": [
|
|
174
|
+
{"role": "user", "content": "What is REM?"}
|
|
175
|
+
]
|
|
176
|
+
}'
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### cURL: Streaming Chat
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
183
|
+
-H "Content-Type: application/json" \
|
|
184
|
+
-H "X-User-Id: sarah@example.com" \
|
|
185
|
+
-d '{
|
|
186
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
187
|
+
"messages": [
|
|
188
|
+
{"role": "user", "content": "Explain REM architecture"}
|
|
189
|
+
],
|
|
190
|
+
"stream": true
|
|
191
|
+
}'
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### cURL: Multi-Turn Conversation
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
# First message
|
|
198
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
199
|
+
-H "Content-Type: application/json" \
|
|
200
|
+
-H "X-User-Id: sarah@example.com" \
|
|
201
|
+
-H "X-Session-Id: 550e8400-e29b-41d4-a716-446655440000" \
|
|
202
|
+
-d '{
|
|
203
|
+
"model": "openai:gpt-4o",
|
|
204
|
+
"messages": [
|
|
205
|
+
{"role": "user", "content": "What are moments in REM?"}
|
|
206
|
+
]
|
|
207
|
+
}'
|
|
208
|
+
|
|
209
|
+
# Second message (session history loaded automatically)
|
|
210
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
211
|
+
-H "Content-Type: application/json" \
|
|
212
|
+
-H "X-User-Id: sarah@example.com" \
|
|
213
|
+
-H "X-Session-Id: 550e8400-e29b-41d4-a716-446655440000" \
|
|
214
|
+
-d '{
|
|
215
|
+
"model": "openai:gpt-4o",
|
|
216
|
+
"messages": [
|
|
217
|
+
{"role": "user", "content": "How are they created?"}
|
|
218
|
+
]
|
|
219
|
+
}'
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### Python: Multi-Turn Conversation
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
import requests
|
|
226
|
+
import uuid
|
|
227
|
+
|
|
228
|
+
url = "http://localhost:8000/api/v1/chat/completions"
|
|
229
|
+
session_id = str(uuid.uuid4()) # Must be a valid UUID
|
|
230
|
+
|
|
231
|
+
def send_message(content):
|
|
232
|
+
headers = {
|
|
233
|
+
"Content-Type": "application/json",
|
|
234
|
+
"X-User-Id": "sarah@example.com",
|
|
235
|
+
"X-Session-Id": session_id
|
|
236
|
+
}
|
|
237
|
+
data = {
|
|
238
|
+
"model": "openai:gpt-4o",
|
|
239
|
+
"messages": [
|
|
240
|
+
{"role": "user", "content": content}
|
|
241
|
+
]
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
response = requests.post(url, headers=headers, json=data)
|
|
245
|
+
return response.json()["choices"][0]["message"]["content"]
|
|
246
|
+
|
|
247
|
+
# First turn
|
|
248
|
+
response1 = send_message("What are moments in REM?")
|
|
249
|
+
print(f"Assistant: {response1}\n")
|
|
250
|
+
|
|
251
|
+
# Second turn (session history loaded automatically)
|
|
252
|
+
response2 = send_message("How are they created?")
|
|
253
|
+
print(f"Assistant: {response2}\n")
|
|
254
|
+
|
|
255
|
+
# Third turn
|
|
256
|
+
response3 = send_message("Can you give an example?")
|
|
257
|
+
print(f"Assistant: {response3}\n")
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
### Python: Streaming Chat
|
|
261
|
+
|
|
262
|
+
```python
|
|
263
|
+
import requests
|
|
264
|
+
import json
|
|
265
|
+
|
|
266
|
+
url = "http://localhost:8000/api/v1/chat/completions"
|
|
267
|
+
headers = {
|
|
268
|
+
"Content-Type": "application/json",
|
|
269
|
+
"X-User-Id": "sarah@example.com"
|
|
270
|
+
}
|
|
271
|
+
data = {
|
|
272
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
273
|
+
"messages": [
|
|
274
|
+
{"role": "user", "content": "Explain REM architecture"}
|
|
275
|
+
],
|
|
276
|
+
"stream": True
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
response = requests.post(url, headers=headers, json=data, stream=True)
|
|
280
|
+
|
|
281
|
+
for line in response.iter_lines():
|
|
282
|
+
if line:
|
|
283
|
+
line_str = line.decode('utf-8')
|
|
284
|
+
if line_str.startswith('data: '):
|
|
285
|
+
data_str = line_str[6:] # Remove 'data: ' prefix
|
|
286
|
+
if data_str != '[DONE]':
|
|
287
|
+
chunk = json.loads(data_str)
|
|
288
|
+
delta = chunk["choices"][0]["delta"]
|
|
289
|
+
if "content" in delta:
|
|
290
|
+
print(delta["content"], end="", flush=True)
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
### Python: Audio Input (Voice Chat)
|
|
294
|
+
|
|
295
|
+
```python
|
|
296
|
+
import requests
|
|
297
|
+
import base64
|
|
298
|
+
|
|
299
|
+
# Read audio file and encode to base64
|
|
300
|
+
with open("recording.wav", "rb") as audio_file:
|
|
301
|
+
audio_b64 = base64.b64encode(audio_file.read()).decode('utf-8')
|
|
302
|
+
|
|
303
|
+
url = "http://localhost:8000/api/v1/chat/completions"
|
|
304
|
+
headers = {
|
|
305
|
+
"Content-Type": "application/json",
|
|
306
|
+
"X-User-Id": "sarah@example.com",
|
|
307
|
+
"X-Chat-Is-Audio": "true" # Trigger audio transcription
|
|
308
|
+
}
|
|
309
|
+
data = {
|
|
310
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
311
|
+
"messages": [
|
|
312
|
+
{"role": "user", "content": audio_b64} # Base64-encoded WAV audio
|
|
313
|
+
]
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
response = requests.post(url, headers=headers, json=data)
|
|
317
|
+
print(response.json()["choices"][0]["message"]["content"])
|
|
318
|
+
|
|
319
|
+
# Audio is transcribed to text using OpenAI Whisper
|
|
320
|
+
# Then processed as normal text chat
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
## Response Format
|
|
324
|
+
|
|
325
|
+
### Non-Streaming Response
|
|
326
|
+
|
|
327
|
+
```json
|
|
328
|
+
{
|
|
329
|
+
"id": "chatcmpl-abc123def456",
|
|
330
|
+
"created": 1732292400,
|
|
331
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
332
|
+
"choices": [
|
|
333
|
+
{
|
|
334
|
+
"index": 0,
|
|
335
|
+
"message": {
|
|
336
|
+
"role": "assistant",
|
|
337
|
+
"content": "REM (Resources Entities Moments) is a bio-inspired memory architecture..."
|
|
338
|
+
},
|
|
339
|
+
"finish_reason": "stop"
|
|
340
|
+
}
|
|
341
|
+
],
|
|
342
|
+
"usage": {
|
|
343
|
+
"prompt_tokens": 150,
|
|
344
|
+
"completion_tokens": 200,
|
|
345
|
+
"total_tokens": 350
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
### Streaming Response (SSE Format)
|
|
351
|
+
|
|
352
|
+
```
|
|
353
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{"role":"assistant","content":""},"index":0}]}
|
|
354
|
+
|
|
355
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":"REM"},"index":0}]}
|
|
356
|
+
|
|
357
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":" (Resources"},"index":0}]}
|
|
358
|
+
|
|
359
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":" Entities"},"index":0}]}
|
|
360
|
+
|
|
361
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{},"finish_reason":"stop","index":0}]}
|
|
362
|
+
|
|
363
|
+
data: [DONE]
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
## Architecture
|
|
367
|
+
|
|
368
|
+
### Middleware Ordering
|
|
369
|
+
|
|
370
|
+
Middleware runs in reverse order of addition:
|
|
371
|
+
1. CORS (added last, runs first) - adds headers to all responses
|
|
372
|
+
2. Auth middleware - validates authentication
|
|
373
|
+
3. Logging middleware - logs requests/responses
|
|
374
|
+
4. Sessions middleware (added first, runs last)
|
|
375
|
+
|
|
376
|
+
### Stateless MCP Mounting
|
|
377
|
+
|
|
378
|
+
- FastMCP with `stateless_http=True` for Kubernetes compatibility
|
|
379
|
+
- Prevents stale session errors across pod restarts
|
|
380
|
+
- Mount at `/api/v1/mcp` for consistency
|
|
381
|
+
- Path rewrite middleware for trailing slash handling
|
|
382
|
+
- `redirect_slashes=False` prevents auth header stripping
|
|
383
|
+
|
|
384
|
+
### Context Building Flow
|
|
385
|
+
|
|
386
|
+
1. ContextBuilder extracts user_id, session_id from headers
|
|
387
|
+
2. Session history ALWAYS loaded with compression (if session_id provided)
|
|
388
|
+
3. User profile provided as REM LOOKUP hint (on-demand by default)
|
|
389
|
+
4. If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded
|
|
390
|
+
5. Combines: system context + compressed session history + new messages
|
|
391
|
+
6. Agent receives complete message list ready for execution
|
|
392
|
+
|
|
393
|
+
## Error Responses
|
|
394
|
+
|
|
395
|
+
### 500 - Agent Schema Not Found
|
|
396
|
+
|
|
397
|
+
```json
|
|
398
|
+
{
|
|
399
|
+
"detail": "Agent schema 'invalid-schema' not found and default schema unavailable"
|
|
400
|
+
}
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
**Solution**: Use valid schema name or ensure default schema exists in `schemas/agents/rem.yaml`
|
|
404
|
+
|
|
405
|
+
## Best Practices
|
|
406
|
+
|
|
407
|
+
1. **Use Session IDs**: Always provide `X-Session-Id` for multi-turn conversations
|
|
408
|
+
2. **Generate Stable Session IDs**: Generate a UUID once per conversation and reuse it for every turn (`X-Session-Id` must be a valid UUID)
|
|
409
|
+
3. **Tenant Scoping**: Provide `X-Tenant-Id` for multi-tenant deployments
|
|
410
|
+
4. **Model Selection**: Choose appropriate model for task complexity
|
|
411
|
+
5. **Streaming**: Use streaming for long-running responses
|
|
412
|
+
6. **User Context**: Enable auto-inject only if always needed, otherwise use on-demand
|
|
413
|
+
|
|
414
|
+
## Related Documentation
|
|
415
|
+
|
|
416
|
+
- [Chat Router](routers/chat/completions.py) - Chat completions implementation
|
|
417
|
+
- [MCP Router](mcp_router/server.py) - MCP server implementation
|
|
418
|
+
- [Agent Schemas](../schemas/agents/) - Available agent schemas
|
|
419
|
+
- [Session Compression](../services/session/compression.py) - Compression implementation
|
|
420
|
+
- [Context Builder](../agentic/context_builder.py) - Context construction logic
|