remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
rem/api/README.md
ADDED
|
@@ -0,0 +1,657 @@
|
|
|
1
|
+
# REM API
|
|
2
|
+
|
|
3
|
+
FastAPI server for the REM (Resources Entities Moments) system, providing OpenAI-compatible chat completions, an MCP server, and RESTful endpoints.
|
|
4
|
+
|
|
5
|
+
## Running the API
|
|
6
|
+
|
|
7
|
+
### CLI Command
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Development mode (with auto-reload)
|
|
11
|
+
rem serve
|
|
12
|
+
|
|
13
|
+
# Production mode
|
|
14
|
+
rem serve --host 0.0.0.0 --port 8000 --workers 4
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### CLI Options
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
rem serve --help
|
|
21
|
+
|
|
22
|
+
Options:
|
|
23
|
+
--host TEXT Host to bind to (default: 0.0.0.0)
|
|
24
|
+
--port INTEGER Port to listen on (default: 8000)
|
|
25
|
+
--reload Enable auto-reload for development (default: true)
|
|
26
|
+
--workers INTEGER Number of worker processes (default: 1)
|
|
27
|
+
--log-level TEXT Logging level: debug, info, warning, error (default: info)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Direct Python
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import uvicorn
|
|
34
|
+
from rem.api.main import app
|
|
35
|
+
|
|
36
|
+
uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Environment Variables
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# API Server
|
|
43
|
+
API__HOST=0.0.0.0
|
|
44
|
+
API__PORT=8000
|
|
45
|
+
API__RELOAD=true
|
|
46
|
+
API__WORKERS=1
|
|
47
|
+
API__LOG_LEVEL=info
|
|
48
|
+
|
|
49
|
+
# Chat Settings
|
|
50
|
+
CHAT__AUTO_INJECT_USER_CONTEXT=false # Default: false (use REM LOOKUP hints)
|
|
51
|
+
|
|
52
|
+
# LLM
|
|
53
|
+
LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
|
|
54
|
+
LLM__DEFAULT_TEMPERATURE=0.5
|
|
55
|
+
LLM__ANTHROPIC_API_KEY=sk-ant-...
|
|
56
|
+
LLM__OPENAI_API_KEY=sk-...
|
|
57
|
+
|
|
58
|
+
# PostgreSQL (required for session history)
|
|
59
|
+
POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5432/rem
|
|
60
|
+
POSTGRES__ENABLED=true
|
|
61
|
+
|
|
62
|
+
# OpenTelemetry (optional)
|
|
63
|
+
OTEL__ENABLED=false
|
|
64
|
+
OTEL__SERVICE_NAME=rem-api
|
|
65
|
+
OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Endpoints
|
|
69
|
+
|
|
70
|
+
### Chat Completions
|
|
71
|
+
|
|
72
|
+
**POST /api/v1/chat/completions** - OpenAI-compatible chat completions
|
|
73
|
+
|
|
74
|
+
Features:
|
|
75
|
+
- Streaming and non-streaming modes
|
|
76
|
+
- Session history with compression
|
|
77
|
+
- User profile integration via dreaming worker
|
|
78
|
+
- Multiple agent schemas
|
|
79
|
+
- Model override support
|
|
80
|
+
|
|
81
|
+
### MCP Server
|
|
82
|
+
|
|
83
|
+
**Mounted at /api/v1/mcp** - FastMCP server for Model Context Protocol
|
|
84
|
+
|
|
85
|
+
Tools:
|
|
86
|
+
- `ask_rem`: Query REM system using natural language
|
|
87
|
+
- `parse_and_ingest_file`: Ingest files into REM
|
|
88
|
+
- Additional MCP tools for REM operations
|
|
89
|
+
|
|
90
|
+
### Health Check
|
|
91
|
+
|
|
92
|
+
**GET /health** - Health check endpoint
|
|
93
|
+
|
|
94
|
+
## Content Headers
|
|
95
|
+
|
|
96
|
+
REM API uses custom headers to provide context, identify users, and manage sessions.
|
|
97
|
+
|
|
98
|
+
### Header Reference
|
|
99
|
+
|
|
100
|
+
| Header Name | Description | Example Value | Required |
|
|
101
|
+
|-------------|-------------|---------------|----------|
|
|
102
|
+
| `X-User-Id` | User identifier (email, UUID, or username) | `sarah@example.com`, `user-123` | No |
|
|
103
|
+
| `X-Tenant-Id` | Tenant identifier for multi-tenancy | `acme-corp`, `tenant-123` | No |
|
|
104
|
+
| `X-Session-Id` | Session identifier for conversation continuity (must be UUID) | `550e8400-e29b-41d4-a716-446655440000` | No |
|
|
105
|
+
| `X-Agent-Schema` | Agent schema name to use | `rem`, `query-agent` | No |
|
|
106
|
+
| `X-Chat-Is-Audio` | Indicates audio input in chat completions | `true`, `false` | No |
|
|
107
|
+
| `Authorization` | Bearer token for API authentication | `Bearer jwt_token_here` | Yes* |
|
|
108
|
+
|
|
109
|
+
*Required for authenticated endpoints. Not required for public endpoints.
|
|
110
|
+
|
|
111
|
+
## Session Management
|
|
112
|
+
|
|
113
|
+
The REM chat API is designed for multi-turn conversations in which each request carries a single new message; earlier turns are restored server-side from the stored session history.
|
|
114
|
+
|
|
115
|
+
### How Sessions Work
|
|
116
|
+
|
|
117
|
+
1. **First Message**: Client sends message without `X-Session-Id`
|
|
118
|
+
- Server processes message
|
|
119
|
+
- Returns response
|
|
120
|
+
- Client generates a session ID (must be a valid UUID) and sends it as `X-Session-Id` on subsequent messages
|
|
121
|
+
|
|
122
|
+
2. **Subsequent Messages**: Client sends message with `X-Session-Id`
|
|
123
|
+
- Server loads compressed session history from database
|
|
124
|
+
- Combines history with new message
|
|
125
|
+
- Agent receives full conversation context
|
|
126
|
+
- New messages saved to database with compression
|
|
127
|
+
|
|
128
|
+
3. **Compression**: Long assistant responses are compressed
|
|
129
|
+
- Short messages (<400 chars): Stored and loaded as-is
|
|
130
|
+
- Long messages (>400 chars): Compressed with REM LOOKUP hints
|
|
131
|
+
- Example: `"Start of response... [Message truncated - REM LOOKUP session-123-msg-1 to recover full content] ...end of response"`
|
|
132
|
+
- Agent can retrieve full content on-demand using REM LOOKUP
|
|
133
|
+
|
|
134
|
+
### Benefits of Compression
|
|
135
|
+
|
|
136
|
+
- Prevents context window bloat
|
|
137
|
+
- Maintains conversation continuity
|
|
138
|
+
- Agent decides what to retrieve
|
|
139
|
+
- More efficient for long conversations
|
|
140
|
+
|
|
141
|
+
## User Profiles and Dreaming
|
|
142
|
+
|
|
143
|
+
The dreaming worker runs periodically to build user models:
|
|
144
|
+
|
|
145
|
+
1. Analyzes user's resources, sessions, and moments
|
|
146
|
+
2. Generates profile with current projects, expertise, interests
|
|
147
|
+
3. Stores profile in User entity (`metadata.profile` and model fields)
|
|
148
|
+
|
|
149
|
+
### User Profile in Chat
|
|
150
|
+
|
|
151
|
+
**On-Demand (Default):**
|
|
152
|
+
- Agent receives hint: `"User ID: sarah@example.com. To load user profile: Use REM LOOKUP users/sarah@example.com"`
|
|
153
|
+
- Agent decides whether to load based on query
|
|
154
|
+
- More efficient for queries that don't need personalization
|
|
155
|
+
|
|
156
|
+
**Auto-Inject (Optional):**
|
|
157
|
+
- Set environment variable: `CHAT__AUTO_INJECT_USER_CONTEXT=true`
|
|
158
|
+
- User profile automatically loaded and injected into system message
|
|
159
|
+
- Simpler for basic chatbots that always need context
|
|
160
|
+
|
|
161
|
+
## Authentication
|
|
162
|
+
|
|
163
|
+
### Production Authentication
|
|
164
|
+
|
|
165
|
+
When `AUTH__ENABLED=true`, users authenticate via OAuth (Google or Microsoft). The OAuth flow:
|
|
166
|
+
|
|
167
|
+
1. User visits `/api/auth/google/login` or `/api/auth/microsoft/login`
|
|
168
|
+
2. User authenticates with provider
|
|
169
|
+
3. Callback stores user in session cookie
|
|
170
|
+
4. Subsequent requests use session cookie
|
|
171
|
+
|
|
172
|
+
### Development Token (Non-Production Only)
|
|
173
|
+
|
|
174
|
+
For local development and testing, you can use a dev token instead of OAuth. The token endpoint, `/api/dev/token`, is available whenever `ENVIRONMENT != "production"`, regardless of whether auth is enabled.
|
|
175
|
+
|
|
176
|
+
**Get Token:**
|
|
177
|
+
```bash
|
|
178
|
+
curl http://localhost:8000/api/dev/token
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
**Response:**
|
|
182
|
+
```json
|
|
183
|
+
{
|
|
184
|
+
"token": "dev_89737a19376332bfd9a4a06db8b79fd1",
|
|
185
|
+
"type": "Bearer",
|
|
186
|
+
"user": {
|
|
187
|
+
"id": "test-user",
|
|
188
|
+
"email": "test@rem.local",
|
|
189
|
+
"name": "Test User"
|
|
190
|
+
},
|
|
191
|
+
"usage": "curl -H \"Authorization: Bearer dev_...\" http://localhost:8000/api/v1/...",
|
|
192
|
+
"warning": "This token is for development/testing only and will not work in production."
|
|
193
|
+
}
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
**Use Token:**
|
|
197
|
+
```bash
|
|
198
|
+
# Get the token
|
|
199
|
+
TOKEN=$(curl -s http://localhost:8000/api/dev/token | jq -r .token)
|
|
200
|
+
|
|
201
|
+
# Use it in requests
|
|
202
|
+
curl -H "Authorization: Bearer $TOKEN" \
|
|
203
|
+
-H "X-Tenant-Id: default" \
|
|
204
|
+
http://localhost:8000/api/v1/shared-with-me
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
**Security Notes:**
|
|
208
|
+
- Only available when `ENVIRONMENT != "production"`
|
|
209
|
+
- Token is HMAC-signed using session secret
|
|
210
|
+
- Authenticates as `test-user` with `pro` tier and `admin` role
|
|
211
|
+
- Token is deterministic per environment (same secret = same token)
|
|
212
|
+
|
|
213
|
+
### Anonymous Access
|
|
214
|
+
|
|
215
|
+
When `AUTH__ALLOW_ANONYMOUS=true` (default in development):
|
|
216
|
+
- Requests without authentication are allowed
|
|
217
|
+
- Anonymous users get rate-limited access
|
|
218
|
+
- MCP endpoints still require auth unless `AUTH__MCP_REQUIRES_AUTH=false`
|
|
219
|
+
|
|
220
|
+
## Usage Examples
|
|
221
|
+
|
|
222
|
+
**Note on Authentication**: By default, authentication is disabled (`AUTH__ENABLED=false`) for local development and testing. The examples below work without an `Authorization` header. If authentication is enabled, use either:
|
|
223
|
+
- **Dev token**: `-H "Authorization: Bearer $(curl -s http://localhost:8000/api/dev/token | jq -r .token)"`
|
|
224
|
+
- **Session cookie**: Login via OAuth first, then use cookies
|
|
225
|
+
|
|
226
|
+
### cURL: Simple Chat
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
230
|
+
-H "Content-Type: application/json" \
|
|
231
|
+
-H "X-User-Id: sarah@example.com" \
|
|
232
|
+
-d '{
|
|
233
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
234
|
+
"messages": [
|
|
235
|
+
{"role": "user", "content": "What is REM?"}
|
|
236
|
+
]
|
|
237
|
+
}'
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### cURL: Streaming Chat
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
244
|
+
-H "Content-Type: application/json" \
|
|
245
|
+
-H "X-User-Id: sarah@example.com" \
|
|
246
|
+
-d '{
|
|
247
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
248
|
+
"messages": [
|
|
249
|
+
{"role": "user", "content": "Explain REM architecture"}
|
|
250
|
+
],
|
|
251
|
+
"stream": true
|
|
252
|
+
}'
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
### cURL: Multi-Turn Conversation
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
# First message
|
|
259
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
260
|
+
-H "Content-Type: application/json" \
|
|
261
|
+
-H "X-User-Id: sarah@example.com" \
|
|
262
|
+
-H "X-Session-Id: 550e8400-e29b-41d4-a716-446655440000" \
|
|
263
|
+
-d '{
|
|
264
|
+
"model": "openai:gpt-4o",
|
|
265
|
+
"messages": [
|
|
266
|
+
{"role": "user", "content": "What are moments in REM?"}
|
|
267
|
+
]
|
|
268
|
+
}'
|
|
269
|
+
|
|
270
|
+
# Second message (session history loaded automatically)
|
|
271
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
272
|
+
-H "Content-Type: application/json" \
|
|
273
|
+
-H "X-User-Id: sarah@example.com" \
|
|
274
|
+
-H "X-Session-Id: 550e8400-e29b-41d4-a716-446655440000" \
|
|
275
|
+
-d '{
|
|
276
|
+
"model": "openai:gpt-4o",
|
|
277
|
+
"messages": [
|
|
278
|
+
{"role": "user", "content": "How are they created?"}
|
|
279
|
+
]
|
|
280
|
+
}'
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Python: Multi-Turn Conversation
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
import requests
|
|
287
|
+
import uuid
|
|
288
|
+
|
|
289
|
+
url = "http://localhost:8000/api/v1/chat/completions"
|
|
290
|
+
session_id = str(uuid.uuid4()) # Must be a valid UUID
|
|
291
|
+
|
|
292
|
+
def send_message(content):
|
|
293
|
+
headers = {
|
|
294
|
+
"Content-Type": "application/json",
|
|
295
|
+
"X-User-Id": "sarah@example.com",
|
|
296
|
+
"X-Session-Id": session_id
|
|
297
|
+
}
|
|
298
|
+
data = {
|
|
299
|
+
"model": "openai:gpt-4o",
|
|
300
|
+
"messages": [
|
|
301
|
+
{"role": "user", "content": content}
|
|
302
|
+
]
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
response = requests.post(url, headers=headers, json=data)
|
|
306
|
+
return response.json()["choices"][0]["message"]["content"]
|
|
307
|
+
|
|
308
|
+
# First turn
|
|
309
|
+
response1 = send_message("What are moments in REM?")
|
|
310
|
+
print(f"Assistant: {response1}\n")
|
|
311
|
+
|
|
312
|
+
# Second turn (session history loaded automatically)
|
|
313
|
+
response2 = send_message("How are they created?")
|
|
314
|
+
print(f"Assistant: {response2}\n")
|
|
315
|
+
|
|
316
|
+
# Third turn
|
|
317
|
+
response3 = send_message("Can you give an example?")
|
|
318
|
+
print(f"Assistant: {response3}\n")
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Python: Streaming Chat
|
|
322
|
+
|
|
323
|
+
```python
|
|
324
|
+
import requests
|
|
325
|
+
import json
|
|
326
|
+
|
|
327
|
+
url = "http://localhost:8000/api/v1/chat/completions"
|
|
328
|
+
headers = {
|
|
329
|
+
"Content-Type": "application/json",
|
|
330
|
+
"X-User-Id": "sarah@example.com"
|
|
331
|
+
}
|
|
332
|
+
data = {
|
|
333
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
334
|
+
"messages": [
|
|
335
|
+
{"role": "user", "content": "Explain REM architecture"}
|
|
336
|
+
],
|
|
337
|
+
"stream": True
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
response = requests.post(url, headers=headers, json=data, stream=True)
|
|
341
|
+
|
|
342
|
+
for line in response.iter_lines():
|
|
343
|
+
if line:
|
|
344
|
+
line_str = line.decode('utf-8')
|
|
345
|
+
if line_str.startswith('data: '):
|
|
346
|
+
data_str = line_str[6:] # Remove 'data: ' prefix
|
|
347
|
+
if data_str != '[DONE]':
|
|
348
|
+
chunk = json.loads(data_str)
|
|
349
|
+
delta = chunk["choices"][0]["delta"]
|
|
350
|
+
if "content" in delta:
|
|
351
|
+
print(delta["content"], end="", flush=True)
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### Python: Audio Input (Voice Chat)
|
|
355
|
+
|
|
356
|
+
```python
|
|
357
|
+
import requests
|
|
358
|
+
import base64
|
|
359
|
+
|
|
360
|
+
# Read audio file and encode to base64
|
|
361
|
+
with open("recording.wav", "rb") as audio_file:
|
|
362
|
+
audio_b64 = base64.b64encode(audio_file.read()).decode('utf-8')
|
|
363
|
+
|
|
364
|
+
url = "http://localhost:8000/api/v1/chat/completions"
|
|
365
|
+
headers = {
|
|
366
|
+
"Content-Type": "application/json",
|
|
367
|
+
"X-User-Id": "sarah@example.com",
|
|
368
|
+
"X-Chat-Is-Audio": "true" # Trigger audio transcription
|
|
369
|
+
}
|
|
370
|
+
data = {
|
|
371
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
372
|
+
"messages": [
|
|
373
|
+
{"role": "user", "content": audio_b64} # Base64-encoded WAV audio
|
|
374
|
+
]
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
response = requests.post(url, headers=headers, json=data)
|
|
378
|
+
print(response.json()["choices"][0]["message"]["content"])
|
|
379
|
+
|
|
380
|
+
# Audio is transcribed to text using OpenAI Whisper
|
|
381
|
+
# Then processed as normal text chat
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
## Response Format
|
|
385
|
+
|
|
386
|
+
### Non-Streaming Response
|
|
387
|
+
|
|
388
|
+
```json
|
|
389
|
+
{
|
|
390
|
+
"id": "chatcmpl-abc123def456",
|
|
391
|
+
"created": 1732292400,
|
|
392
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
393
|
+
"choices": [
|
|
394
|
+
{
|
|
395
|
+
"index": 0,
|
|
396
|
+
"message": {
|
|
397
|
+
"role": "assistant",
|
|
398
|
+
"content": "REM (Resources Entities Moments) is a bio-inspired memory architecture..."
|
|
399
|
+
},
|
|
400
|
+
"finish_reason": "stop"
|
|
401
|
+
}
|
|
402
|
+
],
|
|
403
|
+
"usage": {
|
|
404
|
+
"prompt_tokens": 150,
|
|
405
|
+
"completion_tokens": 200,
|
|
406
|
+
"total_tokens": 350
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
### Streaming Response (SSE Format)
|
|
412
|
+
|
|
413
|
+
```
|
|
414
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{"role":"assistant","content":""},"index":0}]}
|
|
415
|
+
|
|
416
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":"REM"},"index":0}]}
|
|
417
|
+
|
|
418
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":" (Resources"},"index":0}]}
|
|
419
|
+
|
|
420
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{"content":" Entities"},"index":0}]}
|
|
421
|
+
|
|
422
|
+
data: {"id":"chatcmpl-abc123","choices":[{"delta":{},"finish_reason":"stop","index":0}]}
|
|
423
|
+
|
|
424
|
+
data: [DONE]
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
## Extended SSE Event Protocol
|
|
428
|
+
|
|
429
|
+
REM uses OpenAI-compatible format for text content streaming, plus custom named SSE events for rich UI interactions.
|
|
430
|
+
|
|
431
|
+
### Event Types
|
|
432
|
+
|
|
433
|
+
| Event Type | Format | Purpose | UI Display |
|
|
434
|
+
|------------|--------|---------|------------|
|
|
435
|
+
| (text content) | `data:` (OpenAI format) | Content chunks | Main response area |
|
|
436
|
+
| `reasoning` | `event:` | Model thinking | Collapsible "thinking" section |
|
|
437
|
+
| `progress` | `event:` | Step indicators | Progress bar/stepper |
|
|
438
|
+
| `tool_call` | `event:` | Tool invocations | Tool status panel |
|
|
439
|
+
| `action_request` | `event:` | User input solicitation | Buttons, forms, modals |
|
|
440
|
+
| `metadata` | `event:` | System info | Hidden or badge display |
|
|
441
|
+
| `error` | `event:` | Error notification | Error toast/alert |
|
|
442
|
+
| `done` | `event:` | Stream completion | Cleanup signal |
|
|
443
|
+
|
|
444
|
+
### Event Format
|
|
445
|
+
|
|
446
|
+
**Text content (OpenAI-compatible `data:` format):**
|
|
447
|
+
```
|
|
448
|
+
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1732748123,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello "},"finish_reason":null}]}
|
|
449
|
+
|
|
450
|
+
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1732748123,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"world!"},"finish_reason":null}]}
|
|
451
|
+
|
|
452
|
+
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1732748123,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
|
|
453
|
+
|
|
454
|
+
data: [DONE]
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
**Named events (use `event:` prefix):**
|
|
458
|
+
```
|
|
459
|
+
event: reasoning
|
|
460
|
+
data: {"type": "reasoning", "content": "Analyzing the request...", "step": 1}
|
|
461
|
+
|
|
462
|
+
event: progress
|
|
463
|
+
data: {"type": "progress", "step": 1, "total_steps": 3, "label": "Searching", "status": "in_progress"}
|
|
464
|
+
|
|
465
|
+
event: tool_call
|
|
466
|
+
data: {"type": "tool_call", "tool_name": "search_rem", "status": "started", "arguments": {"query": "..."}}
|
|
467
|
+
|
|
468
|
+
event: action_request
|
|
469
|
+
data: {"type": "action_request", "card": {"id": "feedback-1", "prompt": "Was this helpful?", "actions": [...]}}
|
|
470
|
+
|
|
471
|
+
event: metadata
|
|
472
|
+
data: {"type": "metadata", "confidence": 0.95, "sources": ["doc1.md"], "hidden": false}
|
|
473
|
+
|
|
474
|
+
event: done
|
|
475
|
+
data: {"type": "done", "reason": "stop"}
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
### Action Request Cards (Adaptive Cards-inspired)
|
|
479
|
+
|
|
480
|
+
Action requests solicit user input using a schema inspired by [Microsoft Adaptive Cards](https://adaptivecards.io/):
|
|
481
|
+
|
|
482
|
+
```json
|
|
483
|
+
{
|
|
484
|
+
"type": "action_request",
|
|
485
|
+
"card": {
|
|
486
|
+
"id": "confirm-delete-123",
|
|
487
|
+
"prompt": "Are you sure you want to delete this item?",
|
|
488
|
+
"display_style": "modal",
|
|
489
|
+
"actions": [
|
|
490
|
+
{
|
|
491
|
+
"type": "Action.Submit",
|
|
492
|
+
"id": "confirm",
|
|
493
|
+
"title": "Delete",
|
|
494
|
+
"style": "destructive",
|
|
495
|
+
"data": {"action": "delete", "item_id": "123"}
|
|
496
|
+
},
|
|
497
|
+
{
|
|
498
|
+
"type": "Action.Submit",
|
|
499
|
+
"id": "cancel",
|
|
500
|
+
"title": "Cancel",
|
|
501
|
+
"style": "secondary",
|
|
502
|
+
"data": {"action": "cancel"}
|
|
503
|
+
}
|
|
504
|
+
],
|
|
505
|
+
"inputs": [
|
|
506
|
+
{
|
|
507
|
+
"type": "Input.Text",
|
|
508
|
+
"id": "reason",
|
|
509
|
+
"label": "Reason (optional)",
|
|
510
|
+
"placeholder": "Why are you deleting this?"
|
|
511
|
+
}
|
|
512
|
+
],
|
|
513
|
+
"timeout_ms": 30000
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
```
|
|
517
|
+
|
|
518
|
+
**Action Types:**
|
|
519
|
+
- `Action.Submit` - Send data to server
|
|
520
|
+
- `Action.OpenUrl` - Navigate to URL
|
|
521
|
+
- `Action.ShowCard` - Reveal nested content
|
|
522
|
+
|
|
523
|
+
**Input Types:**
|
|
524
|
+
- `Input.Text` - Text field (single or multiline)
|
|
525
|
+
- `Input.ChoiceSet` - Dropdown/radio selection
|
|
526
|
+
- `Input.Toggle` - Checkbox/toggle
|
|
527
|
+
|
|
528
|
+
### SSE Simulator Endpoint
|
|
529
|
+
|
|
530
|
+
For frontend development and testing, use the simulator which generates all event types without LLM costs:
|
|
531
|
+
|
|
532
|
+
```bash
|
|
533
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
534
|
+
-H "Content-Type: application/json" \
|
|
535
|
+
-H "X-Agent-Schema: simulator" \
|
|
536
|
+
-d '{"messages": [{"role": "user", "content": "demo"}], "stream": true}'
|
|
537
|
+
```
|
|
538
|
+
|
|
539
|
+
The simulator produces a scripted sequence demonstrating:
|
|
540
|
+
1. Reasoning events (4 steps)
|
|
541
|
+
2. Progress indicators
|
|
542
|
+
3. Simulated tool calls
|
|
543
|
+
4. Rich markdown content
|
|
544
|
+
5. Metadata with confidence
|
|
545
|
+
6. Action request for feedback
|
|
546
|
+
|
|
547
|
+
See `rem/agentic/agents/sse_simulator.py` for implementation details.
|
|
548
|
+
|
|
549
|
+
### Frontend Integration
|
|
550
|
+
|
|
551
|
+
```typescript
|
|
552
|
+
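// Parse SSE events in React/TypeScript. NOTE: the browser EventSource API only issues GET requests, so for the POST-based chat completions endpoint use fetch() with a streaming reader (or an SSE client that supports POST); the handlers below illustrate how each event type is consumed.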
|
|
553
|
+
const eventSource = new EventSource('/api/v1/chat/completions');
|
|
554
|
+
|
|
555
|
+
eventSource.onmessage = (e) => {
|
|
556
|
+
// Default handler for data-only events (text_delta)
|
|
557
|
+
const event = JSON.parse(e.data);
|
|
558
|
+
if (event.type === 'text_delta') {
|
|
559
|
+
appendContent(event.content);
|
|
560
|
+
}
|
|
561
|
+
};
|
|
562
|
+
|
|
563
|
+
eventSource.addEventListener('reasoning', (e) => {
|
|
564
|
+
const event = JSON.parse(e.data);
|
|
565
|
+
appendReasoning(event.content);
|
|
566
|
+
});
|
|
567
|
+
|
|
568
|
+
eventSource.addEventListener('action_request', (e) => {
|
|
569
|
+
const event = JSON.parse(e.data);
|
|
570
|
+
showActionCard(event.card);
|
|
571
|
+
});
|
|
572
|
+
|
|
573
|
+
eventSource.addEventListener('done', () => {
|
|
574
|
+
eventSource.close();
|
|
575
|
+
});
|
|
576
|
+
```
|
|
577
|
+
|
|
578
|
+
## Architecture
|
|
579
|
+
|
|
580
|
+
### Middleware Ordering
|
|
581
|
+
|
|
582
|
+
Middleware runs in reverse order of addition:
|
|
583
|
+
1. CORS (added last, runs first) - adds headers to all responses
|
|
584
|
+
2. Auth middleware - validates authentication
|
|
585
|
+
3. Logging middleware - logs requests/responses
|
|
586
|
+
4. Sessions middleware (added first, runs last)
|
|
587
|
+
|
|
588
|
+
### Stateless MCP Mounting
|
|
589
|
+
|
|
590
|
+
- FastMCP with `stateless_http=True` for Kubernetes compatibility
|
|
591
|
+
- Prevents stale session errors across pod restarts
|
|
592
|
+
- Mount at `/api/v1/mcp` for consistency
|
|
593
|
+
- Path rewrite middleware for trailing slash handling
|
|
594
|
+
- `redirect_slashes=False` prevents auth header stripping
|
|
595
|
+
|
|
596
|
+
### Context Building Flow
|
|
597
|
+
|
|
598
|
+
1. ContextBuilder extracts user_id, session_id from headers
|
|
599
|
+
2. Session history is ALWAYS loaded with compression (if session_id is provided)
|
|
600
|
+
3. User profile provided as REM LOOKUP hint (on-demand by default)
|
|
601
|
+
4. If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded
|
|
602
|
+
5. Combines: system context + compressed session history + new messages
|
|
603
|
+
6. Agent receives complete message list ready for execution
|
|
604
|
+
|
|
605
|
+
## Error Responses
|
|
606
|
+
|
|
607
|
+
### 429 - Rate Limit Exceeded
|
|
608
|
+
|
|
609
|
+
When a user exceeds their rate limit (based on their tier), the API returns a 429 status code with a structured error body. The frontend should intercept this error to prompt the user to sign in or upgrade.
|
|
610
|
+
|
|
611
|
+
```json
|
|
612
|
+
{
|
|
613
|
+
"error": {
|
|
614
|
+
"code": "rate_limit_exceeded",
|
|
615
|
+
"message": "You have exceeded your rate limit. Please sign in or upgrade to continue.",
|
|
616
|
+
"details": {
|
|
617
|
+
"limit": 50,
|
|
618
|
+
"tier": "anonymous",
|
|
619
|
+
"retry_after": 60
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
```
|
|
624
|
+
|
|
625
|
+
**Handling Strategy:**
|
|
626
|
+
1. **Intercept 429s:** API client should listen for `status === 429`.
|
|
627
|
+
2. **Check Code:** If `error.code === 'rate_limit_exceeded'` AND `error.details.tier === 'anonymous'`, trigger "Login / Sign Up" flow.
|
|
628
|
+
3. **Authenticated Users:** If `tier !== 'anonymous'`, prompt to upgrade plan.
|
|
629
|
+
|
|
630
|
+
### 500 - Agent Schema Not Found
|
|
631
|
+
|
|
632
|
+
```json
|
|
633
|
+
{
|
|
634
|
+
"detail": "Agent schema 'invalid-schema' not found and default schema unavailable"
|
|
635
|
+
}
|
|
636
|
+
```
|
|
637
|
+
|
|
638
|
+
**Solution**: Use a valid schema name, or ensure the default schema exists at `schemas/agents/rem.yaml`
|
|
639
|
+
|
|
640
|
+
## Best Practices
|
|
641
|
+
|
|
642
|
+
1. **Use Session IDs**: Always provide `X-Session-Id` for multi-turn conversations
|
|
643
|
+
2. **Generate Stable Session IDs**: Use UUIDs or meaningful identifiers
|
|
644
|
+
3. **Tenant Scoping**: Provide `X-Tenant-Id` for multi-tenant deployments
|
|
645
|
+
4. **Model Selection**: Choose a model appropriate to the task's complexity
|
|
646
|
+
5. **Streaming**: Use streaming for long-running responses
|
|
647
|
+
6. **User Context**: Enable auto-inject only if the user profile is needed on every request; otherwise, rely on the on-demand lookup
|
|
648
|
+
|
|
649
|
+
## Related Documentation
|
|
650
|
+
|
|
651
|
+
- [Chat Router](routers/chat/completions.py) - Chat completions implementation
|
|
652
|
+
- [SSE Events](routers/chat/sse_events.py) - SSE event type definitions
|
|
653
|
+
- [SSE Simulator](../agentic/agents/sse_simulator.py) - Event simulator for testing
|
|
654
|
+
- [MCP Router](mcp_router/server.py) - MCP server implementation
|
|
655
|
+
- [Agent Schemas](../../schemas/agents/) - Available agent schemas
|
|
656
|
+
- [Session Compression](../../services/session/compression.py) - Compression implementation
|
|
657
|
+
- [Context Builder](../agentic/context_builder.py) - Context construction logic
|