remdb 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
# Session Management for REM
|
|
2
|
+
|
|
3
|
+
This module implements session persistence, compression, and reloading for conversation continuity in the REM chat completions API.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The session management system enables multi-turn conversations by:
|
|
8
|
+
1. **Saving** all chat messages to the database
|
|
9
|
+
2. **Compressing** long assistant responses with REM LOOKUP keys
|
|
10
|
+
3. **Reloading** conversation history on subsequent requests
|
|
11
|
+
4. **Gracefully degrading** when Postgres is disabled
|
|
12
|
+
|
|
13
|
+
## Architecture
|
|
14
|
+
|
|
15
|
+
### Components
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
services/session/
|
|
19
|
+
├── compression.py # Message compression and storage
|
|
20
|
+
├── reload.py # Session history reloading
|
|
21
|
+
└── __init__.py # Public API
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Database Schema
|
|
25
|
+
|
|
26
|
+
Messages are stored in the `messages` table (inherited from CoreModel):
|
|
27
|
+
|
|
28
|
+
```sql
|
|
29
|
+
CREATE TABLE messages (
|
|
30
|
+
id UUID PRIMARY KEY,
|
|
31
|
+
content TEXT NOT NULL,
|
|
32
|
+
message_type VARCHAR, -- 'user', 'assistant', 'system'
|
|
33
|
+
session_id VARCHAR, -- Groups messages by conversation
|
|
34
|
+
tenant_id VARCHAR, -- Optional: for future multi-tenant SaaS use
|
|
35
|
+
user_id VARCHAR NOT NULL, -- User ownership (primary isolation scope)
|
|
36
|
+
metadata JSONB, -- Contains entity_key, message_index, timestamp
|
|
37
|
+
created_at TIMESTAMP,
|
|
38
|
+
updated_at TIMESTAMP,
|
|
39
|
+
deleted_at TIMESTAMP
|
|
40
|
+
);
|
|
41
|
+
|
|
42
|
+
-- Indexes for fast retrieval
|
|
43
|
+
CREATE INDEX idx_messages_session ON messages(session_id, user_id);
|
|
44
|
+
CREATE INDEX idx_messages_entity_key ON messages((metadata->>'entity_key'));
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Key Features
|
|
48
|
+
|
|
49
|
+
### 1. Message Compression
|
|
50
|
+
|
|
51
|
+
Long assistant responses (>400 chars) are compressed to save context window space:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from rem.services.session import MessageCompressor
|
|
55
|
+
|
|
56
|
+
compressor = MessageCompressor(truncate_length=200)
|
|
57
|
+
|
|
58
|
+
# Long message
|
|
59
|
+
long_msg = {"role": "assistant", "content": "A" * 1000}
|
|
60
|
+
|
|
61
|
+
# Compress with REM LOOKUP key
|
|
62
|
+
compressed = compressor.compress_message(long_msg, entity_key="session-123-msg-5")
|
|
63
|
+
|
|
64
|
+
# Result:
|
|
65
|
+
# {
|
|
66
|
+
# "role": "assistant",
|
|
67
|
+
# "content": "AAAA...[first 200 chars]...\n\n... [Message truncated - REM LOOKUP session-123-msg-5 to recover full content] ...\n\n...[last 200 chars]...AAAA",
|
|
68
|
+
# "_compressed": True,
|
|
69
|
+
# "_original_length": 1000,
|
|
70
|
+
# "_entity_key": "session-123-msg-5"
|
|
71
|
+
# }
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**Benefits:**
|
|
75
|
+
- Keeps conversation history within LLM context windows
|
|
76
|
+
- Full messages stored in database for audit trail
|
|
77
|
+
- Retrieved on-demand via REM LOOKUP queries
|
|
78
|
+
- Compression markers visible to LLM for awareness
|
|
79
|
+
|
|
80
|
+
### 2. Session Reloading
|
|
81
|
+
|
|
82
|
+
Load full conversation history for session continuity:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from rem.services.session import reload_session
|
|
86
|
+
from rem.services.postgres import get_postgres_service
|
|
87
|
+
|
|
88
|
+
db = get_postgres_service()
|
|
89
|
+
|
|
90
|
+
# Reload conversation
|
|
91
|
+
history = await reload_session(
|
|
92
|
+
db=db,
|
|
93
|
+
session_id="session-abc-123",
|
|
94
|
+
user_id="alice",
|
|
95
|
+
decompress_messages=False # Use compressed versions
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Returns:
|
|
99
|
+
# [
|
|
100
|
+
# {"role": "user", "content": "What is REM?"},
|
|
101
|
+
# {"role": "assistant", "content": "REM is..."},
|
|
102
|
+
# {"role": "user", "content": "Tell me more"},
|
|
103
|
+
# {"role": "assistant", "content": "...compressed with LOOKUP key..."}
|
|
104
|
+
# ]
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Options:**
|
|
108
|
+
- `decompress_messages=False`: Fast, uses compressed versions (default)
|
|
109
|
+
- `decompress_messages=True`: Slower, fetches full content via LOOKUP
|
|
110
|
+
|
|
111
|
+
### 3. REM LOOKUP Pattern
|
|
112
|
+
|
|
113
|
+
Compressed messages use entity keys for retrieval:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from rem.services.session import SessionMessageStore
|
|
117
|
+
from rem.services.postgres import get_postgres_service
|
|
118
|
+
|
|
119
|
+
db = get_postgres_service()
|
|
120
|
+
store = SessionMessageStore(user_id="alice")
|
|
121
|
+
|
|
122
|
+
# Entity key format: session-{session_id}-msg-{index}
|
|
123
|
+
entity_key = "session-abc-123-msg-5"
|
|
124
|
+
|
|
125
|
+
# Retrieve full message via LOOKUP
|
|
126
|
+
full_content = await store.retrieve_message(entity_key)
|
|
127
|
+
|
|
128
|
+
# SQL executed:
|
|
129
|
+
# SELECT * FROM messages
|
|
130
|
+
# WHERE metadata->>'entity_key' = 'session-abc-123-msg-5'
|
|
131
|
+
# AND user_id = 'alice'
|
|
132
|
+
# LIMIT 1
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**Key Format:**
|
|
136
|
+
- Pattern: `session-{session_id}-msg-{message_index}`
|
|
137
|
+
- Example: `session-abc-123-msg-5` (message with index 5 in session `abc-123`)
|
|
138
|
+
- Enables O(1) LOOKUP via JSONB index
|
|
139
|
+
|
|
140
|
+
## Usage in Chat Completions
|
|
141
|
+
|
|
142
|
+
### Integration Pattern
|
|
143
|
+
|
|
144
|
+
The chat completions endpoint (`rem/src/rem/api/routers/chat/completions.py`) integrates session management:
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
@router.post("/chat/completions")
|
|
148
|
+
async def chat_completions(body: ChatCompletionRequest, request: Request):
|
|
149
|
+
# 1. Extract context from headers
|
|
150
|
+
context = AgentContext.from_headers(dict(request.headers))
|
|
151
|
+
db = get_postgres_service()
|
|
152
|
+
|
|
153
|
+
# 2. Reload session history
|
|
154
|
+
history = []
|
|
155
|
+
if context.session_id and db:
|
|
156
|
+
history = await reload_session(
|
|
157
|
+
db=db,
|
|
158
|
+
session_id=context.session_id,
|
|
159
|
+
user_id=context.user_id or "default",
|
|
160
|
+
decompress_messages=False
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
# 3. Run agent with history
|
|
164
|
+
agent = await create_pydantic_ai_agent(context, agent_schema, body.model)
|
|
165
|
+
result = await agent.run(prompt)
|
|
166
|
+
|
|
167
|
+
# 4. Save new messages
|
|
168
|
+
if context.session_id and db:
|
|
169
|
+
store = SessionMessageStore(db=db, user_id=context.user_id or "default")
|
|
170
|
+
await store.store_session_messages(
|
|
171
|
+
session_id=context.session_id,
|
|
172
|
+
messages=[
|
|
173
|
+
{"role": "user", "content": prompt},
|
|
174
|
+
{"role": "assistant", "content": result.output}
|
|
175
|
+
],
|
|
176
|
+
compress=True
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
return response
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Client Request
|
|
183
|
+
|
|
184
|
+
Include session context in HTTP headers:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
188
|
+
-H "X-Session-Id: session-abc-123" \
|
|
189
|
+
-H "X-User-Id: alice" \
|
|
190
|
+
-H "Content-Type: application/json" \
|
|
191
|
+
-d '{
|
|
192
|
+
"model": "openai:gpt-4o-mini",
|
|
193
|
+
"messages": [{"role": "user", "content": "What did we discuss earlier?"}],
|
|
194
|
+
"stream": false
|
|
195
|
+
}'
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**Headers:**
|
|
199
|
+
- `X-User-Id`: User identifier (optional; falls back to "default" when omitted)
|
|
200
|
+
- `X-Session-Id`: Session/conversation identifier (optional)
|
|
201
|
+
|
|
202
|
+
## Testing
|
|
203
|
+
|
|
204
|
+
### Unit Tests
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# Run session management tests
|
|
208
|
+
pytest rem/tests/integration/test_session_management.py -v
|
|
209
|
+
|
|
210
|
+
# Test message compression
|
|
211
|
+
pytest rem/tests/integration/test_session_management.py::test_message_compressor -v
|
|
212
|
+
|
|
213
|
+
# Test session reloading
|
|
214
|
+
pytest rem/tests/integration/test_session_management.py::test_reload_session -v
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Integration Tests
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
# End-to-end completions with sessions
|
|
221
|
+
pytest rem/tests/integration/test_completions_with_sessions.py -v
|
|
222
|
+
|
|
223
|
+
# Test session continuity
|
|
224
|
+
pytest rem/tests/integration/test_completions_with_sessions.py::test_completions_with_session_continuity -v
|
|
225
|
+
|
|
226
|
+
# Test tenant isolation
|
|
227
|
+
pytest rem/tests/integration/test_completions_with_sessions.py::test_completions_tenant_isolation -v
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Sample Data
|
|
231
|
+
|
|
232
|
+
Seed the database with realistic conversation data:
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
# Seed all sample conversations
|
|
236
|
+
python -m rem.tests.scripts.seed_sample_sessions --all
|
|
237
|
+
|
|
238
|
+
# Seed specific conversation
|
|
239
|
+
python -m rem.tests.scripts.seed_sample_sessions \
|
|
240
|
+
--conversation rem_intro \
|
|
241
|
+
--user-id alice
|
|
242
|
+
|
|
243
|
+
# Demonstrate LOOKUP retrieval
|
|
244
|
+
python -m rem.tests.scripts.seed_sample_sessions \
|
|
245
|
+
--demo-lookup \
|
|
246
|
+
--session-id <session-id> \
|
|
247
|
+
--user-id alice
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
Sample conversations available:
|
|
251
|
+
- `rem_intro`: Introduction to REM concepts
|
|
252
|
+
- `technical_deep_dive`: InlineEdge and TRAVERSE queries
|
|
253
|
+
- `practical_implementation`: Session logging setup guide
|
|
254
|
+
- `compression_test`: Very long response for compression testing
|
|
255
|
+
- `multi_turn`: Multi-turn technical Q&A
|
|
256
|
+
|
|
257
|
+
## Performance Considerations
|
|
258
|
+
|
|
259
|
+
### Context Window Management
|
|
260
|
+
|
|
261
|
+
**Problem:** LLMs have limited context windows (8K-200K tokens)
|
|
262
|
+
|
|
263
|
+
**Solution:** Message compression
|
|
264
|
+
- Short messages: Stored as-is
|
|
265
|
+
- Long messages (>400 chars): Compressed with LOOKUP keys
|
|
266
|
+
- LLM sees truncated versions in history
|
|
267
|
+
- Full content available via LOOKUP if needed
|
|
268
|
+
|
|
269
|
+
**Benefits:**
|
|
270
|
+
- Fit 10-20 turns in 8K context window
|
|
271
|
+
- Full audit trail preserved in database
|
|
272
|
+
- Configurable compression threshold
|
|
273
|
+
|
|
274
|
+
### Database Performance
|
|
275
|
+
|
|
276
|
+
**Optimizations:**
|
|
277
|
+
- Composite index on `(session_id, user_id)` for fast session queries
|
|
278
|
+
- Expression index on `metadata->>'entity_key'` for LOOKUP queries
|
|
279
|
+
- `created_at` for chronological ordering
|
|
280
|
+
- Soft deletes via `deleted_at` (no hard deletes)
|
|
281
|
+
|
|
282
|
+
**Query Performance:**
|
|
283
|
+
- Session reload: O(n) where n = messages in session
|
|
284
|
+
- LOOKUP retrieval: O(1) with JSONB index
|
|
285
|
+
- User isolation: Enforced at query level (all queries are scoped by `user_id`)
|
|
286
|
+
|
|
287
|
+
## Graceful Degradation
|
|
288
|
+
|
|
289
|
+
When Postgres is disabled (`POSTGRES__ENABLED=false`):
|
|
290
|
+
|
|
291
|
+
```python
|
|
292
|
+
# All operations skip database gracefully
|
|
293
|
+
if not settings.postgres.enabled:
|
|
294
|
+
logger.debug("Postgres disabled, skipping session management")
|
|
295
|
+
return []
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
**Behavior:**
|
|
299
|
+
- `reload_session()` returns empty list
|
|
300
|
+
- `store_session_messages()` no-ops
|
|
301
|
+
- No errors raised
|
|
302
|
+
- Chat completions work without history
|
|
303
|
+
|
|
304
|
+
## Design Principles
|
|
305
|
+
|
|
306
|
+
1. **LOOKUP-First**: Entity keys enable O(1) retrieval
|
|
307
|
+
2. **User Isolation**: All queries scoped by user_id
|
|
308
|
+
3. **Graceful Degradation**: Works without database
|
|
309
|
+
4. **Compression-Aware**: LLM sees compression markers
|
|
310
|
+
5. **Audit Trail**: Full messages always stored
|
|
311
|
+
6. **Natural Keys**: Human-readable entity key format
|
|
312
|
+
|
|
313
|
+
## Future Enhancements
|
|
314
|
+
|
|
315
|
+
### Token Tracking (TODO)
|
|
316
|
+
|
|
317
|
+
Track token usage per session for cost analysis:
|
|
318
|
+
|
|
319
|
+
```python
|
|
320
|
+
# Store usage metadata
|
|
321
|
+
metadata = {
|
|
322
|
+
"entity_key": "session-123-msg-5",
|
|
323
|
+
"usage": {
|
|
324
|
+
"prompt_tokens": 1500,
|
|
325
|
+
"completion_tokens": 800,
|
|
326
|
+
"total_tokens": 2300,
|
|
327
|
+
"model": "gpt-4o",
|
|
328
|
+
"estimated_cost": 0.046
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
### Context Window Optimization
|
|
334
|
+
|
|
335
|
+
Implement sliding window with summarization:
|
|
336
|
+
|
|
337
|
+
```python
|
|
338
|
+
# Keep recent N messages verbatim
|
|
339
|
+
# Summarize older messages
|
|
340
|
+
# Discard ancient messages
|
|
341
|
+
|
|
342
|
+
history = [
|
|
343
|
+
{"role": "system", "content": "Summary of messages 1-10: ..."},
|
|
344
|
+
*recent_messages[-5:] # Last 5 turns
|
|
345
|
+
]
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
### Multi-Session Retrieval
|
|
349
|
+
|
|
350
|
+
Load related sessions for context:
|
|
351
|
+
|
|
352
|
+
```python
|
|
353
|
+
# Find related sessions by user or topic
|
|
354
|
+
related_sessions = await find_related_sessions(
|
|
355
|
+
user_id="alice",
|
|
356
|
+
topic_tags=["rem-architecture"],
|
|
357
|
+
limit=3
|
|
358
|
+
)
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
## Related Documentation
|
|
362
|
+
|
|
363
|
+
- [Message Entity Model](../../models/entities/message.py)
|
|
364
|
+
- [Repository Pattern](../postgres/repository.py)
|
|
365
|
+
- [AgentContext](../../agentic/context.py)
|
|
366
|
+
- [Chat Completions API](../../api/routers/chat/completions.py)
|
|
367
|
+
- [REM Query System](../../models/core/rem_query.py)
|
|
368
|
+
|
|
369
|
+
## References
|
|
370
|
+
|
|
371
|
+
Inspired by p8fs-modules session management:
|
|
372
|
+
- `p8fs/src/p8fs/services/llm/session_messages.py` - Compression pattern
|
|
373
|
+
- `p8fs/src/p8fs/services/llm/audit_mixin.py` - Session lifecycle
|
|
374
|
+
- `p8fs/src/p8fs/services/llm/models.py` - CallingContext pattern
|