remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized context builder for agent execution.
|
|
3
|
+
|
|
4
|
+
Session History (ALWAYS loaded with compression):
|
|
5
|
+
- Each chat request is a single message, so session history MUST be recovered
|
|
6
|
+
- Uses SessionMessageStore with compression to keep context efficient
|
|
7
|
+
- Prevents context window bloat while maintaining conversation continuity
|
|
8
|
+
|
|
9
|
+
User Context (on-demand by default):
|
|
10
|
+
- System message includes user email for context awareness
|
|
11
|
+
- Fails silently if user not found - agent proceeds without user context
|
|
12
|
+
- Example: "User: sarah@example.com"
|
|
13
|
+
|
|
14
|
+
User Context (auto-inject when enabled):
|
|
15
|
+
- Set CHAT__AUTO_INJECT_USER_CONTEXT=true
|
|
16
|
+
- User profile automatically loaded from database and injected into system message
|
|
17
|
+
- Simpler for basic chatbots that always need context
|
|
18
|
+
|
|
19
|
+
Design Pattern:
|
|
20
|
+
1. Extract AgentContext from headers (user_id, tenant_id, session_id)
|
|
21
|
+
2. If auto-inject enabled: Load User/Session from database
|
|
22
|
+
3. If auto-inject disabled: Show user email for context (fail silently if not found)
|
|
23
|
+
4. Construct system message with date + context
|
|
24
|
+
5. Return complete context ready for agent execution
|
|
25
|
+
|
|
26
|
+
Integration Points:
|
|
27
|
+
- API endpoints: build_from_headers() extracts user context from JWT/session headers
|
|
28
|
+
- Tests: build_from_test() creates minimal test context without DB
|
|
29
|
+
- Settings: CHAT__AUTO_INJECT_* controls auto-inject vs on-demand behavior
|
|
30
|
+
|
|
31
|
+
Usage (on-demand, default):
|
|
32
|
+
# From FastAPI endpoint
|
|
33
|
+
context, messages = await ContextBuilder.build_from_headers(
|
|
34
|
+
headers=request.headers,
|
|
35
|
+
new_messages=[{"role": "user", "content": "What's next for the API migration?"}]
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# Messages list structure (on-demand):
|
|
39
|
+
# [
|
|
40
|
+
# {"role": "system", "content": "Today's date: 2025-11-22\n\nUser: sarah@example.com"},
|
|
41
|
+
# {"role": "user", "content": "What's next for the API migration?"}
|
|
42
|
+
# ]
|
|
43
|
+
|
|
44
|
+
agent = await create_agent(context=context, ...)
|
|
45
|
+
prompt = "\n".join(msg.content for msg in messages)
|
|
46
|
+
result = await agent.run(prompt)
|
|
47
|
+
|
|
48
|
+
Usage (auto-inject, CHAT__AUTO_INJECT_USER_CONTEXT=true):
|
|
49
|
+
# Messages list structure (auto-inject):
|
|
50
|
+
# [
|
|
51
|
+
# {"role": "system", "content": "Today's date: 2025-11-22\n\nUser Context (auto-injected):\nSummary: ...\nInterests: ..."},
|
|
52
|
+
# {"role": "user", "content": "Previous message"},
|
|
53
|
+
# {"role": "assistant", "content": "Previous response"},
|
|
54
|
+
# {"role": "user", "content": "What's next for the API migration?"}
|
|
55
|
+
# ]
|
|
56
|
+
|
|
57
|
+
Testing:
|
|
58
|
+
# From CLI/test (no database)
|
|
59
|
+
context, messages = await ContextBuilder.build_from_test(
|
|
60
|
+
user_id="test@rem.ai",
|
|
61
|
+
tenant_id="test-tenant",
|
|
62
|
+
message="Hello"
|
|
63
|
+
)
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
from datetime import datetime, timezone
|
|
67
|
+
from typing import Any
|
|
68
|
+
|
|
69
|
+
from loguru import logger
|
|
70
|
+
from pydantic import BaseModel
|
|
71
|
+
|
|
72
|
+
from .context import AgentContext
|
|
73
|
+
from ..models.entities.user import User
|
|
74
|
+
from ..models.entities.message import Message
|
|
75
|
+
from ..services.postgres.repository import Repository
|
|
76
|
+
from ..services.postgres.service import PostgresService
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class ContextMessage(BaseModel):
    """A single turn of an LLM conversation.

    Two-field, OpenAI-style chat message: a ``role`` tag plus free-form
    text ``content``.
    """

    # Typical values are "system", "user" and "assistant"; replayed session
    # history may also carry "tool" entries, so this is deliberately a plain
    # str rather than a Literal.
    role: str
    content: str
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class ContextBuilder:
    """
    Centralized builder for agent execution context.

    Handles:
    - User profile loading from database
    - Session history recovery
    - Context message construction
    - Test context generation
    """

    @staticmethod
    async def build_from_headers(
        headers: dict[str, str],
        new_messages: list[dict[str, str]] | None = None,
        db: PostgresService | None = None,
        user_id: str | None = None,
    ) -> tuple[AgentContext, list[ContextMessage]]:
        """
        Build complete context from HTTP headers.

        Session History (ALWAYS loaded with compression):
        - If session_id provided, session history is ALWAYS loaded using SessionMessageStore
        - Compression keeps context efficient

        User Context (on-demand by default):
        - System message includes user email: "User: {email}"
        - Fails silently if user not found - agent proceeds without user context

        User Context (auto-inject when enabled):
        - Set CHAT__AUTO_INJECT_USER_CONTEXT=true
        - User profile automatically loaded and injected into system message

        Args:
            headers: HTTP request headers (case-insensitive)
            new_messages: New messages from current request
            db: Optional PostgresService (creates if None)
            user_id: Override user_id from JWT token (takes precedence over X-User-Id header)

        Returns:
            Tuple of (AgentContext, messages list)

        Example:
            headers = {"X-User-Id": "sarah@example.com", "X-Session-Id": "sess-123"}
            context, messages = await ContextBuilder.build_from_headers(headers, new_messages)

            # messages structure:
            # [
            #     {"role": "system", "content": "Today's date: 2025-11-22\n\nUser: sarah@example.com"},
            #     {"role": "user", "content": "Previous message"},
            #     {"role": "assistant", "content": "Previous response"},
            #     {"role": "user", "content": "New message"}
            # ]
        """
        # Function-scope imports — presumably to avoid an import cycle at
        # module load time (settings <-> agentic) — TODO confirm.
        from ..settings import settings
        from ..services.session.compression import SessionMessageStore

        # Extract AgentContext from headers
        context = AgentContext.from_headers(headers)

        # Override user_id if provided (from JWT token - takes precedence over header).
        # A new AgentContext is constructed rather than mutated in place; every
        # other field is copied across unchanged.
        if user_id is not None:
            context = AgentContext(
                user_id=user_id,
                tenant_id=context.tenant_id,
                session_id=context.session_id,
                default_model=context.default_model,
                agent_schema_uri=context.agent_schema_uri,
                is_eval=context.is_eval,
            )

        # Initialize DB if not provided and needed (for user context or session history).
        # close_db tracks ownership: we only disconnect a connection we opened here,
        # never one that was passed in by the caller.
        close_db = False
        if db is None and (settings.chat.auto_inject_user_context or context.session_id):
            from ..services.postgres import get_postgres_service
            db = get_postgres_service()
            if db:
                await db.connect()
                close_db = True

        try:
            # Build messages list
            messages: list[ContextMessage] = []

            # Build context hint message (always starts with today's UTC date)
            today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
            context_hint = f"Today's date: {today}."

            # Add user context (auto-inject or on-demand hint)
            if settings.chat.auto_inject_user_context and context.user_id and db:
                # Auto-inject: Load and include user profile
                user_context_content = await ContextBuilder._load_user_context(
                    user_id=context.user_id,
                    tenant_id=context.tenant_id,
                    db=db,
                )
                if user_context_content:
                    context_hint += f"\n\nUser Context (auto-injected):\n{user_context_content}"
                else:
                    context_hint += "\n\nNo user context available (anonymous or new user)."
            elif context.user_id and db:
                # On-demand: Show user email for context (no REM LOOKUP - it requires exact user_id match)
                # Fail silently if user lookup fails - just proceed without user context
                try:
                    user_repo = Repository(User, "users", db=db)
                    user = await user_repo.get_by_id(context.user_id, context.tenant_id)
                    if user and user.email:
                        context_hint += f"\n\nUser: {user.email}"
                    # If user not found, just proceed without adding user context
                except Exception as e:
                    # Fail silently - don't block agent execution if user lookup fails
                    logger.debug(f"Could not load user context: {e}")

            # Add system context hint (always the first message in the list)
            messages.append(ContextMessage(role="system", content=context_hint))

            # ALWAYS load session history (if session_id provided)
            # - Long assistant messages are compressed on load with REM LOOKUP hints
            # - Tool messages are never compressed (contain structured metadata)
            if context.session_id and settings.postgres.enabled:
                store = SessionMessageStore(user_id=context.user_id or "default")
                session_history = await store.load_session_messages(
                    session_id=context.session_id,
                    user_id=context.user_id,
                    compress_on_load=True,  # Compress long assistant messages
                )

                # Convert to ContextMessage format
                # For tool messages, wrap content with clear markers so the agent
                # can see previous tool results when the prompt is concatenated
                for msg_dict in session_history:
                    role = msg_dict["role"]
                    content = msg_dict["content"]

                    if role == "tool":
                        # Wrap tool results with clear markers for visibility
                        tool_name = msg_dict.get("tool_name", "unknown")
                        content = f"[TOOL RESULT: {tool_name}]\n{content}\n[/TOOL RESULT]"

                    messages.append(
                        ContextMessage(
                            role=role,
                            content=content,
                        )
                    )

                logger.debug(f"Loaded {len(session_history)} messages for session {context.session_id}")

            # Add new messages from request (appended after history, preserving order)
            if new_messages:
                for msg in new_messages:
                    messages.append(ContextMessage(**msg))

            return context, messages

        finally:
            # Only tear down connections this function itself opened.
            if close_db and db:
                await db.disconnect()

    @staticmethod
    async def _load_user_context(
        user_id: str | None,
        tenant_id: str,
        db: PostgresService,
    ) -> str | None:
        """
        Load user profile from database and format as context.

        user_id is always a UUID5 hash of email (bijection).
        Looks up user by their id field in the database.

        Returns formatted string with:
        - User summary (generated by dreaming worker)
        - Current projects
        - Technical interests
        - Preferred topics

        Returns None if user_id not provided or user not found.
        """
        if not user_id:
            return None

        try:
            user_repo = Repository(User, "users", db=db)
            # user_id is UUID5 hash of email - look up by database id
            user = await user_repo.get_by_id(user_id, tenant_id)

            if not user:
                logger.debug(f"User {user_id} not found in tenant {tenant_id}")
                return None

            # Build user context string, one "Label: value" line per section
            parts = []

            if user.summary:
                parts.append(f"Summary: {user.summary}")

            if user.interests:
                # Cap at 5 entries to keep the system message small
                parts.append(f"Interests: {', '.join(user.interests[:5])}")

            if user.preferred_topics:
                parts.append(f"Topics: {', '.join(user.preferred_topics[:5])}")

            # Add full profile from metadata if available
            if user.metadata and "profile" in user.metadata:
                profile = user.metadata["profile"]

                if profile.get("current_projects"):
                    projects = profile["current_projects"]
                    # assumes each project is a dict with an optional "name" key
                    project_names = [p.get("name", "Unnamed") for p in projects[:3]]
                    parts.append(f"Current Projects: {', '.join(project_names)}")

            if not parts:
                return None

            return "\n".join(parts)

        except Exception as e:
            # Best-effort: profile loading must never block agent execution
            logger.error(f"Failed to load user context: {e}")
            return None


    @staticmethod
    async def build_from_test(
        user_id: str = "test@rem.ai",
        tenant_id: str = "test-tenant",
        session_id: str | None = None,
        message: str = "Hello",
        model: str | None = None,
    ) -> tuple[AgentContext, list[ContextMessage]]:
        """
        Build context for testing (no database lookup).

        Creates minimal context with:
        - Test user (test@rem.ai)
        - Test tenant
        - Context hint with date
        - Single user message

        Args:
            user_id: Test user identifier (default: test@rem.ai)
            tenant_id: Test tenant identifier
            session_id: Optional session ID
            message: User message content
            model: Optional model override

        Returns:
            Tuple of (AgentContext, messages list)

        Example:
            context, messages = await ContextBuilder.build_from_test(
                user_id="test@rem.ai",
                message="What's the weather like?"
            )
        """
        from ..settings import settings

        # Create test context
        context = AgentContext(
            user_id=user_id,
            tenant_id=tenant_id,
            session_id=session_id,
            default_model=model or settings.llm.default_model,
        )

        # Build minimal messages: one system hint with the UTC date plus the
        # single user message — no DB access at all in this path.
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        context_hint = f"Today's date: {today}.\n\nTest user context: {user_id} (test mode, no profile loaded)."

        messages = [
            ContextMessage(role="system", content=context_hint),
            ContextMessage(role="user", content=message),
        ]

        return context, messages
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM Provider Model Registry.
|
|
3
|
+
|
|
4
|
+
Defines available LLM models across providers (OpenAI, Anthropic, Google, Cerebras).
|
|
5
|
+
Used by the models API endpoint and for validating model requests.
|
|
6
|
+
|
|
7
|
+
Future: Models will be stored in database for dynamic management.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ModelInfo(BaseModel):
    """Information about a single model.

    Field layout matches an OpenAI-style ``/v1/models`` list entry
    (``id`` / ``object`` / ``created`` / ``owned_by``) with extra
    capability metadata appended.
    """

    # Composite "provider:model" key, e.g. "openai:gpt-4.1".
    id: str = Field(description="Model ID in provider:model format")
    # Always the literal string "model".
    object: Literal["model"] = "model"
    # Unix timestamp (seconds); approximate release date per module comment.
    created: int = Field(description="Unix timestamp of model availability")
    # Provider name, e.g. "openai", "anthropic", "google".
    owned_by: str = Field(description="Provider name")
    description: str | None = Field(default=None, description="Model description")
    # None when the limit is unknown/unpublished.
    context_window: int | None = Field(default=None, description="Max context tokens")
    max_output_tokens: int | None = Field(default=None, description="Max output tokens")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Model definitions with 2025 releases
|
|
27
|
+
# Using Unix timestamps for created dates (approximate release dates)
|
|
28
|
+
AVAILABLE_MODELS: list[ModelInfo] = [
|
|
29
|
+
# ==========================================================================
|
|
30
|
+
# OpenAI Models (2025)
|
|
31
|
+
# ==========================================================================
|
|
32
|
+
# GPT-4.1 series (Released April 14, 2025)
|
|
33
|
+
ModelInfo(
|
|
34
|
+
id="openai:gpt-4.1",
|
|
35
|
+
created=1744588800, # April 14, 2025
|
|
36
|
+
owned_by="openai",
|
|
37
|
+
description="Latest GPT-4 iteration, excels at coding and instruction following. 1M context.",
|
|
38
|
+
context_window=1047576,
|
|
39
|
+
max_output_tokens=32768,
|
|
40
|
+
),
|
|
41
|
+
ModelInfo(
|
|
42
|
+
id="openai:gpt-4.1-mini",
|
|
43
|
+
created=1744588800,
|
|
44
|
+
owned_by="openai",
|
|
45
|
+
description="Small model beating GPT-4o in many benchmarks. 83% cost reduction vs GPT-4o.",
|
|
46
|
+
context_window=1047576,
|
|
47
|
+
max_output_tokens=32768,
|
|
48
|
+
),
|
|
49
|
+
ModelInfo(
|
|
50
|
+
id="openai:gpt-4.1-nano",
|
|
51
|
+
created=1744588800,
|
|
52
|
+
owned_by="openai",
|
|
53
|
+
description="Fastest and cheapest OpenAI model. Ideal for classification and autocompletion.",
|
|
54
|
+
context_window=1047576,
|
|
55
|
+
max_output_tokens=32768,
|
|
56
|
+
),
|
|
57
|
+
# GPT-4o (legacy but still supported)
|
|
58
|
+
ModelInfo(
|
|
59
|
+
id="openai:gpt-4o",
|
|
60
|
+
created=1715644800, # May 13, 2024
|
|
61
|
+
owned_by="openai",
|
|
62
|
+
description="Previous flagship multimodal model. Being superseded by GPT-4.1.",
|
|
63
|
+
context_window=128000,
|
|
64
|
+
max_output_tokens=16384,
|
|
65
|
+
),
|
|
66
|
+
ModelInfo(
|
|
67
|
+
id="openai:gpt-4o-mini",
|
|
68
|
+
created=1721347200, # July 18, 2024
|
|
69
|
+
owned_by="openai",
|
|
70
|
+
description="Cost-efficient smaller GPT-4o variant.",
|
|
71
|
+
context_window=128000,
|
|
72
|
+
max_output_tokens=16384,
|
|
73
|
+
),
|
|
74
|
+
# o1 reasoning models
|
|
75
|
+
ModelInfo(
|
|
76
|
+
id="openai:o1",
|
|
77
|
+
created=1733961600, # December 12, 2024
|
|
78
|
+
owned_by="openai",
|
|
79
|
+
description="Advanced reasoning model for complex problems. Extended thinking.",
|
|
80
|
+
context_window=200000,
|
|
81
|
+
max_output_tokens=100000,
|
|
82
|
+
),
|
|
83
|
+
ModelInfo(
|
|
84
|
+
id="openai:o1-mini",
|
|
85
|
+
created=1726099200, # September 12, 2024
|
|
86
|
+
owned_by="openai",
|
|
87
|
+
description="Smaller reasoning model, fast for coding and math.",
|
|
88
|
+
context_window=128000,
|
|
89
|
+
max_output_tokens=65536,
|
|
90
|
+
),
|
|
91
|
+
ModelInfo(
|
|
92
|
+
id="openai:o3-mini",
|
|
93
|
+
created=1738195200, # January 30, 2025
|
|
94
|
+
owned_by="openai",
|
|
95
|
+
description="Latest mini reasoning model with improved performance.",
|
|
96
|
+
context_window=200000,
|
|
97
|
+
max_output_tokens=100000,
|
|
98
|
+
),
|
|
99
|
+
# ==========================================================================
|
|
100
|
+
# Anthropic Models (2025)
|
|
101
|
+
# ==========================================================================
|
|
102
|
+
# Claude 4.5 series (Latest - November 2025)
|
|
103
|
+
ModelInfo(
|
|
104
|
+
id="anthropic:claude-opus-4-5-20251124",
|
|
105
|
+
created=1732406400, # November 24, 2025
|
|
106
|
+
owned_by="anthropic",
|
|
107
|
+
description="Most capable Claude model. World-class coding with 'effort' parameter control.",
|
|
108
|
+
context_window=200000,
|
|
109
|
+
max_output_tokens=128000,
|
|
110
|
+
),
|
|
111
|
+
ModelInfo(
|
|
112
|
+
id="anthropic:claude-sonnet-4-5-20250929",
|
|
113
|
+
created=1727568000, # September 29, 2025
|
|
114
|
+
owned_by="anthropic",
|
|
115
|
+
description="Best balance of intelligence and speed. Excellent for coding and agents.",
|
|
116
|
+
context_window=200000,
|
|
117
|
+
max_output_tokens=128000,
|
|
118
|
+
),
|
|
119
|
+
ModelInfo(
|
|
120
|
+
id="anthropic:claude-haiku-4-5-20251101",
|
|
121
|
+
created=1730419200, # November 1, 2025
|
|
122
|
+
owned_by="anthropic",
|
|
123
|
+
description="Fast and affordable. Sonnet 4 performance at 1/3 cost. Safest Claude model.",
|
|
124
|
+
context_window=200000,
|
|
125
|
+
max_output_tokens=128000,
|
|
126
|
+
),
|
|
127
|
+
# Claude 4 series
|
|
128
|
+
ModelInfo(
|
|
129
|
+
id="anthropic:claude-opus-4-20250514",
|
|
130
|
+
created=1715644800, # May 14, 2025
|
|
131
|
+
owned_by="anthropic",
|
|
132
|
+
description="World's best coding model. Sustained performance on complex agent workflows.",
|
|
133
|
+
context_window=200000,
|
|
134
|
+
max_output_tokens=128000,
|
|
135
|
+
),
|
|
136
|
+
ModelInfo(
|
|
137
|
+
id="anthropic:claude-sonnet-4-20250514",
|
|
138
|
+
created=1715644800, # May 14, 2025
|
|
139
|
+
owned_by="anthropic",
|
|
140
|
+
description="Significant upgrade to Sonnet 3.7. Great for everyday tasks.",
|
|
141
|
+
context_window=200000,
|
|
142
|
+
max_output_tokens=128000,
|
|
143
|
+
),
|
|
144
|
+
ModelInfo(
|
|
145
|
+
id="anthropic:claude-opus-4-1-20250805",
|
|
146
|
+
created=1722816000, # August 5, 2025
|
|
147
|
+
owned_by="anthropic",
|
|
148
|
+
description="Opus 4 upgrade focused on agentic tasks and real-world coding.",
|
|
149
|
+
context_window=200000,
|
|
150
|
+
max_output_tokens=128000,
|
|
151
|
+
),
|
|
152
|
+
# Aliases for convenience
|
|
153
|
+
ModelInfo(
|
|
154
|
+
id="anthropic:claude-opus-4-5",
|
|
155
|
+
created=1732406400,
|
|
156
|
+
owned_by="anthropic",
|
|
157
|
+
description="Alias for latest Claude Opus 4.5",
|
|
158
|
+
context_window=200000,
|
|
159
|
+
max_output_tokens=128000,
|
|
160
|
+
),
|
|
161
|
+
ModelInfo(
|
|
162
|
+
id="anthropic:claude-sonnet-4-5",
|
|
163
|
+
created=1727568000,
|
|
164
|
+
owned_by="anthropic",
|
|
165
|
+
description="Alias for latest Claude Sonnet 4.5",
|
|
166
|
+
context_window=200000,
|
|
167
|
+
max_output_tokens=128000,
|
|
168
|
+
),
|
|
169
|
+
ModelInfo(
|
|
170
|
+
id="anthropic:claude-haiku-4-5",
|
|
171
|
+
created=1730419200,
|
|
172
|
+
owned_by="anthropic",
|
|
173
|
+
description="Alias for latest Claude Haiku 4.5",
|
|
174
|
+
context_window=200000,
|
|
175
|
+
max_output_tokens=128000,
|
|
176
|
+
),
|
|
177
|
+
# ==========================================================================
|
|
178
|
+
# Google Models (2025)
|
|
179
|
+
# ==========================================================================
|
|
180
|
+
# Gemini 3 (Latest)
|
|
181
|
+
ModelInfo(
|
|
182
|
+
id="google:gemini-3-pro",
|
|
183
|
+
created=1730419200, # November 2025
|
|
184
|
+
owned_by="google",
|
|
185
|
+
description="Most advanced Gemini. State-of-the-art reasoning, 35% better than 2.5 Pro.",
|
|
186
|
+
context_window=2000000,
|
|
187
|
+
max_output_tokens=65536,
|
|
188
|
+
),
|
|
189
|
+
# Gemini 2.5 series
|
|
190
|
+
ModelInfo(
|
|
191
|
+
id="google:gemini-2.5-pro",
|
|
192
|
+
created=1727568000, # September 2025
|
|
193
|
+
owned_by="google",
|
|
194
|
+
description="High-capability model with adaptive thinking. 1M context window.",
|
|
195
|
+
context_window=1000000,
|
|
196
|
+
max_output_tokens=65536,
|
|
197
|
+
),
|
|
198
|
+
ModelInfo(
|
|
199
|
+
id="google:gemini-2.5-flash",
|
|
200
|
+
created=1727568000,
|
|
201
|
+
owned_by="google",
|
|
202
|
+
description="Fast and capable. Best for large-scale processing and agentic tasks.",
|
|
203
|
+
context_window=1000000,
|
|
204
|
+
max_output_tokens=65536,
|
|
205
|
+
),
|
|
206
|
+
ModelInfo(
|
|
207
|
+
id="google:gemini-2.5-flash-lite",
|
|
208
|
+
created=1727568000,
|
|
209
|
+
owned_by="google",
|
|
210
|
+
description="Optimized for massive scale. Balances cost and performance.",
|
|
211
|
+
context_window=1000000,
|
|
212
|
+
max_output_tokens=32768,
|
|
213
|
+
),
|
|
214
|
+
# Gemini 2.0
|
|
215
|
+
ModelInfo(
|
|
216
|
+
id="google:gemini-2.0-flash",
|
|
217
|
+
created=1733875200, # December 2024
|
|
218
|
+
owned_by="google",
|
|
219
|
+
description="Fast multimodal model with native tool use.",
|
|
220
|
+
context_window=1000000,
|
|
221
|
+
max_output_tokens=8192,
|
|
222
|
+
),
|
|
223
|
+
# Gemma open models
|
|
224
|
+
ModelInfo(
|
|
225
|
+
id="google:gemma-3",
|
|
226
|
+
created=1727568000,
|
|
227
|
+
owned_by="google",
|
|
228
|
+
description="Open model with text/image input, 140+ languages, 128K context.",
|
|
229
|
+
context_window=128000,
|
|
230
|
+
max_output_tokens=8192,
|
|
231
|
+
),
|
|
232
|
+
ModelInfo(
|
|
233
|
+
id="google:gemma-3n",
|
|
234
|
+
created=1730419200,
|
|
235
|
+
owned_by="google",
|
|
236
|
+
description="Efficient open model for low-resource devices. Multimodal input.",
|
|
237
|
+
context_window=128000,
|
|
238
|
+
max_output_tokens=8192,
|
|
239
|
+
),
|
|
240
|
+
# ==========================================================================
|
|
241
|
+
# Cerebras Models (Ultra-fast inference)
|
|
242
|
+
# ==========================================================================
|
|
243
|
+
ModelInfo(
|
|
244
|
+
id="cerebras:llama-3.3-70b",
|
|
245
|
+
created=1733875200, # December 2024
|
|
246
|
+
owned_by="cerebras",
|
|
247
|
+
description="Llama 3.3 70B on Cerebras. Ultra-fast inference (~2000 tok/s). Fully compatible with structured output.",
|
|
248
|
+
context_window=128000,
|
|
249
|
+
max_output_tokens=8192,
|
|
250
|
+
),
|
|
251
|
+
ModelInfo(
|
|
252
|
+
id="cerebras:qwen-3-32b",
|
|
253
|
+
created=1733875200, # December 2024
|
|
254
|
+
owned_by="cerebras",
|
|
255
|
+
description="Qwen 3 32B on Cerebras. Ultra-fast inference (~2400 tok/s). Requires strict schema mode.",
|
|
256
|
+
context_window=32000,
|
|
257
|
+
max_output_tokens=8192,
|
|
258
|
+
),
|
|
259
|
+
]
|
|
260
|
+
|
|
261
|
+
# Precomputed set of permitted model IDs so membership checks are O(1).
ALLOWED_MODEL_IDS: set[str] = {m.id for m in AVAILABLE_MODELS}
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def is_valid_model(model_id: str | None) -> bool:
|
|
266
|
+
"""Check if a model ID is in the allowed list."""
|
|
267
|
+
if model_id is None:
|
|
268
|
+
return False
|
|
269
|
+
return model_id in ALLOWED_MODEL_IDS
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def get_valid_model_or_default(model_id: str | None, default_model: str) -> str:
    """
    Resolve a requested model ID against the allowed list.

    Args:
        model_id: Requested model identifier; may be None or unknown.
        default_model: Fallback model ID taken from settings.

    Returns:
        ``model_id`` when it passes validation, otherwise ``default_model``.
    """
    # Guard clause: anything that fails validation falls back immediately.
    if not is_valid_model(model_id):
        return default_model
    return model_id  # type: ignore[return-value]
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def get_model_by_id(model_id: str) -> ModelInfo | None:
    """
    Look up a model's metadata record by its identifier.

    Args:
        model_id: Model identifier in provider:model format.

    Returns:
        The matching ModelInfo, or None when no model has that ID.
    """
    # next() with a default replaces the explicit scan-and-return loop.
    return next((m for m in AVAILABLE_MODELS if m.id == model_id), None)
|
|
File without changes
|