remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic; see the registry's advisory page for more details.

Files changed (235)
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,360 @@
1
+ """
2
+ Centralized context builder for agent execution.
3
+
4
+ Session History (ALWAYS loaded with compression):
5
+ - Each chat request is a single message, so session history MUST be recovered
6
+ - Uses SessionMessageStore with compression to keep context efficient
7
+ - Prevents context window bloat while maintaining conversation continuity
8
+
9
+ User Context (on-demand by default):
10
+ - System message includes user email for context awareness
11
+ - Fails silently if user not found - agent proceeds without user context
12
+ - Example: "User: sarah@example.com"
13
+
14
+ User Context (auto-inject when enabled):
15
+ - Set CHAT__AUTO_INJECT_USER_CONTEXT=true
16
+ - User profile automatically loaded from database and injected into system message
17
+ - Simpler for basic chatbots that always need context
18
+
19
+ Design Pattern:
20
+ 1. Extract AgentContext from headers (user_id, tenant_id, session_id)
21
+ 2. If auto-inject enabled: Load User/Session from database
22
+ 3. If auto-inject disabled: Show user email for context (fail silently if not found)
23
+ 4. Construct system message with date + context
24
+ 5. Return complete context ready for agent execution
25
+
26
+ Integration Points:
27
+ - API endpoints: build_from_headers() extracts user context from JWT/session headers
28
+ - Tests: build_from_test() creates minimal test context without DB
29
+ - Settings: CHAT__AUTO_INJECT_* controls auto-inject vs on-demand behavior
30
+
31
+ Usage (on-demand, default):
32
+ # From FastAPI endpoint
33
+ context, messages = await ContextBuilder.build_from_headers(
34
+ headers=request.headers,
35
+ new_messages=[{"role": "user", "content": "What's next for the API migration?"}]
36
+ )
37
+
38
+ # Messages list structure (on-demand):
39
+ # [
40
+ # {"role": "system", "content": "Today's date: 2025-11-22\n\nUser: sarah@example.com"},
41
+ # {"role": "user", "content": "What's next for the API migration?"}
42
+ # ]
43
+
44
+ agent = await create_agent(context=context, ...)
45
+ prompt = "\n".join(msg.content for msg in messages)
46
+ result = await agent.run(prompt)
47
+
48
+ Usage (auto-inject, CHAT__AUTO_INJECT_USER_CONTEXT=true):
49
+ # Messages list structure (auto-inject):
50
+ # [
51
+ # {"role": "system", "content": "Today's date: 2025-11-22\n\nUser Context (auto-injected):\nSummary: ...\nInterests: ..."},
52
+ # {"role": "user", "content": "Previous message"},
53
+ # {"role": "assistant", "content": "Previous response"},
54
+ # {"role": "user", "content": "What's next for the API migration?"}
55
+ # ]
56
+
57
+ Testing:
58
+ # From CLI/test (no database)
59
+ context, messages = await ContextBuilder.build_from_test(
60
+ user_id="test@rem.ai",
61
+ tenant_id="test-tenant",
62
+ message="Hello"
63
+ )
64
+ """
65
+
66
+ from datetime import datetime, timezone
67
+ from typing import Any
68
+
69
+ from loguru import logger
70
+ from pydantic import BaseModel
71
+
72
+ from .context import AgentContext
73
+ from ..models.entities.user import User
74
+ from ..models.entities.message import Message
75
+ from ..services.postgres.repository import Repository
76
+ from ..services.postgres.service import PostgresService
77
+
78
+
79
class ContextMessage(BaseModel):
    """Standard message format for LLM conversations."""

    # Typical values: "system", "user", "assistant". Session history replay
    # can also carry role "tool" (ContextBuilder wraps the content with
    # [TOOL RESULT] markers before appending such entries).
    role: str  # "system", "user", "assistant"
    # Plain-text message body; no structured content parts are modeled here.
    content: str
84
+
85
+
86
class ContextBuilder:
    """
    Centralized builder for agent execution context.

    Handles:
    - User profile loading from database
    - Session history recovery
    - Context message construction
    - Test context generation
    """

    @staticmethod
    async def build_from_headers(
        headers: dict[str, str],
        new_messages: list[dict[str, str]] | None = None,
        db: PostgresService | None = None,
        user_id: str | None = None,
    ) -> tuple[AgentContext, list[ContextMessage]]:
        """
        Build complete context from HTTP headers.

        Session History (ALWAYS loaded with compression):
        - If session_id provided, session history is ALWAYS loaded using SessionMessageStore
        - Compression keeps context efficient

        User Context (on-demand by default):
        - System message includes user email: "User: {email}"
        - Fails silently if user not found - agent proceeds without user context

        User Context (auto-inject when enabled):
        - Set CHAT__AUTO_INJECT_USER_CONTEXT=true
        - User profile automatically loaded and injected into system message

        Args:
            headers: HTTP request headers (case-insensitive)
            new_messages: New messages from current request
            db: Optional PostgresService (creates if None)
            user_id: Override user_id from JWT token (takes precedence over X-User-Id header)

        Returns:
            Tuple of (AgentContext, messages list)

        Example:
            headers = {"X-User-Id": "sarah@example.com", "X-Session-Id": "sess-123"}
            context, messages = await ContextBuilder.build_from_headers(headers, new_messages)

            # messages structure:
            # [
            #     {"role": "system", "content": "Today's date: 2025-11-22\\n\\nUser: sarah@example.com"},
            #     {"role": "user", "content": "Previous message"},
            #     {"role": "assistant", "content": "Previous response"},
            #     {"role": "user", "content": "New message"}
            # ]
        """
        # Deferred imports — NOTE(review): presumably done to avoid import
        # cycles and to pick up settings lazily; confirm before hoisting.
        from ..settings import settings
        from ..services.session.compression import SessionMessageStore

        # Extract AgentContext from headers
        context = AgentContext.from_headers(headers)

        # Override user_id if provided (from JWT token - takes precedence over header).
        # A new AgentContext is constructed (not mutated) with the remaining
        # fields copied from the header-derived context.
        if user_id is not None:
            context = AgentContext(
                user_id=user_id,
                tenant_id=context.tenant_id,
                session_id=context.session_id,
                default_model=context.default_model,
                agent_schema_uri=context.agent_schema_uri,
                is_eval=context.is_eval,
            )

        # Initialize DB if not provided and needed (for user context or session history).
        # close_db tracks ownership: we only disconnect connections we opened here,
        # never a caller-supplied `db`.
        close_db = False
        if db is None and (settings.chat.auto_inject_user_context or context.session_id):
            from ..services.postgres import get_postgres_service
            db = get_postgres_service()
            if db:
                await db.connect()
                close_db = True

        try:
            # Build messages list
            messages: list[ContextMessage] = []

            # Build context hint message (system message prefix with today's date)
            today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
            context_hint = f"Today's date: {today}."

            # Add user context (auto-inject or on-demand hint)
            if settings.chat.auto_inject_user_context and context.user_id and db:
                # Auto-inject: Load and include user profile
                user_context_content = await ContextBuilder._load_user_context(
                    user_id=context.user_id,
                    tenant_id=context.tenant_id,
                    db=db,
                )
                if user_context_content:
                    context_hint += f"\n\nUser Context (auto-injected):\n{user_context_content}"
                else:
                    context_hint += "\n\nNo user context available (anonymous or new user)."
            elif context.user_id and db:
                # On-demand: Show user email for context (no REM LOOKUP - it requires exact user_id match)
                # Fail silently if user lookup fails - just proceed without user context
                try:
                    user_repo = Repository(User, "users", db=db)
                    user = await user_repo.get_by_id(context.user_id, context.tenant_id)
                    if user and user.email:
                        context_hint += f"\n\nUser: {user.email}"
                    # If user not found, just proceed without adding user context
                except Exception as e:
                    # Fail silently - don't block agent execution if user lookup fails
                    logger.debug(f"Could not load user context: {e}")

            # Add system context hint
            messages.append(ContextMessage(role="system", content=context_hint))

            # ALWAYS load session history (if session_id provided)
            # - Long assistant messages are compressed on load with REM LOOKUP hints
            # - Tool messages are never compressed (contain structured metadata)
            if context.session_id and settings.postgres.enabled:
                store = SessionMessageStore(user_id=context.user_id or "default")
                session_history = await store.load_session_messages(
                    session_id=context.session_id,
                    user_id=context.user_id,
                    compress_on_load=True,  # Compress long assistant messages
                )

                # Convert to ContextMessage format
                # For tool messages, wrap content with clear markers so the agent
                # can see previous tool results when the prompt is concatenated
                for msg_dict in session_history:
                    role = msg_dict["role"]
                    content = msg_dict["content"]

                    if role == "tool":
                        # Wrap tool results with clear markers for visibility
                        tool_name = msg_dict.get("tool_name", "unknown")
                        content = f"[TOOL RESULT: {tool_name}]\n{content}\n[/TOOL RESULT]"

                    messages.append(
                        ContextMessage(
                            role=role,
                            content=content,
                        )
                    )

                logger.debug(f"Loaded {len(session_history)} messages for session {context.session_id}")

            # Add new messages from request.
            # NOTE(review): ContextMessage(**msg) raises a validation error if a
            # dict carries keys other than role/content — confirm callers only
            # pass those two keys.
            if new_messages:
                for msg in new_messages:
                    messages.append(ContextMessage(**msg))

            return context, messages

        finally:
            # Only tear down connections this builder opened itself.
            if close_db and db:
                await db.disconnect()

    @staticmethod
    async def _load_user_context(
        user_id: str | None,
        tenant_id: str,
        db: PostgresService,
    ) -> str | None:
        """
        Load user profile from database and format as context.

        user_id is always a UUID5 hash of email (bijection).
        Looks up user by their id field in the database.

        Returns formatted string with:
        - User summary (generated by dreaming worker)
        - Current projects
        - Technical interests
        - Preferred topics

        Returns None if user_id not provided or user not found.
        """
        if not user_id:
            return None

        try:
            user_repo = Repository(User, "users", db=db)
            # user_id is UUID5 hash of email - look up by database id
            user = await user_repo.get_by_id(user_id, tenant_id)

            if not user:
                logger.debug(f"User {user_id} not found in tenant {tenant_id}")
                return None

            # Build user context string; each optional attribute contributes
            # one line, and lists are truncated to keep the hint compact.
            parts = []

            if user.summary:
                parts.append(f"Summary: {user.summary}")

            if user.interests:
                parts.append(f"Interests: {', '.join(user.interests[:5])}")

            if user.preferred_topics:
                parts.append(f"Topics: {', '.join(user.preferred_topics[:5])}")

            # Add full profile from metadata if available.
            # NOTE(review): assumes metadata["profile"]["current_projects"] is a
            # list of dicts with a "name" key — verify against the dreaming
            # worker that writes this structure.
            if user.metadata and "profile" in user.metadata:
                profile = user.metadata["profile"]

                if profile.get("current_projects"):
                    projects = profile["current_projects"]
                    project_names = [p.get("name", "Unnamed") for p in projects[:3]]
                    parts.append(f"Current Projects: {', '.join(project_names)}")

            if not parts:
                return None

            return "\n".join(parts)

        except Exception as e:
            # Best-effort: a failed profile load must not block agent execution.
            logger.error(f"Failed to load user context: {e}")
            return None

    @staticmethod
    async def build_from_test(
        user_id: str = "test@rem.ai",
        tenant_id: str = "test-tenant",
        session_id: str | None = None,
        message: str = "Hello",
        model: str | None = None,
    ) -> tuple[AgentContext, list[ContextMessage]]:
        """
        Build context for testing (no database lookup).

        Creates minimal context with:
        - Test user (test@rem.ai)
        - Test tenant
        - Context hint with date
        - Single user message

        Args:
            user_id: Test user identifier (default: test@rem.ai)
            tenant_id: Test tenant identifier
            session_id: Optional session ID
            message: User message content
            model: Optional model override

        Returns:
            Tuple of (AgentContext, messages list)

        Example:
            context, messages = await ContextBuilder.build_from_test(
                user_id="test@rem.ai",
                message="What's the weather like?"
            )
        """
        # Deferred import mirrors build_from_headers (settings read lazily).
        from ..settings import settings

        # Create test context
        context = AgentContext(
            user_id=user_id,
            tenant_id=tenant_id,
            session_id=session_id,
            default_model=model or settings.llm.default_model,
        )

        # Build minimal messages: one system hint plus the single user message.
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        context_hint = f"Today's date: {today}.\n\nTest user context: {user_id} (test mode, no profile loaded)."

        messages = [
            ContextMessage(role="system", content=context_hint),
            ContextMessage(role="user", content=message),
        ]

        return context, messages
@@ -0,0 +1,301 @@
1
+ """
2
+ LLM Provider Model Registry.
3
+
4
+ Defines available LLM models across providers (OpenAI, Anthropic, Google, Cerebras).
5
+ Used by the models API endpoint and for validating model requests.
6
+
7
+ Future: Models will be stored in database for dynamic management.
8
+ """
9
+
10
+ from pydantic import BaseModel, Field
11
+ from typing import Literal
12
+
13
+
14
class ModelInfo(BaseModel):
    """Information about a single model.

    Field names (id/object/created/owned_by) appear to follow the OpenAI
    ``/v1/models`` response object shape — TODO confirm against the models
    API endpoint that serves these entries.
    """

    # "provider:model" format, e.g. "openai:gpt-4.1" or "anthropic:claude-sonnet-4-5".
    id: str = Field(description="Model ID in provider:model format")
    # Fixed discriminator; always the literal string "model".
    object: Literal["model"] = "model"
    # Approximate release date as a Unix timestamp (seconds since epoch, UTC).
    created: int = Field(description="Unix timestamp of model availability")
    owned_by: str = Field(description="Provider name")
    description: str | None = Field(default=None, description="Model description")
    context_window: int | None = Field(default=None, description="Max context tokens")
    max_output_tokens: int | None = Field(default=None, description="Max output tokens")
24
+
25
+
26
# Model definitions with 2025 releases
# Using Unix timestamps for created dates (approximate release dates)
#
# Each `created` value is 00:00:00 UTC on the date named in its trailing
# comment. (Fix: several entries previously carried the same calendar day in
# the WRONG YEAR — e.g. claude-opus-4-5-20251124 had 1732406400, which is
# 2024-11-24, while both the comment and the YYYYMMDD suffix in the model ID
# say 2025-11-24. All such values are now consistent with their comments and
# dated model-ID suffixes.)
AVAILABLE_MODELS: list[ModelInfo] = [
    # ==========================================================================
    # OpenAI Models (2025)
    # ==========================================================================
    # GPT-4.1 series (Released April 14, 2025)
    ModelInfo(
        id="openai:gpt-4.1",
        created=1744588800,  # April 14, 2025
        owned_by="openai",
        description="Latest GPT-4 iteration, excels at coding and instruction following. 1M context.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    ModelInfo(
        id="openai:gpt-4.1-mini",
        created=1744588800,
        owned_by="openai",
        description="Small model beating GPT-4o in many benchmarks. 83% cost reduction vs GPT-4o.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    ModelInfo(
        id="openai:gpt-4.1-nano",
        created=1744588800,
        owned_by="openai",
        description="Fastest and cheapest OpenAI model. Ideal for classification and autocompletion.",
        context_window=1047576,
        max_output_tokens=32768,
    ),
    # GPT-4o (legacy but still supported)
    ModelInfo(
        id="openai:gpt-4o",
        created=1715558400,  # May 13, 2024 (was 1715644800 = May 14)
        owned_by="openai",
        description="Previous flagship multimodal model. Being superseded by GPT-4.1.",
        context_window=128000,
        max_output_tokens=16384,
    ),
    ModelInfo(
        id="openai:gpt-4o-mini",
        created=1721260800,  # July 18, 2024 (was 1721347200 = July 19)
        owned_by="openai",
        description="Cost-efficient smaller GPT-4o variant.",
        context_window=128000,
        max_output_tokens=16384,
    ),
    # o1 reasoning models
    ModelInfo(
        id="openai:o1",
        created=1733961600,  # December 12, 2024
        owned_by="openai",
        description="Advanced reasoning model for complex problems. Extended thinking.",
        context_window=200000,
        max_output_tokens=100000,
    ),
    ModelInfo(
        id="openai:o1-mini",
        created=1726099200,  # September 12, 2024
        owned_by="openai",
        description="Smaller reasoning model, fast for coding and math.",
        context_window=128000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="openai:o3-mini",
        created=1738195200,  # January 30, 2025
        owned_by="openai",
        description="Latest mini reasoning model with improved performance.",
        context_window=200000,
        max_output_tokens=100000,
    ),
    # ==========================================================================
    # Anthropic Models (2025)
    # ==========================================================================
    # Claude 4.5 series (Latest - November 2025)
    ModelInfo(
        id="anthropic:claude-opus-4-5-20251124",
        created=1763942400,  # November 24, 2025 (was 1732406400 = 2024-11-24)
        owned_by="anthropic",
        description="Most capable Claude model. World-class coding with 'effort' parameter control.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-5-20250929",
        created=1759104000,  # September 29, 2025 (was 1727568000 = 2024-09-29)
        owned_by="anthropic",
        description="Best balance of intelligence and speed. Excellent for coding and agents.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-haiku-4-5-20251101",
        created=1761955200,  # November 1, 2025 (was 1730419200 = 2024-11-01)
        owned_by="anthropic",
        description="Fast and affordable. Sonnet 4 performance at 1/3 cost. Safest Claude model.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # Claude 4 series
    ModelInfo(
        id="anthropic:claude-opus-4-20250514",
        created=1747180800,  # May 14, 2025 (was 1715644800 = 2024-05-14)
        owned_by="anthropic",
        description="World's best coding model. Sustained performance on complex agent workflows.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-20250514",
        created=1747180800,  # May 14, 2025 (was 1715644800 = 2024-05-14)
        owned_by="anthropic",
        description="Significant upgrade to Sonnet 3.7. Great for everyday tasks.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-opus-4-1-20250805",
        created=1754352000,  # August 5, 2025 (was 1722816000 = 2024-08-05)
        owned_by="anthropic",
        description="Opus 4 upgrade focused on agentic tasks and real-world coding.",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # Aliases for convenience
    ModelInfo(
        id="anthropic:claude-opus-4-5",
        created=1763942400,
        owned_by="anthropic",
        description="Alias for latest Claude Opus 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-sonnet-4-5",
        created=1759104000,
        owned_by="anthropic",
        description="Alias for latest Claude Sonnet 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    ModelInfo(
        id="anthropic:claude-haiku-4-5",
        created=1761955200,
        owned_by="anthropic",
        description="Alias for latest Claude Haiku 4.5",
        context_window=200000,
        max_output_tokens=128000,
    ),
    # ==========================================================================
    # Google Models (2025)
    # ==========================================================================
    # Gemini 3 (Latest)
    ModelInfo(
        id="google:gemini-3-pro",
        created=1761955200,  # November 2025 (was 1730419200 = November 2024)
        owned_by="google",
        description="Most advanced Gemini. State-of-the-art reasoning, 35% better than 2.5 Pro.",
        context_window=2000000,
        max_output_tokens=65536,
    ),
    # Gemini 2.5 series
    ModelInfo(
        id="google:gemini-2.5-pro",
        created=1759104000,  # September 2025 (was 1727568000 = September 2024)
        owned_by="google",
        description="High-capability model with adaptive thinking. 1M context window.",
        context_window=1000000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="google:gemini-2.5-flash",
        created=1759104000,
        owned_by="google",
        description="Fast and capable. Best for large-scale processing and agentic tasks.",
        context_window=1000000,
        max_output_tokens=65536,
    ),
    ModelInfo(
        id="google:gemini-2.5-flash-lite",
        created=1759104000,
        owned_by="google",
        description="Optimized for massive scale. Balances cost and performance.",
        context_window=1000000,
        max_output_tokens=32768,
    ),
    # Gemini 2.0
    ModelInfo(
        id="google:gemini-2.0-flash",
        created=1733875200,  # December 2024
        owned_by="google",
        description="Fast multimodal model with native tool use.",
        context_window=1000000,
        max_output_tokens=8192,
    ),
    # Gemma open models
    ModelInfo(
        id="google:gemma-3",
        created=1727568000,
        owned_by="google",
        description="Open model with text/image input, 140+ languages, 128K context.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    ModelInfo(
        id="google:gemma-3n",
        created=1730419200,
        owned_by="google",
        description="Efficient open model for low-resource devices. Multimodal input.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    # ==========================================================================
    # Cerebras Models (Ultra-fast inference)
    # ==========================================================================
    ModelInfo(
        id="cerebras:llama-3.3-70b",
        created=1733875200,  # December 2024
        owned_by="cerebras",
        description="Llama 3.3 70B on Cerebras. Ultra-fast inference (~2000 tok/s). Fully compatible with structured output.",
        context_window=128000,
        max_output_tokens=8192,
    ),
    ModelInfo(
        id="cerebras:qwen-3-32b",
        created=1733875200,  # December 2024
        owned_by="cerebras",
        description="Qwen 3 32B on Cerebras. Ultra-fast inference (~2400 tok/s). Requires strict schema mode.",
        context_window=32000,
        max_output_tokens=8192,
    ),
]
260
+
261
# Precomputed set of every registered model ID; gives O(1) membership checks
# during request validation instead of scanning AVAILABLE_MODELS each time.
ALLOWED_MODEL_IDS: set[str] = {entry.id for entry in AVAILABLE_MODELS}
263
+
264
+
265
def is_valid_model(model_id: str | None) -> bool:
    """Check if a model ID is in the allowed list.

    A ``None`` model ID is never valid; any other value must appear in
    ``ALLOWED_MODEL_IDS`` exactly (``provider:model`` spelling).
    """
    return model_id is not None and model_id in ALLOWED_MODEL_IDS
270
+
271
+
272
def get_valid_model_or_default(model_id: str | None, default_model: str) -> str:
    """
    Return the model_id if it's valid, otherwise return the default.

    Args:
        model_id: The requested model ID (may be None or invalid)
        default_model: Fallback model from settings

    Returns:
        Valid model ID to use
    """
    # is_valid_model() already rejects None, so the narrowed value is a str.
    return model_id if is_valid_model(model_id) else default_model  # type: ignore[return-value]
286
+
287
+
288
def get_model_by_id(model_id: str) -> ModelInfo | None:
    """
    Get model info by ID.

    Args:
        model_id: Model identifier in provider:model format

    Returns:
        ModelInfo if found, None otherwise
    """
    # Linear scan over the (small) registry; first match wins, None otherwise.
    return next((entry for entry in AVAILABLE_MODELS if entry.id == model_id), None)
File without changes