remdb 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,229 @@
1
+ """
2
+ OAuth 2.1 Authentication Router.
3
+
4
+ Leverages Authlib for standards-compliant OAuth/OIDC implementation.
5
+ Minimal custom code - Authlib handles PKCE, token validation, JWKS.
6
+
7
+ Endpoints:
8
+ - GET /api/auth/{provider}/login - Initiate OAuth flow
9
+ - GET /api/auth/{provider}/callback - OAuth callback
10
+ - POST /api/auth/logout - Clear session
11
+ - GET /api/auth/me - Current user info
12
+
13
+ Supported providers:
14
+ - google: Google OAuth 2.0 / OIDC
15
+ - microsoft: Microsoft Entra ID OIDC
16
+
17
+ Design Pattern (OAuth 2.1 + PKCE):
18
+ 1. User clicks "Login with Google"
19
+ 2. /login generates state + PKCE code_verifier
20
+ 3. Store code_verifier in session
21
+ 4. Redirect to provider with code_challenge
22
+ 5. User authenticates and grants consent
23
+ 6. Provider redirects to /callback with code
24
+ 7. Exchange code + code_verifier for tokens
25
+ 8. Validate ID token signature with JWKS
26
+ 9. Store user info in session
27
+ 10. Redirect to application
28
+
29
+ Dependencies:
30
+ pip install authlib httpx
31
+
32
+ Environment variables:
33
+ AUTH__ENABLED=true
34
+ AUTH__SESSION_SECRET=<random-secret>
35
+ AUTH__GOOGLE__CLIENT_ID=<google-client-id>
36
+ AUTH__GOOGLE__CLIENT_SECRET=<google-client-secret>
37
+ AUTH__MICROSOFT__CLIENT_ID=<microsoft-client-id>
38
+ AUTH__MICROSOFT__CLIENT_SECRET=<microsoft-client-secret>
39
+ AUTH__MICROSOFT__TENANT=common
40
+
41
+ References:
42
+ - Authlib: https://docs.authlib.org/en/latest/
43
+ - OAuth 2.1: https://datatracker.ietf.org/doc/html/draft-ietf-oauth-v2-1-11
44
+ """
45
+
46
+ from fastapi import APIRouter, HTTPException, Request
47
+ from fastapi.responses import RedirectResponse
48
+ from authlib.integrations.starlette_client import OAuth
49
+ from loguru import logger
50
+
51
+ from ...settings import settings
52
+
53
+ router = APIRouter(prefix="/api/auth", tags=["auth"])
54
+
55
# Authlib OAuth client registry shared by the route handlers in this module.
# NOTE: Authlib only sends a PKCE code_challenge when the client is registered
# with ``code_challenge_method`` in ``client_kwargs``; it is not implicit.
oauth = OAuth()

# Register Google provider (only when a client ID is configured)
if settings.auth.google.client_id:
    oauth.register(
        name="google",
        client_id=settings.auth.google.client_id,
        client_secret=settings.auth.google.client_secret,
        server_metadata_url="https://accounts.google.com/.well-known/openid-configuration",
        client_kwargs={
            "scope": "openid email profile",
            # Enable PKCE (RFC 7636) for the authorization request
            "code_challenge_method": "S256",
        },
    )
    logger.info("Google OAuth provider registered")

# Register Microsoft provider (only when a client ID is configured)
if settings.auth.microsoft.client_id:
    # The tenant selects which Entra ID directory's OIDC metadata is used
    tenant = settings.auth.microsoft.tenant
    oauth.register(
        name="microsoft",
        client_id=settings.auth.microsoft.client_id,
        client_secret=settings.auth.microsoft.client_secret,
        server_metadata_url=f"https://login.microsoftonline.com/{tenant}/v2.0/.well-known/openid-configuration",
        client_kwargs={
            "scope": "openid email profile User.Read",
            # Enable PKCE (RFC 7636) for the authorization request
            "code_challenge_method": "S256",
        },
    )
    logger.info(f"Microsoft OAuth provider registered (tenant: {tenant})")
86
+
87
+
88
@router.get("/{provider}/login")
async def login(provider: str, request: Request):
    """
    Start the OAuth authorization flow for the given provider.

    The redirect itself is produced by Authlib's ``authorize_redirect()``,
    which builds the provider authorization URL and keeps the per-flow data
    (such as ``state``) in the request session.

    Args:
        provider: OAuth provider name taken from the URL path (google, microsoft)
        request: FastAPI request (handed to Authlib for session access)

    Returns:
        Redirect to the provider's authorization page

    Raises:
        HTTPException: 501 when authentication is disabled; 400 when the
            provider has no registered client or no redirect URI mapping.
    """
    if not settings.auth.enabled:
        raise HTTPException(status_code=501, detail="Authentication is disabled")

    # A provider is usable only if it was registered at import time AND it is
    # one of the names this module knows a redirect URI for.
    oauth_client = oauth.create_client(provider)
    if not oauth_client or provider not in ("google", "microsoft"):
        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")

    # Pick the settings section that holds this provider's redirect URI
    provider_settings = settings.auth.google if provider == "google" else settings.auth.microsoft

    return await oauth_client.authorize_redirect(request, provider_settings.redirect_uri)
128
+
129
+
130
@router.get("/{provider}/callback")
async def callback(provider: str, request: Request):
    """
    OAuth callback endpoint.

    Completes the flow started by ``login``: Authlib's
    ``authorize_access_token()`` exchanges the authorization code for tokens,
    then the user's identity and tokens are written to the session.

    Args:
        provider: OAuth provider name taken from the URL path (google, microsoft)
        request: FastAPI request (for session and query params)

    Returns:
        Redirect to application home page ("/")

    Raises:
        HTTPException: 501 when authentication is disabled; 400 when the
            provider is unknown or any step of the token exchange fails.
            The failure detail is deliberately generic; the underlying
            error is only written to the server log.
    """
    if not settings.auth.enabled:
        raise HTTPException(status_code=501, detail="Authentication is disabled")

    # Get OAuth client for provider
    client = oauth.create_client(provider)
    if not client:
        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")

    try:
        # Authlib validates the flow data stored in the session and exchanges
        # the authorization code for tokens.
        token = await client.authorize_access_token(request)

        # Prefer the claims Authlib attached to the token; otherwise ask the
        # provider's userinfo endpoint.
        user_info = token.get("userinfo")
        if not user_info:
            user_info = await client.userinfo(token=token)

        # Store user info in session
        request.session["user"] = {
            "provider": provider,
            "sub": user_info.get("sub"),
            "email": user_info.get("email"),
            "name": user_info.get("name"),
            "picture": user_info.get("picture"),
        }

        # Store tokens in session for API access
        # NOTE(review): if the session backend is a client-side cookie, these
        # tokens are sent to the browser — confirm the session configuration.
        request.session["tokens"] = {
            "access_token": token.get("access_token"),
            "refresh_token": token.get("refresh_token"),
            "expires_at": token.get("expires_at"),
        }

        logger.info(f"User authenticated: {user_info.get('email')} via {provider}")

        # Redirect to application
        # TODO: Support custom redirect URL from state parameter
        return RedirectResponse(url="/")

    except Exception as e:
        # Keep full details (with traceback) in the server log only; do not
        # echo internal exception text back to an unauthenticated caller.
        logger.exception(f"OAuth callback error: {e}")
        raise HTTPException(status_code=400, detail="Authentication failed") from e
197
+
198
+
199
@router.post("/logout")
async def logout(request: Request):
    """
    Log the caller out by emptying their session.

    Args:
        request: FastAPI request whose session is cleared

    Returns:
        Dict with a confirmation message
    """
    session = request.session
    # Drops everything stored for this session, including "user" and "tokens"
    session.clear()

    response_body = {"message": "Logged out successfully"}
    return response_body
212
+
213
+
214
@router.get("/me")
async def me(request: Request):
    """
    Return the user record stored in the session.

    Args:
        request: FastAPI request carrying the session

    Returns:
        The value stored under the session's "user" key

    Raises:
        HTTPException: 401 when the session holds no (or an empty) user entry
    """
    current_user = request.session.get("user")
    if current_user:
        return current_user

    raise HTTPException(status_code=401, detail="Not authenticated")
@@ -0,0 +1,5 @@
1
+ """Chat completions router with OpenAI-compatible API."""
2
+
3
+ from .completions import router
4
+
5
+ __all__ = ["router"]
@@ -0,0 +1,281 @@
1
+ """
2
+ OpenAI-compatible chat completions router for REM.
3
+
4
+ Design Pattern:
5
+ - Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema)
6
+ - ContextBuilder centralizes message construction with user profile + session history
7
+ - Body.model is the LLM model for Pydantic AI
8
+ - X-Agent-Schema header specifies which agent schema to use (defaults to 'rem')
9
+ - Support for streaming (SSE) and non-streaming modes
10
+ - Response format control (text vs json_object)
11
+
12
+ Context Building Flow:
13
+ 1. ContextBuilder.build_from_headers() extracts user_id, session_id from headers
14
+ 2. Session history ALWAYS loaded with compression (if session_id provided)
15
+ - Uses SessionMessageStore with compression to keep context efficient
16
+ - Long messages include REM LOOKUP hints: "... [REM LOOKUP session-{id}-msg-{index}] ..."
17
+ - Agent can retrieve full content on-demand using REM LOOKUP
18
+ 3. User profile provided as REM LOOKUP hint (on-demand by default)
19
+ - Agent receives: "User ID: {user_id}. To load user profile: Use REM LOOKUP users/{user_id}"
20
+ - Agent decides whether to load profile based on query
21
+ 4. If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
22
+ 5. Combines: system context + compressed session history + new messages
23
+ 6. Agent receives complete message list ready for execution
24
+
25
+ Headers Mapping
26
+ X-User-Id → AgentContext.user_id
27
+ X-Tenant-Id → AgentContext.tenant_id
28
+ X-Session-Id → AgentContext.session_id
29
+ X-Model-Name → AgentContext.default_model (overrides body.model)
30
+ X-Agent-Schema → AgentContext.agent_schema_uri (defaults to 'rem')
31
+
32
+ Default Agent:
33
+ If X-Agent-Schema header is not provided, the system loads 'rem' schema,
34
+ which is the REM expert assistant with comprehensive knowledge about:
35
+ - REM architecture and concepts
36
+ - Entity types and graph traversal
37
+ - REM queries (LOOKUP, FUZZY, TRAVERSE)
38
+ - Agent development with Pydantic AI
39
+ - Cloud infrastructure (EKS, Karpenter, CloudNativePG)
40
+
41
+ Example Request:
42
+ POST /api/v1/chat/completions
43
+ X-Tenant-Id: acme-corp
44
+ X-User-Id: user123
45
+ X-Agent-Schema: rem # Optional, this is the default
46
+
47
+ {
48
+ "model": "openai:gpt-4o-mini",
49
+ "messages": [
50
+ {"role": "user", "content": "How do I create a new REM entity?"}
51
+ ],
52
+ "stream": true
53
+ }
54
+ """
55
+
56
+ import base64
57
+ import tempfile
58
+ import time
59
+ import uuid
60
+ from datetime import datetime
61
+ from pathlib import Path
62
+
63
+ from fastapi import APIRouter, Request
64
+ from fastapi.responses import StreamingResponse
65
+ from loguru import logger
66
+
67
+ from ....agentic.context import AgentContext
68
+ from ....agentic.context_builder import ContextBuilder
69
+ from ....agentic.providers.pydantic_ai import create_agent
70
+ from ....services.audio.transcriber import AudioTranscriber
71
+ from ....services.session import SessionMessageStore, reload_session
72
+ from ....settings import settings
73
+ from ....utils.schema_loader import load_agent_schema
74
+ from .json_utils import extract_json_resilient
75
+ from .models import (
76
+ ChatCompletionChoice,
77
+ ChatCompletionRequest,
78
+ ChatCompletionResponse,
79
+ ChatCompletionUsage,
80
+ ChatMessage,
81
+ )
82
+ from .streaming import stream_openai_response
83
+
84
+ router = APIRouter(prefix="/v1", tags=["chat"])
85
+
86
+ # Default agent schema file
87
+ DEFAULT_AGENT_SCHEMA = "rem"
88
+
89
+
90
@router.post("/chat/completions", response_model=None)
async def chat_completions(body: ChatCompletionRequest, request: Request):
    """
    OpenAI-compatible chat completions with REM agent support.

    The 'model' field in the request body is the LLM model used by Pydantic AI.
    The X-Agent-Schema header specifies which agent schema to use (defaults to 'rem').

    Supported Headers:
    | Header              | Description                          | Maps To                        | Default       |
    |---------------------|--------------------------------------|--------------------------------|---------------|
    | X-User-Id           | User identifier                      | AgentContext.user_id           | None          |
    | X-Tenant-Id         | Tenant identifier (multi-tenancy)    | AgentContext.tenant_id         | "default"     |
    | X-Session-Id        | Session/conversation identifier      | AgentContext.session_id        | None          |
    | X-Agent-Schema      | Agent schema name                    | AgentContext.agent_schema_uri  | "rem"         |

    Audio Input:
    - When the X-Chat-Is-Audio header is "true" and the first message has role
      "user", its content is base64-decoded, transcribed, and replaced with the
      transcript. If transcription fails the original content is passed through.

    Example Models:
    - anthropic:claude-sonnet-4-5-20250929 (Claude 4.5 Sonnet)
    - anthropic:claude-3-7-sonnet-20250219 (Claude 3.7 Sonnet)
    - anthropic:claude-3-5-haiku-20241022 (Claude 3.5 Haiku)
    - openai:gpt-4.1-turbo
    - openai:gpt-4o
    - openai:gpt-4o-mini

    Response Formats:
    - text (default): Plain text response
    - json_object: Best-effort JSON extraction from agent output

    Default Agent (rem):
    - Expert assistant for REM system
    - Comprehensive knowledge of REM architecture, concepts, and implementation
    - Structured output with answer, confidence, and references

    Session Management:
    - Session history ALWAYS loaded with compression when X-Session-Id provided
    - Uses SessionMessageStore with REM LOOKUP hints for long messages
    - User profile provided as REM LOOKUP hint (on-demand by default)
    - If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
    - New messages saved to database with compression for session continuity
    - When Postgres is disabled, session management is skipped
    """
    # Load agent schema: use header value from context or default
    # Extract AgentContext first to get schema name
    temp_context = AgentContext.from_headers(dict(request.headers))
    schema_name = temp_context.agent_schema_uri or DEFAULT_AGENT_SCHEMA

    # Load schema using centralized utility
    try:
        agent_schema = load_agent_schema(schema_name)
    except FileNotFoundError:
        # Fallback to default if specified schema not found
        logger.warning(f"Schema '{schema_name}' not found, falling back to '{DEFAULT_AGENT_SCHEMA}'")
        schema_name = DEFAULT_AGENT_SCHEMA
        try:
            agent_schema = load_agent_schema(schema_name)
        except FileNotFoundError:
            # No schema available at all
            from fastapi import HTTPException

            raise HTTPException(
                status_code=500,
                detail=f"Agent schema '{schema_name}' not found and default schema unavailable",
            )

    logger.info(f"Using agent schema: {schema_name}, model: {body.model}")

    # Check for audio input
    is_audio = request.headers.get("x-chat-is-audio", "").lower() == "true"

    # Process messages (transcribe audio if needed)
    new_messages = [msg.model_dump() for msg in body.messages]

    if is_audio and new_messages and new_messages[0]["role"] == "user":
        # First user message should be base64-encoded audio
        tmp_path = None
        try:
            audio_b64 = new_messages[0]["content"]
            audio_bytes = base64.b64decode(audio_b64)

            # Write to temp file for transcription. delete=False keeps the file
            # on disk after the handle closes so the transcriber can open it by
            # path; removal is handled in the ``finally`` below.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                # Record the path before writing so a failed write is still cleaned up
                tmp_path = tmp_file.name
                tmp_file.write(audio_bytes)

            # Transcribe audio
            transcriber = AudioTranscriber()
            transcription = transcriber.transcribe_file(tmp_path)

            # Replace audio content with transcribed text
            new_messages[0]["content"] = transcription.text
            logger.info(f"Transcribed audio: {len(transcription.text)} characters")

        except Exception as e:
            logger.error(f"Failed to transcribe audio: {e}")
            # Fall through with original content (will likely fail at agent)

        finally:
            # Always remove the temp file, including when decoding or
            # transcription failed; cleanup stays best-effort.
            if tmp_path is not None:
                try:
                    Path(tmp_path).unlink(missing_ok=True)
                except OSError as cleanup_error:
                    logger.warning(f"Could not remove temp audio file {tmp_path}: {cleanup_error}")

    # Use ContextBuilder to construct complete message list with:
    # 1. System context hint (date + user profile)
    # 2. Session history (if session_id provided)
    # 3. New messages from request body (transcribed if audio)
    context, messages = await ContextBuilder.build_from_headers(
        headers=dict(request.headers),
        new_messages=new_messages,
    )

    logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")

    # Create agent with schema and model override
    agent = await create_agent(
        context=context,
        agent_schema_override=agent_schema,
        model_override=body.model,  # type: ignore[arg-type]
    )

    # Combine all messages into single prompt for agent
    # ContextBuilder already assembled: system context + history + new messages
    prompt = "\n".join(msg.content for msg in messages)

    # Generate OpenAI-compatible request ID
    request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"

    # Streaming mode (returns early; the persistence step below is not reached)
    if body.stream:
        return StreamingResponse(
            stream_openai_response(agent, prompt, body.model, request_id),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
        )

    # Non-streaming mode
    result = await agent.run(prompt)

    # Determine content format based on response_format request
    if body.response_format and body.response_format.type == "json_object":
        # JSON mode: Best-effort extraction of JSON from agent output
        content = extract_json_resilient(result.output)  # type: ignore[attr-defined]
    else:
        # Text mode: Return as string (handle structured output)
        from rem.agentic.serialization import serialize_agent_result_json

        content = serialize_agent_result_json(result.output)  # type: ignore[attr-defined]

    # Get usage from result if available
    usage = result.usage() if hasattr(result, "usage") else None
    prompt_tokens = usage.input_tokens if usage else 0
    completion_tokens = usage.output_tokens if usage else 0

    # Save conversation messages to database (if session_id and postgres enabled)
    if settings.postgres.enabled and context.session_id:
        # Extract just the new user message (last message from body)
        # NOTE(review): this stores the request's original content, so for audio
        # input the base64 payload rather than the transcript — confirm intent.
        user_message = {
            "role": "user",
            "content": body.messages[-1].content if body.messages else "",
            "timestamp": datetime.utcnow().isoformat(),
        }

        assistant_message = {
            "role": "assistant",
            "content": content,
            "timestamp": datetime.utcnow().isoformat(),
        }

        # Store messages with compression
        store = SessionMessageStore(user_id=context.user_id or "default")

        await store.store_session_messages(
            session_id=context.session_id,
            messages=[user_message, assistant_message],
            user_id=context.user_id,
            compress=True,
        )

        logger.info(f"Saved conversation to session {context.session_id}")

    return ChatCompletionResponse(
        id=request_id,
        created=int(time.time()),
        model=body.model,  # Echo back the requested model
        choices=[
            ChatCompletionChoice(
                index=0,
                message=ChatMessage(role="assistant", content=content),
                finish_reason="stop",
            )
        ],
        usage=ChatCompletionUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        ),
    )
@@ -0,0 +1,76 @@
1
+ """
2
+ JSON extraction utilities for response_format='json_object' mode.
3
+
4
+ Design Pattern:
5
+ - Best-effort JSON extraction from agent output
6
+ - Handles fenced code blocks (```json ... ```)
7
+ - Handles raw JSON objects
8
+ - Graceful fallback to string if extraction fails
9
+ """
10
+
11
+ import json
12
+ import re
13
+
14
+
15
+ def extract_json_resilient(output: str | dict | list) -> str:
16
+ """
17
+ Extract JSON from agent output with multiple fallback strategies.
18
+
19
+ Strategies (in order):
20
+ 1. If already dict/list, serialize directly
21
+ 2. Extract from fenced JSON code blocks (```json ... ```)
22
+ 3. Find JSON object/array in text ({...} or [...])
23
+ 4. Return as-is if all strategies fail
24
+
25
+ Args:
26
+ output: Agent output (str, dict, or list)
27
+
28
+ Returns:
29
+ JSON string (best-effort)
30
+
31
+ Examples:
32
+ >>> extract_json_resilient({"answer": "test"})
33
+ '{"answer": "test"}'
34
+
35
+ >>> extract_json_resilient('Here is the result:\\n```json\\n{"answer": "test"}\\n```')
36
+ '{"answer": "test"}'
37
+
38
+ >>> extract_json_resilient('The answer is {"answer": "test"} as shown above.')
39
+ '{"answer": "test"}'
40
+ """
41
+ # Strategy 1: Already structured
42
+ if isinstance(output, (dict, list)):
43
+ return json.dumps(output)
44
+
45
+ text = str(output)
46
+
47
+ # Strategy 2: Extract from fenced code blocks
48
+ fenced_match = re.search(r"```json\s*\n(.*?)\n```", text, re.DOTALL)
49
+ if fenced_match:
50
+ try:
51
+ json_str = fenced_match.group(1).strip()
52
+ # Validate it's valid JSON
53
+ json.loads(json_str)
54
+ return json_str
55
+ except json.JSONDecodeError:
56
+ pass
57
+
58
+ # Strategy 3: Find JSON object or array
59
+ # Look for {...} or [...]
60
+ for pattern in [
61
+ r"\{[^{}]*\}", # Simple object
62
+ r"\{.*\}", # Nested object
63
+ r"\[.*\]", # Array
64
+ ]:
65
+ match = re.search(pattern, text, re.DOTALL)
66
+ if match:
67
+ try:
68
+ json_str = match.group(0)
69
+ # Validate it's valid JSON
70
+ json.loads(json_str)
71
+ return json_str
72
+ except json.JSONDecodeError:
73
+ continue
74
+
75
+ # Strategy 4: Fallback to string
76
+ return text