PyPI - remdb - Versions diffs - 0.3.7__py3-none-any.whl - Mend

remdb 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (187) hide show

rem/__init__.py +2 -0
rem/agentic/README.md +650 -0
rem/agentic/__init__.py +39 -0
rem/agentic/agents/README.md +155 -0
rem/agentic/agents/__init__.py +8 -0
rem/agentic/context.py +148 -0
rem/agentic/context_builder.py +329 -0
rem/agentic/mcp/__init__.py +0 -0
rem/agentic/mcp/tool_wrapper.py +107 -0
rem/agentic/otel/__init__.py +5 -0
rem/agentic/otel/setup.py +151 -0
rem/agentic/providers/phoenix.py +674 -0
rem/agentic/providers/pydantic_ai.py +572 -0
rem/agentic/query.py +117 -0
rem/agentic/query_helper.py +89 -0
rem/agentic/schema.py +396 -0
rem/agentic/serialization.py +245 -0
rem/agentic/tools/__init__.py +5 -0
rem/agentic/tools/rem_tools.py +231 -0
rem/api/README.md +420 -0
rem/api/main.py +324 -0
rem/api/mcp_router/prompts.py +182 -0
rem/api/mcp_router/resources.py +536 -0
rem/api/mcp_router/server.py +213 -0
rem/api/mcp_router/tools.py +584 -0
rem/api/routers/auth.py +229 -0
rem/api/routers/chat/__init__.py +5 -0
rem/api/routers/chat/completions.py +281 -0
rem/api/routers/chat/json_utils.py +76 -0
rem/api/routers/chat/models.py +124 -0
rem/api/routers/chat/streaming.py +185 -0
rem/auth/README.md +258 -0
rem/auth/__init__.py +26 -0
rem/auth/middleware.py +100 -0
rem/auth/providers/__init__.py +13 -0
rem/auth/providers/base.py +376 -0
rem/auth/providers/google.py +163 -0
rem/auth/providers/microsoft.py +237 -0
rem/cli/README.md +455 -0
rem/cli/__init__.py +8 -0
rem/cli/commands/README.md +126 -0
rem/cli/commands/__init__.py +3 -0
rem/cli/commands/ask.py +566 -0
rem/cli/commands/configure.py +497 -0
rem/cli/commands/db.py +493 -0
rem/cli/commands/dreaming.py +324 -0
rem/cli/commands/experiments.py +1302 -0
rem/cli/commands/mcp.py +66 -0
rem/cli/commands/process.py +245 -0
rem/cli/commands/schema.py +183 -0
rem/cli/commands/serve.py +106 -0
rem/cli/dreaming.py +363 -0
rem/cli/main.py +96 -0
rem/config.py +237 -0
rem/mcp_server.py +41 -0
rem/models/core/__init__.py +49 -0
rem/models/core/core_model.py +64 -0
rem/models/core/engram.py +333 -0
rem/models/core/experiment.py +628 -0
rem/models/core/inline_edge.py +132 -0
rem/models/core/rem_query.py +243 -0
rem/models/entities/__init__.py +43 -0
rem/models/entities/file.py +57 -0
rem/models/entities/image_resource.py +88 -0
rem/models/entities/message.py +35 -0
rem/models/entities/moment.py +123 -0
rem/models/entities/ontology.py +191 -0
rem/models/entities/ontology_config.py +131 -0
rem/models/entities/resource.py +95 -0
rem/models/entities/schema.py +87 -0
rem/models/entities/user.py +85 -0
rem/py.typed +0 -0
rem/schemas/README.md +507 -0
rem/schemas/__init__.py +6 -0
rem/schemas/agents/README.md +92 -0
rem/schemas/agents/core/moment-builder.yaml +178 -0
rem/schemas/agents/core/rem-query-agent.yaml +226 -0
rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
rem/schemas/agents/core/simple-assistant.yaml +19 -0
rem/schemas/agents/core/user-profile-builder.yaml +163 -0
rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
rem/schemas/agents/examples/contract-extractor.yaml +134 -0
rem/schemas/agents/examples/cv-parser.yaml +263 -0
rem/schemas/agents/examples/hello-world.yaml +37 -0
rem/schemas/agents/examples/query.yaml +54 -0
rem/schemas/agents/examples/simple.yaml +21 -0
rem/schemas/agents/examples/test.yaml +29 -0
rem/schemas/agents/rem.yaml +128 -0
rem/schemas/evaluators/hello-world/default.yaml +77 -0
rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
rem/services/__init__.py +16 -0
rem/services/audio/INTEGRATION.md +308 -0
rem/services/audio/README.md +376 -0
rem/services/audio/__init__.py +15 -0
rem/services/audio/chunker.py +354 -0
rem/services/audio/transcriber.py +259 -0
rem/services/content/README.md +1269 -0
rem/services/content/__init__.py +5 -0
rem/services/content/providers.py +801 -0
rem/services/content/service.py +676 -0
rem/services/dreaming/README.md +230 -0
rem/services/dreaming/__init__.py +53 -0
rem/services/dreaming/affinity_service.py +336 -0
rem/services/dreaming/moment_service.py +264 -0
rem/services/dreaming/ontology_service.py +54 -0
rem/services/dreaming/user_model_service.py +297 -0
rem/services/dreaming/utils.py +39 -0
rem/services/embeddings/__init__.py +11 -0
rem/services/embeddings/api.py +120 -0
rem/services/embeddings/worker.py +421 -0
rem/services/fs/README.md +662 -0
rem/services/fs/__init__.py +62 -0
rem/services/fs/examples.py +206 -0
rem/services/fs/examples_paths.py +204 -0
rem/services/fs/git_provider.py +935 -0
rem/services/fs/local_provider.py +760 -0
rem/services/fs/parsing-hooks-examples.md +172 -0
rem/services/fs/paths.py +276 -0
rem/services/fs/provider.py +460 -0
rem/services/fs/s3_provider.py +1042 -0
rem/services/fs/service.py +186 -0
rem/services/git/README.md +1075 -0
rem/services/git/__init__.py +17 -0
rem/services/git/service.py +469 -0
rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
rem/services/phoenix/README.md +453 -0
rem/services/phoenix/__init__.py +46 -0
rem/services/phoenix/client.py +686 -0
rem/services/phoenix/config.py +88 -0
rem/services/phoenix/prompt_labels.py +477 -0
rem/services/postgres/README.md +575 -0
rem/services/postgres/__init__.py +23 -0
rem/services/postgres/migration_service.py +427 -0
rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
rem/services/postgres/register_type.py +352 -0
rem/services/postgres/repository.py +337 -0
rem/services/postgres/schema_generator.py +379 -0
rem/services/postgres/service.py +802 -0
rem/services/postgres/sql_builder.py +354 -0
rem/services/rem/README.md +304 -0
rem/services/rem/__init__.py +23 -0
rem/services/rem/exceptions.py +71 -0
rem/services/rem/executor.py +293 -0
rem/services/rem/parser.py +145 -0
rem/services/rem/queries.py +196 -0
rem/services/rem/query.py +371 -0
rem/services/rem/service.py +527 -0
rem/services/session/README.md +374 -0
rem/services/session/__init__.py +6 -0
rem/services/session/compression.py +360 -0
rem/services/session/reload.py +77 -0
rem/settings.py +1235 -0
rem/sql/002_install_models.sql +1068 -0
rem/sql/background_indexes.sql +42 -0
rem/sql/install_models.sql +1038 -0
rem/sql/migrations/001_install.sql +503 -0
rem/sql/migrations/002_install_models.sql +1202 -0
rem/utils/AGENTIC_CHUNKING.md +597 -0
rem/utils/README.md +583 -0
rem/utils/__init__.py +43 -0
rem/utils/agentic_chunking.py +622 -0
rem/utils/batch_ops.py +343 -0
rem/utils/chunking.py +108 -0
rem/utils/clip_embeddings.py +276 -0
rem/utils/dict_utils.py +98 -0
rem/utils/embeddings.py +423 -0
rem/utils/examples/embeddings_example.py +305 -0
rem/utils/examples/sql_types_example.py +202 -0
rem/utils/markdown.py +16 -0
rem/utils/model_helpers.py +236 -0
rem/utils/schema_loader.py +336 -0
rem/utils/sql_types.py +348 -0
rem/utils/user_id.py +81 -0
rem/utils/vision.py +330 -0
rem/workers/README.md +506 -0
rem/workers/__init__.py +5 -0
rem/workers/dreaming.py +502 -0
rem/workers/engram_processor.py +312 -0
rem/workers/sqs_file_processor.py +193 -0
remdb-0.3.7.dist-info/METADATA +1473 -0
remdb-0.3.7.dist-info/RECORD +187 -0
remdb-0.3.7.dist-info/WHEEL +4 -0
remdb-0.3.7.dist-info/entry_points.txt +2 -0

rem/api/routers/auth.py ADDED Viewed

@@ -0,0 +1,229 @@
+"""
+OAuth 2.1 Authentication Router.
+Leverages Authlib for standards-compliant OAuth/OIDC implementation.
+Minimal custom code - Authlib handles PKCE, token validation, JWKS.
+Endpoints:
+- GET  /api/auth/{provider}/login    - Initiate OAuth flow
+- GET  /api/auth/{provider}/callback - OAuth callback
+- POST /api/auth/logout              - Clear session
+- GET  /api/auth/me                  - Current user info
+Supported providers:
+- google: Google OAuth 2.0 / OIDC
+- microsoft: Microsoft Entra ID OIDC
+Design Pattern (OAuth 2.1 + PKCE):
+1. User clicks "Login with Google"
+2. /login generates state + PKCE code_verifier
+3. Store code_verifier in session
+4. Redirect to provider with code_challenge
+5. User authenticates and grants consent
+6. Provider redirects to /callback with code
+7. Exchange code + code_verifier for tokens
+8. Validate ID token signature with JWKS
+9. Store user info in session
+10. Redirect to application
+Dependencies:
+    pip install authlib httpx
+Environment variables:
+    AUTH__ENABLED=true
+    AUTH__SESSION_SECRET=<random-secret>
+    AUTH__GOOGLE__CLIENT_ID=<google-client-id>
+    AUTH__GOOGLE__CLIENT_SECRET=<google-client-secret>
+    AUTH__MICROSOFT__CLIENT_ID=<microsoft-client-id>
+    AUTH__MICROSOFT__CLIENT_SECRET=<microsoft-client-secret>
+    AUTH__MICROSOFT__TENANT=common
+References:
+- Authlib: https://docs.authlib.org/en/latest/
+- OAuth 2.1: https://datatracker.ietf.org/doc/html/draft-ietf-oauth-v2-1-11
+"""
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import RedirectResponse
+from authlib.integrations.starlette_client import OAuth
+from loguru import logger
+from ...settings import settings
+router = APIRouter(prefix="/api/auth", tags=["auth"])
+# Shared Authlib OAuth registry; providers are registered below at import time,
+# and only when a client_id is configured for them.
+oauth = OAuth()
+# Register Google provider
+if settings.auth.google.client_id:
+    oauth.register(
+        name="google",
+        client_id=settings.auth.google.client_id,
+        client_secret=settings.auth.google.client_secret,
+        server_metadata_url="https://accounts.google.com/.well-known/openid-configuration",
+        client_kwargs={
+            "scope": "openid email profile",
+            # Authlib only sends a PKCE code_challenge (and stores the
+            # code_verifier) when code_challenge_method is configured.
+            "code_challenge_method": "S256",
+        },
+    )
+    logger.info("Google OAuth provider registered")
+# Register Microsoft provider
+if settings.auth.microsoft.client_id:
+    tenant = settings.auth.microsoft.tenant
+    oauth.register(
+        name="microsoft",
+        client_id=settings.auth.microsoft.client_id,
+        client_secret=settings.auth.microsoft.client_secret,
+        server_metadata_url=f"https://login.microsoftonline.com/{tenant}/v2.0/.well-known/openid-configuration",
+        client_kwargs={
+            "scope": "openid email profile User.Read",
+            # Enable PKCE explicitly; it is not on by default in Authlib.
+            "code_challenge_method": "S256",
+        },
+    )
+    logger.info(f"Microsoft OAuth provider registered (tenant: {tenant})")
+@router.get("/{provider}/login")
+async def login(provider: str, request: Request):
+    """
+    Start the OAuth authorization flow for the given provider.
+    The redirect is produced by Authlib's authorize_redirect(), which builds
+    the authorization URL and keeps the per-request state in the session.
+    A PKCE code_challenge is included only if the provider was registered
+    with a code_challenge_method.
+    Args:
+        provider: OAuth provider name (google, microsoft)
+        request: FastAPI request (gives Authlib access to the session)
+    Returns:
+        Redirect response pointing at the provider's authorization page
+    Raises:
+        HTTPException: 501 when authentication is disabled,
+            400 when the provider is not registered or not supported
+    """
+    if not settings.auth.enabled:
+        raise HTTPException(status_code=501, detail="Authentication is disabled")
+    oauth_client = oauth.create_client(provider)
+    # Reject providers that are either unregistered or have no redirect URI
+    # setting known to this router.
+    if not oauth_client or provider not in ("google", "microsoft"):
+        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")
+    provider_settings = (
+        settings.auth.google if provider == "google" else settings.auth.microsoft
+    )
+    return await oauth_client.authorize_redirect(request, provider_settings.redirect_uri)
+@router.get("/{provider}/callback")
+async def callback(provider: str, request: Request):
+    """
+    OAuth callback endpoint.
+    Exchanges the authorization response for tokens via Authlib's
+    authorize_access_token(), stores a subset of the user claims and the
+    tokens in the session, and redirects to "/".
+    Args:
+        provider: OAuth provider (google, microsoft)
+        request: FastAPI request (for session and query params)
+    Returns:
+        Redirect to application home page
+    Raises:
+        HTTPException: 501 when authentication is disabled,
+            400 when the provider is unknown or the exchange fails
+    """
+    if not settings.auth.enabled:
+        raise HTTPException(status_code=501, detail="Authentication is disabled")
+    # Get OAuth client for provider
+    client = oauth.create_client(provider)
+    if not client:
+        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")
+    try:
+        # Authlib validates the state from the session and exchanges the
+        # authorization code for tokens.
+        token = await client.authorize_access_token(request)
+        # Prefer claims already parsed from the ID token; otherwise ask the
+        # provider's userinfo endpoint.
+        user_info = token.get("userinfo")
+        if not user_info:
+            user_info = await client.userinfo(token=token)
+        # Store user info in session
+        request.session["user"] = {
+            "provider": provider,
+            "sub": user_info.get("sub"),
+            "email": user_info.get("email"),
+            "name": user_info.get("name"),
+            "picture": user_info.get("picture"),
+        }
+        # Store tokens in session for API access
+        # NOTE(review): if the session backend is a signed (not encrypted)
+        # cookie, these tokens are readable by the client - confirm the
+        # session middleware configuration.
+        request.session["tokens"] = {
+            "access_token": token.get("access_token"),
+            "refresh_token": token.get("refresh_token"),
+            "expires_at": token.get("expires_at"),
+        }
+        logger.info(f"User authenticated: {user_info.get('email')} via {provider}")
+        # Redirect to application
+        # TODO: Support custom redirect URL from state parameter
+        return RedirectResponse(url="/")
+    except Exception as e:
+        # Keep the detailed cause in the server log only; the response must
+        # not echo internal exception text to an unauthenticated caller.
+        logger.error(f"OAuth callback error: {e}")
+        raise HTTPException(status_code=400, detail="Authentication failed") from e
+@router.post("/logout")
+async def logout(request: Request):
+    """
+    Log the caller out by emptying the session.
+    Args:
+        request: FastAPI request whose session is cleared
+    Returns:
+        Dict with a confirmation message
+    """
+    session = request.session
+    session.clear()
+    confirmation = {"message": "Logged out successfully"}
+    return confirmation
+@router.get("/me")
+async def me(request: Request):
+    """
+    Return the user entry stored in the session.
+    Args:
+        request: FastAPI request carrying the session
+    Returns:
+        The session's "user" value
+    Raises:
+        HTTPException: 401 when the session has no (or an empty) user entry
+    """
+    current_user = request.session.get("user")
+    if current_user:
+        return current_user
+    raise HTTPException(status_code=401, detail="Not authenticated")

rem/api/routers/chat/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Chat completions router with OpenAI-compatible API."""
+from .completions import router
+# The router imported from .completions is the only public name of this package.
+__all__ = ["router"]

rem/api/routers/chat/completions.py ADDED Viewed

@@ -0,0 +1,281 @@
+"""
+OpenAI-compatible chat completions router for REM.
+Design Pattern:
+- Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Session-Id, X-Agent-Schema)
+- ContextBuilder centralizes message construction with user profile + session history
+- Body.model is the LLM model for Pydantic AI
+- X-Agent-Schema header specifies which agent schema to use (defaults to 'rem')
+- Support for streaming (SSE) and non-streaming modes
+- Response format control (text vs json_object)
+Context Building Flow:
+1. ContextBuilder.build_from_headers() extracts user_id, session_id from headers
+2. Session history ALWAYS loaded with compression (if session_id provided)
+   - Uses SessionMessageStore with compression to keep context efficient
+   - Long messages include REM LOOKUP hints: "... [REM LOOKUP session-{id}-msg-{index}] ..."
+   - Agent can retrieve full content on-demand using REM LOOKUP
+3. User profile provided as REM LOOKUP hint (on-demand by default)
+   - Agent receives: "User ID: {user_id}. To load user profile: Use REM LOOKUP users/{user_id}"
+   - Agent decides whether to load profile based on query
+4. If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
+5. Combines: system context + compressed session history + new messages
+6. Agent receives complete message list ready for execution
+Headers Mapping
+    X-User-Id        → AgentContext.user_id
+    X-Tenant-Id      → AgentContext.tenant_id
+    X-Session-Id     → AgentContext.session_id
+    X-Model-Name     → AgentContext.default_model (overrides body.model)
+    X-Agent-Schema   → AgentContext.agent_schema_uri (defaults to 'rem')
+Default Agent:
+    If X-Agent-Schema header is not provided, the system loads 'rem' schema,
+    which is the REM expert assistant with comprehensive knowledge about:
+    - REM architecture and concepts
+    - Entity types and graph traversal
+    - REM queries (LOOKUP, FUZZY, TRAVERSE)
+    - Agent development with Pydantic AI
+    - Cloud infrastructure (EKS, Karpenter, CloudNativePG)
+Example Request:
+    POST /api/v1/chat/completions
+    X-Tenant-Id: acme-corp
+    X-User-Id: user123
+    X-Agent-Schema: rem  # Optional, this is the default
+    {
+      "model": "openai:gpt-4o-mini",
+      "messages": [
+        {"role": "user", "content": "How do I create a new REM entity?"}
+      ],
+      "stream": true
+    }
+"""
+import base64
+import tempfile
+import time
+import uuid
+from datetime import datetime
+from pathlib import Path
+from fastapi import APIRouter, Request
+from fastapi.responses import StreamingResponse
+from loguru import logger
+from ....agentic.context import AgentContext
+from ....agentic.context_builder import ContextBuilder
+from ....agentic.providers.pydantic_ai import create_agent
+from ....services.audio.transcriber import AudioTranscriber
+from ....services.session import SessionMessageStore, reload_session
+from ....settings import settings
+from ....utils.schema_loader import load_agent_schema
+from .json_utils import extract_json_resilient
+from .models import (
+    ChatCompletionChoice,
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+    ChatCompletionUsage,
+    ChatMessage,
+)
+from .streaming import stream_openai_response
+# Router for the chat endpoints; routes declared on it are prefixed with "/v1".
+router = APIRouter(prefix="/v1", tags=["chat"])
+# Schema name handed to load_agent_schema() when the request headers do not
+# select one, and the fallback when the requested schema is not found.
+DEFAULT_AGENT_SCHEMA = "rem"
+@router.post("/chat/completions", response_model=None)
+async def chat_completions(body: ChatCompletionRequest, request: Request):
+    """
+    OpenAI-compatible chat completions with REM agent support.
+    The 'model' field in the request body is the LLM model used by Pydantic AI.
+    The X-Agent-Schema header specifies which agent schema to use (defaults to 'rem').
+    Supported Headers:
+    | Header              | Description                          | Maps To                        | Default       |
+    |---------------------|--------------------------------------|--------------------------------|---------------|
+    | X-User-Id           | User identifier                      | AgentContext.user_id           | None          |
+    | X-Tenant-Id         | Tenant identifier (multi-tenancy)    | AgentContext.tenant_id         | "default"     |
+    | X-Session-Id        | Session/conversation identifier      | AgentContext.session_id        | None          |
+    | X-Agent-Schema      | Agent schema name                    | AgentContext.agent_schema_uri  | "rem"         |
+    | X-Chat-Is-Audio     | "true" if first message is base64 audio | (transcribed before use)    | not set       |
+    Example Models:
+    - anthropic:claude-sonnet-4-5-20250929 (Claude 4.5 Sonnet)
+    - anthropic:claude-3-7-sonnet-20250219 (Claude 3.7 Sonnet)
+    - anthropic:claude-3-5-haiku-20241022 (Claude 3.5 Haiku)
+    - openai:gpt-4.1-turbo
+    - openai:gpt-4o
+    - openai:gpt-4o-mini
+    Response Formats:
+    - text (default): Plain text response
+    - json_object: Best-effort JSON extraction from agent output
+    Default Agent (rem):
+    - Expert assistant for REM system
+    - Comprehensive knowledge of REM architecture, concepts, and implementation
+    - Structured output with answer, confidence, and references
+    Session Management:
+    - Session history ALWAYS loaded with compression when X-Session-Id provided
+    - Uses SessionMessageStore with REM LOOKUP hints for long messages
+    - User profile provided as REM LOOKUP hint (on-demand by default)
+    - If CHAT__AUTO_INJECT_USER_CONTEXT=true: User profile auto-loaded and injected
+    - New messages saved to database with compression for session continuity
+    - When Postgres is disabled, session management is skipped
+    Args:
+        body: Parsed chat completion request (model, messages, stream, response_format)
+        request: FastAPI request, used for its headers
+    Returns:
+        StreamingResponse (SSE) when body.stream is set, otherwise a
+        ChatCompletionResponse
+    Raises:
+        HTTPException: 500 when neither the requested nor the default agent
+            schema can be loaded
+    """
+    # Load agent schema: use header value from context or default
+    # Extract AgentContext first to get schema name
+    temp_context = AgentContext.from_headers(dict(request.headers))
+    schema_name = temp_context.agent_schema_uri or DEFAULT_AGENT_SCHEMA
+    # Load schema using centralized utility
+    try:
+        agent_schema = load_agent_schema(schema_name)
+    except FileNotFoundError:
+        # Fallback to default if specified schema not found
+        logger.warning(f"Schema '{schema_name}' not found, falling back to '{DEFAULT_AGENT_SCHEMA}'")
+        schema_name = DEFAULT_AGENT_SCHEMA
+        try:
+            agent_schema = load_agent_schema(schema_name)
+        except FileNotFoundError:
+            # No schema available at all
+            from fastapi import HTTPException
+            raise HTTPException(
+                status_code=500,
+                detail=f"Agent schema '{schema_name}' not found and default schema unavailable",
+            )
+    logger.info(f"Using agent schema: {schema_name}, model: {body.model}")
+    # Check for audio input
+    is_audio = request.headers.get("x-chat-is-audio", "").lower() == "true"
+    # Process messages (transcribe audio if needed)
+    new_messages = [msg.model_dump() for msg in body.messages]
+    if is_audio and new_messages and new_messages[0]["role"] == "user":
+        # First user message should be base64-encoded audio
+        tmp_path = None
+        try:
+            audio_b64 = new_messages[0]["content"]
+            audio_bytes = base64.b64decode(audio_b64)
+            # Write to temp file for transcription. delete=False keeps the file
+            # after the handle closes, so it is removed in the finally below.
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+                # Record the path before writing so a failed write is cleaned up too
+                tmp_path = tmp_file.name
+                tmp_file.write(audio_bytes)
+            # Transcribe audio
+            # NOTE(review): this is a synchronous call inside an async handler -
+            # confirm it does not block the event loop for long inputs.
+            transcriber = AudioTranscriber()
+            result = transcriber.transcribe_file(tmp_path)
+            # Replace audio content with transcribed text
+            new_messages[0]["content"] = result.text
+            logger.info(f"Transcribed audio: {len(result.text)} characters")
+        except Exception as e:
+            logger.error(f"Failed to transcribe audio: {e}")
+            # Fall through with original content (will likely fail at agent)
+        finally:
+            # Always remove the temp file, including when decoding or
+            # transcription failed; cleanup problems must not fail the request.
+            if tmp_path is not None:
+                try:
+                    Path(tmp_path).unlink(missing_ok=True)
+                except OSError as cleanup_error:
+                    logger.warning(f"Failed to remove temp audio file {tmp_path}: {cleanup_error}")
+    # Use ContextBuilder to construct complete message list with:
+    # 1. System context hint (date + user profile)
+    # 2. Session history (if session_id provided)
+    # 3. New messages from request body (transcribed if audio)
+    context, messages = await ContextBuilder.build_from_headers(
+        headers=dict(request.headers),
+        new_messages=new_messages,
+    )
+    logger.info(f"Built context with {len(messages)} total messages (includes history + user context)")
+    # Create agent with schema and model override
+    agent = await create_agent(
+        context=context,
+        agent_schema_override=agent_schema,
+        model_override=body.model,  # type: ignore[arg-type]
+    )
+    # Combine all messages into single prompt for agent
+    # ContextBuilder already assembled: system context + history + new messages
+    prompt = "\n".join(msg.content for msg in messages)
+    # Generate OpenAI-compatible request ID
+    request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+    # Streaming mode: returns before the persistence step below, so this
+    # function does not itself save streamed conversations.
+    if body.stream:
+        return StreamingResponse(
+            stream_openai_response(agent, prompt, body.model, request_id),
+            media_type="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+    # Non-streaming mode
+    result = await agent.run(prompt)
+    # Determine content format based on response_format request
+    if body.response_format and body.response_format.type == "json_object":
+        # JSON mode: Best-effort extraction of JSON from agent output
+        content = extract_json_resilient(result.output)  # type: ignore[attr-defined]
+    else:
+        # Text mode: Return as string (handle structured output)
+        from rem.agentic.serialization import serialize_agent_result_json
+        content = serialize_agent_result_json(result.output)  # type: ignore[attr-defined]
+    # Get usage from result if available
+    usage = result.usage() if hasattr(result, "usage") else None
+    prompt_tokens = usage.input_tokens if usage else 0
+    completion_tokens = usage.output_tokens if usage else 0
+    # Save conversation messages to database (if session_id and postgres enabled)
+    if settings.postgres.enabled and context.session_id:
+        # Extract just the new user message (last message from body)
+        # NOTE(review): this stores the request's original content, so an audio
+        # request may persist the base64 payload rather than the transcript.
+        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; a
+        # timezone-aware replacement would change the stored timestamp format.
+        user_message = {
+            "role": "user",
+            "content": body.messages[-1].content if body.messages else "",
+            "timestamp": datetime.utcnow().isoformat(),
+        }
+        assistant_message = {
+            "role": "assistant",
+            "content": content,
+            "timestamp": datetime.utcnow().isoformat(),
+        }
+        # Store messages with compression
+        store = SessionMessageStore(user_id=context.user_id or "default")
+        await store.store_session_messages(
+            session_id=context.session_id,
+            messages=[user_message, assistant_message],
+            user_id=context.user_id,
+            compress=True,
+        )
+        logger.info(f"Saved conversation to session {context.session_id}")
+    return ChatCompletionResponse(
+        id=request_id,
+        created=int(time.time()),
+        model=body.model,  # Echo back the requested model
+        choices=[
+            ChatCompletionChoice(
+                index=0,
+                message=ChatMessage(role="assistant", content=content),
+                finish_reason="stop",
+            )
+        ],
+        usage=ChatCompletionUsage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        ),
+    )

rem/api/routers/chat/json_utils.py ADDED Viewed

@@ -0,0 +1,76 @@
+"""
+JSON extraction utilities for response_format='json_object' mode.
+Design Pattern:
+- Best-effort JSON extraction from agent output
+- Handles fenced code blocks (```json ... ```)
+- Handles raw JSON objects
+- Graceful fallback to string if extraction fails
+"""
+import json
+import re
+def extract_json_resilient(output: str | dict | list) -> str:
+    """
+    Extract JSON from agent output with multiple fallback strategies.
+    Strategies (in order):
+    1. If already dict/list, serialize directly
+    2. Extract from fenced JSON code blocks (```json ... ```)
+    3. Find the first complete JSON object in the text; if there is none,
+       the first complete JSON array. Nested values are returned as part of
+       their outermost container, never on their own.
+    4. Return as-is if all strategies fail
+    Args:
+        output: Agent output (str, dict, or list)
+    Returns:
+        JSON string (best-effort)
+    Examples:
+        >>> extract_json_resilient({"answer": "test"})
+        '{"answer": "test"}'
+        >>> extract_json_resilient('Here is the result:\\n```json\\n{"answer": "test"}\\n```')
+        '{"answer": "test"}'
+        >>> extract_json_resilient('The answer is {"answer": "test"} as shown above.')
+        '{"answer": "test"}'
+        >>> extract_json_resilient('Result: {"a": {"b": 1}} done')
+        '{"a": {"b": 1}}'
+    """
+    # Strategy 1: Already structured
+    if isinstance(output, (dict, list)):
+        return json.dumps(output)
+    text = str(output)
+    # Strategy 2: Extract from fenced code blocks
+    fenced_match = re.search(r"```json\s*\n(.*?)\n```", text, re.DOTALL)
+    if fenced_match:
+        json_str = fenced_match.group(1).strip()
+        try:
+            # Validate it's valid JSON
+            json.loads(json_str)
+        except json.JSONDecodeError:
+            pass
+        else:
+            return json_str
+    # Strategy 3: Let the JSON parser find where each candidate ends instead
+    # of guessing with regexes, which cannot balance nested braces.
+    decoder = json.JSONDecoder()
+    first_array = None
+    consumed_until = 0
+    for opener in re.finditer(r"[{\[]", text):
+        start = opener.start()
+        if start < consumed_until:
+            # Inside a container that already parsed; not a top-level value
+            continue
+        try:
+            _, end = decoder.raw_decode(text, start)
+        except json.JSONDecodeError:
+            continue
+        if text[start] == "{":
+            # Objects take priority over arrays
+            return text[start:end]
+        if first_array is None:
+            first_array = text[start:end]
+        consumed_until = end
+    if first_array is not None:
+        return first_array
+    # Strategy 4: Fallback to string
+    return text
+    return text