PyPI - graphserve - Versions diffs - 0.1.0__py3-none-any.whl - Mend

graphserve 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

graphserve/__init__.py +5 -0
graphserve/_ids.py +51 -0
graphserve/errors.py +5 -0
graphserve/py.typed +0 -0
graphserve/registry.py +43 -0
graphserve/router.py +58 -0
graphserve/routes/__init__.py +0 -0
graphserve/routes/chat.py +156 -0
graphserve/routes/responses.py +207 -0
graphserve/translate.py +1055 -0
graphserve-0.1.0.dist-info/METADATA +119 -0
graphserve-0.1.0.dist-info/RECORD +14 -0
graphserve-0.1.0.dist-info/WHEEL +4 -0
graphserve-0.1.0.dist-info/licenses/LICENSE +176 -0

graphserve/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""GraphServe — serve LangGraph graphs over the OpenAI APIs."""
+from graphserve.registry import GraphRegistry, GraphConfig
+from graphserve.router import create_openai_router
+__all__ = ["GraphRegistry", "GraphConfig", "create_openai_router"]

graphserve/_ids.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""Conversation/response id helpers.
+Two id shapes cross the wire:
+* ``conv_<hex>`` — a conversation id (the LangGraph ``thread_id``). Minted by the
+  consumer's Conversations API and passed back via the ``conversation`` field.
+* ``resp_<model>.<hex>`` — a response id. GraphServe is storeless, so the model
+  is encoded in the id: ``GET/DELETE /responses/{id}`` parse it back to resolve
+  which registered graph owns the thread. The ``<hex>`` is the same thread uuid.
+"""
+from __future__ import annotations
+import uuid
+CONV_PREFIX = "conv_"  # leading marker of a conversation id: conv_<hex>
+RESP_PREFIX = "resp_"  # leading marker of a response id: resp_<model>.<hex>
+def format_conv_id(conv_uuid: uuid.UUID) -> str:
+    return f"{CONV_PREFIX}{conv_uuid.hex}"
+def parse_conv_id(value: str) -> uuid.UUID:
+    raw = value.removeprefix(CONV_PREFIX)
+    return uuid.UUID(raw)
+def format_resp_id(model: str, conv_uuid: uuid.UUID) -> str:
+    return f"{RESP_PREFIX}{model}.{conv_uuid.hex}"
+def parse_resp_id(value: str) -> tuple[str, uuid.UUID]:
+    """Split ``resp_<model>.<hex>`` into ``(model, thread_uuid)``.
+    The 32-char hex never contains a dot, so the *last* dot separates model from
+    thread — model names may themselves contain dots/hyphens (``gpt-4.1-mini``).
+    Raises ``ValueError`` on a malformed id.
+    """
+    raw = value.removeprefix(RESP_PREFIX)
+    model, hex_ = raw.rsplit(".", 1)  # ponytail: model w/ trailing ".<32hex>" tail can't occur (registry-controlled)
+    return model, uuid.UUID(hex_)
+def thread_uuid_from_anchor(value: str) -> uuid.UUID:
+    """Extract the thread uuid from a ``resp_`` id, ``conv_`` id, or bare uuid.
+    The ``conversation`` / ``previous_response_id`` anchor may be either a
+    conversation id or a prior response id; both point at the same thread.
+    """
+    if value.startswith(RESP_PREFIX):
+        return parse_resp_id(value)[1]
+    return parse_conv_id(value)

graphserve/errors.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""OpenAI-shaped error envelope. The mounting app maps exceptions to HTTP."""
+from __future__ import annotations
+def openai_error_body(message: str, *, type: str, code: str | None = None) -> dict:
+    return {"error": {"message": message, "type": type, "code": code}}

graphserve/py.typed ADDED Viewed

File without changes

graphserve/registry.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""Declarative graph registration. No auto-discovery, no inheritance."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+class UnknownModelError(KeyError):
+    pass
+@dataclass
+class GraphConfig:
+    """An already-compiled graph to serve.
+    ``graph`` must be an already-compiled graph — GraphServe never constructs
+    it. Build/compile the graph in your application and pass it in. Per-request
+    concerns (runtime context, callbacks, output extraction) are handled
+    generically by GraphServe, not per graph.
+    """
+    graph: Any
+    streamable_node_names: list[str] | None = None
+class GraphRegistry:
+    def __init__(self) -> None:
+        self._configs: dict[str, GraphConfig] = {}
+    def register(self, model_name: str, config: GraphConfig) -> None:
+        if model_name in self._configs:
+            raise ValueError(f"Model {model_name!r} already registered")
+        self._configs[model_name] = config
+    def resolve(self, model_name: str) -> GraphConfig:
+        try:
+            return self._configs[model_name]
+        except KeyError:
+            raise UnknownModelError(
+                f"Unknown model {model_name!r}. Available: {self.list_models()}"
+            ) from None
+    def list_models(self) -> list[str]:
+        return list(self._configs)

graphserve/router.py ADDED Viewed

@@ -0,0 +1,58 @@
+"""Public builder: mount OpenAI-compatible routes on a consumer's FastAPI app."""
+from __future__ import annotations
+import logging
+from fastapi import APIRouter
+from langgraph.checkpoint.memory import InMemorySaver
+from graphserve.registry import GraphRegistry
+from graphserve.routes.chat import build_chat_router
+from graphserve.routes.responses import build_responses_router
+logger = logging.getLogger(__name__)
+def create_openai_router(registry: GraphRegistry) -> APIRouter:
+    """Build an APIRouter with OpenAI-compatible /models, /responses, /chat routes.
+    GraphServe is a pure OpenAI↔LangGraph converter: it holds no conversation
+    store and persists nothing itself. All stateful Responses logic runs against
+    the registered graph's LangGraph checkpointer, keyed by ``thread_id``.
+    Cross-cutting concerns are the consumer's responsibility, applied with
+    standard FastAPI tools:
+    - **Auth**: pass ``dependencies=[Depends(...)]`` to ``app.include_router``.
+    - **Callbacks / tracing**: attach to the graph when you construct it.
+    Each registered graph MUST be compiled with a checkpointer so stateful
+    GET / ``previous_response_id`` continuity works. If a graph was compiled
+    without one, an ``InMemorySaver`` is injected here (state is lost on restart)
+    and a warning is logged.
+    """
+    for model in registry.list_models():
+        graph = registry.resolve(model).graph
+        # Real compiled graphs expose ``checkpointer`` (None if compiled without
+        # one). Plain test doubles lack the attribute — leave those untouched.
+        if getattr(graph, "checkpointer", "missing") is None:
+            graph.checkpointer = InMemorySaver()
+            logger.warning(
+                "Graph %r had no checkpointer; injected InMemorySaver "
+                "(conversation state is lost on restart).",
+                model,
+            )
+    router = APIRouter()
+    @router.get("/models")
+    async def list_models() -> dict:
+        return {
+            "object": "list",
+            "data": [{"id": m, "object": "model"} for m in registry.list_models()],
+        }
+    router.include_router(build_responses_router(registry))
+    router.include_router(build_chat_router(registry))
+    return router

graphserve/routes/__init__.py ADDED Viewed

File without changes

graphserve/routes/chat.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""OpenAI Chat Completions API route handler."""
+from __future__ import annotations
+import json
+import time
+from typing import Any
+from uuid import uuid4
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+from langchain_core.messages import AIMessage
+from langchain_core.messages.utils import convert_to_messages
+from pydantic import BaseModel
+from graphserve.errors import openai_error_body
+from graphserve.registry import GraphRegistry, UnknownModelError
+from graphserve.translate import chat_completion_chunks, request_to_context, result_to_text
+def _openai_tool_calls(message: AIMessage | None) -> list[dict]:
+    """Map a LangChain AIMessage's tool_calls to OpenAI chat tool_calls dicts.
+    OpenAI requires ``function.arguments`` as a JSON string and a tool-call
+    ``id``; LangChain stores args as a dict and the id may be absent.
+    """
+    if message is None:
+        return []
+    calls = []
+    for tc in getattr(message, "tool_calls", None) or []:
+        calls.append({
+            "id": tc.get("id") or f"call_{uuid4().hex[:24]}",
+            "type": "function",
+            "function": {
+                "name": tc.get("name", ""),
+                "arguments": json.dumps(tc.get("args") or {}),
+            },
+        })
+    return calls
+class ChatCompletionRequest(BaseModel):
+    model: str  # registered model name; looked up in the GraphRegistry
+    messages: list[dict]  # passed to convert_to_messages to build the graph input
+    stream: bool = False  # True selects the text/event-stream path
+    user: str | None = None  # not read in this module; presumably used by request_to_context - verify
+    metadata: dict[str, Any] | None = None  # an optional "conversation_id" entry becomes the thread_id
+def build_chat_router(
+    registry: GraphRegistry,
+) -> APIRouter:
+    """Build the /chat/completions sub-router (private — called by create_openai_router)."""
+    router = APIRouter()
+    @router.post("/chat/completions")
+    async def create_chat_completion(request: ChatCompletionRequest):
+        # 1. Resolve model config
+        try:
+            cfg = registry.resolve(request.model)
+        except UnknownModelError as exc:
+            raise HTTPException(
+                status_code=404,
+                detail=openai_error_body(
+                    str(exc),
+                    type="invalid_request_error",
+                    code="model_not_found",
+                ),
+            ) from exc
+        # 2. Resolve graph
+        graph = cfg.graph
+        # 3. Build input from messages
+        graph_input = {"messages": convert_to_messages(request.messages)}
+        # 4. Build context
+        context = request_to_context(request)
+        # 5. Build LangGraph config — use conversation_id from metadata as thread_id if provided,
+        #    otherwise fall back to a fresh uuid (stateless per-request).
+        conv_id = (request.metadata or {}).get("conversation_id")
+        thread_id = str(conv_id) if conv_id else uuid4().hex
+        run_config: dict = {"configurable": {"thread_id": thread_id}}
+        completion_id = f"chatcmpl-{uuid4().hex}"
+        created = int(time.time())
+        # 6a. Streaming path
+        if request.stream:
+            async def _message_stream():
+                async for item in graph.astream(
+                    graph_input,
+                    config=run_config,
+                    context=context,
+                    stream_mode="messages",
+                ):
+                    # stream_mode="messages" (string) yields (chunk, metadata) tuples.
+                    # NB: a LIST stream_mode would instead yield (mode, (chunk, metadata)),
+                    # whose [0] is the mode string — which silently drops all content.
+                    if isinstance(item, tuple) and len(item) == 2:
+                        yield item
+            return StreamingResponse(
+                chat_completion_chunks(
+                    _message_stream(),
+                    completion_id=completion_id,
+                    model=request.model,
+                    created=created,
+                ),
+                media_type="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "X-Accel-Buffering": "no",
+                },
+            )
+        # 6b. Non-streaming path
+        result = await graph.ainvoke(graph_input, config=run_config, context=context)
+        messages = result.get("messages", []) if isinstance(result, dict) else []
+        text = result_to_text(result)
+        # Surface a pending tool call only when the LAST message is an AIMessage
+        # requesting one (a return_direct tool already executed -> trailing
+        # ToolMessage -> no pending call).
+        last = messages[-1] if messages else None
+        last_ai = last if isinstance(last, AIMessage) else None
+        tool_calls = _openai_tool_calls(last_ai)
+        message: dict[str, Any] = {
+            "role": "assistant",
+            # OpenAI sets content null on a tool-call turn with no text.
+            "content": text or (None if tool_calls else ""),
+        }
+        if tool_calls:
+            message["tool_calls"] = tool_calls
+        return {
+            "id": completion_id,
+            "object": "chat.completion",
+            "created": created,
+            "model": request.model,
+            "choices": [
+                {
+                    "index": 0,
+                    "message": message,
+                    "finish_reason": "tool_calls" if tool_calls else "stop",
+                }
+            ],
+            "usage": {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0,
+            },
+        }
+    return router

graphserve/routes/responses.py ADDED Viewed

@@ -0,0 +1,207 @@
+"""OpenAI Responses API route handler.
+Storeless: conversation state lives entirely in the registered graph's LangGraph
+checkpointer, keyed by ``thread_id``. The response id encodes the model
+(``resp_<model>.<hex>``) so stateless GET/DELETE can resolve the owning graph.
+"""
+from __future__ import annotations
+import time
+from typing import Any
+from uuid import uuid4
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
+from pydantic import BaseModel
+from graphserve._ids import (
+    format_resp_id,
+    parse_resp_id,
+    thread_uuid_from_anchor,
+)
+from graphserve.errors import openai_error_body
+from graphserve.registry import GraphRegistry, UnknownModelError
+from graphserve.translate import (
+    encode_sse,
+    emit_response_sse_from_astream,
+    extract_text,
+    lc_messages_to_openai_items,
+    messages_to_response_dict,
+    request_to_context,
+)
+class ResponseCreateRequest(BaseModel):
+    model: str  # registered model name; also encoded into the response id
+    input: str | list  # plain text or a list of turn objects; see _input_to_messages
+    stream: bool = False  # True selects the text/event-stream path
+    user: str | None = None  # not read in this module; presumably used by request_to_context - verify
+    previous_response_id: str | None = None  # thread anchor, used only when `conversation` is empty
+    conversation: str | None = None  # thread anchor; takes precedence over previous_response_id
+    instructions: str | None = None  # not read in this module; presumably used by request_to_context - verify
+    metadata: dict[str, Any] | None = None  # not read in this module
+    chat_template_kwargs: dict[str, Any] | None = None  # forwarded under configurable["extra_body"]; "enable_thinking" sets reasoning.enabled
+def _input_to_messages(input_val: Any) -> list:
+    """Convert a plain string or list of turn objects to LangChain messages."""
+    if isinstance(input_val, str):
+        return [HumanMessage(content=input_val)]
+    if isinstance(input_val, list):
+        msgs = []
+        for item in input_val:
+            role = item.get("role", "user") if isinstance(item, dict) else "user"
+            content = item.get("content", "") if isinstance(item, dict) else str(item)
+            text = extract_text(content)
+            if role == "assistant":
+                msgs.append(AIMessage(content=text))
+            elif role in ("system", "developer"):
+                msgs.append(SystemMessage(content=text))
+            else:  # user or unknown
+                msgs.append(HumanMessage(content=text))
+        return msgs
+    # fallback
+    return [HumanMessage(content=str(input_val))]
+def _not_found(response_id: str) -> HTTPException:
+    return HTTPException(
+        status_code=404,
+        detail=openai_error_body(
+            f"Response {response_id!r} not found",
+            type="invalid_request_error",
+            code="response_not_found",
+        ),
+    )
+def build_responses_router(registry: GraphRegistry) -> APIRouter:
+    """Build the /responses sub-router (private — called by create_openai_router)."""
+    router = APIRouter()
+    @router.post("/responses")
+    async def create_response(request: ResponseCreateRequest):
+        # 1. Resolve model -> graph (model is always present on create).
+        try:
+            cfg = registry.resolve(request.model)
+        except UnknownModelError as exc:
+            raise HTTPException(
+                status_code=404,
+                detail=openai_error_body(
+                    str(exc),
+                    type="invalid_request_error",
+                    code="model_not_found",
+                ),
+            ) from exc
+        # 2. Resolve the thread. `conversation` takes precedence over
+        #    `previous_response_id`; both anchor to an existing thread. Absent
+        #    either, mint a fresh thread (ephemeral conversation).
+        anchor = request.conversation or request.previous_response_id
+        if anchor:
+            try:
+                thread_uuid = thread_uuid_from_anchor(anchor)
+            except ValueError:
+                thread_uuid = uuid4()
+        else:
+            thread_uuid = uuid4()
+        graph = cfg.graph
+        graph_input = {"messages": _input_to_messages(request.input)}
+        context = request_to_context(request)
+        created_at = int(time.time())
+        # 3. LangGraph config.
+        run_config: dict = {"configurable": {"thread_id": str(thread_uuid)}}
+        if request.chat_template_kwargs:
+            enable = request.chat_template_kwargs.get("enable_thinking", False)
+            run_config["configurable"]["extra_body"] = {
+                "chat_template_kwargs": request.chat_template_kwargs,
+                "reasoning": {"enabled": enable},
+            }
+        resp_id = format_resp_id(request.model, thread_uuid)
+        # 4a. Streaming path
+        if request.stream:
+            sse_stream = emit_response_sse_from_astream(
+                graph,
+                graph_input,
+                config=run_config,
+                context=context,
+                streamable_node_names=cfg.streamable_node_names,
+                resp_id=resp_id,
+                model=request.model,
+                created_at=created_at,
+            )
+            return StreamingResponse(
+                encode_sse(sse_stream),
+                media_type="text/event-stream",
+                headers={
+                    "Cache-Control": "no-cache",
+                    "X-Accel-Buffering": "no",
+                },
+            )
+        # 4b. Non-streaming path
+        result = await graph.ainvoke(graph_input, config=run_config, context=context)
+        return messages_to_response_dict(
+            result.get("messages", []) if isinstance(result, dict) else [],
+            conversation_id=thread_uuid,
+            model=request.model,
+            created_at=created_at,
+        )
+    def _resolve(response_id: str):
+        """Parse the response id and resolve (model, thread_uuid, graph)."""
+        try:
+            model, thread_uuid = parse_resp_id(response_id)
+            cfg = registry.resolve(model)
+        except (ValueError, UnknownModelError) as exc:
+            raise _not_found(response_id) from exc
+        return model, thread_uuid, cfg.graph
+    @router.get("/responses/{response_id}")
+    async def get_response(response_id: str):
+        model, thread_uuid, graph = _resolve(response_id)
+        try:
+            state = await graph.aget_state({"configurable": {"thread_id": str(thread_uuid)}})
+            messages = state.values.get("messages", []) if state and state.values else []
+        except ValueError:
+            # Graph compiled without a checkpointer (and none injected).
+            messages = []
+        return messages_to_response_dict(
+            messages,
+            conversation_id=thread_uuid,
+            model=model,
+            created_at=int(time.time()),
+        )
+    @router.get("/responses/{response_id}/input_items")
+    async def list_response_input_items(response_id: str, limit: int = 100):
+        _model, thread_uuid, graph = _resolve(response_id)
+        try:
+            state = await graph.aget_state({"configurable": {"thread_id": str(thread_uuid)}})
+            messages = state.values.get("messages", []) if state and state.values else []
+        except ValueError:
+            messages = []
+        items = lc_messages_to_openai_items(messages)[:limit]
+        return {
+            "object": "list",
+            "data": items,
+            "first_id": items[0]["id"] if items else None,
+            "last_id": items[-1]["id"] if items else None,
+            "has_more": len(items) == limit,
+        }
+    @router.delete("/responses/{response_id}")
+    async def delete_response(response_id: str):
+        _model, thread_uuid, graph = _resolve(response_id)
+        checkpointer = getattr(graph, "checkpointer", None)
+        if checkpointer is not None and hasattr(checkpointer, "adelete_thread"):
+            await checkpointer.adelete_thread(str(thread_uuid))
+        return {"id": response_id, "object": "response", "deleted": True}
+    return router