graphserve 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graphserve/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """GraphServe — serve LangGraph graphs over the OpenAI APIs."""
2
+ from graphserve.registry import GraphRegistry, GraphConfig
3
+ from graphserve.router import create_openai_router
4
+
5
+ __all__ = ["GraphRegistry", "GraphConfig", "create_openai_router"]
graphserve/_ids.py ADDED
@@ -0,0 +1,51 @@
1
+ """Conversation/response id helpers.
2
+
3
+ Two id shapes cross the wire:
4
+
5
+ * ``conv_<hex>`` — a conversation id (the LangGraph ``thread_id``). Minted by the
6
+ consumer's Conversations API and passed back via the ``conversation`` field.
7
+ * ``resp_<model>.<hex>`` — a response id. GraphServe is storeless, so the model
8
+ is encoded in the id: ``GET/DELETE /responses/{id}`` parse it back to resolve
9
+ which registered graph owns the thread. The ``<hex>`` is the same thread uuid.
10
+ """
11
+ from __future__ import annotations
12
+ import uuid
13
+
14
+ CONV_PREFIX = "conv_"
15
+ RESP_PREFIX = "resp_"
16
+
17
+
18
def format_conv_id(conv_uuid: uuid.UUID) -> str:
    """Render a thread uuid as a wire-format conversation id (``conv_<hex>``)."""
    thread_hex = conv_uuid.hex
    return f"{CONV_PREFIX}{thread_hex}"
20
+
21
+
22
def parse_conv_id(value: str) -> uuid.UUID:
    """Parse a ``conv_<hex>`` id (or a bare uuid string) into a ``uuid.UUID``.

    A leading ``conv_`` prefix is dropped when present; the remainder is handed
    to ``uuid.UUID``, which raises ``ValueError`` if it is not a valid uuid.
    """
    skip = len(CONV_PREFIX) if value.startswith(CONV_PREFIX) else 0
    return uuid.UUID(value[skip:])
25
+
26
+
27
def format_resp_id(model: str, conv_uuid: uuid.UUID) -> str:
    """Build a response id of the form ``resp_<model>.<hex>``.

    The model name is embedded so the id alone identifies the owning graph;
    ``<hex>`` is the 32-character hex form of the thread uuid.
    """
    thread_hex = conv_uuid.hex
    return f"{RESP_PREFIX}{model}.{thread_hex}"
29
+
30
+
31
def parse_resp_id(value: str) -> tuple[str, uuid.UUID]:
    """Split ``resp_<model>.<hex>`` into ``(model, thread_uuid)``.

    The hex part never contains a dot, so the *last* dot separates the model
    from the thread uuid — model names may themselves contain dots or hyphens
    (``gpt-4.1-mini``).

    Args:
        value: The response id as received on the wire.

    Returns:
        A ``(model, thread_uuid)`` tuple.

    Raises:
        ValueError: If ``value`` lacks the ``resp_`` prefix, has no ``.``
            separator, or its trailing part is not a valid uuid.
    """
    # Require the prefix explicitly: merely stripping it when present would let
    # ids such as ``conv_x.<hex>`` parse as a response id for model ``conv_x``.
    if not value.startswith(RESP_PREFIX):
        raise ValueError(
            f"Malformed response id {value!r}: missing {RESP_PREFIX!r} prefix"
        )
    model, separator, hex_ = value[len(RESP_PREFIX):].rpartition(".")
    if not separator:
        raise ValueError(
            f"Malformed response id {value!r}: expected '<model>.<hex>' after the prefix"
        )
    # uuid.UUID raises ValueError itself when the hex part is not a valid uuid.
    return model, uuid.UUID(hex_)
41
+
42
+
43
def thread_uuid_from_anchor(value: str) -> uuid.UUID:
    """Return the thread uuid named by a ``resp_`` id, ``conv_`` id, or bare uuid.

    Anything starting with ``resp_`` is parsed as a response id and its uuid
    part is returned; every other value is parsed as a conversation id.
    ``ValueError`` from the underlying parsers propagates to the caller.
    """
    if not value.startswith(RESP_PREFIX):
        return parse_conv_id(value)
    _model, thread_uuid = parse_resp_id(value)
    return thread_uuid
graphserve/errors.py ADDED
@@ -0,0 +1,5 @@
1
+ """OpenAI-shaped error envelope. The mounting app maps exceptions to HTTP."""
2
+ from __future__ import annotations
3
+
4
def openai_error_body(message: str, *, type: str, code: str | None = None) -> dict:
    """Wrap error details in an ``{"error": {...}}`` envelope.

    The inner object always carries ``message``, ``type`` and ``code``;
    ``code`` is ``None`` when the caller does not supply one.
    """
    details = {"message": message, "type": type, "code": code}
    return {"error": details}
graphserve/py.typed ADDED
File without changes
graphserve/registry.py ADDED
@@ -0,0 +1,43 @@
1
+ """Declarative graph registration. No auto-discovery, no inheritance."""
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+
7
class UnknownModelError(KeyError):
    """Raised when a model name is not present in the registry.

    Subclasses ``KeyError`` so existing ``except KeyError`` handlers keep
    working, but renders as its plain message.
    """

    def __str__(self) -> str:
        # KeyError.__str__ returns repr() of a single argument, which would wrap
        # the message in quotes (and escape inner quotes) wherever str(exc) is
        # used to build an error response. Return the message verbatim instead.
        if len(self.args) == 1:
            return str(self.args[0])
        return super().__str__()
9
+
10
+
11
@dataclass
class GraphConfig:
    """Registration record for one served graph.

    The ``graph`` must already be compiled by the application; GraphServe never
    constructs it. Per-request concerns (runtime context, callbacks, output
    extraction) are handled generically by GraphServe rather than configured
    per graph.
    """

    # The compiled graph object supplied by the application.
    graph: Any
    # Optional list of node names forwarded to the Responses streaming emitter;
    # presumably restricts which nodes' output is streamed — confirm against
    # the translate module.
    streamable_node_names: list[str] | None = None
23
+
24
+
25
class GraphRegistry:
    """In-memory mapping from model name to its ``GraphConfig``."""

    def __init__(self) -> None:
        self._configs: dict[str, GraphConfig] = {}

    def register(self, model_name: str, config: GraphConfig) -> None:
        """Add ``config`` under ``model_name``.

        Raises:
            ValueError: If ``model_name`` has already been registered.
        """
        already_present = model_name in self._configs
        if already_present:
            raise ValueError(f"Model {model_name!r} already registered")
        self._configs[model_name] = config

    def resolve(self, model_name: str) -> GraphConfig:
        """Return the config registered under ``model_name``.

        Raises:
            UnknownModelError: If no such model is registered; the message
                lists the registered model names.
        """
        if model_name not in self._configs:
            raise UnknownModelError(
                f"Unknown model {model_name!r}. Available: {self.list_models()}"
            ) from None
        return self._configs[model_name]

    def list_models(self) -> list[str]:
        """Return the registered model names in registration order."""
        return [name for name in self._configs]
graphserve/router.py ADDED
@@ -0,0 +1,58 @@
1
+ """Public builder: mount OpenAI-compatible routes on a consumer's FastAPI app."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+
6
+ from fastapi import APIRouter
7
+ from langgraph.checkpoint.memory import InMemorySaver
8
+
9
+ from graphserve.registry import GraphRegistry
10
+ from graphserve.routes.chat import build_chat_router
11
+ from graphserve.routes.responses import build_responses_router
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def create_openai_router(registry: GraphRegistry) -> APIRouter:
    """Assemble the OpenAI-compatible router (/models, /responses, /chat).

    GraphServe only converts between the OpenAI wire format and LangGraph: it
    keeps no conversation store of its own. Stateful Responses behaviour runs
    against each registered graph's LangGraph checkpointer, keyed by
    ``thread_id``.

    Cross-cutting concerns stay with the consumer, using standard FastAPI
    tools:

    - **Auth**: pass ``dependencies=[Depends(...)]`` to ``app.include_router``.
    - **Callbacks / tracing**: attach to the graph when you construct it.

    Every registered graph should be compiled with a checkpointer so stateful
    GET / ``previous_response_id`` continuity works. A graph whose
    ``checkpointer`` attribute is ``None`` gets an ``InMemorySaver`` assigned
    here (state is lost on restart) and a warning is logged.
    """
    for model_name in registry.list_models():
        compiled = registry.resolve(model_name).graph
        # A real compiled graph exposes ``checkpointer`` (None when compiled
        # without one). Objects lacking the attribute entirely — e.g. plain
        # test doubles — yield the non-None default and are left untouched.
        existing = getattr(compiled, "checkpointer", "missing")
        if existing is not None:
            continue
        compiled.checkpointer = InMemorySaver()
        logger.warning(
            "Graph %r had no checkpointer; injected InMemorySaver "
            "(conversation state is lost on restart).",
            model_name,
        )

    router = APIRouter()

    @router.get("/models")
    async def list_models() -> dict:
        # Evaluated per request, so models registered later are listed too.
        entries = [{"id": name, "object": "model"} for name in registry.list_models()]
        return {"object": "list", "data": entries}

    for sub_router in (build_responses_router(registry), build_chat_router(registry)):
        router.include_router(sub_router)

    return router
File without changes
@@ -0,0 +1,156 @@
1
+ """OpenAI Chat Completions API route handler."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import time
6
+ from typing import Any
7
+ from uuid import uuid4
8
+
9
+ from fastapi import APIRouter, HTTPException
10
+ from fastapi.responses import StreamingResponse
11
+ from langchain_core.messages import AIMessage
12
+ from langchain_core.messages.utils import convert_to_messages
13
+ from pydantic import BaseModel
14
+
15
+ from graphserve.errors import openai_error_body
16
+ from graphserve.registry import GraphRegistry, UnknownModelError
17
+ from graphserve.translate import chat_completion_chunks, request_to_context, result_to_text
18
+
19
+
20
+ def _openai_tool_calls(message: AIMessage | None) -> list[dict]:
21
+ """Map a LangChain AIMessage's tool_calls to OpenAI chat tool_calls dicts.
22
+
23
+ OpenAI requires ``function.arguments`` as a JSON string and a tool-call
24
+ ``id``; LangChain stores args as a dict and the id may be absent.
25
+ """
26
+ if message is None:
27
+ return []
28
+ calls = []
29
+ for tc in getattr(message, "tool_calls", None) or []:
30
+ calls.append({
31
+ "id": tc.get("id") or f"call_{uuid4().hex[:24]}",
32
+ "type": "function",
33
+ "function": {
34
+ "name": tc.get("name", ""),
35
+ "arguments": json.dumps(tc.get("args") or {}),
36
+ },
37
+ })
38
+ return calls
39
+
40
+
41
class ChatCompletionRequest(BaseModel):
    # Request body for POST /chat/completions.

    # Name of the registered graph to run.
    model: str
    # Chat turns as raw dicts; converted with ``convert_to_messages``.
    messages: list[dict]
    # When true the handler answers with a server-sent event stream.
    stream: bool = False
    user: str | None = None
    # Optional; a truthy ``conversation_id`` entry is used as the thread id.
    metadata: dict[str, Any] | None = None
47
+
48
+
49
def build_chat_router(
    registry: GraphRegistry,
) -> APIRouter:
    """Create the sub-router serving ``POST /chat/completions``.

    Private helper, called by ``create_openai_router``. The single route
    resolves the requested model in ``registry``, runs its graph, and returns
    either an SSE stream or a ``chat.completion`` object.
    """
    router = APIRouter()

    @router.post("/chat/completions")
    async def create_chat_completion(request: ChatCompletionRequest):
        # Look up the graph registered for this model; unknown model -> 404.
        try:
            cfg = registry.resolve(request.model)
        except UnknownModelError as exc:
            error_body = openai_error_body(
                str(exc),
                type="invalid_request_error",
                code="model_not_found",
            )
            raise HTTPException(status_code=404, detail=error_body) from exc

        graph = cfg.graph
        graph_input = {"messages": convert_to_messages(request.messages)}
        context = request_to_context(request)

        # Thread id: a truthy ``conversation_id`` in metadata pins the thread,
        # otherwise each request runs on a fresh one.
        conv_id = (request.metadata or {}).get("conversation_id")
        if conv_id:
            thread_id = str(conv_id)
        else:
            thread_id = uuid4().hex
        run_config: dict = {"configurable": {"thread_id": thread_id}}

        completion_id = f"chatcmpl-{uuid4().hex}"
        created = int(time.time())

        if request.stream:

            async def _message_stream():
                # With the *string* stream_mode="messages" each item is a
                # (chunk, metadata) pair. A list stream_mode would yield
                # (mode, (chunk, metadata)) instead, whose first element is the
                # mode string — that would silently drop all content.
                events = graph.astream(
                    graph_input,
                    config=run_config,
                    context=context,
                    stream_mode="messages",
                )
                async for item in events:
                    if not isinstance(item, tuple):
                        continue
                    if len(item) != 2:
                        continue
                    yield item

            sse_body = chat_completion_chunks(
                _message_stream(),
                completion_id=completion_id,
                model=request.model,
                created=created,
            )
            return StreamingResponse(
                sse_body,
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "X-Accel-Buffering": "no",
                },
            )

        # Non-streaming: run the graph to completion and shape one response.
        result = await graph.ainvoke(graph_input, config=run_config, context=context)

        messages = result.get("messages", []) if isinstance(result, dict) else []
        text = result_to_text(result)

        # A pending tool call is reported only when the final message is an
        # AIMessage; a trailing non-AI message (e.g. the ToolMessage left by an
        # already executed return_direct tool) means nothing is pending.
        last_ai = None
        if messages and isinstance(messages[-1], AIMessage):
            last_ai = messages[-1]
        tool_calls = _openai_tool_calls(last_ai)

        # Content is null on a tool-call turn without text, "" otherwise.
        if text:
            content = text
        elif tool_calls:
            content = None
        else:
            content = ""

        message: dict[str, Any] = {"role": "assistant", "content": content}
        if tool_calls:
            message["tool_calls"] = tool_calls

        choice = {
            "index": 0,
            "message": message,
            "finish_reason": "tool_calls" if tool_calls else "stop",
        }
        # Token usage is not tracked; all counters are reported as zero.
        usage = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }
        return {
            "id": completion_id,
            "object": "chat.completion",
            "created": created,
            "model": request.model,
            "choices": [choice],
            "usage": usage,
        }

    return router
@@ -0,0 +1,207 @@
1
+ """OpenAI Responses API route handler.
2
+
3
+ Storeless: conversation state lives entirely in the registered graph's LangGraph
4
+ checkpointer, keyed by ``thread_id``. The response id encodes the model
5
+ (``resp_<model>.<hex>``) so stateless GET/DELETE can resolve the owning graph.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import time
10
+ from typing import Any
11
+ from uuid import uuid4
12
+
13
+ from fastapi import APIRouter, HTTPException
14
+ from fastapi.responses import StreamingResponse
15
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
16
+ from pydantic import BaseModel
17
+
18
+ from graphserve._ids import (
19
+ format_resp_id,
20
+ parse_resp_id,
21
+ thread_uuid_from_anchor,
22
+ )
23
+ from graphserve.errors import openai_error_body
24
+ from graphserve.registry import GraphRegistry, UnknownModelError
25
+ from graphserve.translate import (
26
+ encode_sse,
27
+ emit_response_sse_from_astream,
28
+ extract_text,
29
+ lc_messages_to_openai_items,
30
+ messages_to_response_dict,
31
+ request_to_context,
32
+ )
33
+
34
+
35
class ResponseCreateRequest(BaseModel):
    # Request body for POST /responses.

    # Name of the registered graph to run.
    model: str
    # Either a plain prompt string or a list of turn objects.
    input: str | list
    # When true the handler answers with a server-sent event stream.
    stream: bool = False
    user: str | None = None
    # Thread anchors: ``conversation`` wins over ``previous_response_id`` when
    # both are given; with neither, a fresh thread is created.
    previous_response_id: str | None = None
    conversation: str | None = None
    instructions: str | None = None
    metadata: dict[str, Any] | None = None
    # When non-empty, forwarded to the graph under ``configurable.extra_body``.
    chat_template_kwargs: dict[str, Any] | None = None
45
+
46
+
47
def _input_to_messages(input_val: Any) -> list:
    """Turn a Responses ``input`` value into a list of LangChain messages.

    * A string becomes a single ``HumanMessage``.
    * A list is converted item by item: dict items contribute their ``role``
      (default ``"user"``) and ``content`` (default ``""``); any other item is
      stringified and treated as user content. ``assistant`` maps to
      ``AIMessage``, ``system``/``developer`` to ``SystemMessage``, and every
      other role to ``HumanMessage``.
    * Anything else is stringified into a single ``HumanMessage``.
    """
    if isinstance(input_val, str):
        return [HumanMessage(content=input_val)]
    if not isinstance(input_val, list):
        # Fallback for unexpected payload types.
        return [HumanMessage(content=str(input_val))]

    converted = []
    for turn in input_val:
        if isinstance(turn, dict):
            role = turn.get("role", "user")
            content = turn.get("content", "")
        else:
            role = "user"
            content = str(turn)
        text = extract_text(content)

        if role == "assistant":
            message_cls = AIMessage
        elif role in ("system", "developer"):
            message_cls = SystemMessage
        else:  # user or unknown
            message_cls = HumanMessage
        converted.append(message_cls(content=text))
    return converted
66
+
67
+
68
def _not_found(response_id: str) -> HTTPException:
    """Build (without raising) the 404 ``HTTPException`` for an unknown response id."""
    body = openai_error_body(
        f"Response {response_id!r} not found",
        type="invalid_request_error",
        code="response_not_found",
    )
    return HTTPException(status_code=404, detail=body)
77
+
78
+
79
def build_responses_router(registry: GraphRegistry) -> APIRouter:
    """Build the /responses sub-router (private — called by create_openai_router).

    Routes:

    * ``POST /responses`` — run the graph registered for ``request.model``.
    * ``GET /responses/{response_id}`` — rebuild a response from thread state.
    * ``GET /responses/{response_id}/input_items`` — list the thread's items.
    * ``DELETE /responses/{response_id}`` — delete the thread, when the
      graph's checkpointer supports it.

    Args:
        registry: Source of the registered graphs, looked up per request.

    Returns:
        An ``APIRouter`` carrying the four routes above.
    """
    router = APIRouter()

    @router.post("/responses")
    async def create_response(request: ResponseCreateRequest):
        # 1. Resolve model -> graph (model is always present on create).
        try:
            cfg = registry.resolve(request.model)
        except UnknownModelError as exc:
            raise HTTPException(
                status_code=404,
                detail=openai_error_body(
                    str(exc),
                    type="invalid_request_error",
                    code="model_not_found",
                ),
            ) from exc

        # 2. Resolve the thread. `conversation` takes precedence over
        #    `previous_response_id`; both anchor to an existing thread. With
        #    neither, or with an anchor that cannot be parsed, a fresh thread
        #    is minted instead of rejecting the request.
        anchor = request.conversation or request.previous_response_id
        if anchor:
            try:
                thread_uuid = thread_uuid_from_anchor(anchor)
            except ValueError:
                thread_uuid = uuid4()
        else:
            thread_uuid = uuid4()

        graph = cfg.graph
        graph_input = {"messages": _input_to_messages(request.input)}
        context = request_to_context(request)
        created_at = int(time.time())

        # 3. LangGraph config.
        run_config: dict = {"configurable": {"thread_id": str(thread_uuid)}}
        if request.chat_template_kwargs:
            enable = request.chat_template_kwargs.get("enable_thinking", False)
            run_config["configurable"]["extra_body"] = {
                "chat_template_kwargs": request.chat_template_kwargs,
                "reasoning": {"enabled": enable},
            }

        resp_id = format_resp_id(request.model, thread_uuid)

        # 4a. Streaming path
        if request.stream:
            sse_stream = emit_response_sse_from_astream(
                graph,
                graph_input,
                config=run_config,
                context=context,
                streamable_node_names=cfg.streamable_node_names,
                resp_id=resp_id,
                model=request.model,
                created_at=created_at,
            )
            return StreamingResponse(
                encode_sse(sse_stream),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "X-Accel-Buffering": "no",
                },
            )

        # 4b. Non-streaming path
        result = await graph.ainvoke(graph_input, config=run_config, context=context)
        return messages_to_response_dict(
            result.get("messages", []) if isinstance(result, dict) else [],
            conversation_id=thread_uuid,
            model=request.model,
            created_at=created_at,
        )

    def _resolve(response_id: str):
        """Parse the response id and resolve (model, thread_uuid, graph).

        Raises the 404 from ``_not_found`` when the id is malformed or names a
        model that is not registered.
        """
        try:
            model, thread_uuid = parse_resp_id(response_id)
            cfg = registry.resolve(model)
        except (ValueError, UnknownModelError) as exc:
            raise _not_found(response_id) from exc
        return model, thread_uuid, cfg.graph

    async def _thread_messages(graph: Any, thread_uuid: Any) -> list:
        """Return the messages stored for a thread, or ``[]`` when unavailable."""
        try:
            state = await graph.aget_state({"configurable": {"thread_id": str(thread_uuid)}})
            return state.values.get("messages", []) if state and state.values else []
        except ValueError:
            # Graph compiled without a checkpointer (and none injected).
            return []

    @router.get("/responses/{response_id}")
    async def get_response(response_id: str):
        model, thread_uuid, graph = _resolve(response_id)
        messages = await _thread_messages(graph, thread_uuid)
        # No creation time is stored, so the current time is reported.
        return messages_to_response_dict(
            messages,
            conversation_id=thread_uuid,
            model=model,
            created_at=int(time.time()),
        )

    @router.get("/responses/{response_id}/input_items")
    async def list_response_input_items(response_id: str, limit: int = 100):
        _model, thread_uuid, graph = _resolve(response_id)
        messages = await _thread_messages(graph, thread_uuid)

        all_items = lc_messages_to_openai_items(messages)
        # Clamp so a negative limit yields an empty page rather than slicing
        # items off the tail of the list.
        items = all_items[: max(limit, 0)]
        return {
            "object": "list",
            "data": items,
            "first_id": items[0]["id"] if items else None,
            "last_id": items[-1]["id"] if items else None,
            # True only when items were actually cut off. Comparing the page
            # size with `limit` would wrongly report more data whenever the
            # thread holds exactly `limit` items.
            "has_more": len(all_items) > len(items),
        }

    @router.delete("/responses/{response_id}")
    async def delete_response(response_id: str):
        _model, thread_uuid, graph = _resolve(response_id)
        checkpointer = getattr(graph, "checkpointer", None)
        # Best effort: checkpointers without `adelete_thread` are skipped and
        # the deletion is still reported as successful.
        if checkpointer is not None and hasattr(checkpointer, "adelete_thread"):
            await checkpointer.adelete_thread(str(thread_uuid))
        return {"id": response_id, "object": "response", "deleted": True}

    return router