axion_code-1.0.0-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- axion/__init__.py +3 -0
- axion/api/__init__.py +0 -0
- axion/api/anthropic.py +460 -0
- axion/api/client.py +259 -0
- axion/api/error.py +161 -0
- axion/api/ollama.py +597 -0
- axion/api/openai_compat.py +805 -0
- axion/api/openai_responses.py +627 -0
- axion/api/prompt_cache.py +31 -0
- axion/api/sse.py +98 -0
- axion/api/types.py +451 -0
- axion/cli/__init__.py +0 -0
- axion/cli/init_cmd.py +50 -0
- axion/cli/input.py +290 -0
- axion/cli/main.py +2953 -0
- axion/cli/render.py +489 -0
- axion/cli/tui.py +766 -0
- axion/commands/__init__.py +0 -0
- axion/commands/handlers/__init__.py +0 -0
- axion/commands/handlers/agents.py +51 -0
- axion/commands/handlers/builtin_commands.py +367 -0
- axion/commands/handlers/mcp.py +59 -0
- axion/commands/handlers/models.py +75 -0
- axion/commands/handlers/plugins.py +55 -0
- axion/commands/handlers/skills.py +61 -0
- axion/commands/parsing.py +317 -0
- axion/commands/registry.py +166 -0
- axion/compat_harness/__init__.py +0 -0
- axion/compat_harness/extractor.py +145 -0
- axion/plugins/__init__.py +0 -0
- axion/plugins/hooks.py +22 -0
- axion/plugins/manager.py +391 -0
- axion/plugins/manifest.py +270 -0
- axion/runtime/__init__.py +0 -0
- axion/runtime/bash.py +388 -0
- axion/runtime/bootstrap.py +39 -0
- axion/runtime/claude_subscription.py +300 -0
- axion/runtime/compact.py +233 -0
- axion/runtime/config.py +397 -0
- axion/runtime/conversation.py +1073 -0
- axion/runtime/file_ops.py +613 -0
- axion/runtime/git.py +213 -0
- axion/runtime/hooks.py +235 -0
- axion/runtime/image.py +212 -0
- axion/runtime/lanes.py +282 -0
- axion/runtime/lsp.py +425 -0
- axion/runtime/mcp/__init__.py +0 -0
- axion/runtime/mcp/client.py +76 -0
- axion/runtime/mcp/lifecycle.py +96 -0
- axion/runtime/mcp/stdio.py +318 -0
- axion/runtime/mcp/tool_bridge.py +79 -0
- axion/runtime/memory.py +196 -0
- axion/runtime/oauth.py +329 -0
- axion/runtime/openai_subscription.py +346 -0
- axion/runtime/permissions.py +247 -0
- axion/runtime/plan_mode.py +96 -0
- axion/runtime/policy_engine.py +259 -0
- axion/runtime/prompt.py +586 -0
- axion/runtime/recovery.py +261 -0
- axion/runtime/remote.py +28 -0
- axion/runtime/sandbox.py +68 -0
- axion/runtime/scheduler.py +231 -0
- axion/runtime/session.py +365 -0
- axion/runtime/sharing.py +159 -0
- axion/runtime/skills.py +124 -0
- axion/runtime/tasks.py +258 -0
- axion/runtime/usage.py +241 -0
- axion/runtime/workers.py +186 -0
- axion/telemetry/__init__.py +0 -0
- axion/telemetry/events.py +67 -0
- axion/telemetry/profile.py +49 -0
- axion/telemetry/sink.py +60 -0
- axion/telemetry/tracer.py +95 -0
- axion/tools/__init__.py +0 -0
- axion/tools/lane_completion.py +33 -0
- axion/tools/registry.py +853 -0
- axion/tools/tool_search.py +226 -0
- axion_code-1.0.0.dist-info/METADATA +709 -0
- axion_code-1.0.0.dist-info/RECORD +82 -0
- axion_code-1.0.0.dist-info/WHEEL +4 -0
- axion_code-1.0.0.dist-info/entry_points.txt +2 -0
- axion_code-1.0.0.dist-info/licenses/LICENSE +21 -0
axion/api/ollama.py
ADDED
@@ -0,0 +1,597 @@
"""Ollama-compatible API client with streaming support.

Ollama runs locally and exposes an OpenAI-compatible API at
http://localhost:11434/v1/chat/completions. This module provides a
client that translates between Anthropic-style request/response types
and the Ollama wire format.
"""

from __future__ import annotations

import json
import logging
import os
from dataclasses import dataclass, field
from typing import Any, AsyncIterator

import httpx

from axion.api.error import (
    ApiResponseError,
    HttpError,
    InvalidSseFrameError,
)
from axion.api.types import (
    ContentBlockDeltaEvent,
    ContentBlockStartEvent,
    ContentBlockStopEvent,
    InputMessage,
    MessageDelta,
    MessageDeltaEvent,
    MessageRequest,
    MessageResponse,
    MessageStartEvent,
    MessageStopEvent,
    OutputContentBlock,
    StreamEvent,
    TextDelta,
    TextInputBlock,
    TextOutputBlock,
    ToolChoice,
    ToolDefinition,
    ToolResultBlock,
    ToolResultJsonContent,
    ToolResultTextContent,
    ToolUseOutputBlock,
    Usage,
)

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

DEFAULT_OLLAMA_BASE_URL = "http://localhost:11434"
DEFAULT_OLLAMA_MODEL = "llama3.1"
OLLAMA_BASE_URL_ENV = "OLLAMA_BASE_URL"

# Prefixes that indicate an Ollama / local model
OLLAMA_MODEL_PREFIXES = (
    "llama",
    "mistral",
    "codellama",
    "deepseek",
    "phi",
    "gemma",
    "qwen",
)

# ---------------------------------------------------------------------------
# Data types
# ---------------------------------------------------------------------------


@dataclass
class OllamaModelInfo:
    """Summary of a locally-available Ollama model."""

    name: str
    size: int = 0
    digest: str = ""
    modified_at: str = ""
    details: dict[str, Any] = field(default_factory=dict)


# ---------------------------------------------------------------------------
# SSE parser (OpenAI format, reused from openai_compat logic)
# ---------------------------------------------------------------------------


class _OllamaSseParser:
    """Incremental SSE parser for Ollama's OpenAI-compatible streaming."""

    def __init__(self) -> None:
        self._buffer = bytearray()

    def push(self, chunk: bytes) -> list[dict[str, Any]]:
        self._buffer.extend(chunk)
        results: list[dict[str, Any]] = []

        while True:
            frame = self._next_frame()
            if frame is None:
                break
            parsed = _parse_sse_frame(frame)
            if parsed is not None:
                results.append(parsed)

        return results

    def _next_frame(self) -> str | None:
        pos = self._buffer.find(b"\n\n")
        sep_len = 2
        if pos == -1:
            pos = self._buffer.find(b"\r\n\r\n")
            sep_len = 4
        if pos == -1:
            return None

        frame_bytes = bytes(self._buffer[:pos])
        del self._buffer[: pos + sep_len]
        return frame_bytes.decode("utf-8", errors="replace")


def _parse_sse_frame(frame: str) -> dict[str, Any] | None:
    trimmed = frame.strip()
    if not trimmed:
        return None

    data_lines: list[str] = []
    for line in trimmed.splitlines():
        if line.startswith(":"):
            continue
        if line.startswith("data:"):
            data_lines.append(line[len("data:") :].lstrip())

    if not data_lines:
        return None

    payload = "\n".join(data_lines)
    if payload == "[DONE]":
        return None

    try:
        return json.loads(payload)
    except json.JSONDecodeError as exc:
        raise InvalidSseFrameError(f"Invalid JSON in SSE data: {exc}") from exc


# ---------------------------------------------------------------------------
# Stream state machine
# ---------------------------------------------------------------------------


class _OllamaStreamState:
    """Translates OpenAI ChatCompletionChunks into Anthropic StreamEvents.

    Simplified compared to the full OpenAI compat state machine: Ollama does
    not currently emit ``tool_calls`` deltas in streaming mode, so tool-call
    accumulation is omitted.
    """

    def __init__(self, model: str) -> None:
        self._model = model
        self._message_started = False
        self._text_started = False
        self._text_finished = False
        self._finished = False
        self._stop_reason: str | None = None
        self._usage: Usage | None = None

    def ingest_chunk(self, chunk: dict[str, Any]) -> list[StreamEvent]:
        events: list[StreamEvent] = []

        if not self._message_started:
            self._message_started = True
            events.append(
                MessageStartEvent(
                    message=MessageResponse(
                        id=chunk.get("id", ""),
                        type="message",
                        role="assistant",
                        content=[],
                        model=chunk.get("model") or self._model,
                        usage=Usage(),
                    ),
                )
            )

        if "usage" in chunk and chunk["usage"]:
            u = chunk["usage"]
            self._usage = Usage(
                input_tokens=u.get("prompt_tokens", 0),
                output_tokens=u.get("completion_tokens", 0),
            )

        for choice in chunk.get("choices", []):
            delta = choice.get("delta", {})

            content = delta.get("content")
            if content:
                if not self._text_started:
                    self._text_started = True
                    events.append(
                        ContentBlockStartEvent(
                            index=0,
                            content_block=TextOutputBlock(text=""),
                        )
                    )
                events.append(
                    ContentBlockDeltaEvent(
                        index=0,
                        delta=TextDelta(text=content),
                    )
                )

            finish_reason = choice.get("finish_reason")
            if finish_reason:
                self._stop_reason = _normalize_finish_reason(finish_reason)

        return events

    def finish(self) -> list[StreamEvent]:
        if self._finished:
            return []
        self._finished = True

        events: list[StreamEvent] = []

        if self._text_started and not self._text_finished:
            self._text_finished = True
            events.append(ContentBlockStopEvent(index=0))

        if self._message_started:
            events.append(
                MessageDeltaEvent(
                    delta=MessageDelta(
                        stop_reason=self._stop_reason or "end_turn",
                    ),
                    usage=self._usage or Usage(),
                )
            )
            events.append(MessageStopEvent())

        return events


# ---------------------------------------------------------------------------
# Request translation (Anthropic -> Ollama / OpenAI)
# ---------------------------------------------------------------------------


def _build_ollama_request(request: MessageRequest) -> dict[str, Any]:
    """Translate an Anthropic-style MessageRequest into an OpenAI chat body."""
    messages: list[dict[str, Any]] = []

    # System message
    if request.system:
        messages.append({"role": "system", "content": request.system})

    # Conversation messages
    for message in request.messages:
        messages.extend(_translate_message(message))

    payload: dict[str, Any] = {
        "model": request.model,
        "max_tokens": request.max_tokens,
        "messages": messages,
        "stream": request.stream,
    }

    # Tools -> OpenAI functions format
    if request.tools:
        payload["tools"] = [_openai_tool_definition(t) for t in request.tools]

    if request.tool_choice is not None:
        payload["tool_choice"] = _openai_tool_choice(request.tool_choice)

    return payload


def _translate_message(message: InputMessage) -> list[dict[str, Any]]:
    """Translate a single Anthropic InputMessage into OpenAI messages."""
    if message.role == "assistant":
        text_parts: list[str] = []
        for block in message.content:
            if isinstance(block, TextInputBlock):
                text_parts.append(block.text)
        text = "".join(text_parts)
        if not text:
            return []
        return [{"role": "assistant", "content": text}]

    results: list[dict[str, Any]] = []
    for block in message.content:
        if isinstance(block, TextInputBlock):
            results.append({"role": "user", "content": block.text})
        elif isinstance(block, ToolResultBlock):
            content = _flatten_tool_result_content(block)
            results.append({
                "role": "tool",
                "tool_call_id": block.tool_use_id,
                "content": content,
            })
    return results


def _flatten_tool_result_content(block: ToolResultBlock) -> str:
    parts: list[str] = []
    for c in block.content:
        if isinstance(c, ToolResultTextContent):
            parts.append(c.text)
        elif isinstance(c, ToolResultJsonContent):
            parts.append(
                json.dumps(c.value) if not isinstance(c.value, str) else c.value
            )
    return "\n".join(parts)


def _openai_tool_definition(tool: ToolDefinition) -> dict[str, Any]:
    func: dict[str, Any] = {
        "name": tool.name,
        "parameters": tool.input_schema,
    }
    if tool.description is not None:
        func["description"] = tool.description
    return {"type": "function", "function": func}


def _openai_tool_choice(tool_choice: ToolChoice) -> Any:
    if tool_choice.type == "auto":
        return "auto"
    if tool_choice.type == "any":
        return "required"
    if tool_choice.type == "tool" and tool_choice.name:
        return {"type": "function", "function": {"name": tool_choice.name}}
    return "auto"


# ---------------------------------------------------------------------------
# Response translation (Ollama / OpenAI -> Anthropic)
# ---------------------------------------------------------------------------


def _normalize_response(model: str, data: dict[str, Any]) -> MessageResponse:
    """Translate an OpenAI ChatCompletion response to Anthropic MessageResponse."""
    choices = data.get("choices", [])
    if not choices:
        raise InvalidSseFrameError("chat completion response missing choices")

    choice = choices[0]
    message = choice.get("message", {})
    content: list[OutputContentBlock] = []

    text = message.get("content")
    if text:
        content.append(TextOutputBlock(text=text))

    # Tool calls (Ollama may include these in non-streaming mode)
    for tc in message.get("tool_calls", []):
        func = tc.get("function", {})
        arguments = _parse_tool_arguments(func.get("arguments", ""))
        content.append(
            ToolUseOutputBlock(
                id=tc.get("id", ""),
                name=func.get("name", ""),
                input=arguments,
            )
        )

    usage_data = data.get("usage", {})
    usage = Usage(
        input_tokens=usage_data.get("prompt_tokens", 0),
        output_tokens=usage_data.get("completion_tokens", 0),
    )

    finish_reason = choice.get("finish_reason")
    stop_reason = _normalize_finish_reason(finish_reason) if finish_reason else None

    return MessageResponse(
        id=data.get("id", ""),
        type="message",
        role=message.get("role", "assistant"),
        content=content,
        model=data.get("model", "") or model,
        usage=usage,
        stop_reason=stop_reason,
    )


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _parse_tool_arguments(arguments: str) -> Any:
    try:
        return json.loads(arguments)
    except (json.JSONDecodeError, TypeError):
        return {"raw": arguments}


def _normalize_finish_reason(value: str) -> str:
    mapping = {"stop": "end_turn", "tool_calls": "tool_use"}
    return mapping.get(value, value)


def _read_ollama_base_url() -> str:
    return os.environ.get(OLLAMA_BASE_URL_ENV, "") or DEFAULT_OLLAMA_BASE_URL


def is_ollama_model(model: str) -> bool:
    """Return True if *model* looks like a locally-served Ollama model."""
    lower = model.lower()
    return any(lower.startswith(prefix) for prefix in OLLAMA_MODEL_PREFIXES)


# ---------------------------------------------------------------------------
# Client
# ---------------------------------------------------------------------------


class OllamaClient:
    """HTTP client for a local Ollama instance.

    Ollama exposes an OpenAI-compatible endpoint at ``/v1/chat/completions``
    and a native tag-listing endpoint at ``/api/tags``.
    """

    def __init__(
        self,
        base_url: str = DEFAULT_OLLAMA_BASE_URL,
        model: str = DEFAULT_OLLAMA_MODEL,
    ) -> None:
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._http = httpx.AsyncClient(timeout=httpx.Timeout(300.0))

    @classmethod
    def from_env(cls, model: str | None = None) -> OllamaClient:
        """Create a client using environment variables for the base URL."""
        base_url = _read_ollama_base_url()
        return cls(base_url=base_url, model=model or DEFAULT_OLLAMA_MODEL)

    @property
    def model(self) -> str:
        return self._model

    @property
    def base_url(self) -> str:
        return self._base_url

    # -- Public API ----------------------------------------------------------

    async def send_message(self, request: MessageRequest) -> MessageResponse:
        """Send a non-streaming chat completion request."""
        req = MessageRequest(
            model=request.model,
            max_tokens=request.max_tokens,
            messages=request.messages,
            system=request.system,
            tools=request.tools,
            tool_choice=request.tool_choice,
            stream=False,
        )
        body = _build_ollama_request(req)
        url = f"{self._base_url}/v1/chat/completions"

        try:
            response = await self._http.post(
                url,
                json=body,
                headers={"content-type": "application/json"},
            )
        except httpx.HTTPError as exc:
            raise HttpError(str(exc), cause=exc) from exc

        _expect_success(response)
        payload = response.json()
        return _normalize_response(req.model, payload)

    async def stream_message(
        self, request: MessageRequest
    ) -> AsyncIterator[StreamEvent]:
        """Send a streaming request and yield Anthropic-format StreamEvents."""
        req = MessageRequest(
            model=request.model,
            max_tokens=request.max_tokens,
            messages=request.messages,
            system=request.system,
            tools=request.tools,
            tool_choice=request.tool_choice,
            stream=True,
        )
        body = _build_ollama_request(req)
        url = f"{self._base_url}/v1/chat/completions"

        try:
            response = await self._http.send(
                self._http.build_request(
                    "POST",
                    url,
                    json=body,
                    headers={"content-type": "application/json"},
                ),
                stream=True,
            )
        except httpx.HTTPError as exc:
            raise HttpError(str(exc), cause=exc) from exc

        if not response.is_success:
            body_text = await response.aread()
            raise ApiResponseError(
                status=response.status_code,
                body=body_text.decode("utf-8", errors="replace"),
            )

        parser = _OllamaSseParser()
        state = _OllamaStreamState(model=req.model)

        async for raw_chunk in response.aiter_bytes():
            for chunk in parser.push(raw_chunk):
                for event in state.ingest_chunk(chunk):
                    yield event

        for event in state.finish():
            yield event

    async def list_models(self) -> list[OllamaModelInfo]:
        """GET /api/tags -- list locally available models."""
        url = f"{self._base_url}/api/tags"
        try:
            response = await self._http.get(url)
        except httpx.HTTPError as exc:
            raise HttpError(str(exc), cause=exc) from exc

        _expect_success(response)
        data = response.json()

        models: list[OllamaModelInfo] = []
        for m in data.get("models", []):
            models.append(
                OllamaModelInfo(
                    name=m.get("name", ""),
                    size=m.get("size", 0),
                    digest=m.get("digest", ""),
                    modified_at=m.get("modified_at", ""),
                    details=m.get("details", {}),
                )
            )
        return models

    async def is_available(self) -> bool:
        """Check whether the Ollama server is reachable."""
        url = f"{self._base_url}/api/tags"
        try:
            response = await self._http.get(url, timeout=httpx.Timeout(5.0))
            return response.is_success
        except (httpx.HTTPError, OSError):
            return False

    async def close(self) -> None:
        """Shut down the underlying HTTP client."""
        await self._http.aclose()


# ---------------------------------------------------------------------------
# Response validation
# ---------------------------------------------------------------------------


def _expect_success(response: httpx.Response) -> None:
    """Raise ApiResponseError for non-2xx responses."""
    if response.is_success:
        return

    body = response.text

    error_type: str | None = None
    message: str | None = None
    try:
        envelope = json.loads(body)
        err_obj = envelope.get("error", {})
        if isinstance(err_obj, dict):
            error_type = err_obj.get("type")
            message = err_obj.get("message")
        elif isinstance(err_obj, str):
            message = err_obj
    except (json.JSONDecodeError, AttributeError):
        pass

    raise ApiResponseError(
        status=response.status_code,
        error_type=error_type,
        message=message,
        body=body,
    )
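
For orientation, a minimal driver for the OllamaClient shown above. This is a sketch, not code shipped in the wheel; it assumes the axion.api.types constructors accept the same keyword fields this module itself passes (MessageRequest's seven fields, InputMessage.role and .content, TextInputBlock.text) and that a local Ollama server is running.

# Illustrative sketch only -- not part of the package. Assumes the
# axion.api.types dataclasses mirror the attributes read by ollama.py.
import asyncio

from axion.api.ollama import OllamaClient
from axion.api.types import InputMessage, MessageRequest, TextInputBlock


async def main() -> None:
    # Reads OLLAMA_BASE_URL if set, else http://localhost:11434.
    client = OllamaClient.from_env()
    if not await client.is_available():
        raise SystemExit("Ollama is not reachable; is the server running?")

    request = MessageRequest(
        model=client.model,
        max_tokens=512,
        messages=[
            InputMessage(
                role="user",
                content=[TextInputBlock(text="Explain SSE in one sentence.")],
            )
        ],
        system=None,
        tools=None,
        tool_choice=None,
        stream=True,
    )

    # stream_message yields Anthropic-style events; print text deltas only.
    async for event in client.stream_message(request):
        delta = getattr(event, "delta", None)
        text = getattr(delta, "text", None)
        if text:
            print(text, end="", flush=True)
    print()

    await client.close()


asyncio.run(main())

Filtering deltas via getattr keeps the sketch agnostic to the exact StreamEvent union; a real caller would more likely branch on the concrete event classes such as ContentBlockDeltaEvent and MessageStopEvent.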