qdrant-loader-core 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. qdrant_loader_core-0.7.1/PKG-INFO +34 -0
  2. qdrant_loader_core-0.7.1/pyproject.toml +63 -0
  3. qdrant_loader_core-0.7.1/setup.cfg +4 -0
  4. qdrant_loader_core-0.7.1/src/qdrant_loader_core/__init__.py +25 -0
  5. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/__init__.py +17 -0
  6. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/errors.py +22 -0
  7. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/factory.py +90 -0
  8. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/__init__.py +8 -0
  9. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/azure_openai.py +71 -0
  10. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/ollama.py +284 -0
  11. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/openai.py +257 -0
  12. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/ratelimit.py +20 -0
  13. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/settings.py +133 -0
  14. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/tokenization.py +38 -0
  15. qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/types.py +29 -0
  16. qdrant_loader_core-0.7.1/src/qdrant_loader_core/logging.py +279 -0
  17. qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/PKG-INFO +34 -0
  18. qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/SOURCES.txt +25 -0
  19. qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/dependency_links.txt +1 -0
  20. qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/requires.txt +8 -0
  21. qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/top_level.txt +1 -0
  22. qdrant_loader_core-0.7.1/tests/test_azure_provider.py +100 -0
  23. qdrant_loader_core-0.7.1/tests/test_factory_stub.py +41 -0
  24. qdrant_loader_core-0.7.1/tests/test_ollama_embeddings.py +161 -0
  25. qdrant_loader_core-0.7.1/tests/test_providers_import.py +11 -0
  26. qdrant_loader_core-0.7.1/tests/test_settings_mapping.py +70 -0
  27. qdrant_loader_core-0.7.1/tests/test_tokenization.py +29 -0
qdrant_loader_core-0.7.1/PKG-INFO
@@ -0,0 +1,34 @@
+ Metadata-Version: 2.4
+ Name: qdrant-loader-core
+ Version: 0.7.1
+ Summary: Shared core for provider-agnostic LLM support and configuration mapping for qdrant-loader ecosystem
+ Author-email: Martin Papy <martin.papy@cbtw.tech>
+ License-Expression: GPL-3.0
+ Project-URL: Homepage, https://qdrant-loader.net
+ Project-URL: Documentation, https://qdrant-loader.net/docs/packages/core/README.html
+ Project-URL: Repository, https://github.com/martin-papy/qdrant-loader
+ Project-URL: Issues, https://github.com/martin-papy/qdrant-loader/issues
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Information Technology
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Topic :: Database
+ Classifier: Topic :: Database :: Database Engines/Servers
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Topic :: Text Processing :: Indexing
+ Classifier: Topic :: Text Processing :: Linguistic
+ Classifier: Environment :: Console
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+ Requires-Dist: pydantic>=2.0.0
+ Provides-Extra: openai
+ Requires-Dist: openai>=1.3.0; extra == "openai"
+ Requires-Dist: tiktoken>=0.5.0; extra == "openai"
+ Provides-Extra: ollama
+ Requires-Dist: httpx>=0.24.0; extra == "ollama"
qdrant_loader_core-0.7.1/pyproject.toml
@@ -0,0 +1,63 @@
+ [build-system]
+ requires = [
+     "setuptools>=61.0",
+     "wheel",
+ ]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "qdrant-loader-core"
+ version = "0.7.1"
+ description = "Shared core for provider-agnostic LLM support and configuration mapping for qdrant-loader ecosystem"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ license = "GPL-3.0"
+ authors = [
+     { name = "Martin Papy", email = "martin.papy@cbtw.tech" },
+ ]
+ classifiers = [
+     "Development Status :: 5 - Production/Stable",
+     "Intended Audience :: Developers",
+     "Intended Audience :: Information Technology",
+     "Intended Audience :: Science/Research",
+     "Operating System :: OS Independent",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.12",
+     "Topic :: Database",
+     "Topic :: Database :: Database Engines/Servers",
+     "Topic :: Scientific/Engineering :: Artificial Intelligence",
+     "Topic :: Scientific/Engineering :: Information Analysis",
+     "Topic :: Software Development :: Libraries :: Python Modules",
+     "Topic :: Text Processing :: Indexing",
+     "Topic :: Text Processing :: Linguistic",
+     "Environment :: Console",
+     "Typing :: Typed",
+ ]
+ dependencies = [
+     "pydantic>=2.0.0",
+ ]
+
+ [project.optional-dependencies]
+ openai = [
+     "openai>=1.3.0",
+     "tiktoken>=0.5.0",
+ ]
+ ollama = [
+     "httpx>=0.24.0",
+ ]
+
+ [project.urls]
+ Homepage = "https://qdrant-loader.net"
+ Documentation = "https://qdrant-loader.net/docs/packages/core/README.html"
+ Repository = "https://github.com/martin-papy/qdrant-loader"
+ Issues = "https://github.com/martin-papy/qdrant-loader/issues"
+
+ [tool.setuptools.packages.find]
+ where = [
+     "src",
+ ]
+
+ [tool.pytest.ini_options]
+ testpaths = [
+     "packages/qdrant-loader-core/tests",
+ ]
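Only pydantic is a hard dependency; the OpenAI and Ollama backends arrive via the openai and ollama extras. A small illustrative sketch (not part of the package) of how a caller could check which optional backends are importable before picking a provider:

    from importlib.util import find_spec

    def available_backends() -> dict[str, bool]:
        # True when the optional dependencies backing each extra are importable.
        return {
            "openai": find_spec("openai") is not None and find_spec("tiktoken") is not None,
            "ollama": find_spec("httpx") is not None,
        }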
qdrant_loader_core-0.7.1/setup.cfg
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
qdrant_loader_core-0.7.1/src/qdrant_loader_core/__init__.py
@@ -0,0 +1,25 @@
+ # qdrant-loader-core package root
+
+ from .llm import (
+     ChatClient,
+     EmbeddingPolicy,
+     EmbeddingsClient,
+     LLMProvider,
+     LLMSettings,
+     RateLimitPolicy,
+     RequestPolicy,
+     TokenCounter,
+     create_provider,
+ )
+
+ __all__ = [
+     "EmbeddingsClient",
+     "ChatClient",
+     "TokenCounter",
+     "LLMProvider",
+     "LLMSettings",
+     "RequestPolicy",
+     "RateLimitPolicy",
+     "EmbeddingPolicy",
+     "create_provider",
+ ]
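The package root re-exports the whole LLM surface, so consumers never import provider modules directly. A minimal consumption sketch, assuming LLMSettings (defined in llm/settings.py, whose body is not shown in this diff) accepts these field names as keyword arguments; the field names are inferred from how the factory and providers read the settings object:

    from qdrant_loader_core import LLMSettings, create_provider

    # Assumed constructor fields: provider, base_url, models.
    settings = LLMSettings(
        provider="ollama",
        base_url="http://localhost:11434",
        models={"embeddings": "nomic-embed-text", "chat": "llama3"},
    )
    provider = create_provider(settings)
    embeddings_client = provider.embeddings()
    tokenizer = provider.tokenizer()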
qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/__init__.py
@@ -0,0 +1,17 @@
+ # Re-export core interfaces for convenience
+
+ from .factory import create_provider
+ from .settings import EmbeddingPolicy, LLMSettings, RateLimitPolicy, RequestPolicy
+ from .types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+
+ __all__ = [
+     "EmbeddingsClient",
+     "ChatClient",
+     "TokenCounter",
+     "LLMProvider",
+     "LLMSettings",
+     "RequestPolicy",
+     "RateLimitPolicy",
+     "EmbeddingPolicy",
+     "create_provider",
+ ]
qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/errors.py
@@ -0,0 +1,22 @@
+ class LLMError(Exception):
+     pass
+
+
+ class TimeoutError(LLMError):
+     pass
+
+
+ class RateLimitedError(LLMError):
+     pass
+
+
+ class InvalidRequestError(LLMError):
+     pass
+
+
+ class AuthError(LLMError):
+     pass
+
+
+ class ServerError(LLMError):
+     pass
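Provider failures are normalized onto this small hierarchy (see the HTTP-status mapping in the Ollama provider below), so callers can catch LLMError broadly or handle specific subclasses. A sketch of the intended handling pattern; the embed call and client object are illustrative:

    from qdrant_loader_core.llm.errors import LLMError, RateLimitedError
    from qdrant_loader_core.llm.errors import TimeoutError as LLMTimeoutError

    async def embed_with_handling(client, texts: list[str]) -> list[list[float]]:
        try:
            return await client.embed(texts)
        except RateLimitedError:
            # e.g. back off and retry later
            raise
        except LLMTimeoutError:
            # e.g. retry with a smaller batch or a longer timeout
            raise
        except LLMError:
            # catch-all for auth, invalid-request and server errors
            raise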
qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/factory.py
@@ -0,0 +1,90 @@
+ from __future__ import annotations
+
+ from urllib.parse import urlparse
+
+ from .providers.ollama import OllamaProvider
+ from .providers.openai import OpenAIProvider
+
+ try:
+     from .providers.azure_openai import AzureOpenAIProvider  # type: ignore
+ except Exception:  # pragma: no cover - optional dependency surface
+     AzureOpenAIProvider = None  # type: ignore
+ from .settings import LLMSettings
+ from .types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+
+
+ class _NoopEmbeddings(EmbeddingsClient):
+     async def embed(self, inputs: list[str]) -> list[list[float]]:
+         raise NotImplementedError("Embeddings provider not implemented")
+
+
+ class _NoopChat(ChatClient):
+     async def chat(self, messages, **kwargs):  # type: ignore[no-untyped-def]
+         raise NotImplementedError("Chat provider not implemented")
+
+
+ class _NoopTokenizer(TokenCounter):
+     def count(self, text: str) -> int:  # naive char-count fallback
+         return len(text)
+
+
+ class _NoopProvider(LLMProvider):
+     def embeddings(self) -> EmbeddingsClient:
+         return _NoopEmbeddings()
+
+     def chat(self) -> ChatClient:
+         return _NoopChat()
+
+     def tokenizer(self) -> TokenCounter:
+         return _NoopTokenizer()
+
+
+ def _safe_hostname(url: str | None) -> str | None:
+     if not url:
+         return None
+     try:
+         host = urlparse(url).hostname
+         return host.lower() if host else None
+     except Exception:
+         return None
+
+
+ def create_provider(settings: LLMSettings) -> LLMProvider:
+     """Create a provider by settings.
+
+     Phase 0: route OpenAI/OpenAI-compatible to OpenAIProvider when available; otherwise return a noop provider.
+     Ollama returns a stub provider for now.
+     """
+     provider_name = (settings.provider or "").lower()
+     base_url = (settings.base_url or "")
+     base_host = _safe_hostname(base_url)
+
+     # Route Azure before generic OpenAI routing
+     is_azure = (
+         "azure" in provider_name
+         or (
+             base_host is not None
+             and (
+                 base_host == "openai.azure.com"
+                 or base_host.endswith(".openai.azure.com")
+                 or base_host == "cognitiveservices.azure.com"
+                 or base_host.endswith(".cognitiveservices.azure.com")
+             )
+         )
+     )
+     if is_azure and AzureOpenAIProvider is not None:  # type: ignore[truthy-bool]
+         try:
+             return AzureOpenAIProvider(settings)  # type: ignore[misc]
+         except Exception:
+             return _NoopProvider()
+
+     if "openai" in provider_name or "openai" in base_url.lower():
+         try:
+             return OpenAIProvider(settings)
+         except Exception:
+             return _NoopProvider()
+
+     if provider_name == "ollama" or (base_host in ("localhost", "127.0.0.1")):
+         return OllamaProvider(settings)
+
+     return _NoopProvider()
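Routing is name- and host-based, with Azure hosts matched before the generic "openai" substring check and any provider construction failure falling back to the noop provider. A self-contained sketch that mirrors the endswith() host checks above for a few example URLs (the URLs are illustrative):

    from urllib.parse import urlparse

    # Mirrors the Azure host detection in create_provider: *.openai.azure.com and
    # *.cognitiveservices.azure.com route to Azure before generic OpenAI routing.
    for url in (
        "https://my-resource.openai.azure.com",
        "https://my-resource.cognitiveservices.azure.com",
        "https://api.openai.com/v1",
        "http://localhost:11434",
    ):
        host = (urlparse(url).hostname or "").lower()
        is_azure_host = host.endswith(".openai.azure.com") or host.endswith(".cognitiveservices.azure.com")
        print(url, "->", "azure" if is_azure_host else "non-azure")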
qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/__init__.py
@@ -0,0 +1,8 @@
+ # Namespace package for LLM providers
+
+ __all__ = [
+     "openai",
+     "ollama",
+ ]
+
+
qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/azure_openai.py
@@ -0,0 +1,71 @@
+ from __future__ import annotations
+
+ from typing import Any
+ from urllib.parse import urlparse
+
+ try:
+     from openai import AzureOpenAI  # type: ignore
+ except Exception:  # pragma: no cover - optional dependency surface
+     AzureOpenAI = None  # type: ignore
+
+ from ...logging import LoggingConfig
+ from ..settings import LLMSettings
+ from ..types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+ from .openai import OpenAIChat, OpenAIEmbeddings, _OpenAITokenCounter
+
+ logger = LoggingConfig.get_logger(__name__)
+
+
+ def _host_of(url: str | None) -> str | None:
+     if not url:
+         return None
+     try:
+         return urlparse(url).hostname or None
+     except Exception:
+         return None
+
+
+ def _validate_azure_settings(settings: LLMSettings) -> None:
+     base_url = settings.base_url or ""
+     if "/openai/deployments" in base_url:
+         raise ValueError(
+             "Azure OpenAI base_url must be the resource root (e.g. https://<resource>.openai.azure.com). Do not include /openai/deployments/... in base_url."
+         )
+     if not (settings.api_version and isinstance(settings.api_version, str)):
+         raise ValueError(
+             "Azure OpenAI requires api_version (e.g. '2024-05-01-preview') in global.llm.api_version"
+         )
+
+
+ class AzureOpenAIProvider(LLMProvider):
+     def __init__(self, settings: LLMSettings):
+         self._settings = settings
+         _validate_azure_settings(settings)
+
+         self._base_host = _host_of(settings.base_url)
+         if AzureOpenAI is None:
+             self._client = None
+         else:
+             # Prefer explicit azure_endpoint in provider_options; fallback to base_url
+             provider_opts = settings.provider_options or {}
+             endpoint = provider_opts.get("azure_endpoint") or settings.base_url
+             kwargs: dict[str, Any] = {
+                 "api_key": settings.api_key,
+                 "api_version": settings.api_version,
+             }
+             if endpoint:
+                 kwargs["azure_endpoint"] = endpoint
+             self._client = AzureOpenAI(**{k: v for k, v in kwargs.items() if v is not None})
+
+     def embeddings(self) -> EmbeddingsClient:
+         model = self._settings.models.get("embeddings", "")
+         return OpenAIEmbeddings(self._client, model, self._base_host, provider_label="azure_openai")
+
+     def chat(self) -> ChatClient:
+         model = self._settings.models.get("chat", "")
+         return OpenAIChat(self._client, model, self._base_host, provider_label="azure_openai")
+
+     def tokenizer(self) -> TokenCounter:
+         return _OpenAITokenCounter(self._settings.tokenizer)
+
+
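Per the validation above, Azure settings must use the resource root as base_url and carry an api_version; the values under models are presumably the Azure deployment names. A hedged configuration sketch (the LLMSettings field names are again assumptions inferred from how this module reads them):

    from qdrant_loader_core import LLMSettings, create_provider

    # base_url is the resource root, NOT .../openai/deployments/<name>.
    settings = LLMSettings(
        provider="azure_openai",
        base_url="https://my-resource.openai.azure.com",
        api_key="<azure-api-key>",
        api_version="2024-05-01-preview",
        models={"chat": "my-chat-deployment", "embeddings": "my-embedding-deployment"},
    )
    # Routes to AzureOpenAIProvider when the openai extra is installed;
    # otherwise the factory falls back to its noop provider.
    provider = create_provider(settings)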
qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/ollama.py
@@ -0,0 +1,284 @@
+ from __future__ import annotations
+
+ from typing import Any
+
+ try:
+     import httpx  # type: ignore
+ except Exception:  # pragma: no cover - optional dependency
+     httpx = None  # type: ignore
+
+ from ...logging import LoggingConfig
+ from ..errors import (
+     AuthError,
+     InvalidRequestError,
+     RateLimitedError,
+     ServerError,
+ )
+ from ..errors import (
+     TimeoutError as LLMTimeoutError,
+ )
+ from ..settings import LLMSettings
+ from ..types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+
+ logger = LoggingConfig.get_logger(__name__)
+
+
+ def _join_url(base: str | None, path: str) -> str:
+     base = (base or "").rstrip("/")
+     path = path.lstrip("/")
+     return f"{base}/{path}" if base else f"/{path}"
+
+
+ class OllamaEmbeddings(EmbeddingsClient):
+     def __init__(
+         self,
+         base_url: str | None,
+         model: str,
+         headers: dict[str, str] | None,
+         *,
+         timeout_s: float | None = None,
+         provider_options: dict[str, Any] | None = None,
+     ):
+         self._base_url = (base_url or "http://localhost:11434").rstrip("/")
+         self._model = model
+         self._headers = headers or {}
+         self._timeout_s = float(timeout_s) if timeout_s is not None else 30.0
+         self._provider_options = provider_options or {}
+
+     async def embed(self, inputs: list[str]) -> list[list[float]]:
+         if httpx is None:
+             raise NotImplementedError("httpx not available for Ollama embeddings")
+
+         # Prefer OpenAI-compatible if base_url seems to expose /v1
+         use_v1 = "/v1" in (self._base_url or "")
+         async with httpx.AsyncClient(timeout=self._timeout_s) as client:
+             try:
+                 if use_v1:
+                     # OpenAI-compatible embeddings endpoint
+                     url = _join_url(self._base_url, "/embeddings")
+                     payload = {"model": self._model, "input": inputs}
+                     resp = await client.post(url, json=payload, headers=self._headers)
+                     resp.raise_for_status()
+                     data = resp.json()
+                     logger.info(
+                         "LLM request",
+                         provider="ollama",
+                         operation="embeddings",
+                         model=self._model,
+                         base_host=self._base_url,
+                         inputs=len(inputs),
+                         # latency for v1 path hard to compute here; omitted for now
+                     )
+                     return [item["embedding"] for item in data.get("data", [])]
+                 else:
+                     # Determine native endpoint preference: embed | embeddings | auto (default)
+                     native_pref = str(self._provider_options.get("native_endpoint", "auto")).lower()
+                     prefer_embed = native_pref != "embeddings"
+
+                     # Try batch embed first when preferred
+                     if prefer_embed:
+                         url = _join_url(self._base_url, "/api/embed")
+                         payload = {"model": self._model, "input": inputs}
+                         try:
+                             resp = await client.post(
+                                 url, json=payload, headers=self._headers
+                             )
+                             resp.raise_for_status()
+                             data = resp.json()
+                             vectors = data.get("embeddings")
+                             if not isinstance(vectors, list) or (
+                                 len(vectors) != len(inputs)
+                             ):
+                                 raise ValueError(
+                                     "Invalid embeddings response from /api/embed"
+                                 )
+                             # Normalize to list[list[float]]
+                             norm = [list(vec) for vec in vectors]
+                             logger.info(
+                                 "LLM request",
+                                 provider="ollama",
+                                 operation="embeddings",
+                                 model=self._model,
+                                 base_host=self._base_url,
+                                 inputs=len(inputs),
+                                 # latency for native batch path not measured in this stub
+                             )
+                             return norm
+                         except httpx.HTTPStatusError as exc:
+                             status = exc.response.status_code if exc.response else None
+                             # Fallback for servers that don't support /api/embed
+                             if status not in (404, 405, 501):
+                                 raise
+
+                     # Per-item embeddings endpoint fallback or preference
+                     url = _join_url(self._base_url, "/api/embeddings")
+                     vectors2: list[list[float]] = []
+                     for text in inputs:
+                         payload = {"model": self._model, "input": text}
+                         resp = await client.post(
+                             url, json=payload, headers=self._headers
+                         )
+                         resp.raise_for_status()
+                         data = resp.json()
+                         emb = data.get("embedding")
+                         if emb is None and isinstance(data.get("data"), dict):
+                             emb = data["data"].get("embedding")
+                         if emb is None:
+                             raise ValueError(
+                                 "Invalid embedding response from /api/embeddings"
+                             )
+                         vectors2.append(list(emb))
+                     logger.info(
+                         "LLM request",
+                         provider="ollama",
+                         operation="embeddings",
+                         model=self._model,
+                         base_host=self._base_url,
+                         inputs=len(inputs),
+                         # latency for per-item path not measured in this stub
+                     )
+                     return vectors2
+             except httpx.TimeoutException as exc:
+                 raise LLMTimeoutError(str(exc))
+             except httpx.HTTPStatusError as exc:
+                 status = exc.response.status_code if exc.response else None
+                 if status == 401:
+                     raise AuthError(str(exc))
+                 if status == 429:
+                     raise RateLimitedError(str(exc))
+                 if status and 400 <= status < 500:
+                     raise InvalidRequestError(str(exc))
+                 raise ServerError(str(exc))
+             except httpx.HTTPError as exc:
+                 raise ServerError(str(exc))
+
+
+ class OllamaChat(ChatClient):
+     def __init__(self, base_url: str | None, model: str, headers: dict[str, str] | None):
+         self._base_url = base_url or "http://localhost:11434"
+         self._model = model
+         self._headers = headers or {}
+
+     async def chat(self, messages: list[dict[str, Any]], **kwargs: Any) -> dict[str, Any]:
+         if httpx is None:
+             raise NotImplementedError("httpx not available for Ollama chat")
+
+         # Prefer OpenAI-compatible if base_url exposes /v1
+         use_v1 = "/v1" in (self._base_url or "")
+         # Flatten messages to a single prompt for native API; preserve roles when possible
+         if use_v1:
+             url = _join_url(self._base_url, "/chat/completions")
+             payload = {"model": self._model, "messages": messages}
+             # Map common kwargs
+             for k in ("temperature", "max_tokens", "top_p", "stop"):
+                 if k in kwargs and kwargs[k] is not None:
+                     payload[k] = kwargs[k]
+             async with httpx.AsyncClient(timeout=60.0) as client:
+                 try:
+                     from datetime import datetime
+                     started = datetime.utcnow()
+                     resp = await client.post(url, json=payload, headers=self._headers)
+                     resp.raise_for_status()
+                     data = resp.json()
+                     text = ""
+                     choices = data.get("choices") or []
+                     if choices:
+                         msg = (choices[0] or {}).get("message") or {}
+                         text = msg.get("content", "") or ""
+                     duration_ms = int((datetime.utcnow() - started).total_seconds() * 1000)
+                     logger.info(
+                         "LLM request",
+                         provider="ollama",
+                         operation="chat",
+                         model=self._model,
+                         base_host=self._base_url,
+                         messages=len(messages),
+                         latency_ms=duration_ms,
+                     )
+                     return {"text": text, "raw": data, "usage": data.get("usage"), "model": data.get("model", self._model)}
+                 except httpx.TimeoutException as exc:
+                     raise LLMTimeoutError(str(exc))
+                 except httpx.HTTPStatusError as exc:
+                     status = exc.response.status_code if exc.response else None
+                     if status == 401:
+                         raise AuthError(str(exc))
+                     if status == 429:
+                         raise RateLimitedError(str(exc))
+                     if status and 400 <= status < 500:
+                         raise InvalidRequestError(str(exc))
+                     raise ServerError(str(exc))
+                 except httpx.HTTPError as exc:
+                     raise ServerError(str(exc))
+         else:
+             # Native API
+             url = _join_url(self._base_url, "/api/chat")
+             payload = {
+                 "model": self._model,
+                 "messages": messages,
+                 "stream": False,
+             }
+             if "temperature" in kwargs and kwargs["temperature"] is not None:
+                 payload["options"] = {"temperature": kwargs["temperature"]}
+             async with httpx.AsyncClient(timeout=60.0) as client:
+                 try:
+                     from datetime import datetime
+                     started = datetime.utcnow()
+                     resp = await client.post(url, json=payload, headers=self._headers)
+                     resp.raise_for_status()
+                     data = resp.json()
+                     # Ollama native returns {"message": {"content": "..."}, ...}
+                     text = ""
+                     if isinstance(data.get("message"), dict):
+                         text = data["message"].get("content", "") or ""
+                     duration_ms = int((datetime.utcnow() - started).total_seconds() * 1000)
+                     logger.info(
+                         "LLM request",
+                         provider="ollama",
+                         operation="chat",
+                         model=self._model,
+                         base_host=self._base_url,
+                         messages=len(messages),
+                         latency_ms=duration_ms,
+                     )
+                     return {"text": text, "raw": data, "usage": None, "model": self._model}
+                 except httpx.TimeoutException as exc:
+                     raise LLMTimeoutError(str(exc))
+                 except httpx.HTTPStatusError as exc:
+                     status = exc.response.status_code if exc.response else None
+                     if status == 401:
+                         raise AuthError(str(exc))
+                     if status == 429:
+                         raise RateLimitedError(str(exc))
+                     if status and 400 <= status < 500:
+                         raise InvalidRequestError(str(exc))
+                     raise ServerError(str(exc))
+                 except httpx.HTTPError as exc:
+                     raise ServerError(str(exc))
+
+
+ class OllamaTokenizer(TokenCounter):
+     def count(self, text: str) -> int:
+         return len(text)
+
+
+ class OllamaProvider(LLMProvider):
+     def __init__(self, settings: LLMSettings):
+         self._settings = settings
+
+     def embeddings(self) -> EmbeddingsClient:
+         model = self._settings.models.get("embeddings", "")
+         timeout = (self._settings.request.timeout_s if self._settings and self._settings.request else 30.0)
+         return OllamaEmbeddings(
+             self._settings.base_url,
+             model,
+             self._settings.headers,
+             timeout_s=timeout,
+             provider_options=self._settings.provider_options,
+         )
+
+     def chat(self) -> ChatClient:
+         model = self._settings.models.get("chat", "")
+         return OllamaChat(self._settings.base_url, model, self._settings.headers)
+
+     def tokenizer(self) -> TokenCounter:
+         return OllamaTokenizer()
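A minimal async usage sketch for the Ollama clients above. The model names and local server address are assumptions; a base_url containing /v1 would switch both clients to the OpenAI-compatible endpoints instead of the native /api/embed, /api/embeddings and /api/chat paths:

    import asyncio

    from qdrant_loader_core.llm.providers.ollama import OllamaChat, OllamaEmbeddings

    async def main() -> None:
        embeddings = OllamaEmbeddings(
            "http://localhost:11434",   # native API; assumed local Ollama instance
            "nomic-embed-text",         # assumed embedding model name
            headers=None,
        )
        vectors = await embeddings.embed(["hello", "world"])
        print(len(vectors), "vectors of dim", len(vectors[0]))

        chat = OllamaChat("http://localhost:11434", "llama3", None)
        reply = await chat.chat([{"role": "user", "content": "Say hi"}], temperature=0.2)
        print(reply["text"])

    asyncio.run(main())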