qdrant-loader-core 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qdrant_loader_core-0.7.1/PKG-INFO +34 -0
- qdrant_loader_core-0.7.1/pyproject.toml +63 -0
- qdrant_loader_core-0.7.1/setup.cfg +4 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/__init__.py +25 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/__init__.py +17 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/errors.py +22 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/factory.py +90 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/__init__.py +8 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/azure_openai.py +71 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/ollama.py +284 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/openai.py +257 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/ratelimit.py +20 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/settings.py +133 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/tokenization.py +38 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/types.py +29 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core/logging.py +279 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/PKG-INFO +34 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/SOURCES.txt +25 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/dependency_links.txt +1 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/requires.txt +8 -0
- qdrant_loader_core-0.7.1/src/qdrant_loader_core.egg-info/top_level.txt +1 -0
- qdrant_loader_core-0.7.1/tests/test_azure_provider.py +100 -0
- qdrant_loader_core-0.7.1/tests/test_factory_stub.py +41 -0
- qdrant_loader_core-0.7.1/tests/test_ollama_embeddings.py +161 -0
- qdrant_loader_core-0.7.1/tests/test_providers_import.py +11 -0
- qdrant_loader_core-0.7.1/tests/test_settings_mapping.py +70 -0
- qdrant_loader_core-0.7.1/tests/test_tokenization.py +29 -0

qdrant_loader_core-0.7.1/PKG-INFO
@@ -0,0 +1,34 @@
Metadata-Version: 2.4
Name: qdrant-loader-core
Version: 0.7.1
Summary: Shared core for provider-agnostic LLM support and configuration mapping for qdrant-loader ecosystem
Author-email: Martin Papy <martin.papy@cbtw.tech>
License-Expression: GPL-3.0
Project-URL: Homepage, https://qdrant-loader.net
Project-URL: Documentation, https://qdrant-loader.net/docs/packages/core/README.html
Project-URL: Repository, https://github.com/martin-papy/qdrant-loader
Project-URL: Issues, https://github.com/martin-papy/qdrant-loader/issues
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Information Technology
Classifier: Intended Audience :: Science/Research
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Database
Classifier: Topic :: Database :: Database Engines/Servers
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Scientific/Engineering :: Information Analysis
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Indexing
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Environment :: Console
Classifier: Typing :: Typed
Requires-Python: >=3.12
Description-Content-Type: text/markdown
Requires-Dist: pydantic>=2.0.0
Provides-Extra: openai
Requires-Dist: openai>=1.3.0; extra == "openai"
Requires-Dist: tiktoken>=0.5.0; extra == "openai"
Provides-Extra: ollama
Requires-Dist: httpx>=0.24.0; extra == "ollama"
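
The metadata above is what an installed environment reports back, so a quick way to confirm the extras wiring (openai, ollama) is to query it with the standard library. A minimal sketch, assuming the package is installed in the current environment:

import importlib.metadata as im

md = im.metadata("qdrant-loader-core")
print(md["Version"])           # 0.7.1
print(md["Requires-Python"])   # >=3.12
# Base requirement plus the optional extras declared in PKG-INFO:
# ['pydantic>=2.0.0', 'openai>=1.3.0; extra == "openai"',
#  'tiktoken>=0.5.0; extra == "openai"', 'httpx>=0.24.0; extra == "ollama"']
print(im.requires("qdrant-loader-core"))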

qdrant_loader_core-0.7.1/pyproject.toml
@@ -0,0 +1,63 @@
[build-system]
requires = [
    "setuptools>=61.0",
    "wheel",
]
build-backend = "setuptools.build_meta"

[project]
name = "qdrant-loader-core"
version = "0.7.1"
description = "Shared core for provider-agnostic LLM support and configuration mapping for qdrant-loader ecosystem"
readme = "README.md"
requires-python = ">=3.12"
license = "GPL-3.0"
authors = [
    { name = "Martin Papy", email = "martin.papy@cbtw.tech" },
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Topic :: Database",
    "Topic :: Database :: Database Engines/Servers",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Information Analysis",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Text Processing :: Indexing",
    "Topic :: Text Processing :: Linguistic",
    "Environment :: Console",
    "Typing :: Typed",
]
dependencies = [
    "pydantic>=2.0.0",
]

[project.optional-dependencies]
openai = [
    "openai>=1.3.0",
    "tiktoken>=0.5.0",
]
ollama = [
    "httpx>=0.24.0",
]

[project.urls]
Homepage = "https://qdrant-loader.net"
Documentation = "https://qdrant-loader.net/docs/packages/core/README.html"
Repository = "https://github.com/martin-papy/qdrant-loader"
Issues = "https://github.com/martin-papy/qdrant-loader/issues"

[tool.setuptools.packages.find]
where = [
    "src",
]

[tool.pytest.ini_options]
testpaths = [
    "packages/qdrant-loader-core/tests",
]

qdrant_loader_core-0.7.1/src/qdrant_loader_core/__init__.py
@@ -0,0 +1,25 @@
# qdrant-loader-core package root

from .llm import (
    ChatClient,
    EmbeddingPolicy,
    EmbeddingsClient,
    LLMProvider,
    LLMSettings,
    RateLimitPolicy,
    RequestPolicy,
    TokenCounter,
    create_provider,
)

__all__ = [
    "EmbeddingsClient",
    "ChatClient",
    "TokenCounter",
    "LLMProvider",
    "LLMSettings",
    "RequestPolicy",
    "RateLimitPolicy",
    "EmbeddingPolicy",
    "create_provider",
]

qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/__init__.py
@@ -0,0 +1,17 @@
# Re-export core interfaces for convenience

from .factory import create_provider
from .settings import EmbeddingPolicy, LLMSettings, RateLimitPolicy, RequestPolicy
from .types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter

__all__ = [
    "EmbeddingsClient",
    "ChatClient",
    "TokenCounter",
    "LLMProvider",
    "LLMSettings",
    "RequestPolicy",
    "RateLimitPolicy",
    "EmbeddingPolicy",
    "create_provider",
]
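
The package root simply re-exports the `qdrant_loader_core.llm` surface, so callers can import the public names from either level. A small sketch:

# Equivalent imports; the root __init__ re-exports qdrant_loader_core.llm.
from qdrant_loader_core import LLMSettings, create_provider
from qdrant_loader_core.llm import LLMSettings as LLMSettingsAlias

assert LLMSettings is LLMSettingsAlias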

qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/errors.py
@@ -0,0 +1,22 @@
class LLMError(Exception):
    pass


class TimeoutError(LLMError):
    pass


class RateLimitedError(LLMError):
    pass


class InvalidRequestError(LLMError):
    pass


class AuthError(LLMError):
    pass


class ServerError(LLMError):
    pass
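
Provider failures are normalized onto this small hierarchy (the Ollama client further down maps HTTP 401/429/4xx/5xx onto it), so callers can branch on the subclass and keep `LLMError` as the catch-all. A minimal sketch; `embed_with_retry` is a hypothetical helper, not part of the package:

import asyncio

from qdrant_loader_core.llm.errors import LLMError, RateLimitedError
from qdrant_loader_core.llm.errors import TimeoutError as LLMTimeoutError


async def embed_with_retry(embeddings, inputs, attempts=3):
    # Hypothetical wrapper: retry transient failures, surface everything else.
    for attempt in range(attempts):
        try:
            return await embeddings.embed(inputs)
        except (RateLimitedError, LLMTimeoutError):
            if attempt == attempts - 1:
                raise
            await asyncio.sleep(2 ** attempt)  # simple exponential backoff
        except LLMError:
            raise  # auth / invalid request / server errors are not retried here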

qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/factory.py
@@ -0,0 +1,90 @@
from __future__ import annotations

from urllib.parse import urlparse

from .providers.ollama import OllamaProvider
from .providers.openai import OpenAIProvider

try:
    from .providers.azure_openai import AzureOpenAIProvider  # type: ignore
except Exception:  # pragma: no cover - optional dependency surface
    AzureOpenAIProvider = None  # type: ignore
from .settings import LLMSettings
from .types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter


class _NoopEmbeddings(EmbeddingsClient):
    async def embed(self, inputs: list[str]) -> list[list[float]]:
        raise NotImplementedError("Embeddings provider not implemented")


class _NoopChat(ChatClient):
    async def chat(self, messages, **kwargs):  # type: ignore[no-untyped-def]
        raise NotImplementedError("Chat provider not implemented")


class _NoopTokenizer(TokenCounter):
    def count(self, text: str) -> int:  # naive char-count fallback
        return len(text)


class _NoopProvider(LLMProvider):
    def embeddings(self) -> EmbeddingsClient:
        return _NoopEmbeddings()

    def chat(self) -> ChatClient:
        return _NoopChat()

    def tokenizer(self) -> TokenCounter:
        return _NoopTokenizer()


def _safe_hostname(url: str | None) -> str | None:
    if not url:
        return None
    try:
        host = urlparse(url).hostname
        return host.lower() if host else None
    except Exception:
        return None


def create_provider(settings: LLMSettings) -> LLMProvider:
    """Create a provider by settings.

    Phase 0: route OpenAI/OpenAI-compatible to OpenAIProvider when available; otherwise return a noop provider.
    Ollama returns a stub provider for now.
    """
    provider_name = (settings.provider or "").lower()
    base_url = (settings.base_url or "")
    base_host = _safe_hostname(base_url)

    # Route Azure before generic OpenAI routing
    is_azure = (
        "azure" in provider_name
        or (
            base_host is not None
            and (
                base_host == "openai.azure.com"
                or base_host.endswith(".openai.azure.com")
                or base_host == "cognitiveservices.azure.com"
                or base_host.endswith(".cognitiveservices.azure.com")
            )
        )
    )
    if is_azure and AzureOpenAIProvider is not None:  # type: ignore[truthy-bool]
        try:
            return AzureOpenAIProvider(settings)  # type: ignore[misc]
        except Exception:
            return _NoopProvider()

    if "openai" in provider_name or "openai" in base_url.lower():
        try:
            return OpenAIProvider(settings)
        except Exception:
            return _NoopProvider()

    if provider_name == "ollama" or (base_host in ("localhost", "127.0.0.1")):
        return OllamaProvider(settings)

    return _NoopProvider()
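
Routing is driven entirely by `settings.provider` and the `base_url` host: Azure hosts go to `AzureOpenAIProvider`, anything mentioning "openai" goes to `OpenAIProvider`, "ollama" or a localhost base URL goes to `OllamaProvider`, and anything else (or a failed construction) degrades to the no-op provider. A rough usage sketch; the exact `LLMSettings` fields and defaults live in `llm/settings.py` (not shown in this section), so the keyword arguments and model names below are assumptions based on how the providers read the settings:

from qdrant_loader_core.llm import LLMSettings, create_provider

# Assumed LLMSettings kwargs; model names are placeholders, not package defaults.
settings = LLMSettings(
    provider="ollama",
    base_url="http://localhost:11434",
    models={"embeddings": "nomic-embed-text", "chat": "llama3"},
)

provider = create_provider(settings)        # -> OllamaProvider (provider name match)
embeddings_client = provider.embeddings()   # -> OllamaEmbeddings
token_counter = provider.tokenizer()        # -> OllamaTokenizer (character count)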

qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/azure_openai.py
@@ -0,0 +1,71 @@
from __future__ import annotations

from typing import Any
from urllib.parse import urlparse

try:
    from openai import AzureOpenAI  # type: ignore
except Exception:  # pragma: no cover - optional dependency surface
    AzureOpenAI = None  # type: ignore

from ...logging import LoggingConfig
from ..settings import LLMSettings
from ..types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
from .openai import OpenAIChat, OpenAIEmbeddings, _OpenAITokenCounter

logger = LoggingConfig.get_logger(__name__)


def _host_of(url: str | None) -> str | None:
    if not url:
        return None
    try:
        return urlparse(url).hostname or None
    except Exception:
        return None


def _validate_azure_settings(settings: LLMSettings) -> None:
    base_url = settings.base_url or ""
    if "/openai/deployments" in base_url:
        raise ValueError(
            "Azure OpenAI base_url must be the resource root (e.g. https://<resource>.openai.azure.com). Do not include /openai/deployments/... in base_url."
        )
    if not (settings.api_version and isinstance(settings.api_version, str)):
        raise ValueError(
            "Azure OpenAI requires api_version (e.g. '2024-05-01-preview') in global.llm.api_version"
        )


class AzureOpenAIProvider(LLMProvider):
    def __init__(self, settings: LLMSettings):
        self._settings = settings
        _validate_azure_settings(settings)

        self._base_host = _host_of(settings.base_url)
        if AzureOpenAI is None:
            self._client = None
        else:
            # Prefer explicit azure_endpoint in provider_options; fallback to base_url
            provider_opts = settings.provider_options or {}
            endpoint = provider_opts.get("azure_endpoint") or settings.base_url
            kwargs: dict[str, Any] = {
                "api_key": settings.api_key,
                "api_version": settings.api_version,
            }
            if endpoint:
                kwargs["azure_endpoint"] = endpoint
            self._client = AzureOpenAI(**{k: v for k, v in kwargs.items() if v is not None})

    def embeddings(self) -> EmbeddingsClient:
        model = self._settings.models.get("embeddings", "")
        return OpenAIEmbeddings(self._client, model, self._base_host, provider_label="azure_openai")

    def chat(self) -> ChatClient:
        model = self._settings.models.get("chat", "")
        return OpenAIChat(self._client, model, self._base_host, provider_label="azure_openai")

    def tokenizer(self) -> TokenCounter:
        return _OpenAITokenCounter(self._settings.tokenizer)
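
Two misconfigurations are rejected at construction time: a `base_url` that already contains `/openai/deployments/...` and a missing `api_version`. Note that `create_provider` wraps provider construction in a broad `except Exception`, so through the factory these `ValueError`s silently degrade to the no-op provider rather than propagating. A sketch of both paths, with the openai extra installed and the same assumed `LLMSettings` kwargs as above:

from qdrant_loader_core.llm import LLMSettings, create_provider
from qdrant_loader_core.llm.providers.azure_openai import AzureOpenAIProvider

bad = LLMSettings(
    provider="azure_openai",
    base_url="https://my-resource.openai.azure.com/openai/deployments/my-deployment",
    api_key="<key>",
    models={"chat": "my-deployment"},
    # api_version deliberately omitted
)

# Direct construction raises: base_url must be the resource root and api_version is required.
try:
    AzureOpenAIProvider(bad)
except ValueError as exc:
    print(exc)

# Through the factory, the same settings fall back to the no-op provider instead.
provider = create_provider(bad)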

qdrant_loader_core-0.7.1/src/qdrant_loader_core/llm/providers/ollama.py
@@ -0,0 +1,284 @@
from __future__ import annotations

from typing import Any

try:
    import httpx  # type: ignore
except Exception:  # pragma: no cover - optional dependency
    httpx = None  # type: ignore

from ...logging import LoggingConfig
from ..errors import (
    AuthError,
    InvalidRequestError,
    RateLimitedError,
    ServerError,
)
from ..errors import (
    TimeoutError as LLMTimeoutError,
)
from ..settings import LLMSettings
from ..types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter

logger = LoggingConfig.get_logger(__name__)


def _join_url(base: str | None, path: str) -> str:
    base = (base or "").rstrip("/")
    path = path.lstrip("/")
    return f"{base}/{path}" if base else f"/{path}"


class OllamaEmbeddings(EmbeddingsClient):
    def __init__(
        self,
        base_url: str | None,
        model: str,
        headers: dict[str, str] | None,
        *,
        timeout_s: float | None = None,
        provider_options: dict[str, Any] | None = None,
    ):
        self._base_url = (base_url or "http://localhost:11434").rstrip("/")
        self._model = model
        self._headers = headers or {}
        self._timeout_s = float(timeout_s) if timeout_s is not None else 30.0
        self._provider_options = provider_options or {}

    async def embed(self, inputs: list[str]) -> list[list[float]]:
        if httpx is None:
            raise NotImplementedError("httpx not available for Ollama embeddings")

        # Prefer OpenAI-compatible if base_url seems to expose /v1
        use_v1 = "/v1" in (self._base_url or "")
        async with httpx.AsyncClient(timeout=self._timeout_s) as client:
            try:
                if use_v1:
                    # OpenAI-compatible embeddings endpoint
                    url = _join_url(self._base_url, "/embeddings")
                    payload = {"model": self._model, "input": inputs}
                    resp = await client.post(url, json=payload, headers=self._headers)
                    resp.raise_for_status()
                    data = resp.json()
                    logger.info(
                        "LLM request",
                        provider="ollama",
                        operation="embeddings",
                        model=self._model,
                        base_host=self._base_url,
                        inputs=len(inputs),
                        # latency for v1 path hard to compute here; omitted for now
                    )
                    return [item["embedding"] for item in data.get("data", [])]
                else:
                    # Determine native endpoint preference: embed | embeddings | auto (default)
                    native_pref = str(self._provider_options.get("native_endpoint", "auto")).lower()
                    prefer_embed = native_pref != "embeddings"

                    # Try batch embed first when preferred
                    if prefer_embed:
                        url = _join_url(self._base_url, "/api/embed")
                        payload = {"model": self._model, "input": inputs}
                        try:
                            resp = await client.post(
                                url, json=payload, headers=self._headers
                            )
                            resp.raise_for_status()
                            data = resp.json()
                            vectors = data.get("embeddings")
                            if not isinstance(vectors, list) or (
                                len(vectors) != len(inputs)
                            ):
                                raise ValueError(
                                    "Invalid embeddings response from /api/embed"
                                )
                            # Normalize to list[list[float]]
                            norm = [list(vec) for vec in vectors]
                            logger.info(
                                "LLM request",
                                provider="ollama",
                                operation="embeddings",
                                model=self._model,
                                base_host=self._base_url,
                                inputs=len(inputs),
                                # latency for native batch path not measured in this stub
                            )
                            return norm
                        except httpx.HTTPStatusError as exc:
                            status = exc.response.status_code if exc.response else None
                            # Fallback for servers that don't support /api/embed
                            if status not in (404, 405, 501):
                                raise

                    # Per-item embeddings endpoint fallback or preference
                    url = _join_url(self._base_url, "/api/embeddings")
                    vectors2: list[list[float]] = []
                    for text in inputs:
                        payload = {"model": self._model, "input": text}
                        resp = await client.post(
                            url, json=payload, headers=self._headers
                        )
                        resp.raise_for_status()
                        data = resp.json()
                        emb = data.get("embedding")
                        if emb is None and isinstance(data.get("data"), dict):
                            emb = data["data"].get("embedding")
                        if emb is None:
                            raise ValueError(
                                "Invalid embedding response from /api/embeddings"
                            )
                        vectors2.append(list(emb))
                    logger.info(
                        "LLM request",
                        provider="ollama",
                        operation="embeddings",
                        model=self._model,
                        base_host=self._base_url,
                        inputs=len(inputs),
                        # latency for per-item path not measured in this stub
                    )
                    return vectors2
            except httpx.TimeoutException as exc:
                raise LLMTimeoutError(str(exc))
            except httpx.HTTPStatusError as exc:
                status = exc.response.status_code if exc.response else None
                if status == 401:
                    raise AuthError(str(exc))
                if status == 429:
                    raise RateLimitedError(str(exc))
                if status and 400 <= status < 500:
                    raise InvalidRequestError(str(exc))
                raise ServerError(str(exc))
            except httpx.HTTPError as exc:
                raise ServerError(str(exc))


class OllamaChat(ChatClient):
    def __init__(self, base_url: str | None, model: str, headers: dict[str, str] | None):
        self._base_url = base_url or "http://localhost:11434"
        self._model = model
        self._headers = headers or {}

    async def chat(self, messages: list[dict[str, Any]], **kwargs: Any) -> dict[str, Any]:
        if httpx is None:
            raise NotImplementedError("httpx not available for Ollama chat")

        # Prefer OpenAI-compatible if base_url exposes /v1
        use_v1 = "/v1" in (self._base_url or "")
        # Flatten messages to a single prompt for native API; preserve roles when possible
        if use_v1:
            url = _join_url(self._base_url, "/chat/completions")
            payload = {"model": self._model, "messages": messages}
            # Map common kwargs
            for k in ("temperature", "max_tokens", "top_p", "stop"):
                if k in kwargs and kwargs[k] is not None:
                    payload[k] = kwargs[k]
            async with httpx.AsyncClient(timeout=60.0) as client:
                try:
                    from datetime import datetime
                    started = datetime.utcnow()
                    resp = await client.post(url, json=payload, headers=self._headers)
                    resp.raise_for_status()
                    data = resp.json()
                    text = ""
                    choices = data.get("choices") or []
                    if choices:
                        msg = (choices[0] or {}).get("message") or {}
                        text = msg.get("content", "") or ""
                    duration_ms = int((datetime.utcnow() - started).total_seconds() * 1000)
                    logger.info(
                        "LLM request",
                        provider="ollama",
                        operation="chat",
                        model=self._model,
                        base_host=self._base_url,
                        messages=len(messages),
                        latency_ms=duration_ms,
                    )
                    return {"text": text, "raw": data, "usage": data.get("usage"), "model": data.get("model", self._model)}
                except httpx.TimeoutException as exc:
                    raise LLMTimeoutError(str(exc))
                except httpx.HTTPStatusError as exc:
                    status = exc.response.status_code if exc.response else None
                    if status == 401:
                        raise AuthError(str(exc))
                    if status == 429:
                        raise RateLimitedError(str(exc))
                    if status and 400 <= status < 500:
                        raise InvalidRequestError(str(exc))
                    raise ServerError(str(exc))
                except httpx.HTTPError as exc:
                    raise ServerError(str(exc))
        else:
            # Native API
            url = _join_url(self._base_url, "/api/chat")
            payload = {
                "model": self._model,
                "messages": messages,
                "stream": False,
            }
            if "temperature" in kwargs and kwargs["temperature"] is not None:
                payload["options"] = {"temperature": kwargs["temperature"]}
            async with httpx.AsyncClient(timeout=60.0) as client:
                try:
                    from datetime import datetime
                    started = datetime.utcnow()
                    resp = await client.post(url, json=payload, headers=self._headers)
                    resp.raise_for_status()
                    data = resp.json()
                    # Ollama native returns {"message": {"content": "..."}, ...}
                    text = ""
                    if isinstance(data.get("message"), dict):
                        text = data["message"].get("content", "") or ""
                    duration_ms = int((datetime.utcnow() - started).total_seconds() * 1000)
                    logger.info(
                        "LLM request",
                        provider="ollama",
                        operation="chat",
                        model=self._model,
                        base_host=self._base_url,
                        messages=len(messages),
                        latency_ms=duration_ms,
                    )
                    return {"text": text, "raw": data, "usage": None, "model": self._model}
                except httpx.TimeoutException as exc:
                    raise LLMTimeoutError(str(exc))
                except httpx.HTTPStatusError as exc:
                    status = exc.response.status_code if exc.response else None
                    if status == 401:
                        raise AuthError(str(exc))
                    if status == 429:
                        raise RateLimitedError(str(exc))
                    if status and 400 <= status < 500:
                        raise InvalidRequestError(str(exc))
                    raise ServerError(str(exc))
                except httpx.HTTPError as exc:
                    raise ServerError(str(exc))


class OllamaTokenizer(TokenCounter):
    def count(self, text: str) -> int:
        return len(text)


class OllamaProvider(LLMProvider):
    def __init__(self, settings: LLMSettings):
        self._settings = settings

    def embeddings(self) -> EmbeddingsClient:
        model = self._settings.models.get("embeddings", "")
        timeout = (self._settings.request.timeout_s if self._settings and self._settings.request else 30.0)
        return OllamaEmbeddings(
            self._settings.base_url,
            model,
            self._settings.headers,
            timeout_s=timeout,
            provider_options=self._settings.provider_options,
        )

    def chat(self) -> ChatClient:
        model = self._settings.models.get("chat", "")
        return OllamaChat(self._settings.base_url, model, self._settings.headers)

    def tokenizer(self) -> TokenCounter:
        return OllamaTokenizer()
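
A minimal end-to-end sketch against a local Ollama server, using the `OllamaEmbeddings` constructor exactly as defined above (the model name is an example, not a package default):

import asyncio

from qdrant_loader_core.llm.providers.ollama import OllamaEmbeddings


async def main() -> None:
    embeddings = OllamaEmbeddings(
        base_url="http://localhost:11434",  # no /v1 suffix -> native /api/embed with /api/embeddings fallback
        model="nomic-embed-text",
        headers=None,
        timeout_s=30.0,
        provider_options={"native_endpoint": "auto"},
    )
    vectors = await embeddings.embed(["first document", "second document"])
    print(len(vectors), "vectors of dimension", len(vectors[0]))


asyncio.run(main())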