qdrant-loader-core 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qdrant_loader_core/__init__.py +25 -0
- qdrant_loader_core/llm/__init__.py +17 -0
- qdrant_loader_core/llm/errors.py +22 -0
- qdrant_loader_core/llm/factory.py +90 -0
- qdrant_loader_core/llm/providers/__init__.py +8 -0
- qdrant_loader_core/llm/providers/azure_openai.py +71 -0
- qdrant_loader_core/llm/providers/ollama.py +284 -0
- qdrant_loader_core/llm/providers/openai.py +257 -0
- qdrant_loader_core/llm/ratelimit.py +20 -0
- qdrant_loader_core/llm/settings.py +133 -0
- qdrant_loader_core/llm/tokenization.py +38 -0
- qdrant_loader_core/llm/types.py +29 -0
- qdrant_loader_core/logging.py +279 -0
- qdrant_loader_core-0.7.1.dist-info/METADATA +34 -0
- qdrant_loader_core-0.7.1.dist-info/RECORD +17 -0
- qdrant_loader_core-0.7.1.dist-info/WHEEL +5 -0
- qdrant_loader_core-0.7.1.dist-info/top_level.txt +1 -0
qdrant_loader_core/__init__.py
@@ -0,0 +1,25 @@
+# qdrant-loader-core package root
+
+from .llm import (
+    ChatClient,
+    EmbeddingPolicy,
+    EmbeddingsClient,
+    LLMProvider,
+    LLMSettings,
+    RateLimitPolicy,
+    RequestPolicy,
+    TokenCounter,
+    create_provider,
+)
+
+__all__ = [
+    "EmbeddingsClient",
+    "ChatClient",
+    "TokenCounter",
+    "LLMProvider",
+    "LLMSettings",
+    "RequestPolicy",
+    "RateLimitPolicy",
+    "EmbeddingPolicy",
+    "create_provider",
+]
qdrant_loader_core/llm/__init__.py
@@ -0,0 +1,17 @@
+# Re-export core interfaces for convenience
+
+from .factory import create_provider
+from .settings import EmbeddingPolicy, LLMSettings, RateLimitPolicy, RequestPolicy
+from .types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+
+__all__ = [
+    "EmbeddingsClient",
+    "ChatClient",
+    "TokenCounter",
+    "LLMProvider",
+    "LLMSettings",
+    "RequestPolicy",
+    "RateLimitPolicy",
+    "EmbeddingPolicy",
+    "create_provider",
+]
qdrant_loader_core/llm/errors.py
@@ -0,0 +1,22 @@
+class LLMError(Exception):
+    pass
+
+
+class TimeoutError(LLMError):
+    pass
+
+
+class RateLimitedError(LLMError):
+    pass
+
+
+class InvalidRequestError(LLMError):
+    pass
+
+
+class AuthError(LLMError):
+    pass
+
+
+class ServerError(LLMError):
+    pass
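Note (not part of the package): a minimal sketch of how a caller might branch on this normalized error hierarchy; the untyped `provider` argument and the retry/ignore choices are assumptions for illustration only.

    from qdrant_loader_core.llm.errors import (
        AuthError,
        InvalidRequestError,
        LLMError,
        RateLimitedError,
    )

    async def embed_with_handling(provider, texts: list[str]) -> list[list[float]] | None:
        try:
            return await provider.embeddings().embed(texts)
        except RateLimitedError:
            return None                      # e.g. back off and retry later
        except (AuthError, InvalidRequestError):
            raise                            # configuration problems: surface immediately
        except LLMError:
            return None                      # timeouts / server errors: treat as transient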
qdrant_loader_core/llm/factory.py
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+from urllib.parse import urlparse
+
+from .providers.ollama import OllamaProvider
+from .providers.openai import OpenAIProvider
+
+try:
+    from .providers.azure_openai import AzureOpenAIProvider  # type: ignore
+except Exception:  # pragma: no cover - optional dependency surface
+    AzureOpenAIProvider = None  # type: ignore
+from .settings import LLMSettings
+from .types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+
+
+class _NoopEmbeddings(EmbeddingsClient):
+    async def embed(self, inputs: list[str]) -> list[list[float]]:
+        raise NotImplementedError("Embeddings provider not implemented")
+
+
+class _NoopChat(ChatClient):
+    async def chat(self, messages, **kwargs):  # type: ignore[no-untyped-def]
+        raise NotImplementedError("Chat provider not implemented")
+
+
+class _NoopTokenizer(TokenCounter):
+    def count(self, text: str) -> int:  # naive char-count fallback
+        return len(text)
+
+
+class _NoopProvider(LLMProvider):
+    def embeddings(self) -> EmbeddingsClient:
+        return _NoopEmbeddings()
+
+    def chat(self) -> ChatClient:
+        return _NoopChat()
+
+    def tokenizer(self) -> TokenCounter:
+        return _NoopTokenizer()
+
+
+def _safe_hostname(url: str | None) -> str | None:
+    if not url:
+        return None
+    try:
+        host = urlparse(url).hostname
+        return host.lower() if host else None
+    except Exception:
+        return None
+
+
+def create_provider(settings: LLMSettings) -> LLMProvider:
+    """Create a provider by settings.
+
+    Phase 0: route OpenAI/OpenAI-compatible to OpenAIProvider when available; otherwise return a noop provider.
+    Ollama returns a stub provider for now.
+    """
+    provider_name = (settings.provider or "").lower()
+    base_url = (settings.base_url or "")
+    base_host = _safe_hostname(base_url)
+
+    # Route Azure before generic OpenAI routing
+    is_azure = (
+        "azure" in provider_name
+        or (
+            base_host is not None
+            and (
+                base_host == "openai.azure.com"
+                or base_host.endswith(".openai.azure.com")
+                or base_host == "cognitiveservices.azure.com"
+                or base_host.endswith(".cognitiveservices.azure.com")
+            )
+        )
+    )
+    if is_azure and AzureOpenAIProvider is not None:  # type: ignore[truthy-bool]
+        try:
+            return AzureOpenAIProvider(settings)  # type: ignore[misc]
+        except Exception:
+            return _NoopProvider()
+
+    if "openai" in provider_name or "openai" in base_url.lower():
+        try:
+            return OpenAIProvider(settings)
+        except Exception:
+            return _NoopProvider()
+
+    if provider_name == "ollama" or (base_host in ("localhost", "127.0.0.1")):
+        return OllamaProvider(settings)
+
+    return _NoopProvider()
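Note (not part of the package): a minimal usage sketch of the factory routing above; the model names and key value are placeholders, and the token count reflects the Phase 0 character-count fallback.

    from qdrant_loader_core.llm import (
        EmbeddingPolicy,
        LLMSettings,
        RateLimitPolicy,
        RequestPolicy,
        create_provider,
    )

    settings = LLMSettings(
        provider="openai",
        base_url="https://api.openai.com/v1",
        api_key="sk-placeholder",  # placeholder, not a real key
        headers=None,
        models={"embeddings": "text-embedding-3-small", "chat": "gpt-4o-mini"},
        tokenizer="cl100k_base",
        request=RequestPolicy(),
        rate_limits=RateLimitPolicy(),
        embeddings=EmbeddingPolicy(vector_size=1536),
    )

    provider = create_provider(settings)        # "openai" in provider name -> OpenAIProvider
    print(provider.tokenizer().count("hello"))  # 5: Phase 0 counter is len(text)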
qdrant_loader_core/llm/providers/azure_openai.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from typing import Any
+from urllib.parse import urlparse
+
+try:
+    from openai import AzureOpenAI  # type: ignore
+except Exception:  # pragma: no cover - optional dependency surface
+    AzureOpenAI = None  # type: ignore
+
+from ...logging import LoggingConfig
+from ..settings import LLMSettings
+from ..types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+from .openai import OpenAIChat, OpenAIEmbeddings, _OpenAITokenCounter
+
+logger = LoggingConfig.get_logger(__name__)
+
+
+def _host_of(url: str | None) -> str | None:
+    if not url:
+        return None
+    try:
+        return urlparse(url).hostname or None
+    except Exception:
+        return None
+
+
+def _validate_azure_settings(settings: LLMSettings) -> None:
+    base_url = settings.base_url or ""
+    if "/openai/deployments" in base_url:
+        raise ValueError(
+            "Azure OpenAI base_url must be the resource root (e.g. https://<resource>.openai.azure.com). Do not include /openai/deployments/... in base_url."
+        )
+    if not (settings.api_version and isinstance(settings.api_version, str)):
+        raise ValueError(
+            "Azure OpenAI requires api_version (e.g. '2024-05-01-preview') in global.llm.api_version"
+        )
+
+
+class AzureOpenAIProvider(LLMProvider):
+    def __init__(self, settings: LLMSettings):
+        self._settings = settings
+        _validate_azure_settings(settings)
+
+        self._base_host = _host_of(settings.base_url)
+        if AzureOpenAI is None:
+            self._client = None
+        else:
+            # Prefer explicit azure_endpoint in provider_options; fallback to base_url
+            provider_opts = settings.provider_options or {}
+            endpoint = provider_opts.get("azure_endpoint") or settings.base_url
+            kwargs: dict[str, Any] = {
+                "api_key": settings.api_key,
+                "api_version": settings.api_version,
+            }
+            if endpoint:
+                kwargs["azure_endpoint"] = endpoint
+            self._client = AzureOpenAI(**{k: v for k, v in kwargs.items() if v is not None})
+
+    def embeddings(self) -> EmbeddingsClient:
+        model = self._settings.models.get("embeddings", "")
+        return OpenAIEmbeddings(self._client, model, self._base_host, provider_label="azure_openai")
+
+    def chat(self) -> ChatClient:
+        model = self._settings.models.get("chat", "")
+        return OpenAIChat(self._client, model, self._base_host, provider_label="azure_openai")
+
+    def tokenizer(self) -> TokenCounter:
+        return _OpenAITokenCounter(self._settings.tokenizer)
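Note (not part of the package): a sketch of a `global.llm` mapping that satisfies `_validate_azure_settings` and routes to this provider via the factory; the resource URL, key, deployment/model names, and api_version are placeholders.

    from qdrant_loader_core.llm import LLMSettings, create_provider

    global_config = {
        "llm": {
            "provider": "azure_openai",
            "base_url": "https://my-resource.openai.azure.com",  # resource root, no /openai/deployments/...
            "api_key": "<azure-api-key>",
            "api_version": "2024-05-01-preview",
            "models": {"embeddings": "text-embedding-3-small", "chat": "gpt-4o-mini"},
            "tokenizer": "cl100k_base",
        }
    }

    settings = LLMSettings.from_global_config(global_config)
    provider = create_provider(settings)  # "azure" in provider name -> AzureOpenAIProvider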
qdrant_loader_core/llm/providers/ollama.py
@@ -0,0 +1,284 @@
+from __future__ import annotations
+
+from typing import Any
+
+try:
+    import httpx  # type: ignore
+except Exception:  # pragma: no cover - optional dependency
+    httpx = None  # type: ignore
+
+from ...logging import LoggingConfig
+from ..errors import (
+    AuthError,
+    InvalidRequestError,
+    RateLimitedError,
+    ServerError,
+)
+from ..errors import (
+    TimeoutError as LLMTimeoutError,
+)
+from ..settings import LLMSettings
+from ..types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+
+logger = LoggingConfig.get_logger(__name__)
+
+
+def _join_url(base: str | None, path: str) -> str:
+    base = (base or "").rstrip("/")
+    path = path.lstrip("/")
+    return f"{base}/{path}" if base else f"/{path}"
+
+
+class OllamaEmbeddings(EmbeddingsClient):
+    def __init__(
+        self,
+        base_url: str | None,
+        model: str,
+        headers: dict[str, str] | None,
+        *,
+        timeout_s: float | None = None,
+        provider_options: dict[str, Any] | None = None,
+    ):
+        self._base_url = (base_url or "http://localhost:11434").rstrip("/")
+        self._model = model
+        self._headers = headers or {}
+        self._timeout_s = float(timeout_s) if timeout_s is not None else 30.0
+        self._provider_options = provider_options or {}
+
+    async def embed(self, inputs: list[str]) -> list[list[float]]:
+        if httpx is None:
+            raise NotImplementedError("httpx not available for Ollama embeddings")
+
+        # Prefer OpenAI-compatible if base_url seems to expose /v1
+        use_v1 = "/v1" in (self._base_url or "")
+        async with httpx.AsyncClient(timeout=self._timeout_s) as client:
+            try:
+                if use_v1:
+                    # OpenAI-compatible embeddings endpoint
+                    url = _join_url(self._base_url, "/embeddings")
+                    payload = {"model": self._model, "input": inputs}
+                    resp = await client.post(url, json=payload, headers=self._headers)
+                    resp.raise_for_status()
+                    data = resp.json()
+                    logger.info(
+                        "LLM request",
+                        provider="ollama",
+                        operation="embeddings",
+                        model=self._model,
+                        base_host=self._base_url,
+                        inputs=len(inputs),
+                        # latency for v1 path hard to compute here; omitted for now
+                    )
+                    return [item["embedding"] for item in data.get("data", [])]
+                else:
+                    # Determine native endpoint preference: embed | embeddings | auto (default)
+                    native_pref = str(self._provider_options.get("native_endpoint", "auto")).lower()
+                    prefer_embed = native_pref != "embeddings"
+
+                    # Try batch embed first when preferred
+                    if prefer_embed:
+                        url = _join_url(self._base_url, "/api/embed")
+                        payload = {"model": self._model, "input": inputs}
+                        try:
+                            resp = await client.post(
+                                url, json=payload, headers=self._headers
+                            )
+                            resp.raise_for_status()
+                            data = resp.json()
+                            vectors = data.get("embeddings")
+                            if not isinstance(vectors, list) or (
+                                len(vectors) != len(inputs)
+                            ):
+                                raise ValueError(
+                                    "Invalid embeddings response from /api/embed"
+                                )
+                            # Normalize to list[list[float]]
+                            norm = [list(vec) for vec in vectors]
+                            logger.info(
+                                "LLM request",
+                                provider="ollama",
+                                operation="embeddings",
+                                model=self._model,
+                                base_host=self._base_url,
+                                inputs=len(inputs),
+                                # latency for native batch path not measured in this stub
+                            )
+                            return norm
+                        except httpx.HTTPStatusError as exc:
+                            status = exc.response.status_code if exc.response else None
+                            # Fallback for servers that don't support /api/embed
+                            if status not in (404, 405, 501):
+                                raise
+
+                    # Per-item embeddings endpoint fallback or preference
+                    url = _join_url(self._base_url, "/api/embeddings")
+                    vectors2: list[list[float]] = []
+                    for text in inputs:
+                        payload = {"model": self._model, "input": text}
+                        resp = await client.post(
+                            url, json=payload, headers=self._headers
+                        )
+                        resp.raise_for_status()
+                        data = resp.json()
+                        emb = data.get("embedding")
+                        if emb is None and isinstance(data.get("data"), dict):
+                            emb = data["data"].get("embedding")
+                        if emb is None:
+                            raise ValueError(
+                                "Invalid embedding response from /api/embeddings"
+                            )
+                        vectors2.append(list(emb))
+                    logger.info(
+                        "LLM request",
+                        provider="ollama",
+                        operation="embeddings",
+                        model=self._model,
+                        base_host=self._base_url,
+                        inputs=len(inputs),
+                        # latency for per-item path not measured in this stub
+                    )
+                    return vectors2
+            except httpx.TimeoutException as exc:
+                raise LLMTimeoutError(str(exc))
+            except httpx.HTTPStatusError as exc:
+                status = exc.response.status_code if exc.response else None
+                if status == 401:
+                    raise AuthError(str(exc))
+                if status == 429:
+                    raise RateLimitedError(str(exc))
+                if status and 400 <= status < 500:
+                    raise InvalidRequestError(str(exc))
+                raise ServerError(str(exc))
+            except httpx.HTTPError as exc:
+                raise ServerError(str(exc))
+
+
+class OllamaChat(ChatClient):
+    def __init__(self, base_url: str | None, model: str, headers: dict[str, str] | None):
+        self._base_url = base_url or "http://localhost:11434"
+        self._model = model
+        self._headers = headers or {}
+
+    async def chat(self, messages: list[dict[str, Any]], **kwargs: Any) -> dict[str, Any]:
+        if httpx is None:
+            raise NotImplementedError("httpx not available for Ollama chat")
+
+        # Prefer OpenAI-compatible if base_url exposes /v1
+        use_v1 = "/v1" in (self._base_url or "")
+        # Flatten messages to a single prompt for native API; preserve roles when possible
+        if use_v1:
+            url = _join_url(self._base_url, "/chat/completions")
+            payload = {"model": self._model, "messages": messages}
+            # Map common kwargs
+            for k in ("temperature", "max_tokens", "top_p", "stop"):
+                if k in kwargs and kwargs[k] is not None:
+                    payload[k] = kwargs[k]
+            async with httpx.AsyncClient(timeout=60.0) as client:
+                try:
+                    from datetime import datetime
+                    started = datetime.utcnow()
+                    resp = await client.post(url, json=payload, headers=self._headers)
+                    resp.raise_for_status()
+                    data = resp.json()
+                    text = ""
+                    choices = data.get("choices") or []
+                    if choices:
+                        msg = (choices[0] or {}).get("message") or {}
+                        text = msg.get("content", "") or ""
+                    duration_ms = int((datetime.utcnow() - started).total_seconds() * 1000)
+                    logger.info(
+                        "LLM request",
+                        provider="ollama",
+                        operation="chat",
+                        model=self._model,
+                        base_host=self._base_url,
+                        messages=len(messages),
+                        latency_ms=duration_ms,
+                    )
+                    return {"text": text, "raw": data, "usage": data.get("usage"), "model": data.get("model", self._model)}
+                except httpx.TimeoutException as exc:
+                    raise LLMTimeoutError(str(exc))
+                except httpx.HTTPStatusError as exc:
+                    status = exc.response.status_code if exc.response else None
+                    if status == 401:
+                        raise AuthError(str(exc))
+                    if status == 429:
+                        raise RateLimitedError(str(exc))
+                    if status and 400 <= status < 500:
+                        raise InvalidRequestError(str(exc))
+                    raise ServerError(str(exc))
+                except httpx.HTTPError as exc:
+                    raise ServerError(str(exc))
+        else:
+            # Native API
+            url = _join_url(self._base_url, "/api/chat")
+            payload = {
+                "model": self._model,
+                "messages": messages,
+                "stream": False,
+            }
+            if "temperature" in kwargs and kwargs["temperature"] is not None:
+                payload["options"] = {"temperature": kwargs["temperature"]}
+            async with httpx.AsyncClient(timeout=60.0) as client:
+                try:
+                    from datetime import datetime
+                    started = datetime.utcnow()
+                    resp = await client.post(url, json=payload, headers=self._headers)
+                    resp.raise_for_status()
+                    data = resp.json()
+                    # Ollama native returns {"message": {"content": "..."}, ...}
+                    text = ""
+                    if isinstance(data.get("message"), dict):
+                        text = data["message"].get("content", "") or ""
+                    duration_ms = int((datetime.utcnow() - started).total_seconds() * 1000)
+                    logger.info(
+                        "LLM request",
+                        provider="ollama",
+                        operation="chat",
+                        model=self._model,
+                        base_host=self._base_url,
+                        messages=len(messages),
+                        latency_ms=duration_ms,
+                    )
+                    return {"text": text, "raw": data, "usage": None, "model": self._model}
+                except httpx.TimeoutException as exc:
+                    raise LLMTimeoutError(str(exc))
+                except httpx.HTTPStatusError as exc:
+                    status = exc.response.status_code if exc.response else None
+                    if status == 401:
+                        raise AuthError(str(exc))
+                    if status == 429:
+                        raise RateLimitedError(str(exc))
+                    if status and 400 <= status < 500:
+                        raise InvalidRequestError(str(exc))
+                    raise ServerError(str(exc))
+                except httpx.HTTPError as exc:
+                    raise ServerError(str(exc))
+
+
+class OllamaTokenizer(TokenCounter):
+    def count(self, text: str) -> int:
+        return len(text)
+
+
+class OllamaProvider(LLMProvider):
+    def __init__(self, settings: LLMSettings):
+        self._settings = settings
+
+    def embeddings(self) -> EmbeddingsClient:
+        model = self._settings.models.get("embeddings", "")
+        timeout = (self._settings.request.timeout_s if self._settings and self._settings.request else 30.0)
+        return OllamaEmbeddings(
+            self._settings.base_url,
+            model,
+            self._settings.headers,
+            timeout_s=timeout,
+            provider_options=self._settings.provider_options,
+        )
+
+    def chat(self) -> ChatClient:
+        model = self._settings.models.get("chat", "")
+        return OllamaChat(self._settings.base_url, model, self._settings.headers)
+
+    def tokenizer(self) -> TokenCounter:
+        return OllamaTokenizer()
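Note (not part of the package): a sketch of pointing the provider at a local Ollama server; the model names and vector size are examples, and a base_url ending in /v1 would switch both clients to the OpenAI-compatible endpoints instead of the native /api/* ones.

    import asyncio

    from qdrant_loader_core.llm import (
        EmbeddingPolicy,
        LLMSettings,
        RateLimitPolicy,
        RequestPolicy,
        create_provider,
    )

    settings = LLMSettings(
        provider="ollama",
        base_url="http://localhost:11434",  # native /api/* endpoints
        api_key=None,
        headers=None,
        models={"embeddings": "nomic-embed-text", "chat": "llama3"},
        tokenizer="none",
        request=RequestPolicy(timeout_s=60.0),
        rate_limits=RateLimitPolicy(),
        embeddings=EmbeddingPolicy(vector_size=768),
    )

    provider = create_provider(settings)  # provider == "ollama" -> OllamaProvider

    async def main() -> None:
        vectors = await provider.embeddings().embed(["hello from ollama"])
        reply = await provider.chat().chat([{"role": "user", "content": "Say hi"}], temperature=0.2)
        print(len(vectors[0]), reply["text"])

    asyncio.run(main())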
qdrant_loader_core/llm/providers/openai.py
@@ -0,0 +1,257 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any
+from urllib.parse import urlparse
+
+try:
+    from openai import OpenAI  # type: ignore
+    # New-style exception classes (OpenAI Python SDK >=1.x)
+    try:  # nested to avoid failing entirely on older clients
+        from openai import (  # type: ignore
+            APIConnectionError,
+            APIStatusError,
+            APITimeoutError,
+            AuthenticationError,
+            BadRequestError,
+            RateLimitError,
+        )
+    except Exception:  # pragma: no cover - optional dependency surface
+        APIConnectionError = APIStatusError = APITimeoutError = AuthenticationError = BadRequestError = RateLimitError = ()  # type: ignore
+except Exception:  # pragma: no cover - optional dependency at this phase
+    OpenAI = None  # type: ignore
+    APIConnectionError = APIStatusError = APITimeoutError = AuthenticationError = BadRequestError = RateLimitError = ()  # type: ignore
+
+from ...logging import LoggingConfig
+from ..errors import (
+    AuthError,
+    InvalidRequestError,
+    LLMError,
+    RateLimitedError,
+    ServerError,
+)
+from ..errors import (
+    TimeoutError as LLMTimeoutError,
+)
+from ..settings import LLMSettings
+from ..types import ChatClient, EmbeddingsClient, LLMProvider, TokenCounter
+
+logger = LoggingConfig.get_logger(__name__)
+
+
+def _safe_host(url: str | None) -> str | None:
+    if not url:
+        return None
+    try:
+        return urlparse(url).hostname or None
+    except Exception:
+        return None
+
+
+def _map_openai_exception(exc: Exception) -> LLMError:
+    try:
+        # Rate limit
+        if RateLimitError and isinstance(exc, RateLimitError):  # type: ignore[arg-type]
+            return RateLimitedError(str(exc))
+        # Timeout
+        if APITimeoutError and isinstance(exc, APITimeoutError):  # type: ignore[arg-type]
+            return LLMTimeoutError(str(exc))
+        # Auth
+        if AuthenticationError and isinstance(exc, AuthenticationError):  # type: ignore[arg-type]
+            return AuthError(str(exc))
+        # Bad request / invalid params
+        if BadRequestError and isinstance(exc, BadRequestError):  # type: ignore[arg-type]
+            return InvalidRequestError(str(exc))
+        # API status error (typically non-2xx)
+        if APIStatusError and isinstance(exc, APIStatusError):  # type: ignore[arg-type]
+            # Best-effort: check for status code
+            status_code = getattr(exc, "status_code", None) or getattr(
+                getattr(exc, "response", None), "status_code", None
+            )
+            if isinstance(status_code, int) and 400 <= status_code < 500:
+                if status_code == 429:
+                    return RateLimitedError(str(exc))
+                if status_code in (401, 403):
+                    return AuthError(str(exc))
+                return InvalidRequestError(str(exc))
+            return ServerError(str(exc))
+        # Connection-level errors
+        if APIConnectionError and isinstance(exc, APIConnectionError):  # type: ignore[arg-type]
+            return ServerError(str(exc))
+    except Exception:
+        pass
+    # Fallback
+    return ServerError(str(exc))
+
+
+class _OpenAITokenCounter(TokenCounter):
+    def __init__(self, tokenizer: str):
+        self._tokenizer = tokenizer
+
+    def count(self, text: str) -> int:
+        # Phase 0: fallback to naive length; real tiktoken impl to come later
+        return len(text)
+
+
+class OpenAIEmbeddings(EmbeddingsClient):
+    def __init__(
+        self, client: Any, model: str, base_host: str | None, *, provider_label: str = "openai"
+    ):
+        self._client = client
+        self._model = model
+        self._base_host = base_host
+        self._provider_label = provider_label
+
+    async def embed(self, inputs: list[str]) -> list[list[float]]:
+        if not self._client:
+            raise NotImplementedError("OpenAI client not available")
+        # Use thread offloading to keep async interface consistent with sync client
+        import asyncio
+
+        started = datetime.utcnow()
+        try:
+            response = await asyncio.to_thread(
+                self._client.embeddings.create, model=self._model, input=inputs
+            )
+            duration_ms = int((datetime.utcnow() - started).total_seconds() * 1000)
+            try:
+                logger.info(
+                    "LLM request",
+                    provider=self._provider_label,
+                    operation="embeddings",
+                    model=self._model,
+                    base_host=self._base_host,
+                    inputs=len(inputs),
+                    latency_ms=duration_ms,
+                )
+            except Exception:
+                pass
+            return [item.embedding for item in response.data]
+        except Exception as exc:  # Normalize errors
+            mapped = _map_openai_exception(exc)
+            try:
+                logger.warning(
+                    "LLM error",
+                    provider=self._provider_label,
+                    operation="embeddings",
+                    model=self._model,
+                    base_host=self._base_host,
+                    error=type(exc).__name__,
+                )
+            except Exception:
+                pass
+            raise mapped
+
+
+class OpenAIChat(ChatClient):
+    def __init__(
+        self, client: Any, model: str, base_host: str | None, *, provider_label: str = "openai"
+    ):
+        self._client = client
+        self._model = model
+        self._base_host = base_host
+        self._provider_label = provider_label
+
+    async def chat(
+        self, messages: list[dict[str, Any]], **kwargs: Any
+    ) -> dict[str, Any]:
+        if not self._client:
+            raise NotImplementedError("OpenAI client not available")
+
+        # Normalize kwargs to OpenAI python client parameters
+        create_kwargs: dict[str, Any] = {}
+        for key in ("temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty", "stop", "seed", "response_format"):
+            if key in kwargs and kwargs[key] is not None:
+                create_kwargs[key] = kwargs[key]
+
+        # Allow model override per-call
+        model_name = kwargs.pop("model", self._model)
+
+        import asyncio
+
+        # The OpenAI python client call is sync for chat.completions
+        started = datetime.utcnow()
+        try:
+            response = await asyncio.to_thread(
+                self._client.chat.completions.create,
+                model=model_name,
+                messages=messages,
+                **create_kwargs,
+            )
+            duration_ms = int((datetime.utcnow() - started).total_seconds() * 1000)
+            try:
+                logger.info(
+                    "LLM request",
+                    provider=self._provider_label,
+                    operation="chat",
+                    model=model_name,
+                    base_host=self._base_host,
+                    messages=len(messages),
+                    latency_ms=duration_ms,
+                )
+            except Exception:
+                pass
+
+            # Normalize to provider-agnostic dict
+            choice0 = response.choices[0] if getattr(response, "choices", None) else None
+            text = ""
+            if choice0 is not None:
+                message = getattr(choice0, "message", None)
+                if message is not None:
+                    text = getattr(message, "content", "") or ""
+
+            usage = getattr(response, "usage", None)
+            normalized_usage = None
+            if usage is not None:
+                normalized_usage = {
+                    "prompt_tokens": getattr(usage, "prompt_tokens", None),
+                    "completion_tokens": getattr(usage, "completion_tokens", None),
+                    "total_tokens": getattr(usage, "total_tokens", None),
+                }
+
+            return {
+                "text": text,
+                "raw": response,
+                "usage": normalized_usage,
+                "model": getattr(response, "model", model_name),
+            }
+        except Exception as exc:
+            mapped = _map_openai_exception(exc)
+            try:
+                logger.warning(
+                    "LLM error",
+                    provider=self._provider_label,
+                    operation="chat",
+                    model=model_name,
+                    base_host=self._base_host,
+                    error=type(exc).__name__,
+                )
+            except Exception:
+                pass
+            raise mapped
+
+
+class OpenAIProvider(LLMProvider):
+    def __init__(self, settings: LLMSettings):
+        self._settings = settings
+        self._base_host = _safe_host(settings.base_url)
+        if OpenAI is None:
+            self._client = None
+        else:
+            kwargs: dict[str, Any] = {}
+            if settings.base_url:
+                kwargs["base_url"] = settings.base_url
+            if settings.api_key:
+                kwargs["api_key"] = settings.api_key
+            self._client = OpenAI(**kwargs)
+
+    def embeddings(self) -> EmbeddingsClient:
+        model = self._settings.models.get("embeddings", "")
+        return OpenAIEmbeddings(self._client, model, self._base_host, provider_label="openai")
+
+    def chat(self) -> ChatClient:
+        model = self._settings.models.get("chat", "")
+        return OpenAIChat(self._client, model, self._base_host, provider_label="openai")
+
+    def tokenizer(self) -> TokenCounter:
+        return _OpenAITokenCounter(self._settings.tokenizer)
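Note (not part of the package): a sketch of consuming the provider-agnostic dict returned by the chat clients above; the untyped `provider` argument and prompt text are illustrative only.

    async def summarize(provider, text: str) -> str:
        chat = provider.chat()
        result = await chat.chat(
            [
                {"role": "system", "content": "You are a terse summarizer."},
                {"role": "user", "content": text},
            ],
            temperature=0.0,
            max_tokens=128,
        )
        # Normalized keys produced by OpenAIChat/OllamaChat: "text", "raw", "usage", "model"
        usage = result.get("usage") or {}
        print(result["model"], usage.get("total_tokens"))
        return result["text"]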
qdrant_loader_core/llm/ratelimit.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+import asyncio
+
+
+class AsyncRateLimiter:
+    """Minimal async rate limiter placeholder.
+
+    In later phases, enforce RPM/TPM and concurrency.
+    """
+
+    def __init__(self, max_concurrency: int = 5):
+        self._semaphore = asyncio.Semaphore(max_concurrency)
+
+    async def __aenter__(self):
+        await self._semaphore.acquire()
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb):
+        self._semaphore.release()
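Note (not part of the package): a runnable sketch of guarding concurrent calls with the semaphore-based limiter above; the sleep stands in for a provider request.

    import asyncio

    from qdrant_loader_core.llm.ratelimit import AsyncRateLimiter

    limiter = AsyncRateLimiter(max_concurrency=2)

    async def guarded(i: int) -> int:
        async with limiter:            # at most two coroutines inside at once
            await asyncio.sleep(0.1)   # stand-in for an embeddings/chat call
            return i

    async def main() -> None:
        print(await asyncio.gather(*(guarded(i) for i in range(5))))

    asyncio.run(main())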
qdrant_loader_core/llm/settings.py
@@ -0,0 +1,133 @@
+from __future__ import annotations
+
+import warnings
+from collections.abc import Mapping
+from dataclasses import dataclass
+from typing import Any
+from urllib.parse import urlparse
+
+
+@dataclass
+class RequestPolicy:
+    timeout_s: float = 30.0
+    max_retries: int = 3
+    backoff_s_min: float = 1.0
+    backoff_s_max: float = 30.0
+
+
+@dataclass
+class RateLimitPolicy:
+    rpm: int | None = None
+    tpm: int | None = None
+    concurrency: int = 5
+
+
+@dataclass
+class EmbeddingPolicy:
+    vector_size: int | None = None
+
+
+@dataclass
+class LLMSettings:
+    provider: str
+    base_url: str | None
+    api_key: str | None
+    headers: dict[str, str] | None
+    models: dict[str, str]
+    tokenizer: str
+    request: RequestPolicy
+    rate_limits: RateLimitPolicy
+    embeddings: EmbeddingPolicy
+    api_version: str | None = None
+    provider_options: dict[str, Any] | None = None
+
+    @staticmethod
+    def from_global_config(global_data: Mapping[str, Any]) -> LLMSettings:
+        """Construct settings from a parsed global configuration dict.
+
+        Supports two schemas:
+        - New: global.llm
+        - Legacy: global.embedding and file_conversion.markitdown
+        """
+        llm = (global_data or {}).get("llm") or {}
+        if llm:
+            return LLMSettings(
+                provider=str(llm.get("provider")),
+                base_url=llm.get("base_url"),
+                api_key=llm.get("api_key"),
+                api_version=llm.get("api_version"),
+                headers=dict(llm.get("headers") or {}),
+                models=dict(llm.get("models") or {}),
+                tokenizer=str(llm.get("tokenizer", "none")),
+                request=RequestPolicy(**(llm.get("request") or {})),
+                rate_limits=RateLimitPolicy(**(llm.get("rate_limits") or {})),
+                embeddings=EmbeddingPolicy(**(llm.get("embeddings") or {})),
+                provider_options=dict(llm.get("provider_options") or {}),
+            )
+
+        # Legacy mapping
+        embedding = (global_data or {}).get("embedding") or {}
+        file_conv = (global_data or {}).get("file_conversion") or {}
+        markit = (
+            (file_conv.get("markitdown") or {}) if isinstance(file_conv, dict) else {}
+        )
+
+        endpoint = embedding.get("endpoint")
+        # Detect Azure OpenAI in legacy endpoint to set provider accordingly
+        endpoint_l = (endpoint or "").lower() if isinstance(endpoint, str) else ""
+        host: str | None = None
+        if endpoint_l:
+            try:
+                host = urlparse(endpoint_l).hostname or None
+            except Exception:
+                host = None
+        is_azure = False
+        if host:
+            host_l = host.lower()
+            is_azure = (
+                host_l == "openai.azure.com"
+                or host_l.endswith(".openai.azure.com")
+                or host_l == "cognitiveservices.azure.com"
+                or host_l.endswith(".cognitiveservices.azure.com")
+            )
+        if is_azure:
+            provider = "azure_openai"
+        elif "openai" in endpoint_l:
+            provider = "openai"
+        else:
+            provider = "openai_compat"
+        models = {
+            "embeddings": embedding.get("model"),
+        }
+        if isinstance(markit.get("llm_model"), str):
+            models["chat"] = markit.get("llm_model")
+
+        # Emit deprecation warnings when relying on legacy fields
+        try:
+            if embedding or markit:
+                warnings.warn(
+                    (
+                        "Using legacy configuration fields is deprecated. "
+                        "Please migrate to 'global.llm' (see docs: configuration reference). "
+                        "Mapped from: global.embedding.* and/or file_conversion.markitdown.*"
+                    ),
+                    category=DeprecationWarning,
+                    stacklevel=2,
+                )
+        except Exception:
+            # Best-effort warning; never break mapping
+            pass
+
+        return LLMSettings(
+            provider=provider,
+            base_url=endpoint,
+            api_key=embedding.get("api_key"),
+            api_version=None,
+            headers=None,
+            models=models,
+            tokenizer=str(embedding.get("tokenizer", "none")),
+            request=RequestPolicy(),
+            rate_limits=RateLimitPolicy(),
+            embeddings=EmbeddingPolicy(vector_size=embedding.get("vector_size")),
+            provider_options=None,
+        )
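Note (not part of the package): a sketch of the legacy-schema mapping performed by LLMSettings.from_global_config; the endpoint, key, and model values are placeholders.

    from qdrant_loader_core.llm import LLMSettings

    legacy_global = {
        "embedding": {
            "endpoint": "https://api.openai.com/v1",
            "api_key": "sk-placeholder",
            "model": "text-embedding-3-small",
            "tokenizer": "cl100k_base",
            "vector_size": 1536,
        },
        "file_conversion": {"markitdown": {"llm_model": "gpt-4o-mini"}},
    }

    settings = LLMSettings.from_global_config(legacy_global)  # emits a DeprecationWarning
    print(settings.provider)                # "openai" (detected from the endpoint)
    print(settings.models)                  # {"embeddings": "text-embedding-3-small", "chat": "gpt-4o-mini"}
    print(settings.embeddings.vector_size)  # 1536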
qdrant_loader_core/llm/tokenization.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from .types import TokenCounter
+
+try:  # Optional dependency
+    import tiktoken  # type: ignore
+except Exception:  # pragma: no cover - absence is acceptable
+    tiktoken = None  # type: ignore
+
+
+class CharCountTokenCounter(TokenCounter):
+    def count(self, text: str) -> int:
+        return len(text)
+
+
+class TiktokenTokenCounter(TokenCounter):
+    """Token counter backed by tiktoken; falls back gracefully when unavailable.
+
+    If the requested encoding cannot be loaded or encode fails, falls back to
+    a simple character count to avoid runtime errors.
+    """
+
+    def __init__(self, encoding_name: str):
+        self._encoding_name = encoding_name
+        self._encoding = None
+        if tiktoken is not None:
+            try:
+                self._encoding = tiktoken.get_encoding(encoding_name)
+            except Exception:
+                self._encoding = None
+
+    def count(self, text: str) -> int:
+        if self._encoding is not None:
+            try:
+                return len(self._encoding.encode(text))
+            except Exception:
+                pass
+        return len(text)
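Note (not part of the package): a minimal sketch of both counters; "cl100k_base" is just an example encoding name, and the tiktoken-backed counter silently degrades to a character count when the extra is not installed.

    from qdrant_loader_core.llm.tokenization import CharCountTokenCounter, TiktokenTokenCounter

    counter = TiktokenTokenCounter("cl100k_base")
    print(counter.count("qdrant loader core"))                  # token count, or len() if tiktoken is absent
    print(CharCountTokenCounter().count("qdrant loader core"))  # always the character count (18)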
qdrant_loader_core/llm/types.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from typing import Any, Protocol, runtime_checkable
+
+
+@runtime_checkable
+class EmbeddingsClient(Protocol):
+    async def embed(self, inputs: list[str]) -> list[list[float]]: ...
+
+
+@runtime_checkable
+class ChatClient(Protocol):
+    async def chat(
+        self, messages: list[dict[str, Any]], **kwargs: Any
+    ) -> dict[str, Any]: ...
+
+
+@runtime_checkable
+class TokenCounter(Protocol):
+    def count(self, text: str) -> int: ...
+
+
+@runtime_checkable
+class LLMProvider(Protocol):
+    def embeddings(self) -> EmbeddingsClient: ...
+
+    def chat(self) -> ChatClient: ...
+
+    def tokenizer(self) -> TokenCounter: ...
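Note (not part of the package): because the protocols above are runtime_checkable, any structurally matching class passes isinstance checks; the WordCounter class below is a hypothetical example.

    from qdrant_loader_core.llm.types import TokenCounter

    class WordCounter:
        def count(self, text: str) -> int:
            return len(text.split())

    counter = WordCounter()
    print(isinstance(counter, TokenCounter))    # True: matches the protocol structurally
    print(counter.count("qdrant loader core"))  # 3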
qdrant_loader_core/logging.py
@@ -0,0 +1,279 @@
+"""Unified logging configuration for qdrant-loader ecosystem.
+
+Provides:
+- structlog setup (console/json/file) with redaction
+- stdlib logging bridge with redaction filter
+- optional suppression of noisy third-party logs
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+from typing import Any
+
+import structlog
+from structlog.stdlib import LoggerFactory
+
+try:
+    # ExtraAdder is available in structlog >= 20
+    from structlog.stdlib import ExtraAdder  # type: ignore
+except Exception:  # pragma: no cover - fallback when absent
+    ExtraAdder = None  # type: ignore
+
+
+class QdrantVersionFilter(logging.Filter):
+    def filter(self, record: logging.LogRecord) -> bool:
+        try:
+            return "version check" not in record.getMessage().lower()
+        except Exception:
+            return True
+
+
+class ApplicationFilter(logging.Filter):
+    def filter(self, record: logging.LogRecord) -> bool:
+        # Allow all logs by default; app packages may add their own filters
+        return True
+
+
+class RedactionFilter(logging.Filter):
+    """Redacts obvious secrets from stdlib log records."""
+
+    # Heuristics for tokens/keys in plain strings
+    TOKEN_PATTERNS = [
+        re.compile(r"sk-[A-Za-z0-9_\-]{6,}"),
+        re.compile(r"tok-[A-Za-z0-9_\-]{6,}"),
+        re.compile(r"(?i)(api_key|authorization|token|access_token|secret|password)\s*[:=]\s*([^\s]+)"),
+        re.compile(r"Bearer\s+[A-Za-z0-9_\-\.]+"),
+    ]
+
+    # Keys commonly used for secrets in structlog event dictionaries
+    SENSITIVE_KEYS = {
+        "api_key",
+        "llm_api_key",
+        "authorization",
+        "Authorization",
+        "token",
+        "access_token",
+        "secret",
+        "password",
+    }
+
+    def _redact_text(self, text: str) -> str:
+        def mask(m: re.Match[str]) -> str:
+            s = m.group(0)
+            if len(s) <= 8:
+                return "***REDACTED***"
+            return s[:2] + "***REDACTED***" + s[-2:]
+
+        redacted = text
+        for pat in self.TOKEN_PATTERNS:
+            redacted = pat.sub(mask, redacted)
+        return redacted
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        try:
+            redaction_detected = False
+
+            # Args may contain secrets; best-effort mask strings and detect changes
+            if isinstance(record.args, tuple):
+                new_args = []
+                for a in record.args:
+                    if isinstance(a, str):
+                        red_a = self._redact_text(a)
+                        if red_a != a:
+                            redaction_detected = True
+                        new_args.append(red_a)
+                    else:
+                        new_args.append(a)
+                record.args = tuple(new_args)
+
+            # Redact raw message only when it contains no formatting placeholders
+            # to avoid interfering with %-style or {}-style formatting
+            if isinstance(record.msg, str):
+                try:
+                    has_placeholders = ("%" in record.msg) or ("{" in record.msg)
+                except Exception:
+                    has_placeholders = True
+                if not has_placeholders:
+                    red_msg = self._redact_text(record.msg)
+                    if red_msg != record.msg:
+                        record.msg = red_msg
+                        redaction_detected = True
+
+            # If structlog extras contain sensitive keys, mark as redacted
+            try:
+                if any(
+                    (k in self.SENSITIVE_KEYS and bool(record.__dict__.get(k)))
+                    for k in record.__dict__.keys()
+                ):
+                    redaction_detected = True
+            except Exception:
+                pass
+
+            # Ensure a visible redaction marker appears in the captured message
+            if redaction_detected:
+                try:
+                    if isinstance(record.msg, str) and "***REDACTED***" not in record.msg:
+                        # Append a marker in a way that won't interfere with %-formatting
+                        record.msg = f"{record.msg} ***REDACTED***"
+                except Exception:
+                    pass
+        except Exception:
+            pass
+        return True
+
+
+class CleanFormatter(logging.Formatter):
+    """Formatter that removes ANSI color codes for clean file output."""
+
+    def format(self, record: logging.LogRecord) -> str:
+        message = super().format(record)
+        try:
+            ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
+            return ansi_escape.sub("", message)
+        except Exception:
+            return message
+
+
+def _redact_processor(logger: Any, method_name: str, event_dict: dict[str, Any]) -> dict[str, Any]:
+    """Structlog processor to redact sensitive fields in event_dict."""
+    sensitive_keys = {
+        "api_key",
+        "llm_api_key",
+        "authorization",
+        "Authorization",
+        "token",
+        "access_token",
+        "secret",
+        "password",
+    }
+
+    def mask(value: str) -> str:
+        try:
+            if not isinstance(value, str) or not value:
+                return "***REDACTED***"
+            if len(value) <= 8:
+                return "***REDACTED***"
+            return value[:2] + "***REDACTED***" + value[-2:]
+        except Exception:
+            return "***REDACTED***"
+
+    def deep_redact(obj: Any) -> Any:
+        try:
+            if isinstance(obj, dict):
+                return {k: (mask(v) if k in sensitive_keys and isinstance(v, str) else deep_redact(v)) for k, v in obj.items()}
+            if isinstance(obj, list):
+                return [deep_redact(i) for i in obj]
+            return obj
+        except Exception:
+            return obj
+
+    return deep_redact(event_dict)
+
+
+class LoggingConfig:
+    """Core logging setup with structlog + stdlib redaction and filters."""
+
+    _initialized = False
+
+    @classmethod
+    def setup(
+        cls,
+        *,
+        level: str = "INFO",
+        format: str = "console",  # "console" | "json"
+        file: str | None = None,
+        clean_output: bool = True,
+        suppress_qdrant_warnings: bool = True,
+        disable_console: bool | None = None,
+    ) -> None:
+        # Env override for console toggling (e.g., MCP server)
+        if disable_console is None:
+            disable_console = os.getenv("MCP_DISABLE_CONSOLE_LOGGING", "").lower() == "true"
+
+        try:
+            numeric_level = getattr(logging, level.upper())
+        except AttributeError:
+            raise ValueError(f"Invalid log level: {level}") from None
+
+        # Reset structlog defaults but preserve existing stdlib handlers (e.g., pytest caplog)
+        structlog.reset_defaults()
+
+        handlers: list[logging.Handler] = []
+
+        # Choose timestamp format and final renderer for structlog messages
+        if clean_output and format == "console":
+            ts_fmt = "%H:%M:%S"
+            final_renderer = structlog.dev.ConsoleRenderer(colors=True)
+        else:
+            ts_fmt = "iso"
+            final_renderer = (
+                structlog.processors.JSONRenderer()
+                if format == "json"
+                else structlog.dev.ConsoleRenderer(colors=True)
+            )
+
+        if not disable_console:
+            console_handler = logging.StreamHandler()
+            console_handler.setFormatter(logging.Formatter("%(message)s"))
+            console_handler.addFilter(ApplicationFilter())
+            console_handler.addFilter(RedactionFilter())
+            handlers.append(console_handler)
+
+        if file:
+            file_handler = logging.FileHandler(file)
+            # Use CleanFormatter to strip ANSI sequences from structlog console renderer output
+            file_handler.setFormatter(CleanFormatter("%(message)s"))
+            file_handler.addFilter(ApplicationFilter())
+            file_handler.addFilter(RedactionFilter())
+            handlers.append(file_handler)
+
+        # Attach our handlers without removing existing ones (so pytest caplog keeps working)
+        root_logger = logging.getLogger()
+        root_logger.setLevel(numeric_level)
+        for h in handlers:
+            root_logger.addHandler(h)
+
+        # Add global filters so captured logs (e.g., pytest caplog) are also redacted
+        # Avoid duplicate filters if setup() is called multiple times
+        has_redaction = any(isinstance(f, RedactionFilter) for f in root_logger.filters)
+        if not has_redaction:
+            root_logger.addFilter(RedactionFilter())
+        has_app_filter = any(isinstance(f, ApplicationFilter) for f in root_logger.filters)
+        if not has_app_filter:
+            root_logger.addFilter(ApplicationFilter())
+
+        # Optional suppressions
+        if suppress_qdrant_warnings:
+            logging.getLogger("qdrant_client").addFilter(QdrantVersionFilter())
+
+        # Quiet noisy libs a bit
+        for name in ("httpx", "httpcore", "urllib3", "gensim"):
+            logging.getLogger(name).setLevel(logging.WARNING)
+
+        # structlog processors – render to a final string directly
+        structlog.configure(
+            processors=[
+                structlog.stdlib.filter_by_level,
+                structlog.stdlib.add_logger_name,
+                structlog.stdlib.add_log_level,
+                structlog.processors.TimeStamper(fmt=ts_fmt),
+                _redact_processor,
+                final_renderer,
+            ],
+            wrapper_class=structlog.make_filtering_bound_logger(numeric_level),
+            logger_factory=LoggerFactory(),
+            cache_logger_on_first_use=False,
+        )
+
+        cls._initialized = True
+
+    @classmethod
+    def get_logger(cls, name: str | None = None) -> structlog.BoundLogger:
+        if not cls._initialized:
+            cls.setup()
+        return structlog.get_logger(name)
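Note (not part of the package): a minimal sketch of configuring the logger above; the file name and the api_key value are fake examples, and the key is masked by _redact_processor before the JSON line is rendered.

    from qdrant_loader_core.logging import LoggingConfig

    LoggingConfig.setup(level="INFO", format="json", file="llm.log")
    logger = LoggingConfig.get_logger(__name__)

    # The api_key value is redacted to "sk***REDACTED***90" in the emitted event.
    logger.info("LLM request", provider="openai", operation="chat", api_key="sk-abcdef1234567890")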
qdrant_loader_core-0.7.1.dist-info/METADATA
@@ -0,0 +1,34 @@
+Metadata-Version: 2.4
+Name: qdrant-loader-core
+Version: 0.7.1
+Summary: Shared core for provider-agnostic LLM support and configuration mapping for qdrant-loader ecosystem
+Author-email: Martin Papy <martin.papy@cbtw.tech>
+License-Expression: GPL-3.0
+Project-URL: Homepage, https://qdrant-loader.net
+Project-URL: Documentation, https://qdrant-loader.net/docs/packages/core/README.html
+Project-URL: Repository, https://github.com/martin-papy/qdrant-loader
+Project-URL: Issues, https://github.com/martin-papy/qdrant-loader/issues
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Information Technology
+Classifier: Intended Audience :: Science/Research
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Database
+Classifier: Topic :: Database :: Database Engines/Servers
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Text Processing :: Indexing
+Classifier: Topic :: Text Processing :: Linguistic
+Classifier: Environment :: Console
+Classifier: Typing :: Typed
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+Requires-Dist: pydantic>=2.0.0
+Provides-Extra: openai
+Requires-Dist: openai>=1.3.0; extra == "openai"
+Requires-Dist: tiktoken>=0.5.0; extra == "openai"
+Provides-Extra: ollama
+Requires-Dist: httpx>=0.24.0; extra == "ollama"
qdrant_loader_core-0.7.1.dist-info/RECORD
@@ -0,0 +1,17 @@
+qdrant_loader_core/__init__.py,sha256=9JasHhaZvRdZNL1Mge2I3MtcgZ7QQutCdPG8jJeIGGo,433
+qdrant_loader_core/logging.py,sha256=YCc7ykPLmDX6oOeote1cL3mPd_IkwPecNvYlDf8uqTg,9839
+qdrant_loader_core/llm/__init__.py,sha256=NgYA69RsGaEe9eAIa-4XHZw5ijo0lPF9iiFt7yM0rP4,445
+qdrant_loader_core/llm/errors.py,sha256=OF9jUayFSdXz7Tq0mtD68BuUaBRfKVxbcJY4R2_9eSM,248
+qdrant_loader_core/llm/factory.py,sha256=pfYj0EQ29YGD1Hy8VktH12oKzTtBSwjQOovChrzo2Fo,2825
+qdrant_loader_core/llm/ratelimit.py,sha256=aJOYTc2Y57_yBc-Ea9D_Hdw5LkqwxP73MQPEI1RbYk4,477
+qdrant_loader_core/llm/settings.py,sha256=1OVyOIQ05Mp9370La0ABaMjABIp5kszUzopydWcEwqk,4474
+qdrant_loader_core/llm/tokenization.py,sha256=cptyJevdIIQwMsT1Xqe9kL-PimAr_G2BKIQIv6UMdw4,1141
+qdrant_loader_core/llm/types.py,sha256=z2Zlq6zUt9fTOapBk5g4BDR59uxBC6Ia4YZq3QUjGo0,651
+qdrant_loader_core/llm/providers/__init__.py,sha256=L7JXyBbTI-hkZpKKUsnjFL16YUQkmGbTbqkVPlRW08Y,83
+qdrant_loader_core/llm/providers/azure_openai.py,sha256=MADFASMuZus7STblvk8zp-nXVFY_NFefeK3bah3Ii7E,2598
+qdrant_loader_core/llm/providers/ollama.py,sha256=zU7ce1mV-oyieNtCJabrATLZgiIe97Ay763PiAd-BBA,12764
+qdrant_loader_core/llm/providers/openai.py,sha256=8IG9-2pc3cYNY3Kb9S3ZYQn9qajJfC0HUG63B1C4nnQ,9543
+qdrant_loader_core-0.7.1.dist-info/METADATA,sha256=6GTrW2sghMJRuhTK0lnQQfve7QE9puiVpmaOlMySv74,1628
+qdrant_loader_core-0.7.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+qdrant_loader_core-0.7.1.dist-info/top_level.txt,sha256=IuQX743SRjuKkZNiQd3xqGLuAQkFNKSCFnR5G2hFpck,19
+qdrant_loader_core-0.7.1.dist-info/RECORD,,
qdrant_loader_core-0.7.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+qdrant_loader_core