embed-client 1.0.0__tar.gz → 1.0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {embed_client-1.0.0 → embed_client-1.0.1.1}/PKG-INFO +1 -1
- embed_client-1.0.1.1/README.md +24 -0
- embed_client-1.0.1.1/embed_client/async_client.py +544 -0
- embed_client-1.0.1.1/embed_client/example_async_usage.py +165 -0
- {embed_client-1.0.0 → embed_client-1.0.1.1}/embed_client/example_async_usage_ru.py +34 -8
- {embed_client-1.0.0 → embed_client-1.0.1.1}/embed_client.egg-info/PKG-INFO +1 -1
- {embed_client-1.0.0 → embed_client-1.0.1.1}/embed_client.egg-info/SOURCES.txt +2 -0
- {embed_client-1.0.0 → embed_client-1.0.1.1}/pyproject.toml +1 -1
- embed_client-1.0.1.1/tests/test_async_client.py +1080 -0
- {embed_client-1.0.0 → embed_client-1.0.1.1}/tests/test_async_client_real.py +51 -21
- {embed_client-1.0.0 → embed_client-1.0.1.1}/tests/test_async_client_stress.py +27 -1
- embed_client-1.0.1.1/tests/test_async_client_stress_new.py +319 -0
- embed_client-1.0.1.1/tests/test_async_client_stress_updated.py +319 -0
- embed_client-1.0.1.1/tests/test_example_async_usage.py +79 -0
- embed_client-1.0.0/README.md +0 -1
- embed_client-1.0.0/embed_client/async_client.py +0 -215
- embed_client-1.0.0/embed_client/example_async_usage.py +0 -69
- embed_client-1.0.0/tests/test_async_client.py +0 -285
- embed_client-1.0.0/tests/test_example_async_usage.py +0 -29
- {embed_client-1.0.0 → embed_client-1.0.1.1}/embed_client/__init__.py +0 -0
- {embed_client-1.0.0 → embed_client-1.0.1.1}/embed_client.egg-info/dependency_links.txt +0 -0
- {embed_client-1.0.0 → embed_client-1.0.1.1}/embed_client.egg-info/requires.txt +0 -0
- {embed_client-1.0.0 → embed_client-1.0.1.1}/embed_client.egg-info/top_level.txt +0 -0
- {embed_client-1.0.0 → embed_client-1.0.1.1}/setup.cfg +0 -0
- {embed_client-1.0.0 → embed_client-1.0.1.1}/tests/test_example_async_usage_ru.py +0 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
# vvz-embed-client
|
2
|
+
|
3
|
+
## Quick Start: Примеры запуска
|
4
|
+
|
5
|
+
**Вариант 1: через аргументы командной строки**
|
6
|
+
|
7
|
+
```sh
|
8
|
+
python embed_client/example_async_usage.py --base-url http://localhost --port 8001
|
9
|
+
python embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
|
10
|
+
```
|
11
|
+
|
12
|
+
**Вариант 2: через переменные окружения**
|
13
|
+
|
14
|
+
```sh
|
15
|
+
export EMBED_CLIENT_BASE_URL=http://localhost
|
16
|
+
export EMBED_CLIENT_PORT=8001
|
17
|
+
python embed_client/example_async_usage.py
|
18
|
+
python embed_client/example_async_usage_ru.py
|
19
|
+
```
|
20
|
+
|
21
|
+
**Важно:**
|
22
|
+
- Используйте `--base-url` (через дефис), а не `--base_url` (через подчеркивание).
|
23
|
+
- Значение base_url должно содержать `http://` или `https://`.
|
24
|
+
- Аргументы должны быть отдельными (через пробел), а не через `=`.
|
@@ -0,0 +1,544 @@
|
|
1
|
+
"""
|
2
|
+
Async client for Embedding Service API (OpenAPI 3.0.2)
|
3
|
+
|
4
|
+
- 100% type-annotated
|
5
|
+
- English docstrings and examples
|
6
|
+
- Ready for PyPI
|
7
|
+
- Supports new API format with body, embedding, and chunks
|
8
|
+
"""
|
9
|
+
|
10
|
+
from typing import Any, Dict, List, Optional, Union
|
11
|
+
import aiohttp
|
12
|
+
import asyncio
|
13
|
+
import os
|
14
|
+
import json
|
15
|
+
import logging
|
16
|
+
|
17
|
+
class EmbeddingServiceError(Exception):
    """Root of the exception hierarchy used by EmbeddingServiceAsyncClient."""
|
19
|
+
|
20
|
+
class EmbeddingServiceConnectionError(EmbeddingServiceError):
    """Signals that the service could not be reached or the connection failed."""
|
22
|
+
|
23
|
+
class EmbeddingServiceHTTPError(EmbeddingServiceError):
    """Signals an HTTP error status (4xx, 5xx).

    The constructor stores the numeric status in ``status`` and the
    accompanying text in ``message``; the exception text is
    ``"HTTP <status>: <message>"``.
    """

    def __init__(self, status: int, message: str):
        self.status, self.message = status, message
        super().__init__(f"HTTP {status}: {message}")
|
29
|
+
|
30
|
+
class EmbeddingServiceAPIError(EmbeddingServiceError):
    """Signals an error reported by the API inside the response body.

    The raw error object is kept on the ``error`` attribute; the exception
    text is ``"API error: <error>"``.
    """

    def __init__(self, error: Any):
        self.error = error
        super().__init__(f"API error: {error}")
|
35
|
+
|
36
|
+
class EmbeddingServiceConfigError(EmbeddingServiceError):
    """Signals an invalid client configuration (base_url, port, timeout, ...)."""
|
38
|
+
|
39
|
+
class EmbeddingServiceTimeoutError(EmbeddingServiceError):
    """Signals that a request did not complete within the allowed time."""
|
41
|
+
|
42
|
+
class EmbeddingServiceJSONError(EmbeddingServiceError):
    """Signals that a response body could not be parsed as JSON."""
|
44
|
+
|
45
|
+
class EmbeddingServiceAsyncClient:
|
46
|
+
"""
|
47
|
+
Asynchronous client for the Embedding Service API.
|
48
|
+
|
49
|
+
Supports both old and new API formats:
|
50
|
+
- Old format: {"result": {"success": true, "data": {"embeddings": [...]}}}
|
51
|
+
- New format: {"result": {"success": true, "data": [{"body": "text", "embedding": [...], "chunks": [...]}]}}
|
52
|
+
|
53
|
+
Args:
|
54
|
+
base_url (str): Base URL of the embedding service (e.g., "http://localhost").
|
55
|
+
port (int): Port of the embedding service (e.g., 8001).
|
56
|
+
timeout (float): Request timeout in seconds (default: 30).
|
57
|
+
Raises:
|
58
|
+
EmbeddingServiceConfigError: If base_url or port is invalid.
|
59
|
+
"""
|
60
|
+
def __init__(self, base_url: Optional[str] = None, port: Optional[int] = None, timeout: float = 30.0):
|
61
|
+
# Validate and set base_url
|
62
|
+
try:
|
63
|
+
self.base_url = base_url or os.getenv("EMBEDDING_SERVICE_BASE_URL", "http://localhost")
|
64
|
+
if not self.base_url:
|
65
|
+
raise EmbeddingServiceConfigError("base_url must be provided.")
|
66
|
+
if not isinstance(self.base_url, str):
|
67
|
+
raise EmbeddingServiceConfigError("base_url must be a string.")
|
68
|
+
|
69
|
+
# Validate URL format
|
70
|
+
if not (self.base_url.startswith("http://") or self.base_url.startswith("https://")):
|
71
|
+
raise EmbeddingServiceConfigError("base_url must start with http:// or https://")
|
72
|
+
except (TypeError, AttributeError) as e:
|
73
|
+
raise EmbeddingServiceConfigError(f"Invalid base_url configuration: {e}") from e
|
74
|
+
|
75
|
+
# Validate and set port
|
76
|
+
try:
|
77
|
+
port_env = os.getenv("EMBEDDING_SERVICE_PORT", "8001")
|
78
|
+
self.port = port if port is not None else int(port_env)
|
79
|
+
if self.port is None:
|
80
|
+
raise EmbeddingServiceConfigError("port must be provided.")
|
81
|
+
if not isinstance(self.port, int) or self.port <= 0 or self.port > 65535:
|
82
|
+
raise EmbeddingServiceConfigError("port must be a valid integer between 1 and 65535.")
|
83
|
+
except (ValueError, TypeError) as e:
|
84
|
+
raise EmbeddingServiceConfigError(f"Invalid port configuration: {e}") from e
|
85
|
+
|
86
|
+
# Validate timeout
|
87
|
+
try:
|
88
|
+
self.timeout = float(timeout)
|
89
|
+
if self.timeout <= 0:
|
90
|
+
raise EmbeddingServiceConfigError("timeout must be positive.")
|
91
|
+
except (ValueError, TypeError) as e:
|
92
|
+
raise EmbeddingServiceConfigError(f"Invalid timeout configuration: {e}") from e
|
93
|
+
|
94
|
+
self._session: Optional[aiohttp.ClientSession] = None
|
95
|
+
|
96
|
+
def _make_url(self, path: str, base_url: Optional[str] = None, port: Optional[int] = None) -> str:
|
97
|
+
try:
|
98
|
+
url = (base_url or self.base_url).rstrip("/")
|
99
|
+
port_val = port if port is not None else self.port
|
100
|
+
return f"{url}:{port_val}{path}"
|
101
|
+
except Exception as e:
|
102
|
+
raise EmbeddingServiceConfigError(f"Failed to construct URL: {e}") from e
|
103
|
+
|
104
|
+
def _format_error_response(self, error: str, lang: Optional[str] = None, text: Optional[str] = None) -> Dict[str, Any]:
|
105
|
+
"""
|
106
|
+
Format error response in a standard way.
|
107
|
+
Args:
|
108
|
+
error (str): Error message
|
109
|
+
lang (str, optional): Language of the text that caused the error
|
110
|
+
text (str, optional): Text that caused the error
|
111
|
+
Returns:
|
112
|
+
dict: Formatted error response
|
113
|
+
"""
|
114
|
+
response = {"error": f"Embedding service error: {error}"}
|
115
|
+
if lang is not None:
|
116
|
+
response["lang"] = lang
|
117
|
+
if text is not None:
|
118
|
+
response["text"] = text
|
119
|
+
return response
|
120
|
+
|
121
|
+
def extract_embeddings(self, result: Dict[str, Any]) -> List[List[float]]:
    """
    Pull the embedding vectors out of an API response.

    The following layouts are recognised, checked in this order:
      1. ``{"embeddings": [...]}``
      2. ``{"result": [...]}``
      3. ``{"result": {"embeddings": [...]}}``
      4. ``{"result": {"data": {"embeddings": [...]}}}``
      5. ``{"result": {"data": [{"embedding": [...], ...}, ...]}}`` (new format)

    Args:
        result: API response dictionary.

    Returns:
        The embeddings as found in the response (for layout 5, one
        ``item["embedding"]`` per entry of ``data``).

    Raises:
        ValueError: If no layout matches, or a new-format item is not a
            dictionary with an "embedding" key.
    """
    # Layout 1: embeddings at the top level
    if "embeddings" in result:
        return result["embeddings"]

    wrapped = result["result"] if "result" in result else None

    # Layout 2: the result itself is the list
    if isinstance(wrapped, list):
        return wrapped

    if isinstance(wrapped, dict):
        # Layout 3
        if "embeddings" in wrapped:
            return wrapped["embeddings"]

        data = wrapped["data"] if "data" in wrapped else None

        # Layout 4
        if isinstance(data, dict) and "embeddings" in data:
            return data["embeddings"]

        # Layout 5: one dictionary per input text
        if isinstance(data, list):
            vectors = []
            for entry in data:
                if not (isinstance(entry, dict) and "embedding" in entry):
                    raise ValueError(f"Invalid item format in new API response: {entry}")
                vectors.append(entry["embedding"])
            return vectors

    raise ValueError(f"Cannot extract embeddings from response: {result}")
|
166
|
+
|
167
|
+
def extract_embedding_data(self, result: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Return the ``result.data`` list of a new-format response after validating it.

    Args:
        result: API response dictionary.

    Returns:
        The ``data`` list itself; every item is a dictionary containing the
        'body', 'embedding' and 'chunks' keys.

    Raises:
        ValueError: If ``result["result"]["data"]`` is not a list, or an item
            is not a dictionary or lacks one of the required keys.
    """
    wrapper = result["result"] if "result" in result else None
    has_data = isinstance(wrapper, dict) and "data" in wrapper
    items = wrapper["data"] if has_data else None

    if not isinstance(items, list):
        raise ValueError(f"Cannot extract embedding data from response (new format required): {result}")

    # Every entry must carry all three new-format fields.
    for idx, entry in enumerate(items):
        if not isinstance(entry, dict):
            raise ValueError(f"Item {idx} is not a dictionary: {entry}")
        if "body" not in entry:
            raise ValueError(f"Item {idx} missing 'body' field: {entry}")
        if "embedding" not in entry:
            raise ValueError(f"Item {idx} missing 'embedding' field: {entry}")
        if "chunks" not in entry:
            raise ValueError(f"Item {idx} missing 'chunks' field: {entry}")

    return items
|
197
|
+
|
198
|
+
def extract_texts(self, result: Dict[str, Any]) -> List[str]:
|
199
|
+
"""
|
200
|
+
Extract original texts from API response (new format only).
|
201
|
+
|
202
|
+
Args:
|
203
|
+
result: API response dictionary
|
204
|
+
|
205
|
+
Returns:
|
206
|
+
List of original text strings
|
207
|
+
|
208
|
+
Raises:
|
209
|
+
ValueError: If texts cannot be extracted or is in old format
|
210
|
+
"""
|
211
|
+
data = self.extract_embedding_data(result)
|
212
|
+
return [item["body"] for item in data]
|
213
|
+
|
214
|
+
def extract_chunks(self, result: Dict[str, Any]) -> List[List[str]]:
|
215
|
+
"""
|
216
|
+
Extract text chunks from API response (new format only).
|
217
|
+
|
218
|
+
Args:
|
219
|
+
result: API response dictionary
|
220
|
+
|
221
|
+
Returns:
|
222
|
+
List of chunk lists for each text
|
223
|
+
|
224
|
+
Raises:
|
225
|
+
ValueError: If chunks cannot be extracted or is in old format
|
226
|
+
"""
|
227
|
+
data = self.extract_embedding_data(result)
|
228
|
+
return [item["chunks"] for item in data]
|
229
|
+
|
230
|
+
async def __aenter__(self):
    """
    Open the aiohttp session used for all requests and return the client.

    The session is created with a total timeout of ``self.timeout`` seconds.

    Raises:
        EmbeddingServiceError: If the session cannot be created.
    """
    try:
        self._session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.timeout)
        )
    except Exception as exc:
        raise EmbeddingServiceError(f"Failed to create HTTP session: {exc}") from exc
    return self
|
238
|
+
|
239
|
+
async def __aexit__(self, exc_type, exc, tb):
|
240
|
+
if self._session:
|
241
|
+
try:
|
242
|
+
await self._session.close()
|
243
|
+
except Exception as e:
|
244
|
+
raise EmbeddingServiceError(f"Failed to close HTTP session: {e}") from e
|
245
|
+
finally:
|
246
|
+
self._session = None
|
247
|
+
|
248
|
+
async def _parse_json_response(self, resp: aiohttp.ClientResponse) -> Dict[str, Any]:
    """
    Parse JSON response with proper error handling.

    Args:
        resp: aiohttp response object

    Returns:
        dict: Parsed JSON data

    Raises:
        EmbeddingServiceJSONError: If JSON parsing fails. For a JSON decode
            error the message includes up to the first 500 characters of the
            response text when that text can be read.
    """
    try:
        return await resp.json()
    except json.JSONDecodeError as e:
        # Only the body read is guarded here. The "Response text" error is
        # raised after the inner try so that the broad handler below cannot
        # catch it and re-report it as "Failed to get response text".
        try:
            text = await resp.text()
        except Exception as text_error:
            raise EmbeddingServiceJSONError(
                f"Invalid JSON response: {e}. Failed to get response text: {text_error}"
            ) from e
        raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}. Response text: {text[:500]}...") from e
    except UnicodeDecodeError as e:
        raise EmbeddingServiceJSONError(f"Unicode decode error in response: {e}") from e
    except Exception as e:
        raise EmbeddingServiceJSONError(f"Unexpected error parsing JSON: {e}") from e
|
273
|
+
|
274
|
+
async def health(self, base_url: Optional[str] = None, port: Optional[int] = None) -> Dict[str, Any]:
    """
    Check the health of the service (GET /health).

    Args:
        base_url (str, optional): Override base URL.
        port (int, optional): Override port.
    Returns:
        dict: Health status and model info (the decoded JSON body).
    Raises:
        EmbeddingServiceHTTPError: On an HTTP error status.
        EmbeddingServiceJSONError: If the body cannot be decoded as JSON.
        EmbeddingServiceAPIError: If the decoded body contains an "error" key.
        EmbeddingServiceTimeoutError: On request or server timeout.
        EmbeddingServiceConnectionError: On connection failures.
        EmbeddingServiceError: On any other unexpected failure.
    """
    url = self._make_url("/health", base_url, port)
    try:
        async with self._session.get(url, timeout=self.timeout) as resp:
            await self._raise_for_status(resp)
            try:
                data = await resp.json()
            except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
                raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
            if "error" in data:
                raise EmbeddingServiceAPIError(data["error"])
            return data
    except EmbeddingServiceError:
        # Already one of this client's own exceptions (HTTP, JSON, API, ...);
        # pass it through unchanged instead of wrapping it as "Unexpected error".
        raise
    except aiohttp.ServerTimeoutError as e:
        # Must precede ClientConnectionError: ServerTimeoutError derives from
        # it and would otherwise be reported as a connection error.
        raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
    except aiohttp.ClientConnectionError as e:
        raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
    except aiohttp.ClientResponseError as e:
        raise EmbeddingServiceHTTPError(e.status, e.message) from e
    except asyncio.TimeoutError as e:
        raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
    # NOTE(review): ClientSSLError and ClientOSError derive from
    # ClientConnectionError, so the two handlers below are shadowed by the
    # handler above. Left in place to keep existing error messages unchanged.
    except aiohttp.ClientSSLError as e:
        raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
    except aiohttp.ClientOSError as e:
        raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
    except Exception as e:
        raise EmbeddingServiceError(f"Unexpected error: {e}") from e
|
316
|
+
|
317
|
+
async def get_openapi_schema(self, base_url: Optional[str] = None, port: Optional[int] = None) -> Dict[str, Any]:
    """
    Get the OpenAPI schema of the service (GET /openapi.json).

    Args:
        base_url (str, optional): Override base URL.
        port (int, optional): Override port.
    Returns:
        dict: OpenAPI schema (the decoded JSON body).
    Raises:
        EmbeddingServiceHTTPError: On an HTTP error status.
        EmbeddingServiceJSONError: If the body cannot be decoded as JSON.
        EmbeddingServiceAPIError: If the decoded body contains an "error" key.
        EmbeddingServiceTimeoutError: On request or server timeout.
        EmbeddingServiceConnectionError: On connection failures.
        EmbeddingServiceError: On any other unexpected failure.
    """
    url = self._make_url("/openapi.json", base_url, port)
    try:
        async with self._session.get(url, timeout=self.timeout) as resp:
            await self._raise_for_status(resp)
            try:
                data = await resp.json()
            except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
                raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
            if "error" in data:
                raise EmbeddingServiceAPIError(data["error"])
            return data
    except EmbeddingServiceError:
        # Already one of this client's own exceptions (HTTP, JSON, API, ...);
        # pass it through unchanged instead of wrapping it as "Unexpected error".
        raise
    except aiohttp.ServerTimeoutError as e:
        # Must precede ClientConnectionError: ServerTimeoutError derives from
        # it and would otherwise be reported as a connection error.
        raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
    except aiohttp.ClientConnectionError as e:
        raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
    except aiohttp.ClientResponseError as e:
        raise EmbeddingServiceHTTPError(e.status, e.message) from e
    except asyncio.TimeoutError as e:
        raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
    # NOTE(review): ClientSSLError and ClientOSError derive from
    # ClientConnectionError, so the two handlers below are shadowed by the
    # handler above. Left in place to keep existing error messages unchanged.
    except aiohttp.ClientSSLError as e:
        raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
    except aiohttp.ClientOSError as e:
        raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
    except Exception as e:
        raise EmbeddingServiceError(f"Unexpected error: {e}") from e
|
359
|
+
|
360
|
+
async def get_commands(self, base_url: Optional[str] = None, port: Optional[int] = None) -> Dict[str, Any]:
    """
    Get the list of available commands (GET /api/commands).

    Args:
        base_url (str, optional): Override base URL.
        port (int, optional): Override port.
    Returns:
        dict: List of commands and their descriptions (the decoded JSON body).
    Raises:
        EmbeddingServiceHTTPError: On an HTTP error status.
        EmbeddingServiceJSONError: If the body cannot be decoded as JSON.
        EmbeddingServiceAPIError: If the decoded body contains an "error" key.
        EmbeddingServiceTimeoutError: On request or server timeout.
        EmbeddingServiceConnectionError: On connection failures.
        EmbeddingServiceError: On any other unexpected failure.
    """
    url = self._make_url("/api/commands", base_url, port)
    try:
        async with self._session.get(url, timeout=self.timeout) as resp:
            await self._raise_for_status(resp)
            try:
                data = await resp.json()
            except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
                raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
            if "error" in data:
                raise EmbeddingServiceAPIError(data["error"])
            return data
    except EmbeddingServiceError:
        # Already one of this client's own exceptions (HTTP, JSON, API, ...);
        # pass it through unchanged instead of wrapping it as "Unexpected error".
        raise
    except aiohttp.ServerTimeoutError as e:
        # Must precede ClientConnectionError: ServerTimeoutError derives from
        # it and would otherwise be reported as a connection error.
        raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
    except aiohttp.ClientConnectionError as e:
        raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
    except aiohttp.ClientResponseError as e:
        raise EmbeddingServiceHTTPError(e.status, e.message) from e
    except asyncio.TimeoutError as e:
        raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
    # NOTE(review): ClientSSLError and ClientOSError derive from
    # ClientConnectionError, so the two handlers below are shadowed by the
    # handler above. Left in place to keep existing error messages unchanged.
    except aiohttp.ClientSSLError as e:
        raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
    except aiohttp.ClientOSError as e:
        raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
    except Exception as e:
        raise EmbeddingServiceError(f"Unexpected error: {e}") from e
|
402
|
+
|
403
|
+
def _validate_texts(self, texts: List[str]) -> None:
|
404
|
+
"""
|
405
|
+
Validate input texts before sending to the API.
|
406
|
+
Args:
|
407
|
+
texts (List[str]): List of texts to validate
|
408
|
+
Raises:
|
409
|
+
EmbeddingServiceAPIError: If texts are invalid
|
410
|
+
"""
|
411
|
+
if not texts:
|
412
|
+
raise EmbeddingServiceAPIError({
|
413
|
+
"code": -32602,
|
414
|
+
"message": "Empty texts list provided"
|
415
|
+
})
|
416
|
+
|
417
|
+
invalid_texts = []
|
418
|
+
for i, text in enumerate(texts):
|
419
|
+
if not isinstance(text, str):
|
420
|
+
invalid_texts.append(f"Text at index {i} is not a string")
|
421
|
+
continue
|
422
|
+
if not text or not text.strip():
|
423
|
+
invalid_texts.append(f"Text at index {i} is empty or contains only whitespace")
|
424
|
+
elif len(text.strip()) < 2: # Минимальная длина текста
|
425
|
+
invalid_texts.append(f"Text at index {i} is too short (minimum 2 characters)")
|
426
|
+
|
427
|
+
if invalid_texts:
|
428
|
+
raise EmbeddingServiceAPIError({
|
429
|
+
"code": -32602,
|
430
|
+
"message": "Invalid input texts",
|
431
|
+
"details": invalid_texts
|
432
|
+
})
|
433
|
+
|
434
|
+
async def cmd(self, command: str, params: Optional[Dict[str, Any]] = None, base_url: Optional[str] = None, port: Optional[int] = None) -> Dict[str, Any]:
|
435
|
+
"""
|
436
|
+
Execute a command via JSON-RPC protocol.
|
437
|
+
Args:
|
438
|
+
command (str): Command to execute (embed, models, health, help, config).
|
439
|
+
params (dict, optional): Parameters for the command.
|
440
|
+
base_url (str, optional): Override base URL.
|
441
|
+
port (int, optional): Override port.
|
442
|
+
Returns:
|
443
|
+
dict: Command execution result or error response in format:
|
444
|
+
{
|
445
|
+
"error": {
|
446
|
+
"code": <код ошибки>,
|
447
|
+
"message": <сообщение об ошибке>,
|
448
|
+
"details": <опциональные детали ошибки>
|
449
|
+
}
|
450
|
+
}
|
451
|
+
или
|
452
|
+
{
|
453
|
+
"result": {
|
454
|
+
"success": true,
|
455
|
+
"data": {
|
456
|
+
"embeddings": [[...], ...]
|
457
|
+
}
|
458
|
+
}
|
459
|
+
}
|
460
|
+
"""
|
461
|
+
if not command:
|
462
|
+
raise EmbeddingServiceAPIError({
|
463
|
+
"code": -32602,
|
464
|
+
"message": "Command is required"
|
465
|
+
})
|
466
|
+
|
467
|
+
# Валидация текстов для команды embed
|
468
|
+
if command == "embed" and params and "texts" in params:
|
469
|
+
self._validate_texts(params["texts"])
|
470
|
+
|
471
|
+
logger = logging.getLogger('EmbeddingServiceAsyncClient.cmd')
|
472
|
+
url = self._make_url("/cmd", base_url, port)
|
473
|
+
payload = {"command": command}
|
474
|
+
if params is not None:
|
475
|
+
payload["params"] = params
|
476
|
+
logger.info(f"Sending embedding command: url={url}, payload={payload}")
|
477
|
+
try:
|
478
|
+
async with self._session.post(url, json=payload, timeout=self.timeout) as resp:
|
479
|
+
logger.info(f"Embedding service HTTP status: {resp.status}")
|
480
|
+
await self._raise_for_status(resp)
|
481
|
+
try:
|
482
|
+
resp_json = await resp.json()
|
483
|
+
except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
|
484
|
+
raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
|
485
|
+
logger.info(f"Embedding service response: {str(resp_json)[:300]}")
|
486
|
+
# Обработка ошибок API
|
487
|
+
if "error" in resp_json:
|
488
|
+
raise EmbeddingServiceAPIError(resp_json["error"])
|
489
|
+
if "result" in resp_json:
|
490
|
+
result = resp_json["result"]
|
491
|
+
if isinstance(result, dict) and (result.get("success") is False or "error" in result):
|
492
|
+
raise EmbeddingServiceAPIError(result.get("error", result))
|
493
|
+
return resp_json
|
494
|
+
except EmbeddingServiceAPIError:
|
495
|
+
raise
|
496
|
+
except EmbeddingServiceHTTPError:
|
497
|
+
raise
|
498
|
+
except EmbeddingServiceConnectionError:
|
499
|
+
raise
|
500
|
+
except EmbeddingServiceJSONError:
|
501
|
+
raise
|
502
|
+
except EmbeddingServiceTimeoutError:
|
503
|
+
raise
|
504
|
+
except aiohttp.ServerTimeoutError as e:
|
505
|
+
raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
|
506
|
+
except aiohttp.ClientConnectionError as e:
|
507
|
+
raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
|
508
|
+
except aiohttp.ClientResponseError as e:
|
509
|
+
raise EmbeddingServiceHTTPError(e.status, e.message) from e
|
510
|
+
except asyncio.TimeoutError as e:
|
511
|
+
raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
|
512
|
+
except aiohttp.ClientSSLError as e:
|
513
|
+
raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
|
514
|
+
except aiohttp.ClientOSError as e:
|
515
|
+
raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
|
516
|
+
except Exception as e:
|
517
|
+
logger.error(f"Error in embedding cmd: {e}", exc_info=True)
|
518
|
+
raise EmbeddingServiceError(f"Unexpected error: {e}") from e
|
519
|
+
|
520
|
+
async def _raise_for_status(self, resp: aiohttp.ClientResponse):
    """
    Convert an aiohttp status error into EmbeddingServiceHTTPError.

    Args:
        resp: aiohttp response object whose status is checked.
    Raises:
        EmbeddingServiceHTTPError: If ``resp.raise_for_status()`` raises
            ``aiohttp.ClientResponseError``; carries its status and message.
    """
    try:
        resp.raise_for_status()
    except aiohttp.ClientResponseError as http_err:
        status, reason = http_err.status, http_err.message
        raise EmbeddingServiceHTTPError(status, reason) from http_err
|
525
|
+
|
526
|
+
async def close(self) -> None:
|
527
|
+
"""
|
528
|
+
Close the underlying HTTP session explicitly.
|
529
|
+
|
530
|
+
This method allows the user to manually close the aiohttp.ClientSession used by the client.
|
531
|
+
It is safe to call multiple times; if the session is already closed or was never opened, nothing happens.
|
532
|
+
|
533
|
+
Raises:
|
534
|
+
EmbeddingServiceError: If closing the session fails.
|
535
|
+
"""
|
536
|
+
if self._session:
|
537
|
+
try:
|
538
|
+
await self._session.close()
|
539
|
+
except Exception as e:
|
540
|
+
raise EmbeddingServiceError(f"Failed to close HTTP session: {e}") from e
|
541
|
+
finally:
|
542
|
+
self._session = None
|
543
|
+
|
544
|
+
# TODO: Add methods for any remaining endpoints (/cmd and /api/commands are already covered by cmd() and get_commands()).
|