embed-client 1.0.0.1__py3-none-any.whl → 2.0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- embed_client/async_client.py +387 -46
- embed_client/example_async_usage.py +128 -32
- embed_client/example_async_usage_ru.py +42 -8
- {embed_client-1.0.0.1.dist-info → embed_client-2.0.0.0.dist-info}/METADATA +1 -1
- embed_client-2.0.0.0.dist-info/RECORD +8 -0
- embed_client-1.0.0.1.dist-info/RECORD +0 -8
- {embed_client-1.0.0.1.dist-info → embed_client-2.0.0.0.dist-info}/WHEEL +0 -0
- {embed_client-1.0.0.1.dist-info → embed_client-2.0.0.0.dist-info}/top_level.txt +0 -0
embed_client/async_client.py
CHANGED
@@ -4,11 +4,15 @@ Async client for Embedding Service API (OpenAPI 3.0.2)
|
|
4
4
|
- 100% type-annotated
|
5
5
|
- English docstrings and examples
|
6
6
|
- Ready for PyPi
|
7
|
+
- Supports new API format with body, embedding, and chunks
|
7
8
|
"""
|
8
9
|
|
9
10
|
from typing import Any, Dict, List, Optional, Union
|
10
11
|
import aiohttp
|
12
|
+
import asyncio
|
11
13
|
import os
|
14
|
+
import json
|
15
|
+
import logging
|
12
16
|
|
13
17
|
class EmbeddingServiceError(Exception):
|
14
18
|
"""Base exception for EmbeddingServiceAsyncClient."""
|
@@ -29,28 +33,73 @@ class EmbeddingServiceAPIError(EmbeddingServiceError):
|
|
29
33
|
super().__init__(f"API error: {error}")
|
30
34
|
self.error = error
|
31
35
|
|
36
|
+
class EmbeddingServiceConfigError(EmbeddingServiceError):
    """Raised when the client configuration is invalid.

    The client constructor raises this for an unusable base_url, port or
    timeout, and URL construction failures are reported with it as well.
    """
|
38
|
+
|
39
|
+
class EmbeddingServiceTimeoutError(EmbeddingServiceError):
    """Raised when a request to the embedding service times out."""
|
41
|
+
|
42
|
+
class EmbeddingServiceJSONError(EmbeddingServiceError):
    """Raised when a service response body cannot be parsed as JSON."""
|
44
|
+
|
32
45
|
class EmbeddingServiceAsyncClient:
|
33
46
|
"""
|
34
47
|
Asynchronous client for the Embedding Service API.
|
48
|
+
|
49
|
+
Supports both old and new API formats:
|
50
|
+
- Old format: {"result": {"success": true, "data": {"embeddings": [...]}}}
|
51
|
+
- New format: {"result": {"success": true, "data": {"embeddings": [...], "results": [{"body": "text", "embedding": [...], "tokens": [...], "bm25_tokens": [...]}]}}}
|
52
|
+
|
35
53
|
Args:
|
36
54
|
base_url (str): Base URL of the embedding service (e.g., "http://localhost").
|
37
55
|
port (int): Port of the embedding service (e.g., 8001).
|
56
|
+
timeout (float): Request timeout in seconds (default: 30).
|
38
57
|
Raises:
|
39
|
-
|
58
|
+
EmbeddingServiceConfigError: If base_url or port is invalid.
|
40
59
|
"""
|
41
|
-
def __init__(self, base_url: Optional[str] = None, port: Optional[int] = None):
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
60
|
+
def __init__(self, base_url: Optional[str] = None, port: Optional[int] = None, timeout: float = 30.0):
    """
    Validate the connection settings and store them on the client.

    No HTTP session is created here; ``self._session`` starts as ``None``.

    Args:
        base_url: Service URL starting with ``http://`` or ``https://``.
            When falsy, the ``EMBEDDING_SERVICE_BASE_URL`` environment
            variable is used, defaulting to ``"http://localhost"``.
        port: TCP port in the range 1-65535. When ``None``, the
            ``EMBEDDING_SERVICE_PORT`` environment variable is parsed,
            defaulting to ``8001``.
        timeout: Request timeout in seconds; must be a finite number
            greater than zero.

    Raises:
        EmbeddingServiceConfigError: If base_url, port or timeout is invalid.
    """
    # Validate and set base_url
    try:
        self.base_url = base_url or os.getenv("EMBEDDING_SERVICE_BASE_URL", "http://localhost")
        if not self.base_url:
            raise EmbeddingServiceConfigError("base_url must be provided.")
        if not isinstance(self.base_url, str):
            raise EmbeddingServiceConfigError("base_url must be a string.")

        # Validate URL format
        if not self.base_url.startswith(("http://", "https://")):
            raise EmbeddingServiceConfigError("base_url must start with http:// or https://")
    except (TypeError, AttributeError) as e:
        raise EmbeddingServiceConfigError(f"Invalid base_url configuration: {e}") from e

    # Validate and set port
    try:
        port_env = os.getenv("EMBEDDING_SERVICE_PORT", "8001")
        self.port = port if port is not None else int(port_env)
        # bool is a subclass of int, so True/False would otherwise slip
        # through the integer check and be used as port 1/0.
        if (
            isinstance(self.port, bool)
            or not isinstance(self.port, int)
            or not 1 <= self.port <= 65535
        ):
            raise EmbeddingServiceConfigError("port must be a valid integer between 1 and 65535.")
    except (ValueError, TypeError) as e:
        raise EmbeddingServiceConfigError(f"Invalid port configuration: {e}") from e

    # Validate timeout
    try:
        self.timeout = float(timeout)
    except (ValueError, TypeError, OverflowError) as e:
        raise EmbeddingServiceConfigError(f"Invalid timeout configuration: {e}") from e
    # NaN compares false against everything, so "<= 0" alone would accept it;
    # an infinite timeout is rejected for the same reason a NaN one is.
    if self.timeout != self.timeout or self.timeout == float("inf"):
        raise EmbeddingServiceConfigError("timeout must be a finite number.")
    if self.timeout <= 0:
        raise EmbeddingServiceConfigError("timeout must be positive.")

    self._session: Optional[aiohttp.ClientSession] = None
|
49
95
|
|
50
96
|
def _make_url(self, path: str, base_url: Optional[str] = None, port: Optional[int] = None) -> str:
    """
    Build the full request URL as ``<base_url>:<port><path>``.

    Args:
        path: Request path appended verbatim after the port.
        base_url: Overrides the client's base_url when truthy.
        port: Overrides the client's port when not ``None``.

    Returns:
        The assembled URL; trailing slashes of the base URL are removed.

    Raises:
        EmbeddingServiceConfigError: If any step of the construction fails.
    """
    try:
        stripped_base = (base_url if base_url else self.base_url).rstrip("/")
        chosen_port = self.port if port is None else port
        return "{}:{}{}".format(stripped_base, chosen_port, path)
    except Exception as exc:
        raise EmbeddingServiceConfigError(f"Failed to construct URL: {exc}") from exc
|
54
103
|
|
55
104
|
def _format_error_response(self, error: str, lang: Optional[str] = None, text: Optional[str] = None) -> Dict[str, Any]:
|
56
105
|
"""
|
@@ -69,14 +118,221 @@ class EmbeddingServiceAsyncClient:
|
|
69
118
|
response["text"] = text
|
70
119
|
return response
|
71
120
|
|
121
|
+
def extract_embeddings(self, result: Dict[str, Any]) -> List[List[float]]:
    """
    Extract embedding vectors from an API response in any supported layout.

    Layouts are tried in this order:
        1. ``{"embeddings": [...]}``
        2. ``{"result": [...]}``
        3. ``{"result": {"embeddings": [...]}}``
        4. ``{"result": {"data": {"embeddings": [...]}}}``
        5. ``{"result": {"data": {"results": [{"embedding": [...]}, ...]}}}``
        6. ``{"result": {"data": [{"embedding": [...]}, ...]}}``

    Args:
        result: API response dictionary.

    Returns:
        List of embedding vectors (list of lists of floats).

    Raises:
        ValueError: If the response is not a dictionary, matches none of the
            layouts, or a per-item entry has no ``embedding`` field.
    """

    def collect(items):
        # Per-item layouts: every entry must be a dict carrying "embedding".
        vectors = []
        for item in items:
            if not (isinstance(item, dict) and "embedding" in item):
                raise ValueError(f"Invalid item format in new API response: {item}")
            vectors.append(item["embedding"])
        return vectors

    # Guard first so that None, strings or lists produce the documented
    # ValueError instead of a TypeError from the membership tests below.
    if not isinstance(result, dict):
        raise ValueError(f"Cannot extract embeddings from response: {result}")

    if "embeddings" in result:
        return result["embeddings"]

    res = result.get("result")
    if isinstance(res, list):
        return res

    if isinstance(res, dict):
        if "embeddings" in res:
            return res["embeddings"]

        data = res.get("data")
        if isinstance(data, dict):
            if "embeddings" in data:
                return data["embeddings"]
            # New format without the flat "embeddings" list: fall back to the
            # per-text entries under data.results.
            if isinstance(data.get("results"), list):
                return collect(data["results"])
        elif isinstance(data, list):
            return collect(data)

    raise ValueError(f"Cannot extract embeddings from response: {result}")
|
166
|
+
|
167
|
+
def extract_embedding_data(self, result: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Extract the per-text records from an API response.

    Two layouts are accepted:
        - New format, ``result.data.results[]``: every item must contain
          ``body``, ``embedding``, ``tokens`` and ``bm25_tokens``.
        - Legacy format, ``result.data[]``: every item must contain ``body``,
          ``embedding`` and either ``chunks`` or ``tokens``.

    Args:
        result: API response dictionary.

    Returns:
        The list of item dictionaries exactly as found in the response.

    Raises:
        ValueError: If the response is not a dictionary, matches neither
            layout, or an item is malformed.
    """
    # isinstance guards keep non-dict input on the documented ValueError path
    # rather than failing with TypeError on the membership tests.
    res = result.get("result") if isinstance(result, dict) else None
    if isinstance(res, dict):
        data = res.get("data")

        # New format: result.data.results[]
        if isinstance(data, dict) and isinstance(data.get("results"), list):
            items = data["results"]
            for i, item in enumerate(items):
                if not isinstance(item, dict):
                    raise ValueError(f"Item {i} is not a dictionary: {item}")
                for field in ("body", "embedding", "tokens", "bm25_tokens"):
                    if field not in item:
                        raise ValueError(f"Item {i} missing '{field}' field: {item}")
            return items

        # Legacy format: result.data[]
        if isinstance(data, list):
            for i, item in enumerate(data):
                if not isinstance(item, dict):
                    raise ValueError(f"Item {i} is not a dictionary: {item}")
                for field in ("body", "embedding"):
                    if field not in item:
                        raise ValueError(f"Item {i} missing '{field}' field: {item}")
                # Legacy responses used 'chunks' where newer ones use 'tokens'.
                if "chunks" not in item and "tokens" not in item:
                    raise ValueError(f"Item {i} missing 'chunks' or 'tokens' field: {item}")
            return data

    raise ValueError(f"Cannot extract embedding data from response (new format required): {result}")
|
218
|
+
|
219
|
+
def extract_texts(self, result: Dict[str, Any]) -> List[str]:
    """
    Return the ``body`` text of every record in an API response.

    Records are obtained through ``extract_embedding_data``.

    Args:
        result: API response dictionary.

    Returns:
        List of original text strings, in response order.

    Raises:
        ValueError: If the records cannot be extracted from the response.
    """
    bodies = []
    for record in self.extract_embedding_data(result):
        bodies.append(record["body"])
    return bodies
|
234
|
+
|
235
|
+
def extract_chunks(self, result: Dict[str, Any]) -> List[List[str]]:
    """
    Return the token list of every record in an API response.

    For each record the ``tokens`` field is preferred; ``chunks`` is used
    only when ``tokens`` is absent. Records are obtained through
    ``extract_embedding_data``.

    Args:
        result: API response dictionary.

    Returns:
        One list per record, in response order.

    Raises:
        ValueError: If the records cannot be extracted, or a record has
            neither ``tokens`` nor ``chunks``.
    """
    collected = []
    for entry in self.extract_embedding_data(result):
        if "tokens" in entry:
            field = "tokens"
        elif "chunks" in entry:
            field = "chunks"
        else:
            raise ValueError(f"Item missing both 'tokens' and 'chunks' fields: {entry}")
        collected.append(entry[field])
    return collected
|
260
|
+
|
261
|
+
def extract_tokens(self, result: Dict[str, Any]) -> List[List[str]]:
    """
    Return the ``tokens`` list of every record in an API response.

    Records are obtained through ``extract_embedding_data``, which also
    accepts legacy records that carry ``chunks`` instead of ``tokens``; such
    records are rejected here with ``ValueError``.

    Args:
        result: API response dictionary.

    Returns:
        List of token lists, one per record, in response order.

    Raises:
        ValueError: If the records cannot be extracted or a record has no
            ``tokens`` field.
    """
    tokens = []
    for i, item in enumerate(self.extract_embedding_data(result)):
        # Explicit check so callers get the documented ValueError rather
        # than a bare KeyError on legacy records.
        if "tokens" not in item:
            raise ValueError(f"Item {i} missing 'tokens' field: {item}")
        tokens.append(item["tokens"])
    return tokens
|
276
|
+
|
277
|
+
def extract_bm25_tokens(self, result: Dict[str, Any]) -> List[List[str]]:
    """
    Return the ``bm25_tokens`` list of every record in an API response.

    Records are obtained through ``extract_embedding_data``, which also
    accepts legacy records that are not required to carry ``bm25_tokens``;
    such records are rejected here with ``ValueError``.

    Args:
        result: API response dictionary.

    Returns:
        List of BM25 token lists, one per record, in response order.

    Raises:
        ValueError: If the records cannot be extracted or a record has no
            ``bm25_tokens`` field.
    """
    bm25_tokens = []
    for i, item in enumerate(self.extract_embedding_data(result)):
        # Explicit check so callers get the documented ValueError rather
        # than a bare KeyError on legacy records.
        if "bm25_tokens" not in item:
            raise ValueError(f"Item {i} missing 'bm25_tokens' field: {item}")
        bm25_tokens.append(item["bm25_tokens"])
    return bm25_tokens
|
292
|
+
|
72
293
|
async def __aenter__(self):
    """
    Open the HTTP session and return the client for use in ``async with``.

    The session is created with a total timeout equal to ``self.timeout``.

    Raises:
        EmbeddingServiceError: If the session cannot be created.
    """
    try:
        self._session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.timeout)
        )
    except Exception as exc:
        raise EmbeddingServiceError(f"Failed to create HTTP session: {exc}") from exc
    return self
|
75
301
|
|
76
302
|
async def __aexit__(self, exc_type, exc, tb):
    """
    Close the HTTP session when leaving the ``async with`` block.

    Does nothing when no session is open. The session reference is cleared
    even if closing fails.

    Raises:
        EmbeddingServiceError: If closing the session fails.
    """
    session = self._session
    if not session:
        return
    try:
        await session.close()
    except Exception as close_error:
        raise EmbeddingServiceError(f"Failed to close HTTP session: {close_error}") from close_error
    finally:
        self._session = None
|
310
|
+
|
311
|
+
async def _parse_json_response(self, resp: aiohttp.ClientResponse) -> Dict[str, Any]:
    """
    Parse the JSON body of a response, converting failures to a client error.

    Args:
        resp: aiohttp response object.

    Returns:
        dict: Parsed JSON data.

    Raises:
        EmbeddingServiceJSONError: If the body cannot be decoded or parsed.
            For invalid JSON the message includes up to the first 500
            characters of the response text when that text can be read.
    """
    try:
        return await resp.json()
    except json.JSONDecodeError as e:
        # Read the text in its own try block: raising the final error inside
        # that block would be caught by its "except Exception" and replaced
        # with the misleading "Failed to get response text" message.
        try:
            text = await resp.text()
        except Exception as text_error:
            raise EmbeddingServiceJSONError(
                f"Invalid JSON response: {e}. Failed to get response text: {text_error}"
            ) from e
        raise EmbeddingServiceJSONError(
            f"Invalid JSON response: {e}. Response text: {text[:500]}..."
        ) from e
    except UnicodeDecodeError as e:
        raise EmbeddingServiceJSONError(f"Unicode decode error in response: {e}") from e
    except Exception as e:
        raise EmbeddingServiceJSONError(f"Unexpected error parsing JSON: {e}") from e
|
80
336
|
|
81
337
|
async def health(self, base_url: Optional[str] = None, port: Optional[int] = None) -> Dict[str, Any]:
|
82
338
|
"""
|
@@ -89,17 +345,35 @@ class EmbeddingServiceAsyncClient:
|
|
89
345
|
"""
|
90
346
|
url = self._make_url("/health", base_url, port)
|
91
347
|
try:
|
92
|
-
async with self._session.get(url) as resp:
|
348
|
+
async with self._session.get(url, timeout=self.timeout) as resp:
|
93
349
|
await self._raise_for_status(resp)
|
94
|
-
|
350
|
+
try:
|
351
|
+
data = await resp.json()
|
352
|
+
except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
|
353
|
+
raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
|
354
|
+
if "error" in data:
|
355
|
+
raise EmbeddingServiceAPIError(data["error"])
|
356
|
+
return data
|
95
357
|
except EmbeddingServiceHTTPError:
|
96
358
|
raise
|
97
359
|
except EmbeddingServiceConnectionError:
|
98
360
|
raise
|
361
|
+
except EmbeddingServiceJSONError:
|
362
|
+
raise
|
363
|
+
except EmbeddingServiceTimeoutError:
|
364
|
+
raise
|
99
365
|
except aiohttp.ClientConnectionError as e:
|
100
366
|
raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
|
101
367
|
except aiohttp.ClientResponseError as e:
|
102
368
|
raise EmbeddingServiceHTTPError(e.status, e.message) from e
|
369
|
+
except asyncio.TimeoutError as e:
|
370
|
+
raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
|
371
|
+
except aiohttp.ServerTimeoutError as e:
|
372
|
+
raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
|
373
|
+
except aiohttp.ClientSSLError as e:
|
374
|
+
raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
|
375
|
+
except aiohttp.ClientOSError as e:
|
376
|
+
raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
|
103
377
|
except Exception as e:
|
104
378
|
raise EmbeddingServiceError(f"Unexpected error: {e}") from e
|
105
379
|
|
@@ -114,17 +388,35 @@ class EmbeddingServiceAsyncClient:
|
|
114
388
|
"""
|
115
389
|
url = self._make_url("/openapi.json", base_url, port)
|
116
390
|
try:
|
117
|
-
async with self._session.get(url) as resp:
|
391
|
+
async with self._session.get(url, timeout=self.timeout) as resp:
|
118
392
|
await self._raise_for_status(resp)
|
119
|
-
|
393
|
+
try:
|
394
|
+
data = await resp.json()
|
395
|
+
except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
|
396
|
+
raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
|
397
|
+
if "error" in data:
|
398
|
+
raise EmbeddingServiceAPIError(data["error"])
|
399
|
+
return data
|
120
400
|
except EmbeddingServiceHTTPError:
|
121
401
|
raise
|
122
402
|
except EmbeddingServiceConnectionError:
|
123
403
|
raise
|
404
|
+
except EmbeddingServiceJSONError:
|
405
|
+
raise
|
406
|
+
except EmbeddingServiceTimeoutError:
|
407
|
+
raise
|
124
408
|
except aiohttp.ClientConnectionError as e:
|
125
409
|
raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
|
126
410
|
except aiohttp.ClientResponseError as e:
|
127
411
|
raise EmbeddingServiceHTTPError(e.status, e.message) from e
|
412
|
+
except asyncio.TimeoutError as e:
|
413
|
+
raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
|
414
|
+
except aiohttp.ServerTimeoutError as e:
|
415
|
+
raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
|
416
|
+
except aiohttp.ClientSSLError as e:
|
417
|
+
raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
|
418
|
+
except aiohttp.ClientOSError as e:
|
419
|
+
raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
|
128
420
|
except Exception as e:
|
129
421
|
raise EmbeddingServiceError(f"Unexpected error: {e}") from e
|
130
422
|
|
@@ -139,17 +431,35 @@ class EmbeddingServiceAsyncClient:
|
|
139
431
|
"""
|
140
432
|
url = self._make_url("/api/commands", base_url, port)
|
141
433
|
try:
|
142
|
-
async with self._session.get(url) as resp:
|
434
|
+
async with self._session.get(url, timeout=self.timeout) as resp:
|
143
435
|
await self._raise_for_status(resp)
|
144
|
-
|
436
|
+
try:
|
437
|
+
data = await resp.json()
|
438
|
+
except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
|
439
|
+
raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
|
440
|
+
if "error" in data:
|
441
|
+
raise EmbeddingServiceAPIError(data["error"])
|
442
|
+
return data
|
145
443
|
except EmbeddingServiceHTTPError:
|
146
444
|
raise
|
147
445
|
except EmbeddingServiceConnectionError:
|
148
446
|
raise
|
447
|
+
except EmbeddingServiceJSONError:
|
448
|
+
raise
|
449
|
+
except EmbeddingServiceTimeoutError:
|
450
|
+
raise
|
149
451
|
except aiohttp.ClientConnectionError as e:
|
150
452
|
raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
|
151
453
|
except aiohttp.ClientResponseError as e:
|
152
454
|
raise EmbeddingServiceHTTPError(e.status, e.message) from e
|
455
|
+
except asyncio.TimeoutError as e:
|
456
|
+
raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
|
457
|
+
except aiohttp.ServerTimeoutError as e:
|
458
|
+
raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
|
459
|
+
except aiohttp.ClientSSLError as e:
|
460
|
+
raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
|
461
|
+
except aiohttp.ClientOSError as e:
|
462
|
+
raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
|
153
463
|
except Exception as e:
|
154
464
|
raise EmbeddingServiceError(f"Unexpected error: {e}") from e
|
155
465
|
|
@@ -221,41 +531,54 @@ class EmbeddingServiceAsyncClient:
|
|
221
531
|
if command == "embed" and params and "texts" in params:
|
222
532
|
self._validate_texts(params["texts"])
|
223
533
|
|
534
|
+
logger = logging.getLogger('EmbeddingServiceAsyncClient.cmd')
|
224
535
|
url = self._make_url("/cmd", base_url, port)
|
225
536
|
payload = {"command": command}
|
226
537
|
if params is not None:
|
227
538
|
payload["params"] = params
|
228
|
-
|
539
|
+
logger.info(f"Sending embedding command: url={url}, payload={payload}")
|
229
540
|
try:
|
230
|
-
async with self._session.post(url, json=payload) as resp:
|
541
|
+
async with self._session.post(url, json=payload, timeout=self.timeout) as resp:
|
542
|
+
logger.info(f"Embedding service HTTP status: {resp.status}")
|
231
543
|
await self._raise_for_status(resp)
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
raise
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
544
|
+
try:
|
545
|
+
resp_json = await resp.json()
|
546
|
+
except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
|
547
|
+
raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
|
548
|
+
logger.info(f"Embedding service response: {str(resp_json)[:300]}")
|
549
|
+
# Handle API errors
|
550
|
+
if "error" in resp_json:
|
551
|
+
raise EmbeddingServiceAPIError(resp_json["error"])
|
552
|
+
if "result" in resp_json:
|
553
|
+
result = resp_json["result"]
|
554
|
+
if isinstance(result, dict) and (result.get("success") is False or "error" in result):
|
555
|
+
raise EmbeddingServiceAPIError(result.get("error", result))
|
556
|
+
return resp_json
|
557
|
+
except EmbeddingServiceAPIError:
|
558
|
+
raise
|
559
|
+
except EmbeddingServiceHTTPError:
|
560
|
+
raise
|
561
|
+
except EmbeddingServiceConnectionError:
|
562
|
+
raise
|
563
|
+
except EmbeddingServiceJSONError:
|
564
|
+
raise
|
565
|
+
except EmbeddingServiceTimeoutError:
|
566
|
+
raise
|
567
|
+
except aiohttp.ServerTimeoutError as e:
|
568
|
+
raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
|
245
569
|
except aiohttp.ClientConnectionError as e:
|
246
|
-
raise
|
247
|
-
"code": -32000,
|
248
|
-
"message": f"Connection error: {e}"
|
249
|
-
}) from e
|
570
|
+
raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
|
250
571
|
except aiohttp.ClientResponseError as e:
|
251
572
|
raise EmbeddingServiceHTTPError(e.status, e.message) from e
|
252
|
-
except
|
253
|
-
raise
|
573
|
+
except asyncio.TimeoutError as e:
|
574
|
+
raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
|
575
|
+
except aiohttp.ClientSSLError as e:
|
576
|
+
raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
|
577
|
+
except aiohttp.ClientOSError as e:
|
578
|
+
raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
|
254
579
|
except Exception as e:
|
255
|
-
|
256
|
-
|
257
|
-
"message": f"Unexpected error: {e}"
|
258
|
-
}) from e
|
580
|
+
logger.error(f"Error in embedding cmd: {e}", exc_info=True)
|
581
|
+
raise EmbeddingServiceError(f"Unexpected error: {e}") from e
|
259
582
|
|
260
583
|
async def _raise_for_status(self, resp: aiohttp.ClientResponse):
|
261
584
|
try:
|
@@ -263,4 +586,22 @@ class EmbeddingServiceAsyncClient:
|
|
263
586
|
except aiohttp.ClientResponseError as e:
|
264
587
|
raise EmbeddingServiceHTTPError(e.status, e.message) from e
|
265
588
|
|
589
|
+
async def close(self) -> None:
    """
    Close the underlying HTTP session explicitly.

    Lets the caller release the aiohttp.ClientSession without relying on
    ``async with``. Calling it repeatedly, or before any session was opened,
    is a no-op. The session reference is cleared even if closing fails.

    Raises:
        EmbeddingServiceError: If closing the session fails.
    """
    active_session = self._session
    if not active_session:
        return
    try:
        await active_session.close()
    except Exception as close_error:
        raise EmbeddingServiceError(f"Failed to close HTTP session: {close_error}") from close_error
    finally:
        self._session = None
|
606
|
+
|
266
607
|
# TODO: Add methods for /cmd, /api/commands, etc.
|
@@ -1,19 +1,51 @@
|
|
1
1
|
"""
|
2
2
|
Example usage of EmbeddingServiceAsyncClient.
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
USAGE:
|
5
|
+
python embed_client/example_async_usage.py --base-url http://localhost --port 8001
|
6
|
+
# or
|
7
|
+
python -m asyncio embed_client/example_async_usage.py --base-url http://localhost --port 8001
|
8
|
+
|
9
|
+
# Environment variables can be used as well:
|
10
|
+
export EMBED_CLIENT_BASE_URL=http://localhost
|
11
|
+
export EMBED_CLIENT_PORT=8001
|
12
|
+
python embed_client/example_async_usage.py
|
13
|
+
|
14
|
+
# IMPORTANT:
|
15
|
+
# --base-url and --port must be passed as separate arguments (space-separated),
|
16
|
+
# not joined with = (NOT --base_url=...)
|
17
|
+
# base_url must include http:// or https://
|
6
18
|
|
7
|
-
|
19
|
+
EXAMPLES:
|
20
|
+
python embed_client/example_async_usage.py --base-url http://localhost --port 8001
|
8
21
|
python -m asyncio embed_client/example_async_usage.py --base-url http://localhost --port 8001
|
22
|
+
export EMBED_CLIENT_BASE_URL=http://localhost
|
23
|
+
export EMBED_CLIENT_PORT=8001
|
24
|
+
python embed_client/example_async_usage.py
|
9
25
|
|
10
|
-
|
26
|
+
Explicit session close example:
|
27
|
+
import asyncio
|
28
|
+
from embed_client.async_client import EmbeddingServiceAsyncClient
|
29
|
+
async def main():
|
30
|
+
client = EmbeddingServiceAsyncClient(base_url="http://localhost", port=8001)
|
31
|
+
# ... use client ...
|
32
|
+
await client.close() # Explicitly close session
|
33
|
+
asyncio.run(main())
|
11
34
|
"""
|
12
35
|
|
13
36
|
import asyncio
|
14
37
|
import sys
|
15
38
|
import os
|
16
|
-
from embed_client.async_client import
|
39
|
+
from embed_client.async_client import (
|
40
|
+
EmbeddingServiceAsyncClient,
|
41
|
+
EmbeddingServiceError,
|
42
|
+
EmbeddingServiceAPIError,
|
43
|
+
EmbeddingServiceHTTPError,
|
44
|
+
EmbeddingServiceConnectionError,
|
45
|
+
EmbeddingServiceTimeoutError,
|
46
|
+
EmbeddingServiceJSONError,
|
47
|
+
EmbeddingServiceConfigError
|
48
|
+
)
|
17
49
|
|
18
50
|
def get_params():
|
19
51
|
base_url = None
|
@@ -33,37 +65,101 @@ def get_params():
|
|
33
65
|
return None, None
|
34
66
|
return base_url, int(port)
|
35
67
|
|
68
|
+
def extract_vectors(result):
    """Extract embedding vectors from an API response in any supported layout.

    Layouts are tried in this order:
        1. ``{"embeddings": [...]}``
        2. ``{"result": [...]}``
        3. ``{"result": {"embeddings": [...]}}``
        4. ``{"result": {"data": {"embeddings": [...]}}}``
        5. ``{"result": {"data": {"results": [{"embedding": [...]}, ...]}}}``
        6. ``{"result": {"data": [{"embedding": [...]}, ...]}}``

    Args:
        result: API response dictionary.

    Returns:
        List of embedding vectors.

    Raises:
        ValueError: If the response is not a dictionary, matches none of the
            layouts, or a per-item entry has no ``embedding`` field.
    """

    def collect(items):
        # Per-item layouts: every entry must be a dict carrying "embedding".
        vectors = []
        for item in items:
            if not (isinstance(item, dict) and "embedding" in item):
                raise ValueError(f"Invalid item format in new API response: {item}")
            vectors.append(item["embedding"])
        return vectors

    # Guard first so that None, strings or lists produce the documented
    # ValueError instead of a TypeError from the membership tests below.
    if not isinstance(result, dict):
        raise ValueError(f"Cannot extract embeddings from response: {result}")

    if "embeddings" in result:
        return result["embeddings"]

    res = result.get("result")
    if isinstance(res, list):
        return res

    if isinstance(res, dict):
        if "embeddings" in res:
            return res["embeddings"]

        data = res.get("data")
        if isinstance(data, dict):
            if "embeddings" in data:
                return data["embeddings"]
            # New format without the flat "embeddings" list: fall back to the
            # per-text entries under data.results.
            if isinstance(data.get("results"), list):
                return collect(data["results"])
        elif isinstance(data, list):
            return collect(data)

    raise ValueError(f"Cannot extract embeddings from response: {result}")
|
102
|
+
|
36
103
|
async def main():
    """Run the demo: manual close, health check, embedding request, commands.

    Connection parameters come from ``get_params()``. A connection error
    during the first health check ends the demo; other service errors are
    printed and the demo continues. Configuration errors and any unexpected
    exception terminate the process with exit status 1.
    """
    try:
        base_url, port = get_params()

        # Manual lifecycle: close() is called on a client that was never
        # entered as a context manager.
        manual_client = EmbeddingServiceAsyncClient(base_url=base_url, port=port)
        print("Explicit session open/close example:")
        await manual_client.close()
        print("Session closed explicitly (manual close example).\n")

        async with EmbeddingServiceAsyncClient(base_url=base_url, port=port) as client:
            # Step 1: service health
            try:
                status = await client.health()
                print("Service health:", status)
            except EmbeddingServiceConnectionError as err:
                print(f"Connection error during health check: {err}")
                return
            except EmbeddingServiceTimeoutError as err:
                print(f"Timeout error during health check: {err}")
            except EmbeddingServiceError as err:
                print(f"Error during health check: {err}")

            # Step 2: embeddings for a small batch of texts
            texts = ["hello world", "test embedding"]
            try:
                response = await client.cmd("embed", params={"texts": texts})
                vectors = extract_vectors(response)
                print(f"Embeddings for {len(texts)} texts:")
                for idx, vector in enumerate(vectors):
                    print(f" Text: {texts[idx]!r}\n Vector: {vector[:5]}... (total {len(vector)} dims)")
            except EmbeddingServiceAPIError as err:
                print(f"API error during embedding: {err}")
            except EmbeddingServiceConnectionError as err:
                print(f"Connection error during embedding: {err}")
            except EmbeddingServiceTimeoutError as err:
                print(f"Timeout error during embedding: {err}")
            except EmbeddingServiceError as err:
                print(f"Error during embedding: {err}")

            # Step 3: health check issued through the generic command endpoint
            try:
                response = await client.cmd("health")
                print("Health check result:", response)
            except EmbeddingServiceError as err:
                print(f"Error during health command: {err}")

            # Step 4: an empty command, sent to show how errors are reported
            try:
                response = await client.cmd("")
                print("Empty command result:", response)
            except EmbeddingServiceAPIError as err:
                print(f"Expected error for empty command: {err}")
            except EmbeddingServiceError as err:
                print(f"Error for empty command: {err}")

    except EmbeddingServiceConfigError as err:
        print(f"Configuration error: {err}")
        sys.exit(1)
    except Exception as err:
        print(f"Unexpected error: {err}")
        sys.exit(1)
|
67
163
|
|
68
164
|
if __name__ == "__main__":
|
69
165
|
asyncio.run(main())
|
@@ -1,13 +1,27 @@
|
|
1
1
|
"""
|
2
|
-
|
2
|
+
Пример использования EmbeddingServiceAsyncClient (асинхронный клиент).
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
Run this script with:
|
4
|
+
USAGE:
|
5
|
+
python embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
|
6
|
+
# или
|
8
7
|
python -m asyncio embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
|
9
8
|
|
10
|
-
|
9
|
+
# Можно также использовать переменные окружения:
|
10
|
+
export EMBED_CLIENT_BASE_URL=http://localhost
|
11
|
+
export EMBED_CLIENT_PORT=8001
|
12
|
+
python embed_client/example_async_usage_ru.py
|
13
|
+
|
14
|
+
# ВАЖНО:
|
15
|
+
# --base-url и --port должны быть отдельными аргументами (через пробел),
|
16
|
+
# а не через = (НЕ --base_url=...)
|
17
|
+
# base_url должен содержать http:// или https://
|
18
|
+
|
19
|
+
EXAMPLES:
|
20
|
+
python embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
|
21
|
+
python -m asyncio embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
|
22
|
+
export EMBED_CLIENT_BASE_URL=http://localhost
|
23
|
+
export EMBED_CLIENT_PORT=8001
|
24
|
+
python embed_client/example_async_usage_ru.py
|
11
25
|
"""
|
12
26
|
|
13
27
|
import asyncio
|
@@ -34,7 +48,7 @@ def get_params():
|
|
34
48
|
if not port:
|
35
49
|
port = os.environ.get("EMBED_CLIENT_PORT")
|
36
50
|
if not base_url or not port:
|
37
|
-
print("Error: base_url and port must be provided via --base-url
|
51
|
+
print("Error: base_url and port must be provided via [--base-url | --port] arguments or [EMBED_CLIENT_BASE_URL/EMBED_CLIENT_PORT] environment variables.")
|
38
52
|
sys.exit(1)
|
39
53
|
return None, None
|
40
54
|
return base_url, int(port)
|
@@ -62,10 +76,30 @@ async def main():
|
|
62
76
|
texts = ["hello world", "test embedding"]
|
63
77
|
try:
|
64
78
|
result = await client.cmd("embed", params={"texts": texts})
|
65
|
-
|
79
|
+
# Use client's extract method for compatibility with both old and new formats
|
80
|
+
vectors = client.extract_embeddings(result)
|
66
81
|
print(f"Embeddings for {len(texts)} texts:")
|
67
82
|
for i, vec in enumerate(vectors):
|
68
83
|
print(f" Text: {texts[i]!r}\n Vector: {vec[:5]}... (total {len(vec)} dims)")
|
84
|
+
|
85
|
+
# Try to extract additional data if new format is available
|
86
|
+
try:
|
87
|
+
embedding_data = client.extract_embedding_data(result)
|
88
|
+
print("\nAdditional data from new format:")
|
89
|
+
for i, data in enumerate(embedding_data):
|
90
|
+
print(f" Text: {data['body']!r}")
|
91
|
+
print(f" Tokens: {data['tokens']}")
|
92
|
+
print(f" BM25 tokens: {data['bm25_tokens']}")
|
93
|
+
|
94
|
+
# Extract tokens and BM25 tokens separately
|
95
|
+
tokens = client.extract_tokens(result)
|
96
|
+
bm25_tokens = client.extract_bm25_tokens(result)
|
97
|
+
print(f"\nExtracted tokens: {tokens}")
|
98
|
+
print(f"Extracted BM25 tokens: {bm25_tokens}")
|
99
|
+
|
100
|
+
except ValueError as e:
|
101
|
+
print(f"(Old format detected - no additional data available): {e}")
|
102
|
+
|
69
103
|
except EmbeddingServiceAPIError as e:
|
70
104
|
print("[API error]", e.error)
|
71
105
|
except EmbeddingServiceHTTPError as e:
|
@@ -0,0 +1,8 @@
|
|
1
|
+
embed_client/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
2
|
+
embed_client/async_client.py,sha256=8jsDvHQhtoYNrQ4B2rGHqmniGPVMQeSsrzZZUyMx_J8,26357
|
3
|
+
embed_client/example_async_usage.py,sha256=6oCDALFebTv1o5k7lB7UuiacP9Scvf2r3gVIVtIrsPk,6623
|
4
|
+
embed_client/example_async_usage_ru.py,sha256=J9K3UpDJwwwy7gNQzy6G3clX4VoMleBmRk_9vymlIiw,5425
|
5
|
+
embed_client-2.0.0.0.dist-info/METADATA,sha256=3_4NeyifJYxq9iQQHgmxgegFxeoNPvriW_UpWNsAOmI,254
|
6
|
+
embed_client-2.0.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
7
|
+
embed_client-2.0.0.0.dist-info/top_level.txt,sha256=uG00A4d9o9DFrhiN7goObpeig72Pniby0E7UpDRgyXY,13
|
8
|
+
embed_client-2.0.0.0.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
embed_client/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
2
|
-
embed_client/async_client.py,sha256=sppZ8fPr4XNNLE3M6kLFTL6u5HE3nqo87bxAgt0S0zE,10589
|
3
|
-
embed_client/example_async_usage.py,sha256=df0RRwq2FtqVSL2MHVclfVIJj1wyQUuKZXB-lyVb3Kg,2538
|
4
|
-
embed_client/example_async_usage_ru.py,sha256=kZXQcbEFkx9tWXoCq-AoyvvUY4aCuW1XqPVb1ADWeAM,3558
|
5
|
-
embed_client-1.0.0.1.dist-info/METADATA,sha256=nFDbLecEwcLOuqhJe84hSdboS9vCEhmmX-Ajw9LYark,254
|
6
|
-
embed_client-1.0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
7
|
-
embed_client-1.0.0.1.dist-info/top_level.txt,sha256=uG00A4d9o9DFrhiN7goObpeig72Pniby0E7UpDRgyXY,13
|
8
|
-
embed_client-1.0.0.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|