embed-client 1.0.0.1__py3-none-any.whl → 2.0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,11 +4,15 @@ Async client for Embedding Service API (OpenAPI 3.0.2)
4
4
  - 100% type-annotated
5
5
  - English docstrings and examples
6
6
  - Ready for PyPi
7
+ - Supports new API format with body, embedding, and chunks
7
8
  """
8
9
 
9
10
  from typing import Any, Dict, List, Optional, Union
10
11
  import aiohttp
12
+ import asyncio
11
13
  import os
14
+ import json
15
+ import logging
12
16
 
13
17
  class EmbeddingServiceError(Exception):
14
18
  """Base exception for EmbeddingServiceAsyncClient."""
@@ -29,28 +33,73 @@ class EmbeddingServiceAPIError(EmbeddingServiceError):
29
33
  super().__init__(f"API error: {error}")
30
34
  self.error = error
31
35
 
36
class EmbeddingServiceConfigError(EmbeddingServiceError):
    """Signals an invalid client configuration, such as a bad base_url or port."""
38
+
39
class EmbeddingServiceTimeoutError(EmbeddingServiceError):
    """Signals that a request to the embedding service timed out."""
41
+
42
class EmbeddingServiceJSONError(EmbeddingServiceError):
    """Signals that a response body could not be parsed as JSON."""
44
+
32
45
  class EmbeddingServiceAsyncClient:
33
46
  """
34
47
  Asynchronous client for the Embedding Service API.
48
+
49
+ Supports both old and new API formats:
50
+ - Old format: {"result": {"success": true, "data": {"embeddings": [...]}}}
51
+ - New format: {"result": {"success": true, "data": {"embeddings": [...], "results": [{"body": "text", "embedding": [...], "tokens": [...], "bm25_tokens": [...]}]}}}
52
+
35
53
  Args:
36
54
  base_url (str): Base URL of the embedding service (e.g., "http://localhost").
37
55
  port (int): Port of the embedding service (e.g., 8001).
56
+ timeout (float): Request timeout in seconds (default: 30).
38
57
  Raises:
39
- ValueError: If base_url or port is not provided.
58
+ EmbeddingServiceConfigError: If base_url or port is invalid.
40
59
  """
41
- def __init__(self, base_url: Optional[str] = None, port: Optional[int] = None):
42
- self.base_url = base_url or os.getenv("EMBEDDING_SERVICE_BASE_URL", "http://localhost")
43
- if not self.base_url:
44
- raise ValueError("base_url must be provided.")
45
- self.port = port or int(os.getenv("EMBEDDING_SERVICE_PORT", "8001"))
46
- if self.port is None:
47
- raise ValueError("port must be provided.")
60
def __init__(self, base_url: Optional[str] = None, port: Optional[int] = None, timeout: float = 30.0):
    """
    Validate the connection settings and store them on the client.

    Args:
        base_url: Base URL of the service. When omitted (or falsy), the
            EMBEDDING_SERVICE_BASE_URL environment variable is used, then
            "http://localhost".
        port: Port of the service. When None, the EMBEDDING_SERVICE_PORT
            environment variable is used, then 8001.
        timeout: Request timeout in seconds; must convert to a positive float.

    Raises:
        EmbeddingServiceConfigError: If base_url is empty, not a string, or
            lacks an http:// or https:// prefix; if port is not an integer in
            1..65535; or if timeout is not a positive number.
    """
    # Validate and set base_url
    try:
        self.base_url = base_url or os.getenv("EMBEDDING_SERVICE_BASE_URL", "http://localhost")
        if not self.base_url:
            raise EmbeddingServiceConfigError("base_url must be provided.")
        if not isinstance(self.base_url, str):
            raise EmbeddingServiceConfigError("base_url must be a string.")

        # Validate URL format
        if not self.base_url.startswith(("http://", "https://")):
            raise EmbeddingServiceConfigError("base_url must start with http:// or https://")
    except (TypeError, AttributeError) as e:
        raise EmbeddingServiceConfigError(f"Invalid base_url configuration: {e}") from e

    # Validate and set port
    try:
        port_env = os.getenv("EMBEDDING_SERVICE_PORT", "8001")
        self.port = port if port is not None else int(port_env)
        # bool is a subclass of int, so True would otherwise be accepted as port 1.
        port_is_int = isinstance(self.port, int) and not isinstance(self.port, bool)
        if not port_is_int or not 1 <= self.port <= 65535:
            raise EmbeddingServiceConfigError("port must be a valid integer between 1 and 65535.")
    except (ValueError, TypeError) as e:
        raise EmbeddingServiceConfigError(f"Invalid port configuration: {e}") from e

    # Validate timeout
    try:
        self.timeout = float(timeout)
        # "not > 0" instead of "<= 0": every comparison with NaN is False,
        # so this form rejects NaN as well as zero and negative values.
        if not self.timeout > 0:
            raise EmbeddingServiceConfigError("timeout must be positive.")
    except (ValueError, TypeError) as e:
        raise EmbeddingServiceConfigError(f"Invalid timeout configuration: {e}") from e

    # The HTTP session is not created here; it starts out unset.
    self._session: Optional[aiohttp.ClientSession] = None
49
95
 
50
96
  def _make_url(self, path: str, base_url: Optional[str] = None, port: Optional[int] = None) -> str:
51
- url = (base_url or self.base_url).rstrip("/")
52
- port_val = port if port is not None else self.port
53
- return f"{url}:{port_val}{path}"
97
+ try:
98
+ url = (base_url or self.base_url).rstrip("/")
99
+ port_val = port if port is not None else self.port
100
+ return f"{url}:{port_val}{path}"
101
+ except Exception as e:
102
+ raise EmbeddingServiceConfigError(f"Failed to construct URL: {e}") from e
54
103
 
55
104
  def _format_error_response(self, error: str, lang: Optional[str] = None, text: Optional[str] = None) -> Dict[str, Any]:
56
105
  """
@@ -69,14 +118,221 @@ class EmbeddingServiceAsyncClient:
69
118
  response["text"] = text
70
119
  return response
71
120
 
121
def extract_embeddings(self, result: Dict[str, Any]) -> List[List[float]]:
    """
    Extract embedding vectors from an API response, whichever layout it uses.

    Layouts are tried in this order:
      1. ``embeddings`` at the top level
      2. ``result`` being a list (returned as is)
      3. ``result.embeddings``
      4. ``result.data.embeddings``
      5. ``result.data.results[].embedding``
      6. ``result.data[].embedding``

    Args:
        result: API response dictionary

    Returns:
        List of embedding vectors (list of lists of floats)

    Raises:
        ValueError: If no layout matches, or if an item in a per-text list
            is not a dictionary with an 'embedding' field
    """
    # Handle direct embeddings field (old format compatibility)
    if "embeddings" in result:
        return result["embeddings"]

    # Handle result wrapper
    if "result" in result:
        res = result["result"]

        # Handle direct list in result (old format)
        if isinstance(res, list):
            return res

        if isinstance(res, dict):
            if "embeddings" in res:
                return res["embeddings"]

            data = res.get("data")
            if isinstance(data, dict) and "embeddings" in data:
                return data["embeddings"]

            # Per-text records live either in data.results (the layout
            # extract_embedding_data reads) or directly in a data list.
            items = None
            if isinstance(data, dict) and isinstance(data.get("results"), list):
                items = data["results"]
            elif isinstance(data, list):
                items = data

            if items is not None:
                embeddings = []
                for item in items:
                    if not (isinstance(item, dict) and "embedding" in item):
                        raise ValueError(f"Invalid item format in new API response: {item}")
                    embeddings.append(item["embedding"])
                return embeddings

    raise ValueError(f"Cannot extract embeddings from response: {result}")
166
+
167
def extract_embedding_data(self, result: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Return the validated per-text records of an embedding response.

    Two layouts are accepted:
      - ``result.data.results[]``: every item must contain 'body',
        'embedding', 'tokens' and 'bm25_tokens'.
      - ``result.data[]`` (legacy list): every item must contain 'body',
        'embedding', and at least one of 'chunks' or 'tokens'.

    Args:
        result: API response dictionary

    Returns:
        The list of item dictionaries exactly as found in the response

    Raises:
        ValueError: If neither layout is present or an item fails validation
    """
    res = result.get("result") if isinstance(result, dict) else None
    if isinstance(res, dict):
        data = res.get("data")

        items = None
        required = ()
        legacy = False
        if isinstance(data, dict) and isinstance(data.get("results"), list):
            # New format: result.data.results[]
            items = data["results"]
            required = ("body", "embedding", "tokens", "bm25_tokens")
        elif isinstance(data, list):
            # Legacy format: result.data[]
            items = data
            required = ("body", "embedding")
            legacy = True

        if items is not None:
            for i, item in enumerate(items):
                if not isinstance(item, dict):
                    raise ValueError(f"Item {i} is not a dictionary: {item}")
                # Fields are checked in a fixed order so the first missing
                # one is the one reported.
                for field in required:
                    if field not in item:
                        raise ValueError(f"Item {i} missing '{field}' field: {item}")
                # Legacy items may carry 'chunks' instead of 'tokens'.
                if legacy and "chunks" not in item and "tokens" not in item:
                    raise ValueError(f"Item {i} missing 'chunks' or 'tokens' field: {item}")
            return items

    raise ValueError(f"Cannot extract embedding data from response (new format required): {result}")
218
+
219
def extract_texts(self, result: Dict[str, Any]) -> List[str]:
    """
    Collect the 'body' value of every record in an API response.

    The records are obtained from extract_embedding_data, so any ValueError
    it raises for an unsupported or invalid response propagates from here.

    Args:
        result: API response dictionary

    Returns:
        List of original text strings, one per record

    Raises:
        ValueError: If the records cannot be extracted from the response
    """
    texts: List[str] = []
    for entry in self.extract_embedding_data(result):
        texts.append(entry["body"])
    return texts
234
+
235
def extract_chunks(self, result: Dict[str, Any]) -> List[List[str]]:
    """
    Collect the token list of every record in an API response.

    For each record the 'tokens' field is used when present; otherwise the
    'chunks' field is used.

    Args:
        result: API response dictionary

    Returns:
        List of token lists for each text

    Raises:
        ValueError: If the records cannot be extracted, or a record has
            neither 'tokens' nor 'chunks'
    """
    collected = []
    for entry in self.extract_embedding_data(result):
        # Prefer 'tokens'; fall back to 'chunks' only when 'tokens' is absent.
        key = "tokens" if "tokens" in entry else "chunks"
        if key not in entry:
            raise ValueError(f"Item missing both 'tokens' and 'chunks' fields: {entry}")
        collected.append(entry[key])
    return collected
260
+
261
def extract_tokens(self, result: Dict[str, Any]) -> List[List[str]]:
    """
    Collect the 'tokens' field of every record in an API response.

    Args:
        result: API response dictionary

    Returns:
        List of token lists for each text

    Raises:
        ValueError: If the records cannot be extracted, or a record has no
            'tokens' field (for example a legacy record that only carries
            'chunks')
    """
    tokens = []
    for i, item in enumerate(self.extract_embedding_data(result)):
        # Legacy records are accepted by extract_embedding_data with only
        # 'chunks'; report that as the documented ValueError, not a KeyError.
        if "tokens" not in item:
            raise ValueError(f"Item {i} missing 'tokens' field: {item}")
        tokens.append(item["tokens"])
    return tokens
276
+
277
def extract_bm25_tokens(self, result: Dict[str, Any]) -> List[List[str]]:
    """
    Collect the 'bm25_tokens' field of every record in an API response.

    Args:
        result: API response dictionary

    Returns:
        List of BM25 token lists for each text

    Raises:
        ValueError: If the records cannot be extracted, or a record has no
            'bm25_tokens' field (legacy list records are not required to
            carry one)
    """
    bm25_tokens = []
    for i, item in enumerate(self.extract_embedding_data(result)):
        # Legacy records pass validation without 'bm25_tokens'; report that
        # as the documented ValueError, not a KeyError.
        if "bm25_tokens" not in item:
            raise ValueError(f"Item {i} missing 'bm25_tokens' field: {item}")
        bm25_tokens.append(item["bm25_tokens"])
    return bm25_tokens
292
+
72
293
  async def __aenter__(self):
73
- self._session = aiohttp.ClientSession()
74
- return self
294
+ try:
295
+ # Create session with timeout configuration
296
+ timeout = aiohttp.ClientTimeout(total=self.timeout)
297
+ self._session = aiohttp.ClientSession(timeout=timeout)
298
+ return self
299
+ except Exception as e:
300
+ raise EmbeddingServiceError(f"Failed to create HTTP session: {e}") from e
75
301
 
76
302
  async def __aexit__(self, exc_type, exc, tb):
77
303
  if self._session:
78
- await self._session.close()
79
- self._session = None
304
+ try:
305
+ await self._session.close()
306
+ except Exception as e:
307
+ raise EmbeddingServiceError(f"Failed to close HTTP session: {e}") from e
308
+ finally:
309
+ self._session = None
310
+
311
async def _parse_json_response(self, resp: aiohttp.ClientResponse) -> Dict[str, Any]:
    """
    Parse a response body as JSON, converting failures to EmbeddingServiceJSONError.

    Args:
        resp: aiohttp response object

    Returns:
        dict: Parsed JSON data

    Raises:
        EmbeddingServiceJSONError: If the body is not valid JSON, cannot be
            decoded, or any other error occurs while parsing. For invalid
            JSON the message includes up to the first 500 characters of the
            response text when that text can be read.
    """
    try:
        return await resp.json()
    except json.JSONDecodeError as e:
        # Only the text read is guarded here. Raising the final error inside
        # this inner try would let the handler below catch and rewrap it,
        # losing the response text from the message.
        try:
            text = await resp.text()
        except Exception as text_error:
            raise EmbeddingServiceJSONError(
                f"Invalid JSON response: {e}. Failed to get response text: {text_error}"
            ) from e
        raise EmbeddingServiceJSONError(
            f"Invalid JSON response: {e}. Response text: {text[:500]}..."
        ) from e
    except UnicodeDecodeError as e:
        raise EmbeddingServiceJSONError(f"Unicode decode error in response: {e}") from e
    except Exception as e:
        raise EmbeddingServiceJSONError(f"Unexpected error parsing JSON: {e}") from e
80
336
 
81
337
  async def health(self, base_url: Optional[str] = None, port: Optional[int] = None) -> Dict[str, Any]:
82
338
  """
@@ -89,17 +345,35 @@ class EmbeddingServiceAsyncClient:
89
345
  """
90
346
  url = self._make_url("/health", base_url, port)
91
347
  try:
92
- async with self._session.get(url) as resp:
348
+ async with self._session.get(url, timeout=self.timeout) as resp:
93
349
  await self._raise_for_status(resp)
94
- return await resp.json()
350
+ try:
351
+ data = await resp.json()
352
+ except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
353
+ raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
354
+ if "error" in data:
355
+ raise EmbeddingServiceAPIError(data["error"])
356
+ return data
95
357
  except EmbeddingServiceHTTPError:
96
358
  raise
97
359
  except EmbeddingServiceConnectionError:
98
360
  raise
361
+ except EmbeddingServiceJSONError:
362
+ raise
363
+ except EmbeddingServiceTimeoutError:
364
+ raise
99
365
  except aiohttp.ClientConnectionError as e:
100
366
  raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
101
367
  except aiohttp.ClientResponseError as e:
102
368
  raise EmbeddingServiceHTTPError(e.status, e.message) from e
369
+ except asyncio.TimeoutError as e:
370
+ raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
371
+ except aiohttp.ServerTimeoutError as e:
372
+ raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
373
+ except aiohttp.ClientSSLError as e:
374
+ raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
375
+ except aiohttp.ClientOSError as e:
376
+ raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
103
377
  except Exception as e:
104
378
  raise EmbeddingServiceError(f"Unexpected error: {e}") from e
105
379
 
@@ -114,17 +388,35 @@ class EmbeddingServiceAsyncClient:
114
388
  """
115
389
  url = self._make_url("/openapi.json", base_url, port)
116
390
  try:
117
- async with self._session.get(url) as resp:
391
+ async with self._session.get(url, timeout=self.timeout) as resp:
118
392
  await self._raise_for_status(resp)
119
- return await resp.json()
393
+ try:
394
+ data = await resp.json()
395
+ except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
396
+ raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
397
+ if "error" in data:
398
+ raise EmbeddingServiceAPIError(data["error"])
399
+ return data
120
400
  except EmbeddingServiceHTTPError:
121
401
  raise
122
402
  except EmbeddingServiceConnectionError:
123
403
  raise
404
+ except EmbeddingServiceJSONError:
405
+ raise
406
+ except EmbeddingServiceTimeoutError:
407
+ raise
124
408
  except aiohttp.ClientConnectionError as e:
125
409
  raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
126
410
  except aiohttp.ClientResponseError as e:
127
411
  raise EmbeddingServiceHTTPError(e.status, e.message) from e
412
+ except asyncio.TimeoutError as e:
413
+ raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
414
+ except aiohttp.ServerTimeoutError as e:
415
+ raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
416
+ except aiohttp.ClientSSLError as e:
417
+ raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
418
+ except aiohttp.ClientOSError as e:
419
+ raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
128
420
  except Exception as e:
129
421
  raise EmbeddingServiceError(f"Unexpected error: {e}") from e
130
422
 
@@ -139,17 +431,35 @@ class EmbeddingServiceAsyncClient:
139
431
  """
140
432
  url = self._make_url("/api/commands", base_url, port)
141
433
  try:
142
- async with self._session.get(url) as resp:
434
+ async with self._session.get(url, timeout=self.timeout) as resp:
143
435
  await self._raise_for_status(resp)
144
- return await resp.json()
436
+ try:
437
+ data = await resp.json()
438
+ except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
439
+ raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
440
+ if "error" in data:
441
+ raise EmbeddingServiceAPIError(data["error"])
442
+ return data
145
443
  except EmbeddingServiceHTTPError:
146
444
  raise
147
445
  except EmbeddingServiceConnectionError:
148
446
  raise
447
+ except EmbeddingServiceJSONError:
448
+ raise
449
+ except EmbeddingServiceTimeoutError:
450
+ raise
149
451
  except aiohttp.ClientConnectionError as e:
150
452
  raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
151
453
  except aiohttp.ClientResponseError as e:
152
454
  raise EmbeddingServiceHTTPError(e.status, e.message) from e
455
+ except asyncio.TimeoutError as e:
456
+ raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
457
+ except aiohttp.ServerTimeoutError as e:
458
+ raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
459
+ except aiohttp.ClientSSLError as e:
460
+ raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
461
+ except aiohttp.ClientOSError as e:
462
+ raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
153
463
  except Exception as e:
154
464
  raise EmbeddingServiceError(f"Unexpected error: {e}") from e
155
465
 
@@ -221,41 +531,54 @@ class EmbeddingServiceAsyncClient:
221
531
  if command == "embed" and params and "texts" in params:
222
532
  self._validate_texts(params["texts"])
223
533
 
534
+ logger = logging.getLogger('EmbeddingServiceAsyncClient.cmd')
224
535
  url = self._make_url("/cmd", base_url, port)
225
536
  payload = {"command": command}
226
537
  if params is not None:
227
538
  payload["params"] = params
228
-
539
+ logger.info(f"Sending embedding command: url={url}, payload={payload}")
229
540
  try:
230
- async with self._session.post(url, json=payload) as resp:
541
+ async with self._session.post(url, json=payload, timeout=self.timeout) as resp:
542
+ logger.info(f"Embedding service HTTP status: {resp.status}")
231
543
  await self._raise_for_status(resp)
232
- data = await resp.json()
233
-
234
- if "error" in data:
235
- raise EmbeddingServiceAPIError(data["error"])
236
-
237
- if "result" in data:
238
- res = data["result"]
239
- if isinstance(res, dict) and "success" in res and res["success"] is False:
240
- if "error" in res:
241
- raise EmbeddingServiceAPIError(res["error"])
242
-
243
- return data
244
-
544
+ try:
545
+ resp_json = await resp.json()
546
+ except (ValueError, UnicodeDecodeError, json.JSONDecodeError) as e:
547
+ raise EmbeddingServiceJSONError(f"Invalid JSON response: {e}") from e
548
+ logger.info(f"Embedding service response: {str(resp_json)[:300]}")
549
+ # Обработка ошибок API
550
+ if "error" in resp_json:
551
+ raise EmbeddingServiceAPIError(resp_json["error"])
552
+ if "result" in resp_json:
553
+ result = resp_json["result"]
554
+ if isinstance(result, dict) and (result.get("success") is False or "error" in result):
555
+ raise EmbeddingServiceAPIError(result.get("error", result))
556
+ return resp_json
557
+ except EmbeddingServiceAPIError:
558
+ raise
559
+ except EmbeddingServiceHTTPError:
560
+ raise
561
+ except EmbeddingServiceConnectionError:
562
+ raise
563
+ except EmbeddingServiceJSONError:
564
+ raise
565
+ except EmbeddingServiceTimeoutError:
566
+ raise
567
+ except aiohttp.ServerTimeoutError as e:
568
+ raise EmbeddingServiceTimeoutError(f"Server timeout: {e}") from e
245
569
  except aiohttp.ClientConnectionError as e:
246
- raise EmbeddingServiceAPIError({
247
- "code": -32000,
248
- "message": f"Connection error: {e}"
249
- }) from e
570
+ raise EmbeddingServiceConnectionError(f"Connection error: {e}") from e
250
571
  except aiohttp.ClientResponseError as e:
251
572
  raise EmbeddingServiceHTTPError(e.status, e.message) from e
252
- except EmbeddingServiceHTTPError:
253
- raise
573
+ except asyncio.TimeoutError as e:
574
+ raise EmbeddingServiceTimeoutError(f"Request timeout: {e}") from e
575
+ except aiohttp.ClientSSLError as e:
576
+ raise EmbeddingServiceConnectionError(f"SSL error: {e}") from e
577
+ except aiohttp.ClientOSError as e:
578
+ raise EmbeddingServiceConnectionError(f"OS error: {e}") from e
254
579
  except Exception as e:
255
- raise EmbeddingServiceAPIError({
256
- "code": -32000,
257
- "message": f"Unexpected error: {e}"
258
- }) from e
580
+ logger.error(f"Error in embedding cmd: {e}", exc_info=True)
581
+ raise EmbeddingServiceError(f"Unexpected error: {e}") from e
259
582
 
260
583
  async def _raise_for_status(self, resp: aiohttp.ClientResponse):
261
584
  try:
@@ -263,4 +586,22 @@ class EmbeddingServiceAsyncClient:
263
586
  except aiohttp.ClientResponseError as e:
264
587
  raise EmbeddingServiceHTTPError(e.status, e.message) from e
265
588
 
589
async def close(self) -> None:
    """
    Close the client's HTTP session, if one is set.

    When no session is set the call does nothing, so it can be repeated
    safely. The session reference is cleared whether or not closing succeeds.

    Raises:
        EmbeddingServiceError: If closing the session fails.
    """
    session = self._session
    if not session:
        return
    try:
        await session.close()
    except Exception as e:
        raise EmbeddingServiceError(f"Failed to close HTTP session: {e}") from e
    finally:
        # Drop the reference even on failure so a later call is a no-op.
        self._session = None
606
+
266
607
  # TODO: Add methods for /cmd, /api/commands, etc.
@@ -1,19 +1,51 @@
1
1
  """
2
2
  Example usage of EmbeddingServiceAsyncClient.
3
3
 
4
- This example demonstrates how to use the async client to check the health of the embedding service,
5
- request embeddings, and handle all possible errors.
4
+ USAGE:
5
+ python embed_client/example_async_usage.py --base-url http://localhost --port 8001
6
+ # или
7
+ python -m asyncio embed_client/example_async_usage.py --base-url http://localhost --port 8001
8
+
9
+ # Можно также использовать переменные окружения:
10
+ export EMBED_CLIENT_BASE_URL=http://localhost
11
+ export EMBED_CLIENT_PORT=8001
12
+ python embed_client/example_async_usage.py
13
+
14
+ # ВАЖНО:
15
+ # --base-url и --port должны быть отдельными аргументами (через пробел),
16
+ # а не через = (НЕ --base_url=...)
17
+ # base_url должен содержать http:// или https://
6
18
 
7
- Run this script with:
19
+ EXAMPLES:
20
+ python embed_client/example_async_usage.py --base-url http://localhost --port 8001
8
21
  python -m asyncio embed_client/example_async_usage.py --base-url http://localhost --port 8001
22
+ export EMBED_CLIENT_BASE_URL=http://localhost
23
+ export EMBED_CLIENT_PORT=8001
24
+ python embed_client/example_async_usage.py
9
25
 
10
- You can also set EMBED_CLIENT_BASE_URL and EMBED_CLIENT_PORT environment variables.
26
+ Explicit session close example:
27
+ import asyncio
28
+ from embed_client.async_client import EmbeddingServiceAsyncClient
29
+ async def main():
30
+ client = EmbeddingServiceAsyncClient(base_url="http://localhost", port=8001)
31
+ # ... use client ...
32
+ await client.close() # Explicitly close session
33
+ asyncio.run(main())
11
34
  """
12
35
 
13
36
  import asyncio
14
37
  import sys
15
38
  import os
16
- from embed_client.async_client import EmbeddingServiceAsyncClient
39
+ from embed_client.async_client import (
40
+ EmbeddingServiceAsyncClient,
41
+ EmbeddingServiceError,
42
+ EmbeddingServiceAPIError,
43
+ EmbeddingServiceHTTPError,
44
+ EmbeddingServiceConnectionError,
45
+ EmbeddingServiceTimeoutError,
46
+ EmbeddingServiceJSONError,
47
+ EmbeddingServiceConfigError
48
+ )
17
49
 
18
50
  def get_params():
19
51
  base_url = None
@@ -33,37 +65,101 @@ def get_params():
33
65
  return None, None
34
66
  return base_url, int(port)
35
67
 
68
def extract_vectors(result):
    """
    Extract embedding vectors from an API response, whichever layout it uses.

    Layouts are tried in this order: top-level ``embeddings``; ``result`` as
    a list; ``result.embeddings``; ``result.data.embeddings``;
    ``result.data.results[].embedding``; ``result.data[].embedding``.

    Args:
        result: API response dictionary

    Returns:
        List of embedding vectors

    Raises:
        ValueError: If no layout matches, or if an item in a per-text list
            is not a dictionary with an 'embedding' field
    """
    # Handle direct embeddings field (old format compatibility)
    if "embeddings" in result:
        return result["embeddings"]

    # Handle result wrapper
    if "result" in result:
        res = result["result"]

        # Handle direct list in result (old format)
        if isinstance(res, list):
            return res

        if isinstance(res, dict):
            if "embeddings" in res:
                return res["embeddings"]

            data = res.get("data")
            if isinstance(data, dict) and "embeddings" in data:
                return data["embeddings"]

            # Per-text records live either in data.results or directly in
            # a data list.
            items = None
            if isinstance(data, dict) and isinstance(data.get("results"), list):
                items = data["results"]
            elif isinstance(data, list):
                items = data

            if items is not None:
                embeddings = []
                for item in items:
                    if not (isinstance(item, dict) and "embedding" in item):
                        raise ValueError(f"Invalid item format in new API response: {item}")
                    embeddings.append(item["embedding"])
                return embeddings

    raise ValueError(f"Cannot extract embeddings from response: {result}")
102
+
36
103
  async def main():
37
- base_url, port = get_params()
38
- async with EmbeddingServiceAsyncClient(base_url=base_url, port=port) as client:
39
- # Check health
40
- health = await client.health()
41
- print("Service health:", health)
104
+ try:
105
+ base_url, port = get_params()
106
+ # Explicit open/close example
107
+ client = EmbeddingServiceAsyncClient(base_url=base_url, port=port)
108
+ print("Explicit session open/close example:")
109
+ await client.close()
110
+ print("Session closed explicitly (manual close example).\n")
111
+ async with EmbeddingServiceAsyncClient(base_url=base_url, port=port) as client:
112
+ # Check health
113
+ try:
114
+ health = await client.health()
115
+ print("Service health:", health)
116
+ except EmbeddingServiceConnectionError as e:
117
+ print(f"Connection error during health check: {e}")
118
+ return
119
+ except EmbeddingServiceTimeoutError as e:
120
+ print(f"Timeout error during health check: {e}")
121
+ except EmbeddingServiceError as e:
122
+ print(f"Error during health check: {e}")
42
123
 
43
- # Request embeddings for a list of texts
44
- texts = ["hello world", "test embedding"]
45
- result = await client.cmd("embed", params={"texts": texts})
46
-
47
- if "error" in result:
48
- print(f"Error occurred: {result['error']}")
49
- if "lang" in result:
50
- print(f"Language: {result['lang']}")
51
- if "text" in result:
52
- print(f"Text: {result['text']}")
53
- else:
54
- vectors = result["result"]
55
- print(f"Embeddings for {len(texts)} texts:")
56
- for i, vec in enumerate(vectors):
57
- print(f" Text: {texts[i]!r}\n Vector: {vec[:5]}... (total {len(vec)} dims)")
124
+ # Request embeddings for a list of texts
125
+ texts = ["hello world", "test embedding"]
126
+ try:
127
+ result = await client.cmd("embed", params={"texts": texts})
128
+ vectors = extract_vectors(result)
129
+ print(f"Embeddings for {len(texts)} texts:")
130
+ for i, vec in enumerate(vectors):
131
+ print(f" Text: {texts[i]!r}\n Vector: {vec[:5]}... (total {len(vec)} dims)")
132
+ except EmbeddingServiceAPIError as e:
133
+ print(f"API error during embedding: {e}")
134
+ except EmbeddingServiceConnectionError as e:
135
+ print(f"Connection error during embedding: {e}")
136
+ except EmbeddingServiceTimeoutError as e:
137
+ print(f"Timeout error during embedding: {e}")
138
+ except EmbeddingServiceError as e:
139
+ print(f"Error during embedding: {e}")
140
+
141
+ # Example: health check via cmd
142
+ try:
143
+ result = await client.cmd("health")
144
+ print("Health check result:", result)
145
+ except EmbeddingServiceError as e:
146
+ print(f"Error during health command: {e}")
58
147
 
59
- # Example: error handling for invalid command
60
- result = await client.cmd("health")
61
- print("Health check result:", result)
148
+ # Example: error handling for empty command
149
+ try:
150
+ result = await client.cmd("")
151
+ print("Empty command result:", result)
152
+ except EmbeddingServiceAPIError as e:
153
+ print(f"Expected error for empty command: {e}")
154
+ except EmbeddingServiceError as e:
155
+ print(f"Error for empty command: {e}")
62
156
 
63
- # Example: error handling for empty command
64
- # result = await client.cmd("")
65
- # if "error" in result:
66
- # print(f"Error for empty command: {result['error']}")
157
+ except EmbeddingServiceConfigError as e:
158
+ print(f"Configuration error: {e}")
159
+ sys.exit(1)
160
+ except Exception as e:
161
+ print(f"Unexpected error: {e}")
162
+ sys.exit(1)
67
163
 
68
164
  if __name__ == "__main__":
69
165
  asyncio.run(main())
@@ -1,13 +1,27 @@
1
1
  """
2
- Example usage of EmbeddingServiceAsyncClient.
2
+ Пример использования EmbeddingServiceAsyncClient (асинхронный клиент).
3
3
 
4
- This example demonstrates how to use the async client to check the health of the embedding service,
5
- request embeddings, and handle all possible exceptions.
6
-
7
- Run this script with:
4
+ USAGE:
5
+ python embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
6
+ # или
8
7
  python -m asyncio embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
9
8
 
10
- You can also set EMBED_CLIENT_BASE_URL and EMBED_CLIENT_PORT environment variables.
9
+ # Можно также использовать переменные окружения:
10
+ export EMBED_CLIENT_BASE_URL=http://localhost
11
+ export EMBED_CLIENT_PORT=8001
12
+ python embed_client/example_async_usage_ru.py
13
+
14
+ # ВАЖНО:
15
+ # --base-url и --port должны быть отдельными аргументами (через пробел),
16
+ # а не через = (НЕ --base_url=...)
17
+ # base_url должен содержать http:// или https://
18
+
19
+ EXAMPLES:
20
+ python embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
21
+ python -m asyncio embed_client/example_async_usage_ru.py --base-url http://localhost --port 8001
22
+ export EMBED_CLIENT_BASE_URL=http://localhost
23
+ export EMBED_CLIENT_PORT=8001
24
+ python embed_client/example_async_usage_ru.py
11
25
  """
12
26
 
13
27
  import asyncio
@@ -34,7 +48,7 @@ def get_params():
34
48
  if not port:
35
49
  port = os.environ.get("EMBED_CLIENT_PORT")
36
50
  if not base_url or not port:
37
- print("Error: base_url and port must be provided via --base-url/--port arguments or EMBED_CLIENT_BASE_URL/EMBED_CLIENT_PORT environment variables.")
51
+ print("Error: base_url and port must be provided via [--base-url | --port] arguments or [EMBED_CLIENT_BASE_URL/EMBED_CLIENT_PORT] environment variables.")
38
52
  sys.exit(1)
39
53
  return None, None
40
54
  return base_url, int(port)
@@ -62,10 +76,30 @@ async def main():
62
76
  texts = ["hello world", "test embedding"]
63
77
  try:
64
78
  result = await client.cmd("embed", params={"texts": texts})
65
- vectors = result["result"]
79
+ # Use client's extract method for compatibility with both old and new formats
80
+ vectors = client.extract_embeddings(result)
66
81
  print(f"Embeddings for {len(texts)} texts:")
67
82
  for i, vec in enumerate(vectors):
68
83
  print(f" Text: {texts[i]!r}\n Vector: {vec[:5]}... (total {len(vec)} dims)")
84
+
85
+ # Try to extract additional data if new format is available
86
+ try:
87
+ embedding_data = client.extract_embedding_data(result)
88
+ print("\nAdditional data from new format:")
89
+ for i, data in enumerate(embedding_data):
90
+ print(f" Text: {data['body']!r}")
91
+ print(f" Tokens: {data['tokens']}")
92
+ print(f" BM25 tokens: {data['bm25_tokens']}")
93
+
94
+ # Extract tokens and BM25 tokens separately
95
+ tokens = client.extract_tokens(result)
96
+ bm25_tokens = client.extract_bm25_tokens(result)
97
+ print(f"\nExtracted tokens: {tokens}")
98
+ print(f"Extracted BM25 tokens: {bm25_tokens}")
99
+
100
+ except ValueError as e:
101
+ print(f"(Old format detected - no additional data available): {e}")
102
+
69
103
  except EmbeddingServiceAPIError as e:
70
104
  print("[API error]", e.error)
71
105
  except EmbeddingServiceHTTPError as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: embed-client
3
- Version: 1.0.0.1
3
+ Version: 2.0.0.0
4
4
  Summary: Async client for Embedding Service API
5
5
  Author: Your Name
6
6
  Requires-Dist: aiohttp
@@ -0,0 +1,8 @@
1
+ embed_client/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
2
+ embed_client/async_client.py,sha256=8jsDvHQhtoYNrQ4B2rGHqmniGPVMQeSsrzZZUyMx_J8,26357
3
+ embed_client/example_async_usage.py,sha256=6oCDALFebTv1o5k7lB7UuiacP9Scvf2r3gVIVtIrsPk,6623
4
+ embed_client/example_async_usage_ru.py,sha256=J9K3UpDJwwwy7gNQzy6G3clX4VoMleBmRk_9vymlIiw,5425
5
+ embed_client-2.0.0.0.dist-info/METADATA,sha256=3_4NeyifJYxq9iQQHgmxgegFxeoNPvriW_UpWNsAOmI,254
6
+ embed_client-2.0.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ embed_client-2.0.0.0.dist-info/top_level.txt,sha256=uG00A4d9o9DFrhiN7goObpeig72Pniby0E7UpDRgyXY,13
8
+ embed_client-2.0.0.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- embed_client/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
2
- embed_client/async_client.py,sha256=sppZ8fPr4XNNLE3M6kLFTL6u5HE3nqo87bxAgt0S0zE,10589
3
- embed_client/example_async_usage.py,sha256=df0RRwq2FtqVSL2MHVclfVIJj1wyQUuKZXB-lyVb3Kg,2538
4
- embed_client/example_async_usage_ru.py,sha256=kZXQcbEFkx9tWXoCq-AoyvvUY4aCuW1XqPVb1ADWeAM,3558
5
- embed_client-1.0.0.1.dist-info/METADATA,sha256=nFDbLecEwcLOuqhJe84hSdboS9vCEhmmX-Ajw9LYark,254
6
- embed_client-1.0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- embed_client-1.0.0.1.dist-info/top_level.txt,sha256=uG00A4d9o9DFrhiN7goObpeig72Pniby0E7UpDRgyXY,13
8
- embed_client-1.0.0.1.dist-info/RECORD,,