embed-client 1.0.1.1__py3-none-any.whl → 2.0.0.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in that public registry.
@@ -48,7 +48,7 @@ class EmbeddingServiceAsyncClient:
48
48
 
49
49
  Supports both old and new API formats:
50
50
  - Old format: {"result": {"success": true, "data": {"embeddings": [...]}}}
51
- - New format: {"result": {"success": true, "data": [{"body": "text", "embedding": [...], "chunks": [...]}]}}
51
+ - New format: {"result": {"success": true, "data": {"embeddings": [...], "results": [{"body": "text", "embedding": [...], "tokens": [...], "bm25_tokens": [...]}]}}}
52
52
 
53
53
  Args:
54
54
  base_url (str): Base URL of the embedding service (e.g., "http://localhost").
@@ -172,13 +172,33 @@ class EmbeddingServiceAsyncClient:
172
172
  result: API response dictionary
173
173
 
174
174
  Returns:
175
- List of dictionaries with 'body', 'embedding', and 'chunks' fields
175
+ List of dictionaries with 'body', 'embedding', 'tokens', and 'bm25_tokens' fields
176
176
 
177
177
  Raises:
178
178
  ValueError: If data cannot be extracted or is in old format
179
179
  """
180
180
  if "result" in result and isinstance(result["result"], dict):
181
181
  res = result["result"]
182
+ if "data" in res and isinstance(res["data"], dict) and "results" in res["data"]:
183
+ # New format: result.data.results[]
184
+ results = res["data"]["results"]
185
+ if isinstance(results, list):
186
+ # Validate that all items have required fields
187
+ for i, item in enumerate(results):
188
+ if not isinstance(item, dict):
189
+ raise ValueError(f"Item {i} is not a dictionary: {item}")
190
+ if "body" not in item:
191
+ raise ValueError(f"Item {i} missing 'body' field: {item}")
192
+ if "embedding" not in item:
193
+ raise ValueError(f"Item {i} missing 'embedding' field: {item}")
194
+ if "tokens" not in item:
195
+ raise ValueError(f"Item {i} missing 'tokens' field: {item}")
196
+ if "bm25_tokens" not in item:
197
+ raise ValueError(f"Item {i} missing 'bm25_tokens' field: {item}")
198
+
199
+ return results
200
+
201
+ # Legacy support for old format: result.data[]
182
202
  if "data" in res and isinstance(res["data"], list):
183
203
  # Validate that all items have required fields
184
204
  for i, item in enumerate(res["data"]):
@@ -188,8 +208,9 @@ class EmbeddingServiceAsyncClient:
188
208
  raise ValueError(f"Item {i} missing 'body' field: {item}")
189
209
  if "embedding" not in item:
190
210
  raise ValueError(f"Item {i} missing 'embedding' field: {item}")
191
- if "chunks" not in item:
192
- raise ValueError(f"Item {i} missing 'chunks' field: {item}")
211
+ # Old format had 'chunks' instead of 'tokens'
212
+ if "chunks" not in item and "tokens" not in item:
213
+ raise ValueError(f"Item {i} missing 'chunks' or 'tokens' field: {item}")
193
214
 
194
215
  return res["data"]
195
216
 
@@ -214,18 +235,60 @@ class EmbeddingServiceAsyncClient:
214
235
  def extract_chunks(self, result: Dict[str, Any]) -> List[List[str]]:
215
236
  """
216
237
  Extract text chunks from API response (new format only).
238
+ Note: This method now extracts 'tokens' instead of 'chunks' for compatibility.
217
239
 
218
240
  Args:
219
241
  result: API response dictionary
220
242
 
221
243
  Returns:
222
- List of chunk lists for each text
244
+ List of token lists for each text
223
245
 
224
246
  Raises:
225
247
  ValueError: If chunks cannot be extracted or is in old format
226
248
  """
227
249
  data = self.extract_embedding_data(result)
228
- return [item["chunks"] for item in data]
250
+ chunks = []
251
+ for item in data:
252
+ # New format uses 'tokens', old format used 'chunks'
253
+ if "tokens" in item:
254
+ chunks.append(item["tokens"])
255
+ elif "chunks" in item:
256
+ chunks.append(item["chunks"])
257
+ else:
258
+ raise ValueError(f"Item missing both 'tokens' and 'chunks' fields: {item}")
259
+ return chunks
260
+
261
+ def extract_tokens(self, result: Dict[str, Any]) -> List[List[str]]:
262
+ """
263
+ Extract tokens from API response (new format only).
264
+
265
+ Args:
266
+ result: API response dictionary
267
+
268
+ Returns:
269
+ List of token lists for each text
270
+
271
+ Raises:
272
+ ValueError: If tokens cannot be extracted or is in old format
273
+ """
274
+ data = self.extract_embedding_data(result)
275
+ return [item["tokens"] for item in data]
276
+
277
+ def extract_bm25_tokens(self, result: Dict[str, Any]) -> List[List[str]]:
278
+ """
279
+ Extract BM25 tokens from API response (new format only).
280
+
281
+ Args:
282
+ result: API response dictionary
283
+
284
+ Returns:
285
+ List of BM25 token lists for each text
286
+
287
+ Raises:
288
+ ValueError: If BM25 tokens cannot be extracted or is in old format
289
+ """
290
+ data = self.extract_embedding_data(result)
291
+ return [item["bm25_tokens"] for item in data]
229
292
 
230
293
  async def __aenter__(self):
231
294
  try:
@@ -88,9 +88,17 @@ async def main():
88
88
  print("\nAdditional data from new format:")
89
89
  for i, data in enumerate(embedding_data):
90
90
  print(f" Text: {data['body']!r}")
91
- print(f" Chunks: {data['chunks']}")
92
- except ValueError:
93
- print("(Old format detected - no additional data available)")
91
+ print(f" Tokens: {data['tokens']}")
92
+ print(f" BM25 tokens: {data['bm25_tokens']}")
93
+
94
+ # Extract tokens and BM25 tokens separately
95
+ tokens = client.extract_tokens(result)
96
+ bm25_tokens = client.extract_bm25_tokens(result)
97
+ print(f"\nExtracted tokens: {tokens}")
98
+ print(f"Extracted BM25 tokens: {bm25_tokens}")
99
+
100
+ except ValueError as e:
101
+ print(f"(Old format detected - no additional data available): {e}")
94
102
 
95
103
  except EmbeddingServiceAPIError as e:
96
104
  print("[API error]", e.error)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: embed-client
3
- Version: 1.0.1.1
3
+ Version: 2.0.0.0
4
4
  Summary: Async client for Embedding Service API
5
5
  Author: Your Name
6
6
  Requires-Dist: aiohttp
@@ -0,0 +1,8 @@
1
+ embed_client/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
2
+ embed_client/async_client.py,sha256=8jsDvHQhtoYNrQ4B2rGHqmniGPVMQeSsrzZZUyMx_J8,26357
3
+ embed_client/example_async_usage.py,sha256=6oCDALFebTv1o5k7lB7UuiacP9Scvf2r3gVIVtIrsPk,6623
4
+ embed_client/example_async_usage_ru.py,sha256=J9K3UpDJwwwy7gNQzy6G3clX4VoMleBmRk_9vymlIiw,5425
5
+ embed_client-2.0.0.0.dist-info/METADATA,sha256=3_4NeyifJYxq9iQQHgmxgegFxeoNPvriW_UpWNsAOmI,254
6
+ embed_client-2.0.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ embed_client-2.0.0.0.dist-info/top_level.txt,sha256=uG00A4d9o9DFrhiN7goObpeig72Pniby0E7UpDRgyXY,13
8
+ embed_client-2.0.0.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- embed_client/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
2
- embed_client/async_client.py,sha256=BNGBGtionC6Evcr9yTTZGsMt7r9hH-DRcHguSJMxR8s,23514
3
- embed_client/example_async_usage.py,sha256=6oCDALFebTv1o5k7lB7UuiacP9Scvf2r3gVIVtIrsPk,6623
4
- embed_client/example_async_usage_ru.py,sha256=0ZFeUCSHoWnKQelK9UQ2Y3hSvFhVvRJ9cosWqxMEF8A,4979
5
- embed_client-1.0.1.1.dist-info/METADATA,sha256=BaAFA1F76uxxjtMVGhD5NftdIMvkoWyfwOKahOmuTdk,254
6
- embed_client-1.0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- embed_client-1.0.1.1.dist-info/top_level.txt,sha256=uG00A4d9o9DFrhiN7goObpeig72Pniby0E7UpDRgyXY,13
8
- embed_client-1.0.1.1.dist-info/RECORD,,