PyPI - embed-client - Versions diffs - 1.0.1.1__tar.gz → 2.0.0.0__tar.gz - Mend

embed-client 1.0.1.1 (tar.gz) → 2.0.0.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

{embed_client-1.0.1.1 → embed_client-2.0.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: embed-client
-Version: 1.0.1.1
+Version: 2.0.0.0
 Summary: Async client for Embedding Service API
 Author: Your Name
 Requires-Dist: aiohttp

{embed_client-1.0.1.1 → embed_client-2.0.0.0}/embed_client/async_client.py RENAMED Viewed

@@ -48,7 +48,7 @@ class EmbeddingServiceAsyncClient:
     Supports both old and new API formats:
     - Old format: {"result": {"success": true, "data": {"embeddings": [...]}}}
-    - New format: {"result": {"success": true, "data": [{"body": "text", "embedding": [...], "chunks": [...]}]}}
+    - New format: {"result": {"success": true, "data": {"embeddings": [...], "results": [{"body": "text", "embedding": [...], "tokens": [...], "bm25_tokens": [...]}]}}}
     Args:
         base_url (str): Base URL of the embedding service (e.g., "http://localhost").
@@ -172,13 +172,33 @@ class EmbeddingServiceAsyncClient:
             result: API response dictionary
         Returns:
-            List of dictionaries with 'body', 'embedding', and 'chunks' fields
+            List of dictionaries with 'body', 'embedding', 'tokens', and 'bm25_tokens' fields
         Raises:
             ValueError: If data cannot be extracted or is in old format
         """
         if "result" in result and isinstance(result["result"], dict):
             res = result["result"]
+            if "data" in res and isinstance(res["data"], dict) and "results" in res["data"]:
+                # New format: result.data.results[]
+                results = res["data"]["results"]
+                if isinstance(results, list):
+                    # Validate that all items have required fields
+                    for i, item in enumerate(results):
+                        if not isinstance(item, dict):
+                            raise ValueError(f"Item {i} is not a dictionary: {item}")
+                        if "body" not in item:
+                            raise ValueError(f"Item {i} missing 'body' field: {item}")
+                        if "embedding" not in item:
+                            raise ValueError(f"Item {i} missing 'embedding' field: {item}")
+                        if "tokens" not in item:
+                            raise ValueError(f"Item {i} missing 'tokens' field: {item}")
+                        if "bm25_tokens" not in item:
+                            raise ValueError(f"Item {i} missing 'bm25_tokens' field: {item}")
+                    return results
+            # Legacy support for old format: result.data[]
             if "data" in res and isinstance(res["data"], list):
                 # Validate that all items have required fields
                 for i, item in enumerate(res["data"]):
@@ -188,8 +208,9 @@ class EmbeddingServiceAsyncClient:
                         raise ValueError(f"Item {i} missing 'body' field: {item}")
                     if "embedding" not in item:
                         raise ValueError(f"Item {i} missing 'embedding' field: {item}")
-                    if "chunks" not in item:
-                        raise ValueError(f"Item {i} missing 'chunks' field: {item}")
+                    # Old format had 'chunks' instead of 'tokens'
+                    if "chunks" not in item and "tokens" not in item:
+                        raise ValueError(f"Item {i} missing 'chunks' or 'tokens' field: {item}")
                 return res["data"]
@@ -214,18 +235,60 @@ class EmbeddingServiceAsyncClient:
     def extract_chunks(self, result: Dict[str, Any]) -> List[List[str]]:
         """
         Extract text chunks from API response (new format only).
+        Note: This method now extracts 'tokens' instead of 'chunks' for compatibility.
         Args:
             result: API response dictionary
         Returns:
-            List of chunk lists for each text
+            List of token lists for each text
         Raises:
             ValueError: If chunks cannot be extracted or is in old format
         """
         data = self.extract_embedding_data(result)
-        return [item["chunks"] for item in data]
+        chunks = []
+        for item in data:
+            # New format uses 'tokens', old format used 'chunks'
+            if "tokens" in item:
+                chunks.append(item["tokens"])
+            elif "chunks" in item:
+                chunks.append(item["chunks"])
+            else:
+                raise ValueError(f"Item missing both 'tokens' and 'chunks' fields: {item}")
+        return chunks
+    def extract_tokens(self, result: Dict[str, Any]) -> List[List[str]]:
+        """
+        Extract tokens from API response (new format only).
+        Args:
+            result: API response dictionary
+        Returns:
+            List of token lists for each text
+        Raises:
+            ValueError: If tokens cannot be extracted or is in old format
+        """
+        data = self.extract_embedding_data(result)
+        return [item["tokens"] for item in data]
+    def extract_bm25_tokens(self, result: Dict[str, Any]) -> List[List[str]]:
+        """
+        Extract BM25 tokens from API response (new format only).
+        Args:
+            result: API response dictionary
+        Returns:
+            List of BM25 token lists for each text
+        Raises:
+            ValueError: If BM25 tokens cannot be extracted or is in old format
+        """
+        data = self.extract_embedding_data(result)
+        return [item["bm25_tokens"] for item in data]
     async def __aenter__(self):
         try:

{embed_client-1.0.1.1 → embed_client-2.0.0.0}/embed_client/example_async_usage_ru.py RENAMED Viewed

@@ -88,9 +88,17 @@ async def main():
                     print("\nAdditional data from new format:")
                     for i, data in enumerate(embedding_data):
                         print(f"  Text: {data['body']!r}")
-                        print(f"  Chunks: {data['chunks']}")
-                except ValueError:
-                    print("(Old format detected - no additional data available)")
+                        print(f"  Tokens: {data['tokens']}")
+                        print(f"  BM25 tokens: {data['bm25_tokens']}")
+                    # Extract tokens and BM25 tokens separately
+                    tokens = client.extract_tokens(result)
+                    bm25_tokens = client.extract_bm25_tokens(result)
+                    print(f"\nExtracted tokens: {tokens}")
+                    print(f"Extracted BM25 tokens: {bm25_tokens}")
+                except ValueError as e:
+                    print(f"(Old format detected - no additional data available): {e}")
             except EmbeddingServiceAPIError as e:
                 print("[API error]", e.error)

{embed_client-1.0.1.1 → embed_client-2.0.0.0}/embed_client.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: embed-client
-Version: 1.0.1.1
+Version: 2.0.0.0
 Summary: Async client for Embedding Service API
 Author: Your Name
 Requires-Dist: aiohttp

{embed_client-1.0.1.1 → embed_client-2.0.0.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "embed-client"
-version = "1.0.1.001"
+version = "2.0.0.0"
 description = "Async client for Embedding Service API"
 authors = [{name = "Your Name"}]
 dependencies = [

{embed_client-1.0.1.1 → embed_client-2.0.0.0}/tests/test_async_client.py RENAMED Viewed

@@ -810,13 +810,49 @@ async def test_extract_chunks():
     result = {
         "result": {
             "data": [
-                {"body": "text1", "embedding": [0.1, 0.2], "chunks": ["chunk1", "chunk2"]},
-                {"body": "text2", "embedding": [0.3, 0.4], "chunks": ["chunk3"]}
+                {"body": "text1", "embedding": [0.1, 0.2], "tokens": ["text1"]},
+                {"body": "text2", "embedding": [0.3, 0.4], "tokens": ["text2"]}
             ]
         }
     }
     chunks = client.extract_chunks(result)
-    assert chunks == [["chunk1", "chunk2"], ["chunk3"]]
+    assert chunks == [["text1"], ["text2"]]
+@pytest.mark.asyncio
+async def test_extract_tokens():
+    """Test extracting tokens from new format responses."""
+    client = EmbeddingServiceAsyncClient(base_url=BASE_URL, port=PORT)
+    result = {
+        "result": {
+            "data": {
+                "results": [
+                    {"body": "text1", "embedding": [0.1, 0.2], "tokens": ["text1"], "bm25_tokens": ["text1"]},
+                    {"body": "text2", "embedding": [0.3, 0.4], "tokens": ["text2"], "bm25_tokens": ["text2"]}
+                ]
+            }
+        }
+    }
+    tokens = client.extract_tokens(result)
+    assert tokens == [["text1"], ["text2"]]
+@pytest.mark.asyncio
+async def test_extract_bm25_tokens():
+    """Test extracting BM25 tokens from new format responses."""
+    client = EmbeddingServiceAsyncClient(base_url=BASE_URL, port=PORT)
+    result = {
+        "result": {
+            "data": {
+                "results": [
+                    {"body": "text1", "embedding": [0.1, 0.2], "tokens": ["text1"], "bm25_tokens": ["text1"]},
+                    {"body": "text2", "embedding": [0.3, 0.4], "tokens": ["text2"], "bm25_tokens": ["text2"]}
+                ]
+            }
+        }
+    }
+    bm25_tokens = client.extract_bm25_tokens(result)
+    assert bm25_tokens == [["text1"], ["text2"]]
 @pytest.mark.asyncio
 async def test_close_without_open():