cocoindex-0.3.0-cp311-abi3-manylinux_2_28_x86_64.whl → cocoindex-0.3.1-cp311-abi3-manylinux_2_28_x86_64.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
cocoindex/_engine.abi3.so CHANGED
Binary file
cocoindex/functions/colpali.py CHANGED
@@ -2,7 +2,7 @@
 
 import functools
 from dataclasses import dataclass
-from typing import Any, Optional, TYPE_CHECKING, Literal
+from typing import Any, TYPE_CHECKING, Literal
 import numpy as np
 
 from .. import op
@@ -22,18 +22,11 @@ class ColPaliModelInfo:
     dimension: int
 
 
-@functools.lru_cache(maxsize=None)
+@functools.cache
 def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
     """Load and cache ColPali model and processor with shared device setup."""
     try:
-        from colpali_engine import (  # type: ignore[import-untyped]
-            ColPali,
-            ColPaliProcessor,
-            ColQwen2,
-            ColQwen2Processor,
-            ColSmol,
-            ColSmolProcessor,
-        )
+        import colpali_engine as ce  # type: ignore[import-untyped]
         import torch
     except ImportError as e:
         raise ImportError(
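Two notes on the hunk above. functools.cache (Python 3.9+) behaves exactly like functools.lru_cache(maxsize=None): an unbounded cache keyed on the function's arguments, so each distinct model_name still triggers only one model load. And with the namespace import, individual colpali_engine classes are resolved only when the matching branch runs, so an installed version that lacks one of the optional model classes no longer fails at import time. A minimal standalone sketch of the caching behavior (_load_once is hypothetical, not part of the package):

    import functools

    @functools.cache          # equivalent to @functools.lru_cache(maxsize=None)
    def _load_once(model_name: str) -> str:
        print(f"loading {model_name}")   # executes once per distinct name
        return model_name.upper()

    _load_once("colpali-v1.3")   # prints "loading colpali-v1.3"
    _load_once("colpali-v1.3")   # cache hit, no print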
@@ -42,29 +35,30 @@ def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
         ) from e
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    lower_model_name = model_name.lower()
 
     # Determine model type from name
-    if "colpali" in model_name.lower():
-        model = ColPali.from_pretrained(
+    if lower_model_name.startswith("colpali"):
+        model = ce.ColPali.from_pretrained(
             model_name, torch_dtype=torch.bfloat16, device_map=device
         )
-        processor = ColPaliProcessor.from_pretrained(model_name)
-    elif "colqwen" in model_name.lower():
-        model = ColQwen2.from_pretrained(
+        processor = ce.ColPaliProcessor.from_pretrained(model_name)
+    elif lower_model_name.startswith("colqwen2.5"):
+        model = ce.ColQwen2_5.from_pretrained(
             model_name, torch_dtype=torch.bfloat16, device_map=device
         )
-        processor = ColQwen2Processor.from_pretrained(model_name)
-    elif "colsmol" in model_name.lower():
-        model = ColSmol.from_pretrained(
+        processor = ce.ColQwen2_5_Processor.from_pretrained(model_name)
+    elif lower_model_name.startswith("colqwen"):
+        model = ce.ColQwen2.from_pretrained(
             model_name, torch_dtype=torch.bfloat16, device_map=device
         )
-        processor = ColSmolProcessor.from_pretrained(model_name)
+        processor = ce.ColQwen2Processor.from_pretrained(model_name)
     else:
         # Fallback to ColPali for backwards compatibility
-        model = ColPali.from_pretrained(
+        model = ce.ColPali.from_pretrained(
             model_name, torch_dtype=torch.bfloat16, device_map=device
         )
-        processor = ColPaliProcessor.from_pretrained(model_name)
+        processor = ce.ColPaliProcessor.from_pretrained(model_name)
 
     # Detect dimension
     dimension = _detect_colpali_dimension(model, processor, device)
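The dispatch above now keys on the lowercased model name's prefix rather than a substring match, and a dedicated ColQwen2.5 branch is checked before the generic ColQwen2 one; the old "colsmol" branch is gone, so such names now take the ColPali fallback. A minimal sketch of the same ordering, with hypothetical model names for illustration (pick_colvision_class is an illustrative paraphrase, not package code):

    def pick_colvision_class(model_name: str) -> str:
        name = model_name.lower()
        if name.startswith("colpali"):
            return "ColPali"
        if name.startswith("colqwen2.5"):   # must precede the generic prefix check
            return "ColQwen2_5"
        if name.startswith("colqwen"):
            return "ColQwen2"
        return "ColPali"  # backwards-compatible fallback

    assert pick_colvision_class("colqwen2.5-v0.2") == "ColQwen2_5"
    assert pick_colvision_class("colqwen2-v1.0") == "ColQwen2"
    assert pick_colvision_class("colsmol-256m") == "ColPali"   # ColSmol branch removed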
@@ -130,6 +124,7 @@ class ColPaliEmbedImage(op.FunctionSpec):
 @op.executor_class(
     gpu=True,
     cache=True,
+    batching=True,
     behavior_version=1,
 )
 class ColPaliEmbedImageExecutor:
@@ -146,7 +141,7 @@ class ColPaliEmbedImageExecutor:
         dimension = self._model_info.dimension
         return Vector[Vector[np.float32, Literal[dimension]]]  # type: ignore
 
-    def __call__(self, img_bytes: bytes) -> Any:
+    def __call__(self, img_bytes_list: list[bytes]) -> Any:
         try:
             from PIL import Image
             import torch
@@ -160,8 +155,11 @@ class ColPaliEmbedImageExecutor:
         processor = self._model_info.processor
         device = self._model_info.device
 
-        pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
-        inputs = processor.process_images([pil_image]).to(device)
+        pil_images = [
+            Image.open(io.BytesIO(img_bytes)).convert("RGB")
+            for img_bytes in img_bytes_list
+        ]
+        inputs = processor.process_images(pil_images).to(device)
         with torch.no_grad():
             embeddings = model(**inputs)
 
@@ -171,10 +169,8 @@ class ColPaliEmbedImageExecutor:
                 f"Expected 3D tensor [batch, patches, hidden_dim], got shape {embeddings.shape}"
             )
 
-        # Keep patch-level embeddings: [batch, patches, hidden_dim] -> [patches, hidden_dim]
-        patch_embeddings = embeddings[0]  # Remove batch dimension
-
-        return patch_embeddings.cpu().to(torch.float32).numpy()
+        # [patches, hidden_dim]
+        return embeddings.cpu().to(torch.float32).numpy()
 
 
 class ColPaliEmbedQuery(op.FunctionSpec):
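With batching=True, the image executor's __call__ now receives a whole list of image byte strings per invocation and returns the stacked result with the batch dimension intact ([batch, patches, hidden_dim]) instead of slicing out element 0. A small sketch of the implied shape contract, with made-up sizes (the real patch count and hidden dimension depend on the model):

    import numpy as np

    batch_embeddings = np.zeros((3, 1030, 128), dtype=np.float32)  # [batch, patches, hidden_dim]

    # One multi-vector (patch-level) embedding per input image:
    per_image = [batch_embeddings[i] for i in range(batch_embeddings.shape[0])]
    assert per_image[0].shape == (1030, 128)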
@@ -207,6 +203,7 @@ class ColPaliEmbedQuery(op.FunctionSpec):
     gpu=True,
     cache=True,
     behavior_version=1,
+    batching=True,
 )
 class ColPaliEmbedQueryExecutor:
     """Executor for ColVision query embedding (ColPali, ColQwen2, ColSmol, etc.)."""
@@ -222,7 +219,7 @@ class ColPaliEmbedQueryExecutor:
         dimension = self._model_info.dimension
         return Vector[Vector[np.float32, Literal[dimension]]]  # type: ignore
 
-    def __call__(self, query: str) -> Any:
+    def __call__(self, queries: list[str]) -> Any:
         try:
             import torch
         except ImportError as e:
@@ -234,7 +231,7 @@ class ColPaliEmbedQueryExecutor:
         processor = self._model_info.processor
         device = self._model_info.device
 
-        inputs = processor.process_queries([query]).to(device)
+        inputs = processor.process_queries(queries).to(device)
         with torch.no_grad():
             embeddings = model(**inputs)
 
@@ -244,7 +241,5 @@ class ColPaliEmbedQueryExecutor:
                 f"Expected 3D tensor [batch, tokens, hidden_dim], got shape {embeddings.shape}"
            )
 
-        # Keep token-level embeddings: [batch, tokens, hidden_dim] -> [tokens, hidden_dim]
-        token_embeddings = embeddings[0]  # Remove batch dimension
-
-        return token_embeddings.cpu().to(torch.float32).numpy()
+        # [tokens, hidden_dim]
+        return embeddings.cpu().to(torch.float32).numpy()
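The query executor mirrors the image-side change: __call__ takes a list of query strings and returns token-level embeddings of shape [batch, tokens, hidden_dim]. As background (not part of the package code above), multi-vector query and image embeddings like these are typically compared with late-interaction MaxSim scoring; a minimal sketch with made-up shapes:

    import numpy as np

    def maxsim_score(query_tokens: np.ndarray, image_patches: np.ndarray) -> float:
        # query_tokens: [tokens, hidden_dim]; image_patches: [patches, hidden_dim]
        sims = query_tokens @ image_patches.T      # [tokens, patches]
        return float(sims.max(axis=1).sum())       # best-matching patch per token, summed

    q = np.random.rand(20, 128).astype(np.float32)     # one query's token embeddings
    d = np.random.rand(1030, 128).astype(np.float32)   # one image's patch embeddings
    score = maxsim_score(q, d)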
cocoindex-0.3.0.dist-info/METADATA → cocoindex-0.3.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cocoindex
-Version: 0.3.0
+Version: 0.3.1
 Classifier: Development Status :: 3 - Alpha
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
cocoindex-0.3.0.dist-info/RECORD → cocoindex-0.3.1.dist-info/RECORD
@@ -1,9 +1,9 @@
-cocoindex-0.3.0.dist-info/METADATA,sha256=vMA1fqUWkDkxXgNT76l8pOBEqgt-Cu8XEpC4BPCbzp8,14193
-cocoindex-0.3.0.dist-info/WHEEL,sha256=O2QTG69GgK-VjUv6T5nE2QGjJc-8mS3d1MslSxOOSiY,107
-cocoindex-0.3.0.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
-cocoindex-0.3.0.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=VRDb6qOsN808v5fkXRUNfGUqSvD_OJWjO6hD3uWfKFg,750831
+cocoindex-0.3.1.dist-info/METADATA,sha256=0fDN_3SnRoRi7nlvbZ1YXKQo4y9Bukm-S1zG3K1DBD8,14193
+cocoindex-0.3.1.dist-info/WHEEL,sha256=O2QTG69GgK-VjUv6T5nE2QGjJc-8mS3d1MslSxOOSiY,107
+cocoindex-0.3.1.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
+cocoindex-0.3.1.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=_9FT9tv3L_NmA0GOtdkqRZGf_4o5A16ui5YvPPi5-WM,750831
 cocoindex/__init__.py,sha256=6qZWVkK4WZ01BIAg3CPh_bRRdA6Clk4d4Q6OnZ2jFa4,2630
-cocoindex/_engine.abi3.so,sha256=JtN0US3tax_DmVbbFlCn-LkgYUFA1tM89aN9TZqo7zY,73238416
+cocoindex/_engine.abi3.so,sha256=SJnPK_Iw17r4FM4q0N-KOB5-Q0blsbfQrij0vByulJs,73312144
 cocoindex/auth_registry.py,sha256=g-uLDWLYW5NMbYe7q4Y-sU5dSyrlJXBEciyWtAiP9KE,1340
 cocoindex/cli.py,sha256=k7bl8RTUZoNNxTlQMr-Y3-9-rTNt8z1v7rJWqsajYC8,24792
 cocoindex/engine_object.py,sha256=5YTuWoR3WILhyt3PW-d9es3MAas_xD6tZZqvipN-sjg,10050
@@ -11,7 +11,7 @@ cocoindex/engine_value.py,sha256=WJw8ymYAqF2CCyg9SBiQzx8z9bl7XNVuD6ffgYvRRWQ,232
 cocoindex/flow.py,sha256=xDz3rOo4RhbboknvC-KnbWq8RBykEO0YsjGSBfXqIEg,40076
 cocoindex/functions/__init__.py,sha256=V2IF4h-Cqq4OD_GN3Oqdry-FArORyRCKmqJ7g5UlJr8,1021
 cocoindex/functions/_engine_builtin_specs.py,sha256=WpCGrjUfJBa8xZP5JiEmA8kLu7fp9Rcs7ynpuJmvSGg,1786
-cocoindex/functions/colpali.py,sha256=oACyG3qG2dquyCJ6bT7FkMkua5rXDLSxnOHcgoz9waU,8865
+cocoindex/functions/colpali.py,sha256=IsVZHO_xRgCSH1Gl6Ubyf0g4CRDMegn72tq_UJQ0G-A,8624
 cocoindex/functions/sbert.py,sha256=o_DS1ZAqpNc4u1Yrm9DO5LxfMFrlH_hfb0MWobJrs_k,2223
 cocoindex/index.py,sha256=tz5ilvmOp0BtroGehCQDqWK_pIX9m6ghkhcxsDVU8WE,982
 cocoindex/lib.py,sha256=spfdU4IbzdffHyGdrQPIw_qGo9aX0OAAboqsjj8bTiQ,2290
@@ -39,4 +39,4 @@ cocoindex/typing.py,sha256=qQj5uM6XAKHzRJ2BIEs7X-xeOXVcM9p_xz5SVqPVvS8,23914
 cocoindex/user_app_loader.py,sha256=bc3Af-gYRxJ9GpObtpjegZY855oQBCv5FGkrkWV2yGY,1873
 cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
 cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
-cocoindex-0.3.0.dist-info/RECORD,,
+cocoindex-0.3.1.dist-info/RECORD,,
cocoindex-0.3.0.dist-info/licenses/THIRD_PARTY_NOTICES.html → cocoindex-0.3.1.dist-info/licenses/THIRD_PARTY_NOTICES.html
@@ -2846,7 +2846,7 @@ Software.
 <h3 id="Apache-2.0">Apache License 2.0</h3>
 <h4>Used by:</h4>
 <ul class="license-used-by">
-<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.3.0</a></li>
+<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.3.1</a></li>
 <li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
 <li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
 </ul>