PyPI - cocoindex - Versions diffs - 0.1.79__cp312-cp312-manylinux_2_28_x86_64.whl → 0.1.80__cp312-cp312-manylinux_2_28_x86_64.whl - Mend

cocoindex 0.1.79__cp312-cp312-manylinux_2_28_x86_64.whl → 0.1.80__cp312-cp312-manylinux_2_28_x86_64.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (7) hide show

cocoindex/_engine.cpython-312-x86_64-linux-gnu.so CHANGED Viewed

Binary file

cocoindex/functions.py CHANGED Viewed

@@ -116,19 +116,62 @@ class SentenceTransformerEmbedExecutor:
 def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
     """Get or load ColPali model and processor, with caching."""
     try:
-        from colpali_engine.models import ColPali, ColPaliProcessor  # type: ignore[import-untyped]
+        from colpali_engine.models import (  # type: ignore[import-untyped]
+            ColPali,
+            ColPaliProcessor,
+            ColQwen2,
+            ColQwen2Processor,
+            ColQwen2_5,
+            ColQwen2_5_Processor,
+            ColIdefics3,
+            ColIdefics3Processor,
+        )
         from colpali_engine.utils.torch_utils import get_torch_device  # type: ignore[import-untyped]
         import torch
     except ImportError as e:
         raise ImportError(
-            "ColPali is not available. Make sure cocoindex is installed with ColPali support."
+            "ColVision models are not available. Make sure cocoindex is installed with ColPali support."
         ) from e
     device = get_torch_device("auto")
-    model = ColPali.from_pretrained(
-        model_name, device_map=device, torch_dtype=torch.bfloat16
-    ).eval()
-    processor = ColPaliProcessor.from_pretrained(model_name)
+    # Manual model detection based on model name
+    model_name_lower = model_name.lower()
+    try:
+        if "qwen2.5" in model_name_lower:
+            model = ColQwen2_5.from_pretrained(
+                model_name,
+                torch_dtype=torch.bfloat16,
+                device_map=device,
+            ).eval()
+            processor = ColQwen2_5_Processor.from_pretrained(model_name)
+        elif "qwen2" in model_name_lower:
+            model = ColQwen2.from_pretrained(
+                model_name,
+                torch_dtype=torch.bfloat16,
+                device_map=device,
+            ).eval()
+            processor = ColQwen2Processor.from_pretrained(model_name)
+        elif "colsmol" in model_name_lower or "smol" in model_name_lower:
+            # ColSmol models use Idefics3 architecture
+            model = ColIdefics3.from_pretrained(
+                model_name,
+                torch_dtype=torch.bfloat16,
+                device_map=device,
+            ).eval()
+            processor = ColIdefics3Processor.from_pretrained(model_name)
+        else:
+            # Default to ColPali
+            model = ColPali.from_pretrained(
+                model_name,
+                torch_dtype=torch.bfloat16,
+                device_map=device,
+            ).eval()
+            processor = ColPaliProcessor.from_pretrained(model_name)
+    except Exception as e:
+        raise RuntimeError(f"Failed to load model {model_name}: {e}")
     # Get dimension from the actual model
     dimension = _detect_colpali_dimension(model, processor, device)
@@ -167,17 +210,25 @@ def _detect_colpali_dimension(model: Any, processor: Any, device: Any) -> int:
 class ColPaliEmbedImage(op.FunctionSpec):
     """
-    `ColPaliEmbedImage` embeds images using the ColPali multimodal model.
+    `ColPaliEmbedImage` embeds images using ColVision multimodal models.
+    Supports ALL models available in the colpali-engine library, including:
+    - ColPali models (colpali-*): PaliGemma-based, best for general document retrieval
+    - ColQwen2 models (colqwen-*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
+    - ColSmol models (colsmol-*): Lightweight, good for resource-constrained environments
+    - Any future ColVision models supported by colpali-engine
-    ColPali (Contextual Late-interaction over Patches) uses late interaction
-    between image patch embeddings and text token embeddings for retrieval.
+    These models use late interaction between image patch embeddings and text token
+    embeddings for retrieval.
     Args:
-        model: The ColPali model name to use (e.g., "vidore/colpali-v1.2")
+        model: Any ColVision model name supported by colpali-engine
+               (e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0")
+               See https://github.com/illuin-tech/colpali for the complete list of supported models.
     Note:
         This function requires the optional colpali-engine dependency.
-        Install it with: pip install 'cocoindex[embeddings]'
+        Install it with: pip install 'cocoindex[colpali]'
     """
     model: str
@@ -189,7 +240,7 @@ class ColPaliEmbedImage(op.FunctionSpec):
     behavior_version=1,
 )
 class ColPaliEmbedImageExecutor:
-    """Executor for ColPaliEmbedImage."""
+    """Executor for ColVision image embedding (ColPali, ColQwen2, ColSmol, etc.)."""
     spec: ColPaliEmbedImage
     _model_info: ColPaliModelInfo
@@ -209,7 +260,7 @@ class ColPaliEmbedImageExecutor:
             import io
         except ImportError as e:
             raise ImportError(
-                "Required dependencies (PIL, torch) are missing for ColPali image embedding."
+                "Required dependencies (PIL, torch) are missing for ColVision image embedding."
             ) from e
         model = self._model_info.model
@@ -235,17 +286,25 @@ class ColPaliEmbedImageExecutor:
 class ColPaliEmbedQuery(op.FunctionSpec):
     """
-    `ColPaliEmbedQuery` embeds text queries using the ColPali multimodal model.
+    `ColPaliEmbedQuery` embeds text queries using ColVision multimodal models.
+    Supports ALL models available in the colpali-engine library, including:
+    - ColPali models (colpali-*): PaliGemma-based, best for general document retrieval
+    - ColQwen2 models (colqwen-*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
+    - ColSmol models (colsmol-*): Lightweight, good for resource-constrained environments
+    - Any future ColVision models supported by colpali-engine
-    This produces query embeddings compatible with ColPali image embeddings
+    This produces query embeddings compatible with ColVision image embeddings
     for late interaction scoring (MaxSim).
     Args:
-        model: The ColPali model name to use (e.g., "vidore/colpali-v1.2")
+        model: Any ColVision model name supported by colpali-engine
+               (e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0")
+               See https://github.com/illuin-tech/colpali for the complete list of supported models.
     Note:
         This function requires the optional colpali-engine dependency.
-        Install it with: pip install 'cocoindex[embeddings]'
+        Install it with: pip install 'cocoindex[colpali]'
     """
     model: str
@@ -257,7 +316,7 @@ class ColPaliEmbedQuery(op.FunctionSpec):
     behavior_version=1,
 )
 class ColPaliEmbedQueryExecutor:
-    """Executor for ColPaliEmbedQuery."""
+    """Executor for ColVision query embedding (ColPali, ColQwen2, ColSmol, etc.)."""
     spec: ColPaliEmbedQuery
     _model_info: ColPaliModelInfo
@@ -275,7 +334,7 @@ class ColPaliEmbedQueryExecutor:
             import torch
         except ImportError as e:
             raise ImportError(
-                "Required dependencies (torch) are missing for ColPali query embedding."
+                "Required dependencies (torch) are missing for ColVision query embedding."
             ) from e
         model = self._model_info.model

{cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cocoindex
-Version: 0.1.79
+Version: 0.1.80
 Requires-Dist: click>=8.1.8
 Requires-Dist: rich>=14.0.0
 Requires-Dist: python-dotenv>=1.1.0
@@ -39,7 +39,8 @@ Project-URL: Homepage, https://cocoindex.io/
 [![Documentation](https://img.shields.io/badge/Documentation-394e79?logo=readthedocs&logoColor=00B9FF)](https://cocoindex.io/docs/getting_started/quickstart)
 [![License](https://img.shields.io/badge/license-Apache%202.0-5B5BD6?logoColor=white)](https://opensource.org/licenses/Apache-2.0)
 [![PyPI version](https://img.shields.io/pypi/v/cocoindex?color=5B5BD6)](https://pypi.org/project/cocoindex/)
-[![PyPI - Downloads](https://img.shields.io/pypi/dm/cocoindex)](https://pypistats.org/packages/cocoindex)
+<!--[![PyPI - Downloads](https://img.shields.io/pypi/dm/cocoindex)](https://pypistats.org/packages/cocoindex) -->
+[![PyPI Downloads](https://static.pepy.tech/badge/cocoindex/month)](https://pepy.tech/projects/cocoindex)
 [![CI](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml/badge.svg?event=push&color=5B5BD6)](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml)
 [![release](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml/badge.svg?event=push&color=5B5BD6)](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml)
 [![Discord](https://img.shields.io/discord/1314801574169673738?logo=discord&color=5B5BD6&logoColor=white)](https://discord.com/invite/zpA9S2DR7s)
@@ -216,6 +217,8 @@ It defines an index flow like this:
 | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
 | [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
 | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
+| [Patient intake form extraction](examples/patient_intake_extraction) | Use LLM to extract structured data from patient intake forms with different formats |
 More coming and stay tuned 👀!

{cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
-cocoindex-0.1.79.dist-info/METADATA,sha256=1RyDQg5xjdf954JY3IBfZaAe5u6SQ2x-sDcPJDlSuPQ,11799
-cocoindex-0.1.79.dist-info/WHEEL,sha256=agcEWikPie1qUk10ElUHI4WcN5vs5MjhJbp7oethB0A,108
-cocoindex-0.1.79.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
-cocoindex-0.1.79.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+cocoindex-0.1.80.dist-info/METADATA,sha256=DNAMbPaFYBElOtqshXIvRW_MNmJgcqNHY6A5uN9NcQ8,12073
+cocoindex-0.1.80.dist-info/WHEEL,sha256=agcEWikPie1qUk10ElUHI4WcN5vs5MjhJbp7oethB0A,108
+cocoindex-0.1.80.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
+cocoindex-0.1.80.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 cocoindex/__init__.py,sha256=sLpSVO5Cotgn_82lawxvXnaqfa-qj33rytWBAe2MTtU,2201
-cocoindex/_engine.cpython-312-x86_64-linux-gnu.so,sha256=wOmweA_mrr2KX_41KXrVfwbCLS9rcGoYP2tJ1Esx3vE,71022048
+cocoindex/_engine.cpython-312-x86_64-linux-gnu.so,sha256=QwiJs1O5W2RvEHPNRj7q-eVbTkkRKldgln8Lhxo5rt0,72116944
 cocoindex/auth_registry.py,sha256=PE1-kVkcyC1G2C_V7b1kvYzeq73OFQehWKQP7ln7fJ8,1478
 cocoindex/cli.py,sha256=-gp639JSyQN6YjnhGqCakIzYoSSqXxQMbxbkcYGP0QY,22359
 cocoindex/convert.py,sha256=HodeDl1HVX8nnBH02lQKarw5i3xmkjB0nGj-DXt7Ifc,18284
 cocoindex/flow.py,sha256=egKbBG2X9DjAqmcATcndyRhe9zMZHRd-YxKCpt9BsUg,36551
-cocoindex/functions.py,sha256=34sZWoS0zGnaKyooIODQgc6QEPZKiJoWhfb8jKIWwps,9528
+cocoindex/functions.py,sha256=-8tAW7_HhSw-A7M8U_C1vUfE9jxNPJ6j2yBRJvP16Tk,12302
 cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
 cocoindex/lib.py,sha256=f--9dAYd84CZosbDZqNW0oGbBLsY3dXiUTR1VrfQ_QY,817
 cocoindex/llm.py,sha256=Pv_cdnRngTLtuLU9AUmS8izIHhcKVnuBNolC33f9BDI,851
@@ -28,4 +28,4 @@ cocoindex/tests/test_validation.py,sha256=X6AQzVs-hVKIXcrHMEMQnhfUE8at7iXQnPq8nH
 cocoindex/typing.py,sha256=qQ0ANF3iuQDeSqipHgL2SDiiXL2reTMUN0aj4ve_T0w,13359
 cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
 cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
-cocoindex-0.1.79.dist-info/RECORD,,
+cocoindex-0.1.80.dist-info/RECORD,,

{cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/WHEEL RENAMED Viewed

File without changes

{cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes