cocoindex-0.3.0-cp311-abi3-manylinux_2_28_x86_64.whl → cocoindex-0.3.1-cp311-abi3-manylinux_2_28_x86_64.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
cocoindex/_engine.abi3.so CHANGED
Binary file
cocoindex/functions/colpali.py CHANGED
@@ -2,7 +2,7 @@
 
 import functools
 from dataclasses import dataclass
-from typing import Any, Optional, TYPE_CHECKING, Literal
+from typing import Any, TYPE_CHECKING, Literal
 import numpy as np
 
 from .. import op
@@ -22,18 +22,11 @@ class ColPaliModelInfo:
     dimension: int
 
 
-@functools.lru_cache(maxsize=None)
+@functools.cache
 def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
     """Load and cache ColPali model and processor with shared device setup."""
     try:
-        from colpali_engine import (  # type: ignore[import-untyped]
-            ColPali,
-            ColPaliProcessor,
-            ColQwen2,
-            ColQwen2Processor,
-            ColSmol,
-            ColSmolProcessor,
-        )
+        import colpali_engine as ce  # type: ignore[import-untyped]
         import torch
     except ImportError as e:
         raise ImportError(
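Two notes on the hunk above. functools.cache (Python 3.9+) behaves exactly like functools.lru_cache(maxsize=None): an unbounded cache keyed on the function's arguments, so each distinct model_name still triggers only one model load. And with the namespace import, individual colpali_engine classes are resolved only when the matching branch runs, so an installed version that lacks one of the optional model classes no longer fails at import time. A minimal standalone sketch of the caching behavior (_load_once is hypothetical, not part of the package):

    import functools

    @functools.cache          # equivalent to @functools.lru_cache(maxsize=None)
    def _load_once(model_name: str) -> str:
        print(f"loading {model_name}")   # executes once per distinct name
        return model_name.upper()

    _load_once("colpali-v1.3")   # prints "loading colpali-v1.3"
    _load_once("colpali-v1.3")   # cache hit, no print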
@@ -42,29 +35,30 @@ def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
         ) from e
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
+    lower_model_name = model_name.lower()
 
     # Determine model type from name
-    if "colpali" in model_name.lower():
-        model = ColPali.from_pretrained(
+    if lower_model_name.startswith("colpali"):
+        model = ce.ColPali.from_pretrained(
             model_name, torch_dtype=torch.bfloat16, device_map=device
         )
-        processor = ColPaliProcessor.from_pretrained(model_name)
-    elif "colqwen" in model_name.lower():
-        model = ColQwen2.from_pretrained(
+        processor = ce.ColPaliProcessor.from_pretrained(model_name)
+    elif lower_model_name.startswith("colqwen2.5"):
+        model = ce.ColQwen2_5.from_pretrained(
             model_name, torch_dtype=torch.bfloat16, device_map=device
         )
-        processor = ColQwen2Processor.from_pretrained(model_name)
-    elif "colsmol" in model_name.lower():
-        model = ColSmol.from_pretrained(
+        processor = ce.ColQwen2_5_Processor.from_pretrained(model_name)
+    elif lower_model_name.startswith("colqwen"):
+        model = ce.ColQwen2.from_pretrained(
             model_name, torch_dtype=torch.bfloat16, device_map=device
         )
-        processor = ColSmolProcessor.from_pretrained(model_name)
+        processor = ce.ColQwen2Processor.from_pretrained(model_name)
     else:
         # Fallback to ColPali for backwards compatibility
-        model = ColPali.from_pretrained(
+        model = ce.ColPali.from_pretrained(
             model_name, torch_dtype=torch.bfloat16, device_map=device
         )
-        processor = ColPaliProcessor.from_pretrained(model_name)
+        processor = ce.ColPaliProcessor.from_pretrained(model_name)
 
     # Detect dimension
     dimension = _detect_colpali_dimension(model, processor, device)
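The dispatch above now keys on the lowercased model name's prefix rather than a substring match, and a dedicated ColQwen2.5 branch is checked before the generic ColQwen2 one; the old "colsmol" branch is gone, so such names now take the ColPali fallback. A minimal sketch of the same ordering, with hypothetical model names for illustration (pick_colvision_class is an illustrative paraphrase, not package code):

    def pick_colvision_class(model_name: str) -> str:
        name = model_name.lower()
        if name.startswith("colpali"):
            return "ColPali"
        if name.startswith("colqwen2.5"):   # must precede the generic prefix check
            return "ColQwen2_5"
        if name.startswith("colqwen"):
            return "ColQwen2"
        return "ColPali"  # backwards-compatible fallback

    assert pick_colvision_class("colqwen2.5-v0.2") == "ColQwen2_5"
    assert pick_colvision_class("colqwen2-v1.0") == "ColQwen2"
    assert pick_colvision_class("colsmol-256m") == "ColPali"   # ColSmol branch removed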
@@ -130,6 +124,7 @@ class ColPaliEmbedImage(op.FunctionSpec):
 @op.executor_class(
     gpu=True,
     cache=True,
+    batching=True,
     behavior_version=1,
 )
 class ColPaliEmbedImageExecutor:
@@ -146,7 +141,7 @@ class ColPaliEmbedImageExecutor:
         dimension = self._model_info.dimension
         return Vector[Vector[np.float32, Literal[dimension]]]  # type: ignore
 
-    def __call__(self, img_bytes: bytes) -> Any:
+    def __call__(self, img_bytes_list: list[bytes]) -> Any:
         try:
             from PIL import Image
             import torch
@@ -160,8 +155,11 @@ class ColPaliEmbedImageExecutor:
         processor = self._model_info.processor
         device = self._model_info.device
 
-        pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
-        inputs = processor.process_images([pil_image]).to(device)
+        pil_images = [
+            Image.open(io.BytesIO(img_bytes)).convert("RGB")
+            for img_bytes in img_bytes_list
+        ]
+        inputs = processor.process_images(pil_images).to(device)
         with torch.no_grad():
             embeddings = model(**inputs)
 
@@ -171,10 +169,8 @@ class ColPaliEmbedImageExecutor:
                 f"Expected 3D tensor [batch, patches, hidden_dim], got shape {embeddings.shape}"
             )
 
-        # Keep patch-level embeddings: [batch, patches, hidden_dim] -> [patches, hidden_dim]
-        patch_embeddings = embeddings[0]  # Remove batch dimension
-
-        return patch_embeddings.cpu().to(torch.float32).numpy()
+        # [patches, hidden_dim]
+        return embeddings.cpu().to(torch.float32).numpy()
 
 
 class ColPaliEmbedQuery(op.FunctionSpec):
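With batching=True, the image executor's __call__ now receives a whole list of image byte strings per invocation and returns the stacked result with the batch dimension intact ([batch, patches, hidden_dim]) instead of slicing out element 0. A small sketch of the implied shape contract, with made-up sizes (the real patch count and hidden dimension depend on the model):

    import numpy as np

    batch_embeddings = np.zeros((3, 1030, 128), dtype=np.float32)  # [batch, patches, hidden_dim]

    # One multi-vector (patch-level) embedding per input image:
    per_image = [batch_embeddings[i] for i in range(batch_embeddings.shape[0])]
    assert per_image[0].shape == (1030, 128)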
@@ -207,6 +203,7 @@ class ColPaliEmbedQuery(op.FunctionSpec):
     gpu=True,
     cache=True,
     behavior_version=1,
+    batching=True,
 )
 class ColPaliEmbedQueryExecutor:
     """Executor for ColVision query embedding (ColPali, ColQwen2, ColSmol, etc.)."""
@@ -222,7 +219,7 @@ class ColPaliEmbedQueryExecutor:
         dimension = self._model_info.dimension
         return Vector[Vector[np.float32, Literal[dimension]]]  # type: ignore
 
-    def __call__(self, query: str) -> Any:
+    def __call__(self, queries: list[str]) -> Any:
         try:
             import torch
         except ImportError as e:
@@ -234,7 +231,7 @@ class ColPaliEmbedQueryExecutor:
         processor = self._model_info.processor
         device = self._model_info.device
 
-        inputs = processor.process_queries([query]).to(device)
+        inputs = processor.process_queries(queries).to(device)
         with torch.no_grad():
             embeddings = model(**inputs)
 
@@ -244,7 +241,5 @@ class ColPaliEmbedQueryExecutor:
                 f"Expected 3D tensor [batch, tokens, hidden_dim], got shape {embeddings.shape}"
            )
 
-        # Keep token-level embeddings: [batch, tokens, hidden_dim] -> [tokens, hidden_dim]
-        token_embeddings = embeddings[0]  # Remove batch dimension
-
-        return token_embeddings.cpu().to(torch.float32).numpy()
+        # [tokens, hidden_dim]
+        return embeddings.cpu().to(torch.float32).numpy()
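The query executor mirrors the image-side change: __call__ takes a list of query strings and returns token-level embeddings of shape [batch, tokens, hidden_dim]. As background (not part of the package code above), multi-vector query and image embeddings like these are typically compared with late-interaction MaxSim scoring; a minimal sketch with made-up shapes:

    import numpy as np

    def maxsim_score(query_tokens: np.ndarray, image_patches: np.ndarray) -> float:
        # query_tokens: [tokens, hidden_dim]; image_patches: [patches, hidden_dim]
        sims = query_tokens @ image_patches.T      # [tokens, patches]
        return float(sims.max(axis=1).sum())       # best-matching patch per token, summed

    q = np.random.rand(20, 128).astype(np.float32)     # one query's token embeddings
    d = np.random.rand(1030, 128).astype(np.float32)   # one image's patch embeddings
    score = maxsim_score(q, d)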
cocoindex-0.3.0.dist-info/METADATA → cocoindex-0.3.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cocoindex
-Version: 0.3.0
+Version: 0.3.1
 Classifier: Development Status :: 3 - Alpha
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
cocoindex-0.3.0.dist-info/RECORD → cocoindex-0.3.1.dist-info/RECORD
@@ -1,9 +1,9 @@
-cocoindex-0.3.0.dist-info/METADATA,sha256=vMA1fqUWkDkxXgNT76l8pOBEqgt-Cu8XEpC4BPCbzp8,14193
-cocoindex-0.3.0.dist-info/WHEEL,sha256=O2QTG69GgK-VjUv6T5nE2QGjJc-8mS3d1MslSxOOSiY,107
-cocoindex-0.3.0.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
-cocoindex-0.3.0.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=VRDb6qOsN808v5fkXRUNfGUqSvD_OJWjO6hD3uWfKFg,750831
+cocoindex-0.3.1.dist-info/METADATA,sha256=0fDN_3SnRoRi7nlvbZ1YXKQo4y9Bukm-S1zG3K1DBD8,14193
+cocoindex-0.3.1.dist-info/WHEEL,sha256=O2QTG69GgK-VjUv6T5nE2QGjJc-8mS3d1MslSxOOSiY,107
+cocoindex-0.3.1.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
+cocoindex-0.3.1.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=_9FT9tv3L_NmA0GOtdkqRZGf_4o5A16ui5YvPPi5-WM,750831
 cocoindex/__init__.py,sha256=6qZWVkK4WZ01BIAg3CPh_bRRdA6Clk4d4Q6OnZ2jFa4,2630
-cocoindex/_engine.abi3.so,sha256=JtN0US3tax_DmVbbFlCn-LkgYUFA1tM89aN9TZqo7zY,73238416
+cocoindex/_engine.abi3.so,sha256=SJnPK_Iw17r4FM4q0N-KOB5-Q0blsbfQrij0vByulJs,73312144
 cocoindex/auth_registry.py,sha256=g-uLDWLYW5NMbYe7q4Y-sU5dSyrlJXBEciyWtAiP9KE,1340
 cocoindex/cli.py,sha256=k7bl8RTUZoNNxTlQMr-Y3-9-rTNt8z1v7rJWqsajYC8,24792
 cocoindex/engine_object.py,sha256=5YTuWoR3WILhyt3PW-d9es3MAas_xD6tZZqvipN-sjg,10050
@@ -11,7 +11,7 @@ cocoindex/engine_value.py,sha256=WJw8ymYAqF2CCyg9SBiQzx8z9bl7XNVuD6ffgYvRRWQ,232
 cocoindex/flow.py,sha256=xDz3rOo4RhbboknvC-KnbWq8RBykEO0YsjGSBfXqIEg,40076
 cocoindex/functions/__init__.py,sha256=V2IF4h-Cqq4OD_GN3Oqdry-FArORyRCKmqJ7g5UlJr8,1021
 cocoindex/functions/_engine_builtin_specs.py,sha256=WpCGrjUfJBa8xZP5JiEmA8kLu7fp9Rcs7ynpuJmvSGg,1786
-cocoindex/functions/colpali.py,sha256=oACyG3qG2dquyCJ6bT7FkMkua5rXDLSxnOHcgoz9waU,8865
+cocoindex/functions/colpali.py,sha256=IsVZHO_xRgCSH1Gl6Ubyf0g4CRDMegn72tq_UJQ0G-A,8624
 cocoindex/functions/sbert.py,sha256=o_DS1ZAqpNc4u1Yrm9DO5LxfMFrlH_hfb0MWobJrs_k,2223
 cocoindex/index.py,sha256=tz5ilvmOp0BtroGehCQDqWK_pIX9m6ghkhcxsDVU8WE,982
 cocoindex/lib.py,sha256=spfdU4IbzdffHyGdrQPIw_qGo9aX0OAAboqsjj8bTiQ,2290
@@ -39,4 +39,4 @@ cocoindex/typing.py,sha256=qQj5uM6XAKHzRJ2BIEs7X-xeOXVcM9p_xz5SVqPVvS8,23914
 cocoindex/user_app_loader.py,sha256=bc3Af-gYRxJ9GpObtpjegZY855oQBCv5FGkrkWV2yGY,1873
 cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
 cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
-cocoindex-0.3.0.dist-info/RECORD,,
+cocoindex-0.3.1.dist-info/RECORD,,
cocoindex-0.3.0.dist-info/licenses/THIRD_PARTY_NOTICES.html → cocoindex-0.3.1.dist-info/licenses/THIRD_PARTY_NOTICES.html
@@ -2846,7 +2846,7 @@ Software.
 <h3 id="Apache-2.0">Apache License 2.0</h3>
 <h4>Used by:</h4>
 <ul class="license-used-by">
-<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.3.0</a></li>
+<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.3.1</a></li>
 <li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
 <li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
 </ul>