cocoindex 0.3.0__cp311-abi3-win_amd64.whl → 0.3.1__cp311-abi3-win_amd64.whl
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- cocoindex/_engine.pyd +0 -0
- cocoindex/functions/colpali.py +29 -34
- {cocoindex-0.3.0.dist-info → cocoindex-0.3.1.dist-info}/METADATA +1 -1
- {cocoindex-0.3.0.dist-info → cocoindex-0.3.1.dist-info}/RECORD +7 -7
- {cocoindex-0.3.0.dist-info → cocoindex-0.3.1.dist-info}/licenses/THIRD_PARTY_NOTICES.html +1 -1
- {cocoindex-0.3.0.dist-info → cocoindex-0.3.1.dist-info}/WHEEL +0 -0
- {cocoindex-0.3.0.dist-info → cocoindex-0.3.1.dist-info}/entry_points.txt +0 -0
cocoindex/_engine.pyd
CHANGED
|
Binary file
|
cocoindex/functions/colpali.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import functools
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, TYPE_CHECKING, Literal
|
|
6
6
|
import numpy as np
|
|
7
7
|
|
|
8
8
|
from .. import op
|
|
@@ -22,18 +22,11 @@ class ColPaliModelInfo:
|
|
|
22
22
|
dimension: int
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
@functools.
|
|
25
|
+
@functools.cache
|
|
26
26
|
def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
|
|
27
27
|
"""Load and cache ColPali model and processor with shared device setup."""
|
|
28
28
|
try:
|
|
29
|
-
|
|
30
|
-
ColPali,
|
|
31
|
-
ColPaliProcessor,
|
|
32
|
-
ColQwen2,
|
|
33
|
-
ColQwen2Processor,
|
|
34
|
-
ColSmol,
|
|
35
|
-
ColSmolProcessor,
|
|
36
|
-
)
|
|
29
|
+
import colpali_engine as ce # type: ignore[import-untyped]
|
|
37
30
|
import torch
|
|
38
31
|
except ImportError as e:
|
|
39
32
|
raise ImportError(
|
|
@@ -42,29 +35,30 @@ def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
|
|
|
42
35
|
) from e
|
|
43
36
|
|
|
44
37
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
38
|
+
lower_model_name = model_name.lower()
|
|
45
39
|
|
|
46
40
|
# Determine model type from name
|
|
47
|
-
if "colpali"
|
|
48
|
-
model = ColPali.from_pretrained(
|
|
41
|
+
if lower_model_name.startswith("colpali"):
|
|
42
|
+
model = ce.ColPali.from_pretrained(
|
|
49
43
|
model_name, torch_dtype=torch.bfloat16, device_map=device
|
|
50
44
|
)
|
|
51
|
-
processor = ColPaliProcessor.from_pretrained(model_name)
|
|
52
|
-
elif "
|
|
53
|
-
model =
|
|
45
|
+
processor = ce.ColPaliProcessor.from_pretrained(model_name)
|
|
46
|
+
elif lower_model_name.startswith("colqwen2.5"):
|
|
47
|
+
model = ce.ColQwen2_5.from_pretrained(
|
|
54
48
|
model_name, torch_dtype=torch.bfloat16, device_map=device
|
|
55
49
|
)
|
|
56
|
-
processor =
|
|
57
|
-
elif "
|
|
58
|
-
model =
|
|
50
|
+
processor = ce.ColQwen2_5_Processor.from_pretrained(model_name)
|
|
51
|
+
elif lower_model_name.startswith("colqwen"):
|
|
52
|
+
model = ce.ColQwen2.from_pretrained(
|
|
59
53
|
model_name, torch_dtype=torch.bfloat16, device_map=device
|
|
60
54
|
)
|
|
61
|
-
processor =
|
|
55
|
+
processor = ce.ColQwen2Processor.from_pretrained(model_name)
|
|
62
56
|
else:
|
|
63
57
|
# Fallback to ColPali for backwards compatibility
|
|
64
|
-
model = ColPali.from_pretrained(
|
|
58
|
+
model = ce.ColPali.from_pretrained(
|
|
65
59
|
model_name, torch_dtype=torch.bfloat16, device_map=device
|
|
66
60
|
)
|
|
67
|
-
processor = ColPaliProcessor.from_pretrained(model_name)
|
|
61
|
+
processor = ce.ColPaliProcessor.from_pretrained(model_name)
|
|
68
62
|
|
|
69
63
|
# Detect dimension
|
|
70
64
|
dimension = _detect_colpali_dimension(model, processor, device)
|
|
@@ -130,6 +124,7 @@ class ColPaliEmbedImage(op.FunctionSpec):
|
|
|
130
124
|
@op.executor_class(
|
|
131
125
|
gpu=True,
|
|
132
126
|
cache=True,
|
|
127
|
+
batching=True,
|
|
133
128
|
behavior_version=1,
|
|
134
129
|
)
|
|
135
130
|
class ColPaliEmbedImageExecutor:
|
|
@@ -146,7 +141,7 @@ class ColPaliEmbedImageExecutor:
|
|
|
146
141
|
dimension = self._model_info.dimension
|
|
147
142
|
return Vector[Vector[np.float32, Literal[dimension]]] # type: ignore
|
|
148
143
|
|
|
149
|
-
def __call__(self,
|
|
144
|
+
def __call__(self, img_bytes_list: list[bytes]) -> Any:
|
|
150
145
|
try:
|
|
151
146
|
from PIL import Image
|
|
152
147
|
import torch
|
|
@@ -160,8 +155,11 @@ class ColPaliEmbedImageExecutor:
|
|
|
160
155
|
processor = self._model_info.processor
|
|
161
156
|
device = self._model_info.device
|
|
162
157
|
|
|
163
|
-
|
|
164
|
-
|
|
158
|
+
pil_images = [
|
|
159
|
+
Image.open(io.BytesIO(img_bytes)).convert("RGB")
|
|
160
|
+
for img_bytes in img_bytes_list
|
|
161
|
+
]
|
|
162
|
+
inputs = processor.process_images(pil_images).to(device)
|
|
165
163
|
with torch.no_grad():
|
|
166
164
|
embeddings = model(**inputs)
|
|
167
165
|
|
|
@@ -171,10 +169,8 @@ class ColPaliEmbedImageExecutor:
|
|
|
171
169
|
f"Expected 3D tensor [batch, patches, hidden_dim], got shape {embeddings.shape}"
|
|
172
170
|
)
|
|
173
171
|
|
|
174
|
-
#
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
return patch_embeddings.cpu().to(torch.float32).numpy()
|
|
172
|
+
# [patches, hidden_dim]
|
|
173
|
+
return embeddings.cpu().to(torch.float32).numpy()
|
|
178
174
|
|
|
179
175
|
|
|
180
176
|
class ColPaliEmbedQuery(op.FunctionSpec):
|
|
@@ -207,6 +203,7 @@ class ColPaliEmbedQuery(op.FunctionSpec):
|
|
|
207
203
|
gpu=True,
|
|
208
204
|
cache=True,
|
|
209
205
|
behavior_version=1,
|
|
206
|
+
batching=True,
|
|
210
207
|
)
|
|
211
208
|
class ColPaliEmbedQueryExecutor:
|
|
212
209
|
"""Executor for ColVision query embedding (ColPali, ColQwen2, ColSmol, etc.)."""
|
|
@@ -222,7 +219,7 @@ class ColPaliEmbedQueryExecutor:
|
|
|
222
219
|
dimension = self._model_info.dimension
|
|
223
220
|
return Vector[Vector[np.float32, Literal[dimension]]] # type: ignore
|
|
224
221
|
|
|
225
|
-
def __call__(self,
|
|
222
|
+
def __call__(self, queries: list[str]) -> Any:
|
|
226
223
|
try:
|
|
227
224
|
import torch
|
|
228
225
|
except ImportError as e:
|
|
@@ -234,7 +231,7 @@ class ColPaliEmbedQueryExecutor:
|
|
|
234
231
|
processor = self._model_info.processor
|
|
235
232
|
device = self._model_info.device
|
|
236
233
|
|
|
237
|
-
inputs = processor.process_queries(
|
|
234
|
+
inputs = processor.process_queries(queries).to(device)
|
|
238
235
|
with torch.no_grad():
|
|
239
236
|
embeddings = model(**inputs)
|
|
240
237
|
|
|
@@ -244,7 +241,5 @@ class ColPaliEmbedQueryExecutor:
|
|
|
244
241
|
f"Expected 3D tensor [batch, tokens, hidden_dim], got shape {embeddings.shape}"
|
|
245
242
|
)
|
|
246
243
|
|
|
247
|
-
#
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
return token_embeddings.cpu().to(torch.float32).numpy()
|
|
244
|
+
# [tokens, hidden_dim]
|
|
245
|
+
return embeddings.cpu().to(torch.float32).numpy()
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
cocoindex-0.3.
|
|
2
|
-
cocoindex-0.3.
|
|
3
|
-
cocoindex-0.3.
|
|
4
|
-
cocoindex-0.3.
|
|
1
|
+
cocoindex-0.3.1.dist-info/METADATA,sha256=XVa358pgFtptG2qP5yJTbxgzqFe_1XjVm0_69HXchTg,14426
|
|
2
|
+
cocoindex-0.3.1.dist-info/WHEEL,sha256=QC3zdlgimDC1GtRrc0qfjqbzuc7G6nDvPrjaINbNOTw,95
|
|
3
|
+
cocoindex-0.3.1.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
|
4
|
+
cocoindex-0.3.1.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=_9FT9tv3L_NmA0GOtdkqRZGf_4o5A16ui5YvPPi5-WM,750831
|
|
5
5
|
cocoindex/__init__.py,sha256=MsjYflfJHL_sKL4OxcExfRwl57JFKwqvt9AWbiHsZ3Q,2744
|
|
6
|
-
cocoindex/_engine.pyd,sha256=
|
|
6
|
+
cocoindex/_engine.pyd,sha256=5SgV6FV9mGZGq6hw0cUg5I8f9lf9lWuGxf2CMwGAkq0,77837312
|
|
7
7
|
cocoindex/auth_registry.py,sha256=HK1vfKQh_6z310c8kgFDIQf9RdoiA3vWUwvFYbgybr0,1384
|
|
8
8
|
cocoindex/cli.py,sha256=1Q_c4FzW41FhlCrdJNNkD93Z4zP48hmFzsaU2FJ3wzc,25622
|
|
9
9
|
cocoindex/engine_object.py,sha256=JH27f2MLNUw3HQy6JuCpvRhMxuKZBkxplLIo0m7PU04,10322
|
|
@@ -11,7 +11,7 @@ cocoindex/engine_value.py,sha256=CYS_rdJQZoLEcKVcUXupcH0u8k-6ce80Xrzd3rgUBJE,239
|
|
|
11
11
|
cocoindex/flow.py,sha256=MznA23dsIOlu-1uEBe6XVTfYlDjvH3bvky1BCUEnEng,41357
|
|
12
12
|
cocoindex/functions/__init__.py,sha256=AZ4f7dBVZMkWyR85z9Gy1AgOAB0f1tex62CMP8MDLX4,1061
|
|
13
13
|
cocoindex/functions/_engine_builtin_specs.py,sha256=6ZV91MUosqGSolnKNUjyRHZ_oTOOsQ_jMMEBGZW1EYo,1852
|
|
14
|
-
cocoindex/functions/colpali.py,sha256=
|
|
14
|
+
cocoindex/functions/colpali.py,sha256=Got7es-Em9KwWGwJAAXMJNFp1wFcbtEOj6vkV0cqSAA,8869
|
|
15
15
|
cocoindex/functions/sbert.py,sha256=HPieCBNuKRaWRIBIH8hW4ACFtGN2rHpg6N7mfURpUcQ,2289
|
|
16
16
|
cocoindex/index.py,sha256=C__LzwIC918VIDGsBsyLwvNBO-4BiC5Coq01Fp1zXkI,1032
|
|
17
17
|
cocoindex/lib.py,sha256=cyKGdn8cfH9bkYfrnJ7dlUBO8OVZkKyrkYhWHsMFW_g,2365
|
|
@@ -39,4 +39,4 @@ cocoindex/typing.py,sha256=dC8CmltIbT85TAgiFXwmHT_lePdiHRQLrURjgZWF6oo,24716
|
|
|
39
39
|
cocoindex/user_app_loader.py,sha256=ZkvUG9aJNNECAjwTY0ZYtNpFd9dNBPVoPKGTtB7dSZg,1926
|
|
40
40
|
cocoindex/utils.py,sha256=U3W39zD2uZpXX8v84tJD7sRmbC5ar3z_ljAP1cJrYXI,618
|
|
41
41
|
cocoindex/validation.py,sha256=4ZjsW-SZT8X_TEEhEE6QG6D-8Oq_TkPAhTqP0mdFYSE,3194
|
|
42
|
-
cocoindex-0.3.
|
|
42
|
+
cocoindex-0.3.1.dist-info/RECORD,,
|
|
@@ -2846,7 +2846,7 @@ Software.
|
|
|
2846
2846
|
<h3 id="Apache-2.0">Apache License 2.0</h3>
|
|
2847
2847
|
<h4>Used by:</h4>
|
|
2848
2848
|
<ul class="license-used-by">
|
|
2849
|
-
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.3.
|
|
2849
|
+
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.3.1</a></li>
|
|
2850
2850
|
<li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
|
|
2851
2851
|
<li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
|
|
2852
2852
|
</ul>
|
|
File without changes
|
|
File without changes
|