cocoindex 0.1.79__cp312-cp312-manylinux_2_28_x86_64.whl → 0.1.80__cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.cpython-312-x86_64-linux-gnu.so +0 -0
- cocoindex/functions.py +78 -19
- {cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/METADATA +5 -2
- {cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/RECORD +7 -7
- {cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.79.dist-info → cocoindex-0.1.80.dist-info}/licenses/LICENSE +0 -0
Binary file
|
cocoindex/functions.py
CHANGED
@@ -116,19 +116,62 @@ class SentenceTransformerEmbedExecutor:
|
|
116
116
|
def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
|
117
117
|
"""Get or load ColPali model and processor, with caching."""
|
118
118
|
try:
|
119
|
-
from colpali_engine.models import
|
119
|
+
from colpali_engine.models import ( # type: ignore[import-untyped]
|
120
|
+
ColPali,
|
121
|
+
ColPaliProcessor,
|
122
|
+
ColQwen2,
|
123
|
+
ColQwen2Processor,
|
124
|
+
ColQwen2_5,
|
125
|
+
ColQwen2_5_Processor,
|
126
|
+
ColIdefics3,
|
127
|
+
ColIdefics3Processor,
|
128
|
+
)
|
120
129
|
from colpali_engine.utils.torch_utils import get_torch_device # type: ignore[import-untyped]
|
121
130
|
import torch
|
122
131
|
except ImportError as e:
|
123
132
|
raise ImportError(
|
124
|
-
"
|
133
|
+
"ColVision models are not available. Make sure cocoindex is installed with ColPali support."
|
125
134
|
) from e
|
126
135
|
|
127
136
|
device = get_torch_device("auto")
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
137
|
+
|
138
|
+
# Manual model detection based on model name
|
139
|
+
model_name_lower = model_name.lower()
|
140
|
+
|
141
|
+
try:
|
142
|
+
if "qwen2.5" in model_name_lower:
|
143
|
+
model = ColQwen2_5.from_pretrained(
|
144
|
+
model_name,
|
145
|
+
torch_dtype=torch.bfloat16,
|
146
|
+
device_map=device,
|
147
|
+
).eval()
|
148
|
+
processor = ColQwen2_5_Processor.from_pretrained(model_name)
|
149
|
+
elif "qwen2" in model_name_lower:
|
150
|
+
model = ColQwen2.from_pretrained(
|
151
|
+
model_name,
|
152
|
+
torch_dtype=torch.bfloat16,
|
153
|
+
device_map=device,
|
154
|
+
).eval()
|
155
|
+
processor = ColQwen2Processor.from_pretrained(model_name)
|
156
|
+
elif "colsmol" in model_name_lower or "smol" in model_name_lower:
|
157
|
+
# ColSmol models use Idefics3 architecture
|
158
|
+
model = ColIdefics3.from_pretrained(
|
159
|
+
model_name,
|
160
|
+
torch_dtype=torch.bfloat16,
|
161
|
+
device_map=device,
|
162
|
+
).eval()
|
163
|
+
processor = ColIdefics3Processor.from_pretrained(model_name)
|
164
|
+
else:
|
165
|
+
# Default to ColPali
|
166
|
+
model = ColPali.from_pretrained(
|
167
|
+
model_name,
|
168
|
+
torch_dtype=torch.bfloat16,
|
169
|
+
device_map=device,
|
170
|
+
).eval()
|
171
|
+
processor = ColPaliProcessor.from_pretrained(model_name)
|
172
|
+
|
173
|
+
except Exception as e:
|
174
|
+
raise RuntimeError(f"Failed to load model {model_name}: {e}")
|
132
175
|
|
133
176
|
# Get dimension from the actual model
|
134
177
|
dimension = _detect_colpali_dimension(model, processor, device)
|
@@ -167,17 +210,25 @@ def _detect_colpali_dimension(model: Any, processor: Any, device: Any) -> int:
|
|
167
210
|
|
168
211
|
class ColPaliEmbedImage(op.FunctionSpec):
|
169
212
|
"""
|
170
|
-
`ColPaliEmbedImage` embeds images using
|
213
|
+
`ColPaliEmbedImage` embeds images using ColVision multimodal models.
|
214
|
+
|
215
|
+
Supports ALL models available in the colpali-engine library, including:
|
216
|
+
- ColPali models (colpali-*): PaliGemma-based, best for general document retrieval
|
217
|
+
- ColQwen2 models (colqwen-*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
|
218
|
+
- ColSmol models (colsmol-*): Lightweight, good for resource-constrained environments
|
219
|
+
- Other model names fall back to the ColPali model/processor classes (the loader selects the class by substring match on the model name)
|
171
220
|
|
172
|
-
|
173
|
-
|
221
|
+
These models use late interaction between image patch embeddings and text token
|
222
|
+
embeddings for retrieval.
|
174
223
|
|
175
224
|
Args:
|
176
|
-
model:
|
225
|
+
model: Any ColVision model name supported by colpali-engine
|
226
|
+
(e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0")
|
227
|
+
See https://github.com/illuin-tech/colpali for the complete list of supported models.
|
177
228
|
|
178
229
|
Note:
|
179
230
|
This function requires the optional colpali-engine dependency.
|
180
|
-
Install it with: pip install 'cocoindex[
|
231
|
+
Install it with: pip install 'cocoindex[colpali]'
|
181
232
|
"""
|
182
233
|
|
183
234
|
model: str
|
@@ -189,7 +240,7 @@ class ColPaliEmbedImage(op.FunctionSpec):
|
|
189
240
|
behavior_version=1,
|
190
241
|
)
|
191
242
|
class ColPaliEmbedImageExecutor:
|
192
|
-
"""Executor for
|
243
|
+
"""Executor for ColVision image embedding (ColPali, ColQwen2, ColSmol, etc.)."""
|
193
244
|
|
194
245
|
spec: ColPaliEmbedImage
|
195
246
|
_model_info: ColPaliModelInfo
|
@@ -209,7 +260,7 @@ class ColPaliEmbedImageExecutor:
|
|
209
260
|
import io
|
210
261
|
except ImportError as e:
|
211
262
|
raise ImportError(
|
212
|
-
"Required dependencies (PIL, torch) are missing for
|
263
|
+
"Required dependencies (PIL, torch) are missing for ColVision image embedding."
|
213
264
|
) from e
|
214
265
|
|
215
266
|
model = self._model_info.model
|
@@ -235,17 +286,25 @@ class ColPaliEmbedImageExecutor:
|
|
235
286
|
|
236
287
|
class ColPaliEmbedQuery(op.FunctionSpec):
|
237
288
|
"""
|
238
|
-
`ColPaliEmbedQuery` embeds text queries using
|
289
|
+
`ColPaliEmbedQuery` embeds text queries using ColVision multimodal models.
|
290
|
+
|
291
|
+
Supports ALL models available in the colpali-engine library, including:
|
292
|
+
- ColPali models (colpali-*): PaliGemma-based, best for general document retrieval
|
293
|
+
- ColQwen2 models (colqwen-*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
|
294
|
+
- ColSmol models (colsmol-*): Lightweight, good for resource-constrained environments
|
295
|
+
- Other model names fall back to the ColPali model/processor classes (the loader selects the class by substring match on the model name)
|
239
296
|
|
240
|
-
This produces query embeddings compatible with
|
297
|
+
This produces query embeddings compatible with ColVision image embeddings
|
241
298
|
for late interaction scoring (MaxSim).
|
242
299
|
|
243
300
|
Args:
|
244
|
-
model:
|
301
|
+
model: Any ColVision model name supported by colpali-engine
|
302
|
+
(e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0")
|
303
|
+
See https://github.com/illuin-tech/colpali for the complete list of supported models.
|
245
304
|
|
246
305
|
Note:
|
247
306
|
This function requires the optional colpali-engine dependency.
|
248
|
-
Install it with: pip install 'cocoindex[
|
307
|
+
Install it with: pip install 'cocoindex[colpali]'
|
249
308
|
"""
|
250
309
|
|
251
310
|
model: str
|
@@ -257,7 +316,7 @@ class ColPaliEmbedQuery(op.FunctionSpec):
|
|
257
316
|
behavior_version=1,
|
258
317
|
)
|
259
318
|
class ColPaliEmbedQueryExecutor:
|
260
|
-
"""Executor for
|
319
|
+
"""Executor for ColVision query embedding (ColPali, ColQwen2, ColSmol, etc.)."""
|
261
320
|
|
262
321
|
spec: ColPaliEmbedQuery
|
263
322
|
_model_info: ColPaliModelInfo
|
@@ -275,7 +334,7 @@ class ColPaliEmbedQueryExecutor:
|
|
275
334
|
import torch
|
276
335
|
except ImportError as e:
|
277
336
|
raise ImportError(
|
278
|
-
"Required dependencies (torch) are missing for
|
337
|
+
"Required dependencies (torch) are missing for ColVision query embedding."
|
279
338
|
) from e
|
280
339
|
|
281
340
|
model = self._model_info.model
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.80
|
4
4
|
Requires-Dist: click>=8.1.8
|
5
5
|
Requires-Dist: rich>=14.0.0
|
6
6
|
Requires-Dist: python-dotenv>=1.1.0
|
@@ -39,7 +39,8 @@ Project-URL: Homepage, https://cocoindex.io/
|
|
39
39
|
[](https://cocoindex.io/docs/getting_started/quickstart)
|
40
40
|
[](https://opensource.org/licenses/Apache-2.0)
|
41
41
|
[](https://pypi.org/project/cocoindex/)
|
42
|
-
[](https://pypistats.org/packages/cocoindex)
|
42
|
+
<!--[](https://pypistats.org/packages/cocoindex) -->
|
43
|
+
[](https://pepy.tech/projects/cocoindex)
|
43
44
|
[](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml)
|
44
45
|
[](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml)
|
45
46
|
[](https://discord.com/invite/zpA9S2DR7s)
|
@@ -216,6 +217,8 @@ It defines an index flow like this:
|
|
216
217
|
| [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
|
217
218
|
| [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
|
218
219
|
| [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
|
220
|
+
| [Patient intake form extraction](examples/patient_intake_extraction) | Use an LLM to extract structured data from patient intake forms in different formats |
|
221
|
+
|
219
222
|
|
220
223
|
More examples are coming, stay tuned 👀!
|
221
224
|
|
@@ -1,14 +1,14 @@
|
|
1
|
-
cocoindex-0.1.
|
2
|
-
cocoindex-0.1.
|
3
|
-
cocoindex-0.1.
|
4
|
-
cocoindex-0.1.
|
1
|
+
cocoindex-0.1.80.dist-info/METADATA,sha256=DNAMbPaFYBElOtqshXIvRW_MNmJgcqNHY6A5uN9NcQ8,12073
|
2
|
+
cocoindex-0.1.80.dist-info/WHEEL,sha256=agcEWikPie1qUk10ElUHI4WcN5vs5MjhJbp7oethB0A,108
|
3
|
+
cocoindex-0.1.80.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
+
cocoindex-0.1.80.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
5
5
|
cocoindex/__init__.py,sha256=sLpSVO5Cotgn_82lawxvXnaqfa-qj33rytWBAe2MTtU,2201
|
6
|
-
cocoindex/_engine.cpython-312-x86_64-linux-gnu.so,sha256=
|
6
|
+
cocoindex/_engine.cpython-312-x86_64-linux-gnu.so,sha256=QwiJs1O5W2RvEHPNRj7q-eVbTkkRKldgln8Lhxo5rt0,72116944
|
7
7
|
cocoindex/auth_registry.py,sha256=PE1-kVkcyC1G2C_V7b1kvYzeq73OFQehWKQP7ln7fJ8,1478
|
8
8
|
cocoindex/cli.py,sha256=-gp639JSyQN6YjnhGqCakIzYoSSqXxQMbxbkcYGP0QY,22359
|
9
9
|
cocoindex/convert.py,sha256=HodeDl1HVX8nnBH02lQKarw5i3xmkjB0nGj-DXt7Ifc,18284
|
10
10
|
cocoindex/flow.py,sha256=egKbBG2X9DjAqmcATcndyRhe9zMZHRd-YxKCpt9BsUg,36551
|
11
|
-
cocoindex/functions.py,sha256
|
11
|
+
cocoindex/functions.py,sha256=-8tAW7_HhSw-A7M8U_C1vUfE9jxNPJ6j2yBRJvP16Tk,12302
|
12
12
|
cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
|
13
13
|
cocoindex/lib.py,sha256=f--9dAYd84CZosbDZqNW0oGbBLsY3dXiUTR1VrfQ_QY,817
|
14
14
|
cocoindex/llm.py,sha256=Pv_cdnRngTLtuLU9AUmS8izIHhcKVnuBNolC33f9BDI,851
|
@@ -28,4 +28,4 @@ cocoindex/tests/test_validation.py,sha256=X6AQzVs-hVKIXcrHMEMQnhfUE8at7iXQnPq8nH
|
|
28
28
|
cocoindex/typing.py,sha256=qQ0ANF3iuQDeSqipHgL2SDiiXL2reTMUN0aj4ve_T0w,13359
|
29
29
|
cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
|
30
30
|
cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
|
31
|
-
cocoindex-0.1.
|
31
|
+
cocoindex-0.1.80.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|