cocoindex 0.1.79__cp313-cp313-macosx_10_12_x86_64.whl → 0.1.81__cp313-cp313-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
cocoindex/functions.py CHANGED
@@ -116,19 +116,62 @@ class SentenceTransformerEmbedExecutor:
116
116
  def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
117
117
  """Get or load ColPali model and processor, with caching."""
118
118
  try:
119
- from colpali_engine.models import ColPali, ColPaliProcessor # type: ignore[import-untyped]
119
+ from colpali_engine.models import ( # type: ignore[import-untyped]
120
+ ColPali,
121
+ ColPaliProcessor,
122
+ ColQwen2,
123
+ ColQwen2Processor,
124
+ ColQwen2_5,
125
+ ColQwen2_5_Processor,
126
+ ColIdefics3,
127
+ ColIdefics3Processor,
128
+ )
120
129
  from colpali_engine.utils.torch_utils import get_torch_device # type: ignore[import-untyped]
121
130
  import torch
122
131
  except ImportError as e:
123
132
  raise ImportError(
124
- "ColPali is not available. Make sure cocoindex is installed with ColPali support."
133
+ "ColVision models are not available. Make sure cocoindex is installed with ColPali support."
125
134
  ) from e
126
135
 
127
136
  device = get_torch_device("auto")
128
- model = ColPali.from_pretrained(
129
- model_name, device_map=device, torch_dtype=torch.bfloat16
130
- ).eval()
131
- processor = ColPaliProcessor.from_pretrained(model_name)
137
+
138
+ # Manual model detection based on model name
139
+ model_name_lower = model_name.lower()
140
+
141
+ try:
142
+ if "qwen2.5" in model_name_lower:
143
+ model = ColQwen2_5.from_pretrained(
144
+ model_name,
145
+ torch_dtype=torch.bfloat16,
146
+ device_map=device,
147
+ ).eval()
148
+ processor = ColQwen2_5_Processor.from_pretrained(model_name)
149
+ elif "qwen2" in model_name_lower:
150
+ model = ColQwen2.from_pretrained(
151
+ model_name,
152
+ torch_dtype=torch.bfloat16,
153
+ device_map=device,
154
+ ).eval()
155
+ processor = ColQwen2Processor.from_pretrained(model_name)
156
+ elif "colsmol" in model_name_lower or "smol" in model_name_lower:
157
+ # ColSmol models use Idefics3 architecture
158
+ model = ColIdefics3.from_pretrained(
159
+ model_name,
160
+ torch_dtype=torch.bfloat16,
161
+ device_map=device,
162
+ ).eval()
163
+ processor = ColIdefics3Processor.from_pretrained(model_name)
164
+ else:
165
+ # Default to ColPali
166
+ model = ColPali.from_pretrained(
167
+ model_name,
168
+ torch_dtype=torch.bfloat16,
169
+ device_map=device,
170
+ ).eval()
171
+ processor = ColPaliProcessor.from_pretrained(model_name)
172
+
173
+ except Exception as e:
174
+ raise RuntimeError(f"Failed to load model {model_name}: {e}")
132
175
 
133
176
  # Get dimension from the actual model
134
177
  dimension = _detect_colpali_dimension(model, processor, device)
@@ -167,17 +210,25 @@ def _detect_colpali_dimension(model: Any, processor: Any, device: Any) -> int:
167
210
 
168
211
  class ColPaliEmbedImage(op.FunctionSpec):
169
212
  """
170
- `ColPaliEmbedImage` embeds images using the ColPali multimodal model.
213
+ `ColPaliEmbedImage` embeds images using ColVision multimodal models.
214
+
215
+ Supports the ColVision model families in the colpali-engine library; the loader is chosen from the model name:
216
+ - ColPali models (colpali-*): PaliGemma-based, best for general document retrieval
217
+ - ColQwen2 / ColQwen2.5 models (colqwen2*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
218
+ - ColSmol models (colsmol-*): Lightweight, good for resource-constrained environments
219
+ - Any other model name (including future ColVision models) is loaded with the default ColPali classes
171
220
 
172
- ColPali (Contextual Late-interaction over Patches) uses late interaction
173
- between image patch embeddings and text token embeddings for retrieval.
221
+ These models use late interaction between image patch embeddings and text token
222
+ embeddings for retrieval.
174
223
 
175
224
  Args:
176
- model: The ColPali model name to use (e.g., "vidore/colpali-v1.2")
225
+ model: Any ColVision model name supported by colpali-engine
226
+ (e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0")
227
+ See https://github.com/illuin-tech/colpali for the complete list of supported models.
177
228
 
178
229
  Note:
179
230
  This function requires the optional colpali-engine dependency.
180
- Install it with: pip install 'cocoindex[embeddings]'
231
+ Install it with: pip install 'cocoindex[colpali]'
181
232
  """
182
233
 
183
234
  model: str
@@ -189,7 +240,7 @@ class ColPaliEmbedImage(op.FunctionSpec):
189
240
  behavior_version=1,
190
241
  )
191
242
  class ColPaliEmbedImageExecutor:
192
- """Executor for ColPaliEmbedImage."""
243
+ """Executor for ColVision image embedding (ColPali, ColQwen2, ColSmol, etc.)."""
193
244
 
194
245
  spec: ColPaliEmbedImage
195
246
  _model_info: ColPaliModelInfo
@@ -209,7 +260,7 @@ class ColPaliEmbedImageExecutor:
209
260
  import io
210
261
  except ImportError as e:
211
262
  raise ImportError(
212
- "Required dependencies (PIL, torch) are missing for ColPali image embedding."
263
+ "Required dependencies (PIL, torch) are missing for ColVision image embedding."
213
264
  ) from e
214
265
 
215
266
  model = self._model_info.model
@@ -235,17 +286,25 @@ class ColPaliEmbedImageExecutor:
235
286
 
236
287
  class ColPaliEmbedQuery(op.FunctionSpec):
237
288
  """
238
- `ColPaliEmbedQuery` embeds text queries using the ColPali multimodal model.
289
+ `ColPaliEmbedQuery` embeds text queries using ColVision multimodal models.
290
+
291
+ Supports the ColVision model families in the colpali-engine library; the loader is chosen from the model name:
292
+ - ColPali models (colpali-*): PaliGemma-based, best for general document retrieval
293
+ - ColQwen2 / ColQwen2.5 models (colqwen2*): Qwen2-VL-based, excellent for multilingual text (29+ languages) and general vision
294
+ - ColSmol models (colsmol-*): Lightweight, good for resource-constrained environments
295
+ - Any other model name (including future ColVision models) is loaded with the default ColPali classes
239
296
 
240
- This produces query embeddings compatible with ColPali image embeddings
297
+ This produces query embeddings compatible with ColVision image embeddings
241
298
  for late interaction scoring (MaxSim).
242
299
 
243
300
  Args:
244
- model: The ColPali model name to use (e.g., "vidore/colpali-v1.2")
301
+ model: Any ColVision model name supported by colpali-engine
302
+ (e.g., "vidore/colpali-v1.2", "vidore/colqwen2.5-v0.2", "vidore/colsmol-v1.0")
303
+ See https://github.com/illuin-tech/colpali for the complete list of supported models.
245
304
 
246
305
  Note:
247
306
  This function requires the optional colpali-engine dependency.
248
- Install it with: pip install 'cocoindex[embeddings]'
307
+ Install it with: pip install 'cocoindex[colpali]'
249
308
  """
250
309
 
251
310
  model: str
@@ -257,7 +316,7 @@ class ColPaliEmbedQuery(op.FunctionSpec):
257
316
  behavior_version=1,
258
317
  )
259
318
  class ColPaliEmbedQueryExecutor:
260
- """Executor for ColPaliEmbedQuery."""
319
+ """Executor for ColVision query embedding (ColPali, ColQwen2, ColSmol, etc.)."""
261
320
 
262
321
  spec: ColPaliEmbedQuery
263
322
  _model_info: ColPaliModelInfo
@@ -275,7 +334,7 @@ class ColPaliEmbedQueryExecutor:
275
334
  import torch
276
335
  except ImportError as e:
277
336
  raise ImportError(
278
- "Required dependencies (torch) are missing for ColPali query embedding."
337
+ "Required dependencies (torch) are missing for ColVision query embedding."
279
338
  ) from e
280
339
 
281
340
  model = self._model_info.model
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.79
3
+ Version: 0.1.81
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -39,7 +39,8 @@ Project-URL: Homepage, https://cocoindex.io/
39
39
  [![Documentation](https://img.shields.io/badge/Documentation-394e79?logo=readthedocs&logoColor=00B9FF)](https://cocoindex.io/docs/getting_started/quickstart)
40
40
  [![License](https://img.shields.io/badge/license-Apache%202.0-5B5BD6?logoColor=white)](https://opensource.org/licenses/Apache-2.0)
41
41
  [![PyPI version](https://img.shields.io/pypi/v/cocoindex?color=5B5BD6)](https://pypi.org/project/cocoindex/)
42
- [![PyPI - Downloads](https://img.shields.io/pypi/dm/cocoindex)](https://pypistats.org/packages/cocoindex)
42
+ <!--[![PyPI - Downloads](https://img.shields.io/pypi/dm/cocoindex)](https://pypistats.org/packages/cocoindex) -->
43
+ [![PyPI Downloads](https://static.pepy.tech/badge/cocoindex/month)](https://pepy.tech/projects/cocoindex)
43
44
  [![CI](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml/badge.svg?event=push&color=5B5BD6)](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml)
44
45
  [![release](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml/badge.svg?event=push&color=5B5BD6)](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml)
45
46
  [![Discord](https://img.shields.io/discord/1314801574169673738?logo=discord&color=5B5BD6&logoColor=white)](https://discord.com/invite/zpA9S2DR7s)
@@ -216,6 +217,8 @@ It defines an index flow like this:
216
217
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
217
218
  | [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
218
219
  | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
220
+ | [Patient Intake Form Extraction](examples/patient_intake_extraction) | Use an LLM to extract structured data from patient intake forms in different formats |
221
+
219
222
 
220
223
  More coming and stay tuned 👀!
221
224
 
@@ -1,14 +1,14 @@
1
- cocoindex-0.1.79.dist-info/METADATA,sha256=1RyDQg5xjdf954JY3IBfZaAe5u6SQ2x-sDcPJDlSuPQ,11799
2
- cocoindex-0.1.79.dist-info/WHEEL,sha256=NvZaK6sFPuu8Uh9tMXGKWxMrGLxBro5TQniO6rRq4wQ,106
3
- cocoindex-0.1.79.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
- cocoindex-0.1.79.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
1
+ cocoindex-0.1.81.dist-info/METADATA,sha256=tUfIvHwTallyeNhZjVvy6_8CeIie0NTpDv3R-mK-oN4,12073
2
+ cocoindex-0.1.81.dist-info/WHEEL,sha256=NvZaK6sFPuu8Uh9tMXGKWxMrGLxBro5TQniO6rRq4wQ,106
3
+ cocoindex-0.1.81.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
+ cocoindex-0.1.81.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
5
5
  cocoindex/__init__.py,sha256=sLpSVO5Cotgn_82lawxvXnaqfa-qj33rytWBAe2MTtU,2201
6
- cocoindex/_engine.cpython-313-darwin.so,sha256=qi78J308LKsXtKS28BWayHn0BqGfhUBROFkXD35Yr8w,67755076
6
+ cocoindex/_engine.cpython-313-darwin.so,sha256=f8XDBIEc1jlL1MFcqeEeJ0hVjyxI_Cj-mzYFTfiKAug,68708372
7
7
  cocoindex/auth_registry.py,sha256=PE1-kVkcyC1G2C_V7b1kvYzeq73OFQehWKQP7ln7fJ8,1478
8
8
  cocoindex/cli.py,sha256=-gp639JSyQN6YjnhGqCakIzYoSSqXxQMbxbkcYGP0QY,22359
9
9
  cocoindex/convert.py,sha256=HodeDl1HVX8nnBH02lQKarw5i3xmkjB0nGj-DXt7Ifc,18284
10
10
  cocoindex/flow.py,sha256=egKbBG2X9DjAqmcATcndyRhe9zMZHRd-YxKCpt9BsUg,36551
11
- cocoindex/functions.py,sha256=34sZWoS0zGnaKyooIODQgc6QEPZKiJoWhfb8jKIWwps,9528
11
+ cocoindex/functions.py,sha256=-8tAW7_HhSw-A7M8U_C1vUfE9jxNPJ6j2yBRJvP16Tk,12302
12
12
  cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
13
13
  cocoindex/lib.py,sha256=f--9dAYd84CZosbDZqNW0oGbBLsY3dXiUTR1VrfQ_QY,817
14
14
  cocoindex/llm.py,sha256=Pv_cdnRngTLtuLU9AUmS8izIHhcKVnuBNolC33f9BDI,851
@@ -28,4 +28,4 @@ cocoindex/tests/test_validation.py,sha256=X6AQzVs-hVKIXcrHMEMQnhfUE8at7iXQnPq8nH
28
28
  cocoindex/typing.py,sha256=qQ0ANF3iuQDeSqipHgL2SDiiXL2reTMUN0aj4ve_T0w,13359
29
29
  cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
30
30
  cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
31
- cocoindex-0.1.79.dist-info/RECORD,,
31
+ cocoindex-0.1.81.dist-info/RECORD,,