cocoindex 0.1.74__cp313-cp313-manylinux_2_28_x86_64.whl → 0.1.76__cp313-cp313-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +5 -3
- cocoindex/_engine.cpython-313-x86_64-linux-gnu.so +0 -0
- cocoindex/convert.py +56 -87
- cocoindex/flow.py +27 -11
- cocoindex/functions.py +197 -0
- cocoindex/op.py +3 -2
- cocoindex/tests/test_convert.py +111 -24
- cocoindex/tests/test_transform_flow.py +103 -0
- cocoindex/typing.py +4 -4
- {cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/METADATA +7 -2
- {cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/RECORD +14 -13
- {cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/WHEEL +1 -1
- {cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/licenses/LICENSE +0 -0
cocoindex/__init__.py
CHANGED
@@ -11,7 +11,8 @@ from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
 from .flow import flow_def
 from .flow import EvaluateAndDumpOptions, GeneratedField
 from .flow import FlowLiveUpdater, FlowLiveUpdaterOptions, FlowUpdaterStatusUpdates
-from .flow import
+from .flow import open_flow
+from .flow import add_flow_def, remove_flow  # DEPRECATED
 from .flow import update_all_flows_async, setup_all_flows, drop_all_flows
 from .lib import init, start_server, stop
 from .llm import LlmSpec, LlmApiType
@@ -57,8 +58,9 @@ __all__ = [
     "FlowLiveUpdater",
     "FlowLiveUpdaterOptions",
     "FlowUpdaterStatusUpdates",
-    "
-    "
+    "open_flow",
+    "add_flow_def",  # DEPRECATED
+    "remove_flow",  # DEPRECATED
     "update_all_flows_async",
     "setup_all_flows",
     "drop_all_flows",
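The re-exported surface above replaces the old module-level helpers with `open_flow`, while keeping the deprecated names importable. A minimal sketch of the 0.1.76 import surface; the flow definition callable here is hypothetical:

```python
# Sketch only: `my_flow_def` is a made-up flow definition callable.
import cocoindex

def my_flow_def(builder: cocoindex.FlowBuilder, scope: cocoindex.DataScope) -> None:
    ...  # define sources / transformations here

flow = cocoindex.open_flow("demo_flow", my_flow_def)
# cocoindex.add_flow_def(...) and cocoindex.remove_flow(...) still import,
# but are marked DEPRECATED in this release.
```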
cocoindex/_engine.cpython-313-x86_64-linux-gnu.so
CHANGED
Binary file
cocoindex/convert.py
CHANGED
@@ -95,6 +95,7 @@ def make_engine_value_decoder(
     field_path: list[str],
     src_type: dict[str, Any],
     dst_type_info: AnalyzedTypeInfo,
+    for_key: bool = False,
 ) -> Callable[[Any], Any]:
     """
     Make a decoder from an engine value to a Python value.
@@ -123,6 +124,7 @@ def make_engine_value_decoder(
             field_path,
             src_type["fields"],
             dst_type_info,
+            for_key=for_key,
         )

     if src_type_kind in TABLE_TYPES:
@@ -131,10 +133,10 @@ def make_engine_value_decoder(

         if src_type_kind == "LTable":
             if isinstance(dst_type_variant, AnalyzedAnyType):
-
-
-
-
+                dst_elem_type = Any
+            elif isinstance(dst_type_variant, AnalyzedListType):
+                dst_elem_type = dst_type_variant.elem_type
+            else:
                 raise ValueError(
                     f"Type mismatch for `{''.join(field_path)}`: "
                     f"declared `{dst_type_info.core_type}`, a list type expected"
@@ -142,7 +144,7 @@ def make_engine_value_decoder(
             row_decoder = make_engine_struct_decoder(
                 field_path,
                 engine_fields_schema,
-                analyze_type_info(
+                analyze_type_info(dst_elem_type),
             )

             def decode(value: Any) -> Any | None:
@@ -152,10 +154,11 @@ def make_engine_value_decoder(

         elif src_type_kind == "KTable":
             if isinstance(dst_type_variant, AnalyzedAnyType):
-
-
-
-
+                key_type, value_type = Any, Any
+            elif isinstance(dst_type_variant, AnalyzedDictType):
+                key_type = dst_type_variant.key_type
+                value_type = dst_type_variant.value_type
+            else:
                 raise ValueError(
                     f"Type mismatch for `{''.join(field_path)}`: "
                     f"declared `{dst_type_info.core_type}`, a dict type expected"
@@ -166,13 +169,14 @@ def make_engine_value_decoder(
             key_decoder = make_engine_value_decoder(
                 field_path,
                 key_field_schema["type"],
-                analyze_type_info(
+                analyze_type_info(key_type),
+                for_key=True,
             )
             field_path.pop()
             value_decoder = make_engine_struct_decoder(
                 field_path,
                 engine_fields_schema[1:],
-                analyze_type_info(
+                analyze_type_info(value_type),
             )

             def decode(value: Any) -> Any | None:
@@ -237,7 +241,9 @@ def make_engine_value_decoder(
         vec_elem_decoder = make_engine_value_decoder(
             field_path + ["[*]"],
             src_type["element_type"],
-            analyze_type_info(
+            analyze_type_info(
+                dst_type_variant.elem_type if dst_type_variant else Any
+            ),
         )

         def decode_vector(value: Any) -> Any | None:
@@ -316,26 +322,26 @@ def make_engine_struct_decoder(
     field_path: list[str],
     src_fields: list[dict[str, Any]],
     dst_type_info: AnalyzedTypeInfo,
+    for_key: bool = False,
 ) -> Callable[[list[Any]], Any]:
     """Make a decoder from an engine field values to a Python value."""

     dst_type_variant = dst_type_info.variant

-    use_dict = False
     if isinstance(dst_type_variant, AnalyzedAnyType):
-
+        if for_key:
+            return _make_engine_struct_to_tuple_decoder(field_path, src_fields)
+        else:
+            return _make_engine_struct_to_dict_decoder(field_path, src_fields, Any)
     elif isinstance(dst_type_variant, AnalyzedDictType):
         analyzed_key_type = analyze_type_info(dst_type_variant.key_type)
-
-        use_dict = (
+        if (
             isinstance(analyzed_key_type.variant, AnalyzedAnyType)
-            or
-
-
+            or analyzed_key_type.core_type is str
+        ):
+            return _make_engine_struct_to_dict_decoder(
+                field_path, src_fields, dst_type_variant.value_type
             )
-        ) and isinstance(analyzed_value_type.variant, AnalyzedAnyType)
-        if use_dict:
-            return _make_engine_struct_to_dict_decoder(field_path, src_fields)

     if not isinstance(dst_type_variant, AnalyzedStructType):
         raise ValueError(
@@ -375,7 +381,7 @@ def make_engine_struct_decoder(
     with ChildFieldPath(field_path, f".{name}"):
         if src_idx is not None:
             field_decoder = make_engine_value_decoder(
-                field_path, src_fields[src_idx]["type"], type_info
+                field_path, src_fields[src_idx]["type"], type_info, for_key=for_key
             )
             return lambda values: field_decoder(values[src_idx])

@@ -409,17 +415,19 @@ def make_engine_struct_decoder(
 def _make_engine_struct_to_dict_decoder(
     field_path: list[str],
     src_fields: list[dict[str, Any]],
+    value_type_annotation: Any,
 ) -> Callable[[list[Any] | None], dict[str, Any] | None]:
     """Make a decoder from engine field values to a Python dict."""

     field_decoders = []
-
+    value_type_info = analyze_type_info(value_type_annotation)
+    for field_schema in src_fields:
         field_name = field_schema["name"]
         with ChildFieldPath(field_path, f".{field_name}"):
             field_decoder = make_engine_value_decoder(
                 field_path,
                 field_schema["type"],
-
+                value_type_info,
             )
         field_decoders.append((field_name, field_decoder))

@@ -438,76 +446,37 @@ def _make_engine_struct_to_dict_decoder(
     return decode_to_dict


-def
+def _make_engine_struct_to_tuple_decoder(
     field_path: list[str],
     src_fields: list[dict[str, Any]],
-) -> Callable[[list[Any] | None],
-    """Make a decoder from engine
+) -> Callable[[list[Any] | None], tuple[Any, ...] | None]:
+    """Make a decoder from engine field values to a Python tuple."""

-
-
-
-
-
-
-
-
-
-
-            raise ValueError(
-                f"LTable row at index {i} decoded to None, which is not allowed."
+    field_decoders = []
+    value_type_info = analyze_type_info(Any)
+    for field_schema in src_fields:
+        field_name = field_schema["name"]
+        with ChildFieldPath(field_path, f".{field_name}"):
+            field_decoders.append(
+                make_engine_value_decoder(
+                    field_path,
+                    field_schema["type"],
+                    value_type_info,
                 )
-
-        return result
-
-    return decode_to_list_dict
-
-
-def _make_engine_ktable_to_dict_dict_decoder(
-    field_path: list[str],
-    src_fields: list[dict[str, Any]],
-) -> Callable[[list[Any] | None], dict[Any, dict[str, Any]] | None]:
-    """Make a decoder from engine KTable values to a dict of dicts."""
-
-    if not src_fields:
-        raise ValueError("KTable must have at least one field for the key")
-
-    # First field is the key, remaining fields are the value
-    key_field_schema = src_fields[0]
-    value_fields_schema = src_fields[1:]
-
-    # Create decoders
-    with ChildFieldPath(field_path, f".{key_field_schema.get('name', KEY_FIELD_NAME)}"):
-        key_decoder = make_engine_value_decoder(
-            field_path, key_field_schema["type"], analyze_type_info(Any)
-        )
-
-    value_decoder = _make_engine_struct_to_dict_decoder(field_path, value_fields_schema)
+            )

-    def
-        values: list[Any] | None,
-    ) -> dict[Any, dict[str, Any]] | None:
+    def decode_to_tuple(values: list[Any] | None) -> tuple[Any, ...] | None:
         if values is None:
             return None
-
-
-
-
-
-
-
-        else:
-            tmp = value_decoder(row_values[1:])
-            if tmp is None:
-                value = {}
-            else:
-                value = tmp
-            if isinstance(key, dict):
-                key = tuple(key.values())
-            result[key] = value
-        return result
+        if len(field_decoders) != len(values):
+            raise ValueError(
+                f"Field count mismatch: expected {len(field_decoders)}, got {len(values)}"
+            )
+        return tuple(
+            field_decoder(value) for value, field_decoder in zip(values, field_decoders)
+        )

-    return
+    return decode_to_tuple


 def dump_engine_object(v: Any) -> Any:
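The net effect of these decoder changes: when the destination annotation is `Any`, an engine struct now decodes to a `dict`, except when it is being decoded as a KTable key (`for_key=True`), where it becomes a hashable `tuple`. A minimal standalone sketch of that tuple-key idea (not the cocoindex implementation; field names are made up):

```python
from typing import Any, Callable

def make_struct_to_tuple_decoder(
    field_names: list[str],
) -> Callable[[list[Any] | None], tuple[Any, ...] | None]:
    """Decode an engine struct row into a hashable tuple, suitable as a dict key."""
    def decode(values: list[Any] | None) -> tuple[Any, ...] | None:
        if values is None:
            return None
        if len(values) != len(field_names):
            raise ValueError(
                f"Field count mismatch: expected {len(field_names)}, got {len(values)}"
            )
        return tuple(values)
    return decode

# A two-field key struct such as (shop_name, version) becomes a tuple key:
decode_key = make_struct_to_tuple_decoder(["shop_name", "version"])
assert decode_key(["shop1", 1]) == ("shop1", 1)  # hashable, usable as a KTable key
```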
cocoindex/flow.py
CHANGED
@@ -798,7 +798,7 @@ class Flow:
         The current instance is still valid after it's called.
         For example, you can still call `setup()` after it, to setup the persistent backends again.

-        Call `
+        Call `close()` if you want to remove the flow from the current process.
         """
         execution_context.run(self.drop_async(report_to_stdout=report_to_stdout))

@@ -810,6 +810,18 @@ class Flow:
             report_to_stdout=report_to_stdout
         )

+    def close(self) -> None:
+        """
+        Close the flow. It will remove the flow from the current process to free up resources.
+        After it's called, methods of the flow should no longer be called.
+
+        This will NOT touch the persistent backends of the flow.
+        """
+        _engine.remove_flow_context(self.full_name)
+        self._lazy_engine_flow = None
+        with _flows_lock:
+            del _flows[self.name]
+

 def _create_lazy_flow(
     name: str | None, fl_def: Callable[[FlowBuilder, DataScope], None]
@@ -845,7 +857,10 @@ def get_flow_full_name(name: str) -> str:
     return f"{setting.get_app_namespace(trailing_delimiter='.')}{name}"


-def
+def open_flow(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
+    """
+    Open a flow, with the given name and definition.
+    """
     with _flows_lock:
         if name in _flows:
             raise KeyError(f"Flow with name {name} already exists")
@@ -853,17 +868,18 @@ def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) ->
     return fl


-def
+def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
     """
-
-
+    DEPRECATED: Use `open_flow()` instead.
+    """
+    return open_flow(name, fl_def)
+

-
+def remove_flow(fl: Flow) -> None:
     """
-
-
-
-    del _flows[fl.name]
+    DEPRECATED: Use `Flow.close()` instead.
+    """
+    fl.close()


 def flow_def(
@@ -872,7 +888,7 @@ def flow_def(
     """
     A decorator to wrap the flow definition.
     """
-    return lambda fl_def:
+    return lambda fl_def: open_flow(name or fl_def.__name__, fl_def)


 def flow_names() -> list[str]:
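Taken together, the additions above give a flow an explicit lifecycle: `open_flow()` registers it in the current process and `Flow.close()` removes it again without touching persistent backends. A hedged usage sketch; the flow body is hypothetical and only `setup()` is taken from the docstrings shown in this diff:

```python
import cocoindex

def my_flow_def(builder: cocoindex.FlowBuilder, scope: cocoindex.DataScope) -> None:
    ...  # hypothetical flow definition

flow = cocoindex.open_flow("demo_flow", my_flow_def)
try:
    flow.setup()   # referenced in the drop() docstring above; prepares persistent backends
    ...            # run / update the flow as usual
finally:
    flow.close()   # new: frees in-process resources, leaves persistent backends intact
```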
cocoindex/functions.py
CHANGED
@@ -1,6 +1,7 @@
 """All builtin functions."""

 import dataclasses
+import functools
 from typing import Annotated, Any, Literal

 import numpy as np
@@ -23,6 +24,16 @@ class CustomLanguageSpec:
     aliases: list[str] = dataclasses.field(default_factory=list)


+@dataclasses.dataclass
+class ColPaliModelInfo:
+    """Data structure for ColPali model and processor."""
+
+    model: Any
+    processor: Any
+    dimension: int
+    device: Any
+
+
 class SplitRecursively(op.FunctionSpec):
     """Split a document (in string) recursively."""

@@ -99,3 +110,189 @@ class SentenceTransformerEmbedExecutor:
         assert self._model is not None
         result: NDArray[np.float32] = self._model.encode(text, convert_to_numpy=True)
         return result
+
+
+@functools.cache
+def _get_colpali_model_and_processor(model_name: str) -> ColPaliModelInfo:
+    """Get or load ColPali model and processor, with caching."""
+    try:
+        from colpali_engine.models import ColPali, ColPaliProcessor  # type: ignore[import-untyped]
+        from colpali_engine.utils.torch_utils import get_torch_device  # type: ignore[import-untyped]
+        import torch
+    except ImportError as e:
+        raise ImportError(
+            "ColPali is not available. Make sure cocoindex is installed with ColPali support."
+        ) from e
+
+    device = get_torch_device("auto")
+    model = ColPali.from_pretrained(
+        model_name, device_map=device, torch_dtype=torch.bfloat16
+    ).eval()
+    processor = ColPaliProcessor.from_pretrained(model_name)
+
+    # Get dimension from the actual model
+    dimension = _detect_colpali_dimension(model, processor, device)
+
+    return ColPaliModelInfo(
+        model=model,
+        processor=processor,
+        dimension=dimension,
+        device=device,
+    )
+
+
+def _detect_colpali_dimension(model: Any, processor: Any, device: Any) -> int:
+    """Detect ColPali embedding dimension from the actual model config."""
+    # Try to access embedding dimension
+    if hasattr(model.config, "embedding_dim"):
+        dim = model.config.embedding_dim
+    else:
+        # Fallback: infer from output shape with dummy data
+        from PIL import Image
+        import numpy as np
+        import torch
+
+        dummy_img = Image.fromarray(np.zeros((224, 224, 3), np.uint8))
+        # Use the processor to process the dummy image
+        processed = processor.process_images([dummy_img]).to(device)
+        with torch.no_grad():
+            output = model(**processed)
+        dim = int(output.shape[-1])
+        if isinstance(dim, int):
+            return dim
+        else:
+            raise ValueError(f"Expected integer dimension, got {type(dim)}: {dim}")
+    return dim
+
+
+class ColPaliEmbedImage(op.FunctionSpec):
+    """
+    `ColPaliEmbedImage` embeds images using the ColPali multimodal model.
+
+    ColPali (Contextual Late-interaction over Patches) uses late interaction
+    between image patch embeddings and text token embeddings for retrieval.
+
+    Args:
+        model: The ColPali model name to use (e.g., "vidore/colpali-v1.2")
+
+    Note:
+        This function requires the optional colpali-engine dependency.
+        Install it with: pip install 'cocoindex[embeddings]'
+    """
+
+    model: str
+
+
+@op.executor_class(
+    gpu=True,
+    cache=True,
+    behavior_version=1,
+)
+class ColPaliEmbedImageExecutor:
+    """Executor for ColPaliEmbedImage."""
+
+    spec: ColPaliEmbedImage
+    _model_info: ColPaliModelInfo
+
+    def analyze(self, _img_bytes: Any) -> type:
+        # Get shared model and dimension
+        self._model_info = _get_colpali_model_and_processor(self.spec.model)
+
+        # Return multi-vector type: Variable patches x Fixed hidden dimension
+        dimension = self._model_info.dimension
+        return Vector[Vector[np.float32, Literal[dimension]]]  # type: ignore
+
+    def __call__(self, img_bytes: bytes) -> Any:
+        try:
+            from PIL import Image
+            import torch
+            import io
+        except ImportError as e:
+            raise ImportError(
+                "Required dependencies (PIL, torch) are missing for ColPali image embedding."
+            ) from e
+
+        model = self._model_info.model
+        processor = self._model_info.processor
+        device = self._model_info.device
+
+        pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+        inputs = processor.process_images([pil_image]).to(device)
+        with torch.no_grad():
+            embeddings = model(**inputs)
+
+        # Return multi-vector format: [patches, hidden_dim]
+        if len(embeddings.shape) != 3:
+            raise ValueError(
+                f"Expected 3D tensor [batch, patches, hidden_dim], got shape {embeddings.shape}"
+            )
+
+        # Keep patch-level embeddings: [batch, patches, hidden_dim] -> [patches, hidden_dim]
+        patch_embeddings = embeddings[0]  # Remove batch dimension
+
+        return patch_embeddings.cpu().to(torch.float32).numpy()
+
+
+class ColPaliEmbedQuery(op.FunctionSpec):
+    """
+    `ColPaliEmbedQuery` embeds text queries using the ColPali multimodal model.
+
+    This produces query embeddings compatible with ColPali image embeddings
+    for late interaction scoring (MaxSim).
+
+    Args:
+        model: The ColPali model name to use (e.g., "vidore/colpali-v1.2")
+
+    Note:
+        This function requires the optional colpali-engine dependency.
+        Install it with: pip install 'cocoindex[embeddings]'
+    """
+
+    model: str
+
+
+@op.executor_class(
+    gpu=True,
+    cache=True,
+    behavior_version=1,
+)
+class ColPaliEmbedQueryExecutor:
+    """Executor for ColPaliEmbedQuery."""
+
+    spec: ColPaliEmbedQuery
+    _model_info: ColPaliModelInfo
+
+    def analyze(self, _query: Any) -> type:
+        # Get shared model and dimension
+        self._model_info = _get_colpali_model_and_processor(self.spec.model)
+
+        # Return multi-vector type: Variable tokens x Fixed hidden dimension
+        dimension = self._model_info.dimension
+        return Vector[Vector[np.float32, Literal[dimension]]]  # type: ignore
+
+    def __call__(self, query: str) -> Any:
+        try:
+            import torch
+        except ImportError as e:
+            raise ImportError(
+                "Required dependencies (torch) are missing for ColPali query embedding."
+            ) from e
+
+        model = self._model_info.model
+        processor = self._model_info.processor
+        device = self._model_info.device
+
+        inputs = processor.process_queries([query]).to(device)
+        with torch.no_grad():
+            embeddings = model(**inputs)
+
+        # Return multi-vector format: [tokens, hidden_dim]
+        if len(embeddings.shape) != 3:
+            raise ValueError(
+                f"Expected 3D tensor [batch, tokens, hidden_dim], got shape {embeddings.shape}"
+            )
+
+        # Keep token-level embeddings: [batch, tokens, hidden_dim] -> [tokens, hidden_dim]
+        token_embeddings = embeddings[0]  # Remove batch dimension
+
+        return token_embeddings.cpu().to(torch.float32).numpy()
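The two new specs follow the same shape as the existing `SentenceTransformerEmbed` spec: construct the spec with a `model` name and apply it with `.transform(...)`. A hypothetical wiring sketch (flow names are made up, and it assumes the usual transform-flow calling convention shown in the new test file later in this diff):

```python
from typing import Any

import cocoindex

@cocoindex.transform_flow()
def embed_query(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[Any]:
    # Produces a [tokens, hidden_dim] multi-vector for late-interaction (MaxSim) scoring.
    return text.transform(
        cocoindex.functions.ColPaliEmbedQuery(model="vidore/colpali-v1.2")
    )

@cocoindex.transform_flow()
def embed_image(img_bytes: cocoindex.DataSlice[bytes]) -> cocoindex.DataSlice[Any]:
    # Produces a [patches, hidden_dim] multi-vector per image.
    return img_bytes.transform(
        cocoindex.functions.ColPaliEmbedImage(model="vidore/colpali-v1.2")
    )
```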
cocoindex/op.py
CHANGED
@@ -505,7 +505,7 @@ class _TargetConnector:
                 self._mutatation_type.value_type,
             )
             if self._mutatation_type is not None
-            else (
+            else (Any, Any)
         )

         key_type_info = analyze_type_info(key_annotation)
@@ -519,10 +519,11 @@ class _TargetConnector:
                 ["(key)"],
                 key_fields_schema[0]["type"],
                 key_type_info,
+                for_key=True,
             )
         else:
             key_decoder = make_engine_struct_decoder(
-                ["(key)"], key_fields_schema, key_type_info
+                ["(key)"], key_fields_schema, key_type_info, for_key=True
             )

         value_decoder = make_engine_struct_decoder(
cocoindex/tests/test_convert.py
CHANGED
@@ -1,4 +1,5 @@
 import datetime
+import inspect
 import uuid
 from dataclasses import dataclass, make_dataclass, field
 from typing import Annotated, Any, Callable, Literal, NamedTuple
@@ -236,19 +237,24 @@ def test_encode_engine_value_none() -> None:


 def test_roundtrip_basic_types() -> None:
-    validate_full_roundtrip(
+    validate_full_roundtrip(
+        b"hello world",
+        bytes,
+        (b"hello world", inspect.Parameter.empty),
+        (b"hello world", Any),
+    )
     validate_full_roundtrip(b"\x00\x01\x02\xff\xfe", bytes)
-    validate_full_roundtrip("hello", str, ("hello",
-    validate_full_roundtrip(True, bool, (True,
-    validate_full_roundtrip(False, bool, (False,
+    validate_full_roundtrip("hello", str, ("hello", Any))
+    validate_full_roundtrip(True, bool, (True, Any))
+    validate_full_roundtrip(False, bool, (False, Any))
     validate_full_roundtrip(
-        42, cocoindex.Int64, (42, int), (np.int64(42), np.int64), (42,
+        42, cocoindex.Int64, (42, int), (np.int64(42), np.int64), (42, Any)
     )
     validate_full_roundtrip(42, int, (42, cocoindex.Int64))
     validate_full_roundtrip(np.int64(42), np.int64, (42, cocoindex.Int64))

     validate_full_roundtrip(
-        3.25, Float64, (3.25, float), (np.float64(3.25), np.float64), (3.25,
+        3.25, Float64, (3.25, float), (np.float64(3.25), np.float64), (3.25, Any)
     )
     validate_full_roundtrip(3.25, float, (3.25, Float64))
     validate_full_roundtrip(np.float64(3.25), np.float64, (3.25, Float64))
@@ -260,35 +266,35 @@ def test_roundtrip_basic_types() -> None:
         (np.float32(3.25), np.float32),
         (np.float64(3.25), np.float64),
         (3.25, Float64),
-        (3.25,
+        (3.25, Any),
     )
     validate_full_roundtrip(np.float32(3.25), np.float32, (3.25, Float32))


 def test_roundtrip_uuid() -> None:
     uuid_value = uuid.uuid4()
-    validate_full_roundtrip(uuid_value, uuid.UUID, (uuid_value,
+    validate_full_roundtrip(uuid_value, uuid.UUID, (uuid_value, Any))


 def test_roundtrip_range() -> None:
     r1 = (0, 100)
-    validate_full_roundtrip(r1, cocoindex.Range, (r1,
+    validate_full_roundtrip(r1, cocoindex.Range, (r1, Any))
     r2 = (50, 50)
-    validate_full_roundtrip(r2, cocoindex.Range, (r2,
+    validate_full_roundtrip(r2, cocoindex.Range, (r2, Any))
     r3 = (0, 1_000_000_000)
-    validate_full_roundtrip(r3, cocoindex.Range, (r3,
+    validate_full_roundtrip(r3, cocoindex.Range, (r3, Any))


 def test_roundtrip_time() -> None:
     t1 = datetime.time(10, 30, 50, 123456)
-    validate_full_roundtrip(t1, datetime.time, (t1,
+    validate_full_roundtrip(t1, datetime.time, (t1, Any))
     t2 = datetime.time(23, 59, 59)
-    validate_full_roundtrip(t2, datetime.time, (t2,
+    validate_full_roundtrip(t2, datetime.time, (t2, Any))
     t3 = datetime.time(0, 0, 0)
-    validate_full_roundtrip(t3, datetime.time, (t3,
+    validate_full_roundtrip(t3, datetime.time, (t3, Any))

     validate_full_roundtrip(
-        datetime.date(2025, 1, 1), datetime.date, (datetime.date(2025, 1, 1),
+        datetime.date(2025, 1, 1), datetime.date, (datetime.date(2025, 1, 1), Any)
     )

     validate_full_roundtrip(
@@ -333,11 +339,11 @@ def test_roundtrip_timedelta() -> None:
     td1 = datetime.timedelta(
         days=5, seconds=10, microseconds=123, milliseconds=456, minutes=30, hours=2
     )
-    validate_full_roundtrip(td1, datetime.timedelta, (td1,
+    validate_full_roundtrip(td1, datetime.timedelta, (td1, Any))
     td2 = datetime.timedelta(days=-5, hours=-2)
-    validate_full_roundtrip(td2, datetime.timedelta, (td2,
+    validate_full_roundtrip(td2, datetime.timedelta, (td2, Any))
     td3 = datetime.timedelta(0)
-    validate_full_roundtrip(td3, datetime.timedelta, (td3,
+    validate_full_roundtrip(td3, datetime.timedelta, (td3, Any))


 def test_roundtrip_json() -> None:
@@ -1160,6 +1166,37 @@ def test_full_roundtrip_scalar_with_python_types() -> None:
     validate_full_roundtrip(instance, MixedStruct)


+def test_roundtrip_simple_struct_to_dict_binding() -> None:
+    """Test struct -> dict binding with Any annotation."""
+
+    @dataclass
+    class SimpleStruct:
+        first_name: str
+        last_name: str
+
+    instance = SimpleStruct("John", "Doe")
+    expected_dict = {"first_name": "John", "last_name": "Doe"}
+
+    # Test Any annotation
+    validate_full_roundtrip(
+        instance,
+        SimpleStruct,
+        (expected_dict, Any),
+        (expected_dict, dict),
+        (expected_dict, dict[Any, Any]),
+        (expected_dict, dict[str, Any]),
+        # For simple struct, all fields have the same type, so we can directly use the type as the dict value type.
+        (expected_dict, dict[Any, str]),
+        (expected_dict, dict[str, str]),
+    )
+
+    with pytest.raises(ValueError):
+        validate_full_roundtrip(instance, SimpleStruct, (expected_dict, dict[str, int]))
+
+    with pytest.raises(ValueError):
+        validate_full_roundtrip(instance, SimpleStruct, (expected_dict, dict[int, Any]))
+
+
 def test_roundtrip_struct_to_dict_binding() -> None:
     """Test struct -> dict binding with Any annotation."""

@@ -1173,7 +1210,20 @@ def test_roundtrip_struct_to_dict_binding() -> None:
     expected_dict = {"name": "test", "value": 42, "price": 3.14}

     # Test Any annotation
-    validate_full_roundtrip(
+    validate_full_roundtrip(
+        instance,
+        SimpleStruct,
+        (expected_dict, Any),
+        (expected_dict, dict),
+        (expected_dict, dict[Any, Any]),
+        (expected_dict, dict[str, Any]),
+    )
+
+    with pytest.raises(ValueError):
+        validate_full_roundtrip(instance, SimpleStruct, (expected_dict, dict[str, str]))
+
+    with pytest.raises(ValueError):
+        validate_full_roundtrip(instance, SimpleStruct, (expected_dict, dict[int, Any]))


 def test_roundtrip_struct_to_dict_explicit() -> None:
@@ -1207,8 +1257,8 @@ def test_roundtrip_struct_to_dict_with_none_annotation() -> None:
     instance = Config("localhost", 8080, True)
     expected_dict = {"host": "localhost", "port": 8080, "debug": True}

-    # Test
-    validate_full_roundtrip(instance, Config, (expected_dict,
+    # Test empty annotation (should be treated as Any)
+    validate_full_roundtrip(instance, Config, (expected_dict, inspect.Parameter.empty))


 def test_roundtrip_struct_to_dict_nested() -> None:
@@ -1289,7 +1339,13 @@ def test_roundtrip_ltable_to_list_dict_binding() -> None:
     ]

     # Test Any annotation
-    validate_full_roundtrip(
+    validate_full_roundtrip(
+        users,
+        list[User],
+        (expected_list_dict, Any),
+        (expected_list_dict, list[Any]),
+        (expected_list_dict, list[dict[str, Any]]),
+    )


 def test_roundtrip_ktable_to_dict_dict_binding() -> None:
@@ -1313,7 +1369,17 @@ def test_roundtrip_ktable_to_dict_dict_binding() -> None:
     }

     # Test Any annotation
-    validate_full_roundtrip(
+    validate_full_roundtrip(
+        products,
+        dict[str, Product],
+        (expected_dict_dict, Any),
+        (expected_dict_dict, dict),
+        (expected_dict_dict, dict[Any, Any]),
+        (expected_dict_dict, dict[str, Any]),
+        (expected_dict_dict, dict[Any, dict[Any, Any]]),
+        (expected_dict_dict, dict[str, dict[Any, Any]]),
+        (expected_dict_dict, dict[str, dict[str, Any]]),
+    )


 def test_roundtrip_ktable_with_complex_key() -> None:
@@ -1339,7 +1405,28 @@ def test_roundtrip_ktable_with_complex_key() -> None:
     }

     # Test Any annotation
-    validate_full_roundtrip(
+    validate_full_roundtrip(
+        orders,
+        dict[OrderKey, Order],
+        (expected_dict_dict, Any),
+        (expected_dict_dict, dict),
+        (expected_dict_dict, dict[Any, Any]),
+        (expected_dict_dict, dict[Any, dict[str, Any]]),
+        (
+            {
+                ("shop1", 1): Order("Alice", 100.0),
+                ("shop2", 2): Order("Bob", 200.0),
+            },
+            dict[Any, Order],
+        ),
+        (
+            {
+                OrderKey("shop1", 1): {"customer": "Alice", "total": 100.0},
+                OrderKey("shop2", 2): {"customer": "Bob", "total": 200.0},
+            },
+            dict[OrderKey, Any],
+        ),
+    )


 def test_roundtrip_ltable_with_nested_structs() -> None:
cocoindex/tests/test_transform_flow.py
@@ -0,0 +1,103 @@
+import typing
+from dataclasses import dataclass
+from typing import Any
+
+import pytest
+
+import cocoindex
+
+
+@dataclass
+class Child:
+    value: int
+
+
+@dataclass
+class Parent:
+    children: list[Child]
+
+
+# Fixture to initialize CocoIndex library
+@pytest.fixture(scope="session", autouse=True)
+def init_cocoindex() -> typing.Generator[None, None, None]:
+    cocoindex.init()
+    yield
+
+
+@cocoindex.op.function()
+def add_suffix(text: str) -> str:
+    """Append ' world' to the input text."""
+    return f"{text} world"
+
+
+@cocoindex.transform_flow()
+def simple_transform(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[str]:
+    """Transform flow that applies add_suffix to input text."""
+    return text.transform(add_suffix)
+
+
+@cocoindex.op.function()
+def extract_value(value: int) -> int:
+    """Extracts the value."""
+    return value
+
+
+@cocoindex.transform_flow()
+def for_each_transform(
+    data: cocoindex.DataSlice[Parent],
+) -> cocoindex.DataSlice[Any]:
+    """Transform flow that processes child rows to extract values."""
+    with data["children"].row() as child:
+        child["new_field"] = child["value"].transform(extract_value)
+    return data
+
+
+def test_simple_transform_flow() -> None:
+    """Test the simple transform flow."""
+    input_text = "hello"
+    result = simple_transform.eval(input_text)
+    assert result == "hello world", f"Expected 'hello world', got {result}"
+
+    result = simple_transform.eval("")
+    assert result == " world", f"Expected ' world', got {result}"
+
+
+@pytest.mark.asyncio
+async def test_simple_transform_flow_async() -> None:
+    """Test the simple transform flow asynchronously."""
+    input_text = "async"
+    result = await simple_transform.eval_async(input_text)
+    assert result == "async world", f"Expected 'async world', got {result}"
+
+
+def test_for_each_transform_flow() -> None:
+    """Test the complex transform flow with child rows."""
+    input_data = Parent(children=[Child(1), Child(2), Child(3)])
+    result = for_each_transform.eval(input_data)
+    expected = {
+        "children": [
+            {"value": 1, "new_field": 1},
+            {"value": 2, "new_field": 2},
+            {"value": 3, "new_field": 3},
+        ]
+    }
+    assert result == expected, f"Expected {expected}, got {result}"
+
+    input_data = Parent(children=[])
+    result = for_each_transform.eval(input_data)
+    assert result == {"children": []}, f"Expected {{'children': []}}, got {result}"
+
+
+@pytest.mark.asyncio
+async def test_for_each_transform_flow_async() -> None:
+    """Test the complex transform flow asynchronously."""
+    input_data = Parent(children=[Child(4), Child(5)])
+    result = await for_each_transform.eval_async(input_data)
+    expected = {
+        "children": [
+            {"value": 4, "new_field": 4},
+            {"value": 5, "new_field": 5},
+        ]
+    }
+
+    assert result == expected, f"Expected {expected}, got {result}"
cocoindex/typing.py
CHANGED
@@ -262,7 +262,7 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:

     if kind is not None:
         variant = AnalyzedBasicType(kind=kind)
-    elif base_type is
+    elif base_type is Any or base_type is inspect.Parameter.empty:
         variant = AnalyzedAnyType()
     elif is_struct_type(base_type):
         variant = AnalyzedStructType(struct_type=t)
@@ -270,15 +270,15 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
         kind = DtypeRegistry.validate_dtype_and_get_kind(t)
         variant = AnalyzedBasicType(kind=kind)
     elif base_type is collections.abc.Sequence or base_type is list:
-        elem_type = type_args[0] if len(type_args) > 0 else
+        elem_type = type_args[0] if len(type_args) > 0 else Any
         variant = AnalyzedListType(elem_type=elem_type, vector_info=vector_info)
     elif base_type is np.ndarray:
         np_number_type = t
         elem_type = extract_ndarray_elem_dtype(np_number_type)
         variant = AnalyzedListType(elem_type=elem_type, vector_info=vector_info)
     elif base_type is collections.abc.Mapping or base_type is dict or t is dict:
-        key_type = type_args[0] if len(type_args) > 0 else
-        elem_type = type_args[1] if len(type_args) > 1 else
+        key_type = type_args[0] if len(type_args) > 0 else Any
+        elem_type = type_args[1] if len(type_args) > 1 else Any
         variant = AnalyzedDictType(key_type=key_type, value_type=elem_type)
     elif base_type in (types.UnionType, typing.Union):
         non_none_types = [arg for arg in type_args if arg not in (None, types.NoneType)]
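The fallback to `Any` matters because bare `dict` and `list` annotations carry no type arguments at runtime. A small illustration of the standard `typing` behavior the code above is handling (illustrative only, not cocoindex code):

```python
import typing

# Parameterized annotations expose their arguments ...
assert typing.get_args(dict[str, int]) == (str, int)
assert typing.get_args(list[float]) == (float,)

# ... but bare `dict` / `list` do not, so analyze_type_info now falls back to
# key=Any / value=Any for dicts and elem=Any for lists instead of failing.
assert typing.get_args(dict) == ()
assert typing.get_args(list) == ()
```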
{cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/METADATA
CHANGED
@@ -1,24 +1,28 @@
 Metadata-Version: 2.4
 Name: cocoindex
-Version: 0.1.74
+Version: 0.1.76
 Requires-Dist: click>=8.1.8
 Requires-Dist: rich>=14.0.0
 Requires-Dist: python-dotenv>=1.1.0
 Requires-Dist: watchfiles>=1.1.0
 Requires-Dist: numpy>=1.23.2
 Requires-Dist: pytest ; extra == 'dev'
+Requires-Dist: pytest-asyncio ; extra == 'dev'
 Requires-Dist: ruff ; extra == 'dev'
 Requires-Dist: mypy ; extra == 'dev'
 Requires-Dist: pre-commit ; extra == 'dev'
 Requires-Dist: sentence-transformers>=3.3.1 ; extra == 'embeddings'
+Requires-Dist: colpali-engine ; extra == 'colpali'
 Requires-Dist: sentence-transformers>=3.3.1 ; extra == 'all'
+Requires-Dist: colpali-engine ; extra == 'all'
 Provides-Extra: dev
 Provides-Extra: embeddings
+Provides-Extra: colpali
 Provides-Extra: all
 License-File: LICENSE
 Summary: With CocoIndex, users declare the transformation, CocoIndex creates & maintains an index, and keeps the derived index up to date based on source update, with minimal computation and changes.
 Author-email: CocoIndex <cocoindex.io@gmail.com>
-License: Apache-2.0
+License-Expression: Apache-2.0
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
 Project-URL: Homepage, https://cocoindex.io/
@@ -210,6 +214,7 @@ It defines an index flow like this:
 | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
 | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
 | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
+| [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |

 More coming and stay tuned 👀!
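Per the metadata above, `colpali-engine` ships under the new `colpali` extra (and under `all`), alongside `pytest-asyncio` in `dev`. A small hedged sketch for probing the optional dependency at runtime before using the ColPali functions; it assumes only what the metadata and functions.py sections show, namely that the import name is `colpali_engine`:

```python
# Probe the optional ColPali dependency before constructing ColPali specs.
import importlib.util

if importlib.util.find_spec("colpali_engine") is None:
    raise SystemExit(
        "colpali-engine is not installed; try: pip install 'cocoindex[colpali]'"
    )
```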
{cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/RECORD
CHANGED
@@ -1,18 +1,18 @@
-cocoindex-0.1.
-cocoindex-0.1.
-cocoindex-0.1.
-cocoindex-0.1.
-cocoindex/__init__.py,sha256=
-cocoindex/_engine.cpython-313-x86_64-linux-gnu.so,sha256=
+cocoindex-0.1.76.dist-info/METADATA,sha256=w_VMnPWkx5iMgpwgAWTB3KRxSfcGaU1sVES6-jXiAjQ,11655
+cocoindex-0.1.76.dist-info/WHEEL,sha256=9Ee4MwqZpMDLH1_kZE8rvruLKRVRs9cmbXRSBB0h-_M,108
+cocoindex-0.1.76.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
+cocoindex-0.1.76.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+cocoindex/__init__.py,sha256=sLpSVO5Cotgn_82lawxvXnaqfa-qj33rytWBAe2MTtU,2201
+cocoindex/_engine.cpython-313-x86_64-linux-gnu.so,sha256=sIEYde0TfFKjUS1vPsht_7DPZjbvlApMxb_U2CbiIJw,71114160
 cocoindex/auth_registry.py,sha256=PE1-kVkcyC1G2C_V7b1kvYzeq73OFQehWKQP7ln7fJ8,1478
 cocoindex/cli.py,sha256=-gp639JSyQN6YjnhGqCakIzYoSSqXxQMbxbkcYGP0QY,22359
-cocoindex/convert.py,sha256=
-cocoindex/flow.py,sha256=
-cocoindex/functions.py,sha256=
+cocoindex/convert.py,sha256=HodeDl1HVX8nnBH02lQKarw5i3xmkjB0nGj-DXt7Ifc,18284
+cocoindex/flow.py,sha256=egKbBG2X9DjAqmcATcndyRhe9zMZHRd-YxKCpt9BsUg,36551
+cocoindex/functions.py,sha256=34sZWoS0zGnaKyooIODQgc6QEPZKiJoWhfb8jKIWwps,9528
 cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
 cocoindex/lib.py,sha256=f--9dAYd84CZosbDZqNW0oGbBLsY3dXiUTR1VrfQ_QY,817
 cocoindex/llm.py,sha256=WxmWUbNcf9HOCM5xkbDeFs9lF67M3mr810B7deDDc-8,673
-cocoindex/op.py,sha256=
+cocoindex/op.py,sha256=oiG1rjxz6ad1jGS7DMya4NStrA_6LV3RbcVSR75XUl0,21516
 cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cocoindex/runtime.py,sha256=povilB3HH3y1JF-yxKwU-pD8n2WnAqyQxIgvXXHNc60,1080
 cocoindex/setting.py,sha256=TwhQ6pEeZmvc8ZXlnT9d8Wn8Vz_u7Z5LJUkGsKmKSno,4859
@@ -20,11 +20,12 @@ cocoindex/setup.py,sha256=7uIHKN4FOCuoidPXcKyGTrkqpkl9luL49-6UcnMxYzw,3068
 cocoindex/sources.py,sha256=69COA4qbZDipzGYfXv-WJSmicFkA509xIShRGDh6A0A,2083
 cocoindex/targets.py,sha256=Nfh_tpFd1goTnS_cxBjIs4j9zl3Z4Z1JomAQ1dl3Sic,2796
 cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cocoindex/tests/test_convert.py,sha256=
+cocoindex/tests/test_convert.py,sha256=l7LqD7duV9-xkYTaKOsEPdqw7v14dUzE40f4VVLlBCQ,49423
 cocoindex/tests/test_optional_database.py,sha256=snAmkNa6wtOSaxoZE1HgjvL5v_ylitt3Jt_9df4Cgdc,8506
+cocoindex/tests/test_transform_flow.py,sha256=VvT5b895MH5kwT-h4OpdDTl545SU4nxeIm7E_QANmAk,2894
 cocoindex/tests/test_typing.py,sha256=9OF3lO2uSpZBefkEJx7WRbnkXjwQtvlQIeeARYQID68,12391
 cocoindex/tests/test_validation.py,sha256=X6AQzVs-hVKIXcrHMEMQnhfUE8at7iXQnPq8nHNhZ2Q,4543
-cocoindex/typing.py,sha256=
+cocoindex/typing.py,sha256=qQ0ANF3iuQDeSqipHgL2SDiiXL2reTMUN0aj4ve_T0w,13359
 cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
 cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
-cocoindex-0.1.
+cocoindex-0.1.76.dist-info/RECORD,,
{cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/entry_points.txt
File without changes
{cocoindex-0.1.74.dist-info → cocoindex-0.1.76.dist-info}/licenses/LICENSE
File without changes