cocoindex 0.1.81__cp312-cp312-manylinux_2_28_x86_64.whl → 0.1.83__cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.cpython-312-x86_64-linux-gnu.so +0 -0
- cocoindex/cli.py +5 -46
- cocoindex/convert.py +120 -36
- cocoindex/flow.py +68 -69
- cocoindex/functions.py +3 -3
- cocoindex/op.py +73 -75
- cocoindex/sources.py +20 -0
- cocoindex/subprocess_exec.py +241 -0
- cocoindex/tests/test_convert.py +63 -44
- cocoindex/tests/test_transform_flow.py +32 -0
- cocoindex/user_app_loader.py +51 -0
- {cocoindex-0.1.81.dist-info → cocoindex-0.1.83.dist-info}/METADATA +6 -6
- {cocoindex-0.1.81.dist-info → cocoindex-0.1.83.dist-info}/RECORD +16 -14
- {cocoindex-0.1.81.dist-info → cocoindex-0.1.83.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.81.dist-info → cocoindex-0.1.83.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.81.dist-info → cocoindex-0.1.83.dist-info}/licenses/LICENSE +0 -0
Binary file
|
cocoindex/cli.py
CHANGED
@@ -4,9 +4,7 @@ import datetime
|
|
4
4
|
import importlib.util
|
5
5
|
import os
|
6
6
|
import signal
|
7
|
-
import sys
|
8
7
|
import threading
|
9
|
-
import types
|
10
8
|
from types import FrameType
|
11
9
|
from typing import Any, Iterable
|
12
10
|
|
@@ -20,6 +18,8 @@ from rich.table import Table
|
|
20
18
|
from . import flow, lib, setting
|
21
19
|
from .setup import flow_names_with_setup
|
22
20
|
from .runtime import execution_context
|
21
|
+
from .subprocess_exec import add_user_app
|
22
|
+
from .user_app_loader import load_user_app
|
23
23
|
|
24
24
|
# Create ServerSettings lazily upon first call, as environment variables may be loaded from files, etc.
|
25
25
|
COCOINDEX_HOST = "https://cocoindex.io"
|
@@ -76,50 +76,9 @@ def _get_app_ref_from_specifier(
|
|
76
76
|
return app_ref
|
77
77
|
|
78
78
|
|
79
|
-
def _load_user_app(app_target: str) ->
|
80
|
-
|
81
|
-
|
82
|
-
Exits on failure.
|
83
|
-
"""
|
84
|
-
if not app_target:
|
85
|
-
raise click.ClickException("Application target not provided.")
|
86
|
-
|
87
|
-
looks_like_path = os.sep in app_target or app_target.lower().endswith(".py")
|
88
|
-
|
89
|
-
if looks_like_path:
|
90
|
-
if not os.path.isfile(app_target):
|
91
|
-
raise click.ClickException(f"Application file path not found: {app_target}")
|
92
|
-
app_path = os.path.abspath(app_target)
|
93
|
-
app_dir = os.path.dirname(app_path)
|
94
|
-
module_name = os.path.splitext(os.path.basename(app_path))[0]
|
95
|
-
|
96
|
-
if app_dir not in sys.path:
|
97
|
-
sys.path.insert(0, app_dir)
|
98
|
-
try:
|
99
|
-
spec = importlib.util.spec_from_file_location(module_name, app_path)
|
100
|
-
if spec is None:
|
101
|
-
raise ImportError(f"Could not create spec for file: {app_path}")
|
102
|
-
module = importlib.util.module_from_spec(spec)
|
103
|
-
sys.modules[spec.name] = module
|
104
|
-
if spec.loader is None:
|
105
|
-
raise ImportError(f"Could not create loader for file: {app_path}")
|
106
|
-
spec.loader.exec_module(module)
|
107
|
-
return module
|
108
|
-
except (ImportError, FileNotFoundError, PermissionError) as e:
|
109
|
-
raise click.ClickException(f"Failed importing file '{app_path}': {e}")
|
110
|
-
finally:
|
111
|
-
if app_dir in sys.path and sys.path[0] == app_dir:
|
112
|
-
sys.path.pop(0)
|
113
|
-
|
114
|
-
# Try as module
|
115
|
-
try:
|
116
|
-
return importlib.import_module(app_target)
|
117
|
-
except ImportError as e:
|
118
|
-
raise click.ClickException(f"Failed to load module '{app_target}': {e}")
|
119
|
-
except Exception as e:
|
120
|
-
raise click.ClickException(
|
121
|
-
f"Unexpected error importing module '{app_target}': {e}"
|
122
|
-
)
|
79
|
+
def _load_user_app(app_target: str) -> None:
|
80
|
+
load_user_app(app_target)
|
81
|
+
add_user_app(app_target)
|
123
82
|
|
124
83
|
|
125
84
|
def _initialize_cocoindex_in_process() -> None:
|
cocoindex/convert.py
CHANGED
@@ -9,26 +9,26 @@ import datetime
|
|
9
9
|
import inspect
|
10
10
|
import warnings
|
11
11
|
from enum import Enum
|
12
|
-
from typing import Any, Callable, Mapping, get_origin
|
12
|
+
from typing import Any, Callable, Mapping, Type, get_origin
|
13
13
|
|
14
14
|
import numpy as np
|
15
15
|
|
16
16
|
from .typing import (
|
17
17
|
KEY_FIELD_NAME,
|
18
18
|
TABLE_TYPES,
|
19
|
-
analyze_type_info,
|
20
|
-
encode_enriched_type,
|
21
|
-
is_namedtuple_type,
|
22
|
-
is_struct_type,
|
23
|
-
AnalyzedTypeInfo,
|
24
19
|
AnalyzedAnyType,
|
20
|
+
AnalyzedBasicType,
|
25
21
|
AnalyzedDictType,
|
26
22
|
AnalyzedListType,
|
27
|
-
|
23
|
+
AnalyzedStructType,
|
24
|
+
AnalyzedTypeInfo,
|
28
25
|
AnalyzedUnionType,
|
29
26
|
AnalyzedUnknownType,
|
30
|
-
|
27
|
+
analyze_type_info,
|
28
|
+
encode_enriched_type,
|
29
|
+
is_namedtuple_type,
|
31
30
|
is_numpy_number_type,
|
31
|
+
is_struct_type,
|
32
32
|
)
|
33
33
|
|
34
34
|
|
@@ -50,34 +50,6 @@ class ChildFieldPath:
|
|
50
50
|
self._field_path.pop()
|
51
51
|
|
52
52
|
|
53
|
-
def encode_engine_value(value: Any) -> Any:
|
54
|
-
"""Encode a Python value to an engine value."""
|
55
|
-
if dataclasses.is_dataclass(value):
|
56
|
-
return [
|
57
|
-
encode_engine_value(getattr(value, f.name))
|
58
|
-
for f in dataclasses.fields(value)
|
59
|
-
]
|
60
|
-
if is_namedtuple_type(type(value)):
|
61
|
-
return [encode_engine_value(getattr(value, name)) for name in value._fields]
|
62
|
-
if isinstance(value, np.number):
|
63
|
-
return value.item()
|
64
|
-
if isinstance(value, np.ndarray):
|
65
|
-
return value
|
66
|
-
if isinstance(value, (list, tuple)):
|
67
|
-
return [encode_engine_value(v) for v in value]
|
68
|
-
if isinstance(value, dict):
|
69
|
-
if not value:
|
70
|
-
return {}
|
71
|
-
|
72
|
-
first_val = next(iter(value.values()))
|
73
|
-
if is_struct_type(type(first_val)): # KTable
|
74
|
-
return [
|
75
|
-
[encode_engine_value(k)] + encode_engine_value(v)
|
76
|
-
for k, v in value.items()
|
77
|
-
]
|
78
|
-
return value
|
79
|
-
|
80
|
-
|
81
53
|
_CONVERTIBLE_KINDS = {
|
82
54
|
("Float32", "Float64"),
|
83
55
|
("LocalDateTime", "OffsetDateTime"),
|
@@ -91,6 +63,118 @@ def _is_type_kind_convertible_to(src_type_kind: str, dst_type_kind: str) -> bool
|
|
91
63
|
)
|
92
64
|
|
93
65
|
|
66
|
+
# Pre-computed type info for missing/Any type annotations
|
67
|
+
ANY_TYPE_INFO = analyze_type_info(inspect.Parameter.empty)
|
68
|
+
|
69
|
+
|
70
|
+
def make_engine_value_encoder(type_info: AnalyzedTypeInfo) -> Callable[[Any], Any]:
|
71
|
+
"""
|
72
|
+
Create an encoder closure for a specific type.
|
73
|
+
"""
|
74
|
+
variant = type_info.variant
|
75
|
+
|
76
|
+
if isinstance(variant, AnalyzedUnknownType):
|
77
|
+
raise ValueError(f"Type annotation `{type_info.core_type}` is unsupported")
|
78
|
+
|
79
|
+
if isinstance(variant, AnalyzedListType):
|
80
|
+
elem_type_info = (
|
81
|
+
analyze_type_info(variant.elem_type) if variant.elem_type else ANY_TYPE_INFO
|
82
|
+
)
|
83
|
+
if isinstance(elem_type_info.variant, AnalyzedStructType):
|
84
|
+
elem_encoder = make_engine_value_encoder(elem_type_info)
|
85
|
+
|
86
|
+
def encode_struct_list(value: Any) -> Any:
|
87
|
+
return None if value is None else [elem_encoder(v) for v in value]
|
88
|
+
|
89
|
+
return encode_struct_list
|
90
|
+
|
91
|
+
if isinstance(variant, AnalyzedDictType):
|
92
|
+
if not variant.value_type:
|
93
|
+
return lambda value: value
|
94
|
+
|
95
|
+
value_type_info = analyze_type_info(variant.value_type)
|
96
|
+
if isinstance(value_type_info.variant, AnalyzedStructType):
|
97
|
+
|
98
|
+
def encode_struct_dict(value: Any) -> Any:
|
99
|
+
if not isinstance(value, dict):
|
100
|
+
return value
|
101
|
+
if not value:
|
102
|
+
return []
|
103
|
+
|
104
|
+
sample_key, sample_val = next(iter(value.items()))
|
105
|
+
key_type, val_type = type(sample_key), type(sample_val)
|
106
|
+
|
107
|
+
# Handle KTable case
|
108
|
+
if value and is_struct_type(val_type):
|
109
|
+
key_encoder = (
|
110
|
+
make_engine_value_encoder(analyze_type_info(key_type))
|
111
|
+
if is_struct_type(key_type)
|
112
|
+
else make_engine_value_encoder(ANY_TYPE_INFO)
|
113
|
+
)
|
114
|
+
value_encoder = make_engine_value_encoder(
|
115
|
+
analyze_type_info(val_type)
|
116
|
+
)
|
117
|
+
return [
|
118
|
+
[key_encoder(k)] + value_encoder(v) for k, v in value.items()
|
119
|
+
]
|
120
|
+
return {key_encoder(k): value_encoder(v) for k, v in value.items()}
|
121
|
+
|
122
|
+
return encode_struct_dict
|
123
|
+
|
124
|
+
if isinstance(variant, AnalyzedStructType):
|
125
|
+
struct_type = variant.struct_type
|
126
|
+
|
127
|
+
if dataclasses.is_dataclass(struct_type):
|
128
|
+
fields = dataclasses.fields(struct_type)
|
129
|
+
field_encoders = [
|
130
|
+
make_engine_value_encoder(analyze_type_info(f.type)) for f in fields
|
131
|
+
]
|
132
|
+
field_names = [f.name for f in fields]
|
133
|
+
|
134
|
+
def encode_dataclass(value: Any) -> Any:
|
135
|
+
if not dataclasses.is_dataclass(value):
|
136
|
+
return value
|
137
|
+
return [
|
138
|
+
encoder(getattr(value, name))
|
139
|
+
for encoder, name in zip(field_encoders, field_names)
|
140
|
+
]
|
141
|
+
|
142
|
+
return encode_dataclass
|
143
|
+
|
144
|
+
elif is_namedtuple_type(struct_type):
|
145
|
+
annotations = struct_type.__annotations__
|
146
|
+
field_names = list(getattr(struct_type, "_fields", ()))
|
147
|
+
field_encoders = [
|
148
|
+
make_engine_value_encoder(
|
149
|
+
analyze_type_info(annotations[name])
|
150
|
+
if name in annotations
|
151
|
+
else ANY_TYPE_INFO
|
152
|
+
)
|
153
|
+
for name in field_names
|
154
|
+
]
|
155
|
+
|
156
|
+
def encode_namedtuple(value: Any) -> Any:
|
157
|
+
if not is_namedtuple_type(type(value)):
|
158
|
+
return value
|
159
|
+
return [
|
160
|
+
encoder(getattr(value, name))
|
161
|
+
for encoder, name in zip(field_encoders, field_names)
|
162
|
+
]
|
163
|
+
|
164
|
+
return encode_namedtuple
|
165
|
+
|
166
|
+
def encode_basic_value(value: Any) -> Any:
|
167
|
+
if isinstance(value, np.number):
|
168
|
+
return value.item()
|
169
|
+
if isinstance(value, np.ndarray):
|
170
|
+
return value
|
171
|
+
if isinstance(value, (list, tuple)):
|
172
|
+
return [encode_basic_value(v) for v in value]
|
173
|
+
return value
|
174
|
+
|
175
|
+
return encode_basic_value
|
176
|
+
|
177
|
+
|
94
178
|
def make_engine_value_decoder(
|
95
179
|
field_path: list[str],
|
96
180
|
src_type: dict[str, Any],
|
cocoindex/flow.py
CHANGED
@@ -9,15 +9,6 @@ import datetime
|
|
9
9
|
import functools
|
10
10
|
import inspect
|
11
11
|
import re
|
12
|
-
|
13
|
-
from .validation import (
|
14
|
-
validate_flow_name,
|
15
|
-
NamingError,
|
16
|
-
validate_full_flow_name,
|
17
|
-
validate_target_name,
|
18
|
-
)
|
19
|
-
from .typing import analyze_type_info
|
20
|
-
|
21
12
|
from dataclasses import dataclass
|
22
13
|
from enum import Enum
|
23
14
|
from threading import Lock
|
@@ -25,13 +16,13 @@ from typing import (
|
|
25
16
|
Any,
|
26
17
|
Callable,
|
27
18
|
Generic,
|
19
|
+
Iterable,
|
28
20
|
NamedTuple,
|
29
21
|
Sequence,
|
30
22
|
TypeVar,
|
31
23
|
cast,
|
32
24
|
get_args,
|
33
25
|
get_origin,
|
34
|
-
Iterable,
|
35
26
|
)
|
36
27
|
|
37
28
|
from rich.text import Text
|
@@ -41,11 +32,20 @@ from . import _engine # type: ignore
|
|
41
32
|
from . import index
|
42
33
|
from . import op
|
43
34
|
from . import setting
|
44
|
-
from .convert import
|
35
|
+
from .convert import (
|
36
|
+
dump_engine_object,
|
37
|
+
make_engine_value_decoder,
|
38
|
+
make_engine_value_encoder,
|
39
|
+
)
|
45
40
|
from .op import FunctionSpec
|
46
41
|
from .runtime import execution_context
|
47
42
|
from .setup import SetupChangeBundle
|
48
|
-
from .typing import encode_enriched_type
|
43
|
+
from .typing import analyze_type_info, encode_enriched_type
|
44
|
+
from .validation import (
|
45
|
+
validate_flow_name,
|
46
|
+
validate_full_flow_name,
|
47
|
+
validate_target_name,
|
48
|
+
)
|
49
49
|
|
50
50
|
|
51
51
|
class _NameBuilder:
|
@@ -105,18 +105,26 @@ def _spec_kind(spec: Any) -> str:
|
|
105
105
|
|
106
106
|
def _transform_helper(
|
107
107
|
flow_builder_state: _FlowBuilderState,
|
108
|
-
fn_spec: FunctionSpec,
|
108
|
+
fn_spec: FunctionSpec | Callable[..., Any],
|
109
109
|
transform_args: list[tuple[Any, str | None]],
|
110
110
|
name: str | None = None,
|
111
111
|
) -> DataSlice[Any]:
|
112
|
-
if
|
112
|
+
if isinstance(fn_spec, FunctionSpec):
|
113
|
+
kind = _spec_kind(fn_spec)
|
114
|
+
spec = fn_spec
|
115
|
+
elif callable(fn_spec) and (
|
116
|
+
op_kind := getattr(fn_spec, "__cocoindex_op_kind__", None)
|
117
|
+
):
|
118
|
+
kind = op_kind
|
119
|
+
spec = op.EmptyFunctionSpec()
|
120
|
+
else:
|
113
121
|
raise ValueError("transform() can only be called on a CocoIndex function")
|
114
122
|
|
115
123
|
return _create_data_slice(
|
116
124
|
flow_builder_state,
|
117
125
|
lambda target_scope, name: flow_builder_state.engine_flow_builder.transform(
|
118
|
-
|
119
|
-
dump_engine_object(
|
126
|
+
kind,
|
127
|
+
dump_engine_object(spec),
|
120
128
|
transform_args,
|
121
129
|
target_scope,
|
122
130
|
flow_builder_state.field_name_builder.build_name(
|
@@ -245,7 +253,7 @@ class DataSlice(Generic[T]):
|
|
245
253
|
f(scope)
|
246
254
|
|
247
255
|
def transform(
|
248
|
-
self, fn_spec: op.FunctionSpec, *args: Any, **kwargs: Any
|
256
|
+
self, fn_spec: op.FunctionSpec | Callable[..., Any], *args: Any, **kwargs: Any
|
249
257
|
) -> DataSlice[Any]:
|
250
258
|
"""
|
251
259
|
Apply a function to the data slice.
|
@@ -513,7 +521,7 @@ class FlowBuilder:
|
|
513
521
|
)
|
514
522
|
|
515
523
|
def transform(
|
516
|
-
self, fn_spec: FunctionSpec, *args: Any, **kwargs: Any
|
524
|
+
self, fn_spec: FunctionSpec | Callable[..., Any], *args: Any, **kwargs: Any
|
517
525
|
) -> DataSlice[Any]:
|
518
526
|
"""
|
519
527
|
Apply a function to inputs, returning a DataSlice.
|
@@ -978,6 +986,12 @@ class TransformFlowInfo(NamedTuple):
|
|
978
986
|
result_decoder: Callable[[Any], T]
|
979
987
|
|
980
988
|
|
989
|
+
class FlowArgInfo(NamedTuple):
|
990
|
+
name: str
|
991
|
+
type_hint: Any
|
992
|
+
encoder: Callable[[Any], Any]
|
993
|
+
|
994
|
+
|
981
995
|
class TransformFlow(Generic[T]):
|
982
996
|
"""
|
983
997
|
A transient transformation flow that transforms in-memory data.
|
@@ -985,8 +999,7 @@ class TransformFlow(Generic[T]):
|
|
985
999
|
|
986
1000
|
_flow_fn: Callable[..., DataSlice[T]]
|
987
1001
|
_flow_name: str
|
988
|
-
|
989
|
-
_param_names: list[str]
|
1002
|
+
_args_info: list[FlowArgInfo]
|
990
1003
|
|
991
1004
|
_lazy_lock: asyncio.Lock
|
992
1005
|
_lazy_flow_info: TransformFlowInfo | None = None
|
@@ -994,7 +1007,6 @@ class TransformFlow(Generic[T]):
|
|
994
1007
|
def __init__(
|
995
1008
|
self,
|
996
1009
|
flow_fn: Callable[..., DataSlice[T]],
|
997
|
-
flow_arg_types: Sequence[Any],
|
998
1010
|
/,
|
999
1011
|
name: str | None = None,
|
1000
1012
|
):
|
@@ -1002,9 +1014,32 @@ class TransformFlow(Generic[T]):
|
|
1002
1014
|
self._flow_name = _transform_flow_name_builder.build_name(
|
1003
1015
|
name, prefix="_transform_flow_"
|
1004
1016
|
)
|
1005
|
-
self._flow_arg_types = list(flow_arg_types)
|
1006
1017
|
self._lazy_lock = asyncio.Lock()
|
1007
1018
|
|
1019
|
+
sig = inspect.signature(flow_fn)
|
1020
|
+
args_info = []
|
1021
|
+
for param_name, param in sig.parameters.items():
|
1022
|
+
if param.kind not in (
|
1023
|
+
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
1024
|
+
inspect.Parameter.KEYWORD_ONLY,
|
1025
|
+
):
|
1026
|
+
raise ValueError(
|
1027
|
+
f"Parameter `{param_name}` is not a parameter can be passed by name"
|
1028
|
+
)
|
1029
|
+
value_type_annotation: type | None = _get_data_slice_annotation_type(
|
1030
|
+
param.annotation
|
1031
|
+
)
|
1032
|
+
if value_type_annotation is None:
|
1033
|
+
raise ValueError(
|
1034
|
+
f"Parameter `{param_name}` for {flow_fn} has no value type annotation. "
|
1035
|
+
"Please use `cocoindex.DataSlice[T]` where T is the type of the value."
|
1036
|
+
)
|
1037
|
+
encoder = make_engine_value_encoder(
|
1038
|
+
analyze_type_info(value_type_annotation)
|
1039
|
+
)
|
1040
|
+
args_info.append(FlowArgInfo(param_name, value_type_annotation, encoder))
|
1041
|
+
self._args_info = args_info
|
1042
|
+
|
1008
1043
|
def __call__(self, *args: Any, **kwargs: Any) -> DataSlice[T]:
|
1009
1044
|
return self._flow_fn(*args, **kwargs)
|
1010
1045
|
|
@@ -1024,31 +1059,15 @@ class TransformFlow(Generic[T]):
|
|
1024
1059
|
|
1025
1060
|
async def _build_flow_info_async(self) -> TransformFlowInfo:
|
1026
1061
|
flow_builder_state = _FlowBuilderState(self._flow_name)
|
1027
|
-
sig = inspect.signature(self._flow_fn)
|
1028
|
-
if len(sig.parameters) != len(self._flow_arg_types):
|
1029
|
-
raise ValueError(
|
1030
|
-
f"Number of parameters in the flow function ({len(sig.parameters)}) "
|
1031
|
-
f"does not match the number of argument types ({len(self._flow_arg_types)})"
|
1032
|
-
)
|
1033
|
-
|
1034
1062
|
kwargs: dict[str, DataSlice[T]] = {}
|
1035
|
-
for
|
1036
|
-
|
1037
|
-
):
|
1038
|
-
if param.kind not in (
|
1039
|
-
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
1040
|
-
inspect.Parameter.KEYWORD_ONLY,
|
1041
|
-
):
|
1042
|
-
raise ValueError(
|
1043
|
-
f"Parameter `{param_name}` is not a parameter can be passed by name"
|
1044
|
-
)
|
1045
|
-
encoded_type = encode_enriched_type(param_type)
|
1063
|
+
for arg_info in self._args_info:
|
1064
|
+
encoded_type = encode_enriched_type(arg_info.type_hint)
|
1046
1065
|
if encoded_type is None:
|
1047
|
-
raise ValueError(f"Parameter `{
|
1066
|
+
raise ValueError(f"Parameter `{arg_info.name}` has no type annotation")
|
1048
1067
|
engine_ds = flow_builder_state.engine_flow_builder.add_direct_input(
|
1049
|
-
|
1068
|
+
arg_info.name, encoded_type
|
1050
1069
|
)
|
1051
|
-
kwargs[
|
1070
|
+
kwargs[arg_info.name] = DataSlice(
|
1052
1071
|
_DataSliceState(flow_builder_state, engine_ds)
|
1053
1072
|
)
|
1054
1073
|
|
@@ -1061,13 +1080,12 @@ class TransformFlow(Generic[T]):
|
|
1061
1080
|
execution_context.event_loop
|
1062
1081
|
)
|
1063
1082
|
)
|
1064
|
-
self._param_names = list(sig.parameters.keys())
|
1065
1083
|
|
1066
1084
|
engine_return_type = (
|
1067
1085
|
_data_slice_state(output).engine_data_slice.data_type().schema()
|
1068
1086
|
)
|
1069
1087
|
python_return_type: type[T] | None = _get_data_slice_annotation_type(
|
1070
|
-
|
1088
|
+
inspect.signature(self._flow_fn).return_annotation
|
1071
1089
|
)
|
1072
1090
|
result_decoder = make_engine_value_decoder(
|
1073
1091
|
[], engine_return_type["type"], analyze_type_info(python_return_type)
|
@@ -1099,13 +1117,14 @@ class TransformFlow(Generic[T]):
|
|
1099
1117
|
"""
|
1100
1118
|
flow_info = await self._flow_info_async()
|
1101
1119
|
params = []
|
1102
|
-
for i,
|
1120
|
+
for i, arg_info in enumerate(self._args_info):
|
1103
1121
|
if i < len(args):
|
1104
|
-
|
1122
|
+
arg = args[i]
|
1105
1123
|
elif arg in kwargs:
|
1106
|
-
|
1124
|
+
arg = kwargs[arg]
|
1107
1125
|
else:
|
1108
1126
|
raise ValueError(f"Parameter {arg} is not provided")
|
1127
|
+
params.append(arg_info.encoder(arg))
|
1109
1128
|
engine_result = await flow_info.engine_flow.evaluate_async(params)
|
1110
1129
|
return flow_info.result_decoder(engine_result)
|
1111
1130
|
|
@@ -1116,27 +1135,7 @@ def transform_flow() -> Callable[[Callable[..., DataSlice[T]]], TransformFlow[T]
|
|
1116
1135
|
"""
|
1117
1136
|
|
1118
1137
|
def _transform_flow_wrapper(fn: Callable[..., DataSlice[T]]) -> TransformFlow[T]:
|
1119
|
-
|
1120
|
-
arg_types = []
|
1121
|
-
for param_name, param in sig.parameters.items():
|
1122
|
-
if param.kind not in (
|
1123
|
-
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
1124
|
-
inspect.Parameter.KEYWORD_ONLY,
|
1125
|
-
):
|
1126
|
-
raise ValueError(
|
1127
|
-
f"Parameter `{param_name}` is not a parameter can be passed by name"
|
1128
|
-
)
|
1129
|
-
value_type_annotation: type[T] | None = _get_data_slice_annotation_type(
|
1130
|
-
param.annotation
|
1131
|
-
)
|
1132
|
-
if value_type_annotation is None:
|
1133
|
-
raise ValueError(
|
1134
|
-
f"Parameter `{param_name}` for {fn} has no value type annotation. "
|
1135
|
-
"Please use `cocoindex.DataSlice[T]` where T is the type of the value."
|
1136
|
-
)
|
1137
|
-
arg_types.append(value_type_annotation)
|
1138
|
-
|
1139
|
-
_transform_flow = TransformFlow(fn, arg_types)
|
1138
|
+
_transform_flow = TransformFlow(fn)
|
1140
1139
|
functools.update_wrapper(_transform_flow, fn)
|
1141
1140
|
return _transform_flow
|
1142
1141
|
|
cocoindex/functions.py
CHANGED
@@ -89,7 +89,7 @@ class SentenceTransformerEmbedExecutor:
|
|
89
89
|
spec: SentenceTransformerEmbed
|
90
90
|
_model: Any | None = None
|
91
91
|
|
92
|
-
def analyze(self
|
92
|
+
def analyze(self) -> type:
|
93
93
|
try:
|
94
94
|
# Only import sentence_transformers locally when it's needed, as its import is very slow.
|
95
95
|
import sentence_transformers # pylint: disable=import-outside-toplevel
|
@@ -245,7 +245,7 @@ class ColPaliEmbedImageExecutor:
|
|
245
245
|
spec: ColPaliEmbedImage
|
246
246
|
_model_info: ColPaliModelInfo
|
247
247
|
|
248
|
-
def analyze(self
|
248
|
+
def analyze(self) -> type:
|
249
249
|
# Get shared model and dimension
|
250
250
|
self._model_info = _get_colpali_model_and_processor(self.spec.model)
|
251
251
|
|
@@ -321,7 +321,7 @@ class ColPaliEmbedQueryExecutor:
|
|
321
321
|
spec: ColPaliEmbedQuery
|
322
322
|
_model_info: ColPaliModelInfo
|
323
323
|
|
324
|
-
def analyze(self
|
324
|
+
def analyze(self) -> type:
|
325
325
|
# Get shared model and dimension
|
326
326
|
self._model_info = _get_colpali_model_and_processor(self.spec.model)
|
327
327
|
|