cocoindex 0.1.52__cp311-cp311-win_amd64.whl → 0.1.54__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.cp311-win_amd64.pyd +0 -0
- cocoindex/cli.py +6 -6
- cocoindex/convert.py +93 -46
- cocoindex/flow.py +3 -2
- cocoindex/functions.py +10 -0
- cocoindex/llm.py +3 -0
- cocoindex/tests/__init__.py +0 -1
- cocoindex/tests/test_convert.py +289 -58
- cocoindex/tests/test_typing.py +115 -77
- cocoindex/typing.py +76 -64
- {cocoindex-0.1.52.dist-info → cocoindex-0.1.54.dist-info}/METADATA +11 -10
- cocoindex-0.1.54.dist-info/RECORD +28 -0
- cocoindex-0.1.52.dist-info/RECORD +0 -28
- {cocoindex-0.1.52.dist-info → cocoindex-0.1.54.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.52.dist-info → cocoindex-0.1.54.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.52.dist-info → cocoindex-0.1.54.dist-info}/licenses/LICENSE +0 -0
Binary file
|
cocoindex/cli.py
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
import
|
1
|
+
import atexit
|
2
2
|
import datetime
|
3
|
-
import sys
|
4
3
|
import importlib.util
|
5
4
|
import os
|
6
|
-
import
|
5
|
+
import sys
|
7
6
|
import types
|
7
|
+
from typing import Any
|
8
8
|
|
9
|
-
|
9
|
+
import click
|
10
|
+
from dotenv import find_dotenv, load_dotenv
|
10
11
|
from rich.console import Console
|
11
12
|
from rich.panel import Panel
|
12
13
|
from rich.table import Table
|
13
|
-
from typing import Any
|
14
14
|
|
15
15
|
from . import flow, lib, setting
|
16
|
-
from .setup import
|
16
|
+
from .setup import apply_setup_changes, drop_setup, flow_names_with_setup, sync_setup
|
17
17
|
|
18
18
|
# Create ServerSettings lazily upon first call, as environment variables may be loaded from files, etc.
|
19
19
|
COCOINDEX_HOST = "https://cocoindex.io"
|
cocoindex/convert.py
CHANGED
@@ -6,17 +6,20 @@ import dataclasses
|
|
6
6
|
import datetime
|
7
7
|
import inspect
|
8
8
|
import uuid
|
9
|
+
from enum import Enum
|
10
|
+
from typing import Any, Callable, Mapping, get_origin
|
11
|
+
|
9
12
|
import numpy as np
|
10
13
|
|
11
|
-
from enum import Enum
|
12
|
-
from typing import Any, Callable, get_origin, Mapping
|
13
14
|
from .typing import (
|
15
|
+
KEY_FIELD_NAME,
|
16
|
+
TABLE_TYPES,
|
17
|
+
AnalyzedTypeInfo,
|
18
|
+
DtypeRegistry,
|
14
19
|
analyze_type_info,
|
15
20
|
encode_enriched_type,
|
21
|
+
extract_ndarray_scalar_dtype,
|
16
22
|
is_namedtuple_type,
|
17
|
-
TABLE_TYPES,
|
18
|
-
KEY_FIELD_NAME,
|
19
|
-
DtypeRegistry,
|
20
23
|
)
|
21
24
|
|
22
25
|
|
@@ -29,6 +32,8 @@ def encode_engine_value(value: Any) -> Any:
|
|
29
32
|
]
|
30
33
|
if is_namedtuple_type(type(value)):
|
31
34
|
return [encode_engine_value(getattr(value, name)) for name in value._fields]
|
35
|
+
if isinstance(value, np.number):
|
36
|
+
return value.item()
|
32
37
|
if isinstance(value, np.ndarray):
|
33
38
|
return value
|
34
39
|
if isinstance(value, (list, tuple)):
|
@@ -42,6 +47,19 @@ def encode_engine_value(value: Any) -> Any:
|
|
42
47
|
return value
|
43
48
|
|
44
49
|
|
50
|
+
_CONVERTIBLE_KINDS = {
|
51
|
+
("Float32", "Float64"),
|
52
|
+
("LocalDateTime", "OffsetDateTime"),
|
53
|
+
}
|
54
|
+
|
55
|
+
|
56
|
+
def _is_type_kind_convertible_to(src_type_kind: str, dst_type_kind: str) -> bool:
|
57
|
+
return (
|
58
|
+
src_type_kind == dst_type_kind
|
59
|
+
or (src_type_kind, dst_type_kind) in _CONVERTIBLE_KINDS
|
60
|
+
)
|
61
|
+
|
62
|
+
|
45
63
|
def make_engine_value_decoder(
|
46
64
|
field_path: list[str],
|
47
65
|
src_type: dict[str, Any],
|
@@ -61,11 +79,23 @@ def make_engine_value_decoder(
|
|
61
79
|
|
62
80
|
src_type_kind = src_type["kind"]
|
63
81
|
|
82
|
+
dst_type_info: AnalyzedTypeInfo | None = None
|
64
83
|
if (
|
65
|
-
dst_annotation is None
|
66
|
-
|
67
|
-
|
84
|
+
dst_annotation is not None
|
85
|
+
and dst_annotation is not inspect.Parameter.empty
|
86
|
+
and dst_annotation is not Any
|
68
87
|
):
|
88
|
+
dst_type_info = analyze_type_info(dst_annotation)
|
89
|
+
if not _is_type_kind_convertible_to(src_type_kind, dst_type_info.kind):
|
90
|
+
raise ValueError(
|
91
|
+
f"Type mismatch for `{''.join(field_path)}`: "
|
92
|
+
f"passed in {src_type_kind}, declared {dst_annotation} ({dst_type_info.kind})"
|
93
|
+
)
|
94
|
+
|
95
|
+
if src_type_kind == "Uuid":
|
96
|
+
return lambda value: uuid.UUID(bytes=value)
|
97
|
+
|
98
|
+
if dst_type_info is None:
|
69
99
|
if src_type_kind == "Struct" or src_type_kind in TABLE_TYPES:
|
70
100
|
raise ValueError(
|
71
101
|
f"Missing type annotation for `{''.join(field_path)}`."
|
@@ -73,14 +103,62 @@ def make_engine_value_decoder(
|
|
73
103
|
)
|
74
104
|
return lambda value: value
|
75
105
|
|
76
|
-
dst_type_info
|
106
|
+
if dst_type_info.kind in ("Float32", "Float64", "Int64"):
|
107
|
+
dst_core_type = dst_type_info.core_type
|
108
|
+
|
109
|
+
def decode_scalar(value: Any) -> Any | None:
|
110
|
+
if value is None:
|
111
|
+
if dst_type_info.nullable:
|
112
|
+
return None
|
113
|
+
raise ValueError(
|
114
|
+
f"Received null for non-nullable scalar `{''.join(field_path)}`"
|
115
|
+
)
|
116
|
+
return dst_core_type(value)
|
117
|
+
|
118
|
+
return decode_scalar
|
77
119
|
|
78
|
-
if src_type_kind
|
79
|
-
|
80
|
-
|
81
|
-
|
120
|
+
if src_type_kind == "Vector":
|
121
|
+
field_path_str = "".join(field_path)
|
122
|
+
expected_dim = (
|
123
|
+
dst_type_info.vector_info.dim if dst_type_info.vector_info else None
|
82
124
|
)
|
83
125
|
|
126
|
+
elem_decoder = None
|
127
|
+
scalar_dtype = None
|
128
|
+
if dst_type_info.np_number_type is None: # for Non-NDArray vector
|
129
|
+
elem_decoder = make_engine_value_decoder(
|
130
|
+
field_path + ["[*]"],
|
131
|
+
src_type["element_type"],
|
132
|
+
dst_type_info.elem_type,
|
133
|
+
)
|
134
|
+
else: # for NDArray vector
|
135
|
+
scalar_dtype = extract_ndarray_scalar_dtype(dst_type_info.np_number_type)
|
136
|
+
_ = DtypeRegistry.validate_dtype_and_get_kind(scalar_dtype)
|
137
|
+
|
138
|
+
def decode_vector(value: Any) -> Any | None:
|
139
|
+
if value is None:
|
140
|
+
if dst_type_info.nullable:
|
141
|
+
return None
|
142
|
+
raise ValueError(
|
143
|
+
f"Received null for non-nullable vector `{field_path_str}`"
|
144
|
+
)
|
145
|
+
if not isinstance(value, (np.ndarray, list)):
|
146
|
+
raise TypeError(
|
147
|
+
f"Expected NDArray or list for vector `{field_path_str}`, got {type(value)}"
|
148
|
+
)
|
149
|
+
if expected_dim is not None and len(value) != expected_dim:
|
150
|
+
raise ValueError(
|
151
|
+
f"Vector dimension mismatch for `{field_path_str}`: "
|
152
|
+
f"expected {expected_dim}, got {len(value)}"
|
153
|
+
)
|
154
|
+
|
155
|
+
if elem_decoder is not None: # for Non-NDArray vector
|
156
|
+
return [elem_decoder(v) for v in value]
|
157
|
+
else: # for NDArray vector
|
158
|
+
return np.array(value, dtype=scalar_dtype)
|
159
|
+
|
160
|
+
return decode_vector
|
161
|
+
|
84
162
|
if dst_type_info.struct_type is not None:
|
85
163
|
return _make_engine_struct_value_decoder(
|
86
164
|
field_path, src_type["fields"], dst_type_info.struct_type
|
@@ -123,39 +201,8 @@ def make_engine_value_decoder(
|
|
123
201
|
field_path.pop()
|
124
202
|
return decode
|
125
203
|
|
126
|
-
if src_type_kind == "
|
127
|
-
return lambda value:
|
128
|
-
|
129
|
-
if src_type_kind == "Vector":
|
130
|
-
dtype_info = DtypeRegistry.get_by_dtype(dst_type_info.np_number_type)
|
131
|
-
|
132
|
-
def decode_vector(value: Any) -> Any | None:
|
133
|
-
if value is None:
|
134
|
-
if dst_type_info.nullable:
|
135
|
-
return None
|
136
|
-
raise ValueError(
|
137
|
-
f"Received null for non-nullable vector `{''.join(field_path)}`"
|
138
|
-
)
|
139
|
-
|
140
|
-
if not isinstance(value, (np.ndarray, list)):
|
141
|
-
raise TypeError(
|
142
|
-
f"Expected NDArray or list for vector `{''.join(field_path)}`, got {type(value)}"
|
143
|
-
)
|
144
|
-
expected_dim = (
|
145
|
-
dst_type_info.vector_info.dim if dst_type_info.vector_info else None
|
146
|
-
)
|
147
|
-
if expected_dim is not None and len(value) != expected_dim:
|
148
|
-
raise ValueError(
|
149
|
-
f"Vector dimension mismatch for `{''.join(field_path)}`: "
|
150
|
-
f"expected {expected_dim}, got {len(value)}"
|
151
|
-
)
|
152
|
-
|
153
|
-
# Use NDArray for supported numeric dtypes, else return list
|
154
|
-
if dtype_info is not None:
|
155
|
-
return np.array(value, dtype=dtype_info.numpy_dtype)
|
156
|
-
return value
|
157
|
-
|
158
|
-
return decode_vector
|
204
|
+
if src_type_kind == "Union":
|
205
|
+
return lambda value: value[1]
|
159
206
|
|
160
207
|
return lambda value: value
|
161
208
|
|
cocoindex/flow.py
CHANGED
@@ -92,6 +92,7 @@ def _spec_kind(spec: Any) -> str:
|
|
92
92
|
|
93
93
|
|
94
94
|
T = TypeVar("T")
|
95
|
+
S = TypeVar("S")
|
95
96
|
|
96
97
|
|
97
98
|
class _DataSliceState:
|
@@ -185,7 +186,7 @@ class DataSlice(Generic[T]):
|
|
185
186
|
|
186
187
|
def transform(
|
187
188
|
self, fn_spec: op.FunctionSpec, *args: Any, **kwargs: Any
|
188
|
-
) -> DataSlice[
|
189
|
+
) -> DataSlice[Any]:
|
189
190
|
"""
|
190
191
|
Apply a function to the data slice.
|
191
192
|
"""
|
@@ -216,7 +217,7 @@ class DataSlice(Generic[T]):
|
|
216
217
|
),
|
217
218
|
)
|
218
219
|
|
219
|
-
def call(self, func: Callable[
|
220
|
+
def call(self, func: Callable[..., S], *args: Any, **kwargs: Any) -> S:
|
220
221
|
"""
|
221
222
|
Call a function with the data slice.
|
222
223
|
"""
|
cocoindex/functions.py
CHANGED
@@ -32,6 +32,16 @@ class SplitRecursively(op.FunctionSpec):
|
|
32
32
|
custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
|
33
33
|
|
34
34
|
|
35
|
+
class EmbedText(op.FunctionSpec):
|
36
|
+
"""Embed a text into a vector space."""
|
37
|
+
|
38
|
+
api_type: llm.LlmApiType
|
39
|
+
model: str
|
40
|
+
address: str | None = None
|
41
|
+
output_dimension: int | None = None
|
42
|
+
task_type: str | None = None
|
43
|
+
|
44
|
+
|
35
45
|
class ExtractByLlm(op.FunctionSpec):
|
36
46
|
"""Extract information from a text using a LLM."""
|
37
47
|
|
cocoindex/llm.py
CHANGED
cocoindex/tests/__init__.py
CHANGED
@@ -1 +0,0 @@
|
|
1
|
-
|