cocoindex 0.1.53__cp311-cp311-win_amd64.whl → 0.1.55__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.cp311-win_amd64.pyd +0 -0
- cocoindex/cli.py +90 -11
- cocoindex/convert.py +77 -62
- cocoindex/flow.py +3 -2
- cocoindex/functions.py +10 -0
- cocoindex/llm.py +1 -0
- cocoindex/tests/__init__.py +0 -1
- cocoindex/tests/test_convert.py +137 -38
- cocoindex/tests/test_typing.py +26 -7
- cocoindex/typing.py +31 -12
- {cocoindex-0.1.53.dist-info → cocoindex-0.1.55.dist-info}/METADATA +12 -10
- cocoindex-0.1.55.dist-info/RECORD +28 -0
- {cocoindex-0.1.53.dist-info → cocoindex-0.1.55.dist-info}/WHEEL +1 -1
- cocoindex-0.1.53.dist-info/RECORD +0 -28
- {cocoindex-0.1.53.dist-info → cocoindex-0.1.55.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.53.dist-info → cocoindex-0.1.55.dist-info}/licenses/LICENSE +0 -0
Binary file
|
cocoindex/cli.py
CHANGED
@@ -1,19 +1,23 @@
|
|
1
|
-
import
|
1
|
+
import atexit
|
2
2
|
import datetime
|
3
|
-
import sys
|
4
3
|
import importlib.util
|
5
4
|
import os
|
6
|
-
import
|
5
|
+
import signal
|
6
|
+
import sys
|
7
|
+
import threading
|
7
8
|
import types
|
9
|
+
from types import FrameType
|
10
|
+
from typing import Any
|
8
11
|
|
9
|
-
|
12
|
+
import click
|
13
|
+
import watchfiles
|
14
|
+
from dotenv import find_dotenv, load_dotenv
|
10
15
|
from rich.console import Console
|
11
16
|
from rich.panel import Panel
|
12
17
|
from rich.table import Table
|
13
|
-
from typing import Any
|
14
18
|
|
15
19
|
from . import flow, lib, setting
|
16
|
-
from .setup import
|
20
|
+
from .setup import apply_setup_changes, drop_setup, flow_names_with_setup, sync_setup
|
17
21
|
|
18
22
|
# Create ServerSettings lazily upon first call, as environment variables may be loaded from files, etc.
|
19
23
|
COCOINDEX_HOST = "https://cocoindex.io"
|
@@ -116,6 +120,12 @@ def _load_user_app(app_target: str) -> types.ModuleType:
|
|
116
120
|
)
|
117
121
|
|
118
122
|
|
123
|
+
def _initialize_cocoindex_in_process() -> None:
|
124
|
+
settings = setting.Settings.from_env()
|
125
|
+
lib.init(settings)
|
126
|
+
atexit.register(lib.stop)
|
127
|
+
|
128
|
+
|
119
129
|
@click.group()
|
120
130
|
@click.version_option(package_name="cocoindex", message="%(prog)s version %(version)s")
|
121
131
|
@click.option(
|
@@ -128,7 +138,7 @@ def _load_user_app(app_target: str) -> types.ModuleType:
|
|
128
138
|
default=None,
|
129
139
|
show_default=False,
|
130
140
|
)
|
131
|
-
def cli(env_file: str | None) -> None:
|
141
|
+
def cli(env_file: str | None = None) -> None:
|
132
142
|
"""
|
133
143
|
CLI for Cocoindex.
|
134
144
|
"""
|
@@ -139,9 +149,7 @@ def cli(env_file: str | None) -> None:
|
|
139
149
|
click.echo(f"Loaded environment variables from: {loaded_env_path}", err=True)
|
140
150
|
|
141
151
|
try:
|
142
|
-
|
143
|
-
lib.init(settings)
|
144
|
-
atexit.register(lib.stop)
|
152
|
+
_initialize_cocoindex_in_process()
|
145
153
|
except Exception as e:
|
146
154
|
raise click.ClickException(f"Failed to initialize CocoIndex library: {e}")
|
147
155
|
|
@@ -485,6 +493,14 @@ def evaluate(
|
|
485
493
|
default=False,
|
486
494
|
help="Avoid printing anything to the standard output, e.g. statistics.",
|
487
495
|
)
|
496
|
+
@click.option(
|
497
|
+
"-r",
|
498
|
+
"--reload",
|
499
|
+
is_flag=True,
|
500
|
+
show_default=True,
|
501
|
+
default=False,
|
502
|
+
help="Enable auto-reload on code changes.",
|
503
|
+
)
|
488
504
|
def server(
|
489
505
|
app_target: str,
|
490
506
|
address: str | None,
|
@@ -493,6 +509,7 @@ def server(
|
|
493
509
|
cors_origin: str | None,
|
494
510
|
cors_cocoindex: bool,
|
495
511
|
cors_local: int | None,
|
512
|
+
reload: bool,
|
496
513
|
) -> None:
|
497
514
|
"""
|
498
515
|
Start a HTTP server providing REST APIs.
|
@@ -502,6 +519,58 @@ def server(
|
|
502
519
|
APP_TARGET: path/to/app.py or installed_module.
|
503
520
|
"""
|
504
521
|
app_ref = _get_app_ref_from_specifier(app_target)
|
522
|
+
args = (
|
523
|
+
app_ref,
|
524
|
+
address,
|
525
|
+
cors_origin,
|
526
|
+
cors_cocoindex,
|
527
|
+
cors_local,
|
528
|
+
live_update,
|
529
|
+
quiet,
|
530
|
+
)
|
531
|
+
|
532
|
+
if reload:
|
533
|
+
watch_paths = {os.getcwd()}
|
534
|
+
if os.path.isfile(app_ref):
|
535
|
+
watch_paths.add(os.path.dirname(os.path.abspath(app_ref)))
|
536
|
+
else:
|
537
|
+
try:
|
538
|
+
spec = importlib.util.find_spec(app_ref)
|
539
|
+
if spec and spec.origin:
|
540
|
+
watch_paths.add(os.path.dirname(os.path.abspath(spec.origin)))
|
541
|
+
except ImportError:
|
542
|
+
pass
|
543
|
+
|
544
|
+
watchfiles.run_process(
|
545
|
+
*watch_paths,
|
546
|
+
target=_reloadable_server_target,
|
547
|
+
args=args,
|
548
|
+
watch_filter=watchfiles.PythonFilter(),
|
549
|
+
callback=lambda changes: click.secho(
|
550
|
+
f"\nDetected changes in {len(changes)} file(s), reloading server...\n",
|
551
|
+
fg="cyan",
|
552
|
+
),
|
553
|
+
)
|
554
|
+
else:
|
555
|
+
_run_server(*args)
|
556
|
+
|
557
|
+
|
558
|
+
def _reloadable_server_target(*args: Any, **kwargs: Any) -> None:
|
559
|
+
"""Reloadable target for the watchfiles process."""
|
560
|
+
_initialize_cocoindex_in_process()
|
561
|
+
_run_server(*args, **kwargs)
|
562
|
+
|
563
|
+
|
564
|
+
def _run_server(
|
565
|
+
app_ref: str,
|
566
|
+
address: str | None = None,
|
567
|
+
cors_origin: str | None = None,
|
568
|
+
cors_cocoindex: bool = False,
|
569
|
+
cors_local: int | None = None,
|
570
|
+
live_update: bool = False,
|
571
|
+
quiet: bool = False,
|
572
|
+
) -> None:
|
573
|
+
"""Helper function to run the server with specified settings."""
|
505
574
|
_load_user_app(app_ref)
|
506
575
|
|
507
576
|
server_settings = setting.ServerSettings.from_env()
|
@@ -525,7 +594,17 @@ def server(
|
|
525
594
|
if live_update:
|
526
595
|
options = flow.FlowLiveUpdaterOptions(live_mode=True, print_stats=not quiet)
|
527
596
|
flow.update_all_flows(options)
|
528
|
-
|
597
|
+
|
598
|
+
click.secho("Press Ctrl+C to stop the server.", fg="yellow")
|
599
|
+
|
600
|
+
shutdown_event = threading.Event()
|
601
|
+
|
602
|
+
def handle_signal(signum: int, frame: FrameType | None) -> None:
|
603
|
+
shutdown_event.set()
|
604
|
+
|
605
|
+
signal.signal(signal.SIGINT, handle_signal)
|
606
|
+
signal.signal(signal.SIGTERM, handle_signal)
|
607
|
+
shutdown_event.wait()
|
529
608
|
|
530
609
|
|
531
610
|
def _flow_name(name: str | None) -> str:
|
cocoindex/convert.py
CHANGED
@@ -14,6 +14,7 @@ import numpy as np
|
|
14
14
|
from .typing import (
|
15
15
|
KEY_FIELD_NAME,
|
16
16
|
TABLE_TYPES,
|
17
|
+
AnalyzedTypeInfo,
|
17
18
|
DtypeRegistry,
|
18
19
|
analyze_type_info,
|
19
20
|
encode_enriched_type,
|
@@ -41,11 +42,22 @@ def encode_engine_value(value: Any) -> Any:
|
|
41
42
|
return [
|
42
43
|
[encode_engine_value(k)] + encode_engine_value(v) for k, v in value.items()
|
43
44
|
]
|
44
|
-
if isinstance(value, uuid.UUID):
|
45
|
-
return value.bytes
|
46
45
|
return value
|
47
46
|
|
48
47
|
|
48
|
+
_CONVERTIBLE_KINDS = {
|
49
|
+
("Float32", "Float64"),
|
50
|
+
("LocalDateTime", "OffsetDateTime"),
|
51
|
+
}
|
52
|
+
|
53
|
+
|
54
|
+
def _is_type_kind_convertible_to(src_type_kind: str, dst_type_kind: str) -> bool:
|
55
|
+
return (
|
56
|
+
src_type_kind == dst_type_kind
|
57
|
+
or (src_type_kind, dst_type_kind) in _CONVERTIBLE_KINDS
|
58
|
+
)
|
59
|
+
|
60
|
+
|
49
61
|
def make_engine_value_decoder(
|
50
62
|
field_path: list[str],
|
51
63
|
src_type: dict[str, Any],
|
@@ -65,11 +77,20 @@ def make_engine_value_decoder(
|
|
65
77
|
|
66
78
|
src_type_kind = src_type["kind"]
|
67
79
|
|
80
|
+
dst_type_info: AnalyzedTypeInfo | None = None
|
68
81
|
if (
|
69
|
-
dst_annotation is None
|
70
|
-
|
71
|
-
|
82
|
+
dst_annotation is not None
|
83
|
+
and dst_annotation is not inspect.Parameter.empty
|
84
|
+
and dst_annotation is not Any
|
72
85
|
):
|
86
|
+
dst_type_info = analyze_type_info(dst_annotation)
|
87
|
+
if not _is_type_kind_convertible_to(src_type_kind, dst_type_info.kind):
|
88
|
+
raise ValueError(
|
89
|
+
f"Type mismatch for `{''.join(field_path)}`: "
|
90
|
+
f"passed in {src_type_kind}, declared {dst_annotation} ({dst_type_info.kind})"
|
91
|
+
)
|
92
|
+
|
93
|
+
if dst_type_info is None:
|
73
94
|
if src_type_kind == "Struct" or src_type_kind in TABLE_TYPES:
|
74
95
|
raise ValueError(
|
75
96
|
f"Missing type annotation for `{''.join(field_path)}`."
|
@@ -77,32 +98,66 @@ def make_engine_value_decoder(
|
|
77
98
|
)
|
78
99
|
return lambda value: value
|
79
100
|
|
80
|
-
dst_type_info
|
101
|
+
if dst_type_info.kind in ("Float32", "Float64", "Int64"):
|
102
|
+
dst_core_type = dst_type_info.core_type
|
81
103
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
104
|
+
def decode_scalar(value: Any) -> Any | None:
|
105
|
+
if value is None:
|
106
|
+
if dst_type_info.nullable:
|
107
|
+
return None
|
108
|
+
raise ValueError(
|
109
|
+
f"Received null for non-nullable scalar `{''.join(field_path)}`"
|
110
|
+
)
|
111
|
+
return dst_core_type(value)
|
87
112
|
|
88
|
-
|
89
|
-
|
90
|
-
|
113
|
+
return decode_scalar
|
114
|
+
|
115
|
+
if src_type_kind == "Vector":
|
116
|
+
field_path_str = "".join(field_path)
|
117
|
+
expected_dim = (
|
118
|
+
dst_type_info.vector_info.dim if dst_type_info.vector_info else None
|
91
119
|
)
|
92
120
|
|
93
|
-
|
94
|
-
|
121
|
+
elem_decoder = None
|
122
|
+
scalar_dtype = None
|
123
|
+
if dst_type_info.np_number_type is None: # for Non-NDArray vector
|
124
|
+
elem_decoder = make_engine_value_decoder(
|
125
|
+
field_path + ["[*]"],
|
126
|
+
src_type["element_type"],
|
127
|
+
dst_type_info.elem_type,
|
128
|
+
)
|
129
|
+
else: # for NDArray vector
|
130
|
+
scalar_dtype = extract_ndarray_scalar_dtype(dst_type_info.np_number_type)
|
131
|
+
_ = DtypeRegistry.validate_dtype_and_get_kind(scalar_dtype)
|
95
132
|
|
96
|
-
def
|
133
|
+
def decode_vector(value: Any) -> Any | None:
|
97
134
|
if value is None:
|
98
135
|
if dst_type_info.nullable:
|
99
136
|
return None
|
100
137
|
raise ValueError(
|
101
|
-
f"Received null for non-nullable
|
138
|
+
f"Received null for non-nullable vector `{field_path_str}`"
|
139
|
+
)
|
140
|
+
if not isinstance(value, (np.ndarray, list)):
|
141
|
+
raise TypeError(
|
142
|
+
f"Expected NDArray or list for vector `{field_path_str}`, got {type(value)}"
|
143
|
+
)
|
144
|
+
if expected_dim is not None and len(value) != expected_dim:
|
145
|
+
raise ValueError(
|
146
|
+
f"Vector dimension mismatch for `{field_path_str}`: "
|
147
|
+
f"expected {expected_dim}, got {len(value)}"
|
102
148
|
)
|
103
|
-
return numpy_type(value)
|
104
149
|
|
105
|
-
|
150
|
+
if elem_decoder is not None: # for Non-NDArray vector
|
151
|
+
return [elem_decoder(v) for v in value]
|
152
|
+
else: # for NDArray vector
|
153
|
+
return np.array(value, dtype=scalar_dtype)
|
154
|
+
|
155
|
+
return decode_vector
|
156
|
+
|
157
|
+
if dst_type_info.struct_type is not None:
|
158
|
+
return _make_engine_struct_value_decoder(
|
159
|
+
field_path, src_type["fields"], dst_type_info.struct_type
|
160
|
+
)
|
106
161
|
|
107
162
|
if src_type_kind in TABLE_TYPES:
|
108
163
|
field_path.append("[*]")
|
@@ -141,48 +196,8 @@ def make_engine_value_decoder(
|
|
141
196
|
field_path.pop()
|
142
197
|
return decode
|
143
198
|
|
144
|
-
if src_type_kind == "
|
145
|
-
return lambda value:
|
146
|
-
|
147
|
-
if src_type_kind == "Vector":
|
148
|
-
|
149
|
-
def decode_vector(value: Any) -> Any | None:
|
150
|
-
field_path_str = "".join(field_path)
|
151
|
-
expected_dim = (
|
152
|
-
dst_type_info.vector_info.dim if dst_type_info.vector_info else None
|
153
|
-
)
|
154
|
-
|
155
|
-
if value is None:
|
156
|
-
if dst_type_info.nullable:
|
157
|
-
return None
|
158
|
-
raise ValueError(
|
159
|
-
f"Received null for non-nullable vector `{field_path_str}`"
|
160
|
-
)
|
161
|
-
if not isinstance(value, (np.ndarray, list)):
|
162
|
-
raise TypeError(
|
163
|
-
f"Expected NDArray or list for vector `{field_path_str}`, got {type(value)}"
|
164
|
-
)
|
165
|
-
if expected_dim is not None and len(value) != expected_dim:
|
166
|
-
raise ValueError(
|
167
|
-
f"Vector dimension mismatch for `{field_path_str}`: "
|
168
|
-
f"expected {expected_dim}, got {len(value)}"
|
169
|
-
)
|
170
|
-
|
171
|
-
if dst_type_info.np_number_type is None: # for Non-NDArray vector
|
172
|
-
elem_decoder = make_engine_value_decoder(
|
173
|
-
field_path + ["[*]"],
|
174
|
-
src_type["element_type"],
|
175
|
-
dst_type_info.elem_type,
|
176
|
-
)
|
177
|
-
return [elem_decoder(v) for v in value]
|
178
|
-
else: # for NDArray vector
|
179
|
-
scalar_dtype = extract_ndarray_scalar_dtype(
|
180
|
-
dst_type_info.np_number_type
|
181
|
-
)
|
182
|
-
_ = DtypeRegistry.validate_dtype_and_get_kind(scalar_dtype)
|
183
|
-
return np.array(value, dtype=scalar_dtype)
|
184
|
-
|
185
|
-
return decode_vector
|
199
|
+
if src_type_kind == "Union":
|
200
|
+
return lambda value: value[1]
|
186
201
|
|
187
202
|
return lambda value: value
|
188
203
|
|
cocoindex/flow.py
CHANGED
@@ -92,6 +92,7 @@ def _spec_kind(spec: Any) -> str:
|
|
92
92
|
|
93
93
|
|
94
94
|
T = TypeVar("T")
|
95
|
+
S = TypeVar("S")
|
95
96
|
|
96
97
|
|
97
98
|
class _DataSliceState:
|
@@ -185,7 +186,7 @@ class DataSlice(Generic[T]):
|
|
185
186
|
|
186
187
|
def transform(
|
187
188
|
self, fn_spec: op.FunctionSpec, *args: Any, **kwargs: Any
|
188
|
-
) -> DataSlice[
|
189
|
+
) -> DataSlice[Any]:
|
189
190
|
"""
|
190
191
|
Apply a function to the data slice.
|
191
192
|
"""
|
@@ -216,7 +217,7 @@ class DataSlice(Generic[T]):
|
|
216
217
|
),
|
217
218
|
)
|
218
219
|
|
219
|
-
def call(self, func: Callable[
|
220
|
+
def call(self, func: Callable[..., S], *args: Any, **kwargs: Any) -> S:
|
220
221
|
"""
|
221
222
|
Call a function with the data slice.
|
222
223
|
"""
|
cocoindex/functions.py
CHANGED
@@ -32,6 +32,16 @@ class SplitRecursively(op.FunctionSpec):
|
|
32
32
|
custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
|
33
33
|
|
34
34
|
|
35
|
+
class EmbedText(op.FunctionSpec):
|
36
|
+
"""Embed a text into a vector space."""
|
37
|
+
|
38
|
+
api_type: llm.LlmApiType
|
39
|
+
model: str
|
40
|
+
address: str | None = None
|
41
|
+
output_dimension: int | None = None
|
42
|
+
task_type: str | None = None
|
43
|
+
|
44
|
+
|
35
45
|
class ExtractByLlm(op.FunctionSpec):
|
36
46
|
"""Extract information from a text using a LLM."""
|
37
47
|
|
cocoindex/llm.py
CHANGED
cocoindex/tests/__init__.py
CHANGED
@@ -1 +0,0 @@
|
|
1
|
-
|
cocoindex/tests/test_convert.py
CHANGED
@@ -91,23 +91,26 @@ def validate_full_roundtrip(
|
|
91
91
|
"""
|
92
92
|
from cocoindex import _engine # type: ignore
|
93
93
|
|
94
|
+
def eq(a: Any, b: Any) -> bool:
|
95
|
+
if isinstance(a, np.ndarray) and isinstance(b, np.ndarray):
|
96
|
+
return np.array_equal(a, b)
|
97
|
+
return type(a) == type(b) and not not (a == b)
|
98
|
+
|
94
99
|
encoded_value = encode_engine_value(value)
|
95
100
|
value_type = value_type or type(value)
|
96
101
|
encoded_output_type = encode_enriched_type(value_type)["type"]
|
97
102
|
value_from_engine = _engine.testutil.seder_roundtrip(
|
98
103
|
encoded_value, encoded_output_type
|
99
104
|
)
|
100
|
-
|
101
|
-
|
102
|
-
)
|
103
|
-
np.testing.assert_array_equal(decoded_value, value)
|
105
|
+
decoder = make_engine_value_decoder([], encoded_output_type, value_type)
|
106
|
+
decoded_value = decoder(value_from_engine)
|
107
|
+
assert eq(decoded_value, value)
|
104
108
|
|
105
109
|
if other_decoded_values is not None:
|
106
110
|
for other_value, other_type in other_decoded_values:
|
107
|
-
|
108
|
-
|
109
|
-
)
|
110
|
-
np.testing.assert_array_equal(other_decoded_value, other_value)
|
111
|
+
decoder = make_engine_value_decoder([], encoded_output_type, other_type)
|
112
|
+
other_decoded_value = decoder(value_from_engine)
|
113
|
+
assert eq(other_decoded_value, other_value)
|
111
114
|
|
112
115
|
|
113
116
|
def test_encode_engine_value_basic_types() -> None:
|
@@ -119,7 +122,7 @@ def test_encode_engine_value_basic_types() -> None:
|
|
119
122
|
|
120
123
|
def test_encode_engine_value_uuid() -> None:
|
121
124
|
u = uuid.uuid4()
|
122
|
-
assert encode_engine_value(u) == u
|
125
|
+
assert encode_engine_value(u) == u
|
123
126
|
|
124
127
|
|
125
128
|
def test_encode_engine_value_date_time_types() -> None:
|
@@ -215,19 +218,38 @@ def test_encode_engine_value_none() -> None:
|
|
215
218
|
|
216
219
|
|
217
220
|
def test_roundtrip_basic_types() -> None:
|
218
|
-
validate_full_roundtrip(42, int)
|
221
|
+
validate_full_roundtrip(42, int, (42, None))
|
219
222
|
validate_full_roundtrip(3.25, float, (3.25, Float64))
|
220
|
-
validate_full_roundtrip(3.25, Float64, (3.25, float))
|
221
|
-
validate_full_roundtrip(3.25, Float32)
|
222
|
-
validate_full_roundtrip("hello", str)
|
223
|
-
validate_full_roundtrip(True, bool)
|
224
|
-
validate_full_roundtrip(False, bool)
|
225
|
-
validate_full_roundtrip(datetime.date(2025, 1, 1), datetime.date)
|
226
|
-
validate_full_roundtrip(datetime.datetime.now(), cocoindex.LocalDateTime)
|
227
223
|
validate_full_roundtrip(
|
228
|
-
|
224
|
+
3.25, Float64, (3.25, float), (np.float64(3.25), np.float64)
|
225
|
+
)
|
226
|
+
validate_full_roundtrip(
|
227
|
+
3.25, Float32, (3.25, float), (np.float32(3.25), np.float32)
|
228
|
+
)
|
229
|
+
validate_full_roundtrip("hello", str, ("hello", None))
|
230
|
+
validate_full_roundtrip(True, bool, (True, None))
|
231
|
+
validate_full_roundtrip(False, bool, (False, None))
|
232
|
+
validate_full_roundtrip(
|
233
|
+
datetime.date(2025, 1, 1), datetime.date, (datetime.date(2025, 1, 1), None)
|
234
|
+
)
|
235
|
+
|
236
|
+
validate_full_roundtrip(
|
237
|
+
datetime.datetime(2025, 1, 2, 3, 4, 5, 123456),
|
238
|
+
cocoindex.LocalDateTime,
|
239
|
+
(datetime.datetime(2025, 1, 2, 3, 4, 5, 123456), datetime.datetime),
|
240
|
+
)
|
241
|
+
validate_full_roundtrip(
|
242
|
+
datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
|
243
|
+
cocoindex.OffsetDateTime,
|
244
|
+
(
|
245
|
+
datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
|
246
|
+
datetime.datetime,
|
247
|
+
),
|
229
248
|
)
|
230
249
|
|
250
|
+
uuid_value = uuid.uuid4()
|
251
|
+
validate_full_roundtrip(uuid_value, uuid.UUID, (uuid_value, None))
|
252
|
+
|
231
253
|
|
232
254
|
def test_decode_scalar_numpy_values() -> None:
|
233
255
|
test_cases = [
|
@@ -263,8 +285,8 @@ def test_non_ndarray_vector_decoding() -> None:
|
|
263
285
|
decoder = make_engine_value_decoder(["field"], src_type, dst_type_uuid)
|
264
286
|
uuid1 = uuid.uuid4()
|
265
287
|
uuid2 = uuid.uuid4()
|
266
|
-
|
267
|
-
result = decoder(
|
288
|
+
input_uuids = [uuid1, uuid2]
|
289
|
+
result = decoder(input_uuids)
|
268
290
|
assert isinstance(result, list)
|
269
291
|
assert all(isinstance(x, uuid.UUID) for x in result)
|
270
292
|
assert result == [uuid1, uuid2]
|
@@ -549,6 +571,48 @@ def test_field_position_cases(
|
|
549
571
|
assert decoder(engine_val) == PythonOrder(**expected_dict)
|
550
572
|
|
551
573
|
|
574
|
+
def test_roundtrip_union_simple() -> None:
|
575
|
+
t = int | str | float
|
576
|
+
value = 10.4
|
577
|
+
validate_full_roundtrip(value, t)
|
578
|
+
|
579
|
+
|
580
|
+
def test_roundtrip_union_with_active_uuid() -> None:
|
581
|
+
t = str | uuid.UUID | int
|
582
|
+
value = uuid.uuid4()
|
583
|
+
validate_full_roundtrip(value, t)
|
584
|
+
|
585
|
+
|
586
|
+
def test_roundtrip_union_with_inactive_uuid() -> None:
|
587
|
+
t = str | uuid.UUID | int
|
588
|
+
value = "5a9f8f6a-318f-4f1f-929d-566d7444a62d" # it's a string
|
589
|
+
validate_full_roundtrip(value, t)
|
590
|
+
|
591
|
+
|
592
|
+
def test_roundtrip_union_offset_datetime() -> None:
|
593
|
+
t = str | uuid.UUID | float | int | datetime.datetime
|
594
|
+
value = datetime.datetime.now(datetime.UTC)
|
595
|
+
validate_full_roundtrip(value, t)
|
596
|
+
|
597
|
+
|
598
|
+
def test_roundtrip_union_date() -> None:
|
599
|
+
t = str | uuid.UUID | float | int | datetime.date
|
600
|
+
value = datetime.date.today()
|
601
|
+
validate_full_roundtrip(value, t)
|
602
|
+
|
603
|
+
|
604
|
+
def test_roundtrip_union_time() -> None:
|
605
|
+
t = str | uuid.UUID | float | int | datetime.time
|
606
|
+
value = datetime.time()
|
607
|
+
validate_full_roundtrip(value, t)
|
608
|
+
|
609
|
+
|
610
|
+
def test_roundtrip_union_timedelta() -> None:
|
611
|
+
t = str | uuid.UUID | float | int | datetime.timedelta
|
612
|
+
value = datetime.timedelta(hours=39, minutes=10, seconds=1)
|
613
|
+
validate_full_roundtrip(value, t)
|
614
|
+
|
615
|
+
|
552
616
|
def test_roundtrip_ltable() -> None:
|
553
617
|
t = list[Order]
|
554
618
|
value = [Order("O1", "item1", 10.0), Order("O2", "item2", 20.0)]
|
@@ -807,37 +871,72 @@ def test_dump_vector_type_annotation_no_dim() -> None:
|
|
807
871
|
|
808
872
|
def test_full_roundtrip_vector_numeric_types() -> None:
|
809
873
|
"""Test full roundtrip for numeric vector types using NDArray."""
|
810
|
-
value_f32
|
811
|
-
|
874
|
+
value_f32 = np.array([1.0, 2.0, 3.0], dtype=np.float32)
|
875
|
+
validate_full_roundtrip(
|
876
|
+
value_f32,
|
877
|
+
Vector[np.float32, Literal[3]],
|
878
|
+
([np.float32(1.0), np.float32(2.0), np.float32(3.0)], list[np.float32]),
|
879
|
+
([1.0, 2.0, 3.0], list[cocoindex.Float32]),
|
880
|
+
([1.0, 2.0, 3.0], list[float]),
|
812
881
|
)
|
813
|
-
validate_full_roundtrip(
|
814
|
-
|
815
|
-
|
882
|
+
validate_full_roundtrip(
|
883
|
+
value_f32,
|
884
|
+
np.typing.NDArray[np.float32],
|
885
|
+
([np.float32(1.0), np.float32(2.0), np.float32(3.0)], list[np.float32]),
|
886
|
+
([1.0, 2.0, 3.0], list[cocoindex.Float32]),
|
887
|
+
([1.0, 2.0, 3.0], list[float]),
|
816
888
|
)
|
817
|
-
validate_full_roundtrip(
|
818
|
-
|
819
|
-
|
820
|
-
|
889
|
+
validate_full_roundtrip(
|
890
|
+
value_f32.tolist(),
|
891
|
+
list[np.float32],
|
892
|
+
(value_f32, Vector[np.float32, Literal[3]]),
|
893
|
+
([1.0, 2.0, 3.0], list[cocoindex.Float32]),
|
894
|
+
([1.0, 2.0, 3.0], list[float]),
|
895
|
+
)
|
896
|
+
|
897
|
+
value_f64 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
|
898
|
+
validate_full_roundtrip(
|
899
|
+
value_f64,
|
900
|
+
Vector[np.float64, Literal[3]],
|
901
|
+
([np.float64(1.0), np.float64(2.0), np.float64(3.0)], list[np.float64]),
|
902
|
+
([1.0, 2.0, 3.0], list[cocoindex.Float64]),
|
903
|
+
([1.0, 2.0, 3.0], list[float]),
|
904
|
+
)
|
905
|
+
|
906
|
+
value_i64 = np.array([1, 2, 3], dtype=np.int64)
|
907
|
+
validate_full_roundtrip(
|
908
|
+
value_i64,
|
909
|
+
Vector[np.int64, Literal[3]],
|
910
|
+
([np.int64(1), np.int64(2), np.int64(3)], list[np.int64]),
|
911
|
+
([1, 2, 3], list[int]),
|
912
|
+
)
|
913
|
+
|
914
|
+
value_i32 = np.array([1, 2, 3], dtype=np.int32)
|
821
915
|
with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
|
822
916
|
validate_full_roundtrip(value_i32, Vector[np.int32, Literal[3]])
|
823
|
-
value_u8
|
917
|
+
value_u8 = np.array([1, 2, 3], dtype=np.uint8)
|
824
918
|
with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
|
825
919
|
validate_full_roundtrip(value_u8, Vector[np.uint8, Literal[3]])
|
826
|
-
value_u16
|
920
|
+
value_u16 = np.array([1, 2, 3], dtype=np.uint16)
|
827
921
|
with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
|
828
922
|
validate_full_roundtrip(value_u16, Vector[np.uint16, Literal[3]])
|
829
|
-
value_u32
|
923
|
+
value_u32 = np.array([1, 2, 3], dtype=np.uint32)
|
830
924
|
with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
|
831
925
|
validate_full_roundtrip(value_u32, Vector[np.uint32, Literal[3]])
|
832
|
-
value_u64
|
926
|
+
value_u64 = np.array([1, 2, 3], dtype=np.uint64)
|
833
927
|
with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
|
834
928
|
validate_full_roundtrip(value_u64, Vector[np.uint64, Literal[3]])
|
835
929
|
|
836
930
|
|
837
931
|
def test_roundtrip_vector_no_dimension() -> None:
|
838
932
|
"""Test full roundtrip for vector types without dimension annotation."""
|
839
|
-
value_f64
|
840
|
-
validate_full_roundtrip(
|
933
|
+
value_f64 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
|
934
|
+
validate_full_roundtrip(
|
935
|
+
value_f64,
|
936
|
+
Vector[np.float64],
|
937
|
+
([1.0, 2.0, 3.0], list[float]),
|
938
|
+
(np.array([1.0, 2.0, 3.0], dtype=np.float64), np.typing.NDArray[np.float64]),
|
939
|
+
)
|
841
940
|
|
842
941
|
|
843
942
|
def test_roundtrip_string_vector() -> None:
|
@@ -862,9 +961,9 @@ def test_roundtrip_dimension_mismatch() -> None:
|
|
862
961
|
def test_full_roundtrip_scalar_numeric_types() -> None:
|
863
962
|
"""Test full roundtrip for scalar NumPy numeric types."""
|
864
963
|
# Test supported scalar types
|
865
|
-
validate_full_roundtrip(np.int64(42), np.int64)
|
866
|
-
validate_full_roundtrip(np.float32(3.
|
867
|
-
validate_full_roundtrip(np.float64(
|
964
|
+
validate_full_roundtrip(np.int64(42), np.int64, (42, int))
|
965
|
+
validate_full_roundtrip(np.float32(3.25), np.float32, (3.25, cocoindex.Float32))
|
966
|
+
validate_full_roundtrip(np.float64(3.25), np.float64, (3.25, cocoindex.Float64))
|
868
967
|
|
869
968
|
# Test unsupported scalar types
|
870
969
|
for unsupported_type in [np.int32, np.uint8, np.uint16, np.uint32, np.uint64]:
|
cocoindex/tests/test_typing.py
CHANGED
@@ -2,7 +2,7 @@ import dataclasses
|
|
2
2
|
import datetime
|
3
3
|
import uuid
|
4
4
|
from collections.abc import Mapping, Sequence
|
5
|
-
from typing import Annotated, Any,
|
5
|
+
from typing import Annotated, Any, Literal, NamedTuple, get_args, get_origin
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import pytest
|
@@ -162,10 +162,11 @@ def test_ndarray_any_dtype() -> None:
|
|
162
162
|
|
163
163
|
|
164
164
|
def test_list_of_primitives() -> None:
|
165
|
-
typ =
|
165
|
+
typ = list[str]
|
166
166
|
result = analyze_type_info(typ)
|
167
167
|
assert result == AnalyzedTypeInfo(
|
168
168
|
kind="Vector",
|
169
|
+
core_type=list[str],
|
169
170
|
vector_info=VectorInfo(dim=None),
|
170
171
|
elem_type=str,
|
171
172
|
key_type=None,
|
@@ -177,10 +178,11 @@ def test_list_of_primitives() -> None:
|
|
177
178
|
|
178
179
|
|
179
180
|
def test_list_of_structs() -> None:
|
180
|
-
typ =
|
181
|
+
typ = list[SimpleDataclass]
|
181
182
|
result = analyze_type_info(typ)
|
182
183
|
assert result == AnalyzedTypeInfo(
|
183
184
|
kind="LTable",
|
185
|
+
core_type=list[SimpleDataclass],
|
184
186
|
vector_info=None,
|
185
187
|
elem_type=SimpleDataclass,
|
186
188
|
key_type=None,
|
@@ -196,6 +198,7 @@ def test_sequence_of_int() -> None:
|
|
196
198
|
result = analyze_type_info(typ)
|
197
199
|
assert result == AnalyzedTypeInfo(
|
198
200
|
kind="Vector",
|
201
|
+
core_type=Sequence[int],
|
199
202
|
vector_info=VectorInfo(dim=None),
|
200
203
|
elem_type=int,
|
201
204
|
key_type=None,
|
@@ -207,10 +210,11 @@ def test_sequence_of_int() -> None:
|
|
207
210
|
|
208
211
|
|
209
212
|
def test_list_with_vector_info() -> None:
|
210
|
-
typ = Annotated[
|
213
|
+
typ = Annotated[list[int], VectorInfo(dim=5)]
|
211
214
|
result = analyze_type_info(typ)
|
212
215
|
assert result == AnalyzedTypeInfo(
|
213
216
|
kind="Vector",
|
217
|
+
core_type=list[int],
|
214
218
|
vector_info=VectorInfo(dim=5),
|
215
219
|
elem_type=int,
|
216
220
|
key_type=None,
|
@@ -222,10 +226,11 @@ def test_list_with_vector_info() -> None:
|
|
222
226
|
|
223
227
|
|
224
228
|
def test_dict_str_int() -> None:
|
225
|
-
typ =
|
229
|
+
typ = dict[str, int]
|
226
230
|
result = analyze_type_info(typ)
|
227
231
|
assert result == AnalyzedTypeInfo(
|
228
232
|
kind="KTable",
|
233
|
+
core_type=dict[str, int],
|
229
234
|
vector_info=None,
|
230
235
|
elem_type=(str, int),
|
231
236
|
key_type=None,
|
@@ -241,6 +246,7 @@ def test_mapping_str_dataclass() -> None:
|
|
241
246
|
result = analyze_type_info(typ)
|
242
247
|
assert result == AnalyzedTypeInfo(
|
243
248
|
kind="KTable",
|
249
|
+
core_type=Mapping[str, SimpleDataclass],
|
244
250
|
vector_info=None,
|
245
251
|
elem_type=(str, SimpleDataclass),
|
246
252
|
key_type=None,
|
@@ -256,6 +262,7 @@ def test_dataclass() -> None:
|
|
256
262
|
result = analyze_type_info(typ)
|
257
263
|
assert result == AnalyzedTypeInfo(
|
258
264
|
kind="Struct",
|
265
|
+
core_type=SimpleDataclass,
|
259
266
|
vector_info=None,
|
260
267
|
elem_type=None,
|
261
268
|
key_type=None,
|
@@ -271,6 +278,7 @@ def test_named_tuple() -> None:
|
|
271
278
|
result = analyze_type_info(typ)
|
272
279
|
assert result == AnalyzedTypeInfo(
|
273
280
|
kind="Struct",
|
281
|
+
core_type=SimpleNamedTuple,
|
274
282
|
vector_info=None,
|
275
283
|
elem_type=None,
|
276
284
|
key_type=None,
|
@@ -286,6 +294,7 @@ def test_tuple_key_value() -> None:
|
|
286
294
|
result = analyze_type_info(typ)
|
287
295
|
assert result == AnalyzedTypeInfo(
|
288
296
|
kind="Int64",
|
297
|
+
core_type=int,
|
289
298
|
vector_info=None,
|
290
299
|
elem_type=None,
|
291
300
|
key_type=str,
|
@@ -301,6 +310,7 @@ def test_str() -> None:
|
|
301
310
|
result = analyze_type_info(typ)
|
302
311
|
assert result == AnalyzedTypeInfo(
|
303
312
|
kind="Str",
|
313
|
+
core_type=str,
|
304
314
|
vector_info=None,
|
305
315
|
elem_type=None,
|
306
316
|
key_type=None,
|
@@ -316,6 +326,7 @@ def test_bool() -> None:
|
|
316
326
|
result = analyze_type_info(typ)
|
317
327
|
assert result == AnalyzedTypeInfo(
|
318
328
|
kind="Bool",
|
329
|
+
core_type=bool,
|
319
330
|
vector_info=None,
|
320
331
|
elem_type=None,
|
321
332
|
key_type=None,
|
@@ -331,6 +342,7 @@ def test_bytes() -> None:
|
|
331
342
|
result = analyze_type_info(typ)
|
332
343
|
assert result == AnalyzedTypeInfo(
|
333
344
|
kind="Bytes",
|
345
|
+
core_type=bytes,
|
334
346
|
vector_info=None,
|
335
347
|
elem_type=None,
|
336
348
|
key_type=None,
|
@@ -346,6 +358,7 @@ def test_uuid() -> None:
|
|
346
358
|
result = analyze_type_info(typ)
|
347
359
|
assert result == AnalyzedTypeInfo(
|
348
360
|
kind="Uuid",
|
361
|
+
core_type=uuid.UUID,
|
349
362
|
vector_info=None,
|
350
363
|
elem_type=None,
|
351
364
|
key_type=None,
|
@@ -361,6 +374,7 @@ def test_date() -> None:
|
|
361
374
|
result = analyze_type_info(typ)
|
362
375
|
assert result == AnalyzedTypeInfo(
|
363
376
|
kind="Date",
|
377
|
+
core_type=datetime.date,
|
364
378
|
vector_info=None,
|
365
379
|
elem_type=None,
|
366
380
|
key_type=None,
|
@@ -376,6 +390,7 @@ def test_time() -> None:
|
|
376
390
|
result = analyze_type_info(typ)
|
377
391
|
assert result == AnalyzedTypeInfo(
|
378
392
|
kind="Time",
|
393
|
+
core_type=datetime.time,
|
379
394
|
vector_info=None,
|
380
395
|
elem_type=None,
|
381
396
|
key_type=None,
|
@@ -391,6 +406,7 @@ def test_timedelta() -> None:
|
|
391
406
|
result = analyze_type_info(typ)
|
392
407
|
assert result == AnalyzedTypeInfo(
|
393
408
|
kind="TimeDelta",
|
409
|
+
core_type=datetime.timedelta,
|
394
410
|
vector_info=None,
|
395
411
|
elem_type=None,
|
396
412
|
key_type=None,
|
@@ -406,6 +422,7 @@ def test_float() -> None:
|
|
406
422
|
result = analyze_type_info(typ)
|
407
423
|
assert result == AnalyzedTypeInfo(
|
408
424
|
kind="Float64",
|
425
|
+
core_type=float,
|
409
426
|
vector_info=None,
|
410
427
|
elem_type=None,
|
411
428
|
key_type=None,
|
@@ -421,6 +438,7 @@ def test_int() -> None:
|
|
421
438
|
result = analyze_type_info(typ)
|
422
439
|
assert result == AnalyzedTypeInfo(
|
423
440
|
kind="Int64",
|
441
|
+
core_type=int,
|
424
442
|
vector_info=None,
|
425
443
|
elem_type=None,
|
426
444
|
key_type=None,
|
@@ -436,6 +454,7 @@ def test_type_with_attributes() -> None:
|
|
436
454
|
result = analyze_type_info(typ)
|
437
455
|
assert result == AnalyzedTypeInfo(
|
438
456
|
kind="Str",
|
457
|
+
core_type=str,
|
439
458
|
vector_info=None,
|
440
459
|
elem_type=None,
|
441
460
|
key_type=None,
|
@@ -472,7 +491,7 @@ def test_encode_enriched_type_vector() -> None:
|
|
472
491
|
|
473
492
|
|
474
493
|
def test_encode_enriched_type_ltable() -> None:
|
475
|
-
typ =
|
494
|
+
typ = list[SimpleDataclass]
|
476
495
|
result = encode_enriched_type(typ)
|
477
496
|
assert result["type"]["kind"] == "LTable"
|
478
497
|
assert result["type"]["row"]["kind"] == "Struct"
|
@@ -513,7 +532,7 @@ def test_invalid_struct_kind() -> None:
|
|
513
532
|
|
514
533
|
|
515
534
|
def test_invalid_list_kind() -> None:
|
516
|
-
typ = Annotated[
|
535
|
+
typ = Annotated[list[int], TypeKind("Struct")]
|
517
536
|
with pytest.raises(ValueError, match="Unexpected type kind for list: Struct"):
|
518
537
|
analyze_type_info(typ)
|
519
538
|
|
cocoindex/typing.py
CHANGED
@@ -150,6 +150,7 @@ class AnalyzedTypeInfo:
|
|
150
150
|
"""
|
151
151
|
|
152
152
|
kind: str
|
153
|
+
core_type: Any
|
153
154
|
vector_info: VectorInfo | None # For Vector
|
154
155
|
elem_type: ElementType | None # For Vector and Table
|
155
156
|
|
@@ -161,6 +162,7 @@ class AnalyzedTypeInfo:
|
|
161
162
|
|
162
163
|
attrs: dict[str, Any] | None
|
163
164
|
nullable: bool = False
|
165
|
+
union_variant_types: typing.List[ElementType] | None = None # For Union
|
164
166
|
|
165
167
|
|
166
168
|
def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
|
@@ -181,18 +183,6 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
|
|
181
183
|
if base_type is Annotated:
|
182
184
|
annotations = t.__metadata__
|
183
185
|
t = t.__origin__
|
184
|
-
elif base_type is types.UnionType:
|
185
|
-
possible_types = typing.get_args(t)
|
186
|
-
non_none_types = [
|
187
|
-
arg for arg in possible_types if arg not in (None, types.NoneType)
|
188
|
-
]
|
189
|
-
if len(non_none_types) != 1:
|
190
|
-
raise ValueError(
|
191
|
-
f"Expect exactly one non-None choice for Union type, but got {len(non_none_types)}: {t}"
|
192
|
-
)
|
193
|
-
t = non_none_types[0]
|
194
|
-
if len(possible_types) > 1:
|
195
|
-
nullable = True
|
196
186
|
else:
|
197
187
|
break
|
198
188
|
|
@@ -211,6 +201,7 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
|
|
211
201
|
|
212
202
|
struct_type: type | None = None
|
213
203
|
elem_type: ElementType | None = None
|
204
|
+
union_variant_types: typing.List[ElementType] | None = None
|
214
205
|
key_type: type | None = None
|
215
206
|
np_number_type: type | None = None
|
216
207
|
if _is_struct_type(t):
|
@@ -251,6 +242,24 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
|
|
251
242
|
args = typing.get_args(t)
|
252
243
|
elem_type = (args[0], args[1])
|
253
244
|
kind = "KTable"
|
245
|
+
elif base_type is types.UnionType:
|
246
|
+
possible_types = typing.get_args(t)
|
247
|
+
non_none_types = [
|
248
|
+
arg for arg in possible_types if arg not in (None, types.NoneType)
|
249
|
+
]
|
250
|
+
|
251
|
+
if len(non_none_types) == 0:
|
252
|
+
return analyze_type_info(None)
|
253
|
+
|
254
|
+
nullable = len(non_none_types) < len(possible_types)
|
255
|
+
|
256
|
+
if len(non_none_types) == 1:
|
257
|
+
result = analyze_type_info(non_none_types[0])
|
258
|
+
result.nullable = nullable
|
259
|
+
return result
|
260
|
+
|
261
|
+
kind = "Union"
|
262
|
+
union_variant_types = non_none_types
|
254
263
|
elif kind is None:
|
255
264
|
if t is bytes:
|
256
265
|
kind = "Bytes"
|
@@ -277,8 +286,10 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
|
|
277
286
|
|
278
287
|
return AnalyzedTypeInfo(
|
279
288
|
kind=kind,
|
289
|
+
core_type=t,
|
280
290
|
vector_info=vector_info,
|
281
291
|
elem_type=elem_type,
|
292
|
+
union_variant_types=union_variant_types,
|
282
293
|
key_type=key_type,
|
283
294
|
struct_type=struct_type,
|
284
295
|
np_number_type=np_number_type,
|
@@ -338,6 +349,14 @@ def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
|
|
338
349
|
encoded_type["element_type"] = _encode_type(elem_type_info)
|
339
350
|
encoded_type["dimension"] = type_info.vector_info.dim
|
340
351
|
|
352
|
+
elif type_info.kind == "Union":
|
353
|
+
if type_info.union_variant_types is None:
|
354
|
+
raise ValueError("Union type must have a variant type list")
|
355
|
+
encoded_type["types"] = [
|
356
|
+
_encode_type(analyze_type_info(typ))
|
357
|
+
for typ in type_info.union_variant_types
|
358
|
+
]
|
359
|
+
|
341
360
|
elif type_info.kind in TABLE_TYPES:
|
342
361
|
if type_info.elem_type is None:
|
343
362
|
raise ValueError(f"{type_info.kind} type must have an element type")
|
@@ -1,12 +1,14 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.55
|
4
4
|
Requires-Dist: sentence-transformers>=3.3.1
|
5
5
|
Requires-Dist: click>=8.1.8
|
6
6
|
Requires-Dist: rich>=14.0.0
|
7
7
|
Requires-Dist: python-dotenv>=1.1.0
|
8
|
+
Requires-Dist: watchfiles>=1.1.0
|
8
9
|
Requires-Dist: pytest ; extra == 'test'
|
9
10
|
Requires-Dist: ruff ; extra == 'dev'
|
11
|
+
Requires-Dist: pre-commit ; extra == 'dev'
|
10
12
|
Provides-Extra: test
|
11
13
|
Provides-Extra: dev
|
12
14
|
License-File: LICENSE
|
@@ -51,10 +53,10 @@ Unlike a workflow orchestration framework where data is usually opaque, in CocoI
|
|
51
53
|
|
52
54
|
```python
|
53
55
|
# import
|
54
|
-
data['content'] = flow_builder.add_source(...)
|
56
|
+
data['content'] = flow_builder.add_source(...)
|
55
57
|
|
56
58
|
# transform
|
57
|
-
data['out'] = data['content']
|
59
|
+
data['out'] = data['content']
|
58
60
|
.transform(...)
|
59
61
|
.transform(...)
|
60
62
|
|
@@ -75,17 +77,17 @@ As a data framework, CocoIndex takes it to the next level on data freshness. **I
|
|
75
77
|
The frameworks takes care of
|
76
78
|
- Change data capture.
|
77
79
|
- Figure out what exactly needs to be updated, and only updating that without having to recompute everything.
|
78
|
-
|
80
|
+
|
79
81
|
This makes it fast to reflect any source updates to the target store. If you have concerns with surfacing stale data to AI agents and are spending lots of efforts working on infra piece to optimize the latency, the framework actually handles it for you.
|
80
82
|
|
81
83
|
|
82
84
|
## Quick Start:
|
83
|
-
If you're new to CocoIndex, we recommend checking out
|
85
|
+
If you're new to CocoIndex, we recommend checking out
|
84
86
|
- 📖 [Documentation](https://cocoindex.io/docs)
|
85
87
|
- ⚡ [Quick Start Guide](https://cocoindex.io/docs/getting_started/quickstart)
|
86
|
-
- 🎬 [Quick Start Video Tutorial](https://youtu.be/gv5R8nOXsWU?si=9ioeKYkMEnYevTXT)
|
88
|
+
- 🎬 [Quick Start Video Tutorial](https://youtu.be/gv5R8nOXsWU?si=9ioeKYkMEnYevTXT)
|
87
89
|
|
88
|
-
### Setup
|
90
|
+
### Setup
|
89
91
|
|
90
92
|
1. Install CocoIndex Python library
|
91
93
|
|
@@ -155,8 +157,8 @@ It defines an index flow like this:
|
|
155
157
|
| [Google Drive Text Embedding](examples/gdrive_text_embedding) | Index text documents from Google Drive |
|
156
158
|
| [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
|
157
159
|
| [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
|
158
|
-
| [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
|
159
|
-
| [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
|
160
|
+
| [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
|
161
|
+
| [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
|
160
162
|
| [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
|
161
163
|
|
162
164
|
More coming and stay tuned 👀!
|
@@ -178,7 +180,7 @@ Join our community here:
|
|
178
180
|
- 📜 [Read our blog posts](https://cocoindex.io/blogs/)
|
179
181
|
|
180
182
|
## Support us:
|
181
|
-
We are constantly improving, and more features and examples are coming soon. If you love this project, please drop us a star ⭐ at GitHub repo [](https://github.com/cocoindex-io/cocoindex) to stay tuned and help us grow.
|
183
|
+
We are constantly improving, and more features and examples are coming soon. If you love this project, please drop us a star ⭐ at GitHub repo [](https://github.com/cocoindex-io/cocoindex) to stay tuned and help us grow.
|
182
184
|
|
183
185
|
## License
|
184
186
|
CocoIndex is Apache 2.0 licensed.
|
@@ -0,0 +1,28 @@
|
|
1
|
+
cocoindex-0.1.55.dist-info/METADATA,sha256=uZ5mri9lCaVS5sLMfY1c8lFIyaAHI2duMFy_Y6sxh_Q,10105
|
2
|
+
cocoindex-0.1.55.dist-info/WHEEL,sha256=j4qqVNEKdft0WYe2NEymIjSlGSeJ1BogQYEaMx5ZvPI,96
|
3
|
+
cocoindex-0.1.55.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
+
cocoindex-0.1.55.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
5
|
+
cocoindex/__init__.py,sha256=0cJBLw3MQX7MeuurZ49TV96zdKkSCva9atqxJZG0U2M,1853
|
6
|
+
cocoindex/_engine.cp311-win_amd64.pyd,sha256=aoFG_VuYjPnC-04UOhXaNaqFEoAk1QPKG7E02FnuWpQ,61743616
|
7
|
+
cocoindex/auth_registry.py,sha256=LojDKoX0ccO-G3bboFMlAti50_t5GK9BS0ouPJZfyUs,745
|
8
|
+
cocoindex/cli.py,sha256=Zw_TJalwAxIR1VDvUGNUMqfgqcdrHH-yP20ppax-xpE,20995
|
9
|
+
cocoindex/convert.py,sha256=Gvy2bw0YrhOhqx7EQpa-bZ1TxWREy6ur2sKwMK9J_h4,10522
|
10
|
+
cocoindex/flow.py,sha256=H54uyDSrJ-akBrzv3Y-ncr2hCbSeg55FZ9tp_Wmt3Gs,30992
|
11
|
+
cocoindex/functions.py,sha256=1P8UhXlSS59zmBS0L7ltK0Elbo9VKw_T1M_O_ASGnWQ,2710
|
12
|
+
cocoindex/index.py,sha256=GrqTm1rLwICQ8hadtNvJAxVg7GWMvtMmFcbiNtNzmP0,569
|
13
|
+
cocoindex/lib.py,sha256=o2UGq3eWsZbK5nusZEU7Y0R6NTbT0i03G2ye8N6ATNg,3015
|
14
|
+
cocoindex/llm.py,sha256=zc-5o6qWo8KBXa6a533jbmad5QoSBtJL9b7bj9SFehY,453
|
15
|
+
cocoindex/op.py,sha256=Jk1KfRNBY4TEsbbhWHB5pEzNcMo_2T-FQR1Y75OUVhU,12143
|
16
|
+
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
+
cocoindex/runtime.py,sha256=saKEZntVwUVQNASRhiO9bHRVIFmQccemq2f9mo4mo1A,1090
|
18
|
+
cocoindex/setting.py,sha256=dRNdX-rPBn321zGx6GGoSMggS4F2879A6EBLOUbX8R4,3717
|
19
|
+
cocoindex/setup.py,sha256=nqJAEGQH-5yTulEy3aCAa9khbuiaqD81ZZUdeM3K_lo,799
|
20
|
+
cocoindex/sources.py,sha256=4hxsntuyClp_jKH4oZbx3iE3UM4P2bZTpWy28dqdyFY,1375
|
21
|
+
cocoindex/targets.py,sha256=7FfG9kuEf5KTXtLwXMFaPFIut3PsIbpb3XIEjjeF7Bg,2931
|
22
|
+
cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
+
cocoindex/tests/test_convert.py,sha256=npvLoWfZenQn4U-7odrP_CG7R3quwZWI0IEMNMDD5H0,37195
|
24
|
+
cocoindex/tests/test_optional_database.py,sha256=dnzmTgaJf37D3q8fQsjP5UDER6FYETaUokDnFBMLtIk,8755
|
25
|
+
cocoindex/tests/test_typing.py,sha256=6W2NQmyTj4LMuWegV5m4NVP2clVNrUa5eD28_3nwzjs,15300
|
26
|
+
cocoindex/typing.py,sha256=T5BsXOArgXK4yoDSh9Fo-dzXGYYgsnRhLVOH1Z_42Ig,12985
|
27
|
+
cocoindex/utils.py,sha256=U3W39zD2uZpXX8v84tJD7sRmbC5ar3z_ljAP1cJrYXI,618
|
28
|
+
cocoindex-0.1.55.dist-info/RECORD,,
|
@@ -1,28 +0,0 @@
|
|
1
|
-
cocoindex-0.1.53.dist-info/METADATA,sha256=tsTjQi0dIr3mU1aTxOcDs2xyChKVauyGIkT4CN1UwuE,10039
|
2
|
-
cocoindex-0.1.53.dist-info/WHEEL,sha256=s0dDfj2AxxVsNT4qxjje5WVH9nG_gFfyCM_ZRxv6bRM,96
|
3
|
-
cocoindex-0.1.53.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
-
cocoindex-0.1.53.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
5
|
-
cocoindex/__init__.py,sha256=0cJBLw3MQX7MeuurZ49TV96zdKkSCva9atqxJZG0U2M,1853
|
6
|
-
cocoindex/_engine.cp311-win_amd64.pyd,sha256=_d7PBH4pnG2CqDJGtNAEUIF-T5C0SOOkrBipZwvZ0hM,61381120
|
7
|
-
cocoindex/auth_registry.py,sha256=LojDKoX0ccO-G3bboFMlAti50_t5GK9BS0ouPJZfyUs,745
|
8
|
-
cocoindex/cli.py,sha256=G69aDjYiT6wWJIG2l-VQAslfdxVE_OmkWQzZdR3KXiw,18798
|
9
|
-
cocoindex/convert.py,sha256=yRUQaiTuLwC6rHJZI7g1gnqsZWefBiD_9vPgHxGa5Ow,10066
|
10
|
-
cocoindex/flow.py,sha256=Dt6lzzhZgnnNbFOZ0smeDy6SlBCBofneoCBZ-T3rtIg,30983
|
11
|
-
cocoindex/functions.py,sha256=3l7POrvuk5DVIwGUgCODAi-JNFJ1_WLTOg6Yn-uZ0IE,2471
|
12
|
-
cocoindex/index.py,sha256=GrqTm1rLwICQ8hadtNvJAxVg7GWMvtMmFcbiNtNzmP0,569
|
13
|
-
cocoindex/lib.py,sha256=o2UGq3eWsZbK5nusZEU7Y0R6NTbT0i03G2ye8N6ATNg,3015
|
14
|
-
cocoindex/llm.py,sha256=bvdI0dzU0DV_56xfyHnRKv1E75aEm_qDZ82EqN1MDQ4,430
|
15
|
-
cocoindex/op.py,sha256=Jk1KfRNBY4TEsbbhWHB5pEzNcMo_2T-FQR1Y75OUVhU,12143
|
16
|
-
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
cocoindex/runtime.py,sha256=saKEZntVwUVQNASRhiO9bHRVIFmQccemq2f9mo4mo1A,1090
|
18
|
-
cocoindex/setting.py,sha256=dRNdX-rPBn321zGx6GGoSMggS4F2879A6EBLOUbX8R4,3717
|
19
|
-
cocoindex/setup.py,sha256=nqJAEGQH-5yTulEy3aCAa9khbuiaqD81ZZUdeM3K_lo,799
|
20
|
-
cocoindex/sources.py,sha256=4hxsntuyClp_jKH4oZbx3iE3UM4P2bZTpWy28dqdyFY,1375
|
21
|
-
cocoindex/targets.py,sha256=7FfG9kuEf5KTXtLwXMFaPFIut3PsIbpb3XIEjjeF7Bg,2931
|
22
|
-
cocoindex/tests/__init__.py,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
|
23
|
-
cocoindex/tests/test_convert.py,sha256=7pcYPICPOgMA4SjXK4jsTutLMRyBHxhuXgZpZdjeDew,34181
|
24
|
-
cocoindex/tests/test_optional_database.py,sha256=dnzmTgaJf37D3q8fQsjP5UDER6FYETaUokDnFBMLtIk,8755
|
25
|
-
cocoindex/tests/test_typing.py,sha256=XX_d1q5IUWcPANsp2oKZb7JI4DjVBVt1U7FwwEy9igo,14708
|
26
|
-
cocoindex/typing.py,sha256=Vc51BobrtswtX_sNSuSiWc4iiHeafL6dXqhbNo0iKXc,12385
|
27
|
-
cocoindex/utils.py,sha256=U3W39zD2uZpXX8v84tJD7sRmbC5ar3z_ljAP1cJrYXI,618
|
28
|
-
cocoindex-0.1.53.dist-info/RECORD,,
|
File without changes
|
File without changes
|