cocoindex 0.1.49__cp312-cp312-win_amd64.whl → 0.1.51__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +52 -1
- cocoindex/_engine.cp312-win_amd64.pyd +0 -0
- cocoindex/cli.py +22 -4
- cocoindex/convert.py +41 -1
- cocoindex/functions.py +6 -4
- cocoindex/lib.py +1 -2
- cocoindex/setting.py +10 -6
- cocoindex/tests/test_convert.py +359 -84
- cocoindex/tests/test_optional_database.py +249 -0
- cocoindex/tests/test_typing.py +505 -0
- cocoindex/typing.py +92 -17
- {cocoindex-0.1.49.dist-info → cocoindex-0.1.51.dist-info}/METADATA +1 -1
- cocoindex-0.1.51.dist-info/RECORD +28 -0
- {cocoindex-0.1.49.dist-info → cocoindex-0.1.51.dist-info}/WHEEL +1 -1
- cocoindex/query.py +0 -115
- cocoindex-0.1.49.dist-info/RECORD +0 -27
- {cocoindex-0.1.49.dist-info → cocoindex-0.1.51.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.49.dist-info → cocoindex-0.1.51.dist-info}/licenses/LICENSE +0 -0
cocoindex/__init__.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
Cocoindex is a framework for building and running indexing pipelines.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from . import functions,
|
5
|
+
from . import functions, sources, storages, cli, utils
|
6
6
|
|
7
7
|
from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
|
8
8
|
from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
|
@@ -15,3 +15,54 @@ from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
|
|
15
15
|
from .setting import DatabaseConnectionSpec, Settings, ServerSettings
|
16
16
|
from .setting import get_app_namespace
|
17
17
|
from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
|
18
|
+
|
19
|
+
__all__ = [
|
20
|
+
# Submodules
|
21
|
+
"_engine",
|
22
|
+
"functions",
|
23
|
+
"sources",
|
24
|
+
"storages",
|
25
|
+
"cli",
|
26
|
+
"utils",
|
27
|
+
# Auth registry
|
28
|
+
"AuthEntryReference",
|
29
|
+
"add_auth_entry",
|
30
|
+
"ref_auth_entry",
|
31
|
+
# Flow
|
32
|
+
"FlowBuilder",
|
33
|
+
"DataScope",
|
34
|
+
"DataSlice",
|
35
|
+
"Flow",
|
36
|
+
"transform_flow",
|
37
|
+
"flow_def",
|
38
|
+
"EvaluateAndDumpOptions",
|
39
|
+
"GeneratedField",
|
40
|
+
"update_all_flows_async",
|
41
|
+
"FlowLiveUpdater",
|
42
|
+
"FlowLiveUpdaterOptions",
|
43
|
+
# Lib
|
44
|
+
"init",
|
45
|
+
"start_server",
|
46
|
+
"stop",
|
47
|
+
"main_fn",
|
48
|
+
# LLM
|
49
|
+
"LlmSpec",
|
50
|
+
"LlmApiType",
|
51
|
+
# Index
|
52
|
+
"VectorSimilarityMetric",
|
53
|
+
"VectorIndexDef",
|
54
|
+
"IndexOptions",
|
55
|
+
# Settings
|
56
|
+
"DatabaseConnectionSpec",
|
57
|
+
"Settings",
|
58
|
+
"ServerSettings",
|
59
|
+
"get_app_namespace",
|
60
|
+
# Typing
|
61
|
+
"Float32",
|
62
|
+
"Float64",
|
63
|
+
"LocalDateTime",
|
64
|
+
"OffsetDateTime",
|
65
|
+
"Range",
|
66
|
+
"Vector",
|
67
|
+
"Json",
|
68
|
+
]
|
Binary file
|
cocoindex/cli.py
CHANGED
@@ -237,7 +237,15 @@ def show(app_flow_specifier: str, color: bool, verbose: bool) -> None:
|
|
237
237
|
|
238
238
|
@cli.command()
|
239
239
|
@click.argument("app_target", type=str)
|
240
|
-
|
240
|
+
@click.option(
|
241
|
+
"-f",
|
242
|
+
"--force",
|
243
|
+
is_flag=True,
|
244
|
+
show_default=True,
|
245
|
+
default=False,
|
246
|
+
help="Force setup without confirmation prompts.",
|
247
|
+
)
|
248
|
+
def setup(app_target: str, force: bool) -> None:
|
241
249
|
"""
|
242
250
|
Check and apply backend setup changes for flows, including the internal and target storage
|
243
251
|
(to export).
|
@@ -252,7 +260,7 @@ def setup(app_target: str) -> None:
|
|
252
260
|
if setup_status.is_up_to_date():
|
253
261
|
click.echo("No changes need to be pushed.")
|
254
262
|
return
|
255
|
-
if not click.confirm(
|
263
|
+
if not force and not click.confirm(
|
256
264
|
"Changes need to be pushed. Continue? [yes/N]",
|
257
265
|
default=False,
|
258
266
|
show_default=False,
|
@@ -275,7 +283,17 @@ def setup(app_target: str) -> None:
|
|
275
283
|
"even if not defined in the current process."
|
276
284
|
"If used, APP_TARGET and any listed flow names are ignored.",
|
277
285
|
)
|
278
|
-
|
286
|
+
@click.option(
|
287
|
+
"-f",
|
288
|
+
"--force",
|
289
|
+
is_flag=True,
|
290
|
+
show_default=True,
|
291
|
+
default=False,
|
292
|
+
help="Force drop without confirmation prompts.",
|
293
|
+
)
|
294
|
+
def drop(
|
295
|
+
app_target: str | None, flow_name: tuple[str, ...], drop_all: bool, force: bool
|
296
|
+
) -> None:
|
279
297
|
"""
|
280
298
|
Drop the backend setup for flows.
|
281
299
|
|
@@ -328,7 +346,7 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool) ->
|
|
328
346
|
if setup_status.is_up_to_date():
|
329
347
|
click.echo("No flows need to be dropped.")
|
330
348
|
return
|
331
|
-
if not click.confirm(
|
349
|
+
if not force and not click.confirm(
|
332
350
|
f"\nThis will apply changes to drop setup for: {', '.join(flow_names)}. Continue? [yes/N]",
|
333
351
|
default=False,
|
334
352
|
show_default=False,
|
cocoindex/convert.py
CHANGED
@@ -6,6 +6,7 @@ import dataclasses
|
|
6
6
|
import datetime
|
7
7
|
import inspect
|
8
8
|
import uuid
|
9
|
+
import numpy as np
|
9
10
|
|
10
11
|
from enum import Enum
|
11
12
|
from typing import Any, Callable, get_origin, Mapping
|
@@ -15,6 +16,7 @@ from .typing import (
|
|
15
16
|
is_namedtuple_type,
|
16
17
|
TABLE_TYPES,
|
17
18
|
KEY_FIELD_NAME,
|
19
|
+
DtypeRegistry,
|
18
20
|
)
|
19
21
|
|
20
22
|
|
@@ -27,6 +29,8 @@ def encode_engine_value(value: Any) -> Any:
|
|
27
29
|
]
|
28
30
|
if is_namedtuple_type(type(value)):
|
29
31
|
return [encode_engine_value(getattr(value, name)) for name in value._fields]
|
32
|
+
if isinstance(value, np.ndarray):
|
33
|
+
return value
|
30
34
|
if isinstance(value, (list, tuple)):
|
31
35
|
return [encode_engine_value(v) for v in value]
|
32
36
|
if isinstance(value, dict):
|
@@ -122,6 +126,37 @@ def make_engine_value_decoder(
|
|
122
126
|
if src_type_kind == "Uuid":
|
123
127
|
return lambda value: uuid.UUID(bytes=value)
|
124
128
|
|
129
|
+
if src_type_kind == "Vector":
|
130
|
+
dtype_info = DtypeRegistry.get_by_dtype(dst_type_info.np_number_type)
|
131
|
+
|
132
|
+
def decode_vector(value: Any) -> Any | None:
|
133
|
+
if value is None:
|
134
|
+
if dst_type_info.nullable:
|
135
|
+
return None
|
136
|
+
raise ValueError(
|
137
|
+
f"Received null for non-nullable vector `{''.join(field_path)}`"
|
138
|
+
)
|
139
|
+
|
140
|
+
if not isinstance(value, (np.ndarray, list)):
|
141
|
+
raise TypeError(
|
142
|
+
f"Expected NDArray or list for vector `{''.join(field_path)}`, got {type(value)}"
|
143
|
+
)
|
144
|
+
expected_dim = (
|
145
|
+
dst_type_info.vector_info.dim if dst_type_info.vector_info else None
|
146
|
+
)
|
147
|
+
if expected_dim is not None and len(value) != expected_dim:
|
148
|
+
raise ValueError(
|
149
|
+
f"Vector dimension mismatch for `{''.join(field_path)}`: "
|
150
|
+
f"expected {expected_dim}, got {len(value)}"
|
151
|
+
)
|
152
|
+
|
153
|
+
# Use NDArray for supported numeric dtypes, else return list
|
154
|
+
if dtype_info is not None:
|
155
|
+
return np.array(value, dtype=dtype_info.numpy_dtype)
|
156
|
+
return value
|
157
|
+
|
158
|
+
return decode_vector
|
159
|
+
|
125
160
|
return lambda value: value
|
126
161
|
|
127
162
|
|
@@ -201,7 +236,12 @@ def dump_engine_object(v: Any) -> Any:
|
|
201
236
|
nanos = int((total_secs - secs) * 1e9)
|
202
237
|
return {"secs": secs, "nanos": nanos}
|
203
238
|
elif hasattr(v, "__dict__"):
|
204
|
-
s = {
|
239
|
+
s = {}
|
240
|
+
for k, val in v.__dict__.items():
|
241
|
+
if val is None:
|
242
|
+
# Skip None values
|
243
|
+
continue
|
244
|
+
s[k] = dump_engine_object(val)
|
205
245
|
if hasattr(v, "kind") and "kind" not in s:
|
206
246
|
s["kind"] = v.kind
|
207
247
|
return s
|
cocoindex/functions.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
"""All builtin functions."""
|
2
2
|
|
3
|
-
from typing import Annotated, Any, TYPE_CHECKING
|
3
|
+
from typing import Annotated, Any, TYPE_CHECKING, Literal
|
4
|
+
import numpy as np
|
5
|
+
from numpy.typing import NDArray
|
4
6
|
import dataclasses
|
5
7
|
|
6
8
|
from .typing import Float32, Vector, TypeAttr
|
@@ -66,11 +68,11 @@ class SentenceTransformerEmbedExecutor:
|
|
66
68
|
self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args)
|
67
69
|
dim = self._model.get_sentence_embedding_dimension()
|
68
70
|
result: type = Annotated[
|
69
|
-
Vector[
|
71
|
+
Vector[np.float32, Literal[dim]], # type: ignore
|
70
72
|
TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value),
|
71
73
|
]
|
72
74
|
return result
|
73
75
|
|
74
|
-
def __call__(self, text: str) ->
|
75
|
-
result:
|
76
|
+
def __call__(self, text: str) -> NDArray[np.float32]:
|
77
|
+
result: NDArray[np.float32] = self._model.encode(text, convert_to_numpy=True)
|
76
78
|
return result
|
cocoindex/lib.py
CHANGED
@@ -6,7 +6,7 @@ import warnings
|
|
6
6
|
from typing import Callable, Any
|
7
7
|
|
8
8
|
from . import _engine # type: ignore
|
9
|
-
from . import flow,
|
9
|
+
from . import flow, setting
|
10
10
|
from .convert import dump_engine_object
|
11
11
|
|
12
12
|
|
@@ -24,7 +24,6 @@ def init(settings: setting.Settings | None = None) -> None:
|
|
24
24
|
def start_server(settings: setting.ServerSettings) -> None:
|
25
25
|
"""Start the cocoindex server."""
|
26
26
|
flow.ensure_all_flows_built()
|
27
|
-
query.ensure_all_handlers_built()
|
28
27
|
_engine.start_server(settings.__dict__)
|
29
28
|
|
30
29
|
|
cocoindex/setting.py
CHANGED
@@ -62,18 +62,22 @@ def _load_field(
|
|
62
62
|
class Settings:
|
63
63
|
"""Settings for the cocoindex library."""
|
64
64
|
|
65
|
-
database: DatabaseConnectionSpec
|
65
|
+
database: DatabaseConnectionSpec | None = None
|
66
66
|
app_namespace: str = ""
|
67
67
|
|
68
68
|
@classmethod
|
69
69
|
def from_env(cls) -> Self:
|
70
70
|
"""Load settings from environment variables."""
|
71
71
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
72
|
+
database_url = os.getenv("COCOINDEX_DATABASE_URL")
|
73
|
+
if database_url is not None:
|
74
|
+
db_kwargs: dict[str, str] = dict()
|
75
|
+
_load_field(db_kwargs, "url", "COCOINDEX_DATABASE_URL", required=True)
|
76
|
+
_load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
|
77
|
+
_load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
|
78
|
+
database = DatabaseConnectionSpec(**db_kwargs)
|
79
|
+
else:
|
80
|
+
database = None
|
77
81
|
|
78
82
|
app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", "")
|
79
83
|
|