cocoindex 0.1.50__cp312-cp312-macosx_11_0_arm64.whl → 0.1.52__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +55 -1
- cocoindex/_engine.cpython-312-darwin.so +0 -0
- cocoindex/cli.py +23 -6
- cocoindex/convert.py +7 -3
- cocoindex/flow.py +2 -2
- cocoindex/op.py +3 -3
- cocoindex/setting.py +10 -6
- cocoindex/{storages.py → targets.py} +8 -8
- cocoindex/tests/test_convert.py +151 -104
- cocoindex/tests/test_optional_database.py +249 -0
- cocoindex/tests/test_typing.py +62 -56
- cocoindex/typing.py +19 -19
- cocoindex/utils.py +7 -4
- {cocoindex-0.1.50.dist-info → cocoindex-0.1.52.dist-info}/METADATA +2 -2
- cocoindex-0.1.52.dist-info/RECORD +28 -0
- cocoindex-0.1.50.dist-info/RECORD +0 -27
- {cocoindex-0.1.50.dist-info → cocoindex-0.1.52.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.50.dist-info → cocoindex-0.1.52.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.50.dist-info → cocoindex-0.1.52.dist-info}/licenses/LICENSE +0 -0
cocoindex/__init__.py
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
Cocoindex is a framework for building and running indexing pipelines.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from . import functions, sources, storages, cli, utils
|
5
|
+
from . import functions, sources, targets, cli, utils
|
6
|
+
|
7
|
+
from . import targets as storages # Deprecated: Use targets instead
|
6
8
|
|
7
9
|
from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
|
8
10
|
from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
|
@@ -15,3 +17,55 @@ from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
|
|
15
17
|
from .setting import DatabaseConnectionSpec, Settings, ServerSettings
|
16
18
|
from .setting import get_app_namespace
|
17
19
|
from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
|
20
|
+
|
21
|
+
__all__ = [
|
22
|
+
# Submodules
|
23
|
+
"_engine",
|
24
|
+
"functions",
|
25
|
+
"sources",
|
26
|
+
"targets",
|
27
|
+
"storages",
|
28
|
+
"cli",
|
29
|
+
"utils",
|
30
|
+
# Auth registry
|
31
|
+
"AuthEntryReference",
|
32
|
+
"add_auth_entry",
|
33
|
+
"ref_auth_entry",
|
34
|
+
# Flow
|
35
|
+
"FlowBuilder",
|
36
|
+
"DataScope",
|
37
|
+
"DataSlice",
|
38
|
+
"Flow",
|
39
|
+
"transform_flow",
|
40
|
+
"flow_def",
|
41
|
+
"EvaluateAndDumpOptions",
|
42
|
+
"GeneratedField",
|
43
|
+
"update_all_flows_async",
|
44
|
+
"FlowLiveUpdater",
|
45
|
+
"FlowLiveUpdaterOptions",
|
46
|
+
# Lib
|
47
|
+
"init",
|
48
|
+
"start_server",
|
49
|
+
"stop",
|
50
|
+
"main_fn",
|
51
|
+
# LLM
|
52
|
+
"LlmSpec",
|
53
|
+
"LlmApiType",
|
54
|
+
# Index
|
55
|
+
"VectorSimilarityMetric",
|
56
|
+
"VectorIndexDef",
|
57
|
+
"IndexOptions",
|
58
|
+
# Settings
|
59
|
+
"DatabaseConnectionSpec",
|
60
|
+
"Settings",
|
61
|
+
"ServerSettings",
|
62
|
+
"get_app_namespace",
|
63
|
+
# Typing
|
64
|
+
"Float32",
|
65
|
+
"Float64",
|
66
|
+
"LocalDateTime",
|
67
|
+
"OffsetDateTime",
|
68
|
+
"Range",
|
69
|
+
"Vector",
|
70
|
+
"Json",
|
71
|
+
]
|
cocoindex/_engine.cpython-312-darwin.so
CHANGED
Binary file
|
cocoindex/cli.py
CHANGED
@@ -237,10 +237,17 @@ def show(app_flow_specifier: str, color: bool, verbose: bool) -> None:
|
|
237
237
|
|
238
238
|
@cli.command()
|
239
239
|
@click.argument("app_target", type=str)
|
240
|
-
def setup(app_target: str) -> None:
|
240
|
+
@click.option(
|
241
|
+
"-f",
|
242
|
+
"--force",
|
243
|
+
is_flag=True,
|
244
|
+
show_default=True,
|
245
|
+
default=False,
|
246
|
+
help="Force setup without confirmation prompts.",
|
247
|
+
)
|
248
|
+
def setup(app_target: str, force: bool) -> None:
|
241
249
|
"""
|
242
|
-
Check and apply backend setup changes for flows, including the internal and target
|
243
|
-
(to export).
|
250
|
+
Check and apply backend setup changes for flows, including the internal storage and target (to export to).
|
244
251
|
|
245
252
|
APP_TARGET: path/to/app.py or installed_module.
|
246
253
|
"""
|
@@ -252,7 +259,7 @@ def setup(app_target: str) -> None:
|
|
252
259
|
if setup_status.is_up_to_date():
|
253
260
|
click.echo("No changes need to be pushed.")
|
254
261
|
return
|
255
|
-
if not click.confirm(
|
262
|
+
if not force and not click.confirm(
|
256
263
|
"Changes need to be pushed. Continue? [yes/N]",
|
257
264
|
default=False,
|
258
265
|
show_default=False,
|
@@ -275,7 +282,17 @@ def setup(app_target: str) -> None:
|
|
275
282
|
"even if not defined in the current process."
|
276
283
|
"If used, APP_TARGET and any listed flow names are ignored.",
|
277
284
|
)
|
278
|
-
def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool) -> None:
|
285
|
+
@click.option(
|
286
|
+
"-f",
|
287
|
+
"--force",
|
288
|
+
is_flag=True,
|
289
|
+
show_default=True,
|
290
|
+
default=False,
|
291
|
+
help="Force drop without confirmation prompts.",
|
292
|
+
)
|
293
|
+
def drop(
|
294
|
+
app_target: str | None, flow_name: tuple[str, ...], drop_all: bool, force: bool
|
295
|
+
) -> None:
|
279
296
|
"""
|
280
297
|
Drop the backend setup for flows.
|
281
298
|
|
@@ -328,7 +345,7 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool) ->
|
|
328
345
|
if setup_status.is_up_to_date():
|
329
346
|
click.echo("No flows need to be dropped.")
|
330
347
|
return
|
331
|
-
if not click.confirm(
|
348
|
+
if not force and not click.confirm(
|
332
349
|
f"\nThis will apply changes to drop setup for: {', '.join(flow_names)}. Continue? [yes/N]",
|
333
350
|
default=False,
|
334
351
|
show_default=False,
|
cocoindex/convert.py
CHANGED
@@ -127,8 +127,7 @@ def make_engine_value_decoder(
|
|
127
127
|
return lambda value: uuid.UUID(bytes=value)
|
128
128
|
|
129
129
|
if src_type_kind == "Vector":
|
130
|
-
|
131
|
-
dtype_info = DtypeRegistry.get_by_kind(elem_coco_type_info.kind)
|
130
|
+
dtype_info = DtypeRegistry.get_by_dtype(dst_type_info.np_number_type)
|
132
131
|
|
133
132
|
def decode_vector(value: Any) -> Any | None:
|
134
133
|
if value is None:
|
@@ -237,7 +236,12 @@ def dump_engine_object(v: Any) -> Any:
|
|
237
236
|
nanos = int((total_secs - secs) * 1e9)
|
238
237
|
return {"secs": secs, "nanos": nanos}
|
239
238
|
elif hasattr(v, "__dict__"):
|
240
|
-
s = {
|
239
|
+
s = {}
|
240
|
+
for k, val in v.__dict__.items():
|
241
|
+
if val is None:
|
242
|
+
# Skip None values
|
243
|
+
continue
|
244
|
+
s[k] = dump_engine_object(val)
|
241
245
|
if hasattr(v, "kind") and "kind" not in s:
|
242
246
|
s["kind"] = v.kind
|
243
247
|
return s
|
cocoindex/flow.py
CHANGED
@@ -327,7 +327,7 @@ class DataCollector:
|
|
327
327
|
def export(
|
328
328
|
self,
|
329
329
|
name: str,
|
330
|
-
target_spec: op.
|
330
|
+
target_spec: op.TargetSpec,
|
331
331
|
/,
|
332
332
|
*,
|
333
333
|
primary_key_fields: Sequence[str],
|
@@ -340,7 +340,7 @@ class DataCollector:
|
|
340
340
|
|
341
341
|
`vector_index` is for backward compatibility only. Please use `vector_indexes` instead.
|
342
342
|
"""
|
343
|
-
if not isinstance(target_spec, op.
|
343
|
+
if not isinstance(target_spec, op.TargetSpec):
|
344
344
|
raise ValueError(
|
345
345
|
"export() can only be called on a CocoIndex target storage"
|
346
346
|
)
|
cocoindex/op.py
CHANGED
@@ -19,7 +19,7 @@ class OpCategory(Enum):
|
|
19
19
|
|
20
20
|
FUNCTION = "function"
|
21
21
|
SOURCE = "source"
|
22
|
-
|
22
|
+
TARGET = "target"
|
23
23
|
DECLARATION = "declaration"
|
24
24
|
|
25
25
|
|
@@ -52,8 +52,8 @@ class FunctionSpec(metaclass=SpecMeta, category=OpCategory.FUNCTION): # pylint:
|
|
52
52
|
"""A function spec. All its subclass can be instantiated similar to a dataclass, i.e. ClassName(field1=value1, field2=value2, ...)"""
|
53
53
|
|
54
54
|
|
55
|
-
class
|
56
|
-
"""A
|
55
|
+
class TargetSpec(metaclass=SpecMeta, category=OpCategory.TARGET): # pylint: disable=too-few-public-methods
|
56
|
+
"""A target spec. All its subclass can be instantiated similar to a dataclass, i.e. ClassName(field1=value1, field2=value2, ...)"""
|
57
57
|
|
58
58
|
|
59
59
|
class DeclarationSpec(metaclass=SpecMeta, category=OpCategory.DECLARATION): # pylint: disable=too-few-public-methods
|
cocoindex/setting.py
CHANGED
@@ -62,18 +62,22 @@ def _load_field(
|
|
62
62
|
class Settings:
|
63
63
|
"""Settings for the cocoindex library."""
|
64
64
|
|
65
|
-
database: DatabaseConnectionSpec
|
65
|
+
database: DatabaseConnectionSpec | None = None
|
66
66
|
app_namespace: str = ""
|
67
67
|
|
68
68
|
@classmethod
|
69
69
|
def from_env(cls) -> Self:
|
70
70
|
"""Load settings from environment variables."""
|
71
71
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
72
|
+
database_url = os.getenv("COCOINDEX_DATABASE_URL")
|
73
|
+
if database_url is not None:
|
74
|
+
db_kwargs: dict[str, str] = dict()
|
75
|
+
_load_field(db_kwargs, "url", "COCOINDEX_DATABASE_URL", required=True)
|
76
|
+
_load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
|
77
|
+
_load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
|
78
|
+
database = DatabaseConnectionSpec(**db_kwargs)
|
79
|
+
else:
|
80
|
+
database = None
|
77
81
|
|
78
82
|
app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", "")
|
79
83
|
|
cocoindex/{storages.py → targets.py}
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""All builtin
|
1
|
+
"""All builtin targets."""
|
2
2
|
|
3
3
|
from dataclasses import dataclass
|
4
4
|
from typing import Sequence
|
@@ -9,8 +9,8 @@ from .auth_registry import AuthEntryReference
|
|
9
9
|
from .setting import DatabaseConnectionSpec
|
10
10
|
|
11
11
|
|
12
|
-
class Postgres(op.
|
13
|
-
"""
|
12
|
+
class Postgres(op.TargetSpec):
|
13
|
+
"""Target powered by Postgres and pgvector."""
|
14
14
|
|
15
15
|
database: AuthEntryReference[DatabaseConnectionSpec] | None = None
|
16
16
|
table_name: str | None = None
|
@@ -25,8 +25,8 @@ class QdrantConnection:
|
|
25
25
|
|
26
26
|
|
27
27
|
@dataclass
|
28
|
-
class Qdrant(op.
|
29
|
-
"""
|
28
|
+
class Qdrant(op.TargetSpec):
|
29
|
+
"""Target powered by Qdrant - https://qdrant.tech/."""
|
30
30
|
|
31
31
|
collection_name: str
|
32
32
|
connection: AuthEntryReference[QdrantConnection] | None = None
|
@@ -52,7 +52,7 @@ class NodeFromFields:
|
|
52
52
|
|
53
53
|
@dataclass
|
54
54
|
class ReferencedNode:
|
55
|
-
"""
|
55
|
+
"""Target spec for a graph node."""
|
56
56
|
|
57
57
|
label: str
|
58
58
|
primary_key_fields: Sequence[str]
|
@@ -95,7 +95,7 @@ class Neo4jConnection:
|
|
95
95
|
db: str | None = None
|
96
96
|
|
97
97
|
|
98
|
-
class Neo4j(op.
|
98
|
+
class Neo4j(op.TargetSpec):
|
99
99
|
"""Graph storage powered by Neo4j."""
|
100
100
|
|
101
101
|
connection: AuthEntryReference[Neo4jConnection]
|
@@ -119,7 +119,7 @@ class KuzuConnection:
|
|
119
119
|
api_server_url: str
|
120
120
|
|
121
121
|
|
122
|
-
class Kuzu(op.
|
122
|
+
class Kuzu(op.TargetSpec):
|
123
123
|
"""Graph storage powered by Kuzu."""
|
124
124
|
|
125
125
|
connection: AuthEntryReference[KuzuConnection]
|