cocoindex 0.1.43__cp311-cp311-macosx_11_0_arm64.whl → 0.1.45__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +2 -1
- cocoindex/_engine.cpython-311-darwin.so +0 -0
- cocoindex/auth_registry.py +7 -3
- cocoindex/cli.py +186 -67
- cocoindex/convert.py +93 -52
- cocoindex/flow.py +303 -132
- cocoindex/functions.py +17 -4
- cocoindex/index.py +6 -0
- cocoindex/lib.py +14 -9
- cocoindex/llm.py +4 -0
- cocoindex/op.py +126 -61
- cocoindex/query.py +40 -17
- cocoindex/runtime.py +9 -4
- cocoindex/setting.py +35 -12
- cocoindex/setup.py +7 -3
- cocoindex/sources.py +3 -1
- cocoindex/storages.py +50 -7
- cocoindex/tests/test_convert.py +255 -63
- cocoindex/typing.py +116 -70
- cocoindex/utils.py +10 -2
- {cocoindex-0.1.43.dist-info → cocoindex-0.1.45.dist-info}/METADATA +3 -1
- cocoindex-0.1.45.dist-info/RECORD +27 -0
- cocoindex-0.1.43.dist-info/RECORD +0 -27
- {cocoindex-0.1.43.dist-info → cocoindex-0.1.45.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.43.dist-info → cocoindex-0.1.45.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.43.dist-info → cocoindex-0.1.45.dist-info}/licenses/LICENSE +0 -0
cocoindex/runtime.py
CHANGED
@@ -5,12 +5,14 @@ manner.
|
|
5
5
|
|
6
6
|
import threading
|
7
7
|
import asyncio
|
8
|
-
from typing import Coroutine
|
8
|
+
from typing import Any, Coroutine
|
9
|
+
|
10
|
+
|
9
11
|
class _ExecutionContext:
|
10
12
|
_lock: threading.Lock
|
11
13
|
_event_loop: asyncio.AbstractEventLoop | None = None
|
12
14
|
|
13
|
-
def __init__(self):
|
15
|
+
def __init__(self) -> None:
|
14
16
|
self._lock = threading.Lock()
|
15
17
|
|
16
18
|
@property
|
@@ -19,11 +21,14 @@ class _ExecutionContext:
|
|
19
21
|
with self._lock:
|
20
22
|
if self._event_loop is None:
|
21
23
|
self._event_loop = asyncio.new_event_loop()
|
22
|
-
threading.Thread(target=self._event_loop.run_forever, daemon=True).start()
|
24
|
+
threading.Thread(
|
25
|
+
target=self._event_loop.run_forever, daemon=True
|
26
|
+
).start()
|
23
27
|
return self._event_loop
|
24
28
|
|
25
|
-
def run(self, coro: Coroutine):
|
29
|
+
def run(self, coro: Coroutine[Any, Any, Any]) -> Any:
|
26
30
|
"""Run a coroutine in the event loop, blocking until it finishes. Return its result."""
|
27
31
|
return asyncio.run_coroutine_threadsafe(coro, self.event_loop).result()
|
28
32
|
|
33
|
+
|
29
34
|
execution_context = _ExecutionContext()
|
cocoindex/setting.py
CHANGED
@@ -1,43 +1,55 @@
|
|
1
1
|
"""
|
2
2
|
Data types for settings of the cocoindex library.
|
3
3
|
"""
|
4
|
+
|
4
5
|
import os
|
5
6
|
|
6
7
|
from typing import Callable, Self, Any, overload
|
7
8
|
from dataclasses import dataclass
|
8
9
|
|
9
|
-
_app_namespace: str = ''
|
10
|
+
_app_namespace: str = ""
|
11
|
+
|
10
12
|
|
11
13
|
def get_app_namespace(*, trailing_delimiter: str | None = None) -> str:
|
12
14
|
"""Get the application namespace. Append the `trailing_delimiter` if not empty."""
|
13
|
-
if _app_namespace == '' or trailing_delimiter is None:
|
15
|
+
if _app_namespace == "" or trailing_delimiter is None:
|
14
16
|
return _app_namespace
|
15
|
-
return f'{_app_namespace}{trailing_delimiter}'
|
17
|
+
return f"{_app_namespace}{trailing_delimiter}"
|
18
|
+
|
16
19
|
|
17
20
|
def split_app_namespace(full_name: str, delimiter: str) -> tuple[str, str]:
|
18
21
|
"""Split the full name into the application namespace and the rest."""
|
19
22
|
parts = full_name.split(delimiter, 1)
|
20
23
|
if len(parts) == 1:
|
21
|
-
return '', parts[0]
|
24
|
+
return "", parts[0]
|
22
25
|
return (parts[0], parts[1])
|
23
26
|
|
24
|
-
|
27
|
+
|
28
|
+
def set_app_namespace(app_namespace: str) -> None:
|
25
29
|
"""Set the application namespace."""
|
26
30
|
global _app_namespace # pylint: disable=global-statement
|
27
31
|
_app_namespace = app_namespace
|
28
32
|
|
33
|
+
|
29
34
|
@dataclass
|
30
35
|
class DatabaseConnectionSpec:
|
31
36
|
"""
|
32
37
|
Connection spec for relational database.
|
33
38
|
Used by both internal and target storage.
|
34
39
|
"""
|
40
|
+
|
35
41
|
url: str
|
36
42
|
user: str | None = None
|
37
43
|
password: str | None = None
|
38
44
|
|
39
|
-
|
40
|
-
|
45
|
+
|
46
|
+
def _load_field(
|
47
|
+
target: dict[str, Any],
|
48
|
+
name: str,
|
49
|
+
env_name: str,
|
50
|
+
required: bool = False,
|
51
|
+
parse: Callable[[str], Any] | None = None,
|
52
|
+
) -> None:
|
41
53
|
value = os.getenv(env_name)
|
42
54
|
if value is None:
|
43
55
|
if required:
|
@@ -45,9 +57,11 @@ def _load_field(target: dict[str, Any], name: str, env_name: str, required: bool
|
|
45
57
|
else:
|
46
58
|
target[name] = value if parse is None else parse(value)
|
47
59
|
|
60
|
+
|
48
61
|
@dataclass
|
49
62
|
class Settings:
|
50
63
|
"""Settings for the cocoindex library."""
|
64
|
+
|
51
65
|
database: DatabaseConnectionSpec
|
52
66
|
app_namespace: str = ""
|
53
67
|
|
@@ -61,10 +75,11 @@ class Settings:
|
|
61
75
|
_load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
|
62
76
|
database = DatabaseConnectionSpec(**db_kwargs)
|
63
77
|
|
64
|
-
app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", '')
|
78
|
+
app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", "")
|
65
79
|
|
66
80
|
return cls(database=database, app_namespace=app_namespace)
|
67
81
|
|
82
|
+
|
68
83
|
@dataclass
|
69
84
|
class ServerSettings:
|
70
85
|
"""Settings for the cocoindex server."""
|
@@ -80,8 +95,12 @@ class ServerSettings:
|
|
80
95
|
"""Load settings from environment variables."""
|
81
96
|
kwargs: dict[str, Any] = dict()
|
82
97
|
_load_field(kwargs, "address", "COCOINDEX_SERVER_ADDRESS")
|
83
|
-
_load_field(
|
84
|
-
|
98
|
+
_load_field(
|
99
|
+
kwargs,
|
100
|
+
"cors_origins",
|
101
|
+
"COCOINDEX_SERVER_CORS_ORIGINS",
|
102
|
+
parse=ServerSettings.parse_cors_origins,
|
103
|
+
)
|
85
104
|
return cls(**kwargs)
|
86
105
|
|
87
106
|
@overload
|
@@ -93,8 +112,12 @@ class ServerSettings:
|
|
93
112
|
def parse_cors_origins(s: str | None) -> list[str] | None: ...
|
94
113
|
|
95
114
|
@staticmethod
|
96
|
-
def parse_cors_origins(s):
|
115
|
+
def parse_cors_origins(s: str | None) -> list[str] | None:
|
97
116
|
"""
|
98
117
|
Parse the CORS origins from a string.
|
99
118
|
"""
|
100
|
-
return
|
119
|
+
return (
|
120
|
+
[o for e in s.split(",") if (o := e.strip()) != ""]
|
121
|
+
if s is not None
|
122
|
+
else None
|
123
|
+
)
|
cocoindex/setup.py
CHANGED
@@ -1,22 +1,26 @@
|
|
1
1
|
from . import flow
|
2
2
|
from . import setting
|
3
|
-
from . import _engine
|
3
|
+
from . import _engine # type: ignore
|
4
|
+
|
4
5
|
|
5
6
|
def sync_setup() -> _engine.SetupStatus:
|
6
7
|
flow.ensure_all_flows_built()
|
7
8
|
return _engine.sync_setup()
|
8
9
|
|
10
|
+
|
9
11
|
def drop_setup(flow_names: list[str]) -> _engine.SetupStatus:
|
10
12
|
flow.ensure_all_flows_built()
|
11
13
|
return _engine.drop_setup([flow.get_full_flow_name(name) for name in flow_names])
|
12
14
|
|
15
|
+
|
13
16
|
def flow_names_with_setup() -> list[str]:
|
14
17
|
result = []
|
15
18
|
for name in _engine.flow_names_with_setup():
|
16
|
-
app_namespace, name = setting.split_app_namespace(name, '.')
|
19
|
+
app_namespace, name = setting.split_app_namespace(name, ".")
|
17
20
|
if app_namespace == setting.get_app_namespace():
|
18
21
|
result.append(name)
|
19
22
|
return result
|
20
23
|
|
21
|
-
|
24
|
+
|
25
|
+
def apply_setup_changes(setup_status: _engine.SetupStatus) -> None:
|
22
26
|
_engine.apply_setup_changes(setup_status)
|
cocoindex/sources.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
"""All builtin sources."""
|
2
|
+
|
2
3
|
from . import op
|
3
4
|
import datetime
|
4
5
|
|
6
|
+
|
5
7
|
class LocalFile(op.SourceSpec):
|
6
8
|
"""Import data from local file system."""
|
7
9
|
|
@@ -40,4 +42,4 @@ class AmazonS3(op.SourceSpec):
|
|
40
42
|
binary: bool = False
|
41
43
|
included_patterns: list[str] | None = None
|
42
44
|
excluded_patterns: list[str] | None = None
|
43
|
-
sqs_queue_url: str | None = None
|
45
|
+
sqs_queue_url: str | None = None
|
cocoindex/storages.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
"""All builtin storages."""
|
2
|
+
|
2
3
|
from dataclasses import dataclass
|
3
4
|
from typing import Sequence
|
4
5
|
|
@@ -7,11 +8,14 @@ from . import index
|
|
7
8
|
from .auth_registry import AuthEntryReference
|
8
9
|
from .setting import DatabaseConnectionSpec
|
9
10
|
|
11
|
+
|
10
12
|
class Postgres(op.StorageSpec):
|
11
13
|
"""Storage powered by Postgres and pgvector."""
|
14
|
+
|
12
15
|
database: AuthEntryReference[DatabaseConnectionSpec] | None = None
|
13
16
|
table_name: str | None = None
|
14
17
|
|
18
|
+
|
15
19
|
@dataclass
|
16
20
|
class Qdrant(op.StorageSpec):
|
17
21
|
"""Storage powered by Qdrant - https://qdrant.tech/."""
|
@@ -20,61 +24,77 @@ class Qdrant(op.StorageSpec):
|
|
20
24
|
grpc_url: str = "http://localhost:6334/"
|
21
25
|
api_key: str | None = None
|
22
26
|
|
23
|
-
@dataclass
|
24
|
-
class Neo4jConnection:
|
25
|
-
"""Connection spec for Neo4j."""
|
26
|
-
uri: str
|
27
|
-
user: str
|
28
|
-
password: str
|
29
|
-
db: str | None = None
|
30
27
|
|
31
28
|
@dataclass
|
32
29
|
class TargetFieldMapping:
|
33
30
|
"""Mapping for a graph element (node or relationship) field."""
|
31
|
+
|
34
32
|
source: str
|
35
33
|
# Field name for the node in the Knowledge Graph.
|
36
34
|
# If unspecified, it's the same as `field_name`.
|
37
35
|
target: str | None = None
|
38
36
|
|
37
|
+
|
39
38
|
@dataclass
|
40
39
|
class NodeFromFields:
|
41
40
|
"""Spec for a referenced graph node, usually as part of a relationship."""
|
41
|
+
|
42
42
|
label: str
|
43
43
|
fields: list[TargetFieldMapping]
|
44
44
|
|
45
|
+
|
45
46
|
@dataclass
|
46
47
|
class ReferencedNode:
|
47
48
|
"""Storage spec for a graph node."""
|
49
|
+
|
48
50
|
label: str
|
49
51
|
primary_key_fields: Sequence[str]
|
50
52
|
vector_indexes: Sequence[index.VectorIndexDef] = ()
|
51
53
|
|
54
|
+
|
52
55
|
@dataclass
|
53
56
|
class Nodes:
|
54
57
|
"""Spec to map a row to a graph node."""
|
58
|
+
|
55
59
|
kind = "Node"
|
56
60
|
|
57
61
|
label: str
|
58
62
|
|
63
|
+
|
59
64
|
@dataclass
|
60
65
|
class Relationships:
|
61
66
|
"""Spec to map a row to a graph relationship."""
|
67
|
+
|
62
68
|
kind = "Relationship"
|
63
69
|
|
64
70
|
rel_type: str
|
65
71
|
source: NodeFromFields
|
66
72
|
target: NodeFromFields
|
67
73
|
|
74
|
+
|
68
75
|
# For backwards compatibility only
|
69
76
|
NodeMapping = Nodes
|
70
77
|
RelationshipMapping = Relationships
|
71
78
|
NodeReferenceMapping = NodeFromFields
|
72
79
|
|
80
|
+
|
81
|
+
@dataclass
|
82
|
+
class Neo4jConnection:
|
83
|
+
"""Connection spec for Neo4j."""
|
84
|
+
|
85
|
+
uri: str
|
86
|
+
user: str
|
87
|
+
password: str
|
88
|
+
db: str | None = None
|
89
|
+
|
90
|
+
|
73
91
|
class Neo4j(op.StorageSpec):
|
74
92
|
"""Graph storage powered by Neo4j."""
|
93
|
+
|
75
94
|
connection: AuthEntryReference[Neo4jConnection]
|
76
95
|
mapping: Nodes | Relationships
|
77
96
|
|
97
|
+
|
78
98
|
class Neo4jDeclaration(op.DeclarationSpec):
|
79
99
|
"""Declarations for Neo4j."""
|
80
100
|
|
@@ -83,3 +103,26 @@ class Neo4jDeclaration(op.DeclarationSpec):
|
|
83
103
|
nodes_label: str
|
84
104
|
primary_key_fields: Sequence[str]
|
85
105
|
vector_indexes: Sequence[index.VectorIndexDef] = ()
|
106
|
+
|
107
|
+
|
108
|
+
@dataclass
|
109
|
+
class KuzuConnection:
|
110
|
+
"""Connection spec for Kuzu."""
|
111
|
+
|
112
|
+
api_server_url: str
|
113
|
+
|
114
|
+
|
115
|
+
class Kuzu(op.StorageSpec):
|
116
|
+
"""Graph storage powered by Kuzu."""
|
117
|
+
|
118
|
+
connection: AuthEntryReference[KuzuConnection]
|
119
|
+
mapping: Nodes | Relationships
|
120
|
+
|
121
|
+
|
122
|
+
class KuzuDeclaration(op.DeclarationSpec):
|
123
|
+
"""Declarations for Kuzu."""
|
124
|
+
|
125
|
+
kind = "Kuzu"
|
126
|
+
connection: AuthEntryReference[KuzuConnection]
|
127
|
+
nodes_label: str
|
128
|
+
primary_key_fields: Sequence[str]
|