cocoindex 0.2.14__cp311-abi3-manylinux_2_28_x86_64.whl → 0.2.16__cp311-abi3-manylinux_2_28_x86_64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +3 -0
- cocoindex/_engine.abi3.so +0 -0
- cocoindex/flow.py +10 -8
- cocoindex/functions.py +20 -2
- {cocoindex-0.2.14.dist-info → cocoindex-0.2.16.dist-info}/METADATA +1 -1
- {cocoindex-0.2.14.dist-info → cocoindex-0.2.16.dist-info}/RECORD +9 -9
- {cocoindex-0.2.14.dist-info → cocoindex-0.2.16.dist-info}/licenses/THIRD_PARTY_NOTICES.html +1 -1
- {cocoindex-0.2.14.dist-info → cocoindex-0.2.16.dist-info}/WHEEL +0 -0
- {cocoindex-0.2.14.dist-info → cocoindex-0.2.16.dist-info}/entry_points.txt +0 -0
cocoindex/__init__.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
Cocoindex is a framework for building and running indexing pipelines.
|
3
3
|
"""
|
4
4
|
|
5
|
+
from . import _engine # type: ignore
|
5
6
|
from . import functions, sources, targets, cli, utils
|
6
7
|
|
7
8
|
from . import targets as storages # Deprecated: Use targets instead
|
@@ -42,6 +43,8 @@ from .typing import (
|
|
42
43
|
Json,
|
43
44
|
)
|
44
45
|
|
46
|
+
_engine.init_pyo3_runtime()
|
47
|
+
|
45
48
|
__all__ = [
|
46
49
|
# Submodules
|
47
50
|
"_engine",
|
cocoindex/_engine.abi3.so
CHANGED
Binary file
|
cocoindex/flow.py
CHANGED
@@ -17,7 +17,6 @@ from typing import (
|
|
17
17
|
Callable,
|
18
18
|
Generic,
|
19
19
|
Iterable,
|
20
|
-
NamedTuple,
|
21
20
|
Sequence,
|
22
21
|
TypeVar,
|
23
22
|
cast,
|
@@ -575,7 +574,8 @@ class FlowLiveUpdaterOptions:
|
|
575
574
|
print_stats: bool = False
|
576
575
|
|
577
576
|
|
578
|
-
|
577
|
+
@dataclass
|
578
|
+
class FlowUpdaterStatusUpdates:
|
579
579
|
"""
|
580
580
|
Status updates for a flow updater.
|
581
581
|
"""
|
@@ -1060,12 +1060,14 @@ def _get_data_slice_annotation_type(
|
|
1060
1060
|
_transform_flow_name_builder = _NameBuilder()
|
1061
1061
|
|
1062
1062
|
|
1063
|
-
|
1063
|
+
@dataclass
|
1064
|
+
class TransformFlowInfo(Generic[T]):
|
1064
1065
|
engine_flow: _engine.TransientFlow
|
1065
1066
|
result_decoder: Callable[[Any], T]
|
1066
1067
|
|
1067
1068
|
|
1068
|
-
|
1069
|
+
@dataclass
|
1070
|
+
class FlowArgInfo:
|
1069
1071
|
name: str
|
1070
1072
|
type_hint: Any
|
1071
1073
|
encoder: Callable[[Any], Any]
|
@@ -1081,7 +1083,7 @@ class TransformFlow(Generic[T]):
|
|
1081
1083
|
_args_info: list[FlowArgInfo]
|
1082
1084
|
|
1083
1085
|
_lazy_lock: asyncio.Lock
|
1084
|
-
_lazy_flow_info: TransformFlowInfo | None = None
|
1086
|
+
_lazy_flow_info: TransformFlowInfo[T] | None = None
|
1085
1087
|
|
1086
1088
|
def __init__(
|
1087
1089
|
self,
|
@@ -1123,12 +1125,12 @@ class TransformFlow(Generic[T]):
|
|
1123
1125
|
return self._flow_fn(*args, **kwargs)
|
1124
1126
|
|
1125
1127
|
@property
|
1126
|
-
def _flow_info(self) -> TransformFlowInfo:
|
1128
|
+
def _flow_info(self) -> TransformFlowInfo[T]:
|
1127
1129
|
if self._lazy_flow_info is not None:
|
1128
1130
|
return self._lazy_flow_info
|
1129
1131
|
return execution_context.run(self._flow_info_async())
|
1130
1132
|
|
1131
|
-
async def _flow_info_async(self) -> TransformFlowInfo:
|
1133
|
+
async def _flow_info_async(self) -> TransformFlowInfo[T]:
|
1132
1134
|
if self._lazy_flow_info is not None:
|
1133
1135
|
return self._lazy_flow_info
|
1134
1136
|
async with self._lazy_lock:
|
@@ -1136,7 +1138,7 @@ class TransformFlow(Generic[T]):
|
|
1136
1138
|
self._lazy_flow_info = await self._build_flow_info_async()
|
1137
1139
|
return self._lazy_flow_info
|
1138
1140
|
|
1139
|
-
async def _build_flow_info_async(self) -> TransformFlowInfo:
|
1141
|
+
async def _build_flow_info_async(self) -> TransformFlowInfo[T]:
|
1140
1142
|
flow_builder_state = _FlowBuilderState(self._flow_name)
|
1141
1143
|
kwargs: dict[str, DataSlice[T]] = {}
|
1142
1144
|
for arg_info in self._args_info:
|
cocoindex/functions.py
CHANGED
@@ -2,13 +2,13 @@
|
|
2
2
|
|
3
3
|
import dataclasses
|
4
4
|
import functools
|
5
|
-
from typing import
|
5
|
+
from typing import Any, Literal
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
from numpy.typing import NDArray
|
9
9
|
|
10
10
|
from . import llm, op
|
11
|
-
from .typing import
|
11
|
+
from .typing import Vector
|
12
12
|
|
13
13
|
|
14
14
|
class ParseJson(op.FunctionSpec):
|
@@ -40,6 +40,24 @@ class SplitRecursively(op.FunctionSpec):
|
|
40
40
|
custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
|
41
41
|
|
42
42
|
|
43
|
+
class SplitBySeparators(op.FunctionSpec):
|
44
|
+
"""
|
45
|
+
Split text by specified regex separators only.
|
46
|
+
Output schema matches SplitRecursively for drop-in compatibility:
|
47
|
+
KTable rows with fields: location (Range), text (Str), start, end.
|
48
|
+
Args:
|
49
|
+
separators_regex: list[str] # e.g., [r"\\n\\n+"]
|
50
|
+
keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
|
51
|
+
include_empty: bool = False
|
52
|
+
trim: bool = True
|
53
|
+
"""
|
54
|
+
|
55
|
+
separators_regex: list[str] = dataclasses.field(default_factory=list)
|
56
|
+
keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
|
57
|
+
include_empty: bool = False
|
58
|
+
trim: bool = True
|
59
|
+
|
60
|
+
|
43
61
|
class EmbedText(op.FunctionSpec):
|
44
62
|
"""Embed a text into a vector space."""
|
45
63
|
|
@@ -1,14 +1,14 @@
|
|
1
|
-
cocoindex-0.2.
|
2
|
-
cocoindex-0.2.
|
3
|
-
cocoindex-0.2.
|
4
|
-
cocoindex-0.2.
|
5
|
-
cocoindex/__init__.py,sha256=
|
6
|
-
cocoindex/_engine.abi3.so,sha256=
|
1
|
+
cocoindex-0.2.16.dist-info/METADATA,sha256=PlewUqWQWLCwQG8HT1lJed7JG9nyvPH61KjhNB_3U7Q,13316
|
2
|
+
cocoindex-0.2.16.dist-info/WHEEL,sha256=IG-K_sumA04dNIpy5J1b3kZo_HELEjvxxDWHD32zTgo,107
|
3
|
+
cocoindex-0.2.16.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
+
cocoindex-0.2.16.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=fxZXw4-pEtDeJix0DJuH2VrSsp0CTjsExMxjxU4p0ds,717769
|
5
|
+
cocoindex/__init__.py,sha256=6qZWVkK4WZ01BIAg3CPh_bRRdA6Clk4d4Q6OnZ2jFa4,2630
|
6
|
+
cocoindex/_engine.abi3.so,sha256=xnhUzk7vP6YNFdJkkG-jleX9YqXv5KkKIDrExl4h0mk,73561928
|
7
7
|
cocoindex/auth_registry.py,sha256=_DOIY42C79joLCY_XczHwP5uebkmSavweoAHc0L3hQY,1334
|
8
8
|
cocoindex/cli.py,sha256=69X30bFTFdM7c0_6lgIHR19CeQ7UEkobEQYihy8IdOQ,21599
|
9
9
|
cocoindex/convert.py,sha256=itkUBCriOk8fdauahHRqJ-L8mnHehNZsBe_FouN0K1Q,28695
|
10
|
-
cocoindex/flow.py,sha256=
|
11
|
-
cocoindex/functions.py,sha256=
|
10
|
+
cocoindex/flow.py,sha256=oAFerPoarOS9XWVumYIzT4EHJyV3Pixv2mfqpCLHNOw,39849
|
11
|
+
cocoindex/functions.py,sha256=V4ljBnCprvA25XlCVvNLwK5ergXiEcKU76jkOGC-X3A,12882
|
12
12
|
cocoindex/index.py,sha256=tz5ilvmOp0BtroGehCQDqWK_pIX9m6ghkhcxsDVU8WE,982
|
13
13
|
cocoindex/lib.py,sha256=0XheDF7fiFdqExpdqzU-VKun_Zll6DwZ5JfTm7u42aY,2284
|
14
14
|
cocoindex/llm.py,sha256=Pv_cdnRngTLtuLU9AUmS8izIHhcKVnuBNolC33f9BDI,851
|
@@ -34,4 +34,4 @@ cocoindex/typing.py,sha256=jZO3meRVL_RsFdhj8Sx6gWF-Z207VhoPtb1ZmqzAnH0,19974
|
|
34
34
|
cocoindex/user_app_loader.py,sha256=bc3Af-gYRxJ9GpObtpjegZY855oQBCv5FGkrkWV2yGY,1873
|
35
35
|
cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
|
36
36
|
cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
|
37
|
-
cocoindex-0.2.14.dist-info/RECORD,,
|
37
|
+
cocoindex-0.2.16.dist-info/RECORD,,
|
@@ -2428,7 +2428,7 @@ Software.
|
|
2428
2428
|
<h3 id="Apache-2.0">Apache License 2.0</h3>
|
2429
2429
|
<h4>Used by:</h4>
|
2430
2430
|
<ul class="license-used-by">
|
2431
|
-
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.14</a></li>
|
2431
|
+
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.16</a></li>
|
2432
2432
|
<li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
|
2433
2433
|
<li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
|
2434
2434
|
</ul>
|
File without changes
|
File without changes
|