cocoindex 0.2.13__cp311-abi3-macosx_10_12_x86_64.whl → 0.2.15__cp311-abi3-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +12 -1
- cocoindex/_engine.abi3.so +0 -0
- cocoindex/functions.py +20 -2
- cocoindex/index.py +22 -1
- cocoindex/targets/lancedb.py +6 -0
- {cocoindex-0.2.13.dist-info → cocoindex-0.2.15.dist-info}/METADATA +1 -1
- {cocoindex-0.2.13.dist-info → cocoindex-0.2.15.dist-info}/RECORD +10 -10
- {cocoindex-0.2.13.dist-info → cocoindex-0.2.15.dist-info}/licenses/THIRD_PARTY_NOTICES.html +1 -1
- {cocoindex-0.2.13.dist-info → cocoindex-0.2.15.dist-info}/WHEEL +0 -0
- {cocoindex-0.2.13.dist-info → cocoindex-0.2.15.dist-info}/entry_points.txt +0 -0
cocoindex/__init__.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
Cocoindex is a framework for building and running indexing pipelines.
|
3
3
|
"""
|
4
4
|
|
5
|
+
from . import _engine # type: ignore
|
5
6
|
from . import functions, sources, targets, cli, utils
|
6
7
|
|
7
8
|
from . import targets as storages # Deprecated: Use targets instead
|
@@ -21,7 +22,13 @@ from .flow import add_flow_def, remove_flow # DEPRECATED
|
|
21
22
|
from .flow import update_all_flows_async, setup_all_flows, drop_all_flows
|
22
23
|
from .lib import settings, init, start_server, stop
|
23
24
|
from .llm import LlmSpec, LlmApiType
|
24
|
-
from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
|
25
|
+
from .index import (
|
26
|
+
VectorSimilarityMetric,
|
27
|
+
VectorIndexDef,
|
28
|
+
IndexOptions,
|
29
|
+
HnswVectorIndexMethod,
|
30
|
+
IvfFlatVectorIndexMethod,
|
31
|
+
)
|
25
32
|
from .setting import DatabaseConnectionSpec, Settings, ServerSettings
|
26
33
|
from .setting import get_app_namespace
|
27
34
|
from .query_handler import QueryHandlerResultFields, QueryInfo, QueryOutput
|
@@ -36,6 +43,8 @@ from .typing import (
|
|
36
43
|
Json,
|
37
44
|
)
|
38
45
|
|
46
|
+
_engine.init_pyo3_runtime()
|
47
|
+
|
39
48
|
__all__ = [
|
40
49
|
# Submodules
|
41
50
|
"_engine",
|
@@ -82,6 +91,8 @@ __all__ = [
|
|
82
91
|
"VectorSimilarityMetric",
|
83
92
|
"VectorIndexDef",
|
84
93
|
"IndexOptions",
|
94
|
+
"HnswVectorIndexMethod",
|
95
|
+
"IvfFlatVectorIndexMethod",
|
85
96
|
# Settings
|
86
97
|
"DatabaseConnectionSpec",
|
87
98
|
"Settings",
|
cocoindex/_engine.abi3.so
CHANGED
Binary file
|
cocoindex/functions.py
CHANGED
@@ -2,13 +2,13 @@
|
|
2
2
|
|
3
3
|
import dataclasses
|
4
4
|
import functools
|
5
|
-
from typing import
|
5
|
+
from typing import Any, Literal
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
from numpy.typing import NDArray
|
9
9
|
|
10
10
|
from . import llm, op
|
11
|
-
from .typing import
|
11
|
+
from .typing import Vector
|
12
12
|
|
13
13
|
|
14
14
|
class ParseJson(op.FunctionSpec):
|
@@ -40,6 +40,24 @@ class SplitRecursively(op.FunctionSpec):
|
|
40
40
|
custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
|
41
41
|
|
42
42
|
|
43
|
+
class SplitBySeparators(op.FunctionSpec):
|
44
|
+
"""
|
45
|
+
Split text by specified regex separators only.
|
46
|
+
Output schema matches SplitRecursively for drop-in compatibility:
|
47
|
+
KTable rows with fields: location (Range), text (Str), start, end.
|
48
|
+
Args:
|
49
|
+
separators_regex: list[str] # e.g., [r"\\n\\n+"]
|
50
|
+
keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
|
51
|
+
include_empty: bool = False
|
52
|
+
trim: bool = True
|
53
|
+
"""
|
54
|
+
|
55
|
+
separators_regex: list[str] = dataclasses.field(default_factory=list)
|
56
|
+
keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
|
57
|
+
include_empty: bool = False
|
58
|
+
trim: bool = True
|
59
|
+
|
60
|
+
|
43
61
|
class EmbedText(op.FunctionSpec):
|
44
62
|
"""Embed a text into a vector space."""
|
45
63
|
|
cocoindex/index.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from enum import Enum
|
2
2
|
from dataclasses import dataclass
|
3
|
-
from typing import Sequence
|
3
|
+
from typing import Sequence, Union
|
4
4
|
|
5
5
|
|
6
6
|
class VectorSimilarityMetric(Enum):
|
@@ -9,6 +9,26 @@ class VectorSimilarityMetric(Enum):
|
|
9
9
|
INNER_PRODUCT = "InnerProduct"
|
10
10
|
|
11
11
|
|
12
|
+
@dataclass
|
13
|
+
class HnswVectorIndexMethod:
|
14
|
+
"""HNSW vector index parameters."""
|
15
|
+
|
16
|
+
kind: str = "Hnsw"
|
17
|
+
m: int | None = None
|
18
|
+
ef_construction: int | None = None
|
19
|
+
|
20
|
+
|
21
|
+
@dataclass
|
22
|
+
class IvfFlatVectorIndexMethod:
|
23
|
+
"""IVFFlat vector index parameters."""
|
24
|
+
|
25
|
+
kind: str = "IvfFlat"
|
26
|
+
lists: int | None = None
|
27
|
+
|
28
|
+
|
29
|
+
VectorIndexMethod = Union[HnswVectorIndexMethod, IvfFlatVectorIndexMethod]
|
30
|
+
|
31
|
+
|
12
32
|
@dataclass
|
13
33
|
class VectorIndexDef:
|
14
34
|
"""
|
@@ -17,6 +37,7 @@ class VectorIndexDef:
|
|
17
37
|
|
18
38
|
field_name: str
|
19
39
|
metric: VectorSimilarityMetric
|
40
|
+
method: VectorIndexMethod | None = None
|
20
41
|
|
21
42
|
|
22
43
|
@dataclass
|
cocoindex/targets/lancedb.py
CHANGED
@@ -296,6 +296,12 @@ class _Connector:
|
|
296
296
|
) -> _State:
|
297
297
|
if len(key_fields_schema) != 1:
|
298
298
|
raise ValueError("LanceDB only supports a single key field")
|
299
|
+
if index_options.vector_indexes is not None:
|
300
|
+
for vector_index in index_options.vector_indexes:
|
301
|
+
if vector_index.method is not None:
|
302
|
+
raise ValueError(
|
303
|
+
"Vector index method is not configurable for LanceDB yet"
|
304
|
+
)
|
299
305
|
return _State(
|
300
306
|
key_field_schema=key_fields_schema[0],
|
301
307
|
value_fields_schema=value_fields_schema,
|
@@ -1,15 +1,15 @@
|
|
1
|
-
cocoindex-0.2.
|
2
|
-
cocoindex-0.2.13.dist-info/WHEEL,sha256=N8W3-0eDM6igWj-H12r7VkxoMaJIqJLxUyWCFstEaGg,105
|
3
|
-
cocoindex-0.2.13.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
-
cocoindex-0.2.
|
5
|
-
cocoindex/__init__.py,sha256=
|
6
|
-
cocoindex/_engine.abi3.so,sha256=
|
1
|
+
cocoindex-0.2.15.dist-info/METADATA,sha256=3u9PxzHp4NgjbknLyCtuASJeXskfIpODzlIpSyaBcSI,13316
|
2
|
+
cocoindex-0.2.15.dist-info/WHEEL,sha256=N8W3-0eDM6igWj-H12r7VkxoMaJIqJLxUyWCFstEaGg,105
|
3
|
+
cocoindex-0.2.15.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
+
cocoindex-0.2.15.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=LV2CtczxHQ5mN6Rj9TgnBa06dAxNPXZLXc0hMbco8tU,717769
|
5
|
+
cocoindex/__init__.py,sha256=6qZWVkK4WZ01BIAg3CPh_bRRdA6Clk4d4Q6OnZ2jFa4,2630
|
6
|
+
cocoindex/_engine.abi3.so,sha256=RED4cYZkRb3GsTn5IN_Mp_XkdYtlvY6G3Vp7b9836jI,70194300
|
7
7
|
cocoindex/auth_registry.py,sha256=_DOIY42C79joLCY_XczHwP5uebkmSavweoAHc0L3hQY,1334
|
8
8
|
cocoindex/cli.py,sha256=69X30bFTFdM7c0_6lgIHR19CeQ7UEkobEQYihy8IdOQ,21599
|
9
9
|
cocoindex/convert.py,sha256=itkUBCriOk8fdauahHRqJ-L8mnHehNZsBe_FouN0K1Q,28695
|
10
10
|
cocoindex/flow.py,sha256=Vk72dX_svfpinvsolQ11aw6YDqbzaafrAi7xrQHo1i0,39844
|
11
|
-
cocoindex/functions.py,sha256=
|
12
|
-
cocoindex/index.py,sha256=
|
11
|
+
cocoindex/functions.py,sha256=V4ljBnCprvA25XlCVvNLwK5ergXiEcKU76jkOGC-X3A,12882
|
12
|
+
cocoindex/index.py,sha256=tz5ilvmOp0BtroGehCQDqWK_pIX9m6ghkhcxsDVU8WE,982
|
13
13
|
cocoindex/lib.py,sha256=0XheDF7fiFdqExpdqzU-VKun_Zll6DwZ5JfTm7u42aY,2284
|
14
14
|
cocoindex/llm.py,sha256=Pv_cdnRngTLtuLU9AUmS8izIHhcKVnuBNolC33f9BDI,851
|
15
15
|
cocoindex/op.py,sha256=c1xzoiWoPb6PYiCQAUsyzMRJwPvWaC3o5emPk38oY-4,26551
|
@@ -22,7 +22,7 @@ cocoindex/sources.py,sha256=FYz7cWYasLGDaYoIEQ1dF2uprgUETHWsTIrIS7n6pQE,3188
|
|
22
22
|
cocoindex/subprocess_exec.py,sha256=r1xO84uek4VP4I6i87JMwsH5xFm3vKW0ABvgn0jskt4,10088
|
23
23
|
cocoindex/targets/__init__.py,sha256=HQG7I4U0xQhHiYctiUvwEBLxT2727oHP3xwrqotjmhk,78
|
24
24
|
cocoindex/targets/_engine_builtin_specs.py,sha256=DM7vyO0pkoukA-aBbvm_J4irgXhXIEqWdp-hwVpVRU4,2800
|
25
|
-
cocoindex/targets/lancedb.py,sha256=
|
25
|
+
cocoindex/targets/lancedb.py,sha256=1nzCre5p-fvKkmLOTvfpiLTfnhF3qMLqTvsTwNuGwVU,15749
|
26
26
|
cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
27
|
cocoindex/tests/test_convert.py,sha256=pG1AkEdIKSRE0trMV2dQ6VSQEqQJnPTJzfQCVrZSz8w,50791
|
28
28
|
cocoindex/tests/test_load_convert.py,sha256=XHuXhgLcazniEYrhoz_L5RFvgUwu-iy3EB9zgR6p95A,3339
|
@@ -34,4 +34,4 @@ cocoindex/typing.py,sha256=jZO3meRVL_RsFdhj8Sx6gWF-Z207VhoPtb1ZmqzAnH0,19974
|
|
34
34
|
cocoindex/user_app_loader.py,sha256=bc3Af-gYRxJ9GpObtpjegZY855oQBCv5FGkrkWV2yGY,1873
|
35
35
|
cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
|
36
36
|
cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
|
37
|
-
cocoindex-0.2.13.dist-info/RECORD,,
|
37
|
+
cocoindex-0.2.15.dist-info/RECORD,,
|
@@ -2428,7 +2428,7 @@ Software.
|
|
2428
2428
|
<h3 id="Apache-2.0">Apache License 2.0</h3>
|
2429
2429
|
<h4>Used by:</h4>
|
2430
2430
|
<ul class="license-used-by">
|
2431
|
-
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.13</a></li>
|
2431
|
+
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.15</a></li>
|
2432
2432
|
<li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
|
2433
2433
|
<li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
|
2434
2434
|
</ul>
|
File without changes
|
File without changes
|