cocoindex 0.2.13__cp311-abi3-macosx_11_0_arm64.whl → 0.2.15__cp311-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cocoindex/__init__.py CHANGED
@@ -2,6 +2,7 @@
2
2
  Cocoindex is a framework for building and running indexing pipelines.
3
3
  """
4
4
 
5
+ from . import _engine # type: ignore
5
6
  from . import functions, sources, targets, cli, utils
6
7
 
7
8
  from . import targets as storages # Deprecated: Use targets instead
@@ -21,7 +22,13 @@ from .flow import add_flow_def, remove_flow # DEPRECATED
21
22
  from .flow import update_all_flows_async, setup_all_flows, drop_all_flows
22
23
  from .lib import settings, init, start_server, stop
23
24
  from .llm import LlmSpec, LlmApiType
24
- from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
25
+ from .index import (
26
+ VectorSimilarityMetric,
27
+ VectorIndexDef,
28
+ IndexOptions,
29
+ HnswVectorIndexMethod,
30
+ IvfFlatVectorIndexMethod,
31
+ )
25
32
  from .setting import DatabaseConnectionSpec, Settings, ServerSettings
26
33
  from .setting import get_app_namespace
27
34
  from .query_handler import QueryHandlerResultFields, QueryInfo, QueryOutput
@@ -36,6 +43,8 @@ from .typing import (
36
43
  Json,
37
44
  )
38
45
 
46
+ _engine.init_pyo3_runtime()
47
+
39
48
  __all__ = [
40
49
  # Submodules
41
50
  "_engine",
@@ -82,6 +91,8 @@ __all__ = [
82
91
  "VectorSimilarityMetric",
83
92
  "VectorIndexDef",
84
93
  "IndexOptions",
94
+ "HnswVectorIndexMethod",
95
+ "IvfFlatVectorIndexMethod",
85
96
  # Settings
86
97
  "DatabaseConnectionSpec",
87
98
  "Settings",
cocoindex/_engine.abi3.so CHANGED
Binary file
cocoindex/functions.py CHANGED
@@ -2,13 +2,13 @@
2
2
 
3
3
  import dataclasses
4
4
  import functools
5
- from typing import Annotated, Any, Literal
5
+ from typing import Any, Literal
6
6
 
7
7
  import numpy as np
8
8
  from numpy.typing import NDArray
9
9
 
10
10
  from . import llm, op
11
- from .typing import TypeAttr, Vector
11
+ from .typing import Vector
12
12
 
13
13
 
14
14
  class ParseJson(op.FunctionSpec):
@@ -40,6 +40,24 @@ class SplitRecursively(op.FunctionSpec):
40
40
  custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
41
41
 
42
42
 
43
+ class SplitBySeparators(op.FunctionSpec):
44
+ """
45
+ Split text by specified regex separators only.
46
+ Output schema matches SplitRecursively for drop-in compatibility:
47
+ KTable rows with fields: location (Range), text (Str), start, end.
48
+ Args:
49
+ separators_regex: list[str] # e.g., [r"\\n\\n+"]
50
+ keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
51
+ include_empty: bool = False
52
+ trim: bool = True
53
+ """
54
+
55
+ separators_regex: list[str] = dataclasses.field(default_factory=list)
56
+ keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
57
+ include_empty: bool = False
58
+ trim: bool = True
59
+
60
+
43
61
  class EmbedText(op.FunctionSpec):
44
62
  """Embed a text into a vector space."""
45
63
 
cocoindex/index.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from enum import Enum
2
2
  from dataclasses import dataclass
3
- from typing import Sequence
3
+ from typing import Sequence, Union
4
4
 
5
5
 
6
6
  class VectorSimilarityMetric(Enum):
@@ -9,6 +9,26 @@ class VectorSimilarityMetric(Enum):
9
9
  INNER_PRODUCT = "InnerProduct"
10
10
 
11
11
 
12
+ @dataclass
13
+ class HnswVectorIndexMethod:
14
+ """HNSW vector index parameters."""
15
+
16
+ kind: str = "Hnsw"
17
+ m: int | None = None
18
+ ef_construction: int | None = None
19
+
20
+
21
+ @dataclass
22
+ class IvfFlatVectorIndexMethod:
23
+ """IVFFlat vector index parameters."""
24
+
25
+ kind: str = "IvfFlat"
26
+ lists: int | None = None
27
+
28
+
29
+ VectorIndexMethod = Union[HnswVectorIndexMethod, IvfFlatVectorIndexMethod]
30
+
31
+
12
32
  @dataclass
13
33
  class VectorIndexDef:
14
34
  """
@@ -17,6 +37,7 @@ class VectorIndexDef:
17
37
 
18
38
  field_name: str
19
39
  metric: VectorSimilarityMetric
40
+ method: VectorIndexMethod | None = None
20
41
 
21
42
 
22
43
  @dataclass
@@ -296,6 +296,12 @@ class _Connector:
296
296
  ) -> _State:
297
297
  if len(key_fields_schema) != 1:
298
298
  raise ValueError("LanceDB only supports a single key field")
299
+ if index_options.vector_indexes is not None:
300
+ for vector_index in index_options.vector_indexes:
301
+ if vector_index.method is not None:
302
+ raise ValueError(
303
+ "Vector index method is not configurable for LanceDB yet"
304
+ )
299
305
  return _State(
300
306
  key_field_schema=key_fields_schema[0],
301
307
  value_fields_schema=value_fields_schema,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.2.13
3
+ Version: 0.2.15
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: License :: OSI Approved :: Apache Software License
6
6
  Classifier: Operating System :: OS Independent
@@ -1,15 +1,15 @@
1
- cocoindex-0.2.13.dist-info/METADATA,sha256=08dUNFSA-_C_GyIUPTzNCV5ss-L-GP8r07ucSnlU7N4,13316
2
- cocoindex-0.2.13.dist-info/WHEEL,sha256=cVaoL47Ex1FxzwnkO_WCjy3a1Wl6mtZbBPTvTiNCHdY,103
3
- cocoindex-0.2.13.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
- cocoindex-0.2.13.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=kOcN_-Ew74skSjMDUYvpRfdbB2pO24VPcbup3dwagb4,717769
5
- cocoindex/__init__.py,sha256=AsoNLBgjJ-1AEKtwTnENJM9dEilHTq6rJ4qWUzGKSdc,2428
6
- cocoindex/_engine.abi3.so,sha256=gKWLgibmJt1uyCgB9SKHVDF9NnjjjKiepnCGwml840U,67174768
1
+ cocoindex-0.2.15.dist-info/METADATA,sha256=3u9PxzHp4NgjbknLyCtuASJeXskfIpODzlIpSyaBcSI,13316
2
+ cocoindex-0.2.15.dist-info/WHEEL,sha256=cVaoL47Ex1FxzwnkO_WCjy3a1Wl6mtZbBPTvTiNCHdY,103
3
+ cocoindex-0.2.15.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
+ cocoindex-0.2.15.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=LV2CtczxHQ5mN6Rj9TgnBa06dAxNPXZLXc0hMbco8tU,717769
5
+ cocoindex/__init__.py,sha256=6qZWVkK4WZ01BIAg3CPh_bRRdA6Clk4d4Q6OnZ2jFa4,2630
6
+ cocoindex/_engine.abi3.so,sha256=fCnSkunJcYsjrtesJqRupKG7o9neU6pQ25vZRHVOjUU,67288912
7
7
  cocoindex/auth_registry.py,sha256=_DOIY42C79joLCY_XczHwP5uebkmSavweoAHc0L3hQY,1334
8
8
  cocoindex/cli.py,sha256=69X30bFTFdM7c0_6lgIHR19CeQ7UEkobEQYihy8IdOQ,21599
9
9
  cocoindex/convert.py,sha256=itkUBCriOk8fdauahHRqJ-L8mnHehNZsBe_FouN0K1Q,28695
10
10
  cocoindex/flow.py,sha256=Vk72dX_svfpinvsolQ11aw6YDqbzaafrAi7xrQHo1i0,39844
11
- cocoindex/functions.py,sha256=09erNt3WbzY9l1KER-akBF2O5-6xEahV2ORBECaL6yk,12260
12
- cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
11
+ cocoindex/functions.py,sha256=V4ljBnCprvA25XlCVvNLwK5ergXiEcKU76jkOGC-X3A,12882
12
+ cocoindex/index.py,sha256=tz5ilvmOp0BtroGehCQDqWK_pIX9m6ghkhcxsDVU8WE,982
13
13
  cocoindex/lib.py,sha256=0XheDF7fiFdqExpdqzU-VKun_Zll6DwZ5JfTm7u42aY,2284
14
14
  cocoindex/llm.py,sha256=Pv_cdnRngTLtuLU9AUmS8izIHhcKVnuBNolC33f9BDI,851
15
15
  cocoindex/op.py,sha256=c1xzoiWoPb6PYiCQAUsyzMRJwPvWaC3o5emPk38oY-4,26551
@@ -22,7 +22,7 @@ cocoindex/sources.py,sha256=FYz7cWYasLGDaYoIEQ1dF2uprgUETHWsTIrIS7n6pQE,3188
22
22
  cocoindex/subprocess_exec.py,sha256=r1xO84uek4VP4I6i87JMwsH5xFm3vKW0ABvgn0jskt4,10088
23
23
  cocoindex/targets/__init__.py,sha256=HQG7I4U0xQhHiYctiUvwEBLxT2727oHP3xwrqotjmhk,78
24
24
  cocoindex/targets/_engine_builtin_specs.py,sha256=DM7vyO0pkoukA-aBbvm_J4irgXhXIEqWdp-hwVpVRU4,2800
25
- cocoindex/targets/lancedb.py,sha256=pEHEBCOEe358e14cqAI1SytUTBCgRkYCsv9fJE7biTA,15440
25
+ cocoindex/targets/lancedb.py,sha256=1nzCre5p-fvKkmLOTvfpiLTfnhF3qMLqTvsTwNuGwVU,15749
26
26
  cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  cocoindex/tests/test_convert.py,sha256=pG1AkEdIKSRE0trMV2dQ6VSQEqQJnPTJzfQCVrZSz8w,50791
28
28
  cocoindex/tests/test_load_convert.py,sha256=XHuXhgLcazniEYrhoz_L5RFvgUwu-iy3EB9zgR6p95A,3339
@@ -34,4 +34,4 @@ cocoindex/typing.py,sha256=jZO3meRVL_RsFdhj8Sx6gWF-Z207VhoPtb1ZmqzAnH0,19974
34
34
  cocoindex/user_app_loader.py,sha256=bc3Af-gYRxJ9GpObtpjegZY855oQBCv5FGkrkWV2yGY,1873
35
35
  cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
36
36
  cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
37
- cocoindex-0.2.13.dist-info/RECORD,,
37
+ cocoindex-0.2.15.dist-info/RECORD,,
@@ -2428,7 +2428,7 @@ Software.
2428
2428
  <h3 id="Apache-2.0">Apache License 2.0</h3>
2429
2429
  <h4>Used by:</h4>
2430
2430
  <ul class="license-used-by">
2431
- <li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.13</a></li>
2431
+ <li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.15</a></li>
2432
2432
  <li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
2433
2433
  <li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
2434
2434
  </ul>