cocoindex 0.2.22__cp311-abi3-macosx_11_0_arm64.whl → 0.3.0__cp311-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.abi3.so +0 -0
- cocoindex/functions/sbert.py +6 -3
- cocoindex/op.py +52 -17
- cocoindex/runtime.py +36 -6
- cocoindex/subprocess_exec.py +1 -27
- {cocoindex-0.2.22.dist-info → cocoindex-0.3.0.dist-info}/METADATA +1 -1
- {cocoindex-0.2.22.dist-info → cocoindex-0.3.0.dist-info}/RECORD +10 -10
- {cocoindex-0.2.22.dist-info → cocoindex-0.3.0.dist-info}/licenses/THIRD_PARTY_NOTICES.html +1 -1
- {cocoindex-0.2.22.dist-info → cocoindex-0.3.0.dist-info}/WHEEL +0 -0
- {cocoindex-0.2.22.dist-info → cocoindex-0.3.0.dist-info}/entry_points.txt +0 -0
cocoindex/_engine.abi3.so
CHANGED
|
Binary file
|
cocoindex/functions/sbert.py
CHANGED
|
@@ -31,6 +31,7 @@ class SentenceTransformerEmbed(op.FunctionSpec):
|
|
|
31
31
|
@op.executor_class(
|
|
32
32
|
gpu=True,
|
|
33
33
|
cache=True,
|
|
34
|
+
batching=True,
|
|
34
35
|
behavior_version=1,
|
|
35
36
|
arg_relationship=(op.ArgRelationship.EMBEDDING_ORIGIN_TEXT, "text"),
|
|
36
37
|
)
|
|
@@ -57,7 +58,9 @@ class SentenceTransformerEmbedExecutor:
|
|
|
57
58
|
dim = self._model.get_sentence_embedding_dimension()
|
|
58
59
|
return Vector[np.float32, Literal[dim]] # type: ignore
|
|
59
60
|
|
|
60
|
-
def __call__(self, text: str) -> NDArray[np.float32]:
|
|
61
|
+
def __call__(self, text: list[str]) -> list[NDArray[np.float32]]:
|
|
61
62
|
assert self._model is not None
|
|
62
|
-
|
|
63
|
-
|
|
63
|
+
results: list[NDArray[np.float32]] = self._model.encode(
|
|
64
|
+
text, convert_to_numpy=True
|
|
65
|
+
)
|
|
66
|
+
return results
|
cocoindex/op.py
CHANGED
|
@@ -32,6 +32,7 @@ from .engine_value import (
|
|
|
32
32
|
)
|
|
33
33
|
from .typing import (
|
|
34
34
|
KEY_FIELD_NAME,
|
|
35
|
+
AnalyzedListType,
|
|
35
36
|
AnalyzedTypeInfo,
|
|
36
37
|
StructSchema,
|
|
37
38
|
StructType,
|
|
@@ -45,6 +46,7 @@ from .typing import (
|
|
|
45
46
|
EnrichedValueType,
|
|
46
47
|
decode_engine_field_schemas,
|
|
47
48
|
FieldSchema,
|
|
49
|
+
ValueType,
|
|
48
50
|
)
|
|
49
51
|
from .runtime import to_async_call
|
|
50
52
|
from .index import IndexOptions
|
|
@@ -149,6 +151,7 @@ class OpArgs:
|
|
|
149
151
|
"""
|
|
150
152
|
- gpu: Whether the executor will be executed on GPU.
|
|
151
153
|
- cache: Whether the executor will be cached.
|
|
154
|
+
- batching: Whether the executor will be batched.
|
|
152
155
|
- behavior_version: The behavior version of the executor. Cache will be invalidated if it
|
|
153
156
|
changes. Must be provided if `cache` is True.
|
|
154
157
|
- arg_relationship: It specifies the relationship between an input argument and the output,
|
|
@@ -158,6 +161,7 @@ class OpArgs:
|
|
|
158
161
|
|
|
159
162
|
gpu: bool = False
|
|
160
163
|
cache: bool = False
|
|
164
|
+
batching: bool = False
|
|
161
165
|
behavior_version: int | None = None
|
|
162
166
|
arg_relationship: tuple[ArgRelationship, str] | None = None
|
|
163
167
|
|
|
@@ -168,6 +172,16 @@ class _ArgInfo:
|
|
|
168
172
|
is_required: bool
|
|
169
173
|
|
|
170
174
|
|
|
175
|
+
def _make_batched_engine_value_decoder(
|
|
176
|
+
field_path: list[str], src_type: ValueType, dst_type_info: AnalyzedTypeInfo
|
|
177
|
+
) -> Callable[[Any], Any]:
|
|
178
|
+
if not isinstance(dst_type_info.variant, AnalyzedListType):
|
|
179
|
+
raise ValueError("Expected arguments for batching function to be a list type")
|
|
180
|
+
elem_type_info = analyze_type_info(dst_type_info.variant.elem_type)
|
|
181
|
+
base_decoder = make_engine_value_decoder(field_path, src_type, elem_type_info)
|
|
182
|
+
return lambda value: [base_decoder(v) for v in value]
|
|
183
|
+
|
|
184
|
+
|
|
171
185
|
def _register_op_factory(
|
|
172
186
|
category: OpCategory,
|
|
173
187
|
expected_args: list[tuple[str, inspect.Parameter]],
|
|
@@ -181,6 +195,10 @@ def _register_op_factory(
|
|
|
181
195
|
Register an op factory.
|
|
182
196
|
"""
|
|
183
197
|
|
|
198
|
+
if op_args.batching:
|
|
199
|
+
if len(expected_args) != 1:
|
|
200
|
+
raise ValueError("Batching is only supported for single argument functions")
|
|
201
|
+
|
|
184
202
|
class _WrappedExecutor:
|
|
185
203
|
_executor: Any
|
|
186
204
|
_args_info: list[_ArgInfo]
|
|
@@ -208,7 +226,7 @@ def _register_op_factory(
|
|
|
208
226
|
"""
|
|
209
227
|
self._args_info = []
|
|
210
228
|
self._kwargs_info = {}
|
|
211
|
-
attributes = []
|
|
229
|
+
attributes = {}
|
|
212
230
|
potentially_missing_required_arg = False
|
|
213
231
|
|
|
214
232
|
def process_arg(
|
|
@@ -220,14 +238,17 @@ def _register_op_factory(
|
|
|
220
238
|
if op_args.arg_relationship is not None:
|
|
221
239
|
related_attr, related_arg_name = op_args.arg_relationship
|
|
222
240
|
if related_arg_name == arg_name:
|
|
223
|
-
attributes.append(
|
|
224
|
-
TypeAttr(related_attr.value, actual_arg.analyzed_value)
|
|
225
|
-
)
|
|
241
|
+
attributes[related_attr.value] = actual_arg.analyzed_value
|
|
226
242
|
type_info = analyze_type_info(arg_param.annotation)
|
|
227
243
|
enriched = EnrichedValueType.decode(actual_arg.value_type)
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
244
|
+
if op_args.batching:
|
|
245
|
+
decoder = _make_batched_engine_value_decoder(
|
|
246
|
+
[arg_name], enriched.type, type_info
|
|
247
|
+
)
|
|
248
|
+
else:
|
|
249
|
+
decoder = make_engine_value_decoder(
|
|
250
|
+
[arg_name], enriched.type, type_info
|
|
251
|
+
)
|
|
231
252
|
is_required = not type_info.nullable
|
|
232
253
|
if is_required and actual_arg.value_type.get("nullable", False):
|
|
233
254
|
potentially_missing_required_arg = True
|
|
@@ -302,20 +323,32 @@ def _register_op_factory(
|
|
|
302
323
|
if len(missing_args) > 0:
|
|
303
324
|
raise ValueError(f"Missing arguments: {', '.join(missing_args)}")
|
|
304
325
|
|
|
326
|
+
analyzed_expected_return_type = analyze_type_info(expected_return)
|
|
327
|
+
self._result_encoder = make_engine_value_encoder(
|
|
328
|
+
analyzed_expected_return_type
|
|
329
|
+
)
|
|
330
|
+
|
|
305
331
|
base_analyze_method = getattr(self._executor, "analyze", None)
|
|
306
332
|
if base_analyze_method is not None:
|
|
307
|
-
|
|
333
|
+
analyzed_result_type = analyze_type_info(base_analyze_method())
|
|
308
334
|
else:
|
|
309
|
-
|
|
335
|
+
if op_args.batching:
|
|
336
|
+
if not isinstance(
|
|
337
|
+
analyzed_expected_return_type.variant, AnalyzedListType
|
|
338
|
+
):
|
|
339
|
+
raise ValueError(
|
|
340
|
+
"Expected return type for batching function to be a list type"
|
|
341
|
+
)
|
|
342
|
+
analyzed_result_type = analyze_type_info(
|
|
343
|
+
analyzed_expected_return_type.variant.elem_type
|
|
344
|
+
)
|
|
345
|
+
else:
|
|
346
|
+
analyzed_result_type = analyzed_expected_return_type
|
|
310
347
|
if len(attributes) > 0:
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
analyzed_result_type_info = analyze_type_info(result_type)
|
|
314
|
-
encoded_type = encode_enriched_type_info(analyzed_result_type_info)
|
|
348
|
+
analyzed_result_type.attrs = attributes
|
|
315
349
|
if potentially_missing_required_arg:
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
self._result_encoder = make_engine_value_encoder(analyzed_result_type_info)
|
|
350
|
+
analyzed_result_type.nullable = True
|
|
351
|
+
encoded_type = encode_enriched_type_info(analyzed_result_type)
|
|
319
352
|
|
|
320
353
|
return encoded_type
|
|
321
354
|
|
|
@@ -359,7 +392,9 @@ def _register_op_factory(
|
|
|
359
392
|
|
|
360
393
|
if category == OpCategory.FUNCTION:
|
|
361
394
|
_engine.register_function_factory(
|
|
362
|
-
op_kind,
|
|
395
|
+
op_kind,
|
|
396
|
+
_EngineFunctionExecutorFactory(spec_loader, _WrappedExecutor),
|
|
397
|
+
op_args.batching,
|
|
363
398
|
)
|
|
364
399
|
else:
|
|
365
400
|
raise ValueError(f"Unsupported executor type {category}")
|
cocoindex/runtime.py
CHANGED
|
@@ -6,8 +6,9 @@ manner.
|
|
|
6
6
|
import threading
|
|
7
7
|
import asyncio
|
|
8
8
|
import inspect
|
|
9
|
-
|
|
9
|
+
import warnings
|
|
10
10
|
|
|
11
|
+
from typing import Any, Callable, Awaitable, TypeVar, Coroutine
|
|
11
12
|
|
|
12
13
|
T = TypeVar("T")
|
|
13
14
|
|
|
@@ -24,15 +25,44 @@ class _ExecutionContext:
|
|
|
24
25
|
"""Get the event loop for the cocoindex library."""
|
|
25
26
|
with self._lock:
|
|
26
27
|
if self._event_loop is None:
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
loop = asyncio.new_event_loop()
|
|
29
|
+
self._event_loop = loop
|
|
30
|
+
|
|
31
|
+
def _runner(l: asyncio.AbstractEventLoop) -> None:
|
|
32
|
+
asyncio.set_event_loop(l)
|
|
33
|
+
l.run_forever()
|
|
34
|
+
|
|
35
|
+
threading.Thread(target=_runner, args=(loop,), daemon=True).start()
|
|
31
36
|
return self._event_loop
|
|
32
37
|
|
|
33
38
|
def run(self, coro: Coroutine[Any, Any, T]) -> T:
|
|
34
39
|
"""Run a coroutine in the event loop, blocking until it finishes. Return its result."""
|
|
35
|
-
|
|
40
|
+
try:
|
|
41
|
+
running_loop = asyncio.get_running_loop()
|
|
42
|
+
except RuntimeError:
|
|
43
|
+
running_loop = None
|
|
44
|
+
|
|
45
|
+
loop = self.event_loop
|
|
46
|
+
|
|
47
|
+
if running_loop is not None:
|
|
48
|
+
if running_loop is loop:
|
|
49
|
+
raise RuntimeError(
|
|
50
|
+
"CocoIndex sync API was called from inside CocoIndex's async context. "
|
|
51
|
+
"Use the async variant of this method instead."
|
|
52
|
+
)
|
|
53
|
+
warnings.warn(
|
|
54
|
+
"CocoIndex sync API was called inside an existing event loop. "
|
|
55
|
+
"This may block other tasks. Prefer the async method.",
|
|
56
|
+
RuntimeWarning,
|
|
57
|
+
stacklevel=2,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
fut = asyncio.run_coroutine_threadsafe(coro, loop)
|
|
61
|
+
try:
|
|
62
|
+
return fut.result()
|
|
63
|
+
except KeyboardInterrupt:
|
|
64
|
+
fut.cancel()
|
|
65
|
+
raise
|
|
36
66
|
|
|
37
67
|
|
|
38
68
|
execution_context = _ExecutionContext()
|
cocoindex/subprocess_exec.py
CHANGED
|
@@ -19,7 +19,6 @@ import threading
|
|
|
19
19
|
import asyncio
|
|
20
20
|
import os
|
|
21
21
|
import time
|
|
22
|
-
import atexit
|
|
23
22
|
from .user_app_loader import load_user_app
|
|
24
23
|
from .runtime import execution_context
|
|
25
24
|
import logging
|
|
@@ -32,39 +31,14 @@ WATCHDOG_INTERVAL_SECONDS = 10.0
|
|
|
32
31
|
# ---------------------------------------------
|
|
33
32
|
_pool_lock = threading.Lock()
|
|
34
33
|
_pool: ProcessPoolExecutor | None = None
|
|
35
|
-
_pool_cleanup_registered = False
|
|
36
34
|
_user_apps: list[str] = []
|
|
37
35
|
_logger = logging.getLogger(__name__)
|
|
38
36
|
|
|
39
37
|
|
|
40
|
-
def shutdown_pool_at_exit() -> None:
|
|
41
|
-
"""Best-effort shutdown of the global ProcessPoolExecutor on interpreter exit."""
|
|
42
|
-
global _pool, _pool_cleanup_registered # pylint: disable=global-statement
|
|
43
|
-
with _pool_lock:
|
|
44
|
-
if _pool is not None:
|
|
45
|
-
try:
|
|
46
|
-
_pool.shutdown(wait=True, cancel_futures=True)
|
|
47
|
-
except Exception as e:
|
|
48
|
-
_logger.error(
|
|
49
|
-
"Error during ProcessPoolExecutor shutdown at exit: %s",
|
|
50
|
-
e,
|
|
51
|
-
exc_info=True,
|
|
52
|
-
)
|
|
53
|
-
finally:
|
|
54
|
-
_pool = None
|
|
55
|
-
_pool_cleanup_registered = False
|
|
56
|
-
|
|
57
|
-
|
|
58
38
|
def _get_pool() -> ProcessPoolExecutor:
|
|
59
|
-
global _pool
|
|
39
|
+
global _pool # pylint: disable=global-statement
|
|
60
40
|
with _pool_lock:
|
|
61
41
|
if _pool is None:
|
|
62
|
-
if not _pool_cleanup_registered:
|
|
63
|
-
# Register the shutdown at exit at creation time (rather than at import time)
|
|
64
|
-
# to make sure it's executed earlier in the shutdown sequence.
|
|
65
|
-
atexit.register(shutdown_pool_at_exit)
|
|
66
|
-
_pool_cleanup_registered = True
|
|
67
|
-
|
|
68
42
|
# Single worker process as requested
|
|
69
43
|
_pool = ProcessPoolExecutor(
|
|
70
44
|
max_workers=1,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
cocoindex-0.
|
|
2
|
-
cocoindex-0.2.22.dist-info/WHEEL,sha256=kelVgS1CONjYuT6OEIC92ofaP9Aiw74EQCpLzMjs2Gg,103
|
|
3
|
-
cocoindex-0.2.22.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
|
4
|
-
cocoindex-0.
|
|
1
|
+
cocoindex-0.3.0.dist-info/METADATA,sha256=vMA1fqUWkDkxXgNT76l8pOBEqgt-Cu8XEpC4BPCbzp8,14193
|
|
2
|
+
cocoindex-0.3.0.dist-info/WHEEL,sha256=kelVgS1CONjYuT6OEIC92ofaP9Aiw74EQCpLzMjs2Gg,103
|
|
3
|
+
cocoindex-0.3.0.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
|
4
|
+
cocoindex-0.3.0.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=VRDb6qOsN808v5fkXRUNfGUqSvD_OJWjO6hD3uWfKFg,750831
|
|
5
5
|
cocoindex/__init__.py,sha256=6qZWVkK4WZ01BIAg3CPh_bRRdA6Clk4d4Q6OnZ2jFa4,2630
|
|
6
|
-
cocoindex/_engine.abi3.so,sha256=
|
|
6
|
+
cocoindex/_engine.abi3.so,sha256=BuR-KcjvCA1J9eydM4PJDukz-Qf2BN6u6nG2yAR0Giw,67325168
|
|
7
7
|
cocoindex/auth_registry.py,sha256=g-uLDWLYW5NMbYe7q4Y-sU5dSyrlJXBEciyWtAiP9KE,1340
|
|
8
8
|
cocoindex/cli.py,sha256=k7bl8RTUZoNNxTlQMr-Y3-9-rTNt8z1v7rJWqsajYC8,24792
|
|
9
9
|
cocoindex/engine_object.py,sha256=5YTuWoR3WILhyt3PW-d9es3MAas_xD6tZZqvipN-sjg,10050
|
|
@@ -12,19 +12,19 @@ cocoindex/flow.py,sha256=xDz3rOo4RhbboknvC-KnbWq8RBykEO0YsjGSBfXqIEg,40076
|
|
|
12
12
|
cocoindex/functions/__init__.py,sha256=V2IF4h-Cqq4OD_GN3Oqdry-FArORyRCKmqJ7g5UlJr8,1021
|
|
13
13
|
cocoindex/functions/_engine_builtin_specs.py,sha256=WpCGrjUfJBa8xZP5JiEmA8kLu7fp9Rcs7ynpuJmvSGg,1786
|
|
14
14
|
cocoindex/functions/colpali.py,sha256=oACyG3qG2dquyCJ6bT7FkMkua5rXDLSxnOHcgoz9waU,8865
|
|
15
|
-
cocoindex/functions/sbert.py,sha256=
|
|
15
|
+
cocoindex/functions/sbert.py,sha256=o_DS1ZAqpNc4u1Yrm9DO5LxfMFrlH_hfb0MWobJrs_k,2223
|
|
16
16
|
cocoindex/index.py,sha256=tz5ilvmOp0BtroGehCQDqWK_pIX9m6ghkhcxsDVU8WE,982
|
|
17
17
|
cocoindex/lib.py,sha256=spfdU4IbzdffHyGdrQPIw_qGo9aX0OAAboqsjj8bTiQ,2290
|
|
18
18
|
cocoindex/llm.py,sha256=8ZdJhOmhdb2xEcCxk6rDpnj6hlhCyFBmJdhCNMqAOP4,875
|
|
19
|
-
cocoindex/op.py,sha256=
|
|
19
|
+
cocoindex/op.py,sha256=FtYHn3RPBWSBAAaFbmp-FhLbs0qrMMd3lZdheM-udUo,37672
|
|
20
20
|
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
cocoindex/query_handler.py,sha256=X-SQT71LHiOOXn6-TJlQcGodJk-iT8p_1TcIMvRLBRI,1344
|
|
22
|
-
cocoindex/runtime.py,sha256=
|
|
22
|
+
cocoindex/runtime.py,sha256=Qig--_3Uwvcw-MuuEdBg-P92IYB6So_nRocD2jnEi3g,2298
|
|
23
23
|
cocoindex/setting.py,sha256=1Dx8ktjwf-8BiXrbsmfn5Mzudb2SQYqFdRnSNGVKaLk,4960
|
|
24
24
|
cocoindex/setup.py,sha256=7uIHKN4FOCuoidPXcKyGTrkqpkl9luL49-6UcnMxYzw,3068
|
|
25
25
|
cocoindex/sources/__init__.py,sha256=Yu9VHNaGlOEE3jpqfIseswsg25Le3HzwDr6XJAn22Ns,78
|
|
26
26
|
cocoindex/sources/_engine_builtin_specs.py,sha256=MDsNP1y0vlUCGTAmA4jj2X8AMn1MVATbZzSi66ggvkA,3598
|
|
27
|
-
cocoindex/subprocess_exec.py,sha256=
|
|
27
|
+
cocoindex/subprocess_exec.py,sha256=41PyvMaZ9q7B-F0YTUg1xdNVM_6_HdFErDbxDJMUFPg,9040
|
|
28
28
|
cocoindex/targets/__init__.py,sha256=HQG7I4U0xQhHiYctiUvwEBLxT2727oHP3xwrqotjmhk,78
|
|
29
29
|
cocoindex/targets/_engine_builtin_specs.py,sha256=glXUN5bj11Jxky1VPvmGnWnMHXTQWEh08INcbldo3F4,3375
|
|
30
30
|
cocoindex/targets/lancedb.py,sha256=1nzCre5p-fvKkmLOTvfpiLTfnhF3qMLqTvsTwNuGwVU,15749
|
|
@@ -39,4 +39,4 @@ cocoindex/typing.py,sha256=qQj5uM6XAKHzRJ2BIEs7X-xeOXVcM9p_xz5SVqPVvS8,23914
|
|
|
39
39
|
cocoindex/user_app_loader.py,sha256=bc3Af-gYRxJ9GpObtpjegZY855oQBCv5FGkrkWV2yGY,1873
|
|
40
40
|
cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
|
|
41
41
|
cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
|
|
42
|
-
cocoindex-0.2.22.dist-info/RECORD,,
|
|
42
|
+
cocoindex-0.3.0.dist-info/RECORD,,
|
|
@@ -2846,7 +2846,7 @@ Software.
|
|
|
2846
2846
|
<h3 id="Apache-2.0">Apache License 2.0</h3>
|
|
2847
2847
|
<h4>Used by:</h4>
|
|
2848
2848
|
<ul class="license-used-by">
|
|
2849
|
-
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.22</a></li>
|
|
2849
|
+
<li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.3.0</a></li>
|
|
2850
2850
|
<li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
|
|
2851
2851
|
<li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
|
|
2852
2852
|
</ul>
|
|
File without changes
|
|
File without changes
|