cocoindex 0.2.23__cp311-abi3-macosx_10_12_x86_64.whl → 0.3.0__cp311-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cocoindex/_engine.abi3.so CHANGED
Binary file
@@ -31,6 +31,7 @@ class SentenceTransformerEmbed(op.FunctionSpec):
31
31
  @op.executor_class(
32
32
  gpu=True,
33
33
  cache=True,
34
+ batching=True,
34
35
  behavior_version=1,
35
36
  arg_relationship=(op.ArgRelationship.EMBEDDING_ORIGIN_TEXT, "text"),
36
37
  )
@@ -57,7 +58,9 @@ class SentenceTransformerEmbedExecutor:
57
58
  dim = self._model.get_sentence_embedding_dimension()
58
59
  return Vector[np.float32, Literal[dim]] # type: ignore
59
60
 
60
- def __call__(self, text: str) -> NDArray[np.float32]:
61
+ def __call__(self, text: list[str]) -> list[NDArray[np.float32]]:
61
62
  assert self._model is not None
62
- result: NDArray[np.float32] = self._model.encode(text, convert_to_numpy=True)
63
- return result
63
+ results: list[NDArray[np.float32]] = self._model.encode(
64
+ text, convert_to_numpy=True
65
+ )
66
+ return results
cocoindex/op.py CHANGED
@@ -32,6 +32,7 @@ from .engine_value import (
32
32
  )
33
33
  from .typing import (
34
34
  KEY_FIELD_NAME,
35
+ AnalyzedListType,
35
36
  AnalyzedTypeInfo,
36
37
  StructSchema,
37
38
  StructType,
@@ -45,6 +46,7 @@ from .typing import (
45
46
  EnrichedValueType,
46
47
  decode_engine_field_schemas,
47
48
  FieldSchema,
49
+ ValueType,
48
50
  )
49
51
  from .runtime import to_async_call
50
52
  from .index import IndexOptions
@@ -149,6 +151,7 @@ class OpArgs:
149
151
  """
150
152
  - gpu: Whether the executor will be executed on GPU.
151
153
  - cache: Whether the executor will be cached.
154
+ - batching: Whether the executor will be batched.
152
155
  - behavior_version: The behavior version of the executor. Cache will be invalidated if it
153
156
  changes. Must be provided if `cache` is True.
154
157
  - arg_relationship: It specifies the relationship between an input argument and the output,
@@ -158,6 +161,7 @@ class OpArgs:
158
161
 
159
162
  gpu: bool = False
160
163
  cache: bool = False
164
+ batching: bool = False
161
165
  behavior_version: int | None = None
162
166
  arg_relationship: tuple[ArgRelationship, str] | None = None
163
167
 
@@ -168,6 +172,16 @@ class _ArgInfo:
168
172
  is_required: bool
169
173
 
170
174
 
175
+ def _make_batched_engine_value_decoder(
176
+ field_path: list[str], src_type: ValueType, dst_type_info: AnalyzedTypeInfo
177
+ ) -> Callable[[Any], Any]:
178
+ if not isinstance(dst_type_info.variant, AnalyzedListType):
179
+ raise ValueError("Expected arguments for batching function to be a list type")
180
+ elem_type_info = analyze_type_info(dst_type_info.variant.elem_type)
181
+ base_decoder = make_engine_value_decoder(field_path, src_type, elem_type_info)
182
+ return lambda value: [base_decoder(v) for v in value]
183
+
184
+
171
185
  def _register_op_factory(
172
186
  category: OpCategory,
173
187
  expected_args: list[tuple[str, inspect.Parameter]],
@@ -181,6 +195,10 @@ def _register_op_factory(
181
195
  Register an op factory.
182
196
  """
183
197
 
198
+ if op_args.batching:
199
+ if len(expected_args) != 1:
200
+ raise ValueError("Batching is only supported for single argument functions")
201
+
184
202
  class _WrappedExecutor:
185
203
  _executor: Any
186
204
  _args_info: list[_ArgInfo]
@@ -208,7 +226,7 @@ def _register_op_factory(
208
226
  """
209
227
  self._args_info = []
210
228
  self._kwargs_info = {}
211
- attributes = []
229
+ attributes = {}
212
230
  potentially_missing_required_arg = False
213
231
 
214
232
  def process_arg(
@@ -220,14 +238,17 @@ def _register_op_factory(
220
238
  if op_args.arg_relationship is not None:
221
239
  related_attr, related_arg_name = op_args.arg_relationship
222
240
  if related_arg_name == arg_name:
223
- attributes.append(
224
- TypeAttr(related_attr.value, actual_arg.analyzed_value)
225
- )
241
+ attributes[related_attr.value] = actual_arg.analyzed_value
226
242
  type_info = analyze_type_info(arg_param.annotation)
227
243
  enriched = EnrichedValueType.decode(actual_arg.value_type)
228
- decoder = make_engine_value_decoder(
229
- [arg_name], enriched.type, type_info
230
- )
244
+ if op_args.batching:
245
+ decoder = _make_batched_engine_value_decoder(
246
+ [arg_name], enriched.type, type_info
247
+ )
248
+ else:
249
+ decoder = make_engine_value_decoder(
250
+ [arg_name], enriched.type, type_info
251
+ )
231
252
  is_required = not type_info.nullable
232
253
  if is_required and actual_arg.value_type.get("nullable", False):
233
254
  potentially_missing_required_arg = True
@@ -302,20 +323,32 @@ def _register_op_factory(
302
323
  if len(missing_args) > 0:
303
324
  raise ValueError(f"Missing arguments: {', '.join(missing_args)}")
304
325
 
326
+ analyzed_expected_return_type = analyze_type_info(expected_return)
327
+ self._result_encoder = make_engine_value_encoder(
328
+ analyzed_expected_return_type
329
+ )
330
+
305
331
  base_analyze_method = getattr(self._executor, "analyze", None)
306
332
  if base_analyze_method is not None:
307
- result_type = base_analyze_method()
333
+ analyzed_result_type = analyze_type_info(base_analyze_method())
308
334
  else:
309
- result_type = expected_return
335
+ if op_args.batching:
336
+ if not isinstance(
337
+ analyzed_expected_return_type.variant, AnalyzedListType
338
+ ):
339
+ raise ValueError(
340
+ "Expected return type for batching function to be a list type"
341
+ )
342
+ analyzed_result_type = analyze_type_info(
343
+ analyzed_expected_return_type.variant.elem_type
344
+ )
345
+ else:
346
+ analyzed_result_type = analyzed_expected_return_type
310
347
  if len(attributes) > 0:
311
- result_type = Annotated[result_type, *attributes]
312
-
313
- analyzed_result_type_info = analyze_type_info(result_type)
314
- encoded_type = encode_enriched_type_info(analyzed_result_type_info)
348
+ analyzed_result_type.attrs = attributes
315
349
  if potentially_missing_required_arg:
316
- encoded_type["nullable"] = True
317
-
318
- self._result_encoder = make_engine_value_encoder(analyzed_result_type_info)
350
+ analyzed_result_type.nullable = True
351
+ encoded_type = encode_enriched_type_info(analyzed_result_type)
319
352
 
320
353
  return encoded_type
321
354
 
@@ -359,7 +392,9 @@ def _register_op_factory(
359
392
 
360
393
  if category == OpCategory.FUNCTION:
361
394
  _engine.register_function_factory(
362
- op_kind, _EngineFunctionExecutorFactory(spec_loader, _WrappedExecutor)
395
+ op_kind,
396
+ _EngineFunctionExecutorFactory(spec_loader, _WrappedExecutor),
397
+ op_args.batching,
363
398
  )
364
399
  else:
365
400
  raise ValueError(f"Unsupported executor type {category}")
cocoindex/runtime.py CHANGED
@@ -6,8 +6,9 @@ manner.
6
6
  import threading
7
7
  import asyncio
8
8
  import inspect
9
- from typing import Any, Callable, Coroutine, TypeVar, Awaitable
9
+ import warnings
10
10
 
11
+ from typing import Any, Callable, Awaitable, TypeVar, Coroutine
11
12
 
12
13
  T = TypeVar("T")
13
14
 
@@ -24,15 +25,44 @@ class _ExecutionContext:
24
25
  """Get the event loop for the cocoindex library."""
25
26
  with self._lock:
26
27
  if self._event_loop is None:
27
- self._event_loop = asyncio.new_event_loop()
28
- threading.Thread(
29
- target=self._event_loop.run_forever, daemon=True
30
- ).start()
28
+ loop = asyncio.new_event_loop()
29
+ self._event_loop = loop
30
+
31
+ def _runner(l: asyncio.AbstractEventLoop) -> None:
32
+ asyncio.set_event_loop(l)
33
+ l.run_forever()
34
+
35
+ threading.Thread(target=_runner, args=(loop,), daemon=True).start()
31
36
  return self._event_loop
32
37
 
33
38
  def run(self, coro: Coroutine[Any, Any, T]) -> T:
34
39
  """Run a coroutine in the event loop, blocking until it finishes. Return its result."""
35
- return asyncio.run_coroutine_threadsafe(coro, self.event_loop).result()
40
+ try:
41
+ running_loop = asyncio.get_running_loop()
42
+ except RuntimeError:
43
+ running_loop = None
44
+
45
+ loop = self.event_loop
46
+
47
+ if running_loop is not None:
48
+ if running_loop is loop:
49
+ raise RuntimeError(
50
+ "CocoIndex sync API was called from inside CocoIndex's async context. "
51
+ "Use the async variant of this method instead."
52
+ )
53
+ warnings.warn(
54
+ "CocoIndex sync API was called inside an existing event loop. "
55
+ "This may block other tasks. Prefer the async method.",
56
+ RuntimeWarning,
57
+ stacklevel=2,
58
+ )
59
+
60
+ fut = asyncio.run_coroutine_threadsafe(coro, loop)
61
+ try:
62
+ return fut.result()
63
+ except KeyboardInterrupt:
64
+ fut.cancel()
65
+ raise
36
66
 
37
67
 
38
68
  execution_context = _ExecutionContext()
@@ -19,7 +19,6 @@ import threading
19
19
  import asyncio
20
20
  import os
21
21
  import time
22
- import atexit
23
22
  from .user_app_loader import load_user_app
24
23
  from .runtime import execution_context
25
24
  import logging
@@ -32,39 +31,14 @@ WATCHDOG_INTERVAL_SECONDS = 10.0
32
31
  # ---------------------------------------------
33
32
  _pool_lock = threading.Lock()
34
33
  _pool: ProcessPoolExecutor | None = None
35
- _pool_cleanup_registered = False
36
34
  _user_apps: list[str] = []
37
35
  _logger = logging.getLogger(__name__)
38
36
 
39
37
 
40
- def shutdown_pool_at_exit() -> None:
41
- """Best-effort shutdown of the global ProcessPoolExecutor on interpreter exit."""
42
- global _pool, _pool_cleanup_registered # pylint: disable=global-statement
43
- with _pool_lock:
44
- if _pool is not None:
45
- try:
46
- _pool.shutdown(wait=True, cancel_futures=True)
47
- except Exception as e:
48
- _logger.error(
49
- "Error during ProcessPoolExecutor shutdown at exit: %s",
50
- e,
51
- exc_info=True,
52
- )
53
- finally:
54
- _pool = None
55
- _pool_cleanup_registered = False
56
-
57
-
58
38
  def _get_pool() -> ProcessPoolExecutor:
59
- global _pool, _pool_cleanup_registered # pylint: disable=global-statement
39
+ global _pool # pylint: disable=global-statement
60
40
  with _pool_lock:
61
41
  if _pool is None:
62
- if not _pool_cleanup_registered:
63
- # Register the shutdown at exit at creation time (rather than at import time)
64
- # to make sure it's executed earlier in the shutdown sequence.
65
- atexit.register(shutdown_pool_at_exit)
66
- _pool_cleanup_registered = True
67
-
68
42
  # Single worker process as requested
69
43
  _pool = ProcessPoolExecutor(
70
44
  max_workers=1,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.2.23
3
+ Version: 0.3.0
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: License :: OSI Approved :: Apache Software License
6
6
  Classifier: Operating System :: OS Independent
@@ -1,9 +1,9 @@
1
- cocoindex-0.2.23.dist-info/METADATA,sha256=ngGBMCTqt5Apsew33CJ7osgLGBwAdSBr8E3etjUEFB0,14194
2
- cocoindex-0.2.23.dist-info/WHEEL,sha256=eDlp9unULyyDxD2Zd14qZwSC_Y-kO5nuRBdlMsoCXEY,105
3
- cocoindex-0.2.23.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
- cocoindex-0.2.23.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=JkjceWLwTbf-svSD5D0wLkWQTF2nCHB_Ev3THD6IWOQ,750832
1
+ cocoindex-0.3.0.dist-info/METADATA,sha256=vMA1fqUWkDkxXgNT76l8pOBEqgt-Cu8XEpC4BPCbzp8,14193
2
+ cocoindex-0.3.0.dist-info/WHEEL,sha256=eDlp9unULyyDxD2Zd14qZwSC_Y-kO5nuRBdlMsoCXEY,105
3
+ cocoindex-0.3.0.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
+ cocoindex-0.3.0.dist-info/licenses/THIRD_PARTY_NOTICES.html,sha256=VRDb6qOsN808v5fkXRUNfGUqSvD_OJWjO6hD3uWfKFg,750831
5
5
  cocoindex/__init__.py,sha256=6qZWVkK4WZ01BIAg3CPh_bRRdA6Clk4d4Q6OnZ2jFa4,2630
6
- cocoindex/_engine.abi3.so,sha256=E3eoWnJh9lz9OdOtJ5vmcRjNqCoq8Jf_9jMCFRp1tBk,70055200
6
+ cocoindex/_engine.abi3.so,sha256=CHhZSvV-OpSq20F8vg7-kY6ibRQ-ahlCZ2FQw52c77E,70186592
7
7
  cocoindex/auth_registry.py,sha256=g-uLDWLYW5NMbYe7q4Y-sU5dSyrlJXBEciyWtAiP9KE,1340
8
8
  cocoindex/cli.py,sha256=k7bl8RTUZoNNxTlQMr-Y3-9-rTNt8z1v7rJWqsajYC8,24792
9
9
  cocoindex/engine_object.py,sha256=5YTuWoR3WILhyt3PW-d9es3MAas_xD6tZZqvipN-sjg,10050
@@ -12,19 +12,19 @@ cocoindex/flow.py,sha256=xDz3rOo4RhbboknvC-KnbWq8RBykEO0YsjGSBfXqIEg,40076
12
12
  cocoindex/functions/__init__.py,sha256=V2IF4h-Cqq4OD_GN3Oqdry-FArORyRCKmqJ7g5UlJr8,1021
13
13
  cocoindex/functions/_engine_builtin_specs.py,sha256=WpCGrjUfJBa8xZP5JiEmA8kLu7fp9Rcs7ynpuJmvSGg,1786
14
14
  cocoindex/functions/colpali.py,sha256=oACyG3qG2dquyCJ6bT7FkMkua5rXDLSxnOHcgoz9waU,8865
15
- cocoindex/functions/sbert.py,sha256=1z5OJT-blXT6tVN5vEvEzvYAzOnzs1RCnu1UbCUP6wM,2162
15
+ cocoindex/functions/sbert.py,sha256=o_DS1ZAqpNc4u1Yrm9DO5LxfMFrlH_hfb0MWobJrs_k,2223
16
16
  cocoindex/index.py,sha256=tz5ilvmOp0BtroGehCQDqWK_pIX9m6ghkhcxsDVU8WE,982
17
17
  cocoindex/lib.py,sha256=spfdU4IbzdffHyGdrQPIw_qGo9aX0OAAboqsjj8bTiQ,2290
18
18
  cocoindex/llm.py,sha256=8ZdJhOmhdb2xEcCxk6rDpnj6hlhCyFBmJdhCNMqAOP4,875
19
- cocoindex/op.py,sha256=TO-ETk3qXgnNS51NlWuLrOw_TfQ2mw83-_iswqULcQI,36095
19
+ cocoindex/op.py,sha256=FtYHn3RPBWSBAAaFbmp-FhLbs0qrMMd3lZdheM-udUo,37672
20
20
  cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  cocoindex/query_handler.py,sha256=X-SQT71LHiOOXn6-TJlQcGodJk-iT8p_1TcIMvRLBRI,1344
22
- cocoindex/runtime.py,sha256=4NxcltaDZvA3RR3Pnt6gH_f99jcWSyMH_1Xi5BjbtwY,1342
22
+ cocoindex/runtime.py,sha256=Qig--_3Uwvcw-MuuEdBg-P92IYB6So_nRocD2jnEi3g,2298
23
23
  cocoindex/setting.py,sha256=1Dx8ktjwf-8BiXrbsmfn5Mzudb2SQYqFdRnSNGVKaLk,4960
24
24
  cocoindex/setup.py,sha256=7uIHKN4FOCuoidPXcKyGTrkqpkl9luL49-6UcnMxYzw,3068
25
25
  cocoindex/sources/__init__.py,sha256=Yu9VHNaGlOEE3jpqfIseswsg25Le3HzwDr6XJAn22Ns,78
26
26
  cocoindex/sources/_engine_builtin_specs.py,sha256=MDsNP1y0vlUCGTAmA4jj2X8AMn1MVATbZzSi66ggvkA,3598
27
- cocoindex/subprocess_exec.py,sha256=r1xO84uek4VP4I6i87JMwsH5xFm3vKW0ABvgn0jskt4,10088
27
+ cocoindex/subprocess_exec.py,sha256=41PyvMaZ9q7B-F0YTUg1xdNVM_6_HdFErDbxDJMUFPg,9040
28
28
  cocoindex/targets/__init__.py,sha256=HQG7I4U0xQhHiYctiUvwEBLxT2727oHP3xwrqotjmhk,78
29
29
  cocoindex/targets/_engine_builtin_specs.py,sha256=glXUN5bj11Jxky1VPvmGnWnMHXTQWEh08INcbldo3F4,3375
30
30
  cocoindex/targets/lancedb.py,sha256=1nzCre5p-fvKkmLOTvfpiLTfnhF3qMLqTvsTwNuGwVU,15749
@@ -39,4 +39,4 @@ cocoindex/typing.py,sha256=qQj5uM6XAKHzRJ2BIEs7X-xeOXVcM9p_xz5SVqPVvS8,23914
39
39
  cocoindex/user_app_loader.py,sha256=bc3Af-gYRxJ9GpObtpjegZY855oQBCv5FGkrkWV2yGY,1873
40
40
  cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
41
41
  cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
42
- cocoindex-0.2.23.dist-info/RECORD,,
42
+ cocoindex-0.3.0.dist-info/RECORD,,
@@ -2846,7 +2846,7 @@ Software.
2846
2846
  <h3 id="Apache-2.0">Apache License 2.0</h3>
2847
2847
  <h4>Used by:</h4>
2848
2848
  <ul class="license-used-by">
2849
- <li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.2.23</a></li>
2849
+ <li><a href=" https://crates.io/crates/cocoindex ">cocoindex 0.3.0</a></li>
2850
2850
  <li><a href=" https://github.com/awesomized/crc-fast-rust ">crc-fast 1.3.0</a></li>
2851
2851
  <li><a href=" https://github.com/qdrant/rust-client ">qdrant-client 1.15.0</a></li>
2852
2852
  </ul>