cocoindex 0.2.0__cp313-cp313-win_amd64.whl → 0.2.2__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
cocoindex/flow.py CHANGED
@@ -120,9 +120,10 @@ def _transform_helper(
120
120
  else:
121
121
  raise ValueError("transform() can only be called on a CocoIndex function")
122
122
 
123
- return _create_data_slice(
124
- flow_builder_state,
125
- lambda target_scope, name: flow_builder_state.engine_flow_builder.transform(
123
+ def _create_data_slice_inner(
124
+ target_scope: _engine.DataScopeRef | None, name: str | None
125
+ ) -> _engine.DataSlice:
126
+ result = flow_builder_state.engine_flow_builder.transform(
126
127
  kind,
127
128
  dump_engine_object(spec),
128
129
  transform_args,
@@ -130,7 +131,12 @@ def _transform_helper(
130
131
  flow_builder_state.field_name_builder.build_name(
131
132
  name, prefix=_to_snake_case(_spec_kind(fn_spec)) + "_"
132
133
  ),
133
- ),
134
+ )
135
+ return result
136
+
137
+ return _create_data_slice(
138
+ flow_builder_state,
139
+ _create_data_slice_inner,
134
140
  name,
135
141
  )
136
142
 
@@ -166,6 +172,7 @@ class _DataSliceState:
166
172
  def engine_data_slice(self) -> _engine.DataSlice:
167
173
  """
168
174
  Get the internal DataSlice.
175
+ This can be blocking.
169
176
  """
170
177
  if self._lazy_lock is None:
171
178
  if self._data_slice is None:
@@ -179,6 +186,13 @@ class _DataSliceState:
179
186
  self._data_slice = self._data_slice_creator(None)
180
187
  return self._data_slice
181
188
 
189
+ async def engine_data_slice_async(self) -> _engine.DataSlice:
190
+ """
191
+ Get the internal DataSlice.
192
+ This can be blocking.
193
+ """
194
+ return await asyncio.to_thread(lambda: self.engine_data_slice)
195
+
182
196
  def attach_to_scope(self, scope: _engine.DataScopeRef, field_name: str) -> None:
183
197
  """
184
198
  Attach the current data slice (if not yet attached) to the given scope.
@@ -795,9 +809,8 @@ class Flow:
795
809
  """
796
810
  Setup persistent backends of the flow. The async version.
797
811
  """
798
- await make_setup_bundle([self]).describe_and_apply_async(
799
- report_to_stdout=report_to_stdout
800
- )
812
+ bundle = await make_setup_bundle_async([self])
813
+ await bundle.describe_and_apply_async(report_to_stdout=report_to_stdout)
801
814
 
802
815
  def drop(self, report_to_stdout: bool = False) -> None:
803
816
  """
@@ -814,9 +827,8 @@ class Flow:
814
827
  """
815
828
  Drop persistent backends of the flow. The async version.
816
829
  """
817
- await make_drop_bundle([self]).describe_and_apply_async(
818
- report_to_stdout=report_to_stdout
819
- )
830
+ bundle = await make_drop_bundle_async([self])
831
+ await bundle.describe_and_apply_async(report_to_stdout=report_to_stdout)
820
832
 
821
833
  def close(self) -> None:
822
834
  """
@@ -1071,19 +1083,16 @@ class TransformFlow(Generic[T]):
1071
1083
  _DataSliceState(flow_builder_state, engine_ds)
1072
1084
  )
1073
1085
 
1074
- output = self._flow_fn(**kwargs)
1075
- flow_builder_state.engine_flow_builder.set_direct_output(
1076
- _data_slice_state(output).engine_data_slice
1077
- )
1086
+ output = await asyncio.to_thread(lambda: self._flow_fn(**kwargs))
1087
+ output_data_slice = await _data_slice_state(output).engine_data_slice_async()
1088
+
1089
+ flow_builder_state.engine_flow_builder.set_direct_output(output_data_slice)
1078
1090
  engine_flow = (
1079
1091
  await flow_builder_state.engine_flow_builder.build_transient_flow_async(
1080
1092
  execution_context.event_loop
1081
1093
  )
1082
1094
  )
1083
-
1084
- engine_return_type = (
1085
- _data_slice_state(output).engine_data_slice.data_type().schema()
1086
- )
1095
+ engine_return_type = output_data_slice.data_type().schema()
1087
1096
  python_return_type: type[T] | None = _get_data_slice_annotation_type(
1088
1097
  inspect.signature(self._flow_fn).return_annotation
1089
1098
  )
@@ -1142,28 +1151,42 @@ def transform_flow() -> Callable[[Callable[..., DataSlice[T]]], TransformFlow[T]
1142
1151
  return _transform_flow_wrapper
1143
1152
 
1144
1153
 
1145
- def make_setup_bundle(flow_iter: Iterable[Flow]) -> SetupChangeBundle:
1154
+ async def make_setup_bundle_async(flow_iter: Iterable[Flow]) -> SetupChangeBundle:
1146
1155
  """
1147
1156
  Make a bundle to setup flows with the given names.
1148
1157
  """
1149
1158
  full_names = []
1150
1159
  for fl in flow_iter:
1151
- fl.internal_flow()
1160
+ await fl.internal_flow_async()
1152
1161
  full_names.append(fl.full_name)
1153
1162
  return SetupChangeBundle(_engine.make_setup_bundle(full_names))
1154
1163
 
1155
1164
 
1156
- def make_drop_bundle(flow_iter: Iterable[Flow]) -> SetupChangeBundle:
1165
+ def make_setup_bundle(flow_iter: Iterable[Flow]) -> SetupChangeBundle:
1166
+ """
1167
+ Make a bundle to setup flows with the given names.
1168
+ """
1169
+ return execution_context.run(make_setup_bundle_async(flow_iter))
1170
+
1171
+
1172
+ async def make_drop_bundle_async(flow_iter: Iterable[Flow]) -> SetupChangeBundle:
1157
1173
  """
1158
1174
  Make a bundle to drop flows with the given names.
1159
1175
  """
1160
1176
  full_names = []
1161
1177
  for fl in flow_iter:
1162
- fl.internal_flow()
1178
+ await fl.internal_flow_async()
1163
1179
  full_names.append(fl.full_name)
1164
1180
  return SetupChangeBundle(_engine.make_drop_bundle(full_names))
1165
1181
 
1166
1182
 
1183
+ def make_drop_bundle(flow_iter: Iterable[Flow]) -> SetupChangeBundle:
1184
+ """
1185
+ Make a bundle to drop flows with the given names.
1186
+ """
1187
+ return execution_context.run(make_drop_bundle_async(flow_iter))
1188
+
1189
+
1167
1190
  def setup_all_flows(report_to_stdout: bool = False) -> None:
1168
1191
  """
1169
1192
  Setup all flows registered in the current process.
cocoindex/setting.py CHANGED
@@ -44,8 +44,8 @@ class DatabaseConnectionSpec:
44
44
  url: str
45
45
  user: str | None = None
46
46
  password: str | None = None
47
- max_connections: int = 64
48
- min_connections: int = 16
47
+ max_connections: int = 25
48
+ min_connections: int = 5
49
49
 
50
50
 
51
51
  @dataclass
@@ -22,6 +22,7 @@ import time
22
22
  from .user_app_loader import load_user_app
23
23
  from .runtime import execution_context
24
24
  import logging
25
+ import multiprocessing as mp
25
26
 
26
27
  WATCHDOG_INTERVAL_SECONDS = 10.0
27
28
 
@@ -43,6 +44,7 @@ def _get_pool() -> ProcessPoolExecutor:
43
44
  max_workers=1,
44
45
  initializer=_subprocess_init,
45
46
  initargs=(_user_apps, os.getpid()),
47
+ mp_context=mp.get_context("spawn"),
46
48
  )
47
49
  return _pool
48
50
 
@@ -69,6 +71,7 @@ def _restart_pool(old_pool: ProcessPoolExecutor | None = None) -> None:
69
71
  max_workers=1,
70
72
  initializer=_subprocess_init,
71
73
  initargs=(_user_apps, os.getpid()),
74
+ mp_context=mp.get_context("spawn"),
72
75
  )
73
76
  if prev_pool is not None:
74
77
  # Best-effort shutdown of previous pool; letting exceptions bubble up
@@ -124,8 +127,19 @@ def _start_parent_watchdog(
124
127
 
125
128
  def _subprocess_init(user_apps: list[str], parent_pid: int) -> None:
126
129
  _start_parent_watchdog(parent_pid)
130
+
131
+ # In case any user app is already in this subprocess, e.g. the subprocess is forked, we need to avoid loading it again.
132
+ with _pool_lock:
133
+ already_loaded_apps = set(_user_apps)
134
+
135
+ loaded_apps = []
127
136
  for app_target in user_apps:
128
- load_user_app(app_target)
137
+ if app_target not in already_loaded_apps:
138
+ load_user_app(app_target)
139
+ loaded_apps.append(app_target)
140
+
141
+ with _pool_lock:
142
+ _user_apps.extend(loaded_apps)
129
143
 
130
144
 
131
145
  class _OnceResult:
@@ -166,6 +166,25 @@ class GpuAppendSuffixExecutor:
166
166
  return f"{text}{self.spec.suffix}"
167
167
 
168
168
 
169
+ class GpuAppendSuffixWithAnalyzePrepare(cocoindex.op.FunctionSpec):
170
+ suffix: str
171
+
172
+
173
+ @cocoindex.op.executor_class(gpu=True)
174
+ class GpuAppendSuffixWithAnalyzePrepareExecutor:
175
+ spec: GpuAppendSuffixWithAnalyzePrepare
176
+ suffix: str
177
+
178
+ def analyze(self) -> Any:
179
+ return str
180
+
181
+ def prepare(self) -> None:
182
+ self.suffix = self.spec.suffix
183
+
184
+ def __call__(self, text: str) -> str:
185
+ return f"{text}{self.suffix}"
186
+
187
+
169
188
  def test_gpu_function() -> None:
170
189
  @cocoindex.transform_flow()
171
190
  def transform_flow(text: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[str]:
@@ -174,3 +193,15 @@ def test_gpu_function() -> None:
174
193
  result = transform_flow.eval("Hello")
175
194
  expected = "Hello world!"
176
195
  assert result == expected, f"Expected {expected}, got {result}"
196
+
197
+ @cocoindex.transform_flow()
198
+ def transform_flow_with_analyze_prepare(
199
+ text: cocoindex.DataSlice[str],
200
+ ) -> cocoindex.DataSlice[str]:
201
+ return text.transform(gpu_append_world).transform(
202
+ GpuAppendSuffixWithAnalyzePrepare(suffix="!!")
203
+ )
204
+
205
+ result = transform_flow_with_analyze_prepare.eval("Hello")
206
+ expected = "Hello world!!"
207
+ assert result == expected, f"Expected {expected}, got {result}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -84,7 +84,7 @@ CocoIndex makes it effortless to transform data with AI, and keep source data an
84
84
  </br>
85
85
 
86
86
  <p align="center">
87
- <img width="4187" height="1883" alt="CocoIndex Features" src="https://github.com/user-attachments/assets/6147673f-0daf-4313-a0c7-7c4205ba2e31" />
87
+ <img alt="CocoIndex Features" src="https://cocoindex.io/images/venn2.svg" />
88
88
  </p>
89
89
 
90
90
  </br>
@@ -1,13 +1,13 @@
1
- cocoindex-0.2.0.dist-info/METADATA,sha256=u3xlaek3vJchi1ki3C0wm03J9SNiifOMK6WFmquDvwI,12403
2
- cocoindex-0.2.0.dist-info/WHEEL,sha256=K7foeVF-x_RZTycPKa1uE1HH2bAWe3AiJbihrXn5Hhc,96
3
- cocoindex-0.2.0.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
- cocoindex-0.2.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
1
+ cocoindex-0.2.2.dist-info/METADATA,sha256=fTrnBYaHaBQXAZOTR6165dE9McaS-ePB_H6SsF41dUs,12334
2
+ cocoindex-0.2.2.dist-info/WHEEL,sha256=K7foeVF-x_RZTycPKa1uE1HH2bAWe3AiJbihrXn5Hhc,96
3
+ cocoindex-0.2.2.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
+ cocoindex-0.2.2.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
5
5
  cocoindex/__init__.py,sha256=5zwuS_X-n7QAE5uBSufqXp77OW8KVVD8E5t_6koDLRc,2293
6
- cocoindex/_engine.cp313-win_amd64.pyd,sha256=HQPzr9nNVz0dlpAaXdHRKDpMcdHlNj4J_gwfonnvPFw,71612928
6
+ cocoindex/_engine.cp313-win_amd64.pyd,sha256=5XNMzSZdXaFSYrKeVORWQAM_u93uVuSImkW7SEeQl00,71629824
7
7
  cocoindex/auth_registry.py,sha256=Qq1IVZb-7K4luRrQSDlOPbISnGEZ4kIDsrCU8H2ARw0,1529
8
8
  cocoindex/cli.py,sha256=VgeVgO5z2l6tv7XWS3q-sRZ8cYe9r7izUYUR3NfdgJA,21372
9
9
  cocoindex/convert.py,sha256=k1NT3VoERVS2HV1rLDFyCk6rG1YgHXDnIvrI-lqFdUE,22681
10
- cocoindex/flow.py,sha256=6AadyuaQsujkjV1zcXM-DiYr29uVdGXII4tIuNGDo_k,37440
10
+ cocoindex/flow.py,sha256=YGSASrMNtQzX9S42XfOpspEBFayj16OiD3q5bTz1y98,38409
11
11
  cocoindex/functions.py,sha256=CtiwTVW6g4BtO5_EvVcij7Si4Bx-axnM1hsdU43aM4g,12617
12
12
  cocoindex/index.py,sha256=GrqTm1rLwICQ8hadtNvJAxVg7GWMvtMmFcbiNtNzmP0,569
13
13
  cocoindex/lib.py,sha256=cZosix4nwROvod4QJOwCzrm6U1CVy_wKMMk7sDDG_Z0,849
@@ -15,19 +15,19 @@ cocoindex/llm.py,sha256=JM5EPup7FZojynCI0rg4gnMBYmdPZpaHJbVBz3f7BAI,897
15
15
  cocoindex/op.py,sha256=1uOwE1zzMs7FrMSpQMJPUI8YtnXXk3-tlEJ_ahPFq5A,22884
16
16
  cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  cocoindex/runtime.py,sha256=6mE-jR1Kh5c4GETDvBgwjXZq69TK5rh1qNpaseRDZnw,1117
18
- cocoindex/setting.py,sha256=048VfKRly9TzLElJlL01tUxySGtRbsc9m68yIHvMzMU,5464
18
+ cocoindex/setting.py,sha256=obq1EiFlSJvidQZPVVC9V5ZUZAzYnyYcSm_6XUVDkUE,5463
19
19
  cocoindex/setup.py,sha256=KbJvmeFu0NbeoH-5iDmHZP86f26HIId8kHmGUNZAePI,3160
20
20
  cocoindex/sources.py,sha256=mVZhyVTyHzFy7z3lceYqgguygADQkIL42nYK6zKSKKQ,2868
21
- cocoindex/subprocess_exec.py,sha256=KDV7xdFUhKYqHZdiJqirNTl7DbJQUXLvga0Q1urE6XA,8143
21
+ cocoindex/subprocess_exec.py,sha256=ase_F4ivmyShCIJcygRl9Uz8BKthpOLq-TUfyqrUB9A,8658
22
22
  cocoindex/targets.py,sha256=7FfG9kuEf5KTXtLwXMFaPFIut3PsIbpb3XIEjjeF7Bg,2931
23
23
  cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  cocoindex/tests/test_convert.py,sha256=kngLt9p2BXo3D7_OZNnnSS1w_Ew8966IuaDpHcfd9Go,51750
25
25
  cocoindex/tests/test_optional_database.py,sha256=dnzmTgaJf37D3q8fQsjP5UDER6FYETaUokDnFBMLtIk,8755
26
- cocoindex/tests/test_transform_flow.py,sha256=DDtxENZHh_2mt0vDU4T1Oaq-jVLJDwaKpkMg83_9NfE,5324
26
+ cocoindex/tests/test_transform_flow.py,sha256=DxM-7_kWeU-QzOpH77Vd5Jehbbq00xCBBgRK7mRn0kI,6237
27
27
  cocoindex/tests/test_typing.py,sha256=WqR1M11dSVYXZj1DnXnOvwH6VNQpiv5lCEO-FqA_by0,12820
28
28
  cocoindex/tests/test_validation.py,sha256=I4wr8lAMAjmy5xgG5N_OJKveXt8XIa96MsQTXhw5AnA,4677
29
29
  cocoindex/typing.py,sha256=bkKcp4G_xQODwQn92sfeNi-kXSWnbtlwTpFbrMGCnj4,14673
30
30
  cocoindex/user_app_loader.py,sha256=jKNyCq5Osl4dMevlDNloGuwCfDscxw5o0m9_OqrHDN8,1965
31
31
  cocoindex/utils.py,sha256=U3W39zD2uZpXX8v84tJD7sRmbC5ar3z_ljAP1cJrYXI,618
32
32
  cocoindex/validation.py,sha256=4ZjsW-SZT8X_TEEhEE6QG6D-8Oq_TkPAhTqP0mdFYSE,3194
33
- cocoindex-0.2.0.dist-info/RECORD,,
33
+ cocoindex-0.2.2.dist-info/RECORD,,