cocoindex 0.1.76__cp312-cp312-manylinux_2_28_x86_64.whl → 0.1.77__cp312-cp312-manylinux_2_28_x86_64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
cocoindex/op.py CHANGED
@@ -114,8 +114,8 @@ class _FunctionExecutorFactory:
114
114
  ) -> tuple[dict[str, Any], Executor]:
115
115
  spec = _load_spec_from_engine(self._spec_cls, spec)
116
116
  executor = self._executor_cls(spec)
117
- result_type = executor.analyze(*args, **kwargs)
118
- return (encode_enriched_type(result_type), executor)
117
+ result_type = executor.analyze_schema(*args, **kwargs)
118
+ return (result_type, executor)
119
119
 
120
120
 
121
121
  _gpu_dispatch_lock = asyncio.Lock()
@@ -156,6 +156,12 @@ def _to_async_call(call: Callable[..., Any]) -> Callable[..., Awaitable[Any]]:
156
156
  return lambda *args, **kwargs: asyncio.to_thread(lambda: call(*args, **kwargs))
157
157
 
158
158
 
159
+ @dataclasses.dataclass
160
+ class _ArgInfo:
161
+ decoder: Callable[[Any], Any]
162
+ is_required: bool
163
+
164
+
159
165
  def _register_op_factory(
160
166
  category: OpCategory,
161
167
  expected_args: list[tuple[str, inspect.Parameter]],
@@ -176,8 +182,8 @@ def _register_op_factory(
176
182
  return op_args.behavior_version
177
183
 
178
184
  class _WrappedClass(executor_cls, _Fallback): # type: ignore[misc]
179
- _args_decoders: list[Callable[[Any], Any]]
180
- _kwargs_decoders: dict[str, Callable[[Any], Any]]
185
+ _args_info: list[_ArgInfo]
186
+ _kwargs_info: dict[str, _ArgInfo]
181
187
  _acall: Callable[..., Awaitable[Any]]
182
188
 
183
189
  def __init__(self, spec: Any) -> None:
@@ -185,28 +191,45 @@ def _register_op_factory(
185
191
  self.spec = spec
186
192
  self._acall = _to_async_call(super().__call__)
187
193
 
188
- def analyze(
194
+ def analyze_schema(
189
195
  self, *args: _engine.OpArgSchema, **kwargs: _engine.OpArgSchema
190
196
  ) -> Any:
191
197
  """
192
198
  Analyze the spec and arguments. In this phase, argument types should be validated.
193
199
  It should return the expected result type for the current op.
194
200
  """
195
- self._args_decoders = []
196
- self._kwargs_decoders = {}
201
+ self._args_info = []
202
+ self._kwargs_info = {}
197
203
  attributes = []
198
-
199
- def process_attribute(arg_name: str, arg: _engine.OpArgSchema) -> None:
204
+ potentially_missing_required_arg = False
205
+
206
+ def process_arg(
207
+ arg_name: str,
208
+ arg_param: inspect.Parameter,
209
+ actual_arg: _engine.OpArgSchema,
210
+ ) -> _ArgInfo:
211
+ nonlocal potentially_missing_required_arg
200
212
  if op_args.arg_relationship is not None:
201
213
  related_attr, related_arg_name = op_args.arg_relationship
202
214
  if related_arg_name == arg_name:
203
215
  attributes.append(
204
- TypeAttr(related_attr.value, arg.analyzed_value)
216
+ TypeAttr(related_attr.value, actual_arg.analyzed_value)
205
217
  )
218
+ type_info = analyze_type_info(arg_param.annotation)
219
+ decoder = make_engine_value_decoder(
220
+ [arg_name], actual_arg.value_type["type"], type_info
221
+ )
222
+ is_required = not type_info.nullable
223
+ if is_required and actual_arg.value_type.get("nullable", False):
224
+ potentially_missing_required_arg = True
225
+ return _ArgInfo(
226
+ decoder=decoder,
227
+ is_required=is_required,
228
+ )
206
229
 
207
230
  # Match arguments with parameters.
208
231
  next_param_idx = 0
209
- for arg in args:
232
+ for actual_arg in args:
210
233
  if next_param_idx >= len(expected_args):
211
234
  raise ValueError(
212
235
  f"Too many arguments passed in: {len(args)} > {len(expected_args)}"
@@ -219,20 +242,13 @@ def _register_op_factory(
219
242
  raise ValueError(
220
243
  f"Too many positional arguments passed in: {len(args)} > {next_param_idx}"
221
244
  )
222
- self._args_decoders.append(
223
- make_engine_value_decoder(
224
- [arg_name],
225
- arg.value_type["type"],
226
- analyze_type_info(arg_param.annotation),
227
- )
228
- )
229
- process_attribute(arg_name, arg)
245
+ self._args_info.append(process_arg(arg_name, arg_param, actual_arg))
230
246
  if arg_param.kind != inspect.Parameter.VAR_POSITIONAL:
231
247
  next_param_idx += 1
232
248
 
233
249
  expected_kwargs = expected_args[next_param_idx:]
234
250
 
235
- for kwarg_name, kwarg in kwargs.items():
251
+ for kwarg_name, actual_arg in kwargs.items():
236
252
  expected_arg = next(
237
253
  (
238
254
  arg
@@ -254,12 +270,9 @@ def _register_op_factory(
254
270
  f"Unexpected keyword argument passed in: {kwarg_name}"
255
271
  )
256
272
  arg_param = expected_arg[1]
257
- self._kwargs_decoders[kwarg_name] = make_engine_value_decoder(
258
- [kwarg_name],
259
- kwarg.value_type["type"],
260
- analyze_type_info(arg_param.annotation),
273
+ self._kwargs_info[kwarg_name] = process_arg(
274
+ kwarg_name, arg_param, actual_arg
261
275
  )
262
- process_attribute(kwarg_name, kwarg)
263
276
 
264
277
  missing_args = [
265
278
  name
@@ -280,32 +293,45 @@ def _register_op_factory(
280
293
  if len(missing_args) > 0:
281
294
  raise ValueError(f"Missing arguments: {', '.join(missing_args)}")
282
295
 
283
- prepare_method = getattr(executor_cls, "analyze", None)
284
- if prepare_method is not None:
285
- result = prepare_method(self, *args, **kwargs)
296
+ base_analyze_method = getattr(self, "analyze", None)
297
+ if base_analyze_method is not None:
298
+ result = base_analyze_method(*args, **kwargs)
286
299
  else:
287
300
  result = expected_return
288
301
  if len(attributes) > 0:
289
302
  result = Annotated[result, *attributes]
290
- return result
303
+
304
+ encoded_type = encode_enriched_type(result)
305
+ if potentially_missing_required_arg:
306
+ encoded_type["nullable"] = True
307
+ return encoded_type
291
308
 
292
309
  async def prepare(self) -> None:
293
310
  """
294
311
  Prepare for execution.
295
312
  It's executed after `analyze` and before any `__call__` execution.
296
313
  """
297
- setup_method = getattr(super(), "prepare", None)
298
- if setup_method is not None:
299
- await _to_async_call(setup_method)()
314
+ prepare_method = getattr(super(), "prepare", None)
315
+ if prepare_method is not None:
316
+ await _to_async_call(prepare_method)()
300
317
 
301
318
  async def __call__(self, *args: Any, **kwargs: Any) -> Any:
302
- decoded_args = (
303
- decoder(arg) for decoder, arg in zip(self._args_decoders, args)
304
- )
305
- decoded_kwargs = {
306
- arg_name: self._kwargs_decoders[arg_name](arg)
307
- for arg_name, arg in kwargs.items()
308
- }
319
+ decoded_args = []
320
+ for arg_info, arg in zip(self._args_info, args):
321
+ if arg_info.is_required and arg is None:
322
+ return None
323
+ decoded_args.append(arg_info.decoder(arg))
324
+
325
+ decoded_kwargs = {}
326
+ for kwarg_name, arg in kwargs.items():
327
+ kwarg_info = self._kwargs_info.get(kwarg_name)
328
+ if kwarg_info is None:
329
+ raise ValueError(
330
+ f"Unexpected keyword argument passed in: {kwarg_name}"
331
+ )
332
+ if kwarg_info.is_required and arg is None:
333
+ return None
334
+ decoded_kwargs[kwarg_name] = kwarg_info.decoder(arg)
309
335
 
310
336
  if op_args.gpu:
311
337
  # For GPU executions, data-level parallelism is applied, so we don't want to
@@ -101,3 +101,44 @@ async def test_for_each_transform_flow_async() -> None:
101
101
  }
102
102
 
103
103
  assert result == expected, f"Expected {expected}, got {result}"
104
+
105
+
106
+ def test_none_arg_yield_none_result() -> None:
107
+ """Test that None arguments yield None results."""
108
+
109
+ @cocoindex.op.function()
110
+ def custom_fn(
111
+ required_arg: int,
112
+ optional_arg: int | None,
113
+ required_kwarg: int,
114
+ optional_kwarg: int | None,
115
+ ) -> int:
116
+ return (
117
+ required_arg + (optional_arg or 0) + required_kwarg + (optional_kwarg or 0)
118
+ )
119
+
120
+ @cocoindex.transform_flow()
121
+ def transform_flow(
122
+ required_arg: cocoindex.DataSlice[int | None],
123
+ optional_arg: cocoindex.DataSlice[int | None],
124
+ required_kwarg: cocoindex.DataSlice[int | None],
125
+ optional_kwarg: cocoindex.DataSlice[int | None],
126
+ ) -> cocoindex.DataSlice[int | None]:
127
+ return required_arg.transform(
128
+ custom_fn,
129
+ optional_arg,
130
+ required_kwarg=required_kwarg,
131
+ optional_kwarg=optional_kwarg,
132
+ )
133
+
134
+ result = transform_flow.eval(1, 2, 4, 8)
135
+ assert result == 15, f"Expected 15, got {result}"
136
+
137
+ result = transform_flow.eval(1, None, 4, None)
138
+ assert result == 5, f"Expected 5, got {result}"
139
+
140
+ result = transform_flow.eval(None, 2, 4, 8)
141
+ assert result is None, f"Expected None, got {result}"
142
+
143
+ result = transform_flow.eval(1, 2, None, None)
144
+ assert result is None, f"Expected None, got {result}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.76
3
+ Version: 0.1.77
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -214,6 +214,7 @@ It defines an index flow like this:
214
214
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
215
215
  | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
216
216
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
217
+ | [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
217
218
  | [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
218
219
 
219
220
  More coming and stay tuned 👀!
@@ -1,9 +1,9 @@
1
- cocoindex-0.1.76.dist-info/METADATA,sha256=w_VMnPWkx5iMgpwgAWTB3KRxSfcGaU1sVES6-jXiAjQ,11655
2
- cocoindex-0.1.76.dist-info/WHEEL,sha256=ou1DNCp7U3BqmBQmL1kn_MScQo0FSr_-jd6ceYUSmEk,108
3
- cocoindex-0.1.76.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
- cocoindex-0.1.76.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
1
+ cocoindex-0.1.77.dist-info/METADATA,sha256=009CbNRu3D4ifri3AGpphTt29ENF0F1HNeu9O1HkVXI,11799
2
+ cocoindex-0.1.77.dist-info/WHEEL,sha256=ou1DNCp7U3BqmBQmL1kn_MScQo0FSr_-jd6ceYUSmEk,108
3
+ cocoindex-0.1.77.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
+ cocoindex-0.1.77.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
5
5
  cocoindex/__init__.py,sha256=sLpSVO5Cotgn_82lawxvXnaqfa-qj33rytWBAe2MTtU,2201
6
- cocoindex/_engine.cpython-312-x86_64-linux-gnu.so,sha256=eR8J_mgVd8MJVI8-vZHQd_PMKqtRQD-W4uBQoAI40G8,71118248
6
+ cocoindex/_engine.cpython-312-x86_64-linux-gnu.so,sha256=zYVBlgdzRKBRU2EzDuZfE661KxNknrvr-Nv9KIZecM8,71118256
7
7
  cocoindex/auth_registry.py,sha256=PE1-kVkcyC1G2C_V7b1kvYzeq73OFQehWKQP7ln7fJ8,1478
8
8
  cocoindex/cli.py,sha256=-gp639JSyQN6YjnhGqCakIzYoSSqXxQMbxbkcYGP0QY,22359
9
9
  cocoindex/convert.py,sha256=HodeDl1HVX8nnBH02lQKarw5i3xmkjB0nGj-DXt7Ifc,18284
@@ -12,7 +12,7 @@ cocoindex/functions.py,sha256=34sZWoS0zGnaKyooIODQgc6QEPZKiJoWhfb8jKIWwps,9528
12
12
  cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
13
13
  cocoindex/lib.py,sha256=f--9dAYd84CZosbDZqNW0oGbBLsY3dXiUTR1VrfQ_QY,817
14
14
  cocoindex/llm.py,sha256=WxmWUbNcf9HOCM5xkbDeFs9lF67M3mr810B7deDDc-8,673
15
- cocoindex/op.py,sha256=oiG1rjxz6ad1jGS7DMya4NStrA_6LV3RbcVSR75XUl0,21516
15
+ cocoindex/op.py,sha256=EZ3QrDGJciYAxsqBeZOc3SstOL_34a6cke3WXWwz4-M,22538
16
16
  cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  cocoindex/runtime.py,sha256=povilB3HH3y1JF-yxKwU-pD8n2WnAqyQxIgvXXHNc60,1080
18
18
  cocoindex/setting.py,sha256=TwhQ6pEeZmvc8ZXlnT9d8Wn8Vz_u7Z5LJUkGsKmKSno,4859
@@ -22,10 +22,10 @@ cocoindex/targets.py,sha256=Nfh_tpFd1goTnS_cxBjIs4j9zl3Z4Z1JomAQ1dl3Sic,2796
22
22
  cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  cocoindex/tests/test_convert.py,sha256=l7LqD7duV9-xkYTaKOsEPdqw7v14dUzE40f4VVLlBCQ,49423
24
24
  cocoindex/tests/test_optional_database.py,sha256=snAmkNa6wtOSaxoZE1HgjvL5v_ylitt3Jt_9df4Cgdc,8506
25
- cocoindex/tests/test_transform_flow.py,sha256=VvT5b895MH5kwT-h4OpdDTl545SU4nxeIm7E_QANmAk,2894
25
+ cocoindex/tests/test_transform_flow.py,sha256=M63Noo29Kl5CawTosCeZHjkn4ObjTP2ycHCGO5p8WHM,4230
26
26
  cocoindex/tests/test_typing.py,sha256=9OF3lO2uSpZBefkEJx7WRbnkXjwQtvlQIeeARYQID68,12391
27
27
  cocoindex/tests/test_validation.py,sha256=X6AQzVs-hVKIXcrHMEMQnhfUE8at7iXQnPq8nHNhZ2Q,4543
28
28
  cocoindex/typing.py,sha256=qQ0ANF3iuQDeSqipHgL2SDiiXL2reTMUN0aj4ve_T0w,13359
29
29
  cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
30
30
  cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
31
- cocoindex-0.1.76.dist-info/RECORD,,
31
+ cocoindex-0.1.77.dist-info/RECORD,,