cocoindex 0.1.76__cp312-cp312-manylinux_2_28_aarch64.whl → 0.1.78__cp312-cp312-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.cpython-312-aarch64-linux-gnu.so +0 -0
- cocoindex/op.py +65 -39
- cocoindex/setting.py +16 -2
- cocoindex/tests/test_transform_flow.py +41 -0
- {cocoindex-0.1.76.dist-info → cocoindex-0.1.78.dist-info}/METADATA +2 -1
- {cocoindex-0.1.76.dist-info → cocoindex-0.1.78.dist-info}/RECORD +9 -9
- {cocoindex-0.1.76.dist-info → cocoindex-0.1.78.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.76.dist-info → cocoindex-0.1.78.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.76.dist-info → cocoindex-0.1.78.dist-info}/licenses/LICENSE +0 -0
Binary file
|
cocoindex/op.py
CHANGED
@@ -114,8 +114,8 @@ class _FunctionExecutorFactory:
|
|
114
114
|
) -> tuple[dict[str, Any], Executor]:
|
115
115
|
spec = _load_spec_from_engine(self._spec_cls, spec)
|
116
116
|
executor = self._executor_cls(spec)
|
117
|
-
result_type = executor.
|
118
|
-
return (
|
117
|
+
result_type = executor.analyze_schema(*args, **kwargs)
|
118
|
+
return (result_type, executor)
|
119
119
|
|
120
120
|
|
121
121
|
_gpu_dispatch_lock = asyncio.Lock()
|
@@ -156,6 +156,12 @@ def _to_async_call(call: Callable[..., Any]) -> Callable[..., Awaitable[Any]]:
|
|
156
156
|
return lambda *args, **kwargs: asyncio.to_thread(lambda: call(*args, **kwargs))
|
157
157
|
|
158
158
|
|
159
|
+
@dataclasses.dataclass
|
160
|
+
class _ArgInfo:
|
161
|
+
decoder: Callable[[Any], Any]
|
162
|
+
is_required: bool
|
163
|
+
|
164
|
+
|
159
165
|
def _register_op_factory(
|
160
166
|
category: OpCategory,
|
161
167
|
expected_args: list[tuple[str, inspect.Parameter]],
|
@@ -176,8 +182,8 @@ def _register_op_factory(
|
|
176
182
|
return op_args.behavior_version
|
177
183
|
|
178
184
|
class _WrappedClass(executor_cls, _Fallback): # type: ignore[misc]
|
179
|
-
|
180
|
-
|
185
|
+
_args_info: list[_ArgInfo]
|
186
|
+
_kwargs_info: dict[str, _ArgInfo]
|
181
187
|
_acall: Callable[..., Awaitable[Any]]
|
182
188
|
|
183
189
|
def __init__(self, spec: Any) -> None:
|
@@ -185,28 +191,45 @@ def _register_op_factory(
|
|
185
191
|
self.spec = spec
|
186
192
|
self._acall = _to_async_call(super().__call__)
|
187
193
|
|
188
|
-
def
|
194
|
+
def analyze_schema(
|
189
195
|
self, *args: _engine.OpArgSchema, **kwargs: _engine.OpArgSchema
|
190
196
|
) -> Any:
|
191
197
|
"""
|
192
198
|
Analyze the spec and arguments. In this phase, argument types should be validated.
|
193
199
|
It should return the expected result type for the current op.
|
194
200
|
"""
|
195
|
-
self.
|
196
|
-
self.
|
201
|
+
self._args_info = []
|
202
|
+
self._kwargs_info = {}
|
197
203
|
attributes = []
|
198
|
-
|
199
|
-
|
204
|
+
potentially_missing_required_arg = False
|
205
|
+
|
206
|
+
def process_arg(
|
207
|
+
arg_name: str,
|
208
|
+
arg_param: inspect.Parameter,
|
209
|
+
actual_arg: _engine.OpArgSchema,
|
210
|
+
) -> _ArgInfo:
|
211
|
+
nonlocal potentially_missing_required_arg
|
200
212
|
if op_args.arg_relationship is not None:
|
201
213
|
related_attr, related_arg_name = op_args.arg_relationship
|
202
214
|
if related_arg_name == arg_name:
|
203
215
|
attributes.append(
|
204
|
-
TypeAttr(related_attr.value,
|
216
|
+
TypeAttr(related_attr.value, actual_arg.analyzed_value)
|
205
217
|
)
|
218
|
+
type_info = analyze_type_info(arg_param.annotation)
|
219
|
+
decoder = make_engine_value_decoder(
|
220
|
+
[arg_name], actual_arg.value_type["type"], type_info
|
221
|
+
)
|
222
|
+
is_required = not type_info.nullable
|
223
|
+
if is_required and actual_arg.value_type.get("nullable", False):
|
224
|
+
potentially_missing_required_arg = True
|
225
|
+
return _ArgInfo(
|
226
|
+
decoder=decoder,
|
227
|
+
is_required=is_required,
|
228
|
+
)
|
206
229
|
|
207
230
|
# Match arguments with parameters.
|
208
231
|
next_param_idx = 0
|
209
|
-
for
|
232
|
+
for actual_arg in args:
|
210
233
|
if next_param_idx >= len(expected_args):
|
211
234
|
raise ValueError(
|
212
235
|
f"Too many arguments passed in: {len(args)} > {len(expected_args)}"
|
@@ -219,20 +242,13 @@ def _register_op_factory(
|
|
219
242
|
raise ValueError(
|
220
243
|
f"Too many positional arguments passed in: {len(args)} > {next_param_idx}"
|
221
244
|
)
|
222
|
-
self.
|
223
|
-
make_engine_value_decoder(
|
224
|
-
[arg_name],
|
225
|
-
arg.value_type["type"],
|
226
|
-
analyze_type_info(arg_param.annotation),
|
227
|
-
)
|
228
|
-
)
|
229
|
-
process_attribute(arg_name, arg)
|
245
|
+
self._args_info.append(process_arg(arg_name, arg_param, actual_arg))
|
230
246
|
if arg_param.kind != inspect.Parameter.VAR_POSITIONAL:
|
231
247
|
next_param_idx += 1
|
232
248
|
|
233
249
|
expected_kwargs = expected_args[next_param_idx:]
|
234
250
|
|
235
|
-
for kwarg_name,
|
251
|
+
for kwarg_name, actual_arg in kwargs.items():
|
236
252
|
expected_arg = next(
|
237
253
|
(
|
238
254
|
arg
|
@@ -254,12 +270,9 @@ def _register_op_factory(
|
|
254
270
|
f"Unexpected keyword argument passed in: {kwarg_name}"
|
255
271
|
)
|
256
272
|
arg_param = expected_arg[1]
|
257
|
-
self.
|
258
|
-
|
259
|
-
kwarg.value_type["type"],
|
260
|
-
analyze_type_info(arg_param.annotation),
|
273
|
+
self._kwargs_info[kwarg_name] = process_arg(
|
274
|
+
kwarg_name, arg_param, actual_arg
|
261
275
|
)
|
262
|
-
process_attribute(kwarg_name, kwarg)
|
263
276
|
|
264
277
|
missing_args = [
|
265
278
|
name
|
@@ -280,32 +293,45 @@ def _register_op_factory(
|
|
280
293
|
if len(missing_args) > 0:
|
281
294
|
raise ValueError(f"Missing arguments: {', '.join(missing_args)}")
|
282
295
|
|
283
|
-
|
284
|
-
if
|
285
|
-
result =
|
296
|
+
base_analyze_method = getattr(self, "analyze", None)
|
297
|
+
if base_analyze_method is not None:
|
298
|
+
result = base_analyze_method(*args, **kwargs)
|
286
299
|
else:
|
287
300
|
result = expected_return
|
288
301
|
if len(attributes) > 0:
|
289
302
|
result = Annotated[result, *attributes]
|
290
|
-
|
303
|
+
|
304
|
+
encoded_type = encode_enriched_type(result)
|
305
|
+
if potentially_missing_required_arg:
|
306
|
+
encoded_type["nullable"] = True
|
307
|
+
return encoded_type
|
291
308
|
|
292
309
|
async def prepare(self) -> None:
|
293
310
|
"""
|
294
311
|
Prepare for execution.
|
295
312
|
It's executed after `analyze` and before any `__call__` execution.
|
296
313
|
"""
|
297
|
-
|
298
|
-
if
|
299
|
-
await _to_async_call(
|
314
|
+
prepare_method = getattr(super(), "prepare", None)
|
315
|
+
if prepare_method is not None:
|
316
|
+
await _to_async_call(prepare_method)()
|
300
317
|
|
301
318
|
async def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
302
|
-
decoded_args =
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
}
|
319
|
+
decoded_args = []
|
320
|
+
for arg_info, arg in zip(self._args_info, args):
|
321
|
+
if arg_info.is_required and arg is None:
|
322
|
+
return None
|
323
|
+
decoded_args.append(arg_info.decoder(arg))
|
324
|
+
|
325
|
+
decoded_kwargs = {}
|
326
|
+
for kwarg_name, arg in kwargs.items():
|
327
|
+
kwarg_info = self._kwargs_info.get(kwarg_name)
|
328
|
+
if kwarg_info is None:
|
329
|
+
raise ValueError(
|
330
|
+
f"Unexpected keyword argument passed in: {kwarg_name}"
|
331
|
+
)
|
332
|
+
if kwarg_info.is_required and arg is None:
|
333
|
+
return None
|
334
|
+
decoded_kwargs[kwarg_name] = kwarg_info.decoder(arg)
|
309
335
|
|
310
336
|
if op_args.gpu:
|
311
337
|
# For GPU executions, data-level parallelism is applied, so we don't want to
|
cocoindex/setting.py
CHANGED
@@ -44,6 +44,8 @@ class DatabaseConnectionSpec:
|
|
44
44
|
url: str
|
45
45
|
user: str | None = None
|
46
46
|
password: str | None = None
|
47
|
+
max_connections: int = 64
|
48
|
+
min_connections: int = 16
|
47
49
|
|
48
50
|
|
49
51
|
@dataclass
|
@@ -51,7 +53,7 @@ class GlobalExecutionOptions:
|
|
51
53
|
"""Global execution options."""
|
52
54
|
|
53
55
|
# The maximum number of concurrent inflight requests, shared among all sources from all flows.
|
54
|
-
source_max_inflight_rows: int | None =
|
56
|
+
source_max_inflight_rows: int | None = 256
|
55
57
|
source_max_inflight_bytes: int | None = None
|
56
58
|
|
57
59
|
|
@@ -92,10 +94,22 @@ class Settings:
|
|
92
94
|
|
93
95
|
database_url = os.getenv("COCOINDEX_DATABASE_URL")
|
94
96
|
if database_url is not None:
|
95
|
-
db_kwargs: dict[str,
|
97
|
+
db_kwargs: dict[str, Any] = dict()
|
96
98
|
_load_field(db_kwargs, "url", "COCOINDEX_DATABASE_URL", required=True)
|
97
99
|
_load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
|
98
100
|
_load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
|
101
|
+
_load_field(
|
102
|
+
db_kwargs,
|
103
|
+
"max_connections",
|
104
|
+
"COCOINDEX_DATABASE_MAX_CONNECTIONS",
|
105
|
+
parse=int,
|
106
|
+
)
|
107
|
+
_load_field(
|
108
|
+
db_kwargs,
|
109
|
+
"min_connections",
|
110
|
+
"COCOINDEX_DATABASE_MIN_CONNECTIONS",
|
111
|
+
parse=int,
|
112
|
+
)
|
99
113
|
database = DatabaseConnectionSpec(**db_kwargs)
|
100
114
|
else:
|
101
115
|
database = None
|
@@ -101,3 +101,44 @@ async def test_for_each_transform_flow_async() -> None:
|
|
101
101
|
}
|
102
102
|
|
103
103
|
assert result == expected, f"Expected {expected}, got {result}"
|
104
|
+
|
105
|
+
|
106
|
+
def test_none_arg_yield_none_result() -> None:
|
107
|
+
"""Test that None arguments yield None results."""
|
108
|
+
|
109
|
+
@cocoindex.op.function()
|
110
|
+
def custom_fn(
|
111
|
+
required_arg: int,
|
112
|
+
optional_arg: int | None,
|
113
|
+
required_kwarg: int,
|
114
|
+
optional_kwarg: int | None,
|
115
|
+
) -> int:
|
116
|
+
return (
|
117
|
+
required_arg + (optional_arg or 0) + required_kwarg + (optional_kwarg or 0)
|
118
|
+
)
|
119
|
+
|
120
|
+
@cocoindex.transform_flow()
|
121
|
+
def transform_flow(
|
122
|
+
required_arg: cocoindex.DataSlice[int | None],
|
123
|
+
optional_arg: cocoindex.DataSlice[int | None],
|
124
|
+
required_kwarg: cocoindex.DataSlice[int | None],
|
125
|
+
optional_kwarg: cocoindex.DataSlice[int | None],
|
126
|
+
) -> cocoindex.DataSlice[int | None]:
|
127
|
+
return required_arg.transform(
|
128
|
+
custom_fn,
|
129
|
+
optional_arg,
|
130
|
+
required_kwarg=required_kwarg,
|
131
|
+
optional_kwarg=optional_kwarg,
|
132
|
+
)
|
133
|
+
|
134
|
+
result = transform_flow.eval(1, 2, 4, 8)
|
135
|
+
assert result == 15, f"Expected 15, got {result}"
|
136
|
+
|
137
|
+
result = transform_flow.eval(1, None, 4, None)
|
138
|
+
assert result == 5, f"Expected 5, got {result}"
|
139
|
+
|
140
|
+
result = transform_flow.eval(None, 2, 4, 8)
|
141
|
+
assert result is None, f"Expected None, got {result}"
|
142
|
+
|
143
|
+
result = transform_flow.eval(1, 2, None, None)
|
144
|
+
assert result is None, f"Expected None, got {result}"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.78
|
4
4
|
Requires-Dist: click>=8.1.8
|
5
5
|
Requires-Dist: rich>=14.0.0
|
6
6
|
Requires-Dist: python-dotenv>=1.1.0
|
@@ -214,6 +214,7 @@ It defines an index flow like this:
|
|
214
214
|
| [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
|
215
215
|
| [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
|
216
216
|
| [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
|
217
|
+
| [Multi Format Indexing](examples/multi_format_indexing) | Build visual document index from PDFs and images with ColPali for semantic search |
|
217
218
|
| [Custom Output Files](examples/custom_output_files) | Convert markdown files to HTML files and save them to a local directory, using *CocoIndex Custom Targets* |
|
218
219
|
|
219
220
|
More coming and stay tuned 👀!
|
@@ -1,9 +1,9 @@
|
|
1
|
-
cocoindex-0.1.
|
2
|
-
cocoindex-0.1.
|
3
|
-
cocoindex-0.1.
|
4
|
-
cocoindex-0.1.
|
1
|
+
cocoindex-0.1.78.dist-info/METADATA,sha256=nnDzBVGk5BZqrwAIxbOJGO32-I5PdHswg5igu7WmNCM,11799
|
2
|
+
cocoindex-0.1.78.dist-info/WHEEL,sha256=IktTQ2BkmimmZPeHCLpiT1hdCTJVWmbb_i88UFB7MTQ,109
|
3
|
+
cocoindex-0.1.78.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
+
cocoindex-0.1.78.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
5
5
|
cocoindex/__init__.py,sha256=sLpSVO5Cotgn_82lawxvXnaqfa-qj33rytWBAe2MTtU,2201
|
6
|
-
cocoindex/_engine.cpython-312-aarch64-linux-gnu.so,sha256=
|
6
|
+
cocoindex/_engine.cpython-312-aarch64-linux-gnu.so,sha256=IFZhYDLepsYzGJ52nUiSNMlKBdnIiJeZQ7ltFjc3qR8,68562720
|
7
7
|
cocoindex/auth_registry.py,sha256=PE1-kVkcyC1G2C_V7b1kvYzeq73OFQehWKQP7ln7fJ8,1478
|
8
8
|
cocoindex/cli.py,sha256=-gp639JSyQN6YjnhGqCakIzYoSSqXxQMbxbkcYGP0QY,22359
|
9
9
|
cocoindex/convert.py,sha256=HodeDl1HVX8nnBH02lQKarw5i3xmkjB0nGj-DXt7Ifc,18284
|
@@ -12,20 +12,20 @@ cocoindex/functions.py,sha256=34sZWoS0zGnaKyooIODQgc6QEPZKiJoWhfb8jKIWwps,9528
|
|
12
12
|
cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
|
13
13
|
cocoindex/lib.py,sha256=f--9dAYd84CZosbDZqNW0oGbBLsY3dXiUTR1VrfQ_QY,817
|
14
14
|
cocoindex/llm.py,sha256=WxmWUbNcf9HOCM5xkbDeFs9lF67M3mr810B7deDDc-8,673
|
15
|
-
cocoindex/op.py,sha256=
|
15
|
+
cocoindex/op.py,sha256=EZ3QrDGJciYAxsqBeZOc3SstOL_34a6cke3WXWwz4-M,22538
|
16
16
|
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
cocoindex/runtime.py,sha256=povilB3HH3y1JF-yxKwU-pD8n2WnAqyQxIgvXXHNc60,1080
|
18
|
-
cocoindex/setting.py,sha256=
|
18
|
+
cocoindex/setting.py,sha256=sGnpkZBAJ7AEm6zVEItGl6fMywgi4uBqZcYwhV19kio,5282
|
19
19
|
cocoindex/setup.py,sha256=7uIHKN4FOCuoidPXcKyGTrkqpkl9luL49-6UcnMxYzw,3068
|
20
20
|
cocoindex/sources.py,sha256=69COA4qbZDipzGYfXv-WJSmicFkA509xIShRGDh6A0A,2083
|
21
21
|
cocoindex/targets.py,sha256=Nfh_tpFd1goTnS_cxBjIs4j9zl3Z4Z1JomAQ1dl3Sic,2796
|
22
22
|
cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
cocoindex/tests/test_convert.py,sha256=l7LqD7duV9-xkYTaKOsEPdqw7v14dUzE40f4VVLlBCQ,49423
|
24
24
|
cocoindex/tests/test_optional_database.py,sha256=snAmkNa6wtOSaxoZE1HgjvL5v_ylitt3Jt_9df4Cgdc,8506
|
25
|
-
cocoindex/tests/test_transform_flow.py,sha256=
|
25
|
+
cocoindex/tests/test_transform_flow.py,sha256=M63Noo29Kl5CawTosCeZHjkn4ObjTP2ycHCGO5p8WHM,4230
|
26
26
|
cocoindex/tests/test_typing.py,sha256=9OF3lO2uSpZBefkEJx7WRbnkXjwQtvlQIeeARYQID68,12391
|
27
27
|
cocoindex/tests/test_validation.py,sha256=X6AQzVs-hVKIXcrHMEMQnhfUE8at7iXQnPq8nHNhZ2Q,4543
|
28
28
|
cocoindex/typing.py,sha256=qQ0ANF3iuQDeSqipHgL2SDiiXL2reTMUN0aj4ve_T0w,13359
|
29
29
|
cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
|
30
30
|
cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
|
31
|
-
cocoindex-0.1.
|
31
|
+
cocoindex-0.1.78.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|