cocoindex 0.1.83__cp312-cp312-win_amd64.whl → 0.2.1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
cocoindex/convert.py CHANGED
@@ -14,7 +14,6 @@ from typing import Any, Callable, Mapping, Type, get_origin
14
14
  import numpy as np
15
15
 
16
16
  from .typing import (
17
- KEY_FIELD_NAME,
18
17
  TABLE_TYPES,
19
18
  AnalyzedAnyType,
20
19
  AnalyzedBasicType,
@@ -28,7 +27,6 @@ from .typing import (
28
27
  encode_enriched_type,
29
28
  is_namedtuple_type,
30
29
  is_numpy_number_type,
31
- is_struct_type,
32
30
  )
33
31
 
34
32
 
@@ -88,38 +86,35 @@ def make_engine_value_encoder(type_info: AnalyzedTypeInfo) -> Callable[[Any], An
88
86
 
89
87
  return encode_struct_list
90
88
 
91
- if isinstance(variant, AnalyzedDictType):
92
- if not variant.value_type:
93
- return lambda value: value
89
+ # Otherwise it's a vector, falling into basic type in the engine.
94
90
 
91
+ if isinstance(variant, AnalyzedDictType):
95
92
  value_type_info = analyze_type_info(variant.value_type)
96
- if isinstance(value_type_info.variant, AnalyzedStructType):
97
-
98
- def encode_struct_dict(value: Any) -> Any:
99
- if not isinstance(value, dict):
100
- return value
101
- if not value:
102
- return []
103
-
104
- sample_key, sample_val = next(iter(value.items()))
105
- key_type, val_type = type(sample_key), type(sample_val)
106
-
107
- # Handle KTable case
108
- if value and is_struct_type(val_type):
109
- key_encoder = (
110
- make_engine_value_encoder(analyze_type_info(key_type))
111
- if is_struct_type(key_type)
112
- else make_engine_value_encoder(ANY_TYPE_INFO)
113
- )
114
- value_encoder = make_engine_value_encoder(
115
- analyze_type_info(val_type)
116
- )
117
- return [
118
- [key_encoder(k)] + value_encoder(v) for k, v in value.items()
119
- ]
120
- return {key_encoder(k): value_encoder(v) for k, v in value.items()}
93
+ if not isinstance(value_type_info.variant, AnalyzedStructType):
94
+ raise ValueError(
95
+ f"Value type for dict is required to be a struct (e.g. dataclass or NamedTuple), got {variant.value_type}. "
96
+ f"If you want a free-formed dict, use `cocoindex.Json` instead."
97
+ )
98
+ value_encoder = make_engine_value_encoder(value_type_info)
99
+
100
+ key_type_info = analyze_type_info(variant.key_type)
101
+ key_encoder = make_engine_value_encoder(key_type_info)
102
+ if isinstance(key_type_info.variant, AnalyzedBasicType):
121
103
 
122
- return encode_struct_dict
104
+ def encode_row(k: Any, v: Any) -> Any:
105
+ return [key_encoder(k)] + value_encoder(v)
106
+
107
+ else:
108
+
109
+ def encode_row(k: Any, v: Any) -> Any:
110
+ return key_encoder(k) + value_encoder(v)
111
+
112
+ def encode_struct_dict(value: Any) -> Any:
113
+ if not value:
114
+ return []
115
+ return [encode_row(k, v) for k, v in value.items()]
116
+
117
+ return encode_struct_dict
123
118
 
124
119
  if isinstance(variant, AnalyzedStructType):
125
120
  struct_type = variant.struct_type
@@ -132,8 +127,8 @@ def make_engine_value_encoder(type_info: AnalyzedTypeInfo) -> Callable[[Any], An
132
127
  field_names = [f.name for f in fields]
133
128
 
134
129
  def encode_dataclass(value: Any) -> Any:
135
- if not dataclasses.is_dataclass(value):
136
- return value
130
+ if value is None:
131
+ return None
137
132
  return [
138
133
  encoder(getattr(value, name))
139
134
  for encoder, name in zip(field_encoders, field_names)
@@ -154,8 +149,8 @@ def make_engine_value_encoder(type_info: AnalyzedTypeInfo) -> Callable[[Any], An
154
149
  ]
155
150
 
156
151
  def encode_namedtuple(value: Any) -> Any:
157
- if not is_namedtuple_type(type(value)):
158
- return value
152
+ if value is None:
153
+ return None
159
154
  return [
160
155
  encoder(getattr(value, name))
161
156
  for encoder, name in zip(field_encoders, field_names)
@@ -248,25 +243,47 @@ def make_engine_value_decoder(
248
243
  f"declared `{dst_type_info.core_type}`, a dict type expected"
249
244
  )
250
245
 
251
- key_field_schema = engine_fields_schema[0]
252
- field_path.append(f".{key_field_schema.get('name', KEY_FIELD_NAME)}")
253
- key_decoder = make_engine_value_decoder(
254
- field_path,
255
- key_field_schema["type"],
256
- analyze_type_info(key_type),
257
- for_key=True,
258
- )
259
- field_path.pop()
246
+ num_key_parts = src_type.get("num_key_parts", 1)
247
+ key_type_info = analyze_type_info(key_type)
248
+ key_decoder: Callable[..., Any] | None = None
249
+ if (
250
+ isinstance(
251
+ key_type_info.variant, (AnalyzedBasicType, AnalyzedAnyType)
252
+ )
253
+ and num_key_parts == 1
254
+ ):
255
+ single_key_decoder = make_engine_value_decoder(
256
+ field_path,
257
+ engine_fields_schema[0]["type"],
258
+ key_type_info,
259
+ for_key=True,
260
+ )
261
+
262
+ def key_decoder(value: list[Any]) -> Any:
263
+ return single_key_decoder(value[0])
264
+
265
+ else:
266
+ key_decoder = make_engine_struct_decoder(
267
+ field_path,
268
+ engine_fields_schema[0:num_key_parts],
269
+ key_type_info,
270
+ for_key=True,
271
+ )
260
272
  value_decoder = make_engine_struct_decoder(
261
273
  field_path,
262
- engine_fields_schema[1:],
274
+ engine_fields_schema[num_key_parts:],
263
275
  analyze_type_info(value_type),
264
276
  )
265
277
 
266
278
  def decode(value: Any) -> Any | None:
267
279
  if value is None:
268
280
  return None
269
- return {key_decoder(v[0]): value_decoder(v[1:]) for v in value}
281
+ return {
282
+ key_decoder(v[0:num_key_parts]): value_decoder(
283
+ v[num_key_parts:]
284
+ )
285
+ for v in value
286
+ }
270
287
 
271
288
  return decode
272
289
 
cocoindex/setting.py CHANGED
@@ -44,8 +44,8 @@ class DatabaseConnectionSpec:
44
44
  url: str
45
45
  user: str | None = None
46
46
  password: str | None = None
47
- max_connections: int = 64
48
- min_connections: int = 16
47
+ max_connections: int = 25
48
+ min_connections: int = 5
49
49
 
50
50
 
51
51
  @dataclass
@@ -22,6 +22,7 @@ import time
22
22
  from .user_app_loader import load_user_app
23
23
  from .runtime import execution_context
24
24
  import logging
25
+ import multiprocessing as mp
25
26
 
26
27
  WATCHDOG_INTERVAL_SECONDS = 10.0
27
28
 
@@ -43,6 +44,7 @@ def _get_pool() -> ProcessPoolExecutor:
43
44
  max_workers=1,
44
45
  initializer=_subprocess_init,
45
46
  initargs=(_user_apps, os.getpid()),
47
+ mp_context=mp.get_context("spawn"),
46
48
  )
47
49
  return _pool
48
50
 
@@ -69,6 +71,7 @@ def _restart_pool(old_pool: ProcessPoolExecutor | None = None) -> None:
69
71
  max_workers=1,
70
72
  initializer=_subprocess_init,
71
73
  initargs=(_user_apps, os.getpid()),
74
+ mp_context=mp.get_context("spawn"),
72
75
  )
73
76
  if prev_pool is not None:
74
77
  # Best-effort shutdown of previous pool; letting exceptions bubble up
@@ -124,8 +127,19 @@ def _start_parent_watchdog(
124
127
 
125
128
  def _subprocess_init(user_apps: list[str], parent_pid: int) -> None:
126
129
  _start_parent_watchdog(parent_pid)
130
+
131
+ # In case any user app is already in this subprocess, e.g. the subprocess is forked, we need to avoid loading it again.
132
+ with _pool_lock:
133
+ already_loaded_apps = set(_user_apps)
134
+
135
+ loaded_apps = []
127
136
  for app_target in user_apps:
128
- load_user_app(app_target)
137
+ if app_target not in already_loaded_apps:
138
+ load_user_app(app_target)
139
+ loaded_apps.append(app_target)
140
+
141
+ with _pool_lock:
142
+ _user_apps.extend(loaded_apps)
129
143
 
130
144
 
131
145
  class _OnceResult:
cocoindex/typing.py CHANGED
@@ -330,35 +330,50 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
330
330
 
331
331
  def _encode_struct_schema(
332
332
  struct_type: type, key_type: type | None = None
333
- ) -> dict[str, Any]:
333
+ ) -> tuple[dict[str, Any], int | None]:
334
334
  fields = []
335
335
 
336
- def add_field(name: str, t: Any) -> None:
336
+ def add_field(name: str, analyzed_type: AnalyzedTypeInfo) -> None:
337
337
  try:
338
- type_info = encode_enriched_type_info(analyze_type_info(t))
338
+ type_info = encode_enriched_type_info(analyzed_type)
339
339
  except ValueError as e:
340
340
  e.add_note(
341
341
  f"Failed to encode annotation for field - "
342
- f"{struct_type.__name__}.{name}: {t}"
342
+ f"{struct_type.__name__}.{name}: {analyzed_type.core_type}"
343
343
  )
344
344
  raise
345
345
  type_info["name"] = name
346
346
  fields.append(type_info)
347
347
 
348
+ def add_fields_from_struct(struct_type: type) -> None:
349
+ if dataclasses.is_dataclass(struct_type):
350
+ for field in dataclasses.fields(struct_type):
351
+ add_field(field.name, analyze_type_info(field.type))
352
+ elif is_namedtuple_type(struct_type):
353
+ for name, field_type in struct_type.__annotations__.items():
354
+ add_field(name, analyze_type_info(field_type))
355
+ else:
356
+ raise ValueError(f"Unsupported struct type: {struct_type}")
357
+
358
+ result: dict[str, Any] = {}
359
+ num_key_parts = None
348
360
  if key_type is not None:
349
- add_field(KEY_FIELD_NAME, key_type)
361
+ key_type_info = analyze_type_info(key_type)
362
+ if isinstance(key_type_info.variant, AnalyzedBasicType):
363
+ add_field(KEY_FIELD_NAME, key_type_info)
364
+ num_key_parts = 1
365
+ elif isinstance(key_type_info.variant, AnalyzedStructType):
366
+ add_fields_from_struct(key_type_info.variant.struct_type)
367
+ num_key_parts = len(fields)
368
+ else:
369
+ raise ValueError(f"Unsupported key type: {key_type}")
350
370
 
351
- if dataclasses.is_dataclass(struct_type):
352
- for field in dataclasses.fields(struct_type):
353
- add_field(field.name, field.type)
354
- elif is_namedtuple_type(struct_type):
355
- for name, field_type in struct_type.__annotations__.items():
356
- add_field(name, field_type)
371
+ add_fields_from_struct(struct_type)
357
372
 
358
- result: dict[str, Any] = {"fields": fields}
373
+ result["fields"] = fields
359
374
  if doc := inspect.getdoc(struct_type):
360
375
  result["description"] = doc
361
- return result
376
+ return result, num_key_parts
362
377
 
363
378
 
364
379
  def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
@@ -374,7 +389,7 @@ def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
374
389
  return {"kind": variant.kind}
375
390
 
376
391
  if isinstance(variant, AnalyzedStructType):
377
- encoded_type = _encode_struct_schema(variant.struct_type)
392
+ encoded_type, _ = _encode_struct_schema(variant.struct_type)
378
393
  encoded_type["kind"] = "Struct"
379
394
  return encoded_type
380
395
 
@@ -384,10 +399,8 @@ def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
384
399
  if isinstance(elem_type_info.variant, AnalyzedStructType):
385
400
  if variant.vector_info is not None:
386
401
  raise ValueError("LTable type must not have a vector info")
387
- return {
388
- "kind": "LTable",
389
- "row": _encode_struct_schema(elem_type_info.variant.struct_type),
390
- }
402
+ row_type, _ = _encode_struct_schema(elem_type_info.variant.struct_type)
403
+ return {"kind": "LTable", "row": row_type}
391
404
  else:
392
405
  vector_info = variant.vector_info
393
406
  return {
@@ -402,12 +415,14 @@ def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
402
415
  raise ValueError(
403
416
  f"KTable value must have a Struct type, got {value_type_info.core_type}"
404
417
  )
418
+ row_type, num_key_parts = _encode_struct_schema(
419
+ value_type_info.variant.struct_type,
420
+ variant.key_type,
421
+ )
405
422
  return {
406
423
  "kind": "KTable",
407
- "row": _encode_struct_schema(
408
- value_type_info.variant.struct_type,
409
- variant.key_type,
410
- ),
424
+ "row": row_type,
425
+ "num_key_parts": num_key_parts,
411
426
  }
412
427
 
413
428
  if isinstance(variant, AnalyzedUnionType):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.83
3
+ Version: 0.2.1
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -1,12 +1,12 @@
1
- cocoindex-0.1.83.dist-info/METADATA,sha256=hdRQSWNsnB6dUFa1F8ikl2fE9dnIVNv288ldby5XbNU,12404
2
- cocoindex-0.1.83.dist-info/WHEEL,sha256=HF3aUMilrtO42xS_fBzOVaPE8OtiNjP_RotLatII7HM,96
3
- cocoindex-0.1.83.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
- cocoindex-0.1.83.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
1
+ cocoindex-0.2.1.dist-info/METADATA,sha256=m9RVQtYVruDkXdRHm0GdTqZYRSnNF5R_bBIcUqZoIC8,12403
2
+ cocoindex-0.2.1.dist-info/WHEEL,sha256=4hYCffp0RsSVQAuv2PMtXQ9QS7YSHeZi4PrSg-wi2q0,96
3
+ cocoindex-0.2.1.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
+ cocoindex-0.2.1.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
5
5
  cocoindex/__init__.py,sha256=5zwuS_X-n7QAE5uBSufqXp77OW8KVVD8E5t_6koDLRc,2293
6
- cocoindex/_engine.cp312-win_amd64.pyd,sha256=FbhWKFdn6QGQZ0dsgXNsuYKWubdEKro9GQU4yqxtjcg,71575552
6
+ cocoindex/_engine.cp312-win_amd64.pyd,sha256=l1eJWMLVZz7ihhGO5iqFvnK8sLGzjI3vOIZRBhNtZzc,71615488
7
7
  cocoindex/auth_registry.py,sha256=Qq1IVZb-7K4luRrQSDlOPbISnGEZ4kIDsrCU8H2ARw0,1529
8
8
  cocoindex/cli.py,sha256=VgeVgO5z2l6tv7XWS3q-sRZ8cYe9r7izUYUR3NfdgJA,21372
9
- cocoindex/convert.py,sha256=IqrOswncP99pOYecCtOKpp1pQEQHyxE13KKfLGSN9ZQ,22066
9
+ cocoindex/convert.py,sha256=k1NT3VoERVS2HV1rLDFyCk6rG1YgHXDnIvrI-lqFdUE,22681
10
10
  cocoindex/flow.py,sha256=6AadyuaQsujkjV1zcXM-DiYr29uVdGXII4tIuNGDo_k,37440
11
11
  cocoindex/functions.py,sha256=CtiwTVW6g4BtO5_EvVcij7Si4Bx-axnM1hsdU43aM4g,12617
12
12
  cocoindex/index.py,sha256=GrqTm1rLwICQ8hadtNvJAxVg7GWMvtMmFcbiNtNzmP0,569
@@ -15,10 +15,10 @@ cocoindex/llm.py,sha256=JM5EPup7FZojynCI0rg4gnMBYmdPZpaHJbVBz3f7BAI,897
15
15
  cocoindex/op.py,sha256=1uOwE1zzMs7FrMSpQMJPUI8YtnXXk3-tlEJ_ahPFq5A,22884
16
16
  cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  cocoindex/runtime.py,sha256=6mE-jR1Kh5c4GETDvBgwjXZq69TK5rh1qNpaseRDZnw,1117
18
- cocoindex/setting.py,sha256=048VfKRly9TzLElJlL01tUxySGtRbsc9m68yIHvMzMU,5464
18
+ cocoindex/setting.py,sha256=obq1EiFlSJvidQZPVVC9V5ZUZAzYnyYcSm_6XUVDkUE,5463
19
19
  cocoindex/setup.py,sha256=KbJvmeFu0NbeoH-5iDmHZP86f26HIId8kHmGUNZAePI,3160
20
20
  cocoindex/sources.py,sha256=mVZhyVTyHzFy7z3lceYqgguygADQkIL42nYK6zKSKKQ,2868
21
- cocoindex/subprocess_exec.py,sha256=KDV7xdFUhKYqHZdiJqirNTl7DbJQUXLvga0Q1urE6XA,8143
21
+ cocoindex/subprocess_exec.py,sha256=ase_F4ivmyShCIJcygRl9Uz8BKthpOLq-TUfyqrUB9A,8658
22
22
  cocoindex/targets.py,sha256=7FfG9kuEf5KTXtLwXMFaPFIut3PsIbpb3XIEjjeF7Bg,2931
23
23
  cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  cocoindex/tests/test_convert.py,sha256=kngLt9p2BXo3D7_OZNnnSS1w_Ew8966IuaDpHcfd9Go,51750
@@ -26,8 +26,8 @@ cocoindex/tests/test_optional_database.py,sha256=dnzmTgaJf37D3q8fQsjP5UDER6FYETa
26
26
  cocoindex/tests/test_transform_flow.py,sha256=DDtxENZHh_2mt0vDU4T1Oaq-jVLJDwaKpkMg83_9NfE,5324
27
27
  cocoindex/tests/test_typing.py,sha256=WqR1M11dSVYXZj1DnXnOvwH6VNQpiv5lCEO-FqA_by0,12820
28
28
  cocoindex/tests/test_validation.py,sha256=I4wr8lAMAjmy5xgG5N_OJKveXt8XIa96MsQTXhw5AnA,4677
29
- cocoindex/typing.py,sha256=gMNJIpGGe-SiXlihDQ-Dw2YdebQvOyG-bWovR-veO6g,13817
29
+ cocoindex/typing.py,sha256=bkKcp4G_xQODwQn92sfeNi-kXSWnbtlwTpFbrMGCnj4,14673
30
30
  cocoindex/user_app_loader.py,sha256=jKNyCq5Osl4dMevlDNloGuwCfDscxw5o0m9_OqrHDN8,1965
31
31
  cocoindex/utils.py,sha256=U3W39zD2uZpXX8v84tJD7sRmbC5ar3z_ljAP1cJrYXI,618
32
32
  cocoindex/validation.py,sha256=4ZjsW-SZT8X_TEEhEE6QG6D-8Oq_TkPAhTqP0mdFYSE,3194
33
- cocoindex-0.1.83.dist-info/RECORD,,
33
+ cocoindex-0.2.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.9.3)
2
+ Generator: maturin (1.9.4)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp312-cp312-win_amd64