cocoindex 0.2.16__cp311-abi3-manylinux_2_28_aarch64.whl → 0.2.18__cp311-abi3-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cocoindex/_engine.abi3.so CHANGED
Binary file
@@ -6,7 +6,7 @@ from dataclasses import dataclass
6
6
  from typing import Generic, TypeVar
7
7
 
8
8
  from . import _engine # type: ignore
9
- from .convert import dump_engine_object, load_engine_object
9
+ from .engine_object import dump_engine_object, load_engine_object
10
10
 
11
11
  T = TypeVar("T")
12
12
 
cocoindex/cli.py CHANGED
@@ -2,6 +2,7 @@ import atexit
2
2
  import asyncio
3
3
  import datetime
4
4
  import importlib.util
5
+ import json
5
6
  import os
6
7
  import signal
7
8
  import threading
@@ -220,6 +221,41 @@ def show(app_flow_specifier: str, color: bool, verbose: bool) -> None:
220
221
  console.print(table)
221
222
 
222
223
 
224
def _drop_flows(flows: Iterable[flow.Flow], app_ref: str, force: bool = False) -> None:
    """
    Drop the backend setup for the given flows.

    Used internally by the --reset flag and by the `drop` command.

    Args:
        flows: Iterable of Flow objects to drop. Materialized internally, so a
            one-shot iterator is safe to pass.
        app_ref: Reference to the application the flows belong to (used in
            messages only).
        force: If True, skip the confirmation prompt.
    """
    # Materialize once: the iterable is consumed both for the name list and for
    # make_drop_bundle(), and truthiness checks on a generator are meaningless.
    flows = list(flows)
    if not flows:
        # Nothing to do — bail out before announcing a drop of an empty list.
        click.echo("No flows identified for the drop operation.")
        return

    flow_full_names = ", ".join(fl.full_name for fl in flows)
    click.echo(
        f"Preparing to drop specified flows: {flow_full_names} (in '{app_ref}').",
        err=True,
    )

    setup_bundle = flow.make_drop_bundle(flows)
    description, is_up_to_date = setup_bundle.describe()
    click.echo(description)
    if is_up_to_date:
        click.echo("No flows need to be dropped.")
        return
    if not force and not click.confirm(
        f"\nThis will apply changes to drop setup for: {flow_full_names}. Continue? [yes/N]",
        default=False,
        show_default=False,
    ):
        click.echo("Drop operation aborted by user.")
        return
    setup_bundle.apply(report_to_stdout=True)
257
+
258
+
223
259
  def _setup_flows(
224
260
  flow_iter: Iterable[flow.Flow],
225
261
  *,
@@ -269,7 +305,14 @@ async def _update_all_flows_with_hint_async(
269
305
  default=False,
270
306
  help="Force setup without confirmation prompts.",
271
307
  )
272
- def setup(app_target: str, force: bool) -> None:
308
+ @click.option(
309
+ "--reset",
310
+ is_flag=True,
311
+ show_default=True,
312
+ default=False,
313
+ help="Drop existing setup before running setup (equivalent to running 'cocoindex drop' first).",
314
+ )
315
+ def setup(app_target: str, force: bool, reset: bool) -> None:
273
316
  """
274
317
  Check and apply backend setup changes for flows, including the internal storage and target (to export to).
275
318
 
@@ -277,6 +320,11 @@ def setup(app_target: str, force: bool) -> None:
277
320
  """
278
321
  app_ref = _get_app_ref_from_specifier(app_target)
279
322
  _load_user_app(app_ref)
323
+
324
+ # If --reset is specified, drop existing setup first
325
+ if reset:
326
+ _drop_flows(flow.flows().values(), app_ref=app_ref, force=force)
327
+
280
328
  _setup_flows(flow.flows().values(), force=force, always_show_setup=True)
281
329
 
282
330
 
@@ -325,30 +373,7 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], force: bool) -> Non
325
373
  else:
326
374
  flows = flow.flows().values()
327
375
 
328
- flow_full_names = ", ".join(fl.full_name for fl in flows)
329
- click.echo(
330
- f"Preparing to drop specified flows: {flow_full_names} (in '{app_ref}').",
331
- err=True,
332
- )
333
-
334
- if not flows:
335
- click.echo("No flows identified for the drop operation.")
336
- return
337
-
338
- setup_bundle = flow.make_drop_bundle(flows)
339
- description, is_up_to_date = setup_bundle.describe()
340
- click.echo(description)
341
- if is_up_to_date:
342
- click.echo("No flows need to be dropped.")
343
- return
344
- if not force and not click.confirm(
345
- f"\nThis will apply changes to drop setup for: {flow_full_names}. Continue? [yes/N]",
346
- default=False,
347
- show_default=False,
348
- ):
349
- click.echo("Drop operation aborted by user.")
350
- return
351
- setup_bundle.apply(report_to_stdout=True)
376
+ _drop_flows(flows, app_ref=app_ref, force=force)
352
377
 
353
378
 
354
379
  @cli.command()
@@ -375,6 +400,13 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], force: bool) -> Non
375
400
  default=False,
376
401
  help="Automatically setup backends for the flow if it's not setup yet.",
377
402
  )
403
+ @click.option(
404
+ "--reset",
405
+ is_flag=True,
406
+ show_default=True,
407
+ default=False,
408
+ help="Drop existing setup before updating (equivalent to running 'cocoindex drop' first). `--reset` implies `--setup`.",
409
+ )
378
410
  @click.option(
379
411
  "-f",
380
412
  "--force",
@@ -396,6 +428,7 @@ def update(
396
428
  live: bool,
397
429
  reexport: bool,
398
430
  setup: bool, # pylint: disable=redefined-outer-name
431
+ reset: bool,
399
432
  force: bool,
400
433
  quiet: bool,
401
434
  ) -> None:
@@ -407,6 +440,13 @@ def update(
407
440
  """
408
441
  app_ref, flow_name = _parse_app_flow_specifier(app_flow_specifier)
409
442
  _load_user_app(app_ref)
443
+ flow_list = (
444
+ [flow.flow_by_name(flow_name)] if flow_name else list(flow.flows().values())
445
+ )
446
+
447
+ # If --reset is specified, drop existing setup first
448
+ if reset:
449
+ _drop_flows(flow_list, app_ref=app_ref, force=force)
410
450
 
411
451
  if live:
412
452
  click.secho(
@@ -419,19 +459,14 @@ def update(
419
459
  reexport_targets=reexport,
420
460
  print_stats=not quiet,
421
461
  )
462
+ if reset or setup:
463
+ _setup_flows(flow_list, force=force, quiet=quiet)
464
+
422
465
  if flow_name is None:
423
- if setup:
424
- _setup_flows(
425
- flow.flows().values(),
426
- force=force,
427
- quiet=quiet,
428
- )
429
466
  execution_context.run(_update_all_flows_with_hint_async(options))
430
467
  else:
431
- fl = flow.flow_by_name(flow_name)
432
- if setup:
433
- _setup_flows((fl,), force=force, quiet=quiet)
434
- with flow.FlowLiveUpdater(fl, options) as updater:
468
+ assert len(flow_list) == 1
469
+ with flow.FlowLiveUpdater(flow_list[0], options) as updater:
435
470
  updater.wait()
436
471
  if options.live_mode:
437
472
  _show_no_live_update_hint()
@@ -529,6 +564,13 @@ def evaluate(
529
564
  default=False,
530
565
  help="Automatically setup backends for the flow if it's not setup yet.",
531
566
  )
567
+ @click.option(
568
+ "--reset",
569
+ is_flag=True,
570
+ show_default=True,
571
+ default=False,
572
+ help="Drop existing setup before starting server (equivalent to running 'cocoindex drop' first). `--reset` implies `--setup`.",
573
+ )
532
574
  @click.option(
533
575
  "--reexport",
534
576
  is_flag=True,
@@ -565,6 +607,7 @@ def server(
565
607
  address: str | None,
566
608
  live_update: bool,
567
609
  setup: bool, # pylint: disable=redefined-outer-name
610
+ reset: bool,
568
611
  reexport: bool,
569
612
  force: bool,
570
613
  quiet: bool,
@@ -588,11 +631,14 @@ def server(
588
631
  cors_cocoindex,
589
632
  cors_local,
590
633
  live_update,
591
- setup,
592
634
  reexport,
593
- force,
594
635
  quiet,
595
636
  )
637
+ kwargs = {
638
+ "run_reset": reset,
639
+ "run_setup": setup,
640
+ "force": force,
641
+ }
596
642
 
597
643
  if reload:
598
644
  watch_paths = {os.getcwd()}
@@ -610,6 +656,7 @@ def server(
610
656
  *watch_paths,
611
657
  target=_reloadable_server_target,
612
658
  args=args,
659
+ kwargs=kwargs,
613
660
  watch_filter=watchfiles.PythonFilter(),
614
661
  callback=lambda changes: click.secho(
615
662
  f"\nDetected changes in {len(changes)} file(s), reloading server...\n",
@@ -621,12 +668,19 @@ def server(
621
668
  "NOTE: Flow code changes will NOT be reflected until you restart to load the new code. Use --reload to enable auto-reload.\n",
622
669
  fg="yellow",
623
670
  )
624
- _run_server(*args)
671
+ _run_server(*args, **kwargs)
625
672
 
626
673
 
627
674
def _reloadable_server_target(*args: Any, **kwargs: Any) -> None:
    """Reloadable target for the watchfiles process."""
    _initialize_cocoindex_in_process()

    # Requesting a reset implies running setup as well.
    kwargs["run_setup"] = kwargs["run_setup"] or kwargs["run_reset"]

    # WATCHFILES_CHANGES is set by watchfiles on a hot reload; in that case we
    # skip the destructive reset and apply setup changes without prompting.
    changed_files = json.loads(os.environ.get("WATCHFILES_CHANGES", "[]"))
    if changed_files:
        kwargs["run_reset"] = False
        kwargs["force"] = True

    _run_server(*args, **kwargs)
631
685
 
632
686
 
@@ -637,14 +691,40 @@ def _run_server(
637
691
  cors_cocoindex: bool = False,
638
692
  cors_local: int | None = None,
639
693
  live_update: bool = False,
640
- run_setup: bool = False,
641
694
  reexport: bool = False,
642
- force: bool = False,
643
695
  quiet: bool = False,
696
+ /,
697
+ *,
698
+ force: bool = False,
699
+ run_reset: bool = False,
700
+ run_setup: bool = False,
644
701
  ) -> None:
645
702
  """Helper function to run the server with specified settings."""
646
703
  _load_user_app(app_ref)
647
704
 
705
+ # Check if any flows are registered
706
+ if not flow.flow_names():
707
+ click.secho(
708
+ f"\nError: No flows registered in '{app_ref}'.\n",
709
+ fg="red",
710
+ bold=True,
711
+ err=True,
712
+ )
713
+ click.secho(
714
+ "To use CocoIndex server, you need to define at least one flow.",
715
+ err=True,
716
+ )
717
+ click.secho(
718
+ "See https://cocoindex.io/docs for more information.\n",
719
+ fg="cyan",
720
+ err=True,
721
+ )
722
+ raise click.Abort()
723
+
724
+ # If --reset is specified, drop existing setup first
725
+ if run_reset:
726
+ _drop_flows(flow.flows().values(), app_ref=app_ref, force=force)
727
+
648
728
  server_settings = setting.ServerSettings.from_env()
649
729
  cors_origins: set[str] = set(server_settings.cors_origins or [])
650
730
  if cors_origin is not None:
@@ -658,7 +738,7 @@ def _run_server(
658
738
  if address is not None:
659
739
  server_settings.address = address
660
740
 
661
- if run_setup:
741
+ if run_reset or run_setup:
662
742
  _setup_flows(
663
743
  flow.flows().values(),
664
744
  force=force,
@@ -0,0 +1,272 @@
1
+ """
2
+ Utilities to dump/load objects (for configs, specs).
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import datetime
8
+ import dataclasses
9
+ from enum import Enum
10
+ from typing import Any, Mapping, TypeVar, overload, get_origin
11
+
12
+ import numpy as np
13
+
14
+ from .typing import (
15
+ AnalyzedAnyType,
16
+ AnalyzedBasicType,
17
+ AnalyzedDictType,
18
+ AnalyzedListType,
19
+ AnalyzedStructType,
20
+ AnalyzedTypeInfo,
21
+ AnalyzedUnionType,
22
+ EnrichedValueType,
23
+ FieldSchema,
24
+ analyze_type_info,
25
+ encode_enriched_type,
26
+ is_namedtuple_type,
27
+ is_pydantic_model,
28
+ extract_ndarray_elem_dtype,
29
+ )
30
+
31
+
32
+ T = TypeVar("T")
33
+
34
+ try:
35
+ import pydantic, pydantic_core
36
+ except ImportError:
37
+ pass
38
+
39
+
40
def get_auto_default_for_type(
    type_info: AnalyzedTypeInfo,
) -> tuple[Any, bool]:
    """
    Get an auto-default value for a type annotation if it's safe to do so.

    Returns:
        A tuple of (default_value, is_supported) where:
        - default_value: The default value if auto-defaulting is supported
        - is_supported: True if auto-defaulting is supported for this type
    """
    # Nullable annotations (Optional[T] / T | None) safely default to None.
    if type_info.nullable:
        return None, True

    # Table-like annotations (LTable/KTable) default to an empty container.
    variant = type_info.variant
    if isinstance(variant, AnalyzedListType):
        return [], True
    if isinstance(variant, AnalyzedDictType):
        return {}, True

    # Anything else has no safe automatic default.
    return None, False
62
+
63
+
64
def dump_engine_object(v: Any) -> Any:
    """Recursively dump an object for engine. Engine side uses `Pythonized` to catch."""
    if v is None:
        return None
    if isinstance(v, EnrichedValueType):
        return v.encode()
    if isinstance(v, FieldSchema):
        return v.encode()
    if isinstance(v, type) or get_origin(v) is not None:
        return encode_enriched_type(v)
    if isinstance(v, Enum):
        return v.value
    if isinstance(v, datetime.timedelta):
        # Split into whole seconds plus remaining nanoseconds.
        total_secs = v.total_seconds()
        secs = int(total_secs)
        nanos = int((total_secs - secs) * 1e9)
        return {"secs": secs, "nanos": nanos}
    if is_namedtuple_type(type(v)):
        # NamedTuple objects are dumped as dicts, keeping all values
        # (including None) so positional fields stay complete.
        dumped = {
            name: dump_engine_object(getattr(v, name))
            for name in getattr(type(v), "_fields", ())
        }
        if hasattr(v, "kind") and "kind" not in dumped:
            dumped["kind"] = v.kind
        return dumped
    if hasattr(v, "__dict__"):  # for dataclass-like objects
        dumped = {}
        for attr_name, attr_val in v.__dict__.items():
            if attr_val is None:
                # Omit None-valued attributes from the payload.
                continue
            dumped[attr_name] = dump_engine_object(attr_val)
        if hasattr(v, "kind") and "kind" not in dumped:
            dumped["kind"] = v.kind
        return dumped
    if isinstance(v, (list, tuple)):
        return [dump_engine_object(item) for item in v]
    if isinstance(v, np.ndarray):
        return v.tolist()
    if isinstance(v, dict):
        return {key: dump_engine_object(item) for key, item in v.items()}
    return v
108
+
109
+
110
@overload
def load_engine_object(expected_type: type[T], v: Any) -> T: ...
@overload
def load_engine_object(expected_type: Any, v: Any) -> Any: ...
def load_engine_object(expected_type: Any, v: Any) -> Any:
    """Recursively load an object that was produced by dump_engine_object().

    Args:
        expected_type: The Python type annotation to reconstruct to.
        v: The engine-facing Pythonized object (e.g., dict/list/primitive) to convert.

    Returns:
        A Python object matching the expected_type where possible.
    """
    # Fast path
    if v is None:
        return None

    type_info = analyze_type_info(expected_type)
    variant = type_info.variant

    if type_info.core_type is EnrichedValueType:
        return EnrichedValueType.decode(v)
    if type_info.core_type is FieldSchema:
        return FieldSchema.decode(v)

    # Any or unknown: pass through untouched.
    if isinstance(variant, AnalyzedAnyType) or type_info.base_type is Any:
        return v

    # Enums round-trip through their value.
    if isinstance(expected_type, type) and issubclass(expected_type, Enum):
        return expected_type(v)

    # TimeDelta was dumped as the special form {"secs": ..., "nanos": ...}.
    if isinstance(variant, AnalyzedBasicType) and variant.kind == "TimeDelta":
        if isinstance(v, Mapping) and "secs" in v and "nanos" in v:
            return datetime.timedelta(
                seconds=int(v["secs"]),  # type: ignore[index]
                microseconds=int(v["nanos"]) / 1_000,  # type: ignore[index]
            )
        return v

    # List, NDArray (Vector-ish), or general sequences.
    if isinstance(variant, AnalyzedListType):
        if type_info.base_type is np.ndarray:
            # Rebuild the ndarray with the annotated dtype when recoverable.
            try:
                dtype = extract_ndarray_elem_dtype(type_info.core_type)
            except (TypeError, ValueError, AttributeError):
                dtype = None
            return np.array(v, dtype=dtype)
        # Regular Python list.
        elem_type = variant.elem_type if variant.elem_type else Any
        return [load_engine_object(elem_type, item) for item in v]

    # Dict / Mapping.
    if isinstance(variant, AnalyzedDictType):
        return {
            load_engine_object(variant.key_type, key): load_engine_object(
                variant.value_type, val
            )
            for key, val in v.items()
        }

    # Structs: dataclass, NamedTuple, or Pydantic model.
    if isinstance(variant, AnalyzedStructType):
        struct_type = variant.struct_type
        init_kwargs: dict[str, Any] = {}
        missing_fields: list[tuple[str, Any]] = []

        if dataclasses.is_dataclass(struct_type):
            if not isinstance(v, Mapping):
                raise ValueError(f"Expected dict for dataclass, got {type(v)}")
            for dc_field in dataclasses.fields(struct_type):
                if dc_field.name in v:
                    init_kwargs[dc_field.name] = load_engine_object(
                        dc_field.type, v[dc_field.name]
                    )
                elif (
                    dc_field.default is dataclasses.MISSING
                    and dc_field.default_factory is dataclasses.MISSING
                ):
                    # Required field absent from the payload.
                    missing_fields.append((dc_field.name, dc_field.type))

        elif is_namedtuple_type(struct_type):
            if not isinstance(v, Mapping):
                raise ValueError(f"Expected dict for NamedTuple, got {type(v)}")
            # Dict format (from dump/load functions).
            annotations = getattr(struct_type, "__annotations__", {})
            field_defaults = getattr(struct_type, "_field_defaults", {})
            for name in getattr(struct_type, "_fields", ()):
                f_type = annotations.get(name, Any)
                if name in v:
                    init_kwargs[name] = load_engine_object(f_type, v[name])
                elif name not in field_defaults:
                    missing_fields.append((name, f_type))

        elif is_pydantic_model(struct_type):
            if not isinstance(v, Mapping):
                raise ValueError(f"Expected dict for Pydantic model, got {type(v)}")
            model_fields: dict[str, pydantic.fields.FieldInfo] = getattr(
                struct_type, "model_fields", {}
            )
            for name, pyd_field in model_fields.items():
                if name in v:
                    init_kwargs[name] = load_engine_object(
                        pyd_field.annotation, v[name]
                    )
                elif (
                    getattr(pyd_field, "default", pydantic_core.PydanticUndefined)
                    is pydantic_core.PydanticUndefined
                    and getattr(pyd_field, "default_factory") is None
                ):
                    missing_fields.append((name, pyd_field.annotation))
        else:
            assert False, "Unsupported struct type"

        # Fill safe auto-defaults for required fields absent from the payload.
        for name, f_type in missing_fields:
            auto_default, is_supported = get_auto_default_for_type(
                analyze_type_info(f_type)
            )
            if is_supported:
                init_kwargs[name] = auto_default
        return struct_type(**init_kwargs)

    # Union: prefer the "kind" discriminator, then fall back to trial decoding.
    if isinstance(variant, AnalyzedUnionType):
        if isinstance(v, Mapping) and "kind" in v:
            discriminator = v["kind"]
            for typ in variant.variant_types:
                t_info = analyze_type_info(typ)
                if not isinstance(t_info.variant, AnalyzedStructType):
                    continue
                t_struct = t_info.variant.struct_type
                if getattr(t_struct, "kind", None) == discriminator:
                    # Strip the discriminator before calling the constructor.
                    payload = dict(v)
                    payload.pop("kind", None)
                    return load_engine_object(t_struct, payload)
        # Fallback: try each variant until one succeeds.
        for typ in variant.variant_types:
            try:
                return load_engine_object(typ, v)
            except (TypeError, ValueError):
                continue
        return v

    # Basic types and everything else: handle numpy scalars and passthrough.
    if isinstance(v, np.ndarray) and type_info.base_type is list:
        return v.tolist()
    if isinstance(v, (list, tuple)) and type_info.base_type not in (list, tuple):
        # If a non-sequence basic type expected, attempt direct cast.
        try:
            return type_info.core_type(v)
        except (TypeError, ValueError):
            return v
    return v