cocoindex 0.2.16__cp311-abi3-manylinux_2_28_aarch64.whl → 0.2.18__cp311-abi3-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cocoindex/_engine.abi3.so CHANGED
Binary file
@@ -6,7 +6,7 @@ from dataclasses import dataclass
6
6
  from typing import Generic, TypeVar
7
7
 
8
8
  from . import _engine # type: ignore
9
- from .convert import dump_engine_object, load_engine_object
9
+ from .engine_object import dump_engine_object, load_engine_object
10
10
 
11
11
  T = TypeVar("T")
12
12
 
cocoindex/cli.py CHANGED
@@ -2,6 +2,7 @@ import atexit
2
2
  import asyncio
3
3
  import datetime
4
4
  import importlib.util
5
+ import json
5
6
  import os
6
7
  import signal
7
8
  import threading
@@ -220,6 +221,41 @@ def show(app_flow_specifier: str, color: bool, verbose: bool) -> None:
220
221
  console.print(table)
221
222
 
222
223
 
224
def _drop_flows(flows: Iterable[flow.Flow], app_ref: str, force: bool = False) -> None:
    """
    Drop the backend setup for the given flows.

    Used internally by the --reset flag and by the `drop` command.

    Args:
        flows: Iterable of Flow objects to drop. Materialized internally, so a
            one-shot iterator is safe to pass.
        app_ref: Reference to the application the flows belong to (used in
            messages only).
        force: If True, skip the confirmation prompt.
    """
    # Materialize once: the iterable is consumed both for the name list and for
    # make_drop_bundle(), and truthiness checks on a generator are meaningless.
    flows = list(flows)
    if not flows:
        # Nothing to do — bail out before announcing a drop of an empty list.
        click.echo("No flows identified for the drop operation.")
        return

    flow_full_names = ", ".join(fl.full_name for fl in flows)
    click.echo(
        f"Preparing to drop specified flows: {flow_full_names} (in '{app_ref}').",
        err=True,
    )

    setup_bundle = flow.make_drop_bundle(flows)
    description, is_up_to_date = setup_bundle.describe()
    click.echo(description)
    if is_up_to_date:
        click.echo("No flows need to be dropped.")
        return
    if not force and not click.confirm(
        f"\nThis will apply changes to drop setup for: {flow_full_names}. Continue? [yes/N]",
        default=False,
        show_default=False,
    ):
        click.echo("Drop operation aborted by user.")
        return
    setup_bundle.apply(report_to_stdout=True)
257
+
258
+
223
259
  def _setup_flows(
224
260
  flow_iter: Iterable[flow.Flow],
225
261
  *,
@@ -269,7 +305,14 @@ async def _update_all_flows_with_hint_async(
269
305
  default=False,
270
306
  help="Force setup without confirmation prompts.",
271
307
  )
272
- def setup(app_target: str, force: bool) -> None:
308
+ @click.option(
309
+ "--reset",
310
+ is_flag=True,
311
+ show_default=True,
312
+ default=False,
313
+ help="Drop existing setup before running setup (equivalent to running 'cocoindex drop' first).",
314
+ )
315
+ def setup(app_target: str, force: bool, reset: bool) -> None:
273
316
  """
274
317
  Check and apply backend setup changes for flows, including the internal storage and target (to export to).
275
318
 
@@ -277,6 +320,11 @@ def setup(app_target: str, force: bool) -> None:
277
320
  """
278
321
  app_ref = _get_app_ref_from_specifier(app_target)
279
322
  _load_user_app(app_ref)
323
+
324
+ # If --reset is specified, drop existing setup first
325
+ if reset:
326
+ _drop_flows(flow.flows().values(), app_ref=app_ref, force=force)
327
+
280
328
  _setup_flows(flow.flows().values(), force=force, always_show_setup=True)
281
329
 
282
330
 
@@ -325,30 +373,7 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], force: bool) -> Non
325
373
  else:
326
374
  flows = flow.flows().values()
327
375
 
328
- flow_full_names = ", ".join(fl.full_name for fl in flows)
329
- click.echo(
330
- f"Preparing to drop specified flows: {flow_full_names} (in '{app_ref}').",
331
- err=True,
332
- )
333
-
334
- if not flows:
335
- click.echo("No flows identified for the drop operation.")
336
- return
337
-
338
- setup_bundle = flow.make_drop_bundle(flows)
339
- description, is_up_to_date = setup_bundle.describe()
340
- click.echo(description)
341
- if is_up_to_date:
342
- click.echo("No flows need to be dropped.")
343
- return
344
- if not force and not click.confirm(
345
- f"\nThis will apply changes to drop setup for: {flow_full_names}. Continue? [yes/N]",
346
- default=False,
347
- show_default=False,
348
- ):
349
- click.echo("Drop operation aborted by user.")
350
- return
351
- setup_bundle.apply(report_to_stdout=True)
376
+ _drop_flows(flows, app_ref=app_ref, force=force)
352
377
 
353
378
 
354
379
  @cli.command()
@@ -375,6 +400,13 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], force: bool) -> Non
375
400
  default=False,
376
401
  help="Automatically setup backends for the flow if it's not setup yet.",
377
402
  )
403
+ @click.option(
404
+ "--reset",
405
+ is_flag=True,
406
+ show_default=True,
407
+ default=False,
408
+ help="Drop existing setup before updating (equivalent to running 'cocoindex drop' first). `--reset` implies `--setup`.",
409
+ )
378
410
  @click.option(
379
411
  "-f",
380
412
  "--force",
@@ -396,6 +428,7 @@ def update(
396
428
  live: bool,
397
429
  reexport: bool,
398
430
  setup: bool, # pylint: disable=redefined-outer-name
431
+ reset: bool,
399
432
  force: bool,
400
433
  quiet: bool,
401
434
  ) -> None:
@@ -407,6 +440,13 @@ def update(
407
440
  """
408
441
  app_ref, flow_name = _parse_app_flow_specifier(app_flow_specifier)
409
442
  _load_user_app(app_ref)
443
+ flow_list = (
444
+ [flow.flow_by_name(flow_name)] if flow_name else list(flow.flows().values())
445
+ )
446
+
447
+ # If --reset is specified, drop existing setup first
448
+ if reset:
449
+ _drop_flows(flow_list, app_ref=app_ref, force=force)
410
450
 
411
451
  if live:
412
452
  click.secho(
@@ -419,19 +459,14 @@ def update(
419
459
  reexport_targets=reexport,
420
460
  print_stats=not quiet,
421
461
  )
462
+ if reset or setup:
463
+ _setup_flows(flow_list, force=force, quiet=quiet)
464
+
422
465
  if flow_name is None:
423
- if setup:
424
- _setup_flows(
425
- flow.flows().values(),
426
- force=force,
427
- quiet=quiet,
428
- )
429
466
  execution_context.run(_update_all_flows_with_hint_async(options))
430
467
  else:
431
- fl = flow.flow_by_name(flow_name)
432
- if setup:
433
- _setup_flows((fl,), force=force, quiet=quiet)
434
- with flow.FlowLiveUpdater(fl, options) as updater:
468
+ assert len(flow_list) == 1
469
+ with flow.FlowLiveUpdater(flow_list[0], options) as updater:
435
470
  updater.wait()
436
471
  if options.live_mode:
437
472
  _show_no_live_update_hint()
@@ -529,6 +564,13 @@ def evaluate(
529
564
  default=False,
530
565
  help="Automatically setup backends for the flow if it's not setup yet.",
531
566
  )
567
+ @click.option(
568
+ "--reset",
569
+ is_flag=True,
570
+ show_default=True,
571
+ default=False,
572
+ help="Drop existing setup before starting server (equivalent to running 'cocoindex drop' first). `--reset` implies `--setup`.",
573
+ )
532
574
  @click.option(
533
575
  "--reexport",
534
576
  is_flag=True,
@@ -565,6 +607,7 @@ def server(
565
607
  address: str | None,
566
608
  live_update: bool,
567
609
  setup: bool, # pylint: disable=redefined-outer-name
610
+ reset: bool,
568
611
  reexport: bool,
569
612
  force: bool,
570
613
  quiet: bool,
@@ -588,11 +631,14 @@ def server(
588
631
  cors_cocoindex,
589
632
  cors_local,
590
633
  live_update,
591
- setup,
592
634
  reexport,
593
- force,
594
635
  quiet,
595
636
  )
637
+ kwargs = {
638
+ "run_reset": reset,
639
+ "run_setup": setup,
640
+ "force": force,
641
+ }
596
642
 
597
643
  if reload:
598
644
  watch_paths = {os.getcwd()}
@@ -610,6 +656,7 @@ def server(
610
656
  *watch_paths,
611
657
  target=_reloadable_server_target,
612
658
  args=args,
659
+ kwargs=kwargs,
613
660
  watch_filter=watchfiles.PythonFilter(),
614
661
  callback=lambda changes: click.secho(
615
662
  f"\nDetected changes in {len(changes)} file(s), reloading server...\n",
@@ -621,12 +668,19 @@ def server(
621
668
  "NOTE: Flow code changes will NOT be reflected until you restart to load the new code. Use --reload to enable auto-reload.\n",
622
669
  fg="yellow",
623
670
  )
624
- _run_server(*args)
671
+ _run_server(*args, **kwargs)
625
672
 
626
673
 
627
674
def _reloadable_server_target(*args: Any, **kwargs: Any) -> None:
    """Reloadable target for the watchfiles process."""
    _initialize_cocoindex_in_process()

    # Requesting a reset implies running setup as well.
    kwargs["run_setup"] = kwargs["run_setup"] or kwargs["run_reset"]

    # WATCHFILES_CHANGES is set by watchfiles on a hot reload; in that case we
    # skip the destructive reset and apply setup changes without prompting.
    changed_files = json.loads(os.environ.get("WATCHFILES_CHANGES", "[]"))
    if changed_files:
        kwargs["run_reset"] = False
        kwargs["force"] = True

    _run_server(*args, **kwargs)
631
685
 
632
686
 
@@ -637,14 +691,40 @@ def _run_server(
637
691
  cors_cocoindex: bool = False,
638
692
  cors_local: int | None = None,
639
693
  live_update: bool = False,
640
- run_setup: bool = False,
641
694
  reexport: bool = False,
642
- force: bool = False,
643
695
  quiet: bool = False,
696
+ /,
697
+ *,
698
+ force: bool = False,
699
+ run_reset: bool = False,
700
+ run_setup: bool = False,
644
701
  ) -> None:
645
702
  """Helper function to run the server with specified settings."""
646
703
  _load_user_app(app_ref)
647
704
 
705
+ # Check if any flows are registered
706
+ if not flow.flow_names():
707
+ click.secho(
708
+ f"\nError: No flows registered in '{app_ref}'.\n",
709
+ fg="red",
710
+ bold=True,
711
+ err=True,
712
+ )
713
+ click.secho(
714
+ "To use CocoIndex server, you need to define at least one flow.",
715
+ err=True,
716
+ )
717
+ click.secho(
718
+ "See https://cocoindex.io/docs for more information.\n",
719
+ fg="cyan",
720
+ err=True,
721
+ )
722
+ raise click.Abort()
723
+
724
+ # If --reset is specified, drop existing setup first
725
+ if run_reset:
726
+ _drop_flows(flow.flows().values(), app_ref=app_ref, force=force)
727
+
648
728
  server_settings = setting.ServerSettings.from_env()
649
729
  cors_origins: set[str] = set(server_settings.cors_origins or [])
650
730
  if cors_origin is not None:
@@ -658,7 +738,7 @@ def _run_server(
658
738
  if address is not None:
659
739
  server_settings.address = address
660
740
 
661
- if run_setup:
741
+ if run_reset or run_setup:
662
742
  _setup_flows(
663
743
  flow.flows().values(),
664
744
  force=force,
@@ -0,0 +1,272 @@
1
+ """
2
+ Utilities to dump/load objects (for configs, specs).
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import datetime
8
+ import dataclasses
9
+ from enum import Enum
10
+ from typing import Any, Mapping, TypeVar, overload, get_origin
11
+
12
+ import numpy as np
13
+
14
+ from .typing import (
15
+ AnalyzedAnyType,
16
+ AnalyzedBasicType,
17
+ AnalyzedDictType,
18
+ AnalyzedListType,
19
+ AnalyzedStructType,
20
+ AnalyzedTypeInfo,
21
+ AnalyzedUnionType,
22
+ EnrichedValueType,
23
+ FieldSchema,
24
+ analyze_type_info,
25
+ encode_enriched_type,
26
+ is_namedtuple_type,
27
+ is_pydantic_model,
28
+ extract_ndarray_elem_dtype,
29
+ )
30
+
31
+
32
+ T = TypeVar("T")
33
+
34
+ try:
35
+ import pydantic, pydantic_core
36
+ except ImportError:
37
+ pass
38
+
39
+
40
def get_auto_default_for_type(
    type_info: AnalyzedTypeInfo,
) -> tuple[Any, bool]:
    """
    Get an auto-default value for a type annotation if it's safe to do so.

    Returns:
        A tuple of (default_value, is_supported) where:
        - default_value: The default value if auto-defaulting is supported
        - is_supported: True if auto-defaulting is supported for this type
    """
    # Nullable annotations (Optional[T] / T | None) safely default to None.
    if type_info.nullable:
        return None, True

    # Table-like annotations (LTable/KTable) default to an empty container.
    variant = type_info.variant
    if isinstance(variant, AnalyzedListType):
        return [], True
    if isinstance(variant, AnalyzedDictType):
        return {}, True

    # Anything else has no safe automatic default.
    return None, False
62
+
63
+
64
def dump_engine_object(v: Any) -> Any:
    """Recursively dump an object for engine. Engine side uses `Pythonized` to catch."""
    if v is None:
        return None
    if isinstance(v, EnrichedValueType):
        return v.encode()
    if isinstance(v, FieldSchema):
        return v.encode()
    if isinstance(v, type) or get_origin(v) is not None:
        return encode_enriched_type(v)
    if isinstance(v, Enum):
        return v.value
    if isinstance(v, datetime.timedelta):
        # Split into whole seconds plus remaining nanoseconds.
        total_secs = v.total_seconds()
        secs = int(total_secs)
        nanos = int((total_secs - secs) * 1e9)
        return {"secs": secs, "nanos": nanos}
    if is_namedtuple_type(type(v)):
        # NamedTuple objects are dumped as dicts, keeping all values
        # (including None) so positional fields stay complete.
        dumped = {
            name: dump_engine_object(getattr(v, name))
            for name in getattr(type(v), "_fields", ())
        }
        if hasattr(v, "kind") and "kind" not in dumped:
            dumped["kind"] = v.kind
        return dumped
    if hasattr(v, "__dict__"):  # for dataclass-like objects
        dumped = {}
        for attr_name, attr_val in v.__dict__.items():
            if attr_val is None:
                # Omit None-valued attributes from the payload.
                continue
            dumped[attr_name] = dump_engine_object(attr_val)
        if hasattr(v, "kind") and "kind" not in dumped:
            dumped["kind"] = v.kind
        return dumped
    if isinstance(v, (list, tuple)):
        return [dump_engine_object(item) for item in v]
    if isinstance(v, np.ndarray):
        return v.tolist()
    if isinstance(v, dict):
        return {key: dump_engine_object(item) for key, item in v.items()}
    return v
108
+
109
+
110
@overload
def load_engine_object(expected_type: type[T], v: Any) -> T: ...
@overload
def load_engine_object(expected_type: Any, v: Any) -> Any: ...
def load_engine_object(expected_type: Any, v: Any) -> Any:
    """Recursively load an object that was produced by dump_engine_object().

    Args:
        expected_type: The Python type annotation to reconstruct to.
        v: The engine-facing Pythonized object (e.g., dict/list/primitive) to convert.

    Returns:
        A Python object matching the expected_type where possible.
    """
    # Fast path
    if v is None:
        return None

    type_info = analyze_type_info(expected_type)
    variant = type_info.variant

    if type_info.core_type is EnrichedValueType:
        return EnrichedValueType.decode(v)
    if type_info.core_type is FieldSchema:
        return FieldSchema.decode(v)

    # Any or unknown: pass through untouched.
    if isinstance(variant, AnalyzedAnyType) or type_info.base_type is Any:
        return v

    # Enums round-trip through their value.
    if isinstance(expected_type, type) and issubclass(expected_type, Enum):
        return expected_type(v)

    # TimeDelta was dumped as the special form {"secs": ..., "nanos": ...}.
    if isinstance(variant, AnalyzedBasicType) and variant.kind == "TimeDelta":
        if isinstance(v, Mapping) and "secs" in v and "nanos" in v:
            return datetime.timedelta(
                seconds=int(v["secs"]),  # type: ignore[index]
                microseconds=int(v["nanos"]) / 1_000,  # type: ignore[index]
            )
        return v

    # List, NDArray (Vector-ish), or general sequences.
    if isinstance(variant, AnalyzedListType):
        if type_info.base_type is np.ndarray:
            # Rebuild the ndarray with the annotated dtype when recoverable.
            try:
                dtype = extract_ndarray_elem_dtype(type_info.core_type)
            except (TypeError, ValueError, AttributeError):
                dtype = None
            return np.array(v, dtype=dtype)
        # Regular Python list.
        elem_type = variant.elem_type if variant.elem_type else Any
        return [load_engine_object(elem_type, item) for item in v]

    # Dict / Mapping.
    if isinstance(variant, AnalyzedDictType):
        return {
            load_engine_object(variant.key_type, key): load_engine_object(
                variant.value_type, val
            )
            for key, val in v.items()
        }

    # Structs: dataclass, NamedTuple, or Pydantic model.
    if isinstance(variant, AnalyzedStructType):
        struct_type = variant.struct_type
        init_kwargs: dict[str, Any] = {}
        missing_fields: list[tuple[str, Any]] = []

        if dataclasses.is_dataclass(struct_type):
            if not isinstance(v, Mapping):
                raise ValueError(f"Expected dict for dataclass, got {type(v)}")
            for dc_field in dataclasses.fields(struct_type):
                if dc_field.name in v:
                    init_kwargs[dc_field.name] = load_engine_object(
                        dc_field.type, v[dc_field.name]
                    )
                elif (
                    dc_field.default is dataclasses.MISSING
                    and dc_field.default_factory is dataclasses.MISSING
                ):
                    # Required field absent from the payload.
                    missing_fields.append((dc_field.name, dc_field.type))

        elif is_namedtuple_type(struct_type):
            if not isinstance(v, Mapping):
                raise ValueError(f"Expected dict for NamedTuple, got {type(v)}")
            # Dict format (from dump/load functions).
            annotations = getattr(struct_type, "__annotations__", {})
            field_defaults = getattr(struct_type, "_field_defaults", {})
            for name in getattr(struct_type, "_fields", ()):
                f_type = annotations.get(name, Any)
                if name in v:
                    init_kwargs[name] = load_engine_object(f_type, v[name])
                elif name not in field_defaults:
                    missing_fields.append((name, f_type))

        elif is_pydantic_model(struct_type):
            if not isinstance(v, Mapping):
                raise ValueError(f"Expected dict for Pydantic model, got {type(v)}")
            model_fields: dict[str, pydantic.fields.FieldInfo] = getattr(
                struct_type, "model_fields", {}
            )
            for name, pyd_field in model_fields.items():
                if name in v:
                    init_kwargs[name] = load_engine_object(
                        pyd_field.annotation, v[name]
                    )
                elif (
                    getattr(pyd_field, "default", pydantic_core.PydanticUndefined)
                    is pydantic_core.PydanticUndefined
                    and getattr(pyd_field, "default_factory") is None
                ):
                    missing_fields.append((name, pyd_field.annotation))
        else:
            assert False, "Unsupported struct type"

        # Fill safe auto-defaults for required fields absent from the payload.
        for name, f_type in missing_fields:
            auto_default, is_supported = get_auto_default_for_type(
                analyze_type_info(f_type)
            )
            if is_supported:
                init_kwargs[name] = auto_default
        return struct_type(**init_kwargs)

    # Union: prefer the "kind" discriminator, then fall back to trial decoding.
    if isinstance(variant, AnalyzedUnionType):
        if isinstance(v, Mapping) and "kind" in v:
            discriminator = v["kind"]
            for typ in variant.variant_types:
                t_info = analyze_type_info(typ)
                if not isinstance(t_info.variant, AnalyzedStructType):
                    continue
                t_struct = t_info.variant.struct_type
                if getattr(t_struct, "kind", None) == discriminator:
                    # Strip the discriminator before calling the constructor.
                    payload = dict(v)
                    payload.pop("kind", None)
                    return load_engine_object(t_struct, payload)
        # Fallback: try each variant until one succeeds.
        for typ in variant.variant_types:
            try:
                return load_engine_object(typ, v)
            except (TypeError, ValueError):
                continue
        return v

    # Basic types and everything else: handle numpy scalars and passthrough.
    if isinstance(v, np.ndarray) and type_info.base_type is list:
        return v.tolist()
    if isinstance(v, (list, tuple)) and type_info.base_type not in (list, tuple):
        # If a non-sequence basic type expected, attempt direct cast.
        try:
            return type_info.core_type(v)
        except (TypeError, ValueError):
            return v
    return v