PyPI - cocoindex - Versions diffs - 0.1.52__cp311-cp311-win_amd64.whl → 0.1.54__cp311-cp311-win_amd64.whl - Mend

cocoindex 0.1.52__cp311-cp311-win_amd64.whl → 0.1.54__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

cocoindex/_engine.cp311-win_amd64.pyd +0 -0
cocoindex/cli.py +6 -6
cocoindex/convert.py +93 -46
cocoindex/flow.py +3 -2
cocoindex/functions.py +10 -0
cocoindex/llm.py +3 -0
cocoindex/tests/__init__.py +0 -1
cocoindex/tests/test_convert.py +289 -58
cocoindex/tests/test_typing.py +115 -77
cocoindex/typing.py +76 -64
{cocoindex-0.1.52.dist-info → cocoindex-0.1.54.dist-info}/METADATA +11 -10
cocoindex-0.1.54.dist-info/RECORD +28 -0
cocoindex-0.1.52.dist-info/RECORD +0 -28
{cocoindex-0.1.52.dist-info → cocoindex-0.1.54.dist-info}/WHEEL +0 -0
{cocoindex-0.1.52.dist-info → cocoindex-0.1.54.dist-info}/entry_points.txt +0 -0
{cocoindex-0.1.52.dist-info → cocoindex-0.1.54.dist-info}/licenses/LICENSE +0 -0

cocoindex/tests/test_convert.py CHANGED

@@ -1,22 +1,25 @@
-import uuid
 import datetime
+import uuid
 from dataclasses import dataclass, make_dataclass
-from typing import NamedTuple, Literal, Any, Callable, Union
+from typing import Annotated, Any, Callable, Literal, NamedTuple
+import numpy as np
 import pytest
+from numpy.typing import NDArray
 import cocoindex
-from cocoindex.typing import (
-    encode_enriched_type,
-    Vector,
-    Float32,
-    Float64,
-)
 from cocoindex.convert import (
+    dump_engine_object,
     encode_engine_value,
     make_engine_value_decoder,
-    dump_engine_object,
 )
-import numpy as np
-from numpy.typing import NDArray
+from cocoindex.typing import (
+    Float32,
+    Float64,
+    TypeKind,
+    Vector,
+    encode_enriched_type,
+)
 @dataclass
@@ -88,23 +91,26 @@ def validate_full_roundtrip(
     """
     from cocoindex import _engine  # type: ignore
+    def eq(a: Any, b: Any) -> bool:
+        if isinstance(a, np.ndarray) and isinstance(b, np.ndarray):
+            return np.array_equal(a, b)
+        return type(a) == type(b) and not not (a == b)
     encoded_value = encode_engine_value(value)
     value_type = value_type or type(value)
     encoded_output_type = encode_enriched_type(value_type)["type"]
     value_from_engine = _engine.testutil.seder_roundtrip(
         encoded_value, encoded_output_type
     )
-    decoded_value = build_engine_value_decoder(value_type, value_type)(
-        value_from_engine
-    )
-    np.testing.assert_array_equal(decoded_value, value)
+    decoder = make_engine_value_decoder([], encoded_output_type, value_type)
+    decoded_value = decoder(value_from_engine)
+    assert eq(decoded_value, value)
     if other_decoded_values is not None:
         for other_value, other_type in other_decoded_values:
-            other_decoded_value = build_engine_value_decoder(other_type, other_type)(
-                value_from_engine
-            )
-            np.testing.assert_array_equal(other_decoded_value, other_value)
+            decoder = make_engine_value_decoder([], encoded_output_type, other_type)
+            other_decoded_value = decoder(value_from_engine)
+            assert eq(other_decoded_value, other_value)
 def test_encode_engine_value_basic_types() -> None:
@@ -128,6 +134,19 @@ def test_encode_engine_value_date_time_types() -> None:
     assert encode_engine_value(dt) == dt
+def test_encode_scalar_numpy_values() -> None:
+    """Test encoding scalar NumPy values to engine-compatible values."""
+    test_cases = [
+        (np.int64(42), 42),
+        (np.float32(3.14), pytest.approx(3.14)),
+        (np.float64(2.718), pytest.approx(2.718)),
+    ]
+    for np_value, expected in test_cases:
+        encoded = encode_engine_value(np_value)
+        assert encoded == expected
+        assert isinstance(encoded, (int, float))
 def test_encode_engine_value_struct() -> None:
     order = Order(order_id="O123", name="mixed nuts", price=25.0)
     assert encode_engine_value(order) == ["O123", "mixed nuts", 25.0, "default_extra"]
@@ -199,18 +218,78 @@ def test_encode_engine_value_none() -> None:
 def test_roundtrip_basic_types() -> None:
-    validate_full_roundtrip(42, int)
+    validate_full_roundtrip(42, int, (42, None))
     validate_full_roundtrip(3.25, float, (3.25, Float64))
-    validate_full_roundtrip(3.25, Float64, (3.25, float))
-    validate_full_roundtrip(3.25, Float32)
-    validate_full_roundtrip("hello", str)
-    validate_full_roundtrip(True, bool)
-    validate_full_roundtrip(False, bool)
-    validate_full_roundtrip(datetime.date(2025, 1, 1), datetime.date)
-    validate_full_roundtrip(datetime.datetime.now(), cocoindex.LocalDateTime)
     validate_full_roundtrip(
-        datetime.datetime.now(datetime.UTC), cocoindex.OffsetDateTime
+        3.25, Float64, (3.25, float), (np.float64(3.25), np.float64)
     )
+    validate_full_roundtrip(
+        3.25, Float32, (3.25, float), (np.float32(3.25), np.float32)
+    )
+    validate_full_roundtrip("hello", str, ("hello", None))
+    validate_full_roundtrip(True, bool, (True, None))
+    validate_full_roundtrip(False, bool, (False, None))
+    validate_full_roundtrip(
+        datetime.date(2025, 1, 1), datetime.date, (datetime.date(2025, 1, 1), None)
+    )
+    validate_full_roundtrip(
+        datetime.datetime(2025, 1, 2, 3, 4, 5, 123456),
+        cocoindex.LocalDateTime,
+        (datetime.datetime(2025, 1, 2, 3, 4, 5, 123456), datetime.datetime),
+    )
+    validate_full_roundtrip(
+        datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
+        cocoindex.OffsetDateTime,
+        (
+            datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
+            datetime.datetime,
+        ),
+    )
+    uuid_value = uuid.uuid4()
+    validate_full_roundtrip(uuid_value, uuid.UUID, (uuid_value, None))
+def test_decode_scalar_numpy_values() -> None:
+    test_cases = [
+        ({"kind": "Int64"}, np.int64, 42, np.int64(42)),
+        ({"kind": "Float32"}, np.float32, 3.14, np.float32(3.14)),
+        ({"kind": "Float64"}, np.float64, 2.718, np.float64(2.718)),
+    ]
+    for src_type, dst_type, input_value, expected in test_cases:
+        decoder = make_engine_value_decoder(["field"], src_type, dst_type)
+        result = decoder(input_value)
+        assert isinstance(result, dst_type)
+        assert result == expected
+def test_non_ndarray_vector_decoding() -> None:
+    # Test list[np.float64]
+    src_type = {
+        "kind": "Vector",
+        "element_type": {"kind": "Float64"},
+        "dimension": None,
+    }
+    dst_type_float = list[np.float64]
+    decoder = make_engine_value_decoder(["field"], src_type, dst_type_float)
+    input_numbers = [1.0, 2.0, 3.0]
+    result = decoder(input_numbers)
+    assert isinstance(result, list)
+    assert all(isinstance(x, np.float64) for x in result)
+    assert result == [np.float64(1.0), np.float64(2.0), np.float64(3.0)]
+    # Test list[Uuid]
+    src_type = {"kind": "Vector", "element_type": {"kind": "Uuid"}, "dimension": None}
+    dst_type_uuid = list[uuid.UUID]
+    decoder = make_engine_value_decoder(["field"], src_type, dst_type_uuid)
+    uuid1 = uuid.uuid4()
+    uuid2 = uuid.uuid4()
+    input_bytes = [uuid1.bytes, uuid2.bytes]
+    result = decoder(input_bytes)
+    assert isinstance(result, list)
+    assert all(isinstance(x, uuid.UUID) for x in result)
+    assert result == [uuid1, uuid2]
 @pytest.mark.parametrize(
@@ -492,6 +571,48 @@ def test_field_position_cases(
     assert decoder(engine_val) == PythonOrder(**expected_dict)
+def test_roundtrip_union_simple() -> None:
+    t = int | str | float
+    value = 10.4
+    validate_full_roundtrip(value, t)
+def test_roundtrip_union_with_active_uuid() -> None:
+    t = str | uuid.UUID | int
+    value = uuid.uuid4().bytes
+    validate_full_roundtrip(value, t)
+def test_roundtrip_union_with_inactive_uuid() -> None:
+    t = str | uuid.UUID | int
+    value = "5a9f8f6a-318f-4f1f-929d-566d7444a62d"  # it's a string
+    validate_full_roundtrip(value, t)
+def test_roundtrip_union_offset_datetime() -> None:
+    t = str | uuid.UUID | float | int | datetime.datetime
+    value = datetime.datetime.now(datetime.UTC)
+    validate_full_roundtrip(value, t)
+def test_roundtrip_union_date() -> None:
+    t = str | uuid.UUID | float | int | datetime.date
+    value = datetime.date.today()
+    validate_full_roundtrip(value, t)
+def test_roundtrip_union_time() -> None:
+    t = str | uuid.UUID | float | int | datetime.time
+    value = datetime.time()
+    validate_full_roundtrip(value, t)
+def test_roundtrip_union_timedelta() -> None:
+    t = str | uuid.UUID | float | int | datetime.timedelta
+    value = datetime.timedelta(hours=39, minutes=10, seconds=1)
+    validate_full_roundtrip(value, t)
 def test_roundtrip_ltable() -> None:
     t = list[Order]
     value = [Order("O1", "item1", 10.0), Order("O2", "item2", 20.0)]
@@ -565,12 +686,6 @@ Float64VectorTypeNoDim = Vector[np.float64]
 Float32VectorType = Vector[np.float32, Literal[3]]
 Float64VectorType = Vector[np.float64, Literal[3]]
 Int64VectorType = Vector[np.int64, Literal[3]]
-Int32VectorType = Vector[np.int32, Literal[3]]
-UInt8VectorType = Vector[np.uint8, Literal[3]]
-UInt16VectorType = Vector[np.uint16, Literal[3]]
-UInt32VectorType = Vector[np.uint32, Literal[3]]
-UInt64VectorType = Vector[np.uint64, Literal[3]]
-StrVectorType = Vector[str]
 NDArrayFloat32Type = NDArray[np.float32]
 NDArrayFloat64Type = NDArray[np.float64]
 NDArrayInt64Type = NDArray[np.int64]
@@ -756,37 +871,72 @@ def test_dump_vector_type_annotation_no_dim() -> None:
 def test_full_roundtrip_vector_numeric_types() -> None:
     """Test full roundtrip for numeric vector types using NDArray."""
-    value_f32: Vector[np.float32, Literal[3]] = np.array(
-        [1.0, 2.0, 3.0], dtype=np.float32
+    value_f32 = np.array([1.0, 2.0, 3.0], dtype=np.float32)
+    validate_full_roundtrip(
+        value_f32,
+        Vector[np.float32, Literal[3]],
+        ([np.float32(1.0), np.float32(2.0), np.float32(3.0)], list[np.float32]),
+        ([1.0, 2.0, 3.0], list[cocoindex.Float32]),
+        ([1.0, 2.0, 3.0], list[float]),
     )
-    validate_full_roundtrip(value_f32, Vector[np.float32, Literal[3]])
-    value_f64: Vector[np.float64, Literal[3]] = np.array(
-        [1.0, 2.0, 3.0], dtype=np.float64
+    validate_full_roundtrip(
+        value_f32,
+        np.typing.NDArray[np.float32],
+        ([np.float32(1.0), np.float32(2.0), np.float32(3.0)], list[np.float32]),
+        ([1.0, 2.0, 3.0], list[cocoindex.Float32]),
+        ([1.0, 2.0, 3.0], list[float]),
+    )
+    validate_full_roundtrip(
+        value_f32.tolist(),
+        list[np.float32],
+        (value_f32, Vector[np.float32, Literal[3]]),
+        ([1.0, 2.0, 3.0], list[cocoindex.Float32]),
+        ([1.0, 2.0, 3.0], list[float]),
+    )
+    value_f64 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
+    validate_full_roundtrip(
+        value_f64,
+        Vector[np.float64, Literal[3]],
+        ([np.float64(1.0), np.float64(2.0), np.float64(3.0)], list[np.float64]),
+        ([1.0, 2.0, 3.0], list[cocoindex.Float64]),
+        ([1.0, 2.0, 3.0], list[float]),
+    )
+    value_i64 = np.array([1, 2, 3], dtype=np.int64)
+    validate_full_roundtrip(
+        value_i64,
+        Vector[np.int64, Literal[3]],
+        ([np.int64(1), np.int64(2), np.int64(3)], list[np.int64]),
+        ([1, 2, 3], list[int]),
     )
-    validate_full_roundtrip(value_f64, Vector[np.float64, Literal[3]])
-    value_i64: Vector[np.int64, Literal[3]] = np.array([1, 2, 3], dtype=np.int64)
-    validate_full_roundtrip(value_i64, Vector[np.int64, Literal[3]])
-    value_i32: Vector[np.int32, Literal[3]] = np.array([1, 2, 3], dtype=np.int32)
-    with pytest.raises(ValueError, match="type unsupported yet"):
+    value_i32 = np.array([1, 2, 3], dtype=np.int32)
+    with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
         validate_full_roundtrip(value_i32, Vector[np.int32, Literal[3]])
-    value_u8: Vector[np.uint8, Literal[3]] = np.array([1, 2, 3], dtype=np.uint8)
-    with pytest.raises(ValueError, match="type unsupported yet"):
+    value_u8 = np.array([1, 2, 3], dtype=np.uint8)
+    with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
         validate_full_roundtrip(value_u8, Vector[np.uint8, Literal[3]])
-    value_u16: Vector[np.uint16, Literal[3]] = np.array([1, 2, 3], dtype=np.uint16)
-    with pytest.raises(ValueError, match="type unsupported yet"):
+    value_u16 = np.array([1, 2, 3], dtype=np.uint16)
+    with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
         validate_full_roundtrip(value_u16, Vector[np.uint16, Literal[3]])
-    value_u32: Vector[np.uint32, Literal[3]] = np.array([1, 2, 3], dtype=np.uint32)
-    with pytest.raises(ValueError, match="type unsupported yet"):
+    value_u32 = np.array([1, 2, 3], dtype=np.uint32)
+    with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
         validate_full_roundtrip(value_u32, Vector[np.uint32, Literal[3]])
-    value_u64: Vector[np.uint64, Literal[3]] = np.array([1, 2, 3], dtype=np.uint64)
-    with pytest.raises(ValueError, match="type unsupported yet"):
+    value_u64 = np.array([1, 2, 3], dtype=np.uint64)
+    with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
         validate_full_roundtrip(value_u64, Vector[np.uint64, Literal[3]])
 def test_roundtrip_vector_no_dimension() -> None:
     """Test full roundtrip for vector types without dimension annotation."""
-    value_f64: Vector[np.float64] = np.array([1.0, 2.0, 3.0], dtype=np.float64)
-    validate_full_roundtrip(value_f64, Vector[np.float64])
+    value_f64 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
+    validate_full_roundtrip(
+        value_f64,
+        Vector[np.float64],
+        ([1.0, 2.0, 3.0], list[float]),
+        (np.array([1.0, 2.0, 3.0], dtype=np.float64), np.typing.NDArray[np.float64]),
+    )
 def test_roundtrip_string_vector() -> None:
@@ -808,7 +958,88 @@ def test_roundtrip_dimension_mismatch() -> None:
         validate_full_roundtrip(value_f32, Vector[np.float32, Literal[3]])
-def test_roundtrip_list_backward_compatibility() -> None:
-    """Test full roundtrip for list-based vectors for backward compatibility."""
-    value_list: list[int] = [1, 2, 3]
-    validate_full_roundtrip(value_list, list[int])
+def test_full_roundtrip_scalar_numeric_types() -> None:
+    """Test full roundtrip for scalar NumPy numeric types."""
+    # Test supported scalar types
+    validate_full_roundtrip(np.int64(42), np.int64, (42, int))
+    validate_full_roundtrip(np.float32(3.25), np.float32, (3.25, cocoindex.Float32))
+    validate_full_roundtrip(np.float64(3.25), np.float64, (3.25, cocoindex.Float64))
+    # Test unsupported scalar types
+    for unsupported_type in [np.int32, np.uint8, np.uint16, np.uint32, np.uint64]:
+        with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
+            validate_full_roundtrip(unsupported_type(1), unsupported_type)
+def test_full_roundtrip_nullable_scalar() -> None:
+    """Test full roundtrip for nullable scalar NumPy types."""
+    # Test with non-null values
+    validate_full_roundtrip(np.int64(42), np.int64 | None)
+    validate_full_roundtrip(np.float32(3.14), np.float32 | None)
+    validate_full_roundtrip(np.float64(2.718), np.float64 | None)
+    # Test with None
+    validate_full_roundtrip(None, np.int64 | None)
+    validate_full_roundtrip(None, np.float32 | None)
+    validate_full_roundtrip(None, np.float64 | None)
+def test_full_roundtrip_scalar_in_struct() -> None:
+    """Test full roundtrip for scalar NumPy types in a dataclass."""
+    @dataclass
+    class NumericStruct:
+        int_field: np.int64
+        float32_field: np.float32
+        float64_field: np.float64
+    instance = NumericStruct(
+        int_field=np.int64(42),
+        float32_field=np.float32(3.14),
+        float64_field=np.float64(2.718),
+    )
+    validate_full_roundtrip(instance, NumericStruct)
+def test_full_roundtrip_scalar_in_nested_struct() -> None:
+    """Test full roundtrip for scalar NumPy types in a nested struct."""
+    @dataclass
+    class InnerStruct:
+        value: np.float64
+    @dataclass
+    class OuterStruct:
+        inner: InnerStruct
+        count: np.int64
+    instance = OuterStruct(
+        inner=InnerStruct(value=np.float64(2.718)),
+        count=np.int64(1),
+    )
+    validate_full_roundtrip(instance, OuterStruct)
+def test_full_roundtrip_scalar_with_python_types() -> None:
+    """Test full roundtrip for structs mixing NumPy and Python scalar types."""
+    @dataclass
+    class MixedStruct:
+        numpy_int: np.int64
+        python_int: int
+        numpy_float: np.float64
+        python_float: float
+        string: str
+        annotated_int: Annotated[np.int64, TypeKind("int")]
+        annotated_float: Float32
+    instance = MixedStruct(
+        numpy_int=np.int64(42),
+        python_int=43,
+        numpy_float=np.float64(2.718),
+        python_float=3.14,
+        string="hello, world",
+        annotated_int=np.int64(42),
+        annotated_float=2.0,
+    )
+    validate_full_roundtrip(instance, MixedStruct)