cocoindex 0.1.52__cp311-cp311-win_amd64.whl → 0.1.54__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,25 @@
1
- import uuid
2
1
  import datetime
2
+ import uuid
3
3
  from dataclasses import dataclass, make_dataclass
4
- from typing import NamedTuple, Literal, Any, Callable, Union
4
+ from typing import Annotated, Any, Callable, Literal, NamedTuple
5
+
6
+ import numpy as np
5
7
  import pytest
8
+ from numpy.typing import NDArray
9
+
6
10
  import cocoindex
7
- from cocoindex.typing import (
8
- encode_enriched_type,
9
- Vector,
10
- Float32,
11
- Float64,
12
- )
13
11
  from cocoindex.convert import (
12
+ dump_engine_object,
14
13
  encode_engine_value,
15
14
  make_engine_value_decoder,
16
- dump_engine_object,
17
15
  )
18
- import numpy as np
19
- from numpy.typing import NDArray
16
+ from cocoindex.typing import (
17
+ Float32,
18
+ Float64,
19
+ TypeKind,
20
+ Vector,
21
+ encode_enriched_type,
22
+ )
20
23
 
21
24
 
22
25
  @dataclass
@@ -88,23 +91,26 @@ def validate_full_roundtrip(
88
91
  """
89
92
  from cocoindex import _engine # type: ignore
90
93
 
94
+ def eq(a: Any, b: Any) -> bool:
95
+ if isinstance(a, np.ndarray) and isinstance(b, np.ndarray):
96
+ return np.array_equal(a, b)
97
+ return type(a) == type(b) and not not (a == b)
98
+
91
99
  encoded_value = encode_engine_value(value)
92
100
  value_type = value_type or type(value)
93
101
  encoded_output_type = encode_enriched_type(value_type)["type"]
94
102
  value_from_engine = _engine.testutil.seder_roundtrip(
95
103
  encoded_value, encoded_output_type
96
104
  )
97
- decoded_value = build_engine_value_decoder(value_type, value_type)(
98
- value_from_engine
99
- )
100
- np.testing.assert_array_equal(decoded_value, value)
105
+ decoder = make_engine_value_decoder([], encoded_output_type, value_type)
106
+ decoded_value = decoder(value_from_engine)
107
+ assert eq(decoded_value, value)
101
108
 
102
109
  if other_decoded_values is not None:
103
110
  for other_value, other_type in other_decoded_values:
104
- other_decoded_value = build_engine_value_decoder(other_type, other_type)(
105
- value_from_engine
106
- )
107
- np.testing.assert_array_equal(other_decoded_value, other_value)
111
+ decoder = make_engine_value_decoder([], encoded_output_type, other_type)
112
+ other_decoded_value = decoder(value_from_engine)
113
+ assert eq(other_decoded_value, other_value)
108
114
 
109
115
 
110
116
  def test_encode_engine_value_basic_types() -> None:
@@ -128,6 +134,19 @@ def test_encode_engine_value_date_time_types() -> None:
128
134
  assert encode_engine_value(dt) == dt
129
135
 
130
136
 
137
+ def test_encode_scalar_numpy_values() -> None:
138
+ """Test encoding scalar NumPy values to engine-compatible values."""
139
+ test_cases = [
140
+ (np.int64(42), 42),
141
+ (np.float32(3.14), pytest.approx(3.14)),
142
+ (np.float64(2.718), pytest.approx(2.718)),
143
+ ]
144
+ for np_value, expected in test_cases:
145
+ encoded = encode_engine_value(np_value)
146
+ assert encoded == expected
147
+ assert isinstance(encoded, (int, float))
148
+
149
+
131
150
  def test_encode_engine_value_struct() -> None:
132
151
  order = Order(order_id="O123", name="mixed nuts", price=25.0)
133
152
  assert encode_engine_value(order) == ["O123", "mixed nuts", 25.0, "default_extra"]
@@ -199,18 +218,78 @@ def test_encode_engine_value_none() -> None:
199
218
 
200
219
 
201
220
  def test_roundtrip_basic_types() -> None:
202
- validate_full_roundtrip(42, int)
221
+ validate_full_roundtrip(42, int, (42, None))
203
222
  validate_full_roundtrip(3.25, float, (3.25, Float64))
204
- validate_full_roundtrip(3.25, Float64, (3.25, float))
205
- validate_full_roundtrip(3.25, Float32)
206
- validate_full_roundtrip("hello", str)
207
- validate_full_roundtrip(True, bool)
208
- validate_full_roundtrip(False, bool)
209
- validate_full_roundtrip(datetime.date(2025, 1, 1), datetime.date)
210
- validate_full_roundtrip(datetime.datetime.now(), cocoindex.LocalDateTime)
211
223
  validate_full_roundtrip(
212
- datetime.datetime.now(datetime.UTC), cocoindex.OffsetDateTime
224
+ 3.25, Float64, (3.25, float), (np.float64(3.25), np.float64)
213
225
  )
226
+ validate_full_roundtrip(
227
+ 3.25, Float32, (3.25, float), (np.float32(3.25), np.float32)
228
+ )
229
+ validate_full_roundtrip("hello", str, ("hello", None))
230
+ validate_full_roundtrip(True, bool, (True, None))
231
+ validate_full_roundtrip(False, bool, (False, None))
232
+ validate_full_roundtrip(
233
+ datetime.date(2025, 1, 1), datetime.date, (datetime.date(2025, 1, 1), None)
234
+ )
235
+
236
+ validate_full_roundtrip(
237
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456),
238
+ cocoindex.LocalDateTime,
239
+ (datetime.datetime(2025, 1, 2, 3, 4, 5, 123456), datetime.datetime),
240
+ )
241
+ validate_full_roundtrip(
242
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
243
+ cocoindex.OffsetDateTime,
244
+ (
245
+ datetime.datetime(2025, 1, 2, 3, 4, 5, 123456, datetime.UTC),
246
+ datetime.datetime,
247
+ ),
248
+ )
249
+
250
+ uuid_value = uuid.uuid4()
251
+ validate_full_roundtrip(uuid_value, uuid.UUID, (uuid_value, None))
252
+
253
+
254
+ def test_decode_scalar_numpy_values() -> None:
255
+ test_cases = [
256
+ ({"kind": "Int64"}, np.int64, 42, np.int64(42)),
257
+ ({"kind": "Float32"}, np.float32, 3.14, np.float32(3.14)),
258
+ ({"kind": "Float64"}, np.float64, 2.718, np.float64(2.718)),
259
+ ]
260
+ for src_type, dst_type, input_value, expected in test_cases:
261
+ decoder = make_engine_value_decoder(["field"], src_type, dst_type)
262
+ result = decoder(input_value)
263
+ assert isinstance(result, dst_type)
264
+ assert result == expected
265
+
266
+
267
+ def test_non_ndarray_vector_decoding() -> None:
268
+ # Test list[np.float64]
269
+ src_type = {
270
+ "kind": "Vector",
271
+ "element_type": {"kind": "Float64"},
272
+ "dimension": None,
273
+ }
274
+ dst_type_float = list[np.float64]
275
+ decoder = make_engine_value_decoder(["field"], src_type, dst_type_float)
276
+ input_numbers = [1.0, 2.0, 3.0]
277
+ result = decoder(input_numbers)
278
+ assert isinstance(result, list)
279
+ assert all(isinstance(x, np.float64) for x in result)
280
+ assert result == [np.float64(1.0), np.float64(2.0), np.float64(3.0)]
281
+
282
+ # Test list[Uuid]
283
+ src_type = {"kind": "Vector", "element_type": {"kind": "Uuid"}, "dimension": None}
284
+ dst_type_uuid = list[uuid.UUID]
285
+ decoder = make_engine_value_decoder(["field"], src_type, dst_type_uuid)
286
+ uuid1 = uuid.uuid4()
287
+ uuid2 = uuid.uuid4()
288
+ input_bytes = [uuid1.bytes, uuid2.bytes]
289
+ result = decoder(input_bytes)
290
+ assert isinstance(result, list)
291
+ assert all(isinstance(x, uuid.UUID) for x in result)
292
+ assert result == [uuid1, uuid2]
214
293
 
215
294
 
216
295
  @pytest.mark.parametrize(
@@ -492,6 +571,48 @@ def test_field_position_cases(
492
571
  assert decoder(engine_val) == PythonOrder(**expected_dict)
493
572
 
494
573
 
574
+ def test_roundtrip_union_simple() -> None:
575
+ t = int | str | float
576
+ value = 10.4
577
+ validate_full_roundtrip(value, t)
578
+
579
+
580
+ def test_roundtrip_union_with_active_uuid() -> None:
581
+ t = str | uuid.UUID | int
582
+ value = uuid.uuid4().bytes
583
+ validate_full_roundtrip(value, t)
584
+
585
+
586
+ def test_roundtrip_union_with_inactive_uuid() -> None:
587
+ t = str | uuid.UUID | int
588
+ value = "5a9f8f6a-318f-4f1f-929d-566d7444a62d" # it's a string
589
+ validate_full_roundtrip(value, t)
590
+
591
+
592
+ def test_roundtrip_union_offset_datetime() -> None:
593
+ t = str | uuid.UUID | float | int | datetime.datetime
594
+ value = datetime.datetime.now(datetime.UTC)
595
+ validate_full_roundtrip(value, t)
596
+
597
+
598
+ def test_roundtrip_union_date() -> None:
599
+ t = str | uuid.UUID | float | int | datetime.date
600
+ value = datetime.date.today()
601
+ validate_full_roundtrip(value, t)
602
+
603
+
604
+ def test_roundtrip_union_time() -> None:
605
+ t = str | uuid.UUID | float | int | datetime.time
606
+ value = datetime.time()
607
+ validate_full_roundtrip(value, t)
608
+
609
+
610
+ def test_roundtrip_union_timedelta() -> None:
611
+ t = str | uuid.UUID | float | int | datetime.timedelta
612
+ value = datetime.timedelta(hours=39, minutes=10, seconds=1)
613
+ validate_full_roundtrip(value, t)
614
+
615
+
495
616
  def test_roundtrip_ltable() -> None:
496
617
  t = list[Order]
497
618
  value = [Order("O1", "item1", 10.0), Order("O2", "item2", 20.0)]
@@ -565,12 +686,6 @@ Float64VectorTypeNoDim = Vector[np.float64]
565
686
  Float32VectorType = Vector[np.float32, Literal[3]]
566
687
  Float64VectorType = Vector[np.float64, Literal[3]]
567
688
  Int64VectorType = Vector[np.int64, Literal[3]]
568
- Int32VectorType = Vector[np.int32, Literal[3]]
569
- UInt8VectorType = Vector[np.uint8, Literal[3]]
570
- UInt16VectorType = Vector[np.uint16, Literal[3]]
571
- UInt32VectorType = Vector[np.uint32, Literal[3]]
572
- UInt64VectorType = Vector[np.uint64, Literal[3]]
573
- StrVectorType = Vector[str]
574
689
  NDArrayFloat32Type = NDArray[np.float32]
575
690
  NDArrayFloat64Type = NDArray[np.float64]
576
691
  NDArrayInt64Type = NDArray[np.int64]
@@ -756,37 +871,72 @@ def test_dump_vector_type_annotation_no_dim() -> None:
756
871
 
757
872
  def test_full_roundtrip_vector_numeric_types() -> None:
758
873
  """Test full roundtrip for numeric vector types using NDArray."""
759
- value_f32: Vector[np.float32, Literal[3]] = np.array(
760
- [1.0, 2.0, 3.0], dtype=np.float32
874
+ value_f32 = np.array([1.0, 2.0, 3.0], dtype=np.float32)
875
+ validate_full_roundtrip(
876
+ value_f32,
877
+ Vector[np.float32, Literal[3]],
878
+ ([np.float32(1.0), np.float32(2.0), np.float32(3.0)], list[np.float32]),
879
+ ([1.0, 2.0, 3.0], list[cocoindex.Float32]),
880
+ ([1.0, 2.0, 3.0], list[float]),
761
881
  )
762
- validate_full_roundtrip(value_f32, Vector[np.float32, Literal[3]])
763
- value_f64: Vector[np.float64, Literal[3]] = np.array(
764
- [1.0, 2.0, 3.0], dtype=np.float64
882
+ validate_full_roundtrip(
883
+ value_f32,
884
+ np.typing.NDArray[np.float32],
885
+ ([np.float32(1.0), np.float32(2.0), np.float32(3.0)], list[np.float32]),
886
+ ([1.0, 2.0, 3.0], list[cocoindex.Float32]),
887
+ ([1.0, 2.0, 3.0], list[float]),
888
+ )
889
+ validate_full_roundtrip(
890
+ value_f32.tolist(),
891
+ list[np.float32],
892
+ (value_f32, Vector[np.float32, Literal[3]]),
893
+ ([1.0, 2.0, 3.0], list[cocoindex.Float32]),
894
+ ([1.0, 2.0, 3.0], list[float]),
895
+ )
896
+
897
+ value_f64 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
898
+ validate_full_roundtrip(
899
+ value_f64,
900
+ Vector[np.float64, Literal[3]],
901
+ ([np.float64(1.0), np.float64(2.0), np.float64(3.0)], list[np.float64]),
902
+ ([1.0, 2.0, 3.0], list[cocoindex.Float64]),
903
+ ([1.0, 2.0, 3.0], list[float]),
904
+ )
905
+
906
+ value_i64 = np.array([1, 2, 3], dtype=np.int64)
907
+ validate_full_roundtrip(
908
+ value_i64,
909
+ Vector[np.int64, Literal[3]],
910
+ ([np.int64(1), np.int64(2), np.int64(3)], list[np.int64]),
911
+ ([1, 2, 3], list[int]),
765
912
  )
766
- validate_full_roundtrip(value_f64, Vector[np.float64, Literal[3]])
767
- value_i64: Vector[np.int64, Literal[3]] = np.array([1, 2, 3], dtype=np.int64)
768
- validate_full_roundtrip(value_i64, Vector[np.int64, Literal[3]])
769
- value_i32: Vector[np.int32, Literal[3]] = np.array([1, 2, 3], dtype=np.int32)
770
- with pytest.raises(ValueError, match="type unsupported yet"):
913
+
914
+ value_i32 = np.array([1, 2, 3], dtype=np.int32)
915
+ with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
771
916
  validate_full_roundtrip(value_i32, Vector[np.int32, Literal[3]])
772
- value_u8: Vector[np.uint8, Literal[3]] = np.array([1, 2, 3], dtype=np.uint8)
773
- with pytest.raises(ValueError, match="type unsupported yet"):
917
+ value_u8 = np.array([1, 2, 3], dtype=np.uint8)
918
+ with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
774
919
  validate_full_roundtrip(value_u8, Vector[np.uint8, Literal[3]])
775
- value_u16: Vector[np.uint16, Literal[3]] = np.array([1, 2, 3], dtype=np.uint16)
776
- with pytest.raises(ValueError, match="type unsupported yet"):
920
+ value_u16 = np.array([1, 2, 3], dtype=np.uint16)
921
+ with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
777
922
  validate_full_roundtrip(value_u16, Vector[np.uint16, Literal[3]])
778
- value_u32: Vector[np.uint32, Literal[3]] = np.array([1, 2, 3], dtype=np.uint32)
779
- with pytest.raises(ValueError, match="type unsupported yet"):
923
+ value_u32 = np.array([1, 2, 3], dtype=np.uint32)
924
+ with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
780
925
  validate_full_roundtrip(value_u32, Vector[np.uint32, Literal[3]])
781
- value_u64: Vector[np.uint64, Literal[3]] = np.array([1, 2, 3], dtype=np.uint64)
782
- with pytest.raises(ValueError, match="type unsupported yet"):
926
+ value_u64 = np.array([1, 2, 3], dtype=np.uint64)
927
+ with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
783
928
  validate_full_roundtrip(value_u64, Vector[np.uint64, Literal[3]])
784
929
 
785
930
 
786
931
  def test_roundtrip_vector_no_dimension() -> None:
787
932
  """Test full roundtrip for vector types without dimension annotation."""
788
- value_f64: Vector[np.float64] = np.array([1.0, 2.0, 3.0], dtype=np.float64)
789
- validate_full_roundtrip(value_f64, Vector[np.float64])
933
+ value_f64 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
934
+ validate_full_roundtrip(
935
+ value_f64,
936
+ Vector[np.float64],
937
+ ([1.0, 2.0, 3.0], list[float]),
938
+ (np.array([1.0, 2.0, 3.0], dtype=np.float64), np.typing.NDArray[np.float64]),
939
+ )
790
940
 
791
941
 
792
942
  def test_roundtrip_string_vector() -> None:
@@ -808,7 +958,88 @@ def test_roundtrip_dimension_mismatch() -> None:
808
958
  validate_full_roundtrip(value_f32, Vector[np.float32, Literal[3]])
809
959
 
810
960
 
811
- def test_roundtrip_list_backward_compatibility() -> None:
812
- """Test full roundtrip for list-based vectors for backward compatibility."""
813
- value_list: list[int] = [1, 2, 3]
814
- validate_full_roundtrip(value_list, list[int])
961
+ def test_full_roundtrip_scalar_numeric_types() -> None:
962
+ """Test full roundtrip for scalar NumPy numeric types."""
963
+ # Test supported scalar types
964
+ validate_full_roundtrip(np.int64(42), np.int64, (42, int))
965
+ validate_full_roundtrip(np.float32(3.25), np.float32, (3.25, cocoindex.Float32))
966
+ validate_full_roundtrip(np.float64(3.25), np.float64, (3.25, cocoindex.Float64))
967
+
968
+ # Test unsupported scalar types
969
+ for unsupported_type in [np.int32, np.uint8, np.uint16, np.uint32, np.uint64]:
970
+ with pytest.raises(ValueError, match="Unsupported NumPy dtype"):
971
+ validate_full_roundtrip(unsupported_type(1), unsupported_type)
972
+
973
+
974
+ def test_full_roundtrip_nullable_scalar() -> None:
975
+ """Test full roundtrip for nullable scalar NumPy types."""
976
+ # Test with non-null values
977
+ validate_full_roundtrip(np.int64(42), np.int64 | None)
978
+ validate_full_roundtrip(np.float32(3.14), np.float32 | None)
979
+ validate_full_roundtrip(np.float64(2.718), np.float64 | None)
980
+
981
+ # Test with None
982
+ validate_full_roundtrip(None, np.int64 | None)
983
+ validate_full_roundtrip(None, np.float32 | None)
984
+ validate_full_roundtrip(None, np.float64 | None)
985
+
986
+
987
+ def test_full_roundtrip_scalar_in_struct() -> None:
988
+ """Test full roundtrip for scalar NumPy types in a dataclass."""
989
+
990
+ @dataclass
991
+ class NumericStruct:
992
+ int_field: np.int64
993
+ float32_field: np.float32
994
+ float64_field: np.float64
995
+
996
+ instance = NumericStruct(
997
+ int_field=np.int64(42),
998
+ float32_field=np.float32(3.14),
999
+ float64_field=np.float64(2.718),
1000
+ )
1001
+ validate_full_roundtrip(instance, NumericStruct)
1002
+
1003
+
1004
+ def test_full_roundtrip_scalar_in_nested_struct() -> None:
1005
+ """Test full roundtrip for scalar NumPy types in a nested struct."""
1006
+
1007
+ @dataclass
1008
+ class InnerStruct:
1009
+ value: np.float64
1010
+
1011
+ @dataclass
1012
+ class OuterStruct:
1013
+ inner: InnerStruct
1014
+ count: np.int64
1015
+
1016
+ instance = OuterStruct(
1017
+ inner=InnerStruct(value=np.float64(2.718)),
1018
+ count=np.int64(1),
1019
+ )
1020
+ validate_full_roundtrip(instance, OuterStruct)
1021
+
1022
+
1023
+ def test_full_roundtrip_scalar_with_python_types() -> None:
1024
+ """Test full roundtrip for structs mixing NumPy and Python scalar types."""
1025
+
1026
+ @dataclass
1027
+ class MixedStruct:
1028
+ numpy_int: np.int64
1029
+ python_int: int
1030
+ numpy_float: np.float64
1031
+ python_float: float
1032
+ string: str
1033
+ annotated_int: Annotated[np.int64, TypeKind("int")]
1034
+ annotated_float: Float32
1035
+
1036
+ instance = MixedStruct(
1037
+ numpy_int=np.int64(42),
1038
+ python_int=43,
1039
+ numpy_float=np.float64(2.718),
1040
+ python_float=3.14,
1041
+ string="hello, world",
1042
+ annotated_int=np.int64(42),
1043
+ annotated_float=2.0,
1044
+ )
1045
+ validate_full_roundtrip(instance, MixedStruct)