cocoindex 0.2.3__cp311-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,429 @@
1
+ import dataclasses
2
+ import datetime
3
+ import uuid
4
+ from collections.abc import Mapping, Sequence
5
+ from typing import Annotated, Any, Literal, NamedTuple, get_args, get_origin
6
+
7
+ import numpy as np
8
+ import pytest
9
+ from numpy.typing import NDArray
10
+
11
+ from cocoindex.typing import (
12
+ AnalyzedBasicType,
13
+ AnalyzedDictType,
14
+ AnalyzedListType,
15
+ AnalyzedStructType,
16
+ AnalyzedUnknownType,
17
+ AnalyzedTypeInfo,
18
+ TypeAttr,
19
+ TypeKind,
20
+ Vector,
21
+ VectorInfo,
22
+ analyze_type_info,
23
+ encode_enriched_type,
24
+ )
25
+
26
+
27
+ @dataclasses.dataclass  # struct fixture shared by the dataclass, list-of-struct and mapping tests in this file
27
+ class SimpleDataclass:
28
+ name: str  # the struct-encoding test expects this as field 0, encoded with kind "Str"
29
+ value: int  # the struct-encoding test expects this as field 1, encoded with kind "Int64"
31
+
32
+
33
+ class SimpleNamedTuple(NamedTuple):  # NamedTuple fixture; test_named_tuple expects it to analyze as a struct type
34
+ name: str
35
+ value: Any  # annotated as Any; no test visible in this file inspects this field's analyzed type
36
+
37
+
38
def test_ndarray_float32_no_dim() -> None:
    """A bare ``NDArray[np.float32]`` is a non-nullable list type without vector info."""
    info = analyze_type_info(NDArray[np.float32])
    variant = info.variant
    assert isinstance(variant, AnalyzedListType)
    assert variant.vector_info is None
    assert variant.elem_type == np.float32
    assert info.nullable is False
    # The core type must remain the parameterised ndarray carrying the dtype.
    core = info.core_type
    assert get_origin(core) == np.ndarray
    assert get_args(core)[1] == np.dtype[np.float32]
47
+
48
+
49
def test_vector_float32_no_dim() -> None:
    """``Vector[np.float32]`` carries a ``VectorInfo`` whose dimension is unset."""
    info = analyze_type_info(Vector[np.float32])
    variant = info.variant
    assert isinstance(variant, AnalyzedListType)
    assert variant.vector_info == VectorInfo(dim=None)
    assert variant.elem_type == np.float32
    assert info.nullable is False
    # Vector is expected to resolve to an ndarray core type with a float32 dtype.
    core = info.core_type
    assert get_origin(core) == np.ndarray
    assert get_args(core)[1] == np.dtype[np.float32]
58
+
59
+
60
def test_ndarray_float64_with_dim() -> None:
    """An ``Annotated`` NDArray picks up the dimension from its ``VectorInfo``."""
    annotated = Annotated[NDArray[np.float64], VectorInfo(dim=128)]
    info = analyze_type_info(annotated)
    variant = info.variant
    assert isinstance(variant, AnalyzedListType)
    assert variant.vector_info == VectorInfo(dim=128)
    assert variant.elem_type == np.float64
    assert info.nullable is False
    # The annotation is stripped: the core type is the ndarray itself.
    core = info.core_type
    assert get_origin(core) == np.ndarray
    assert get_args(core)[1] == np.dtype[np.float64]
69
+
70
+
71
def test_vector_float32_with_dim() -> None:
    """``Vector[np.float32, Literal[384]]`` yields a 384-dimensional vector type."""
    info = analyze_type_info(Vector[np.float32, Literal[384]])
    variant = info.variant
    assert isinstance(variant, AnalyzedListType)
    assert variant.vector_info == VectorInfo(dim=384)
    assert variant.elem_type == np.float32
    assert info.nullable is False
    # The core type is still expected to be an ndarray with a float32 dtype.
    core = info.core_type
    assert get_origin(core) == np.ndarray
    assert get_args(core)[1] == np.dtype[np.float32]
80
+
81
+
82
def test_ndarray_int64_no_dim() -> None:
    """A bare ``NDArray[np.int64]`` is a non-nullable list type without vector info."""
    info = analyze_type_info(NDArray[np.int64])
    variant = info.variant
    assert isinstance(variant, AnalyzedListType)
    assert variant.vector_info is None
    assert variant.elem_type == np.int64
    assert info.nullable is False
    # Integer arrays keep their ndarray core type and int64 dtype.
    core = info.core_type
    assert get_origin(core) == np.ndarray
    assert get_args(core)[1] == np.dtype[np.int64]
91
+
92
+
93
def test_nullable_ndarray() -> None:
    """``NDArray[...] | None`` is flagged nullable while keeping the list variant."""
    info = analyze_type_info(NDArray[np.float32] | None)
    variant = info.variant
    assert isinstance(variant, AnalyzedListType)
    assert variant.vector_info is None
    assert variant.elem_type == np.float32
    assert info.nullable is True
    # The ``None`` arm is dropped from the core type, leaving the ndarray.
    core = info.core_type
    assert get_origin(core) == np.ndarray
    assert get_args(core)[1] == np.dtype[np.float32]
102
+
103
+
104
def test_scalar_numpy_types() -> None:
    """Each NumPy scalar type analyzes to the basic kind listed next to it."""
    # Insertion order of the dict keeps the cases in the same sequence.
    kind_by_np_type = {
        np.int64: "Int64",
        np.float32: "Float32",
        np.float64: "Float64",
    }
    for np_type, expected_kind in kind_by_np_type.items():
        type_info = analyze_type_info(np_type)
        assert isinstance(type_info.variant, AnalyzedBasicType)
        assert type_info.variant.kind == expected_kind, (
            f"Expected {expected_kind} for {np_type}, got {type_info.variant.kind}"
        )
        assert type_info.core_type == np_type, (
            f"Expected {np_type}, got {type_info.core_type}"
        )
118
+
119
+
120
def test_vector_str() -> None:
    """``Vector[str]`` is a list of ``str`` with an unset vector dimension."""
    variant = analyze_type_info(Vector[str]).variant
    assert isinstance(variant, AnalyzedListType)
    assert variant.elem_type is str
    assert variant.vector_info == VectorInfo(dim=None)
126
+
127
+
128
def test_non_numpy_vector() -> None:
    """``Vector`` over a builtin ``float`` keeps that element type and the given dim."""
    variant = analyze_type_info(Vector[float, Literal[3]]).variant
    assert isinstance(variant, AnalyzedListType)
    assert variant.elem_type is float
    assert variant.vector_info == VectorInfo(dim=3)
134
+
135
+
136
def test_list_of_primitives() -> None:
    """``list[str]`` analyzes to a plain list type with ``str`` elements."""
    analyzed = analyze_type_info(list[str])
    expected = AnalyzedTypeInfo(
        core_type=list[str],
        base_type=list,
        variant=AnalyzedListType(elem_type=str, vector_info=None),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
146
+
147
+
148
def test_list_of_structs() -> None:
    """A list of dataclasses is a list type whose element type is the dataclass."""
    analyzed = analyze_type_info(list[SimpleDataclass])
    expected = AnalyzedTypeInfo(
        core_type=list[SimpleDataclass],
        base_type=list,
        variant=AnalyzedListType(elem_type=SimpleDataclass, vector_info=None),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
158
+
159
+
160
def test_sequence_of_int() -> None:
    """The abstract ``Sequence[int]`` is analyzed as a list type, base ``Sequence``."""
    analyzed = analyze_type_info(Sequence[int])
    expected = AnalyzedTypeInfo(
        core_type=Sequence[int],
        base_type=Sequence,
        variant=AnalyzedListType(elem_type=int, vector_info=None),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
170
+
171
+
172
def test_list_with_vector_info() -> None:
    """A ``VectorInfo`` annotation on ``list[int]`` lands in the list variant."""
    analyzed = analyze_type_info(Annotated[list[int], VectorInfo(dim=5)])
    expected = AnalyzedTypeInfo(
        core_type=list[int],
        base_type=list,
        variant=AnalyzedListType(elem_type=int, vector_info=VectorInfo(dim=5)),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
182
+
183
+
184
def test_dict_str_int() -> None:
    """``dict[str, int]`` analyzes to a dict type with matching key/value types."""
    analyzed = analyze_type_info(dict[str, int])
    expected = AnalyzedTypeInfo(
        core_type=dict[str, int],
        base_type=dict,
        variant=AnalyzedDictType(key_type=str, value_type=int),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
194
+
195
+
196
def test_mapping_str_dataclass() -> None:
    """The abstract ``Mapping`` is analyzed as a dict type, base ``Mapping``."""
    analyzed = analyze_type_info(Mapping[str, SimpleDataclass])
    expected = AnalyzedTypeInfo(
        core_type=Mapping[str, SimpleDataclass],
        base_type=Mapping,
        variant=AnalyzedDictType(key_type=str, value_type=SimpleDataclass),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
206
+
207
+
208
def test_dataclass() -> None:
    """A dataclass analyzes to a struct type that references the class itself."""
    analyzed = analyze_type_info(SimpleDataclass)
    expected = AnalyzedTypeInfo(
        core_type=SimpleDataclass,
        base_type=SimpleDataclass,
        variant=AnalyzedStructType(struct_type=SimpleDataclass),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
218
+
219
+
220
def test_named_tuple() -> None:
    """A ``NamedTuple`` subclass analyzes to a struct type, like a dataclass."""
    analyzed = analyze_type_info(SimpleNamedTuple)
    expected = AnalyzedTypeInfo(
        core_type=SimpleNamedTuple,
        base_type=SimpleNamedTuple,
        variant=AnalyzedStructType(struct_type=SimpleNamedTuple),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
230
+
231
+
232
def test_str() -> None:
    """``str`` maps to the basic kind ``"Str"``."""
    analyzed = analyze_type_info(str)
    expected = AnalyzedTypeInfo(
        core_type=str,
        base_type=str,
        variant=AnalyzedBasicType(kind="Str"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
242
+
243
+
244
def test_bool() -> None:
    """``bool`` maps to the basic kind ``"Bool"``."""
    analyzed = analyze_type_info(bool)
    expected = AnalyzedTypeInfo(
        core_type=bool,
        base_type=bool,
        variant=AnalyzedBasicType(kind="Bool"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
254
+
255
+
256
def test_bytes() -> None:
    """``bytes`` maps to the basic kind ``"Bytes"``."""
    analyzed = analyze_type_info(bytes)
    expected = AnalyzedTypeInfo(
        core_type=bytes,
        base_type=bytes,
        variant=AnalyzedBasicType(kind="Bytes"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
266
+
267
+
268
def test_uuid() -> None:
    """``uuid.UUID`` maps to the basic kind ``"Uuid"``."""
    analyzed = analyze_type_info(uuid.UUID)
    expected = AnalyzedTypeInfo(
        core_type=uuid.UUID,
        base_type=uuid.UUID,
        variant=AnalyzedBasicType(kind="Uuid"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
278
+
279
+
280
def test_date() -> None:
    """``datetime.date`` maps to the basic kind ``"Date"``."""
    analyzed = analyze_type_info(datetime.date)
    expected = AnalyzedTypeInfo(
        core_type=datetime.date,
        base_type=datetime.date,
        variant=AnalyzedBasicType(kind="Date"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
290
+
291
+
292
def test_time() -> None:
    """``datetime.time`` maps to the basic kind ``"Time"``."""
    analyzed = analyze_type_info(datetime.time)
    expected = AnalyzedTypeInfo(
        core_type=datetime.time,
        base_type=datetime.time,
        variant=AnalyzedBasicType(kind="Time"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
302
+
303
+
304
def test_timedelta() -> None:
    """``datetime.timedelta`` maps to the basic kind ``"TimeDelta"``."""
    analyzed = analyze_type_info(datetime.timedelta)
    expected = AnalyzedTypeInfo(
        core_type=datetime.timedelta,
        base_type=datetime.timedelta,
        variant=AnalyzedBasicType(kind="TimeDelta"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
314
+
315
+
316
def test_float() -> None:
    """The builtin ``float`` maps to the basic kind ``"Float64"``."""
    analyzed = analyze_type_info(float)
    expected = AnalyzedTypeInfo(
        core_type=float,
        base_type=float,
        variant=AnalyzedBasicType(kind="Float64"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
326
+
327
+
328
def test_int() -> None:
    """The builtin ``int`` maps to the basic kind ``"Int64"``."""
    analyzed = analyze_type_info(int)
    expected = AnalyzedTypeInfo(
        core_type=int,
        base_type=int,
        variant=AnalyzedBasicType(kind="Int64"),
        attrs=None,
        nullable=False,
    )
    assert analyzed == expected
338
+
339
+
340
def test_type_with_attributes() -> None:
    """A ``TypeAttr`` annotation is collected into the ``attrs`` mapping."""
    analyzed = analyze_type_info(Annotated[str, TypeAttr("key", "value")])
    expected = AnalyzedTypeInfo(
        core_type=str,
        base_type=str,
        variant=AnalyzedBasicType(kind="Str"),
        attrs={"key": "value"},
        nullable=False,
    )
    assert analyzed == expected
350
+
351
+
352
def test_encode_enriched_type_none() -> None:
    """Encoding ``None`` (no type at all) returns ``None``."""
    encoded = encode_enriched_type(None)
    assert encoded is None
356
+
357
+
358
def test_encode_enriched_type_struct() -> None:
    """A dataclass encodes as a ``Struct`` whose fields keep declaration order."""
    encoded = encode_enriched_type(SimpleDataclass)
    assert encoded["type"]["kind"] == "Struct"

    fields = encoded["type"]["fields"]
    assert len(fields) == 2

    # Field 0 is ``name: str``.
    assert fields[0]["name"] == "name"
    assert fields[0]["type"]["kind"] == "Str"

    # Field 1 is ``value: int``.
    assert fields[1]["name"] == "value"
    assert fields[1]["type"]["kind"] == "Int64"
367
+
368
+
369
def test_encode_enriched_type_vector() -> None:
    """An NDArray encodes as a ``Vector`` with a ``None`` dimension."""
    encoded = encode_enriched_type(NDArray[np.float32])
    assert encoded["type"]["kind"] == "Vector"
    assert encoded["type"]["element_type"]["kind"] == "Float32"
    assert encoded["type"]["dimension"] is None
375
+
376
+
377
def test_encode_enriched_type_ltable() -> None:
    """A list of structs encodes as an ``LTable`` whose row lists the struct fields."""
    encoded = encode_enriched_type(list[SimpleDataclass])
    assert encoded["type"]["kind"] == "LTable"
    assert "fields" in encoded["type"]["row"]
    assert len(encoded["type"]["row"]["fields"]) == 2
383
+
384
+
385
def test_encode_enriched_type_with_attrs() -> None:
    """``TypeAttr`` annotations are emitted under the ``"attrs"`` key."""
    encoded = encode_enriched_type(Annotated[str, TypeAttr("key", "value")])
    assert encoded["type"]["kind"] == "Str"
    assert encoded["attrs"] == {"key": "value"}
390
+
391
+
392
def test_encode_enriched_type_nullable() -> None:
    """An optional type encodes its inner kind and sets ``"nullable"`` to ``True``."""
    encoded = encode_enriched_type(str | None)
    assert encoded["type"]["kind"] == "Str"
    assert encoded["nullable"] is True
397
+
398
+
399
def test_encode_scalar_numpy_types_schema() -> None:
    """NumPy scalar types encode to their basic kind and are not marked nullable."""
    # Insertion order of the dict keeps the cases in the same sequence.
    kind_by_np_type = {
        np.int64: "Int64",
        np.float32: "Float32",
        np.float64: "Float64",
    }
    for np_type, expected_kind in kind_by_np_type.items():
        schema = encode_enriched_type(np_type)
        assert schema["type"]["kind"] == expected_kind, (
            f"Expected {expected_kind} for {np_type}, got {schema['type']['kind']}"
        )
        # A missing "nullable" key counts as not nullable.
        assert not schema.get("nullable", False)
410
+
411
+
412
def test_annotated_struct_with_type_kind() -> None:
    """An explicit ``TypeKind`` on a dataclass yields a basic type of that kind."""
    variant = analyze_type_info(Annotated[SimpleDataclass, TypeKind("Vector")]).variant
    assert isinstance(variant, AnalyzedBasicType)
    assert variant.kind == "Vector"
417
+
418
+
419
def test_annotated_list_with_type_kind() -> None:
    """An explicit ``TypeKind`` on a list yields a basic type of that kind."""
    variant = analyze_type_info(Annotated[list[int], TypeKind("Struct")]).variant
    assert isinstance(variant, AnalyzedBasicType)
    assert variant.kind == "Struct"
424
+
425
+
426
def test_unknown_type() -> None:
    """An unparameterised ``set`` analyzes to the unknown-type variant."""
    variant = analyze_type_info(set).variant
    assert isinstance(variant, AnalyzedUnknownType)
@@ -0,0 +1,134 @@
1
+ """Tests for naming validation functionality."""
2
+
3
+ import pytest
4
+ from cocoindex.validation import (
5
+ validate_field_name,
6
+ validate_flow_name,
7
+ validate_full_flow_name,
8
+ validate_app_namespace_name,
9
+ validate_target_name,
10
+ NamingError,
11
+ validate_identifier_name,
12
+ )
13
+
14
+
15
class TestValidateIdentifierName:
    """Checks on ``validate_identifier_name``.

    The tests rely on it returning ``None`` for an acceptable name and a
    non-``None`` message otherwise.
    """

    def test_valid_names(self) -> None:
        """Well-formed identifiers produce no error."""
        accepted = (
            "field1",
            "field_name",
            "_private",
            "a",
            "field123",
            "FIELD_NAME",
            "MyField",
            "field_123_test",
        )

        for name in accepted:
            result = validate_identifier_name(name)
            assert result is None, f"Valid name '{name}' failed validation: {result}"

    def test_valid_names_with_dots(self) -> None:
        """Dotted names produce no error when ``allow_dots=True``."""
        accepted = ("app.flow", "my_app.my_flow", "namespace.sub.flow", "a.b.c.d")

        for name in accepted:
            result = validate_identifier_name(name, allow_dots=True)
            assert result is None, (
                f"Valid dotted name '{name}' failed validation: {result}"
            )

    def test_invalid_starting_characters(self) -> None:
        """Names with a disallowed first character are rejected."""
        rejected = (
            "123field",  # leading digit
            ".field",  # leading dot
            "-field",  # leading dash
            " field",  # leading space
        )

        for name in rejected:
            result = validate_identifier_name(name)
            assert result is not None, (
                f"Invalid name '{name}' should have failed validation"
            )

    def test_double_underscore_restriction(self) -> None:
        """A leading double underscore is rejected with a message that says so."""
        for name in ("__reserved", "__internal", "__test"):
            result = validate_identifier_name(name)
            assert result is not None
            assert "double underscores" in result.lower()

    def test_length_restriction(self) -> None:
        """A name one character over ``max_length`` is rejected."""
        result = validate_identifier_name("a" * 65, max_length=64)
        assert result is not None
        assert "maximum length" in result.lower()
75
+
76
+
77
class TestSpecificValidators:
    """Checks on the per-entity validators, which raise ``NamingError`` on bad names."""

    def test_valid_field_names(self) -> None:
        """Acceptable field names pass without raising."""
        for name in ("field1", "field_name", "_private", "FIELD"):
            validate_field_name(name)  # must not raise

    def test_invalid_field_names(self) -> None:
        """Every malformed field name raises ``NamingError``."""
        rejected = ("123field", "field-name", "__reserved", "a" * 65)

        for name in rejected:
            with pytest.raises(NamingError):
                validate_field_name(name)

    def test_flow_validation(self) -> None:
        """Flow names: accepted forms pass, malformed ones raise."""
        # Accepted flow names.
        for name in ("MyFlow", "my_flow_123"):
            validate_flow_name(name)

        # Rejected flow names: leading digit, then leading double underscore.
        for name in ("123flow", "__reserved_flow"):
            with pytest.raises(NamingError):
                validate_flow_name(name)

    def test_full_flow_name_allows_dots(self) -> None:
        """Full flow names may contain dots but must otherwise be valid."""
        for name in ("app.my_flow", "namespace.subnamespace.flow"):
            validate_full_flow_name(name)

        # Dots being allowed does not excuse a leading digit.
        with pytest.raises(NamingError):
            validate_full_flow_name("123.invalid")

    def test_target_validation(self) -> None:
        """Target names: accepted forms pass, a leading digit raises."""
        for name in ("my_target", "output_table"):
            validate_target_name(name)

        with pytest.raises(NamingError):
            validate_target_name("123target")

    def test_app_namespace_validation(self) -> None:
        """App namespaces: accepted forms pass; dots and leading digits raise."""
        for name in ("myapp", "my_app_123"):
            validate_app_namespace_name(name)

        # Dots are not allowed in an app namespace; neither is a leading digit.
        for name in ("my.app", "123app"):
            with pytest.raises(NamingError):
                validate_app_namespace_name(name)