cocoindex 0.3.4__cp311-abi3-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. cocoindex/__init__.py +114 -0
  2. cocoindex/_engine.abi3.so +0 -0
  3. cocoindex/auth_registry.py +44 -0
  4. cocoindex/cli.py +830 -0
  5. cocoindex/engine_object.py +214 -0
  6. cocoindex/engine_value.py +550 -0
  7. cocoindex/flow.py +1281 -0
  8. cocoindex/functions/__init__.py +40 -0
  9. cocoindex/functions/_engine_builtin_specs.py +66 -0
  10. cocoindex/functions/colpali.py +247 -0
  11. cocoindex/functions/sbert.py +77 -0
  12. cocoindex/index.py +50 -0
  13. cocoindex/lib.py +75 -0
  14. cocoindex/llm.py +47 -0
  15. cocoindex/op.py +1047 -0
  16. cocoindex/py.typed +0 -0
  17. cocoindex/query_handler.py +57 -0
  18. cocoindex/runtime.py +78 -0
  19. cocoindex/setting.py +171 -0
  20. cocoindex/setup.py +92 -0
  21. cocoindex/sources/__init__.py +5 -0
  22. cocoindex/sources/_engine_builtin_specs.py +120 -0
  23. cocoindex/subprocess_exec.py +277 -0
  24. cocoindex/targets/__init__.py +5 -0
  25. cocoindex/targets/_engine_builtin_specs.py +153 -0
  26. cocoindex/targets/lancedb.py +466 -0
  27. cocoindex/tests/__init__.py +0 -0
  28. cocoindex/tests/test_engine_object.py +331 -0
  29. cocoindex/tests/test_engine_value.py +1724 -0
  30. cocoindex/tests/test_optional_database.py +249 -0
  31. cocoindex/tests/test_transform_flow.py +300 -0
  32. cocoindex/tests/test_typing.py +553 -0
  33. cocoindex/tests/test_validation.py +134 -0
  34. cocoindex/typing.py +834 -0
  35. cocoindex/user_app_loader.py +53 -0
  36. cocoindex/utils.py +20 -0
  37. cocoindex/validation.py +104 -0
  38. cocoindex-0.3.4.dist-info/METADATA +288 -0
  39. cocoindex-0.3.4.dist-info/RECORD +42 -0
  40. cocoindex-0.3.4.dist-info/WHEEL +4 -0
  41. cocoindex-0.3.4.dist-info/entry_points.txt +2 -0
  42. cocoindex-0.3.4.dist-info/licenses/THIRD_PARTY_NOTICES.html +13249 -0
cocoindex/typing.py ADDED
@@ -0,0 +1,834 @@
1
+ import collections
2
+ import dataclasses
3
+ import datetime
4
+ import inspect
5
+ import types
6
+ import typing
7
+ import uuid
8
+ from typing import (
9
+ TYPE_CHECKING,
10
+ Annotated,
11
+ Any,
12
+ Generic,
13
+ Iterator,
14
+ Literal,
15
+ NamedTuple,
16
+ Protocol,
17
+ TypeVar,
18
+ overload,
19
+ Self,
20
+ get_type_hints,
21
+ )
22
+
23
+ import numpy as np
24
+ from numpy.typing import NDArray
25
+
26
+ # Optional Pydantic support
27
+ try:
28
+ import pydantic
29
+
30
+ PYDANTIC_AVAILABLE = True
31
+ except ImportError:
32
+ pydantic = None # type: ignore[assignment]
33
+ PYDANTIC_AVAILABLE = False
34
+
35
+ if TYPE_CHECKING:
36
+ if PYDANTIC_AVAILABLE:
37
+ from pydantic import BaseModel
38
+ else:
39
+ BaseModel = object # type: ignore[misc,assignment]
40
+
41
+
42
+ class VectorInfo(NamedTuple):
43
+ dim: int | None
44
+
45
+
46
class TypeKind(NamedTuple):
    """Annotation payload that pins a Python type to an explicit engine kind name."""

    # Engine kind name, e.g. "Int64" or "Json".
    kind: str
48
+
49
+
50
class TypeAttr:
    """A key/value attribute that can be attached to a type through `Annotated[...]`."""

    key: str
    value: Any

    def __init__(self, key: str, value: Any):
        """Store the attribute name and its value."""
        self.key, self.value = key, value
57
+
58
+
59
# Any metadata object this module recognizes inside `Annotated[...]`.
Annotation = TypeKind | TypeAttr | VectorInfo

# Python types tagged with an explicit engine kind through a `TypeKind` annotation.
Int64 = Annotated[int, TypeKind("Int64")]
Float32 = Annotated[float, TypeKind("Float32")]
Float64 = Annotated[float, TypeKind("Float64")]
Range = Annotated[tuple[int, int], TypeKind("Range")]
Json = Annotated[Any, TypeKind("Json")]
LocalDateTime = Annotated[datetime.datetime, TypeKind("LocalDateTime")]
OffsetDateTime = Annotated[datetime.datetime, TypeKind("OffsetDateTime")]
68
+
69
if TYPE_CHECKING:
    T_co = TypeVar("T_co", covariant=True)
    Dim_co = TypeVar("Dim_co", bound=int | None, covariant=True, default=None)

    class Vector(Protocol, Generic[T_co, Dim_co]):
        """Static-typing view of Vector[T, Dim]: an indexable, sized container of T."""

        def __getitem__(self, index: int) -> T_co: ...
        def __len__(self) -> int: ...

else:

    class Vector:  # type: ignore[unreachable]
        """Runtime alias: subscripting yields an annotated NDArray[T] or list[T]."""

        def __class_getitem__(cls, params):
            """
            Build the annotated container type for `Vector[T]` or `Vector[T, Literal[N]]`.

            The dimension is taken from a `Literal[...]` second parameter; any
            other second parameter results in no dimension being recorded.
            """
            if isinstance(params, tuple):
                # Element type and dimension provided, e.g. Vector[np.float32, Literal[3]]
                elem_type, dim_spec = params
                dim = None
                if typing.get_origin(dim_spec) is Literal:
                    dim = typing.get_args(dim_spec)[0]
            else:
                # No dimension provided, e.g. Vector[np.float32]
                elem_type, dim = params, None
            info = VectorInfo(dim=dim)

            # NDArray is used for numpy numeric dtypes (and ndarray itself), list otherwise.
            elem_base = analyze_type_info(elem_type).base_type
            if elem_base is np.ndarray or is_numpy_number_type(elem_base):
                container = NDArray[elem_type]
            else:
                container = list[elem_type]
            return Annotated[container, info]
105
+
106
+
107
# Engine kind names that denote table types.
TABLE_TYPES: tuple[str, str] = ("KTable", "LTable")
# Field name given to a KTable key that is a single basic-typed value.
KEY_FIELD_NAME: str = "_key"
109
+
110
+
111
def extract_ndarray_elem_dtype(ndarray_type: Any) -> Any:
    """
    Return the element dtype of a parameterized ndarray type.

    Args:
        ndarray_type: A type such as `NDArray[np.float32]`, i.e. an ndarray
            alias with exactly two type arguments (shape, dtype spec).

    Returns:
        The first type argument of the dtype spec (e.g. `np.float32`).

    Raises:
        ValueError: If `ndarray_type` does not carry exactly two type
            arguments (e.g. a bare `np.ndarray`), or if its dtype spec has no
            type argument.
    """
    args = typing.get_args(ndarray_type)
    # A bare `np.ndarray` has no type arguments; report that explicitly instead
    # of failing with an opaque tuple-unpacking error.
    if len(args) != 2:
        raise ValueError(
            "Expected a parameterized NDArray type such as NDArray[np.float32], "
            f"got {ndarray_type}"
        )
    _, dtype_spec = args
    dtype_args = typing.get_args(dtype_spec)
    if not dtype_args:
        raise ValueError(f"Invalid dtype specification: {dtype_spec}")
    return dtype_args[0]
118
+
119
+
120
def is_numpy_number_type(t: type) -> bool:
    """Return True when `t` is a class derived from `np.integer` or `np.floating`."""
    if not isinstance(t, type):
        return False
    return issubclass(t, (np.integer, np.floating))
122
+
123
+
124
def is_namedtuple_type(t: type) -> bool:
    """Return True when `t` is a tuple subclass that exposes a `_fields` attribute."""
    if not isinstance(t, type):
        return False
    if not issubclass(t, tuple):
        return False
    return hasattr(t, "_fields")
126
+
127
+
128
def is_pydantic_model(t: Any) -> bool:
    """Return True when Pydantic is importable and `t` is a subclass of `pydantic.BaseModel`."""
    if PYDANTIC_AVAILABLE and isinstance(t, type):
        try:
            return issubclass(t, pydantic.BaseModel)
        except TypeError:
            # issubclass() rejected `t`; treat it as "not a model".
            return False
    return False
136
+
137
+
138
def is_struct_type(t: Any) -> bool:
    """Return True when `t` is a class that is a dataclass, a namedtuple or a Pydantic model."""
    if not isinstance(t, type):
        return False
    return dataclasses.is_dataclass(t) or is_namedtuple_type(t) or is_pydantic_model(t)
142
+
143
+
144
class DtypeRegistry:
    """
    Lookup table from supported NumPy scalar dtypes to CocoIndex kind names.
    """

    _DTYPE_TO_KIND: dict[Any, str] = {
        np.float32: "Float32",
        np.float64: "Float64",
        np.int64: "Int64",
    }

    @classmethod
    def validate_dtype_and_get_kind(cls, dtype: Any) -> str:
        """
        Return the CocoIndex kind registered for `dtype`.

        Raises:
            TypeError: If `dtype` is `typing.Any`.
            ValueError: If `dtype` is not one of the registered dtypes.
        """
        if dtype is Any:
            raise TypeError(
                "NDArray for Vector must use a concrete numpy dtype, got `Any`."
            )
        registry = cls._DTYPE_TO_KIND
        if dtype not in registry:
            raise ValueError(
                f"Unsupported NumPy dtype in NDArray: {dtype}. "
                f"Supported dtypes: {cls._DTYPE_TO_KIND.keys()}"
            )
        return registry[dtype]
172
+
173
+
174
class AnalyzedAnyType(NamedTuple):
    """
    Variant used when there is no type annotation or the annotation is `Any`.
    """
178
+
179
+
180
class AnalyzedBasicType(NamedTuple):
    """
    Variant for a type that maps to a single engine kind (a basic type or Json).
    """

    # Engine kind name, e.g. "Str" or "Int64".
    kind: str
186
+
187
+
188
class AnalyzedListType(NamedTuple):
    """
    Variant for list-like types, e.g. list[T], Sequence[T] or NDArray[T].
    """

    # Type of each element.
    elem_type: Any
    # Vector annotation found on the type, if any.
    vector_info: VectorInfo | None
195
+
196
+
197
+ class AnalyzedStructFieldInfo(NamedTuple):
198
+ """
199
+ Info about a field in a struct type.
200
+ """
201
+
202
+ name: str
203
+ type_hint: Any
204
+ default_value: Any
205
+ description: str | None
206
+
207
+
208
class AnalyzedStructType(NamedTuple):
    """
    Variant for struct types: dataclasses, NamedTuples and Pydantic models.
    """

    # The struct class itself.
    struct_type: type

    @property
    def fields(self) -> Iterator[AnalyzedStructFieldInfo]:
        """
        Yield one `AnalyzedStructFieldInfo` per field of the struct.

        A field without a default reports `inspect.Parameter.empty` as its
        `default_value`. Only Pydantic fields carry a description.

        Raises:
            ValueError: If `struct_type` is not a dataclass, a NamedTuple or a
                Pydantic model.
        """
        type_hints = get_type_hints(self.struct_type, include_extras=True)
        if dataclasses.is_dataclass(self.struct_type):
            # The constructor signature lists the fields and their defaults.
            parameters = inspect.signature(self.struct_type).parameters
            for name, parameter in parameters.items():
                yield AnalyzedStructFieldInfo(
                    name=name,
                    type_hint=type_hints.get(name, Any),
                    default_value=parameter.default,
                    description=None,
                )
        elif is_namedtuple_type(self.struct_type):
            fields = getattr(self.struct_type, "_fields", ())
            defaults = getattr(self.struct_type, "_field_defaults", {})
            for name in fields:
                yield AnalyzedStructFieldInfo(
                    name=name,
                    type_hint=type_hints.get(name, Any),
                    default_value=defaults.get(name, inspect.Parameter.empty),
                    description=None,
                )
        elif is_pydantic_model(self.struct_type):
            model_fields = getattr(self.struct_type, "model_fields", {})
            for name, field_info in model_fields.items():
                # Pydantic v2 stores `PydanticUndefined` (not Ellipsis) as the
                # default of a required field, so an Ellipsis check alone never
                # detects required fields; ask the field itself when it can answer.
                is_required = getattr(field_info, "is_required", None)
                required = field_info.default is ... or (
                    callable(is_required) and is_required()
                )
                # NOTE(review): a field with only a `default_factory` is not
                # required and still reports `field_info.default` here, as before.
                yield AnalyzedStructFieldInfo(
                    name=name,
                    type_hint=type_hints.get(name, Any),
                    default_value=inspect.Parameter.empty
                    if required
                    else field_info.default,
                    description=field_info.description,
                )
        else:
            raise ValueError(f"Unsupported struct type: {self.struct_type}")
250
+
251
+
252
class AnalyzedUnionType(NamedTuple):
    """
    Variant for union types, e.g. T1 | T2 | ...
    """

    # Member types of the union.
    variant_types: list[Any]
258
+
259
+
260
class AnalyzedDictType(NamedTuple):
    """
    Variant for mapping types, e.g. dict[K, V] or Mapping[K, V].
    """

    # Type of the mapping keys.
    key_type: Any
    # Type of the mapping values.
    value_type: Any
267
+
268
+
269
class AnalyzedUnknownType(NamedTuple):
    """
    Variant for types that CocoIndex does not support.
    """
273
+
274
+
275
# Union of every variant that `AnalyzedTypeInfo.variant` may hold.
AnalyzedTypeVariant = (
    AnalyzedAnyType
    | AnalyzedBasicType
    | AnalyzedListType
    | AnalyzedStructType
    | AnalyzedUnionType
    | AnalyzedDictType
    | AnalyzedUnknownType
)
284
+
285
+
286
@dataclasses.dataclass
class AnalyzedTypeInfo:
    """
    Result of analyzing a Python type annotation.
    """

    # The type with annotations stripped, e.g. int, list[int], dict[str, int].
    core_type: Any
    # The type with annotations and parameters stripped, e.g. int, list, dict.
    base_type: Any
    # Classification of the type.
    variant: AnalyzedTypeVariant
    # Key/value attributes collected from TypeAttr annotations, if any.
    attrs: dict[str, Any] | None
    # Whether None is an accepted value.
    nullable: bool = False
299
+
300
+
301
def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
    """
    Analyze a Python type annotation and extract CocoIndex-specific type information.

    `Annotated[...]` wrappers are peeled off first and their metadata is read
    (`TypeAttr`, `VectorInfo`, `TypeKind`). The remaining type is then
    classified into one of the `Analyzed*Type` variants. A union that has a
    single non-None member is analyzed as that member with `nullable` set
    according to whether None was part of the union.
    """
    annotations: tuple[Annotation, ...] = ()
    type_args: tuple[Any, ...] = ()

    # Strip Annotated wrappers, remembering the metadata of the last one seen.
    base_type = typing.get_origin(t)
    while base_type is Annotated:
        annotations = t.__metadata__
        t = t.__origin__
        base_type = typing.get_origin(t)
    if base_type is None:
        # Not a parameterized generic: the type is its own base.
        base_type = t
    else:
        type_args = typing.get_args(t)
    core_type = t

    # Collect the recognized annotation payloads.
    attrs: dict[str, Any] | None = None
    vector_info: VectorInfo | None = None
    kind: str | None = None
    for annotation in annotations:
        if isinstance(annotation, TypeAttr):
            if attrs is None:
                attrs = {}
            attrs[annotation.key] = annotation.value
        elif isinstance(annotation, VectorInfo):
            vector_info = annotation
        elif isinstance(annotation, TypeKind):
            kind = annotation.kind

    nullable = False
    variant: AnalyzedTypeVariant | None = None

    if kind is not None:
        # An explicit TypeKind annotation wins over any structural analysis.
        variant = AnalyzedBasicType(kind=kind)
    elif base_type is Any or base_type is inspect.Parameter.empty:
        variant = AnalyzedAnyType()
    elif is_struct_type(base_type):
        variant = AnalyzedStructType(struct_type=t)
    elif is_numpy_number_type(t):
        variant = AnalyzedBasicType(kind=DtypeRegistry.validate_dtype_and_get_kind(t))
    elif base_type is collections.abc.Sequence or base_type is list:
        elem_type = type_args[0] if len(type_args) > 0 else Any
        variant = AnalyzedListType(elem_type=elem_type, vector_info=vector_info)
    elif base_type is np.ndarray:
        variant = AnalyzedListType(
            elem_type=extract_ndarray_elem_dtype(t), vector_info=vector_info
        )
    elif base_type is collections.abc.Mapping or base_type is dict or t is dict:
        key_type = type_args[0] if len(type_args) > 0 else Any
        value_type = type_args[1] if len(type_args) > 1 else Any
        variant = AnalyzedDictType(key_type=key_type, value_type=value_type)
    elif base_type in (types.UnionType, typing.Union):
        non_none_types = [arg for arg in type_args if arg not in (None, types.NoneType)]
        if not non_none_types:
            return analyze_type_info(None)

        nullable = len(non_none_types) < len(type_args)
        if len(non_none_types) == 1:
            # `T | None`: analyze T on its own and carry the nullability over.
            inner = analyze_type_info(non_none_types[0])
            inner.nullable = nullable
            return inner

        variant = AnalyzedUnionType(variant_types=non_none_types)
    else:
        # Plain Python types with a fixed engine kind; matched by identity so
        # that, e.g., bool is not mistaken for int.
        builtin_kinds = (
            (bytes, "Bytes"),
            (str, "Str"),
            (bool, "Bool"),
            (int, "Int64"),
            (float, "Float64"),
            (uuid.UUID, "Uuid"),
            (datetime.date, "Date"),
            (datetime.time, "Time"),
            (datetime.datetime, "OffsetDateTime"),
            (datetime.timedelta, "TimeDelta"),
        )
        kind = next((name for py_type, name in builtin_kinds if t is py_type), None)
        if kind is None:
            variant = AnalyzedUnknownType()
        else:
            variant = AnalyzedBasicType(kind=kind)

    return AnalyzedTypeInfo(
        core_type=core_type,
        base_type=base_type,
        variant=variant,
        attrs=attrs,
        nullable=nullable,
    )
404
+
405
+
406
def _encode_struct_schema(
    struct_info: AnalyzedStructType, key_type: type | None = None
) -> tuple[dict[str, Any], int | None]:
    """
    Encode a struct's fields (optionally preceded by key fields) as a schema dict.

    Args:
        struct_info: The analyzed struct whose fields form the row.
        key_type: Optional key type. A basic type contributes one field named
            `KEY_FIELD_NAME`; a struct type contributes all of its fields.

    Returns:
        A tuple `(schema, num_key_parts)`. `schema` has a "fields" list and,
        when the struct class has a docstring, a "description".
        `num_key_parts` is the number of leading key fields, or None when no
        `key_type` was given.

    Raises:
        ValueError: If `key_type` is neither a basic nor a struct type, or if
            a field type cannot be encoded (a note naming the field is added).
    """
    fields: list[dict[str, Any]] = []

    def add_field(
        owner: type,
        name: str,
        analyzed_type: AnalyzedTypeInfo,
        description: str | None = None,
    ) -> None:
        try:
            type_info = encode_enriched_type_info(analyzed_type)
        except ValueError as e:
            # Name the struct that actually declares the field, so key-struct
            # fields are not attributed to the value struct.
            e.add_note(
                f"Failed to encode annotation for field - "
                f"{owner.__name__}.{name}: {analyzed_type.core_type}"
            )
            raise
        type_info["name"] = name
        if description is not None:
            type_info["description"] = description
        fields.append(type_info)

    def add_fields_from_struct(info: AnalyzedStructType) -> None:
        for field in info.fields:
            add_field(
                info.struct_type,
                field.name,
                analyze_type_info(field.type_hint),
                field.description,
            )

    result: dict[str, Any] = {}
    num_key_parts = None
    if key_type is not None:
        key_type_info = analyze_type_info(key_type)
        if isinstance(key_type_info.variant, AnalyzedBasicType):
            add_field(struct_info.struct_type, KEY_FIELD_NAME, key_type_info)
            num_key_parts = 1
        elif isinstance(key_type_info.variant, AnalyzedStructType):
            add_fields_from_struct(key_type_info.variant)
            # Only key fields have been added so far.
            num_key_parts = len(fields)
        else:
            raise ValueError(f"Unsupported key type: {key_type}")

    add_fields_from_struct(struct_info)

    result["fields"] = fields
    # Read the docstring of the user's struct class; `struct_info` itself is an
    # AnalyzedStructType wrapper whose docstring is unrelated to the struct.
    if doc := inspect.getdoc(struct_info.struct_type):
        result["description"] = doc
    return result, num_key_parts
450
+
451
+
452
def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
    """
    Encode an analyzed type as the engine's type dict (always carries a "kind").

    Lists of structs become "LTable", other lists become "Vector", dicts with
    struct values become "KTable", and unions become "Union".

    Raises:
        ValueError: For `Any`/missing annotations, unsupported types, an
            LTable that carries vector info, a dict whose value type is not a
            struct, or a variant that is not recognized.
    """
    variant = type_info.variant

    if isinstance(variant, AnalyzedAnyType):
        raise ValueError("Specific type annotation is expected")

    if isinstance(variant, AnalyzedUnknownType):
        raise ValueError(f"Unsupported type annotation: {type_info.core_type}")

    if isinstance(variant, AnalyzedBasicType):
        return {"kind": variant.kind}

    if isinstance(variant, AnalyzedStructType):
        encoded_type, _ = _encode_struct_schema(variant)
        encoded_type["kind"] = "Struct"
        return encoded_type

    if isinstance(variant, AnalyzedListType):
        elem_type_info = analyze_type_info(variant.elem_type)
        if isinstance(elem_type_info.variant, AnalyzedStructType):
            if variant.vector_info is not None:
                raise ValueError("LTable type must not have a vector info")
            # The row schema is the only encoding needed for a struct element.
            row_type, _ = _encode_struct_schema(elem_type_info.variant)
            return {"kind": "LTable", "row": row_type}
        vector_info = variant.vector_info
        return {
            "kind": "Vector",
            "element_type": _encode_type(elem_type_info),
            "dimension": vector_info and vector_info.dim,
        }

    if isinstance(variant, AnalyzedDictType):
        value_type_info = analyze_type_info(variant.value_type)
        if not isinstance(value_type_info.variant, AnalyzedStructType):
            raise ValueError(
                f"KTable value must have a Struct type, got {value_type_info.core_type}"
            )
        row_type, num_key_parts = _encode_struct_schema(
            value_type_info.variant,
            variant.key_type,
        )
        return {
            "kind": "KTable",
            "row": row_type,
            "num_key_parts": num_key_parts,
        }

    if isinstance(variant, AnalyzedUnionType):
        return {
            "kind": "Union",
            "types": [
                _encode_type(analyze_type_info(typ)) for typ in variant.variant_types
            ],
        }

    # Fail loudly instead of implicitly returning None for an unexpected variant.
    raise ValueError(f"Unsupported type annotation: {type_info.core_type}")
508
+
509
+
510
def encode_enriched_type_info(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
    """
    Encode an `AnalyzedTypeInfo` to a CocoIndex engine's `EnrichedValueType` representation.

    The result always has "type"; "attrs" is added when attributes are
    present and "nullable" only when the type is nullable.
    """
    payload: dict[str, Any] = {"type": _encode_type(type_info)}
    attrs = type_info.attrs
    if attrs is not None:
        payload["attrs"] = attrs
    if type_info.nullable:
        payload["nullable"] = True
    return payload
523
+
524
+
525
+ @overload
526
+ def encode_enriched_type(t: None) -> None: ...
527
+
528
+
529
+ @overload
530
+ def encode_enriched_type(t: Any) -> dict[str, Any]: ...
531
+
532
+
533
+ def encode_enriched_type(t: Any) -> dict[str, Any] | None:
534
+ """
535
+ Convert a Python type to a CocoIndex engine's type representation
536
+ """
537
+ if t is None:
538
+ return None
539
+
540
+ return encode_enriched_type_info(analyze_type_info(t))
541
+
542
+
543
def resolve_forward_ref(t: Any) -> Any:
    """
    Resolve a string forward reference by evaluating it; return anything else unchanged.

    SECURITY NOTE(review): the string is passed to `eval` in this module's
    namespace, so it must only ever come from trusted source-code annotations,
    never from external input.
    """
    if not isinstance(t, str):
        return t
    return eval(t)  # pylint: disable=eval-used
547
+
548
+
549
+ # ========================= Engine Schema Types (Python mirror of Rust) =========================
550
+
551
+
552
+ @dataclasses.dataclass
553
+ class VectorTypeSchema:
554
+ element_type: "BasicValueType"
555
+ dimension: int | None
556
+
557
+ def __str__(self) -> str:
558
+ dimension_str = f", {self.dimension}" if self.dimension is not None else ""
559
+ return f"Vector[{self.element_type}{dimension_str}]"
560
+
561
+ def __repr__(self) -> str:
562
+ return self.__str__()
563
+
564
+ @staticmethod
565
+ def decode(obj: dict[str, Any]) -> "VectorTypeSchema":
566
+ return VectorTypeSchema(
567
+ element_type=BasicValueType.decode(obj["element_type"]),
568
+ dimension=obj.get("dimension"),
569
+ )
570
+
571
+ def encode(self) -> dict[str, Any]:
572
+ return {
573
+ "element_type": self.element_type.encode(),
574
+ "dimension": self.dimension,
575
+ }
576
+
577
+
578
@dataclasses.dataclass
class UnionTypeSchema:
    """Schema of a union: the list of its member types."""

    variants: list["BasicValueType"]

    def __str__(self) -> str:
        """Render as `Union[a | b | ...]`."""
        joined = " | ".join(map(str, self.variants))
        return f"Union[{joined}]"

    def __repr__(self) -> str:
        return str(self)

    @staticmethod
    def decode(obj: dict[str, Any]) -> "UnionTypeSchema":
        """Build the schema from a dict whose "types" entry lists the members."""
        members = [BasicValueType.decode(member) for member in obj["types"]]
        return UnionTypeSchema(variants=members)

    def encode(self) -> dict[str, Any]:
        """Return the dict form with a "types" list."""
        return {"types": [member.encode() for member in self.variants]}
597
+
598
+
599
@dataclasses.dataclass
class BasicValueType:
    """
    Mirror of Rust BasicValueType in JSON form.

    `vector` is populated for the "Vector" kind and `union` for the "Union"
    kind; both stay None for every other kind.
    """

    kind: Literal[
        "Bytes",
        "Str",
        "Bool",
        "Int64",
        "Float32",
        "Float64",
        "Range",
        "Uuid",
        "Date",
        "Time",
        "LocalDateTime",
        "OffsetDateTime",
        "TimeDelta",
        "Json",
        "Vector",
        "Union",
    ]
    vector: VectorTypeSchema | None = None
    union: UnionTypeSchema | None = None

    def __str__(self) -> str:
        """Render vectors and unions with their details, other kinds by name."""
        if self.kind == "Vector" and self.vector is not None:
            vec = self.vector
            suffix = "" if vec.dimension is None else f", {vec.dimension}"
            return f"Vector[{vec.element_type}{suffix}]"
        if self.kind == "Union" and self.union is not None:
            members = " | ".join(map(str, self.union.variants))
            return f"Union[{members}]"
        return self.kind

    def __repr__(self) -> str:
        return str(self)

    @staticmethod
    def decode(obj: dict[str, Any]) -> "BasicValueType":
        """Build the value type from its dict form, decoding vector/union details by kind."""
        kind = obj["kind"]
        extra: dict[str, Any] = {}
        if kind == "Vector":
            extra["vector"] = VectorTypeSchema.decode(obj)
        elif kind == "Union":
            extra["union"] = UnionTypeSchema.decode(obj)
        return BasicValueType(kind=kind, **extra)  # type: ignore[arg-type]

    def encode(self) -> dict[str, Any]:
        """Return the dict form: "kind" plus the vector/union fields when present."""
        encoded: dict[str, Any] = {"kind": self.kind}
        if self.kind == "Vector" and self.vector is not None:
            encoded.update(self.vector.encode())
        elif self.kind == "Union" and self.union is not None:
            encoded.update(self.union.encode())
        return encoded
667
+
668
+
669
+ @dataclasses.dataclass
670
+ class EnrichedValueType:
671
+ type: "ValueType"
672
+ nullable: bool = False
673
+ attrs: dict[str, Any] | None = None
674
+
675
+ def __str__(self) -> str:
676
+ result = str(self.type)
677
+ if self.nullable:
678
+ result += "?"
679
+ if self.attrs:
680
+ attrs_str = ", ".join(f"{k}: {v}" for k, v in self.attrs.items())
681
+ result += f" [{attrs_str}]"
682
+ return result
683
+
684
+ def __repr__(self) -> str:
685
+ return self.__str__()
686
+
687
+ @staticmethod
688
+ def decode(obj: dict[str, Any]) -> "EnrichedValueType":
689
+ return EnrichedValueType(
690
+ type=decode_engine_value_type(obj["type"]),
691
+ nullable=obj.get("nullable", False),
692
+ attrs=obj.get("attrs"),
693
+ )
694
+
695
+ def encode(self) -> dict[str, Any]:
696
+ result: dict[str, Any] = {"type": self.type.encode()}
697
+ if self.nullable:
698
+ result["nullable"] = True
699
+ if self.attrs is not None:
700
+ result["attrs"] = self.attrs
701
+ return result
702
+
703
+
704
@dataclasses.dataclass
class FieldSchema:
    """A named field: its enriched value type plus an optional description."""

    name: str
    value_type: EnrichedValueType
    description: str | None = None

    def __str__(self) -> str:
        """Render as `name: value_type`."""
        return f"{self.name}: {self.value_type}"

    def __repr__(self) -> str:
        return str(self)

    @staticmethod
    def decode(obj: dict[str, Any]) -> "FieldSchema":
        """Build from a flat dict that holds "name" next to the value-type keys."""
        # The value type's keys live on the same dict as "name", so the whole
        # dict is handed to the value-type decoder.
        value_type = EnrichedValueType.decode(obj)
        return FieldSchema(
            name=obj["name"],
            value_type=value_type,
            description=obj.get("description"),
        )

    def encode(self) -> dict[str, Any]:
        """Return the flat dict form: value-type keys plus "name" and optional "description"."""
        encoded = self.value_type.encode()
        encoded["name"] = self.name
        if self.description is not None:
            encoded["description"] = self.description
        return encoded
730
+
731
+
732
@dataclasses.dataclass
class StructSchema:
    """An ordered list of fields with an optional description."""

    fields: list[FieldSchema]
    description: str | None = None

    def __str__(self) -> str:
        """Render as `Struct(field, field, ...)`."""
        rendered_fields = ", ".join(map(str, self.fields))
        return f"Struct({rendered_fields})"

    def __repr__(self) -> str:
        return str(self)

    @classmethod
    def decode(cls, obj: dict[str, Any]) -> Self:
        """Build an instance of `cls` from a dict with "fields" and optional "description"."""
        decoded_fields = [FieldSchema.decode(item) for item in obj["fields"]]
        return cls(fields=decoded_fields, description=obj.get("description"))

    def encode(self) -> dict[str, Any]:
        """Return the dict form; "description" is emitted only when set."""
        encoded: dict[str, Any] = {"fields": [f.encode() for f in self.fields]}
        if self.description is not None:
            encoded["description"] = self.description
        return encoded
756
+
757
+
758
@dataclasses.dataclass
class StructType(StructSchema):
    """A struct schema used as a value type; encodes with kind "Struct"."""

    kind: Literal["Struct"] = "Struct"

    def __str__(self) -> str:
        # Same rendering as the plain schema.
        return StructSchema.__str__(self)

    def __repr__(self) -> str:
        return str(self)

    def encode(self) -> dict[str, Any]:
        """Return the schema's dict form with "kind" added."""
        encoded = super().encode()
        encoded["kind"] = self.kind
        return encoded
773
+
774
+
775
@dataclasses.dataclass
class TableType:
    """A table value type: a row schema plus, for KTable, the number of key parts."""

    kind: Literal["KTable", "LTable"]
    row: StructSchema
    num_key_parts: int | None = None  # Only for KTable

    def __str__(self) -> str:
        """Render as `KTable(n)(row)` or `LTable(row)`; a missing key count shows as 1."""
        if self.kind == "KTable":
            key_parts = 1 if self.num_key_parts is None else self.num_key_parts
            prefix = f"KTable({key_parts})"
        else:  # LTable
            prefix = "LTable"
        return f"{prefix}({self.row})"

    def __repr__(self) -> str:
        return str(self)

    @staticmethod
    def decode(obj: dict[str, Any]) -> "TableType":
        """Build from a dict with "kind", "row" and optional "num_key_parts"."""
        row_obj = obj["row"]
        row_fields = [FieldSchema.decode(item) for item in row_obj["fields"]]
        row_schema = StructSchema(
            fields=row_fields, description=row_obj.get("description")
        )
        return TableType(
            kind=obj["kind"],  # type: ignore[arg-type]
            row=row_schema,
            num_key_parts=obj.get("num_key_parts"),
        )

    def encode(self) -> dict[str, Any]:
        """Return the dict form; "num_key_parts" is emitted only when set."""
        encoded: dict[str, Any] = {"kind": self.kind, "row": self.row.encode()}
        if self.num_key_parts is not None:
            encoded["num_key_parts"] = self.num_key_parts
        return encoded
811
+
812
+
813
# Any value type that `decode_engine_value_type` can return.
ValueType = BasicValueType | StructType | TableType
814
+
815
+
816
def decode_engine_field_schemas(objs: list[dict[str, Any]]) -> list[FieldSchema]:
    """Decode each dict in `objs` into a `FieldSchema`, preserving order."""
    return list(map(FieldSchema.decode, objs))
818
+
819
+
820
def decode_engine_value_type(obj: dict[str, Any]) -> ValueType:
    """
    Decode a type dict into a `StructType`, `TableType` or `BasicValueType`.

    The decoder is chosen by `obj["kind"]`: "Struct" selects the struct
    decoder, a kind listed in `TABLE_TYPES` the table decoder, and any other
    kind the basic value decoder.
    """
    kind = obj["kind"]
    if kind == "Struct":
        decoder = StructType.decode
    elif kind in TABLE_TYPES:
        decoder = TableType.decode
    else:
        # Otherwise it's a basic value
        decoder = BasicValueType.decode
    return decoder(obj)
830
+
831
+
832
def encode_engine_value_type(value_type: ValueType) -> dict[str, Any]:
    """Return the dictionary representation produced by `value_type.encode()`."""
    encoded = value_type.encode()
    return encoded