pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/type_system.py
CHANGED
|
@@ -5,10 +5,15 @@ import datetime
|
|
|
5
5
|
import enum
|
|
6
6
|
import io
|
|
7
7
|
import json
|
|
8
|
+
import types
|
|
8
9
|
import typing
|
|
9
10
|
import urllib.parse
|
|
10
11
|
import urllib.request
|
|
11
|
-
|
|
12
|
+
import uuid
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, ClassVar, Iterable, Literal, Mapping, Sequence, Union
|
|
15
|
+
|
|
16
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
12
17
|
|
|
13
18
|
import av
|
|
14
19
|
import jsonschema
|
|
@@ -21,10 +26,9 @@ import sqlalchemy as sql
|
|
|
21
26
|
from typing_extensions import _AnnotatedAlias
|
|
22
27
|
|
|
23
28
|
import pixeltable.exceptions as excs
|
|
29
|
+
from pixeltable.env import Env
|
|
24
30
|
from pixeltable.utils import parse_local_file_path
|
|
25
31
|
|
|
26
|
-
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
27
|
-
|
|
28
32
|
|
|
29
33
|
class ColumnType:
|
|
30
34
|
@enum.unique
|
|
@@ -41,6 +45,8 @@ class ColumnType:
|
|
|
41
45
|
AUDIO = 9
|
|
42
46
|
DOCUMENT = 10
|
|
43
47
|
DATE = 11
|
|
48
|
+
UUID = 12
|
|
49
|
+
BINARY = 13
|
|
44
50
|
|
|
45
51
|
# exprs that don't evaluate to a computable value in Pixeltable, such as an Image member function
|
|
46
52
|
INVALID = 255
|
|
@@ -48,11 +54,11 @@ class ColumnType:
|
|
|
48
54
|
@classmethod
|
|
49
55
|
def supertype(
|
|
50
56
|
cls,
|
|
51
|
-
type1:
|
|
52
|
-
type2:
|
|
57
|
+
type1: 'ColumnType.Type' | None,
|
|
58
|
+
type2: 'ColumnType.Type' | None,
|
|
53
59
|
# we need to pass this in because we can't easily append it as a class member
|
|
54
60
|
common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
|
|
55
|
-
) ->
|
|
61
|
+
) -> 'ColumnType.Type' | None:
|
|
56
62
|
if type1 == type2:
|
|
57
63
|
return type1
|
|
58
64
|
t = common_supertypes.get((type1, type2))
|
|
@@ -63,26 +69,8 @@ class ColumnType:
|
|
|
63
69
|
return t
|
|
64
70
|
return None
|
|
65
71
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
"""
|
|
69
|
-
Base type used in images and arrays
|
|
70
|
-
"""
|
|
71
|
-
|
|
72
|
-
BOOL = (0,)
|
|
73
|
-
INT8 = (1,)
|
|
74
|
-
INT16 = (2,)
|
|
75
|
-
INT32 = (3,)
|
|
76
|
-
INT64 = (4,)
|
|
77
|
-
UINT8 = (5,)
|
|
78
|
-
UINT16 = (6,)
|
|
79
|
-
UINT32 = (7,)
|
|
80
|
-
UINT64 = (8,)
|
|
81
|
-
FLOAT16 = (9,)
|
|
82
|
-
FLOAT32 = (10,)
|
|
83
|
-
FLOAT64 = 11
|
|
84
|
-
|
|
85
|
-
scalar_types: ClassVar[set[Type]] = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP, Type.DATE}
|
|
72
|
+
scalar_json_types: ClassVar[set[Type]] = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL}
|
|
73
|
+
scalar_types: ClassVar[set[Type]] = scalar_json_types | {Type.TIMESTAMP, Type.DATE, Type.UUID}
|
|
86
74
|
numeric_types: ClassVar[set[Type]] = {Type.INT, Type.FLOAT}
|
|
87
75
|
common_supertypes: ClassVar[dict[tuple[Type, Type], Type]] = {
|
|
88
76
|
(Type.BOOL, Type.INT): Type.INT,
|
|
@@ -151,31 +139,37 @@ class ColumnType:
|
|
|
151
139
|
|
|
152
140
|
@classmethod
|
|
153
141
|
def make_type(cls, t: Type) -> ColumnType:
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
142
|
+
match t:
|
|
143
|
+
case cls.Type.STRING:
|
|
144
|
+
return StringType()
|
|
145
|
+
case cls.Type.INT:
|
|
146
|
+
return IntType()
|
|
147
|
+
case cls.Type.FLOAT:
|
|
148
|
+
return FloatType()
|
|
149
|
+
case cls.Type.BOOL:
|
|
150
|
+
return BoolType()
|
|
151
|
+
case cls.Type.TIMESTAMP:
|
|
152
|
+
return TimestampType()
|
|
153
|
+
case cls.Type.JSON:
|
|
154
|
+
return JsonType()
|
|
155
|
+
case cls.Type.ARRAY:
|
|
156
|
+
return ArrayType()
|
|
157
|
+
case cls.Type.IMAGE:
|
|
158
|
+
return ImageType()
|
|
159
|
+
case cls.Type.VIDEO:
|
|
160
|
+
return VideoType()
|
|
161
|
+
case cls.Type.AUDIO:
|
|
162
|
+
return AudioType()
|
|
163
|
+
case cls.Type.DOCUMENT:
|
|
164
|
+
return DocumentType()
|
|
165
|
+
case cls.Type.DATE:
|
|
166
|
+
return DateType()
|
|
167
|
+
case cls.Type.UUID:
|
|
168
|
+
return UUIDType()
|
|
169
|
+
case cls.Type.BINARY:
|
|
170
|
+
return BinaryType()
|
|
171
|
+
case _:
|
|
172
|
+
raise AssertionError(t)
|
|
179
173
|
|
|
180
174
|
def __repr__(self) -> str:
|
|
181
175
|
return self._to_str(as_schema=False)
|
|
@@ -185,7 +179,7 @@ class ColumnType:
|
|
|
185
179
|
if as_schema:
|
|
186
180
|
return base_str if self.nullable else f'Required[{base_str}]'
|
|
187
181
|
else:
|
|
188
|
-
return f'
|
|
182
|
+
return f'{base_str} | None' if self.nullable else base_str
|
|
189
183
|
|
|
190
184
|
def _to_base_str(self) -> str:
|
|
191
185
|
"""
|
|
@@ -214,7 +208,13 @@ class ColumnType:
|
|
|
214
208
|
# Default: just compare base types (this works for all types whose only parameter is nullable)
|
|
215
209
|
return self._type == other._type
|
|
216
210
|
|
|
217
|
-
def supertype(self, other: ColumnType) ->
|
|
211
|
+
def supertype(self, other: ColumnType, for_inference: bool = False) -> ColumnType | None:
|
|
212
|
+
"""
|
|
213
|
+
Returns the most specific type that is a supertype of both `self` and `other`.
|
|
214
|
+
|
|
215
|
+
If `for_inference=True`, then we disallow certain type relationships that are technically correct, but may
|
|
216
|
+
be confusing for schema inference during data imports.
|
|
217
|
+
"""
|
|
218
218
|
if self == other:
|
|
219
219
|
return self
|
|
220
220
|
if self.matches(other):
|
|
@@ -229,12 +229,20 @@ class ColumnType:
|
|
|
229
229
|
t = self.Type.supertype(self._type, other._type, self.common_supertypes)
|
|
230
230
|
if t is not None:
|
|
231
231
|
return self.make_type(t).copy(nullable=(self.nullable or other.nullable))
|
|
232
|
-
|
|
232
|
+
|
|
233
|
+
# If we see a mix of JSON and/or JSON-compatible scalar types, resolve to JSON.
|
|
234
|
+
# (For JSON+JSON, we return None to allow JsonType to handle merging the type schemas.)
|
|
235
|
+
if not for_inference and (
|
|
236
|
+
(self.is_json_type() and other.is_scalar_json_type())
|
|
237
|
+
or (self.is_scalar_json_type() and other.is_json_type())
|
|
238
|
+
or (self.is_scalar_json_type() and other.is_scalar_json_type())
|
|
239
|
+
):
|
|
240
|
+
return JsonType(nullable=(self.nullable or other.nullable))
|
|
233
241
|
|
|
234
242
|
return None
|
|
235
243
|
|
|
236
244
|
@classmethod
|
|
237
|
-
def infer_literal_type(cls, val: Any, nullable: bool = False) ->
|
|
245
|
+
def infer_literal_type(cls, val: Any, nullable: bool = False) -> ColumnType | None:
|
|
238
246
|
if val is None:
|
|
239
247
|
return InvalidType(nullable=True)
|
|
240
248
|
if isinstance(val, str):
|
|
@@ -252,6 +260,10 @@ class ColumnType:
|
|
|
252
260
|
return TimestampType(nullable=nullable)
|
|
253
261
|
if isinstance(val, datetime.date):
|
|
254
262
|
return DateType(nullable=nullable)
|
|
263
|
+
if isinstance(val, uuid.UUID):
|
|
264
|
+
return UUIDType(nullable=nullable)
|
|
265
|
+
if isinstance(val, bytes):
|
|
266
|
+
return BinaryType(nullable=nullable)
|
|
255
267
|
if isinstance(val, PIL.Image.Image):
|
|
256
268
|
return ImageType(width=val.width, height=val.height, mode=val.mode, nullable=nullable)
|
|
257
269
|
if isinstance(val, np.ndarray):
|
|
@@ -268,7 +280,7 @@ class ColumnType:
|
|
|
268
280
|
return None
|
|
269
281
|
|
|
270
282
|
@classmethod
|
|
271
|
-
def infer_common_literal_type(cls, vals: Iterable[Any]) ->
|
|
283
|
+
def infer_common_literal_type(cls, vals: Iterable[Any]) -> ColumnType | None:
|
|
272
284
|
"""
|
|
273
285
|
Returns the most specific type that is a supertype of all literals in `vals`. If no such type
|
|
274
286
|
exists, returns None.
|
|
@@ -276,13 +288,13 @@ class ColumnType:
|
|
|
276
288
|
Args:
|
|
277
289
|
vals: A collection of literals.
|
|
278
290
|
"""
|
|
279
|
-
inferred_type:
|
|
291
|
+
inferred_type: ColumnType | None = None
|
|
280
292
|
for val in vals:
|
|
281
293
|
val_type = cls.infer_literal_type(val)
|
|
282
294
|
if inferred_type is None:
|
|
283
295
|
inferred_type = val_type
|
|
284
296
|
else:
|
|
285
|
-
inferred_type = inferred_type.supertype(val_type)
|
|
297
|
+
inferred_type = inferred_type.supertype(val_type, for_inference=True)
|
|
286
298
|
if inferred_type is None:
|
|
287
299
|
return None
|
|
288
300
|
if not inferred_type.has_supertype():
|
|
@@ -291,8 +303,12 @@ class ColumnType:
|
|
|
291
303
|
|
|
292
304
|
@classmethod
|
|
293
305
|
def from_python_type(
|
|
294
|
-
cls,
|
|
295
|
-
|
|
306
|
+
cls,
|
|
307
|
+
t: type | _GenericAlias,
|
|
308
|
+
nullable_default: bool = False,
|
|
309
|
+
allow_builtin_types: bool = True,
|
|
310
|
+
infer_pydantic_json: bool = False,
|
|
311
|
+
) -> ColumnType | None:
|
|
296
312
|
"""
|
|
297
313
|
Convert a Python type into a Pixeltable `ColumnType` instance.
|
|
298
314
|
|
|
@@ -304,16 +320,20 @@ class ColumnType:
|
|
|
304
320
|
allowed (as in UDF definitions). If False, then only Pixeltable types such as `pxt.String`,
|
|
305
321
|
`pxt.Int`, etc., will be allowed (as in schema definitions). `Optional` and `Required`
|
|
306
322
|
designations will be allowed regardless.
|
|
323
|
+
infer_pydantic_json: If True, accepts an extended set of built-ins (eg, Enum, Path) and returns the type to
|
|
324
|
+
which pydantic.BaseModel.model_dump(mode='json') serializes it.
|
|
307
325
|
"""
|
|
308
326
|
origin = typing.get_origin(t)
|
|
309
327
|
type_args = typing.get_args(t)
|
|
310
|
-
if origin
|
|
311
|
-
# Check if `t` has the form
|
|
328
|
+
if origin in (typing.Union, types.UnionType):
|
|
329
|
+
# Check if `t` has the form T | None.
|
|
312
330
|
if len(type_args) == 2 and type(None) in type_args:
|
|
313
|
-
# `t` is a type of the form
|
|
331
|
+
# `t` is a type of the form T | None (equivalently, T | None or None | T).
|
|
314
332
|
# We treat it as the underlying type but with nullable=True.
|
|
315
333
|
underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
|
|
316
|
-
underlying = cls.from_python_type(
|
|
334
|
+
underlying = cls.from_python_type(
|
|
335
|
+
underlying_py_type, allow_builtin_types=allow_builtin_types, infer_pydantic_json=infer_pydantic_json
|
|
336
|
+
)
|
|
317
337
|
if underlying is not None:
|
|
318
338
|
return underlying.copy(nullable=True)
|
|
319
339
|
elif origin is Required:
|
|
@@ -327,7 +347,7 @@ class ColumnType:
|
|
|
327
347
|
if isinstance(parameters, ColumnType):
|
|
328
348
|
return parameters.copy(nullable=nullable_default)
|
|
329
349
|
else:
|
|
330
|
-
# It's something other than
|
|
350
|
+
# It's something other than T | None, Required[T], or an explicitly annotated type.
|
|
331
351
|
if origin is not None:
|
|
332
352
|
# Discard type parameters to ensure that parameterized types such as `list[T]`
|
|
333
353
|
# are correctly mapped to Pixeltable types.
|
|
@@ -340,6 +360,13 @@ class ColumnType:
|
|
|
340
360
|
if literal_type is None:
|
|
341
361
|
return None
|
|
342
362
|
return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
|
|
363
|
+
if infer_pydantic_json and isinstance(t, type) and issubclass(t, enum.Enum):
|
|
364
|
+
literal_type = cls.infer_common_literal_type(member.value for member in t)
|
|
365
|
+
if literal_type is None:
|
|
366
|
+
return None
|
|
367
|
+
return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
|
|
368
|
+
if infer_pydantic_json and t is Path:
|
|
369
|
+
return StringType(nullable=nullable_default)
|
|
343
370
|
if t is str:
|
|
344
371
|
return StringType(nullable=nullable_default)
|
|
345
372
|
if t is int:
|
|
@@ -352,6 +379,10 @@ class ColumnType:
|
|
|
352
379
|
return TimestampType(nullable=nullable_default)
|
|
353
380
|
if t is datetime.date:
|
|
354
381
|
return DateType(nullable=nullable_default)
|
|
382
|
+
if t is uuid.UUID:
|
|
383
|
+
return UUIDType(nullable=nullable_default)
|
|
384
|
+
if t is bytes:
|
|
385
|
+
return BinaryType(nullable=nullable_default)
|
|
355
386
|
if t is PIL.Image.Image:
|
|
356
387
|
return ImageType(nullable=nullable_default)
|
|
357
388
|
if isinstance(t, type) and issubclass(t, (Sequence, Mapping, pydantic.BaseModel)):
|
|
@@ -360,10 +391,7 @@ class ColumnType:
|
|
|
360
391
|
|
|
361
392
|
@classmethod
|
|
362
393
|
def normalize_type(
|
|
363
|
-
cls,
|
|
364
|
-
t: Union[ColumnType, type, _AnnotatedAlias],
|
|
365
|
-
nullable_default: bool = False,
|
|
366
|
-
allow_builtin_types: bool = True,
|
|
394
|
+
cls, t: ColumnType | type | _AnnotatedAlias, nullable_default: bool = False, allow_builtin_types: bool = True
|
|
367
395
|
) -> ColumnType:
|
|
368
396
|
"""
|
|
369
397
|
Convert any type recognizable by Pixeltable to its corresponding ColumnType.
|
|
@@ -382,19 +410,51 @@ class ColumnType:
|
|
|
382
410
|
(float, 'pxt.Float'),
|
|
383
411
|
(datetime.datetime, 'pxt.Timestamp'),
|
|
384
412
|
(datetime.date, 'pxt.Date'),
|
|
413
|
+
(uuid.UUID, 'pxt.UUID'),
|
|
385
414
|
(PIL.Image.Image, 'pxt.Image'),
|
|
415
|
+
(bytes, 'pxt.Binary'),
|
|
386
416
|
(Sequence, 'pxt.Json'),
|
|
387
417
|
(Mapping, 'pxt.Json'),
|
|
388
418
|
]
|
|
389
419
|
|
|
390
420
|
@classmethod
|
|
391
|
-
def __raise_exc_for_invalid_type(cls, t:
|
|
421
|
+
def __raise_exc_for_invalid_type(cls, t: type | _AnnotatedAlias) -> None:
|
|
392
422
|
for builtin_type, suggestion in cls.__TYPE_SUGGESTIONS:
|
|
393
423
|
if t is builtin_type or (isinstance(t, type) and issubclass(t, builtin_type)):
|
|
394
424
|
name = t.__name__ if t.__module__ == 'builtins' else f'{t.__module__}.{t.__name__}'
|
|
395
425
|
raise excs.Error(f'Standard Python type `{name}` cannot be used here; use `{suggestion}` instead')
|
|
396
426
|
raise excs.Error(f'Unknown type: {t}')
|
|
397
427
|
|
|
428
|
+
@classmethod
|
|
429
|
+
def from_json_schema(cls, schema: dict[str, Any]) -> ColumnType | None:
|
|
430
|
+
# We first express the JSON schema as a Python type, and then convert it to a Pixeltable type.
|
|
431
|
+
# TODO: Is there a meaningful fallback if one of these operations fails? (Maybe another use case for a pxt Any
|
|
432
|
+
# type?)
|
|
433
|
+
py_type = cls.__json_schema_to_py_type(schema)
|
|
434
|
+
return cls.from_python_type(py_type) if py_type is not None else None
|
|
435
|
+
|
|
436
|
+
@classmethod
|
|
437
|
+
def __json_schema_to_py_type(cls, schema: dict[str, Any]) -> type | _GenericAlias | None:
|
|
438
|
+
if 'type' in schema:
|
|
439
|
+
if schema['type'] == 'null':
|
|
440
|
+
return type(None)
|
|
441
|
+
if schema['type'] == 'string':
|
|
442
|
+
return str
|
|
443
|
+
if schema['type'] == 'integer':
|
|
444
|
+
return int
|
|
445
|
+
if schema['type'] == 'number':
|
|
446
|
+
return float
|
|
447
|
+
if schema['type'] == 'boolean':
|
|
448
|
+
return bool
|
|
449
|
+
if schema['type'] in ('array', 'object'):
|
|
450
|
+
return list
|
|
451
|
+
elif 'anyOf' in schema:
|
|
452
|
+
subscripts = tuple(cls.__json_schema_to_py_type(subschema) for subschema in schema['anyOf'])
|
|
453
|
+
if all(subscript is not None for subscript in subscripts):
|
|
454
|
+
return Union[subscripts]
|
|
455
|
+
|
|
456
|
+
return None
|
|
457
|
+
|
|
398
458
|
def validate_literal(self, val: Any) -> None:
|
|
399
459
|
"""Raise TypeError if val is not a valid literal for this type"""
|
|
400
460
|
if val is None:
|
|
@@ -443,6 +503,9 @@ class ColumnType:
|
|
|
443
503
|
def is_scalar_type(self) -> bool:
|
|
444
504
|
return self._type in self.scalar_types
|
|
445
505
|
|
|
506
|
+
def is_scalar_json_type(self) -> bool:
|
|
507
|
+
return self._type in self.scalar_json_types
|
|
508
|
+
|
|
446
509
|
def is_numeric_type(self) -> bool:
|
|
447
510
|
return self._type in self.numeric_types
|
|
448
511
|
|
|
@@ -467,12 +530,18 @@ class ColumnType:
|
|
|
467
530
|
def is_date_type(self) -> bool:
|
|
468
531
|
return self._type == self.Type.DATE
|
|
469
532
|
|
|
533
|
+
def is_uuid_type(self) -> bool:
|
|
534
|
+
return self._type == self.Type.UUID
|
|
535
|
+
|
|
470
536
|
def is_json_type(self) -> bool:
|
|
471
537
|
return self._type == self.Type.JSON
|
|
472
538
|
|
|
473
539
|
def is_array_type(self) -> bool:
|
|
474
540
|
return self._type == self.Type.ARRAY
|
|
475
541
|
|
|
542
|
+
def is_binary_type(self) -> bool:
|
|
543
|
+
return self._type == self.Type.BINARY
|
|
544
|
+
|
|
476
545
|
def is_image_type(self) -> bool:
|
|
477
546
|
return self._type == self.Type.IMAGE
|
|
478
547
|
|
|
@@ -489,6 +558,10 @@ class ColumnType:
|
|
|
489
558
|
# types that refer to external media files
|
|
490
559
|
return self.is_image_type() or self.is_video_type() or self.is_audio_type() or self.is_document_type()
|
|
491
560
|
|
|
561
|
+
def supports_file_offloading(self) -> bool:
|
|
562
|
+
# types that can be offloaded to file-based storage via a CellMaterializationNode
|
|
563
|
+
return self.is_array_type() or self.is_json_type() or self.is_binary_type()
|
|
564
|
+
|
|
492
565
|
@classmethod
|
|
493
566
|
@abc.abstractmethod
|
|
494
567
|
def to_sa_type(cls) -> sql.types.TypeEngine:
|
|
@@ -505,6 +578,35 @@ class ColumnType:
|
|
|
505
578
|
def _to_json_schema(self) -> dict[str, Any]:
|
|
506
579
|
raise excs.Error(f'Pixeltable type {self} is not a valid JSON type')
|
|
507
580
|
|
|
581
|
+
@classmethod
|
|
582
|
+
def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> ColumnType | None:
|
|
583
|
+
"""
|
|
584
|
+
Return pixeltable type corresponding to a given simple numpy dtype
|
|
585
|
+
"""
|
|
586
|
+
if np.issubdtype(dtype, np.integer):
|
|
587
|
+
return IntType(nullable=nullable)
|
|
588
|
+
|
|
589
|
+
if np.issubdtype(dtype, np.floating):
|
|
590
|
+
return FloatType(nullable=nullable)
|
|
591
|
+
|
|
592
|
+
if dtype == np.bool_:
|
|
593
|
+
return BoolType(nullable=nullable)
|
|
594
|
+
|
|
595
|
+
if np.issubdtype(dtype, np.str_):
|
|
596
|
+
return StringType(nullable=nullable)
|
|
597
|
+
|
|
598
|
+
if np.issubdtype(dtype, np.character):
|
|
599
|
+
return StringType(nullable=nullable)
|
|
600
|
+
|
|
601
|
+
if np.issubdtype(dtype, np.datetime64):
|
|
602
|
+
unit, _ = np.datetime_data(dtype)
|
|
603
|
+
if unit in ('D', 'M', 'Y'):
|
|
604
|
+
return DateType(nullable=nullable)
|
|
605
|
+
else:
|
|
606
|
+
return TimestampType(nullable=nullable)
|
|
607
|
+
|
|
608
|
+
return None
|
|
609
|
+
|
|
508
610
|
|
|
509
611
|
class InvalidType(ColumnType):
|
|
510
612
|
def __init__(self, nullable: bool = False):
|
|
@@ -629,8 +731,9 @@ class TimestampType(ColumnType):
|
|
|
629
731
|
def _create_literal(self, val: Any) -> Any:
|
|
630
732
|
if isinstance(val, str):
|
|
631
733
|
return datetime.datetime.fromisoformat(val)
|
|
632
|
-
|
|
633
|
-
|
|
734
|
+
# Place naive timestamps in the default time zone
|
|
735
|
+
if isinstance(val, datetime.datetime) and val.tzinfo is None:
|
|
736
|
+
return val.replace(tzinfo=Env.get().default_time_zone)
|
|
634
737
|
return val
|
|
635
738
|
|
|
636
739
|
|
|
@@ -657,11 +760,57 @@ class DateType(ColumnType):
|
|
|
657
760
|
return val
|
|
658
761
|
|
|
659
762
|
|
|
763
|
+
class UUIDType(ColumnType):
|
|
764
|
+
def __init__(self, nullable: bool = False):
|
|
765
|
+
super().__init__(self.Type.UUID, nullable=nullable)
|
|
766
|
+
|
|
767
|
+
def has_supertype(self) -> bool:
|
|
768
|
+
return not self.nullable
|
|
769
|
+
|
|
770
|
+
@classmethod
|
|
771
|
+
def to_sa_type(cls) -> sql.types.TypeEngine:
|
|
772
|
+
return sql.UUID(as_uuid=True)
|
|
773
|
+
|
|
774
|
+
def _to_json_schema(self) -> dict[str, Any]:
|
|
775
|
+
return {'type': 'string', 'format': 'uuid'}
|
|
776
|
+
|
|
777
|
+
def print_value(self, val: Any) -> str:
|
|
778
|
+
return f"'{val}'"
|
|
779
|
+
|
|
780
|
+
def _to_base_str(self) -> str:
|
|
781
|
+
return 'UUID'
|
|
782
|
+
|
|
783
|
+
def _validate_literal(self, val: Any) -> None:
|
|
784
|
+
if not isinstance(val, uuid.UUID):
|
|
785
|
+
raise TypeError(f'Expected uuid.UUID, got {val.__class__.__name__}')
|
|
786
|
+
|
|
787
|
+
def _create_literal(self, val: Any) -> Any:
|
|
788
|
+
if isinstance(val, str):
|
|
789
|
+
return uuid.UUID(val)
|
|
790
|
+
return val
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
class BinaryType(ColumnType):
|
|
794
|
+
def __init__(self, nullable: bool = False):
|
|
795
|
+
super().__init__(self.Type.BINARY, nullable=nullable)
|
|
796
|
+
|
|
797
|
+
@classmethod
|
|
798
|
+
def to_sa_type(cls) -> sql.types.TypeEngine:
|
|
799
|
+
return sql.LargeBinary()
|
|
800
|
+
|
|
801
|
+
def _to_base_str(self) -> str:
|
|
802
|
+
return 'Binary'
|
|
803
|
+
|
|
804
|
+
def _validate_literal(self, val: Any) -> None:
|
|
805
|
+
if not isinstance(val, bytes):
|
|
806
|
+
raise TypeError(f'Expected `bytes`, got `{val.__class__.__name__}`')
|
|
807
|
+
|
|
808
|
+
|
|
660
809
|
class JsonType(ColumnType):
|
|
661
|
-
json_schema:
|
|
662
|
-
__validator:
|
|
810
|
+
json_schema: dict[str, Any] | None
|
|
811
|
+
__validator: jsonschema.protocols.Validator | None
|
|
663
812
|
|
|
664
|
-
def __init__(self, json_schema:
|
|
813
|
+
def __init__(self, json_schema: dict[str, Any] | None = None, nullable: bool = False):
|
|
665
814
|
super().__init__(self.Type.JSON, nullable=nullable)
|
|
666
815
|
self.json_schema = json_schema
|
|
667
816
|
if json_schema is None:
|
|
@@ -716,7 +865,7 @@ class JsonType(ColumnType):
|
|
|
716
865
|
|
|
717
866
|
@classmethod
|
|
718
867
|
def __is_valid_json(cls, val: Any) -> bool:
|
|
719
|
-
if val is None or isinstance(val, (str, int, float, bool)):
|
|
868
|
+
if val is None or isinstance(val, (str, int, float, bool, np.ndarray, PIL.Image.Image, bytes)):
|
|
720
869
|
return True
|
|
721
870
|
if isinstance(val, (list, tuple)):
|
|
722
871
|
return all(cls.__is_valid_json(v) for v in val)
|
|
@@ -731,7 +880,7 @@ class JsonType(ColumnType):
|
|
|
731
880
|
return val.model_dump()
|
|
732
881
|
return val
|
|
733
882
|
|
|
734
|
-
def supertype(self, other: ColumnType) ->
|
|
883
|
+
def supertype(self, other: ColumnType, for_inference: bool = False) -> JsonType | None:
|
|
735
884
|
# Try using the (much faster) supertype logic in ColumnType first. That will work if, for example, the types
|
|
736
885
|
# are identical except for nullability. If that doesn't work and both types are JsonType, then we will need to
|
|
737
886
|
# merge their schemas.
|
|
@@ -753,7 +902,7 @@ class JsonType(ColumnType):
|
|
|
753
902
|
)
|
|
754
903
|
|
|
755
904
|
@classmethod
|
|
756
|
-
def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) ->
|
|
905
|
+
def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any] | None:
|
|
757
906
|
# Defining a general type hierarchy over all JSON schemas would be a challenging problem. In order to keep
|
|
758
907
|
# things manageable, we only define a hierarchy among "conforming" schemas, which provides enough generality
|
|
759
908
|
# for the most important use cases (unions for type inference, validation of inline exprs). A schema is
|
|
@@ -813,7 +962,7 @@ class JsonType(ColumnType):
|
|
|
813
962
|
return {} # Unresolvable type conflict; the supertype is an unrestricted JsonType.
|
|
814
963
|
|
|
815
964
|
@classmethod
|
|
816
|
-
def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) ->
|
|
965
|
+
def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any] | None:
|
|
817
966
|
a, a_nullable = cls.__unpack_null_from_schema(a)
|
|
818
967
|
b, b_nullable = cls.__unpack_null_from_schema(b)
|
|
819
968
|
|
|
@@ -841,33 +990,64 @@ class JsonType(ColumnType):
|
|
|
841
990
|
return f'Json[{self.json_schema}]'
|
|
842
991
|
|
|
843
992
|
|
|
993
|
+
ARRAY_SUPPORTED_NUMPY_DTYPES = [
|
|
994
|
+
np.bool_,
|
|
995
|
+
np.uint8,
|
|
996
|
+
np.uint16,
|
|
997
|
+
np.uint32,
|
|
998
|
+
np.uint64,
|
|
999
|
+
np.int8,
|
|
1000
|
+
np.int16,
|
|
1001
|
+
np.int32,
|
|
1002
|
+
np.int64,
|
|
1003
|
+
np.float16,
|
|
1004
|
+
np.float32,
|
|
1005
|
+
np.float64,
|
|
1006
|
+
np.str_,
|
|
1007
|
+
]
|
|
1008
|
+
|
|
1009
|
+
|
|
844
1010
|
class ArrayType(ColumnType):
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
1011
|
+
pxt_dtype_to_numpy_dtype: ClassVar[dict[ColumnType.Type, np.dtype]] = {
|
|
1012
|
+
ColumnType.Type.INT: np.dtype(np.int64),
|
|
1013
|
+
ColumnType.Type.FLOAT: np.dtype(np.float32),
|
|
1014
|
+
ColumnType.Type.BOOL: np.dtype(np.bool_),
|
|
1015
|
+
ColumnType.Type.STRING: np.dtype(np.str_),
|
|
1016
|
+
}
|
|
1017
|
+
|
|
1018
|
+
shape: tuple[int | None, ...] | None
|
|
1019
|
+
dtype: np.dtype | None
|
|
848
1020
|
|
|
849
1021
|
def __init__(
|
|
850
1022
|
self,
|
|
851
|
-
shape:
|
|
852
|
-
dtype:
|
|
1023
|
+
shape: tuple[int | None, ...] | None = None,
|
|
1024
|
+
dtype: ColumnType | np.dtype | None = None,
|
|
853
1025
|
nullable: bool = False,
|
|
854
1026
|
):
|
|
855
1027
|
super().__init__(self.Type.ARRAY, nullable=nullable)
|
|
856
1028
|
assert shape is None or dtype is not None, (shape, dtype) # cannot specify a shape without a dtype
|
|
857
|
-
assert (
|
|
858
|
-
dtype is None
|
|
859
|
-
or dtype.is_int_type()
|
|
860
|
-
or dtype.is_float_type()
|
|
861
|
-
or dtype.is_bool_type()
|
|
862
|
-
or dtype.is_string_type()
|
|
863
|
-
)
|
|
864
|
-
|
|
865
1029
|
self.shape = shape
|
|
866
|
-
|
|
867
|
-
|
|
1030
|
+
if dtype is None:
|
|
1031
|
+
self.dtype = None
|
|
1032
|
+
elif isinstance(dtype, np.dtype):
|
|
1033
|
+
# Numpy string has some specifications (endianness, max length, encoding) that we don't support, so we just
|
|
1034
|
+
# strip them out.
|
|
1035
|
+
if dtype.type == np.str_:
|
|
1036
|
+
self.dtype = np.dtype(np.str_)
|
|
1037
|
+
else:
|
|
1038
|
+
if dtype not in ARRAY_SUPPORTED_NUMPY_DTYPES:
|
|
1039
|
+
raise ValueError(f'Unsupported dtype: {dtype}')
|
|
1040
|
+
self.dtype = dtype
|
|
1041
|
+
elif isinstance(dtype, ColumnType):
|
|
1042
|
+
self.dtype = self.pxt_dtype_to_numpy_dtype.get(dtype._type, None)
|
|
1043
|
+
if self.dtype is None:
|
|
1044
|
+
raise ValueError(f'Unsupported dtype: {dtype}')
|
|
1045
|
+
assert self.dtype in ARRAY_SUPPORTED_NUMPY_DTYPES
|
|
1046
|
+
else:
|
|
1047
|
+
raise ValueError(f'Unsupported dtype: {dtype}')
|
|
868
1048
|
|
|
869
1049
|
def copy(self, nullable: bool) -> ColumnType:
|
|
870
|
-
return ArrayType(self.shape, self.
|
|
1050
|
+
return ArrayType(self.shape, self.dtype, nullable=nullable)
|
|
871
1051
|
|
|
872
1052
|
def matches(self, other: ColumnType) -> bool:
|
|
873
1053
|
return isinstance(other, ArrayType) and self.shape == other.shape and self.dtype == other.dtype
|
|
@@ -875,7 +1055,7 @@ class ArrayType(ColumnType):
|
|
|
875
1055
|
def __hash__(self) -> int:
|
|
876
1056
|
return hash((self._type, self.nullable, self.shape, self.dtype))
|
|
877
1057
|
|
|
878
|
-
def supertype(self, other: ColumnType) ->
|
|
1058
|
+
def supertype(self, other: ColumnType, for_inference: bool = False) -> ArrayType | None:
|
|
879
1059
|
basic_supertype = super().supertype(other)
|
|
880
1060
|
if basic_supertype is not None:
|
|
881
1061
|
assert isinstance(basic_supertype, ArrayType)
|
|
@@ -884,162 +1064,121 @@ class ArrayType(ColumnType):
|
|
|
884
1064
|
if not isinstance(other, ArrayType):
|
|
885
1065
|
return None
|
|
886
1066
|
|
|
887
|
-
|
|
888
|
-
if
|
|
889
|
-
|
|
1067
|
+
# Supertype has dtype only if dtypes are identical. We can change this behavior to consider casting rules or
|
|
1068
|
+
# something else if there's demand for it.
|
|
1069
|
+
if self.dtype != other.dtype:
|
|
890
1070
|
return ArrayType(nullable=(self.nullable or other.nullable))
|
|
891
|
-
|
|
1071
|
+
super_dtype = self.dtype
|
|
1072
|
+
|
|
1073
|
+
# Determine the shape of the supertype
|
|
1074
|
+
super_shape: tuple[int | None, ...] | None
|
|
892
1075
|
if self.shape is None or other.shape is None or len(self.shape) != len(other.shape):
|
|
893
1076
|
super_shape = None
|
|
894
1077
|
else:
|
|
895
1078
|
super_shape = tuple(n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape))
|
|
896
|
-
return ArrayType(super_shape,
|
|
1079
|
+
return ArrayType(super_shape, super_dtype, nullable=(self.nullable or other.nullable))
|
|
897
1080
|
|
|
898
1081
|
def _as_dict(self) -> dict:
|
|
899
1082
|
result = super()._as_dict()
|
|
900
1083
|
shape_as_list = None if self.shape is None else list(self.shape)
|
|
901
|
-
|
|
902
|
-
|
|
1084
|
+
result.update(shape=shape_as_list)
|
|
1085
|
+
|
|
1086
|
+
if self.dtype is None:
|
|
1087
|
+
result.update(numpy_dtype=None)
|
|
1088
|
+
elif self.dtype == np.str_:
|
|
1089
|
+
# str(np.str_) would be something like '<U', but since we don't support the string specifications, just use
|
|
1090
|
+
# 'str' instead to avoid confusion.
|
|
1091
|
+
result.update(numpy_dtype='str')
|
|
1092
|
+
else:
|
|
1093
|
+
result.update(numpy_dtype=str(self.dtype))
|
|
903
1094
|
return result
|
|
904
1095
|
|
|
905
1096
|
def _to_base_str(self) -> str:
|
|
906
1097
|
if self.shape is None and self.dtype is None:
|
|
907
1098
|
return 'Array'
|
|
908
1099
|
if self.shape is None:
|
|
909
|
-
return f'Array[{self.
|
|
1100
|
+
return f'Array[{self.dtype.name}]'
|
|
910
1101
|
assert self.dtype is not None
|
|
911
|
-
return f'Array[{self.shape}, {self.
|
|
1102
|
+
return f'Array[{self.shape}, {self.dtype.name}]'
|
|
912
1103
|
|
|
913
1104
|
@classmethod
|
|
914
1105
|
def _from_dict(cls, d: dict) -> ColumnType:
|
|
1106
|
+
assert 'numpy_dtype' in d
|
|
1107
|
+
dtype = None if d['numpy_dtype'] is None else np.dtype(d['numpy_dtype'])
|
|
915
1108
|
assert 'shape' in d
|
|
916
|
-
assert 'dtype' in d
|
|
917
1109
|
shape = None if d['shape'] is None else tuple(d['shape'])
|
|
918
|
-
dtype = None if d['dtype'] is None else cls.make_type(cls.Type(d['dtype']))
|
|
919
1110
|
return cls(shape, dtype, nullable=d['nullable'])
|
|
920
1111
|
|
|
921
1112
|
@classmethod
|
|
922
|
-
def
|
|
923
|
-
"""
|
|
924
|
-
Return pixeltable type corresponding to a given simple numpy dtype
|
|
925
|
-
"""
|
|
926
|
-
if np.issubdtype(dtype, np.integer):
|
|
927
|
-
return IntType(nullable=nullable)
|
|
928
|
-
|
|
929
|
-
if np.issubdtype(dtype, np.floating):
|
|
930
|
-
return FloatType(nullable=nullable)
|
|
931
|
-
|
|
932
|
-
if dtype == np.bool_:
|
|
933
|
-
return BoolType(nullable=nullable)
|
|
934
|
-
|
|
935
|
-
if np.issubdtype(dtype, np.str_):
|
|
936
|
-
return StringType(nullable=nullable)
|
|
937
|
-
|
|
938
|
-
if np.issubdtype(dtype, np.character):
|
|
939
|
-
return StringType(nullable=nullable)
|
|
940
|
-
|
|
941
|
-
if np.issubdtype(dtype, np.datetime64):
|
|
942
|
-
unit, _ = np.datetime_data(dtype)
|
|
943
|
-
if unit in ['D', 'M', 'Y']:
|
|
944
|
-
return DateType(nullable=nullable)
|
|
945
|
-
else:
|
|
946
|
-
return TimestampType(nullable=nullable)
|
|
947
|
-
|
|
948
|
-
return None
|
|
949
|
-
|
|
950
|
-
@classmethod
|
|
951
|
-
def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
|
|
952
|
-
# determine our dtype
|
|
1113
|
+
def from_literal(cls, val: np.ndarray, nullable: bool = False) -> ArrayType | None:
|
|
953
1114
|
assert isinstance(val, np.ndarray)
|
|
954
|
-
|
|
955
|
-
if pxttype is None:
|
|
1115
|
+
if val.dtype.type not in ARRAY_SUPPORTED_NUMPY_DTYPES:
|
|
956
1116
|
return None
|
|
957
|
-
return cls(val.shape, dtype=
|
|
1117
|
+
return cls(val.shape, dtype=val.dtype, nullable=nullable)
|
|
958
1118
|
|
|
959
|
-
def
|
|
1119
|
+
def _to_json_schema(self) -> dict[str, Any]:
|
|
1120
|
+
schema: dict[str, Any] = {'type': 'array'}
|
|
1121
|
+
if self.dtype == np.str_:
|
|
1122
|
+
schema.update({'items': {'type': 'str'}})
|
|
1123
|
+
elif self.dtype is not None:
|
|
1124
|
+
schema.update({'items': {'type': str(self.dtype)}})
|
|
1125
|
+
return schema
|
|
1126
|
+
|
|
1127
|
+
def _validate_literal(self, val: Any) -> None:
|
|
960
1128
|
if not isinstance(val, np.ndarray):
|
|
961
|
-
|
|
1129
|
+
raise TypeError(f'Expected numpy.ndarray, got {val.__class__.__name__}')
|
|
962
1130
|
|
|
963
|
-
# If a dtype
|
|
964
|
-
if self.dtype
|
|
965
|
-
|
|
1131
|
+
# If column type has a dtype, check if it matches
|
|
1132
|
+
if self.dtype == np.str_:
|
|
1133
|
+
if val.dtype.type != np.str_:
|
|
1134
|
+
raise TypeError(f'Expected numpy.ndarray of dtype {self.dtype}, got numpy.ndarray of dtype {val.dtype}')
|
|
1135
|
+
elif self.dtype is not None and self.dtype != val.dtype:
|
|
1136
|
+
raise TypeError(f'Expected numpy.ndarray of dtype {self.dtype}, got numpy.ndarray of dtype {val.dtype}')
|
|
966
1137
|
|
|
967
|
-
#
|
|
968
|
-
if
|
|
969
|
-
|
|
970
|
-
):
|
|
971
|
-
return False
|
|
1138
|
+
# Check that the dtype is one of the supported types
|
|
1139
|
+
if val.dtype.type != np.str_ and val.dtype not in ARRAY_SUPPORTED_NUMPY_DTYPES:
|
|
1140
|
+
raise TypeError(f'Unsupported dtype {val.dtype}')
|
|
972
1141
|
|
|
973
1142
|
# If a shape is specified, check that there's a match
|
|
974
1143
|
if self.shape is not None:
|
|
975
1144
|
if len(val.shape) != len(self.shape):
|
|
976
|
-
|
|
1145
|
+
raise TypeError(
|
|
1146
|
+
f'Expected numpy.ndarray({self.shape}, dtype={self.dtype}), '
|
|
1147
|
+
f'got numpy.ndarray({val.shape}, dtype={val.dtype})'
|
|
1148
|
+
)
|
|
977
1149
|
# check that the shapes are compatible
|
|
978
1150
|
for n1, n2 in zip(val.shape, self.shape):
|
|
979
1151
|
assert n1 is not None # `val` must have a concrete shape
|
|
980
1152
|
if n2 is None:
|
|
981
1153
|
continue # wildcard
|
|
982
1154
|
if n1 != n2:
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
def _to_json_schema(self) -> dict[str, Any]:
|
|
988
|
-
return {'type': 'array', 'items': self.pxt_dtype._to_json_schema()}
|
|
989
|
-
|
|
990
|
-
def _validate_literal(self, val: Any) -> None:
|
|
991
|
-
if not isinstance(val, np.ndarray):
|
|
992
|
-
raise TypeError(f'Expected numpy.ndarray, got {val.__class__.__name__}')
|
|
993
|
-
if not self.is_valid_literal(val):
|
|
994
|
-
if self.shape is not None:
|
|
995
|
-
raise TypeError(
|
|
996
|
-
f'Expected numpy.ndarray({self.shape}, dtype={self.numpy_dtype()}), '
|
|
997
|
-
f'got numpy.ndarray({val.shape}, dtype={val.dtype})'
|
|
998
|
-
)
|
|
999
|
-
elif self.dtype is not None:
|
|
1000
|
-
raise TypeError(
|
|
1001
|
-
f'Expected numpy.ndarray of dtype {self.numpy_dtype()}, got numpy.ndarray of dtype {val.dtype}'
|
|
1002
|
-
)
|
|
1003
|
-
else:
|
|
1004
|
-
raise TypeError(f'Unsupported dtype for numpy.ndarray: {val.dtype}')
|
|
1155
|
+
raise TypeError(
|
|
1156
|
+
f'Expected numpy.ndarray({self.shape}, dtype={self.dtype}), '
|
|
1157
|
+
f'got numpy.ndarray({val.shape}, dtype={val.dtype})'
|
|
1158
|
+
)
|
|
1005
1159
|
|
|
1006
1160
|
def _create_literal(self, val: Any) -> Any:
|
|
1007
1161
|
if isinstance(val, (list, tuple)):
|
|
1008
1162
|
# map python float to whichever numpy float is
|
|
1009
1163
|
# declared for this type, rather than assume float64
|
|
1010
|
-
return np.array(val, dtype=self.
|
|
1164
|
+
return np.array(val, dtype=self.dtype)
|
|
1011
1165
|
return val
|
|
1012
1166
|
|
|
1013
1167
|
@classmethod
|
|
1014
1168
|
def to_sa_type(cls) -> sql.types.TypeEngine:
|
|
1015
1169
|
return sql.LargeBinary()
|
|
1016
1170
|
|
|
1017
|
-
def numpy_dtype(self) -> Optional[np.dtype]:
|
|
1018
|
-
if self.dtype is None:
|
|
1019
|
-
return None
|
|
1020
|
-
if self.dtype == self.Type.INT:
|
|
1021
|
-
return np.dtype(np.int64)
|
|
1022
|
-
if self.dtype == self.Type.FLOAT:
|
|
1023
|
-
return np.dtype(np.float32)
|
|
1024
|
-
if self.dtype == self.Type.BOOL:
|
|
1025
|
-
return np.dtype(np.bool_)
|
|
1026
|
-
if self.dtype == self.Type.STRING:
|
|
1027
|
-
return np.dtype(np.str_)
|
|
1028
|
-
raise AssertionError(self.dtype)
|
|
1029
|
-
|
|
1030
1171
|
|
|
1031
1172
|
class ImageType(ColumnType):
|
|
1032
1173
|
def __init__(
|
|
1033
1174
|
self,
|
|
1034
|
-
width:
|
|
1035
|
-
height:
|
|
1036
|
-
size:
|
|
1037
|
-
mode:
|
|
1175
|
+
width: int | None = None,
|
|
1176
|
+
height: int | None = None,
|
|
1177
|
+
size: tuple[int, int] | None = None,
|
|
1178
|
+
mode: str | None = None,
|
|
1038
1179
|
nullable: bool = False,
|
|
1039
1180
|
):
|
|
1040
|
-
|
|
1041
|
-
TODO: does it make sense to specify only width or height?
|
|
1042
|
-
"""
|
|
1181
|
+
# TODO: does it make sense to specify only width or height?
|
|
1043
1182
|
super().__init__(self.Type.IMAGE, nullable=nullable)
|
|
1044
1183
|
assert not (width is not None and size is not None)
|
|
1045
1184
|
assert not (height is not None and size is not None)
|
|
@@ -1077,7 +1216,7 @@ class ImageType(ColumnType):
|
|
|
1077
1216
|
def __hash__(self) -> int:
|
|
1078
1217
|
return hash((self._type, self.nullable, self.size, self.mode))
|
|
1079
1218
|
|
|
1080
|
-
def supertype(self, other: ColumnType) ->
|
|
1219
|
+
def supertype(self, other: ColumnType, for_inference: bool = False) -> ImageType | None:
|
|
1081
1220
|
basic_supertype = super().supertype(other)
|
|
1082
1221
|
if basic_supertype is not None:
|
|
1083
1222
|
assert isinstance(basic_supertype, ImageType)
|
|
@@ -1092,7 +1231,7 @@ class ImageType(ColumnType):
|
|
|
1092
1231
|
return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))
|
|
1093
1232
|
|
|
1094
1233
|
@property
|
|
1095
|
-
def size(self) ->
|
|
1234
|
+
def size(self) -> tuple[int, int] | None:
|
|
1096
1235
|
if self.width is None or self.height is None:
|
|
1097
1236
|
return None
|
|
1098
1237
|
return (self.width, self.height)
|
|
@@ -1123,8 +1262,8 @@ class ImageType(ColumnType):
|
|
|
1123
1262
|
img.load()
|
|
1124
1263
|
return img
|
|
1125
1264
|
except Exception as exc:
|
|
1126
|
-
|
|
1127
|
-
raise excs.Error(f'data URL could not be decoded into a valid image: {
|
|
1265
|
+
error_msg_val = val if len(val) < 50 else val[:50] + '...'
|
|
1266
|
+
raise excs.Error(f'data URL could not be decoded into a valid image: {error_msg_val}') from exc
|
|
1128
1267
|
return val
|
|
1129
1268
|
|
|
1130
1269
|
def _validate_literal(self, val: Any) -> None:
|
|
@@ -1211,7 +1350,7 @@ class DocumentType(ColumnType):
|
|
|
1211
1350
|
TXT = 4
|
|
1212
1351
|
|
|
1213
1352
|
@classmethod
|
|
1214
|
-
def from_extension(cls, ext: str) ->
|
|
1353
|
+
def from_extension(cls, ext: str) -> 'DocumentType.DocumentFormat' | None:
|
|
1215
1354
|
if ext in ('.htm', '.html'):
|
|
1216
1355
|
return cls.HTML
|
|
1217
1356
|
if ext == '.md':
|
|
@@ -1224,7 +1363,7 @@ class DocumentType(ColumnType):
|
|
|
1224
1363
|
return cls.TXT
|
|
1225
1364
|
return None
|
|
1226
1365
|
|
|
1227
|
-
def __init__(self, nullable: bool = False, doc_formats:
|
|
1366
|
+
def __init__(self, nullable: bool = False, doc_formats: str | None = None):
|
|
1228
1367
|
super().__init__(self.Type.DOCUMENT, nullable=nullable)
|
|
1229
1368
|
self.doc_formats = doc_formats
|
|
1230
1369
|
if doc_formats is not None:
|
|
@@ -1278,6 +1417,8 @@ Float = typing.Annotated[float, FloatType(nullable=False)]
|
|
|
1278
1417
|
Bool = typing.Annotated[bool, BoolType(nullable=False)]
|
|
1279
1418
|
Timestamp = typing.Annotated[datetime.datetime, TimestampType(nullable=False)]
|
|
1280
1419
|
Date = typing.Annotated[datetime.date, DateType(nullable=False)]
|
|
1420
|
+
UUID = typing.Annotated[uuid.UUID, UUIDType(nullable=False)]
|
|
1421
|
+
Binary = typing.Annotated[bytes, BinaryType(nullable=False)]
|
|
1281
1422
|
|
|
1282
1423
|
|
|
1283
1424
|
class _PxtType:
|
|
@@ -1320,14 +1461,17 @@ class Json(_PxtType):
|
|
|
1320
1461
|
class Array(np.ndarray, _PxtType):
|
|
1321
1462
|
def __class_getitem__(cls, item: Any) -> _AnnotatedAlias:
|
|
1322
1463
|
"""
|
|
1323
|
-
`item` (the type subscript) must be a tuple with
|
|
1324
|
-
-
|
|
1325
|
-
- A type, specifying the dtype of the array
|
|
1326
|
-
|
|
1464
|
+
`item` (the type subscript) must be a tuple with at most two elements (in any order):
|
|
1465
|
+
- An optional tuple of `int | None`s, specifying the shape of the array
|
|
1466
|
+
- A type (`ColumnType | np.dtype`), specifying the dtype of the array
|
|
1467
|
+
Examples:
|
|
1468
|
+
* Array[(3, None, 2), pxt.Float]
|
|
1469
|
+
* Array[(4, 4), np.uint8]
|
|
1470
|
+
* Array[np.bool]
|
|
1327
1471
|
"""
|
|
1328
1472
|
params = item if isinstance(item, tuple) else (item,)
|
|
1329
|
-
shape:
|
|
1330
|
-
dtype:
|
|
1473
|
+
shape: tuple | None = None
|
|
1474
|
+
dtype: ColumnType | np.dtype | None = None
|
|
1331
1475
|
if not any(isinstance(param, (type, _AnnotatedAlias)) for param in params):
|
|
1332
1476
|
raise TypeError('Array type parameter must include a dtype.')
|
|
1333
1477
|
for param in params:
|
|
@@ -1340,7 +1484,10 @@ class Array(np.ndarray, _PxtType):
|
|
|
1340
1484
|
elif isinstance(param, (type, _AnnotatedAlias)):
|
|
1341
1485
|
if dtype is not None:
|
|
1342
1486
|
raise TypeError(f'Duplicate Array type parameter: {param}')
|
|
1343
|
-
|
|
1487
|
+
if isinstance(param, type) and param in ARRAY_SUPPORTED_NUMPY_DTYPES:
|
|
1488
|
+
dtype = np.dtype(param)
|
|
1489
|
+
else:
|
|
1490
|
+
dtype = ColumnType.normalize_type(param, allow_builtin_types=False)
|
|
1344
1491
|
else:
|
|
1345
1492
|
raise TypeError(f'Invalid Array type parameter: {param}')
|
|
1346
1493
|
return typing.Annotated[np.ndarray, ArrayType(shape=shape, dtype=dtype, nullable=False)]
|
|
@@ -1367,8 +1514,8 @@ class Image(PIL.Image.Image, _PxtType):
|
|
|
1367
1514
|
else:
|
|
1368
1515
|
# Not a tuple (single arg)
|
|
1369
1516
|
params = (item,)
|
|
1370
|
-
size:
|
|
1371
|
-
mode:
|
|
1517
|
+
size: tuple | None = None
|
|
1518
|
+
mode: str | None = None
|
|
1372
1519
|
for param in params:
|
|
1373
1520
|
if isinstance(param, tuple):
|
|
1374
1521
|
if (
|
|
@@ -1413,4 +1560,19 @@ class Document(str, _PxtType):
|
|
|
1413
1560
|
return DocumentType(nullable=nullable)
|
|
1414
1561
|
|
|
1415
1562
|
|
|
1416
|
-
ALL_PIXELTABLE_TYPES = (
|
|
1563
|
+
ALL_PIXELTABLE_TYPES = (
|
|
1564
|
+
String,
|
|
1565
|
+
Bool,
|
|
1566
|
+
Int,
|
|
1567
|
+
Float,
|
|
1568
|
+
Timestamp,
|
|
1569
|
+
Json,
|
|
1570
|
+
Array,
|
|
1571
|
+
Image,
|
|
1572
|
+
Video,
|
|
1573
|
+
Audio,
|
|
1574
|
+
Document,
|
|
1575
|
+
Date,
|
|
1576
|
+
UUID,
|
|
1577
|
+
Binary,
|
|
1578
|
+
)
|