pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/type_system.py CHANGED
@@ -5,10 +5,15 @@ import datetime
5
5
  import enum
6
6
  import io
7
7
  import json
8
+ import types
8
9
  import typing
9
10
  import urllib.parse
10
11
  import urllib.request
11
- from typing import Any, ClassVar, Iterable, Literal, Mapping, Optional, Sequence, Union
12
+ import uuid
13
+ from pathlib import Path
14
+ from typing import Any, ClassVar, Iterable, Literal, Mapping, Sequence, Union
15
+
16
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
12
17
 
13
18
  import av
14
19
  import jsonschema
@@ -21,10 +26,9 @@ import sqlalchemy as sql
21
26
  from typing_extensions import _AnnotatedAlias
22
27
 
23
28
  import pixeltable.exceptions as excs
29
+ from pixeltable.env import Env
24
30
  from pixeltable.utils import parse_local_file_path
25
31
 
26
- from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
27
-
28
32
 
29
33
  class ColumnType:
30
34
  @enum.unique
@@ -41,6 +45,8 @@ class ColumnType:
41
45
  AUDIO = 9
42
46
  DOCUMENT = 10
43
47
  DATE = 11
48
+ UUID = 12
49
+ BINARY = 13
44
50
 
45
51
  # exprs that don't evaluate to a computable value in Pixeltable, such as an Image member function
46
52
  INVALID = 255
@@ -48,11 +54,11 @@ class ColumnType:
48
54
  @classmethod
49
55
  def supertype(
50
56
  cls,
51
- type1: Optional['ColumnType.Type'],
52
- type2: Optional['ColumnType.Type'],
57
+ type1: 'ColumnType.Type' | None,
58
+ type2: 'ColumnType.Type' | None,
53
59
  # we need to pass this in because we can't easily append it as a class member
54
60
  common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
55
- ) -> Optional['ColumnType.Type']:
61
+ ) -> 'ColumnType.Type' | None:
56
62
  if type1 == type2:
57
63
  return type1
58
64
  t = common_supertypes.get((type1, type2))
@@ -63,26 +69,8 @@ class ColumnType:
63
69
  return t
64
70
  return None
65
71
 
66
- @enum.unique
67
- class DType(enum.Enum):
68
- """
69
- Base type used in images and arrays
70
- """
71
-
72
- BOOL = (0,)
73
- INT8 = (1,)
74
- INT16 = (2,)
75
- INT32 = (3,)
76
- INT64 = (4,)
77
- UINT8 = (5,)
78
- UINT16 = (6,)
79
- UINT32 = (7,)
80
- UINT64 = (8,)
81
- FLOAT16 = (9,)
82
- FLOAT32 = (10,)
83
- FLOAT64 = 11
84
-
85
- scalar_types: ClassVar[set[Type]] = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP, Type.DATE}
72
+ scalar_json_types: ClassVar[set[Type]] = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL}
73
+ scalar_types: ClassVar[set[Type]] = scalar_json_types | {Type.TIMESTAMP, Type.DATE, Type.UUID}
86
74
  numeric_types: ClassVar[set[Type]] = {Type.INT, Type.FLOAT}
87
75
  common_supertypes: ClassVar[dict[tuple[Type, Type], Type]] = {
88
76
  (Type.BOOL, Type.INT): Type.INT,
@@ -151,31 +139,37 @@ class ColumnType:
151
139
 
152
140
  @classmethod
153
141
  def make_type(cls, t: Type) -> ColumnType:
154
- assert t != cls.Type.INVALID
155
- if t == cls.Type.STRING:
156
- return StringType()
157
- if t == cls.Type.INT:
158
- return IntType()
159
- if t == cls.Type.FLOAT:
160
- return FloatType()
161
- if t == cls.Type.BOOL:
162
- return BoolType()
163
- if t == cls.Type.TIMESTAMP:
164
- return TimestampType()
165
- if t == cls.Type.ARRAY:
166
- return ArrayType()
167
- if t == cls.Type.JSON:
168
- return JsonType()
169
- if t == cls.Type.IMAGE:
170
- return ImageType()
171
- if t == cls.Type.VIDEO:
172
- return VideoType()
173
- if t == cls.Type.AUDIO:
174
- return AudioType()
175
- if t == cls.Type.DOCUMENT:
176
- return DocumentType()
177
- if t == cls.Type.DATE:
178
- return DateType()
142
+ match t:
143
+ case cls.Type.STRING:
144
+ return StringType()
145
+ case cls.Type.INT:
146
+ return IntType()
147
+ case cls.Type.FLOAT:
148
+ return FloatType()
149
+ case cls.Type.BOOL:
150
+ return BoolType()
151
+ case cls.Type.TIMESTAMP:
152
+ return TimestampType()
153
+ case cls.Type.JSON:
154
+ return JsonType()
155
+ case cls.Type.ARRAY:
156
+ return ArrayType()
157
+ case cls.Type.IMAGE:
158
+ return ImageType()
159
+ case cls.Type.VIDEO:
160
+ return VideoType()
161
+ case cls.Type.AUDIO:
162
+ return AudioType()
163
+ case cls.Type.DOCUMENT:
164
+ return DocumentType()
165
+ case cls.Type.DATE:
166
+ return DateType()
167
+ case cls.Type.UUID:
168
+ return UUIDType()
169
+ case cls.Type.BINARY:
170
+ return BinaryType()
171
+ case _:
172
+ raise AssertionError(t)
179
173
 
180
174
  def __repr__(self) -> str:
181
175
  return self._to_str(as_schema=False)
@@ -185,7 +179,7 @@ class ColumnType:
185
179
  if as_schema:
186
180
  return base_str if self.nullable else f'Required[{base_str}]'
187
181
  else:
188
- return f'Optional[{base_str}]' if self.nullable else base_str
182
+ return f'{base_str} | None' if self.nullable else base_str
189
183
 
190
184
  def _to_base_str(self) -> str:
191
185
  """
@@ -214,7 +208,13 @@ class ColumnType:
214
208
  # Default: just compare base types (this works for all types whose only parameter is nullable)
215
209
  return self._type == other._type
216
210
 
217
- def supertype(self, other: ColumnType) -> Optional[ColumnType]:
211
+ def supertype(self, other: ColumnType, for_inference: bool = False) -> ColumnType | None:
212
+ """
213
+ Returns the most specific type that is a supertype of both `self` and `other`.
214
+
215
+ If `for_inference=True`, then we disallow certain type relationships that are technically correct, but may
216
+ be confusing for schema inference during data imports.
217
+ """
218
218
  if self == other:
219
219
  return self
220
220
  if self.matches(other):
@@ -229,12 +229,20 @@ class ColumnType:
229
229
  t = self.Type.supertype(self._type, other._type, self.common_supertypes)
230
230
  if t is not None:
231
231
  return self.make_type(t).copy(nullable=(self.nullable or other.nullable))
232
- return None
232
+
233
+ # If we see a mix of JSON and/or JSON-compatible scalar types, resolve to JSON.
234
+ # (For JSON+JSON, we return None to allow JsonType to handle merging the type schemas.)
235
+ if not for_inference and (
236
+ (self.is_json_type() and other.is_scalar_json_type())
237
+ or (self.is_scalar_json_type() and other.is_json_type())
238
+ or (self.is_scalar_json_type() and other.is_scalar_json_type())
239
+ ):
240
+ return JsonType(nullable=(self.nullable or other.nullable))
233
241
 
234
242
  return None
235
243
 
236
244
  @classmethod
237
- def infer_literal_type(cls, val: Any, nullable: bool = False) -> Optional[ColumnType]:
245
+ def infer_literal_type(cls, val: Any, nullable: bool = False) -> ColumnType | None:
238
246
  if val is None:
239
247
  return InvalidType(nullable=True)
240
248
  if isinstance(val, str):
@@ -252,6 +260,10 @@ class ColumnType:
252
260
  return TimestampType(nullable=nullable)
253
261
  if isinstance(val, datetime.date):
254
262
  return DateType(nullable=nullable)
263
+ if isinstance(val, uuid.UUID):
264
+ return UUIDType(nullable=nullable)
265
+ if isinstance(val, bytes):
266
+ return BinaryType(nullable=nullable)
255
267
  if isinstance(val, PIL.Image.Image):
256
268
  return ImageType(width=val.width, height=val.height, mode=val.mode, nullable=nullable)
257
269
  if isinstance(val, np.ndarray):
@@ -268,7 +280,7 @@ class ColumnType:
268
280
  return None
269
281
 
270
282
  @classmethod
271
- def infer_common_literal_type(cls, vals: Iterable[Any]) -> Optional[ColumnType]:
283
+ def infer_common_literal_type(cls, vals: Iterable[Any]) -> ColumnType | None:
272
284
  """
273
285
  Returns the most specific type that is a supertype of all literals in `vals`. If no such type
274
286
  exists, returns None.
@@ -276,13 +288,13 @@ class ColumnType:
276
288
  Args:
277
289
  vals: A collection of literals.
278
290
  """
279
- inferred_type: Optional[ColumnType] = None
291
+ inferred_type: ColumnType | None = None
280
292
  for val in vals:
281
293
  val_type = cls.infer_literal_type(val)
282
294
  if inferred_type is None:
283
295
  inferred_type = val_type
284
296
  else:
285
- inferred_type = inferred_type.supertype(val_type)
297
+ inferred_type = inferred_type.supertype(val_type, for_inference=True)
286
298
  if inferred_type is None:
287
299
  return None
288
300
  if not inferred_type.has_supertype():
@@ -291,8 +303,12 @@ class ColumnType:
291
303
 
292
304
  @classmethod
293
305
  def from_python_type(
294
- cls, t: Union[type, _GenericAlias], nullable_default: bool = False, allow_builtin_types: bool = True
295
- ) -> Optional[ColumnType]:
306
+ cls,
307
+ t: type | _GenericAlias,
308
+ nullable_default: bool = False,
309
+ allow_builtin_types: bool = True,
310
+ infer_pydantic_json: bool = False,
311
+ ) -> ColumnType | None:
296
312
  """
297
313
  Convert a Python type into a Pixeltable `ColumnType` instance.
298
314
 
@@ -304,16 +320,20 @@ class ColumnType:
304
320
  allowed (as in UDF definitions). If False, then only Pixeltable types such as `pxt.String`,
305
321
  `pxt.Int`, etc., will be allowed (as in schema definitions). `Optional` and `Required`
306
322
  designations will be allowed regardless.
323
+ infer_pydantic_json: If True, accepts an extended set of built-ins (e.g., Enum, Path) and returns the type to
324
+ which pydantic.BaseModel.model_dump(mode='json') serializes it.
307
325
  """
308
326
  origin = typing.get_origin(t)
309
327
  type_args = typing.get_args(t)
310
- if origin is typing.Union:
311
- # Check if `t` has the form Optional[T].
328
+ if origin in (typing.Union, types.UnionType):
329
+ # Check if `t` has the form T | None.
312
330
  if len(type_args) == 2 and type(None) in type_args:
313
- # `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).
331
+ # `t` is a type of the form T | None (equivalently, Optional[T], Union[T, None], or Union[None, T]).
314
332
  # We treat it as the underlying type but with nullable=True.
315
333
  underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
316
- underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
334
+ underlying = cls.from_python_type(
335
+ underlying_py_type, allow_builtin_types=allow_builtin_types, infer_pydantic_json=infer_pydantic_json
336
+ )
317
337
  if underlying is not None:
318
338
  return underlying.copy(nullable=True)
319
339
  elif origin is Required:
@@ -327,7 +347,7 @@ class ColumnType:
327
347
  if isinstance(parameters, ColumnType):
328
348
  return parameters.copy(nullable=nullable_default)
329
349
  else:
330
- # It's something other than Optional[T], Required[T], or an explicitly annotated type.
350
+ # It's something other than T | None, Required[T], or an explicitly annotated type.
331
351
  if origin is not None:
332
352
  # Discard type parameters to ensure that parameterized types such as `list[T]`
333
353
  # are correctly mapped to Pixeltable types.
@@ -340,6 +360,13 @@ class ColumnType:
340
360
  if literal_type is None:
341
361
  return None
342
362
  return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
363
+ if infer_pydantic_json and isinstance(t, type) and issubclass(t, enum.Enum):
364
+ literal_type = cls.infer_common_literal_type(member.value for member in t)
365
+ if literal_type is None:
366
+ return None
367
+ return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
368
+ if infer_pydantic_json and t is Path:
369
+ return StringType(nullable=nullable_default)
343
370
  if t is str:
344
371
  return StringType(nullable=nullable_default)
345
372
  if t is int:
@@ -352,6 +379,10 @@ class ColumnType:
352
379
  return TimestampType(nullable=nullable_default)
353
380
  if t is datetime.date:
354
381
  return DateType(nullable=nullable_default)
382
+ if t is uuid.UUID:
383
+ return UUIDType(nullable=nullable_default)
384
+ if t is bytes:
385
+ return BinaryType(nullable=nullable_default)
355
386
  if t is PIL.Image.Image:
356
387
  return ImageType(nullable=nullable_default)
357
388
  if isinstance(t, type) and issubclass(t, (Sequence, Mapping, pydantic.BaseModel)):
@@ -360,10 +391,7 @@ class ColumnType:
360
391
 
361
392
  @classmethod
362
393
  def normalize_type(
363
- cls,
364
- t: Union[ColumnType, type, _AnnotatedAlias],
365
- nullable_default: bool = False,
366
- allow_builtin_types: bool = True,
394
+ cls, t: ColumnType | type | _AnnotatedAlias, nullable_default: bool = False, allow_builtin_types: bool = True
367
395
  ) -> ColumnType:
368
396
  """
369
397
  Convert any type recognizable by Pixeltable to its corresponding ColumnType.
@@ -382,19 +410,51 @@ class ColumnType:
382
410
  (float, 'pxt.Float'),
383
411
  (datetime.datetime, 'pxt.Timestamp'),
384
412
  (datetime.date, 'pxt.Date'),
413
+ (uuid.UUID, 'pxt.UUID'),
385
414
  (PIL.Image.Image, 'pxt.Image'),
415
+ (bytes, 'pxt.Binary'),
386
416
  (Sequence, 'pxt.Json'),
387
417
  (Mapping, 'pxt.Json'),
388
418
  ]
389
419
 
390
420
  @classmethod
391
- def __raise_exc_for_invalid_type(cls, t: Union[type, _AnnotatedAlias]) -> None:
421
+ def __raise_exc_for_invalid_type(cls, t: type | _AnnotatedAlias) -> None:
392
422
  for builtin_type, suggestion in cls.__TYPE_SUGGESTIONS:
393
423
  if t is builtin_type or (isinstance(t, type) and issubclass(t, builtin_type)):
394
424
  name = t.__name__ if t.__module__ == 'builtins' else f'{t.__module__}.{t.__name__}'
395
425
  raise excs.Error(f'Standard Python type `{name}` cannot be used here; use `{suggestion}` instead')
396
426
  raise excs.Error(f'Unknown type: {t}')
397
427
 
428
+ @classmethod
429
+ def from_json_schema(cls, schema: dict[str, Any]) -> ColumnType | None:
430
+ # We first express the JSON schema as a Python type, and then convert it to a Pixeltable type.
431
+ # TODO: Is there a meaningful fallback if one of these operations fails? (Maybe another use case for a pxt Any
432
+ # type?)
433
+ py_type = cls.__json_schema_to_py_type(schema)
434
+ return cls.from_python_type(py_type) if py_type is not None else None
435
+
436
+ @classmethod
437
+ def __json_schema_to_py_type(cls, schema: dict[str, Any]) -> type | _GenericAlias | None:
438
+ if 'type' in schema:
439
+ if schema['type'] == 'null':
440
+ return type(None)
441
+ if schema['type'] == 'string':
442
+ return str
443
+ if schema['type'] == 'integer':
444
+ return int
445
+ if schema['type'] == 'number':
446
+ return float
447
+ if schema['type'] == 'boolean':
448
+ return bool
449
+ if schema['type'] in ('array', 'object'):
450
+ return list
451
+ elif 'anyOf' in schema:
452
+ subscripts = tuple(cls.__json_schema_to_py_type(subschema) for subschema in schema['anyOf'])
453
+ if all(subscript is not None for subscript in subscripts):
454
+ return Union[subscripts]
455
+
456
+ return None
457
+
398
458
  def validate_literal(self, val: Any) -> None:
399
459
  """Raise TypeError if val is not a valid literal for this type"""
400
460
  if val is None:
@@ -443,6 +503,9 @@ class ColumnType:
443
503
  def is_scalar_type(self) -> bool:
444
504
  return self._type in self.scalar_types
445
505
 
506
+ def is_scalar_json_type(self) -> bool:
507
+ return self._type in self.scalar_json_types
508
+
446
509
  def is_numeric_type(self) -> bool:
447
510
  return self._type in self.numeric_types
448
511
 
@@ -467,12 +530,18 @@ class ColumnType:
467
530
  def is_date_type(self) -> bool:
468
531
  return self._type == self.Type.DATE
469
532
 
533
+ def is_uuid_type(self) -> bool:
534
+ return self._type == self.Type.UUID
535
+
470
536
  def is_json_type(self) -> bool:
471
537
  return self._type == self.Type.JSON
472
538
 
473
539
  def is_array_type(self) -> bool:
474
540
  return self._type == self.Type.ARRAY
475
541
 
542
+ def is_binary_type(self) -> bool:
543
+ return self._type == self.Type.BINARY
544
+
476
545
  def is_image_type(self) -> bool:
477
546
  return self._type == self.Type.IMAGE
478
547
 
@@ -489,6 +558,10 @@ class ColumnType:
489
558
  # types that refer to external media files
490
559
  return self.is_image_type() or self.is_video_type() or self.is_audio_type() or self.is_document_type()
491
560
 
561
+ def supports_file_offloading(self) -> bool:
562
+ # types that can be offloaded to file-based storage via a CellMaterializationNode
563
+ return self.is_array_type() or self.is_json_type() or self.is_binary_type()
564
+
492
565
  @classmethod
493
566
  @abc.abstractmethod
494
567
  def to_sa_type(cls) -> sql.types.TypeEngine:
@@ -505,6 +578,35 @@ class ColumnType:
505
578
  def _to_json_schema(self) -> dict[str, Any]:
506
579
  raise excs.Error(f'Pixeltable type {self} is not a valid JSON type')
507
580
 
581
+ @classmethod
582
+ def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> ColumnType | None:
583
+ """
584
+ Return pixeltable type corresponding to a given simple numpy dtype
585
+ """
586
+ if np.issubdtype(dtype, np.integer):
587
+ return IntType(nullable=nullable)
588
+
589
+ if np.issubdtype(dtype, np.floating):
590
+ return FloatType(nullable=nullable)
591
+
592
+ if dtype == np.bool_:
593
+ return BoolType(nullable=nullable)
594
+
595
+ if np.issubdtype(dtype, np.str_):
596
+ return StringType(nullable=nullable)
597
+
598
+ if np.issubdtype(dtype, np.character):
599
+ return StringType(nullable=nullable)
600
+
601
+ if np.issubdtype(dtype, np.datetime64):
602
+ unit, _ = np.datetime_data(dtype)
603
+ if unit in ('D', 'M', 'Y'):
604
+ return DateType(nullable=nullable)
605
+ else:
606
+ return TimestampType(nullable=nullable)
607
+
608
+ return None
609
+
508
610
 
509
611
  class InvalidType(ColumnType):
510
612
  def __init__(self, nullable: bool = False):
@@ -629,8 +731,9 @@ class TimestampType(ColumnType):
629
731
  def _create_literal(self, val: Any) -> Any:
630
732
  if isinstance(val, str):
631
733
  return datetime.datetime.fromisoformat(val)
632
- if isinstance(val, datetime.datetime):
633
- return val
734
+ # Place naive timestamps in the default time zone
735
+ if isinstance(val, datetime.datetime) and val.tzinfo is None:
736
+ return val.replace(tzinfo=Env.get().default_time_zone)
634
737
  return val
635
738
 
636
739
 
@@ -657,11 +760,57 @@ class DateType(ColumnType):
657
760
  return val
658
761
 
659
762
 
763
+ class UUIDType(ColumnType):
764
+ def __init__(self, nullable: bool = False):
765
+ super().__init__(self.Type.UUID, nullable=nullable)
766
+
767
+ def has_supertype(self) -> bool:
768
+ return not self.nullable
769
+
770
+ @classmethod
771
+ def to_sa_type(cls) -> sql.types.TypeEngine:
772
+ return sql.UUID(as_uuid=True)
773
+
774
+ def _to_json_schema(self) -> dict[str, Any]:
775
+ return {'type': 'string', 'format': 'uuid'}
776
+
777
+ def print_value(self, val: Any) -> str:
778
+ return f"'{val}'"
779
+
780
+ def _to_base_str(self) -> str:
781
+ return 'UUID'
782
+
783
+ def _validate_literal(self, val: Any) -> None:
784
+ if not isinstance(val, uuid.UUID):
785
+ raise TypeError(f'Expected uuid.UUID, got {val.__class__.__name__}')
786
+
787
+ def _create_literal(self, val: Any) -> Any:
788
+ if isinstance(val, str):
789
+ return uuid.UUID(val)
790
+ return val
791
+
792
+
793
+ class BinaryType(ColumnType):
794
+ def __init__(self, nullable: bool = False):
795
+ super().__init__(self.Type.BINARY, nullable=nullable)
796
+
797
+ @classmethod
798
+ def to_sa_type(cls) -> sql.types.TypeEngine:
799
+ return sql.LargeBinary()
800
+
801
+ def _to_base_str(self) -> str:
802
+ return 'Binary'
803
+
804
+ def _validate_literal(self, val: Any) -> None:
805
+ if not isinstance(val, bytes):
806
+ raise TypeError(f'Expected `bytes`, got `{val.__class__.__name__}`')
807
+
808
+
660
809
  class JsonType(ColumnType):
661
- json_schema: Optional[dict[str, Any]]
662
- __validator: Optional[jsonschema.protocols.Validator]
810
+ json_schema: dict[str, Any] | None
811
+ __validator: jsonschema.protocols.Validator | None
663
812
 
664
- def __init__(self, json_schema: Optional[dict[str, Any]] = None, nullable: bool = False):
813
+ def __init__(self, json_schema: dict[str, Any] | None = None, nullable: bool = False):
665
814
  super().__init__(self.Type.JSON, nullable=nullable)
666
815
  self.json_schema = json_schema
667
816
  if json_schema is None:
@@ -716,7 +865,7 @@ class JsonType(ColumnType):
716
865
 
717
866
  @classmethod
718
867
  def __is_valid_json(cls, val: Any) -> bool:
719
- if val is None or isinstance(val, (str, int, float, bool)):
868
+ if val is None or isinstance(val, (str, int, float, bool, np.ndarray, PIL.Image.Image, bytes)):
720
869
  return True
721
870
  if isinstance(val, (list, tuple)):
722
871
  return all(cls.__is_valid_json(v) for v in val)
@@ -731,7 +880,7 @@ class JsonType(ColumnType):
731
880
  return val.model_dump()
732
881
  return val
733
882
 
734
- def supertype(self, other: ColumnType) -> Optional[JsonType]:
883
+ def supertype(self, other: ColumnType, for_inference: bool = False) -> JsonType | None:
735
884
  # Try using the (much faster) supertype logic in ColumnType first. That will work if, for example, the types
736
885
  # are identical except for nullability. If that doesn't work and both types are JsonType, then we will need to
737
886
  # merge their schemas.
@@ -753,7 +902,7 @@ class JsonType(ColumnType):
753
902
  )
754
903
 
755
904
  @classmethod
756
- def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
905
+ def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any] | None:
757
906
  # Defining a general type hierarchy over all JSON schemas would be a challenging problem. In order to keep
758
907
  # things manageable, we only define a hierarchy among "conforming" schemas, which provides enough generality
759
908
  # for the most important use cases (unions for type inference, validation of inline exprs). A schema is
@@ -813,7 +962,7 @@ class JsonType(ColumnType):
813
962
  return {} # Unresolvable type conflict; the supertype is an unrestricted JsonType.
814
963
 
815
964
  @classmethod
816
- def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
965
+ def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any] | None:
817
966
  a, a_nullable = cls.__unpack_null_from_schema(a)
818
967
  b, b_nullable = cls.__unpack_null_from_schema(b)
819
968
 
@@ -841,33 +990,64 @@ class JsonType(ColumnType):
841
990
  return f'Json[{self.json_schema}]'
842
991
 
843
992
 
993
+ ARRAY_SUPPORTED_NUMPY_DTYPES = [
994
+ np.bool_,
995
+ np.uint8,
996
+ np.uint16,
997
+ np.uint32,
998
+ np.uint64,
999
+ np.int8,
1000
+ np.int16,
1001
+ np.int32,
1002
+ np.int64,
1003
+ np.float16,
1004
+ np.float32,
1005
+ np.float64,
1006
+ np.str_,
1007
+ ]
1008
+
1009
+
844
1010
  class ArrayType(ColumnType):
845
- shape: Optional[tuple[Optional[int], ...]]
846
- pxt_dtype: Optional[ColumnType]
847
- dtype: Optional[ColumnType.Type]
1011
+ pxt_dtype_to_numpy_dtype: ClassVar[dict[ColumnType.Type, np.dtype]] = {
1012
+ ColumnType.Type.INT: np.dtype(np.int64),
1013
+ ColumnType.Type.FLOAT: np.dtype(np.float32),
1014
+ ColumnType.Type.BOOL: np.dtype(np.bool_),
1015
+ ColumnType.Type.STRING: np.dtype(np.str_),
1016
+ }
1017
+
1018
+ shape: tuple[int | None, ...] | None
1019
+ dtype: np.dtype | None
848
1020
 
849
1021
  def __init__(
850
1022
  self,
851
- shape: Optional[tuple[Optional[int], ...]] = None,
852
- dtype: Optional[ColumnType] = None,
1023
+ shape: tuple[int | None, ...] | None = None,
1024
+ dtype: ColumnType | np.dtype | None = None,
853
1025
  nullable: bool = False,
854
1026
  ):
855
1027
  super().__init__(self.Type.ARRAY, nullable=nullable)
856
1028
  assert shape is None or dtype is not None, (shape, dtype) # cannot specify a shape without a dtype
857
- assert (
858
- dtype is None
859
- or dtype.is_int_type()
860
- or dtype.is_float_type()
861
- or dtype.is_bool_type()
862
- or dtype.is_string_type()
863
- )
864
-
865
1029
  self.shape = shape
866
- self.pxt_dtype = dtype # we need this for copy() and __str__()
867
- self.dtype = None if dtype is None else dtype._type
1030
+ if dtype is None:
1031
+ self.dtype = None
1032
+ elif isinstance(dtype, np.dtype):
1033
+ # Numpy string has some specifications (endianness, max length, encoding) that we don't support, so we just
1034
+ # strip them out.
1035
+ if dtype.type == np.str_:
1036
+ self.dtype = np.dtype(np.str_)
1037
+ else:
1038
+ if dtype not in ARRAY_SUPPORTED_NUMPY_DTYPES:
1039
+ raise ValueError(f'Unsupported dtype: {dtype}')
1040
+ self.dtype = dtype
1041
+ elif isinstance(dtype, ColumnType):
1042
+ self.dtype = self.pxt_dtype_to_numpy_dtype.get(dtype._type, None)
1043
+ if self.dtype is None:
1044
+ raise ValueError(f'Unsupported dtype: {dtype}')
1045
+ assert self.dtype in ARRAY_SUPPORTED_NUMPY_DTYPES
1046
+ else:
1047
+ raise ValueError(f'Unsupported dtype: {dtype}')
868
1048
 
869
1049
  def copy(self, nullable: bool) -> ColumnType:
870
- return ArrayType(self.shape, self.pxt_dtype, nullable=nullable)
1050
+ return ArrayType(self.shape, self.dtype, nullable=nullable)
871
1051
 
872
1052
  def matches(self, other: ColumnType) -> bool:
873
1053
  return isinstance(other, ArrayType) and self.shape == other.shape and self.dtype == other.dtype
@@ -875,7 +1055,7 @@ class ArrayType(ColumnType):
875
1055
  def __hash__(self) -> int:
876
1056
  return hash((self._type, self.nullable, self.shape, self.dtype))
877
1057
 
878
- def supertype(self, other: ColumnType) -> Optional[ArrayType]:
1058
+ def supertype(self, other: ColumnType, for_inference: bool = False) -> ArrayType | None:
879
1059
  basic_supertype = super().supertype(other)
880
1060
  if basic_supertype is not None:
881
1061
  assert isinstance(basic_supertype, ArrayType)
@@ -884,162 +1064,121 @@ class ArrayType(ColumnType):
884
1064
  if not isinstance(other, ArrayType):
885
1065
  return None
886
1066
 
887
- super_dtype = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
888
- if super_dtype is None:
889
- # if the dtypes are incompatible, then the supertype is a fully general array
1067
+ # Supertype has dtype only if dtypes are identical. We can change this behavior to consider casting rules or
1068
+ # something else if there's demand for it.
1069
+ if self.dtype != other.dtype:
890
1070
  return ArrayType(nullable=(self.nullable or other.nullable))
891
- super_shape: Optional[tuple[Optional[int], ...]]
1071
+ super_dtype = self.dtype
1072
+
1073
+ # Determine the shape of the supertype
1074
+ super_shape: tuple[int | None, ...] | None
892
1075
  if self.shape is None or other.shape is None or len(self.shape) != len(other.shape):
893
1076
  super_shape = None
894
1077
  else:
895
1078
  super_shape = tuple(n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape))
896
- return ArrayType(super_shape, self.make_type(super_dtype), nullable=(self.nullable or other.nullable))
1079
+ return ArrayType(super_shape, super_dtype, nullable=(self.nullable or other.nullable))
897
1080
 
898
1081
  def _as_dict(self) -> dict:
899
1082
  result = super()._as_dict()
900
1083
  shape_as_list = None if self.shape is None else list(self.shape)
901
- dtype_value = None if self.dtype is None else self.dtype.value
902
- result.update(shape=shape_as_list, dtype=dtype_value)
1084
+ result.update(shape=shape_as_list)
1085
+
1086
+ if self.dtype is None:
1087
+ result.update(numpy_dtype=None)
1088
+ elif self.dtype == np.str_:
1089
+ # str(np.str_) would be something like '<U', but since we don't support the string specifications, just use
1090
+ # 'str' instead to avoid confusion.
1091
+ result.update(numpy_dtype='str')
1092
+ else:
1093
+ result.update(numpy_dtype=str(self.dtype))
903
1094
  return result
904
1095
 
905
1096
  def _to_base_str(self) -> str:
906
1097
  if self.shape is None and self.dtype is None:
907
1098
  return 'Array'
908
1099
  if self.shape is None:
909
- return f'Array[{self.pxt_dtype}]'
1100
+ return f'Array[{self.dtype.name}]'
910
1101
  assert self.dtype is not None
911
- return f'Array[{self.shape}, {self.pxt_dtype}]'
1102
+ return f'Array[{self.shape}, {self.dtype.name}]'
912
1103
 
913
1104
  @classmethod
914
1105
  def _from_dict(cls, d: dict) -> ColumnType:
1106
+ assert 'numpy_dtype' in d
1107
+ dtype = None if d['numpy_dtype'] is None else np.dtype(d['numpy_dtype'])
915
1108
  assert 'shape' in d
916
- assert 'dtype' in d
917
1109
  shape = None if d['shape'] is None else tuple(d['shape'])
918
- dtype = None if d['dtype'] is None else cls.make_type(cls.Type(d['dtype']))
919
1110
  return cls(shape, dtype, nullable=d['nullable'])
920
1111
 
921
1112
  @classmethod
922
- def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> Optional[ColumnType]:
923
- """
924
- Return pixeltable type corresponding to a given simple numpy dtype
925
- """
926
- if np.issubdtype(dtype, np.integer):
927
- return IntType(nullable=nullable)
928
-
929
- if np.issubdtype(dtype, np.floating):
930
- return FloatType(nullable=nullable)
931
-
932
- if dtype == np.bool_:
933
- return BoolType(nullable=nullable)
934
-
935
- if np.issubdtype(dtype, np.str_):
936
- return StringType(nullable=nullable)
937
-
938
- if np.issubdtype(dtype, np.character):
939
- return StringType(nullable=nullable)
940
-
941
- if np.issubdtype(dtype, np.datetime64):
942
- unit, _ = np.datetime_data(dtype)
943
- if unit in ['D', 'M', 'Y']:
944
- return DateType(nullable=nullable)
945
- else:
946
- return TimestampType(nullable=nullable)
947
-
948
- return None
949
-
950
- @classmethod
951
- def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
952
- # determine our dtype
1113
+ def from_literal(cls, val: np.ndarray, nullable: bool = False) -> ArrayType | None:
953
1114
  assert isinstance(val, np.ndarray)
954
- pxttype: Optional[ColumnType] = cls.from_np_dtype(val.dtype, nullable)
955
- if pxttype is None:
1115
+ if val.dtype.type not in ARRAY_SUPPORTED_NUMPY_DTYPES:
956
1116
  return None
957
- return cls(val.shape, dtype=pxttype, nullable=nullable)
1117
+ return cls(val.shape, dtype=val.dtype, nullable=nullable)
958
1118
 
959
- def is_valid_literal(self, val: np.ndarray) -> bool:
1119
+ def _to_json_schema(self) -> dict[str, Any]:
1120
+ schema: dict[str, Any] = {'type': 'array'}
1121
+ if self.dtype == np.str_:
1122
+ schema.update({'items': {'type': 'str'}})
1123
+ elif self.dtype is not None:
1124
+ schema.update({'items': {'type': str(self.dtype)}})
1125
+ return schema
1126
+
1127
+ def _validate_literal(self, val: Any) -> None:
960
1128
  if not isinstance(val, np.ndarray):
961
- return False
1129
+ raise TypeError(f'Expected numpy.ndarray, got {val.__class__.__name__}')
962
1130
 
963
- # If a dtype is specified, check that there's a match
964
- if self.dtype is not None and not np.issubdtype(val.dtype, self.numpy_dtype()):
965
- return False
1131
+ # If column type has a dtype, check if it matches
1132
+ if self.dtype == np.str_:
1133
+ if val.dtype.type != np.str_:
1134
+ raise TypeError(f'Expected numpy.ndarray of dtype {self.dtype}, got numpy.ndarray of dtype {val.dtype}')
1135
+ elif self.dtype is not None and self.dtype != val.dtype:
1136
+ raise TypeError(f'Expected numpy.ndarray of dtype {self.dtype}, got numpy.ndarray of dtype {val.dtype}')
966
1137
 
967
- # If no dtype is specified, we still need to check that the dtype is one of the supported types
968
- if self.dtype is None and not any(
969
- np.issubdtype(val.dtype, ndtype) for ndtype in [np.int64, np.float32, np.bool_, np.str_]
970
- ):
971
- return False
1138
+ # Check that the dtype is one of the supported types
1139
+ if val.dtype.type != np.str_ and val.dtype not in ARRAY_SUPPORTED_NUMPY_DTYPES:
1140
+ raise TypeError(f'Unsupported dtype {val.dtype}')
972
1141
 
973
1142
  # If a shape is specified, check that there's a match
974
1143
  if self.shape is not None:
975
1144
  if len(val.shape) != len(self.shape):
976
- return False
1145
+ raise TypeError(
1146
+ f'Expected numpy.ndarray({self.shape}, dtype={self.dtype}), '
1147
+ f'got numpy.ndarray({val.shape}, dtype={val.dtype})'
1148
+ )
977
1149
  # check that the shapes are compatible
978
1150
  for n1, n2 in zip(val.shape, self.shape):
979
1151
  assert n1 is not None # `val` must have a concrete shape
980
1152
  if n2 is None:
981
1153
  continue # wildcard
982
1154
  if n1 != n2:
983
- return False
984
-
985
- return True
986
-
987
- def _to_json_schema(self) -> dict[str, Any]:
988
- return {'type': 'array', 'items': self.pxt_dtype._to_json_schema()}
989
-
990
- def _validate_literal(self, val: Any) -> None:
991
- if not isinstance(val, np.ndarray):
992
- raise TypeError(f'Expected numpy.ndarray, got {val.__class__.__name__}')
993
- if not self.is_valid_literal(val):
994
- if self.shape is not None:
995
- raise TypeError(
996
- f'Expected numpy.ndarray({self.shape}, dtype={self.numpy_dtype()}), '
997
- f'got numpy.ndarray({val.shape}, dtype={val.dtype})'
998
- )
999
- elif self.dtype is not None:
1000
- raise TypeError(
1001
- f'Expected numpy.ndarray of dtype {self.numpy_dtype()}, got numpy.ndarray of dtype {val.dtype}'
1002
- )
1003
- else:
1004
- raise TypeError(f'Unsupported dtype for numpy.ndarray: {val.dtype}')
1155
+ raise TypeError(
1156
+ f'Expected numpy.ndarray({self.shape}, dtype={self.dtype}), '
1157
+ f'got numpy.ndarray({val.shape}, dtype={val.dtype})'
1158
+ )
1005
1159
 
1006
1160
  def _create_literal(self, val: Any) -> Any:
1007
1161
  if isinstance(val, (list, tuple)):
1008
1162
  # map python float to whichever numpy float is
1009
1163
  # declared for this type, rather than assume float64
1010
- return np.array(val, dtype=self.numpy_dtype())
1164
+ return np.array(val, dtype=self.dtype)
1011
1165
  return val
1012
1166
 
1013
1167
  @classmethod
1014
1168
  def to_sa_type(cls) -> sql.types.TypeEngine:
1015
1169
  return sql.LargeBinary()
1016
1170
 
1017
- def numpy_dtype(self) -> Optional[np.dtype]:
1018
- if self.dtype is None:
1019
- return None
1020
- if self.dtype == self.Type.INT:
1021
- return np.dtype(np.int64)
1022
- if self.dtype == self.Type.FLOAT:
1023
- return np.dtype(np.float32)
1024
- if self.dtype == self.Type.BOOL:
1025
- return np.dtype(np.bool_)
1026
- if self.dtype == self.Type.STRING:
1027
- return np.dtype(np.str_)
1028
- raise AssertionError(self.dtype)
1029
-
1030
1171
 
1031
1172
  class ImageType(ColumnType):
1032
1173
  def __init__(
1033
1174
  self,
1034
- width: Optional[int] = None,
1035
- height: Optional[int] = None,
1036
- size: Optional[tuple[int, int]] = None,
1037
- mode: Optional[str] = None,
1175
+ width: int | None = None,
1176
+ height: int | None = None,
1177
+ size: tuple[int, int] | None = None,
1178
+ mode: str | None = None,
1038
1179
  nullable: bool = False,
1039
1180
  ):
1040
- """
1041
- TODO: does it make sense to specify only width or height?
1042
- """
1181
+ # TODO: does it make sense to specify only width or height?
1043
1182
  super().__init__(self.Type.IMAGE, nullable=nullable)
1044
1183
  assert not (width is not None and size is not None)
1045
1184
  assert not (height is not None and size is not None)
@@ -1077,7 +1216,7 @@ class ImageType(ColumnType):
1077
1216
  def __hash__(self) -> int:
1078
1217
  return hash((self._type, self.nullable, self.size, self.mode))
1079
1218
 
1080
- def supertype(self, other: ColumnType) -> Optional[ImageType]:
1219
+ def supertype(self, other: ColumnType, for_inference: bool = False) -> ImageType | None:
1081
1220
  basic_supertype = super().supertype(other)
1082
1221
  if basic_supertype is not None:
1083
1222
  assert isinstance(basic_supertype, ImageType)
@@ -1092,7 +1231,7 @@ class ImageType(ColumnType):
1092
1231
  return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))
1093
1232
 
1094
1233
  @property
1095
- def size(self) -> Optional[tuple[int, int]]:
1234
+ def size(self) -> tuple[int, int] | None:
1096
1235
  if self.width is None or self.height is None:
1097
1236
  return None
1098
1237
  return (self.width, self.height)
@@ -1123,8 +1262,8 @@ class ImageType(ColumnType):
1123
1262
  img.load()
1124
1263
  return img
1125
1264
  except Exception as exc:
1126
- errormsg_val = val if len(val) < 50 else val[:50] + '...'
1127
- raise excs.Error(f'data URL could not be decoded into a valid image: {errormsg_val}') from exc
1265
+ error_msg_val = val if len(val) < 50 else val[:50] + '...'
1266
+ raise excs.Error(f'data URL could not be decoded into a valid image: {error_msg_val}') from exc
1128
1267
  return val
1129
1268
 
1130
1269
  def _validate_literal(self, val: Any) -> None:
@@ -1211,7 +1350,7 @@ class DocumentType(ColumnType):
1211
1350
  TXT = 4
1212
1351
 
1213
1352
  @classmethod
1214
- def from_extension(cls, ext: str) -> Optional['DocumentType.DocumentFormat']:
1353
+ def from_extension(cls, ext: str) -> 'DocumentType.DocumentFormat' | None:
1215
1354
  if ext in ('.htm', '.html'):
1216
1355
  return cls.HTML
1217
1356
  if ext == '.md':
@@ -1224,7 +1363,7 @@ class DocumentType(ColumnType):
1224
1363
  return cls.TXT
1225
1364
  return None
1226
1365
 
1227
- def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
1366
+ def __init__(self, nullable: bool = False, doc_formats: str | None = None):
1228
1367
  super().__init__(self.Type.DOCUMENT, nullable=nullable)
1229
1368
  self.doc_formats = doc_formats
1230
1369
  if doc_formats is not None:
@@ -1278,6 +1417,8 @@ Float = typing.Annotated[float, FloatType(nullable=False)]
1278
1417
  Bool = typing.Annotated[bool, BoolType(nullable=False)]
1279
1418
  Timestamp = typing.Annotated[datetime.datetime, TimestampType(nullable=False)]
1280
1419
  Date = typing.Annotated[datetime.date, DateType(nullable=False)]
1420
+ UUID = typing.Annotated[uuid.UUID, UUIDType(nullable=False)]
1421
+ Binary = typing.Annotated[bytes, BinaryType(nullable=False)]
1281
1422
 
1282
1423
 
1283
1424
  class _PxtType:
@@ -1320,14 +1461,17 @@ class Json(_PxtType):
1320
1461
  class Array(np.ndarray, _PxtType):
1321
1462
  def __class_getitem__(cls, item: Any) -> _AnnotatedAlias:
1322
1463
  """
1323
- `item` (the type subscript) must be a tuple with exactly two elements (in any order):
1324
- - A tuple of `Optional[int]`s, specifying the shape of the array
1325
- - A type, specifying the dtype of the array
1326
- Example: Array[(3, None, 2), pxt.Float]
1464
+ `item` (the type subscript) must be a tuple with at most two elements (in any order):
1465
+ - An optional tuple of `int | None`s, specifying the shape of the array
1466
+ - A type (`ColumnType | np.dtype`), specifying the dtype of the array
1467
+ Examples:
1468
+ * Array[(3, None, 2), pxt.Float]
1469
+ * Array[(4, 4), np.uint8]
1470
+ * Array[np.bool]
1327
1471
  """
1328
1472
  params = item if isinstance(item, tuple) else (item,)
1329
- shape: Optional[tuple] = None
1330
- dtype: Optional[ColumnType] = None
1473
+ shape: tuple | None = None
1474
+ dtype: ColumnType | np.dtype | None = None
1331
1475
  if not any(isinstance(param, (type, _AnnotatedAlias)) for param in params):
1332
1476
  raise TypeError('Array type parameter must include a dtype.')
1333
1477
  for param in params:
@@ -1340,7 +1484,10 @@ class Array(np.ndarray, _PxtType):
1340
1484
  elif isinstance(param, (type, _AnnotatedAlias)):
1341
1485
  if dtype is not None:
1342
1486
  raise TypeError(f'Duplicate Array type parameter: {param}')
1343
- dtype = ColumnType.normalize_type(param, allow_builtin_types=False)
1487
+ if isinstance(param, type) and param in ARRAY_SUPPORTED_NUMPY_DTYPES:
1488
+ dtype = np.dtype(param)
1489
+ else:
1490
+ dtype = ColumnType.normalize_type(param, allow_builtin_types=False)
1344
1491
  else:
1345
1492
  raise TypeError(f'Invalid Array type parameter: {param}')
1346
1493
  return typing.Annotated[np.ndarray, ArrayType(shape=shape, dtype=dtype, nullable=False)]
@@ -1367,8 +1514,8 @@ class Image(PIL.Image.Image, _PxtType):
1367
1514
  else:
1368
1515
  # Not a tuple (single arg)
1369
1516
  params = (item,)
1370
- size: Optional[tuple] = None
1371
- mode: Optional[str] = None
1517
+ size: tuple | None = None
1518
+ mode: str | None = None
1372
1519
  for param in params:
1373
1520
  if isinstance(param, tuple):
1374
1521
  if (
@@ -1413,4 +1560,19 @@ class Document(str, _PxtType):
1413
1560
  return DocumentType(nullable=nullable)
1414
1561
 
1415
1562
 
1416
- ALL_PIXELTABLE_TYPES = (String, Bool, Int, Float, Timestamp, Json, Array, Image, Video, Audio, Document)
1563
+ ALL_PIXELTABLE_TYPES = (
1564
+ String,
1565
+ Bool,
1566
+ Int,
1567
+ Float,
1568
+ Timestamp,
1569
+ Json,
1570
+ Array,
1571
+ Image,
1572
+ Video,
1573
+ Audio,
1574
+ Document,
1575
+ Date,
1576
+ UUID,
1577
+ Binary,
1578
+ )