pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +34 -6
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +590 -30
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +359 -45
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +116 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +195 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +34 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +256 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +122 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +418 -182
- pixeltable/tests/conftest.py +146 -88
- pixeltable/tests/functions/test_fireworks.py +42 -0
- pixeltable/tests/functions/test_functions.py +60 -0
- pixeltable/tests/functions/test_huggingface.py +158 -0
- pixeltable/tests/functions/test_openai.py +152 -0
- pixeltable/tests/functions/test_together.py +111 -0
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +370 -0
- pixeltable/tests/test_dataframe.py +439 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +120 -0
- pixeltable/tests/test_exprs.py +592 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1195 -263
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +151 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +320 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/tool/create_test_video.py +81 -0
- pixeltable/type_system.py +445 -124
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/hf_datasets.py +157 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +167 -0
- pixeltable/utils/pytorch.py +91 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.4.dist-info/LICENSE +18 -0
- pixeltable-0.2.4.dist-info/METADATA +127 -0
- pixeltable-0.2.4.dist-info/RECORD +132 -0
- {pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_functions.py +0 -11
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.0.dist-info/METADATA +0 -34
- pixeltable-0.1.0.dist-info/RECORD +0 -36
pixeltable/type_system.py
CHANGED
|
@@ -1,15 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import abc
|
|
2
|
-
from typing import Any, Optional, Tuple, Dict, Callable, List, Union
|
|
3
|
-
import enum
|
|
4
4
|
import datetime
|
|
5
|
+
import enum
|
|
5
6
|
import json
|
|
7
|
+
import typing
|
|
8
|
+
import urllib.parse
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
|
|
6
11
|
|
|
7
|
-
import os
|
|
8
|
-
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
|
|
9
|
-
import tensorflow as tf
|
|
10
12
|
import PIL.Image
|
|
13
|
+
import av
|
|
14
|
+
import numpy as np
|
|
11
15
|
import sqlalchemy as sql
|
|
12
16
|
|
|
17
|
+
from pixeltable import exceptions as excs
|
|
13
18
|
|
|
14
19
|
|
|
15
20
|
class ColumnType:
|
|
@@ -24,25 +29,16 @@ class ColumnType:
|
|
|
24
29
|
ARRAY = 6
|
|
25
30
|
IMAGE = 7
|
|
26
31
|
VIDEO = 8
|
|
32
|
+
AUDIO = 9
|
|
33
|
+
DOCUMENT = 10
|
|
27
34
|
|
|
28
35
|
# exprs that don't evaluate to a computable value in Pixeltable, such as an Image member function
|
|
29
|
-
INVALID =
|
|
30
|
-
|
|
31
|
-
def to_tf(self) -> tf.dtypes.DType:
|
|
32
|
-
if self == self.STRING:
|
|
33
|
-
return tf.string
|
|
34
|
-
if self == self.INT:
|
|
35
|
-
return tf.int64
|
|
36
|
-
if self == self.FLOAT:
|
|
37
|
-
return tf.float32
|
|
38
|
-
if self == self.BOOL:
|
|
39
|
-
return tf.bool
|
|
40
|
-
raise TypeError(f'Cannot convert {self} to TensorFlow')
|
|
36
|
+
INVALID = 255
|
|
41
37
|
|
|
42
38
|
@classmethod
|
|
43
39
|
def supertype(
|
|
44
40
|
cls, type1: 'Type', type2: 'Type',
|
|
45
|
-
# we need to pass this in because we can't easily
|
|
41
|
+
# we need to pass this in because we can't easily append it as a class member
|
|
46
42
|
common_supertypes: Dict[Tuple['Type', 'Type'], 'Type']
|
|
47
43
|
) -> Optional['Type']:
|
|
48
44
|
if type1 == type2:
|
|
@@ -82,8 +78,9 @@ class ColumnType:
|
|
|
82
78
|
(Type.INT, Type.FLOAT): Type.FLOAT,
|
|
83
79
|
}
|
|
84
80
|
|
|
85
|
-
def __init__(self, t: Type):
|
|
81
|
+
def __init__(self, t: Type, nullable: bool = False):
|
|
86
82
|
self._type = t
|
|
83
|
+
self.nullable = nullable
|
|
87
84
|
|
|
88
85
|
@property
|
|
89
86
|
def type_enum(self) -> Type:
|
|
@@ -93,7 +90,7 @@ class ColumnType:
|
|
|
93
90
|
return json.dumps(self.as_dict())
|
|
94
91
|
|
|
95
92
|
@classmethod
|
|
96
|
-
def serialize_list(cls, type_list: List[
|
|
93
|
+
def serialize_list(cls, type_list: List[ColumnType]) -> str:
|
|
97
94
|
return json.dumps([t.as_dict() for t in type_list])
|
|
98
95
|
|
|
99
96
|
def as_dict(self) -> Dict:
|
|
@@ -103,33 +100,34 @@ class ColumnType:
|
|
|
103
100
|
}
|
|
104
101
|
|
|
105
102
|
def _as_dict(self) -> Dict:
|
|
106
|
-
return {}
|
|
103
|
+
return {'nullable': self.nullable}
|
|
107
104
|
|
|
108
105
|
@classmethod
|
|
109
|
-
def deserialize(cls, type_str: str) ->
|
|
106
|
+
def deserialize(cls, type_str: str) -> ColumnType:
|
|
110
107
|
type_dict = json.loads(type_str)
|
|
111
108
|
return cls.from_dict(type_dict)
|
|
112
109
|
|
|
113
110
|
@classmethod
|
|
114
|
-
def deserialize_list(cls, type_list_str: str) -> List[
|
|
111
|
+
def deserialize_list(cls, type_list_str: str) -> List[ColumnType]:
|
|
115
112
|
type_dict_list = json.loads(type_list_str)
|
|
116
113
|
return [cls.from_dict(type_dict) for type_dict in type_dict_list]
|
|
117
114
|
|
|
118
115
|
@classmethod
|
|
119
|
-
def from_dict(cls, type_dict: Dict) ->
|
|
116
|
+
def from_dict(cls, type_dict: Dict) -> ColumnType:
|
|
120
117
|
assert '_classname' in type_dict
|
|
121
118
|
type_class = globals()[type_dict['_classname']]
|
|
122
119
|
return type_class._from_dict(type_dict)
|
|
123
120
|
|
|
124
121
|
@classmethod
|
|
125
|
-
def _from_dict(cls, d: Dict) ->
|
|
122
|
+
def _from_dict(cls, d: Dict) -> ColumnType:
|
|
126
123
|
"""
|
|
127
|
-
Default implementation: simply invoke c'tor
|
|
124
|
+
Default implementation: simply invoke c'tor
|
|
128
125
|
"""
|
|
129
|
-
|
|
126
|
+
assert 'nullable' in d
|
|
127
|
+
return cls(nullable=d['nullable'])
|
|
130
128
|
|
|
131
129
|
@classmethod
|
|
132
|
-
def make_type(cls, t: Type) ->
|
|
130
|
+
def make_type(cls, t: Type) -> ColumnType:
|
|
133
131
|
assert t != cls.Type.INVALID and t != cls.Type.ARRAY
|
|
134
132
|
if t == cls.Type.STRING:
|
|
135
133
|
return StringType()
|
|
@@ -147,21 +145,44 @@ class ColumnType:
|
|
|
147
145
|
return ImageType()
|
|
148
146
|
if t == cls.Type.VIDEO:
|
|
149
147
|
return VideoType()
|
|
148
|
+
if t == cls.Type.AUDIO:
|
|
149
|
+
return AudioType()
|
|
150
|
+
if t == cls.Type.DOCUMENT:
|
|
151
|
+
return AudioType()
|
|
150
152
|
|
|
151
153
|
def __str__(self) -> str:
|
|
152
154
|
return self._type.name.lower()
|
|
153
155
|
|
|
154
156
|
def __eq__(self, other: object) -> bool:
|
|
157
|
+
return self.matches(other) and self.nullable == other.nullable
|
|
158
|
+
|
|
159
|
+
def is_supertype_of(self, other: ColumnType) -> bool:
|
|
160
|
+
if type(self) != type(other):
|
|
161
|
+
return False
|
|
162
|
+
if self.matches(other):
|
|
163
|
+
return True
|
|
164
|
+
return self._is_supertype_of(other)
|
|
165
|
+
|
|
166
|
+
@abc.abstractmethod
|
|
167
|
+
def _is_supertype_of(self, other: ColumnType) -> bool:
|
|
168
|
+
return False
|
|
169
|
+
|
|
170
|
+
def matches(self, other: object) -> bool:
|
|
171
|
+
"""Two types match if they're equal, aside from nullability"""
|
|
172
|
+
if not isinstance(other, ColumnType):
|
|
173
|
+
pass
|
|
155
174
|
assert isinstance(other, ColumnType)
|
|
156
|
-
if
|
|
175
|
+
if type(self) != type(other):
|
|
157
176
|
return False
|
|
158
177
|
for member_var in vars(self).keys():
|
|
178
|
+
if member_var == 'nullable':
|
|
179
|
+
continue
|
|
159
180
|
if getattr(self, member_var) != getattr(other, member_var):
|
|
160
181
|
return False
|
|
161
182
|
return True
|
|
162
183
|
|
|
163
184
|
@classmethod
|
|
164
|
-
def supertype(cls, type1:
|
|
185
|
+
def supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
|
|
165
186
|
if type1 == type2:
|
|
166
187
|
return type1
|
|
167
188
|
|
|
@@ -183,16 +204,15 @@ class ColumnType:
|
|
|
183
204
|
|
|
184
205
|
@classmethod
|
|
185
206
|
@abc.abstractmethod
|
|
186
|
-
def _supertype(cls, type1:
|
|
207
|
+
def _supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
|
|
187
208
|
"""
|
|
188
209
|
Class-specific implementation of determining the supertype. type1 and type2 are from the same subclass of
|
|
189
210
|
ColumnType.
|
|
190
211
|
"""
|
|
191
212
|
pass
|
|
192
213
|
|
|
193
|
-
|
|
194
214
|
@classmethod
|
|
195
|
-
def
|
|
215
|
+
def infer_literal_type(cls, val: Any) -> Optional[ColumnType]:
|
|
196
216
|
if isinstance(val, str):
|
|
197
217
|
return StringType()
|
|
198
218
|
if isinstance(val, int):
|
|
@@ -203,6 +223,104 @@ class ColumnType:
|
|
|
203
223
|
return BoolType()
|
|
204
224
|
if isinstance(val, datetime.datetime) or isinstance(val, datetime.date):
|
|
205
225
|
return TimestampType()
|
|
226
|
+
if isinstance(val, np.ndarray):
|
|
227
|
+
col_type = ArrayType.from_literal(val)
|
|
228
|
+
if col_type is not None:
|
|
229
|
+
return col_type
|
|
230
|
+
# this could still be json-serializable
|
|
231
|
+
if isinstance(val, dict) or isinstance(val, np.ndarray):
|
|
232
|
+
try:
|
|
233
|
+
JsonType().validate_literal(val)
|
|
234
|
+
return JsonType()
|
|
235
|
+
except TypeError:
|
|
236
|
+
return None
|
|
237
|
+
return None
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@classmethod
|
|
241
|
+
def from_python_type(cls, t: type) -> Optional[ColumnType]:
|
|
242
|
+
if typing.get_origin(t) is typing.Union:
|
|
243
|
+
union_args = typing.get_args(t)
|
|
244
|
+
if union_args[1] is type(None):
|
|
245
|
+
# `t` is a type of the form Optional[T] (equivalently, Union[T, None]).
|
|
246
|
+
# We treat it as the underlying type but with nullable=True.
|
|
247
|
+
underlying = cls.from_python_type(union_args[0])
|
|
248
|
+
if underlying is not None:
|
|
249
|
+
underlying.nullable = True
|
|
250
|
+
return underlying
|
|
251
|
+
else:
|
|
252
|
+
# Discard type parameters to ensure that parameterized types such as `list[T]`
|
|
253
|
+
# are correctly mapped to Pixeltable types.
|
|
254
|
+
base = typing.get_origin(t)
|
|
255
|
+
if base is None:
|
|
256
|
+
# No type parameters; the base type is just `t` itself
|
|
257
|
+
base = t
|
|
258
|
+
if base is str:
|
|
259
|
+
return StringType()
|
|
260
|
+
if base is int:
|
|
261
|
+
return IntType()
|
|
262
|
+
if base is float:
|
|
263
|
+
return FloatType()
|
|
264
|
+
if base is bool:
|
|
265
|
+
return BoolType()
|
|
266
|
+
if base is datetime.date or base is datetime.datetime:
|
|
267
|
+
return TimestampType()
|
|
268
|
+
if issubclass(base, Sequence) or issubclass(base, Mapping):
|
|
269
|
+
return JsonType()
|
|
270
|
+
if issubclass(base, PIL.Image.Image):
|
|
271
|
+
return ImageType()
|
|
272
|
+
return None
|
|
273
|
+
|
|
274
|
+
def validate_literal(self, val: Any) -> None:
|
|
275
|
+
"""Raise TypeError if val is not a valid literal for this type"""
|
|
276
|
+
if val is None:
|
|
277
|
+
if not self.nullable:
|
|
278
|
+
raise TypeError('Expected non-None value')
|
|
279
|
+
else:
|
|
280
|
+
return
|
|
281
|
+
self._validate_literal(val)
|
|
282
|
+
|
|
283
|
+
def validate_media(self, val: Any) -> None:
|
|
284
|
+
"""
|
|
285
|
+
Raise TypeError if val is not a path to a valid media file (or a valid in-memory byte sequence) for this type
|
|
286
|
+
"""
|
|
287
|
+
if self.is_media_type():
|
|
288
|
+
raise NotImplementedError(f'validate_media() not implemented for {self.__class__.__name__}')
|
|
289
|
+
|
|
290
|
+
def _validate_file_path(self, val: Any) -> None:
|
|
291
|
+
"""Raises TypeError if not a valid local file path or not a path/byte sequence"""
|
|
292
|
+
if isinstance(val, str):
|
|
293
|
+
parsed = urllib.parse.urlparse(val)
|
|
294
|
+
if parsed.scheme != '' and parsed.scheme != 'file':
|
|
295
|
+
return
|
|
296
|
+
path = Path(urllib.parse.unquote(parsed.path))
|
|
297
|
+
if not path.is_file():
|
|
298
|
+
raise TypeError(f'File not found: {str(path)}')
|
|
299
|
+
else:
|
|
300
|
+
if not isinstance(val, bytes):
|
|
301
|
+
raise TypeError(f'expected file path or bytes, got {type(val)}')
|
|
302
|
+
|
|
303
|
+
@abc.abstractmethod
|
|
304
|
+
def _validate_literal(self, val: Any) -> None:
|
|
305
|
+
"""Raise TypeError if val is not a valid literal for this type"""
|
|
306
|
+
pass
|
|
307
|
+
|
|
308
|
+
@abc.abstractmethod
|
|
309
|
+
def _create_literal(self, val : Any) -> Any:
|
|
310
|
+
"""Create a literal of this type from val, including any needed conversions.
|
|
311
|
+
val is guaranteed to be non-None"""
|
|
312
|
+
return val
|
|
313
|
+
|
|
314
|
+
def create_literal(self, val: Any) -> Any:
|
|
315
|
+
"""Create a literal of this type from val or raise TypeError if not possible"""
|
|
316
|
+
if val is not None:
|
|
317
|
+
val = self._create_literal(val)
|
|
318
|
+
|
|
319
|
+
self.validate_literal(val)
|
|
320
|
+
return val
|
|
321
|
+
|
|
322
|
+
def print_value(self, val: Any) -> str:
|
|
323
|
+
return str(val)
|
|
206
324
|
|
|
207
325
|
def is_scalar_type(self) -> bool:
|
|
208
326
|
return self._type in self.scalar_types
|
|
@@ -240,6 +358,16 @@ class ColumnType:
|
|
|
240
358
|
def is_video_type(self) -> bool:
|
|
241
359
|
return self._type == self.Type.VIDEO
|
|
242
360
|
|
|
361
|
+
def is_audio_type(self) -> bool:
|
|
362
|
+
return self._type == self.Type.AUDIO
|
|
363
|
+
|
|
364
|
+
def is_document_type(self) -> bool:
|
|
365
|
+
return self._type == self.Type.DOCUMENT
|
|
366
|
+
|
|
367
|
+
def is_media_type(self) -> bool:
|
|
368
|
+
# types that refer to external media files
|
|
369
|
+
return self.is_image_type() or self.is_video_type() or self.is_audio_type() or self.is_document_type()
|
|
370
|
+
|
|
243
371
|
@abc.abstractmethod
|
|
244
372
|
def to_sql(self) -> str:
|
|
245
373
|
"""
|
|
@@ -281,21 +409,17 @@ class ColumnType:
|
|
|
281
409
|
"""
|
|
282
410
|
assert False
|
|
283
411
|
|
|
284
|
-
def conversion_fn(self, target:
|
|
412
|
+
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
285
413
|
"""
|
|
286
414
|
Return Callable that converts a column value of type self to a value of type 'target'.
|
|
287
415
|
Returns None if conversion isn't possible.
|
|
288
416
|
"""
|
|
289
417
|
return None
|
|
290
418
|
|
|
291
|
-
@abc.abstractmethod
|
|
292
|
-
def to_tf(self) -> Union[tf.TypeSpec, Dict[str, tf.TypeSpec]]:
|
|
293
|
-
pass
|
|
294
|
-
|
|
295
419
|
|
|
296
420
|
class InvalidType(ColumnType):
|
|
297
|
-
def __init__(self):
|
|
298
|
-
super().__init__(self.Type.INVALID)
|
|
421
|
+
def __init__(self, nullable: bool = False):
|
|
422
|
+
super().__init__(self.Type.INVALID, nullable=nullable)
|
|
299
423
|
|
|
300
424
|
def to_sql(self) -> str:
|
|
301
425
|
assert False
|
|
@@ -303,13 +427,15 @@ class InvalidType(ColumnType):
|
|
|
303
427
|
def to_sa_type(self) -> Any:
|
|
304
428
|
assert False
|
|
305
429
|
|
|
306
|
-
def
|
|
307
|
-
|
|
430
|
+
def print_value(self, val: Any) -> str:
|
|
431
|
+
assert False
|
|
308
432
|
|
|
433
|
+
def _validate_literal(self, val: Any) -> None:
|
|
434
|
+
assert False
|
|
309
435
|
|
|
310
436
|
class StringType(ColumnType):
|
|
311
|
-
def __init__(self):
|
|
312
|
-
super().__init__(self.Type.STRING)
|
|
437
|
+
def __init__(self, nullable: bool = False):
|
|
438
|
+
super().__init__(self.Type.STRING, nullable=nullable)
|
|
313
439
|
|
|
314
440
|
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
315
441
|
if not target.is_timestamp_type():
|
|
@@ -328,28 +454,39 @@ class StringType(ColumnType):
|
|
|
328
454
|
def to_sa_type(self) -> str:
|
|
329
455
|
return sql.String
|
|
330
456
|
|
|
331
|
-
def
|
|
332
|
-
return
|
|
457
|
+
def print_value(self, val: Any) -> str:
|
|
458
|
+
return f"'{val}'"
|
|
459
|
+
|
|
460
|
+
def _validate_literal(self, val: Any) -> None:
|
|
461
|
+
if not isinstance(val, str):
|
|
462
|
+
raise TypeError(f'Expected string, got {val.__class__.__name__}')
|
|
333
463
|
|
|
464
|
+
def _create_literal(self, val: Any) -> Any:
|
|
465
|
+
# Replace null byte within python string with space to avoid issues with Postgres.
|
|
466
|
+
# Use a space to avoid merging words.
|
|
467
|
+
# TODO(orm): this will also be an issue with JSON inputs, would space still be a good replacement?
|
|
468
|
+
if isinstance(val, str) and '\x00' in val:
|
|
469
|
+
return val.replace('\x00', ' ')
|
|
470
|
+
return val
|
|
334
471
|
|
|
335
472
|
class IntType(ColumnType):
|
|
336
|
-
def __init__(self):
|
|
337
|
-
super().__init__(self.Type.INT)
|
|
473
|
+
def __init__(self, nullable: bool = False):
|
|
474
|
+
super().__init__(self.Type.INT, nullable=nullable)
|
|
338
475
|
|
|
339
476
|
def to_sql(self) -> str:
|
|
340
|
-
return '
|
|
477
|
+
return 'BIGINT'
|
|
341
478
|
|
|
342
479
|
def to_sa_type(self) -> str:
|
|
343
|
-
return sql.
|
|
480
|
+
return sql.BigInteger
|
|
344
481
|
|
|
345
|
-
def
|
|
346
|
-
|
|
347
|
-
|
|
482
|
+
def _validate_literal(self, val: Any) -> None:
|
|
483
|
+
if not isinstance(val, int):
|
|
484
|
+
raise TypeError(f'Expected int, got {val.__class__.__name__}')
|
|
348
485
|
|
|
349
486
|
|
|
350
487
|
class FloatType(ColumnType):
|
|
351
|
-
def __init__(self):
|
|
352
|
-
super().__init__(self.Type.FLOAT)
|
|
488
|
+
def __init__(self, nullable: bool = False):
|
|
489
|
+
super().__init__(self.Type.FLOAT, nullable=nullable)
|
|
353
490
|
|
|
354
491
|
def to_sql(self) -> str:
|
|
355
492
|
return 'FLOAT'
|
|
@@ -357,14 +494,18 @@ class FloatType(ColumnType):
|
|
|
357
494
|
def to_sa_type(self) -> str:
|
|
358
495
|
return sql.Float
|
|
359
496
|
|
|
360
|
-
def
|
|
361
|
-
|
|
362
|
-
|
|
497
|
+
def _validate_literal(self, val: Any) -> None:
|
|
498
|
+
if not isinstance(val, float):
|
|
499
|
+
raise TypeError(f'Expected float, got {val.__class__.__name__}')
|
|
363
500
|
|
|
501
|
+
def _create_literal(self, val: Any) -> Any:
|
|
502
|
+
if isinstance(val, int):
|
|
503
|
+
return float(val)
|
|
504
|
+
return val
|
|
364
505
|
|
|
365
506
|
class BoolType(ColumnType):
|
|
366
|
-
def __init__(self):
|
|
367
|
-
super().__init__(self.Type.BOOL)
|
|
507
|
+
def __init__(self, nullable: bool = False):
|
|
508
|
+
super().__init__(self.Type.BOOL, nullable=nullable)
|
|
368
509
|
|
|
369
510
|
def to_sql(self) -> str:
|
|
370
511
|
return 'BOOLEAN'
|
|
@@ -372,14 +513,18 @@ class BoolType(ColumnType):
|
|
|
372
513
|
def to_sa_type(self) -> str:
|
|
373
514
|
return sql.Boolean
|
|
374
515
|
|
|
375
|
-
def
|
|
376
|
-
|
|
377
|
-
|
|
516
|
+
def _validate_literal(self, val: Any) -> None:
|
|
517
|
+
if not isinstance(val, bool):
|
|
518
|
+
raise TypeError(f'Expected bool, got {val.__class__.__name__}')
|
|
378
519
|
|
|
520
|
+
def _create_literal(self, val: Any) -> Any:
|
|
521
|
+
if isinstance(val, int):
|
|
522
|
+
return bool(val)
|
|
523
|
+
return val
|
|
379
524
|
|
|
380
525
|
class TimestampType(ColumnType):
|
|
381
|
-
def __init__(self):
|
|
382
|
-
super().__init__(self.Type.TIMESTAMP)
|
|
526
|
+
def __init__(self, nullable: bool = False):
|
|
527
|
+
super().__init__(self.Type.TIMESTAMP, nullable=nullable)
|
|
383
528
|
|
|
384
529
|
def to_sql(self) -> str:
|
|
385
530
|
return 'INTEGER'
|
|
@@ -387,14 +532,19 @@ class TimestampType(ColumnType):
|
|
|
387
532
|
def to_sa_type(self) -> str:
|
|
388
533
|
return sql.TIMESTAMP
|
|
389
534
|
|
|
390
|
-
def
|
|
391
|
-
|
|
535
|
+
def _validate_literal(self, val: Any) -> None:
|
|
536
|
+
if not isinstance(val, datetime.datetime) and not isinstance(val, datetime.date):
|
|
537
|
+
raise TypeError(f'Expected datetime.datetime or datetime.date, got {val.__class__.__name__}')
|
|
392
538
|
|
|
539
|
+
def _create_literal(self, val: Any) -> Any:
|
|
540
|
+
if isinstance(val, str):
|
|
541
|
+
return datetime.datetime.fromisoformat(val)
|
|
542
|
+
return val
|
|
393
543
|
|
|
394
544
|
class JsonType(ColumnType):
|
|
395
545
|
# TODO: type_spec also needs to be able to express lists
|
|
396
|
-
def __init__(self, type_spec: Optional[Dict[str, ColumnType]] = None):
|
|
397
|
-
super().__init__(self.Type.JSON)
|
|
546
|
+
def __init__(self, type_spec: Optional[Dict[str, ColumnType]] = None, nullable: bool = False):
|
|
547
|
+
super().__init__(self.Type.JSON, nullable=nullable)
|
|
398
548
|
self.type_spec = type_spec
|
|
399
549
|
|
|
400
550
|
def _as_dict(self) -> Dict:
|
|
@@ -405,13 +555,13 @@ class JsonType(ColumnType):
|
|
|
405
555
|
return result
|
|
406
556
|
|
|
407
557
|
@classmethod
|
|
408
|
-
def _from_dict(cls, d: Dict) ->
|
|
558
|
+
def _from_dict(cls, d: Dict) -> ColumnType:
|
|
409
559
|
type_spec = None
|
|
410
560
|
if 'type_spec' in d:
|
|
411
561
|
type_spec = {
|
|
412
562
|
field_name: cls.deserialize(field_type_dict) for field_name, field_type_dict in d['type_spec'].items()
|
|
413
563
|
}
|
|
414
|
-
return cls(type_spec)
|
|
564
|
+
return cls(type_spec, nullable=d['nullable'])
|
|
415
565
|
|
|
416
566
|
def to_sql(self) -> str:
|
|
417
567
|
return 'JSONB'
|
|
@@ -419,20 +569,34 @@ class JsonType(ColumnType):
|
|
|
419
569
|
def to_sa_type(self) -> str:
|
|
420
570
|
return sql.dialects.postgresql.JSONB
|
|
421
571
|
|
|
422
|
-
def
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
572
|
+
def print_value(self, val: Any) -> str:
|
|
573
|
+
val_type = self.infer_literal_type(val)
|
|
574
|
+
if val_type == self:
|
|
575
|
+
return str(val)
|
|
576
|
+
return val_type.print_value(val)
|
|
577
|
+
|
|
578
|
+
def _validate_literal(self, val: Any) -> None:
|
|
579
|
+
if not isinstance(val, dict) and not isinstance(val, list):
|
|
580
|
+
raise TypeError(f'Expected dict or list, got {val.__class__.__name__}')
|
|
581
|
+
try:
|
|
582
|
+
_ = json.dumps(val)
|
|
583
|
+
except TypeError as e:
|
|
584
|
+
raise TypeError(f'Expected JSON-serializable object, got {val}')
|
|
585
|
+
|
|
586
|
+
def _create_literal(self, val: Any) -> Any:
|
|
587
|
+
if isinstance(val, tuple):
|
|
588
|
+
val = list(val)
|
|
589
|
+
return val
|
|
427
590
|
|
|
428
591
|
class ArrayType(ColumnType):
|
|
429
592
|
def __init__(
|
|
430
|
-
self, shape: Tuple[Union[int, None], ...], dtype: ColumnType
|
|
431
|
-
super().__init__(self.Type.ARRAY)
|
|
593
|
+
self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
|
|
594
|
+
super().__init__(self.Type.ARRAY, nullable=nullable)
|
|
432
595
|
self.shape = shape
|
|
433
|
-
|
|
596
|
+
assert dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
|
|
597
|
+
self.dtype = dtype._type
|
|
434
598
|
|
|
435
|
-
def _supertype(cls, type1:
|
|
599
|
+
def _supertype(cls, type1: ArrayType, type2: ArrayType) -> Optional[ArrayType]:
|
|
436
600
|
if len(type1.shape) != len(type2.shape):
|
|
437
601
|
return None
|
|
438
602
|
base_type = ColumnType.supertype(type1.dtype, type2.dtype)
|
|
@@ -447,53 +611,90 @@ class ArrayType(ColumnType):
|
|
|
447
611
|
return result
|
|
448
612
|
|
|
449
613
|
def __str__(self) -> str:
|
|
450
|
-
return f'{self.
|
|
614
|
+
return f'{self._type.name.lower()}({self.shape}, dtype={self.dtype.name})'
|
|
451
615
|
|
|
452
616
|
@classmethod
|
|
453
|
-
def _from_dict(cls, d: Dict) ->
|
|
617
|
+
def _from_dict(cls, d: Dict) -> ColumnType:
|
|
454
618
|
assert 'shape' in d
|
|
455
619
|
assert 'dtype' in d
|
|
456
620
|
shape = tuple(d['shape'])
|
|
457
|
-
dtype = cls.Type(d['dtype'])
|
|
458
|
-
return cls(shape, dtype)
|
|
621
|
+
dtype = cls.make_type(cls.Type(d['dtype']))
|
|
622
|
+
return cls(shape, dtype, nullable=d['nullable'])
|
|
623
|
+
|
|
624
|
+
@classmethod
|
|
625
|
+
def from_literal(cls, val: np.ndarray) -> Optional[ArrayType]:
|
|
626
|
+
# determine our dtype
|
|
627
|
+
assert isinstance(val, np.ndarray)
|
|
628
|
+
if np.issubdtype(val.dtype, np.integer):
|
|
629
|
+
dtype = IntType()
|
|
630
|
+
elif np.issubdtype(val.dtype, np.floating):
|
|
631
|
+
dtype = FloatType()
|
|
632
|
+
elif val.dtype == np.bool_:
|
|
633
|
+
dtype = BoolType()
|
|
634
|
+
elif val.dtype == np.str_:
|
|
635
|
+
dtype = StringType()
|
|
636
|
+
else:
|
|
637
|
+
return None
|
|
638
|
+
return cls(val.shape, dtype=dtype, nullable=True)
|
|
639
|
+
|
|
640
|
+
def is_valid_literal(self, val: np.ndarray) -> bool:
|
|
641
|
+
if not isinstance(val, np.ndarray):
|
|
642
|
+
return False
|
|
643
|
+
if len(val.shape) != len(self.shape):
|
|
644
|
+
return False
|
|
645
|
+
# check that the shapes are compatible
|
|
646
|
+
for n1, n2 in zip(val.shape, self.shape):
|
|
647
|
+
if n1 is None:
|
|
648
|
+
return False
|
|
649
|
+
if n2 is None:
|
|
650
|
+
# wildcard
|
|
651
|
+
continue
|
|
652
|
+
if n1 != n2:
|
|
653
|
+
return False
|
|
654
|
+
return val.dtype == self.numpy_dtype()
|
|
655
|
+
|
|
656
|
+
def _validate_literal(self, val: Any) -> None:
|
|
657
|
+
if not isinstance(val, np.ndarray):
|
|
658
|
+
raise TypeError(f'Expected numpy.ndarray, got {val.__class__.__name__}')
|
|
659
|
+
if not self.is_valid_literal(val):
|
|
660
|
+
raise TypeError((
|
|
661
|
+
f'Expected ndarray({self.shape}, dtype={self.numpy_dtype()}), '
|
|
662
|
+
f'got ndarray({val.shape}, dtype={val.dtype})'))
|
|
663
|
+
|
|
664
|
+
def _create_literal(self, val: Any) -> Any:
|
|
665
|
+
if isinstance(val, (list,tuple)):
|
|
666
|
+
# map python float to whichever numpy float is
|
|
667
|
+
# declared for this type, rather than assume float64
|
|
668
|
+
return np.array(val, dtype=self.numpy_dtype())
|
|
669
|
+
return val
|
|
459
670
|
|
|
460
671
|
def to_sql(self) -> str:
|
|
461
672
|
return 'BYTEA'
|
|
462
673
|
|
|
463
674
|
def to_sa_type(self) -> str:
|
|
464
|
-
return sql.
|
|
465
|
-
|
|
466
|
-
def
|
|
467
|
-
|
|
675
|
+
return sql.LargeBinary
|
|
676
|
+
|
|
677
|
+
def numpy_dtype(self) -> np.dtype:
|
|
678
|
+
if self.dtype == self.Type.INT:
|
|
679
|
+
return np.dtype(np.int64)
|
|
680
|
+
if self.dtype == self.Type.FLOAT:
|
|
681
|
+
return np.dtype(np.float32)
|
|
682
|
+
if self.dtype == self.Type.BOOL:
|
|
683
|
+
return np.dtype(np.bool_)
|
|
684
|
+
if self.dtype == self.Type.STRING:
|
|
685
|
+
return np.dtype(np.str_)
|
|
686
|
+
assert False
|
|
468
687
|
|
|
469
688
|
|
|
470
689
|
class ImageType(ColumnType):
|
|
471
|
-
@enum.unique
|
|
472
|
-
class Mode(enum.Enum):
|
|
473
|
-
L = 0,
|
|
474
|
-
RGB = 1
|
|
475
|
-
|
|
476
|
-
@classmethod
|
|
477
|
-
def from_pil(cls, pil_mode: str) -> 'Mode':
|
|
478
|
-
if pil_mode == 'L':
|
|
479
|
-
return cls.L
|
|
480
|
-
if pil_mode == 'RGB':
|
|
481
|
-
return cls.RGB
|
|
482
|
-
|
|
483
|
-
def to_pil(self) -> str:
|
|
484
|
-
return self.name
|
|
485
|
-
|
|
486
|
-
def num_channels(self) -> int:
|
|
487
|
-
return len(self.name)
|
|
488
|
-
|
|
489
690
|
def __init__(
|
|
490
691
|
self, width: Optional[int] = None, height: Optional[int] = None, size: Optional[Tuple[int, int]] = None,
|
|
491
|
-
mode: Optional[
|
|
692
|
+
mode: Optional[str] = None, nullable: bool = False
|
|
492
693
|
):
|
|
493
694
|
"""
|
|
494
695
|
TODO: does it make sense to specify only width or height?
|
|
495
696
|
"""
|
|
496
|
-
super().__init__(self.Type.IMAGE)
|
|
697
|
+
super().__init__(self.Type.IMAGE, nullable=nullable)
|
|
497
698
|
assert not(width is not None and size is not None)
|
|
498
699
|
assert not(height is not None and size is not None)
|
|
499
700
|
if size is not None:
|
|
@@ -504,22 +705,53 @@ class ImageType(ColumnType):
|
|
|
504
705
|
self.height = height
|
|
505
706
|
self.mode = mode
|
|
506
707
|
|
|
708
|
+
def __str__(self) -> str:
|
|
709
|
+
if self.width is not None or self.height is not None or self.mode is not None:
|
|
710
|
+
params_str = ''
|
|
711
|
+
if self.width is not None:
|
|
712
|
+
params_str = f'width={self.width}'
|
|
713
|
+
if self.height is not None:
|
|
714
|
+
if len(params_str) > 0:
|
|
715
|
+
params_str += ', '
|
|
716
|
+
params_str += f'height={self.height}'
|
|
717
|
+
if self.mode is not None:
|
|
718
|
+
if len(params_str) > 0:
|
|
719
|
+
params_str += ', '
|
|
720
|
+
params_str += f'mode={self.mode}'
|
|
721
|
+
params_str = f'({params_str})'
|
|
722
|
+
else:
|
|
723
|
+
params_str = ''
|
|
724
|
+
return f'{self._type.name.lower()}{params_str}'
|
|
725
|
+
|
|
726
|
+
def _is_supertype_of(self, other: ImageType) -> bool:
|
|
727
|
+
if self.mode != other.mode:
|
|
728
|
+
return False
|
|
729
|
+
if self.width is None and self.height is None:
|
|
730
|
+
return True
|
|
731
|
+
if self.width != other.width and self.height != other.height:
|
|
732
|
+
return False
|
|
733
|
+
|
|
734
|
+
@property
def size(self) -> Optional[Tuple[int, int]]:
    """Return (width, height), or None unless both width and height are set."""
    dims = (self.width, self.height)
    if any(dim is None for dim in dims):
        return None
    return dims
|
|
739
|
+
|
|
507
740
|
@property
def num_channels(self) -> Optional[int]:
    """
    Return the number of channels (bands) of the declared mode, or None if no
    mode is set.

    `mode` is passed to the constructor as a plain string, which has no
    `num_channels()` method, so the band count is looked up with
    PIL.Image.getmodebands() instead.

    Raises:
        KeyError: if `mode` is not a mode name known to PIL
    """
    if self.mode is None:
        return None
    # presumably mode holds a PIL mode name such as 'L' or 'RGB' -- confirm against callers
    return PIL.Image.getmodebands(self.mode)
|
|
510
743
|
|
|
511
744
|
def _as_dict(self) -> Dict:
    """Return the base class's dict representation, extended with width, height and mode."""
    type_dict = super()._as_dict()
    for attr_name in ('width', 'height', 'mode'):
        type_dict[attr_name] = getattr(self, attr_name)
    return type_dict
|
|
515
748
|
|
|
516
749
|
@classmethod
|
|
517
|
-
def _from_dict(cls, d: Dict) ->
|
|
750
|
+
def _from_dict(cls, d: Dict) -> ColumnType:
|
|
518
751
|
assert 'width' in d
|
|
519
752
|
assert 'height' in d
|
|
520
753
|
assert 'mode' in d
|
|
521
|
-
|
|
522
|
-
return cls(width=d['width'], height=d['height'], mode=cls.Mode(mode_val) if mode_val is not None else None)
|
|
754
|
+
return cls(width=d['width'], height=d['height'], mode=d['mode'], nullable=d['nullable'])
|
|
523
755
|
|
|
524
756
|
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
525
757
|
if not target.is_image_type():
|
|
@@ -547,21 +779,55 @@ class ImageType(ColumnType):
|
|
|
547
779
|
def to_sa_type(self) -> str:
    """
    Return the `sql.String` type object for this column type.

    NOTE(review): the value returned is the `sql.String` attribute, not a string,
    so the `-> str` annotation looks inaccurate -- confirm.
    """
    return sql.String
|
|
549
781
|
|
|
550
|
-
def
|
|
551
|
-
|
|
782
|
+
def _validate_literal(self, val: Any) -> None:
    """
    Validate a literal for this type.

    A PIL.Image.Image instance is accepted as is; any other value is passed on
    to _validate_file_path().
    """
    if not isinstance(val, PIL.Image.Image):
        self._validate_file_path(val)
|
|
552
786
|
|
|
787
|
+
def validate_media(self, val: Any) -> None:
    """
    Check that the file at path `val` can be identified as an image by PIL.

    Args:
        val: path of the image file; must be a str

    Raises:
        excs.Error: if PIL cannot identify the file as an image
    """
    assert isinstance(val, str)
    try:
        # the image returned by open() holds on to the file; use it as a context
        # manager so that the file is closed again instead of being leaked
        with PIL.Image.open(val):
            pass
    except PIL.UnidentifiedImageError:
        raise excs.Error(f'Not a valid image: {val}') from None
|
|
553
793
|
|
|
554
794
|
class VideoType(ColumnType):
    """Column type for videos; values are stored as file paths."""

    def __init__(self, nullable: bool = False):
        """Initialize the base class with Type.VIDEO and the given nullability."""
        super().__init__(self.Type.VIDEO, nullable=nullable)

    def to_sql(self) -> str:
        """Return the SQL type name used for this column type."""
        # stored as a file path
        return 'VARCHAR'

    def to_sa_type(self) -> str:
        """Return the `sql.String` type object for this column type."""
        return sql.String

    def _validate_literal(self, val: Any) -> None:
        """Validate a literal by passing it to _validate_file_path()."""
        self._validate_file_path(val)

    def validate_media(self, val: Any) -> None:
        """
        Check that the file at path `val` is a decodable video.

        The file must contain at least one video stream, and at least two frames
        of the first video stream must decode. At most 10 frames are decoded.

        Raises:
            excs.Error: if there is no video stream, if fewer than two frames
                decode, or if av reports an AVError
        """
        assert isinstance(val, str)
        try:
            with av.open(val, 'r') as container:
                if len(container.streams.video) == 0:
                    raise excs.Error(f'Not a valid video: {val}')
                # decode a few frames to make sure it's playable
                # TODO: decode all frames? but that's very slow
                frames_seen = 0
                for frames_seen, frame in enumerate(container.decode(video=0), start=1):
                    _ = frame.to_image()
                    if frames_seen == 10:
                        break
                if frames_seen < 2:
                    # this is most likely an image file
                    raise excs.Error(f'Not a valid video: {val}')
        except av.AVError:
            raise excs.Error(f'Not a valid video: {val}') from None
|
|
827
|
+
|
|
828
|
+
class AudioType(ColumnType):
|
|
829
|
+
def __init__(self, nullable: bool = False):
    """Initialize the base class with Type.AUDIO and the given nullability."""
    super().__init__(self.Type.AUDIO, nullable=nullable)
|
|
565
831
|
|
|
566
832
|
def to_sql(self) -> str:
|
|
567
833
|
# stored as a file path
|
|
@@ -570,5 +836,60 @@ class VideoType(ColumnType):
|
|
|
570
836
|
def to_sa_type(self) -> str:
    """
    Return the `sql.String` type object for this column type.

    NOTE(review): the value returned is the `sql.String` attribute, not a string,
    so the `-> str` annotation looks inaccurate -- confirm.
    """
    return sql.String
|
|
572
838
|
|
|
573
|
-
def
|
|
574
|
-
|
|
839
|
+
def _validate_literal(self, val: Any) -> None:
|
|
840
|
+
self._validate_file_path(val)
|
|
841
|
+
|
|
842
|
+
def validate_media(self, val: Any) -> None:
    """
    Check that the file `val` contains an audio stream that decodes completely.

    Raises:
        excs.Error: if the file has no audio stream, or if av reports an AVError
            while opening or decoding it
    """
    try:
        with av.open(val) as media:
            if len(media.streams.audio) == 0:
                raise excs.Error(f'No audio stream in file: {val}')
            first_audio = media.streams.audio[0]

            # decode everything to make sure it's playable
            # TODO: is there some way to verify it's a playable audio file other than decoding all of it?
            decoded = (frame for pkt in media.demux(first_audio) for frame in pkt.decode())
            for _ in decoded:
                pass
    except av.AVError as e:
        raise excs.Error(f'Not a valid audio file: {val}\n{e}') from None
|
|
856
|
+
|
|
857
|
+
class DocumentType(ColumnType):
    """Column type for documents; values are stored as file paths."""

    @enum.unique
    class DocumentFormat(enum.Enum):
        HTML = 0
        MD = 1
        PDF = 2

    def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
        """
        Args:
            nullable: passed through to the base class
            doc_formats: comma-separated DocumentFormat member names, e.g. 'HTML,PDF';
                names are matched case-insensitively and surrounding whitespace is
                ignored. None selects all formats.

        Raises:
            ValueError: if an entry of doc_formats is not a DocumentFormat member name
        """
        super().__init__(self.Type.DOCUMENT, nullable=nullable)
        if doc_formats is None:
            self._doc_formats = list(self.DocumentFormat)
            return

        # Validate against the same normalized name that is used for the lookup.
        # Checking hasattr() on the raw token rejected lower-case names and let
        # non-member attributes (e.g. 'mro') through to a KeyError.
        members = self.DocumentFormat.__members__
        selected = []
        for type_str in doc_formats.split(','):
            member_name = type_str.strip().upper()
            if member_name not in members:
                raise ValueError(f'Invalid document type: {type_str}')
            selected.append(members[member_name])
        self._doc_formats = selected

    def to_sql(self) -> str:
        """Return the SQL type name used for this column type."""
        # stored as a file path
        return 'VARCHAR'

    def to_sa_type(self) -> str:
        """Return the `sql.String` type object for this column type."""
        return sql.String

    def _validate_literal(self, val: Any) -> None:
        """Validate a literal by passing it to _validate_file_path()."""
        self._validate_file_path(val)

    def validate_media(self, val: Any) -> None:
        """
        Check that the file at path `val` is recognized by get_document_handle().

        Errors raised by open() itself (e.g. a missing file) propagate unchanged.

        Raises:
            excs.Error: if reading the file or get_document_handle() fails, or if
                get_document_handle() returns None
        """
        assert isinstance(val, str)
        from pixeltable.utils.documents import get_document_handle
        # NOTE(review): the file is read as UTF-8 text; a binary format such as PDF
        # would presumably fail to decode here and be reported as unrecognized -- confirm.
        with open(val, 'r', encoding='utf8') as fh:
            try:
                dh = get_document_handle(fh.read())
            except Exception:
                raise excs.Error(f'Not a recognized document format: {val}') from None
        if dh is None:
            raise excs.Error(f'Not a recognized document format: {val}')
|