pixeltable 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +6 -3
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/globals.py +15 -6
- pixeltable/catalog/insertable_table.py +23 -8
- pixeltable/catalog/named_function.py +1 -1
- pixeltable/catalog/path_dict.py +4 -4
- pixeltable/catalog/schema_object.py +30 -18
- pixeltable/catalog/table.py +87 -104
- pixeltable/catalog/table_version.py +35 -24
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +15 -8
- pixeltable/dataframe.py +56 -56
- pixeltable/env.py +10 -9
- pixeltable/exec/__init__.py +3 -3
- pixeltable/exec/aggregation_node.py +3 -3
- pixeltable/exec/expr_eval_node.py +3 -3
- pixeltable/exec/in_memory_data_node.py +4 -4
- pixeltable/exec/sql_node.py +4 -1
- pixeltable/exprs/arithmetic_expr.py +41 -16
- pixeltable/exprs/array_slice.py +3 -4
- pixeltable/exprs/column_ref.py +20 -4
- pixeltable/exprs/comparison.py +11 -6
- pixeltable/exprs/data_row.py +3 -0
- pixeltable/exprs/expr.py +88 -23
- pixeltable/exprs/function_call.py +12 -1
- pixeltable/exprs/globals.py +3 -1
- pixeltable/exprs/inline_array.py +4 -4
- pixeltable/exprs/json_path.py +36 -20
- pixeltable/exprs/row_builder.py +4 -4
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/functions/__init__.py +1 -2
- pixeltable/functions/audio.py +32 -0
- pixeltable/functions/huggingface.py +4 -4
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +46 -0
- pixeltable/functions/video.py +5 -1
- pixeltable/functions/{eval.py → vision.py} +166 -27
- pixeltable/globals.py +57 -28
- pixeltable/io/external_store.py +6 -6
- pixeltable/io/globals.py +13 -14
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/pandas.py +60 -19
- pixeltable/io/parquet.py +14 -14
- pixeltable/iterators/document.py +7 -7
- pixeltable/iterators/video.py +55 -23
- pixeltable/plan.py +58 -29
- pixeltable/store.py +97 -59
- pixeltable/tool/create_test_db_dump.py +17 -11
- pixeltable/type_system.py +155 -143
- pixeltable/utils/pytorch.py +12 -10
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/METADATA +10 -10
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/RECORD +56 -54
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/entry_points.txt +0 -0
pixeltable/type_system.py
CHANGED
|
@@ -7,13 +7,12 @@ import json
|
|
|
7
7
|
import typing
|
|
8
8
|
import urllib.parse
|
|
9
9
|
import urllib.request
|
|
10
|
-
from copy import deepcopy
|
|
11
10
|
from pathlib import Path
|
|
12
|
-
from typing import Any,
|
|
11
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union
|
|
13
12
|
|
|
14
|
-
import
|
|
15
|
-
import av
|
|
13
|
+
import av # type: ignore
|
|
16
14
|
import numpy as np
|
|
15
|
+
import PIL.Image
|
|
17
16
|
import sqlalchemy as sql
|
|
18
17
|
|
|
19
18
|
from pixeltable import exceptions as excs
|
|
@@ -39,10 +38,10 @@ class ColumnType:
|
|
|
39
38
|
|
|
40
39
|
@classmethod
|
|
41
40
|
def supertype(
|
|
42
|
-
cls, type1: 'Type', type2: 'Type',
|
|
41
|
+
cls, type1: 'ColumnType.Type', type2: 'ColumnType.Type',
|
|
43
42
|
# we need to pass this in because we can't easily append it as a class member
|
|
44
|
-
common_supertypes: Dict[Tuple['Type', 'Type'], 'Type']
|
|
45
|
-
) -> Optional['Type']:
|
|
43
|
+
common_supertypes: Dict[Tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type']
|
|
44
|
+
) -> Optional['ColumnType.Type']:
|
|
46
45
|
if type1 == type2:
|
|
47
46
|
return type1
|
|
48
47
|
t = common_supertypes.get((type1, type2))
|
|
@@ -74,7 +73,7 @@ class ColumnType:
|
|
|
74
73
|
|
|
75
74
|
scalar_types = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
|
|
76
75
|
numeric_types = {Type.INT, Type.FLOAT}
|
|
77
|
-
common_supertypes:
|
|
76
|
+
common_supertypes: dict[tuple[Type, Type], Type] = {
|
|
78
77
|
(Type.BOOL, Type.INT): Type.INT,
|
|
79
78
|
(Type.BOOL, Type.FLOAT): Type.FLOAT,
|
|
80
79
|
(Type.INT, Type.FLOAT): Type.FLOAT,
|
|
@@ -95,11 +94,12 @@ class ColumnType:
|
|
|
95
94
|
def serialize(self) -> str:
|
|
96
95
|
return json.dumps(self.as_dict())
|
|
97
96
|
|
|
98
|
-
def copy(self, nullable:
|
|
99
|
-
|
|
100
|
-
if nullable
|
|
101
|
-
|
|
102
|
-
|
|
97
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
98
|
+
# Default implementation calls unary initializer
|
|
99
|
+
if nullable == self.nullable:
|
|
100
|
+
return self
|
|
101
|
+
else:
|
|
102
|
+
return self.__class__(nullable=nullable)
|
|
103
103
|
|
|
104
104
|
@classmethod
|
|
105
105
|
def serialize_list(cls, type_list: List[ColumnType]) -> str:
|
|
@@ -136,7 +136,7 @@ class ColumnType:
|
|
|
136
136
|
Default implementation: simply invoke c'tor
|
|
137
137
|
"""
|
|
138
138
|
assert 'nullable' in d
|
|
139
|
-
return cls(nullable=d['nullable'])
|
|
139
|
+
return cls(nullable=d['nullable']) # type: ignore[call-arg]
|
|
140
140
|
|
|
141
141
|
@classmethod
|
|
142
142
|
def make_type(cls, t: Type) -> ColumnType:
|
|
@@ -166,91 +166,89 @@ class ColumnType:
|
|
|
166
166
|
return self._type.name.lower()
|
|
167
167
|
|
|
168
168
|
def __eq__(self, other: object) -> bool:
|
|
169
|
-
return self.matches(other) and self.nullable == other.nullable
|
|
170
|
-
|
|
171
|
-
def is_supertype_of(self, other: ColumnType) -> bool:
|
|
172
|
-
if type(self) != type(other):
|
|
173
|
-
return False
|
|
174
|
-
if self.matches(other):
|
|
175
|
-
return True
|
|
176
|
-
return self._is_supertype_of(other)
|
|
169
|
+
return isinstance(other, ColumnType) and self.matches(other) and self.nullable == other.nullable
|
|
177
170
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
171
|
+
def is_supertype_of(self, other: ColumnType, ignore_nullable: bool = False) -> bool:
|
|
172
|
+
if ignore_nullable:
|
|
173
|
+
supertype = self.supertype(other)
|
|
174
|
+
if supertype is None:
|
|
175
|
+
return False
|
|
176
|
+
return supertype.matches(self)
|
|
177
|
+
else:
|
|
178
|
+
return self.supertype(other) == self
|
|
181
179
|
|
|
182
|
-
def matches(self, other:
|
|
180
|
+
def matches(self, other: ColumnType) -> bool:
|
|
183
181
|
"""Two types match if they're equal, aside from nullability"""
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
assert isinstance(other, ColumnType), type(other)
|
|
187
|
-
if type(self) != type(other):
|
|
188
|
-
return False
|
|
189
|
-
for member_var in vars(self).keys():
|
|
190
|
-
if member_var == '_nullable':
|
|
191
|
-
continue
|
|
192
|
-
if getattr(self, member_var) != getattr(other, member_var):
|
|
193
|
-
return False
|
|
194
|
-
return True
|
|
182
|
+
# Default: just compare base types (this works for all types whose only parameter is nullable)
|
|
183
|
+
return self._type == other._type
|
|
195
184
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
185
|
+
def supertype(self, other: ColumnType) -> Optional[ColumnType]:
|
|
186
|
+
if self == other:
|
|
187
|
+
return self
|
|
188
|
+
if self.matches(other):
|
|
189
|
+
return self.copy(nullable=(self.nullable or other.nullable))
|
|
200
190
|
|
|
201
|
-
if
|
|
202
|
-
return
|
|
203
|
-
if
|
|
204
|
-
return
|
|
191
|
+
if self.is_invalid_type():
|
|
192
|
+
return other
|
|
193
|
+
if other.is_invalid_type():
|
|
194
|
+
return self
|
|
205
195
|
|
|
206
|
-
if
|
|
207
|
-
t =
|
|
196
|
+
if self.is_scalar_type() and other.is_scalar_type():
|
|
197
|
+
t = self.Type.supertype(self._type, other._type, self.common_supertypes)
|
|
208
198
|
if t is not None:
|
|
209
|
-
return
|
|
199
|
+
return self.make_type(t).copy(nullable=(self.nullable or other.nullable))
|
|
210
200
|
return None
|
|
211
201
|
|
|
212
|
-
if type1._type == type2._type:
|
|
213
|
-
return cls._supertype(type1, type2)
|
|
214
|
-
|
|
215
202
|
return None
|
|
216
203
|
|
|
217
204
|
@classmethod
|
|
218
|
-
|
|
219
|
-
def _supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
|
|
220
|
-
"""
|
|
221
|
-
Class-specific implementation of determining the supertype. type1 and type2 are from the same subclass of
|
|
222
|
-
ColumnType.
|
|
223
|
-
"""
|
|
224
|
-
pass
|
|
225
|
-
|
|
226
|
-
@classmethod
|
|
227
|
-
def infer_literal_type(cls, val: Any) -> Optional[ColumnType]:
|
|
205
|
+
def infer_literal_type(cls, val: Any, nullable: bool = False) -> Optional[ColumnType]:
|
|
228
206
|
if isinstance(val, str):
|
|
229
|
-
return StringType()
|
|
207
|
+
return StringType(nullable=nullable)
|
|
230
208
|
if isinstance(val, bool):
|
|
231
209
|
# We have to check bool before int, because isinstance(b, int) is True if b is a Python bool
|
|
232
|
-
return BoolType()
|
|
210
|
+
return BoolType(nullable=nullable)
|
|
233
211
|
if isinstance(val, int):
|
|
234
|
-
return IntType()
|
|
212
|
+
return IntType(nullable=nullable)
|
|
235
213
|
if isinstance(val, float):
|
|
236
|
-
return FloatType()
|
|
214
|
+
return FloatType(nullable=nullable)
|
|
237
215
|
if isinstance(val, datetime.datetime):
|
|
238
|
-
return TimestampType()
|
|
216
|
+
return TimestampType(nullable=nullable)
|
|
239
217
|
if isinstance(val, PIL.Image.Image):
|
|
240
|
-
return ImageType(width=val.width, height=val.height, mode=val.mode)
|
|
218
|
+
return ImageType(width=val.width, height=val.height, mode=val.mode, nullable=nullable)
|
|
241
219
|
if isinstance(val, np.ndarray):
|
|
242
|
-
col_type = ArrayType.from_literal(val)
|
|
220
|
+
col_type = ArrayType.from_literal(val, nullable=nullable)
|
|
243
221
|
if col_type is not None:
|
|
244
222
|
return col_type
|
|
245
223
|
# this could still be json-serializable
|
|
246
224
|
if isinstance(val, dict) or isinstance(val, list) or isinstance(val, np.ndarray):
|
|
247
225
|
try:
|
|
248
226
|
JsonType().validate_literal(val)
|
|
249
|
-
return JsonType()
|
|
227
|
+
return JsonType(nullable=nullable)
|
|
250
228
|
except TypeError:
|
|
251
229
|
return None
|
|
252
230
|
return None
|
|
253
231
|
|
|
232
|
+
@classmethod
|
|
233
|
+
def infer_common_literal_type(cls, vals: Iterable[Any]) -> Optional[ColumnType]:
|
|
234
|
+
"""
|
|
235
|
+
Returns the most specific type that is a supertype of all literals in `vals`. If no such type
|
|
236
|
+
exists, returns None.
|
|
237
|
+
|
|
238
|
+
Args:
|
|
239
|
+
vals: A collection of literals.
|
|
240
|
+
"""
|
|
241
|
+
inferred_type: Optional[ColumnType] = None
|
|
242
|
+
for val in vals:
|
|
243
|
+
val_type = cls.infer_literal_type(val)
|
|
244
|
+
if inferred_type is None:
|
|
245
|
+
inferred_type = val_type
|
|
246
|
+
else:
|
|
247
|
+
inferred_type = inferred_type.supertype(val_type)
|
|
248
|
+
if inferred_type is None:
|
|
249
|
+
return None
|
|
250
|
+
return inferred_type
|
|
251
|
+
|
|
254
252
|
@classmethod
|
|
255
253
|
def from_python_type(cls, t: type) -> Optional[ColumnType]:
|
|
256
254
|
if typing.get_origin(t) is typing.Union:
|
|
@@ -317,10 +315,8 @@ class ColumnType:
|
|
|
317
315
|
@abc.abstractmethod
|
|
318
316
|
def _validate_literal(self, val: Any) -> None:
|
|
319
317
|
"""Raise TypeError if val is not a valid literal for this type"""
|
|
320
|
-
pass
|
|
321
318
|
|
|
322
|
-
|
|
323
|
-
def _create_literal(self, val : Any) -> Any:
|
|
319
|
+
def _create_literal(self, val: Any) -> Any:
|
|
324
320
|
"""Create a literal of this type from val, including any needed conversions.
|
|
325
321
|
val is guaranteed to be non-None"""
|
|
326
322
|
return val
|
|
@@ -389,21 +385,6 @@ class ColumnType:
|
|
|
389
385
|
"""
|
|
390
386
|
pass
|
|
391
387
|
|
|
392
|
-
@staticmethod
|
|
393
|
-
def no_conversion(v: Any) -> Any:
|
|
394
|
-
"""
|
|
395
|
-
Special return value of conversion_fn() that indicates that no conversion is necessary.
|
|
396
|
-
Should not be called
|
|
397
|
-
"""
|
|
398
|
-
assert False
|
|
399
|
-
|
|
400
|
-
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
401
|
-
"""
|
|
402
|
-
Return Callable that converts a column value of type self to a value of type 'target'.
|
|
403
|
-
Returns None if conversion isn't possible.
|
|
404
|
-
"""
|
|
405
|
-
return None
|
|
406
|
-
|
|
407
388
|
|
|
408
389
|
class InvalidType(ColumnType):
|
|
409
390
|
def __init__(self, nullable: bool = False):
|
|
@@ -423,17 +404,6 @@ class StringType(ColumnType):
|
|
|
423
404
|
def __init__(self, nullable: bool = False):
|
|
424
405
|
super().__init__(self.Type.STRING, nullable=nullable)
|
|
425
406
|
|
|
426
|
-
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
427
|
-
if not target.is_timestamp_type():
|
|
428
|
-
return None
|
|
429
|
-
def convert(val: str) -> Optional[datetime.datetime]:
|
|
430
|
-
try:
|
|
431
|
-
dt = datetime.datetime.fromisoformat(val)
|
|
432
|
-
return dt
|
|
433
|
-
except ValueError:
|
|
434
|
-
return None
|
|
435
|
-
return convert
|
|
436
|
-
|
|
437
407
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
438
408
|
return sql.String()
|
|
439
409
|
|
|
@@ -522,6 +492,37 @@ class JsonType(ColumnType):
|
|
|
522
492
|
super().__init__(self.Type.JSON, nullable=nullable)
|
|
523
493
|
self.type_spec = type_spec
|
|
524
494
|
|
|
495
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
496
|
+
return JsonType(self.type_spec, nullable=nullable)
|
|
497
|
+
|
|
498
|
+
def matches(self, other: ColumnType) -> bool:
|
|
499
|
+
return other._type == self.Type.JSON and self.type_spec == other.type_spec
|
|
500
|
+
|
|
501
|
+
def supertype(self, other: ColumnType) -> Optional[JsonType]:
|
|
502
|
+
if not isinstance(other, JsonType):
|
|
503
|
+
return None
|
|
504
|
+
if self.type_spec is None:
|
|
505
|
+
# we don't have a type spec and can accept anything accepted by other
|
|
506
|
+
return JsonType(nullable=(self.nullable or other.nullable))
|
|
507
|
+
if other.type_spec is None:
|
|
508
|
+
# we have a type spec but other doesn't
|
|
509
|
+
return JsonType(nullable=(self.nullable or other.nullable))
|
|
510
|
+
|
|
511
|
+
# we both have type specs; the supertype's type spec is the union of the two
|
|
512
|
+
type_spec: dict[str, ColumnType] = {}
|
|
513
|
+
type_spec.update(self.type_spec)
|
|
514
|
+
for other_field_name, other_field_type in other.type_spec.items():
|
|
515
|
+
if other_field_name not in type_spec:
|
|
516
|
+
type_spec[other_field_name] = other_field_type
|
|
517
|
+
else:
|
|
518
|
+
# both type specs have this field
|
|
519
|
+
field_type = type_spec[other_field_name].supertype(other_field_type)
|
|
520
|
+
if field_type is None:
|
|
521
|
+
# conflicting types
|
|
522
|
+
return JsonType(nullable=(self.nullable or other.nullable))
|
|
523
|
+
type_spec[other_field_name] = field_type
|
|
524
|
+
return JsonType(type_spec, nullable=(self.nullable or other.nullable))
|
|
525
|
+
|
|
525
526
|
def _as_dict(self) -> Dict:
|
|
526
527
|
result = super()._as_dict()
|
|
527
528
|
if self.type_spec is not None:
|
|
@@ -551,11 +552,20 @@ class JsonType(ColumnType):
|
|
|
551
552
|
|
|
552
553
|
def _validate_literal(self, val: Any) -> None:
|
|
553
554
|
if not isinstance(val, dict) and not isinstance(val, list):
|
|
555
|
+
# TODO In the future we should accept scalars too, which would enable us to remove this top-level check
|
|
554
556
|
raise TypeError(f'Expected dict or list, got {val.__class__.__name__}')
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
557
|
+
if not self.__is_valid_literal(val):
|
|
558
|
+
raise TypeError(f'That literal is not a valid Pixeltable JSON object: {val}')
|
|
559
|
+
|
|
560
|
+
@classmethod
|
|
561
|
+
def __is_valid_literal(cls, val: Any) -> None:
|
|
562
|
+
if val is None or isinstance(val, (str, int, float, bool)):
|
|
563
|
+
return True
|
|
564
|
+
if isinstance(val, (list, tuple)):
|
|
565
|
+
return all(cls.__is_valid_literal(v) for v in val)
|
|
566
|
+
if isinstance(val, dict):
|
|
567
|
+
return all(isinstance(k, str) and cls.__is_valid_literal(v) for k, v in val.items())
|
|
568
|
+
return False
|
|
559
569
|
|
|
560
570
|
def _create_literal(self, val: Any) -> Any:
|
|
561
571
|
if isinstance(val, tuple):
|
|
@@ -564,21 +574,29 @@ class JsonType(ColumnType):
|
|
|
564
574
|
|
|
565
575
|
|
|
566
576
|
class ArrayType(ColumnType):
|
|
567
|
-
def __init__(
|
|
568
|
-
self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
|
|
577
|
+
def __init__(self, shape: tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
|
|
569
578
|
super().__init__(self.Type.ARRAY, nullable=nullable)
|
|
570
579
|
self.shape = shape
|
|
571
580
|
assert dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
|
|
581
|
+
self.pxt_dtype = dtype
|
|
572
582
|
self.dtype = dtype._type
|
|
573
583
|
|
|
574
|
-
def
|
|
575
|
-
|
|
584
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
585
|
+
return ArrayType(self.shape, self.pxt_dtype, nullable=nullable)
|
|
586
|
+
|
|
587
|
+
def matches(self, other: ColumnType) -> bool:
|
|
588
|
+
return other._type == self.Type.ARRAY and self.shape == other.shape and self.dtype == other.dtype
|
|
589
|
+
|
|
590
|
+
def supertype(self, other: ColumnType) -> Optional[ArrayType]:
|
|
591
|
+
if not isinstance(other, ArrayType):
|
|
576
592
|
return None
|
|
577
|
-
|
|
593
|
+
if len(self.shape) != len(other.shape):
|
|
594
|
+
return None
|
|
595
|
+
base_type = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
|
|
578
596
|
if base_type is None:
|
|
579
597
|
return None
|
|
580
|
-
shape = [n1 if n1 == n2 else None for n1, n2 in zip(
|
|
581
|
-
return ArrayType(tuple(shape), base_type, nullable=(
|
|
598
|
+
shape = [n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape)]
|
|
599
|
+
return ArrayType(tuple(shape), self.make_type(base_type), nullable=(self.nullable or other.nullable))
|
|
582
600
|
|
|
583
601
|
def _as_dict(self) -> Dict:
|
|
584
602
|
result = super()._as_dict()
|
|
@@ -597,11 +615,11 @@ class ArrayType(ColumnType):
|
|
|
597
615
|
return cls(shape, dtype, nullable=d['nullable'])
|
|
598
616
|
|
|
599
617
|
@classmethod
|
|
600
|
-
def from_literal(cls, val: np.ndarray) -> Optional[ArrayType]:
|
|
618
|
+
def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
|
|
601
619
|
# determine our dtype
|
|
602
620
|
assert isinstance(val, np.ndarray)
|
|
603
621
|
if np.issubdtype(val.dtype, np.integer):
|
|
604
|
-
dtype = IntType()
|
|
622
|
+
dtype: ColumnType = IntType()
|
|
605
623
|
elif np.issubdtype(val.dtype, np.floating):
|
|
606
624
|
dtype = FloatType()
|
|
607
625
|
elif val.dtype == np.bool_:
|
|
@@ -610,7 +628,7 @@ class ArrayType(ColumnType):
|
|
|
610
628
|
dtype = StringType()
|
|
611
629
|
else:
|
|
612
630
|
return None
|
|
613
|
-
return cls(val.shape, dtype=dtype)
|
|
631
|
+
return cls(val.shape, dtype=dtype, nullable=nullable)
|
|
614
632
|
|
|
615
633
|
def is_valid_literal(self, val: np.ndarray) -> bool:
|
|
616
634
|
if not isinstance(val, np.ndarray):
|
|
@@ -677,6 +695,9 @@ class ImageType(ColumnType):
|
|
|
677
695
|
self.height = height
|
|
678
696
|
self.mode = mode
|
|
679
697
|
|
|
698
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
699
|
+
return ImageType(self.width, self.height, mode=self.mode, nullable=nullable)
|
|
700
|
+
|
|
680
701
|
def __str__(self) -> str:
|
|
681
702
|
if self.width is not None or self.height is not None or self.mode is not None:
|
|
682
703
|
params_str = ''
|
|
@@ -695,13 +716,21 @@ class ImageType(ColumnType):
|
|
|
695
716
|
params_str = ''
|
|
696
717
|
return f'{self._type.name.lower()}{params_str}'
|
|
697
718
|
|
|
698
|
-
def
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
719
|
+
def matches(self, other: ColumnType) -> bool:
|
|
720
|
+
return (
|
|
721
|
+
other._type == self.Type.IMAGE
|
|
722
|
+
and self.width == other.width
|
|
723
|
+
and self.height == other.height
|
|
724
|
+
and self.mode == other.mode
|
|
725
|
+
)
|
|
726
|
+
|
|
727
|
+
def supertype(self, other: ColumnType) -> Optional[ImageType]:
|
|
728
|
+
if not isinstance(other, ImageType):
|
|
729
|
+
return None
|
|
730
|
+
width = self.width if self.width == other.width else None
|
|
731
|
+
height = self.height if self.height == other.height else None
|
|
732
|
+
mode = self.mode if self.mode == other.mode else None
|
|
733
|
+
return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))
|
|
705
734
|
|
|
706
735
|
@property
|
|
707
736
|
def size(self) -> Optional[Tuple[int, int]]:
|
|
@@ -709,10 +738,6 @@ class ImageType(ColumnType):
|
|
|
709
738
|
return None
|
|
710
739
|
return (self.width, self.height)
|
|
711
740
|
|
|
712
|
-
@property
|
|
713
|
-
def num_channels(self) -> Optional[int]:
|
|
714
|
-
return None if self.mode is None else self.mode.num_channels()
|
|
715
|
-
|
|
716
741
|
def _as_dict(self) -> Dict:
|
|
717
742
|
result = super()._as_dict()
|
|
718
743
|
result.update(width=self.width, height=self.height, mode=self.mode)
|
|
@@ -725,26 +750,6 @@ class ImageType(ColumnType):
|
|
|
725
750
|
assert 'mode' in d
|
|
726
751
|
return cls(width=d['width'], height=d['height'], mode=d['mode'], nullable=d['nullable'])
|
|
727
752
|
|
|
728
|
-
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
729
|
-
if not target.is_image_type():
|
|
730
|
-
return None
|
|
731
|
-
assert isinstance(target, ImageType)
|
|
732
|
-
if (target.width is None) != (target.height is None):
|
|
733
|
-
# we can't resize only one dimension
|
|
734
|
-
return None
|
|
735
|
-
if (target.width == self.width or target.width is None) \
|
|
736
|
-
and (target.height == self.height or target.height is None) \
|
|
737
|
-
and (target.mode == self.mode or target.mode is None):
|
|
738
|
-
# nothing to do
|
|
739
|
-
return self.no_conversion
|
|
740
|
-
def convert(img: PIL.Image.Image) -> PIL.Image.Image:
|
|
741
|
-
if self.width != target.width or self.height != target.height:
|
|
742
|
-
img = img.resize((target.width, target.height))
|
|
743
|
-
if self.mode != target.mode:
|
|
744
|
-
img = img.convert(target.mode.to_pil())
|
|
745
|
-
return img
|
|
746
|
-
return convert
|
|
747
|
-
|
|
748
753
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
749
754
|
return sql.String()
|
|
750
755
|
|
|
@@ -829,6 +834,7 @@ class DocumentType(ColumnType):
|
|
|
829
834
|
|
|
830
835
|
def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
|
|
831
836
|
super().__init__(self.Type.DOCUMENT, nullable=nullable)
|
|
837
|
+
self.doc_formats = doc_formats
|
|
832
838
|
if doc_formats is not None:
|
|
833
839
|
type_strs = doc_formats.split(',')
|
|
834
840
|
for type_str in type_strs:
|
|
@@ -838,6 +844,12 @@ class DocumentType(ColumnType):
|
|
|
838
844
|
else:
|
|
839
845
|
self._doc_formats = [t for t in self.DocumentFormat]
|
|
840
846
|
|
|
847
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
848
|
+
return DocumentType(doc_formats=self.doc_formats, nullable=nullable)
|
|
849
|
+
|
|
850
|
+
def matches(self, other: ColumnType) -> bool:
|
|
851
|
+
return other._type == self.Type.DOCUMENT and self._doc_formats == other._doc_formats
|
|
852
|
+
|
|
841
853
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
842
854
|
# stored as a file path
|
|
843
855
|
return sql.String()
|
pixeltable/utils/pytorch.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
import io
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Iterator
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import PIL.Image
|
|
2
9
|
import pyarrow as pa
|
|
3
|
-
import pyarrow.parquet
|
|
4
10
|
import torch
|
|
5
11
|
import torch.utils.data
|
|
6
|
-
from
|
|
7
|
-
import PIL.Image
|
|
8
|
-
import json
|
|
9
|
-
from typing import Dict, Iterator, Any
|
|
10
|
-
import datetime
|
|
12
|
+
from pyarrow import parquet
|
|
11
13
|
|
|
12
14
|
from pixeltable.type_system import ColumnType
|
|
13
|
-
|
|
15
|
+
|
|
14
16
|
|
|
15
17
|
class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
16
18
|
"""
|
|
@@ -39,7 +41,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
39
41
|
with column_type_path.open() as f:
|
|
40
42
|
column_types = json.load(f)
|
|
41
43
|
self.column_types = {k: ColumnType.from_dict(v) for k, v in column_types.items()}
|
|
42
|
-
self.part_metadata =
|
|
44
|
+
self.part_metadata = parquet.ParquetDataset(path).files
|
|
43
45
|
|
|
44
46
|
def _unmarshall(self, k: str, v: Any) -> Any:
|
|
45
47
|
if self.column_types[k].is_image_type():
|
|
@@ -52,7 +54,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
52
54
|
return arr
|
|
53
55
|
|
|
54
56
|
assert self.image_format == "pt"
|
|
55
|
-
import torchvision
|
|
57
|
+
import torchvision
|
|
56
58
|
|
|
57
59
|
# use arr instead of im in ToTensor() to guarantee array input
|
|
58
60
|
# to torch.from_numpy is writable. Using im is a suspected cause of
|
|
@@ -85,7 +87,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
85
87
|
part_list = [ i for i in part_list if (i % worker_info.num_workers) == worker_info.id ]
|
|
86
88
|
|
|
87
89
|
for part_no in part_list:
|
|
88
|
-
pqf =
|
|
90
|
+
pqf = parquet.ParquetFile(self.part_metadata[part_no])
|
|
89
91
|
for batch in pqf.iter_batches():
|
|
90
92
|
for tup in arrow.iter_tuples(batch):
|
|
91
93
|
yield {k: self._unmarshall(k, v) for k, v in tup.items()}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.14
|
|
3
|
+
Version: 0.2.16
|
|
4
4
|
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
5
|
Author: Pixeltable, Inc.
|
|
6
6
|
Author-email: contact@pixeltable.com
|
|
@@ -21,11 +21,11 @@ Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
|
21
21
|
Requires-Dist: numpy (>=1.25)
|
|
22
22
|
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
23
23
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
24
|
-
Requires-Dist: pgserver (==0.1.4)
|
|
25
24
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
26
25
|
Requires-Dist: pillow (>=9.3.0)
|
|
26
|
+
Requires-Dist: pixeltable-pgserver (==0.2.7)
|
|
27
27
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
28
|
-
Requires-Dist:
|
|
28
|
+
Requires-Dist: psycopg[binary] (==3.1.18)
|
|
29
29
|
Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
|
|
30
30
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
31
31
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
@@ -36,7 +36,7 @@ Requires-Dist: tqdm (>=4.64)
|
|
|
36
36
|
Description-Content-Type: text/markdown
|
|
37
37
|
|
|
38
38
|
<div align="center">
|
|
39
|
-
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/
|
|
39
|
+
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
|
|
40
40
|
|
|
41
41
|
# Unifying Data, Models, and Orchestration for AI Products
|
|
42
42
|
|
|
@@ -46,7 +46,7 @@ Description-Content-Type: text/markdown
|
|
|
46
46
|
[Tests status](https://github.com/pixeltable/pixeltable/actions)
|
|
47
47
|
[PyPI package](https://pypi.org/project/pixeltable/)
|
|
48
48
|
|
|
49
|
-
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/
|
|
49
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/release/docs/release/tutorials)
|
|
50
50
|
</div>
|
|
51
51
|
|
|
52
52
|
Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
@@ -68,10 +68,10 @@ Learn how to create tables, populate them with data, and enhance them with built
|
|
|
68
68
|
|
|
69
69
|
| Topic | Notebook | Topic | Notebook |
|
|
70
70
|
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
71
|
-
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/
|
|
72
|
-
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/
|
|
73
|
-
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/
|
|
74
|
-
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/
|
|
71
|
+
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
72
|
+
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
73
|
+
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
74
|
+
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
75
75
|
|
|
76
76
|
## 🧱 Code Samples
|
|
77
77
|
|
|
@@ -186,7 +186,7 @@ Pixeltable unifies data storage, versioning, and indexing with orchestration and
|
|
|
186
186
|
- **It integrates with any existing Python code or libraries**
|
|
187
187
|
- Bring your ever-changing code and workloads
|
|
188
188
|
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
189
|
-
|
|
189
|
+
|
|
190
190
|
### What is Pixeltable not providing?
|
|
191
191
|
|
|
192
192
|
- Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
|