pixeltable 0.2.15__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +3 -0
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/globals.py +15 -6
- pixeltable/catalog/insertable_table.py +23 -8
- pixeltable/catalog/named_function.py +1 -1
- pixeltable/catalog/path_dict.py +4 -4
- pixeltable/catalog/schema_object.py +30 -18
- pixeltable/catalog/table.py +84 -99
- pixeltable/catalog/table_version.py +35 -24
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +15 -8
- pixeltable/dataframe.py +56 -56
- pixeltable/env.py +6 -5
- pixeltable/exec/__init__.py +3 -3
- pixeltable/exec/aggregation_node.py +3 -3
- pixeltable/exec/expr_eval_node.py +3 -3
- pixeltable/exec/in_memory_data_node.py +4 -4
- pixeltable/exec/sql_node.py +4 -1
- pixeltable/exprs/array_slice.py +3 -4
- pixeltable/exprs/column_ref.py +20 -4
- pixeltable/exprs/comparison.py +11 -6
- pixeltable/exprs/data_row.py +3 -0
- pixeltable/exprs/expr.py +51 -23
- pixeltable/exprs/function_call.py +8 -1
- pixeltable/exprs/inline_array.py +2 -2
- pixeltable/exprs/json_path.py +36 -20
- pixeltable/exprs/row_builder.py +4 -4
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/functions/__init__.py +1 -2
- pixeltable/functions/audio.py +32 -0
- pixeltable/functions/huggingface.py +4 -4
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/video.py +5 -1
- pixeltable/functions/vision.py +2 -6
- pixeltable/globals.py +57 -28
- pixeltable/io/external_store.py +4 -4
- pixeltable/io/globals.py +12 -13
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/pandas.py +27 -12
- pixeltable/io/parquet.py +14 -14
- pixeltable/iterators/document.py +7 -7
- pixeltable/plan.py +58 -29
- pixeltable/store.py +32 -31
- pixeltable/tool/create_test_db_dump.py +12 -6
- pixeltable/type_system.py +89 -97
- pixeltable/utils/pytorch.py +12 -10
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/METADATA +10 -10
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/RECORD +52 -51
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.15.dist-info → pixeltable-0.2.16.dist-info}/entry_points.txt +0 -0
pixeltable/type_system.py
CHANGED
|
@@ -7,13 +7,12 @@ import json
|
|
|
7
7
|
import typing
|
|
8
8
|
import urllib.parse
|
|
9
9
|
import urllib.request
|
|
10
|
-
from copy import deepcopy
|
|
11
10
|
from pathlib import Path
|
|
12
|
-
from typing import Any, Iterable,
|
|
11
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union
|
|
13
12
|
|
|
14
|
-
import
|
|
15
|
-
import av
|
|
13
|
+
import av # type: ignore
|
|
16
14
|
import numpy as np
|
|
15
|
+
import PIL.Image
|
|
17
16
|
import sqlalchemy as sql
|
|
18
17
|
|
|
19
18
|
from pixeltable import exceptions as excs
|
|
@@ -39,10 +38,10 @@ class ColumnType:
|
|
|
39
38
|
|
|
40
39
|
@classmethod
|
|
41
40
|
def supertype(
|
|
42
|
-
cls, type1: 'Type', type2: 'Type',
|
|
41
|
+
cls, type1: 'ColumnType.Type', type2: 'ColumnType.Type',
|
|
43
42
|
# we need to pass this in because we can't easily append it as a class member
|
|
44
|
-
common_supertypes: Dict[Tuple['Type', 'Type'], 'Type']
|
|
45
|
-
) -> Optional['Type']:
|
|
43
|
+
common_supertypes: Dict[Tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type']
|
|
44
|
+
) -> Optional['ColumnType.Type']:
|
|
46
45
|
if type1 == type2:
|
|
47
46
|
return type1
|
|
48
47
|
t = common_supertypes.get((type1, type2))
|
|
@@ -74,7 +73,7 @@ class ColumnType:
|
|
|
74
73
|
|
|
75
74
|
scalar_types = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
|
|
76
75
|
numeric_types = {Type.INT, Type.FLOAT}
|
|
77
|
-
common_supertypes:
|
|
76
|
+
common_supertypes: dict[tuple[Type, Type], Type] = {
|
|
78
77
|
(Type.BOOL, Type.INT): Type.INT,
|
|
79
78
|
(Type.BOOL, Type.FLOAT): Type.FLOAT,
|
|
80
79
|
(Type.INT, Type.FLOAT): Type.FLOAT,
|
|
@@ -95,11 +94,12 @@ class ColumnType:
|
|
|
95
94
|
def serialize(self) -> str:
|
|
96
95
|
return json.dumps(self.as_dict())
|
|
97
96
|
|
|
98
|
-
def copy(self, nullable:
|
|
99
|
-
|
|
100
|
-
if nullable
|
|
101
|
-
|
|
102
|
-
|
|
97
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
98
|
+
# Default implementation calls unary initializer
|
|
99
|
+
if nullable == self.nullable:
|
|
100
|
+
return self
|
|
101
|
+
else:
|
|
102
|
+
return self.__class__(nullable=nullable)
|
|
103
103
|
|
|
104
104
|
@classmethod
|
|
105
105
|
def serialize_list(cls, type_list: List[ColumnType]) -> str:
|
|
@@ -136,7 +136,7 @@ class ColumnType:
|
|
|
136
136
|
Default implementation: simply invoke c'tor
|
|
137
137
|
"""
|
|
138
138
|
assert 'nullable' in d
|
|
139
|
-
return cls(nullable=d['nullable'])
|
|
139
|
+
return cls(nullable=d['nullable']) # type: ignore[call-arg]
|
|
140
140
|
|
|
141
141
|
@classmethod
|
|
142
142
|
def make_type(cls, t: Type) -> ColumnType:
|
|
@@ -169,22 +169,23 @@ class ColumnType:
|
|
|
169
169
|
return isinstance(other, ColumnType) and self.matches(other) and self.nullable == other.nullable
|
|
170
170
|
|
|
171
171
|
def is_supertype_of(self, other: ColumnType, ignore_nullable: bool = False) -> bool:
|
|
172
|
-
|
|
173
|
-
|
|
172
|
+
if ignore_nullable:
|
|
173
|
+
supertype = self.supertype(other)
|
|
174
|
+
if supertype is None:
|
|
175
|
+
return False
|
|
176
|
+
return supertype.matches(self)
|
|
177
|
+
else:
|
|
178
|
+
return self.supertype(other) == self
|
|
174
179
|
|
|
175
180
|
def matches(self, other: ColumnType) -> bool:
|
|
176
181
|
"""Two types match if they're equal, aside from nullability"""
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
for member_var in vars(self).keys():
|
|
180
|
-
if member_var == '_nullable':
|
|
181
|
-
continue
|
|
182
|
-
if getattr(self, member_var) != getattr(other, member_var):
|
|
183
|
-
return False
|
|
184
|
-
return True
|
|
182
|
+
# Default: just compare base types (this works for all types whose only parameter is nullable)
|
|
183
|
+
return self._type == other._type
|
|
185
184
|
|
|
186
185
|
def supertype(self, other: ColumnType) -> Optional[ColumnType]:
|
|
187
|
-
if self
|
|
186
|
+
if self == other:
|
|
187
|
+
return self
|
|
188
|
+
if self.matches(other):
|
|
188
189
|
return self.copy(nullable=(self.nullable or other.nullable))
|
|
189
190
|
|
|
190
191
|
if self.is_invalid_type():
|
|
@@ -201,29 +202,29 @@ class ColumnType:
|
|
|
201
202
|
return None
|
|
202
203
|
|
|
203
204
|
@classmethod
|
|
204
|
-
def infer_literal_type(cls, val: Any) -> Optional[ColumnType]:
|
|
205
|
+
def infer_literal_type(cls, val: Any, nullable: bool = False) -> Optional[ColumnType]:
|
|
205
206
|
if isinstance(val, str):
|
|
206
|
-
return StringType()
|
|
207
|
+
return StringType(nullable=nullable)
|
|
207
208
|
if isinstance(val, bool):
|
|
208
209
|
# We have to check bool before int, because isinstance(b, int) is True if b is a Python bool
|
|
209
|
-
return BoolType()
|
|
210
|
+
return BoolType(nullable=nullable)
|
|
210
211
|
if isinstance(val, int):
|
|
211
|
-
return IntType()
|
|
212
|
+
return IntType(nullable=nullable)
|
|
212
213
|
if isinstance(val, float):
|
|
213
|
-
return FloatType()
|
|
214
|
+
return FloatType(nullable=nullable)
|
|
214
215
|
if isinstance(val, datetime.datetime):
|
|
215
|
-
return TimestampType()
|
|
216
|
+
return TimestampType(nullable=nullable)
|
|
216
217
|
if isinstance(val, PIL.Image.Image):
|
|
217
|
-
return ImageType(width=val.width, height=val.height, mode=val.mode)
|
|
218
|
+
return ImageType(width=val.width, height=val.height, mode=val.mode, nullable=nullable)
|
|
218
219
|
if isinstance(val, np.ndarray):
|
|
219
|
-
col_type = ArrayType.from_literal(val)
|
|
220
|
+
col_type = ArrayType.from_literal(val, nullable=nullable)
|
|
220
221
|
if col_type is not None:
|
|
221
222
|
return col_type
|
|
222
223
|
# this could still be json-serializable
|
|
223
224
|
if isinstance(val, dict) or isinstance(val, list) or isinstance(val, np.ndarray):
|
|
224
225
|
try:
|
|
225
226
|
JsonType().validate_literal(val)
|
|
226
|
-
return JsonType()
|
|
227
|
+
return JsonType(nullable=nullable)
|
|
227
228
|
except TypeError:
|
|
228
229
|
return None
|
|
229
230
|
return None
|
|
@@ -314,10 +315,8 @@ class ColumnType:
|
|
|
314
315
|
@abc.abstractmethod
|
|
315
316
|
def _validate_literal(self, val: Any) -> None:
|
|
316
317
|
"""Raise TypeError if val is not a valid literal for this type"""
|
|
317
|
-
pass
|
|
318
318
|
|
|
319
|
-
|
|
320
|
-
def _create_literal(self, val : Any) -> Any:
|
|
319
|
+
def _create_literal(self, val: Any) -> Any:
|
|
321
320
|
"""Create a literal of this type from val, including any needed conversions.
|
|
322
321
|
val is guaranteed to be non-None"""
|
|
323
322
|
return val
|
|
@@ -386,21 +385,6 @@ class ColumnType:
|
|
|
386
385
|
"""
|
|
387
386
|
pass
|
|
388
387
|
|
|
389
|
-
@staticmethod
|
|
390
|
-
def no_conversion(v: Any) -> Any:
|
|
391
|
-
"""
|
|
392
|
-
Special return value of conversion_fn() that indicates that no conversion is necessary.
|
|
393
|
-
Should not be called
|
|
394
|
-
"""
|
|
395
|
-
assert False
|
|
396
|
-
|
|
397
|
-
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
398
|
-
"""
|
|
399
|
-
Return Callable that converts a column value of type self to a value of type 'target'.
|
|
400
|
-
Returns None if conversion isn't possible.
|
|
401
|
-
"""
|
|
402
|
-
return None
|
|
403
|
-
|
|
404
388
|
|
|
405
389
|
class InvalidType(ColumnType):
|
|
406
390
|
def __init__(self, nullable: bool = False):
|
|
@@ -420,17 +404,6 @@ class StringType(ColumnType):
|
|
|
420
404
|
def __init__(self, nullable: bool = False):
|
|
421
405
|
super().__init__(self.Type.STRING, nullable=nullable)
|
|
422
406
|
|
|
423
|
-
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
424
|
-
if not target.is_timestamp_type():
|
|
425
|
-
return None
|
|
426
|
-
def convert(val: str) -> Optional[datetime.datetime]:
|
|
427
|
-
try:
|
|
428
|
-
dt = datetime.datetime.fromisoformat(val)
|
|
429
|
-
return dt
|
|
430
|
-
except ValueError:
|
|
431
|
-
return None
|
|
432
|
-
return convert
|
|
433
|
-
|
|
434
407
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
435
408
|
return sql.String()
|
|
436
409
|
|
|
@@ -519,7 +492,15 @@ class JsonType(ColumnType):
|
|
|
519
492
|
super().__init__(self.Type.JSON, nullable=nullable)
|
|
520
493
|
self.type_spec = type_spec
|
|
521
494
|
|
|
495
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
496
|
+
return JsonType(self.type_spec, nullable=nullable)
|
|
497
|
+
|
|
498
|
+
def matches(self, other: ColumnType) -> bool:
|
|
499
|
+
return other._type == self.Type.JSON and self.type_spec == other.type_spec
|
|
500
|
+
|
|
522
501
|
def supertype(self, other: ColumnType) -> Optional[JsonType]:
|
|
502
|
+
if not isinstance(other, JsonType):
|
|
503
|
+
return None
|
|
523
504
|
if self.type_spec is None:
|
|
524
505
|
# we don't have a type spec and can accept anything accepted by other
|
|
525
506
|
return JsonType(nullable=(self.nullable or other.nullable))
|
|
@@ -528,10 +509,11 @@ class JsonType(ColumnType):
|
|
|
528
509
|
return JsonType(nullable=(self.nullable or other.nullable))
|
|
529
510
|
|
|
530
511
|
# we both have type specs; the supertype's type spec is the union of the two
|
|
531
|
-
type_spec =
|
|
512
|
+
type_spec: dict[str, ColumnType] = {}
|
|
513
|
+
type_spec.update(self.type_spec)
|
|
532
514
|
for other_field_name, other_field_type in other.type_spec.items():
|
|
533
515
|
if other_field_name not in type_spec:
|
|
534
|
-
type_spec[other_field_name] = other_field_type
|
|
516
|
+
type_spec[other_field_name] = other_field_type
|
|
535
517
|
else:
|
|
536
518
|
# both type specs have this field
|
|
537
519
|
field_type = type_spec[other_field_name].supertype(other_field_type)
|
|
@@ -570,11 +552,20 @@ class JsonType(ColumnType):
|
|
|
570
552
|
|
|
571
553
|
def _validate_literal(self, val: Any) -> None:
|
|
572
554
|
if not isinstance(val, dict) and not isinstance(val, list):
|
|
555
|
+
# TODO In the future we should accept scalars too, which would enable us to remove this top-level check
|
|
573
556
|
raise TypeError(f'Expected dict or list, got {val.__class__.__name__}')
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
557
|
+
if not self.__is_valid_literal(val):
|
|
558
|
+
raise TypeError(f'That literal is not a valid Pixeltable JSON object: {val}')
|
|
559
|
+
|
|
560
|
+
@classmethod
|
|
561
|
+
def __is_valid_literal(cls, val: Any) -> None:
|
|
562
|
+
if val is None or isinstance(val, (str, int, float, bool)):
|
|
563
|
+
return True
|
|
564
|
+
if isinstance(val, (list, tuple)):
|
|
565
|
+
return all(cls.__is_valid_literal(v) for v in val)
|
|
566
|
+
if isinstance(val, dict):
|
|
567
|
+
return all(isinstance(k, str) and cls.__is_valid_literal(v) for k, v in val.items())
|
|
568
|
+
return False
|
|
578
569
|
|
|
579
570
|
def _create_literal(self, val: Any) -> Any:
|
|
580
571
|
if isinstance(val, tuple):
|
|
@@ -583,12 +574,19 @@ class JsonType(ColumnType):
|
|
|
583
574
|
|
|
584
575
|
|
|
585
576
|
class ArrayType(ColumnType):
|
|
586
|
-
def __init__(self, shape:
|
|
577
|
+
def __init__(self, shape: tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
|
|
587
578
|
super().__init__(self.Type.ARRAY, nullable=nullable)
|
|
588
579
|
self.shape = shape
|
|
589
580
|
assert dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
|
|
581
|
+
self.pxt_dtype = dtype
|
|
590
582
|
self.dtype = dtype._type
|
|
591
583
|
|
|
584
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
585
|
+
return ArrayType(self.shape, self.pxt_dtype, nullable=nullable)
|
|
586
|
+
|
|
587
|
+
def matches(self, other: ColumnType) -> bool:
|
|
588
|
+
return other._type == self.Type.ARRAY and self.shape == other.shape and self.dtype == other.dtype
|
|
589
|
+
|
|
592
590
|
def supertype(self, other: ColumnType) -> Optional[ArrayType]:
|
|
593
591
|
if not isinstance(other, ArrayType):
|
|
594
592
|
return None
|
|
@@ -617,11 +615,11 @@ class ArrayType(ColumnType):
|
|
|
617
615
|
return cls(shape, dtype, nullable=d['nullable'])
|
|
618
616
|
|
|
619
617
|
@classmethod
|
|
620
|
-
def from_literal(cls, val: np.ndarray) -> Optional[ArrayType]:
|
|
618
|
+
def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
|
|
621
619
|
# determine our dtype
|
|
622
620
|
assert isinstance(val, np.ndarray)
|
|
623
621
|
if np.issubdtype(val.dtype, np.integer):
|
|
624
|
-
dtype = IntType()
|
|
622
|
+
dtype: ColumnType = IntType()
|
|
625
623
|
elif np.issubdtype(val.dtype, np.floating):
|
|
626
624
|
dtype = FloatType()
|
|
627
625
|
elif val.dtype == np.bool_:
|
|
@@ -630,7 +628,7 @@ class ArrayType(ColumnType):
|
|
|
630
628
|
dtype = StringType()
|
|
631
629
|
else:
|
|
632
630
|
return None
|
|
633
|
-
return cls(val.shape, dtype=dtype)
|
|
631
|
+
return cls(val.shape, dtype=dtype, nullable=nullable)
|
|
634
632
|
|
|
635
633
|
def is_valid_literal(self, val: np.ndarray) -> bool:
|
|
636
634
|
if not isinstance(val, np.ndarray):
|
|
@@ -697,6 +695,9 @@ class ImageType(ColumnType):
|
|
|
697
695
|
self.height = height
|
|
698
696
|
self.mode = mode
|
|
699
697
|
|
|
698
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
699
|
+
return ImageType(self.width, self.height, mode=self.mode, nullable=nullable)
|
|
700
|
+
|
|
700
701
|
def __str__(self) -> str:
|
|
701
702
|
if self.width is not None or self.height is not None or self.mode is not None:
|
|
702
703
|
params_str = ''
|
|
@@ -715,6 +716,14 @@ class ImageType(ColumnType):
|
|
|
715
716
|
params_str = ''
|
|
716
717
|
return f'{self._type.name.lower()}{params_str}'
|
|
717
718
|
|
|
719
|
+
def matches(self, other: ColumnType) -> bool:
|
|
720
|
+
return (
|
|
721
|
+
other._type == self.Type.IMAGE
|
|
722
|
+
and self.width == other.width
|
|
723
|
+
and self.height == other.height
|
|
724
|
+
and self.mode == other.mode
|
|
725
|
+
)
|
|
726
|
+
|
|
718
727
|
def supertype(self, other: ColumnType) -> Optional[ImageType]:
|
|
719
728
|
if not isinstance(other, ImageType):
|
|
720
729
|
return None
|
|
@@ -729,10 +738,6 @@ class ImageType(ColumnType):
|
|
|
729
738
|
return None
|
|
730
739
|
return (self.width, self.height)
|
|
731
740
|
|
|
732
|
-
@property
|
|
733
|
-
def num_channels(self) -> Optional[int]:
|
|
734
|
-
return None if self.mode is None else self.mode.num_channels()
|
|
735
|
-
|
|
736
741
|
def _as_dict(self) -> Dict:
|
|
737
742
|
result = super()._as_dict()
|
|
738
743
|
result.update(width=self.width, height=self.height, mode=self.mode)
|
|
@@ -745,26 +750,6 @@ class ImageType(ColumnType):
|
|
|
745
750
|
assert 'mode' in d
|
|
746
751
|
return cls(width=d['width'], height=d['height'], mode=d['mode'], nullable=d['nullable'])
|
|
747
752
|
|
|
748
|
-
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
749
|
-
if not target.is_image_type():
|
|
750
|
-
return None
|
|
751
|
-
assert isinstance(target, ImageType)
|
|
752
|
-
if (target.width is None) != (target.height is None):
|
|
753
|
-
# we can't resize only one dimension
|
|
754
|
-
return None
|
|
755
|
-
if (target.width == self.width or target.width is None) \
|
|
756
|
-
and (target.height == self.height or target.height is None) \
|
|
757
|
-
and (target.mode == self.mode or target.mode is None):
|
|
758
|
-
# nothing to do
|
|
759
|
-
return self.no_conversion
|
|
760
|
-
def convert(img: PIL.Image.Image) -> PIL.Image.Image:
|
|
761
|
-
if self.width != target.width or self.height != target.height:
|
|
762
|
-
img = img.resize((target.width, target.height))
|
|
763
|
-
if self.mode != target.mode:
|
|
764
|
-
img = img.convert(target.mode.to_pil())
|
|
765
|
-
return img
|
|
766
|
-
return convert
|
|
767
|
-
|
|
768
753
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
769
754
|
return sql.String()
|
|
770
755
|
|
|
@@ -849,6 +834,7 @@ class DocumentType(ColumnType):
|
|
|
849
834
|
|
|
850
835
|
def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
|
|
851
836
|
super().__init__(self.Type.DOCUMENT, nullable=nullable)
|
|
837
|
+
self.doc_formats = doc_formats
|
|
852
838
|
if doc_formats is not None:
|
|
853
839
|
type_strs = doc_formats.split(',')
|
|
854
840
|
for type_str in type_strs:
|
|
@@ -858,6 +844,12 @@ class DocumentType(ColumnType):
|
|
|
858
844
|
else:
|
|
859
845
|
self._doc_formats = [t for t in self.DocumentFormat]
|
|
860
846
|
|
|
847
|
+
def copy(self, nullable: bool) -> ColumnType:
|
|
848
|
+
return DocumentType(doc_formats=self.doc_formats, nullable=nullable)
|
|
849
|
+
|
|
850
|
+
def matches(self, other: ColumnType) -> bool:
|
|
851
|
+
return other._type == self.Type.DOCUMENT and self._doc_formats == other._doc_formats
|
|
852
|
+
|
|
861
853
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
862
854
|
# stored as a file path
|
|
863
855
|
return sql.String()
|
pixeltable/utils/pytorch.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
import io
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Iterator
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import PIL.Image
|
|
2
9
|
import pyarrow as pa
|
|
3
|
-
import pyarrow.parquet
|
|
4
10
|
import torch
|
|
5
11
|
import torch.utils.data
|
|
6
|
-
from
|
|
7
|
-
import PIL.Image
|
|
8
|
-
import json
|
|
9
|
-
from typing import Dict, Iterator, Any
|
|
10
|
-
import datetime
|
|
12
|
+
from pyarrow import parquet
|
|
11
13
|
|
|
12
14
|
from pixeltable.type_system import ColumnType
|
|
13
|
-
|
|
15
|
+
|
|
14
16
|
|
|
15
17
|
class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
16
18
|
"""
|
|
@@ -39,7 +41,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
39
41
|
with column_type_path.open() as f:
|
|
40
42
|
column_types = json.load(f)
|
|
41
43
|
self.column_types = {k: ColumnType.from_dict(v) for k, v in column_types.items()}
|
|
42
|
-
self.part_metadata =
|
|
44
|
+
self.part_metadata = parquet.ParquetDataset(path).files
|
|
43
45
|
|
|
44
46
|
def _unmarshall(self, k: str, v: Any) -> Any:
|
|
45
47
|
if self.column_types[k].is_image_type():
|
|
@@ -52,7 +54,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
52
54
|
return arr
|
|
53
55
|
|
|
54
56
|
assert self.image_format == "pt"
|
|
55
|
-
import torchvision
|
|
57
|
+
import torchvision
|
|
56
58
|
|
|
57
59
|
# use arr instead of im in ToTensor() to guarantee array input
|
|
58
60
|
# to torch.from_numpy is writable. Using im is a suspected cause of
|
|
@@ -85,7 +87,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
85
87
|
part_list = [ i for i in part_list if (i % worker_info.num_workers) == worker_info.id ]
|
|
86
88
|
|
|
87
89
|
for part_no in part_list:
|
|
88
|
-
pqf =
|
|
90
|
+
pqf = parquet.ParquetFile(self.part_metadata[part_no])
|
|
89
91
|
for batch in pqf.iter_batches():
|
|
90
92
|
for tup in arrow.iter_tuples(batch):
|
|
91
93
|
yield {k: self._unmarshall(k, v) for k, v in tup.items()}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.16
|
|
4
4
|
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
5
|
Author: Pixeltable, Inc.
|
|
6
6
|
Author-email: contact@pixeltable.com
|
|
@@ -23,9 +23,9 @@ Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
|
23
23
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
24
24
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
25
25
|
Requires-Dist: pillow (>=9.3.0)
|
|
26
|
-
Requires-Dist: pixeltable-pgserver (==0.2.
|
|
26
|
+
Requires-Dist: pixeltable-pgserver (==0.2.7)
|
|
27
27
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
28
|
-
Requires-Dist:
|
|
28
|
+
Requires-Dist: psycopg[binary] (==3.1.18)
|
|
29
29
|
Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
|
|
30
30
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
31
31
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
@@ -36,7 +36,7 @@ Requires-Dist: tqdm (>=4.64)
|
|
|
36
36
|
Description-Content-Type: text/markdown
|
|
37
37
|
|
|
38
38
|
<div align="center">
|
|
39
|
-
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/
|
|
39
|
+
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
|
|
40
40
|
|
|
41
41
|
# Unifying Data, Models, and Orchestration for AI Products
|
|
42
42
|
|
|
@@ -46,7 +46,7 @@ Description-Content-Type: text/markdown
|
|
|
46
46
|
[GitHub Actions](https://github.com/pixeltable/pixeltable/actions)
|
|
47
47
|
[PyPI package](https://pypi.org/project/pixeltable/)
|
|
48
48
|
|
|
49
|
-
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/
|
|
49
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/release/docs/release/tutorials)
|
|
50
50
|
</div>
|
|
51
51
|
|
|
52
52
|
Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
@@ -68,10 +68,10 @@ Learn how to create tables, populate them with data, and enhance them with built
|
|
|
68
68
|
|
|
69
69
|
| Topic | Notebook | Topic | Notebook |
|
|
70
70
|
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
71
|
-
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/
|
|
72
|
-
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/
|
|
73
|
-
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/
|
|
74
|
-
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/
|
|
71
|
+
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
72
|
+
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
73
|
+
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
74
|
+
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
75
75
|
|
|
76
76
|
## 🧱 Code Samples
|
|
77
77
|
|
|
@@ -186,7 +186,7 @@ Pixeltable unifies data storage, versioning, and indexing with orchestration and
|
|
|
186
186
|
- **It integrates with any existing Python code or libraries**
|
|
187
187
|
- Bring your ever-changing code and workloads
|
|
188
188
|
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
189
|
-
|
|
189
|
+
|
|
190
190
|
### What is Pixeltable not providing?
|
|
191
191
|
|
|
192
192
|
- Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
|