pixeltable 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (56)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/column.py +6 -3
  3. pixeltable/catalog/dir.py +1 -1
  4. pixeltable/catalog/globals.py +15 -6
  5. pixeltable/catalog/insertable_table.py +23 -8
  6. pixeltable/catalog/named_function.py +1 -1
  7. pixeltable/catalog/path_dict.py +4 -4
  8. pixeltable/catalog/schema_object.py +30 -18
  9. pixeltable/catalog/table.py +87 -104
  10. pixeltable/catalog/table_version.py +35 -24
  11. pixeltable/catalog/table_version_path.py +2 -2
  12. pixeltable/catalog/view.py +15 -8
  13. pixeltable/dataframe.py +56 -56
  14. pixeltable/env.py +10 -9
  15. pixeltable/exec/__init__.py +3 -3
  16. pixeltable/exec/aggregation_node.py +3 -3
  17. pixeltable/exec/expr_eval_node.py +3 -3
  18. pixeltable/exec/in_memory_data_node.py +4 -4
  19. pixeltable/exec/sql_node.py +4 -1
  20. pixeltable/exprs/arithmetic_expr.py +41 -16
  21. pixeltable/exprs/array_slice.py +3 -4
  22. pixeltable/exprs/column_ref.py +20 -4
  23. pixeltable/exprs/comparison.py +11 -6
  24. pixeltable/exprs/data_row.py +3 -0
  25. pixeltable/exprs/expr.py +88 -23
  26. pixeltable/exprs/function_call.py +12 -1
  27. pixeltable/exprs/globals.py +3 -1
  28. pixeltable/exprs/inline_array.py +4 -4
  29. pixeltable/exprs/json_path.py +36 -20
  30. pixeltable/exprs/row_builder.py +4 -4
  31. pixeltable/exprs/rowid_ref.py +1 -1
  32. pixeltable/functions/__init__.py +1 -2
  33. pixeltable/functions/audio.py +32 -0
  34. pixeltable/functions/huggingface.py +4 -4
  35. pixeltable/functions/image.py +1 -1
  36. pixeltable/functions/json.py +46 -0
  37. pixeltable/functions/video.py +5 -1
  38. pixeltable/functions/{eval.py → vision.py} +166 -27
  39. pixeltable/globals.py +57 -28
  40. pixeltable/io/external_store.py +6 -6
  41. pixeltable/io/globals.py +13 -14
  42. pixeltable/io/label_studio.py +6 -6
  43. pixeltable/io/pandas.py +60 -19
  44. pixeltable/io/parquet.py +14 -14
  45. pixeltable/iterators/document.py +7 -7
  46. pixeltable/iterators/video.py +55 -23
  47. pixeltable/plan.py +58 -29
  48. pixeltable/store.py +97 -59
  49. pixeltable/tool/create_test_db_dump.py +17 -11
  50. pixeltable/type_system.py +155 -143
  51. pixeltable/utils/pytorch.py +12 -10
  52. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/METADATA +10 -10
  53. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/RECORD +56 -54
  54. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/LICENSE +0 -0
  55. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/WHEEL +0 -0
  56. {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/entry_points.txt +0 -0
pixeltable/type_system.py CHANGED
@@ -7,13 +7,12 @@ import json
7
7
  import typing
8
8
  import urllib.parse
9
9
  import urllib.request
10
- from copy import deepcopy
11
10
  from pathlib import Path
12
- from typing import Any, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
11
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union
13
12
 
14
- import PIL.Image
15
- import av
13
+ import av # type: ignore
16
14
  import numpy as np
15
+ import PIL.Image
17
16
  import sqlalchemy as sql
18
17
 
19
18
  from pixeltable import exceptions as excs
@@ -39,10 +38,10 @@ class ColumnType:
39
38
 
40
39
  @classmethod
41
40
  def supertype(
42
- cls, type1: 'Type', type2: 'Type',
41
+ cls, type1: 'ColumnType.Type', type2: 'ColumnType.Type',
43
42
  # we need to pass this in because we can't easily append it as a class member
44
- common_supertypes: Dict[Tuple['Type', 'Type'], 'Type']
45
- ) -> Optional['Type']:
43
+ common_supertypes: Dict[Tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type']
44
+ ) -> Optional['ColumnType.Type']:
46
45
  if type1 == type2:
47
46
  return type1
48
47
  t = common_supertypes.get((type1, type2))
@@ -74,7 +73,7 @@ class ColumnType:
74
73
 
75
74
  scalar_types = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
76
75
  numeric_types = {Type.INT, Type.FLOAT}
77
- common_supertypes: Dict[Tuple[Type, Type], Type] = {
76
+ common_supertypes: dict[tuple[Type, Type], Type] = {
78
77
  (Type.BOOL, Type.INT): Type.INT,
79
78
  (Type.BOOL, Type.FLOAT): Type.FLOAT,
80
79
  (Type.INT, Type.FLOAT): Type.FLOAT,
@@ -95,11 +94,12 @@ class ColumnType:
95
94
  def serialize(self) -> str:
96
95
  return json.dumps(self.as_dict())
97
96
 
98
- def copy(self, nullable: Optional[bool] = None) -> ColumnType:
99
- result = deepcopy(self)
100
- if nullable is not None:
101
- result._nullable = nullable
102
- return result
97
+ def copy(self, nullable: bool) -> ColumnType:
98
+ # Default implementation calls unary initializer
99
+ if nullable == self.nullable:
100
+ return self
101
+ else:
102
+ return self.__class__(nullable=nullable)
103
103
 
104
104
  @classmethod
105
105
  def serialize_list(cls, type_list: List[ColumnType]) -> str:
@@ -136,7 +136,7 @@ class ColumnType:
136
136
  Default implementation: simply invoke c'tor
137
137
  """
138
138
  assert 'nullable' in d
139
- return cls(nullable=d['nullable'])
139
+ return cls(nullable=d['nullable']) # type: ignore[call-arg]
140
140
 
141
141
  @classmethod
142
142
  def make_type(cls, t: Type) -> ColumnType:
@@ -166,91 +166,89 @@ class ColumnType:
166
166
  return self._type.name.lower()
167
167
 
168
168
  def __eq__(self, other: object) -> bool:
169
- return self.matches(other) and self.nullable == other.nullable
170
-
171
- def is_supertype_of(self, other: ColumnType) -> bool:
172
- if type(self) != type(other):
173
- return False
174
- if self.matches(other):
175
- return True
176
- return self._is_supertype_of(other)
169
+ return isinstance(other, ColumnType) and self.matches(other) and self.nullable == other.nullable
177
170
 
178
- @abc.abstractmethod
179
- def _is_supertype_of(self, other: ColumnType) -> bool:
180
- return False
171
+ def is_supertype_of(self, other: ColumnType, ignore_nullable: bool = False) -> bool:
172
+ if ignore_nullable:
173
+ supertype = self.supertype(other)
174
+ if supertype is None:
175
+ return False
176
+ return supertype.matches(self)
177
+ else:
178
+ return self.supertype(other) == self
181
179
 
182
- def matches(self, other: object) -> bool:
180
+ def matches(self, other: ColumnType) -> bool:
183
181
  """Two types match if they're equal, aside from nullability"""
184
- if not isinstance(other, ColumnType):
185
- pass
186
- assert isinstance(other, ColumnType), type(other)
187
- if type(self) != type(other):
188
- return False
189
- for member_var in vars(self).keys():
190
- if member_var == '_nullable':
191
- continue
192
- if getattr(self, member_var) != getattr(other, member_var):
193
- return False
194
- return True
182
+ # Default: just compare base types (this works for all types whose only parameter is nullable)
183
+ return self._type == other._type
195
184
 
196
- @classmethod
197
- def supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
198
- if type1 == type2:
199
- return type1
185
+ def supertype(self, other: ColumnType) -> Optional[ColumnType]:
186
+ if self == other:
187
+ return self
188
+ if self.matches(other):
189
+ return self.copy(nullable=(self.nullable or other.nullable))
200
190
 
201
- if type1.is_invalid_type():
202
- return type2
203
- if type2.is_invalid_type():
204
- return type1
191
+ if self.is_invalid_type():
192
+ return other
193
+ if other.is_invalid_type():
194
+ return self
205
195
 
206
- if type1.is_scalar_type() and type2.is_scalar_type():
207
- t = cls.Type.supertype(type1._type, type2._type, cls.common_supertypes)
196
+ if self.is_scalar_type() and other.is_scalar_type():
197
+ t = self.Type.supertype(self._type, other._type, self.common_supertypes)
208
198
  if t is not None:
209
- return cls.make_type(t).copy(nullable=(type1.nullable or type2.nullable))
199
+ return self.make_type(t).copy(nullable=(self.nullable or other.nullable))
210
200
  return None
211
201
 
212
- if type1._type == type2._type:
213
- return cls._supertype(type1, type2)
214
-
215
202
  return None
216
203
 
217
204
  @classmethod
218
- @abc.abstractmethod
219
- def _supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
220
- """
221
- Class-specific implementation of determining the supertype. type1 and type2 are from the same subclass of
222
- ColumnType.
223
- """
224
- pass
225
-
226
- @classmethod
227
- def infer_literal_type(cls, val: Any) -> Optional[ColumnType]:
205
+ def infer_literal_type(cls, val: Any, nullable: bool = False) -> Optional[ColumnType]:
228
206
  if isinstance(val, str):
229
- return StringType()
207
+ return StringType(nullable=nullable)
230
208
  if isinstance(val, bool):
231
209
  # We have to check bool before int, because isinstance(b, int) is True if b is a Python bool
232
- return BoolType()
210
+ return BoolType(nullable=nullable)
233
211
  if isinstance(val, int):
234
- return IntType()
212
+ return IntType(nullable=nullable)
235
213
  if isinstance(val, float):
236
- return FloatType()
214
+ return FloatType(nullable=nullable)
237
215
  if isinstance(val, datetime.datetime):
238
- return TimestampType()
216
+ return TimestampType(nullable=nullable)
239
217
  if isinstance(val, PIL.Image.Image):
240
- return ImageType(width=val.width, height=val.height, mode=val.mode)
218
+ return ImageType(width=val.width, height=val.height, mode=val.mode, nullable=nullable)
241
219
  if isinstance(val, np.ndarray):
242
- col_type = ArrayType.from_literal(val)
220
+ col_type = ArrayType.from_literal(val, nullable=nullable)
243
221
  if col_type is not None:
244
222
  return col_type
245
223
  # this could still be json-serializable
246
224
  if isinstance(val, dict) or isinstance(val, list) or isinstance(val, np.ndarray):
247
225
  try:
248
226
  JsonType().validate_literal(val)
249
- return JsonType()
227
+ return JsonType(nullable=nullable)
250
228
  except TypeError:
251
229
  return None
252
230
  return None
253
231
 
232
+ @classmethod
233
+ def infer_common_literal_type(cls, vals: Iterable[Any]) -> Optional[ColumnType]:
234
+ """
235
+ Returns the most specific type that is a supertype of all literals in `vals`. If no such type
236
+ exists, returns None.
237
+
238
+ Args:
239
+ vals: A collection of literals.
240
+ """
241
+ inferred_type: Optional[ColumnType] = None
242
+ for val in vals:
243
+ val_type = cls.infer_literal_type(val)
244
+ if inferred_type is None:
245
+ inferred_type = val_type
246
+ else:
247
+ inferred_type = inferred_type.supertype(val_type)
248
+ if inferred_type is None:
249
+ return None
250
+ return inferred_type
251
+
254
252
  @classmethod
255
253
  def from_python_type(cls, t: type) -> Optional[ColumnType]:
256
254
  if typing.get_origin(t) is typing.Union:
@@ -317,10 +315,8 @@ class ColumnType:
317
315
  @abc.abstractmethod
318
316
  def _validate_literal(self, val: Any) -> None:
319
317
  """Raise TypeError if val is not a valid literal for this type"""
320
- pass
321
318
 
322
- @abc.abstractmethod
323
- def _create_literal(self, val : Any) -> Any:
319
+ def _create_literal(self, val: Any) -> Any:
324
320
  """Create a literal of this type from val, including any needed conversions.
325
321
  val is guaranteed to be non-None"""
326
322
  return val
@@ -389,21 +385,6 @@ class ColumnType:
389
385
  """
390
386
  pass
391
387
 
392
- @staticmethod
393
- def no_conversion(v: Any) -> Any:
394
- """
395
- Special return value of conversion_fn() that indicates that no conversion is necessary.
396
- Should not be called
397
- """
398
- assert False
399
-
400
- def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
401
- """
402
- Return Callable that converts a column value of type self to a value of type 'target'.
403
- Returns None if conversion isn't possible.
404
- """
405
- return None
406
-
407
388
 
408
389
  class InvalidType(ColumnType):
409
390
  def __init__(self, nullable: bool = False):
@@ -423,17 +404,6 @@ class StringType(ColumnType):
423
404
  def __init__(self, nullable: bool = False):
424
405
  super().__init__(self.Type.STRING, nullable=nullable)
425
406
 
426
- def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
427
- if not target.is_timestamp_type():
428
- return None
429
- def convert(val: str) -> Optional[datetime.datetime]:
430
- try:
431
- dt = datetime.datetime.fromisoformat(val)
432
- return dt
433
- except ValueError:
434
- return None
435
- return convert
436
-
437
407
  def to_sa_type(self) -> sql.types.TypeEngine:
438
408
  return sql.String()
439
409
 
@@ -522,6 +492,37 @@ class JsonType(ColumnType):
522
492
  super().__init__(self.Type.JSON, nullable=nullable)
523
493
  self.type_spec = type_spec
524
494
 
495
+ def copy(self, nullable: bool) -> ColumnType:
496
+ return JsonType(self.type_spec, nullable=nullable)
497
+
498
+ def matches(self, other: ColumnType) -> bool:
499
+ return other._type == self.Type.JSON and self.type_spec == other.type_spec
500
+
501
+ def supertype(self, other: ColumnType) -> Optional[JsonType]:
502
+ if not isinstance(other, JsonType):
503
+ return None
504
+ if self.type_spec is None:
505
+ # we don't have a type spec and can accept anything accepted by other
506
+ return JsonType(nullable=(self.nullable or other.nullable))
507
+ if other.type_spec is None:
508
+ # we have a type spec but other doesn't
509
+ return JsonType(nullable=(self.nullable or other.nullable))
510
+
511
+ # we both have type specs; the supertype's type spec is the union of the two
512
+ type_spec: dict[str, ColumnType] = {}
513
+ type_spec.update(self.type_spec)
514
+ for other_field_name, other_field_type in other.type_spec.items():
515
+ if other_field_name not in type_spec:
516
+ type_spec[other_field_name] = other_field_type
517
+ else:
518
+ # both type specs have this field
519
+ field_type = type_spec[other_field_name].supertype(other_field_type)
520
+ if field_type is None:
521
+ # conflicting types
522
+ return JsonType(nullable=(self.nullable or other.nullable))
523
+ type_spec[other_field_name] = field_type
524
+ return JsonType(type_spec, nullable=(self.nullable or other.nullable))
525
+
525
526
  def _as_dict(self) -> Dict:
526
527
  result = super()._as_dict()
527
528
  if self.type_spec is not None:
@@ -551,11 +552,20 @@ class JsonType(ColumnType):
551
552
 
552
553
  def _validate_literal(self, val: Any) -> None:
553
554
  if not isinstance(val, dict) and not isinstance(val, list):
555
+ # TODO In the future we should accept scalars too, which would enable us to remove this top-level check
554
556
  raise TypeError(f'Expected dict or list, got {val.__class__.__name__}')
555
- try:
556
- _ = json.dumps(val)
557
- except TypeError as e:
558
- raise TypeError(f'Expected JSON-serializable object, got {val}')
557
+ if not self.__is_valid_literal(val):
558
+ raise TypeError(f'That literal is not a valid Pixeltable JSON object: {val}')
559
+
560
+ @classmethod
561
+ def __is_valid_literal(cls, val: Any) -> None:
562
+ if val is None or isinstance(val, (str, int, float, bool)):
563
+ return True
564
+ if isinstance(val, (list, tuple)):
565
+ return all(cls.__is_valid_literal(v) for v in val)
566
+ if isinstance(val, dict):
567
+ return all(isinstance(k, str) and cls.__is_valid_literal(v) for k, v in val.items())
568
+ return False
559
569
 
560
570
  def _create_literal(self, val: Any) -> Any:
561
571
  if isinstance(val, tuple):
@@ -564,21 +574,29 @@ class JsonType(ColumnType):
564
574
 
565
575
 
566
576
  class ArrayType(ColumnType):
567
- def __init__(
568
- self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
577
+ def __init__(self, shape: tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
569
578
  super().__init__(self.Type.ARRAY, nullable=nullable)
570
579
  self.shape = shape
571
580
  assert dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
581
+ self.pxt_dtype = dtype
572
582
  self.dtype = dtype._type
573
583
 
574
- def _supertype(cls, type1: ArrayType, type2: ArrayType) -> Optional[ArrayType]:
575
- if len(type1.shape) != len(type2.shape):
584
+ def copy(self, nullable: bool) -> ColumnType:
585
+ return ArrayType(self.shape, self.pxt_dtype, nullable=nullable)
586
+
587
+ def matches(self, other: ColumnType) -> bool:
588
+ return other._type == self.Type.ARRAY and self.shape == other.shape and self.dtype == other.dtype
589
+
590
+ def supertype(self, other: ColumnType) -> Optional[ArrayType]:
591
+ if not isinstance(other, ArrayType):
576
592
  return None
577
- base_type = ColumnType.supertype(type1.dtype, type2.dtype)
593
+ if len(self.shape) != len(other.shape):
594
+ return None
595
+ base_type = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
578
596
  if base_type is None:
579
597
  return None
580
- shape = [n1 if n1 == n2 else None for n1, n2 in zip(type1.shape, type2.shape)]
581
- return ArrayType(tuple(shape), base_type, nullable=(type1.nullable or type2.nullable))
598
+ shape = [n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape)]
599
+ return ArrayType(tuple(shape), self.make_type(base_type), nullable=(self.nullable or other.nullable))
582
600
 
583
601
  def _as_dict(self) -> Dict:
584
602
  result = super()._as_dict()
@@ -597,11 +615,11 @@ class ArrayType(ColumnType):
597
615
  return cls(shape, dtype, nullable=d['nullable'])
598
616
 
599
617
  @classmethod
600
- def from_literal(cls, val: np.ndarray) -> Optional[ArrayType]:
618
+ def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
601
619
  # determine our dtype
602
620
  assert isinstance(val, np.ndarray)
603
621
  if np.issubdtype(val.dtype, np.integer):
604
- dtype = IntType()
622
+ dtype: ColumnType = IntType()
605
623
  elif np.issubdtype(val.dtype, np.floating):
606
624
  dtype = FloatType()
607
625
  elif val.dtype == np.bool_:
@@ -610,7 +628,7 @@ class ArrayType(ColumnType):
610
628
  dtype = StringType()
611
629
  else:
612
630
  return None
613
- return cls(val.shape, dtype=dtype)
631
+ return cls(val.shape, dtype=dtype, nullable=nullable)
614
632
 
615
633
  def is_valid_literal(self, val: np.ndarray) -> bool:
616
634
  if not isinstance(val, np.ndarray):
@@ -677,6 +695,9 @@ class ImageType(ColumnType):
677
695
  self.height = height
678
696
  self.mode = mode
679
697
 
698
+ def copy(self, nullable: bool) -> ColumnType:
699
+ return ImageType(self.width, self.height, mode=self.mode, nullable=nullable)
700
+
680
701
  def __str__(self) -> str:
681
702
  if self.width is not None or self.height is not None or self.mode is not None:
682
703
  params_str = ''
@@ -695,13 +716,21 @@ class ImageType(ColumnType):
695
716
  params_str = ''
696
717
  return f'{self._type.name.lower()}{params_str}'
697
718
 
698
- def _is_supertype_of(self, other: ImageType) -> bool:
699
- if self.mode is not None and self.mode != other.mode:
700
- return False
701
- if self.width is None and self.height is None:
702
- return True
703
- if self.width != other.width and self.height != other.height:
704
- return False
719
+ def matches(self, other: ColumnType) -> bool:
720
+ return (
721
+ other._type == self.Type.IMAGE
722
+ and self.width == other.width
723
+ and self.height == other.height
724
+ and self.mode == other.mode
725
+ )
726
+
727
+ def supertype(self, other: ColumnType) -> Optional[ImageType]:
728
+ if not isinstance(other, ImageType):
729
+ return None
730
+ width = self.width if self.width == other.width else None
731
+ height = self.height if self.height == other.height else None
732
+ mode = self.mode if self.mode == other.mode else None
733
+ return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))
705
734
 
706
735
  @property
707
736
  def size(self) -> Optional[Tuple[int, int]]:
@@ -709,10 +738,6 @@ class ImageType(ColumnType):
709
738
  return None
710
739
  return (self.width, self.height)
711
740
 
712
- @property
713
- def num_channels(self) -> Optional[int]:
714
- return None if self.mode is None else self.mode.num_channels()
715
-
716
741
  def _as_dict(self) -> Dict:
717
742
  result = super()._as_dict()
718
743
  result.update(width=self.width, height=self.height, mode=self.mode)
@@ -725,26 +750,6 @@ class ImageType(ColumnType):
725
750
  assert 'mode' in d
726
751
  return cls(width=d['width'], height=d['height'], mode=d['mode'], nullable=d['nullable'])
727
752
 
728
- def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
729
- if not target.is_image_type():
730
- return None
731
- assert isinstance(target, ImageType)
732
- if (target.width is None) != (target.height is None):
733
- # we can't resize only one dimension
734
- return None
735
- if (target.width == self.width or target.width is None) \
736
- and (target.height == self.height or target.height is None) \
737
- and (target.mode == self.mode or target.mode is None):
738
- # nothing to do
739
- return self.no_conversion
740
- def convert(img: PIL.Image.Image) -> PIL.Image.Image:
741
- if self.width != target.width or self.height != target.height:
742
- img = img.resize((target.width, target.height))
743
- if self.mode != target.mode:
744
- img = img.convert(target.mode.to_pil())
745
- return img
746
- return convert
747
-
748
753
  def to_sa_type(self) -> sql.types.TypeEngine:
749
754
  return sql.String()
750
755
 
@@ -829,6 +834,7 @@ class DocumentType(ColumnType):
829
834
 
830
835
  def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
831
836
  super().__init__(self.Type.DOCUMENT, nullable=nullable)
837
+ self.doc_formats = doc_formats
832
838
  if doc_formats is not None:
833
839
  type_strs = doc_formats.split(',')
834
840
  for type_str in type_strs:
@@ -838,6 +844,12 @@ class DocumentType(ColumnType):
838
844
  else:
839
845
  self._doc_formats = [t for t in self.DocumentFormat]
840
846
 
847
+ def copy(self, nullable: bool) -> ColumnType:
848
+ return DocumentType(doc_formats=self.doc_formats, nullable=nullable)
849
+
850
+ def matches(self, other: ColumnType) -> bool:
851
+ return other._type == self.Type.DOCUMENT and self._doc_formats == other._doc_formats
852
+
841
853
  def to_sa_type(self) -> sql.types.TypeEngine:
842
854
  # stored as a file path
843
855
  return sql.String()
pixeltable/utils/pytorch.py CHANGED
@@ -1,16 +1,18 @@
1
+ import datetime
1
2
  import io
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Iterator
6
+
7
+ import numpy as np
8
+ import PIL.Image
2
9
  import pyarrow as pa
3
- import pyarrow.parquet
4
10
  import torch
5
11
  import torch.utils.data
6
- from pathlib import Path
7
- import PIL.Image
8
- import json
9
- from typing import Dict, Iterator, Any
10
- import datetime
12
+ from pyarrow import parquet
11
13
 
12
14
  from pixeltable.type_system import ColumnType
13
- import numpy as np
15
+
14
16
 
15
17
  class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
16
18
  """
@@ -39,7 +41,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
39
41
  with column_type_path.open() as f:
40
42
  column_types = json.load(f)
41
43
  self.column_types = {k: ColumnType.from_dict(v) for k, v in column_types.items()}
42
- self.part_metadata = pa.parquet.ParquetDataset(path).files
44
+ self.part_metadata = parquet.ParquetDataset(path).files
43
45
 
44
46
  def _unmarshall(self, k: str, v: Any) -> Any:
45
47
  if self.column_types[k].is_image_type():
@@ -52,7 +54,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
52
54
  return arr
53
55
 
54
56
  assert self.image_format == "pt"
55
- import torchvision # pylint: disable = import-outside-toplevel
57
+ import torchvision
56
58
 
57
59
  # use arr instead of im in ToTensor() to guarantee array input
58
60
  # to torch.from_numpy is writable. Using im is a suspected cause of
@@ -85,7 +87,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
85
87
  part_list = [ i for i in part_list if (i % worker_info.num_workers) == worker_info.id ]
86
88
 
87
89
  for part_no in part_list:
88
- pqf = pa.parquet.ParquetFile(self.part_metadata[part_no])
90
+ pqf = parquet.ParquetFile(self.part_metadata[part_no])
89
91
  for batch in pqf.iter_batches():
90
92
  for tup in arrow.iter_tuples(batch):
91
93
  yield {k: self._unmarshall(k, v) for k, v in tup.items()}
{pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.14
3
+ Version: 0.2.16
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Pixeltable, Inc.
6
6
  Author-email: contact@pixeltable.com
@@ -21,11 +21,11 @@ Requires-Dist: more-itertools (>=10.2,<11.0)
21
21
  Requires-Dist: numpy (>=1.25)
22
22
  Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
23
23
  Requires-Dist: pandas (>=2.0,<3.0)
24
- Requires-Dist: pgserver (==0.1.4)
25
24
  Requires-Dist: pgvector (>=0.2.1,<0.3.0)
26
25
  Requires-Dist: pillow (>=9.3.0)
26
+ Requires-Dist: pixeltable-pgserver (==0.2.7)
27
27
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
28
- Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
28
+ Requires-Dist: psycopg[binary] (==3.1.18)
29
29
  Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
30
30
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
31
31
  Requires-Dist: requests (>=2.31.0,<3.0.0)
@@ -36,7 +36,7 @@ Requires-Dist: tqdm (>=4.64)
36
36
  Description-Content-Type: text/markdown
37
37
 
38
38
  <div align="center">
39
- <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
39
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
40
 
41
41
  # Unifying Data, Models, and Orchestration for AI Products
42
42
 
@@ -46,7 +46,7 @@ Description-Content-Type: text/markdown
46
46
  [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
47
47
  [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=darkorange)](https://pypi.org/project/pixeltable/)
48
48
 
49
- [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
49
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/release/docs/release/tutorials)
50
50
  </div>
51
51
 
52
52
  Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
@@ -68,10 +68,10 @@ Learn how to create tables, populate them with data, and enhance them with built
68
68
 
69
69
  | Topic | Notebook | Topic | Notebook |
70
70
  |:----------|:-----------------|:-------------------------|:---------------------------------:|
71
- | 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
72
- | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
73
- | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
74
- | Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
71
+ | 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
72
+ | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
73
+ | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
74
+ | Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
75
75
 
76
76
  ## 🧱 Code Samples
77
77
 
@@ -186,7 +186,7 @@ Pixeltable unifies data storage, versioning, and indexing with orchestration and
186
186
  - **It integrates with any existing Python code or libraries**
187
187
  - Bring your ever-changing code and workloads
188
188
  - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
189
-
189
+
190
190
  ### What is Pixeltable not providing?
191
191
 
192
192
  - Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.