pixeltable-0.3.2-py3-none-any.whl → pixeltable-0.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (150)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +22 -12
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +121 -101
  14. pixeltable/catalog/table_version.py +291 -142
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +67 -26
  17. pixeltable/dataframe.py +106 -81
  18. pixeltable/env.py +28 -24
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -9
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +13 -7
  27. pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
  28. pixeltable/exec/expr_eval/globals.py +30 -7
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +151 -31
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +108 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +32 -17
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +16 -12
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +231 -113
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +60 -26
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +2 -1
  101. pixeltable/io/label_studio.py +77 -68
  102. pixeltable/io/pandas.py +36 -23
  103. pixeltable/io/parquet.py +9 -12
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +7 -1
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/share/__init__.py +0 -0
  128. pixeltable/share/packager.py +218 -0
  129. pixeltable/store.py +42 -26
  130. pixeltable/type_system.py +102 -75
  131. pixeltable/utils/arrow.py +7 -8
  132. pixeltable/utils/coco.py +16 -17
  133. pixeltable/utils/code.py +1 -1
  134. pixeltable/utils/console_output.py +6 -3
  135. pixeltable/utils/description_helper.py +7 -7
  136. pixeltable/utils/documents.py +3 -1
  137. pixeltable/utils/filecache.py +12 -7
  138. pixeltable/utils/http_server.py +9 -8
  139. pixeltable/utils/iceberg.py +14 -0
  140. pixeltable/utils/media_store.py +3 -2
  141. pixeltable/utils/pytorch.py +11 -14
  142. pixeltable/utils/s3.py +1 -0
  143. pixeltable/utils/sql.py +1 -0
  144. pixeltable/utils/transactional_directory.py +2 -2
  145. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/METADATA +9 -9
  146. pixeltable-0.3.4.dist-info/RECORD +166 -0
  147. pixeltable-0.3.2.dist-info/RECORD +0 -161
  148. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/LICENSE +0 -0
  149. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/WHEEL +0 -0
  150. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/entry_points.txt +0 -0
pixeltable/type_system.py CHANGED
@@ -9,21 +9,22 @@ import typing
9
9
  import urllib.parse
10
10
  import urllib.request
11
11
  from pathlib import Path
12
- from typing import Any, Iterable, Mapping, Optional, Sequence, Union
12
+ from typing import Any, Iterable, Literal, Mapping, Optional, Sequence, Union
13
13
 
14
- import PIL.Image
15
14
  import av # type: ignore
16
15
  import jsonschema
17
16
  import jsonschema.protocols
18
17
  import jsonschema.validators
19
18
  import numpy as np
19
+ import PIL.Image
20
20
  import pydantic
21
21
  import sqlalchemy as sql
22
- from typing import _GenericAlias # type: ignore[attr-defined]
23
22
  from typing_extensions import _AnnotatedAlias
24
23
 
25
24
  import pixeltable.exceptions as excs
26
25
 
26
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
27
+
27
28
 
28
29
  class ColumnType:
29
30
  @enum.unique
@@ -45,9 +46,11 @@ class ColumnType:
45
46
 
46
47
  @classmethod
47
48
  def supertype(
48
- cls, type1: 'ColumnType.Type', type2: 'ColumnType.Type',
49
- # we need to pass this in because we can't easily append it as a class member
50
- common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type']
49
+ cls,
50
+ type1: 'ColumnType.Type',
51
+ type2: 'ColumnType.Type',
52
+ # we need to pass this in because we can't easily append it as a class member
53
+ common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
51
54
  ) -> Optional['ColumnType.Type']:
52
55
  if type1 == type2:
53
56
  return type1
@@ -59,23 +62,23 @@ class ColumnType:
59
62
  return t
60
63
  return None
61
64
 
62
-
63
65
  @enum.unique
64
66
  class DType(enum.Enum):
65
67
  """
66
68
  Base type used in images and arrays
67
69
  """
68
- BOOL = 0,
69
- INT8 = 1,
70
- INT16 = 2,
71
- INT32 = 3,
72
- INT64 = 4,
73
- UINT8 = 5,
74
- UINT16 = 6,
75
- UINT32 = 7,
76
- UINT64 = 8,
77
- FLOAT16 = 9,
78
- FLOAT32 = 10,
70
+
71
+ BOOL = (0,)
72
+ INT8 = (1,)
73
+ INT16 = (2,)
74
+ INT32 = (3,)
75
+ INT64 = (4,)
76
+ UINT8 = (5,)
77
+ UINT16 = (6,)
78
+ UINT32 = (7,)
79
+ UINT64 = (8,)
80
+ FLOAT16 = (9,)
81
+ FLOAT32 = (10,)
79
82
  FLOAT64 = 11
80
83
 
81
84
  scalar_types = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
@@ -113,10 +116,7 @@ class ColumnType:
113
116
  return json.dumps([t.as_dict() for t in type_list])
114
117
 
115
118
  def as_dict(self) -> dict:
116
- return {
117
- '_classname': self.__class__.__name__,
118
- **self._as_dict(),
119
- }
119
+ return {'_classname': self.__class__.__name__, **self._as_dict()}
120
120
 
121
121
  def _as_dict(self) -> dict:
122
122
  return {'nullable': self.nullable}
@@ -213,9 +213,9 @@ class ColumnType:
213
213
  return self.copy(nullable=(self.nullable or other.nullable))
214
214
 
215
215
  if self.is_invalid_type():
216
- return other
216
+ return other.copy(nullable=(self.nullable or other.nullable))
217
217
  if other.is_invalid_type():
218
- return self
218
+ return self.copy(nullable=(self.nullable or other.nullable))
219
219
 
220
220
  if self.is_scalar_type() and other.is_scalar_type():
221
221
  t = self.Type.supertype(self._type, other._type, self.common_supertypes)
@@ -277,10 +277,7 @@ class ColumnType:
277
277
 
278
278
  @classmethod
279
279
  def from_python_type(
280
- cls,
281
- t: Union[type, _GenericAlias],
282
- nullable_default: bool = False,
283
- allow_builtin_types: bool = True
280
+ cls, t: Union[type, _GenericAlias], nullable_default: bool = False, allow_builtin_types: bool = True
284
281
  ) -> Optional[ColumnType]:
285
282
  """
286
283
  Convert a Python type into a Pixeltable `ColumnType` instance.
@@ -295,28 +292,24 @@ class ColumnType:
295
292
  designations will be allowed regardless.
296
293
  """
297
294
  origin = typing.get_origin(t)
295
+ type_args = typing.get_args(t)
298
296
  if origin is typing.Union:
299
297
  # Check if `t` has the form Optional[T].
300
- union_args = typing.get_args(t)
301
- if len(union_args) == 2 and type(None) in union_args:
298
+ if len(type_args) == 2 and type(None) in type_args:
302
299
  # `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).
303
300
  # We treat it as the underlying type but with nullable=True.
304
- underlying_py_type = union_args[0] if union_args[1] is type(None) else union_args[1]
301
+ underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
305
302
  underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
306
303
  if underlying is not None:
307
304
  return underlying.copy(nullable=True)
308
305
  elif origin is Required:
309
- required_args = typing.get_args(t)
310
- assert len(required_args) == 1
306
+ assert len(type_args) == 1
311
307
  return cls.from_python_type(
312
- required_args[0],
313
- nullable_default=False,
314
- allow_builtin_types=allow_builtin_types
315
- )
308
+ type_args[0], nullable_default=False, allow_builtin_types=allow_builtin_types
309
+ ).copy(nullable=False)
316
310
  elif origin is typing.Annotated:
317
- annotated_args = typing.get_args(t)
318
- origin = annotated_args[0]
319
- parameters = annotated_args[1]
311
+ origin = type_args[0]
312
+ parameters = type_args[1]
320
313
  if isinstance(parameters, ColumnType):
321
314
  return parameters.copy(nullable=nullable_default)
322
315
  else:
@@ -328,6 +321,11 @@ class ColumnType:
328
321
  if isinstance(t, type) and issubclass(t, _PxtType):
329
322
  return t.as_col_type(nullable=nullable_default)
330
323
  elif allow_builtin_types:
324
+ if t is Literal and len(type_args) > 0:
325
+ literal_type = cls.infer_common_literal_type(type_args)
326
+ if literal_type is None:
327
+ return None
328
+ return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
331
329
  if t is str:
332
330
  return StringType(nullable=nullable_default)
333
331
  if t is int:
@@ -340,7 +338,7 @@ class ColumnType:
340
338
  return TimestampType(nullable=nullable_default)
341
339
  if t is PIL.Image.Image:
342
340
  return ImageType(nullable=nullable_default)
343
- if issubclass(t, Sequence) or issubclass(t, Mapping) or issubclass(t, pydantic.BaseModel):
341
+ if isinstance(t, type) and issubclass(t, (Sequence, Mapping, pydantic.BaseModel)):
344
342
  return JsonType(nullable=nullable_default)
345
343
  return None
346
344
 
@@ -349,7 +347,7 @@ class ColumnType:
349
347
  cls,
350
348
  t: Union[ColumnType, type, _AnnotatedAlias],
351
349
  nullable_default: bool = False,
352
- allow_builtin_types: bool = True
350
+ allow_builtin_types: bool = True,
353
351
  ) -> ColumnType:
354
352
  """
355
353
  Convert any type recognizable by Pixeltable to its corresponding ColumnType.
@@ -415,7 +413,7 @@ class ColumnType:
415
413
 
416
414
  def _create_literal(self, val: Any) -> Any:
417
415
  """Create a literal of this type from val, including any needed conversions.
418
- val is guaranteed to be non-None"""
416
+ val is guaranteed to be non-None"""
419
417
  return val
420
418
 
421
419
  def create_literal(self, val: Any) -> Any:
@@ -484,12 +482,7 @@ class ColumnType:
484
482
 
485
483
  def to_json_schema(self) -> dict[str, Any]:
486
484
  if self.nullable:
487
- return {
488
- 'anyOf': [
489
- self._to_json_schema(),
490
- {'type': 'null'},
491
- ]
492
- }
485
+ return {'anyOf': [self._to_json_schema(), {'type': 'null'}]}
493
486
  else:
494
487
  return self._to_json_schema()
495
488
 
@@ -612,7 +605,6 @@ class TimestampType(ColumnType):
612
605
 
613
606
 
614
607
  class JsonType(ColumnType):
615
-
616
608
  json_schema: Optional[dict[str, Any]]
617
609
  __validator: Optional[jsonschema.protocols.Validator]
618
610
 
@@ -699,8 +691,7 @@ class JsonType(ColumnType):
699
691
  superschema = self.__superschema(self.json_schema, other.json_schema)
700
692
 
701
693
  return JsonType(
702
- json_schema=(None if len(superschema) == 0 else superschema),
703
- nullable=(self.nullable or other.nullable)
694
+ json_schema=(None if len(superschema) == 0 else superschema), nullable=(self.nullable or other.nullable)
704
695
  )
705
696
 
706
697
  @classmethod
@@ -755,7 +746,7 @@ class JsonType(ColumnType):
755
746
  a_type = a.get('type')
756
747
  b_type = b.get('type')
757
748
 
758
- if (a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type):
749
+ if a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type:
759
750
  # a and b both have the same type designation, but are not identical. This can happen if
760
751
  # (for example) they have validators or other attributes that differ. In this case, we
761
752
  # generalize to {'type': t}, where t is their shared type, with no other qualifications.
@@ -793,15 +784,25 @@ class JsonType(ColumnType):
793
784
 
794
785
 
795
786
  class ArrayType(ColumnType):
796
-
797
787
  shape: Optional[tuple[Optional[int], ...]]
798
788
  pxt_dtype: Optional[ColumnType]
799
789
  dtype: Optional[ColumnType.Type]
800
790
 
801
- def __init__(self, shape: Optional[tuple[Optional[int], ...]] = None, dtype: Optional[ColumnType] = None, nullable: bool = False):
791
+ def __init__(
792
+ self,
793
+ shape: Optional[tuple[Optional[int], ...]] = None,
794
+ dtype: Optional[ColumnType] = None,
795
+ nullable: bool = False,
796
+ ):
802
797
  super().__init__(self.Type.ARRAY, nullable=nullable)
803
798
  assert shape is None or dtype is not None, (shape, dtype) # cannot specify a shape without a dtype
804
- assert dtype is None or dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
799
+ assert (
800
+ dtype is None
801
+ or dtype.is_int_type()
802
+ or dtype.is_float_type()
803
+ or dtype.is_bool_type()
804
+ or dtype.is_string_type()
805
+ )
805
806
 
806
807
  self.shape = shape
807
808
  self.pxt_dtype = dtype # we need this for copy() and __str__()
@@ -853,21 +854,39 @@ class ArrayType(ColumnType):
853
854
  dtype = None if d['dtype'] is None else cls.make_type(cls.Type(d['dtype']))
854
855
  return cls(shape, dtype, nullable=d['nullable'])
855
856
 
857
+ @classmethod
858
+ def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> Optional[ColumnType]:
859
+ """
860
+ Return pixeltable type corresponding to a given simple numpy dtype
861
+ """
862
+ if np.issubdtype(dtype, np.integer):
863
+ return IntType(nullable=nullable)
864
+
865
+ if np.issubdtype(dtype, np.floating):
866
+ return FloatType(nullable=nullable)
867
+
868
+ if dtype == np.bool_:
869
+ return BoolType(nullable=nullable)
870
+
871
+ if np.issubdtype(dtype, np.str_):
872
+ return StringType(nullable=nullable)
873
+
874
+ if np.issubdtype(dtype, np.character):
875
+ return StringType(nullable=nullable)
876
+
877
+ if np.issubdtype(dtype, np.datetime64):
878
+ return TimestampType(nullable=nullable)
879
+
880
+ return None
881
+
856
882
  @classmethod
857
883
  def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
858
884
  # determine our dtype
859
885
  assert isinstance(val, np.ndarray)
860
- if np.issubdtype(val.dtype, np.integer):
861
- dtype: ColumnType = IntType()
862
- elif np.issubdtype(val.dtype, np.floating):
863
- dtype = FloatType()
864
- elif val.dtype == np.bool_:
865
- dtype = BoolType()
866
- elif val.dtype == np.str_:
867
- dtype = StringType()
868
- else:
886
+ pxttype: Optional[ColumnType] = cls.from_np_dtype(val.dtype, nullable)
887
+ if pxttype == None:
869
888
  return None
870
- return cls(val.shape, dtype=dtype, nullable=nullable)
889
+ return cls(val.shape, dtype=pxttype, nullable=nullable)
871
890
 
872
891
  def is_valid_literal(self, val: np.ndarray) -> bool:
873
892
  if not isinstance(val, np.ndarray):
@@ -898,10 +917,7 @@ class ArrayType(ColumnType):
898
917
  return True
899
918
 
900
919
  def _to_json_schema(self) -> dict[str, Any]:
901
- return {
902
- 'type': 'array',
903
- 'items': self.pxt_dtype._to_json_schema(),
904
- }
920
+ return {'type': 'array', 'items': self.pxt_dtype._to_json_schema()}
905
921
 
906
922
  def _validate_literal(self, val: Any) -> None:
907
923
  if not isinstance(val, np.ndarray):
@@ -945,15 +961,19 @@ class ArrayType(ColumnType):
945
961
 
946
962
  class ImageType(ColumnType):
947
963
  def __init__(
948
- self, width: Optional[int] = None, height: Optional[int] = None, size: Optional[tuple[int, int]] = None,
949
- mode: Optional[str] = None, nullable: bool = False
964
+ self,
965
+ width: Optional[int] = None,
966
+ height: Optional[int] = None,
967
+ size: Optional[tuple[int, int]] = None,
968
+ mode: Optional[str] = None,
969
+ nullable: bool = False,
950
970
  ):
951
971
  """
952
972
  TODO: does it make sense to specify only width or height?
953
973
  """
954
974
  super().__init__(self.Type.IMAGE, nullable=nullable)
955
- assert not(width is not None and size is not None)
956
- assert not(height is not None and size is not None)
975
+ assert not (width is not None and size is not None)
976
+ assert not (height is not None and size is not None)
957
977
  if size is not None:
958
978
  self.width = size[0]
959
979
  self.height = size[1]
@@ -1143,6 +1163,7 @@ class DocumentType(ColumnType):
1143
1163
  def validate_media(self, val: Any) -> None:
1144
1164
  assert isinstance(val, str)
1145
1165
  from pixeltable.utils.documents import get_document_handle
1166
+
1146
1167
  dh = get_document_handle(val)
1147
1168
  if dh is None:
1148
1169
  raise excs.Error(f'Not a recognized document format: {val}')
@@ -1156,6 +1177,7 @@ class Required(typing.Generic[T]):
1156
1177
  Marker class to indicate that a column is non-nullable in a schema definition. This has no meaning as a type hint,
1157
1178
  and is intended only for schema declarations.
1158
1179
  """
1180
+
1159
1181
  pass
1160
1182
 
1161
1183
 
@@ -1178,6 +1200,7 @@ class _PxtType:
1178
1200
  `Image[(300, 300), 'RGB']`. The specialized forms resolve to `typing.Annotated` instances whose annotation is a
1179
1201
  `ColumnType`.
1180
1202
  """
1203
+
1181
1204
  def __init__(self):
1182
1205
  raise TypeError(f'Type `{type(self)}` cannot be instantiated.')
1183
1206
 
@@ -1256,7 +1279,11 @@ class Image(PIL.Image.Image, _PxtType):
1256
1279
  mode: Optional[str] = None
1257
1280
  for param in params:
1258
1281
  if isinstance(param, tuple):
1259
- if len(param) != 2 or not isinstance(param[0], (int, type(None))) or not isinstance(param[1], (int, type(None))):
1282
+ if (
1283
+ len(param) != 2
1284
+ or not isinstance(param[0], (int, type(None)))
1285
+ or not isinstance(param[1], (int, type(None)))
1286
+ ):
1260
1287
  raise TypeError(f'Invalid Image type parameter: {param}')
1261
1288
  if size is not None:
1262
1289
  raise TypeError(f'Duplicate Image type parameter: {param}')
pixeltable/utils/arrow.py CHANGED
@@ -1,13 +1,12 @@
1
+ import datetime
1
2
  from typing import Any, Iterator, Optional, Union
2
3
 
3
4
  import numpy as np
4
5
  import pyarrow as pa
5
- import datetime
6
6
 
7
7
  import pixeltable.type_system as ts
8
8
 
9
-
10
- _pa_to_pt: dict[pa.DataType, ts.ColumnType] = {
9
+ PA_TO_PXT_TYPES: dict[pa.DataType, ts.ColumnType] = {
11
10
  pa.string(): ts.StringType(nullable=True),
12
11
  pa.bool_(): ts.BoolType(nullable=True),
13
12
  pa.uint8(): ts.IntType(nullable=True),
@@ -19,7 +18,7 @@ _pa_to_pt: dict[pa.DataType, ts.ColumnType] = {
19
18
  pa.float32(): ts.FloatType(nullable=True),
20
19
  }
21
20
 
22
- _pt_to_pa: dict[type[ts.ColumnType], pa.DataType] = {
21
+ PXT_TO_PA_TYPES: dict[type[ts.ColumnType], pa.DataType] = {
23
22
  ts.StringType: pa.string(),
24
23
  ts.TimestampType: pa.timestamp('us', tz=datetime.timezone.utc), # postgres timestamp is microseconds
25
24
  ts.BoolType: pa.bool_(),
@@ -39,8 +38,8 @@ def to_pixeltable_type(arrow_type: pa.DataType) -> Optional[ts.ColumnType]:
39
38
  """
40
39
  if isinstance(arrow_type, pa.TimestampType):
41
40
  return ts.TimestampType(nullable=True)
42
- elif arrow_type in _pa_to_pt:
43
- return _pa_to_pt[arrow_type]
41
+ elif arrow_type in PA_TO_PXT_TYPES:
42
+ return PA_TO_PXT_TYPES[arrow_type]
44
43
  elif isinstance(arrow_type, pa.FixedShapeTensorType):
45
44
  dtype = to_pixeltable_type(arrow_type.value_type)
46
45
  if dtype is None:
@@ -54,8 +53,8 @@ def to_arrow_type(pixeltable_type: ts.ColumnType) -> Optional[pa.DataType]:
54
53
  """Convert a pixeltable DataType to a pyarrow datatype if one is defined.
55
54
  Returns None if no conversion is currently implemented.
56
55
  """
57
- if pixeltable_type.__class__ in _pt_to_pa:
58
- return _pt_to_pa[pixeltable_type.__class__]
56
+ if pixeltable_type.__class__ in PXT_TO_PA_TYPES:
57
+ return PXT_TO_PA_TYPES[pixeltable_type.__class__]
59
58
  elif isinstance(pixeltable_type, ts.ArrayType):
60
59
  return pa.fixed_shape_tensor(pa.from_numpy_dtype(pixeltable_type.numpy_dtype()), pixeltable_type.shape)
61
60
  else:
pixeltable/utils/coco.py CHANGED
@@ -22,6 +22,7 @@ Required format:
22
22
  }
23
23
  """
24
24
 
25
+
25
26
  def _verify_input_dict(input_dict: dict[str, Any]) -> None:
26
27
  """Verify that input_dict is a valid input dict for write_coco_dataset()"""
27
28
  if not isinstance(input_dict, dict):
@@ -30,7 +31,7 @@ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
30
31
  raise excs.Error(f'Missing key "image" in input dict: {input_dict}{format_msg}')
31
32
  if not isinstance(input_dict['image'], PIL.Image.Image):
32
33
  raise excs.Error(f'Value for "image" is not a PIL.Image.Image: {input_dict}{format_msg}')
33
- if 'annotations' not in input_dict:
34
+ if 'annotations' not in input_dict:
34
35
  raise excs.Error(f'Missing key "annotations" in input dict: {input_dict}{format_msg}')
35
36
  if not isinstance(input_dict['annotations'], list):
36
37
  raise excs.Error(f'Value for "annotations" is not a list: {input_dict}{format_msg}')
@@ -48,6 +49,7 @@ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
48
49
  if not isinstance(annotation['category'], (str, int)):
49
50
  raise excs.Error(f'Value for "category" is not a str or int: {annotation}{format_msg}')
50
51
 
52
+
51
53
  def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
52
54
  """Export a DataFrame result set as a COCO dataset in dest_path and return the path of the data.json file."""
53
55
  # TODO: validate schema
@@ -96,12 +98,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
96
98
  img_path = images_dir / f'{img_id}.jpg'
97
99
  img.save(img_path)
98
100
 
99
- images.append({
100
- 'id': img_id,
101
- 'file_name': str(img_path),
102
- 'width': img.width,
103
- 'height': img.height,
104
- })
101
+ images.append({'id': img_id, 'file_name': str(img_path), 'width': img.width, 'height': img.height})
105
102
 
106
103
  # create annotation records for this image
107
104
  for annotation in input_dict['annotations']:
@@ -109,15 +106,17 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
109
106
  x, y, w, h = annotation['bbox']
110
107
  category = annotation['category']
111
108
  categories.add(category)
112
- annotations.append({
113
- 'id': ann_id,
114
- 'image_id': img_id,
115
- # we use the category name here and fix it up at the end, when we have assigned category ids
116
- 'category_id': category,
117
- 'bbox': annotation['bbox'],
118
- 'area': w * h,
119
- 'iscrowd': 0,
120
- })
109
+ annotations.append(
110
+ {
111
+ 'id': ann_id,
112
+ 'image_id': img_id,
113
+ # we use the category name here and fix it up at the end, when we have assigned category ids
114
+ 'category_id': category,
115
+ 'bbox': annotation['bbox'],
116
+ 'area': w * h,
117
+ 'iscrowd': 0,
118
+ }
119
+ )
121
120
 
122
121
  # replace category names with ids
123
122
  category_ids = {category: id for id, category in enumerate(sorted(list(categories)))}
@@ -226,5 +225,5 @@ COCO_2017_CATEGORIES = {
226
225
  87: 'scissors',
227
226
  88: 'teddy bear',
228
227
  89: 'hair drier',
229
- 90: 'toothbrush'
228
+ 90: 'toothbrush',
230
229
  }
pixeltable/utils/code.py CHANGED
@@ -3,9 +3,9 @@ from typing import Optional
3
3
 
4
4
  from pixeltable.func import Function
5
5
 
6
-
7
6
  # Utilities related to the organization of the Pixeltable codebase.
8
7
 
8
+
9
9
  def local_public_names(mod_name: str, exclude: Optional[list[str]] = None) -> list[str]:
10
10
  """
11
11
  Returns a list of all functions and submodules that are local to the specified module and are
pixeltable/utils/console_output.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
 
3
+
3
4
  def map_level(verbosity: int) -> int:
4
5
  """
5
6
  Map verbosity level to logging level.
@@ -19,6 +20,7 @@ def map_level(verbosity: int) -> int:
19
20
  return logging.DEBUG
20
21
  return logging.INFO
21
22
 
23
+
22
24
  class ConsoleOutputHandler(logging.StreamHandler):
23
25
  def __init__(self, stream):
24
26
  super().__init__(stream)
@@ -29,13 +31,14 @@ class ConsoleOutputHandler(logging.StreamHandler):
29
31
  else:
30
32
  self.stream.write(record.msg + '\n')
31
33
 
34
+
32
35
  class ConsoleMessageFilter(logging.Filter):
33
36
  def filter(self, record: logging.LogRecord) -> bool:
34
37
  if hasattr(record, 'user_visible') and record.user_visible:
35
38
  return True
36
39
  return False
37
40
 
38
- class ConsoleLogger(logging.LoggerAdapter):
39
- def __init__(self, logger:logging.Logger):
40
- super().__init__(logger, extra={'user_visible' : True})
41
41
 
42
+ class ConsoleLogger(logging.LoggerAdapter):
43
+ def __init__(self, logger: logging.Logger):
44
+ super().__init__(logger, extra={'user_visible': True})
pixeltable/utils/description_helper.py CHANGED
@@ -25,6 +25,7 @@ class DescriptionHelper:
25
25
  DescriptionHelper can convert a list of descriptors into either HTML or plaintext and do something reasonable
26
26
  in each case.
27
27
  """
28
+
28
29
  __descriptors: list[_Descriptor]
29
30
 
30
31
  def __init__(self) -> None:
@@ -69,18 +70,17 @@ class DescriptionHelper:
69
70
  return (
70
71
  # Render the string as a single-cell DataFrame. This will ensure a consistent style of output in
71
72
  # cases where strings appear alongside DataFrames in the same DescriptionHelper.
72
- pd.DataFrame([descriptor.body]).style
73
- .set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left', 'font-weight': 'bold'})
74
- .hide(axis='index').hide(axis='columns')
73
+ pd.DataFrame([descriptor.body])
74
+ .style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left', 'font-weight': 'bold'})
75
+ .hide(axis='index')
76
+ .hide(axis='columns')
75
77
  )
76
78
  else:
77
79
  styler = descriptor.styler
78
80
  if styler is None:
79
81
  styler = descriptor.body.style
80
- styler = (
81
- styler
82
- .set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
83
- .set_table_styles([dict(selector='th', props=[('text-align', 'left')])])
82
+ styler = styler.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'}).set_table_styles(
83
+ [dict(selector='th', props=[('text-align', 'left')])]
84
84
  )
85
85
  if not descriptor.show_header:
86
86
  styler = styler.hide(axis='columns')
pixeltable/utils/documents.py CHANGED
@@ -83,6 +83,7 @@ def get_xml_handle(path: str) -> Optional[bs4.BeautifulSoup]:
83
83
  def get_markdown_handle(path: str) -> Optional[dict]:
84
84
  Env.get().require_package('mistune', [3, 0])
85
85
  import mistune
86
+
86
87
  try:
87
88
  with open(path, encoding='utf8') as file:
88
89
  text = file.read()
@@ -91,9 +92,10 @@ def get_markdown_handle(path: str) -> Optional[dict]:
91
92
  except Exception:
92
93
  return None
93
94
 
95
+
94
96
  def get_txt(path: str) -> Optional[str]:
95
97
  try:
96
- with open(path, "r") as f:
98
+ with open(path, 'r') as f:
97
99
  doc = f.read()
98
100
  return doc if doc != '' else None
99
101
  except Exception:
pixeltable/utils/filecache.py CHANGED
@@ -17,9 +17,9 @@ from pixeltable.env import Env
17
17
 
18
18
  _logger = logging.getLogger('pixeltable')
19
19
 
20
+
20
21
  @dataclass
21
22
  class CacheEntry:
22
-
23
23
  key: str
24
24
  tbl_id: UUID
25
25
  col_id: int
@@ -56,6 +56,7 @@ class FileCache:
56
56
  TODO:
57
57
  - implement MRU eviction for queries that exceed the capacity
58
58
  """
59
+
59
60
  __instance: Optional[FileCache] = None
60
61
 
61
62
  cache: OrderedDict[str, CacheEntry]
@@ -79,8 +80,7 @@ class FileCache:
79
80
 
80
81
  FileCacheColumnStats = namedtuple('FileCacheColumnStats', ('tbl_id', 'col_id', 'num_files', 'total_size'))
81
82
  FileCacheStats = namedtuple(
82
- 'FileCacheStats',
83
- ('total_size', 'num_requests', 'num_hits', 'num_evictions', 'column_stats')
83
+ 'FileCacheStats', ('total_size', 'num_requests', 'num_hits', 'num_evictions', 'column_stats')
84
84
  )
85
85
 
86
86
  @classmethod
@@ -154,7 +154,7 @@ class FileCache:
154
154
  f'Consider increasing the cache size to at least {round(suggested_cache_size / (1 << 30), 1)} GiB '
155
155
  f'(it is currently {round(self.capacity_bytes / (1 << 30), 1)} GiB).\n'
156
156
  f'You can do this by setting the value of `file_cache_size_g` in: {str(Env.get()._config_file)}',
157
- excs.PixeltableWarning
157
+ excs.PixeltableWarning,
158
158
  )
159
159
  self.new_redownload_witnessed = False
160
160
 
@@ -195,7 +195,9 @@ class FileCache:
195
195
  self.evicted_working_set_keys.add(key)
196
196
  self.new_redownload_witnessed = True
197
197
  self.keys_retrieved.add(key)
198
- entry = CacheEntry(key, tbl_id, col_id, file_info.st_size, datetime.fromtimestamp(file_info.st_mtime), path.suffix)
198
+ entry = CacheEntry(
199
+ key, tbl_id, col_id, file_info.st_size, datetime.fromtimestamp(file_info.st_mtime), path.suffix
200
+ )
199
201
  self.cache[key] = entry
200
202
  self.total_size += entry.size
201
203
  new_path = entry.path
@@ -217,7 +219,9 @@ class FileCache:
217
219
  # Make a record of the eviction, so that we can generate a warning later if the key is retrieved again.
218
220
  self.keys_evicted_after_retrieval.add(lru_entry.key)
219
221
  os.remove(str(lru_entry.path))
220
- _logger.debug(f'evicted entry for cell {lru_entry.key} from file cache (of size {lru_entry.size // (1 << 20)} MiB)')
222
+ _logger.debug(
223
+ f'evicted entry for cell {lru_entry.key} from file cache (of size {lru_entry.size // (1 << 20)} MiB)'
224
+ )
221
225
 
222
226
  def set_capacity(self, capacity_bytes: int) -> None:
223
227
  self.capacity_bytes = capacity_bytes
@@ -232,7 +236,8 @@ class FileCache:
232
236
  t[0] += 1
233
237
  t[1] += entry.size
234
238
  col_stats = [
235
- self.FileCacheColumnStats(tbl_id, col_id, num_files, size) for (tbl_id, col_id), (num_files, size) in d.items()
239
+ self.FileCacheColumnStats(tbl_id, col_id, num_files, size)
240
+ for (tbl_id, col_id), (num_files, size) in d.items()
236
241
  ]
237
242
  col_stats.sort(key=lambda e: e[3], reverse=True)
238
243
  return self.FileCacheStats(self.total_size, self.num_requests, self.num_hits, self.num_evictions, col_stats)