pixeltable 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -2
  4. pixeltable/catalog/column.py +1 -1
  5. pixeltable/catalog/dir.py +1 -1
  6. pixeltable/catalog/table.py +1 -1
  7. pixeltable/catalog/table_version.py +12 -2
  8. pixeltable/catalog/table_version_path.py +2 -2
  9. pixeltable/catalog/view.py +64 -20
  10. pixeltable/dataframe.py +14 -14
  11. pixeltable/env.py +20 -3
  12. pixeltable/exec/component_iteration_node.py +1 -2
  13. pixeltable/exec/expr_eval/evaluators.py +4 -2
  14. pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
  15. pixeltable/exprs/comparison.py +8 -4
  16. pixeltable/exprs/data_row.py +5 -3
  17. pixeltable/exprs/expr.py +9 -2
  18. pixeltable/exprs/function_call.py +155 -313
  19. pixeltable/func/aggregate_function.py +29 -15
  20. pixeltable/func/callable_function.py +11 -8
  21. pixeltable/func/expr_template_function.py +3 -9
  22. pixeltable/func/function.py +148 -74
  23. pixeltable/func/signature.py +65 -30
  24. pixeltable/func/udf.py +1 -1
  25. pixeltable/functions/__init__.py +1 -0
  26. pixeltable/functions/deepseek.py +121 -0
  27. pixeltable/functions/image.py +7 -7
  28. pixeltable/functions/openai.py +49 -10
  29. pixeltable/functions/video.py +14 -7
  30. pixeltable/globals.py +14 -3
  31. pixeltable/index/embedding_index.py +4 -13
  32. pixeltable/io/globals.py +88 -77
  33. pixeltable/io/hf_datasets.py +34 -34
  34. pixeltable/io/pandas.py +75 -87
  35. pixeltable/io/parquet.py +19 -27
  36. pixeltable/io/utils.py +115 -0
  37. pixeltable/iterators/audio.py +2 -1
  38. pixeltable/iterators/video.py +1 -1
  39. pixeltable/metadata/__init__.py +2 -1
  40. pixeltable/metadata/converters/convert_15.py +18 -8
  41. pixeltable/metadata/converters/convert_27.py +31 -0
  42. pixeltable/metadata/converters/convert_28.py +15 -0
  43. pixeltable/metadata/converters/convert_29.py +111 -0
  44. pixeltable/metadata/converters/util.py +12 -1
  45. pixeltable/metadata/notes.py +3 -0
  46. pixeltable/metadata/schema.py +8 -0
  47. pixeltable/share/__init__.py +1 -0
  48. pixeltable/share/packager.py +246 -0
  49. pixeltable/share/publish.py +97 -0
  50. pixeltable/type_system.py +87 -42
  51. pixeltable/utils/__init__.py +41 -0
  52. pixeltable/utils/arrow.py +45 -12
  53. pixeltable/utils/formatter.py +1 -1
  54. pixeltable/utils/iceberg.py +14 -0
  55. pixeltable/utils/media_store.py +1 -1
  56. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/METADATA +37 -50
  57. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/RECORD +60 -51
  58. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/WHEEL +1 -1
  59. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/LICENSE +0 -0
  60. {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/entry_points.txt +0 -0
pixeltable/type_system.py CHANGED
@@ -8,12 +8,9 @@ import json
 import typing
 import urllib.parse
 import urllib.request
-from pathlib import Path
+from typing import Any, Iterable, Literal, Mapping, Optional, Sequence, Union
 
-from typing import _GenericAlias  # type: ignore[attr-defined] # isort: skip
-from typing import Any, Iterable, Mapping, Optional, Sequence, Union
-
-import av  # type: ignore
+import av
 import jsonschema
 import jsonschema.protocols
 import jsonschema.validators
@@ -24,6 +21,9 @@ import sqlalchemy as sql
 from typing_extensions import _AnnotatedAlias
 
 import pixeltable.exceptions as excs
+from pixeltable.utils import parse_local_file_path
+
+from typing import _GenericAlias  # type: ignore[attr-defined] # isort: skip
 
 
 class ColumnType:
@@ -47,8 +47,8 @@ class ColumnType:
     @classmethod
     def supertype(
         cls,
-        type1: 'ColumnType.Type',
-        type2: 'ColumnType.Type',
+        type1: Optional['ColumnType.Type'],
+        type2: Optional['ColumnType.Type'],
         # we need to pass this in because we can't easily append it as a class member
         common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
     ) -> Optional['ColumnType.Type']:
@@ -93,6 +93,9 @@ class ColumnType:
         self._type = t
         self._nullable = nullable
 
+    def has_supertype(self) -> bool:
+        return True
+
     @property
     def nullable(self) -> bool:
         return self._nullable
@@ -213,9 +216,9 @@ class ColumnType:
            return self.copy(nullable=(self.nullable or other.nullable))
 
        if self.is_invalid_type():
-            return other
+            return other.copy(nullable=(self.nullable or other.nullable))
        if other.is_invalid_type():
-            return self
+            return self.copy(nullable=(self.nullable or other.nullable))
 
        if self.is_scalar_type() and other.is_scalar_type():
            t = self.Type.supertype(self._type, other._type, self.common_supertypes)
@@ -271,8 +274,10 @@ class ColumnType:
                inferred_type = val_type
            else:
                inferred_type = inferred_type.supertype(val_type)
-            if inferred_type is None:
-                return None
+                if inferred_type is None:
+                    return None
+                if not inferred_type.has_supertype():
+                    return inferred_type
        return inferred_type
 
    @classmethod
@@ -292,26 +297,24 @@ class ColumnType:
        designations will be allowed regardless.
        """
        origin = typing.get_origin(t)
+        type_args = typing.get_args(t)
        if origin is typing.Union:
            # Check if `t` has the form Optional[T].
-            union_args = typing.get_args(t)
-            if len(union_args) == 2 and type(None) in union_args:
+            if len(type_args) == 2 and type(None) in type_args:
                # `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).
                # We treat it as the underlying type but with nullable=True.
-                underlying_py_type = union_args[0] if union_args[1] is type(None) else union_args[1]
+                underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
                underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
                if underlying is not None:
                    return underlying.copy(nullable=True)
        elif origin is Required:
-            required_args = typing.get_args(t)
-            assert len(required_args) == 1
+            assert len(type_args) == 1
            return cls.from_python_type(
-                required_args[0], nullable_default=False, allow_builtin_types=allow_builtin_types
-            )
+                type_args[0], nullable_default=False, allow_builtin_types=allow_builtin_types
+            ).copy(nullable=False)
        elif origin is typing.Annotated:
-            annotated_args = typing.get_args(t)
-            origin = annotated_args[0]
-            parameters = annotated_args[1]
+            origin = type_args[0]
+            parameters = type_args[1]
            if isinstance(parameters, ColumnType):
                return parameters.copy(nullable=nullable_default)
            else:
@@ -323,6 +326,11 @@ class ColumnType:
        if isinstance(t, type) and issubclass(t, _PxtType):
            return t.as_col_type(nullable=nullable_default)
        elif allow_builtin_types:
+            if t is Literal and len(type_args) > 0:
+                literal_type = cls.infer_common_literal_type(type_args)
+                if literal_type is None:
+                    return None
+                return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
            if t is str:
                return StringType(nullable=nullable_default)
            if t is int:
@@ -335,7 +343,7 @@ class ColumnType:
                return TimestampType(nullable=nullable_default)
            if t is PIL.Image.Image:
                return ImageType(nullable=nullable_default)
-            if issubclass(t, Sequence) or issubclass(t, Mapping) or issubclass(t, pydantic.BaseModel):
+            if isinstance(t, type) and issubclass(t, (Sequence, Mapping, pydantic.BaseModel)):
                return JsonType(nullable=nullable_default)
        return None
 
@@ -394,12 +402,9 @@ class ColumnType:
    def _validate_file_path(self, val: Any) -> None:
        """Raises TypeError if not a valid local file path or not a path/byte sequence"""
        if isinstance(val, str):
-            parsed = urllib.parse.urlparse(val)
-            if parsed.scheme != '' and parsed.scheme != 'file':
-                return
-            path = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed.path)))
-            if not path.is_file():
-                raise TypeError(f'File not found: {str(path)}')
+            path = parse_local_file_path(val)
+            if path is not None and not path.is_file():
+                raise TypeError(f'File not found: {path}')
        else:
            if not isinstance(val, bytes):
                raise TypeError(f'expected file path or bytes, got {type(val)}')
@@ -492,7 +497,7 @@ class InvalidType(ColumnType):
        super().__init__(self.Type.INVALID, nullable=nullable)
 
    def to_sa_type(self) -> sql.types.TypeEngine:
-        assert False
+        return sql.types.NullType()
 
    def print_value(self, val: Any) -> str:
        return str(val)
@@ -505,6 +510,9 @@ class StringType(ColumnType):
    def __init__(self, nullable: bool = False):
        super().__init__(self.Type.STRING, nullable=nullable)
 
+    def has_supertype(self):
+        return not self.nullable
+
    def to_sa_type(self) -> sql.types.TypeEngine:
        return sql.String()
 
@@ -588,6 +596,9 @@ class TimestampType(ColumnType):
    def __init__(self, nullable: bool = False):
        super().__init__(self.Type.TIMESTAMP, nullable=nullable)
 
+    def has_supertype(self):
+        return not self.nullable
+
    def to_sa_type(self) -> sql.types.TypeEngine:
        return sql.TIMESTAMP(timezone=True)
 
@@ -598,6 +609,8 @@ class TimestampType(ColumnType):
    def _create_literal(self, val: Any) -> Any:
        if isinstance(val, str):
            return datetime.datetime.fromisoformat(val)
+        if isinstance(val, datetime.datetime):
+            return val
        return val
 
 
@@ -648,6 +661,10 @@ class JsonType(ColumnType):
        return val_type.print_value(val)
 
    def _validate_literal(self, val: Any) -> None:
+        if isinstance(val, tuple):
+            val = list(val)
+        if isinstance(val, pydantic.BaseModel):
+            val = val.model_dump()
        if not self.__is_valid_json(val):
            raise TypeError(f'That literal is not a valid Pixeltable JSON object: {val}')
        if self.__validator is not None:
@@ -815,14 +832,20 @@ class ArrayType(ColumnType):
        return hash((self._type, self.nullable, self.shape, self.dtype))
 
    def supertype(self, other: ColumnType) -> Optional[ArrayType]:
+        basic_supertype = super().supertype(other)
+        if basic_supertype is not None:
+            assert isinstance(basic_supertype, ArrayType)
+            return basic_supertype
+
        if not isinstance(other, ArrayType):
            return None
+
        super_dtype = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
        if super_dtype is None:
            # if the dtypes are incompatible, then the supertype is a fully general array
            return ArrayType(nullable=(self.nullable or other.nullable))
        super_shape: Optional[tuple[Optional[int], ...]]
-        if len(self.shape) != len(other.shape):
+        if self.shape is None or other.shape is None or len(self.shape) != len(other.shape):
            super_shape = None
        else:
            super_shape = tuple(n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape))
@@ -851,23 +874,39 @@ class ArrayType(ColumnType):
        dtype = None if d['dtype'] is None else cls.make_type(cls.Type(d['dtype']))
        return cls(shape, dtype, nullable=d['nullable'])
 
+    @classmethod
+    def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> Optional[ColumnType]:
+        """
+        Return pixeltable type corresponding to a given simple numpy dtype
+        """
+        if np.issubdtype(dtype, np.integer):
+            return IntType(nullable=nullable)
+
+        if np.issubdtype(dtype, np.floating):
+            return FloatType(nullable=nullable)
+
+        if dtype == np.bool_:
+            return BoolType(nullable=nullable)
+
+        if np.issubdtype(dtype, np.str_):
+            return StringType(nullable=nullable)
+
+        if np.issubdtype(dtype, np.character):
+            return StringType(nullable=nullable)
+
+        if np.issubdtype(dtype, np.datetime64):
+            return TimestampType(nullable=nullable)
+
+        return None
+
    @classmethod
    def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
        # determine our dtype
        assert isinstance(val, np.ndarray)
-        dtype: ColumnType
-        if np.issubdtype(val.dtype, np.integer):
-            dtype = IntType()
-        elif np.issubdtype(val.dtype, np.floating):
-            dtype = FloatType()
-        elif val.dtype == np.bool_:
-            dtype = BoolType()
-        elif np.issubdtype(val.dtype, np.str_):
-            # Note that this includes NumPy types like '<U1' -- arrays of single Unicode characters
-            dtype = StringType()
-        else:
+        pxttype: Optional[ColumnType] = cls.from_np_dtype(val.dtype, nullable)
+        if pxttype == None:
            return None
-        return cls(val.shape, dtype=dtype, nullable=nullable)
+        return cls(val.shape, dtype=pxttype, nullable=nullable)
 
    def is_valid_literal(self, val: np.ndarray) -> bool:
        if not isinstance(val, np.ndarray):
@@ -990,8 +1029,14 @@ class ImageType(ColumnType):
        return hash((self._type, self.nullable, self.size, self.mode))
 
    def supertype(self, other: ColumnType) -> Optional[ImageType]:
+        basic_supertype = super().supertype(other)
+        if basic_supertype is not None:
+            assert isinstance(basic_supertype, ImageType)
+            return basic_supertype
+
        if not isinstance(other, ImageType):
            return None
+
        width = self.width if self.width == other.width else None
        height = self.height if self.height == other.height else None
        mode = self.mode if self.mode == other.mode else None
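
A minimal usage sketch (not part of the diff) of the new has_supertype() hook shown above, using only classes from this file; the `ts` alias follows the `import pixeltable.type_system as ts` convention used in pixeltable/utils/arrow.py:

import pixeltable.type_system as ts

# The base ColumnType reports True; StringType and TimestampType now override it so
# that a nullable instance reports False, which lets the literal-type inference loop
# shown above stop early once the inferred type cannot become any more general.
assert ts.StringType(nullable=False).has_supertype()
assert not ts.StringType(nullable=True).has_supertype()
assert not ts.TimestampType(nullable=True).has_supertype()
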
pixeltable/utils/__init__.py CHANGED
@@ -1,3 +1,10 @@
+import hashlib
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Optional, Union
+
+
 def print_perf_counter_delta(delta: float) -> str:
     """Prints a performance counter delta in a human-readable format.
 
@@ -15,3 +22,37 @@ def print_perf_counter_delta(delta: float) -> str:
        return f'{delta * 1e3:.2f} ms'
    else:
        return f'{delta:.2f} s'
+
+
+def sha256sum(path: Union[Path, str]) -> str:
+    """
+    Compute the SHA256 hash of a file.
+    """
+    if isinstance(path, str):
+        path = Path(path)
+
+    h = hashlib.sha256()
+    with open(path, 'rb') as file:
+        while chunk := file.read(h.block_size):
+            h.update(chunk)
+
+    return h.hexdigest()
+
+
+def parse_local_file_path(file_or_url: str) -> Optional[Path]:
+    """
+    Parses a string that may be either a URL or a local file path.
+
+    If the string is a local file path or a file-scheme URL (file://), then a Path object will be returned.
+    Otherwise, None will be returned.
+    """
+    parsed = urllib.parse.urlparse(file_or_url)
+    if len(parsed.scheme) <= 1:
+        # We're using `urlparse` to help distinguish file paths from URLs. If there is no scheme, then it's a file path.
+        # If there's a single-character scheme, we also interpret this as a file path; this insures that drive letters
+        # on Windows pathnames are correctly handled.
+        return Path(file_or_url).absolute()
+    elif parsed.scheme == 'file':
+        return Path(urllib.parse.unquote(urllib.request.url2pathname(parsed.path)))
+    else:
+        return None
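
A hedged usage sketch for the two new helpers in pixeltable/utils/__init__.py (behavior taken from the function bodies above; the temporary file below is purely illustrative):

import tempfile

from pixeltable.utils import parse_local_file_path, sha256sum

# Plain paths and file:// URLs resolve to a local Path; any other scheme yields None.
assert parse_local_file_path('https://example.com/cat.jpg') is None
assert parse_local_file_path('file:///tmp/cat.jpg').name == 'cat.jpg'
assert parse_local_file_path(r'C:\data\cat.jpg') is not None  # single-letter scheme, treated as a Windows drive

with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(b'hello world')
print(sha256sum(f.name))  # 64-character hex digest of the file contents
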
pixeltable/utils/arrow.py CHANGED
@@ -6,8 +6,10 @@ import pyarrow as pa
 
 import pixeltable.type_system as ts
 
-_pa_to_pt: dict[pa.DataType, ts.ColumnType] = {
+PA_TO_PXT_TYPES: dict[pa.DataType, ts.ColumnType] = {
     pa.string(): ts.StringType(nullable=True),
+    pa.large_string(): ts.StringType(nullable=True),
+    pa.timestamp('us', tz=datetime.timezone.utc): ts.TimestampType(nullable=True),
     pa.bool_(): ts.BoolType(nullable=True),
     pa.uint8(): ts.IntType(nullable=True),
     pa.int8(): ts.IntType(nullable=True),
@@ -16,9 +18,10 @@ _pa_to_pt: dict[pa.DataType, ts.ColumnType] = {
     pa.int32(): ts.IntType(nullable=True),
     pa.int64(): ts.IntType(nullable=True),
     pa.float32(): ts.FloatType(nullable=True),
+    pa.float64(): ts.FloatType(nullable=True),
 }
 
-_pt_to_pa: dict[type[ts.ColumnType], pa.DataType] = {
+PXT_TO_PA_TYPES: dict[type[ts.ColumnType], pa.DataType] = {
     ts.StringType: pa.string(),
     ts.TimestampType: pa.timestamp('us', tz=datetime.timezone.utc),  # postgres timestamp is microseconds
     ts.BoolType: pa.bool_(),
@@ -32,19 +35,20 @@ _pt_to_pa: dict[type[ts.ColumnType], pa.DataType] = {
 }
 
 
-def to_pixeltable_type(arrow_type: pa.DataType) -> Optional[ts.ColumnType]:
+def to_pixeltable_type(arrow_type: pa.DataType, nullable: bool) -> Optional[ts.ColumnType]:
    """Convert a pyarrow DataType to a pixeltable ColumnType if one is defined.
    Returns None if no conversion is currently implemented.
    """
    if isinstance(arrow_type, pa.TimestampType):
-        return ts.TimestampType(nullable=True)
-    elif arrow_type in _pa_to_pt:
-        return _pa_to_pt[arrow_type]
+        return ts.TimestampType(nullable=nullable)
+    elif arrow_type in PA_TO_PXT_TYPES:
+        pt = PA_TO_PXT_TYPES[arrow_type]
+        return pt.copy(nullable=nullable)
    elif isinstance(arrow_type, pa.FixedShapeTensorType):
-        dtype = to_pixeltable_type(arrow_type.value_type)
+        dtype = to_pixeltable_type(arrow_type.value_type, nullable)
        if dtype is None:
            return None
-        return ts.ArrayType(shape=arrow_type.shape, dtype=dtype)
+        return ts.ArrayType(shape=arrow_type.shape, dtype=dtype, nullable=nullable)
    else:
        return None
 
@@ -53,16 +57,25 @@ def to_arrow_type(pixeltable_type: ts.ColumnType) -> Optional[pa.DataType]:
    """Convert a pixeltable DataType to a pyarrow datatype if one is defined.
    Returns None if no conversion is currently implemented.
    """
-    if pixeltable_type.__class__ in _pt_to_pa:
-        return _pt_to_pa[pixeltable_type.__class__]
+    if pixeltable_type.__class__ in PXT_TO_PA_TYPES:
+        return PXT_TO_PA_TYPES[pixeltable_type.__class__]
    elif isinstance(pixeltable_type, ts.ArrayType):
        return pa.fixed_shape_tensor(pa.from_numpy_dtype(pixeltable_type.numpy_dtype()), pixeltable_type.shape)
    else:
        return None
 
 
-def to_pixeltable_schema(arrow_schema: pa.Schema) -> dict[str, ts.ColumnType]:
-    return {field.name: to_pixeltable_type(field.type) for field in arrow_schema}
+def ar_infer_schema(
+    arrow_schema: pa.Schema, schema_overrides: dict[str, Any], primary_key: list[str]
+) -> dict[str, ts.ColumnType]:
+    """Convert a pyarrow Schema to a schema using pyarrow names and pixeltable types."""
+    ar_schema = {
+        field.name: to_pixeltable_type(field.type, field.name not in primary_key)
+        if field.name not in schema_overrides
+        else schema_overrides[field.name]
+        for field in arrow_schema
+    }
+    return ar_schema
 
 
 def to_arrow_schema(pixeltable_schema: dict[str, Any]) -> pa.Schema:
@@ -96,3 +109,23 @@ def iter_tuples(batch: Union[pa.Table, pa.RecordBatch]) -> Iterator[dict[str, Any]]:
 
    for i in range(batch_size):
        yield {col_name: values[i] for col_name, values in pydict.items()}
+
+
+def iter_tuples2(
+    batch: Union[pa.Table, pa.RecordBatch], col_mapping: Optional[dict[str, str]], schema: dict[str, ts.ColumnType]
+) -> Iterator[dict[str, Any]]:
+    """Convert a RecordBatch to an iterator of dictionaries. also works with pa.Table and pa.RowGroup"""
+    pydict = to_pydict(batch)
+    assert len(pydict) > 0, 'empty record batch'
+    for _, v in pydict.items():
+        batch_size = len(v)
+        break
+
+    for i in range(batch_size):
+        # Convert a row to insertable format
+        yield {
+            (pxt_name := col_name if col_mapping is None else col_mapping[col_name]): schema[pxt_name].create_literal(
+                values[i]
+            )
+            for col_name, values in pydict.items()
+        }
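
A short sketch (not from the package) of how the reworked arrow helpers might be called, assuming the module path pixeltable.utils.arrow and a made-up two-column schema:

import pyarrow as pa

import pixeltable.type_system as ts
from pixeltable.utils.arrow import ar_infer_schema, to_pixeltable_type

# Conversions now carry nullability explicitly; large_string and float64 are newly mapped.
assert isinstance(to_pixeltable_type(pa.large_string(), True), ts.StringType)

arrow_schema = pa.schema([('id', pa.int64()), ('score', pa.float64())])
# Primary-key columns come out non-nullable; entries in schema_overrides win over inference.
pxt_schema = ar_infer_schema(arrow_schema, schema_overrides={}, primary_key=['id'])
assert not pxt_schema['id'].nullable
assert pxt_schema['score'].nullable
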
@@ -6,7 +6,7 @@ import logging
 import mimetypes
 from typing import Any, Callable, Optional
 
-import av  # type: ignore[import-untyped]
+import av
 import numpy as np
 import PIL
 import PIL.Image as Image
pixeltable/utils/iceberg.py ADDED
@@ -0,0 +1,14 @@
+from pathlib import Path
+from typing import Union
+
+from pyiceberg.catalog.sql import SqlCatalog
+
+
+def sqlite_catalog(warehouse_path: Union[str, Path], name: str = 'pixeltable') -> SqlCatalog:
+    """
+    Instantiate a sqlite Iceberg catalog at the specified path. If no catalog exists, one will be created.
+    """
+    if isinstance(warehouse_path, str):
+        warehouse_path = Path(warehouse_path)
+    warehouse_path.mkdir(exist_ok=True)
+    return SqlCatalog(name, uri=f'sqlite:///{warehouse_path}/catalog.db', warehouse=f'file://{warehouse_path}')
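
A hedged example of using the new Iceberg helper (the warehouse path and namespace name are illustrative; create_namespace and list_namespaces are standard pyiceberg catalog methods, not part of this diff):

from pixeltable.utils.iceberg import sqlite_catalog

catalog = sqlite_catalog('/tmp/iceberg_warehouse', name='demo')
catalog.create_namespace('exports')
print(catalog.list_namespaces())
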
pixeltable/utils/media_store.py CHANGED
@@ -30,7 +30,7 @@ class MediaStore:
        the environment's media_dir.
        """
        id_hex = uuid.uuid4().hex
-        parent = Env.get().media_dir / tbl_id.hex / id_hex[0:2] / id_hex[0:4]
+        parent = Env.get().media_dir / tbl_id.hex / id_hex[:2] / id_hex[:4]
        parent.mkdir(parents=True, exist_ok=True)
        return parent / f'{tbl_id.hex}_{col_id}_{version}_{id_hex}{ext or ""}'