pixeltable 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +8 -22
  4. pixeltable/catalog/insertable_table.py +26 -8
  5. pixeltable/catalog/table.py +179 -83
  6. pixeltable/catalog/table_version.py +13 -39
  7. pixeltable/catalog/table_version_path.py +2 -2
  8. pixeltable/catalog/view.py +2 -2
  9. pixeltable/dataframe.py +20 -28
  10. pixeltable/env.py +2 -0
  11. pixeltable/exec/cache_prefetch_node.py +189 -43
  12. pixeltable/exec/data_row_batch.py +3 -3
  13. pixeltable/exec/exec_context.py +2 -2
  14. pixeltable/exec/exec_node.py +2 -2
  15. pixeltable/exec/expr_eval_node.py +8 -8
  16. pixeltable/exprs/arithmetic_expr.py +9 -4
  17. pixeltable/exprs/column_ref.py +4 -0
  18. pixeltable/exprs/comparison.py +5 -0
  19. pixeltable/exprs/json_path.py +1 -1
  20. pixeltable/func/aggregate_function.py +8 -8
  21. pixeltable/func/expr_template_function.py +6 -5
  22. pixeltable/func/udf.py +6 -11
  23. pixeltable/functions/huggingface.py +136 -25
  24. pixeltable/functions/llama_cpp.py +3 -2
  25. pixeltable/functions/mistralai.py +1 -1
  26. pixeltable/functions/openai.py +1 -1
  27. pixeltable/functions/together.py +1 -1
  28. pixeltable/functions/util.py +5 -2
  29. pixeltable/globals.py +55 -6
  30. pixeltable/plan.py +1 -1
  31. pixeltable/tool/create_test_db_dump.py +1 -1
  32. pixeltable/type_system.py +83 -35
  33. pixeltable/utils/coco.py +5 -5
  34. pixeltable/utils/formatter.py +3 -3
  35. pixeltable/utils/s3.py +6 -3
  36. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/METADATA +119 -46
  37. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/RECORD +40 -40
  38. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
  39. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
  40. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py CHANGED
@@ -123,7 +123,8 @@ def create_view(
123
123
  additional_columns: If specified, will add these columns to the view once it is created. The format
124
124
  of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
125
125
  [`create_table`][pixeltable.create_table].
126
- is_snapshot: Whether the view is a snapshot.
126
+ is_snapshot: Whether the view is a snapshot. Setting this to `True` is equivalent to calling
127
+ [`create_snapshot`][pixeltable.create_snapshot].
127
128
  iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
128
129
  the base table.
129
130
  num_retained_versions: Number of versions of the view to retain.
@@ -142,11 +143,6 @@ def create_view(
142
143
 
143
144
  >>> tbl = pxt.get_table('my_table')
144
145
  ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
145
-
146
- Create a snapshot of `my_table`:
147
-
148
- >>> tbl = pxt.get_table('my_table')
149
- ... snapshot_view = pxt.create_view('my_snapshot_view', tbl, is_snapshot=True)
150
146
  """
151
147
  where: Optional[exprs.Expr] = None
152
148
  if isinstance(base, catalog.Table):
@@ -186,6 +182,59 @@ def create_view(
186
182
  return view
187
183
 
188
184
 
185
+ def create_snapshot(
186
+ path_str: str,
187
+ base: Union[catalog.Table, DataFrame],
188
+ *,
189
+ additional_columns: Optional[dict[str, Any]] = None,
190
+ iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
191
+ num_retained_versions: int = 10,
192
+ comment: str = '',
193
+ media_validation: Literal['on_read', 'on_write'] = 'on_write',
194
+ ignore_errors: bool = False,
195
+ ) -> Optional[catalog.Table]:
196
+ """Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
197
+
198
+ Args:
199
+ path_str: A name for the snapshot; can be either a simple name such as `my_snapshot`, or a pathname such as
200
+ `dir1.my_snapshot`.
201
+ base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
202
+ base the snapshot on.
203
+ additional_columns: If specified, will add these columns to the snapshot once it is created. The format
204
+ of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
205
+ [`create_table`][pixeltable.create_table].
206
+ iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
207
+ the base table.
208
+ num_retained_versions: Number of versions of the view to retain.
209
+ comment: Optional comment for the view.
210
+ ignore_errors: if True, fail silently if the path already exists or is invalid.
211
+
212
+ Returns:
213
+ A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot. If the path already
214
+ exists or is invalid and `ignore_errors=True`, returns `None`.
215
+
216
+ Raises:
217
+ Error: if the path already exists or is invalid and `ignore_errors=False`.
218
+
219
+ Examples:
220
+ Create a snapshot of `my_table`:
221
+
222
+ >>> tbl = pxt.get_table('my_table')
223
+ ... snapshot = pxt.create_snapshot('my_snapshot', tbl)
224
+ """
225
+ return create_view(
226
+ path_str,
227
+ base,
228
+ additional_columns=additional_columns,
229
+ iterator=iterator,
230
+ is_snapshot=True,
231
+ num_retained_versions=num_retained_versions,
232
+ comment=comment,
233
+ media_validation=media_validation,
234
+ ignore_errors=ignore_errors,
235
+ )
236
+
237
+
189
238
  def get_table(path: str) -> catalog.Table:
190
239
  """Get a handle to an existing table, view, or snapshot.
191
240
 
pixeltable/plan.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Any, Iterable, Optional, Sequence, cast
1
+ from typing import Any, Iterable, Optional, Sequence
2
2
  from uuid import UUID
3
3
 
4
4
  import sqlalchemy as sql
pixeltable/tool/create_test_db_dump.py CHANGED
@@ -153,7 +153,7 @@ class Dumper:
153
153
  self.__add_expr_columns(v, 'view')
154
154
 
155
155
  # snapshot
156
- _ = pxt.create_view('views.snapshot', t.where(t.c2 >= 75), is_snapshot=True)
156
+ _ = pxt.create_snapshot('views.snapshot', t.where(t.c2 >= 75))
157
157
 
158
158
  # view of views
159
159
  vv = pxt.create_view('views.view_of_views', v.where(t.c2 >= 25))
pixeltable/type_system.py CHANGED
@@ -5,6 +5,7 @@ import datetime
5
5
  import enum
6
6
  import io
7
7
  import json
8
+ import types
8
9
  import typing
9
10
  import urllib.parse
10
11
  import urllib.request
@@ -272,63 +273,110 @@ class ColumnType:
272
273
  return inferred_type
273
274
 
274
275
  @classmethod
275
- def from_python_type(cls, t: Union[type, _GenericAlias], nullable_default: bool = False) -> Optional[ColumnType]:
276
- if typing.get_origin(t) is typing.Union:
276
+ def from_python_type(
277
+ cls,
278
+ t: Union[type, _GenericAlias],
279
+ nullable_default: bool = False,
280
+ allow_builtin_types: bool = True
281
+ ) -> Optional[ColumnType]:
282
+ """
283
+ Convert a Python type into a Pixeltable `ColumnType` instance.
284
+
285
+ Args:
286
+ t: The Python type.
287
+ nullable_default: If True, then the returned `ColumnType` will be nullable unless it is marked as
288
+ `Required`.
289
+ allow_builtin_types: If True, then built-in types such as `str`, `int`, `float`, etc., will be
290
+ allowed (as in UDF definitions). If False, then only Pixeltable types such as `pxt.String`,
291
+ `pxt.Int`, etc., will be allowed (as in schema definitions). `Optional` and `Required`
292
+ designations will be allowed regardless.
293
+ """
294
+ origin = typing.get_origin(t)
295
+ if origin is typing.Union:
296
+ # Check if `t` has the form Optional[T].
277
297
  union_args = typing.get_args(t)
278
298
  if len(union_args) == 2 and type(None) in union_args:
279
299
  # `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).
280
300
  # We treat it as the underlying type but with nullable=True.
281
301
  underlying_py_type = union_args[0] if union_args[1] is type(None) else union_args[1]
282
- underlying = cls.from_python_type(underlying_py_type)
302
+ underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
283
303
  if underlying is not None:
284
304
  return underlying.copy(nullable=True)
285
- elif typing.get_origin(t) is typing.Annotated:
305
+ elif origin is Required:
306
+ required_args = typing.get_args(t)
307
+ assert len(required_args) == 1
308
+ return cls.from_python_type(
309
+ required_args[0],
310
+ nullable_default=False,
311
+ allow_builtin_types=allow_builtin_types
312
+ )
313
+ elif origin is typing.Annotated:
286
314
  annotated_args = typing.get_args(t)
287
315
  origin = annotated_args[0]
288
316
  parameters = annotated_args[1]
289
317
  if isinstance(parameters, ColumnType):
290
318
  return parameters.copy(nullable=nullable_default)
291
- elif typing.get_origin(t) is Required:
292
- required_args = typing.get_args(t)
293
- assert len(required_args) == 1
294
- return cls.from_python_type(required_args[0], nullable_default=False)
295
319
  else:
296
- # Discard type parameters to ensure that parameterized types such as `list[T]`
297
- # are correctly mapped to Pixeltable types.
298
- origin = typing.get_origin(t)
299
- if origin is None:
300
- # No type parameters; the origin type is just `t` itself
301
- origin = t
302
- if issubclass(origin, _PxtType):
303
- return origin.as_col_type(nullable=nullable_default)
304
- if origin is str:
305
- return StringType(nullable=nullable_default)
306
- if origin is int:
307
- return IntType(nullable=nullable_default)
308
- if origin is float:
309
- return FloatType(nullable=nullable_default)
310
- if origin is bool:
311
- return BoolType(nullable=nullable_default)
312
- if origin is datetime.datetime:
313
- return TimestampType(nullable=nullable_default)
314
- if origin is PIL.Image.Image:
315
- return ImageType(nullable=nullable_default)
316
- if issubclass(origin, Sequence) or issubclass(origin, Mapping):
317
- return JsonType(nullable=nullable_default)
320
+ # It's something other than Optional[T], Required[T], or an explicitly annotated type.
321
+ if origin is not None:
322
+ # Discard type parameters to ensure that parameterized types such as `list[T]`
323
+ # are correctly mapped to Pixeltable types.
324
+ t = origin
325
+ if isinstance(t, type) and issubclass(t, _PxtType):
326
+ return t.as_col_type(nullable=nullable_default)
327
+ elif allow_builtin_types:
328
+ if t is str:
329
+ return StringType(nullable=nullable_default)
330
+ if t is int:
331
+ return IntType(nullable=nullable_default)
332
+ if t is float:
333
+ return FloatType(nullable=nullable_default)
334
+ if t is bool:
335
+ return BoolType(nullable=nullable_default)
336
+ if t is datetime.datetime:
337
+ return TimestampType(nullable=nullable_default)
338
+ if t is PIL.Image.Image:
339
+ return ImageType(nullable=nullable_default)
340
+ if issubclass(t, Sequence) or issubclass(t, Mapping):
341
+ return JsonType(nullable=nullable_default)
318
342
  return None
319
343
 
320
344
  @classmethod
321
- def normalize_type(cls, t: Union[ColumnType, type, _AnnotatedAlias], nullable_default: bool = False) -> ColumnType:
345
+ def normalize_type(
346
+ cls,
347
+ t: Union[ColumnType, type, _AnnotatedAlias],
348
+ nullable_default: bool = False,
349
+ allow_builtin_types: bool = True
350
+ ) -> ColumnType:
322
351
  """
323
352
  Convert any type recognizable by Pixeltable to its corresponding ColumnType.
324
353
  """
325
354
  if isinstance(t, ColumnType):
326
355
  return t
327
- col_type = cls.from_python_type(t, nullable_default)
356
+ col_type = cls.from_python_type(t, nullable_default, allow_builtin_types)
328
357
  if col_type is None:
329
- raise excs.Error(f'Unknown type: {t}')
358
+ cls.__raise_exc_for_invalid_type(t)
330
359
  return col_type
331
360
 
361
+ __TYPE_SUGGESTIONS: list[tuple[type, str]] = [
362
+ (str, 'pxt.String'),
363
+ (bool, 'pxt.Bool'),
364
+ (int, 'pxt.Int'),
365
+ (float, 'pxt.Float'),
366
+ (datetime.datetime, 'pxt.Timestamp'),
367
+ (PIL.Image.Image, 'pxt.Image'),
368
+ (Sequence, 'pxt.Json'),
369
+ (Mapping, 'pxt.Json'),
370
+ ]
371
+
372
+ @classmethod
373
+ def __raise_exc_for_invalid_type(cls, t: Union[type, _AnnotatedAlias]) -> None:
374
+ for builtin_type, suggestion in cls.__TYPE_SUGGESTIONS:
375
+ if t is builtin_type or (isinstance(t, type) and issubclass(t, builtin_type)):
376
+ name = t.__name__ if t.__module__ == 'builtins' else f'{t.__module__}.{t.__name__}'
377
+ raise excs.Error(f'Standard Python type `{name}` cannot be used here; use `{suggestion}` instead')
378
+ raise excs.Error(f'Unknown type: {t}')
379
+
332
380
  def validate_literal(self, val: Any) -> None:
333
381
  """Raise TypeError if val is not a valid literal for this type"""
334
382
  if val is None:
@@ -979,7 +1027,7 @@ class Array(np.ndarray, _PxtType):
979
1027
  `item` (the type subscript) must be a tuple with exactly two elements (in any order):
980
1028
  - A tuple of `Optional[int]`s, specifying the shape of the array
981
1029
  - A type, specifying the dtype of the array
982
- Example: Array[(3, None, 2), float]
1030
+ Example: Array[(3, None, 2), pxt.Float]
983
1031
  """
984
1032
  params = item if isinstance(item, tuple) else (item,)
985
1033
  shape: Optional[tuple] = None
@@ -994,7 +1042,7 @@ class Array(np.ndarray, _PxtType):
994
1042
  elif isinstance(param, type) or isinstance(param, _AnnotatedAlias):
995
1043
  if dtype is not None:
996
1044
  raise TypeError(f'Duplicate Array type parameter: {param}')
997
- dtype = ColumnType.from_python_type(param)
1045
+ dtype = ColumnType.normalize_type(param, allow_builtin_types=False)
998
1046
  else:
999
1047
  raise TypeError(f'Invalid Array type parameter: {param}')
1000
1048
  if shape is None:
pixeltable/utils/coco.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  from pathlib import Path
3
- from typing import Any, Dict, List, Set
3
+ from typing import Any
4
4
 
5
5
  import PIL
6
6
 
@@ -22,7 +22,7 @@ Required format:
22
22
  }
23
23
  """
24
24
 
25
- def _verify_input_dict(input_dict: Dict[str, Any]) -> None:
25
+ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
26
26
  """Verify that input_dict is a valid input dict for write_coco_dataset()"""
27
27
  if not isinstance(input_dict, dict):
28
28
  raise excs.Error(f'Expected dict, got {input_dict}{format_msg}')
@@ -61,11 +61,11 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
61
61
  images_dir = dest_path / 'images'
62
62
  images_dir.mkdir()
63
63
 
64
- images: List[Dict[str, Any]] = []
64
+ images: list[dict[str, Any]] = []
65
65
  img_id = -1
66
- annotations: List[Dict[str, Any]] = []
66
+ annotations: list[dict[str, Any]] = []
67
67
  ann_id = -1
68
- categories: Set[Any] = set()
68
+ categories: set[Any] = set()
69
69
  for input_row in df._exec():
70
70
  if input_dict_slot_idx == -1:
71
71
  input_dict_expr = df._select_list_exprs[0]
pixeltable/utils/formatter.py CHANGED
@@ -138,11 +138,11 @@ class Formatter:
138
138
  assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
139
139
  # Try to make it look decent in a variety of display scenarios
140
140
  if self.__num_rows > 1:
141
- width = 240 # Multiple rows: display small images
141
+ width = min(240, img.width) # Multiple rows: display small images
142
142
  elif self.__num_cols > 1:
143
- width = 480 # Multiple columns: display medium images
143
+ width = min(480, img.width) # Multiple columns: display medium images
144
144
  else:
145
- width = 640 # A single image: larger display
145
+ width = min(640, img.width) # A single image: larger display
146
146
  with io.BytesIO() as buffer:
147
147
  img.save(buffer, 'webp')
148
148
  img_base64 = base64.b64encode(buffer.getvalue()).decode()
pixeltable/utils/s3.py CHANGED
@@ -1,13 +1,16 @@
1
1
  from typing import Any
2
2
 
3
3
 
4
- def get_client() -> Any:
4
+ def get_client(**kwargs: Any) -> Any:
5
5
  import boto3
6
6
  import botocore
7
7
  try:
8
8
  boto3.Session().get_credentials().get_frozen_credentials()
9
- return boto3.client('s3') # credentials are available
9
+ config = botocore.config.Config(**kwargs)
10
+ return boto3.client('s3', config=config) # credentials are available
10
11
  except AttributeError:
11
12
  # No credentials available, use unsigned mode
12
- config = botocore.config.Config(signature_version=botocore.UNSIGNED)
13
+ config_args = kwargs.copy()
14
+ config_args['signature_version'] = botocore.UNSIGNED
15
+ config = botocore.config.Config(**config_args)
13
16
  return boto3.client('s3', config=config)