pixeltable 0.2.22__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +8 -22
- pixeltable/catalog/insertable_table.py +26 -8
- pixeltable/catalog/table.py +179 -83
- pixeltable/catalog/table_version.py +13 -39
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +2 -2
- pixeltable/dataframe.py +20 -28
- pixeltable/env.py +2 -0
- pixeltable/exec/cache_prefetch_node.py +189 -43
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +2 -2
- pixeltable/exec/exec_node.py +2 -2
- pixeltable/exec/expr_eval_node.py +8 -8
- pixeltable/exprs/arithmetic_expr.py +9 -4
- pixeltable/exprs/column_ref.py +4 -0
- pixeltable/exprs/comparison.py +5 -0
- pixeltable/exprs/json_path.py +1 -1
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/expr_template_function.py +6 -5
- pixeltable/func/udf.py +6 -11
- pixeltable/functions/huggingface.py +145 -25
- pixeltable/functions/llama_cpp.py +3 -2
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +5 -2
- pixeltable/globals.py +55 -6
- pixeltable/plan.py +1 -1
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/type_system.py +83 -35
- pixeltable/utils/coco.py +5 -5
- pixeltable/utils/formatter.py +3 -3
- pixeltable/utils/s3.py +6 -3
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/METADATA +119 -46
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/RECORD +40 -40
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.22.dist-info → pixeltable-0.2.24.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py
CHANGED
|
@@ -123,7 +123,8 @@ def create_view(
|
|
|
123
123
|
additional_columns: If specified, will add these columns to the view once it is created. The format
|
|
124
124
|
of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
|
|
125
125
|
[`create_table`][pixeltable.create_table].
|
|
126
|
-
is_snapshot: Whether the view is a snapshot.
|
|
126
|
+
is_snapshot: Whether the view is a snapshot. Setting this to `True` is equivalent to calling
|
|
127
|
+
[`create_snapshot`][pixeltable.create_snapshot].
|
|
127
128
|
iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
|
|
128
129
|
the base table.
|
|
129
130
|
num_retained_versions: Number of versions of the view to retain.
|
|
@@ -142,11 +143,6 @@ def create_view(
|
|
|
142
143
|
|
|
143
144
|
>>> tbl = pxt.get_table('my_table')
|
|
144
145
|
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
|
|
145
|
-
|
|
146
|
-
Create a snapshot of `my_table`:
|
|
147
|
-
|
|
148
|
-
>>> tbl = pxt.get_table('my_table')
|
|
149
|
-
... snapshot_view = pxt.create_view('my_snapshot_view', tbl, is_snapshot=True)
|
|
150
146
|
"""
|
|
151
147
|
where: Optional[exprs.Expr] = None
|
|
152
148
|
if isinstance(base, catalog.Table):
|
|
@@ -186,6 +182,59 @@ def create_view(
|
|
|
186
182
|
return view
|
|
187
183
|
|
|
188
184
|
|
|
185
|
+
def create_snapshot(
|
|
186
|
+
path_str: str,
|
|
187
|
+
base: Union[catalog.Table, DataFrame],
|
|
188
|
+
*,
|
|
189
|
+
additional_columns: Optional[dict[str, Any]] = None,
|
|
190
|
+
iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
|
|
191
|
+
num_retained_versions: int = 10,
|
|
192
|
+
comment: str = '',
|
|
193
|
+
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
194
|
+
ignore_errors: bool = False,
|
|
195
|
+
) -> Optional[catalog.Table]:
|
|
196
|
+
"""Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
197
|
+
|
|
198
|
+
Args:
|
|
199
|
+
path_str: A name for the snapshot; can be either a simple name such as `my_snapshot`, or a pathname such as
|
|
200
|
+
`dir1.my_snapshot`.
|
|
201
|
+
base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
|
|
202
|
+
base the snapshot on.
|
|
203
|
+
additional_columns: If specified, will add these columns to the snapshot once it is created. The format
|
|
204
|
+
of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
|
|
205
|
+
[`create_table`][pixeltable.create_table].
|
|
206
|
+
iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
|
|
207
|
+
the base table.
|
|
208
|
+
num_retained_versions: Number of versions of the view to retain.
|
|
209
|
+
comment: Optional comment for the view.
|
|
210
|
+
ignore_errors: if True, fail silently if the path already exists or is invalid.
|
|
211
|
+
|
|
212
|
+
Returns:
|
|
213
|
+
A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot. If the path already
|
|
214
|
+
exists or is invalid and `ignore_errors=True`, returns `None`.
|
|
215
|
+
|
|
216
|
+
Raises:
|
|
217
|
+
Error: if the path already exists or is invalid and `ignore_errors=False`.
|
|
218
|
+
|
|
219
|
+
Examples:
|
|
220
|
+
Create a snapshot of `my_table`:
|
|
221
|
+
|
|
222
|
+
>>> tbl = pxt.get_table('my_table')
|
|
223
|
+
... snapshot = pxt.create_snapshot('my_snapshot', tbl)
|
|
224
|
+
"""
|
|
225
|
+
return create_view(
|
|
226
|
+
path_str,
|
|
227
|
+
base,
|
|
228
|
+
additional_columns=additional_columns,
|
|
229
|
+
iterator=iterator,
|
|
230
|
+
is_snapshot=True,
|
|
231
|
+
num_retained_versions=num_retained_versions,
|
|
232
|
+
comment=comment,
|
|
233
|
+
media_validation=media_validation,
|
|
234
|
+
ignore_errors=ignore_errors,
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
|
|
189
238
|
def get_table(path: str) -> catalog.Table:
|
|
190
239
|
"""Get a handle to an existing table, view, or snapshot.
|
|
191
240
|
|
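pixeltable/plan.py
CHANGED
(the +1 -1 change to this file is not shown in this extract)
pixeltable/tool/create_test_db_dump.py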
CHANGED
|
@@ -153,7 +153,7 @@ class Dumper:
|
|
|
153
153
|
self.__add_expr_columns(v, 'view')
|
|
154
154
|
|
|
155
155
|
# snapshot
|
|
156
|
-
_ = pxt.create_view('views.snapshot', t.where(t.c2 >= 75), is_snapshot=True)
|
|
156
|
+
_ = pxt.create_snapshot('views.snapshot', t.where(t.c2 >= 75))
|
|
157
157
|
|
|
158
158
|
# view of views
|
|
159
159
|
vv = pxt.create_view('views.view_of_views', v.where(t.c2 >= 25))
|
pixeltable/type_system.py
CHANGED
|
@@ -5,6 +5,7 @@ import datetime
|
|
|
5
5
|
import enum
|
|
6
6
|
import io
|
|
7
7
|
import json
|
|
8
|
+
import types
|
|
8
9
|
import typing
|
|
9
10
|
import urllib.parse
|
|
10
11
|
import urllib.request
|
|
@@ -272,63 +273,110 @@ class ColumnType:
|
|
|
272
273
|
return inferred_type
|
|
273
274
|
|
|
274
275
|
@classmethod
|
|
275
|
-
def from_python_type(
|
|
276
|
-
|
|
276
|
+
def from_python_type(
|
|
277
|
+
cls,
|
|
278
|
+
t: Union[type, _GenericAlias],
|
|
279
|
+
nullable_default: bool = False,
|
|
280
|
+
allow_builtin_types: bool = True
|
|
281
|
+
) -> Optional[ColumnType]:
|
|
282
|
+
"""
|
|
283
|
+
Convert a Python type into a Pixeltable `ColumnType` instance.
|
|
284
|
+
|
|
285
|
+
Args:
|
|
286
|
+
t: The Python type.
|
|
287
|
+
nullable_default: If True, then the returned `ColumnType` will be nullable unless it is marked as
|
|
288
|
+
`Required`.
|
|
289
|
+
allow_builtin_types: If True, then built-in types such as `str`, `int`, `float`, etc., will be
|
|
290
|
+
allowed (as in UDF definitions). If False, then only Pixeltable types such as `pxt.String`,
|
|
291
|
+
`pxt.Int`, etc., will be allowed (as in schema definitions). `Optional` and `Required`
|
|
292
|
+
designations will be allowed regardless.
|
|
293
|
+
"""
|
|
294
|
+
origin = typing.get_origin(t)
|
|
295
|
+
if origin is typing.Union:
|
|
296
|
+
# Check if `t` has the form Optional[T].
|
|
277
297
|
union_args = typing.get_args(t)
|
|
278
298
|
if len(union_args) == 2 and type(None) in union_args:
|
|
279
299
|
# `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).
|
|
280
300
|
# We treat it as the underlying type but with nullable=True.
|
|
281
301
|
underlying_py_type = union_args[0] if union_args[1] is type(None) else union_args[1]
|
|
282
|
-
underlying = cls.from_python_type(underlying_py_type)
|
|
302
|
+
underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
|
|
283
303
|
if underlying is not None:
|
|
284
304
|
return underlying.copy(nullable=True)
|
|
285
|
-
elif typing.get_origin(t) is typing.Annotated:
|
|
305
|
+
elif origin is Required:
|
|
306
|
+
required_args = typing.get_args(t)
|
|
307
|
+
assert len(required_args) == 1
|
|
308
|
+
return cls.from_python_type(
|
|
309
|
+
required_args[0],
|
|
310
|
+
nullable_default=False,
|
|
311
|
+
allow_builtin_types=allow_builtin_types
|
|
312
|
+
)
|
|
313
|
+
elif origin is typing.Annotated:
|
|
286
314
|
annotated_args = typing.get_args(t)
|
|
287
315
|
origin = annotated_args[0]
|
|
288
316
|
parameters = annotated_args[1]
|
|
289
317
|
if isinstance(parameters, ColumnType):
|
|
290
318
|
return parameters.copy(nullable=nullable_default)
|
|
291
|
-
elif typing.get_origin(t) is Required:
|
|
292
|
-
required_args = typing.get_args(t)
|
|
293
|
-
assert len(required_args) == 1
|
|
294
|
-
return cls.from_python_type(required_args[0], nullable_default=False)
|
|
295
319
|
else:
|
|
296
|
-
#
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
320
|
+
# It's something other than Optional[T], Required[T], or an explicitly annotated type.
|
|
321
|
+
if origin is not None:
|
|
322
|
+
# Discard type parameters to ensure that parameterized types such as `list[T]`
|
|
323
|
+
# are correctly mapped to Pixeltable types.
|
|
324
|
+
t = origin
|
|
325
|
+
if isinstance(t, type) and issubclass(t, _PxtType):
|
|
326
|
+
return t.as_col_type(nullable=nullable_default)
|
|
327
|
+
elif allow_builtin_types:
|
|
328
|
+
if t is str:
|
|
329
|
+
return StringType(nullable=nullable_default)
|
|
330
|
+
if t is int:
|
|
331
|
+
return IntType(nullable=nullable_default)
|
|
332
|
+
if t is float:
|
|
333
|
+
return FloatType(nullable=nullable_default)
|
|
334
|
+
if t is bool:
|
|
335
|
+
return BoolType(nullable=nullable_default)
|
|
336
|
+
if t is datetime.datetime:
|
|
337
|
+
return TimestampType(nullable=nullable_default)
|
|
338
|
+
if t is PIL.Image.Image:
|
|
339
|
+
return ImageType(nullable=nullable_default)
|
|
340
|
+
if issubclass(t, Sequence) or issubclass(t, Mapping):
|
|
341
|
+
return JsonType(nullable=nullable_default)
|
|
318
342
|
return None
|
|
319
343
|
|
|
320
344
|
@classmethod
|
|
321
|
-
def normalize_type(
|
|
345
|
+
def normalize_type(
|
|
346
|
+
cls,
|
|
347
|
+
t: Union[ColumnType, type, _AnnotatedAlias],
|
|
348
|
+
nullable_default: bool = False,
|
|
349
|
+
allow_builtin_types: bool = True
|
|
350
|
+
) -> ColumnType:
|
|
322
351
|
"""
|
|
323
352
|
Convert any type recognizable by Pixeltable to its corresponding ColumnType.
|
|
324
353
|
"""
|
|
325
354
|
if isinstance(t, ColumnType):
|
|
326
355
|
return t
|
|
327
|
-
col_type = cls.from_python_type(t, nullable_default)
|
|
356
|
+
col_type = cls.from_python_type(t, nullable_default, allow_builtin_types)
|
|
328
357
|
if col_type is None:
|
|
329
|
-
|
|
358
|
+
cls.__raise_exc_for_invalid_type(t)
|
|
330
359
|
return col_type
|
|
331
360
|
|
|
361
|
+
__TYPE_SUGGESTIONS: list[tuple[type, str]] = [
|
|
362
|
+
(str, 'pxt.String'),
|
|
363
|
+
(bool, 'pxt.Bool'),
|
|
364
|
+
(int, 'pxt.Int'),
|
|
365
|
+
(float, 'pxt.Float'),
|
|
366
|
+
(datetime.datetime, 'pxt.Timestamp'),
|
|
367
|
+
(PIL.Image.Image, 'pxt.Image'),
|
|
368
|
+
(Sequence, 'pxt.Json'),
|
|
369
|
+
(Mapping, 'pxt.Json'),
|
|
370
|
+
]
|
|
371
|
+
|
|
372
|
+
@classmethod
|
|
373
|
+
def __raise_exc_for_invalid_type(cls, t: Union[type, _AnnotatedAlias]) -> None:
|
|
374
|
+
for builtin_type, suggestion in cls.__TYPE_SUGGESTIONS:
|
|
375
|
+
if t is builtin_type or (isinstance(t, type) and issubclass(t, builtin_type)):
|
|
376
|
+
name = t.__name__ if t.__module__ == 'builtins' else f'{t.__module__}.{t.__name__}'
|
|
377
|
+
raise excs.Error(f'Standard Python type `{name}` cannot be used here; use `{suggestion}` instead')
|
|
378
|
+
raise excs.Error(f'Unknown type: {t}')
|
|
379
|
+
|
|
332
380
|
def validate_literal(self, val: Any) -> None:
|
|
333
381
|
"""Raise TypeError if val is not a valid literal for this type"""
|
|
334
382
|
if val is None:
|
|
@@ -979,7 +1027,7 @@ class Array(np.ndarray, _PxtType):
|
|
|
979
1027
|
`item` (the type subscript) must be a tuple with exactly two elements (in any order):
|
|
980
1028
|
- A tuple of `Optional[int]`s, specifying the shape of the array
|
|
981
1029
|
- A type, specifying the dtype of the array
|
|
982
|
-
Example: Array[(3, None, 2),
|
|
1030
|
+
Example: Array[(3, None, 2), pxt.Float]
|
|
983
1031
|
"""
|
|
984
1032
|
params = item if isinstance(item, tuple) else (item,)
|
|
985
1033
|
shape: Optional[tuple] = None
|
|
@@ -994,7 +1042,7 @@ class Array(np.ndarray, _PxtType):
|
|
|
994
1042
|
elif isinstance(param, type) or isinstance(param, _AnnotatedAlias):
|
|
995
1043
|
if dtype is not None:
|
|
996
1044
|
raise TypeError(f'Duplicate Array type parameter: {param}')
|
|
997
|
-
dtype = ColumnType.
|
|
1045
|
+
dtype = ColumnType.normalize_type(param, allow_builtin_types=False)
|
|
998
1046
|
else:
|
|
999
1047
|
raise TypeError(f'Invalid Array type parameter: {param}')
|
|
1000
1048
|
if shape is None:
|
pixeltable/utils/coco.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
import PIL
|
|
6
6
|
|
|
@@ -22,7 +22,7 @@ Required format:
|
|
|
22
22
|
}
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
-
def _verify_input_dict(input_dict:
|
|
25
|
+
def _verify_input_dict(input_dict: dict[str, Any]) -> None:
|
|
26
26
|
"""Verify that input_dict is a valid input dict for write_coco_dataset()"""
|
|
27
27
|
if not isinstance(input_dict, dict):
|
|
28
28
|
raise excs.Error(f'Expected dict, got {input_dict}{format_msg}')
|
|
@@ -61,11 +61,11 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
|
|
|
61
61
|
images_dir = dest_path / 'images'
|
|
62
62
|
images_dir.mkdir()
|
|
63
63
|
|
|
64
|
-
images:
|
|
64
|
+
images: list[dict[str, Any]] = []
|
|
65
65
|
img_id = -1
|
|
66
|
-
annotations:
|
|
66
|
+
annotations: list[dict[str, Any]] = []
|
|
67
67
|
ann_id = -1
|
|
68
|
-
categories:
|
|
68
|
+
categories: set[Any] = set()
|
|
69
69
|
for input_row in df._exec():
|
|
70
70
|
if input_dict_slot_idx == -1:
|
|
71
71
|
input_dict_expr = df._select_list_exprs[0]
|
pixeltable/utils/formatter.py
CHANGED
|
@@ -138,11 +138,11 @@ class Formatter:
|
|
|
138
138
|
assert isinstance(img, Image.Image), f'Wrong type: {type(img)}'
|
|
139
139
|
# Try to make it look decent in a variety of display scenarios
|
|
140
140
|
if self.__num_rows > 1:
|
|
141
|
-
width = 240 # Multiple rows: display small images
|
|
141
|
+
width = min(240, img.width) # Multiple rows: display small images
|
|
142
142
|
elif self.__num_cols > 1:
|
|
143
|
-
width = 480 # Multiple columns: display medium images
|
|
143
|
+
width = min(480, img.width) # Multiple columns: display medium images
|
|
144
144
|
else:
|
|
145
|
-
width = 640 # A single image: larger display
|
|
145
|
+
width = min(640, img.width) # A single image: larger display
|
|
146
146
|
with io.BytesIO() as buffer:
|
|
147
147
|
img.save(buffer, 'webp')
|
|
148
148
|
img_base64 = base64.b64encode(buffer.getvalue()).decode()
|
pixeltable/utils/s3.py
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
|
|
3
3
|
|
|
4
|
-
def get_client() -> Any:
|
|
4
|
+
def get_client(**kwargs: Any) -> Any:
|
|
5
5
|
import boto3
|
|
6
6
|
import botocore
|
|
7
7
|
try:
|
|
8
8
|
boto3.Session().get_credentials().get_frozen_credentials()
|
|
9
|
-
|
|
9
|
+
config = botocore.config.Config(**kwargs)
|
|
10
|
+
return boto3.client('s3', config=config) # credentials are available
|
|
10
11
|
except AttributeError:
|
|
11
12
|
# No credentials available, use unsigned mode
|
|
12
|
-
|
|
13
|
+
config_args = kwargs.copy()
|
|
14
|
+
config_args['signature_version'] = botocore.UNSIGNED
|
|
15
|
+
config = botocore.config.Config(**config_args)
|
|
13
16
|
return boto3.client('s3', config=config)
|