pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +20 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +23 -7
- pixeltable/catalog/insertable_table.py +32 -19
- pixeltable/catalog/table.py +210 -20
- pixeltable/catalog/table_version.py +272 -111
- pixeltable/catalog/table_version_path.py +6 -1
- pixeltable/dataframe.py +184 -110
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +526 -0
- pixeltable/datatransfer/remote.py +113 -0
- pixeltable/env.py +213 -79
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +11 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +26 -19
- pixeltable/exprs/function_call.py +17 -18
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +67 -0
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +5 -2
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +14 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +8 -12
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +48 -5
- pixeltable/functions/openai.py +49 -11
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +32 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +443 -0
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +9 -2
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +91 -15
- pixeltable/io/__init__.py +4 -0
- pixeltable/io/globals.py +59 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +8 -4
- pixeltable/iterators/document.py +225 -93
- pixeltable/iterators/video.py +16 -9
- pixeltable/metadata/__init__.py +8 -4
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/converters/convert_15.py +29 -0
- pixeltable/metadata/converters/util.py +63 -0
- pixeltable/metadata/schema.py +12 -6
- pixeltable/plan.py +11 -24
- pixeltable/store.py +16 -23
- pixeltable/tool/create_test_db_dump.py +49 -14
- pixeltable/type_system.py +27 -58
- pixeltable/utils/coco.py +94 -0
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- pixeltable-0.2.7.dist-info/METADATA +137 -0
- pixeltable-0.2.7.dist-info/RECORD +126 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/func/nos_function.py +0 -202
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.5.dist-info/METADATA +0 -128
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
pixeltable/func/batched_function.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
import inspect
|
|
2
|
-
from typing import List, Dict, Any, Optional, Callable
|
|
3
|
-
import abc
|
|
4
|
-
|
|
5
|
-
from .function import Function
|
|
6
|
-
from .signature import Signature
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class BatchedFunction(Function):
|
|
10
|
-
"""Base class for functions that can run on batches"""
|
|
11
|
-
|
|
12
|
-
@abc.abstractmethod
|
|
13
|
-
def get_batch_size(self, *args: Any, **kwargs: Any) -> Optional[int]:
|
|
14
|
-
"""Return the batch size for the given arguments, or None if the batch size is unknown.
|
|
15
|
-
args/kwargs might be empty
|
|
16
|
-
"""
|
|
17
|
-
raise NotImplementedError
|
|
18
|
-
|
|
19
|
-
@abc.abstractmethod
|
|
20
|
-
def invoke(self, arg_batches: List[List[Any]], kwarg_batches: Dict[str, List[Any]]) -> List[Any]:
|
|
21
|
-
"""Invoke the function for the given batch and return a batch of results"""
|
|
22
|
-
raise NotImplementedError
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class ExplicitBatchedFunction(BatchedFunction):
|
|
26
|
-
"""
|
|
27
|
-
A `BatchedFunction` that is defined by a signature and an explicit python
|
|
28
|
-
`Callable`.
|
|
29
|
-
"""
|
|
30
|
-
def __init__(self, signature: Signature, batch_size: Optional[int], invoker_fn: Callable, self_path: str):
|
|
31
|
-
super().__init__(signature=signature, py_signature=inspect.signature(invoker_fn), self_path=self_path)
|
|
32
|
-
self.batch_size = batch_size
|
|
33
|
-
self.invoker_fn = invoker_fn
|
|
34
|
-
|
|
35
|
-
def get_batch_size(self, *args: Any, **kwargs: Any) -> Optional[int]:
|
|
36
|
-
return self.batch_size
|
|
37
|
-
|
|
38
|
-
def invoke(self, arg_batches: List[List[Any]], kwarg_batches: Dict[str, List[Any]]) -> List[Any]:
|
|
39
|
-
"""Invoke the function for the given batch and return a batch of results"""
|
|
40
|
-
constant_param_names = [p.name for p in self.signature.constant_parameters]
|
|
41
|
-
kwargs = {k: v[0] for k, v in kwarg_batches.items() if k in constant_param_names}
|
|
42
|
-
kwarg_batches = {k: v for k, v in kwarg_batches.items() if k not in constant_param_names}
|
|
43
|
-
return self.invoker_fn(*arg_batches, **kwargs, **kwarg_batches)
|
|
44
|
-
|
|
45
|
-
def validate_call(self, bound_args: Dict[str, Any]) -> None:
|
|
46
|
-
"""Verify constant parameters"""
|
|
47
|
-
import pixeltable.exprs as exprs
|
|
48
|
-
for param in self.signature.constant_parameters:
|
|
49
|
-
if param.name in bound_args and isinstance(bound_args[param.name], exprs.Expr):
|
|
50
|
-
raise ValueError(
|
|
51
|
-
f'{self.display_name}(): '
|
|
52
|
-
f'parameter {param.name} must be a constant value, not a Pixeltable expression'
|
|
53
|
-
)
|
pixeltable/func/nos_function.py
DELETED
|
@@ -1,202 +0,0 @@
|
|
|
1
|
-
from typing import Optional, Any, Dict, List, Tuple
|
|
2
|
-
import inspect
|
|
3
|
-
import logging
|
|
4
|
-
import sys
|
|
5
|
-
|
|
6
|
-
import numpy as np
|
|
7
|
-
|
|
8
|
-
from .signature import Signature, Parameter
|
|
9
|
-
from .batched_function import BatchedFunction
|
|
10
|
-
import pixeltable.env as env
|
|
11
|
-
import pixeltable.type_system as ts
|
|
12
|
-
import pixeltable.exceptions as excs
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
_logger = logging.getLogger('pixeltable')
|
|
16
|
-
|
|
17
|
-
class NOSFunction(BatchedFunction):
|
|
18
|
-
def __init__(self, model_spec: 'nos.common.ModelSpec', self_path: str):
|
|
19
|
-
return_type, param_types = self._convert_nos_signature(model_spec.signature)
|
|
20
|
-
param_names = list(model_spec.signature.get_inputs_spec().keys())
|
|
21
|
-
params = [
|
|
22
|
-
Parameter(name, col_type, inspect.Parameter.POSITIONAL_OR_KEYWORD, is_batched=False)
|
|
23
|
-
for name, col_type in zip(param_names, param_types)
|
|
24
|
-
]
|
|
25
|
-
signature = Signature(return_type, params)
|
|
26
|
-
|
|
27
|
-
# construct inspect.Signature
|
|
28
|
-
py_params = [
|
|
29
|
-
inspect.Parameter(name, inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
30
|
-
for name, col_type in zip(param_names, param_types)
|
|
31
|
-
]
|
|
32
|
-
py_signature = inspect.Signature(py_params)
|
|
33
|
-
super().__init__(signature, py_signature=py_signature, self_path=self_path)
|
|
34
|
-
|
|
35
|
-
self.model_spec = model_spec
|
|
36
|
-
self.nos_param_names = model_spec.signature.get_inputs_spec().keys()
|
|
37
|
-
self.scalar_nos_param_names = []
|
|
38
|
-
|
|
39
|
-
# for models on images
|
|
40
|
-
self.img_param_pos: Optional[int] = None # position of the image parameter in the function signature
|
|
41
|
-
# for multi-resolution image models
|
|
42
|
-
import nos
|
|
43
|
-
self.img_batch_params: List[nos.common.ObjectTypeInfo] = []
|
|
44
|
-
self.img_resolutions: List[int] = [] # for multi-resolution models
|
|
45
|
-
self.batch_size: Optional[int] = None
|
|
46
|
-
self.img_size: Optional[Tuple[int, int]] = None # W, H
|
|
47
|
-
|
|
48
|
-
# try to determine batch_size and img_size
|
|
49
|
-
batch_size = sys.maxsize
|
|
50
|
-
for pos, (param_name, type_info) in enumerate(model_spec.signature.get_inputs_spec().items()):
|
|
51
|
-
if isinstance(type_info, list):
|
|
52
|
-
assert isinstance(type_info[0].base_spec(), nos.common.ImageSpec)
|
|
53
|
-
# this is a multi-resolution image model
|
|
54
|
-
self.img_batch_params = type_info
|
|
55
|
-
self.img_param_pos = pos
|
|
56
|
-
self.img_resolutions = [
|
|
57
|
-
info.base_spec().shape[0] * info.base_spec().shape[1] for info in self.img_batch_params
|
|
58
|
-
]
|
|
59
|
-
else:
|
|
60
|
-
if not type_info.is_batched():
|
|
61
|
-
self.scalar_nos_param_names.append(param_name)
|
|
62
|
-
else:
|
|
63
|
-
batch_size = min(batch_size, type_info.batch_size())
|
|
64
|
-
|
|
65
|
-
if isinstance(type_info.base_spec(), nos.common.ImageSpec):
|
|
66
|
-
# this is a single-resolution image model
|
|
67
|
-
if type_info.base_spec().shape is not None:
|
|
68
|
-
self.img_size = (type_info.base_spec().shape[1], type_info.base_spec().shape[0])
|
|
69
|
-
self.img_param_pos = pos
|
|
70
|
-
|
|
71
|
-
if batch_size != sys.maxsize:
|
|
72
|
-
self.batch_size = batch_size
|
|
73
|
-
|
|
74
|
-
def _convert_nos_type(
|
|
75
|
-
self, type_info: 'nos.common.spec.ObjectTypeInfo', ignore_shape: bool = False
|
|
76
|
-
) -> ts.ColumnType:
|
|
77
|
-
"""Convert ObjectTypeInfo to ColumnType"""
|
|
78
|
-
import nos
|
|
79
|
-
if type_info.base_spec() is None:
|
|
80
|
-
if type_info.base_type() == str:
|
|
81
|
-
return ts.StringType()
|
|
82
|
-
if type_info.base_type() == int:
|
|
83
|
-
return ts.IntType()
|
|
84
|
-
if type_info.base_type() == float:
|
|
85
|
-
return ts.FloatType()
|
|
86
|
-
if type_info.base_type() == bool:
|
|
87
|
-
return ts.BoolType()
|
|
88
|
-
else:
|
|
89
|
-
raise excs.Error(f'Cannot convert {type_info} to ColumnType')
|
|
90
|
-
elif isinstance(type_info.base_spec(), nos.common.ImageSpec):
|
|
91
|
-
size = None
|
|
92
|
-
if not ignore_shape and type_info.base_spec().shape is not None:
|
|
93
|
-
size = (type_info.base_spec().shape[1], type_info.base_spec().shape[0])
|
|
94
|
-
# TODO: set mode
|
|
95
|
-
return ts.ImageType(size=size)
|
|
96
|
-
elif isinstance(type_info.base_spec(), nos.common.TensorSpec):
|
|
97
|
-
return ts.ArrayType(shape=type_info.base_spec().shape, dtype=ts.FloatType())
|
|
98
|
-
else:
|
|
99
|
-
raise excs.Error(f'Cannot convert {type_info} to ColumnType')
|
|
100
|
-
|
|
101
|
-
def _convert_nos_signature(
|
|
102
|
-
self, sig: 'nos.common.spec.FunctionSignature') -> Tuple[ts.ColumnType, List[ts.ColumnType]]:
|
|
103
|
-
if len(sig.get_outputs_spec()) > 1:
|
|
104
|
-
return_type = ts.JsonType()
|
|
105
|
-
else:
|
|
106
|
-
return_type = self._convert_nos_type(list(sig.get_outputs_spec().values())[0])
|
|
107
|
-
param_types: List[ts.ColumnType] = []
|
|
108
|
-
for _, type_info in sig.get_inputs_spec().items():
|
|
109
|
-
# if there are multiple input shapes we leave them out of the ColumnType and deal with them in FunctionCall
|
|
110
|
-
if isinstance(type_info, list):
|
|
111
|
-
param_types.append(self._convert_nos_type(type_info[0], ignore_shape=True))
|
|
112
|
-
else:
|
|
113
|
-
param_types.append(self._convert_nos_type(type_info, ignore_shape=False))
|
|
114
|
-
return return_type, param_types
|
|
115
|
-
|
|
116
|
-
def is_multi_res_model(self) -> bool:
|
|
117
|
-
return self.img_param_pos is not None and len(self.img_batch_params) > 0
|
|
118
|
-
|
|
119
|
-
def get_batch_size(self, *args: Any, **kwargs: Any) -> Optional[int]:
|
|
120
|
-
if self.batch_size is not None or len(self.img_batch_params) == 0 or len(args) == 0:
|
|
121
|
-
return self.batch_size
|
|
122
|
-
|
|
123
|
-
# return batch size appropriate for the given image size
|
|
124
|
-
img_arg = args[self.img_param_pos]
|
|
125
|
-
input_res = img_arg.size[0] * img_arg.size[1]
|
|
126
|
-
batch_size, _ = self._select_model_res(input_res)
|
|
127
|
-
return batch_size
|
|
128
|
-
|
|
129
|
-
def _select_model_res(self, input_res: int) -> Tuple[int, Tuple[int, int]]:
|
|
130
|
-
"""Select the model resolution that is closest to the input resolution
|
|
131
|
-
Returns: batch size, image size
|
|
132
|
-
"""
|
|
133
|
-
deltas = [abs(res - input_res) for res in self.img_resolutions]
|
|
134
|
-
idx = deltas.index(min(deltas))
|
|
135
|
-
type_info = self.img_batch_params[idx]
|
|
136
|
-
return type_info.batch_size(), (type_info.base_spec().shape[1], type_info.base_spec().shape[0])
|
|
137
|
-
|
|
138
|
-
def invoke(self, arg_batches: List[List[Any]], kwarg_batches: Dict[str, List[Any]]) -> List[Any]:
|
|
139
|
-
# check that scalar args are constant
|
|
140
|
-
|
|
141
|
-
num_batch_rows = len(arg_batches[0])
|
|
142
|
-
# if we need to rescale image args, and we're doing object detection, we need to rescale the
|
|
143
|
-
# bounding boxes as well
|
|
144
|
-
scale_factors = np.ndarray((num_batch_rows, 2), dtype=np.float32)
|
|
145
|
-
|
|
146
|
-
target_res: Optional[Tuple[int, int]] = None
|
|
147
|
-
if self.img_param_pos is not None:
|
|
148
|
-
# for now, NOS will only receive RGB images
|
|
149
|
-
arg_batches[self.img_param_pos] = \
|
|
150
|
-
[img.convert('RGB') if img.mode != 'RGB' else img for img in arg_batches[self.img_param_pos ]]
|
|
151
|
-
if self.is_multi_res_model():
|
|
152
|
-
# we need to select the resolution that is closest to the input resolution
|
|
153
|
-
sample_img = arg_batches[self.img_param_pos][0]
|
|
154
|
-
_, target_res = self._select_model_res(sample_img.size[0] * sample_img.size[1])
|
|
155
|
-
else:
|
|
156
|
-
target_res = self.img_size
|
|
157
|
-
|
|
158
|
-
if target_res is not None:
|
|
159
|
-
# we need to record the scale factors and resize the images;
|
|
160
|
-
# keep in mind that every image could have a different resolution
|
|
161
|
-
scale_factors[:, 0] = \
|
|
162
|
-
[img.size[0] / target_res[0] for img in arg_batches[self.img_param_pos]]
|
|
163
|
-
scale_factors[:, 1] = \
|
|
164
|
-
[img.size[1] / target_res[1] for img in arg_batches[self.img_param_pos]]
|
|
165
|
-
arg_batches[self.img_param_pos] = [
|
|
166
|
-
# only resize if necessary
|
|
167
|
-
img.resize(target_res) if img.size != target_res else img
|
|
168
|
-
for img in arg_batches[self.img_param_pos]
|
|
169
|
-
]
|
|
170
|
-
|
|
171
|
-
kwargs = {param_name: args for param_name, args in zip(self.nos_param_names, arg_batches)}
|
|
172
|
-
# fix up scalar parameters
|
|
173
|
-
kwargs.update(
|
|
174
|
-
{param_name: kwargs[param_name][0] for param_name in self.scalar_nos_param_names})
|
|
175
|
-
_logger.debug(
|
|
176
|
-
f'Running NOS task {self.model_spec.task}: '
|
|
177
|
-
f'batch_size={num_batch_rows} target_res={target_res}')
|
|
178
|
-
result = env.Env.get().nos_client.Run(
|
|
179
|
-
task=self.model_spec.task, model_name=self.model_spec.name, **kwargs)
|
|
180
|
-
|
|
181
|
-
import nos
|
|
182
|
-
if self.model_spec.task == nos.common.TaskType.OBJECT_DETECTION_2D and target_res is not None:
|
|
183
|
-
# we need to rescale the bounding boxes
|
|
184
|
-
result_bboxes = [] # workaround: result['bboxes'][*] is immutable
|
|
185
|
-
for i, bboxes in enumerate(result['bboxes']):
|
|
186
|
-
bboxes = np.copy(bboxes)
|
|
187
|
-
nos_batch_row_idx = i
|
|
188
|
-
bboxes[:, 0] *= scale_factors[nos_batch_row_idx, 0]
|
|
189
|
-
bboxes[:, 1] *= scale_factors[nos_batch_row_idx, 1]
|
|
190
|
-
bboxes[:, 2] *= scale_factors[nos_batch_row_idx, 0]
|
|
191
|
-
bboxes[:, 3] *= scale_factors[nos_batch_row_idx, 1]
|
|
192
|
-
result_bboxes.append(bboxes)
|
|
193
|
-
result['bboxes'] = result_bboxes
|
|
194
|
-
|
|
195
|
-
if len(result) == 1:
|
|
196
|
-
key = list(result.keys())[0]
|
|
197
|
-
row_results = result[key]
|
|
198
|
-
else:
|
|
199
|
-
# we rearrange result into one dict per row
|
|
200
|
-
row_results = [{k: v[i].tolist() for k, v in result.items()} for i in range(num_batch_rows)]
|
|
201
|
-
return row_results
|
|
202
|
-
|
pixeltable/tests/conftest.py
DELETED
|
@@ -1,171 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import logging
|
|
3
|
-
import os
|
|
4
|
-
import pathlib
|
|
5
|
-
from typing import List
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
|
-
import pytest
|
|
9
|
-
import PIL.Image
|
|
10
|
-
|
|
11
|
-
import pixeltable as pxt
|
|
12
|
-
import pixeltable.catalog as catalog
|
|
13
|
-
from pixeltable import exprs
|
|
14
|
-
import pixeltable.functions as pxtf
|
|
15
|
-
from pixeltable.exprs import RELATIVE_PATH_ROOT as R
|
|
16
|
-
from pixeltable.metadata import SystemInfo, create_system_info
|
|
17
|
-
from pixeltable.metadata.schema import TableSchemaVersion, TableVersion, Table, Function, Dir
|
|
18
|
-
from pixeltable.tests.utils import read_data_file, create_test_tbl, create_all_datatypes_tbl, skip_test_if_not_installed
|
|
19
|
-
from pixeltable.type_system import StringType, ImageType, FloatType
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
@pytest.fixture(scope='session')
|
|
23
|
-
def init_env(tmp_path_factory) -> None:
|
|
24
|
-
from pixeltable.env import Env
|
|
25
|
-
# set the relevant env vars for Client() to connect to the test db
|
|
26
|
-
|
|
27
|
-
shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', str(pathlib.Path.home() / '.pixeltable')))
|
|
28
|
-
home_dir = str(tmp_path_factory.mktemp('base') / '.pixeltable')
|
|
29
|
-
os.environ['PIXELTABLE_HOME'] = home_dir
|
|
30
|
-
os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.yaml')
|
|
31
|
-
test_db = 'test'
|
|
32
|
-
os.environ['PIXELTABLE_DB'] = test_db
|
|
33
|
-
os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
|
|
34
|
-
|
|
35
|
-
# ensure this home dir exits
|
|
36
|
-
shared_home.mkdir(parents=True, exist_ok=True)
|
|
37
|
-
# this also runs create_all()
|
|
38
|
-
Env.get().set_up(echo=True)
|
|
39
|
-
yield
|
|
40
|
-
# leave db in place for debugging purposes
|
|
41
|
-
|
|
42
|
-
@pytest.fixture(scope='function')
|
|
43
|
-
def test_client(init_env) -> pxt.Client:
|
|
44
|
-
# Clean the DB *before* instantiating a client object. This is because some tests
|
|
45
|
-
# (such as test_migration.py) may leave the DB in a broken state, from which the
|
|
46
|
-
# client is uninstantiable.
|
|
47
|
-
clean_db()
|
|
48
|
-
cl = pxt.Client(reload=True)
|
|
49
|
-
cl.logging(level=logging.DEBUG, to_stdout=True)
|
|
50
|
-
yield cl
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def clean_db(restore_tables: bool = True) -> None:
|
|
54
|
-
from pixeltable.env import Env
|
|
55
|
-
# The logic from Client.reset_catalog() has been moved here, so that it
|
|
56
|
-
# does not rely on instantiating a Client object. As before, UUID-named data tables will
|
|
57
|
-
# not be cleaned. If in the future it is desirable to clean out data tables as well,
|
|
58
|
-
# the commented lines may be used to drop ALL tables from the test db.
|
|
59
|
-
# sql_md = declarative_base().metadata
|
|
60
|
-
# sql_md.reflect(Env.get().engine)
|
|
61
|
-
# sql_md.drop_all(bind=Env.get().engine)
|
|
62
|
-
engine = Env.get().engine
|
|
63
|
-
SystemInfo.__table__.drop(engine, checkfirst=True)
|
|
64
|
-
TableSchemaVersion.__table__.drop(engine, checkfirst=True)
|
|
65
|
-
TableVersion.__table__.drop(engine, checkfirst=True)
|
|
66
|
-
Table.__table__.drop(engine, checkfirst=True)
|
|
67
|
-
Function.__table__.drop(engine, checkfirst=True)
|
|
68
|
-
Dir.__table__.drop(engine, checkfirst=True)
|
|
69
|
-
if restore_tables:
|
|
70
|
-
Dir.__table__.create(engine)
|
|
71
|
-
Function.__table__.create(engine)
|
|
72
|
-
Table.__table__.create(engine)
|
|
73
|
-
TableVersion.__table__.create(engine)
|
|
74
|
-
TableSchemaVersion.__table__.create(engine)
|
|
75
|
-
SystemInfo.__table__.create(engine)
|
|
76
|
-
create_system_info(engine)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
@pytest.fixture(scope='function')
|
|
80
|
-
def test_tbl(test_client: pxt.Client) -> catalog.Table:
|
|
81
|
-
return create_test_tbl(test_client)
|
|
82
|
-
|
|
83
|
-
# @pytest.fixture(scope='function')
|
|
84
|
-
# def test_stored_fn(test_client: pxt.Client) -> pxt.Function:
|
|
85
|
-
# @pxt.udf(return_type=pxt.IntType(), param_types=[pxt.IntType()])
|
|
86
|
-
# def test_fn(x):
|
|
87
|
-
# return x + 1
|
|
88
|
-
# test_client.create_function('test_fn', test_fn)
|
|
89
|
-
# return test_fn
|
|
90
|
-
|
|
91
|
-
@pytest.fixture(scope='function')
|
|
92
|
-
def test_tbl_exprs(test_tbl: catalog.Table) -> List[exprs.Expr]:
|
|
93
|
-
#def test_tbl_exprs(test_tbl: catalog.Table, test_stored_fn: pxt.Function) -> List[exprs.Expr]:
|
|
94
|
-
|
|
95
|
-
t = test_tbl
|
|
96
|
-
return [
|
|
97
|
-
t.c1,
|
|
98
|
-
t.c7['*'].f1,
|
|
99
|
-
exprs.Literal('test'),
|
|
100
|
-
exprs.InlineDict({
|
|
101
|
-
'a': t.c1, 'b': t.c6.f1, 'c': 17,
|
|
102
|
-
'd': exprs.InlineDict({'e': t.c2}),
|
|
103
|
-
'f': exprs.InlineArray((t.c3, t.c3))
|
|
104
|
-
}),
|
|
105
|
-
exprs.InlineArray([[t.c2, t.c2], [t.c2, t.c2]]),
|
|
106
|
-
t.c2 > 5,
|
|
107
|
-
t.c2 == None,
|
|
108
|
-
~(t.c2 > 5),
|
|
109
|
-
(t.c2 > 5) & (t.c1 == 'test'),
|
|
110
|
-
(t.c2 > 5) | (t.c1 == 'test'),
|
|
111
|
-
t.c7['*'].f5 >> [R[3], R[2], R[1], R[0]],
|
|
112
|
-
t.c8[0, 1:],
|
|
113
|
-
t.c2.astype(FloatType()),
|
|
114
|
-
(t.c2 + 1).astype(FloatType()),
|
|
115
|
-
t.c2.apply(str),
|
|
116
|
-
(t.c2 + 1).apply(str),
|
|
117
|
-
t.c3.apply(str),
|
|
118
|
-
t.c4.apply(str),
|
|
119
|
-
t.c5.apply(str),
|
|
120
|
-
t.c6.apply(str),
|
|
121
|
-
t.c1.apply(json.loads),
|
|
122
|
-
t.c8.errortype,
|
|
123
|
-
t.c8.errormsg,
|
|
124
|
-
pxtf.sum(t.c2, group_by=t.c4, order_by=t.c3),
|
|
125
|
-
]
|
|
126
|
-
|
|
127
|
-
@pytest.fixture(scope='function')
|
|
128
|
-
def all_datatypes_tbl(test_client: pxt.Client) -> catalog.Table:
|
|
129
|
-
return create_all_datatypes_tbl(test_client)
|
|
130
|
-
|
|
131
|
-
@pytest.fixture(scope='function')
|
|
132
|
-
def img_tbl(test_client: pxt.Client) -> catalog.Table:
|
|
133
|
-
schema = {
|
|
134
|
-
'img': ImageType(nullable=False),
|
|
135
|
-
'category': StringType(nullable=False),
|
|
136
|
-
'split': StringType(nullable=False),
|
|
137
|
-
}
|
|
138
|
-
# this table is not indexed in order to avoid the cost of computing embeddings
|
|
139
|
-
tbl = test_client.create_table('test_img_tbl', schema)
|
|
140
|
-
rows = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
|
|
141
|
-
tbl.insert(rows)
|
|
142
|
-
return tbl
|
|
143
|
-
|
|
144
|
-
@pytest.fixture(scope='function')
|
|
145
|
-
def img_tbl_exprs(img_tbl: catalog.Table) -> List[exprs.Expr]:
|
|
146
|
-
img_t = img_tbl
|
|
147
|
-
return [
|
|
148
|
-
img_t.img.width,
|
|
149
|
-
img_t.img.rotate(90),
|
|
150
|
-
# we're using a list here, not a tuple; the latter turns into a list during the back/forth conversion
|
|
151
|
-
img_t.img.rotate(90).resize([224, 224]),
|
|
152
|
-
img_t.img.fileurl,
|
|
153
|
-
img_t.img.localpath,
|
|
154
|
-
]
|
|
155
|
-
|
|
156
|
-
@pytest.fixture(scope='function')
|
|
157
|
-
def small_img_tbl(test_client: pxt.Client) -> catalog.Table:
|
|
158
|
-
cl = test_client
|
|
159
|
-
schema = {
|
|
160
|
-
'img': ImageType(nullable=False),
|
|
161
|
-
'category': StringType(nullable=False),
|
|
162
|
-
'split': StringType(nullable=False),
|
|
163
|
-
}
|
|
164
|
-
tbl = cl.create_table('test_indexed_img_tbl', schema)
|
|
165
|
-
rows = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
|
|
166
|
-
# select output_rows randomly in the hope of getting a good sample of the available categories
|
|
167
|
-
rng = np.random.default_rng(17)
|
|
168
|
-
idxs = rng.choice(np.arange(len(rows)), size=40, replace=False)
|
|
169
|
-
rows = [rows[i] for i in idxs]
|
|
170
|
-
tbl.insert(rows)
|
|
171
|
-
return tbl
|
pixeltable/tests/ext/test_yolox.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import pixeltable as pxt
|
|
2
|
-
from pixeltable.tests.utils import skip_test_if_not_installed, get_image_files, validate_update_status
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class TestYolox:
|
|
6
|
-
|
|
7
|
-
def test_yolox(self, test_client: pxt.Client):
|
|
8
|
-
skip_test_if_not_installed('yolox')
|
|
9
|
-
from pixeltable.ext.functions.yolox import yolox
|
|
10
|
-
cl = test_client
|
|
11
|
-
t = cl.create_table('yolox_test', {'image': pxt.ImageType()})
|
|
12
|
-
t['detect_yolox_tiny'] = yolox(t.image, model_id='yolox_tiny')
|
|
13
|
-
t['detect_yolox_nano'] = yolox(t.image, model_id='yolox_nano', threshold=0.2)
|
|
14
|
-
t['yolox_nano_bboxes'] = t.detect_yolox_nano.bboxes
|
|
15
|
-
images = get_image_files()[:10]
|
|
16
|
-
validate_update_status(t.insert({'image': image} for image in images), expected_rows=10)
|
|
17
|
-
rows = t.collect()
|
|
18
|
-
# Verify correctly formed JSON
|
|
19
|
-
assert all(list(result.keys()) == ['bboxes', 'labels', 'scores'] for result in rows['detect_yolox_tiny'])
|
|
20
|
-
# Verify that bboxes are actually present in at least some of the rows.
|
|
21
|
-
assert any(len(bboxes) > 0 for bboxes in rows['yolox_nano_bboxes'])
|
pixeltable/tests/functions/test_fireworks.py
DELETED
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
import pixeltable as pxt
|
|
4
|
-
import pixeltable.exceptions as excs
|
|
5
|
-
from pixeltable.tests.utils import skip_test_if_not_installed, validate_update_status
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@pytest.mark.remote_api
|
|
9
|
-
class TestFireworks:
|
|
10
|
-
|
|
11
|
-
def test_fireworks(self, test_client: pxt.Client) -> None:
|
|
12
|
-
skip_test_if_not_installed('fireworks')
|
|
13
|
-
TestFireworks.skip_test_if_no_fireworks_client()
|
|
14
|
-
cl = test_client
|
|
15
|
-
t = cl.create_table('test_tbl', {'input': pxt.StringType()})
|
|
16
|
-
from pixeltable.functions.fireworks import chat_completions
|
|
17
|
-
messages = [{'role': 'user', 'content': t.input}]
|
|
18
|
-
t['output'] = chat_completions(
|
|
19
|
-
messages=messages,
|
|
20
|
-
model='accounts/fireworks/models/llama-v2-7b-chat'
|
|
21
|
-
)
|
|
22
|
-
t['output_2'] = chat_completions(
|
|
23
|
-
messages=messages,
|
|
24
|
-
model='accounts/fireworks/models/llama-v2-7b-chat',
|
|
25
|
-
max_tokens=300,
|
|
26
|
-
top_k=40,
|
|
27
|
-
top_p=0.9,
|
|
28
|
-
temperature=0.7
|
|
29
|
-
)
|
|
30
|
-
validate_update_status(t.insert(input="How's everything going today?"), 1)
|
|
31
|
-
results = t.collect()
|
|
32
|
-
assert len(results['output'][0]['choices'][0]['message']['content']) > 0
|
|
33
|
-
assert len(results['output_2'][0]['choices'][0]['message']['content']) > 0
|
|
34
|
-
|
|
35
|
-
# This ensures that the test will be skipped, rather than returning an error, when no API key is
|
|
36
|
-
# available (for example, when a PR runs in CI).
|
|
37
|
-
@staticmethod
|
|
38
|
-
def skip_test_if_no_fireworks_client() -> None:
|
|
39
|
-
try:
|
|
40
|
-
import pixeltable.functions.fireworks
|
|
41
|
-
_ = pixeltable.functions.fireworks.fireworks_client()
|
|
42
|
-
except excs.Error as exc:
|
|
43
|
-
pytest.skip(str(exc))
|
pixeltable/tests/functions/test_functions.py
DELETED
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
import pixeltable as pxt
|
|
2
|
-
from pixeltable import catalog
|
|
3
|
-
from pixeltable.functions.pil.image import blend
|
|
4
|
-
from pixeltable.iterators import FrameIterator
|
|
5
|
-
from pixeltable.tests.utils import get_video_files, skip_test_if_not_installed
|
|
6
|
-
from pixeltable.type_system import VideoType, StringType
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TestFunctions:
|
|
10
|
-
def test_pil(self, img_tbl: catalog.Table) -> None:
|
|
11
|
-
t = img_tbl
|
|
12
|
-
_ = t[t.img, t.img.rotate(90), blend(t.img, t.img.rotate(90), 0.5)].show()
|
|
13
|
-
|
|
14
|
-
def test_eval_detections(self, test_client: pxt.Client) -> None:
|
|
15
|
-
skip_test_if_not_installed('nos')
|
|
16
|
-
cl = test_client
|
|
17
|
-
video_t = cl.create_table('video_tbl', {'video': VideoType()})
|
|
18
|
-
# create frame view
|
|
19
|
-
args = {'video': video_t.video, 'fps': 1}
|
|
20
|
-
v = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
|
|
21
|
-
|
|
22
|
-
files = get_video_files()
|
|
23
|
-
video_t.insert(video=files[-1])
|
|
24
|
-
v.add_column(frame_s=v.frame.resize([640, 480]))
|
|
25
|
-
from pixeltable.functions.nos.object_detection_2d import yolox_nano, yolox_small, yolox_large
|
|
26
|
-
v.add_column(detections_a=yolox_nano(v.frame_s))
|
|
27
|
-
v.add_column(detections_b=yolox_small(v.frame_s))
|
|
28
|
-
v.add_column(gt=yolox_large(v.frame_s))
|
|
29
|
-
from pixeltable.functions.eval import eval_detections, mean_ap
|
|
30
|
-
res = v.select(
|
|
31
|
-
eval_detections(
|
|
32
|
-
v.detections_a.bboxes, v.detections_a.labels, v.detections_a.scores, v.gt.bboxes, v.gt.labels
|
|
33
|
-
)).show()
|
|
34
|
-
v.add_column(
|
|
35
|
-
eval_a=eval_detections(
|
|
36
|
-
v.detections_a.bboxes, v.detections_a.labels, v.detections_a.scores, v.gt.bboxes, v.gt.labels))
|
|
37
|
-
v.add_column(
|
|
38
|
-
eval_b=eval_detections(
|
|
39
|
-
v.detections_b.bboxes, v.detections_b.labels, v.detections_b.scores, v.gt.bboxes, v.gt.labels))
|
|
40
|
-
ap_a = v.select(mean_ap(v.eval_a)).show()[0, 0]
|
|
41
|
-
ap_b = v.select(mean_ap(v.eval_b)).show()[0, 0]
|
|
42
|
-
common_classes = set(ap_a.keys()) & set(ap_b.keys())
|
|
43
|
-
|
|
44
|
-
## TODO: following assertion is failing on CI,
|
|
45
|
-
# It is not necessarily a bug, as assert codition is not expected to be always true
|
|
46
|
-
# for k in common_classes:
|
|
47
|
-
# assert ap_a[k] <= ap_b[k]
|
|
48
|
-
|
|
49
|
-
def test_str(self, test_client: pxt.Client) -> None:
|
|
50
|
-
cl = test_client
|
|
51
|
-
t = cl.create_table('test_tbl', {'input': StringType()})
|
|
52
|
-
from pixeltable.functions.string import str_format
|
|
53
|
-
t.add_column(s1=str_format('ABC {0}', t.input))
|
|
54
|
-
t.add_column(s2=str_format('DEF {this}', this=t.input))
|
|
55
|
-
t.add_column(s3=str_format('GHI {0} JKL {this}', t.input, this=t.input))
|
|
56
|
-
status = t.insert(input='MNO')
|
|
57
|
-
assert status.num_rows == 1
|
|
58
|
-
assert status.num_excs == 0
|
|
59
|
-
row = t.head()[0]
|
|
60
|
-
assert row == {'input': 'MNO', 's1': 'ABC MNO', 's2': 'DEF MNO', 's3': 'GHI MNO JKL MNO'}
|