pixeltable 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/catalog/column.py +26 -49
- pixeltable/catalog/insertable_table.py +7 -4
- pixeltable/catalog/table.py +163 -57
- pixeltable/catalog/table_version.py +416 -140
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/client.py +72 -6
- pixeltable/dataframe.py +65 -21
- pixeltable/env.py +52 -53
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/in_memory_data_node.py +11 -7
- pixeltable/exprs/comparison.py +3 -3
- pixeltable/exprs/data_row.py +5 -1
- pixeltable/exprs/literal.py +16 -4
- pixeltable/exprs/row_builder.py +8 -40
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/aggregate_function.py +15 -15
- pixeltable/func/expr_template_function.py +9 -1
- pixeltable/func/globals.py +24 -14
- pixeltable/func/signature.py +18 -12
- pixeltable/func/udf.py +7 -2
- pixeltable/functions/__init__.py +9 -9
- pixeltable/functions/eval.py +7 -8
- pixeltable/functions/fireworks.py +10 -37
- pixeltable/functions/huggingface.py +47 -19
- pixeltable/functions/openai.py +192 -24
- pixeltable/functions/together.py +104 -9
- pixeltable/functions/util.py +11 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +49 -0
- pixeltable/index/embedding_index.py +95 -0
- pixeltable/metadata/schema.py +45 -22
- pixeltable/plan.py +15 -34
- pixeltable/store.py +38 -41
- pixeltable/tests/conftest.py +8 -14
- pixeltable/tests/ext/test_yolox.py +21 -0
- pixeltable/tests/functions/test_fireworks.py +43 -0
- pixeltable/tests/functions/test_functions.py +60 -0
- pixeltable/tests/{test_functions.py → functions/test_huggingface.py} +7 -143
- pixeltable/tests/functions/test_openai.py +162 -0
- pixeltable/tests/functions/test_together.py +112 -0
- pixeltable/tests/test_component_view.py +14 -5
- pixeltable/tests/test_dataframe.py +23 -22
- pixeltable/tests/test_exprs.py +99 -102
- pixeltable/tests/test_function.py +51 -43
- pixeltable/tests/test_index.py +138 -0
- pixeltable/tests/test_migration.py +2 -1
- pixeltable/tests/test_snapshot.py +24 -1
- pixeltable/tests/test_table.py +205 -26
- pixeltable/tests/test_types.py +30 -0
- pixeltable/tests/test_video.py +16 -16
- pixeltable/tests/test_view.py +5 -0
- pixeltable/tests/utils.py +171 -14
- pixeltable/tool/create_test_db_dump.py +16 -0
- pixeltable/type_system.py +77 -128
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/hf_datasets.py +157 -0
- pixeltable/utils/parquet.py +68 -27
- pixeltable/utils/pytorch.py +16 -97
- {pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/METADATA +35 -28
- {pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/RECORD +63 -50
- {pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0
pixeltable/tests/test_table.py
CHANGED
|
@@ -8,6 +8,7 @@ import PIL
|
|
|
8
8
|
import cv2
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
11
|
+
import pathlib
|
|
11
12
|
import pytest
|
|
12
13
|
|
|
13
14
|
import pixeltable as pxt
|
|
@@ -17,7 +18,7 @@ from pixeltable import exceptions as excs
|
|
|
17
18
|
from pixeltable.iterators import FrameIterator
|
|
18
19
|
from pixeltable.tests.utils import \
|
|
19
20
|
make_tbl, create_table_data, read_data_file, get_video_files, get_audio_files, get_image_files, get_documents, \
|
|
20
|
-
assert_resultset_eq
|
|
21
|
+
assert_resultset_eq, assert_hf_dataset_equal, make_test_arrow_table, validate_update_status
|
|
21
22
|
from pixeltable.tests.utils import skip_test_if_not_installed
|
|
22
23
|
from pixeltable.type_system import \
|
|
23
24
|
StringType, IntType, FloatType, TimestampType, ImageType, VideoType, JsonType, BoolType, ArrayType, AudioType, \
|
|
@@ -25,7 +26,6 @@ from pixeltable.type_system import \
|
|
|
25
26
|
from pixeltable.utils.filecache import FileCache
|
|
26
27
|
from pixeltable.utils.media_store import MediaStore
|
|
27
28
|
|
|
28
|
-
|
|
29
29
|
class TestTable:
|
|
30
30
|
# exc for a % 10 == 0
|
|
31
31
|
@pxt.udf(return_type=FloatType(), param_types=[IntType()])
|
|
@@ -41,6 +41,21 @@ class TestTable:
|
|
|
41
41
|
def add1(a: int) -> int:
|
|
42
42
|
return a + 1
|
|
43
43
|
|
|
44
|
+
@pxt.uda(
|
|
45
|
+
update_types=[IntType()], value_type=IntType(), requires_order_by=True,
|
|
46
|
+
allows_window=True)
|
|
47
|
+
class window_fn:
|
|
48
|
+
def __init__(self):
|
|
49
|
+
pass
|
|
50
|
+
def update(self, i: int) -> None:
|
|
51
|
+
pass
|
|
52
|
+
def value(self) -> int:
|
|
53
|
+
return 1
|
|
54
|
+
|
|
55
|
+
@pxt.expr_udf(param_types=[IntType(nullable=False)])
|
|
56
|
+
def add1(a: int) -> int:
|
|
57
|
+
return a + 1
|
|
58
|
+
|
|
44
59
|
def test_create(self, test_client: pxt.Client) -> None:
|
|
45
60
|
cl = test_client
|
|
46
61
|
cl.create_dir('dir1')
|
|
@@ -56,7 +71,7 @@ class TestTable:
|
|
|
56
71
|
with pytest.raises(excs.Error):
|
|
57
72
|
_ = cl.create_table('1test', schema)
|
|
58
73
|
with pytest.raises(excs.Error):
|
|
59
|
-
_ =
|
|
74
|
+
_ = cl.create_table('bad name', schema={'c1': StringType()})
|
|
60
75
|
with pytest.raises(excs.Error):
|
|
61
76
|
_ = cl.create_table('test', schema)
|
|
62
77
|
with pytest.raises(excs.Error):
|
|
@@ -116,6 +131,100 @@ class TestTable:
|
|
|
116
131
|
tbl.revert()
|
|
117
132
|
assert tbl.num_retained_versions == num_retained_versions
|
|
118
133
|
|
|
134
|
+
def test_import_parquet(self, test_client: pxt.Client, tmp_path: pathlib.Path) -> None:
|
|
135
|
+
skip_test_if_not_installed('pyarrow')
|
|
136
|
+
import pyarrow as pa
|
|
137
|
+
from pixeltable.utils.arrow import iter_tuples
|
|
138
|
+
|
|
139
|
+
parquet_dir = tmp_path / 'test_data'
|
|
140
|
+
parquet_dir.mkdir()
|
|
141
|
+
make_test_arrow_table(parquet_dir)
|
|
142
|
+
|
|
143
|
+
tab = test_client.import_parquet('test_parquet', parquet_path=str(parquet_dir))
|
|
144
|
+
assert 'test_parquet' in test_client.list_tables()
|
|
145
|
+
assert tab is not None
|
|
146
|
+
num_elts = tab.count()
|
|
147
|
+
arrow_tab: pa.Table = pa.parquet.read_table(str(parquet_dir))
|
|
148
|
+
assert num_elts == arrow_tab.num_rows
|
|
149
|
+
assert set(tab.column_names()) == set(arrow_tab.column_names)
|
|
150
|
+
|
|
151
|
+
result_set = tab.order_by(tab.c_id).collect()
|
|
152
|
+
column_types = tab.column_types()
|
|
153
|
+
|
|
154
|
+
for tup, arrow_tup in zip(result_set, iter_tuples(arrow_tab)):
|
|
155
|
+
assert tup['c_id'] == arrow_tup['c_id']
|
|
156
|
+
for col, val in tup.items():
|
|
157
|
+
if val is None:
|
|
158
|
+
assert arrow_tup[col] is None
|
|
159
|
+
continue
|
|
160
|
+
|
|
161
|
+
if column_types[col].is_array_type():
|
|
162
|
+
assert (val == arrow_tup[col]).all()
|
|
163
|
+
else:
|
|
164
|
+
assert val == arrow_tup[col]
|
|
165
|
+
|
|
166
|
+
def test_import_huggingface_dataset(self, test_client: pxt.Client, tmp_path: pathlib.Path) -> None:
|
|
167
|
+
skip_test_if_not_installed('datasets')
|
|
168
|
+
import datasets
|
|
169
|
+
|
|
170
|
+
test_cases = [
|
|
171
|
+
# { # includes a timestamp. 20MB for specific slice
|
|
172
|
+
# Disabled this test case because download is failing, and it's not critical.
|
|
173
|
+
# 'dataset_name': 'c4',
|
|
174
|
+
# # see https://huggingface.co/datasets/allenai/c4/blob/main/realnewslike/c4-train.00000-of-00512.json.gz
|
|
175
|
+
# 'dataset': datasets.load_dataset(
|
|
176
|
+
# "allenai/c4",
|
|
177
|
+
# data_dir="realnewslike",
|
|
178
|
+
# data_files="c4-train.00000-of-00512.json.gz",
|
|
179
|
+
# split='train[:1000]',
|
|
180
|
+
# cache_dir=tmp_path
|
|
181
|
+
# ),
|
|
182
|
+
# },
|
|
183
|
+
{ # includes an embedding (array type), common in a few RAG datasets.
|
|
184
|
+
'dataset_name': 'cohere_wikipedia',
|
|
185
|
+
'dataset': datasets.load_dataset("Cohere/wikipedia-2023-11-embed-multilingual-v3",
|
|
186
|
+
data_dir='cr').select_columns(['url', 'title', 'text', 'emb']),
|
|
187
|
+
# column with name `_id` is not currently allowed by pixeltable rules,
|
|
188
|
+
# so filter out that column.
|
|
189
|
+
# cr subdir has a small number of rows, avoid running out of space in CI runner
|
|
190
|
+
# see https://huggingface.co/datasets/Cohere/wikipedia-2023-11-embed-multilingual-v3/tree/main/cr
|
|
191
|
+
'schema_override': {'emb': ArrayType((1024,), dtype=FloatType(), nullable=False)}
|
|
192
|
+
},
|
|
193
|
+
# example of dataset dictionary with multiple splits
|
|
194
|
+
{
|
|
195
|
+
'dataset_name': 'rotten_tomatoes',
|
|
196
|
+
'dataset': datasets.load_dataset("rotten_tomatoes"),
|
|
197
|
+
},
|
|
198
|
+
]
|
|
199
|
+
|
|
200
|
+
# test a column name for splits other than the default of 'split'
|
|
201
|
+
split_column_name = 'my_split_col'
|
|
202
|
+
for rec in test_cases:
|
|
203
|
+
dataset_name = rec['dataset_name']
|
|
204
|
+
hf_dataset = rec['dataset']
|
|
205
|
+
|
|
206
|
+
tab = test_client.import_huggingface_dataset(
|
|
207
|
+
dataset_name,
|
|
208
|
+
hf_dataset,
|
|
209
|
+
column_name_for_split=split_column_name,
|
|
210
|
+
schema_override=rec.get('schema_override', None),
|
|
211
|
+
)
|
|
212
|
+
if isinstance(hf_dataset, datasets.Dataset):
|
|
213
|
+
assert_hf_dataset_equal(hf_dataset, tab.df(), split_column_name)
|
|
214
|
+
elif isinstance(hf_dataset, datasets.DatasetDict):
|
|
215
|
+
assert tab.count() == sum(hf_dataset.num_rows.values())
|
|
216
|
+
assert split_column_name in tab.column_names()
|
|
217
|
+
|
|
218
|
+
for dataset_name in hf_dataset:
|
|
219
|
+
df = tab.where(tab.my_split_col == dataset_name)
|
|
220
|
+
assert_hf_dataset_equal(hf_dataset[dataset_name], df, split_column_name)
|
|
221
|
+
else:
|
|
222
|
+
assert False
|
|
223
|
+
|
|
224
|
+
with pytest.raises(excs.Error) as exc_info:
|
|
225
|
+
test_client.import_huggingface_dataset('test', {})
|
|
226
|
+
assert 'type(dataset)' in str(exc_info.value)
|
|
227
|
+
|
|
119
228
|
def test_image_table(self, test_client: pxt.Client) -> None:
|
|
120
229
|
n_sample_rows = 20
|
|
121
230
|
cl = test_client
|
|
@@ -205,10 +314,6 @@ class TestTable:
|
|
|
205
314
|
cl.create_table('test', {'c1': {'type': StringType(), 'stored': 'true'}})
|
|
206
315
|
assert '"stored" must be a bool' in str(exc_info.value)
|
|
207
316
|
|
|
208
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
209
|
-
cl.create_table('test', {'c1': {'type': StringType(), 'indexed': 'true'}})
|
|
210
|
-
assert '"indexed" must be a bool' in str(exc_info.value)
|
|
211
|
-
|
|
212
317
|
with pytest.raises(excs.Error) as exc_info:
|
|
213
318
|
cl.create_table('test', {'c1': StringType()}, primary_key='c2')
|
|
214
319
|
assert 'primary key column c2 not found' in str(exc_info.value).lower()
|
|
@@ -407,18 +512,8 @@ class TestTable:
|
|
|
407
512
|
# a non-materialized column that refers to another non-materialized column
|
|
408
513
|
view.add_column(c4=view.c2.rotate(60), stored=False)
|
|
409
514
|
|
|
410
|
-
@pxt.uda(
|
|
411
|
-
name='window_fn', update_types=[IntType()], value_type=IntType(), requires_order_by = True,
|
|
412
|
-
allows_window = True)
|
|
413
|
-
class WindowFnAggregator:
|
|
414
|
-
def __init__(self):
|
|
415
|
-
pass
|
|
416
|
-
def update(self, i: int) -> None:
|
|
417
|
-
pass
|
|
418
|
-
def value(self) -> int:
|
|
419
|
-
return 1
|
|
420
515
|
# cols computed with window functions are stored by default
|
|
421
|
-
view.add_column(c5=window_fn(view.frame_idx, 1, group_by=view.video))
|
|
516
|
+
view.add_column(c5=self.window_fn(view.frame_idx, 1, group_by=view.video))
|
|
422
517
|
|
|
423
518
|
# reload to make sure that metadata gets restored correctly
|
|
424
519
|
cl = pxt.Client(reload=True)
|
|
@@ -459,6 +554,23 @@ class TestTable:
|
|
|
459
554
|
cl.drop_table('test_tbl')
|
|
460
555
|
assert MediaStore.count(view.get_id()) == 0
|
|
461
556
|
|
|
557
|
+
def test_insert_nulls(self, test_client: pxt.Client) -> None:
|
|
558
|
+
cl = test_client
|
|
559
|
+
schema = {
|
|
560
|
+
'c1': StringType(nullable=True),
|
|
561
|
+
'c2': IntType(nullable=True),
|
|
562
|
+
'c3': FloatType(nullable=True),
|
|
563
|
+
'c4': BoolType(nullable=True),
|
|
564
|
+
'c5': ArrayType((2, 3), dtype=IntType(), nullable=True),
|
|
565
|
+
'c6': JsonType(nullable=True),
|
|
566
|
+
'c7': ImageType(nullable=True),
|
|
567
|
+
'c8': VideoType(nullable=True),
|
|
568
|
+
}
|
|
569
|
+
t = cl.create_table('test1', schema)
|
|
570
|
+
status = t.insert(c1='abc')
|
|
571
|
+
assert status.num_rows == 1
|
|
572
|
+
assert status.num_excs == 0
|
|
573
|
+
|
|
462
574
|
def test_insert(self, test_client: pxt.Client) -> None:
|
|
463
575
|
cl = test_client
|
|
464
576
|
schema = {
|
|
@@ -533,6 +645,15 @@ class TestTable:
|
|
|
533
645
|
t.insert(c5=np.ndarray((3, 2)))
|
|
534
646
|
assert 'expected ndarray((2, 3)' in str(exc_info.value)
|
|
535
647
|
|
|
648
|
+
def test_insert_string_with_null(self, test_client: pxt.Client) -> None:
|
|
649
|
+
cl = test_client
|
|
650
|
+
t = cl.create_table('test', {'c1': StringType()})
|
|
651
|
+
|
|
652
|
+
t.insert([{'c1': 'this is a python\x00string'}])
|
|
653
|
+
assert t.count() == 1
|
|
654
|
+
for tup in t.df().collect():
|
|
655
|
+
assert tup['c1'] == 'this is a python string'
|
|
656
|
+
|
|
536
657
|
def test_query(self, test_client: pxt.Client) -> None:
|
|
537
658
|
skip_test_if_not_installed('boto3')
|
|
538
659
|
cl = test_client
|
|
@@ -547,7 +668,63 @@ class TestTable:
|
|
|
547
668
|
t2 = cl.get_table('test')
|
|
548
669
|
_ = t2.show(n=0)
|
|
549
670
|
|
|
550
|
-
def
|
|
671
|
+
def test_batch_update(self, test_tbl: pxt.Table) -> None:
|
|
672
|
+
t = test_tbl
|
|
673
|
+
validate_update_status(
|
|
674
|
+
t.batch_update([{'c1': '1', 'c2': 1}, {'c1': '2', 'c2': 2}]),
|
|
675
|
+
expected_rows=2)
|
|
676
|
+
assert t.where(t.c2 == 1).collect()[0]['c1'] == '1'
|
|
677
|
+
assert t.where(t.c2 == 2).collect()[0]['c1'] == '2'
|
|
678
|
+
validate_update_status(
|
|
679
|
+
t.batch_update([{'c1': 'one', '_rowid': (1,)}, {'c1': 'two', '_rowid': (2,)}]),
|
|
680
|
+
expected_rows=2)
|
|
681
|
+
assert t.where(t.c2 == 1).collect()[0]['c1'] == 'one'
|
|
682
|
+
assert t.where(t.c2 == 2).collect()[0]['c1'] == 'two'
|
|
683
|
+
|
|
684
|
+
cl = pxt.Client()
|
|
685
|
+
# test composite primary key
|
|
686
|
+
schema = {'c1': StringType(), 'c2': IntType(), 'c3': FloatType()}
|
|
687
|
+
t = cl.create_table('composite', schema=schema, primary_key=['c1', 'c2'])
|
|
688
|
+
rows = [{'c1': str(i), 'c2': i, 'c3': float(i)} for i in range(10)]
|
|
689
|
+
validate_update_status(t.insert(rows), expected_rows=10)
|
|
690
|
+
|
|
691
|
+
validate_update_status(
|
|
692
|
+
t.batch_update([{'c1': '1', 'c2': 1, 'c3': 2.0}, {'c1': '2', 'c2': 2, 'c3': 3.0}]),
|
|
693
|
+
expected_rows=2)
|
|
694
|
+
|
|
695
|
+
with pytest.raises(excs.Error) as exc_info:
|
|
696
|
+
# can't mix _rowid with primary key
|
|
697
|
+
_ = t.batch_update([{'c1': '1', 'c2': 1, 'c3': 2.0, '_rowid': (1,)}])
|
|
698
|
+
assert 'c1 is a primary key column' in str(exc_info.value).lower()
|
|
699
|
+
|
|
700
|
+
with pytest.raises(excs.Error) as exc_info:
|
|
701
|
+
# bad literal
|
|
702
|
+
_ = t.batch_update([{'c2': 1, 'c3': 'a'}])
|
|
703
|
+
assert "'a' is not a valid literal" in str(exc_info.value).lower()
|
|
704
|
+
|
|
705
|
+
with pytest.raises(excs.Error) as exc_info:
|
|
706
|
+
# missing primary key column
|
|
707
|
+
t.batch_update([{'c1': '1', 'c3': 2.0}])
|
|
708
|
+
assert 'primary key columns (c2) missing' in str(exc_info.value).lower()
|
|
709
|
+
|
|
710
|
+
# table without primary key
|
|
711
|
+
t2 = cl.create_table('no_pk', schema=schema)
|
|
712
|
+
validate_update_status(t2.insert(rows), expected_rows=10)
|
|
713
|
+
with pytest.raises(excs.Error) as exc_info:
|
|
714
|
+
_ = t2.batch_update([{'c1': '1', 'c2': 1, 'c3': 2.0}])
|
|
715
|
+
assert 'must have primary key for batch update' in str(exc_info.value).lower()
|
|
716
|
+
|
|
717
|
+
# updating with _rowid still works
|
|
718
|
+
validate_update_status(
|
|
719
|
+
t2.batch_update([{'c1': 'one', '_rowid': (1,)}, {'c1': 'two', '_rowid': (2,)}]),
|
|
720
|
+
expected_rows=2)
|
|
721
|
+
assert t2.where(t2.c2 == 1).collect()[0]['c1'] == 'one'
|
|
722
|
+
assert t2.where(t2.c2 == 2).collect()[0]['c1'] == 'two'
|
|
723
|
+
with pytest.raises(AssertionError):
|
|
724
|
+
# some rows are missing rowids
|
|
725
|
+
_ = t2.batch_update([{'c1': 'one', '_rowid': (1,)}, {'c1': 'two'}])
|
|
726
|
+
|
|
727
|
+
def test_update(self, test_tbl: pxt.Table, small_img_tbl) -> None:
|
|
551
728
|
t = test_tbl
|
|
552
729
|
# update every type with a literal
|
|
553
730
|
test_cases = [
|
|
@@ -652,7 +829,7 @@ class TestTable:
|
|
|
652
829
|
t.update({'c3': 1.0}, where=lambda c2: c2 == 10)
|
|
653
830
|
assert 'Predicate' in str(excinfo.value)
|
|
654
831
|
|
|
655
|
-
img_t =
|
|
832
|
+
img_t = small_img_tbl
|
|
656
833
|
|
|
657
834
|
# can't update image col
|
|
658
835
|
with pytest.raises(excs.Error) as excinfo:
|
|
@@ -682,7 +859,7 @@ class TestTable:
|
|
|
682
859
|
r2 = t.where(t.c2 < 5).select(t.c3, t.c10, t.d1, t.d2).order_by(t.c2).show(0)
|
|
683
860
|
assert_resultset_eq(r1, r2)
|
|
684
861
|
|
|
685
|
-
def test_delete(self, test_tbl: pxt.Table,
|
|
862
|
+
def test_delete(self, test_tbl: pxt.Table, small_img_tbl) -> None:
|
|
686
863
|
t = test_tbl
|
|
687
864
|
|
|
688
865
|
cnt = t.where(t.c3 < 10.0).count()
|
|
@@ -710,7 +887,7 @@ class TestTable:
|
|
|
710
887
|
t.delete(where=lambda c2: c2 == 10)
|
|
711
888
|
assert 'Predicate' in str(excinfo.value)
|
|
712
889
|
|
|
713
|
-
img_t =
|
|
890
|
+
img_t = small_img_tbl
|
|
714
891
|
# similarity search is not supported
|
|
715
892
|
with pytest.raises(excs.Error) as excinfo:
|
|
716
893
|
img_t.delete(where=img_t.img.nearest('car'))
|
|
@@ -876,6 +1053,10 @@ class TestTable:
|
|
|
876
1053
|
t2.revert()
|
|
877
1054
|
assert MediaStore.count(t2.get_id()) == t2.count() * stores_img_col
|
|
878
1055
|
|
|
1056
|
+
@pxt.udf(return_type=ImageType(), param_types=[ImageType()])
|
|
1057
|
+
def img_fn_with_exc(img: PIL.Image.Image) -> PIL.Image.Image:
|
|
1058
|
+
raise RuntimeError
|
|
1059
|
+
|
|
879
1060
|
def test_computed_img_cols(self, test_client: pxt.Client) -> None:
|
|
880
1061
|
cl = test_client
|
|
881
1062
|
schema = {'img': ImageType(nullable=False)}
|
|
@@ -893,10 +1074,7 @@ class TestTable:
|
|
|
893
1074
|
|
|
894
1075
|
# computed img col with exceptions
|
|
895
1076
|
t = cl.create_table('test3', schema)
|
|
896
|
-
|
|
897
|
-
def f(img: PIL.Image.Image) -> PIL.Image.Image:
|
|
898
|
-
raise RuntimeError
|
|
899
|
-
t.add_column(c3=f(t.img), stored=True)
|
|
1077
|
+
t.add_column(c3=self.img_fn_with_exc(t.img), stored=True)
|
|
900
1078
|
rows = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
|
|
901
1079
|
rows = [{'img': r['img']} for r in rows[:20]]
|
|
902
1080
|
t.insert(rows, fail_on_exception=False)
|
|
@@ -1108,6 +1286,7 @@ class TestTable:
|
|
|
1108
1286
|
check_rename(t, 'c1_renamed', 'c1')
|
|
1109
1287
|
|
|
1110
1288
|
# revert() works
|
|
1289
|
+
_ = t.select(t.c1_renamed).collect()
|
|
1111
1290
|
t.revert()
|
|
1112
1291
|
_ = t.select(t.c1).collect()
|
|
1113
1292
|
#check_rename(t, 'c1', 'c1_renamed')
|
pixeltable/tests/test_types.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from copy import copy
|
|
3
|
+
from typing import List, Dict, Optional
|
|
4
|
+
|
|
1
5
|
from pixeltable.type_system import \
|
|
2
6
|
ColumnType, StringType, IntType, BoolType, ImageType, InvalidType, FloatType, TimestampType, JsonType, ArrayType
|
|
3
7
|
|
|
@@ -20,3 +24,29 @@ class TestTypes:
|
|
|
20
24
|
t_serialized = t.serialize()
|
|
21
25
|
t_deserialized = ColumnType.deserialize(t_serialized)
|
|
22
26
|
assert t == t_deserialized
|
|
27
|
+
|
|
28
|
+
def test_from_python_type(self) -> None:
|
|
29
|
+
test_cases = {
|
|
30
|
+
str: StringType(),
|
|
31
|
+
int: IntType(),
|
|
32
|
+
float: FloatType(),
|
|
33
|
+
bool: BoolType(),
|
|
34
|
+
datetime.date: TimestampType(),
|
|
35
|
+
datetime.datetime: TimestampType(),
|
|
36
|
+
list: JsonType(),
|
|
37
|
+
dict: JsonType(),
|
|
38
|
+
list[int]: JsonType(),
|
|
39
|
+
list[dict[str, int]]: JsonType(),
|
|
40
|
+
dict[int, str]: JsonType(),
|
|
41
|
+
dict[dict[str, int], list[int]]: JsonType(),
|
|
42
|
+
List: JsonType(),
|
|
43
|
+
Dict: JsonType(),
|
|
44
|
+
List[int]: JsonType(),
|
|
45
|
+
List[Dict[str, int]]: JsonType(),
|
|
46
|
+
Dict[int, str]: JsonType()
|
|
47
|
+
}
|
|
48
|
+
for py_type, pxt_type in test_cases.items():
|
|
49
|
+
assert ColumnType.from_python_type(py_type) == pxt_type
|
|
50
|
+
opt_pxt_type = copy(pxt_type)
|
|
51
|
+
opt_pxt_type.nullable = True
|
|
52
|
+
assert ColumnType.from_python_type(Optional[py_type]) == opt_pxt_type
|
pixeltable/tests/test_video.py
CHANGED
|
@@ -107,6 +107,18 @@ class TestVideo:
|
|
|
107
107
|
base_t.insert({'video': p} for p in video_filepaths)
|
|
108
108
|
_ = view_t[view_t.c1, view_t.c2, view_t.c3, view_t.c4].show(0)
|
|
109
109
|
|
|
110
|
+
# window function that simply passes through the frame
|
|
111
|
+
@pxt.uda(
|
|
112
|
+
update_types=[ImageType()], value_type=ImageType(),
|
|
113
|
+
requires_order_by=True, allows_std_agg=False, allows_window=True)
|
|
114
|
+
class agg_fn:
|
|
115
|
+
def __init__(self):
|
|
116
|
+
self.img = None
|
|
117
|
+
def update(self, frame: PIL.Image.Image) -> None:
|
|
118
|
+
self.img = frame
|
|
119
|
+
def value(self) -> PIL.Image.Image:
|
|
120
|
+
return self.img
|
|
121
|
+
|
|
110
122
|
def test_make_video(self, test_client: pxt.Client) -> None:
|
|
111
123
|
video_filepaths = get_video_files()
|
|
112
124
|
cl = test_client
|
|
@@ -131,29 +143,17 @@ class TestVideo:
|
|
|
131
143
|
make_video(view_t.pos, view_t.frame),
|
|
132
144
|
make_video(view_t.pos - 1, view_t.transformed)).group_by(base_t).show()
|
|
133
145
|
|
|
134
|
-
# window function that simply passes through the frame
|
|
135
|
-
@pxt.uda(
|
|
136
|
-
update_types=[ImageType()], value_type=ImageType(), name='agg_fn',
|
|
137
|
-
requires_order_by=True, allows_std_agg=False, allows_window=True)
|
|
138
|
-
class WindowAgg:
|
|
139
|
-
def __init__(self):
|
|
140
|
-
self.img = None
|
|
141
|
-
def update(self, frame: PIL.Image.Image) -> None:
|
|
142
|
-
self.img = frame
|
|
143
|
-
def value(self) -> PIL.Image.Image:
|
|
144
|
-
return self.img
|
|
145
|
-
|
|
146
146
|
# make sure it works
|
|
147
|
-
_ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
|
|
148
|
-
status = view_t.add_column(agg=agg_fn(view_t.pos, view_t.frame, group_by=base_t))
|
|
147
|
+
_ = view_t.select(self.agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
|
|
148
|
+
status = view_t.add_column(agg=self.agg_fn(view_t.pos, view_t.frame, group_by=base_t))
|
|
149
149
|
assert status.num_excs == 0
|
|
150
150
|
_ = view_t.select(make_video(view_t.pos, view_t.agg)).group_by(base_t).show()
|
|
151
151
|
|
|
152
152
|
# image cols computed with a window function currently need to be stored
|
|
153
153
|
with pytest.raises(excs.Error):
|
|
154
|
-
view_t.add_column(agg2=agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
|
|
154
|
+
view_t.add_column(agg2=self.agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
|
|
155
155
|
|
|
156
156
|
# reload from store
|
|
157
157
|
cl = pxt.Client(reload=True)
|
|
158
158
|
base_t, view_t = cl.get_table(base_t.get_name()), cl.get_table(view_t.get_name())
|
|
159
|
-
_ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
|
|
159
|
+
_ = view_t.select(self.agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
|
pixeltable/tests/test_view.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
import logging
|
|
2
3
|
|
|
3
4
|
import PIL
|
|
@@ -414,6 +415,10 @@ class TestView:
|
|
|
414
415
|
v.order_by(v.c2).show(0),
|
|
415
416
|
t.where(t.c2 < 10).order_by(t.c2).show(0))
|
|
416
417
|
|
|
418
|
+
# create views with filters containing date and datetime
|
|
419
|
+
_ = cl.create_view('test_view_2', t, filter=t.c5 >= datetime.date.today())
|
|
420
|
+
_ = cl.create_view('test_view_3', t, filter=t.c5 < datetime.datetime.now())
|
|
421
|
+
|
|
417
422
|
def test_view_of_snapshot(self, test_client: pxt.Client) -> None:
|
|
418
423
|
"""Test view over a snapshot"""
|
|
419
424
|
cl = test_client
|