pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (99) hide show
  1. pixeltable/__init__.py +18 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +31 -50
  4. pixeltable/catalog/insertable_table.py +7 -6
  5. pixeltable/catalog/table.py +171 -57
  6. pixeltable/catalog/table_version.py +417 -140
  7. pixeltable/catalog/table_version_path.py +2 -2
  8. pixeltable/dataframe.py +239 -121
  9. pixeltable/env.py +82 -16
  10. pixeltable/exec/__init__.py +2 -1
  11. pixeltable/exec/cache_prefetch_node.py +1 -1
  12. pixeltable/exec/data_row_batch.py +6 -7
  13. pixeltable/exec/expr_eval_node.py +28 -28
  14. pixeltable/exec/in_memory_data_node.py +11 -7
  15. pixeltable/exec/sql_scan_node.py +7 -6
  16. pixeltable/exprs/__init__.py +4 -3
  17. pixeltable/exprs/column_ref.py +9 -0
  18. pixeltable/exprs/comparison.py +3 -3
  19. pixeltable/exprs/data_row.py +5 -1
  20. pixeltable/exprs/expr.py +15 -7
  21. pixeltable/exprs/function_call.py +17 -15
  22. pixeltable/exprs/image_member_access.py +9 -28
  23. pixeltable/exprs/in_predicate.py +96 -0
  24. pixeltable/exprs/inline_array.py +13 -11
  25. pixeltable/exprs/inline_dict.py +15 -13
  26. pixeltable/exprs/literal.py +16 -4
  27. pixeltable/exprs/row_builder.py +15 -41
  28. pixeltable/exprs/similarity_expr.py +65 -0
  29. pixeltable/ext/__init__.py +5 -0
  30. pixeltable/ext/functions/yolox.py +92 -0
  31. pixeltable/func/__init__.py +0 -2
  32. pixeltable/func/aggregate_function.py +18 -15
  33. pixeltable/func/callable_function.py +57 -13
  34. pixeltable/func/expr_template_function.py +20 -3
  35. pixeltable/func/function.py +35 -4
  36. pixeltable/func/globals.py +24 -14
  37. pixeltable/func/signature.py +23 -27
  38. pixeltable/func/udf.py +13 -12
  39. pixeltable/functions/__init__.py +8 -8
  40. pixeltable/functions/eval.py +7 -8
  41. pixeltable/functions/huggingface.py +64 -17
  42. pixeltable/functions/openai.py +36 -3
  43. pixeltable/functions/pil/image.py +61 -64
  44. pixeltable/functions/together.py +21 -0
  45. pixeltable/functions/util.py +11 -0
  46. pixeltable/globals.py +425 -0
  47. pixeltable/index/__init__.py +2 -0
  48. pixeltable/index/base.py +51 -0
  49. pixeltable/index/embedding_index.py +168 -0
  50. pixeltable/io/__init__.py +3 -0
  51. pixeltable/{utils → io}/hf_datasets.py +48 -17
  52. pixeltable/io/pandas.py +148 -0
  53. pixeltable/{utils → io}/parquet.py +58 -33
  54. pixeltable/iterators/__init__.py +1 -1
  55. pixeltable/iterators/base.py +4 -0
  56. pixeltable/iterators/document.py +218 -97
  57. pixeltable/iterators/video.py +8 -9
  58. pixeltable/metadata/__init__.py +7 -3
  59. pixeltable/metadata/converters/convert_12.py +3 -0
  60. pixeltable/metadata/converters/convert_13.py +41 -0
  61. pixeltable/metadata/schema.py +45 -22
  62. pixeltable/plan.py +15 -51
  63. pixeltable/store.py +38 -41
  64. pixeltable/tool/create_test_db_dump.py +39 -4
  65. pixeltable/type_system.py +47 -96
  66. pixeltable/utils/documents.py +42 -12
  67. pixeltable/utils/http_server.py +70 -0
  68. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
  69. pixeltable-0.2.6.dist-info/RECORD +119 -0
  70. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
  71. pixeltable/client.py +0 -604
  72. pixeltable/exprs/image_similarity_predicate.py +0 -58
  73. pixeltable/func/batched_function.py +0 -53
  74. pixeltable/tests/conftest.py +0 -177
  75. pixeltable/tests/functions/test_fireworks.py +0 -42
  76. pixeltable/tests/functions/test_functions.py +0 -60
  77. pixeltable/tests/functions/test_huggingface.py +0 -158
  78. pixeltable/tests/functions/test_openai.py +0 -152
  79. pixeltable/tests/functions/test_together.py +0 -111
  80. pixeltable/tests/test_audio.py +0 -65
  81. pixeltable/tests/test_catalog.py +0 -27
  82. pixeltable/tests/test_client.py +0 -21
  83. pixeltable/tests/test_component_view.py +0 -370
  84. pixeltable/tests/test_dataframe.py +0 -439
  85. pixeltable/tests/test_dirs.py +0 -107
  86. pixeltable/tests/test_document.py +0 -120
  87. pixeltable/tests/test_exprs.py +0 -805
  88. pixeltable/tests/test_function.py +0 -324
  89. pixeltable/tests/test_migration.py +0 -43
  90. pixeltable/tests/test_nos.py +0 -54
  91. pixeltable/tests/test_snapshot.py +0 -208
  92. pixeltable/tests/test_table.py +0 -1267
  93. pixeltable/tests/test_transactional_directory.py +0 -42
  94. pixeltable/tests/test_types.py +0 -22
  95. pixeltable/tests/test_video.py +0 -159
  96. pixeltable/tests/test_view.py +0 -530
  97. pixeltable/tests/utils.py +0 -408
  98. pixeltable-0.2.4.dist-info/RECORD +0 -132
  99. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
@@ -1,805 +0,0 @@
1
- import json
2
- import urllib.parse
3
- import urllib.request
4
- from typing import List, Dict
5
-
6
- import pytest
7
- import sqlalchemy as sql
8
-
9
- import pixeltable as pxt
10
- import pixeltable.func as func
11
- from pixeltable import catalog
12
- from pixeltable import exceptions as excs
13
- from pixeltable import exprs
14
- from pixeltable.exprs import Expr, ColumnRef
15
- from pixeltable.exprs import RELATIVE_PATH_ROOT as R
16
- from pixeltable.functions import cast, sum, count
17
- from pixeltable.functions.pil.image import blend
18
- from pixeltable.iterators import FrameIterator
19
- from pixeltable.tests.utils import get_image_files, skip_test_if_not_installed
20
- from pixeltable.type_system import StringType, BoolType, IntType, ArrayType, ColumnType, FloatType, \
21
- VideoType
22
-
23
-
24
- class TestExprs:
25
- def test_basic(self, test_tbl: catalog.Table) -> None:
26
- t = test_tbl
27
- assert t['c1'].equals(t.c1)
28
- assert t['c7']['*'].f5.equals(t.c7['*'].f5)
29
-
30
- assert isinstance(t.c1 == None, Expr)
31
- assert isinstance(t.c1 < 'a', Expr)
32
- assert isinstance(t.c1 <= 'a', Expr)
33
- assert isinstance(t.c1 == 'a', Expr)
34
- assert isinstance(t.c1 != 'a', Expr)
35
- assert isinstance(t.c1 > 'a', Expr)
36
- assert isinstance(t.c1 >= 'a', Expr)
37
- assert isinstance((t.c1 == 'a') & (t.c2 < 5), Expr)
38
- assert isinstance((t.c1 == 'a') | (t.c2 < 5), Expr)
39
- assert isinstance(~(t.c1 == 'a'), Expr)
40
- with pytest.raises(AttributeError) as excinfo:
41
- _ = t.does_not_exist
42
- assert 'unknown' in str(excinfo.value).lower()
43
-
44
- def test_compound_predicates(self, test_tbl: catalog.Table) -> None:
45
- t = test_tbl
46
- # compound predicates that can be fully evaluated in SQL
47
- _ = t.where((t.c1 == 'test string') & (t.c6.f1 > 50)).collect()
48
- _ = t.where((t.c1 == 'test string') & (t.c2 > 50)).collect()
49
- e = ((t.c1 == 'test string') & (t.c2 > 50)).sql_expr()
50
- assert len(e.clauses) == 2
51
-
52
- e = ((t.c1 == 'test string') & (t.c2 > 50) & (t.c3 < 1.0)).sql_expr()
53
- assert len(e.clauses) == 3
54
- e = ((t.c1 == 'test string') | (t.c2 > 50)).sql_expr()
55
- assert len(e.clauses) == 2
56
- e = ((t.c1 == 'test string') | (t.c2 > 50) | (t.c3 < 1.0)).sql_expr()
57
- assert len(e.clauses) == 3
58
- e = (~(t.c1 == 'test string')).sql_expr()
59
- assert isinstance(e, sql.sql.expression.BinaryExpression)
60
-
61
- with pytest.raises(TypeError) as exc_info:
62
- _ = t.where((t.c1 == 'test string') or (t.c6.f1 > 50)).collect()
63
- assert 'cannot be used in conjunction with python boolean operators' in str(exc_info.value).lower()
64
-
65
- # compound predicates with Python functions
66
- @pxt.udf(return_type=BoolType(), param_types=[StringType()])
67
- def udf(_: str) -> bool:
68
- return True
69
- @pxt.udf(return_type=BoolType(), param_types=[IntType()])
70
- def udf2(_: int) -> bool:
71
- return True
72
-
73
- # TODO: find a way to test this
74
- # # & can be split
75
- # p = (t.c1 == 'test string') & udf(t.c1)
76
- # assert p.sql_expr() is None
77
- # sql_pred, other_pred = p.extract_sql_predicate()
78
- # assert isinstance(sql_pred, sql.sql.expression.BinaryExpression)
79
- # assert isinstance(other_pred, FunctionCall)
80
- #
81
- # p = (t.c1 == 'test string') & udf(t.c1) & (t.c2 > 50)
82
- # assert p.sql_expr() is None
83
- # sql_pred, other_pred = p.extract_sql_predicate()
84
- # assert len(sql_pred.clauses) == 2
85
- # assert isinstance(other_pred, FunctionCall)
86
- #
87
- # p = (t.c1 == 'test string') & udf(t.c1) & (t.c2 > 50) & udf2(t.c2)
88
- # assert p.sql_expr() is None
89
- # sql_pred, other_pred = p.extract_sql_predicate()
90
- # assert len(sql_pred.clauses) == 2
91
- # assert isinstance(other_pred, CompoundPredicate)
92
- #
93
- # # | cannot be split
94
- # p = (t.c1 == 'test string') | udf(t.c1)
95
- # assert p.sql_expr() is None
96
- # sql_pred, other_pred = p.extract_sql_predicate()
97
- # assert sql_pred is None
98
- # assert isinstance(other_pred, CompoundPredicate)
99
-
100
- def test_filters(self, test_tbl: catalog.Table) -> None:
101
- t = test_tbl
102
- _ = t[t.c1 == 'test string'].show()
103
- print(_)
104
- _ = t[t.c2 > 50].show()
105
- print(_)
106
- _ = t[t.c1n == None].show()
107
- print(_)
108
- _ = t[t.c1n != None].show(0)
109
- print(_)
110
-
111
- def test_exception_handling(self, test_tbl: catalog.Table) -> None:
112
- t = test_tbl
113
-
114
- # error in expr that's handled in SQL
115
- with pytest.raises(excs.Error):
116
- _ = t[(t.c2 + 1) / t.c2].show()
117
-
118
- # error in expr that's handled in Python
119
- with pytest.raises(excs.Error):
120
- _ = t[(t.c6.f2 + 1) / (t.c2 - 10)].show()
121
-
122
- # the same, but with an inline function
123
- @pxt.udf(return_type=FloatType(), param_types=[IntType(), IntType()])
124
- def f(a: int, b: int) -> float:
125
- return a / b
126
- with pytest.raises(excs.Error):
127
- _ = t[f(t.c2 + 1, t.c2)].show()
128
-
129
- # error in agg.init()
130
- @pxt.uda(update_types=[IntType()], value_type=IntType(), name='agg')
131
- class Aggregator(pxt.Aggregator):
132
- def __init__(self):
133
- self.sum = 1 / 0
134
- def update(self, val):
135
- pass
136
- def value(self):
137
- return 1
138
- with pytest.raises(excs.Error):
139
- _ = t[agg(t.c2)].show()
140
-
141
- # error in agg.update()
142
- @pxt.uda(update_types=[IntType()], value_type=IntType(), name='agg')
143
- class Aggregator(pxt.Aggregator):
144
- def __init__(self):
145
- self.sum = 0
146
- def update(self, val):
147
- self.sum += 1 / val
148
- def value(self):
149
- return 1
150
- with pytest.raises(excs.Error):
151
- _ = t[agg(t.c2 - 10)].show()
152
-
153
- # error in agg.value()
154
- @pxt.uda(update_types=[IntType()], value_type=IntType(), name='agg')
155
- class Aggregator(pxt.Aggregator):
156
- def __init__(self):
157
- self.sum = 0
158
- def update(self, val):
159
- self.sum += val
160
- def value(self):
161
- return 1 / self.sum
162
- with pytest.raises(excs.Error):
163
- _ = t[t.c2 <= 2][agg(t.c2 - 1)].show()
164
-
165
- def test_props(self, test_tbl: catalog.Table, img_tbl: catalog.Table) -> None:
166
- t = test_tbl
167
- # errortype/-msg for computed column
168
- res = t.select(error=t.c8.errortype).collect()
169
- assert res.to_pandas()['error'].isna().all()
170
- res = t.select(error=t.c8.errormsg).collect()
171
- assert res.to_pandas()['error'].isna().all()
172
-
173
- img_t = img_tbl
174
- # fileurl
175
- res = img_t.select(img_t.img.fileurl).show(0).to_pandas()
176
- stored_urls = set(res.iloc[:, 0])
177
- assert len(stored_urls) == len(res)
178
- all_urls = set(urllib.parse.urljoin('file:', urllib.request.pathname2url(path)) for path in get_image_files())
179
- assert stored_urls <= all_urls
180
-
181
- # localpath
182
- res = img_t.select(img_t.img.localpath).show(0).to_pandas()
183
- stored_paths = set(res.iloc[:, 0])
184
- assert len(stored_paths) == len(res)
185
- all_paths = set(get_image_files())
186
- assert stored_paths <= all_paths
187
-
188
- # errortype/-msg for image column
189
- res = img_t.select(error=img_t.img.errortype).collect().to_pandas()
190
- assert res['error'].isna().all()
191
- res = img_t.select(error=img_t.img.errormsg).collect().to_pandas()
192
- assert res['error'].isna().all()
193
-
194
- for c in [t.c1, t.c1n, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7]:
195
- # errortype/errormsg only applies to stored computed and media columns
196
- with pytest.raises(excs.Error) as excinfo:
197
- _ = t.select(c.errortype).show()
198
- assert 'only valid for' in str(excinfo.value)
199
- with pytest.raises(excs.Error) as excinfo:
200
- _ = t.select(c.errormsg).show()
201
- assert 'only valid for' in str(excinfo.value)
202
-
203
- # fileurl/localpath only applies to media columns
204
- with pytest.raises(excs.Error) as excinfo:
205
- _ = t.select(t.c1.fileurl).show()
206
- assert 'only valid for' in str(excinfo.value)
207
- with pytest.raises(excs.Error) as excinfo:
208
- _ = t.select(t.c1.localpath).show()
209
- assert 'only valid for' in str(excinfo.value)
210
-
211
- # fileurl/localpath doesn't apply to unstored computed img columns
212
- img_t.add_column(c9=img_t.img.rotate(30))
213
- with pytest.raises(excs.Error) as excinfo:
214
- _ = img_t.select(img_t.c9.localpath).show()
215
- assert 'computed unstored' in str(excinfo.value)
216
-
217
- def test_null_args(self, test_client: pxt.Client) -> None:
218
- # create table with two int columns
219
- schema = {'c1': FloatType(nullable=True), 'c2': FloatType(nullable=True)}
220
- t = test_client.create_table('test', schema)
221
-
222
- # computed column that doesn't allow nulls
223
- t.add_column(c3=lambda c1, c2: c1 + c2, type=FloatType(nullable=False))
224
- # function that does allow nulls
225
- @pxt.udf(return_type=FloatType(nullable=True),
226
- param_types=[FloatType(nullable=False), FloatType(nullable=True)])
227
- def f(a: int, b: int) -> int:
228
- if b is None:
229
- return a
230
- return a + b
231
- t.add_column(c4=f(t.c1, t.c2))
232
-
233
- # data that tests all combinations of nulls
234
- data = [{'c1': 1.0, 'c2': 1.0}, {'c1': 1.0, 'c2': None}, {'c1': None, 'c2': 1.0}, {'c1': None, 'c2': None}]
235
- status = t.insert(data, fail_on_exception=False)
236
- assert status.num_rows == len(data)
237
- assert status.num_excs == len(data) - 1
238
- result = t.select(t.c3, t.c4).collect()
239
- assert result['c3'] == [2.0, None, None, None]
240
- assert result['c4'] == [2.0, 1.0, None, None]
241
-
242
- def test_arithmetic_exprs(self, test_tbl: catalog.Table) -> None:
243
- t = test_tbl
244
-
245
- _ = t[t.c2, t.c6.f3, t.c2 + t.c6.f3, (t.c2 + t.c6.f3) / (t.c6.f3 + 1)].show()
246
- _ = t[t.c2 + t.c2].show()
247
- for op1, op2 in [(t.c2, t.c2), (t.c3, t.c3)]:
248
- _ = t[op1 + op2].show()
249
- _ = t[op1 - op2].show()
250
- _ = t[op1 * op2].show()
251
- _ = t[op1 > 0][op1 / op2].show()
252
-
253
- # non-numeric types
254
- for op1, op2 in [
255
- (t.c1, t.c2), (t.c1, 1), (t.c2, t.c1), (t.c2, 'a'),
256
- (t.c1, t.c3), (t.c1, 1.0), (t.c3, t.c1), (t.c3, 'a')
257
- ]:
258
- with pytest.raises(excs.Error):
259
- _ = t[op1 + op2]
260
- with pytest.raises(excs.Error):
261
- _ = t[op1 - op2]
262
- with pytest.raises(excs.Error):
263
- _ = t[op1 * op2]
264
- with pytest.raises(excs.Error):
265
- _ = t[op1 / op2]
266
-
267
- # TODO: test division; requires predicate
268
- for op1, op2 in [(t.c6.f2, t.c6.f2), (t.c6.f3, t.c6.f3)]:
269
- _ = t[op1 + op2].show()
270
- _ = t[op1 - op2].show()
271
- _ = t[op1 * op2].show()
272
- with pytest.raises(excs.Error):
273
- _ = t[op1 / op2].show()
274
-
275
- for op1, op2 in [
276
- (t.c6.f1, t.c6.f2), (t.c6.f1, t.c6.f3), (t.c6.f1, 1), (t.c6.f1, 1.0),
277
- (t.c6.f2, t.c6.f1), (t.c6.f3, t.c6.f1), (t.c6.f2, 'a'), (t.c6.f3, 'a'),
278
- ]:
279
- with pytest.raises(excs.Error):
280
- _ = t[op1 + op2].show()
281
- with pytest.raises(excs.Error):
282
- _ = t[op1 - op2].show()
283
- with pytest.raises(excs.Error):
284
- _ = t[op1 * op2].show()
285
-
286
-
287
- def test_inline_dict(self, test_tbl: catalog.Table) -> None:
288
- t = test_tbl
289
- df = t[[{'a': t.c1, 'b': {'c': t.c2}, 'd': 1, 'e': {'f': 2}}]]
290
- result = df.show()
291
- print(result)
292
-
293
- def test_inline_array(self, test_tbl: catalog.Table) -> None:
294
- t = test_tbl
295
- result = t.select([[t.c2, 1], [t.c2, 2]]).show()
296
- t = result.column_types()[0]
297
- assert t.is_array_type()
298
- assert isinstance(t, ArrayType)
299
- assert t.shape == (2, 2)
300
- assert t.dtype == ColumnType.Type.INT
301
-
302
- def test_json_mapper(self, test_tbl: catalog.Table) -> None:
303
- t = test_tbl
304
- # top-level is dict
305
- df = t[t.c6.f5['*'] >> (R + 1)]
306
- res = df.show()
307
- print(res)
308
- _ = t[t.c7['*'].f5 >> [R[3], R[2], R[1], R[0]]]
309
- _ = _.show()
310
- print(_)
311
- # target expr contains global-scope dependency
312
- df = t[
313
- t.c6.f5['*'] >> (R * t.c6.f5[1])
314
- ]
315
- res = df.show()
316
- print(res)
317
-
318
- def test_dicts(self, test_tbl: catalog.Table) -> None:
319
- t = test_tbl
320
- # top-level is dict
321
- _ = t[t.c6.f1]
322
- _ = _.show()
323
- print(_)
324
- # predicate on dict field
325
- _ = t[t.c6.f2 < 2].show()
326
- #_ = t[t.c6.f2].show()
327
- #_ = t[t.c6.f5].show()
328
- _ = t[t.c6.f6.f8].show()
329
- _ = t[cast(t.c6.f6.f8, ArrayType((4,), FloatType()))].show()
330
-
331
- # top-level is array
332
- #_ = t[t.c7['*'].f1].show()
333
- #_ = t[t.c7['*'].f2].show()
334
- #_ = t[t.c7['*'].f5].show()
335
- _ = t[t.c7['*'].f6.f8].show()
336
- _ = t[t.c7[0].f6.f8].show()
337
- _ = t[t.c7[:2].f6.f8].show()
338
- _ = t[t.c7[::-1].f6.f8].show()
339
- _ = t[cast(t.c7['*'].f6.f8, ArrayType((2, 4), FloatType()))].show()
340
- print(_)
341
-
342
- def test_arrays(self, test_tbl: catalog.Table) -> None:
343
- t = test_tbl
344
- t.add_column(array_col=[[t.c2, 1], [1, t.c2]])
345
- _ = t[t.array_col].show()
346
- print(_)
347
- _ = t[t.array_col[:, 0]].show()
348
- print(_)
349
-
350
- def test_astype(self, test_tbl: catalog.Table) -> None:
351
- t = test_tbl
352
- # Convert int to float
353
- status = t.add_column(c2_as_float=t.c2.astype(FloatType()))
354
- assert status.num_excs == 0
355
- data = t.select(t.c2, t.c2_as_float).collect()
356
- for row in data:
357
- assert isinstance(row['c2'], int)
358
- assert isinstance(row['c2_as_float'], float)
359
- assert row['c2'] == row['c2_as_float']
360
- # Compound expression
361
- status = t.add_column(compound_as_float=(t.c2 + 1).astype(FloatType()))
362
- assert status.num_excs == 0
363
- data = t.select(t.c2, t.compound_as_float).collect()
364
- for row in data:
365
- assert isinstance(row['compound_as_float'], float)
366
- assert row['c2'] + 1 == row['compound_as_float']
367
- # Type conversion error
368
- status = t.add_column(c2_as_string=t.c2.astype(StringType()))
369
- assert status.num_excs == t.count()
370
-
371
- def test_apply(self, test_tbl: catalog.Table) -> None:
372
-
373
- t = test_tbl
374
-
375
- # For each column c1, ..., c5, we create a new column ci_as_str that converts it to
376
- # a string, then check that each row is correctly converted
377
- # (For c1 this is the no-op string-to-string conversion)
378
- for col_id in range(1, 6):
379
- col_name = f'c{col_id}'
380
- str_col_name = f'c{col_id}_str'
381
- status = t.add_column(**{str_col_name: t[col_name].apply(str)})
382
- assert status.num_excs == 0
383
- data = t.select(t[col_name], t[str_col_name]).collect()
384
- for row in data:
385
- assert row[str_col_name] == str(row[col_name])
386
-
387
- # Test a compound expression with apply
388
- status = t.add_column(c2_plus_1_str=(t.c2 + 1).apply(str))
389
- assert status.num_excs == 0
390
- data = t.select(t.c2, t.c2_plus_1_str).collect()
391
- for row in data:
392
- assert row['c2_plus_1_str'] == str(row['c2'] + 1)
393
-
394
- # For columns c6, c7, try using json.dumps and json.loads to emit and parse JSON <-> str
395
- for col_id in range(6, 8):
396
- col_name = f'c{col_id}'
397
- str_col_name = f'c{col_id}_str'
398
- back_to_json_col_name = f'c{col_id}_back_to_json'
399
- status = t.add_column(**{str_col_name: t[col_name].apply(json.dumps)})
400
- assert status.num_excs == 0
401
- status = t.add_column(**{back_to_json_col_name: t[str_col_name].apply(json.loads)})
402
- assert status.num_excs == 0
403
- data = t.select(t[col_name], t[str_col_name], t[back_to_json_col_name]).collect()
404
- for row in data:
405
- assert row[str_col_name] == json.dumps(row[col_name])
406
- assert row[back_to_json_col_name] == row[col_name]
407
-
408
- def f1(x):
409
- return str(x)
410
-
411
- # Now test that a function without a return type throws an exception ...
412
- with pytest.raises(excs.Error) as exc_info:
413
- t.c2.apply(f1)
414
- assert 'Column type of `f1` cannot be inferred.' in str(exc_info.value)
415
-
416
- # ... but works if the type is specified explicitly.
417
- status = t.add_column(c2_str_f1=t.c2.apply(f1, col_type=StringType()))
418
- assert status.num_excs == 0
419
-
420
- # Test that the return type of a function can be successfully inferred.
421
- def f2(x) -> str:
422
- return str(x)
423
-
424
- status = t.add_column(c2_str_f2=t.c2.apply(f2))
425
- assert status.num_excs == 0
426
-
427
- # Test various validation failures.
428
-
429
- def f3(x, y) -> str:
430
- return f'{x}{y}'
431
-
432
- with pytest.raises(excs.Error) as exc_info:
433
- t.c2.apply(f3) # Too many required parameters
434
- assert str(exc_info.value) == 'Function `f3` has multiple required parameters.'
435
-
436
- def f4() -> str:
437
- return "pixeltable"
438
-
439
- with pytest.raises(excs.Error) as exc_info:
440
- t.c2.apply(f4) # No positional parameters
441
- assert str(exc_info.value) == 'Function `f4` has no positional parameters.'
442
-
443
- def f5(**kwargs) -> str:
444
- return ""
445
-
446
- with pytest.raises(excs.Error) as exc_info:
447
- t.c2.apply(f5) # No positional parameters
448
- assert str(exc_info.value) == 'Function `f5` has no positional parameters.'
449
-
450
- # Ensure these varargs signatures are acceptable
451
-
452
- def f6(x, **kwargs) -> str:
453
- return x
454
-
455
- t.c2.apply(f6)
456
-
457
- def f7(x, *args) -> str:
458
- return x
459
-
460
- t.c2.apply(f7)
461
-
462
- def f8(*args) -> str:
463
- return ''
464
-
465
- t.c2.apply(f8)
466
-
467
- def test_select_list(self, img_tbl) -> None:
468
- t = img_tbl
469
- result = t[t.img].show(n=100)
470
- _ = result._repr_html_()
471
- df = t[[t.img, t.img.rotate(60)]]
472
- _ = df.show(n=100)._repr_html_()
473
-
474
- with pytest.raises(excs.Error):
475
- _ = t[t.img.rotate]
476
-
477
- def test_img_members(self, img_tbl) -> None:
478
- t = img_tbl
479
- # make sure the limit is applied in Python, not in the SELECT
480
- result = t[t.img.height > 200][t.img].show(n=3)
481
- assert len(result) == 3
482
- result = t[t.img.crop((10, 10, 60, 60))].show(n=100)
483
- result = t[t.img.crop((10, 10, 60, 60)).resize((100, 100))].show(n=100)
484
- result = t[t.img.crop((10, 10, 60, 60)).resize((100, 100)).convert('L')].show(n=100)
485
- result = t[t.img.getextrema()].show(n=100)
486
- result = t[t.img, t.img.height, t.img.rotate(90)].show(n=100)
487
- _ = result._repr_html_()
488
-
489
- def test_img_functions(self, img_tbl) -> None:
490
- skip_test_if_not_installed('nos')
491
- t = img_tbl
492
- from pixeltable.functions.pil.image import resize
493
- result = t[t.img.resize((224, 224))].show(0)
494
- result = t[resize(t.img, (224, 224))].show(0)
495
- result = t[blend(t.img, t.img.rotate(90), 0.5)].show(100)
496
- print(result)
497
- from pixeltable.functions.nos.image_embedding import openai_clip
498
- result = t[openai_clip(t.img.resize((224, 224)))].show(10)
499
- print(result)
500
- _ = result._repr_html_()
501
- _ = t.img.entropy() > 1
502
- _ = (t.img.entropy() > 1) & (t.split == 'train')
503
- _ = (t.img.entropy() > 1) & (t.split == 'train') & (t.split == 'val')
504
- _ = (t.split == 'train') & (t.img.entropy() > 1) & (t.split == 'val') & (t.img.entropy() < 0)
505
- _ = t[(t.split == 'train') & (t.category == 'n03445777')][t.img].show()
506
- print(_)
507
- result = t[t.img.width > 1].show()
508
- print(result)
509
- result = t[(t.split == 'val') & (t.img.entropy() > 1) & (t.category == 'n03445777')].show()
510
- print(result)
511
- result = t[
512
- (t.split == 'train') & (t.img.entropy() > 1) & (t.split == 'val') & (t.img.entropy() < 0)
513
- ][t.img, t.split].show()
514
- print(result)
515
-
516
- def test_similarity(self, indexed_img_tbl: catalog.Table) -> None:
517
- skip_test_if_not_installed('nos')
518
- t = indexed_img_tbl
519
- _ = t.show(30)
520
- probe = t.select(t.img, t.category).show(1)
521
- img = probe[0, 0]
522
- result = t.where(t.img.nearest(img)).show(10)
523
- assert len(result) == 10
524
- # nearest() with one SQL predicate and one Python predicate
525
- result = t[t.img.nearest(img) & (t.category == probe[0, 1]) & (t.img.width > 1)].show(10)
526
- # TODO: figure out how to verify results
527
-
528
- with pytest.raises(excs.Error) as exc_info:
529
- _ = t[t.img.nearest(img)].order_by(t.category).show()
530
- assert 'cannot be used in conjunction with' in str(exc_info.value)
531
-
532
- result = t[t.img.nearest('musical instrument')].show(10)
533
- assert len(result) == 10
534
- # matches() with one SQL predicate and one Python predicate
535
- french_horn_category = 'n03394916'
536
- result = t[
537
- t.img.nearest('musical instrument') & (t.category == french_horn_category) & (t.img.width > 1)
538
- ].show(10)
539
-
540
- with pytest.raises(excs.Error) as exc_info:
541
- _ = t[t.img.nearest(5)].show()
542
- assert 'requires' in str(exc_info.value)
543
-
544
- # TODO: this doesn't work when combined with test_similarity(), for some reason the data table for img_tbl
545
- # doesn't get created; why?
546
- def test_similarity2(self, img_tbl: catalog.Table) -> None:
547
- t = img_tbl
548
- probe = t[t.img].show(1)
549
- img = probe[0, 0]
550
-
551
- with pytest.raises(excs.Error):
552
- _ = t[t.img.nearest(img)].show(10)
553
- with pytest.raises(excs.Error):
554
- _ = t[t.img.nearest('musical instrument')].show(10)
555
-
556
- def test_ids(
557
- self, test_tbl: catalog.Table, test_tbl_exprs: List[exprs.Expr],
558
- img_tbl: catalog.Table, img_tbl_exprs: List[exprs.Expr]
559
- ) -> None:
560
- d: Dict[int, exprs.Expr] = {}
561
- for e in test_tbl_exprs:
562
- assert e.id is not None
563
- d[e.id] = e
564
- for e in img_tbl_exprs:
565
- assert e.id is not None
566
- d[e.id] = e
567
- assert len(d) == len(test_tbl_exprs) + len(img_tbl_exprs)
568
-
569
- def test_serialization(
570
- self, test_tbl_exprs: List[exprs.Expr], img_tbl_exprs: List[exprs.Expr]
571
- ) -> None:
572
- """Test as_dict()/from_dict() (via serialize()/deserialize()) for all exprs."""
573
- for e in test_tbl_exprs:
574
- e_serialized = e.serialize()
575
- e_deserialized = Expr.deserialize(e_serialized)
576
- assert e.equals(e_deserialized)
577
-
578
- for e in img_tbl_exprs:
579
- e_serialized = e.serialize()
580
- e_deserialized = Expr.deserialize(e_serialized)
581
- assert e.equals(e_deserialized)
582
-
583
- def test_print(self, test_tbl_exprs: List[exprs.Expr], img_tbl_exprs: List[exprs.Expr]) -> None:
584
- _ = func.FunctionRegistry.get().module_fns
585
- for e in test_tbl_exprs:
586
- _ = str(e)
587
- print(_)
588
- for e in img_tbl_exprs:
589
- _ = str(e)
590
- print(_)
591
-
592
- def test_subexprs(self, img_tbl: catalog.Table) -> None:
593
- t = img_tbl
594
- e = t.img
595
- subexprs = [s for s in e.subexprs()]
596
- assert len(subexprs) == 1
597
- e = t.img.rotate(90).resize((224, 224))
598
- subexprs = [s for s in e.subexprs()]
599
- assert len(subexprs) == 4
600
- subexprs = [s for s in e.subexprs(expr_class=ColumnRef)]
601
- assert len(subexprs) == 1
602
- assert t.img.equals(subexprs[0])
603
-
604
- def test_window_fns(self, test_client: pxt.Client, test_tbl: catalog.Table) -> None:
605
- cl = test_client
606
- t = test_tbl
607
- _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3)).show(100)
608
-
609
- # conflicting ordering requirements
610
- with pytest.raises(excs.Error):
611
- _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3), sum(t.c2, group_by=t.c3, order_by=t.c4)).show(100)
612
- with pytest.raises(excs.Error):
613
- _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3), sum(t.c2, group_by=t.c3, order_by=t.c4)).show(100)
614
-
615
- # backfill works
616
- t.add_column(c9=sum(t.c2, group_by=t.c4, order_by=t.c3))
617
- _ = t.c9.col.has_window_fn_call()
618
-
619
- # ordering conflict between frame extraction and window fn
620
- base_t = cl.create_table('videos', {'video': VideoType(), 'c2': IntType(nullable=False)})
621
- args = {'video': base_t.video, 'fps': 0}
622
- v = cl.create_view('frame_view', base_t, iterator_class=FrameIterator, iterator_args=args)
623
- # compatible ordering
624
- _ = v.select(v.frame, sum(v.frame_idx, group_by=base_t, order_by=v.pos)).show(100)
625
- with pytest.raises(excs.Error):
626
- # incompatible ordering
627
- _ = v.select(v.frame, sum(v.c2, order_by=base_t, group_by=v.pos)).show(100)
628
-
629
- schema = {
630
- 'c2': IntType(nullable=False),
631
- 'c3': FloatType(nullable=False),
632
- 'c4': BoolType(nullable=False),
633
- }
634
- new_t = cl.create_table('insert_test', schema=schema)
635
- new_t.add_column(c2_sum=sum(new_t.c2, group_by=new_t.c4, order_by=new_t.c3))
636
- rows = list(t.select(t.c2, t.c4, t.c3).collect())
637
- new_t.insert(rows)
638
- _ = new_t.show(0)
639
-
640
- def test_aggregates(self, test_tbl: catalog.Table) -> None:
641
- t = test_tbl
642
- _ = t[t.c2 % 2, sum(t.c2), count(t.c2), sum(t.c2) + count(t.c2), sum(t.c2) + (t.c2 % 2)]\
643
- .group_by(t.c2 % 2).show()
644
-
645
- # check that aggregates don't show up in the wrong places
646
- with pytest.raises(excs.Error):
647
- # aggregate in where clause
648
- _ = t[sum(t.c2) > 0][sum(t.c2)].group_by(t.c2 % 2).show()
649
- with pytest.raises(excs.Error):
650
- # aggregate in group_by clause
651
- _ = t[sum(t.c2)].group_by(sum(t.c2)).show()
652
- with pytest.raises(excs.Error):
653
- # mixing aggregates and non-aggregates
654
- _ = t[sum(t.c2) + t.c2].group_by(t.c2 % 2).show()
655
- with pytest.raises(excs.Error):
656
- # nested aggregates
657
- _ = t[sum(count(t.c2))].group_by(t.c2 % 2).show()
658
-
659
- def test_udas(self, test_tbl: catalog.Table) -> None:
660
- t = test_tbl
661
-
662
- @pxt.uda(
663
- name='window_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
664
- allows_window=True, requires_order_by=False)
665
- class WindowAgg:
666
- def __init__(self, val: int = 0):
667
- self.val = val
668
- def update(self, ignore: int) -> None:
669
- pass
670
- def value(self) -> int:
671
- return self.val
672
-
673
- @pxt.uda(
674
- name='ordered_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
675
- requires_order_by=True, allows_window=True)
676
- class WindowAgg:
677
- def __init__(self, val: int = 0):
678
- self.val = val
679
- def update(self, i: int) -> None:
680
- pass
681
- def value(self) -> int:
682
- return self.val
683
-
684
- @pxt.uda(
685
- name='std_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
686
- requires_order_by=False, allows_window=False)
687
- class StdAgg:
688
- def __init__(self, val: int = 0):
689
- self.val = val
690
- def update(self, i: int) -> None:
691
- pass
692
- def value(self) -> int:
693
- return self.val
694
-
695
- # init arg is passed along
696
- assert t.select(out=window_agg(t.c2, order_by=t.c2)).collect()[0]['out'] == 0
697
- assert t.select(out=window_agg(t.c2, val=1, order_by=t.c2)).collect()[0]['out'] == 1
698
-
699
- with pytest.raises(excs.Error) as exc_info:
700
- _ = t.select(window_agg(t.c2, val=t.c2, order_by=t.c2)).collect()
701
- assert 'needs to be a constant' in str(exc_info.value)
702
-
703
- with pytest.raises(excs.Error) as exc_info:
704
- # ordering expression not a pixeltable expr
705
- _ = t.select(ordered_agg(1, t.c2)).collect()
706
- assert 'but instead is a' in str(exc_info.value).lower()
707
-
708
- with pytest.raises(excs.Error) as exc_info:
709
- # explicit order_by
710
- _ = t.select(ordered_agg(t.c2, order_by=t.c2)).collect()
711
- assert 'order_by invalid' in str(exc_info.value).lower()
712
-
713
- with pytest.raises(excs.Error) as exc_info:
714
- # order_by for non-window function
715
- _ = t.select(std_agg(t.c2, order_by=t.c2)).collect()
716
- assert 'does not allow windows' in str(exc_info.value).lower()
717
-
718
- with pytest.raises(excs.Error) as exc_info:
719
- # group_by for non-window function
720
- _ = t.select(std_agg(t.c2, group_by=t.c4)).collect()
721
- assert 'group_by invalid' in str(exc_info.value).lower()
722
-
723
- with pytest.raises(excs.Error) as exc_info:
724
- # missing init type
725
- @pxt.uda(update_types=[IntType()], value_type=IntType())
726
- class WindowAgg:
727
- def __init__(self, val: int = 0):
728
- self.val = val
729
- def update(self, ignore: int) -> None:
730
- pass
731
- def value(self) -> int:
732
- return self.val
733
- assert 'init_types must be a list of' in str(exc_info.value)
734
-
735
- with pytest.raises(excs.Error) as exc_info:
736
- # missing update parameter
737
- @pxt.uda(init_types=[IntType()], update_types=[], value_type=IntType())
738
- class WindowAgg:
739
- def __init__(self, val: int = 0):
740
- self.val = val
741
- def update(self) -> None:
742
- pass
743
- def value(self) -> int:
744
- return self.val
745
- assert 'must have at least one parameter' in str(exc_info.value)
746
-
747
- with pytest.raises(excs.Error) as exc_info:
748
- # missing update type
749
- @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
750
- class WindowAgg:
751
- def __init__(self, val: int = 0):
752
- self.val = val
753
- def update(self, i1: int, i2: int) -> None:
754
- pass
755
- def value(self) -> int:
756
- return self.val
757
- assert 'update_types must be a list of' in str(exc_info.value)
758
-
759
- with pytest.raises(excs.Error) as exc_info:
760
- # duplicate parameter names
761
- @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
762
- class WindowAgg:
763
- def __init__(self, val: int = 0):
764
- self.val = val
765
- def update(self, val: int) -> None:
766
- pass
767
- def value(self) -> int:
768
- return self.val
769
- assert 'cannot have parameters with the same name: val' in str(exc_info.value)
770
-
771
- with pytest.raises(excs.Error) as exc_info:
772
- # invalid name
773
- @pxt.uda(name='not an identifier', init_types=[IntType()], update_types=[IntType()], value_type=IntType())
774
- class WindowAgg:
775
- def __init__(self, val: int = 0):
776
- self.val = val
777
- def update(self, i1: int, i2: int) -> None:
778
- pass
779
- def value(self) -> int:
780
- return self.val
781
- assert 'invalid name' in str(exc_info.value).lower()
782
-
783
- with pytest.raises(excs.Error) as exc_info:
784
- # reserved parameter name
785
- @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
786
- class WindowAgg:
787
- def __init__(self, val: int = 0):
788
- self.val = val
789
- def update(self, order_by: int) -> None:
790
- pass
791
- def value(self) -> int:
792
- return self.val
793
- assert 'order_by is reserved' in str(exc_info.value).lower()
794
-
795
- with pytest.raises(excs.Error) as exc_info:
796
- # reserved parameter name
797
- @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
798
- class WindowAgg:
799
- def __init__(self, val: int = 0):
800
- self.val = val
801
- def update(self, group_by: int) -> None:
802
- pass
803
- def value(self) -> int:
804
- return self.val
805
- assert 'group_by is reserved' in str(exc_info.value).lower()