pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (110)
  1. pixeltable/__init__.py +20 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +23 -7
  4. pixeltable/catalog/insertable_table.py +32 -19
  5. pixeltable/catalog/table.py +210 -20
  6. pixeltable/catalog/table_version.py +272 -111
  7. pixeltable/catalog/table_version_path.py +6 -1
  8. pixeltable/dataframe.py +184 -110
  9. pixeltable/datatransfer/__init__.py +1 -0
  10. pixeltable/datatransfer/label_studio.py +526 -0
  11. pixeltable/datatransfer/remote.py +113 -0
  12. pixeltable/env.py +213 -79
  13. pixeltable/exec/__init__.py +2 -1
  14. pixeltable/exec/data_row_batch.py +6 -7
  15. pixeltable/exec/expr_eval_node.py +28 -28
  16. pixeltable/exec/sql_scan_node.py +7 -6
  17. pixeltable/exprs/__init__.py +4 -3
  18. pixeltable/exprs/column_ref.py +11 -2
  19. pixeltable/exprs/comparison.py +39 -1
  20. pixeltable/exprs/data_row.py +7 -0
  21. pixeltable/exprs/expr.py +26 -19
  22. pixeltable/exprs/function_call.py +17 -18
  23. pixeltable/exprs/globals.py +14 -2
  24. pixeltable/exprs/image_member_access.py +9 -28
  25. pixeltable/exprs/in_predicate.py +96 -0
  26. pixeltable/exprs/inline_array.py +13 -11
  27. pixeltable/exprs/inline_dict.py +15 -13
  28. pixeltable/exprs/row_builder.py +7 -1
  29. pixeltable/exprs/similarity_expr.py +67 -0
  30. pixeltable/ext/functions/whisperx.py +30 -0
  31. pixeltable/ext/functions/yolox.py +16 -0
  32. pixeltable/func/__init__.py +0 -2
  33. pixeltable/func/aggregate_function.py +5 -2
  34. pixeltable/func/callable_function.py +57 -13
  35. pixeltable/func/expr_template_function.py +14 -3
  36. pixeltable/func/function.py +35 -4
  37. pixeltable/func/signature.py +5 -15
  38. pixeltable/func/udf.py +8 -12
  39. pixeltable/functions/fireworks.py +9 -4
  40. pixeltable/functions/huggingface.py +48 -5
  41. pixeltable/functions/openai.py +49 -11
  42. pixeltable/functions/pil/image.py +61 -64
  43. pixeltable/functions/together.py +32 -6
  44. pixeltable/functions/util.py +0 -43
  45. pixeltable/functions/video.py +46 -8
  46. pixeltable/globals.py +443 -0
  47. pixeltable/index/__init__.py +1 -0
  48. pixeltable/index/base.py +9 -2
  49. pixeltable/index/btree.py +54 -0
  50. pixeltable/index/embedding_index.py +91 -15
  51. pixeltable/io/__init__.py +4 -0
  52. pixeltable/io/globals.py +59 -0
  53. pixeltable/{utils → io}/hf_datasets.py +48 -17
  54. pixeltable/io/pandas.py +148 -0
  55. pixeltable/{utils → io}/parquet.py +58 -33
  56. pixeltable/iterators/__init__.py +1 -1
  57. pixeltable/iterators/base.py +8 -4
  58. pixeltable/iterators/document.py +225 -93
  59. pixeltable/iterators/video.py +16 -9
  60. pixeltable/metadata/__init__.py +8 -4
  61. pixeltable/metadata/converters/convert_12.py +3 -0
  62. pixeltable/metadata/converters/convert_13.py +41 -0
  63. pixeltable/metadata/converters/convert_14.py +13 -0
  64. pixeltable/metadata/converters/convert_15.py +29 -0
  65. pixeltable/metadata/converters/util.py +63 -0
  66. pixeltable/metadata/schema.py +12 -6
  67. pixeltable/plan.py +11 -24
  68. pixeltable/store.py +16 -23
  69. pixeltable/tool/create_test_db_dump.py +49 -14
  70. pixeltable/type_system.py +27 -58
  71. pixeltable/utils/coco.py +94 -0
  72. pixeltable/utils/documents.py +42 -12
  73. pixeltable/utils/http_server.py +70 -0
  74. pixeltable-0.2.7.dist-info/METADATA +137 -0
  75. pixeltable-0.2.7.dist-info/RECORD +126 -0
  76. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
  77. pixeltable/client.py +0 -600
  78. pixeltable/exprs/image_similarity_predicate.py +0 -58
  79. pixeltable/func/batched_function.py +0 -53
  80. pixeltable/func/nos_function.py +0 -202
  81. pixeltable/tests/conftest.py +0 -171
  82. pixeltable/tests/ext/test_yolox.py +0 -21
  83. pixeltable/tests/functions/test_fireworks.py +0 -43
  84. pixeltable/tests/functions/test_functions.py +0 -60
  85. pixeltable/tests/functions/test_huggingface.py +0 -158
  86. pixeltable/tests/functions/test_openai.py +0 -162
  87. pixeltable/tests/functions/test_together.py +0 -112
  88. pixeltable/tests/test_audio.py +0 -65
  89. pixeltable/tests/test_catalog.py +0 -27
  90. pixeltable/tests/test_client.py +0 -21
  91. pixeltable/tests/test_component_view.py +0 -379
  92. pixeltable/tests/test_dataframe.py +0 -440
  93. pixeltable/tests/test_dirs.py +0 -107
  94. pixeltable/tests/test_document.py +0 -120
  95. pixeltable/tests/test_exprs.py +0 -802
  96. pixeltable/tests/test_function.py +0 -332
  97. pixeltable/tests/test_index.py +0 -138
  98. pixeltable/tests/test_migration.py +0 -44
  99. pixeltable/tests/test_nos.py +0 -54
  100. pixeltable/tests/test_snapshot.py +0 -231
  101. pixeltable/tests/test_table.py +0 -1343
  102. pixeltable/tests/test_transactional_directory.py +0 -42
  103. pixeltable/tests/test_types.py +0 -52
  104. pixeltable/tests/test_video.py +0 -159
  105. pixeltable/tests/test_view.py +0 -535
  106. pixeltable/tests/utils.py +0 -442
  107. pixeltable/utils/clip.py +0 -18
  108. pixeltable-0.2.5.dist-info/METADATA +0 -128
  109. pixeltable-0.2.5.dist-info/RECORD +0 -139
  110. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
@@ -1,802 +0,0 @@
1
- import json
2
- import urllib.parse
3
- import urllib.request
4
- from typing import List, Dict
5
-
6
- import pytest
7
- import sqlalchemy as sql
8
-
9
- import pixeltable as pxt
10
- import pixeltable.func as func
11
- from pixeltable import catalog
12
- from pixeltable import exceptions as excs
13
- from pixeltable import exprs
14
- from pixeltable.exprs import Expr, ColumnRef
15
- from pixeltable.exprs import RELATIVE_PATH_ROOT as R
16
- from pixeltable.functions import cast, sum, count
17
- from pixeltable.functions.pil.image import blend
18
- from pixeltable.iterators import FrameIterator
19
- from pixeltable.tests.utils import get_image_files, skip_test_if_not_installed
20
- from pixeltable.type_system import StringType, BoolType, IntType, ArrayType, ColumnType, FloatType, \
21
- VideoType
22
-
23
-
24
- class TestExprs:
25
- @pxt.udf(return_type=FloatType(), param_types=[IntType(), IntType()])
26
- def div_0_error(a: int, b: int) -> float:
27
- return a / b
28
-
29
- # function that does allow nulls
30
- @pxt.udf(return_type=FloatType(nullable=True),
31
- param_types=[FloatType(nullable=False), FloatType(nullable=True)])
32
- def null_args_fn(a: int, b: int) -> int:
33
- if b is None:
34
- return a
35
- return a + b
36
-
37
- # error in agg.init()
38
- @pxt.uda(update_types=[IntType()], value_type=IntType())
39
- class init_exc(pxt.Aggregator):
40
- def __init__(self):
41
- self.sum = 1 / 0
42
- def update(self, val):
43
- pass
44
- def value(self):
45
- return 1
46
-
47
- # error in agg.update()
48
- @pxt.uda(update_types=[IntType()], value_type=IntType())
49
- class update_exc(pxt.Aggregator):
50
- def __init__(self):
51
- self.sum = 0
52
- def update(self, val):
53
- self.sum += 1 / val
54
- def value(self):
55
- return 1
56
-
57
- # error in agg.value()
58
- @pxt.uda(update_types=[IntType()], value_type=IntType())
59
- class value_exc(pxt.Aggregator):
60
- def __init__(self):
61
- self.sum = 0
62
- def update(self, val):
63
- self.sum += val
64
- def value(self):
65
- return 1 / self.sum
66
-
67
- def test_basic(self, test_tbl: catalog.Table) -> None:
68
- t = test_tbl
69
- assert t['c1'].equals(t.c1)
70
- assert t['c7']['*'].f5.equals(t.c7['*'].f5)
71
-
72
- assert isinstance(t.c1 == None, Expr)
73
- assert isinstance(t.c1 < 'a', Expr)
74
- assert isinstance(t.c1 <= 'a', Expr)
75
- assert isinstance(t.c1 == 'a', Expr)
76
- assert isinstance(t.c1 != 'a', Expr)
77
- assert isinstance(t.c1 > 'a', Expr)
78
- assert isinstance(t.c1 >= 'a', Expr)
79
- assert isinstance((t.c1 == 'a') & (t.c2 < 5), Expr)
80
- assert isinstance((t.c1 == 'a') | (t.c2 < 5), Expr)
81
- assert isinstance(~(t.c1 == 'a'), Expr)
82
- with pytest.raises(AttributeError) as excinfo:
83
- _ = t.does_not_exist
84
- assert 'unknown' in str(excinfo.value).lower()
85
-
86
- def test_compound_predicates(self, test_tbl: catalog.Table) -> None:
87
- t = test_tbl
88
- # compound predicates that can be fully evaluated in SQL
89
- _ = t.where((t.c1 == 'test string') & (t.c6.f1 > 50)).collect()
90
- _ = t.where((t.c1 == 'test string') & (t.c2 > 50)).collect()
91
- e = ((t.c1 == 'test string') & (t.c2 > 50)).sql_expr()
92
- assert len(e.clauses) == 2
93
-
94
- e = ((t.c1 == 'test string') & (t.c2 > 50) & (t.c3 < 1.0)).sql_expr()
95
- assert len(e.clauses) == 3
96
- e = ((t.c1 == 'test string') | (t.c2 > 50)).sql_expr()
97
- assert len(e.clauses) == 2
98
- e = ((t.c1 == 'test string') | (t.c2 > 50) | (t.c3 < 1.0)).sql_expr()
99
- assert len(e.clauses) == 3
100
- e = (~(t.c1 == 'test string')).sql_expr()
101
- assert isinstance(e, sql.sql.expression.BinaryExpression)
102
-
103
- with pytest.raises(TypeError) as exc_info:
104
- _ = t.where((t.c1 == 'test string') or (t.c6.f1 > 50)).collect()
105
- assert 'cannot be used in conjunction with python boolean operators' in str(exc_info.value).lower()
106
-
107
- # # compound predicates with Python functions
108
- # @pt.udf(return_type=BoolType(), param_types=[StringType()])
109
- # def udf(_: str) -> bool:
110
- # return True
111
- # @pt.udf(return_type=BoolType(), param_types=[IntType()])
112
- # def udf2(_: int) -> bool:
113
- # return True
114
-
115
- # TODO: find a way to test this
116
- # # & can be split
117
- # p = (t.c1 == 'test string') & udf(t.c1)
118
- # assert p.sql_expr() is None
119
- # sql_pred, other_pred = p.extract_sql_predicate()
120
- # assert isinstance(sql_pred, sql.sql.expression.BinaryExpression)
121
- # assert isinstance(other_pred, FunctionCall)
122
- #
123
- # p = (t.c1 == 'test string') & udf(t.c1) & (t.c2 > 50)
124
- # assert p.sql_expr() is None
125
- # sql_pred, other_pred = p.extract_sql_predicate()
126
- # assert len(sql_pred.clauses) == 2
127
- # assert isinstance(other_pred, FunctionCall)
128
- #
129
- # p = (t.c1 == 'test string') & udf(t.c1) & (t.c2 > 50) & udf2(t.c2)
130
- # assert p.sql_expr() is None
131
- # sql_pred, other_pred = p.extract_sql_predicate()
132
- # assert len(sql_pred.clauses) == 2
133
- # assert isinstance(other_pred, CompoundPredicate)
134
- #
135
- # # | cannot be split
136
- # p = (t.c1 == 'test string') | udf(t.c1)
137
- # assert p.sql_expr() is None
138
- # sql_pred, other_pred = p.extract_sql_predicate()
139
- # assert sql_pred is None
140
- # assert isinstance(other_pred, CompoundPredicate)
141
-
142
- def test_filters(self, test_tbl: catalog.Table) -> None:
143
- t = test_tbl
144
- _ = t[t.c1 == 'test string'].show()
145
- print(_)
146
- _ = t[t.c2 > 50].show()
147
- print(_)
148
- _ = t[t.c1n == None].show()
149
- print(_)
150
- _ = t[t.c1n != None].show(0)
151
- print(_)
152
-
153
- def test_exception_handling(self, test_tbl: catalog.Table) -> None:
154
- t = test_tbl
155
-
156
- # error in expr that's handled in SQL
157
- with pytest.raises(excs.Error):
158
- _ = t[(t.c2 + 1) / t.c2].show()
159
-
160
- # error in expr that's handled in Python
161
- with pytest.raises(excs.Error):
162
- _ = t[(t.c6.f2 + 1) / (t.c2 - 10)].show()
163
-
164
- # the same, but with an inline function
165
- with pytest.raises(excs.Error):
166
- _ = t[self.div_0_error(t.c2 + 1, t.c2)].show()
167
-
168
- # error in agg.init()
169
- with pytest.raises(excs.Error) as exc_info:
170
- _ = t[self.init_exc(t.c2)].show()
171
- assert 'division by zero' in str(exc_info.value)
172
-
173
- # error in agg.update()
174
- with pytest.raises(excs.Error):
175
- _ = t[self.update_exc(t.c2 - 10)].show()
176
-
177
- # error in agg.value()
178
- with pytest.raises(excs.Error):
179
- _ = t[t.c2 <= 2][self.value_exc(t.c2 - 1)].show()
180
-
181
- def test_props(self, test_tbl: catalog.Table, img_tbl: catalog.Table) -> None:
182
- t = test_tbl
183
- # errortype/-msg for computed column
184
- res = t.select(error=t.c8.errortype).collect()
185
- assert res.to_pandas()['error'].isna().all()
186
- res = t.select(error=t.c8.errormsg).collect()
187
- assert res.to_pandas()['error'].isna().all()
188
-
189
- img_t = img_tbl
190
- # fileurl
191
- res = img_t.select(img_t.img.fileurl).show(0).to_pandas()
192
- stored_urls = set(res.iloc[:, 0])
193
- assert len(stored_urls) == len(res)
194
- all_urls = set(urllib.parse.urljoin('file:', urllib.request.pathname2url(path)) for path in get_image_files())
195
- assert stored_urls <= all_urls
196
-
197
- # localpath
198
- res = img_t.select(img_t.img.localpath).show(0).to_pandas()
199
- stored_paths = set(res.iloc[:, 0])
200
- assert len(stored_paths) == len(res)
201
- all_paths = set(get_image_files())
202
- assert stored_paths <= all_paths
203
-
204
- # errortype/-msg for image column
205
- res = img_t.select(error=img_t.img.errortype).collect().to_pandas()
206
- assert res['error'].isna().all()
207
- res = img_t.select(error=img_t.img.errormsg).collect().to_pandas()
208
- assert res['error'].isna().all()
209
-
210
- for c in [t.c1, t.c1n, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7]:
211
- # errortype/errormsg only applies to stored computed and media columns
212
- with pytest.raises(excs.Error) as excinfo:
213
- _ = t.select(c.errortype).show()
214
- assert 'only valid for' in str(excinfo.value)
215
- with pytest.raises(excs.Error) as excinfo:
216
- _ = t.select(c.errormsg).show()
217
- assert 'only valid for' in str(excinfo.value)
218
-
219
- # fileurl/localpath only applies to media columns
220
- with pytest.raises(excs.Error) as excinfo:
221
- _ = t.select(t.c1.fileurl).show()
222
- assert 'only valid for' in str(excinfo.value)
223
- with pytest.raises(excs.Error) as excinfo:
224
- _ = t.select(t.c1.localpath).show()
225
- assert 'only valid for' in str(excinfo.value)
226
-
227
- # fileurl/localpath doesn't apply to unstored computed img columns
228
- img_t.add_column(c9=img_t.img.rotate(30))
229
- with pytest.raises(excs.Error) as excinfo:
230
- _ = img_t.select(img_t.c9.localpath).show()
231
- assert 'computed unstored' in str(excinfo.value)
232
-
233
- def test_null_args(self, test_client: pxt.Client) -> None:
234
- # create table with two int columns
235
- schema = {'c1': FloatType(nullable=True), 'c2': FloatType(nullable=True)}
236
- t = test_client.create_table('test', schema)
237
-
238
- # computed column that doesn't allow nulls
239
- t.add_column(c3=lambda c1, c2: c1 + c2, type=FloatType(nullable=False))
240
- t.add_column(c4=self.null_args_fn(t.c1, t.c2))
241
-
242
- # data that tests all combinations of nulls
243
- data = [{'c1': 1.0, 'c2': 1.0}, {'c1': 1.0, 'c2': None}, {'c1': None, 'c2': 1.0}, {'c1': None, 'c2': None}]
244
- status = t.insert(data, fail_on_exception=False)
245
- assert status.num_rows == len(data)
246
- assert status.num_excs == len(data) - 1
247
- result = t.select(t.c3, t.c4).collect()
248
- assert result['c3'] == [2.0, None, None, None]
249
- assert result['c4'] == [2.0, 1.0, None, None]
250
-
251
- def test_arithmetic_exprs(self, test_tbl: catalog.Table) -> None:
252
- t = test_tbl
253
-
254
- _ = t[t.c2, t.c6.f3, t.c2 + t.c6.f3, (t.c2 + t.c6.f3) / (t.c6.f3 + 1)].show()
255
- _ = t[t.c2 + t.c2].show()
256
- for op1, op2 in [(t.c2, t.c2), (t.c3, t.c3)]:
257
- _ = t[op1 + op2].show()
258
- _ = t[op1 - op2].show()
259
- _ = t[op1 * op2].show()
260
- _ = t[op1 > 0][op1 / op2].show()
261
-
262
- # non-numeric types
263
- for op1, op2 in [
264
- (t.c1, t.c2), (t.c1, 1), (t.c2, t.c1), (t.c2, 'a'),
265
- (t.c1, t.c3), (t.c1, 1.0), (t.c3, t.c1), (t.c3, 'a')
266
- ]:
267
- with pytest.raises(excs.Error):
268
- _ = t[op1 + op2]
269
- with pytest.raises(excs.Error):
270
- _ = t[op1 - op2]
271
- with pytest.raises(excs.Error):
272
- _ = t[op1 * op2]
273
- with pytest.raises(excs.Error):
274
- _ = t[op1 / op2]
275
-
276
- # TODO: test division; requires predicate
277
- for op1, op2 in [(t.c6.f2, t.c6.f2), (t.c6.f3, t.c6.f3)]:
278
- _ = t[op1 + op2].show()
279
- _ = t[op1 - op2].show()
280
- _ = t[op1 * op2].show()
281
- with pytest.raises(excs.Error):
282
- _ = t[op1 / op2].show()
283
-
284
- for op1, op2 in [
285
- (t.c6.f1, t.c6.f2), (t.c6.f1, t.c6.f3), (t.c6.f1, 1), (t.c6.f1, 1.0),
286
- (t.c6.f2, t.c6.f1), (t.c6.f3, t.c6.f1), (t.c6.f2, 'a'), (t.c6.f3, 'a'),
287
- ]:
288
- with pytest.raises(excs.Error):
289
- _ = t[op1 + op2].show()
290
- with pytest.raises(excs.Error):
291
- _ = t[op1 - op2].show()
292
- with pytest.raises(excs.Error):
293
- _ = t[op1 * op2].show()
294
-
295
-
296
- def test_inline_dict(self, test_tbl: catalog.Table) -> None:
297
- t = test_tbl
298
- df = t[[{'a': t.c1, 'b': {'c': t.c2}, 'd': 1, 'e': {'f': 2}}]]
299
- result = df.show()
300
- print(result)
301
-
302
- def test_inline_array(self, test_tbl: catalog.Table) -> None:
303
- t = test_tbl
304
- result = t.select([[t.c2, 1], [t.c2, 2]]).show()
305
- t = result.column_types()[0]
306
- assert t.is_array_type()
307
- assert isinstance(t, ArrayType)
308
- assert t.shape == (2, 2)
309
- assert t.dtype == ColumnType.Type.INT
310
-
311
- def test_json_mapper(self, test_tbl: catalog.Table) -> None:
312
- t = test_tbl
313
- # top-level is dict
314
- df = t[t.c6.f5['*'] >> (R + 1)]
315
- res = df.show()
316
- print(res)
317
- _ = t[t.c7['*'].f5 >> [R[3], R[2], R[1], R[0]]]
318
- _ = _.show()
319
- print(_)
320
- # target expr contains global-scope dependency
321
- df = t[
322
- t.c6.f5['*'] >> (R * t.c6.f5[1])
323
- ]
324
- res = df.show()
325
- print(res)
326
-
327
- def test_dicts(self, test_tbl: catalog.Table) -> None:
328
- t = test_tbl
329
- # top-level is dict
330
- _ = t[t.c6.f1]
331
- _ = _.show()
332
- print(_)
333
- # predicate on dict field
334
- _ = t[t.c6.f2 < 2].show()
335
- #_ = t[t.c6.f2].show()
336
- #_ = t[t.c6.f5].show()
337
- _ = t[t.c6.f6.f8].show()
338
- _ = t[cast(t.c6.f6.f8, ArrayType((4,), FloatType()))].show()
339
-
340
- # top-level is array
341
- #_ = t[t.c7['*'].f1].show()
342
- #_ = t[t.c7['*'].f2].show()
343
- #_ = t[t.c7['*'].f5].show()
344
- _ = t[t.c7['*'].f6.f8].show()
345
- _ = t[t.c7[0].f6.f8].show()
346
- _ = t[t.c7[:2].f6.f8].show()
347
- _ = t[t.c7[::-1].f6.f8].show()
348
- _ = t[cast(t.c7['*'].f6.f8, ArrayType((2, 4), FloatType()))].show()
349
- print(_)
350
-
351
- def test_arrays(self, test_tbl: catalog.Table) -> None:
352
- t = test_tbl
353
- t.add_column(array_col=[[t.c2, 1], [1, t.c2]])
354
- _ = t[t.array_col].show()
355
- print(_)
356
- _ = t[t.array_col[:, 0]].show()
357
- print(_)
358
-
359
- def test_astype(self, test_tbl: catalog.Table) -> None:
360
- t = test_tbl
361
- # Convert int to float
362
- status = t.add_column(c2_as_float=t.c2.astype(FloatType()))
363
- assert status.num_excs == 0
364
- data = t.select(t.c2, t.c2_as_float).collect()
365
- for row in data:
366
- assert isinstance(row['c2'], int)
367
- assert isinstance(row['c2_as_float'], float)
368
- assert row['c2'] == row['c2_as_float']
369
- # Compound expression
370
- status = t.add_column(compound_as_float=(t.c2 + 1).astype(FloatType()))
371
- assert status.num_excs == 0
372
- data = t.select(t.c2, t.compound_as_float).collect()
373
- for row in data:
374
- assert isinstance(row['compound_as_float'], float)
375
- assert row['c2'] + 1 == row['compound_as_float']
376
- # Type conversion error
377
- status = t.add_column(c2_as_string=t.c2.astype(StringType()))
378
- assert status.num_excs == t.count()
379
-
380
- def test_apply(self, test_tbl: catalog.Table) -> None:
381
-
382
- t = test_tbl
383
-
384
- # For each column c1, ..., c5, we create a new column ci_as_str that converts it to
385
- # a string, then check that each row is correctly converted
386
- # (For c1 this is the no-op string-to-string conversion)
387
- for col_id in range(1, 6):
388
- col_name = f'c{col_id}'
389
- str_col_name = f'c{col_id}_str'
390
- status = t.add_column(**{str_col_name: t[col_name].apply(str)})
391
- assert status.num_excs == 0
392
- data = t.select(t[col_name], t[str_col_name]).collect()
393
- for row in data:
394
- assert row[str_col_name] == str(row[col_name])
395
-
396
- # Test a compound expression with apply
397
- status = t.add_column(c2_plus_1_str=(t.c2 + 1).apply(str))
398
- assert status.num_excs == 0
399
- data = t.select(t.c2, t.c2_plus_1_str).collect()
400
- for row in data:
401
- assert row['c2_plus_1_str'] == str(row['c2'] + 1)
402
-
403
- # For columns c6, c7, try using json.dumps and json.loads to emit and parse JSON <-> str
404
- for col_id in range(6, 8):
405
- col_name = f'c{col_id}'
406
- str_col_name = f'c{col_id}_str'
407
- back_to_json_col_name = f'c{col_id}_back_to_json'
408
- status = t.add_column(**{str_col_name: t[col_name].apply(json.dumps)})
409
- assert status.num_excs == 0
410
- status = t.add_column(**{back_to_json_col_name: t[str_col_name].apply(json.loads)})
411
- assert status.num_excs == 0
412
- data = t.select(t[col_name], t[str_col_name], t[back_to_json_col_name]).collect()
413
- for row in data:
414
- assert row[str_col_name] == json.dumps(row[col_name])
415
- assert row[back_to_json_col_name] == row[col_name]
416
-
417
- def f1(x):
418
- return str(x)
419
-
420
- # Now test that a function without a return type throws an exception ...
421
- with pytest.raises(excs.Error) as exc_info:
422
- t.c2.apply(f1)
423
- assert 'Column type of `f1` cannot be inferred.' in str(exc_info.value)
424
-
425
- # ... but works if the type is specified explicitly.
426
- status = t.add_column(c2_str_f1=t.c2.apply(f1, col_type=StringType()))
427
- assert status.num_excs == 0
428
-
429
- # Test that the return type of a function can be successfully inferred.
430
- def f2(x) -> str:
431
- return str(x)
432
-
433
- status = t.add_column(c2_str_f2=t.c2.apply(f2))
434
- assert status.num_excs == 0
435
-
436
- # Test various validation failures.
437
-
438
- def f3(x, y) -> str:
439
- return f'{x}{y}'
440
-
441
- with pytest.raises(excs.Error) as exc_info:
442
- t.c2.apply(f3) # Too many required parameters
443
- assert str(exc_info.value) == 'Function `f3` has multiple required parameters.'
444
-
445
- def f4() -> str:
446
- return "pixeltable"
447
-
448
- with pytest.raises(excs.Error) as exc_info:
449
- t.c2.apply(f4) # No positional parameters
450
- assert str(exc_info.value) == 'Function `f4` has no positional parameters.'
451
-
452
- def f5(**kwargs) -> str:
453
- return ""
454
-
455
- with pytest.raises(excs.Error) as exc_info:
456
- t.c2.apply(f5) # No positional parameters
457
- assert str(exc_info.value) == 'Function `f5` has no positional parameters.'
458
-
459
- # Ensure these varargs signatures are acceptable
460
-
461
- def f6(x, **kwargs) -> str:
462
- return x
463
-
464
- t.c2.apply(f6)
465
-
466
- def f7(x, *args) -> str:
467
- return x
468
-
469
- t.c2.apply(f7)
470
-
471
- def f8(*args) -> str:
472
- return ''
473
-
474
- t.c2.apply(f8)
475
-
476
- def test_select_list(self, img_tbl) -> None:
477
- t = img_tbl
478
- result = t[t.img].show(n=100)
479
- _ = result._repr_html_()
480
- df = t[[t.img, t.img.rotate(60)]]
481
- _ = df.show(n=100)._repr_html_()
482
-
483
- with pytest.raises(excs.Error):
484
- _ = t[t.img.rotate]
485
-
486
- def test_img_members(self, img_tbl) -> None:
487
- t = img_tbl
488
- # make sure the limit is applied in Python, not in the SELECT
489
- result = t[t.img.height > 200][t.img].show(n=3)
490
- assert len(result) == 3
491
- result = t[t.img.crop((10, 10, 60, 60))].show(n=100)
492
- result = t[t.img.crop((10, 10, 60, 60)).resize((100, 100))].show(n=100)
493
- result = t[t.img.crop((10, 10, 60, 60)).resize((100, 100)).convert('L')].show(n=100)
494
- result = t[t.img.getextrema()].show(n=100)
495
- result = t[t.img, t.img.height, t.img.rotate(90)].show(n=100)
496
- _ = result._repr_html_()
497
-
498
- def test_img_functions(self, img_tbl) -> None:
499
- skip_test_if_not_installed('nos')
500
- t = img_tbl
501
- from pixeltable.functions.pil.image import resize
502
- result = t[t.img.resize((224, 224))].show(0)
503
- result = t[resize(t.img, (224, 224))].show(0)
504
- result = t[blend(t.img, t.img.rotate(90), 0.5)].show(100)
505
- print(result)
506
- from pixeltable.functions.nos.image_embedding import openai_clip
507
- result = t[openai_clip(t.img.resize((224, 224)))].show(10)
508
- print(result)
509
- _ = result._repr_html_()
510
- _ = t.img.entropy() > 1
511
- _ = (t.img.entropy() > 1) & (t.split == 'train')
512
- _ = (t.img.entropy() > 1) & (t.split == 'train') & (t.split == 'val')
513
- _ = (t.split == 'train') & (t.img.entropy() > 1) & (t.split == 'val') & (t.img.entropy() < 0)
514
- _ = t[(t.split == 'train') & (t.category == 'n03445777')][t.img].show()
515
- print(_)
516
- result = t[t.img.width > 1].show()
517
- print(result)
518
- result = t[(t.split == 'val') & (t.img.entropy() > 1) & (t.category == 'n03445777')].show()
519
- print(result)
520
- result = t[
521
- (t.split == 'train') & (t.img.entropy() > 1) & (t.split == 'val') & (t.img.entropy() < 0)
522
- ][t.img, t.split].show()
523
- print(result)
524
-
525
- @pytest.mark.skip(reason='temporarily disabled')
526
- def test_similarity(self, small_img_tbl) -> None:
527
- skip_test_if_not_installed('nos')
528
- t = small_img_tbl
529
- _ = t.show(30)
530
- probe = t.select(t.img, t.category).show(1)
531
- img = probe[0, 0]
532
- result = t.where(t.img.nearest(img)).show(10)
533
- assert len(result) == 10
534
- # nearest() with one SQL predicate and one Python predicate
535
- result = t[t.img.nearest(img) & (t.category == probe[0, 1]) & (t.img.width > 1)].show(10)
536
- # TODO: figure out how to verify results
537
-
538
- with pytest.raises(excs.Error) as exc_info:
539
- _ = t[t.img.nearest(img)].order_by(t.category).show()
540
- assert 'cannot be used in conjunction with' in str(exc_info.value)
541
-
542
- result = t[t.img.nearest('musical instrument')].show(10)
543
- assert len(result) == 10
544
- # matches() with one SQL predicate and one Python predicate
545
- french_horn_category = 'n03394916'
546
- result = t[
547
- t.img.nearest('musical instrument') & (t.category == french_horn_category) & (t.img.width > 1)
548
- ].show(10)
549
-
550
- with pytest.raises(excs.Error) as exc_info:
551
- _ = t[t.img.nearest(5)].show()
552
- assert 'requires' in str(exc_info.value)
553
-
554
- # TODO: this doesn't work when combined with test_similarity(), for some reason the data table for img_tbl
555
- # doesn't get created; why?
556
- def test_similarity2(self, img_tbl: catalog.Table) -> None:
557
- t = img_tbl
558
- probe = t[t.img].show(1)
559
- img = probe[0, 0]
560
-
561
- with pytest.raises(excs.Error):
562
- _ = t[t.img.nearest(img)].show(10)
563
- with pytest.raises(excs.Error):
564
- _ = t[t.img.nearest('musical instrument')].show(10)
565
-
566
- def test_ids(
567
- self, test_tbl: catalog.Table, test_tbl_exprs: List[exprs.Expr],
568
- img_tbl: catalog.Table, img_tbl_exprs: List[exprs.Expr]
569
- ) -> None:
570
- d: Dict[int, exprs.Expr] = {}
571
- for e in test_tbl_exprs:
572
- assert e.id is not None
573
- d[e.id] = e
574
- for e in img_tbl_exprs:
575
- assert e.id is not None
576
- d[e.id] = e
577
- assert len(d) == len(test_tbl_exprs) + len(img_tbl_exprs)
578
-
579
- def test_serialization(
580
- self, test_tbl_exprs: List[exprs.Expr], img_tbl_exprs: List[exprs.Expr]
581
- ) -> None:
582
- """Test as_dict()/from_dict() (via serialize()/deserialize()) for all exprs."""
583
- for e in test_tbl_exprs:
584
- e_serialized = e.serialize()
585
- e_deserialized = Expr.deserialize(e_serialized)
586
- assert e.equals(e_deserialized)
587
-
588
- for e in img_tbl_exprs:
589
- e_serialized = e.serialize()
590
- e_deserialized = Expr.deserialize(e_serialized)
591
- assert e.equals(e_deserialized)
592
-
593
- def test_print(self, test_tbl_exprs: List[exprs.Expr], img_tbl_exprs: List[exprs.Expr]) -> None:
594
- _ = func.FunctionRegistry.get().module_fns
595
- for e in test_tbl_exprs:
596
- _ = str(e)
597
- print(_)
598
- for e in img_tbl_exprs:
599
- _ = str(e)
600
- print(_)
601
-
602
- def test_subexprs(self, img_tbl: catalog.Table) -> None:
603
- t = img_tbl
604
- e = t.img
605
- subexprs = [s for s in e.subexprs()]
606
- assert len(subexprs) == 1
607
- e = t.img.rotate(90).resize((224, 224))
608
- subexprs = [s for s in e.subexprs()]
609
- assert len(subexprs) == 4
610
- subexprs = [s for s in e.subexprs(expr_class=ColumnRef)]
611
- assert len(subexprs) == 1
612
- assert t.img.equals(subexprs[0])
613
-
614
- def test_window_fns(self, test_client: pxt.Client, test_tbl: catalog.Table) -> None:
615
- cl = test_client
616
- t = test_tbl
617
- _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3)).show(100)
618
-
619
- # conflicting ordering requirements
620
- with pytest.raises(excs.Error):
621
- _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3), sum(t.c2, group_by=t.c3, order_by=t.c4)).show(100)
622
- with pytest.raises(excs.Error):
623
- _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3), sum(t.c2, group_by=t.c3, order_by=t.c4)).show(100)
624
-
625
- # backfill works
626
- t.add_column(c9=sum(t.c2, group_by=t.c4, order_by=t.c3))
627
- _ = t.c9.col.has_window_fn_call()
628
-
629
- # ordering conflict between frame extraction and window fn
630
- base_t = cl.create_table('videos', {'video': VideoType(), 'c2': IntType(nullable=False)})
631
- args = {'video': base_t.video, 'fps': 0}
632
- v = cl.create_view('frame_view', base_t, iterator_class=FrameIterator, iterator_args=args)
633
- # compatible ordering
634
- _ = v.select(v.frame, sum(v.frame_idx, group_by=base_t, order_by=v.pos)).show(100)
635
- with pytest.raises(excs.Error):
636
- # incompatible ordering
637
- _ = v.select(v.frame, sum(v.c2, order_by=base_t, group_by=v.pos)).show(100)
638
-
639
- schema = {
640
- 'c2': IntType(nullable=False),
641
- 'c3': FloatType(nullable=False),
642
- 'c4': BoolType(nullable=False),
643
- }
644
- new_t = cl.create_table('insert_test', schema=schema)
645
- new_t.add_column(c2_sum=sum(new_t.c2, group_by=new_t.c4, order_by=new_t.c3))
646
- rows = list(t.select(t.c2, t.c4, t.c3).collect())
647
- new_t.insert(rows)
648
- _ = new_t.show(0)
649
-
650
- def test_aggregates(self, test_tbl: catalog.Table) -> None:
651
- t = test_tbl
652
- _ = t[t.c2 % 2, sum(t.c2), count(t.c2), sum(t.c2) + count(t.c2), sum(t.c2) + (t.c2 % 2)]\
653
- .group_by(t.c2 % 2).show()
654
-
655
- # check that aggregates don't show up in the wrong places
656
- with pytest.raises(excs.Error):
657
- # aggregate in where clause
658
- _ = t[sum(t.c2) > 0][sum(t.c2)].group_by(t.c2 % 2).show()
659
- with pytest.raises(excs.Error):
660
- # aggregate in group_by clause
661
- _ = t[sum(t.c2)].group_by(sum(t.c2)).show()
662
- with pytest.raises(excs.Error):
663
- # mixing aggregates and non-aggregates
664
- _ = t[sum(t.c2) + t.c2].group_by(t.c2 % 2).show()
665
- with pytest.raises(excs.Error):
666
- # nested aggregates
667
- _ = t[sum(count(t.c2))].group_by(t.c2 % 2).show()
668
-
669
- @pxt.uda(
670
- init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
671
- allows_window=True, requires_order_by=False)
672
- class window_agg:
673
- def __init__(self, val: int = 0):
674
- self.val = val
675
- def update(self, ignore: int) -> None:
676
- pass
677
- def value(self) -> int:
678
- return self.val
679
-
680
- @pxt.uda(
681
- init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
682
- requires_order_by=True, allows_window=True)
683
- class ordered_agg:
684
- def __init__(self, val: int = 0):
685
- self.val = val
686
- def update(self, i: int) -> None:
687
- pass
688
- def value(self) -> int:
689
- return self.val
690
-
691
- @pxt.uda(
692
- init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
693
- requires_order_by=False, allows_window=False)
694
- class std_agg:
695
- def __init__(self, val: int = 0):
696
- self.val = val
697
- def update(self, i: int) -> None:
698
- pass
699
- def value(self) -> int:
700
- return self.val
701
-
702
- def test_udas(self, test_tbl: catalog.Table) -> None:
703
- t = test_tbl
704
- # init arg is passed along
705
- assert t.select(out=self.window_agg(t.c2, order_by=t.c2)).collect()[0]['out'] == 0
706
- assert t.select(out=self.window_agg(t.c2, val=1, order_by=t.c2)).collect()[0]['out'] == 1
707
-
708
- with pytest.raises(excs.Error) as exc_info:
709
- _ = t.select(self.window_agg(t.c2, val=t.c2, order_by=t.c2)).collect()
710
- assert 'needs to be a constant' in str(exc_info.value)
711
-
712
- with pytest.raises(excs.Error) as exc_info:
713
- # ordering expression not a pixeltable expr
714
- _ = t.select(self.ordered_agg(1, t.c2)).collect()
715
- assert 'but instead is a' in str(exc_info.value).lower()
716
-
717
- with pytest.raises(excs.Error) as exc_info:
718
- # explicit order_by
719
- _ = t.select(self.ordered_agg(t.c2, order_by=t.c2)).collect()
720
- assert 'order_by invalid' in str(exc_info.value).lower()
721
-
722
- with pytest.raises(excs.Error) as exc_info:
723
- # order_by for non-window function
724
- _ = t.select(self.std_agg(t.c2, order_by=t.c2)).collect()
725
- assert 'does not allow windows' in str(exc_info.value).lower()
726
-
727
- with pytest.raises(excs.Error) as exc_info:
728
- # group_by for non-window function
729
- _ = t.select(self.std_agg(t.c2, group_by=t.c4)).collect()
730
- assert 'group_by invalid' in str(exc_info.value).lower()
731
-
732
- with pytest.raises(excs.Error) as exc_info:
733
- # missing init type
734
- @pxt.uda(update_types=[IntType()], value_type=IntType())
735
- class WindowAgg:
736
- def __init__(self, val: int = 0):
737
- self.val = val
738
- def update(self, ignore: int) -> None:
739
- pass
740
- def value(self) -> int:
741
- return self.val
742
- assert 'init_types must be a list of' in str(exc_info.value)
743
-
744
- with pytest.raises(excs.Error) as exc_info:
745
- # missing update parameter
746
- @pxt.uda(init_types=[IntType()], update_types=[], value_type=IntType())
747
- class WindowAgg:
748
- def __init__(self, val: int = 0):
749
- self.val = val
750
- def update(self) -> None:
751
- pass
752
- def value(self) -> int:
753
- return self.val
754
- assert 'must have at least one parameter' in str(exc_info.value)
755
-
756
- with pytest.raises(excs.Error) as exc_info:
757
- # missing update type
758
- @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
759
- class WindowAgg:
760
- def __init__(self, val: int = 0):
761
- self.val = val
762
- def update(self, i1: int, i2: int) -> None:
763
- pass
764
- def value(self) -> int:
765
- return self.val
766
- assert 'update_types must be a list of' in str(exc_info.value)
767
-
768
- with pytest.raises(excs.Error) as exc_info:
769
- # duplicate parameter names
770
- @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
771
- class WindowAgg:
772
- def __init__(self, val: int = 0):
773
- self.val = val
774
- def update(self, val: int) -> None:
775
- pass
776
- def value(self) -> int:
777
- return self.val
778
- assert 'cannot have parameters with the same name: val' in str(exc_info.value)
779
-
780
- with pytest.raises(excs.Error) as exc_info:
781
- # reserved parameter name
782
- @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
783
- class WindowAgg:
784
- def __init__(self, val: int = 0):
785
- self.val = val
786
- def update(self, order_by: int) -> None:
787
- pass
788
- def value(self) -> int:
789
- return self.val
790
- assert 'order_by is reserved' in str(exc_info.value).lower()
791
-
792
- with pytest.raises(excs.Error) as exc_info:
793
- # reserved parameter name
794
- @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
795
- class WindowAgg:
796
- def __init__(self, val: int = 0):
797
- self.val = val
798
- def update(self, group_by: int) -> None:
799
- pass
800
- def value(self) -> int:
801
- return self.val
802
- assert 'group_by is reserved' in str(exc_info.value).lower()