pixeltable 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Files changed (140)
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.1.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.1.dist-info/METADATA +119 -0
  124. pixeltable-0.2.1.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.1.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
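
The hunks below appear to come from the test suite (pixeltable/tests/test_table.py, entry 102 above) and illustrate the main API change between 0.1.2 and 0.2.1: the Db handle and Column-list schemas are replaced by a Client with dict-based schemas, keyword-style add_column(), and insert() taking rows instead of DataFrames. The following sketch simply mirrors those calls from the updated tests; it is illustrative only, and the table and column names are made up.

import pixeltable as pxt
from pixeltable.type_system import IntType, StringType

# 0.1.2 style (removed): cl.get_db('test'); db.create_table('t', [catalog.Column('c1', StringType())])
# 0.2.1 style, as exercised by the tests below:
cl = pxt.Client()                           # no more per-Db handles
t = cl.create_table('demo', {               # schema is a dict of name -> ColumnType
    'c1': StringType(nullable=False),
    'c2': IntType(nullable=False),
})
t.add_column(c3=t.c2 + 1)                   # computed column via keyword argument
status = t.insert([{'c1': 'a', 'c2': 1}, {'c1': 'b', 'c2': 2}])  # list of dicts replaces insert_pandas()
assert status.num_excs == 0
cl.drop_table('demo')
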
@@ -1,188 +1,744 @@
-import pytest
+import datetime
 import math
+import os
+import random
+from typing import List, Tuple
+
+import PIL
+import cv2
+import numpy as np
+import pandas as pd
+import pytest

-import pixeltable as pt
-from pixeltable import exceptions as exc
+import pixeltable as pxt
+import pixeltable.functions as ptf
 from pixeltable import catalog
+from pixeltable import exceptions as excs
+from pixeltable.iterators import FrameIterator
+from pixeltable.tests.utils import \
+    make_tbl, create_table_data, read_data_file, get_video_files, get_audio_files, get_image_files, get_documents, \
+    assert_resultset_eq
 from pixeltable.type_system import \
-    StringType, IntType, FloatType, TimestampType, ImageType, VideoType, JsonType, BoolType
-from pixeltable.tests.utils import make_tbl, create_table_data, read_data_file, get_video_files, sum_uda
-from pixeltable.functions import make_video
-from pixeltable import utils
+    StringType, IntType, FloatType, TimestampType, ImageType, VideoType, JsonType, BoolType, ArrayType, AudioType, \
+    DocumentType
+from pixeltable.utils.filecache import FileCache
+from pixeltable.utils.media_store import MediaStore


 class TestTable:
-    def test_create(self, test_db: catalog.Db) -> None:
-        db = test_db
-        db.create_dir('dir1')
-        c1 = catalog.Column('c1', StringType(), nullable=False)
-        c2 = catalog.Column('c2', IntType(), nullable=False)
-        c3 = catalog.Column('c3', FloatType(), nullable=False)
-        c4 = catalog.Column('c4', TimestampType(), nullable=False)
-        schema = [c1, c2, c3, c4]
-        _ = db.create_table('test', schema)
-        _ = db.create_table('dir1.test', schema)
-
-        with pytest.raises(exc.BadFormatError):
-            _ = db.create_table('1test', schema)
-        with pytest.raises(exc.BadFormatError):
+    # exc for a % 10 == 0
+    @pxt.udf(return_type=FloatType(), param_types=[IntType()])
+    def f1(a: int) -> float:
+        return a / (a % 10)
+
+    # exception for a == None; this should not get triggered
+    @pxt.udf(return_type=FloatType(), param_types=[FloatType()])
+    def f2(a: float) -> float:
+        return a + 1
+
+    @pxt.expr_udf(param_types=[IntType(nullable=False)])
+    def add1(a: int) -> int:
+        return a + 1
+
+    def test_create(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        cl.create_dir('dir1')
+        schema = {
+            'c1': StringType(nullable=False),
+            'c2': IntType(nullable=False),
+            'c3': FloatType(nullable=False),
+            'c4': TimestampType(nullable=False),
+        }
+        tbl = cl.create_table('test', schema)
+        _ = cl.create_table('dir1.test', schema)
+
+        with pytest.raises(excs.Error):
+            _ = cl.create_table('1test', schema)
+        with pytest.raises(excs.Error):
             _ = catalog.Column('1c', StringType())
-        with pytest.raises(exc.DuplicateNameError):
-            _ = db.create_table('test2', [c1, c1])
-        with pytest.raises(exc.DuplicateNameError):
-            _ = db.create_table('test', schema)
-        with pytest.raises(exc.DuplicateNameError):
-            _ = db.create_table('test2', [c1, c1])
-        with pytest.raises(exc.UnknownEntityError):
-            _ = db.create_table('dir2.test2', schema)
-
-        _ = db.list_tables()
-        _ = db.list_tables('dir1')
-
-        with pytest.raises(exc.BadFormatError):
-            _ = db.list_tables('1dir')
-        with pytest.raises(exc.UnknownEntityError):
-            _ = db.list_tables('dir2')
+        with pytest.raises(excs.Error):
+            _ = cl.create_table('test', schema)
+        with pytest.raises(excs.Error):
+            _ = cl.create_table('dir2.test2', schema)
+
+        _ = cl.list_tables()
+        _ = cl.list_tables('dir1')
+
+        with pytest.raises(excs.Error):
+            _ = cl.list_tables('1dir')
+        with pytest.raises(excs.Error):
+            _ = cl.list_tables('dir2')

         # test loading with new client
-        cl2 = pt.Client()
-        db = cl2.get_db('test')
+        cl = pxt.Client(reload=True)

-        tbl = db.get_table('test')
-        assert isinstance(tbl, catalog.MutableTable)
-        tbl.add_column(catalog.Column('c5', IntType()))
+        tbl = cl.get_table('test')
+        assert isinstance(tbl, catalog.InsertableTable)
+        tbl.add_column(c5=IntType())
         tbl.drop_column('c1')
         tbl.rename_column('c2', 'c17')

-        db.rename_table('test', 'test2')
+        cl.move('test', 'test2')
+
+        cl.drop_table('test2')
+        cl.drop_table('dir1.test')
+
+        with pytest.raises(excs.Error):
+            cl.drop_table('test')
+        with pytest.raises(excs.Error):
+            cl.drop_table('dir1.test2')
+        with pytest.raises(excs.Error):
+            cl.drop_table('.test2')
+
+    def test_empty_table(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('empty_table', {})
+        assert 'Table schema is empty' in str(exc_info.value)
+
+    def test_table_attrs(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        schema = {'c': StringType(nullable=False)}
+        num_retained_versions = 20
+        comment = "This is a table."
+        tbl = cl.create_table('test_table_attrs', schema, num_retained_versions=num_retained_versions, comment=comment)
+        assert tbl.num_retained_versions == num_retained_versions
+        assert tbl.comment == comment
+        new_num_retained_versions = 30
+        new_comment = "This is an updated table."
+        tbl.num_retained_versions = new_num_retained_versions
+        assert tbl.num_retained_versions == new_num_retained_versions
+        tbl.comment = new_comment
+        assert tbl.comment == new_comment
+        tbl.revert()
+        assert tbl.comment == comment
+        tbl.revert()
+        assert tbl.num_retained_versions == num_retained_versions
+
+    def test_image_table(self, test_client: pxt.Client) -> None:
+        n_sample_rows = 20
+        cl = test_client
+        schema = {
+            'img': ImageType(nullable=False),
+            'category': StringType(nullable=False),
+            'split': StringType(nullable=False),
+            'img_literal': ImageType(nullable=False),
+        }
+        tbl = cl.create_table('test', schema)
+        assert(MediaStore.count(tbl.get_id()) == 0)
+
+        rows = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
+        sample_rows = random.sample(rows, n_sample_rows)
+
+        # add literal image data and column
+        for r in rows:
+            with open(r['img'], 'rb') as f:
+                r['img_literal'] = f.read()
+
+        tbl.insert(sample_rows)
+        assert(MediaStore.count(tbl.get_id()) == n_sample_rows)
+
+        # compare img and img_literal
+        # TODO: make tbl.select(tbl.img == tbl.img_literal) work
+        tdf = tbl.select(tbl.img, tbl.img_literal).show()
+        pdf = tdf.to_pandas()
+        for tup in pdf.itertuples():
+            assert tup.img == tup.img_literal
+
+        # Test adding stored image transformation
+        tbl.add_column(rotated=tbl.img.rotate(30), stored=True)
+        assert(MediaStore.count(tbl.get_id()) == 2 * n_sample_rows)
+
+        # Test MediaStore.stats()
+        stats = list(filter(lambda x: x[0] == tbl.get_id(), MediaStore.stats()))
+        assert len(stats) == 2  # Two columns
+        assert stats[0][2] == n_sample_rows  # Each column has n_sample_rows associated images
+        assert stats[1][2] == n_sample_rows
+
+        # Test that version-specific images are cleared when table is reverted
+        tbl.revert()
+        assert(MediaStore.count(tbl.get_id()) == n_sample_rows)
+
+        # Test that all stored images are cleared when table is dropped
+        cl.drop_table('test')
+        assert(MediaStore.count(tbl.get_id()) == 0)
+
+    def test_schema_spec(self, test_client: pxt.Client) -> None:
+        cl = test_client
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c 1': IntType()})
+        assert 'invalid column name' in str(exc_info.value).lower()
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': {}})
+        assert '"type" is required' in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': {'xyz': IntType()}})
+        assert "invalid key 'xyz'" in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': {'stored': True}})
+        assert '"type" is required' in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': {'type': 'string'}})
+        assert 'must be a ColumnType' in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': {'value': 1, 'type': StringType()}})
+        assert '"type" is redundant' in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': {'value': pytest}})
+        assert 'value needs to be either' in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            def f() -> float:
+                return 1.0
+            cl.create_table('test', {'c1': {'value': f}})
+        assert '"type" is required' in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': {'type': StringType(), 'stored': 'true'}})
+        assert '"stored" must be a bool' in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': {'type': StringType(), 'indexed': 'true'}})
+        assert '"indexed" must be a bool' in str(exc_info.value)
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': StringType()}, primary_key='c2')
+        assert 'primary key column c2 not found' in str(exc_info.value).lower()
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': StringType()}, primary_key=['c1', 'c2'])
+        assert 'primary key column c2 not found' in str(exc_info.value).lower()
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': StringType()}, primary_key=['c2'])
+        assert 'primary key column c2 not found' in str(exc_info.value).lower()
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': StringType()}, primary_key=0)
+        assert 'primary_key must be a' in str(exc_info.value).lower()
+
+        with pytest.raises(excs.Error) as exc_info:
+            cl.create_table('test', {'c1': StringType(nullable=True)}, primary_key='c1')
+        assert 'cannot be nullable' in str(exc_info.value).lower()
+
+    def check_bad_media(
+            self, test_client: pxt.Client, rows: List[Tuple[str, bool]], col_type: pxt.ColumnType,
+            validate_local_path: bool = True
+    ) -> None:
+        schema = {
+            'media': col_type,
+            'is_bad_media': BoolType(nullable=False),
+        }
+        tbl = test_client.create_table('test', schema)
+
+        assert len(rows) > 0
+        total_bad_rows = sum([int(row['is_bad_media']) for row in rows])
+        assert total_bad_rows > 0
+
+        # Mode 1: Validation error on bad input (default)
+        # we ignore the exact error here, because it depends on the media type
+        with pytest.raises(excs.Error):
+            tbl.insert(rows, fail_on_exception=True)

-        db.drop_table('test2')
-        db.drop_table('dir1.test')
+        # Mode 2: ignore_errors=True, store error information in table
+        status = tbl.insert(rows, fail_on_exception=False)
+        _ = tbl.select(tbl.media, tbl.media.errormsg).show()
+        assert status.num_rows == len(rows)
+        assert status.num_excs == total_bad_rows

-        with pytest.raises(exc.UnknownEntityError):
-            db.drop_table('test')
-        with pytest.raises(exc.UnknownEntityError):
-            db.drop_table('dir1.test2')
-        with pytest.raises(exc.BadFormatError):
-            db.drop_table('.test2')
+        # check that we have the right number of bad and good rows
+        assert tbl.where(tbl.is_bad_media == True).count() == total_bad_rows
+        assert tbl.where(tbl.is_bad_media == False).count() == len(rows) - total_bad_rows
+
+        # check error type is set correctly
+        assert tbl.where((tbl.is_bad_media == True) & (tbl.media.errortype == None)).count() == 0
+        assert tbl.where((tbl.is_bad_media == False) & (tbl.media.errortype == None)).count() \
+            == len(rows) - total_bad_rows
+
+        # check fileurl is set for valid images, and check no file url is set for bad images
+        assert tbl.where((tbl.is_bad_media == False) & (tbl.media.fileurl == None)).count() == 0
+        assert tbl.where((tbl.is_bad_media == True) & (tbl.media.fileurl != None)).count() == 0
+
+        if validate_local_path:
+            # check that tbl.media is a valid local path
+            paths = tbl.where(tbl.media != None).select(output=tbl.media).collect()['output']
+            for path in paths:
+                assert os.path.exists(path) and os.path.isfile(path)
+
+    def test_validate_image(self, test_client: pxt.Client) -> None:
+        rows = read_data_file('imagenette2-160', 'manifest_bad.csv', ['img'])
+        rows = [{'media': r['img'], 'is_bad_media': r['is_bad_image']} for r in rows]
+        self.check_bad_media(test_client, rows, ImageType(nullable=True), validate_local_path=False)
+
+    def test_validate_video(self, test_client: pxt.Client) -> None:
+        files = get_video_files(include_bad_video=True)
+        rows = [{'media': f, 'is_bad_media': f.endswith('bad_video.mp4')} for f in files]
+        self.check_bad_media(test_client, rows, VideoType(nullable=True))
+
+    def test_validate_audio(self, test_client: pxt.Client) -> None:
+        files = get_audio_files(include_bad_audio=True)
+        rows = [{'media': f, 'is_bad_media': f.endswith('bad_audio.mp3')} for f in files]
+        self.check_bad_media(test_client, rows, AudioType(nullable=True))
+
+    def test_validate_docs(self, test_client: pxt.Client) -> None:
+        valid_doc_paths = get_documents()
+        invalid_doc_paths = [get_video_files()[0], get_audio_files()[0], get_image_files()[0]]
+        doc_paths = valid_doc_paths + invalid_doc_paths
+        is_valid = [True] * len(valid_doc_paths) + [False] * len(invalid_doc_paths)
+        rows = [{'media': f, 'is_bad_media': not is_valid} for f, is_valid in zip(doc_paths, is_valid)]
+        self.check_bad_media(test_client, rows, DocumentType(nullable=True))
+
+    def test_validate_external_url(self, test_client: pxt.Client) -> None:
+        rows = [
+            {'media': 's3://open-images-dataset/validation/doesnotexist.jpg', 'is_bad_media': True},
+            {'media': 'https://archive.random.org/download?file=2024-01-28.bin', 'is_bad_media': True},  # 403 error
+            {'media': 's3://open-images-dataset/validation/3c02ca9ec9b2b77b.jpg', 'is_bad_media': True},  # wrong media
+            # test s3 url
+            {
+                'media': 's3://multimedia-commons/data/videos/mp4/ffe/ff3/ffeff3c6bf57504e7a6cecaff6aefbc9.mp4',
+                'is_bad_media': False
+            },
+            # test http url
+            {
+                'media': 'https://github.com/pixeltable/pixeltable/raw/master/pixeltable/tests/data/videos/bangkok.mp4',
+                'is_bad_media': False
+            },

-    def test_create_images(self, test_db: catalog.Db) -> None:
-        db = test_db
-        cols = [
-            catalog.Column('img', ImageType(), nullable=False),
-            catalog.Column('category', StringType(), nullable=False),
-            catalog.Column('split', StringType(), nullable=False),
         ]
-        tbl = db.create_table('test', cols)
-        df = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
-        # TODO: insert a random subset
-        tbl.insert_pandas(df[:20])
-        html_str = tbl.show(n=100)._repr_html_()
-        print(html_str)
-        # TODO: check html_str
-
-    def test_create_video(self, test_db: catalog.Db) -> None:
-        db = test_db
-        cols = [
-            catalog.Column('video', VideoType(), nullable=False),
-            catalog.Column('frame', ImageType(), nullable=False),
-            catalog.Column('frame_idx', IntType(), nullable=False),
+        self.check_bad_media(test_client, rows, VideoType(nullable=True))
+
+    def test_create_s3_image_table(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        tbl = cl.create_table('test', {'img': ImageType(nullable=False)})
+        # this is needed because Client.reset_catalog() doesn't call TableVersion.drop(), which would
+        # clear the file cache
+        # TODO: change reset_catalog() to drop tables
+        FileCache.get().clear()
+        cache_stats = FileCache.get().stats()
+        assert cache_stats.num_requests == 0, f'{str(cache_stats)} tbl_id={tbl.get_id()}'
+        # add computed column to make sure that external files are cached locally during insert
+        tbl.add_column(rotated=tbl.img.rotate(30), stored=True)
+        urls = [
+            's3://open-images-dataset/validation/3c02ca9ec9b2b77b.jpg',
+            's3://open-images-dataset/validation/3c13e0015b6c3bcf.jpg',
+            's3://open-images-dataset/validation/3ba5380490084697.jpg',
+            's3://open-images-dataset/validation/3afeb4b34f90c0cf.jpg',
+            's3://open-images-dataset/validation/3b07a2c0d5c0c789.jpg',
         ]
-        tbl = db.create_table(
-            'test', cols, extract_frames_from='video', extracted_frame_col='frame',
-            extracted_frame_idx_col='frame_idx', extracted_fps=0)
-        params = tbl.parameters
+
+        tbl.insert({'img': url} for url in urls)
+        # check that we populated the cache
+        cache_stats = FileCache.get().stats()
+        assert cache_stats.num_requests == len(urls), f'{str(cache_stats)} tbl_id={tbl.get_id()}'
+        assert cache_stats.num_hits == 0
+        assert FileCache.get().num_files() == len(urls)
+        assert FileCache.get().num_files(tbl.get_id()) == len(urls)
+        assert FileCache.get().avg_file_size() > 0
+
+        # query: we read from the cache
+        _ = tbl.show(0)
+        cache_stats = FileCache.get().stats()
+        assert cache_stats.num_requests == 2 * len(urls)
+        assert cache_stats.num_hits == len(urls)
+
+        # after clearing the cache, we need to re-fetch the files
+        FileCache.get().clear()
+        _ = tbl.show(0)
+        cache_stats = FileCache.get().stats()
+        assert cache_stats.num_requests == len(urls)
+        assert cache_stats.num_hits == 0
+
+        # start with fresh client and FileCache instance to test FileCache initialization with pre-existing files
+        cl = pxt.Client(reload=True)
+        # is there a better way to do this?
+        FileCache._instance = None
+        t = cl.get_table('test')
+        _ = t.show(0)
+        cache_stats = FileCache.get().stats()
+        assert cache_stats.num_requests == len(urls)
+        assert cache_stats.num_hits == len(urls)
+
+        # dropping the table also clears the file cache
+        cl.drop_table('test')
+        cache_stats = FileCache.get().stats()
+        assert cache_stats.total_size == 0
+
+    def test_video_url(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        schema = {
+            'payload': IntType(nullable=False),
+            'video': VideoType(nullable=False),
+        }
+        tbl = cl.create_table('test', schema)
+        url = 's3://multimedia-commons/data/videos/mp4/ffe/ff3/ffeff3c6bf57504e7a6cecaff6aefbc9.mp4'
+        tbl.insert(payload=1, video=url)
+        row = tbl.select(tbl.video.fileurl, tbl.video.localpath).collect()[0]
+        assert row['video_fileurl'] == url
+        # row[1] contains valid path to an mp4 file
+        local_path = row['video_localpath']
+        assert os.path.exists(local_path) and os.path.isfile(local_path)
+        cap = cv2.VideoCapture(local_path)
+        # TODO: this isn't sufficient to determine that this is actually a video, rather than an image
+        assert cap.isOpened()
+        cap.release()
+
+    def test_create_video_table(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        tbl = cl.create_table(
+            'test_tbl',
+            {'payload': IntType(nullable=False), 'video': VideoType(nullable=True)})
+        args = {'video': tbl.video, 'fps': 0}
+        view = cl.create_view('test_view', tbl, iterator_class=FrameIterator, iterator_args=args)
+        view.add_column(c1=view.frame.rotate(30), stored=True)
+        view.add_column(c2=view.c1.rotate(40), stored=False)
+        view.add_column(c3=view.c2.rotate(50), stored=True)
+        # a non-materialized column that refers to another non-materialized column
+        view.add_column(c4=view.c2.rotate(60), stored=False)
+
+        @pxt.uda(
+            name='window_fn', update_types=[IntType()], value_type=IntType(), requires_order_by = True,
+            allows_window = True)
+        class WindowFnAggregator:
+            def __init__(self):
+                pass
+            def update(self, i: int) -> None:
+                pass
+            def value(self) -> int:
+                return 1
+        # cols computed with window functions are stored by default
+        view.add_column(c5=window_fn(view.frame_idx, 1, group_by=view.video))
+
         # reload to make sure that metadata gets restored correctly
-        cl = pt.Client()
-        db = cl.get_db('test')
-        tbl = db.get_table('test')
-        assert tbl.parameters == params
-        tbl.insert_rows([[get_video_files()[0]]], ['video'])
-        html_str = tbl.show(n=100)._repr_html_()
-        # TODO: check html_str
-        _ = tbl[make_video(tbl.frame_idx, tbl.frame)].group_by(tbl.video).show()
-
-        with pytest.raises(exc.Error):
+        cl = pxt.Client(reload=True)
+        tbl = cl.get_table('test_tbl')
+        view = cl.get_table('test_view')
+        # we're inserting only a single row and the video column is not in position 0
+        url = 's3://multimedia-commons/data/videos/mp4/ffe/ff3/ffeff3c6bf57504e7a6cecaff6aefbc9.mp4'
+        status = tbl.insert(payload=1, video=url)
+        assert status.num_excs == 0
+        # * 2: we have 2 stored img cols
+        assert MediaStore.count(view.get_id()) == view.count() * 2
+        # also insert a local file
+        tbl.insert(payload=1, video=get_video_files()[0])
+        assert MediaStore.count(view.get_id()) == view.count() * 2
+
+        # TODO: test inserting Nulls
+        #status = tbl.insert(payload=1, video=None)
+        #assert status.num_excs == 0
+
+        # revert() clears stored images
+        tbl.revert()
+        tbl.revert()
+        assert MediaStore.count(view.get_id()) == 0
+
+        with pytest.raises(excs.Error):
             # can't drop frame col
-            tbl.drop_column('frame')
-        with pytest.raises(exc.Error):
+            view.drop_column('frame')
+        with pytest.raises(excs.Error):
             # can't drop frame_idx col
-            tbl.drop_column('frame_idx')
-        with pytest.raises(exc.BadFormatError):
-            # missing parameters
-            _ = db.create_table(
-                'exc', cols, extract_frames_from='video',
-                extracted_frame_idx_col='frame_idx', extracted_fps=0)
-        with pytest.raises(exc.BadFormatError):
-            # wrong column type
-            _ = db.create_table(
-                'exc', cols, extract_frames_from='frame', extracted_frame_col='frame',
-                extracted_frame_idx_col='frame_idx', extracted_fps=0)
-        with pytest.raises(exc.BadFormatError):
-            # wrong column type
-            _ = db.create_table(
-                'exc', cols, extract_frames_from='video', extracted_frame_col='frame_idx',
-                extracted_frame_idx_col='frame_idx', extracted_fps=0)
-        with pytest.raises(exc.BadFormatError):
-            # wrong column type
-            _ = db.create_table(
-                'exc', cols, extract_frames_from='video', extracted_frame_col='frame',
-                extracted_frame_idx_col='frame', extracted_fps=0)
-        with pytest.raises(exc.BadFormatError):
-            # unknown column
-            _ = db.create_table(
-                'exc', cols, extract_frames_from='breaks', extracted_frame_col='frame',
-                extracted_frame_idx_col='frame_idx', extracted_fps=0)
-        with pytest.raises(exc.BadFormatError):
-            # unknown column
-            _ = db.create_table(
-                'exc', cols, extract_frames_from='video', extracted_frame_col='breaks',
-                extracted_frame_idx_col='frame_idx', extracted_fps=0)
-        with pytest.raises(exc.BadFormatError):
-            # unknown column
-            _ = db.create_table(
-                'exc', cols, extract_frames_from='video', extracted_frame_col='frame',
-                extracted_frame_idx_col='breaks', extracted_fps=0)
-
-    def test_insert(self, test_db: catalog.Db) -> None:
-        db = test_db
-        t1 = make_tbl(db, 'test1', ['c1', 'c2'])
-        data1 = create_table_data(t1)
-        t1.insert_pandas(data1)
-        assert t1.count() == len(data1)
+            view.drop_column('frame_idx')
+
+        # drop() clears stored images and the cache
+        tbl.insert(payload=1, video=get_video_files()[0])
+        with pytest.raises(excs.Error) as exc_info:
+            cl.drop_table('test_tbl')
+        assert 'has dependents: test_view' in str(exc_info.value)
+        cl.drop_table('test_view')
+        cl.drop_table('test_tbl')
+        assert MediaStore.count(view.get_id()) == 0
+
+    def test_insert(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        schema = {
+            'c1': StringType(nullable=False),
+            'c2': IntType(nullable=False),
+            'c3': FloatType(nullable=False),
+            'c4': BoolType(nullable=False),
+            'c5': ArrayType((2, 3), dtype=IntType(), nullable=False),
+            'c6': JsonType(nullable=False),
+            'c7': ImageType(nullable=False),
+            'c8': VideoType(nullable=False),
+        }
+        t = cl.create_table('test1', schema)
+        rows = create_table_data(t)
+        status = t.insert(rows)
+        assert status.num_rows == len(rows)
+        assert status.num_excs == 0
+
+        # alternate (kwargs) insert syntax
+        status = t.insert(
+            c1='string',
+            c2=91,
+            c3=1.0,
+            c4=True,
+            c5=np.ones((2, 3), dtype=np.dtype(np.int64)),
+            c6={'key': 'val'},
+            c7=get_image_files()[0],
+            c8=get_video_files()[0]
+        )
+        assert status.num_rows == 1
+        assert status.num_excs == 0
+
+        # empty input
+        with pytest.raises(excs.Error) as exc_info:
+            t.insert([])
+        assert 'empty' in str(exc_info.value)
+
+        # missing column
+        with pytest.raises(excs.Error) as exc_info:
+            # drop first column
+            col_names = list(rows[0].keys())[1:]
+            new_rows = [{col_name: row[col_name] for col_name in col_names} for row in rows]
+            t.insert(new_rows)
+        assert 'Missing' in str(exc_info.value)

         # incompatible schema
-        t2 = make_tbl(db, 'test2', ['c2', 'c1'])
-        t2_data = create_table_data(t2)
-        with pytest.raises(exc.InsertError):
-            t1.insert_pandas(t2_data)
-
-    def test_query(self, test_db: catalog.Db) -> None:
-        db = test_db
-        t = make_tbl(db, 'test', ['c1', 'c2', 'c3', 'c4', 'c5'])
-        t_data = create_table_data(t)
-        t.insert_pandas(t_data)
+        for (col_name, col_type), value_col_name in zip(schema.items(), ['c2', 'c3', 'c5', 'c5', 'c6', 'c7', 'c2', 'c2']):
+            cl.drop_table('test1', ignore_errors=True)
+            t = cl.create_table('test1', {col_name: col_type})
+            with pytest.raises(excs.Error) as exc_info:
+                t.insert({col_name: r[value_col_name]} for r in rows)
+            assert 'expected' in str(exc_info.value).lower()
+
+        # rows not list of dicts
+        cl.drop_table('test1', ignore_errors=True)
+        t = cl.create_table('test1', {'c1': StringType()})
+        with pytest.raises(excs.Error) as exc_info:
+            t.insert(['1'])
+        assert 'list of dictionaries' in str(exc_info.value)
+
+        # bad null value
+        cl.drop_table('test1', ignore_errors=True)
+        t = cl.create_table('test1', {'c1': StringType(nullable=False)})
+        with pytest.raises(excs.Error) as exc_info:
+            t.insert(c1=None)
+        assert 'expected non-None' in str(exc_info.value)
+
+        # bad array literal
+        cl.drop_table('test1', ignore_errors=True)
+        t = cl.create_table('test1', {'c5': ArrayType((2, 3), dtype=IntType(), nullable=False)})
+        with pytest.raises(excs.Error) as exc_info:
+            t.insert(c5=np.ndarray((3, 2)))
+        assert 'expected ndarray((2, 3)' in str(exc_info.value)
+
+    def test_query(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        col_names = ['c1', 'c2', 'c3', 'c4', 'c5']
+        t = make_tbl(cl, 'test', col_names)
+        rows = create_table_data(t)
+        t.insert(rows)
         _ = t.show(n=0)

         # test querying existing table
-        cl2 = pt.Client()
-        db2 = cl2.get_db('test')
-        t2 = db2.get_table('test')
+        cl = pxt.Client(reload=True)
+        t2 = cl.get_table('test')
         _ = t2.show(n=0)

-    def test_computed_cols(self, test_db: catalog.Db) -> None:
-        db = test_db
-        c1 = catalog.Column('c1', IntType(), nullable=False)
-        c2 = catalog.Column('c2', FloatType(), nullable=False)
-        c3 = catalog.Column('c3', JsonType(), nullable=False)
-        schema = [c1, c2, c3]
-        t = db.create_table('test', schema)
-        t.add_column(catalog.Column('c4', computed_with=t.c1 + 1))
-        t.add_column(catalog.Column('c5', computed_with=t.c4 + 1))
-        t.add_column(catalog.Column('c6', computed_with=t.c1 / t.c2))
-        t.add_column(catalog.Column('c7', computed_with=t.c6 * t.c2))
-        t.add_column(catalog.Column('c8', computed_with=t.c3.detections['*'].bounding_box))
-        t.add_column(catalog.Column('c9', FloatType(), computed_with=lambda c2: math.sqrt(c2)))
+    def test_update(self, test_tbl: pxt.Table, indexed_img_tbl: pxt.Table) -> None:
+        t = test_tbl
+        # update every type with a literal
+        test_cases = [
+            ('c1', 'new string'),
+            # TODO: ('c1n', None),
+            ('c3', -1.0),
+            ('c4', True),
+            ('c5', datetime.datetime.now()),
+            ('c6', [{'x': 1, 'y': 2}]),
+        ]
+        count = t.count()
+        for col_name, literal in test_cases:
+            status = t.update({col_name: literal}, where=t.c3 < 10.0, cascade=False)
+            assert status.num_rows == 10
+            assert status.updated_cols == [f'{t.get_name()}.{col_name}']
+            assert t.count() == count
+            t.revert()
+
+        # exchange two columns
+        t.add_column(float_col=FloatType(nullable=True))
+        t.update({'float_col': 1.0})
+        float_col_vals = t.select(t.float_col).collect().to_pandas()['float_col']
+        c3_vals = t.select(t.c3).collect().to_pandas()['c3']
+        assert np.all(float_col_vals == pd.Series([1.0] * t.count()))
+        t.update({'c3': t.float_col, 'float_col': t.c3})
+        assert np.all(t.select(t.c3).collect().to_pandas()['c3'] == float_col_vals)
+        assert np.all(t.select(t.float_col).collect().to_pandas()['float_col'] == c3_vals)
+        t.revert()
+
+        # update column that is used in computed cols
+        t.add_column(computed1=t.c3 + 1)
+        t.add_column(computed2=t.computed1 + 1)
+        t.add_column(computed3=t.c3 + 3)
+
+        # cascade=False
+        computed1 = t.order_by(t.computed1).show(0).to_pandas()['computed1']
+        computed2 = t.order_by(t.computed2).show(0).to_pandas()['computed2']
+        computed3 = t.order_by(t.computed3).show(0).to_pandas()['computed3']
+        assert t.where(t.c3 < 10.0).count() == 10
+        assert t.where(t.c3 == 10.0).count() == 1
+        # update to a value that also satisfies the where clause
+        status = t.update({'c3': 0.0}, where=t.c3 < 10.0, cascade=False)
+        assert status.num_rows == 10
+        assert status.updated_cols == ['test_tbl.c3']
+        assert t.where(t.c3 < 10.0).count() == 10
+        assert t.where(t.c3 == 0.0).count() == 10
+        # computed cols are not updated
+        assert np.all(t.order_by(t.computed1).show(0).to_pandas()['computed1'] == computed1)
+        assert np.all(t.order_by(t.computed2).show(0).to_pandas()['computed2'] == computed2)
+        assert np.all(t.order_by(t.computed3).show(0).to_pandas()['computed3'] == computed3)
+
+        # revert, then verify that we're back to where we started
+        cl = pxt.Client(reload=True)
+        t = cl.get_table(t.get_name())
+        t.revert()
+        assert t.where(t.c3 < 10.0).count() == 10
+        assert t.where(t.c3 == 10.0).count() == 1
+
+        # cascade=True
+        status = t.update({'c3': 0.0}, where=t.c3 < 10.0, cascade=True)
+        assert status.num_rows == 10
+        assert set(status.updated_cols) == \
+            set(['test_tbl.c3', 'test_tbl.computed1', 'test_tbl.computed2', 'test_tbl.computed3'])
+        assert t.where(t.c3 < 10.0).count() == 10
+        assert t.where(t.c3 == 0.0).count() == 10
+        assert np.all(t.order_by(t.computed1).show(0).to_pandas()['computed1'][:10] == pd.Series([1.0] * 10))
+        assert np.all(t.order_by(t.computed2).show(0).to_pandas()['computed2'][:10] == pd.Series([2.0] * 10))
+        assert np.all(t.order_by(t.computed3).show(0).to_pandas()['computed3'][:10] == pd.Series([3.0] * 10))
+
+        # bad update spec
+        with pytest.raises(excs.Error) as excinfo:
+            t.update({1: 1})
+        assert 'dict key' in str(excinfo.value)
+
+        # unknown column
+        with pytest.raises(excs.Error) as excinfo:
+            t.update({'unknown': 1})
+        assert 'unknown unknown' in str(excinfo.value)
+
+        # incompatible type
+        with pytest.raises(excs.Error) as excinfo:
+            t.update({'c1': 1})
+        assert 'not compatible' in str(excinfo.value)
+
+        # can't update primary key
+        with pytest.raises(excs.Error) as excinfo:
+            t.update({'c2': 1})
+        assert 'primary key' in str(excinfo.value)
+
+        # can't update computed column
+        with pytest.raises(excs.Error) as excinfo:
+            t.update({'computed1': 1})
+        assert 'is computed' in str(excinfo.value)
+
+        # non-expr
+        with pytest.raises(excs.Error) as excinfo:
+            t.update({'c3': lambda c3: math.sqrt(c3)})
+        assert 'not a recognized' in str(excinfo.value)
+
+        # non-Predicate filter
+        with pytest.raises(excs.Error) as excinfo:
+            t.update({'c3': 1.0}, where=lambda c2: c2 == 10)
+        assert 'Predicate' in str(excinfo.value)
+
+        img_t = indexed_img_tbl
+
+        # can't update image col
+        with pytest.raises(excs.Error) as excinfo:
+            img_t.update({'img': 17}, where=img_t.img.nearest('car'))
+        assert 'has type image' in str(excinfo.value)
+
+        # similarity search is not supported
+        with pytest.raises(excs.Error) as excinfo:
+            img_t.update({'split': 'train'}, where=img_t.img.nearest('car'))
+        assert 'nearest()' in str(excinfo.value)
+
+        # filter not expressible in SQL
+        with pytest.raises(excs.Error) as excinfo:
+            img_t.update({'split': 'train'}, where=img_t.img.width > 100)
+        assert 'not expressible' in str(excinfo.value)
+
+    def test_cascading_update(self, test_tbl: pxt.InsertableTable) -> None:
+        t = test_tbl
+        t.add_column(d1=t.c3 - 1)
+        # add column that can be updated
+        t.add_column(c10=FloatType(nullable=True))
+        t.update({'c10': t.c3})
+        # computed column that depends on two columns: exercise duplicate elimination during query construction
+        t.add_column(d2=t.c3 - t.c10)
+        r1 = t.where(t.c2 < 5).select(t.c3 + 1.0, t.c10 - 1.0, t.c3, 2.0).order_by(t.c2).show(0)
+        t.update({'c4': True, 'c3': t.c3 + 1.0, 'c10': t.c10 - 1.0}, where=t.c2 < 5, cascade=True)
+        r2 = t.where(t.c2 < 5).select(t.c3, t.c10, t.d1, t.d2).order_by(t.c2).show(0)
+        assert_resultset_eq(r1, r2)
+
+    def test_delete(self, test_tbl: pxt.Table, indexed_img_tbl: pxt.Table) -> None:
+        t = test_tbl
+
+        cnt = t.where(t.c3 < 10.0).count()
+        assert cnt == 10
+        cnt = t.where(t.c3 == 10.0).count()
+        assert cnt == 1
+        status = t.delete(where=t.c3 < 10.0)
+        assert status.num_rows == 10
+        cnt = t.where(t.c3 < 10.0).count()
+        assert cnt == 0
+        cnt = t.where(t.c3 == 10.0).count()
+        assert cnt == 1
+
+        # revert, then verify that we're back where we started
+        cl = pxt.Client(reload=True)
+        t = cl.get_table(t.get_name())
+        t.revert()
+        cnt = t.where(t.c3 < 10.0).count()
+        assert cnt == 10
+        cnt = t.where(t.c3 == 10.0).count()
+        assert cnt == 1
+
+        # non-Predicate filter
+        with pytest.raises(excs.Error) as excinfo:
+            t.delete(where=lambda c2: c2 == 10)
+        assert 'Predicate' in str(excinfo.value)
+
+        img_t = indexed_img_tbl
+        # similarity search is not supported
+        with pytest.raises(excs.Error) as excinfo:
+            img_t.delete(where=img_t.img.nearest('car'))
+        assert 'nearest()' in str(excinfo.value)
+
+        # filter not expressible in SQL
+        with pytest.raises(excs.Error) as excinfo:
+            img_t.delete(where=img_t.img.width > 100)
+        assert 'not expressible' in str(excinfo.value)
+
+    def test_computed_cols(self, test_client: pxt.client) -> None:
+        cl = test_client
+        schema = {
+            'c1': IntType(nullable=False),
+            'c2': FloatType(nullable=False),
+            'c3': JsonType(nullable=False),
+        }
+        t : pxt.InsertableTable = cl.create_table('test', schema)
+        status = t.add_column(c4=t.c1 + 1)
+        assert status.num_excs == 0
+        status = t.add_column(c5=t.c4 + 1)
+        assert status.num_excs == 0
+        status = t.add_column(c6=t.c1 / t.c2)
+        assert status.num_excs == 0
+        status = t.add_column(c7=t.c6 * t.c2)
+        assert status.num_excs == 0
+        status = t.add_column(c8=t.c3.detections['*'].bounding_box)
+        assert status.num_excs == 0
+        status = t.add_column(c9=lambda c2: math.sqrt(c2), type=FloatType())
+        assert status.num_excs == 0
+
+        # unstored cols that compute window functions aren't currently supported
+        with pytest.raises((excs.Error)):
+            t.add_column(c10=ptf.sum(t.c1, group_by=t.c1), stored=False)

         # Column.dependent_cols are computed correctly
         assert len(t.c1.col.dependent_cols) == 2
194
750
  assert len(t.c7.col.dependent_cols) == 0
195
751
  assert len(t.c8.col.dependent_cols) == 0
196
752
 
197
- data_df = create_table_data(t, ['c1', 'c2', 'c3'], num_rows=10)
198
- t.insert_pandas(data_df)
753
+ rows = create_table_data(t, ['c1', 'c2', 'c3'], num_rows=10)
754
+ t.insert(rows)
199
755
  _ = t.show()
200
756
 
201
757
  # not allowed to pass values for computed cols
202
- with pytest.raises(exc.InsertError):
203
- data_df2 = create_table_data(t, num_rows=10)
204
- t.insert_pandas(data_df2)
205
-
206
- # computed col references non-existent col
207
- with pytest.raises(exc.Error):
208
- c1 = catalog.Column('c1', IntType(), nullable=False)
209
- c2 = catalog.Column('c2', FloatType(), nullable=False)
210
- c3 = catalog.Column('c3', FloatType(), nullable=False, computed_with=lambda c2: math.sqrt(c2))
211
- _ = db.create_table('test2', [c1, c3, c2])
758
+ with pytest.raises(excs.Error):
759
+ rows2 = create_table_data(t, ['c1', 'c2', 'c3', 'c4'], num_rows=10)
760
+ t.insert(rows2)
212
761
 
213
762
  # test loading from store
214
- cl2 = pt.Client()
215
- db2 = cl2.get_db('test')
216
- t2 = db2.get_table('test')
217
- assert len(t.columns) == len(t2.columns)
218
- for i in range(len(t.columns)):
219
- if t.columns[i].value_expr is not None:
220
- assert t.columns[i].value_expr.equals(t2.columns[i].value_expr)
763
+ cl = pxt.Client(reload=True)
764
+ t = cl.get_table('test')
765
+ assert len(t.columns()) == len(t.columns())
766
+ for i in range(len(t.columns())):
767
+ if t.columns()[i].value_expr is not None:
768
+ assert t.columns()[i].value_expr.equals(t.columns()[i].value_expr)
221
769
 
222
770
  # make sure we can still insert data and that computed cols are still set correctly
223
- t2.insert_pandas(data_df)
224
- res = t2.show(0)
225
- tbl_df = t2.show(0).to_pandas()
771
+ status = t.insert(rows)
772
+ assert status.num_excs == 0
773
+ res = t.show(0)
774
+ tbl_df = t.show(0).to_pandas()
226
775
 
227
776
  # can't drop c4: c5 depends on it
228
- with pytest.raises(exc.Error):
777
+ with pytest.raises(excs.Error):
229
778
  t.drop_column('c4')
230
779
  t.drop_column('c5')
231
780
  # now it works
232
781
  t.drop_column('c4')
233
782
 
234
- def test_computed_img_cols(self, test_db: catalog.Db) -> None:
235
- db = test_db
236
- c1 = catalog.Column('img', ImageType(), nullable=False, indexed=True)
237
- schema = [c1]
238
- t = db.create_table('test', schema)
239
- t.add_column(catalog.Column('c2', computed_with=t.img.width))
240
- t.add_column(catalog.Column('c3', computed_with=t.img.rotate(90)))
783
+ def test_expr_udf_computed_cols(self, test_client: pxt.Client) -> None:
784
+ cl = test_client
785
+ t = cl.create_table('test', {'c1': IntType(nullable=False)})
786
+ rows = [{'c1': i} for i in range(100)]
787
+ status = t.insert(rows)
788
+ assert status.num_rows == len(rows)
789
+ status = t.add_column(c2=t.c1 + 1)
790
+ assert status.num_excs == 0
791
+ # call with positional arg
792
+ status = t.add_column(c3=self.add1(t.c1))
793
+ assert status.num_excs == 0
794
+ # call with keyword arg
795
+ status = t.add_column(c4=self.add1(a=t.c1))
796
+ assert status.num_excs == 0
797
+
798
+ # TODO: how to verify the output?
799
+ describe_output = t.__repr__()
800
+ # 'add1' didn't get swallowed/the expr udf is still visible in the column definition
801
+ assert 'add1' in describe_output
241
802
 
242
- data_df = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
243
- t.insert_pandas(data_df.loc[0:20, ['img']])
803
+ def check(t: pxt.Table) -> None:
804
+ assert_resultset_eq(
805
+ t.select(t.c1 + 1).order_by(t.c1).collect(),
806
+ t.select(t.c2).order_by(t.c1).collect())
807
+ assert_resultset_eq(
808
+ t.select(t.c1 + 1).order_by(t.c1).collect(),
809
+ t.select(t.c3).order_by(t.c1).collect())
810
+
811
+ check(t)
812
+ # test loading from store
813
+ cl = pxt.Client(reload=True)
814
+ t = cl.get_table('test')
815
+ check(t)
816
+
817
+ # make sure we can still insert data and that computed cols are still set correctly
818
+ status = t.insert(rows)
819
+ assert status.num_excs == 0
820
+ check(t)
821
+
822
+ def test_computed_col_exceptions(self, test_client: pxt.Client, test_tbl: catalog.Table) -> None:
823
+ cl = test_client
824
+
825
+ # exception during insert()
826
+ schema = {'c2': IntType(nullable=False)}
827
+ rows = list(test_tbl.select(test_tbl.c2).collect())
828
+ t = cl.create_table('test_insert', schema)
829
+ status = t.add_column(add1=self.f2(self.f1(t.c2)))
830
+ assert status.num_excs == 0
831
+ status = t.insert(rows, fail_on_exception=False)
832
+ assert status.num_excs == 10
833
+ assert 'test_insert.add1' in status.cols_with_excs
834
+ assert t.where(t.add1.errortype != None).count() == 10
835
+
836
+ # exception during add_column()
837
+ t = cl.create_table('test_add_column', schema)
838
+ status = t.insert(rows)
839
+ assert status.num_rows == 100
840
+ assert status.num_excs == 0
841
+ status = t.add_column(add1=self.f2(self.f1(t.c2)))
842
+ assert status.num_excs == 10
843
+ assert 'test_add_column.add1' in status.cols_with_excs
844
+ assert t.where(t.add1.errortype != None).count() == 10
845
+
846
+ def _test_computed_img_cols(self, t: catalog.Table, stores_img_col: bool) -> None:
847
+ rows = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
848
+ rows = [{'img': r['img']} for r in rows[:20]]
849
+ status = t.insert(rows)
850
+ assert status.num_rows == 20
851
+ _ = t.count()
244
852
  _ = t.show()
245
- assert utils.computed_img_count(tbl_id=t.id) == t.count()
853
+ assert MediaStore.count(t.get_id()) == t.count() * stores_img_col
246
854
 
247
855
  # test loading from store
248
- cl2 = pt.Client()
249
- db2 = cl2.get_db('test')
250
- t2 = db2.get_table('test')
251
- assert len(t.columns) == len(t2.columns)
252
- for i in range(len(t.columns)):
253
- if t.columns[i].value_expr is not None:
254
- assert t.columns[i].value_expr.equals(t2.columns[i].value_expr)
856
+ cl = pxt.Client(reload=True)
857
+ t2 = cl.get_table(t.get_name())
858
+ assert len(t.columns()) == len(t2.columns())
859
+ for i in range(len(t.columns())):
860
+ if t.columns()[i].value_expr is not None:
861
+ assert t.columns()[i].value_expr.equals(t2.columns()[i].value_expr)
255
862
 
256
863
  # make sure we can still insert data and that computed cols are still set correctly
257
- t2.insert_pandas(data_df.loc[0:20, ['img']])
258
- assert utils.computed_img_count(tbl_id=t.id) == t2.count()
864
+ t2.insert(rows)
865
+ assert MediaStore.count(t2.get_id()) == t2.count() * stores_img_col
259
866
  res = t2.show(0)
260
867
  tbl_df = t2.show(0).to_pandas()
261
- print(tbl_df)
262
868
 
263
869
  # revert also removes computed images
264
870
  t2.revert()
265
- assert utils.computed_img_count() == t2.count()
871
+ assert MediaStore.count(t2.get_id()) == t2.count() * stores_img_col
872
+
873
+ def test_computed_img_cols(self, test_client: pxt.Client) -> None:
874
+ cl = test_client
875
+ schema = {'img': ImageType(nullable=False)}
876
+ t = cl.create_table('test', schema)
877
+ t.add_column(c2=t.img.width)
878
+ # c3 is not stored by default
879
+ t.add_column(c3=t.img.rotate(90))
880
+ self._test_computed_img_cols(t, stores_img_col=False)
881
+
882
+ t = cl.create_table('test2', schema)
883
+ # c3 is now stored
884
+ t.add_column(c3=t.img.rotate(90), stored=True)
885
+ self._test_computed_img_cols(t, stores_img_col=True)
886
+ _ = t[t.c3.errortype].show(0)
887
+
888
+ # computed img col with exceptions
889
+ t = cl.create_table('test3', schema)
890
+ @pxt.udf(return_type=ImageType(), param_types=[ImageType()])
891
+ def f(img: PIL.Image.Image) -> PIL.Image.Image:
892
+ raise RuntimeError
893
+ t.add_column(c3=f(t.img), stored=True)
894
+ rows = read_data_file('imagenette2-160', 'manifest.csv', ['img'])
895
+ rows = [{'img': r['img']} for r in rows[:20]]
896
+ t.insert(rows, fail_on_exception=False)
897
+ _ = t[t.c3.errortype].show(0)
266
898
 
267
- def test_computed_window_fn(self, test_db: catalog.Db, test_tbl: catalog.Table) -> None:
268
- db = test_db
899
+ def test_computed_window_fn(self, test_client: pxt.Client, test_tbl: catalog.Table) -> None:
900
+ cl = test_client
269
901
  t = test_tbl
270
902
  # backfill
271
- t.add_column(catalog.Column('c9', computed_with=sum_uda(t.c2).window(partition_by=t.c4, order_by=t.c3)))
272
-
273
- c2 = catalog.Column('c2', IntType(), nullable=False)
274
- c3 = catalog.Column('c3', FloatType(), nullable=False)
275
- c4 = catalog.Column('c4', BoolType(), nullable=False)
276
- new_t = db.create_table('insert_test', [c2, c3, c4])
277
- new_t.add_column(catalog.Column('c5', IntType(), computed_with=lambda c2: c2 * c2))
278
- new_t.add_column(catalog.Column(
279
- 'c6', computed_with=sum_uda(new_t.c5).window(partition_by=new_t.c4, order_by=new_t.c3)))
280
- data_df = t[t.c2, t.c4, t.c3].show(0).to_pandas()
281
- new_t.insert_pandas(data_df)
903
+ t.add_column(c9=ptf.sum(t.c2, group_by=t.c4, order_by=t.c3))
904
+
905
+ schema = {
906
+ 'c2': IntType(nullable=False),
907
+ 'c3': FloatType(nullable=False),
908
+ 'c4': BoolType(nullable=False),
909
+ }
910
+ new_t = cl.create_table('insert_test', schema)
911
+ new_t.add_column(c5=lambda c2: c2 * c2, type=IntType())
912
+ new_t.add_column(c6=ptf.sum(new_t.c5, group_by=new_t.c4, order_by=new_t.c3))
913
+ rows = list(t.select(t.c2, t.c4, t.c3).collect())
914
+ new_t.insert(rows)
282
915
  _ = new_t.show(0)
283
- print(_)
284
-
285
- def test_revert(self, test_db: catalog.Db) -> None:
286
- db = test_db
287
- t1 = make_tbl(db, 'test1', ['c1', 'c2'])
288
- data1 = create_table_data(t1)
289
- t1.insert_pandas(data1)
290
- assert t1.count() == len(data1)
291
- data2 = create_table_data(t1)
292
- t1.insert_pandas(data2)
293
- assert t1.count() == len(data1) + len(data2)
916
+
917
+ def test_revert(self, test_client: pxt.Client) -> None:
918
+ cl = test_client
919
+ t1 = make_tbl(cl, 'test1', ['c1', 'c2'])
920
+ assert t1.version() == 0
921
+ rows1 = create_table_data(t1)
922
+ t1.insert(rows1)
923
+ assert t1.count() == len(rows1)
924
+ assert t1.version() == 1
925
+ rows2 = create_table_data(t1)
926
+ t1.insert(rows2)
927
+ assert t1.count() == len(rows1) + len(rows2)
928
+ assert t1.version() == 2
294
929
  t1.revert()
295
- assert t1.count() == len(data1)
296
- t1.insert_pandas(data2)
297
- assert t1.count() == len(data1) + len(data2)
298
-
299
- def test_snapshot(self, test_db: catalog.Db) -> None:
300
- db = test_db
301
- db.create_dir('main')
302
- tbl = make_tbl(db, 'main.test1', ['c1', 'c2'])
303
- data1 = create_table_data(tbl)
304
- tbl.insert_pandas(data1)
305
- assert tbl.count() == len(data1)
306
-
307
- db.create_snapshot('snap', ['main.test1'])
308
- snap = db.get_table('snap.test1')
309
- assert snap.count() == len(data1)
310
-
311
- # adding data to a base table doesn't change the snapshot
312
- data2 = create_table_data(tbl)
313
- tbl.insert_pandas(data2)
314
- assert tbl.count() == len(data1) + len(data2)
315
- assert snap.count() == len(data1)
930
+ assert t1.count() == len(rows1)
931
+ assert t1.version() == 1
932
+ t1.insert(rows2)
933
+ assert t1.count() == len(rows1) + len(rows2)
934
+ assert t1.version() == 2
316
935
 
317
- tbl.revert()
318
- # can't revert a version referenced by a snapshot
319
- with pytest.raises(exc.OperationalError):
320
- tbl.revert()
321
-
322
- def test_add_column(self, test_db: catalog.Db) -> None:
323
- db = test_db
324
- t = make_tbl(db, 'test', ['c1', 'c2'])
325
- data1 = create_table_data(t)
326
- t.insert_pandas(data1)
327
- assert t.count() == len(data1)
328
- t.add_column(catalog.Column('c3', computed_with=t.c2 + 10, nullable=False))
936
+ # can't revert past version 0
937
+ t1.revert()
938
+ t1.revert()
939
+ with pytest.raises(excs.Error) as excinfo:
940
+ t1.revert()
941
+ assert 'version 0' in str(excinfo.value)
942
+
+ def test_add_column(self, test_tbl: catalog.Table) -> None:
+ t = test_tbl
+ num_orig_cols = len(t.columns())
+ t.add_column(add1=pxt.IntType(nullable=True))
+ assert len(t.columns()) == num_orig_cols + 1
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t.add_column(add2=pxt.IntType(nullable=False))
+ assert 'cannot add non-nullable' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t.add_column(add2=pxt.IntType(nullable=False), add3=pxt.StringType())
+ assert 'requires exactly one keyword argument' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t.add_column(pos=pxt.StringType(nullable=True))
+ assert 'is reserved' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t.add_column(add2=pxt.IntType(nullable=False), type=pxt.StringType())
+ assert '"type" is redundant' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t.add_column(add2=[[1.0, 2.0], [3.0, 4.0]], type=pxt.StringType())
+ assert '"type" is redundant' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t.add_column(add2=pxt.IntType(nullable=False), stored=False)
+ assert 'stored=false only applies' in str(exc_info.value).lower()
+
+ # duplicate name
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t.add_column(c1=pxt.IntType())
+ assert 'duplicate column name' in str(exc_info.value).lower()
+
+ # 'stored' kwarg only applies to computed image columns
+ with pytest.raises(excs.Error):
+     _ = t.add_column(c5=IntType(), stored=False)
+ with pytest.raises(excs.Error):
+     _ = t.add_column(c5=ImageType(), stored=False)
+ with pytest.raises(excs.Error):
+     _ = t.add_column(c5=(t.c2 + t.c3), stored=False)
+
+ # make sure this is still true after reloading the metadata
+ cl = pxt.Client(reload=True)
+ t = cl.get_table(t.get_name())
+ assert len(t.columns()) == num_orig_cols + 1
+
+ # revert() works
+ t.revert()
+ assert len(t.columns()) == num_orig_cols
+
+ # make sure this is still true after reloading the metadata once more
+ cl = pxt.Client(reload=True)
+ t = cl.get_table(t.get_name())
+ assert len(t.columns()) == num_orig_cols
+
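The happy path of the new keyword-based add_column() API, sketched under the same assumptions (illustrative names; exactly one keyword argument per call, and a nullable type when the table already holds rows).

    import pixeltable as pxt

    cl = pxt.Client()
    t = cl.create_table('add_col_demo', {'c1': pxt.IntType(nullable=False)})
    t.insert({'c1': i} for i in range(3))

    # a plain column added to a populated table must be nullable,
    # per the 'cannot add non-nullable' error checked above
    t.add_column(note=pxt.StringType(nullable=True))
    assert len(t.columns()) == 2

    t.revert()                               # schema changes are versioned too
    assert len(t.columns()) == 1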
+ def test_add_column_setitem(self, test_tbl: catalog.Table) -> None:
+ t = test_tbl
+ num_orig_cols = len(t.columns())
+ t['add1'] = pxt.IntType(nullable=True)
+ assert len(t.columns()) == num_orig_cols + 1
+ t['computed1'] = t.c2 + 1
+ assert len(t.columns()) == num_orig_cols + 2
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t['pos'] = pxt.StringType()
+ assert 'is reserved' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t[2] = pxt.StringType()
+ assert 'must be a string' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t['add 2'] = pxt.StringType()
+ assert 'invalid column name' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t['add2'] = {'value': t.c2 + 1, 'type': pxt.StringType()}
+ assert '"type" is redundant' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t['add2'] = {'value': pxt.IntType()}
+ assert 'value needs to be either' in str(exc_info.value).lower()
+
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t['add2'] = {'value': t.c2 + 1, 'stored': False}
+ assert 'stored=false only applies' in str(exc_info.value).lower()
+
+ # duplicate name
+ with pytest.raises(excs.Error) as exc_info:
+     _ = t['c1'] = pxt.IntType()
+ assert 'duplicate column name' in str(exc_info.value).lower()
+
+ # make sure this is still true after reloading the metadata
+ cl = pxt.Client(reload=True)
+ t = cl.get_table(t.get_name())
+ assert len(t.columns()) == num_orig_cols + 2
+
+ # revert() works
+ t.revert()
+ t.revert()
+ assert len(t.columns()) == num_orig_cols
+
+ # make sure this is still true after reloading the metadata once more
+ cl = pxt.Client(reload=True)
+ t = cl.get_table(t.get_name())
+ assert len(t.columns()) == num_orig_cols
+
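Item assignment is the shorthand the setitem test covers; a sketch with illustrative names:

    import pixeltable as pxt

    cl = pxt.Client()
    t = cl.create_table('setitem_demo', {'c2': pxt.IntType(nullable=False)})
    t.insert({'c2': i} for i in range(3))

    t['note'] = pxt.StringType(nullable=True)   # a type adds a plain column
    t['c2_plus_1'] = t.c2 + 1                   # an expression adds a computed column
    assert len(t.columns()) == 4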
+ def test_drop_column(self, test_tbl: catalog.Table) -> None:
+ t = test_tbl
+ num_orig_cols = len(t.columns())
+ t.drop_column('c1')
+ assert len(t.columns()) == num_orig_cols - 1
+
+ with pytest.raises(excs.Error):
+     t.drop_column('unknown')
+
+ # make sure this is still true after reloading the metadata
+ cl = pxt.Client(reload=True)
+ t = cl.get_table(t.get_name())
+ assert len(t.columns()) == num_orig_cols - 1
+
+ # revert() works
+ t.revert()
+ assert len(t.columns()) == num_orig_cols
+
+ # make sure this is still true after reloading the metadata once more
+ cl = pxt.Client(reload=True)
+ t = cl.get_table(t.get_name())
+ assert len(t.columns()) == num_orig_cols
+
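A sketch of the drop/revert round trip exercised above (illustrative table name):

    import pixeltable as pxt

    cl = pxt.Client()
    t = cl.create_table('drop_demo', {'c1': pxt.IntType(nullable=False), 'c2': pxt.IntType(nullable=False)})

    t.drop_column('c1')                      # recorded as a new table version
    assert len(t.columns()) == 1
    t.revert()                               # so it can be undone
    assert len(t.columns()) == 2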
+ def test_rename_column(self, test_tbl: catalog.Table) -> None:
+ t = test_tbl
+ num_orig_cols = len(t.columns())
+ t.rename_column('c1', 'c1_renamed')
+ assert len(t.columns()) == num_orig_cols
+
+ def check_rename(t: pxt.Table, known: str, unknown: str) -> None:
+     with pytest.raises(AttributeError) as exc_info:
+         _ = t.select(t[unknown]).collect()
+     assert 'unknown' in str(exc_info.value).lower()
+     _ = t.select(t[known]).collect()
+
+ check_rename(t, 'c1_renamed', 'c1')
+
+ # unknown column
+ with pytest.raises(excs.Error):
+     t.rename_column('unknown', 'unknown_renamed')
+ # bad name
+ with pytest.raises(excs.Error):
+     t.rename_column('c2', 'bad name')
+ # existing name
+ with pytest.raises(excs.Error):
+     t.rename_column('c2', 'c3')
+
+ # make sure this is still true after reloading the metadata
+ cl = pxt.Client(reload=True)
+ t = cl.get_table(t.get_name())
+ check_rename(t, 'c1_renamed', 'c1')
+
+ # revert() works
+ t.revert()
+ _ = t.select(t.c1).collect()
+ #check_rename(t, 'c1', 'c1_renamed')
+
+ # make sure this is still true after reloading the metadata once more
+ cl = pxt.Client(reload=True)
+ t = cl.get_table(t.get_name())
+ check_rename(t, 'c1', 'c1_renamed')
+
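And the rename round trip, sketched the same way (illustrative table name):

    import pixeltable as pxt

    cl = pxt.Client()
    t = cl.create_table('rename_demo', {'c1': pxt.IntType(nullable=False)})

    t.rename_column('c1', 'c1_renamed')
    _ = t.select(t.c1_renamed).collect()     # t.c1 no longer resolves
    t.revert()
    _ = t.select(t.c1).collect()             # the old name is back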
+ def test_add_computed_column(self, test_tbl: catalog.Table) -> None:
+ t = test_tbl
+ status = t.add_column(add1=t.c2 + 10)
+ assert status.num_excs == 0
  _ = t.show()
- print(_)
+
+ # with exception in SQL
+ with pytest.raises(excs.Error):
+     t.add_column(add2=(t.c2 - 10) / (t.c3 - 10))
+
+ # with exception in Python for c6.f2 == 10
+ status = t.add_column(add2=(t.c6.f2 - 10) / (t.c6.f2 - 10))
+ assert status.num_excs == 1
+ result = t[t.add2.errortype != None][t.c6.f2, t.add2, t.add2.errortype, t.add2.errormsg].show()
+ assert len(result) == 1
+
+ # test case: exceptions in dependencies prevent execution of dependent exprs
+ status = t.add_column(add3=self.f2(self.f1(t.c2)))
+ assert status.num_excs == 10
+ result = t[t.add3.errortype != None][t.c2, t.add3, t.add3.errortype, t.add3.errormsg].show()
+ assert len(result) == 10
+
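A sketch of the per-row error handling asserted in the Python-exception branch. Using pxt.JsonType for the dict-valued column is an assumption on my part (the fixture column c6 above is queried the same way via c6.f2); the rest mirrors the test.

    import pixeltable as pxt

    cl = pxt.Client()
    t = cl.create_table('computed_err_demo', {'c6': pxt.JsonType(nullable=False)})  # JsonType: assumed type for the dict column
    t.insert({'c6': {'f2': i}} for i in range(20))

    # division by zero where f2 == 10: the column is still added, the status
    # counts one exception, and the failing row exposes errortype/errormsg
    status = t.add_column(add2=(t.c6.f2 - 10) / (t.c6.f2 - 10))
    assert status.num_excs == 1
    bad = t[t.add2.errortype != None][t.c6.f2, t.add2.errortype, t.add2.errormsg].show()
    assert len(bad) == 1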
+ def test_describe(self, test_tbl: catalog.Table) -> None:
+ t = test_tbl
+ fn = lambda c2: np.full((3, 4), c2)
+ t.add_column(computed1=fn, type=ArrayType((3, 4), dtype=IntType()))
+ t.describe()
+ t.comment = 'This is a comment.'
+ t.describe()
+
+ # TODO: how do you check the output of these?
+ _ = repr(t)
+ _ = t._repr_html_()
+
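For completeness, the describe()/comment combination in standalone form (illustrative table name):

    import pixeltable as pxt

    cl = pxt.Client()
    t = cl.create_table('describe_demo', {'c2': pxt.IntType(nullable=False)})
    t.describe()                             # prints the table schema
    t.comment = 'Demo table for describe()'  # set a comment, then describe() again, as the test does
    t.describe()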
+ def test_common_col_names(self, test_client: pxt.Client) -> None:
+ """Make sure that commonly used column names don't collide with Table member vars"""
+ cl = test_client
+ schema = {'id': IntType(nullable=False), 'name': StringType(nullable=False)}
+ tbl = cl.create_table('test', schema)
+ status = tbl.insert({'id': id, 'name': str(id)} for id in range(10))
+ assert status.num_rows == 10
+ assert status.num_excs == 0
+ assert tbl.count() == 10
+ # we can create references to those columns via __getattr__
+ _ = tbl.select(tbl.id, tbl.name).collect()
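Finally, a sketch of dict-based inserts and the returned status object that this last test checks (illustrative table name):

    import pixeltable as pxt

    cl = pxt.Client()
    t = cl.create_table('names_demo', {'id': pxt.IntType(nullable=False), 'name': pxt.StringType(nullable=False)})

    status = t.insert({'id': i, 'name': str(i)} for i in range(10))
    assert status.num_rows == 10 and status.num_excs == 0
    # column names that shadow builtins or Table attributes still resolve via __getattr__
    _ = t.select(t.id, t.name).collect()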