pixeltable 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140)
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
@@ -0,0 +1,433 @@
1
+ import datetime
2
+ import pickle
3
+ from pathlib import Path
4
+ from typing import Any, Dict
5
+
6
+ import bs4
7
+ import numpy as np
8
+ import pytest
9
+ import requests
10
+ from pycocotools.coco import COCO
11
+
12
+ import pixeltable as pxt
13
+ from pixeltable import catalog
14
+ from pixeltable import exceptions as excs
15
+ from pixeltable.iterators import FrameIterator
16
+ from pixeltable.tests.utils import get_video_files, get_audio_files, skip_test_if_not_installed
17
+
18
+
19
+ class TestDataFrame:
20
+ def test_select_where(self, test_tbl: catalog.Table) -> None:
21
+ t = test_tbl
22
+ res1 = t[t.c1, t.c2, t.c3].show(0)
23
+ res2 = t.select(t.c1, t.c2, t.c3).show(0)
24
+ assert res1 == res2
25
+
26
+ res1 = t[t.c2 < 10][t.c1, t.c2, t.c3].show(0)
27
+ res2 = t.where(t.c2 < 10).select(t.c1, t.c2, t.c3).show(0)
28
+ assert res1 == res2
29
+
30
+ res3 = t.where(t.c2 < 10).select(c1=t.c1, c2=t.c2, c3=t.c3).show(0)
31
+ assert res1 == res3
32
+
33
+ res4 = t.where(t.c2 < 10).select(t.c1, c2=t.c2, c3=t.c3).show(0)
34
+ assert res1 == res4
35
+
36
+ _ = t.where(t.c2 < 10).select(t.c2, t.c2).show(0) # repeated name no error
37
+
38
+ # duplicate select list
39
+ with pytest.raises(excs.Error) as exc_info:
40
+ _ = t.select(t.c1).select(t.c2).show(0)
41
+ assert 'already specified' in str(exc_info.value)
42
+
43
+ # invalid expr in select list: Callable is not a valid literal
44
+ with pytest.raises(TypeError) as exc_info:
45
+ _ = t.select(datetime.datetime.now).show(0)
46
+ assert 'Not a valid literal' in str(exc_info.value)
47
+
48
+ # catch invalid name in select list from user input
49
+ # only check stuff that's not caught by python kwargs checker
50
+ with pytest.raises(excs.Error) as exc_info:
51
+ _ = t.select(t.c1, **{'c2-1': t.c2}).show(0)
52
+ assert 'Invalid name' in str(exc_info.value)
53
+
54
+ with pytest.raises(excs.Error) as exc_info:
55
+ _ = t.select(t.c1, **{'': t.c2}).show(0)
56
+ assert 'Invalid name' in str(exc_info.value)
57
+
58
+ with pytest.raises(excs.Error) as exc_info:
59
+ _ = t.select(t.c1, **{'foo.bar': t.c2}).show(0)
60
+ assert 'Invalid name' in str(exc_info.value)
61
+
62
+ with pytest.raises(excs.Error) as exc_info:
63
+ _ = t.select(t.c1, _c3=t.c2).show(0)
64
+ assert 'Invalid name' in str(exc_info.value)
65
+
66
+ # catch repeated name from user input
67
+ with pytest.raises(excs.Error) as exc_info:
68
+ _ = t.select(t.c2, c2=t.c1).show(0)
69
+ assert 'Repeated column name' in str(exc_info.value)
70
+
71
+ with pytest.raises(excs.Error) as exc_info:
72
+ _ = t.select(t.c2+1, col_0=t.c2).show(0)
73
+ assert 'Repeated column name' in str(exc_info.value)
74
+
75
+ def test_result_set_iterator(self, test_tbl: catalog.Table) -> None:
76
+ t = test_tbl
77
+ res = t.select(t.c1, t.c2, t.c3).collect()
78
+ pd_df = res.to_pandas()
79
+
80
+ def check_row(row: Dict[str, Any], idx: int) -> None:
81
+ assert len(row) == 3
82
+ assert 'c1' in row
83
+ assert row['c1'] == pd_df['c1'][idx]
84
+ assert 'c2' in row
85
+ assert row['c2'] == pd_df['c2'][idx]
86
+ assert 'c3' in row
87
+ assert row['c3'] == pd_df['c3'][idx]
88
+
89
+ # row iteration
90
+ for idx, row in enumerate(res):
91
+ check_row(row, idx)
92
+
93
+ # row access
94
+ row = res[0]
95
+ check_row(row, 0)
96
+
97
+ # column access
98
+ col_values = res['c2']
99
+ assert col_values == pd_df['c2'].values.tolist()
100
+
101
+ # cell access
102
+ assert res[0, 'c2'] == pd_df['c2'][0]
103
+ assert res[0, 'c2'] == res[0, 1]
104
+
105
+ with pytest.raises(excs.Error) as exc_info:
106
+ _ = res['does_not_exist']
107
+ assert 'Invalid column name' in str(exc_info.value)
108
+
109
+ with pytest.raises(excs.Error) as exc_info:
110
+ _ = res[0, 'does_not_exist']
111
+ assert 'Invalid column name' in str(exc_info.value)
112
+
113
+ with pytest.raises(excs.Error) as exc_info:
114
+ _ = res[0, 0, 0]
115
+ assert 'Bad index' in str(exc_info.value)
116
+
117
+ with pytest.raises(excs.Error) as exc_info:
118
+ _ = res['c2', 0]
119
+ assert 'Bad index' in str(exc_info.value)
120
+
121
+ def test_order_by(self, test_tbl: catalog.Table) -> None:
122
+ t = test_tbl
123
+ res = t.select(t.c4, t.c2).order_by(t.c4).order_by(t.c2, asc=False).show(0)
124
+
125
+ # invalid expr in order_by()
126
+ with pytest.raises(excs.Error) as exc_info:
127
+ _ = t.order_by(datetime.datetime.now()).show(0)
128
+ assert 'Invalid expression' in str(exc_info.value)
129
+
130
+ def test_head_tail(self, test_tbl: catalog.Table) -> None:
131
+ t = test_tbl
132
+ res = t.head(10).to_pandas()
133
+ assert np.all(res.c2 == list(range(10)))
134
+ # Where is applied
135
+ res = t.where(t.c2 > 9).head(10).to_pandas()
136
+ assert np.all(res.c2 == list(range(10, 20)))
137
+ # order_by() is an error
138
+ with pytest.raises(excs.Error) as exc_info:
139
+ _ = t.order_by(t.c2).head(10)
140
+ assert 'cannot be used with order_by' in str(exc_info.value)
141
+
142
+ res = t.tail().to_pandas()
143
+ assert np.all(res.c2 == list(range(90, 100)))
144
+ res = t.where(t.c2 < 90).tail().to_pandas()
145
+ assert np.all(res.c2 == list(range(80, 90)))
146
+ # order_by() is an error
147
+ with pytest.raises(excs.Error) as exc_info:
148
+ _ = t.order_by(t.c2).tail(10)
149
+ assert 'cannot be used with order_by' in str(exc_info.value)
150
+
151
+ def test_describe(self, test_tbl: catalog.Table) -> None:
152
+ t = test_tbl
153
+ df = t.select(t.c1).where(t.c2 < 10).limit(10)
154
+ df.describe()
155
+
156
+ # TODO: how do you check the output of these?
157
+ _ = df.__repr__()
158
+ _ = df._repr_html_()
159
+
160
+ def test_count(self, test_tbl: catalog.Table, indexed_img_tbl: catalog.Table) -> None:
161
+ skip_test_if_not_installed('nos')
162
+ t = test_tbl
163
+ cnt = t.count()
164
+ assert cnt == 100
165
+
166
+ cnt = t.where(t.c2 < 10).count()
167
+ assert cnt == 10
168
+
169
+ # count() doesn't work with similarity search
170
+ t = indexed_img_tbl
171
+ probe = t.select(t.img).show(1)
172
+ img = probe[0, 0]
173
+ with pytest.raises(excs.Error):
174
+ _ = t.where(t.img.nearest(img)).count()
175
+ with pytest.raises(excs.Error):
176
+ _ = t.where(t.img.nearest('car')).count()
177
+
178
+ # for now, count() doesn't work with non-SQL Where clauses
179
+ with pytest.raises(excs.Error):
180
+ _ = t.where(t.img.width > 100).count()
181
+
182
+ def test_select_literal(self, test_tbl: catalog.Table) -> None:
183
+ t = test_tbl
184
+ res = t.select(1.0).where(t.c2 < 10).collect()
185
+ assert res[res.column_names()[0]] == [1.0] * 10
186
+
187
+ def test_html_media_url(self, test_client: pxt.Client) -> None:
188
+ tab = test_client.create_table('test_html_repr', {'video': pxt.VideoType(), 'audio': pxt.AudioType()})
189
+ status = tab.insert(video=get_video_files()[0], audio=get_audio_files()[0])
190
+ assert status.num_rows == 1
191
+ assert status.num_excs == 0
192
+
193
+ res = tab.select(tab.video, tab.audio).collect()
194
+ doc = bs4.BeautifulSoup(res._repr_html_(), features='html.parser')
195
+ video_tags = doc.find_all('video')
196
+ assert len(video_tags) == 1
197
+ audio_tags = doc.find_all('audio')
198
+ assert len(audio_tags) == 1
199
+
200
+ # get the source elements and test their src attributes
201
+ for tag in video_tags + audio_tags:
202
+ sources = tag.find_all('source')
203
+ assert len(sources) == 1
204
+ for src in sources:
205
+ response = requests.get(src['src'])
206
+ assert response.status_code == 200
207
+
208
+ def test_to_pytorch_dataset(self, all_datatypes_tbl: catalog.Table):
209
+ """ tests all types are handled correctly in this conversion
210
+ """
211
+ import torch
212
+
213
+ t = all_datatypes_tbl
214
+ df = t.where(t.row_id < 1)
215
+ assert df.count() > 0
216
+ ds = df.to_pytorch_dataset()
217
+ type_dict = dict(zip(df.get_column_names(),df.get_column_types()))
218
+ for tup in ds:
219
+ for col in df.get_column_names():
220
+ assert col in tup
221
+
222
+ arrval = tup['c_array']
223
+ assert isinstance(arrval, np.ndarray)
224
+ col_type = type_dict['c_array']
225
+ assert arrval.dtype == col_type.numpy_dtype()
226
+ assert arrval.shape == col_type.shape
227
+ assert arrval.dtype == np.float32
228
+ assert arrval.flags["WRITEABLE"], 'required by pytorch collate function'
229
+
230
+ assert isinstance(tup['c_bool'], bool)
231
+ assert isinstance(tup['c_int'], int)
232
+ assert isinstance(tup['c_float'], float)
233
+ assert isinstance(tup['c_timestamp'], float)
234
+ assert torch.is_tensor(tup['c_image'])
235
+ assert isinstance(tup['c_video'], str)
236
+ assert isinstance(tup['c_json'], dict)
237
+
238
+ def test_to_pytorch_image_format(self, all_datatypes_tbl: catalog.Table) -> None:
239
+ """ tests the image_format parameter is honored
240
+ """
241
+ import torch
242
+ import torchvision.transforms as T
243
+
244
+ W, H = 220, 224 # make different from each other
245
+ t = all_datatypes_tbl
246
+ df = t.select(
247
+ t.row_id,
248
+ t.c_image,
249
+ c_image_xformed=t.c_image.resize([W, H]).convert('RGB')
250
+ ).where(t.row_id < 1)
251
+
252
+ pandas_df = df.show().to_pandas()
253
+ im_plain = pandas_df['c_image'].values[0]
254
+ im_xformed = pandas_df['c_image_xformed'].values[0]
255
+ assert pandas_df.shape[0] == 1
256
+
257
+ ds = df.to_pytorch_dataset(image_format='np')
258
+ ds_ptformat = df.to_pytorch_dataset(image_format='pt')
259
+
260
+ elt_count = 0
261
+ for elt, elt_pt in zip(ds, ds_ptformat):
262
+ arr_plain = elt['c_image']
263
+ assert isinstance(arr_plain, np.ndarray)
264
+ assert arr_plain.flags["WRITEABLE"], 'required by pytorch collate function'
265
+
266
+ # NB: compare numpy arrays because the PIL.Image object itself is not using the same file.
267
+ assert (arr_plain == np.array(im_plain)).all(), 'numpy image should be the same as the original'
268
+ arr_xformed = elt['c_image_xformed']
269
+ assert isinstance(arr_xformed, np.ndarray)
270
+ assert arr_xformed.flags["WRITEABLE"], 'required by pytorch collate function'
271
+
272
+ assert arr_xformed.shape == (H, W, 3)
273
+ assert arr_xformed.dtype == np.uint8
274
+ # same as above, compare numpy array bc PIL.Image object itself is not using same file.
275
+ assert (arr_xformed == np.array(im_xformed)).all(),\
276
+ 'numpy image array for xformed image should be the same as the original'
277
+
278
+ # now compare pytorch version
279
+ arr_pt = elt_pt['c_image']
280
+ assert torch.is_tensor(arr_pt)
281
+ arr_pt = elt_pt['c_image_xformed']
282
+ assert torch.is_tensor(arr_pt)
283
+ assert arr_pt.shape == (3, H, W)
284
+ assert arr_pt.dtype == torch.float32
285
+ assert (0.0 <= arr_pt).all()
286
+ assert (arr_pt <= 1.0).all()
287
+ assert torch.isclose(T.ToTensor()(arr_xformed), arr_pt).all(),\
288
+ 'pytorch image should be consistent with numpy image'
289
+ elt_count += 1
290
+ assert elt_count == 1
291
+
292
+ @pytest.mark.skip('Flaky test (fails intermittently)')
293
+ def test_to_pytorch_dataloader(self, all_datatypes_tbl: catalog.Table) -> None:
294
+ """ Tests the dataset works well with pytorch dataloader:
295
+ 1. compatibility with multiprocessing
296
+ 2. compatibility of all types with default collate_fn
297
+ """
298
+ import torch.utils.data
299
+ @pxt.udf(param_types=[pxt.JsonType()], return_type=pxt.JsonType())
300
+ def restrict_json_for_default_collate(obj):
301
+ keys = ['id', 'label', 'iscrowd', 'bounding_box']
302
+ return {k: obj[k] for k in keys}
303
+
304
+ t = all_datatypes_tbl
305
+ df = t.select(
306
+ t.row_id,
307
+ t.c_int,
308
+ t.c_float,
309
+ t.c_bool,
310
+ t.c_timestamp,
311
+ t.c_array,
312
+ t.c_video,
313
+ # default collate_fn doesn't support null values, nor lists of different lengths
314
+ # but does allow some dictionaries if they are uniform
315
+ c_json = restrict_json_for_default_collate(t.c_json.detections[0]),
316
+ # images must be uniform shape for pytorch collate_fn to not fail
317
+ c_image=t.c_image.resize([220, 224]).convert('RGB')
318
+ )
319
+ df_size = df.count()
320
+ ds = df.to_pytorch_dataset(image_format='pt')
321
+ # test serialization:
322
+ # - pickle.dumps() and pickle.loads() must work so that
323
+ # we can use num_workers > 0
324
+ x = pickle.dumps(ds)
325
+ _ = pickle.loads(x)
326
+
327
+ # test we get all rows
328
+ def check_recover_all_rows(ds, size : int, **kwargs):
329
+ dl = torch.utils.data.DataLoader(ds, **kwargs)
330
+ loaded_ids = set()
331
+ for batch in dl:
332
+ for row_id in batch['row_id']:
333
+ val = int(row_id) # np.int -> int or will fail set equality test below.
334
+ assert val not in loaded_ids, val
335
+ loaded_ids.add(val)
336
+
337
+ assert loaded_ids == set(range(size))
338
+
339
+ # check different number of workers
340
+ check_recover_all_rows(ds, size=df_size, batch_size=3, num_workers=0) # within this process
341
+ check_recover_all_rows(ds, size=df_size, batch_size=3, num_workers=2) # two separate processes
342
+
343
+ # check edge case where some workers get no rows
344
+ short_size = 1
345
+ df_short = df.where(t.row_id < short_size)
346
+ ds_short = df_short.to_pytorch_dataset(image_format='pt')
347
+ check_recover_all_rows(ds_short, size=short_size, batch_size=13, num_workers=short_size+1)
348
+
349
+ def test_pytorch_dataset_caching(self, all_datatypes_tbl: catalog.Table) -> None:
350
+ """ Tests that dataset caching works
351
+ 1. using the same dataset twice in a row uses the cache
352
+ 2. adding a row to the table invalidates the cached version
353
+ 3. changing the select list invalidates the cached version
354
+ """
355
+ t = all_datatypes_tbl
356
+
357
+ t.drop_column('c_video') # null value video column triggers internal assertions in DataRow
358
+ # see https://github.com/pixeltable/pixeltable/issues/38
359
+
360
+ t.drop_column('c_array') # no support yet for null array values in the pytorch dataset
361
+
362
+ def _get_mtimes(dir: Path):
363
+ return {p.name: p.stat().st_mtime for p in dir.iterdir()}
364
+
365
+ # check result cached
366
+ ds1 = t.to_pytorch_dataset(image_format='pt')
367
+ ds1_mtimes = _get_mtimes(ds1.path)
368
+
369
+ ds2 = t.to_pytorch_dataset(image_format='pt')
370
+ ds2_mtimes = _get_mtimes(ds2.path)
371
+ assert ds2.path == ds1.path, 'result should be cached'
372
+ assert ds2_mtimes == ds1_mtimes, 'no extra file system work should have occurred'
373
+
374
+ # check invalidation on insert
375
+ t_size = t.count()
376
+ t.insert(row_id=t_size)
377
+ ds3 = t.to_pytorch_dataset(image_format='pt')
378
+ assert ds3.path != ds1.path, 'different path should be used'
379
+
380
+ # check select list invalidation
381
+ ds4 = t.select(t.row_id).to_pytorch_dataset(image_format='pt')
382
+ assert ds4.path != ds3.path, 'different select list, hence different path should be used'
383
+
384
+ def test_to_coco(self, test_client: pxt.Client) -> None:
385
+ skip_test_if_not_installed('nos')
386
+ cl = test_client
387
+ base_t = cl.create_table('videos', {'video': pxt.VideoType()})
388
+ args = {'video': base_t.video, 'fps': 1}
389
+ view_t = cl.create_view('frames', base_t, iterator_class=FrameIterator, iterator_args=args)
390
+ from pixeltable.functions.nos.object_detection_2d import yolox_medium
391
+ view_t.add_column(detections=yolox_medium(view_t.frame))
392
+ base_t.insert(video=get_video_files()[0])
393
+
394
+ @pxt.udf(return_type=pxt.JsonType(nullable=False), param_types=[pxt.JsonType(nullable=False)])
395
+ def yolo_to_coco(detections):
396
+ bboxes, labels = detections['bboxes'], detections['labels']
397
+ num_annotations = len(detections['bboxes'])
398
+ assert num_annotations == len(detections['labels'])
399
+ result = []
400
+ for i in range(num_annotations):
401
+ bbox = bboxes[i]
402
+ ann = {
403
+ 'bbox': [round(bbox[0]), round(bbox[1]), round(bbox[2] - bbox[0]), round(bbox[3] - bbox[1])],
404
+ 'category': labels[i],
405
+ }
406
+ result.append(ann)
407
+ return result
408
+
409
+ query = view_t.select({'image': view_t.frame, 'annotations': yolo_to_coco(view_t.detections)})
410
+ path = query.to_coco_dataset()
411
+ # we get a valid COCO dataset
412
+ coco_ds = COCO(path)
413
+ assert len(coco_ds.imgs) == view_t.count()
414
+
415
+ # we call to_coco_dataset() again and get the cached dataset
416
+ new_path = query.to_coco_dataset()
417
+ assert path == new_path
418
+
419
+ # the cache is invalidated when we add more data
420
+ base_t.insert(video=get_video_files()[1])
421
+ new_path = query.to_coco_dataset()
422
+ assert path != new_path
423
+ coco_ds = COCO(new_path)
424
+ assert len(coco_ds.imgs) == view_t.count()
425
+
426
+ # incorrect select list
427
+ with pytest.raises(excs.Error) as exc_info:
428
+ _ = view_t.select({'image': view_t.frame, 'annotations': view_t.detections}).to_coco_dataset()
429
+ assert '"annotations" is not a list' in str(exc_info.value)
430
+
431
+ with pytest.raises(excs.Error) as exc_info:
432
+ _ = view_t.select(view_t.detections).to_coco_dataset()
433
+ assert 'missing key "image"' in str(exc_info.value).lower()
@@ -1,91 +1,107 @@
1
1
  import pytest
2
2
 
3
- import pixeltable as pt
4
- from pixeltable import exceptions as exc
3
+ import pixeltable as pxt
4
+ from pixeltable import exceptions as excs
5
5
  from pixeltable.tests.utils import make_tbl
6
- from pixeltable import catalog
7
6
 
8
7
 
9
8
  class TestDirs:
10
- def test_create(self, test_db: catalog.Db) -> None:
11
- db = test_db
9
+ def test_create(self, test_client: pxt.Client) -> None:
10
+ cl = test_client
12
11
  dirs = ['dir1', 'dir1.sub1', 'dir1.sub1.subsub1']
13
12
  for name in dirs:
14
- db.create_dir(name)
13
+ cl.create_dir(name)
15
14
 
16
- with pytest.raises(exc.BadFormatError):
17
- db.create_dir('1dir')
18
- with pytest.raises(exc.BadFormatError):
19
- db.create_dir('_dir1')
20
- with pytest.raises(exc.BadFormatError):
21
- db.create_dir('dir 1')
22
- with pytest.raises(exc.BadFormatError):
23
- db.create_dir('dir1..sub2')
24
- with pytest.raises(exc.BadFormatError):
25
- db.create_dir('dir1.sub2.')
26
- with pytest.raises(exc.BadFormatError):
27
- db.create_dir('dir1:sub2.')
15
+ # invalid names
16
+ with pytest.raises(excs.Error):
17
+ cl.create_dir('1dir')
18
+ with pytest.raises(excs.Error):
19
+ cl.create_dir('_dir1')
20
+ with pytest.raises(excs.Error):
21
+ cl.create_dir('dir 1')
22
+ with pytest.raises(excs.Error):
23
+ cl.create_dir('dir1..sub2')
24
+ with pytest.raises(excs.Error):
25
+ cl.create_dir('dir1.sub2.')
26
+ with pytest.raises(excs.Error):
27
+ cl.create_dir('dir1:sub2.')
28
28
 
29
29
  # existing dirs
30
- with pytest.raises(exc.DuplicateNameError):
31
- db.create_dir('dir1')
32
- with pytest.raises(exc.DuplicateNameError):
33
- db.create_dir('dir1.sub1')
34
- with pytest.raises(exc.DuplicateNameError):
35
- db.create_dir('dir1.sub1.subsub1')
30
+ with pytest.raises(excs.Error):
31
+ cl.create_dir('dir1')
32
+ cl.create_dir('dir1', ignore_errors=True)
33
+ with pytest.raises(excs.Error):
34
+ cl.create_dir('dir1.sub1')
35
+ with pytest.raises(excs.Error):
36
+ cl.create_dir('dir1.sub1.subsub1')
36
37
 
37
38
  # existing table
38
- make_tbl(db, 'dir1.t1')
39
- with pytest.raises(exc.DuplicateNameError):
40
- db.create_dir('dir1.t1')
39
+ make_tbl(cl, 'dir1.t1')
40
+ with pytest.raises(excs.Error):
41
+ cl.create_dir('dir1.t1')
41
42
 
42
- with pytest.raises(exc.UnknownEntityError):
43
- db.create_dir('dir2.sub2')
44
- make_tbl(db, 't2')
45
- with pytest.raises(exc.UnknownEntityError):
46
- db.create_dir('t2.sub2')
43
+ with pytest.raises(excs.Error):
44
+ cl.create_dir('dir2.sub2')
45
+ make_tbl(cl, 't2')
46
+ with pytest.raises(excs.Error):
47
+ cl.create_dir('t2.sub2')
47
48
 
48
49
  # new client: force loading from store
49
- cl2 = pt.Client()
50
- db = cl2.get_db('test')
50
+ cl2 = pxt.Client(reload=True)
51
51
 
52
- listing = db.list_dirs(recursive=True)
52
+ listing = cl2.list_dirs(recursive=True)
53
53
  assert listing == dirs
54
- listing = db.list_dirs(recursive=False)
54
+ listing = cl2.list_dirs(recursive=False)
55
55
  assert listing == ['dir1']
56
- listing = db.list_dirs('dir1', recursive=True)
56
+ listing = cl2.list_dirs('dir1', recursive=True)
57
57
  assert listing == ['dir1.sub1', 'dir1.sub1.subsub1']
58
- listing = db.list_dirs('dir1', recursive=False)
58
+ listing = cl2.list_dirs('dir1', recursive=False)
59
59
  assert listing == ['dir1.sub1']
60
- listing = db.list_dirs('dir1.sub1', recursive=True)
60
+ listing = cl2.list_dirs('dir1.sub1', recursive=True)
61
61
  assert listing == ['dir1.sub1.subsub1']
62
- listing = db.list_dirs('dir1.sub1', recursive=False)
62
+ listing = cl2.list_dirs('dir1.sub1', recursive=False)
63
63
  assert listing == ['dir1.sub1.subsub1']
64
64
 
65
- def test_rm(self, test_db: catalog.Db) -> None:
66
- db = test_db
65
+ def test_rm(self, test_client: pxt.Client) -> None:
66
+ cl = test_client
67
67
  dirs = ['dir1', 'dir1.sub1', 'dir1.sub1.subsub1']
68
68
  for name in dirs:
69
- db.create_dir(name)
70
- make_tbl(db, 't1')
71
- make_tbl(db, 'dir1.t1')
69
+ cl.create_dir(name)
70
+ make_tbl(cl, 't1')
71
+ make_tbl(cl, 'dir1.t1')
72
72
 
73
- with pytest.raises(exc.BadFormatError):
74
- db.rm_dir('1dir')
75
- with pytest.raises(exc.BadFormatError):
76
- db.rm_dir('dir1..sub1')
77
- with pytest.raises(exc.UnknownEntityError):
78
- db.rm_dir('dir2')
79
- with pytest.raises(exc.UnknownEntityError):
80
- db.rm_dir('t1')
73
+ # bad name
74
+ with pytest.raises(excs.Error):
75
+ cl.rm_dir('1dir')
76
+ # bad path
77
+ with pytest.raises(excs.Error):
78
+ cl.rm_dir('dir1..sub1')
79
+ # doesn't exist
80
+ with pytest.raises(excs.Error):
81
+ cl.rm_dir('dir2')
82
+ # not empty
83
+ with pytest.raises(excs.Error):
84
+ cl.rm_dir('dir1')
81
85
 
82
- with pytest.raises(exc.DirectoryNotEmptyError):
83
- db.rm_dir('dir1')
86
+ cl.rm_dir('dir1.sub1.subsub1')
87
+ assert cl.list_dirs('dir1.sub1') == []
84
88
 
85
- def test_rename_tbl(self, test_db: catalog.Db) -> None:
86
- db = test_db
87
- db.create_dir('dir1')
88
- make_tbl(db, 'dir1.t1')
89
- assert db.list_tables('dir1') == ['dir1.t1']
90
- db.rename_table('dir1.t1', 't2')
91
- assert db.list_tables('dir1') == ['dir1.t2']
89
+ # check after reloading
90
+ cl = pxt.Client(reload=True)
91
+ assert cl.list_dirs('dir1.sub1') == []
92
+
93
+ def test_move(self, test_client: pxt.Client) -> None:
94
+ cl = test_client
95
+ cl.create_dir('dir1')
96
+ cl.create_dir('dir1.sub1')
97
+ make_tbl(cl, 'dir1.sub1.t1')
98
+ assert cl.list_tables('dir1') == ['dir1.sub1.t1']
99
+ cl.move('dir1.sub1.t1', 'dir1.sub1.t2')
100
+ assert cl.list_tables('dir1') == ['dir1.sub1.t2']
101
+ cl.create_dir('dir2')
102
+ cl.move('dir1', 'dir2.dir1')
103
+ assert cl.list_tables('dir2') == ['dir2.dir1.sub1.t2']
104
+
105
+ # new client: force loading from store
106
+ cl2 = pxt.Client(reload=True)
107
+ assert cl2.list_tables('dir2') == ['dir2.dir1.sub1.t2']