pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (110)
  1. pixeltable/__init__.py +20 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +23 -7
  4. pixeltable/catalog/insertable_table.py +32 -19
  5. pixeltable/catalog/table.py +210 -20
  6. pixeltable/catalog/table_version.py +272 -111
  7. pixeltable/catalog/table_version_path.py +6 -1
  8. pixeltable/dataframe.py +184 -110
  9. pixeltable/datatransfer/__init__.py +1 -0
  10. pixeltable/datatransfer/label_studio.py +526 -0
  11. pixeltable/datatransfer/remote.py +113 -0
  12. pixeltable/env.py +213 -79
  13. pixeltable/exec/__init__.py +2 -1
  14. pixeltable/exec/data_row_batch.py +6 -7
  15. pixeltable/exec/expr_eval_node.py +28 -28
  16. pixeltable/exec/sql_scan_node.py +7 -6
  17. pixeltable/exprs/__init__.py +4 -3
  18. pixeltable/exprs/column_ref.py +11 -2
  19. pixeltable/exprs/comparison.py +39 -1
  20. pixeltable/exprs/data_row.py +7 -0
  21. pixeltable/exprs/expr.py +26 -19
  22. pixeltable/exprs/function_call.py +17 -18
  23. pixeltable/exprs/globals.py +14 -2
  24. pixeltable/exprs/image_member_access.py +9 -28
  25. pixeltable/exprs/in_predicate.py +96 -0
  26. pixeltable/exprs/inline_array.py +13 -11
  27. pixeltable/exprs/inline_dict.py +15 -13
  28. pixeltable/exprs/row_builder.py +7 -1
  29. pixeltable/exprs/similarity_expr.py +67 -0
  30. pixeltable/ext/functions/whisperx.py +30 -0
  31. pixeltable/ext/functions/yolox.py +16 -0
  32. pixeltable/func/__init__.py +0 -2
  33. pixeltable/func/aggregate_function.py +5 -2
  34. pixeltable/func/callable_function.py +57 -13
  35. pixeltable/func/expr_template_function.py +14 -3
  36. pixeltable/func/function.py +35 -4
  37. pixeltable/func/signature.py +5 -15
  38. pixeltable/func/udf.py +8 -12
  39. pixeltable/functions/fireworks.py +9 -4
  40. pixeltable/functions/huggingface.py +48 -5
  41. pixeltable/functions/openai.py +49 -11
  42. pixeltable/functions/pil/image.py +61 -64
  43. pixeltable/functions/together.py +32 -6
  44. pixeltable/functions/util.py +0 -43
  45. pixeltable/functions/video.py +46 -8
  46. pixeltable/globals.py +443 -0
  47. pixeltable/index/__init__.py +1 -0
  48. pixeltable/index/base.py +9 -2
  49. pixeltable/index/btree.py +54 -0
  50. pixeltable/index/embedding_index.py +91 -15
  51. pixeltable/io/__init__.py +4 -0
  52. pixeltable/io/globals.py +59 -0
  53. pixeltable/{utils → io}/hf_datasets.py +48 -17
  54. pixeltable/io/pandas.py +148 -0
  55. pixeltable/{utils → io}/parquet.py +58 -33
  56. pixeltable/iterators/__init__.py +1 -1
  57. pixeltable/iterators/base.py +8 -4
  58. pixeltable/iterators/document.py +225 -93
  59. pixeltable/iterators/video.py +16 -9
  60. pixeltable/metadata/__init__.py +8 -4
  61. pixeltable/metadata/converters/convert_12.py +3 -0
  62. pixeltable/metadata/converters/convert_13.py +41 -0
  63. pixeltable/metadata/converters/convert_14.py +13 -0
  64. pixeltable/metadata/converters/convert_15.py +29 -0
  65. pixeltable/metadata/converters/util.py +63 -0
  66. pixeltable/metadata/schema.py +12 -6
  67. pixeltable/plan.py +11 -24
  68. pixeltable/store.py +16 -23
  69. pixeltable/tool/create_test_db_dump.py +49 -14
  70. pixeltable/type_system.py +27 -58
  71. pixeltable/utils/coco.py +94 -0
  72. pixeltable/utils/documents.py +42 -12
  73. pixeltable/utils/http_server.py +70 -0
  74. pixeltable-0.2.7.dist-info/METADATA +137 -0
  75. pixeltable-0.2.7.dist-info/RECORD +126 -0
  76. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
  77. pixeltable/client.py +0 -600
  78. pixeltable/exprs/image_similarity_predicate.py +0 -58
  79. pixeltable/func/batched_function.py +0 -53
  80. pixeltable/func/nos_function.py +0 -202
  81. pixeltable/tests/conftest.py +0 -171
  82. pixeltable/tests/ext/test_yolox.py +0 -21
  83. pixeltable/tests/functions/test_fireworks.py +0 -43
  84. pixeltable/tests/functions/test_functions.py +0 -60
  85. pixeltable/tests/functions/test_huggingface.py +0 -158
  86. pixeltable/tests/functions/test_openai.py +0 -162
  87. pixeltable/tests/functions/test_together.py +0 -112
  88. pixeltable/tests/test_audio.py +0 -65
  89. pixeltable/tests/test_catalog.py +0 -27
  90. pixeltable/tests/test_client.py +0 -21
  91. pixeltable/tests/test_component_view.py +0 -379
  92. pixeltable/tests/test_dataframe.py +0 -440
  93. pixeltable/tests/test_dirs.py +0 -107
  94. pixeltable/tests/test_document.py +0 -120
  95. pixeltable/tests/test_exprs.py +0 -802
  96. pixeltable/tests/test_function.py +0 -332
  97. pixeltable/tests/test_index.py +0 -138
  98. pixeltable/tests/test_migration.py +0 -44
  99. pixeltable/tests/test_nos.py +0 -54
  100. pixeltable/tests/test_snapshot.py +0 -231
  101. pixeltable/tests/test_table.py +0 -1343
  102. pixeltable/tests/test_transactional_directory.py +0 -42
  103. pixeltable/tests/test_types.py +0 -52
  104. pixeltable/tests/test_video.py +0 -159
  105. pixeltable/tests/test_view.py +0 -535
  106. pixeltable/tests/utils.py +0 -442
  107. pixeltable/utils/clip.py +0 -18
  108. pixeltable-0.2.5.dist-info/METADATA +0 -128
  109. pixeltable-0.2.5.dist-info/RECORD +0 -139
  110. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
@@ -1,231 +0,0 @@
1
- from typing import Any, Dict
2
-
3
- import numpy as np
4
- import pytest
5
-
6
- import pixeltable as pxt
7
- import pixeltable.exceptions as excs
8
- from pixeltable.tests.utils import create_test_tbl, assert_resultset_eq, create_img_tbl, img_embed
9
- from pixeltable.type_system import IntType
10
-
11
-
12
- class TestSnapshot:
13
- def run_basic_test(
14
- self, cl: pxt.Client, tbl: pxt.Table, snap: pxt.Table, extra_items: Dict[str, Any], filter: Any,
15
- reload_md: bool
16
- ) -> None:
17
- tbl_path, snap_path = cl.get_path(tbl), cl.get_path(snap)
18
- # run the initial query against the base table here, before reloading, otherwise the filter breaks
19
- tbl_select_list = [tbl[col_name] for col_name in tbl.column_names()]
20
- tbl_select_list.extend([value_expr for _, value_expr in extra_items.items()])
21
- orig_resultset = tbl.select(*tbl_select_list).where(filter).order_by(tbl.c2).collect()
22
-
23
- if reload_md:
24
- # reload md
25
- cl = pxt.Client(reload=True)
26
- tbl = cl.get_table(tbl_path)
27
- snap = cl.get_table(snap_path)
28
-
29
- # view select list: base cols followed by view cols
30
- snap_select_list = [snap[col_name] for col_name in snap.column_names()[len(extra_items):]]
31
- snap_select_list.extend([snap[col_name] for col_name in extra_items.keys()])
32
- snap_query = snap.select(*snap_select_list).order_by(snap.c2)
33
- r1 = list(orig_resultset)
34
- r2 = list(snap_query.collect())
35
- assert_resultset_eq(snap_query.collect(), orig_resultset)
36
-
37
- # adding data to a base table doesn't change the snapshot
38
- rows = list(tbl.select(tbl.c1, tbl.c1n, tbl.c2, tbl.c3, tbl.c4, tbl.c5, tbl.c6, tbl.c7).collect())
39
- status = tbl.insert(rows)
40
- assert status.num_rows == len(rows)
41
- assert_resultset_eq(snap_query.collect(), orig_resultset)
42
-
43
- # update() doesn't affect the view
44
- status = tbl.update({'c3': tbl.c3 + 1.0})
45
- assert status.num_rows == tbl.count()
46
- assert_resultset_eq(snap_query.collect(), orig_resultset)
47
-
48
- # delete() doesn't affect the view
49
- num_tbl_rows = tbl.count()
50
- status = tbl.delete()
51
- assert status.num_rows == num_tbl_rows
52
- assert_resultset_eq(snap_query.collect(), orig_resultset)
53
-
54
- tbl.revert() # undo delete()
55
- tbl.revert() # undo update()
56
- tbl.revert() # undo insert()
57
- # can't revert a version referenced by a snapshot
58
- with pytest.raises(excs.Error) as excinfo:
59
- tbl.revert()
60
- assert 'version is needed' in str(excinfo.value)
61
-
62
- # can't drop a table with snapshots
63
- with pytest.raises(excs.Error) as excinfo:
64
- cl.drop_table(tbl_path)
65
- assert snap_path in str(excinfo.value)
66
-
67
- cl.drop_table(snap_path)
68
- cl.drop_table(tbl_path)
69
-
70
- def test_basic(self, test_client: pxt.Client) -> None:
71
- cl = test_client
72
- cl.create_dir('main')
73
- cl.create_dir('snap')
74
- tbl_path = 'main.tbl1'
75
- snap_path = 'snap.snap1'
76
-
77
- for reload_md in [False, True]:
78
- for has_filter in [False, True]:
79
- for has_cols in [False, True]:
80
- cl = pxt.Client(reload=True)
81
- tbl = create_test_tbl(name=tbl_path, client=cl)
82
- schema = {
83
- 'v1': tbl.c3 * 2.0,
84
- # include a lambda to make sure that is handled correctly
85
- 'v2': {'value': lambda c3: c3 * 2.0, 'type': pxt.FloatType()}
86
- } if has_cols else {}
87
- extra_items = {'v1': tbl.c3 * 2.0, 'v2': tbl.c3 * 2.0} if has_cols else {}
88
- filter = tbl.c2 < 10 if has_filter else None
89
- snap = cl.create_view(snap_path, tbl, schema=schema, filter=filter, is_snapshot=True)
90
- self.run_basic_test(cl, tbl, snap, extra_items=extra_items, filter=filter, reload_md=reload_md)
91
-
92
- def test_errors(self, test_client: pxt.Client) -> None:
93
- cl = test_client
94
- tbl = create_test_tbl(client=cl)
95
- snap = cl.create_view('snap', tbl, is_snapshot=True)
96
-
97
- with pytest.raises(pxt.Error) as excinfo:
98
- _ = snap.update({'c3': snap.c3 + 1.0})
99
- assert 'cannot update a snapshot' in str(excinfo.value).lower()
100
-
101
- with pytest.raises(pxt.Error) as excinfo:
102
- _ = snap.batch_update([{'c3': 1.0, 'c2': 1}])
103
- assert 'cannot update a snapshot' in str(excinfo.value).lower()
104
-
105
- with pytest.raises(pxt.Error) as excinfo:
106
- _ = snap.revert()
107
- assert 'cannot revert a snapshot' in str(excinfo.value).lower()
108
-
109
- with pytest.raises(pxt.Error) as excinfo:
110
- img_tbl = create_img_tbl(cl)
111
- snap = cl.create_view('img_snap', img_tbl, is_snapshot=True)
112
- snap.add_embedding_index('img', img_embed=img_embed)
113
- assert 'cannot add an index to a snapshot' in str(excinfo.value).lower()
114
-
115
- def test_views_of_snapshots(self, test_client: pxt.Client) -> None:
116
- cl = test_client
117
- t = cl.create_table('tbl', {'a': IntType()})
118
- rows = [{'a': 1}, {'a': 2}, {'a': 3}]
119
- status = t.insert(rows)
120
- assert status.num_rows == len(rows)
121
- assert status.num_excs == 0
122
- s1 = cl.create_view('s1', t, is_snapshot=True)
123
- v1 = cl.create_view('v1', s1, is_snapshot=False)
124
- s2 = cl.create_view('s2', v1, is_snapshot=True)
125
- v2 = cl.create_view('v2', s2, is_snapshot=False)
126
-
127
- def verify(s1: pxt.Table, s2: pxt.Table, v1: pxt.Table, v2: pxt.Table) -> None:
128
- assert s1.count() == len(rows)
129
- assert v1.count() == len(rows)
130
- assert s2.count() == len(rows)
131
- assert v2.count() == len(rows)
132
-
133
- verify(s1, s2, v1, v2)
134
-
135
- status = t.insert(rows)
136
- assert status.num_rows == len(rows)
137
- assert status.num_excs == 0
138
- verify(s1, s2, v1, v2)
139
-
140
- cl = pxt.Client(reload=True)
141
- s1 = cl.get_table('s1')
142
- s2 = cl.get_table('s2')
143
- v1 = cl.get_table('v1')
144
- v2 = cl.get_table('v2')
145
- verify(s1, s2, v1, v2)
146
-
147
- def test_snapshot_of_view_chain(self, test_client: pxt.Client) -> None:
148
- cl = test_client
149
- t = cl.create_table('tbl', {'a': IntType()})
150
- rows = [{'a': 1}, {'a': 2}, {'a': 3}]
151
- status = t.insert(rows)
152
- assert status.num_rows == len(rows)
153
- assert status.num_excs == 0
154
- v1 = cl.create_view('v1', t, is_snapshot=False)
155
- v2 = cl.create_view('v2', v1, is_snapshot=False)
156
- s = cl.create_view('s', v2, is_snapshot=True)
157
-
158
- def verify(v1: pxt.Table, v2: pxt.Table, s: pxt.Table) -> None:
159
- assert v1.count() == t.count()
160
- assert v2.count() == t.count()
161
- assert s.count() == len(rows)
162
-
163
- verify(v1, v2, s)
164
-
165
- status = t.insert(rows)
166
- assert status.num_rows == len(rows) * 3 # we also updated 2 views
167
- assert status.num_excs == 0
168
- verify(v1, v2, s)
169
-
170
- cl = pxt.Client(reload=True)
171
- v1 = cl.get_table('v1')
172
- v2 = cl.get_table('v2')
173
- s = cl.get_table('s')
174
- verify(v1, v2, s)
175
-
176
- def test_multiple_snapshot_paths(self, test_client: pxt.Client) -> None:
177
- cl = test_client
178
- t = create_test_tbl(cl)
179
- c4 = t.select(t.c4).order_by(t.c2).collect().to_pandas()['c4']
180
- orig_c3 = t.select(t.c3).collect().to_pandas()['c3']
181
- v = cl.create_view('v', base=t, schema={'v1': t.c3 + 1})
182
- s1 = cl.create_view('s1', v, is_snapshot=True)
183
- t.drop_column('c4')
184
- # s2 references the same view version as s1, but a different version of t (due to a schema change)
185
- s2 = cl.create_view('s2', v, is_snapshot=True)
186
- t.update({'c6': {'a': 17}})
187
- # s3 references the same view version as s2, but a different version of t (due to a data change)
188
- s3 = cl.create_view('s3', v, is_snapshot=True)
189
- t.update({'c3': t.c3 + 1})
190
- # s4 references different versions of t and v
191
- s4 = cl.create_view('s4', v, is_snapshot=True)
192
-
193
- def validate(t: pxt.Table, v: pxt.Table, s1: pxt.Table, s2: pxt.Table, s3: pxt.Table, s4: pxt.Table) -> None:
194
- # c4 is only visible in s1
195
- assert np.all(s1.select(s1.c4).collect().to_pandas()['c4'] == c4)
196
- with pytest.raises(AttributeError):
197
- _ = t.select(t.c4).collect()
198
- with pytest.raises(AttributeError):
199
- _ = v.select(v.c4).collect()
200
- with pytest.raises(AttributeError):
201
- _ = s2.select(s2.c4).collect()
202
- with pytest.raises(AttributeError):
203
- _ = s3.select(s3.c4).collect()
204
- with pytest.raises(AttributeError):
205
- _ = s4.select(s4.c4).collect()
206
-
207
- # c3
208
- assert np.all(t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] == orig_c3 + 1)
209
- assert np.all(s1.select(s1.c3).order_by(s1.c2).collect().to_pandas()['c3'] == orig_c3)
210
- assert np.all(s2.select(s2.c3).order_by(s2.c2).collect().to_pandas()['c3'] == orig_c3)
211
- assert np.all(s3.select(s3.c3).order_by(s3.c2).collect().to_pandas()['c3'] == orig_c3)
212
- assert np.all(s4.select(s4.c3).order_by(s4.c2).collect().to_pandas()['c3'] == orig_c3 + 1)
213
-
214
- # v1
215
- assert np.all(
216
- v.select(v.v1).order_by(v.c2).collect().to_pandas()['v1'] == \
217
- t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] + 1)
218
- assert np.all(s1.select(s1.v1).order_by(s1.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
219
- assert np.all(s2.select(s2.v1).order_by(s2.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
220
- assert np.all(s3.select(s3.v1).order_by(s3.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
221
- assert np.all(
222
- s4.select(s4.v1).order_by(s4.c2).collect().to_pandas()['v1'] == \
223
- t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] + 1)
224
-
225
- validate(t, v, s1, s2, s3, s4)
226
-
227
- # make sure it works after metadata reload
228
- cl = pxt.Client(reload=True)
229
- t, v = cl.get_table('test_tbl'), cl.get_table('v')
230
- s1, s2, s3, s4 = cl.get_table('s1'), cl.get_table('s2'), cl.get_table('s3'), cl.get_table('s4')
231
- validate(t, v, s1, s2, s3, s4)