pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. (The original page links to more details here.)

Files changed (99)
  1. pixeltable/__init__.py +18 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +31 -50
  4. pixeltable/catalog/insertable_table.py +7 -6
  5. pixeltable/catalog/table.py +171 -57
  6. pixeltable/catalog/table_version.py +417 -140
  7. pixeltable/catalog/table_version_path.py +2 -2
  8. pixeltable/dataframe.py +239 -121
  9. pixeltable/env.py +82 -16
  10. pixeltable/exec/__init__.py +2 -1
  11. pixeltable/exec/cache_prefetch_node.py +1 -1
  12. pixeltable/exec/data_row_batch.py +6 -7
  13. pixeltable/exec/expr_eval_node.py +28 -28
  14. pixeltable/exec/in_memory_data_node.py +11 -7
  15. pixeltable/exec/sql_scan_node.py +7 -6
  16. pixeltable/exprs/__init__.py +4 -3
  17. pixeltable/exprs/column_ref.py +9 -0
  18. pixeltable/exprs/comparison.py +3 -3
  19. pixeltable/exprs/data_row.py +5 -1
  20. pixeltable/exprs/expr.py +15 -7
  21. pixeltable/exprs/function_call.py +17 -15
  22. pixeltable/exprs/image_member_access.py +9 -28
  23. pixeltable/exprs/in_predicate.py +96 -0
  24. pixeltable/exprs/inline_array.py +13 -11
  25. pixeltable/exprs/inline_dict.py +15 -13
  26. pixeltable/exprs/literal.py +16 -4
  27. pixeltable/exprs/row_builder.py +15 -41
  28. pixeltable/exprs/similarity_expr.py +65 -0
  29. pixeltable/ext/__init__.py +5 -0
  30. pixeltable/ext/functions/yolox.py +92 -0
  31. pixeltable/func/__init__.py +0 -2
  32. pixeltable/func/aggregate_function.py +18 -15
  33. pixeltable/func/callable_function.py +57 -13
  34. pixeltable/func/expr_template_function.py +20 -3
  35. pixeltable/func/function.py +35 -4
  36. pixeltable/func/globals.py +24 -14
  37. pixeltable/func/signature.py +23 -27
  38. pixeltable/func/udf.py +13 -12
  39. pixeltable/functions/__init__.py +8 -8
  40. pixeltable/functions/eval.py +7 -8
  41. pixeltable/functions/huggingface.py +64 -17
  42. pixeltable/functions/openai.py +36 -3
  43. pixeltable/functions/pil/image.py +61 -64
  44. pixeltable/functions/together.py +21 -0
  45. pixeltable/functions/util.py +11 -0
  46. pixeltable/globals.py +425 -0
  47. pixeltable/index/__init__.py +2 -0
  48. pixeltable/index/base.py +51 -0
  49. pixeltable/index/embedding_index.py +168 -0
  50. pixeltable/io/__init__.py +3 -0
  51. pixeltable/{utils → io}/hf_datasets.py +48 -17
  52. pixeltable/io/pandas.py +148 -0
  53. pixeltable/{utils → io}/parquet.py +58 -33
  54. pixeltable/iterators/__init__.py +1 -1
  55. pixeltable/iterators/base.py +4 -0
  56. pixeltable/iterators/document.py +218 -97
  57. pixeltable/iterators/video.py +8 -9
  58. pixeltable/metadata/__init__.py +7 -3
  59. pixeltable/metadata/converters/convert_12.py +3 -0
  60. pixeltable/metadata/converters/convert_13.py +41 -0
  61. pixeltable/metadata/schema.py +45 -22
  62. pixeltable/plan.py +15 -51
  63. pixeltable/store.py +38 -41
  64. pixeltable/tool/create_test_db_dump.py +39 -4
  65. pixeltable/type_system.py +47 -96
  66. pixeltable/utils/documents.py +42 -12
  67. pixeltable/utils/http_server.py +70 -0
  68. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
  69. pixeltable-0.2.6.dist-info/RECORD +119 -0
  70. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
  71. pixeltable/client.py +0 -604
  72. pixeltable/exprs/image_similarity_predicate.py +0 -58
  73. pixeltable/func/batched_function.py +0 -53
  74. pixeltable/tests/conftest.py +0 -177
  75. pixeltable/tests/functions/test_fireworks.py +0 -42
  76. pixeltable/tests/functions/test_functions.py +0 -60
  77. pixeltable/tests/functions/test_huggingface.py +0 -158
  78. pixeltable/tests/functions/test_openai.py +0 -152
  79. pixeltable/tests/functions/test_together.py +0 -111
  80. pixeltable/tests/test_audio.py +0 -65
  81. pixeltable/tests/test_catalog.py +0 -27
  82. pixeltable/tests/test_client.py +0 -21
  83. pixeltable/tests/test_component_view.py +0 -370
  84. pixeltable/tests/test_dataframe.py +0 -439
  85. pixeltable/tests/test_dirs.py +0 -107
  86. pixeltable/tests/test_document.py +0 -120
  87. pixeltable/tests/test_exprs.py +0 -805
  88. pixeltable/tests/test_function.py +0 -324
  89. pixeltable/tests/test_migration.py +0 -43
  90. pixeltable/tests/test_nos.py +0 -54
  91. pixeltable/tests/test_snapshot.py +0 -208
  92. pixeltable/tests/test_table.py +0 -1267
  93. pixeltable/tests/test_transactional_directory.py +0 -42
  94. pixeltable/tests/test_types.py +0 -22
  95. pixeltable/tests/test_video.py +0 -159
  96. pixeltable/tests/test_view.py +0 -530
  97. pixeltable/tests/utils.py +0 -408
  98. pixeltable-0.2.4.dist-info/RECORD +0 -132
  99. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
pixeltable/tests/test_transactional_directory.py (file removed)
@@ -1,42 +0,0 @@
1
- import pathlib
2
- import tempfile
3
-
4
- from pixeltable.utils.transactional_directory import transactional_directory
5
-
6
-
7
- class MyException(Exception):
8
- pass
9
-
10
- class TestTransactionalDirectory:
11
- def test_success(self) -> None:
12
- test_dir = pathlib.Path(tempfile.mkdtemp())
13
- assert test_dir.exists()
14
- final = test_dir / "test_success"
15
- assert not final.exists()
16
- with transactional_directory(final) as folder:
17
- assert folder.exists()
18
- (folder / "subfolder1").mkdir()
19
- with (folder / "test.txt").open("w") as f:
20
- f.write("test")
21
-
22
- assert final.exists()
23
- assert (final / "subfolder1").is_dir()
24
- assert (final / "test.txt").read_text() == "test"
25
-
26
- def test_failure(self) -> None:
27
- test_dir = pathlib.Path(tempfile.mkdtemp())
28
- assert test_dir.exists()
29
- final = test_dir / "test_failure"
30
- assert not final.exists()
31
-
32
- try:
33
- with transactional_directory(final) as folder:
34
- assert folder.exists()
35
- (folder / "subfolder1").mkdir()
36
- with (folder / "test.txt").open("w") as f:
37
- f.write("test")
38
- raise MyException()
39
- except MyException:
40
- pass
41
-
42
- assert not final.exists()
pixeltable/tests/test_types.py (file removed)
@@ -1,22 +0,0 @@
1
- from pixeltable.type_system import \
2
- ColumnType, StringType, IntType, BoolType, ImageType, InvalidType, FloatType, TimestampType, JsonType, ArrayType
3
-
4
-
5
- class TestTypes:
6
- def test_serialize(self, init_env) -> None:
7
- type_vals = [
8
- InvalidType(), StringType(), IntType(), BoolType(), TimestampType(),
9
- ImageType(height=100, width=200, mode='RGB'),
10
- JsonType({
11
- 'a': StringType(), 'b': IntType(), 'c': FloatType(), 'd': BoolType(), 'e': TimestampType(),
12
- 'f': ImageType(height=100, width=200, mode='RGB'),
13
- 'g': JsonType({'f1': StringType(), 'f2': IntType()}),
14
- 'h': ArrayType((224, 224, 3), dtype=IntType()),
15
- }),
16
- ArrayType((224, 224, 3), dtype=IntType()),
17
- ]
18
-
19
- for t in type_vals:
20
- t_serialized = t.serialize()
21
- t_deserialized = ColumnType.deserialize(t_serialized)
22
- assert t == t_deserialized
pixeltable/tests/test_video.py (file removed)
@@ -1,159 +0,0 @@
1
- from typing import Optional, List, Tuple
2
-
3
- import PIL
4
- import pytest
5
-
6
- import pixeltable as pxt
7
- from pixeltable import catalog
8
- from pixeltable import exceptions as excs
9
- from pixeltable.iterators import FrameIterator
10
- from pixeltable.tests.utils import get_video_files
11
- from pixeltable.tests.utils import skip_test_if_not_installed
12
- from pixeltable.type_system import VideoType, ImageType
13
- from pixeltable.utils.media_store import MediaStore
14
-
15
-
16
- class TestVideo:
17
- def create_tbls(
18
- self, cl: pxt.Client, base_name: str = 'video_tbl', view_name: str = 'frame_view'
19
- ) -> Tuple[catalog.InsertableTable, catalog.Table]:
20
- cl.drop_table(view_name, ignore_errors=True)
21
- cl.drop_table(base_name, ignore_errors=True)
22
- base_t = cl.create_table(base_name, {'video': VideoType()})
23
- args = {'video': base_t.video, 'fps': 1}
24
- view_t = cl.create_view(view_name, base_t, iterator_class=FrameIterator, iterator_args=args)
25
- return base_t, view_t
26
-
27
- def create_and_insert(
28
- self, cl: pxt.Client, stored: Optional[bool], paths: List[str]
29
- ) -> Tuple[catalog.InsertableTable, catalog.Table]:
30
- base_t, view_t = self.create_tbls(cl)
31
-
32
- view_t.add_column(transform=view_t.frame.rotate(90), stored=stored)
33
- base_t.insert({'video': p} for p in paths)
34
- total_num_rows = view_t.count()
35
- result = view_t[view_t.frame_idx >= 5][view_t.frame_idx, view_t.frame, view_t.transform].show(0)
36
- assert len(result) == total_num_rows - len(paths) * 5
37
- result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(3)
38
- assert len(result) == 3
39
- result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(0)
40
- assert len(result) == total_num_rows
41
- return base_t, view_t
42
-
43
- def test_basic(self, test_client: pxt.Client) -> None:
44
- video_filepaths = get_video_files()
45
- cl = test_client
46
-
47
- # default case: computed images are not stored
48
- _, view = self.create_and_insert(cl, None, video_filepaths)
49
- assert MediaStore.count(view.get_id()) == 0
50
-
51
- # computed images are explicitly not stored
52
- _, view = self.create_and_insert(cl, False, video_filepaths)
53
- assert MediaStore.count(view.get_id()) == 0
54
-
55
- # computed images are stored
56
- tbl, view = self.create_and_insert(cl, True, video_filepaths)
57
- assert MediaStore.count(view.get_id()) == view.count()
58
-
59
- # revert() also removes computed images
60
- tbl.insert({'video': p} for p in video_filepaths)
61
- tbl.revert()
62
- assert MediaStore.count(view.get_id()) == view.count()
63
-
64
- def test_query(self, test_client: pxt.client) -> None:
65
- skip_test_if_not_installed('boto3')
66
- video_filepaths = get_video_files()
67
- cl = test_client
68
- base_t, view_t = self.create_tbls(cl)
69
- # also include an external file, to make sure that prefetching works
70
- url = 's3://multimedia-commons/data/videos/mp4/ffe/ff3/ffeff3c6bf57504e7a6cecaff6aefbc9.mp4'
71
- video_filepaths.append(url)
72
- status = base_t.insert({'video': p} for p in video_filepaths)
73
- assert status.num_excs == 0
74
- # make sure that we can get the frames back
75
- res = view_t.select(view_t.frame).collect().to_pandas()
76
- assert res['frame'].notnull().all()
77
- # make sure we can select a specific video
78
- all_rows = view_t.select(url=view_t.video.fileurl).collect().to_pandas()
79
- res = view_t.where(view_t.video == url).collect()
80
- assert len(res) == len(all_rows[all_rows.url == url])
81
-
82
- def test_fps(self, test_client: pxt.client) -> None:
83
- cl = test_client
84
- path = get_video_files()[0]
85
- videos = cl.create_table('videos', {'video': VideoType()})
86
- frames_1_0 = cl.create_view(
87
- 'frames_1_0', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1})
88
- frames_0_5 = cl.create_view(
89
- 'frames_0_5', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/2})
90
- frames_0_33 = cl.create_view(
91
- 'frames_0_33', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/3})
92
- videos.insert(video=path)
93
- assert frames_0_5.count() == frames_1_0.count() // 2 or frames_0_5.count() == frames_1_0.count() // 2 + 1
94
- assert frames_0_33.count() == frames_1_0.count() // 3 or frames_0_33.count() == frames_1_0.count() // 3 + 1
95
-
96
- def test_computed_cols(self, test_client: pxt.client) -> None:
97
- video_filepaths = get_video_files()
98
- cl = test_client
99
- base_t, view_t = self.create_tbls(cl)
100
- # c2 and c4 depend directly on c1, c3 depends on it indirectly
101
- view_t.add_column(c1=view_t.frame.resize([224, 224]))
102
- view_t.add_column(c2=view_t.c1.rotate(10))
103
- view_t.add_column(c3=view_t.c2.rotate(20))
104
- view_t.add_column(c4=view_t.c1.rotate(30))
105
- for name in ['c1', 'c2', 'c3', 'c4']:
106
- assert not view_t.tbl_version_path.tbl_version.cols_by_name[name].is_stored
107
- base_t.insert({'video': p} for p in video_filepaths)
108
- _ = view_t[view_t.c1, view_t.c2, view_t.c3, view_t.c4].show(0)
109
-
110
- def test_make_video(self, test_client: pxt.Client) -> None:
111
- video_filepaths = get_video_files()
112
- cl = test_client
113
- base_t, view_t = self.create_tbls(cl)
114
- base_t.insert({'video': p} for p in video_filepaths)
115
- # reference to the frame col requires ordering by base, pos
116
- from pixeltable.functions import make_video
117
- _ = view_t.select(make_video(view_t.pos, view_t.frame)).group_by(base_t).show()
118
- # the same without frame col
119
- view_t.add_column(transformed=view_t.frame.rotate(30), stored=True)
120
- _ = view_t.select(make_video(view_t.pos, view_t.transformed)).group_by(base_t).show()
121
-
122
- with pytest.raises(excs.Error):
123
- # make_video() doesn't allow windows
124
- _ = view_t.select(make_video(view_t.pos, view_t.frame, group_by=base_t)).show()
125
- with pytest.raises(excs.Error):
126
- # make_video() requires ordering
127
- _ = view_t.select(make_video(view_t.frame, order_by=view_t.pos)).show()
128
- with pytest.raises(excs.Error):
129
- # incompatible ordering requirements
130
- _ = view_t.select(
131
- make_video(view_t.pos, view_t.frame),
132
- make_video(view_t.pos - 1, view_t.transformed)).group_by(base_t).show()
133
-
134
- # window function that simply passes through the frame
135
- @pxt.uda(
136
- update_types=[ImageType()], value_type=ImageType(), name='agg_fn',
137
- requires_order_by=True, allows_std_agg=False, allows_window=True)
138
- class WindowAgg:
139
- def __init__(self):
140
- self.img = None
141
- def update(self, frame: PIL.Image.Image) -> None:
142
- self.img = frame
143
- def value(self) -> PIL.Image.Image:
144
- return self.img
145
-
146
- # make sure it works
147
- _ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
148
- status = view_t.add_column(agg=agg_fn(view_t.pos, view_t.frame, group_by=base_t))
149
- assert status.num_excs == 0
150
- _ = view_t.select(make_video(view_t.pos, view_t.agg)).group_by(base_t).show()
151
-
152
- # image cols computed with a window function currently need to be stored
153
- with pytest.raises(excs.Error):
154
- view_t.add_column(agg2=agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
155
-
156
- # reload from store
157
- cl = pxt.Client(reload=True)
158
- base_t, view_t = cl.get_table(base_t.get_name()), cl.get_table(view_t.get_name())
159
- _ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()