pixeltable-0.1.0-py3-none-any.whl → pixeltable-0.2.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. (The original page linked to further details at this point; the link target is not preserved in this text.)

Files changed (147):
  1. pixeltable/__init__.py +34 -6
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +590 -30
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +359 -45
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +195 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +34 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +256 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +122 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +418 -182
  88. pixeltable/tests/conftest.py +146 -88
  89. pixeltable/tests/functions/test_fireworks.py +42 -0
  90. pixeltable/tests/functions/test_functions.py +60 -0
  91. pixeltable/tests/functions/test_huggingface.py +158 -0
  92. pixeltable/tests/functions/test_openai.py +152 -0
  93. pixeltable/tests/functions/test_together.py +111 -0
  94. pixeltable/tests/test_audio.py +65 -0
  95. pixeltable/tests/test_catalog.py +27 -0
  96. pixeltable/tests/test_client.py +14 -14
  97. pixeltable/tests/test_component_view.py +370 -0
  98. pixeltable/tests/test_dataframe.py +439 -0
  99. pixeltable/tests/test_dirs.py +78 -62
  100. pixeltable/tests/test_document.py +120 -0
  101. pixeltable/tests/test_exprs.py +592 -135
  102. pixeltable/tests/test_function.py +297 -67
  103. pixeltable/tests/test_migration.py +43 -0
  104. pixeltable/tests/test_nos.py +54 -0
  105. pixeltable/tests/test_snapshot.py +208 -0
  106. pixeltable/tests/test_table.py +1195 -263
  107. pixeltable/tests/test_transactional_directory.py +42 -0
  108. pixeltable/tests/test_types.py +5 -11
  109. pixeltable/tests/test_video.py +151 -34
  110. pixeltable/tests/test_view.py +530 -0
  111. pixeltable/tests/utils.py +320 -45
  112. pixeltable/tool/create_test_db_dump.py +149 -0
  113. pixeltable/tool/create_test_video.py +81 -0
  114. pixeltable/type_system.py +445 -124
  115. pixeltable/utils/__init__.py +17 -46
  116. pixeltable/utils/arrow.py +98 -0
  117. pixeltable/utils/clip.py +12 -15
  118. pixeltable/utils/coco.py +136 -0
  119. pixeltable/utils/documents.py +39 -0
  120. pixeltable/utils/filecache.py +195 -0
  121. pixeltable/utils/help.py +11 -0
  122. pixeltable/utils/hf_datasets.py +157 -0
  123. pixeltable/utils/media_store.py +76 -0
  124. pixeltable/utils/parquet.py +167 -0
  125. pixeltable/utils/pytorch.py +91 -0
  126. pixeltable/utils/s3.py +13 -0
  127. pixeltable/utils/sql.py +17 -0
  128. pixeltable/utils/transactional_directory.py +35 -0
  129. pixeltable-0.2.4.dist-info/LICENSE +18 -0
  130. pixeltable-0.2.4.dist-info/METADATA +127 -0
  131. pixeltable-0.2.4.dist-info/RECORD +132 -0
  132. {pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
  133. pixeltable/catalog.py +0 -1421
  134. pixeltable/exprs.py +0 -1745
  135. pixeltable/function.py +0 -269
  136. pixeltable/functions/clip.py +0 -10
  137. pixeltable/functions/pil/__init__.py +0 -23
  138. pixeltable/functions/tf.py +0 -21
  139. pixeltable/index.py +0 -57
  140. pixeltable/tests/test_dict.py +0 -24
  141. pixeltable/tests/test_functions.py +0 -11
  142. pixeltable/tests/test_tf.py +0 -69
  143. pixeltable/tf.py +0 -33
  144. pixeltable/utils/tf.py +0 -33
  145. pixeltable/utils/video.py +0 -32
  146. pixeltable-0.1.0.dist-info/METADATA +0 -34
  147. pixeltable-0.1.0.dist-info/RECORD +0 -36
File: pixeltable/tests/test_transactional_directory.py (+42 -0)
@@ -0,0 +1,42 @@
1
+ import pathlib
2
+ import tempfile
3
+
4
+ from pixeltable.utils.transactional_directory import transactional_directory
5
+
6
+
7
+ class MyException(Exception):
8
+ pass
9
+
10
+ class TestTransactionalDirectory:
11
+ def test_success(self) -> None:
12
+ test_dir = pathlib.Path(tempfile.mkdtemp())
13
+ assert test_dir.exists()
14
+ final = test_dir / "test_success"
15
+ assert not final.exists()
16
+ with transactional_directory(final) as folder:
17
+ assert folder.exists()
18
+ (folder / "subfolder1").mkdir()
19
+ with (folder / "test.txt").open("w") as f:
20
+ f.write("test")
21
+
22
+ assert final.exists()
23
+ assert (final / "subfolder1").is_dir()
24
+ assert (final / "test.txt").read_text() == "test"
25
+
26
+ def test_failure(self) -> None:
27
+ test_dir = pathlib.Path(tempfile.mkdtemp())
28
+ assert test_dir.exists()
29
+ final = test_dir / "test_failure"
30
+ assert not final.exists()
31
+
32
+ try:
33
+ with transactional_directory(final) as folder:
34
+ assert folder.exists()
35
+ (folder / "subfolder1").mkdir()
36
+ with (folder / "test.txt").open("w") as f:
37
+ f.write("test")
38
+ raise MyException()
39
+ except MyException:
40
+ pass
41
+
42
+ assert not final.exists()
File: pixeltable/tests/test_types.py (+5 -11)
@@ -1,25 +1,19 @@
1
- import pytest
2
- import pandas as pd
3
-
4
- import pixeltable as pt
5
1
  from pixeltable.type_system import \
6
2
  ColumnType, StringType, IntType, BoolType, ImageType, InvalidType, FloatType, TimestampType, JsonType, ArrayType
7
- from pixeltable.tests.utils import get_video_files
8
- from pixeltable import catalog
9
3
 
10
4
 
11
5
  class TestTypes:
12
- def test_serialize(self, init_db: None) -> None:
6
+ def test_serialize(self, init_env) -> None:
13
7
  type_vals = [
14
8
  InvalidType(), StringType(), IntType(), BoolType(), TimestampType(),
15
- ImageType(height=100, width=200, mode=ImageType.Mode.RGB),
9
+ ImageType(height=100, width=200, mode='RGB'),
16
10
  JsonType({
17
11
  'a': StringType(), 'b': IntType(), 'c': FloatType(), 'd': BoolType(), 'e': TimestampType(),
18
- 'f': ImageType(height=100, width=200, mode=ImageType.Mode.RGB),
12
+ 'f': ImageType(height=100, width=200, mode='RGB'),
19
13
  'g': JsonType({'f1': StringType(), 'f2': IntType()}),
20
- 'h': ArrayType((224, 224, 3), dtype=ColumnType.Type.INT),
14
+ 'h': ArrayType((224, 224, 3), dtype=IntType()),
21
15
  }),
22
- ArrayType((224, 224, 3), dtype=ColumnType.Type.INT),
16
+ ArrayType((224, 224, 3), dtype=IntType()),
23
17
  ]
24
18
 
25
19
  for t in type_vals:
File: pixeltable/tests/test_video.py (+151 -34)
@@ -1,42 +1,159 @@
1
+ from typing import Optional, List, Tuple
2
+
3
+ import PIL
1
4
  import pytest
2
- import pandas as pd
3
5
 
4
- import pixeltable as pt
5
- from pixeltable.type_system import VideoType, IntType, ImageType
6
- from pixeltable.tests.utils import get_video_files
6
+ import pixeltable as pxt
7
7
  from pixeltable import catalog
8
- from pixeltable import utils
9
- from pixeltable import exceptions as exc
8
+ from pixeltable import exceptions as excs
9
+ from pixeltable.iterators import FrameIterator
10
+ from pixeltable.tests.utils import get_video_files
11
+ from pixeltable.tests.utils import skip_test_if_not_installed
12
+ from pixeltable.type_system import VideoType, ImageType
13
+ from pixeltable.utils.media_store import MediaStore
10
14
 
11
15
 
12
16
  class TestVideo:
13
- def test_basic(self, test_db: pt.Db) -> None:
17
+ def create_tbls(
18
+ self, cl: pxt.Client, base_name: str = 'video_tbl', view_name: str = 'frame_view'
19
+ ) -> Tuple[catalog.InsertableTable, catalog.Table]:
20
+ cl.drop_table(view_name, ignore_errors=True)
21
+ cl.drop_table(base_name, ignore_errors=True)
22
+ base_t = cl.create_table(base_name, {'video': VideoType()})
23
+ args = {'video': base_t.video, 'fps': 1}
24
+ view_t = cl.create_view(view_name, base_t, iterator_class=FrameIterator, iterator_args=args)
25
+ return base_t, view_t
26
+
27
+ def create_and_insert(
28
+ self, cl: pxt.Client, stored: Optional[bool], paths: List[str]
29
+ ) -> Tuple[catalog.InsertableTable, catalog.Table]:
30
+ base_t, view_t = self.create_tbls(cl)
31
+
32
+ view_t.add_column(transform=view_t.frame.rotate(90), stored=stored)
33
+ base_t.insert({'video': p} for p in paths)
34
+ total_num_rows = view_t.count()
35
+ result = view_t[view_t.frame_idx >= 5][view_t.frame_idx, view_t.frame, view_t.transform].show(0)
36
+ assert len(result) == total_num_rows - len(paths) * 5
37
+ result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(3)
38
+ assert len(result) == 3
39
+ result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(0)
40
+ assert len(result) == total_num_rows
41
+ return base_t, view_t
42
+
43
+ def test_basic(self, test_client: pxt.Client) -> None:
14
44
  video_filepaths = get_video_files()
15
- db = test_db
16
- cols = [
17
- catalog.Column('video', VideoType(), nullable=False),
18
- catalog.Column('frame', ImageType(), nullable=False),
19
- catalog.Column('frame_idx', IntType(), nullable=False),
20
- ]
21
- tbl = db.create_table(
22
- 'test', cols, extract_frames_from = 'video', extracted_frame_col = 'frame',
23
- extracted_frame_idx_col = 'frame_idx', extracted_fps = 1)
24
- tbl.insert_rows([[p] for p in video_filepaths], columns=['video'])
25
- assert utils.extracted_frame_count(tbl_id=tbl.id) == tbl.count()
26
- _ = tbl[tbl.frame_idx, tbl.frame, tbl.frame.rotate(90)].show(0)
27
-
28
- # missing 'columns' arg
29
- with pytest.raises(exc.Error):
30
- tbl.insert_rows([[p] for p in video_filepaths])
31
-
32
- # column values mismatch in rows
33
- with pytest.raises(exc.Error):
34
- tbl.insert_rows([[1, 2], [3]], columns=['video'])
35
-
36
- # column values mismatch in rows
37
- with pytest.raises(exc.Error):
38
- tbl.insert_rows([[1, 2]], columns=['video'])
39
-
40
- # revert() also removes extracted frames
45
+ cl = test_client
46
+
47
+ # default case: computed images are not stored
48
+ _, view = self.create_and_insert(cl, None, video_filepaths)
49
+ assert MediaStore.count(view.get_id()) == 0
50
+
51
+ # computed images are explicitly not stored
52
+ _, view = self.create_and_insert(cl, False, video_filepaths)
53
+ assert MediaStore.count(view.get_id()) == 0
54
+
55
+ # computed images are stored
56
+ tbl, view = self.create_and_insert(cl, True, video_filepaths)
57
+ assert MediaStore.count(view.get_id()) == view.count()
58
+
59
+ # revert() also removes computed images
60
+ tbl.insert({'video': p} for p in video_filepaths)
41
61
  tbl.revert()
42
- assert utils.extracted_frame_count(tbl_id=tbl.id) == tbl.count()
62
+ assert MediaStore.count(view.get_id()) == view.count()
63
+
64
+ def test_query(self, test_client: pxt.client) -> None:
65
+ skip_test_if_not_installed('boto3')
66
+ video_filepaths = get_video_files()
67
+ cl = test_client
68
+ base_t, view_t = self.create_tbls(cl)
69
+ # also include an external file, to make sure that prefetching works
70
+ url = 's3://multimedia-commons/data/videos/mp4/ffe/ff3/ffeff3c6bf57504e7a6cecaff6aefbc9.mp4'
71
+ video_filepaths.append(url)
72
+ status = base_t.insert({'video': p} for p in video_filepaths)
73
+ assert status.num_excs == 0
74
+ # make sure that we can get the frames back
75
+ res = view_t.select(view_t.frame).collect().to_pandas()
76
+ assert res['frame'].notnull().all()
77
+ # make sure we can select a specific video
78
+ all_rows = view_t.select(url=view_t.video.fileurl).collect().to_pandas()
79
+ res = view_t.where(view_t.video == url).collect()
80
+ assert len(res) == len(all_rows[all_rows.url == url])
81
+
82
+ def test_fps(self, test_client: pxt.client) -> None:
83
+ cl = test_client
84
+ path = get_video_files()[0]
85
+ videos = cl.create_table('videos', {'video': VideoType()})
86
+ frames_1_0 = cl.create_view(
87
+ 'frames_1_0', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1})
88
+ frames_0_5 = cl.create_view(
89
+ 'frames_0_5', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/2})
90
+ frames_0_33 = cl.create_view(
91
+ 'frames_0_33', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/3})
92
+ videos.insert(video=path)
93
+ assert frames_0_5.count() == frames_1_0.count() // 2 or frames_0_5.count() == frames_1_0.count() // 2 + 1
94
+ assert frames_0_33.count() == frames_1_0.count() // 3 or frames_0_33.count() == frames_1_0.count() // 3 + 1
95
+
96
+ def test_computed_cols(self, test_client: pxt.client) -> None:
97
+ video_filepaths = get_video_files()
98
+ cl = test_client
99
+ base_t, view_t = self.create_tbls(cl)
100
+ # c2 and c4 depend directly on c1, c3 depends on it indirectly
101
+ view_t.add_column(c1=view_t.frame.resize([224, 224]))
102
+ view_t.add_column(c2=view_t.c1.rotate(10))
103
+ view_t.add_column(c3=view_t.c2.rotate(20))
104
+ view_t.add_column(c4=view_t.c1.rotate(30))
105
+ for name in ['c1', 'c2', 'c3', 'c4']:
106
+ assert not view_t.tbl_version_path.tbl_version.cols_by_name[name].is_stored
107
+ base_t.insert({'video': p} for p in video_filepaths)
108
+ _ = view_t[view_t.c1, view_t.c2, view_t.c3, view_t.c4].show(0)
109
+
110
+ def test_make_video(self, test_client: pxt.Client) -> None:
111
+ video_filepaths = get_video_files()
112
+ cl = test_client
113
+ base_t, view_t = self.create_tbls(cl)
114
+ base_t.insert({'video': p} for p in video_filepaths)
115
+ # reference to the frame col requires ordering by base, pos
116
+ from pixeltable.functions import make_video
117
+ _ = view_t.select(make_video(view_t.pos, view_t.frame)).group_by(base_t).show()
118
+ # the same without frame col
119
+ view_t.add_column(transformed=view_t.frame.rotate(30), stored=True)
120
+ _ = view_t.select(make_video(view_t.pos, view_t.transformed)).group_by(base_t).show()
121
+
122
+ with pytest.raises(excs.Error):
123
+ # make_video() doesn't allow windows
124
+ _ = view_t.select(make_video(view_t.pos, view_t.frame, group_by=base_t)).show()
125
+ with pytest.raises(excs.Error):
126
+ # make_video() requires ordering
127
+ _ = view_t.select(make_video(view_t.frame, order_by=view_t.pos)).show()
128
+ with pytest.raises(excs.Error):
129
+ # incompatible ordering requirements
130
+ _ = view_t.select(
131
+ make_video(view_t.pos, view_t.frame),
132
+ make_video(view_t.pos - 1, view_t.transformed)).group_by(base_t).show()
133
+
134
+ # window function that simply passes through the frame
135
+ @pxt.uda(
136
+ update_types=[ImageType()], value_type=ImageType(), name='agg_fn',
137
+ requires_order_by=True, allows_std_agg=False, allows_window=True)
138
+ class WindowAgg:
139
+ def __init__(self):
140
+ self.img = None
141
+ def update(self, frame: PIL.Image.Image) -> None:
142
+ self.img = frame
143
+ def value(self) -> PIL.Image.Image:
144
+ return self.img
145
+
146
+ # make sure it works
147
+ _ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
148
+ status = view_t.add_column(agg=agg_fn(view_t.pos, view_t.frame, group_by=base_t))
149
+ assert status.num_excs == 0
150
+ _ = view_t.select(make_video(view_t.pos, view_t.agg)).group_by(base_t).show()
151
+
152
+ # image cols computed with a window function currently need to be stored
153
+ with pytest.raises(excs.Error):
154
+ view_t.add_column(agg2=agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
155
+
156
+ # reload from store
157
+ cl = pxt.Client(reload=True)
158
+ base_t, view_t = cl.get_table(base_t.get_name()), cl.get_table(view_t.get_name())
159
+ _ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()