pixeltable 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140)
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
pixeltable/tests/test_transactional_directory.py
@@ -0,0 +1,42 @@
1
+ import pathlib
2
+ import tempfile
3
+
4
+ from pixeltable.utils.transactional_directory import transactional_directory
5
+
6
+
7
+ class MyException(Exception):
8
+ pass
9
+
10
+ class TestTransactionalDirectory:
11
+ def test_success(self) -> None:
12
+ test_dir = pathlib.Path(tempfile.mkdtemp())
13
+ assert test_dir.exists()
14
+ final = test_dir / "test_success"
15
+ assert not final.exists()
16
+ with transactional_directory(final) as folder:
17
+ assert folder.exists()
18
+ (folder / "subfolder1").mkdir()
19
+ with (folder / "test.txt").open("w") as f:
20
+ f.write("test")
21
+
22
+ assert final.exists()
23
+ assert (final / "subfolder1").is_dir()
24
+ assert (final / "test.txt").read_text() == "test"
25
+
26
+ def test_failure(self) -> None:
27
+ test_dir = pathlib.Path(tempfile.mkdtemp())
28
+ assert test_dir.exists()
29
+ final = test_dir / "test_failure"
30
+ assert not final.exists()
31
+
32
+ try:
33
+ with transactional_directory(final) as folder:
34
+ assert folder.exists()
35
+ (folder / "subfolder1").mkdir()
36
+ with (folder / "test.txt").open("w") as f:
37
+ f.write("test")
38
+ raise MyException()
39
+ except MyException:
40
+ pass
41
+
42
+ assert not final.exists()
pixeltable/tests/test_types.py
@@ -1,25 +1,19 @@
1
- import pytest
2
- import pandas as pd
3
-
4
- import pixeltable as pt
5
1
  from pixeltable.type_system import \
6
2
  ColumnType, StringType, IntType, BoolType, ImageType, InvalidType, FloatType, TimestampType, JsonType, ArrayType
7
- from pixeltable.tests.utils import get_video_files
8
- from pixeltable import catalog
9
3
 
10
4
 
11
5
  class TestTypes:
12
- def test_serialize(self, init_db: None) -> None:
6
+ def test_serialize(self, init_env) -> None:
13
7
  type_vals = [
14
8
  InvalidType(), StringType(), IntType(), BoolType(), TimestampType(),
15
- ImageType(height=100, width=200, mode=ImageType.Mode.RGB),
9
+ ImageType(height=100, width=200, mode='RGB'),
16
10
  JsonType({
17
11
  'a': StringType(), 'b': IntType(), 'c': FloatType(), 'd': BoolType(), 'e': TimestampType(),
18
- 'f': ImageType(height=100, width=200, mode=ImageType.Mode.RGB),
12
+ 'f': ImageType(height=100, width=200, mode='RGB'),
19
13
  'g': JsonType({'f1': StringType(), 'f2': IntType()}),
20
- 'h': ArrayType((224, 224, 3), dtype=ColumnType.Type.INT),
14
+ 'h': ArrayType((224, 224, 3), dtype=IntType()),
21
15
  }),
22
- ArrayType((224, 224, 3), dtype=ColumnType.Type.INT),
16
+ ArrayType((224, 224, 3), dtype=IntType()),
23
17
  ]
24
18
 
25
19
  for t in type_vals:
pixeltable/tests/test_video.py
@@ -1,42 +1,157 @@
1
+ from typing import Optional, List, Tuple
2
+
3
+ import PIL
1
4
  import pytest
2
- import pandas as pd
3
5
 
4
- import pixeltable as pt
5
- from pixeltable.type_system import VideoType, IntType, ImageType
6
- from pixeltable.tests.utils import get_video_files
6
+ import pixeltable as pxt
7
7
  from pixeltable import catalog
8
- from pixeltable import utils
9
- from pixeltable import exceptions as exc
8
+ from pixeltable import exceptions as excs
9
+ from pixeltable.iterators import FrameIterator
10
+ from pixeltable.tests.utils import get_video_files
11
+ from pixeltable.type_system import VideoType, ImageType
12
+ from pixeltable.utils.media_store import MediaStore
10
13
 
11
14
 
12
15
  class TestVideo:
13
- def test_basic(self, test_db: catalog.Db) -> None:
16
+ def create_tbls(
17
+ self, cl: pxt.Client, base_name: str = 'video_tbl', view_name: str = 'frame_view'
18
+ ) -> Tuple[catalog.InsertableTable, catalog.Table]:
19
+ cl.drop_table(view_name, ignore_errors=True)
20
+ cl.drop_table(base_name, ignore_errors=True)
21
+ base_t = cl.create_table(base_name, {'video': VideoType()})
22
+ args = {'video': base_t.video, 'fps': 1}
23
+ view_t = cl.create_view(view_name, base_t, iterator_class=FrameIterator, iterator_args=args)
24
+ return base_t, view_t
25
+
26
+ def create_and_insert(
27
+ self, cl: pxt.Client, stored: Optional[bool], paths: List[str]
28
+ ) -> Tuple[catalog.InsertableTable, catalog.Table]:
29
+ base_t, view_t = self.create_tbls(cl)
30
+
31
+ view_t.add_column(transform=view_t.frame.rotate(90), stored=stored)
32
+ base_t.insert({'video': p} for p in paths)
33
+ total_num_rows = view_t.count()
34
+ result = view_t[view_t.frame_idx >= 5][view_t.frame_idx, view_t.frame, view_t.transform].show(0)
35
+ assert len(result) == total_num_rows - len(paths) * 5
36
+ result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(3)
37
+ assert len(result) == 3
38
+ result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(0)
39
+ assert len(result) == total_num_rows
40
+ return base_t, view_t
41
+
42
+ def test_basic(self, test_client: pxt.Client) -> None:
14
43
  video_filepaths = get_video_files()
15
- db = test_db
16
- cols = [
17
- catalog.Column('video', VideoType(), nullable=False),
18
- catalog.Column('frame', ImageType(), nullable=False),
19
- catalog.Column('frame_idx', IntType(), nullable=False),
20
- ]
21
- tbl = db.create_table(
22
- 'test', cols, extract_frames_from = 'video', extracted_frame_col = 'frame',
23
- extracted_frame_idx_col = 'frame_idx', extracted_fps = 1)
24
- tbl.insert_rows([[p] for p in video_filepaths], columns=['video'])
25
- assert utils.extracted_frame_count(tbl_id=tbl.id) == tbl.count()
26
- _ = tbl[tbl.frame_idx, tbl.frame, tbl.frame.rotate(90)].show(0)
27
-
28
- # missing 'columns' arg
29
- with pytest.raises(exc.Error):
30
- tbl.insert_rows([[p] for p in video_filepaths])
31
-
32
- # column values mismatch in rows
33
- with pytest.raises(exc.Error):
34
- tbl.insert_rows([[1, 2], [3]], columns=['video'])
35
-
36
- # column values mismatch in rows
37
- with pytest.raises(exc.Error):
38
- tbl.insert_rows([[1, 2]], columns=['video'])
39
-
40
- # revert() also removes extracted frames
44
+ cl = test_client
45
+
46
+ # default case: computed images are not stored
47
+ _, view = self.create_and_insert(cl, None, video_filepaths)
48
+ assert MediaStore.count(view.get_id()) == 0
49
+
50
+ # computed images are explicitly not stored
51
+ _, view = self.create_and_insert(cl, False, video_filepaths)
52
+ assert MediaStore.count(view.get_id()) == 0
53
+
54
+ # computed images are stored
55
+ tbl, view = self.create_and_insert(cl, True, video_filepaths)
56
+ assert MediaStore.count(view.get_id()) == view.count()
57
+
58
+ # revert() also removes computed images
59
+ tbl.insert({'video': p} for p in video_filepaths)
41
60
  tbl.revert()
42
- assert utils.extracted_frame_count(tbl_id=tbl.id) == tbl.count()
61
+ assert MediaStore.count(view.get_id()) == view.count()
62
+
63
+ def test_query(self, test_client: pxt.client) -> None:
64
+ video_filepaths = get_video_files()
65
+ cl = test_client
66
+ base_t, view_t = self.create_tbls(cl)
67
+ # also include an external file, to make sure that prefetching works
68
+ url = 's3://multimedia-commons/data/videos/mp4/ffe/ff3/ffeff3c6bf57504e7a6cecaff6aefbc9.mp4'
69
+ video_filepaths.append(url)
70
+ status = base_t.insert({'video': p} for p in video_filepaths)
71
+ assert status.num_excs == 0
72
+ # make sure that we can get the frames back
73
+ res = view_t.select(view_t.frame).collect().to_pandas()
74
+ assert res['frame'].notnull().all()
75
+ # make sure we can select a specific video
76
+ all_rows = view_t.select(url=view_t.video.fileurl).collect().to_pandas()
77
+ res = view_t.where(view_t.video == url).collect()
78
+ assert len(res) == len(all_rows[all_rows.url == url])
79
+
80
+ def test_fps(self, test_client: pxt.client) -> None:
81
+ cl = test_client
82
+ path = get_video_files()[0]
83
+ videos = cl.create_table('videos', {'video': VideoType()})
84
+ frames_1_0 = cl.create_view(
85
+ 'frames_1_0', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1})
86
+ frames_0_5 = cl.create_view(
87
+ 'frames_0_5', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/2})
88
+ frames_0_33 = cl.create_view(
89
+ 'frames_0_33', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/3})
90
+ videos.insert(video=path)
91
+ assert frames_0_5.count() == frames_1_0.count() // 2 or frames_0_5.count() == frames_1_0.count() // 2 + 1
92
+ assert frames_0_33.count() == frames_1_0.count() // 3 or frames_0_33.count() == frames_1_0.count() // 3 + 1
93
+
94
+ def test_computed_cols(self, test_client: pxt.client) -> None:
95
+ video_filepaths = get_video_files()
96
+ cl = test_client
97
+ base_t, view_t = self.create_tbls(cl)
98
+ # c2 and c4 depend directly on c1, c3 depends on it indirectly
99
+ view_t.add_column(c1=view_t.frame.resize([224, 224]))
100
+ view_t.add_column(c2=view_t.c1.rotate(10))
101
+ view_t.add_column(c3=view_t.c2.rotate(20))
102
+ view_t.add_column(c4=view_t.c1.rotate(30))
103
+ for name in ['c1', 'c2', 'c3', 'c4']:
104
+ assert not view_t.tbl_version_path.tbl_version.cols_by_name[name].is_stored
105
+ base_t.insert({'video': p} for p in video_filepaths)
106
+ _ = view_t[view_t.c1, view_t.c2, view_t.c3, view_t.c4].show(0)
107
+
108
+ def test_make_video(self, test_client: pxt.Client) -> None:
109
+ video_filepaths = get_video_files()
110
+ cl = test_client
111
+ base_t, view_t = self.create_tbls(cl)
112
+ base_t.insert({'video': p} for p in video_filepaths)
113
+ # reference to the frame col requires ordering by base, pos
114
+ from pixeltable.functions import make_video
115
+ _ = view_t.select(make_video(view_t.pos, view_t.frame)).group_by(base_t).show()
116
+ # the same without frame col
117
+ view_t.add_column(transformed=view_t.frame.rotate(30), stored=True)
118
+ _ = view_t.select(make_video(view_t.pos, view_t.transformed)).group_by(base_t).show()
119
+
120
+ with pytest.raises(excs.Error):
121
+ # make_video() doesn't allow windows
122
+ _ = view_t.select(make_video(view_t.pos, view_t.frame, group_by=base_t)).show()
123
+ with pytest.raises(excs.Error):
124
+ # make_video() requires ordering
125
+ _ = view_t.select(make_video(view_t.frame, order_by=view_t.pos)).show()
126
+ with pytest.raises(excs.Error):
127
+ # incompatible ordering requirements
128
+ _ = view_t.select(
129
+ make_video(view_t.pos, view_t.frame),
130
+ make_video(view_t.pos - 1, view_t.transformed)).group_by(base_t).show()
131
+
132
+ # window function that simply passes through the frame
133
+ @pxt.uda(
134
+ update_types=[ImageType()], value_type=ImageType(), name='agg_fn',
135
+ requires_order_by=True, allows_std_agg=False, allows_window=True)
136
+ class WindowAgg:
137
+ def __init__(self):
138
+ self.img = None
139
+ def update(self, frame: PIL.Image.Image) -> None:
140
+ self.img = frame
141
+ def value(self) -> PIL.Image.Image:
142
+ return self.img
143
+
144
+ # make sure it works
145
+ _ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
146
+ status = view_t.add_column(agg=agg_fn(view_t.pos, view_t.frame, group_by=base_t))
147
+ assert status.num_excs == 0
148
+ _ = view_t.select(make_video(view_t.pos, view_t.agg)).group_by(base_t).show()
149
+
150
+ # image cols computed with a window function currently need to be stored
151
+ with pytest.raises(excs.Error):
152
+ view_t.add_column(agg2=agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
153
+
154
+ # reload from store
155
+ cl = pxt.Client(reload=True)
156
+ base_t, view_t = cl.get_table(base_t.get_name()), cl.get_table(view_t.get_name())
157
+ _ = view_t.select(agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()