pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (110)
  1. pixeltable/__init__.py +20 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +23 -7
  4. pixeltable/catalog/insertable_table.py +32 -19
  5. pixeltable/catalog/table.py +210 -20
  6. pixeltable/catalog/table_version.py +272 -111
  7. pixeltable/catalog/table_version_path.py +6 -1
  8. pixeltable/dataframe.py +184 -110
  9. pixeltable/datatransfer/__init__.py +1 -0
  10. pixeltable/datatransfer/label_studio.py +526 -0
  11. pixeltable/datatransfer/remote.py +113 -0
  12. pixeltable/env.py +213 -79
  13. pixeltable/exec/__init__.py +2 -1
  14. pixeltable/exec/data_row_batch.py +6 -7
  15. pixeltable/exec/expr_eval_node.py +28 -28
  16. pixeltable/exec/sql_scan_node.py +7 -6
  17. pixeltable/exprs/__init__.py +4 -3
  18. pixeltable/exprs/column_ref.py +11 -2
  19. pixeltable/exprs/comparison.py +39 -1
  20. pixeltable/exprs/data_row.py +7 -0
  21. pixeltable/exprs/expr.py +26 -19
  22. pixeltable/exprs/function_call.py +17 -18
  23. pixeltable/exprs/globals.py +14 -2
  24. pixeltable/exprs/image_member_access.py +9 -28
  25. pixeltable/exprs/in_predicate.py +96 -0
  26. pixeltable/exprs/inline_array.py +13 -11
  27. pixeltable/exprs/inline_dict.py +15 -13
  28. pixeltable/exprs/row_builder.py +7 -1
  29. pixeltable/exprs/similarity_expr.py +67 -0
  30. pixeltable/ext/functions/whisperx.py +30 -0
  31. pixeltable/ext/functions/yolox.py +16 -0
  32. pixeltable/func/__init__.py +0 -2
  33. pixeltable/func/aggregate_function.py +5 -2
  34. pixeltable/func/callable_function.py +57 -13
  35. pixeltable/func/expr_template_function.py +14 -3
  36. pixeltable/func/function.py +35 -4
  37. pixeltable/func/signature.py +5 -15
  38. pixeltable/func/udf.py +8 -12
  39. pixeltable/functions/fireworks.py +9 -4
  40. pixeltable/functions/huggingface.py +48 -5
  41. pixeltable/functions/openai.py +49 -11
  42. pixeltable/functions/pil/image.py +61 -64
  43. pixeltable/functions/together.py +32 -6
  44. pixeltable/functions/util.py +0 -43
  45. pixeltable/functions/video.py +46 -8
  46. pixeltable/globals.py +443 -0
  47. pixeltable/index/__init__.py +1 -0
  48. pixeltable/index/base.py +9 -2
  49. pixeltable/index/btree.py +54 -0
  50. pixeltable/index/embedding_index.py +91 -15
  51. pixeltable/io/__init__.py +4 -0
  52. pixeltable/io/globals.py +59 -0
  53. pixeltable/{utils → io}/hf_datasets.py +48 -17
  54. pixeltable/io/pandas.py +148 -0
  55. pixeltable/{utils → io}/parquet.py +58 -33
  56. pixeltable/iterators/__init__.py +1 -1
  57. pixeltable/iterators/base.py +8 -4
  58. pixeltable/iterators/document.py +225 -93
  59. pixeltable/iterators/video.py +16 -9
  60. pixeltable/metadata/__init__.py +8 -4
  61. pixeltable/metadata/converters/convert_12.py +3 -0
  62. pixeltable/metadata/converters/convert_13.py +41 -0
  63. pixeltable/metadata/converters/convert_14.py +13 -0
  64. pixeltable/metadata/converters/convert_15.py +29 -0
  65. pixeltable/metadata/converters/util.py +63 -0
  66. pixeltable/metadata/schema.py +12 -6
  67. pixeltable/plan.py +11 -24
  68. pixeltable/store.py +16 -23
  69. pixeltable/tool/create_test_db_dump.py +49 -14
  70. pixeltable/type_system.py +27 -58
  71. pixeltable/utils/coco.py +94 -0
  72. pixeltable/utils/documents.py +42 -12
  73. pixeltable/utils/http_server.py +70 -0
  74. pixeltable-0.2.7.dist-info/METADATA +137 -0
  75. pixeltable-0.2.7.dist-info/RECORD +126 -0
  76. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
  77. pixeltable/client.py +0 -600
  78. pixeltable/exprs/image_similarity_predicate.py +0 -58
  79. pixeltable/func/batched_function.py +0 -53
  80. pixeltable/func/nos_function.py +0 -202
  81. pixeltable/tests/conftest.py +0 -171
  82. pixeltable/tests/ext/test_yolox.py +0 -21
  83. pixeltable/tests/functions/test_fireworks.py +0 -43
  84. pixeltable/tests/functions/test_functions.py +0 -60
  85. pixeltable/tests/functions/test_huggingface.py +0 -158
  86. pixeltable/tests/functions/test_openai.py +0 -162
  87. pixeltable/tests/functions/test_together.py +0 -112
  88. pixeltable/tests/test_audio.py +0 -65
  89. pixeltable/tests/test_catalog.py +0 -27
  90. pixeltable/tests/test_client.py +0 -21
  91. pixeltable/tests/test_component_view.py +0 -379
  92. pixeltable/tests/test_dataframe.py +0 -440
  93. pixeltable/tests/test_dirs.py +0 -107
  94. pixeltable/tests/test_document.py +0 -120
  95. pixeltable/tests/test_exprs.py +0 -802
  96. pixeltable/tests/test_function.py +0 -332
  97. pixeltable/tests/test_index.py +0 -138
  98. pixeltable/tests/test_migration.py +0 -44
  99. pixeltable/tests/test_nos.py +0 -54
  100. pixeltable/tests/test_snapshot.py +0 -231
  101. pixeltable/tests/test_table.py +0 -1343
  102. pixeltable/tests/test_transactional_directory.py +0 -42
  103. pixeltable/tests/test_types.py +0 -52
  104. pixeltable/tests/test_video.py +0 -159
  105. pixeltable/tests/test_view.py +0 -535
  106. pixeltable/tests/utils.py +0 -442
  107. pixeltable/utils/clip.py +0 -18
  108. pixeltable-0.2.5.dist-info/METADATA +0 -128
  109. pixeltable-0.2.5.dist-info/RECORD +0 -139
  110. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
@@ -1,42 +0,0 @@
1
- import pathlib
2
- import tempfile
3
-
4
- from pixeltable.utils.transactional_directory import transactional_directory
5
-
6
-
7
- class MyException(Exception):
8
- pass
9
-
10
- class TestTransactionalDirectory:
11
- def test_success(self) -> None:
12
- test_dir = pathlib.Path(tempfile.mkdtemp())
13
- assert test_dir.exists()
14
- final = test_dir / "test_success"
15
- assert not final.exists()
16
- with transactional_directory(final) as folder:
17
- assert folder.exists()
18
- (folder / "subfolder1").mkdir()
19
- with (folder / "test.txt").open("w") as f:
20
- f.write("test")
21
-
22
- assert final.exists()
23
- assert (final / "subfolder1").is_dir()
24
- assert (final / "test.txt").read_text() == "test"
25
-
26
- def test_failure(self) -> None:
27
- test_dir = pathlib.Path(tempfile.mkdtemp())
28
- assert test_dir.exists()
29
- final = test_dir / "test_failure"
30
- assert not final.exists()
31
-
32
- try:
33
- with transactional_directory(final) as folder:
34
- assert folder.exists()
35
- (folder / "subfolder1").mkdir()
36
- with (folder / "test.txt").open("w") as f:
37
- f.write("test")
38
- raise MyException()
39
- except MyException:
40
- pass
41
-
42
- assert not final.exists()
@@ -1,52 +0,0 @@
1
- import datetime
2
- from copy import copy
3
- from typing import List, Dict, Optional
4
-
5
- from pixeltable.type_system import \
6
- ColumnType, StringType, IntType, BoolType, ImageType, InvalidType, FloatType, TimestampType, JsonType, ArrayType
7
-
8
-
9
- class TestTypes:
10
- def test_serialize(self, init_env) -> None:
11
- type_vals = [
12
- InvalidType(), StringType(), IntType(), BoolType(), TimestampType(),
13
- ImageType(height=100, width=200, mode='RGB'),
14
- JsonType({
15
- 'a': StringType(), 'b': IntType(), 'c': FloatType(), 'd': BoolType(), 'e': TimestampType(),
16
- 'f': ImageType(height=100, width=200, mode='RGB'),
17
- 'g': JsonType({'f1': StringType(), 'f2': IntType()}),
18
- 'h': ArrayType((224, 224, 3), dtype=IntType()),
19
- }),
20
- ArrayType((224, 224, 3), dtype=IntType()),
21
- ]
22
-
23
- for t in type_vals:
24
- t_serialized = t.serialize()
25
- t_deserialized = ColumnType.deserialize(t_serialized)
26
- assert t == t_deserialized
27
-
28
- def test_from_python_type(self) -> None:
29
- test_cases = {
30
- str: StringType(),
31
- int: IntType(),
32
- float: FloatType(),
33
- bool: BoolType(),
34
- datetime.date: TimestampType(),
35
- datetime.datetime: TimestampType(),
36
- list: JsonType(),
37
- dict: JsonType(),
38
- list[int]: JsonType(),
39
- list[dict[str, int]]: JsonType(),
40
- dict[int, str]: JsonType(),
41
- dict[dict[str, int], list[int]]: JsonType(),
42
- List: JsonType(),
43
- Dict: JsonType(),
44
- List[int]: JsonType(),
45
- List[Dict[str, int]]: JsonType(),
46
- Dict[int, str]: JsonType()
47
- }
48
- for py_type, pxt_type in test_cases.items():
49
- assert ColumnType.from_python_type(py_type) == pxt_type
50
- opt_pxt_type = copy(pxt_type)
51
- opt_pxt_type.nullable = True
52
- assert ColumnType.from_python_type(Optional[py_type]) == opt_pxt_type
@@ -1,159 +0,0 @@
1
- from typing import Optional, List, Tuple
2
-
3
- import PIL
4
- import pytest
5
-
6
- import pixeltable as pxt
7
- from pixeltable import catalog
8
- from pixeltable import exceptions as excs
9
- from pixeltable.iterators import FrameIterator
10
- from pixeltable.tests.utils import get_video_files
11
- from pixeltable.tests.utils import skip_test_if_not_installed
12
- from pixeltable.type_system import VideoType, ImageType
13
- from pixeltable.utils.media_store import MediaStore
14
-
15
-
16
- class TestVideo:
17
- def create_tbls(
18
- self, cl: pxt.Client, base_name: str = 'video_tbl', view_name: str = 'frame_view'
19
- ) -> Tuple[catalog.InsertableTable, catalog.Table]:
20
- cl.drop_table(view_name, ignore_errors=True)
21
- cl.drop_table(base_name, ignore_errors=True)
22
- base_t = cl.create_table(base_name, {'video': VideoType()})
23
- args = {'video': base_t.video, 'fps': 1}
24
- view_t = cl.create_view(view_name, base_t, iterator_class=FrameIterator, iterator_args=args)
25
- return base_t, view_t
26
-
27
- def create_and_insert(
28
- self, cl: pxt.Client, stored: Optional[bool], paths: List[str]
29
- ) -> Tuple[catalog.InsertableTable, catalog.Table]:
30
- base_t, view_t = self.create_tbls(cl)
31
-
32
- view_t.add_column(transform=view_t.frame.rotate(90), stored=stored)
33
- base_t.insert({'video': p} for p in paths)
34
- total_num_rows = view_t.count()
35
- result = view_t[view_t.frame_idx >= 5][view_t.frame_idx, view_t.frame, view_t.transform].show(0)
36
- assert len(result) == total_num_rows - len(paths) * 5
37
- result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(3)
38
- assert len(result) == 3
39
- result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(0)
40
- assert len(result) == total_num_rows
41
- return base_t, view_t
42
-
43
- def test_basic(self, test_client: pxt.Client) -> None:
44
- video_filepaths = get_video_files()
45
- cl = test_client
46
-
47
- # default case: computed images are not stored
48
- _, view = self.create_and_insert(cl, None, video_filepaths)
49
- assert MediaStore.count(view.get_id()) == 0
50
-
51
- # computed images are explicitly not stored
52
- _, view = self.create_and_insert(cl, False, video_filepaths)
53
- assert MediaStore.count(view.get_id()) == 0
54
-
55
- # computed images are stored
56
- tbl, view = self.create_and_insert(cl, True, video_filepaths)
57
- assert MediaStore.count(view.get_id()) == view.count()
58
-
59
- # revert() also removes computed images
60
- tbl.insert({'video': p} for p in video_filepaths)
61
- tbl.revert()
62
- assert MediaStore.count(view.get_id()) == view.count()
63
-
64
- def test_query(self, test_client: pxt.client) -> None:
65
- skip_test_if_not_installed('boto3')
66
- video_filepaths = get_video_files()
67
- cl = test_client
68
- base_t, view_t = self.create_tbls(cl)
69
- # also include an external file, to make sure that prefetching works
70
- url = 's3://multimedia-commons/data/videos/mp4/ffe/ff3/ffeff3c6bf57504e7a6cecaff6aefbc9.mp4'
71
- video_filepaths.append(url)
72
- status = base_t.insert({'video': p} for p in video_filepaths)
73
- assert status.num_excs == 0
74
- # make sure that we can get the frames back
75
- res = view_t.select(view_t.frame).collect().to_pandas()
76
- assert res['frame'].notnull().all()
77
- # make sure we can select a specific video
78
- all_rows = view_t.select(url=view_t.video.fileurl).collect().to_pandas()
79
- res = view_t.where(view_t.video == url).collect()
80
- assert len(res) == len(all_rows[all_rows.url == url])
81
-
82
- def test_fps(self, test_client: pxt.client) -> None:
83
- cl = test_client
84
- path = get_video_files()[0]
85
- videos = cl.create_table('videos', {'video': VideoType()})
86
- frames_1_0 = cl.create_view(
87
- 'frames_1_0', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1})
88
- frames_0_5 = cl.create_view(
89
- 'frames_0_5', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/2})
90
- frames_0_33 = cl.create_view(
91
- 'frames_0_33', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/3})
92
- videos.insert(video=path)
93
- assert frames_0_5.count() == frames_1_0.count() // 2 or frames_0_5.count() == frames_1_0.count() // 2 + 1
94
- assert frames_0_33.count() == frames_1_0.count() // 3 or frames_0_33.count() == frames_1_0.count() // 3 + 1
95
-
96
- def test_computed_cols(self, test_client: pxt.client) -> None:
97
- video_filepaths = get_video_files()
98
- cl = test_client
99
- base_t, view_t = self.create_tbls(cl)
100
- # c2 and c4 depend directly on c1, c3 depends on it indirectly
101
- view_t.add_column(c1=view_t.frame.resize([224, 224]))
102
- view_t.add_column(c2=view_t.c1.rotate(10))
103
- view_t.add_column(c3=view_t.c2.rotate(20))
104
- view_t.add_column(c4=view_t.c1.rotate(30))
105
- for name in ['c1', 'c2', 'c3', 'c4']:
106
- assert not view_t.tbl_version_path.tbl_version.cols_by_name[name].is_stored
107
- base_t.insert({'video': p} for p in video_filepaths)
108
- _ = view_t[view_t.c1, view_t.c2, view_t.c3, view_t.c4].show(0)
109
-
110
- # window function that simply passes through the frame
111
- @pxt.uda(
112
- update_types=[ImageType()], value_type=ImageType(),
113
- requires_order_by=True, allows_std_agg=False, allows_window=True)
114
- class agg_fn:
115
- def __init__(self):
116
- self.img = None
117
- def update(self, frame: PIL.Image.Image) -> None:
118
- self.img = frame
119
- def value(self) -> PIL.Image.Image:
120
- return self.img
121
-
122
- def test_make_video(self, test_client: pxt.Client) -> None:
123
- video_filepaths = get_video_files()
124
- cl = test_client
125
- base_t, view_t = self.create_tbls(cl)
126
- base_t.insert({'video': p} for p in video_filepaths)
127
- # reference to the frame col requires ordering by base, pos
128
- from pixeltable.functions import make_video
129
- _ = view_t.select(make_video(view_t.pos, view_t.frame)).group_by(base_t).show()
130
- # the same without frame col
131
- view_t.add_column(transformed=view_t.frame.rotate(30), stored=True)
132
- _ = view_t.select(make_video(view_t.pos, view_t.transformed)).group_by(base_t).show()
133
-
134
- with pytest.raises(excs.Error):
135
- # make_video() doesn't allow windows
136
- _ = view_t.select(make_video(view_t.pos, view_t.frame, group_by=base_t)).show()
137
- with pytest.raises(excs.Error):
138
- # make_video() requires ordering
139
- _ = view_t.select(make_video(view_t.frame, order_by=view_t.pos)).show()
140
- with pytest.raises(excs.Error):
141
- # incompatible ordering requirements
142
- _ = view_t.select(
143
- make_video(view_t.pos, view_t.frame),
144
- make_video(view_t.pos - 1, view_t.transformed)).group_by(base_t).show()
145
-
146
- # make sure it works
147
- _ = view_t.select(self.agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
148
- status = view_t.add_column(agg=self.agg_fn(view_t.pos, view_t.frame, group_by=base_t))
149
- assert status.num_excs == 0
150
- _ = view_t.select(make_video(view_t.pos, view_t.agg)).group_by(base_t).show()
151
-
152
- # image cols computed with a window function currently need to be stored
153
- with pytest.raises(excs.Error):
154
- view_t.add_column(agg2=self.agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
155
-
156
- # reload from store
157
- cl = pxt.Client(reload=True)
158
- base_t, view_t = cl.get_table(base_t.get_name()), cl.get_table(view_t.get_name())
159
- _ = view_t.select(self.agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()