pixeltable 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (87) hide show
  1. pixeltable/__init__.py +18 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +9 -5
  4. pixeltable/catalog/insertable_table.py +0 -2
  5. pixeltable/catalog/table.py +16 -8
  6. pixeltable/catalog/table_version.py +3 -2
  7. pixeltable/dataframe.py +184 -110
  8. pixeltable/env.py +69 -18
  9. pixeltable/exec/__init__.py +2 -1
  10. pixeltable/exec/data_row_batch.py +6 -7
  11. pixeltable/exec/expr_eval_node.py +28 -28
  12. pixeltable/exec/sql_scan_node.py +7 -6
  13. pixeltable/exprs/__init__.py +4 -3
  14. pixeltable/exprs/column_ref.py +9 -0
  15. pixeltable/exprs/expr.py +15 -7
  16. pixeltable/exprs/function_call.py +17 -15
  17. pixeltable/exprs/image_member_access.py +9 -28
  18. pixeltable/exprs/in_predicate.py +96 -0
  19. pixeltable/exprs/inline_array.py +13 -11
  20. pixeltable/exprs/inline_dict.py +15 -13
  21. pixeltable/exprs/row_builder.py +7 -1
  22. pixeltable/exprs/similarity_expr.py +65 -0
  23. pixeltable/func/__init__.py +0 -2
  24. pixeltable/func/aggregate_function.py +3 -0
  25. pixeltable/func/callable_function.py +57 -13
  26. pixeltable/func/expr_template_function.py +11 -2
  27. pixeltable/func/function.py +35 -4
  28. pixeltable/func/signature.py +5 -15
  29. pixeltable/func/udf.py +6 -10
  30. pixeltable/functions/huggingface.py +23 -4
  31. pixeltable/functions/openai.py +34 -1
  32. pixeltable/functions/pil/image.py +61 -64
  33. pixeltable/functions/together.py +21 -0
  34. pixeltable/globals.py +425 -0
  35. pixeltable/index/base.py +3 -1
  36. pixeltable/index/embedding_index.py +87 -14
  37. pixeltable/io/__init__.py +3 -0
  38. pixeltable/{utils → io}/hf_datasets.py +48 -17
  39. pixeltable/io/pandas.py +148 -0
  40. pixeltable/{utils → io}/parquet.py +58 -33
  41. pixeltable/iterators/__init__.py +1 -1
  42. pixeltable/iterators/base.py +4 -0
  43. pixeltable/iterators/document.py +218 -97
  44. pixeltable/iterators/video.py +8 -9
  45. pixeltable/metadata/__init__.py +7 -3
  46. pixeltable/metadata/converters/convert_12.py +3 -0
  47. pixeltable/metadata/converters/convert_13.py +41 -0
  48. pixeltable/plan.py +2 -19
  49. pixeltable/store.py +2 -2
  50. pixeltable/tool/create_test_db_dump.py +32 -13
  51. pixeltable/type_system.py +13 -54
  52. pixeltable/utils/documents.py +42 -12
  53. pixeltable/utils/http_server.py +70 -0
  54. {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/METADATA +10 -7
  55. pixeltable-0.2.6.dist-info/RECORD +119 -0
  56. {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
  57. pixeltable/client.py +0 -600
  58. pixeltable/exprs/image_similarity_predicate.py +0 -58
  59. pixeltable/func/batched_function.py +0 -53
  60. pixeltable/tests/conftest.py +0 -171
  61. pixeltable/tests/ext/test_yolox.py +0 -21
  62. pixeltable/tests/functions/test_fireworks.py +0 -43
  63. pixeltable/tests/functions/test_functions.py +0 -60
  64. pixeltable/tests/functions/test_huggingface.py +0 -158
  65. pixeltable/tests/functions/test_openai.py +0 -162
  66. pixeltable/tests/functions/test_together.py +0 -112
  67. pixeltable/tests/test_audio.py +0 -65
  68. pixeltable/tests/test_catalog.py +0 -27
  69. pixeltable/tests/test_client.py +0 -21
  70. pixeltable/tests/test_component_view.py +0 -379
  71. pixeltable/tests/test_dataframe.py +0 -440
  72. pixeltable/tests/test_dirs.py +0 -107
  73. pixeltable/tests/test_document.py +0 -120
  74. pixeltable/tests/test_exprs.py +0 -802
  75. pixeltable/tests/test_function.py +0 -332
  76. pixeltable/tests/test_index.py +0 -138
  77. pixeltable/tests/test_migration.py +0 -44
  78. pixeltable/tests/test_nos.py +0 -54
  79. pixeltable/tests/test_snapshot.py +0 -231
  80. pixeltable/tests/test_table.py +0 -1343
  81. pixeltable/tests/test_transactional_directory.py +0 -42
  82. pixeltable/tests/test_types.py +0 -52
  83. pixeltable/tests/test_video.py +0 -159
  84. pixeltable/tests/test_view.py +0 -535
  85. pixeltable/tests/utils.py +0 -442
  86. pixeltable-0.2.5.dist-info/RECORD +0 -139
  87. {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
@@ -1,112 +0,0 @@
1
- import pytest
2
-
3
- import pixeltable as pxt
4
- import pixeltable.exceptions as excs
5
- from pixeltable.tests.utils import skip_test_if_not_installed, validate_update_status
6
-
7
-
8
- @pytest.mark.remote_api
9
- class TestTogether:
10
-
11
- def test_completions(self, test_client: pxt.Client) -> None:
12
- skip_test_if_not_installed('together')
13
- TestTogether.skip_test_if_no_together_client()
14
- cl = test_client
15
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
16
- from pixeltable.functions.together import completions
17
- t.add_column(output=completions(prompt=t.input, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
18
- t.add_column(output_2=completions(
19
- prompt=t.input,
20
- model='mistralai/Mixtral-8x7B-v0.1',
21
- max_tokens=300,
22
- stop=['\n'],
23
- temperature=0.7,
24
- top_p=0.9,
25
- top_k=40,
26
- repetition_penalty=1.1,
27
- logprobs=1,
28
- echo=True,
29
- n=3,
30
- safety_model='Meta-Llama/Llama-Guard-7b'
31
- ))
32
- validate_update_status(t.insert(input='I am going to the '), 1)
33
- result = t.collect()
34
- assert len(result['output'][0]['choices'][0]['text']) > 0
35
- assert len(result['output_2'][0]['choices'][0]['text']) > 0
36
-
37
- def test_chat_completions(self, test_client: pxt.Client) -> None:
38
- skip_test_if_not_installed('together')
39
- TestTogether.skip_test_if_no_together_client()
40
- cl = test_client
41
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
42
- messages = [{'role': 'user', 'content': t.input}]
43
- from pixeltable.functions.together import chat_completions
44
- t.add_column(output=chat_completions(messages=messages, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
45
- t.add_column(output_2=chat_completions(
46
- messages=messages,
47
- model='mistralai/Mixtral-8x7B-Instruct-v0.1',
48
- max_tokens=300,
49
- stop=['\n'],
50
- temperature=0.7,
51
- top_p=0.9,
52
- top_k=40,
53
- repetition_penalty=1.1,
54
- logprobs=1,
55
- echo=True,
56
- n=3,
57
- safety_model='Meta-Llama/Llama-Guard-7b',
58
- response_format={'type': 'json_object'}
59
- ))
60
- validate_update_status(t.insert(input='Give me a typical example of a JSON structure.'), 1)
61
- result = t.collect()
62
- assert len(result['output'][0]['choices'][0]['message']) > 0
63
- assert len(result['output_2'][0]['choices'][0]['message']) > 0
64
-
65
- def test_embeddings(self, test_client: pxt.Client) -> None:
66
- skip_test_if_not_installed('together')
67
- TestTogether.skip_test_if_no_together_client()
68
- cl = test_client
69
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
70
- from pixeltable.functions.together import embeddings
71
- t.add_column(embed=embeddings(input=t.input, model='togethercomputer/m2-bert-80M-8k-retrieval'))
72
- validate_update_status(t.insert(input='Together AI provides a variety of embeddings models.'), 1)
73
- assert len(t.collect()['embed'][0]) > 0
74
-
75
- def test_image_generations(self, test_client: pxt.Client) -> None:
76
- skip_test_if_not_installed('together')
77
- TestTogether.skip_test_if_no_together_client()
78
- cl = test_client
79
- t = cl.create_table(
80
- 'test_tbl',
81
- {'input': pxt.StringType(), 'negative_prompt': pxt.StringType(nullable=True)}
82
- )
83
- from pixeltable.functions.together import image_generations
84
- t.add_column(img=image_generations(t.input, model='runwayml/stable-diffusion-v1-5'))
85
- t.add_column(img_2=image_generations(
86
- t.input,
87
- model='stabilityai/stable-diffusion-2-1',
88
- steps=30,
89
- seed=4178780,
90
- height=768,
91
- width=512,
92
- negative_prompt=t.negative_prompt
93
- ))
94
- validate_update_status(t.insert([
95
- {'input': 'A friendly dinosaur playing tennis in a cornfield'},
96
- {'input': 'A friendly dinosaur playing tennis in a cornfield',
97
- 'negative_prompt': 'tennis court'}
98
- ]), 2)
99
- assert t.collect()['img'][0].size == (512, 512)
100
- assert t.collect()['img_2'][0].size == (512, 768)
101
- assert t.collect()['img'][1].size == (512, 512)
102
- assert t.collect()['img_2'][1].size == (512, 768)
103
-
104
- # This ensures that the test will be skipped, rather than returning an error, when no API key is
105
- # available (for example, when a PR runs in CI).
106
- @staticmethod
107
- def skip_test_if_no_together_client() -> None:
108
- try:
109
- import pixeltable.functions.together
110
- _ = pixeltable.functions.together.together_client()
111
- except excs.Error as exc:
112
- pytest.skip(str(exc))
@@ -1,65 +0,0 @@
1
- from typing import Optional
2
-
3
- import av
4
-
5
- import pixeltable as pxt
6
- import pixeltable.env as env
7
- from pixeltable.tests.utils import get_video_files, get_audio_files
8
- from pixeltable.type_system import VideoType, AudioType
9
- from pixeltable.utils.media_store import MediaStore
10
-
11
-
12
- class TestAudio:
13
- def check_audio_params(self, path: str, format: Optional[str] = None, codec: Optional[str] = None):
14
- with av.open(path) as container:
15
- audio_stream = container.streams.audio[0]
16
- if format is not None:
17
- assert format == container.format.name
18
- if codec is not None:
19
- assert codec == audio_stream.codec_context.codec.name
20
-
21
- def test_basic(self, test_client: pxt.Client) -> None:
22
- audio_filepaths = get_audio_files()
23
- cl = test_client
24
- audio_t = cl.create_table('audio', {'audio_file': AudioType()})
25
- status = audio_t.insert({'audio_file': p} for p in audio_filepaths)
26
- assert status.num_rows == len(audio_filepaths)
27
- assert status.num_excs == 0
28
- paths = audio_t.select(output=audio_t.audio_file.localpath).collect()['output']
29
- assert set(paths) == set(audio_filepaths)
30
-
31
- def test_extract(self, test_client: pxt.Client) -> None:
32
- video_filepaths = get_video_files()
33
- cl = test_client
34
- video_t = cl.create_table('videos', {'video': VideoType()})
35
- from pixeltable.functions.video import extract_audio
36
- video_t.add_column(audio=extract_audio(video_t.video))
37
-
38
- # one of the 3 videos doesn't have audio
39
- status = video_t.insert({'video': p} for p in video_filepaths)
40
- assert status.num_rows == len(video_filepaths)
41
- assert status.num_excs == 0
42
- assert MediaStore.count(video_t.get_id()) == len(video_filepaths) - 1
43
- assert video_t.where(video_t.audio != None).count() == len(video_filepaths) - 1
44
- assert env.Env.get().num_tmp_files() == 0
45
-
46
- # make sure everything works with a fresh client
47
- cl = pxt.Client()
48
- video_t = cl.get_table('videos')
49
- assert video_t.where(video_t.audio != None).count() == len(video_filepaths) - 1
50
-
51
- # test generating different formats and codecs
52
- paths = video_t.select(output=extract_audio(video_t.video, format='wav', codec='pcm_s16le')).collect()['output']
53
- # media files that are created as a part of a query end up in the tmp dir
54
- assert env.Env.get().num_tmp_files() == video_t.where(video_t.audio != None).count()
55
- for path in [p for p in paths if p is not None]:
56
- self.check_audio_params(path, format='wav', codec='pcm_s16le')
57
- # higher resolution
58
- paths = video_t.select(output=extract_audio(video_t.video, format='wav', codec='pcm_s32le')).collect()['output']
59
- for path in [p for p in paths if p is not None]:
60
- self.check_audio_params(path, format='wav', codec='pcm_s32le')
61
-
62
- for format in ['mp3', 'flac']:
63
- paths = video_t.select(output=extract_audio(video_t.video, format=format)).collect()['output']
64
- for path in [p for p in paths if p is not None]:
65
- self.check_audio_params(path, format=format)
@@ -1,27 +0,0 @@
1
- from pixeltable.catalog import is_valid_identifier, is_valid_path
2
-
3
- class TestCatalog:
4
- """Tests for miscellaneous catalog functions."""
5
- def test_valid_identifier(self) -> None:
6
- valid_ids = ['a', 'a1', 'a_1', 'a_']
7
- invalid_ids = ['', '_', '__', '_a', '1a', 'a.b', '.a', 'a-b']
8
- for valid_id in valid_ids:
9
- assert is_valid_identifier(valid_id), valid_ids
10
-
11
- for invalid_id in invalid_ids:
12
- assert not is_valid_identifier(invalid_id), invalid_ids
13
-
14
- def test_valid_path(self) -> None:
15
- assert is_valid_path('', empty_is_valid=True)
16
- assert not is_valid_path('', empty_is_valid=False)
17
-
18
- valid_paths = ['a', 'a_.b_', 'a.b.c', 'a.b.c.d']
19
- invalid_paths = ['.', '..', 'a.', '.a', 'a..b']
20
-
21
- for valid_path in valid_paths:
22
- assert is_valid_path(valid_path, empty_is_valid=False), valid_path
23
- assert is_valid_path(valid_path, empty_is_valid=True), valid_path
24
-
25
- for invalid_path in invalid_paths:
26
- assert not is_valid_path(invalid_path, empty_is_valid=False), invalid_path
27
- assert not is_valid_path(invalid_path, empty_is_valid=True), invalid_path
@@ -1,21 +0,0 @@
1
- import pytest
2
-
3
- import pixeltable as pxt
4
- import pixeltable.exceptions as excs
5
-
6
-
7
- class TestClient:
8
- def test_list_functions(self, init_env) -> None:
9
- cl = pxt.Client()
10
- _ = cl.list_functions()
11
- print(_)
12
-
13
- def test_drop_table(self, test_tbl: pxt.Table) -> None:
14
- cl = pxt.Client()
15
- t = cl.get_table('test_tbl')
16
- cl.drop_table('test_tbl')
17
- with pytest.raises(excs.Error):
18
- _ = cl.get_table('test_tbl')
19
- with pytest.raises(excs.Error):
20
- _ = t.show(1)
21
-
@@ -1,379 +0,0 @@
1
- from typing import Dict, Any, List, Tuple
2
-
3
- import PIL
4
- import numpy as np
5
- import pandas as pd
6
- import pytest
7
-
8
- import pixeltable as pxt
9
- from pixeltable import exceptions as excs
10
- from pixeltable.iterators import ComponentIterator
11
- from pixeltable.iterators.video import FrameIterator
12
- from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files, validate_update_status
13
- from pixeltable.type_system import IntType, VideoType, JsonType
14
-
15
- class ConstantImgIterator(ComponentIterator):
16
- """Component iterator that generates a fixed number of all-black 1280x720 images."""
17
- def __init__(self, video: str, num_frames: int = 10):
18
- self.img = PIL.Image.new('RGB', (1280, 720))
19
- self.next_frame_idx = 0
20
- self.num_frames = num_frames
21
- self.pos_msec = 0.0
22
- self.pos_frame = 0.0
23
-
24
- @classmethod
25
- def input_schema(cls) -> Dict[str, pxt.ColumnType]:
26
- return {
27
- 'video': VideoType(nullable=False),
28
- 'fps': pxt.FloatType()
29
- }
30
-
31
- @classmethod
32
- def output_schema(cls, *args: Any, **kwargs: Any) -> Tuple[Dict[str, pxt.ColumnType], List[str]]:
33
- return {
34
- 'frame_idx': IntType(),
35
- 'pos_msec': pxt.FloatType(),
36
- 'pos_frame': pxt.FloatType(),
37
- 'frame': pxt.ImageType(),
38
- }, ['frame']
39
-
40
- def __next__(self) -> Dict[str, Any]:
41
- while True:
42
- if self.next_frame_idx == self.num_frames:
43
- raise StopIteration
44
- result = {
45
- 'frame_idx': self.next_frame_idx,
46
- 'pos_msec': self.pos_msec,
47
- 'pos_frame': self.pos_frame,
48
- 'frame': self.img,
49
- }
50
- self.next_frame_idx += 1
51
- return result
52
-
53
- def close(self) -> None:
54
- pass
55
-
56
- def set_pos(self, pos: int) -> None:
57
- if pos == self.next_frame_idx:
58
- return
59
- self.next_frame_idx = pos
60
-
61
- class TestComponentView:
62
- def test_basic(self, test_client: pxt.Client) -> None:
63
- cl = test_client
64
- # create video table
65
- schema = {'video': VideoType(), 'angle': IntType(), 'other_angle': IntType()}
66
- video_t = cl.create_table('video_tbl', schema)
67
- video_filepaths = get_test_video_files()
68
-
69
- # cannot add 'pos' column
70
- with pytest.raises(excs.Error) as excinfo:
71
- video_t.add_column(pos=IntType())
72
- assert 'reserved' in str(excinfo.value)
73
-
74
- # parameter missing
75
- with pytest.raises(excs.Error) as excinfo:
76
- args = {'fps': 1}
77
- _ = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
78
- assert 'missing a required argument' in str(excinfo.value)
79
-
80
- # bad parameter type
81
- with pytest.raises(excs.Error) as excinfo:
82
- args = {'video': video_t.video, 'fps': '1'}
83
- _ = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
84
- assert 'expected float' in str(excinfo.value)
85
-
86
- # bad parameter type
87
- with pytest.raises(excs.Error) as excinfo:
88
- args = {'video': 1, 'fps': 1}
89
- _ = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
90
- assert 'expected file path' in str(excinfo.value)
91
-
92
- # create frame view
93
- args = {'video': video_t.video, 'fps': 1}
94
- view_t = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
95
- # computed column that references a column from the base
96
- view_t.add_column(angle2=view_t.angle + 1)
97
- # computed column that references an unstored and a stored computed view column
98
- view_t.add_column(v1=view_t.frame.rotate(view_t.angle2), stored=True)
99
- # computed column that references a stored computed column from the view
100
- view_t.add_column(v2=view_t.frame_idx - 1)
101
- # computed column that references an unstored view column and a column from the base; the stored value
102
- # cannot be materialized in SQL directly
103
- view_t.add_column(v3=view_t.frame.rotate(video_t.other_angle), stored=True)
104
-
105
- # and load data
106
- rows = [{'video': p, 'angle': 30, 'other_angle': -30} for p in video_filepaths]
107
- status = video_t.insert(rows)
108
- assert status.num_excs == 0
109
- # pos and frame_idx are identical
110
- res = view_t.select(view_t.pos, view_t.frame_idx).collect().to_pandas()
111
- assert np.all(res['pos'] == res['frame_idx'])
112
-
113
- video_url = video_t.select(video_t.video.fileurl).show(0)[0, 0]
114
- result = view_t.where(view_t.video == video_url).select(view_t.frame, view_t.frame_idx) \
115
- .collect()
116
- result = view_t.where(view_t.video == video_url).select(view_t.frame_idx).order_by(view_t.frame_idx) \
117
- .collect().to_pandas()
118
- assert len(result) > 0
119
- assert np.all(result['frame_idx'] == pd.Series(range(len(result))))
120
-
121
- def test_add_column(self, test_client: pxt.Client) -> None:
122
- cl = test_client
123
- # create video table
124
- video_t = cl.create_table('video_tbl', {'video': VideoType()})
125
- video_filepaths = get_test_video_files()
126
- # create frame view
127
- args = {'video': video_t.video, 'fps': 1}
128
- view_t = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
129
-
130
- rows = [{'video': p} for p in video_filepaths]
131
- video_t.insert(rows)
132
- # adding a non-computed column backfills it with nulls
133
- view_t.add_column(annotation=JsonType(nullable=True))
134
- assert view_t.count() == view_t.where(view_t.annotation == None).count()
135
- # adding more data via the base table sets the column values to null
136
- video_t.insert(rows)
137
- _ = view_t.where(view_t.annotation == None).count()
138
- assert view_t.count() == view_t.where(view_t.annotation == None).count()
139
-
140
- with pytest.raises(excs.Error) as excinfo:
141
- view_t.add_column(annotation=JsonType(nullable=False))
142
- assert 'must be nullable' in str(excinfo.value)
143
-
144
- def test_update(self, test_client: pxt.Client) -> None:
145
- cl = test_client
146
- # create video table
147
- video_t = cl.create_table('video_tbl', {'video': VideoType()})
148
- # create frame view with manually updated column
149
- args = {'video': video_t.video, 'fps': 1}
150
- view_t = cl.create_view(
151
- 'test_view', video_t, schema={'annotation': JsonType(nullable=True)},
152
- iterator_class=FrameIterator, iterator_args=args)
153
-
154
- video_filepaths = get_test_video_files()
155
- rows = [{'video': p} for p in video_filepaths]
156
- status = video_t.insert(rows)
157
- assert status.num_excs == 0
158
- import urllib
159
- video_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(video_filepaths[0]))
160
- validate_update_status(
161
- view_t.update({'annotation': {'a': 1}}, where=view_t.video == video_url),
162
- expected_rows=view_t.where(view_t.video == video_url).count())
163
- assert view_t.where(view_t.annotation != None).count() == view_t.where(view_t.video == video_url).count()
164
-
165
- # batch update with _rowid works
166
- validate_update_status(
167
- view_t.batch_update(
168
- [{'annotation': {'a': 1}, '_rowid': (1, 0)}, {'annotation': {'a': 1}, '_rowid': (1, 1)}]),
169
- expected_rows=2)
170
- with pytest.raises(AssertionError):
171
- # malformed _rowid
172
- view_t.batch_update([{'annotation': {'a': 1}, '_rowid': (1,)}])
173
-
174
- with pytest.raises(excs.Error) as excinfo:
175
- _ = cl.create_view(
176
- 'bad_view', video_t, schema={'annotation': JsonType(nullable=False)},
177
- iterator_class=FrameIterator, iterator_args=args)
178
- assert 'must be nullable' in str(excinfo.value)
179
-
180
- # break up the snapshot tests for better (future) parallelization
181
- def test_snapshot1(self, test_client: pxt.Client) -> None:
182
- has_column = False
183
- has_filter = False
184
- for reload_md in [False, True]:
185
- cl = pxt.Client(reload=True)
186
- self.run_snapshot_test(cl, has_column=has_column, has_filter=has_filter, reload_md=reload_md)
187
-
188
- def test_snapshot2(self, test_client: pxt.Client) -> None:
189
- has_column = True
190
- has_filter = False
191
- for reload_md in [False, True]:
192
- cl = pxt.Client(reload=True)
193
- self.run_snapshot_test(cl, has_column=has_column, has_filter=has_filter, reload_md=reload_md)
194
-
195
- def test_snapshot3(self, test_client: pxt.Client) -> None:
196
- has_column = False
197
- has_filter = True
198
- for reload_md in [False, True]:
199
- cl = pxt.Client(reload=True)
200
- self.run_snapshot_test(cl, has_column=has_column, has_filter=has_filter, reload_md=reload_md)
201
-
202
- def test_snapshot4(self, test_client: pxt.Client) -> None:
203
- has_column = True
204
- has_filter = True
205
- for reload_md in [False, True]:
206
- cl = pxt.Client(reload=True)
207
- self.run_snapshot_test(cl, has_column=has_column, has_filter=has_filter, reload_md=reload_md)
208
-
209
- def run_snapshot_test(self, cl: pxt.Client, has_column: bool, has_filter: bool, reload_md: bool) -> None:
210
- base_path = 'video_tbl'
211
- view_path = 'test_view'
212
- snap_path = 'test_snap'
213
-
214
- # create video table
215
- video_t = cl.create_table(base_path, {'video': VideoType(), 'margin': IntType()})
216
- video_filepaths = get_test_video_files()
217
- rows = [{'video': path, 'margin': i * 10} for i, path in enumerate(video_filepaths)]
218
- status = video_t.insert(rows)
219
- assert status.num_rows == len(rows)
220
- assert status.num_excs == 0
221
-
222
- # create frame view with a computed column
223
- args = {'video': video_t.video}
224
- view_t = cl.create_view(
225
- view_path, video_t, iterator_class=ConstantImgIterator, iterator_args=args, is_snapshot=False)
226
- view_t.add_column(
227
- cropped=view_t.frame.crop([view_t.margin, view_t.margin, view_t.frame.width, view_t.frame.height]),
228
- stored=True)
229
- snap_col_expr = [view_t.cropped.width * view_t.cropped.height] if has_column else []
230
- view_query = \
231
- view_t.select(
232
- view_t.margin, view_t.frame.width, view_t.frame.height, view_t.cropped.width,
233
- view_t.cropped.height, *snap_col_expr)\
234
- .order_by(view_t.video, view_t.pos)
235
- if has_filter:
236
- view_query = view_query.where(view_t.frame_idx < 10)
237
- orig_resultset = view_query.collect()
238
-
239
- # create snapshot of view
240
- filter = view_t.frame_idx < 10 if has_filter else None
241
- schema = {'c1': view_t.cropped.width * view_t.cropped.height} if has_column else {}
242
- snap_t = cl.create_view(snap_path, view_t, schema=schema, filter=filter, is_snapshot=True)
243
- snap_cols = [snap_t.c1] if has_column else []
244
- snap_query = \
245
- snap_t.select(
246
- snap_t.margin, snap_t.frame.width, snap_t.frame.height, snap_t.cropped.width,
247
- snap_t.cropped.height, *snap_cols)\
248
- .order_by(snap_t.video, snap_t.pos)
249
- assert_resultset_eq(snap_query.collect(), orig_resultset)
250
-
251
- if reload_md:
252
- cl = pxt.Client(reload=True)
253
- video_t = cl.get_table(base_path)
254
- snap_t = cl.get_table(snap_path)
255
- snap_cols = [snap_t.c1] if has_column else []
256
- snap_query = \
257
- snap_t.select(
258
- snap_t.margin, snap_t.frame.width, snap_t.frame.height, snap_t.cropped.width,
259
- snap_t.cropped.height, *snap_cols) \
260
- .order_by(snap_t.video, snap_t.pos)
261
-
262
- # snapshot is unaffected by base insert()
263
- status = video_t.insert(rows)
264
- assert status.num_excs == 0
265
- assert_resultset_eq(snap_query.collect(), orig_resultset)
266
-
267
- # snapshot is unaffected by base update()
268
- status = video_t.update({'margin': video_t.margin + 1})
269
- assert status.num_excs == 0
270
- assert_resultset_eq(snap_query.collect(), orig_resultset)
271
-
272
- # snapshot is unaffected by base delete()
273
- status = video_t.delete()
274
- assert status.num_excs == 0
275
- assert_resultset_eq(snap_query.collect(), orig_resultset)
276
-
277
- cl.drop_table(snap_path)
278
- cl.drop_table(view_path)
279
- cl.drop_table(base_path)
280
-
281
- def test_chained_views(self, test_client: pxt.Client) -> None:
282
- """Component view followed by a standard view"""
283
- cl = test_client
284
- # create video table
285
- schema = {'video': VideoType(), 'int1': IntType(), 'int2': IntType()}
286
- video_t = cl.create_table('video_tbl', schema)
287
- video_filepaths = get_test_video_files()
288
-
289
- # create first view
290
- args = {'video': video_t.video}
291
- v1 = cl.create_view('test_view', video_t, iterator_class=ConstantImgIterator, iterator_args=args)
292
- # computed column that references stored base column
293
- v1.add_column(int3=v1.int1 + 1)
294
- # stored computed column that references an unstored and a stored computed view column
295
- v1.add_column(img1=v1.frame.crop([v1.int3, v1.int3, v1.frame.width, v1.frame.height]), stored=True)
296
- # computed column that references a stored computed view column
297
- v1.add_column(int4=v1.frame_idx + 1)
298
- # unstored computed column that references an unstored and a stored computed view column
299
- v1.add_column(img2=v1.frame.crop([v1.int4, v1.int4, v1.frame.width, v1.frame.height]), stored=False)
300
-
301
- # create second view
302
- v2 = cl.create_view('chained_view', v1)
303
- # computed column that references stored video_t column
304
- v2.add_column(int5=v2.int1 + 1)
305
- v2.add_column(int6=v2.int2 + 1)
306
- # stored computed column that references a stored base column and a stored computed view column;
307
- # indirectly references int1
308
- v2.add_column(img3=v2.img1.crop([v2.int5, v2.int5, v2.img1.width, v2.img1.height]), stored=True)
309
- # stored computed column that references an unstored base column and a manually updated column from video_t;
310
- # indirectly references int2
311
- v2.add_column(img4=v2.img2.crop([v2.int6, v2.int6, v2.img2.width, v2.img2.height]), stored=True)
312
- # computed column that indirectly references int1 and int2
313
- v2.add_column(int7=v2.img3.width + v2.img4.width)
314
-
315
- def check_view():
316
- assert_resultset_eq(
317
- v1.select(v1.int3).order_by(v1.video, v1.pos).collect(),
318
- v1.select(v1.int1 + 1).order_by(v1.video, v1.pos).collect())
319
- assert_resultset_eq(
320
- v1.select(v1.int4).order_by(v1.video, v1.pos).collect(),
321
- v1.select(v1.frame_idx + 1).order_by(v1.video, v1.pos).collect())
322
- assert_resultset_eq(
323
- v1\
324
- .select(v1.video, v1.img1.width, v1.img1.height)\
325
- .order_by(v1.video, v1.pos).collect(),
326
- v1\
327
- .select(v1.video, v1.frame.width - v1.int1 - 1, v1.frame.height - v1.int1 - 1)\
328
- .order_by(v1.video, v1.pos).collect())
329
- assert_resultset_eq(
330
- v2.select(v2.int5).order_by(v2.video, v2.pos).collect(),
331
- v2.select(v2.int1 + 1).order_by(v2.video, v2.pos).collect())
332
- assert_resultset_eq(
333
- v2.select(v2.int6).order_by(v2.video, v2.pos).collect(),
334
- v2.select(v2.int2 + 1).order_by(v2.video, v2.pos).collect())
335
- assert_resultset_eq(
336
- v2 \
337
- .select(v2.video, v2.img3.width, v2.img3.height) \
338
- .order_by(v2.video, v2.pos).collect(),
339
- v2 \
340
- .select(v2.video, v2.frame.width - v2.int1 * 2 - 2, v2.frame.height - v2.int1 * 2 - 2) \
341
- .order_by(v2.video, v2.pos).collect())
342
- assert_resultset_eq(
343
- v2 \
344
- .select(v2.video, v2.img4.width, v2.img4.height) \
345
- .order_by(v2.video, v2.pos).collect(),
346
- v2 \
347
- .select(
348
- v2.video, v2.frame.width - v2.frame_idx - v2.int2 - 2,
349
- v2.frame.height - v2.frame_idx - v2.int2 - 2) \
350
- .order_by(v2.video, v2.pos).collect())
351
- assert_resultset_eq(
352
- v2.select(v2.int7).order_by(v2.video, v2.pos).collect(),
353
- v2.select(v2.img3.width + v2.img4.width).order_by(v2.video, v2.pos).collect())
354
- assert_resultset_eq(
355
- v2.select(v2.int7).order_by(v2.video, v2.pos).collect(),
356
- v2.select(v2.frame.width - v2.int1 * 2 - 2 + v2.frame.width - v2.frame_idx - v2.int2 - 2)\
357
- .order_by(v2.video, v2.pos).collect())
358
-
359
- # load data
360
- rows = [{'video': p, 'int1': i, 'int2': len(video_filepaths) - i} for i, p in enumerate(video_filepaths)]
361
- status = video_t.insert(rows)
362
- assert status.num_rows == video_t.count() + v1.count() + v2.count()
363
- check_view()
364
-
365
- # update int1: propagates to int3, img1, int5, img3, int7
366
- # TODO: how to test that img4 doesn't get recomputed as part of the computation of int7?
367
- # need to collect more runtime stats (eg, called functions)
368
- import urllib
369
- video_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(video_filepaths[0]))
370
- status = video_t.update({'int1': video_t.int1 + 1}, where=video_t.video == video_url)
371
- assert status.num_rows == 1 + v1.where(v1.video == video_url).count() + v2.where(v2.video == video_url).count()
372
- assert sorted('int1 int3 img1 int5 img3 int7'.split()) == sorted([str.split('.')[1] for str in status.updated_cols])
373
- check_view()
374
-
375
- # update int2: propagates to img4, int6, int7
376
- status = video_t.update({'int2': video_t.int2 + 1}, where=video_t.video == video_url)
377
- assert status.num_rows == 1 + v2.where(v2.video == video_url).count()
378
- assert sorted('int2 img4 int6 int7'.split()) == sorted([str.split('.')[1] for str in status.updated_cols])
379
- check_view()