pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (99) hide show
  1. pixeltable/__init__.py +18 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +31 -50
  4. pixeltable/catalog/insertable_table.py +7 -6
  5. pixeltable/catalog/table.py +171 -57
  6. pixeltable/catalog/table_version.py +417 -140
  7. pixeltable/catalog/table_version_path.py +2 -2
  8. pixeltable/dataframe.py +239 -121
  9. pixeltable/env.py +82 -16
  10. pixeltable/exec/__init__.py +2 -1
  11. pixeltable/exec/cache_prefetch_node.py +1 -1
  12. pixeltable/exec/data_row_batch.py +6 -7
  13. pixeltable/exec/expr_eval_node.py +28 -28
  14. pixeltable/exec/in_memory_data_node.py +11 -7
  15. pixeltable/exec/sql_scan_node.py +7 -6
  16. pixeltable/exprs/__init__.py +4 -3
  17. pixeltable/exprs/column_ref.py +9 -0
  18. pixeltable/exprs/comparison.py +3 -3
  19. pixeltable/exprs/data_row.py +5 -1
  20. pixeltable/exprs/expr.py +15 -7
  21. pixeltable/exprs/function_call.py +17 -15
  22. pixeltable/exprs/image_member_access.py +9 -28
  23. pixeltable/exprs/in_predicate.py +96 -0
  24. pixeltable/exprs/inline_array.py +13 -11
  25. pixeltable/exprs/inline_dict.py +15 -13
  26. pixeltable/exprs/literal.py +16 -4
  27. pixeltable/exprs/row_builder.py +15 -41
  28. pixeltable/exprs/similarity_expr.py +65 -0
  29. pixeltable/ext/__init__.py +5 -0
  30. pixeltable/ext/functions/yolox.py +92 -0
  31. pixeltable/func/__init__.py +0 -2
  32. pixeltable/func/aggregate_function.py +18 -15
  33. pixeltable/func/callable_function.py +57 -13
  34. pixeltable/func/expr_template_function.py +20 -3
  35. pixeltable/func/function.py +35 -4
  36. pixeltable/func/globals.py +24 -14
  37. pixeltable/func/signature.py +23 -27
  38. pixeltable/func/udf.py +13 -12
  39. pixeltable/functions/__init__.py +8 -8
  40. pixeltable/functions/eval.py +7 -8
  41. pixeltable/functions/huggingface.py +64 -17
  42. pixeltable/functions/openai.py +36 -3
  43. pixeltable/functions/pil/image.py +61 -64
  44. pixeltable/functions/together.py +21 -0
  45. pixeltable/functions/util.py +11 -0
  46. pixeltable/globals.py +425 -0
  47. pixeltable/index/__init__.py +2 -0
  48. pixeltable/index/base.py +51 -0
  49. pixeltable/index/embedding_index.py +168 -0
  50. pixeltable/io/__init__.py +3 -0
  51. pixeltable/{utils → io}/hf_datasets.py +48 -17
  52. pixeltable/io/pandas.py +148 -0
  53. pixeltable/{utils → io}/parquet.py +58 -33
  54. pixeltable/iterators/__init__.py +1 -1
  55. pixeltable/iterators/base.py +4 -0
  56. pixeltable/iterators/document.py +218 -97
  57. pixeltable/iterators/video.py +8 -9
  58. pixeltable/metadata/__init__.py +7 -3
  59. pixeltable/metadata/converters/convert_12.py +3 -0
  60. pixeltable/metadata/converters/convert_13.py +41 -0
  61. pixeltable/metadata/schema.py +45 -22
  62. pixeltable/plan.py +15 -51
  63. pixeltable/store.py +38 -41
  64. pixeltable/tool/create_test_db_dump.py +39 -4
  65. pixeltable/type_system.py +47 -96
  66. pixeltable/utils/documents.py +42 -12
  67. pixeltable/utils/http_server.py +70 -0
  68. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
  69. pixeltable-0.2.6.dist-info/RECORD +119 -0
  70. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
  71. pixeltable/client.py +0 -604
  72. pixeltable/exprs/image_similarity_predicate.py +0 -58
  73. pixeltable/func/batched_function.py +0 -53
  74. pixeltable/tests/conftest.py +0 -177
  75. pixeltable/tests/functions/test_fireworks.py +0 -42
  76. pixeltable/tests/functions/test_functions.py +0 -60
  77. pixeltable/tests/functions/test_huggingface.py +0 -158
  78. pixeltable/tests/functions/test_openai.py +0 -152
  79. pixeltable/tests/functions/test_together.py +0 -111
  80. pixeltable/tests/test_audio.py +0 -65
  81. pixeltable/tests/test_catalog.py +0 -27
  82. pixeltable/tests/test_client.py +0 -21
  83. pixeltable/tests/test_component_view.py +0 -370
  84. pixeltable/tests/test_dataframe.py +0 -439
  85. pixeltable/tests/test_dirs.py +0 -107
  86. pixeltable/tests/test_document.py +0 -120
  87. pixeltable/tests/test_exprs.py +0 -805
  88. pixeltable/tests/test_function.py +0 -324
  89. pixeltable/tests/test_migration.py +0 -43
  90. pixeltable/tests/test_nos.py +0 -54
  91. pixeltable/tests/test_snapshot.py +0 -208
  92. pixeltable/tests/test_table.py +0 -1267
  93. pixeltable/tests/test_transactional_directory.py +0 -42
  94. pixeltable/tests/test_types.py +0 -22
  95. pixeltable/tests/test_video.py +0 -159
  96. pixeltable/tests/test_view.py +0 -530
  97. pixeltable/tests/utils.py +0 -408
  98. pixeltable-0.2.4.dist-info/RECORD +0 -132
  99. {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
@@ -1,111 +0,0 @@
1
- import pytest
2
-
3
- import pixeltable as pxt
4
- import pixeltable.exceptions as excs
5
- from pixeltable.tests.utils import skip_test_if_not_installed, validate_update_status
6
-
7
-
8
- class TestTogether:
9
-
10
- def test_completions(self, test_client: pxt.Client) -> None:
11
- skip_test_if_not_installed('together')
12
- TestTogether.skip_test_if_no_together_client()
13
- cl = test_client
14
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
15
- from pixeltable.functions.together import completions
16
- t.add_column(output=completions(prompt=t.input, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
17
- t.add_column(output_2=completions(
18
- prompt=t.input,
19
- model='mistralai/Mixtral-8x7B-v0.1',
20
- max_tokens=300,
21
- stop=['\n'],
22
- temperature=0.7,
23
- top_p=0.9,
24
- top_k=40,
25
- repetition_penalty=1.1,
26
- logprobs=1,
27
- echo=True,
28
- n=3,
29
- safety_model='Meta-Llama/Llama-Guard-7b'
30
- ))
31
- validate_update_status(t.insert(input='I am going to the '), 1)
32
- result = t.collect()
33
- assert len(result['output'][0]['choices'][0]['text']) > 0
34
- assert len(result['output_2'][0]['choices'][0]['text']) > 0
35
-
36
- def test_chat_completions(self, test_client: pxt.Client) -> None:
37
- skip_test_if_not_installed('together')
38
- TestTogether.skip_test_if_no_together_client()
39
- cl = test_client
40
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
41
- messages = [{'role': 'user', 'content': t.input}]
42
- from pixeltable.functions.together import chat_completions
43
- t.add_column(output=chat_completions(messages=messages, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
44
- t.add_column(output_2=chat_completions(
45
- messages=messages,
46
- model='mistralai/Mixtral-8x7B-Instruct-v0.1',
47
- max_tokens=300,
48
- stop=['\n'],
49
- temperature=0.7,
50
- top_p=0.9,
51
- top_k=40,
52
- repetition_penalty=1.1,
53
- logprobs=1,
54
- echo=True,
55
- n=3,
56
- safety_model='Meta-Llama/Llama-Guard-7b',
57
- response_format={'type': 'json_object'}
58
- ))
59
- validate_update_status(t.insert(input='Give me a typical example of a JSON structure.'), 1)
60
- result = t.collect()
61
- assert len(result['output'][0]['choices'][0]['message']) > 0
62
- assert len(result['output_2'][0]['choices'][0]['message']) > 0
63
-
64
- def test_embeddings(self, test_client: pxt.Client) -> None:
65
- skip_test_if_not_installed('together')
66
- TestTogether.skip_test_if_no_together_client()
67
- cl = test_client
68
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
69
- from pixeltable.functions.together import embeddings
70
- t.add_column(embed=embeddings(input=t.input, model='togethercomputer/m2-bert-80M-8k-retrieval'))
71
- validate_update_status(t.insert(input='Together AI provides a variety of embeddings models.'), 1)
72
- assert len(t.collect()['embed'][0]) > 0
73
-
74
- def test_image_generations(self, test_client: pxt.Client) -> None:
75
- skip_test_if_not_installed('together')
76
- TestTogether.skip_test_if_no_together_client()
77
- cl = test_client
78
- t = cl.create_table(
79
- 'test_tbl',
80
- {'input': pxt.StringType(), 'negative_prompt': pxt.StringType(nullable=True)}
81
- )
82
- from pixeltable.functions.together import image_generations
83
- t.add_column(img=image_generations(t.input, model='runwayml/stable-diffusion-v1-5'))
84
- t.add_column(img_2=image_generations(
85
- t.input,
86
- model='stabilityai/stable-diffusion-2-1',
87
- steps=30,
88
- seed=4178780,
89
- height=768,
90
- width=512,
91
- negative_prompt=t.negative_prompt
92
- ))
93
- validate_update_status(t.insert([
94
- {'input': 'A friendly dinosaur playing tennis in a cornfield'},
95
- {'input': 'A friendly dinosaur playing tennis in a cornfield',
96
- 'negative_prompt': 'tennis court'}
97
- ]), 2)
98
- assert t.collect()['img'][0].size == (512, 512)
99
- assert t.collect()['img_2'][0].size == (512, 768)
100
- assert t.collect()['img'][1].size == (512, 512)
101
- assert t.collect()['img_2'][1].size == (512, 768)
102
-
103
- # This ensures that the test will be skipped, rather than returning an error, when no API key is
104
- # available (for example, when a PR runs in CI).
105
- @staticmethod
106
- def skip_test_if_no_together_client() -> None:
107
- try:
108
- import pixeltable.functions.together
109
- _ = pixeltable.functions.together.together_client()
110
- except excs.Error as exc:
111
- pytest.skip(str(exc))
@@ -1,65 +0,0 @@
1
- from typing import Optional
2
-
3
- import av
4
-
5
- import pixeltable as pxt
6
- import pixeltable.env as env
7
- from pixeltable.tests.utils import get_video_files, get_audio_files
8
- from pixeltable.type_system import VideoType, AudioType
9
- from pixeltable.utils.media_store import MediaStore
10
-
11
-
12
- class TestAudio:
13
- def check_audio_params(self, path: str, format: Optional[str] = None, codec: Optional[str] = None):
14
- with av.open(path) as container:
15
- audio_stream = container.streams.audio[0]
16
- if format is not None:
17
- assert format == container.format.name
18
- if codec is not None:
19
- assert codec == audio_stream.codec_context.codec.name
20
-
21
- def test_basic(self, test_client: pxt.Client) -> None:
22
- audio_filepaths = get_audio_files()
23
- cl = test_client
24
- audio_t = cl.create_table('audio', {'audio_file': AudioType()})
25
- status = audio_t.insert({'audio_file': p} for p in audio_filepaths)
26
- assert status.num_rows == len(audio_filepaths)
27
- assert status.num_excs == 0
28
- paths = audio_t.select(output=audio_t.audio_file.localpath).collect()['output']
29
- assert set(paths) == set(audio_filepaths)
30
-
31
- def test_extract(self, test_client: pxt.Client) -> None:
32
- video_filepaths = get_video_files()
33
- cl = test_client
34
- video_t = cl.create_table('videos', {'video': VideoType()})
35
- from pixeltable.functions.video import extract_audio
36
- video_t.add_column(audio=extract_audio(video_t.video))
37
-
38
- # one of the 3 videos doesn't have audio
39
- status = video_t.insert({'video': p} for p in video_filepaths)
40
- assert status.num_rows == len(video_filepaths)
41
- assert status.num_excs == 0
42
- assert MediaStore.count(video_t.get_id()) == len(video_filepaths) - 1
43
- assert video_t.where(video_t.audio != None).count() == len(video_filepaths) - 1
44
- assert env.Env.get().num_tmp_files() == 0
45
-
46
- # make sure everything works with a fresh client
47
- cl = pxt.Client()
48
- video_t = cl.get_table('videos')
49
- assert video_t.where(video_t.audio != None).count() == len(video_filepaths) - 1
50
-
51
- # test generating different formats and codecs
52
- paths = video_t.select(output=extract_audio(video_t.video, format='wav', codec='pcm_s16le')).collect()['output']
53
- # media files that are created as a part of a query end up in the tmp dir
54
- assert env.Env.get().num_tmp_files() == video_t.where(video_t.audio != None).count()
55
- for path in [p for p in paths if p is not None]:
56
- self.check_audio_params(path, format='wav', codec='pcm_s16le')
57
- # higher resolution
58
- paths = video_t.select(output=extract_audio(video_t.video, format='wav', codec='pcm_s32le')).collect()['output']
59
- for path in [p for p in paths if p is not None]:
60
- self.check_audio_params(path, format='wav', codec='pcm_s32le')
61
-
62
- for format in ['mp3', 'flac']:
63
- paths = video_t.select(output=extract_audio(video_t.video, format=format)).collect()['output']
64
- for path in [p for p in paths if p is not None]:
65
- self.check_audio_params(path, format=format)
@@ -1,27 +0,0 @@
1
- from pixeltable.catalog import is_valid_identifier, is_valid_path
2
-
3
- class TestCatalog:
4
- """Tests for miscellanous catalog functions."""
5
- def test_valid_identifier(self) -> None:
6
- valid_ids = ['a', 'a1', 'a_1', 'a_']
7
- invalid_ids = ['', '_', '__', '_a', '1a', 'a.b', '.a', 'a-b']
8
- for valid_id in valid_ids:
9
- assert is_valid_identifier(valid_id), valid_ids
10
-
11
- for invalid_id in invalid_ids:
12
- assert not is_valid_identifier(invalid_id), invalid_ids
13
-
14
- def test_valid_path(self) -> None:
15
- assert is_valid_path('', empty_is_valid=True)
16
- assert not is_valid_path('', empty_is_valid=False)
17
-
18
- valid_paths = ['a', 'a_.b_', 'a.b.c', 'a.b.c.d']
19
- invalid_paths = ['.', '..', 'a.', '.a', 'a..b']
20
-
21
- for valid_path in valid_paths:
22
- assert is_valid_path(valid_path, empty_is_valid=False), valid_path
23
- assert is_valid_path(valid_path, empty_is_valid=True), valid_path
24
-
25
- for invalid_path in invalid_paths:
26
- assert not is_valid_path(invalid_path, empty_is_valid=False), invalid_path
27
- assert not is_valid_path(invalid_path, empty_is_valid=True), invalid_path
@@ -1,21 +0,0 @@
1
- import pytest
2
-
3
- import pixeltable as pxt
4
- import pixeltable.exceptions as excs
5
-
6
-
7
- class TestClient:
8
- def test_list_functions(self, init_env) -> None:
9
- cl = pxt.Client()
10
- _ = cl.list_functions()
11
- print(_)
12
-
13
- def test_drop_table(self, test_tbl: pxt.Table) -> None:
14
- cl = pxt.Client()
15
- t = cl.get_table('test_tbl')
16
- cl.drop_table('test_tbl')
17
- with pytest.raises(excs.Error):
18
- _ = cl.get_table('test_tbl')
19
- with pytest.raises(excs.Error):
20
- _ = t.show(1)
21
-
@@ -1,370 +0,0 @@
1
- from typing import Dict, Any, List, Tuple
2
-
3
- import PIL
4
- import numpy as np
5
- import pandas as pd
6
- import pytest
7
-
8
- import pixeltable as pxt
9
- from pixeltable import exceptions as excs
10
- from pixeltable.iterators import ComponentIterator
11
- from pixeltable.iterators.video import FrameIterator
12
- from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files
13
- from pixeltable.type_system import IntType, VideoType, JsonType
14
-
15
- class ConstantImgIterator(ComponentIterator):
16
- """Component iterator that generates a fixed number of all-black 1280x720 images."""
17
- def __init__(self, video: str, num_frames: int = 10):
18
- self.img = PIL.Image.new('RGB', (1280, 720))
19
- self.next_frame_idx = 0
20
- self.num_frames = num_frames
21
- self.pos_msec = 0.0
22
- self.pos_frame = 0.0
23
-
24
- @classmethod
25
- def input_schema(cls) -> Dict[str, pxt.ColumnType]:
26
- return {
27
- 'video': VideoType(nullable=False),
28
- 'fps': pxt.FloatType()
29
- }
30
-
31
- @classmethod
32
- def output_schema(cls, *args: Any, **kwargs: Any) -> Tuple[Dict[str, pxt.ColumnType], List[str]]:
33
- return {
34
- 'frame_idx': IntType(),
35
- 'pos_msec': pxt.FloatType(),
36
- 'pos_frame': pxt.FloatType(),
37
- 'frame': pxt.ImageType(),
38
- }, ['frame']
39
-
40
- def __next__(self) -> Dict[str, Any]:
41
- while True:
42
- if self.next_frame_idx == self.num_frames:
43
- raise StopIteration
44
- result = {
45
- 'frame_idx': self.next_frame_idx,
46
- 'pos_msec': self.pos_msec,
47
- 'pos_frame': self.pos_frame,
48
- 'frame': self.img,
49
- }
50
- self.next_frame_idx += 1
51
- return result
52
-
53
- def close(self) -> None:
54
- pass
55
-
56
- def set_pos(self, pos: int) -> None:
57
- if pos == self.next_frame_idx:
58
- return
59
- self.next_frame_idx = pos
60
-
61
- class TestComponentView:
62
- def test_basic(self, test_client: pxt.Client) -> None:
63
- cl = test_client
64
- # create video table
65
- schema = {'video': VideoType(), 'angle': IntType(), 'other_angle': IntType()}
66
- video_t = cl.create_table('video_tbl', schema)
67
- video_filepaths = get_test_video_files()
68
-
69
- # cannot add 'pos' column
70
- with pytest.raises(excs.Error) as excinfo:
71
- video_t.add_column(pos=IntType())
72
- assert 'reserved' in str(excinfo.value)
73
-
74
- # parameter missing
75
- with pytest.raises(excs.Error) as excinfo:
76
- args = {'fps': 1}
77
- _ = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
78
- assert 'missing a required argument' in str(excinfo.value)
79
-
80
- # bad parameter type
81
- with pytest.raises(excs.Error) as excinfo:
82
- args = {'video': video_t.video, 'fps': '1'}
83
- _ = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
84
- assert 'expected float' in str(excinfo.value)
85
-
86
- # bad parameter type
87
- with pytest.raises(excs.Error) as excinfo:
88
- args = {'video': 1, 'fps': 1}
89
- _ = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
90
- assert 'expected file path' in str(excinfo.value)
91
-
92
- # create frame view
93
- args = {'video': video_t.video, 'fps': 1}
94
- view_t = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
95
- # computed column that references a column from the base
96
- view_t.add_column(angle2=view_t.angle + 1)
97
- # computed column that references an unstored and a stored computed view column
98
- view_t.add_column(v1=view_t.frame.rotate(view_t.angle2), stored=True)
99
- # computed column that references a stored computed column from the view
100
- view_t.add_column(v2=view_t.frame_idx - 1)
101
- # computed column that references an unstored view column and a column from the base; the stored value
102
- # cannot be materialized in SQL directly
103
- view_t.add_column(v3=view_t.frame.rotate(video_t.other_angle), stored=True)
104
-
105
- # and load data
106
- rows = [{'video': p, 'angle': 30, 'other_angle': -30} for p in video_filepaths]
107
- status = video_t.insert(rows)
108
- assert status.num_excs == 0
109
- # pos and frame_idx are identical
110
- res = view_t.select(view_t.pos, view_t.frame_idx).collect().to_pandas()
111
- assert np.all(res['pos'] == res['frame_idx'])
112
-
113
- video_url = video_t.select(video_t.video.fileurl).show(0)[0, 0]
114
- result = view_t.where(view_t.video == video_url).select(view_t.frame, view_t.frame_idx) \
115
- .collect()
116
- result = view_t.where(view_t.video == video_url).select(view_t.frame_idx).order_by(view_t.frame_idx) \
117
- .collect().to_pandas()
118
- assert len(result) > 0
119
- assert np.all(result['frame_idx'] == pd.Series(range(len(result))))
120
-
121
- def test_add_column(self, test_client: pxt.Client) -> None:
122
- cl = test_client
123
- # create video table
124
- video_t = cl.create_table('video_tbl', {'video': VideoType()})
125
- video_filepaths = get_test_video_files()
126
- # create frame view
127
- args = {'video': video_t.video, 'fps': 1}
128
- view_t = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
129
-
130
- rows = [{'video': p} for p in video_filepaths]
131
- video_t.insert(rows)
132
- # adding a non-computed column backfills it with nulls
133
- view_t.add_column(annotation=JsonType(nullable=True))
134
- assert view_t.count() == view_t.where(view_t.annotation == None).count()
135
- # adding more data via the base table sets the column values to null
136
- video_t.insert(rows)
137
- _ = view_t.where(view_t.annotation == None).count()
138
- assert view_t.count() == view_t.where(view_t.annotation == None).count()
139
-
140
- with pytest.raises(excs.Error) as excinfo:
141
- view_t.add_column(annotation=JsonType(nullable=False))
142
- assert 'must be nullable' in str(excinfo.value)
143
-
144
- def test_update(self, test_client: pxt.Client) -> None:
145
- cl = test_client
146
- # create video table
147
- video_t = cl.create_table('video_tbl', {'video': VideoType()})
148
- # create frame view with manually updated column
149
- args = {'video': video_t.video, 'fps': 1}
150
- view_t = cl.create_view(
151
- 'test_view', video_t, schema={'annotation': JsonType(nullable=True)},
152
- iterator_class=FrameIterator, iterator_args=args)
153
-
154
- video_filepaths = get_test_video_files()
155
- rows = [{'video': p} for p in video_filepaths]
156
- status = video_t.insert(rows)
157
- assert status.num_excs == 0
158
- import urllib
159
- video_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(video_filepaths[0]))
160
- status = view_t.update({'annotation': {'a': 1}}, where=view_t.video == video_url)
161
- c1 = view_t.where(view_t.annotation != None).count()
162
- c2 = view_t.where(view_t.video == video_url).count()
163
- assert c1 == c2
164
-
165
- with pytest.raises(excs.Error) as excinfo:
166
- _ = cl.create_view(
167
- 'bad_view', video_t, schema={'annotation': JsonType(nullable=False)},
168
- iterator_class=FrameIterator, iterator_args=args)
169
- assert 'must be nullable' in str(excinfo.value)
170
-
171
- # break up the snapshot tests for better (future) parallelization
172
- def test_snapshot1(self, test_client: pxt.Client) -> None:
173
- has_column = False
174
- has_filter = False
175
- for reload_md in [False, True]:
176
- cl = pxt.Client(reload=True)
177
- self.run_snapshot_test(cl, has_column=has_column, has_filter=has_filter, reload_md=reload_md)
178
-
179
- def test_snapshot2(self, test_client: pxt.Client) -> None:
180
- has_column = True
181
- has_filter = False
182
- for reload_md in [False, True]:
183
- cl = pxt.Client(reload=True)
184
- self.run_snapshot_test(cl, has_column=has_column, has_filter=has_filter, reload_md=reload_md)
185
-
186
- def test_snapshot3(self, test_client: pxt.Client) -> None:
187
- has_column = False
188
- has_filter = True
189
- for reload_md in [False, True]:
190
- cl = pxt.Client(reload=True)
191
- self.run_snapshot_test(cl, has_column=has_column, has_filter=has_filter, reload_md=reload_md)
192
-
193
- def test_snapshot4(self, test_client: pxt.Client) -> None:
194
- has_column = True
195
- has_filter = True
196
- for reload_md in [False, True]:
197
- cl = pxt.Client(reload=True)
198
- self.run_snapshot_test(cl, has_column=has_column, has_filter=has_filter, reload_md=reload_md)
199
-
200
- def run_snapshot_test(self, cl: pxt.Client, has_column: bool, has_filter: bool, reload_md: bool) -> None:
201
- base_path = 'video_tbl'
202
- view_path = 'test_view'
203
- snap_path = 'test_snap'
204
-
205
- # create video table
206
- video_t = cl.create_table(base_path, {'video': VideoType(), 'margin': IntType()})
207
- video_filepaths = get_test_video_files()
208
- rows = [{'video': path, 'margin': i * 10} for i, path in enumerate(video_filepaths)]
209
- status = video_t.insert(rows)
210
- assert status.num_rows == len(rows)
211
- assert status.num_excs == 0
212
-
213
- # create frame view with a computed column
214
- args = {'video': video_t.video}
215
- view_t = cl.create_view(
216
- view_path, video_t, iterator_class=ConstantImgIterator, iterator_args=args, is_snapshot=False)
217
- view_t.add_column(
218
- cropped=view_t.frame.crop([view_t.margin, view_t.margin, view_t.frame.width, view_t.frame.height]),
219
- stored=True)
220
- snap_col_expr = [view_t.cropped.width * view_t.cropped.height] if has_column else []
221
- view_query = \
222
- view_t.select(
223
- view_t.margin, view_t.frame.width, view_t.frame.height, view_t.cropped.width,
224
- view_t.cropped.height, *snap_col_expr)\
225
- .order_by(view_t.video, view_t.pos)
226
- if has_filter:
227
- view_query = view_query.where(view_t.frame_idx < 10)
228
- orig_resultset = view_query.collect()
229
-
230
- # create snapshot of view
231
- filter = view_t.frame_idx < 10 if has_filter else None
232
- schema = {'c1': view_t.cropped.width * view_t.cropped.height} if has_column else {}
233
- snap_t = cl.create_view(snap_path, view_t, schema=schema, filter=filter, is_snapshot=True)
234
- snap_cols = [snap_t.c1] if has_column else []
235
- snap_query = \
236
- snap_t.select(
237
- snap_t.margin, snap_t.frame.width, snap_t.frame.height, snap_t.cropped.width,
238
- snap_t.cropped.height, *snap_cols)\
239
- .order_by(snap_t.video, snap_t.pos)
240
- assert_resultset_eq(snap_query.collect(), orig_resultset)
241
-
242
- if reload_md:
243
- cl = pxt.Client(reload=True)
244
- video_t = cl.get_table(base_path)
245
- snap_t = cl.get_table(snap_path)
246
- snap_cols = [snap_t.c1] if has_column else []
247
- snap_query = \
248
- snap_t.select(
249
- snap_t.margin, snap_t.frame.width, snap_t.frame.height, snap_t.cropped.width,
250
- snap_t.cropped.height, *snap_cols) \
251
- .order_by(snap_t.video, snap_t.pos)
252
-
253
- # snapshot is unaffected by base insert()
254
- status = video_t.insert(rows)
255
- assert status.num_excs == 0
256
- assert_resultset_eq(snap_query.collect(), orig_resultset)
257
-
258
- # snapshot is unaffected by base update()
259
- status = video_t.update({'margin': video_t.margin + 1})
260
- assert status.num_excs == 0
261
- assert_resultset_eq(snap_query.collect(), orig_resultset)
262
-
263
- # snapshot is unaffected by base delete()
264
- status = video_t.delete()
265
- assert status.num_excs == 0
266
- assert_resultset_eq(snap_query.collect(), orig_resultset)
267
-
268
- cl.drop_table(snap_path)
269
- cl.drop_table(view_path)
270
- cl.drop_table(base_path)
271
-
272
- def test_chained_views(self, test_client: pxt.Client) -> None:
273
- """Component view followed by a standard view"""
274
- cl = test_client
275
- # create video table
276
- schema = {'video': VideoType(), 'int1': IntType(), 'int2': IntType()}
277
- video_t = cl.create_table('video_tbl', schema)
278
- video_filepaths = get_test_video_files()
279
-
280
- # create first view
281
- args = {'video': video_t.video}
282
- v1 = cl.create_view('test_view', video_t, iterator_class=ConstantImgIterator, iterator_args=args)
283
- # computed column that references stored base column
284
- v1.add_column(int3=v1.int1 + 1)
285
- # stored computed column that references an unstored and a stored computed view column
286
- v1.add_column(img1=v1.frame.crop([v1.int3, v1.int3, v1.frame.width, v1.frame.height]), stored=True)
287
- # computed column that references a stored computed view column
288
- v1.add_column(int4=v1.frame_idx + 1)
289
- # unstored computed column that references an unstored and a stored computed view column
290
- v1.add_column(img2=v1.frame.crop([v1.int4, v1.int4, v1.frame.width, v1.frame.height]), stored=False)
291
-
292
- # create second view
293
- v2 = cl.create_view('chained_view', v1)
294
- # computed column that references stored video_t column
295
- v2.add_column(int5=v2.int1 + 1)
296
- v2.add_column(int6=v2.int2 + 1)
297
- # stored computed column that references a stored base column and a stored computed view column;
298
- # indirectly references int1
299
- v2.add_column(img3=v2.img1.crop([v2.int5, v2.int5, v2.img1.width, v2.img1.height]), stored=True)
300
- # stored computed column that references an unstored base column and a manually updated column from video_t;
301
- # indirectly references int2
302
- v2.add_column(img4=v2.img2.crop([v2.int6, v2.int6, v2.img2.width, v2.img2.height]), stored=True)
303
- # comuted column that indirectly references int1 and int2
304
- v2.add_column(int7=v2.img3.width + v2.img4.width)
305
-
306
- def check_view():
307
- assert_resultset_eq(
308
- v1.select(v1.int3).order_by(v1.video, v1.pos).collect(),
309
- v1.select(v1.int1 + 1).order_by(v1.video, v1.pos).collect())
310
- assert_resultset_eq(
311
- v1.select(v1.int4).order_by(v1.video, v1.pos).collect(),
312
- v1.select(v1.frame_idx + 1).order_by(v1.video, v1.pos).collect())
313
- assert_resultset_eq(
314
- v1\
315
- .select(v1.video, v1.img1.width, v1.img1.height)\
316
- .order_by(v1.video, v1.pos).collect(),
317
- v1\
318
- .select(v1.video, v1.frame.width - v1.int1 - 1, v1.frame.height - v1.int1 - 1)\
319
- .order_by(v1.video, v1.pos).collect())
320
- assert_resultset_eq(
321
- v2.select(v2.int5).order_by(v2.video, v2.pos).collect(),
322
- v2.select(v2.int1 + 1).order_by(v2.video, v2.pos).collect())
323
- assert_resultset_eq(
324
- v2.select(v2.int6).order_by(v2.video, v2.pos).collect(),
325
- v2.select(v2.int2 + 1).order_by(v2.video, v2.pos).collect())
326
- assert_resultset_eq(
327
- v2 \
328
- .select(v2.video, v2.img3.width, v2.img3.height) \
329
- .order_by(v2.video, v2.pos).collect(),
330
- v2 \
331
- .select(v2.video, v2.frame.width - v2.int1 * 2 - 2, v2.frame.height - v2.int1 * 2 - 2) \
332
- .order_by(v2.video, v2.pos).collect())
333
- assert_resultset_eq(
334
- v2 \
335
- .select(v2.video, v2.img4.width, v2.img4.height) \
336
- .order_by(v2.video, v2.pos).collect(),
337
- v2 \
338
- .select(
339
- v2.video, v2.frame.width - v2.frame_idx - v2.int2 - 2,
340
- v2.frame.height - v2.frame_idx - v2.int2 - 2) \
341
- .order_by(v2.video, v2.pos).collect())
342
- assert_resultset_eq(
343
- v2.select(v2.int7).order_by(v2.video, v2.pos).collect(),
344
- v2.select(v2.img3.width + v2.img4.width).order_by(v2.video, v2.pos).collect())
345
- assert_resultset_eq(
346
- v2.select(v2.int7).order_by(v2.video, v2.pos).collect(),
347
- v2.select(v2.frame.width - v2.int1 * 2 - 2 + v2.frame.width - v2.frame_idx - v2.int2 - 2)\
348
- .order_by(v2.video, v2.pos).collect())
349
-
350
- # load data
351
- rows = [{'video': p, 'int1': i, 'int2': len(video_filepaths) - i} for i, p in enumerate(video_filepaths)]
352
- status = video_t.insert(rows)
353
- assert status.num_rows == video_t.count() + v1.count() + v2.count()
354
- check_view()
355
-
356
- # update int1: propagates to int3, img1, int5, img3, int7
357
- # TODO: how to test that img4 doesn't get recomputed as part of the computation of int7?
358
- # need to collect more runtime stats (eg, called functions)
359
- import urllib
360
- video_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(video_filepaths[0]))
361
- status = video_t.update({'int1': video_t.int1 + 1}, where=video_t.video == video_url)
362
- assert status.num_rows == 1 + v1.where(v1.video == video_url).count() + v2.where(v2.video == video_url).count()
363
- assert sorted('int1 int3 img1 int5 img3 int7'.split()) == sorted([str.split('.')[1] for str in status.updated_cols])
364
- check_view()
365
-
366
- # update int2: propagates to img4, int6, int7
367
- status = video_t.update({'int2': video_t.int2 + 1}, where=video_t.video == video_url)
368
- assert status.num_rows == 1 + v2.where(v2.video == video_url).count()
369
- assert sorted('int2 img4 int6 int7'.split()) == sorted([str.split('.')[1] for str in status.updated_cols])
370
- check_view()