pixeltable 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +34 -6
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +520 -30
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +373 -45
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +187 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +61 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +88 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +27 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +413 -182
- pixeltable/tests/conftest.py +143 -87
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +372 -0
- pixeltable/tests/test_dataframe.py +433 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +117 -0
- pixeltable/tests/test_exprs.py +591 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_functions.py +283 -1
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1085 -262
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +149 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +186 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/type_system.py +490 -126
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +126 -0
- pixeltable/utils/pytorch.py +172 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.0.dist-info/LICENSE +18 -0
- pixeltable-0.2.0.dist-info/METADATA +117 -0
- pixeltable-0.2.0.dist-info/RECORD +125 -0
- {pixeltable-0.1.1.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.1.dist-info/METADATA +0 -31
- pixeltable-0.1.1.dist-info/RECORD +0 -36
pixeltable/tests/utils.py
CHANGED
|
@@ -1,16 +1,22 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import glob
|
|
3
|
+
import json
|
|
3
4
|
import os
|
|
4
5
|
from pathlib import Path
|
|
5
|
-
from typing import Dict, Any, List
|
|
6
|
+
from typing import Dict, Any, List, Optional
|
|
6
7
|
|
|
7
8
|
import numpy as np
|
|
8
9
|
import pandas as pd
|
|
10
|
+
import pytest
|
|
9
11
|
|
|
10
|
-
import pixeltable as
|
|
12
|
+
import pixeltable as pxt
|
|
13
|
+
import pixeltable.type_system as ts
|
|
11
14
|
from pixeltable import catalog
|
|
12
|
-
from pixeltable.
|
|
13
|
-
from pixeltable.
|
|
15
|
+
from pixeltable.dataframe import DataFrameResultSet
|
|
16
|
+
from pixeltable.env import Env
|
|
17
|
+
from pixeltable.type_system import \
|
|
18
|
+
ColumnType, StringType, IntType, FloatType, ArrayType, BoolType, TimestampType, JsonType, ImageType, VideoType
|
|
19
|
+
|
|
14
20
|
|
|
15
21
|
def make_default_type(t: ColumnType.Type) -> ColumnType:
|
|
16
22
|
if t == ColumnType.Type.STRING:
|
|
@@ -25,14 +31,19 @@ def make_default_type(t: ColumnType.Type) -> ColumnType:
|
|
|
25
31
|
return TimestampType()
|
|
26
32
|
assert False
|
|
27
33
|
|
|
28
|
-
def make_tbl(
|
|
29
|
-
|
|
34
|
+
def make_tbl(cl: pxt.Client, name: str = 'test', col_names: Optional[List[str]] = None) -> catalog.InsertableTable:
    """Create a table called `name` with one column per entry of `col_names`.

    The column at position i is given the type produced by
    make_default_type(ColumnType.Type(i % 5)). When `col_names` is omitted,
    a single column 'c1' is used.

    Returns:
        whatever cl.create_table() returns for the new table
    """
    names = ['c1'] if col_names is None else col_names
    schema: Dict[str, ts.ColumnType] = {
        f'{col_name}': make_default_type(ColumnType.Type(position % 5))
        for position, col_name in enumerate(names)
    }
    return cl.create_table(name, schema)
|
|
33
41
|
|
|
34
|
-
def create_table_data(t: catalog.Table, col_names: List[str] =
|
|
42
|
+
def create_table_data(t: catalog.Table, col_names: Optional[List[str]] = None, num_rows: int = 10) -> List[Dict[str, Any]]:
|
|
43
|
+
if col_names is None:
|
|
44
|
+
col_names = []
|
|
35
45
|
data: Dict[str, Any] = {}
|
|
46
|
+
|
|
36
47
|
sample_dict = {
|
|
37
48
|
'detections': [{
|
|
38
49
|
'id': '637e8e073b28441a453564cf',
|
|
@@ -70,38 +81,138 @@ def create_table_data(t: catalog.Table, col_names: List[str] = [], num_rows: int
|
|
|
70
81
|
}
|
|
71
82
|
|
|
72
83
|
if len(col_names) == 0:
|
|
73
|
-
col_names = [c.name for c in t.columns]
|
|
84
|
+
col_names = [c.name for c in t.columns() if not c.is_computed]
|
|
74
85
|
|
|
86
|
+
col_types = t.column_types()
|
|
75
87
|
for col_name in col_names:
|
|
76
|
-
|
|
88
|
+
col_type = col_types[col_name]
|
|
77
89
|
col_data: Any = None
|
|
78
|
-
if
|
|
90
|
+
if col_type.is_string_type():
|
|
79
91
|
col_data = ['test string'] * num_rows
|
|
80
|
-
if
|
|
81
|
-
col_data = np.random.randint(0, 100, size=num_rows)
|
|
82
|
-
if
|
|
83
|
-
col_data = np.random.random(size=num_rows) * 100
|
|
84
|
-
if
|
|
92
|
+
if col_type.is_int_type():
|
|
93
|
+
col_data = np.random.randint(0, 100, size=num_rows).tolist()
|
|
94
|
+
if col_type.is_float_type():
|
|
95
|
+
col_data = (np.random.random(size=num_rows) * 100).tolist()
|
|
96
|
+
if col_type.is_bool_type():
|
|
85
97
|
col_data = np.random.randint(0, 2, size=num_rows)
|
|
86
98
|
col_data = [False if i == 0 else True for i in col_data]
|
|
87
|
-
if
|
|
88
|
-
col_data = datetime.datetime.now()
|
|
89
|
-
if
|
|
99
|
+
if col_type.is_timestamp_type():
|
|
100
|
+
col_data = [datetime.datetime.now()] * num_rows
|
|
101
|
+
if col_type.is_json_type():
|
|
90
102
|
col_data = [sample_dict] * num_rows
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
103
|
+
if col_type.is_array_type():
|
|
104
|
+
col_data = [np.ones(col_type.shape, dtype=col_type.numpy_dtype()) for i in range(num_rows)]
|
|
105
|
+
if col_type.is_image_type():
|
|
106
|
+
image_path = get_image_files()[0]
|
|
107
|
+
col_data = [image_path for i in range(num_rows)]
|
|
108
|
+
if col_type.is_video_type():
|
|
109
|
+
video_path = get_video_files()[0]
|
|
110
|
+
col_data = [video_path for i in range(num_rows)]
|
|
111
|
+
data[col_name] = col_data
|
|
112
|
+
rows = [{col_name: data[col_name][i] for col_name in col_names} for i in range(num_rows)]
|
|
113
|
+
return rows
|
|
114
|
+
|
|
115
|
+
def create_test_tbl(client: pxt.Client, name: str = 'test_tbl') -> catalog.Table:
    """Create table `name` (primary key 'c2'), add column c8 and insert 100 rows.

    Row i holds: c1 = 'test string {i}', c1n = same string (None for every
    10th row), c2 = i, c3 = float(i), c4 = bool(i % 2), c5 = one shared
    timestamp, c6 = a per-row nested dict, c7 = one shared list of two dicts.

    Returns:
        the table object returned by client.create_table()
    """
    col_types = {
        'c1': StringType(nullable=False),
        'c1n': StringType(nullable=True),
        'c2': IntType(nullable=False),
        'c3': FloatType(nullable=False),
        'c4': BoolType(nullable=False),
        'c5': TimestampType(nullable=False),
        'c6': JsonType(nullable=False),
        'c7': JsonType(nullable=False),
    }
    tbl = client.create_table(name, col_types, primary_key='c2')
    tbl.add_column(c8=[[1, 2, 3], [4, 5, 6]])

    row_count = 100
    nested = {
        'f1': 'test string 1',
        'f2': 1,
        'f3': 1.0,
        'f4': True,
        'f5': [1.0, 2.0, 3.0, 4.0],
        'f6': {
            'f7': 'test string 2',
            'f8': [1.0, 2.0, 3.0, 4.0],
        },
    }
    # the same list object is used as the c7 value of every row
    nested_pair = [nested, nested]
    # the timestamp is taken once and reused for every row
    timestamp = datetime.datetime.now()

    records = []
    for n in range(row_count):
        text = f'test string {n}'
        records.append({
            'c1': text,
            'c1n': None if n % 10 == 0 else text,
            'c2': n,
            'c3': float(n),
            'c4': bool(n % 2),
            'c5': timestamp,
            # c6 gets a fresh dict (with fresh lists) for each row
            'c6': {
                'f1': f'test string {n}',
                'f2': n,
                'f3': float(n),
                'f4': bool(n % 2),
                'f5': [1.0, 2.0, 3.0, 4.0],
                'f6': {
                    'f7': 'test string 2',
                    'f8': [1.0, 2.0, 3.0, 4.0],
                },
            },
            'c7': nested_pair,
        })
    tbl.insert(records)
    return tbl
|
|
96
179
|
|
|
97
|
-
def
|
|
180
|
+
def create_all_datatypes_tbl(test_client: pxt.Client) -> catalog.Table:
    """Create 'all_datatype_tbl' with one column per type in the schema below.

    The table is filled with 11 rows generated by create_table_data(); each
    row's 'row_id' is then overwritten with the row's position (0..10) so
    that rows can be selected deterministically.

    Returns:
        the table object returned by test_client.create_table()
    """
    col_types = {
        'row_id': IntType(nullable=False),  # used for row selection
        'c_array': ArrayType(shape=(10,), dtype=FloatType(), nullable=True),
        'c_bool': BoolType(nullable=True),
        'c_float': FloatType(nullable=True),
        'c_image': ImageType(nullable=True),
        'c_int': IntType(nullable=True),
        'c_json': JsonType(nullable=True),
        'c_string': StringType(nullable=True),
        'c_timestamp': TimestampType(nullable=True),
        'c_video': VideoType(nullable=True),
    }
    all_types_tbl = test_client.create_table('all_datatype_tbl', col_types)
    generated = create_table_data(all_types_tbl, num_rows=11)

    # replace the generated row_id values with the row position
    for position, record in enumerate(generated):
        record.update(row_id=position)

    all_types_tbl.insert(generated)
    return all_types_tbl
|
|
203
|
+
|
|
204
|
+
def read_data_file(dir_name: str, file_name: str, path_col_names: Optional[List[str]] = None) -> List[Dict[str, Any]]:
|
|
98
205
|
"""
|
|
99
206
|
Locate dir_name, create df out of file_name.
|
|
100
|
-
transform columns 'file_name' to column 'file_path' with absolute paths
|
|
101
207
|
path_col_names: col names in csv file that contain file names; those will be converted to absolute paths
|
|
102
208
|
by adding the path to 'file_name' as a prefix.
|
|
209
|
+
Returns:
|
|
210
|
+
list of rows, each a dict mapping column name to value
|
|
103
211
|
"""
|
|
104
|
-
|
|
212
|
+
if path_col_names is None:
|
|
213
|
+
path_col_names = []
|
|
214
|
+
tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
|
|
215
|
+
glob_result = glob.glob(f'{tests_dir}/**/{dir_name}', recursive=True)
|
|
105
216
|
assert len(glob_result) == 1, f'Could not find {dir_name}'
|
|
106
217
|
abs_path = Path(glob_result[0])
|
|
107
218
|
data_file_path = abs_path / file_name
|
|
@@ -110,24 +221,54 @@ def read_data_file(dir_name: str, file_name: str, path_col_names: List[str] = []
|
|
|
110
221
|
for col_name in path_col_names:
|
|
111
222
|
assert col_name in df.columns
|
|
112
223
|
df[col_name] = df.apply(lambda r: str(abs_path / r[col_name]), axis=1)
|
|
113
|
-
|
|
224
|
+
return df.to_dict(orient='records')
|
|
225
|
+
|
|
226
|
+
def get_video_files(include_bad_video=False) -> List[str]:
    """Return the paths of all entries inside any 'videos' directory below this file's directory.

    Unless `include_bad_video` is true, paths containing the substring
    'bad_video' are left out.
    """
    # search with respect to tests/ dir
    base_dir = os.path.dirname(__file__)
    matches = glob.glob(f'{base_dir}/**/videos/*', recursive=True)
    if include_bad_video:
        return matches
    return [path for path in matches if 'bad_video' not in path]
|
|
114
232
|
|
|
115
|
-
def
|
|
116
|
-
|
|
233
|
+
def get_image_files() -> List[str]:
    """Return the paths of all entries inside any 'imagenette2-160' directory below this file's directory."""
    # search with respect to tests/ dir
    base_dir = os.path.dirname(__file__)
    return glob.glob(f'{base_dir}/**/imagenette2-160/*', recursive=True)
|
|
118
237
|
|
|
238
|
+
def get_audio_files(include_bad_audio=False) -> List[str]:
    """Return the paths of all entries inside any 'audio' directory below this file's directory.

    Unless `include_bad_audio` is true, paths containing the substring
    'bad_audio' are left out.
    """
    base_dir = os.path.dirname(__file__)
    matches = glob.glob(f'{base_dir}/**/audio/*', recursive=True)
    if include_bad_audio:
        return matches
    return [path for path in matches if 'bad_audio' not in path]
|
|
244
|
+
|
|
245
|
+
def get_documents() -> List[str]:
    """Return the paths of all entries inside any 'documents' directory below this file's directory, minus '.pdf' files."""
    base_dir = os.path.dirname(__file__)
    candidates = glob.glob(f'{base_dir}/**/documents/*', recursive=True)
    # for now, we can only handle .html and .md
    usable = []
    for path in candidates:
        if path.endswith('.pdf'):
            continue
        usable.append(path)
    return usable
|
|
249
|
+
|
|
250
|
+
def get_sentences(n: int = 100) -> List[str]:
    """Return the 'question' text of the first `n` entries of jeopardy.json.

    The file is looked up recursively below this file's directory; the first
    match is used (an IndexError is raised if there is none). All single-quote
    characters are removed from each question.
    """
    tests_dir = os.path.dirname(__file__)
    path = glob.glob(f'{tests_dir}/**/jeopardy.json', recursive=True)[0]
    # JSON is UTF-8; do not depend on the platform's default text encoding
    with open(path, 'r', encoding='utf-8') as f:
        questions_list = json.load(f)
    # this dataset contains \' around the questions
    return [q['question'].replace("'", '') for q in questions_list[:n]]
|
|
257
|
+
|
|
258
|
+
def assert_resultset_eq(r1: 'DataFrameResultSet', r2: 'DataFrameResultSet') -> None:
    """Assert that two result sets hold the same values, column by column.

    Columns are matched by position; their names are ignored. Columns whose
    dtype in `r1` is float64 are compared with a tolerance (np.allclose),
    all others with Series.equals().

    Raises:
        AssertionError: if the row counts, column counts or any column's values differ
    """
    assert len(r1) == len(r2)
    num_cols = len(r1.column_names())
    assert num_cols == len(r2.column_names())  # we don't care about the actual column names
    r1_pd = r1.to_pandas()
    r2_pd = r2.to_pandas()
    for i in range(num_cols):
        # only compare column values
        s1 = r1_pd.iloc[:, i]
        s2 = r2_pd.iloc[:, i]
        if s1.dtype == np.float64:
            # equal_nan: NaNs in the same position count as equal, matching
            # what Series.equals() does in the non-float branch
            assert np.allclose(s1, s2, equal_nan=True)
        else:
            assert s1.equals(s2)
|
|
119
271
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
@classmethod
|
|
124
|
-
def make_aggregator(cls) -> 'SumAggregator':
|
|
125
|
-
return cls()
|
|
126
|
-
def update(self, val: int) -> None:
|
|
127
|
-
self.sum += val
|
|
128
|
-
def value(self) -> int:
|
|
129
|
-
return self.sum
|
|
130
|
-
|
|
131
|
-
sum_uda = Function(
|
|
132
|
-
IntType(), [IntType()],
|
|
133
|
-
init_fn=SumAggregator.make_aggregator, update_fn=SumAggregator.update, value_fn=SumAggregator.value)
|
|
272
|
+
def skip_test_if_not_installed(package) -> None:
    """Skip the calling test (via pytest.skip) when Env reports `package` as not installed."""
    if Env.get().is_installed_package(package):
        return
    pytest.skip(f'Package `{package}` is not installed.')
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import pathlib
|
|
6
|
+
import subprocess
|
|
7
|
+
|
|
8
|
+
import pgserver
|
|
9
|
+
import toml
|
|
10
|
+
|
|
11
|
+
import pixeltable as pxt
|
|
12
|
+
import pixeltable.metadata as metadata
|
|
13
|
+
from pixeltable.env import Env
|
|
14
|
+
from pixeltable.type_system import \
|
|
15
|
+
StringType, IntType, FloatType, BoolType, TimestampType, JsonType
|
|
16
|
+
|
|
17
|
+
_logger = logging.getLogger('pixeltable')
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Dumper:
    """Builds a sample Pixeltable database and writes a compressed pg_dump of it."""

    def __init__(self, output_dir='target', db_name='pxtdump') -> None:
        """Point the Pixeltable environment at a mock home dir and (re)initialize the DB.

        Args:
            output_dir: directory that receives the mock home ('.pixeltable') and the dump artifacts
            db_name: value exported as PIXELTABLE_DB
        """
        self.output_dir = pathlib.Path(output_dir)
        # the home in effect before we override PIXELTABLE_HOME; config.yaml and pgdata are still taken from it
        shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', '~/.pixeltable')).expanduser()
        mock_home_dir = self.output_dir / '.pixeltable'
        mock_home_dir.mkdir(parents=True, exist_ok=True)
        os.environ['PIXELTABLE_HOME'] = str(mock_home_dir)
        os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.yaml')
        os.environ['PIXELTABLE_DB'] = db_name
        os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')

        Env.get().set_up(reinit_db=True)
        self.cl = pxt.Client()
        self.cl.logging(level=logging.DEBUG, to_stdout=True)

    def dump_db(self) -> None:
        """Write the gzip-compressed pg_dump and an accompanying info .toml file to output_dir.

        Raises:
            subprocess.CalledProcessError: if pg_dump, gzip or `git rev-parse` exits with a non-zero status
        """
        md_version = metadata.VERSION
        dump_file = self.output_dir / f'pixeltable-v{md_version:03d}-test.dump.gz'
        _logger.info(f'Creating database dump at: {dump_file}')
        pg_package_dir = os.path.dirname(pgserver.__file__)
        pg_dump_binary = f'{pg_package_dir}/pginstall/bin/pg_dump'
        _logger.info(f'Using pg_dump binary at: {pg_dump_binary}')
        pg_dump_cmd = [pg_dump_binary, Env.get().db_url, '-U', 'postgres', '-Fc']
        with open(dump_file, 'wb') as dump:
            # Leaving the Popen context closes our end of the pipe and waits for pg_dump,
            # so the child is always reaped, even when gzip fails.
            with subprocess.Popen(pg_dump_cmd, stdout=subprocess.PIPE) as pg_dump_process:
                subprocess.run(
                    ["gzip", "-9"],
                    stdin=pg_dump_process.stdout,
                    stdout=dump,
                    check=True
                )
            # gzip succeeds on whatever it was fed; a failed pg_dump has to be detected separately
            if pg_dump_process.returncode != 0:
                raise subprocess.CalledProcessError(pg_dump_process.returncode, pg_dump_cmd)
        info_file = self.output_dir / f'pixeltable-v{md_version:03d}-test-info.toml'
        git_sha = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('ascii').strip()
        user = os.environ.get('USER', os.environ.get('USERNAME'))
        info_dict = {'pixeltable-dump': {
            'metadata-version': md_version,
            'git-sha': git_sha,
            'datetime': datetime.datetime.utcnow(),
            'user': user
        }}
        with open(info_file, 'w') as info:
            toml.dump(info_dict, info)

    # TODO: Add additional features to the test DB dump (ideally it should exercise
    # every major pixeltable DB feature)
    def create_tables(self) -> None:
        """Create 'sample_table' (primary key 'c2'), add derived columns and insert 100 rows."""
        schema = {
            'c1': StringType(nullable=False),
            'c1n': StringType(nullable=True),
            'c2': IntType(nullable=False),
            'c3': FloatType(nullable=False),
            'c4': BoolType(nullable=False),
            'c5': TimestampType(nullable=False),
            'c6': JsonType(nullable=False),
            'c7': JsonType(nullable=False),
        }
        t = self.cl.create_table('sample_table', schema, primary_key='c2')
        t.add_column(c8=[[1, 2, 3], [4, 5, 6]])

        # Add columns for .astype converters to ensure they're persisted properly
        t.add_column(c2_as_float=t.c2.astype(FloatType()))

        # Add columns for .apply
        t.add_column(c2_to_string=t.c2.apply(str))
        t.add_column(c6_to_string=t.c6.apply(json.dumps))
        t.add_column(c6_back_to_json=t.c6_to_string.apply(json.loads))

        num_rows = 100
        d1 = {
            'f1': 'test string 1',
            'f2': 1,
            'f3': 1.0,
            'f4': True,
            'f5': [1.0, 2.0, 3.0, 4.0],
            'f6': {
                'f7': 'test string 2',
                'f8': [1.0, 2.0, 3.0, 4.0],
            },
        }
        d2 = [d1, d1]

        c1_data = [f'test string {i}' for i in range(num_rows)]
        c2_data = [i for i in range(num_rows)]
        c3_data = [float(i) for i in range(num_rows)]
        c4_data = [bool(i % 2) for i in range(num_rows)]
        # a single timestamp shared by all rows
        c5_data = [datetime.datetime.now()] * num_rows
        c6_data = []
        for i in range(num_rows):
            d = {
                'f1': f'test string {i}',
                'f2': i,
                'f3': float(i),
                'f4': bool(i % 2),
                'f5': [1.0, 2.0, 3.0, 4.0],
                'f6': {
                    'f7': 'test string 2',
                    'f8': [1.0, 2.0, 3.0, 4.0],
                },
            }
            c6_data.append(d)

        c7_data = [d2] * num_rows
        rows = [
            {
                'c1': c1_data[i],
                # every 10th row gets a null in the nullable column
                'c1n': c1_data[i] if i % 10 != 0 else None,
                'c2': c2_data[i],
                'c3': c3_data[i],
                'c4': c4_data[i],
                'c5': c5_data[i],
                'c6': c6_data[i],
                'c7': c7_data[i],
            }
            for i in range(num_rows)
        ]
        t.insert(rows)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def main() -> None:
    """Create a Dumper, populate its sample tables, then write the database dump."""
    _logger.info("Creating pixeltable test artifact.")
    artifact_dumper = Dumper()
    artifact_dumper.create_tables()
    artifact_dumper.dump_db()


if __name__ == "__main__":
    main()
|