pixeltable 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (139) hide show
  1. pixeltable/__init__.py +34 -6
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -30
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -45
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -87
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1085 -262
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -126
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.1.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.1.dist-info/METADATA +0 -31
  139. pixeltable-0.1.1.dist-info/RECORD +0 -36
pixeltable/tests/utils.py CHANGED
@@ -1,16 +1,22 @@
1
1
  import datetime
2
2
  import glob
3
+ import json
3
4
  import os
4
5
  from pathlib import Path
5
- from typing import Dict, Any, List
6
+ from typing import Dict, Any, List, Optional
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
10
+ import pytest
9
11
 
10
- import pixeltable as pt
12
+ import pixeltable as pxt
13
+ import pixeltable.type_system as ts
11
14
  from pixeltable import catalog
12
- from pixeltable.type_system import ColumnType, StringType, IntType, FloatType, BoolType, TimestampType
13
- from pixeltable.function import Function
15
+ from pixeltable.dataframe import DataFrameResultSet
16
+ from pixeltable.env import Env
17
+ from pixeltable.type_system import \
18
+ ColumnType, StringType, IntType, FloatType, ArrayType, BoolType, TimestampType, JsonType, ImageType, VideoType
19
+
14
20
 
15
21
  def make_default_type(t: ColumnType.Type) -> ColumnType:
16
22
  if t == ColumnType.Type.STRING:
@@ -25,14 +31,19 @@ def make_default_type(t: ColumnType.Type) -> ColumnType:
25
31
  return TimestampType()
26
32
  assert False
27
33
 
28
def make_tbl(cl: pxt.Client, name: str = 'test', col_names: Optional[List[str]] = None) -> catalog.InsertableTable:
    """Create a table called `name` with one column per entry of `col_names`.

    Column types are assigned by position: the i-th column gets
    make_default_type(ColumnType.Type(i % 5)), so the types repeat every
    five columns.

    Args:
        cl: client used to create the table.
        name: name of the new table.
        col_names: column names; defaults to a single column 'c1'.

    Returns:
        Whatever cl.create_table() returns for the assembled schema.
    """
    names = ['c1'] if col_names is None else col_names
    schema: Dict[str, ts.ColumnType] = {
        f'{col_name}': make_default_type(ColumnType.Type(position % 5))
        for position, col_name in enumerate(names)
    }
    return cl.create_table(name, schema)
33
41
 
34
- def create_table_data(t: catalog.Table, col_names: List[str] = [], num_rows: int = 10) -> pd.DataFrame:
42
+ def create_table_data(t: catalog.Table, col_names: Optional[List[str]] = None, num_rows: int = 10) -> List[Dict[str, Any]]:
43
+ if col_names is None:
44
+ col_names = []
35
45
  data: Dict[str, Any] = {}
46
+
36
47
  sample_dict = {
37
48
  'detections': [{
38
49
  'id': '637e8e073b28441a453564cf',
@@ -70,38 +81,138 @@ def create_table_data(t: catalog.Table, col_names: List[str] = [], num_rows: int
70
81
  }
71
82
 
72
83
  if len(col_names) == 0:
73
- col_names = [c.name for c in t.columns]
84
+ col_names = [c.name for c in t.columns() if not c.is_computed]
74
85
 
86
+ col_types = t.column_types()
75
87
  for col_name in col_names:
76
- col = t.cols_by_name[col_name]
88
+ col_type = col_types[col_name]
77
89
  col_data: Any = None
78
- if col.col_type.is_string_type():
90
+ if col_type.is_string_type():
79
91
  col_data = ['test string'] * num_rows
80
- if col.col_type.is_int_type():
81
- col_data = np.random.randint(0, 100, size=num_rows)
82
- if col.col_type.is_float_type():
83
- col_data = np.random.random(size=num_rows) * 100
84
- if col.col_type.is_bool_type():
92
+ if col_type.is_int_type():
93
+ col_data = np.random.randint(0, 100, size=num_rows).tolist()
94
+ if col_type.is_float_type():
95
+ col_data = (np.random.random(size=num_rows) * 100).tolist()
96
+ if col_type.is_bool_type():
85
97
  col_data = np.random.randint(0, 2, size=num_rows)
86
98
  col_data = [False if i == 0 else True for i in col_data]
87
- if col.col_type.is_timestamp_type():
88
- col_data = datetime.datetime.now()
89
- if col.col_type.is_json_type():
99
+ if col_type.is_timestamp_type():
100
+ col_data = [datetime.datetime.now()] * num_rows
101
+ if col_type.is_json_type():
90
102
  col_data = [sample_dict] * num_rows
91
- # TODO: implement this
92
- assert not col.col_type.is_image_type()
93
- assert not col.col_type.is_array_type()
94
- data[col.name] = col_data
95
- return pd.DataFrame(data=data)
103
+ if col_type.is_array_type():
104
+ col_data = [np.ones(col_type.shape, dtype=col_type.numpy_dtype()) for i in range(num_rows)]
105
+ if col_type.is_image_type():
106
+ image_path = get_image_files()[0]
107
+ col_data = [image_path for i in range(num_rows)]
108
+ if col_type.is_video_type():
109
+ video_path = get_video_files()[0]
110
+ col_data = [video_path for i in range(num_rows)]
111
+ data[col_name] = col_data
112
+ rows = [{col_name: data[col_name][i] for col_name in col_names} for i in range(num_rows)]
113
+ return rows
114
+
115
def create_test_tbl(client: pxt.Client, name: str = 'test_tbl') -> catalog.Table:
    """Create and populate the standard 100-row test table.

    The table has columns c1 (string), c1n (nullable string), c2 (int, used
    as primary key), c3 (float), c4 (bool), c5 (timestamp), c6/c7 (json),
    plus a column c8 added via add_column() with a 2x3 nested list of ints.

    Args:
        client: client used to create the table.
        name: name of the new table.

    Returns:
        The populated table.
    """
    tbl = client.create_table(
        name,
        {
            'c1': StringType(nullable=False),
            'c1n': StringType(nullable=True),
            'c2': IntType(nullable=False),
            'c3': FloatType(nullable=False),
            'c4': BoolType(nullable=False),
            'c5': TimestampType(nullable=False),
            'c6': JsonType(nullable=False),
            'c7': JsonType(nullable=False),
        },
        primary_key='c2')
    tbl.add_column(c8=[[1, 2, 3], [4, 5, 6]])

    row_count = 100

    # c7 holds the same two-element list of this fixed document in every row
    fixed_doc = {
        'f1': 'test string 1',
        'f2': 1,
        'f3': 1.0,
        'f4': True,
        'f5': [1.0, 2.0, 3.0, 4.0],
        'f6': {
            'f7': 'test string 2',
            'f8': [1.0, 2.0, 3.0, 4.0],
        },
    }
    fixed_doc_pair = [fixed_doc, fixed_doc]

    # a single timestamp is taken once and shared by all rows
    created_at = datetime.datetime.now()

    rows = []
    for idx in range(row_count):
        label = f'test string {idx}'
        rows.append({
            'c1': label,
            # every 10th row has a NULL in the nullable string column
            'c1n': None if idx % 10 == 0 else label,
            'c2': idx,
            'c3': float(idx),
            'c4': bool(idx % 2),
            'c5': created_at,
            # c6 is a per-row document whose scalar fields track the row index
            'c6': {
                'f1': label,
                'f2': idx,
                'f3': float(idx),
                'f4': bool(idx % 2),
                'f5': [1.0, 2.0, 3.0, 4.0],
                'f6': {
                    'f7': 'test string 2',
                    'f8': [1.0, 2.0, 3.0, 4.0],
                },
            },
            'c7': fixed_doc_pair,
        })

    tbl.insert(rows)
    return tbl
96
179
 
97
- def read_data_file(dir_name: str, file_name: str, path_col_names: List[str] = []) -> pd.DataFrame:
180
def create_all_datatypes_tbl(test_client: pxt.Client) -> catalog.Table:
    """Create table 'all_datatype_tbl' with one column per datatype and 11 rows.

    Besides the non-nullable int column 'row_id' (used for row selection),
    the schema has nullable array, bool, float, image, int, json, string,
    timestamp and video columns. Row contents come from create_table_data();
    'row_id' is then overwritten with the row's position (0..10).

    Args:
        test_client: client used to create the table.

    Returns:
        The populated table.
    """
    tbl = test_client.create_table(
        'all_datatype_tbl',
        {
            'row_id': IntType(nullable=False),  # used for row selection
            'c_array': ArrayType(shape=(10,), dtype=FloatType(), nullable=True),
            'c_bool': BoolType(nullable=True),
            'c_float': FloatType(nullable=True),
            'c_image': ImageType(nullable=True),
            'c_int': IntType(nullable=True),
            'c_json': JsonType(nullable=True),
            'c_string': StringType(nullable=True),
            'c_timestamp': TimestampType(nullable=True),
            'c_video': VideoType(nullable=True),
        })
    example_rows = create_table_data(tbl, num_rows=11)

    # replace the generated row_id values with the row's position
    for position, row in enumerate(example_rows):
        row['row_id'] = position

    tbl.insert(example_rows)
    return tbl
203
+
204
+ def read_data_file(dir_name: str, file_name: str, path_col_names: Optional[List[str]] = None) -> List[Dict[str, Any]]:
98
205
  """
99
206
  Locate dir_name, create df out of file_name.
100
- transform columns 'file_name' to column 'file_path' with absolute paths
101
207
  path_col_names: col names in csv file that contain file names; those will be converted to absolute paths
102
208
  by adding the path to 'file_name' as a prefix.
209
+ Returns:
210
+ list of rows, one dict per row mapping column name to value
103
211
  """
104
- glob_result = glob.glob(f'{os.getcwd()}/**/{dir_name}', recursive=True)
212
+ if path_col_names is None:
213
+ path_col_names = []
214
+ tests_dir = os.path.dirname(__file__) # search with respect to tests/ dir
215
+ glob_result = glob.glob(f'{tests_dir}/**/{dir_name}', recursive=True)
105
216
  assert len(glob_result) == 1, f'Could not find {dir_name}'
106
217
  abs_path = Path(glob_result[0])
107
218
  data_file_path = abs_path / file_name
@@ -110,24 +221,54 @@ def read_data_file(dir_name: str, file_name: str, path_col_names: List[str] = []
110
221
  for col_name in path_col_names:
111
222
  assert col_name in df.columns
112
223
  df[col_name] = df.apply(lambda r: str(abs_path / r[col_name]), axis=1)
113
- return df
224
+ return df.to_dict(orient='records')
225
+
226
def get_video_files(include_bad_video=False) -> List[str]:
    """Return the paths of all entries in any 'videos' directory below this file's directory.

    Args:
        include_bad_video: if False, paths containing the substring
            'bad_video' (anywhere in the path) are left out.

    Returns:
        Matching paths, in the order glob reports them.
    """
    search_root = os.path.dirname(__file__)  # search with respect to tests/ dir
    matches = glob.glob(f'{search_root}/**/videos/*', recursive=True)
    if include_bad_video:
        return matches
    return [path for path in matches if 'bad_video' not in path]
114
232
 
115
- def get_video_files() -> List[str]:
116
- glob_result = glob.glob(f'{os.getcwd()}/**/videos/*.mp4', recursive=True)
233
def get_image_files() -> List[str]:
    """Return the paths of all entries in any 'imagenette2-160' directory below this file's directory.

    Returns:
        Matching paths, in the order glob reports them.
    """
    search_root = os.path.dirname(__file__)  # search with respect to tests/ dir
    return glob.glob(f'{search_root}/**/imagenette2-160/*', recursive=True)
118
237
 
238
def get_audio_files(include_bad_audio=False) -> List[str]:
    """Return the paths of all entries in any 'audio' directory below this file's directory.

    Args:
        include_bad_audio: if False, paths containing the substring
            'bad_audio' (anywhere in the path) are left out.

    Returns:
        Matching paths, in the order glob reports them.
    """
    search_root = os.path.dirname(__file__)
    matches = glob.glob(f'{search_root}/**/audio/*', recursive=True)
    if include_bad_audio:
        return matches
    return [path for path in matches if 'bad_audio' not in path]
244
+
245
def get_documents() -> List[str]:
    """Return the paths of all non-PDF entries in any 'documents' directory below this file's directory.

    Returns:
        Matching paths whose name does not end in '.pdf', in the order glob
        reports them.
    """
    search_root = os.path.dirname(__file__)
    candidates = glob.glob(f'{search_root}/**/documents/*', recursive=True)
    # for now, we can only handle .html and .md; the filter itself only drops .pdf
    return [doc_path for doc_path in candidates if not doc_path.endswith('.pdf')]
249
+
250
def get_sentences(n: int = 100) -> List[str]:
    """Return the first `n` questions from the jeopardy.json test dataset.

    The first 'jeopardy.json' found anywhere below this file's directory is
    loaded as a JSON list of objects; each object's 'question' value is
    returned with every single-quote character removed.

    Args:
        n: maximum number of questions to return.

    Returns:
        Up to `n` question strings.

    Raises:
        IndexError: if no jeopardy.json is found.
    """
    tests_dir = os.path.dirname(__file__)
    path = glob.glob(f'{tests_dir}/**/jeopardy.json', recursive=True)[0]
    # decode explicitly as UTF-8 so the result does not depend on the platform's default encoding
    with open(path, 'r', encoding='utf-8') as f:
        questions_list = json.load(f)
    # this dataset contains \' around the questions
    return [q['question'].replace("'", '') for q in questions_list[:n]]
257
+
258
def assert_resultset_eq(r1: 'DataFrameResultSet', r2: 'DataFrameResultSet') -> None:
    """Assert that two result sets hold the same data, column by column.

    Both result sets must have the same number of rows and columns. Columns
    are paired by position; column names are not compared. float64 columns
    are compared with np.allclose (NaNs in matching positions count as
    equal, consistent with Series.equals used for the other columns); all
    other columns must be exactly equal.

    Args:
        r1: first result set.
        r2: second result set.

    Raises:
        AssertionError: if the result sets differ.
    """
    assert len(r1) == len(r2)
    assert len(r1.column_names()) == len(r2.column_names())  # we don't care about the actual column names
    r1_pd = r1.to_pandas()
    r2_pd = r2.to_pandas()
    for i in range(len(r1.column_names())):
        # only compare column values
        s1 = r1_pd.iloc[:, i]
        s2 = r2_pd.iloc[:, i]
        if s1.dtype == np.float64:
            # equal_nan: a NULL/NaN in the same position of both columns is not a mismatch
            assert np.allclose(s1, s2, equal_nan=True)
        else:
            assert s1.equals(s2)
119
271
 
120
- class SumAggregator:
121
- def __init__(self):
122
- self.sum = 0
123
- @classmethod
124
- def make_aggregator(cls) -> 'SumAggregator':
125
- return cls()
126
- def update(self, val: int) -> None:
127
- self.sum += val
128
- def value(self) -> int:
129
- return self.sum
130
-
131
- sum_uda = Function(
132
- IntType(), [IntType()],
133
- init_fn=SumAggregator.make_aggregator, update_fn=SumAggregator.update, value_fn=SumAggregator.value)
272
def skip_test_if_not_installed(package) -> None:
    """Skip the calling pytest test unless Env reports `package` as installed.

    Args:
        package: package name passed to Env.get().is_installed_package().
    """
    if Env.get().is_installed_package(package):
        return
    pytest.skip(f'Package `{package}` is not installed.')
@@ -0,0 +1,149 @@
1
+ import datetime
2
+ import json
3
+ import logging
4
+ import os
5
+ import pathlib
6
+ import subprocess
7
+
8
+ import pgserver
9
+ import toml
10
+
11
+ import pixeltable as pxt
12
+ import pixeltable.metadata as metadata
13
+ from pixeltable.env import Env
14
+ from pixeltable.type_system import \
15
+ StringType, IntType, FloatType, BoolType, TimestampType, JsonType
16
+
17
+ _logger = logging.getLogger('pixeltable')
18
+
19
+
20
+ class Dumper:
21
+
22
+ def __init__(self, output_dir='target', db_name='pxtdump') -> None:
23
+ self.output_dir = pathlib.Path(output_dir)
24
+ shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', '~/.pixeltable')).expanduser()
25
+ mock_home_dir = self.output_dir / '.pixeltable'
26
+ mock_home_dir.mkdir(parents=True, exist_ok=True)
27
+ os.environ['PIXELTABLE_HOME'] = str(mock_home_dir)
28
+ os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.yaml')
29
+ os.environ['PIXELTABLE_DB'] = db_name
30
+ os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
31
+
32
+ Env.get().set_up(reinit_db=True)
33
+ self.cl = pxt.Client()
34
+ self.cl.logging(level=logging.DEBUG, to_stdout=True)
35
+
36
+ def dump_db(self) -> None:
37
+ md_version = metadata.VERSION
38
+ dump_file = self.output_dir / f'pixeltable-v{md_version:03d}-test.dump.gz'
39
+ _logger.info(f'Creating database dump at: {dump_file}')
40
+ pg_package_dir = os.path.dirname(pgserver.__file__)
41
+ pg_dump_binary = f'{pg_package_dir}/pginstall/bin/pg_dump'
42
+ _logger.info(f'Using pg_dump binary at: {pg_dump_binary}')
43
+ with open(dump_file, 'wb') as dump:
44
+ pg_dump_process = subprocess.Popen(
45
+ [pg_dump_binary, Env.get().db_url, '-U', 'postgres', '-Fc'],
46
+ stdout=subprocess.PIPE
47
+ )
48
+ subprocess.run(
49
+ ["gzip", "-9"],
50
+ stdin=pg_dump_process.stdout,
51
+ stdout=dump,
52
+ check=True
53
+ )
54
+ info_file = self.output_dir / f'pixeltable-v{md_version:03d}-test-info.toml'
55
+ git_sha = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('ascii').strip()
56
+ user = os.environ.get('USER', os.environ.get('USERNAME'))
57
+ info_dict = {'pixeltable-dump': {
58
+ 'metadata-version': md_version,
59
+ 'git-sha': git_sha,
60
+ 'datetime': datetime.datetime.utcnow(),
61
+ 'user': user
62
+ }}
63
+ with open(info_file, 'w') as info:
64
+ toml.dump(info_dict, info)
65
+
66
+ # TODO: Add additional features to the test DB dump (ideally it should exercise
67
+ # every major pixeltable DB feature)
68
+ def create_tables(self) -> None:
69
+ schema = {
70
+ 'c1': StringType(nullable=False),
71
+ 'c1n': StringType(nullable=True),
72
+ 'c2': IntType(nullable=False),
73
+ 'c3': FloatType(nullable=False),
74
+ 'c4': BoolType(nullable=False),
75
+ 'c5': TimestampType(nullable=False),
76
+ 'c6': JsonType(nullable=False),
77
+ 'c7': JsonType(nullable=False),
78
+ }
79
+ t = self.cl.create_table('sample_table', schema, primary_key='c2')
80
+ t.add_column(c8=[[1, 2, 3], [4, 5, 6]])
81
+
82
+ # Add columns for .astype converters to ensure they're persisted properly
83
+ t.add_column(c2_as_float=t.c2.astype(FloatType()))
84
+
85
+ # Add columns for .apply
86
+ t.add_column(c2_to_string=t.c2.apply(str))
87
+ t.add_column(c6_to_string=t.c6.apply(json.dumps))
88
+ t.add_column(c6_back_to_json=t.c6_to_string.apply(json.loads))
89
+
90
+ num_rows = 100
91
+ d1 = {
92
+ 'f1': 'test string 1',
93
+ 'f2': 1,
94
+ 'f3': 1.0,
95
+ 'f4': True,
96
+ 'f5': [1.0, 2.0, 3.0, 4.0],
97
+ 'f6': {
98
+ 'f7': 'test string 2',
99
+ 'f8': [1.0, 2.0, 3.0, 4.0],
100
+ },
101
+ }
102
+ d2 = [d1, d1]
103
+
104
+ c1_data = [f'test string {i}' for i in range(num_rows)]
105
+ c2_data = [i for i in range(num_rows)]
106
+ c3_data = [float(i) for i in range(num_rows)]
107
+ c4_data = [bool(i % 2) for i in range(num_rows)]
108
+ c5_data = [datetime.datetime.now()] * num_rows
109
+ c6_data = []
110
+ for i in range(num_rows):
111
+ d = {
112
+ 'f1': f'test string {i}',
113
+ 'f2': i,
114
+ 'f3': float(i),
115
+ 'f4': bool(i % 2),
116
+ 'f5': [1.0, 2.0, 3.0, 4.0],
117
+ 'f6': {
118
+ 'f7': 'test string 2',
119
+ 'f8': [1.0, 2.0, 3.0, 4.0],
120
+ },
121
+ }
122
+ c6_data.append(d)
123
+
124
+ c7_data = [d2] * num_rows
125
+ rows = [
126
+ {
127
+ 'c1': c1_data[i],
128
+ 'c1n': c1_data[i] if i % 10 != 0 else None,
129
+ 'c2': c2_data[i],
130
+ 'c3': c3_data[i],
131
+ 'c4': c4_data[i],
132
+ 'c5': c5_data[i],
133
+ 'c6': c6_data[i],
134
+ 'c7': c7_data[i],
135
+ }
136
+ for i in range(num_rows)
137
+ ]
138
+ t.insert(rows)
139
+
140
+
141
def main() -> None:
    """Build the sample tables and write the database dump with default Dumper settings."""
    _logger.info("Creating pixeltable test artifact.")
    artifact_dumper = Dumper()
    # tables must exist before the database is dumped
    artifact_dumper.create_tables()
    artifact_dumper.dump_db()


if __name__ == "__main__":
    main()