pixeltable 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic.

Files changed (139)
  1. pixeltable/__init__.py +34 -6
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -30
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -45
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -87
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1085 -262
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -126
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.1.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.1.dist-info/METADATA +0 -31
  139. pixeltable-0.1.1.dist-info/RECORD +0 -36
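The bulk of this release is a restructuring: the monolithic 0.1.1 modules (catalog.py, exprs.py, function.py) are deleted and replaced by the catalog/, exprs/, and func/ packages listed above. A minimal sketch of how import paths shift, assuming the small __init__.py files added in this diff re-export the package contents (an assumption; their bodies are not shown in this section, and the class name InsertableTable is inferred from the file name):

    # 0.1.1: classes lived in a single module (pixeltable/catalog.py)
    from pixeltable.catalog import Column, Table, MutableTable

    # 0.2.0: the same package path now resolves to pixeltable/catalog/__init__.py;
    # per-class modules can also be imported directly (module names from the file list above)
    from pixeltable.catalog import Column, Table
    from pixeltable.catalog.insertable_table import InsertableTable  # assumed class name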
pixeltable/catalog.py DELETED
@@ -1,1421 +0,0 @@
- from typing import Optional, List, Set, Dict, Any, Type, Union, Callable
- import re
- import inspect
- import io
- import os
- import dataclasses
-
- import PIL, cv2
- import numpy as np
- from PIL import Image
- from tqdm.autonotebook import tqdm
- import pathlib
-
- import pandas as pd
- import sqlalchemy as sql
- import sqlalchemy.orm as orm
-
- from pixeltable import store
- from pixeltable.env import Env
- from pixeltable import exceptions as exc
- from pixeltable.type_system import ColumnType
- from pixeltable.utils import clip, video
- from pixeltable import utils
- from pixeltable.index import VectorIndex
- from pixeltable.function import Function, FunctionRegistry
-
-
- _ID_RE = r'[a-zA-Z]\w*'
- _PATH_RE = f'{_ID_RE}(\\.{_ID_RE})*'
-
-
- class Column:
-     def __init__(
-             self, name: str, col_type: Optional[ColumnType] = None,
-             computed_with: Optional[Union['Expr', Callable]] = None,
-             primary_key: bool = False, nullable: bool = True, col_id: Optional[int] = None,
-             value_expr_str: Optional[str] = None, indexed: bool = False):
-         """
-         Computed columns: those have a non-None computed_with argument
-         - when constructed by the user: 'computed_with' was constructed explicitly and is passed in;
-           'value_expr_str' is None and col_type is None
-         - when loaded from store: 'value_expr_str' is the serialized form and col_type is set;
-           'computed_with' is None
-         Computed_with is a Callable:
-         - the callable's parameter names must correspond to existing columns in the table for which this Column
-           is being used
-         - col_type needs to be set to the callable's return type
-
-         indexed: only valid for image columns; if true, maintains an NN index for this column
-         """
-         from pixeltable import exprs
-         if re.fullmatch(_ID_RE, name) is None:
-             raise exc.BadFormatError(f"Invalid column name: '{name}'")
-         self.name = name
-         if col_type is None and computed_with is None:
-             raise exc.Error(f'Column {name}: col_type is required if computed_with is not specified')
-         assert not(value_expr_str is not None and computed_with is not None)
-
-         self.value_expr: Optional['Expr'] = None
-         self.compute_func: Optional[Callable] = None
-         if computed_with is not None:
-             value_expr = exprs.Expr.from_object(computed_with)
-             if value_expr is None:
-                 # computed_with needs to be a Callable
-                 if not isinstance(computed_with, Callable):
-                     raise exc.Error(
-                         f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
-                         f'but it is a {type(computed_with)}')
-                 if col_type is None:
-                     raise exc.Error(f'Column {name}: col_type is required if computed_with is a Callable')
-                 # we need to turn the computed_with function into an Expr, but this requires resolving
-                 # column name references and for that we need to wait until we're assigned to a Table
-                 self.compute_func = computed_with
-             else:
-                 self.value_expr = value_expr.copy()
-                 self.col_type = self.value_expr.col_type
-
-         if col_type is not None:
-             self.col_type = col_type
-         assert self.col_type is not None
-
-         self.value_expr_str = value_expr_str  # stored here so it's easily accessible for the Table c'tor
-         self.dependent_cols: List[Column] = []  # cols with value_exprs that reference us
-         self.id = col_id
-         self.primary_key = primary_key
-         # computed cols are always nullable
-         self.nullable = nullable or computed_with is not None or value_expr_str is not None
-         self.sa_col: Optional[sql.schema.Column] = None
-
-         if indexed and not self.col_type.is_image_type():
-             raise exc.Error(f'Column {name}: indexed=True requires ImageType')
-         self.is_indexed = indexed
-         self.idx: Optional[VectorIndex] = None
-
-     def to_sql(self) -> str:
-         return f'{self.storage_name()} {self.col_type.to_sql()}'
-
-     @property
-     def is_computed(self) -> bool:
-         return self.compute_func is not None or self.value_expr is not None
-
-     def create_sa_col(self) -> None:
-         """
-         This needs to be recreated for every new table schema version.
-         """
-         self.sa_col = sql.Column(self.storage_name(), self.col_type.to_sa_type(), nullable=self.nullable)
-
-     def set_idx(self, idx: VectorIndex) -> None:
-         self.idx = idx
-
-     def storage_name(self) -> str:
-         assert self.id is not None
-         return f'col_{self.id}'
-
-     def __str__(self) -> str:
-         return f'{self.name}: {self.col_type}'
-
-     def __eq__(self, other: object) -> bool:
-         if not isinstance(other, Column):
-             return False
-         if self.sa_col is None or other.sa_col is None:
-             return False
-         # if they point to the same table column, they're the same
-         return str(self.sa_col) == str(other.sa_col)
-
-
- # base class of all addressable objects within a Db
- class SchemaObject:
-     def __init__(self, obj_id: int):
-         self.id = obj_id
-
-     @classmethod
-     def display_name(cls) -> str:
-         """
-         Return name displayed in error messages.
-         """
-         assert False
-         return ''
-
-
- class DirBase(SchemaObject):
-     def __init__(self, dir_id: int):
-         super().__init__(dir_id)
-
-     @classmethod
-     def display_name(cls) -> str:
-         return 'directory'
-
-
- # contains only MutableTables
- class Dir(DirBase):
-     def __init__(self, dir_id: int):
-         super().__init__(dir_id)
-
-
- # contains only TableSnapshots
- class SnapshotDir(DirBase):
-     def __init__(self, dir_id: int):
-         super().__init__(dir_id)
-
-
- class NamedFunction(SchemaObject):
-     """
-     Contains references to functions that are named and have a path within a db.
-     The Function itself is stored in the FunctionRegistry.
-     """
-     def __init__(self, id: int, dir_id: int, name: str):
-         super().__init__(id)
-         self.dir_id = dir_id
-         self.name = name
-
-
- class Table(SchemaObject):
-     #def __init__(self, tbl_record: store.Table, schema: List[Column]):
-     def __init__(
-             self, db_id: int, tbl_id: int, dir_id: int, name: str, version: int, cols: List[Column]):
-         super().__init__(tbl_id)
-         self.db_id = db_id
-         self.dir_id = dir_id
-         # TODO: this will be out-of-date after a rename()
-         self.name = name
-         for pos, col in enumerate(cols):
-             if re.fullmatch(_ID_RE, col.name) is None:
-                 raise exc.BadFormatError(f"Invalid column name: '{col.name}'")
-             assert col.id is not None
-         self.cols = cols
-         self.cols_by_name = {col.name: col for col in cols}
-         self.cols_by_id = {col.id: col for col in cols}
-         self.version = version
-
-         # we can't call _load_valid_rowids() here because the storage table may not exist yet
-         self.valid_rowids: Set[int] = set()
-
-         # sqlalchemy-related metadata; used to insert and query the storage table
-         self.sa_md = sql.MetaData()
-         self._create_sa_tbl()
-         self.is_dropped = False
-
-         # make sure to traverse columns ordered by position = order in which cols were created;
-         # this guarantees that references always point backwards
-         for col in self.cols:
-             if col.value_expr is not None or col.value_expr_str is not None:
-                 self._record_value_expr(col)
-
-     def _record_value_expr(self, col: Column) -> None:
-         """
-         Update Column.dependent_cols for all cols referenced in col.value_expr.
-         Creates col.value_expr if it doesn't exist yet.
-         """
-         from pixeltable.exprs import Expr, ColumnRef
-         if col.value_expr is None:
-             assert col.value_expr_str is not None
-             col.value_expr = Expr.deserialize(col.value_expr_str, self)
-
-         refd_col_ids = [e.col.id for e in col.value_expr.subexprs() if isinstance(e, ColumnRef)]
-         refd_cols = [self.cols_by_id[id] for id in refd_col_ids]
-         for refd_col in refd_cols:
-             refd_col.dependent_cols.append(col)
-
-     def _load_valid_rowids(self) -> None:
-         if not any(col.col_type.is_image_type() for col in self.cols):
-             return
-         stmt = sql.select(self.rowid_col) \
-             .where(self.v_min_col <= self.version) \
-             .where(self.v_max_col > self.version)
-         with Env.get().engine.begin() as conn:
-             rows = conn.execute(stmt)
-             for row in rows:
-                 rowid = row[0]
-                 self.valid_rowids.add(rowid)
-
-     def __getattr__(self, col_name: str) -> 'pixeltable.exprs.ColumnRef':
-         if col_name not in self.cols_by_name:
-             raise AttributeError(f'Column {col_name} unknown')
-         col = self.cols_by_name[col_name]
-         from pixeltable.exprs import ColumnRef
-         return ColumnRef(col)
-
-     def __getitem__(self, index: object) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.dataframe.DataFrame']:
-         if isinstance(index, str):
-             # basically <tbl>.<colname>
-             return self.__getattr__(index)
-         from pixeltable.dataframe import DataFrame
-         return DataFrame(self).__getitem__(index)
-
-     def df(self) -> 'pixeltable.dataframe.DataFrame':
-         # local import: avoid circular imports
-         from pixeltable.dataframe import DataFrame
-         return DataFrame(self)
-
-     def show(self, *args, **kwargs) -> 'pixeltable.dataframe.DataFrameResultSet':  # type: ignore[name-defined, no-untyped-def]
-         return self.df().show(*args, **kwargs)
-
-     def count(self) -> int:
-         return self.df().count()
-
-     @property
-     def columns(self) -> List[Column]:
-         return self.cols
-
-     def storage_name(self) -> str:
-         return f'tbl_{self.id}'
-
-     def _check_is_dropped(self) -> None:
-         if self.is_dropped:
-             raise exc.OperationalError('Table has been dropped')
-
-     def _create_sa_tbl(self) -> None:
-         self.rowid_col = sql.Column('rowid', sql.BigInteger, nullable=False)
-         self.v_min_col = sql.Column('v_min', sql.BigInteger, nullable=False)
-         self.v_max_col = \
-             sql.Column('v_max', sql.BigInteger, nullable=False, server_default=str(store.Table.MAX_VERSION))
-         sa_cols = [self.rowid_col, self.v_min_col, self.v_max_col]
-         # re-create sql.Columns for each column, regardless of whether it already has sa_col set: it was bound
-         # to the last sql.Table version we created and cannot be reused
-         for col in self.cols:
-             col.create_sa_col()
-         sa_cols.extend([col.sa_col for col in self.cols])
-         if hasattr(self, 'sa_tbl'):
-             self.sa_md.remove(self.sa_tbl)
-         self.sa_tbl = sql.Table(self.storage_name(), self.sa_md, *sa_cols)
-
-     @classmethod
-     def _vector_idx_name(cls, tbl_id: int, col: Column) -> str:
-         return f'{tbl_id}_{col.id}'
-
-     # MODULE-LOCAL, NOT PUBLIC
-     @classmethod
-     def load_cols(cls, tbl_id: int, schema_version: int, session: orm.Session) -> List[Column]:
-         """
-         Returns loaded cols.
-         """
-         col_records = session.query(store.SchemaColumn) \
-             .where(store.SchemaColumn.tbl_id == tbl_id) \
-             .where(store.SchemaColumn.schema_version == schema_version) \
-             .order_by(store.SchemaColumn.pos.asc()).all()
-         cols = [
-             Column(
-                 r.name, ColumnType.deserialize(r.col_type), primary_key=r.is_pk, nullable=r.is_nullable,
-                 col_id=r.col_id, value_expr_str=r.value_expr, indexed=r.is_indexed)
-             for r in col_records
-         ]
-         for col in [col for col in cols if col.col_type.is_image_type()]:
-             if col.is_indexed:
-                 col.set_idx(VectorIndex.load(cls._vector_idx_name(tbl_id, col), dim=512))
-         return cols
-
-
- class TableSnapshot(Table):
-     def __init__(self, snapshot_record: store.TableSnapshot, cols: List[Column]):
-         assert snapshot_record.db_id is not None
-         assert snapshot_record.id is not None
-         assert snapshot_record.dir_id is not None
-         assert snapshot_record.name is not None
-         assert snapshot_record.tbl_version is not None
-         # the id of this SchemaObject is TableSnapshot.tbl_id, not TableSnapshot.id: we use tbl_id to construct
-         # the name of the data table
-         super().__init__(
-             snapshot_record.db_id, snapshot_record.tbl_id, snapshot_record.dir_id, snapshot_record.name,
-             snapshot_record.tbl_version, cols)
-         self.snapshot_tbl_id = snapshot_record.id
-         # it's safe to call _load_valid_rowids() here because the storage table already exists
-         self._load_valid_rowids()
-
-     def __repr__(self) -> str:
-         return f'TableSnapshot(name={self.name})'
-
-     @classmethod
-     def display_name(cls) -> str:
-         return 'table snapshot'
-
- @dataclasses.dataclass
- class TableParameters:
-     # garbage-collect old versions beyond this point, unless they are referenced in a snapshot
-     num_retained_versions: int
-
-     # parameters for frame extraction
-     frame_src_col: int  # column id
-     frame_col: int  # column id
-     frame_idx_col: int  # column id
-     extraction_fps: int
-
-
- class MutableTable(Table):
-     def __init__(self, tbl_record: store.Table, schema_version: int, cols: List[Column]):
-         assert tbl_record.db_id is not None
-         assert tbl_record.id is not None
-         assert tbl_record.dir_id is not None
-         assert tbl_record.name is not None
-         assert tbl_record.current_version is not None
-         super().__init__(
-             tbl_record.db_id, tbl_record.id, tbl_record.dir_id, tbl_record.name, tbl_record.current_version, cols)
-         assert tbl_record.next_col_id is not None
-         self.next_col_id = tbl_record.next_col_id
-         assert tbl_record.next_row_id is not None
-         self.next_row_id = tbl_record.next_row_id
-         self.schema_version = schema_version
-         self.parameters = TableParameters(**tbl_record.parameters)
-
-     def __repr__(self) -> str:
-         return f'MutableTable(name={self.name})'
-
-     @classmethod
-     def display_name(cls) -> str:
-         return 'table'
-
-     def add_column(self, c: Column) -> None:
-         self._check_is_dropped()
-         if re.fullmatch(_ID_RE, c.name) is None:
-             raise exc.BadFormatError(f"Invalid column name: '{c.name}'")
-         if c.name in self.cols_by_name:
-             raise exc.DuplicateNameError(f'Column {c.name} already exists')
-         assert self.next_col_id is not None
-         c.id = self.next_col_id
-         self.next_col_id += 1
-
-         if c.compute_func is not None:
-             # create value_expr from compute_func
-             self._create_value_expr(c, self.cols_by_name)
-         if c.value_expr is not None:
-             self._record_value_expr(c)
-
-         self.cols.append(c)
-         self.cols_by_name[c.name] = c
-         self.cols_by_id[c.id] = c
-
-         # we're creating a new schema version
-         self.version += 1
-         preceding_schema_version = self.schema_version
-         self.schema_version = self.version
-
-         with Env.get().engine.begin() as conn:
-             conn.execute(
-                 sql.update(store.Table.__table__)
-                 .values({
-                     store.Table.current_version: self.version,
-                     store.Table.current_schema_version: self.schema_version,
-                     store.Table.next_col_id: self.next_col_id
-                 })
-                 .where(store.Table.id == self.id))
-             conn.execute(
-                 sql.insert(store.TableSchemaVersion.__table__)
-                 .values(
-                     tbl_id=self.id, schema_version=self.schema_version,
-                     preceding_schema_version=preceding_schema_version))
-             conn.execute(
-                 sql.insert(store.StorageColumn.__table__)
-                 .values(tbl_id=self.id, col_id=c.id, schema_version_add=self.schema_version))
-             self._create_col_md(conn)
-             stmt = f'ALTER TABLE {self.storage_name()} ADD COLUMN {c.to_sql()}'
-             conn.execute(sql.text(stmt))
-         self._create_sa_tbl()
-
-         if not c.is_computed or self.count() == 0:
-             return
-         # backfill the existing rows
-         from pixeltable.dataframe import DataFrame
-         # use copy to avoid reusing existing execution state
-         query = DataFrame(self, [c.value_expr.copy()])
-         with Env.get().engine.begin() as conn:
-             with tqdm(total=self.count()) as progress_bar:
-                 for result_row in query.exec(n=0, select_pk=True):
-                     column_val, rowid, v_min = result_row
-                     column_val = self._convert_to_stored(c, column_val, rowid)
-                     conn.execute(
-                         sql.update(self.sa_tbl)
-                         .values({c.sa_col: column_val})
-                         .where(self.rowid_col == rowid)
-                         .where(self.v_min_col == v_min))
-                     progress_bar.update(1)
-
-     def drop_column(self, name: str) -> None:
-         self._check_is_dropped()
-         if name not in self.cols_by_name:
-             raise exc.UnknownEntityError
-         col = self.cols_by_name[name]
-         if len(col.dependent_cols) > 0:
-             raise exc.Error(
-                 f'Cannot drop column {name} because the following columns depend on it:\n',
-                 f'{", ".join([c.name for c in col.dependent_cols])}')
-         if col.id == self.parameters.frame_col or col.id == self.parameters.frame_idx_col:
-             src_col_name = self.cols_by_id[self.parameters.frame_src_col].name
-             raise exc.Error(
-                 f'Cannot drop column {name} because it is used for frame extraction on column {src_col_name}')
-         if col.id == self.parameters.frame_src_col:
-             # we also need to reset the frame extraction table parameters
-             self.parameters.frame_src_col = None
-             self.parameters.frame_col = None
-             self.parameters.frame_idx_col = None
-             self.parameters.extraction_fps = None
-
-         if col.value_expr is not None:
-             # update Column.dependent_cols
-             for c in self.cols:
-                 if c == col:
-                     break
-                 try:
-                     c.dependent_cols.remove(col)
-                 except ValueError:
-                     # ignore
-                     pass
-
-         self.cols.remove(col)
-         del self.cols_by_name[name]
-         del self.cols_by_id[col.id]
-
-
-         # we're creating a new schema version
-         self.version += 1
-         preceding_schema_version = self.schema_version
-         self.schema_version = self.version
-
-         with Env.get().engine.begin() as conn:
-             conn.execute(
-                 sql.update(store.Table.__table__)
-                 .values({
-                     store.Table.parameters: dataclasses.asdict(self.parameters),
-                     store.Table.current_version: self.version,
-                     store.Table.current_schema_version: self.schema_version
-                 })
-                 .where(store.Table.id == self.id))
-             conn.execute(
-                 sql.insert(store.TableSchemaVersion.__table__)
-                 .values(
-                     tbl_id=self.id, schema_version=self.schema_version,
-                     preceding_schema_version=preceding_schema_version))
-             conn.execute(
-                 sql.update(store.StorageColumn.__table__)
-                 .values({store.StorageColumn.schema_version_drop: self.schema_version})
-                 .where(store.StorageColumn.tbl_id == self.id)
-                 .where(store.StorageColumn.col_id == col.id))
-             self._create_col_md(conn)
-         self._create_sa_tbl()
-
-     def rename_column(self, old_name: str, new_name: str) -> None:
-         self._check_is_dropped()
-         if old_name not in self.cols_by_name:
-             raise exc.UnknownEntityError(f'Unknown column: {old_name}')
-         if re.fullmatch(_ID_RE, new_name) is None:
-             raise exc.BadFormatError(f"Invalid column name: '{new_name}'")
-         if new_name in self.cols_by_name:
-             raise exc.DuplicateNameError(f'Column {new_name} already exists')
-         col = self.cols_by_name[old_name]
-         del self.cols_by_name[old_name]
-         col.name = new_name
-         self.cols_by_name[new_name] = col
-
-         # we're creating a new schema version
-         self.version += 1
-         preceding_schema_version = self.schema_version
-         self.schema_version = self.version
-
-         with Env.get().engine.begin() as conn:
-             conn.execute(
-                 sql.update(store.Table.__table__)
-                 .values({
-                     store.Table.current_version: self.version,
-                     store.Table.current_schema_version: self.schema_version
-                 })
-                 .where(store.Table.id == self.id))
-             conn.execute(
-                 sql.insert(store.TableSchemaVersion.__table__)
-                 .values(tbl_id=self.id, schema_version=self.schema_version,
-                         preceding_schema_version=preceding_schema_version))
-             self._create_col_md(conn)
-
-     def _create_col_md(self, conn: sql.engine.base.Connection) -> None:
-         for pos, c in enumerate(self.cols):
-             value_expr_str = c.value_expr.serialize() if c.value_expr is not None else None
-             conn.execute(
-                 sql.insert(store.SchemaColumn.__table__)
-                 .values(
-                     tbl_id=self.id, schema_version=self.version, col_id=c.id, pos=pos, name=c.name,
-                     col_type=c.col_type.serialize(), is_nullable=c.nullable, is_pk=c.primary_key,
-                     value_expr=value_expr_str, is_indexed=c.is_indexed))
-
-     def _convert_to_stored(self, col: Column, val: Any, rowid: int) -> Any:
-         """
-         Convert column value 'val' into a store-compatible format, if needed:
-         - images are stored as files
-         - arrays are stored as serialized ndarrays
-         """
-         if col.col_type.is_image_type():
-             # replace PIL.Image.Image with file path
-             img = val
-             img_path = utils.get_computed_img_path(self.id, col.id, self.version, rowid)
-             img.save(img_path)
-             return str(img_path)
-         elif col.col_type.is_array_type():
-             # serialize numpy array
-             np_array = val
-             buffer = io.BytesIO()
-             np.save(buffer, np_array)
-             return buffer.getvalue()
-         else:
-             return val
-
-     def insert_rows(self, rows: List[List[Any]], columns: List[str] = []) -> None:
-         """
-         Insert rows into table. 'Columns' is a list of column names that specify the columns present in 'rows'.
-         'Columns' == empty: all columns are present in 'rows'.
-         """
-         assert len(rows) > 0
-         if len(rows[0]) != len(self.cols) and len(columns) == 0:
-             raise exc.Error(
-                 f'Table {self.name} has {len(self.cols)} columns, but the data only contains {len(rows[0])} columns. '
-                 f"In this case, you need to specify the column names with the 'columns' parameter.")
-
-         # make sure that each row contains the same number of values
-         num_col_vals = len(rows[0])
-         for i in range(1, len(rows)):
-             if len(rows[i]) != num_col_vals:
-                 raise exc.Error(
-                     f'Inconsistent number of column values in rows: row 0 has {len(rows[0])}, '
-                     f'row {i} has {len(rows[i])}')
-
-         if len(columns) == 0:
-             columns = [c.name for c in self.cols]
-         if len(rows[0]) != len(columns):
-             raise exc.Error(
-                 f'The number of column values in rows ({len(rows[0])}) does not match the given number of column names '
-                 f'({len(columns)})')
-
-         pd_df = pd.DataFrame.from_records(rows, columns=columns)
-         self.insert_pandas(pd_df)
-
-     def insert_pandas(self, data: pd.DataFrame) -> None:
-         """
-         If self.parameters.frame_src_col != None:
-         - each row (containing a video) is expanded into one row per extracted frame (at the rate of the fps parameter)
-         - parameters.frame_col is the image column that receives the extracted frame
-         - parameters.frame_idx_col is the integer column that receives the frame index (starting at 0)
-         """
-         self._check_is_dropped()
-         all_col_names = {col.name for col in self.cols}
-         reqd_col_names = {col.name for col in self.cols if not col.nullable and col.value_expr is None}
-         if self.parameters.frame_src_col is not None:
-             reqd_col_names.discard(self.cols_by_id[self.parameters.frame_col].name)
-             reqd_col_names.discard(self.cols_by_id[self.parameters.frame_idx_col].name)
-         given_col_names = set(data.columns)
-         if not(reqd_col_names <= given_col_names):
-             raise exc.InsertError(f'Missing columns: {", ".join(reqd_col_names - given_col_names)}')
-         if not(given_col_names <= all_col_names):
-             raise exc.InsertError(f'Unknown columns: {", ".join(given_col_names - all_col_names)}')
-         computed_col_names = {col.name for col in self.cols if col.value_expr is not None}
-         if self.parameters.frame_src_col is not None:
-             computed_col_names.add(self.cols_by_id[self.parameters.frame_col].name)
-             computed_col_names.add(self.cols_by_id[self.parameters.frame_idx_col].name)
-         if len(computed_col_names & given_col_names) > 0:
-             raise exc.InsertError(
-                 f'Provided values for computed columns: {", ".join(computed_col_names & given_col_names)}')
-
-         # check types
-         provided_cols = [self.cols_by_name[name] for name in data.columns]
-         for col in provided_cols:
-             if col.col_type.is_string_type() and not pd.api.types.is_string_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(f'Column {col.name} requires string data but contains {data.dtypes[col.name]}')
-             if col.col_type.is_int_type() and not pd.api.types.is_integer_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(f'Column {col.name} requires integer data but contains {data.dtypes[col.name]}')
-             if col.col_type.is_float_type() and not pd.api.types.is_numeric_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(f'Column {col.name} requires numerical data but contains {data.dtypes[col.name]}')
-             if col.col_type.is_bool_type() and not pd.api.types.is_bool_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(f'Column {col.name} requires boolean data but contains {data.dtypes[col.name]}')
-             if col.col_type.is_timestamp_type() and not pd.api.types.is_datetime64_any_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(f'Column {col.name} requires datetime data but contains {data.dtypes[col.name]}')
-             if col.col_type.is_json_type() and not pd.api.types.is_object_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(
-                     f'Column {col.name} requires dictionary data but contains {data.dtypes[col.name]}')
-             if col.col_type.is_array_type() and not pd.api.types.is_object_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(
-                     f'Column {col.name} requires array data but contains {data.dtypes[col.name]}')
-             if col.col_type.is_image_type() and not pd.api.types.is_string_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(
-                     f'Column {col.name} requires local file paths but contains {data.dtypes[col.name]}')
-             if col.col_type.is_video_type() and not pd.api.types.is_string_dtype(data.dtypes[col.name]):
-                 raise exc.InsertError(
-                     f'Column {col.name} requires local file paths but contains {data.dtypes[col.name]}')
-
-         # check data
-         data_cols = [self.cols_by_name[name] for name in data.columns]
-         for col in data_cols:
-             # image cols: make sure file path points to a valid image file
-             if col.col_type.is_image_type():
-                 for _, path_str in data[col.name].items():
-                     try:
-                         _ = Image.open(path_str)
-                     except FileNotFoundError:
-                         raise exc.OperationalError(f'Column {col.name}: file does not exist: {path_str}')
-                     except PIL.UnidentifiedImageError:
-                         raise exc.OperationalError(f'Column {col.name}: not a valid image file: {path_str}')
-
-             # video cols: make sure file path points to a valid video file
-             if col.col_type.is_video_type():
-                 for _, path_str in data[col.name].items():
-                     cap = cv2.VideoCapture(path_str)
-                     success = cap.isOpened()
-                     cap.release()
-                     if not success:
-                         raise exc.Error(f'Column {col.name}: could not open video file {path_str}')
-
-             if col.col_type.is_json_type():
-                 for idx, d in data[col.name].items():
-                     if not isinstance(d, dict) and not isinstance(d, list):
-                         raise exc.OperationalError(
-                             f'Value for column {col.name} in row {idx} requires a dictionary or list: {d} ')
-
-         # we're creating a new version
-         self.version += 1
-
-         # frame extraction from videos
-         if self.parameters.frame_src_col is not None:
-             video_col = self.cols_by_id[self.parameters.frame_src_col]
-             frame_col = self.cols_by_id[self.parameters.frame_col]
-             frame_idx_col = self.cols_by_id[self.parameters.frame_idx_col]
-
-             # check data: video_column needs to contain valid file paths
-             for idx, path_str in data[video_col.name].items():
-                 path = pathlib.Path(path_str)
-                 if not path.is_file():
-                     raise exc.OperationalError(
-                         f'For frame extraction, value for column {col.name} in row {idx} requires a valid '
-                         f'file path: {path}')
-
-             # expand each row in 'data' into one row per frame, adding columns frame_column and frame_idx_column
-             expanded_rows: List[Dict] = []
-             for input_row_idx, input_tuple in enumerate(data.itertuples(index=False)):
-                 input_row = input_tuple._asdict()
-                 path = input_row[video_col.name]
-                 # we need to generate a unique prefix for each set of frames corresponding to a single video
-                 frame_path_prefix = utils.get_extracted_frame_path(
-                     self.id, video_col.id, self.version, self.next_row_id + input_row_idx)
-                 frame_paths = video.extract_frames(path, frame_path_prefix, self.parameters.extraction_fps)
-                 frame_rows = [
-                     {frame_col.name: p, frame_idx_col.name: i, **input_row} for i, p in enumerate(frame_paths)
-                 ]
-                 expanded_rows.extend(frame_rows)
-             data = pd.DataFrame.from_dict(expanded_rows, orient='columns')
-
-         rowids = range(self.next_row_id, self.next_row_id + len(data))
-
-         # update image indices
-         data_cols = [self.cols_by_name[name] for name in data.columns]
-         for col in [c for c in data_cols if c.is_indexed]:
-             embeddings = np.zeros((len(data), 512))
-             for i, (_, path_str) in enumerate(data[col.name].items()):
-                 try:
-                     img = Image.open(path_str)
-                     embeddings[i] = clip.encode_image(img)
-                 except FileNotFoundError:
-                     raise exc.OperationalError(f'Column {col.name}: file does not exist: {path_str}')
-                 except PIL.UnidentifiedImageError:
-                     raise exc.OperationalError(f'Column {col.name}: not a valid image file: {path_str}')
-             assert col.idx is not None
-             col.idx.insert(embeddings, np.array(rowids))
-
-         # prepare state for computed cols
-         from pixeltable import exprs
-         eval_ctx: Optional[exprs.ComputedColEvalCtx] = None
-         evaluator: Optional[exprs.ExprEvaluator] = None
-         input_col_refs: List[exprs.ColumnRef] = []  # columns needed as input for computing value_exprs
-         computed_cols = [col for col in self.cols if col.value_expr is not None]
-         value_exprs: List[exprs.Expr] = []  # for computed_cols
-         window_sort_exprs: List[exprs.Expr] = []
-         if len(computed_cols) > 0:
-             # create copies to avoid reusing past execution state; eval ctx and evaluator need to share these copies
-             value_exprs = [c.value_expr.copy() for c in computed_cols]
-             eval_ctx = exprs.ComputedColEvalCtx(
-                 [(exprs.ColumnRef(computed_cols[i]), value_exprs[i]) for i in range(len(computed_cols))])
-             evaluator = exprs.ExprEvaluator(value_exprs, None, with_sql=False)
-             input_col_refs = [
-                 e for e in evaluator.output_eval_exprs
-                 # we're looking for ColumnRefs to Columns that aren't themselves computed
-                 if isinstance(e, exprs.ColumnRef) and e.col.value_expr is None
-             ]
-
-             # determine order_by clause for window functions, if any
-             window_fn_calls = [
-                 e for e in exprs.Expr.list_subexprs(value_exprs)
-                 if isinstance(e, exprs.FunctionCall) and e.is_window_fn_call
-             ]
-             window_sort_exprs = window_fn_calls[0].get_window_sort_exprs() if len(window_fn_calls) > 0 else []
-
-         # construct new df with the storage column names, in order to iterate over it more easily
-         stored_data = {col.storage_name(): data[col.name] for col in data_cols}
-         stored_data_df = pd.DataFrame(data=stored_data)
-         if len(window_sort_exprs) > 0:
-             # need to sort data in order to compute windowed agg functions
-             storage_col_names = [e.col.storage_name() for e in window_sort_exprs]
-             stored_data_df.sort_values(storage_col_names, axis=0, inplace=True)
-         insert_values: List[Dict[str, Any]] = []
-         with tqdm(total=len(stored_data_df)) as progress_bar:
-             for row_idx, row in enumerate(stored_data_df.itertuples(index=False)):
-                 row_dict = {'rowid': rowids[row_idx], 'v_min': self.version, **row._asdict()}
-
-                 if len(computed_cols) > 0:
-                     # materialize computed column values
-                     data_row = [None] * eval_ctx.num_materialized
-                     # copy inputs
-                     for col_ref in input_col_refs:
-                         data_row[col_ref.data_row_idx] = row_dict[col_ref.col.storage_name()]
-                         # load image, if this is a file path
-                         if col_ref.col_type.is_image_type():
-                             data_row[col_ref.data_row_idx] = PIL.Image.open(data_row[col_ref.data_row_idx])
-                     evaluator.eval((), data_row)
-
-                     # convert data values to storage format where necessary
-                     for col_idx in range(len(computed_cols)):
-                         val = data_row[value_exprs[col_idx].data_row_idx]
-                         data_row[value_exprs[col_idx].data_row_idx] = \
-                             self._convert_to_stored(computed_cols[col_idx], val, rowids[row_idx])
-
-                     computed_vals_dict = {
-                         computed_cols[i].storage_name(): data_row[value_exprs[i].data_row_idx]
-                         for i in range(len(computed_cols))
-                     }
-                     row_dict.update(computed_vals_dict)
-
-                 insert_values.append(row_dict)
-                 progress_bar.update(1)
-
-         with Env.get().engine.begin() as conn:
-             conn.execute(sql.insert(self.sa_tbl), insert_values)
-             self.next_row_id += len(data)
-             conn.execute(
-                 sql.update(store.Table.__table__)
-                 .values({store.Table.current_version: self.version, store.Table.next_row_id: self.next_row_id})
-                 .where(store.Table.id == self.id))
-
-         self.valid_rowids.update(rowids)
-
-     def insert_csv(self, file_path: str) -> None:
-         pass
-
-     # TODO: update() signature?
-     #def update(self, data: pd.DataFrame) -> None:
-
-     # TODO: delete() signature?
-     #def delete(self, data: DataFrame) -> None:
-
-     def _delete_computed_imgs(self, version: int) -> None:
-         """
-         Delete image files computed for given version.
-         """
-         img_paths = utils.computed_imgs(tbl_id=self.id, version=version)
-         for p in img_paths:
-             os.remove(p)
-         return
-
-     def _delete_extracted_frames(self, version: int) -> None:
-         """
-         Delete extracted frames for given version.
-         """
-         frame_paths = utils.extracted_frames(tbl_id=self.id, version=version)
-         for p in frame_paths:
-             os.remove(p)
-
-     def revert(self) -> None:
-         self._check_is_dropped()
-         if self.version == 0:
-             raise exc.OperationalError('Cannot revert version 0')
-         # check if the current version is referenced by a snapshot
-         with orm.Session(Env.get().engine) as session:
-             # make sure we don't have a snapshot referencing this version
-             num_references = session.query(sql.func.count(store.TableSnapshot.id)) \
-                 .where(store.TableSnapshot.db_id == self.db_id) \
-                 .where(store.TableSnapshot.tbl_id == self.id) \
-                 .where(store.TableSnapshot.tbl_version == self.version) \
-                 .scalar()
-             if num_references > 0:
-                 raise exc.OperationalError(
-                     f'Current version is needed for {num_references} snapshot{"s" if num_references > 1 else ""}')
-
-             conn = session.connection()
-             # delete newly-added data
-             self._delete_computed_imgs(self.version)
-             self._delete_extracted_frames(self.version)
-             conn.execute(sql.delete(self.sa_tbl).where(self.sa_tbl.c.v_min == self.version))
-             # revert new deletions
-             conn.execute(
-                 sql.update(self.sa_tbl).values({self.sa_tbl.c.v_max: store.Table.MAX_VERSION})
-                 .where(self.sa_tbl.c.v_max == self.version))
-
-             if self.version == self.schema_version:
-                 # the current version involved a schema change:
-                 # we need to determine the preceding schema version and reload the schema
-                 preceding_schema_version = session.query(store.TableSchemaVersion.preceding_schema_version) \
-                     .where(store.TableSchemaVersion.tbl_id == self.id) \
-                     .where(store.TableSchemaVersion.schema_version == self.schema_version) \
-                     .scalar()
-                 self.cols = self.load_cols(self.id, preceding_schema_version, session)
-                 conn.execute(
-                     sql.delete(store.TableSchemaVersion.__table__)
-                     .where(store.TableSchemaVersion.tbl_id == self.id)
-                     .where(store.TableSchemaVersion.schema_version == self.schema_version))
-                 self.schema_version = preceding_schema_version
-
-             conn.execute(
-                 sql.update(store.Table.__table__)
-                 .values({
-                     store.Table.current_version: self.version,
-                     store.Table.current_schema_version: self.schema_version
-                 })
-                 .where(store.Table.id == self.id))
-
-             session.commit()
-         self.version -= 1
-
-     # MODULE-LOCAL, NOT PUBLIC
-     def rename(self, new_name: str) -> None:
-         self._check_is_dropped()
-         with Env.get().engine.begin() as conn:
-             conn.execute(
-                 sql.update(store.Table.__table__).values({store.Table.name: new_name})
-                 .where(store.Table.id == self.id))
-
-     # MODULE-LOCAL, NOT PUBLIC
-     def drop(self) -> None:
-         self._check_is_dropped()
-         with Env.get().engine.begin() as conn:
-             conn.execute(
-                 sql.update(store.Table.__table__).values({store.Table.is_mutable: False})
-                 .where(store.Table.id == self.id))
-
-     @classmethod
-     def _create_value_expr(cls, col: Column, existing_cols: Dict[str, Column]) -> None:
-         """
-         Create col.value_expr, given col.compute_func.
-         Interprets compute_func's parameters as references to columns and constructs ColumnRefs as args.
-         Does not update Column.dependent_cols.
-         """
-         assert col.value_expr is None
-         assert col.compute_func is not None
-         from pixeltable import exprs
-         params = inspect.signature(col.compute_func).parameters
-         args: List[exprs.ColumnRef] = []
-         for param_name in params:
-             if param_name not in existing_cols:
-                 raise exc.Error(
-                     f'Column {col.name}: compute_with parameter refers to an unknown column: {param_name}')
-             args.append(exprs.ColumnRef(existing_cols[param_name]))
-         fn = Function(col.col_type, [arg.col_type for arg in args], eval_fn=col.compute_func)
-         col.value_expr = exprs.FunctionCall(fn, args)
-
-     # MODULE-LOCAL, NOT PUBLIC
-     @classmethod
-     def create(
-             cls, db_id: int, dir_id: int, name: str, cols: List[Column],
-             num_retained_versions: int,
-             extract_frames_from: Optional[str], extracted_frame_col: Optional[str], extracted_frame_idx_col: Optional[str],
-             extracted_fps: Optional[int]
-     ) -> 'MutableTable':
-         # make sure col names are unique (within the table) and assign ids
-         cols_by_name: Dict[str, Column] = {}
-         for pos, c in enumerate(cols):
-             if c.name in cols_by_name:
-                 raise exc.DuplicateNameError(f'Duplicate column: {c.name}')
-             c.id = pos
-             cols_by_name[c.name] = c
-
-         # check frame extraction params, if present
-         if extract_frames_from is not None:
-             assert extracted_frame_col is not None and extracted_frame_idx_col is not None and extracted_fps is not None
-             if extract_frames_from is not None and extract_frames_from not in cols_by_name:
-                 raise exc.BadFormatError(f'Unknown column in extract_frames_from: {extract_frames_from}')
-             col_type = cols_by_name[extract_frames_from].col_type
-             is_nullable = cols_by_name[extract_frames_from].nullable
-             if not col_type.is_video_type():
-                 raise exc.BadFormatError(
-                     f'extract_frames_from requires the name of a column of type video, but {extract_frames_from} has '
-                     f'type {col_type}')
-             if extracted_frame_col is not None and extracted_frame_col not in cols_by_name:
-                 raise exc.BadFormatError(f'Unknown column in extracted_frame_col: {extracted_frame_col}')
-             col_type = cols_by_name[extracted_frame_col].col_type
-             if not col_type.is_image_type():
-                 raise exc.BadFormatError(
-                     f'extracted_frame_col requires the name of a column of type image, but {extracted_frame_col} has '
-                     f'type {col_type}')
-             # the src column determines whether the frame column is nullable
-             cols_by_name[extracted_frame_col].nullable = is_nullable
-             if extracted_frame_idx_col is not None and extracted_frame_idx_col not in cols_by_name:
-                 raise exc.BadFormatError(f'Unknown column in extracted_frame_idx_col: {extracted_frame_idx_col}')
-             col_type = cols_by_name[extracted_frame_idx_col].col_type
-             if not col_type.is_int_type():
-                 raise exc.BadFormatError(
-                     f'extracted_frame_idx_col requires the name of a column of type int, but {extracted_frame_idx_col} '
-                     f'has type {col_type}')
-             # the src column determines whether the frame idx column is nullable
-             cols_by_name[extracted_frame_idx_col].nullable = is_nullable
-
-         params = TableParameters(
-             num_retained_versions,
-             cols_by_name[extract_frames_from].id if extract_frames_from is not None else None,
-             cols_by_name[extracted_frame_col].id if extracted_frame_col is not None else None,
-             cols_by_name[extracted_frame_idx_col].id if extracted_frame_idx_col is not None else None,
-             extracted_fps)
-
-         with orm.Session(Env.get().engine) as session:
-             tbl_record = store.Table(
-                 db_id=db_id, dir_id=dir_id, name=name, parameters=dataclasses.asdict(params), current_version=0,
-                 current_schema_version=0, is_mutable=True, next_col_id=len(cols), next_row_id=0)
-             session.add(tbl_record)
-             session.flush()  # sets tbl_record.id
-
-             tbl_version_record = store.TableSchemaVersion(
-                 tbl_id=tbl_record.id, schema_version=0, preceding_schema_version=0)
-             session.add(tbl_version_record)
-             session.flush()  # avoid FK violations in Postgres
-             print(f'creating table {name}, id={tbl_record.id}')
-
-             cols_by_name: Dict[str, Column] = {}  # records the cols we have seen so far
-             for pos, col in enumerate(cols):
-                 session.add(store.StorageColumn(tbl_id=tbl_record.id, col_id=col.id, schema_version_add=0))
-                 session.flush()  # avoid FK violations in Postgres
-                 if col.value_expr is None and col.compute_func is not None:
-                     cls._create_value_expr(col, cols_by_name)
-                 # Column.dependent_cols for existing cols is wrong at this point, but Table.init() will set it correctly
-                 value_expr_str = col.value_expr.serialize() if col.value_expr is not None else None
-                 session.add(
-                     store.SchemaColumn(
-                         tbl_id=tbl_record.id, schema_version=0, col_id=col.id, pos=pos, name=col.name,
-                         col_type=col.col_type.serialize(), is_nullable=col.nullable, is_pk=col.primary_key,
-                         value_expr=value_expr_str, is_indexed=col.is_indexed
-                     )
-                 )
-                 session.flush()  # avoid FK violations in Postgres
-
-                 # for image cols, add VectorIndex for kNN search
-                 if col.is_indexed and col.col_type.is_image_type():
-                     col.set_idx(VectorIndex.create(Table._vector_idx_name(tbl_record.id, col), 512))
-
-                 cols_by_name[col.name] = col
-             session.flush()
-
-             assert tbl_record.id is not None
-             tbl = MutableTable(tbl_record, 0, cols)
-             tbl.sa_md.create_all(bind=session.connection())
-             session.commit()
-             return tbl
-
-
- class Path:
-     def __init__(self, path: str, empty_is_valid: bool = False):
-         if path == '' and not empty_is_valid or path != '' and re.fullmatch(_PATH_RE, path) is None:
-             raise exc.BadFormatError(f"Invalid path format: '{path}'")
-         self.components = path.split('.')
-
-     @property
-     def len(self) -> int:
-         return 0 if self.is_root else len(self.components)
-
-     @property
-     def name(self) -> str:
-         assert len(self.components) > 0
-         return self.components[-1]
-
-     @property
-     def is_root(self) -> bool:
-         return self.components[0] == ''
-
-     @property
-     def parent(self) -> 'Path':
-         if len(self.components) == 1:
-             if self.is_root:
-                 return self
-             else:
-                 return Path('', empty_is_valid=True)
-         else:
-             return Path('.'.join(self.components[:-1]))
-
-     def append(self, name: str) -> 'Path':
-         if self.is_root:
-             return Path(name)
-         else:
-             return Path(f'{str(self)}.{name}')
-
-     def is_ancestor(self, other: 'Path', is_parent: bool = False) -> bool:
-         """
-         True if self is an ancestor path of other.
- """
1040
- if self.len >= other.len or other.is_root:
1041
- return False
1042
- if self.is_root and (other.len == 1 or not is_parent):
1043
- return True
1044
- is_prefix = self.components == other.components[:self.len]
1045
- return is_prefix and (self.len == (other.len - 1) or not is_parent)
1046
-
1047
- def __str__(self) -> str:
1048
- return '.'.join(self.components)
1049
-
1050
-
1051
- class PathDict:
-     def __init__(self) -> None:
-         # *not* Dict[Path, SchemaObject]
-         self.paths: Dict[str, SchemaObject] = {}  # all paths
-
-     def __getitem__(self, path: Path) -> SchemaObject:
-         return self.paths[str(path)]
-
-     def __setitem__(self, path: Path, val: SchemaObject) -> None:
-         self.paths[str(path)] = val
-
-     def __delitem__(self, path: Path) -> None:
-         del self.paths[str(path)]
-
-     def update(self, paths: Dict[str, SchemaObject]) -> None:
-         self.paths.update(paths)
-
-     # checks that the parent of path exists and is a Dir
-     # and that the object of path has 'expected' type
-     def check_is_valid(
-             self, path: Path, expected: Optional[Type[SchemaObject]],
-             expected_parent_type: Type[DirBase] = DirBase) -> None:
-         path_str = str(path)
-         # check for existence
-         if expected is not None:
-             if path_str not in self.paths:
-                 raise exc.UnknownEntityError(path_str)
-             obj = self.paths[path_str]
-             if not isinstance(obj, expected):
-                 raise exc.UnknownEntityError(f'{path_str} needs to be a {expected.display_name()}')
-         if expected is None and path_str in self.paths:
-             raise exc.DuplicateNameError(f'{path_str} already exists')
-         # check for containing directory
-         parent_path = path.parent
-         if str(parent_path) not in self.paths:
-             raise exc.UnknownEntityError(f'Directory {str(parent_path)}')
-         parent = self.paths[str(parent_path)]
-         if not isinstance(parent, expected_parent_type):
-             raise exc.UnknownEntityError(f'{str(parent_path)} needs to be a {expected_parent_type.display_name()}')
-
-     def get(self, path_type: Type[SchemaObject]) -> List[Path]:
-         return [obj for _, obj in self.paths.items() if isinstance(obj, path_type)]
-
-     def get_children(self, parent: Path, child_type: Optional[Type[SchemaObject]], recursive: bool) -> List[Path]:
-         candidates = [
-             Path(path, empty_is_valid=True)
-             for path, obj in self.paths.items() if child_type is None or isinstance(obj, child_type)
-         ]
-         result = [path for path in candidates if parent.is_ancestor(path, is_parent=(not recursive))]
-         return result
-
-
- class Db:
1104
- def __init__(self, db_id: int, name: str):
1105
- self.id = db_id
1106
- self.name = name
1107
- self.paths = PathDict()
1108
- self.paths.update(self._load_dirs())
1109
- self.paths.update(self._load_tables())
1110
- self.paths.update(self._load_function_md())
1111
-
1112
- def create_table(
1113
- self, path_str: str, schema: List[Column], num_retained_versions: int = 10,
1114
- extract_frames_from: Optional[str] = None, extracted_frame_col: Optional[str] = None,
1115
- extracted_frame_idx_col: Optional[str] = None, extracted_fps: Optional[int] = None
1116
- ) -> MutableTable:
1117
- path = Path(path_str)
1118
- self.paths.check_is_valid(path, expected=None, expected_parent_type=Dir)
1119
- dir = self.paths[path.parent]
1120
-
1121
- # make sure frame extraction params are either fully present or absent
1122
- frame_extraction_param_count = int(extract_frames_from is not None) + int(extracted_frame_col is not None)\
1123
- + int(extracted_frame_idx_col is not None) + int(extracted_fps is not None)
1124
- if frame_extraction_param_count != 0 and frame_extraction_param_count != 4:
1125
- raise exc.BadFormatError(
1126
- 'Frame extraction requires that all parameters (extract_frames_from, extracted_frame_col, '
1127
- 'extracted_frame_idx_col, extracted_fps) be specified')
1128
- tbl = MutableTable.create(
1129
- self.id, dir.id, path.name, schema, num_retained_versions, extract_frames_from, extracted_frame_col,
1130
- extracted_frame_idx_col, extracted_fps)
1131
- self.paths[path] = tbl
1132
- return tbl
1133
-
1134
- def get_table(self, path_str: str) -> Table:
1135
- path = Path(path_str)
1136
- self.paths.check_is_valid(path, expected=Table)
1137
- obj = self.paths[path]
1138
- assert isinstance(obj, Table)
1139
- return obj
1140
-
1141
- def rename_table(self, path_str: str, new_name: str) -> None:
1142
- path = Path(path_str)
1143
- self.paths.check_is_valid(path, expected=MutableTable)
1144
- if re.fullmatch(_ID_RE, new_name) is None:
1145
- raise exc.BadFormatError(f"Invalid table name: '{new_name}'")
1146
- new_path = path.parent.append(new_name)
1147
- self.paths.check_is_valid(new_path, expected=None, expected_parent_type=Dir)
1148
-
1149
- tbl = self.paths[path]
1150
- assert isinstance(tbl, MutableTable)
1151
- del self.paths[path]
1152
- self.paths[new_path] = tbl
1153
- tbl.rename(new_name)
1154
-
1155
- def move_table(self, tbl_path: str, dir_path: str) -> None:
1156
- pass
1157
-
1158
- def list_tables(self, dir_path: str = '', recursive: bool = True) -> List[str]:
1159
- assert dir_path is not None
1160
- path = Path(dir_path, empty_is_valid=True)
1161
- self.paths.check_is_valid(path, expected=DirBase)
1162
- return [str(p) for p in self.paths.get_children(path, child_type=Table, recursive=recursive)]
1163
-
1164
-     def drop_table(self, path_str: str, force: bool = False, ignore_errors: bool = False) -> None:
-         path = Path(path_str)
-         try:
-             self.paths.check_is_valid(path, expected=MutableTable)
-         except Exception as e:
-             if ignore_errors:
-                 return
-             else:
-                 raise e
-         tbl = self.paths[path]
-         assert isinstance(tbl, MutableTable)
-         tbl.drop()
-         del self.paths[path]
-
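Listing and dropping, as a sketch; `ignore_errors=True` swallows the validity check, so dropping a missing table becomes a no-op:

    print(db.list_tables('dir1'))                    # all tables under dir1, recursively
    print(db.list_tables('dir1', recursive=False))   # direct children only
    db.drop_table('dir1.clips')
    db.drop_table('dir1.clips', ignore_errors=True)  # already gone: returns silently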
-     def create_snapshot(self, path_str: str, tbl_paths: List[str]) -> None:
-         snapshot_dir_path = Path(path_str)
-         self.paths.check_is_valid(snapshot_dir_path, expected=None, expected_parent_type=Dir)
-         tbls: List[MutableTable] = []
-         for tbl_path_str in tbl_paths:
-             tbl_path = Path(tbl_path_str)
-             self.paths.check_is_valid(tbl_path, expected=MutableTable)
-             tbl = self.paths[tbl_path]
-             assert isinstance(tbl, MutableTable)
-             tbls.append(tbl)
-
-         with orm.Session(Env.get().engine) as session:
-             dir_record = store.Dir(db_id=self.id, path=path_str, is_snapshot=True)
-             session.add(dir_record)
-             session.flush()
-             assert dir_record.id is not None
-             self.paths[snapshot_dir_path] = Dir(dir_record.id)
-
-             for tbl in tbls:
-                 snapshot_record = store.TableSnapshot(
-                     db_id=self.id, dir_id=dir_record.id, name=tbl.name, tbl_id=tbl.id, tbl_version=tbl.version,
-                     tbl_schema_version=tbl.schema_version)
-                 session.add(snapshot_record)
-                 session.flush()
-                 assert snapshot_record.id is not None
-                 cols = Table.load_cols(tbl.id, tbl.schema_version, session)
-                 snapshot = TableSnapshot(snapshot_record, cols)
-                 snapshot_path = snapshot_dir_path.append(tbl.name)
-                 self.paths[snapshot_path] = snapshot
-
-             session.commit()
-
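`create_snapshot` writes one `TableSnapshot` record per table into a new snapshot directory, whose parent must be an existing `Dir`. A sketch, assuming `dir1.videos` exists:

    db.create_dir('snapshots')
    db.create_snapshot('snapshots.v1', ['dir1.videos'])
    snap = db.get_table('snapshots.v1.videos')   # snapshots are addressable like tables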
-     def create_dir(self, path_str: str) -> None:
-         path = Path(path_str)
-         self.paths.check_is_valid(path, expected=None, expected_parent_type=Dir)
-         with orm.Session(Env.get().engine) as session:
-             dir_record = store.Dir(db_id=self.id, path=path_str, is_snapshot=False)
-             session.add(dir_record)
-             session.flush()
-             assert dir_record.id is not None
-             self.paths[path] = Dir(dir_record.id)
-             session.commit()
-
-     def rm_dir(self, path_str: str) -> None:
-         path = Path(path_str)
-         self.paths.check_is_valid(path, expected=Dir)
-
-         # make sure it's empty
-         if len(self.paths.get_children(path, child_type=None, recursive=True)) > 0:
-             raise exc.DirectoryNotEmptyError(f'Directory {path_str}')
-         # TODO: figure out how to make force=True work in the presence of snapshots
-         # # delete tables
-         # for tbl_path in self.paths.get_children(path, child_type=Table, recursive=True):
-         #     self.drop_table(str(tbl_path), force=True)
-         # # rm subdirs
-         # for dir_path in self.paths.get_children(path, child_type=DirBase, recursive=False):
-         #     self.rm_dir(str(dir_path), force=True)
-
-         with Env.get().engine.begin() as conn:
-             dir = self.paths[path]
-             conn.execute(sql.delete(store.Dir.__table__).where(store.Dir.id == dir.id))
-         del self.paths[path]
-
-     def list_dirs(self, path_str: str = '', recursive: bool = True) -> List[str]:
-         path = Path(path_str, empty_is_valid=True)
-         self.paths.check_is_valid(path, expected=DirBase)
-         return [str(p) for p in self.paths.get_children(path, child_type=DirBase, recursive=recursive)]
-
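Directory management in one sketch; `rm_dir` refuses to remove a non-empty directory, per the check above:

    db.create_dir('dir1.sub')
    print(db.list_dirs())                          # every directory, recursively
    print(db.list_dirs('dir1', recursive=False))   # direct subdirectories only
    db.rm_dir('dir1.sub')                          # must be empty, else DirectoryNotEmptyError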
-     def create_function(self, path_str: str, func: Function) -> None:
-         if func.is_library_function:
-             raise exc.Error('Cannot create a named function for a library function')
-         path = Path(path_str)
-         self.paths.check_is_valid(path, expected=None, expected_parent_type=Dir)
-         dir = self.paths[path.parent]
-
-         FunctionRegistry.get().create_function(func, self.id, dir.id, path.name)
-         self.paths[path] = NamedFunction(func.id, dir.id, path.name)
-
-     def rename_function(self, path_str: str, new_path_str: str) -> None:
-         """
-         Assign a new name and/or move the function to a different directory.
-         """
-         path = Path(path_str)
-         new_path = Path(new_path_str)
-         self.paths.check_is_valid(path, expected=NamedFunction)
-         self.paths.check_is_valid(new_path, expected=None)
-         func = self.paths[path]
-         new_dir = self.paths[new_path.parent]
-         with Env.get().engine.begin() as conn:
-             conn.execute(
-                 sql.update(store.Function.__table__)
-                 .values({
-                     store.Function.dir_id: new_dir.id,
-                     store.Function.name: new_path.name,
-                 })
-                 .where(store.Function.id == func.id))
-         del self.paths[path]
-         self.paths[new_path] = func
-
-     def update_function(self, path_str: str, new_eval_fn: Callable) -> None:
-         """
-         Update the Function at the given path with the new callable.
-         """
-         path = Path(path_str)
-         self.paths.check_is_valid(path, expected=NamedFunction)
-         named_fn = self.paths[path]
-         # TODO: check that the function signature doesn't change if the Function is used in a computed column
-         FunctionRegistry.get().update_function(named_fn.id, new_eval_fn)
-
-     def load_function(self, path_str: str) -> Function:
-         path = Path(path_str)
-         self.paths.check_is_valid(path, expected=NamedFunction)
-         named_fn = self.paths[path]
-         assert isinstance(named_fn, NamedFunction)
-         return FunctionRegistry.get().get_function(named_fn.id)
-
-     def drop_function(self, path_str: str, ignore_errors: bool = False) -> None:
-         """
-         Deletes function from db, provided that no computed columns depend on it.
-         """
-         path = Path(path_str)
-         try:
-             self.paths.check_is_valid(path, expected=NamedFunction)
-         except exc.UnknownEntityError as e:
-             if ignore_errors:
-                 return
-             else:
-                 raise e
-         named_fn = self.paths[path]
-         FunctionRegistry.get().delete_function(named_fn.id)
-         del self.paths[path]
-
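The named-function lifecycle, as a sketch; `add_one` stands in for a non-library `Function` built elsewhere in the package, so its construction is assumed here:

    db.create_function('dir1.add_one', add_one)           # library functions are rejected
    fn = db.load_function('dir1.add_one')                 # loads the callable via FunctionRegistry
    db.rename_function('dir1.add_one', 'dir1.plus_one')   # rename and/or move directories
    db.update_function('dir1.plus_one', lambda x: x + 1)  # swap the eval callable in place
    db.drop_function('dir1.plus_one', ignore_errors=True)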
-     def _load_dirs(self) -> Dict[str, SchemaObject]:
-         result: Dict[str, SchemaObject] = {}
-         with orm.Session(Env.get().engine) as session:
-             for dir_record in session.query(store.Dir).where(store.Dir.db_id == self.id).all():
-                 result[dir_record.path] = SnapshotDir(dir_record.id) if dir_record.is_snapshot else Dir(dir_record.id)
-         return result
-
-     def _load_tables(self) -> Dict[str, SchemaObject]:
-         result: Dict[str, SchemaObject] = {}
-         with orm.Session(Env.get().engine) as session:
-             # load all reachable (= mutable) tables
-             q = session.query(store.Table, store.Dir.path) \
-                 .join(store.Dir) \
-                 .where(store.Table.db_id == self.id) \
-                 .where(store.Table.is_mutable == True)
-             for tbl_record, dir_path in q.all():
-                 cols = Table.load_cols(
-                     tbl_record.id, tbl_record.current_schema_version, session)
-                 tbl = MutableTable(tbl_record, tbl_record.current_schema_version, cols)
-                 tbl._load_valid_rowids()  # TODO: move this someplace more appropriate
-                 path = Path(dir_path, empty_is_valid=True).append(tbl_record.name)
-                 result[str(path)] = tbl
-
-             # load all table snapshots
-             q = session.query(store.TableSnapshot, store.Dir.path) \
-                 .select_from(store.TableSnapshot) \
-                 .join(store.Table) \
-                 .join(store.Dir) \
-                 .where(store.TableSnapshot.db_id == self.id)
-             for snapshot_record, dir_path in q.all():
-                 cols = Table.load_cols(snapshot_record.tbl_id, snapshot_record.tbl_schema_version, session)
-                 snapshot = TableSnapshot(snapshot_record, cols)
-                 path = Path(dir_path, empty_is_valid=True).append(snapshot_record.name)
-                 result[str(path)] = snapshot
-
-         return result
-
-     def _load_function_md(self) -> Dict[str, SchemaObject]:
-         """
-         Loads Function metadata. Doesn't load the actual callables, which can be large and are only loaded
-         on demand by the FunctionRegistry.
-         """
-         result: Dict[str, SchemaObject] = {}
-         with orm.Session(Env.get().engine) as session:
-             # load metadata for all named functions
-             q = session.query(store.Function.id, store.Function.dir_id, store.Function.name, store.Dir.path) \
-                 .join(store.Dir) \
-                 .where(store.Function.db_id == self.id)
-             for id, dir_id, name, dir_path in q.all():
-                 named_fn = NamedFunction(id, dir_id, name)
-                 path = Path(dir_path, empty_is_valid=True).append(name)
-                 result[str(path)] = named_fn
-         return result
-
-     def __str__(self) -> str:
-         return self.name
-
-     def __repr__(self) -> str:
-         return f'Db(name={self.name})'
-
-     @classmethod
-     def create(cls, name: str) -> 'Db':
-         db_id: int = -1
-         with orm.Session(Env.get().engine) as session:
-             # check for duplicate name
-             is_duplicate = session.query(sql.func.count(store.Db.id)).where(store.Db.name == name).scalar() > 0
-             if is_duplicate:
-                 raise exc.DuplicateNameError(f"Db '{name}' already exists")
-
-             db_record = store.Db(name=name)
-             session.add(db_record)
-             session.flush()
-             assert db_record.id is not None
-             db_id = db_record.id
-             # also create a top-level directory, so that every schema object has a directory
-             dir_record = store.Dir(db_id=db_id, path='', is_snapshot=False)
-             session.add(dir_record)
-             session.flush()
-             session.commit()
-         assert db_id is not None
-         return Db(db_id, name)
-
-     @classmethod
-     def load(cls, name: str) -> 'Db':
-         if re.fullmatch(_ID_RE, name) is None:
-             raise exc.BadFormatError(f"Invalid db name: '{name}'")
-         with orm.Session(Env.get().engine) as session:
-             try:
-                 db_record = session.query(store.Db).where(store.Db.name == name).one()
-                 return Db(db_record.id, db_record.name)
-             except sql.exc.NoResultFound:
-                 raise exc.UnknownEntityError(f'Db {name}')
-
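The `Db` lifecycle itself, as a sketch grounded in the classmethods above and the `delete` method below:

    db = Db.create('demo')   # also creates the root directory '' for the new db
    db = Db.load('demo')     # raises UnknownEntityError if no db with that name exists
    db.delete()              # removes all metadata rows and drops the data tables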
-     def delete(self) -> None:
-         """
-         Delete db and all associated data.
-         """
-         with Env.get().engine.begin() as conn:
-             conn.execute(sql.delete(store.TableSnapshot.__table__).where(store.TableSnapshot.db_id == self.id))
-             tbls_stmt = sql.select(store.Table.id).where(store.Table.db_id == self.id)
-             conn.execute(sql.delete(store.SchemaColumn.__table__).where(store.SchemaColumn.tbl_id.in_(tbls_stmt)))
-             conn.execute(sql.delete(store.StorageColumn.__table__).where(store.StorageColumn.tbl_id.in_(tbls_stmt)))
-             conn.execute(
-                 sql.delete(store.TableSchemaVersion.__table__).where(store.TableSchemaVersion.tbl_id.in_(tbls_stmt)))
-             conn.execute(sql.delete(store.Table.__table__).where(store.Table.db_id == self.id))
-             conn.execute(sql.delete(store.Function.__table__).where(store.Function.db_id == self.id))
-             conn.execute(sql.delete(store.Dir.__table__).where(store.Dir.db_id == self.id))
-             conn.execute(sql.delete(store.Db.__table__).where(store.Db.id == self.id))
-             # delete all data tables
-             # TODO: also delete generated images
-             for tbl in self.paths.get(MutableTable):
-                 tbl.sa_md.drop_all(bind=conn)