pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +22 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +193 -158
- pixeltable/catalog/table_version.py +191 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +89 -89
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/__init__.py +2 -0
- pixeltable/exprs/arithmetic_expr.py +7 -11
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +12 -13
- pixeltable/exprs/comparison.py +3 -6
- pixeltable/exprs/compound_predicate.py +4 -4
- pixeltable/exprs/expr.py +31 -22
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +1 -1
- pixeltable/exprs/function_call.py +110 -80
- pixeltable/exprs/globals.py +3 -3
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +2 -2
- pixeltable/exprs/json_path.py +17 -10
- pixeltable/exprs/literal.py +1 -1
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/row_builder.py +8 -17
- pixeltable/exprs/rowid_ref.py +21 -10
- pixeltable/exprs/similarity_expr.py +5 -5
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +2 -3
- pixeltable/exprs/variable.py +2 -2
- pixeltable/ext/__init__.py +2 -0
- pixeltable/ext/functions/__init__.py +2 -0
- pixeltable/ext/functions/yolox.py +3 -3
- pixeltable/func/__init__.py +3 -1
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/callable_function.py +3 -4
- pixeltable/func/expr_template_function.py +6 -16
- pixeltable/func/function.py +48 -14
- pixeltable/func/function_registry.py +1 -3
- pixeltable/func/query_template_function.py +5 -12
- pixeltable/func/signature.py +23 -22
- pixeltable/func/tools.py +3 -3
- pixeltable/func/udf.py +6 -4
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +19 -19
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/__init__.py +2 -0
- pixeltable/share/packager.py +15 -12
- pixeltable/share/publish.py +3 -5
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/console_output.py +1 -3
- pixeltable/utils/description_helper.py +1 -1
- pixeltable/utils/documents.py +3 -3
- pixeltable/utils/filecache.py +20 -9
- pixeltable/utils/formatter.py +2 -3
- pixeltable/utils/media_store.py +1 -1
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +4 -4
- pixeltable/utils/transactional_directory.py +2 -1
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
- pixeltable-0.3.8.dist-info/RECORD +174 -0
- pixeltable-0.3.6.dist-info/RECORD +0 -172
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
pixeltable/store.py
CHANGED

@@ -12,10 +12,8 @@ from typing import Any, Iterator, Literal, Optional, Union
 import sqlalchemy as sql
 from tqdm import TqdmWarning, tqdm
 
-import
-
-import pixeltable.exceptions as excs
-from pixeltable import exprs
+from pixeltable import catalog, exceptions as excs, exprs
+from pixeltable.env import Env
 from pixeltable.exec import ExecNode
 from pixeltable.metadata import schema
 from pixeltable.utils.media_store import MediaStore
@@ -33,7 +31,7 @@ class StoreBase:
     - v_max: version at which the row was deleted (or MAX_VERSION if it's still live)
     """
 
-    tbl_version: catalog.TableVersion
+    tbl_version: catalog.TableVersionHandle
    sa_md: sql.MetaData
    sa_tbl: Optional[sql.Table]
    _pk_cols: list[sql.Column]
@@ -44,12 +42,14 @@ class StoreBase:
     __INSERT_BATCH_SIZE = 1000
 
     def __init__(self, tbl_version: catalog.TableVersion):
-        self.tbl_version = tbl_version
+        self.tbl_version = catalog.TableVersionHandle(
+            tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version
+        )
         self.sa_md = sql.MetaData()
         self.sa_tbl = None
         # We need to declare a `base` variable here, even though it's only defined for instances of `StoreView`,
         # since it's referenced by various methods of `StoreBase`
-        self.base = tbl_version.base.store_tbl if tbl_version.base is not None else None
+        self.base = tbl_version.base.get().store_tbl if tbl_version.base is not None else None
         self.create_sa_tbl()
 
     def pk_columns(self) -> list[sql.Column]:
@@ -76,7 +76,7 @@ class StoreBase:
         """Create self.sa_tbl from self.tbl_version."""
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
-        for col in [c for c in self.tbl_version.cols if c.is_stored]:
+        for col in [c for c in self.tbl_version.get().cols if c.is_stored]:
             # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
             # to the last sql.Table version we created and cannot be reused
             col.create_sa_cols()
@@ -115,7 +115,7 @@ class StoreBase:
 
     def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column, v_min: int) -> str:
         """Move tmp media file with given url to Env.media_dir and return new url, or given url if not a tmp_dir file"""
-        pxt_tmp_dir = str(
+        pxt_tmp_dir = str(Env.get().tmp_dir)
         if file_url is None:
             return None
         parsed = urllib.parse.urlparse(file_url)
@@ -158,36 +158,36 @@ class StoreBase:
             table_row[pk_col.name] = pk_val
         return table_row, num_excs
 
-    def count(self
+    def count(self) -> int:
         """Return the number of rows visible in self.tbl_version"""
         stmt = (
             sql.select(sql.func.count('*'))
             .select_from(self.sa_tbl)
-            .where(self.v_min_col <= self.tbl_version.version)
-            .where(self.v_max_col > self.tbl_version.version)
+            .where(self.v_min_col <= self.tbl_version.get().version)
+            .where(self.v_max_col > self.tbl_version.get().version)
         )
-
-
-            result = conn.execute(stmt).scalar_one()
-        else:
-            result = conn.execute(stmt).scalar_one()
+        conn = Env.get().conn
+        result = conn.execute(stmt).scalar_one()
         assert isinstance(result, int)
         return result
 
-    def create(self
+    def create(self) -> None:
+        conn = Env.get().conn
         self.sa_md.create_all(bind=conn)
 
-    def drop(self
+    def drop(self) -> None:
         """Drop store table"""
+        conn = Env.get().conn
         self.sa_md.drop_all(bind=conn)
 
-    def add_column(self, col: catalog.Column
+    def add_column(self, col: catalog.Column) -> None:
         """Add column(s) to the store-resident table based on a catalog column
 
         Note that a computed catalog column will require two extra columns (for the computed value and for the error
         message).
         """
         assert col.is_stored
+        conn = Env.get().conn
         col_type_str = col.get_sa_col_type().compile(dialect=conn.dialect)
         stmt = sql.text(f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL')
         log_stmt(_logger, stmt)
@@ -207,8 +207,9 @@ class StoreBase:
         self.create_sa_tbl()
         _logger.info(f'Added columns {added_storage_cols} to storage table {self._storage_name()}')
 
-    def drop_column(self, col: catalog.Column
+    def drop_column(self, col: catalog.Column) -> None:
         """Execute Alter Table Drop Column statement"""
+        conn = Env.get().conn
         stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.store_name()}'
         conn.execute(sql.text(stmt))
         if col.records_errors:
@@ -218,12 +219,7 @@ class StoreBase:
             conn.execute(sql.text(stmt))
 
     def load_column(
-        self,
-        col: catalog.Column,
-        exec_plan: ExecNode,
-        value_expr_slot_idx: int,
-        conn: sql.engine.Connection,
-        on_error: Literal['abort', 'ignore'],
+        self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, on_error: Literal['abort', 'ignore']
     ) -> int:
         """Update store column of a computed column with values produced by an execution plan
 
@@ -250,6 +246,7 @@ class StoreBase:
             tmp_errormsg_col = sql.Column(col.sa_errormsg_col.name, col.sa_errormsg_col.type)
             tmp_cols.append(tmp_errormsg_col)
         tmp_tbl = sql.Table(tmp_name, self.sa_md, *tmp_cols, prefixes=['TEMPORARY'])
+        conn = Env.get().conn
         tmp_tbl.create(bind=conn)
 
         try:
@@ -280,7 +277,7 @@ class StoreBase:
                 else:
                     if col.col_type.is_image_type() and result_row.file_urls[value_expr_slot_idx] is None:
                         # we have yet to store this image
-                        filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
+                        filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.get().version))
                         result_row.flush_img(value_expr_slot_idx, filepath)
                 val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
                 if col.col_type.is_media_type():
@@ -313,7 +310,6 @@ class StoreBase:
     def insert_rows(
         self,
         exec_plan: ExecNode,
-        conn: sql.engine.Connection,
         v_min: Optional[int] = None,
         show_progress: bool = True,
         rowids: Optional[Iterator[int]] = None,
@@ -324,7 +320,6 @@ class StoreBase:
             number of inserted rows, number of exceptions, set of column ids that have exceptions
         """
         assert v_min is not None
-        exec_plan.ctx.set_conn(conn)
         # TODO: total?
         num_excs = 0
         num_rows = 0
@@ -332,6 +327,8 @@ class StoreBase:
         progress_bar: Optional[tqdm] = None  # create this only after we started executing
         row_builder = exec_plan.row_builder
         media_cols = [info.col for info in row_builder.table_columns if info.col.col_type.is_media_type()]
+        conn = Env.get().conn
+
         try:
             exec_plan.open()
             for row_batch in exec_plan:
@@ -348,7 +345,7 @@ class StoreBase:
                         raise exc
 
                     rowid = (next(rowids),) if rowids is not None else row.pk[:-1]
-                    pk = rowid
+                    pk = (*rowid, v_min)
                    table_row, num_row_exc = self._create_table_row(row, row_builder, cols_with_excs, pk=pk)
                    num_excs += num_row_exc
                    table_rows.append(table_row)
@@ -357,7 +354,7 @@ class StoreBase:
                 if progress_bar is None:
                     warnings.simplefilter('ignore', category=TqdmWarning)
                     progress_bar = tqdm(
-                        desc=f'Inserting rows into `{self.tbl_version.name}`',
+                        desc=f'Inserting rows into `{self.tbl_version.get().name}`',
                        unit=' rows',
                        ncols=100,
                        file=sys.stdout,
@@ -378,7 +375,9 @@ class StoreBase:
         v = versions[0]
         if v is None:
             # we're looking at live rows
-            clause = sql.and_(
+            clause = sql.and_(
+                self.v_min_col <= self.tbl_version.get().version, self.v_max_col == schema.Table.MAX_VERSION
+            )
         else:
             # we're looking at a specific version
             clause = self.v_min_col == v if match_on_vmin else self.v_max_col == v
@@ -392,7 +391,6 @@ class StoreBase:
         base_versions: list[Optional[int]],
         match_on_vmin: bool,
         where_clause: Optional[sql.ColumnElement[bool]],
-        conn: sql.engine.Connection,
     ) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
         Also: populate the undo columns
@@ -414,7 +412,7 @@ class StoreBase:
             sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
         )
         set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
-        for index_info in self.tbl_version.idxs_by_name.values():
+        for index_info in self.tbl_version.get().idxs_by_name.values():
             # copy value column to undo column
             set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
             # set value column to NULL
@@ -426,6 +424,7 @@ class StoreBase:
             .where(rowid_join_clause)
             .where(base_versions_clause)
         )
+        conn = Env.get().conn
         log_explain(_logger, stmt, conn)
         status = conn.execute(stmt)
         return status.rowcount
@@ -433,7 +432,7 @@ class StoreBase:
 
 class StoreTable(StoreBase):
     def __init__(self, tbl_version: catalog.TableVersion):
-        assert not tbl_version.is_view
+        assert not tbl_version.is_view
         super().__init__(tbl_version)
 
     def _create_rowid_columns(self) -> list[sql.Column]:
@@ -449,7 +448,7 @@ class StoreTable(StoreBase):
 
 class StoreView(StoreBase):
     def __init__(self, catalog_view: catalog.TableVersion):
-        assert catalog_view.is_view
+        assert catalog_view.is_view
         super().__init__(catalog_view)
 
     def _create_rowid_columns(self) -> list[sql.Column]:
@@ -492,7 +491,7 @@ class StoreComponentView(StoreView):
     def create_sa_tbl(self) -> None:
         super().create_sa_tbl()
         # we need to fix up the 'pos' column in TableVersion
-        self.tbl_version.cols_by_name['pos'].sa_col = self.pos_col
+        self.tbl_version.get().cols_by_name['pos'].sa_col = self.pos_col
 
     def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
         return sql.and_(
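Note on the recurring pattern above: `StoreBase` now holds a `catalog.TableVersionHandle` rather than a `catalog.TableVersion`, resolving the live object with `self.tbl_version.get()` at each point of use, and the `sql.engine.Connection` parameters disappear in favor of the ambient `Env.get().conn`. A minimal sketch of the handle idea (illustrative only; `resolve_table_version` is a hypothetical stand-in for the catalog lookup, and the real class in pixeltable/catalog/table_version_handle.py differs in detail):

```python
from typing import Any, Optional
from uuid import UUID


def resolve_table_version(tbl_id: UUID, effective_version: Optional[int]) -> Any:
    # hypothetical catalog lookup; in pixeltable this would go through the Catalog
    raise NotImplementedError


class TableVersionHandle:
    """Sketch: a stable (id, effective_version) reference, resolved on demand."""

    def __init__(self, tbl_id: UUID, effective_version: Optional[int], tbl_version: Any = None) -> None:
        self.id = tbl_id
        self.effective_version = effective_version
        self._cached = tbl_version  # optionally pre-resolved instance

    def get(self) -> Any:
        # resolve at the point of use, so callers always see current table
        # metadata instead of a possibly stale object captured at construction
        if self._cached is None:
            self._cached = resolve_table_version(self.id, self.effective_version)
        return self._cached
```

Holding a handle rather than the object itself is also what lets methods like `count()`, `create()`, and `drop()` shed their `conn` parameters: the connection becomes ambient state owned by `Env` instead of an argument threaded through every call.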
pixeltable/type_system.py
CHANGED

@@ -8,7 +8,7 @@ import json
 import typing
 import urllib.parse
 import urllib.request
-from typing import Any, Iterable, Literal, Mapping, Optional, Sequence, Union
+from typing import Any, ClassVar, Iterable, Literal, Mapping, Optional, Sequence, Union
 
 import av
 import jsonschema
@@ -81,9 +81,9 @@ class ColumnType:
         FLOAT32 = (10,)
         FLOAT64 = 11
 
-    scalar_types = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
-    numeric_types = {Type.INT, Type.FLOAT}
-    common_supertypes: dict[tuple[Type, Type], Type] = {
+    scalar_types: ClassVar[set[Type]] = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
+    numeric_types: ClassVar[set[Type]] = {Type.INT, Type.FLOAT}
+    common_supertypes: ClassVar[dict[tuple[Type, Type], Type]] = {
         (Type.BOOL, Type.INT): Type.INT,
         (Type.BOOL, Type.FLOAT): Type.FLOAT,
         (Type.INT, Type.FLOAT): Type.FLOAT,
@@ -150,7 +150,7 @@ class ColumnType:
 
     @classmethod
     def make_type(cls, t: Type) -> ColumnType:
-        assert t != cls.Type.INVALID
+        assert t != cls.Type.INVALID
         if t == cls.Type.STRING:
             return StringType()
         if t == cls.Type.INT:
@@ -161,6 +161,8 @@ class ColumnType:
             return BoolType()
         if t == cls.Type.TIMESTAMP:
             return TimestampType()
+        if t == cls.Type.ARRAY:
+            return ArrayType()
         if t == cls.Type.JSON:
             return JsonType()
         if t == cls.Type.IMAGE:
@@ -364,7 +366,7 @@ class ColumnType:
             cls.__raise_exc_for_invalid_type(t)
         return col_type
 
-    __TYPE_SUGGESTIONS: list[tuple[type, str]] = [
+    __TYPE_SUGGESTIONS: ClassVar[list[tuple[type, str]]] = [
         (str, 'pxt.String'),
         (bool, 'pxt.Bool'),
         (int, 'pxt.Int'),
@@ -405,9 +407,8 @@ class ColumnType:
             path = parse_local_file_path(val)
             if path is not None and not path.is_file():
                 raise TypeError(f'File not found: {path}')
-
-
-            raise TypeError(f'expected file path or bytes, got {type(val)}')
+        elif not isinstance(val, bytes):
+            raise TypeError(f'expected file path or bytes, got {type(val)}')
 
     @abc.abstractmethod
     def _validate_literal(self, val: Any) -> None:
@@ -475,12 +476,12 @@ class ColumnType:
         # types that refer to external media files
         return self.is_image_type() or self.is_video_type() or self.is_audio_type() or self.is_document_type()
 
+    @classmethod
     @abc.abstractmethod
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         """
         Return corresponding SQLAlchemy type.
         """
-        pass
 
     def to_json_schema(self) -> dict[str, Any]:
         if self.nullable:
@@ -496,14 +497,15 @@ class InvalidType(ColumnType):
     def __init__(self, nullable: bool = False):
         super().__init__(self.Type.INVALID, nullable=nullable)
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.types.NullType()
 
     def print_value(self, val: Any) -> str:
         return str(val)
 
     def _validate_literal(self, val: Any) -> None:
-
+        raise AssertionError()
@@ -513,7 +515,8 @@ class StringType(ColumnType):
     def has_supertype(self):
         return not self.nullable
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.String()
 
     def _to_json_schema(self) -> dict[str, Any]:
@@ -539,7 +542,8 @@ class IntType(ColumnType):
     def __init__(self, nullable: bool = False):
         super().__init__(self.Type.INT, nullable=nullable)
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.BigInteger()
 
     def _to_json_schema(self) -> dict[str, Any]:
@@ -556,7 +560,8 @@ class FloatType(ColumnType):
     def __init__(self, nullable: bool = False):
         super().__init__(self.Type.FLOAT, nullable=nullable)
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.Float()
 
     def _to_json_schema(self) -> dict[str, Any]:
@@ -576,7 +581,8 @@ class BoolType(ColumnType):
     def __init__(self, nullable: bool = False):
         super().__init__(self.Type.BOOL, nullable=nullable)
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.Boolean()
 
     def _to_json_schema(self) -> dict[str, Any]:
@@ -599,7 +605,8 @@ class TimestampType(ColumnType):
     def has_supertype(self):
         return not self.nullable
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.TIMESTAMP(timezone=True)
 
     def _validate_literal(self, val: Any) -> None:
@@ -644,7 +651,8 @@ class JsonType(ColumnType):
     def _from_dict(cls, d: dict) -> ColumnType:
         return cls(json_schema=d.get('json_schema'), nullable=d['nullable'])
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.dialects.postgresql.JSONB()
 
     def _to_json_schema(self) -> dict[str, Any]:
@@ -760,7 +768,7 @@ class JsonType(ColumnType):
         a_type = a.get('type')
         b_type = b.get('type')
 
-        if a_type in
+        if a_type in {'string', 'integer', 'number', 'boolean', 'object', 'array'} and a_type == b_type:
             # a and b both have the same type designation, but are not identical. This can happen if
             # (for example) they have validators or other attributes that differ. In this case, we
             # generalize to {'type': t}, where t is their shared type, with no other qualifications.
@@ -904,7 +912,7 @@ class ArrayType(ColumnType):
         # determine our dtype
         assert isinstance(val, np.ndarray)
         pxttype: Optional[ColumnType] = cls.from_np_dtype(val.dtype, nullable)
-        if pxttype
+        if pxttype is None:
             return None
         return cls(val.shape, dtype=pxttype, nullable=nullable)
@@ -962,7 +970,8 @@ class ArrayType(ColumnType):
             return np.array(val, dtype=self.numpy_dtype())
         return val
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.LargeBinary()
 
     def numpy_dtype(self) -> Optional[np.dtype]:
@@ -976,7 +985,7 @@ class ArrayType(ColumnType):
             return np.dtype(np.bool_)
         if self.dtype == self.Type.STRING:
             return np.dtype(np.str_)
-
+        raise AssertionError(self.dtype)
 
 
 class ImageType(ColumnType):
@@ -1060,7 +1069,8 @@ class ImageType(ColumnType):
         assert 'mode' in d
         return cls(width=d['width'], height=d['height'], mode=d['mode'], nullable=d['nullable'])
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.String()
 
     def _create_literal(self, val: Any) -> Any:
@@ -1094,7 +1104,8 @@ class VideoType(ColumnType):
     def __init__(self, nullable: bool = False):
         super().__init__(self.Type.VIDEO, nullable=nullable)
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         # stored as a file path
         return sql.String()
 
@@ -1126,7 +1137,8 @@ class AudioType(ColumnType):
     def __init__(self, nullable: bool = False):
         super().__init__(self.Type.AUDIO, nullable=nullable)
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         # stored as a file path
         return sql.String()
 
@@ -1168,7 +1180,7 @@ class DocumentType(ColumnType):
                 raise ValueError(f'Invalid document type: {type_str}')
             self._doc_formats = [self.DocumentFormat[type_str.upper()] for type_str in type_strs]
         else:
-            self._doc_formats =
+            self._doc_formats = list(self.DocumentFormat)
 
     def copy(self, nullable: bool) -> ColumnType:
         return DocumentType(doc_formats=self.doc_formats, nullable=nullable)
@@ -1179,7 +1191,8 @@ class DocumentType(ColumnType):
     def __hash__(self) -> int:
         return hash((self._type, self.nullable, self._doc_formats))
 
-    def to_sa_type(self) -> sql.types.TypeEngine:
+    @classmethod
+    def to_sa_type(cls) -> sql.types.TypeEngine:
         # stored as a file path
         return sql.String()
 
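Two mechanical changes repeat throughout this file: class-level collections gain explicit `typing.ClassVar` annotations, so type checkers treat them as shared class state rather than per-instance fields, and `to_sa_type` turns from an abstract instance method into an abstract `@classmethod`, since the SQLAlchemy storage type depends only on the class. A condensed sketch of the combination (class names here are illustrative, not the package's):

```python
import abc
from typing import ClassVar

import sqlalchemy as sql


class ExampleColumnType(abc.ABC):
    # ClassVar: one set shared by the class, not an attribute of each instance
    scalar_names: ClassVar[set[str]] = {'string', 'int', 'float'}

    @classmethod
    @abc.abstractmethod
    def to_sa_type(cls) -> sql.types.TypeEngine:
        """Return the SQLAlchemy type used to store values of this type."""


class ExampleStringType(ExampleColumnType):
    @classmethod
    def to_sa_type(cls) -> sql.types.TypeEngine:
        return sql.String()


assert isinstance(ExampleStringType.to_sa_type(), sql.String)  # no instance needed
```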
pixeltable/utils/coco.py
CHANGED

@@ -103,7 +103,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
         # create annotation records for this image
         for annotation in input_dict['annotations']:
             ann_id += 1
-
+            _, _, w, h = annotation['bbox']
             category = annotation['category']
             categories.add(category)
             annotations.append(
@@ -119,7 +119,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
             )
 
     # replace category names with ids
-    category_ids = {category: id for id, category in enumerate(sorted(
+    category_ids = {category: id for id, category in enumerate(sorted(categories))}
     for annotation in annotations:
         annotation['category_id'] = category_ids[annotation['category_id']]
 
@@ -129,8 +129,8 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
         'categories': [{'id': id, 'name': category} for category, id in category_ids.items()],
     }
     output_path = dest_path / 'data.json'
-    with open(output_path, 'w') as
-    json.dump(result,
+    with open(output_path, 'w', encoding='utf-8') as fp:
+        json.dump(result, fp)
     return output_path

pixeltable/utils/console_output.py
CHANGED

@@ -34,9 +34,7 @@ class ConsoleOutputHandler(logging.StreamHandler):
 
 class ConsoleMessageFilter(logging.Filter):
     def filter(self, record: logging.LogRecord) -> bool:
-
-            return True
-        return False
+        return getattr(record, 'user_visible', False)
 
 
 class ConsoleLogger(logging.LoggerAdapter):

pixeltable/utils/description_helper.py
CHANGED

@@ -80,7 +80,7 @@ class DescriptionHelper:
         if styler is None:
             styler = descriptor.body.style
         styler = styler.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'}).set_table_styles(
-            [
+            [{'selector': 'th', 'props': [('text-align', 'left')]}]
         )
         if not descriptor.show_header:
             styler = styler.hide(axis='columns')
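The `ConsoleMessageFilter` rewrite above leans on the standard `logging` contract that the `extra` mapping passed to a logging call becomes attributes on the emitted `LogRecord`. A short usage sketch (assumed from the filter's behavior, not taken from the package):

```python
import logging

logger = logging.getLogger('pixeltable')

# `extra` attaches user_visible=True to the LogRecord, so the record passes
# ConsoleMessageFilter; records without the attribute fall back to False via
# getattr() and are filtered out.
logger.info('created table `films`', extra={'user_visible': True})
logger.debug('internal detail')  # no user_visible attribute -> suppressed by the filter
```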
pixeltable/utils/documents.py
CHANGED

@@ -95,8 +95,8 @@ def get_markdown_handle(path: str) -> Optional[dict]:
 
 def get_txt(path: str) -> Optional[str]:
     try:
-        with open(path, 'r') as
-            doc =
-        return doc
+        with open(path, 'r', encoding='utf-8') as fp:
+            doc = fp.read()
+        return doc or None  # replace '' with None
     except Exception:
         return None
pixeltable/utils/filecache.py
CHANGED

@@ -5,14 +5,15 @@ import hashlib
 import logging
 import os
 import warnings
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict, defaultdict
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Optional
+from typing import NamedTuple, Optional
 from uuid import UUID
 
 import pixeltable.exceptions as excs
+from pixeltable.config import Config
 from pixeltable.env import Env
 
 _logger = logging.getLogger('pixeltable')
@@ -78,10 +79,18 @@ class FileCache:
     evicted_working_set_keys: set[str]
     new_redownload_witnessed: bool  # whether a new re-download has occurred since the last time a warning was issued
 
-
-
-
-
+    class FileCacheColumnStats(NamedTuple):
+        tbl_id: UUID
+        col_id: int
+        num_files: int
+        total_size: int
+
+    class FileCacheStats(NamedTuple):
+        total_size: int
+        num_requests: int
+        num_hits: int
+        num_evictions: int
+        column_stats: list[FileCache.FileCacheColumnStats]
 
     @classmethod
     def get(cls) -> FileCache:
@@ -127,7 +136,8 @@ class FileCache:
         For testing purposes: allow resetting capacity and stats.
         """
         if tbl_id is None:
-            # We need to store the entries to remove in a list, because we can't remove items from a dict
+            # We need to store the entries to remove in a list, because we can't remove items from a dict
+            # while iterating
             entries_to_remove = list(self.cache.values())
             _logger.debug(f'clearing {self.num_files()} entries from file cache')
             self.num_requests, self.num_hits, self.num_evictions = 0, 0, 0
@@ -153,8 +163,9 @@ class FileCache:
                 f'of the evicted file(s) is {round(extra_capacity_needed / (1 << 30), 1)} GiB.\n'
                 f'Consider increasing the cache size to at least {round(suggested_cache_size / (1 << 30), 1)} GiB '
                 f'(it is currently {round(self.capacity_bytes / (1 << 30), 1)} GiB).\n'
-                f'You can do this by setting the value of `file_cache_size_g` in: {
+                f'You can do this by setting the value of `file_cache_size_g` in: {Config.get().config_file}',
                 excs.PixeltableWarning,
+                stacklevel=2,
             )
             self.new_redownload_witnessed = False
 
@@ -232,7 +243,7 @@ class FileCache:
         # (tbl_id, col_id) -> (num_files, total_size)
         d: dict[tuple[UUID, int], list[int]] = defaultdict(lambda: [0, 0])
         for entry in self.cache.values():
-            t = d[
+            t = d[entry.tbl_id, entry.col_id]
             t[0] += 1
             t[1] += entry.size
         col_stats = [
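The new nested `FileCacheStats`/`FileCacheColumnStats` classes use `typing.NamedTuple`, which keeps plain-tuple behavior (indexing, unpacking) while giving each field a name and a type. A minimal illustration of the trade-off:

```python
from typing import NamedTuple
from uuid import UUID


class FileCacheColumnStats(NamedTuple):
    tbl_id: UUID
    col_id: int
    num_files: int
    total_size: int


stats = FileCacheColumnStats(UUID(int=0), col_id=3, num_files=12, total_size=4096)
print(stats.total_size)          # named access instead of the opaque stats[3]
tbl_id, col_id, n, size = stats  # still unpacks like an ordinary tuple
```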
pixeltable/utils/formatter.py
CHANGED

@@ -8,8 +8,7 @@ from typing import Any, Callable, Optional
 
 import av
 import numpy as np
-import
-import PIL.Image as Image
+from PIL import Image
 
 import pixeltable.type_system as ts
 from pixeltable.utils.http_server import get_file_uri
@@ -213,7 +212,7 @@ class Formatter:
             inner_element = f"""
                 <img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
             """
-        except:
+        except Exception:
             logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have PyMuPDF installed.')
 
         return f"""
pixeltable/utils/media_store.py
CHANGED

@@ -69,7 +69,7 @@ class MediaStore:
             assert matched is not None
             tbl_id, col_id = UUID(hex=matched[1]), int(matched[2])
             file_info = os.stat(p)
-            t = d[
+            t = d[tbl_id, col_id]
             t[0] += 1
             t[1] += file_info.st_size
         result = [(tbl_id, col_id, num_files, size) for (tbl_id, col_id), (num_files, size) in d.items()]
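The `t = d[tbl_id, col_id]` spelling here (and in filecache.py above) is ordinary tuple indexing: a comma-separated subscript is passed to `__getitem__` as a single tuple, so it is equivalent to `d[(tbl_id, col_id)]`. A short demonstration:

```python
from collections import defaultdict

d: dict[tuple[str, int], list[int]] = defaultdict(lambda: [0, 0])
d['tbl', 1][0] += 1                            # same key as d[('tbl', 1)]
assert d['tbl', 1] == d[('tbl', 1)] == [1, 0]  # both spellings hit one entry
```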
pixeltable/utils/pytorch.py
CHANGED

@@ -32,7 +32,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
 
         self.path = path
         self.image_format = image_format
-        assert image_format in
+        assert image_format in {'np', 'pt'}
         column_type_path = path / '.pixeltable.column_types.json'
         assert column_type_path.exists(), f'missing {column_type_path}'
         with column_type_path.open() as f: