pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +22 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +193 -158
- pixeltable/catalog/table_version.py +191 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +89 -89
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/__init__.py +2 -0
- pixeltable/exprs/arithmetic_expr.py +7 -11
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +12 -13
- pixeltable/exprs/comparison.py +3 -6
- pixeltable/exprs/compound_predicate.py +4 -4
- pixeltable/exprs/expr.py +31 -22
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +1 -1
- pixeltable/exprs/function_call.py +110 -80
- pixeltable/exprs/globals.py +3 -3
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +2 -2
- pixeltable/exprs/json_path.py +17 -10
- pixeltable/exprs/literal.py +1 -1
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/row_builder.py +8 -17
- pixeltable/exprs/rowid_ref.py +21 -10
- pixeltable/exprs/similarity_expr.py +5 -5
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +2 -3
- pixeltable/exprs/variable.py +2 -2
- pixeltable/ext/__init__.py +2 -0
- pixeltable/ext/functions/__init__.py +2 -0
- pixeltable/ext/functions/yolox.py +3 -3
- pixeltable/func/__init__.py +3 -1
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/callable_function.py +3 -4
- pixeltable/func/expr_template_function.py +6 -16
- pixeltable/func/function.py +48 -14
- pixeltable/func/function_registry.py +1 -3
- pixeltable/func/query_template_function.py +5 -12
- pixeltable/func/signature.py +23 -22
- pixeltable/func/tools.py +3 -3
- pixeltable/func/udf.py +6 -4
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +19 -19
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/__init__.py +2 -0
- pixeltable/share/packager.py +15 -12
- pixeltable/share/publish.py +3 -5
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/console_output.py +1 -3
- pixeltable/utils/description_helper.py +1 -1
- pixeltable/utils/documents.py +3 -3
- pixeltable/utils/filecache.py +20 -9
- pixeltable/utils/formatter.py +2 -3
- pixeltable/utils/media_store.py +1 -1
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +4 -4
- pixeltable/utils/transactional_directory.py +2 -1
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
- pixeltable-0.3.8.dist-info/RECORD +174 -0
- pixeltable-0.3.6.dist-info/RECORD +0 -172
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
pixeltable/dataframe.py
CHANGED
|
@@ -10,13 +10,10 @@ import traceback
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from typing import TYPE_CHECKING, Any, AsyncIterator, Callable, Hashable, Iterator, NoReturn, Optional, Sequence, Union
|
|
12
12
|
|
|
13
|
-
import numpy as np
|
|
14
13
|
import pandas as pd
|
|
15
14
|
import sqlalchemy as sql
|
|
16
15
|
|
|
17
|
-
import
|
|
18
|
-
import pixeltable.type_system as ts
|
|
19
|
-
from pixeltable import catalog, exec, exprs, plan
|
|
16
|
+
from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_system as ts
|
|
20
17
|
from pixeltable.catalog import is_valid_identifier
|
|
21
18
|
from pixeltable.catalog.globals import UpdateStatus
|
|
22
19
|
from pixeltable.env import Env
|
|
@@ -80,7 +77,7 @@ class DataFrameResultSet:
|
|
|
80
77
|
if isinstance(index, int):
|
|
81
78
|
return self._row_to_dict(index)
|
|
82
79
|
if isinstance(index, tuple) and len(index) == 2:
|
|
83
|
-
if not isinstance(index[0], int) or not
|
|
80
|
+
if not isinstance(index[0], int) or not isinstance(index[1], (str, int)):
|
|
84
81
|
raise excs.Error(f'Bad index, expected [<row idx>, <column name | column index>]: {index}')
|
|
85
82
|
if isinstance(index[1], str) and index[1] not in self._col_names:
|
|
86
83
|
raise excs.Error(f'Invalid column name: {index[1]}')
|
|
@@ -96,6 +93,9 @@ class DataFrameResultSet:
|
|
|
96
93
|
return False
|
|
97
94
|
return self.to_pandas().equals(other.to_pandas())
|
|
98
95
|
|
|
96
|
+
def __hash__(self):
|
|
97
|
+
return hash(self.to_pandas())
|
|
98
|
+
|
|
99
99
|
|
|
100
100
|
# # TODO: remove this; it's only here as a reminder that we still need to call release() in the current implementation
|
|
101
101
|
# class AnalysisInfo:
|
|
@@ -232,9 +232,8 @@ class DataFrame:
|
|
|
232
232
|
for var in vars:
|
|
233
233
|
if var.name not in unique_vars:
|
|
234
234
|
unique_vars[var.name] = var
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
raise excs.Error(f'Multiple definitions of parameter {var.name}')
|
|
235
|
+
elif unique_vars[var.name].col_type != var.col_type:
|
|
236
|
+
raise excs.Error(f'Multiple definitions of parameter {var.name}')
|
|
238
237
|
return unique_vars
|
|
239
238
|
|
|
240
239
|
def parameters(self) -> dict[str, ColumnType]:
|
|
@@ -242,17 +241,15 @@ class DataFrame:
|
|
|
242
241
|
|
|
243
242
|
Parameters are Variables contained in any component of the DataFrame.
|
|
244
243
|
"""
|
|
245
|
-
|
|
246
|
-
return {name: var.col_type for name, var in vars.items()}
|
|
244
|
+
return {name: var.col_type for name, var in self._vars().items()}
|
|
247
245
|
|
|
248
|
-
def _exec(self
|
|
246
|
+
def _exec(self) -> Iterator[exprs.DataRow]:
|
|
249
247
|
"""Run the query and return rows as a generator.
|
|
250
248
|
This function must not modify the state of the DataFrame, otherwise it breaks dataset caching.
|
|
251
249
|
"""
|
|
252
250
|
plan = self._create_query_plan()
|
|
253
251
|
|
|
254
|
-
def exec_plan(
|
|
255
|
-
plan.ctx.set_conn(conn)
|
|
252
|
+
def exec_plan() -> Iterator[exprs.DataRow]:
|
|
256
253
|
plan.open()
|
|
257
254
|
try:
|
|
258
255
|
for row_batch in plan:
|
|
@@ -260,18 +257,13 @@ class DataFrame:
|
|
|
260
257
|
finally:
|
|
261
258
|
plan.close()
|
|
262
259
|
|
|
263
|
-
|
|
264
|
-
with Env.get().engine.begin() as conn:
|
|
265
|
-
yield from exec_plan(conn)
|
|
266
|
-
else:
|
|
267
|
-
yield from exec_plan(conn)
|
|
260
|
+
yield from exec_plan()
|
|
268
261
|
|
|
269
|
-
async def _aexec(self
|
|
262
|
+
async def _aexec(self) -> AsyncIterator[exprs.DataRow]:
|
|
270
263
|
"""Run the query and return rows as a generator.
|
|
271
264
|
This function must not modify the state of the DataFrame, otherwise it breaks dataset caching.
|
|
272
265
|
"""
|
|
273
266
|
plan = self._create_query_plan()
|
|
274
|
-
plan.ctx.set_conn(conn)
|
|
275
267
|
plan.open()
|
|
276
268
|
try:
|
|
277
269
|
async for row_batch in plan:
|
|
@@ -287,7 +279,7 @@ class DataFrame:
|
|
|
287
279
|
assert self.group_by_clause is None
|
|
288
280
|
num_rowid_cols = len(self.grouping_tbl.store_tbl.rowid_columns())
|
|
289
281
|
# the grouping table must be a base of self.tbl
|
|
290
|
-
assert num_rowid_cols <= len(self._first_tbl.tbl_version.store_tbl.rowid_columns())
|
|
282
|
+
assert num_rowid_cols <= len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
|
|
291
283
|
group_by_clause = [exprs.RowidRef(self._first_tbl.tbl_version, idx) for idx in range(num_rowid_cols)]
|
|
292
284
|
elif self.group_by_clause is not None:
|
|
293
285
|
group_by_clause = self.group_by_clause
|
|
@@ -327,10 +319,10 @@ class DataFrame:
|
|
|
327
319
|
if the DataFrame has an order_by clause.
|
|
328
320
|
"""
|
|
329
321
|
if self.order_by_clause is not None:
|
|
330
|
-
raise excs.Error(
|
|
322
|
+
raise excs.Error('head() cannot be used with order_by()')
|
|
331
323
|
if self._has_joins():
|
|
332
|
-
raise excs.Error(
|
|
333
|
-
num_rowid_cols = len(self._first_tbl.tbl_version.store_tbl.rowid_columns())
|
|
324
|
+
raise excs.Error('head() not supported for joins')
|
|
325
|
+
num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
|
|
334
326
|
order_by_clause = [exprs.RowidRef(self._first_tbl.tbl_version, idx) for idx in range(num_rowid_cols)]
|
|
335
327
|
return self.order_by(*order_by_clause, asc=True).limit(n).collect()
|
|
336
328
|
|
|
@@ -350,10 +342,10 @@ class DataFrame:
|
|
|
350
342
|
if the DataFrame has an order_by clause.
|
|
351
343
|
"""
|
|
352
344
|
if self.order_by_clause is not None:
|
|
353
|
-
raise excs.Error(
|
|
345
|
+
raise excs.Error('tail() cannot be used with order_by()')
|
|
354
346
|
if self._has_joins():
|
|
355
|
-
raise excs.Error(
|
|
356
|
-
num_rowid_cols = len(self._first_tbl.tbl_version.store_tbl.rowid_columns())
|
|
347
|
+
raise excs.Error('tail() not supported for joins')
|
|
348
|
+
num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
|
|
357
349
|
order_by_clause = [exprs.RowidRef(self._first_tbl.tbl_version, idx) for idx in range(num_rowid_cols)]
|
|
358
350
|
result = self.order_by(*order_by_clause, asc=False).limit(n).collect()
|
|
359
351
|
result._reverse()
|
|
@@ -418,7 +410,7 @@ class DataFrame:
|
|
|
418
410
|
)
|
|
419
411
|
|
|
420
412
|
def _raise_expr_eval_err(self, e: excs.ExprEvalError) -> NoReturn:
|
|
421
|
-
msg = f'In row {e.row_num} the {e.expr_msg} encountered exception {type(e.exc).__name__}:\n{
|
|
413
|
+
msg = f'In row {e.row_num} the {e.expr_msg} encountered exception {type(e.exc).__name__}:\n{e.exc}'
|
|
422
414
|
if len(e.input_vals) > 0:
|
|
423
415
|
input_msgs = [
|
|
424
416
|
f"'{d}' = {d.col_type.print_value(e.input_vals[i])}" for i, d in enumerate(e.expr.dependencies())
|
|
@@ -434,29 +426,27 @@ class DataFrame:
|
|
|
434
426
|
msg += f'\nStack:\n{nl.join(stack_trace[-1:1:-1])}'
|
|
435
427
|
raise excs.Error(msg) from e
|
|
436
428
|
|
|
437
|
-
def _output_row_iterator(self
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
429
|
+
def _output_row_iterator(self) -> Iterator[list]:
|
|
430
|
+
with Env.get().begin_xact():
|
|
431
|
+
try:
|
|
432
|
+
for data_row in self._exec():
|
|
433
|
+
yield [data_row[e.slot_idx] for e in self._select_list_exprs]
|
|
434
|
+
except excs.ExprEvalError as e:
|
|
435
|
+
self._raise_expr_eval_err(e)
|
|
436
|
+
except sql.exc.DBAPIError as e:
|
|
437
|
+
raise excs.Error(f'Error during SQL execution:\n{e}') from e
|
|
445
438
|
|
|
446
439
|
def collect(self) -> DataFrameResultSet:
|
|
447
|
-
return self.
|
|
440
|
+
return DataFrameResultSet(list(self._output_row_iterator()), self.schema)
|
|
448
441
|
|
|
449
|
-
def
|
|
450
|
-
return DataFrameResultSet(list(self._output_row_iterator(conn)), self.schema)
|
|
451
|
-
|
|
452
|
-
async def _acollect(self, conn: sql.engine.Connection) -> DataFrameResultSet:
|
|
442
|
+
async def _acollect(self) -> DataFrameResultSet:
|
|
453
443
|
try:
|
|
454
|
-
result = [[row[e.slot_idx] for e in self._select_list_exprs] async for row in self._aexec(
|
|
444
|
+
result = [[row[e.slot_idx] for e in self._select_list_exprs] async for row in self._aexec()]
|
|
455
445
|
return DataFrameResultSet(result, self.schema)
|
|
456
446
|
except excs.ExprEvalError as e:
|
|
457
447
|
self._raise_expr_eval_err(e)
|
|
458
448
|
except sql.exc.DBAPIError as e:
|
|
459
|
-
raise excs.Error(f'Error during SQL execution:\n{e}')
|
|
449
|
+
raise excs.Error(f'Error during SQL execution:\n{e}') from e
|
|
460
450
|
|
|
461
451
|
def count(self) -> int:
|
|
462
452
|
"""Return the number of rows in the DataFrame.
|
|
@@ -467,7 +457,7 @@ class DataFrame:
|
|
|
467
457
|
from pixeltable.plan import Planner
|
|
468
458
|
|
|
469
459
|
stmt = Planner.create_count_stmt(self._first_tbl, self.where_clause)
|
|
470
|
-
with Env.get().
|
|
460
|
+
with Env.get().begin_xact() as conn:
|
|
471
461
|
result: int = conn.execute(stmt).scalar_one()
|
|
472
462
|
assert isinstance(result, int)
|
|
473
463
|
return result
|
|
@@ -567,7 +557,7 @@ class DataFrame:
|
|
|
567
557
|
|
|
568
558
|
"""
|
|
569
559
|
if self.select_list is not None:
|
|
570
|
-
raise excs.Error(
|
|
560
|
+
raise excs.Error('Select list already specified')
|
|
571
561
|
for name, _ in named_items.items():
|
|
572
562
|
if not isinstance(name, str) or not is_valid_identifier(name):
|
|
573
563
|
raise excs.Error(f'Invalid name: {name}')
|
|
@@ -653,7 +643,7 @@ class DataFrame:
|
|
|
653
643
|
) -> exprs.Expr:
|
|
654
644
|
"""Verifies user-specified 'on' argument and converts it into a join predicate."""
|
|
655
645
|
col_refs: list[exprs.ColumnRef] = []
|
|
656
|
-
joined_tbls = self._from_clause.tbls
|
|
646
|
+
joined_tbls = [*self._from_clause.tbls, other]
|
|
657
647
|
|
|
658
648
|
if isinstance(on, exprs.ColumnRef):
|
|
659
649
|
on = [on]
|
|
@@ -663,14 +653,13 @@ class DataFrame:
|
|
|
663
653
|
if not on.col_type.is_bool_type():
|
|
664
654
|
raise excs.Error(f"'on': boolean expression expected, but got {on.col_type}: {on}")
|
|
665
655
|
return on
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
raise excs.Error(f"'on': must be a sequence of column references or a boolean expression")
|
|
656
|
+
elif not isinstance(on, Sequence) or len(on) == 0:
|
|
657
|
+
raise excs.Error("'on': must be a sequence of column references or a boolean expression")
|
|
669
658
|
|
|
670
659
|
assert isinstance(on, Sequence)
|
|
671
660
|
for col_ref in on:
|
|
672
661
|
if not isinstance(col_ref, exprs.ColumnRef):
|
|
673
|
-
raise excs.Error(
|
|
662
|
+
raise excs.Error("'on': must be a sequence of column references or a boolean expression")
|
|
674
663
|
if not col_ref.is_bound_by(joined_tbls):
|
|
675
664
|
raise excs.Error(f"'on': expression cannot be evaluated in the context of the joined tables: {col_ref}")
|
|
676
665
|
col_refs.append(col_ref)
|
|
@@ -765,7 +754,7 @@ class DataFrame:
|
|
|
765
754
|
join_pred: Optional[exprs.Expr]
|
|
766
755
|
if how == 'cross':
|
|
767
756
|
if on is not None:
|
|
768
|
-
raise excs.Error(
|
|
757
|
+
raise excs.Error("'on' not allowed for cross join")
|
|
769
758
|
join_pred = None
|
|
770
759
|
else:
|
|
771
760
|
if on is None:
|
|
@@ -828,20 +817,20 @@ class DataFrame:
|
|
|
828
817
|
>>> df = book.group_by(t.genre).select(t.genre, total=sum(t.price)).show()
|
|
829
818
|
"""
|
|
830
819
|
if self.group_by_clause is not None:
|
|
831
|
-
raise excs.Error(
|
|
820
|
+
raise excs.Error('Group-by already specified')
|
|
832
821
|
grouping_tbl: Optional[catalog.TableVersion] = None
|
|
833
822
|
group_by_clause: Optional[list[exprs.Expr]] = None
|
|
834
823
|
for item in grouping_items:
|
|
835
824
|
if isinstance(item, catalog.Table):
|
|
836
825
|
if len(grouping_items) > 1:
|
|
837
|
-
raise excs.Error(
|
|
826
|
+
raise excs.Error('group_by(): only one table can be specified')
|
|
838
827
|
if len(self._from_clause.tbls) > 1:
|
|
839
|
-
raise excs.Error(
|
|
828
|
+
raise excs.Error('group_by() with Table not supported for joins')
|
|
840
829
|
# we need to make sure that the grouping table is a base of self.tbl
|
|
841
830
|
base = self._first_tbl.find_tbl_version(item._tbl_version_path.tbl_id())
|
|
842
831
|
if base is None or base.id == self._first_tbl.tbl_id():
|
|
843
832
|
raise excs.Error(f'group_by(): {item._name} is not a base table of {self._first_tbl.tbl_name()}')
|
|
844
|
-
grouping_tbl = item._tbl_version_path.tbl_version
|
|
833
|
+
grouping_tbl = item._tbl_version_path.tbl_version.get()
|
|
845
834
|
break
|
|
846
835
|
if not isinstance(item, exprs.Expr):
|
|
847
836
|
raise excs.Error(f'Invalid expression in group_by(): {item}')
|
|
@@ -943,16 +932,19 @@ class DataFrame:
|
|
|
943
932
|
|
|
944
933
|
>>> person = t.select()
|
|
945
934
|
|
|
946
|
-
Via the above DataFrame person, update the column 'city' to 'Oakland'
|
|
935
|
+
Via the above DataFrame person, update the column 'city' to 'Oakland'
|
|
936
|
+
and 'state' to 'CA' in the table t:
|
|
947
937
|
|
|
948
938
|
>>> df = person.update({'city': 'Oakland', 'state': 'CA'})
|
|
949
939
|
|
|
950
|
-
Via the above DataFrame person, update the column 'age' to 30 for any
|
|
940
|
+
Via the above DataFrame person, update the column 'age' to 30 for any
|
|
941
|
+
rows where 'year' is 2014 in the table t:
|
|
951
942
|
|
|
952
943
|
>>> df = person.where(t.year == 2014).update({'age': 30})
|
|
953
944
|
"""
|
|
954
945
|
self._validate_mutable('update', False)
|
|
955
|
-
|
|
946
|
+
with Env.get().begin_xact():
|
|
947
|
+
return self._first_tbl.tbl_version.get().update(value_spec, where=self.where_clause, cascade=cascade)
|
|
956
948
|
|
|
957
949
|
def delete(self) -> UpdateStatus:
|
|
958
950
|
"""Delete rows form the underlying table of the DataFrame.
|
|
@@ -973,8 +965,9 @@ class DataFrame:
|
|
|
973
965
|
"""
|
|
974
966
|
self._validate_mutable('delete', False)
|
|
975
967
|
if not self._first_tbl.is_insertable():
|
|
976
|
-
raise excs.Error(
|
|
977
|
-
|
|
968
|
+
raise excs.Error('Cannot delete from view')
|
|
969
|
+
with Env.get().begin_xact():
|
|
970
|
+
return self._first_tbl.tbl_version.get().delete(where=self.where_clause)
|
|
978
971
|
|
|
979
972
|
def _validate_mutable(self, op_name: str, allow_select: bool) -> None:
|
|
980
973
|
"""Tests whether this DataFrame can be mutated (such as by an update operation).
|
|
@@ -1020,32 +1013,37 @@ class DataFrame:
|
|
|
1020
1013
|
|
|
1021
1014
|
@classmethod
|
|
1022
1015
|
def from_dict(cls, d: dict[str, Any]) -> 'DataFrame':
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
else None
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1016
|
+
# we need to wrap the construction with a transaction, because it might need to load metadata
|
|
1017
|
+
with Env.get().begin_xact():
|
|
1018
|
+
tbls = [catalog.TableVersionPath.from_dict(tbl_dict) for tbl_dict in d['from_clause']['tbls']]
|
|
1019
|
+
join_clauses = [plan.JoinClause(**clause_dict) for clause_dict in d['from_clause']['join_clauses']]
|
|
1020
|
+
from_clause = plan.FromClause(tbls=tbls, join_clauses=join_clauses)
|
|
1021
|
+
select_list = (
|
|
1022
|
+
[(exprs.Expr.from_dict(e), name) for e, name in d['select_list']]
|
|
1023
|
+
if d['select_list'] is not None
|
|
1024
|
+
else None
|
|
1025
|
+
)
|
|
1026
|
+
where_clause = exprs.Expr.from_dict(d['where_clause']) if d['where_clause'] is not None else None
|
|
1027
|
+
group_by_clause = (
|
|
1028
|
+
[exprs.Expr.from_dict(e) for e in d['group_by_clause']] if d['group_by_clause'] is not None else None
|
|
1029
|
+
)
|
|
1030
|
+
grouping_tbl = catalog.TableVersion.from_dict(d['grouping_tbl']) if d['grouping_tbl'] is not None else None
|
|
1031
|
+
order_by_clause = (
|
|
1032
|
+
[(exprs.Expr.from_dict(e), asc) for e, asc in d['order_by_clause']]
|
|
1033
|
+
if d['order_by_clause'] is not None
|
|
1034
|
+
else None
|
|
1035
|
+
)
|
|
1036
|
+
limit_val = exprs.Expr.from_dict(d['limit_val']) if d['limit_val'] is not None else None
|
|
1037
|
+
|
|
1038
|
+
return DataFrame(
|
|
1039
|
+
from_clause=from_clause,
|
|
1040
|
+
select_list=select_list,
|
|
1041
|
+
where_clause=where_clause,
|
|
1042
|
+
group_by_clause=group_by_clause,
|
|
1043
|
+
grouping_tbl=grouping_tbl,
|
|
1044
|
+
order_by_clause=order_by_clause,
|
|
1045
|
+
limit=limit_val,
|
|
1046
|
+
)
|
|
1049
1047
|
|
|
1050
1048
|
def _hash_result_set(self) -> str:
|
|
1051
1049
|
"""Return a hash that changes when the result set changes."""
|
|
@@ -1053,7 +1051,7 @@ class DataFrame:
|
|
|
1053
1051
|
# add list of referenced table versions (the actual versions, not the effective ones) in order to force cache
|
|
1054
1052
|
# invalidation when any of the referenced tables changes
|
|
1055
1053
|
d['tbl_versions'] = [
|
|
1056
|
-
tbl_version.version for tbl in self._from_clause.tbls for tbl_version in tbl.get_tbl_versions()
|
|
1054
|
+
tbl_version.get().version for tbl in self._from_clause.tbls for tbl_version in tbl.get_tbl_versions()
|
|
1057
1055
|
]
|
|
1058
1056
|
summary_string = json.dumps(d)
|
|
1059
1057
|
return hashlib.sha256(summary_string.encode()).hexdigest()
|
|
@@ -1086,7 +1084,8 @@ class DataFrame:
|
|
|
1086
1084
|
assert data_file_path.is_file()
|
|
1087
1085
|
return data_file_path
|
|
1088
1086
|
else:
|
|
1089
|
-
|
|
1087
|
+
with Env.get().begin_xact():
|
|
1088
|
+
return write_coco_dataset(self, dest_path)
|
|
1090
1089
|
|
|
1091
1090
|
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
|
|
1092
1091
|
"""
|
|
@@ -1130,6 +1129,7 @@ class DataFrame:
|
|
|
1130
1129
|
if dest_path.exists(): # fast path: use cache
|
|
1131
1130
|
assert dest_path.is_dir()
|
|
1132
1131
|
else:
|
|
1133
|
-
|
|
1132
|
+
with Env.get().begin_xact():
|
|
1133
|
+
export_parquet(self, dest_path, inline_images=True)
|
|
1134
1134
|
|
|
1135
1135
|
return PixeltablePytorchDataset(path=dest_path, image_format=image_format)
|