pixeltable 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl
This diff shows the changes between publicly available package versions as released to their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +6 -1
- pixeltable/catalog/catalog.py +107 -45
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +5 -0
- pixeltable/catalog/table_version.py +100 -106
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/catalog/update_status.py +12 -0
- pixeltable/config.py +6 -0
- pixeltable/dataframe.py +11 -5
- pixeltable/env.py +52 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +14 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +10 -11
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/video.py +123 -9
- pixeltable/functions/whisperx.py +2 -0
- pixeltable/functions/yolox.py +2 -0
- pixeltable/globals.py +56 -31
- pixeltable/io/__init__.py +1 -0
- pixeltable/io/globals.py +16 -15
- pixeltable/io/table_data_conduit.py +46 -21
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/share/publish.py +0 -1
- pixeltable/store.py +2 -2
- pixeltable/type_system.py +5 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA +2 -1
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/RECORD +64 -57
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/licenses/LICENSE +0 -0
pixeltable/plan.py
CHANGED
@@ -3,9 +3,10 @@ from __future__ import annotations
 import dataclasses
 import enum
 from textwrap import dedent
-from typing import Any, Iterable, Literal, Optional, Sequence
+from typing import Any, Iterable, Literal, Optional, Sequence, cast
 from uuid import UUID

+import pgvector.sqlalchemy  # type: ignore[import-untyped]
 import sqlalchemy as sql

 import pixeltable as pxt
@@ -385,7 +386,7 @@ class Planner:
             TableVersionHandle(tbl.id, tbl.effective_version), rows, row_builder, tbl.next_row_id
         )

-        plan = cls.
+        plan = cls._add_prefetch_node(tbl.id, row_builder.input_exprs, input_node=plan)

        computed_exprs = row_builder.output_exprs - row_builder.input_exprs
        if len(computed_exprs) > 0:
@@ -393,6 +394,8 @@ class Planner:
             plan = exec.ExprEvalNode(
                 row_builder, computed_exprs, plan.output_exprs, input=plan, maintain_input_order=False
             )
+        if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
+            plan = exec.CellMaterializationNode(plan)

         plan.set_ctx(
             exec.ExecContext(
@@ -403,7 +406,7 @@ class Planner:
                 ignore_errors=ignore_errors,
             )
         )
-        plan = cls.
+        plan = cls._add_save_node(plan)

         return plan

@@ -422,10 +425,17 @@ class Planner:
         plan = df._create_query_plan()  # ExecNode constructed by the DataFrame

         # Modify the plan RowBuilder to register the output columns
+        needs_cell_materialization = False
         for col_name, expr in zip(df.schema.keys(), df._select_list_exprs):
             assert col_name in tbl.cols_by_name
             col = tbl.cols_by_name[col_name]
             plan.row_builder.add_table_column(col, expr.slot_idx)
+            needs_cell_materialization = (
+                needs_cell_materialization or col.col_type.is_json_type() or col.col_type.is_array_type()
+            )
+
+        if needs_cell_materialization:
+            plan = exec.CellMaterializationNode(plan)

         plan.set_ctx(
             exec.ExecContext(
@@ -446,12 +456,14 @@ class Planner:
         cascade: bool,
     ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
         """Creates a plan to materialize updated rows.
+
         The plan:
         - retrieves rows that are visible at the current version of the table
         - materializes all stored columns and the update targets
         - if cascade is True, recomputes all computed columns that transitively depend on the updated columns
           and copies the values of all other stored columns
         - if cascade is False, copies all columns that aren't update targets from the original rows
+
         Returns:
         - root node of the plan
         - list of qualified column names that are getting updated
@@ -477,14 +489,16 @@ class Planner:

         cls.__check_valid_columns(tbl.tbl_version.get(), recomputed_cols, 'updated in')

+        # our query plan
+        # - evaluates the update targets and recomputed columns
+        # - copies all other stored columns
         recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == tbl.tbl_version.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
             if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
-        select_list: list[exprs.Expr] =
-        select_list.extend(update_targets.values())
+        select_list: list[exprs.Expr] = list(update_targets.values())

         recomputed_exprs = [
             c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -495,14 +509,22 @@ class Planner:
         select_list.extend(recomputed_exprs)

         # we need to retrieve the PK columns of the existing rows
-        plan = cls.create_query_plan(
-
+        plan = cls.create_query_plan(
+            FromClause(tbls=[tbl]),
+            select_list=select_list,
+            columns=copied_cols,
+            where_clause=where_clause,
+            ignore_errors=True,
+        )
+        evaluated_cols = updated_cols + list(recomputed_base_cols)  # same order as select_list
         # update row builder with column information
-
+        plan.row_builder.add_table_columns(copied_cols)
+        for i, col in enumerate(evaluated_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         plan.ctx.num_computed_exprs = len(recomputed_exprs)

-        plan = cls.
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)

         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
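The cascade semantics in the docstring above ("recomputes all computed columns that transitively depend on the updated columns") amount to a transitive-closure walk over the column-dependency graph. A toy sketch of that idea, using a plain dict and made-up column names rather than pixeltable's actual dependency tracking:

    # Toy model: each computed column maps to its direct dependencies.
    # Updating a column recomputes every computed column that depends on
    # it, directly or transitively.
    deps: dict[str, set[str]] = {'c2': {'c1'}, 'c3': {'c2'}, 'c4': {'x'}}

    def transitive_dependents(updated: set[str]) -> set[str]:
        out: set[str] = set()
        changed = True
        while changed:
            changed = False
            for col, direct in deps.items():
                if col not in out and direct & (updated | out):
                    out.add(col)
                    changed = True
        return out

    # updating c1 recomputes c2, then c3 via c2; c4 is merely copied
    assert transitive_dependents({'c1'}) == {'c2', 'c3'}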
@@ -525,6 +547,79 @@ class Planner:
                 .format(validation_error=col.value_expr.validation_error)
             )

+    @classmethod
+    def _cell_md_col_refs(cls, expr_list: Iterable[exprs.Expr]) -> list[exprs.ColumnRef]:
+        """Return list of ColumnRefs that need their cellmd values for reconstruction"""
+        json_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list,
+                expr_class=exprs.ColumnRef,
+                filter=lambda e: cast(exprs.ColumnRef, e).col.col_type.is_json_type(),
+                traverse_matches=False,
+            )
+        )
+
+        def needs_reconstruction(e: exprs.Expr) -> bool:
+            assert isinstance(e, exprs.ColumnRef)
+            # Vector-typed array columns are used for vector indexes, and are stored in the db
+            return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
+
+        array_col_refs = list(
+            exprs.Expr.list_subexprs(
+                expr_list, expr_class=exprs.ColumnRef, filter=needs_reconstruction, traverse_matches=False
+            )
+        )
+
+        return json_col_refs + array_col_refs
+
+    @classmethod
+    def _add_cell_materialization_node(cls, input: exec.ExecNode) -> exec.ExecNode:
+        # we need a CellMaterializationNode if any of the evaluated output columns are json or array-typed
+        has_target_cols = any(
+            col.col_type.is_json_type() or col.col_type.is_array_type()
+            for col, slot_idx in input.row_builder.table_columns.items()
+            if slot_idx is not None
+        )
+        if has_target_cols:
+            return exec.CellMaterializationNode(input)
+        else:
+            return input
+
+    @classmethod
+    def _add_cell_reconstruction_node(cls, expr_list: list[exprs.Expr], input: exec.ExecNode) -> exec.ExecNode:
+        """
+        Add a CellReconstructionNode, if required by any of the exprs in expr_list.
+
+        Cell reconstruction is required for
+        1) all json-typed ColumnRefs that are not used as part of a JsonPath (the latter does its own reconstruction)
+           or as part of a ColumnPropertyRef
+        2) all array-typed ColumnRefs that are not used as part of a ColumnPropertyRef
+        """
+
+        def json_filter(e: exprs.Expr) -> bool:
+            if isinstance(e, exprs.JsonPath):
+                return not e.is_relative_path() and isinstance(e.anchor, exprs.ColumnRef)
+            if isinstance(e, exprs.ColumnPropertyRef):
+                return e.col_ref.col.col_type.is_json_type()
+            return isinstance(e, exprs.ColumnRef) and e.col.col_type.is_json_type()
+
+        def array_filter(e: exprs.Expr) -> bool:
+            if isinstance(e, exprs.ColumnPropertyRef):
+                return e.col_ref.col.col_type.is_array_type()
+            if not isinstance(e, exprs.ColumnRef):
+                return False
+            # Vector-typed array columns are used for vector indexes, and are stored in the db
+            return e.col.col_type.is_array_type() and not isinstance(e.col.sa_col_type, pgvector.sqlalchemy.Vector)
+
+        json_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=json_filter, traverse_matches=False))
+        json_refs = [e for e in json_candidates if isinstance(e, exprs.ColumnRef)]
+        array_candidates = list(exprs.Expr.list_subexprs(expr_list, filter=array_filter, traverse_matches=False))
+        array_refs = [e for e in array_candidates if isinstance(e, exprs.ColumnRef)]
+        if len(json_refs) > 0 or len(array_refs) > 0:
+            return exec.CellReconstructionNode(json_refs, array_refs, input.row_builder, input=input)
+        else:
+            return input
+
     @classmethod
     def create_batch_update_plan(
         cls,
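The three `_add_*` helpers above share one shape: inspect the plan, then either wrap it in another ExecNode or hand it back unchanged, so call sites can chain `plan = cls._add_...(plan)` unconditionally. A minimal sketch of that wrap-if-needed pattern, with stand-in classes rather than pixeltable's real ExecNode hierarchy:

    # Stand-in classes only; not pixeltable's actual API.
    from __future__ import annotations
    from typing import Optional

    class Node:
        def __init__(self, input: Optional['Node'] = None) -> None:
            self.input = input

    class CellMaterializationNode(Node): ...
    class ObjectStoreSaveNode(Node): ...

    def add_cell_materialization_node(plan: Node, needs_it: bool) -> Node:
        # wrap only if some output column is json- or array-typed
        return CellMaterializationNode(plan) if needs_it else plan

    def add_save_node(plan: Node, has_media_cols: bool) -> Node:
        # media persistence is the outermost stage, added last
        return ObjectStoreSaveNode(plan) if has_media_cols else plan

    plan: Node = Node()
    plan = add_cell_materialization_node(plan, needs_it=True)
    plan = add_save_node(plan, has_media_cols=False)  # returns plan unchanged
    assert isinstance(plan, CellMaterializationNode)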
@@ -543,8 +638,8 @@ class Planner:
         """
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version.get()  # the one we need to update
-        sa_key_cols: list[sql.Column]
-        key_vals: list[tuple]
+        sa_key_cols: list[sql.Column]
+        key_vals: list[tuple]
         if len(rowids) > 0:
             sa_key_cols = target.store_tbl.rowid_columns()
             key_vals = rowids
@@ -567,8 +662,7 @@ class Planner:
             for col in target.cols_by_id.values()
             if col.is_stored and col not in updated_cols and col not in recomputed_base_cols
         ]
-        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in
-        select_list.extend(exprs.ColumnRef(col) for col in updated_cols)
+        select_list: list[exprs.Expr] = [exprs.ColumnRef(col) for col in updated_cols]

         recomputed_exprs = [
             c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols
@@ -586,23 +680,37 @@ class Planner:
         )
         row_builder = exprs.RowBuilder(analyzer.all_exprs, [], sql_exprs, target)
         analyzer.finalize(row_builder)
-
+
+        cell_md_col_refs = cls._cell_md_col_refs(sql_exprs)
+        sql_lookup_node = exec.SqlLookupNode(
+            tbl,
+            row_builder,
+            sql_exprs,
+            columns=copied_cols,
+            sa_key_cols=sa_key_cols,
+            key_vals=key_vals,
+            cell_md_col_refs=cell_md_col_refs,
+        )
         col_vals = [{col: row[col].val for col in updated_cols} for row in batch]
         row_update_node = exec.RowUpdateNode(tbl, key_vals, len(rowids) > 0, col_vals, row_builder, sql_lookup_node)
         plan: exec.ExecNode = row_update_node
         if not cls._is_contained_in(analyzer.select_list, sql_exprs):
             # we need an ExprEvalNode to evaluate the remaining output exprs
             plan = exec.ExprEvalNode(row_builder, analyzer.select_list, sql_exprs, input=plan)
+
         # update row builder with column information
-
+        evaluated_cols = list(updated_cols) + list(recomputed_base_cols)  # same order as select_list
         row_builder.set_slot_idxs(select_list, remove_duplicates=False)
-
+        plan.row_builder.add_table_columns(copied_cols)
+        for i, col in enumerate(evaluated_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
         ctx = exec.ExecContext(row_builder, num_computed_exprs=len(recomputed_exprs))
-        #
+        # TODO: correct batch size?
         ctx.batch_size = 0
         plan.set_ctx(ctx)
-
+
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
         return (
             plan,
@@ -653,10 +761,11 @@ class Planner:
             exact_version_only=view.get_bases(),
         )
         plan.ctx.num_computed_exprs = len(recomputed_exprs)
-
+        materialized_cols = copied_cols + list(recomputed_cols)  # same order as select_list
+        for i, col in enumerate(materialized_cols):
             plan.row_builder.add_table_column(col, select_list[i].slot_idx)
-
-        plan = cls.
+        plan = cls._add_cell_materialization_node(plan)
+        plan = cls._add_save_node(plan)

         return plan

@@ -726,7 +835,9 @@ class Planner:

         exec_ctx.ignore_errors = True
         plan.set_ctx(exec_ctx)
-
+        if any(c.col_type.is_json_type() or c.col_type.is_array_type() for c in stored_cols):
+            plan = exec.CellMaterializationNode(plan)
+        plan = cls._add_save_node(plan)

         return plan, len(row_builder.default_eval_ctx.target_exprs)

@@ -773,15 +884,13 @@ class Planner:
         return combined_ordering

     @classmethod
-    def
-
-
-
-        if len(stored_media_cols) == 0:
+    def _add_save_node(cls, input_node: exec.ExecNode) -> exec.ExecNode:
+        """Add an ObjectStoreSaveNode, if needed."""
+        media_col_info = input_node.row_builder.media_output_col_info
+        if len(media_col_info) == 0:
             return input_node
-
-
-        return save_node
+        else:
+            return exec.ObjectStoreSaveNode(media_col_info, input_node)

     @classmethod
     def _is_contained_in(cls, l1: Iterable[exprs.Expr], l2: Iterable[exprs.Expr]) -> bool:
@@ -789,10 +898,10 @@ class Planner:
         return {e.id for e in l1} <= {e.id for e in l2}

     @classmethod
-    def
+    def _add_prefetch_node(
         cls, tbl_id: UUID, expressions: Iterable[exprs.Expr], input_node: exec.ExecNode
     ) -> exec.ExecNode:
-        """
+        """Add a CachePrefetch node, if needed."""
         # we prefetch external files for all media ColumnRefs, even those that aren't part of the dependencies
         # of output_exprs: if unstored iterator columns are present, we might need to materialize ColumnRefs that
         # aren't explicitly captured as dependencies
@@ -808,21 +917,30 @@ class Planner:
     def create_query_plan(
         cls,
         from_clause: FromClause,
-        select_list:
-
-
-
-
-
+        select_list: list[exprs.Expr] | None = None,
+        columns: list[catalog.Column] | None = None,
+        where_clause: exprs.Expr | None = None,
+        group_by_clause: list[exprs.Expr] | None = None,
+        order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
+        limit: exprs.Expr | None = None,
+        sample_clause: SampleClause | None = None,
         ignore_errors: bool = False,
-        exact_version_only:
+        exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
-        """
+        """
+        Return plan for executing a query.
+
+        The plan:
+        - materializes the values of select_list exprs into their respective slots
+        - materializes cell values of 'columns' (and their cellmd, if applicable) into DataRow.cell_vals/cell_md
+
         Updates 'select_list' in place to make it executable.
         TODO: make exact_version_only a flag and use the versions from tbl
         """
         if select_list is None:
             select_list = []
+        if columns is None:
+            columns = []
         if order_by_clause is None:
             order_by_clause = []
         if exact_version_only is None:
@@ -850,6 +968,7 @@
             row_builder=row_builder,
             analyzer=analyzer,
             eval_ctx=eval_ctx,
+            columns=columns,
             limit=limit,
             with_pk=True,
             exact_version_only=exact_version_only,
@@ -865,9 +984,10 @@
         row_builder: exprs.RowBuilder,
         analyzer: Analyzer,
         eval_ctx: exprs.RowBuilder.EvalCtx,
+        columns: list[catalog.Column] | None = None,
         limit: Optional[exprs.Expr] = None,
         with_pk: bool = False,
-        exact_version_only:
+        exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
         """
         Create plan to materialize eval_ctx.
@@ -877,6 +997,8 @@
         in the context of that table version (eg, if 'tbl' is a view, 'plan_target' might be the base)
         TODO: make exact_version_only a flag and use the versions from tbl
         """
+        if columns is None:
+            columns = []
         if exact_version_only is None:
             exact_version_only = []
         sql_elements = analyzer.sql_elements
@@ -934,8 +1056,15 @@
                 traverse_matches=False,
             )
         )
+
         plan = exec.SqlScanNode(
-            tbl,
+            tbl,
+            row_builder,
+            select_list=tbl_scan_exprs,
+            columns=[c for c in columns if c.tbl.id == tbl.tbl_id],
+            set_pk=with_pk,
+            cell_md_col_refs=cls._cell_md_col_refs(tbl_scan_exprs),
+            exact_version_only=exact_version_only,
         )
         tbl_scan_plans.append(plan)

@@ -966,7 +1095,8 @@
             stratify_exprs=analyzer.stratify_exprs,
         )

-        plan = cls.
+        plan = cls._add_prefetch_node(tbl.tbl_version.id, row_builder.unique_exprs, plan)
+        plan = cls._add_cell_reconstruction_node(analyzer.all_exprs, plan)

         if analyzer.group_by_clause is not None:
             # we're doing grouping aggregation; the input of the AggregateNode are the grouping exprs plus the
@@ -1010,7 +1140,7 @@
             if not agg_output.issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                 # we need an ExprEvalNode to evaluate the remaining output exprs
                 plan = exec.ExprEvalNode(row_builder, eval_ctx.target_exprs, agg_output, input=plan)
-            plan = cls.
+            plan = cls._add_save_node(plan)
         else:
             if not exprs.ExprSet(sql_exprs).issuperset(exprs.ExprSet(eval_ctx.target_exprs)):
                 # we need an ExprEvalNode to evaluate the remaining output exprs
@@ -1062,7 +1192,6 @@
         plan.ctx.ignore_errors = True
         computed_exprs = row_builder.output_exprs - row_builder.input_exprs
         plan.ctx.num_computed_exprs = len(computed_exprs)  # we are adding a computed column, so we need to evaluate it
-
-        plan = cls._insert_save_node(tbl.tbl_version.id, row_builder.stored_media_cols, input_node=plan)
+        plan = cls._add_save_node(plan)

         return plan
pixeltable/share/publish.py
CHANGED
@@ -254,7 +254,6 @@ def _download_from_presigned_url(
     session.close()


-# TODO: This will be replaced by drop_table with cloud table uri
 def delete_replica(dest_path: str) -> None:
     """Delete cloud replica"""
     delete_request_json = {'operation_type': 'delete_snapshot', 'table_uri': dest_path}
pixeltable/store.py
CHANGED
@@ -274,7 +274,7 @@ class StoreBase:
             self.sa_md.remove(tmp_tbl)
             tmp_tbl.drop(bind=conn)

-        run_cleanup(remove_tmp_tbl, raise_error=
+        run_cleanup(remove_tmp_tbl, raise_error=False)

         return num_excs

@@ -321,7 +321,7 @@ class StoreBase:
                 table_row, num_row_exc = row_builder.create_store_table_row(row, cols_with_excs, pk)
                 num_excs += num_row_exc

-                if show_progress:
+                if show_progress and Env.get().verbosity >= 1:
                     if progress_bar is None:
                         warnings.simplefilter('ignore', category=TqdmWarning)
                         progress_bar = tqdm(
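The second hunk additionally gates progress output on the configured verbosity, not just the caller's `show_progress` flag. A small sketch of the same gating, where the `verbosity` variable stands in for `Env.get().verbosity`:

    # The tqdm bar is created lazily, and only when the caller asked for
    # progress output and verbosity is at least 1.
    from tqdm import tqdm

    verbosity = 0  # stand-in for Env.get().verbosity
    show_progress = True

    progress_bar = None
    for _ in range(1000):
        if show_progress and verbosity >= 1:
            if progress_bar is None:
                progress_bar = tqdm(total=1000, unit='rows')
            progress_bar.update(1)
    if progress_bar is not None:
        progress_bar.close()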
pixeltable/type_system.py
CHANGED
@@ -25,6 +25,7 @@ import sqlalchemy as sql
 from typing_extensions import _AnnotatedAlias

 import pixeltable.exceptions as excs
+from pixeltable.env import Env
 from pixeltable.utils import parse_local_file_path


@@ -673,8 +674,9 @@ class TimestampType(ColumnType):
     def _create_literal(self, val: Any) -> Any:
         if isinstance(val, str):
             return datetime.datetime.fromisoformat(val)
-
-
+        # Place naive timestamps in the default time zone
+        if isinstance(val, datetime.datetime) and val.tzinfo is None:
+            return val.replace(tzinfo=Env.get().default_time_zone)
         return val


@@ -760,7 +762,7 @@ class JsonType(ColumnType):

     @classmethod
     def __is_valid_json(cls, val: Any) -> bool:
-        if val is None or isinstance(val, (str, int, float, bool)):
+        if val is None or isinstance(val, (str, int, float, bool, np.ndarray, PIL.Image.Image)):
             return True
         if isinstance(val, (list, tuple)):
             return all(cls.__is_valid_json(v) for v in val)
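The TimestampType change localizes naive datetimes with `replace(tzinfo=...)`, which keeps the wall-clock fields and only attaches zone information (no conversion). An illustration of that behavior, with a fixed zone standing in for `Env.get().default_time_zone`:

    import datetime
    from zoneinfo import ZoneInfo

    default_tz = ZoneInfo('America/Los_Angeles')  # stand-in for Env.get().default_time_zone

    naive = datetime.datetime(2024, 1, 1, 12, 0)  # tzinfo is None
    aware = naive.replace(tzinfo=default_tz)      # same wall clock, now zone-aware
    assert aware.hour == naive.hour and aware.tzinfo is not None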
pixeltable/utils/console_output.py
CHANGED

@@ -1,6 +1,8 @@
 import logging
 from typing import TextIO

+from pixeltable import exceptions as excs
+

 def map_level(verbosity: int) -> int:
     """
@@ -19,7 +21,8 @@ def map_level(verbosity: int) -> int:
         return logging.INFO
     if verbosity == 2:
         return logging.DEBUG
-
+
+    raise excs.Error(f'Invalid verbosity level: {verbosity}')


 class ConsoleOutputHandler(logging.StreamHandler):
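With the added raise, `map_level` no longer falls through silently for out-of-range verbosity values. A hypothetical caller-side check of the stricter contract:

    import logging

    from pixeltable import exceptions as excs
    from pixeltable.utils.console_output import map_level

    assert map_level(2) == logging.DEBUG
    try:
        map_level(3)  # out of range
    except excs.Error:
        pass  # invalid verbosity now raises instead of falling through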
pixeltable/utils/exception_handler.py
CHANGED

@@ -1,32 +1,9 @@
 import logging
-import sys
 from typing import Any, Callable, Optional, TypeVar

 R = TypeVar('R')

-
-def _is_in_exception() -> bool:
-    """
-    Check if code is currently executing within an exception context.
-    """
-    current_exception = sys.exc_info()[1]
-    return current_exception is not None
-
-
-def run_cleanup_on_exception(cleanup_func: Callable[..., R], *args: Any, **kwargs: Any) -> Optional[R]:
-    """
-    Runs cleanup only when running in exception context.
-
-    The function `run_cleanup_on_exception()` should be used to clean up resources when an operation fails.
-    This is typically done using a try, except, and finally block, with the resource cleanup logic placed within
-    the except block. However, this pattern may not handle KeyboardInterrupt exceptions.
-    To ensure that resources are always cleaned up at least once when an exception or KeyboardInterrupt occurs,
-    create an idempotent function for cleaning up resources and pass it to the `run_cleanup_on_exception()` function
-    from the finally block.
-    """
-    if _is_in_exception():
-        return run_cleanup(cleanup_func, *args, raise_error=False, **kwargs)
-    return None
+logger = logging.getLogger('pixeltable')


 def run_cleanup(cleanup_func: Callable[..., R], *args: Any, raise_error: bool = True, **kwargs: Any) -> Optional[R]:
@@ -40,20 +17,20 @@ def run_cleanup(cleanup_func: Callable[..., R], *args: Any, raise_error: bool =
         raise_error: raise an exception if an error occurs during cleanup.
     """
     try:
-
+        logger.debug(f'Running cleanup function: {cleanup_func.__name__!r}')
         return cleanup_func(*args, **kwargs)
     except KeyboardInterrupt as interrupt:
         # Save original exception and re-attempt cleanup
         original_exception = interrupt
-
+        logger.debug(f'Cleanup {cleanup_func.__name__!r} interrupted, retrying')
         try:
             return cleanup_func(*args, **kwargs)
         except Exception as e:
             # Suppress this exception
-
+            logger.error(f'Cleanup {cleanup_func.__name__!r} failed with exception {e.__class__}: {e}')
             raise KeyboardInterrupt from original_exception
     except Exception as e:
-
+        logger.error(f'Cleanup {cleanup_func.__name__!r} failed with exception {e.__class__}: {e}')
         if raise_error:
             raise e
         return None
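`run_cleanup_on_exception` is gone; what remains is `run_cleanup`, which logs each attempt, retries once if a KeyboardInterrupt arrives mid-cleanup, and optionally swallows errors. A usage sketch under those semantics (the cleanup function should be idempotent, since it may run twice):

    import os
    import tempfile

    from pixeltable.utils.exception_handler import run_cleanup

    fd, tmp_path = tempfile.mkstemp()
    os.close(fd)

    def remove_tmp_file() -> None:
        # idempotent: safe to call again if the first attempt was interrupted
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    try:
        pass  # ... work with tmp_path ...
    finally:
        # mirrors the store.py call site above: log failures, don't re-raise
        run_cleanup(remove_tmp_file, raise_error=False)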
pixeltable/utils/misc.py
ADDED
{pixeltable-0.4.14.dist-info → pixeltable-0.4.16.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pixeltable
-Version: 0.4.14
+Version: 0.4.16
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 Project-URL: homepage, https://pixeltable.com/
 Project-URL: repository, https://github.com/pixeltable/pixeltable
@@ -53,6 +53,7 @@ Requires-Dist: sqlalchemy>=2.0.23
 Requires-Dist: tenacity>=8.2
 Requires-Dist: toml>=0.10
 Requires-Dist: tqdm>=4.64
+Requires-Dist: tzlocal>=5.0
 Description-Content-Type: text/markdown

 <picture class="github-only">