pixeltable-0.4.16-py3-none-any.whl → pixeltable-0.4.18-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/catalog/catalog.py +47 -32
- pixeltable/catalog/table.py +33 -14
- pixeltable/catalog/table_version.py +86 -46
- pixeltable/catalog/table_version_path.py +0 -11
- pixeltable/catalog/view.py +6 -0
- pixeltable/config.py +1 -0
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +12 -0
- pixeltable/exec/exec_context.py +15 -2
- pixeltable/exec/sql_node.py +3 -2
- pixeltable/exprs/arithmetic_expr.py +13 -7
- pixeltable/functions/huggingface.py +1031 -2
- pixeltable/functions/video.py +140 -31
- pixeltable/globals.py +23 -4
- pixeltable/io/globals.py +2 -2
- pixeltable/io/parquet.py +1 -1
- pixeltable/io/table_data_conduit.py +1 -1
- pixeltable/iterators/document.py +111 -42
- pixeltable/iterators/video.py +169 -62
- pixeltable/plan.py +2 -6
- pixeltable/share/packager.py +155 -26
- pixeltable/store.py +25 -5
- pixeltable/utils/arrow.py +6 -6
- pixeltable/utils/av.py +104 -11
- pixeltable/utils/object_stores.py +16 -1
- pixeltable/utils/s3_store.py +44 -11
- {pixeltable-0.4.16.dist-info → pixeltable-0.4.18.dist-info}/METADATA +30 -30
- {pixeltable-0.4.16.dist-info → pixeltable-0.4.18.dist-info}/RECORD +31 -31
- {pixeltable-0.4.16.dist-info → pixeltable-0.4.18.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.16.dist-info → pixeltable-0.4.18.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.16.dist-info → pixeltable-0.4.18.dist-info}/licenses/LICENSE +0 -0
pixeltable/exec/exec_context.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import random
|
|
1
2
|
from typing import Optional
|
|
2
3
|
|
|
3
4
|
import sqlalchemy as sql
|
|
@@ -8,6 +9,17 @@ from pixeltable import exprs
|
|
|
8
9
|
class ExecContext:
|
|
9
10
|
"""Class for execution runtime constants"""
|
|
10
11
|
|
|
12
|
+
row_builder: exprs.RowBuilder
|
|
13
|
+
profile: exprs.ExecProfile
|
|
14
|
+
show_pbar: bool
|
|
15
|
+
batch_size: int
|
|
16
|
+
num_rows: Optional[int]
|
|
17
|
+
conn: Optional[sql.engine.Connection]
|
|
18
|
+
pk_clause: Optional[list[sql.ClauseElement]]
|
|
19
|
+
num_computed_exprs: int
|
|
20
|
+
ignore_errors: bool
|
|
21
|
+
random_seed: int # general-purpose source of randomness with execution scope
|
|
22
|
+
|
|
11
23
|
def __init__(
|
|
12
24
|
self,
|
|
13
25
|
row_builder: exprs.RowBuilder,
|
|
@@ -23,8 +35,9 @@ class ExecContext:
|
|
|
23
35
|
self.row_builder = row_builder
|
|
24
36
|
self.profile = exprs.ExecProfile(row_builder)
|
|
25
37
|
# num_rows is used to compute the total number of computed cells used for the progress bar
|
|
26
|
-
self.num_rows
|
|
27
|
-
self.conn
|
|
38
|
+
self.num_rows = None
|
|
39
|
+
self.conn = None # if present, use this to execute SQL queries
|
|
28
40
|
self.pk_clause = pk_clause
|
|
29
41
|
self.num_computed_exprs = num_computed_exprs
|
|
30
42
|
self.ignore_errors = ignore_errors
|
|
43
|
+
self.random_seed = random.randint(0, 1 << 63)
|
pixeltable/exec/sql_node.py
CHANGED
|
@@ -648,7 +648,6 @@ class SqlSampleNode(SqlNode):
|
|
|
648
648
|
)
|
|
649
649
|
self.stratify_exprs = stratify_exprs
|
|
650
650
|
self.sample_clause = sample_clause
|
|
651
|
-
assert isinstance(self.sample_clause.seed, int)
|
|
652
651
|
|
|
653
652
|
@classmethod
|
|
654
653
|
def key_sql_expr(cls, seed: sql.ColumnElement, sql_cols: Iterable[sql.ColumnElement]) -> sql.ColumnElement:
|
|
@@ -667,7 +666,9 @@ class SqlSampleNode(SqlNode):
|
|
|
667
666
|
"""Create an expression for randomly ordering rows with a given seed"""
|
|
668
667
|
rowid_cols = [*cte.c[-self.pk_count : -1]] # exclude the version column
|
|
669
668
|
assert len(rowid_cols) > 0
|
|
670
|
-
|
|
669
|
+
# If seed is not set in the sample clause, use the random seed given by the execution context
|
|
670
|
+
seed = self.sample_clause.seed if self.sample_clause.seed is not None else self.ctx.random_seed
|
|
671
|
+
return self.key_sql_expr(sql.literal_column(str(seed)), rowid_cols)
|
|
671
672
|
|
|
672
673
|
def _create_stmt(self) -> sql.Select:
|
|
673
674
|
from pixeltable.plan import SampleClause
|
|
pixeltable/exprs/arithmetic_expr.py
CHANGED
|
@@ -4,7 +4,7 @@ from typing import Any, Optional
|
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
|
-
from pixeltable import exceptions as excs, type_system as ts
|
|
7
|
+
from pixeltable import env, exceptions as excs, type_system as ts
|
|
8
8
|
|
|
9
9
|
from .data_row import DataRow
|
|
10
10
|
from .expr import Expr
|
|
@@ -64,12 +64,18 @@ class ArithmeticExpr(Expr):
|
|
|
64
64
|
right = sql_elements.get(self._op2)
|
|
65
65
|
if left is None or right is None:
|
|
66
66
|
return None
|
|
67
|
-
if self.operator
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
67
|
+
if self.operator in (ArithmeticOperator.ADD, ArithmeticOperator.SUB, ArithmeticOperator.MUL):
|
|
68
|
+
if env.Env.get().is_using_cockroachdb and self._op1.col_type != self._op2.col_type:
|
|
69
|
+
if self._op1.col_type != self.col_type:
|
|
70
|
+
left = sql.cast(left, self.col_type.to_sa_type())
|
|
71
|
+
if self._op2.col_type != self.col_type:
|
|
72
|
+
right = sql.cast(right, self.col_type.to_sa_type())
|
|
73
|
+
if self.operator == ArithmeticOperator.ADD:
|
|
74
|
+
return left + right
|
|
75
|
+
if self.operator == ArithmeticOperator.SUB:
|
|
76
|
+
return left - right
|
|
77
|
+
if self.operator == ArithmeticOperator.MUL:
|
|
78
|
+
return left * right
|
|
73
79
|
if self.operator == ArithmeticOperator.DIV:
|
|
74
80
|
assert self.col_type.is_float_type()
|
|
75
81
|
# Avoid division by zero errors by converting any zero divisor to NULL.
|