pixeltable 0.4.17__py3-none-any.whl → 0.4.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/catalog/catalog.py +26 -19
- pixeltable/catalog/table.py +33 -14
- pixeltable/catalog/table_version.py +16 -12
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +4 -0
- pixeltable/exec/exec_context.py +15 -2
- pixeltable/exec/sql_node.py +3 -2
- pixeltable/functions/huggingface.py +1031 -2
- pixeltable/functions/video.py +34 -7
- pixeltable/globals.py +23 -4
- pixeltable/iterators/document.py +88 -57
- pixeltable/iterators/video.py +58 -24
- pixeltable/plan.py +2 -6
- pixeltable/store.py +24 -3
- pixeltable/utils/av.py +66 -38
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.18.dist-info}/METADATA +4 -4
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.18.dist-info}/RECORD +20 -20
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.18.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.18.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.18.dist-info}/licenses/LICENSE +0 -0
pixeltable/catalog/catalog.py
CHANGED
|
@@ -280,7 +280,7 @@ class Catalog:
|
|
|
280
280
|
- this needs to be done in a retry loop, because Postgres can decide to abort the transaction
|
|
281
281
|
(SerializationFailure, LockNotAvailable)
|
|
282
282
|
- for that reason, we do all lock acquisition prior to doing any real work (eg, compute column values),
|
|
283
|
-
to minimize the probability of
|
|
283
|
+
to minimize the probability of losing that work due to a forced abort
|
|
284
284
|
|
|
285
285
|
If convert_db_excs == True, converts DBAPIErrors into excs.Errors.
|
|
286
286
|
"""
|
|
@@ -433,7 +433,7 @@ class Catalog:
|
|
|
433
433
|
|
|
434
434
|
The function should not raise exceptions; if it does, they are logged and ignored.
|
|
435
435
|
"""
|
|
436
|
-
assert
|
|
436
|
+
assert self.in_write_xact
|
|
437
437
|
self._undo_actions.append(func)
|
|
438
438
|
return func
|
|
439
439
|
|
|
@@ -792,19 +792,25 @@ class Catalog:
|
|
|
792
792
|
return result
|
|
793
793
|
|
|
794
794
|
@retry_loop(for_write=True)
|
|
795
|
-
def move(self, path: Path, new_path: Path) -> None:
|
|
796
|
-
self._move(path, new_path)
|
|
795
|
+
def move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
|
|
796
|
+
self._move(path, new_path, if_exists, if_not_exists)
|
|
797
797
|
|
|
798
|
-
def _move(self, path: Path, new_path: Path) -> None:
|
|
799
|
-
|
|
798
|
+
def _move(self, path: Path, new_path: Path, if_exists: IfExistsParam, if_not_exists: IfNotExistsParam) -> None:
|
|
799
|
+
dest_obj, dest_dir, src_obj = self._prepare_dir_op(
|
|
800
800
|
add_dir_path=new_path.parent,
|
|
801
801
|
add_name=new_path.name,
|
|
802
802
|
drop_dir_path=path.parent,
|
|
803
803
|
drop_name=path.name,
|
|
804
|
-
raise_if_exists=
|
|
805
|
-
raise_if_not_exists=
|
|
804
|
+
raise_if_exists=(if_exists == IfExistsParam.ERROR),
|
|
805
|
+
raise_if_not_exists=(if_not_exists == IfNotExistsParam.ERROR),
|
|
806
806
|
)
|
|
807
|
-
|
|
807
|
+
assert dest_obj is None or if_exists == IfExistsParam.IGNORE
|
|
808
|
+
assert src_obj is not None or if_not_exists == IfNotExistsParam.IGNORE
|
|
809
|
+
if dest_obj is None and src_obj is not None:
|
|
810
|
+
# If dest_obj is not None, it means `if_exists='ignore'` and the destination already exists.
|
|
811
|
+
# If src_obj is None, it means `if_not_exists='ignore'` and the source doesn't exist.
|
|
812
|
+
# If dest_obj is None and src_obj is not None, then we can proceed with the move.
|
|
813
|
+
src_obj._move(new_path.name, dest_dir._id)
|
|
808
814
|
|
|
809
815
|
def _prepare_dir_op(
|
|
810
816
|
self,
|
|
@@ -815,7 +821,7 @@ class Catalog:
|
|
|
815
821
|
drop_expected: Optional[type[SchemaObject]] = None,
|
|
816
822
|
raise_if_exists: bool = False,
|
|
817
823
|
raise_if_not_exists: bool = False,
|
|
818
|
-
) -> tuple[Optional[SchemaObject], Optional[
|
|
824
|
+
) -> tuple[Optional[SchemaObject], Optional[Dir], Optional[SchemaObject]]:
|
|
819
825
|
"""
|
|
820
826
|
Validates paths and acquires locks needed for a directory operation, ie, add/drop/rename (add + drop) of a
|
|
821
827
|
directory entry.
|
|
@@ -902,9 +908,10 @@ class Catalog:
|
|
|
902
908
|
schema.Table.md['name'].astext == name,
|
|
903
909
|
schema.Table.md['user'].astext == user,
|
|
904
910
|
)
|
|
905
|
-
tbl_id = conn.execute(q).
|
|
906
|
-
|
|
907
|
-
|
|
911
|
+
tbl_id = conn.execute(q).scalars().all()
|
|
912
|
+
assert len(tbl_id) <= 1, name
|
|
913
|
+
if len(tbl_id) == 1:
|
|
914
|
+
return self.get_table_by_id(tbl_id[0], version)
|
|
908
915
|
|
|
909
916
|
return None
|
|
910
917
|
|
|
@@ -1084,7 +1091,7 @@ class Catalog:
|
|
|
1084
1091
|
The metadata should be presented in standard "ancestor order", with the table being replicated at
|
|
1085
1092
|
list position 0 and the (root) base table at list position -1.
|
|
1086
1093
|
"""
|
|
1087
|
-
assert
|
|
1094
|
+
assert self.in_write_xact
|
|
1088
1095
|
|
|
1089
1096
|
tbl_id = UUID(md[0].tbl_md.tbl_id)
|
|
1090
1097
|
|
|
@@ -1150,11 +1157,11 @@ class Catalog:
|
|
|
1150
1157
|
# We need to do this at the end, since `existing_path` needs to first have a non-fragment table version in
|
|
1151
1158
|
# order to be instantiated as a schema object.
|
|
1152
1159
|
existing = self.get_table_by_id(tbl_id)
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1160
|
+
assert existing is not None
|
|
1161
|
+
existing_path = Path.parse(existing._path(), allow_system_path=True)
|
|
1162
|
+
if existing_path != path:
|
|
1163
|
+
assert existing_path.is_system_path
|
|
1164
|
+
self._move(existing_path, path, IfExistsParam.ERROR, IfNotExistsParam.ERROR)
|
|
1158
1165
|
|
|
1159
1166
|
def __ensure_system_dir_exists(self) -> Dir:
|
|
1160
1167
|
system_path = Path.parse('_system', allow_system_path=True)
|
pixeltable/catalog/table.py
CHANGED
|
@@ -77,6 +77,17 @@ class Table(SchemaObject):
|
|
|
77
77
|
self._tbl_version = None
|
|
78
78
|
|
|
79
79
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
80
|
+
old_name = self._name
|
|
81
|
+
old_dir_id = self._dir_id
|
|
82
|
+
|
|
83
|
+
cat = catalog.Catalog.get()
|
|
84
|
+
|
|
85
|
+
@cat.register_undo_action
|
|
86
|
+
def _() -> None:
|
|
87
|
+
# TODO: We should really be invalidating the Table instance and forcing a reload.
|
|
88
|
+
self._name = old_name
|
|
89
|
+
self._dir_id = old_dir_id
|
|
90
|
+
|
|
80
91
|
super()._move(new_name, new_dir_id)
|
|
81
92
|
conn = env.Env.get().conn
|
|
82
93
|
stmt = sql.text(
|
|
@@ -625,7 +636,7 @@ class Table(SchemaObject):
|
|
|
625
636
|
- `'abort'`: an exception will be raised and the column will not be added.
|
|
626
637
|
- `'ignore'`: execution will continue and the column will be added. Any rows
|
|
627
638
|
with errors will have a `None` value for the column, with information about the error stored in the
|
|
628
|
-
corresponding `tbl.col_name.errormsg` tbl.col_name.errortype` fields.
|
|
639
|
+
corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
|
|
629
640
|
if_exists: Determines the behavior if the column already exists. Must be one of the following:
|
|
630
641
|
|
|
631
642
|
- `'error'`: an exception will be raised.
|
|
@@ -986,22 +997,28 @@ class Table(SchemaObject):
|
|
|
986
997
|
Only `String` and `Image` columns are currently supported. Here's an example that uses a
|
|
987
998
|
[CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
|
|
988
999
|
|
|
1000
|
+
```
|
|
989
1001
|
>>> from pixeltable.functions.huggingface import clip
|
|
990
|
-
|
|
991
|
-
|
|
1002
|
+
>>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
|
|
1003
|
+
>>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
|
|
1004
|
+
```
|
|
992
1005
|
|
|
993
|
-
Once the index is created,
|
|
1006
|
+
Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:
|
|
994
1007
|
|
|
1008
|
+
```
|
|
995
1009
|
>>> reference_img = PIL.Image.open('my_image.jpg')
|
|
996
|
-
|
|
997
|
-
|
|
1010
|
+
>>> sim = tbl.img.similarity(reference_img)
|
|
1011
|
+
>>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
|
|
1012
|
+
```
|
|
998
1013
|
|
|
999
1014
|
If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
|
|
1000
1015
|
performed using any of its supported types. In our example, CLIP supports both text and images, so we can
|
|
1001
1016
|
also search for images using a text description:
|
|
1002
1017
|
|
|
1018
|
+
```
|
|
1003
1019
|
>>> sim = tbl.img.similarity('a picture of a train')
|
|
1004
|
-
|
|
1020
|
+
>>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
|
|
1021
|
+
```
|
|
1005
1022
|
|
|
1006
1023
|
Args:
|
|
1007
1024
|
column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
|
|
@@ -1032,9 +1049,9 @@ class Table(SchemaObject):
|
|
|
1032
1049
|
Add an index to the `img` column of the table `my_table`:
|
|
1033
1050
|
|
|
1034
1051
|
>>> from pixeltable.functions.huggingface import clip
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1052
|
+
>>> tbl = pxt.get_table('my_table')
|
|
1053
|
+
>>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
|
|
1054
|
+
>>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
|
|
1038
1055
|
|
|
1039
1056
|
Alternatively, the `img` column may be specified by name:
|
|
1040
1057
|
|
|
@@ -1328,7 +1345,8 @@ class Table(SchemaObject):
|
|
|
1328
1345
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1329
1346
|
print_stats: bool = False,
|
|
1330
1347
|
**kwargs: Any,
|
|
1331
|
-
)
|
|
1348
|
+
)
|
|
1349
|
+
```
|
|
1332
1350
|
|
|
1333
1351
|
To insert just a single row, you can use the more concise syntax:
|
|
1334
1352
|
|
|
@@ -1338,7 +1356,8 @@ class Table(SchemaObject):
|
|
|
1338
1356
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1339
1357
|
print_stats: bool = False,
|
|
1340
1358
|
**kwargs: Any
|
|
1341
|
-
)
|
|
1359
|
+
)
|
|
1360
|
+
```
|
|
1342
1361
|
|
|
1343
1362
|
Args:
|
|
1344
1363
|
source: A data source from which data can be imported.
|
|
@@ -1459,8 +1478,8 @@ class Table(SchemaObject):
|
|
|
1459
1478
|
the row with new `id` 3 (assuming this key does not exist):
|
|
1460
1479
|
|
|
1461
1480
|
>>> tbl.update(
|
|
1462
|
-
|
|
1463
|
-
|
|
1481
|
+
... [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
|
|
1482
|
+
... if_not_exists='insert')
|
|
1464
1483
|
"""
|
|
1465
1484
|
from pixeltable.catalog import Catalog
|
|
1466
1485
|
|
pixeltable/catalog/table_version.py
CHANGED
|
@@ -96,6 +96,8 @@ class TableVersion:
|
|
|
96
96
|
cols_by_name: dict[str, Column]
|
|
97
97
|
# contains only columns visible in this version, both system and user
|
|
98
98
|
cols_by_id: dict[int, Column]
|
|
99
|
+
# all indices defined on this table
|
|
100
|
+
all_idxs: dict[str, TableVersion.IndexInfo]
|
|
99
101
|
# contains only actively maintained indices
|
|
100
102
|
idxs_by_name: dict[str, TableVersion.IndexInfo]
|
|
101
103
|
|
|
@@ -129,6 +131,12 @@ class TableVersion:
|
|
|
129
131
|
base_path: Optional[pxt.catalog.TableVersionPath] = None,
|
|
130
132
|
base: Optional[TableVersionHandle] = None,
|
|
131
133
|
):
|
|
134
|
+
from pixeltable import exprs
|
|
135
|
+
from pixeltable.plan import SampleClause
|
|
136
|
+
|
|
137
|
+
from .table_version_handle import TableVersionHandle
|
|
138
|
+
from .table_version_path import TableVersionPath
|
|
139
|
+
|
|
132
140
|
self.is_validated = True # a freshly constructed instance is always valid
|
|
133
141
|
self.is_initialized = False
|
|
134
142
|
self.id = id
|
|
@@ -141,9 +149,6 @@ class TableVersion:
|
|
|
141
149
|
self.store_tbl = None
|
|
142
150
|
|
|
143
151
|
# mutable tables need their TableVersionPath for expr eval during updates
|
|
144
|
-
from .table_version_handle import TableVersionHandle
|
|
145
|
-
from .table_version_path import TableVersionPath
|
|
146
|
-
|
|
147
152
|
if self.is_snapshot:
|
|
148
153
|
self.path = None
|
|
149
154
|
else:
|
|
@@ -153,9 +158,6 @@ class TableVersion:
|
|
|
153
158
|
self.path = TableVersionPath(self_handle, base=base_path)
|
|
154
159
|
|
|
155
160
|
# view-specific initialization
|
|
156
|
-
from pixeltable import exprs
|
|
157
|
-
from pixeltable.plan import SampleClause
|
|
158
|
-
|
|
159
161
|
predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
|
|
160
162
|
self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
|
|
161
163
|
sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
|
|
@@ -180,6 +182,7 @@ class TableVersion:
|
|
|
180
182
|
self.cols = []
|
|
181
183
|
self.cols_by_name = {}
|
|
182
184
|
self.cols_by_id = {}
|
|
185
|
+
self.all_idxs = {}
|
|
183
186
|
self.idxs_by_name = {}
|
|
184
187
|
self.external_stores = {}
|
|
185
188
|
|
|
@@ -373,7 +376,7 @@ class TableVersion:
|
|
|
373
376
|
cat._tbl_versions[tbl_version.id, tbl_version.effective_version] = tbl_version
|
|
374
377
|
tbl_version.init()
|
|
375
378
|
tbl_version.store_tbl.create()
|
|
376
|
-
tbl_version.store_tbl.
|
|
379
|
+
tbl_version.store_tbl.ensure_updated_schema()
|
|
377
380
|
return tbl_version
|
|
378
381
|
|
|
379
382
|
def delete_media(self, tbl_version: Optional[int] = None) -> None:
|
|
@@ -463,13 +466,17 @@ class TableVersion:
|
|
|
463
466
|
idx_col = self._lookup_column(QColumnId(UUID(md.indexed_col_tbl_id), md.indexed_col_id))
|
|
464
467
|
assert idx_col is not None
|
|
465
468
|
idx = cls.from_dict(idx_col, md.init_args)
|
|
469
|
+
assert isinstance(idx, index.IndexBase)
|
|
470
|
+
|
|
471
|
+
val_col = next(col for col in self.cols if col.id == md.index_val_col_id)
|
|
472
|
+
undo_col = next(col for col in self.cols if col.id == md.index_val_undo_col_id)
|
|
473
|
+
idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
|
|
474
|
+
self.all_idxs[md.name] = idx_info
|
|
466
475
|
|
|
467
476
|
# fix up the sa column type of the index value and undo columns
|
|
468
477
|
# we need to do this for all indices, not just those that are active in this TableVersion, to ensure we get
|
|
469
478
|
# the correct SA schema in the StoreTable.
|
|
470
|
-
val_col = next(col for col in self.cols if col.id == md.index_val_col_id)
|
|
471
479
|
val_col.sa_col_type = idx.index_sa_type()
|
|
472
|
-
undo_col = next(col for col in self.cols if col.id == md.index_val_undo_col_id)
|
|
473
480
|
undo_col.sa_col_type = idx.index_sa_type()
|
|
474
481
|
if not isinstance(idx, index.EmbeddingIndex):
|
|
475
482
|
# Historically, the intent has been not to store cellmd data, even for embedding indices. However,
|
|
@@ -501,9 +508,6 @@ class TableVersion:
|
|
|
501
508
|
assert md.indexed_col_id in self.cols_by_id
|
|
502
509
|
assert md.index_val_col_id in self.cols_by_id
|
|
503
510
|
assert md.index_val_undo_col_id in self.cols_by_id
|
|
504
|
-
idx_info = self.IndexInfo(
|
|
505
|
-
id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col
|
|
506
|
-
)
|
|
507
511
|
self.idxs_by_name[md.name] = idx_info
|
|
508
512
|
|
|
509
513
|
def _lookup_column(self, id: QColumnId) -> Column | None:
|
pixeltable/dataframe.py
CHANGED
|
@@ -1039,7 +1039,7 @@ class DataFrame:
|
|
|
1039
1039
|
>>> df = book.order_by(t.price, asc=False).order_by(t.pages)
|
|
1040
1040
|
"""
|
|
1041
1041
|
if self.sample_clause is not None:
|
|
1042
|
-
raise excs.Error('
|
|
1042
|
+
raise excs.Error('order_by() cannot be used with sample()')
|
|
1043
1043
|
for e in expr_list:
|
|
1044
1044
|
if not isinstance(e, exprs.Expr):
|
|
1045
1045
|
raise excs.Error(f'Invalid expression in order_by(): {e}')
|
pixeltable/env.py
CHANGED
|
@@ -760,10 +760,12 @@ class Env:
|
|
|
760
760
|
|
|
761
761
|
def __register_packages(self) -> None:
|
|
762
762
|
"""Declare optional packages that are utilized by some parts of the code."""
|
|
763
|
+
self.__register_package('accelerate')
|
|
763
764
|
self.__register_package('anthropic')
|
|
764
765
|
self.__register_package('azure.storage.blob', library_name='azure-storage-blob')
|
|
765
766
|
self.__register_package('boto3')
|
|
766
767
|
self.__register_package('datasets')
|
|
768
|
+
self.__register_package('diffusers')
|
|
767
769
|
self.__register_package('fiftyone')
|
|
768
770
|
self.__register_package('fireworks', library_name='fireworks-ai')
|
|
769
771
|
self.__register_package('google.cloud.storage', library_name='google-cloud-storage')
|
|
@@ -771,6 +773,7 @@ class Env:
|
|
|
771
773
|
self.__register_package('groq')
|
|
772
774
|
self.__register_package('huggingface_hub', library_name='huggingface-hub')
|
|
773
775
|
self.__register_package('label_studio_sdk', library_name='label-studio-sdk')
|
|
776
|
+
self.__register_package('librosa')
|
|
774
777
|
self.__register_package('llama_cpp', library_name='llama-cpp-python')
|
|
775
778
|
self.__register_package('mcp')
|
|
776
779
|
self.__register_package('mistralai')
|
|
@@ -783,6 +786,7 @@ class Env:
|
|
|
783
786
|
self.__register_package('replicate')
|
|
784
787
|
self.__register_package('sentencepiece')
|
|
785
788
|
self.__register_package('sentence_transformers', library_name='sentence-transformers')
|
|
789
|
+
self.__register_package('soundfile')
|
|
786
790
|
self.__register_package('spacy')
|
|
787
791
|
self.__register_package('tiktoken')
|
|
788
792
|
self.__register_package('together')
|
pixeltable/exec/exec_context.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import random
|
|
1
2
|
from typing import Optional
|
|
2
3
|
|
|
3
4
|
import sqlalchemy as sql
|
|
@@ -8,6 +9,17 @@ from pixeltable import exprs
|
|
|
8
9
|
class ExecContext:
|
|
9
10
|
"""Class for execution runtime constants"""
|
|
10
11
|
|
|
12
|
+
row_builder: exprs.RowBuilder
|
|
13
|
+
profile: exprs.ExecProfile
|
|
14
|
+
show_pbar: bool
|
|
15
|
+
batch_size: int
|
|
16
|
+
num_rows: Optional[int]
|
|
17
|
+
conn: Optional[sql.engine.Connection]
|
|
18
|
+
pk_clause: Optional[list[sql.ClauseElement]]
|
|
19
|
+
num_computed_exprs: int
|
|
20
|
+
ignore_errors: bool
|
|
21
|
+
random_seed: int # general-purpose source of randomness with execution scope
|
|
22
|
+
|
|
11
23
|
def __init__(
|
|
12
24
|
self,
|
|
13
25
|
row_builder: exprs.RowBuilder,
|
|
@@ -23,8 +35,9 @@ class ExecContext:
|
|
|
23
35
|
self.row_builder = row_builder
|
|
24
36
|
self.profile = exprs.ExecProfile(row_builder)
|
|
25
37
|
# num_rows is used to compute the total number of computed cells used for the progress bar
|
|
26
|
-
self.num_rows
|
|
27
|
-
self.conn
|
|
38
|
+
self.num_rows = None
|
|
39
|
+
self.conn = None # if present, use this to execute SQL queries
|
|
28
40
|
self.pk_clause = pk_clause
|
|
29
41
|
self.num_computed_exprs = num_computed_exprs
|
|
30
42
|
self.ignore_errors = ignore_errors
|
|
43
|
+
self.random_seed = random.randint(0, 1 << 63)
|
pixeltable/exec/sql_node.py
CHANGED
|
@@ -648,7 +648,6 @@ class SqlSampleNode(SqlNode):
|
|
|
648
648
|
)
|
|
649
649
|
self.stratify_exprs = stratify_exprs
|
|
650
650
|
self.sample_clause = sample_clause
|
|
651
|
-
assert isinstance(self.sample_clause.seed, int)
|
|
652
651
|
|
|
653
652
|
@classmethod
|
|
654
653
|
def key_sql_expr(cls, seed: sql.ColumnElement, sql_cols: Iterable[sql.ColumnElement]) -> sql.ColumnElement:
|
|
@@ -667,7 +666,9 @@ class SqlSampleNode(SqlNode):
|
|
|
667
666
|
"""Create an expression for randomly ordering rows with a given seed"""
|
|
668
667
|
rowid_cols = [*cte.c[-self.pk_count : -1]] # exclude the version column
|
|
669
668
|
assert len(rowid_cols) > 0
|
|
670
|
-
|
|
669
|
+
# If seed is not set in the sample clause, use the random seed given by the execution context
|
|
670
|
+
seed = self.sample_clause.seed if self.sample_clause.seed is not None else self.ctx.random_seed
|
|
671
|
+
return self.key_sql_expr(sql.literal_column(str(seed)), rowid_cols)
|
|
671
672
|
|
|
672
673
|
def _create_stmt(self) -> sql.Select:
|
|
673
674
|
from pixeltable.plan import SampleClause
|