pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +22 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +193 -158
- pixeltable/catalog/table_version.py +191 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +89 -89
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/__init__.py +2 -0
- pixeltable/exprs/arithmetic_expr.py +7 -11
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +12 -13
- pixeltable/exprs/comparison.py +3 -6
- pixeltable/exprs/compound_predicate.py +4 -4
- pixeltable/exprs/expr.py +31 -22
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +1 -1
- pixeltable/exprs/function_call.py +110 -80
- pixeltable/exprs/globals.py +3 -3
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +2 -2
- pixeltable/exprs/json_path.py +17 -10
- pixeltable/exprs/literal.py +1 -1
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/row_builder.py +8 -17
- pixeltable/exprs/rowid_ref.py +21 -10
- pixeltable/exprs/similarity_expr.py +5 -5
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +2 -3
- pixeltable/exprs/variable.py +2 -2
- pixeltable/ext/__init__.py +2 -0
- pixeltable/ext/functions/__init__.py +2 -0
- pixeltable/ext/functions/yolox.py +3 -3
- pixeltable/func/__init__.py +3 -1
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/callable_function.py +3 -4
- pixeltable/func/expr_template_function.py +6 -16
- pixeltable/func/function.py +48 -14
- pixeltable/func/function_registry.py +1 -3
- pixeltable/func/query_template_function.py +5 -12
- pixeltable/func/signature.py +23 -22
- pixeltable/func/tools.py +3 -3
- pixeltable/func/udf.py +6 -4
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +19 -19
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/__init__.py +2 -0
- pixeltable/share/packager.py +15 -12
- pixeltable/share/publish.py +3 -5
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/console_output.py +1 -3
- pixeltable/utils/description_helper.py +1 -1
- pixeltable/utils/documents.py +3 -3
- pixeltable/utils/filecache.py +20 -9
- pixeltable/utils/formatter.py +2 -3
- pixeltable/utils/media_store.py +1 -1
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +4 -4
- pixeltable/utils/transactional_directory.py +2 -1
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
- pixeltable-0.3.8.dist-info/RECORD +174 -0
- pixeltable-0.3.6.dist-info/RECORD +0 -172
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING, Optional
|
|
5
|
+
from uuid import UUID
|
|
6
|
+
|
|
7
|
+
from .table_version import TableVersion
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
pass
|
|
11
|
+
|
|
12
|
+
_logger = logging.getLogger('pixeltable')
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TableVersionHandle:
    """
    Indirection mechanism for TableVersion instances, which get resolved against the catalog at runtime.

    A handle is identified by the pair (table id, effective version). Equality and hashing are both
    defined over that pair, so handles can be compared and used as dict keys / set members; the
    cached TableVersion instance does not participate in either.

    Attributes:
        id: id of the table the handle refers to.
        effective_version: version the handle is pinned to
            (NOTE(review): None presumably denotes the live/current version -- confirm against TableVersion).
        _tbl_version: the resolved TableVersion, or None if the handle has not been resolved yet.
    """

    id: UUID
    effective_version: Optional[int]
    _tbl_version: Optional[TableVersion]

    def __init__(self, tbl_id: UUID, effective_version: Optional[int], tbl_version: Optional[TableVersion] = None):
        """
        Args:
            tbl_id: id of the referenced table.
            effective_version: version of the referenced table (may be None).
            tbl_version: an already-resolved TableVersion to cache in the handle, if available.
        """
        self.id = tbl_id
        self.effective_version = effective_version
        self._tbl_version = tbl_version

    def __eq__(self, other: object) -> bool:
        """Two handles are equal if they have the same table id and effective version."""
        if not isinstance(other, TableVersionHandle):
            return False
        return self.id == other.id and self.effective_version == other.effective_version

    def __hash__(self) -> int:
        """Hash over the same fields that __eq__ compares.

        Defining __eq__ without __hash__ makes a class unhashable; this keeps handles usable in sets and
        as dict keys. `id` and `effective_version` must not be modified while a handle is stored in a
        hash-based container.
        """
        return hash((self.id, self.effective_version))

    @classmethod
    def create(cls, tbl_version: TableVersion) -> TableVersionHandle:
        """Return a handle for an existing TableVersion, with that instance pre-cached."""
        return cls(tbl_version.id, tbl_version.effective_version, tbl_version)

    def get(self) -> TableVersion:
        """Return the referenced TableVersion, resolving it against the catalog on first use.

        The resolved instance is cached in the handle, so the catalog is consulted at most once.
        """
        if self._tbl_version is None:
            # imported lazily, and only when we actually need to resolve against the catalog
            from .catalog import Catalog

            self._tbl_version = Catalog.get().get_tbl_version(self.id, self.effective_version)
        return self._tbl_version

    def as_dict(self) -> dict:
        """Return a dict representation of the handle (the cached TableVersion is not included)."""
        return {'id': str(self.id), 'effective_version': self.effective_version}

    @classmethod
    def from_dict(cls, d: dict) -> TableVersionHandle:
        """Recreate an (unresolved) handle from the output of as_dict()."""
        return cls(UUID(d['id']), d['effective_version'])
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Optional
|
|
4
|
+
from typing import Optional
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
from pixeltable import exprs
|
|
7
|
+
from pixeltable.metadata import schema
|
|
9
8
|
|
|
10
9
|
from .column import Column
|
|
11
|
-
from .
|
|
10
|
+
from .table_version_handle import TableVersionHandle
|
|
12
11
|
|
|
13
12
|
_logger = logging.getLogger('pixeltable')
|
|
14
13
|
|
|
@@ -25,22 +24,40 @@ class TableVersionPath:
|
|
|
25
24
|
table/view.
|
|
26
25
|
"""
|
|
27
26
|
|
|
28
|
-
|
|
27
|
+
tbl_version: TableVersionHandle
|
|
28
|
+
base: Optional[TableVersionPath]
|
|
29
|
+
|
|
30
|
+
def __init__(self, tbl_version: TableVersionHandle, base: Optional[TableVersionPath] = None):
|
|
29
31
|
assert tbl_version is not None
|
|
30
32
|
self.tbl_version = tbl_version
|
|
31
33
|
self.base = base
|
|
32
34
|
|
|
35
|
+
@classmethod
|
|
36
|
+
def from_md(cls, path: schema.TableVersionPath) -> TableVersionPath:
|
|
37
|
+
assert len(path) > 0
|
|
38
|
+
result: Optional[TableVersionPath] = None
|
|
39
|
+
for tbl_id_str, effective_version in path[::-1]:
|
|
40
|
+
tbl_id = UUID(tbl_id_str)
|
|
41
|
+
result = TableVersionPath(TableVersionHandle(tbl_id, effective_version), base=result)
|
|
42
|
+
return result
|
|
43
|
+
|
|
44
|
+
def as_md(self) -> schema.TableVersionPath:
|
|
45
|
+
result = [(self.tbl_version.id.hex, self.tbl_version.effective_version)]
|
|
46
|
+
if self.base is not None:
|
|
47
|
+
result.extend(self.base.as_md())
|
|
48
|
+
return result
|
|
49
|
+
|
|
33
50
|
def tbl_id(self) -> UUID:
|
|
34
51
|
"""Return the id of the table/view that this path represents"""
|
|
35
52
|
return self.tbl_version.id
|
|
36
53
|
|
|
37
54
|
def version(self) -> int:
|
|
38
55
|
"""Return the version of the table/view that this path represents"""
|
|
39
|
-
return self.tbl_version.version
|
|
56
|
+
return self.tbl_version.get().version
|
|
40
57
|
|
|
41
58
|
def tbl_name(self) -> str:
|
|
42
59
|
"""Return the name of the table/view that this path represents"""
|
|
43
|
-
return self.tbl_version.name
|
|
60
|
+
return self.tbl_version.get().name
|
|
44
61
|
|
|
45
62
|
def path_len(self) -> int:
|
|
46
63
|
"""Return the length of the path"""
|
|
@@ -48,32 +65,32 @@ class TableVersionPath:
|
|
|
48
65
|
|
|
49
66
|
def is_snapshot(self) -> bool:
|
|
50
67
|
"""Return True if this is a path of snapshot versions"""
|
|
51
|
-
if not self.tbl_version.is_snapshot:
|
|
68
|
+
if not self.tbl_version.get().is_snapshot:
|
|
52
69
|
return False
|
|
53
70
|
return self.base.is_snapshot() if self.base is not None else True
|
|
54
71
|
|
|
55
72
|
def is_view(self) -> bool:
|
|
56
|
-
return self.tbl_version.
|
|
73
|
+
return self.tbl_version.get().is_view
|
|
57
74
|
|
|
58
75
|
def is_component_view(self) -> bool:
|
|
59
|
-
return self.tbl_version.
|
|
76
|
+
return self.tbl_version.get().is_component_view
|
|
60
77
|
|
|
61
78
|
def is_insertable(self) -> bool:
|
|
62
|
-
return self.tbl_version.is_insertable()
|
|
79
|
+
return self.tbl_version.get().is_insertable()
|
|
63
80
|
|
|
64
|
-
def get_tbl_versions(self) -> list[
|
|
81
|
+
def get_tbl_versions(self) -> list[TableVersionHandle]:
|
|
65
82
|
"""Return all tbl versions"""
|
|
66
83
|
if self.base is None:
|
|
67
84
|
return [self.tbl_version]
|
|
68
85
|
return [self.tbl_version] + self.base.get_tbl_versions()
|
|
69
86
|
|
|
70
|
-
def get_bases(self) -> list[
|
|
87
|
+
def get_bases(self) -> list[TableVersionHandle]:
|
|
71
88
|
"""Return all tbl versions"""
|
|
72
89
|
if self.base is None:
|
|
73
90
|
return []
|
|
74
91
|
return self.base.get_tbl_versions()
|
|
75
92
|
|
|
76
|
-
def find_tbl_version(self, id: UUID) -> Optional[
|
|
93
|
+
def find_tbl_version(self, id: UUID) -> Optional[TableVersionHandle]:
|
|
77
94
|
"""Return the matching TableVersion in the chain of TableVersions, starting with this one"""
|
|
78
95
|
if self.tbl_version.id == id:
|
|
79
96
|
return self.tbl_version
|
|
@@ -81,24 +98,13 @@ class TableVersionPath:
|
|
|
81
98
|
return None
|
|
82
99
|
return self.base.find_tbl_version(id)
|
|
83
100
|
|
|
84
|
-
def get_column_ref(self, col_name: str) -> exprs.ColumnRef:
|
|
85
|
-
"""Return a ColumnRef for the given column name."""
|
|
86
|
-
from pixeltable.exprs import ColumnRef
|
|
87
|
-
|
|
88
|
-
if col_name not in self.tbl_version.cols_by_name:
|
|
89
|
-
if self.base is None or not self.tbl_version.include_base_columns:
|
|
90
|
-
raise AttributeError(f'Column {col_name} unknown')
|
|
91
|
-
return self.base.get_column_ref(col_name)
|
|
92
|
-
col = self.tbl_version.cols_by_name[col_name]
|
|
93
|
-
return ColumnRef(col)
|
|
94
|
-
|
|
95
101
|
def columns(self) -> list[Column]:
|
|
96
102
|
"""Return all user columns visible in this tbl version path, including columns from bases"""
|
|
97
|
-
result = list(self.tbl_version.cols_by_name.values())
|
|
98
|
-
if self.base is not None and self.tbl_version.include_base_columns:
|
|
103
|
+
result = list(self.tbl_version.get().cols_by_name.values())
|
|
104
|
+
if self.base is not None and self.tbl_version.get().include_base_columns:
|
|
99
105
|
base_cols = self.base.columns()
|
|
100
106
|
# we only include base columns that don't conflict with one of our column names
|
|
101
|
-
result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
|
|
107
|
+
result.extend(c for c in base_cols if c.name not in self.tbl_version.get().cols_by_name)
|
|
102
108
|
return result
|
|
103
109
|
|
|
104
110
|
def cols_by_name(self) -> dict[str, Column]:
|
|
@@ -111,23 +117,33 @@ class TableVersionPath:
|
|
|
111
117
|
cols = self.columns()
|
|
112
118
|
return {col.id: col for col in cols}
|
|
113
119
|
|
|
114
|
-
def get_column(self, name: str, include_bases: bool =
|
|
120
|
+
def get_column(self, name: str, include_bases: Optional[bool] = None) -> Optional[Column]:
|
|
115
121
|
"""Return the column with the given name, or None if not found"""
|
|
116
|
-
col = self.tbl_version.cols_by_name.get(name)
|
|
122
|
+
col = self.tbl_version.get().cols_by_name.get(name)
|
|
117
123
|
if col is not None:
|
|
118
124
|
return col
|
|
119
|
-
elif self.base is not None and include_bases:
|
|
125
|
+
elif self.base is not None and (include_bases or self.tbl_version.get().include_base_columns):
|
|
120
126
|
return self.base.get_column(name)
|
|
121
127
|
else:
|
|
122
128
|
return None
|
|
123
129
|
|
|
130
|
+
def get_column_by_id(self, tbl_id: UUID, col_id: int) -> Optional[Column]:
|
|
131
|
+
"""Return the column for the given tbl/col id"""
|
|
132
|
+
if self.tbl_version.id == tbl_id:
|
|
133
|
+
assert col_id in self.tbl_version.get().cols_by_id
|
|
134
|
+
return self.tbl_version.get().cols_by_id[col_id]
|
|
135
|
+
elif self.base is not None:
|
|
136
|
+
return self.base.get_column_by_id(tbl_id, col_id)
|
|
137
|
+
else:
|
|
138
|
+
return None
|
|
139
|
+
|
|
124
140
|
def has_column(self, col: Column, include_bases: bool = True) -> bool:
|
|
125
141
|
"""Return True if this table has the given column."""
|
|
126
142
|
assert col.tbl is not None
|
|
127
143
|
if (
|
|
128
144
|
col.tbl.id == self.tbl_version.id
|
|
129
145
|
and col.tbl.effective_version == self.tbl_version.effective_version
|
|
130
|
-
and col.id in self.tbl_version.cols_by_id
|
|
146
|
+
and col.id in self.tbl_version.get().cols_by_id
|
|
131
147
|
):
|
|
132
148
|
# the column is visible in this table version
|
|
133
149
|
return True
|
|
@@ -144,6 +160,6 @@ class TableVersionPath:
|
|
|
144
160
|
|
|
145
161
|
@classmethod
|
|
146
162
|
def from_dict(cls, d: dict) -> TableVersionPath:
|
|
147
|
-
tbl_version =
|
|
163
|
+
tbl_version = TableVersionHandle.from_dict(d['tbl_version'])
|
|
148
164
|
base = TableVersionPath.from_dict(d['base']) if d['base'] is not None else None
|
|
149
165
|
return cls(tbl_version, base)
|
pixeltable/catalog/view.py
CHANGED
|
@@ -5,8 +5,6 @@ import logging
|
|
|
5
5
|
from typing import TYPE_CHECKING, Any, Iterable, List, Literal, Optional
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
|
-
import sqlalchemy.orm as orm
|
|
9
|
-
|
|
10
8
|
import pixeltable.exceptions as excs
|
|
11
9
|
import pixeltable.metadata.schema as md_schema
|
|
12
10
|
import pixeltable.type_system as ts
|
|
@@ -14,11 +12,11 @@ from pixeltable import catalog, exprs, func
|
|
|
14
12
|
from pixeltable.env import Env
|
|
15
13
|
from pixeltable.iterators import ComponentIterator
|
|
16
14
|
|
|
17
|
-
from .catalog import Catalog
|
|
18
15
|
from .column import Column
|
|
19
16
|
from .globals import _POS_COLUMN_NAME, MediaValidation, UpdateStatus
|
|
20
17
|
from .table import Table
|
|
21
18
|
from .table_version import TableVersion
|
|
19
|
+
from .table_version_handle import TableVersionHandle
|
|
22
20
|
from .table_version_path import TableVersionPath
|
|
23
21
|
|
|
24
22
|
if TYPE_CHECKING:
|
|
@@ -36,19 +34,8 @@ class View(Table):
|
|
|
36
34
|
is simply a reference to a specific set of base versions.
|
|
37
35
|
"""
|
|
38
36
|
|
|
39
|
-
def __init__(
|
|
40
|
-
self,
|
|
41
|
-
id: UUID,
|
|
42
|
-
dir_id: UUID,
|
|
43
|
-
name: str,
|
|
44
|
-
tbl_version_path: TableVersionPath,
|
|
45
|
-
base_id: UUID,
|
|
46
|
-
snapshot_only: bool,
|
|
47
|
-
include_base_columns: bool,
|
|
48
|
-
):
|
|
37
|
+
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, snapshot_only: bool):
|
|
49
38
|
super().__init__(id, dir_id, name, tbl_version_path)
|
|
50
|
-
assert base_id in catalog.Catalog.get().tbl_dependents
|
|
51
|
-
self._base_id = base_id # keep a reference to the base Table ID, so that we can keep track of its dependents
|
|
52
39
|
self._snapshot_only = snapshot_only
|
|
53
40
|
|
|
54
41
|
@classmethod
|
|
@@ -162,87 +149,66 @@ class View(Table):
|
|
|
162
149
|
)
|
|
163
150
|
columns = iterator_cols + columns
|
|
164
151
|
|
|
165
|
-
|
|
166
|
-
|
|
152
|
+
session = Env.get().session
|
|
153
|
+
from pixeltable.exprs import InlineDict
|
|
167
154
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
)
|
|
172
|
-
base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
|
|
173
|
-
base_versions = [
|
|
174
|
-
(tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
|
|
175
|
-
for tbl_version in base_version_path.get_tbl_versions()
|
|
176
|
-
]
|
|
155
|
+
iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
|
|
156
|
+
iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None else None
|
|
157
|
+
base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
|
|
177
158
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
)
|
|
184
|
-
for col in columns:
|
|
185
|
-
if col.value_expr is not None:
|
|
186
|
-
col.set_value_expr(col.value_expr.retarget(base_version_path))
|
|
187
|
-
|
|
188
|
-
view_md = md_schema.ViewMd(
|
|
189
|
-
is_snapshot=is_snapshot,
|
|
190
|
-
include_base_columns=include_base_columns,
|
|
191
|
-
predicate=predicate.as_dict() if predicate is not None else None,
|
|
192
|
-
base_versions=base_versions,
|
|
193
|
-
iterator_class_fqn=iterator_class_fqn,
|
|
194
|
-
iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
|
|
159
|
+
# if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
|
|
160
|
+
if is_snapshot:
|
|
161
|
+
predicate = predicate.retarget(base_version_path) if predicate is not None else None
|
|
162
|
+
iterator_args_expr = (
|
|
163
|
+
iterator_args_expr.retarget(base_version_path) if iterator_args_expr is not None else None
|
|
195
164
|
)
|
|
165
|
+
for col in columns:
|
|
166
|
+
if col.value_expr is not None:
|
|
167
|
+
col.set_value_expr(col.value_expr.retarget(base_version_path))
|
|
168
|
+
|
|
169
|
+
view_md = md_schema.ViewMd(
|
|
170
|
+
is_snapshot=is_snapshot,
|
|
171
|
+
include_base_columns=include_base_columns,
|
|
172
|
+
predicate=predicate.as_dict() if predicate is not None else None,
|
|
173
|
+
base_versions=base_version_path.as_md(),
|
|
174
|
+
iterator_class_fqn=iterator_class_fqn,
|
|
175
|
+
iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
|
|
176
|
+
)
|
|
196
177
|
|
|
197
|
-
|
|
198
|
-
|
|
178
|
+
id, tbl_version = TableVersion.create(
|
|
179
|
+
dir_id,
|
|
180
|
+
name,
|
|
181
|
+
columns,
|
|
182
|
+
num_retained_versions,
|
|
183
|
+
comment,
|
|
184
|
+
media_validation=media_validation,
|
|
185
|
+
# base_path=base_version_path,
|
|
186
|
+
view_md=view_md,
|
|
187
|
+
)
|
|
188
|
+
if tbl_version is None:
|
|
189
|
+
# this is purely a snapshot: we use the base's tbl version path
|
|
190
|
+
view = cls(id, dir_id, name, base_version_path, snapshot_only=True)
|
|
191
|
+
_logger.info(f'created snapshot {name}')
|
|
192
|
+
else:
|
|
193
|
+
view = cls(
|
|
194
|
+
id,
|
|
199
195
|
dir_id,
|
|
200
196
|
name,
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
base_path=base_version_path,
|
|
206
|
-
view_md=view_md,
|
|
197
|
+
TableVersionPath(
|
|
198
|
+
TableVersionHandle(tbl_version.id, tbl_version.effective_version), base=base_version_path
|
|
199
|
+
),
|
|
200
|
+
snapshot_only=False,
|
|
207
201
|
)
|
|
208
|
-
|
|
209
|
-
# this is purely a snapshot: we use the base's tbl version path
|
|
210
|
-
view = cls(
|
|
211
|
-
id,
|
|
212
|
-
dir_id,
|
|
213
|
-
name,
|
|
214
|
-
base_version_path,
|
|
215
|
-
base.tbl_id(),
|
|
216
|
-
snapshot_only=True,
|
|
217
|
-
include_base_columns=include_base_columns,
|
|
218
|
-
)
|
|
219
|
-
_logger.info(f'created snapshot {name}')
|
|
220
|
-
else:
|
|
221
|
-
view = cls(
|
|
222
|
-
id,
|
|
223
|
-
dir_id,
|
|
224
|
-
name,
|
|
225
|
-
TableVersionPath(tbl_version, base=base_version_path),
|
|
226
|
-
base.tbl_id(),
|
|
227
|
-
snapshot_only=False,
|
|
228
|
-
include_base_columns=include_base_columns,
|
|
229
|
-
)
|
|
230
|
-
_logger.info(f'Created view `{name}`, id={tbl_version.id}')
|
|
202
|
+
_logger.info(f'Created view `{name}`, id={tbl_version.id}')
|
|
231
203
|
|
|
232
|
-
|
|
204
|
+
from pixeltable.plan import Planner
|
|
233
205
|
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
)
|
|
238
|
-
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
206
|
+
plan, num_values_per_row = Planner.create_view_load_plan(view._tbl_version_path)
|
|
207
|
+
num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
|
|
208
|
+
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
239
209
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
cat.tbl_dependents[view._id] = []
|
|
243
|
-
cat.tbl_dependents[base.tbl_id()].append(view)
|
|
244
|
-
cat.tbls[view._id] = view
|
|
245
|
-
return view
|
|
210
|
+
session.commit()
|
|
211
|
+
return view
|
|
246
212
|
|
|
247
213
|
@classmethod
|
|
248
214
|
def _verify_column(cls, col: Column) -> None:
|
|
@@ -259,35 +225,29 @@ class View(Table):
|
|
|
259
225
|
"""
|
|
260
226
|
if tbl_version_path.is_snapshot():
|
|
261
227
|
return tbl_version_path
|
|
262
|
-
tbl_version = tbl_version_path.tbl_version
|
|
228
|
+
tbl_version = tbl_version_path.tbl_version.get()
|
|
263
229
|
if not tbl_version.is_snapshot:
|
|
264
230
|
# create and register snapshot version
|
|
265
231
|
tbl_version = tbl_version.create_snapshot_copy()
|
|
266
232
|
assert tbl_version.is_snapshot
|
|
267
233
|
|
|
268
234
|
return TableVersionPath(
|
|
269
|
-
tbl_version,
|
|
235
|
+
TableVersionHandle(tbl_version.id, tbl_version.effective_version),
|
|
270
236
|
base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
|
|
271
237
|
)
|
|
272
238
|
|
|
273
239
|
def _drop(self) -> None:
|
|
274
240
|
cat = catalog.Catalog.get()
|
|
275
|
-
# verify all dependents are deleted by now
|
|
276
|
-
for dep in cat.tbl_dependents[self._id]:
|
|
277
|
-
assert dep._is_dropped
|
|
278
241
|
if self._snapshot_only:
|
|
279
242
|
# there is not TableVersion to drop
|
|
280
243
|
self._check_is_dropped()
|
|
281
244
|
self.is_dropped = True
|
|
282
|
-
|
|
283
|
-
TableVersion.delete_md(self._id, conn)
|
|
245
|
+
TableVersion.delete_md(self._id)
|
|
284
246
|
# update catalog
|
|
285
247
|
cat = catalog.Catalog.get()
|
|
286
|
-
|
|
248
|
+
cat.remove_tbl(self._id)
|
|
287
249
|
else:
|
|
288
250
|
super()._drop()
|
|
289
|
-
cat.tbl_dependents[self._base_id].remove(self)
|
|
290
|
-
del cat.tbl_dependents[self._id]
|
|
291
251
|
|
|
292
252
|
def get_metadata(self) -> dict[str, Any]:
|
|
293
253
|
md = super().get_metadata()
|
pixeltable/config.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, ClassVar, Optional, TypeVar
|
|
8
|
+
|
|
9
|
+
import toml
|
|
10
|
+
|
|
11
|
+
from pixeltable import exceptions as excs
|
|
12
|
+
|
|
13
|
+
_logger = logging.getLogger('pixeltable')
|
|
14
|
+
|
|
15
|
+
T = TypeVar('T')
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Config:
    """
    The (global) Pixeltable configuration, as loaded from `config.toml`. Provides methods for retrieving
    configuration values, which can be set in the config file or as environment variables.

    Lookup order for a value is: the environment variable `<SECTION>_<KEY>` (upper-cased), then the
    `[section]` table of the config file. Values that are found are converted to the requested type.
    """

    __instance: ClassVar[Optional[Config]] = None

    __home: Path
    __config_file: Path
    __config_dict: dict[str, Any]

    def __init__(self) -> None:
        """Load the configuration, creating the home directory and a default config file if needed.

        The home directory is taken from `PIXELTABLE_HOME` (default: `~/.pixeltable`) and the config file
        from `PIXELTABLE_CONFIG` (default: `<home>/config.toml`).

        Raises:
            RuntimeError: if the home path exists but is not a directory.
            excs.Error: if the config file cannot be parsed, or the default config cannot be written.
        """
        assert self.__instance is None, 'Config is a singleton; use Config.get() to access the instance'

        self.__home = Path(os.environ.get('PIXELTABLE_HOME', str(Path.home() / '.pixeltable')))
        if self.__home.exists() and not self.__home.is_dir():
            raise RuntimeError(f'{self.__home} is not a directory')
        if not self.__home.exists():
            print(f'Creating a Pixeltable instance at: {self.__home}')
            # parents=True: PIXELTABLE_HOME may point at a nested path that doesn't exist yet
            self.__home.mkdir(parents=True, exist_ok=True)

        self.__config_file = Path(os.environ.get('PIXELTABLE_CONFIG', str(self.__home / 'config.toml')))

        if self.__config_file.is_file():
            with open(self.__config_file, 'r', encoding='utf-8') as stream:
                try:
                    self.__config_dict = toml.load(stream)
                except Exception as exc:
                    raise excs.Error(f'Could not read config file: {self.__config_file}') from exc
        else:
            # no config file yet: generate defaults and persist them
            self.__config_dict = self.__create_default_config(self.__config_file)
            with open(self.__config_file, 'w', encoding='utf-8') as stream:
                try:
                    toml.dump(self.__config_dict, stream)
                except Exception as exc:
                    raise excs.Error(f'Could not write config file: {self.__config_file}') from exc
            _logger.info(f'Created default config file at: {self.__config_file}')

    @property
    def home(self) -> Path:
        """The Pixeltable home directory."""
        return self.__home

    @property
    def config_file(self) -> Path:
        """Path of the config file that was loaded (or created)."""
        return self.__config_file

    @classmethod
    def get(cls) -> Config:
        """Return the singleton instance, creating it on first access."""
        if cls.__instance is None:
            cls.__instance = cls()
        return cls.__instance

    @classmethod
    def __create_default_config(cls, config_path: Path) -> dict[str, Any]:
        """Return the default configuration for a config file located at `config_path`."""
        free_disk_space_bytes = shutil.disk_usage(config_path.parent).free
        # Default cache size is 1/5 of free disk space
        file_cache_size_g = free_disk_space_bytes / 5 / (1 << 30)
        return {'pixeltable': {'file_cache_size_g': round(file_cache_size_g, 1), 'hide_warnings': False}}

    def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> Optional[T]:
        """Return the value of a configuration parameter, converted to `expected_type`.

        Args:
            key: name of the parameter.
            expected_type: type to convert the value to (e.g. `str`, `int`, `float`, `bool`).
            section: config file section; also the prefix of the corresponding environment variable.

        Returns:
            The converted value, or None if the parameter is set neither in the environment nor in the
            config file.

        Raises:
            excs.Error: if the value cannot be converted to `expected_type`.
        """
        env_var = f'{section.upper()}_{key.upper()}'
        if env_var in os.environ:
            value = os.environ[env_var]
        elif section in self.__config_dict and key in self.__config_dict[section]:
            value = self.__config_dict[section][key]
        else:
            return None

        if expected_type is bool and isinstance(value, str):
            # bool('false') is True, so string values (e.g. from environment variables) need to be
            # parsed explicitly rather than passed to bool()
            normalized = value.strip().lower()
            if normalized in ('true', '1', 'yes', 'on'):
                return True  # type: ignore[return-value]
            if normalized in ('false', '0', 'no', 'off', ''):
                return False  # type: ignore[return-value]
            raise excs.Error(f'Invalid value for configuration parameter {section}.{key}: {value}')

        try:
            return expected_type(value)  # type: ignore[call-arg]
        except (ValueError, TypeError) as exc:
            # TypeError: e.g. a TOML table/array where a scalar was expected
            raise excs.Error(f'Invalid value for configuration parameter {section}.{key}: {value}') from exc

    def get_string_value(self, key: str, section: str = 'pixeltable') -> Optional[str]:
        """Return the parameter as a string, or None if it is not set."""
        return self.get_value(key, str, section)

    def get_int_value(self, key: str, section: str = 'pixeltable') -> Optional[int]:
        """Return the parameter as an int, or None if it is not set."""
        return self.get_value(key, int, section)

    def get_float_value(self, key: str, section: str = 'pixeltable') -> Optional[float]:
        """Return the parameter as a float, or None if it is not set."""
        return self.get_value(key, float, section)

    def get_bool_value(self, key: str, section: str = 'pixeltable') -> Optional[bool]:
        """Return the parameter as a bool, or None if it is not set.

        String values are interpreted case-insensitively: 'true'/'1'/'yes'/'on' are True and
        'false'/'0'/'no'/'off'/'' are False.
        """
        return self.get_value(key, bool, section)
|