pixeltable 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -2
- pixeltable/catalog/column.py +1 -1
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/table.py +3 -1
- pixeltable/catalog/table_version.py +12 -2
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +64 -20
- pixeltable/dataframe.py +11 -6
- pixeltable/env.py +12 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -2
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
- pixeltable/exprs/comparison.py +8 -4
- pixeltable/exprs/data_row.py +9 -7
- pixeltable/exprs/expr.py +2 -2
- pixeltable/exprs/function_call.py +155 -313
- pixeltable/exprs/json_mapper.py +25 -8
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/object_ref.py +16 -5
- pixeltable/exprs/row_builder.py +10 -3
- pixeltable/func/aggregate_function.py +29 -15
- pixeltable/func/callable_function.py +11 -8
- pixeltable/func/expr_template_function.py +3 -9
- pixeltable/func/function.py +148 -74
- pixeltable/func/signature.py +65 -30
- pixeltable/func/tools.py +26 -26
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +9 -3
- pixeltable/functions/deepseek.py +121 -0
- pixeltable/functions/image.py +7 -7
- pixeltable/functions/openai.py +30 -13
- pixeltable/functions/video.py +14 -7
- pixeltable/globals.py +14 -3
- pixeltable/index/embedding_index.py +4 -13
- pixeltable/io/globals.py +88 -77
- pixeltable/io/hf_datasets.py +34 -34
- pixeltable/io/pandas.py +75 -76
- pixeltable/io/parquet.py +19 -27
- pixeltable/io/utils.py +115 -0
- pixeltable/iterators/audio.py +2 -1
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/__init__.py +2 -1
- pixeltable/metadata/converters/convert_15.py +18 -8
- pixeltable/metadata/converters/convert_27.py +31 -0
- pixeltable/metadata/converters/convert_28.py +15 -0
- pixeltable/metadata/converters/convert_29.py +111 -0
- pixeltable/metadata/converters/util.py +12 -1
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/share/__init__.py +1 -0
- pixeltable/share/packager.py +41 -13
- pixeltable/share/publish.py +97 -0
- pixeltable/type_system.py +40 -14
- pixeltable/utils/__init__.py +41 -0
- pixeltable/utils/arrow.py +40 -7
- pixeltable/utils/formatter.py +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/METADATA +34 -49
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/RECORD +63 -57
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/entry_points.txt +0 -0
pixeltable/io/parquet.py
CHANGED
|
@@ -15,10 +15,11 @@ import PIL.Image
|
|
|
15
15
|
|
|
16
16
|
import pixeltable as pxt
|
|
17
17
|
import pixeltable.exceptions as exc
|
|
18
|
-
import pixeltable.type_system as ts
|
|
19
18
|
from pixeltable.env import Env
|
|
20
19
|
from pixeltable.utils.transactional_directory import transactional_directory
|
|
21
20
|
|
|
21
|
+
from .utils import normalize_import_parameters, normalize_schema_names
|
|
22
|
+
|
|
22
23
|
if typing.TYPE_CHECKING:
|
|
23
24
|
import pyarrow as pa
|
|
24
25
|
|
|
@@ -148,19 +149,13 @@ def export_parquet(
|
|
|
148
149
|
_write_batch(current_value_batch, arrow_schema, temp_path / f'part-{batch_num:05d}.parquet')
|
|
149
150
|
|
|
150
151
|
|
|
151
|
-
def parquet_schema_to_pixeltable_schema(parquet_path: str) -> dict[str, Optional[ts.ColumnType]]:
|
|
152
|
-
"""Generate a default pixeltable schema for the given parquet file. Returns None for unknown types."""
|
|
153
|
-
from pyarrow import parquet
|
|
154
|
-
|
|
155
|
-
from pixeltable.utils.arrow import to_pixeltable_schema
|
|
156
|
-
|
|
157
|
-
input_path = Path(parquet_path).expanduser()
|
|
158
|
-
parquet_dataset = parquet.ParquetDataset(str(input_path))
|
|
159
|
-
return to_pixeltable_schema(parquet_dataset.schema)
|
|
160
|
-
|
|
161
|
-
|
|
162
152
|
def import_parquet(
|
|
163
|
-
table: str,
|
|
153
|
+
table: str,
|
|
154
|
+
*,
|
|
155
|
+
parquet_path: str,
|
|
156
|
+
schema_overrides: Optional[dict[str, Any]] = None,
|
|
157
|
+
primary_key: Optional[Union[str, list[str]]] = None,
|
|
158
|
+
**kwargs: Any,
|
|
164
159
|
) -> pxt.Table:
|
|
165
160
|
"""Creates a new base table from a Parquet file or set of files. Requires pyarrow to be installed.
|
|
166
161
|
|
|
@@ -171,6 +166,7 @@ def import_parquet(
|
|
|
171
166
|
name `name` will be given type `type`, instead of being inferred from the Parquet dataset. The keys in
|
|
172
167
|
`schema_overrides` should be the column names of the Parquet dataset (whether or not they are valid
|
|
173
168
|
Pixeltable identifiers).
|
|
169
|
+
primary_key: The primary key of the table (see [`create_table()`][pixeltable.create_table]).
|
|
174
170
|
kwargs: Additional arguments to pass to `create_table`.
|
|
175
171
|
|
|
176
172
|
Returns:
|
|
@@ -178,33 +174,29 @@ def import_parquet(
|
|
|
178
174
|
"""
|
|
179
175
|
from pyarrow import parquet
|
|
180
176
|
|
|
181
|
-
|
|
182
|
-
from pixeltable.utils.arrow import iter_tuples
|
|
177
|
+
from pixeltable.utils.arrow import ar_infer_schema, iter_tuples2
|
|
183
178
|
|
|
184
179
|
input_path = Path(parquet_path).expanduser()
|
|
185
180
|
parquet_dataset = parquet.ParquetDataset(str(input_path))
|
|
186
181
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
schema.update(schema_overrides)
|
|
192
|
-
for k, v in schema.items():
|
|
193
|
-
if v is None:
|
|
194
|
-
raise exc.Error(f'Could not infer pixeltable type for column {k} from parquet file')
|
|
182
|
+
schema_overrides, primary_key = normalize_import_parameters(schema_overrides, primary_key)
|
|
183
|
+
ar_schema = ar_infer_schema(parquet_dataset.schema, schema_overrides, primary_key)
|
|
184
|
+
schema, pxt_pk, col_mapping = normalize_schema_names(ar_schema, primary_key, schema_overrides, False)
|
|
195
185
|
|
|
196
186
|
if table in pxt.list_tables():
|
|
197
187
|
raise exc.Error(f'Table {table} already exists')
|
|
198
188
|
|
|
189
|
+
tmp_name = f'{table}_tmp_{random.randint(0, 100000000)}'
|
|
190
|
+
total_rows = 0
|
|
199
191
|
try:
|
|
200
|
-
|
|
201
|
-
tab = pxt.create_table(tmp_name, schema, **kwargs)
|
|
192
|
+
tab = pxt.create_table(tmp_name, schema, primary_key=pxt_pk, **kwargs)
|
|
202
193
|
for fragment in parquet_dataset.fragments: # type: ignore[attr-defined]
|
|
203
194
|
for batch in fragment.to_batches():
|
|
204
|
-
dict_batch = list(
|
|
195
|
+
dict_batch = list(iter_tuples2(batch, col_mapping, schema))
|
|
196
|
+
total_rows += len(dict_batch)
|
|
205
197
|
tab.insert(dict_batch)
|
|
206
198
|
except Exception as e:
|
|
207
|
-
_logger.error(f'Error
|
|
199
|
+
_logger.error(f'Error after inserting {total_rows} rows from Parquet file into table: {e}')
|
|
208
200
|
raise e
|
|
209
201
|
|
|
210
202
|
pxt.move(tmp_name, table)
|
pixeltable/io/utils.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from keyword import iskeyword as is_python_keyword
|
|
2
|
+
from typing import Any, Optional, Union
|
|
3
|
+
|
|
4
|
+
import pixeltable as pxt
|
|
5
|
+
import pixeltable.exceptions as excs
|
|
6
|
+
from pixeltable import Table
|
|
7
|
+
from pixeltable.catalog.globals import is_system_column_name
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def normalize_pxt_col_name(name: str) -> str:
    """
    Normalize an arbitrary source column name into a valid Pixeltable identifier.

    The name is transformed as follows:
    - every character that is not an ASCII letter or digit is replaced with an underscore
    - a result that starts with a digit is prefixed with 'c_'
    - a result that starts with an underscore is prefixed with 'c'
    - an empty name becomes the placeholder 'c_'

    Args:
        name: The column name as it appears in the source.

    Returns:
        The normalized identifier.
    """
    normalized = ''.join(ch if ch.isascii() and ch.isalnum() else '_' for ch in name)
    if not normalized:
        # An empty source name has no first character to inspect; use a placeholder instead of
        # failing with an IndexError. NOTE(review): assumes 'c_' passes is_valid_identifier().
        normalized = 'c_'
    elif normalized[0].isnumeric():
        normalized = f'c_{normalized}'
    elif normalized[0] == '_':
        normalized = f'c{normalized}'
    # Internal sanity check on the result of the normalization above.
    assert pxt.catalog.is_valid_identifier(normalized), normalized
    return normalized
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def normalize_import_parameters(
    schema_overrides: Optional[dict[str, Any]] = None, primary_key: Optional[Union[str, list[str]]] = None
) -> tuple[dict[str, Any], list[str]]:
    """
    Bring the optional import arguments into a canonical form.

    Args:
        schema_overrides: Column-type overrides, or None.
        primary_key: A single column name, a list of column names, or None.

    Returns:
        A pair `(schema_overrides, primary_key)` where a missing `schema_overrides` is an empty dict and
        `primary_key` is always a list (empty if None was given, one element if a string was given).
        Arguments that are already a dict / list are returned as-is, not copied.
    """
    overrides = {} if schema_overrides is None else schema_overrides

    pk_columns: list[str]
    if primary_key is None:
        pk_columns = []
    elif isinstance(primary_key, str):
        pk_columns = [primary_key]
    else:
        pk_columns = primary_key

    return overrides, pk_columns
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _is_usable_as_column_name(name: str, destination_schema: dict[str, Any]) -> bool:
    """
    Return True if `name` can be added to `destination_schema` as a new column name.

    A name is rejected if it is a system column name, a Python keyword, or already a key of
    `destination_schema`.
    """
    if is_system_column_name(name) or is_python_keyword(name):
        return False
    return name not in destination_schema
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def normalize_schema_names(
    in_schema: dict[str, Any],
    primary_key: list[str],
    schema_overrides: dict[str, Any],
    require_valid_pxt_column_names: bool = False,
) -> tuple[dict[str, Any], list[str], Optional[dict[str, str]]]:
    """
    Convert all names in the input schema from source names to valid Pixeltable identifiers.

    - Ensures that all resulting names are unique.
    - Reports an error if any types are missing.
    - If `require_valid_pxt_column_names`, reports an error if any column name had to be changed.
    - Reports an error if any primary key columns are missing.

    Args:
        in_schema: Maps source column names to their column types (None = type could not be inferred).
        primary_key: Source names of the primary key columns.
        schema_overrides: Type overrides keyed by source column name; only the keys are checked here.
        require_valid_pxt_column_names: If True, renaming is not allowed and is reported as an error.

    Returns:
        - A new schema keyed by the normalized column names.
        - The primary key columns, mapped to the normalized names.
        - A mapping from the original names to the normalized names, or None if no name was changed.

    Raises:
        excs.Error: if any of the checks listed above fails.
    """

    # Report any untyped columns as an error
    untyped_cols = [in_name for in_name, column_type in in_schema.items() if column_type is None]
    if untyped_cols:
        raise excs.Error(f'Could not infer pixeltable type for column(s): {", ".join(untyped_cols)}')

    # Report any columns in `schema_overrides` that are not in the source.
    # The set difference has no defined order; sort it so the error message is reproducible.
    extraneous_overrides = sorted(schema_overrides.keys() - in_schema.keys())
    if extraneous_overrides:
        raise excs.Error(
            'Some column(s) specified in `schema_overrides` are not present in the source: '
            f'{", ".join(extraneous_overrides)}'
        )

    schema: dict[str, Any] = {}
    name_mapping: dict[str, str] = {}  # Maps source column names to Pixeltable column names
    for in_name, pxt_type in in_schema.items():
        pxt_name = normalize_pxt_col_name(in_name)
        # Ensure that column names are unique by appending a distinguishing suffix
        # to any collisions
        pxt_fname = pxt_name
        n = 1
        while not _is_usable_as_column_name(pxt_fname, schema):
            pxt_fname = f'{pxt_name}_{n}'
            n += 1
        schema[pxt_fname] = pxt_type
        name_mapping[in_name] = pxt_fname

    # Determine if the mapping is the identity mapping; if so, callers get None instead of a mapping
    col_mapping: Optional[dict[str, str]] = name_mapping
    non_identity_keys = [k for k, v in name_mapping.items() if k != v]
    if non_identity_keys:
        if require_valid_pxt_column_names:
            raise excs.Error(
                f'Column names must be valid pixeltable identifiers. Invalid names: {", ".join(non_identity_keys)}'
            )
    else:
        col_mapping = None

    # Report any primary key columns that are not in the source as an error
    missing_pk = [pk for pk in primary_key if pk not in in_schema]
    if missing_pk:
        raise excs.Error(f'Primary key column(s) are not found in the source: {", ".join(missing_pk)}')

    # With an identity mapping the source names are already the Pixeltable names
    pxt_pk = [col_mapping[pk] for pk in primary_key] if col_mapping is not None else primary_key

    return schema, pxt_pk, col_mapping
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def find_or_create_table(
    tbl_path: str,
    schema: dict[str, Any],
    *,
    primary_key: Optional[Union[str, list[str]]],
    num_retained_versions: int,
    comment: str,
) -> Table:
    """
    Create the table at `tbl_path` and return it.

    All arguments are forwarded unchanged to `pxt.create_table()`; this function does not look up
    an existing table itself.
    """
    create_options = {
        'primary_key': primary_key,
        'num_retained_versions': num_retained_versions,
        'comment': comment,
    }
    return pxt.create_table(tbl_path, schema, **create_options)
|
pixeltable/iterators/audio.py
CHANGED
|
@@ -5,7 +5,7 @@ from fractions import Fraction
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import Any, Optional
|
|
7
7
|
|
|
8
|
-
import av
|
|
8
|
+
import av
|
|
9
9
|
|
|
10
10
|
import pixeltable.env as env
|
|
11
11
|
import pixeltable.exceptions as excs
|
|
@@ -146,6 +146,7 @@ class AudioSplitter(ComponentIterator):
|
|
|
146
146
|
input_stream = self.container.streams.audio[0]
|
|
147
147
|
codec_name = AudioSplitter.__codec_map.get(input_stream.codec_context.name, input_stream.codec_context.name)
|
|
148
148
|
output_stream = output_container.add_stream(codec_name, rate=input_stream.codec_context.sample_rate)
|
|
149
|
+
assert isinstance(output_stream, av.audio.stream.AudioStream)
|
|
149
150
|
frame_count = 0
|
|
150
151
|
# Since frames don't align with chunk boundaries, we may have read an extra frame in previous iteration
|
|
151
152
|
# Seek to the nearest frame in stream at current chunk start time
|
pixeltable/iterators/video.py
CHANGED
pixeltable/metadata/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
|
|
|
10
10
|
from .schema import SystemInfo, SystemInfoMd
|
|
11
11
|
|
|
12
12
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
13
|
-
VERSION =
|
|
13
|
+
VERSION = 30
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
|
@@ -31,6 +31,7 @@ converter_cbs: dict[int, Callable[[sql.engine.Engine], None]] = {}
|
|
|
31
31
|
def register_converter(version: int) -> Callable[[Callable[[sql.engine.Engine], None]], None]:
    """
    Return a decorator that records the decorated function in `converter_cbs` under `version`.

    The decorator returns None, so the decorated name is not bound to the function afterwards.
    Registering two converters for the same version trips an assertion.
    """

    def _register(converter: Callable[[sql.engine.Engine], None]) -> None:
        global converter_cbs
        assert version not in converter_cbs
        converter_cbs[version] = converter

    return _register
|
|
@@ -17,7 +17,7 @@ _logger = logging.getLogger('pixeltable')
|
|
|
17
17
|
def _(engine: sql.engine.Engine) -> None:
|
|
18
18
|
with engine.begin() as conn:
|
|
19
19
|
for row in conn.execute(sql.select(Function)):
|
|
20
|
-
id,
|
|
20
|
+
id, _, md, binary_obj = row
|
|
21
21
|
md['md'] = __update_md(md['md'], binary_obj)
|
|
22
22
|
_logger.info(f'Updating function: {id}')
|
|
23
23
|
conn.execute(sql.update(Function).where(Function.id == id).values(md=md))
|
|
@@ -27,14 +27,24 @@ def __update_md(orig_d: dict, binary_obj: bytes) -> Any:
|
|
|
27
27
|
# construct dict produced by CallableFunction.to_store()
|
|
28
28
|
py_fn = cloudpickle.loads(binary_obj)
|
|
29
29
|
py_params = inspect.signature(py_fn).parameters
|
|
30
|
-
return_type =
|
|
31
|
-
params: list[
|
|
30
|
+
return_type = orig_d['return_type']
|
|
31
|
+
params: list[dict] = []
|
|
32
32
|
for name, col_type_dict, kind_int, is_batched in orig_d['parameters']:
|
|
33
|
-
col_type = ts.ColumnType.from_dict(col_type_dict) if col_type_dict is not None else None
|
|
34
33
|
default = py_params[name].default
|
|
35
|
-
kind = inspect._ParameterKind(kind_int)
|
|
36
|
-
params.append(
|
|
34
|
+
kind = inspect._ParameterKind(kind_int)
|
|
35
|
+
params.append(
|
|
36
|
+
{
|
|
37
|
+
'name': name,
|
|
38
|
+
'col_type': col_type_dict,
|
|
39
|
+
'kind': str(kind),
|
|
40
|
+
'is_batched': is_batched,
|
|
41
|
+
'has_default': default is not inspect.Parameter.empty,
|
|
42
|
+
'default': None if default is inspect.Parameter.empty else default,
|
|
43
|
+
}
|
|
44
|
+
)
|
|
37
45
|
is_batched = 'batch_size' in orig_d
|
|
38
|
-
|
|
39
|
-
|
|
46
|
+
d = {
|
|
47
|
+
'signature': {'return_type': return_type, 'parameters': params, 'is_batched': is_batched},
|
|
48
|
+
'batch_size': orig_d['batch_size'] if is_batched else None,
|
|
49
|
+
}
|
|
40
50
|
return d
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Optional
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from pixeltable.metadata import register_converter
|
|
8
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
9
|
+
from pixeltable.metadata.schema import Table
|
|
10
|
+
|
|
11
|
+
_logger = logging.getLogger('pixeltable')
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@register_converter(version=27)
def _(engine: sql.engine.Engine) -> None:
    """Converter registered for metadata version 27: applies `__update_table_md` via `convert_table_md()`."""
    updater = __update_table_md
    convert_table_md(engine, table_md_updater=updater)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def __update_table_md(table_md: dict, table_id: UUID) -> None:
    """Add `include_base_columns = True` to the view metadata if the key is missing.

    Tables without view metadata, and views that already carry the key, are left untouched.

    Args:
        table_md (dict): copy of the original table metadata. this gets updated in place.
        table_id (UUID): the table id; only used for logging

    """
    view_md = table_md['view_md']
    if view_md is None or 'include_base_columns' in view_md:
        return
    view_md['include_base_columns'] = True
    _logger.info(f'Updating view metadata for table: {table_id}')
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable.metadata import register_converter
|
|
6
|
+
from pixeltable.metadata.schema import Dir, Table, TableSchemaVersion, TableVersion
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_converter(version=28)
def _(engine: sql.engine.Engine) -> None:
    """
    Converter registered for metadata version 28.

    Concatenates new default fields onto the `md` column of every row:
    `user` and `additional_md` for Dir and Table, `additional_md` only for TableVersion and
    TableSchemaVersion. All four updates run in a single transaction.
    """
    # (mapped class, fields appended to its md), in the order the updates are issued
    md_additions = (
        (Dir, {'user': None, 'additional_md': {}}),
        (Table, {'user': None, 'additional_md': {}}),
        (TableVersion, {'additional_md': {}}),
        (TableSchemaVersion, {'additional_md': {}}),
    )
    with engine.begin() as conn:
        for record_cls, new_fields in md_additions:
            conn.execute(sql.update(record_cls).values(md=record_cls.md.concat(new_fields)))
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable import exprs
|
|
6
|
+
from pixeltable.metadata import register_converter
|
|
7
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@register_converter(version=29)
def _(engine: sql.engine.Engine) -> None:
    """Converter registered for metadata version 29: rewrites metadata entries via `__substitute_md`."""
    substitution = __substitute_md
    convert_table_md(engine, substitution_fn=substitution)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
    """
    Substitution callback that rewrites two kinds of metadata entries.

    - An entry with key 'parameters': each parameter's default is re-encoded as a serialized literal
      (or None if there is no default) and the 'has_default' field is dropped.
    - A dict with `_classname == 'FunctionCall'`: the old 'args'/'kwargs' layout is replaced by
      'arg_idxs'/'kwarg_idxs' that index into a rebuilt 'components' list; var-args and var-kwargs
      that were stored as a single packed expression are unrolled into individual arguments.

    Args:
        k: Key of the entry being visited (may be None).
        v: Value of the entry being visited.

    Returns:
        `(k, new_value)` if the entry was rewritten, otherwise None.
    """
    # Defaults are now stored as literals in signatures
    if k == 'parameters':
        for param in v:
            assert isinstance(param, dict)
            has_default = param.get('has_default') or (param.get('default') is not None)
            if 'has_default' in param:
                del param['has_default']
            literal = exprs.Expr.from_object(param['default']) if has_default else None
            assert literal is None or isinstance(literal, exprs.Literal)
            param['default'] = None if literal is None else literal.as_dict()
        return k, v

    # Method of organizing argument expressions has changed
    if isinstance(v, dict) and v.get('_classname') == 'FunctionCall':
        args = v['args']
        kwargs = v['kwargs']
        components = v['components']
        group_by_start_idx = v['group_by_start_idx']
        group_by_stop_idx = v['group_by_stop_idx']
        order_by_start_idx = v['order_by_start_idx']

        # Old format: each argument is a pair (component index, constant value); a non-None index
        # refers to an expression in `components`, otherwise the constant is stored inline.
        new_args = []
        for arg in args:
            if arg[0] is not None:
                assert isinstance(arg[0], int)
                new_args.append(components[arg[0]])
            else:
                literal = exprs.Expr.from_object(arg[1])
                new_args.append(literal.as_dict())

        new_kwargs = {}
        for name, kwarg in kwargs.items():
            if kwarg[0] is not None:
                assert isinstance(kwarg[0], int)
                new_kwargs[name] = components[kwarg[0]]
            else:
                literal = exprs.Expr.from_object(kwarg[1])
                new_kwargs[name] = literal.as_dict()

        # We need to expand ("unroll") any var-args or var-kwargs.

        # Positions are compared against the length before unrolling, since the parameter index `i`
        # refers to the original argument layout.
        new_args_len = len(new_args)
        rolled_args: Optional[dict] = None
        rolled_kwargs: Optional[dict] = None

        if 'signature' in v['fn']:
            # If it's a pickled function, there's no signature, so we're out of luck; varargs in a pickled function
            # is an edge case that won't migrate properly.
            parameters: list[dict] = v['fn']['signature']['parameters']
            for i, param in enumerate(parameters):
                if param['kind'] == 'VAR_POSITIONAL':
                    if new_args_len > i:
                        # For peculiar historical reasons, variable kwargs might show up in args. Thus variable
                        # positional args is not necessarily the last element of args; it might be the second-to-last.
                        assert new_args_len <= i + 2, new_args
                        rolled_args = new_args[i]
                        new_args = new_args[:i] + new_args[i + 1 :]
                if param['kind'] == 'VAR_KEYWORD':
                    # As noted above, variable kwargs might show up either in args or in kwargs. If it's in args, it
                    # is necessarily the last element.
                    if new_args_len > i:
                        assert new_args_len <= i + 1, new_args
                        rolled_kwargs = new_args.pop()
                    if param['name'] in new_kwargs:
                        assert rolled_kwargs is None
                        # Take the already-converted expression out of new_kwargs (not the old-format
                        # (index, value) pair from `kwargs`), so that it can be unrolled below and does
                        # not also remain as a regular keyword argument.
                        rolled_kwargs = new_kwargs.pop(param['name'])

        if rolled_args is not None:
            assert rolled_args['_classname'] in ('InlineArray', 'InlineList')
            new_args.extend(rolled_args['components'])
        if rolled_kwargs is not None:
            assert rolled_kwargs['_classname'] == 'InlineDict'
            new_kwargs.update(zip(rolled_kwargs['keys'], rolled_kwargs['components']))

        group_by_exprs = [components[i] for i in range(group_by_start_idx, group_by_stop_idx)]
        order_by_exprs = [components[i] for i in range(order_by_start_idx, len(components))]

        # New component layout: positional args, keyword args, group-by exprs, order-by exprs
        new_components = [*new_args, *new_kwargs.values(), *group_by_exprs, *order_by_exprs]

        newv = {
            'fn': v['fn'],
            'arg_idxs': list(range(len(new_args))),
            'kwarg_idxs': {name: i + len(new_args) for i, name in enumerate(new_kwargs.keys())},
            'group_by_start_idx': len(new_args) + len(new_kwargs),
            'group_by_stop_idx': len(new_args) + len(new_kwargs) + len(group_by_exprs),
            'order_by_start_idx': len(new_args) + len(new_kwargs) + len(group_by_exprs),
            'is_method_call': False,
            '_classname': 'FunctionCall',
            'components': new_components,
        }
        if 'return_type' in v:
            newv['return_type'] = v['return_type']

        return k, newv

    return None
|
|
@@ -5,7 +5,7 @@ from uuid import UUID
|
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
from pixeltable.metadata.schema import Table, TableSchemaVersion
|
|
8
|
+
from pixeltable.metadata.schema import Function, Table, TableSchemaVersion
|
|
9
9
|
|
|
10
10
|
__logger = logging.getLogger('pixeltable')
|
|
11
11
|
|
|
@@ -50,6 +50,17 @@ def convert_table_md(
|
|
|
50
50
|
__logger.info(f'Updating schema for table: {id}')
|
|
51
51
|
conn.execute(sql.update(Table).where(Table.id == id).values(md=updated_table_md))
|
|
52
52
|
|
|
53
|
+
for row in conn.execute(sql.select(Function)):
|
|
54
|
+
id = row[0]
|
|
55
|
+
function_md = row[2]
|
|
56
|
+
assert isinstance(function_md, dict)
|
|
57
|
+
updated_function_md = copy.deepcopy(function_md)
|
|
58
|
+
if substitution_fn is not None:
|
|
59
|
+
updated_function_md = __substitute_md_rec(updated_function_md, substitution_fn)
|
|
60
|
+
if updated_function_md != function_md:
|
|
61
|
+
__logger.info(f'Updating function: {id}')
|
|
62
|
+
conn.execute(sql.update(Function).where(Function.id == id).values(md=updated_function_md))
|
|
63
|
+
|
|
53
64
|
|
|
54
65
|
def __update_column_md(table_md: dict, column_md_updater: Callable[[dict], None]) -> None:
|
|
55
66
|
columns_md = table_md['column_md']
|
pixeltable/metadata/notes.py
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
# rather than as a comment, so that the existence of a description can be enforced by
|
|
3
3
|
# the unit tests when new versions are added.
|
|
4
4
|
VERSION_NOTES = {
|
|
5
|
+
30: 'Store default values and constant arguments as literals',
|
|
6
|
+
29: 'Add user and additional_md fields to metadata structs',
|
|
7
|
+
28: 'Enable view creation from DataFrame with select clause',
|
|
5
8
|
27: 'Enable pxt.query parameterization of limit clauses',
|
|
6
9
|
26: 'Rename clip_text and clip_image to clip',
|
|
7
10
|
25: 'Functions with multiple signatures',
|
pixeltable/metadata/schema.py
CHANGED
|
@@ -74,6 +74,8 @@ class SystemInfo(Base):
|
|
|
74
74
|
@dataclasses.dataclass
|
|
75
75
|
class DirMd:
|
|
76
76
|
name: str
|
|
77
|
+
user: Optional[str]
|
|
78
|
+
additional_md: dict[str, Any]
|
|
77
79
|
|
|
78
80
|
|
|
79
81
|
class Dir(Base):
|
|
@@ -132,6 +134,7 @@ class IndexMd:
|
|
|
132
134
|
@dataclasses.dataclass
|
|
133
135
|
class ViewMd:
|
|
134
136
|
is_snapshot: bool
|
|
137
|
+
include_base_columns: bool
|
|
135
138
|
|
|
136
139
|
# (table id, version); for mutable views, all versions are None
|
|
137
140
|
base_versions: list[tuple[str, Optional[int]]]
|
|
@@ -150,6 +153,8 @@ class ViewMd:
|
|
|
150
153
|
class TableMd:
|
|
151
154
|
name: str
|
|
152
155
|
|
|
156
|
+
user: Optional[str]
|
|
157
|
+
|
|
153
158
|
# monotonically increasing w/in Table for both data and schema changes, starting at 0
|
|
154
159
|
current_version: int
|
|
155
160
|
# each version has a corresponding schema version (current_version >= current_schema_version)
|
|
@@ -169,6 +174,7 @@ class TableMd:
|
|
|
169
174
|
column_md: dict[int, ColumnMd] # col_id -> ColumnMd
|
|
170
175
|
index_md: dict[int, IndexMd] # index_id -> IndexMd
|
|
171
176
|
view_md: Optional[ViewMd]
|
|
177
|
+
additional_md: dict[str, Any]
|
|
172
178
|
|
|
173
179
|
|
|
174
180
|
class Table(Base):
|
|
@@ -194,6 +200,7 @@ class TableVersionMd:
|
|
|
194
200
|
created_at: float # time.time()
|
|
195
201
|
version: int
|
|
196
202
|
schema_version: int
|
|
203
|
+
additional_md: dict[str, Any]
|
|
197
204
|
|
|
198
205
|
|
|
199
206
|
class TableVersion(Base):
|
|
@@ -232,6 +239,7 @@ class TableSchemaVersionMd:
|
|
|
232
239
|
# default validation strategy for any media column of this table
|
|
233
240
|
# stores column.MediaValiation.name.lower()
|
|
234
241
|
media_validation: str
|
|
242
|
+
additional_md: dict[str, Any]
|
|
235
243
|
|
|
236
244
|
|
|
237
245
|
# versioning: each table schema change results in a new record
|
pixeltable/share/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .publish import publish_snapshot
|