pixeltable 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (63)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -2
  4. pixeltable/catalog/column.py +1 -1
  5. pixeltable/catalog/dir.py +1 -1
  6. pixeltable/catalog/table.py +3 -1
  7. pixeltable/catalog/table_version.py +12 -2
  8. pixeltable/catalog/table_version_path.py +2 -2
  9. pixeltable/catalog/view.py +64 -20
  10. pixeltable/dataframe.py +11 -6
  11. pixeltable/env.py +12 -0
  12. pixeltable/exec/expr_eval/evaluators.py +4 -2
  13. pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
  14. pixeltable/exprs/comparison.py +8 -4
  15. pixeltable/exprs/data_row.py +9 -7
  16. pixeltable/exprs/expr.py +2 -2
  17. pixeltable/exprs/function_call.py +155 -313
  18. pixeltable/exprs/json_mapper.py +25 -8
  19. pixeltable/exprs/json_path.py +6 -5
  20. pixeltable/exprs/object_ref.py +16 -5
  21. pixeltable/exprs/row_builder.py +10 -3
  22. pixeltable/func/aggregate_function.py +29 -15
  23. pixeltable/func/callable_function.py +11 -8
  24. pixeltable/func/expr_template_function.py +3 -9
  25. pixeltable/func/function.py +148 -74
  26. pixeltable/func/signature.py +65 -30
  27. pixeltable/func/tools.py +26 -26
  28. pixeltable/func/udf.py +1 -1
  29. pixeltable/functions/__init__.py +1 -0
  30. pixeltable/functions/anthropic.py +9 -3
  31. pixeltable/functions/deepseek.py +121 -0
  32. pixeltable/functions/image.py +7 -7
  33. pixeltable/functions/openai.py +30 -13
  34. pixeltable/functions/video.py +14 -7
  35. pixeltable/globals.py +14 -3
  36. pixeltable/index/embedding_index.py +4 -13
  37. pixeltable/io/globals.py +88 -77
  38. pixeltable/io/hf_datasets.py +34 -34
  39. pixeltable/io/pandas.py +75 -76
  40. pixeltable/io/parquet.py +19 -27
  41. pixeltable/io/utils.py +115 -0
  42. pixeltable/iterators/audio.py +2 -1
  43. pixeltable/iterators/video.py +1 -1
  44. pixeltable/metadata/__init__.py +2 -1
  45. pixeltable/metadata/converters/convert_15.py +18 -8
  46. pixeltable/metadata/converters/convert_27.py +31 -0
  47. pixeltable/metadata/converters/convert_28.py +15 -0
  48. pixeltable/metadata/converters/convert_29.py +111 -0
  49. pixeltable/metadata/converters/util.py +12 -1
  50. pixeltable/metadata/notes.py +3 -0
  51. pixeltable/metadata/schema.py +8 -0
  52. pixeltable/share/__init__.py +1 -0
  53. pixeltable/share/packager.py +41 -13
  54. pixeltable/share/publish.py +97 -0
  55. pixeltable/type_system.py +40 -14
  56. pixeltable/utils/__init__.py +41 -0
  57. pixeltable/utils/arrow.py +40 -7
  58. pixeltable/utils/formatter.py +1 -1
  59. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/METADATA +34 -49
  60. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/RECORD +63 -57
  61. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/WHEEL +1 -1
  62. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/LICENSE +0 -0
  63. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/entry_points.txt +0 -0
pixeltable/io/parquet.py CHANGED
@@ -15,10 +15,11 @@ import PIL.Image
 
 import pixeltable as pxt
 import pixeltable.exceptions as exc
-import pixeltable.type_system as ts
 from pixeltable.env import Env
 from pixeltable.utils.transactional_directory import transactional_directory
 
+from .utils import normalize_import_parameters, normalize_schema_names
+
 if typing.TYPE_CHECKING:
     import pyarrow as pa
 
@@ -148,19 +149,13 @@ def export_parquet(
         _write_batch(current_value_batch, arrow_schema, temp_path / f'part-{batch_num:05d}.parquet')
 
 
-def parquet_schema_to_pixeltable_schema(parquet_path: str) -> dict[str, Optional[ts.ColumnType]]:
-    """Generate a default pixeltable schema for the given parquet file. Returns None for unknown types."""
-    from pyarrow import parquet
-
-    from pixeltable.utils.arrow import to_pixeltable_schema
-
-    input_path = Path(parquet_path).expanduser()
-    parquet_dataset = parquet.ParquetDataset(str(input_path))
-    return to_pixeltable_schema(parquet_dataset.schema)
-
-
 def import_parquet(
-    table: str, *, parquet_path: str, schema_overrides: Optional[dict[str, ts.ColumnType]] = None, **kwargs: Any
+    table: str,
+    *,
+    parquet_path: str,
+    schema_overrides: Optional[dict[str, Any]] = None,
+    primary_key: Optional[Union[str, list[str]]] = None,
+    **kwargs: Any,
 ) -> pxt.Table:
     """Creates a new base table from a Parquet file or set of files. Requires pyarrow to be installed.
 
@@ -171,6 +166,7 @@ def import_parquet(
             name `name` will be given type `type`, instead of being inferred from the Parquet dataset. The keys in
             `schema_overrides` should be the column names of the Parquet dataset (whether or not they are valid
             Pixeltable identifiers).
+        primary_key: The primary key of the table (see [`create_table()`][pixeltable.create_table]).
         kwargs: Additional arguments to pass to `create_table`.
 
     Returns:
@@ -178,33 +174,29 @@ def import_parquet(
     """
     from pyarrow import parquet
 
-    import pixeltable as pxt
-    from pixeltable.utils.arrow import iter_tuples
+    from pixeltable.utils.arrow import ar_infer_schema, iter_tuples2
 
     input_path = Path(parquet_path).expanduser()
     parquet_dataset = parquet.ParquetDataset(str(input_path))
 
-    schema = parquet_schema_to_pixeltable_schema(parquet_path)
-    if schema_overrides is None:
-        schema_overrides = {}
-
-    schema.update(schema_overrides)
-    for k, v in schema.items():
-        if v is None:
-            raise exc.Error(f'Could not infer pixeltable type for column {k} from parquet file')
+    schema_overrides, primary_key = normalize_import_parameters(schema_overrides, primary_key)
+    ar_schema = ar_infer_schema(parquet_dataset.schema, schema_overrides, primary_key)
+    schema, pxt_pk, col_mapping = normalize_schema_names(ar_schema, primary_key, schema_overrides, False)
 
     if table in pxt.list_tables():
         raise exc.Error(f'Table {table} already exists')
 
+    tmp_name = f'{table}_tmp_{random.randint(0, 100000000)}'
+    total_rows = 0
     try:
-        tmp_name = f'{table}_tmp_{random.randint(0, 100000000)}'
-        tab = pxt.create_table(tmp_name, schema, **kwargs)
+        tab = pxt.create_table(tmp_name, schema, primary_key=pxt_pk, **kwargs)
         for fragment in parquet_dataset.fragments:  # type: ignore[attr-defined]
             for batch in fragment.to_batches():
-                dict_batch = list(iter_tuples(batch))
+                dict_batch = list(iter_tuples2(batch, col_mapping, schema))
+                total_rows += len(dict_batch)
                 tab.insert(dict_batch)
     except Exception as e:
-        _logger.error(f'Error while inserting Parquet file into table: {e}')
+        _logger.error(f'Error after inserting {total_rows} rows from Parquet file into table: {e}')
         raise e
 
     pxt.move(tmp_name, table)
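
With the reworked signature, `import_parquet` can carry a primary key through to the created table, and source column names are normalized to valid Pixeltable identifiers. A minimal usage sketch (the file path and column name are hypothetical; `pxt.io.import_parquet` is assumed to be the public entry point for this function):

    import pixeltable as pxt

    # a source column named 'user id' is normalized to the identifier 'user_id';
    # primary_key refers to the source column name and is remapped automatically
    tbl = pxt.io.import_parquet(
        'users',
        parquet_path='data/users.parquet',
        primary_key='user id',
    )
    print(tbl.count())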
pixeltable/io/utils.py ADDED
@@ -0,0 +1,115 @@
+from keyword import iskeyword as is_python_keyword
+from typing import Any, Optional, Union
+
+import pixeltable as pxt
+import pixeltable.exceptions as excs
+from pixeltable import Table
+from pixeltable.catalog.globals import is_system_column_name
+
+
+def normalize_pxt_col_name(name: str) -> str:
+    """
+    Normalizes an arbitrary DataFrame column name into a valid Pixeltable identifier by:
+    - replacing any non-ascii or non-alphanumeric characters with an underscore _
+    - prefixing the result with the letter 'c' if it starts with an underscore or a number
+    """
+    id = ''.join(ch if ch.isascii() and ch.isalnum() else '_' for ch in name)
+    if id[0].isnumeric():
+        id = f'c_{id}'
+    elif id[0] == '_':
+        id = f'c{id}'
+    assert pxt.catalog.is_valid_identifier(id), id
+    return id
+
+
+def normalize_import_parameters(
+    schema_overrides: Optional[dict[str, Any]] = None, primary_key: Optional[Union[str, list[str]]] = None
+) -> tuple[dict[str, Any], list[str]]:
+    if schema_overrides is None:
+        schema_overrides = {}
+    if primary_key is None:
+        primary_key = []
+    elif isinstance(primary_key, str):
+        primary_key = [primary_key]
+    return schema_overrides, primary_key
+
+
+def _is_usable_as_column_name(name: str, destination_schema: dict[str, Any]) -> bool:
+    return not (is_system_column_name(name) or is_python_keyword(name) or name in destination_schema)
+
+
+def normalize_schema_names(
+    in_schema: dict[str, Any],
+    primary_key: list[str],
+    schema_overrides: dict[str, Any],
+    require_valid_pxt_column_names: bool = False,
+) -> tuple[dict[str, Any], list[str], Optional[dict[str, str]]]:
+    """
+    Convert all names in the input schema from source names to valid Pixeltable identifiers.
+    - Ensure that all names are unique.
+    - Report an error if any types are missing.
+    - If `require_valid_pxt_column_names`, report an error if any column names are not valid Pixeltable column names.
+    - Report an error if any primary key columns are missing.
+    Returns:
+    - A new schema with normalized column names
+    - The primary key columns, mapped to the normalized names
+    - A mapping from the original names to the normalized names
+    """
+
+    # Report any untyped columns as an error
+    untyped_cols = [in_name for in_name, column_type in in_schema.items() if column_type is None]
+    if len(untyped_cols) > 0:
+        raise excs.Error(f'Could not infer pixeltable type for column(s): {", ".join(untyped_cols)}')
+
+    # Report any columns in `schema_overrides` that are not in the source
+    extraneous_overrides = schema_overrides.keys() - in_schema.keys()
+    if len(extraneous_overrides) > 0:
+        raise excs.Error(
+            f'Some column(s) specified in `schema_overrides` are not present in the source: {", ".join(extraneous_overrides)}'
+        )
+
+    schema: dict[str, Any] = {}
+    col_mapping: dict[str, str] = {}  # Maps column names to Pixeltable column names if needed
+    for in_name, pxt_type in in_schema.items():
+        pxt_name = normalize_pxt_col_name(in_name)
+        # Ensure that column names are unique by appending a distinguishing suffix
+        # to any collisions
+        pxt_fname = pxt_name
+        n = 1
+        while not _is_usable_as_column_name(pxt_fname, schema):
+            pxt_fname = f'{pxt_name}_{n}'
+            n += 1
+        schema[pxt_fname] = pxt_type
+        col_mapping[in_name] = pxt_fname
+
+    # Determine if the col_mapping is the identity mapping
+    non_identity_keys = [k for k, v in col_mapping.items() if k != v]
+    if len(non_identity_keys) > 0:
+        if require_valid_pxt_column_names:
+            raise excs.Error(
+                f'Column names must be valid pixeltable identifiers. Invalid names: {", ".join(non_identity_keys)}'
+            )
+    else:
+        col_mapping = None
+
+    # Report any primary key columns that are not in the source as an error
+    missing_pk = [pk for pk in primary_key if pk not in in_schema]
+    if len(missing_pk) > 0:
+        raise excs.Error(f'Primary key column(s) are not found in the source: {", ".join(missing_pk)}')
+
+    pxt_pk = [col_mapping[pk] for pk in primary_key] if col_mapping is not None else primary_key
+
+    return schema, pxt_pk, col_mapping
+
+
+def find_or_create_table(
+    tbl_path: str,
+    schema: dict[str, Any],
+    *,
+    primary_key: Optional[Union[str, list[str]]],
+    num_retained_versions: int,
+    comment: str,
+) -> Table:
+    return pxt.create_table(
+        tbl_path, schema, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment
+    )
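
The normalization rules in `normalize_pxt_col_name` are easiest to see on concrete inputs. A standalone re-implementation of the same logic, for illustration only (not the library API); colliding or reserved names additionally receive `_1`, `_2`, ... suffixes via `_is_usable_as_column_name`:

    def normalize(name: str) -> str:
        # mirrors normalize_pxt_col_name above
        ident = ''.join(ch if ch.isascii() and ch.isalnum() else '_' for ch in name)
        if ident[0].isnumeric():
            ident = f'c_{ident}'
        elif ident[0] == '_':
            ident = f'c{ident}'
        return ident

    assert normalize('user id') == 'user_id'          # spaces become underscores
    assert normalize('2024_sales') == 'c_2024_sales'  # leading digit: 'c_' prefix
    assert normalize('_hidden') == 'c_hidden'         # leading underscore: 'c' prefix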
pixeltable/iterators/audio.py CHANGED
@@ -5,7 +5,7 @@ from fractions import Fraction
 from pathlib import Path
 from typing import Any, Optional
 
-import av  # type: ignore[import-untyped]
+import av
 
 import pixeltable.env as env
 import pixeltable.exceptions as excs
@@ -146,6 +146,7 @@ class AudioSplitter(ComponentIterator):
         input_stream = self.container.streams.audio[0]
         codec_name = AudioSplitter.__codec_map.get(input_stream.codec_context.name, input_stream.codec_context.name)
         output_stream = output_container.add_stream(codec_name, rate=input_stream.codec_context.sample_rate)
+        assert isinstance(output_stream, av.audio.stream.AudioStream)
        frame_count = 0
        # Since frames don't align with chunk boundaries, we may have read an extra frame in previous iteration
        # Seek to the nearest frame in stream at current chunk start time
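
The dropped `# type: ignore` and the new assert go together: with typed PyAV in use, `add_stream()` is declared to return a generic stream, and the runtime check narrows it to an audio stream. A minimal sketch of the pattern (output file and codec are hypothetical):

    import av

    with av.open('out.mp3', mode='w') as container:
        stream = container.add_stream('mp3', rate=44100)
        # narrows the generic Stream type for the type checker and
        # fails fast if a non-audio codec slipped through
        assert isinstance(stream, av.audio.stream.AudioStream)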
pixeltable/iterators/video.py CHANGED
@@ -4,7 +4,7 @@ from fractions import Fraction
 from pathlib import Path
 from typing import Any, Optional, Sequence
 
-import av  # type: ignore[import-untyped]
+import av
 import pandas as pd
 import PIL.Image
 
pixeltable/metadata/__init__.py CHANGED
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
 from .schema import SystemInfo, SystemInfoMd
 
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 27
+VERSION = 30
 
 
 def create_system_info(engine: sql.engine.Engine) -> None:
@@ -31,6 +31,7 @@ converter_cbs: dict[int, Callable[[sql.engine.Engine], None]] = {}
 def register_converter(version: int) -> Callable[[Callable[[sql.engine.Engine], None]], None]:
     def decorator(fn: Callable[[sql.engine.Engine], None]) -> None:
         global converter_cbs
+        assert version not in converter_cbs
         converter_cbs[version] = fn
 
     return decorator
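
The new assert turns a silently overwriting double registration into a hard failure: each metadata version gets exactly one converter. A toy sketch of the guarded registry behavior:

    converter_cbs: dict[int, object] = {}

    def register(version: int, fn: object) -> None:
        # mirrors the decorator body above
        assert version not in converter_cbs
        converter_cbs[version] = fn

    register(27, lambda engine: None)
    # a second register(27, ...) now raises AssertionError instead of replacing the first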
pixeltable/metadata/converters/convert_15.py CHANGED
@@ -17,7 +17,7 @@ _logger = logging.getLogger('pixeltable')
 def _(engine: sql.engine.Engine) -> None:
     with engine.begin() as conn:
         for row in conn.execute(sql.select(Function)):
-            id, dir_id, md, binary_obj = row
+            id, _, md, binary_obj = row
             md['md'] = __update_md(md['md'], binary_obj)
             _logger.info(f'Updating function: {id}')
             conn.execute(sql.update(Function).where(Function.id == id).values(md=md))
@@ -27,14 +27,24 @@ def __update_md(orig_d: dict, binary_obj: bytes) -> Any:
     # construct dict produced by CallableFunction.to_store()
     py_fn = cloudpickle.loads(binary_obj)
     py_params = inspect.signature(py_fn).parameters
-    return_type = ts.ColumnType.from_dict(orig_d['return_type'])
-    params: list[func.Parameter] = []
+    return_type = orig_d['return_type']
+    params: list[dict] = []
     for name, col_type_dict, kind_int, is_batched in orig_d['parameters']:
-        col_type = ts.ColumnType.from_dict(col_type_dict) if col_type_dict is not None else None
         default = py_params[name].default
-        kind = inspect._ParameterKind(kind_int)  # is there a way to avoid referencing a private type?
-        params.append(func.Parameter(name=name, col_type=col_type, kind=kind, default=default, is_batched=is_batched))
+        kind = inspect._ParameterKind(kind_int)
+        params.append(
+            {
+                'name': name,
+                'col_type': col_type_dict,
+                'kind': str(kind),
+                'is_batched': is_batched,
+                'has_default': default is not inspect.Parameter.empty,
+                'default': None if default is inspect.Parameter.empty else default,
+            }
+        )
     is_batched = 'batch_size' in orig_d
-    sig = func.Signature(return_type, params, is_batched=is_batched)
-    d = {'signature': sig.as_dict(), 'batch_size': orig_d['batch_size'] if is_batched else None}
+    d = {
+        'signature': {'return_type': return_type, 'parameters': params, 'is_batched': is_batched},
+        'batch_size': orig_d['batch_size'] if is_batched else None,
+    }
     return d
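
The converter now emits plain dicts rather than instantiating live `func.Parameter`/`func.Signature` objects, so old metadata stays migratable even as those classes evolve. The shape of one serialized parameter, sketched with illustrative values:

    import inspect

    kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
    param_md = {
        'name': 'x',
        'col_type': None,   # serialized type dict, passed through unchanged
        'kind': str(kind),  # 'POSITIONAL_OR_KEYWORD'
        'is_batched': False,
        'has_default': True,
        'default': 5,       # raw value; converter 29 below rewrites defaults as Literals
    }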
pixeltable/metadata/converters/convert_27.py ADDED
@@ -0,0 +1,31 @@
+import logging
+from typing import Any, Optional
+from uuid import UUID
+
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+from pixeltable.metadata.schema import Table
+
+_logger = logging.getLogger('pixeltable')
+
+
+@register_converter(version=27)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(engine, table_md_updater=__update_table_md)
+
+
+def __update_table_md(table_md: dict, table_id: UUID) -> None:
+    """Update the view metadata to add the include_base_columns boolean if it is missing.
+
+    Args:
+        table_md (dict): copy of the original table metadata; this gets updated in place.
+        table_id (UUID): the table id
+    """
+    if table_md['view_md'] is None:
+        return
+    if 'include_base_columns' not in table_md['view_md']:
+        table_md['view_md']['include_base_columns'] = True
+        _logger.info(f'Updating view metadata for table: {table_id}')
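
A toy illustration of the conversion on a hypothetical metadata dict:

    table_md = {'view_md': {'is_snapshot': False, 'base_versions': []}}
    view_md = table_md['view_md']
    if view_md is not None and 'include_base_columns' not in view_md:
        view_md['include_base_columns'] = True  # pre-existing views keep all base columns
    assert view_md['include_base_columns'] is True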
pixeltable/metadata/converters/convert_28.py ADDED
@@ -0,0 +1,15 @@
+import logging
+
+import sqlalchemy as sql
+
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.schema import Dir, Table, TableSchemaVersion, TableVersion
+
+
+@register_converter(version=28)
+def _(engine: sql.engine.Engine) -> None:
+    with engine.begin() as conn:
+        conn.execute(sql.update(Dir).values(md=Dir.md.concat({'user': None, 'additional_md': {}})))
+        conn.execute(sql.update(Table).values(md=Table.md.concat({'user': None, 'additional_md': {}})))
+        conn.execute(sql.update(TableVersion).values(md=TableVersion.md.concat({'additional_md': {}})))
+        conn.execute(sql.update(TableSchemaVersion).values(md=TableSchemaVersion.md.concat({'additional_md': {}})))
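
`md.concat(...)` compiles to the Postgres JSONB `||` operator, a shallow merge that preserves existing keys and adds the new ones. A plain-Python illustration of the semantics (the stored row value is hypothetical):

    existing_md = {'name': 'my_dir'}  # illustrative stored value
    merged = {**existing_md, 'user': None, 'additional_md': {}}
    assert merged == {'name': 'my_dir', 'user': None, 'additional_md': {}}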
pixeltable/metadata/converters/convert_29.py ADDED
@@ -0,0 +1,111 @@
+from typing import Any, Optional
+
+import sqlalchemy as sql
+
+from pixeltable import exprs
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+
+
+@register_converter(version=29)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(engine, substitution_fn=__substitute_md)
+
+
+def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+    # Defaults are now stored as literals in signatures
+    if k == 'parameters':
+        for param in v:
+            assert isinstance(param, dict)
+            has_default = param.get('has_default') or (param.get('default') is not None)
+            if 'has_default' in param:
+                del param['has_default']
+            literal = exprs.Expr.from_object(param['default']) if has_default else None
+            assert literal is None or isinstance(literal, exprs.Literal)
+            param['default'] = None if literal is None else literal.as_dict()
+        return k, v
+
+    # Method of organizing argument expressions has changed
+    if isinstance(v, dict) and v.get('_classname') == 'FunctionCall':
+        args = v['args']
+        kwargs = v['kwargs']
+        components = v['components']
+        group_by_start_idx = v['group_by_start_idx']
+        group_by_stop_idx = v['group_by_stop_idx']
+        order_by_start_idx = v['order_by_start_idx']
+
+        new_args = []
+        for arg in args:
+            if arg[0] is not None:
+                assert isinstance(arg[0], int)
+                new_args.append(components[arg[0]])
+            else:
+                literal = exprs.Expr.from_object(arg[1])
+                new_args.append(literal.as_dict())
+
+        new_kwargs = {}
+        for name, kwarg in kwargs.items():
+            if kwarg[0] is not None:
+                assert isinstance(kwarg[0], int)
+                new_kwargs[name] = components[kwarg[0]]
+            else:
+                literal = exprs.Expr.from_object(kwarg[1])
+                new_kwargs[name] = literal.as_dict()
+
+        # We need to expand ("unroll") any var-args or var-kwargs.
+
+        new_args_len = len(new_args)
+        rolled_args: Optional[dict] = None
+        rolled_kwargs: Optional[dict] = None
+
+        if 'signature' in v['fn']:
+            # If it's a pickled function, there's no signature, so we're out of luck; varargs in a pickled function
+            # is an edge case that won't migrate properly.
+            parameters: list[dict] = v['fn']['signature']['parameters']
+            for i, param in enumerate(parameters):
+                if param['kind'] == 'VAR_POSITIONAL':
+                    if new_args_len > i:
+                        # For peculiar historical reasons, variable kwargs might show up in args. Thus variable
+                        # positional args is not necessarily the last element of args; it might be the second-to-last.
+                        assert new_args_len <= i + 2, new_args
+                        rolled_args = new_args[i]
+                        new_args = new_args[:i] + new_args[i + 1 :]
+                if param['kind'] == 'VAR_KEYWORD':
+                    # As noted above, variable kwargs might show up either in args or in kwargs. If it's in args, it
+                    # is necessarily the last element.
+                    if new_args_len > i:
+                        assert new_args_len <= i + 1, new_args
+                        rolled_kwargs = new_args.pop()
+                    if param['name'] in kwargs:
+                        assert rolled_kwargs is None
+                        rolled_kwargs = kwargs.pop(param['name'])
+
+        if rolled_args is not None:
+            assert rolled_args['_classname'] in ('InlineArray', 'InlineList')
+            new_args.extend(rolled_args['components'])
+        if rolled_kwargs is not None:
+            assert rolled_kwargs['_classname'] == 'InlineDict'
+            new_kwargs.update(zip(rolled_kwargs['keys'], rolled_kwargs['components']))
+
+        group_by_exprs = [components[i] for i in range(group_by_start_idx, group_by_stop_idx)]
+        order_by_exprs = [components[i] for i in range(order_by_start_idx, len(components))]
+
+        new_components = [*new_args, *new_kwargs.values(), *group_by_exprs, *order_by_exprs]
+
+        newv = {
+            'fn': v['fn'],
+            'arg_idxs': list(range(len(new_args))),
+            'kwarg_idxs': {name: i + len(new_args) for i, name in enumerate(new_kwargs.keys())},
+            'group_by_start_idx': len(new_args) + len(new_kwargs),
+            'group_by_stop_idx': len(new_args) + len(new_kwargs) + len(group_by_exprs),
+            'order_by_start_idx': len(new_args) + len(new_kwargs) + len(group_by_exprs),
+            'is_method_call': False,
+            '_classname': 'FunctionCall',
+            'components': new_components,
+        }
+        if 'return_type' in v:
+            newv['return_type'] = v['return_type']
+
+        return k, newv
+
+    return None
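
After this migration a FunctionCall keeps every argument expression in `components`, with `arg_idxs`/`kwarg_idxs` indexing into it and group-by/order-by expressions appended at the end. A toy illustration of the layout invariant (expression dicts abbreviated as strings):

    new_args = ['<arg0>', '<arg1>']     # positional argument exprs
    new_kwargs = {'model': '<kwarg0>'}  # keyword argument exprs
    group_by_exprs, order_by_exprs = [], ['<order0>']

    components = [*new_args, *new_kwargs.values(), *group_by_exprs, *order_by_exprs]
    arg_idxs = list(range(len(new_args)))                                        # [0, 1]
    kwarg_idxs = {name: i + len(new_args) for i, name in enumerate(new_kwargs)}  # {'model': 2}
    order_by_start_idx = len(new_args) + len(new_kwargs) + len(group_by_exprs)   # 3
    assert components[kwarg_idxs['model']] == '<kwarg0>'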
pixeltable/metadata/converters/util.py CHANGED
@@ -5,7 +5,7 @@ from uuid import UUID
 
 import sqlalchemy as sql
 
-from pixeltable.metadata.schema import Table, TableSchemaVersion
+from pixeltable.metadata.schema import Function, Table, TableSchemaVersion
 
 __logger = logging.getLogger('pixeltable')
 
@@ -50,6 +50,17 @@ def convert_table_md(
             __logger.info(f'Updating schema for table: {id}')
             conn.execute(sql.update(Table).where(Table.id == id).values(md=updated_table_md))
 
+        for row in conn.execute(sql.select(Function)):
+            id = row[0]
+            function_md = row[2]
+            assert isinstance(function_md, dict)
+            updated_function_md = copy.deepcopy(function_md)
+            if substitution_fn is not None:
+                updated_function_md = __substitute_md_rec(updated_function_md, substitution_fn)
+            if updated_function_md != function_md:
+                __logger.info(f'Updating function: {id}')
+                conn.execute(sql.update(Function).where(Function.id == id).values(md=updated_function_md))
+
 
 def __update_column_md(table_md: dict, column_md_updater: Callable[[dict], None]) -> None:
     columns_md = table_md['column_md']
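
With this addition, any converter that passes a `substitution_fn` also rewrites stored Function metadata, not just table metadata. Judging from its use in convert_29 above, the callback returns None to leave a key/value pair unchanged, or a replacement pair to substitute; a hypothetical sketch:

    from typing import Any, Optional

    def my_substitution(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
        if k == 'old_key':      # hypothetical key rename
            return 'new_key', v
        return None             # no substitution; traversal continues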
pixeltable/metadata/notes.py CHANGED
@@ -2,6 +2,9 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    30: 'Store default values and constant arguments as literals',
+    29: 'Add user and additional_md fields to metadata structs',
+    28: 'Enable view creation from DataFrame with select clause',
     27: 'Enable pxt.query parameterization of limit clauses',
     26: 'Rename clip_text and clip_image to clip',
     25: 'Functions with multiple signatures',
pixeltable/metadata/schema.py CHANGED
@@ -74,6 +74,8 @@ class SystemInfo(Base):
 @dataclasses.dataclass
 class DirMd:
     name: str
+    user: Optional[str]
+    additional_md: dict[str, Any]
 
 
 class Dir(Base):
@@ -132,6 +134,7 @@ class IndexMd:
 @dataclasses.dataclass
 class ViewMd:
     is_snapshot: bool
+    include_base_columns: bool
 
     # (table id, version); for mutable views, all versions are None
     base_versions: list[tuple[str, Optional[int]]]
@@ -150,6 +153,8 @@ class ViewMd:
 class TableMd:
     name: str
 
+    user: Optional[str]
+
     # monotonically increasing w/in Table for both data and schema changes, starting at 0
     current_version: int
     # each version has a corresponding schema version (current_version >= current_schema_version)
@@ -169,6 +174,7 @@ class TableMd:
     column_md: dict[int, ColumnMd]  # col_id -> ColumnMd
     index_md: dict[int, IndexMd]  # index_id -> IndexMd
     view_md: Optional[ViewMd]
+    additional_md: dict[str, Any]
 
 
 class Table(Base):
@@ -194,6 +200,7 @@ class TableVersionMd:
     created_at: float  # time.time()
     version: int
     schema_version: int
+    additional_md: dict[str, Any]
 
 
 class TableVersion(Base):
@@ -232,6 +239,7 @@ class TableSchemaVersionMd:
     # default validation strategy for any media column of this table
     # stores column.MediaValiation.name.lower()
     media_validation: str
+    additional_md: dict[str, Any]
 
 
 # versioning: each table schema change results in a new record
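
These dataclass fields mean newly written records carry the metadata explicitly, while converter 28 above backfills existing rows. For example, constructing a `DirMd` now looks like this (values illustrative, fields per the diff above):

    from pixeltable.metadata.schema import DirMd

    md = DirMd(name='my_dir', user=None, additional_md={})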
pixeltable/share/__init__.py ADDED
@@ -0,0 +1 @@
+from .publish import publish_snapshot