pixeltable 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/table.py +247 -83
  3. pixeltable/catalog/view.py +5 -2
  4. pixeltable/dataframe.py +240 -92
  5. pixeltable/exec/__init__.py +1 -1
  6. pixeltable/exec/exec_node.py +6 -7
  7. pixeltable/exec/sql_node.py +91 -44
  8. pixeltable/exprs/__init__.py +1 -0
  9. pixeltable/exprs/arithmetic_expr.py +1 -1
  10. pixeltable/exprs/array_slice.py +1 -1
  11. pixeltable/exprs/column_property_ref.py +1 -1
  12. pixeltable/exprs/column_ref.py +29 -2
  13. pixeltable/exprs/comparison.py +1 -1
  14. pixeltable/exprs/compound_predicate.py +1 -1
  15. pixeltable/exprs/expr.py +11 -5
  16. pixeltable/exprs/expr_set.py +8 -0
  17. pixeltable/exprs/function_call.py +14 -11
  18. pixeltable/exprs/in_predicate.py +1 -1
  19. pixeltable/exprs/inline_expr.py +3 -3
  20. pixeltable/exprs/is_null.py +1 -1
  21. pixeltable/exprs/json_mapper.py +1 -1
  22. pixeltable/exprs/json_path.py +1 -1
  23. pixeltable/exprs/method_ref.py +1 -1
  24. pixeltable/exprs/rowid_ref.py +1 -1
  25. pixeltable/exprs/similarity_expr.py +4 -1
  26. pixeltable/exprs/sql_element_cache.py +4 -0
  27. pixeltable/exprs/type_cast.py +2 -2
  28. pixeltable/exprs/variable.py +3 -0
  29. pixeltable/func/expr_template_function.py +3 -0
  30. pixeltable/func/function.py +37 -1
  31. pixeltable/func/signature.py +1 -0
  32. pixeltable/functions/mistralai.py +0 -2
  33. pixeltable/functions/ollama.py +4 -4
  34. pixeltable/globals.py +32 -18
  35. pixeltable/index/embedding_index.py +6 -1
  36. pixeltable/io/__init__.py +1 -1
  37. pixeltable/io/parquet.py +39 -19
  38. pixeltable/iterators/__init__.py +1 -0
  39. pixeltable/iterators/image.py +100 -0
  40. pixeltable/iterators/video.py +7 -8
  41. pixeltable/metadata/__init__.py +1 -1
  42. pixeltable/metadata/converters/convert_22.py +17 -0
  43. pixeltable/metadata/notes.py +1 -0
  44. pixeltable/plan.py +129 -51
  45. pixeltable/store.py +1 -1
  46. pixeltable/tool/create_test_db_dump.py +4 -1
  47. pixeltable/type_system.py +1 -1
  48. pixeltable/utils/arrow.py +8 -3
  49. pixeltable/utils/description_helper.py +89 -0
  50. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/METADATA +28 -12
  51. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/RECORD +54 -51
  52. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/WHEEL +1 -1
  53. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/LICENSE +0 -0
  54. {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/entry_points.txt +0 -0
@@ -3,16 +3,20 @@ from __future__ import annotations
3
3
  import abc
4
4
  import importlib
5
5
  import inspect
6
- from typing import Any, Callable, Optional
6
+ from typing import TYPE_CHECKING, Any, Callable, Optional
7
7
 
8
8
  import sqlalchemy as sql
9
9
 
10
10
  import pixeltable as pxt
11
+ import pixeltable.exceptions as excs
11
12
  import pixeltable.type_system as ts
12
13
 
13
14
  from .globals import resolve_symbol
14
15
  from .signature import Signature
15
16
 
17
+ if TYPE_CHECKING:
18
+ from .expr_template_function import ExprTemplateFunction
19
+
16
20
 
17
21
  class Function(abc.ABC):
18
22
  """Base class for Pixeltable's function interface.
@@ -99,6 +103,38 @@ class Function(abc.ABC):
99
103
  self._conditional_return_type = fn
100
104
  return fn
101
105
 
106
+ def using(self, **kwargs: Any) -> 'ExprTemplateFunction':
107
+ from pixeltable import exprs
108
+
109
+ from .expr_template_function import ExprTemplateFunction
110
+
111
+ # Resolve each kwarg into a parameter binding
112
+ bindings: dict[str, exprs.Expr] = {}
113
+ for k, v in kwargs.items():
114
+ if k not in self.signature.parameters:
115
+ raise excs.Error(f'Unknown parameter: {k}')
116
+ param = self.signature.parameters[k]
117
+ expr = exprs.Expr.from_object(v)
118
+ if not param.col_type.is_supertype_of(expr.col_type):
119
+ raise excs.Error(f'Expected type `{param.col_type}` for parameter `{k}`; got `{expr.col_type}`')
120
+ bindings[k] = v # Use the original value, not the Expr (The Expr is only for validation)
121
+
122
+ residual_params = [
123
+ p for p in self.signature.parameters.values() if p.name not in bindings
124
+ ]
125
+
126
+ # Bind each remaining parameter to a like-named variable
127
+ for param in residual_params:
128
+ bindings[param.name] = exprs.Variable(param.name, param.col_type)
129
+
130
+ call = exprs.FunctionCall(self, bindings)
131
+
132
+ # Construct the (n-k)-ary signature of the new function. We use `call.col_type` for this, rather than
133
+ # `self.signature.return_type`, because the return type of the new function may be specialized via a
134
+ # conditional return type.
135
+ new_signature = Signature(call.col_type, residual_params, self.signature.is_batched)
136
+ return ExprTemplateFunction(call, new_signature)
137
+
102
138
  @abc.abstractmethod
103
139
  def exec(self, *args: Any, **kwargs: Any) -> Any:
104
140
  """Execute the function with the given arguments and return the result."""
@@ -91,6 +91,7 @@ class Signature:
91
91
  self.parameters_by_pos = parameters.copy()
92
92
  self.constant_parameters = [p for p in parameters if not p.is_batched]
93
93
  self.batched_parameters = [p for p in parameters if p.is_batched]
94
+ self.required_parameters = [p for p in parameters if not p.has_default()]
94
95
  self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
95
96
 
96
97
  def get_return_type(self) -> ts.ColumnType:
@@ -36,7 +36,6 @@ def chat_completions(
36
36
  temperature: Optional[float] = 0.7,
37
37
  top_p: Optional[float] = 1.0,
38
38
  max_tokens: Optional[int] = None,
39
- min_tokens: Optional[int] = None,
40
39
  stop: Optional[list[str]] = None,
41
40
  random_seed: Optional[int] = None,
42
41
  response_format: Optional[dict] = None,
@@ -75,7 +74,6 @@ def chat_completions(
75
74
  temperature=temperature,
76
75
  top_p=top_p,
77
76
  max_tokens=_opt(max_tokens),
78
- min_tokens=_opt(min_tokens),
79
77
  stop=stop,
80
78
  random_seed=_opt(random_seed),
81
79
  response_format=response_format, # type: ignore[arg-type]
@@ -68,7 +68,7 @@ def generate(
68
68
  raw=raw,
69
69
  format=format,
70
70
  options=options,
71
- ) # type: ignore[call-overload]
71
+ ).dict() # type: ignore[call-overload]
72
72
 
73
73
 
74
74
  @pxt.udf
@@ -103,7 +103,7 @@ def chat(
103
103
  tools=tools,
104
104
  format=format,
105
105
  options=options,
106
- ) # type: ignore[call-overload]
106
+ ).dict() # type: ignore[call-overload]
107
107
 
108
108
 
109
109
  @pxt.udf(batch_size=16)
@@ -135,8 +135,8 @@ def embed(
135
135
  model=model,
136
136
  input=input,
137
137
  truncate=truncate,
138
- options=options, # type: ignore[arg-type]
139
- )
138
+ options=options,
139
+ ).dict()
140
140
  return [np.array(data, dtype=np.float64) for data in results['embeddings']]
141
141
 
142
142
 
pixeltable/globals.py CHANGED
@@ -46,6 +46,7 @@ def create_table(
46
46
  num_retained_versions: Number of versions of the table to retain.
47
47
  comment: An optional comment; its meaning is user-defined.
48
48
  media_validation: Media validation policy for the table.
49
+
49
50
  - `'on_read'`: validate media files at query time
50
51
  - `'on_write'`: validate media files during insert/update operations
51
52
 
@@ -149,7 +150,9 @@ def create_view(
149
150
  tbl_version_path = base._tbl_version_path
150
151
  elif isinstance(base, DataFrame):
151
152
  base._validate_mutable('create_view')
152
- tbl_version_path = base.tbl
153
+ if len(base._from_clause.tbls) > 1:
154
+ raise excs.Error('Cannot create a view of a join')
155
+ tbl_version_path = base._from_clause.tbls[0]
153
156
  where = base.where_clause
154
157
  else:
155
158
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
@@ -296,31 +299,42 @@ def move(path: str, new_path: str) -> None:
296
299
  obj._move(new_p.name, new_dir._id)
297
300
 
298
301
 
299
- def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> None:
302
+ def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_errors: bool = False) -> None:
300
303
  """Drop a table, view, or snapshot.
301
304
 
302
305
  Args:
303
- path: Path to the [`Table`][pixeltable.Table].
306
+ table: Fully qualified name, or handle, of the table to be dropped.
304
307
  force: If `True`, will also drop all views and sub-views of this table.
305
308
  ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
306
309
 
307
310
  Raises:
308
- Error: If the path does not exist or does not designate a table object, and `ignore_errors=False`.
311
+ Error: If the name does not exist or does not designate a table object, and `ignore_errors=False`.
309
312
 
310
313
  Examples:
311
- >>> pxt.drop_table('my_table')
314
+ Drop a table by its fully qualified name:
315
+ >>> pxt.drop_table('subdir.my_table')
316
+
317
+ Drop a table by its handle:
318
+ >>> t = pxt.get_table('subdir.my_table')
319
+ ... pxt.drop_table(t)
320
+
312
321
  """
313
322
  cat = Catalog.get()
314
- path_obj = catalog.Path(path)
315
- try:
316
- cat.paths.check_is_valid(path_obj, expected=catalog.Table)
317
- except Exception as e:
318
- if ignore_errors or force:
319
- _logger.info(f'Skipped table `{path}` (does not exist).')
320
- return
321
- else:
322
- raise e
323
- tbl = cat.paths[path_obj]
323
+ if isinstance(table, str):
324
+ tbl_path_obj = catalog.Path(table)
325
+ try:
326
+ cat.paths.check_is_valid(tbl_path_obj, expected=catalog.Table)
327
+ except Exception as e:
328
+ if ignore_errors or force:
329
+ _logger.info(f'Skipped table `{table}` (does not exist).')
330
+ return
331
+ else:
332
+ raise e
333
+ tbl = cat.paths[tbl_path_obj]
334
+ else:
335
+ tbl = table
336
+ tbl_path_obj = catalog.Path(tbl._path)
337
+
324
338
  assert isinstance(tbl, catalog.Table)
325
339
  if len(cat.tbl_dependents[tbl._id]) > 0:
326
340
  dependent_paths = [dep._path for dep in cat.tbl_dependents[tbl._id]]
@@ -328,10 +342,10 @@ def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> N
328
342
  for dependent_path in dependent_paths:
329
343
  drop_table(dependent_path, force=True)
330
344
  else:
331
- raise excs.Error(f'Table {path} has dependents: {", ".join(dependent_paths)}')
345
+ raise excs.Error(f'Table {tbl._path} has dependents: {", ".join(dependent_paths)}')
332
346
  tbl._drop()
333
- del cat.paths[path_obj]
334
- _logger.info(f'Dropped table `{path}`.')
347
+ del cat.paths[tbl_path_obj]
348
+ _logger.info(f'Dropped table `{tbl._path}`.')
335
349
 
336
350
 
337
351
  def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
@@ -136,7 +136,12 @@ class EmbeddingIndex(IndexBase):
136
136
  """Validate the signature"""
137
137
  assert isinstance(embed_fn, func.Function)
138
138
  sig = embed_fn.signature
139
- if len(sig.parameters) != 1 or sig.parameters_by_pos[0].col_type.type_enum != expected_type:
139
+
140
+ # The embedding function must be a 1-ary function of the correct type. But it's ok if the function signature
141
+ # has more than one parameter, as long as it has at most one *required* parameter.
142
+ if (len(sig.parameters) == 0
143
+ or len(sig.required_parameters) > 1
144
+ or sig.parameters_by_pos[0].col_type.type_enum != expected_type):
140
145
  raise excs.Error(
141
146
  f'{name} must take a single {expected_type.name.lower()} parameter, but has signature {sig}')
142
147
 
pixeltable/io/__init__.py CHANGED
@@ -2,7 +2,7 @@ from .external_store import ExternalStore, SyncStatus
2
2
  from .globals import create_label_studio_project, export_images_as_fo_dataset, import_json, import_rows
3
3
  from .hf_datasets import import_huggingface_dataset
4
4
  from .pandas import import_csv, import_excel, import_pandas
5
- from .parquet import import_parquet
5
+ from .parquet import import_parquet, export_parquet
6
6
 
7
7
  __default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
8
8
  __removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet'}
pixeltable/io/parquet.py CHANGED
@@ -7,11 +7,14 @@ import random
7
7
  import typing
8
8
  from collections import deque
9
9
  from pathlib import Path
10
- from typing import Any, Optional
10
+ from typing import Any, Optional, Union
11
11
 
12
12
  import numpy as np
13
13
  import PIL.Image
14
+ import datetime
14
15
 
16
+ import pixeltable as pxt
17
+ from pixeltable.env import Env
15
18
  import pixeltable.exceptions as exc
16
19
  import pixeltable.type_system as ts
17
20
  from pixeltable.utils.transactional_directory import transactional_directory
@@ -39,28 +42,44 @@ def _write_batch(value_batch: dict[str, deque], schema: pa.Schema, output_path:
39
42
  parquet.write_table(tab, str(output_path))
40
43
 
41
44
 
42
- def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int = 100_000_000) -> None:
45
+ def export_parquet(
46
+ table_or_df: Union[pxt.Table, pxt.DataFrame],
47
+ parquet_path: Path,
48
+ partition_size_bytes: int = 100_000_000,
49
+ inline_images: bool = False
50
+ ) -> None:
43
51
  """
44
- Internal method to stream dataframe data to parquet format.
45
- Does not materialize the dataset to memory.
52
+ Exports a dataframe's data to one or more Parquet files. Requires pyarrow to be installed.
46
53
 
47
- It preserves pixeltable type metadata in a json file, which would otherwise
54
+ It additionally writes the pixeltable metadata in a json file, which would otherwise
48
55
  not be available in the parquet format.
49
56
 
50
- Images are stored inline in a compressed format in their parquet file.
51
-
52
57
  Args:
53
- df : dataframe to save.
54
- dest_path : path to directory to save the parquet files to.
55
- partition_size_bytes : maximum target size for each chunk. Default 100_000_000 bytes.
58
+ table_or_df : Table or Dataframe to export.
59
+ parquet_path : Path to directory to write the parquet files to.
60
+ partition_size_bytes : The maximum target size for each chunk. Default 100_000_000 bytes.
61
+ inline_images : If True, images are stored inline in the parquet file. This is useful
62
+ for small images, to be imported as pytorch dataset. But can be inefficient
63
+ for large images, and cannot be imported into pixeltable.
64
+ If False, will raise an error if the Dataframe has any image column.
65
+ Default False.
56
66
  """
57
67
  from pixeltable.utils.arrow import to_arrow_schema
58
68
 
69
+ df: pxt.DataFrame
70
+ if isinstance(table_or_df, pxt.catalog.Table):
71
+ df = table_or_df._df()
72
+ else:
73
+ df = table_or_df
74
+
59
75
  type_dict = {k: v.as_dict() for k, v in df.schema.items()}
60
76
  arrow_schema = to_arrow_schema(df.schema)
61
77
 
78
+ if not inline_images and any(col_type.is_image_type() for col_type in df.schema.values()):
79
+ raise exc.Error('Cannot export Dataframe with image columns when inline_images is False')
80
+
62
81
  # store the changes atomically
63
- with transactional_directory(dest_path) as temp_path:
82
+ with transactional_directory(parquet_path) as temp_path:
64
83
  # dump metadata json file so we can inspect what was the source of the parquet file later on.
65
84
  json.dump(df.as_dict(), (temp_path / '.pixeltable.json').open('w'))
66
85
  json.dump(type_dict, (temp_path / '.pixeltable.column_types.json').open('w')) # keep type metadata
@@ -111,6 +130,7 @@ def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int =
111
130
  elif col_type.is_bool_type():
112
131
  length = 1
113
132
  elif col_type.is_timestamp_type():
133
+ val = val.astimezone(datetime.timezone.utc)
114
134
  length = 8
115
135
  else:
116
136
  assert False, f'unknown type {col_type} for {col_name}'
@@ -139,7 +159,7 @@ def parquet_schema_to_pixeltable_schema(parquet_path: str) -> dict[str, Optional
139
159
 
140
160
 
141
161
  def import_parquet(
142
- table_path: str,
162
+ table: str,
143
163
  *,
144
164
  parquet_path: str,
145
165
  schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
@@ -148,7 +168,7 @@ def import_parquet(
148
168
  """Creates a new base table from a Parquet file or set of files. Requires pyarrow to be installed.
149
169
 
150
170
  Args:
151
- table_path: Path to the table.
171
+ table: Fully qualified name of the table to import the data into.
152
172
  parquet_path: Path to an individual Parquet file or directory of Parquet files.
153
173
  schema_overrides: If specified, then for each (name, type) pair in `schema_overrides`, the column with
154
174
  name `name` will be given type `type`, instead of being inferred from the Parquet dataset. The keys in
@@ -157,7 +177,7 @@ def import_parquet(
157
177
  kwargs: Additional arguments to pass to `create_table`.
158
178
 
159
179
  Returns:
160
- A handle to the newly created [`Table`][pixeltable.Table].
180
+ A handle to the newly created table.
161
181
  """
162
182
  from pyarrow import parquet
163
183
 
@@ -176,11 +196,11 @@ def import_parquet(
176
196
  if v is None:
177
197
  raise exc.Error(f'Could not infer pixeltable type for column {k} from parquet file')
178
198
 
179
- if table_path in pxt.list_tables():
180
- raise exc.Error(f'Table {table_path} already exists')
199
+ if table in pxt.list_tables():
200
+ raise exc.Error(f'Table {table} already exists')
181
201
 
182
202
  try:
183
- tmp_name = f'{table_path}_tmp_{random.randint(0, 100000000)}'
203
+ tmp_name = f'{table}_tmp_{random.randint(0, 100000000)}'
184
204
  tab = pxt.create_table(tmp_name, schema, **kwargs)
185
205
  for fragment in parquet_dataset.fragments: # type: ignore[attr-defined]
186
206
  for batch in fragment.to_batches():
@@ -190,5 +210,5 @@ def import_parquet(
190
210
  _logger.error(f'Error while inserting Parquet file into table: {e}')
191
211
  raise e
192
212
 
193
- pxt.move(tmp_name, table_path)
194
- return pxt.get_table(table_path)
213
+ pxt.move(tmp_name, table)
214
+ return pxt.get_table(table)
@@ -1,5 +1,6 @@
1
1
  from .base import ComponentIterator
2
2
  from .document import DocumentSplitter
3
+ from .image import TileIterator
3
4
  from .string import StringSplitter
4
5
  from .video import FrameIterator
5
6
 
@@ -0,0 +1,100 @@
1
+ from typing import Any, Sequence
2
+
3
+ import PIL.Image
4
+
5
+ import pixeltable.exceptions as excs
6
+ import pixeltable.type_system as ts
7
+ from pixeltable.iterators.base import ComponentIterator
8
+
9
+
10
+ class TileIterator(ComponentIterator):
11
+ """
12
+ Iterator over tiles of an image. Each image will be divided into tiles of size `tile_size`, and the tiles will be
13
+ iterated over in row-major order (left-to-right, then top-to-bottom). An optional `overlap` parameter may be
14
+ specified. If the tiles do not exactly cover the image, then the rightmost and bottommost tiles will be padded with
15
+ blackspace, so that the output images all have the exact size `tile_size`.
16
+
17
+ Args:
18
+ image: Image to split into tiles.
19
+ tile_size: Size of each tile, as a pair of integers `[width, height]`.
20
+ overlap: Amount of overlap between adjacent tiles, as a pair of integers `[width, height]`.
21
+ """
22
+
23
+ __image: PIL.Image.Image
24
+ __tile_size: Sequence[int]
25
+ __overlap: Sequence[int]
26
+ __width: int
27
+ __height: int
28
+ __xlen: int
29
+ __ylen: int
30
+ __i: int
31
+ __j: int
32
+
33
+ def __init__(
34
+ self,
35
+ image: PIL.Image.Image,
36
+ *,
37
+ tile_size: tuple[int, int],
38
+ overlap: tuple[int, int] = (0, 0),
39
+ ):
40
+ if overlap[0] >= tile_size[0] or overlap[1] >= tile_size[1]:
41
+ raise excs.Error(f"overlap dimensions {overlap} are not strictly smaller than tile size {tile_size}")
42
+
43
+ self.__image = image
44
+ self.__image.load()
45
+ self.__tile_size = tile_size
46
+ self.__overlap = overlap
47
+ self.__width, self.__height = image.size
48
+ # Justification for this formula: let t = tile_size[0], o = overlap[0]. Then the values of w (= width) that
49
+ # exactly accommodate an integer number of tiles are t, 2t - o, 3t - 2o, 4t - 3o, ...
50
+ # This formula ensures that t, 2t - o, 3t - 2o, ... result in an xlen of 1, 2, 3, ...
51
+ # but t + 1, 2t - o + 1, 3t - 2o + 1, ... result in an xlen of 2, 3, 4, ...
52
+ self.__xlen = (self.__width - overlap[0] - 1) // (tile_size[0] - overlap[0]) + 1
53
+ self.__ylen = (self.__height - overlap[1] - 1) // (tile_size[1] - overlap[1]) + 1
54
+ self.__i = 0
55
+ self.__j = 0
56
+
57
+ def __next__(self) -> dict[str, Any]:
58
+ if self.__j >= self.__ylen:
59
+ raise StopIteration
60
+
61
+ x1 = self.__i * (self.__tile_size[0] - self.__overlap[0])
62
+ y1 = self.__j * (self.__tile_size[1] - self.__overlap[1])
63
+ # If x2 > self.__width, PIL does the right thing and pads the image with blackspace
64
+ x2 = x1 + self.__tile_size[0]
65
+ y2 = y1 + self.__tile_size[1]
66
+ tile = self.__image.crop((x1, y1, x2, y2))
67
+ result = {
68
+ 'tile': tile,
69
+ 'tile_coord': [self.__i, self.__j],
70
+ 'tile_box': [x1, y1, x2, y2]
71
+ }
72
+
73
+ self.__i += 1
74
+ if self.__i >= self.__xlen:
75
+ self.__i = 0
76
+ self.__j += 1
77
+ return result
78
+
79
+ def close(self) -> None:
80
+ pass
81
+
82
+ def set_pos(self, pos: int) -> None:
83
+ self.__j = pos // self.__xlen
84
+ self.__i = pos % self.__xlen
85
+
86
+ @classmethod
87
+ def input_schema(cls, *args: Any, **kwargs: Any) -> dict[str, ts.ColumnType]:
88
+ return {
89
+ 'image': ts.ImageType(),
90
+ 'tile_size': ts.JsonType(),
91
+ 'overlap': ts.JsonType(),
92
+ }
93
+
94
+ @classmethod
95
+ def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
96
+ return {
97
+ 'tile': ts.ImageType(),
98
+ 'tile_coord': ts.JsonType(),
99
+ 'tile_box': ts.JsonType(),
100
+ }, ['tile']
@@ -23,13 +23,13 @@ class FrameIterator(ComponentIterator):
23
23
  exact number of frames will be extracted. If neither is specified, then all frames will be extracted. The first
24
24
  frame of the video will always be extracted, and the remaining frames will be spaced as evenly as possible.
25
25
 
26
- Args:
27
- video: URL or path of the video to use for frame extraction.
28
- fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
29
- If omitted or set to 0.0, then the native framerate of the video will be used (all frames will be
30
- extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
31
- num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
32
- `num_frames` is greater than the number of frames in the video, all frames will be extracted.
26
+ Args:
27
+ video: URL or path of the video to use for frame extraction.
28
+ fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
29
+ If omitted or set to 0.0, then the native framerate of the video will be used (all frames will be
30
+ extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
31
+ num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
32
+ `num_frames` is greater than the number of frames in the video, all frames will be extracted.
33
33
  """
34
34
 
35
35
  # Input parameters
@@ -180,7 +180,6 @@ class FrameIterator(ComponentIterator):
180
180
  self.container.close()
181
181
 
182
182
  def set_pos(self, pos: int) -> None:
183
- """Seek to frame idx"""
184
183
  if pos == self.next_pos:
185
184
  return # already there
186
185
 
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
10
10
  from .schema import SystemInfo, SystemInfoMd
11
11
 
12
12
  # current version of the metadata; this is incremented whenever the metadata schema changes
13
- VERSION = 22
13
+ VERSION = 23
14
14
 
15
15
 
16
16
  def create_system_info(engine: sql.engine.Engine) -> None:
@@ -0,0 +1,17 @@
1
+ from typing import Any, Optional
2
+ import sqlalchemy as sql
3
+
4
+ from pixeltable.metadata import register_converter
5
+ from pixeltable.metadata.converters.util import convert_table_md
6
+
7
+
8
+ @register_converter(version=22)
9
+ def _(engine: sql.engine.Engine) -> None:
10
+ convert_table_md(engine, substitution_fn=__substitute_md)
11
+
12
+
13
+ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
14
+ if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'DataFrame':
15
+ v['from_clause'] = {'tbls': [v['tbl']], 'join_clauses': []}
16
+ return k, v
17
+ return None
@@ -2,6 +2,7 @@
2
2
  # rather than as a comment, so that the existence of a description can be enforced by
3
3
  # the unit tests when new versions are added.
4
4
  VERSION_NOTES = {
5
+ 23: 'DataFrame.from_clause',
5
6
  22: 'TableMd/ColumnMd.media_validation',
6
7
  21: 'Separate InlineArray and InlineList',
7
8
  20: 'Store DB timestamps in UTC',