pixeltable 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/table.py +247 -83
- pixeltable/catalog/view.py +5 -2
- pixeltable/dataframe.py +240 -92
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +6 -7
- pixeltable/exec/sql_node.py +91 -44
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +1 -1
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +29 -2
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/expr.py +11 -5
- pixeltable/exprs/expr_set.py +8 -0
- pixeltable/exprs/function_call.py +14 -11
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +1 -1
- pixeltable/exprs/json_path.py +1 -1
- pixeltable/exprs/method_ref.py +1 -1
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/exprs/similarity_expr.py +4 -1
- pixeltable/exprs/sql_element_cache.py +4 -0
- pixeltable/exprs/type_cast.py +2 -2
- pixeltable/exprs/variable.py +3 -0
- pixeltable/func/expr_template_function.py +3 -0
- pixeltable/func/function.py +37 -1
- pixeltable/func/signature.py +1 -0
- pixeltable/functions/mistralai.py +0 -2
- pixeltable/functions/ollama.py +4 -4
- pixeltable/globals.py +32 -18
- pixeltable/index/embedding_index.py +6 -1
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/parquet.py +39 -19
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/image.py +100 -0
- pixeltable/iterators/video.py +7 -8
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_22.py +17 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +129 -51
- pixeltable/store.py +1 -1
- pixeltable/tool/create_test_db_dump.py +4 -1
- pixeltable/type_system.py +1 -1
- pixeltable/utils/arrow.py +8 -3
- pixeltable/utils/description_helper.py +89 -0
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/METADATA +28 -12
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/RECORD +54 -51
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/WHEEL +1 -1
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.24.dist-info → pixeltable-0.2.26.dist-info}/entry_points.txt +0 -0
pixeltable/func/function.py
CHANGED
|
@@ -3,16 +3,20 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import importlib
|
|
5
5
|
import inspect
|
|
6
|
-
from typing import Any, Callable, Optional
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional
|
|
7
7
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
|
|
10
10
|
import pixeltable as pxt
|
|
11
|
+
import pixeltable.exceptions as excs
|
|
11
12
|
import pixeltable.type_system as ts
|
|
12
13
|
|
|
13
14
|
from .globals import resolve_symbol
|
|
14
15
|
from .signature import Signature
|
|
15
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from .expr_template_function import ExprTemplateFunction
|
|
19
|
+
|
|
16
20
|
|
|
17
21
|
class Function(abc.ABC):
|
|
18
22
|
"""Base class for Pixeltable's function interface.
|
|
@@ -99,6 +103,38 @@ class Function(abc.ABC):
|
|
|
99
103
|
self._conditional_return_type = fn
|
|
100
104
|
return fn
|
|
101
105
|
|
|
106
|
+
def using(self, **kwargs: Any) -> 'ExprTemplateFunction':
    """Return a new function in which some of this function's parameters are pre-bound.

    Each keyword argument fixes the like-named parameter of this function to the given value. Every
    parameter that is not named in `kwargs` stays a parameter of the returned function, in the order
    in which it appears in this function's signature.

    Args:
        **kwargs: Values to bind, keyed by parameter name.

    Returns:
        An `ExprTemplateFunction` that wraps a call to this function and takes only the unbound
        parameters.

    Raises:
        excs.Error: If a keyword does not name a parameter of this function, if its value cannot be
            turned into an expression, or if the value's type is not accepted by the parameter's type.
    """
    # Imported locally: at module level `ExprTemplateFunction` is only imported under `TYPE_CHECKING`.
    from pixeltable import exprs

    from .expr_template_function import ExprTemplateFunction

    params = self.signature.parameters

    # Resolve each kwarg into a parameter binding. The dict holds the caller's raw values for the
    # bound parameters and `Variable` placeholders for the unbound ones, hence `Any`.
    bindings: dict[str, Any] = {}
    for name, value in kwargs.items():
        if name not in params:
            raise excs.Error(f'Unknown parameter: {name}')
        param = params[name]
        expr = exprs.Expr.from_object(value)
        # NOTE(review): defensive guard; presumably `from_object` returns None for values it cannot
        # convert -- confirm against `Expr.from_object`. Without it such a value would surface as an
        # AttributeError on `expr.col_type` instead of a Pixeltable error.
        if expr is None:
            raise excs.Error(f'Cannot convert value for parameter `{name}` to an expression: {value!r}')
        if not param.col_type.is_supertype_of(expr.col_type):
            raise excs.Error(f'Expected type `{param.col_type}` for parameter `{name}`; got `{expr.col_type}`')
        bindings[name] = value  # Use the original value, not the Expr (the Expr is only for validation)

    residual_params = [p for p in params.values() if p.name not in bindings]

    # Bind each remaining parameter to a like-named variable
    for param in residual_params:
        bindings[param.name] = exprs.Variable(param.name, param.col_type)

    call = exprs.FunctionCall(self, bindings)

    # Construct the (n-k)-ary signature of the new function. We use `call.col_type` for this, rather than
    # `self.signature.return_type`, because the return type of the new function may be specialized via a
    # conditional return type.
    new_signature = Signature(call.col_type, residual_params, self.signature.is_batched)
    return ExprTemplateFunction(call, new_signature)
|
|
137
|
+
|
|
102
138
|
@abc.abstractmethod
|
|
103
139
|
def exec(self, *args: Any, **kwargs: Any) -> Any:
|
|
104
140
|
"""Execute the function with the given arguments and return the result."""
|
pixeltable/func/signature.py
CHANGED
|
@@ -91,6 +91,7 @@ class Signature:
|
|
|
91
91
|
self.parameters_by_pos = parameters.copy()
|
|
92
92
|
self.constant_parameters = [p for p in parameters if not p.is_batched]
|
|
93
93
|
self.batched_parameters = [p for p in parameters if p.is_batched]
|
|
94
|
+
self.required_parameters = [p for p in parameters if not p.has_default()]
|
|
94
95
|
self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
|
|
95
96
|
|
|
96
97
|
def get_return_type(self) -> ts.ColumnType:
|
|
pixeltable/functions/mistralai.py
CHANGED
|
@@ -36,7 +36,6 @@ def chat_completions(
|
|
|
36
36
|
temperature: Optional[float] = 0.7,
|
|
37
37
|
top_p: Optional[float] = 1.0,
|
|
38
38
|
max_tokens: Optional[int] = None,
|
|
39
|
-
min_tokens: Optional[int] = None,
|
|
40
39
|
stop: Optional[list[str]] = None,
|
|
41
40
|
random_seed: Optional[int] = None,
|
|
42
41
|
response_format: Optional[dict] = None,
|
|
@@ -75,7 +74,6 @@ def chat_completions(
|
|
|
75
74
|
temperature=temperature,
|
|
76
75
|
top_p=top_p,
|
|
77
76
|
max_tokens=_opt(max_tokens),
|
|
78
|
-
min_tokens=_opt(min_tokens),
|
|
79
77
|
stop=stop,
|
|
80
78
|
random_seed=_opt(random_seed),
|
|
81
79
|
response_format=response_format, # type: ignore[arg-type]
|
pixeltable/functions/ollama.py
CHANGED
|
@@ -68,7 +68,7 @@ def generate(
|
|
|
68
68
|
raw=raw,
|
|
69
69
|
format=format,
|
|
70
70
|
options=options,
|
|
71
|
-
) # type: ignore[call-overload]
|
|
71
|
+
).dict() # type: ignore[call-overload]
|
|
72
72
|
|
|
73
73
|
|
|
74
74
|
@pxt.udf
|
|
@@ -103,7 +103,7 @@ def chat(
|
|
|
103
103
|
tools=tools,
|
|
104
104
|
format=format,
|
|
105
105
|
options=options,
|
|
106
|
-
) # type: ignore[call-overload]
|
|
106
|
+
).dict() # type: ignore[call-overload]
|
|
107
107
|
|
|
108
108
|
|
|
109
109
|
@pxt.udf(batch_size=16)
|
|
@@ -135,8 +135,8 @@ def embed(
|
|
|
135
135
|
model=model,
|
|
136
136
|
input=input,
|
|
137
137
|
truncate=truncate,
|
|
138
|
-
options=options,
|
|
139
|
-
)
|
|
138
|
+
options=options,
|
|
139
|
+
).dict()
|
|
140
140
|
return [np.array(data, dtype=np.float64) for data in results['embeddings']]
|
|
141
141
|
|
|
142
142
|
|
pixeltable/globals.py
CHANGED
|
@@ -46,6 +46,7 @@ def create_table(
|
|
|
46
46
|
num_retained_versions: Number of versions of the table to retain.
|
|
47
47
|
comment: An optional comment; its meaning is user-defined.
|
|
48
48
|
media_validation: Media validation policy for the table.
|
|
49
|
+
|
|
49
50
|
- `'on_read'`: validate media files at query time
|
|
50
51
|
- `'on_write'`: validate media files during insert/update operations
|
|
51
52
|
|
|
@@ -149,7 +150,9 @@ def create_view(
|
|
|
149
150
|
tbl_version_path = base._tbl_version_path
|
|
150
151
|
elif isinstance(base, DataFrame):
|
|
151
152
|
base._validate_mutable('create_view')
|
|
152
|
-
|
|
153
|
+
if len(base._from_clause.tbls) > 1:
|
|
154
|
+
raise excs.Error('Cannot create a view of a join')
|
|
155
|
+
tbl_version_path = base._from_clause.tbls[0]
|
|
153
156
|
where = base.where_clause
|
|
154
157
|
else:
|
|
155
158
|
raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
|
|
@@ -296,31 +299,42 @@ def move(path: str, new_path: str) -> None:
|
|
|
296
299
|
obj._move(new_p.name, new_dir._id)
|
|
297
300
|
|
|
298
301
|
|
|
299
|
-
def drop_table(
|
|
302
|
+
def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_errors: bool = False) -> None:
|
|
300
303
|
"""Drop a table, view, or snapshot.
|
|
301
304
|
|
|
302
305
|
Args:
|
|
303
|
-
|
|
306
|
+
table: Fully qualified name, or handle, of the table to be dropped.
|
|
304
307
|
force: If `True`, will also drop all views and sub-views of this table.
|
|
305
308
|
ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
|
|
306
309
|
|
|
307
310
|
Raises:
|
|
308
|
-
Error: If the
|
|
311
|
+
Error: If the name does not exist or does not designate a table object, and `ignore_errors=False`.
|
|
309
312
|
|
|
310
313
|
Examples:
|
|
311
|
-
|
|
314
|
+
Drop a table by its fully qualified name:
|
|
315
|
+
>>> pxt.drop_table('subdir.my_table')
|
|
316
|
+
|
|
317
|
+
Drop a table by its handle:
|
|
318
|
+
>>> t = pxt.get_table('subdir.my_table')
|
|
319
|
+
... pxt.drop_table(t)
|
|
320
|
+
|
|
312
321
|
"""
|
|
313
322
|
cat = Catalog.get()
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
323
|
+
if isinstance(table, str):
|
|
324
|
+
tbl_path_obj = catalog.Path(table)
|
|
325
|
+
try:
|
|
326
|
+
cat.paths.check_is_valid(tbl_path_obj, expected=catalog.Table)
|
|
327
|
+
except Exception as e:
|
|
328
|
+
if ignore_errors or force:
|
|
329
|
+
_logger.info(f'Skipped table `{table}` (does not exist).')
|
|
330
|
+
return
|
|
331
|
+
else:
|
|
332
|
+
raise e
|
|
333
|
+
tbl = cat.paths[tbl_path_obj]
|
|
334
|
+
else:
|
|
335
|
+
tbl = table
|
|
336
|
+
tbl_path_obj = catalog.Path(tbl._path)
|
|
337
|
+
|
|
324
338
|
assert isinstance(tbl, catalog.Table)
|
|
325
339
|
if len(cat.tbl_dependents[tbl._id]) > 0:
|
|
326
340
|
dependent_paths = [dep._path for dep in cat.tbl_dependents[tbl._id]]
|
|
@@ -328,10 +342,10 @@ def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> N
|
|
|
328
342
|
for dependent_path in dependent_paths:
|
|
329
343
|
drop_table(dependent_path, force=True)
|
|
330
344
|
else:
|
|
331
|
-
raise excs.Error(f'Table {
|
|
345
|
+
raise excs.Error(f'Table {tbl._path} has dependents: {", ".join(dependent_paths)}')
|
|
332
346
|
tbl._drop()
|
|
333
|
-
del cat.paths[
|
|
334
|
-
_logger.info(f'Dropped table `{
|
|
347
|
+
del cat.paths[tbl_path_obj]
|
|
348
|
+
_logger.info(f'Dropped table `{tbl._path}`.')
|
|
335
349
|
|
|
336
350
|
|
|
337
351
|
def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
pixeltable/index/embedding_index.py
CHANGED
|
@@ -136,7 +136,12 @@ class EmbeddingIndex(IndexBase):
|
|
|
136
136
|
"""Validate the signature"""
|
|
137
137
|
assert isinstance(embed_fn, func.Function)
|
|
138
138
|
sig = embed_fn.signature
|
|
139
|
-
|
|
139
|
+
|
|
140
|
+
# The embedding function must be a 1-ary function of the correct type. But it's ok if the function signature
|
|
141
|
+
# has more than one parameter, as long as it has at most one *required* parameter.
|
|
142
|
+
if (len(sig.parameters) == 0
|
|
143
|
+
or len(sig.required_parameters) > 1
|
|
144
|
+
or sig.parameters_by_pos[0].col_type.type_enum != expected_type):
|
|
140
145
|
raise excs.Error(
|
|
141
146
|
f'{name} must take a single {expected_type.name.lower()} parameter, but has signature {sig}')
|
|
142
147
|
|
pixeltable/io/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@ from .external_store import ExternalStore, SyncStatus
|
|
|
2
2
|
from .globals import create_label_studio_project, export_images_as_fo_dataset, import_json, import_rows
|
|
3
3
|
from .hf_datasets import import_huggingface_dataset
|
|
4
4
|
from .pandas import import_csv, import_excel, import_pandas
|
|
5
|
-
from .parquet import import_parquet
|
|
5
|
+
from .parquet import import_parquet, export_parquet
|
|
6
6
|
|
|
7
7
|
__default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
|
|
8
8
|
__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet'}
|
pixeltable/io/parquet.py
CHANGED
|
@@ -7,11 +7,14 @@ import random
|
|
|
7
7
|
import typing
|
|
8
8
|
from collections import deque
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import Any, Optional
|
|
10
|
+
from typing import Any, Optional, Union
|
|
11
11
|
|
|
12
12
|
import numpy as np
|
|
13
13
|
import PIL.Image
|
|
14
|
+
import datetime
|
|
14
15
|
|
|
16
|
+
import pixeltable as pxt
|
|
17
|
+
from pixeltable.env import Env
|
|
15
18
|
import pixeltable.exceptions as exc
|
|
16
19
|
import pixeltable.type_system as ts
|
|
17
20
|
from pixeltable.utils.transactional_directory import transactional_directory
|
|
@@ -39,28 +42,44 @@ def _write_batch(value_batch: dict[str, deque], schema: pa.Schema, output_path:
|
|
|
39
42
|
parquet.write_table(tab, str(output_path))
|
|
40
43
|
|
|
41
44
|
|
|
42
|
-
def
|
|
45
|
+
def export_parquet(
|
|
46
|
+
table_or_df: Union[pxt.Table, pxt.DataFrame],
|
|
47
|
+
parquet_path: Path,
|
|
48
|
+
partition_size_bytes: int = 100_000_000,
|
|
49
|
+
inline_images: bool = False
|
|
50
|
+
) -> None:
|
|
43
51
|
"""
|
|
44
|
-
|
|
45
|
-
Does not materialize the dataset to memory.
|
|
52
|
+
Exports a dataframe's data to one or more Parquet files. Requires pyarrow to be installed.
|
|
46
53
|
|
|
47
|
-
It
|
|
54
|
+
It additionally writes the pixeltable metadata in a json file, which would otherwise
|
|
48
55
|
not be available in the parquet format.
|
|
49
56
|
|
|
50
|
-
Images are stored inline in a compressed format in their parquet file.
|
|
51
|
-
|
|
52
57
|
Args:
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
partition_size_bytes : maximum target size for each chunk. Default 100_000_000 bytes.
|
|
58
|
+
table_or_df : Table or Dataframe to export.
|
|
59
|
+
parquet_path : Path to directory to write the parquet files to.
|
|
60
|
+
partition_size_bytes : The maximum target size for each chunk. Default 100_000_000 bytes.
|
|
61
|
+
inline_images : If True, images are stored inline in the parquet file. This is useful
|
|
62
|
+
for small images, to be imported as pytorch dataset. But can be inefficient
|
|
63
|
+
for large images, and cannot be imported into pixeltable.
|
|
64
|
+
If False, will raise an error if the Dataframe has any image column.
|
|
65
|
+
Default False.
|
|
56
66
|
"""
|
|
57
67
|
from pixeltable.utils.arrow import to_arrow_schema
|
|
58
68
|
|
|
69
|
+
df: pxt.DataFrame
|
|
70
|
+
if isinstance(table_or_df, pxt.catalog.Table):
|
|
71
|
+
df = table_or_df._df()
|
|
72
|
+
else:
|
|
73
|
+
df = table_or_df
|
|
74
|
+
|
|
59
75
|
type_dict = {k: v.as_dict() for k, v in df.schema.items()}
|
|
60
76
|
arrow_schema = to_arrow_schema(df.schema)
|
|
61
77
|
|
|
78
|
+
if not inline_images and any(col_type.is_image_type() for col_type in df.schema.values()):
|
|
79
|
+
raise exc.Error('Cannot export Dataframe with image columns when inline_images is False')
|
|
80
|
+
|
|
62
81
|
# store the changes atomically
|
|
63
|
-
with transactional_directory(
|
|
82
|
+
with transactional_directory(parquet_path) as temp_path:
|
|
64
83
|
# dump metadata json file so we can inspect what was the source of the parquet file later on.
|
|
65
84
|
json.dump(df.as_dict(), (temp_path / '.pixeltable.json').open('w'))
|
|
66
85
|
json.dump(type_dict, (temp_path / '.pixeltable.column_types.json').open('w')) # keep type metadata
|
|
@@ -111,6 +130,7 @@ def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int =
|
|
|
111
130
|
elif col_type.is_bool_type():
|
|
112
131
|
length = 1
|
|
113
132
|
elif col_type.is_timestamp_type():
|
|
133
|
+
val = val.astimezone(datetime.timezone.utc)
|
|
114
134
|
length = 8
|
|
115
135
|
else:
|
|
116
136
|
assert False, f'unknown type {col_type} for {col_name}'
|
|
@@ -139,7 +159,7 @@ def parquet_schema_to_pixeltable_schema(parquet_path: str) -> dict[str, Optional
|
|
|
139
159
|
|
|
140
160
|
|
|
141
161
|
def import_parquet(
|
|
142
|
-
|
|
162
|
+
table: str,
|
|
143
163
|
*,
|
|
144
164
|
parquet_path: str,
|
|
145
165
|
schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
|
|
@@ -148,7 +168,7 @@ def import_parquet(
|
|
|
148
168
|
"""Creates a new base table from a Parquet file or set of files. Requires pyarrow to be installed.
|
|
149
169
|
|
|
150
170
|
Args:
|
|
151
|
-
|
|
171
|
+
table: Fully qualified name of the table to import the data into.
|
|
152
172
|
parquet_path: Path to an individual Parquet file or directory of Parquet files.
|
|
153
173
|
schema_overrides: If specified, then for each (name, type) pair in `schema_overrides`, the column with
|
|
154
174
|
name `name` will be given type `type`, instead of being inferred from the Parquet dataset. The keys in
|
|
@@ -157,7 +177,7 @@ def import_parquet(
|
|
|
157
177
|
kwargs: Additional arguments to pass to `create_table`.
|
|
158
178
|
|
|
159
179
|
Returns:
|
|
160
|
-
A handle to the newly created
|
|
180
|
+
A handle to the newly created table.
|
|
161
181
|
"""
|
|
162
182
|
from pyarrow import parquet
|
|
163
183
|
|
|
@@ -176,11 +196,11 @@ def import_parquet(
|
|
|
176
196
|
if v is None:
|
|
177
197
|
raise exc.Error(f'Could not infer pixeltable type for column {k} from parquet file')
|
|
178
198
|
|
|
179
|
-
if
|
|
180
|
-
raise exc.Error(f'Table {
|
|
199
|
+
if table in pxt.list_tables():
|
|
200
|
+
raise exc.Error(f'Table {table} already exists')
|
|
181
201
|
|
|
182
202
|
try:
|
|
183
|
-
tmp_name = f'{
|
|
203
|
+
tmp_name = f'{table}_tmp_{random.randint(0, 100000000)}'
|
|
184
204
|
tab = pxt.create_table(tmp_name, schema, **kwargs)
|
|
185
205
|
for fragment in parquet_dataset.fragments: # type: ignore[attr-defined]
|
|
186
206
|
for batch in fragment.to_batches():
|
|
@@ -190,5 +210,5 @@ def import_parquet(
|
|
|
190
210
|
_logger.error(f'Error while inserting Parquet file into table: {e}')
|
|
191
211
|
raise e
|
|
192
212
|
|
|
193
|
-
pxt.move(tmp_name,
|
|
194
|
-
return pxt.get_table(
|
|
213
|
+
pxt.move(tmp_name, table)
|
|
214
|
+
return pxt.get_table(table)
|
pixeltable/iterators/__init__.py
CHANGED
|
pixeltable/iterators/image.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from typing import Any, Sequence
|
|
2
|
+
|
|
3
|
+
import PIL.Image
|
|
4
|
+
|
|
5
|
+
import pixeltable.exceptions as excs
|
|
6
|
+
import pixeltable.type_system as ts
|
|
7
|
+
from pixeltable.iterators.base import ComponentIterator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TileIterator(ComponentIterator):
    """
    Iterator over tiles of an image. Each image will be divided into tiles of size `tile_size`, and the tiles will be
    iterated over in row-major order (left-to-right, then top-to-bottom). An optional `overlap` parameter may be
    specified. If the tiles do not exactly cover the image, then the rightmost and bottommost tiles will be padded with
    blackspace, so that the output images all have the exact size `tile_size`.

    At least one tile is produced along each axis, even when the image is not larger than the overlap in that
    dimension.

    Args:
        image: Image to split into tiles.
        tile_size: Size of each tile, as a pair of integers `[width, height]`.
        overlap: Amount of overlap between adjacent tiles, as a pair of integers `[width, height]`.

    Raises:
        excs.Error: If `overlap` is not strictly smaller than `tile_size` in both dimensions.
    """

    __image: PIL.Image.Image
    __tile_size: Sequence[int]
    __overlap: Sequence[int]
    __width: int  # image width in pixels
    __height: int  # image height in pixels
    __xlen: int  # number of tiles per row
    __ylen: int  # number of tile rows
    __i: int  # column index of the next tile
    __j: int  # row index of the next tile

    def __init__(
        self,
        image: PIL.Image.Image,
        *,
        tile_size: tuple[int, int],
        overlap: tuple[int, int] = (0, 0),
    ):
        # A stride of `tile_size - overlap` must be positive, otherwise iteration would never advance
        # (and the tile-count computation below would divide by zero).
        if overlap[0] >= tile_size[0] or overlap[1] >= tile_size[1]:
            raise excs.Error(f"overlap dimensions {overlap} are not strictly smaller than tile size {tile_size}")

        self.__image = image
        self.__image.load()
        self.__tile_size = tile_size
        self.__overlap = overlap
        self.__width, self.__height = image.size
        # Justification for this formula: let t = tile_size[0], o = overlap[0]. Then the values of w (= width) that
        # exactly accommodate an integer number of tiles are t, 2t - o, 3t - 2o, 4t - 3o, ...
        # This formula ensures that t, 2t - o, 3t - 2o, ... result in an xlen of 1, 2, 3, ...
        # but t + 1, 2t - o + 1, 3t - 2o + 1, ... result in an xlen of 2, 3, 4, ...
        # The formula drops to 0 or below when w <= o; clamp to 1 so that such an image still yields a
        # (padded) tile and `set_pos()` never divides by a non-positive count.
        self.__xlen = max(1, (self.__width - overlap[0] - 1) // (tile_size[0] - overlap[0]) + 1)
        self.__ylen = max(1, (self.__height - overlap[1] - 1) // (tile_size[1] - overlap[1]) + 1)
        self.__i = 0
        self.__j = 0

    def __next__(self) -> dict[str, Any]:
        """Return the next tile in row-major order.

        Returns:
            A dict with the cropped image under `'tile'`, its `[column, row]` index under `'tile_coord'`
            and its pixel box `[x1, y1, x2, y2]` under `'tile_box'`.

        Raises:
            StopIteration: When all rows have been produced.
        """
        if self.__j >= self.__ylen:
            raise StopIteration

        # Top-left corner: consecutive tiles are `tile_size - overlap` pixels apart
        x1 = self.__i * (self.__tile_size[0] - self.__overlap[0])
        y1 = self.__j * (self.__tile_size[1] - self.__overlap[1])
        # If x2 > self.__width, PIL does the right thing and pads the image with blackspace
        x2 = x1 + self.__tile_size[0]
        y2 = y1 + self.__tile_size[1]
        tile = self.__image.crop((x1, y1, x2, y2))
        result = {
            'tile': tile,
            'tile_coord': [self.__i, self.__j],
            'tile_box': [x1, y1, x2, y2],
        }

        # Advance left-to-right, wrapping to the start of the next row
        self.__i += 1
        if self.__i >= self.__xlen:
            self.__i = 0
            self.__j += 1
        return result

    def close(self) -> None:
        """Nothing to release; this method does nothing."""
        pass

    def set_pos(self, pos: int) -> None:
        """Position the iterator so that the next tile returned is the one at row-major index `pos`."""
        self.__j = pos // self.__xlen
        self.__i = pos % self.__xlen

    @classmethod
    def input_schema(cls, *args: Any, **kwargs: Any) -> dict[str, ts.ColumnType]:
        """Return the column types of the constructor arguments, keyed by argument name."""
        return {
            'image': ts.ImageType(),
            'tile_size': ts.JsonType(),
            'overlap': ts.JsonType(),
        }

    @classmethod
    def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
        """Return the column types of the fields produced by `__next__`, plus a list naming `'tile'`.

        NOTE(review): the meaning of the second element is defined by `ComponentIterator`; confirm there.
        """
        return {
            'tile': ts.ImageType(),
            'tile_coord': ts.JsonType(),
            'tile_box': ts.JsonType(),
        }, ['tile']
|
pixeltable/iterators/video.py
CHANGED
|
@@ -23,13 +23,13 @@ class FrameIterator(ComponentIterator):
|
|
|
23
23
|
exact number of frames will be extracted. If neither is specified, then all frames will be extracted. The first
|
|
24
24
|
frame of the video will always be extracted, and the remaining frames will be spaced as evenly as possible.
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
Args:
|
|
27
|
+
video: URL or path of the video to use for frame extraction.
|
|
28
|
+
fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
|
|
29
|
+
If omitted or set to 0.0, then the native framerate of the video will be used (all frames will be
|
|
30
|
+
extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
|
|
31
|
+
num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
|
|
32
|
+
`num_frames` is greater than the number of frames in the video, all frames will be extracted.
|
|
33
33
|
"""
|
|
34
34
|
|
|
35
35
|
# Input parameters
|
|
@@ -180,7 +180,6 @@ class FrameIterator(ComponentIterator):
|
|
|
180
180
|
self.container.close()
|
|
181
181
|
|
|
182
182
|
def set_pos(self, pos: int) -> None:
|
|
183
|
-
"""Seek to frame idx"""
|
|
184
183
|
if pos == self.next_pos:
|
|
185
184
|
return # already there
|
|
186
185
|
|
pixeltable/metadata/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
|
|
|
10
10
|
from .schema import SystemInfo, SystemInfoMd
|
|
11
11
|
|
|
12
12
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
13
|
-
VERSION =
|
|
13
|
+
VERSION = 23
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
|
pixeltable/metadata/converters/convert_22.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
import sqlalchemy as sql
|
|
3
|
+
|
|
4
|
+
from pixeltable.metadata import register_converter
|
|
5
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@register_converter(version=22)
def _(engine: sql.engine.Engine) -> None:
    """Convert version-22 metadata by running `__substitute_md` over the table metadata."""
    convert_table_md(engine, substitution_fn=__substitute_md)


def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
    """Add a `from_clause` entry to serialized `DataFrame` dicts.

    Args:
        k: Key under which `v` is stored; returned unchanged.
        v: The metadata value to inspect.

    Returns:
        `(k, v)` with `v` updated in place when `v` is a dict whose `_classname` is `'DataFrame'`;
        `None` for every other value.
    """
    is_dataframe_md = isinstance(v, dict) and v.get('_classname') == 'DataFrame'
    if not is_dataframe_md:
        return None

    # The from clause holds the DataFrame's existing `tbl` entry as its only table, with no joins.
    from_clause = {'tbls': [v['tbl']], 'join_clauses': []}
    v['from_clause'] = from_clause
    return k, v
|
pixeltable/metadata/notes.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
# rather than as a comment, so that the existence of a description can be enforced by
|
|
3
3
|
# the unit tests when new versions are added.
|
|
4
4
|
VERSION_NOTES = {
|
|
5
|
+
23: 'DataFrame.from_clause',
|
|
5
6
|
22: 'TableMd/ColumnMd.media_validation',
|
|
6
7
|
21: 'Separate InlineArray and InlineList',
|
|
7
8
|
20: 'Store DB timestamps in UTC',
|