pixeltable-0.2.20-py3-none-any.whl → pixeltable-0.2.21-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/globals.py +3 -0
- pixeltable/catalog/table.py +208 -145
- pixeltable/catalog/table_version.py +36 -18
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +3 -3
- pixeltable/dataframe.py +9 -24
- pixeltable/env.py +1 -1
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/aggregation_node.py +22 -15
- pixeltable/exec/data_row_batch.py +7 -7
- pixeltable/exec/exec_node.py +35 -7
- pixeltable/exec/expr_eval_node.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -9
- pixeltable/exec/sql_node.py +265 -136
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/data_row.py +30 -19
- pixeltable/exprs/expr.py +15 -14
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +21 -15
- pixeltable/exprs/function_call.py +21 -8
- pixeltable/exprs/rowid_ref.py +2 -2
- pixeltable/exprs/sql_element_cache.py +5 -1
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +86 -42
- pixeltable/functions/huggingface.py +12 -14
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/string.py +50 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +55 -56
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +7 -7
- pixeltable/index/embedding_index.py +8 -10
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/globals.py +2 -0
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/document.py +9 -7
- pixeltable/iterators/video.py +10 -1
- pixeltable/metadata/__init__.py +3 -2
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/util.py +9 -8
- pixeltable/metadata/schema.py +32 -21
- pixeltable/plan.py +136 -154
- pixeltable/store.py +51 -36
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/mypy_plugin.py +32 -0
- pixeltable/type_system.py +243 -60
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +1 -1
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +1 -1
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/METADATA +6 -5
- pixeltable-0.2.21.dist-info/RECORD +148 -0
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
pixeltable/io/external_store.py
CHANGED
@@ -69,6 +69,9 @@ class Project(ExternalStore, abc.ABC):
     An `ExternalStore` that represents a labeling project. Extends `ExternalStore` with a few
     additional capabilities specific to such projects.
     """
+
+    stored_proxies: dict[Column, Column]
+
     def __init__(self, name: str, col_mapping: dict[Column, str], stored_proxies: Optional[dict[Column, Column]]):
         super().__init__(name)
         self._col_mapping = col_mapping
@@ -116,7 +119,7 @@ class Project(ExternalStore, abc.ABC):
         tbl_version.schema_version = tbl_version.version
         proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
         # Add the columns; this will also update table metadata.
-        tbl_version._add_columns(proxy_cols, conn)
+        tbl_version._add_columns(proxy_cols, conn, print_stats=False, on_error='ignore')
         # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
         # invisible to the user.
         tbl_version._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
@@ -126,7 +129,7 @@ class Project(ExternalStore, abc.ABC):
         # any *other* external store for this table.)
         deletions_needed: set[Column] = set(self.stored_proxies.values())
         for name, store in tbl_version.external_stores.items():
-            if name != self.name:
+            if isinstance(store, Project) and name != self.name:
                 deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
         if len(deletions_needed) > 0:
             _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
@@ -210,6 +213,8 @@ class Project(ExternalStore, abc.ABC):
         If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
         in which the Pixeltable column names are resolved to the corresponding `Column` objects.
         """
+        from pixeltable import exprs
+
         is_user_specified_col_mapping = col_mapping is not None
         if col_mapping is None:
             col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
@@ -235,8 +240,9 @@ class Project(ExternalStore, abc.ABC):
                     f'Column name `{ext_col}` appears as a value in `col_mapping`, but the external store '
                     f'configuration has no column `{ext_col}`.'
                 )
-
-
+            col_ref = table[t_col]
+            assert isinstance(col_ref, exprs.ColumnRef)
+            resolved_col_mapping[col_ref.col] = ext_col
         # Validate column specs
         t_col_types = table._schema
         for t_col, ext_col in col_mapping.items():
@@ -329,7 +335,7 @@ class MockProject(Project):
     def get_import_columns(self) -> dict[str, ts.ColumnType]:
         return self.import_cols

-    def sync(self, t: Table, export_data: bool, import_data: bool) ->
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
         raise NotImplementedError()

     def delete(self) -> None:
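The `isinstance(store, Project)` guard added in the third hunk matters because `tbl_version.external_stores` can contain stores that are not `Project`s, and only `Project` subclasses carry a `stored_proxies` dict. A minimal sketch of the cleanup logic (standalone toy classes; `PlainStore` is invented for illustration):

    class ExternalStore:
        def __init__(self, name: str) -> None:
            self.name = name

    class Project(ExternalStore):
        def __init__(self, name: str, stored_proxies: dict[str, str]) -> None:
            super().__init__(name)
            self.stored_proxies = stored_proxies

    class PlainStore(ExternalStore):  # hypothetical store without stored_proxies
        pass

    stores: dict[str, ExternalStore] = {
        'p1': Project('p1', {'col_a': 'proxy_a'}),
        'p2': Project('p2', {'col_a': 'proxy_a', 'col_b': 'proxy_b'}),
        'plain': PlainStore('plain'),
    }

    me = stores['p1']
    assert isinstance(me, Project)
    deletions_needed = set(me.stored_proxies.values())
    for name, store in stores.items():
        # Without the isinstance() check, PlainStore would raise AttributeError here.
        if isinstance(store, Project) and name != me.name:
            deletions_needed -= set(store.stored_proxies.values())

    print(deletions_needed)  # set(): 'proxy_a' is still referenced by p2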
pixeltable/io/globals.py
CHANGED
@@ -187,6 +187,8 @@ def import_rows(
             # If `key` is not in `schema_overrides`, then we infer its type from the data.
             # The column type will always be nullable by default.
             col_type = pxt.ColumnType.infer_literal_type(value, nullable=True)
+            if col_type is None:
+                raise excs.Error(f'Could not infer type for column `{col_name}`; the value in row {n} has an unsupported type: {type(value)}')
             if col_name not in schema:
                 schema[col_name] = col_type
             else:
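The new `None` check surfaces unsupported value types at schema-inference time instead of letting a `None` column type propagate. A hedged usage sketch (assuming `import_rows` is exposed as `pxt.io.import_rows`; the table name is invented):

    import pixeltable as pxt

    rows = [
        {'id': 1, 'payload': 'ok'},
        {'id': 2, 'payload': object()},  # no Pixeltable type can be inferred for a bare object
    ]
    # With 0.2.21 this raises a pixeltable error naming the offending column and row,
    # rather than failing downstream with an inferred type of None.
    t = pxt.io.import_rows('import_rows_demo', rows)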
pixeltable/io/hf_datasets.py
CHANGED
pixeltable/io/label_studio.py
CHANGED
@@ -4,17 +4,17 @@ import logging
 import os
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Iterator, Optional,
+from typing import Any, Iterator, Literal, Optional, cast
 from xml.etree import ElementTree

+import label_studio_sdk  # type: ignore[import-untyped]
 import PIL.Image
-import label_studio_sdk
 from requests.exceptions import HTTPError

 import pixeltable as pxt
 import pixeltable.env as env
 import pixeltable.exceptions as excs
-from pixeltable import
+from pixeltable import Column, Table
 from pixeltable.exprs import ColumnRef, DataRow, Expr
 from pixeltable.io.external_store import Project, SyncStatus
 from pixeltable.utils import coco
@@ -211,7 +211,7 @@ class LabelStudioProject(Project):
         assert isinstance(row[media_col_idx], PIL.Image.Image)
         file = env.Env.get().create_tmp_path(extension='.png')
         row[media_col_idx].save(file, format='png')
-        task_id
+        task_id = self.project.import_tasks(file)[0]
         os.remove(file)

         # Update the task with `rowid` metadata
@@ -256,7 +256,7 @@ class LabelStudioProject(Project):
         assert self.media_import_method == 'file'
         if not col.col_type.is_media_type():
             # Not a media column; query the data directly
-            expr_refs[col_name] = t[col_name]
+            expr_refs[col_name] = cast(ColumnRef, t[col_name])
         elif col in self.stored_proxies:
             # Media column that has a stored proxy; use it. We have to give it a name,
             # since it's an anonymous column
@@ -267,7 +267,7 @@ class LabelStudioProject(Project):
             # and we can just use the localpath
             expr_refs[col_name] = t[col_name].localpath

-        df = t.select(*[t[col] for col in t_rl_cols], **expr_refs)
+        df = t.select(*[t[col.name] for col in t_rl_cols], **expr_refs)
         # The following buffers will hold `DataRow` indices that correspond to each of the selected
         # columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
         # preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
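Most of the edits in this file appease the type checker rather than change behavior. In particular, `typing.cast` performs no runtime conversion or validation; it only narrows the static type, as in this standalone sketch:

    from typing import cast

    def lookup(d: dict[str, object], key: str) -> object:
        return d[key]

    d: dict[str, object] = {'n': 41}
    n = cast(int, lookup(d, 'n'))  # static-only: tells mypy the result is an int
    print(n + 1)  # 42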
pixeltable/io/parquet.py
CHANGED
@@ -7,24 +7,23 @@ import random
 import typing
 from collections import deque
 from pathlib import Path
-from typing import
+from typing import Any, Optional

-import PIL.Image
 import numpy as np
+import PIL.Image

 import pixeltable.exceptions as exc
 import pixeltable.type_system as ts
 from pixeltable.utils.transactional_directory import transactional_directory

 if typing.TYPE_CHECKING:
-    import pixeltable as pxt
     import pyarrow as pa
-
+    import pixeltable as pxt

 _logger = logging.getLogger(__name__)


-def _write_batch(value_batch:
+def _write_batch(value_batch: dict[str, deque], schema: pa.Schema, output_path: Path) -> None:
     import pyarrow as pa
     from pyarrow import parquet

@@ -37,7 +36,7 @@ def _write_batch(value_batch: Dict[str, deque], schema: pa.Schema, output_path:
         pydict[field.name] = value_batch[field.name]

     tab = pa.Table.from_pydict(pydict, schema=schema)
-    parquet.write_table(tab, output_path)
+    parquet.write_table(tab, str(output_path))


 def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int = 100_000_000) -> None:
@@ -67,7 +66,7 @@ def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int =
     json.dump(type_dict, (temp_path / '.pixeltable.column_types.json').open('w'))  # keep type metadata

     batch_num = 0
-    current_value_batch:
+    current_value_batch: dict[str, deque] = {k: deque() for k in df.schema.keys()}
     current_byte_estimate = 0

     for data_row in df._exec():
@@ -128,13 +127,14 @@ def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int =
         _write_batch(current_value_batch, arrow_schema, temp_path / f'part-{batch_num:05d}.parquet')


-def parquet_schema_to_pixeltable_schema(parquet_path: str) ->
+def parquet_schema_to_pixeltable_schema(parquet_path: str) -> dict[str, Optional[ts.ColumnType]]:
     """Generate a default pixeltable schema for the given parquet file. Returns None for unknown types."""
     from pyarrow import parquet
+
     from pixeltable.utils.arrow import to_pixeltable_schema

     input_path = Path(parquet_path).expanduser()
-    parquet_dataset = parquet.ParquetDataset(input_path)
+    parquet_dataset = parquet.ParquetDataset(str(input_path))
     return to_pixeltable_schema(parquet_dataset.schema)


@@ -142,7 +142,7 @@ def import_parquet(
     table_path: str,
     *,
     parquet_path: str,
-    schema_overrides: Optional[
+    schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
     **kwargs: Any,
 ) -> pxt.Table:
     """Creates a new base table from a Parquet file or set of files. Requires pyarrow to be installed.
@@ -159,12 +159,13 @@ def import_parquet(
     Returns:
         A handle to the newly created [`Table`][pixeltable.Table].
     """
-    import pixeltable as pxt
     from pyarrow import parquet
+
+    import pixeltable as pxt
     from pixeltable.utils.arrow import iter_tuples

     input_path = Path(parquet_path).expanduser()
-    parquet_dataset = parquet.ParquetDataset(input_path)
+    parquet_dataset = parquet.ParquetDataset(str(input_path))

     schema = parquet_schema_to_pixeltable_schema(parquet_path)
     if schema_overrides is None:
@@ -181,7 +182,7 @@ def import_parquet(
     try:
         tmp_name = f'{table_path}_tmp_{random.randint(0, 100000000)}'
         tab = pxt.create_table(tmp_name, schema, **kwargs)
-        for fragment in parquet_dataset.fragments:
+        for fragment in parquet_dataset.fragments:  # type: ignore[attr-defined]
             for batch in fragment.to_batches():
                 dict_batch = list(iter_tuples(batch))
                 tab.insert(dict_batch)
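The repeated `str(...)` wrapping of `Path` arguments (`parquet.write_table`, `parquet.ParquetDataset`) satisfies pyarrow's type stubs; runtime behavior is unchanged. A quick round-trip sketch, assuming pyarrow is installed:

    import tempfile
    from pathlib import Path

    import pyarrow as pa
    from pyarrow import parquet

    out = Path(tempfile.mkdtemp()) / 'demo.parquet'
    tab = pa.Table.from_pydict({'x': [1, 2, 3]})
    parquet.write_table(tab, str(out))  # str() keeps mypy happy with pyarrow's stubs
    print(parquet.read_table(str(out)).to_pydict())  # {'x': [1, 2, 3]}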
pixeltable/iterators/document.py
CHANGED
@@ -1,7 +1,7 @@
 import dataclasses
 import enum
 import logging
-from typing import Any, Iterable, Iterator, Optional
+from typing import Any, Iterable, Iterator, Optional, Union

 import ftfy

@@ -176,7 +176,7 @@ class DocumentSplitter(ComponentIterator):

     @classmethod
     def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ColumnType], list[str]]:
-        schema = {'text': StringType()}
+        schema: dict[str, ColumnType] = {'text': StringType()}
         md_fields = _parse_metadata(kwargs['metadata']) if 'metadata' in kwargs else []

         for md_field in md_fields:
@@ -214,7 +214,7 @@ class DocumentSplitter(ComponentIterator):
             section = next(self._sections)
             if section.text is None:
                 continue
-            result = {'text': section.text}
+            result: dict[str, Any] = {'text': section.text}
             for md_field in self._metadata_fields:
                 if md_field == ChunkMetadata.TITLE:
                     result[md_field.name.lower()] = self._doc_title
@@ -234,7 +234,7 @@ class DocumentSplitter(ComponentIterator):
         emit_on_paragraph = Separator.PARAGRAPH in self._separators or Separator.SENTENCE in self._separators
         emit_on_heading = Separator.HEADING in self._separators or emit_on_paragraph
         # current state
-        accumulated_text = []  # currently accumulated text
+        accumulated_text: list[str] = []  # currently accumulated text
         # accumulate pieces then join before emit to avoid quadratic complexity of string concatenation

         headings: dict[str, str] = {}  # current state of observed headings (level -> text)
@@ -260,9 +260,10 @@ class DocumentSplitter(ComponentIterator):
             yield DocumentSection(text=full_text, metadata=md)
             accumulated_text = []

-        def process_element(el: bs4.
+        def process_element(el: Union[bs4.element.Tag, bs4.NavigableString]) -> Iterator[DocumentSection]:
             # process the element and emit sections as necessary
             nonlocal accumulated_text, headings, sourceline, emit_on_heading, emit_on_paragraph
+
             if el.name in self._skip_tags:
                 return

@@ -282,6 +283,7 @@ class DocumentSplitter(ComponentIterator):
                 yield from emit()
             update_metadata(el)
             for child in el.children:
+                assert isinstance(child, (bs4.element.Tag, bs4.NavigableString)), type(el)
                 yield from process_element(child)

         yield from process_element(self._doc_handle.bs_doc)
@@ -293,7 +295,7 @@ class DocumentSplitter(ComponentIterator):
         emit_on_paragraph = Separator.PARAGRAPH in self._separators or Separator.SENTENCE in self._separators
         emit_on_heading = Separator.HEADING in self._separators or emit_on_paragraph
         # current state
-        accumulated_text = []  # currently accumulated text
+        accumulated_text: list[str] = []  # currently accumulated text
         # accumulate pieces then join before emit to avoid quadratic complexity of string concatenation
         headings: dict[str, str] = {}  # current state of observed headings (level -> text)

@@ -347,7 +349,7 @@ class DocumentSplitter(ComponentIterator):

     def _pdf_sections(self) -> Iterator[DocumentSection]:
         """Create DocumentSections reflecting the pdf-specific separators"""
-        import fitz
+        import fitz  # type: ignore[import-untyped]
         doc: fitz.Document = self._doc_handle.pdf_doc
         assert doc is not None

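The recurring `accumulated_text: list[str] = []` annotation is needed because mypy cannot infer an element type for an empty literal; the new `schema: dict[str, ColumnType]` and `result: dict[str, Any]` declarations fix the analogous problem for dicts. A minimal illustration that also mirrors the join-once comment in the diff:

    def collect(pieces: list[str]) -> str:
        # Without the annotation, mypy reports: Need type annotation for "acc"
        acc: list[str] = []
        for p in pieces:
            acc.append(p)
        # join once at the end to avoid quadratic string concatenation
        return ''.join(acc)

    print(collect(['a', 'b', 'c']))  # abc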
pixeltable/iterators/video.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 import math
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, Sequence

 import cv2
 import PIL.Image
@@ -29,6 +29,15 @@ class FrameIterator(ComponentIterator):
         num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
             `num_frames` is greater than the number of frames in the video, all frames will be extracted.
     """
+
+    video_path: Path
+    video_reader: cv2.VideoCapture
+    fps: Optional[float]
+    num_frames: Optional[int]
+    frames_to_extract: Sequence[int]
+    frames_set: set[int]
+    next_frame_idx: int
+
     def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
         if fps is not None and num_frames is not None:
             raise Error('At most one of `fps` or `num_frames` may be specified')
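For context, the newly declared attributes back `FrameIterator`'s existing options. A hedged usage sketch (table and view names invented; `FrameIterator.create` is the documented way to attach the iterator to a view):

    import pixeltable as pxt
    from pixeltable.iterators import FrameIterator

    videos = pxt.create_table('frame_demo_videos', {'video': pxt.VideoType()})
    # fps and num_frames are mutually exclusive; passing both raises
    # 'At most one of `fps` or `num_frames` may be specified'
    frames = pxt.create_view(
        'frame_demo_frames',
        videos,
        iterator=FrameIterator.create(video=videos.video, fps=1.0),
    )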
pixeltable/metadata/__init__.py
CHANGED
@@ -2,7 +2,7 @@ import dataclasses
 import importlib
 import os
 import pkgutil
-from typing import Callable
+from typing import Callable

 import sqlalchemy as sql
 import sqlalchemy.orm as orm
@@ -24,7 +24,7 @@ def create_system_info(engine: sql.engine.Engine) -> None:

 # conversion functions for upgrading the metadata schema from one version to the following
 # key: old schema version
-converter_cbs:
+converter_cbs: dict[int, Callable[[sql.engine.Engine], None]] = {}

 def register_converter(version: int) -> Callable[[Callable[[sql.engine.Engine], None]], None]:
     def decorator(fn: Callable[[sql.engine.Engine], None]) -> None:
@@ -41,6 +41,7 @@ def upgrade_md(engine: sql.engine.Engine) -> None:
     with orm.Session(engine) as session:
         system_info = session.query(SystemInfo).one().md
         md_version = system_info['schema_version']
+        assert isinstance(md_version, int)
         if md_version == VERSION:
             return
         while md_version < VERSION:
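`converter_cbs` maps an old schema version to the callback that upgrades it by one step, and `upgrade_md` applies callbacks until `VERSION` is reached. The converter modules register themselves via the decorator, as in this pattern (copied from convert_14.py below, body elided):

    import sqlalchemy as sql

    from pixeltable.metadata import register_converter

    @register_converter(version=14)  # upgrades metadata from schema version 14 to 15
    def _(engine: sql.engine.Engine) -> None:
        ...  # in-place metadata updates against `engine`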
pixeltable/metadata/converters/convert_14.py
CHANGED
@@ -1,11 +1,13 @@
+from typing import Any
+
 import sqlalchemy as sql

-from pixeltable.metadata.schema import Table
 from pixeltable.metadata import register_converter
+from pixeltable.metadata.schema import Table


 @register_converter(version=14)
 def _(engine: sql.engine.Engine) -> None:
-    default_remotes = {'remotes': []}
+    default_remotes: dict[str, Any] = {'remotes': []}
     with engine.begin() as conn:
         conn.execute(sql.update(Table).where(Table.md['remotes'] == None).values(md=Table.md.concat(default_remotes)))
pixeltable/metadata/converters/convert_20.py
CHANGED
@@ -35,7 +35,7 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
         # but it might actually be transformed into an InlineList when it is instantiated
         # (unfortunately, there is no way to disambiguate at this stage; see comments in
         # InlineArray._from_dict() for more details).
-        updated_v = {'_classname': 'InlineList' if v.get('is_json') else 'InlineArray'}
+        updated_v: dict[str, Any] = {'_classname': 'InlineList' if v.get('is_json') else 'InlineArray'}
         if len(updated_components) > 0:
             updated_v['components'] = updated_components
         return k, updated_v
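The annotation on `updated_v` is not cosmetic: from the literal alone mypy infers `dict[str, str]`, so the later `updated_v['components'] = updated_components` assignment of a list value would be rejected. Minimal reproduction:

    from typing import Any

    updated_v = {'_classname': 'InlineList'}  # mypy infers dict[str, str]
    updated_v['components'] = [1, 2]          # mypy error: value is not a str

    fixed: dict[str, Any] = {'_classname': 'InlineList'}
    fixed['components'] = [1, 2]              # OK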
pixeltable/metadata/converters/util.py
CHANGED
@@ -68,24 +68,25 @@ def __substitute_md_rec(
     substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
 ) -> Any:
     if isinstance(md, dict):
-
+        updated_dict: dict[str, Any] = {}
         for k, v in md.items():
+            assert isinstance(k, str)
             substitute = substitution_fn(k, v)
             if substitute is not None:
                 updated_k, updated_v = substitute
-
+                updated_dict[updated_k] = __substitute_md_rec(updated_v, substitution_fn)
             else:
-
-        return
+                updated_dict[k] = __substitute_md_rec(v, substitution_fn)
+        return updated_dict
     elif isinstance(md, list):
-
+        updated_list: list[Any] = []
         for v in md:
             substitute = substitution_fn(None, v)
             if substitute is not None:
                 _, updated_v = substitute
-
+                updated_list.append(__substitute_md_rec(updated_v, substitution_fn))
             else:
-
-        return
+                updated_list.append(__substitute_md_rec(v, substitution_fn))
+        return updated_list
     else:
         return md
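For reference, the rewritten recursion extracted into a standalone sketch (simplified: `fn` returns `None` to leave an entry unchanged, otherwise a replacement `(key, value)` pair):

    from typing import Any, Callable, Optional

    SubstFn = Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]

    def substitute_md_rec(md: Any, fn: SubstFn) -> Any:
        if isinstance(md, dict):
            updated: dict[Any, Any] = {}
            for k, v in md.items():
                sub = fn(k, v)
                if sub is not None:
                    new_k, new_v = sub
                    updated[new_k] = substitute_md_rec(new_v, fn)
                else:
                    updated[k] = substitute_md_rec(v, fn)
            return updated
        elif isinstance(md, list):
            out: list[Any] = []
            for v in md:
                sub = fn(None, v)
                out.append(substitute_md_rec(sub[1] if sub is not None else v, fn))
            return out
        else:
            return md

    # Example: rename every 'old_name' key at any nesting depth.
    md = {'old_name': [{'old_name': 1}, {'keep': 2}]}
    print(substitute_md_rec(md, lambda k, v: ('new_name', v) if k == 'old_name' else None))
    # {'new_name': [{'new_name': 1}, {'keep': 2}]}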
pixeltable/metadata/schema.py
CHANGED
@@ -1,37 +1,48 @@
 import dataclasses
+import typing
 import uuid
-from typing import
+from typing import Any, Optional, TypeVar, Union, get_type_hints

 import sqlalchemy as sql
 import sqlalchemy.orm as orm
-from sqlalchemy import ForeignKey
-from sqlalchemy import
-from sqlalchemy.dialects.postgresql import UUID, JSONB
+from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary
+from sqlalchemy.dialects.postgresql import JSONB, UUID
 from sqlalchemy.orm import declarative_base
+from sqlalchemy.orm.decl_api import DeclarativeMeta

-Base
+# Base has to be marked explicitly as a type, in order to be used elsewhere as a type hint. But in addition to being
+# a type, it's also a `DeclarativeMeta`. The following pattern enables us to expose both `Base` and `Base.metadata`
+# outside of the module in a typesafe way.
+Base: type = declarative_base()
+assert isinstance(Base, DeclarativeMeta)
+base_metadata = Base.metadata

 T = TypeVar('T')

-def md_from_dict(data_class_type:
+def md_from_dict(data_class_type: type[T], data: Any) -> T:
     """Re-instantiate a dataclass instance that contains nested dataclasses from a dict."""
     if dataclasses.is_dataclass(data_class_type):
         fieldtypes = {f: t for f, t in get_type_hints(data_class_type).items()}
-        return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})
-
-
+        return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})  # type: ignore[return-value]
+
+    origin = typing.get_origin(data_class_type)
+    if origin is not None:
+        type_args = typing.get_args(data_class_type)
+        if origin is Union and type(None) in type_args:
             # Handling Optional types
-            non_none_args = [arg for arg in
-
-
-        elif
-            return [md_from_dict(
-        elif
-            key_type =
-            val_type =
-            return {key_type(key): md_from_dict(val_type, val) for key, val in data.items()}
-        elif
-            return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(
+            non_none_args = [arg for arg in type_args if arg is not type(None)]
+            assert len(non_none_args) == 1
+            return md_from_dict(non_none_args[0], data) if data is not None else None
+        elif origin is list:
+            return [md_from_dict(type_args[0], elem) for elem in data]  # type: ignore[return-value]
+        elif origin is dict:
+            key_type = type_args[0]
+            val_type = type_args[1]
+            return {key_type(key): md_from_dict(val_type, val) for key, val in data.items()}  # type: ignore[return-value]
+        elif origin is tuple:
+            return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(type_args, data))  # type: ignore[return-value]
+        else:
+            assert False
     else:
         return data

@@ -115,7 +126,7 @@ class ViewMd:
     is_snapshot: bool

     # (table id, version); for mutable views, all versions are None
-    base_versions:
+    base_versions: list[tuple[str, Optional[int]]]

     # filter predicate applied to the base table; view-only
     predicate: Optional[dict[str, Any]]