pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/column.py +41 -29
- pixeltable/catalog/globals.py +18 -0
- pixeltable/catalog/insertable_table.py +30 -10
- pixeltable/catalog/table.py +198 -86
- pixeltable/catalog/table_version.py +47 -53
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +17 -18
- pixeltable/dataframe.py +27 -36
- pixeltable/env.py +7 -0
- pixeltable/exec/__init__.py +0 -1
- pixeltable/exec/aggregation_node.py +6 -3
- pixeltable/exec/cache_prefetch_node.py +189 -43
- pixeltable/exec/data_row_batch.py +5 -22
- pixeltable/exec/exec_context.py +2 -2
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval_node.py +23 -16
- pixeltable/exec/in_memory_data_node.py +6 -3
- pixeltable/exec/sql_node.py +24 -25
- pixeltable/exprs/arithmetic_expr.py +12 -5
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +97 -14
- pixeltable/exprs/comparison.py +10 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +27 -18
- pixeltable/exprs/expr.py +53 -52
- pixeltable/exprs/expr_set.py +5 -0
- pixeltable/exprs/function_call.py +32 -16
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +6 -11
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +12 -11
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +7 -5
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/expr_template_function.py +6 -5
- pixeltable/func/function.py +11 -10
- pixeltable/func/udf.py +6 -11
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/globals.py +5 -7
- pixeltable/functions/huggingface.py +155 -45
- pixeltable/functions/llama_cpp.py +107 -0
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +9 -0
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +5 -2
- pixeltable/globals.py +67 -26
- pixeltable/index/btree.py +16 -3
- pixeltable/index/embedding_index.py +4 -4
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +96 -2
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +1 -1
- pixeltable/iterators/video.py +120 -63
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +45 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/plan.py +17 -15
- pixeltable/py.typed +0 -0
- pixeltable/store.py +7 -2
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +28 -5
- pixeltable/type_system.py +100 -36
- pixeltable/utils/coco.py +5 -5
- pixeltable/utils/documents.py +15 -1
- pixeltable/utils/formatter.py +12 -13
- pixeltable/utils/s3.py +6 -3
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
- pixeltable-0.2.23.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable-0.2.21.dist-info/RECORD +0 -148
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Iterator, Optional, Union
|
|
3
|
+
|
|
4
|
+
import fiftyone as fo # type: ignore[import-untyped]
|
|
5
|
+
import fiftyone.utils.data as foud # type: ignore[import-untyped]
|
|
6
|
+
import PIL.Image
|
|
7
|
+
import puremagic
|
|
8
|
+
|
|
9
|
+
import pixeltable as pxt
|
|
10
|
+
import pixeltable.exceptions as excs
|
|
11
|
+
from pixeltable import exprs
|
|
12
|
+
from pixeltable.env import Env
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
16
|
+
"""
|
|
17
|
+
Implementation of a FiftyOne `DatasetImporter` that reads image data from a Pixeltable table.
|
|
18
|
+
"""
|
|
19
|
+
__image_format: str # format to use for any exported images that are not already stored on disk
|
|
20
|
+
__labels: dict[str, tuple[exprs.Expr, type[fo.Label]]] # label_name -> (expr, label_cls)
|
|
21
|
+
__image_idx: int # index of the image expr in the select list
|
|
22
|
+
__localpath_idx: Optional[int] # index of the image localpath in the select list, if present
|
|
23
|
+
__row_iter: Iterator[list] # iterator over the table rows, to be converted to FiftyOne samples
|
|
24
|
+
|
|
25
|
+
def __init__(
|
|
26
|
+
self,
|
|
27
|
+
tbl: pxt.Table,
|
|
28
|
+
image: exprs.Expr,
|
|
29
|
+
image_format: str,
|
|
30
|
+
classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
|
|
31
|
+
detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
|
|
32
|
+
dataset_dir: Optional[os.PathLike] = None,
|
|
33
|
+
shuffle: bool = False,
|
|
34
|
+
seed: Union[int, float, str, bytes, bytearray, None] = None,
|
|
35
|
+
max_samples: Optional[int] = None,
|
|
36
|
+
):
|
|
37
|
+
super().__init__(
|
|
38
|
+
dataset_dir=dataset_dir,
|
|
39
|
+
shuffle=shuffle,
|
|
40
|
+
seed=seed,
|
|
41
|
+
max_samples=max_samples
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
self.__image_format = image_format
|
|
45
|
+
|
|
46
|
+
label_categories = [
|
|
47
|
+
(classifications, fo.Classifications, 'classifications'),
|
|
48
|
+
(detections, fo.Detections, 'detections'),
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
# Construct the labels. First add labels for all label types that have named dictionaries.
|
|
52
|
+
self.__labels = {}
|
|
53
|
+
for exprs_, label_cls, _ in label_categories:
|
|
54
|
+
if isinstance(exprs_, dict):
|
|
55
|
+
for label_name, expr in exprs_.items():
|
|
56
|
+
if not label_name.isidentifier():
|
|
57
|
+
raise excs.Error(f"Invalid label name: {label_name}")
|
|
58
|
+
if label_name in self.__labels:
|
|
59
|
+
raise excs.Error(f"Duplicate label name: {label_name}")
|
|
60
|
+
self.__labels[label_name] = (expr, label_cls)
|
|
61
|
+
|
|
62
|
+
# Now add the remaining labels, assigning unused default names.
|
|
63
|
+
for exprs_, label_cls, default_name in label_categories:
|
|
64
|
+
if exprs_ is None or isinstance(exprs_, dict):
|
|
65
|
+
continue
|
|
66
|
+
if isinstance(exprs_, exprs.Expr):
|
|
67
|
+
exprs_ = [exprs_]
|
|
68
|
+
assert isinstance(exprs_, list)
|
|
69
|
+
for expr in exprs_:
|
|
70
|
+
if default_name not in self.__labels:
|
|
71
|
+
name = default_name
|
|
72
|
+
else:
|
|
73
|
+
i = 1
|
|
74
|
+
while f'{default_name}_{i}' in self.__labels:
|
|
75
|
+
i += 1
|
|
76
|
+
name = f'{default_name}_{i}'
|
|
77
|
+
self.__labels[name] = (expr, label_cls)
|
|
78
|
+
|
|
79
|
+
# Build the select list:
|
|
80
|
+
# - Labels first, in the order they appear in self.__labels
|
|
81
|
+
# - Then the `image` expr
|
|
82
|
+
# - Then `image.localpath`, if `image` is a stored column ref
|
|
83
|
+
|
|
84
|
+
selection = [expr for expr, _ in self.__labels.values()]
|
|
85
|
+
self.__image_idx = len(selection)
|
|
86
|
+
selection.append(image)
|
|
87
|
+
|
|
88
|
+
if isinstance(image, exprs.ColumnRef) and image.col.is_stored:
|
|
89
|
+
# A stored image column; we can use the existing localpaths
|
|
90
|
+
self.__localpath_idx = len(selection)
|
|
91
|
+
selection.append(image.localpath)
|
|
92
|
+
else:
|
|
93
|
+
self.__localpath_idx = None
|
|
94
|
+
|
|
95
|
+
df = tbl.select(*selection)
|
|
96
|
+
self.__row_iter = df._output_row_iterator()
|
|
97
|
+
|
|
98
|
+
def __next__(self) -> tuple[str, Optional[fo.ImageMetadata], Optional[dict[str, fo.Label]]]:
|
|
99
|
+
row = next(self.__row_iter)
|
|
100
|
+
img = row[self.__image_idx]
|
|
101
|
+
assert isinstance(img, PIL.Image.Image)
|
|
102
|
+
if self.__localpath_idx is not None:
|
|
103
|
+
# Use the existing localpath of the stored image
|
|
104
|
+
file = row[self.__localpath_idx]
|
|
105
|
+
assert isinstance(file, str)
|
|
106
|
+
else:
|
|
107
|
+
# Write the dynamically created image to a temp file
|
|
108
|
+
file = str(Env.get().create_tmp_path(f'.{self.__image_format}'))
|
|
109
|
+
img.save(file, format=self.__image_format)
|
|
110
|
+
|
|
111
|
+
metadata = fo.ImageMetadata(
|
|
112
|
+
size_bytes=os.path.getsize(file),
|
|
113
|
+
mime_type=puremagic.from_file(file, mime=True),
|
|
114
|
+
width=img.width,
|
|
115
|
+
height=img.height,
|
|
116
|
+
filepath=file,
|
|
117
|
+
num_channels=len(img.getbands()),
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
labels: dict[str, fo.Label] = {}
|
|
121
|
+
for idx, (label_name, (_, label_cls)) in enumerate(self.__labels.items()):
|
|
122
|
+
label_data = row[idx]
|
|
123
|
+
if label_data is None:
|
|
124
|
+
continue
|
|
125
|
+
|
|
126
|
+
label: fo.Label
|
|
127
|
+
if label_cls is fo.Classifications:
|
|
128
|
+
label = fo.Classifications(classifications=self.__as_fo_classifications(label_data))
|
|
129
|
+
elif label_cls is fo.Detections:
|
|
130
|
+
label = fo.Detections(detections=self.__as_fo_detections(label_data))
|
|
131
|
+
else:
|
|
132
|
+
assert False
|
|
133
|
+
labels[label_name] = label
|
|
134
|
+
|
|
135
|
+
return file, metadata, labels
|
|
136
|
+
|
|
137
|
+
def __as_fo_classifications(self, data: list) -> list[fo.Classification]:
|
|
138
|
+
if not isinstance(data, list) or any('label' not in entry for entry in data):
|
|
139
|
+
raise excs.Error(
|
|
140
|
+
f'Invalid classifications data: {data}\n'
|
|
141
|
+
"(Expected a list of dicts, each containing a 'label' key)"
|
|
142
|
+
)
|
|
143
|
+
return [
|
|
144
|
+
fo.Classification(label=entry['label'], confidence=entry.get('confidence'))
|
|
145
|
+
for entry in data
|
|
146
|
+
]
|
|
147
|
+
|
|
148
|
+
def __as_fo_detections(self, data: list) -> list[fo.Detections]:
|
|
149
|
+
if not isinstance(data, list) or any('label' not in entry or 'bounding_box' not in entry for entry in data):
|
|
150
|
+
raise excs.Error(
|
|
151
|
+
f'Invalid detections data: {data}\n'
|
|
152
|
+
"(Expected a list of dicts, each containing a 'label' and 'bounding_box' key)"
|
|
153
|
+
)
|
|
154
|
+
return [
|
|
155
|
+
fo.Detection(label=entry['label'], bounding_box=entry['bounding_box'], confidence=entry.get('confidence'))
|
|
156
|
+
for entry in data
|
|
157
|
+
]
|
|
158
|
+
|
|
159
|
+
@property
|
|
160
|
+
def has_dataset_info(self) -> bool:
|
|
161
|
+
return False
|
|
162
|
+
|
|
163
|
+
@property
|
|
164
|
+
def has_image_metadata(self) -> bool:
|
|
165
|
+
return True
|
|
166
|
+
|
|
167
|
+
@property
|
|
168
|
+
def label_cls(self) -> dict[str, type]:
|
|
169
|
+
return {label_name: label_cls for label_name, (_, label_cls) in self.__labels.items()}
|
|
170
|
+
|
|
171
|
+
def setup(self) -> None:
|
|
172
|
+
pass
|
|
173
|
+
|
|
174
|
+
def get_dataset_info(self) -> dict:
|
|
175
|
+
pass
|
|
176
|
+
|
|
177
|
+
def close(self, *args) -> None:
|
|
178
|
+
pass
|
pixeltable/io/globals.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
|
-
from typing import Any, Literal, Optional, Union
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Literal, Optional, Union
|
|
2
2
|
|
|
3
3
|
import pixeltable as pxt
|
|
4
4
|
import pixeltable.exceptions as excs
|
|
5
|
-
from pixeltable import Table
|
|
5
|
+
from pixeltable import Table, exprs
|
|
6
|
+
from pixeltable.env import Env
|
|
6
7
|
from pixeltable.io.external_store import SyncStatus
|
|
7
8
|
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
import fiftyone as fo # type: ignore[import-untyped]
|
|
11
|
+
|
|
8
12
|
|
|
9
13
|
def create_label_studio_project(
|
|
10
14
|
t: Table,
|
|
@@ -116,6 +120,8 @@ def create_label_studio_project(
|
|
|
116
120
|
s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
|
|
117
121
|
)
|
|
118
122
|
"""
|
|
123
|
+
Env.get().require_package('label_studio_sdk')
|
|
124
|
+
|
|
119
125
|
from pixeltable.io.label_studio import LabelStudioProject
|
|
120
126
|
|
|
121
127
|
ls_project = LabelStudioProject.create(
|
|
@@ -267,3 +273,91 @@ def import_json(
|
|
|
267
273
|
contents = urllib.request.urlopen(filepath_or_url).read()
|
|
268
274
|
data = json.loads(contents, **kwargs)
|
|
269
275
|
return import_rows(tbl_path, data, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def export_images_as_fo_dataset(
|
|
279
|
+
tbl: pxt.Table,
|
|
280
|
+
images: exprs.Expr,
|
|
281
|
+
image_format: str = 'webp',
|
|
282
|
+
classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
|
|
283
|
+
detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
|
|
284
|
+
) -> 'fo.Dataset':
|
|
285
|
+
"""
|
|
286
|
+
Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column
|
|
287
|
+
(or expression) containing image data, along with optional additional columns containing labels. Currently, only
|
|
288
|
+
classification and detection labels are supported.
|
|
289
|
+
|
|
290
|
+
The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/docs/working-with-voxel51) tutorial contains a
|
|
291
|
+
fully worked example showing how to export data from a Pixeltable table and load it into Voxel51.
|
|
292
|
+
|
|
293
|
+
Images in the dataset that already exist on disk will be exported directly, in whatever format they
|
|
294
|
+
are stored in. Images that are not already on disk (such as frames extracted using a
|
|
295
|
+
[`FrameIterator`][pixeltable.iterators.FrameIterator]) will first be written to disk in the specified
|
|
296
|
+
`image_format`.
|
|
297
|
+
|
|
298
|
+
The label parameters accept one or more sets of labels of each type. If a single `Expr` is provided, then it will
|
|
299
|
+
be exported as a single set of labels with a default name such as `classifications`.
|
|
300
|
+
(The single set of labels may still contain multiple individual labels; see below.)
|
|
301
|
+
If a list of `Expr`s is provided, then each one will be exported as a separate set of labels with a default name
|
|
302
|
+
such as `classifications`, `classifications_1`, etc. If a dictionary of `Expr`s is provided, then each entry will
|
|
303
|
+
be exported as a set of labels with the specified name.
|
|
304
|
+
|
|
305
|
+
__Requirements:__
|
|
306
|
+
|
|
307
|
+
- `pip install fiftyone`
|
|
308
|
+
|
|
309
|
+
Args:
|
|
310
|
+
tbl: The table from which to export data.
|
|
311
|
+
images: A column or expression that contains the images to export.
|
|
312
|
+
image_format: The format to use when writing out images for export.
|
|
313
|
+
classifications: Optional image classification labels. If a single `Expr` is provided, it must be a table
|
|
314
|
+
column or an expression that evaluates to a list of dictionaries. Each dictionary in the list corresponds
|
|
315
|
+
to an image class and must have the following structure:
|
|
316
|
+
|
|
317
|
+
```python
|
|
318
|
+
{'label': 'zebra', 'confidence': 0.325}
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
If multiple `Expr`s are provided, each one must evaluate to a list of such dictionaries.
|
|
322
|
+
detections: Optional image detection labels. If a single `Expr` is provided, it must be a table column or an
|
|
323
|
+
expression that evaluates to a list of dictionaries. Each dictionary in the list corresponds to an image
|
|
324
|
+
detection, and must have the following structure:
|
|
325
|
+
|
|
326
|
+
```python
|
|
327
|
+
{
|
|
328
|
+
'label': 'giraffe',
|
|
329
|
+
'confidence': 0.99,
|
|
330
|
+
'bounding_box': [0.081, 0.836, 0.202, 0.136] # [x, y, w, h], fractional coordinates
|
|
331
|
+
}
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
If multiple `Expr`s are provided, each one must evaluate to a list of such dictionaries.
|
|
335
|
+
|
|
336
|
+
Returns:
|
|
337
|
+
A Voxel51 dataset.
|
|
338
|
+
|
|
339
|
+
Example:
|
|
340
|
+
Export the images in the `image` column of the table `tbl` as a Voxel51 dataset, using classification
|
|
341
|
+
labels from `tbl.classifications`:
|
|
342
|
+
|
|
343
|
+
>>> export_images_as_fo_dataset(
|
|
344
|
+
... tbl,
|
|
345
|
+
... tbl.image,
|
|
346
|
+
... classifications=tbl.classifications
|
|
347
|
+
... )
|
|
348
|
+
|
|
349
|
+
See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/docs/working-with-voxel51) tutorial
|
|
350
|
+
for a fully worked example.
|
|
351
|
+
"""
|
|
352
|
+
Env.get().require_package('fiftyone')
|
|
353
|
+
|
|
354
|
+
import fiftyone as fo
|
|
355
|
+
|
|
356
|
+
from pixeltable.io.fiftyone import PxtImageDatasetImporter
|
|
357
|
+
|
|
358
|
+
if not images.col_type.is_image_type():
|
|
359
|
+
raise excs.Error(f'`images` must be an expression of type Image (got {images.col_type._to_base_str()})')
|
|
360
|
+
|
|
361
|
+
return fo.Dataset.from_importer(PxtImageDatasetImporter(
|
|
362
|
+
tbl, images, image_format, classifications=classifications, detections=detections
|
|
363
|
+
))
|
pixeltable/iterators/base.py
CHANGED
pixeltable/iterators/document.py
CHANGED
|
@@ -152,7 +152,7 @@ class DocumentSplitter(ComponentIterator):
|
|
|
152
152
|
assert self._doc_handle.pdf_doc is not None
|
|
153
153
|
self._sections = self._pdf_sections()
|
|
154
154
|
else:
|
|
155
|
-
assert False, f'
|
|
155
|
+
assert False, f'Unsupported document format: {self._doc_handle.format}'
|
|
156
156
|
|
|
157
157
|
if Separator.SENTENCE in self._separators:
|
|
158
158
|
self._sections = self._sentence_sections(self._sections)
|
pixeltable/iterators/video.py
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import math
|
|
3
|
+
from fractions import Fraction
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Any, Optional, Sequence
|
|
5
6
|
|
|
6
|
-
import
|
|
7
|
+
import av # type: ignore[import-untyped]
|
|
8
|
+
import pandas as pd
|
|
7
9
|
import PIL.Image
|
|
8
10
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
+
import pixeltable.exceptions as excs
|
|
12
|
+
import pixeltable.type_system as ts
|
|
11
13
|
|
|
12
14
|
from .base import ComponentIterator
|
|
13
15
|
|
|
@@ -30,108 +32,163 @@ class FrameIterator(ComponentIterator):
|
|
|
30
32
|
`num_frames` is greater than the number of frames in the video, all frames will be extracted.
|
|
31
33
|
"""
|
|
32
34
|
|
|
35
|
+
# Input parameters
|
|
33
36
|
video_path: Path
|
|
34
|
-
video_reader: cv2.VideoCapture
|
|
35
37
|
fps: Optional[float]
|
|
36
38
|
num_frames: Optional[int]
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
|
|
40
|
+
# Video info
|
|
41
|
+
container: av.container.input.InputContainer
|
|
42
|
+
video_framerate: Fraction
|
|
43
|
+
video_time_base: Fraction
|
|
44
|
+
video_frame_count: int
|
|
45
|
+
video_start_time: int
|
|
46
|
+
|
|
47
|
+
# List of frame indices to be extracted, or None to extract all frames
|
|
48
|
+
frames_to_extract: Optional[list[int]]
|
|
49
|
+
|
|
50
|
+
# Next frame to extract, as an iterator `pos` index. If `frames_to_extract` is None, this is the same as the
|
|
51
|
+
# frame index in the video. Otherwise, the corresponding video index is `frames_to_extract[next_pos]`.
|
|
52
|
+
next_pos: int
|
|
40
53
|
|
|
41
54
|
def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
|
|
42
55
|
if fps is not None and num_frames is not None:
|
|
43
|
-
raise Error('At most one of `fps` or `num_frames` may be specified')
|
|
56
|
+
raise excs.Error('At most one of `fps` or `num_frames` may be specified')
|
|
44
57
|
|
|
45
58
|
video_path = Path(video)
|
|
46
59
|
assert video_path.exists() and video_path.is_file()
|
|
47
60
|
self.video_path = video_path
|
|
48
|
-
self.
|
|
61
|
+
self.container = av.open(str(video_path))
|
|
49
62
|
self.fps = fps
|
|
50
63
|
self.num_frames = num_frames
|
|
51
|
-
if not self.video_reader.isOpened():
|
|
52
|
-
raise Error(f'Failed to open video: {video}')
|
|
53
64
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
65
|
+
self.video_framerate = self.container.streams.video[0].average_rate
|
|
66
|
+
self.video_time_base = self.container.streams.video[0].time_base
|
|
67
|
+
self.video_start_time = self.container.streams.video[0].start_time or 0
|
|
68
|
+
|
|
69
|
+
# Determine the number of frames in the video
|
|
70
|
+
self.video_frame_count = self.container.streams.video[0].frames
|
|
71
|
+
if self.video_frame_count == 0:
|
|
72
|
+
# The video codec does not provide a frame count in the standard `frames` field. Try some other methods.
|
|
73
|
+
metadata: dict = self.container.streams.video[0].metadata
|
|
74
|
+
if 'NUMBER_OF_FRAMES' in metadata:
|
|
75
|
+
self.video_frame_count = int(metadata['NUMBER_OF_FRAMES'])
|
|
76
|
+
elif 'DURATION' in metadata:
|
|
77
|
+
# As a last resort, calculate the frame count from the stream duration.
|
|
78
|
+
duration = metadata['DURATION']
|
|
79
|
+
assert isinstance(duration, str)
|
|
80
|
+
seconds = pd.to_timedelta(duration).total_seconds()
|
|
81
|
+
# Usually the duration and framerate are precise enough for this calculation to be accurate, but if
|
|
82
|
+
# we encounter a case where it's off by one due to a rounding error, that's ok; we only use this
|
|
83
|
+
# to determine the positions of the sampled frames when `fps` or `num_frames` is specified.
|
|
84
|
+
self.video_frame_count = round(seconds * self.video_framerate)
|
|
85
|
+
else:
|
|
86
|
+
raise excs.Error(f'Video {video}: failed to get number of frames')
|
|
60
87
|
|
|
61
88
|
if num_frames is not None:
|
|
62
89
|
# specific number of frames
|
|
63
|
-
if num_frames >
|
|
90
|
+
if num_frames > self.video_frame_count:
|
|
64
91
|
# Extract all frames
|
|
65
|
-
self.frames_to_extract =
|
|
92
|
+
self.frames_to_extract = None
|
|
66
93
|
else:
|
|
67
|
-
spacing = float(
|
|
94
|
+
spacing = float(self.video_frame_count) / float(num_frames)
|
|
68
95
|
self.frames_to_extract = list(round(i * spacing) for i in range(num_frames))
|
|
69
96
|
assert len(self.frames_to_extract) == num_frames
|
|
70
97
|
else:
|
|
71
98
|
if fps is None or fps == 0.0:
|
|
72
99
|
# Extract all frames
|
|
73
|
-
self.frames_to_extract =
|
|
100
|
+
self.frames_to_extract = None
|
|
101
|
+
elif fps > float(self.video_framerate):
|
|
102
|
+
raise excs.Error(
|
|
103
|
+
f'Video {video}: requested fps ({fps}) exceeds that of the video ({float(self.video_framerate)})'
|
|
104
|
+
)
|
|
74
105
|
else:
|
|
75
106
|
# Extract frames at the implied frequency
|
|
76
|
-
freq = fps /
|
|
77
|
-
n = math.ceil(
|
|
107
|
+
freq = fps / float(self.video_framerate)
|
|
108
|
+
n = math.ceil(self.video_frame_count * freq) # number of frames to extract
|
|
78
109
|
self.frames_to_extract = list(round(i / freq) for i in range(n))
|
|
79
110
|
|
|
80
|
-
# We need the list of frames as both a list (for set_pos) and a set (for fast lookups when
|
|
81
|
-
# there are lots of frames)
|
|
82
|
-
self.frames_set = set(self.frames_to_extract)
|
|
83
111
|
_logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames}')
|
|
84
|
-
self.
|
|
112
|
+
self.next_pos = 0
|
|
85
113
|
|
|
86
114
|
@classmethod
|
|
87
|
-
def input_schema(cls) -> dict[str, ColumnType]:
|
|
115
|
+
def input_schema(cls) -> dict[str, ts.ColumnType]:
|
|
88
116
|
return {
|
|
89
|
-
'video': VideoType(nullable=False),
|
|
90
|
-
'fps': FloatType(nullable=True),
|
|
91
|
-
'num_frames': IntType(nullable=True),
|
|
117
|
+
'video': ts.VideoType(nullable=False),
|
|
118
|
+
'fps': ts.FloatType(nullable=True),
|
|
119
|
+
'num_frames': ts.IntType(nullable=True),
|
|
92
120
|
}
|
|
93
121
|
|
|
94
122
|
@classmethod
|
|
95
|
-
def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ColumnType], list[str]]:
|
|
123
|
+
def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
|
|
96
124
|
return {
|
|
97
|
-
'frame_idx': IntType(),
|
|
98
|
-
'pos_msec': FloatType(),
|
|
99
|
-
'pos_frame':
|
|
100
|
-
'frame': ImageType(),
|
|
125
|
+
'frame_idx': ts.IntType(),
|
|
126
|
+
'pos_msec': ts.FloatType(),
|
|
127
|
+
'pos_frame': ts.IntType(),
|
|
128
|
+
'frame': ts.ImageType(),
|
|
101
129
|
}, ['frame']
|
|
102
130
|
|
|
103
131
|
def __next__(self) -> dict[str, Any]:
|
|
104
|
-
#
|
|
105
|
-
#
|
|
132
|
+
# Determine the frame index in the video corresponding to the iterator index `next_pos`;
|
|
133
|
+
# the frame at this index is the one we want to extract next
|
|
134
|
+
if self.frames_to_extract is None:
|
|
135
|
+
next_video_idx = self.next_pos # we're extracting all frames
|
|
136
|
+
elif self.next_pos >= len(self.frames_to_extract):
|
|
137
|
+
raise StopIteration
|
|
138
|
+
else:
|
|
139
|
+
next_video_idx = self.frames_to_extract[self.next_pos]
|
|
140
|
+
|
|
141
|
+
# We are searching for the frame at the index implied by `next_pos`. Step through the video until we
|
|
142
|
+
# find it. There are two reasons why it might not be the immediate next frame in the video:
|
|
143
|
+
# (1) `fps` or `num_frames` was specified as an iterator argument; or
|
|
144
|
+
# (2) we just did a seek, and the desired frame is not a keyframe.
|
|
145
|
+
# TODO: In case (1) it will usually be fastest to step through the frames until we find the one we're
|
|
146
|
+
# looking for. But in some cases it may be faster to do a seek; for example, when `fps` is very
|
|
147
|
+
# low and there are multiple keyframes in between each frame we want to extract (imagine extracting
|
|
148
|
+
# 10 frames from an hourlong video).
|
|
106
149
|
while True:
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
if not status:
|
|
111
|
-
_logger.debug(f'releasing video reader for {self.video_path}')
|
|
112
|
-
self.video_reader.release()
|
|
113
|
-
self.video_reader = None
|
|
150
|
+
try:
|
|
151
|
+
frame = next(self.container.decode(video=0))
|
|
152
|
+
except EOFError:
|
|
114
153
|
raise StopIteration
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
154
|
+
# Compute the index of the current frame in the video based on the presentation timestamp (pts);
|
|
155
|
+
# this ensures we have a canonical understanding of frame index, regardless of how we got here
|
|
156
|
+
# (seek or iteration)
|
|
157
|
+
pts = frame.pts - self.video_start_time
|
|
158
|
+
video_idx = round(pts * self.video_time_base * self.video_framerate)
|
|
159
|
+
assert isinstance(video_idx, int)
|
|
160
|
+
if video_idx < next_video_idx:
|
|
161
|
+
# We haven't reached the desired frame yet
|
|
162
|
+
continue
|
|
163
|
+
|
|
164
|
+
# Sanity check that we're at the right frame.
|
|
165
|
+
if video_idx != next_video_idx:
|
|
166
|
+
raise excs.Error(f'Frame {next_video_idx} is missing from the video (video file is corrupt)')
|
|
167
|
+
img = frame.to_image()
|
|
168
|
+
assert isinstance(img, PIL.Image.Image)
|
|
169
|
+
pos_msec = float(pts * self.video_time_base * 1000)
|
|
170
|
+
result = {
|
|
171
|
+
'frame_idx': self.next_pos,
|
|
172
|
+
'pos_msec': pos_msec,
|
|
173
|
+
'pos_frame': video_idx,
|
|
174
|
+
'frame': img,
|
|
175
|
+
}
|
|
176
|
+
self.next_pos += 1
|
|
177
|
+
return result
|
|
125
178
|
|
|
126
179
|
def close(self) -> None:
|
|
127
|
-
|
|
128
|
-
self.video_reader.release()
|
|
129
|
-
self.video_reader = None
|
|
180
|
+
self.container.close()
|
|
130
181
|
|
|
131
182
|
def set_pos(self, pos: int) -> None:
|
|
132
183
|
"""Seek to frame idx"""
|
|
133
|
-
if pos == self.
|
|
134
|
-
return
|
|
135
|
-
|
|
136
|
-
self.
|
|
137
|
-
|
|
184
|
+
if pos == self.next_pos:
|
|
185
|
+
return # already there
|
|
186
|
+
|
|
187
|
+
video_idx = pos if self.frames_to_extract is None else self.frames_to_extract[pos]
|
|
188
|
+
_logger.debug(f'seeking to frame number {video_idx} (at iterator index {pos})')
|
|
189
|
+
# compute the frame position in time_base units
|
|
190
|
+
seek_pos = int(video_idx / self.video_framerate / self.video_time_base + self.video_start_time)
|
|
191
|
+
# This will seek to the nearest keyframe before the desired frame. If the frame being sought is not a keyframe,
|
|
192
|
+
# then the iterator will step forward to the desired frame on the subsequent call to next().
|
|
193
|
+
self.container.seek(seek_pos, backward=True, stream=self.container.streams.video[0])
|
|
194
|
+
self.next_pos = pos
|
pixeltable/metadata/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
|
|
|
10
10
|
from .schema import SystemInfo, SystemInfoMd
|
|
11
11
|
|
|
12
12
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
13
|
-
VERSION =
|
|
13
|
+
VERSION = 22
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
import sqlalchemy as sql
|
|
3
|
+
|
|
4
|
+
from pixeltable.metadata import register_converter
|
|
5
|
+
from pixeltable.metadata.converters.util import convert_table_schema_version_md, convert_table_md
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@register_converter(version=21)
|
|
9
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
10
|
+
convert_table_schema_version_md(
|
|
11
|
+
engine,
|
|
12
|
+
table_schema_version_md_updater=__update_table_schema_version,
|
|
13
|
+
schema_column_updater=__update_schema_column
|
|
14
|
+
)
|
|
15
|
+
convert_table_md(
|
|
16
|
+
engine,
|
|
17
|
+
substitution_fn=__substitute_md
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __update_table_schema_version(table_schema_version_md: dict) -> None:
|
|
22
|
+
table_schema_version_md['media_validation'] = 'on_write' # MediaValidation.ON_WRITE
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def __update_schema_column(schema_column: dict) -> None:
|
|
26
|
+
schema_column['media_validation'] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
30
|
+
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
|
|
31
|
+
if 'perform_validation' not in v:
|
|
32
|
+
v['perform_validation'] = False
|
|
33
|
+
return k, v
|
|
34
|
+
return None
|