pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/column.py +41 -29
  5. pixeltable/catalog/globals.py +18 -0
  6. pixeltable/catalog/insertable_table.py +30 -10
  7. pixeltable/catalog/table.py +198 -86
  8. pixeltable/catalog/table_version.py +47 -53
  9. pixeltable/catalog/table_version_path.py +2 -2
  10. pixeltable/catalog/view.py +17 -18
  11. pixeltable/dataframe.py +27 -36
  12. pixeltable/env.py +7 -0
  13. pixeltable/exec/__init__.py +0 -1
  14. pixeltable/exec/aggregation_node.py +6 -3
  15. pixeltable/exec/cache_prefetch_node.py +189 -43
  16. pixeltable/exec/data_row_batch.py +5 -22
  17. pixeltable/exec/exec_context.py +2 -2
  18. pixeltable/exec/exec_node.py +3 -2
  19. pixeltable/exec/expr_eval_node.py +23 -16
  20. pixeltable/exec/in_memory_data_node.py +6 -3
  21. pixeltable/exec/sql_node.py +24 -25
  22. pixeltable/exprs/arithmetic_expr.py +12 -5
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +97 -14
  26. pixeltable/exprs/comparison.py +10 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +27 -18
  29. pixeltable/exprs/expr.py +53 -52
  30. pixeltable/exprs/expr_set.py +5 -0
  31. pixeltable/exprs/function_call.py +32 -16
  32. pixeltable/exprs/globals.py +4 -1
  33. pixeltable/exprs/in_predicate.py +8 -7
  34. pixeltable/exprs/inline_expr.py +4 -4
  35. pixeltable/exprs/is_null.py +4 -4
  36. pixeltable/exprs/json_mapper.py +11 -12
  37. pixeltable/exprs/json_path.py +6 -11
  38. pixeltable/exprs/literal.py +5 -5
  39. pixeltable/exprs/method_ref.py +5 -4
  40. pixeltable/exprs/object_ref.py +2 -1
  41. pixeltable/exprs/row_builder.py +88 -36
  42. pixeltable/exprs/rowid_ref.py +12 -11
  43. pixeltable/exprs/similarity_expr.py +12 -7
  44. pixeltable/exprs/sql_element_cache.py +7 -5
  45. pixeltable/exprs/type_cast.py +8 -6
  46. pixeltable/exprs/variable.py +5 -4
  47. pixeltable/func/aggregate_function.py +9 -9
  48. pixeltable/func/expr_template_function.py +6 -5
  49. pixeltable/func/function.py +11 -10
  50. pixeltable/func/udf.py +6 -11
  51. pixeltable/functions/__init__.py +2 -2
  52. pixeltable/functions/globals.py +5 -7
  53. pixeltable/functions/huggingface.py +155 -45
  54. pixeltable/functions/llama_cpp.py +107 -0
  55. pixeltable/functions/mistralai.py +1 -1
  56. pixeltable/functions/ollama.py +147 -0
  57. pixeltable/functions/openai.py +1 -1
  58. pixeltable/functions/replicate.py +72 -0
  59. pixeltable/functions/string.py +9 -0
  60. pixeltable/functions/together.py +1 -1
  61. pixeltable/functions/util.py +5 -2
  62. pixeltable/globals.py +67 -26
  63. pixeltable/index/btree.py +16 -3
  64. pixeltable/index/embedding_index.py +4 -4
  65. pixeltable/io/__init__.py +1 -2
  66. pixeltable/io/fiftyone.py +178 -0
  67. pixeltable/io/globals.py +96 -2
  68. pixeltable/iterators/base.py +3 -2
  69. pixeltable/iterators/document.py +1 -1
  70. pixeltable/iterators/video.py +120 -63
  71. pixeltable/metadata/__init__.py +1 -1
  72. pixeltable/metadata/converters/convert_21.py +34 -0
  73. pixeltable/metadata/converters/util.py +45 -4
  74. pixeltable/metadata/notes.py +1 -0
  75. pixeltable/metadata/schema.py +8 -0
  76. pixeltable/plan.py +17 -15
  77. pixeltable/py.typed +0 -0
  78. pixeltable/store.py +7 -2
  79. pixeltable/tool/create_test_db_dump.py +1 -1
  80. pixeltable/tool/create_test_video.py +1 -1
  81. pixeltable/tool/embed_udf.py +1 -1
  82. pixeltable/tool/mypy_plugin.py +28 -5
  83. pixeltable/type_system.py +100 -36
  84. pixeltable/utils/coco.py +5 -5
  85. pixeltable/utils/documents.py +15 -1
  86. pixeltable/utils/formatter.py +12 -13
  87. pixeltable/utils/s3.py +6 -3
  88. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
  89. pixeltable-0.2.23.dist-info/RECORD +153 -0
  90. pixeltable/exec/media_validation_node.py +0 -43
  91. pixeltable-0.2.21.dist-info/RECORD +0 -148
  92. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
  93. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
  94. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,178 @@
1
+ import os
2
+ from typing import Iterator, Optional, Union
3
+
4
+ import fiftyone as fo # type: ignore[import-untyped]
5
+ import fiftyone.utils.data as foud # type: ignore[import-untyped]
6
+ import PIL.Image
7
+ import puremagic
8
+
9
+ import pixeltable as pxt
10
+ import pixeltable.exceptions as excs
11
+ from pixeltable import exprs
12
+ from pixeltable.env import Env
13
+
14
+
15
class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
    """
    Implementation of a FiftyOne `DatasetImporter` that reads image data from a Pixeltable table.

    Each row of the underlying query becomes one sample: the image expression supplies the image file
    (an existing local path for stored columns, otherwise a freshly written temp file), and each label
    expression supplies one named `fo.Classifications` or `fo.Detections` field.
    """
    __image_format: str  # format to use for any exported images that are not already stored on disk
    __labels: dict[str, tuple[exprs.Expr, type[fo.Label]]]  # label_name -> (expr, label_cls)
    __image_idx: int  # index of the image expr in the select list
    __localpath_idx: Optional[int]  # index of the image localpath in the select list, if present
    __row_iter: Iterator[list]  # iterator over the table rows, to be converted to FiftyOne samples

    def __init__(
        self,
        tbl: pxt.Table,
        image: exprs.Expr,
        image_format: str,
        classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
        detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
        dataset_dir: Optional[os.PathLike] = None,
        shuffle: bool = False,
        seed: Union[int, float, str, bytes, bytearray, None] = None,
        max_samples: Optional[int] = None,
    ):
        """
        Args:
            tbl: The table to select from.
            image: The expression that produces the image of each sample.
            image_format: Format used when an image has to be written to a temp file.
            classifications: Classification label expr(s): a single expr, a list of exprs (default names are
                assigned), or a dict mapping label name to expr.
            detections: Detection label expr(s), in the same forms as `classifications`.
            dataset_dir: Forwarded to the FiftyOne base class.
            shuffle: Forwarded to the FiftyOne base class.
            seed: Forwarded to the FiftyOne base class.
            max_samples: Forwarded to the FiftyOne base class.

        Raises:
            excs.Error: If a dict-supplied label name is not a valid identifier or is used more than once.
        """
        super().__init__(
            dataset_dir=dataset_dir,
            shuffle=shuffle,
            seed=seed,
            max_samples=max_samples
        )

        self.__image_format = image_format

        label_categories = [
            (classifications, fo.Classifications, 'classifications'),
            (detections, fo.Detections, 'detections'),
        ]

        # Construct the labels. First add labels for all label types that have named dictionaries.
        # Explicit names are registered first so that default names never shadow a user-chosen one.
        self.__labels = {}
        for exprs_, label_cls, _ in label_categories:
            if isinstance(exprs_, dict):
                for label_name, expr in exprs_.items():
                    if not label_name.isidentifier():
                        raise excs.Error(f"Invalid label name: {label_name}")
                    if label_name in self.__labels:
                        raise excs.Error(f"Duplicate label name: {label_name}")
                    self.__labels[label_name] = (expr, label_cls)

        # Now add the remaining labels, assigning unused default names.
        for exprs_, label_cls, default_name in label_categories:
            if exprs_ is None or isinstance(exprs_, dict):
                continue
            if isinstance(exprs_, exprs.Expr):
                exprs_ = [exprs_]
            assert isinstance(exprs_, list)
            for expr in exprs_:
                if default_name not in self.__labels:
                    name = default_name
                else:
                    # `classifications` is taken: fall back to `classifications_1`, `classifications_2`, ...
                    i = 1
                    while f'{default_name}_{i}' in self.__labels:
                        i += 1
                    name = f'{default_name}_{i}'
                self.__labels[name] = (expr, label_cls)

        # Build the select list:
        # - Labels first, in the order they appear in self.__labels
        # - Then the `image` expr
        # - Then `image.localpath`, if `image` is a stored columnref

        selection = [expr for expr, _ in self.__labels.values()]
        self.__image_idx = len(selection)
        selection.append(image)

        if isinstance(image, exprs.ColumnRef) and image.col.is_stored:
            # A stored image column; we can use the existing localpaths
            self.__localpath_idx = len(selection)
            selection.append(image.localpath)
        else:
            self.__localpath_idx = None

        df = tbl.select(*selection)
        self.__row_iter = df._output_row_iterator()

    def __next__(self) -> tuple[str, Optional[fo.ImageMetadata], Optional[dict[str, fo.Label]]]:
        """
        Convert the next table row into a FiftyOne sample.

        Returns:
            A tuple `(image_path, image_metadata, labels)`; `labels` maps label name to the label object and
            omits labels whose value in this row is `None`.

        Raises:
            StopIteration: When the row iterator is exhausted.
            excs.Error: If a label value does not have the expected structure.
        """
        row = next(self.__row_iter)
        img = row[self.__image_idx]
        assert isinstance(img, PIL.Image.Image)
        if self.__localpath_idx is not None:
            # Use the existing localpath of the stored image
            file = row[self.__localpath_idx]
            assert isinstance(file, str)
        else:
            # Write the dynamically created image to a temp file
            file = str(Env.get().create_tmp_path(f'.{self.__image_format}'))
            img.save(file, format=self.__image_format)

        metadata = fo.ImageMetadata(
            size_bytes=os.path.getsize(file),
            mime_type=puremagic.from_file(file, mime=True),
            width=img.width,
            height=img.height,
            filepath=file,
            num_channels=len(img.getbands()),
        )

        labels: dict[str, fo.Label] = {}
        # The first len(self.__labels) entries of the row are the label values, in dict order.
        for idx, (label_name, (_, label_cls)) in enumerate(self.__labels.items()):
            label_data = row[idx]
            if label_data is None:
                continue

            label: fo.Label
            if label_cls is fo.Classifications:
                label = fo.Classifications(classifications=self.__as_fo_classifications(label_data))
            elif label_cls is fo.Detections:
                label = fo.Detections(detections=self.__as_fo_detections(label_data))
            else:
                # Unreachable: __init__ only ever registers the two classes above. Raised explicitly (rather
                # than `assert False`) so the check survives `python -O`.
                raise AssertionError(f'Unexpected label class: {label_cls}')
            labels[label_name] = label

        return file, metadata, labels

    def __as_fo_classifications(self, data: list) -> list[fo.Classification]:
        """
        Convert a list of `{'label': ..., 'confidence': ...}` dicts into `fo.Classification` objects.

        Raises:
            excs.Error: If `data` is not a list of dicts that each contain a 'label' key.
        """
        # The isinstance check on each entry ensures malformed entries (e.g. strings or numbers) produce the
        # descriptive error below rather than a TypeError from the `in` test or the subscript.
        if not isinstance(data, list) or any(
            not isinstance(entry, dict) or 'label' not in entry for entry in data
        ):
            raise excs.Error(
                f'Invalid classifications data: {data}\n'
                "(Expected a list of dicts, each containing a 'label' key)"
            )
        return [
            fo.Classification(label=entry['label'], confidence=entry.get('confidence'))
            for entry in data
        ]

    def __as_fo_detections(self, data: list) -> list[fo.Detection]:
        """
        Convert a list of `{'label': ..., 'bounding_box': ..., 'confidence': ...}` dicts into `fo.Detection`
        objects.

        Raises:
            excs.Error: If `data` is not a list of dicts that each contain 'label' and 'bounding_box' keys.
        """
        if not isinstance(data, list) or any(
            not isinstance(entry, dict) or 'label' not in entry or 'bounding_box' not in entry
            for entry in data
        ):
            raise excs.Error(
                f'Invalid detections data: {data}\n'
                "(Expected a list of dicts, each containing a 'label' and 'bounding_box' key)"
            )
        return [
            fo.Detection(label=entry['label'], bounding_box=entry['bounding_box'], confidence=entry.get('confidence'))
            for entry in data
        ]

    @property
    def has_dataset_info(self) -> bool:
        """This importer provides no dataset-level info."""
        return False

    @property
    def has_image_metadata(self) -> bool:
        """`__next__` always returns an `fo.ImageMetadata` for each sample."""
        return True

    @property
    def label_cls(self) -> dict[str, type]:
        """Mapping from label name to the FiftyOne label class produced for it."""
        return {label_name: label_cls for label_name, (_, label_cls) in self.__labels.items()}

    def setup(self) -> None:
        """No setup required; the row iterator is created in `__init__`."""
        pass

    def get_dataset_info(self) -> dict:
        """Return an empty info dict (`has_dataset_info` is False, so there is nothing to report)."""
        return {}

    def close(self, *args) -> None:
        """Nothing to release."""
        pass
pixeltable/io/globals.py CHANGED
@@ -1,10 +1,14 @@
1
- from typing import Any, Literal, Optional, Union
1
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
2
2
 
3
3
  import pixeltable as pxt
4
4
  import pixeltable.exceptions as excs
5
- from pixeltable import Table
5
+ from pixeltable import Table, exprs
6
+ from pixeltable.env import Env
6
7
  from pixeltable.io.external_store import SyncStatus
7
8
 
9
+ if TYPE_CHECKING:
10
+ import fiftyone as fo # type: ignore[import-untyped]
11
+
8
12
 
9
13
  def create_label_studio_project(
10
14
  t: Table,
@@ -116,6 +120,8 @@ def create_label_studio_project(
116
120
  s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
117
121
  )
118
122
  """
123
+ Env.get().require_package('label_studio_sdk')
124
+
119
125
  from pixeltable.io.label_studio import LabelStudioProject
120
126
 
121
127
  ls_project = LabelStudioProject.create(
@@ -267,3 +273,91 @@ def import_json(
267
273
  contents = urllib.request.urlopen(filepath_or_url).read()
268
274
  data = json.loads(contents, **kwargs)
269
275
  return import_rows(tbl_path, data, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
276
+
277
+
278
def export_images_as_fo_dataset(
    tbl: pxt.Table,
    images: exprs.Expr,
    image_format: str = 'webp',
    classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
    detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
) -> 'fo.Dataset':
    """
    Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column
    (or expression) containing image data, along with optional additional columns containing labels. Currently, only
    classification and detection labels are supported.

    The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/docs/working-with-voxel51) tutorial contains a
    fully worked example showing how to export data from a Pixeltable table and load it into Voxel51.

    Images in the dataset that already exist on disk will be exported directly, in whatever format they
    are stored in. Images that are not already on disk (such as frames extracted using a
    [`FrameIterator`][pixeltable.iterators.FrameIterator]) will first be written to disk in the specified
    `image_format`.

    The label parameters accept one or more sets of labels of each type. If a single `Expr` is provided, then it will
    be exported as a single set of labels with a default name such as `classifications`.
    (The single set of labels may still contain multiple individual labels; see below.)
    If a list of `Expr`s is provided, then each one will be exported as a separate set of labels with a default name
    such as `classifications`, `classifications_1`, etc. If a dictionary of `Expr`s is provided, then each entry will
    be exported as a set of labels with the specified name.

    __Requirements:__

    - `pip install fiftyone`

    Args:
        tbl: The table from which to export data.
        images: A column or expression that contains the images to export.
        image_format: The format to use when writing out images for export.
        classifications: Optional image classification labels. If a single `Expr` is provided, it must be a table
            column or an expression that evaluates to a list of dictionaries. Each dictionary in the list corresponds
            to an image class and must have the following structure:

            ```python
            {'label': 'zebra', 'confidence': 0.325}
            ```

            If multiple `Expr`s are provided, each one must evaluate to a list of such dictionaries.
        detections: Optional image detection labels. If a single `Expr` is provided, it must be a table column or an
            expression that evaluates to a list of dictionaries. Each dictionary in the list corresponds to an image
            detection, and must have the following structure:

            ```python
            {
                'label': 'giraffe',
                'confidence': 0.99,
                'bounding_box': [0.081, 0.836, 0.202, 0.136]  # [x, y, w, h], fractional coordinates
            }
            ```

            If multiple `Expr`s are provided, each one must evaluate to a list of such dictionaries.

    Returns:
        A Voxel51 dataset.

    Raises:
        Error: If `images` is not an expression of image type.

    Example:
        Export the images in the `image` column of the table `tbl` as a Voxel51 dataset, using classification
        labels from `tbl.classifications`:

        >>> export_images_as_fo_dataset(
        ...     tbl,
        ...     tbl.image,
        ...     classifications=tbl.classifications
        ... )

        See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/docs/working-with-voxel51) tutorial
        for a fully worked example.
    """
    Env.get().require_package('fiftyone')

    # Imported lazily: fiftyone is an optional dependency, checked by require_package() above.
    import fiftyone as fo

    from pixeltable.io.fiftyone import PxtImageDatasetImporter

    if not images.col_type.is_image_type():
        raise excs.Error(f'`images` must be an expression of type Image (got {images.col_type._to_base_str()})')

    return fo.Dataset.from_importer(PxtImageDatasetImporter(
        tbl, images, image_format, classifications=classifications, detections=detections
    ))
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
- from typing import Dict, Any, Tuple, List
3
- from abc import abstractmethod, ABC
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any
4
5
 
5
6
  from pixeltable.type_system import ColumnType
6
7
 
@@ -152,7 +152,7 @@ class DocumentSplitter(ComponentIterator):
152
152
  assert self._doc_handle.pdf_doc is not None
153
153
  self._sections = self._pdf_sections()
154
154
  else:
155
- assert False, f'unknown document format: {self._doc_handle.format}'
155
+ assert False, f'Unsupported document format: {self._doc_handle.format}'
156
156
 
157
157
  if Separator.SENTENCE in self._separators:
158
158
  self._sections = self._sentence_sections(self._sections)
@@ -1,13 +1,15 @@
1
1
  import logging
2
2
  import math
3
+ from fractions import Fraction
3
4
  from pathlib import Path
4
5
  from typing import Any, Optional, Sequence
5
6
 
6
- import cv2
7
+ import av # type: ignore[import-untyped]
8
+ import pandas as pd
7
9
  import PIL.Image
8
10
 
9
- from pixeltable.exceptions import Error
10
- from pixeltable.type_system import ColumnType, FloatType, ImageType, IntType, VideoType
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.type_system as ts
11
13
 
12
14
  from .base import ComponentIterator
13
15
 
@@ -30,108 +32,163 @@ class FrameIterator(ComponentIterator):
30
32
  `num_frames` is greater than the number of frames in the video, all frames will be extracted.
31
33
  """
32
34
 
35
+ # Input parameters
33
36
  video_path: Path
34
- video_reader: cv2.VideoCapture
35
37
  fps: Optional[float]
36
38
  num_frames: Optional[int]
37
- frames_to_extract: Sequence[int]
38
- frames_set: set[int]
39
- next_frame_idx: int
39
+
40
+ # Video info
41
+ container: av.container.input.InputContainer
42
+ video_framerate: Fraction
43
+ video_time_base: Fraction
44
+ video_frame_count: int
45
+ video_start_time: int
46
+
47
+ # List of frame indices to be extracted, or None to extract all frames
48
+ frames_to_extract: Optional[list[int]]
49
+
50
+ # Next frame to extract, as an iterator `pos` index. If `frames_to_extract` is None, this is the same as the
51
+ # frame index in the video. Otherwise, the corresponding video index is `frames_to_extract[next_pos]`.
52
+ next_pos: int
40
53
 
41
54
  def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
42
55
  if fps is not None and num_frames is not None:
43
- raise Error('At most one of `fps` or `num_frames` may be specified')
56
+ raise excs.Error('At most one of `fps` or `num_frames` may be specified')
44
57
 
45
58
  video_path = Path(video)
46
59
  assert video_path.exists() and video_path.is_file()
47
60
  self.video_path = video_path
48
- self.video_reader = cv2.VideoCapture(str(video_path))
61
+ self.container = av.open(str(video_path))
49
62
  self.fps = fps
50
63
  self.num_frames = num_frames
51
- if not self.video_reader.isOpened():
52
- raise Error(f'Failed to open video: {video}')
53
64
 
54
- video_fps = int(self.video_reader.get(cv2.CAP_PROP_FPS))
55
- if fps is not None and fps > video_fps:
56
- raise Error(f'Video {video}: requested fps ({fps}) exceeds that of the video ({video_fps})')
57
- num_video_frames = int(self.video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
58
- if num_video_frames == 0:
59
- raise Error(f'Video {video}: failed to get number of frames')
65
+ self.video_framerate = self.container.streams.video[0].average_rate
66
+ self.video_time_base = self.container.streams.video[0].time_base
67
+ self.video_start_time = self.container.streams.video[0].start_time or 0
68
+
69
+ # Determine the number of frames in the video
70
+ self.video_frame_count = self.container.streams.video[0].frames
71
+ if self.video_frame_count == 0:
72
+ # The video codec does not provide a frame count in the standard `frames` field. Try some other methods.
73
+ metadata: dict = self.container.streams.video[0].metadata
74
+ if 'NUMBER_OF_FRAMES' in metadata:
75
+ self.video_frame_count = int(metadata['NUMBER_OF_FRAMES'])
76
+ elif 'DURATION' in metadata:
77
+ # As a last resort, calculate the frame count from the stream duration.
78
+ duration = metadata['DURATION']
79
+ assert isinstance(duration, str)
80
+ seconds = pd.to_timedelta(duration).total_seconds()
81
+ # Usually the duration and framerate are precise enough for this calculation to be accurate, but if
82
+ # we encounter a case where it's off by one due to a rounding error, that's ok; we only use this
83
+ # to determine the positions of the sampled frames when `fps` or `num_frames` is specified.
84
+ self.video_frame_count = round(seconds * self.video_framerate)
85
+ else:
86
+ raise excs.Error(f'Video {video}: failed to get number of frames')
60
87
 
61
88
  if num_frames is not None:
62
89
  # specific number of frames
63
- if num_frames > num_video_frames:
90
+ if num_frames > self.video_frame_count:
64
91
  # Extract all frames
65
- self.frames_to_extract = range(num_video_frames)
92
+ self.frames_to_extract = None
66
93
  else:
67
- spacing = float(num_video_frames) / float(num_frames)
94
+ spacing = float(self.video_frame_count) / float(num_frames)
68
95
  self.frames_to_extract = list(round(i * spacing) for i in range(num_frames))
69
96
  assert len(self.frames_to_extract) == num_frames
70
97
  else:
71
98
  if fps is None or fps == 0.0:
72
99
  # Extract all frames
73
- self.frames_to_extract = range(num_video_frames)
100
+ self.frames_to_extract = None
101
+ elif fps > float(self.video_framerate):
102
+ raise excs.Error(
103
+ f'Video {video}: requested fps ({fps}) exceeds that of the video ({float(self.video_framerate)})'
104
+ )
74
105
  else:
75
106
  # Extract frames at the implied frequency
76
- freq = fps / video_fps
77
- n = math.ceil(num_video_frames * freq) # number of frames to extract
107
+ freq = fps / float(self.video_framerate)
108
+ n = math.ceil(self.video_frame_count * freq) # number of frames to extract
78
109
  self.frames_to_extract = list(round(i / freq) for i in range(n))
79
110
 
80
- # We need the list of frames as both a list (for set_pos) and a set (for fast lookups when
81
- # there are lots of frames)
82
- self.frames_set = set(self.frames_to_extract)
83
111
  _logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames}')
84
- self.next_frame_idx = 0
112
+ self.next_pos = 0
85
113
 
86
114
  @classmethod
87
- def input_schema(cls) -> dict[str, ColumnType]:
115
+ def input_schema(cls) -> dict[str, ts.ColumnType]:
88
116
  return {
89
- 'video': VideoType(nullable=False),
90
- 'fps': FloatType(nullable=True),
91
- 'num_frames': IntType(nullable=True),
117
+ 'video': ts.VideoType(nullable=False),
118
+ 'fps': ts.FloatType(nullable=True),
119
+ 'num_frames': ts.IntType(nullable=True),
92
120
  }
93
121
 
94
122
  @classmethod
95
- def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ColumnType], list[str]]:
123
+ def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ts.ColumnType], list[str]]:
96
124
  return {
97
- 'frame_idx': IntType(),
98
- 'pos_msec': FloatType(),
99
- 'pos_frame': FloatType(),
100
- 'frame': ImageType(),
125
+ 'frame_idx': ts.IntType(),
126
+ 'pos_msec': ts.FloatType(),
127
+ 'pos_frame': ts.IntType(),
128
+ 'frame': ts.ImageType(),
101
129
  }, ['frame']
102
130
 
103
131
    def __next__(self) -> dict[str, Any]:
        """
        Decode and return the frame at iterator position `next_pos`, then advance `next_pos` by one.

        Returns:
            A dict with keys `frame_idx` (iterator position), `pos_msec` (timestamp in milliseconds, relative
            to the stream start time), `pos_frame` (frame index in the video) and `frame` (a PIL image).

        Raises:
            StopIteration: If all requested frames have been returned or decoding signals end of file.
            excs.Error: If decoding skips past the requested frame index.
        """
        # Determine the frame index in the video corresponding to the iterator index `next_pos`;
        # the frame at this index is the one we want to extract next
        if self.frames_to_extract is None:
            next_video_idx = self.next_pos  # we're extracting all frames
        elif self.next_pos >= len(self.frames_to_extract):
            raise StopIteration
        else:
            next_video_idx = self.frames_to_extract[self.next_pos]

        # We are searching for the frame at the index implied by `next_pos`. Step through the video until we
        # find it. There are two reasons why it might not be the immediate next frame in the video:
        # (1) `fps` or `num_frames` was specified as an iterator argument; or
        # (2) we just did a seek, and the desired frame is not a keyframe.
        # TODO: In case (1) it will usually be fastest to step through the frames until we find the one we're
        #     looking for. But in some cases it may be faster to do a seek; for example, when `fps` is very
        #     low and there are multiple keyframes in between each frame we want to extract (imagine extracting
        #     10 frames from an hourlong video).
        while True:
            try:
                # NOTE(review): a new decode() generator is created on every pass; this presumably relies on
                # the container keeping its read position between generators -- confirm against PyAV. If the
                # generator is already exhausted, next() itself raises StopIteration, which also ends iteration.
                frame = next(self.container.decode(video=0))
            except EOFError:
                raise StopIteration
            # Compute the index of the current frame in the video based on the presentation timestamp (pts);
            # this ensures we have a canonical understanding of frame index, regardless of how we got here
            # (seek or iteration)
            # NOTE(review): assumes frame.pts is never None -- TODO confirm.
            pts = frame.pts - self.video_start_time
            video_idx = round(pts * self.video_time_base * self.video_framerate)
            assert isinstance(video_idx, int)
            if video_idx < next_video_idx:
                # We haven't reached the desired frame yet
                continue

            # Sanity check that we're at the right frame.
            if video_idx != next_video_idx:
                raise excs.Error(f'Frame {next_video_idx} is missing from the video (video file is corrupt)')
            img = frame.to_image()
            assert isinstance(img, PIL.Image.Image)
            # pts is in time_base units; convert to milliseconds
            pos_msec = float(pts * self.video_time_base * 1000)
            result = {
                'frame_idx': self.next_pos,
                'pos_msec': pos_msec,
                'pos_frame': video_idx,
                'frame': img,
            }
            self.next_pos += 1
            return result
125
178
 
126
179
    def close(self) -> None:
        """Close the video container opened in `__init__`."""
        self.container.close()
130
181
 
131
182
  def set_pos(self, pos: int) -> None:
132
183
  """Seek to frame idx"""
133
- if pos == self.next_frame_idx:
134
- return
135
- _logger.debug(f'seeking to frame {pos}')
136
- self.video_reader.set(cv2.CAP_PROP_POS_FRAMES, self.frames_to_extract[pos])
137
- self.next_frame_idx = pos
184
+ if pos == self.next_pos:
185
+ return # already there
186
+
187
+ video_idx = pos if self.frames_to_extract is None else self.frames_to_extract[pos]
188
+ _logger.debug(f'seeking to frame number {video_idx} (at iterator index {pos})')
189
+ # compute the frame position in time_base units
190
+ seek_pos = int(video_idx / self.video_framerate / self.video_time_base + self.video_start_time)
191
+ # This will seek to the nearest keyframe before the desired frame. If the frame being sought is not a keyframe,
192
+ # then the iterator will step forward to the desired frame on the subsequent call to next().
193
+ self.container.seek(seek_pos, backward=True, stream=self.container.streams.video[0])
194
+ self.next_pos = pos
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
10
10
  from .schema import SystemInfo, SystemInfoMd
11
11
 
12
12
  # current version of the metadata; this is incremented whenever the metadata schema changes
13
- VERSION = 21
13
+ VERSION = 22
14
14
 
15
15
 
16
16
  def create_system_info(engine: sql.engine.Engine) -> None:
@@ -0,0 +1,34 @@
1
+ from typing import Any, Optional
2
+ import sqlalchemy as sql
3
+
4
+ from pixeltable.metadata import register_converter
5
+ from pixeltable.metadata.converters.util import convert_table_schema_version_md, convert_table_md
6
+
7
+
8
@register_converter(version=21)
def _(engine: sql.engine.Engine) -> None:
    """Metadata converter registered for version 21.

    Applies the media-validation defaults to every table schema version and schema column, then runs
    the `ColumnRef` substitution over the table metadata.
    """
    convert_table_schema_version_md(
        engine,
        table_schema_version_md_updater=__update_table_schema_version,
        schema_column_updater=__update_schema_column
    )
    convert_table_md(
        engine,
        substitution_fn=__substitute_md
    )
19
+
20
+
21
def __update_table_schema_version(table_schema_version_md: dict) -> None:
    """Set the table-level `media_validation` entry to `'on_write'`, modifying the dict in place."""
    # 'on_write' corresponds to MediaValidation.ON_WRITE
    table_schema_version_md.update(media_validation='on_write')
23
+
24
+
25
def __update_schema_column(schema_column: dict) -> None:
    """Set the column-level `media_validation` entry to `None`, modifying the dict in place."""
    schema_column.update(media_validation=None)
27
+
28
+
29
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
    """Substitution callback that adds `perform_validation` to serialized `ColumnRef` dicts.

    Returns `(k, v)` when `v` is a dict whose `_classname` is `'ColumnRef'`; a missing
    `perform_validation` key is set to `False` on `v` in place first. Returns `None` for any other
    value.
    """
    if not isinstance(v, dict):
        return None
    if v.get('_classname') != 'ColumnRef':
        return None
    # Only fill in the default; an existing value is left untouched.
    v.setdefault('perform_validation', False)
    return k, v