pixeltable 0.2.14__py3-none-any.whl → 0.2.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +3 -3
- pixeltable/catalog/table.py +3 -5
- pixeltable/env.py +4 -4
- pixeltable/exprs/arithmetic_expr.py +41 -16
- pixeltable/exprs/expr.py +37 -0
- pixeltable/exprs/function_call.py +5 -1
- pixeltable/exprs/globals.py +3 -1
- pixeltable/exprs/inline_array.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/json.py +46 -0
- pixeltable/functions/{eval.py → vision.py} +170 -27
- pixeltable/io/external_store.py +2 -2
- pixeltable/io/globals.py +1 -1
- pixeltable/io/pandas.py +34 -8
- pixeltable/iterators/video.py +55 -23
- pixeltable/store.py +65 -28
- pixeltable/tool/create_test_db_dump.py +5 -5
- pixeltable/type_system.py +73 -53
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.15.dist-info}/METADATA +2 -2
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.15.dist-info}/RECORD +24 -23
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.15.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.15.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.15.dist-info}/entry_points.txt +0 -0
pixeltable/io/globals.py
CHANGED
@@ -191,7 +191,7 @@ def import_rows(
             if col_name not in schema:
                 schema[col_name] = col_type
             else:
-                supertype =
+                supertype = schema[col_name].supertype(col_type)
                 if supertype is None:
                     raise excs.Error(
                         f'Could not infer type of column `{col_name}`; the value in row {n} does not match preceding type {schema[col_name]}: {value!r}\n'
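This one-line change lets import_rows widen a column's inferred type as it scans successive rows, instead of failing on the first value whose type differs. A minimal sketch of the widening loop, mirroring the logic above (the sample rows are hypothetical; Int and Float are assumed to unify to Float, as for Pixeltable's scalar types):

import pixeltable as pxt

schema: dict[str, pxt.ColumnType] = {}
rows = [{'x': 1}, {'x': 2.5}]  # an int first, then a float
for row in rows:
    for col_name, value in row.items():
        col_type = pxt.ColumnType.infer_literal_type(value)
        if col_name not in schema:
            schema[col_name] = col_type
        else:
            # the new call: widen to the common supertype, or fail if there is none
            supertype = schema[col_name].supertype(col_type)
            if supertype is None:
                raise ValueError('incompatible column types')  # import_rows raises excs.Error here
            schema[col_name] = supertype

print(schema['x'])  # expected: FloatType (Int widened to Float)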
pixeltable/io/pandas.py
CHANGED
@@ -1,7 +1,9 @@
+import datetime
 from typing import Any, Optional, Union

 import numpy as np
 import pandas as pd
+import PIL.Image

 import pixeltable as pxt
 import pixeltable.exceptions as excs
@@ -103,6 +105,17 @@ def __df_to_pxt_schema(
         if pd_name in schema_overrides:
             pxt_type = schema_overrides[pd_name]
         else:
+            # This complicated-looking condition is necessary because we cannot safely call `pd.isna()` on
+            # general objects, so we need to check for nulls in the specific cases where we might expect them.
+            # isinstance(val, float) will check for NaN values in float columns *as well as* floats appearing
+            # in object columns (where Pandas uses NaN as a general null).
+            # np.issubdtype(pd_dtype, np.datetime64) checks for NaT values specifically in datetime columns.
+            has_na = any(
+                (isinstance(val, float) or np.issubdtype(pd_dtype, np.datetime64)) and pd.isna(val)
+                for val in df[pd_name]
+            )
+            if has_na and pd_name in primary_key:
+                raise excs.Error(f'Primary key column `{pd_name}` cannot contain null values.')
             pxt_type = __np_dtype_to_pxt_type(pd_dtype, df[pd_name], pd_name not in primary_key)
         pxt_name = __normalize_pxt_col_name(pd_name)
         # Ensure that column names are unique by appending a distinguishing suffix
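The predicate only calls pd.isna() where a null can actually occur, which sidesteps errors on arbitrary objects (pd.isna() on an array-valued cell returns an array rather than a bool, for instance). A self-contained demonstration of the same check:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'id': [1, 2, 3],                                           # int64: no NaN possible
    'score': [0.5, np.nan, 1.0],                               # float64: NaN
    'label': ['a', np.nan, 'b'],                               # object: NaN used as a general null
    'ts': pd.to_datetime(['2024-01-01', None, '2024-01-03']),  # datetime64: NaT
})

for name in df.columns:
    pd_dtype = df[name].dtype
    has_na = any(
        (isinstance(val, float) or np.issubdtype(pd_dtype, np.datetime64)) and pd.isna(val)
        for val in df[name]
    )
    print(f'{name}: {has_na}')  # id: False, score: True, label: True, ts: True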
@@ -140,21 +153,34 @@ def __np_dtype_to_pxt_type(np_dtype: np.dtype, data_col: pd.Series, nullable: bool)
     """
     if np.issubdtype(np_dtype, np.integer):
         return pxt.IntType(nullable=nullable)
+
     if np.issubdtype(np_dtype, np.floating):
         return pxt.FloatType(nullable=nullable)
+
     if np.issubdtype(np_dtype, np.bool_):
         return pxt.BoolType(nullable=nullable)
-
-
-        if has_nan and not nullable:
-            raise excs.Error(f'Primary key column `{data_col.name}` cannot contain null values.')
+
+    if np.issubdtype(np_dtype, np.character):
         return pxt.StringType(nullable=nullable)
+
     if np.issubdtype(np_dtype, np.datetime64):
-        has_nat = any(pd.isnull(val) for val in data_col)
-        if has_nat and not nullable:
-            raise excs.Error(f'Primary key column `{data_col.name}` cannot contain null values.')
         return pxt.TimestampType(nullable=nullable)
-
+
+    if np_dtype == np.object_:
+        # The `object_` dtype can mean all sorts of things; see if we can infer the Pixeltable type
+        # based on the actual data in `data_col`.
+        # First drop any null values (they don't contribute to type inference).
+        data_col = data_col.dropna()
+
+        if len(data_col) == 0:
+            # No non-null values; default to FloatType (the Pandas type of an all-NaN column)
+            return pxt.FloatType(nullable=nullable)
+
+        inferred_type = pxt.ColumnType.infer_common_literal_type(data_col)
+        if inferred_type is not None:
+            return inferred_type.copy(nullable=nullable)
+
+    raise excs.Error(f'Could not infer Pixeltable type of column: {data_col.name} (dtype: {np_dtype})')


 def __df_row_to_pxt_row(row: tuple[Any, ...], schema: dict[str, pxt.ColumnType]) -> dict[str, Any]:
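Object-dtype columns are now typed from their contents rather than mapped to a fixed type. A hedged sketch of the new fallback chain (the exact inferred type depends on infer_common_literal_type):

import pandas as pd
import pixeltable as pxt

json_col = pd.Series([{'a': 1}, {'a': 2, 'b': 'x'}, None], dtype=object)
inferred = pxt.ColumnType.infer_common_literal_type(json_col.dropna())
print(inferred)  # expected: a JsonType covering both dicts

all_na = pd.Series([float('nan')] * 3, dtype=object)
print(len(all_na.dropna()))  # 0 -> the importer falls back to FloatType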
pixeltable/iterators/video.py
CHANGED
@@ -1,57 +1,89 @@
 import logging
 import math
 from pathlib import Path
-from typing import
+from typing import Any, Optional

-import PIL.Image
 import cv2
+import PIL.Image

 from pixeltable.exceptions import Error
-from pixeltable.type_system import ColumnType,
+from pixeltable.type_system import ColumnType, FloatType, ImageType, IntType, VideoType
+
 from .base import ComponentIterator

 _logger = logging.getLogger('pixeltable')


 class FrameIterator(ComponentIterator):
-    """
+    """
+    Iterator over frames of a video. At most one of `fps` or `num_frames` may be specified. If `fps` is specified,
+    then frames will be extracted at the specified rate (frames per second). If `num_frames` is specified, then the
+    exact number of frames will be extracted. If neither is specified, then all frames will be extracted. The first
+    frame of the video will always be extracted, and the remaining frames will be spaced as evenly as possible.

     Args:
-        video: URL or
-        fps:
-            If set to 0.0, then the native framerate of the video will be used (all frames will be
-
+        video: URL or path of the video to use for frame extraction.
+        fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
+            If omitted or set to 0.0, then the native framerate of the video will be used (all frames will be
+            extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
+        num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
+            `num_frames` is greater than the number of frames in the video, all frames will be extracted.
     """
-    def __init__(self, video: str, *, fps: float =
+    def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
+        if fps is not None and num_frames is not None:
+            raise Error('At most one of `fps` or `num_frames` may be specified')
+
         video_path = Path(video)
         assert video_path.exists() and video_path.is_file()
         self.video_path = video_path
-        self.fps = fps
         self.video_reader = cv2.VideoCapture(str(video_path))
+        self.fps = fps
+        self.num_frames = num_frames
         if not self.video_reader.isOpened():
            raise Error(f'Failed to open video: {video}')
+
         video_fps = int(self.video_reader.get(cv2.CAP_PROP_FPS))
-        if fps > video_fps:
+        if fps is not None and fps > video_fps:
            raise Error(f'Video {video}: requested fps ({fps}) exceeds that of the video ({video_fps})')
-        self.frame_freq = int(video_fps / fps) if fps > 0 else 1
         num_video_frames = int(self.video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
         if num_video_frames == 0:
             raise Error(f'Video {video}: failed to get number of frames')
-        # ceil: round up to ensure we count frame 0
-        self.num_frames = math.ceil(num_video_frames / self.frame_freq) if fps > 0 else num_video_frames
-        _logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps}')

+        if num_frames is not None:
+            # specific number of frames
+            if num_frames > num_video_frames:
+                # Extract all frames
+                self.frames_to_extract = range(num_video_frames)
+            else:
+                spacing = float(num_video_frames) / float(num_frames)
+                self.frames_to_extract = list(round(i * spacing) for i in range(num_frames))
+                assert len(self.frames_to_extract) == num_frames
+        else:
+            if fps is None or fps == 0.0:
+                # Extract all frames
+                self.frames_to_extract = range(num_video_frames)
+            else:
+                # Extract frames at the implied frequency
+                freq = fps / video_fps
+                n = math.ceil(num_video_frames * freq)  # number of frames to extract
+                self.frames_to_extract = list(round(i / freq) for i in range(n))
+
+        # We need the list of frames as both a list (for set_pos) and a set (for fast lookups when
+        # there are lots of frames)
+        self.frames_set = set(self.frames_to_extract)
+        _logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames}')
         self.next_frame_idx = 0

     @classmethod
-    def input_schema(cls) ->
+    def input_schema(cls) -> dict[str, ColumnType]:
         return {
             'video': VideoType(nullable=False),
-            'fps': FloatType()
+            'fps': FloatType(nullable=True),
+            'num_frames': IntType(nullable=True),
         }

     @classmethod
-    def output_schema(cls, *args: Any, **kwargs: Any) ->
+    def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ColumnType], list[str]]:
         return {
             'frame_idx': IntType(),
             'pos_msec': FloatType(),
@@ -59,7 +91,9 @@ class FrameIterator(ComponentIterator):
             'frame': ImageType(),
         }, ['frame']

-    def __next__(self) ->
+    def __next__(self) -> dict[str, Any]:
+        # jumping to the target frame here with video_reader.set() is far slower than just
+        # skipping the unwanted frames
         while True:
             pos_msec = self.video_reader.get(cv2.CAP_PROP_POS_MSEC)
             pos_frame = self.video_reader.get(cv2.CAP_PROP_POS_FRAMES)
@@ -69,7 +103,7 @@ class FrameIterator(ComponentIterator):
                 self.video_reader.release()
                 self.video_reader = None
                 raise StopIteration
-            if pos_frame
+            if pos_frame in self.frames_set:
                 img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                 result = {
                     'frame_idx': self.next_frame_idx,
@@ -78,8 +112,6 @@ class FrameIterator(ComponentIterator):
                     'frame': PIL.Image.fromarray(img),
                 }
                 self.next_frame_idx += 1
-                # frame_freq > 1: jumping to the target frame here with video_reader.set() is far slower than just
-                # skipping the unwanted frames
                 return result

     def close(self) -> None:
@@ -92,5 +124,5 @@ class FrameIterator(ComponentIterator):
         if pos == self.next_frame_idx:
             return
         _logger.debug(f'seeking to frame {pos}')
-        self.video_reader.set(cv2.CAP_PROP_POS_FRAMES,
+        self.video_reader.set(cv2.CAP_PROP_POS_FRAMES, self.frames_to_extract[pos])
         self.next_frame_idx = pos
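The frame-selection arithmetic can be checked in isolation. A minimal sketch that mirrors the logic in __init__ (the helper name is illustrative):

import math

def select_frames(num_video_frames: int, video_fps: int, fps=None, num_frames=None):
    # mirrors FrameIterator.__init__: returns the indices of the frames to extract
    if num_frames is not None:
        if num_frames > num_video_frames:
            return list(range(num_video_frames))
        spacing = num_video_frames / num_frames
        return [round(i * spacing) for i in range(num_frames)]
    if fps is None or fps == 0.0:
        return list(range(num_video_frames))
    freq = fps / video_fps
    n = math.ceil(num_video_frames * freq)
    return [round(i / freq) for i in range(n)]

print(select_frames(100, 25, fps=5.0))       # [0, 5, 10, ..., 95]: every 5th frame, starting at frame 0
print(select_frames(100, 25, num_frames=4))  # [0, 25, 50, 75]: evenly spaced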
pixeltable/store.py
CHANGED
@@ -223,35 +223,70 @@ class StoreBase:
         """
         num_excs = 0
         num_rows = 0
-        for row_batch in exec_plan:
-            num_rows += len(row_batch)
-            for result_row in row_batch:
-                values_dict: Dict[sql.Column, Any] = {}
-
-                if col.is_computed:
-                    if result_row.has_exc(value_expr_slot_idx):
-                        num_excs += 1
-                        value_exc = result_row.get_exc(value_expr_slot_idx)
-                        # we store a NULL value and record the exception/exc type
-                        error_type = type(value_exc).__name__
-                        error_msg = str(value_exc)
-                        values_dict = {
-                            col.sa_col: None,
-                            col.sa_errortype_col: error_type,
-                            col.sa_errormsg_col: error_msg
-                        }
-                    else:
-                        val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
-                        if col.col_type.is_media_type():
-                            val = self._move_tmp_media_file(val, col, result_row.pk[-1])
-                        values_dict = {col.sa_col: val}
-
-                update_stmt = sql.update(self.sa_tbl).values(values_dict)
-                for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
-                    update_stmt = update_stmt.where(pk_col == pk_val)
-                log_stmt(_logger, update_stmt)
-                conn.execute(update_stmt)

+        # create temp table to store output of exec_plan, with the same primary key as the store table
+        tmp_name = f'temp_{self._storage_name()}'
+        tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
+        tmp_cols = tmp_pk_cols.copy()
+        tmp_val_col = sql.Column(col.sa_col.name, col.sa_col.type)
+        tmp_cols.append(tmp_val_col)
+        # add error columns if the store column records errors
+        if col.records_errors:
+            tmp_errortype_col = sql.Column(col.sa_errortype_col.name, col.sa_errortype_col.type)
+            tmp_cols.append(tmp_errortype_col)
+            tmp_errormsg_col = sql.Column(col.sa_errormsg_col.name, col.sa_errormsg_col.type)
+            tmp_cols.append(tmp_errormsg_col)
+        tmp_tbl = sql.Table(tmp_name, self.sa_md, *tmp_cols, prefixes=['TEMPORARY'])
+        tmp_tbl.create(bind=conn)
+
+        try:
+            # insert rows from exec_plan into temp table
+            for row_batch in exec_plan:
+                num_rows += len(row_batch)
+                tbl_rows: list[dict[str, Any]] = []
+                for result_row in row_batch:
+                    tbl_row: dict[str, Any] = {}
+                    for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
+                        tbl_row[pk_col.name] = pk_val
+
+                    if col.is_computed:
+                        if result_row.has_exc(value_expr_slot_idx):
+                            num_excs += 1
+                            value_exc = result_row.get_exc(value_expr_slot_idx)
+                            # we store a NULL value and record the exception/exc type
+                            error_type = type(value_exc).__name__
+                            error_msg = str(value_exc)
+                            tbl_row[col.sa_col.name] = None
+                            tbl_row[col.sa_errortype_col.name] = error_type
+                            tbl_row[col.sa_errormsg_col.name] = error_msg
+                        else:
+                            val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
+                            if col.col_type.is_media_type():
+                                val = self._move_tmp_media_file(val, col, result_row.pk[-1])
+                            tbl_row[col.sa_col.name] = val
+                            if col.records_errors:
+                                tbl_row[col.sa_errortype_col.name] = None
+                                tbl_row[col.sa_errormsg_col.name] = None
+
+                    tbl_rows.append(tbl_row)
+                conn.execute(sql.insert(tmp_tbl), tbl_rows)
+
+            # update store table with values from temp table
+            update_stmt = sql.update(self.sa_tbl)
+            for pk_col, tmp_pk_col in zip(self.pk_columns(), tmp_pk_cols):
+                update_stmt = update_stmt.where(pk_col == tmp_pk_col)
+            update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
+            if col.records_errors:
+                update_stmt = update_stmt.values({
+                    col.sa_errortype_col: tmp_errortype_col,
+                    col.sa_errormsg_col: tmp_errormsg_col
+                })
+            log_explain(_logger, update_stmt, conn)
+            conn.execute(update_stmt)
+
+        finally:
+            tmp_tbl.drop(bind=conn)
+            self.sa_md.remove(tmp_tbl)
         return num_excs

     def insert_rows(
@@ -295,6 +330,8 @@ class StoreBase:
                         file=sys.stdout
                     )
                 progress_bar.update(1)
+
+                # insert batch of rows
                 self._move_tmp_media_files(table_rows, media_cols, v_min)
                 conn.execute(sql.insert(self.sa_tbl), table_rows)
                 if progress_bar is not None:
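The old loop issued one UPDATE per row; the new code stages all values in a TEMPORARY table and applies them with a single set-based UPDATE ... FROM, which Postgres can execute as one join. A self-contained sketch of the pattern (engine URL, table, and column names are hypothetical):

import sqlalchemy as sql

md = sql.MetaData()
tbl = sql.Table('tbl', md,
                sql.Column('pk', sql.Integer, primary_key=True),
                sql.Column('val', sql.String))
tmp = sql.Table('tmp_tbl', md,
                sql.Column('pk', sql.Integer, primary_key=True),
                sql.Column('val', sql.String),
                prefixes=['TEMPORARY'])

engine = sql.create_engine('postgresql+psycopg2://localhost/demo')  # hypothetical URL
with engine.begin() as conn:
    tbl.create(bind=conn, checkfirst=True)
    tmp.create(bind=conn)
    conn.execute(sql.insert(tmp), [{'pk': 1, 'val': 'x'}, {'pk': 2, 'val': 'y'}])
    # one UPDATE ... FROM tmp_tbl instead of one UPDATE per row
    stmt = sql.update(tbl).where(tbl.c.pk == tmp.c.pk).values({tbl.c.val: tmp.c.val})
    conn.execute(stmt)
    tmp.drop(bind=conn)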
pixeltable/tool/create_test_db_dump.py
CHANGED
@@ -6,7 +6,7 @@ import pathlib
 import subprocess
 from typing import Any

-import pgserver
+import pixeltable_pgserver
 import toml

 import pixeltable as pxt
@@ -41,7 +41,7 @@ class Dumper:
         md_version = metadata.VERSION
         dump_file = self.output_dir / f'pixeltable-v{md_version:03d}-test.dump.gz'
         _logger.info(f'Creating database dump at: {dump_file}')
-        pg_package_dir = os.path.dirname(pgserver.__file__)
+        pg_package_dir = os.path.dirname(pixeltable_pgserver.__file__)
         pg_dump_binary = f'{pg_package_dir}/pginstall/bin/pg_dump'
         _logger.info(f'Using pg_dump binary at: {pg_dump_binary}')
         with open(dump_file, 'wb') as dump:
@@ -177,8 +177,8 @@ class Dumper:
         assert t.base_table_image_rot.col in project.stored_proxies

     def __add_expr_columns(self, t: pxt.Table, col_prefix: str, include_expensive_functions=False) -> None:
-        def add_column(col_name: str, col_expr: Any) -> None:
-            t.add_column(**{f'{col_prefix}_{col_name}': col_expr})
+        def add_column(col_name: str, col_expr: Any, stored: bool = True) -> None:
+            t.add_column(**{f'{col_prefix}_{col_name}': col_expr}, stored=stored)

         # arithmetic_expr
         add_column('plus', t.c2 + 6)
@@ -217,7 +217,7 @@ class Dumper:

         # image_member_access
         add_column('image_mode', t.c8.mode)
-        add_column('image_rot', t.c8.rotate(180))
+        add_column('image_rot', t.c8.rotate(180), stored=False)

         # in_predicate
         add_column('isin_1', t.c1.isin(['test string 1', 'test string 2', 'test string 3']))
pixeltable/type_system.py
CHANGED
@@ -9,7 +9,7 @@ import urllib.parse
 import urllib.request
 from copy import deepcopy
 from pathlib import Path
-from typing import Any, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
+from typing import Any, Iterable, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping

 import PIL.Image
 import av
@@ -166,24 +166,14 @@ class ColumnType:
         return self._type.name.lower()

     def __eq__(self, other: object) -> bool:
-        return self.matches(other) and self.nullable == other.nullable
+        return isinstance(other, ColumnType) and self.matches(other) and self.nullable == other.nullable

-    def is_supertype_of(self, other: ColumnType) -> bool:
-
-
-        if self.matches(other):
-            return True
-        return self._is_supertype_of(other)
-
-    @abc.abstractmethod
-    def _is_supertype_of(self, other: ColumnType) -> bool:
-        return False
+    def is_supertype_of(self, other: ColumnType, ignore_nullable: bool = False) -> bool:
+        operand = self.copy(nullable=True) if ignore_nullable else self
+        return operand.supertype(other) == operand

-    def matches(self, other:
+    def matches(self, other: ColumnType) -> bool:
         """Two types match if they're equal, aside from nullability"""
-        if not isinstance(other, ColumnType):
-            pass
-        assert isinstance(other, ColumnType), type(other)
         if type(self) != type(other):
             return False
         for member_var in vars(self).keys():
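The rewritten is_supertype_of is now defined in terms of supertype(), and the new ignore_nullable flag compares types modulo nullability. For example (assuming the semantics above):

import pixeltable as pxt

t = pxt.StringType(nullable=False)
u = pxt.StringType(nullable=True)
print(t.is_supertype_of(u))                        # False: u admits nulls that t does not
print(t.is_supertype_of(u, ignore_nullable=True))  # True: same type modulo nullability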
@@ -193,36 +183,23 @@
             return False
         return True

-
-
-
-        return type1
+    def supertype(self, other: ColumnType) -> Optional[ColumnType]:
+        if self.copy(nullable=True) == other.copy(nullable=True):
+            return self.copy(nullable=(self.nullable or other.nullable))

-        if
-            return
-        if
-            return
+        if self.is_invalid_type():
+            return other
+        if other.is_invalid_type():
+            return self

-        if
-            t =
+        if self.is_scalar_type() and other.is_scalar_type():
+            t = self.Type.supertype(self._type, other._type, self.common_supertypes)
             if t is not None:
-                return
+                return self.make_type(t).copy(nullable=(self.nullable or other.nullable))
             return None

-        if type1._type == type2._type:
-            return cls._supertype(type1, type2)
-
         return None

-    @classmethod
-    @abc.abstractmethod
-    def _supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
-        """
-        Class-specific implementation of determining the supertype. type1 and type2 are from the same subclass of
-        ColumnType.
-        """
-        pass
-
     @classmethod
     def infer_literal_type(cls, val: Any) -> Optional[ColumnType]:
         if isinstance(val, str):
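The consolidated supertype() replaces the per-subclass _supertype hooks: equal types merge by OR-ing nullability, invalid types defer to the other operand, and scalar types consult the common_supertypes table. For instance (assuming Int/Float appear in common_supertypes, which the pandas import path relies on):

import pixeltable as pxt

print(pxt.IntType().supertype(pxt.IntType(nullable=True)))  # IntType, nullable=True: nullability is OR'ed
print(pxt.IntType().supertype(pxt.FloatType()))             # FloatType: the common scalar supertype
print(pxt.IntType().supertype(pxt.StringType()))            # None: no common supertype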
@@ -251,6 +228,26 @@
             return None
         return None

+    @classmethod
+    def infer_common_literal_type(cls, vals: Iterable[Any]) -> Optional[ColumnType]:
+        """
+        Returns the most specific type that is a supertype of all literals in `vals`. If no such type
+        exists, returns None.
+
+        Args:
+            vals: A collection of literals.
+        """
+        inferred_type: Optional[ColumnType] = None
+        for val in vals:
+            val_type = cls.infer_literal_type(val)
+            if inferred_type is None:
+                inferred_type = val_type
+            else:
+                inferred_type = inferred_type.supertype(val_type)
+                if inferred_type is None:
+                    return None
+        return inferred_type
+
     @classmethod
     def from_python_type(cls, t: type) -> Optional[ColumnType]:
         if typing.get_origin(t) is typing.Union:
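infer_common_literal_type folds supertype() over a collection of literals; this is what the pandas importer uses for object-dtype columns. A short illustration (expected results under the semantics above):

import pixeltable as pxt

print(pxt.ColumnType.infer_common_literal_type([1, 2, 3]))  # IntType
print(pxt.ColumnType.infer_common_literal_type([1, 2.5]))   # FloatType: Int and Float unify
print(pxt.ColumnType.infer_common_literal_type([1, 'a']))   # None: no common supertype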
@@ -522,6 +519,28 @@ class JsonType(ColumnType):
         super().__init__(self.Type.JSON, nullable=nullable)
         self.type_spec = type_spec

+    def supertype(self, other: ColumnType) -> Optional[JsonType]:
+        if self.type_spec is None:
+            # we don't have a type spec and can accept anything accepted by other
+            return JsonType(nullable=(self.nullable or other.nullable))
+        if other.type_spec is None:
+            # we have a type spec but other doesn't
+            return JsonType(nullable=(self.nullable or other.nullable))
+
+        # we both have type specs; the supertype's type spec is the union of the two
+        type_spec = deepcopy(self.type_spec)
+        for other_field_name, other_field_type in other.type_spec.items():
+            if other_field_name not in type_spec:
+                type_spec[other_field_name] = other_field_type.copy()
+            else:
+                # both type specs have this field
+                field_type = type_spec[other_field_name].supertype(other_field_type)
+                if field_type is None:
+                    # conflicting types
+                    return JsonType(nullable=(self.nullable or other.nullable))
+                type_spec[other_field_name] = field_type
+        return JsonType(type_spec, nullable=(self.nullable or other.nullable))
+
     def _as_dict(self) -> Dict:
         result = super()._as_dict()
         if self.type_spec is not None:
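JsonType supertypes merge field-wise: a missing type_spec on either side yields an unconstrained JsonType, and a conflicting field type degrades the result to unconstrained rather than failing. A sketch (assuming JsonType accepts a type_spec dict, as the constructor above suggests):

import pixeltable as pxt

a = pxt.JsonType({'x': pxt.IntType(), 'y': pxt.StringType()})
b = pxt.JsonType({'x': pxt.IntType(), 'z': pxt.FloatType()})
merged = a.supertype(b)
# expected: a JsonType whose type_spec is the union {'x': Int, 'y': String, 'z': Float}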
@@ -564,21 +583,22 @@ class JsonType(ColumnType):


 class ArrayType(ColumnType):
-    def __init__(
-        self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
+    def __init__(self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
         super().__init__(self.Type.ARRAY, nullable=nullable)
         self.shape = shape
         assert dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
         self.dtype = dtype._type

-    def
-        if
+    def supertype(self, other: ColumnType) -> Optional[ArrayType]:
+        if not isinstance(other, ArrayType):
+            return None
+        if len(self.shape) != len(other.shape):
             return None
-        base_type =
+        base_type = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
         if base_type is None:
             return None
-        shape = [n1 if n1 == n2 else None for n1, n2 in zip(
-        return ArrayType(tuple(shape), base_type, nullable=(
+        shape = [n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape)]
+        return ArrayType(tuple(shape), self.make_type(base_type), nullable=(self.nullable or other.nullable))

     def _as_dict(self) -> Dict:
         result = super()._as_dict()
@@ -695,13 +715,13 @@ class ImageType(ColumnType):
             params_str = ''
         return f'{self._type.name.lower()}{params_str}'

-    def
-        if
-            return
-
-
-
-
+    def supertype(self, other: ColumnType) -> Optional[ImageType]:
+        if not isinstance(other, ImageType):
+            return None
+        width = self.width if self.width == other.width else None
+        height = self.height if self.height == other.height else None
+        mode = self.mode if self.mode == other.mode else None
+        return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))

     @property
     def size(self) -> Optional[Tuple[int, int]]:
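For parameterized types the supertype keeps parameters that agree and generalizes the rest: ArrayType dimensions that differ become None, and the same applies to an ImageType's width, height, and mode. For example:

import pixeltable as pxt

a = pxt.ImageType(width=512, height=512, mode='RGB')
b = pxt.ImageType(width=512, height=256, mode='RGB')
print(a.supertype(b))  # expected: width=512, height=None, mode='RGB'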
{pixeltable-0.2.14.dist-info → pixeltable-0.2.15.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pixeltable
-Version: 0.2.14
+Version: 0.2.15
 Summary: Pixeltable: The Multimodal AI Data Plane
 Author: Pixeltable, Inc.
 Author-email: contact@pixeltable.com
@@ -21,9 +21,9 @@ Requires-Dist: more-itertools (>=10.2,<11.0)
 Requires-Dist: numpy (>=1.25)
 Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
 Requires-Dist: pandas (>=2.0,<3.0)
-Requires-Dist: pgserver (==0.1.4)
 Requires-Dist: pgvector (>=0.2.1,<0.3.0)
 Requires-Dist: pillow (>=9.3.0)
+Requires-Dist: pixeltable-pgserver (==0.2.4)
 Requires-Dist: psutil (>=5.9.5,<6.0.0)
 Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
 Requires-Dist: pymupdf (>=1.24.1,<2.0.0)