pixeltable 0.2.14__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

pixeltable/io/globals.py CHANGED
@@ -191,7 +191,7 @@ def import_rows(
191
191
  if col_name not in schema:
192
192
  schema[col_name] = col_type
193
193
  else:
194
- supertype = pxt.ColumnType.supertype(schema[col_name], col_type)
194
+ supertype = schema[col_name].supertype(col_type)
195
195
  if supertype is None:
196
196
  raise excs.Error(
197
197
  f'Could not infer type of column `{col_name}`; the value in row {n} does not match preceding type {schema[col_name]}: {value!r}\n'
pixeltable/io/pandas.py CHANGED
@@ -1,7 +1,9 @@
1
+ import datetime
1
2
  from typing import Any, Optional, Union
2
3
 
3
4
  import numpy as np
4
5
  import pandas as pd
6
+ import PIL.Image
5
7
 
6
8
  import pixeltable as pxt
7
9
  import pixeltable.exceptions as excs
@@ -103,6 +105,17 @@ def __df_to_pxt_schema(
103
105
  if pd_name in schema_overrides:
104
106
  pxt_type = schema_overrides[pd_name]
105
107
  else:
108
+ # This complicated-looking condition is necessary because we cannot safely call `pd.isna()` on
109
+ # general objects, so we need to check for nulls in the specific cases where we might expect them.
110
+ # isinstance(val, float) will check for NaN values in float columns *as well as* floats appearing
111
+ # in object columns (where Pandas uses NaN as a general null).
112
+ # np.issubdtype(pd_dtype, np.datetime64) checks for NaT values specifically in datetime columns.
113
+ has_na = any(
114
+ (isinstance(val, float) or np.issubdtype(pd_dtype, np.datetime64)) and pd.isna(val)
115
+ for val in df[pd_name]
116
+ )
117
+ if has_na and pd_name in primary_key:
118
+ raise excs.Error(f'Primary key column `{pd_name}` cannot contain null values.')
106
119
  pxt_type = __np_dtype_to_pxt_type(pd_dtype, df[pd_name], pd_name not in primary_key)
107
120
  pxt_name = __normalize_pxt_col_name(pd_name)
108
121
  # Ensure that column names are unique by appending a distinguishing suffix
@@ -140,21 +153,34 @@ def __np_dtype_to_pxt_type(np_dtype: np.dtype, data_col: pd.Series, nullable: bo
140
153
  """
141
154
  if np.issubdtype(np_dtype, np.integer):
142
155
  return pxt.IntType(nullable=nullable)
156
+
143
157
  if np.issubdtype(np_dtype, np.floating):
144
158
  return pxt.FloatType(nullable=nullable)
159
+
145
160
  if np.issubdtype(np_dtype, np.bool_):
146
161
  return pxt.BoolType(nullable=nullable)
147
- if np_dtype == np.object_ or np.issubdtype(np_dtype, np.character):
148
- has_nan = any(isinstance(val, float) and np.isnan(val) for val in data_col)
149
- if has_nan and not nullable:
150
- raise excs.Error(f'Primary key column `{data_col.name}` cannot contain null values.')
162
+
163
+ if np.issubdtype(np_dtype, np.character):
151
164
  return pxt.StringType(nullable=nullable)
165
+
152
166
  if np.issubdtype(np_dtype, np.datetime64):
153
- has_nat = any(pd.isnull(val) for val in data_col)
154
- if has_nat and not nullable:
155
- raise excs.Error(f'Primary key column `{data_col.name}` cannot contain null values.')
156
167
  return pxt.TimestampType(nullable=nullable)
157
- raise excs.Error(f'Unsupported dtype: {np_dtype}')
168
+
169
+ if np_dtype == np.object_:
170
+ # The `object_` dtype can mean all sorts of things; see if we can infer the Pixeltable type
171
+ # based on the actual data in `data_col`.
172
+ # First drop any null values (they don't contribute to type inference).
173
+ data_col = data_col.dropna()
174
+
175
+ if len(data_col) == 0:
176
+ # No non-null values; default to FloatType (the Pandas type of an all-NaN column)
177
+ return pxt.FloatType(nullable=nullable)
178
+
179
+ inferred_type = pxt.ColumnType.infer_common_literal_type(data_col)
180
+ if inferred_type is not None:
181
+ return inferred_type.copy(nullable=nullable)
182
+
183
+ raise excs.Error(f'Could not infer Pixeltable type of column: {data_col.name} (dtype: {np_dtype})')
158
184
 
159
185
 
160
186
  def __df_row_to_pxt_row(row: tuple[Any, ...], schema: dict[str, pxt.ColumnType]) -> dict[str, Any]:
@@ -1,57 +1,89 @@
1
1
  import logging
2
2
  import math
3
3
  from pathlib import Path
4
- from typing import Dict, Any, List, Tuple
4
+ from typing import Any, Optional
5
5
 
6
- import PIL.Image
7
6
  import cv2
7
+ import PIL.Image
8
8
 
9
9
  from pixeltable.exceptions import Error
10
- from pixeltable.type_system import ColumnType, VideoType, ImageType, IntType, FloatType
10
+ from pixeltable.type_system import ColumnType, FloatType, ImageType, IntType, VideoType
11
+
11
12
  from .base import ComponentIterator
12
13
 
13
14
  _logger = logging.getLogger('pixeltable')
14
15
 
15
16
 
16
17
  class FrameIterator(ComponentIterator):
17
- """Iterator over frames of a video.
18
+ """
19
+ Iterator over frames of a video. At most one of `fps` or `num_frames` may be specified. If `fps` is specified,
20
+ then frames will be extracted at the specified rate (frames per second). If `num_frames` is specified, then the
21
+ exact number of frames will be extracted. If neither is specified, then all frames will be extracted. The first
22
+ frame of the video will always be extracted, and the remaining frames will be spaced as evenly as possible.
18
23
 
19
24
  Args:
20
- video: URL or file of the video to use for frame extraction
21
- fps: number of frames to extract per second of video. This may be a fractional value, such as 0.5.
22
- If set to 0.0, then the native framerate of the video will be used (all frames will be extracted).
23
- Default: 0.0
25
+ video: URL or path of the video to use for frame extraction.
26
+ fps: Number of frames to extract per second of video. This may be a fractional value, such as 0.5.
27
+ If omitted or set to 0.0, then the native framerate of the video will be used (all frames will be
28
+ extracted). If `fps` is greater than the frame rate of the video, an error will be raised.
29
+ num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
30
+ `num_frames` is greater than the number of frames in the video, all frames will be extracted.
24
31
  """
25
- def __init__(self, video: str, *, fps: float = 0.0):
32
+ def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
33
+ if fps is not None and num_frames is not None:
34
+ raise Error('At most one of `fps` or `num_frames` may be specified')
35
+
26
36
  video_path = Path(video)
27
37
  assert video_path.exists() and video_path.is_file()
28
38
  self.video_path = video_path
29
- self.fps = fps
30
39
  self.video_reader = cv2.VideoCapture(str(video_path))
40
+ self.fps = fps
41
+ self.num_frames = num_frames
31
42
  if not self.video_reader.isOpened():
32
43
  raise Error(f'Failed to open video: {video}')
44
+
33
45
  video_fps = int(self.video_reader.get(cv2.CAP_PROP_FPS))
34
- if fps > video_fps:
46
+ if fps is not None and fps > video_fps:
35
47
  raise Error(f'Video {video}: requested fps ({fps}) exceeds that of the video ({video_fps})')
36
- self.frame_freq = int(video_fps / fps) if fps > 0 else 1
37
48
  num_video_frames = int(self.video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
38
49
  if num_video_frames == 0:
39
50
  raise Error(f'Video {video}: failed to get number of frames')
40
- # ceil: round up to ensure we count frame 0
41
- self.num_frames = math.ceil(num_video_frames / self.frame_freq) if fps > 0 else num_video_frames
42
- _logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps}')
43
51
 
52
+ if num_frames is not None:
53
+ # specific number of frames
54
+ if num_frames > num_video_frames:
55
+ # Extract all frames
56
+ self.frames_to_extract = range(num_video_frames)
57
+ else:
58
+ spacing = float(num_video_frames) / float(num_frames)
59
+ self.frames_to_extract = list(round(i * spacing) for i in range(num_frames))
60
+ assert len(self.frames_to_extract) == num_frames
61
+ else:
62
+ if fps is None or fps == 0.0:
63
+ # Extract all frames
64
+ self.frames_to_extract = range(num_video_frames)
65
+ else:
66
+ # Extract frames at the implied frequency
67
+ freq = fps / video_fps
68
+ n = math.ceil(num_video_frames * freq) # number of frames to extract
69
+ self.frames_to_extract = list(round(i / freq) for i in range(n))
70
+
71
+ # We need the list of frames as both a list (for set_pos) and a set (for fast lookups when
72
+ # there are lots of frames)
73
+ self.frames_set = set(self.frames_to_extract)
74
+ _logger.debug(f'FrameIterator: path={self.video_path} fps={self.fps} num_frames={self.num_frames}')
44
75
  self.next_frame_idx = 0
45
76
 
46
77
  @classmethod
47
- def input_schema(cls) -> Dict[str, ColumnType]:
78
+ def input_schema(cls) -> dict[str, ColumnType]:
48
79
  return {
49
80
  'video': VideoType(nullable=False),
50
- 'fps': FloatType()
81
+ 'fps': FloatType(nullable=True),
82
+ 'num_frames': IntType(nullable=True),
51
83
  }
52
84
 
53
85
  @classmethod
54
- def output_schema(cls, *args: Any, **kwargs: Any) -> Tuple[Dict[str, ColumnType], List[str]]:
86
+ def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ColumnType], list[str]]:
55
87
  return {
56
88
  'frame_idx': IntType(),
57
89
  'pos_msec': FloatType(),
@@ -59,7 +91,9 @@ class FrameIterator(ComponentIterator):
59
91
  'frame': ImageType(),
60
92
  }, ['frame']
61
93
 
62
- def __next__(self) -> Dict[str, Any]:
94
+ def __next__(self) -> dict[str, Any]:
95
+ # jumping to the target frame here with video_reader.set() is far slower than just
96
+ # skipping the unwanted frames
63
97
  while True:
64
98
  pos_msec = self.video_reader.get(cv2.CAP_PROP_POS_MSEC)
65
99
  pos_frame = self.video_reader.get(cv2.CAP_PROP_POS_FRAMES)
@@ -69,7 +103,7 @@ class FrameIterator(ComponentIterator):
69
103
  self.video_reader.release()
70
104
  self.video_reader = None
71
105
  raise StopIteration
72
- if pos_frame % self.frame_freq == 0:
106
+ if pos_frame in self.frames_set:
73
107
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
74
108
  result = {
75
109
  'frame_idx': self.next_frame_idx,
@@ -78,8 +112,6 @@ class FrameIterator(ComponentIterator):
78
112
  'frame': PIL.Image.fromarray(img),
79
113
  }
80
114
  self.next_frame_idx += 1
81
- # frame_freq > 1: jumping to the target frame here with video_reader.set() is far slower than just
82
- # skipping the unwanted frames
83
115
  return result
84
116
 
85
117
  def close(self) -> None:
@@ -92,5 +124,5 @@ class FrameIterator(ComponentIterator):
92
124
  if pos == self.next_frame_idx:
93
125
  return
94
126
  _logger.debug(f'seeking to frame {pos}')
95
- self.video_reader.set(cv2.CAP_PROP_POS_FRAMES, pos * self.frame_freq)
127
+ self.video_reader.set(cv2.CAP_PROP_POS_FRAMES, self.frames_to_extract[pos])
96
128
  self.next_frame_idx = pos
pixeltable/store.py CHANGED
@@ -223,35 +223,70 @@ class StoreBase:
223
223
  """
224
224
  num_excs = 0
225
225
  num_rows = 0
226
- for row_batch in exec_plan:
227
- num_rows += len(row_batch)
228
- for result_row in row_batch:
229
- values_dict: Dict[sql.Column, Any] = {}
230
-
231
- if col.is_computed:
232
- if result_row.has_exc(value_expr_slot_idx):
233
- num_excs += 1
234
- value_exc = result_row.get_exc(value_expr_slot_idx)
235
- # we store a NULL value and record the exception/exc type
236
- error_type = type(value_exc).__name__
237
- error_msg = str(value_exc)
238
- values_dict = {
239
- col.sa_col: None,
240
- col.sa_errortype_col: error_type,
241
- col.sa_errormsg_col: error_msg
242
- }
243
- else:
244
- val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
245
- if col.col_type.is_media_type():
246
- val = self._move_tmp_media_file(val, col, result_row.pk[-1])
247
- values_dict = {col.sa_col: val}
248
-
249
- update_stmt = sql.update(self.sa_tbl).values(values_dict)
250
- for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
251
- update_stmt = update_stmt.where(pk_col == pk_val)
252
- log_stmt(_logger, update_stmt)
253
- conn.execute(update_stmt)
254
226
 
227
+ # create temp table to store output of exec_plan, with the same primary key as the store table
228
+ tmp_name = f'temp_{self._storage_name()}'
229
+ tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
230
+ tmp_cols = tmp_pk_cols.copy()
231
+ tmp_val_col = sql.Column(col.sa_col.name, col.sa_col.type)
232
+ tmp_cols.append(tmp_val_col)
233
+ # add error columns if the store column records errors
234
+ if col.records_errors:
235
+ tmp_errortype_col = sql.Column(col.sa_errortype_col.name, col.sa_errortype_col.type)
236
+ tmp_cols.append(tmp_errortype_col)
237
+ tmp_errormsg_col = sql.Column(col.sa_errormsg_col.name, col.sa_errormsg_col.type)
238
+ tmp_cols.append(tmp_errormsg_col)
239
+ tmp_tbl = sql.Table(tmp_name, self.sa_md, *tmp_cols, prefixes=['TEMPORARY'])
240
+ tmp_tbl.create(bind=conn)
241
+
242
+ try:
243
+ # insert rows from exec_plan into temp table
244
+ for row_batch in exec_plan:
245
+ num_rows += len(row_batch)
246
+ tbl_rows: list[dict[str, Any]] = []
247
+ for result_row in row_batch:
248
+ tbl_row: dict[str, Any] = {}
249
+ for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
250
+ tbl_row[pk_col.name] = pk_val
251
+
252
+ if col.is_computed:
253
+ if result_row.has_exc(value_expr_slot_idx):
254
+ num_excs += 1
255
+ value_exc = result_row.get_exc(value_expr_slot_idx)
256
+ # we store a NULL value and record the exception/exc type
257
+ error_type = type(value_exc).__name__
258
+ error_msg = str(value_exc)
259
+ tbl_row[col.sa_col.name] = None
260
+ tbl_row[col.sa_errortype_col.name] = error_type
261
+ tbl_row[col.sa_errormsg_col.name] = error_msg
262
+ else:
263
+ val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
264
+ if col.col_type.is_media_type():
265
+ val = self._move_tmp_media_file(val, col, result_row.pk[-1])
266
+ tbl_row[col.sa_col.name] = val
267
+ if col.records_errors:
268
+ tbl_row[col.sa_errortype_col.name] = None
269
+ tbl_row[col.sa_errormsg_col.name] = None
270
+
271
+ tbl_rows.append(tbl_row)
272
+ conn.execute(sql.insert(tmp_tbl), tbl_rows)
273
+
274
+ # update store table with values from temp table
275
+ update_stmt = sql.update(self.sa_tbl)
276
+ for pk_col, tmp_pk_col in zip(self.pk_columns(), tmp_pk_cols):
277
+ update_stmt = update_stmt.where(pk_col == tmp_pk_col)
278
+ update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
279
+ if col.records_errors:
280
+ update_stmt = update_stmt.values({
281
+ col.sa_errortype_col: tmp_errortype_col,
282
+ col.sa_errormsg_col: tmp_errormsg_col
283
+ })
284
+ log_explain(_logger, update_stmt, conn)
285
+ conn.execute(update_stmt)
286
+
287
+ finally:
288
+ tmp_tbl.drop(bind=conn)
289
+ self.sa_md.remove(tmp_tbl)
255
290
  return num_excs
256
291
 
257
292
  def insert_rows(
@@ -295,6 +330,8 @@ class StoreBase:
295
330
  file=sys.stdout
296
331
  )
297
332
  progress_bar.update(1)
333
+
334
+ # insert batch of rows
298
335
  self._move_tmp_media_files(table_rows, media_cols, v_min)
299
336
  conn.execute(sql.insert(self.sa_tbl), table_rows)
300
337
  if progress_bar is not None:
@@ -6,7 +6,7 @@ import pathlib
6
6
  import subprocess
7
7
  from typing import Any
8
8
 
9
- import pgserver
9
+ import pixeltable_pgserver
10
10
  import toml
11
11
 
12
12
  import pixeltable as pxt
@@ -41,7 +41,7 @@ class Dumper:
41
41
  md_version = metadata.VERSION
42
42
  dump_file = self.output_dir / f'pixeltable-v{md_version:03d}-test.dump.gz'
43
43
  _logger.info(f'Creating database dump at: {dump_file}')
44
- pg_package_dir = os.path.dirname(pgserver.__file__)
44
+ pg_package_dir = os.path.dirname(pixeltable_pgserver.__file__)
45
45
  pg_dump_binary = f'{pg_package_dir}/pginstall/bin/pg_dump'
46
46
  _logger.info(f'Using pg_dump binary at: {pg_dump_binary}')
47
47
  with open(dump_file, 'wb') as dump:
@@ -177,8 +177,8 @@ class Dumper:
177
177
  assert t.base_table_image_rot.col in project.stored_proxies
178
178
 
179
179
  def __add_expr_columns(self, t: pxt.Table, col_prefix: str, include_expensive_functions=False) -> None:
180
- def add_column(col_name: str, col_expr: Any) -> None:
181
- t.add_column(**{f'{col_prefix}_{col_name}': col_expr})
180
+ def add_column(col_name: str, col_expr: Any, stored: bool = True) -> None:
181
+ t.add_column(**{f'{col_prefix}_{col_name}': col_expr}, stored=stored)
182
182
 
183
183
  # arithmetic_expr
184
184
  add_column('plus', t.c2 + 6)
@@ -217,7 +217,7 @@ class Dumper:
217
217
 
218
218
  # image_member_access
219
219
  add_column('image_mode', t.c8.mode)
220
- add_column('image_rot', t.c8.rotate(180))
220
+ add_column('image_rot', t.c8.rotate(180), stored=False)
221
221
 
222
222
  # in_predicate
223
223
  add_column('isin_1', t.c1.isin(['test string 1', 'test string 2', 'test string 3']))
pixeltable/type_system.py CHANGED
@@ -9,7 +9,7 @@ import urllib.parse
9
9
  import urllib.request
10
10
  from copy import deepcopy
11
11
  from pathlib import Path
12
- from typing import Any, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
12
+ from typing import Any, Iterable, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
13
13
 
14
14
  import PIL.Image
15
15
  import av
@@ -166,24 +166,14 @@ class ColumnType:
166
166
  return self._type.name.lower()
167
167
 
168
168
  def __eq__(self, other: object) -> bool:
169
- return self.matches(other) and self.nullable == other.nullable
169
+ return isinstance(other, ColumnType) and self.matches(other) and self.nullable == other.nullable
170
170
 
171
- def is_supertype_of(self, other: ColumnType) -> bool:
172
- if type(self) != type(other):
173
- return False
174
- if self.matches(other):
175
- return True
176
- return self._is_supertype_of(other)
177
-
178
- @abc.abstractmethod
179
- def _is_supertype_of(self, other: ColumnType) -> bool:
180
- return False
171
+ def is_supertype_of(self, other: ColumnType, ignore_nullable: bool = False) -> bool:
172
+ operand = self.copy(nullable=True) if ignore_nullable else self
173
+ return operand.supertype(other) == operand
181
174
 
182
- def matches(self, other: object) -> bool:
175
+ def matches(self, other: ColumnType) -> bool:
183
176
  """Two types match if they're equal, aside from nullability"""
184
- if not isinstance(other, ColumnType):
185
- pass
186
- assert isinstance(other, ColumnType), type(other)
187
177
  if type(self) != type(other):
188
178
  return False
189
179
  for member_var in vars(self).keys():
@@ -193,36 +183,23 @@ class ColumnType:
193
183
  return False
194
184
  return True
195
185
 
196
- @classmethod
197
- def supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
198
- if type1 == type2:
199
- return type1
186
+ def supertype(self, other: ColumnType) -> Optional[ColumnType]:
187
+ if self.copy(nullable=True) == other.copy(nullable=True):
188
+ return self.copy(nullable=(self.nullable or other.nullable))
200
189
 
201
- if type1.is_invalid_type():
202
- return type2
203
- if type2.is_invalid_type():
204
- return type1
190
+ if self.is_invalid_type():
191
+ return other
192
+ if other.is_invalid_type():
193
+ return self
205
194
 
206
- if type1.is_scalar_type() and type2.is_scalar_type():
207
- t = cls.Type.supertype(type1._type, type2._type, cls.common_supertypes)
195
+ if self.is_scalar_type() and other.is_scalar_type():
196
+ t = self.Type.supertype(self._type, other._type, self.common_supertypes)
208
197
  if t is not None:
209
- return cls.make_type(t).copy(nullable=(type1.nullable or type2.nullable))
198
+ return self.make_type(t).copy(nullable=(self.nullable or other.nullable))
210
199
  return None
211
200
 
212
- if type1._type == type2._type:
213
- return cls._supertype(type1, type2)
214
-
215
201
  return None
216
202
 
217
- @classmethod
218
- @abc.abstractmethod
219
- def _supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
220
- """
221
- Class-specific implementation of determining the supertype. type1 and type2 are from the same subclass of
222
- ColumnType.
223
- """
224
- pass
225
-
226
203
  @classmethod
227
204
  def infer_literal_type(cls, val: Any) -> Optional[ColumnType]:
228
205
  if isinstance(val, str):
@@ -251,6 +228,26 @@ class ColumnType:
251
228
  return None
252
229
  return None
253
230
 
231
+ @classmethod
232
+ def infer_common_literal_type(cls, vals: Iterable[Any]) -> Optional[ColumnType]:
233
+ """
234
+ Returns the most specific type that is a supertype of all literals in `vals`. If no such type
235
+ exists, returns None.
236
+
237
+ Args:
238
+ vals: A collection of literals.
239
+ """
240
+ inferred_type: Optional[ColumnType] = None
241
+ for val in vals:
242
+ val_type = cls.infer_literal_type(val)
243
+ if inferred_type is None:
244
+ inferred_type = val_type
245
+ else:
246
+ inferred_type = inferred_type.supertype(val_type)
247
+ if inferred_type is None:
248
+ return None
249
+ return inferred_type
250
+
254
251
  @classmethod
255
252
  def from_python_type(cls, t: type) -> Optional[ColumnType]:
256
253
  if typing.get_origin(t) is typing.Union:
@@ -522,6 +519,28 @@ class JsonType(ColumnType):
522
519
  super().__init__(self.Type.JSON, nullable=nullable)
523
520
  self.type_spec = type_spec
524
521
 
522
+ def supertype(self, other: ColumnType) -> Optional[JsonType]:
523
+ if self.type_spec is None:
524
+ # we don't have a type spec and can accept anything accepted by other
525
+ return JsonType(nullable=(self.nullable or other.nullable))
526
+ if other.type_spec is None:
527
+ # we have a type spec but other doesn't
528
+ return JsonType(nullable=(self.nullable or other.nullable))
529
+
530
+ # we both have type specs; the supertype's type spec is the union of the two
531
+ type_spec = deepcopy(self.type_spec)
532
+ for other_field_name, other_field_type in other.type_spec.items():
533
+ if other_field_name not in type_spec:
534
+ type_spec[other_field_name] = other_field_type.copy()
535
+ else:
536
+ # both type specs have this field
537
+ field_type = type_spec[other_field_name].supertype(other_field_type)
538
+ if field_type is None:
539
+ # conflicting types
540
+ return JsonType(nullable=(self.nullable or other.nullable))
541
+ type_spec[other_field_name] = field_type
542
+ return JsonType(type_spec, nullable=(self.nullable or other.nullable))
543
+
525
544
  def _as_dict(self) -> Dict:
526
545
  result = super()._as_dict()
527
546
  if self.type_spec is not None:
@@ -564,21 +583,22 @@ class JsonType(ColumnType):
564
583
 
565
584
 
566
585
  class ArrayType(ColumnType):
567
- def __init__(
568
- self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
586
+ def __init__(self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
569
587
  super().__init__(self.Type.ARRAY, nullable=nullable)
570
588
  self.shape = shape
571
589
  assert dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
572
590
  self.dtype = dtype._type
573
591
 
574
- def _supertype(cls, type1: ArrayType, type2: ArrayType) -> Optional[ArrayType]:
575
- if len(type1.shape) != len(type2.shape):
592
+ def supertype(self, other: ColumnType) -> Optional[ArrayType]:
593
+ if not isinstance(other, ArrayType):
594
+ return None
595
+ if len(self.shape) != len(other.shape):
576
596
  return None
577
- base_type = ColumnType.supertype(type1.dtype, type2.dtype)
597
+ base_type = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
578
598
  if base_type is None:
579
599
  return None
580
- shape = [n1 if n1 == n2 else None for n1, n2 in zip(type1.shape, type2.shape)]
581
- return ArrayType(tuple(shape), base_type, nullable=(type1.nullable or type2.nullable))
600
+ shape = [n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape)]
601
+ return ArrayType(tuple(shape), self.make_type(base_type), nullable=(self.nullable or other.nullable))
582
602
 
583
603
  def _as_dict(self) -> Dict:
584
604
  result = super()._as_dict()
@@ -695,13 +715,13 @@ class ImageType(ColumnType):
695
715
  params_str = ''
696
716
  return f'{self._type.name.lower()}{params_str}'
697
717
 
698
- def _is_supertype_of(self, other: ImageType) -> bool:
699
- if self.mode is not None and self.mode != other.mode:
700
- return False
701
- if self.width is None and self.height is None:
702
- return True
703
- if self.width != other.width and self.height != other.height:
704
- return False
718
+ def supertype(self, other: ColumnType) -> Optional[ImageType]:
719
+ if not isinstance(other, ImageType):
720
+ return None
721
+ width = self.width if self.width == other.width else None
722
+ height = self.height if self.height == other.height else None
723
+ mode = self.mode if self.mode == other.mode else None
724
+ return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))
705
725
 
706
726
  @property
707
727
  def size(self) -> Optional[Tuple[int, int]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.14
3
+ Version: 0.2.15
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Pixeltable, Inc.
6
6
  Author-email: contact@pixeltable.com
@@ -21,9 +21,9 @@ Requires-Dist: more-itertools (>=10.2,<11.0)
21
21
  Requires-Dist: numpy (>=1.25)
22
22
  Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
23
23
  Requires-Dist: pandas (>=2.0,<3.0)
24
- Requires-Dist: pgserver (==0.1.4)
25
24
  Requires-Dist: pgvector (>=0.2.1,<0.3.0)
26
25
  Requires-Dist: pillow (>=9.3.0)
26
+ Requires-Dist: pixeltable-pgserver (==0.2.4)
27
27
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
28
28
  Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
29
29
  Requires-Dist: pymupdf (>=1.24.1,<2.0.0)