pixeltable 0.2.13__py3-none-any.whl → 0.2.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +8 -3
- pixeltable/catalog/globals.py +8 -0
- pixeltable/catalog/table.py +25 -9
- pixeltable/catalog/table_version.py +30 -55
- pixeltable/catalog/view.py +1 -1
- pixeltable/env.py +4 -4
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/row_update_node.py +61 -0
- pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +41 -16
- pixeltable/exprs/expr.py +72 -22
- pixeltable/exprs/function_call.py +64 -29
- pixeltable/exprs/globals.py +5 -1
- pixeltable/exprs/inline_array.py +18 -11
- pixeltable/exprs/method_ref.py +63 -0
- pixeltable/ext/__init__.py +9 -0
- pixeltable/ext/functions/__init__.py +8 -0
- pixeltable/ext/functions/whisperx.py +45 -5
- pixeltable/ext/functions/yolox.py +60 -14
- pixeltable/func/callable_function.py +12 -4
- pixeltable/func/expr_template_function.py +1 -1
- pixeltable/func/function.py +12 -2
- pixeltable/func/function_registry.py +24 -9
- pixeltable/func/udf.py +32 -4
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/fireworks.py +33 -0
- pixeltable/functions/huggingface.py +96 -6
- pixeltable/functions/image.py +226 -41
- pixeltable/functions/json.py +46 -0
- pixeltable/functions/openai.py +214 -0
- pixeltable/functions/string.py +195 -218
- pixeltable/functions/timestamp.py +210 -0
- pixeltable/functions/together.py +106 -0
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/{eval.py → vision.py} +170 -27
- pixeltable/functions/whisper.py +32 -0
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +2 -2
- pixeltable/io/globals.py +133 -1
- pixeltable/io/pandas.py +82 -31
- pixeltable/iterators/video.py +55 -23
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_18.py +39 -0
- pixeltable/metadata/notes.py +10 -0
- pixeltable/plan.py +76 -1
- pixeltable/store.py +65 -28
- pixeltable/tool/create_test_db_dump.py +8 -9
- pixeltable/tool/doc_plugins/griffe.py +4 -0
- pixeltable/type_system.py +84 -63
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/METADATA +2 -2
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/RECORD +57 -51
- pixeltable/exprs/image_member_access.py +0 -96
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.15.dist-info}/entry_points.txt +0 -0
pixeltable/store.py
CHANGED
|
@@ -223,35 +223,70 @@ class StoreBase:
|
|
|
223
223
|
"""
|
|
224
224
|
num_excs = 0
|
|
225
225
|
num_rows = 0
|
|
226
|
-
for row_batch in exec_plan:
|
|
227
|
-
num_rows += len(row_batch)
|
|
228
|
-
for result_row in row_batch:
|
|
229
|
-
values_dict: Dict[sql.Column, Any] = {}
|
|
230
|
-
|
|
231
|
-
if col.is_computed:
|
|
232
|
-
if result_row.has_exc(value_expr_slot_idx):
|
|
233
|
-
num_excs += 1
|
|
234
|
-
value_exc = result_row.get_exc(value_expr_slot_idx)
|
|
235
|
-
# we store a NULL value and record the exception/exc type
|
|
236
|
-
error_type = type(value_exc).__name__
|
|
237
|
-
error_msg = str(value_exc)
|
|
238
|
-
values_dict = {
|
|
239
|
-
col.sa_col: None,
|
|
240
|
-
col.sa_errortype_col: error_type,
|
|
241
|
-
col.sa_errormsg_col: error_msg
|
|
242
|
-
}
|
|
243
|
-
else:
|
|
244
|
-
val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
|
|
245
|
-
if col.col_type.is_media_type():
|
|
246
|
-
val = self._move_tmp_media_file(val, col, result_row.pk[-1])
|
|
247
|
-
values_dict = {col.sa_col: val}
|
|
248
|
-
|
|
249
|
-
update_stmt = sql.update(self.sa_tbl).values(values_dict)
|
|
250
|
-
for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
|
|
251
|
-
update_stmt = update_stmt.where(pk_col == pk_val)
|
|
252
|
-
log_stmt(_logger, update_stmt)
|
|
253
|
-
conn.execute(update_stmt)
|
|
254
226
|
|
|
227
|
+
# create temp table to store output of exec_plan, with the same primary key as the store table
|
|
228
|
+
tmp_name = f'temp_{self._storage_name()}'
|
|
229
|
+
tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
|
|
230
|
+
tmp_cols = tmp_pk_cols.copy()
|
|
231
|
+
tmp_val_col = sql.Column(col.sa_col.name, col.sa_col.type)
|
|
232
|
+
tmp_cols.append(tmp_val_col)
|
|
233
|
+
# add error columns if the store column records errors
|
|
234
|
+
if col.records_errors:
|
|
235
|
+
tmp_errortype_col = sql.Column(col.sa_errortype_col.name, col.sa_errortype_col.type)
|
|
236
|
+
tmp_cols.append(tmp_errortype_col)
|
|
237
|
+
tmp_errormsg_col = sql.Column(col.sa_errormsg_col.name, col.sa_errormsg_col.type)
|
|
238
|
+
tmp_cols.append(tmp_errormsg_col)
|
|
239
|
+
tmp_tbl = sql.Table(tmp_name, self.sa_md, *tmp_cols, prefixes=['TEMPORARY'])
|
|
240
|
+
tmp_tbl.create(bind=conn)
|
|
241
|
+
|
|
242
|
+
try:
|
|
243
|
+
# insert rows from exec_plan into temp table
|
|
244
|
+
for row_batch in exec_plan:
|
|
245
|
+
num_rows += len(row_batch)
|
|
246
|
+
tbl_rows: list[dict[str, Any]] = []
|
|
247
|
+
for result_row in row_batch:
|
|
248
|
+
tbl_row: dict[str, Any] = {}
|
|
249
|
+
for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
|
|
250
|
+
tbl_row[pk_col.name] = pk_val
|
|
251
|
+
|
|
252
|
+
if col.is_computed:
|
|
253
|
+
if result_row.has_exc(value_expr_slot_idx):
|
|
254
|
+
num_excs += 1
|
|
255
|
+
value_exc = result_row.get_exc(value_expr_slot_idx)
|
|
256
|
+
# we store a NULL value and record the exception/exc type
|
|
257
|
+
error_type = type(value_exc).__name__
|
|
258
|
+
error_msg = str(value_exc)
|
|
259
|
+
tbl_row[col.sa_col.name] = None
|
|
260
|
+
tbl_row[col.sa_errortype_col.name] = error_type
|
|
261
|
+
tbl_row[col.sa_errormsg_col.name] = error_msg
|
|
262
|
+
else:
|
|
263
|
+
val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
|
|
264
|
+
if col.col_type.is_media_type():
|
|
265
|
+
val = self._move_tmp_media_file(val, col, result_row.pk[-1])
|
|
266
|
+
tbl_row[col.sa_col.name] = val
|
|
267
|
+
if col.records_errors:
|
|
268
|
+
tbl_row[col.sa_errortype_col.name] = None
|
|
269
|
+
tbl_row[col.sa_errormsg_col.name] = None
|
|
270
|
+
|
|
271
|
+
tbl_rows.append(tbl_row)
|
|
272
|
+
conn.execute(sql.insert(tmp_tbl), tbl_rows)
|
|
273
|
+
|
|
274
|
+
# update store table with values from temp table
|
|
275
|
+
update_stmt = sql.update(self.sa_tbl)
|
|
276
|
+
for pk_col, tmp_pk_col in zip(self.pk_columns(), tmp_pk_cols):
|
|
277
|
+
update_stmt = update_stmt.where(pk_col == tmp_pk_col)
|
|
278
|
+
update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
|
|
279
|
+
if col.records_errors:
|
|
280
|
+
update_stmt = update_stmt.values({
|
|
281
|
+
col.sa_errortype_col: tmp_errortype_col,
|
|
282
|
+
col.sa_errormsg_col: tmp_errormsg_col
|
|
283
|
+
})
|
|
284
|
+
log_explain(_logger, update_stmt, conn)
|
|
285
|
+
conn.execute(update_stmt)
|
|
286
|
+
|
|
287
|
+
finally:
|
|
288
|
+
tmp_tbl.drop(bind=conn)
|
|
289
|
+
self.sa_md.remove(tmp_tbl)
|
|
255
290
|
return num_excs
|
|
256
291
|
|
|
257
292
|
def insert_rows(
|
|
@@ -295,6 +330,8 @@ class StoreBase:
|
|
|
295
330
|
file=sys.stdout
|
|
296
331
|
)
|
|
297
332
|
progress_bar.update(1)
|
|
333
|
+
|
|
334
|
+
# insert batch of rows
|
|
298
335
|
self._move_tmp_media_files(table_rows, media_cols, v_min)
|
|
299
336
|
conn.execute(sql.insert(self.sa_tbl), table_rows)
|
|
300
337
|
if progress_bar is not None:
|
|
@@ -6,7 +6,7 @@ import pathlib
|
|
|
6
6
|
import subprocess
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
import
|
|
9
|
+
import pixeltable_pgserver
|
|
10
10
|
import toml
|
|
11
11
|
|
|
12
12
|
import pixeltable as pxt
|
|
@@ -41,7 +41,7 @@ class Dumper:
|
|
|
41
41
|
md_version = metadata.VERSION
|
|
42
42
|
dump_file = self.output_dir / f'pixeltable-v{md_version:03d}-test.dump.gz'
|
|
43
43
|
_logger.info(f'Creating database dump at: {dump_file}')
|
|
44
|
-
pg_package_dir = os.path.dirname(
|
|
44
|
+
pg_package_dir = os.path.dirname(pixeltable_pgserver.__file__)
|
|
45
45
|
pg_dump_binary = f'{pg_package_dir}/pginstall/bin/pg_dump'
|
|
46
46
|
_logger.info(f'Using pg_dump binary at: {pg_dump_binary}')
|
|
47
47
|
with open(dump_file, 'wb') as dump:
|
|
@@ -61,7 +61,7 @@ class Dumper:
|
|
|
61
61
|
info_dict = {'pixeltable-dump': {
|
|
62
62
|
'metadata-version': md_version,
|
|
63
63
|
'git-sha': git_sha,
|
|
64
|
-
'datetime': datetime.datetime.
|
|
64
|
+
'datetime': datetime.datetime.now(tz=datetime.timezone.utc),
|
|
65
65
|
'user': user
|
|
66
66
|
}}
|
|
67
67
|
with open(info_file, 'w') as info:
|
|
@@ -177,9 +177,9 @@ class Dumper:
|
|
|
177
177
|
assert t.base_table_image_rot.col in project.stored_proxies
|
|
178
178
|
|
|
179
179
|
def __add_expr_columns(self, t: pxt.Table, col_prefix: str, include_expensive_functions=False) -> None:
|
|
180
|
-
def add_column(col_name: str, col_expr: Any) -> None:
|
|
181
|
-
t.add_column(**{f'{col_prefix}_{col_name}': col_expr})
|
|
182
|
-
|
|
180
|
+
def add_column(col_name: str, col_expr: Any, stored: bool = True) -> None:
|
|
181
|
+
t.add_column(**{f'{col_prefix}_{col_name}': col_expr}, stored=stored)
|
|
182
|
+
|
|
183
183
|
# arithmetic_expr
|
|
184
184
|
add_column('plus', t.c2 + 6)
|
|
185
185
|
add_column('minus', t.c2 - 5)
|
|
@@ -217,7 +217,7 @@ class Dumper:
|
|
|
217
217
|
|
|
218
218
|
# image_member_access
|
|
219
219
|
add_column('image_mode', t.c8.mode)
|
|
220
|
-
add_column('image_rot', t.c8.rotate(180))
|
|
220
|
+
add_column('image_rot', t.c8.rotate(180), stored=False)
|
|
221
221
|
|
|
222
222
|
# in_predicate
|
|
223
223
|
add_column('isin_1', t.c1.isin(['test string 1', 'test string 2', 'test string 3']))
|
|
@@ -242,8 +242,7 @@ class Dumper:
|
|
|
242
242
|
add_column('str_const', 'str')
|
|
243
243
|
add_column('int_const', 5)
|
|
244
244
|
add_column('float_const', 5.0)
|
|
245
|
-
add_column('timestamp_const_1', datetime.datetime.
|
|
246
|
-
add_column('timestamp_const_2', datetime.date.today())
|
|
245
|
+
add_column('timestamp_const_1', datetime.datetime.now(tz=datetime.timezone.utc))
|
|
247
246
|
|
|
248
247
|
# type_cast
|
|
249
248
|
add_column('astype', t.c2.astype(FloatType()))
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import ast
|
|
2
2
|
from typing import Optional, Union
|
|
3
|
+
import warnings
|
|
3
4
|
|
|
4
5
|
import griffe
|
|
5
6
|
import griffe.expressions
|
|
@@ -34,6 +35,7 @@ class PxtGriffeExtension(Extension):
|
|
|
34
35
|
"""
|
|
35
36
|
func.extra['mkdocstrings']['template'] = 'udf.html.jinja'
|
|
36
37
|
# Dynamically load the UDF reference so we can inspect the Pixeltable signature directly
|
|
38
|
+
warnings.simplefilter("ignore")
|
|
37
39
|
udf = griffe.dynamic_import(func.path)
|
|
38
40
|
assert isinstance(udf, pxt.Function)
|
|
39
41
|
# Convert the return type to a Pixeltable type reference
|
|
@@ -60,6 +62,8 @@ class PxtGriffeExtension(Extension):
|
|
|
60
62
|
base = 'float'
|
|
61
63
|
elif column_type.is_bool_type():
|
|
62
64
|
base = 'bool'
|
|
65
|
+
elif column_type.is_timestamp_type():
|
|
66
|
+
base = 'datetime'
|
|
63
67
|
elif column_type.is_array_type():
|
|
64
68
|
base = 'ArrayT'
|
|
65
69
|
elif column_type.is_json_type():
|
pixeltable/type_system.py
CHANGED
|
@@ -9,7 +9,7 @@ import urllib.parse
|
|
|
9
9
|
import urllib.request
|
|
10
10
|
from copy import deepcopy
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import Any, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
|
|
12
|
+
from typing import Any, Iterable, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
|
|
13
13
|
|
|
14
14
|
import PIL.Image
|
|
15
15
|
import av
|
|
@@ -166,24 +166,14 @@ class ColumnType:
|
|
|
166
166
|
return self._type.name.lower()
|
|
167
167
|
|
|
168
168
|
def __eq__(self, other: object) -> bool:
|
|
169
|
-
return self.matches(other) and self.nullable == other.nullable
|
|
169
|
+
return isinstance(other, ColumnType) and self.matches(other) and self.nullable == other.nullable
|
|
170
170
|
|
|
171
|
-
def is_supertype_of(self, other: ColumnType) -> bool:
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
if self.matches(other):
|
|
175
|
-
return True
|
|
176
|
-
return self._is_supertype_of(other)
|
|
177
|
-
|
|
178
|
-
@abc.abstractmethod
|
|
179
|
-
def _is_supertype_of(self, other: ColumnType) -> bool:
|
|
180
|
-
return False
|
|
171
|
+
def is_supertype_of(self, other: ColumnType, ignore_nullable: bool = False) -> bool:
|
|
172
|
+
operand = self.copy(nullable=True) if ignore_nullable else self
|
|
173
|
+
return operand.supertype(other) == operand
|
|
181
174
|
|
|
182
|
-
def matches(self, other:
|
|
175
|
+
def matches(self, other: ColumnType) -> bool:
|
|
183
176
|
"""Two types match if they're equal, aside from nullability"""
|
|
184
|
-
if not isinstance(other, ColumnType):
|
|
185
|
-
pass
|
|
186
|
-
assert isinstance(other, ColumnType)
|
|
187
177
|
if type(self) != type(other):
|
|
188
178
|
return False
|
|
189
179
|
for member_var in vars(self).keys():
|
|
@@ -193,56 +183,44 @@ class ColumnType:
|
|
|
193
183
|
return False
|
|
194
184
|
return True
|
|
195
185
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
return type1
|
|
186
|
+
def supertype(self, other: ColumnType) -> Optional[ColumnType]:
|
|
187
|
+
if self.copy(nullable=True) == other.copy(nullable=True):
|
|
188
|
+
return self.copy(nullable=(self.nullable or other.nullable))
|
|
200
189
|
|
|
201
|
-
if
|
|
202
|
-
return
|
|
203
|
-
if
|
|
204
|
-
return
|
|
190
|
+
if self.is_invalid_type():
|
|
191
|
+
return other
|
|
192
|
+
if other.is_invalid_type():
|
|
193
|
+
return self
|
|
205
194
|
|
|
206
|
-
if
|
|
207
|
-
t =
|
|
195
|
+
if self.is_scalar_type() and other.is_scalar_type():
|
|
196
|
+
t = self.Type.supertype(self._type, other._type, self.common_supertypes)
|
|
208
197
|
if t is not None:
|
|
209
|
-
return
|
|
198
|
+
return self.make_type(t).copy(nullable=(self.nullable or other.nullable))
|
|
210
199
|
return None
|
|
211
200
|
|
|
212
|
-
if type1._type == type2._type:
|
|
213
|
-
return cls._supertype(type1, type2)
|
|
214
|
-
|
|
215
201
|
return None
|
|
216
202
|
|
|
217
|
-
@classmethod
|
|
218
|
-
@abc.abstractmethod
|
|
219
|
-
def _supertype(cls, type1: ColumnType, type2: ColumnType) -> Optional[ColumnType]:
|
|
220
|
-
"""
|
|
221
|
-
Class-specific implementation of determining the supertype. type1 and type2 are from the same subclass of
|
|
222
|
-
ColumnType.
|
|
223
|
-
"""
|
|
224
|
-
pass
|
|
225
|
-
|
|
226
203
|
@classmethod
|
|
227
204
|
def infer_literal_type(cls, val: Any) -> Optional[ColumnType]:
|
|
228
205
|
if isinstance(val, str):
|
|
229
206
|
return StringType()
|
|
207
|
+
if isinstance(val, bool):
|
|
208
|
+
# We have to check bool before int, because isinstance(b, int) is True if b is a Python bool
|
|
209
|
+
return BoolType()
|
|
230
210
|
if isinstance(val, int):
|
|
231
211
|
return IntType()
|
|
232
212
|
if isinstance(val, float):
|
|
233
213
|
return FloatType()
|
|
234
|
-
if isinstance(val,
|
|
235
|
-
return BoolType()
|
|
236
|
-
if isinstance(val, datetime.datetime) or isinstance(val, datetime.date):
|
|
214
|
+
if isinstance(val, datetime.datetime):
|
|
237
215
|
return TimestampType()
|
|
238
216
|
if isinstance(val, PIL.Image.Image):
|
|
239
|
-
return ImageType(width=val.width, height=val.height)
|
|
217
|
+
return ImageType(width=val.width, height=val.height, mode=val.mode)
|
|
240
218
|
if isinstance(val, np.ndarray):
|
|
241
219
|
col_type = ArrayType.from_literal(val)
|
|
242
220
|
if col_type is not None:
|
|
243
221
|
return col_type
|
|
244
222
|
# this could still be json-serializable
|
|
245
|
-
if isinstance(val, dict) or isinstance(val, np.ndarray):
|
|
223
|
+
if isinstance(val, dict) or isinstance(val, list) or isinstance(val, np.ndarray):
|
|
246
224
|
try:
|
|
247
225
|
JsonType().validate_literal(val)
|
|
248
226
|
return JsonType()
|
|
@@ -250,6 +228,26 @@ class ColumnType:
|
|
|
250
228
|
return None
|
|
251
229
|
return None
|
|
252
230
|
|
|
231
|
+
@classmethod
|
|
232
|
+
def infer_common_literal_type(cls, vals: Iterable[Any]) -> Optional[ColumnType]:
|
|
233
|
+
"""
|
|
234
|
+
Returns the most specific type that is a supertype of all literals in `vals`. If no such type
|
|
235
|
+
exists, returns None.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
vals: A collection of literals.
|
|
239
|
+
"""
|
|
240
|
+
inferred_type: Optional[ColumnType] = None
|
|
241
|
+
for val in vals:
|
|
242
|
+
val_type = cls.infer_literal_type(val)
|
|
243
|
+
if inferred_type is None:
|
|
244
|
+
inferred_type = val_type
|
|
245
|
+
else:
|
|
246
|
+
inferred_type = inferred_type.supertype(val_type)
|
|
247
|
+
if inferred_type is None:
|
|
248
|
+
return None
|
|
249
|
+
return inferred_type
|
|
250
|
+
|
|
253
251
|
@classmethod
|
|
254
252
|
def from_python_type(cls, t: type) -> Optional[ColumnType]:
|
|
255
253
|
if typing.get_origin(t) is typing.Union:
|
|
@@ -276,7 +274,7 @@ class ColumnType:
|
|
|
276
274
|
return FloatType()
|
|
277
275
|
if base is bool:
|
|
278
276
|
return BoolType()
|
|
279
|
-
if base is datetime.
|
|
277
|
+
if base is datetime.datetime:
|
|
280
278
|
return TimestampType()
|
|
281
279
|
if issubclass(base, Sequence) or issubclass(base, Mapping):
|
|
282
280
|
return JsonType()
|
|
@@ -425,7 +423,7 @@ class StringType(ColumnType):
|
|
|
425
423
|
def conversion_fn(self, target: ColumnType) -> Optional[Callable[[Any], Any]]:
|
|
426
424
|
if not target.is_timestamp_type():
|
|
427
425
|
return None
|
|
428
|
-
def convert(val: str) -> Optional[datetime]:
|
|
426
|
+
def convert(val: str) -> Optional[datetime.datetime]:
|
|
429
427
|
try:
|
|
430
428
|
dt = datetime.datetime.fromisoformat(val)
|
|
431
429
|
return dt
|
|
@@ -506,8 +504,8 @@ class TimestampType(ColumnType):
|
|
|
506
504
|
return sql.TIMESTAMP()
|
|
507
505
|
|
|
508
506
|
def _validate_literal(self, val: Any) -> None:
|
|
509
|
-
if not isinstance(val, datetime.datetime)
|
|
510
|
-
raise TypeError(f'Expected datetime.datetime
|
|
507
|
+
if not isinstance(val, datetime.datetime):
|
|
508
|
+
raise TypeError(f'Expected datetime.datetime, got {val.__class__.__name__}')
|
|
511
509
|
|
|
512
510
|
def _create_literal(self, val: Any) -> Any:
|
|
513
511
|
if isinstance(val, str):
|
|
@@ -521,6 +519,28 @@ class JsonType(ColumnType):
|
|
|
521
519
|
super().__init__(self.Type.JSON, nullable=nullable)
|
|
522
520
|
self.type_spec = type_spec
|
|
523
521
|
|
|
522
|
+
def supertype(self, other: ColumnType) -> Optional[JsonType]:
|
|
523
|
+
if self.type_spec is None:
|
|
524
|
+
# we don't have a type spec and can accept anything accepted by other
|
|
525
|
+
return JsonType(nullable=(self.nullable or other.nullable))
|
|
526
|
+
if other.type_spec is None:
|
|
527
|
+
# we have a type spec but other doesn't
|
|
528
|
+
return JsonType(nullable=(self.nullable or other.nullable))
|
|
529
|
+
|
|
530
|
+
# we both have type specs; the supertype's type spec is the union of the two
|
|
531
|
+
type_spec = deepcopy(self.type_spec)
|
|
532
|
+
for other_field_name, other_field_type in other.type_spec.items():
|
|
533
|
+
if other_field_name not in type_spec:
|
|
534
|
+
type_spec[other_field_name] = other_field_type.copy()
|
|
535
|
+
else:
|
|
536
|
+
# both type specs have this field
|
|
537
|
+
field_type = type_spec[other_field_name].supertype(other_field_type)
|
|
538
|
+
if field_type is None:
|
|
539
|
+
# conflicting types
|
|
540
|
+
return JsonType(nullable=(self.nullable or other.nullable))
|
|
541
|
+
type_spec[other_field_name] = field_type
|
|
542
|
+
return JsonType(type_spec, nullable=(self.nullable or other.nullable))
|
|
543
|
+
|
|
524
544
|
def _as_dict(self) -> Dict:
|
|
525
545
|
result = super()._as_dict()
|
|
526
546
|
if self.type_spec is not None:
|
|
@@ -563,21 +583,22 @@ class JsonType(ColumnType):
|
|
|
563
583
|
|
|
564
584
|
|
|
565
585
|
class ArrayType(ColumnType):
|
|
566
|
-
def __init__(
|
|
567
|
-
self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
|
|
586
|
+
def __init__(self, shape: Tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
|
|
568
587
|
super().__init__(self.Type.ARRAY, nullable=nullable)
|
|
569
588
|
self.shape = shape
|
|
570
589
|
assert dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
|
|
571
590
|
self.dtype = dtype._type
|
|
572
591
|
|
|
573
|
-
def
|
|
574
|
-
if
|
|
592
|
+
def supertype(self, other: ColumnType) -> Optional[ArrayType]:
|
|
593
|
+
if not isinstance(other, ArrayType):
|
|
594
|
+
return None
|
|
595
|
+
if len(self.shape) != len(other.shape):
|
|
575
596
|
return None
|
|
576
|
-
base_type =
|
|
597
|
+
base_type = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
|
|
577
598
|
if base_type is None:
|
|
578
599
|
return None
|
|
579
|
-
shape = [n1 if n1 == n2 else None for n1, n2 in zip(
|
|
580
|
-
return ArrayType(tuple(shape), base_type)
|
|
600
|
+
shape = [n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape)]
|
|
601
|
+
return ArrayType(tuple(shape), self.make_type(base_type), nullable=(self.nullable or other.nullable))
|
|
581
602
|
|
|
582
603
|
def _as_dict(self) -> Dict:
|
|
583
604
|
result = super()._as_dict()
|
|
@@ -609,7 +630,7 @@ class ArrayType(ColumnType):
|
|
|
609
630
|
dtype = StringType()
|
|
610
631
|
else:
|
|
611
632
|
return None
|
|
612
|
-
return cls(val.shape, dtype=dtype
|
|
633
|
+
return cls(val.shape, dtype=dtype)
|
|
613
634
|
|
|
614
635
|
def is_valid_literal(self, val: np.ndarray) -> bool:
|
|
615
636
|
if not isinstance(val, np.ndarray):
|
|
@@ -694,13 +715,13 @@ class ImageType(ColumnType):
|
|
|
694
715
|
params_str = ''
|
|
695
716
|
return f'{self._type.name.lower()}{params_str}'
|
|
696
717
|
|
|
697
|
-
def
|
|
698
|
-
if
|
|
699
|
-
return
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
718
|
+
def supertype(self, other: ColumnType) -> Optional[ImageType]:
|
|
719
|
+
if not isinstance(other, ImageType):
|
|
720
|
+
return None
|
|
721
|
+
width = self.width if self.width == other.width else None
|
|
722
|
+
height = self.height if self.height == other.height else None
|
|
723
|
+
mode = self.mode if self.mode == other.mode else None
|
|
724
|
+
return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))
|
|
704
725
|
|
|
705
726
|
@property
|
|
706
727
|
def size(self) -> Optional[Tuple[int, int]]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.15
|
|
4
4
|
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
5
|
Author: Pixeltable, Inc.
|
|
6
6
|
Author-email: contact@pixeltable.com
|
|
@@ -21,9 +21,9 @@ Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
|
21
21
|
Requires-Dist: numpy (>=1.25)
|
|
22
22
|
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
23
23
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
24
|
-
Requires-Dist: pgserver (==0.1.4)
|
|
25
24
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
26
25
|
Requires-Dist: pillow (>=9.3.0)
|
|
26
|
+
Requires-Dist: pixeltable-pgserver (==0.2.4)
|
|
27
27
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
28
28
|
Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
|
|
29
29
|
Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
|