pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +64 -11
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +50 -27
- pixeltable/catalog/column.py +27 -11
- pixeltable/catalog/dir.py +6 -4
- pixeltable/catalog/globals.py +8 -1
- pixeltable/catalog/insertable_table.py +22 -12
- pixeltable/catalog/named_function.py +10 -6
- pixeltable/catalog/path.py +3 -2
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +121 -101
- pixeltable/catalog/table_version.py +291 -142
- pixeltable/catalog/table_version_path.py +8 -5
- pixeltable/catalog/view.py +67 -26
- pixeltable/dataframe.py +102 -72
- pixeltable/env.py +20 -21
- pixeltable/exec/__init__.py +2 -2
- pixeltable/exec/aggregation_node.py +10 -4
- pixeltable/exec/cache_prefetch_node.py +5 -3
- pixeltable/exec/component_iteration_node.py +9 -8
- pixeltable/exec/data_row_batch.py +21 -10
- pixeltable/exec/exec_context.py +10 -3
- pixeltable/exec/exec_node.py +23 -12
- pixeltable/exec/expr_eval/evaluators.py +13 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
- pixeltable/exec/expr_eval/globals.py +30 -7
- pixeltable/exec/expr_eval/row_buffer.py +5 -6
- pixeltable/exec/expr_eval/schedulers.py +151 -31
- pixeltable/exec/in_memory_data_node.py +8 -7
- pixeltable/exec/row_update_node.py +15 -5
- pixeltable/exec/sql_node.py +56 -27
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +57 -26
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +2 -1
- pixeltable/exprs/column_ref.py +20 -15
- pixeltable/exprs/comparison.py +6 -2
- pixeltable/exprs/compound_predicate.py +1 -3
- pixeltable/exprs/data_row.py +2 -2
- pixeltable/exprs/expr.py +101 -72
- pixeltable/exprs/expr_dict.py +2 -1
- pixeltable/exprs/expr_set.py +3 -1
- pixeltable/exprs/function_call.py +39 -41
- pixeltable/exprs/globals.py +1 -0
- pixeltable/exprs/in_predicate.py +2 -2
- pixeltable/exprs/inline_expr.py +20 -17
- pixeltable/exprs/json_mapper.py +4 -2
- pixeltable/exprs/json_path.py +12 -18
- pixeltable/exprs/literal.py +5 -9
- pixeltable/exprs/method_ref.py +1 -0
- pixeltable/exprs/object_ref.py +1 -1
- pixeltable/exprs/row_builder.py +32 -17
- pixeltable/exprs/rowid_ref.py +14 -5
- pixeltable/exprs/similarity_expr.py +11 -6
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +24 -9
- pixeltable/ext/__init__.py +1 -0
- pixeltable/ext/functions/__init__.py +1 -0
- pixeltable/ext/functions/whisperx.py +2 -2
- pixeltable/ext/functions/yolox.py +11 -11
- pixeltable/func/aggregate_function.py +17 -13
- pixeltable/func/callable_function.py +6 -6
- pixeltable/func/expr_template_function.py +15 -14
- pixeltable/func/function.py +16 -16
- pixeltable/func/function_registry.py +11 -8
- pixeltable/func/globals.py +4 -2
- pixeltable/func/query_template_function.py +12 -13
- pixeltable/func/signature.py +18 -9
- pixeltable/func/tools.py +10 -17
- pixeltable/func/udf.py +106 -11
- pixeltable/functions/__init__.py +21 -2
- pixeltable/functions/anthropic.py +16 -12
- pixeltable/functions/fireworks.py +63 -5
- pixeltable/functions/gemini.py +13 -3
- pixeltable/functions/globals.py +18 -6
- pixeltable/functions/huggingface.py +20 -38
- pixeltable/functions/image.py +7 -3
- pixeltable/functions/json.py +1 -0
- pixeltable/functions/llama_cpp.py +1 -4
- pixeltable/functions/mistralai.py +31 -20
- pixeltable/functions/ollama.py +4 -18
- pixeltable/functions/openai.py +201 -108
- pixeltable/functions/replicate.py +11 -10
- pixeltable/functions/string.py +70 -7
- pixeltable/functions/timestamp.py +21 -8
- pixeltable/functions/together.py +66 -52
- pixeltable/functions/video.py +1 -0
- pixeltable/functions/vision.py +14 -11
- pixeltable/functions/whisper.py +2 -1
- pixeltable/globals.py +60 -26
- pixeltable/index/__init__.py +1 -1
- pixeltable/index/btree.py +5 -3
- pixeltable/index/embedding_index.py +15 -14
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/external_store.py +30 -25
- pixeltable/io/fiftyone.py +6 -14
- pixeltable/io/globals.py +33 -27
- pixeltable/io/hf_datasets.py +2 -1
- pixeltable/io/label_studio.py +77 -68
- pixeltable/io/pandas.py +33 -9
- pixeltable/io/parquet.py +9 -12
- pixeltable/iterators/__init__.py +1 -0
- pixeltable/iterators/audio.py +205 -0
- pixeltable/iterators/document.py +19 -8
- pixeltable/iterators/image.py +6 -24
- pixeltable/iterators/string.py +3 -6
- pixeltable/iterators/video.py +1 -7
- pixeltable/metadata/__init__.py +7 -1
- pixeltable/metadata/converters/convert_10.py +2 -2
- pixeltable/metadata/converters/convert_15.py +1 -5
- pixeltable/metadata/converters/convert_16.py +2 -4
- pixeltable/metadata/converters/convert_17.py +2 -4
- pixeltable/metadata/converters/convert_18.py +2 -4
- pixeltable/metadata/converters/convert_19.py +2 -5
- pixeltable/metadata/converters/convert_20.py +1 -4
- pixeltable/metadata/converters/convert_21.py +4 -6
- pixeltable/metadata/converters/convert_22.py +1 -0
- pixeltable/metadata/converters/convert_23.py +5 -5
- pixeltable/metadata/converters/convert_24.py +12 -13
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/util.py +3 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +13 -2
- pixeltable/plan.py +173 -98
- pixeltable/store.py +42 -26
- pixeltable/type_system.py +62 -54
- pixeltable/utils/arrow.py +1 -2
- pixeltable/utils/coco.py +16 -17
- pixeltable/utils/code.py +1 -1
- pixeltable/utils/console_output.py +6 -3
- pixeltable/utils/description_helper.py +7 -7
- pixeltable/utils/documents.py +3 -1
- pixeltable/utils/filecache.py +12 -7
- pixeltable/utils/http_server.py +9 -8
- pixeltable/utils/media_store.py +2 -1
- pixeltable/utils/pytorch.py +11 -14
- pixeltable/utils/s3.py +1 -0
- pixeltable/utils/sql.py +1 -0
- pixeltable/utils/transactional_directory.py +2 -2
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
- pixeltable-0.3.3.dist-info/RECORD +163 -0
- pixeltable-0.3.2.dist-info/RECORD +0 -161
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/store.py
CHANGED
|
@@ -32,6 +32,7 @@ class StoreBase:
|
|
|
32
32
|
- v_min: version at which the row was created
|
|
33
33
|
- v_max: version at which the row was deleted (or MAX_VERSION if it's still live)
|
|
34
34
|
"""
|
|
35
|
+
|
|
35
36
|
tbl_version: catalog.TableVersion
|
|
36
37
|
sa_md: sql.MetaData
|
|
37
38
|
sa_tbl: Optional[sql.Table]
|
|
@@ -65,8 +66,9 @@ class StoreBase:
|
|
|
65
66
|
"""Create and return system columns"""
|
|
66
67
|
rowid_cols = self._create_rowid_columns()
|
|
67
68
|
self.v_min_col = sql.Column('v_min', sql.BigInteger, nullable=False)
|
|
68
|
-
self.v_max_col =
|
|
69
|
-
|
|
69
|
+
self.v_max_col = sql.Column(
|
|
70
|
+
'v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION)
|
|
71
|
+
)
|
|
70
72
|
self._pk_cols = [*rowid_cols, self.v_min_col]
|
|
71
73
|
return [*rowid_cols, self.v_min_col, self.v_max_col]
|
|
72
74
|
|
|
@@ -134,7 +136,7 @@ class StoreBase:
|
|
|
134
136
|
return new_file_url
|
|
135
137
|
|
|
136
138
|
def _move_tmp_media_files(
|
|
137
|
-
|
|
139
|
+
self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
|
|
138
140
|
) -> None:
|
|
139
141
|
"""Move tmp media files that we generated to a permanent location"""
|
|
140
142
|
for c in media_cols:
|
|
@@ -143,7 +145,7 @@ class StoreBase:
|
|
|
143
145
|
table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)
|
|
144
146
|
|
|
145
147
|
def _create_table_row(
|
|
146
|
-
|
|
148
|
+
self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
|
|
147
149
|
) -> tuple[dict[str, Any], int]:
|
|
148
150
|
"""Return Tuple[complete table row, # of exceptions] for insert()
|
|
149
151
|
Creates a row that includes the PK columns, with the values from input_row.pk.
|
|
@@ -193,11 +195,13 @@ class StoreBase:
|
|
|
193
195
|
added_storage_cols = [col.store_name()]
|
|
194
196
|
if col.records_errors:
|
|
195
197
|
# we also need to create the errormsg and errortype storage cols
|
|
196
|
-
stmt = sql.text(
|
|
197
|
-
|
|
198
|
+
stmt = sql.text(
|
|
199
|
+
f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL'
|
|
200
|
+
)
|
|
198
201
|
conn.execute(stmt)
|
|
199
|
-
stmt = sql.text(
|
|
200
|
-
|
|
202
|
+
stmt = sql.text(
|
|
203
|
+
f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL'
|
|
204
|
+
)
|
|
201
205
|
conn.execute(stmt)
|
|
202
206
|
added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
|
|
203
207
|
self.create_sa_tbl()
|
|
@@ -219,7 +223,7 @@ class StoreBase:
|
|
|
219
223
|
exec_plan: ExecNode,
|
|
220
224
|
value_expr_slot_idx: int,
|
|
221
225
|
conn: sql.engine.Connection,
|
|
222
|
-
on_error: Literal['abort', 'ignore']
|
|
226
|
+
on_error: Literal['abort', 'ignore'],
|
|
223
227
|
) -> int:
|
|
224
228
|
"""Update store column of a computed column with values produced by an execution plan
|
|
225
229
|
|
|
@@ -295,10 +299,9 @@ class StoreBase:
|
|
|
295
299
|
update_stmt = update_stmt.where(pk_col == tmp_pk_col)
|
|
296
300
|
update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
|
|
297
301
|
if col.records_errors:
|
|
298
|
-
update_stmt = update_stmt.values(
|
|
299
|
-
col.sa_errortype_col: tmp_errortype_col,
|
|
300
|
-
|
|
301
|
-
})
|
|
302
|
+
update_stmt = update_stmt.values(
|
|
303
|
+
{col.sa_errortype_col: tmp_errortype_col, col.sa_errormsg_col: tmp_errormsg_col}
|
|
304
|
+
)
|
|
302
305
|
log_explain(_logger, update_stmt, conn)
|
|
303
306
|
conn.execute(update_stmt)
|
|
304
307
|
|
|
@@ -308,8 +311,13 @@ class StoreBase:
|
|
|
308
311
|
return num_excs
|
|
309
312
|
|
|
310
313
|
def insert_rows(
|
|
311
|
-
|
|
312
|
-
|
|
314
|
+
self,
|
|
315
|
+
exec_plan: ExecNode,
|
|
316
|
+
conn: sql.engine.Connection,
|
|
317
|
+
v_min: Optional[int] = None,
|
|
318
|
+
show_progress: bool = True,
|
|
319
|
+
rowids: Optional[Iterator[int]] = None,
|
|
320
|
+
abort_on_exc: bool = False,
|
|
313
321
|
) -> tuple[int, int, set[int]]:
|
|
314
322
|
"""Insert rows into the store table and update the catalog table's md
|
|
315
323
|
Returns:
|
|
@@ -347,12 +355,12 @@ class StoreBase:
|
|
|
347
355
|
|
|
348
356
|
if show_progress:
|
|
349
357
|
if progress_bar is None:
|
|
350
|
-
warnings.simplefilter(
|
|
358
|
+
warnings.simplefilter('ignore', category=TqdmWarning)
|
|
351
359
|
progress_bar = tqdm(
|
|
352
360
|
desc=f'Inserting rows into `{self.tbl_version.name}`',
|
|
353
361
|
unit=' rows',
|
|
354
362
|
ncols=100,
|
|
355
|
-
file=sys.stdout
|
|
363
|
+
file=sys.stdout,
|
|
356
364
|
)
|
|
357
365
|
progress_bar.update(1)
|
|
358
366
|
|
|
@@ -379,8 +387,13 @@ class StoreBase:
|
|
|
379
387
|
return sql.and_(clause, self.base._versions_clause(versions[1:], match_on_vmin))
|
|
380
388
|
|
|
381
389
|
def delete_rows(
|
|
382
|
-
|
|
383
|
-
|
|
390
|
+
self,
|
|
391
|
+
current_version: int,
|
|
392
|
+
base_versions: list[Optional[int]],
|
|
393
|
+
match_on_vmin: bool,
|
|
394
|
+
where_clause: Optional[sql.ColumnElement[bool]],
|
|
395
|
+
conn: sql.engine.Connection,
|
|
396
|
+
) -> int:
|
|
384
397
|
"""Mark rows as deleted that are live and were created prior to current_version.
|
|
385
398
|
Also: populate the undo columns
|
|
386
399
|
Args:
|
|
@@ -394,12 +407,12 @@ class StoreBase:
|
|
|
394
407
|
"""
|
|
395
408
|
where_clause = sql.true() if where_clause is None else where_clause
|
|
396
409
|
where_clause = sql.and_(
|
|
397
|
-
self.v_min_col < current_version,
|
|
398
|
-
|
|
399
|
-
where_clause)
|
|
410
|
+
self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION, where_clause
|
|
411
|
+
)
|
|
400
412
|
rowid_join_clause = self._rowid_join_predicate()
|
|
401
|
-
base_versions_clause =
|
|
402
|
-
else self.base._versions_clause(base_versions, match_on_vmin)
|
|
413
|
+
base_versions_clause = (
|
|
414
|
+
sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
|
|
415
|
+
)
|
|
403
416
|
set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
|
|
404
417
|
for index_info in self.tbl_version.idxs_by_name.values():
|
|
405
418
|
# copy value column to undo column
|
|
@@ -450,7 +463,9 @@ class StoreView(StoreBase):
|
|
|
450
463
|
def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
|
|
451
464
|
return sql.and_(
|
|
452
465
|
self.base._rowid_join_predicate(),
|
|
453
|
-
*[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())]
|
|
466
|
+
*[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())],
|
|
467
|
+
)
|
|
468
|
+
|
|
454
469
|
|
|
455
470
|
class StoreComponentView(StoreView):
|
|
456
471
|
"""A view that stores components of its base, as produced by a ComponentIterator
|
|
@@ -482,4 +497,5 @@ class StoreComponentView(StoreView):
|
|
|
482
497
|
def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
|
|
483
498
|
return sql.and_(
|
|
484
499
|
self.base._rowid_join_predicate(),
|
|
485
|
-
*[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())]
|
|
500
|
+
*[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())],
|
|
501
|
+
)
|
pixeltable/type_system.py
CHANGED
|
@@ -9,17 +9,18 @@ import typing
|
|
|
9
9
|
import urllib.parse
|
|
10
10
|
import urllib.request
|
|
11
11
|
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
12
14
|
from typing import Any, Iterable, Mapping, Optional, Sequence, Union
|
|
13
15
|
|
|
14
|
-
import PIL.Image
|
|
15
16
|
import av # type: ignore
|
|
16
17
|
import jsonschema
|
|
17
18
|
import jsonschema.protocols
|
|
18
19
|
import jsonschema.validators
|
|
19
20
|
import numpy as np
|
|
21
|
+
import PIL.Image
|
|
20
22
|
import pydantic
|
|
21
23
|
import sqlalchemy as sql
|
|
22
|
-
from typing import _GenericAlias # type: ignore[attr-defined]
|
|
23
24
|
from typing_extensions import _AnnotatedAlias
|
|
24
25
|
|
|
25
26
|
import pixeltable.exceptions as excs
|
|
@@ -45,9 +46,11 @@ class ColumnType:
|
|
|
45
46
|
|
|
46
47
|
@classmethod
|
|
47
48
|
def supertype(
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
49
|
+
cls,
|
|
50
|
+
type1: 'ColumnType.Type',
|
|
51
|
+
type2: 'ColumnType.Type',
|
|
52
|
+
# we need to pass this in because we can't easily append it as a class member
|
|
53
|
+
common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
|
|
51
54
|
) -> Optional['ColumnType.Type']:
|
|
52
55
|
if type1 == type2:
|
|
53
56
|
return type1
|
|
@@ -59,23 +62,23 @@ class ColumnType:
|
|
|
59
62
|
return t
|
|
60
63
|
return None
|
|
61
64
|
|
|
62
|
-
|
|
63
65
|
@enum.unique
|
|
64
66
|
class DType(enum.Enum):
|
|
65
67
|
"""
|
|
66
68
|
Base type used in images and arrays
|
|
67
69
|
"""
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
70
|
+
|
|
71
|
+
BOOL = (0,)
|
|
72
|
+
INT8 = (1,)
|
|
73
|
+
INT16 = (2,)
|
|
74
|
+
INT32 = (3,)
|
|
75
|
+
INT64 = (4,)
|
|
76
|
+
UINT8 = (5,)
|
|
77
|
+
UINT16 = (6,)
|
|
78
|
+
UINT32 = (7,)
|
|
79
|
+
UINT64 = (8,)
|
|
80
|
+
FLOAT16 = (9,)
|
|
81
|
+
FLOAT32 = (10,)
|
|
79
82
|
FLOAT64 = 11
|
|
80
83
|
|
|
81
84
|
scalar_types = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
|
|
@@ -113,10 +116,7 @@ class ColumnType:
|
|
|
113
116
|
return json.dumps([t.as_dict() for t in type_list])
|
|
114
117
|
|
|
115
118
|
def as_dict(self) -> dict:
|
|
116
|
-
return {
|
|
117
|
-
'_classname': self.__class__.__name__,
|
|
118
|
-
**self._as_dict(),
|
|
119
|
-
}
|
|
119
|
+
return {'_classname': self.__class__.__name__, **self._as_dict()}
|
|
120
120
|
|
|
121
121
|
def _as_dict(self) -> dict:
|
|
122
122
|
return {'nullable': self.nullable}
|
|
@@ -277,10 +277,7 @@ class ColumnType:
|
|
|
277
277
|
|
|
278
278
|
@classmethod
|
|
279
279
|
def from_python_type(
|
|
280
|
-
cls,
|
|
281
|
-
t: Union[type, _GenericAlias],
|
|
282
|
-
nullable_default: bool = False,
|
|
283
|
-
allow_builtin_types: bool = True
|
|
280
|
+
cls, t: Union[type, _GenericAlias], nullable_default: bool = False, allow_builtin_types: bool = True
|
|
284
281
|
) -> Optional[ColumnType]:
|
|
285
282
|
"""
|
|
286
283
|
Convert a Python type into a Pixeltable `ColumnType` instance.
|
|
@@ -309,9 +306,7 @@ class ColumnType:
|
|
|
309
306
|
required_args = typing.get_args(t)
|
|
310
307
|
assert len(required_args) == 1
|
|
311
308
|
return cls.from_python_type(
|
|
312
|
-
required_args[0],
|
|
313
|
-
nullable_default=False,
|
|
314
|
-
allow_builtin_types=allow_builtin_types
|
|
309
|
+
required_args[0], nullable_default=False, allow_builtin_types=allow_builtin_types
|
|
315
310
|
)
|
|
316
311
|
elif origin is typing.Annotated:
|
|
317
312
|
annotated_args = typing.get_args(t)
|
|
@@ -349,7 +344,7 @@ class ColumnType:
|
|
|
349
344
|
cls,
|
|
350
345
|
t: Union[ColumnType, type, _AnnotatedAlias],
|
|
351
346
|
nullable_default: bool = False,
|
|
352
|
-
allow_builtin_types: bool = True
|
|
347
|
+
allow_builtin_types: bool = True,
|
|
353
348
|
) -> ColumnType:
|
|
354
349
|
"""
|
|
355
350
|
Convert any type recognizable by Pixeltable to its corresponding ColumnType.
|
|
@@ -415,7 +410,7 @@ class ColumnType:
|
|
|
415
410
|
|
|
416
411
|
def _create_literal(self, val: Any) -> Any:
|
|
417
412
|
"""Create a literal of this type from val, including any needed conversions.
|
|
418
|
-
|
|
413
|
+
val is guaranteed to be non-None"""
|
|
419
414
|
return val
|
|
420
415
|
|
|
421
416
|
def create_literal(self, val: Any) -> Any:
|
|
@@ -484,12 +479,7 @@ class ColumnType:
|
|
|
484
479
|
|
|
485
480
|
def to_json_schema(self) -> dict[str, Any]:
|
|
486
481
|
if self.nullable:
|
|
487
|
-
return {
|
|
488
|
-
'anyOf': [
|
|
489
|
-
self._to_json_schema(),
|
|
490
|
-
{'type': 'null'},
|
|
491
|
-
]
|
|
492
|
-
}
|
|
482
|
+
return {'anyOf': [self._to_json_schema(), {'type': 'null'}]}
|
|
493
483
|
else:
|
|
494
484
|
return self._to_json_schema()
|
|
495
485
|
|
|
@@ -612,7 +602,6 @@ class TimestampType(ColumnType):
|
|
|
612
602
|
|
|
613
603
|
|
|
614
604
|
class JsonType(ColumnType):
|
|
615
|
-
|
|
616
605
|
json_schema: Optional[dict[str, Any]]
|
|
617
606
|
__validator: Optional[jsonschema.protocols.Validator]
|
|
618
607
|
|
|
@@ -699,8 +688,7 @@ class JsonType(ColumnType):
|
|
|
699
688
|
superschema = self.__superschema(self.json_schema, other.json_schema)
|
|
700
689
|
|
|
701
690
|
return JsonType(
|
|
702
|
-
json_schema=(None if len(superschema) == 0 else superschema),
|
|
703
|
-
nullable=(self.nullable or other.nullable)
|
|
691
|
+
json_schema=(None if len(superschema) == 0 else superschema), nullable=(self.nullable or other.nullable)
|
|
704
692
|
)
|
|
705
693
|
|
|
706
694
|
@classmethod
|
|
@@ -755,7 +743,7 @@ class JsonType(ColumnType):
|
|
|
755
743
|
a_type = a.get('type')
|
|
756
744
|
b_type = b.get('type')
|
|
757
745
|
|
|
758
|
-
if
|
|
746
|
+
if a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type:
|
|
759
747
|
# a and b both have the same type designation, but are not identical. This can happen if
|
|
760
748
|
# (for example) they have validators or other attributes that differ. In this case, we
|
|
761
749
|
# generalize to {'type': t}, where t is their shared type, with no other qualifications.
|
|
@@ -793,15 +781,25 @@ class JsonType(ColumnType):
|
|
|
793
781
|
|
|
794
782
|
|
|
795
783
|
class ArrayType(ColumnType):
|
|
796
|
-
|
|
797
784
|
shape: Optional[tuple[Optional[int], ...]]
|
|
798
785
|
pxt_dtype: Optional[ColumnType]
|
|
799
786
|
dtype: Optional[ColumnType.Type]
|
|
800
787
|
|
|
801
|
-
def __init__(
|
|
788
|
+
def __init__(
|
|
789
|
+
self,
|
|
790
|
+
shape: Optional[tuple[Optional[int], ...]] = None,
|
|
791
|
+
dtype: Optional[ColumnType] = None,
|
|
792
|
+
nullable: bool = False,
|
|
793
|
+
):
|
|
802
794
|
super().__init__(self.Type.ARRAY, nullable=nullable)
|
|
803
795
|
assert shape is None or dtype is not None, (shape, dtype) # cannot specify a shape without a dtype
|
|
804
|
-
assert
|
|
796
|
+
assert (
|
|
797
|
+
dtype is None
|
|
798
|
+
or dtype.is_int_type()
|
|
799
|
+
or dtype.is_float_type()
|
|
800
|
+
or dtype.is_bool_type()
|
|
801
|
+
or dtype.is_string_type()
|
|
802
|
+
)
|
|
805
803
|
|
|
806
804
|
self.shape = shape
|
|
807
805
|
self.pxt_dtype = dtype # we need this for copy() and __str__()
|
|
@@ -857,13 +855,15 @@ class ArrayType(ColumnType):
|
|
|
857
855
|
def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
|
|
858
856
|
# determine our dtype
|
|
859
857
|
assert isinstance(val, np.ndarray)
|
|
858
|
+
dtype: ColumnType
|
|
860
859
|
if np.issubdtype(val.dtype, np.integer):
|
|
861
|
-
dtype
|
|
860
|
+
dtype = IntType()
|
|
862
861
|
elif np.issubdtype(val.dtype, np.floating):
|
|
863
862
|
dtype = FloatType()
|
|
864
863
|
elif val.dtype == np.bool_:
|
|
865
864
|
dtype = BoolType()
|
|
866
|
-
elif val.dtype
|
|
865
|
+
elif np.issubdtype(val.dtype, np.str_):
|
|
866
|
+
# Note that this includes NumPy types like '<U1' -- arrays of single Unicode characters
|
|
867
867
|
dtype = StringType()
|
|
868
868
|
else:
|
|
869
869
|
return None
|
|
@@ -898,10 +898,7 @@ class ArrayType(ColumnType):
|
|
|
898
898
|
return True
|
|
899
899
|
|
|
900
900
|
def _to_json_schema(self) -> dict[str, Any]:
|
|
901
|
-
return {
|
|
902
|
-
'type': 'array',
|
|
903
|
-
'items': self.pxt_dtype._to_json_schema(),
|
|
904
|
-
}
|
|
901
|
+
return {'type': 'array', 'items': self.pxt_dtype._to_json_schema()}
|
|
905
902
|
|
|
906
903
|
def _validate_literal(self, val: Any) -> None:
|
|
907
904
|
if not isinstance(val, np.ndarray):
|
|
@@ -945,15 +942,19 @@ class ArrayType(ColumnType):
|
|
|
945
942
|
|
|
946
943
|
class ImageType(ColumnType):
|
|
947
944
|
def __init__(
|
|
948
|
-
|
|
949
|
-
|
|
945
|
+
self,
|
|
946
|
+
width: Optional[int] = None,
|
|
947
|
+
height: Optional[int] = None,
|
|
948
|
+
size: Optional[tuple[int, int]] = None,
|
|
949
|
+
mode: Optional[str] = None,
|
|
950
|
+
nullable: bool = False,
|
|
950
951
|
):
|
|
951
952
|
"""
|
|
952
953
|
TODO: does it make sense to specify only width or height?
|
|
953
954
|
"""
|
|
954
955
|
super().__init__(self.Type.IMAGE, nullable=nullable)
|
|
955
|
-
assert not(width is not None and size is not None)
|
|
956
|
-
assert not(height is not None and size is not None)
|
|
956
|
+
assert not (width is not None and size is not None)
|
|
957
|
+
assert not (height is not None and size is not None)
|
|
957
958
|
if size is not None:
|
|
958
959
|
self.width = size[0]
|
|
959
960
|
self.height = size[1]
|
|
@@ -1143,6 +1144,7 @@ class DocumentType(ColumnType):
|
|
|
1143
1144
|
def validate_media(self, val: Any) -> None:
|
|
1144
1145
|
assert isinstance(val, str)
|
|
1145
1146
|
from pixeltable.utils.documents import get_document_handle
|
|
1147
|
+
|
|
1146
1148
|
dh = get_document_handle(val)
|
|
1147
1149
|
if dh is None:
|
|
1148
1150
|
raise excs.Error(f'Not a recognized document format: {val}')
|
|
@@ -1156,6 +1158,7 @@ class Required(typing.Generic[T]):
|
|
|
1156
1158
|
Marker class to indicate that a column is non-nullable in a schema definition. This has no meaning as a type hint,
|
|
1157
1159
|
and is intended only for schema declarations.
|
|
1158
1160
|
"""
|
|
1161
|
+
|
|
1159
1162
|
pass
|
|
1160
1163
|
|
|
1161
1164
|
|
|
@@ -1178,6 +1181,7 @@ class _PxtType:
|
|
|
1178
1181
|
`Image[(300, 300), 'RGB']`. The specialized forms resolve to `typing.Annotated` instances whose annotation is a
|
|
1179
1182
|
`ColumnType`.
|
|
1180
1183
|
"""
|
|
1184
|
+
|
|
1181
1185
|
def __init__(self):
|
|
1182
1186
|
raise TypeError(f'Type `{type(self)}` cannot be instantiated.')
|
|
1183
1187
|
|
|
@@ -1256,7 +1260,11 @@ class Image(PIL.Image.Image, _PxtType):
|
|
|
1256
1260
|
mode: Optional[str] = None
|
|
1257
1261
|
for param in params:
|
|
1258
1262
|
if isinstance(param, tuple):
|
|
1259
|
-
if
|
|
1263
|
+
if (
|
|
1264
|
+
len(param) != 2
|
|
1265
|
+
or not isinstance(param[0], (int, type(None)))
|
|
1266
|
+
or not isinstance(param[1], (int, type(None)))
|
|
1267
|
+
):
|
|
1260
1268
|
raise TypeError(f'Invalid Image type parameter: {param}')
|
|
1261
1269
|
if size is not None:
|
|
1262
1270
|
raise TypeError(f'Duplicate Image type parameter: {param}')
|
pixeltable/utils/arrow.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
from typing import Any, Iterator, Optional, Union
|
|
2
3
|
|
|
3
4
|
import numpy as np
|
|
4
5
|
import pyarrow as pa
|
|
5
|
-
import datetime
|
|
6
6
|
|
|
7
7
|
import pixeltable.type_system as ts
|
|
8
8
|
|
|
9
|
-
|
|
10
9
|
_pa_to_pt: dict[pa.DataType, ts.ColumnType] = {
|
|
11
10
|
pa.string(): ts.StringType(nullable=True),
|
|
12
11
|
pa.bool_(): ts.BoolType(nullable=True),
|
pixeltable/utils/coco.py
CHANGED
|
@@ -22,6 +22,7 @@ Required format:
|
|
|
22
22
|
}
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
+
|
|
25
26
|
def _verify_input_dict(input_dict: dict[str, Any]) -> None:
|
|
26
27
|
"""Verify that input_dict is a valid input dict for write_coco_dataset()"""
|
|
27
28
|
if not isinstance(input_dict, dict):
|
|
@@ -30,7 +31,7 @@ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
|
|
|
30
31
|
raise excs.Error(f'Missing key "image" in input dict: {input_dict}{format_msg}')
|
|
31
32
|
if not isinstance(input_dict['image'], PIL.Image.Image):
|
|
32
33
|
raise excs.Error(f'Value for "image" is not a PIL.Image.Image: {input_dict}{format_msg}')
|
|
33
|
-
if
|
|
34
|
+
if 'annotations' not in input_dict:
|
|
34
35
|
raise excs.Error(f'Missing key "annotations" in input dict: {input_dict}{format_msg}')
|
|
35
36
|
if not isinstance(input_dict['annotations'], list):
|
|
36
37
|
raise excs.Error(f'Value for "annotations" is not a list: {input_dict}{format_msg}')
|
|
@@ -48,6 +49,7 @@ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
|
|
|
48
49
|
if not isinstance(annotation['category'], (str, int)):
|
|
49
50
|
raise excs.Error(f'Value for "category" is not a str or int: {annotation}{format_msg}')
|
|
50
51
|
|
|
52
|
+
|
|
51
53
|
def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
|
|
52
54
|
"""Export a DataFrame result set as a COCO dataset in dest_path and return the path of the data.json file."""
|
|
53
55
|
# TODO: validate schema
|
|
@@ -96,12 +98,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
|
|
|
96
98
|
img_path = images_dir / f'{img_id}.jpg'
|
|
97
99
|
img.save(img_path)
|
|
98
100
|
|
|
99
|
-
images.append({
|
|
100
|
-
'id': img_id,
|
|
101
|
-
'file_name': str(img_path),
|
|
102
|
-
'width': img.width,
|
|
103
|
-
'height': img.height,
|
|
104
|
-
})
|
|
101
|
+
images.append({'id': img_id, 'file_name': str(img_path), 'width': img.width, 'height': img.height})
|
|
105
102
|
|
|
106
103
|
# create annotation records for this image
|
|
107
104
|
for annotation in input_dict['annotations']:
|
|
@@ -109,15 +106,17 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
|
|
|
109
106
|
x, y, w, h = annotation['bbox']
|
|
110
107
|
category = annotation['category']
|
|
111
108
|
categories.add(category)
|
|
112
|
-
annotations.append(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
109
|
+
annotations.append(
|
|
110
|
+
{
|
|
111
|
+
'id': ann_id,
|
|
112
|
+
'image_id': img_id,
|
|
113
|
+
# we use the category name here and fix it up at the end, when we have assigned category ids
|
|
114
|
+
'category_id': category,
|
|
115
|
+
'bbox': annotation['bbox'],
|
|
116
|
+
'area': w * h,
|
|
117
|
+
'iscrowd': 0,
|
|
118
|
+
}
|
|
119
|
+
)
|
|
121
120
|
|
|
122
121
|
# replace category names with ids
|
|
123
122
|
category_ids = {category: id for id, category in enumerate(sorted(list(categories)))}
|
|
@@ -226,5 +225,5 @@ COCO_2017_CATEGORIES = {
|
|
|
226
225
|
87: 'scissors',
|
|
227
226
|
88: 'teddy bear',
|
|
228
227
|
89: 'hair drier',
|
|
229
|
-
90: 'toothbrush'
|
|
228
|
+
90: 'toothbrush',
|
|
230
229
|
}
|
pixeltable/utils/code.py
CHANGED
|
@@ -3,9 +3,9 @@ from typing import Optional
|
|
|
3
3
|
|
|
4
4
|
from pixeltable.func import Function
|
|
5
5
|
|
|
6
|
-
|
|
7
6
|
# Utilities related to the organization of the Pixeltable codebase.
|
|
8
7
|
|
|
8
|
+
|
|
9
9
|
def local_public_names(mod_name: str, exclude: Optional[list[str]] = None) -> list[str]:
|
|
10
10
|
"""
|
|
11
11
|
Returns a list of all functions and submodules that are local to the specified module and are
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
|
+
|
|
3
4
|
def map_level(verbosity: int) -> int:
|
|
4
5
|
"""
|
|
5
6
|
Map verbosity level to logging level.
|
|
@@ -19,6 +20,7 @@ def map_level(verbosity: int) -> int:
|
|
|
19
20
|
return logging.DEBUG
|
|
20
21
|
return logging.INFO
|
|
21
22
|
|
|
23
|
+
|
|
22
24
|
class ConsoleOutputHandler(logging.StreamHandler):
|
|
23
25
|
def __init__(self, stream):
|
|
24
26
|
super().__init__(stream)
|
|
@@ -29,13 +31,14 @@ class ConsoleOutputHandler(logging.StreamHandler):
|
|
|
29
31
|
else:
|
|
30
32
|
self.stream.write(record.msg + '\n')
|
|
31
33
|
|
|
34
|
+
|
|
32
35
|
class ConsoleMessageFilter(logging.Filter):
|
|
33
36
|
def filter(self, record: logging.LogRecord) -> bool:
|
|
34
37
|
if hasattr(record, 'user_visible') and record.user_visible:
|
|
35
38
|
return True
|
|
36
39
|
return False
|
|
37
40
|
|
|
38
|
-
class ConsoleLogger(logging.LoggerAdapter):
|
|
39
|
-
def __init__(self, logger:logging.Logger):
|
|
40
|
-
super().__init__(logger, extra={'user_visible' : True})
|
|
41
41
|
|
|
42
|
+
class ConsoleLogger(logging.LoggerAdapter):
|
|
43
|
+
def __init__(self, logger: logging.Logger):
|
|
44
|
+
super().__init__(logger, extra={'user_visible': True})
|
|
@@ -25,6 +25,7 @@ class DescriptionHelper:
|
|
|
25
25
|
DescriptionHelper can convert a list of descriptors into either HTML or plaintext and do something reasonable
|
|
26
26
|
in each case.
|
|
27
27
|
"""
|
|
28
|
+
|
|
28
29
|
__descriptors: list[_Descriptor]
|
|
29
30
|
|
|
30
31
|
def __init__(self) -> None:
|
|
@@ -69,18 +70,17 @@ class DescriptionHelper:
|
|
|
69
70
|
return (
|
|
70
71
|
# Render the string as a single-cell DataFrame. This will ensure a consistent style of output in
|
|
71
72
|
# cases where strings appear alongside DataFrames in the same DescriptionHelper.
|
|
72
|
-
pd.DataFrame([descriptor.body])
|
|
73
|
-
.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left', 'font-weight': 'bold'})
|
|
74
|
-
.hide(axis='index')
|
|
73
|
+
pd.DataFrame([descriptor.body])
|
|
74
|
+
.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left', 'font-weight': 'bold'})
|
|
75
|
+
.hide(axis='index')
|
|
76
|
+
.hide(axis='columns')
|
|
75
77
|
)
|
|
76
78
|
else:
|
|
77
79
|
styler = descriptor.styler
|
|
78
80
|
if styler is None:
|
|
79
81
|
styler = descriptor.body.style
|
|
80
|
-
styler = (
|
|
81
|
-
|
|
82
|
-
.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
|
|
83
|
-
.set_table_styles([dict(selector='th', props=[('text-align', 'left')])])
|
|
82
|
+
styler = styler.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'}).set_table_styles(
|
|
83
|
+
[dict(selector='th', props=[('text-align', 'left')])]
|
|
84
84
|
)
|
|
85
85
|
if not descriptor.show_header:
|
|
86
86
|
styler = styler.hide(axis='columns')
|
pixeltable/utils/documents.py
CHANGED
|
@@ -83,6 +83,7 @@ def get_xml_handle(path: str) -> Optional[bs4.BeautifulSoup]:
|
|
|
83
83
|
def get_markdown_handle(path: str) -> Optional[dict]:
|
|
84
84
|
Env.get().require_package('mistune', [3, 0])
|
|
85
85
|
import mistune
|
|
86
|
+
|
|
86
87
|
try:
|
|
87
88
|
with open(path, encoding='utf8') as file:
|
|
88
89
|
text = file.read()
|
|
@@ -91,9 +92,10 @@ def get_markdown_handle(path: str) -> Optional[dict]:
|
|
|
91
92
|
except Exception:
|
|
92
93
|
return None
|
|
93
94
|
|
|
95
|
+
|
|
94
96
|
def get_txt(path: str) -> Optional[str]:
|
|
95
97
|
try:
|
|
96
|
-
with open(path,
|
|
98
|
+
with open(path, 'r') as f:
|
|
97
99
|
doc = f.read()
|
|
98
100
|
return doc if doc != '' else None
|
|
99
101
|
except Exception:
|