pixeltable 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +6 -3
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/globals.py +15 -6
- pixeltable/catalog/insertable_table.py +23 -8
- pixeltable/catalog/named_function.py +1 -1
- pixeltable/catalog/path_dict.py +4 -4
- pixeltable/catalog/schema_object.py +30 -18
- pixeltable/catalog/table.py +87 -104
- pixeltable/catalog/table_version.py +35 -24
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +15 -8
- pixeltable/dataframe.py +56 -56
- pixeltable/env.py +10 -9
- pixeltable/exec/__init__.py +3 -3
- pixeltable/exec/aggregation_node.py +3 -3
- pixeltable/exec/expr_eval_node.py +3 -3
- pixeltable/exec/in_memory_data_node.py +4 -4
- pixeltable/exec/sql_node.py +4 -1
- pixeltable/exprs/arithmetic_expr.py +41 -16
- pixeltable/exprs/array_slice.py +3 -4
- pixeltable/exprs/column_ref.py +20 -4
- pixeltable/exprs/comparison.py +11 -6
- pixeltable/exprs/data_row.py +3 -0
- pixeltable/exprs/expr.py +88 -23
- pixeltable/exprs/function_call.py +12 -1
- pixeltable/exprs/globals.py +3 -1
- pixeltable/exprs/inline_array.py +4 -4
- pixeltable/exprs/json_path.py +36 -20
- pixeltable/exprs/row_builder.py +4 -4
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/functions/__init__.py +1 -2
- pixeltable/functions/audio.py +32 -0
- pixeltable/functions/huggingface.py +4 -4
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +46 -0
- pixeltable/functions/video.py +5 -1
- pixeltable/functions/{eval.py → vision.py} +166 -27
- pixeltable/globals.py +57 -28
- pixeltable/io/external_store.py +6 -6
- pixeltable/io/globals.py +13 -14
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/pandas.py +60 -19
- pixeltable/io/parquet.py +14 -14
- pixeltable/iterators/document.py +7 -7
- pixeltable/iterators/video.py +55 -23
- pixeltable/plan.py +58 -29
- pixeltable/store.py +97 -59
- pixeltable/tool/create_test_db_dump.py +17 -11
- pixeltable/type_system.py +155 -143
- pixeltable/utils/pytorch.py +12 -10
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/METADATA +10 -10
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/RECORD +56 -54
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.14.dist-info → pixeltable-0.2.16.dist-info}/entry_points.txt +0 -0
|
@@ -1,11 +1,29 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for Computer Vision.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
from pixeltable.functions import vision as pxtv
|
|
8
|
+
|
|
9
|
+
t = pxt.get_table(...)
|
|
10
|
+
t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, labels=t.labels)).collect()
|
|
11
|
+
```
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import colorsys
|
|
15
|
+
import hashlib
|
|
16
|
+
import random
|
|
2
17
|
from collections import defaultdict
|
|
3
|
-
import
|
|
18
|
+
from typing import Optional, Union, Any
|
|
4
19
|
|
|
20
|
+
import PIL.Image
|
|
21
|
+
import PIL.Image
|
|
5
22
|
import numpy as np
|
|
6
23
|
|
|
7
|
-
import pixeltable.type_system as ts
|
|
8
24
|
import pixeltable.func as func
|
|
25
|
+
import pixeltable.type_system as ts
|
|
26
|
+
from pixeltable.utils.code import local_public_names
|
|
9
27
|
|
|
10
28
|
|
|
11
29
|
# TODO: figure out a better submodule structure
|
|
@@ -14,7 +32,7 @@ import pixeltable.func as func
|
|
|
14
32
|
# the following function has been adapted from MMEval
|
|
15
33
|
# (sources at https://github.com/open-mmlab/mmeval)
|
|
16
34
|
# Copyright (c) OpenMMLab. All rights reserved.
|
|
17
|
-
def
|
|
35
|
+
def __calculate_bboxes_area(bboxes: np.ndarray) -> np.ndarray:
|
|
18
36
|
"""Calculate area of bounding boxes.
|
|
19
37
|
|
|
20
38
|
Args:
|
|
@@ -31,7 +49,7 @@ def calculate_bboxes_area(bboxes: np.ndarray) -> np.ndarray:
|
|
|
31
49
|
# the following function has been adapted from MMEval
|
|
32
50
|
# (sources at https://github.com/open-mmlab/mmeval)
|
|
33
51
|
# Copyright (c) OpenMMLab. All rights reserved.
|
|
34
|
-
def
|
|
52
|
+
def __calculate_overlaps(bboxes1: np.ndarray, bboxes2: np.ndarray) -> np.ndarray:
|
|
35
53
|
"""Calculate the overlap between each bbox of bboxes1 and bboxes2.
|
|
36
54
|
|
|
37
55
|
Args:
|
|
@@ -58,8 +76,8 @@ def calculate_overlaps(bboxes1: np.ndarray, bboxes2: np.ndarray) -> np.ndarray:
|
|
|
58
76
|
exchange = False
|
|
59
77
|
|
|
60
78
|
# Calculate the bboxes area.
|
|
61
|
-
area1 =
|
|
62
|
-
area2 =
|
|
79
|
+
area1 = __calculate_bboxes_area(bboxes1)
|
|
80
|
+
area2 = __calculate_bboxes_area(bboxes2)
|
|
63
81
|
eps = np.finfo(np.float32).eps
|
|
64
82
|
|
|
65
83
|
for i in range(bboxes1.shape[0]):
|
|
@@ -80,9 +98,9 @@ def calculate_overlaps(bboxes1: np.ndarray, bboxes2: np.ndarray) -> np.ndarray:
|
|
|
80
98
|
# the following function has been adapted from MMEval
|
|
81
99
|
# (sources at https://github.com/open-mmlab/mmeval)
|
|
82
100
|
# Copyright (c) OpenMMLab. All rights reserved.
|
|
83
|
-
def
|
|
101
|
+
def __calculate_image_tpfp(
|
|
84
102
|
pred_bboxes: np.ndarray, pred_scores: np.ndarray, gt_bboxes: np.ndarray, min_iou: float
|
|
85
|
-
) ->
|
|
103
|
+
) -> tuple[np.ndarray, np.ndarray]:
|
|
86
104
|
"""Calculate the true positive and false positive on an image.
|
|
87
105
|
|
|
88
106
|
Args:
|
|
@@ -95,11 +113,8 @@ def calculate_image_tpfp(
|
|
|
95
113
|
|
|
96
114
|
Returns:
|
|
97
115
|
tuple (tp, fp):
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
the true positive flag of each predicted bbox on this image.
|
|
101
|
-
- fp (numpy.ndarray): Shape (N,),
|
|
102
|
-
the false positive flag of each predicted bbox on this image.
|
|
116
|
+
tp: Shape (N,), the true positive flag of each predicted bbox on this image.
|
|
117
|
+
fp: Shape (N,), the false positive flag of each predicted bbox on this image.
|
|
103
118
|
"""
|
|
104
119
|
# Step 1. Concatenate `gt_bboxes` and `ignore_gt_bboxes`, then set
|
|
105
120
|
# the `ignore_gt_flags`.
|
|
@@ -121,7 +136,7 @@ def calculate_image_tpfp(
|
|
|
121
136
|
|
|
122
137
|
# Step 4. Calculate the IoUs between the predicted bboxes and the
|
|
123
138
|
# ground truth bboxes.
|
|
124
|
-
ious =
|
|
139
|
+
ious = __calculate_overlaps(pred_bboxes, gt_bboxes)
|
|
125
140
|
# For each pred bbox, the max iou with all gts.
|
|
126
141
|
ious_max = ious.max(axis=1)
|
|
127
142
|
# For each pred bbox, which gt overlaps most with it.
|
|
@@ -160,14 +175,17 @@ def calculate_image_tpfp(
|
|
|
160
175
|
],
|
|
161
176
|
)
|
|
162
177
|
def eval_detections(
|
|
163
|
-
pred_bboxes:
|
|
164
|
-
pred_labels:
|
|
165
|
-
pred_scores:
|
|
166
|
-
gt_bboxes:
|
|
167
|
-
gt_labels:
|
|
168
|
-
) ->
|
|
178
|
+
pred_bboxes: list[list[int]],
|
|
179
|
+
pred_labels: list[int],
|
|
180
|
+
pred_scores: list[float],
|
|
181
|
+
gt_bboxes: list[list[int]],
|
|
182
|
+
gt_labels: list[int],
|
|
183
|
+
) -> dict:
|
|
184
|
+
"""
|
|
185
|
+
Evaluates the performance of a set of predicted bounding boxes against a set of ground truth bounding boxes.
|
|
186
|
+
"""
|
|
169
187
|
class_idxs = list(set(pred_labels + gt_labels))
|
|
170
|
-
result:
|
|
188
|
+
result: list[dict] = []
|
|
171
189
|
pred_bboxes_arr = np.asarray(pred_bboxes)
|
|
172
190
|
pred_classes_arr = np.asarray(pred_labels)
|
|
173
191
|
pred_scores_arr = np.asarray(pred_scores)
|
|
@@ -177,7 +195,7 @@ def eval_detections(
|
|
|
177
195
|
pred_filter = pred_classes_arr == class_idx
|
|
178
196
|
gt_filter = gt_classes_arr == class_idx
|
|
179
197
|
class_pred_scores = pred_scores_arr[pred_filter]
|
|
180
|
-
tp, fp =
|
|
198
|
+
tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], [0.5])
|
|
181
199
|
ordered_class_pred_scores = -np.sort(-class_pred_scores)
|
|
182
200
|
result.append(
|
|
183
201
|
{
|
|
@@ -194,17 +212,21 @@ def eval_detections(
|
|
|
194
212
|
|
|
195
213
|
@func.uda(update_types=[ts.JsonType()], value_type=ts.JsonType(), allows_std_agg=True, allows_window=False)
|
|
196
214
|
class mean_ap(func.Aggregator):
|
|
215
|
+
"""
|
|
216
|
+
Calculates the mean average precision (mAP) over
|
|
217
|
+
[`eval_detections()`][pixeltable.functions.vision.eval_detections] results.
|
|
218
|
+
"""
|
|
197
219
|
def __init__(self):
|
|
198
|
-
self.class_tpfp:
|
|
220
|
+
self.class_tpfp: dict[int, list[dict]] = defaultdict(list)
|
|
199
221
|
|
|
200
|
-
def update(self, eval_dicts:
|
|
222
|
+
def update(self, eval_dicts: list[dict]) -> None:
|
|
201
223
|
for eval_dict in eval_dicts:
|
|
202
224
|
class_idx = eval_dict['class']
|
|
203
225
|
self.class_tpfp[class_idx].append(eval_dict)
|
|
204
226
|
|
|
205
|
-
def value(self) ->
|
|
227
|
+
def value(self) -> dict:
|
|
206
228
|
eps = np.finfo(np.float32).eps
|
|
207
|
-
result:
|
|
229
|
+
result: dict[int, float] = {}
|
|
208
230
|
for class_idx, tpfp in self.class_tpfp.items():
|
|
209
231
|
a1 = [x['tp'] for x in tpfp]
|
|
210
232
|
tp = np.concatenate([x['tp'] for x in tpfp], axis=0)
|
|
@@ -225,3 +247,120 @@ class mean_ap(func.Aggregator):
|
|
|
225
247
|
ap = np.sum((mrec[ind + 1] - mrec[ind]) * mpre[ind + 1])
|
|
226
248
|
result[class_idx] = ap.item()
|
|
227
249
|
return result
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _create_label_colors(labels: list[Any]) -> dict[Any, str]:
|
|
253
|
+
"""
|
|
254
|
+
Create pseudo-random colors for labels, derived from a hash of each label so that a particular label always gets the same color.
|
|
255
|
+
|
|
256
|
+
Returns:
|
|
257
|
+
dict mapping labels to colors
|
|
258
|
+
"""
|
|
259
|
+
distinct_labels = set(labels)
|
|
260
|
+
result: dict[Any, str] = {}
|
|
261
|
+
for label in distinct_labels:
|
|
262
|
+
# consistent hash for the label
|
|
263
|
+
label_hash = int(hashlib.md5(str(label).encode()).hexdigest(), 16)
|
|
264
|
+
hue = (label_hash % 360) / 360.0
|
|
265
|
+
rgb = colorsys.hsv_to_rgb(hue, 0.7, 0.95)
|
|
266
|
+
hex_color = '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
|
|
267
|
+
result[label] = hex_color
|
|
268
|
+
return result
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
@func.udf
|
|
272
|
+
def draw_bounding_boxes(
|
|
273
|
+
img: PIL.Image.Image,
|
|
274
|
+
boxes: list[list[int]],
|
|
275
|
+
labels: Optional[list[Any]] = None,
|
|
276
|
+
color: Optional[str] = None,
|
|
277
|
+
box_colors: Optional[list[str]] = None,
|
|
278
|
+
fill: bool = False,
|
|
279
|
+
width: int = 1,
|
|
280
|
+
font: Optional[str] = None,
|
|
281
|
+
font_size: Optional[int] = None,
|
|
282
|
+
) -> PIL.Image.Image:
|
|
283
|
+
"""
|
|
284
|
+
Draws bounding boxes on the given image.
|
|
285
|
+
|
|
286
|
+
Labels can be any type that supports `str()` and is hashable (e.g., strings, ints, etc.).
|
|
287
|
+
|
|
288
|
+
Colors can be specified as common HTML color names (e.g., 'red') supported by PIL's
|
|
289
|
+
[`ImageColor`](https://pillow.readthedocs.io/en/stable/reference/ImageColor.html#imagecolor-module) module or as
|
|
290
|
+
RGB hex codes (e.g., '#FF0000').
|
|
291
|
+
|
|
292
|
+
If no colors are specified, this function assigns each label a color derived from a hash of the label, so a given label always gets the same color.
|
|
293
|
+
|
|
294
|
+
Args:
|
|
295
|
+
img: The image on which to draw the bounding boxes.
|
|
296
|
+
boxes: List of bounding boxes, each represented as [xmin, ymin, xmax, ymax].
|
|
297
|
+
labels: List of labels for each bounding box.
|
|
298
|
+
color: Single color to be used for all bounding boxes and labels.
|
|
299
|
+
box_colors: List of colors, one per bounding box.
|
|
300
|
+
fill: Whether to fill the bounding boxes with color.
|
|
301
|
+
width: Width of the bounding box borders.
|
|
302
|
+
font: Name of a system font or path to a TrueType font file, as required by
|
|
303
|
+
[`PIL.ImageFont.truetype()`](https://pillow.readthedocs.io/en/stable/reference/ImageFont.html#PIL.ImageFont.truetype).
|
|
304
|
+
If `None`, uses the default provided by
|
|
305
|
+
[`PIL.ImageFont.load_default()`](https://pillow.readthedocs.io/en/stable/reference/ImageFont.html#PIL.ImageFont.load_default).
|
|
306
|
+
font_size: Size of the font used for labels in points. Only used in conjunction with non-`None` `font` argument.
|
|
307
|
+
|
|
308
|
+
Returns:
|
|
309
|
+
The image with bounding boxes drawn on it.
|
|
310
|
+
"""
|
|
311
|
+
color_params = sum([color is not None, box_colors is not None])
|
|
312
|
+
if color_params > 1:
|
|
313
|
+
raise ValueError("Only one of 'color' or 'box_colors' can be set")
|
|
314
|
+
|
|
315
|
+
# ensure the number of labels matches the number of boxes
|
|
316
|
+
num_boxes = len(boxes)
|
|
317
|
+
if labels is None:
|
|
318
|
+
labels = [None] * num_boxes
|
|
319
|
+
elif len(labels) != num_boxes:
|
|
320
|
+
raise ValueError('Number of boxes and labels must match')
|
|
321
|
+
|
|
322
|
+
DEFAULT_COLOR = 'white'
|
|
323
|
+
if box_colors is not None:
|
|
324
|
+
if len(box_colors) != num_boxes:
|
|
325
|
+
raise ValueError('Number of boxes and box colors must match')
|
|
326
|
+
else:
|
|
327
|
+
if color is not None:
|
|
328
|
+
box_colors = [color] * num_boxes
|
|
329
|
+
else:
|
|
330
|
+
label_colors = _create_label_colors(labels)
|
|
331
|
+
box_colors = [label_colors[label] for label in labels]
|
|
332
|
+
|
|
333
|
+
from PIL import ImageDraw, ImageFont, ImageColor
|
|
334
|
+
# set default font if not provided
|
|
335
|
+
if font is None:
|
|
336
|
+
txt_font = ImageFont.load_default()
|
|
337
|
+
else:
|
|
338
|
+
txt_font = ImageFont.truetype(font=font, size=font_size or 10)
|
|
339
|
+
|
|
340
|
+
img_to_draw = img.copy()
|
|
341
|
+
draw = ImageDraw.Draw(img_to_draw, 'RGBA' if fill else 'RGB')
|
|
342
|
+
|
|
343
|
+
for i, (bbox, label) in enumerate(zip(boxes, labels)):
|
|
344
|
+
# determine color for the current box and label
|
|
345
|
+
color = box_colors[i % len(box_colors)]
|
|
346
|
+
|
|
347
|
+
if fill:
|
|
348
|
+
rgb_color = ImageColor.getrgb(color)
|
|
349
|
+
fill_color = rgb_color + (100,) # semi-transparent
|
|
350
|
+
draw.rectangle(bbox, outline=color, width=width, fill=fill_color)
|
|
351
|
+
else:
|
|
352
|
+
draw.rectangle(bbox, outline=color, width=width)
|
|
353
|
+
|
|
354
|
+
if label is not None:
|
|
355
|
+
label_str = str(label)
|
|
356
|
+
margin = width + 1
|
|
357
|
+
draw.text((bbox[0] + margin, bbox[1] + margin), label_str, fill=color, font=txt_font)
|
|
358
|
+
|
|
359
|
+
return img_to_draw
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
__all__ = local_public_names(__name__)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def __dir__():
|
|
366
|
+
return __all__
|
pixeltable/globals.py
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import logging
|
|
3
3
|
from typing import Any, Optional, Union
|
|
4
|
+
from uuid import UUID
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
6
7
|
import sqlalchemy as sql
|
|
8
|
+
from pandas.io.formats.style import Styler
|
|
7
9
|
from sqlalchemy.util.preloaded import orm
|
|
8
10
|
|
|
9
11
|
import pixeltable.exceptions as excs
|
|
10
12
|
import pixeltable.exprs as exprs
|
|
11
|
-
from pixeltable import catalog, func
|
|
13
|
+
from pixeltable import DataFrame, catalog, func
|
|
12
14
|
from pixeltable.catalog import Catalog
|
|
15
|
+
from pixeltable.dataframe import DataFrameResultSet
|
|
13
16
|
from pixeltable.env import Env
|
|
14
17
|
from pixeltable.iterators import ComponentIterator
|
|
15
18
|
from pixeltable.metadata import schema
|
|
@@ -24,21 +27,25 @@ def init() -> None:
|
|
|
24
27
|
|
|
25
28
|
def create_table(
|
|
26
29
|
path_str: str,
|
|
27
|
-
|
|
30
|
+
schema_or_df: Union[dict[str, Any], DataFrame],
|
|
28
31
|
*,
|
|
29
32
|
primary_key: Optional[Union[str, list[str]]] = None,
|
|
30
33
|
num_retained_versions: int = 10,
|
|
31
34
|
comment: str = '',
|
|
32
|
-
) -> catalog.
|
|
33
|
-
"""Create a new
|
|
35
|
+
) -> catalog.Table:
|
|
36
|
+
"""Create a new base table.
|
|
34
37
|
|
|
35
38
|
Args:
|
|
36
39
|
path_str: Path to the table.
|
|
37
|
-
|
|
40
|
+
schema_or_df: Either a dictionary that maps column names to column types, or a
|
|
41
|
+
[`DataFrame`][pixeltable.DataFrame] whose contents and schema will be used to pre-populate the table.
|
|
42
|
+
primary_key: An optional column name or list of column names to use as the primary key(s) of the
|
|
43
|
+
table.
|
|
38
44
|
num_retained_versions: Number of versions of the table to retain.
|
|
45
|
+
comment: An optional comment; its meaning is user-defined.
|
|
39
46
|
|
|
40
47
|
Returns:
|
|
41
|
-
|
|
48
|
+
A handle to the newly created [`Table`][pixeltable.Table].
|
|
42
49
|
|
|
43
50
|
Raises:
|
|
44
51
|
Error: if the path already exists or is invalid.
|
|
@@ -46,12 +53,27 @@ def create_table(
|
|
|
46
53
|
Examples:
|
|
47
54
|
Create a table with an int and a string column:
|
|
48
55
|
|
|
49
|
-
>>> table =
|
|
56
|
+
>>> table = pxt.create_table('my_table', {'col1': IntType(), 'col2': StringType()})
|
|
57
|
+
|
|
58
|
+
Create a table from a select statement over an existing table `tbl`:
|
|
59
|
+
|
|
60
|
+
>>> table = pxt.create_table('my_table', tbl.where(tbl.col1 < 10).select(tbl.col2))
|
|
50
61
|
"""
|
|
51
62
|
path = catalog.Path(path_str)
|
|
52
63
|
Catalog.get().paths.check_is_valid(path, expected=None)
|
|
53
64
|
dir = Catalog.get().paths[path.parent]
|
|
54
65
|
|
|
66
|
+
df: Optional[DataFrame] = None
|
|
67
|
+
if isinstance(schema_or_df, dict):
|
|
68
|
+
schema = schema_or_df
|
|
69
|
+
elif isinstance(schema_or_df, DataFrame):
|
|
70
|
+
df = schema_or_df
|
|
71
|
+
schema = df.schema
|
|
72
|
+
elif isinstance(schema_or_df, DataFrameResultSet):
|
|
73
|
+
raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. (Is there an extraneous call to `collect()`?)')
|
|
74
|
+
else:
|
|
75
|
+
raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
|
|
76
|
+
|
|
55
77
|
if len(schema) == 0:
|
|
56
78
|
raise excs.Error(f'Table schema is empty: `{path_str}`')
|
|
57
79
|
|
|
@@ -63,15 +85,17 @@ def create_table(
|
|
|
63
85
|
if not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
|
|
64
86
|
raise excs.Error('primary_key must be a single column name or a list of column names')
|
|
65
87
|
|
|
66
|
-
tbl = catalog.InsertableTable.
|
|
88
|
+
tbl = catalog.InsertableTable._create(
|
|
67
89
|
dir._id,
|
|
68
90
|
path.name,
|
|
69
91
|
schema,
|
|
92
|
+
df,
|
|
70
93
|
primary_key=primary_key,
|
|
71
94
|
num_retained_versions=num_retained_versions,
|
|
72
95
|
comment=comment,
|
|
73
96
|
)
|
|
74
97
|
Catalog.get().paths[path] = tbl
|
|
98
|
+
|
|
75
99
|
_logger.info(f'Created table `{path_str}`.')
|
|
76
100
|
return tbl
|
|
77
101
|
|
|
@@ -87,25 +111,28 @@ def create_view(
|
|
|
87
111
|
num_retained_versions: int = 10,
|
|
88
112
|
comment: str = '',
|
|
89
113
|
ignore_errors: bool = False,
|
|
90
|
-
) -> catalog.
|
|
91
|
-
"""Create a
|
|
114
|
+
) -> Optional[catalog.Table]:
|
|
115
|
+
"""Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
92
116
|
|
|
93
117
|
Args:
|
|
94
118
|
path_str: Path to the view.
|
|
95
|
-
base: Table (i.e., table or view or snapshot) or DataFrame to
|
|
119
|
+
base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
|
|
120
|
+
base the view on.
|
|
96
121
|
schema: dictionary mapping column names to column types, value expressions, or to column specifications.
|
|
97
122
|
filter: predicate to filter rows of the base table.
|
|
98
123
|
is_snapshot: Whether the view is a snapshot.
|
|
99
124
|
iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
|
|
100
125
|
the base table.
|
|
101
126
|
num_retained_versions: Number of versions of the view to retain.
|
|
127
|
+
comment: Optional comment for the view.
|
|
102
128
|
ignore_errors: if True, fail silently if the path already exists or is invalid.
|
|
103
129
|
|
|
104
130
|
Returns:
|
|
105
|
-
|
|
131
|
+
A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
|
|
132
|
+
exists or is invalid and `ignore_errors=True`, returns `None`.
|
|
106
133
|
|
|
107
134
|
Raises:
|
|
108
|
-
Error: if the path already exists or is invalid
|
|
135
|
+
Error: if the path already exists or is invalid and `ignore_errors=False`.
|
|
109
136
|
|
|
110
137
|
Examples:
|
|
111
138
|
Create a view with an additional int and a string column and a filter:
|
|
@@ -140,7 +167,7 @@ def create_view(
|
|
|
140
167
|
Catalog.get().paths.check_is_valid(path, expected=None)
|
|
141
168
|
except Exception as e:
|
|
142
169
|
if ignore_errors:
|
|
143
|
-
return
|
|
170
|
+
return None
|
|
144
171
|
else:
|
|
145
172
|
raise e
|
|
146
173
|
dir = Catalog.get().paths[path.parent]
|
|
@@ -152,7 +179,7 @@ def create_view(
|
|
|
152
179
|
else:
|
|
153
180
|
iterator_class, iterator_args = iterator
|
|
154
181
|
|
|
155
|
-
view = catalog.View.
|
|
182
|
+
view = catalog.View._create(
|
|
156
183
|
dir._id,
|
|
157
184
|
path.name,
|
|
158
185
|
base=tbl_version_path,
|
|
@@ -170,16 +197,16 @@ def create_view(
|
|
|
170
197
|
|
|
171
198
|
|
|
172
199
|
def get_table(path: str) -> catalog.Table:
|
|
173
|
-
"""Get a handle to
|
|
200
|
+
"""Get a handle to an existing table or view or snapshot.
|
|
174
201
|
|
|
175
202
|
Args:
|
|
176
203
|
path: Path to the table.
|
|
177
204
|
|
|
178
205
|
Returns:
|
|
179
|
-
A
|
|
206
|
+
A handle to the [`Table`][pixeltable.Table].
|
|
180
207
|
|
|
181
208
|
Raises:
|
|
182
|
-
Error: If the path does not exist or does not designate a table.
|
|
209
|
+
Error: If the path does not exist or does not designate a table object.
|
|
183
210
|
|
|
184
211
|
Examples:
|
|
185
212
|
Get handle for a table in the top-level directory:
|
|
@@ -197,6 +224,7 @@ def get_table(path: str) -> catalog.Table:
|
|
|
197
224
|
p = catalog.Path(path)
|
|
198
225
|
Catalog.get().paths.check_is_valid(p, expected=catalog.Table)
|
|
199
226
|
obj = Catalog.get().paths[p]
|
|
227
|
+
assert isinstance(obj, catalog.Table)
|
|
200
228
|
return obj
|
|
201
229
|
|
|
202
230
|
|
|
@@ -230,15 +258,15 @@ def move(path: str, new_path: str) -> None:
|
|
|
230
258
|
|
|
231
259
|
|
|
232
260
|
def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> None:
|
|
233
|
-
"""Drop a table.
|
|
261
|
+
"""Drop a table or view or snapshot.
|
|
234
262
|
|
|
235
263
|
Args:
|
|
236
|
-
path: Path to the
|
|
264
|
+
path: Path to the [`Table`][pixeltable.Table].
|
|
237
265
|
force: If `True`, will also drop all views or sub-views of this table.
|
|
238
266
|
ignore_errors: Whether to ignore errors if the table does not exist.
|
|
239
267
|
|
|
240
268
|
Raises:
|
|
241
|
-
Error: If the path does not exist or does not designate a table and ignore_errors is False.
|
|
269
|
+
Error: If the path does not exist or does not designate a table object and ignore_errors is False.
|
|
242
270
|
|
|
243
271
|
Examples:
|
|
244
272
|
>>> pxt.drop_table('my_table')
|
|
@@ -256,7 +284,7 @@ def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> N
|
|
|
256
284
|
tbl = cat.paths[path_obj]
|
|
257
285
|
assert isinstance(tbl, catalog.Table)
|
|
258
286
|
if len(cat.tbl_dependents[tbl._id]) > 0:
|
|
259
|
-
dependent_paths = [dep.
|
|
287
|
+
dependent_paths = [dep._path for dep in cat.tbl_dependents[tbl._id]]
|
|
260
288
|
if force:
|
|
261
289
|
for dependent_path in dependent_paths:
|
|
262
290
|
drop_table(dependent_path, force=True)
|
|
@@ -268,14 +296,14 @@ def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> N
|
|
|
268
296
|
|
|
269
297
|
|
|
270
298
|
def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
271
|
-
"""List the
|
|
299
|
+
"""List the [`Table`][pixeltable.Table]s in a directory.
|
|
272
300
|
|
|
273
301
|
Args:
|
|
274
302
|
dir_path: Path to the directory. Defaults to the root directory.
|
|
275
303
|
recursive: Whether to list tables in subdirectories as well.
|
|
276
304
|
|
|
277
305
|
Returns:
|
|
278
|
-
A list of
|
|
306
|
+
A list of [`Table`][pixeltable.Table] paths.
|
|
279
307
|
|
|
280
308
|
Raises:
|
|
281
309
|
Error: If the path does not exist or does not designate a directory.
|
|
@@ -297,7 +325,7 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
|
297
325
|
return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
|
|
298
326
|
|
|
299
327
|
|
|
300
|
-
def create_dir(path_str: str, ignore_errors: bool = False) -> catalog.Dir:
|
|
328
|
+
def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.Dir]:
|
|
301
329
|
"""Create a directory.
|
|
302
330
|
|
|
303
331
|
Args:
|
|
@@ -325,6 +353,7 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> catalog.Dir:
|
|
|
325
353
|
session.add(dir_record)
|
|
326
354
|
session.flush()
|
|
327
355
|
assert dir_record.id is not None
|
|
356
|
+
assert isinstance(dir_record.id, UUID)
|
|
328
357
|
dir = catalog.Dir(dir_record.id, parent._id, path.name)
|
|
329
358
|
Catalog.get().paths[path] = dir
|
|
330
359
|
session.commit()
|
|
@@ -333,7 +362,7 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> catalog.Dir:
|
|
|
333
362
|
return dir
|
|
334
363
|
except excs.Error as e:
|
|
335
364
|
if ignore_errors:
|
|
336
|
-
return
|
|
365
|
+
return None
|
|
337
366
|
else:
|
|
338
367
|
raise e
|
|
339
368
|
|
|
@@ -415,7 +444,7 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
|
|
|
415
444
|
return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Dir, recursive=recursive)]
|
|
416
445
|
|
|
417
446
|
|
|
418
|
-
def list_functions() ->
|
|
447
|
+
def list_functions() -> Styler:
|
|
419
448
|
"""Returns information about all registered functions.
|
|
420
449
|
|
|
421
450
|
Returns:
|
|
@@ -436,7 +465,7 @@ def list_functions() -> pd.DataFrame:
|
|
|
436
465
|
'Return Type': [str(f.signature.get_return_type()) for f in functions],
|
|
437
466
|
}
|
|
438
467
|
)
|
|
439
|
-
pd_df = pd_df.style.set_properties(**{'text-align': 'left'}).set_table_styles(
|
|
468
|
+
pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
|
|
440
469
|
[dict(selector='th', props=[('text-align', 'center')])]
|
|
441
470
|
) # center-align headings
|
|
442
471
|
return pd_df.hide(axis='index')
|
pixeltable/io/external_store.py
CHANGED
|
@@ -217,17 +217,17 @@ class Project(ExternalStore, abc.ABC):
|
|
|
217
217
|
resolved_col_mapping: dict[Column, str] = {}
|
|
218
218
|
|
|
219
219
|
# Validate names
|
|
220
|
-
t_cols = table.
|
|
220
|
+
t_cols = set(table._schema.keys())
|
|
221
221
|
for t_col, ext_col in col_mapping.items():
|
|
222
222
|
if t_col not in t_cols:
|
|
223
223
|
if is_user_specified_col_mapping:
|
|
224
224
|
raise excs.Error(
|
|
225
|
-
f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{table.
|
|
225
|
+
f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{table._name}` '
|
|
226
226
|
'contains no such column.'
|
|
227
227
|
)
|
|
228
228
|
else:
|
|
229
229
|
raise excs.Error(
|
|
230
|
-
f'Column `{t_col}` does not exist in Table `{table.
|
|
230
|
+
f'Column `{t_col}` does not exist in Table `{table._name}`. Either add a column `{t_col}`, '
|
|
231
231
|
f'or specify a `col_mapping` to associate a different column with the external field `{ext_col}`.'
|
|
232
232
|
)
|
|
233
233
|
if ext_col not in export_cols and ext_col not in import_cols:
|
|
@@ -238,13 +238,13 @@ class Project(ExternalStore, abc.ABC):
|
|
|
238
238
|
col = table[t_col].col
|
|
239
239
|
resolved_col_mapping[col] = ext_col
|
|
240
240
|
# Validate column specs
|
|
241
|
-
t_col_types = table.
|
|
241
|
+
t_col_types = table._schema
|
|
242
242
|
for t_col, ext_col in col_mapping.items():
|
|
243
243
|
t_col_type = t_col_types[t_col]
|
|
244
244
|
if ext_col in export_cols:
|
|
245
245
|
# Validate that the table column can be assigned to the external column
|
|
246
246
|
ext_col_type = export_cols[ext_col]
|
|
247
|
-
if not ext_col_type.is_supertype_of(t_col_type):
|
|
247
|
+
if not ext_col_type.is_supertype_of(t_col_type, ignore_nullable=True):
|
|
248
248
|
raise excs.Error(
|
|
249
249
|
f'Column `{t_col}` cannot be exported to external column `{ext_col}` (incompatible types; expecting `{ext_col_type}`)'
|
|
250
250
|
)
|
|
@@ -255,7 +255,7 @@ class Project(ExternalStore, abc.ABC):
|
|
|
255
255
|
f'Column `{t_col}` is a computed column, which cannot be populated from an external column'
|
|
256
256
|
)
|
|
257
257
|
ext_col_type = import_cols[ext_col]
|
|
258
|
-
if not t_col_type.is_supertype_of(ext_col_type):
|
|
258
|
+
if not t_col_type.is_supertype_of(ext_col_type, ignore_nullable=True):
|
|
259
259
|
raise excs.Error(
|
|
260
260
|
f'Column `{t_col}` cannot be imported from external column `{ext_col}` (incompatible types; expecting `{ext_col_type}`)'
|
|
261
261
|
)
|