pixeltable 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/insertable_table.py +9 -7
- pixeltable/catalog/table.py +18 -5
- pixeltable/catalog/table_version.py +1 -1
- pixeltable/catalog/view.py +1 -1
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +140 -40
- pixeltable/exceptions.py +12 -5
- pixeltable/exec/component_iteration_node.py +63 -42
- pixeltable/exprs/__init__.py +1 -2
- pixeltable/exprs/expr.py +5 -6
- pixeltable/exprs/function_call.py +8 -10
- pixeltable/exprs/inline_expr.py +200 -0
- pixeltable/exprs/json_path.py +3 -6
- pixeltable/ext/functions/whisperx.py +2 -0
- pixeltable/ext/functions/yolox.py +5 -3
- pixeltable/functions/huggingface.py +89 -12
- pixeltable/functions/image.py +3 -3
- pixeltable/functions/together.py +37 -16
- pixeltable/functions/vision.py +43 -21
- pixeltable/functions/whisper.py +3 -0
- pixeltable/globals.py +7 -1
- pixeltable/io/globals.py +1 -1
- pixeltable/io/hf_datasets.py +3 -3
- pixeltable/iterators/document.py +1 -1
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_18.py +1 -1
- pixeltable/metadata/converters/convert_20.py +56 -0
- pixeltable/metadata/converters/util.py +29 -4
- pixeltable/metadata/notes.py +1 -0
- pixeltable/tool/create_test_db_dump.py +15 -4
- pixeltable/type_system.py +3 -1
- pixeltable/utils/filecache.py +126 -79
- pixeltable-0.2.20.dist-info/LICENSE +201 -0
- {pixeltable-0.2.18.dist-info → pixeltable-0.2.20.dist-info}/METADATA +16 -6
- {pixeltable-0.2.18.dist-info → pixeltable-0.2.20.dist-info}/RECORD +39 -39
- pixeltable/exprs/inline_array.py +0 -117
- pixeltable/exprs/inline_dict.py +0 -104
- pixeltable-0.2.18.dist-info/LICENSE +0 -18
- {pixeltable-0.2.18.dist-info → pixeltable-0.2.20.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.18.dist-info → pixeltable-0.2.20.dist-info}/entry_points.txt +0 -0
pixeltable/functions/vision.py
CHANGED
|
@@ -19,12 +19,9 @@ from typing import Any, Optional, Union
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import PIL.Image
|
|
21
21
|
|
|
22
|
-
import pixeltable
|
|
23
|
-
import pixeltable.type_system as ts
|
|
22
|
+
import pixeltable as pxt
|
|
24
23
|
from pixeltable.utils.code import local_public_names
|
|
25
24
|
|
|
26
|
-
# TODO: figure out a better submodule structure
|
|
27
|
-
|
|
28
25
|
|
|
29
26
|
# the following function has been adapted from MMEval
|
|
30
27
|
# (sources at https://github.com/open-mmlab/mmeval)
|
|
@@ -161,25 +158,41 @@ def __calculate_image_tpfp(
|
|
|
161
158
|
return tp, fp
|
|
162
159
|
|
|
163
160
|
|
|
164
|
-
@
|
|
165
|
-
return_type=ts.JsonType(nullable=False),
|
|
166
|
-
param_types=[
|
|
167
|
-
ts.JsonType(nullable=False),
|
|
168
|
-
ts.JsonType(nullable=False),
|
|
169
|
-
ts.JsonType(nullable=False),
|
|
170
|
-
ts.JsonType(nullable=False),
|
|
171
|
-
ts.JsonType(nullable=False),
|
|
172
|
-
],
|
|
173
|
-
)
|
|
161
|
+
@pxt.udf
|
|
174
162
|
def eval_detections(
|
|
175
163
|
pred_bboxes: list[list[int]],
|
|
176
164
|
pred_labels: list[int],
|
|
177
165
|
pred_scores: list[float],
|
|
178
166
|
gt_bboxes: list[list[int]],
|
|
179
167
|
gt_labels: list[int],
|
|
168
|
+
min_iou: float = 0.5,
|
|
180
169
|
) -> list[dict]:
|
|
181
170
|
"""
|
|
182
171
|
Evaluates the performance of a set of predicted bounding boxes against a set of ground truth bounding boxes.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
pred_bboxes: List of predicted bounding boxes, each represented as [xmin, ymin, xmax, ymax].
|
|
175
|
+
pred_labels: List of predicted labels.
|
|
176
|
+
pred_scores: List of predicted scores.
|
|
177
|
+
gt_bboxes: List of ground truth bounding boxes, each represented as [xmin, ymin, xmax, ymax].
|
|
178
|
+
gt_labels: List of ground truth labels.
|
|
179
|
+
min_iou: Minimum intersection-over-union (IoU) threshold for a predicted bounding box to be
|
|
180
|
+
considered a true positive.
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
A list of dictionaries, one per label class, with the following structure:
|
|
184
|
+
```python
|
|
185
|
+
{
|
|
186
|
+
'min_iou': float, # The value of `min_iou` used for the detections
|
|
187
|
+
'class': int, # The label class
|
|
188
|
+
'tp': list[int], # List of 1's and 0's indicating true positives for each
|
|
189
|
+
# predicted bounding box of this class
|
|
190
|
+
'fp': list[int], # List of 1's and 0's indicating false positives for each
|
|
191
|
+
# predicted bounding box of this class; `fp[n] == 1 - tp[n]`
|
|
192
|
+
'scores': list[float], # List of predicted scores for each bounding box of this class
|
|
193
|
+
'num_gts': int, # Number of ground truth bounding boxes of this class
|
|
194
|
+
}
|
|
195
|
+
```
|
|
183
196
|
"""
|
|
184
197
|
class_idxs = list(set(pred_labels + gt_labels))
|
|
185
198
|
result: list[dict] = []
|
|
@@ -192,11 +205,11 @@ def eval_detections(
|
|
|
192
205
|
pred_filter = pred_classes_arr == class_idx
|
|
193
206
|
gt_filter = gt_classes_arr == class_idx
|
|
194
207
|
class_pred_scores = pred_scores_arr[pred_filter]
|
|
195
|
-
tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter],
|
|
208
|
+
tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], min_iou)
|
|
196
209
|
ordered_class_pred_scores = -np.sort(-class_pred_scores)
|
|
197
210
|
result.append(
|
|
198
211
|
{
|
|
199
|
-
'min_iou':
|
|
212
|
+
'min_iou': min_iou,
|
|
200
213
|
'class': class_idx,
|
|
201
214
|
'tp': tp.tolist(),
|
|
202
215
|
'fp': fp.tolist(),
|
|
@@ -207,11 +220,20 @@ def eval_detections(
|
|
|
207
220
|
return result
|
|
208
221
|
|
|
209
222
|
|
|
210
|
-
@
|
|
211
|
-
class mean_ap(func.Aggregator):
|
|
223
|
+
@pxt.uda(update_types=[pxt.JsonType()], value_type=pxt.JsonType(), allows_std_agg=True, allows_window=False)
|
|
224
|
+
class mean_ap(pxt.Aggregator):
|
|
212
225
|
"""
|
|
213
226
|
Calculates the mean average precision (mAP) over
|
|
214
227
|
[`eval_detections()`][pixeltable.functions.vision.eval_detections] results.
|
|
228
|
+
|
|
229
|
+
__Parameters:__
|
|
230
|
+
|
|
231
|
+
- `eval_dicts` (list[dict]): List of dictionaries as returned by
|
|
232
|
+
[`eval_detections()`][pixeltable.functions.vision.eval_detections].
|
|
233
|
+
|
|
234
|
+
__Returns:__
|
|
235
|
+
|
|
236
|
+
- A `dict[int, float]` mapping each label class to an average precision (AP) value for that class.
|
|
215
237
|
"""
|
|
216
238
|
def __init__(self):
|
|
217
239
|
self.class_tpfp: dict[int, list[dict]] = defaultdict(list)
|
|
@@ -246,7 +268,7 @@ class mean_ap(func.Aggregator):
|
|
|
246
268
|
return result
|
|
247
269
|
|
|
248
270
|
|
|
249
|
-
def _create_label_colors(labels: list[Any]) -> dict[Any, str]:
|
|
271
|
+
def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
|
|
250
272
|
"""
|
|
251
273
|
Create random colors for labels such that a particular label always gets the same color.
|
|
252
274
|
|
|
@@ -265,7 +287,7 @@ def _create_label_colors(labels: list[Any]) -> dict[Any, str]:
|
|
|
265
287
|
return result
|
|
266
288
|
|
|
267
289
|
|
|
268
|
-
@
|
|
290
|
+
@pxt.udf
|
|
269
291
|
def draw_bounding_boxes(
|
|
270
292
|
img: PIL.Image.Image,
|
|
271
293
|
boxes: list[list[int]],
|
|
@@ -324,7 +346,7 @@ def draw_bounding_boxes(
|
|
|
324
346
|
if color is not None:
|
|
325
347
|
box_colors = [color] * num_boxes
|
|
326
348
|
else:
|
|
327
|
-
label_colors = _create_label_colors(labels)
|
|
349
|
+
label_colors = __create_label_colors(labels)
|
|
328
350
|
box_colors = [label_colors[label] for label in labels]
|
|
329
351
|
|
|
330
352
|
from PIL import ImageColor, ImageDraw, ImageFont
|
pixeltable/functions/whisper.py
CHANGED
|
@@ -9,6 +9,7 @@ first `pip install openai-whisper`.
|
|
|
9
9
|
from typing import TYPE_CHECKING, Optional
|
|
10
10
|
|
|
11
11
|
import pixeltable as pxt
|
|
12
|
+
from pixeltable.env import Env
|
|
12
13
|
|
|
13
14
|
if TYPE_CHECKING:
|
|
14
15
|
from whisper import Whisper # type: ignore[import-untyped]
|
|
@@ -71,6 +72,8 @@ def transcribe(
|
|
|
71
72
|
|
|
72
73
|
>>> tbl['result'] = transcribe(tbl.audio, model='base.en')
|
|
73
74
|
"""
|
|
75
|
+
Env.get().require_package('whisper')
|
|
76
|
+
Env.get().require_package('torch')
|
|
74
77
|
import torch
|
|
75
78
|
|
|
76
79
|
if decode_options is None:
|
pixeltable/globals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import logging
|
|
3
|
-
from typing import Any, Optional, Union
|
|
3
|
+
from typing import Any, Iterable, Optional, Union
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
@@ -16,6 +16,7 @@ from pixeltable.dataframe import DataFrameResultSet
|
|
|
16
16
|
from pixeltable.env import Env
|
|
17
17
|
from pixeltable.iterators import ComponentIterator
|
|
18
18
|
from pixeltable.metadata import schema
|
|
19
|
+
from pixeltable.utils.filecache import FileCache
|
|
19
20
|
|
|
20
21
|
_logger = logging.getLogger('pixeltable')
|
|
21
22
|
|
|
@@ -193,6 +194,7 @@ def create_view(
|
|
|
193
194
|
)
|
|
194
195
|
Catalog.get().paths[path] = view
|
|
195
196
|
_logger.info(f'Created view `{path_str}`.')
|
|
197
|
+
FileCache.get().emit_eviction_warnings()
|
|
196
198
|
return view
|
|
197
199
|
|
|
198
200
|
|
|
@@ -487,3 +489,7 @@ def configure_logging(
|
|
|
487
489
|
remove: comma-separated list of module names
|
|
488
490
|
"""
|
|
489
491
|
return Env.get().configure_logging(to_stdout=to_stdout, level=level, add=add, remove=remove)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def array(elements: Iterable) -> exprs.Expr:
|
|
495
|
+
return exprs.InlineArray(elements)
|
pixeltable/io/globals.py
CHANGED
|
@@ -43,7 +43,7 @@ def create_label_studio_project(
|
|
|
43
43
|
The API key and URL for a valid Label Studio server must be specified in Pixeltable config. Either:
|
|
44
44
|
|
|
45
45
|
* Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
|
|
46
|
-
* Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.
|
|
46
|
+
* Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.toml`.
|
|
47
47
|
|
|
48
48
|
__Requirements:__
|
|
49
49
|
|
pixeltable/io/hf_datasets.py
CHANGED
|
@@ -34,9 +34,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
def _to_pixeltable_type(
|
|
38
|
-
feature_type: Union[datasets.ClassLabel, datasets.Value, datasets.Sequence],
|
|
39
|
-
) -> Optional[ts.ColumnType]:
|
|
37
|
+
def _to_pixeltable_type(feature_type: Any) -> Optional[ts.ColumnType]:
|
|
40
38
|
"""Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
|
|
41
39
|
import datasets
|
|
42
40
|
|
|
@@ -51,6 +49,8 @@ def _to_pixeltable_type(
|
|
|
51
49
|
dtype = _to_pixeltable_type(feature_type.feature)
|
|
52
50
|
length = feature_type.length if feature_type.length != -1 else None
|
|
53
51
|
return ts.ArrayType(shape=(length,), dtype=dtype)
|
|
52
|
+
elif isinstance(feature_type, datasets.Image):
|
|
53
|
+
return ts.ImageType(nullable=True)
|
|
54
54
|
else:
|
|
55
55
|
return None
|
|
56
56
|
|
pixeltable/iterators/document.py
CHANGED
|
@@ -166,7 +166,7 @@ class DocumentSplitter(ComponentIterator):
|
|
|
166
166
|
return {
|
|
167
167
|
'document': DocumentType(nullable=False),
|
|
168
168
|
'separators': StringType(nullable=False),
|
|
169
|
-
'metadata': StringType(nullable=
|
|
169
|
+
'metadata': StringType(nullable=False),
|
|
170
170
|
'limit': IntType(nullable=True),
|
|
171
171
|
'overlap': IntType(nullable=True),
|
|
172
172
|
'skip_tags': StringType(nullable=True),
|
pixeltable/metadata/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
|
|
|
10
10
|
from .schema import SystemInfo, SystemInfoMd
|
|
11
11
|
|
|
12
12
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
13
|
-
VERSION = 20
|
|
13
|
+
VERSION = 21
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
pixeltable/metadata/converters/convert_18.py
CHANGED
|
@@ -13,7 +13,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def __substitute_md(k:
|
|
16
|
+
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
17
17
|
# Migrate a few changed function names
|
|
18
18
|
if k == 'path' and v == 'pixeltable.functions.string.str_format':
|
|
19
19
|
return 'path', 'pixeltable.functions.string.format'
|
pixeltable/metadata/converters/convert_20.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable.metadata import register_converter
|
|
6
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_converter(version=20)
|
|
10
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
11
|
+
convert_table_md(
|
|
12
|
+
engine,
|
|
13
|
+
substitution_fn=__substitute_md
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
18
|
+
if isinstance(v, dict) and '_classname' in v:
|
|
19
|
+
# The way InlineArray is represented changed in v20. Previously, literal values were stored
|
|
20
|
+
# directly in the Inline expr; now we store them in Literal sub-exprs. This converter
|
|
21
|
+
# constructs new Literal exprs for the literal values in InlineArray, interleaving them
|
|
22
|
+
# with non-literal exprs into the correct sequence.
|
|
23
|
+
if v['_classname'] == 'InlineArray':
|
|
24
|
+
components = v.get('components') # Might be None, but that's ok
|
|
25
|
+
updated_components = []
|
|
26
|
+
for idx, val in v['elements']:
|
|
27
|
+
# idx >= 0, then this is a non-literal sub-expr. Otherwise, idx could be either
|
|
28
|
+
# None or -1, for legacy reasons (which are now obviated).
|
|
29
|
+
if idx is not None and idx >= 0:
|
|
30
|
+
updated_components.append(components[idx])
|
|
31
|
+
else:
|
|
32
|
+
updated_components.append({'val': val, '_classname': 'Literal'})
|
|
33
|
+
# InlineList was split out from InlineArray in v20. If is_json=True, then this is
|
|
34
|
+
# actually an InlineList. If is_json=False, then we assume it's an InlineArray for now,
|
|
35
|
+
# but it might actually be transformed into an InlineList when it is instantiated
|
|
36
|
+
# (unfortunately, there is no way to disambiguate at this stage; see comments in
|
|
37
|
+
# InlineArray._from_dict() for more details).
|
|
38
|
+
updated_v = {'_classname': 'InlineList' if v.get('is_json') else 'InlineArray'}
|
|
39
|
+
if len(updated_components) > 0:
|
|
40
|
+
updated_v['components'] = updated_components
|
|
41
|
+
return k, updated_v
|
|
42
|
+
if v['_classname'] == 'InlineDict':
|
|
43
|
+
components = v.get('components')
|
|
44
|
+
keys = []
|
|
45
|
+
updated_components = []
|
|
46
|
+
for key, idx, val in v['dict_items']:
|
|
47
|
+
keys.append(key)
|
|
48
|
+
if idx is not None and idx >= 0:
|
|
49
|
+
updated_components.append(components[idx])
|
|
50
|
+
else:
|
|
51
|
+
updated_components.append({'val': val, '_classname': 'Literal'})
|
|
52
|
+
updated_v = {'keys': keys, '_classname': 'InlineDict'}
|
|
53
|
+
if len(updated_components) > 0:
|
|
54
|
+
updated_v['components'] = updated_components
|
|
55
|
+
return k, updated_v
|
|
56
|
+
return None
|
pixeltable/metadata/converters/util.py
CHANGED
|
@@ -14,8 +14,22 @@ def convert_table_md(
|
|
|
14
14
|
table_md_updater: Optional[Callable[[dict], None]] = None,
|
|
15
15
|
column_md_updater: Optional[Callable[[dict], None]] = None,
|
|
16
16
|
external_store_md_updater: Optional[Callable[[dict], None]] = None,
|
|
17
|
-
substitution_fn: Optional[Callable[[
|
|
17
|
+
substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None
|
|
18
18
|
) -> None:
|
|
19
|
+
"""
|
|
20
|
+
Converts table metadata based on the specified conversion functions.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
engine: The SQLAlchemy engine.
|
|
24
|
+
table_md_updater: A function that updates the table metadata in place.
|
|
25
|
+
column_md_updater: A function that updates the column metadata in place.
|
|
26
|
+
external_store_md_updater: A function that updates the external store metadata in place.
|
|
27
|
+
substitution_fn: A function that substitutes metadata values. If specified, all metadata will be traversed
|
|
28
|
+
recursively, and `substitution_fn` will be called once for each metadata entry. If the entry appears in
|
|
29
|
+
a dict as a `(k, v)` pair, then `substitution_fn(k, v)` will be called. If the entry appears in a list,
|
|
30
|
+
then `substitution_fn(None, v)` will be called. If `substitution_fn` returns a tuple `(k', v')`, then
|
|
31
|
+
the original entry will be replaced, and the traversal will continue with `v'`.
|
|
32
|
+
"""
|
|
19
33
|
with engine.begin() as conn:
|
|
20
34
|
for row in conn.execute(sql.select(Table)):
|
|
21
35
|
id = row[0]
|
|
@@ -49,18 +63,29 @@ def __update_external_store_md(table_md: dict, external_store_md_updater: Callab
|
|
|
49
63
|
external_store_md_updater(store_md)
|
|
50
64
|
|
|
51
65
|
|
|
52
|
-
def __substitute_md_rec(
|
|
66
|
+
def __substitute_md_rec(
|
|
67
|
+
md: Any,
|
|
68
|
+
substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
|
|
69
|
+
) -> Any:
|
|
53
70
|
if isinstance(md, dict):
|
|
54
71
|
updated_md = {}
|
|
55
72
|
for k, v in md.items():
|
|
56
73
|
substitute = substitution_fn(k, v)
|
|
57
74
|
if substitute is not None:
|
|
58
75
|
updated_k, updated_v = substitute
|
|
59
|
-
updated_md[updated_k] = updated_v
|
|
76
|
+
updated_md[updated_k] = __substitute_md_rec(updated_v, substitution_fn)
|
|
60
77
|
else:
|
|
61
78
|
updated_md[k] = __substitute_md_rec(v, substitution_fn)
|
|
62
79
|
return updated_md
|
|
63
80
|
elif isinstance(md, list):
|
|
64
|
-
|
|
81
|
+
updated_md = []
|
|
82
|
+
for v in md:
|
|
83
|
+
substitute = substitution_fn(None, v)
|
|
84
|
+
if substitute is not None:
|
|
85
|
+
_, updated_v = substitute
|
|
86
|
+
updated_md.append(__substitute_md_rec(updated_v, substitution_fn))
|
|
87
|
+
else:
|
|
88
|
+
updated_md.append(__substitute_md_rec(v, substitution_fn))
|
|
89
|
+
return updated_md
|
|
65
90
|
else:
|
|
66
91
|
return md
|
pixeltable/metadata/notes.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
# rather than as a comment, so that the existence of a description can be enforced by
|
|
3
3
|
# the unit tests when new versions are added.
|
|
4
4
|
VERSION_NOTES = {
|
|
5
|
+
21: 'Separate InlineArray and InlineList',
|
|
5
6
|
20: 'Store DB timestamps in UTC',
|
|
6
7
|
19: 'UDF renames; ImageMemberAccess removal',
|
|
7
8
|
18: 'Restructured index metadata',
|
pixeltable/tool/create_test_db_dump.py
CHANGED
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
import pathlib
|
|
6
6
|
import subprocess
|
|
7
|
+
import sys
|
|
7
8
|
from typing import Any
|
|
8
9
|
from zoneinfo import ZoneInfo
|
|
9
10
|
|
|
@@ -24,12 +25,18 @@ _logger = logging.getLogger('pixeltable')
|
|
|
24
25
|
class Dumper:
|
|
25
26
|
|
|
26
27
|
def __init__(self, output_dir='target', db_name='pxtdump') -> None:
|
|
28
|
+
if sys.version_info >= (3, 10):
|
|
29
|
+
raise RuntimeError(
|
|
30
|
+
'This script must be run on Python 3.9. '
|
|
31
|
+
'DB dumps are incompatible across versions due to issues with pickling anonymous UDFs.'
|
|
32
|
+
)
|
|
33
|
+
|
|
27
34
|
self.output_dir = pathlib.Path(output_dir)
|
|
28
35
|
shared_home = pathlib.Path(os.environ.get('PIXELTABLE_HOME', '~/.pixeltable')).expanduser()
|
|
29
36
|
mock_home_dir = self.output_dir / '.pixeltable'
|
|
30
37
|
mock_home_dir.mkdir(parents=True, exist_ok=True)
|
|
31
38
|
os.environ['PIXELTABLE_HOME'] = str(mock_home_dir)
|
|
32
|
-
os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.
|
|
39
|
+
os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.toml')
|
|
33
40
|
os.environ['PIXELTABLE_DB'] = db_name
|
|
34
41
|
os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
|
|
35
42
|
|
|
@@ -226,9 +233,13 @@ class Dumper:
|
|
|
226
233
|
add_column('isin_2', t.c2.isin([1, 2, 3, 4, 5]))
|
|
227
234
|
add_column('isin_3', t.c2.isin(t.c6.f5))
|
|
228
235
|
|
|
229
|
-
# inline_array
|
|
230
|
-
add_column('inline_array_1', [[1, 2, 3], [4, 5, 6]])
|
|
231
|
-
add_column('inline_array_2', [['a', 'b', 'c'], ['d', 'e', 'f']])
|
|
236
|
+
# inline_array, inline_list, inline_dict
|
|
237
|
+
add_column('inline_array_1', pxt.array([[1, 2, 3], [4, 5, 6]]))
|
|
238
|
+
add_column('inline_array_2', pxt.array([['a', 'b', 'c'], ['d', 'e', 'f']]))
|
|
239
|
+
add_column('inline_array_exprs', pxt.array([[t.c2, t.c2 + 1], [t.c2 + 2, t.c2]]))
|
|
240
|
+
add_column('inline_array_mixed', pxt.array([[1, t.c2], [3, t.c2]]))
|
|
241
|
+
add_column('inline_list_1', [[1, 2, 3], [4, 5, 6]])
|
|
242
|
+
add_column('inline_list_2', [['a', 'b', 'c'], ['d', 'e', 'f']])
|
|
232
243
|
add_column('inline_list_exprs', [t.c1, [t.c1n, t.c2]])
|
|
233
244
|
add_column('inline_list_mixed', [1, 'a', t.c1, [1, 'a', t.c1n], 1, 'a'])
|
|
234
245
|
add_column('inline_dict', {'int': 22, 'dict': {'key': 'val'}, 'expr': t.c1})
|
pixeltable/type_system.py
CHANGED
|
@@ -204,6 +204,8 @@ class ColumnType:
|
|
|
204
204
|
|
|
205
205
|
@classmethod
|
|
206
206
|
def infer_literal_type(cls, val: Any, nullable: bool = False) -> Optional[ColumnType]:
|
|
207
|
+
if val is None:
|
|
208
|
+
return InvalidType(nullable=True)
|
|
207
209
|
if isinstance(val, str):
|
|
208
210
|
return StringType(nullable=nullable)
|
|
209
211
|
if isinstance(val, bool):
|
|
@@ -395,7 +397,7 @@ class InvalidType(ColumnType):
|
|
|
395
397
|
assert False
|
|
396
398
|
|
|
397
399
|
def print_value(self, val: Any) -> str:
|
|
398
|
-
|
|
400
|
+
return str(val)
|
|
399
401
|
|
|
400
402
|
def _validate_literal(self, val: Any) -> None:
|
|
401
403
|
assert False
|