pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +509 -103
- pixeltable/catalog/column.py +5 -0
- pixeltable/catalog/dir.py +15 -6
- pixeltable/catalog/globals.py +16 -0
- pixeltable/catalog/insertable_table.py +82 -41
- pixeltable/catalog/path.py +15 -0
- pixeltable/catalog/schema_object.py +7 -12
- pixeltable/catalog/table.py +81 -67
- pixeltable/catalog/table_version.py +23 -7
- pixeltable/catalog/view.py +9 -6
- pixeltable/env.py +15 -9
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exprs/__init__.py +2 -1
- pixeltable/exprs/arithmetic_expr.py +2 -0
- pixeltable/exprs/column_ref.py +38 -2
- pixeltable/exprs/expr.py +61 -12
- pixeltable/exprs/function_call.py +1 -4
- pixeltable/exprs/globals.py +12 -0
- pixeltable/exprs/json_mapper.py +4 -4
- pixeltable/exprs/json_path.py +10 -11
- pixeltable/exprs/similarity_expr.py +5 -20
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/ext/functions/yolox.py +21 -64
- pixeltable/func/callable_function.py +5 -2
- pixeltable/func/query_template_function.py +6 -18
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/globals.py +16 -5
- pixeltable/globals.py +172 -262
- pixeltable/io/__init__.py +3 -2
- pixeltable/io/datarows.py +138 -0
- pixeltable/io/external_store.py +8 -5
- pixeltable/io/globals.py +7 -160
- pixeltable/io/hf_datasets.py +21 -98
- pixeltable/io/pandas.py +29 -43
- pixeltable/io/parquet.py +17 -42
- pixeltable/io/table_data_conduit.py +569 -0
- pixeltable/io/utils.py +6 -21
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_30.py +50 -0
- pixeltable/metadata/converters/util.py +26 -1
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +3 -0
- pixeltable/utils/arrow.py +32 -7
- pixeltable/utils/coroutine.py +41 -0
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/METADATA +1 -1
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/RECORD +52 -47
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional, Union
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
import pixeltable.exceptions as excs
|
|
8
|
+
import pixeltable.type_system as ts
|
|
9
|
+
|
|
10
|
+
from .data_row import DataRow
|
|
11
|
+
from .expr import Expr
|
|
12
|
+
from .globals import StringOperator
|
|
13
|
+
from .row_builder import RowBuilder
|
|
14
|
+
from .sql_element_cache import SqlElementCache
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class StringOp(Expr):
    """
    Binary operation on strings.

    Supported operators:
    - `StringOperator.CONCAT`: the second operand must be a string
    - `StringOperator.REPEAT`: the second operand must be an int

    The operands are stored in `self.components` as `[op1, op2]`.
    """

    operator: StringOperator

    def __init__(self, operator: StringOperator, op1: Expr, op2: Expr):
        """
        Args:
            operator: the string operator to apply
            op1: left operand; must be of string type
            op2: right operand; string type for CONCAT, int type for REPEAT

        Raises:
            excs.Error: if `op2` has the wrong type for `operator`, or if `operator` is unsupported
        """
        # The result is None/NULL as soon as either operand is (see eval_nullable()), so the result type
        # must be nullable if either operand type is nullable, not only the first one.
        super().__init__(ts.StringType(nullable=op1.col_type.nullable or op2.col_type.nullable))
        self.operator = operator
        # components need to be set before any of the error messages below are built: they format `self`,
        # and __repr__() reads both operands from self.components
        self.components = [op1, op2]
        assert op1.col_type.is_string_type()
        if operator == StringOperator.CONCAT:
            if not op2.col_type.is_string_type():
                raise excs.Error(
                    f'{self}: {operator} on strings requires string type, but {op2} has type {op2.col_type}'
                )
        elif operator == StringOperator.REPEAT:
            if not op2.col_type.is_int_type():
                raise excs.Error(f'{self}: {operator} on strings requires int type, but {op2} has type {op2.col_type}')
        else:
            raise excs.Error(
                f'{self}: invalid operation {operator} on strings; '
                f'only operators {StringOperator.CONCAT} and {StringOperator.REPEAT} are supported'
            )
        self.id = self._create_id()

    @property
    def _op1(self) -> Expr:
        """Left operand."""
        return self.components[0]

    @property
    def _op2(self) -> Expr:
        """Right operand."""
        return self.components[1]

    def __repr__(self) -> str:
        # add parentheses around operands that are themselves StringOps, to make the nesting explicit
        op1_str = f'({self._op1})' if isinstance(self._op1, StringOp) else str(self._op1)
        op2_str = f'({self._op2})' if isinstance(self._op2, StringOp) else str(self._op2)
        return f'{op1_str} {self.operator} {op2_str}'

    def _equals(self, other: StringOp) -> bool:
        # only compares the operator, which is the only state this class adds
        return self.operator == other.operator

    def _id_attrs(self) -> list[tuple[str, Any]]:
        return [*super()._id_attrs(), ('operator', self.operator.value)]

    def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
        """
        Return the SQL equivalent of this expression, or None if either operand has no SQL equivalent.
        """
        left = sql_elements.get(self._op1)
        right = sql_elements.get(self._op2)
        if left is None or right is None:
            return None
        if self.operator == StringOperator.CONCAT:
            return left.concat(right)
        if self.operator == StringOperator.REPEAT:
            return sql.func.repeat(sql.cast(left, sql.String), sql.cast(right, sql.Integer))
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Evaluate the operation on the operand values in `data_row` and store the result in our slot."""
        op1_val = data_row[self._op1.slot_idx]
        op2_val = data_row[self._op2.slot_idx]
        data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)

    def eval_nullable(self, op1_val: Union[str, None], op2_val: Union[int, str, None]) -> Union[str, None]:
        """
        Return the result of evaluating the expression on a nullable string operand and a nullable
        string (CONCAT) or int (REPEAT) operand; None is interpreted as SQL NULL
        """
        if op1_val is None or op2_val is None:
            return None
        return self.eval_non_null(op1_val, op2_val)

    def eval_non_null(self, op1_val: str, op2_val: Union[int, str]) -> str:
        """
        Return the result of evaluating the expression on a string operand and a
        string (CONCAT) or int (REPEAT) operand
        """
        assert self.operator in {StringOperator.CONCAT, StringOperator.REPEAT}
        if self.operator == StringOperator.CONCAT:
            assert isinstance(op2_val, str)
            return op1_val + op2_val
        else:
            assert isinstance(op2_val, int)
            return op1_val * op2_val

    def _as_dict(self) -> dict:
        return {'operator': self.operator.value, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: dict, components: list[Expr]) -> StringOp:
        assert 'operator' in d
        assert len(components) == 2
        return cls(StringOperator(d['operator']), components[0], components[1])
|
|
@@ -1,21 +1,15 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
3
|
-
from typing import TYPE_CHECKING, Iterable, Iterator
|
|
4
|
-
from urllib.request import urlretrieve
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
5
3
|
|
|
6
|
-
import numpy as np
|
|
7
4
|
import PIL.Image
|
|
8
5
|
|
|
9
6
|
import pixeltable as pxt
|
|
10
|
-
from pixeltable import env
|
|
11
7
|
from pixeltable.func import Batch
|
|
12
8
|
from pixeltable.functions.util import normalize_image_mode
|
|
13
9
|
from pixeltable.utils.code import local_public_names
|
|
14
10
|
|
|
15
11
|
if TYPE_CHECKING:
|
|
16
|
-
import
|
|
17
|
-
from yolox.exp import Exp # type: ignore[import-untyped]
|
|
18
|
-
from yolox.models import YOLOX # type: ignore[import-untyped]
|
|
12
|
+
from yolox.models import Yolox, YoloxProcessor # type: ignore[import-untyped]
|
|
19
13
|
|
|
20
14
|
_logger = logging.getLogger('pixeltable')
|
|
21
15
|
|
|
@@ -30,7 +24,7 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
|
|
|
30
24
|
|
|
31
25
|
__Requirements__:
|
|
32
26
|
|
|
33
|
-
- `pip install
|
|
27
|
+
- `pip install pixeltable-yolox`
|
|
34
28
|
|
|
35
29
|
Args:
|
|
36
30
|
model_id: one of: `yolox_nano`, `yolox_tiny`, `yolox_s`, `yolox_m`, `yolox_l`, `yolox_x`
|
|
@@ -46,31 +40,14 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
|
|
|
46
40
|
>>> tbl.add_computed_column(detections=yolox(tbl.image, model_id='yolox_m', threshold=0.8))
|
|
47
41
|
"""
|
|
48
42
|
import torch
|
|
49
|
-
from yolox.utils import postprocess # type: ignore[import-untyped]
|
|
50
|
-
|
|
51
|
-
model, exp = _lookup_model(model_id, 'cpu')
|
|
52
|
-
image_tensors = list(_images_to_tensors(images, exp))
|
|
53
|
-
batch_tensor = torch.stack(image_tensors)
|
|
54
43
|
|
|
44
|
+
model = _lookup_model(model_id, 'cpu')
|
|
45
|
+
processor = _lookup_processor(model_id)
|
|
46
|
+
normalized_images = [normalize_image_mode(image) for image in images]
|
|
55
47
|
with torch.no_grad():
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
results: list[dict] = []
|
|
61
|
-
for image in images:
|
|
62
|
-
ratio = min(exp.test_size[0] / image.height, exp.test_size[1] / image.width)
|
|
63
|
-
if outputs[0] is None:
|
|
64
|
-
results.append({'bboxes': [], 'scores': [], 'labels': []})
|
|
65
|
-
else:
|
|
66
|
-
results.append(
|
|
67
|
-
{
|
|
68
|
-
'bboxes': [(output[:4] / ratio).tolist() for output in outputs[0]],
|
|
69
|
-
'scores': [output[4].item() * output[5].item() for output in outputs[0]],
|
|
70
|
-
'labels': [int(output[6]) for output in outputs[0]],
|
|
71
|
-
}
|
|
72
|
-
)
|
|
73
|
-
return results
|
|
48
|
+
tensor = processor(normalized_images)
|
|
49
|
+
output = model(tensor)
|
|
50
|
+
return processor.postprocess(normalized_images, output, threshold=threshold)
|
|
74
51
|
|
|
75
52
|
|
|
76
53
|
@pxt.udf
|
|
@@ -107,47 +84,27 @@ def yolo_to_coco(detections: dict) -> list:
|
|
|
107
84
|
return result
|
|
108
85
|
|
|
109
86
|
|
|
110
|
-
def
|
|
111
|
-
import
|
|
112
|
-
from yolox.data import ValTransform # type: ignore[import-untyped]
|
|
113
|
-
|
|
114
|
-
val_transform = ValTransform(legacy=False)
|
|
115
|
-
for image in images:
|
|
116
|
-
normalized_image = normalize_image_mode(image)
|
|
117
|
-
image_transform, _ = val_transform(np.array(normalized_image), None, exp.test_size)
|
|
118
|
-
yield torch.from_numpy(image_transform)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def _lookup_model(model_id: str, device: str) -> tuple['YOLOX', 'Exp']:
|
|
122
|
-
import torch
|
|
123
|
-
from yolox.exp import get_exp
|
|
87
|
+
def _lookup_model(model_id: str, device: str) -> 'Yolox':
|
|
88
|
+
from yolox.models import Yolox
|
|
124
89
|
|
|
125
90
|
key = (model_id, device)
|
|
126
|
-
if key in _model_cache:
|
|
127
|
-
|
|
91
|
+
if key not in _model_cache:
|
|
92
|
+
_model_cache[key] = Yolox.from_pretrained(model_id, device=device)
|
|
128
93
|
|
|
129
|
-
|
|
130
|
-
weights_file = Path(f'{env.Env.get().tmp_dir}/{model_id}.pth')
|
|
131
|
-
if not weights_file.exists():
|
|
132
|
-
_logger.info(f'Downloading weights for YOLOX model {model_id}: from {weights_url} -> {weights_file}')
|
|
133
|
-
urlretrieve(weights_url, weights_file)
|
|
94
|
+
return _model_cache[key]
|
|
134
95
|
|
|
135
|
-
exp = get_exp(exp_name=model_id)
|
|
136
|
-
model = exp.get_model().to(device)
|
|
137
96
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
model.training = False
|
|
97
|
+
def _lookup_processor(model_id: str) -> 'YoloxProcessor':
|
|
98
|
+
from yolox.models import YoloxProcessor
|
|
141
99
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
model.load_state_dict(weights['model'])
|
|
100
|
+
if model_id not in _processor_cache:
|
|
101
|
+
_processor_cache[model_id] = YoloxProcessor(model_id)
|
|
145
102
|
|
|
146
|
-
|
|
147
|
-
return model, exp
|
|
103
|
+
return _processor_cache[model_id]
|
|
148
104
|
|
|
149
105
|
|
|
150
|
-
_model_cache: dict[tuple[str, str],
|
|
106
|
+
_model_cache: dict[tuple[str, str], 'Yolox'] = {}
|
|
107
|
+
_processor_cache: dict[str, 'YoloxProcessor'] = {}
|
|
151
108
|
|
|
152
109
|
|
|
153
110
|
__all__ = local_public_names(__name__)
|
|
@@ -8,6 +8,7 @@ from uuid import UUID
|
|
|
8
8
|
import cloudpickle # type: ignore[import-untyped]
|
|
9
9
|
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
|
+
from pixeltable.utils.coroutine import run_coroutine_synchronously
|
|
11
12
|
|
|
12
13
|
from .function import Function
|
|
13
14
|
from .signature import Signature
|
|
@@ -93,13 +94,15 @@ class CallableFunction(Function):
|
|
|
93
94
|
batched_kwargs = {k: [v] for k, v in kwargs.items() if k not in constant_param_names}
|
|
94
95
|
result: list[Any]
|
|
95
96
|
if inspect.iscoroutinefunction(self.py_fn):
|
|
96
|
-
|
|
97
|
+
# TODO: This is temporary (see note in utils/coroutine.py)
|
|
98
|
+
result = run_coroutine_synchronously(self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs))
|
|
97
99
|
else:
|
|
98
100
|
result = self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs)
|
|
99
101
|
assert len(result) == 1
|
|
100
102
|
return result[0]
|
|
101
103
|
elif inspect.iscoroutinefunction(self.py_fn):
|
|
102
|
-
|
|
104
|
+
# TODO: This is temporary (see note in utils/coroutine.py)
|
|
105
|
+
return run_coroutine_synchronously(self.py_fn(*args, **kwargs))
|
|
103
106
|
else:
|
|
104
107
|
return self.py_fn(*args, **kwargs)
|
|
105
108
|
|
|
@@ -17,8 +17,6 @@ class QueryTemplateFunction(Function):
|
|
|
17
17
|
|
|
18
18
|
template_df: Optional['DataFrame']
|
|
19
19
|
self_name: Optional[str]
|
|
20
|
-
# conn: Optional[sql.engine.Connection]
|
|
21
|
-
defaults: dict[str, exprs.Literal]
|
|
22
20
|
|
|
23
21
|
@classmethod
|
|
24
22
|
def create(
|
|
@@ -46,20 +44,6 @@ class QueryTemplateFunction(Function):
|
|
|
46
44
|
self.self_name = name
|
|
47
45
|
self.template_df = template_df
|
|
48
46
|
|
|
49
|
-
# if we're running as part of an ongoing update operation, we need to use the same connection, otherwise
|
|
50
|
-
# we end up with a deadlock
|
|
51
|
-
# TODO: figure out a more general way to make execution state available
|
|
52
|
-
# self.conn = None
|
|
53
|
-
|
|
54
|
-
# convert defaults to Literals
|
|
55
|
-
self.defaults = {} # key: param name, value: default value converted to a Literal
|
|
56
|
-
param_types = self.template_df.parameters()
|
|
57
|
-
for param in [p for p in sig.parameters.values() if p.has_default()]:
|
|
58
|
-
assert param.name in param_types
|
|
59
|
-
param_type = param_types[param.name]
|
|
60
|
-
literal_default = exprs.Literal(param.default, col_type=param_type)
|
|
61
|
-
self.defaults[param.name] = literal_default
|
|
62
|
-
|
|
63
47
|
def _update_as_overload_resolution(self, signature_idx: int) -> None:
|
|
64
48
|
pass # only one signature supported for QueryTemplateFunction
|
|
65
49
|
|
|
@@ -72,7 +56,11 @@ class QueryTemplateFunction(Function):
|
|
|
72
56
|
bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
|
|
73
57
|
# apply defaults, otherwise we might have Parameters left over
|
|
74
58
|
bound_args.update(
|
|
75
|
-
{
|
|
59
|
+
{
|
|
60
|
+
param.name: param.default
|
|
61
|
+
for param in self.signature.parameters.values()
|
|
62
|
+
if param.has_default() and param.name not in bound_args
|
|
63
|
+
}
|
|
76
64
|
)
|
|
77
65
|
bound_df = self.template_df.bind(bound_args)
|
|
78
66
|
result = await bound_df._acollect()
|
|
@@ -87,7 +75,7 @@ class QueryTemplateFunction(Function):
|
|
|
87
75
|
return self.self_name
|
|
88
76
|
|
|
89
77
|
def _as_dict(self) -> dict:
|
|
90
|
-
return {'name': self.name, 'signature': self.
|
|
78
|
+
return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
|
|
91
79
|
|
|
92
80
|
@classmethod
|
|
93
81
|
def _from_dict(cls, d: dict) -> Function:
|
pixeltable/func/tools.py
CHANGED
|
@@ -51,10 +51,10 @@ class Tool(pydantic.BaseModel):
|
|
|
51
51
|
# The output of `tool_calls` must be a dict in standardized tool invocation format:
|
|
52
52
|
# {tool_name: [{'args': {name1: value1, name2: value2, ...}}, ...], ...}
|
|
53
53
|
def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
|
|
54
|
-
|
|
54
|
+
import pixeltable.functions as pxtf
|
|
55
55
|
|
|
56
56
|
func_name = self.name or self.fn.name
|
|
57
|
-
return
|
|
57
|
+
return pxtf.map(tool_calls[func_name]['*'], lambda x: self.__invoke_kwargs(x.args))
|
|
58
58
|
|
|
59
59
|
def __invoke_kwargs(self, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
|
|
60
60
|
kwargs = {param.name: self.__extract_tool_arg(param, kwargs) for param in self.parameters.values()}
|
pixeltable/functions/__init__.py
CHANGED
pixeltable/functions/globals.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
import builtins
|
|
2
2
|
import typing
|
|
3
|
-
|
|
4
|
-
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
5
|
-
from typing import Optional, Union
|
|
3
|
+
from typing import Any, Callable, Optional, Union
|
|
6
4
|
|
|
7
5
|
import sqlalchemy as sql
|
|
8
6
|
|
|
9
|
-
import
|
|
10
|
-
from pixeltable import exprs, func
|
|
7
|
+
from pixeltable import exceptions as excs, exprs, func, type_system as ts
|
|
11
8
|
from pixeltable.utils.code import local_public_names
|
|
12
9
|
|
|
10
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
11
|
+
|
|
13
12
|
|
|
14
13
|
# TODO: remove and replace calls with astype()
|
|
15
14
|
def cast(expr: exprs.Expr, target_type: Union[ts.ColumnType, type, _GenericAlias]) -> exprs.Expr:
|
|
@@ -168,6 +167,18 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
|
168
167
|
return sql.sql.func.avg(val)
|
|
169
168
|
|
|
170
169
|
|
|
170
|
+
def map(expr: exprs.Expr, fn: Callable[[exprs.Expr], Any]) -> exprs.Expr:
    """
    Build a `JsonMapper` expression from `expr` and the expression produced by `fn`.

    `fn` is called once, when this function runs, with `exprs.json_path.RELATIVE_PATH_ROOT` as its
    argument; its return value is converted with `Expr.from_object()` and becomes the mapper's target
    expression.

    Args:
        expr: the source expression handed to `JsonMapper`
        fn: callable that receives the relative path root and returns an object convertible to an `Expr`

    Returns:
        `exprs.JsonMapper(expr, target_expr)`

    Raises:
        excs.Error: if calling `fn` or converting its result raises any exception (chained as the cause)
    """
    try:
        mapped_obj = fn(exprs.json_path.RELATIVE_PATH_ROOT)
        target_expr: exprs.Expr = exprs.Expr.from_object(mapped_obj)
    except Exception as exc:
        message = (
            'Failed to evaluate map function. '
            '(The `fn` argument to `map()` must produce a valid Pixeltable expression.)'
        )
        raise excs.Error(message) from exc
    return exprs.JsonMapper(expr, target_expr)
|
|
180
|
+
|
|
181
|
+
|
|
171
182
|
__all__ = local_public_names(__name__)
|
|
172
183
|
|
|
173
184
|
|