pixeltable 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +5 -0
- pixeltable/catalog/globals.py +8 -0
- pixeltable/catalog/table.py +22 -4
- pixeltable/catalog/table_version.py +30 -55
- pixeltable/catalog/view.py +1 -1
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/row_update_node.py +61 -0
- pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/expr.py +35 -22
- pixeltable/exprs/function_call.py +60 -29
- pixeltable/exprs/globals.py +2 -0
- pixeltable/exprs/inline_array.py +18 -11
- pixeltable/exprs/method_ref.py +63 -0
- pixeltable/ext/__init__.py +9 -0
- pixeltable/ext/functions/__init__.py +8 -0
- pixeltable/ext/functions/whisperx.py +45 -5
- pixeltable/ext/functions/yolox.py +60 -14
- pixeltable/func/callable_function.py +12 -4
- pixeltable/func/expr_template_function.py +1 -1
- pixeltable/func/function.py +12 -2
- pixeltable/func/function_registry.py +24 -9
- pixeltable/func/udf.py +32 -4
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/fireworks.py +33 -0
- pixeltable/functions/huggingface.py +96 -6
- pixeltable/functions/image.py +226 -41
- pixeltable/functions/openai.py +214 -0
- pixeltable/functions/string.py +195 -218
- pixeltable/functions/timestamp.py +210 -0
- pixeltable/functions/together.py +106 -0
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/whisper.py +32 -0
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/globals.py +133 -1
- pixeltable/io/pandas.py +52 -27
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_18.py +39 -0
- pixeltable/metadata/notes.py +10 -0
- pixeltable/plan.py +76 -1
- pixeltable/tool/create_test_db_dump.py +3 -4
- pixeltable/tool/doc_plugins/griffe.py +4 -0
- pixeltable/type_system.py +15 -14
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/METADATA +1 -1
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/RECORD +50 -45
- pixeltable/exprs/image_member_access.py +0 -96
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/entry_points.txt +0 -0
|
@@ -20,6 +20,22 @@ from .rowid_ref import RowidRef
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class FunctionCall(Expr):
|
|
23
|
+
|
|
24
|
+
fn: func.Function
|
|
25
|
+
is_method_call: bool
|
|
26
|
+
agg_init_args: Dict[str, Any]
|
|
27
|
+
args: List[Tuple[Optional[int], Optional[Any]]]
|
|
28
|
+
kwargs: Dict[str, Tuple[Optional[int], Optional[Any]]]
|
|
29
|
+
arg_types: List[ts.ColumnType]
|
|
30
|
+
kwarg_types: Dict[str, ts.ColumnType]
|
|
31
|
+
group_by_start_idx: int
|
|
32
|
+
group_by_stop_idx: int
|
|
33
|
+
fn_expr_idx: int
|
|
34
|
+
order_by_start_idx: int
|
|
35
|
+
constant_args: set[str]
|
|
36
|
+
aggregator: Optional[Any]
|
|
37
|
+
current_partition_vals: Optional[List[Any]]
|
|
38
|
+
|
|
23
39
|
def __init__(
|
|
24
40
|
self, fn: func.Function, bound_args: Dict[str, Any], order_by_clause: Optional[List[Any]] = None,
|
|
25
41
|
group_by_clause: Optional[List[Any]] = None, is_method_call: bool = False):
|
|
@@ -31,9 +47,9 @@ class FunctionCall(Expr):
|
|
|
31
47
|
super().__init__(fn.call_return_type(bound_args))
|
|
32
48
|
self.fn = fn
|
|
33
49
|
self.is_method_call = is_method_call
|
|
34
|
-
self.
|
|
50
|
+
self.normalize_args(signature, bound_args)
|
|
35
51
|
|
|
36
|
-
self.agg_init_args
|
|
52
|
+
self.agg_init_args = {}
|
|
37
53
|
if self.is_agg_fn_call:
|
|
38
54
|
# we separate out the init args for the aggregator
|
|
39
55
|
self.agg_init_args = {
|
|
@@ -42,17 +58,16 @@ class FunctionCall(Expr):
|
|
|
42
58
|
bound_args = {arg_name: arg for arg_name, arg in bound_args.items() if arg_name not in fn.init_param_names}
|
|
43
59
|
|
|
44
60
|
# construct components, args, kwargs
|
|
45
|
-
self.components: List[Expr] = []
|
|
46
61
|
|
|
47
62
|
# Tuple[int, Any]:
|
|
48
63
|
# - for Exprs: (index into components, None)
|
|
49
64
|
# - otherwise: (None, val)
|
|
50
|
-
self.args
|
|
51
|
-
self.kwargs
|
|
65
|
+
self.args = []
|
|
66
|
+
self.kwargs = {}
|
|
52
67
|
|
|
53
68
|
# we record the types of non-variable parameters for runtime type checks
|
|
54
|
-
self.arg_types
|
|
55
|
-
self.kwarg_types
|
|
69
|
+
self.arg_types = []
|
|
70
|
+
self.kwarg_types = {}
|
|
56
71
|
# the prefix of parameters that are bound can be passed by position
|
|
57
72
|
for param in fn.signature.py_signature.parameters.values():
|
|
58
73
|
if param.name not in bound_args or param.kind == inspect.Parameter.KEYWORD_ONLY:
|
|
@@ -111,8 +126,8 @@ class FunctionCall(Expr):
|
|
|
111
126
|
|
|
112
127
|
self.constant_args = {param_name for param_name, arg in bound_args.items() if not isinstance(arg, Expr)}
|
|
113
128
|
# execution state for aggregate functions
|
|
114
|
-
self.aggregator
|
|
115
|
-
self.current_partition_vals
|
|
129
|
+
self.aggregator = None
|
|
130
|
+
self.current_partition_vals = None
|
|
116
131
|
|
|
117
132
|
self.id = self._create_id()
|
|
118
133
|
|
|
@@ -120,26 +135,37 @@ class FunctionCall(Expr):
|
|
|
120
135
|
target = tbl._tbl_version_path.tbl_version
|
|
121
136
|
return [RowidRef(target, i) for i in range(target.num_rowid_columns())]
|
|
122
137
|
|
|
138
|
+
def default_column_name(self) -> Optional[str]:
    """Return the default column name for this call.

    When the called function is flagged as a property, its name is used;
    in every other case the decision is delegated to the parent class.
    """
    if not self.fn.is_property:
        return super().default_column_name()
    return self.fn.name
|
|
142
|
+
|
|
123
143
|
@classmethod
|
|
124
|
-
def
|
|
125
|
-
"""
|
|
144
|
+
def normalize_args(cls, signature: func.Signature, bound_args: Dict[str, Any]) -> None:
|
|
145
|
+
"""Converts all args to Exprs and checks that they are compatible with signature.
|
|
126
146
|
|
|
127
|
-
|
|
147
|
+
Updates bound_args in place, where necessary.
|
|
128
148
|
"""
|
|
129
149
|
for param_name, arg in bound_args.items():
|
|
130
150
|
param = signature.parameters[param_name]
|
|
151
|
+
is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
152
|
+
|
|
131
153
|
if isinstance(arg, dict):
|
|
132
154
|
try:
|
|
133
155
|
arg = InlineDict(arg)
|
|
134
156
|
bound_args[param_name] = arg
|
|
157
|
+
continue
|
|
135
158
|
except excs.Error:
|
|
136
159
|
# this didn't work, but it might be a literal
|
|
137
160
|
pass
|
|
161
|
+
|
|
138
162
|
if isinstance(arg, list) or isinstance(arg, tuple):
|
|
139
163
|
try:
|
|
140
164
|
# If the column type is JsonType, force the literal to be JSON
|
|
141
|
-
|
|
165
|
+
is_json = is_var_param or (param.col_type is not None and param.col_type.is_json_type())
|
|
166
|
+
arg = InlineArray(arg, force_json=is_json)
|
|
142
167
|
bound_args[param_name] = arg
|
|
168
|
+
continue
|
|
143
169
|
except excs.Error:
|
|
144
170
|
# this didn't work, but it might be a literal
|
|
145
171
|
pass
|
|
@@ -149,30 +175,35 @@ class FunctionCall(Expr):
|
|
|
149
175
|
try:
|
|
150
176
|
_ = json.dumps(arg)
|
|
151
177
|
except TypeError:
|
|
152
|
-
raise excs.Error(f
|
|
178
|
+
raise excs.Error(f'Argument for parameter {param_name!r} is not json-serializable: {arg}')
|
|
153
179
|
if arg is not None:
|
|
154
180
|
try:
|
|
155
181
|
param_type = param.col_type
|
|
156
182
|
bound_args[param_name] = param_type.create_literal(arg)
|
|
157
183
|
except TypeError as e:
|
|
158
184
|
msg = str(e)
|
|
159
|
-
raise excs.Error(f
|
|
185
|
+
raise excs.Error(f'Argument for parameter {param_name!r}: {msg[0].lower() + msg[1:]}')
|
|
160
186
|
continue
|
|
161
187
|
|
|
162
|
-
#
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
188
|
+
# these checks break the db migration test, because InlineArray isn't serialized correctly (it loses
|
|
189
|
+
# the type information)
|
|
190
|
+
# if is_var_param:
|
|
191
|
+
# if param.kind == inspect.Parameter.VAR_POSITIONAL:
|
|
192
|
+
# if not isinstance(arg, InlineArray) or not arg.col_type.is_json_type():
|
|
193
|
+
# pass
|
|
194
|
+
# assert isinstance(arg, InlineArray), type(arg)
|
|
195
|
+
# assert arg.col_type.is_json_type()
|
|
196
|
+
# if param.kind == inspect.Parameter.VAR_KEYWORD:
|
|
197
|
+
# if not isinstance(arg, InlineDict):
|
|
198
|
+
# pass
|
|
199
|
+
# assert isinstance(arg, InlineDict), type(arg)
|
|
200
|
+
if is_var_param:
|
|
201
|
+
pass
|
|
202
|
+
else:
|
|
203
|
+
if not param.col_type.is_supertype_of(arg.col_type):
|
|
204
|
+
raise excs.Error(
|
|
205
|
+
f'Parameter {param_name}: argument type {arg.col_type} does not match parameter type '
|
|
206
|
+
f'{param.col_type}')
|
|
176
207
|
|
|
177
208
|
def _equals(self, other: FunctionCall) -> bool:
|
|
178
209
|
if self.fn != other.fn:
|
pixeltable/exprs/globals.py
CHANGED
pixeltable/exprs/inline_array.py
CHANGED
|
@@ -21,6 +21,9 @@ class InlineArray(Expr):
|
|
|
21
21
|
is `True`, it will always be cast as a `JsonType`. If `force_json` is `False`, it will be cast as an
|
|
22
22
|
`ArrayType` if it is a homogenous array of scalars or arrays, or a `JsonType` otherwise.
|
|
23
23
|
"""
|
|
24
|
+
|
|
25
|
+
elements: List[Tuple[Optional[int], Any]]
|
|
26
|
+
|
|
24
27
|
def __init__(self, elements: Tuple, force_json: bool = False):
|
|
25
28
|
# we need to call this in order to populate self.components
|
|
26
29
|
super().__init__(ts.ArrayType((len(elements),), ts.IntType()))
|
|
@@ -28,7 +31,7 @@ class InlineArray(Expr):
|
|
|
28
31
|
# elements contains
|
|
29
32
|
# - for Expr elements: (index into components, None)
|
|
30
33
|
# - for non-Expr elements: (None, value)
|
|
31
|
-
self.elements
|
|
34
|
+
self.elements = []
|
|
32
35
|
for el in elements:
|
|
33
36
|
el = copy.deepcopy(el)
|
|
34
37
|
if isinstance(el, list):
|
|
@@ -43,14 +46,16 @@ class InlineArray(Expr):
|
|
|
43
46
|
else:
|
|
44
47
|
self.elements.append((None, el))
|
|
45
48
|
|
|
46
|
-
inferred_element_type = ts.InvalidType()
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
inferred_element_type: Optional[ts.ColumnType] = ts.InvalidType()
|
|
50
|
+
if not force_json:
|
|
51
|
+
# try to infer the element type
|
|
52
|
+
for idx, val in self.elements:
|
|
53
|
+
if idx is not None:
|
|
54
|
+
inferred_element_type = ts.ColumnType.supertype(inferred_element_type, self.components[idx].col_type)
|
|
55
|
+
else:
|
|
56
|
+
inferred_element_type = ts.ColumnType.supertype(inferred_element_type, ts.ColumnType.infer_literal_type(val))
|
|
57
|
+
if inferred_element_type is None:
|
|
58
|
+
break
|
|
54
59
|
|
|
55
60
|
if force_json or inferred_element_type is None:
|
|
56
61
|
# JSON conversion is forced, or there is no common supertype
|
|
@@ -93,7 +98,7 @@ class InlineArray(Expr):
|
|
|
93
98
|
data_row[self.slot_idx] = result
|
|
94
99
|
|
|
95
100
|
def _as_dict(self) -> Dict:
    """Serialize this expression to a dict.

    Besides the raw `elements`, an `is_json` flag is stored that records whether the
    expression's column type is a JSON type. Entries produced by the parent class are
    merged in last, so they take precedence on key collisions.
    """
    serialized = {'elements': self.elements, 'is_json': self.col_type.is_json_type()}
    serialized.update(super()._as_dict())
    return serialized
|
|
97
102
|
|
|
98
103
|
@classmethod
|
|
99
104
|
def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
|
|
@@ -106,4 +111,6 @@ class InlineArray(Expr):
|
|
|
106
111
|
arg.append(components[idx])
|
|
107
112
|
else:
|
|
108
113
|
arg.append(val)
|
|
109
|
-
|
|
114
|
+
# in order to avoid a schema version change, we'll interpret the absence of 'is_json' to indicate an ArrayType
|
|
115
|
+
is_json = d.get('is_json', False)
|
|
116
|
+
return cls(tuple(arg), force_json=is_json)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
import pixeltable.exceptions as excs
|
|
6
|
+
import pixeltable.type_system as ts
|
|
7
|
+
from pixeltable.exprs import Expr, FunctionCall
|
|
8
|
+
from pixeltable.func import FunctionRegistry, CallableFunction
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
from .row_builder import RowBuilder
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MethodRef(Expr):
    """
    A method reference. This represents a `Function` instance with its
    first parameter bound to a base expression.

    When a `MethodRef` is called, it returns a `FunctionCall` with the base expression as the first argument.
    The effective arity of a `MethodRef` is one less than the arity of the underlying `Function`.
    """
    # TODO: Should this even be an `Expr`? It can't actually be evaluated directly (it has to be first
    # converted to a `FunctionCall` by binding any remaining parameters).

    def __init__(self, base_expr: Expr, method_name: str):
        """Bind `method_name` to `base_expr`.

        The method is looked up in the `FunctionRegistry` by the type enum of
        `base_expr.col_type`.

        Raises:
            AttributeError: if no method of that name is registered for the base type.
        """
        super().__init__(ts.InvalidType())  # The `MethodRef` is untyped until it is called.
        self.base_expr = base_expr
        self.method_name = method_name
        self.fn = FunctionRegistry.get().lookup_type_method(base_expr.col_type.type_enum, method_name)
        if self.fn is None:
            # This has to be an `AttributeError`, or tab-completion won't work properly in ipython.
            raise AttributeError(f'Unknown method (of type {base_expr.col_type}): {method_name}')
        self.components = [base_expr]
        self.id = self._create_id()

    def _as_dict(self) -> dict:
        """Serialize to a dict: the parent's entries plus `method_name`."""
        return {'method_name': self.method_name, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
        """Rebuild a `MethodRef` from its serialized form.

        `components` must hold exactly one expression, which becomes the base expression.
        """
        assert 'method_name' in d
        assert len(components) == 1
        # The constructor signature is (base_expr, method_name): the base expression goes first.
        return cls(components[0], d['method_name'])

    def __call__(self, *args, **kwargs) -> FunctionCall:
        """Call the underlying function with the base expression prepended to `args`."""
        result = self.fn(self.base_expr, *args, **kwargs)
        assert isinstance(result, FunctionCall)
        result.is_method_call = True
        return result

    def _equals(self, other: 'MethodRef') -> bool:
        """Two refs are equal when both base expression and method name match."""
        return self.base_expr == other.base_expr and self.method_name == other.method_name

    def _id_attrs(self) -> list[tuple[str, Any]]:
        """Extend the parent's id attributes with the method name."""
        return super()._id_attrs() + [('method_name', self.method_name)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Always returns None."""
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Always raises: a `MethodRef` has to be called (yielding a `FunctionCall`) first."""
        # Raised explicitly (instead of `assert False`) so the check is not stripped under `python -O`.
        raise AssertionError('MethodRef cannot be evaluated directly')

    def __str__(self) -> str:
        return f'{self.base_expr}.{self.method_name}'
|
pixeltable/ext/__init__.py
CHANGED
|
@@ -3,3 +3,12 @@ Extended integrations for Pixeltable. This package contains experimental or demo
|
|
|
3
3
|
are not intended for production use. Long-term support cannot be guaranteed, usually because the features
|
|
4
4
|
have dependencies whose future support is unclear.
|
|
5
5
|
"""
|
|
6
|
+
|
|
7
|
+
from pixeltable.utils.code import local_public_names
|
|
8
|
+
from . import functions
|
|
9
|
+
|
|
10
|
+
__all__ = local_public_names(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def __dir__():
    """Return `__all__`, so that `dir()` on this module lists exactly those names."""
    return __all__
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Optional, TYPE_CHECKING
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
from pixeltable.utils.code import local_public_names
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from whisperx.asr import FasterWhisperPipeline
|
|
6
7
|
|
|
7
8
|
import pixeltable as pxt
|
|
8
9
|
|
|
@@ -11,6 +12,36 @@ import pixeltable as pxt
|
|
|
11
12
|
def transcribe(
|
|
12
13
|
audio: str, *, model: str, compute_type: Optional[str] = None, language: Optional[str] = None, chunk_size: int = 30
|
|
13
14
|
) -> dict:
|
|
15
|
+
"""
|
|
16
|
+
Transcribe an audio file using WhisperX.
|
|
17
|
+
|
|
18
|
+
This UDF runs a transcription model _locally_ using the WhisperX library,
|
|
19
|
+
equivalent to the WhisperX `transcribe` function, as described in the
|
|
20
|
+
[WhisperX library documentation](https://github.com/m-bain/whisperX).
|
|
21
|
+
|
|
22
|
+
__Requirements:__
|
|
23
|
+
|
|
24
|
+
- `pip install whisperx`
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
audio: The audio file to transcribe.
|
|
28
|
+
model: The name of the model to use for transcription.
|
|
29
|
+
|
|
30
|
+
See the [WhisperX library documentation](https://github.com/m-bain/whisperX) for details
|
|
31
|
+
on the remaining parameters.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
A dictionary containing the transcription and various other metadata.
|
|
35
|
+
|
|
36
|
+
Examples:
|
|
37
|
+
Add a computed column that applies the model `tiny.en` to an existing Pixeltable column `tbl.audio`
|
|
38
|
+
of the table `tbl`:
|
|
39
|
+
|
|
40
|
+
>>> tbl['result'] = transcribe(tbl.audio, model='tiny.en')
|
|
41
|
+
"""
|
|
42
|
+
import torch
|
|
43
|
+
import whisperx
|
|
44
|
+
|
|
14
45
|
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
15
46
|
compute_type = compute_type or ('float16' if device == 'cuda' else 'int8')
|
|
16
47
|
model = _lookup_model(model, device, compute_type)
|
|
@@ -19,7 +50,9 @@ def transcribe(
|
|
|
19
50
|
return result
|
|
20
51
|
|
|
21
52
|
|
|
22
|
-
def _lookup_model(model_id: str, device: str, compute_type: str) -> FasterWhisperPipeline:
|
|
53
|
+
def _lookup_model(model_id: str, device: str, compute_type: str) -> 'FasterWhisperPipeline':
|
|
54
|
+
import whisperx
|
|
55
|
+
|
|
23
56
|
key = (model_id, device, compute_type)
|
|
24
57
|
if key not in _model_cache:
|
|
25
58
|
model = whisperx.load_model(model_id, device, compute_type=compute_type)
|
|
@@ -28,3 +61,10 @@ def _lookup_model(model_id: str, device: str, compute_type: str) -> FasterWhispe
|
|
|
28
61
|
|
|
29
62
|
|
|
30
63
|
_model_cache = {}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
__all__ = local_public_names(__name__)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def __dir__():
    """Return `__all__`, so that `dir()` on this module lists exactly those names."""
    return __all__
|
|
@@ -1,20 +1,21 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Iterable, Iterator
|
|
3
|
+
from typing import Iterable, Iterator, TYPE_CHECKING
|
|
4
4
|
from urllib.request import urlretrieve
|
|
5
5
|
|
|
6
6
|
import PIL.Image
|
|
7
7
|
import numpy as np
|
|
8
|
-
import torch
|
|
9
|
-
from yolox.data import ValTransform
|
|
10
|
-
from yolox.exp import get_exp, Exp
|
|
11
|
-
from yolox.models import YOLOX
|
|
12
|
-
from yolox.utils import postprocess
|
|
13
8
|
|
|
14
9
|
import pixeltable as pxt
|
|
15
10
|
from pixeltable import env
|
|
16
11
|
from pixeltable.func import Batch
|
|
17
12
|
from pixeltable.functions.util import normalize_image_mode
|
|
13
|
+
from pixeltable.utils.code import local_public_names
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
import torch
|
|
17
|
+
from yolox.exp import Exp
|
|
18
|
+
from yolox.models import YOLOX
|
|
18
19
|
|
|
19
20
|
_logger = logging.getLogger('pixeltable')
|
|
20
21
|
|
|
@@ -22,15 +23,32 @@ _logger = logging.getLogger('pixeltable')
|
|
|
22
23
|
@pxt.udf(batch_size=4)
|
|
23
24
|
def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
|
|
24
25
|
"""
|
|
25
|
-
|
|
26
|
+
Computes YOLOX object detections for the specified image. `model_id` should reference one of the models
|
|
27
|
+
defined in the [YOLOX documentation](https://github.com/Megvii-BaseDetection/YOLOX).
|
|
26
28
|
|
|
27
29
|
YOLOX support is part of the `pixeltable.ext` package: long-term support is not guaranteed, and it is not
|
|
28
30
|
intended for use in production applications.
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
- `
|
|
32
|
+
__Requirements__:
|
|
33
|
+
|
|
34
|
+
- `pip install git+https://github.com/Megvii-BaseDetection/YOLOX`
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
model_id: one of: `yolox_nano`, `yolox_tiny`, `yolox_s`, `yolox_m`, `yolox_l`, `yolox_x`
|
|
38
|
+
threshold: the threshold for object detection
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
A dictionary containing the output of the object detection model.
|
|
42
|
+
|
|
43
|
+
Examples:
|
|
44
|
+
Add a computed column that applies the model `yolox_m` to an existing
|
|
45
|
+
Pixeltable column `tbl.image` of the table `tbl`:
|
|
46
|
+
|
|
47
|
+
>>> tbl['detections'] = yolox(tbl.image, model_id='yolox_m', threshold=0.8)
|
|
33
48
|
"""
|
|
49
|
+
import torch
|
|
50
|
+
from yolox.utils import postprocess
|
|
51
|
+
|
|
34
52
|
model, exp = _lookup_model(model_id, 'cpu')
|
|
35
53
|
image_tensors = list(_images_to_tensors(images, exp))
|
|
36
54
|
batch_tensor = torch.stack(image_tensors)
|
|
@@ -58,6 +76,21 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
|
|
|
58
76
|
|
|
59
77
|
@pxt.udf
|
|
60
78
|
def yolo_to_coco(detections: dict) -> list:
|
|
79
|
+
"""
|
|
80
|
+
Converts the output of a YOLOX object detection model to COCO format.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
detections: The output of a YOLOX object detection model, as returned by `yolox`.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
A dictionary containing the data from `detections`, converted to COCO format.
|
|
87
|
+
|
|
88
|
+
Examples:
|
|
89
|
+
Add a computed column that converts the output `tbl.detections` to COCO format, where `tbl.image`
|
|
90
|
+
is the image for which detections were computed:
|
|
91
|
+
|
|
92
|
+
>>> tbl['detections_coco'] = yolo_to_coco(tbl.detections)
|
|
93
|
+
"""
|
|
61
94
|
bboxes, labels = detections['bboxes'], detections['labels']
|
|
62
95
|
num_annotations = len(detections['bboxes'])
|
|
63
96
|
assert num_annotations == len(detections['labels'])
|
|
@@ -72,14 +105,21 @@ def yolo_to_coco(detections: dict) -> list:
|
|
|
72
105
|
return result
|
|
73
106
|
|
|
74
107
|
|
|
75
|
-
def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: Exp) -> Iterator[torch.Tensor]:
|
|
108
|
+
def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: 'Exp') -> Iterator['torch.Tensor']:
    """Lazily convert images into tensors, one tensor per input image.

    Each image is passed through `normalize_image_mode`, converted to a numpy array and
    run through a YOLOX `ValTransform(legacy=False)` together with `exp.test_size`; the
    transformed array is then wrapped with `torch.from_numpy`.

    `torch` and `yolox` are imported inside the function rather than at module level.
    """
    import torch
    from yolox.data import ValTransform

    transform = ValTransform(legacy=False)
    for raw_image in images:
        pixels = np.array(normalize_image_mode(raw_image))
        # the transform returns a pair; only the first element (the image data) is used
        transformed, _ = transform(pixels, None, exp.test_size)
        yield torch.from_numpy(transformed)
|
|
80
117
|
|
|
81
118
|
|
|
82
|
-
def _lookup_model(model_id: str, device: str) ->
|
|
119
|
+
def _lookup_model(model_id: str, device: str) -> tuple['YOLOX', 'Exp']:
|
|
120
|
+
import torch
|
|
121
|
+
from yolox.exp import get_exp
|
|
122
|
+
|
|
83
123
|
key = (model_id, device)
|
|
84
124
|
if key in _model_cache:
|
|
85
125
|
return _model_cache[key]
|
|
@@ -105,5 +145,11 @@ def _lookup_model(model_id: str, device: str) -> (YOLOX, Exp):
|
|
|
105
145
|
return model, exp
|
|
106
146
|
|
|
107
147
|
|
|
108
|
-
_model_cache = {}
|
|
109
|
-
|
|
148
|
+
_model_cache: dict[tuple[str, str], tuple['YOLOX', 'Exp']] = {}
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
__all__ = local_public_names(__name__)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def __dir__():
    """Return `__all__`, so that `dir()` on this module lists exactly those names."""
    return __all__
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
7
|
import cloudpickle
|
|
@@ -19,14 +19,21 @@ class CallableFunction(Function):
|
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
def __init__(
    self,
    signature: Signature,
    py_fn: Callable,
    self_path: Optional[str] = None,
    self_name: Optional[str] = None,
    batch_size: Optional[int] = None,
    is_method: bool = False,
    is_property: bool = False
):
    """Wrap the Python callable `py_fn`.

    `py_fn` must not be None. Its docstring is copied onto this object. `signature`,
    `self_path`, `is_method` and `is_property` are handed on to the parent constructor,
    which runs after the attributes below have been set.
    """
    assert py_fn is not None
    self.py_fn = py_fn
    self.self_name = self_name
    self.batch_size = batch_size
    # expose the wrapped callable's docstring as this object's own
    self.__doc__ = py_fn.__doc__
    super().__init__(
        signature,
        self_path=self_path,
        is_method=is_method,
        is_property=is_property,
    )
|
|
30
37
|
|
|
31
38
|
@property
|
|
32
39
|
def is_batched(self) -> bool:
|
|
@@ -78,6 +85,7 @@ class CallableFunction(Function):
|
|
|
78
85
|
def _as_dict(self) -> dict:
|
|
79
86
|
if self.self_path is None:
|
|
80
87
|
# this is not a module function
|
|
88
|
+
assert not self.is_method and not self.is_property
|
|
81
89
|
from .function_registry import FunctionRegistry
|
|
82
90
|
id = FunctionRegistry.get().create_stored_function(self)
|
|
83
91
|
return {'id': id.hex}
|
|
@@ -56,7 +56,7 @@ class ExprTemplateFunction(Function):
|
|
|
56
56
|
arg_exprs[param_expr] = arg_expr
|
|
57
57
|
result = result.substitute(arg_exprs)
|
|
58
58
|
import pixeltable.exprs as exprs
|
|
59
|
-
assert not result.
|
|
59
|
+
assert not result._contains(exprs.Variable)
|
|
60
60
|
return result
|
|
61
61
|
|
|
62
62
|
def exec(self, *args: Any, **kwargs: Any) -> Any:
|
pixeltable/func/function.py
CHANGED
|
@@ -3,10 +3,12 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import importlib
|
|
5
5
|
import inspect
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any, Callable, Dict, Optional, Tuple
|
|
7
7
|
|
|
8
8
|
import pixeltable
|
|
9
|
+
import pixeltable.exceptions as excs
|
|
9
10
|
import pixeltable.type_system as ts
|
|
11
|
+
|
|
10
12
|
from .globals import resolve_symbol
|
|
11
13
|
from .signature import Signature
|
|
12
14
|
|
|
@@ -19,9 +21,13 @@ class Function(abc.ABC):
|
|
|
19
21
|
via the member self_path.
|
|
20
22
|
"""
|
|
21
23
|
|
|
22
|
-
def __init__(self, signature: Signature, self_path: Optional[str] = None):
|
|
24
|
+
def __init__(self, signature: Signature, self_path: Optional[str] = None, is_method: bool = False, is_property: bool = False):
    """Record the signature, the path and the method/property flags.

    A function without a `self_path` may be declared neither as a method nor as a
    property; this is enforced with an assertion.
    """
    # Check that stored functions cannot be declared using `is_method` or `is_property`:
    assert self_path is not None or not (is_method or is_property)
    self.signature = signature
    self.self_path = self_path  # fully-qualified path to self
    self.is_method = is_method
    self.is_property = is_property
    # starts out unset
    self._conditional_return_type: Optional[Callable[..., ts.ColumnType]] = None
|
|
26
32
|
|
|
27
33
|
@property
|
|
@@ -38,6 +44,10 @@ class Function(abc.ABC):
|
|
|
38
44
|
return self.self_path[len(ptf_prefix):]
|
|
39
45
|
return self.self_path
|
|
40
46
|
|
|
47
|
+
@property
def arity(self) -> int:
    """The number of parameters in this function's signature."""
    params = self.signature.parameters
    return len(params)
|
|
50
|
+
|
|
41
51
|
def help_str(self) -> str:
|
|
42
52
|
return self.display_name + str(self.signature)
|
|
43
53
|
|
|
@@ -4,11 +4,9 @@ import dataclasses
|
|
|
4
4
|
import importlib
|
|
5
5
|
import logging
|
|
6
6
|
import sys
|
|
7
|
-
import
|
|
8
|
-
from typing import Optional, Dict, List, Tuple
|
|
7
|
+
from typing import Optional, Dict, List
|
|
9
8
|
from uuid import UUID
|
|
10
9
|
|
|
11
|
-
import cloudpickle
|
|
12
10
|
import sqlalchemy as sql
|
|
13
11
|
|
|
14
12
|
import pixeltable.env as env
|
|
@@ -36,6 +34,7 @@ class FunctionRegistry:
|
|
|
36
34
|
def __init__(self):
    """Create a registry with all three lookup tables empty."""
    # stored functions, keyed by UUID
    self.stored_fns_by_id: Dict[UUID, Function] = {}
    self.module_fns: Dict[str, Function] = {}  # fqn -> Function
    # base type -> (method or property name -> Function)
    self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}
|
|
39
38
|
|
|
40
39
|
def clear_cache(self) -> None:
|
|
41
40
|
"""
|
|
@@ -69,6 +68,13 @@ class FunctionRegistry:
|
|
|
69
68
|
if fqn in self.module_fns:
|
|
70
69
|
raise excs.Error(f'A UDF with that name already exists: {fqn}')
|
|
71
70
|
self.module_fns[fqn] = fn
|
|
71
|
+
if fn.is_method or fn.is_property:
|
|
72
|
+
base_type = fn.signature.parameters_by_pos[0].col_type.type_enum
|
|
73
|
+
if base_type not in self.type_methods:
|
|
74
|
+
self.type_methods[base_type] = {}
|
|
75
|
+
if fn.name in self.type_methods[base_type]:
|
|
76
|
+
raise excs.Error(f'Duplicate method name for type {base_type}: {fn.name}')
|
|
77
|
+
self.type_methods[base_type][fn.name] = fn
|
|
72
78
|
|
|
73
79
|
def list_functions(self) -> List[Function]:
|
|
74
80
|
# retrieve Function.Metadata data for all existing stored functions from store directly
|
|
@@ -129,12 +135,21 @@ class FunctionRegistry:
|
|
|
129
135
|
# assert fqn in self.module_fns, f'{fqn} not found'
|
|
130
136
|
# return self.module_fns[fqn]
|
|
131
137
|
|
|
132
|
-
def get_type_methods(self,
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
+
def get_type_methods(self, base_type: ts.ColumnType.Type) -> list[Function]:
    """
    Return every method (and property) registered for `base_type` as a new list;
    the list is empty when nothing is registered for that type.
    """
    registered = self.type_methods.get(base_type, {})
    return list(registered.values())
|
|
145
|
+
|
|
146
|
+
def lookup_type_method(self, base_type: ts.ColumnType.Type, name: str) -> Optional[Function]:
    """
    Find the method (or property) called `name` that is registered for `base_type`.
    Returns None when the type has no registered methods or none with that name.
    """
    return self.type_methods.get(base_type, {}).get(name)
|
|
138
153
|
|
|
139
154
|
#def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id: Optional[UUID] = None) -> UUID:
|
|
140
155
|
def create_stored_function(self, pxt_fn: Function, dir_id: Optional[UUID] = None) -> UUID:
|