pixeltable 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +8 -7
- pixeltable/catalog/column.py +11 -8
- pixeltable/catalog/insertable_table.py +1 -1
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/table.py +20 -13
- pixeltable/catalog/table_version.py +91 -54
- pixeltable/catalog/table_version_path.py +7 -9
- pixeltable/catalog/view.py +2 -1
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +173 -83
- pixeltable/exec/aggregation_node.py +2 -1
- pixeltable/exec/component_iteration_node.py +1 -1
- pixeltable/exec/sql_node.py +11 -8
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -1
- pixeltable/exprs/column_property_ref.py +9 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/exprs/comparison.py +10 -7
- pixeltable/exprs/compound_predicate.py +3 -2
- pixeltable/exprs/data_row.py +19 -4
- pixeltable/exprs/expr.py +46 -35
- pixeltable/exprs/expr_set.py +32 -9
- pixeltable/exprs/function_call.py +56 -32
- pixeltable/exprs/in_predicate.py +3 -2
- pixeltable/exprs/inline_array.py +2 -1
- pixeltable/exprs/inline_dict.py +2 -1
- pixeltable/exprs/is_null.py +3 -2
- pixeltable/exprs/json_mapper.py +5 -4
- pixeltable/exprs/json_path.py +7 -1
- pixeltable/exprs/literal.py +34 -7
- pixeltable/exprs/method_ref.py +3 -3
- pixeltable/exprs/object_ref.py +6 -5
- pixeltable/exprs/row_builder.py +25 -17
- pixeltable/exprs/rowid_ref.py +2 -1
- pixeltable/exprs/similarity_expr.py +2 -1
- pixeltable/exprs/sql_element_cache.py +30 -0
- pixeltable/exprs/type_cast.py +3 -3
- pixeltable/exprs/variable.py +2 -1
- pixeltable/ext/functions/whisperx.py +4 -4
- pixeltable/ext/functions/yolox.py +6 -6
- pixeltable/func/aggregate_function.py +1 -0
- pixeltable/func/function.py +28 -4
- pixeltable/functions/__init__.py +4 -2
- pixeltable/functions/anthropic.py +15 -5
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -1
- pixeltable/functions/huggingface.py +2 -2
- pixeltable/functions/image.py +17 -2
- pixeltable/functions/json.py +5 -5
- pixeltable/functions/mistralai.py +188 -0
- pixeltable/functions/openai.py +6 -10
- pixeltable/functions/string.py +3 -2
- pixeltable/functions/timestamp.py +95 -7
- pixeltable/functions/together.py +4 -4
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +27 -17
- pixeltable/functions/whisper.py +1 -1
- pixeltable/io/hf_datasets.py +17 -15
- pixeltable/io/pandas.py +0 -2
- pixeltable/io/parquet.py +15 -14
- pixeltable/iterators/document.py +16 -15
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_19.py +46 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +5 -4
- pixeltable/plan.py +100 -78
- pixeltable/store.py +5 -1
- pixeltable/tool/create_test_db_dump.py +4 -3
- pixeltable/type_system.py +12 -14
- pixeltable/utils/documents.py +45 -42
- pixeltable/utils/formatter.py +2 -2
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/METADATA +79 -21
- pixeltable-0.2.18.dist-info/RECORD +147 -0
- pixeltable-0.2.17.dist-info/RECORD +0 -144
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/entry_points.txt +0 -0
pixeltable/functions/json.py
CHANGED
|
@@ -12,18 +12,18 @@ t.select(pxt.functions.json.make_list()).collect()
|
|
|
12
12
|
|
|
13
13
|
from typing import Any
|
|
14
14
|
|
|
15
|
-
import pixeltable
|
|
15
|
+
import pixeltable as pxt
|
|
16
16
|
import pixeltable.type_system as ts
|
|
17
17
|
from pixeltable.utils.code import local_public_names
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
@
|
|
21
|
-
update_types=[
|
|
22
|
-
value_type=
|
|
20
|
+
@pxt.uda(
|
|
21
|
+
update_types=[pxt.JsonType(nullable=True)],
|
|
22
|
+
value_type=pxt.JsonType(),
|
|
23
23
|
requires_order_by=False,
|
|
24
24
|
allows_window=False,
|
|
25
25
|
)
|
|
26
|
-
class make_list(
|
|
26
|
+
class make_list(pxt.Aggregator):
|
|
27
27
|
"""
|
|
28
28
|
Collects arguments into a list.
|
|
29
29
|
"""
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
3
|
+
that wrap various endpoints from the Mistral AI API. In order to use them, you must
|
|
4
|
+
first `pip install mistralai` and configure your Mistral AI credentials, as described in
|
|
5
|
+
the [Working with Mistral AI](https://pixeltable.readme.io/docs/working-with-mistralai) tutorial.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING, Optional, TypeVar, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
import pixeltable as pxt
|
|
13
|
+
from pixeltable.env import Env, register_client
|
|
14
|
+
from pixeltable.func.signature import Batch
|
|
15
|
+
from pixeltable.utils.code import local_public_names
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import mistralai.types.basemodel
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@register_client('mistral')
|
|
22
|
+
def _(api_key: str) -> 'mistralai.Mistral':
|
|
23
|
+
import mistralai
|
|
24
|
+
return mistralai.Mistral(api_key=api_key)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _mistralai_client() -> 'mistralai.Mistral':
|
|
28
|
+
return Env.get().get_client('mistral')
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pxt.udf
|
|
32
|
+
def chat_completions(
|
|
33
|
+
messages: list[dict[str, str]],
|
|
34
|
+
*,
|
|
35
|
+
model: str,
|
|
36
|
+
temperature: Optional[float] = 0.7,
|
|
37
|
+
top_p: Optional[float] = 1.0,
|
|
38
|
+
max_tokens: Optional[int] = None,
|
|
39
|
+
min_tokens: Optional[int] = None,
|
|
40
|
+
stop: Optional[list[str]] = None,
|
|
41
|
+
random_seed: Optional[int] = None,
|
|
42
|
+
response_format: Optional[dict] = None,
|
|
43
|
+
safe_prompt: Optional[bool] = False,
|
|
44
|
+
) -> dict:
|
|
45
|
+
"""
|
|
46
|
+
Chat Completion API.
|
|
47
|
+
|
|
48
|
+
Equivalent to the Mistral AI `chat/completions` API endpoint.
|
|
49
|
+
For additional details, see: <https://docs.mistral.ai/api/#tag/chat>
|
|
50
|
+
|
|
51
|
+
__Requirements:__
|
|
52
|
+
|
|
53
|
+
- `pip install mistralai`
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
messages: The prompt(s) to generate completions for.
|
|
57
|
+
model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
|
|
58
|
+
|
|
59
|
+
For details on the other parameters, see: <https://docs.mistral.ai/api/#tag/chat>
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
A dictionary containing the response and other metadata.
|
|
63
|
+
|
|
64
|
+
Examples:
|
|
65
|
+
Add a computed column that applies the model `mistral-small-latest`
|
|
66
|
+
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
67
|
+
|
|
68
|
+
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
69
|
+
... tbl['response'] = chat_completions(messages, model='mistral-small-latest')
|
|
70
|
+
"""
|
|
71
|
+
Env.get().require_package('mistralai')
|
|
72
|
+
return _mistralai_client().chat.complete(
|
|
73
|
+
messages=messages, # type: ignore[arg-type]
|
|
74
|
+
model=model,
|
|
75
|
+
temperature=temperature,
|
|
76
|
+
top_p=top_p,
|
|
77
|
+
max_tokens=_opt(max_tokens),
|
|
78
|
+
min_tokens=_opt(min_tokens),
|
|
79
|
+
stop=stop,
|
|
80
|
+
random_seed=_opt(random_seed),
|
|
81
|
+
response_format=response_format, # type: ignore[arg-type]
|
|
82
|
+
safe_prompt=safe_prompt,
|
|
83
|
+
).dict()
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@pxt.udf
|
|
87
|
+
def fim_completions(
|
|
88
|
+
prompt: str,
|
|
89
|
+
*,
|
|
90
|
+
model: str,
|
|
91
|
+
temperature: Optional[float] = 0.7,
|
|
92
|
+
top_p: Optional[float] = 1.0,
|
|
93
|
+
max_tokens: Optional[int] = None,
|
|
94
|
+
min_tokens: Optional[int] = None,
|
|
95
|
+
stop: Optional[list[str]] = None,
|
|
96
|
+
random_seed: Optional[int] = None,
|
|
97
|
+
suffix: Optional[str] = None,
|
|
98
|
+
) -> dict:
|
|
99
|
+
"""
|
|
100
|
+
Fill-in-the-middle Completion API.
|
|
101
|
+
|
|
102
|
+
Equivalent to the Mistral AI `fim/completions` API endpoint.
|
|
103
|
+
For additional details, see: <https://docs.mistral.ai/api/#tag/fim>
|
|
104
|
+
|
|
105
|
+
__Requirements:__
|
|
106
|
+
|
|
107
|
+
- `pip install mistralai`
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
prompt: The text/code to complete.
|
|
111
|
+
model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
|
|
112
|
+
|
|
113
|
+
For details on the other parameters, see: <https://docs.mistral.ai/api/#tag/fim>
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
A dictionary containing the response and other metadata.
|
|
117
|
+
|
|
118
|
+
Examples:
|
|
119
|
+
Add a computed column that applies the model `codestral-latest`
|
|
120
|
+
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
121
|
+
|
|
122
|
+
>>> tbl['response'] = fim_completions(tbl.prompt, model='codestral-latest')
|
|
123
|
+
"""
|
|
124
|
+
Env.get().require_package('mistralai')
|
|
125
|
+
return _mistralai_client().fim.complete(
|
|
126
|
+
prompt=prompt,
|
|
127
|
+
model=model,
|
|
128
|
+
temperature=temperature,
|
|
129
|
+
top_p=top_p,
|
|
130
|
+
max_tokens=_opt(max_tokens),
|
|
131
|
+
min_tokens=_opt(min_tokens),
|
|
132
|
+
stop=stop,
|
|
133
|
+
random_seed=_opt(random_seed),
|
|
134
|
+
suffix=_opt(suffix)
|
|
135
|
+
).dict()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
_embedding_dimensions_cache: dict[str, int] = {
|
|
139
|
+
'mistral-embed': 1024
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@pxt.udf(batch_size=16, return_type=pxt.ArrayType((None,), dtype=pxt.FloatType()))
|
|
144
|
+
def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
|
|
145
|
+
"""
|
|
146
|
+
Embeddings API.
|
|
147
|
+
|
|
148
|
+
Equivalent to the Mistral AI `embeddings` API endpoint.
|
|
149
|
+
For additional details, see: <https://docs.mistral.ai/api/#tag/embeddings>
|
|
150
|
+
|
|
151
|
+
__Requirements:__
|
|
152
|
+
|
|
153
|
+
- `pip install mistralai`
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
input: Text to embed.
|
|
157
|
+
model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
An array representing the application of the given embedding to `input`.
|
|
161
|
+
"""
|
|
162
|
+
Env.get().require_package('mistralai')
|
|
163
|
+
result = _mistralai_client().embeddings.create(
|
|
164
|
+
inputs=input,
|
|
165
|
+
model=model,
|
|
166
|
+
)
|
|
167
|
+
return [np.array(data.embedding, dtype=np.float64) for data in result.data]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@embeddings.conditional_return_type
|
|
171
|
+
def _(model: str) -> pxt.ArrayType:
|
|
172
|
+
dimensions = _embedding_dimensions_cache.get(model) # `None` if unknown model
|
|
173
|
+
return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
_T = TypeVar('_T')
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _opt(arg: Optional[_T]) -> Union[_T, 'mistralai.types.basemodel.Unset']:
|
|
180
|
+
from mistralai.types import UNSET
|
|
181
|
+
return arg if arg is not None else UNSET
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
__all__ = local_public_names(__name__)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def __dir__():
|
|
188
|
+
return __all__
|
pixeltable/functions/openai.py
CHANGED
|
@@ -9,10 +9,10 @@ import base64
|
|
|
9
9
|
import io
|
|
10
10
|
import pathlib
|
|
11
11
|
import uuid
|
|
12
|
-
from typing import
|
|
12
|
+
from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
|
|
13
13
|
|
|
14
|
-
import PIL.Image
|
|
15
14
|
import numpy as np
|
|
15
|
+
import PIL.Image
|
|
16
16
|
import tenacity
|
|
17
17
|
|
|
18
18
|
import pixeltable as pxt
|
|
@@ -23,13 +23,11 @@ from pixeltable.utils.code import local_public_names
|
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
25
|
import openai
|
|
26
|
-
from openai._types import NotGiven
|
|
27
26
|
|
|
28
27
|
|
|
29
28
|
@env.register_client('openai')
|
|
30
29
|
def _(api_key: str) -> 'openai.OpenAI':
|
|
31
30
|
import openai
|
|
32
|
-
|
|
33
31
|
return openai.OpenAI(api_key=api_key)
|
|
34
32
|
|
|
35
33
|
|
|
@@ -42,10 +40,9 @@ def _openai_client() -> 'openai.OpenAI':
|
|
|
42
40
|
# by OpenAI. Should we investigate making this more customizable in the future?
|
|
43
41
|
def _retry(fn: Callable) -> Callable:
|
|
44
42
|
import openai
|
|
45
|
-
|
|
46
43
|
return tenacity.retry(
|
|
47
44
|
retry=tenacity.retry_if_exception_type(openai.RateLimitError),
|
|
48
|
-
wait=tenacity.wait_random_exponential(multiplier=
|
|
45
|
+
wait=tenacity.wait_random_exponential(multiplier=1, max=60),
|
|
49
46
|
stop=tenacity.stop_after_attempt(20),
|
|
50
47
|
)(fn)
|
|
51
48
|
|
|
@@ -462,10 +459,9 @@ def moderations(input: str, *, model: Optional[str] = None) -> dict:
|
|
|
462
459
|
_T = TypeVar('_T')
|
|
463
460
|
|
|
464
461
|
|
|
465
|
-
def _opt(arg: _T) -> Union[_T, 'NotGiven']:
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
return arg if arg is not None else NOT_GIVEN
|
|
462
|
+
def _opt(arg: _T) -> Union[_T, 'openai.NotGiven']:
|
|
463
|
+
import openai
|
|
464
|
+
return arg if arg is not None else openai.NOT_GIVEN
|
|
469
465
|
|
|
470
466
|
|
|
471
467
|
__all__ = local_public_names(__name__)
|
pixeltable/functions/string.py
CHANGED
|
@@ -14,6 +14,7 @@ t.select(pxt_str.capitalize(t.str_col)).collect()
|
|
|
14
14
|
|
|
15
15
|
from typing import Any, Optional
|
|
16
16
|
|
|
17
|
+
import pixeltable.exceptions as excs
|
|
17
18
|
import pixeltable.func as func
|
|
18
19
|
from pixeltable.utils.code import local_public_names
|
|
19
20
|
|
|
@@ -352,7 +353,7 @@ def normalize(self: str, form: str) -> str:
|
|
|
352
353
|
form: Unicode normal form (`'NFC'`, `'NFKC'`, `'NFD'`, `'NFKD'`)
|
|
353
354
|
"""
|
|
354
355
|
import unicodedata
|
|
355
|
-
return unicodedata.normalize(form, self)
|
|
356
|
+
return unicodedata.normalize(form, self) # type: ignore[arg-type]
|
|
356
357
|
|
|
357
358
|
@func.udf(is_method=True)
|
|
358
359
|
def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
|
|
@@ -579,7 +580,7 @@ def upper(self: str) -> str:
|
|
|
579
580
|
return self.upper()
|
|
580
581
|
|
|
581
582
|
@func.udf(is_method=True)
|
|
582
|
-
def wrap(self: str, width: int, **kwargs: Any) ->
|
|
583
|
+
def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
583
584
|
"""
|
|
584
585
|
Wraps the single paragraph in string so every line is at most `width` characters long.
|
|
585
586
|
Returns a list of output lines, without final newlines.
|
|
@@ -13,11 +13,14 @@ t.select(t.timestamp_col.year, t.timestamp_col.weekday()).collect()
|
|
|
13
13
|
from datetime import datetime
|
|
14
14
|
from typing import Optional
|
|
15
15
|
|
|
16
|
+
import sqlalchemy as sql
|
|
17
|
+
|
|
18
|
+
from pixeltable.env import Env
|
|
16
19
|
import pixeltable.func as func
|
|
17
20
|
from pixeltable.utils.code import local_public_names
|
|
18
21
|
|
|
19
22
|
|
|
20
|
-
@func.udf(
|
|
23
|
+
@func.udf(is_property=True)
|
|
21
24
|
def year(self: datetime) -> int:
|
|
22
25
|
"""
|
|
23
26
|
Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
|
|
@@ -28,7 +31,12 @@ def year(self: datetime) -> int:
|
|
|
28
31
|
return self.year
|
|
29
32
|
|
|
30
33
|
|
|
31
|
-
@
|
|
34
|
+
@year.to_sql
|
|
35
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
36
|
+
return sql.extract('year', self)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@func.udf(is_property=True)
|
|
32
40
|
def month(self: datetime) -> int:
|
|
33
41
|
"""
|
|
34
42
|
Between 1 and 12 inclusive.
|
|
@@ -38,7 +46,12 @@ def month(self: datetime) -> int:
|
|
|
38
46
|
return self.month
|
|
39
47
|
|
|
40
48
|
|
|
41
|
-
@
|
|
49
|
+
@month.to_sql
|
|
50
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
51
|
+
return sql.extract('month', self)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@func.udf(is_property=True)
|
|
42
55
|
def day(self: datetime) -> int:
|
|
43
56
|
"""
|
|
44
57
|
Between 1 and the number of days in the given month of the given year.
|
|
@@ -48,7 +61,12 @@ def day(self: datetime) -> int:
|
|
|
48
61
|
return self.day
|
|
49
62
|
|
|
50
63
|
|
|
51
|
-
@
|
|
64
|
+
@day.to_sql
|
|
65
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
66
|
+
return sql.extract('day', self)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@func.udf(is_property=True)
|
|
52
70
|
def hour(self: datetime) -> int:
|
|
53
71
|
"""
|
|
54
72
|
Between 0 and 23 inclusive.
|
|
@@ -58,7 +76,12 @@ def hour(self: datetime) -> int:
|
|
|
58
76
|
return self.hour
|
|
59
77
|
|
|
60
78
|
|
|
61
|
-
@
|
|
79
|
+
@hour.to_sql
|
|
80
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
81
|
+
return sql.extract('hour', self)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@func.udf(is_property=True)
|
|
62
85
|
def minute(self: datetime) -> int:
|
|
63
86
|
"""
|
|
64
87
|
Between 0 and 59 inclusive.
|
|
@@ -68,7 +91,12 @@ def minute(self: datetime) -> int:
|
|
|
68
91
|
return self.minute
|
|
69
92
|
|
|
70
93
|
|
|
71
|
-
@
|
|
94
|
+
@minute.to_sql
|
|
95
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
96
|
+
return sql.extract('minute', self)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@func.udf(is_property=True)
|
|
72
100
|
def second(self: datetime) -> int:
|
|
73
101
|
"""
|
|
74
102
|
Between 0 and 59 inclusive.
|
|
@@ -78,7 +106,12 @@ def second(self: datetime) -> int:
|
|
|
78
106
|
return self.second
|
|
79
107
|
|
|
80
108
|
|
|
81
|
-
@
|
|
109
|
+
@second.to_sql
|
|
110
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
111
|
+
return sql.extract('second', self)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@func.udf(is_property=True)
|
|
82
115
|
def microsecond(self: datetime) -> int:
|
|
83
116
|
"""
|
|
84
117
|
Between 0 and 999999 inclusive.
|
|
@@ -88,6 +121,24 @@ def microsecond(self: datetime) -> int:
|
|
|
88
121
|
return self.microsecond
|
|
89
122
|
|
|
90
123
|
|
|
124
|
+
@microsecond.to_sql
|
|
125
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
126
|
+
return sql.extract('microseconds', self) - sql.extract('second', self) * 1000000
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@func.udf(is_method=True)
|
|
130
|
+
def astimezone(self: datetime, tz: str) -> datetime:
|
|
131
|
+
"""
|
|
132
|
+
Convert the datetime to the given time zone.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
tz: The time zone to convert to. Must be a valid time zone name from the IANA Time Zone Database.
|
|
136
|
+
"""
|
|
137
|
+
from zoneinfo import ZoneInfo
|
|
138
|
+
tzinfo = ZoneInfo(tz)
|
|
139
|
+
return self.astimezone(tzinfo)
|
|
140
|
+
|
|
141
|
+
|
|
91
142
|
@func.udf(is_method=True)
|
|
92
143
|
def weekday(self: datetime) -> int:
|
|
93
144
|
"""
|
|
@@ -97,6 +148,12 @@ def weekday(self: datetime) -> int:
|
|
|
97
148
|
"""
|
|
98
149
|
return self.weekday()
|
|
99
150
|
|
|
151
|
+
|
|
152
|
+
@weekday.to_sql
|
|
153
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
154
|
+
return sql.extract('isodow', self) - 1
|
|
155
|
+
|
|
156
|
+
|
|
100
157
|
@func.udf(is_method=True)
|
|
101
158
|
def isoweekday(self: datetime) -> int:
|
|
102
159
|
"""
|
|
@@ -107,6 +164,11 @@ def isoweekday(self: datetime) -> int:
|
|
|
107
164
|
return self.isoweekday()
|
|
108
165
|
|
|
109
166
|
|
|
167
|
+
@isoweekday.to_sql
|
|
168
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
169
|
+
return sql.extract('isodow', self)
|
|
170
|
+
|
|
171
|
+
|
|
110
172
|
@func.udf(is_method=True)
|
|
111
173
|
def isocalendar(self: datetime) -> dict:
|
|
112
174
|
"""
|
|
@@ -146,6 +208,32 @@ def strftime(self: datetime, format: str) -> str:
|
|
|
146
208
|
return self.strftime(format)
|
|
147
209
|
|
|
148
210
|
|
|
211
|
+
@func.udf(is_method=True)
|
|
212
|
+
def make_timestamp(
|
|
213
|
+
year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0, microsecond: int = 0
|
|
214
|
+
) -> datetime:
|
|
215
|
+
"""
|
|
216
|
+
Create a timestamp.
|
|
217
|
+
|
|
218
|
+
Equivalent to [`datetime()`](https://docs.python.org/3/library/datetime.html#datetime.datetime).
|
|
219
|
+
"""
|
|
220
|
+
return datetime(year, month, day, hour, minute, second, microsecond, tzinfo=Env.get().default_time_zone)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
@make_timestamp.to_sql
|
|
224
|
+
def _(
|
|
225
|
+
year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement,
|
|
226
|
+
hour: sql.ColumnElement = sql.literal(0), minute: sql.ColumnElement = sql.literal(0),
|
|
227
|
+
second: sql.ColumnElement = sql.literal(0), microsecond: sql.ColumnElement = sql.literal(0)
|
|
228
|
+
) -> sql.ColumnElement:
|
|
229
|
+
return sql.func.make_timestamptz(
|
|
230
|
+
sql.cast(year, sql.Integer),
|
|
231
|
+
sql.cast(month, sql.Integer),
|
|
232
|
+
sql.cast(day, sql.Integer),
|
|
233
|
+
sql.cast(hour, sql.Integer),
|
|
234
|
+
sql.cast(minute, sql.Integer),
|
|
235
|
+
sql.cast(second + microsecond / 1000000.0, sql.Double))
|
|
236
|
+
|
|
149
237
|
# @func.udf
|
|
150
238
|
# def date(self: datetime) -> datetime:
|
|
151
239
|
# """
|
pixeltable/functions/together.py
CHANGED
|
@@ -6,19 +6,19 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import base64
|
|
9
|
-
|
|
9
|
+
import io
|
|
10
|
+
from typing import TYPE_CHECKING, Optional
|
|
10
11
|
|
|
11
|
-
import PIL.Image
|
|
12
12
|
import numpy as np
|
|
13
|
+
import PIL.Image
|
|
13
14
|
|
|
14
|
-
import io
|
|
15
15
|
import pixeltable as pxt
|
|
16
16
|
from pixeltable import env
|
|
17
17
|
from pixeltable.func import Batch
|
|
18
18
|
from pixeltable.utils.code import local_public_names
|
|
19
19
|
|
|
20
20
|
if TYPE_CHECKING:
|
|
21
|
-
import together
|
|
21
|
+
import together # type: ignore[import-untyped]
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
@env.register_client('together')
|
pixeltable/functions/video.py
CHANGED
|
@@ -16,9 +16,9 @@ import uuid
|
|
|
16
16
|
from pathlib import Path
|
|
17
17
|
from typing import Optional
|
|
18
18
|
|
|
19
|
-
import
|
|
20
|
-
import av
|
|
19
|
+
import av # type: ignore[import-untyped]
|
|
21
20
|
import numpy as np
|
|
21
|
+
import PIL.Image
|
|
22
22
|
|
|
23
23
|
import pixeltable.env as env
|
|
24
24
|
import pixeltable.func as func
|
pixeltable/functions/vision.py
CHANGED
|
@@ -13,19 +13,16 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
|
|
|
13
13
|
|
|
14
14
|
import colorsys
|
|
15
15
|
import hashlib
|
|
16
|
-
import random
|
|
17
16
|
from collections import defaultdict
|
|
18
|
-
from typing import Optional, Union
|
|
17
|
+
from typing import Any, Optional, Union
|
|
19
18
|
|
|
20
|
-
import PIL.Image
|
|
21
|
-
import PIL.Image
|
|
22
19
|
import numpy as np
|
|
20
|
+
import PIL.Image
|
|
23
21
|
|
|
24
22
|
import pixeltable.func as func
|
|
25
23
|
import pixeltable.type_system as ts
|
|
26
24
|
from pixeltable.utils.code import local_public_names
|
|
27
25
|
|
|
28
|
-
|
|
29
26
|
# TODO: figure out a better submodule structure
|
|
30
27
|
|
|
31
28
|
|
|
@@ -180,7 +177,7 @@ def eval_detections(
|
|
|
180
177
|
pred_scores: list[float],
|
|
181
178
|
gt_bboxes: list[list[int]],
|
|
182
179
|
gt_labels: list[int],
|
|
183
|
-
) -> dict:
|
|
180
|
+
) -> list[dict]:
|
|
184
181
|
"""
|
|
185
182
|
Evaluates the performance of a set of predicted bounding boxes against a set of ground truth bounding boxes.
|
|
186
183
|
"""
|
|
@@ -195,7 +192,7 @@ def eval_detections(
|
|
|
195
192
|
pred_filter = pred_classes_arr == class_idx
|
|
196
193
|
gt_filter = gt_classes_arr == class_idx
|
|
197
194
|
class_pred_scores = pred_scores_arr[pred_filter]
|
|
198
|
-
tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter],
|
|
195
|
+
tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], 0.5)
|
|
199
196
|
ordered_class_pred_scores = -np.sort(-class_pred_scores)
|
|
200
197
|
result.append(
|
|
201
198
|
{
|
|
@@ -330,31 +327,44 @@ def draw_bounding_boxes(
|
|
|
330
327
|
label_colors = _create_label_colors(labels)
|
|
331
328
|
box_colors = [label_colors[label] for label in labels]
|
|
332
329
|
|
|
333
|
-
from PIL import ImageDraw, ImageFont
|
|
330
|
+
from PIL import ImageColor, ImageDraw, ImageFont
|
|
331
|
+
|
|
334
332
|
# set default font if not provided
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
txt_font = ImageFont.truetype(font=font, size=font_size or 10)
|
|
333
|
+
txt_font: Union[ImageFont.ImageFont, ImageFont.FreeTypeFont] = (
|
|
334
|
+
ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size or 10)
|
|
335
|
+
)
|
|
339
336
|
|
|
340
337
|
img_to_draw = img.copy()
|
|
341
338
|
draw = ImageDraw.Draw(img_to_draw, 'RGBA' if fill else 'RGB')
|
|
342
339
|
|
|
343
|
-
|
|
340
|
+
# Draw bounding boxes
|
|
341
|
+
for i, bbox in enumerate(boxes):
|
|
344
342
|
# determine color for the current box and label
|
|
345
343
|
color = box_colors[i % len(box_colors)]
|
|
346
344
|
|
|
347
345
|
if fill:
|
|
348
346
|
rgb_color = ImageColor.getrgb(color)
|
|
349
347
|
fill_color = rgb_color + (100,) # semi-transparent
|
|
350
|
-
draw.rectangle(bbox, outline=color, width=width, fill=fill_color)
|
|
348
|
+
draw.rectangle(bbox, outline=color, width=width, fill=fill_color) # type: ignore[arg-type]
|
|
351
349
|
else:
|
|
352
|
-
draw.rectangle(bbox, outline=color, width=width)
|
|
350
|
+
draw.rectangle(bbox, outline=color, width=width) # type: ignore[arg-type]
|
|
353
351
|
|
|
352
|
+
# Now draw labels separately, so they are not obscured by the boxes
|
|
353
|
+
for i, (bbox, label) in enumerate(zip(boxes, labels)):
|
|
354
354
|
if label is not None:
|
|
355
355
|
label_str = str(label)
|
|
356
|
-
|
|
357
|
-
|
|
356
|
+
_, _, text_width, text_height = draw.textbbox((0, 0), label_str, font=txt_font)
|
|
357
|
+
if bbox[1] - text_height - 2 >= 0:
|
|
358
|
+
# draw text above the box
|
|
359
|
+
y = bbox[1] - text_height - 2
|
|
360
|
+
else:
|
|
361
|
+
y = bbox[3]
|
|
362
|
+
if bbox[0] + text_width + 2 < img.width:
|
|
363
|
+
x = bbox[0]
|
|
364
|
+
else:
|
|
365
|
+
x = img.width - text_width - 2
|
|
366
|
+
draw.rectangle((x, y, x + text_width + 1, y + text_height + 1), fill='black')
|
|
367
|
+
draw.text((x, y), label_str, fill='white', font=txt_font)
|
|
358
368
|
|
|
359
369
|
return img_to_draw
|
|
360
370
|
|
pixeltable/functions/whisper.py
CHANGED
pixeltable/io/hf_datasets.py
CHANGED
|
@@ -6,7 +6,7 @@ import random
|
|
|
6
6
|
import typing
|
|
7
7
|
from typing import Union, Optional, Any
|
|
8
8
|
|
|
9
|
-
import pixeltable
|
|
9
|
+
import pixeltable as pxt
|
|
10
10
|
import pixeltable.type_system as ts
|
|
11
11
|
from pixeltable import exceptions as excs
|
|
12
12
|
|
|
@@ -81,24 +81,26 @@ def import_huggingface_dataset(
|
|
|
81
81
|
dataset: Union[datasets.Dataset, datasets.DatasetDict],
|
|
82
82
|
*,
|
|
83
83
|
column_name_for_split: Optional[str] = None,
|
|
84
|
-
|
|
85
|
-
**kwargs,
|
|
86
|
-
) ->
|
|
87
|
-
"""Create a new
|
|
88
|
-
Requires datasets library to be installed.
|
|
84
|
+
schema_overrides: Optional[dict[str, Any]] = None,
|
|
85
|
+
**kwargs: Any,
|
|
86
|
+
) -> pxt.Table:
|
|
87
|
+
"""Create a new base table from a Huggingface dataset, or dataset dict with multiple splits.
|
|
88
|
+
Requires `datasets` library to be installed.
|
|
89
89
|
|
|
90
90
|
Args:
|
|
91
|
-
|
|
92
|
-
dataset: Huggingface datasets.Dataset
|
|
91
|
+
table_path: Path to the table.
|
|
92
|
+
dataset: Huggingface [`datasets.Dataset`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.Dataset)
|
|
93
|
+
or [`datasets.DatasetDict`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.DatasetDict)
|
|
94
|
+
to insert into the table.
|
|
93
95
|
column_name_for_split: column name to use for split information. If None, no split information will be stored.
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
96
|
+
schema_overrides: If specified, then for each (name, type) pair in `schema_overrides`, the column with
|
|
97
|
+
name `name` will be given type `type`, instead of being inferred from the `Dataset` or `DatasetDict`. The keys in
|
|
98
|
+
`schema_overrides` should be the column names of the `Dataset` or `DatasetDict` (whether or not they are valid
|
|
99
|
+
Pixeltable identifiers).
|
|
98
100
|
kwargs: Additional arguments to pass to `create_table`.
|
|
99
101
|
|
|
100
102
|
Returns:
|
|
101
|
-
|
|
103
|
+
A handle to the newly created [`Table`][pixeltable.Table].
|
|
102
104
|
"""
|
|
103
105
|
import datasets
|
|
104
106
|
import pixeltable as pxt
|
|
@@ -118,8 +120,8 @@ def import_huggingface_dataset(
|
|
|
118
120
|
dataset_dict = dataset
|
|
119
121
|
|
|
120
122
|
pixeltable_schema = huggingface_schema_to_pixeltable_schema(dataset)
|
|
121
|
-
if
|
|
122
|
-
pixeltable_schema.update(
|
|
123
|
+
if schema_overrides is not None:
|
|
124
|
+
pixeltable_schema.update(schema_overrides)
|
|
123
125
|
|
|
124
126
|
if column_name_for_split is not None:
|
|
125
127
|
if column_name_for_split in pixeltable_schema:
|