pixeltable 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +5 -0
- pixeltable/catalog/globals.py +8 -0
- pixeltable/catalog/insertable_table.py +2 -2
- pixeltable/catalog/table.py +27 -9
- pixeltable/catalog/table_version.py +41 -68
- pixeltable/catalog/view.py +3 -3
- pixeltable/dataframe.py +7 -6
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/expr_eval_node.py +8 -1
- pixeltable/exec/row_update_node.py +61 -0
- pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
- pixeltable/exprs/__init__.py +1 -2
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +12 -12
- pixeltable/exprs/expr.py +67 -22
- pixeltable/exprs/function_call.py +60 -29
- pixeltable/exprs/globals.py +2 -0
- pixeltable/exprs/in_predicate.py +3 -3
- pixeltable/exprs/inline_array.py +18 -11
- pixeltable/exprs/is_null.py +5 -5
- pixeltable/exprs/method_ref.py +63 -0
- pixeltable/ext/__init__.py +9 -0
- pixeltable/ext/functions/__init__.py +8 -0
- pixeltable/ext/functions/whisperx.py +45 -5
- pixeltable/ext/functions/yolox.py +60 -14
- pixeltable/func/aggregate_function.py +10 -4
- pixeltable/func/callable_function.py +16 -4
- pixeltable/func/expr_template_function.py +1 -1
- pixeltable/func/function.py +12 -2
- pixeltable/func/function_registry.py +26 -9
- pixeltable/func/udf.py +32 -4
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/fireworks.py +33 -0
- pixeltable/functions/globals.py +36 -1
- pixeltable/functions/huggingface.py +155 -7
- pixeltable/functions/image.py +242 -40
- pixeltable/functions/openai.py +214 -0
- pixeltable/functions/string.py +600 -8
- pixeltable/functions/timestamp.py +210 -0
- pixeltable/functions/together.py +106 -0
- pixeltable/functions/video.py +28 -10
- pixeltable/functions/whisper.py +32 -0
- pixeltable/globals.py +3 -3
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/globals.py +186 -5
- pixeltable/io/label_studio.py +42 -2
- pixeltable/io/pandas.py +70 -34
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_18.py +39 -0
- pixeltable/metadata/notes.py +10 -0
- pixeltable/plan.py +82 -7
- pixeltable/tool/create_test_db_dump.py +4 -5
- pixeltable/tool/doc_plugins/griffe.py +81 -0
- pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
- pixeltable/type_system.py +15 -14
- pixeltable/utils/s3.py +1 -1
- pixeltable-0.2.14.dist-info/METADATA +206 -0
- {pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/RECORD +64 -56
- pixeltable-0.2.14.dist-info/entry_points.txt +3 -0
- pixeltable/exprs/image_member_access.py +0 -96
- pixeltable/exprs/predicate.py +0 -44
- pixeltable-0.2.12.dist-info/METADATA +0 -137
- {pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `TimestampType`.
|
|
3
|
+
|
|
4
|
+
Usage example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
|
|
8
|
+
t = pxt.get_table(...)
|
|
9
|
+
t.select(t.timestamp_col.year, t.timestamp_col.weekday()).collect()
|
|
10
|
+
```
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
import pixeltable.func as func
|
|
17
|
+
from pixeltable.utils.code import local_public_names
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@func.udf(is_method=True)
|
|
21
|
+
def year(self: datetime) -> int:
|
|
22
|
+
"""
|
|
23
|
+
Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
|
|
24
|
+
[`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) inclusive.
|
|
25
|
+
|
|
26
|
+
Equivalent to [`datetime.year`](https://docs.python.org/3/library/datetime.html#datetime.datetime.year).
|
|
27
|
+
"""
|
|
28
|
+
return self.year
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@func.udf(is_method=True)
|
|
32
|
+
def month(self: datetime) -> int:
|
|
33
|
+
"""
|
|
34
|
+
Between 1 and 12 inclusive.
|
|
35
|
+
|
|
36
|
+
Equivalent to [`datetime.month`](https://docs.python.org/3/library/datetime.html#datetime.datetime.month).
|
|
37
|
+
"""
|
|
38
|
+
return self.month
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@func.udf(is_method=True)
|
|
42
|
+
def day(self: datetime) -> int:
|
|
43
|
+
"""
|
|
44
|
+
Between 1 and the number of days in the given month of the given year.
|
|
45
|
+
|
|
46
|
+
Equivalent to [`datetime.day`](https://docs.python.org/3/library/datetime.html#datetime.datetime.day).
|
|
47
|
+
"""
|
|
48
|
+
return self.day
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@func.udf(is_method=True)
|
|
52
|
+
def hour(self: datetime) -> int:
|
|
53
|
+
"""
|
|
54
|
+
Between 0 and 23 inclusive.
|
|
55
|
+
|
|
56
|
+
Equivalent to [`datetime.hour`](https://docs.python.org/3/library/datetime.html#datetime.datetime.hour).
|
|
57
|
+
"""
|
|
58
|
+
return self.hour
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@func.udf(is_method=True)
|
|
62
|
+
def minute(self: datetime) -> int:
|
|
63
|
+
"""
|
|
64
|
+
Between 0 and 59 inclusive.
|
|
65
|
+
|
|
66
|
+
Equivalent to [`datetime.minute`](https://docs.python.org/3/library/datetime.html#datetime.datetime.minute).
|
|
67
|
+
"""
|
|
68
|
+
return self.minute
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@func.udf(is_method=True)
|
|
72
|
+
def second(self: datetime) -> int:
|
|
73
|
+
"""
|
|
74
|
+
Between 0 and 59 inclusive.
|
|
75
|
+
|
|
76
|
+
Equivalent to [`datetime.second`](https://docs.python.org/3/library/datetime.html#datetime.datetime.second).
|
|
77
|
+
"""
|
|
78
|
+
return self.second
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@func.udf(is_method=True)
|
|
82
|
+
def microsecond(self: datetime) -> int:
|
|
83
|
+
"""
|
|
84
|
+
Between 0 and 999999 inclusive.
|
|
85
|
+
|
|
86
|
+
Equivalent to [`datetime.microsecond`](https://docs.python.org/3/library/datetime.html#datetime.datetime.microsecond).
|
|
87
|
+
"""
|
|
88
|
+
return self.microsecond
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@func.udf(is_method=True)
|
|
92
|
+
def weekday(self: datetime) -> int:
|
|
93
|
+
"""
|
|
94
|
+
Between 0 (Monday) and 6 (Sunday) inclusive.
|
|
95
|
+
|
|
96
|
+
Equivalent to [`datetime.weekday()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.weekday).
|
|
97
|
+
"""
|
|
98
|
+
return self.weekday()
|
|
99
|
+
|
|
100
|
+
@func.udf(is_method=True)
|
|
101
|
+
def isoweekday(self: datetime) -> int:
|
|
102
|
+
"""
|
|
103
|
+
Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
|
|
104
|
+
|
|
105
|
+
Equivalent to [`datetime.isoweekday()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.isoweekday).
|
|
106
|
+
"""
|
|
107
|
+
return self.isoweekday()
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@func.udf(is_method=True)
|
|
111
|
+
def isocalendar(self: datetime) -> dict:
|
|
112
|
+
"""
|
|
113
|
+
Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
|
|
114
|
+
|
|
115
|
+
Equivalent to
|
|
116
|
+
[`datetime.isocalendar()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.isocalendar).
|
|
117
|
+
"""
|
|
118
|
+
iso_year, iso_week, iso_weekday = self.isocalendar()
|
|
119
|
+
return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@func.udf(is_method=True)
|
|
123
|
+
def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
|
|
124
|
+
"""
|
|
125
|
+
Return a string representing the date and time in ISO 8601 format.
|
|
126
|
+
|
|
127
|
+
Equivalent to [`datetime.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.isoformat).
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
sep: Separator between date and time.
|
|
131
|
+
timespec: The number of additional time components to include in the output. See the [`datetime.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.isoformat) documentation for more details.
|
|
132
|
+
"""
|
|
133
|
+
return self.isoformat(sep=sep, timespec=timespec)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@func.udf(is_method=True)
|
|
137
|
+
def strftime(self: datetime, format: str) -> str:
|
|
138
|
+
"""
|
|
139
|
+
Return a string representing the date and time, controlled by an explicit format string.
|
|
140
|
+
|
|
141
|
+
Equivalent to [`datetime.strftime()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.strftime).
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
format: The format string to control the output. For a complete list of formatting directives, see [`strftime()` and `strptime()` Behavior](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).
|
|
145
|
+
"""
|
|
146
|
+
return self.strftime(format)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# @func.udf
|
|
150
|
+
# def date(self: datetime) -> datetime:
|
|
151
|
+
# """
|
|
152
|
+
# Return the date part of the datetime.
|
|
153
|
+
#
|
|
154
|
+
# Equivalent to [`datetime.date()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.date).
|
|
155
|
+
# """
|
|
156
|
+
# d = self.date()
|
|
157
|
+
# return datetime(d.year, d.month, d.day)
|
|
158
|
+
#
|
|
159
|
+
#
|
|
160
|
+
# @func.udf
|
|
161
|
+
# def time(self: datetime) -> datetime:
|
|
162
|
+
# """
|
|
163
|
+
# Return the time part of the datetime, with microseconds set to 0.
|
|
164
|
+
#
|
|
165
|
+
# Equivalent to [`datetime.time()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.time).
|
|
166
|
+
# """
|
|
167
|
+
# t = self.time()
|
|
168
|
+
# return datetime(1, 1, 1, t.hour, t.minute, t.second, t.microsecond)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@func.udf(is_method=True)
|
|
172
|
+
def replace(
|
|
173
|
+
self: datetime, year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None,
|
|
174
|
+
hour: Optional[int] = None, minute: Optional[int] = None, second: Optional[int] = None,
|
|
175
|
+
microsecond: Optional[int] = None) -> datetime:
|
|
176
|
+
"""
|
|
177
|
+
Return a datetime with the same attributes, except for those attributes given new values by whichever keyword
|
|
178
|
+
arguments are specified.
|
|
179
|
+
|
|
180
|
+
Equivalent to [`datetime.replace()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.replace).
|
|
181
|
+
"""
|
|
182
|
+
kwargs = {k: v for k, v in locals().items() if k != 'self' and v is not None}
|
|
183
|
+
return self.replace(**kwargs)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@func.udf(is_method=True)
|
|
187
|
+
def toordinal(self: datetime) -> int:
|
|
188
|
+
"""
|
|
189
|
+
Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
|
|
190
|
+
|
|
191
|
+
Equivalent to [`datetime.toordinal()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.toordinal).
|
|
192
|
+
"""
|
|
193
|
+
return self.toordinal()
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
@func.udf(is_method=True)
|
|
197
|
+
def posix_timestamp(self: datetime) -> float:
|
|
198
|
+
"""
|
|
199
|
+
Return POSIX timestamp corresponding to the datetime instance.
|
|
200
|
+
|
|
201
|
+
Equivalent to [`datetime.timestamp()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.timestamp).
|
|
202
|
+
"""
|
|
203
|
+
return self.timestamp()
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
__all__ = local_public_names(__name__)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def __dir__():
|
|
210
|
+
return __all__
|
pixeltable/functions/together.py
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
3
|
+
that wrap various endpoints from the Together AI API. In order to use them, you must
|
|
4
|
+
first `pip install together` and configure your Together AI credentials, as described in
|
|
5
|
+
the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tutorial.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
import base64
|
|
2
9
|
from typing import Optional, TYPE_CHECKING
|
|
3
10
|
|
|
@@ -41,6 +48,31 @@ def completions(
|
|
|
41
48
|
n: Optional[int] = None,
|
|
42
49
|
safety_model: Optional[str] = None,
|
|
43
50
|
) -> dict:
|
|
51
|
+
"""
|
|
52
|
+
Generate completions based on a given prompt using a specified model.
|
|
53
|
+
|
|
54
|
+
Equivalent to the Together AI `completions` API endpoint.
|
|
55
|
+
For additional details, see: [https://docs.together.ai/reference/completions-1](https://docs.together.ai/reference/completions-1)
|
|
56
|
+
|
|
57
|
+
__Requirements:__
|
|
58
|
+
|
|
59
|
+
- `pip install together`
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
prompt: A string providing context for the model to complete.
|
|
63
|
+
model: The name of the model to query.
|
|
64
|
+
|
|
65
|
+
For details on the other parameters, see: [https://docs.together.ai/reference/completions-1](https://docs.together.ai/reference/completions-1)
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
A dictionary containing the response and other metadata.
|
|
69
|
+
|
|
70
|
+
Examples:
|
|
71
|
+
Add a computed column that applies the model `mistralai/Mixtral-8x7B-v0.1` to an existing Pixeltable column `tbl.prompt`
|
|
72
|
+
of the table `tbl`:
|
|
73
|
+
|
|
74
|
+
>>> tbl['response'] = completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1')
|
|
75
|
+
"""
|
|
44
76
|
return (
|
|
45
77
|
_together_client()
|
|
46
78
|
.completions.create(
|
|
@@ -80,6 +112,32 @@ def chat_completions(
|
|
|
80
112
|
tools: Optional[dict] = None,
|
|
81
113
|
tool_choice: Optional[dict] = None,
|
|
82
114
|
) -> dict:
|
|
115
|
+
"""
|
|
116
|
+
Generate chat completions based on a given prompt using a specified model.
|
|
117
|
+
|
|
118
|
+
Equivalent to the Together AI `chat/completions` API endpoint.
|
|
119
|
+
For additional details, see: [https://docs.together.ai/reference/chat-completions-1](https://docs.together.ai/reference/chat-completions-1)
|
|
120
|
+
|
|
121
|
+
__Requirements:__
|
|
122
|
+
|
|
123
|
+
- `pip install together`
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
messages: A list of messages comprising the conversation so far.
|
|
127
|
+
model: The name of the model to query.
|
|
128
|
+
|
|
129
|
+
For details on the other parameters, see: [https://docs.together.ai/reference/chat-completions-1](https://docs.together.ai/reference/chat-completions-1)
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
A dictionary containing the response and other metadata.
|
|
133
|
+
|
|
134
|
+
Examples:
|
|
135
|
+
Add a computed column that applies the model `mistralai/Mixtral-8x7B-v0.1` to an existing Pixeltable column `tbl.prompt`
|
|
136
|
+
of the table `tbl`:
|
|
137
|
+
|
|
138
|
+
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
139
|
+
>>> tbl['response'] = chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1')
|
|
140
|
+
"""
|
|
83
141
|
return (
|
|
84
142
|
_together_client()
|
|
85
143
|
.chat.completions.create(
|
|
@@ -117,6 +175,29 @@ _embedding_dimensions_cache = {
|
|
|
117
175
|
|
|
118
176
|
@pxt.udf(batch_size=32, return_type=pxt.ArrayType((None,), dtype=pxt.FloatType()))
|
|
119
177
|
def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
|
|
178
|
+
"""
|
|
179
|
+
Query an embedding model for a given string of text.
|
|
180
|
+
|
|
181
|
+
Equivalent to the Together AI `embeddings` API endpoint.
|
|
182
|
+
For additional details, see: [https://docs.together.ai/reference/embeddings-2](https://docs.together.ai/reference/embeddings-2)
|
|
183
|
+
|
|
184
|
+
__Requirements:__
|
|
185
|
+
|
|
186
|
+
- `pip install together`
|
|
187
|
+
|
|
188
|
+
Args:
|
|
189
|
+
input: A string providing the text for the model to embed.
|
|
190
|
+
model: The name of the embedding model to use.
|
|
191
|
+
|
|
192
|
+
Returns:
|
|
193
|
+
An array representing the application of the given embedding to `input`.
|
|
194
|
+
|
|
195
|
+
Examples:
|
|
196
|
+
Add a computed column that applies the model `togethercomputer/m2-bert-80M-8k-retrieval`
|
|
197
|
+
to an existing Pixeltable column `tbl.text` of the table `tbl`:
|
|
198
|
+
|
|
199
|
+
>>> tbl['response'] = embeddings(tbl.text, model='togethercomputer/m2-bert-80M-8k-retrieval')
|
|
200
|
+
"""
|
|
120
201
|
result = _together_client().embeddings.create(input=input, model=model)
|
|
121
202
|
return [np.array(data.embedding, dtype=np.float64) for data in result.data]
|
|
122
203
|
|
|
@@ -141,6 +222,31 @@ def image_generations(
|
|
|
141
222
|
width: Optional[int] = None,
|
|
142
223
|
negative_prompt: Optional[str] = None,
|
|
143
224
|
) -> PIL.Image.Image:
|
|
225
|
+
"""
|
|
226
|
+
Generate images based on a given prompt using a specified model.
|
|
227
|
+
|
|
228
|
+
Equivalent to the Together AI `images/generations` API endpoint.
|
|
229
|
+
For additional details, see: [https://docs.together.ai/reference/post_images-generations](https://docs.together.ai/reference/post_images-generations)
|
|
230
|
+
|
|
231
|
+
__Requirements:__
|
|
232
|
+
|
|
233
|
+
- `pip install together`
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
prompt: A description of the desired images.
|
|
237
|
+
model: The model to use for image generation.
|
|
238
|
+
|
|
239
|
+
For details on the other parameters, see: [https://docs.together.ai/reference/post_images-generations](https://docs.together.ai/reference/post_images-generations)
|
|
240
|
+
|
|
241
|
+
Returns:
|
|
242
|
+
The generated image.
|
|
243
|
+
|
|
244
|
+
Examples:
|
|
245
|
+
Add a computed column that applies the model `runwayml/stable-diffusion-v1-5`
|
|
246
|
+
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
247
|
+
|
|
248
|
+
>>> tbl['response'] = image_generations(tbl.prompt, model='runwayml/stable-diffusion-v1-5')
|
|
249
|
+
"""
|
|
144
250
|
# TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
|
|
145
251
|
result = _together_client().images.generate(
|
|
146
252
|
prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
|
pixeltable/functions/video.py
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `VideoType`.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
from pixeltable.functions import video as pxt_video
|
|
8
|
+
|
|
9
|
+
t = pxt.get_table(...)
|
|
10
|
+
t.select(pxt_video.extract_audio(t.video_col)).collect()
|
|
11
|
+
```
|
|
12
|
+
"""
|
|
13
|
+
|
|
1
14
|
import tempfile
|
|
2
15
|
import uuid
|
|
3
16
|
from pathlib import Path
|
|
@@ -43,6 +56,9 @@ _format_defaults = { # format -> (codec, ext)
|
|
|
43
56
|
allows_window=False,
|
|
44
57
|
)
|
|
45
58
|
class make_video(func.Aggregator):
|
|
59
|
+
"""
|
|
60
|
+
Aggregator that creates a video from a sequence of images.
|
|
61
|
+
"""
|
|
46
62
|
def __init__(self, fps: int = 25):
|
|
47
63
|
"""follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
|
|
48
64
|
self.container: Optional[av.container.OutputContainer] = None
|
|
@@ -80,11 +96,18 @@ _extract_audio_param_types = [
|
|
|
80
96
|
]
|
|
81
97
|
|
|
82
98
|
|
|
83
|
-
@func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types)
|
|
99
|
+
@func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types, is_method=True)
|
|
84
100
|
def extract_audio(
|
|
85
101
|
video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
|
|
86
102
|
) -> Optional[str]:
|
|
87
|
-
"""
|
|
103
|
+
"""
|
|
104
|
+
Extract an audio stream from a video file, save it as a media file and return its path.
|
|
105
|
+
|
|
106
|
+
Args:
|
|
107
|
+
stream_idx: Index of the audio stream to extract.
|
|
108
|
+
format: The target audio format (`'wav'`, `'mp3'`, `'flac'`).
|
|
109
|
+
codec: The codec to use for the audio stream. If not provided, a default codec will be used.
|
|
110
|
+
"""
|
|
88
111
|
if format not in _format_defaults:
|
|
89
112
|
raise ValueError(f'extract_audio(): unsupported audio format: {format}')
|
|
90
113
|
default_codec, ext = _format_defaults[format]
|
|
@@ -105,15 +128,10 @@ def extract_audio(
|
|
|
105
128
|
return output_filename
|
|
106
129
|
|
|
107
130
|
|
|
108
|
-
@func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)])
|
|
131
|
+
@func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)], is_method=True)
|
|
109
132
|
def get_metadata(video: str) -> dict:
|
|
110
|
-
"""
|
|
111
|
-
|
|
112
|
-
Args:
|
|
113
|
-
video (str): Path to the video file.
|
|
114
|
-
|
|
115
|
-
Returns:
|
|
116
|
-
A dictionary containing the associated metadata.
|
|
133
|
+
"""
|
|
134
|
+
Gets various metadata associated with a video file and returns it as a dictionary.
|
|
117
135
|
"""
|
|
118
136
|
with av.open(video) as container:
|
|
119
137
|
assert isinstance(container, av.container.InputContainer)
|
pixeltable/functions/whisper.py
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDF](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
3
|
+
that wraps the OpenAI Whisper library.
|
|
4
|
+
|
|
5
|
+
This UDF will cause Pixeltable to invoke the relevant model locally. In order to use it, you must
|
|
6
|
+
first `pip install openai-whisper`.
|
|
7
|
+
"""
|
|
8
|
+
|
|
1
9
|
from typing import TYPE_CHECKING, Optional
|
|
2
10
|
|
|
3
11
|
import pixeltable as pxt
|
|
@@ -39,6 +47,30 @@ def transcribe(
|
|
|
39
47
|
append_punctuations: str = '"\'.。,,!!??::”)]}、',
|
|
40
48
|
decode_options: Optional[dict] = None,
|
|
41
49
|
) -> dict:
|
|
50
|
+
"""
|
|
51
|
+
Transcribe an audio file using Whisper.
|
|
52
|
+
|
|
53
|
+
This UDF runs a transcription model _locally_ using the Whisper library,
|
|
54
|
+
equivalent to the Whisper `transcribe` function, as described in the
|
|
55
|
+
[Whisper library documentation](https://github.com/openai/whisper).
|
|
56
|
+
|
|
57
|
+
__Requirements:__
|
|
58
|
+
|
|
59
|
+
- `pip install openai-whisper`
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
audio: The audio file to transcribe.
|
|
63
|
+
model: The name of the model to use for transcription.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
A dictionary containing the transcription and various other metadata.
|
|
67
|
+
|
|
68
|
+
Examples:
|
|
69
|
+
Add a computed column that applies the model `base.en` to an existing Pixeltable column `tbl.audio`
|
|
70
|
+
of the table `tbl`:
|
|
71
|
+
|
|
72
|
+
>>> tbl['result'] = transcribe(tbl.audio, model='base.en')
|
|
73
|
+
"""
|
|
42
74
|
import torch
|
|
43
75
|
|
|
44
76
|
if decode_options is None:
|
pixeltable/globals.py
CHANGED
|
@@ -7,10 +7,10 @@ import sqlalchemy as sql
|
|
|
7
7
|
from sqlalchemy.util.preloaded import orm
|
|
8
8
|
|
|
9
9
|
import pixeltable.exceptions as excs
|
|
10
|
+
import pixeltable.exprs as exprs
|
|
10
11
|
from pixeltable import catalog, func, DataFrame
|
|
11
12
|
from pixeltable.catalog import Catalog
|
|
12
13
|
from pixeltable.env import Env
|
|
13
|
-
from pixeltable.exprs import Predicate
|
|
14
14
|
from pixeltable.iterators import ComponentIterator
|
|
15
15
|
from pixeltable.metadata import schema
|
|
16
16
|
|
|
@@ -81,7 +81,7 @@ def create_view(
|
|
|
81
81
|
base: Union[catalog.Table, DataFrame],
|
|
82
82
|
*,
|
|
83
83
|
schema: Optional[dict[str, Any]] = None,
|
|
84
|
-
filter: Optional[Predicate] = None,
|
|
84
|
+
filter: Optional[exprs.Expr] = None,
|
|
85
85
|
is_snapshot: bool = False,
|
|
86
86
|
iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
|
|
87
87
|
num_retained_versions: int = 10,
|
|
@@ -94,7 +94,7 @@ def create_view(
|
|
|
94
94
|
path_str: Path to the view.
|
|
95
95
|
base: Table (i.e., table or view or snapshot) or DataFrame to base the view on.
|
|
96
96
|
schema: dictionary mapping column names to column types, value expressions, or to column specifications.
|
|
97
|
-
filter: Predicate to filter rows of the base table.
|
|
97
|
+
filter: predicate to filter rows of the base table.
|
|
98
98
|
is_snapshot: Whether the view is a snapshot.
|
|
99
99
|
iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
|
|
100
100
|
the base table.
|
pixeltable/io/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from .external_store import ExternalStore, SyncStatus
|
|
2
|
-
from .globals import create_label_studio_project
|
|
2
|
+
from .globals import create_label_studio_project, import_rows, import_json
|
|
3
3
|
from .hf_datasets import import_huggingface_dataset
|
|
4
4
|
from .pandas import import_csv, import_excel, import_pandas
|
|
5
5
|
from .parquet import import_parquet
|