pixeltable 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/insertable_table.py +2 -2
- pixeltable/catalog/schema_object.py +28 -2
- pixeltable/catalog/table.py +68 -30
- pixeltable/catalog/table_version.py +14 -43
- pixeltable/catalog/view.py +2 -2
- pixeltable/dataframe.py +8 -7
- pixeltable/exec/expr_eval_node.py +8 -1
- pixeltable/exec/sql_scan_node.py +1 -1
- pixeltable/exprs/__init__.py +0 -1
- pixeltable/exprs/column_ref.py +2 -7
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +12 -12
- pixeltable/exprs/expr.py +32 -0
- pixeltable/exprs/in_predicate.py +3 -3
- pixeltable/exprs/is_null.py +5 -5
- pixeltable/exprs/similarity_expr.py +27 -16
- pixeltable/func/aggregate_function.py +10 -4
- pixeltable/func/callable_function.py +4 -0
- pixeltable/func/function_registry.py +2 -0
- pixeltable/functions/globals.py +36 -1
- pixeltable/functions/huggingface.py +62 -4
- pixeltable/functions/image.py +17 -0
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/string.py +622 -7
- pixeltable/functions/video.py +26 -8
- pixeltable/globals.py +54 -50
- pixeltable/index/embedding_index.py +28 -27
- pixeltable/io/external_store.py +2 -2
- pixeltable/io/globals.py +54 -5
- pixeltable/io/label_studio.py +45 -5
- pixeltable/io/pandas.py +18 -7
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_17.py +26 -0
- pixeltable/plan.py +6 -6
- pixeltable/tool/create_test_db_dump.py +2 -2
- pixeltable/tool/doc_plugins/griffe.py +77 -0
- pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
- pixeltable/utils/s3.py +1 -1
- pixeltable-0.2.13.dist-info/METADATA +206 -0
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/RECORD +46 -42
- pixeltable-0.2.13.dist-info/entry_points.txt +3 -0
- pixeltable/exprs/predicate.py +0 -44
- pixeltable-0.2.11.dist-info/METADATA +0 -137
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/WHEEL +0 -0
pixeltable/functions/string.py
CHANGED
|
@@ -1,17 +1,632 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `StringType`.
|
|
3
|
+
It closely follows the Pandas `pandas.Series.str` API.
|
|
4
|
+
|
|
5
|
+
Example:
|
|
6
|
+
```python
|
|
7
|
+
import pixeltable as pxt
|
|
8
|
+
from pixeltable.functions import string as pxt_str
|
|
9
|
+
|
|
10
|
+
t = pxt.get_table(...)
|
|
11
|
+
t.select(pxt_str.capitalize(t.str_col)).collect()
|
|
12
|
+
```
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from typing import Any, Optional
|
|
2
16
|
|
|
3
17
|
import pixeltable.func as func
|
|
4
18
|
from pixeltable.type_system import StringType
|
|
5
19
|
from pixeltable.utils.code import local_public_names
|
|
6
20
|
|
|
7
21
|
|
|
8
|
-
@func.udf
|
|
9
|
-
def
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
|
|
22
|
+
@func.udf
|
|
23
|
+
def capitalize(s: str) -> str:
|
|
24
|
+
"""
|
|
25
|
+
Return `s` with its first character capitalized and the rest lowercased.
|
|
26
|
+
|
|
27
|
+
Equivalent to [`str.capitalize()`](https://docs.python.org/3/library/stdtypes.html#str.capitalize).
|
|
28
|
+
"""
|
|
29
|
+
return s.capitalize()
|
|
30
|
+
|
|
31
|
+
@func.udf
|
|
32
|
+
def casefold(s: str) -> str:
|
|
33
|
+
"""
|
|
34
|
+
Return a casefolded copy of `s`.
|
|
35
|
+
|
|
36
|
+
Equivalent to [`str.casefold()`](https://docs.python.org/3/library/stdtypes.html#str.casefold).
|
|
37
|
+
"""
|
|
38
|
+
return s.casefold()
|
|
39
|
+
|
|
40
|
+
@func.udf
|
|
41
|
+
def center(s: str, width: int, fillchar: str = ' ') -> str:
|
|
42
|
+
"""
|
|
43
|
+
Return a centered string of length `width`.
|
|
44
|
+
|
|
45
|
+
Equivalent to [`str.center()`](https://docs.python.org/3/library/stdtypes.html#str.center).
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
s: String.
|
|
49
|
+
width: Total width of the resulting string.
|
|
50
|
+
fillchar: Character used for padding.
|
|
51
|
+
"""
|
|
52
|
+
return s.center(width, fillchar)
|
|
53
|
+
|
|
54
|
+
@func.udf
|
|
55
|
+
def contains(s: str, pattern: str, case: bool = True, flags: int = 0, regex: bool = True) -> bool:
|
|
56
|
+
"""
|
|
57
|
+
Test if pattern or regex is contained within a string.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
s: String.
|
|
61
|
+
pattern: string literal or regular expression
|
|
62
|
+
case: if False, ignore case
|
|
63
|
+
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
64
|
+
regex: if True, treat pattern as a regular expression
|
|
65
|
+
"""
|
|
66
|
+
if regex:
|
|
67
|
+
import re
|
|
68
|
+
if not case:
|
|
69
|
+
flags |= re.IGNORECASE
|
|
70
|
+
return bool(re.search(pattern, s, flags))
|
|
71
|
+
else:
|
|
72
|
+
if case:
|
|
73
|
+
return pattern in s
|
|
74
|
+
else:
|
|
75
|
+
return pattern.lower() in s.lower()
|
|
76
|
+
|
|
77
|
+
@func.udf
|
|
78
|
+
def count(s: str, pattern: str, flags: int = 0) -> int:
|
|
79
|
+
"""
|
|
80
|
+
Count occurrences of pattern or regex in `s`.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
s: String.
|
|
84
|
+
pattern: string literal or regular expression
|
|
85
|
+
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
86
|
+
"""
|
|
87
|
+
import re
|
|
88
|
+
from builtins import len
|
|
89
|
+
return len(re.findall(pattern, s, flags))
|
|
90
|
+
|
|
91
|
+
@func.udf
|
|
92
|
+
def endswith(s: str, pattern: str) -> bool:
|
|
93
|
+
"""
|
|
94
|
+
Return `True` if the string ends with the specified suffix, otherwise return `False`.
|
|
95
|
+
|
|
96
|
+
Equivalent to [`str.endswith()`](https://docs.python.org/3/library/stdtypes.html#str.endswith).
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
s: String.
|
|
100
|
+
pattern: string literal
|
|
101
|
+
"""
|
|
102
|
+
return s.endswith(pattern)
|
|
103
|
+
|
|
104
|
+
@func.udf
|
|
105
|
+
def fill(s: str, width: int, **kwargs: Any) -> str:
|
|
106
|
+
"""
|
|
107
|
+
Wraps the single paragraph in `s`, and returns a single string containing the wrapped paragraph.
|
|
108
|
+
|
|
109
|
+
Equivalent to [`textwrap.fill()`](https://docs.python.org/3/library/textwrap.html#textwrap.fill).
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
s: String.
|
|
113
|
+
width: Maximum line width.
|
|
114
|
+
kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
|
|
115
|
+
"""
|
|
116
|
+
import textwrap
|
|
117
|
+
return textwrap.fill(s, width, **kwargs)
|
|
118
|
+
|
|
119
|
+
@func.udf
|
|
120
|
+
def find(s: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
121
|
+
"""
|
|
122
|
+
Return the lowest index in `s` where `substr` is found within the slice `s[start:end]`.
|
|
123
|
+
|
|
124
|
+
Equivalent to [`str.find()`](https://docs.python.org/3/library/stdtypes.html#str.find).
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
s: String.
|
|
128
|
+
substr: substring to search for
|
|
129
|
+
start: slice start
|
|
130
|
+
end: slice end
|
|
131
|
+
"""
|
|
132
|
+
return s.find(substr, start, end)
|
|
133
|
+
|
|
134
|
+
@func.udf
|
|
135
|
+
def findall(s: str, pattern: str, flags: int = 0) -> list:
|
|
136
|
+
"""
|
|
137
|
+
Find all occurrences of a regular expression pattern in a string.
|
|
138
|
+
|
|
139
|
+
Equivalent to [`re.findall()`](https://docs.python.org/3/library/re.html#re.findall).
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
s: String.
|
|
143
|
+
pattern: regular expression pattern
|
|
144
|
+
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
145
|
+
"""
|
|
146
|
+
import re
|
|
147
|
+
return re.findall(pattern, s, flags)
|
|
148
|
+
|
|
149
|
+
@func.udf
|
|
150
|
+
def format(s: str, *args: Any, **kwargs: Any) -> str:
|
|
151
|
+
"""
|
|
152
|
+
Perform string formatting.
|
|
153
|
+
|
|
154
|
+
Equivalent to [`str.format()`](https://docs.python.org/3/library/stdtypes.html#str.format).
|
|
155
|
+
"""
|
|
156
|
+
return s.format(*args, **kwargs)
|
|
157
|
+
|
|
158
|
+
@func.udf
|
|
159
|
+
def fullmatch(s: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
    """
    Determine if `s` fully matches a regular expression.

    Equivalent to [`re.fullmatch()`](https://docs.python.org/3/library/re.html#re.fullmatch).

    Args:
        s: String.
        pattern: regular expression pattern
        case: if False, ignore case
        flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module

    Returns:
        `True` if the whole of `s` matches `pattern`, `False` otherwise.
    """
    import re
    if not case:
        flags |= re.IGNORECASE
    # Run the regex exactly once; only the presence of a match object matters.
    return re.fullmatch(pattern, s, flags) is not None
|
|
176
|
+
|
|
177
|
+
@func.udf
|
|
178
|
+
def index(s: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
179
|
+
"""
|
|
180
|
+
Return the lowest index in `s` where `substr` is found within the slice `s[start:end]`. Raises ValueError if `substr` is not found.
|
|
181
|
+
|
|
182
|
+
Equivalent to [`str.index()`](https://docs.python.org/3/library/stdtypes.html#str.index).
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
s: String.
|
|
186
|
+
substr: substring to search for
|
|
187
|
+
start: slice start
|
|
188
|
+
end: slice end
|
|
189
|
+
"""
|
|
190
|
+
return s.index(substr, start, end)
|
|
191
|
+
|
|
192
|
+
@func.udf
|
|
193
|
+
def isalnum(s: str) -> bool:
|
|
194
|
+
"""
|
|
195
|
+
Return `True` if all characters in the string are alphanumeric and there is at least one character, `False`
|
|
196
|
+
otherwise.
|
|
197
|
+
|
|
198
|
+
Equivalent to [`str.isalnum()`](https://docs.python.org/3/library/stdtypes.html#str.isalnum).
|
|
199
|
+
"""
|
|
200
|
+
return s.isalnum()
|
|
201
|
+
|
|
202
|
+
@func.udf
|
|
203
|
+
def isalpha(s: str) -> bool:
|
|
204
|
+
"""
|
|
205
|
+
Return `True` if all characters in the string are alphabetic and there is at least one character, `False` otherwise.
|
|
206
|
+
|
|
207
|
+
Equivalent to [`str.isalpha()`](https://docs.python.org/3/library/stdtypes.html#str.isalpha).
|
|
208
|
+
"""
|
|
209
|
+
return s.isalpha()
|
|
210
|
+
|
|
211
|
+
@func.udf
|
|
212
|
+
def isascii(s: str) -> bool:
|
|
213
|
+
"""
|
|
214
|
+
Return `True` if the string is empty or all characters in the string are ASCII, `False` otherwise.
|
|
215
|
+
|
|
216
|
+
Equivalent to [`str.isascii()`](https://docs.python.org/3/library/stdtypes.html#str.isascii).
|
|
217
|
+
"""
|
|
218
|
+
return s.isascii()
|
|
219
|
+
|
|
220
|
+
@func.udf
|
|
221
|
+
def isdecimal(s: str) -> bool:
|
|
222
|
+
"""
|
|
223
|
+
Return `True` if all characters in the string are decimal characters and there is at least one character, `False`
|
|
224
|
+
otherwise.
|
|
225
|
+
|
|
226
|
+
Equivalent to [`str.isdecimal()`](https://docs.python.org/3/library/stdtypes.html#str.isdecimal).
|
|
227
|
+
"""
|
|
228
|
+
return s.isdecimal()
|
|
229
|
+
|
|
230
|
+
@func.udf
|
|
231
|
+
def isdigit(s: str) -> bool:
|
|
232
|
+
"""
|
|
233
|
+
Return `True` if all characters in the string are digits and there is at least one character, `False` otherwise.
|
|
234
|
+
|
|
235
|
+
Equivalent to [`str.isdigit()`](https://docs.python.org/3/library/stdtypes.html#str.isdigit).
|
|
236
|
+
"""
|
|
237
|
+
return s.isdigit()
|
|
238
|
+
|
|
239
|
+
@func.udf
|
|
240
|
+
def isidentifier(s: str) -> bool:
|
|
241
|
+
"""
|
|
242
|
+
Return `True` if the string is a valid identifier according to the language definition, `False` otherwise.
|
|
243
|
+
|
|
244
|
+
Equivalent to [`str.isidentifier()`](https://docs.python.org/3/library/stdtypes.html#str.isidentifier)
|
|
245
|
+
"""
|
|
246
|
+
return s.isidentifier()
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
@func.udf
|
|
250
|
+
def islower(s: str) -> bool:
|
|
251
|
+
"""
|
|
252
|
+
Return `True` if all cased characters in the string are lowercase and there is at least one cased character, `False` otherwise.
|
|
253
|
+
|
|
254
|
+
Equivalent to [`str.islower()`](https://docs.python.org/3/library/stdtypes.html#str.islower)
|
|
255
|
+
"""
|
|
256
|
+
return s.islower()
|
|
257
|
+
|
|
258
|
+
@func.udf
|
|
259
|
+
def isnumeric(s: str) -> bool:
|
|
260
|
+
"""
|
|
261
|
+
Return `True` if all characters in the string are numeric characters, `False` otherwise.
|
|
262
|
+
|
|
263
|
+
Equivalent to [`str.isnumeric()`](https://docs.python.org/3/library/stdtypes.html#str.isnumeric)
|
|
264
|
+
"""
|
|
265
|
+
return s.isnumeric()
|
|
266
|
+
|
|
267
|
+
@func.udf
|
|
268
|
+
def isupper(s: str) -> bool:
|
|
269
|
+
"""
|
|
270
|
+
Return `True` if all cased characters in the string are uppercase and there is at least one cased character, `False` otherwise.
|
|
271
|
+
|
|
272
|
+
Equivalent to [`str.isupper()`](https://docs.python.org/3/library/stdtypes.html#str.isupper)
|
|
273
|
+
"""
|
|
274
|
+
return s.isupper()
|
|
275
|
+
|
|
276
|
+
@func.udf
|
|
277
|
+
def istitle(s: str) -> bool:
|
|
278
|
+
"""
|
|
279
|
+
Return `True` if the string is a titlecased string and there is at least one character, `False` otherwise.
|
|
280
|
+
|
|
281
|
+
Equivalent to [`str.istitle()`](https://docs.python.org/3/library/stdtypes.html#str.istitle)
|
|
282
|
+
"""
|
|
283
|
+
return s.istitle()
|
|
284
|
+
|
|
285
|
+
@func.udf
|
|
286
|
+
def isspace(s: str) -> bool:
|
|
287
|
+
"""
|
|
288
|
+
Return `True` if there are only whitespace characters in the string and there is at least one character, `False` otherwise.
|
|
289
|
+
|
|
290
|
+
Equivalent to [`str.isspace()`](https://docs.python.org/3/library/stdtypes.html#str.isspace)
|
|
291
|
+
"""
|
|
292
|
+
return s.isspace()
|
|
293
|
+
|
|
294
|
+
@func.udf
|
|
295
|
+
def len(s: str) -> int:
|
|
296
|
+
"""
|
|
297
|
+
Return the number of characters in the string.
|
|
298
|
+
|
|
299
|
+
Equivalent to [`len(str)`](https://docs.python.org/3/library/functions.html#len)
|
|
300
|
+
"""
|
|
301
|
+
return s.__len__()
|
|
302
|
+
|
|
303
|
+
@func.udf
|
|
304
|
+
def ljust(s: str, width: int, fillchar: str = ' ') -> str:
|
|
305
|
+
"""
|
|
306
|
+
Return the string left-justified in a string of length `width`. Padding is done using the specified `fillchar` (default is a space).
|
|
307
|
+
|
|
308
|
+
Equivalent to [`str.ljust()`](https://docs.python.org/3/library/stdtypes.html#str.ljust)
|
|
309
|
+
|
|
310
|
+
Args:
|
|
311
|
+
s: String.
|
|
312
|
+
width: Minimum width of resulting string; additional characters will be filled with character defined in `fillchar`.
|
|
313
|
+
fillchar: Additional character for filling.
|
|
314
|
+
"""
|
|
315
|
+
return s.ljust(width, fillchar)
|
|
316
|
+
|
|
317
|
+
@func.udf
|
|
318
|
+
def lower(s: str) -> str:
|
|
319
|
+
"""
|
|
320
|
+
Return a copy of the string with all the cased characters converted to lowercase.
|
|
321
|
+
|
|
322
|
+
Equivalent to [`str.lower()`](https://docs.python.org/3/library/stdtypes.html#str.lower)
|
|
323
|
+
"""
|
|
324
|
+
return s.lower()
|
|
325
|
+
|
|
326
|
+
@func.udf
|
|
327
|
+
def lstrip(s: str, chars: Optional[str] = None) -> str:
|
|
328
|
+
"""
|
|
329
|
+
Return a copy of the string with leading characters removed. The `chars` argument is a string specifying the set of
|
|
330
|
+
characters to be removed. If omitted or `None`, whitespace characters are removed.
|
|
331
|
+
|
|
332
|
+
Equivalent to [`str.lstrip()`](https://docs.python.org/3/library/stdtypes.html#str.lstrip)
|
|
333
|
+
|
|
334
|
+
Args:
|
|
335
|
+
s: String.
|
|
336
|
+
chars: The set of characters to be removed.
|
|
337
|
+
"""
|
|
338
|
+
return s.lstrip(chars)
|
|
339
|
+
|
|
340
|
+
@func.udf
|
|
341
|
+
def match(s: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
342
|
+
"""
|
|
343
|
+
Determine if string starts with a match of a regular expression
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
s: String.
|
|
347
|
+
pattern: regular expression pattern
|
|
348
|
+
case: if False, ignore case
|
|
349
|
+
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
350
|
+
"""
|
|
351
|
+
import re
|
|
352
|
+
if not case:
|
|
353
|
+
flags |= re.IGNORECASE
|
|
354
|
+
return bool(re.match(pattern, s, flags))
|
|
355
|
+
|
|
356
|
+
@func.udf
|
|
357
|
+
def normalize(s: str, form: str) -> str:
|
|
358
|
+
"""
|
|
359
|
+
Return the Unicode normal form for `s`.
|
|
360
|
+
|
|
361
|
+
Equivalent to [`unicodedata.normalize()`](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize)
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
s: String.
|
|
365
|
+
form: Unicode normal form (`'NFC'`, `'NFKC'`, `'NFD'`, `'NFKD'`)
|
|
366
|
+
"""
|
|
367
|
+
import unicodedata
|
|
368
|
+
return unicodedata.normalize(form, s)
|
|
369
|
+
|
|
370
|
+
@func.udf
|
|
371
|
+
def pad(s: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
|
|
372
|
+
"""
|
|
373
|
+
Pad string up to width
|
|
374
|
+
|
|
375
|
+
Args:
|
|
376
|
+
s: String.
|
|
377
|
+
width: Minimum width of resulting string; additional characters will be filled with character defined in `fillchar`.
|
|
378
|
+
side: Side from which to fill resulting string (`'left'`, `'right'`, `'both'`)
|
|
379
|
+
fillchar: Additional character for filling
|
|
380
|
+
"""
|
|
381
|
+
if side == 'left':
|
|
382
|
+
return s.ljust(width, fillchar)
|
|
383
|
+
elif side == 'right':
|
|
384
|
+
return s.rjust(width, fillchar)
|
|
385
|
+
elif side == 'both':
|
|
386
|
+
return s.center(width, fillchar)
|
|
387
|
+
else:
|
|
388
|
+
raise ValueError(f"Invalid side: {side}")
|
|
389
|
+
|
|
390
|
+
@func.udf
|
|
391
|
+
def partition(s: str, sep: str = ' ') -> list:
|
|
392
|
+
"""
|
|
393
|
+
Splits `s` at the first occurrence of `sep`, and returns 3 elements containing the part before the
|
|
394
|
+
separator, the separator itself, and the part after the separator. If the separator is not found, return 3 elements
|
|
395
|
+
containing `s` itself, followed by two empty strings.
|
|
396
|
+
"""
|
|
397
|
+
idx = s.find(sep)
|
|
398
|
+
if idx == -1:
|
|
399
|
+
return [s, '', '']
|
|
400
|
+
from builtins import len
|
|
401
|
+
return [s[:idx], sep, s[idx + len(sep):]]
|
|
402
|
+
|
|
403
|
+
@func.udf
|
|
404
|
+
def removeprefix(s: str, prefix: str) -> str:
|
|
405
|
+
"""
|
|
406
|
+
Remove prefix from `s`. If the prefix is not present, returns `s`.
|
|
407
|
+
"""
|
|
408
|
+
if s.startswith(prefix):
|
|
409
|
+
# we need to avoid referring to our symbol 'len'
|
|
410
|
+
from builtins import len
|
|
411
|
+
return s[len(prefix):]
|
|
412
|
+
return s
|
|
413
|
+
|
|
414
|
+
@func.udf
|
|
415
|
+
def removesuffix(s: str, suffix: str) -> str:
    """
    Remove suffix from `s`. If the suffix is not present (or is empty), returns `s` unchanged.

    Args:
        s: String.
        suffix: Suffix to remove.

    Returns:
        `s` without the trailing `suffix`, or `s` itself if it does not end with `suffix`.
    """
    # `s.endswith('')` is always True and `s[:-0]` evaluates to '', so an empty
    # suffix must be excluded explicitly or the whole string would be dropped.
    if suffix and s.endswith(suffix):
        # we need to avoid referring to our symbol 'len'
        from builtins import len
        return s[:-len(suffix)]
    return s
|
|
424
|
+
|
|
425
|
+
@func.udf
|
|
426
|
+
def repeat(s: str, n: int) -> str:
|
|
427
|
+
"""
|
|
428
|
+
Repeat `s` `n` times.
|
|
429
|
+
"""
|
|
430
|
+
return s * n
|
|
431
|
+
|
|
432
|
+
@func.udf
|
|
433
|
+
def replace(
    s: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
) -> str:
    """
    Replace occurrences of `pattern` in `s` with `repl`.

    Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace) or
    [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub), depending on the value of regex.

    Args:
        s: String.
        pattern: string literal or regular expression
        repl: replacement string
        n: number of replacements to make (-1 for all)
        case: if False, ignore case (only used when `regex` is True)
        flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
            (only used when `regex` is True)
        regex: if True, treat pattern as a regular expression

    Returns:
        A copy of `s` with up to `n` occurrences of `pattern` replaced by `repl`.
    """
    if not regex:
        return s.replace(pattern, repl, n)

    # `re.sub` interprets count=0 as "replace everything", whereas `str.replace`
    # interprets it as "replace nothing"; keep both modes consistent.
    if n == 0:
        return s

    import re
    if not case:
        flags |= re.IGNORECASE
    # Any negative n means "all", mirroring str.replace. count/flags are passed by
    # keyword because passing them positionally is deprecated as of Python 3.13.
    return re.sub(pattern, repl, s, count=max(n, 0), flags=flags)
|
|
458
|
+
|
|
459
|
+
@func.udf
|
|
460
|
+
def rfind(s: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
461
|
+
"""
|
|
462
|
+
Return the highest index in `s` where `substr` is found, such that `substr` is contained within `s[start:end]`.
|
|
463
|
+
|
|
464
|
+
Equivalent to [`str.rfind()`](https://docs.python.org/3/library/stdtypes.html#str.rfind).
|
|
465
|
+
|
|
466
|
+
Args:
|
|
467
|
+
s: String.
|
|
468
|
+
substr: substring to search for
|
|
469
|
+
start: slice start
|
|
470
|
+
end: slice end
|
|
471
|
+
"""
|
|
472
|
+
return s.rfind(substr, start, end)
|
|
473
|
+
|
|
474
|
+
@func.udf
|
|
475
|
+
def rindex(s: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
476
|
+
"""
|
|
477
|
+
Return the highest index in `s` where `substr` is found, such that `substr` is contained within `s[start:end]`.
|
|
478
|
+
Raises ValueError if `substr` is not found.
|
|
479
|
+
|
|
480
|
+
Equivalent to [`str.rindex()`](https://docs.python.org/3/library/stdtypes.html#str.rindex).
|
|
481
|
+
"""
|
|
482
|
+
return s.rindex(substr, start, end)
|
|
483
|
+
|
|
484
|
+
@func.udf
|
|
485
|
+
def rjust(s: str, width: int, fillchar: str = ' ') -> str:
|
|
486
|
+
"""
|
|
487
|
+
Return `s` right-justified in a string of length `width`.
|
|
488
|
+
|
|
489
|
+
Equivalent to [`str.rjust()`](https://docs.python.org/3/library/stdtypes.html#str.rjust).
|
|
490
|
+
|
|
491
|
+
Args:
|
|
492
|
+
s: String.
|
|
493
|
+
width: Minimum width of resulting string.
|
|
494
|
+
fillchar: Additional character for filling.
|
|
495
|
+
"""
|
|
496
|
+
return s.rjust(width, fillchar)
|
|
497
|
+
|
|
498
|
+
@func.udf
|
|
499
|
+
def rpartition(s: str, sep: str = ' ') -> list:
|
|
500
|
+
"""
|
|
501
|
+
This method splits `s` at the last occurrence of `sep`, and returns a list containing the part before the
|
|
502
|
+
separator, the separator itself, and the part after the separator.
|
|
503
|
+
"""
|
|
504
|
+
idx = s.rfind(sep)
|
|
505
|
+
if idx == -1:
|
|
506
|
+
return [s, '', '']
|
|
507
|
+
from builtins import len
|
|
508
|
+
return [s[:idx], sep, s[idx + len(sep):]]
|
|
509
|
+
|
|
510
|
+
@func.udf
|
|
511
|
+
def rstrip(s: str, chars: Optional[str] = None) -> str:
|
|
512
|
+
"""
|
|
513
|
+
Return a copy of `s` with trailing characters removed.
|
|
514
|
+
|
|
515
|
+
Equivalent to [`str.rstrip()`](https://docs.python.org/3/library/stdtypes.html#str.rstrip).
|
|
516
|
+
|
|
517
|
+
Args:
|
|
518
|
+
s: String.
|
|
519
|
+
chars: The set of characters to be removed. If omitted or `None`, whitespace characters are removed.
|
|
520
|
+
"""
|
|
521
|
+
return s.rstrip(chars)
|
|
522
|
+
|
|
523
|
+
@func.udf
|
|
524
|
+
def slice(s: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
|
|
525
|
+
"""
|
|
526
|
+
Return a slice of `s`.
|
|
527
|
+
|
|
528
|
+
Args:
|
|
529
|
+
s: String.
|
|
530
|
+
start: slice start
|
|
531
|
+
stop: slice end
|
|
532
|
+
step: slice step
|
|
533
|
+
"""
|
|
534
|
+
return s[start:stop:step]
|
|
535
|
+
|
|
536
|
+
@func.udf
|
|
537
|
+
def slice_replace(s: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None) -> str:
    """
    Replace a positional slice of a string with another value.

    Follows the semantics of pandas `Series.str.slice_replace`.

    Args:
        s: String.
        start: slice start; if `None`, the replaced slice is unbounded on the left
        stop: slice end; if `None`, the replaced slice is unbounded on the right
        repl: replacement value; if `None`, the slice is replaced with an empty string

    Returns:
        `s` with the characters in `s[start:stop]` replaced by `repl`.
    """
    if repl is None:
        # The default must be usable: treat "no replacement" as deleting the slice.
        repl = ''
    # When the selected slice is empty (e.g. stop precedes start), insert `repl`
    # at `start` instead of duplicating characters around it.
    tail_start = start if s[start:stop] == '' else stop
    # `s[:None]` / `s[None:]` would each yield the whole string, so an unbounded
    # side has to contribute nothing rather than be sliced with None.
    head = '' if start is None else s[:start]
    tail = '' if stop is None else s[tail_start:]
    return head + repl + tail
|
|
548
|
+
|
|
549
|
+
@func.udf
|
|
550
|
+
def startswith(s: str, pattern: str) -> bool:
    """
    Return `True` if `s` starts with `pattern`, otherwise return `False`.

    Equivalent to [`str.startswith()`](https://docs.python.org/3/library/stdtypes.html#str.startswith).

    Args:
        s: String.
        pattern: string literal

    Returns:
        `True` if `s` begins with `pattern`, `False` otherwise.
    """
    # The result is a boolean (same contract as `endswith`), so the return
    # annotation is `bool` rather than `int`.
    return s.startswith(pattern)
|
|
561
|
+
|
|
562
|
+
@func.udf
|
|
563
|
+
def strip(s: str, chars: Optional[str] = None) -> str:
|
|
564
|
+
"""
|
|
565
|
+
Return a copy of `s` with leading and trailing characters removed.
|
|
566
|
+
|
|
567
|
+
Equivalent to [`str.strip()`](https://docs.python.org/3/library/stdtypes.html#str.strip).
|
|
568
|
+
|
|
569
|
+
Args:
|
|
570
|
+
s: String.
|
|
571
|
+
chars: The set of characters to be removed. If omitted or `None`, whitespace characters are removed.
|
|
572
|
+
"""
|
|
573
|
+
return s.strip(chars)
|
|
574
|
+
|
|
575
|
+
@func.udf
|
|
576
|
+
def swapcase(s: str) -> str:
|
|
577
|
+
"""
|
|
578
|
+
Return a copy of `s` with uppercase characters converted to lowercase and vice versa.
|
|
579
|
+
|
|
580
|
+
Equivalent to [`str.swapcase()`](https://docs.python.org/3/library/stdtypes.html#str.swapcase).
|
|
581
|
+
"""
|
|
582
|
+
return s.swapcase()
|
|
583
|
+
|
|
584
|
+
@func.udf
|
|
585
|
+
def title(s: str) -> str:
|
|
586
|
+
"""
|
|
587
|
+
Return a titlecased version of `s`, i.e. words start with uppercase characters, all remaining cased characters are
|
|
588
|
+
lowercase.
|
|
589
|
+
|
|
590
|
+
Equivalent to [`str.title()`](https://docs.python.org/3/library/stdtypes.html#str.title).
|
|
591
|
+
"""
|
|
592
|
+
return s.title()
|
|
593
|
+
|
|
594
|
+
@func.udf
|
|
595
|
+
def upper(s: str) -> str:
|
|
596
|
+
"""
|
|
597
|
+
Return a copy of `s` converted to uppercase.
|
|
598
|
+
Equivalent to [`str.upper()`](https://docs.python.org/3/library/stdtypes.html#str.upper).
|
|
599
|
+
"""
|
|
600
|
+
return s.upper()
|
|
601
|
+
|
|
602
|
+
@func.udf
|
|
603
|
+
def wrap(s: str, width: int, **kwargs: Any) -> list:
    """
    Wraps the single paragraph in `s` so every line is at most `width` characters long.
    Returns a list of output lines, without final newlines.

    Equivalent to [`textwrap.wrap()`](https://docs.python.org/3/library/textwrap.html#textwrap.wrap).

    Args:
        s: String.
        width: Maximum line width.
        kwargs: Additional keyword arguments to pass to `textwrap.wrap()`.

    Returns:
        The wrapped lines as a list of strings.
    """
    import textwrap
    # textwrap.wrap() returns a list of lines, hence the `list` return annotation.
    return textwrap.wrap(s, width, **kwargs)
|
|
617
|
+
|
|
618
|
+
@func.udf
|
|
619
|
+
def zfill(s: str, width: int) -> str:
|
|
620
|
+
"""
|
|
621
|
+
Pad a numeric string `s` with ASCII `0` on the left to a total length of `width`.
|
|
622
|
+
|
|
623
|
+
Equivalent to [`str.zfill()`](https://docs.python.org/3/library/stdtypes.html#str.zfill).
|
|
624
|
+
|
|
625
|
+
Args:
|
|
626
|
+
s: String.
|
|
627
|
+
width: Minimum width of resulting string.
|
|
13
628
|
"""
|
|
14
|
-
return
|
|
629
|
+
return s.zfill(width)
|
|
15
630
|
|
|
16
631
|
|
|
17
632
|
__all__ = local_public_names(__name__)
|
pixeltable/functions/video.py
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `VideoType`.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
from pixeltable.functions import video as pxt_video
|
|
8
|
+
|
|
9
|
+
t = pxt.get_table(...)
|
|
10
|
+
t.select(pxt_video.extract_audio(t.video_col)).collect()
|
|
11
|
+
```
|
|
12
|
+
"""
|
|
13
|
+
|
|
1
14
|
import tempfile
|
|
2
15
|
import uuid
|
|
3
16
|
from pathlib import Path
|
|
@@ -43,6 +56,9 @@ _format_defaults = { # format -> (codec, ext)
|
|
|
43
56
|
allows_window=False,
|
|
44
57
|
)
|
|
45
58
|
class make_video(func.Aggregator):
|
|
59
|
+
"""
|
|
60
|
+
Aggregator that creates a video from a sequence of images.
|
|
61
|
+
"""
|
|
46
62
|
def __init__(self, fps: int = 25):
|
|
47
63
|
"""follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
|
|
48
64
|
self.container: Optional[av.container.OutputContainer] = None
|
|
@@ -84,7 +100,14 @@ _extract_audio_param_types = [
|
|
|
84
100
|
def extract_audio(
|
|
85
101
|
video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
|
|
86
102
|
) -> Optional[str]:
|
|
87
|
-
"""
|
|
103
|
+
"""
|
|
104
|
+
Extract an audio stream from a video file, save it as a media file and return its path.
|
|
105
|
+
|
|
106
|
+
Args:
|
|
107
|
+
stream_idx: Index of the audio stream to extract.
|
|
108
|
+
format: The target audio format. (`'wav'`, `'mp3'`, `'flac'`).
|
|
109
|
+
codec: The codec to use for the audio stream. If not provided, a default codec will be used.
|
|
110
|
+
"""
|
|
88
111
|
if format not in _format_defaults:
|
|
89
112
|
raise ValueError(f'extract_audio(): unsupported audio format: {format}')
|
|
90
113
|
default_codec, ext = _format_defaults[format]
|
|
@@ -107,13 +130,8 @@ def extract_audio(
|
|
|
107
130
|
|
|
108
131
|
@func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)])
|
|
109
132
|
def get_metadata(video: str) -> dict:
|
|
110
|
-
"""
|
|
111
|
-
|
|
112
|
-
Args:
|
|
113
|
-
video (str): Path to the video file.
|
|
114
|
-
|
|
115
|
-
Returns:
|
|
116
|
-
A dictionary containing the associated metadata.
|
|
133
|
+
"""
|
|
134
|
+
Gets various metadata associated with a video file and returns it as a dictionary.
|
|
117
135
|
"""
|
|
118
136
|
with av.open(video) as container:
|
|
119
137
|
assert isinstance(container, av.container.InputContainer)
|