pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/column.py +37 -11
- pixeltable/catalog/globals.py +21 -0
- pixeltable/catalog/insertable_table.py +6 -4
- pixeltable/catalog/table.py +227 -148
- pixeltable/catalog/table_version.py +66 -28
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +18 -19
- pixeltable/dataframe.py +16 -32
- pixeltable/env.py +6 -1
- pixeltable/exec/__init__.py +1 -2
- pixeltable/exec/aggregation_node.py +27 -17
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +9 -26
- pixeltable/exec/exec_node.py +36 -7
- pixeltable/exec/expr_eval_node.py +19 -11
- pixeltable/exec/in_memory_data_node.py +14 -11
- pixeltable/exec/sql_node.py +266 -138
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +3 -1
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +93 -14
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +56 -36
- pixeltable/exprs/expr.py +65 -63
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +26 -15
- pixeltable/exprs/function_call.py +53 -24
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +5 -10
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +14 -13
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +12 -6
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function.py +11 -10
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +84 -42
- pixeltable/functions/huggingface.py +31 -34
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/llama_cpp.py +106 -0
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +59 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +65 -74
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +20 -7
- pixeltable/index/embedding_index.py +12 -14
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +98 -2
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +10 -8
- pixeltable/iterators/video.py +126 -60
- pixeltable/metadata/__init__.py +4 -3
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +54 -12
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +40 -21
- pixeltable/plan.py +149 -165
- pixeltable/py.typed +0 -0
- pixeltable/store.py +57 -37
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +55 -0
- pixeltable/type_system.py +260 -61
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +16 -2
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +10 -11
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
- pixeltable-0.2.22.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
pixeltable/functions/string.py
CHANGED
|
@@ -14,12 +14,12 @@ t.select(pxt_str.capitalize(t.str_col)).collect()
|
|
|
14
14
|
|
|
15
15
|
from typing import Any, Optional
|
|
16
16
|
|
|
17
|
+
import pixeltable as pxt
|
|
17
18
|
import pixeltable.exceptions as excs
|
|
18
|
-
import pixeltable.func as func
|
|
19
19
|
from pixeltable.utils.code import local_public_names
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
@
|
|
22
|
+
@pxt.udf(is_method=True)
|
|
23
23
|
def capitalize(self: str) -> str:
|
|
24
24
|
"""
|
|
25
25
|
Return string with its first character capitalized and the rest lowercased.
|
|
@@ -28,7 +28,7 @@ def capitalize(self: str) -> str:
|
|
|
28
28
|
"""
|
|
29
29
|
return self.capitalize()
|
|
30
30
|
|
|
31
|
-
@
|
|
31
|
+
@pxt.udf(is_method=True)
|
|
32
32
|
def casefold(self: str) -> str:
|
|
33
33
|
"""
|
|
34
34
|
Return a casefolded copy of string.
|
|
@@ -37,7 +37,7 @@ def casefold(self: str) -> str:
|
|
|
37
37
|
"""
|
|
38
38
|
return self.casefold()
|
|
39
39
|
|
|
40
|
-
@
|
|
40
|
+
@pxt.udf(is_method=True)
|
|
41
41
|
def center(self: str, width: int, fillchar: str = ' ') -> str:
|
|
42
42
|
"""
|
|
43
43
|
Return a centered string of length `width`.
|
|
@@ -50,7 +50,7 @@ def center(self: str, width: int, fillchar: str = ' ') -> str:
|
|
|
50
50
|
"""
|
|
51
51
|
return self.center(width, fillchar)
|
|
52
52
|
|
|
53
|
-
@
|
|
53
|
+
@pxt.udf(is_method=True)
|
|
54
54
|
def contains(self: str, pattern: str, case: bool = True, flags: int = 0, regex: bool = True) -> bool:
|
|
55
55
|
"""
|
|
56
56
|
Test if string contains pattern or regex.
|
|
@@ -72,7 +72,7 @@ def contains(self: str, pattern: str, case: bool = True, flags: int = 0, regex:
|
|
|
72
72
|
else:
|
|
73
73
|
return pattern.lower() in self.lower()
|
|
74
74
|
|
|
75
|
-
@
|
|
75
|
+
@pxt.udf(is_method=True)
|
|
76
76
|
def count(self: str, pattern: str, flags: int = 0) -> int:
|
|
77
77
|
"""
|
|
78
78
|
Count occurrences of pattern or regex.
|
|
@@ -85,7 +85,7 @@ def count(self: str, pattern: str, flags: int = 0) -> int:
|
|
|
85
85
|
from builtins import len
|
|
86
86
|
return len(re.findall(pattern, self, flags))
|
|
87
87
|
|
|
88
|
-
@
|
|
88
|
+
@pxt.udf(is_method=True)
|
|
89
89
|
def endswith(self: str, pattern: str) -> bool:
|
|
90
90
|
"""
|
|
91
91
|
Return `True` if the string ends with the specified suffix, otherwise return `False`.
|
|
@@ -97,7 +97,7 @@ def endswith(self: str, pattern: str) -> bool:
|
|
|
97
97
|
"""
|
|
98
98
|
return self.endswith(pattern)
|
|
99
99
|
|
|
100
|
-
@
|
|
100
|
+
@pxt.udf(is_method=True)
|
|
101
101
|
def fill(self: str, width: int, **kwargs: Any) -> str:
|
|
102
102
|
"""
|
|
103
103
|
Wraps the single paragraph in string, and returns a single string containing the wrapped paragraph.
|
|
@@ -111,7 +111,7 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
|
|
|
111
111
|
import textwrap
|
|
112
112
|
return textwrap.fill(self, width, **kwargs)
|
|
113
113
|
|
|
114
|
-
@
|
|
114
|
+
@pxt.udf(is_method=True)
|
|
115
115
|
def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
116
116
|
"""
|
|
117
117
|
Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
|
|
@@ -125,7 +125,7 @@ def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
125
125
|
"""
|
|
126
126
|
return self.find(substr, start, end)
|
|
127
127
|
|
|
128
|
-
@
|
|
128
|
+
@pxt.udf(is_method=True)
|
|
129
129
|
def findall(self: str, pattern: str, flags: int = 0) -> list:
|
|
130
130
|
"""
|
|
131
131
|
Find all occurrences of a regular expression pattern in string.
|
|
@@ -139,7 +139,7 @@ def findall(self: str, pattern: str, flags: int = 0) -> list:
|
|
|
139
139
|
import re
|
|
140
140
|
return re.findall(pattern, self, flags)
|
|
141
141
|
|
|
142
|
-
@
|
|
142
|
+
@pxt.udf(is_method=True)
|
|
143
143
|
def format(self: str, *args: Any, **kwargs: Any) -> str:
|
|
144
144
|
"""
|
|
145
145
|
Perform string formatting.
|
|
@@ -148,7 +148,7 @@ def format(self: str, *args: Any, **kwargs: Any) -> str:
|
|
|
148
148
|
"""
|
|
149
149
|
return self.format(*args, **kwargs)
|
|
150
150
|
|
|
151
|
-
@
|
|
151
|
+
@pxt.udf(is_method=True)
|
|
152
152
|
def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
153
153
|
"""
|
|
154
154
|
Determine if string fully matches a regular expression.
|
|
@@ -166,7 +166,7 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
|
|
|
166
166
|
_ = bool(re.fullmatch(pattern, self, flags))
|
|
167
167
|
return bool(re.fullmatch(pattern, self, flags))
|
|
168
168
|
|
|
169
|
-
@
|
|
169
|
+
@pxt.udf(is_method=True)
|
|
170
170
|
def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
171
171
|
"""
|
|
172
172
|
Return the lowest index in string where `substr` is found within the slice `[start:end]`.
|
|
@@ -181,7 +181,7 @@ def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
181
181
|
"""
|
|
182
182
|
return self.index(substr, start, end)
|
|
183
183
|
|
|
184
|
-
@
|
|
184
|
+
@pxt.udf(is_method=True)
|
|
185
185
|
def isalnum(self: str) -> bool:
|
|
186
186
|
"""
|
|
187
187
|
Return `True` if all characters in the string are alphanumeric and there is at least one character, `False`
|
|
@@ -191,7 +191,7 @@ def isalnum(self: str) -> bool:
|
|
|
191
191
|
"""
|
|
192
192
|
return self.isalnum()
|
|
193
193
|
|
|
194
|
-
@
|
|
194
|
+
@pxt.udf(is_method=True)
|
|
195
195
|
def isalpha(self: str) -> bool:
|
|
196
196
|
"""
|
|
197
197
|
Return `True` if all characters in the string are alphabetic and there is at least one character, `False` otherwise.
|
|
@@ -200,7 +200,7 @@ def isalpha(self: str) -> bool:
|
|
|
200
200
|
"""
|
|
201
201
|
return self.isalpha()
|
|
202
202
|
|
|
203
|
-
@
|
|
203
|
+
@pxt.udf(is_method=True)
|
|
204
204
|
def isascii(self: str) -> bool:
|
|
205
205
|
"""
|
|
206
206
|
Return `True` if the string is empty or all characters in the string are ASCII, `False` otherwise.
|
|
@@ -209,7 +209,7 @@ def isascii(self: str) -> bool:
|
|
|
209
209
|
"""
|
|
210
210
|
return self.isascii()
|
|
211
211
|
|
|
212
|
-
@
|
|
212
|
+
@pxt.udf(is_method=True)
|
|
213
213
|
def isdecimal(self: str) -> bool:
|
|
214
214
|
"""
|
|
215
215
|
Return `True` if all characters in the string are decimal characters and there is at least one character, `False`
|
|
@@ -219,7 +219,7 @@ def isdecimal(self: str) -> bool:
|
|
|
219
219
|
"""
|
|
220
220
|
return self.isdecimal()
|
|
221
221
|
|
|
222
|
-
@
|
|
222
|
+
@pxt.udf(is_method=True)
|
|
223
223
|
def isdigit(self: str) -> bool:
|
|
224
224
|
"""
|
|
225
225
|
Return `True` if all characters in the string are digits and there is at least one character, `False` otherwise.
|
|
@@ -228,7 +228,7 @@ def isdigit(self: str) -> bool:
|
|
|
228
228
|
"""
|
|
229
229
|
return self.isdigit()
|
|
230
230
|
|
|
231
|
-
@
|
|
231
|
+
@pxt.udf(is_method=True)
|
|
232
232
|
def isidentifier(self: str) -> bool:
|
|
233
233
|
"""
|
|
234
234
|
Return `True` if the string is a valid identifier according to the language definition, `False` otherwise.
|
|
@@ -238,7 +238,7 @@ def isidentifier(self: str) -> bool:
|
|
|
238
238
|
return self.isidentifier()
|
|
239
239
|
|
|
240
240
|
|
|
241
|
-
@
|
|
241
|
+
@pxt.udf(is_method=True)
|
|
242
242
|
def islower(self: str) -> bool:
|
|
243
243
|
"""
|
|
244
244
|
Return `True` if all cased characters in the string are lowercase and there is at least one cased character, `False` otherwise.
|
|
@@ -247,7 +247,7 @@ def islower(self: str) -> bool:
|
|
|
247
247
|
"""
|
|
248
248
|
return self.islower()
|
|
249
249
|
|
|
250
|
-
@
|
|
250
|
+
@pxt.udf(is_method=True)
|
|
251
251
|
def isnumeric(self: str) -> bool:
|
|
252
252
|
"""
|
|
253
253
|
Return `True` if all characters in the string are numeric characters, `False` otherwise.
|
|
@@ -256,7 +256,7 @@ def isnumeric(self: str) -> bool:
|
|
|
256
256
|
"""
|
|
257
257
|
return self.isnumeric()
|
|
258
258
|
|
|
259
|
-
@
|
|
259
|
+
@pxt.udf(is_method=True)
|
|
260
260
|
def isupper(self: str) -> bool:
|
|
261
261
|
"""
|
|
262
262
|
Return `True` if all cased characters in the string are uppercase and there is at least one cased character, `False` otherwise.
|
|
@@ -265,7 +265,7 @@ def isupper(self: str) -> bool:
|
|
|
265
265
|
"""
|
|
266
266
|
return self.isupper()
|
|
267
267
|
|
|
268
|
-
@
|
|
268
|
+
@pxt.udf(is_method=True)
|
|
269
269
|
def istitle(self: str) -> bool:
|
|
270
270
|
"""
|
|
271
271
|
Return `True` if the string is a titlecased string and there is at least one character, `False` otherwise.
|
|
@@ -274,7 +274,7 @@ def istitle(self: str) -> bool:
|
|
|
274
274
|
"""
|
|
275
275
|
return self.istitle()
|
|
276
276
|
|
|
277
|
-
@
|
|
277
|
+
@pxt.udf(is_method=True)
|
|
278
278
|
def isspace(self: str) -> bool:
|
|
279
279
|
"""
|
|
280
280
|
Return `True` if there are only whitespace characters in the string and there is at least one character, `False` otherwise.
|
|
@@ -283,7 +283,16 @@ def isspace(self: str) -> bool:
|
|
|
283
283
|
"""
|
|
284
284
|
return self.isspace()
|
|
285
285
|
|
|
286
|
-
@
|
|
286
|
+
@pxt.udf
|
|
287
|
+
def join(sep: str, elements: list) -> str:
|
|
288
|
+
"""
|
|
289
|
+
Return a string which is the concatenation of the strings in `elements`.
|
|
290
|
+
|
|
291
|
+
Equivalent to [`str.join()`](https://docs.python.org/3/library/stdtypes.html#str.join)
|
|
292
|
+
"""
|
|
293
|
+
return sep.join(elements)
|
|
294
|
+
|
|
295
|
+
@pxt.udf(is_method=True)
|
|
287
296
|
def len(self: str) -> int:
|
|
288
297
|
"""
|
|
289
298
|
Return the number of characters in the string.
|
|
@@ -292,7 +301,7 @@ def len(self: str) -> int:
|
|
|
292
301
|
"""
|
|
293
302
|
return self.__len__()
|
|
294
303
|
|
|
295
|
-
@
|
|
304
|
+
@pxt.udf(is_method=True)
|
|
296
305
|
def ljust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
297
306
|
"""
|
|
298
307
|
Return the string left-justified in a string of length `width`.
|
|
@@ -305,7 +314,7 @@ def ljust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
|
305
314
|
"""
|
|
306
315
|
return self.ljust(width, fillchar)
|
|
307
316
|
|
|
308
|
-
@
|
|
317
|
+
@pxt.udf(is_method=True)
|
|
309
318
|
def lower(self: str) -> str:
|
|
310
319
|
"""
|
|
311
320
|
Return a copy of the string with all the cased characters converted to lowercase.
|
|
@@ -314,7 +323,7 @@ def lower(self: str) -> str:
|
|
|
314
323
|
"""
|
|
315
324
|
return self.lower()
|
|
316
325
|
|
|
317
|
-
@
|
|
326
|
+
@pxt.udf(is_method=True)
|
|
318
327
|
def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
319
328
|
"""
|
|
320
329
|
Return a copy of the string with leading characters removed. The `chars` argument is a string specifying the set of
|
|
@@ -327,7 +336,7 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
327
336
|
"""
|
|
328
337
|
return self.lstrip(chars)
|
|
329
338
|
|
|
330
|
-
@
|
|
339
|
+
@pxt.udf(is_method=True)
|
|
331
340
|
def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
332
341
|
"""
|
|
333
342
|
Determine if string starts with a match of a regular expression
|
|
@@ -342,7 +351,7 @@ def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
|
342
351
|
flags |= re.IGNORECASE
|
|
343
352
|
return bool(re.match(pattern, self, flags))
|
|
344
353
|
|
|
345
|
-
@
|
|
354
|
+
@pxt.udf(is_method=True)
|
|
346
355
|
def normalize(self: str, form: str) -> str:
|
|
347
356
|
"""
|
|
348
357
|
Return the Unicode normal form.
|
|
@@ -355,7 +364,7 @@ def normalize(self: str, form: str) -> str:
|
|
|
355
364
|
import unicodedata
|
|
356
365
|
return unicodedata.normalize(form, self) # type: ignore[arg-type]
|
|
357
366
|
|
|
358
|
-
@
|
|
367
|
+
@pxt.udf(is_method=True)
|
|
359
368
|
def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
|
|
360
369
|
"""
|
|
361
370
|
Pad string up to width
|
|
@@ -374,7 +383,7 @@ def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
|
|
|
374
383
|
else:
|
|
375
384
|
raise ValueError(f"Invalid side: {side}")
|
|
376
385
|
|
|
377
|
-
@
|
|
386
|
+
@pxt.udf(is_method=True)
|
|
378
387
|
def partition(self: str, sep: str = ' ') -> list:
|
|
379
388
|
"""
|
|
380
389
|
Splits string at the first occurrence of `sep`, and returns 3 elements containing the part before the
|
|
@@ -387,7 +396,7 @@ def partition(self: str, sep: str = ' ') -> list:
|
|
|
387
396
|
from builtins import len
|
|
388
397
|
return [self[:idx], sep, self[idx + len(sep):]]
|
|
389
398
|
|
|
390
|
-
@
|
|
399
|
+
@pxt.udf(is_method=True)
|
|
391
400
|
def removeprefix(self: str, prefix: str) -> str:
|
|
392
401
|
"""
|
|
393
402
|
Remove prefix. If the prefix is not present, returns string.
|
|
@@ -398,7 +407,7 @@ def removeprefix(self: str, prefix: str) -> str:
|
|
|
398
407
|
return self[len(prefix):]
|
|
399
408
|
return self
|
|
400
409
|
|
|
401
|
-
@
|
|
410
|
+
@pxt.udf(is_method=True)
|
|
402
411
|
def removesuffix(self: str, suffix: str) -> str:
|
|
403
412
|
"""
|
|
404
413
|
Remove suffix. If the suffix is not present, returns string.
|
|
@@ -409,14 +418,14 @@ def removesuffix(self: str, suffix: str) -> str:
|
|
|
409
418
|
return self[:-len(suffix)]
|
|
410
419
|
return self
|
|
411
420
|
|
|
412
|
-
@
|
|
421
|
+
@pxt.udf(is_method=True)
|
|
413
422
|
def repeat(self: str, n: int) -> str:
|
|
414
423
|
"""
|
|
415
424
|
Repeat string `n` times.
|
|
416
425
|
"""
|
|
417
426
|
return self * n
|
|
418
427
|
|
|
419
|
-
@
|
|
428
|
+
@pxt.udf(is_method=True)
|
|
420
429
|
def replace(
|
|
421
430
|
self: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
|
|
422
431
|
) -> str:
|
|
@@ -442,7 +451,7 @@ def replace(
|
|
|
442
451
|
else:
|
|
443
452
|
return self.replace(pattern, repl, n)
|
|
444
453
|
|
|
445
|
-
@
|
|
454
|
+
@pxt.udf(is_method=True)
|
|
446
455
|
def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
447
456
|
"""
|
|
448
457
|
Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
|
|
@@ -456,7 +465,7 @@ def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
456
465
|
"""
|
|
457
466
|
return self.rfind(substr, start, end)
|
|
458
467
|
|
|
459
|
-
@
|
|
468
|
+
@pxt.udf(is_method=True)
|
|
460
469
|
def rindex(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
|
|
461
470
|
"""
|
|
462
471
|
Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
|
|
@@ -466,7 +475,7 @@ def rindex(self: str, substr: str, start: Optional[int] = 0, end: Optional[int]
|
|
|
466
475
|
"""
|
|
467
476
|
return self.rindex(substr, start, end)
|
|
468
477
|
|
|
469
|
-
@
|
|
478
|
+
@pxt.udf(is_method=True)
|
|
470
479
|
def rjust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
471
480
|
"""
|
|
472
481
|
Return the string right-justified in a string of length `width`.
|
|
@@ -479,7 +488,7 @@ def rjust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
|
479
488
|
"""
|
|
480
489
|
return self.rjust(width, fillchar)
|
|
481
490
|
|
|
482
|
-
@
|
|
491
|
+
@pxt.udf(is_method=True)
|
|
483
492
|
def rpartition(self: str, sep: str = ' ') -> list:
|
|
484
493
|
"""
|
|
485
494
|
This method splits string at the last occurrence of `sep`, and returns a list containing the part before the
|
|
@@ -491,7 +500,7 @@ def rpartition(self: str, sep: str = ' ') -> list:
|
|
|
491
500
|
from builtins import len
|
|
492
501
|
return [self[:idx], sep, self[idx + len(sep):]]
|
|
493
502
|
|
|
494
|
-
@
|
|
503
|
+
@pxt.udf(is_method=True)
|
|
495
504
|
def rstrip(self: str, chars: Optional[str] = None) -> str:
|
|
496
505
|
"""
|
|
497
506
|
Return a copy of string with trailing characters removed.
|
|
@@ -503,7 +512,7 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
503
512
|
"""
|
|
504
513
|
return self.rstrip(chars)
|
|
505
514
|
|
|
506
|
-
@
|
|
515
|
+
@pxt.udf(is_method=True)
|
|
507
516
|
def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
|
|
508
517
|
"""
|
|
509
518
|
Return a slice.
|
|
@@ -515,7 +524,7 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
|
|
|
515
524
|
"""
|
|
516
525
|
return self[start:stop:step]
|
|
517
526
|
|
|
518
|
-
@
|
|
527
|
+
@pxt.udf(is_method=True)
|
|
519
528
|
def slice_replace(self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None) -> str:
|
|
520
529
|
"""
|
|
521
530
|
Replace a positional slice with another value.
|
|
@@ -527,7 +536,7 @@ def slice_replace(self: str, start: Optional[int] = None, stop: Optional[int] =
|
|
|
527
536
|
"""
|
|
528
537
|
return self[:start] + repl + self[stop:]
|
|
529
538
|
|
|
530
|
-
@
|
|
539
|
+
@pxt.udf(is_method=True)
|
|
531
540
|
def startswith(self: str, pattern: str) -> int:
|
|
532
541
|
"""
|
|
533
542
|
Return `True` if string starts with `pattern`, otherwise return `False`.
|
|
@@ -539,7 +548,7 @@ def startswith(self: str, pattern: str) -> int:
|
|
|
539
548
|
"""
|
|
540
549
|
return self.startswith(pattern)
|
|
541
550
|
|
|
542
|
-
@
|
|
551
|
+
@pxt.udf(is_method=True)
|
|
543
552
|
def strip(self: str, chars: Optional[str] = None) -> str:
|
|
544
553
|
"""
|
|
545
554
|
Return a copy of string with leading and trailing characters removed.
|
|
@@ -551,7 +560,7 @@ def strip(self: str, chars: Optional[str] = None) -> str:
|
|
|
551
560
|
"""
|
|
552
561
|
return self.strip(chars)
|
|
553
562
|
|
|
554
|
-
@
|
|
563
|
+
@pxt.udf(is_method=True)
|
|
555
564
|
def swapcase(self: str) -> str:
|
|
556
565
|
"""
|
|
557
566
|
Return a copy of string with uppercase characters converted to lowercase and vice versa.
|
|
@@ -560,7 +569,7 @@ def swapcase(self: str) -> str:
|
|
|
560
569
|
"""
|
|
561
570
|
return self.swapcase()
|
|
562
571
|
|
|
563
|
-
@
|
|
572
|
+
@pxt.udf(is_method=True)
|
|
564
573
|
def title(self: str) -> str:
|
|
565
574
|
"""
|
|
566
575
|
Return a titlecased version of string, i.e. words start with uppercase characters, all remaining cased characters
|
|
@@ -570,7 +579,7 @@ def title(self: str) -> str:
|
|
|
570
579
|
"""
|
|
571
580
|
return self.title()
|
|
572
581
|
|
|
573
|
-
@
|
|
582
|
+
@pxt.udf(is_method=True)
|
|
574
583
|
def upper(self: str) -> str:
|
|
575
584
|
"""
|
|
576
585
|
Return a copy of string converted to uppercase.
|
|
@@ -579,7 +588,7 @@ def upper(self: str) -> str:
|
|
|
579
588
|
"""
|
|
580
589
|
return self.upper()
|
|
581
590
|
|
|
582
|
-
@
|
|
591
|
+
@pxt.udf(is_method=True)
|
|
583
592
|
def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
584
593
|
"""
|
|
585
594
|
Wraps the single paragraph in string so every line is at most `width` characters long.
|
|
@@ -594,7 +603,7 @@ def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
|
594
603
|
import textwrap
|
|
595
604
|
return textwrap.wrap(self, width, **kwargs)
|
|
596
605
|
|
|
597
|
-
@
|
|
606
|
+
@pxt.udf(is_method=True)
|
|
598
607
|
def zfill(self: str, width: int) -> str:
|
|
599
608
|
"""
|
|
600
609
|
Pad a numeric string with ASCII `0` on the left to a total length of `width`.
|
|
pixeltable/functions/timestamp.py
CHANGED
|
@@ -15,12 +15,12 @@ from typing import Optional
|
|
|
15
15
|
|
|
16
16
|
import sqlalchemy as sql
|
|
17
17
|
|
|
18
|
+
import pixeltable as pxt
|
|
18
19
|
from pixeltable.env import Env
|
|
19
|
-
import pixeltable.func as func
|
|
20
20
|
from pixeltable.utils.code import local_public_names
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
@
|
|
23
|
+
@pxt.udf(is_property=True)
|
|
24
24
|
def year(self: datetime) -> int:
|
|
25
25
|
"""
|
|
26
26
|
Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
|
|
@@ -36,7 +36,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
36
36
|
return sql.extract('year', self)
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
@
|
|
39
|
+
@pxt.udf(is_property=True)
|
|
40
40
|
def month(self: datetime) -> int:
|
|
41
41
|
"""
|
|
42
42
|
Between 1 and 12 inclusive.
|
|
@@ -51,7 +51,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
51
51
|
return sql.extract('month', self)
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
@
|
|
54
|
+
@pxt.udf(is_property=True)
|
|
55
55
|
def day(self: datetime) -> int:
|
|
56
56
|
"""
|
|
57
57
|
Between 1 and the number of days in the given month of the given year.
|
|
@@ -66,7 +66,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
66
66
|
return sql.extract('day', self)
|
|
67
67
|
|
|
68
68
|
|
|
69
|
-
@
|
|
69
|
+
@pxt.udf(is_property=True)
|
|
70
70
|
def hour(self: datetime) -> int:
|
|
71
71
|
"""
|
|
72
72
|
Between 0 and 23 inclusive.
|
|
@@ -81,7 +81,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
81
81
|
return sql.extract('hour', self)
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
@
|
|
84
|
+
@pxt.udf(is_property=True)
|
|
85
85
|
def minute(self: datetime) -> int:
|
|
86
86
|
"""
|
|
87
87
|
Between 0 and 59 inclusive.
|
|
@@ -96,7 +96,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
96
96
|
return sql.extract('minute', self)
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
@
|
|
99
|
+
@pxt.udf(is_property=True)
|
|
100
100
|
def second(self: datetime) -> int:
|
|
101
101
|
"""
|
|
102
102
|
Between 0 and 59 inclusive.
|
|
@@ -111,7 +111,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
111
111
|
return sql.extract('second', self)
|
|
112
112
|
|
|
113
113
|
|
|
114
|
-
@
|
|
114
|
+
@pxt.udf(is_property=True)
|
|
115
115
|
def microsecond(self: datetime) -> int:
|
|
116
116
|
"""
|
|
117
117
|
Between 0 and 999999 inclusive.
|
|
@@ -126,7 +126,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
126
126
|
return sql.extract('microseconds', self) - sql.extract('second', self) * 1000000
|
|
127
127
|
|
|
128
128
|
|
|
129
|
-
@
|
|
129
|
+
@pxt.udf(is_method=True)
|
|
130
130
|
def astimezone(self: datetime, tz: str) -> datetime:
|
|
131
131
|
"""
|
|
132
132
|
Convert the datetime to the given time zone.
|
|
@@ -139,7 +139,7 @@ def astimezone(self: datetime, tz: str) -> datetime:
|
|
|
139
139
|
return self.astimezone(tzinfo)
|
|
140
140
|
|
|
141
141
|
|
|
142
|
-
@
|
|
142
|
+
@pxt.udf(is_method=True)
|
|
143
143
|
def weekday(self: datetime) -> int:
|
|
144
144
|
"""
|
|
145
145
|
Between 0 (Monday) and 6 (Sunday) inclusive.
|
|
@@ -154,7 +154,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
154
154
|
return sql.extract('isodow', self) - 1
|
|
155
155
|
|
|
156
156
|
|
|
157
|
-
@
|
|
157
|
+
@pxt.udf(is_method=True)
|
|
158
158
|
def isoweekday(self: datetime) -> int:
|
|
159
159
|
"""
|
|
160
160
|
Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
|
|
@@ -169,7 +169,7 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
169
169
|
return sql.extract('isodow', self)
|
|
170
170
|
|
|
171
171
|
|
|
172
|
-
@
|
|
172
|
+
@pxt.udf(is_method=True)
|
|
173
173
|
def isocalendar(self: datetime) -> dict:
|
|
174
174
|
"""
|
|
175
175
|
Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
|
|
@@ -181,7 +181,7 @@ def isocalendar(self: datetime) -> dict:
|
|
|
181
181
|
return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
|
|
182
182
|
|
|
183
183
|
|
|
184
|
-
@
|
|
184
|
+
@pxt.udf(is_method=True)
|
|
185
185
|
def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
|
|
186
186
|
"""
|
|
187
187
|
Return a string representing the date and time in ISO 8601 format.
|
|
@@ -195,7 +195,7 @@ def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
|
|
|
195
195
|
return self.isoformat(sep=sep, timespec=timespec)
|
|
196
196
|
|
|
197
197
|
|
|
198
|
-
@
|
|
198
|
+
@pxt.udf(is_method=True)
|
|
199
199
|
def strftime(self: datetime, format: str) -> str:
|
|
200
200
|
"""
|
|
201
201
|
Return a string representing the date and time, controlled by an explicit format string.
|
|
@@ -208,7 +208,7 @@ def strftime(self: datetime, format: str) -> str:
|
|
|
208
208
|
return self.strftime(format)
|
|
209
209
|
|
|
210
210
|
|
|
211
|
-
@
|
|
211
|
+
@pxt.udf(is_method=True)
|
|
212
212
|
def make_timestamp(
|
|
213
213
|
year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0, microsecond: int = 0
|
|
214
214
|
) -> datetime:
|
|
@@ -234,7 +234,7 @@ def _(
|
|
|
234
234
|
sql.cast(minute, sql.Integer),
|
|
235
235
|
sql.cast(second + microsecond / 1000000.0, sql.Double))
|
|
236
236
|
|
|
237
|
-
# @
|
|
237
|
+
# @pxt.udf
|
|
238
238
|
# def date(self: datetime) -> datetime:
|
|
239
239
|
# """
|
|
240
240
|
# Return the date part of the datetime.
|
|
@@ -245,7 +245,7 @@ def _(
|
|
|
245
245
|
# return datetime(d.year, d.month, d.day)
|
|
246
246
|
#
|
|
247
247
|
#
|
|
248
|
-
# @
|
|
248
|
+
# @pxt.udf
|
|
249
249
|
# def time(self: datetime) -> datetime:
|
|
250
250
|
# """
|
|
251
251
|
# Return the time part of the datetime, with microseconds set to 0.
|
|
@@ -256,7 +256,7 @@ def _(
|
|
|
256
256
|
# return datetime(1, 1, 1, t.hour, t.minute, t.second, t.microsecond)
|
|
257
257
|
|
|
258
258
|
|
|
259
|
-
@
|
|
259
|
+
@pxt.udf(is_method=True)
|
|
260
260
|
def replace(
|
|
261
261
|
self: datetime, year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None,
|
|
262
262
|
hour: Optional[int] = None, minute: Optional[int] = None, second: Optional[int] = None,
|
|
@@ -271,7 +271,7 @@ def replace(
|
|
|
271
271
|
return self.replace(**kwargs)
|
|
272
272
|
|
|
273
273
|
|
|
274
|
-
@
|
|
274
|
+
@pxt.udf(is_method=True)
|
|
275
275
|
def toordinal(self: datetime) -> int:
|
|
276
276
|
"""
|
|
277
277
|
Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
|
|
@@ -281,7 +281,7 @@ def toordinal(self: datetime) -> int:
|
|
|
281
281
|
return self.toordinal()
|
|
282
282
|
|
|
283
283
|
|
|
284
|
-
@
|
|
284
|
+
@pxt.udf(is_method=True)
|
|
285
285
|
def posix_timestamp(self: datetime) -> float:
|
|
286
286
|
"""
|
|
287
287
|
Return POSIX timestamp corresponding to the datetime instance.
|
pixeltable/functions/together.py
CHANGED
|
@@ -185,8 +185,8 @@ _embedding_dimensions_cache = {
|
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
|
|
188
|
-
@pxt.udf(batch_size=32
|
|
189
|
-
def embeddings(input: Batch[str], *, model: str) -> Batch[
|
|
188
|
+
@pxt.udf(batch_size=32)
|
|
189
|
+
def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), float]]:
|
|
190
190
|
"""
|
|
191
191
|
Query an embedding model for a given string of text.
|
|
192
192
|
|
pixeltable/functions/video.py
CHANGED
|
@@ -20,9 +20,8 @@ import av # type: ignore[import-untyped]
|
|
|
20
20
|
import numpy as np
|
|
21
21
|
import PIL.Image
|
|
22
22
|
|
|
23
|
+
import pixeltable as pxt
|
|
23
24
|
import pixeltable.env as env
|
|
24
|
-
import pixeltable.func as func
|
|
25
|
-
import pixeltable.type_system as ts
|
|
26
25
|
from pixeltable.utils.code import local_public_names
|
|
27
26
|
|
|
28
27
|
_format_defaults = { # format -> (codec, ext)
|
|
@@ -48,14 +47,14 @@ _format_defaults = { # format -> (codec, ext)
|
|
|
48
47
|
# output_container.mux(packet)
|
|
49
48
|
|
|
50
49
|
|
|
51
|
-
@
|
|
52
|
-
init_types=[
|
|
53
|
-
update_types=[
|
|
54
|
-
value_type=
|
|
50
|
+
@pxt.uda(
|
|
51
|
+
init_types=[pxt.IntType()],
|
|
52
|
+
update_types=[pxt.ImageType()],
|
|
53
|
+
value_type=pxt.VideoType(),
|
|
55
54
|
requires_order_by=True,
|
|
56
55
|
allows_window=False,
|
|
57
56
|
)
|
|
58
|
-
class make_video(
|
|
57
|
+
class make_video(pxt.Aggregator):
|
|
59
58
|
"""
|
|
60
59
|
Aggregator that creates a video from a sequence of images.
|
|
61
60
|
"""
|
|
@@ -88,18 +87,10 @@ class make_video(func.Aggregator):
|
|
|
88
87
|
return str(self.out_file)
|
|
89
88
|
|
|
90
89
|
|
|
91
|
-
|
|
92
|
-
ts.VideoType(nullable=False),
|
|
93
|
-
ts.IntType(nullable=False),
|
|
94
|
-
ts.StringType(nullable=False),
|
|
95
|
-
ts.StringType(nullable=True),
|
|
96
|
-
]
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
@func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types, is_method=True)
|
|
90
|
+
@pxt.udf(is_method=True)
|
|
100
91
|
def extract_audio(
|
|
101
|
-
video_path:
|
|
102
|
-
) ->
|
|
92
|
+
video_path: pxt.Video, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
|
|
93
|
+
) -> pxt.Audio:
|
|
103
94
|
"""
|
|
104
95
|
Extract an audio stream from a video file, save it as a media file and return its path.
|
|
105
96
|
|
|
@@ -128,8 +119,8 @@ def extract_audio(
|
|
|
128
119
|
return output_filename
|
|
129
120
|
|
|
130
121
|
|
|
131
|
-
@
|
|
132
|
-
def get_metadata(video:
|
|
122
|
+
@pxt.udf(is_method=True)
|
|
123
|
+
def get_metadata(video: pxt.Video) -> dict:
|
|
133
124
|
"""
|
|
134
125
|
Gets various metadata associated with a video file and returns it as a dictionary.
|
|
135
126
|
"""
|