pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/functions/string.py
CHANGED
|
@@ -1,21 +1,25 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Pixeltable
|
|
2
|
+
Pixeltable UDFs for `StringType`.
|
|
3
3
|
It closely follows the Pandas `pandas.Series.str` API.
|
|
4
4
|
|
|
5
5
|
Example:
|
|
6
6
|
```python
|
|
7
7
|
import pixeltable as pxt
|
|
8
|
-
from pixeltable.functions import string as pxt_str
|
|
9
8
|
|
|
10
9
|
t = pxt.get_table(...)
|
|
11
|
-
t.select(
|
|
10
|
+
t.select(t.str_col.capitalize()).collect()
|
|
12
11
|
```
|
|
13
12
|
"""
|
|
14
13
|
|
|
15
|
-
|
|
14
|
+
import builtins
|
|
15
|
+
import re
|
|
16
|
+
import textwrap
|
|
17
|
+
from string import whitespace
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
import sqlalchemy as sql
|
|
16
21
|
|
|
17
22
|
import pixeltable as pxt
|
|
18
|
-
import pixeltable.exceptions as excs
|
|
19
23
|
from pixeltable.utils.code import local_public_names
|
|
20
24
|
|
|
21
25
|
|
|
@@ -28,6 +32,12 @@ def capitalize(self: str) -> str:
|
|
|
28
32
|
"""
|
|
29
33
|
return self.capitalize()
|
|
30
34
|
|
|
35
|
+
|
|
36
|
+
@capitalize.to_sql
|
|
37
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
38
|
+
return sql.func.concat(sql.func.upper(sql.func.left(self, 1)), sql.func.lower(sql.func.right(self, -1)))
|
|
39
|
+
|
|
40
|
+
|
|
31
41
|
@pxt.udf(is_method=True)
|
|
32
42
|
def casefold(self: str) -> str:
|
|
33
43
|
"""
|
|
@@ -37,6 +47,7 @@ def casefold(self: str) -> str:
|
|
|
37
47
|
"""
|
|
38
48
|
return self.casefold()
|
|
39
49
|
|
|
50
|
+
|
|
40
51
|
@pxt.udf(is_method=True)
|
|
41
52
|
def center(self: str, width: int, fillchar: str = ' ') -> str:
|
|
42
53
|
"""
|
|
@@ -50,27 +61,48 @@ def center(self: str, width: int, fillchar: str = ' ') -> str:
|
|
|
50
61
|
"""
|
|
51
62
|
return self.center(width, fillchar)
|
|
52
63
|
|
|
64
|
+
|
|
53
65
|
@pxt.udf(is_method=True)
|
|
54
|
-
def contains(self: str,
|
|
66
|
+
def contains(self: str, substr: str, case: bool = True) -> bool:
|
|
55
67
|
"""
|
|
56
|
-
Test if string contains
|
|
68
|
+
Test if string contains a substring.
|
|
57
69
|
|
|
58
70
|
Args:
|
|
59
|
-
|
|
71
|
+
substr: string literal or regular expression
|
|
60
72
|
case: if False, ignore case
|
|
61
|
-
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
62
|
-
regex: if True, treat pattern as a regular expression
|
|
63
73
|
"""
|
|
64
|
-
if
|
|
65
|
-
|
|
66
|
-
if not case:
|
|
67
|
-
flags |= re.IGNORECASE
|
|
68
|
-
return bool(re.search(pattern, self, flags))
|
|
74
|
+
if case:
|
|
75
|
+
return substr in self
|
|
69
76
|
else:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
77
|
+
return substr.lower() in self.lower()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@contains.to_sql
|
|
81
|
+
def _(self: sql.ColumnElement, substr: sql.ColumnElement, case: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
82
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
83
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
84
|
+
if case is None:
|
|
85
|
+
# Default `case` is True, so we do a case-sensitive comparison
|
|
86
|
+
return self.like(sql.func.concat('%', escaped_substr, '%'))
|
|
87
|
+
else:
|
|
88
|
+
# Toggle case-sensitivity based on the value of `case`
|
|
89
|
+
return sql.case(
|
|
90
|
+
(case, self.like(sql.func.concat('%', escaped_substr, '%'))),
|
|
91
|
+
else_=sql.func.lower(self).like(sql.func.concat('%', sql.func.lower(escaped_substr), '%')),
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@pxt.udf(is_method=True)
|
|
96
|
+
def contains_re(self: str, pattern: str, flags: int = 0) -> bool:
|
|
97
|
+
"""
|
|
98
|
+
Test if string contains a regular expression pattern.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
pattern: regular expression pattern
|
|
102
|
+
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
103
|
+
"""
|
|
104
|
+
return bool(re.search(pattern, self, flags))
|
|
105
|
+
|
|
74
106
|
|
|
75
107
|
@pxt.udf(is_method=True)
|
|
76
108
|
def count(self: str, pattern: str, flags: int = 0) -> int:
|
|
@@ -81,21 +113,28 @@ def count(self: str, pattern: str, flags: int = 0) -> int:
|
|
|
81
113
|
pattern: string literal or regular expression
|
|
82
114
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
83
115
|
"""
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
return len(re.findall(pattern, self, flags))
|
|
116
|
+
return builtins.len(re.findall(pattern, self, flags))
|
|
117
|
+
|
|
87
118
|
|
|
88
119
|
@pxt.udf(is_method=True)
|
|
89
|
-
def endswith(self: str,
|
|
120
|
+
def endswith(self: str, substr: str) -> bool:
|
|
90
121
|
"""
|
|
91
122
|
Return `True` if the string ends with the specified suffix, otherwise return `False`.
|
|
92
123
|
|
|
93
124
|
Equivalent to [`str.endswith()`](https://docs.python.org/3/library/stdtypes.html#str.endswith).
|
|
94
125
|
|
|
95
126
|
Args:
|
|
96
|
-
|
|
127
|
+
substr: string literal
|
|
97
128
|
"""
|
|
98
|
-
return self.endswith(
|
|
129
|
+
return self.endswith(substr)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@endswith.to_sql
|
|
133
|
+
def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
|
|
134
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
135
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
136
|
+
return self.like(sql.func.concat('%', escaped_substr))
|
|
137
|
+
|
|
99
138
|
|
|
100
139
|
@pxt.udf(is_method=True)
|
|
101
140
|
def fill(self: str, width: int, **kwargs: Any) -> str:
|
|
@@ -108,11 +147,11 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
|
|
|
108
147
|
width: Maximum line width.
|
|
109
148
|
kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
|
|
110
149
|
"""
|
|
111
|
-
import textwrap
|
|
112
150
|
return textwrap.fill(self, width, **kwargs)
|
|
113
151
|
|
|
152
|
+
|
|
114
153
|
@pxt.udf(is_method=True)
|
|
115
|
-
def find(self: str, substr: str, start:
|
|
154
|
+
def find(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
|
|
116
155
|
"""
|
|
117
156
|
Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
|
|
118
157
|
|
|
@@ -125,6 +164,21 @@ def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
125
164
|
"""
|
|
126
165
|
return self.find(substr, start, end)
|
|
127
166
|
|
|
167
|
+
|
|
168
|
+
@find.to_sql
|
|
169
|
+
def _(
|
|
170
|
+
self: sql.ColumnElement, substr: sql.ColumnElement, start: sql.ColumnElement, end: sql.ColumnElement | None = None
|
|
171
|
+
) -> sql.ColumnElement:
|
|
172
|
+
sl = pxt.functions.string.slice._to_sql(self, start, end)
|
|
173
|
+
if sl is None:
|
|
174
|
+
return None
|
|
175
|
+
|
|
176
|
+
strpos = sql.func.strpos(sl, substr)
|
|
177
|
+
return sql.case(
|
|
178
|
+
(strpos == 0, -1), (start >= 0, strpos + start - 1), else_=strpos + sql.func.char_length(self) + start - 1
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
|
|
128
182
|
@pxt.udf(is_method=True)
|
|
129
183
|
def findall(self: str, pattern: str, flags: int = 0) -> list:
|
|
130
184
|
"""
|
|
@@ -136,9 +190,9 @@ def findall(self: str, pattern: str, flags: int = 0) -> list:
|
|
|
136
190
|
pattern: regular expression pattern
|
|
137
191
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
138
192
|
"""
|
|
139
|
-
import re
|
|
140
193
|
return re.findall(pattern, self, flags)
|
|
141
194
|
|
|
195
|
+
|
|
142
196
|
@pxt.udf(is_method=True)
|
|
143
197
|
def format(self: str, *args: Any, **kwargs: Any) -> str:
|
|
144
198
|
"""
|
|
@@ -148,6 +202,7 @@ def format(self: str, *args: Any, **kwargs: Any) -> str:
|
|
|
148
202
|
"""
|
|
149
203
|
return self.format(*args, **kwargs)
|
|
150
204
|
|
|
205
|
+
|
|
151
206
|
@pxt.udf(is_method=True)
|
|
152
207
|
def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
153
208
|
"""
|
|
@@ -160,14 +215,14 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
|
|
|
160
215
|
case: if False, ignore case
|
|
161
216
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
162
217
|
"""
|
|
163
|
-
import re
|
|
164
218
|
if not case:
|
|
165
219
|
flags |= re.IGNORECASE
|
|
166
220
|
_ = bool(re.fullmatch(pattern, self, flags))
|
|
167
221
|
return bool(re.fullmatch(pattern, self, flags))
|
|
168
222
|
|
|
223
|
+
|
|
169
224
|
@pxt.udf(is_method=True)
|
|
170
|
-
def index(self: str, substr: str, start:
|
|
225
|
+
def index(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
|
|
171
226
|
"""
|
|
172
227
|
Return the lowest index in string where `substr` is found within the slice `[start:end]`.
|
|
173
228
|
Raises ValueError if `substr` is not found.
|
|
@@ -181,6 +236,7 @@ def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
181
236
|
"""
|
|
182
237
|
return self.index(substr, start, end)
|
|
183
238
|
|
|
239
|
+
|
|
184
240
|
@pxt.udf(is_method=True)
|
|
185
241
|
def isalnum(self: str) -> bool:
|
|
186
242
|
"""
|
|
@@ -191,6 +247,7 @@ def isalnum(self: str) -> bool:
|
|
|
191
247
|
"""
|
|
192
248
|
return self.isalnum()
|
|
193
249
|
|
|
250
|
+
|
|
194
251
|
@pxt.udf(is_method=True)
|
|
195
252
|
def isalpha(self: str) -> bool:
|
|
196
253
|
"""
|
|
@@ -200,6 +257,7 @@ def isalpha(self: str) -> bool:
|
|
|
200
257
|
"""
|
|
201
258
|
return self.isalpha()
|
|
202
259
|
|
|
260
|
+
|
|
203
261
|
@pxt.udf(is_method=True)
|
|
204
262
|
def isascii(self: str) -> bool:
|
|
205
263
|
"""
|
|
@@ -209,6 +267,7 @@ def isascii(self: str) -> bool:
|
|
|
209
267
|
"""
|
|
210
268
|
return self.isascii()
|
|
211
269
|
|
|
270
|
+
|
|
212
271
|
@pxt.udf(is_method=True)
|
|
213
272
|
def isdecimal(self: str) -> bool:
|
|
214
273
|
"""
|
|
@@ -219,6 +278,7 @@ def isdecimal(self: str) -> bool:
|
|
|
219
278
|
"""
|
|
220
279
|
return self.isdecimal()
|
|
221
280
|
|
|
281
|
+
|
|
222
282
|
@pxt.udf(is_method=True)
|
|
223
283
|
def isdigit(self: str) -> bool:
|
|
224
284
|
"""
|
|
@@ -228,6 +288,7 @@ def isdigit(self: str) -> bool:
|
|
|
228
288
|
"""
|
|
229
289
|
return self.isdigit()
|
|
230
290
|
|
|
291
|
+
|
|
231
292
|
@pxt.udf(is_method=True)
|
|
232
293
|
def isidentifier(self: str) -> bool:
|
|
233
294
|
"""
|
|
@@ -241,12 +302,14 @@ def isidentifier(self: str) -> bool:
|
|
|
241
302
|
@pxt.udf(is_method=True)
|
|
242
303
|
def islower(self: str) -> bool:
|
|
243
304
|
"""
|
|
244
|
-
Return `True` if all cased characters in the string are lowercase and there is at least one cased character,
|
|
305
|
+
Return `True` if all cased characters in the string are lowercase and there is at least one cased character,
|
|
306
|
+
`False` otherwise.
|
|
245
307
|
|
|
246
308
|
Equivalent to [`str.islower()`](https://docs.python.org/3/library/stdtypes.html#str.islower)
|
|
247
309
|
"""
|
|
248
310
|
return self.islower()
|
|
249
311
|
|
|
312
|
+
|
|
250
313
|
@pxt.udf(is_method=True)
|
|
251
314
|
def isnumeric(self: str) -> bool:
|
|
252
315
|
"""
|
|
@@ -256,15 +319,18 @@ def isnumeric(self: str) -> bool:
|
|
|
256
319
|
"""
|
|
257
320
|
return self.isnumeric()
|
|
258
321
|
|
|
322
|
+
|
|
259
323
|
@pxt.udf(is_method=True)
|
|
260
324
|
def isupper(self: str) -> bool:
|
|
261
325
|
"""
|
|
262
|
-
Return `True` if all cased characters in the string are uppercase and there is at least one cased character,
|
|
326
|
+
Return `True` if all cased characters in the string are uppercase and there is at least one cased character,
|
|
327
|
+
`False` otherwise.
|
|
263
328
|
|
|
264
329
|
Equivalent to [`str.isupper()`](https://docs.python.org/3/library/stdtypes.html#str.isupper)
|
|
265
330
|
"""
|
|
266
331
|
return self.isupper()
|
|
267
332
|
|
|
333
|
+
|
|
268
334
|
@pxt.udf(is_method=True)
|
|
269
335
|
def istitle(self: str) -> bool:
|
|
270
336
|
"""
|
|
@@ -274,15 +340,18 @@ def istitle(self: str) -> bool:
|
|
|
274
340
|
"""
|
|
275
341
|
return self.istitle()
|
|
276
342
|
|
|
343
|
+
|
|
277
344
|
@pxt.udf(is_method=True)
|
|
278
345
|
def isspace(self: str) -> bool:
|
|
279
346
|
"""
|
|
280
|
-
Return `True` if there are only whitespace characters in the string and there is at least one character,
|
|
347
|
+
Return `True` if there are only whitespace characters in the string and there is at least one character,
|
|
348
|
+
`False` otherwise.
|
|
281
349
|
|
|
282
350
|
Equivalent to [`str.isspace()`](https://docs.python.org/3/library/stdtypes.html#str.isspace)
|
|
283
351
|
"""
|
|
284
352
|
return self.isspace()
|
|
285
353
|
|
|
354
|
+
|
|
286
355
|
@pxt.udf
|
|
287
356
|
def join(sep: str, elements: list) -> str:
|
|
288
357
|
"""
|
|
@@ -292,6 +361,7 @@ def join(sep: str, elements: list) -> str:
|
|
|
292
361
|
"""
|
|
293
362
|
return sep.join(elements)
|
|
294
363
|
|
|
364
|
+
|
|
295
365
|
@pxt.udf(is_method=True)
|
|
296
366
|
def len(self: str) -> int:
|
|
297
367
|
"""
|
|
@@ -299,7 +369,13 @@ def len(self: str) -> int:
|
|
|
299
369
|
|
|
300
370
|
Equivalent to [`len(str)`](https://docs.python.org/3/library/functions.html#len)
|
|
301
371
|
"""
|
|
302
|
-
return
|
|
372
|
+
return builtins.len(self)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
@len.to_sql
|
|
376
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
377
|
+
return sql.func.char_length(self)
|
|
378
|
+
|
|
303
379
|
|
|
304
380
|
@pxt.udf(is_method=True)
|
|
305
381
|
def ljust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
@@ -309,11 +385,13 @@ def ljust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
|
309
385
|
Equivalent to [`str.ljust()`](https://docs.python.org/3/library/stdtypes.html#str.ljust)
|
|
310
386
|
|
|
311
387
|
Args:
|
|
312
|
-
width: Minimum width of resulting string; additional characters will be filled with character defined in
|
|
388
|
+
width: Minimum width of resulting string; additional characters will be filled with character defined in
|
|
389
|
+
`fillchar`.
|
|
313
390
|
fillchar: Additional character for filling.
|
|
314
391
|
"""
|
|
315
392
|
return self.ljust(width, fillchar)
|
|
316
393
|
|
|
394
|
+
|
|
317
395
|
@pxt.udf(is_method=True)
|
|
318
396
|
def lower(self: str) -> str:
|
|
319
397
|
"""
|
|
@@ -323,8 +401,14 @@ def lower(self: str) -> str:
|
|
|
323
401
|
"""
|
|
324
402
|
return self.lower()
|
|
325
403
|
|
|
404
|
+
|
|
405
|
+
@lower.to_sql
|
|
406
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
407
|
+
return sql.func.lower(self)
|
|
408
|
+
|
|
409
|
+
|
|
326
410
|
@pxt.udf(is_method=True)
|
|
327
|
-
def lstrip(self: str, chars:
|
|
411
|
+
def lstrip(self: str, chars: str | None = None) -> str:
|
|
328
412
|
"""
|
|
329
413
|
Return a copy of the string with leading characters removed. The `chars` argument is a string specifying the set of
|
|
330
414
|
characters to be removed. If omitted or `None`, whitespace characters are removed.
|
|
@@ -336,6 +420,12 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
336
420
|
"""
|
|
337
421
|
return self.lstrip(chars)
|
|
338
422
|
|
|
423
|
+
|
|
424
|
+
@lstrip.to_sql
|
|
425
|
+
def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
426
|
+
return sql.func.ltrim(self, chars if chars is not None else whitespace)
|
|
427
|
+
|
|
428
|
+
|
|
339
429
|
@pxt.udf(is_method=True)
|
|
340
430
|
def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
341
431
|
"""
|
|
@@ -346,11 +436,11 @@ def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
|
|
|
346
436
|
case: if False, ignore case
|
|
347
437
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
348
438
|
"""
|
|
349
|
-
import re
|
|
350
439
|
if not case:
|
|
351
440
|
flags |= re.IGNORECASE
|
|
352
441
|
return bool(re.match(pattern, self, flags))
|
|
353
442
|
|
|
443
|
+
|
|
354
444
|
@pxt.udf(is_method=True)
|
|
355
445
|
def normalize(self: str, form: str) -> str:
|
|
356
446
|
"""
|
|
@@ -359,19 +449,22 @@ def normalize(self: str, form: str) -> str:
|
|
|
359
449
|
Equivalent to [`unicodedata.normalize()`](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize)
|
|
360
450
|
|
|
361
451
|
Args:
|
|
362
|
-
form: Unicode normal form (
|
|
452
|
+
form: Unicode normal form (`'NFC'`, `'NFKC'`, `'NFD'`, `'NFKD'`)
|
|
363
453
|
"""
|
|
364
454
|
import unicodedata
|
|
455
|
+
|
|
365
456
|
return unicodedata.normalize(form, self) # type: ignore[arg-type]
|
|
366
457
|
|
|
458
|
+
|
|
367
459
|
@pxt.udf(is_method=True)
|
|
368
460
|
def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
|
|
369
461
|
"""
|
|
370
462
|
Pad string up to width
|
|
371
463
|
|
|
372
464
|
Args:
|
|
373
|
-
width: Minimum width of resulting string; additional characters will be filled with character defined in
|
|
374
|
-
|
|
465
|
+
width: Minimum width of resulting string; additional characters will be filled with character defined in
|
|
466
|
+
`fillchar`.
|
|
467
|
+
side: Side from which to fill resulting string (`'left'`, `'right'`, `'both'`)
|
|
375
468
|
fillchar: Additional character for filling
|
|
376
469
|
"""
|
|
377
470
|
if side == 'left':
|
|
@@ -381,7 +474,8 @@ def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
|
|
|
381
474
|
elif side == 'both':
|
|
382
475
|
return self.center(width, fillchar)
|
|
383
476
|
else:
|
|
384
|
-
raise ValueError(f
|
|
477
|
+
raise ValueError(f'Invalid side: {side}')
|
|
478
|
+
|
|
385
479
|
|
|
386
480
|
@pxt.udf(is_method=True)
|
|
387
481
|
def partition(self: str, sep: str = ' ') -> list:
|
|
@@ -393,30 +487,34 @@ def partition(self: str, sep: str = ' ') -> list:
|
|
|
393
487
|
idx = self.find(sep)
|
|
394
488
|
if idx == -1:
|
|
395
489
|
return [self, '', '']
|
|
396
|
-
|
|
397
|
-
|
|
490
|
+
return [self[:idx], sep, self[idx + builtins.len(sep) :]]
|
|
491
|
+
|
|
398
492
|
|
|
399
493
|
@pxt.udf(is_method=True)
|
|
400
494
|
def removeprefix(self: str, prefix: str) -> str:
|
|
401
495
|
"""
|
|
402
496
|
Remove prefix. If the prefix is not present, returns string.
|
|
403
497
|
"""
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
498
|
+
return self.removeprefix(prefix)
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
@removeprefix.to_sql
|
|
502
|
+
def _(self: sql.ColumnElement, prefix: sql.ColumnElement) -> sql.ColumnElement:
|
|
503
|
+
return sql.case((startswith._to_sql(self, prefix), sql.func.right(self, -sql.func.char_length(prefix))), else_=self)
|
|
504
|
+
|
|
409
505
|
|
|
410
506
|
@pxt.udf(is_method=True)
|
|
411
507
|
def removesuffix(self: str, suffix: str) -> str:
|
|
412
508
|
"""
|
|
413
509
|
Remove suffix. If the suffix is not present, returns string.
|
|
414
510
|
"""
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
511
|
+
return self.removesuffix(suffix)
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
@removesuffix.to_sql
|
|
515
|
+
def _(self: sql.ColumnElement, suffix: sql.ColumnElement) -> sql.ColumnElement:
|
|
516
|
+
return sql.case((endswith._to_sql(self, suffix), sql.func.left(self, -sql.func.char_length(suffix))), else_=self)
|
|
517
|
+
|
|
420
518
|
|
|
421
519
|
@pxt.udf(is_method=True)
|
|
422
520
|
def repeat(self: str, n: int) -> str:
|
|
@@ -425,34 +523,70 @@ def repeat(self: str, n: int) -> str:
|
|
|
425
523
|
"""
|
|
426
524
|
return self * n
|
|
427
525
|
|
|
526
|
+
|
|
527
|
+
@repeat.to_sql
|
|
528
|
+
def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
|
|
529
|
+
return sql.func.repeat(self, n.cast(sql.types.INT))
|
|
530
|
+
|
|
531
|
+
|
|
428
532
|
@pxt.udf(is_method=True)
|
|
429
|
-
def replace(
|
|
430
|
-
self: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
|
|
431
|
-
) -> str:
|
|
533
|
+
def replace(self: str, substr: str, repl: str, n: int | None = None) -> str:
|
|
432
534
|
"""
|
|
433
|
-
Replace occurrences of `
|
|
535
|
+
Replace occurrences of `substr` with `repl`.
|
|
434
536
|
|
|
435
|
-
Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace)
|
|
436
|
-
[`re.sub()`](https://docs.python.org/3/library/re.html#re.sub), depending on the value of regex.
|
|
537
|
+
Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace).
|
|
437
538
|
|
|
438
539
|
Args:
|
|
439
|
-
|
|
540
|
+
substr: string literal
|
|
440
541
|
repl: replacement string
|
|
441
|
-
n: number of replacements to make (
|
|
442
|
-
|
|
542
|
+
n: number of replacements to make (if `None`, replace all occurrences)
|
|
543
|
+
"""
|
|
544
|
+
return self.replace(substr, repl, n or -1)
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
@replace.to_sql
|
|
548
|
+
def _(
|
|
549
|
+
self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: sql.ColumnElement | None = None
|
|
550
|
+
) -> sql.ColumnElement:
|
|
551
|
+
if n is not None:
|
|
552
|
+
return None # SQL does not support bounding the number of replacements
|
|
553
|
+
|
|
554
|
+
return sql.func.replace(self, substr, repl)
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
@pxt.udf(is_method=True)
|
|
558
|
+
def replace_re(self: str, pattern: str, repl: str, n: int | None = None, flags: int = 0) -> str:
|
|
559
|
+
"""
|
|
560
|
+
Replace occurrences of a regular expression pattern with `repl`.
|
|
561
|
+
|
|
562
|
+
Equivalent to [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub).
|
|
563
|
+
|
|
564
|
+
Args:
|
|
565
|
+
pattern: regular expression pattern
|
|
566
|
+
repl: replacement string
|
|
567
|
+
n: number of replacements to make (if `None`, replace all occurrences)
|
|
443
568
|
flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
|
|
444
|
-
regex: if True, treat pattern as a regular expression
|
|
445
569
|
"""
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
570
|
+
return re.sub(pattern, repl, self, count=(n or 0), flags=flags)
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
@pxt.udf(is_method=True)
|
|
574
|
+
def reverse(self: str) -> str:
|
|
575
|
+
"""
|
|
576
|
+
Return a reversed copy of the string.
|
|
577
|
+
|
|
578
|
+
Equivalent to `str[::-1]`.
|
|
579
|
+
"""
|
|
580
|
+
return self[::-1]
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
@reverse.to_sql
|
|
584
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
585
|
+
return sql.func.reverse(self)
|
|
586
|
+
|
|
453
587
|
|
|
454
588
|
@pxt.udf(is_method=True)
|
|
455
|
-
def rfind(self: str, substr: str, start:
|
|
589
|
+
def rfind(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
|
|
456
590
|
"""
|
|
457
591
|
Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
|
|
458
592
|
|
|
@@ -465,8 +599,9 @@ def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
|
|
|
465
599
|
"""
|
|
466
600
|
return self.rfind(substr, start, end)
|
|
467
601
|
|
|
602
|
+
|
|
468
603
|
@pxt.udf(is_method=True)
|
|
469
|
-
def rindex(self: str, substr: str, start:
|
|
604
|
+
def rindex(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
|
|
470
605
|
"""
|
|
471
606
|
Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
|
|
472
607
|
Raises ValueError if `substr` is not found.
|
|
@@ -475,6 +610,7 @@ def rindex(self: str, substr: str, start: Optional[int] = 0, end: Optional[int]
|
|
|
475
610
|
"""
|
|
476
611
|
return self.rindex(substr, start, end)
|
|
477
612
|
|
|
613
|
+
|
|
478
614
|
@pxt.udf(is_method=True)
|
|
479
615
|
def rjust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
480
616
|
"""
|
|
@@ -488,6 +624,7 @@ def rjust(self: str, width: int, fillchar: str = ' ') -> str:
|
|
|
488
624
|
"""
|
|
489
625
|
return self.rjust(width, fillchar)
|
|
490
626
|
|
|
627
|
+
|
|
491
628
|
@pxt.udf(is_method=True)
|
|
492
629
|
def rpartition(self: str, sep: str = ' ') -> list:
|
|
493
630
|
"""
|
|
@@ -497,11 +634,11 @@ def rpartition(self: str, sep: str = ' ') -> list:
|
|
|
497
634
|
idx = self.rfind(sep)
|
|
498
635
|
if idx == -1:
|
|
499
636
|
return [self, '', '']
|
|
500
|
-
|
|
501
|
-
|
|
637
|
+
return [self[:idx], sep, self[idx + builtins.len(sep) :]]
|
|
638
|
+
|
|
502
639
|
|
|
503
640
|
@pxt.udf(is_method=True)
|
|
504
|
-
def rstrip(self: str, chars:
|
|
641
|
+
def rstrip(self: str, chars: str | None = None) -> str:
|
|
505
642
|
"""
|
|
506
643
|
Return a copy of string with trailing characters removed.
|
|
507
644
|
|
|
@@ -512,8 +649,14 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
|
|
|
512
649
|
"""
|
|
513
650
|
return self.rstrip(chars)
|
|
514
651
|
|
|
652
|
+
|
|
653
|
+
@rstrip.to_sql
|
|
654
|
+
def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
655
|
+
return sql.func.rtrim(self, chars if chars is not None else whitespace)
|
|
656
|
+
|
|
657
|
+
|
|
515
658
|
@pxt.udf(is_method=True)
|
|
516
|
-
def slice(self: str, start:
|
|
659
|
+
def slice(self: str, start: int | None = None, stop: int | None = None, step: int | None = None) -> str:
|
|
517
660
|
"""
|
|
518
661
|
Return a slice.
|
|
519
662
|
|
|
@@ -524,8 +667,44 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
|
|
|
524
667
|
"""
|
|
525
668
|
return self[start:stop:step]
|
|
526
669
|
|
|
527
|
-
|
|
528
|
-
|
|
670
|
+
|
|
671
|
+
@slice.to_sql
|
|
672
|
+
def _(
|
|
673
|
+
self: sql.ColumnElement,
|
|
674
|
+
start: sql.ColumnElement | None = None,
|
|
675
|
+
stop: sql.ColumnElement | None = None,
|
|
676
|
+
step: sql.ColumnElement | None = None,
|
|
677
|
+
) -> sql.ColumnElement:
|
|
678
|
+
if step is not None:
|
|
679
|
+
return None
|
|
680
|
+
|
|
681
|
+
if start is not None:
|
|
682
|
+
start = start.cast(sql.types.INT) # Postgres won't accept a BIGINT
|
|
683
|
+
start = sql.case(
|
|
684
|
+
(start >= 0, start + 1), # SQL is 1-based, Python is 0-based
|
|
685
|
+
else_=sql.func.char_length(self) + start + 1, # negative index
|
|
686
|
+
)
|
|
687
|
+
start = sql.func.greatest(start, 1)
|
|
688
|
+
|
|
689
|
+
if stop is not None:
|
|
690
|
+
stop = stop.cast(sql.types.INT) # Postgres won't accept a BIGINT
|
|
691
|
+
stop = sql.case(
|
|
692
|
+
(stop >= 0, stop + 1), # SQL is 1-based, Python is 0-based
|
|
693
|
+
else_=sql.func.char_length(self) + stop + 1, # negative index
|
|
694
|
+
)
|
|
695
|
+
stop = sql.func.greatest(stop, 0)
|
|
696
|
+
|
|
697
|
+
if start is None:
|
|
698
|
+
if stop is None:
|
|
699
|
+
return self
|
|
700
|
+
return sql.func.substr(self, 1, stop)
|
|
701
|
+
if stop is None:
|
|
702
|
+
return sql.func.substr(self, start)
|
|
703
|
+
return sql.func.substr(self, start, sql.func.greatest(stop - start, 0))
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
@pxt.udf(is_method=True)
|
|
707
|
+
def slice_replace(self: str, start: int | None = None, stop: int | None = None, repl: str | None = None) -> str:
|
|
529
708
|
"""
|
|
530
709
|
Replace a positional slice with another value.
|
|
531
710
|
|
|
@@ -536,20 +715,29 @@ def slice_replace(self: str, start: Optional[int] = None, stop: Optional[int] =
|
|
|
536
715
|
"""
|
|
537
716
|
return self[:start] + repl + self[stop:]
|
|
538
717
|
|
|
718
|
+
|
|
539
719
|
@pxt.udf(is_method=True)
|
|
540
|
-
def startswith(self: str,
|
|
720
|
+
def startswith(self: str, substr: str) -> int:
|
|
541
721
|
"""
|
|
542
|
-
Return `True` if string starts with `
|
|
722
|
+
Return `True` if string starts with `substr`, otherwise return `False`.
|
|
543
723
|
|
|
544
724
|
Equivalent to [`str.startswith()`](https://docs.python.org/3/library/stdtypes.html#str.startswith).
|
|
545
725
|
|
|
546
726
|
Args:
|
|
547
|
-
|
|
727
|
+
substr: string literal
|
|
548
728
|
"""
|
|
549
|
-
return self.startswith(
|
|
729
|
+
return self.startswith(substr)
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
@startswith.to_sql
|
|
733
|
+
def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
|
|
734
|
+
# Replace all occurrences of `%`, `_`, and `\` with escaped versions
|
|
735
|
+
escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
|
|
736
|
+
return self.like(sql.func.concat(escaped_substr, '%'))
|
|
737
|
+
|
|
550
738
|
|
|
551
739
|
@pxt.udf(is_method=True)
|
|
552
|
-
def strip(self: str, chars:
|
|
740
|
+
def strip(self: str, chars: str | None = None) -> str:
|
|
553
741
|
"""
|
|
554
742
|
Return a copy of string with leading and trailing characters removed.
|
|
555
743
|
|
|
@@ -560,6 +748,12 @@ def strip(self: str, chars: Optional[str] = None) -> str:
|
|
|
560
748
|
"""
|
|
561
749
|
return self.strip(chars)
|
|
562
750
|
|
|
751
|
+
|
|
752
|
+
@strip.to_sql
|
|
753
|
+
def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
|
|
754
|
+
return sql.func.trim(self, chars if chars is not None else whitespace)
|
|
755
|
+
|
|
756
|
+
|
|
563
757
|
@pxt.udf(is_method=True)
|
|
564
758
|
def swapcase(self: str) -> str:
|
|
565
759
|
"""
|
|
@@ -569,6 +763,7 @@ def swapcase(self: str) -> str:
|
|
|
569
763
|
"""
|
|
570
764
|
return self.swapcase()
|
|
571
765
|
|
|
766
|
+
|
|
572
767
|
@pxt.udf(is_method=True)
|
|
573
768
|
def title(self: str) -> str:
|
|
574
769
|
"""
|
|
@@ -579,6 +774,7 @@ def title(self: str) -> str:
|
|
|
579
774
|
"""
|
|
580
775
|
return self.title()
|
|
581
776
|
|
|
777
|
+
|
|
582
778
|
@pxt.udf(is_method=True)
|
|
583
779
|
def upper(self: str) -> str:
|
|
584
780
|
"""
|
|
@@ -588,6 +784,12 @@ def upper(self: str) -> str:
|
|
|
588
784
|
"""
|
|
589
785
|
return self.upper()
|
|
590
786
|
|
|
787
|
+
|
|
788
|
+
@upper.to_sql
|
|
789
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
790
|
+
return sql.func.upper(self)
|
|
791
|
+
|
|
792
|
+
|
|
591
793
|
@pxt.udf(is_method=True)
|
|
592
794
|
def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
593
795
|
"""
|
|
@@ -600,9 +802,9 @@ def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
|
|
|
600
802
|
width: Maximum line width.
|
|
601
803
|
kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
|
|
602
804
|
"""
|
|
603
|
-
import textwrap
|
|
604
805
|
return textwrap.wrap(self, width, **kwargs)
|
|
605
806
|
|
|
807
|
+
|
|
606
808
|
@pxt.udf(is_method=True)
|
|
607
809
|
def zfill(self: str, width: int) -> str:
|
|
608
810
|
"""
|
|
@@ -616,8 +818,31 @@ def zfill(self: str, width: int) -> str:
|
|
|
616
818
|
return self.zfill(width)
|
|
617
819
|
|
|
618
820
|
|
|
821
|
+
def string_splitter(text: Any, separators: str) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
|
|
822
|
+
"""Iterator over chunks of a string. The string is chunked according to the specified `separators`.
|
|
823
|
+
|
|
824
|
+
The iterator yields a `text` field containing the text of the chunk.
|
|
825
|
+
Chunked text will be cleaned with `ftfy.fix_text` to fix up common problems with unicode sequences.
|
|
826
|
+
|
|
827
|
+
Args:
|
|
828
|
+
separators: separators to use to chunk the document. Currently the only supported option is `'sentence'`.
|
|
829
|
+
|
|
830
|
+
Examples:
|
|
831
|
+
This example assumes an existing table `tbl` with a column `text` of type `pxt.String`.
|
|
832
|
+
|
|
833
|
+
Create a view that splits all strings on sentence boundaries:
|
|
834
|
+
|
|
835
|
+
>>> pxt.create_view(
|
|
836
|
+
... 'sentence_chunks',
|
|
837
|
+
... tbl,
|
|
838
|
+
... iterator=string_splitter(tbl.text, separators='sentence')
|
|
839
|
+
... )
|
|
840
|
+
"""
|
|
841
|
+
return pxt.iterators.string.StringSplitter._create(text=text, separators=separators)
|
|
842
|
+
|
|
843
|
+
|
|
619
844
|
__all__ = local_public_names(__name__)
|
|
620
845
|
|
|
621
846
|
|
|
622
|
-
def __dir__():
|
|
847
|
+
def __dir__() -> list[str]:
|
|
623
848
|
return __all__
|