pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,21 +1,25 @@
1
1
  """
2
- Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `StringType`.
2
+ Pixeltable UDFs for `StringType`.
3
3
  It closely follows the Pandas `pandas.Series.str` API.
4
4
 
5
5
  Example:
6
6
  ```python
7
7
  import pixeltable as pxt
8
- from pixeltable.functions import string as pxt_str
9
8
 
10
9
  t = pxt.get_table(...)
11
- t.select(pxt_str.capitalize(t.str_col)).collect()
10
+ t.select(t.str_col.capitalize()).collect()
12
11
  ```
13
12
  """
14
13
 
15
- from typing import Any, Optional
14
+ import builtins
15
+ import re
16
+ import textwrap
17
+ from string import whitespace
18
+ from typing import Any
19
+
20
+ import sqlalchemy as sql
16
21
 
17
22
  import pixeltable as pxt
18
- import pixeltable.exceptions as excs
19
23
  from pixeltable.utils.code import local_public_names
20
24
 
21
25
 
@@ -28,6 +32,12 @@ def capitalize(self: str) -> str:
28
32
  """
29
33
  return self.capitalize()
30
34
 
35
+
36
+ @capitalize.to_sql
37
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
38
+ return sql.func.concat(sql.func.upper(sql.func.left(self, 1)), sql.func.lower(sql.func.right(self, -1)))
39
+
40
+
31
41
  @pxt.udf(is_method=True)
32
42
  def casefold(self: str) -> str:
33
43
  """
@@ -37,6 +47,7 @@ def casefold(self: str) -> str:
37
47
  """
38
48
  return self.casefold()
39
49
 
50
+
40
51
  @pxt.udf(is_method=True)
41
52
  def center(self: str, width: int, fillchar: str = ' ') -> str:
42
53
  """
@@ -50,27 +61,48 @@ def center(self: str, width: int, fillchar: str = ' ') -> str:
50
61
  """
51
62
  return self.center(width, fillchar)
52
63
 
64
+
53
65
  @pxt.udf(is_method=True)
54
- def contains(self: str, pattern: str, case: bool = True, flags: int = 0, regex: bool = True) -> bool:
66
+ def contains(self: str, substr: str, case: bool = True) -> bool:
55
67
  """
56
- Test if string contains pattern or regex.
68
+ Test if string contains a substring.
57
69
 
58
70
  Args:
59
- pattern: string literal or regular expression
71
+ substr: string literal to search for (matched verbatim, not as a regular expression)
60
72
  case: if False, ignore case
61
- flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
62
- regex: if True, treat pattern as a regular expression
63
73
  """
64
- if regex:
65
- import re
66
- if not case:
67
- flags |= re.IGNORECASE
68
- return bool(re.search(pattern, self, flags))
74
+ if case:
75
+ return substr in self
69
76
  else:
70
- if case:
71
- return pattern in self
72
- else:
73
- return pattern.lower() in self.lower()
77
+ return substr.lower() in self.lower()
78
+
79
+
80
+ @contains.to_sql
81
+ def _(self: sql.ColumnElement, substr: sql.ColumnElement, case: sql.ColumnElement | None = None) -> sql.ColumnElement:
82
+ # Replace all occurrences of `%`, `_`, and `\` with escaped versions
83
+ escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
84
+ if case is None:
85
+ # Default `case` is True, so we do a case-sensitive comparison
86
+ return self.like(sql.func.concat('%', escaped_substr, '%'))
87
+ else:
88
+ # Toggle case-sensitivity based on the value of `case`
89
+ return sql.case(
90
+ (case, self.like(sql.func.concat('%', escaped_substr, '%'))),
91
+ else_=sql.func.lower(self).like(sql.func.concat('%', sql.func.lower(escaped_substr), '%')),
92
+ )
93
+
94
+
95
+ @pxt.udf(is_method=True)
96
+ def contains_re(self: str, pattern: str, flags: int = 0) -> bool:
97
+ """
98
+ Test if string contains a regular expression pattern.
99
+
100
+ Args:
101
+ pattern: regular expression pattern
102
+ flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
103
+ """
104
+ return bool(re.search(pattern, self, flags))
105
+
74
106
 
75
107
  @pxt.udf(is_method=True)
76
108
  def count(self: str, pattern: str, flags: int = 0) -> int:
@@ -81,21 +113,28 @@ def count(self: str, pattern: str, flags: int = 0) -> int:
81
113
  pattern: string literal or regular expression
82
114
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
83
115
  """
84
- import re
85
- from builtins import len
86
- return len(re.findall(pattern, self, flags))
116
+ return builtins.len(re.findall(pattern, self, flags))
117
+
87
118
 
88
119
  @pxt.udf(is_method=True)
89
- def endswith(self: str, pattern: str) -> bool:
120
+ def endswith(self: str, substr: str) -> bool:
90
121
  """
91
122
  Return `True` if the string ends with the specified suffix, otherwise return `False`.
92
123
 
93
124
  Equivalent to [`str.endswith()`](https://docs.python.org/3/library/stdtypes.html#str.endswith).
94
125
 
95
126
  Args:
96
- pattern: string literal
127
+ substr: string literal
97
128
  """
98
- return self.endswith(pattern)
129
+ return self.endswith(substr)
130
+
131
+
132
+ @endswith.to_sql
133
+ def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
134
+ # Replace all occurrences of `%`, `_`, and `\` with escaped versions
135
+ escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
136
+ return self.like(sql.func.concat('%', escaped_substr))
137
+
99
138
 
100
139
  @pxt.udf(is_method=True)
101
140
  def fill(self: str, width: int, **kwargs: Any) -> str:
@@ -108,11 +147,11 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
108
147
  width: Maximum line width.
109
148
  kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
110
149
  """
111
- import textwrap
112
150
  return textwrap.fill(self, width, **kwargs)
113
151
 
152
+
114
153
  @pxt.udf(is_method=True)
115
- def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
154
+ def find(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
116
155
  """
117
156
  Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
118
157
 
@@ -125,6 +164,21 @@ def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
125
164
  """
126
165
  return self.find(substr, start, end)
127
166
 
167
+
168
+ @find.to_sql
169
+ def _(
170
+ self: sql.ColumnElement, substr: sql.ColumnElement, start: sql.ColumnElement, end: sql.ColumnElement | None = None
171
+ ) -> sql.ColumnElement:
172
+ sl = pxt.functions.string.slice._to_sql(self, start, end)
173
+ if sl is None:
174
+ return None
175
+
176
+ strpos = sql.func.strpos(sl, substr)
177
+ return sql.case(
178
+ (strpos == 0, -1), (start >= 0, strpos + start - 1), else_=strpos + sql.func.char_length(self) + start - 1
179
+ )
180
+
181
+
128
182
  @pxt.udf(is_method=True)
129
183
  def findall(self: str, pattern: str, flags: int = 0) -> list:
130
184
  """
@@ -136,9 +190,9 @@ def findall(self: str, pattern: str, flags: int = 0) -> list:
136
190
  pattern: regular expression pattern
137
191
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
138
192
  """
139
- import re
140
193
  return re.findall(pattern, self, flags)
141
194
 
195
+
142
196
  @pxt.udf(is_method=True)
143
197
  def format(self: str, *args: Any, **kwargs: Any) -> str:
144
198
  """
@@ -148,6 +202,7 @@ def format(self: str, *args: Any, **kwargs: Any) -> str:
148
202
  """
149
203
  return self.format(*args, **kwargs)
150
204
 
205
+
151
206
  @pxt.udf(is_method=True)
152
207
  def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
153
208
  """
@@ -160,14 +215,14 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
160
215
  case: if False, ignore case
161
216
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
162
217
  """
163
- import re
164
218
  if not case:
165
219
  flags |= re.IGNORECASE
166
220
  _ = bool(re.fullmatch(pattern, self, flags))
167
221
  return bool(re.fullmatch(pattern, self, flags))
168
222
 
223
+
169
224
  @pxt.udf(is_method=True)
170
- def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
225
+ def index(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
171
226
  """
172
227
  Return the lowest index in string where `substr` is found within the slice `[start:end]`.
173
228
  Raises ValueError if `substr` is not found.
@@ -181,6 +236,7 @@ def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
181
236
  """
182
237
  return self.index(substr, start, end)
183
238
 
239
+
184
240
  @pxt.udf(is_method=True)
185
241
  def isalnum(self: str) -> bool:
186
242
  """
@@ -191,6 +247,7 @@ def isalnum(self: str) -> bool:
191
247
  """
192
248
  return self.isalnum()
193
249
 
250
+
194
251
  @pxt.udf(is_method=True)
195
252
  def isalpha(self: str) -> bool:
196
253
  """
@@ -200,6 +257,7 @@ def isalpha(self: str) -> bool:
200
257
  """
201
258
  return self.isalpha()
202
259
 
260
+
203
261
  @pxt.udf(is_method=True)
204
262
  def isascii(self: str) -> bool:
205
263
  """
@@ -209,6 +267,7 @@ def isascii(self: str) -> bool:
209
267
  """
210
268
  return self.isascii()
211
269
 
270
+
212
271
  @pxt.udf(is_method=True)
213
272
  def isdecimal(self: str) -> bool:
214
273
  """
@@ -219,6 +278,7 @@ def isdecimal(self: str) -> bool:
219
278
  """
220
279
  return self.isdecimal()
221
280
 
281
+
222
282
  @pxt.udf(is_method=True)
223
283
  def isdigit(self: str) -> bool:
224
284
  """
@@ -228,6 +288,7 @@ def isdigit(self: str) -> bool:
228
288
  """
229
289
  return self.isdigit()
230
290
 
291
+
231
292
  @pxt.udf(is_method=True)
232
293
  def isidentifier(self: str) -> bool:
233
294
  """
@@ -241,12 +302,14 @@ def isidentifier(self: str) -> bool:
241
302
  @pxt.udf(is_method=True)
242
303
  def islower(self: str) -> bool:
243
304
  """
244
- Return `True` if all cased characters in the string are lowercase and there is at least one cased character, `False` otherwise.
305
+ Return `True` if all cased characters in the string are lowercase and there is at least one cased character,
306
+ `False` otherwise.
245
307
 
246
308
  Equivalent to [`str.islower()`](https://docs.python.org/3/library/stdtypes.html#str.islower)
247
309
  """
248
310
  return self.islower()
249
311
 
312
+
250
313
  @pxt.udf(is_method=True)
251
314
  def isnumeric(self: str) -> bool:
252
315
  """
@@ -256,15 +319,18 @@ def isnumeric(self: str) -> bool:
256
319
  """
257
320
  return self.isnumeric()
258
321
 
322
+
259
323
  @pxt.udf(is_method=True)
260
324
  def isupper(self: str) -> bool:
261
325
  """
262
- Return `True` if all cased characters in the string are uppercase and there is at least one cased character, `False` otherwise.
326
+ Return `True` if all cased characters in the string are uppercase and there is at least one cased character,
327
+ `False` otherwise.
263
328
 
264
329
  Equivalent to [`str.isupper()`](https://docs.python.org/3/library/stdtypes.html#str.isupper)
265
330
  """
266
331
  return self.isupper()
267
332
 
333
+
268
334
  @pxt.udf(is_method=True)
269
335
  def istitle(self: str) -> bool:
270
336
  """
@@ -274,15 +340,18 @@ def istitle(self: str) -> bool:
274
340
  """
275
341
  return self.istitle()
276
342
 
343
+
277
344
  @pxt.udf(is_method=True)
278
345
  def isspace(self: str) -> bool:
279
346
  """
280
- Return `True` if there are only whitespace characters in the string and there is at least one character, `False` otherwise.
347
+ Return `True` if there are only whitespace characters in the string and there is at least one character,
348
+ `False` otherwise.
281
349
 
282
350
  Equivalent to [`str.isspace()`](https://docs.python.org/3/library/stdtypes.html#str.isspace)
283
351
  """
284
352
  return self.isspace()
285
353
 
354
+
286
355
  @pxt.udf
287
356
  def join(sep: str, elements: list) -> str:
288
357
  """
@@ -292,6 +361,7 @@ def join(sep: str, elements: list) -> str:
292
361
  """
293
362
  return sep.join(elements)
294
363
 
364
+
295
365
  @pxt.udf(is_method=True)
296
366
  def len(self: str) -> int:
297
367
  """
@@ -299,7 +369,13 @@ def len(self: str) -> int:
299
369
 
300
370
  Equivalent to [`len(str)`](https://docs.python.org/3/library/functions.html#len)
301
371
  """
302
- return self.__len__()
372
+ return builtins.len(self)
373
+
374
+
375
+ @len.to_sql
376
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
377
+ return sql.func.char_length(self)
378
+
303
379
 
304
380
  @pxt.udf(is_method=True)
305
381
  def ljust(self: str, width: int, fillchar: str = ' ') -> str:
@@ -309,11 +385,13 @@ def ljust(self: str, width: int, fillchar: str = ' ') -> str:
309
385
  Equivalent to [`str.ljust()`](https://docs.python.org/3/library/stdtypes.html#str.ljust)
310
386
 
311
387
  Args:
312
- width: Minimum width of resulting string; additional characters will be filled with character defined in `fillchar`.
388
+ width: Minimum width of resulting string; additional characters will be filled with character defined in
389
+ `fillchar`.
313
390
  fillchar: Additional character for filling.
314
391
  """
315
392
  return self.ljust(width, fillchar)
316
393
 
394
+
317
395
  @pxt.udf(is_method=True)
318
396
  def lower(self: str) -> str:
319
397
  """
@@ -323,8 +401,14 @@ def lower(self: str) -> str:
323
401
  """
324
402
  return self.lower()
325
403
 
404
+
405
+ @lower.to_sql
406
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
407
+ return sql.func.lower(self)
408
+
409
+
326
410
  @pxt.udf(is_method=True)
327
- def lstrip(self: str, chars: Optional[str] = None) -> str:
411
+ def lstrip(self: str, chars: str | None = None) -> str:
328
412
  """
329
413
  Return a copy of the string with leading characters removed. The `chars` argument is a string specifying the set of
330
414
  characters to be removed. If omitted or `None`, whitespace characters are removed.
@@ -336,6 +420,12 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
336
420
  """
337
421
  return self.lstrip(chars)
338
422
 
423
+
424
+ @lstrip.to_sql
425
+ def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
426
+ return sql.func.ltrim(self, chars if chars is not None else whitespace)
427
+
428
+
339
429
  @pxt.udf(is_method=True)
340
430
  def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
341
431
  """
@@ -346,11 +436,11 @@ def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
346
436
  case: if False, ignore case
347
437
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
348
438
  """
349
- import re
350
439
  if not case:
351
440
  flags |= re.IGNORECASE
352
441
  return bool(re.match(pattern, self, flags))
353
442
 
443
+
354
444
  @pxt.udf(is_method=True)
355
445
  def normalize(self: str, form: str) -> str:
356
446
  """
@@ -359,19 +449,22 @@ def normalize(self: str, form: str) -> str:
359
449
  Equivalent to [`unicodedata.normalize()`](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize)
360
450
 
361
451
  Args:
362
- form: Unicode normal form (`‘NFC’`, `‘NFKC’`, `‘NFD’`, `‘NFKD’`)
452
+ form: Unicode normal form (`'NFC'`, `'NFKC'`, `'NFD'`, `'NFKD'`)
363
453
  """
364
454
  import unicodedata
455
+
365
456
  return unicodedata.normalize(form, self) # type: ignore[arg-type]
366
457
 
458
+
367
459
  @pxt.udf(is_method=True)
368
460
  def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
369
461
  """
370
462
  Pad string up to width
371
463
 
372
464
  Args:
373
- width: Minimum width of resulting string; additional characters will be filled with character defined in `fillchar`.
374
- side: Side from which to fill resulting string (`‘left’`, `‘right’`, `‘both’`)
465
+ width: Minimum width of resulting string; additional characters will be filled with character defined in
466
+ `fillchar`.
467
+ side: Side from which to fill resulting string (`'left'`, `'right'`, `'both'`)
375
468
  fillchar: Additional character for filling
376
469
  """
377
470
  if side == 'left':
@@ -381,7 +474,8 @@ def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
381
474
  elif side == 'both':
382
475
  return self.center(width, fillchar)
383
476
  else:
384
- raise ValueError(f"Invalid side: {side}")
477
+ raise ValueError(f'Invalid side: {side}')
478
+
385
479
 
386
480
  @pxt.udf(is_method=True)
387
481
  def partition(self: str, sep: str = ' ') -> list:
@@ -393,30 +487,34 @@ def partition(self: str, sep: str = ' ') -> list:
393
487
  idx = self.find(sep)
394
488
  if idx == -1:
395
489
  return [self, '', '']
396
- from builtins import len
397
- return [self[:idx], sep, self[idx + len(sep):]]
490
+ return [self[:idx], sep, self[idx + builtins.len(sep) :]]
491
+
398
492
 
399
493
  @pxt.udf(is_method=True)
400
494
  def removeprefix(self: str, prefix: str) -> str:
401
495
  """
402
496
  Remove prefix. If the prefix is not present, returns string.
403
497
  """
404
- if self.startswith(prefix):
405
- # we need to avoid referring to our symbol 'len'
406
- from builtins import len
407
- return self[len(prefix):]
408
- return self
498
+ return self.removeprefix(prefix)
499
+
500
+
501
+ @removeprefix.to_sql
502
+ def _(self: sql.ColumnElement, prefix: sql.ColumnElement) -> sql.ColumnElement:
503
+ return sql.case((startswith._to_sql(self, prefix), sql.func.right(self, -sql.func.char_length(prefix))), else_=self)
504
+
409
505
 
410
506
  @pxt.udf(is_method=True)
411
507
  def removesuffix(self: str, suffix: str) -> str:
412
508
  """
413
509
  Remove suffix. If the suffix is not present, returns string.
414
510
  """
415
- if self.endswith(suffix):
416
- # we need to avoid referring to our symbol 'len'
417
- from builtins import len
418
- return self[:-len(suffix)]
419
- return self
511
+ return self.removesuffix(suffix)
512
+
513
+
514
+ @removesuffix.to_sql
515
+ def _(self: sql.ColumnElement, suffix: sql.ColumnElement) -> sql.ColumnElement:
516
+ return sql.case((endswith._to_sql(self, suffix), sql.func.left(self, -sql.func.char_length(suffix))), else_=self)
517
+
420
518
 
421
519
  @pxt.udf(is_method=True)
422
520
  def repeat(self: str, n: int) -> str:
@@ -425,34 +523,70 @@ def repeat(self: str, n: int) -> str:
425
523
  """
426
524
  return self * n
427
525
 
526
+
527
+ @repeat.to_sql
528
+ def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
529
+ return sql.func.repeat(self, n.cast(sql.types.INT))
530
+
531
+
428
532
  @pxt.udf(is_method=True)
429
- def replace(
430
- self: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
431
- ) -> str:
533
+ def replace(self: str, substr: str, repl: str, n: int | None = None) -> str:
432
534
  """
433
- Replace occurrences of `pattern` with `repl`.
535
+ Replace occurrences of `substr` with `repl`.
434
536
 
435
- Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace) or
436
- [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub), depending on the value of regex.
537
+ Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace).
437
538
 
438
539
  Args:
439
- pattern: string literal or regular expression
540
+ substr: string literal
440
541
  repl: replacement string
441
- n: number of replacements to make (-1 for all)
442
- case: if False, ignore case
542
+ n: number of replacements to make (if `None`, replace all occurrences)
543
+ """
544
+ return self.replace(substr, repl, n or -1)
545
+
546
+
547
+ @replace.to_sql
548
+ def _(
549
+ self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: sql.ColumnElement | None = None
550
+ ) -> sql.ColumnElement:
551
+ if n is not None:
552
+ return None # SQL does not support bounding the number of replacements
553
+
554
+ return sql.func.replace(self, substr, repl)
555
+
556
+
557
+ @pxt.udf(is_method=True)
558
+ def replace_re(self: str, pattern: str, repl: str, n: int | None = None, flags: int = 0) -> str:
559
+ """
560
+ Replace occurrences of a regular expression pattern with `repl`.
561
+
562
+ Equivalent to [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub).
563
+
564
+ Args:
565
+ pattern: regular expression pattern
566
+ repl: replacement string
567
+ n: number of replacements to make (if `None`, replace all occurrences)
443
568
  flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
444
- regex: if True, treat pattern as a regular expression
445
569
  """
446
- if regex:
447
- import re
448
- if not case:
449
- flags |= re.IGNORECASE
450
- return re.sub(pattern, repl, self, 0 if n == -1 else n, flags)
451
- else:
452
- return self.replace(pattern, repl, n)
570
+ return re.sub(pattern, repl, self, count=(n or 0), flags=flags)
571
+
572
+
573
+ @pxt.udf(is_method=True)
574
+ def reverse(self: str) -> str:
575
+ """
576
+ Return a reversed copy of the string.
577
+
578
+ Equivalent to `str[::-1]`.
579
+ """
580
+ return self[::-1]
581
+
582
+
583
+ @reverse.to_sql
584
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
585
+ return sql.func.reverse(self)
586
+
453
587
 
454
588
  @pxt.udf(is_method=True)
455
- def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
589
+ def rfind(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
456
590
  """
457
591
  Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
458
592
 
@@ -465,8 +599,9 @@ def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
465
599
  """
466
600
  return self.rfind(substr, start, end)
467
601
 
602
+
468
603
  @pxt.udf(is_method=True)
469
- def rindex(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
604
+ def rindex(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
470
605
  """
471
606
  Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
472
607
  Raises ValueError if `substr` is not found.
@@ -475,6 +610,7 @@ def rindex(self: str, substr: str, start: Optional[int] = 0, end: Optional[int]
475
610
  """
476
611
  return self.rindex(substr, start, end)
477
612
 
613
+
478
614
  @pxt.udf(is_method=True)
479
615
  def rjust(self: str, width: int, fillchar: str = ' ') -> str:
480
616
  """
@@ -488,6 +624,7 @@ def rjust(self: str, width: int, fillchar: str = ' ') -> str:
488
624
  """
489
625
  return self.rjust(width, fillchar)
490
626
 
627
+
491
628
  @pxt.udf(is_method=True)
492
629
  def rpartition(self: str, sep: str = ' ') -> list:
493
630
  """
@@ -497,11 +634,11 @@ def rpartition(self: str, sep: str = ' ') -> list:
497
634
  idx = self.rfind(sep)
498
635
  if idx == -1:
499
636
  return [self, '', '']
500
- from builtins import len
501
- return [self[:idx], sep, self[idx + len(sep):]]
637
+ return [self[:idx], sep, self[idx + builtins.len(sep) :]]
638
+
502
639
 
503
640
  @pxt.udf(is_method=True)
504
- def rstrip(self: str, chars: Optional[str] = None) -> str:
641
+ def rstrip(self: str, chars: str | None = None) -> str:
505
642
  """
506
643
  Return a copy of string with trailing characters removed.
507
644
 
@@ -512,8 +649,14 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
512
649
  """
513
650
  return self.rstrip(chars)
514
651
 
652
+
653
@rstrip.to_sql
def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
    """
    SQL translation of `rstrip` via the SQL `rtrim()` function.

    When `chars` is not given, the module-level `whitespace` value is passed as the second argument instead.
    """
    if chars is None:
        return sql.func.rtrim(self, whitespace)
    return sql.func.rtrim(self, chars)
656
+
657
+
515
658
  @pxt.udf(is_method=True)
516
- def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
659
+ def slice(self: str, start: int | None = None, stop: int | None = None, step: int | None = None) -> str:
517
660
  """
518
661
  Return a slice.
519
662
 
@@ -524,8 +667,44 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
524
667
  """
525
668
  return self[start:stop:step]
526
669
 
527
- @pxt.udf(is_method=True)
528
- def slice_replace(self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None) -> str:
670
+
671
@slice.to_sql
def _(
    self: sql.ColumnElement,
    start: sql.ColumnElement | None = None,
    stop: sql.ColumnElement | None = None,
    step: sql.ColumnElement | None = None,
) -> sql.ColumnElement | None:
    """
    SQL translation of `slice` (Python `self[start:stop]`) in terms of `substr()`.

    Returns `None` when `step` is given; no SQL expression is produced for stepped slices.

    Python indices are 0-based with an exclusive `stop`, whereas SQL `substr(string, from, count)` takes a
    1-based starting position and a character count. Both bounds are therefore first converted to 1-based
    positions (negative indices are resolved against `char_length(self)`), and the count is derived from them.
    """
    if step is not None:
        return None

    if start is not None:
        start = start.cast(sql.types.INT)  # Postgres won't accept a BIGINT
        start = sql.case(
            (start >= 0, start + 1),  # SQL is 1-based, Python is 0-based
            else_=sql.func.char_length(self) + start + 1,  # negative index
        )
        # a negative index reaching past the beginning of the string clamps to the first character
        start = sql.func.greatest(start, 1)

    if stop is not None:
        stop = stop.cast(sql.types.INT)  # Postgres won't accept a BIGINT
        stop = sql.case(
            (stop >= 0, stop + 1),  # SQL is 1-based, Python is 0-based
            else_=sql.func.char_length(self) + stop + 1,  # negative index
        )
        stop = sql.func.greatest(stop, 0)

    if start is None:
        if stop is None:
            return self
        # `stop` is now the 1-based position just past the last wanted character, so a slice that begins at
        # position 1 holds `stop - 1` characters; clamp so that the count never goes negative
        return sql.func.substr(self, 1, sql.func.greatest(stop - 1, 0))
    if stop is None:
        return sql.func.substr(self, start)
    # both bounds are 1-based positions, so their difference is the character count
    return sql.func.substr(self, start, sql.func.greatest(stop - start, 0))
704
+
705
+
706
+ @pxt.udf(is_method=True)
707
+ def slice_replace(self: str, start: int | None = None, stop: int | None = None, repl: str | None = None) -> str:
529
708
  """
530
709
  Replace a positional slice with another value.
531
710
 
@@ -536,20 +715,29 @@ def slice_replace(self: str, start: Optional[int] = None, stop: Optional[int] =
536
715
  """
537
716
  return self[:start] + repl + self[stop:]
538
717
 
718
+
539
719
@pxt.udf(is_method=True)
def startswith(self: str, substr: str) -> int:
    """
    Test whether the string begins with `substr`: `True` if it does, `False` otherwise.

    Equivalent to [`str.startswith()`](https://docs.python.org/3/library/stdtypes.html#str.startswith).

    Args:
        substr: string literal
    """
    # NOTE(review): the declared return type is `int`, while `str.startswith()` produces a `bool`
    has_prefix = self.startswith(substr)
    return has_prefix
730
+
731
+
732
@startswith.to_sql
def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
    """SQL translation of `startswith`: a `LIKE` match against the escaped `substr` followed by `%`."""
    # Every `%`, `_`, and `\` in `substr` is rewritten to its backslash-escaped form
    like_prefix = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
    like_pattern = sql.func.concat(like_prefix, '%')
    return self.like(like_pattern)
737
+
550
738
 
551
739
  @pxt.udf(is_method=True)
552
- def strip(self: str, chars: Optional[str] = None) -> str:
740
+ def strip(self: str, chars: str | None = None) -> str:
553
741
  """
554
742
  Return a copy of string with leading and trailing characters removed.
555
743
 
@@ -560,6 +748,12 @@ def strip(self: str, chars: Optional[str] = None) -> str:
560
748
  """
561
749
  return self.strip(chars)
562
750
 
751
+
752
@strip.to_sql
def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
    """
    SQL translation of `strip` via the SQL `trim()` function.

    When `chars` is not given, the module-level `whitespace` value is passed as the second argument instead.
    """
    if chars is None:
        return sql.func.trim(self, whitespace)
    return sql.func.trim(self, chars)
755
+
756
+
563
757
  @pxt.udf(is_method=True)
564
758
  def swapcase(self: str) -> str:
565
759
  """
@@ -569,6 +763,7 @@ def swapcase(self: str) -> str:
569
763
  """
570
764
  return self.swapcase()
571
765
 
766
+
572
767
  @pxt.udf(is_method=True)
573
768
  def title(self: str) -> str:
574
769
  """
@@ -579,6 +774,7 @@ def title(self: str) -> str:
579
774
  """
580
775
  return self.title()
581
776
 
777
+
582
778
  @pxt.udf(is_method=True)
583
779
  def upper(self: str) -> str:
584
780
  """
@@ -588,6 +784,12 @@ def upper(self: str) -> str:
588
784
  """
589
785
  return self.upper()
590
786
 
787
+
788
@upper.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    """SQL translation of `upper`: delegates to the SQL `upper()` function."""
    uppercased_expr = sql.func.upper(self)
    return uppercased_expr
791
+
792
+
591
793
  @pxt.udf(is_method=True)
592
794
  def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
593
795
  """
@@ -600,9 +802,9 @@ def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
600
802
  width: Maximum line width.
601
803
  kwargs: Additional keyword arguments to pass to `textwrap.wrap()`.
602
804
  """
603
- import textwrap
604
805
  return textwrap.wrap(self, width, **kwargs)
605
806
 
807
+
606
808
  @pxt.udf(is_method=True)
607
809
  def zfill(self: str, width: int) -> str:
608
810
  """
@@ -616,8 +818,31 @@ def zfill(self: str, width: int) -> str:
616
818
  return self.zfill(width)
617
819
 
618
820
 
821
def string_splitter(text: Any, separators: str) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
    """Iterator that breaks a string into chunks, as directed by `separators`.

    Each chunk is yielded in a `text` field.
    Chunked text will be cleaned with `ftfy.fix_text` to fix up common problems with unicode sequences.

    Args:
        text: the string to chunk (e.g. a table column such as `tbl.text`, as in the example below).
        separators: separators to use to chunk the document. Currently the only supported option is `'sentence'`.

    Examples:
        This example assumes an existing table `tbl` with a column `text` of type `pxt.String`.

        Create a view that splits all strings on sentence boundaries:

        >>> pxt.create_view(
        ...     'sentence_chunks',
        ...     tbl,
        ...     iterator=string_splitter(tbl.text, separators='sentence')
        ... )
    """
    # Thin wrapper: the actual iterator construction lives in `StringSplitter._create`
    splitter_cls = pxt.iterators.string.StringSplitter
    return splitter_cls._create(text=text, separators=separators)
842
+
843
+
619
844
  __all__ = local_public_names(__name__)
620
845
 
621
846
 
622
- def __dir__():
847
def __dir__() -> list[str]:
    """Return the module's `__all__` list, i.e. the names reported by `dir()` on this module."""
    return __all__