PyPI - pixeltable - Versions diffs - 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl - Mend

pixeltable-0.2.26-py3-none-any.whl → pixeltable-0.5.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (245)

pixeltable/__init__.py +83 -19
pixeltable/_query.py +1444 -0
pixeltable/_version.py +1 -0
pixeltable/catalog/__init__.py +7 -4
pixeltable/catalog/catalog.py +2394 -119
pixeltable/catalog/column.py +225 -104
pixeltable/catalog/dir.py +38 -9
pixeltable/catalog/globals.py +53 -34
pixeltable/catalog/insertable_table.py +265 -115
pixeltable/catalog/path.py +80 -17
pixeltable/catalog/schema_object.py +28 -43
pixeltable/catalog/table.py +1270 -677
pixeltable/catalog/table_metadata.py +103 -0
pixeltable/catalog/table_version.py +1270 -751
pixeltable/catalog/table_version_handle.py +109 -0
pixeltable/catalog/table_version_path.py +137 -42
pixeltable/catalog/tbl_ops.py +53 -0
pixeltable/catalog/update_status.py +191 -0
pixeltable/catalog/view.py +251 -134
pixeltable/config.py +215 -0
pixeltable/env.py +736 -285
pixeltable/exceptions.py +26 -2
pixeltable/exec/__init__.py +7 -2
pixeltable/exec/aggregation_node.py +39 -21
pixeltable/exec/cache_prefetch_node.py +87 -109
pixeltable/exec/cell_materialization_node.py +268 -0
pixeltable/exec/cell_reconstruction_node.py +168 -0
pixeltable/exec/component_iteration_node.py +25 -28
pixeltable/exec/data_row_batch.py +11 -46
pixeltable/exec/exec_context.py +26 -11
pixeltable/exec/exec_node.py +35 -27
pixeltable/exec/expr_eval/__init__.py +3 -0
pixeltable/exec/expr_eval/evaluators.py +365 -0
pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
pixeltable/exec/expr_eval/globals.py +200 -0
pixeltable/exec/expr_eval/row_buffer.py +74 -0
pixeltable/exec/expr_eval/schedulers.py +413 -0
pixeltable/exec/globals.py +35 -0
pixeltable/exec/in_memory_data_node.py +35 -27
pixeltable/exec/object_store_save_node.py +293 -0
pixeltable/exec/row_update_node.py +44 -29
pixeltable/exec/sql_node.py +414 -115
pixeltable/exprs/__init__.py +8 -5
pixeltable/exprs/arithmetic_expr.py +79 -45
pixeltable/exprs/array_slice.py +5 -5
pixeltable/exprs/column_property_ref.py +40 -26
pixeltable/exprs/column_ref.py +254 -61
pixeltable/exprs/comparison.py +14 -9
pixeltable/exprs/compound_predicate.py +9 -10
pixeltable/exprs/data_row.py +213 -72
pixeltable/exprs/expr.py +270 -104
pixeltable/exprs/expr_dict.py +6 -5
pixeltable/exprs/expr_set.py +20 -11
pixeltable/exprs/function_call.py +383 -284
pixeltable/exprs/globals.py +18 -5
pixeltable/exprs/in_predicate.py +7 -7
pixeltable/exprs/inline_expr.py +37 -37
pixeltable/exprs/is_null.py +8 -4
pixeltable/exprs/json_mapper.py +120 -54
pixeltable/exprs/json_path.py +90 -60
pixeltable/exprs/literal.py +61 -16
pixeltable/exprs/method_ref.py +7 -6
pixeltable/exprs/object_ref.py +19 -8
pixeltable/exprs/row_builder.py +238 -75
pixeltable/exprs/rowid_ref.py +53 -15
pixeltable/exprs/similarity_expr.py +65 -50
pixeltable/exprs/sql_element_cache.py +5 -5
pixeltable/exprs/string_op.py +107 -0
pixeltable/exprs/type_cast.py +25 -13
pixeltable/exprs/variable.py +2 -2
pixeltable/func/__init__.py +9 -5
pixeltable/func/aggregate_function.py +197 -92
pixeltable/func/callable_function.py +119 -35
pixeltable/func/expr_template_function.py +101 -48
pixeltable/func/function.py +375 -62
pixeltable/func/function_registry.py +20 -19
pixeltable/func/globals.py +6 -5
pixeltable/func/mcp.py +74 -0
pixeltable/func/query_template_function.py +151 -35
pixeltable/func/signature.py +178 -49
pixeltable/func/tools.py +164 -0
pixeltable/func/udf.py +176 -53
pixeltable/functions/__init__.py +44 -4
pixeltable/functions/anthropic.py +226 -47
pixeltable/functions/audio.py +148 -11
pixeltable/functions/bedrock.py +137 -0
pixeltable/functions/date.py +188 -0
pixeltable/functions/deepseek.py +113 -0
pixeltable/functions/document.py +81 -0
pixeltable/functions/fal.py +76 -0
pixeltable/functions/fireworks.py +72 -20
pixeltable/functions/gemini.py +249 -0
pixeltable/functions/globals.py +208 -53
pixeltable/functions/groq.py +108 -0
pixeltable/functions/huggingface.py +1088 -95
pixeltable/functions/image.py +155 -84
pixeltable/functions/json.py +8 -11
pixeltable/functions/llama_cpp.py +31 -19
pixeltable/functions/math.py +169 -0
pixeltable/functions/mistralai.py +50 -75
pixeltable/functions/net.py +70 -0
pixeltable/functions/ollama.py +29 -36
pixeltable/functions/openai.py +548 -160
pixeltable/functions/openrouter.py +143 -0
pixeltable/functions/replicate.py +15 -14
pixeltable/functions/reve.py +250 -0
pixeltable/functions/string.py +310 -85
pixeltable/functions/timestamp.py +37 -19
pixeltable/functions/together.py +77 -120
pixeltable/functions/twelvelabs.py +188 -0
pixeltable/functions/util.py +7 -2
pixeltable/functions/uuid.py +30 -0
pixeltable/functions/video.py +1528 -117
pixeltable/functions/vision.py +26 -26
pixeltable/functions/voyageai.py +289 -0
pixeltable/functions/whisper.py +19 -10
pixeltable/functions/whisperx.py +179 -0
pixeltable/functions/yolox.py +112 -0
pixeltable/globals.py +716 -236
pixeltable/index/__init__.py +3 -1
pixeltable/index/base.py +17 -21
pixeltable/index/btree.py +32 -22
pixeltable/index/embedding_index.py +155 -92
pixeltable/io/__init__.py +12 -7
pixeltable/io/datarows.py +140 -0
pixeltable/io/external_store.py +83 -125
pixeltable/io/fiftyone.py +24 -33
pixeltable/io/globals.py +47 -182
pixeltable/io/hf_datasets.py +96 -127
pixeltable/io/label_studio.py +171 -156
pixeltable/io/lancedb.py +3 -0
pixeltable/io/pandas.py +136 -115
pixeltable/io/parquet.py +40 -153
pixeltable/io/table_data_conduit.py +702 -0
pixeltable/io/utils.py +100 -0
pixeltable/iterators/__init__.py +8 -4
pixeltable/iterators/audio.py +207 -0
pixeltable/iterators/base.py +9 -3
pixeltable/iterators/document.py +144 -87
pixeltable/iterators/image.py +17 -38
pixeltable/iterators/string.py +15 -12
pixeltable/iterators/video.py +523 -127
pixeltable/metadata/__init__.py +33 -8
pixeltable/metadata/converters/convert_10.py +2 -3
pixeltable/metadata/converters/convert_13.py +2 -2
pixeltable/metadata/converters/convert_15.py +15 -11
pixeltable/metadata/converters/convert_16.py +4 -5
pixeltable/metadata/converters/convert_17.py +4 -5
pixeltable/metadata/converters/convert_18.py +4 -6
pixeltable/metadata/converters/convert_19.py +6 -9
pixeltable/metadata/converters/convert_20.py +3 -6
pixeltable/metadata/converters/convert_21.py +6 -8
pixeltable/metadata/converters/convert_22.py +3 -2
pixeltable/metadata/converters/convert_23.py +33 -0
pixeltable/metadata/converters/convert_24.py +55 -0
pixeltable/metadata/converters/convert_25.py +19 -0
pixeltable/metadata/converters/convert_26.py +23 -0
pixeltable/metadata/converters/convert_27.py +29 -0
pixeltable/metadata/converters/convert_28.py +13 -0
pixeltable/metadata/converters/convert_29.py +110 -0
pixeltable/metadata/converters/convert_30.py +63 -0
pixeltable/metadata/converters/convert_31.py +11 -0
pixeltable/metadata/converters/convert_32.py +15 -0
pixeltable/metadata/converters/convert_33.py +17 -0
pixeltable/metadata/converters/convert_34.py +21 -0
pixeltable/metadata/converters/convert_35.py +9 -0
pixeltable/metadata/converters/convert_36.py +38 -0
pixeltable/metadata/converters/convert_37.py +15 -0
pixeltable/metadata/converters/convert_38.py +39 -0
pixeltable/metadata/converters/convert_39.py +124 -0
pixeltable/metadata/converters/convert_40.py +73 -0
pixeltable/metadata/converters/convert_41.py +12 -0
pixeltable/metadata/converters/convert_42.py +9 -0
pixeltable/metadata/converters/convert_43.py +44 -0
pixeltable/metadata/converters/util.py +44 -18
pixeltable/metadata/notes.py +21 -0
pixeltable/metadata/schema.py +185 -42
pixeltable/metadata/utils.py +74 -0
pixeltable/mypy/__init__.py +3 -0
pixeltable/mypy/mypy_plugin.py +123 -0
pixeltable/plan.py +616 -225
pixeltable/share/__init__.py +3 -0
pixeltable/share/packager.py +797 -0
pixeltable/share/protocol/__init__.py +33 -0
pixeltable/share/protocol/common.py +165 -0
pixeltable/share/protocol/operation_types.py +33 -0
pixeltable/share/protocol/replica.py +119 -0
pixeltable/share/publish.py +349 -0
pixeltable/store.py +398 -232
pixeltable/type_system.py +730 -267
pixeltable/utils/__init__.py +40 -0
pixeltable/utils/arrow.py +201 -29
pixeltable/utils/av.py +298 -0
pixeltable/utils/azure_store.py +346 -0
pixeltable/utils/coco.py +26 -27
pixeltable/utils/code.py +4 -4
pixeltable/utils/console_output.py +46 -0
pixeltable/utils/coroutine.py +24 -0
pixeltable/utils/dbms.py +92 -0
pixeltable/utils/description_helper.py +11 -12
pixeltable/utils/documents.py +60 -61
pixeltable/utils/exception_handler.py +36 -0
pixeltable/utils/filecache.py +38 -22
pixeltable/utils/formatter.py +88 -51
pixeltable/utils/gcs_store.py +295 -0
pixeltable/utils/http.py +133 -0
pixeltable/utils/http_server.py +14 -13
pixeltable/utils/iceberg.py +13 -0
pixeltable/utils/image.py +17 -0
pixeltable/utils/lancedb.py +90 -0
pixeltable/utils/local_store.py +322 -0
pixeltable/utils/misc.py +5 -0
pixeltable/utils/object_stores.py +573 -0
pixeltable/utils/pydantic.py +60 -0
pixeltable/utils/pytorch.py +20 -20
pixeltable/utils/s3_store.py +527 -0
pixeltable/utils/sql.py +32 -5
pixeltable/utils/system.py +30 -0
pixeltable/utils/transactional_directory.py +4 -3
pixeltable-0.5.7.dist-info/METADATA +579 -0
pixeltable-0.5.7.dist-info/RECORD +227 -0
{pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
pixeltable/__version__.py +0 -3
pixeltable/catalog/named_function.py +0 -36
pixeltable/catalog/path_dict.py +0 -141
pixeltable/dataframe.py +0 -894
pixeltable/exec/expr_eval_node.py +0 -232
pixeltable/ext/__init__.py +0 -14
pixeltable/ext/functions/__init__.py +0 -8
pixeltable/ext/functions/whisperx.py +0 -77
pixeltable/ext/functions/yolox.py +0 -157
pixeltable/tool/create_test_db_dump.py +0 -311
pixeltable/tool/create_test_video.py +0 -81
pixeltable/tool/doc_plugins/griffe.py +0 -50
pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
pixeltable/tool/embed_udf.py +0 -9
pixeltable/tool/mypy_plugin.py +0 -55
pixeltable/utils/media_store.py +0 -76
pixeltable/utils/s3.py +0 -16
pixeltable-0.2.26.dist-info/METADATA +0 -400
pixeltable-0.2.26.dist-info/RECORD +0 -156
pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
{pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0

pixeltable/functions/string.py (changed)

@@ -1,21 +1,25 @@
 """
-Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `StringType`.
+Pixeltable UDFs for `StringType`.
 It closely follows the Pandas `pandas.Series.str` API.
 Example:
 ```python
 import pixeltable as pxt
-from pixeltable.functions import string as pxt_str
 t = pxt.get_table(...)
-t.select(pxt_str.capitalize(t.str_col)).collect()
+t.select(t.str_col.capitalize()).collect()
 ```
 """
-from typing import Any, Optional
+import builtins
+import re
+import textwrap
+from string import whitespace
+from typing import Any
+import sqlalchemy as sql
 import pixeltable as pxt
-import pixeltable.exceptions as excs
 from pixeltable.utils.code import local_public_names
@@ -28,6 +32,12 @@ def capitalize(self: str) -> str:
     """
     return self.capitalize()
+@capitalize.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.concat(sql.func.upper(sql.func.left(self, 1)), sql.func.lower(sql.func.right(self, -1)))
 @pxt.udf(is_method=True)
 def casefold(self: str) -> str:
     """
@@ -37,6 +47,7 @@ def casefold(self: str) -> str:
     """
     return self.casefold()
 @pxt.udf(is_method=True)
 def center(self: str, width: int, fillchar: str = ' ') -> str:
     """
@@ -50,27 +61,48 @@ def center(self: str, width: int, fillchar: str = ' ') -> str:
     """
     return self.center(width, fillchar)
 @pxt.udf(is_method=True)
-def contains(self: str, pattern: str, case: bool = True, flags: int = 0, regex: bool = True) -> bool:
+def contains(self: str, substr: str, case: bool = True) -> bool:
     """
-    Test if string contains pattern or regex.
+    Test if string contains a substring.
     Args:
-        pattern: string literal or regular expression
+        substr: string literal or regular expression
         case: if False, ignore case
-        flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
-        regex: if True, treat pattern as a regular expression
     """
-    if regex:
-        import re
-        if not case:
-            flags |= re.IGNORECASE
-        return bool(re.search(pattern, self, flags))
+    if case:
+        return substr in self
     else:
-        if case:
-            return pattern in self
-        else:
-            return pattern.lower() in self.lower()
+        return substr.lower() in self.lower()
+@contains.to_sql
+def _(self: sql.ColumnElement, substr: sql.ColumnElement, case: sql.ColumnElement | None = None) -> sql.ColumnElement:
+    # Replace all occurrences of `%`, `_`, and `\` with escaped versions
+    escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
+    if case is None:
+        # Default `case` is True, so we do a case-sensitive comparison
+        return self.like(sql.func.concat('%', escaped_substr, '%'))
+    else:
+        # Toggle case-sensitivity based on the value of `case`
+        return sql.case(
+            (case, self.like(sql.func.concat('%', escaped_substr, '%'))),
+            else_=sql.func.lower(self).like(sql.func.concat('%', sql.func.lower(escaped_substr), '%')),
+        )
+@pxt.udf(is_method=True)
+def contains_re(self: str, pattern: str, flags: int = 0) -> bool:
+    """
+    Test if string contains a regular expression pattern.
+    Args:
+        pattern: regular expression pattern
+        flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
+    """
+    return bool(re.search(pattern, self, flags))
 @pxt.udf(is_method=True)
 def count(self: str, pattern: str, flags: int = 0) -> int:
@@ -81,21 +113,28 @@ def count(self: str, pattern: str, flags: int = 0) -> int:
         pattern: string literal or regular expression
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
     """
-    import re
-    from builtins import len
-    return len(re.findall(pattern, self, flags))
+    return builtins.len(re.findall(pattern, self, flags))
 @pxt.udf(is_method=True)
-def endswith(self: str, pattern: str) -> bool:
+def endswith(self: str, substr: str) -> bool:
     """
     Return `True` if the string ends with the specified suffix, otherwise return `False`.
     Equivalent to [`str.endswith()`](https://docs.python.org/3/library/stdtypes.html#str.endswith).
     Args:
-        pattern: string literal
+        substr: string literal
     """
-    return self.endswith(pattern)
+    return self.endswith(substr)
+@endswith.to_sql
+def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
+    # Replace all occurrences of `%`, `_`, and `\` with escaped versions
+    escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
+    return self.like(sql.func.concat('%', escaped_substr))
 @pxt.udf(is_method=True)
 def fill(self: str, width: int, **kwargs: Any) -> str:
@@ -108,11 +147,11 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
         width: Maximum line width.
         kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
     """
-    import textwrap
     return textwrap.fill(self, width, **kwargs)
 @pxt.udf(is_method=True)
-def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
+def find(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
     """
     Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
@@ -125,6 +164,21 @@ def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
     """
     return self.find(substr, start, end)
+@find.to_sql
+def _(
+    self: sql.ColumnElement, substr: sql.ColumnElement, start: sql.ColumnElement, end: sql.ColumnElement | None = None
+) -> sql.ColumnElement:
+    sl = pxt.functions.string.slice._to_sql(self, start, end)
+    if sl is None:
+        return None
+    strpos = sql.func.strpos(sl, substr)
+    return sql.case(
+        (strpos == 0, -1), (start >= 0, strpos + start - 1), else_=strpos + sql.func.char_length(self) + start - 1
+    )
 @pxt.udf(is_method=True)
 def findall(self: str, pattern: str, flags: int = 0) -> list:
     """
@@ -136,9 +190,9 @@ def findall(self: str, pattern: str, flags: int = 0) -> list:
         pattern: regular expression pattern
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
     """
-    import re
     return re.findall(pattern, self, flags)
 @pxt.udf(is_method=True)
 def format(self: str, *args: Any, **kwargs: Any) -> str:
     """
@@ -148,6 +202,7 @@ def format(self: str, *args: Any, **kwargs: Any) -> str:
     """
     return self.format(*args, **kwargs)
 @pxt.udf(is_method=True)
 def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
     """
@@ -160,14 +215,14 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
         case: if False, ignore case
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
     """
-    import re
     if not case:
         flags |= re.IGNORECASE
     _ = bool(re.fullmatch(pattern, self, flags))
     return bool(re.fullmatch(pattern, self, flags))
 @pxt.udf(is_method=True)
-def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
+def index(self: str, substr: str, start: int = 0, end: int | None = None) -> int:
     """
     Return the lowest index in string where `substr` is found within the slice `[start:end]`.
     Raises ValueError if `substr` is not found.
@@ -181,6 +236,7 @@ def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
     """
     return self.index(substr, start, end)
 @pxt.udf(is_method=True)
 def isalnum(self: str) -> bool:
     """
@@ -191,6 +247,7 @@ def isalnum(self: str) -> bool:
     """
     return self.isalnum()
 @pxt.udf(is_method=True)
 def isalpha(self: str) -> bool:
     """
@@ -200,6 +257,7 @@ def isalpha(self: str) -> bool:
     """
     return self.isalpha()
 @pxt.udf(is_method=True)
 def isascii(self: str) -> bool:
     """
@@ -209,6 +267,7 @@ def isascii(self: str) -> bool:
     """
     return self.isascii()
 @pxt.udf(is_method=True)
 def isdecimal(self: str) -> bool:
     """
@@ -219,6 +278,7 @@ def isdecimal(self: str) -> bool:
     """
     return self.isdecimal()
 @pxt.udf(is_method=True)
 def isdigit(self: str) -> bool:
     """
@@ -228,6 +288,7 @@ def isdigit(self: str) -> bool:
     """
     return self.isdigit()
 @pxt.udf(is_method=True)
 def isidentifier(self: str) -> bool:
     """
@@ -241,12 +302,14 @@ def isidentifier(self: str) -> bool:
 @pxt.udf(is_method=True)
 def islower(self: str) -> bool:
     """
-    Return `True` if all cased characters in the string are lowercase and there is at least one cased character, `False` otherwise.
+    Return `True` if all cased characters in the string are lowercase and there is at least one cased character,
+    `False` otherwise.
     Equivalent to [`str.islower()`](https://docs.python.org/3/library/stdtypes.html#str.islower)
     """
     return self.islower()
 @pxt.udf(is_method=True)
 def isnumeric(self: str) -> bool:
     """
@@ -256,15 +319,18 @@ def isnumeric(self: str) -> bool:
     """
     return self.isnumeric()
 @pxt.udf(is_method=True)
 def isupper(self: str) -> bool:
     """
-    Return `True` if all cased characters in the string are uppercase and there is at least one cased character, `False` otherwise.
+    Return `True` if all cased characters in the string are uppercase and there is at least one cased character,
+    `False` otherwise.
     Equivalent to [`str.isupper()`](https://docs.python.org/3/library/stdtypes.html#str.isupper)
     """
     return self.isupper()
 @pxt.udf(is_method=True)
 def istitle(self: str) -> bool:
     """
@@ -274,15 +340,18 @@ def istitle(self: str) -> bool:
     """
     return self.istitle()
 @pxt.udf(is_method=True)
 def isspace(self: str) -> bool:
     """
-    Return `True` if there are only whitespace characters in the string and there is at least one character, `False` otherwise.
+    Return `True` if there are only whitespace characters in the string and there is at least one character,
+    `False` otherwise.
     Equivalent to [`str.isspace()`](https://docs.python.org/3/library/stdtypes.html#str.isspace)
     """
     return self.isspace()
 @pxt.udf
 def join(sep: str, elements: list) -> str:
     """
@@ -292,6 +361,7 @@ def join(sep: str, elements: list) -> str:
     """
     return sep.join(elements)
 @pxt.udf(is_method=True)
 def len(self: str) -> int:
     """
@@ -299,7 +369,13 @@ def len(self: str) -> int:
     Equivalent to [`len(str)`](https://docs.python.org/3/library/functions.html#len)
     """
-    return self.__len__()
+    return builtins.len(self)
+@len.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.char_length(self)
 @pxt.udf(is_method=True)
 def ljust(self: str, width: int, fillchar: str = ' ') -> str:
@@ -309,11 +385,13 @@ def ljust(self: str, width: int, fillchar: str = ' ') -> str:
     Equivalent to [`str.ljust()`](https://docs.python.org/3/library/stdtypes.html#str.ljust)
     Args:
-        width: Minimum width of resulting string; additional characters will be filled with character defined in `fillchar`.
+        width: Minimum width of resulting string; additional characters will be filled with character defined in
+            `fillchar`.
         fillchar: Additional character for filling.
     """
     return self.ljust(width, fillchar)
 @pxt.udf(is_method=True)
 def lower(self: str) -> str:
     """
@@ -323,8 +401,14 @@ def lower(self: str) -> str:
     """
     return self.lower()
+@lower.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.lower(self)
 @pxt.udf(is_method=True)
-def lstrip(self: str, chars: Optional[str] = None) -> str:
+def lstrip(self: str, chars: str | None = None) -> str:
     """
     Return a copy of the string with leading characters removed. The `chars` argument is a string specifying the set of
     characters to be removed. If omitted or `None`, whitespace characters are removed.
@@ -336,6 +420,12 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
     """
     return self.lstrip(chars)
+@lstrip.to_sql
+def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
+    return sql.func.ltrim(self, chars if chars is not None else whitespace)
 @pxt.udf(is_method=True)
 def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
     """
@@ -346,11 +436,11 @@ def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
         case: if False, ignore case
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
     """
-    import re
     if not case:
         flags |= re.IGNORECASE
     return bool(re.match(pattern, self, flags))
 @pxt.udf(is_method=True)
 def normalize(self: str, form: str) -> str:
     """
@@ -359,19 +449,22 @@ def normalize(self: str, form: str) -> str:
     Equivalent to [`unicodedata.normalize()`](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize)
     Args:
-        form: Unicode normal form (`‘NFC’`, `‘NFKC’`, `‘NFD’`, `‘NFKD’`)
+        form: Unicode normal form (`'NFC'`, `'NFKC'`, `'NFD'`, `'NFKD'`)
     """
     import unicodedata
     return unicodedata.normalize(form, self)  # type: ignore[arg-type]
 @pxt.udf(is_method=True)
 def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
     """
     Pad string up to width
     Args:
-        width: Minimum width of resulting string; additional characters will be filled with character defined in `fillchar`.
-        side: Side from which to fill resulting string (`‘left’`, `‘right’`, `‘both’`)
+        width: Minimum width of resulting string; additional characters will be filled with character defined in
+            `fillchar`.
+        side: Side from which to fill resulting string (`'left'`, `'right'`, `'both'`)
         fillchar: Additional character for filling
     """
     if side == 'left':
@@ -381,7 +474,8 @@ def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
     elif side == 'both':
         return self.center(width, fillchar)
     else:
-        raise ValueError(f"Invalid side: {side}")
+        raise ValueError(f'Invalid side: {side}')
 @pxt.udf(is_method=True)
 def partition(self: str, sep: str = ' ') -> list:
@@ -393,30 +487,34 @@ def partition(self: str, sep: str = ' ') -> list:
     idx = self.find(sep)
     if idx == -1:
         return [self, '', '']
-    from builtins import len
-    return [self[:idx], sep, self[idx + len(sep):]]
+    return [self[:idx], sep, self[idx + builtins.len(sep) :]]
 @pxt.udf(is_method=True)
 def removeprefix(self: str, prefix: str) -> str:
     """
     Remove prefix. If the prefix is not present, returns string.
     """
-    if self.startswith(prefix):
-        # we need to avoid referring to our symbol 'len'
-        from builtins import len
-        return self[len(prefix):]
-    return self
+    return self.removeprefix(prefix)
+@removeprefix.to_sql
+def _(self: sql.ColumnElement, prefix: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.case((startswith._to_sql(self, prefix), sql.func.right(self, -sql.func.char_length(prefix))), else_=self)
 @pxt.udf(is_method=True)
 def removesuffix(self: str, suffix: str) -> str:
     """
     Remove suffix. If the suffix is not present, returns string.
     """
-    if self.endswith(suffix):
-        # we need to avoid referring to our symbol 'len'
-        from builtins import len
-        return self[:-len(suffix)]
-    return self
+    return self.removesuffix(suffix)
+@removesuffix.to_sql
+def _(self: sql.ColumnElement, suffix: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.case((endswith._to_sql(self, suffix), sql.func.left(self, -sql.func.char_length(suffix))), else_=self)
 @pxt.udf(is_method=True)
 def repeat(self: str, n: int) -> str:
@@ -425,34 +523,70 @@ def repeat(self: str, n: int) -> str:
     """
     return self * n
+@repeat.to_sql
+def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.repeat(self, n.cast(sql.types.INT))
 @pxt.udf(is_method=True)
-def replace(
-        self: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
-) -> str:
+def replace(self: str, substr: str, repl: str, n: int | None = None) -> str:
     """
-    Replace occurrences of `pattern` with `repl`.
+    Replace occurrences of `substr` with `repl`.
-    Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace) or
-    [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub), depending on the value of regex.
+    Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace).
     Args:
-        pattern: string literal or regular expression
+        substr: string literal
         repl: replacement string
-        n: number of replacements to make (-1 for all)
-        case: if False, ignore case
+        n: number of replacements to make (if `None`, replace all occurrences)
+    """
+    return self.replace(substr, repl, n or -1)
+@replace.to_sql
+def _(
+    self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: sql.ColumnElement | None = None
+) -> sql.ColumnElement:
+    if n is not None:
+        return None  # SQL does not support bounding the number of replacements
+    return sql.func.replace(self, substr, repl)
+@pxt.udf(is_method=True)
+def replace_re(self: str, pattern: str, repl: str, n: int | None = None, flags: int = 0) -> str:
+    """
+    Replace occurrences of a regular expression pattern with `repl`.
+    Equivalent to [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub).
+    Args:
+        pattern: regular expression pattern
+        repl: replacement string
+        n: number of replacements to make (if `None`, replace all occurrences)
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
-        regex: if True, treat pattern as a regular expression
     """
-    if regex:
-        import re
-        if not case:
-            flags |= re.IGNORECASE
-        return re.sub(pattern, repl, self, 0 if n == -1 else n, flags)
-    else:
-        return self.replace(pattern, repl, n)
+    return re.sub(pattern, repl, self, count=(n or 0), flags=flags)
+@pxt.udf(is_method=True)
+def reverse(self: str) -> str:
+    """
+    Return a reversed copy of the string.
+    Equivalent to `str[::-1]`.
+    """
+    return self[::-1]
+@reverse.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.reverse(self)
 @pxt.udf(is_method=True)
-def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
+def rfind(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
     """
     Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
@@ -465,8 +599,9 @@ def rfind(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
     """
     return self.rfind(substr, start, end)
 @pxt.udf(is_method=True)
-def rindex(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
+def rindex(self: str, substr: str, start: int | None = 0, end: int | None = None) -> int:
     """
     Return the highest index where `substr` is found, such that `substr` is contained within `[start:end]`.
     Raises ValueError if `substr` is not found.
@@ -475,6 +610,7 @@ def rindex(self: str, substr: str, start: Optional[int] = 0, end: Optional[int]
     """
     return self.rindex(substr, start, end)
 @pxt.udf(is_method=True)
 def rjust(self: str, width: int, fillchar: str = ' ') -> str:
     """
@@ -488,6 +624,7 @@ def rjust(self: str, width: int, fillchar: str = ' ') -> str:
     """
     return self.rjust(width, fillchar)
 @pxt.udf(is_method=True)
 def rpartition(self: str, sep: str = ' ') -> list:
     """
@@ -497,11 +634,11 @@ def rpartition(self: str, sep: str = ' ') -> list:
     idx = self.rfind(sep)
     if idx == -1:
         return [self, '', '']
-    from builtins import len
-    return [self[:idx], sep, self[idx + len(sep):]]
+    return [self[:idx], sep, self[idx + builtins.len(sep) :]]
 @pxt.udf(is_method=True)
-def rstrip(self: str, chars: Optional[str] = None) -> str:
+def rstrip(self: str, chars: str | None = None) -> str:
     """
     Return a copy of string with trailing characters removed.
@@ -512,8 +649,14 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
     """
     return self.rstrip(chars)
+@rstrip.to_sql
+def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
+    return sql.func.rtrim(self, chars if chars is not None else whitespace)
 @pxt.udf(is_method=True)
-def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
+def slice(self: str, start: int | None = None, stop: int | None = None, step: int | None = None) -> str:
     """
     Return a slice.
@@ -524,8 +667,44 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
     """
     return self[start:stop:step]
-@pxt.udf(is_method=True)
-def slice_replace(self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None) -> str:
+@slice.to_sql
+def _(
+    self: sql.ColumnElement,
+    start: sql.ColumnElement | None = None,
+    stop: sql.ColumnElement | None = None,
+    step: sql.ColumnElement | None = None,
+) -> sql.ColumnElement:
+    # SQL translation of `slice` (Python `self[start:stop:step]`), built on `substr(string, from[, count])`.
+    # Returns None when `step` is given: no SQL expression is produced for stepped slices.
+    if step is not None:
+        return None
+    if start is not None:
+        start = start.cast(sql.types.INT)  # Postgres won't accept a BIGINT
+        start = sql.case(
+            (start >= 0, start + 1),  # SQL is 1-based, Python is 0-based
+            else_=sql.func.char_length(self) + start + 1,  # negative index
+        )
+        start = sql.func.greatest(start, 1)  # clamp positions that fall before the first character
+    if stop is not None:
+        stop = stop.cast(sql.types.INT)  # Postgres won't accept a BIGINT
+        stop = sql.case(
+            (stop >= 0, stop + 1),  # SQL is 1-based, Python is 0-based
+            else_=sql.func.char_length(self) + stop + 1,  # negative index
+        )
+        stop = sql.func.greatest(stop, 0)
+    if start is None:
+        if stop is None:
+            return self
+        # `stop` is now the 1-based position of the first excluded character, so the prefix
+        # `self[:stop]` holds `stop - 1` characters (the general formula below with start == 1).
+        # Clamp at 0 so the count passed to `substr` is never negative.
+        return sql.func.substr(self, 1, sql.func.greatest(stop - 1, 0))
+    if stop is None:
+        return sql.func.substr(self, start)
+    # Both bounds are 1-based here, so their difference is the slice length; never negative.
+    return sql.func.substr(self, start, sql.func.greatest(stop - start, 0))
+@pxt.udf(is_method=True)
+def slice_replace(self: str, start: int | None = None, stop: int | None = None, repl: str | None = None) -> str:
     """
     Replace a positional slice with another value.
@@ -536,20 +715,29 @@ def slice_replace(self: str, start: Optional[int] = None, stop: Optional[int] =
     """
     return self[:start] + repl + self[stop:]
 @pxt.udf(is_method=True)
-def startswith(self: str, pattern: str) -> int:
+def startswith(self: str, substr: str) -> int:
     """
-    Return `True` if string starts with `pattern`, otherwise return `False`.
+    Return `True` if string starts with `substr`, otherwise return `False`.
     Equivalent to [`str.startswith()`](https://docs.python.org/3/library/stdtypes.html#str.startswith).
     Args:
-        pattern: string literal
+        substr: string literal
     """
-    return self.startswith(pattern)
+    return self.startswith(substr)
+@startswith.to_sql
+def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
+    # SQL translation of `startswith`: a LIKE match of `self` against `substr` followed by a `%` wildcard.
+    # Replace all occurrences of `%`, `_`, and `\` with escaped versions
+    # (each match is re-emitted with a backslash in front, so it is matched literally by LIKE)
+    escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
+    return self.like(sql.func.concat(escaped_substr, '%'))
 @pxt.udf(is_method=True)
-def strip(self: str, chars: Optional[str] = None) -> str:
+def strip(self: str, chars: str | None = None) -> str:
     """
     Return a copy of string with leading and trailing characters removed.
@@ -560,6 +748,12 @@ def strip(self: str, chars: Optional[str] = None) -> str:
     """
     return self.strip(chars)
+@strip.to_sql
+def _(self: sql.ColumnElement, chars: sql.ColumnElement | None = None) -> sql.ColumnElement:
+    # SQL translation of `strip`: emits `trim(self, <chars>)`.
+    # When `chars` is None, the name `whitespace` (defined outside this excerpt) is passed instead.
+    return sql.func.trim(self, chars if chars is not None else whitespace)
 @pxt.udf(is_method=True)
 def swapcase(self: str) -> str:
     """
@@ -569,6 +763,7 @@ def swapcase(self: str) -> str:
     """
     return self.swapcase()
 @pxt.udf(is_method=True)
 def title(self: str) -> str:
     """
@@ -579,6 +774,7 @@ def title(self: str) -> str:
     """
     return self.title()
 @pxt.udf(is_method=True)
 def upper(self: str) -> str:
     """
@@ -588,6 +784,12 @@ def upper(self: str) -> str:
     """
     return self.upper()
+@upper.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    # SQL translation of `upper`: emits `upper(self)`.
+    return sql.func.upper(self)
 @pxt.udf(is_method=True)
 def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
     """
@@ -600,9 +802,9 @@ def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
         width: Maximum line width.
         kwargs: Additional keyword arguments to pass to `textwrap.wrap()`.
     """
-    import textwrap
     return textwrap.wrap(self, width, **kwargs)
 @pxt.udf(is_method=True)
 def zfill(self: str, width: int) -> str:
     """
@@ -616,8 +818,31 @@ def zfill(self: str, width: int) -> str:
     return self.zfill(width)
+def string_splitter(text: Any, separators: str) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
+    """Iterator over chunks of a string. The string is chunked according to the specified `separators`.
+    The iterator yields a `text` field containing the text of the chunk.
+    Chunked text will be cleaned with `ftfy.fix_text` to fix up common problems with unicode sequences.
+    Args:
+        text: the string to split; the example below passes a table column (`tbl.text`).
+        separators: separators to use to chunk the document. Currently the only supported option is `'sentence'`.
+    Returns:
+        The result of `StringSplitter._create(text=..., separators=...)`; per the return annotation, a pair of
+        an iterator class and a dict of its keyword arguments. The example below passes it as the `iterator`
+        argument of `pxt.create_view()`.
+    Examples:
+        This example assumes an existing table `tbl` with a column `text` of type `pxt.String`.
+        Create a view that splits all strings on sentence boundaries:
+        >>> pxt.create_view(
+        ...     'sentence_chunks',
+        ...     tbl,
+        ...     iterator=string_splitter(tbl.text, separators='sentence')
+        ... )
+    """
+    # Delegate to the iterator class's factory, forwarding both arguments by keyword.
+    return pxt.iterators.string.StringSplitter._create(text=text, separators=separators)
 __all__ = local_public_names(__name__)
-def __dir__():
+def __dir__() -> list[str]:
     return __all__

pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

pixeltable 0.2.26py3-none-any.whl → 0.5.7py3-none-any.whl