PyPI - pixeltable - Versions diffs - 0.3.15__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl - Mend

pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (58) hide show

pixeltable/__version__.py +2 -2
pixeltable/catalog/catalog.py +296 -105
pixeltable/catalog/column.py +10 -8
pixeltable/catalog/dir.py +1 -2
pixeltable/catalog/insertable_table.py +25 -20
pixeltable/catalog/schema_object.py +3 -6
pixeltable/catalog/table.py +261 -189
pixeltable/catalog/table_version.py +333 -202
pixeltable/catalog/table_version_handle.py +15 -2
pixeltable/catalog/table_version_path.py +60 -14
pixeltable/catalog/view.py +38 -6
pixeltable/dataframe.py +196 -18
pixeltable/env.py +4 -4
pixeltable/exec/__init__.py +1 -1
pixeltable/exec/expr_eval/evaluators.py +4 -1
pixeltable/exec/in_memory_data_node.py +1 -1
pixeltable/exec/sql_node.py +171 -22
pixeltable/exprs/column_property_ref.py +15 -6
pixeltable/exprs/column_ref.py +32 -11
pixeltable/exprs/comparison.py +1 -1
pixeltable/exprs/data_row.py +5 -3
pixeltable/exprs/expr.py +7 -0
pixeltable/exprs/literal.py +2 -0
pixeltable/exprs/row_builder.py +4 -6
pixeltable/exprs/rowid_ref.py +8 -0
pixeltable/exprs/similarity_expr.py +1 -0
pixeltable/func/query_template_function.py +1 -1
pixeltable/func/tools.py +1 -1
pixeltable/functions/gemini.py +0 -1
pixeltable/functions/string.py +212 -58
pixeltable/globals.py +12 -4
pixeltable/index/base.py +5 -0
pixeltable/index/btree.py +5 -0
pixeltable/index/embedding_index.py +5 -0
pixeltable/io/external_store.py +8 -29
pixeltable/io/label_studio.py +1 -1
pixeltable/io/parquet.py +2 -2
pixeltable/io/table_data_conduit.py +0 -31
pixeltable/metadata/__init__.py +11 -2
pixeltable/metadata/converters/convert_13.py +2 -2
pixeltable/metadata/converters/convert_30.py +6 -11
pixeltable/metadata/converters/convert_35.py +9 -0
pixeltable/metadata/converters/convert_36.py +38 -0
pixeltable/metadata/converters/util.py +3 -9
pixeltable/metadata/notes.py +2 -0
pixeltable/metadata/schema.py +8 -1
pixeltable/plan.py +221 -14
pixeltable/share/packager.py +137 -13
pixeltable/share/publish.py +2 -2
pixeltable/store.py +19 -13
pixeltable/utils/dbms.py +1 -1
pixeltable/utils/formatter.py +64 -42
pixeltable/utils/sample.py +25 -0
{pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/METADATA +2 -1
{pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/RECORD +58 -55
{pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/LICENSE +0 -0
{pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/WHEEL +0 -0
{pixeltable-0.3.15.dist-info → pixeltable-0.4.0rc2.dist-info}/entry_points.txt +0 -0

pixeltable/functions/string.py CHANGED Viewed

@@ -12,8 +12,13 @@ t.select(t.str_col.capitalize()).collect()
 """
 import builtins
+import re
+import textwrap
+from string import whitespace
 from typing import Any, Optional
+import sqlalchemy as sql
 import pixeltable as pxt
 from pixeltable.utils.code import local_public_names
@@ -28,6 +33,11 @@ def capitalize(self: str) -> str:
     return self.capitalize()
+@capitalize.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.concat(sql.func.upper(sql.func.left(self, 1)), sql.func.lower(sql.func.right(self, -1)))
 @pxt.udf(is_method=True)
 def casefold(self: str) -> str:
     """
@@ -53,26 +63,47 @@ def center(self: str, width: int, fillchar: str = ' ') -> str:
 @pxt.udf(is_method=True)
-def contains(self: str, pattern: str, case: bool = True, flags: int = 0, regex: bool = True) -> bool:
+def contains(self: str, substr: str, case: bool = True) -> bool:
     """
-    Test if string contains pattern or regex.
+    Test if string contains a substring.
     Args:
-        pattern: string literal or regular expression
+        substr: string literal or regular expression
         case: if False, ignore case
-        flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
-        regex: if True, treat pattern as a regular expression
     """
-    if regex:
-        import re
-        if not case:
-            flags |= re.IGNORECASE
-        return bool(re.search(pattern, self, flags))
-    elif case:
-        return pattern in self
+    if case:
+        return substr in self
+    else:
+        return substr.lower() in self.lower()
+@contains.to_sql
+def _(
+    self: sql.ColumnElement, substr: sql.ColumnElement, case: Optional[sql.ColumnElement] = None
+) -> sql.ColumnElement:
+    # Replace all occurrences of `%`, `_`, and `\` with escaped versions
+    escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
+    if case is None:
+        # Default `case` is True, so we do a case-sensitive comparison
+        return self.like(sql.func.concat('%', escaped_substr, '%'))
     else:
-        return pattern.lower() in self.lower()
+        # Toggle case-sensitivity based on the value of `case`
+        return sql.case(
+            (case, self.like(sql.func.concat('%', escaped_substr, '%'))),
+            else_=sql.func.lower(self).like(sql.func.concat('%', sql.func.lower(escaped_substr), '%')),
+        )
+@pxt.udf(is_method=True)
+def contains_re(self: str, pattern: str, flags: int = 0) -> bool:
+    """
+    Test if string contains a regular expression pattern.
+    Args:
+        pattern: regular expression pattern
+        flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
+    """
+    return bool(re.search(pattern, self, flags))
 @pxt.udf(is_method=True)
@@ -84,22 +115,27 @@ def count(self: str, pattern: str, flags: int = 0) -> int:
         pattern: string literal or regular expression
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
     """
-    import re
     return builtins.len(re.findall(pattern, self, flags))
 @pxt.udf(is_method=True)
-def endswith(self: str, pattern: str) -> bool:
+def endswith(self: str, substr: str) -> bool:
     """
     Return `True` if the string ends with the specified suffix, otherwise return `False`.
     Equivalent to [`str.endswith()`](https://docs.python.org/3/library/stdtypes.html#str.endswith).
     Args:
-        pattern: string literal
+        substr: string literal
     """
-    return self.endswith(pattern)
+    return self.endswith(substr)
+@endswith.to_sql
+def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
+    # Replace all occurrences of `%`, `_`, and `\` with escaped versions
+    escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
+    return self.like(sql.func.concat('%', escaped_substr))
 @pxt.udf(is_method=True)
@@ -113,13 +149,11 @@ def fill(self: str, width: int, **kwargs: Any) -> str:
         width: Maximum line width.
         kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
     """
-    import textwrap
     return textwrap.fill(self, width, **kwargs)
 @pxt.udf(is_method=True)
-def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
+def find(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
     """
     Return the lowest index in string where `substr` is found within the slice `s[start:end]`.
@@ -133,6 +167,23 @@ def find(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] =
     return self.find(substr, start, end)
+@find.to_sql
+def _(
+    self: sql.ColumnElement,
+    substr: sql.ColumnElement,
+    start: sql.ColumnElement,
+    end: Optional[sql.ColumnElement] = None,
+) -> sql.ColumnElement:
+    sl = pxt.functions.string.slice._to_sql(self, start, end)
+    if sl is None:
+        return None
+    strpos = sql.func.strpos(sl, substr)
+    return sql.case(
+        (strpos == 0, -1), (start >= 0, strpos + start - 1), else_=strpos + sql.func.char_length(self) + start - 1
+    )
 @pxt.udf(is_method=True)
 def findall(self: str, pattern: str, flags: int = 0) -> list:
     """
@@ -144,8 +195,6 @@ def findall(self: str, pattern: str, flags: int = 0) -> list:
         pattern: regular expression pattern
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
     """
-    import re
     return re.findall(pattern, self, flags)
@@ -171,8 +220,6 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
         case: if False, ignore case
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
     """
-    import re
     if not case:
         flags |= re.IGNORECASE
     _ = bool(re.fullmatch(pattern, self, flags))
@@ -180,7 +227,7 @@ def fullmatch(self: str, pattern: str, case: bool = True, flags: int = 0) -> boo
 @pxt.udf(is_method=True)
-def index(self: str, substr: str, start: Optional[int] = 0, end: Optional[int] = None) -> int:
+def index(self: str, substr: str, start: int = 0, end: Optional[int] = None) -> int:
     """
     Return the lowest index in string where `substr` is found within the slice `[start:end]`.
     Raises ValueError if `substr` is not found.
@@ -330,6 +377,11 @@ def len(self: str) -> int:
     return builtins.len(self)
+@len.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.char_length(self)
 @pxt.udf(is_method=True)
 def ljust(self: str, width: int, fillchar: str = ' ') -> str:
     """
@@ -355,6 +407,11 @@ def lower(self: str) -> str:
     return self.lower()
+@lower.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.lower(self)
 @pxt.udf(is_method=True)
 def lstrip(self: str, chars: Optional[str] = None) -> str:
     """
@@ -369,6 +426,11 @@ def lstrip(self: str, chars: Optional[str] = None) -> str:
     return self.lstrip(chars)
+@lstrip.to_sql
+def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
+    return sql.func.ltrim(self, chars if chars is not None else whitespace)
 @pxt.udf(is_method=True)
 def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
     """
@@ -379,8 +441,6 @@ def match(self: str, pattern: str, case: bool = True, flags: int = 0) -> bool:
         case: if False, ignore case
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
     """
-    import re
     if not case:
         flags |= re.IGNORECASE
     return bool(re.match(pattern, self, flags))
@@ -440,9 +500,12 @@ def removeprefix(self: str, prefix: str) -> str:
     """
     Remove prefix. If the prefix is not present, returns string.
     """
-    if self.startswith(prefix):
-        return self[builtins.len(prefix) :]
-    return self
+    return self.removeprefix(prefix)
+@removeprefix.to_sql
+def _(self: sql.ColumnElement, prefix: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.case((startswith._to_sql(self, prefix), sql.func.right(self, -sql.func.char_length(prefix))), else_=self)
 @pxt.udf(is_method=True)
@@ -450,9 +513,12 @@ def removesuffix(self: str, suffix: str) -> str:
     """
     Remove suffix. If the suffix is not present, returns string.
     """
-    if self.endswith(suffix):
-        return self[: -builtins.len(suffix)]
-    return self
+    return self.removesuffix(suffix)
+@removesuffix.to_sql
+def _(self: sql.ColumnElement, suffix: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.case((endswith._to_sql(self, suffix), sql.func.left(self, -sql.func.char_length(suffix))), else_=self)
 @pxt.udf(is_method=True)
@@ -463,32 +529,65 @@ def repeat(self: str, n: int) -> str:
     return self * n
+@repeat.to_sql
+def _(self: sql.ColumnElement, n: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.repeat(self, n.cast(sql.types.INT))
 @pxt.udf(is_method=True)
-def replace(
-    self: str, pattern: str, repl: str, n: int = -1, case: bool = True, flags: int = 0, regex: bool = False
-) -> str:
+def replace(self: str, substr: str, repl: str, n: Optional[int] = None) -> str:
     """
-    Replace occurrences of `pattern` with `repl`.
+    Replace occurrences of `substr` with `repl`.
-    Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace) or
-    [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub), depending on the value of regex.
+    Equivalent to [`str.replace()`](https://docs.python.org/3/library/stdtypes.html#str.replace).
     Args:
-        pattern: string literal or regular expression
+        substr: string literal
         repl: replacement string
-        n: number of replacements to make (-1 for all)
-        case: if False, ignore case
+        n: number of replacements to make (if `None`, replace all occurrences)
+    """
+    return self.replace(substr, repl, n or -1)
+@replace.to_sql
+def _(
+    self: sql.ColumnElement, substr: sql.ColumnElement, repl: sql.ColumnElement, n: Optional[sql.ColumnElement] = None
+) -> sql.ColumnElement:
+    if n is not None:
+        return None  # SQL does not support bounding the number of replacements
+    return sql.func.replace(self, substr, repl)
+@pxt.udf(is_method=True)
+def replace_re(self: str, pattern: str, repl: str, n: Optional[int] = None, flags: int = 0) -> str:
+    """
+    Replace occurrences of a regular expression pattern with `repl`.
+    Equivalent to [`re.sub()`](https://docs.python.org/3/library/re.html#re.sub).
+    Args:
+        pattern: regular expression pattern
+        repl: replacement string
+        n: number of replacements to make (if `None`, replace all occurrences)
         flags: [flags](https://docs.python.org/3/library/re.html#flags) for the `re` module
-        regex: if True, treat pattern as a regular expression
     """
-    if regex:
-        import re
+    return re.sub(pattern, repl, self, count=(n or 0), flags=flags)
-        if not case:
-            flags |= re.IGNORECASE
-        return re.sub(pattern, repl, self, count=(0 if n == -1 else n), flags=flags)
-    else:
-        return self.replace(pattern, repl, n)
+@pxt.udf(is_method=True)
+def reverse(self: str) -> str:
+    """
+    Return a reversed copy of the string.
+    Equivalent to `str[::-1]`.
+    """
+    return self[::-1]
+@reverse.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.reverse(self)
 @pxt.udf(is_method=True)
@@ -556,6 +655,11 @@ def rstrip(self: str, chars: Optional[str] = None) -> str:
     return self.rstrip(chars)
+@rstrip.to_sql
+def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
+    return sql.func.rtrim(self, chars if chars is not None else whitespace)
 @pxt.udf(is_method=True)
 def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None) -> str:
     """
@@ -569,6 +673,41 @@ def slice(self: str, start: Optional[int] = None, stop: Optional[int] = None, st
     return self[start:stop:step]
+@slice.to_sql
+def _(
+    self: sql.ColumnElement,
+    start: Optional[sql.ColumnElement] = None,
+    stop: Optional[sql.ColumnElement] = None,
+    step: Optional[sql.ColumnElement] = None,
+) -> sql.ColumnElement:
+    if step is not None:
+        return None
+    if start is not None:
+        start = start.cast(sql.types.INT)  # Postgres won't accept a BIGINT
+        start = sql.case(
+            (start >= 0, start + 1),  # SQL is 1-based, Python is 0-based
+            else_=sql.func.char_length(self) + start + 1,  # negative index
+        )
+        start = sql.func.greatest(start, 1)
+    if stop is not None:
+        stop = stop.cast(sql.types.INT)  # Postgres won't accept a BIGINT
+        stop = sql.case(
+            (stop >= 0, stop + 1),  # SQL is 1-based, Python is 0-based
+            else_=sql.func.char_length(self) + stop + 1,  # negative index
+        )
+        stop = sql.func.greatest(stop, 0)
+    if start is None:
+        if stop is None:
+            return self
+        return sql.func.substr(self, 1, stop)
+    if stop is None:
+        return sql.func.substr(self, start)
+    return sql.func.substr(self, start, sql.func.greatest(stop - start, 0))
 @pxt.udf(is_method=True)
 def slice_replace(
     self: str, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None
@@ -585,16 +724,23 @@ def slice_replace(
 @pxt.udf(is_method=True)
-def startswith(self: str, pattern: str) -> int:
+def startswith(self: str, substr: str) -> int:
     """
-    Return `True` if string starts with `pattern`, otherwise return `False`.
+    Return `True` if string starts with `substr`, otherwise return `False`.
     Equivalent to [`str.startswith()`](https://docs.python.org/3/library/stdtypes.html#str.startswith).
     Args:
-        pattern: string literal
+        substr: string literal
     """
-    return self.startswith(pattern)
+    return self.startswith(substr)
+@startswith.to_sql
+def _(self: sql.ColumnElement, substr: sql.ColumnElement) -> sql.ColumnElement:
+    # Replace all occurrences of `%`, `_`, and `\` with escaped versions
+    escaped_substr = sql.func.regexp_replace(substr, r'(%|_|\\)', r'\\\1', 'g')
+    return self.like(sql.func.concat(escaped_substr, '%'))
 @pxt.udf(is_method=True)
@@ -610,6 +756,11 @@ def strip(self: str, chars: Optional[str] = None) -> str:
     return self.strip(chars)
+@strip.to_sql
+def _(self: sql.ColumnElement, chars: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
+    return sql.func.trim(self, chars if chars is not None else whitespace)
 @pxt.udf(is_method=True)
 def swapcase(self: str) -> str:
     """
@@ -641,6 +792,11 @@ def upper(self: str) -> str:
     return self.upper()
+@upper.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.upper(self)
 @pxt.udf(is_method=True)
 def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
     """
@@ -653,8 +809,6 @@ def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
         width: Maximum line width.
         kwargs: Additional keyword arguments to pass to `textwrap.fill()`.
     """
-    import textwrap
     return textwrap.wrap(self, width, **kwargs)

pixeltable/globals.py CHANGED Viewed

@@ -249,13 +249,17 @@ def create_view(
     where: Optional[exprs.Expr] = None
     if isinstance(base, catalog.Table):
         tbl_version_path = base._tbl_version_path
+        sample_clause = None
     elif isinstance(base, DataFrame):
         base._validate_mutable('create_view', allow_select=True)
         if len(base._from_clause.tbls) > 1:
             raise excs.Error('Cannot create a view of a join')
         tbl_version_path = base._from_clause.tbls[0]
         where = base.where_clause
+        sample_clause = base.sample_clause
         select_list = base.select_list
+        if sample_clause is not None and not is_snapshot and not sample_clause.is_repeatable:
+            raise excs.Error('Non-snapshot views cannot be created with non-fractional or stratified sampling')
     else:
         raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
     assert isinstance(base, (catalog.Table, DataFrame))
@@ -272,7 +276,7 @@ def create_view(
             if col_name in [c.name for c in tbl_version_path.columns()]:
                 raise excs.Error(
                     f'Column {col_name!r} already exists in the base table '
-                    f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
+                    f'{tbl_version_path.get_column(col_name).tbl.name}.'
                 )
     return Catalog.get().create_view(
@@ -280,6 +284,7 @@ def create_view(
         tbl_version_path,
         select_list=select_list,
         where=where,
+        sample_clause=sample_clause,
         additional_columns=additional_columns,
         is_snapshot=is_snapshot,
         iterator=iterator,
@@ -422,7 +427,10 @@ def get_table(path: str) -> catalog.Table:
         >>> tbl = pxt.get_table('my_snapshot')
     """
     path_obj = catalog.Path(path)
-    return Catalog.get().get_table(path_obj)
+    tbl = Catalog.get().get_table(path_obj)
+    tv = tbl._tbl_version.get()
+    _logger.debug(f'get_table(): tbl={tv.id}:{tv.effective_version} sa_tbl={id(tv.store_tbl.sa_tbl):x} tv={id(tv):x}')
+    return tbl
 def move(path: str, new_path: str) -> None:
@@ -493,8 +501,8 @@ def drop_table(
     if isinstance(table, catalog.Table):
         # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
         # the Table record, and then get X locks in the correct order (first containing directory, then table)
-        with Env.get().begin_xact():
-            tbl_path = table._path
+        with Catalog.get().begin_xact(for_write=False):
+            tbl_path = table._path()
     else:
         assert isinstance(table, str)
         tbl_path = table

pixeltable/index/base.py CHANGED Viewed

@@ -41,6 +41,11 @@ class IndexBase(abc.ABC):
         """Create the index on the index value column"""
         pass
+    @abc.abstractmethod
+    def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
+        """Drop the index on the index value column"""
+        pass
     @classmethod
     @abc.abstractmethod
     def display_name(cls) -> str:

pixeltable/index/btree.py CHANGED Viewed

@@ -59,6 +59,11 @@ class BtreeIndex(IndexBase):
         conn = Env.get().conn
         idx.create(bind=conn)
+    def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
+        """Drop the index on the index value column"""
+        # TODO: implement
+        raise NotImplementedError()
     @classmethod
     def display_name(cls) -> str:
         return 'btree'

pixeltable/index/embedding_index.py CHANGED Viewed

@@ -148,6 +148,11 @@ class EmbeddingIndex(IndexBase):
         conn = Env.get().conn
         idx.create(bind=conn)
+    def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
+        """Drop the index on the index value column"""
+        # TODO: implement
+        raise NotImplementedError()
     def similarity_clause(self, val_column: catalog.Column, item: Any) -> sql.ColumnElement:
         """Create a ColumnElement that represents '<val_column> <op> <item>'"""
         assert isinstance(item, (str, PIL.Image.Image))

pixeltable/io/external_store.py CHANGED Viewed

@@ -3,7 +3,6 @@ from __future__ import annotations
 import abc
 import itertools
 import logging
-import time
 from dataclasses import dataclass
 from typing import Any, Optional
 from uuid import UUID
@@ -11,7 +10,7 @@ from uuid import UUID
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
 from pixeltable import Column, Table
-from pixeltable.catalog import TableVersion, TableVersionHandle
+from pixeltable.catalog import TableVersion
 _logger = logging.getLogger('pixeltable')
@@ -32,15 +31,11 @@ class ExternalStore(abc.ABC):
     @abc.abstractmethod
     def link(self, tbl_version: TableVersion) -> None:
-        """
-        Called by `TableVersion.link()` to implement store-specific logic.
-        """
+        """Creates store-specific metadata needed to implement sync()."""
     @abc.abstractmethod
     def unlink(self, tbl_version: TableVersion) -> None:
-        """
-        Called by `TableVersion.unlink()` to implement store-specific logic.
-        """
+        """Removes store-specific metadata created in link()."""
     @abc.abstractmethod
     def get_local_columns(self) -> list[Column]:
@@ -111,17 +106,10 @@ class Project(ExternalStore, abc.ABC):
         if len(stored_proxies_needed) > 0:
             _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
-            # Create stored proxies for columns that need one. Increment the schema version
-            # accordingly.
-            tbl_version.version += 1
-            preceding_schema_version = tbl_version.schema_version
-            tbl_version.schema_version = tbl_version.version
-            proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
+            # Create stored proxies for columns that need one
+            proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
             # Add the columns; this will also update table metadata.
-            tbl_version._add_columns(proxy_cols, print_stats=False, on_error='ignore')
-            # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
-            # invisible to the user.
-            tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
+            tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
     def unlink(self, tbl_version: TableVersion) -> None:
         # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
@@ -132,15 +120,10 @@ class Project(ExternalStore, abc.ABC):
                 deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
         if len(deletions_needed) > 0:
             _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
-            # Delete stored proxies that are no longer needed.
-            tbl_version.version += 1
-            preceding_schema_version = tbl_version.schema_version
-            tbl_version.schema_version = tbl_version.version
             tbl_version._drop_columns(deletions_needed)
             self.stored_proxies.clear()
-            tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
-    def create_stored_proxy(self, tbl_version: TableVersion, col: Column) -> Column:
+    def create_stored_proxy(self, col: Column) -> Column:
         """
         Creates a proxy column for the specified column. The proxy column will be created in the specified
         `TableVersion`.
@@ -158,12 +141,7 @@ class Project(ExternalStore, abc.ABC):
             #   Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
             computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
             stored=True,
-            col_id=tbl_version.next_col_id,
-            sa_col_type=col.col_type.to_sa_type(),
-            schema_version_add=tbl_version.schema_version,
         )
-        proxy_col.tbl = TableVersionHandle(tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version)
-        tbl_version.next_col_id += 1
         self.stored_proxies[col] = proxy_col
         return proxy_col
@@ -213,6 +191,7 @@ class Project(ExternalStore, abc.ABC):
             external (import or export) columns.
         If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
         in which the Pixeltable column names are resolved to the corresponding `Column` objects.
+        TODO: return columns as names or qualified ids
         """
         from pixeltable import exprs

pixeltable/io/label_studio.py CHANGED Viewed

@@ -577,7 +577,7 @@ class LabelStudioProject(Project):
             else:
                 local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
             if local_annotations_column not in t._schema:
-                t.add_columns({local_annotations_column: ts.JsonType(nullable=True)})
+                t.add_columns({local_annotations_column: ts.Json})
         resolved_col_mapping = cls.validate_columns(
             t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping

pixeltable/io/parquet.py CHANGED Viewed

@@ -14,7 +14,7 @@ import PIL.Image
 import pixeltable as pxt
 import pixeltable.exceptions as excs
-from pixeltable.env import Env
+from pixeltable.catalog import Catalog
 from pixeltable.utils.transactional_directory import transactional_directory
 if typing.TYPE_CHECKING:
@@ -87,7 +87,7 @@ def export_parquet(
         current_value_batch: dict[str, deque] = {k: deque() for k in df.schema}
         current_byte_estimate = 0
-        with Env.get().begin_xact():
+        with Catalog.get().begin_xact(for_write=False):
             for data_row in df._exec():
                 for (col_name, col_type), e in zip(df.schema.items(), df._select_list_exprs):
                     val = data_row[e.slot_idx]

pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl

Potentially problematic release.

pixeltable 0.3.15__py3-none-any.whl → 0.4.0rc2__py3-none-any.whl