PyPI - pixeltable - Versions diffs - 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl - Mend

pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (52) hide show

pixeltable/__init__.py +1 -2
pixeltable/__version__.py +2 -2
pixeltable/catalog/catalog.py +509 -103
pixeltable/catalog/column.py +5 -0
pixeltable/catalog/dir.py +15 -6
pixeltable/catalog/globals.py +16 -0
pixeltable/catalog/insertable_table.py +82 -41
pixeltable/catalog/path.py +15 -0
pixeltable/catalog/schema_object.py +7 -12
pixeltable/catalog/table.py +81 -67
pixeltable/catalog/table_version.py +23 -7
pixeltable/catalog/view.py +9 -6
pixeltable/env.py +15 -9
pixeltable/exec/exec_node.py +1 -1
pixeltable/exprs/__init__.py +2 -1
pixeltable/exprs/arithmetic_expr.py +2 -0
pixeltable/exprs/column_ref.py +38 -2
pixeltable/exprs/expr.py +61 -12
pixeltable/exprs/function_call.py +1 -4
pixeltable/exprs/globals.py +12 -0
pixeltable/exprs/json_mapper.py +4 -4
pixeltable/exprs/json_path.py +10 -11
pixeltable/exprs/similarity_expr.py +5 -20
pixeltable/exprs/string_op.py +107 -0
pixeltable/ext/functions/yolox.py +21 -64
pixeltable/func/callable_function.py +5 -2
pixeltable/func/query_template_function.py +6 -18
pixeltable/func/tools.py +2 -2
pixeltable/functions/__init__.py +1 -1
pixeltable/functions/globals.py +16 -5
pixeltable/globals.py +172 -262
pixeltable/io/__init__.py +3 -2
pixeltable/io/datarows.py +138 -0
pixeltable/io/external_store.py +8 -5
pixeltable/io/globals.py +7 -160
pixeltable/io/hf_datasets.py +21 -98
pixeltable/io/pandas.py +29 -43
pixeltable/io/parquet.py +17 -42
pixeltable/io/table_data_conduit.py +569 -0
pixeltable/io/utils.py +6 -21
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_30.py +50 -0
pixeltable/metadata/converters/util.py +26 -1
pixeltable/metadata/notes.py +1 -0
pixeltable/metadata/schema.py +3 -0
pixeltable/utils/arrow.py +32 -7
pixeltable/utils/coroutine.py +41 -0
{pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/METADATA +1 -1
{pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/RECORD +52 -47
{pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/WHEEL +1 -1
{pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/LICENSE +0 -0
{pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/entry_points.txt +0 -0

pixeltable/catalog/column.py CHANGED Viewed

@@ -202,6 +202,11 @@ class Column:
         assert self.tbl is not None
         return self.tbl.get().media_validation
+    @property
+    def is_required_for_insert(self) -> bool:
+        """Returns True if column is required when inserting rows."""
+        return not self.col_type.nullable and not self.is_computed
     def source(self) -> None:
         """
         If this is a computed col and the top-level expr is a function call, print the source, if possible.

pixeltable/catalog/dir.py CHANGED Viewed

@@ -1,10 +1,13 @@
 from __future__ import annotations
 import dataclasses
+import datetime
+import json
 import logging
 from uuid import UUID
 import sqlalchemy as sql
+from sqlalchemy.dialects.postgresql import JSONB
 from pixeltable.env import Env
 from pixeltable.metadata import schema
@@ -26,6 +29,7 @@ class Dir(SchemaObject):
         dir_record = schema.Dir(parent_id=parent_id, md=dataclasses.asdict(dir_md))
         session.add(dir_record)
         session.flush()
+        # print(f'{datetime.datetime.now()} create dir {dir_record}')
         assert dir_record.id is not None
         assert isinstance(dir_record.id, UUID)
         dir = cls(dir_record.id, parent_id, name)
@@ -43,11 +47,16 @@ class Dir(SchemaObject):
         return super()._path()
     def _move(self, new_name: str, new_dir_id: UUID) -> None:
+        # print(
+        #     f'{datetime.datetime.now()} move dir name={self._name} parent={self._dir_id} new_name={new_name} new_dir_id={new_dir_id}'
+        # )
         super()._move(new_name, new_dir_id)
-        with Env.get().engine.begin() as conn:
-            dir_md = schema.DirMd(name=new_name, user=None, additional_md={})
-            conn.execute(
-                sql.update(schema.Dir.__table__)
-                .values({schema.Dir.parent_id: self._dir_id, schema.Dir.md: dataclasses.asdict(dir_md)})
-                .where(schema.Dir.id == self._id)
+        stmt = sql.text(
+            (
+                f'UPDATE {schema.Dir.__table__} '
+                f'SET {schema.Dir.parent_id.name} = :new_dir_id, '
+                f"    {schema.Dir.md.name}['name'] = :new_name "
+                f'WHERE {schema.Dir.id.name} = :id'
             )
+        )
+        Env.get().conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})

pixeltable/catalog/globals.py CHANGED Viewed

@@ -40,6 +40,22 @@ class UpdateStatus:
         self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
         return self
+    @property
+    def insert_msg(self) -> str:
+        """Return a message describing the results of an insert operation."""
+        if self.num_excs == 0:
+            cols_with_excs_str = ''
+        else:
+            cols_with_excs_str = (
+                f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
+            )
+            cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
+        msg = (
+            f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
+            f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
+        )
+        return msg
 class MediaValidation(enum.Enum):
     ON_READ = 0

pixeltable/catalog/insertable_table.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from __future__ import annotations
+import enum
 import logging
-from typing import Any, Iterable, Literal, Optional, overload
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
 from uuid import UUID
 import pixeltable as pxt
@@ -16,9 +17,36 @@ from .table_version import TableVersion
 from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath
+if TYPE_CHECKING:
+    import datasets  # type: ignore[import-untyped]
+    from pixeltable.globals import RowData, TableDataSource
+    from pixeltable.io.table_data_conduit import TableDataConduit
 _logger = logging.getLogger('pixeltable')
+class OnErrorParameter(enum.Enum):
+    """Supported values for the on_error parameter"""
+    ABORT = 'abort'
+    IGNORE = 'ignore'
+    @classmethod
+    def is_valid(cls, v: Any) -> bool:
+        if isinstance(v, str):
+            return v.lower() in [c.value for c in cls]
+        return False
+    @classmethod
+    def fail_on_exception(cls, v: Any) -> bool:
+        if not cls.is_valid(v):
+            raise ValueError(f'Invalid value for on_error: {v}')
+        if isinstance(v, str):
+            return v.lower() != cls.IGNORE.value
+        return True
 class InsertableTable(Table):
     """A `Table` that allows inserting and deleting rows."""
@@ -86,62 +114,75 @@ class InsertableTable(Table):
     @overload
     def insert(
         self,
-        rows: Iterable[dict[str, Any]],
+        source: Optional[TableDataSource] = None,
         /,
         *,
-        print_stats: bool = False,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
+        **kwargs: Any,
     ) -> UpdateStatus: ...
     @overload
     def insert(
-        self, *, print_stats: bool = False, on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
+        self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
     ) -> UpdateStatus: ...
-    def insert(  # type: ignore[misc]
+    def insert(
         self,
-        rows: Optional[Iterable[dict[str, Any]]] = None,
+        source: Optional[TableDataSource] = None,
         /,
         *,
-        print_stats: bool = False,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
         **kwargs: Any,
     ) -> UpdateStatus:
-        if rows is None:
-            rows = [kwargs]
-        else:
-            rows = list(rows)
-            if len(kwargs) > 0:
-                raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
-        fail_on_exception = on_error == 'abort'
-        if not isinstance(rows, list):
-            raise excs.Error('rows must be a list of dictionaries')
-        if len(rows) == 0:
-            raise excs.Error('rows must not be empty')
-        for row in rows:
-            if not isinstance(row, dict):
-                raise excs.Error('rows must be a list of dictionaries')
-        self._validate_input_rows(rows)
-        with Env.get().begin_xact():
-            status = self._tbl_version.get().insert(
-                rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception
-            )
-        if status.num_excs == 0:
-            cols_with_excs_str = ''
-        else:
-            cols_with_excs_str = (
-                f' across {len(status.cols_with_excs)} column{"" if len(status.cols_with_excs) == 1 else "s"}'
-            )
-            cols_with_excs_str += f' ({", ".join(status.cols_with_excs)})'
-        msg = (
-            f'Inserted {status.num_rows} row{"" if status.num_rows == 1 else "s"} '
-            f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}{cols_with_excs_str}.'
+        from pixeltable.io.table_data_conduit import UnkTableDataConduit
+        table = self
+        if source is None:
+            source = [kwargs]
+            kwargs = None
+        tds = UnkTableDataConduit(
+            source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
         )
-        Env.get().console_logger.info(msg)
-        _logger.info(f'InsertableTable {self._name}: {msg}')
+        data_source = tds.specialize()
+        if data_source.source_column_map is None:
+            data_source.src_pk = []
+        assert isinstance(table, Table)
+        data_source.add_table_info(table)
+        data_source.prepare_for_insert_into_table()
+        fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
+        return table.insert_table_data_source(
+            data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
+        )
+    def insert_table_data_source(
+        self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
+    ) -> pxt.UpdateStatus:
+        """Insert row batches into this table from a `TableDataConduit`."""
+        from pixeltable.io.table_data_conduit import DFTableDataConduit, TableDataConduit
+        status = pxt.UpdateStatus()
+        with Env.get().begin_xact():
+            if isinstance(data_source, DFTableDataConduit):
+                status += self._tbl_version.get().insert(
+                    rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
+                )
+            else:
+                for row_batch in data_source.valid_row_batch():
+                    status += self._tbl_version.get().insert(
+                        rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
+                    )
+        Env.get().console_logger.info(status.insert_msg)
         FileCache.get().emit_eviction_warnings()
         return status

pixeltable/catalog/path.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import logging
+from typing import Iterator
 from pixeltable import exceptions as excs
@@ -55,5 +56,19 @@ class Path:
         is_prefix = self.components == other.components[: self.len]
         return is_prefix and (self.len == (other.len - 1) or not is_parent)
+    def ancestors(self) -> Iterator[Path]:
+        """
+        Return all ancestors of this path in top-down order including root.
+        If this path is for the root directory, which has no parent, then None is returned.
+        """
+        if self.is_root:
+            return
+        else:
+            for i in range(0, len(self.components)):
+                yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
     def __str__(self) -> str:
         return '.'.join(self.components)
+    def __lt__(self, other: Path) -> bool:
+        return str(self) < str(other)

pixeltable/catalog/schema_object.py CHANGED Viewed

@@ -2,7 +2,7 @@ from abc import abstractmethod
 from typing import TYPE_CHECKING, Any, Optional
 from uuid import UUID
-import pixeltable.env as env
+from pixeltable.env import Env
 if TYPE_CHECKING:
     from pixeltable import catalog
@@ -28,24 +28,19 @@ class SchemaObject:
         """Returns the parent directory of this schema object."""
         from .catalog import Catalog
-        with env.Env.get().begin_xact():
+        with Env.get().begin_xact():
             if self._dir_id is None:
                 return None
             return Catalog.get().get_dir(self._dir_id)
     def _path(self) -> str:
         """Returns the path to this schema object."""
-        with env.Env.get().begin_xact():
-            from .catalog import Catalog
+        from .catalog import Catalog
-            cat = Catalog.get()
-            dir_path = cat.get_dir_path(self._dir_id)
-            if dir_path == '':
-                # Either this is the root directory, with empty path, or its parent is the
-                # root directory. Either way, we return just the name.
-                return self._name
-            else:
-                return f'{dir_path}.{self._name}'
+        assert self._dir_id is not None
+        with Env.get().begin_xact():
+            path = Catalog.get().get_dir_path(self._dir_id)
+            return str(path.append(self._name))
     def get_metadata(self) -> dict[str, Any]:
         """Returns metadata associated with this schema object."""

pixeltable/catalog/table.py CHANGED Viewed

@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
 from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
+from keyword import iskeyword as is_python_keyword
 from uuid import UUID
 import pandas as pd
@@ -42,9 +43,11 @@ from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath
 if TYPE_CHECKING:
+    import datasets  # type: ignore[import-untyped]
     import torch.utils.data
     import pixeltable.plan
+    from pixeltable.globals import RowData, TableDataSource
 _logger = logging.getLogger('pixeltable')
@@ -171,8 +174,8 @@ class Table(SchemaObject):
     def _get_views(self, *, recursive: bool = True) -> list['Table']:
         cat = catalog.Catalog.get()
-        view_ids = cat.get_views(self._id)
-        views = [cat.get_tbl(id) for id in view_ids]
+        view_ids = cat.get_view_ids(self._id)
+        views = [cat.get_table_by_id(id) for id in view_ids]
         if recursive:
             views.extend([t for view in views for t in view._get_views(recursive=True)])
         return views
@@ -265,7 +268,7 @@ class Table(SchemaObject):
         if self._tbl_version_path.base is None:
             return None
         base_id = self._tbl_version_path.base.tbl_version.id
-        return catalog.Catalog.get().get_tbl(base_id)
+        return catalog.Catalog.get().get_table_by_id(base_id)
     @property
     def _bases(self) -> list['Table']:
@@ -369,11 +372,6 @@ class Table(SchemaObject):
             pd_rows.append(row)
         return pd.DataFrame(pd_rows)
-    def ensure_md_loaded(self) -> None:
-        """Ensure that table metadata is loaded."""
-        for col in self._tbl_version.get().cols_by_id.values():
-            _ = col.value_expr
     def describe(self) -> None:
         """
         Print the table schema.
@@ -387,13 +385,9 @@ class Table(SchemaObject):
             print(repr(self))
     def _drop(self) -> None:
-        cat = catalog.Catalog.get()
         self._check_is_dropped()
         self._tbl_version.get().drop()
         self._is_dropped = True
-        # update catalog
-        cat = catalog.Catalog.get()
-        cat.remove_tbl(self._id)
     # TODO Factor this out into a separate module.
     # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
@@ -729,13 +723,18 @@ class Table(SchemaObject):
             columns.append(column)
         return columns
+    @classmethod
+    def validate_column_name(cls, name: str) -> None:
+        """Check that a name is usable as a pixeltalbe column name"""
+        if is_system_column_name(name) or is_python_keyword(name):
+            raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
+        if not is_valid_identifier(name):
+            raise excs.Error(f'Invalid column name: {name!r}')
     @classmethod
     def _verify_column(cls, col: Column) -> None:
         """Check integrity of user-supplied Column and supply defaults"""
-        if is_system_column_name(col.name):
-            raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
-        if not is_valid_identifier(col.name):
-            raise excs.Error(f'Invalid column name: {col.name!r}')
+        cls.validate_column_name(col.name)
         if col.stored is False and not col.is_computed:
             raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
         if col.stored is False and col.has_window_fn_call():
@@ -754,16 +753,6 @@ class Table(SchemaObject):
             cls._verify_column(col)
             column_names.add(col.name)
-    def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
-        col = self._tbl_version_path.get_column(column_name, include_bases)
-        if col is None:
-            raise excs.Error(f'Column {column_name!r} unknown')
-    def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
-        exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
-        if not exists:
-            raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
     def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
         """Drop a column from the table.
@@ -916,7 +905,7 @@ class Table(SchemaObject):
         Args:
             column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
             idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
-                automatically. If specified, the name must be unique for this table.
+                automatically. If specified, the name must be unique for this table and a valid pixeltable column name.
             embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
                 or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
                 array of floats.
@@ -969,13 +958,7 @@ class Table(SchemaObject):
         """
         if self._tbl_version_path.is_snapshot():
             raise excs.Error('Cannot add an index to a snapshot')
-        col: Column
-        if isinstance(column, str):
-            self.__check_column_name_exists(column, include_bases=True)
-            col = self._tbl_version_path.get_column(column, include_bases=True)
-        else:
-            self.__check_column_ref_exists(column, include_bases=True)
-            col = column.col
+        col = self._resolve_column_parameter(column)
         with Env.get().begin_xact():
             if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
@@ -995,6 +978,10 @@ class Table(SchemaObject):
                 assert idx_name not in self._tbl_version.get().idxs_by_name
             from pixeltable.index import EmbeddingIndex
+            # idx_name must be a valid pixeltable column name
+            if idx_name is not None:
+                Table.validate_column_name(idx_name)
             # create the EmbeddingIndex instance to verify args
             idx = EmbeddingIndex(
                 col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
@@ -1058,17 +1045,28 @@ class Table(SchemaObject):
         col: Column = None
         if idx_name is None:
-            if isinstance(column, str):
-                self.__check_column_name_exists(column, include_bases=True)
-                col = self._tbl_version_path.get_column(column, include_bases=True)
-            else:
-                self.__check_column_ref_exists(column, include_bases=True)
-                col = column.col
+            col = self._resolve_column_parameter(column)
             assert col is not None
         with Env.get().begin_xact():
             self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
+    def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
+        """Resolve a column parameter to a Column object"""
+        col: Column = None
+        if isinstance(column, str):
+            col = self._tbl_version_path.get_column(column, include_bases=True)
+            if col is None:
+                raise excs.Error(f'Column {column!r} unknown')
+        elif isinstance(column, ColumnRef):
+            exists = self._tbl_version_path.has_column(column.col, include_bases=True)
+            if not exists:
+                raise excs.Error(f'Unknown column: {column.col.qualified_name}')
+            col = column.col
+        else:
+            raise excs.Error(f'Invalid column parameter type: {type(column)}')
+        return col
     def drop_index(
         self,
         *,
@@ -1124,12 +1122,7 @@ class Table(SchemaObject):
         col: Column = None
         if idx_name is None:
-            if isinstance(column, str):
-                self.__check_column_name_exists(column, include_bases=True)
-                col = self._tbl_version_path.get_column(column, include_bases=True)
-            else:
-                self.__check_column_ref_exists(column, include_bases=True)
-                col = column.col
+            col = self._resolve_column_parameter(column)
             assert col is not None
         with Env.get().begin_xact():
@@ -1154,49 +1147,62 @@ class Table(SchemaObject):
                     raise excs.Error(f'Index {idx_name!r} does not exist')
                 assert _if_not_exists == IfNotExistsParam.IGNORE
                 return
-            idx_id = self._tbl_version.get().idxs_by_name[idx_name].id
+            idx_info = self._tbl_version.get().idxs_by_name[idx_name]
         else:
             if col.tbl.id != self._tbl_version.id:
                 raise excs.Error(
                     f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
                 )
-            idx_info = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
+            idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
             if _idx_class is not None:
-                idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
-            if len(idx_info) == 0:
+                idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
+            if len(idx_info_list) == 0:
                 _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
                 if _if_not_exists == IfNotExistsParam.ERROR:
                     raise excs.Error(f'Column {col.name!r} does not have an index')
                 assert _if_not_exists == IfNotExistsParam.IGNORE
                 return
-            if len(idx_info) > 1:
+            if len(idx_info_list) > 1:
                 raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
-            idx_id = idx_info[0].id
-        self._tbl_version.get().drop_index(idx_id)
+            idx_info = idx_info_list[0]
+        # Find out if anything depends on this index
+        dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
+        if len(dependent_user_cols) > 0:
+            raise excs.Error(
+                f'Cannot drop index because the following columns depend on it:\n'
+                f'{", ".join(c.name for c in dependent_user_cols)}'
+            )
+        self._tbl_version.get().drop_index(idx_info.id)
     @overload
     def insert(
         self,
-        rows: Iterable[dict[str, Any]],
+        source: TableDataSource,
         /,
         *,
-        print_stats: bool = False,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
+        **kwargs: Any,
     ) -> UpdateStatus: ...
     @overload
     def insert(
-        self, *, print_stats: bool = False, on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
+        self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
     ) -> UpdateStatus: ...
-    @abc.abstractmethod  # type: ignore[misc]
+    @abc.abstractmethod
     def insert(
         self,
-        rows: Optional[Iterable[dict[str, Any]]] = None,
+        source: Optional[TableDataSource] = None,
         /,
         *,
-        print_stats: bool = False,
+        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
+        print_stats: bool = False,
         **kwargs: Any,
     ) -> UpdateStatus:
         """Inserts rows into this table. There are two mutually exclusive call patterns:
@@ -1205,11 +1211,12 @@ class Table(SchemaObject):
         ```python
         insert(
-            rows: Iterable[dict[str, Any]],
+            source: TableSourceDataType,
             /,
             *,
+            on_error: Literal['abort', 'ignore'] = 'abort',
             print_stats: bool = False,
-            on_error: Literal['abort', 'ignore'] = 'abort'
+            **kwargs: Any,
         )```
         To insert just a single row, you can use the more concise syntax:
@@ -1217,23 +1224,25 @@ class Table(SchemaObject):
         ```python
         insert(
             *,
-            print_stats: bool = False,
             on_error: Literal['abort', 'ignore'] = 'abort',
+            print_stats: bool = False,
             **kwargs: Any
         )```
         Args:
-            rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
-                names to values.
+            source: A data source from which data can be imported.
             kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
-            print_stats: If `True`, print statistics about the cost of computed columns.
+                (if inserting multiple rows) Additional keyword arguments are passed to the data source.
+            source_format: A hint about the format of the source data
+            schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
             on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
                 invalid media file (such as a corrupt image) for one of the inserted rows.
                 - If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
                 - If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
-                  with errors will have a `None` value for that cell, with information about the error stored in the
-                  corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
+                    with errors will have a `None` value for that cell, with information about the error stored in the
+                    corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
+            print_stats: If `True`, print statistics about the cost of computed columns.
         Returns:
             An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
@@ -1245,6 +1254,7 @@ class Table(SchemaObject):
                 - The table has been dropped.
                 - One of the rows being inserted does not conform to the table schema.
                 - An error occurs during processing of computed columns, and `on_error='ignore'`.
+                - An error occurs while importing data from a source, and `on_error='abort'`.
         Examples:
             Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
@@ -1256,6 +1266,10 @@ class Table(SchemaObject):
             Insert a single row using the alternative syntax:
             >>> tbl.insert(a=3, b=3, c=3)
+            Insert rows from a CSV file:
+            >>> tbl.insert(source='path/to/file.csv')
         """
         raise NotImplementedError

pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

Potentially problematic release.

pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl