pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (52)
  1. pixeltable/__init__.py +1 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +509 -103
  4. pixeltable/catalog/column.py +5 -0
  5. pixeltable/catalog/dir.py +15 -6
  6. pixeltable/catalog/globals.py +16 -0
  7. pixeltable/catalog/insertable_table.py +82 -41
  8. pixeltable/catalog/path.py +15 -0
  9. pixeltable/catalog/schema_object.py +7 -12
  10. pixeltable/catalog/table.py +81 -67
  11. pixeltable/catalog/table_version.py +23 -7
  12. pixeltable/catalog/view.py +9 -6
  13. pixeltable/env.py +15 -9
  14. pixeltable/exec/exec_node.py +1 -1
  15. pixeltable/exprs/__init__.py +2 -1
  16. pixeltable/exprs/arithmetic_expr.py +2 -0
  17. pixeltable/exprs/column_ref.py +38 -2
  18. pixeltable/exprs/expr.py +61 -12
  19. pixeltable/exprs/function_call.py +1 -4
  20. pixeltable/exprs/globals.py +12 -0
  21. pixeltable/exprs/json_mapper.py +4 -4
  22. pixeltable/exprs/json_path.py +10 -11
  23. pixeltable/exprs/similarity_expr.py +5 -20
  24. pixeltable/exprs/string_op.py +107 -0
  25. pixeltable/ext/functions/yolox.py +21 -64
  26. pixeltable/func/callable_function.py +5 -2
  27. pixeltable/func/query_template_function.py +6 -18
  28. pixeltable/func/tools.py +2 -2
  29. pixeltable/functions/__init__.py +1 -1
  30. pixeltable/functions/globals.py +16 -5
  31. pixeltable/globals.py +172 -262
  32. pixeltable/io/__init__.py +3 -2
  33. pixeltable/io/datarows.py +138 -0
  34. pixeltable/io/external_store.py +8 -5
  35. pixeltable/io/globals.py +7 -160
  36. pixeltable/io/hf_datasets.py +21 -98
  37. pixeltable/io/pandas.py +29 -43
  38. pixeltable/io/parquet.py +17 -42
  39. pixeltable/io/table_data_conduit.py +569 -0
  40. pixeltable/io/utils.py +6 -21
  41. pixeltable/metadata/__init__.py +1 -1
  42. pixeltable/metadata/converters/convert_30.py +50 -0
  43. pixeltable/metadata/converters/util.py +26 -1
  44. pixeltable/metadata/notes.py +1 -0
  45. pixeltable/metadata/schema.py +3 -0
  46. pixeltable/utils/arrow.py +32 -7
  47. pixeltable/utils/coroutine.py +41 -0
  48. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/METADATA +1 -1
  49. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/RECORD +52 -47
  50. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/WHEEL +1 -1
  51. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/LICENSE +0 -0
  52. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/entry_points.txt +0 -0
@@ -202,6 +202,11 @@ class Column:
202
202
  assert self.tbl is not None
203
203
  return self.tbl.get().media_validation
204
204
 
205
+ @property
206
+ def is_required_for_insert(self) -> bool:
207
+ """Returns True if column is required when inserting rows."""
208
+ return not self.col_type.nullable and not self.is_computed
209
+
205
210
  def source(self) -> None:
206
211
  """
207
212
  If this is a computed col and the top-level expr is a function call, print the source, if possible.
pixeltable/catalog/dir.py CHANGED
@@ -1,10 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import dataclasses
4
+ import datetime
5
+ import json
4
6
  import logging
5
7
  from uuid import UUID
6
8
 
7
9
  import sqlalchemy as sql
10
+ from sqlalchemy.dialects.postgresql import JSONB
8
11
 
9
12
  from pixeltable.env import Env
10
13
  from pixeltable.metadata import schema
@@ -26,6 +29,7 @@ class Dir(SchemaObject):
26
29
  dir_record = schema.Dir(parent_id=parent_id, md=dataclasses.asdict(dir_md))
27
30
  session.add(dir_record)
28
31
  session.flush()
32
+ # print(f'{datetime.datetime.now()} create dir {dir_record}')
29
33
  assert dir_record.id is not None
30
34
  assert isinstance(dir_record.id, UUID)
31
35
  dir = cls(dir_record.id, parent_id, name)
@@ -43,11 +47,16 @@ class Dir(SchemaObject):
43
47
  return super()._path()
44
48
 
45
49
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
50
+ # print(
51
+ # f'{datetime.datetime.now()} move dir name={self._name} parent={self._dir_id} new_name={new_name} new_dir_id={new_dir_id}'
52
+ # )
46
53
  super()._move(new_name, new_dir_id)
47
- with Env.get().engine.begin() as conn:
48
- dir_md = schema.DirMd(name=new_name, user=None, additional_md={})
49
- conn.execute(
50
- sql.update(schema.Dir.__table__)
51
- .values({schema.Dir.parent_id: self._dir_id, schema.Dir.md: dataclasses.asdict(dir_md)})
52
- .where(schema.Dir.id == self._id)
54
+ stmt = sql.text(
55
+ (
56
+ f'UPDATE {schema.Dir.__table__} '
57
+ f'SET {schema.Dir.parent_id.name} = :new_dir_id, '
58
+ f" {schema.Dir.md.name}['name'] = :new_name "
59
+ f'WHERE {schema.Dir.id.name} = :id'
53
60
  )
61
+ )
62
+ Env.get().conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
@@ -40,6 +40,22 @@ class UpdateStatus:
40
40
  self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
41
41
  return self
42
42
 
43
+ @property
44
+ def insert_msg(self) -> str:
45
+ """Return a message describing the results of an insert operation."""
46
+ if self.num_excs == 0:
47
+ cols_with_excs_str = ''
48
+ else:
49
+ cols_with_excs_str = (
50
+ f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
51
+ )
52
+ cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
53
+ msg = (
54
+ f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
55
+ f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
56
+ )
57
+ return msg
58
+
43
59
 
44
60
  class MediaValidation(enum.Enum):
45
61
  ON_READ = 0
@@ -1,7 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import enum
3
4
  import logging
4
- from typing import Any, Iterable, Literal, Optional, overload
5
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
5
6
  from uuid import UUID
6
7
 
7
8
  import pixeltable as pxt
@@ -16,9 +17,36 @@ from .table_version import TableVersion
16
17
  from .table_version_handle import TableVersionHandle
17
18
  from .table_version_path import TableVersionPath
18
19
 
20
+ if TYPE_CHECKING:
21
+ import datasets # type: ignore[import-untyped]
22
+
23
+ from pixeltable.globals import RowData, TableDataSource
24
+ from pixeltable.io.table_data_conduit import TableDataConduit
25
+
19
26
  _logger = logging.getLogger('pixeltable')
20
27
 
21
28
 
29
+ class OnErrorParameter(enum.Enum):
30
+ """Supported values for the on_error parameter"""
31
+
32
+ ABORT = 'abort'
33
+ IGNORE = 'ignore'
34
+
35
+ @classmethod
36
+ def is_valid(cls, v: Any) -> bool:
37
+ if isinstance(v, str):
38
+ return v.lower() in [c.value for c in cls]
39
+ return False
40
+
41
+ @classmethod
42
+ def fail_on_exception(cls, v: Any) -> bool:
43
+ if not cls.is_valid(v):
44
+ raise ValueError(f'Invalid value for on_error: {v}')
45
+ if isinstance(v, str):
46
+ return v.lower() != cls.IGNORE.value
47
+ return True
48
+
49
+
22
50
  class InsertableTable(Table):
23
51
  """A `Table` that allows inserting and deleting rows."""
24
52
 
@@ -86,62 +114,75 @@ class InsertableTable(Table):
86
114
  @overload
87
115
  def insert(
88
116
  self,
89
- rows: Iterable[dict[str, Any]],
117
+ source: Optional[TableDataSource] = None,
90
118
  /,
91
119
  *,
92
- print_stats: bool = False,
120
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
121
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
93
122
  on_error: Literal['abort', 'ignore'] = 'abort',
123
+ print_stats: bool = False,
124
+ **kwargs: Any,
94
125
  ) -> UpdateStatus: ...
95
126
 
96
127
  @overload
97
128
  def insert(
98
- self, *, print_stats: bool = False, on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
129
+ self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
99
130
  ) -> UpdateStatus: ...
100
131
 
101
- def insert( # type: ignore[misc]
132
+ def insert(
102
133
  self,
103
- rows: Optional[Iterable[dict[str, Any]]] = None,
134
+ source: Optional[TableDataSource] = None,
104
135
  /,
105
136
  *,
106
- print_stats: bool = False,
137
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
138
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
107
139
  on_error: Literal['abort', 'ignore'] = 'abort',
140
+ print_stats: bool = False,
108
141
  **kwargs: Any,
109
142
  ) -> UpdateStatus:
110
- if rows is None:
111
- rows = [kwargs]
112
- else:
113
- rows = list(rows)
114
- if len(kwargs) > 0:
115
- raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
116
-
117
- fail_on_exception = on_error == 'abort'
118
-
119
- if not isinstance(rows, list):
120
- raise excs.Error('rows must be a list of dictionaries')
121
- if len(rows) == 0:
122
- raise excs.Error('rows must not be empty')
123
- for row in rows:
124
- if not isinstance(row, dict):
125
- raise excs.Error('rows must be a list of dictionaries')
126
- self._validate_input_rows(rows)
127
- with Env.get().begin_xact():
128
- status = self._tbl_version.get().insert(
129
- rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception
130
- )
131
-
132
- if status.num_excs == 0:
133
- cols_with_excs_str = ''
134
- else:
135
- cols_with_excs_str = (
136
- f' across {len(status.cols_with_excs)} column{"" if len(status.cols_with_excs) == 1 else "s"}'
137
- )
138
- cols_with_excs_str += f' ({", ".join(status.cols_with_excs)})'
139
- msg = (
140
- f'Inserted {status.num_rows} row{"" if status.num_rows == 1 else "s"} '
141
- f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}{cols_with_excs_str}.'
143
+ from pixeltable.io.table_data_conduit import UnkTableDataConduit
144
+
145
+ table = self
146
+ if source is None:
147
+ source = [kwargs]
148
+ kwargs = None
149
+
150
+ tds = UnkTableDataConduit(
151
+ source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
142
152
  )
143
- Env.get().console_logger.info(msg)
144
- _logger.info(f'InsertableTable {self._name}: {msg}')
153
+ data_source = tds.specialize()
154
+ if data_source.source_column_map is None:
155
+ data_source.src_pk = []
156
+
157
+ assert isinstance(table, Table)
158
+ data_source.add_table_info(table)
159
+ data_source.prepare_for_insert_into_table()
160
+
161
+ fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
162
+ return table.insert_table_data_source(
163
+ data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
164
+ )
165
+
166
+ def insert_table_data_source(
167
+ self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
168
+ ) -> pxt.UpdateStatus:
169
+ """Insert row batches into this table from a `TableDataConduit`."""
170
+ from pixeltable.io.table_data_conduit import DFTableDataConduit, TableDataConduit
171
+
172
+ status = pxt.UpdateStatus()
173
+ with Env.get().begin_xact():
174
+ if isinstance(data_source, DFTableDataConduit):
175
+ status += self._tbl_version.get().insert(
176
+ rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
177
+ )
178
+ else:
179
+ for row_batch in data_source.valid_row_batch():
180
+ status += self._tbl_version.get().insert(
181
+ rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
182
+ )
183
+
184
+ Env.get().console_logger.info(status.insert_msg)
185
+
145
186
  FileCache.get().emit_eviction_warnings()
146
187
  return status
147
188
 
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ from typing import Iterator
4
5
 
5
6
  from pixeltable import exceptions as excs
6
7
 
@@ -55,5 +56,19 @@ class Path:
55
56
  is_prefix = self.components == other.components[: self.len]
56
57
  return is_prefix and (self.len == (other.len - 1) or not is_parent)
57
58
 
59
+ def ancestors(self) -> Iterator[Path]:
60
+ """
61
+ Return all ancestors of this path in top-down order including root.
62
+ If this path is for the root directory, which has no parent, then nothing is yielded.
63
+ """
64
+ if self.is_root:
65
+ return
66
+ else:
67
+ for i in range(0, len(self.components)):
68
+ yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
69
+
58
70
  def __str__(self) -> str:
59
71
  return '.'.join(self.components)
72
+
73
+ def __lt__(self, other: Path) -> bool:
74
+ return str(self) < str(other)
@@ -2,7 +2,7 @@ from abc import abstractmethod
2
2
  from typing import TYPE_CHECKING, Any, Optional
3
3
  from uuid import UUID
4
4
 
5
- import pixeltable.env as env
5
+ from pixeltable.env import Env
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from pixeltable import catalog
@@ -28,24 +28,19 @@ class SchemaObject:
28
28
  """Returns the parent directory of this schema object."""
29
29
  from .catalog import Catalog
30
30
 
31
- with env.Env.get().begin_xact():
31
+ with Env.get().begin_xact():
32
32
  if self._dir_id is None:
33
33
  return None
34
34
  return Catalog.get().get_dir(self._dir_id)
35
35
 
36
36
  def _path(self) -> str:
37
37
  """Returns the path to this schema object."""
38
- with env.Env.get().begin_xact():
39
- from .catalog import Catalog
38
+ from .catalog import Catalog
40
39
 
41
- cat = Catalog.get()
42
- dir_path = cat.get_dir_path(self._dir_id)
43
- if dir_path == '':
44
- # Either this is the root directory, with empty path, or its parent is the
45
- # root directory. Either way, we return just the name.
46
- return self._name
47
- else:
48
- return f'{dir_path}.{self._name}'
40
+ assert self._dir_id is not None
41
+ with Env.get().begin_xact():
42
+ path = Catalog.get().get_dir_path(self._dir_id)
43
+ return str(path.append(self._name))
49
44
 
50
45
  def get_metadata(self) -> dict[str, Any]:
51
46
  """Returns metadata associated with this schema object."""
@@ -8,6 +8,7 @@ from pathlib import Path
8
8
  from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
9
9
 
10
10
  from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
11
+ from keyword import iskeyword as is_python_keyword
11
12
  from uuid import UUID
12
13
 
13
14
  import pandas as pd
@@ -42,9 +43,11 @@ from .table_version_handle import TableVersionHandle
42
43
  from .table_version_path import TableVersionPath
43
44
 
44
45
  if TYPE_CHECKING:
46
+ import datasets # type: ignore[import-untyped]
45
47
  import torch.utils.data
46
48
 
47
49
  import pixeltable.plan
50
+ from pixeltable.globals import RowData, TableDataSource
48
51
 
49
52
  _logger = logging.getLogger('pixeltable')
50
53
 
@@ -171,8 +174,8 @@ class Table(SchemaObject):
171
174
 
172
175
  def _get_views(self, *, recursive: bool = True) -> list['Table']:
173
176
  cat = catalog.Catalog.get()
174
- view_ids = cat.get_views(self._id)
175
- views = [cat.get_tbl(id) for id in view_ids]
177
+ view_ids = cat.get_view_ids(self._id)
178
+ views = [cat.get_table_by_id(id) for id in view_ids]
176
179
  if recursive:
177
180
  views.extend([t for view in views for t in view._get_views(recursive=True)])
178
181
  return views
@@ -265,7 +268,7 @@ class Table(SchemaObject):
265
268
  if self._tbl_version_path.base is None:
266
269
  return None
267
270
  base_id = self._tbl_version_path.base.tbl_version.id
268
- return catalog.Catalog.get().get_tbl(base_id)
271
+ return catalog.Catalog.get().get_table_by_id(base_id)
269
272
 
270
273
  @property
271
274
  def _bases(self) -> list['Table']:
@@ -369,11 +372,6 @@ class Table(SchemaObject):
369
372
  pd_rows.append(row)
370
373
  return pd.DataFrame(pd_rows)
371
374
 
372
- def ensure_md_loaded(self) -> None:
373
- """Ensure that table metadata is loaded."""
374
- for col in self._tbl_version.get().cols_by_id.values():
375
- _ = col.value_expr
376
-
377
375
  def describe(self) -> None:
378
376
  """
379
377
  Print the table schema.
@@ -387,13 +385,9 @@ class Table(SchemaObject):
387
385
  print(repr(self))
388
386
 
389
387
  def _drop(self) -> None:
390
- cat = catalog.Catalog.get()
391
388
  self._check_is_dropped()
392
389
  self._tbl_version.get().drop()
393
390
  self._is_dropped = True
394
- # update catalog
395
- cat = catalog.Catalog.get()
396
- cat.remove_tbl(self._id)
397
391
 
398
392
  # TODO Factor this out into a separate module.
399
393
  # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
@@ -729,13 +723,18 @@ class Table(SchemaObject):
729
723
  columns.append(column)
730
724
  return columns
731
725
 
726
+ @classmethod
727
+ def validate_column_name(cls, name: str) -> None:
728
+ """Check that a name is usable as a pixeltable column name"""
729
+ if is_system_column_name(name) or is_python_keyword(name):
730
+ raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
731
+ if not is_valid_identifier(name):
732
+ raise excs.Error(f'Invalid column name: {name!r}')
733
+
732
734
  @classmethod
733
735
  def _verify_column(cls, col: Column) -> None:
734
736
  """Check integrity of user-supplied Column and supply defaults"""
735
- if is_system_column_name(col.name):
736
- raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
737
- if not is_valid_identifier(col.name):
738
- raise excs.Error(f'Invalid column name: {col.name!r}')
737
+ cls.validate_column_name(col.name)
739
738
  if col.stored is False and not col.is_computed:
740
739
  raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
741
740
  if col.stored is False and col.has_window_fn_call():
@@ -754,16 +753,6 @@ class Table(SchemaObject):
754
753
  cls._verify_column(col)
755
754
  column_names.add(col.name)
756
755
 
757
- def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
758
- col = self._tbl_version_path.get_column(column_name, include_bases)
759
- if col is None:
760
- raise excs.Error(f'Column {column_name!r} unknown')
761
-
762
- def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
763
- exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
764
- if not exists:
765
- raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
766
-
767
756
  def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
768
757
  """Drop a column from the table.
769
758
 
@@ -916,7 +905,7 @@ class Table(SchemaObject):
916
905
  Args:
917
906
  column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
918
907
  idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
919
- automatically. If specified, the name must be unique for this table.
908
+ automatically. If specified, the name must be unique for this table and a valid pixeltable column name.
920
909
  embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
921
910
  or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
922
911
  array of floats.
@@ -969,13 +958,7 @@ class Table(SchemaObject):
969
958
  """
970
959
  if self._tbl_version_path.is_snapshot():
971
960
  raise excs.Error('Cannot add an index to a snapshot')
972
- col: Column
973
- if isinstance(column, str):
974
- self.__check_column_name_exists(column, include_bases=True)
975
- col = self._tbl_version_path.get_column(column, include_bases=True)
976
- else:
977
- self.__check_column_ref_exists(column, include_bases=True)
978
- col = column.col
961
+ col = self._resolve_column_parameter(column)
979
962
 
980
963
  with Env.get().begin_xact():
981
964
  if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
@@ -995,6 +978,10 @@ class Table(SchemaObject):
995
978
  assert idx_name not in self._tbl_version.get().idxs_by_name
996
979
  from pixeltable.index import EmbeddingIndex
997
980
 
981
+ # idx_name must be a valid pixeltable column name
982
+ if idx_name is not None:
983
+ Table.validate_column_name(idx_name)
984
+
998
985
  # create the EmbeddingIndex instance to verify args
999
986
  idx = EmbeddingIndex(
1000
987
  col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
@@ -1058,17 +1045,28 @@ class Table(SchemaObject):
1058
1045
 
1059
1046
  col: Column = None
1060
1047
  if idx_name is None:
1061
- if isinstance(column, str):
1062
- self.__check_column_name_exists(column, include_bases=True)
1063
- col = self._tbl_version_path.get_column(column, include_bases=True)
1064
- else:
1065
- self.__check_column_ref_exists(column, include_bases=True)
1066
- col = column.col
1048
+ col = self._resolve_column_parameter(column)
1067
1049
  assert col is not None
1068
1050
 
1069
1051
  with Env.get().begin_xact():
1070
1052
  self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
1071
1053
 
1054
+ def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
1055
+ """Resolve a column parameter to a Column object"""
1056
+ col: Column = None
1057
+ if isinstance(column, str):
1058
+ col = self._tbl_version_path.get_column(column, include_bases=True)
1059
+ if col is None:
1060
+ raise excs.Error(f'Column {column!r} unknown')
1061
+ elif isinstance(column, ColumnRef):
1062
+ exists = self._tbl_version_path.has_column(column.col, include_bases=True)
1063
+ if not exists:
1064
+ raise excs.Error(f'Unknown column: {column.col.qualified_name}')
1065
+ col = column.col
1066
+ else:
1067
+ raise excs.Error(f'Invalid column parameter type: {type(column)}')
1068
+ return col
1069
+
1072
1070
  def drop_index(
1073
1071
  self,
1074
1072
  *,
@@ -1124,12 +1122,7 @@ class Table(SchemaObject):
1124
1122
 
1125
1123
  col: Column = None
1126
1124
  if idx_name is None:
1127
- if isinstance(column, str):
1128
- self.__check_column_name_exists(column, include_bases=True)
1129
- col = self._tbl_version_path.get_column(column, include_bases=True)
1130
- else:
1131
- self.__check_column_ref_exists(column, include_bases=True)
1132
- col = column.col
1125
+ col = self._resolve_column_parameter(column)
1133
1126
  assert col is not None
1134
1127
 
1135
1128
  with Env.get().begin_xact():
@@ -1154,49 +1147,62 @@ class Table(SchemaObject):
1154
1147
  raise excs.Error(f'Index {idx_name!r} does not exist')
1155
1148
  assert _if_not_exists == IfNotExistsParam.IGNORE
1156
1149
  return
1157
- idx_id = self._tbl_version.get().idxs_by_name[idx_name].id
1150
+ idx_info = self._tbl_version.get().idxs_by_name[idx_name]
1158
1151
  else:
1159
1152
  if col.tbl.id != self._tbl_version.id:
1160
1153
  raise excs.Error(
1161
1154
  f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
1162
1155
  )
1163
- idx_info = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1156
+ idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1164
1157
  if _idx_class is not None:
1165
- idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
1166
- if len(idx_info) == 0:
1158
+ idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
1159
+ if len(idx_info_list) == 0:
1167
1160
  _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
1168
1161
  if _if_not_exists == IfNotExistsParam.ERROR:
1169
1162
  raise excs.Error(f'Column {col.name!r} does not have an index')
1170
1163
  assert _if_not_exists == IfNotExistsParam.IGNORE
1171
1164
  return
1172
- if len(idx_info) > 1:
1165
+ if len(idx_info_list) > 1:
1173
1166
  raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
1174
- idx_id = idx_info[0].id
1175
- self._tbl_version.get().drop_index(idx_id)
1167
+ idx_info = idx_info_list[0]
1168
+
1169
+ # Find out if anything depends on this index
1170
+ dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
1171
+ if len(dependent_user_cols) > 0:
1172
+ raise excs.Error(
1173
+ f'Cannot drop index because the following columns depend on it:\n'
1174
+ f'{", ".join(c.name for c in dependent_user_cols)}'
1175
+ )
1176
+ self._tbl_version.get().drop_index(idx_info.id)
1176
1177
 
1177
1178
  @overload
1178
1179
  def insert(
1179
1180
  self,
1180
- rows: Iterable[dict[str, Any]],
1181
+ source: TableDataSource,
1181
1182
  /,
1182
1183
  *,
1183
- print_stats: bool = False,
1184
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1185
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1184
1186
  on_error: Literal['abort', 'ignore'] = 'abort',
1187
+ print_stats: bool = False,
1188
+ **kwargs: Any,
1185
1189
  ) -> UpdateStatus: ...
1186
1190
 
1187
1191
  @overload
1188
1192
  def insert(
1189
- self, *, print_stats: bool = False, on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
1193
+ self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
1190
1194
  ) -> UpdateStatus: ...
1191
1195
 
1192
- @abc.abstractmethod # type: ignore[misc]
1196
+ @abc.abstractmethod
1193
1197
  def insert(
1194
1198
  self,
1195
- rows: Optional[Iterable[dict[str, Any]]] = None,
1199
+ source: Optional[TableDataSource] = None,
1196
1200
  /,
1197
1201
  *,
1198
- print_stats: bool = False,
1202
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1203
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1199
1204
  on_error: Literal['abort', 'ignore'] = 'abort',
1205
+ print_stats: bool = False,
1200
1206
  **kwargs: Any,
1201
1207
  ) -> UpdateStatus:
1202
1208
  """Inserts rows into this table. There are two mutually exclusive call patterns:
@@ -1205,11 +1211,12 @@ class Table(SchemaObject):
1205
1211
 
1206
1212
  ```python
1207
1213
  insert(
1208
- rows: Iterable[dict[str, Any]],
1214
+ source: TableDataSource,
1209
1215
  /,
1210
1216
  *,
1217
+ on_error: Literal['abort', 'ignore'] = 'abort',
1211
1218
  print_stats: bool = False,
1212
- on_error: Literal['abort', 'ignore'] = 'abort'
1219
+ **kwargs: Any,
1213
1220
  )```
1214
1221
 
1215
1222
  To insert just a single row, you can use the more concise syntax:
@@ -1217,23 +1224,25 @@ class Table(SchemaObject):
1217
1224
  ```python
1218
1225
  insert(
1219
1226
  *,
1220
- print_stats: bool = False,
1221
1227
  on_error: Literal['abort', 'ignore'] = 'abort',
1228
+ print_stats: bool = False,
1222
1229
  **kwargs: Any
1223
1230
  )```
1224
1231
 
1225
1232
  Args:
1226
- rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
1227
- names to values.
1233
+ source: A data source from which data can be imported.
1228
1234
  kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
1229
- print_stats: If `True`, print statistics about the cost of computed columns.
1235
+ (if inserting multiple rows) Additional keyword arguments are passed to the data source.
1236
+ source_format: A hint about the format of the source data
1237
+ schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
1230
1238
  on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
1231
1239
  invalid media file (such as a corrupt image) for one of the inserted rows.
1232
1240
 
1233
1241
  - If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
1234
1242
  - If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
1235
- with errors will have a `None` value for that cell, with information about the error stored in the
1236
- corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
1243
+ with errors will have a `None` value for that cell, with information about the error stored in the
1244
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
1245
+ print_stats: If `True`, print statistics about the cost of computed columns.
1237
1246
 
1238
1247
  Returns:
1239
1248
  An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
@@ -1245,6 +1254,7 @@ class Table(SchemaObject):
1245
1254
  - The table has been dropped.
1246
1255
  - One of the rows being inserted does not conform to the table schema.
1247
1256
  - An error occurs during processing of computed columns, and `on_error='abort'`.
1257
+ - An error occurs while importing data from a source, and `on_error='abort'`.
1248
1258
 
1249
1259
  Examples:
1250
1260
  Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
@@ -1256,6 +1266,10 @@ class Table(SchemaObject):
1256
1266
  Insert a single row using the alternative syntax:
1257
1267
 
1258
1268
  >>> tbl.insert(a=3, b=3, c=3)
1269
+
1270
+ Insert rows from a CSV file:
1271
+
1272
+ >>> tbl.insert('path/to/file.csv')
1259
1273
  """
1260
1274
  raise NotImplementedError
1261
1275