pixeltable 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (122)
  1. pixeltable/__init__.py +2 -3
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +2 -1
  4. pixeltable/catalog/catalog.py +63 -36
  5. pixeltable/catalog/column.py +11 -4
  6. pixeltable/catalog/dir.py +5 -5
  7. pixeltable/catalog/globals.py +28 -14
  8. pixeltable/catalog/insertable_table.py +81 -43
  9. pixeltable/catalog/path.py +2 -2
  10. pixeltable/catalog/table.py +140 -109
  11. pixeltable/catalog/table_version.py +60 -43
  12. pixeltable/catalog/table_version_handle.py +3 -0
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/view.py +17 -9
  15. pixeltable/dataframe.py +5 -3
  16. pixeltable/env.py +109 -43
  17. pixeltable/exec/__init__.py +2 -0
  18. pixeltable/exec/aggregation_node.py +6 -8
  19. pixeltable/exec/cache_prefetch_node.py +4 -7
  20. pixeltable/exec/component_iteration_node.py +1 -3
  21. pixeltable/exec/data_row_batch.py +1 -2
  22. pixeltable/exec/exec_context.py +1 -1
  23. pixeltable/exec/exec_node.py +2 -3
  24. pixeltable/exec/expr_eval/__init__.py +2 -0
  25. pixeltable/exec/expr_eval/evaluators.py +137 -20
  26. pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
  27. pixeltable/exec/expr_eval/globals.py +68 -7
  28. pixeltable/exec/expr_eval/schedulers.py +25 -23
  29. pixeltable/exec/in_memory_data_node.py +8 -6
  30. pixeltable/exec/row_update_node.py +3 -4
  31. pixeltable/exec/sql_node.py +16 -17
  32. pixeltable/exprs/__init__.py +3 -2
  33. pixeltable/exprs/arithmetic_expr.py +2 -0
  34. pixeltable/exprs/column_property_ref.py +1 -1
  35. pixeltable/exprs/column_ref.py +39 -3
  36. pixeltable/exprs/compound_predicate.py +1 -1
  37. pixeltable/exprs/data_row.py +17 -1
  38. pixeltable/exprs/expr.py +51 -21
  39. pixeltable/exprs/function_call.py +34 -2
  40. pixeltable/exprs/globals.py +12 -0
  41. pixeltable/exprs/json_mapper.py +95 -48
  42. pixeltable/exprs/json_path.py +3 -10
  43. pixeltable/exprs/method_ref.py +2 -2
  44. pixeltable/exprs/object_ref.py +2 -2
  45. pixeltable/exprs/row_builder.py +33 -6
  46. pixeltable/exprs/similarity_expr.py +6 -21
  47. pixeltable/exprs/sql_element_cache.py +1 -1
  48. pixeltable/exprs/string_op.py +107 -0
  49. pixeltable/ext/__init__.py +1 -1
  50. pixeltable/ext/functions/__init__.py +1 -1
  51. pixeltable/ext/functions/whisperx.py +1 -1
  52. pixeltable/ext/functions/yolox.py +22 -65
  53. pixeltable/func/aggregate_function.py +1 -1
  54. pixeltable/func/callable_function.py +2 -5
  55. pixeltable/func/expr_template_function.py +22 -2
  56. pixeltable/func/function.py +4 -5
  57. pixeltable/func/function_registry.py +1 -1
  58. pixeltable/func/signature.py +1 -1
  59. pixeltable/func/tools.py +2 -2
  60. pixeltable/func/udf.py +2 -2
  61. pixeltable/functions/__init__.py +2 -2
  62. pixeltable/functions/anthropic.py +2 -2
  63. pixeltable/functions/audio.py +1 -1
  64. pixeltable/functions/deepseek.py +1 -1
  65. pixeltable/functions/fireworks.py +1 -1
  66. pixeltable/functions/globals.py +22 -11
  67. pixeltable/functions/huggingface.py +1 -1
  68. pixeltable/functions/image.py +1 -1
  69. pixeltable/functions/json.py +1 -1
  70. pixeltable/functions/llama_cpp.py +1 -1
  71. pixeltable/functions/math.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +1 -1
  74. pixeltable/functions/openai.py +2 -2
  75. pixeltable/functions/replicate.py +1 -1
  76. pixeltable/functions/string.py +1 -1
  77. pixeltable/functions/timestamp.py +1 -1
  78. pixeltable/functions/together.py +1 -1
  79. pixeltable/functions/util.py +1 -1
  80. pixeltable/functions/video.py +2 -2
  81. pixeltable/functions/vision.py +2 -2
  82. pixeltable/globals.py +85 -33
  83. pixeltable/index/embedding_index.py +12 -1
  84. pixeltable/io/__init__.py +8 -5
  85. pixeltable/io/datarows.py +138 -0
  86. pixeltable/io/external_store.py +8 -5
  87. pixeltable/io/fiftyone.py +6 -7
  88. pixeltable/io/globals.py +7 -160
  89. pixeltable/io/hf_datasets.py +21 -98
  90. pixeltable/io/label_studio.py +21 -20
  91. pixeltable/io/pandas.py +35 -48
  92. pixeltable/io/parquet.py +17 -42
  93. pixeltable/io/table_data_conduit.py +569 -0
  94. pixeltable/io/utils.py +6 -21
  95. pixeltable/iterators/__init__.py +1 -1
  96. pixeltable/metadata/__init__.py +6 -4
  97. pixeltable/metadata/converters/convert_24.py +3 -3
  98. pixeltable/metadata/converters/convert_25.py +1 -1
  99. pixeltable/metadata/converters/convert_29.py +1 -1
  100. pixeltable/metadata/converters/convert_30.py +50 -0
  101. pixeltable/metadata/converters/util.py +26 -1
  102. pixeltable/metadata/notes.py +1 -0
  103. pixeltable/metadata/schema.py +3 -0
  104. pixeltable/store.py +2 -2
  105. pixeltable/type_system.py +19 -7
  106. pixeltable/utils/arrow.py +32 -7
  107. pixeltable/utils/console_output.py +3 -2
  108. pixeltable/utils/coroutine.py +3 -3
  109. pixeltable/utils/dbms.py +66 -0
  110. pixeltable/utils/documents.py +61 -67
  111. pixeltable/utils/filecache.py +1 -1
  112. pixeltable/utils/http_server.py +3 -2
  113. pixeltable/utils/pytorch.py +1 -1
  114. pixeltable/utils/sql.py +1 -1
  115. pixeltable-0.3.11.dist-info/METADATA +436 -0
  116. pixeltable-0.3.11.dist-info/RECORD +179 -0
  117. {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/WHEEL +1 -1
  118. pixeltable/catalog/path_dict.py +0 -169
  119. pixeltable-0.3.9.dist-info/METADATA +0 -382
  120. pixeltable-0.3.9.dist-info/RECORD +0 -175
  121. {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/LICENSE +0 -0
  122. {pixeltable-0.3.9.dist-info → pixeltable-0.3.11.dist-info}/entry_points.txt +0 -0
@@ -27,7 +27,7 @@ class Path:
27
27
 
28
28
  @property
29
29
  def is_root(self) -> bool:
30
- return self.components[0] == ''
30
+ return not self.components[0]
31
31
 
32
32
  @property
33
33
  def parent(self) -> Path:
@@ -43,7 +43,7 @@ class Path:
43
43
  if self.is_root:
44
44
  return Path(name)
45
45
  else:
46
- return Path(f'{str(self)}.{name}')
46
+ return Path(f'{self!s}.{name}')
47
47
 
48
48
  def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
49
49
  """
@@ -8,20 +8,16 @@ from pathlib import Path
8
8
  from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
9
9
 
10
10
  from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
11
+ from keyword import iskeyword as is_python_keyword
11
12
  from uuid import UUID
12
13
 
13
14
  import pandas as pd
14
15
  import sqlalchemy as sql
15
16
 
16
17
  import pixeltable as pxt
17
- import pixeltable.catalog as catalog
18
- import pixeltable.env as env
19
- import pixeltable.exceptions as excs
20
- import pixeltable.exprs as exprs
21
- import pixeltable.index as index
22
- import pixeltable.metadata.schema as schema
23
- import pixeltable.type_system as ts
18
+ from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
24
19
  from pixeltable.env import Env
20
+ from pixeltable.metadata import schema
25
21
 
26
22
  from ..exprs import ColumnRef
27
23
  from ..utils.description_helper import DescriptionHelper
@@ -37,7 +33,6 @@ from .globals import (
37
33
  is_valid_identifier,
38
34
  )
39
35
  from .schema_object import SchemaObject
40
- from .table_version import TableVersion
41
36
  from .table_version_handle import TableVersionHandle
42
37
  from .table_version_path import TableVersionPath
43
38
 
@@ -45,6 +40,7 @@ if TYPE_CHECKING:
45
40
  import torch.utils.data
46
41
 
47
42
  import pixeltable.plan
43
+ from pixeltable.globals import TableDataSource
48
44
 
49
45
  _logger = logging.getLogger('pixeltable')
50
46
 
@@ -79,7 +75,7 @@ class Table(SchemaObject):
79
75
  (
80
76
  f'UPDATE {schema.Table.__table__} '
81
77
  f'SET {schema.Table.dir_id.name} = :new_dir_id, '
82
- f" {schema.Table.md.name}['name'] = :new_name "
78
+ f" {schema.Table.md.name} = jsonb_set({schema.Table.md.name}, '{{name}}', (:new_name)::jsonb) "
83
79
  f'WHERE {schema.Table.id.name} = :id'
84
80
  )
85
81
  )
@@ -229,15 +225,15 @@ class Table(SchemaObject):
229
225
  """Return rows from this table."""
230
226
  return self._df().collect()
231
227
 
232
- def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
228
+ def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
233
229
  """Return rows from this table."""
234
230
  return self._df().show(*args, **kwargs)
235
231
 
236
- def head(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
232
+ def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
237
233
  """Return the first n rows inserted into this table."""
238
234
  return self._df().head(*args, **kwargs)
239
235
 
240
- def tail(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
236
+ def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
241
237
  """Return the last n rows inserted into this table."""
242
238
  return self._df().tail(*args, **kwargs)
243
239
 
@@ -284,7 +280,7 @@ class Table(SchemaObject):
284
280
  return self._tbl_version.get().comment
285
281
 
286
282
  @property
287
- def _num_retained_versions(self):
283
+ def _num_retained_versions(self) -> int:
288
284
  return self._tbl_version.get().num_retained_versions
289
285
 
290
286
  @property
@@ -403,12 +399,12 @@ class Table(SchemaObject):
403
399
  def _column_has_dependents(self, col: Column) -> bool:
404
400
  """Returns True if the column has dependents, False otherwise."""
405
401
  assert col is not None
406
- assert col.name in self._schema.keys()
402
+ assert col.name in self._schema
407
403
  if any(c.name is not None for c in col.dependent_cols):
408
404
  return True
409
405
  return any(
410
406
  col in store.get_local_columns()
411
- for view in [self] + self._get_views(recursive=True)
407
+ for view in (self, *self._get_views(recursive=True))
412
408
  for store in view._tbl_version.get().external_stores.values()
413
409
  )
414
410
 
@@ -426,7 +422,7 @@ class Table(SchemaObject):
426
422
  raise excs.Error(f'Duplicate column name: {new_col_name!r}')
427
423
  elif if_exists == IfExistsParam.IGNORE:
428
424
  cols_to_ignore.append(new_col_name)
429
- elif if_exists == IfExistsParam.REPLACE or if_exists == IfExistsParam.REPLACE_FORCE:
425
+ elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
430
426
  if new_col_name not in self._tbl_version.get().cols_by_name:
431
427
  # for views, it is possible that the existing column
432
428
  # is a base table column; in that case, we should not
@@ -437,7 +433,8 @@ class Table(SchemaObject):
437
433
  # replace directive if column has dependents.
438
434
  if self._column_has_dependents(col):
439
435
  raise excs.Error(
440
- f'Column {new_col_name!r} already exists and has dependents. Cannot {if_exists.name.lower()} it.'
436
+ f'Column {new_col_name!r} already exists and has dependents. '
437
+ f'Cannot {if_exists.name.lower()} it.'
441
438
  )
442
439
  self.drop_column(new_col_name)
443
440
  assert new_col_name not in self._tbl_version.get().cols_by_name
@@ -449,8 +446,8 @@ class Table(SchemaObject):
449
446
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
450
447
  ) -> UpdateStatus:
451
448
  """
452
- Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed columns,
453
- use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
449
+ Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
450
+ columns, use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
454
451
 
455
452
  The format of the `schema` argument is identical to the format of the schema in a call to
456
453
  [`create_table()`][pixeltable.globals.create_table].
@@ -461,10 +458,12 @@ class Table(SchemaObject):
461
458
 
462
459
  - `'error'`: an exception will be raised.
463
460
  - `'ignore'`: do nothing and return.
464
- - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no dependents.
461
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no
462
+ dependents.
465
463
 
466
464
  Note that the `if_exists` parameter is applied to all columns in the schema.
467
- To apply different behaviors to different columns, please use [`add_column()`][pixeltable.Table.add_column] for each column.
465
+ To apply different behaviors to different columns, please use
466
+ [`add_column()`][pixeltable.Table.add_column] for each column.
468
467
 
469
468
  Returns:
470
469
  Information about the execution status of the operation.
@@ -525,7 +524,8 @@ class Table(SchemaObject):
525
524
 
526
525
  - `'error'`: an exception will be raised.
527
526
  - `'ignore'`: do nothing and return.
528
- - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has no dependents.
527
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, if it has
528
+ no dependents.
529
529
 
530
530
  Returns:
531
531
  Information about the execution status of the operation.
@@ -556,7 +556,7 @@ class Table(SchemaObject):
556
556
  col_type = next(iter(kwargs.values()))
557
557
  if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
558
558
  raise excs.Error(
559
- f'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
559
+ 'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
560
560
  )
561
561
  return self.add_columns(kwargs, if_exists=if_exists)
562
562
 
@@ -587,7 +587,8 @@ class Table(SchemaObject):
587
587
 
588
588
  - `'error'`: an exception will be raised.
589
589
  - `'ignore'`: do nothing and return.
590
- - `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has no dependents.
590
+ - `'replace' or 'replace_force'`: drop the existing column and add the new column, iff it has
591
+ no dependents.
591
592
 
592
593
  Returns:
593
594
  Information about the execution status of the operation.
@@ -611,7 +612,8 @@ class Table(SchemaObject):
611
612
  raise excs.Error('Cannot add column to a snapshot.')
612
613
  if len(kwargs) != 1:
613
614
  raise excs.Error(
614
- f'add_computed_column() requires exactly one keyword argument of the form "column-name=type|value-expression"; '
615
+ f'add_computed_column() requires exactly one keyword argument of the form '
616
+ '"column-name=type|value-expression"; '
615
617
  f'got {len(kwargs)} arguments instead ({", ".join(list(kwargs.keys()))})'
616
618
  )
617
619
  col_name, spec = next(iter(kwargs.items()))
@@ -622,6 +624,15 @@ class Table(SchemaObject):
622
624
  if stored is not None:
623
625
  col_schema['stored'] = stored
624
626
 
627
+ # Raise an error if the column expression refers to a column error property
628
+ if isinstance(spec, exprs.Expr):
629
+ for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
630
+ if e.is_error_prop():
631
+ raise excs.Error(
632
+ 'Use of a reference to an error property of another column is not allowed in a computed '
633
+ f'column. The specified computation for this column contains this reference: `{e!r}`'
634
+ )
635
+
625
636
  with Env.get().begin_xact():
626
637
  # handle existing columns based on if_exists parameter
627
638
  cols_to_ignore = self._ignore_or_drop_existing_columns(
@@ -648,16 +659,15 @@ class Table(SchemaObject):
648
659
  """
649
660
  assert isinstance(spec, dict)
650
661
  valid_keys = {'type', 'value', 'stored', 'media_validation'}
651
- for k in spec.keys():
662
+ for k in spec:
652
663
  if k not in valid_keys:
653
664
  raise excs.Error(f'Column {name}: invalid key {k!r}')
654
665
 
655
666
  if 'type' not in spec and 'value' not in spec:
656
667
  raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
657
668
 
658
- if 'type' in spec:
659
- if not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
660
- raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
669
+ if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
670
+ raise excs.Error(f'Column {name}: "type" must be a type or ColumnType, got {spec["type"]}')
661
671
 
662
672
  if 'value' in spec:
663
673
  value_expr = exprs.Expr.from_object(spec['value'])
@@ -720,20 +730,25 @@ class Table(SchemaObject):
720
730
  columns.append(column)
721
731
  return columns
722
732
 
733
+ @classmethod
734
+ def validate_column_name(cls, name: str) -> None:
735
+ """Check that a name is usable as a pixeltable column name"""
736
+ if is_system_column_name(name) or is_python_keyword(name):
737
+ raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
738
+ if not is_valid_identifier(name):
739
+ raise excs.Error(f'Invalid column name: {name!r}')
740
+
723
741
  @classmethod
724
742
  def _verify_column(cls, col: Column) -> None:
725
743
  """Check integrity of user-supplied Column and supply defaults"""
726
- if is_system_column_name(col.name):
727
- raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
728
- if not is_valid_identifier(col.name):
729
- raise excs.Error(f'Invalid column name: {col.name!r}')
744
+ cls.validate_column_name(col.name)
730
745
  if col.stored is False and not col.is_computed:
731
746
  raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
732
747
  if col.stored is False and col.has_window_fn_call():
733
748
  raise excs.Error(
734
749
  (
735
- f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a streaming '
736
- f'function'
750
+ f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
751
+ f'streaming function'
737
752
  )
738
753
  )
739
754
 
@@ -745,16 +760,6 @@ class Table(SchemaObject):
745
760
  cls._verify_column(col)
746
761
  column_names.add(col.name)
747
762
 
748
- def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
749
- col = self._tbl_version_path.get_column(column_name, include_bases)
750
- if col is None:
751
- raise excs.Error(f'Column {column_name!r} unknown')
752
-
753
- def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
754
- exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
755
- if not exists:
756
- raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
757
-
758
763
  def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
759
764
  """Drop a column from the table.
760
765
 
@@ -789,21 +794,21 @@ class Table(SchemaObject):
789
794
  if self._tbl_version_path.is_snapshot():
790
795
  raise excs.Error('Cannot drop column from a snapshot.')
791
796
  col: Column = None
792
- _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
797
+ if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
793
798
  if isinstance(column, str):
794
799
  col = self._tbl_version_path.get_column(column, include_bases=False)
795
800
  if col is None:
796
- if _if_not_exists == IfNotExistsParam.ERROR:
801
+ if if_not_exists_ == IfNotExistsParam.ERROR:
797
802
  raise excs.Error(f'Column {column!r} unknown')
798
- assert _if_not_exists == IfNotExistsParam.IGNORE
803
+ assert if_not_exists_ == IfNotExistsParam.IGNORE
799
804
  return
800
805
  col = self._tbl_version.get().cols_by_name[column]
801
806
  else:
802
807
  exists = self._tbl_version_path.has_column(column.col, include_bases=False)
803
808
  if not exists:
804
- if _if_not_exists == IfNotExistsParam.ERROR:
809
+ if if_not_exists_ == IfNotExistsParam.ERROR:
805
810
  raise excs.Error(f'Unknown column: {column.col.qualified_name}')
806
- assert _if_not_exists == IfNotExistsParam.IGNORE
811
+ assert if_not_exists_ == IfNotExistsParam.IGNORE
807
812
  return
808
813
  col = column.col
809
814
 
@@ -819,7 +824,7 @@ class Table(SchemaObject):
819
824
  # (transitive) views of this table.
820
825
  dependent_stores = [
821
826
  (view, store)
822
- for view in [self] + self._get_views(recursive=True)
827
+ for view in (self, *self._get_views(recursive=True))
823
828
  for store in view._tbl_version.get().external_stores.values()
824
829
  if col in store.get_local_columns()
825
830
  ]
@@ -907,7 +912,7 @@ class Table(SchemaObject):
907
912
  Args:
908
913
  column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
909
914
  idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
910
- automatically. If specified, the name must be unique for this table.
915
+ automatically. If specified, the name must be unique for this table and a valid pixeltable column name.
911
916
  embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
912
917
  or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
913
918
  array of floats.
@@ -926,7 +931,8 @@ class Table(SchemaObject):
926
931
  - `'replace'` or `'replace_force'`: replace the existing index with the new one.
927
932
 
928
933
  Raises:
929
- Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if the specified column does not exist.
934
+ Error: If an index with the specified name already exists for the table and `if_exists='error'`, or if
935
+ the specified column does not exist.
930
936
 
931
937
  Examples:
932
938
  Add an index to the `img` column of the table `my_table`:
@@ -960,37 +966,35 @@ class Table(SchemaObject):
960
966
  """
961
967
  if self._tbl_version_path.is_snapshot():
962
968
  raise excs.Error('Cannot add an index to a snapshot')
963
- col: Column
964
- if isinstance(column, str):
965
- self.__check_column_name_exists(column, include_bases=True)
966
- col = self._tbl_version_path.get_column(column, include_bases=True)
967
- else:
968
- self.__check_column_ref_exists(column, include_bases=True)
969
- col = column.col
969
+ col = self._resolve_column_parameter(column)
970
970
 
971
971
  with Env.get().begin_xact():
972
972
  if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
973
- _if_exists = IfExistsParam.validated(if_exists, 'if_exists')
973
+ if_exists_ = IfExistsParam.validated(if_exists, 'if_exists')
974
974
  # An index with the same name already exists.
975
975
  # Handle it according to if_exists.
976
- if _if_exists == IfExistsParam.ERROR:
976
+ if if_exists_ == IfExistsParam.ERROR:
977
977
  raise excs.Error(f'Duplicate index name: {idx_name}')
978
978
  if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
979
979
  raise excs.Error(
980
- f'Index `{idx_name}` is not an embedding index. Cannot {_if_exists.name.lower()} it.'
980
+ f'Index `{idx_name}` is not an embedding index. Cannot {if_exists_.name.lower()} it.'
981
981
  )
982
- if _if_exists == IfExistsParam.IGNORE:
982
+ if if_exists_ == IfExistsParam.IGNORE:
983
983
  return
984
- assert _if_exists == IfExistsParam.REPLACE or _if_exists == IfExistsParam.REPLACE_FORCE
984
+ assert if_exists_ in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE)
985
985
  self.drop_index(idx_name=idx_name)
986
986
  assert idx_name not in self._tbl_version.get().idxs_by_name
987
987
  from pixeltable.index import EmbeddingIndex
988
988
 
989
+ # idx_name must be a valid pixeltable column name
990
+ if idx_name is not None:
991
+ Table.validate_column_name(idx_name)
992
+
989
993
  # create the EmbeddingIndex instance to verify args
990
994
  idx = EmbeddingIndex(
991
995
  col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
992
996
  )
993
- status = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
997
+ _ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
994
998
  # TODO: how to deal with exceptions here? drop the index and raise?
995
999
  FileCache.get().emit_eviction_warnings()
996
1000
 
@@ -1049,17 +1053,28 @@ class Table(SchemaObject):
1049
1053
 
1050
1054
  col: Column = None
1051
1055
  if idx_name is None:
1052
- if isinstance(column, str):
1053
- self.__check_column_name_exists(column, include_bases=True)
1054
- col = self._tbl_version_path.get_column(column, include_bases=True)
1055
- else:
1056
- self.__check_column_ref_exists(column, include_bases=True)
1057
- col = column.col
1056
+ col = self._resolve_column_parameter(column)
1058
1057
  assert col is not None
1059
1058
 
1060
1059
  with Env.get().begin_xact():
1061
1060
  self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
1062
1061
 
1062
+ def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
1063
+ """Resolve a column parameter to a Column object"""
1064
+ col: Column = None
1065
+ if isinstance(column, str):
1066
+ col = self._tbl_version_path.get_column(column, include_bases=True)
1067
+ if col is None:
1068
+ raise excs.Error(f'Column {column!r} unknown')
1069
+ elif isinstance(column, ColumnRef):
1070
+ exists = self._tbl_version_path.has_column(column.col, include_bases=True)
1071
+ if not exists:
1072
+ raise excs.Error(f'Unknown column: {column.col.qualified_name}')
1073
+ col = column.col
1074
+ else:
1075
+ raise excs.Error(f'Invalid column parameter type: {type(column)}')
1076
+ return col
1077
+
1063
1078
  def drop_index(
1064
1079
  self,
1065
1080
  *,
@@ -1115,12 +1130,7 @@ class Table(SchemaObject):
1115
1130
 
1116
1131
  col: Column = None
1117
1132
  if idx_name is None:
1118
- if isinstance(column, str):
1119
- self.__check_column_name_exists(column, include_bases=True)
1120
- col = self._tbl_version_path.get_column(column, include_bases=True)
1121
- else:
1122
- self.__check_column_ref_exists(column, include_bases=True)
1123
- col = column.col
1133
+ col = self._resolve_column_parameter(column)
1124
1134
  assert col is not None
1125
1135
 
1126
1136
  with Env.get().begin_xact():
@@ -1139,55 +1149,68 @@ class Table(SchemaObject):
1139
1149
  assert (col is None) != (idx_name is None)
1140
1150
 
1141
1151
  if idx_name is not None:
1142
- _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
1152
+ if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
1143
1153
  if idx_name not in self._tbl_version.get().idxs_by_name:
1144
- if _if_not_exists == IfNotExistsParam.ERROR:
1154
+ if if_not_exists_ == IfNotExistsParam.ERROR:
1145
1155
  raise excs.Error(f'Index {idx_name!r} does not exist')
1146
- assert _if_not_exists == IfNotExistsParam.IGNORE
1156
+ assert if_not_exists_ == IfNotExistsParam.IGNORE
1147
1157
  return
1148
- idx_id = self._tbl_version.get().idxs_by_name[idx_name].id
1158
+ idx_info = self._tbl_version.get().idxs_by_name[idx_name]
1149
1159
  else:
1150
1160
  if col.tbl.id != self._tbl_version.id:
1151
1161
  raise excs.Error(
1152
1162
  f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
1153
1163
  )
1154
- idx_info = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1164
+ idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1155
1165
  if _idx_class is not None:
1156
- idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
1157
- if len(idx_info) == 0:
1158
- _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
1159
- if _if_not_exists == IfNotExistsParam.ERROR:
1166
+ idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
1167
+ if len(idx_info_list) == 0:
1168
+ if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
1169
+ if if_not_exists_ == IfNotExistsParam.ERROR:
1160
1170
  raise excs.Error(f'Column {col.name!r} does not have an index')
1161
- assert _if_not_exists == IfNotExistsParam.IGNORE
1171
+ assert if_not_exists_ == IfNotExistsParam.IGNORE
1162
1172
  return
1163
- if len(idx_info) > 1:
1173
+ if len(idx_info_list) > 1:
1164
1174
  raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
1165
- idx_id = idx_info[0].id
1166
- self._tbl_version.get().drop_index(idx_id)
1175
+ idx_info = idx_info_list[0]
1176
+
1177
+ # Find out if anything depends on this index
1178
+ dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
1179
+ if len(dependent_user_cols) > 0:
1180
+ raise excs.Error(
1181
+ f'Cannot drop index because the following columns depend on it:\n'
1182
+ f'{", ".join(c.name for c in dependent_user_cols)}'
1183
+ )
1184
+ self._tbl_version.get().drop_index(idx_info.id)
1167
1185
 
1168
1186
  @overload
1169
1187
  def insert(
1170
1188
  self,
1171
- rows: Iterable[dict[str, Any]],
1189
+ source: TableDataSource,
1172
1190
  /,
1173
1191
  *,
1174
- print_stats: bool = False,
1192
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1193
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1175
1194
  on_error: Literal['abort', 'ignore'] = 'abort',
1195
+ print_stats: bool = False,
1196
+ **kwargs: Any,
1176
1197
  ) -> UpdateStatus: ...
1177
1198
 
1178
1199
  @overload
1179
1200
  def insert(
1180
- self, *, print_stats: bool = False, on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
1201
+ self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
1181
1202
  ) -> UpdateStatus: ...
1182
1203
 
1183
- @abc.abstractmethod # type: ignore[misc]
1204
+ @abc.abstractmethod
1184
1205
  def insert(
1185
1206
  self,
1186
- rows: Optional[Iterable[dict[str, Any]]] = None,
1207
+ source: Optional[TableDataSource] = None,
1187
1208
  /,
1188
1209
  *,
1189
- print_stats: bool = False,
1210
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1211
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1190
1212
  on_error: Literal['abort', 'ignore'] = 'abort',
1213
+ print_stats: bool = False,
1191
1214
  **kwargs: Any,
1192
1215
  ) -> UpdateStatus:
1193
1216
  """Inserts rows into this table. There are two mutually exclusive call patterns:
@@ -1196,11 +1219,12 @@ class Table(SchemaObject):
1196
1219
 
1197
1220
  ```python
1198
1221
  insert(
1199
- rows: Iterable[dict[str, Any]],
1222
+ source: TableDataSource,
1200
1223
  /,
1201
1224
  *,
1225
+ on_error: Literal['abort', 'ignore'] = 'abort',
1202
1226
  print_stats: bool = False,
1203
- on_error: Literal['abort', 'ignore'] = 'abort'
1227
+ **kwargs: Any,
1204
1228
  )```
1205
1229
 
1206
1230
  To insert just a single row, you can use the more concise syntax:
@@ -1208,23 +1232,25 @@ class Table(SchemaObject):
1208
1232
  ```python
1209
1233
  insert(
1210
1234
  *,
1211
- print_stats: bool = False,
1212
1235
  on_error: Literal['abort', 'ignore'] = 'abort',
1236
+ print_stats: bool = False,
1213
1237
  **kwargs: Any
1214
1238
  )```
1215
1239
 
1216
1240
  Args:
1217
- rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
1218
- names to values.
1241
+ source: A data source from which data can be imported.
1219
1242
  kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
1220
- print_stats: If `True`, print statistics about the cost of computed columns.
1243
+ (if inserting multiple rows) Additional keyword arguments are passed to the data source.
1244
+ source_format: A hint about the format of the source data
1245
+ schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
1221
1246
  on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
1222
1247
  invalid media file (such as a corrupt image) for one of the inserted rows.
1223
1248
 
1224
1249
  - If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
1225
1250
  - If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
1226
- with errors will have a `None` value for that cell, with information about the error stored in the
1227
- corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
1251
+ with errors will have a `None` value for that cell, with information about the error stored in the
1252
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
1253
+ print_stats: If `True`, print statistics about the cost of computed columns.
1228
1254
 
1229
1255
  Returns:
1230
1256
  An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
@@ -1236,6 +1262,7 @@ class Table(SchemaObject):
1236
1262
  - The table has been dropped.
1237
1263
  - One of the rows being inserted does not conform to the table schema.
1238
1264
  - An error occurs during processing of computed columns, and `on_error='abort'`.
1265
+ - An error occurs while importing data from a source, and `on_error='abort'`.
1239
1266
 
1240
1267
  Examples:
1241
1268
  Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
@@ -1247,6 +1274,10 @@ class Table(SchemaObject):
1247
1274
  Insert a single row using the alternative syntax:
1248
1275
 
1249
1276
  >>> tbl.insert(a=3, b=3, c=3)
1277
+
1278
+ Insert rows from a CSV file:
1279
+
1280
+ >>> tbl.insert('path/to/file.csv')
1250
1281
  """
1251
1282
  raise NotImplementedError
1252
1283
 
@@ -1318,7 +1349,7 @@ class Table(SchemaObject):
1318
1349
  rows = list(rows)
1319
1350
 
1320
1351
  row_updates: list[dict[Column, exprs.Expr]] = []
1321
- pk_col_names = set(c.name for c in self._tbl_version.get().primary_key_columns())
1352
+ pk_col_names = {c.name for c in self._tbl_version.get().primary_key_columns()}
1322
1353
 
1323
1354
  # pseudo-column _rowid: contains the rowid of the row to update and can be used instead of the primary key
1324
1355
  has_rowid = _ROWID_COLUMN_NAME in rows[0]
@@ -1328,16 +1359,16 @@ class Table(SchemaObject):
1328
1359
 
1329
1360
  for row_spec in rows:
1330
1361
  col_vals = self._tbl_version.get()._validate_update_spec(
1331
- row_spec, allow_pk=not has_rowid, allow_exprs=False
1362
+ row_spec, allow_pk=not has_rowid, allow_exprs=False, allow_media=False
1332
1363
  )
1333
1364
  if has_rowid:
1334
1365
  # we expect the _rowid column to be present for each row
1335
1366
  assert _ROWID_COLUMN_NAME in row_spec
1336
1367
  rowids.append(row_spec[_ROWID_COLUMN_NAME])
1337
1368
  else:
1338
- col_names = set(col.name for col in col_vals.keys())
1369
+ col_names = {col.name for col in col_vals}
1339
1370
  if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
1340
- missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
1371
+ missing_cols = pk_col_names - {col.name for col in col_vals}
1341
1372
  raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
1342
1373
  row_updates.append(col_vals)
1343
1374