pixeltable 0.2.11__tar.gz → 0.2.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic. More details are available on the original diff page.

Files changed (134)
  1. {pixeltable-0.2.11 → pixeltable-0.2.12}/PKG-INFO +3 -3
  2. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/__init__.py +2 -2
  3. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/__version__.py +2 -2
  4. pixeltable-0.2.12/pixeltable/catalog/schema_object.py +60 -0
  5. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/table.py +63 -25
  6. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/table_version.py +2 -29
  7. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/dataframe.py +1 -1
  8. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/column_ref.py +2 -7
  9. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/similarity_expr.py +27 -16
  10. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/openai.py +1 -1
  11. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/globals.py +51 -47
  12. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/index/embedding_index.py +28 -27
  13. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/io/external_store.py +2 -2
  14. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/io/globals.py +1 -1
  15. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/io/label_studio.py +3 -3
  16. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/__init__.py +1 -1
  17. pixeltable-0.2.12/pixeltable/metadata/converters/convert_17.py +26 -0
  18. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/tool/create_test_db_dump.py +1 -1
  19. {pixeltable-0.2.11 → pixeltable-0.2.12}/pyproject.toml +2 -2
  20. pixeltable-0.2.11/pixeltable/catalog/schema_object.py +0 -34
  21. {pixeltable-0.2.11 → pixeltable-0.2.12}/LICENSE +0 -0
  22. {pixeltable-0.2.11 → pixeltable-0.2.12}/README.md +0 -0
  23. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/__init__.py +0 -0
  24. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/catalog.py +0 -0
  25. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/column.py +0 -0
  26. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/dir.py +0 -0
  27. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/globals.py +0 -0
  28. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/insertable_table.py +0 -0
  29. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/named_function.py +0 -0
  30. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/path.py +0 -0
  31. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/path_dict.py +0 -0
  32. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/table_version_path.py +0 -0
  33. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/catalog/view.py +0 -0
  34. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/env.py +0 -0
  35. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exceptions.py +0 -0
  36. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/__init__.py +0 -0
  37. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/aggregation_node.py +0 -0
  38. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/cache_prefetch_node.py +0 -0
  39. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/component_iteration_node.py +0 -0
  40. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/data_row_batch.py +0 -0
  41. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/exec_context.py +0 -0
  42. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/exec_node.py +0 -0
  43. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/expr_eval_node.py +0 -0
  44. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/in_memory_data_node.py +0 -0
  45. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/media_validation_node.py +0 -0
  46. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exec/sql_scan_node.py +0 -0
  47. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/__init__.py +0 -0
  48. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/arithmetic_expr.py +0 -0
  49. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/array_slice.py +0 -0
  50. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/column_property_ref.py +0 -0
  51. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/comparison.py +0 -0
  52. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/compound_predicate.py +0 -0
  53. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/data_row.py +0 -0
  54. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/expr.py +0 -0
  55. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/expr_set.py +0 -0
  56. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/function_call.py +0 -0
  57. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/globals.py +0 -0
  58. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/image_member_access.py +0 -0
  59. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/in_predicate.py +0 -0
  60. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/inline_array.py +0 -0
  61. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/inline_dict.py +0 -0
  62. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/is_null.py +0 -0
  63. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/json_mapper.py +0 -0
  64. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/json_path.py +0 -0
  65. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/literal.py +0 -0
  66. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/object_ref.py +0 -0
  67. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/predicate.py +0 -0
  68. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/row_builder.py +0 -0
  69. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/rowid_ref.py +0 -0
  70. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/type_cast.py +0 -0
  71. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/exprs/variable.py +0 -0
  72. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/ext/__init__.py +0 -0
  73. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/ext/functions/whisperx.py +0 -0
  74. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/ext/functions/yolox.py +0 -0
  75. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/__init__.py +0 -0
  76. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/aggregate_function.py +0 -0
  77. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/callable_function.py +0 -0
  78. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/expr_template_function.py +0 -0
  79. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/function.py +0 -0
  80. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/function_registry.py +0 -0
  81. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/globals.py +0 -0
  82. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/query_template_function.py +0 -0
  83. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/signature.py +0 -0
  84. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/func/udf.py +0 -0
  85. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/__init__.py +0 -0
  86. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/eval.py +0 -0
  87. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/fireworks.py +0 -0
  88. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/globals.py +0 -0
  89. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/huggingface.py +0 -0
  90. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/image.py +0 -0
  91. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/string.py +0 -0
  92. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/together.py +0 -0
  93. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/util.py +0 -0
  94. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/video.py +0 -0
  95. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/functions/whisper.py +0 -0
  96. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/index/__init__.py +0 -0
  97. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/index/base.py +0 -0
  98. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/index/btree.py +0 -0
  99. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/io/__init__.py +0 -0
  100. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/io/hf_datasets.py +0 -0
  101. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/io/pandas.py +0 -0
  102. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/io/parquet.py +0 -0
  103. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/iterators/__init__.py +0 -0
  104. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/iterators/base.py +0 -0
  105. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/iterators/document.py +0 -0
  106. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/iterators/string.py +0 -0
  107. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/iterators/video.py +0 -0
  108. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/converters/convert_10.py +0 -0
  109. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/converters/convert_12.py +0 -0
  110. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/converters/convert_13.py +0 -0
  111. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/converters/convert_14.py +0 -0
  112. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/converters/convert_15.py +0 -0
  113. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/converters/convert_16.py +0 -0
  114. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/converters/util.py +0 -0
  115. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/metadata/schema.py +0 -0
  116. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/plan.py +0 -0
  117. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/store.py +0 -0
  118. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/tool/create_test_video.py +0 -0
  119. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/tool/embed_udf.py +0 -0
  120. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/type_system.py +0 -0
  121. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/__init__.py +0 -0
  122. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/arrow.py +0 -0
  123. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/coco.py +0 -0
  124. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/code.py +0 -0
  125. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/documents.py +0 -0
  126. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/filecache.py +0 -0
  127. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/formatter.py +0 -0
  128. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/help.py +0 -0
  129. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/http_server.py +0 -0
  130. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/media_store.py +0 -0
  131. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/pytorch.py +0 -0
  132. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/s3.py +0 -0
  133. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/sql.py +0 -0
  134. {pixeltable-0.2.11 → pixeltable-0.2.12}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.11
3
+ Version: 0.2.12
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
- Author: Marcel Kornacker
6
- Author-email: marcelk@gmail.com
5
+ Author: Pixeltable, Inc.
6
+ Author-email: contact@pixeltable.com
7
7
  Requires-Python: >=3.9,<4.0
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Programming Language :: Python :: 3.9
@@ -3,8 +3,8 @@ from .dataframe import DataFrame
3
3
  from .exceptions import Error
4
4
  from .exprs import RELATIVE_PATH_ROOT
5
5
  from .func import Function, udf, Aggregator, uda, expr_udf
6
- from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, rm_dir, \
7
- list_dirs, list_functions, get_path, configure_logging
6
+ from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, drop_dir, \
7
+ list_dirs, list_functions, configure_logging
8
8
  from .type_system import (
9
9
  ColumnType,
10
10
  StringType,
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.11"
3
- __version_tuple__ = (0, 2, 11)
2
+ __version__ = "0.2.12"
3
+ __version_tuple__ = (0, 2, 12)
@@ -0,0 +1,60 @@
1
+ from abc import abstractmethod
2
+ from typing import TYPE_CHECKING, Optional
3
+ from uuid import UUID
4
+
5
+ if TYPE_CHECKING:
6
+ from pixeltable import catalog
7
+
8
+
9
+ class SchemaObject:
10
+ """
11
+ Base class of all addressable objects within a Db.
12
+ Each object has an id, a name and a parent directory.
13
+ """
14
+ def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
15
+ # make these private so they don't collide with column names (id and name are fairly common)
16
+ self._id = obj_id
17
+ self._name = name
18
+ self._dir_id = dir_id
19
+
20
+ def _get_id(self) -> UUID:
21
+ return self._id
22
+
23
+ @property
24
+ def name(self) -> str:
25
+ """Returns the name of this schema object."""
26
+ return self._name
27
+
28
+ @property
29
+ def parent(self) -> Optional['catalog.Dir']:
30
+ """Returns the parent directory of this schema object."""
31
+ from pixeltable import catalog
32
+ if self._dir_id is None:
33
+ return None
34
+ dir = catalog.Catalog.get().paths.get_schema_obj(self._dir_id)
35
+ assert isinstance(dir, catalog.Dir)
36
+ return dir
37
+
38
+ @property
39
+ def path(self) -> str:
40
+ """Returns the path to this schema object."""
41
+ parent = self.parent
42
+ if parent is None or parent.parent is None:
43
+ # Either this is the root directory, with empty path, or its parent is the
44
+ # root directory. Either way, we return just the name.
45
+ return self.name
46
+ else:
47
+ return f'{parent.path}.{self.name}'
48
+
49
+ @classmethod
50
+ @abstractmethod
51
+ def display_name(cls) -> str:
52
+ """
53
+ Return name displayed in error messages.
54
+ """
55
+ pass
56
+
57
+ def _move(self, new_name: str, new_dir_id: UUID) -> None:
58
+ """Subclasses need to override this to make the change persistent"""
59
+ self._name = new_name
60
+ self._dir_id = new_dir_id
@@ -82,14 +82,22 @@ class Table(SchemaObject):
82
82
  return self._queries[index]
83
83
  return self._tbl_version_path.__getitem__(index)
84
84
 
85
- def get_views(self, *, recursive: bool = False) -> list['Table']:
85
+ def list_views(self, *, recursive: bool = True) -> list[str]:
86
86
  """
87
- All views and snapshots of this `Table`.
87
+ Returns a list of all views and snapshots of this `Table`.
88
+
89
+ Args:
90
+ recursive: If `False`, returns only the immediate successor views of this `Table`. If `True`, returns
91
+ all sub-views (including views of views, etc.)
88
92
  """
93
+ return [t.path for t in self._get_views(recursive=recursive)]
94
+
95
+ def _get_views(self, *, recursive: bool = True) -> list['Table']:
96
+ dependents = catalog.Catalog.get().tbl_dependents[self._get_id()]
89
97
  if recursive:
90
- return [self] + [t for view in self.get_views(recursive=False) for t in view.get_views(recursive=True)]
98
+ return dependents + [t for view in dependents for t in view._get_views(recursive=True)]
91
99
  else:
92
- return catalog.Catalog.get().tbl_dependents[self._get_id()]
100
+ return dependents
93
101
 
94
102
  def _df(self) -> 'pixeltable.dataframe.DataFrame':
95
103
  """Return a DataFrame for this table.
@@ -500,7 +508,37 @@ class Table(SchemaObject):
500
508
  >>> tbl.drop_column('factorial')
501
509
  """
502
510
  self._check_is_dropped()
503
- self._tbl_version.drop_column(name)
511
+
512
+ if name not in self._tbl_version.cols_by_name:
513
+ raise excs.Error(f'Unknown column: {name}')
514
+ col = self._tbl_version.cols_by_name[name]
515
+
516
+ dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
517
+ if len(dependent_user_cols) > 0:
518
+ raise excs.Error(
519
+ f'Cannot drop column `{name}` because the following columns depend on it:\n'
520
+ f'{", ".join(c.name for c in dependent_user_cols)}'
521
+ )
522
+
523
+ # See if this column has a dependent store. We need to look through all stores in all
524
+ # (transitive) views of this table.
525
+ dependent_stores = [
526
+ (view, store)
527
+ for view in [self] + self._get_views(recursive=True)
528
+ for store in view._tbl_version.external_stores.values()
529
+ if col in store.get_local_columns()
530
+ ]
531
+ if len(dependent_stores) > 0:
532
+ dependent_store_names = [
533
+ store.name if view._get_id() == self._get_id() else f'{store.name} (in view `{view.name}`)'
534
+ for view, store in dependent_stores
535
+ ]
536
+ raise excs.Error(
537
+ f'Cannot drop column `{name}` because the following external stores depend on it:\n'
538
+ f'{", ".join(dependent_store_names)}'
539
+ )
540
+
541
+ self._tbl_version.drop_column(col)
504
542
 
505
543
  def rename_column(self, old_name: str, new_name: str) -> None:
506
544
  """Rename a column.
@@ -522,15 +560,15 @@ class Table(SchemaObject):
522
560
 
523
561
  def add_embedding_index(
524
562
  self, col_name: str, *, idx_name: Optional[str] = None,
525
- text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None,
563
+ string_embed: Optional[pixeltable.Function] = None, image_embed: Optional[pixeltable.Function] = None,
526
564
  metric: str = 'cosine'
527
565
  ) -> None:
528
566
  """Add an index to the table.
529
567
  Args:
530
568
  col_name: name of column to index
531
569
  idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
532
- text_embed: function to embed text; required if the column is a text column
533
- img_embed: function to embed images; required if the column is an image column
570
+ string_embed: function to embed text; required if the column is a text column
571
+ image_embed: function to embed images; required if the column is an image column
534
572
  metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
535
573
 
536
574
  Raises:
@@ -539,13 +577,13 @@ class Table(SchemaObject):
539
577
  Examples:
540
578
  Add an index to the ``img`` column:
541
579
 
542
- >>> tbl.add_embedding_index('img', img_embed=...)
580
+ >>> tbl.add_embedding_index('img', image_embed=...)
543
581
 
544
582
  Add another index to the ``img`` column, using the inner product as the distance metric,
545
- and with a specific name; ``text_embed`` is also specified in order to search with text:
583
+ and with a specific name; ``string_embed`` is also specified in order to search with text:
546
584
 
547
585
  >>> tbl.add_embedding_index(
548
- 'img', idx_name='clip_idx', img_embed=..., text_embed=...text_embed..., metric='ip')
586
+ 'img', idx_name='clip_idx', image_embed=..., string_embed=..., metric='ip')
549
587
  """
550
588
  if self._tbl_version_path.is_snapshot():
551
589
  raise excs.Error('Cannot add an index to a snapshot')
@@ -557,7 +595,7 @@ class Table(SchemaObject):
557
595
  raise excs.Error(f'Duplicate index name: {idx_name}')
558
596
  from pixeltable.index import EmbeddingIndex
559
597
  # create the EmbeddingIndex instance to verify args
560
- idx = EmbeddingIndex(col, metric=metric, text_embed=text_embed, img_embed=img_embed)
598
+ idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
561
599
  status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
562
600
  # TODO: how to deal with exceptions here? drop the index and raise?
563
601
 
@@ -604,26 +642,26 @@ class Table(SchemaObject):
604
642
  raise excs.Error('Cannot drop an index from a snapshot')
605
643
  self._check_is_dropped()
606
644
  if (column_name is None) == (idx_name is None):
607
- raise excs.Error('Exactly one of column_name or idx_name must be provided')
645
+ raise excs.Error("Exactly one of 'column_name' or 'idx_name' must be provided")
608
646
 
609
647
  if idx_name is not None:
610
648
  if idx_name not in self._tbl_version.idxs_by_name:
611
- raise excs.Error(f'Index {idx_name} does not exist')
649
+ raise excs.Error(f'Index {idx_name!r} does not exist')
612
650
  idx_id = self._tbl_version.idxs_by_name[idx_name].id
613
651
  else:
614
652
  col = self._tbl_version_path.get_column(column_name, include_bases=True)
615
653
  if col is None:
616
- raise excs.Error(f'Column {column_name} unknown')
654
+ raise excs.Error(f'Column {column_name!r} unknown')
617
655
  if col.tbl.id != self._tbl_version.id:
618
656
  raise excs.Error(
619
- f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
657
+ f'Column {column_name!r}: cannot drop index from column that belongs to base ({col.tbl.name}!r)')
620
658
  idx_info = [info for info in self._tbl_version.idxs_by_name.values() if info.col.id == col.id]
621
659
  if _idx_class is not None:
622
660
  idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
623
661
  if len(idx_info) == 0:
624
- raise excs.Error(f'Column {column_name} does not have an index')
662
+ raise excs.Error(f'Column {column_name!r} does not have an index')
625
663
  if len(idx_info) > 1:
626
- raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
664
+ raise excs.Error(f"Column {column_name!r} has multiple indices; specify 'idx_name' instead")
627
665
  idx_id = idx_info[0].id
628
666
  self._tbl_version.drop_index(idx_id)
629
667
 
@@ -823,13 +861,13 @@ class Table(SchemaObject):
823
861
  Links the specified `ExternalStore` to this table.
824
862
  """
825
863
  if self._tbl_version.is_snapshot:
826
- raise excs.Error(f'Table `{self.get_name()}` is a snapshot, so it cannot be linked to an external store.')
864
+ raise excs.Error(f'Table `{self.name}` is a snapshot, so it cannot be linked to an external store.')
827
865
  self._check_is_dropped()
828
866
  if store.name in self.external_stores:
829
- raise excs.Error(f'Table `{self.get_name()}` already has an external store with that name: {store.name}')
830
- _logger.info(f'Linking external store `{store.name}` to table `{self.get_name()}`')
867
+ raise excs.Error(f'Table `{self.name}` already has an external store with that name: {store.name}')
868
+ _logger.info(f'Linking external store `{store.name}` to table `{self.name}`')
831
869
  self._tbl_version.link_external_store(store)
832
- print(f'Linked external store `{store.name}` to table `{self.get_name()}`.')
870
+ print(f'Linked external store `{store.name}` to table `{self.name}`.')
833
871
 
834
872
  def unlink_external_stores(
835
873
  self,
@@ -861,11 +899,11 @@ class Table(SchemaObject):
861
899
  if not ignore_errors:
862
900
  for store in stores:
863
901
  if store not in all_stores:
864
- raise excs.Error(f'Table `{self.get_name()}` has no external store with that name: {store}')
902
+ raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')
865
903
 
866
904
  for store in stores:
867
905
  self._tbl_version.unlink_external_store(store, delete_external_data=delete_external_data)
868
- print(f'Unlinked external store from table `{self.get_name()}`: {store}')
906
+ print(f'Unlinked external store from table `{self.name}`: {store}')
869
907
 
870
908
  def sync(
871
909
  self,
@@ -893,7 +931,7 @@ class Table(SchemaObject):
893
931
 
894
932
  for store in stores:
895
933
  if store not in all_stores:
896
- raise excs.Error(f'Table `{self.get_name()}` has no external store with that name: {store}')
934
+ raise excs.Error(f'Table `{self.name}` has no external store with that name: {store}')
897
935
 
898
936
  from pixeltable.io import SyncStatus
899
937
 
@@ -540,39 +540,12 @@ class TableVersion:
540
540
  num_rows=row_count, num_computed_values=row_count, num_excs=num_excs,
541
541
  cols_with_excs=[f'{col.tbl.name}.{col.name}'for col in cols_with_excs if col.name is not None])
542
542
 
543
- def drop_column(self, name: str) -> None:
543
+ def drop_column(self, col: Column) -> None:
544
544
  """Drop a column from the table.
545
545
  """
546
546
  from pixeltable.catalog import Catalog
547
547
 
548
548
  assert not self.is_snapshot
549
- if name not in self.cols_by_name:
550
- raise excs.Error(f'Unknown column: {name}')
551
- col = self.cols_by_name[name]
552
- dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
553
- if len(dependent_user_cols) > 0:
554
- raise excs.Error(
555
- f'Cannot drop column `{name}` because the following columns depend on it:\n'
556
- f'{", ".join(c.name for c in dependent_user_cols)}'
557
- )
558
- # See if this column has a dependent store. We need to look through all stores in all
559
- # (transitive) views of this table.
560
- transitive_views = Catalog.get().tbls[self.id].get_views(recursive=True)
561
- dependent_stores = [
562
- (view, store)
563
- for view in transitive_views
564
- for store in view._tbl_version.external_stores.values()
565
- if col in store.get_local_columns()
566
- ]
567
- if len(dependent_stores) > 0:
568
- dependent_store_names = [
569
- store.name if view._get_id() == self.id else f'{store.name} (in view `{view.get_name()}`)'
570
- for view, store in dependent_stores
571
- ]
572
- raise excs.Error(
573
- f'Cannot drop column `{name}` because the following external stores depend on it:\n'
574
- f'{", ".join(dependent_store_names)}'
575
- )
576
549
 
577
550
  # we're creating a new schema version
578
551
  self.version += 1
@@ -596,7 +569,7 @@ class TableVersion:
596
569
  del self.idxs_by_name[idx_name]
597
570
  self._drop_columns(dropped_cols)
598
571
  self._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
599
- _logger.info(f'Dropped column {name} from table {self.name}, new version: {self.version}')
572
+ _logger.info(f'Dropped column {col.name} from table {self.name}, new version: {self.version}')
600
573
 
601
574
  def _drop_columns(self, cols: Iterable[Column]) -> None:
602
575
  """Mark columns as dropped"""
@@ -558,7 +558,7 @@ class DataFrame:
558
558
  # we need to make sure that the grouping table is a base of self.tbl
559
559
  base = self.tbl.find_tbl_version(item._tbl_version_path.tbl_id())
560
560
  if base is None or base.id == self.tbl.tbl_id():
561
- raise excs.Error(f'group_by(): {item.get_name()} is not a base table of {self.tbl.tbl_name()}')
561
+ raise excs.Error(f'group_by(): {item.name} is not a base table of {self.tbl.tbl_name()}')
562
562
  grouping_tbl = item._tbl_version_path.tbl_version
563
563
  break
564
564
  if not isinstance(item, exprs.Expr):
@@ -63,14 +63,9 @@ class ColumnRef(Expr):
63
63
 
64
64
  return super().__getattr__(name)
65
65
 
66
- def similarity(self, other: Any) -> Expr:
67
- # if isinstance(other, Expr):
68
- # raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
69
- item = Expr.from_object(other)
70
- if item is None or not(item.col_type.is_string_type() or item.col_type.is_image_type()):
71
- raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(other)}')
66
+ def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
72
67
  from .similarity_expr import SimilarityExpr
73
- return SimilarityExpr(self, item)
68
+ return SimilarityExpr(self, item, idx_name=idx)
74
69
 
75
70
  def default_column_name(self) -> Optional[str]:
76
71
  return str(self)
@@ -1,4 +1,4 @@
1
- from typing import Optional, List
1
+ from typing import Optional, List, Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
  import PIL.Image
@@ -14,33 +14,44 @@ from .row_builder import RowBuilder
14
14
 
15
15
  class SimilarityExpr(Expr):
16
16
 
17
- def __init__(self, col_ref: ColumnRef, item: Expr):
17
+ def __init__(self, col_ref: ColumnRef, item: Any, idx_name: Optional[str] = None):
18
18
  super().__init__(ts.FloatType())
19
- self.components = [col_ref, item]
19
+ item_expr = Expr.from_object(item)
20
+ if item_expr is None or not(item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()):
21
+ raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(item)}')
22
+ assert item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()
23
+
24
+ self.components = [col_ref, item_expr]
20
25
  self.id = self._create_id()
21
- assert item.col_type.is_string_type() or item.col_type.is_image_type()
22
26
 
23
27
  # determine index to use
24
28
  idx_info = col_ref.col.get_idx_info()
25
29
  import pixeltable.index as index
26
- embedding_idx_info = [info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)]
30
+ embedding_idx_info = {
31
+ info.name: info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)
32
+ }
27
33
  if len(embedding_idx_info) == 0:
28
- raise excs.Error(f'No index found for column {col_ref.col}')
34
+ raise excs.Error(f'No index found for column {col_ref.col!r}')
35
+ if idx_name is not None and idx_name not in embedding_idx_info:
36
+ raise excs.Error(f'Index {idx_name!r} not found for column {col_ref.col.name!r}')
29
37
  if len(embedding_idx_info) > 1:
30
- raise excs.Error(
31
- f'Column {col_ref.col.name} has multiple indices; use the index name to disambiguate, '
32
- f'e.g., `{col_ref.col.name}.<index-name>.similarity(...)`')
33
- self.idx_info = embedding_idx_info[0]
38
+ if idx_name is None:
39
+ raise excs.Error(
40
+ f'Column {col_ref.col.name!r} has multiple indices; use the index name to disambiguate: '
41
+ f'`{col_ref.col.name}.similarity(..., idx=<name>)`')
42
+ self.idx_info = embedding_idx_info[idx_name]
43
+ else:
44
+ self.idx_info = next(iter(embedding_idx_info.values()))
34
45
  idx = self.idx_info.idx
35
46
 
36
- if item.col_type.is_string_type() and idx.txt_embed is None:
47
+ if item_expr.col_type.is_string_type() and idx.string_embed is None:
37
48
  raise excs.Error(
38
- f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
39
- f'text_embed parameter and does not support text queries')
40
- if item.col_type.is_image_type() and idx.img_embed is None:
49
+ f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} was created without the '
50
+ f"'string_embed' parameter and does not support string queries")
51
+ if item_expr.col_type.is_image_type() and idx.image_embed is None:
41
52
  raise excs.Error(
42
- f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
43
- f'img_embed parameter and does not support image queries')
53
+ f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} was created without the '
54
+ f"'image_embed' parameter and does not support image queries")
44
55
 
45
56
  def __str__(self) -> str:
46
57
  return f'{self.components[0]}.similarity({self.components[1]})'
@@ -141,7 +141,7 @@ def chat_completions(
141
141
 
142
142
 
143
143
  @pxt.udf
144
- def vision(prompt: str, image: PIL.Image.Image, *, model: str = 'gpt-4-vision-preview') -> str:
144
+ def vision(prompt: str, image: PIL.Image.Image, *, model: str) -> str:
145
145
  # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
146
146
  bytes_arr = io.BytesIO()
147
147
  image.save(bytes_arr, format='png')
@@ -234,7 +234,7 @@ def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> N
234
234
 
235
235
  Args:
236
236
  path: Path to the table.
237
- force: Whether to drop the table even if it has unsaved changes.
237
+ force: If `True`, will also drop all views or sub-views of this table.
238
238
  ignore_errors: Whether to ignore errors if the table does not exist.
239
239
 
240
240
  Raises:
@@ -243,21 +243,27 @@ def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> N
243
243
  Examples:
244
244
  >>> cl.drop_table('my_table')
245
245
  """
246
+ cat = Catalog.get()
246
247
  path_obj = catalog.Path(path)
247
248
  try:
248
- Catalog.get().paths.check_is_valid(path_obj, expected=catalog.Table)
249
+ cat.paths.check_is_valid(path_obj, expected=catalog.Table)
249
250
  except Exception as e:
250
- if ignore_errors:
251
+ if ignore_errors or force:
251
252
  _logger.info(f'Skipped table `{path}` (does not exist).')
252
253
  return
253
254
  else:
254
255
  raise e
255
- tbl = Catalog.get().paths[path_obj]
256
- if len(Catalog.get().tbl_dependents[tbl._id]) > 0:
257
- dependent_paths = [get_path(dep) for dep in Catalog.get().tbl_dependents[tbl._id]]
258
- raise excs.Error(f'Table {path} has dependents: {", ".join(dependent_paths)}')
256
+ tbl = cat.paths[path_obj]
257
+ assert isinstance(tbl, catalog.Table)
258
+ if len(cat.tbl_dependents[tbl._id]) > 0:
259
+ dependent_paths = [dep.path for dep in cat.tbl_dependents[tbl._id]]
260
+ if force:
261
+ for dependent_path in dependent_paths:
262
+ drop_table(dependent_path, force=True)
263
+ else:
264
+ raise excs.Error(f'Table {path} has dependents: {", ".join(dependent_paths)}')
259
265
  tbl._drop()
260
- del Catalog.get().paths[path_obj]
266
+ del cat.paths[path_obj]
261
267
  _logger.info(f'Dropped table `{path}`.')
262
268
 
263
269
 
@@ -291,7 +297,7 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
291
297
  return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
292
298
 
293
299
 
294
- def create_dir(path_str: str, ignore_errors: bool = False) -> None:
300
+ def create_dir(path_str: str, ignore_errors: bool = False) -> catalog.Dir:
295
301
  """Create a directory.
296
302
 
297
303
  Args:
@@ -319,10 +325,12 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> None:
319
325
  session.add(dir_record)
320
326
  session.flush()
321
327
  assert dir_record.id is not None
322
- Catalog.get().paths[path] = catalog.Dir(dir_record.id, parent._id, path.name)
328
+ dir = catalog.Dir(dir_record.id, parent._id, path.name)
329
+ Catalog.get().paths[path] = dir
323
330
  session.commit()
324
331
  _logger.info(f'Created directory `{path_str}`.')
325
332
  print(f'Created directory `{path_str}`.')
333
+ return dir
326
334
  except excs.Error as e:
327
335
  if ignore_errors:
328
336
  return
@@ -330,7 +338,7 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> None:
330
338
  raise e
331
339
 
332
340
 
333
- def rm_dir(path_str: str) -> None:
341
+ def drop_dir(path_str: str, force: bool = False, ignore_errors: bool = False) -> None:
334
342
  """Remove a directory.
335
343
 
336
344
  Args:
@@ -340,31 +348,49 @@ def rm_dir(path_str: str) -> None:
340
348
  Error: If the path does not exist or does not designate a directory or if the directory is not empty.
341
349
 
342
350
  Examples:
343
- >>> cl.rm_dir('my_dir')
351
+ >>> cl.drop_dir('my_dir')
344
352
 
345
353
  Remove a subdirectory:
346
354
 
347
- >>> cl.rm_dir('my_dir.sub_dir')
355
+ >>> cl.drop_dir('my_dir.sub_dir')
348
356
  """
357
+ cat = Catalog.get()
349
358
  path = catalog.Path(path_str)
350
- Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
351
359
 
352
- # make sure it's empty
353
- if len(Catalog.get().paths.get_children(path, child_type=None, recursive=True)) > 0:
354
- raise excs.Error(f'Directory {path_str} is not empty')
355
- # TODO: figure out how to make force=True work in the presence of snapshots
356
- # # delete tables
357
- # for tbl_path in self.paths.get_children(path, child_type=MutableTable, recursive=True):
358
- # self.drop_table(str(tbl_path), force=True)
359
- # # rm subdirs
360
- # for dir_path in self.paths.get_children(path, child_type=Dir, recursive=False):
361
- # self.rm_dir(str(dir_path), force=True)
360
+ try:
361
+ cat.paths.check_is_valid(path, expected=catalog.Dir)
362
+ except Exception as e:
363
+ if ignore_errors or force:
364
+ _logger.info(f'Skipped directory `{path}` (does not exist).')
365
+ return
366
+ else:
367
+ raise e
368
+
369
+ children = cat.paths.get_children(path, child_type=None, recursive=True)
370
+
371
+ if len(children) > 0 and not force:
372
+ raise excs.Error(f'Directory `{path_str}` is not empty.')
373
+
374
+ for child in children:
375
+ assert isinstance(child, catalog.Path)
376
+ # We need to check that the child is still in `cat.paths`, since it is possible it was
377
+ # already deleted as a dependent of a preceding child in the iteration.
378
+ try:
379
+ obj = cat.paths[child]
380
+ except excs.Error:
381
+ continue
382
+ if isinstance(obj, catalog.Dir):
383
+ drop_dir(str(child), force=True)
384
+ else:
385
+ assert isinstance(obj, catalog.Table)
386
+ assert not obj._is_dropped # else it should have been removed from `cat.paths` already
387
+ drop_table(str(child), force=True)
362
388
 
363
389
  with Env.get().engine.begin() as conn:
364
390
  dir = Catalog.get().paths[path]
365
391
  conn.execute(sql.delete(schema.Dir.__table__).where(schema.Dir.id == dir._id))
366
392
  del Catalog.get().paths[path]
367
- _logger.info(f'Removed directory {path_str}')
393
+ _logger.info(f'Removed directory `{path_str}`.')
368
394
 
369
395
 
370
396
  def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
@@ -416,28 +442,6 @@ def list_functions() -> pd.DataFrame:
416
442
  return pd_df.hide(axis='index')
417
443
 
418
444
 
419
- def get_path(schema_obj: catalog.SchemaObject) -> str:
420
- """Returns the path to a SchemaObject.
421
-
422
- Args:
423
- schema_obj: SchemaObject to get the path for.
424
-
425
- Returns:
426
- Path to the SchemaObject.
427
- """
428
- path_elements: list[str] = []
429
- dir_id = schema_obj._dir_id
430
- while dir_id is not None:
431
- dir = Catalog.get().paths.get_schema_obj(dir_id)
432
- if dir._dir_id is None:
433
- # this is the root dir with name '', which we don't want to include in the path
434
- break
435
- path_elements.insert(0, dir._name)
436
- dir_id = dir._dir_id
437
- path_elements.append(schema_obj._name)
438
- return '.'.join(path_elements)
439
-
440
-
441
445
  def configure_logging(
442
446
  *,
443
447
  to_stdout: Optional[bool] = None,