pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/globals.py CHANGED
@@ -3,15 +3,18 @@ from __future__ import annotations
3
3
  import logging
4
4
  import os
5
5
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
6
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, NamedTuple, Union
7
7
 
8
8
  import pandas as pd
9
+ import pydantic
9
10
  from pandas.io.formats.style import Styler
10
11
 
11
- from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share
12
+ from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share, type_system as ts
12
13
  from pixeltable.catalog import Catalog, TableVersionPath
13
14
  from pixeltable.catalog.insertable_table import OnErrorParameter
15
+ from pixeltable.config import Config
14
16
  from pixeltable.env import Env
17
+ from pixeltable.io.table_data_conduit import DFTableDataConduit, TableDataConduit
15
18
  from pixeltable.iterators import ComponentIterator
16
19
 
17
20
  if TYPE_CHECKING:
@@ -22,46 +25,62 @@ if TYPE_CHECKING:
22
25
  str,
23
26
  os.PathLike,
24
27
  Path, # OS paths, filenames, URLs
25
- Iterator[dict[str, Any]], # iterator producing dictionaries of values
26
- RowData, # list of dictionaries
28
+ Iterable[dict[str, Any]], # dictionaries of values
29
+ Iterable[pydantic.BaseModel], # Pydantic model instances
27
30
  DataFrame, # Pixeltable DataFrame
28
31
  pd.DataFrame, # pandas DataFrame
29
- 'datasets.Dataset',
30
- 'datasets.DatasetDict', # Huggingface datasets
32
+ datasets.Dataset,
33
+ datasets.DatasetDict, # Huggingface datasets
31
34
  ]
32
35
 
33
36
 
34
37
  _logger = logging.getLogger('pixeltable')
35
38
 
36
39
 
37
- def init() -> None:
40
+ def init(config_overrides: dict[str, Any] | None = None) -> None:
38
41
  """Initializes the Pixeltable environment."""
42
+ if config_overrides is None:
43
+ config_overrides = {}
44
+ Config.init(config_overrides)
39
45
  _ = Catalog.get()
40
46
 
41
47
 
42
48
  def create_table(
43
- path_str: str,
44
- schema: Optional[dict[str, Any]] = None,
49
+ path: str,
50
+ schema: dict[str, Any] | None = None,
45
51
  *,
46
- source: Optional[TableDataSource] = None,
47
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
48
- schema_overrides: Optional[dict[str, Any]] = None,
52
+ source: TableDataSource | None = None,
53
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
54
+ schema_overrides: dict[str, Any] | None = None,
55
+ create_default_idxs: bool = True,
49
56
  on_error: Literal['abort', 'ignore'] = 'abort',
50
- primary_key: Optional[Union[str, list[str]]] = None,
57
+ primary_key: str | list[str] | None = None,
51
58
  num_retained_versions: int = 10,
52
59
  comment: str = '',
53
60
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
54
61
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
55
- extra_args: Optional[dict[str, Any]] = None, # Additional arguments to data source provider
62
+ extra_args: dict[str, Any] | None = None, # Additional arguments to data source provider
56
63
  ) -> catalog.Table:
57
- """Create a new base table.
64
+ """Create a new base table. Exactly one of `schema` or `source` must be provided.
65
+
66
+ If a `schema` is provided, then an empty table will be created with the specified schema.
67
+
68
+ If a `source` is provided, then Pixeltable will attempt to infer a data source format and table schema from the
69
+ contents of the specified data, and the data will be imported from the specified source into the new table. The
70
+ source format and/or schema can be specified directly via the `source_format` and `schema_overrides` parameters.
58
71
 
59
72
  Args:
60
- path_str: Path to the table.
61
- schema: A dictionary that maps column names to column types
62
- source: A data source from which a table schema can be inferred and data imported
63
- source_format: A hint to the format of the source data
64
- schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
73
+ path: Pixeltable path (qualified name) of the table, such as `'my_table'` or `'my_dir.my_subdir.my_table'`.
74
+ schema: Schema for the new table, mapping column names to Pixeltable types.
75
+ source: A data source (file, URL, DataFrame, or list of rows) to import from.
76
+ source_format: Must be used in conjunction with a `source`.
77
+ If specified, then the given format will be used to read the source data. (Otherwise,
78
+ Pixeltable will attempt to infer the format from the source data.)
79
+ schema_overrides: Must be used in conjunction with a `source`.
80
+ If specified, then columns in `schema_overrides` will be given the specified types.
81
+ (Pixeltable will attempt to infer the types of any columns not specified.)
82
+ create_default_idxs: If True, creates a B-tree index on every scalar and media column that is not computed,
83
+ except for boolean columns.
65
84
  on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
66
85
  invalid media file (such as a corrupt image) for one of the inserted rows.
67
86
 
@@ -77,14 +96,15 @@ def create_table(
77
96
 
78
97
  - `'on_read'`: validate media files at query time
79
98
  - `'on_write'`: validate media files during insert/update operations
80
- if_exists: Directive regarding how to handle if the path already exists.
81
- Must be one of the following:
99
+ if_exists: Determines the behavior if a table already exists at the specified path location.
82
100
 
83
101
  - `'error'`: raise an error
84
102
  - `'ignore'`: do nothing and return the existing table handle
85
- - `'replace'`: if the existing table has no views, drop and replace it with a new one
86
- - `'replace_force'`: drop the existing table and all its views, and create a new one
87
- extra_args: Additional arguments to pass to the source data provider
103
+ - `'replace'`: if the existing table has no views or snapshots, drop and replace it with a new one;
104
+ raise an error if the existing table has views or snapshots
105
+ - `'replace_force'`: drop the existing table and all its views and snapshots, and create a new one
106
+ extra_args: Must be used in conjunction with a `source`. If specified, then additional arguments will be
107
+ passed along to the source data provider.
88
108
 
89
109
  Returns:
90
110
  A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
@@ -110,7 +130,7 @@ def create_table(
110
130
  >>> tbl1 = pxt.get_table('orig_table')
111
131
  ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
112
132
 
113
- Create a table if does not already exist, otherwise get the existing table:
133
+ Create a table if it does not already exist, otherwise get the existing table:
114
134
 
115
135
  >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
116
136
 
@@ -122,27 +142,39 @@ def create_table(
122
142
 
123
143
  >>> tbl = pxt.create_table('my_table', source='data.csv')
124
144
  """
125
- from pixeltable.io.table_data_conduit import DFTableDataConduit, UnkTableDataConduit
145
+ from pixeltable.io.table_data_conduit import UnkTableDataConduit
126
146
  from pixeltable.io.utils import normalize_primary_key_parameter
127
147
 
128
148
  if (schema is None) == (source is None):
129
- raise excs.Error('Must provide either a `schema` or a `source`')
149
+ raise excs.Error('Either a `schema` or a `source` must be provided (but not both)')
130
150
 
131
151
  if schema is not None and (len(schema) == 0 or not isinstance(schema, dict)):
132
152
  raise excs.Error('`schema` must be a non-empty dictionary')
133
153
 
134
- path_obj = catalog.Path(path_str)
154
+ path_obj = catalog.Path.parse(path)
135
155
  if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
136
156
  media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
137
- primary_key: Optional[list[str]] = normalize_primary_key_parameter(primary_key)
138
- table: catalog.Table = None
139
- tds = None
140
- data_source = None
157
+ primary_key: list[str] | None = normalize_primary_key_parameter(primary_key)
158
+ data_source: TableDataConduit | None = None
141
159
  if source is not None:
160
+ if isinstance(source, str) and source.strip().startswith('pxt://'):
161
+ raise excs.Error(
162
+ 'create_table(): Creating a table directly from a cloud URI is not supported.'
163
+ ' Please replicate the table locally first using `pxt.replicate()`:\n'
164
+ "replica_tbl = pxt.replicate('pxt://path/to/remote_table', 'local_replica_name')\n"
165
+ "pxt.create_table('new_table_name', source=replica_tbl)"
166
+ )
142
167
  tds = UnkTableDataConduit(source, source_format=source_format, extra_fields=extra_args)
143
168
  tds.check_source_format()
144
169
  data_source = tds.specialize()
145
- data_source.src_schema_overrides = schema_overrides
170
+ src_schema_overrides: dict[str, ts.ColumnType] = {}
171
+ if schema_overrides is not None:
172
+ for col_name, py_type in schema_overrides.items():
173
+ col_type = ts.ColumnType.normalize_type(py_type, nullable_default=True, allow_builtin_types=False)
174
+ if col_type is None:
175
+ raise excs.Error(f'Invalid type for column {col_name!r} in `schema_overrides`: {py_type}')
176
+ src_schema_overrides[col_name] = col_type
177
+ data_source.src_schema_overrides = src_schema_overrides
146
178
  data_source.src_pk = primary_key
147
179
  data_source.infer_schema()
148
180
  schema = data_source.pxt_schema
@@ -156,35 +188,43 @@ def create_table(
156
188
  'Unable to create a proper schema from supplied `source`. Please use appropriate `schema_overrides`.'
157
189
  )
158
190
 
159
- table = Catalog.get().create_table(
191
+ tbl, was_created = Catalog.get().create_table(
160
192
  path_obj,
161
193
  schema,
162
- data_source.pxt_df if isinstance(data_source, DFTableDataConduit) else None,
163
194
  if_exists=if_exists_,
164
195
  primary_key=primary_key,
165
196
  comment=comment,
166
197
  media_validation=media_validation_,
167
198
  num_retained_versions=num_retained_versions,
199
+ create_default_idxs=create_default_idxs,
168
200
  )
169
- if data_source is not None and not is_direct_df:
201
+
202
+ # TODO: combine data loading with table creation into a single transaction
203
+ if was_created:
170
204
  fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
171
- table.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)
205
+ if isinstance(data_source, DFTableDataConduit):
206
+ df = data_source.pxt_df
207
+ with Catalog.get().begin_xact(tbl=tbl._tbl_version_path, for_write=True, lock_mutable_tree=True):
208
+ tbl._tbl_version.get().insert(None, df, fail_on_exception=fail_on_exception)
209
+ elif data_source is not None and not is_direct_df:
210
+ tbl.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)
172
211
 
173
- return table
212
+ return tbl
174
213
 
175
214
 
176
215
  def create_view(
177
216
  path: str,
178
- base: Union[catalog.Table, DataFrame],
217
+ base: catalog.Table | DataFrame,
179
218
  *,
180
- additional_columns: Optional[dict[str, Any]] = None,
219
+ additional_columns: dict[str, Any] | None = None,
181
220
  is_snapshot: bool = False,
182
- iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
221
+ create_default_idxs: bool = False,
222
+ iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
183
223
  num_retained_versions: int = 10,
184
224
  comment: str = '',
185
225
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
186
226
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
187
- ) -> Optional[catalog.Table]:
227
+ ) -> catalog.Table | None:
188
228
  """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
189
229
 
190
230
  Args:
@@ -197,6 +237,8 @@ def create_view(
197
237
  [`create_table`][pixeltable.create_table].
198
238
  is_snapshot: Whether the view is a snapshot. Setting this to `True` is equivalent to calling
199
239
  [`create_snapshot`][pixeltable.create_snapshot].
240
+ create_default_idxs: Whether to create default indexes on the view's columns (the base's columns are excluded).
241
+ Cannot be `True` for snapshots.
200
242
  iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
201
243
  the base table.
202
244
  num_retained_versions: Number of versions of the view to retain.
@@ -244,16 +286,16 @@ def create_view(
244
286
  >>> tbl = pxt.get_table('my_table')
245
287
  ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
246
288
  """
289
+ if is_snapshot and create_default_idxs is True:
290
+ raise excs.Error('Cannot create default indexes on a snapshot')
247
291
  tbl_version_path: TableVersionPath
248
- select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None
249
- where: Optional[exprs.Expr] = None
292
+ select_list: list[tuple[exprs.Expr, str | None]] | None = None
293
+ where: exprs.Expr | None = None
250
294
  if isinstance(base, catalog.Table):
251
295
  tbl_version_path = base._tbl_version_path
252
296
  sample_clause = None
253
297
  elif isinstance(base, DataFrame):
254
- base._validate_mutable('create_view', allow_select=True)
255
- if len(base._from_clause.tbls) > 1:
256
- raise excs.Error('Cannot create a view of a join')
298
+ base._validate_mutable_op_sequence('create_view', allow_select=True)
257
299
  tbl_version_path = base._from_clause.tbls[0]
258
300
  where = base.where_clause
259
301
  sample_clause = base.sample_clause
@@ -264,7 +306,7 @@ def create_view(
264
306
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
265
307
  assert isinstance(base, (catalog.Table, DataFrame))
266
308
 
267
- path_obj = catalog.Path(path)
309
+ path_obj = catalog.Path.parse(path)
268
310
  if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
269
311
  media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
270
312
 
@@ -276,7 +318,7 @@ def create_view(
276
318
  if col_name in [c.name for c in tbl_version_path.columns()]:
277
319
  raise excs.Error(
278
320
  f'Column {col_name!r} already exists in the base table '
279
- f'{tbl_version_path.get_column(col_name).tbl.name}.'
321
+ f'{tbl_version_path.get_column(col_name).get_tbl().name}.'
280
322
  )
281
323
 
282
324
  return Catalog.get().create_view(
@@ -287,6 +329,7 @@ def create_view(
287
329
  sample_clause=sample_clause,
288
330
  additional_columns=additional_columns,
289
331
  is_snapshot=is_snapshot,
332
+ create_default_idxs=create_default_idxs,
290
333
  iterator=iterator,
291
334
  num_retained_versions=num_retained_versions,
292
335
  comment=comment,
@@ -297,15 +340,15 @@ def create_view(
297
340
 
298
341
  def create_snapshot(
299
342
  path_str: str,
300
- base: Union[catalog.Table, DataFrame],
343
+ base: catalog.Table | DataFrame,
301
344
  *,
302
- additional_columns: Optional[dict[str, Any]] = None,
303
- iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
345
+ additional_columns: dict[str, Any] | None = None,
346
+ iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
304
347
  num_retained_versions: int = 10,
305
348
  comment: str = '',
306
349
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
307
350
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
308
- ) -> Optional[catalog.Table]:
351
+ ) -> catalog.Table | None:
309
352
  """Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
310
353
 
311
354
  Args:
@@ -376,36 +419,67 @@ def create_snapshot(
376
419
  )
377
420
 
378
421
 
379
- def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optional[catalog.Table]:
422
+ def publish(
423
+ source: str | catalog.Table,
424
+ destination_uri: str,
425
+ bucket_name: str | None = None,
426
+ access: Literal['public', 'private'] = 'private',
427
+ ) -> None:
380
428
  """
381
- Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
382
- replica of a remote table. A given table can have at most one replica per Pixeltable instance.
429
+ Publishes a replica of a local Pixeltable table to Pixeltable cloud. A given table can be published to at most one
430
+ URI per Pixeltable cloud database.
383
431
 
384
432
  Args:
385
- destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
386
- a remote URI such as `'pxt://username/mydir.my_table'`.
387
- source: Path to the source table, or (if the source table is a local table) a handle to the source table.
433
+ source: Path or table handle of the local table to be published.
434
+ destination_uri: Remote URI where the replica will be published, such as `'pxt://org_name/my_dir/my_table'`.
435
+ bucket_name: The name of the bucket to use to store replica's data. The bucket must be registered with
436
+ Pixeltable cloud. If no `bucket_name` is provided, the default storage bucket for the destination
437
+ database will be used.
438
+ access: Access control for the replica.
439
+
440
+ - `'public'`: Anyone can access this replica.
441
+ - `'private'`: Only the host organization can access.
388
442
  """
389
- remote_dest = destination.startswith('pxt://')
390
- remote_source = isinstance(source, str) and source.startswith('pxt://')
391
- if remote_dest == remote_source:
392
- raise excs.Error('Exactly one of `destination` or `source` must be a remote URI.')
393
-
394
- if remote_dest:
395
- if isinstance(source, str):
396
- source = get_table(source)
397
- share.push_replica(destination, source)
398
- return None
399
- else:
400
- assert isinstance(source, str)
401
- return share.pull_replica(destination, source)
443
+ if not destination_uri.startswith('pxt://'):
444
+ raise excs.Error("`destination_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
445
+
446
+ if isinstance(source, str):
447
+ source = get_table(source)
448
+
449
+ share.push_replica(destination_uri, source, bucket_name, access)
450
+
451
+
452
+ def replicate(remote_uri: str, local_path: str) -> catalog.Table:
453
+ """
454
+ Retrieve a replica from Pixeltable cloud as a local table. This will create a full local copy of the replica in a
455
+ way that preserves the table structure of the original source data. Once replicated, the local table can be
456
+ queried offline just as any other Pixeltable table.
457
+
458
+ Args:
459
+ remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'` or
460
+ `'pxt://org_name/my_dir/my_table:5'` (with version 5).
461
+ local_path: Local table path where the replica will be created, such as `'my_new_dir.my_new_tbl'`. It can be
462
+ the same or different from the cloud table name.
463
+
464
+ Returns:
465
+ A handle to the newly created local replica table.
466
+ """
467
+ if not remote_uri.startswith('pxt://'):
468
+ raise excs.Error("`remote_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
469
+
470
+ return share.pull_replica(local_path, remote_uri)
402
471
 
403
472
 
404
- def get_table(path: str) -> catalog.Table:
473
+ def get_table(path: str, if_not_exists: Literal['error', 'ignore'] = 'error') -> catalog.Table | None:
405
474
  """Get a handle to an existing table, view, or snapshot.
406
475
 
407
476
  Args:
408
477
  path: Path to the table.
478
+ if_not_exists: Directive regarding how to handle if the path does not exist.
479
+ Must be one of the following:
480
+
481
+ - `'error'`: raise an error
482
+ - `'ignore'`: do nothing and return `None`
409
483
 
410
484
  Returns:
411
485
  A handle to the [`Table`][pixeltable.Table].
@@ -425,20 +499,39 @@ def get_table(path: str) -> catalog.Table:
425
499
  Handles to views and snapshots are retrieved in the same way:
426
500
 
427
501
  >>> tbl = pxt.get_table('my_snapshot')
502
+
503
+ Get a handle to a specific version of a table:
504
+
505
+ >>> tbl = pxt.get_table('my_table:722')
428
506
  """
429
- path_obj = catalog.Path(path)
430
- tbl = Catalog.get().get_table(path_obj)
431
- tv = tbl._tbl_version.get()
432
- _logger.debug(f'get_table(): tbl={tv.id}:{tv.effective_version} sa_tbl={id(tv.store_tbl.sa_tbl):x} tv={id(tv):x}')
507
+ if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
508
+ path_obj = catalog.Path.parse(path, allow_versioned_path=True)
509
+ tbl = Catalog.get().get_table(path_obj, if_not_exists_)
433
510
  return tbl
434
511
 
435
512
 
436
- def move(path: str, new_path: str) -> None:
513
+ def move(
514
+ path: str,
515
+ new_path: str,
516
+ *,
517
+ if_exists: Literal['error', 'ignore'] = 'error',
518
+ if_not_exists: Literal['error', 'ignore'] = 'error',
519
+ ) -> None:
437
520
  """Move a schema object to a new directory and/or rename a schema object.
438
521
 
439
522
  Args:
440
523
  path: absolute path to the existing schema object.
441
524
  new_path: absolute new path for the schema object.
525
+ if_exists: Directive regarding how to handle if a schema object already exists at the new path.
526
+ Must be one of the following:
527
+
528
+ - `'error'`: raise an error
529
+ - `'ignore'`: do nothing and return
530
+ if_not_exists: Directive regarding how to handle if the source path does not exist.
531
+ Must be one of the following:
532
+
533
+ - `'error'`: raise an error
534
+ - `'ignore'`: do nothing and return
442
535
 
443
536
  Raises:
444
537
  Error: If path does not exist or new_path already exists.
@@ -452,22 +545,26 @@ def move(path: str, new_path: str) -> None:
452
545
 
453
546
  >>> pxt.move('dir1.my_table', 'dir1.new_name')
454
547
  """
548
+ if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
549
+ if if_exists_ not in (catalog.IfExistsParam.ERROR, catalog.IfExistsParam.IGNORE):
550
+ raise excs.Error("`if_exists` must be one of 'error' or 'ignore'")
551
+ if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
455
552
  if path == new_path:
456
553
  raise excs.Error('move(): source and destination cannot be identical')
457
- path_obj, new_path_obj = catalog.Path(path), catalog.Path(new_path)
554
+ path_obj, new_path_obj = catalog.Path.parse(path), catalog.Path.parse(new_path)
458
555
  if path_obj.is_ancestor(new_path_obj):
459
556
  raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
460
- cat = Catalog.get()
461
- cat.move(path_obj, new_path_obj)
557
+ Catalog.get().move(path_obj, new_path_obj, if_exists_, if_not_exists_)
462
558
 
463
559
 
464
560
  def drop_table(
465
- table: Union[str, catalog.Table], force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
561
+ table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
466
562
  ) -> None:
467
- """Drop a table, view, or snapshot.
563
+ """Drop a table, view, snapshot, or replica.
468
564
 
469
565
  Args:
470
- table: Fully qualified name, or handle, of the table to be dropped.
566
+ table: Fully qualified name or table handle of the table to be dropped; or a remote URI of a cloud replica to
567
+ be deleted.
471
568
  force: If `True`, will also drop all views and sub-views of this table.
472
569
  if_not_exists: Directive regarding how to handle if the path does not exist.
473
570
  Must be one of the following:
@@ -507,9 +604,69 @@ def drop_table(
507
604
  assert isinstance(table, str)
508
605
  tbl_path = table
509
606
 
510
- path_obj = catalog.Path(tbl_path)
511
607
  if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
512
- Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
608
+
609
+ if tbl_path.startswith('pxt://'):
610
+ # Remote table
611
+ if force:
612
+ raise excs.Error('Cannot use `force=True` with a cloud replica URI.')
613
+ # TODO: Handle if_not_exists properly
614
+ share.delete_replica(tbl_path)
615
+ else:
616
+ # Local table
617
+ path_obj = catalog.Path.parse(tbl_path)
618
+ Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
619
+
620
+
621
+ def get_dir_contents(dir_path: str = '', recursive: bool = True) -> 'DirContents':
622
+ """Get the contents of a Pixeltable directory.
623
+
624
+ Args:
625
+ dir_path: Path to the directory. Defaults to the root directory.
626
+ recursive: If `False`, returns only those tables and directories that are directly contained in the specified
627
+ directory; if `True`, returns all tables and directories that are descendants of the specified directory,
628
+ recursively.
629
+
630
+ Returns:
631
+ A [`DirContents`][pixeltable.DirContents] object representing the contents of the specified directory.
632
+
633
+ Raises:
634
+ Error: If the path does not exist or does not designate a directory.
635
+
636
+ Examples:
637
+ Get contents of top-level directory:
638
+
639
+ >>> pxt.get_dir_contents()
640
+
641
+ Get contents of 'dir1':
642
+
643
+ >>> pxt.get_dir_contents('dir1')
644
+ """
645
+ path_obj = catalog.Path.parse(dir_path, allow_empty_path=True)
646
+ catalog_entries = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
647
+ dirs: list[str] = []
648
+ tables: list[str] = []
649
+ _assemble_dir_contents(dir_path, catalog_entries, dirs, tables)
650
+ dirs.sort()
651
+ tables.sort()
652
+ return DirContents(dirs, tables)
653
+
654
+
655
+ def _assemble_dir_contents(
656
+ dir_path: str, catalog_entries: dict[str, Catalog.DirEntry], dirs: list[str], tables: list[str]
657
+ ) -> None:
658
+ for name, entry in catalog_entries.items():
659
+ if name.startswith('_'):
660
+ continue # Skip system paths
661
+ path = f'{dir_path}.{name}' if len(dir_path) > 0 else name
662
+ if entry.dir is not None:
663
+ dirs.append(path)
664
+ if entry.dir_entries is not None:
665
+ _assemble_dir_contents(path, entry.dir_entries, dirs, tables)
666
+ else:
667
+ assert entry.table is not None
668
+ assert not entry.dir_entries
669
+ tables.append(path)
513
670
 
514
671
 
515
672
  def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
@@ -535,15 +692,18 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
535
692
 
536
693
  >>> pxt.list_tables('dir1')
537
694
  """
538
- path_obj = catalog.Path(dir_path, empty_is_valid=True) # validate format
539
- cat = Catalog.get()
540
- contents = cat.get_dir_contents(path_obj, recursive=recursive)
695
+ return _list_tables(dir_path, recursive=recursive, allow_system_paths=False)
696
+
697
+
698
+ def _list_tables(dir_path: str = '', recursive: bool = True, allow_system_paths: bool = False) -> list[str]:
699
+ path_obj = catalog.Path.parse(dir_path, allow_empty_path=True, allow_system_path=allow_system_paths)
700
+ contents = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
541
701
  return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]
542
702
 
543
703
 
544
704
  def create_dir(
545
- path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
546
- ) -> Optional[catalog.Dir]:
705
+ path: str, *, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
706
+ ) -> catalog.Dir | None:
547
707
  """Create a directory.
548
708
 
549
709
  Args:
@@ -588,7 +748,7 @@ def create_dir(
588
748
 
589
749
  >>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
590
750
  """
591
- path_obj = catalog.Path(path)
751
+ path_obj = catalog.Path.parse(path)
592
752
  if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
593
753
  return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)
594
754
 
@@ -630,15 +790,75 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ig
630
790
 
631
791
  >>> pxt.drop_dir('my_dir', force=True)
632
792
  """
633
- path_obj = catalog.Path(path) # validate format
793
+ path_obj = catalog.Path.parse(path) # validate format
634
794
  if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
635
795
  Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)
636
796
 
637
797
 
798
+ def ls(path: str = '') -> pd.DataFrame:
799
+ """
800
+ List the contents of a Pixeltable directory.
801
+
802
+ This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
803
+ including various attributes such as version and base table, as appropriate.
804
+
805
+ To get a programmatic list of the directory's contents, use [get_dir_contents()][pixeltable.get_dir_contents]
806
+ instead.
807
+ """
808
+ from pixeltable.catalog import retry_loop
809
+ from pixeltable.metadata import schema
810
+
811
+ cat = Catalog.get()
812
+ path_obj = catalog.Path.parse(path, allow_empty_path=True)
813
+ dir_entries = cat.get_dir_contents(path_obj)
814
+
815
+ @retry_loop(for_write=False)
816
+ def op() -> list[list[str]]:
817
+ rows: list[list[str]] = []
818
+ for name, entry in dir_entries.items():
819
+ if name.startswith('_'):
820
+ continue
821
+ if entry.dir is not None:
822
+ kind = 'dir'
823
+ version = ''
824
+ base = ''
825
+ else:
826
+ assert entry.table is not None
827
+ assert isinstance(entry.table, schema.Table)
828
+ tbl = cat.get_table_by_id(entry.table.id)
829
+ md = tbl.get_metadata()
830
+ base = md['base'] or ''
831
+ if base.startswith('_'):
832
+ base = '<anonymous base table>'
833
+ if md['is_replica']:
834
+ kind = 'replica'
835
+ elif md['is_snapshot']:
836
+ kind = 'snapshot'
837
+ elif md['is_view']:
838
+ kind = 'view'
839
+ else:
840
+ kind = 'table'
841
+ version = '' if kind == 'snapshot' else str(md['version'])
842
+ rows.append([name, kind, version, base])
843
+ return rows
844
+
845
+ rows = op()
846
+
847
+ rows = sorted(rows, key=lambda x: x[0])
848
+ df = pd.DataFrame(
849
+ {
850
+ 'Name': [row[0] for row in rows],
851
+ 'Kind': [row[1] for row in rows],
852
+ 'Version': [row[2] for row in rows],
853
+ 'Base': [row[3] for row in rows],
854
+ },
855
+ index=([''] * len(rows)),
856
+ )
857
+ return df
858
+
859
+
638
860
  def _extract_paths(
639
- dir_entries: dict[str, Catalog.DirEntry],
640
- parent: catalog.Path,
641
- entry_type: Optional[type[catalog.SchemaObject]] = None,
861
+ dir_entries: dict[str, Catalog.DirEntry], parent: catalog.Path, entry_type: type[catalog.SchemaObject] | None = None
642
862
  ) -> list[catalog.Path]:
643
863
  """Convert nested dir_entries structure to a flattened list of paths."""
644
864
  matches: list[str]
@@ -676,7 +896,7 @@ def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
676
896
  >>> pxt.list_dirs('my_dir', recursive=True)
677
897
  ['my_dir', 'my_dir.sub_dir1']
678
898
  """
679
- path_obj = catalog.Path(path, empty_is_valid=True) # validate format
899
+ path_obj = catalog.Path.parse(path, allow_empty_path=True) # validate format
680
900
  cat = Catalog.get()
681
901
  contents = cat.get_dir_contents(path_obj, recursive=recursive)
682
902
  return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]
@@ -711,7 +931,7 @@ def list_functions() -> Styler:
711
931
  return pd_df.hide(axis='index')
712
932
 
713
933
 
714
- def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
934
+ def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
715
935
  """
716
936
  Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
717
937
  LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
@@ -748,7 +968,7 @@ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
748
968
  return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])
749
969
 
750
970
 
751
- def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
971
+ def tool(fn: func.Function, name: str | None = None, description: str | None = None) -> func.tools.Tool:
752
972
  """
753
973
  Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
754
974
  [pxt.tools()][pixeltable.tools] for more details.
@@ -769,11 +989,7 @@ def tool(fn: func.Function, name: Optional[str] = None, description: Optional[st
769
989
 
770
990
 
771
991
  def configure_logging(
772
- *,
773
- to_stdout: Optional[bool] = None,
774
- level: Optional[int] = None,
775
- add: Optional[str] = None,
776
- remove: Optional[str] = None,
992
+ *, to_stdout: bool | None = None, level: int | None = None, add: str | None = None, remove: str | None = None
777
993
  ) -> None:
778
994
  """Configure logging.
779
995
 
@@ -788,3 +1004,14 @@ def configure_logging(
788
1004
 
789
1005
  def array(elements: Iterable) -> exprs.Expr:
790
1006
  return exprs.Expr.from_array(elements)
1007
+
1008
+
1009
+ class DirContents(NamedTuple):
1010
+ """
1011
+ Represents the contents of a Pixeltable directory.
1012
+ """
1013
+
1014
+ dirs: list[str]
1015
+ """List of directory paths contained in this directory."""
1016
+ tables: list[str]
1017
+ """List of table paths contained in this directory."""