pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/io/external_store.py CHANGED
@@ -3,15 +3,13 @@ from __future__ import annotations
 import abc
 import itertools
 import logging
-import time
-from dataclasses import dataclass
-from typing import Any, Optional
-from uuid import UUID
+from typing import Any
 
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
 from pixeltable import Column, Table
-from pixeltable.catalog import TableVersion, TableVersionHandle
+from pixeltable.catalog import ColumnHandle, TableVersion
+from pixeltable.catalog.update_status import UpdateStatus
 
 _logger = logging.getLogger('pixeltable')
 
@@ -23,6 +21,8 @@ class ExternalStore(abc.ABC):
     and stateful external stores.
     """
 
+    __name: str
+
     def __init__(self, name: str) -> None:
         self.__name = name
 
@@ -32,24 +32,20 @@
 
     @abc.abstractmethod
     def link(self, tbl_version: TableVersion) -> None:
-        """
-        Called by `TableVersion.link()` to implement store-specific logic.
-        """
+        """Creates store-specific metadata needed to implement sync()."""
 
     @abc.abstractmethod
     def unlink(self, tbl_version: TableVersion) -> None:
-        """
-        Called by `TableVersion.unlink()` to implement store-specific logic.
-        """
+        """Removes store-specific metadata created in link()."""
 
     @abc.abstractmethod
-    def get_local_columns(self) -> list[Column]:
+    def get_local_columns(self) -> list[ColumnHandle]:
         """
         Gets a list of all local (Pixeltable) columns that are associated with this external store.
         """
 
     @abc.abstractmethod
-    def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
         """
         Called by `Table.sync()` to implement store-specific synchronization logic.
         """
@@ -68,9 +64,12 @@ class Project(ExternalStore, abc.ABC):
     additional capabilities specific to such projects.
     """
 
-    stored_proxies: dict[Column, Column]
+    _col_mapping: dict[ColumnHandle, str]  # col -> external col name
+    stored_proxies: dict[ColumnHandle, ColumnHandle]  # original col -> proxy col
 
-    def __init__(self, name: str, col_mapping: dict[Column, str], stored_proxies: Optional[dict[Column, Column]]):
+    def __init__(
+        self, name: str, col_mapping: dict[ColumnHandle, str], stored_proxies: dict[ColumnHandle, ColumnHandle] | None
+    ):
         super().__init__(name)
         self._col_mapping = col_mapping
 
@@ -85,11 +84,11 @@
         # Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
         # proportion of overlapping rows, all proxying the same base column.
         if stored_proxies is None:
-            self.stored_proxies: dict[Column, Column] = {}
+            self.stored_proxies: dict[ColumnHandle, ColumnHandle] = {}
         else:
             self.stored_proxies = stored_proxies
 
-    def get_local_columns(self) -> list[Column]:
+    def get_local_columns(self) -> list[ColumnHandle]:
         return list(self.col_mapping.keys())
 
     def link(self, tbl_version: TableVersion) -> None:
@@ -97,50 +96,42 @@
         # This ensures that the media in those columns resides in the media store.
         # First determine which columns (if any) need stored proxies, but don't have one yet.
         stored_proxies_needed: list[Column] = []
-        for col in self.col_mapping:
+        for col_handle in self.col_mapping:
+            col = col_handle.get()
             if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
                 # If this column is already proxied in some other Project, use the existing proxy to avoid
                 # duplication. Otherwise, we'll create a new one.
                 for store in tbl_version.external_stores.values():
-                    if isinstance(store, Project) and col in store.stored_proxies:
-                        self.stored_proxies[col] = store.stored_proxies[col]
+                    if isinstance(store, Project) and col_handle in store.stored_proxies:
+                        self.stored_proxies[col_handle] = store.stored_proxies[col_handle]
                         break
-                if col not in self.stored_proxies:
+                if col_handle not in self.stored_proxies:
                     # We didn't find it in an existing Project
                     stored_proxies_needed.append(col)
 
         if len(stored_proxies_needed) > 0:
             _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
-            # Create stored proxies for columns that need one. Increment the schema version
-            # accordingly.
-            tbl_version.version += 1
-            preceding_schema_version = tbl_version.schema_version
-            tbl_version.schema_version = tbl_version.version
-            proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
+            # Create stored proxies for columns that need one
+            proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
             # Add the columns; this will also update table metadata.
-            tbl_version._add_columns(proxy_cols, print_stats=False, on_error='ignore')
-            # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
-            # invisible to the user.
-            tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
+            tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
+            self.stored_proxies.update(
+                {col.handle: proxy_col.handle for col, proxy_col in zip(stored_proxies_needed, proxy_cols)}
+            )
 
     def unlink(self, tbl_version: TableVersion) -> None:
         # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
         # any *other* external store for this table.)
-        deletions_needed: set[Column] = set(self.stored_proxies.values())
+        deletions_needed: set[ColumnHandle] = set(self.stored_proxies.values())
         for name, store in tbl_version.external_stores.items():
             if isinstance(store, Project) and name != self.name:
                 deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
         if len(deletions_needed) > 0:
-            _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
-            # Delete stored proxies that are no longer needed.
-            tbl_version.version += 1
-            preceding_schema_version = tbl_version.schema_version
-            tbl_version.schema_version = tbl_version.version
-            tbl_version._drop_columns(deletions_needed)
+            _logger.info(f'Removing stored proxies for columns: {[col.get().name for col in deletions_needed]}')
+            tbl_version._drop_columns(col.get() for col in deletions_needed)
             self.stored_proxies.clear()
-            tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
 
-    def create_stored_proxy(self, tbl_version: TableVersion, col: Column) -> Column:
+    def create_stored_proxy(self, col: Column) -> Column:
         """
         Creates a proxy column for the specified column. The proxy column will be created in the specified
         `TableVersion`.
@@ -158,17 +149,11 @@
             # Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
             computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
             stored=True,
-            col_id=tbl_version.next_col_id,
-            sa_col_type=col.col_type.to_sa_type(),
-            schema_version_add=tbl_version.schema_version,
         )
-        proxy_col.tbl = TableVersionHandle(tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version)
-        tbl_version.next_col_id += 1
-        self.stored_proxies[col] = proxy_col
         return proxy_col
 
     @property
-    def col_mapping(self) -> dict[Column, str]:
+    def col_mapping(self) -> dict[ColumnHandle, str]:
         return self._col_mapping
 
     @abc.abstractmethod
@@ -202,8 +187,8 @@
         table: Table,
         export_cols: dict[str, ts.ColumnType],
         import_cols: dict[str, ts.ColumnType],
-        col_mapping: Optional[dict[str, str]],
-    ) -> dict[Column, str]:
+        col_mapping: dict[str, str] | None,
+    ) -> dict[ColumnHandle, str]:
         """
         Verifies that the specified `col_mapping` is valid. In particular, checks that:
         (i) the keys of `col_mapping` are valid columns of the specified `Table`;
@@ -213,6 +198,7 @@
             external (import or export) columns.
         If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
         in which the Pixeltable column names are resolved to the corresponding `Column` objects.
+        TODO: return columns as names or qualified ids
         """
         from pixeltable import exprs
 
@@ -220,33 +206,34 @@
         if col_mapping is None:
            col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
 
-        resolved_col_mapping: dict[Column, str] = {}
+        resolved_col_mapping: dict[ColumnHandle, str] = {}
 
         # Validate names
-        t_cols = set(table._schema.keys())
+        t_cols = set(table._get_schema().keys())
        for t_col, ext_col in col_mapping.items():
             if t_col not in t_cols:
                 if is_user_specified_col_mapping:
                     raise excs.Error(
-                        f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{table._name}` '
+                        f'Column name {t_col!r} appears as a key in `col_mapping`, but {table._display_str()} '
                         'contains no such column.'
                     )
                 else:
                     raise excs.Error(
-                        f'Column `{t_col}` does not exist in Table `{table._name}`. Either add a column `{t_col}`, '
+                        f'Column {t_col!r} does not exist in {table._display_str()}. Either add a column {t_col!r}, '
                         f'or specify a `col_mapping` to associate a different column with '
-                        f'the external field `{ext_col}`.'
+                        f'the external field {ext_col!r}.'
                     )
             if ext_col not in export_cols and ext_col not in import_cols:
                 raise excs.Error(
-                    f'Column name `{ext_col}` appears as a value in `col_mapping`, but the external store '
-                    f'configuration has no column `{ext_col}`.'
+                    f'Column name {ext_col!r} appears as a value in `col_mapping`, but the external store '
+                    f'configuration has no column {ext_col!r}.'
                 )
             col_ref = table[t_col]
             assert isinstance(col_ref, exprs.ColumnRef)
-            resolved_col_mapping[col_ref.col] = ext_col
+            resolved_col_mapping[col_ref.col.handle] = ext_col
+
         # Validate column specs
-        t_col_types = table._schema
+        t_col_types = table._get_schema()
         for t_col, ext_col in col_mapping.items():
             t_col_type = t_col_types[t_col]
             if ext_col in export_cols:
@@ -254,57 +241,23 @@
                 ext_col_type = export_cols[ext_col]
                 if not ext_col_type.is_supertype_of(t_col_type, ignore_nullable=True):
                     raise excs.Error(
-                        f'Column `{t_col}` cannot be exported to external column `{ext_col}` '
+                        f'Column {t_col!r} cannot be exported to external column {ext_col!r} '
                         f'(incompatible types; expecting `{ext_col_type}`)'
                     )
             if ext_col in import_cols:
                 # Validate that the external column can be assigned to the table column
                 if table._tbl_version_path.get_column(t_col).is_computed:
                     raise excs.Error(
-                        f'Column `{t_col}` is a computed column, which cannot be populated from an external column'
+                        f'Column {t_col!r} is a computed column, which cannot be populated from an external column'
                     )
                 ext_col_type = import_cols[ext_col]
                 if not t_col_type.is_supertype_of(ext_col_type, ignore_nullable=True):
                     raise excs.Error(
-                        f'Column `{t_col}` cannot be imported from external column `{ext_col}` '
+                        f'Column {t_col!r} cannot be imported from external column {ext_col!r} '
                         f'(incompatible types; expecting `{ext_col_type}`)'
                     )
         return resolved_col_mapping
 
-    @classmethod
-    def _column_as_dict(cls, col: Column) -> dict[str, Any]:
-        return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
-
-    @classmethod
-    def _column_from_dict(cls, d: dict[str, Any]) -> Column:
-        from pixeltable.catalog import Catalog
-
-        tbl_id = UUID(d['tbl_id'])
-        col_id = d['col_id']
-        return Catalog.get().get_tbl_version(tbl_id, None).cols_by_id[col_id]
-
-
-@dataclass(frozen=True)
-class SyncStatus:
-    external_rows_created: int = 0
-    external_rows_deleted: int = 0
-    external_rows_updated: int = 0
-    pxt_rows_updated: int = 0
-    num_excs: int = 0
-
-    def combine(self, other: 'SyncStatus') -> 'SyncStatus':
-        return SyncStatus(
-            external_rows_created=self.external_rows_created + other.external_rows_created,
-            external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
-            external_rows_updated=self.external_rows_updated + other.external_rows_updated,
-            pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
-            num_excs=self.num_excs + other.num_excs,
-        )
-
-    @classmethod
-    def empty(cls) -> 'SyncStatus':
-        return SyncStatus(0, 0, 0, 0, 0)
-
 
 class MockProject(Project):
     """A project that cannot be synced, used mainly for testing."""
@@ -314,8 +267,8 @@ class MockProject(Project):
         name: str,
         export_cols: dict[str, ts.ColumnType],
         import_cols: dict[str, ts.ColumnType],
-        col_mapping: dict[Column, str],
-        stored_proxies: Optional[dict[Column, Column]] = None,
+        col_mapping: dict[ColumnHandle, str],
+        stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
     ):
         super().__init__(name, col_mapping, stored_proxies)
         self.export_cols = export_cols
@@ -329,7 +282,7 @@
         name: str,
         export_cols: dict[str, ts.ColumnType],
         import_cols: dict[str, ts.ColumnType],
-        col_mapping: Optional[dict[str, str]] = None,
+        col_mapping: dict[str, str] | None = None,
     ) -> 'MockProject':
         col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
         return cls(name, export_cols, import_cols, col_mapping)
@@ -340,7 +293,7 @@
     def get_import_columns(self) -> dict[str, ts.ColumnType]:
         return self.import_cols
 
-    def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
         raise NotImplementedError()
 
     def delete(self) -> None:
@@ -355,10 +308,8 @@
             'name': self.name,
            'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
             'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
-            'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
-            'stored_proxies': [
-                [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
-            ],
+            'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
+            'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
         }
 
     @classmethod
@@ -367,8 +318,8 @@
             md['name'],
             {k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
             {k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
-            {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
-            {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']},
+            {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
+            {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
         )
 
     def __eq__(self, other: object) -> bool:
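The refactor above replaces direct `Column` references in `col_mapping` and `stored_proxies` with `ColumnHandle` keys: handles are resolved lazily via `.get()` and serialized via `as_dict()`/`from_dict()`, which is why the old `_column_as_dict`/`_column_from_dict` helpers and the `SyncStatus` dataclass could be dropped. The following is a minimal, hypothetical sketch of that handle pattern; `DemoHandle` and its `catalog` argument are invented for illustration and are not pixeltable APIs.

```python
# Hypothetical illustration of the handle pattern used above: external stores keep
# serializable (table id, column id) references instead of live Column objects, and
# resolve them against a catalog only when needed.
from __future__ import annotations

import uuid
from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class DemoHandle:
    """Stable, hashable reference to a column; safe to persist and to use as a dict key."""

    tbl_id: uuid.UUID
    col_id: int

    def as_dict(self) -> dict[str, Any]:
        return {'tbl_id': str(self.tbl_id), 'col_id': self.col_id}

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> 'DemoHandle':
        return cls(uuid.UUID(d['tbl_id']), d['col_id'])

    def get(self, catalog: dict['DemoHandle', Any]) -> Any:
        # Resolve the handle to the live object at use time, mirroring col_handle.get().
        return catalog[self]


if __name__ == '__main__':
    h = DemoHandle(uuid.uuid4(), 7)
    col_mapping = {h: 'external_name'}  # handles work as dict keys, like Project.col_mapping
    assert DemoHandle.from_dict(h.as_dict()) == h  # round-trips through stored metadata
```

Because a frozen dataclass with only hashable fields is itself hashable and cheap to serialize, the same value can serve both as a mapping key and as a persistable reference in table metadata, which appears to be the motivation for the change.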
pixeltable/io/fiftyone.py CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Iterator, Optional, Union
+from typing import Any, Iterator
 
 import fiftyone as fo  # type: ignore[import-untyped]
 import fiftyone.utils.data as foud  # type: ignore[import-untyped]
@@ -9,7 +9,7 @@ import puremagic
 import pixeltable as pxt
 import pixeltable.exceptions as excs
 from pixeltable import exprs
-from pixeltable.env import Env
+from pixeltable.utils.local_store import TempStore
 
 
 class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
@@ -20,7 +20,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
     __image_format: str  # format to use for any exported images that are not already stored on disk
     __labels: dict[str, tuple[exprs.Expr, type[fo.Label]]]  # label_name -> (expr, label_cls)
     __image_idx: int  # index of the image expr in the select list
-    __localpath_idx: Optional[int]  # index of the image localpath in the select list, if present
+    __localpath_idx: int | None  # index of the image localpath in the select list, if present
     __row_iter: Iterator[list]  # iterator over the table rows, to be convered to FiftyOne samples
 
     def __init__(
@@ -28,12 +28,12 @@
         tbl: pxt.Table,
         image: exprs.Expr,
         image_format: str,
-        classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
-        detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
-        dataset_dir: Optional[os.PathLike] = None,
+        classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
+        detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
+        dataset_dir: os.PathLike | None = None,
         shuffle: bool = False,
-        seed: Union[int, float, str, bytes, bytearray, None] = None,
-        max_samples: Optional[int] = None,
+        seed: int | float | str | bytes | bytearray | None = None,
+        max_samples: int | None = None,
     ):
         super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
 
@@ -87,10 +87,10 @@
         else:
             self.__localpath_idx = None
 
-        df = tbl.select(*selection)
-        self.__row_iter = df._output_row_iterator()
+        query = tbl.select(*selection)
+        self.__row_iter = query._output_row_iterator()
 
-    def __next__(self) -> tuple[str, Optional[fo.ImageMetadata], Optional[dict[str, fo.Label]]]:
+    def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
         row = next(self.__row_iter)
         img = row[self.__image_idx]
         assert isinstance(img, PIL.Image.Image)
@@ -100,7 +100,7 @@
             assert isinstance(file, str)
         else:
             # Write the dynamically created image to a temp file
-            file = str(Env.get().create_tmp_path(f'.{self.__image_format}'))
+            file = TempStore.create_path(extension=f'.{self.__image_format}')
             img.save(file, format=self.__image_format)
 
         metadata = fo.ImageMetadata(
@@ -108,7 +108,7 @@
             mime_type=puremagic.from_file(file, mime=True),
             width=img.width,
             height=img.height,
-            filepath=file,
+            filepath=str(file),
             num_channels=len(img.getbands()),
         )
 
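For reference, the per-sample flow in `PxtImageDatasetImporter.__next__` above can be sketched standalone: an image that only exists in memory is written to a temporary file before the FiftyOne metadata fields are computed. This sketch uses the standard `tempfile` module in place of pixeltable's internal `TempStore`, so it only approximates where the real implementation places the file; the metadata fields mirror the `fo.ImageMetadata` arguments shown in the diff.

```python
# Standalone approximation of the temp-file step for dynamically created images.
import tempfile

import PIL.Image
import puremagic

# a dynamically created image (stand-in for a computed column value or extracted frame)
img = PIL.Image.new('RGB', (64, 48), color='black')

image_format = 'webp'
with tempfile.NamedTemporaryFile(suffix=f'.{image_format}', delete=False) as tmp:
    file = tmp.name  # reserve a path; the image is written after the handle is closed
img.save(file, format=image_format)

# the same fields the importer feeds into fo.ImageMetadata
metadata = {
    'mime_type': puremagic.from_file(file, mime=True),
    'width': img.width,
    'height': img.height,
    'filepath': str(file),
    'num_channels': len(img.getbands()),
}
print(metadata)
```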
pixeltable/io/globals.py CHANGED
@@ -1,12 +1,12 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, Literal
 
 import pixeltable as pxt
 import pixeltable.exceptions as excs
 from pixeltable import Table, exprs
+from pixeltable.catalog.update_status import UpdateStatus
 from pixeltable.env import Env
-from pixeltable.io.external_store import SyncStatus
 
 if TYPE_CHECKING:
     import fiftyone as fo  # type: ignore[import-untyped]
@@ -15,19 +15,19 @@ if TYPE_CHECKING:
 def create_label_studio_project(
     t: Table,
     label_config: str,
-    name: Optional[str] = None,
-    title: Optional[str] = None,
+    name: str | None = None,
+    title: str | None = None,
     media_import_method: Literal['post', 'file', 'url'] = 'post',
-    col_mapping: Optional[dict[str, str]] = None,
+    col_mapping: dict[str, str] | None = None,
     sync_immediately: bool = True,
-    s3_configuration: Optional[dict[str, Any]] = None,
+    s3_configuration: dict[str, Any] | None = None,
     **kwargs: Any,
-) -> SyncStatus:
+) -> UpdateStatus:
     """
     Create a new Label Studio project and link it to the specified [`Table`][pixeltable.Table].
 
     - A tutorial notebook with fully worked examples can be found here:
-      [Using Label Studio for Annotations with Pixeltable](https://pixeltable.readme.io/docs/label-studio)
+      [Using Label Studio for Annotations with Pixeltable](https://docs.pixeltable.com/notebooks/integrations/using-label-studio-with-pixeltable)
 
     The required parameter `label_config` specifies the Label Studio project configuration,
     in XML format, as described in the Label Studio documentation. The linked project will
@@ -96,32 +96,33 @@
    [Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
 
     Returns:
-        A `SyncStatus` representing the status of any synchronization operations that occurred.
+        An `UpdateStatus` representing the status of any synchronization operations that occurred.
 
     Examples:
         Create a Label Studio project whose tasks correspond to videos stored in the `video_col`
         column of the table `tbl`:
 
         >>> config = \"\"\"
-        <View>
-          <Video name="video_obj" value="$video_col"/>
-          <Choices name="video-category" toName="video" showInLine="true">
-            <Choice value="city"/>
-            <Choice value="food"/>
-            <Choice value="sports"/>
-          </Choices>
-        </View>\"\"\"
-        create_label_studio_project(tbl, config)
+        ... <View>
+        ...   <Video name="video_obj" value="$video_col"/>
+        ...   <Choices name="video-category" toName="video" showInLine="true">
+        ...     <Choice value="city"/>
+        ...     <Choice value="food"/>
+        ...     <Choice value="sports"/>
+        ...   </Choices>
+        ... </View>
+        ... \"\"\"
+        >>> create_label_studio_project(tbl, config)
 
         Create a Label Studio project with the same configuration, using `media_import_method='url'`,
         whose media are stored in an S3 bucket:
 
         >>> create_label_studio_project(
-        ...     tbl,
-        ...     config,
-        ...     media_import_method='url',
-        ...     s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
-        ... )
+        ...     tbl,
+        ...     config,
+        ...     media_import_method='url',
+        ...     s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
+        ... )
     """
     Env.get().require_package('label_studio_sdk')
 
@@ -136,27 +137,27 @@
     if sync_immediately:
         return t.sync()
     else:
-        return SyncStatus.empty()
+        return UpdateStatus()
 
 
 def export_images_as_fo_dataset(
     tbl: pxt.Table,
     images: exprs.Expr,
     image_format: str = 'webp',
-    classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
-    detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
+    classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
+    detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
 ) -> 'fo.Dataset':
     """
     Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column
     (or expression) containing image data, along with optional additional columns containing labels. Currently, only
     classification and detection labels are supported.
 
-    The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/docs/working-with-voxel51) tutorial contains a
+    The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/examples/vision/voxel51) tutorial contains a
     fully worked example showing how to export data from a Pixeltable table and load it into Voxel51.
 
     Images in the dataset that already exist on disk will be exported directly, in whatever format they
     are stored in. Images that are not already on disk (such as frames extracted using a
-    [`FrameIterator`][pixeltable.iterators.FrameIterator]) will first be written to disk in the specified
+    [`frame_iterator`][pixeltable.functions.video.frame_iterator]) will first be written to disk in the specified
     `image_format`.
 
@@ -204,13 +205,13 @@
         Export the images in the `image` column of the table `tbl` as a Voxel51 dataset, using classification
         labels from `tbl.classifications`:
 
-        >>> export_as_fiftyone(
+        >>> export_images_as_fo_dataset(
         ...     tbl,
        ...     tbl.image,
         ...     classifications=tbl.classifications
         ... )
 
-        See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/docs/working-with-voxel51) tutorial
+        See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/examples/vision/voxel51) tutorial
         for a fully worked example.
     """
     Env.get().require_package('fiftyone')
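A hedged usage sketch for the two entry points whose signatures changed above: `export_images_as_fo_dataset` (docs now pointing at `frame_iterator`) and `create_label_studio_project` (now returning `UpdateStatus` instead of the removed `SyncStatus`). The table path and column names are placeholders, a configured Label Studio connection is assumed, and the `pxt.io` module paths follow the public API as exported from `pixeltable/io/__init__.py`; the snippet is illustrative rather than verified against 0.5.7.

```python
import pixeltable as pxt

# placeholder table assumed to have `image`, `classifications`, and `video_col` columns
tbl = pxt.get_table('my_dir.my_table')

# Build a Voxel51 dataset from an image column plus classification labels.
dataset = pxt.io.export_images_as_fo_dataset(tbl, tbl.image, classifications=tbl.classifications)

# Link a Label Studio project; in 0.5.x this returns an UpdateStatus.
config = """
<View>
  <Video name="video_obj" value="$video_col"/>
  <Choices name="video-category" toName="video" showInLine="true">
    <Choice value="city"/>
    <Choice value="food"/>
    <Choice value="sports"/>
  </Choices>
</View>
"""
status = pxt.io.create_label_studio_project(tbl, config)
print(status)
```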