pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
@@ -3,14 +3,13 @@ from __future__ import annotations
3
3
  import abc
4
4
  import itertools
5
5
  import logging
6
- from dataclasses import dataclass
7
- from typing import Any, Optional
8
- from uuid import UUID
6
+ from typing import Any
9
7
 
10
8
  import pixeltable.exceptions as excs
11
9
  import pixeltable.type_system as ts
12
10
  from pixeltable import Column, Table
13
- from pixeltable.catalog import TableVersion
11
+ from pixeltable.catalog import ColumnHandle, TableVersion
12
+ from pixeltable.catalog.update_status import UpdateStatus
14
13
 
15
14
  _logger = logging.getLogger('pixeltable')
16
15
 
@@ -22,6 +21,8 @@ class ExternalStore(abc.ABC):
22
21
  and stateful external stores.
23
22
  """
24
23
 
24
+ __name: str
25
+
25
26
  def __init__(self, name: str) -> None:
26
27
  self.__name = name
27
28
 
@@ -38,13 +39,13 @@ class ExternalStore(abc.ABC):
38
39
  """Removes store-specific metadata created in link()."""
39
40
 
40
41
  @abc.abstractmethod
41
- def get_local_columns(self) -> list[Column]:
42
+ def get_local_columns(self) -> list[ColumnHandle]:
42
43
  """
43
44
  Gets a list of all local (Pixeltable) columns that are associated with this external store.
44
45
  """
45
46
 
46
47
  @abc.abstractmethod
47
- def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
48
+ def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
48
49
  """
49
50
  Called by `Table.sync()` to implement store-specific synchronization logic.
50
51
  """
@@ -63,9 +64,12 @@ class Project(ExternalStore, abc.ABC):
63
64
  additional capabilities specific to such projects.
64
65
  """
65
66
 
66
- stored_proxies: dict[Column, Column]
67
+ _col_mapping: dict[ColumnHandle, str] # col -> external col name
68
+ stored_proxies: dict[ColumnHandle, ColumnHandle] # original col -> proxy col
67
69
 
68
- def __init__(self, name: str, col_mapping: dict[Column, str], stored_proxies: Optional[dict[Column, Column]]):
70
+ def __init__(
71
+ self, name: str, col_mapping: dict[ColumnHandle, str], stored_proxies: dict[ColumnHandle, ColumnHandle] | None
72
+ ):
69
73
  super().__init__(name)
70
74
  self._col_mapping = col_mapping
71
75
 
@@ -80,11 +84,11 @@ class Project(ExternalStore, abc.ABC):
80
84
  # Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
81
85
  # proportion of overlapping rows, all proxying the same base column.
82
86
  if stored_proxies is None:
83
- self.stored_proxies: dict[Column, Column] = {}
87
+ self.stored_proxies: dict[ColumnHandle, ColumnHandle] = {}
84
88
  else:
85
89
  self.stored_proxies = stored_proxies
86
90
 
87
- def get_local_columns(self) -> list[Column]:
91
+ def get_local_columns(self) -> list[ColumnHandle]:
88
92
  return list(self.col_mapping.keys())
89
93
 
90
94
  def link(self, tbl_version: TableVersion) -> None:
@@ -92,15 +96,16 @@ class Project(ExternalStore, abc.ABC):
92
96
  # This ensures that the media in those columns resides in the media store.
93
97
  # First determine which columns (if any) need stored proxies, but don't have one yet.
94
98
  stored_proxies_needed: list[Column] = []
95
- for col in self.col_mapping:
99
+ for col_handle in self.col_mapping:
100
+ col = col_handle.get()
96
101
  if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
97
102
  # If this column is already proxied in some other Project, use the existing proxy to avoid
98
103
  # duplication. Otherwise, we'll create a new one.
99
104
  for store in tbl_version.external_stores.values():
100
- if isinstance(store, Project) and col in store.stored_proxies:
101
- self.stored_proxies[col] = store.stored_proxies[col]
105
+ if isinstance(store, Project) and col_handle in store.stored_proxies:
106
+ self.stored_proxies[col_handle] = store.stored_proxies[col_handle]
102
107
  break
103
- if col not in self.stored_proxies:
108
+ if col_handle not in self.stored_proxies:
104
109
  # We didn't find it in an existing Project
105
110
  stored_proxies_needed.append(col)
106
111
 
@@ -110,17 +115,20 @@ class Project(ExternalStore, abc.ABC):
110
115
  proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
111
116
  # Add the columns; this will also update table metadata.
112
117
  tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
118
+ self.stored_proxies.update(
119
+ {col.handle: proxy_col.handle for col, proxy_col in zip(stored_proxies_needed, proxy_cols)}
120
+ )
113
121
 
114
122
  def unlink(self, tbl_version: TableVersion) -> None:
115
123
  # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
116
124
  # any *other* external store for this table.)
117
- deletions_needed: set[Column] = set(self.stored_proxies.values())
125
+ deletions_needed: set[ColumnHandle] = set(self.stored_proxies.values())
118
126
  for name, store in tbl_version.external_stores.items():
119
127
  if isinstance(store, Project) and name != self.name:
120
128
  deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
121
129
  if len(deletions_needed) > 0:
122
- _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
123
- tbl_version._drop_columns(deletions_needed)
130
+ _logger.info(f'Removing stored proxies for columns: {[col.get().name for col in deletions_needed]}')
131
+ tbl_version._drop_columns(col.get() for col in deletions_needed)
124
132
  self.stored_proxies.clear()
125
133
 
126
134
  def create_stored_proxy(self, col: Column) -> Column:
@@ -142,11 +150,10 @@ class Project(ExternalStore, abc.ABC):
142
150
  computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
143
151
  stored=True,
144
152
  )
145
- self.stored_proxies[col] = proxy_col
146
153
  return proxy_col
147
154
 
148
155
  @property
149
- def col_mapping(self) -> dict[Column, str]:
156
+ def col_mapping(self) -> dict[ColumnHandle, str]:
150
157
  return self._col_mapping
151
158
 
152
159
  @abc.abstractmethod
@@ -180,8 +187,8 @@ class Project(ExternalStore, abc.ABC):
180
187
  table: Table,
181
188
  export_cols: dict[str, ts.ColumnType],
182
189
  import_cols: dict[str, ts.ColumnType],
183
- col_mapping: Optional[dict[str, str]],
184
- ) -> dict[Column, str]:
190
+ col_mapping: dict[str, str] | None,
191
+ ) -> dict[ColumnHandle, str]:
185
192
  """
186
193
  Verifies that the specified `col_mapping` is valid. In particular, checks that:
187
194
  (i) the keys of `col_mapping` are valid columns of the specified `Table`;
@@ -199,33 +206,34 @@ class Project(ExternalStore, abc.ABC):
199
206
  if col_mapping is None:
200
207
  col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
201
208
 
202
- resolved_col_mapping: dict[Column, str] = {}
209
+ resolved_col_mapping: dict[ColumnHandle, str] = {}
203
210
 
204
211
  # Validate names
205
- t_cols = set(table._schema.keys())
212
+ t_cols = set(table._get_schema().keys())
206
213
  for t_col, ext_col in col_mapping.items():
207
214
  if t_col not in t_cols:
208
215
  if is_user_specified_col_mapping:
209
216
  raise excs.Error(
210
- f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{table._name}` '
217
+ f'Column name {t_col!r} appears as a key in `col_mapping`, but {table._display_str()} '
211
218
  'contains no such column.'
212
219
  )
213
220
  else:
214
221
  raise excs.Error(
215
- f'Column `{t_col}` does not exist in Table `{table._name}`. Either add a column `{t_col}`, '
222
+ f'Column {t_col!r} does not exist in {table._display_str()}. Either add a column {t_col!r}, '
216
223
  f'or specify a `col_mapping` to associate a different column with '
217
- f'the external field `{ext_col}`.'
224
+ f'the external field {ext_col!r}.'
218
225
  )
219
226
  if ext_col not in export_cols and ext_col not in import_cols:
220
227
  raise excs.Error(
221
- f'Column name `{ext_col}` appears as a value in `col_mapping`, but the external store '
222
- f'configuration has no column `{ext_col}`.'
228
+ f'Column name {ext_col!r} appears as a value in `col_mapping`, but the external store '
229
+ f'configuration has no column {ext_col!r}.'
223
230
  )
224
231
  col_ref = table[t_col]
225
232
  assert isinstance(col_ref, exprs.ColumnRef)
226
- resolved_col_mapping[col_ref.col] = ext_col
233
+ resolved_col_mapping[col_ref.col.handle] = ext_col
234
+
227
235
  # Validate column specs
228
- t_col_types = table._schema
236
+ t_col_types = table._get_schema()
229
237
  for t_col, ext_col in col_mapping.items():
230
238
  t_col_type = t_col_types[t_col]
231
239
  if ext_col in export_cols:
@@ -233,57 +241,23 @@ class Project(ExternalStore, abc.ABC):
233
241
  ext_col_type = export_cols[ext_col]
234
242
  if not ext_col_type.is_supertype_of(t_col_type, ignore_nullable=True):
235
243
  raise excs.Error(
236
- f'Column `{t_col}` cannot be exported to external column `{ext_col}` '
244
+ f'Column {t_col!r} cannot be exported to external column {ext_col!r} '
237
245
  f'(incompatible types; expecting `{ext_col_type}`)'
238
246
  )
239
247
  if ext_col in import_cols:
240
248
  # Validate that the external column can be assigned to the table column
241
249
  if table._tbl_version_path.get_column(t_col).is_computed:
242
250
  raise excs.Error(
243
- f'Column `{t_col}` is a computed column, which cannot be populated from an external column'
251
+ f'Column {t_col!r} is a computed column, which cannot be populated from an external column'
244
252
  )
245
253
  ext_col_type = import_cols[ext_col]
246
254
  if not t_col_type.is_supertype_of(ext_col_type, ignore_nullable=True):
247
255
  raise excs.Error(
248
- f'Column `{t_col}` cannot be imported from external column `{ext_col}` '
256
+ f'Column {t_col!r} cannot be imported from external column {ext_col!r} '
249
257
  f'(incompatible types; expecting `{ext_col_type}`)'
250
258
  )
251
259
  return resolved_col_mapping
252
260
 
253
- @classmethod
254
- def _column_as_dict(cls, col: Column) -> dict[str, Any]:
255
- return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
256
-
257
- @classmethod
258
- def _column_from_dict(cls, d: dict[str, Any]) -> Column:
259
- from pixeltable.catalog import Catalog
260
-
261
- tbl_id = UUID(d['tbl_id'])
262
- col_id = d['col_id']
263
- return Catalog.get().get_tbl_version(tbl_id, None).cols_by_id[col_id]
264
-
265
-
266
- @dataclass(frozen=True)
267
- class SyncStatus:
268
- external_rows_created: int = 0
269
- external_rows_deleted: int = 0
270
- external_rows_updated: int = 0
271
- pxt_rows_updated: int = 0
272
- num_excs: int = 0
273
-
274
- def combine(self, other: 'SyncStatus') -> 'SyncStatus':
275
- return SyncStatus(
276
- external_rows_created=self.external_rows_created + other.external_rows_created,
277
- external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
278
- external_rows_updated=self.external_rows_updated + other.external_rows_updated,
279
- pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
280
- num_excs=self.num_excs + other.num_excs,
281
- )
282
-
283
- @classmethod
284
- def empty(cls) -> 'SyncStatus':
285
- return SyncStatus(0, 0, 0, 0, 0)
286
-
287
261
 
288
262
  class MockProject(Project):
289
263
  """A project that cannot be synced, used mainly for testing."""
@@ -293,8 +267,8 @@ class MockProject(Project):
293
267
  name: str,
294
268
  export_cols: dict[str, ts.ColumnType],
295
269
  import_cols: dict[str, ts.ColumnType],
296
- col_mapping: dict[Column, str],
297
- stored_proxies: Optional[dict[Column, Column]] = None,
270
+ col_mapping: dict[ColumnHandle, str],
271
+ stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
298
272
  ):
299
273
  super().__init__(name, col_mapping, stored_proxies)
300
274
  self.export_cols = export_cols
@@ -308,7 +282,7 @@ class MockProject(Project):
308
282
  name: str,
309
283
  export_cols: dict[str, ts.ColumnType],
310
284
  import_cols: dict[str, ts.ColumnType],
311
- col_mapping: Optional[dict[str, str]] = None,
285
+ col_mapping: dict[str, str] | None = None,
312
286
  ) -> 'MockProject':
313
287
  col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
314
288
  return cls(name, export_cols, import_cols, col_mapping)
@@ -319,7 +293,7 @@ class MockProject(Project):
319
293
  def get_import_columns(self) -> dict[str, ts.ColumnType]:
320
294
  return self.import_cols
321
295
 
322
- def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
296
+ def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
323
297
  raise NotImplementedError()
324
298
 
325
299
  def delete(self) -> None:
@@ -334,10 +308,8 @@ class MockProject(Project):
334
308
  'name': self.name,
335
309
  'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
336
310
  'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
337
- 'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
338
- 'stored_proxies': [
339
- [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
340
- ],
311
+ 'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
312
+ 'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
341
313
  }
342
314
 
343
315
  @classmethod
@@ -346,8 +318,8 @@ class MockProject(Project):
346
318
  md['name'],
347
319
  {k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
348
320
  {k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
349
- {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
350
- {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']},
321
+ {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
322
+ {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
351
323
  )
352
324
 
353
325
  def __eq__(self, other: object) -> bool:
pixeltable/io/fiftyone.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import os
2
- from typing import Any, Iterator, Optional, Union
2
+ from typing import Any, Iterator
3
3
 
4
4
  import fiftyone as fo # type: ignore[import-untyped]
5
5
  import fiftyone.utils.data as foud # type: ignore[import-untyped]
@@ -9,7 +9,7 @@ import puremagic
9
9
  import pixeltable as pxt
10
10
  import pixeltable.exceptions as excs
11
11
  from pixeltable import exprs
12
- from pixeltable.env import Env
12
+ from pixeltable.utils.local_store import TempStore
13
13
 
14
14
 
15
15
  class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
@@ -20,7 +20,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
20
20
  __image_format: str # format to use for any exported images that are not already stored on disk
21
21
  __labels: dict[str, tuple[exprs.Expr, type[fo.Label]]] # label_name -> (expr, label_cls)
22
22
  __image_idx: int # index of the image expr in the select list
23
- __localpath_idx: Optional[int] # index of the image localpath in the select list, if present
23
+ __localpath_idx: int | None # index of the image localpath in the select list, if present
24
24
  __row_iter: Iterator[list] # iterator over the table rows, to be converted to FiftyOne samples
25
25
 
26
26
  def __init__(
@@ -28,12 +28,12 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
28
28
  tbl: pxt.Table,
29
29
  image: exprs.Expr,
30
30
  image_format: str,
31
- classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
32
- detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
33
- dataset_dir: Optional[os.PathLike] = None,
31
+ classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
32
+ detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
33
+ dataset_dir: os.PathLike | None = None,
34
34
  shuffle: bool = False,
35
- seed: Union[int, float, str, bytes, bytearray, None] = None,
36
- max_samples: Optional[int] = None,
35
+ seed: int | float | str | bytes | bytearray | None = None,
36
+ max_samples: int | None = None,
37
37
  ):
38
38
  super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
39
39
 
@@ -90,7 +90,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
90
90
  df = tbl.select(*selection)
91
91
  self.__row_iter = df._output_row_iterator()
92
92
 
93
- def __next__(self) -> tuple[str, Optional[fo.ImageMetadata], Optional[dict[str, fo.Label]]]:
93
+ def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
94
94
  row = next(self.__row_iter)
95
95
  img = row[self.__image_idx]
96
96
  assert isinstance(img, PIL.Image.Image)
@@ -100,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
100
100
  assert isinstance(file, str)
101
101
  else:
102
102
  # Write the dynamically created image to a temp file
103
- file = str(Env.get().create_tmp_path(f'.{self.__image_format}'))
103
+ file = TempStore.create_path(extension=f'.{self.__image_format}')
104
104
  img.save(file, format=self.__image_format)
105
105
 
106
106
  metadata = fo.ImageMetadata(
@@ -108,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
108
108
  mime_type=puremagic.from_file(file, mime=True),
109
109
  width=img.width,
110
110
  height=img.height,
111
- filepath=file,
111
+ filepath=str(file),
112
112
  num_channels=len(img.getbands()),
113
113
  )
114
114
 
pixeltable/io/globals.py CHANGED
@@ -1,12 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any, Literal, Optional, Union
3
+ from typing import TYPE_CHECKING, Any, Literal
4
4
 
5
5
  import pixeltable as pxt
6
6
  import pixeltable.exceptions as excs
7
7
  from pixeltable import Table, exprs
8
+ from pixeltable.catalog.update_status import UpdateStatus
8
9
  from pixeltable.env import Env
9
- from pixeltable.io.external_store import SyncStatus
10
10
 
11
11
  if TYPE_CHECKING:
12
12
  import fiftyone as fo # type: ignore[import-untyped]
@@ -15,14 +15,14 @@ if TYPE_CHECKING:
15
15
  def create_label_studio_project(
16
16
  t: Table,
17
17
  label_config: str,
18
- name: Optional[str] = None,
19
- title: Optional[str] = None,
18
+ name: str | None = None,
19
+ title: str | None = None,
20
20
  media_import_method: Literal['post', 'file', 'url'] = 'post',
21
- col_mapping: Optional[dict[str, str]] = None,
21
+ col_mapping: dict[str, str] | None = None,
22
22
  sync_immediately: bool = True,
23
- s3_configuration: Optional[dict[str, Any]] = None,
23
+ s3_configuration: dict[str, Any] | None = None,
24
24
  **kwargs: Any,
25
- ) -> SyncStatus:
25
+ ) -> UpdateStatus:
26
26
  """
27
27
  Create a new Label Studio project and link it to the specified [`Table`][pixeltable.Table].
28
28
 
@@ -96,32 +96,33 @@ def create_label_studio_project(
96
96
  [Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
97
97
 
98
98
  Returns:
99
- A `SyncStatus` representing the status of any synchronization operations that occurred.
99
+ An `UpdateStatus` representing the status of any synchronization operations that occurred.
100
100
 
101
101
  Examples:
102
102
  Create a Label Studio project whose tasks correspond to videos stored in the `video_col`
103
103
  column of the table `tbl`:
104
104
 
105
105
  >>> config = \"\"\"
106
- <View>
107
- <Video name="video_obj" value="$video_col"/>
108
- <Choices name="video-category" toName="video" showInLine="true">
109
- <Choice value="city"/>
110
- <Choice value="food"/>
111
- <Choice value="sports"/>
112
- </Choices>
113
- </View>\"\"\"
114
- create_label_studio_project(tbl, config)
106
+ ... <View>
107
+ ... <Video name="video_obj" value="$video_col"/>
108
+ ... <Choices name="video-category" toName="video" showInLine="true">
109
+ ... <Choice value="city"/>
110
+ ... <Choice value="food"/>
111
+ ... <Choice value="sports"/>
112
+ ... </Choices>
113
+ ... </View>
114
+ ... \"\"\"
115
+ >>> create_label_studio_project(tbl, config)
115
116
 
116
117
  Create a Label Studio project with the same configuration, using `media_import_method='url'`,
117
118
  whose media are stored in an S3 bucket:
118
119
 
119
120
  >>> create_label_studio_project(
120
- tbl,
121
- config,
122
- media_import_method='url',
123
- s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
124
- )
121
+ ... tbl,
122
+ ... config,
123
+ ... media_import_method='url',
124
+ ... s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
125
+ ... )
125
126
  """
126
127
  Env.get().require_package('label_studio_sdk')
127
128
 
@@ -136,22 +137,22 @@ def create_label_studio_project(
136
137
  if sync_immediately:
137
138
  return t.sync()
138
139
  else:
139
- return SyncStatus.empty()
140
+ return UpdateStatus()
140
141
 
141
142
 
142
143
  def export_images_as_fo_dataset(
143
144
  tbl: pxt.Table,
144
145
  images: exprs.Expr,
145
146
  image_format: str = 'webp',
146
- classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
147
- detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
147
+ classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
148
+ detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
148
149
  ) -> 'fo.Dataset':
149
150
  """
150
151
  Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column
151
152
  (or expression) containing image data, along with optional additional columns containing labels. Currently, only
152
153
  classification and detection labels are supported.
153
154
 
154
- The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/docs/working-with-voxel51) tutorial contains a
155
+ The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/examples/vision/voxel51) tutorial contains a
155
156
  fully worked example showing how to export data from a Pixeltable table and load it into Voxel51.
156
157
 
157
158
  Images in the dataset that already exist on disk will be exported directly, in whatever format they
@@ -204,13 +205,13 @@ def export_images_as_fo_dataset(
204
205
  Export the images in the `image` column of the table `tbl` as a Voxel51 dataset, using classification
205
206
  labels from `tbl.classifications`:
206
207
 
207
- >>> export_as_fiftyone(
208
+ >>> export_images_as_fo_dataset(
208
209
  ... tbl,
209
210
  ... tbl.image,
210
211
  ... classifications=tbl.classifications
211
212
  ... )
212
213
 
213
- See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/docs/working-with-voxel51) tutorial
214
+ See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/examples/vision/voxel51) tutorial
214
215
  for a fully worked example.
215
216
  """
216
217
  Env.get().require_package('fiftyone')
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import typing
4
- from typing import Any, Optional, Union
4
+ from typing import Any
5
5
 
6
6
  import pixeltable as pxt
7
7
  import pixeltable.type_system as ts
@@ -36,7 +36,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
36
36
  }
37
37
 
38
38
 
39
- def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.ColumnType]:
39
+ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> ts.ColumnType | None:
40
40
  """Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
41
41
  import datasets
42
42
 
@@ -50,15 +50,23 @@ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.Column
50
50
  elif isinstance(feature_type, datasets.Sequence):
51
51
  # example: cohere wiki. Sequence(feature=Value(dtype='float32', id=None), length=-1, id=None)
52
52
  dtype = _to_pixeltable_type(feature_type.feature, nullable)
53
- length = feature_type.length if feature_type.length != -1 else None
54
- return ts.ArrayType(shape=(length,), dtype=dtype)
53
+ if dtype is None:
54
+ return None
55
+ if dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type():
56
+ length = feature_type.length if feature_type.length != -1 else None
57
+ return ts.ArrayType(shape=(length,), dtype=dtype, nullable=nullable)
58
+ else:
59
+ # Sequence of dicts must be cast as Json
60
+ return ts.JsonType(nullable=nullable)
55
61
  elif isinstance(feature_type, datasets.Image):
56
62
  return ts.ImageType(nullable=nullable)
63
+ elif isinstance(feature_type, dict):
64
+ return ts.JsonType(nullable=nullable)
57
65
  else:
58
66
  return None
59
67
 
60
68
 
61
- def _get_hf_schema(dataset: Union[datasets.Dataset, datasets.DatasetDict]) -> datasets.Features:
69
+ def _get_hf_schema(dataset: datasets.Dataset | datasets.DatasetDict) -> datasets.Features:
62
70
  """Get the schema of a huggingface dataset as a dictionary."""
63
71
  import datasets
64
72
 
@@ -68,7 +76,7 @@ def _get_hf_schema(dataset: Union[datasets.Dataset, datasets.DatasetDict]) -> da
68
76
 
69
77
  def huggingface_schema_to_pxt_schema(
70
78
  hf_schema: datasets.Features, schema_overrides: dict[str, Any], primary_key: list[str]
71
- ) -> dict[str, Optional[ts.ColumnType]]:
79
+ ) -> dict[str, ts.ColumnType | None]:
72
80
  """Generate a pixeltable schema from a huggingface dataset schema.
73
81
  Columns without a known mapping are mapped to None
74
82
  """
@@ -83,10 +91,10 @@ def huggingface_schema_to_pxt_schema(
83
91
 
84
92
  def import_huggingface_dataset(
85
93
  table_path: str,
86
- dataset: Union[datasets.Dataset, datasets.DatasetDict],
94
+ dataset: datasets.Dataset | datasets.DatasetDict,
87
95
  *,
88
- schema_overrides: Optional[dict[str, Any]] = None,
89
- primary_key: Optional[Union[str, list[str]]] = None,
96
+ schema_overrides: dict[str, Any] | None = None,
97
+ primary_key: str | list[str] | None = None,
90
98
  **kwargs: Any,
91
99
  ) -> pxt.Table:
92
100
  """Create a new base table from a Huggingface dataset, or dataset dict with multiple splits.