pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -3,17 +3,13 @@ from __future__ import annotations
3
3
  import abc
4
4
  import itertools
5
5
  import logging
6
- import time
7
- from dataclasses import dataclass
8
- from typing import Any, Optional
9
- from uuid import UUID
6
+ from typing import Any
10
7
 
11
8
  import pixeltable.exceptions as excs
12
9
  import pixeltable.type_system as ts
13
- from pixeltable import Table, Column
14
- import sqlalchemy as sql
15
-
16
- from pixeltable.catalog import TableVersion
10
+ from pixeltable import Column, Table
11
+ from pixeltable.catalog import ColumnHandle, TableVersion
12
+ from pixeltable.catalog.update_status import UpdateStatus
17
13
 
18
14
  _logger = logging.getLogger('pixeltable')
19
15
 
@@ -25,6 +21,8 @@ class ExternalStore(abc.ABC):
25
21
  and stateful external stores.
26
22
  """
27
23
 
24
+ __name: str
25
+
28
26
  def __init__(self, name: str) -> None:
29
27
  self.__name = name
30
28
 
@@ -33,25 +31,21 @@ class ExternalStore(abc.ABC):
33
31
  return self.__name
34
32
 
35
33
  @abc.abstractmethod
36
- def link(self, tbl_version: TableVersion, conn: sql.Connection) -> None:
37
- """
38
- Called by `TableVersion.link()` to implement store-specific logic.
39
- """
34
+ def link(self, tbl_version: TableVersion) -> None:
35
+ """Creates store-specific metadata needed to implement sync()."""
40
36
 
41
37
  @abc.abstractmethod
42
- def unlink(self, tbl_version: TableVersion, conn: sql.Connection) -> None:
43
- """
44
- Called by `TableVersion.unlink()` to implement store-specific logic.
45
- """
38
+ def unlink(self, tbl_version: TableVersion) -> None:
39
+ """Removes store-specific metadata created in link()."""
46
40
 
47
41
  @abc.abstractmethod
48
- def get_local_columns(self) -> list[Column]:
42
+ def get_local_columns(self) -> list[ColumnHandle]:
49
43
  """
50
44
  Gets a list of all local (Pixeltable) columns that are associated with this external store.
51
45
  """
52
46
 
53
47
  @abc.abstractmethod
54
- def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
48
+ def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
55
49
  """
56
50
  Called by `Table.sync()` to implement store-specific synchronization logic.
57
51
  """
@@ -70,9 +64,12 @@ class Project(ExternalStore, abc.ABC):
70
64
  additional capabilities specific to such projects.
71
65
  """
72
66
 
73
- stored_proxies: dict[Column, Column]
67
+ _col_mapping: dict[ColumnHandle, str] # col -> external col name
68
+ stored_proxies: dict[ColumnHandle, ColumnHandle] # original col -> proxy col
74
69
 
75
- def __init__(self, name: str, col_mapping: dict[Column, str], stored_proxies: Optional[dict[Column, Column]]):
70
+ def __init__(
71
+ self, name: str, col_mapping: dict[ColumnHandle, str], stored_proxies: dict[ColumnHandle, ColumnHandle] | None
72
+ ):
76
73
  super().__init__(name)
77
74
  self._col_mapping = col_mapping
78
75
 
@@ -87,68 +84,63 @@ class Project(ExternalStore, abc.ABC):
87
84
  # Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
88
85
  # proportion of overlapping rows, all proxying the same base column.
89
86
  if stored_proxies is None:
90
- self.stored_proxies: dict[Column, Column] = {}
87
+ self.stored_proxies: dict[ColumnHandle, ColumnHandle] = {}
91
88
  else:
92
89
  self.stored_proxies = stored_proxies
93
90
 
94
- def get_local_columns(self) -> list[Column]:
91
+ def get_local_columns(self) -> list[ColumnHandle]:
95
92
  return list(self.col_mapping.keys())
96
93
 
97
- def link(self, tbl_version: TableVersion, conn: sql.Connection) -> None:
94
+ def link(self, tbl_version: TableVersion) -> None:
98
95
  # All of the media columns being linked need to either be stored computed columns, or else have stored proxies.
99
96
  # This ensures that the media in those columns resides in the media store.
100
97
  # First determine which columns (if any) need stored proxies, but don't have one yet.
101
98
  stored_proxies_needed: list[Column] = []
102
- for col in self.col_mapping.keys():
99
+ for col_handle in self.col_mapping:
100
+ col = col_handle.get()
103
101
  if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
104
102
  # If this column is already proxied in some other Project, use the existing proxy to avoid
105
103
  # duplication. Otherwise, we'll create a new one.
106
104
  for store in tbl_version.external_stores.values():
107
- if isinstance(store, Project) and col in store.stored_proxies:
108
- self.stored_proxies[col] = store.stored_proxies[col]
105
+ if isinstance(store, Project) and col_handle in store.stored_proxies:
106
+ self.stored_proxies[col_handle] = store.stored_proxies[col_handle]
109
107
  break
110
- if col not in self.stored_proxies:
108
+ if col_handle not in self.stored_proxies:
111
109
  # We didn't find it in an existing Project
112
110
  stored_proxies_needed.append(col)
111
+
113
112
  if len(stored_proxies_needed) > 0:
114
113
  _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
115
- # Create stored proxies for columns that need one. Increment the schema version
116
- # accordingly.
117
- tbl_version.version += 1
118
- preceding_schema_version = tbl_version.schema_version
119
- tbl_version.schema_version = tbl_version.version
120
- proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
114
+ # Create stored proxies for columns that need one
115
+ proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
121
116
  # Add the columns; this will also update table metadata.
122
- tbl_version._add_columns(proxy_cols, conn, print_stats=False, on_error='ignore')
123
- # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
124
- # invisible to the user.
125
- tbl_version._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
117
+ tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
118
+ self.stored_proxies.update(
119
+ {col.handle: proxy_col.handle for col, proxy_col in zip(stored_proxies_needed, proxy_cols)}
120
+ )
126
121
 
127
- def unlink(self, tbl_version: TableVersion, conn: sql.Connection) -> None:
122
+ def unlink(self, tbl_version: TableVersion) -> None:
128
123
  # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
129
124
  # any *other* external store for this table.)
130
- deletions_needed: set[Column] = set(self.stored_proxies.values())
125
+ deletions_needed: set[ColumnHandle] = set(self.stored_proxies.values())
131
126
  for name, store in tbl_version.external_stores.items():
132
127
  if isinstance(store, Project) and name != self.name:
133
128
  deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
134
129
  if len(deletions_needed) > 0:
135
- _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
136
- # Delete stored proxies that are no longer needed.
137
- tbl_version.version += 1
138
- preceding_schema_version = tbl_version.schema_version
139
- tbl_version.schema_version = tbl_version.version
140
- tbl_version._drop_columns(deletions_needed)
130
+ _logger.info(f'Removing stored proxies for columns: {[col.get().name for col in deletions_needed]}')
131
+ tbl_version._drop_columns(col.get() for col in deletions_needed)
141
132
  self.stored_proxies.clear()
142
- tbl_version._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
143
133
 
144
- def create_stored_proxy(self, tbl_version: TableVersion, col: Column) -> Column:
134
+ def create_stored_proxy(self, col: Column) -> Column:
145
135
  """
146
136
  Creates a proxy column for the specified column. The proxy column will be created in the specified
147
137
  `TableVersion`.
148
138
  """
149
139
  from pixeltable import exprs
150
140
 
151
- assert col.col_type.is_media_type() and not (col.is_stored and col.is_computed) and col not in self.stored_proxies
141
+ assert (
142
+ col.col_type.is_media_type() and not (col.is_stored and col.is_computed) and col not in self.stored_proxies
143
+ )
152
144
  proxy_col = Column(
153
145
  name=None,
154
146
  # Force images in the proxy column to be materialized inside the media store, in a normalized format.
@@ -157,17 +149,11 @@ class Project(ExternalStore, abc.ABC):
157
149
  # Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
158
150
  computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
159
151
  stored=True,
160
- col_id=tbl_version.next_col_id,
161
- sa_col_type=col.col_type.to_sa_type(),
162
- schema_version_add=tbl_version.schema_version
163
152
  )
164
- proxy_col.tbl = tbl_version
165
- tbl_version.next_col_id += 1
166
- self.stored_proxies[col] = proxy_col
167
153
  return proxy_col
168
154
 
169
155
  @property
170
- def col_mapping(self) -> dict[Column, str]:
156
+ def col_mapping(self) -> dict[ColumnHandle, str]:
171
157
  return self._col_mapping
172
158
 
173
159
  @abc.abstractmethod
@@ -197,12 +183,12 @@ class Project(ExternalStore, abc.ABC):
197
183
 
198
184
  @classmethod
199
185
  def validate_columns(
200
- cls,
201
- table: Table,
202
- export_cols: dict[str, ts.ColumnType],
203
- import_cols: dict[str, ts.ColumnType],
204
- col_mapping: Optional[dict[str, str]]
205
- ) -> dict[Column, str]:
186
+ cls,
187
+ table: Table,
188
+ export_cols: dict[str, ts.ColumnType],
189
+ import_cols: dict[str, ts.ColumnType],
190
+ col_mapping: dict[str, str] | None,
191
+ ) -> dict[ColumnHandle, str]:
206
192
  """
207
193
  Verifies that the specified `col_mapping` is valid. In particular, checks that:
208
194
  (i) the keys of `col_mapping` are valid columns of the specified `Table`;
@@ -212,6 +198,7 @@ class Project(ExternalStore, abc.ABC):
212
198
  external (import or export) columns.
213
199
  If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
214
200
  in which the Pixeltable column names are resolved to the corresponding `Column` objects.
201
+ TODO: return columns as names or qualified ids
215
202
  """
216
203
  from pixeltable import exprs
217
204
 
@@ -219,32 +206,34 @@ class Project(ExternalStore, abc.ABC):
219
206
  if col_mapping is None:
220
207
  col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
221
208
 
222
- resolved_col_mapping: dict[Column, str] = {}
209
+ resolved_col_mapping: dict[ColumnHandle, str] = {}
223
210
 
224
211
  # Validate names
225
- t_cols = set(table._schema.keys())
212
+ t_cols = set(table._get_schema().keys())
226
213
  for t_col, ext_col in col_mapping.items():
227
214
  if t_col not in t_cols:
228
215
  if is_user_specified_col_mapping:
229
216
  raise excs.Error(
230
- f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{table._name}` '
217
+ f'Column name {t_col!r} appears as a key in `col_mapping`, but {table._display_str()} '
231
218
  'contains no such column.'
232
219
  )
233
220
  else:
234
221
  raise excs.Error(
235
- f'Column `{t_col}` does not exist in Table `{table._name}`. Either add a column `{t_col}`, '
236
- f'or specify a `col_mapping` to associate a different column with the external field `{ext_col}`.'
222
+ f'Column {t_col!r} does not exist in {table._display_str()}. Either add a column {t_col!r}, '
223
+ f'or specify a `col_mapping` to associate a different column with '
224
+ f'the external field {ext_col!r}.'
237
225
  )
238
226
  if ext_col not in export_cols and ext_col not in import_cols:
239
227
  raise excs.Error(
240
- f'Column name `{ext_col}` appears as a value in `col_mapping`, but the external store '
241
- f'configuration has no column `{ext_col}`.'
228
+ f'Column name {ext_col!r} appears as a value in `col_mapping`, but the external store '
229
+ f'configuration has no column {ext_col!r}.'
242
230
  )
243
231
  col_ref = table[t_col]
244
232
  assert isinstance(col_ref, exprs.ColumnRef)
245
- resolved_col_mapping[col_ref.col] = ext_col
233
+ resolved_col_mapping[col_ref.col.handle] = ext_col
234
+
246
235
  # Validate column specs
247
- t_col_types = table._schema
236
+ t_col_types = table._get_schema()
248
237
  for t_col, ext_col in col_mapping.items():
249
238
  t_col_type = t_col_types[t_col]
250
239
  if ext_col in export_cols:
@@ -252,65 +241,34 @@ class Project(ExternalStore, abc.ABC):
252
241
  ext_col_type = export_cols[ext_col]
253
242
  if not ext_col_type.is_supertype_of(t_col_type, ignore_nullable=True):
254
243
  raise excs.Error(
255
- f'Column `{t_col}` cannot be exported to external column `{ext_col}` (incompatible types; expecting `{ext_col_type}`)'
244
+ f'Column {t_col!r} cannot be exported to external column {ext_col!r} '
245
+ f'(incompatible types; expecting `{ext_col_type}`)'
256
246
  )
257
247
  if ext_col in import_cols:
258
248
  # Validate that the external column can be assigned to the table column
259
249
  if table._tbl_version_path.get_column(t_col).is_computed:
260
250
  raise excs.Error(
261
- f'Column `{t_col}` is a computed column, which cannot be populated from an external column'
251
+ f'Column {t_col!r} is a computed column, which cannot be populated from an external column'
262
252
  )
263
253
  ext_col_type = import_cols[ext_col]
264
254
  if not t_col_type.is_supertype_of(ext_col_type, ignore_nullable=True):
265
255
  raise excs.Error(
266
- f'Column `{t_col}` cannot be imported from external column `{ext_col}` (incompatible types; expecting `{ext_col_type}`)'
256
+ f'Column {t_col!r} cannot be imported from external column {ext_col!r} '
257
+ f'(incompatible types; expecting `{ext_col_type}`)'
267
258
  )
268
259
  return resolved_col_mapping
269
260
 
270
- @classmethod
271
- def _column_as_dict(cls, col: Column) -> dict[str, Any]:
272
- return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
273
-
274
- @classmethod
275
- def _column_from_dict(cls, d: dict[str, Any]) -> Column:
276
- from pixeltable.catalog import Catalog
277
-
278
- tbl_id = UUID(d['tbl_id'])
279
- col_id = d['col_id']
280
- return Catalog.get().tbl_versions[(tbl_id, None)].cols_by_id[col_id]
281
-
282
-
283
- @dataclass(frozen=True)
284
- class SyncStatus:
285
- external_rows_created: int = 0
286
- external_rows_deleted: int = 0
287
- external_rows_updated: int = 0
288
- pxt_rows_updated: int = 0
289
- num_excs: int = 0
290
-
291
- def combine(self, other: 'SyncStatus') -> 'SyncStatus':
292
- return SyncStatus(
293
- external_rows_created=self.external_rows_created + other.external_rows_created,
294
- external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
295
- external_rows_updated=self.external_rows_updated + other.external_rows_updated,
296
- pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
297
- num_excs=self.num_excs + other.num_excs
298
- )
299
-
300
- @classmethod
301
- def empty(cls) -> 'SyncStatus':
302
- return SyncStatus(0, 0, 0, 0, 0)
303
-
304
261
 
305
262
  class MockProject(Project):
306
263
  """A project that cannot be synced, used mainly for testing."""
264
+
307
265
  def __init__(
308
- self,
309
- name: str,
310
- export_cols: dict[str, ts.ColumnType],
311
- import_cols: dict[str, ts.ColumnType],
312
- col_mapping: dict[Column, str],
313
- stored_proxies: Optional[dict[Column, Column]] = None
266
+ self,
267
+ name: str,
268
+ export_cols: dict[str, ts.ColumnType],
269
+ import_cols: dict[str, ts.ColumnType],
270
+ col_mapping: dict[ColumnHandle, str],
271
+ stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
314
272
  ):
315
273
  super().__init__(name, col_mapping, stored_proxies)
316
274
  self.export_cols = export_cols
@@ -319,12 +277,12 @@ class MockProject(Project):
319
277
 
320
278
  @classmethod
321
279
  def create(
322
- cls,
323
- t: Table,
324
- name: str,
325
- export_cols: dict[str, ts.ColumnType],
326
- import_cols: dict[str, ts.ColumnType],
327
- col_mapping: Optional[dict[str, str]] = None
280
+ cls,
281
+ t: Table,
282
+ name: str,
283
+ export_cols: dict[str, ts.ColumnType],
284
+ import_cols: dict[str, ts.ColumnType],
285
+ col_mapping: dict[str, str] | None = None,
328
286
  ) -> 'MockProject':
329
287
  col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
330
288
  return cls(name, export_cols, import_cols, col_mapping)
@@ -335,7 +293,7 @@ class MockProject(Project):
335
293
  def get_import_columns(self) -> dict[str, ts.ColumnType]:
336
294
  return self.import_cols
337
295
 
338
- def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
296
+ def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
339
297
  raise NotImplementedError()
340
298
 
341
299
  def delete(self) -> None:
@@ -350,8 +308,8 @@ class MockProject(Project):
350
308
  'name': self.name,
351
309
  'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
352
310
  'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
353
- 'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
354
- 'stored_proxies': [[self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()]
311
+ 'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
312
+ 'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
355
313
  }
356
314
 
357
315
  @classmethod
@@ -360,11 +318,11 @@ class MockProject(Project):
360
318
  md['name'],
361
319
  {k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
362
320
  {k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
363
- {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
364
- {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']}
321
+ {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
322
+ {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
365
323
  )
366
324
 
367
- def __eq__(self, other: Any) -> bool:
325
+ def __eq__(self, other: object) -> bool:
368
326
  if not isinstance(other, MockProject):
369
327
  return False
370
328
  return self.name == other.name
pixeltable/io/fiftyone.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import os
2
- from typing import Iterator, Optional, Union
2
+ from typing import Any, Iterator
3
3
 
4
4
  import fiftyone as fo # type: ignore[import-untyped]
5
5
  import fiftyone.utils.data as foud # type: ignore[import-untyped]
@@ -9,17 +9,18 @@ import puremagic
9
9
  import pixeltable as pxt
10
10
  import pixeltable.exceptions as excs
11
11
  from pixeltable import exprs
12
- from pixeltable.env import Env
12
+ from pixeltable.utils.local_store import TempStore
13
13
 
14
14
 
15
15
  class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
16
16
  """
17
17
  Implementation of a FiftyOne `DatasetImporter` that reads image data from a Pixeltable table.
18
18
  """
19
+
19
20
  __image_format: str # format to use for any exported images that are not already stored on disk
20
21
  __labels: dict[str, tuple[exprs.Expr, type[fo.Label]]] # label_name -> (expr, label_cls)
21
22
  __image_idx: int # index of the image expr in the select list
22
- __localpath_idx: Optional[int] # index of the image localpath in the select list, if present
23
+ __localpath_idx: int | None # index of the image localpath in the select list, if present
23
24
  __row_iter: Iterator[list] # iterator over the table rows, to be convered to FiftyOne samples
24
25
 
25
26
  def __init__(
@@ -27,19 +28,14 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
27
28
  tbl: pxt.Table,
28
29
  image: exprs.Expr,
29
30
  image_format: str,
30
- classifications: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
31
- detections: Union[exprs.Expr, list[exprs.Expr], dict[str, exprs.Expr], None] = None,
32
- dataset_dir: Optional[os.PathLike] = None,
31
+ classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
32
+ detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
33
+ dataset_dir: os.PathLike | None = None,
33
34
  shuffle: bool = False,
34
- seed: Union[int, float, str, bytes, bytearray, None] = None,
35
- max_samples: Optional[int] = None,
35
+ seed: int | float | str | bytes | bytearray | None = None,
36
+ max_samples: int | None = None,
36
37
  ):
37
- super().__init__(
38
- dataset_dir=dataset_dir,
39
- shuffle=shuffle,
40
- seed=seed,
41
- max_samples=max_samples
42
- )
38
+ super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
43
39
 
44
40
  self.__image_format = image_format
45
41
 
@@ -54,19 +50,18 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
54
50
  if isinstance(exprs_, dict):
55
51
  for label_name, expr in exprs_.items():
56
52
  if not label_name.isidentifier():
57
- raise excs.Error(f"Invalid label name: {label_name}")
53
+ raise excs.Error(f'Invalid label name: {label_name}')
58
54
  if label_name in self.__labels:
59
- raise excs.Error(f"Duplicate label name: {label_name}")
55
+ raise excs.Error(f'Duplicate label name: {label_name}')
60
56
  self.__labels[label_name] = (expr, label_cls)
61
57
 
62
58
  # Now add the remaining labels, assigning unused default names.
63
59
  for exprs_, label_cls, default_name in label_categories:
64
60
  if exprs_ is None or isinstance(exprs_, dict):
65
61
  continue
66
- if isinstance(exprs_, exprs.Expr):
67
- exprs_ = [exprs_]
68
- assert isinstance(exprs_, list)
69
- for expr in exprs_:
62
+ exprs_list = [exprs_] if isinstance(exprs_, exprs.Expr) else exprs_
63
+ assert isinstance(exprs_list, list)
64
+ for expr in exprs_list:
70
65
  if default_name not in self.__labels:
71
66
  name = default_name
72
67
  else:
@@ -92,10 +87,10 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
92
87
  else:
93
88
  self.__localpath_idx = None
94
89
 
95
- df = tbl.select(*selection)
96
- self.__row_iter = df._output_row_iterator()
90
+ query = tbl.select(*selection)
91
+ self.__row_iter = query._output_row_iterator()
97
92
 
98
- def __next__(self) -> tuple[str, Optional[fo.ImageMetadata], Optional[dict[str, fo.Label]]]:
93
+ def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
99
94
  row = next(self.__row_iter)
100
95
  img = row[self.__image_idx]
101
96
  assert isinstance(img, PIL.Image.Image)
@@ -105,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
105
100
  assert isinstance(file, str)
106
101
  else:
107
102
  # Write the dynamically created image to a temp file
108
- file = str(Env.get().create_tmp_path(f'.{self.__image_format}'))
103
+ file = TempStore.create_path(extension=f'.{self.__image_format}')
109
104
  img.save(file, format=self.__image_format)
110
105
 
111
106
  metadata = fo.ImageMetadata(
@@ -113,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
113
108
  mime_type=puremagic.from_file(file, mime=True),
114
109
  width=img.width,
115
110
  height=img.height,
116
- filepath=file,
111
+ filepath=str(file),
117
112
  num_channels=len(img.getbands()),
118
113
  )
119
114
 
@@ -129,7 +124,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
129
124
  elif label_cls is fo.Detections:
130
125
  label = fo.Detections(detections=self.__as_fo_detections(label_data))
131
126
  else:
132
- assert False
127
+ raise AssertionError()
133
128
  labels[label_name] = label
134
129
 
135
130
  return file, metadata, labels
@@ -137,13 +132,9 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
137
132
  def __as_fo_classifications(self, data: list) -> list[fo.Classification]:
138
133
  if not isinstance(data, list) or any('label' not in entry for entry in data):
139
134
  raise excs.Error(
140
- f'Invalid classifications data: {data}\n'
141
- "(Expected a list of dicts, each containing a 'label' key)"
135
+ f"Invalid classifications data: {data}\n(Expected a list of dicts, each containing a 'label' key)"
142
136
  )
143
- return [
144
- fo.Classification(label=entry['label'], confidence=entry.get('confidence'))
145
- for entry in data
146
- ]
137
+ return [fo.Classification(label=entry['label'], confidence=entry.get('confidence')) for entry in data]
147
138
 
148
139
  def __as_fo_detections(self, data: list) -> list[fo.Detections]:
149
140
  if not isinstance(data, list) or any('label' not in entry or 'bounding_box' not in entry for entry in data):
@@ -174,5 +165,5 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
174
165
  def get_dataset_info(self) -> dict:
175
166
  pass
176
167
 
177
- def close(self, *args) -> None:
168
+ def close(self, *args: Any) -> None:
178
169
  pass