pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (69):
  1. pixeltable/__init__.py +4 -2
  2. pixeltable/catalog/__init__.py +1 -1
  3. pixeltable/catalog/catalog.py +7 -9
  4. pixeltable/catalog/column.py +49 -0
  5. pixeltable/catalog/insertable_table.py +0 -7
  6. pixeltable/catalog/schema_object.py +1 -14
  7. pixeltable/catalog/table.py +180 -67
  8. pixeltable/catalog/table_version.py +42 -146
  9. pixeltable/catalog/table_version_path.py +6 -5
  10. pixeltable/catalog/view.py +2 -1
  11. pixeltable/config.py +24 -9
  12. pixeltable/dataframe.py +5 -6
  13. pixeltable/env.py +113 -21
  14. pixeltable/exec/aggregation_node.py +1 -1
  15. pixeltable/exec/cache_prefetch_node.py +4 -3
  16. pixeltable/exec/exec_node.py +0 -8
  17. pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
  18. pixeltable/exec/expr_eval/globals.py +1 -0
  19. pixeltable/exec/expr_eval/schedulers.py +52 -19
  20. pixeltable/exec/in_memory_data_node.py +2 -3
  21. pixeltable/exprs/array_slice.py +2 -2
  22. pixeltable/exprs/data_row.py +15 -2
  23. pixeltable/exprs/expr.py +9 -9
  24. pixeltable/exprs/function_call.py +61 -23
  25. pixeltable/exprs/globals.py +1 -2
  26. pixeltable/exprs/json_path.py +3 -3
  27. pixeltable/exprs/row_builder.py +25 -21
  28. pixeltable/exprs/string_op.py +3 -3
  29. pixeltable/func/expr_template_function.py +6 -3
  30. pixeltable/func/query_template_function.py +2 -2
  31. pixeltable/func/signature.py +30 -3
  32. pixeltable/func/tools.py +2 -2
  33. pixeltable/functions/anthropic.py +76 -27
  34. pixeltable/functions/deepseek.py +5 -1
  35. pixeltable/functions/gemini.py +11 -2
  36. pixeltable/functions/globals.py +2 -2
  37. pixeltable/functions/huggingface.py +6 -12
  38. pixeltable/functions/llama_cpp.py +9 -1
  39. pixeltable/functions/openai.py +76 -55
  40. pixeltable/functions/video.py +59 -6
  41. pixeltable/functions/vision.py +2 -2
  42. pixeltable/globals.py +86 -13
  43. pixeltable/io/datarows.py +3 -3
  44. pixeltable/io/fiftyone.py +7 -7
  45. pixeltable/io/globals.py +3 -3
  46. pixeltable/io/hf_datasets.py +4 -4
  47. pixeltable/io/label_studio.py +2 -1
  48. pixeltable/io/pandas.py +6 -6
  49. pixeltable/io/parquet.py +3 -3
  50. pixeltable/io/table_data_conduit.py +2 -2
  51. pixeltable/io/utils.py +2 -2
  52. pixeltable/iterators/audio.py +3 -2
  53. pixeltable/iterators/document.py +2 -8
  54. pixeltable/iterators/video.py +49 -9
  55. pixeltable/plan.py +0 -16
  56. pixeltable/share/packager.py +51 -42
  57. pixeltable/share/publish.py +134 -7
  58. pixeltable/store.py +5 -25
  59. pixeltable/type_system.py +5 -8
  60. pixeltable/utils/__init__.py +2 -2
  61. pixeltable/utils/arrow.py +5 -5
  62. pixeltable/utils/description_helper.py +3 -3
  63. pixeltable/utils/iceberg.py +1 -2
  64. pixeltable/utils/media_store.py +131 -66
  65. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
  66. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
  67. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
  68. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
  69. {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
pixeltable/share/publish.py CHANGED
@@ -1,36 +1,45 @@
+ import os
  import sys
  import urllib.parse
  import urllib.request
  from pathlib import Path
+ from typing import Literal, Optional

  import requests
+ from requests.adapters import HTTPAdapter
  from tqdm import tqdm
+ from urllib3.util.retry import Retry

  import pixeltable as pxt
  from pixeltable import exceptions as excs
  from pixeltable.env import Env
  from pixeltable.utils import sha256sum
+ from pixeltable.utils.media_store import TempStore

  from .packager import TablePackager, TableRestorer

  # These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
  # pixeltable.com URLs are available.

- PIXELTABLE_API_URL = 'https://internal-api.pixeltable.com'
+ PIXELTABLE_API_URL = os.environ.get('PIXELTABLE_API_URL', 'https://internal-api.pixeltable.com')


- def push_replica(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
-     if not src_tbl._tbl_version.get().is_snapshot:
+ def push_replica(
+     dest_tbl_uri: str, src_tbl: pxt.Table, bucket: str | None = None, access: Literal['public', 'private'] = 'private'
+ ) -> str:
+     if not src_tbl._tbl_version_path.is_snapshot():
          raise excs.Error('Only snapshots may be published.')

-     packager = TablePackager(src_tbl, additional_md={'table_uri': dest_tbl_uri})
+     packager = TablePackager(
+         src_tbl, additional_md={'table_uri': dest_tbl_uri, 'bucket_name': bucket, 'is_public': access == 'public'}
+     )
      request_json = packager.md | {'operation_type': 'publish_snapshot'}
      headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
      response = requests.post(PIXELTABLE_API_URL, json=request_json, headers=headers_json)
      if response.status_code != 200:
          raise excs.Error(f'Error publishing snapshot: {response.text}')
      response_json = response.json()
-     if not isinstance(response_json, dict) or response_json.get('destination') != 's3':
+     if not isinstance(response_json, dict):
          raise excs.Error(f'Error publishing snapshot: unexpected response from server.\n{response_json}')
      upload_id = response_json['upload_id']
      destination_uri = response_json['destination_uri']
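In 0.4.8, push_replica gains optional bucket and access arguments, and the endpoint can be overridden via the PIXELTABLE_API_URL environment variable. A minimal usage sketch; the directory, table, and destination URI names here are hypothetical:

    import pixeltable as pxt
    from pixeltable.share.publish import push_replica

    # Hypothetical paths and URI, for illustration only.
    tbl = pxt.get_table('my_dir.my_table')
    snap = pxt.create_snapshot('my_dir.my_snapshot', tbl)
    # bucket and access are the new optional arguments; access defaults to 'private'.
    dest = push_replica('pxt://my-org/my_snapshot', snap, access='public')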
@@ -42,17 +51,23 @@ def push_replica(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
      parsed_location = urllib.parse.urlparse(destination_uri)
      if parsed_location.scheme == 's3':
          _upload_bundle_to_s3(bundle, parsed_location)
+     elif parsed_location.scheme == 'https':
+         _upload_to_presigned_url(file_path=bundle, url=parsed_location.geturl())
      else:
          raise excs.Error(f'Unsupported destination: {destination_uri}')

      Env.get().console_logger.info('Finalizing snapshot ...')

      finalize_request_json = {
+         'table_uri': dest_tbl_uri,
          'operation_type': 'finalize_snapshot',
          'upload_id': upload_id,
          'datafile': bundle.name,
          'size': bundle.stat().st_size,
          'sha256': sha256sum(bundle),  # Generate our own SHA for independent verification
+         'rows': packager.md['row_count'],  # TODO rename rows to row_count once cloud side changes are complete
+         'preview_header': packager.md['preview_header'],
+         'preview_data': packager.md['preview_data'],
      }
      # TODO: Use Pydantic for validation
      finalize_response = requests.post(PIXELTABLE_API_URL, json=finalize_request_json, headers=headers_json)
@@ -107,11 +122,14 @@ def pull_replica(dest_path: str, src_tbl_uri: str) -> pxt.Table:
          raise excs.Error(f'Error cloning shapshot: unexpected response from server.\n{response_json}')

      primary_tbl_additional_md = response_json['md']['tables'][0]['table_md']['additional_md']
-     bundle_uri = primary_tbl_additional_md['destination_uri']
+     bundle_uri = response_json['destination_uri']
      bundle_filename = primary_tbl_additional_md['datafile']
      parsed_location = urllib.parse.urlparse(bundle_uri)
      if parsed_location.scheme == 's3':
          bundle_path = _download_bundle_from_s3(parsed_location, bundle_filename)
+     elif parsed_location.scheme == 'https':
+         bundle_path = TempStore.create_path()
+         _download_from_presigned_url(url=parsed_location.geturl(), output_path=bundle_path)
      else:
          raise excs.Error(f'Unexpected response from server: unsupported bundle uri: {bundle_uri}')
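The pull side is symmetric: it now reads destination_uri from the top-level response and handles https presigned URLs. A sketch, again with hypothetical names:

    from pixeltable.share.publish import pull_replica

    # Hypothetical local table path and published snapshot URI.
    replica = pull_replica('my_dir.replica', 'pxt://my-org/my_snapshot')
    print(replica.count())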
@@ -136,7 +154,7 @@ def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_f
      obj = s3_client.head_object(Bucket=bucket, Key=remote_path)  # Check if the object exists
      bundle_size = obj['ContentLength']

-     bundle_path = Path(Env.get().create_tmp_path())
+     bundle_path = TempStore.create_path()
      progress_bar = tqdm(
          desc='Downloading',
          total=bundle_size,
@@ -149,3 +167,112 @@
      )
      s3_client.download_file(Bucket=bucket, Key=remote_path, Filename=str(bundle_path), Callback=progress_bar.update)
      return bundle_path
+
+
+ def _create_retry_session(
+     max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: Optional[list] = None
+ ) -> requests.Session:
+     """Create a requests session with retry configuration"""
+     if status_forcelist is None:
+         status_forcelist = [
+             408,  # Request Timeout
+             429,  # Too Many Requests (rate limiting)
+             500,  # Internal Server Error (server-side error)
+             502,  # Bad Gateway (proxy/gateway got invalid response)
+             503,  # Service Unavailable (server overloaded or down)
+             504,  # Gateway Timeout (proxy/gateway timeout)
+         ]
+     retry_strategy = Retry(
+         total=max_retries,
+         read=max_retries,
+         connect=max_retries,
+         backoff_factor=backoff_factor,
+         status_forcelist=status_forcelist,
+         allowed_methods=['GET', 'PUT', 'POST', 'DELETE'],
+     )
+
+     session = requests.Session()
+     adapter = HTTPAdapter(max_retries=retry_strategy)
+     session.mount('https://', adapter)
+     return session
+
+
+ def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) -> requests.Response:
+     """Upload file with progress bar and retries"""
+     file_size = file_path.stat().st_size
+
+     headers = {'Content-Length': str(file_size), 'Content-Type': 'application/octet-stream'}
+
+     session = _create_retry_session(max_retries=max_retries)
+     try:
+         with (
+             open(file_path, 'rb') as f,
+             tqdm.wrapattr(
+                 f,
+                 method='read',
+                 total=file_size,
+                 desc='Uploading',
+                 unit='B',
+                 unit_scale=True,
+                 unit_divisor=1024,
+                 miniters=1,  # Update every iteration (should be fine for an upload)
+                 ncols=100,
+                 file=sys.stdout,
+             ) as file_with_progress,
+         ):
+             response = session.put(
+                 url,
+                 data=file_with_progress,
+                 headers=headers,
+                 timeout=(60, 1800),  # 60 seconds to connect and 1800 seconds for server response
+             )
+             response.raise_for_status()
+             return response
+     finally:
+         session.close()
+
+
+ def _download_from_presigned_url(
+     url: str, output_path: Path, headers: Optional[dict[str, str]] = None, max_retries: int = 3
+ ) -> None:
+     """Download file with progress bar and retries"""
+     session = _create_retry_session(max_retries=max_retries)
+
+     try:
+         # Stream download with progress
+         response = session.get(
+             url, headers=headers, stream=True, timeout=(60, 300)
+         )  # 60 seconds to connect and 300 seconds for server response
+         response.raise_for_status()
+
+         total_size = int(response.headers.get('content-length', 0))
+         progress_bar = tqdm(
+             desc='Downloading',
+             total=total_size,
+             unit='B',
+             unit_scale=True,
+             unit_divisor=1024,
+             miniters=1,
+             ncols=100,
+             file=sys.stdout,
+         )
+         with open(output_path, 'wb') as f:
+             for chunk in response.iter_content(chunk_size=8192):
+                 if chunk:
+                     f.write(chunk)
+                     progress_bar.update(len(chunk))
+     finally:
+         session.close()
+
+
+ # TODO: This will be replaced by drop_table with cloud table uri
+ def delete_replica(dest_path: str) -> None:
+     """Delete cloud replica"""
+     headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
+     delete_request_json = {'operation_type': 'delete_snapshot', 'table_uri': dest_path}
+     response = requests.post(PIXELTABLE_API_URL, json=delete_request_json, headers=headers_json)
+     if response.status_code != 200:
+         raise excs.Error(f'Error deleting replica: {response.text}')
+     response_json = response.json()
+     if not isinstance(response_json, dict) or 'table_uri' not in response_json:
+         raise excs.Error(f'Error deleting replica: unexpected response from server.\n{response_json}')
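The new retry helpers follow the standard urllib3 Retry + HTTPAdapter pattern: transient status codes are retried with exponential backoff, transparently inside each session call. A self-contained sketch of the same idea (the URL below is a placeholder):

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    retry = Retry(
        total=3,
        backoff_factor=1.0,  # exponential backoff between attempts
        status_forcelist=[408, 429, 500, 502, 503, 504],
        allowed_methods=['GET', 'PUT'],
    )
    session = requests.Session()
    session.mount('https://', HTTPAdapter(max_retries=retry))
    try:
        # Retries happen inside session.get(); the caller sees only the final result.
        resp = session.get('https://example.com/health', timeout=(60, 300))
        resp.raise_for_status()
    finally:
        session.close()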
pixeltable/store.py CHANGED
@@ -4,7 +4,7 @@ import abc
  import logging
  import sys
  import warnings
- from typing import Any, Iterable, Iterator, Optional, Union
+ from typing import Any, Iterable, Iterator, Optional

  import more_itertools
  import psycopg
@@ -17,7 +17,6 @@ from pixeltable.env import Env
  from pixeltable.exec import ExecNode
  from pixeltable.metadata import schema
  from pixeltable.utils.exception_handler import run_cleanup
- from pixeltable.utils.media_store import MediaStore
  from pixeltable.utils.sql import log_explain, log_stmt

  _logger = logging.getLogger('pixeltable')
@@ -123,21 +122,6 @@
      def _storage_name(self) -> str:
          """Return the name of the data store table"""

-     def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column) -> str:
-         src_path = MediaStore.resolve_tmp_url(file_url)
-         if src_path is None:
-             return file_url
-         assert col.tbl.id == self.tbl_version.id  # Ensure the column belongs to the same table as this store
-         new_file_url = MediaStore.relocate_local_media_file(src_path, col)
-         return new_file_url
-
-     def _move_tmp_media_files(
-         self, table_row: list[Any], media_cols_by_sql_idx: dict[int, catalog.Column], v_min: int
-     ) -> None:
-         """Move tmp media files that we generated to a permanent location"""
-         for n, col in media_cols_by_sql_idx.items():
-             table_row[n] = self._move_tmp_media_file(table_row[n], col)
-
      def count(self) -> int:
          """Return the number of rows visible in self.tbl_version"""
          stmt = (
@@ -235,7 +219,6 @@
          # create temp table to store output of exec_plan, with the same primary key as the store table
          tmp_name = f'temp_{self._storage_name()}'
          tmp_pk_cols = tuple(sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns())
-         tmp_val_col_sql_idx = len(tmp_pk_cols)
          tmp_val_col = sql.Column(col.sa_col.name, col.sa_col.type)
          tmp_cols = [*tmp_pk_cols, tmp_val_col]
          # add error columns if the store column records errors
@@ -262,9 +245,7 @@
              if abort_on_exc and row.has_exc():
                  exc = row.get_first_exc()
                  raise excs.Error(f'Error while evaluating computed column {col.name!r}:\n{exc}') from exc
-             table_row, num_row_exc = row_builder.create_table_row(row, None, row.pk)
-             if col.col_type.is_media_type():
-                 table_row[tmp_val_col_sql_idx] = self._move_tmp_media_file(table_row[tmp_val_col_sql_idx], col)
+             table_row, num_row_exc = row_builder.create_store_table_row(row, None, row.pk)
              num_excs += num_row_exc
              batch_table_rows.append(tuple(table_row))

@@ -317,7 +298,7 @@
          progress_bar: Optional[tqdm] = None  # create this only after we started executing
          row_builder = exec_plan.row_builder

-         store_col_names, media_cols_by_idx = row_builder.store_column_names()
+         store_col_names = row_builder.store_column_names()

          try:
              table_rows: list[tuple[Any]] = []
@@ -337,7 +318,7 @@
                  rowid = (next(rowids),) if rowids is not None else row.pk[:-1]
                  pk = (*rowid, v_min)
                  assert len(pk) == len(self._pk_cols)
-                 table_row, num_row_exc = row_builder.create_table_row(row, cols_with_excs, pk)
+                 table_row, num_row_exc = row_builder.create_store_table_row(row, cols_with_excs, pk)
                  num_excs += num_row_exc

                  if show_progress:
@@ -351,7 +332,6 @@
                      )
                      progress_bar.update(1)

-                 self._move_tmp_media_files(table_row, media_cols_by_idx, v_min)
                  batch_table_rows.append(tuple(table_row))

              table_rows.extend(batch_table_rows)
@@ -427,7 +407,7 @@
          base_versions_clause = (
              sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
          )
-         set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
+         set_clause: dict[sql.Column, int | sql.Column] = {self.v_max_col: current_version}
          for index_info in self.tbl_version.get().idxs_by_name.values():
              # copy value column to undo column
              set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
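The set_clause change is only a type-annotation modernization, but the underlying SQLAlchemy pattern is worth noting: Update.values() accepts a dict keyed by Column objects, so a single UPDATE can assign a constant and copy one column into another. A standalone sketch with a hypothetical table:

    import sqlalchemy as sql

    engine = sql.create_engine('sqlite://')
    md = sql.MetaData()
    t = sql.Table(
        't', md,
        sql.Column('v_max', sql.Integer),
        sql.Column('val', sql.Integer),
        sql.Column('undo', sql.Integer),
    )
    md.create_all(engine)
    with engine.begin() as conn:
        conn.execute(sql.insert(t).values(v_max=0, val=7))
        # Constant assignment and column-to-column copy in one statement.
        set_clause: dict[sql.Column, int | sql.Column] = {t.c.v_max: 5, t.c.undo: t.c.val}
        conn.execute(sql.update(t).values(set_clause))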
pixeltable/type_system.py CHANGED
@@ -292,7 +292,7 @@
      @classmethod
      def from_python_type(
-         cls, t: Union[type, _GenericAlias], nullable_default: bool = False, allow_builtin_types: bool = True
+         cls, t: type | _GenericAlias, nullable_default: bool = False, allow_builtin_types: bool = True
      ) -> Optional[ColumnType]:
          """
          Convert a Python type into a Pixeltable `ColumnType` instance.
@@ -311,7 +311,7 @@
          if origin in (typing.Union, types.UnionType):
              # Check if `t` has the form Optional[T].
              if len(type_args) == 2 and type(None) in type_args:
-                 # `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).
+                 # `t` is a type of the form Optional[T] (equivalently, T | None or None | T).
                  # We treat it as the underlying type but with nullable=True.
                  underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
                  underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
@@ -361,10 +361,7 @@
      @classmethod
      def normalize_type(
-         cls,
-         t: Union[ColumnType, type, _AnnotatedAlias],
-         nullable_default: bool = False,
-         allow_builtin_types: bool = True,
+         cls, t: ColumnType | type | _AnnotatedAlias, nullable_default: bool = False, allow_builtin_types: bool = True
      ) -> ColumnType:
          """
          Convert any type recognizable by Pixeltable to its corresponding ColumnType.
@@ -389,7 +386,7 @@
      ]

      @classmethod
-     def __raise_exc_for_invalid_type(cls, t: Union[type, _AnnotatedAlias]) -> None:
+     def __raise_exc_for_invalid_type(cls, t: type | _AnnotatedAlias) -> None:
          for builtin_type, suggestion in cls.__TYPE_SUGGESTIONS:
              if t is builtin_type or (isinstance(t, type) and issubclass(t, builtin_type)):
                  name = t.__name__ if t.__module__ == 'builtins' else f'{t.__module__}.{t.__name__}'
@@ -405,7 +402,7 @@
          return cls.from_python_type(py_type) if py_type is not None else None

      @classmethod
-     def __json_schema_to_py_type(cls, schema: dict[str, Any]) -> Union[type, _GenericAlias, None]:
+     def __json_schema_to_py_type(cls, schema: dict[str, Any]) -> type | _GenericAlias | None:
          if 'type' in schema:
              if schema['type'] == 'null':
                  return type(None)
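These are syntax-only changes (PEP 604 unions), but the Optional[T] branch in from_python_type relies on a useful invariant: Optional[T], Union[T, None], and T | None all normalize to the same origin and args at runtime. A quick standalone check (requires Python 3.10+ for the X | Y syntax):

    import types
    import typing
    from typing import Optional, Union, get_args, get_origin

    for t in (Optional[str], Union[str, None], str | None):
        origin, args = get_origin(t), get_args(t)
        assert origin in (typing.Union, types.UnionType)
        assert len(args) == 2 and type(None) in args
        underlying = args[0] if args[1] is type(None) else args[1]
        print(underlying)  # <class 'str'> each time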
pixeltable/utils/__init__.py CHANGED
@@ -2,7 +2,7 @@ import hashlib
  import urllib.parse
  import urllib.request
  from pathlib import Path
- from typing import Optional, Union
+ from typing import Optional


  def print_perf_counter_delta(delta: float) -> str:
@@ -24,7 +24,7 @@ def print_perf_counter_delta(delta: float) -> str:
      return f'{delta:.2f} s'


- def sha256sum(path: Union[Path, str]) -> str:
+ def sha256sum(path: Path | str) -> str:
      """
      Compute the SHA256 hash of a file.
      """
pixeltable/utils/arrow.py CHANGED
@@ -1,5 +1,5 @@
  import datetime
- from typing import Any, Iterator, Optional, Union
+ from typing import Any, Iterator, Optional

  import numpy as np
  import pyarrow as pa
@@ -88,11 +88,11 @@ def to_arrow_schema(pixeltable_schema: dict[str, Any]) -> pa.Schema:
      return pa.schema((name, to_arrow_type(typ)) for name, typ in pixeltable_schema.items())  # type: ignore[misc]


- def to_pydict(batch: Union[pa.Table, pa.RecordBatch]) -> dict[str, Union[list, np.ndarray]]:
+ def to_pydict(batch: pa.Table | pa.RecordBatch) -> dict[str, list | np.ndarray]:
      """Convert a RecordBatch to a dictionary of lists, unlike pa.lib.RecordBatch.to_pydict,
      this function will not convert numpy arrays to lists, and will preserve the original numpy dtype.
      """
-     out: dict[str, Union[list, np.ndarray]] = {}
+     out: dict[str, list | np.ndarray] = {}
      for k, name in enumerate(batch.schema.names):
          col = batch.column(k)
          if isinstance(col.type, pa.FixedShapeTensorType):
@@ -105,7 +105,7 @@ def to_pydict(batch: Union[pa.Table, pa.RecordBatch]) -> dict[str, Union[list, n
      return out


- def iter_tuples(batch: Union[pa.Table, pa.RecordBatch]) -> Iterator[dict[str, Any]]:
+ def iter_tuples(batch: pa.Table | pa.RecordBatch) -> Iterator[dict[str, Any]]:
      """Convert a RecordBatch to an iterator of dictionaries. also works with pa.Table and pa.RowGroup"""
      pydict = to_pydict(batch)
      assert len(pydict) > 0, 'empty record batch'
@@ -145,7 +145,7 @@ def _ar_val_to_pxt_val(val: Any, pxt_type: ts.ColumnType) -> Any:


  def iter_tuples2(
-     batch: Union[pa.Table, pa.RecordBatch], col_mapping: Optional[dict[str, str]], schema: dict[str, ts.ColumnType]
+     batch: pa.Table | pa.RecordBatch, col_mapping: Optional[dict[str, str]], schema: dict[str, ts.ColumnType]
  ) -> Iterator[dict[str, Any]]:
      """Convert a RecordBatch to an iterator of dictionaries. also works with pa.Table and pa.RowGroup"""
      pydict = to_pydict(batch)
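For reference, a small usage sketch of the two helpers whose signatures changed; the output values in the comments are illustrative:

    import pyarrow as pa
    from pixeltable.utils.arrow import iter_tuples, to_pydict

    batch = pa.RecordBatch.from_pydict({'id': [1, 2], 'name': ['a', 'b']})
    print(to_pydict(batch))  # e.g. {'id': [1, 2], 'name': ['a', 'b']}
    for row in iter_tuples(batch):
        print(row)  # {'id': 1, 'name': 'a'}, then {'id': 2, 'name': 'b'}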
pixeltable/utils/description_helper.py CHANGED
@@ -1,5 +1,5 @@
  import dataclasses
- from typing import Optional, Union
+ from typing import Optional

  import pandas as pd
  from pandas.io.formats.style import Styler
@@ -7,7 +7,7 @@ from pandas.io.formats.style import Styler

  @dataclasses.dataclass
  class _Descriptor:
-     body: Union[str, pd.DataFrame]
+     body: str | pd.DataFrame
      # The remaining fields only affect the behavior if `body` is a pd.DataFrame.
      show_index: bool
      show_header: bool
@@ -33,7 +33,7 @@ class DescriptionHelper:

      def append(
          self,
-         descriptor: Union[str, pd.DataFrame],
+         descriptor: str | pd.DataFrame,
          show_index: bool = False,
          show_header: bool = True,
          styler: Optional[Styler] = None,
pixeltable/utils/iceberg.py CHANGED
@@ -1,10 +1,9 @@
  from pathlib import Path
- from typing import Union

  from pyiceberg.catalog.sql import SqlCatalog


- def sqlite_catalog(warehouse_path: Union[str, Path], name: str = 'pixeltable') -> SqlCatalog:
+ def sqlite_catalog(warehouse_path: str | Path, name: str = 'pixeltable') -> SqlCatalog:
      """
      Instantiate a sqlite Iceberg catalog at the specified path. If no catalog exists, one will be created.
      """