pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (152)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
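Most of the per-file changes below are a mechanical migration of type annotations from `typing.Optional[T]` to the PEP 604 union spelling `T | None`, trimming `Optional` from the corresponding `typing` imports. As a minimal illustration of the pattern (hypothetical function, not taken from the pixeltable sources):

    from typing import Optional

    # Old spelling, as removed throughout this release: requires importing Optional
    def lookup_old(key: str, default: Optional[int] = None) -> Optional[int]:
        return default

    # New spelling (PEP 604, Python 3.10+): no typing import needed
    def lookup_new(key: str, default: int | None = None) -> int | None:
        return default

Both spellings are equivalent at runtime and to type checkers; the union form simply removes the dependency on `typing.Optional`.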
pixeltable/io/fiftyone.py CHANGED
@@ -1,5 +1,5 @@
  import os
- from typing import Any, Iterator, Optional
+ from typing import Any, Iterator

  import fiftyone as fo # type: ignore[import-untyped]
  import fiftyone.utils.data as foud # type: ignore[import-untyped]
@@ -20,7 +20,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
  __image_format: str # format to use for any exported images that are not already stored on disk
  __labels: dict[str, tuple[exprs.Expr, type[fo.Label]]] # label_name -> (expr, label_cls)
  __image_idx: int # index of the image expr in the select list
- __localpath_idx: Optional[int] # index of the image localpath in the select list, if present
+ __localpath_idx: int | None # index of the image localpath in the select list, if present
  __row_iter: Iterator[list] # iterator over the table rows, to be convered to FiftyOne samples

  def __init__(
@@ -30,10 +30,10 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
  image_format: str,
  classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
  detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
- dataset_dir: Optional[os.PathLike] = None,
+ dataset_dir: os.PathLike | None = None,
  shuffle: bool = False,
  seed: int | float | str | bytes | bytearray | None = None,
- max_samples: Optional[int] = None,
+ max_samples: int | None = None,
  ):
  super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)

@@ -90,7 +90,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
  df = tbl.select(*selection)
  self.__row_iter = df._output_row_iterator()

- def __next__(self) -> tuple[str, Optional[fo.ImageMetadata], Optional[dict[str, fo.Label]]]:
+ def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
  row = next(self.__row_iter)
  img = row[self.__image_idx]
  assert isinstance(img, PIL.Image.Image)
pixeltable/io/globals.py CHANGED
@@ -1,6 +1,6 @@
  from __future__ import annotations

- from typing import TYPE_CHECKING, Any, Literal, Optional
+ from typing import TYPE_CHECKING, Any, Literal

  import pixeltable as pxt
  import pixeltable.exceptions as excs
@@ -15,12 +15,12 @@ if TYPE_CHECKING:
  def create_label_studio_project(
  t: Table,
  label_config: str,
- name: Optional[str] = None,
- title: Optional[str] = None,
+ name: str | None = None,
+ title: str | None = None,
  media_import_method: Literal['post', 'file', 'url'] = 'post',
- col_mapping: Optional[dict[str, str]] = None,
+ col_mapping: dict[str, str] | None = None,
  sync_immediately: bool = True,
- s3_configuration: Optional[dict[str, Any]] = None,
+ s3_configuration: dict[str, Any] | None = None,
  **kwargs: Any,
  ) -> UpdateStatus:
  """
pixeltable/io/hf_datasets.py CHANGED
@@ -1,7 +1,7 @@
  from __future__ import annotations

  import typing
- from typing import Any, Optional
+ from typing import Any

  import pixeltable as pxt
  import pixeltable.type_system as ts
@@ -36,7 +36,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
  }


- def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.ColumnType]:
+ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> ts.ColumnType | None:
  """Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
  import datasets

@@ -76,7 +76,7 @@ def _get_hf_schema(dataset: datasets.Dataset | datasets.DatasetDict) -> datasets

  def huggingface_schema_to_pxt_schema(
  hf_schema: datasets.Features, schema_overrides: dict[str, Any], primary_key: list[str]
- ) -> dict[str, Optional[ts.ColumnType]]:
+ ) -> dict[str, ts.ColumnType | None]:
  """Generate a pixeltable schema from a huggingface dataset schema.
  Columns without a known mapping are mapped to None
  """
@@ -93,7 +93,7 @@ def import_huggingface_dataset(
  table_path: str,
  dataset: datasets.Dataset | datasets.DatasetDict,
  *,
- schema_overrides: Optional[dict[str, Any]] = None,
+ schema_overrides: dict[str, Any] | None = None,
  primary_key: str | list[str] | None = None,
  **kwargs: Any,
  ) -> pxt.Table:
pixeltable/io/label_studio.py CHANGED
@@ -4,7 +4,7 @@ import logging
  import os
  from dataclasses import dataclass
  from pathlib import Path
- from typing import Any, Iterator, Literal, Optional
+ from typing import Any, Iterator, Literal
  from xml.etree import ElementTree as ET

  import label_studio_sdk
@@ -53,7 +53,7 @@ class LabelStudioProject(Project):

  project_id: int # Label Studio project ID
  media_import_method: Literal['post', 'file', 'url']
- _project: Optional[ls_project.Project]
+ _project: ls_project.Project | None

  def __init__(
  self,
@@ -61,7 +61,7 @@
  project_id: int,
  media_import_method: Literal['post', 'file', 'url'],
  col_mapping: dict[ColumnHandle, str],
- stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]] = None,
+ stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
  ):
  self.project_id = project_id
  self.media_import_method = media_import_method
@@ -278,8 +278,8 @@ class LabelStudioProject(Project):
  # columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
  # preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
  # We have to wait until we begin iterating to populate them, so they're initially `None`.
- rl_col_idxs: Optional[list[int]] = None
- data_col_idxs: Optional[list[int]] = None
+ rl_col_idxs: list[int] | None = None
+ data_col_idxs: list[int] | None = None

  row_ids_in_pxt: set[tuple] = set()
  tasks_created = 0
@@ -349,7 +349,7 @@ class LabelStudioProject(Project):
  return sync_status

  @classmethod
- def __validate_fileurl(cls, col: Column, url: str) -> Optional[str]:
+ def __validate_fileurl(cls, col: Column, url: str) -> str | None:
  # Check that the URL is one that will be visible to Label Studio. If it isn't, log an info message
  # to help users debug the issue.
  if not (url.startswith('http://') or url.startswith('https://')):
@@ -497,7 +497,7 @@ class LabelStudioProject(Project):

  @classmethod
  def __coco_to_predictions(
- cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: Optional[int] = None
+ cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: int | None = None
  ) -> dict[str, Any]:
  width = coco_annotations['image']['width']
  height = coco_annotations['image']['height']
@@ -549,11 +549,11 @@ class LabelStudioProject(Project):
  cls,
  t: Table,
  label_config: str,
- name: Optional[str],
- title: Optional[str],
+ name: str | None,
+ title: str | None,
  media_import_method: Literal['post', 'file', 'url'],
- col_mapping: Optional[dict[str, str]],
- s3_configuration: Optional[dict[str, Any]],
+ col_mapping: dict[str, str] | None,
+ s3_configuration: dict[str, Any] | None,
  **kwargs: Any,
  ) -> 'LabelStudioProject':
  """
@@ -652,7 +652,7 @@ class LabelStudioProject(Project):

  @dataclass(frozen=True)
  class _DataKey:
- name: Optional[str] # The 'name' attribute of the data key; may differ from the field name
+ name: str | None # The 'name' attribute of the data key; may differ from the field name
  column_type: ts.ColumnType

pixeltable/io/pandas.py CHANGED
@@ -1,5 +1,5 @@
  import os
- from typing import Any, Optional
+ from typing import Any

  import numpy as np
  import pandas as pd
@@ -16,7 +16,7 @@ def import_pandas(
  tbl_name: str,
  df: pd.DataFrame,
  *,
- schema_overrides: Optional[dict[str, Any]] = None,
+ schema_overrides: dict[str, Any] | None = None,
  primary_key: str | list[str] | None = None,
  num_retained_versions: int = 10,
  comment: str = '',
@@ -56,7 +56,7 @@ def import_pandas(
  def import_csv(
  tbl_name: str,
  filepath_or_buffer: str | os.PathLike,
- schema_overrides: Optional[dict[str, Any]] = None,
+ schema_overrides: dict[str, Any] | None = None,
  primary_key: str | list[str] | None = None,
  num_retained_versions: int = 10,
  comment: str = '',
@@ -86,7 +86,7 @@ def import_excel(
  tbl_name: str,
  io: str | os.PathLike,
  *,
- schema_overrides: Optional[dict[str, Any]] = None,
+ schema_overrides: dict[str, Any] | None = None,
  primary_key: str | list[str] | None = None,
  num_retained_versions: int = 10,
  comment: str = '',
@@ -141,7 +141,7 @@ def df_infer_schema(
  return pd_schema


- def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> Optional[ts.ColumnType]:
+ def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> ts.ColumnType | None:
  """
  Determines a pixeltable ColumnType from a pandas dtype

@@ -192,7 +192,7 @@ def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable:


  def _df_row_to_pxt_row(
- row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping: Optional[dict[str, str]]
+ row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping: dict[str, str] | None
  ) -> dict[str, Any]:
  """Convert a row to insertable format"""
  pxt_row: dict[str, Any] = {}
pixeltable/io/parquet.py CHANGED
@@ -4,7 +4,7 @@ import json
  import logging
  import typing
  from pathlib import Path
- from typing import Any, Optional
+ from typing import Any

  import pixeltable as pxt
  import pixeltable.exceptions as excs
@@ -71,7 +71,7 @@ def import_parquet(
  table: str,
  *,
  parquet_path: str,
- schema_overrides: Optional[dict[str, Any]] = None,
+ schema_overrides: dict[str, Any] | None = None,
  primary_key: str | list[str] | None = None,
  **kwargs: Any,
  ) -> pxt.Table:
pixeltable/io/table_data_conduit.py CHANGED
@@ -8,7 +8,7 @@ import urllib.parse
  import urllib.request
  from dataclasses import dataclass, field, fields
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, cast
+ from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, cast

  import numpy as np
  import pandas as pd
@@ -50,15 +50,15 @@ class TableDataConduitFormat(str, enum.Enum):
  @dataclass
  class TableDataConduit:
  source: 'TableDataSource'
- source_format: Optional[str] = None
- source_column_map: Optional[dict[str, str]] = None
+ source_format: str | None = None
+ source_column_map: dict[str, str] | None = None
  if_row_exists: Literal['update', 'ignore', 'error'] = 'error'
- pxt_schema: Optional[dict[str, ts.ColumnType]] = None
- src_schema_overrides: Optional[dict[str, ts.ColumnType]] = None
- src_schema: Optional[dict[str, ts.ColumnType]] = None
- pxt_pk: Optional[list[str]] = None
- src_pk: Optional[list[str]] = None
- valid_rows: Optional[RowData] = None
+ pxt_schema: dict[str, ts.ColumnType] | None = None
+ src_schema_overrides: dict[str, ts.ColumnType] | None = None
+ src_schema: dict[str, ts.ColumnType] | None = None
+ pxt_pk: list[str] | None = None
+ src_pk: list[str] | None = None
+ valid_rows: RowData | None = None
  extra_fields: dict[str, Any] = field(default_factory=dict)

  reqd_col_names: set[str] = field(default_factory=set)
@@ -151,7 +151,7 @@ class DFTableDataConduit(TableDataConduit):


  class RowDataTableDataConduit(TableDataConduit):
- raw_rows: Optional[RowData] = None
+ raw_rows: RowData | None = None
  disable_mapping: bool = True
  batch_count: int = 0

@@ -332,7 +332,7 @@ class HFTableDataConduit(TableDataConduit):
  - use set_format('arrow') and convert ChunkedArrays to PIL.Image.Image instead of going through numpy, which is slow
  """

- column_name_for_split: Optional[str] = None
+ column_name_for_split: str | None = None
  categorical_features: dict[str, dict[int, str]]
  dataset_dict: dict[str, datasets.Dataset] = None
  hf_schema_source: dict[str, Any] = None
@@ -478,7 +478,7 @@ class HFTableDataConduit(TableDataConduit):


  class ParquetTableDataConduit(TableDataConduit):
- pq_ds: Optional[ParquetDataset] = None
+ pq_ds: ParquetDataset | None = None

  @classmethod
  def from_tds(cls, tds: TableDataConduit) -> 'ParquetTableDataConduit':
pixeltable/io/utils.py CHANGED
@@ -1,5 +1,5 @@
  from keyword import iskeyword as is_python_keyword
- from typing import Any, Optional
+ from typing import Any

  import pixeltable as pxt
  import pixeltable.exceptions as excs
@@ -40,7 +40,7 @@ def normalize_schema_names(
  primary_key: list[str],
  schema_overrides: dict[str, Any],
  require_valid_pxt_column_names: bool = False,
- ) -> tuple[dict[str, Any], list[str], Optional[dict[str, str]]]:
+ ) -> tuple[dict[str, Any], list[str], dict[str, str] | None]:
  """
  Convert all names in the input schema from source names to valid Pixeltable identifiers
  - Ensure that all names are unique.
pixeltable/iterators/audio.py CHANGED
@@ -1,7 +1,7 @@
  import logging
  from fractions import Fraction
  from pathlib import Path
- from typing import Any, ClassVar, Optional
+ from typing import Any, ClassVar

  import av

@@ -37,7 +37,7 @@ class AudioSplitter(ComponentIterator):

  # List of chunks to extract
  # Each chunk is defined by start and end presentation timestamps in audio file (int)
- chunks_to_extract_in_pts: Optional[list[tuple[int, int]]]
+ chunks_to_extract_in_pts: list[tuple[int, int]] | None
  # next chunk to extract
  next_pos: int

pixeltable/iterators/video.py CHANGED
@@ -4,7 +4,7 @@ import math
  import subprocess
  from fractions import Fraction
  from pathlib import Path
- from typing import Any, Iterator, Literal, Optional
+ from typing import Any, Iterator, Literal

  import av
  import pandas as pd
@@ -42,9 +42,9 @@ class FrameIterator(ComponentIterator):
  [Frame](https://pyav.org/docs/develop/api/frame.html)):

  * `index` (`int`)
- * `pts` (`Optional[int]`)
- * `dts` (`Optional[int]`)
- * `time` (`Optional[float]`)
+ * `pts` (`int | None`)
+ * `dts` (`int | None`)
+ * `time` (`float | None`)
  * `is_corrupt` (`bool`)
  * `key_frame` (`bool`)
  * `pict_type` (`int`)
@@ -55,8 +55,8 @@ class FrameIterator(ComponentIterator):

  # Input parameters
  video_path: Path
- fps: Optional[float]
- num_frames: Optional[int]
+ fps: float | None
+ num_frames: int | None
  all_frame_attrs: bool

  # Video info
@@ -67,19 +67,14 @@ class FrameIterator(ComponentIterator):
  video_start_time: int

  # List of frame indices to be extracted, or None to extract all frames
- frames_to_extract: Optional[list[int]]
+ frames_to_extract: list[int] | None

  # Next frame to extract, as an iterator `pos` index. If `frames_to_extract` is None, this is the same as the
  # frame index in the video. Otherwise, the corresponding video index is `frames_to_extract[next_pos]`.
  next_pos: int

  def __init__(
- self,
- video: str,
- *,
- fps: Optional[float] = None,
- num_frames: Optional[int] = None,
- all_frame_attrs: bool = False,
+ self, video: str, *, fps: float | None = None, num_frames: int | None = None, all_frame_attrs: bool = False
  ):
  if fps is not None and num_frames is not None:
  raise excs.Error('At most one of `fps` or `num_frames` may be specified')
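To illustrate the collapsed `__init__` signature above: `fps` and `num_frames` remain keyword-only and mutually exclusive. A minimal sketch (hypothetical video path; the iterator is constructed directly here purely for illustration):

    from pixeltable.iterators import FrameIterator

    # Extract roughly one frame per second from a local video file (hypothetical path)
    it = FrameIterator('/tmp/clip.mp4', fps=1.0)

    # Alternatively, sample a fixed number of frames:
    # it = FrameIterator('/tmp/clip.mp4', num_frames=16)

    # Passing both raises an error, per the check shown in __init__ above:
    # FrameIterator('/tmp/clip.mp4', fps=1.0, num_frames=16)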
pixeltable/metadata/converters/convert_18.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  # Migrate a few changed function names
  if k == 'path' and v == 'pixeltable.functions.string.str_format':
  return 'path', 'pixeltable.functions.string.format'
pixeltable/metadata/converters/convert_19.py CHANGED
@@ -1,5 +1,5 @@
  import datetime
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -28,7 +28,7 @@ def _(engine: sql.engine.Engine) -> None:
  conn.execute(sql.text(f'ALTER TABLE {store_name} ALTER COLUMN col_{col_id} TYPE TIMESTAMPTZ'))


- def __update_timestamp_literals(k: Any, v: Any) -> Optional[tuple[Any, Any]]:
+ def __update_timestamp_literals(k: Any, v: Any) -> tuple[Any, Any] | None:
  if isinstance(v, dict) and 'val_t' in v:
  # It's a literal with an explicit 'val_t' field. In version 19 this can only mean a
  # timestamp literal, which (in version 19) is stored in the DB as a naive datetime.
pixeltable/metadata/converters/convert_20.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  if isinstance(v, dict) and '_classname' in v:
  # The way InlineArray is represented changed in v20. Previously, literal values were stored
  # directly in the Inline expr; now we store them in Literal sub-exprs. This converter
pixeltable/metadata/converters/convert_21.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -24,7 +24,7 @@ def __update_schema_column(schema_column: dict) -> None:
  schema_column['media_validation'] = None


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
  if 'perform_validation' not in v:
  v['perform_validation'] = False
pixeltable/metadata/converters/convert_22.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'DataFrame':
  v['from_clause'] = {'tbls': [v['tbl']], 'join_clauses': []}
  return k, v
pixeltable/metadata/converters/convert_24.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  from pixeltable import func
  from pixeltable.func.globals import resolve_symbol

pixeltable/metadata/converters/convert_25.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  if k == 'path' and (
  v in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image')
  ):
pixeltable/metadata/converters/convert_26.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  import pixeltable.type_system as ts
  from pixeltable.exprs.literal import Literal

pixeltable/metadata/converters/convert_29.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -12,7 +12,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  # Defaults are now stored as literals in signatures
  if k == 'parameters':
  for param in v:
@@ -55,8 +55,8 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
  # We need to expand ("unroll") any var-args or var-kwargs.

  new_args_len = len(new_args)
- rolled_args: Optional[dict] = None
- rolled_kwargs: Optional[dict] = None
+ rolled_args: dict | None = None
+ rolled_kwargs: dict | None = None

  if 'signature' in v['fn']:
  # If it's a pickled function, there's no signature, so we're out of luck; varargs in a pickled function
pixeltable/metadata/converters/convert_34.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
  # Add reference_tbl to ColumnRef; for historical metadata it is always equal to tbl
  assert 'reference_tbl' not in v
pixeltable/metadata/converters/convert_36.py CHANGED
@@ -1,5 +1,5 @@
  import logging
- from typing import Any, Optional
+ from typing import Any
  from uuid import UUID

  import sqlalchemy as sql
@@ -30,7 +30,7 @@ def __update_table_md(table_md: dict, table_id: UUID) -> None:
  _logger.info(f'Updating view metadata for table: {table_id}')


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
  if 'sample_clause' not in v:
  v['sample_clause'] = None
pixeltable/metadata/converters/convert_38.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional
+ from typing import Any

  import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
  convert_table_md(engine, substitution_fn=__substitute_md)


- def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+ def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
  if k == 'col_mapping':
  assert isinstance(v, list)
  return k, [__col_mapping_entry(e) for e in v]
pixeltable/metadata/converters/convert_39.py CHANGED
@@ -1,5 +1,4 @@
  import logging
- from typing import Optional
  from uuid import UUID

  import sqlalchemy as sql
@@ -69,7 +68,7 @@ def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:


  def migrate_error_to_cellmd_columns(
- conn: sql.Connection, store_name: str, col_names: list[str], backup_table: Optional[str] = None
+ conn: sql.Connection, store_name: str, col_names: list[str], backup_table: str | None = None
  ) -> None:
  """
  Safe version with error handling and optional backup.