pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (153) hide show
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +144 -118
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +139 -124
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +315 -246
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +69 -78
  18. pixeltable/env.py +78 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +16 -4
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +28 -27
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +1033 -6
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +36 -31
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +75 -40
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/document.py +88 -57
  109. pixeltable/iterators/video.py +66 -37
  110. pixeltable/metadata/converters/convert_18.py +2 -2
  111. pixeltable/metadata/converters/convert_19.py +2 -2
  112. pixeltable/metadata/converters/convert_20.py +2 -2
  113. pixeltable/metadata/converters/convert_21.py +2 -2
  114. pixeltable/metadata/converters/convert_22.py +2 -2
  115. pixeltable/metadata/converters/convert_24.py +2 -2
  116. pixeltable/metadata/converters/convert_25.py +2 -2
  117. pixeltable/metadata/converters/convert_26.py +2 -2
  118. pixeltable/metadata/converters/convert_29.py +4 -4
  119. pixeltable/metadata/converters/convert_34.py +2 -2
  120. pixeltable/metadata/converters/convert_36.py +2 -2
  121. pixeltable/metadata/converters/convert_38.py +2 -2
  122. pixeltable/metadata/converters/convert_39.py +1 -2
  123. pixeltable/metadata/converters/util.py +11 -13
  124. pixeltable/metadata/schema.py +22 -21
  125. pixeltable/metadata/utils.py +2 -6
  126. pixeltable/mypy/mypy_plugin.py +5 -5
  127. pixeltable/plan.py +32 -34
  128. pixeltable/share/packager.py +7 -7
  129. pixeltable/share/publish.py +3 -3
  130. pixeltable/store.py +126 -41
  131. pixeltable/type_system.py +43 -46
  132. pixeltable/utils/__init__.py +1 -2
  133. pixeltable/utils/arrow.py +4 -4
  134. pixeltable/utils/av.py +74 -38
  135. pixeltable/utils/azure_store.py +305 -0
  136. pixeltable/utils/code.py +1 -2
  137. pixeltable/utils/dbms.py +15 -19
  138. pixeltable/utils/description_helper.py +2 -3
  139. pixeltable/utils/documents.py +5 -6
  140. pixeltable/utils/exception_handler.py +2 -2
  141. pixeltable/utils/filecache.py +5 -5
  142. pixeltable/utils/formatter.py +4 -6
  143. pixeltable/utils/gcs_store.py +9 -9
  144. pixeltable/utils/local_store.py +17 -17
  145. pixeltable/utils/object_stores.py +59 -43
  146. pixeltable/utils/s3_store.py +35 -30
  147. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
  148. pixeltable-0.4.19.dist-info/RECORD +213 -0
  149. pixeltable/__version__.py +0 -3
  150. pixeltable-0.4.17.dist-info/RECORD +0 -211
  151. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  152. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  153. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/index/base.py CHANGED
@@ -5,7 +5,9 @@ from typing import Any
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
- from pixeltable import catalog, exprs
8
+ import pixeltable.catalog as catalog
9
+ import pixeltable.exprs as exprs
10
+ import pixeltable.type_system as ts
9
11
 
10
12
 
11
13
  class IndexBase(abc.ABC):
@@ -18,12 +20,14 @@ class IndexBase(abc.ABC):
18
20
  """
19
21
 
20
22
  @abc.abstractmethod
21
- def __init__(self, c: catalog.Column, **kwargs: Any):
23
+ def __init__(self, **kwargs: Any):
22
24
  pass
23
25
 
24
26
  @abc.abstractmethod
25
- def index_value_expr(self) -> exprs.Expr:
26
- """Return expression that computes the value that goes into the index"""
27
+ def create_value_expr(self, c: catalog.Column) -> exprs.Expr:
28
+ """
29
+ Validates that the index can be created on column c and returns an expression that computes the index value.
30
+ """
27
31
  pass
28
32
 
29
33
  @abc.abstractmethod
@@ -32,13 +36,13 @@ class IndexBase(abc.ABC):
32
36
  pass
33
37
 
34
38
  @abc.abstractmethod
35
- def index_sa_type(self) -> sql.types.TypeEngine:
39
+ def get_index_sa_type(self, value_col_type: ts.ColumnType) -> sql.types.TypeEngine:
36
40
  """Return the sqlalchemy type of the index value column"""
37
41
  pass
38
42
 
39
43
  @abc.abstractmethod
40
- def create_index(self, index_name: str, index_value_col: catalog.Column) -> None:
41
- """Create the index on the index value column"""
44
+ def sa_index(self, index_name: str, index_value_col: catalog.Column) -> sql.Index:
45
+ """Return a sqlalchemy Index instance"""
42
46
  pass
43
47
 
44
48
  @abc.abstractmethod
@@ -57,5 +61,5 @@ class IndexBase(abc.ABC):
57
61
 
58
62
  @classmethod
59
63
  @abc.abstractmethod
60
- def from_dict(cls, c: catalog.Column, d: dict) -> IndexBase:
64
+ def from_dict(cls, d: dict) -> IndexBase:
61
65
  pass
pixeltable/index/btree.py CHANGED
@@ -1,18 +1,18 @@
1
- from typing import TYPE_CHECKING, Optional
1
+ from typing import TYPE_CHECKING
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
5
5
  # TODO: why does this import result in a circular import, but the one im embedding_index.py doesn't?
6
6
  # import pixeltable.catalog as catalog
7
7
  import pixeltable.exceptions as excs
8
- from pixeltable import catalog, exprs
9
- from pixeltable.env import Env
8
+ import pixeltable.exprs as exprs
9
+ import pixeltable.type_system as ts
10
10
  from pixeltable.func.udf import udf
11
11
 
12
12
  from .base import IndexBase
13
13
 
14
14
  if TYPE_CHECKING:
15
- import pixeltable.exprs
15
+ import pixeltable.catalog as catalog
16
16
 
17
17
 
18
18
  class BtreeIndex(IndexBase):
@@ -22,42 +22,39 @@ class BtreeIndex(IndexBase):
22
22
 
23
23
  MAX_STRING_LEN = 256
24
24
 
25
- value_expr: 'pixeltable.exprs.Expr'
26
-
27
25
  @staticmethod
28
26
  @udf
29
- def str_filter(s: Optional[str]) -> Optional[str]:
27
+ def str_filter(s: str | None) -> str | None:
30
28
  if s is None:
31
29
  return None
32
30
  return s[: BtreeIndex.MAX_STRING_LEN]
33
31
 
34
- def __init__(self, c: 'catalog.Column'):
32
+ def __init__(self) -> None:
33
+ pass
34
+
35
+ def create_value_expr(self, c: 'catalog.Column') -> 'exprs.Expr':
35
36
  if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
36
37
  raise excs.Error(f'Index on column {c.name}: B-tree index requires scalar or media type, got {c.col_type}')
38
+ value_expr: exprs.Expr
37
39
  if c.col_type.is_media_type():
38
40
  # an index on a media column is an index on the file url
39
41
  # no validation for media columns: we're only interested in the string value
40
- self.value_expr = exprs.ColumnRef(c, perform_validation=False)
42
+ value_expr = exprs.ColumnRef(c, perform_validation=False)
41
43
  else:
42
- self.value_expr = (
44
+ value_expr = (
43
45
  BtreeIndex.str_filter(exprs.ColumnRef(c)) if c.col_type.is_string_type() else exprs.ColumnRef(c)
44
46
  )
45
-
46
- def index_value_expr(self) -> 'exprs.Expr':
47
- return self.value_expr
47
+ return value_expr
48
48
 
49
49
  def records_value_errors(self) -> bool:
50
50
  return False
51
51
 
52
- def index_sa_type(self) -> sql.types.TypeEngine:
52
+ def get_index_sa_type(self, val_col_type: ts.ColumnType) -> sql.types.TypeEngine:
53
53
  """Return the sqlalchemy type of the index value column"""
54
- return self.value_expr.col_type.to_sa_type()
54
+ return val_col_type.to_sa_type()
55
55
 
56
- def create_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
57
- """Create the index on the index value column"""
58
- idx = sql.Index(index_name, index_value_col.sa_col, postgresql_using='btree')
59
- conn = Env.get().conn
60
- idx.create(bind=conn)
56
+ def sa_index(self, store_index_name: str, index_value_col: 'catalog.Column') -> sql.Index:
57
+ return sql.Index(store_index_name, index_value_col.sa_col, postgresql_using='btree')
61
58
 
62
59
  def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
63
60
  """Drop the index on the index value column"""
@@ -72,5 +69,5 @@ class BtreeIndex(IndexBase):
72
69
  return {}
73
70
 
74
71
  @classmethod
75
- def from_dict(cls, c: 'catalog.Column', d: dict) -> 'BtreeIndex':
76
- return cls(c)
72
+ def from_dict(cls, d: dict) -> 'BtreeIndex':
73
+ return cls()
@@ -1,16 +1,18 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import enum
4
- from typing import Any, ClassVar, Optional
4
+ from typing import Any, ClassVar
5
5
 
6
6
  import numpy as np
7
7
  import pgvector.sqlalchemy # type: ignore[import-untyped]
8
8
  import PIL.Image
9
9
  import sqlalchemy as sql
10
10
 
11
+ import pixeltable.catalog as catalog
11
12
  import pixeltable.exceptions as excs
13
+ import pixeltable.exprs as exprs
14
+ import pixeltable.func as func
12
15
  import pixeltable.type_system as ts
13
- from pixeltable import catalog, exprs, func
14
16
  from pixeltable.env import Env
15
17
 
16
18
  from .base import IndexBase
@@ -39,28 +41,23 @@ class EmbeddingIndex(IndexBase):
39
41
  }
40
42
 
41
43
  metric: Metric
42
- value_expr: exprs.FunctionCall
43
- string_embed: Optional[func.Function]
44
- image_embed: Optional[func.Function]
44
+ string_embed: func.Function | None
45
+ image_embed: func.Function | None
45
46
  string_embed_signature_idx: int
46
47
  image_embed_signature_idx: int
47
- index_col_type: pgvector.sqlalchemy.Vector
48
48
 
49
49
  def __init__(
50
50
  self,
51
- c: catalog.Column,
52
51
  metric: str,
53
- embed: Optional[func.Function] = None,
54
- string_embed: Optional[func.Function] = None,
55
- image_embed: Optional[func.Function] = None,
52
+ embed: func.Function | None = None,
53
+ string_embed: func.Function | None = None,
54
+ image_embed: func.Function | None = None,
56
55
  ):
57
56
  if embed is None and string_embed is None and image_embed is None:
58
57
  raise excs.Error('At least one of `embed`, `string_embed`, or `image_embed` must be specified')
59
58
  metric_names = [m.name.lower() for m in self.Metric]
60
59
  if metric.lower() not in metric_names:
61
60
  raise excs.Error(f'Invalid metric {metric}, must be one of {metric_names}')
62
- if not c.col_type.is_string_type() and not c.col_type.is_image_type():
63
- raise excs.Error('Embedding index requires string or image column')
64
61
 
65
62
  self.string_embed = None
66
63
  self.image_embed = None
@@ -102,47 +99,42 @@ class EmbeddingIndex(IndexBase):
102
99
  )
103
100
 
104
101
  # Now validate the return types of the embedding functions.
105
-
106
102
  if self.string_embed is not None:
107
103
  self._validate_embedding_fn(self.string_embed)
108
-
109
104
  if self.image_embed is not None:
110
105
  self._validate_embedding_fn(self.image_embed)
111
106
 
107
+ self.metric = self.Metric[metric.upper()]
108
+
109
+ def create_value_expr(self, c: catalog.Column) -> exprs.Expr:
110
+ if not c.col_type.is_string_type() and not c.col_type.is_image_type():
111
+ raise excs.Error(
112
+ f'Embedding index requires string or image column, column {c.name!r} has type {c.col_type}'
113
+ )
112
114
  if c.col_type.is_string_type() and self.string_embed is None:
113
115
  raise excs.Error(f"Text embedding function is required for column {c.name} (parameter 'string_embed')")
114
116
  if c.col_type.is_image_type() and self.image_embed is None:
115
117
  raise excs.Error(f"Image embedding function is required for column {c.name} (parameter 'image_embed')")
116
118
 
117
- self.metric = self.Metric[metric.upper()]
118
- self.value_expr = (
119
+ return (
119
120
  self.string_embed(exprs.ColumnRef(c))
120
121
  if c.col_type.is_string_type()
121
122
  else self.image_embed(exprs.ColumnRef(c))
122
123
  )
123
- assert isinstance(self.value_expr.col_type, ts.ArrayType)
124
- vector_size = self.value_expr.col_type.shape[0]
125
- assert vector_size is not None
126
- self.index_col_type = pgvector.sqlalchemy.Vector(vector_size)
127
-
128
- def index_value_expr(self) -> exprs.Expr:
129
- """Return expression that computes the value that goes into the index"""
130
- return self.value_expr
131
124
 
132
125
  def records_value_errors(self) -> bool:
133
126
  return True
134
127
 
135
- def index_sa_type(self) -> sql.types.TypeEngine:
136
- """Return the sqlalchemy type of the index value column"""
137
- return self.index_col_type
128
+ def get_index_sa_type(self, val_col_type: ts.ColumnType) -> sql.types.TypeEngine:
129
+ assert isinstance(val_col_type, ts.ArrayType) and val_col_type.shape is not None
130
+ vector_size = val_col_type.shape[0]
131
+ assert vector_size is not None
132
+ return pgvector.sqlalchemy.Vector(vector_size)
138
133
 
139
- def create_index(self, index_name: str, index_value_col: catalog.Column) -> None:
134
+ def sa_index(self, store_index_name: str, index_value_col: 'catalog.Column') -> sql.Index:
140
135
  """Create the index on the index value column"""
141
- Env.get().dbms.create_vector_index(
142
- index_name=index_name,
143
- index_value_sa_col=index_value_col.sa_col,
144
- conn=Env.get().conn,
145
- metric=self.PGVECTOR_OPS[self.metric],
136
+ return Env.get().dbms.sa_vector_index(
137
+ store_index_name, index_value_col.sa_col, metric=self.PGVECTOR_OPS[self.metric]
146
138
  )
147
139
 
148
140
  def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
@@ -153,6 +145,7 @@ class EmbeddingIndex(IndexBase):
153
145
  def similarity_clause(self, val_column: catalog.Column, item: Any) -> sql.ColumnElement:
154
146
  """Create a ColumnElement that represents '<val_column> <op> <item>'"""
155
147
  assert isinstance(item, (str, PIL.Image.Image))
148
+ embedding: np.ndarray
156
149
  if isinstance(item, str):
157
150
  assert self.string_embed is not None
158
151
  embedding = self.string_embed.exec([item], {})
@@ -171,7 +164,7 @@ class EmbeddingIndex(IndexBase):
171
164
  def order_by_clause(self, val_column: catalog.Column, item: Any, is_asc: bool) -> sql.ColumnElement:
172
165
  """Create a ColumnElement that is used in an ORDER BY clause"""
173
166
  assert isinstance(item, (str, PIL.Image.Image))
174
- embedding: Optional[np.ndarray] = None
167
+ embedding: np.ndarray | None = None
175
168
  if isinstance(item, str):
176
169
  assert self.string_embed is not None
177
170
  embedding = self.string_embed.exec([item], {})
@@ -196,9 +189,7 @@ class EmbeddingIndex(IndexBase):
196
189
  return 'embedding'
197
190
 
198
191
  @classmethod
199
- def _resolve_embedding_fn(
200
- cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type
201
- ) -> Optional[func.Function]:
192
+ def _resolve_embedding_fn(cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type) -> func.Function | None:
202
193
  """Find an overload resolution for `embed_fn` that matches the given type."""
203
194
  assert isinstance(embed_fn, func.Function)
204
195
  for resolved_fn in embed_fn._resolved_fns:
@@ -252,7 +243,7 @@ class EmbeddingIndex(IndexBase):
252
243
  }
253
244
 
254
245
  @classmethod
255
- def from_dict(cls, c: catalog.Column, d: dict) -> EmbeddingIndex:
246
+ def from_dict(cls, d: dict) -> EmbeddingIndex:
256
247
  string_embed = func.Function.from_dict(d['string_embed']) if d['string_embed'] is not None else None
257
248
  image_embed = func.Function.from_dict(d['image_embed']) if d['image_embed'] is not None else None
258
- return cls(c, metric=d['metric'], string_embed=string_embed, image_embed=image_embed)
249
+ return cls(metric=d['metric'], string_embed=string_embed, image_embed=image_embed)
pixeltable/io/datarows.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Iterable, Optional
3
+ from typing import Any, Iterable
4
4
 
5
5
  import pixeltable as pxt
6
6
  import pixeltable.type_system as ts
@@ -60,7 +60,7 @@ def import_rows(
60
60
  tbl_path: str,
61
61
  rows: list[dict[str, Any]],
62
62
  *,
63
- schema_overrides: Optional[dict[str, Any]] = None,
63
+ schema_overrides: dict[str, Any] | None = None,
64
64
  primary_key: str | list[str] | None = None,
65
65
  num_retained_versions: int = 10,
66
66
  comment: str = '',
@@ -104,7 +104,7 @@ def import_json(
104
104
  tbl_path: str,
105
105
  filepath_or_url: str,
106
106
  *,
107
- schema_overrides: Optional[dict[str, Any]] = None,
107
+ schema_overrides: dict[str, Any] | None = None,
108
108
  primary_key: str | list[str] | None = None,
109
109
  num_retained_versions: int = 10,
110
110
  comment: str = '',
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import abc
4
4
  import itertools
5
5
  import logging
6
- from typing import Any, Optional
6
+ from typing import Any
7
7
 
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.type_system as ts
@@ -68,10 +68,7 @@ class Project(ExternalStore, abc.ABC):
68
68
  stored_proxies: dict[ColumnHandle, ColumnHandle] # original col -> proxy col
69
69
 
70
70
  def __init__(
71
- self,
72
- name: str,
73
- col_mapping: dict[ColumnHandle, str],
74
- stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]],
71
+ self, name: str, col_mapping: dict[ColumnHandle, str], stored_proxies: dict[ColumnHandle, ColumnHandle] | None
75
72
  ):
76
73
  super().__init__(name)
77
74
  self._col_mapping = col_mapping
@@ -190,7 +187,7 @@ class Project(ExternalStore, abc.ABC):
190
187
  table: Table,
191
188
  export_cols: dict[str, ts.ColumnType],
192
189
  import_cols: dict[str, ts.ColumnType],
193
- col_mapping: Optional[dict[str, str]],
190
+ col_mapping: dict[str, str] | None,
194
191
  ) -> dict[ColumnHandle, str]:
195
192
  """
196
193
  Verifies that the specified `col_mapping` is valid. In particular, checks that:
@@ -217,19 +214,19 @@ class Project(ExternalStore, abc.ABC):
217
214
  if t_col not in t_cols:
218
215
  if is_user_specified_col_mapping:
219
216
  raise excs.Error(
220
- f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{table._name}` '
217
+ f'Column name {t_col!r} appears as a key in `col_mapping`, but {table._display_str()} '
221
218
  'contains no such column.'
222
219
  )
223
220
  else:
224
221
  raise excs.Error(
225
- f'Column `{t_col}` does not exist in Table `{table._name}`. Either add a column `{t_col}`, '
222
+ f'Column {t_col!r} does not exist in {table._display_str()}. Either add a column {t_col!r}, '
226
223
  f'or specify a `col_mapping` to associate a different column with '
227
- f'the external field `{ext_col}`.'
224
+ f'the external field {ext_col!r}.'
228
225
  )
229
226
  if ext_col not in export_cols and ext_col not in import_cols:
230
227
  raise excs.Error(
231
- f'Column name `{ext_col}` appears as a value in `col_mapping`, but the external store '
232
- f'configuration has no column `{ext_col}`.'
228
+ f'Column name {ext_col!r} appears as a value in `col_mapping`, but the external store '
229
+ f'configuration has no column {ext_col!r}.'
233
230
  )
234
231
  col_ref = table[t_col]
235
232
  assert isinstance(col_ref, exprs.ColumnRef)
@@ -244,19 +241,19 @@ class Project(ExternalStore, abc.ABC):
244
241
  ext_col_type = export_cols[ext_col]
245
242
  if not ext_col_type.is_supertype_of(t_col_type, ignore_nullable=True):
246
243
  raise excs.Error(
247
- f'Column `{t_col}` cannot be exported to external column `{ext_col}` '
244
+ f'Column {t_col!r} cannot be exported to external column {ext_col!r} '
248
245
  f'(incompatible types; expecting `{ext_col_type}`)'
249
246
  )
250
247
  if ext_col in import_cols:
251
248
  # Validate that the external column can be assigned to the table column
252
249
  if table._tbl_version_path.get_column(t_col).is_computed:
253
250
  raise excs.Error(
254
- f'Column `{t_col}` is a computed column, which cannot be populated from an external column'
251
+ f'Column {t_col!r} is a computed column, which cannot be populated from an external column'
255
252
  )
256
253
  ext_col_type = import_cols[ext_col]
257
254
  if not t_col_type.is_supertype_of(ext_col_type, ignore_nullable=True):
258
255
  raise excs.Error(
259
- f'Column `{t_col}` cannot be imported from external column `{ext_col}` '
256
+ f'Column {t_col!r} cannot be imported from external column {ext_col!r} '
260
257
  f'(incompatible types; expecting `{ext_col_type}`)'
261
258
  )
262
259
  return resolved_col_mapping
@@ -271,7 +268,7 @@ class MockProject(Project):
271
268
  export_cols: dict[str, ts.ColumnType],
272
269
  import_cols: dict[str, ts.ColumnType],
273
270
  col_mapping: dict[ColumnHandle, str],
274
- stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]] = None,
271
+ stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
275
272
  ):
276
273
  super().__init__(name, col_mapping, stored_proxies)
277
274
  self.export_cols = export_cols
@@ -285,7 +282,7 @@ class MockProject(Project):
285
282
  name: str,
286
283
  export_cols: dict[str, ts.ColumnType],
287
284
  import_cols: dict[str, ts.ColumnType],
288
- col_mapping: Optional[dict[str, str]] = None,
285
+ col_mapping: dict[str, str] | None = None,
289
286
  ) -> 'MockProject':
290
287
  col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
291
288
  return cls(name, export_cols, import_cols, col_mapping)
pixeltable/io/fiftyone.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import os
2
- from typing import Any, Iterator, Optional
2
+ from typing import Any, Iterator
3
3
 
4
4
  import fiftyone as fo # type: ignore[import-untyped]
5
5
  import fiftyone.utils.data as foud # type: ignore[import-untyped]
@@ -20,7 +20,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
20
20
  __image_format: str # format to use for any exported images that are not already stored on disk
21
21
  __labels: dict[str, tuple[exprs.Expr, type[fo.Label]]] # label_name -> (expr, label_cls)
22
22
  __image_idx: int # index of the image expr in the select list
23
- __localpath_idx: Optional[int] # index of the image localpath in the select list, if present
23
+ __localpath_idx: int | None # index of the image localpath in the select list, if present
24
24
  __row_iter: Iterator[list] # iterator over the table rows, to be convered to FiftyOne samples
25
25
 
26
26
  def __init__(
@@ -30,10 +30,10 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
30
30
  image_format: str,
31
31
  classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
32
32
  detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
33
- dataset_dir: Optional[os.PathLike] = None,
33
+ dataset_dir: os.PathLike | None = None,
34
34
  shuffle: bool = False,
35
35
  seed: int | float | str | bytes | bytearray | None = None,
36
- max_samples: Optional[int] = None,
36
+ max_samples: int | None = None,
37
37
  ):
38
38
  super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
39
39
 
@@ -90,7 +90,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
90
90
  df = tbl.select(*selection)
91
91
  self.__row_iter = df._output_row_iterator()
92
92
 
93
- def __next__(self) -> tuple[str, Optional[fo.ImageMetadata], Optional[dict[str, fo.Label]]]:
93
+ def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
94
94
  row = next(self.__row_iter)
95
95
  img = row[self.__image_idx]
96
96
  assert isinstance(img, PIL.Image.Image)
pixeltable/io/globals.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any, Literal, Optional
3
+ from typing import TYPE_CHECKING, Any, Literal
4
4
 
5
5
  import pixeltable as pxt
6
6
  import pixeltable.exceptions as excs
@@ -15,12 +15,12 @@ if TYPE_CHECKING:
15
15
  def create_label_studio_project(
16
16
  t: Table,
17
17
  label_config: str,
18
- name: Optional[str] = None,
19
- title: Optional[str] = None,
18
+ name: str | None = None,
19
+ title: str | None = None,
20
20
  media_import_method: Literal['post', 'file', 'url'] = 'post',
21
- col_mapping: Optional[dict[str, str]] = None,
21
+ col_mapping: dict[str, str] | None = None,
22
22
  sync_immediately: bool = True,
23
- s3_configuration: Optional[dict[str, Any]] = None,
23
+ s3_configuration: dict[str, Any] | None = None,
24
24
  **kwargs: Any,
25
25
  ) -> UpdateStatus:
26
26
  """
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import typing
4
- from typing import Any, Optional
4
+ from typing import Any
5
5
 
6
6
  import pixeltable as pxt
7
7
  import pixeltable.type_system as ts
@@ -36,7 +36,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
36
36
  }
37
37
 
38
38
 
39
- def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.ColumnType]:
39
+ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> ts.ColumnType | None:
40
40
  """Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
41
41
  import datasets
42
42
 
@@ -76,7 +76,7 @@ def _get_hf_schema(dataset: datasets.Dataset | datasets.DatasetDict) -> datasets
76
76
 
77
77
  def huggingface_schema_to_pxt_schema(
78
78
  hf_schema: datasets.Features, schema_overrides: dict[str, Any], primary_key: list[str]
79
- ) -> dict[str, Optional[ts.ColumnType]]:
79
+ ) -> dict[str, ts.ColumnType | None]:
80
80
  """Generate a pixeltable schema from a huggingface dataset schema.
81
81
  Columns without a known mapping are mapped to None
82
82
  """
@@ -93,7 +93,7 @@ def import_huggingface_dataset(
93
93
  table_path: str,
94
94
  dataset: datasets.Dataset | datasets.DatasetDict,
95
95
  *,
96
- schema_overrides: Optional[dict[str, Any]] = None,
96
+ schema_overrides: dict[str, Any] | None = None,
97
97
  primary_key: str | list[str] | None = None,
98
98
  **kwargs: Any,
99
99
  ) -> pxt.Table:
@@ -4,7 +4,7 @@ import logging
4
4
  import os
5
5
  from dataclasses import dataclass
6
6
  from pathlib import Path
7
- from typing import Any, Iterator, Literal, Optional
7
+ from typing import Any, Iterator, Literal
8
8
  from xml.etree import ElementTree as ET
9
9
 
10
10
  import label_studio_sdk
@@ -53,7 +53,7 @@ class LabelStudioProject(Project):
53
53
 
54
54
  project_id: int # Label Studio project ID
55
55
  media_import_method: Literal['post', 'file', 'url']
56
- _project: Optional[ls_project.Project]
56
+ _project: ls_project.Project | None
57
57
 
58
58
  def __init__(
59
59
  self,
@@ -61,7 +61,7 @@ class LabelStudioProject(Project):
61
61
  project_id: int,
62
62
  media_import_method: Literal['post', 'file', 'url'],
63
63
  col_mapping: dict[ColumnHandle, str],
64
- stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]] = None,
64
+ stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
65
65
  ):
66
66
  self.project_id = project_id
67
67
  self.media_import_method = media_import_method
@@ -278,8 +278,8 @@ class LabelStudioProject(Project):
278
278
  # columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
279
279
  # preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
280
280
  # We have to wait until we begin iterating to populate them, so they're initially `None`.
281
- rl_col_idxs: Optional[list[int]] = None
282
- data_col_idxs: Optional[list[int]] = None
281
+ rl_col_idxs: list[int] | None = None
282
+ data_col_idxs: list[int] | None = None
283
283
 
284
284
  row_ids_in_pxt: set[tuple] = set()
285
285
  tasks_created = 0
@@ -349,7 +349,7 @@ class LabelStudioProject(Project):
349
349
  return sync_status
350
350
 
351
351
  @classmethod
352
- def __validate_fileurl(cls, col: Column, url: str) -> Optional[str]:
352
+ def __validate_fileurl(cls, col: Column, url: str) -> str | None:
353
353
  # Check that the URL is one that will be visible to Label Studio. If it isn't, log an info message
354
354
  # to help users debug the issue.
355
355
  if not (url.startswith('http://') or url.startswith('https://')):
@@ -497,7 +497,7 @@ class LabelStudioProject(Project):
497
497
 
498
498
  @classmethod
499
499
  def __coco_to_predictions(
500
- cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: Optional[int] = None
500
+ cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: int | None = None
501
501
  ) -> dict[str, Any]:
502
502
  width = coco_annotations['image']['width']
503
503
  height = coco_annotations['image']['height']
@@ -549,11 +549,11 @@ class LabelStudioProject(Project):
549
549
  cls,
550
550
  t: Table,
551
551
  label_config: str,
552
- name: Optional[str],
553
- title: Optional[str],
552
+ name: str | None,
553
+ title: str | None,
554
554
  media_import_method: Literal['post', 'file', 'url'],
555
- col_mapping: Optional[dict[str, str]],
556
- s3_configuration: Optional[dict[str, Any]],
555
+ col_mapping: dict[str, str] | None,
556
+ s3_configuration: dict[str, Any] | None,
557
557
  **kwargs: Any,
558
558
  ) -> 'LabelStudioProject':
559
559
  """
@@ -652,7 +652,7 @@ class LabelStudioProject(Project):
652
652
 
653
653
  @dataclass(frozen=True)
654
654
  class _DataKey:
655
- name: Optional[str] # The 'name' attribute of the data key; may differ from the field name
655
+ name: str | None # The 'name' attribute of the data key; may differ from the field name
656
656
  column_type: ts.ColumnType
657
657
 
658
658
 
pixeltable/io/pandas.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import os
2
- from typing import Any, Optional
2
+ from typing import Any
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
@@ -16,7 +16,7 @@ def import_pandas(
16
16
  tbl_name: str,
17
17
  df: pd.DataFrame,
18
18
  *,
19
- schema_overrides: Optional[dict[str, Any]] = None,
19
+ schema_overrides: dict[str, Any] | None = None,
20
20
  primary_key: str | list[str] | None = None,
21
21
  num_retained_versions: int = 10,
22
22
  comment: str = '',
@@ -56,7 +56,7 @@ def import_pandas(
56
56
  def import_csv(
57
57
  tbl_name: str,
58
58
  filepath_or_buffer: str | os.PathLike,
59
- schema_overrides: Optional[dict[str, Any]] = None,
59
+ schema_overrides: dict[str, Any] | None = None,
60
60
  primary_key: str | list[str] | None = None,
61
61
  num_retained_versions: int = 10,
62
62
  comment: str = '',
@@ -86,7 +86,7 @@ def import_excel(
86
86
  tbl_name: str,
87
87
  io: str | os.PathLike,
88
88
  *,
89
- schema_overrides: Optional[dict[str, Any]] = None,
89
+ schema_overrides: dict[str, Any] | None = None,
90
90
  primary_key: str | list[str] | None = None,
91
91
  num_retained_versions: int = 10,
92
92
  comment: str = '',
@@ -141,7 +141,7 @@ def df_infer_schema(
141
141
  return pd_schema
142
142
 
143
143
 
144
- def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> Optional[ts.ColumnType]:
144
+ def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> ts.ColumnType | None:
145
145
  """
146
146
  Determines a pixeltable ColumnType from a pandas dtype
147
147
 
@@ -192,7 +192,7 @@ def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable:
192
192
 
193
193
 
194
194
  def _df_row_to_pxt_row(
195
- row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping: Optional[dict[str, str]]
195
+ row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping: dict[str, str] | None
196
196
  ) -> dict[str, Any]:
197
197
  """Convert a row to insertable format"""
198
198
  pxt_row: dict[str, Any] = {}
pixeltable/io/parquet.py CHANGED
@@ -4,7 +4,7 @@ import json
4
4
  import logging
5
5
  import typing
6
6
  from pathlib import Path
7
- from typing import Any, Optional
7
+ from typing import Any
8
8
 
9
9
  import pixeltable as pxt
10
10
  import pixeltable.exceptions as excs
@@ -71,7 +71,7 @@ def import_parquet(
71
71
  table: str,
72
72
  *,
73
73
  parquet_path: str,
74
- schema_overrides: Optional[dict[str, Any]] = None,
74
+ schema_overrides: dict[str, Any] | None = None,
75
75
  primary_key: str | list[str] | None = None,
76
76
  **kwargs: Any,
77
77
  ) -> pxt.Table: